From b37382d7e16c6e4d1b858668eca7259266274b2d Mon Sep 17 00:00:00 2001 From: Stephane Rigaud Date: Mon, 25 Sep 2023 16:55:34 +0200 Subject: [PATCH 1/2] split and add preamble --- preamble/preamble_def.cl | 119 ++++++++++++++++++++++++++++ preamble/preamble_def.cu | 167 +++++++++++++++++++++++++++++++++++++++ preamble/preamble_io.cl | 74 +++++++++++++++++ preamble/preamble_io.cu | 76 ++++++++++++++++++ 4 files changed, 436 insertions(+) create mode 100644 preamble/preamble_def.cl create mode 100644 preamble/preamble_def.cu create mode 100644 preamble/preamble_io.cl create mode 100644 preamble/preamble_io.cu diff --git a/preamble/preamble_def.cl b/preamble/preamble_def.cl new file mode 100644 index 0000000..81d87d2 --- /dev/null +++ b/preamble/preamble_def.cl @@ -0,0 +1,119 @@ +#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable +#pragma OPENCL EXTENSION cl_amd_printf : enable +#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable + +#ifndef M_PI + #define M_PI 3.14159265358979323846f /* pi */ +#endif + +#ifndef M_LOG2E + #define M_LOG2E 1.4426950408889634074f /* log_2 e */ +#endif + +#ifndef M_LOG10E + #define M_LOG10E 0.43429448190325182765f /* log_10 e */ +#endif + +#ifndef M_LN2 + #define M_LN2 0.69314718055994530942f /* log_e 2 */ +#endif + +#ifndef M_LN10 + #define M_LN10 2.30258509299404568402f /* log_e 10 */ +#endif + +#ifndef BUFFER_READ_WRITE + #define BUFFER_READ_WRITE 1 + +#define MINMAX_TYPE int + + +inline uchar clij_convert_uchar_sat(float value) { + if (value > 255) { + return 255; + } + if (value < 0) { + return 0; + } + return (uchar)value; +} + + +inline char clij_convert_char_sat(float value) { + if (value > 127) { + return 127; + } + if (value < -128) { + return -128; + } + return (char)value; +} + + +inline ushort clij_convert_ushort_sat(float value) { + if (value > 65535) { + return 65535; + } + if (value < 0) { + return 0; + } + return (ushort)value; +} + + +inline short clij_convert_short_sat(float value) { + if (value > 32767) { + return 32767; + } + if (value < -32768) { + return -32768; + } + return (short)value; +} + +inline uint clij_convert_uint_sat(float value) { + if (value > 4294967295) { + return 4294967295; + } + if (value < 0) { + return 0; + } + return (uint)value; +} + +inline int clij_convert_int_sat(float value) { + if (value > 2147483647) { + return 2147483647; + } + if (value < -2147483648) { + return -2147483648; + } + return (int)value; +} + +inline uint clij_convert_ulong_sat(float value) { + if (value > 18446744073709551615) { + return 18446744073709551615; + } + if (value < 0) { + return 0; + } + return (ulong)value; +} + +inline int clij_convert_long_sat(float value) { + if (value > 9223372036854775807) { + return 9223372036854775807; + } + if (value < -9223372036854775808 ) { + return -9223372036854775808 ; + } + return (long)value; +} + +inline float clij_convert_float_sat(float value) { + return value; +} + +#define READ_IMAGE(a,b,c) READ_ ## a ## _IMAGE(a,b,c) +#define WRITE_IMAGE(a,b,c) WRITE_ ## a ## _IMAGE(a,b,c) \ No newline at end of file diff --git a/preamble/preamble_def.cu b/preamble/preamble_def.cu new file mode 100644 index 0000000..c18c56f --- /dev/null +++ b/preamble/preamble_def.cu @@ -0,0 +1,167 @@ +#define MINMAX_TYPE int +#define sampler_t int + +#define FLT_MIN 1.19209e-07 +#define FLT_MAX 1e+37 +#define MAX_ARRAY_SIZE 1000 + +#define uchar unsigned char +#define ushort unsigned short +#define uint unsigned int +#define ulong unsigned long + +__device__ inline int2 operator+(int2 a, int2 b) +{ + return make_int2(a.x + b.x, a.y + b.y); +} + +__device__ inline int4 operator+(int4 a, int4 b) +{ + return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); +} + +__device__ inline int2 operator*(int b, int2 a) +{ + return make_int2(b * a.x, b * a.y); +} + +__device__ inline int4 operator*(int b, int4 a) +{ + return make_int4(b * a.x, b * a.y, b * a.z, b * a.w); +} + +__device__ inline float pow(float x, int y) { + return pow(float(x), float(y)); +} + +__device__ inline float2 sqrt(float2 a) { + return make_float2(sqrt(a.x), sqrt(a.y)); +} + +__device__ inline float4 cross(float4 a, float4 b) +{ + return make_float4(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x, 0); +} + +__device__ inline float dot(float4 a, float4 b) +{ + return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; +} + +__device__ inline float length(float4 v) +{ + return sqrtf(dot(v, v)); +} + +__device__ inline unsigned int atomic_add(unsigned int* address, unsigned int value) { + return atomicAdd(address, value); +} + +__device__ inline uchar clij_convert_uchar_sat(float value) { + if (value > 255) { + return 255; + } + if (value < 0) { + return 0; + } + return (uchar)value; +} + +__device__ inline char clij_convert_char_sat(float value) { + if (value > 127) { + return 127; + } + if (value < -128) { + return -128; + } + return (char)value; +} + +__device__ inline ushort clij_convert_ushort_sat(float value) { + if (value > 65535) { + return 65535; + } + if (value < 0) { + return 0; + } + return (ushort)value; +} + +__device__ inline short clij_convert_short_sat(float value) { + if (value > 32767) { + return 32767; + } + if (value < -32768) { + return -32768; + } + return (short)value; +} + +__device__ inline uint clij_convert_uint_sat(float value) { + if (value > 4294967295) { + return 4294967295; + } + if (value < 0) { + return 0; + } + return (uint)value; +} + +__device__ inline uint convert_uint_sat(float value) { + if (value > 4294967295) { + return 4294967295; + } + if (value < 0) { + return 0; + } + return (uint)value; +} + +__device__ inline int clij_convert_int_sat(float value) { + if (value > 2147483647) { + return 2147483647; + } + if (value < -2147483648) { + return -2147483648; + } + return (int)value; +} + +__device__ inline uint clij_convert_ulong_sat(float value) { + if (value > 18446744073709551615) { + return 18446744073709551615; + } + if (value < 0) { + return 0; + } + return (ulong)value; +} + +__device__ inline int clij_convert_long_sat(float value) { + if (value > 9223372036854775807) { + return 9223372036854775807; + } + if (value < -9223372036854775808 ) { + return -9223372036854775808 ; + } + return (long)value; +} + +__device__ inline float clij_convert_float_sat(float value) { + return value; +} + +#define READ_IMAGE(a,b,c) READ_ ## a ## _IMAGE(a,b,c) +#define WRITE_IMAGE(a,b,c) WRITE_ ## a ## _IMAGE(a,b,c) + +#define GET_IMAGE_WIDTH(image_key) IMAGE_SIZE_ ## image_key ## _WIDTH +#define GET_IMAGE_HEIGHT(image_key) IMAGE_SIZE_ ## image_key ## _HEIGHT +#define GET_IMAGE_DEPTH(image_key) IMAGE_SIZE_ ## image_key ## _DEPTH + +#define CLK_NORMALIZED_COORDS_FALSE 1 +#define CLK_ADDRESS_CLAMP_TO_EDGE 2 +#define CLK_FILTER_NEAREST 4 +#define CLK_NORMALIZED_COORDS_TRUE 8 +#define CLK_ADDRESS_CLAMP 16 +#define CLK_FILTER_LINEAR 32 +#define CLK_ADDRESS_NONE 64 \ No newline at end of file diff --git a/preamble/preamble_io.cl b/preamble/preamble_io.cl new file mode 100644 index 0000000..afdad63 --- /dev/null +++ b/preamble/preamble_io.cl @@ -0,0 +1,74 @@ +inline {pixel_type}2 read_buffer3d{short_pixel_type}(int read_buffer_width, int read_buffer_height, int read_buffer_depth, __global {pixel_type} * buffer_var, sampler_t sampler, int4 position ) +{ + int4 pos = (int4){position.x, position.y, position.z, 0}; + if (true) { // if (CLK_ADDRESS_CLAMP_TO_EDGE & sampler) { + pos.x = max((MINMAX_TYPE)pos.x, (MINMAX_TYPE)0); + pos.y = max((MINMAX_TYPE)pos.y, (MINMAX_TYPE)0); + pos.z = max((MINMAX_TYPE)pos.z, (MINMAX_TYPE)0); + pos.x = min((MINMAX_TYPE)pos.x, (MINMAX_TYPE)read_buffer_width - 1); + pos.y = min((MINMAX_TYPE)pos.y, (MINMAX_TYPE)read_buffer_height - 1); + pos.z = min((MINMAX_TYPE)pos.z, (MINMAX_TYPE)read_buffer_depth - 1); + } + int pos_in_buffer = pos.x + pos.y * read_buffer_width + pos.z * read_buffer_width * read_buffer_height; + if (pos.x < 0 || pos.x >= read_buffer_width || pos.y < 0 || pos.y >= read_buffer_height || pos.z < 0 || pos.z >= read_buffer_depth) { + return ({pixel_type}2){0, 0}; + } + return ({pixel_type}2){buffer_var[pos_in_buffer],0}; +} + +inline void write_buffer3d{short_pixel_type}(int write_buffer_width, int write_buffer_height, int write_buffer_depth, __global {pixel_type} * buffer_var, int4 pos, short value ) +{ + int pos_in_buffer = pos.x + pos.y * write_buffer_width + pos.z * write_buffer_width * write_buffer_height; + if (pos.x < 0 || pos.x >= write_buffer_width || pos.y < 0 || pos.y >= write_buffer_height || pos.z < 0 || pos.z >= write_buffer_depth) { + return; + } + buffer_var[pos_in_buffer] = value; +} + +inline {pixel_type}2 read_buffer2d{short_pixel_type}(int read_buffer_width, int read_buffer_height, int read_buffer_depth, __global {pixel_type} * buffer_var, sampler_t sampler, int2 position ) +{ + int2 pos = (int2){position.x, position.y}; + if (true) { // if (CLK_ADDRESS_CLAMP_TO_EDGE & sampler) { + pos.x = max((MINMAX_TYPE)pos.x, (MINMAX_TYPE)0); + pos.y = max((MINMAX_TYPE)pos.y, (MINMAX_TYPE)0); + pos.x = min((MINMAX_TYPE)pos.x, (MINMAX_TYPE)read_buffer_width - 1); + pos.y = min((MINMAX_TYPE)pos.y, (MINMAX_TYPE)read_buffer_height - 1); + } + int pos_in_buffer = pos.x + pos.y * read_buffer_width; + if (pos.x < 0 || pos.x >= read_buffer_width || pos.y < 0 || pos.y >= read_buffer_height) { + return ({pixel_type}2){0, 0}; + } + return ({pixel_type}2){buffer_var[pos_in_buffer],0}; +} + +inline void write_buffer2d{short_pixel_type}(int write_buffer_width, int write_buffer_height, int write_buffer_depth, __global {pixel_type} * buffer_var, int2 pos, ushort value ) +{ + int pos_in_buffer = pos.x + pos.y * write_buffer_width; + if (pos.x < 0 || pos.x >= write_buffer_width || pos.y < 0 || pos.y >= write_buffer_height) { + return; + } + buffer_var[pos_in_buffer] = value; +} + +inline {pixel_type}2 read_buffer1d{short_pixel_type}(int read_buffer_width, int read_buffer_height, int read_buffer_depth, __global {pixel_type} * buffer_var, sampler_t sampler, int position ) +{ + int pos = (int){position}; + if (true) { // if (CLK_ADDRESS_CLAMP_TO_EDGE & sampler) { + pos = max((MINMAX_TYPE)pos, (MINMAX_TYPE)0); + pos = min((MINMAX_TYPE)pos, (MINMAX_TYPE)read_buffer_width - 1); + } + int pos_in_buffer = pos; + if (pos < 0 || pos >= read_buffer_width) { + return ({pixel_type}2){0,0}; + } + return ({pixel_type}2){buffer_var[pos_in_buffer],0}; +} + +inline void write_buffer1d{short_pixel_type}(int write_buffer_width, int write_buffer_height, int write_buffer_depth, __global {pixel_type} * buffer_var, int pos, short value ) +{ + int pos_in_buffer = pos; + if (pos < 0 || pos >= write_buffer_width) { + return; + } + buffer_var[pos_in_buffer] = value; +} \ No newline at end of file diff --git a/preamble/preamble_io.cu b/preamble/preamble_io.cu new file mode 100644 index 0000000..bf70e02 --- /dev/null +++ b/preamble/preamble_io.cu @@ -0,0 +1,76 @@ +__device__ inline {pixel_type}2 read_buffer3d{short_pixel_type}(int read_buffer_width, int read_buffer_height, int read_buffer_depth, {pixel_type} * buffer_var, int sampler, int4 position ) +{ + int4 pos = make_int4(position.x, position.y, position.z, 0); + + pos.x = max((MINMAX_TYPE)pos.x, (MINMAX_TYPE)0); + pos.y = max((MINMAX_TYPE)pos.y, (MINMAX_TYPE)0); + pos.z = max((MINMAX_TYPE)pos.z, (MINMAX_TYPE)0); + pos.x = min((MINMAX_TYPE)pos.x, (MINMAX_TYPE)read_buffer_width - 1); + pos.y = min((MINMAX_TYPE)pos.y, (MINMAX_TYPE)read_buffer_height - 1); + pos.z = min((MINMAX_TYPE)pos.z, (MINMAX_TYPE)read_buffer_depth - 1); + + int pos_in_buffer = pos.x + pos.y * read_buffer_width + pos.z * read_buffer_width * read_buffer_height; + if (pos.x < 0 || pos.x >= read_buffer_width || pos.y < 0 || pos.y >= read_buffer_height || pos.z < 0 || pos.z >= read_buffer_depth) { + return make_{pixel_type}2(0, 0); + } + return make_{pixel_type}2(buffer_var[pos_in_buffer],0); +} + +__device__ inline void write_buffer3d{short_pixel_type}(int write_buffer_width, int write_buffer_height, int write_buffer_depth, {pixel_type} * buffer_var, int4 pos, float value ) +{ + int pos_in_buffer = pos.x + pos.y * write_buffer_width + pos.z * write_buffer_width * write_buffer_height; + if (pos.x < 0 || pos.x >= write_buffer_width || pos.y < 0 || pos.y >= write_buffer_height || pos.z < 0 || pos.z >= write_buffer_depth) { + return; + } + buffer_var[pos_in_buffer] = value; +} + +__device__ inline {pixel_type}2 read_buffer2d{short_pixel_type}(int read_buffer_width, int read_buffer_height, int read_buffer_depth, {pixel_type} * buffer_var, int sampler, int2 position ) +{ + int4 pos = make_int4(position.x, position.y, 0, 0); + + pos.x = max((MINMAX_TYPE)pos.x, (MINMAX_TYPE)0); + pos.y = max((MINMAX_TYPE)pos.y, (MINMAX_TYPE)0); + pos.z = max((MINMAX_TYPE)pos.z, (MINMAX_TYPE)0); + pos.x = min((MINMAX_TYPE)pos.x, (MINMAX_TYPE)read_buffer_width - 1); + pos.y = min((MINMAX_TYPE)pos.y, (MINMAX_TYPE)read_buffer_height - 1); + pos.z = min((MINMAX_TYPE)pos.z, (MINMAX_TYPE)read_buffer_depth - 1); + + int pos_in_buffer = pos.x + pos.y * read_buffer_width; + if (pos.x < 0 || pos.x >= read_buffer_width || pos.y < 0 || pos.y >= read_buffer_height) { + return make_{pixel_type}2(0, 0); + } + return make_{pixel_type}2(buffer_var[pos_in_buffer],0); +} + +__device__ inline void write_buffer2d{short_pixel_type}(int write_buffer_width, int write_buffer_height, int write_buffer_depth, {pixel_type} * buffer_var, int2 pos, float value ) +{ + int pos_in_buffer = pos.x + pos.y * write_buffer_width; + if (pos.x < 0 || pos.x >= write_buffer_width || pos.y < 0 || pos.y >= write_buffer_height) { + return; + } + buffer_var[pos_in_buffer] = value; +} + +__device__ inline {pixel_type}2 read_buffer1d{short_pixel_type}(int read_buffer_width, int read_buffer_height, int read_buffer_depth, {pixel_type} * buffer_var, int sampler, int position ) +{ + int pos = position; + + pos = max((MINMAX_TYPE)pos, (MINMAX_TYPE)0); + pos = min((MINMAX_TYPE)pos, (MINMAX_TYPE)read_buffer_width - 1); + + int pos_in_buffer = pos; + if (pos < 0 || pos >= read_buffer_width) { + return make_{pixel_type}2(0, 0); + } + return make_{pixel_type}2(buffer_var[pos_in_buffer],0); +} + +__device__ inline void write_buffer1d{short_pixel_type}(int write_buffer_width, int write_buffer_height, int write_buffer_depth, {pixel_type} * buffer_var, int pos, ulong value ) +{ + int pos_in_buffer = pos; + if (pos < 0 || pos >= write_buffer_width) { + return; + } + buffer_var[pos_in_buffer] = value; +} \ No newline at end of file From cc1ac8280caf7231842d230003fdc89fa40a658c Mon Sep 17 00:00:00 2001 From: Stephane Rigaud Date: Tue, 26 Sep 2023 10:41:51 +0200 Subject: [PATCH 2/2] add define guards --- preamble/preamble_def.cl | 18 +++++++----------- preamble/preamble_def.cu | 8 ++++++-- preamble/preamble_io.cl | 8 +++++++- preamble/preamble_io.cu | 8 +++++++- 4 files changed, 27 insertions(+), 15 deletions(-) diff --git a/preamble/preamble_def.cl b/preamble/preamble_def.cl index 81d87d2..848bfd7 100644 --- a/preamble/preamble_def.cl +++ b/preamble/preamble_def.cl @@ -1,6 +1,9 @@ +#ifndef PREAMBLE_DEFINE +#define PREAMBLE_DEFINE + #pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable -#pragma OPENCL EXTENSION cl_amd_printf : enable #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable +// #pragma OPENCL EXTENSION cl_amd_printf : enable #ifndef M_PI #define M_PI 3.14159265358979323846f /* pi */ @@ -22,12 +25,6 @@ #define M_LN10 2.30258509299404568402f /* log_e 10 */ #endif -#ifndef BUFFER_READ_WRITE - #define BUFFER_READ_WRITE 1 - -#define MINMAX_TYPE int - - inline uchar clij_convert_uchar_sat(float value) { if (value > 255) { return 255; @@ -38,7 +35,6 @@ inline uchar clij_convert_uchar_sat(float value) { return (uchar)value; } - inline char clij_convert_char_sat(float value) { if (value > 127) { return 127; @@ -49,7 +45,6 @@ inline char clij_convert_char_sat(float value) { return (char)value; } - inline ushort clij_convert_ushort_sat(float value) { if (value > 65535) { return 65535; @@ -60,7 +55,6 @@ inline ushort clij_convert_ushort_sat(float value) { return (ushort)value; } - inline short clij_convert_short_sat(float value) { if (value > 32767) { return 32767; @@ -116,4 +110,6 @@ inline float clij_convert_float_sat(float value) { } #define READ_IMAGE(a,b,c) READ_ ## a ## _IMAGE(a,b,c) -#define WRITE_IMAGE(a,b,c) WRITE_ ## a ## _IMAGE(a,b,c) \ No newline at end of file +#define WRITE_IMAGE(a,b,c) WRITE_ ## a ## _IMAGE(a,b,c) + +#endif // PREAMBLE_DEFINE \ No newline at end of file diff --git a/preamble/preamble_def.cu b/preamble/preamble_def.cu index c18c56f..a416ce1 100644 --- a/preamble/preamble_def.cu +++ b/preamble/preamble_def.cu @@ -1,4 +1,6 @@ -#define MINMAX_TYPE int +#ifndef PREAMBLE_DEFINE +#define PREAMBLE_DEFINE + #define sampler_t int #define FLT_MIN 1.19209e-07 @@ -164,4 +166,6 @@ __device__ inline float clij_convert_float_sat(float value) { #define CLK_NORMALIZED_COORDS_TRUE 8 #define CLK_ADDRESS_CLAMP 16 #define CLK_FILTER_LINEAR 32 -#define CLK_ADDRESS_NONE 64 \ No newline at end of file +#define CLK_ADDRESS_NONE 64 + +#endif // PREAMBLE_DEFINE \ No newline at end of file diff --git a/preamble/preamble_io.cl b/preamble/preamble_io.cl index afdad63..6c29ce7 100644 --- a/preamble/preamble_io.cl +++ b/preamble/preamble_io.cl @@ -1,3 +1,7 @@ +#ifdef BUFFER_READ_WRITE +#define BUFFER_READ_WRITE +#define MINMAX_TYPE int + inline {pixel_type}2 read_buffer3d{short_pixel_type}(int read_buffer_width, int read_buffer_height, int read_buffer_depth, __global {pixel_type} * buffer_var, sampler_t sampler, int4 position ) { int4 pos = (int4){position.x, position.y, position.z, 0}; @@ -71,4 +75,6 @@ inline void write_buffer1d{short_pixel_type}(int write_buffer_width, int write_b return; } buffer_var[pos_in_buffer] = value; -} \ No newline at end of file +} + +#endif // BUFFER_READ_WRITE diff --git a/preamble/preamble_io.cu b/preamble/preamble_io.cu index bf70e02..0c6870b 100644 --- a/preamble/preamble_io.cu +++ b/preamble/preamble_io.cu @@ -1,3 +1,7 @@ +#ifdef BUFFER_READ_WRITE +#define BUFFER_READ_WRITE +#define MINMAX_TYPE int + __device__ inline {pixel_type}2 read_buffer3d{short_pixel_type}(int read_buffer_width, int read_buffer_height, int read_buffer_depth, {pixel_type} * buffer_var, int sampler, int4 position ) { int4 pos = make_int4(position.x, position.y, position.z, 0); @@ -73,4 +77,6 @@ __device__ inline void write_buffer1d{short_pixel_type}(int write_buffer_width, return; } buffer_var[pos_in_buffer] = value; -} \ No newline at end of file +} + +#endif // BUFFER_READ_WRITE \ No newline at end of file