Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Medfilt in OpenCL #2342

Merged
merged 27 commits into from
Dec 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
16a0b1e
Start implementing medfilt in OpenCL
kif Nov 20, 2024
f333557
Merge branch 'main' into 2313_medfilt_ng_opencl
kif Nov 21, 2024
7b71711
Change organisation of grid: collective work is always along dim 0
kif Nov 21, 2024
9a8d7ae
Merge branch '2313_medfilt_ng_opencl' of https://github.com/kif/pyFAI…
kif Nov 21, 2024
438d4a3
WIP
kif Nov 22, 2024
7c737c6
Untested medfilt kernel
kif Nov 25, 2024
f66755a
Code compiles (under nvidia)
kif Nov 26, 2024
06225a7
Code is integrated, but wrong for now...
kif Dec 4, 2024
92ac57f
Fix kernel for numerical precision
kif Dec 4, 2024
df1ba16
Going from 32->256 threads/WG gains 20% in speed
kif Dec 5, 2024
df29436
Remains at least a bug since one fragment of a pixel is missing
kif Dec 5, 2024
7b6300f
Fix upper limit noise
kif Dec 5, 2024
3ad9055
fix test
kif Dec 5, 2024
ebb7138
Add test and ensure engine compatibility
kif Dec 5, 2024
9a3ff3d
Implement `medfilt` into AzimuthalIntegrator
kif Dec 5, 2024
7cfa9ff
Handle the case "median filter"
kif Dec 5, 2024
9d7e7d9
Try to mitigate noisy test
kif Dec 5, 2024
524bd04
Fix OpenCL median filtering
kif Dec 6, 2024
fe1b0b8
Debug parallel implementation on CPU/Intel driver
kif Dec 6, 2024
af92780
Merge branch 'main' into 2313_medfilt_ng_opencl
kif Dec 7, 2024
b23d02a
fix code execution with Intel CPU driver
kif Dec 7, 2024
56f6928
Discard pixel without weight
kif Dec 7, 2024
c9e2569
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 7, 2024
60fa0f4
Typo
kif Dec 7, 2024
b225d11
some documentation on medfilt
kif Dec 8, 2024
2dcb18d
Merge remote-tracking branch 'kif/2313_medfilt_ng_opencl' into 2313_m…
kif Dec 8, 2024
ce279ea
re-run the benchmark on 32-CPU core, A5000 GPU
kif Dec 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
717 changes: 717 additions & 0 deletions doc/source/usage/tutorial/AzimuthalFilter.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions doc/source/usage/tutorial/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ a good Python fluency and to a certain extent, of the pyFAI library.
Flatfield
Ellipse/ellipse
PixelSplitting
AzimuthalFilter
Variance/uncertainties
LogScale/Guinier
multi-geometry
Expand Down
2 changes: 1 addition & 1 deletion src/pyFAI/detectors/_others.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def __init__(self, pixel1=15e-6, pixel2=15e-6, max_shape=None, orientation=0):
Detector.__init__(self, pixel1=pixel1, pixel2=pixel2, max_shape=max_shape, orientation=orientation)

def __repr__(self):
return f"Detector {self.name}%s\t PixelSize= {to_eng(self._pixel1)}m, {to_eng(self._pixel2)}m"
return f"Detector {self.name}\t PixelSize= {to_eng(self._pixel1)}m, {to_eng(self._pixel2)}m"

def get_config(self):
"""Return the configuration with arguments to the constructor
Expand Down
6 changes: 4 additions & 2 deletions src/pyFAI/ext/CSR_common.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

__author__ = "Jérôme Kieffer"
__contact__ = "[email protected]"
__date__ = "19/11/2024"
__date__ = "05/12/2024"
__status__ = "stable"
__license__ = "MIT"

Expand Down Expand Up @@ -859,7 +859,9 @@ cdef class CsrIntegrator(object):
for i in range(start, stop):
former_element = element
element = work[i]
if (qmin<=former_element.s0) and (element.s0 <= qmax):
if ((element.s3!=0) and
(((qmin<=former_element.s0) and (element.s0 <= qmax)) or
((qmin>=former_element.s0) and (element.s0 >= qmax)))): #specific case where qmin==qmax
acc_sig = acc_sig + element.s1
acc_var = acc_var + element.s2
acc_norm = acc_norm + element.s3
Expand Down
306 changes: 303 additions & 3 deletions src/pyFAI/integrator/azimuthal.py

Large diffs are not rendered by default.

261 changes: 253 additions & 8 deletions src/pyFAI/opencl/azim_csr.py

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions src/pyFAI/opencl/test/test_collective.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
__contact__ = "[email protected]"
__license__ = "MIT"
__copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
__date__ = "12/11/2024"
__date__ = "21/11/2024"

import logging
import numpy
Expand Down Expand Up @@ -250,7 +250,7 @@ def test_sort(self):
data_d = pyopencl.array.to_device(self.queue, data)
# print(ref.shape, (ref.shape[0],min(wg, self.max_valid_wg)), (1, min(wg, self.max_valid_wg)), positions)
try:
evt = self.program.test_combsort_float(self.queue, (ref.shape[0],min(wg, self.max_valid_wg)), (1, min(wg, self.max_valid_wg)),
evt = self.program.test_combsort_float(self.queue, (min(wg, self.max_valid_wg), ref.shape[0]), (min(wg, self.max_valid_wg), 1),
data_d.data,
positions_d.data,
pyopencl.LocalMemory(4*min(wg, self.max_valid_wg)))
Expand Down Expand Up @@ -290,7 +290,7 @@ def test_sort4(self):
data_d = pyopencl.array.to_device(self.queue, data)
# print(ref.shape, (ref.shape[0],min(wg, self.max_valid_wg)), (1, min(wg, self.max_valid_wg)), positions)
try:
evt = self.program.test_combsort_float4(self.queue, (ref.shape[0],min(wg, self.max_valid_wg)), (1, min(wg, self.max_valid_wg)),
evt = self.program.test_combsort_float4(self.queue, (min(wg, self.max_valid_wg), ref.shape[0]), (min(wg, self.max_valid_wg),1),
data_d.data,
positions_d.data,
pyopencl.LocalMemory(4*min(wg, self.max_valid_wg)))
Expand Down
64 changes: 44 additions & 20 deletions src/pyFAI/opencl/test/test_ocl_azim_csr.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
__contact__ = "[email protected]"
__license__ = "MIT"
__copyright__ = "2019-2021 European Synchrotron Radiation Facility, Grenoble, France"
__date__ = "28/06/2022"
__date__ = "06/12/2024"

import logging
import numpy
Expand All @@ -43,10 +43,9 @@
if ocl:
import pyopencl.array
from ...test.utilstest import UtilsTest
from silx.opencl.common import _measure_workgroup_size
from ...integrator.azimuthal import AzimuthalIntegrator
from ...method_registry import IntegrationMethod
from scipy.ndimage import gaussian_filter1d
from ...containers import ErrorModel
logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -81,10 +80,12 @@ def tearDownClass(cls):
cls.ai = None

@unittest.skipUnless(ocl, "pyopencl is missing")
def integrate_ng(self, block_size=None):
def integrate_ng(self, block_size=None, method_called="integrate_ng", extra=None):
"""
tests the 1d histogram kernel, with variable workgroup size
"""
if extra is None:
extra={}
from ..azim_csr import OCL_CSR_Integrator
data = numpy.ones(self.ai.detector.shape)
npt = 500
Expand All @@ -95,14 +96,14 @@ def integrate_ng(self, block_size=None):
dim=1, default=None, degradable=False)

# Retrieve the CSR array
cpu_integrate = self.ai._integrate1d_legacy(data, npt, unit=unit, method=csr_method)
cpu_integrate = self.ai._integrate1d_ng(data, npt, unit=unit, method=csr_method)
r_m = cpu_integrate[0]
csr_engine = list(self.ai.engines.values())[0]
csr = csr_engine.engine.lut
ref = self.ai._integrate1d_ng(data, npt, unit=unit, method=method)
integrator = OCL_CSR_Integrator(csr, data.size, block_size=block_size)
ref = self.ai._integrate1d_ng(data, npt, unit=unit, method=method, error_model="poisson")
integrator = OCL_CSR_Integrator(csr, data.size, block_size=block_size, empty=-1)
solidangle = self.ai.solidAngleArray()
res = integrator.integrate_ng(data, solidangle=solidangle)
res = integrator.__getattribute__(method_called)(data, solidangle=solidangle, error_model=ErrorModel.POISSON, **extra)
# for info, res contains: position intensity error signal variance normalization count

# Start with smth easy: the position
Expand All @@ -112,23 +113,32 @@ def integrate_ng(self, block_size=None):
if "AMD" in integrator.ctx.devices[0].platform.name:
logger.warning("This test is known to be complicated for AMD-GPU, relax the constrains for them")
else:
self.assertLessEqual(delta.max(), 1, "counts are almost the same")
self.assertEqual(delta.sum(), 0, "as much + and -")
if method_called=="integrate_ng":
self.assertLessEqual(delta.max(), 1, "counts are almost the same")
self.assertEqual(delta.sum(), 0, "as much + and -")
elif method_called=="medfilt":
pix = csr[2][1:]-csr[2][:-1]
self.assertTrue(numpy.allclose(res.count, pix), "all pixels have been counted")

# Intensities are not that different:
delta = ref.intensity - res.intensity
self.assertLessEqual(abs(delta.max()), 1e-5, "intensity is almost the same")

# histogram of normalization
ref = self.ai._integrate1d_ng(solidangle, npt, unit=unit, method=method).sum_signal
sig = res.normalization
err = abs((sig - ref).max())
self.assertLess(err, 5e-4, "normalization content is the same: %s<5e-5" % (err))
# print(ref.sum_normalization)
# print(res.normalization)
err = abs((res.normalization - ref.sum_normalization))
# print(err)
self.assertLess(err.max(), 5e-4, "normalization content is the same: %s<5e-5" % (err.max))

# histogram of signal
ref = self.ai._integrate1d_ng(data, npt, unit=unit, method=method).sum_signal
sig = res.signal
self.assertLess(abs((sig - ref).sum()), 5e-5, "signal content is the same")
self.assertLess(abs((res.signal - ref.sum_signal)).max(), 5e-5, "signal content is the same")

# histogram of variance
self.assertLess(abs((res.variance - ref.sum_variance)).max(), 5e-5, "signal content is the same")

# Intensities are not that different:
delta = ref.intensity - res.intensity
# print(delta)
self.assertLessEqual(abs(delta).max(), 1e-5, "intensity is almost the same")


@unittest.skipUnless(ocl, "pyopencl is missing")
def test_integrate_ng(self):
Expand All @@ -144,6 +154,20 @@ def test_integrate_ng_single(self):
"""
self.integrate_ng(block_size=1)

@unittest.skipUnless(ocl, "pyopencl is missing")
def test_sigma_clip(self):
"""
tests the sigma-clipping kernel, default block size
"""
self.integrate_ng(None, "sigma_clip",{"cutoff":100.0, "cycle":0,})

@unittest.skipUnless(ocl, "pyopencl is missing")
def test_medfilt(self):
"""
tests the median filtering kernel, default block size
"""
self.integrate_ng(None, "medfilt", {"quant_min":0, "quant_max":1})


def suite():
loader = unittest.defaultTestLoader.loadTestsFromTestCase
Expand Down
22 changes: 10 additions & 12 deletions src/pyFAI/resources/openCL/collective/comb_sort.cl
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,9 @@ int inline first_step(int step, int size, float ratio)
{
while (step<size)
step=next_step(step, ratio);
// if (get_local_id(0) == 0) printf("+ %d %d\n", step, size);

while (step>=size)
step=previous_step(step, ratio);
// if (get_local_id(0) == 0) printf("- %d %d\n", step, size);
return step;
}

Expand Down Expand Up @@ -61,8 +59,8 @@ int passe(global volatile float* elements,
int step,
local int* shared)
{
int wg = get_local_size(1);
int tid = get_local_id(1);
int wg = get_local_size(0);
int tid = get_local_id(0);
int cnt = 0;
int i, j, k;
barrier(CLK_GLOBAL_MEM_FENCE);
Expand Down Expand Up @@ -120,8 +118,8 @@ int passe_float4(global volatile float4* elements,
int step,
local int* shared)
{
int wg = get_local_size(1);
int tid = get_local_id(1);
int wg = get_local_size(0);
int tid = get_local_id(0);
int cnt = 0;
int i, j, k;

Expand Down Expand Up @@ -171,14 +169,14 @@ int passe_float4(global volatile float4* elements,
return 0;
}

// workgroup: (1, wg)
// grid: (nb_lines, wg)
// workgroup: (wg, 1)
// grid: (wg, nb_lines)
// shared: wg*sizeof(int)
kernel void test_combsort_float(global volatile float* elements,
global int* positions,
local int* shared)
{
int gid = get_group_id(0);
int gid = get_group_id(1);
int step = 11; // magic value
float ratio=1.3f; // magic value
int cnt;
Expand All @@ -202,14 +200,14 @@ kernel void test_combsort_float(global volatile float* elements,

}

// workgroup: (1, wg)
// grid: (nb_lines, wg)
// workgroup: (wg, 1)
// grid: (wg, nb_lines)
// shared: wg*sizeof(int)
kernel void test_combsort_float4(global volatile float4* elements,
global int* positions,
local int* shared)
{
int gid = get_group_id(0);
int gid = get_group_id(1);
int step = 11; // magic value
float ratio=1.3f; // magic value
int cnt;
Expand Down
8 changes: 6 additions & 2 deletions src/pyFAI/resources/openCL/collective/reduction.cl
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ int inline sum_int_reduction(local int* shared)
shared[tid] += shared[tid+stride];
}
barrier(CLK_LOCAL_MEM_FENCE);
return shared[0];
int res = shared[0];
barrier(CLK_LOCAL_MEM_FENCE);
return res;
}

/* sum all elements in a shared memory, same size as the workgroup size 0
Expand All @@ -35,7 +37,9 @@ int inline sum_int_atomic(local int* shared)
if (tid)
atomic_add(shared, value);
barrier(CLK_LOCAL_MEM_FENCE);
return shared[0];
int res = shared[0];
barrier(CLK_LOCAL_MEM_FENCE);
return res;
}

/*
Expand Down
2 changes: 1 addition & 1 deletion src/pyFAI/resources/openCL/collective/scan.cl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

/*
* Cumsum all elements in a shared memory
* Cumsum all elements in a shared memory along dim 0
*
* Nota: the first wg-size elements, are used.
* The shared buffer needs to be twice this size of the workgroup
Expand Down
Loading
Loading