From ab311d3146a61dc7ea911a8ffc98a87089130ad5 Mon Sep 17 00:00:00 2001 From: Jerome Kieffer Date: Fri, 15 Nov 2024 15:53:10 +0100 Subject: [PATCH] Implement cython medfilt --- src/pyFAI/ext/CSR_common.pxi | 246 ++++++++-------------------- src/pyFAI/ext/meson.build | 5 +- src/pyFAI/ext/splitPixelFullCSR.pyx | 4 +- 3 files changed, 77 insertions(+), 178 deletions(-) diff --git a/src/pyFAI/ext/CSR_common.pxi b/src/pyFAI/ext/CSR_common.pxi index 9e862fc83..19ac09980 100644 --- a/src/pyFAI/ext/CSR_common.pxi +++ b/src/pyFAI/ext/CSR_common.pxi @@ -38,6 +38,7 @@ from libcpp cimport bool from libcpp.algorithm cimport sort from cython cimport floating +import os import cython from cython.parallel import prange import numpy @@ -45,18 +46,25 @@ import numpy from .preproc import preproc from ..containers import Integrate1dtpl, Integrate2dtpl, ErrorModel +# cdef Py_ssize_t MAX_THREADS = 8 +# try: +# MAX_THREADS = min(MAX_THREADS, len(os.sched_getaffinity(os.getpid()))) # Limit to the actual number of threads +# except Exception: +# MAX_THREADS = min(MAX_THREADS, os.cpu_count() or 1) -cdef struct float4: + +cdef struct float4_t: float s0 float s1 float s2 float s3 +float4_d = numpy.dtype([('s0','f4'),('s1','f4'),('s2','f4'),('s3','f4')]) -cdef inline bool cmp(float4 a, float4 b) noexcept nogil: +cdef inline bool cmp(float4_t a, float4_t b) noexcept nogil: return True if a.s0) * (b-) - b = sig / norm - delta1 = acc_sig/omega_A - b - acc_sig = acc_sig + coef * sig - delta2 = acc_sig / acc_norm - b - acc_var = acc_var + omega2_B * delta1 * delta2 - acc_count = acc_count + coef * count - else: - acc_sig = acc_sig + coef * sig - acc_var = acc_var + coef * coef * var - acc_norm = acc_norm + w - acc_norm_sq = acc_norm_sq + w*w - acc_count = acc_count + coef * count - - if (acc_norm_sq > 0.0): - aver = acc_sig / acc_norm - std = sqrt(acc_var / acc_norm_sq) - # sem = sqrt(acc_var) / acc_norm - else: - aver = NAN; - std = NAN; - # sem = NAN; - - # cycle for sigma-clipping - for c in range(cycle): - # Sigma-clip - if (acc_norm == 0.0) or (acc_count == 0.0): - break - - #This is for Chauvenet's criterion - acc_count = max(3.0, acc_count) - chauvenet_cutoff = max(cutoff, sqrt(2.0*log(acc_count/sqrt(2.0*M_PI)))) - # Discard outliers - bad_pix = 0 - for j in range(self._indptr[i], self._indptr[i + 1]): - coef = self._data[j] - if coef == 0.0: - continue - idx = self._indices[j] - sig = preproc4[idx, 0] - var = preproc4[idx, 1] - norm = preproc4[idx, 2] - count = preproc4[idx, 3] - if isnan(sig) or isnan(var) or isnan(norm) or isnan(count) or (norm == 0.0) or (count == 0.0): - continue - # Check validity (on cnt, i.e. s3) and normalisation (in s2) value to avoid zero division - x = sig / norm - if fabs(x - aver) > chauvenet_cutoff * std: - preproc4[idx, 3] = NAN; - bad_pix = bad_pix + 1 - if bad_pix == 0: - if do_hybrid_variance: - c = cycle #enforce the leave of the loop - else: - break - - #integrate again - acc_sig = 0.0 - acc_var = 0.0 - acc_norm = 0.0 - acc_norm_sq = 0.0 - acc_count = 0.0 - - for j in range(self._indptr[i], self._indptr[i + 1]): - coef = self._data[j] - if coef == 0.0: - continue - idx = self._indices[j] - sig = preproc4[idx, 0] - var = preproc4[idx, 1] - norm = preproc4[idx, 2] - count = preproc4[idx, 3] - if isnan(sig) or isnan(var) or isnan(norm) or isnan(count) or (norm == 0.0) or (count == 0.0): - continue - w = coef * norm - if do_azimuthal_variance or (do_hybrid_variance and c+1) * (b-) - b = sig / norm - delta1 = acc_sig/omega_A - b - acc_sig = acc_sig + coef * sig - delta2 = acc_sig / acc_norm - b - acc_var = acc_var + omega2_B * delta1 * delta2 - acc_count = acc_count + coef * count - else: - acc_sig = acc_sig + coef * sig - acc_var = acc_var + coef * coef * var - acc_norm = acc_norm + w - acc_norm_sq = acc_norm_sq + w*w - acc_count = acc_count + coef * count - if (acc_norm_sq > 0.0): - aver = acc_sig / acc_norm - std = sqrt(acc_var / acc_norm_sq) - # sem = sqrt(acc_var) / acc_norm # Not needed yet - else: - aver = NAN; - std = NAN; - # sem = NAN; - if bad_pix == 0: - break - + # Duplicate the input data and populate the large work-array + for i in range(npix): # NOT faster in parallel ! + j = self._indices[i] + sig = preproc4[j,0] + var = preproc4[j,1] + nrm = preproc4[j,2] + weight = self._data[i] + element.s0 = sig/nrm # average signal + element.s1 = sig * weight # weighted raw signal + element.s2 = var * weight * weight # weighted raw variance + element.s3 = nrm * weight # weighted raw normalization + work[i] = element + for idx in prange(self.output_size, schedule="guided"): + start = self._indptr[idx] + stop = self._indptr[idx+1] + acc_sig = acc_var = acc_norm = acc_norm_sq = 0.0 + sort_float4(work[start:stop]) + + cumsum = 0.0 + for i in range(start, stop): + cumsum = cumsum + work[i].s3 + work[i].s0 = cumsum + qmin = quant_min * cumsum + qmax = quant_max * cumsum + + element.s0 = 0.0 + for i in range(start, stop): + former_element = element + element = work[i] + if (former_element.s0 >= qmin) and (element.s0 <= qmax): + acc_sig = acc_sig + element.s1 + acc_var = acc_var + element.s2 + acc_norm = acc_norm + element.s3 + acc_norm_sq = acc_norm_sq + element.s3*element.s3 + cnt = cnt + 1.0 #collect things ... - sum_sig[i] = acc_sig - sum_var[i] = acc_var - sum_norm[i] = acc_norm - sum_norm_sq[i] = acc_norm_sq - sum_count[i] = acc_count - if (acc_norm_sq > 0.0): - merged[i] = aver # calculated previously - stda[i] = std # sqrt(acc_var / acc_norm_sq) # already calculated previously - sema[i] = sqrt(acc_var) / acc_norm # not calculated previously since it was not needed + sum_sig[idx] = acc_sig + sum_var[idx] = acc_var + sum_norm[idx] = acc_norm + sum_norm_sq[idx] = acc_norm_sq + sum_count[idx] = cnt + if (acc_norm_sq): + merged[idx] = acc_sig/acc_var + stda[idx] = sqrt(acc_var / acc_norm_sq) + sema[idx] = sqrt(acc_var) / acc_norm else: - merged[i] = empty - stda[i] = empty - sema[i] = empty + merged[idx] = empty + stda[idx] = empty + sema[idx] = empty - #"position intensity error signal variance normalization count" + #"position intensity sigma signal variance normalization count std sem norm_sq" return Integrate1dtpl(self.bin_centers, numpy.asarray(merged),numpy.asarray(sema) , numpy.asarray(sum_sig),numpy.asarray(sum_var), diff --git a/src/pyFAI/ext/meson.build b/src/pyFAI/ext/meson.build index 0e26b8c8e..140e2c87e 100644 --- a/src/pyFAI/ext/meson.build +++ b/src/pyFAI/ext/meson.build @@ -83,8 +83,9 @@ py.extension_module('watershed', 'watershed.pyx', py.extension_module('_tree', '_tree.pyx', dependencies : py_dep, install: true, subdir: 'pyFAI/ext') -py.extension_module('sparse_utils', 'sparse_utils.pyx', #Deprecated - dependencies : py_dep, install: true, subdir: 'pyFAI/ext') +py.extension_module('sparse_utils', 'sparse_utils.pyx', + override_options : ['cython_language=cpp'], + dependencies : [py_dep, omp], install: true, subdir: 'pyFAI/ext') py.extension_module('inpainting', 'inpainting.pyx', dependencies : py_dep, install: true, subdir: 'pyFAI/ext') diff --git a/src/pyFAI/ext/splitPixelFullCSR.pyx b/src/pyFAI/ext/splitPixelFullCSR.pyx index e52d37017..c358f04f0 100644 --- a/src/pyFAI/ext/splitPixelFullCSR.pyx +++ b/src/pyFAI/ext/splitPixelFullCSR.pyx @@ -2,7 +2,7 @@ #cython: embedsignature=True, language_level=3, binding=True #cython: boundscheck=False, wraparound=False, cdivision=True, initializedcheck=False, ## This is for developping -## cython: profile=True, warn.undeclared=True, warn.unused=True, warn.unused_result=False, warn.unused_arg=True +##cython: profile=True, warn.undeclared=True, warn.unused=True, warn.unused_result=False, warn.unused_arg=True # # Project: Fast Azimuthal Integration # https://github.com/silx-kit/pyFAI @@ -35,7 +35,7 @@ Sparse matrix represented using the CompressedSparseRow. __author__ = "Jérôme Kieffer" __contact__ = "Jerome.kieffer@esrf.fr" -__date__ = "04/10/2023" +__date__ = "15/11/2024" __status__ = "stable" __license__ = "MIT"