From cee6674b7b07ace8bd0628a3d20559e0391e3660 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Sun, 26 Feb 2017 20:45:48 +0800 Subject: [PATCH] Allow bypassing gaussian blur by setting sigma to 0 --- TCanny/TCanny.cpp | 57 +++++++++++++++++++++++++++++++++++++++--- TCanny/TCanny_AVX.cpp | 35 ++++++++++++++++++++++++++ TCanny/TCanny_AVX2.cpp | 35 ++++++++++++++++++++++++++ TCanny/TCanny_SSE2.cpp | 35 ++++++++++++++++++++++++++ 4 files changed, 158 insertions(+), 4 deletions(-) diff --git a/TCanny/TCanny.cpp b/TCanny/TCanny.cpp index efa844b..36b4457 100644 --- a/TCanny/TCanny.cpp +++ b/TCanny/TCanny.cpp @@ -35,6 +35,10 @@ #ifdef VS_TARGET_CPU_X86 #include "vectorclass/vectorclass.h" +template extern void copyData_SSE2(const T *, float *, const unsigned, const unsigned, const unsigned, const unsigned, const float); +template extern void copyData_AVX(const T *, float *, const unsigned, const unsigned, const unsigned, const unsigned, const float); +template extern void copyData_AVX2(const T *, float *, const unsigned, const unsigned, const unsigned, const unsigned, const float); + extern void gaussianBlurHorizontal_SSE2(float *, float *, const float *, const int, const int); extern void gaussianBlurHorizontal_AVX(float *, float *, const float *, const int, const int); extern void gaussianBlurHorizontal_AVX2(float *, float *, const float *, const int, const int); @@ -67,6 +71,7 @@ template extern void discretizeGM_AVX2(const float *, T *, const uns static constexpr float M_PIF = 3.14159265358979323846f; static constexpr float M_1_PIF = 0.318309886183790671538f; +template static void (*copyData)(const T *, float *, const unsigned, const unsigned, const unsigned, const unsigned, const float); static void (*gaussianBlurHorizontal)(float *, float *, const float *, const int, const int); template static void (*gaussianBlurVertical)(const T *, float *, float *, const float *, const float *, const unsigned, const int, const unsigned, const unsigned, const int, const int, const float); static void (*detectEdge)(float *, float *, float *, const int, const unsigned, const unsigned, const unsigned, const int, const unsigned); @@ -123,6 +128,31 @@ static inline T getBin(const float dir, const unsigned n) noexcept { } } +template +static void copyData_C(const T * srcp, float * VS_RESTRICT blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept { + if (std::is_integral::value) { + for (unsigned y = 0; y < height; y++) { + for (unsigned x = 0; x < width; x++) + blur[x] = srcp[x]; + + srcp += stride; + blur += blurStride; + } + } else { + if (offset) { + for (unsigned y = 0; y < height; y++) { + for (unsigned x = 0; x < width; x++) + blur[x] = srcp[x] + offset; + + srcp += stride; + blur += blurStride; + } + } else { + vs_bitblt(blur, blurStride * sizeof(float), srcp, stride * sizeof(float), width * sizeof(float), height); + } + } +} + static void gaussianBlurHorizontal_C(float * VS_RESTRICT buffer, float * VS_RESTRICT blur, const float * weights, const int width, const int radius) noexcept { for (int i = 1; i <= radius; i++) { buffer[-i] = buffer[i - 1]; @@ -382,7 +412,10 @@ static void process(const VSFrameRef * src, VSFrameRef * dst, const TCannyData * float * direction = d->direction.at(threadId); uint8_t * label = d->label.at(threadId); - gaussianBlurVertical(srcp, buffer, blur, d->weightsHorizontal[plane], d->weightsVertical[plane], width, height, stride, blurStride, d->radiusHorizontal[plane], d->radiusVertical[plane], d->offset[plane]); + if (d->radiusHorizontal[plane]) + gaussianBlurVertical(srcp, buffer, blur, d->weightsHorizontal[plane], d->weightsVertical[plane], width, height, stride, blurStride, d->radiusHorizontal[plane], d->radiusVertical[plane], d->offset[plane]); + else + copyData(srcp, blur, width, height, stride, blurStride, d->offset[plane]); if (d->mode != -1) detectEdge(blur, gradient, direction, width, height, stride, blurStride, d->mode, d->op); @@ -407,6 +440,10 @@ static void process(const VSFrameRef * src, VSFrameRef * dst, const TCannyData * } static void selectFunctions(const unsigned opt) noexcept { + copyData = copyData_C; + copyData = copyData_C; + copyData = copyData_C; + gaussianBlurHorizontal = gaussianBlurHorizontal_C; gaussianBlurVertical = gaussianBlurVertical_C; @@ -432,6 +469,10 @@ static void selectFunctions(const unsigned opt) noexcept { #ifdef VS_TARGET_CPU_X86 const int iset = instrset_detect(); if (opt == 4 || (opt == 0 && iset >= 8)) { + copyData = copyData_AVX2; + copyData = copyData_AVX2; + copyData = copyData_AVX2; + gaussianBlurHorizontal = gaussianBlurHorizontal_AVX2; gaussianBlurVertical = gaussianBlurVertical_AVX2; @@ -454,6 +495,10 @@ static void selectFunctions(const unsigned opt) noexcept { discretizeGM = discretizeGM_AVX2; discretizeGM = discretizeGM_AVX2; } else if (opt == 3 || (opt == 0 && iset == 7)) { + copyData = copyData_AVX; + copyData = copyData_AVX; + copyData = copyData_AVX; + gaussianBlurHorizontal = gaussianBlurHorizontal_AVX; gaussianBlurVertical = gaussianBlurVertical_AVX; @@ -476,6 +521,10 @@ static void selectFunctions(const unsigned opt) noexcept { discretizeGM = discretizeGM_AVX; discretizeGM = discretizeGM_AVX; } else if (opt == 2 || (opt == 0 && iset >= 2)) { + copyData = copyData_SSE2; + copyData = copyData_SSE2; + copyData = copyData_SSE2; + gaussianBlurHorizontal = gaussianBlurHorizontal_SSE2; gaussianBlurVertical = gaussianBlurVertical_SSE2; @@ -675,8 +724,8 @@ static void VS_CC tcannyCreate(const VSMap *in, VSMap *out, void *userData, VSCo const int opt = int64ToIntS(vsapi->propGetInt(in, "opt", 0, &err)); for (int i = 0; i < 3; i++) { - if (sigmaHorizontal[i] <= 0.f) - throw std::string { "sigma must be greater than 0.0" }; + if (sigmaHorizontal[i] < 0.f) + throw std::string { "sigma must be greater than or equal to 0.0" }; } if (d->t_l >= d->t_h) @@ -736,7 +785,7 @@ static void VS_CC tcannyCreate(const VSMap *in, VSMap *out, void *userData, VSCo } for (int plane = 0; plane < d->vi->format->numPlanes; plane++) { - if (d->process[plane]) { + if (d->process[plane] && sigmaHorizontal[plane]) { d->weightsHorizontal[plane] = gaussianWeights(sigmaHorizontal[plane], &d->radiusHorizontal[plane]); d->weightsVertical[plane] = gaussianWeights(sigmaVertical[plane], &d->radiusVertical[plane]); if (!d->weightsHorizontal[plane] || !d->weightsVertical[plane]) diff --git a/TCanny/TCanny_AVX.cpp b/TCanny/TCanny_AVX.cpp index 951688c..711a567 100644 --- a/TCanny/TCanny_AVX.cpp +++ b/TCanny/TCanny_AVX.cpp @@ -10,6 +10,41 @@ static constexpr float M_PIF = 3.14159265358979323846f; static constexpr float M_1_PIF = 0.318309886183790671538f; +template void copyData_AVX(const T *, float *, const unsigned, const unsigned, const unsigned, const unsigned, const float) noexcept; + +template<> +void copyData_AVX(const uint8_t * srcp, float * blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept { + for (unsigned y = 0; y < height; y++) { + for (unsigned x = 0; x < width; x += 8) + to_float(Vec8i().load_8uc(srcp + x)).stream(blur + x); + + srcp += stride; + blur += blurStride; + } +} + +template<> +void copyData_AVX(const uint16_t * srcp, float * blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept { + for (unsigned y = 0; y < height; y++) { + for (unsigned x = 0; x < width; x += 8) + to_float(Vec8i().load_8us(srcp + x)).stream(blur + x); + + srcp += stride; + blur += blurStride; + } +} + +template<> +void copyData_AVX(const float * srcp, float * blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept { + for (unsigned y = 0; y < height; y++) { + for (unsigned x = 0; x < width; x += 8) + (Vec8f().load_a(srcp + x) + offset).stream(blur + x); + + srcp += stride; + blur += blurStride; + } +} + void gaussianBlurHorizontal_AVX(float * buffer, float * blur, const float * weights, const int width, const int radius) noexcept { for (int i = 1; i <= radius; i++) { buffer[-i] = buffer[i - 1]; diff --git a/TCanny/TCanny_AVX2.cpp b/TCanny/TCanny_AVX2.cpp index 3435d40..63bacfa 100644 --- a/TCanny/TCanny_AVX2.cpp +++ b/TCanny/TCanny_AVX2.cpp @@ -10,6 +10,41 @@ static constexpr float M_PIF = 3.14159265358979323846f; static constexpr float M_1_PIF = 0.318309886183790671538f; +template void copyData_AVX2(const T *, float *, const unsigned, const unsigned, const unsigned, const unsigned, const float) noexcept; + +template<> +void copyData_AVX2(const uint8_t * srcp, float * blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept { + for (unsigned y = 0; y < height; y++) { + for (unsigned x = 0; x < width; x += 8) + to_float(Vec8i().load_8uc(srcp + x)).stream(blur + x); + + srcp += stride; + blur += blurStride; + } +} + +template<> +void copyData_AVX2(const uint16_t * srcp, float * blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept { + for (unsigned y = 0; y < height; y++) { + for (unsigned x = 0; x < width; x += 8) + to_float(Vec8i().load_8us(srcp + x)).stream(blur + x); + + srcp += stride; + blur += blurStride; + } +} + +template<> +void copyData_AVX2(const float * srcp, float * blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept { + for (unsigned y = 0; y < height; y++) { + for (unsigned x = 0; x < width; x += 8) + (Vec8f().load_a(srcp + x) + offset).stream(blur + x); + + srcp += stride; + blur += blurStride; + } +} + void gaussianBlurHorizontal_AVX2(float * buffer, float * blur, const float * weights, const int width, const int radius) noexcept { for (int i = 1; i <= radius; i++) { buffer[-i] = buffer[i - 1]; diff --git a/TCanny/TCanny_SSE2.cpp b/TCanny/TCanny_SSE2.cpp index 72a9836..6483ded 100644 --- a/TCanny/TCanny_SSE2.cpp +++ b/TCanny/TCanny_SSE2.cpp @@ -6,6 +6,41 @@ static constexpr float M_PIF = 3.14159265358979323846f; static constexpr float M_1_PIF = 0.318309886183790671538f; +template void copyData_SSE2(const T *, float *, const unsigned, const unsigned, const unsigned, const unsigned, const float) noexcept; + +template<> +void copyData_SSE2(const uint8_t * srcp, float * blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept { + for (unsigned y = 0; y < height; y++) { + for (unsigned x = 0; x < width; x += 4) + to_float(Vec4i().load_4uc(srcp + x)).stream(blur + x); + + srcp += stride; + blur += blurStride; + } +} + +template<> +void copyData_SSE2(const uint16_t * srcp, float * blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept { + for (unsigned y = 0; y < height; y++) { + for (unsigned x = 0; x < width; x += 4) + to_float(Vec4i().load_4us(srcp + x)).stream(blur + x); + + srcp += stride; + blur += blurStride; + } +} + +template<> +void copyData_SSE2(const float * srcp, float * blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept { + for (unsigned y = 0; y < height; y++) { + for (unsigned x = 0; x < width; x += 4) + (Vec4f().load_a(srcp + x) + offset).stream(blur + x); + + srcp += stride; + blur += blurStride; + } +} + void gaussianBlurHorizontal_SSE2(float * buffer, float * blur, const float * weights, const int width, const int radius) noexcept { for (int i = 1; i <= radius; i++) { buffer[-i] = buffer[i - 1];