Skip to content

Commit

Permalink
Allow bypassing gaussian blur by setting sigma to 0
Browse files Browse the repository at this point in the history
  • Loading branch information
HolyWu committed Feb 26, 2017
1 parent ce6f6d9 commit cee6674
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 4 deletions.
57 changes: 53 additions & 4 deletions TCanny/TCanny.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@
#ifdef VS_TARGET_CPU_X86
#include "vectorclass/vectorclass.h"

// SIMD plane-copy routines, one per instruction set; the specializations for
// uint8_t, uint16_t and float live in TCanny_SSE2.cpp, TCanny_AVX.cpp and
// TCanny_AVX2.cpp respectively.
// Arguments, in order: source rows, destination float rows, width, height,
// source stride, destination stride, offset.
// NOTE(review): the definitions in those files are declared noexcept while
// these declarations are not; since C++17 the exception specification is part
// of the function type - confirm the declarations are meant to differ.
template<typename T> extern void copyData_SSE2(const T *, float *, const unsigned, const unsigned, const unsigned, const unsigned, const float);
template<typename T> extern void copyData_AVX(const T *, float *, const unsigned, const unsigned, const unsigned, const unsigned, const float);
template<typename T> extern void copyData_AVX2(const T *, float *, const unsigned, const unsigned, const unsigned, const unsigned, const float);

extern void gaussianBlurHorizontal_SSE2(float *, float *, const float *, const int, const int);
extern void gaussianBlurHorizontal_AVX(float *, float *, const float *, const int, const int);
extern void gaussianBlurHorizontal_AVX2(float *, float *, const float *, const int, const int);
Expand Down Expand Up @@ -67,6 +71,7 @@ template<typename T> extern void discretizeGM_AVX2(const float *, T *, const uns
static constexpr float M_PIF = 3.14159265358979323846f;
static constexpr float M_1_PIF = 0.318309886183790671538f;

// Per-sample-type dispatch pointer for the blur-bypass copy; selectFunctions()
// points it at copyData_C or at one of the SSE2/AVX/AVX2 variants.
template<typename T> static void (*copyData)(const T *, float *, const unsigned, const unsigned, const unsigned, const unsigned, const float);
static void (*gaussianBlurHorizontal)(float *, float *, const float *, const int, const int);
template<typename T> static void (*gaussianBlurVertical)(const T *, float *, float *, const float *, const float *, const unsigned, const int, const unsigned, const unsigned, const int, const int, const float);
static void (*detectEdge)(float *, float *, float *, const int, const unsigned, const unsigned, const unsigned, const int, const unsigned);
Expand Down Expand Up @@ -123,6 +128,31 @@ static inline T getBin(const float dir, const unsigned n) noexcept {
}
}

// Scalar fallback used when the gaussian blur is bypassed: fills the float
// working buffer `blur` straight from the source plane.
//
//   srcp        first source row; advanced by `stride` elements per row
//   blur        first destination row; advanced by `blurStride` floats per row
//   width       number of samples converted per row
//   height      number of rows
//   offset      added to every sample for non-integral T; ignored for integral T
//
// Non-integral input with a zero offset is handed to vs_bitblt as a raw copy,
// with both strides and the row size expressed in bytes via sizeof(float).
template<typename T>
static void copyData_C(const T * srcp, float * VS_RESTRICT blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept {
    const bool integerSamples = std::is_integral<T>::value;

    // Nothing to convert and nothing to add: a plain block transfer is enough.
    if (!integerSamples && !offset) {
        vs_bitblt(blur, blurStride * sizeof(float), srcp, stride * sizeof(float), width * sizeof(float), height);
        return;
    }

    for (unsigned rowsLeft = height; rowsLeft != 0; rowsLeft--) {
        if (integerSamples) {
            // Integer samples are widened to float as-is.
            for (unsigned col = 0; col != width; col++)
                blur[col] = srcp[col];
        } else {
            // Float samples are shifted by the per-plane offset.
            for (unsigned col = 0; col != width; col++)
                blur[col] = srcp[col] + offset;
        }

        srcp += stride;
        blur += blurStride;
    }
}

static void gaussianBlurHorizontal_C(float * VS_RESTRICT buffer, float * VS_RESTRICT blur, const float * weights, const int width, const int radius) noexcept {
for (int i = 1; i <= radius; i++) {
buffer[-i] = buffer[i - 1];
Expand Down Expand Up @@ -382,7 +412,10 @@ static void process(const VSFrameRef * src, VSFrameRef * dst, const TCannyData *
float * direction = d->direction.at(threadId);
uint8_t * label = d->label.at(threadId);

gaussianBlurVertical<T>(srcp, buffer, blur, d->weightsHorizontal[plane], d->weightsVertical[plane], width, height, stride, blurStride, d->radiusHorizontal[plane], d->radiusVertical[plane], d->offset[plane]);
if (d->radiusHorizontal[plane])
gaussianBlurVertical<T>(srcp, buffer, blur, d->weightsHorizontal[plane], d->weightsVertical[plane], width, height, stride, blurStride, d->radiusHorizontal[plane], d->radiusVertical[plane], d->offset[plane]);
else
copyData<T>(srcp, blur, width, height, stride, blurStride, d->offset[plane]);

if (d->mode != -1)
detectEdge(blur, gradient, direction, width, height, stride, blurStride, d->mode, d->op);
Expand All @@ -407,6 +440,10 @@ static void process(const VSFrameRef * src, VSFrameRef * dst, const TCannyData *
}

static void selectFunctions(const unsigned opt) noexcept {
copyData<uint8_t> = copyData_C;
copyData<uint16_t> = copyData_C;
copyData<float> = copyData_C;

gaussianBlurHorizontal = gaussianBlurHorizontal_C;

gaussianBlurVertical<uint8_t> = gaussianBlurVertical_C;
Expand All @@ -432,6 +469,10 @@ static void selectFunctions(const unsigned opt) noexcept {
#ifdef VS_TARGET_CPU_X86
const int iset = instrset_detect();
if (opt == 4 || (opt == 0 && iset >= 8)) {
copyData<uint8_t> = copyData_AVX2;
copyData<uint16_t> = copyData_AVX2;
copyData<float> = copyData_AVX2;

gaussianBlurHorizontal = gaussianBlurHorizontal_AVX2;

gaussianBlurVertical<uint8_t> = gaussianBlurVertical_AVX2;
Expand All @@ -454,6 +495,10 @@ static void selectFunctions(const unsigned opt) noexcept {
discretizeGM<uint16_t> = discretizeGM_AVX2;
discretizeGM<float> = discretizeGM_AVX2;
} else if (opt == 3 || (opt == 0 && iset == 7)) {
copyData<uint8_t> = copyData_AVX;
copyData<uint16_t> = copyData_AVX;
copyData<float> = copyData_AVX;

gaussianBlurHorizontal = gaussianBlurHorizontal_AVX;

gaussianBlurVertical<uint8_t> = gaussianBlurVertical_AVX;
Expand All @@ -476,6 +521,10 @@ static void selectFunctions(const unsigned opt) noexcept {
discretizeGM<uint16_t> = discretizeGM_AVX;
discretizeGM<float> = discretizeGM_AVX;
} else if (opt == 2 || (opt == 0 && iset >= 2)) {
copyData<uint8_t> = copyData_SSE2;
copyData<uint16_t> = copyData_SSE2;
copyData<float> = copyData_SSE2;

gaussianBlurHorizontal = gaussianBlurHorizontal_SSE2;

gaussianBlurVertical<uint8_t> = gaussianBlurVertical_SSE2;
Expand Down Expand Up @@ -675,8 +724,8 @@ static void VS_CC tcannyCreate(const VSMap *in, VSMap *out, void *userData, VSCo
const int opt = int64ToIntS(vsapi->propGetInt(in, "opt", 0, &err));

for (int i = 0; i < 3; i++) {
if (sigmaHorizontal[i] <= 0.f)
throw std::string { "sigma must be greater than 0.0" };
if (sigmaHorizontal[i] < 0.f)
throw std::string { "sigma must be greater than or equal to 0.0" };
}

if (d->t_l >= d->t_h)
Expand Down Expand Up @@ -736,7 +785,7 @@ static void VS_CC tcannyCreate(const VSMap *in, VSMap *out, void *userData, VSCo
}

for (int plane = 0; plane < d->vi->format->numPlanes; plane++) {
if (d->process[plane]) {
if (d->process[plane] && sigmaHorizontal[plane]) {
d->weightsHorizontal[plane] = gaussianWeights(sigmaHorizontal[plane], &d->radiusHorizontal[plane]);
d->weightsVertical[plane] = gaussianWeights(sigmaVertical[plane], &d->radiusVertical[plane]);
if (!d->weightsHorizontal[plane] || !d->weightsVertical[plane])
Expand Down
35 changes: 35 additions & 0 deletions TCanny/TCanny_AVX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,41 @@
static constexpr float M_PIF = 3.14159265358979323846f;
static constexpr float M_1_PIF = 0.318309886183790671538f;

// Primary template for the AVX plane copy; only the explicit specializations
// that follow (uint8_t, uint16_t, float) are defined in this file.
// NOTE(review): declared noexcept here, whereas the extern declaration in
// TCanny.cpp omits noexcept - confirm the two are meant to differ.
template<typename T> void copyData_AVX(const T *, float *, const unsigned, const unsigned, const unsigned, const unsigned, const float) noexcept;

// AVX copy of an 8-bit plane into the float working buffer, eight samples per
// step. `offset` is part of the shared signature but is not used for integer
// input.
// The column loop advances in whole vectors, so when `width` is not a multiple
// of 8 the final step reads and writes beyond `width` within the row.
// NOTE(review): presumably both strides are padded to cover that, and `blur`
// rows are aligned as stream() requires - confirm against the allocation code.
template<>
void copyData_AVX(const uint8_t * srcp, float * blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept {
    for (unsigned rowsLeft = height; rowsLeft != 0; rowsLeft--) {
        unsigned col = 0;
        while (col < width) {
            const Vec8i samples = Vec8i().load_8uc(srcp + col);
            to_float(samples).stream(blur + col);
            col += 8;
        }

        srcp += stride;
        blur += blurStride;
    }
}

// AVX copy of a 16-bit plane into the float working buffer, eight samples per
// step. `offset` is part of the shared signature but is not used for integer
// input.
// The column loop advances in whole vectors, so when `width` is not a multiple
// of 8 the final step reads and writes beyond `width` within the row.
// NOTE(review): presumably both strides are padded to cover that, and `blur`
// rows are aligned as stream() requires - confirm against the allocation code.
template<>
void copyData_AVX(const uint16_t * srcp, float * blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept {
    for (unsigned rowsLeft = height; rowsLeft != 0; rowsLeft--) {
        unsigned col = 0;
        while (col < width) {
            const Vec8i samples = Vec8i().load_8us(srcp + col);
            to_float(samples).stream(blur + col);
            col += 8;
        }

        srcp += stride;
        blur += blurStride;
    }
}

// AVX copy of a float plane into the working buffer, adding `offset` to every
// sample, eight samples per step.
// The column loop advances in whole vectors, so when `width` is not a multiple
// of 8 the final step reads and writes beyond `width` within the row.
// NOTE(review): load_a()/stream() presumably require aligned addresses and
// padded strides - confirm against the frame and buffer allocation code.
template<>
void copyData_AVX(const float * srcp, float * blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept {
    for (unsigned rowsLeft = height; rowsLeft != 0; rowsLeft--) {
        unsigned col = 0;
        while (col < width) {
            const Vec8f shifted = Vec8f().load_a(srcp + col) + offset;
            shifted.stream(blur + col);
            col += 8;
        }

        srcp += stride;
        blur += blurStride;
    }
}

void gaussianBlurHorizontal_AVX(float * buffer, float * blur, const float * weights, const int width, const int radius) noexcept {
for (int i = 1; i <= radius; i++) {
buffer[-i] = buffer[i - 1];
Expand Down
35 changes: 35 additions & 0 deletions TCanny/TCanny_AVX2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,41 @@
static constexpr float M_PIF = 3.14159265358979323846f;
static constexpr float M_1_PIF = 0.318309886183790671538f;

// Primary template for the AVX2 plane copy; only the explicit specializations
// that follow (uint8_t, uint16_t, float) are defined in this file.
// NOTE(review): declared noexcept here, whereas the extern declaration in
// TCanny.cpp omits noexcept - confirm the two are meant to differ.
template<typename T> void copyData_AVX2(const T *, float *, const unsigned, const unsigned, const unsigned, const unsigned, const float) noexcept;

// AVX2 copy of an 8-bit plane into the float working buffer, eight samples per
// step. `offset` is part of the shared signature but is not used for integer
// input.
// The column loop advances in whole vectors, so when `width` is not a multiple
// of 8 the final step reads and writes beyond `width` within the row.
// NOTE(review): presumably both strides are padded to cover that, and `blur`
// rows are aligned as stream() requires - confirm against the allocation code.
template<>
void copyData_AVX2(const uint8_t * srcp, float * blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept {
    for (unsigned rowsLeft = height; rowsLeft != 0; rowsLeft--) {
        unsigned col = 0;
        while (col < width) {
            const Vec8i samples = Vec8i().load_8uc(srcp + col);
            to_float(samples).stream(blur + col);
            col += 8;
        }

        srcp += stride;
        blur += blurStride;
    }
}

// AVX2 copy of a 16-bit plane into the float working buffer, eight samples per
// step. `offset` is part of the shared signature but is not used for integer
// input.
// The column loop advances in whole vectors, so when `width` is not a multiple
// of 8 the final step reads and writes beyond `width` within the row.
// NOTE(review): presumably both strides are padded to cover that, and `blur`
// rows are aligned as stream() requires - confirm against the allocation code.
template<>
void copyData_AVX2(const uint16_t * srcp, float * blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept {
    for (unsigned rowsLeft = height; rowsLeft != 0; rowsLeft--) {
        unsigned col = 0;
        while (col < width) {
            const Vec8i samples = Vec8i().load_8us(srcp + col);
            to_float(samples).stream(blur + col);
            col += 8;
        }

        srcp += stride;
        blur += blurStride;
    }
}

// AVX2 copy of a float plane into the working buffer, adding `offset` to every
// sample, eight samples per step.
// The column loop advances in whole vectors, so when `width` is not a multiple
// of 8 the final step reads and writes beyond `width` within the row.
// NOTE(review): load_a()/stream() presumably require aligned addresses and
// padded strides - confirm against the frame and buffer allocation code.
template<>
void copyData_AVX2(const float * srcp, float * blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept {
    for (unsigned rowsLeft = height; rowsLeft != 0; rowsLeft--) {
        unsigned col = 0;
        while (col < width) {
            const Vec8f shifted = Vec8f().load_a(srcp + col) + offset;
            shifted.stream(blur + col);
            col += 8;
        }

        srcp += stride;
        blur += blurStride;
    }
}

void gaussianBlurHorizontal_AVX2(float * buffer, float * blur, const float * weights, const int width, const int radius) noexcept {
for (int i = 1; i <= radius; i++) {
buffer[-i] = buffer[i - 1];
Expand Down
35 changes: 35 additions & 0 deletions TCanny/TCanny_SSE2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,41 @@
static constexpr float M_PIF = 3.14159265358979323846f;
static constexpr float M_1_PIF = 0.318309886183790671538f;

// Primary template for the SSE2 plane copy; only the explicit specializations
// that follow (uint8_t, uint16_t, float) are defined in this file.
// NOTE(review): declared noexcept here, whereas the extern declaration in
// TCanny.cpp omits noexcept - confirm the two are meant to differ.
template<typename T> void copyData_SSE2(const T *, float *, const unsigned, const unsigned, const unsigned, const unsigned, const float) noexcept;

// SSE2 copy of an 8-bit plane into the float working buffer, four samples per
// step. `offset` is part of the shared signature but is not used for integer
// input.
// The column loop advances in whole vectors, so when `width` is not a multiple
// of 4 the final step reads and writes beyond `width` within the row.
// NOTE(review): presumably both strides are padded to cover that, and `blur`
// rows are aligned as stream() requires - confirm against the allocation code.
template<>
void copyData_SSE2(const uint8_t * srcp, float * blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept {
    for (unsigned rowsLeft = height; rowsLeft != 0; rowsLeft--) {
        unsigned col = 0;
        while (col < width) {
            const Vec4i samples = Vec4i().load_4uc(srcp + col);
            to_float(samples).stream(blur + col);
            col += 4;
        }

        srcp += stride;
        blur += blurStride;
    }
}

// SSE2 copy of a 16-bit plane into the float working buffer, four samples per
// step. `offset` is part of the shared signature but is not used for integer
// input.
// The column loop advances in whole vectors, so when `width` is not a multiple
// of 4 the final step reads and writes beyond `width` within the row.
// NOTE(review): presumably both strides are padded to cover that, and `blur`
// rows are aligned as stream() requires - confirm against the allocation code.
template<>
void copyData_SSE2(const uint16_t * srcp, float * blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept {
    for (unsigned rowsLeft = height; rowsLeft != 0; rowsLeft--) {
        unsigned col = 0;
        while (col < width) {
            const Vec4i samples = Vec4i().load_4us(srcp + col);
            to_float(samples).stream(blur + col);
            col += 4;
        }

        srcp += stride;
        blur += blurStride;
    }
}

// SSE2 copy of a float plane into the working buffer, adding `offset` to every
// sample, four samples per step.
// The column loop advances in whole vectors, so when `width` is not a multiple
// of 4 the final step reads and writes beyond `width` within the row.
// NOTE(review): load_a()/stream() presumably require aligned addresses and
// padded strides - confirm against the frame and buffer allocation code.
template<>
void copyData_SSE2(const float * srcp, float * blur, const unsigned width, const unsigned height, const unsigned stride, const unsigned blurStride, const float offset) noexcept {
    for (unsigned rowsLeft = height; rowsLeft != 0; rowsLeft--) {
        unsigned col = 0;
        while (col < width) {
            const Vec4f shifted = Vec4f().load_a(srcp + col) + offset;
            shifted.stream(blur + col);
            col += 4;
        }

        srcp += stride;
        blur += blurStride;
    }
}

void gaussianBlurHorizontal_SSE2(float * buffer, float * blur, const float * weights, const int width, const int radius) noexcept {
for (int i = 1; i <= radius; i++) {
buffer[-i] = buffer[i - 1];
Expand Down

0 comments on commit cee6674

Please sign in to comment.