From f621810c5af5d06256070f0c8c399ce2ac092450 Mon Sep 17 00:00:00 2001 From: HolyWu Date: Wed, 22 Sep 2021 17:47:39 +0800 Subject: [PATCH] Overhaul and migrate to VS API4 --- AddGrain/AddGrain.cpp | 241 +++++++++++++++++------------------ AddGrain/AddGrain.h | 12 +- AddGrain/AddGrain_AVX2.cpp | 54 ++++---- AddGrain/AddGrain_AVX512.cpp | 54 ++++---- AddGrain/AddGrain_SSE2.cpp | 54 ++++---- README.md | 37 +++--- 6 files changed, 221 insertions(+), 231 deletions(-) diff --git a/AddGrain/AddGrain.cpp b/AddGrain/AddGrain.cpp index 025b075..e14ce2d 100644 --- a/AddGrain/AddGrain.cpp +++ b/AddGrain/AddGrain.cpp @@ -31,27 +31,29 @@ #include "AddGrain.h" +using namespace std::literals; + #ifdef ADDGRAIN_X86 -template extern void updateFrame_sse2(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept; -template extern void updateFrame_avx2(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept; -template extern void updateFrame_avx512(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept; +template extern void updateFrame_sse2(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept; +template extern void updateFrame_avx2(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept; +template extern void updateFrame_avx512(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept; #endif template -static T getArg(const VSAPI * vsapi, const VSMap * map, const char * key, const T defaultValue) noexcept { +static T getArg(const VSAPI* vsapi, const VSMap* map, const char* key, const T defaultValue) noexcept { T arg{}; - int err{}; + auto err{ 0 }; if constexpr (std::is_same_v) - arg = !!vsapi->propGetInt(map, key, 0, &err); - else if constexpr (std::is_same_v) - arg = int64ToIntS(vsapi->propGetInt(map, key, 0, &err)); + arg = !!vsapi->mapGetInt(map, key, 0, &err); + else if constexpr (std::is_same_v || std::is_same_v) + arg = vsapi->mapGetIntSaturated(map, key, 0, &err); else if constexpr (std::is_same_v) - arg = vsapi->propGetInt(map, key, 0, &err); + arg = vsapi->mapGetInt(map, key, 0, &err); else if constexpr (std::is_same_v) - arg = static_cast(vsapi->propGetFloat(map, key, 0, &err)); + arg = vsapi->mapGetFloatSaturated(map, key, 0, &err); else if constexpr (std::is_same_v) - arg = vsapi->propGetFloat(map, key, 0, &err); + arg = vsapi->mapGetFloat(map, key, 0, &err); if (err) arg = defaultValue; @@ -59,19 +61,19 @@ static T getArg(const VSAPI * vsapi, const VSMap * map, const char * key, const return arg; } -static inline long fastUniformRandL(long & idum) noexcept { +static inline long fastUniformRandL(long& idum) noexcept { return idum = 1664525L * idum + 1013904223L; } // very fast & reasonably random -static inline float fastUniformRandF(long & idum) noexcept { +static inline float fastUniformRandF(long& idum) noexcept { // work with 32-bit IEEE floating point only! fastUniformRandL(idum); - const unsigned long itemp = 0x3f800000 | (0x007fffff & idum); - return *reinterpret_cast(&itemp) - 1.0f; + unsigned long itemp = 0x3f800000 | (0x007fffff & idum); + return *reinterpret_cast(&itemp) - 1.0f; } -static inline float gaussianRand(bool & iset, float & gset, long & idum) noexcept { +static inline float gaussianRand(bool& iset, float& gset, long& idum) noexcept { float fac, rsq, v1, v2; // return saved second @@ -95,25 +97,25 @@ static inline float gaussianRand(bool & iset, float & gset, long & idum) noexcep return v2 * fac; } -static inline float gaussianRand(const float mean, const float variance, bool & iset, float & gset, long & idum) noexcept { +static inline float gaussianRand(const float mean, const float variance, bool& iset, float& gset, long& idum) noexcept { return (variance == 0.0f) ? mean : gaussianRand(iset, gset, idum) * std::sqrt(variance) + mean; } template -static void generateNoise(const int planesNoise, const float scale, AddGrainData * const VS_RESTRICT d) noexcept { +static void generateNoise(const int planesNoise, const float scale, AddGrainData* const VS_RESTRICT d) noexcept { float nRep[MAXP]; - for (int i = 0; i < MAXP; i++) + for (auto i{ 0 }; i < MAXP; i++) nRep[i] = d->constant ? 1.0f : 2.0f; std::vector lastLine(d->nStride[0]); // assume plane 0 is the widest one - constexpr float mean = 0.0f; - const float pvar[] = { d->var, d->uvar }; - bool iset = false; - float gset; - auto pns = d->pNoiseSeeds.begin(); - - for (int plane = 0; plane < planesNoise; plane++) { - int h = static_cast(std::ceil(d->nHeight[plane] * nRep[plane])); + constexpr auto mean{ 0.0f }; + const float pvar[]{ d->var, d->uvar }; + auto iset{ false }; + auto gset{ 0.0f }; + auto pns{ d->pNoiseSeeds.begin() }; + + for (auto plane{ 0 }; plane < planesNoise; plane++) { + auto h{ static_cast(std::ceil(d->nHeight[plane] * nRep[plane])) }; if (planesNoise == 2 && plane == 1) { // fake plane needs at least one more row, and more if the rows are too small. round to the upper number h += (OFFSET_FAKEPLANE + d->nStride[plane] - 1) / d->nStride[plane]; @@ -123,15 +125,15 @@ static void generateNoise(const int planesNoise, const float scale, AddGrainData // allocate space for noise d->pN[plane] = malloc(d->nSize[plane] * sizeof(noise_t)); - for (int x = 0; x < d->nStride[plane]; x++) + for (auto x{ 0 }; x < d->nStride[plane]; x++) lastLine[x] = gaussianRand(mean, pvar[plane], iset, gset, d->idum); // things to vertically smooth against - for (int y = 0; y < h; y++) { - noise_t * pNW = reinterpret_cast(d->pN[plane]) + d->nStride[plane] * y; - float lastr = gaussianRand(mean, pvar[plane], iset, gset, d->idum); // something to horiz smooth against + for (auto y{ 0 }; y < h; y++) { + auto pNW{ reinterpret_cast(d->pN[plane]) + d->nStride[plane] * y }; + auto lastr{ gaussianRand(mean, pvar[plane], iset, gset, d->idum) }; // something to horiz smooth against - for (int x = 0; x < d->nStride[plane]; x++) { - float r = gaussianRand(mean, pvar[plane], iset, gset, d->idum); + for (auto x{ 0 }; x < d->nStride[plane]; x++) { + auto r{ gaussianRand(mean, pvar[plane], iset, gset, d->idum) }; r = lastr * d->hcorr + r * (1.0f - d->hcorr); // horizontal correlation lastr = r; @@ -147,13 +149,13 @@ static void generateNoise(const int planesNoise, const float scale, AddGrainData } } - for (int x = d->storedFrames; x > 0; x--) + for (auto x{ d->storedFrames }; x > 0; x--) *pns++ = fastUniformRandL(d->idum) & 0xff; // insert seed, to keep cache happy } } // on input, plane is the frame plane index (if applicable, 0 otherwise), and on output, it contains the selected noise plane -static void setRand(int & plane, int & noiseOffs, const int frameNumber, AddGrainData * const VS_RESTRICT d) noexcept { +static void setRand(int& plane, int& noiseOffs, const int frameNumber, AddGrainData* const VS_RESTRICT d) noexcept { if (d->constant) { // force noise to be identical every frame if (plane >= MAXP) { @@ -162,8 +164,8 @@ static void setRand(int & plane, int & noiseOffs, const int frameNumber, AddGrai } } else { // pull seed back out, to keep cache happy - const int seedIndex = frameNumber % d->storedFrames; - const int p0 = d->pNoiseSeeds[seedIndex]; + auto seedIndex{ frameNumber % d->storedFrames }; + auto p0{ d->pNoiseSeeds[seedIndex] }; if (plane == 0) { d->idum = p0; @@ -182,16 +184,16 @@ static void setRand(int & plane, int & noiseOffs, const int frameNumber, AddGrai } template -static void updateFrame_c(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, - const AddGrainData * const VS_RESTRICT d) noexcept { - const pixel_t * srcp = reinterpret_cast(_srcp); - pixel_t * VS_RESTRICT dstp = reinterpret_cast(_dstp); - const noise_t * pNW = reinterpret_cast(d->pN[noisePlane]) + noiseOffs; - - for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) { +static void updateFrame_c(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, + const AddGrainData* const VS_RESTRICT d) noexcept { + auto srcp{ reinterpret_cast(_srcp) }; + auto dstp{ reinterpret_cast(_dstp) }; + auto pNW{ reinterpret_cast(d->pN[noisePlane]) + noiseOffs }; + + for (auto y{ 0 }; y < height; y++) { + for (auto x{ 0 }; x < width; x++) { if constexpr (std::is_integral_v) - dstp[x] = std::clamp(srcp[x] + pNW[x], 0, d->peak); + dstp[x] = static_cast(std::clamp(srcp[x] + pNW[x], 0, d->peak)); else dstp[x] = srcp[x] + pNW[x]; } @@ -202,32 +204,27 @@ static void updateFrame_c(const void * _srcp, void * _dstp, const int width, con } } -static void VS_CC addgrainInit(VSMap * in, VSMap * out, void ** instanceData, VSNode * node, VSCore * core, const VSAPI * vsapi) { - AddGrainData * d = static_cast(*instanceData); - vsapi->setVideoInfo(d->vi, 1, node); -} - -static const VSFrameRef * VS_CC addgrainGetFrame(int n, int activationReason, void ** instanceData, void ** frameData, VSFrameContext * frameCtx, VSCore * core, const VSAPI * vsapi) { - AddGrainData * d = static_cast(*instanceData); +static const VSFrame* VS_CC addgrainGetFrame(int n, int activationReason, void* instanceData, [[maybe_unused]] void** frameData, VSFrameContext* frameCtx, VSCore* core, const VSAPI* vsapi) { + auto d{ static_cast(instanceData) }; if (activationReason == arInitial) { vsapi->requestFrameFilter(n, d->node, frameCtx); } else if (activationReason == arAllFramesReady) { - const VSFrameRef * src = vsapi->getFrameFilter(n, d->node, frameCtx); - const VSFrameRef * fr[] = { d->process[0] ? nullptr : src, d->process[1] ? nullptr : src, d->process[2] ? nullptr : src }; - const int pl[] = { 0, 1, 2 }; - VSFrameRef * dst = vsapi->newVideoFrame2(d->vi->format, d->vi->width, d->vi->height, fr, pl, src, core); + auto src{ vsapi->getFrameFilter(n, d->node, frameCtx) }; + decltype(src) fr[]{ d->process[0] ? nullptr : src, d->process[1] ? nullptr : src, d->process[2] ? nullptr : src }; + int pl[]{ 0, 1, 2 }; + auto dst{ vsapi->newVideoFrame2(&d->vi->format, d->vi->width, d->vi->height, fr, pl, src, core) }; - for (int plane = 0; plane < d->vi->format->numPlanes; plane++) { + for (auto plane{ 0 }; plane < d->vi->format.numPlanes; plane++) { if (d->process[plane]) { - const int width = vsapi->getFrameWidth(src, plane); - const int height = vsapi->getFrameHeight(src, plane); - const int stride = vsapi->getStride(src, plane) / d->vi->format->bytesPerSample; - const uint8_t * srcp = vsapi->getReadPtr(src, plane); - uint8_t * dstp = vsapi->getWritePtr(dst, plane); - - int noisePlane = (d->vi->format->colorFamily == cmRGB) ? 0 : plane; - int noiseOffs = 0; + const auto width{ vsapi->getFrameWidth(src, plane) }; + const auto height{ vsapi->getFrameHeight(src, plane) }; + const auto stride{ vsapi->getStride(src, plane) / d->vi->format.bytesPerSample }; + auto srcp{ vsapi->getReadPtr(src, plane) }; + auto dstp{ vsapi->getWritePtr(dst, plane) }; + + auto noisePlane{ d->vi->format.colorFamily == cfRGB ? 0 : plane }; + auto noiseOffs{ 0 }; setRand(noisePlane, noiseOffs, n, d); // seed randomness w/ plane & frame d->updateFrame(srcp, dstp, width, height, stride, noisePlane, noiseOffs, d); } @@ -240,60 +237,58 @@ static const VSFrameRef * VS_CC addgrainGetFrame(int n, int activationReason, vo return nullptr; } -static void VS_CC addgrainFree(void * instanceData, VSCore * core, const VSAPI * vsapi) { - AddGrainData * d = static_cast(instanceData); +static void VS_CC addgrainFree(void* instanceData, [[maybe_unused]] VSCore* core, const VSAPI* vsapi) { + auto d{ static_cast(instanceData) }; vsapi->freeNode(d->node); - for (int i = 0; i < MAXP; i++) + for (auto i{ 0 }; i < MAXP; i++) free(d->pN[i]); delete d; } -static void VS_CC addgrainCreate(const VSMap * in, VSMap * out, void * userData, VSCore * core, const VSAPI * vsapi) { - using namespace std::literals; - - std::unique_ptr d = std::make_unique(); +static void VS_CC addgrainCreate(const VSMap* in, VSMap* out, [[maybe_unused]] void* userData, VSCore* core, const VSAPI* vsapi) { + auto d{ std::make_unique() }; try { - d->node = vsapi->propGetNode(in, "clip", 0, nullptr); + d->node = vsapi->mapGetNode(in, "clip", 0, nullptr); d->vi = vsapi->getVideoInfo(d->node); - if (!isConstantFormat(d->vi) || - (d->vi->format->sampleType == stInteger && d->vi->format->bitsPerSample > 16) || - (d->vi->format->sampleType == stFloat && d->vi->format->bitsPerSample != 32)) - throw "only constant format 8-16 bit integer and 32 bit float input supported"sv; + if (!vsh::isConstantVideoFormat(d->vi) || + (d->vi->format.sampleType == stInteger && d->vi->format.bitsPerSample > 16) || + (d->vi->format.sampleType == stFloat && d->vi->format.bitsPerSample != 32)) + throw "only constant format 8-16 bit integer and 32 bit float input supported"; d->var = getArg(vsapi, in, "var", 1.0f); d->uvar = getArg(vsapi, in, "uvar", 0.0f); d->hcorr = getArg(vsapi, in, "hcorr", 0.0f); d->vcorr = getArg(vsapi, in, "vcorr", 0.0f); - long seed = getArg(vsapi, in, "seed", -1); + auto seed = getArg(vsapi, in, "seed", -1L); d->constant = getArg(vsapi, in, "constant", false); - const int opt = getArg(vsapi, in, "opt", 0); + auto opt = getArg(vsapi, in, "opt", 0); if (d->hcorr < 0.0f || d->hcorr > 1.0f || d->vcorr < 0.0f || d->vcorr > 1.0f) - throw "hcorr and vcorr must be between 0.0 and 1.0 (inclusive)"sv; + throw "hcorr and vcorr must be between 0.0 and 1.0 (inclusive)"; if (opt < 0 || opt > 4) - throw "opt must be 0, 1, 2, 3, or 4"sv; + throw "opt must be 0, 1, 2, 3, or 4"; { - if (d->vi->format->bytesPerSample == 1) + if (d->vi->format.bytesPerSample == 1) d->updateFrame = updateFrame_c; - else if (d->vi->format->bytesPerSample == 2) + else if (d->vi->format.bytesPerSample == 2) d->updateFrame = updateFrame_c; else d->updateFrame = updateFrame_c; #ifdef ADDGRAIN_X86 - const int iset = instrset_detect(); + auto iset{ instrset_detect() }; if ((opt == 0 && iset >= 10) || opt == 4) { - if (d->vi->format->bytesPerSample == 1) { + if (d->vi->format.bytesPerSample == 1) { d->updateFrame = updateFrame_avx512; d->step = 64; - } else if (d->vi->format->bytesPerSample == 2) { + } else if (d->vi->format.bytesPerSample == 2) { d->updateFrame = updateFrame_avx512; d->step = 32; } else { @@ -301,10 +296,10 @@ static void VS_CC addgrainCreate(const VSMap * in, VSMap * out, void * userData, d->step = 16; } } else if ((opt == 0 && iset >= 8) || opt == 3) { - if (d->vi->format->bytesPerSample == 1) { + if (d->vi->format.bytesPerSample == 1) { d->updateFrame = updateFrame_avx2; d->step = 32; - } else if (d->vi->format->bytesPerSample == 2) { + } else if (d->vi->format.bytesPerSample == 2) { d->updateFrame = updateFrame_avx2; d->step = 16; } else { @@ -312,10 +307,10 @@ static void VS_CC addgrainCreate(const VSMap * in, VSMap * out, void * userData, d->step = 8; } } else if ((opt == 0 && iset >= 2) || opt == 2) { - if (d->vi->format->bytesPerSample == 1) { + if (d->vi->format.bytesPerSample == 1) { d->updateFrame = updateFrame_sse2; d->step = 16; - } else if (d->vi->format->bytesPerSample == 2) { + } else if (d->vi->format.bytesPerSample == 2) { d->updateFrame = updateFrame_sse2; d->step = 8; } else { @@ -326,71 +321,73 @@ static void VS_CC addgrainCreate(const VSMap * in, VSMap * out, void * userData, #endif } - float scale; - if (d->vi->format->sampleType == stInteger) { - d->peak = (1 << d->vi->format->bitsPerSample) - 1; + auto scale{ 0.0f }; + if (d->vi->format.sampleType == stInteger) { + d->peak = (1 << d->vi->format.bitsPerSample) - 1; scale = d->peak / 255.0f; } else { scale = 1.0f / 255.0f; } - int planesNoise = 1; + if (seed < 0) + seed = static_cast(std::time(nullptr)); // init random + d->idum = seed; + + auto planesNoise{ 1 }; d->nStride[0] = (d->vi->width + 63) & ~63; // first plane d->nHeight[0] = d->vi->height; - if (d->vi->format->colorFamily == cmGray) { + if (d->vi->format.colorFamily == cfGray) { d->uvar = 0.0f; - } else if (d->vi->format->colorFamily == cmRGB) { + } else if (d->vi->format.colorFamily == cfRGB) { d->uvar = d->var; } else { planesNoise = 2; - d->nStride[1] = ((d->vi->width >> d->vi->format->subSamplingW) + 63) & ~63; // second and third plane - d->nHeight[1] = d->vi->height >> d->vi->format->subSamplingH; + d->nStride[1] = ((d->vi->width >> d->vi->format.subSamplingW) + 63) & ~63; // second and third plane + d->nHeight[1] = d->vi->height >> d->vi->format.subSamplingH; } if (d->var <= 0.0f && d->uvar <= 0.0f) { - vsapi->propSetNode(out, "clip", d->node, paReplace); - vsapi->freeNode(d->node); + vsapi->mapConsumeNode(out, "clip", d->node, maReplace); return; } d->process[0] = d->var > 0.0f; d->process[1] = d->process[2] = d->uvar > 0.0f; - if (seed < 0) - seed = static_cast(std::time(nullptr)); // init random - d->idum = seed; - d->storedFrames = std::min(d->vi->numFrames, 256); d->pNoiseSeeds.resize(d->storedFrames * planesNoise); - if (d->vi->format->bytesPerSample == 1) + if (d->vi->format.bytesPerSample == 1) generateNoise(planesNoise, scale, d.get()); - else if (d->vi->format->bytesPerSample == 2) + else if (d->vi->format.bytesPerSample == 2) generateNoise(planesNoise, scale, d.get()); else generateNoise(planesNoise, scale, d.get()); - } catch (const std::string_view & error) { - vsapi->setError(out, ("AddGrain: "s + error.data()).c_str()); + } catch (const char* error) { + vsapi->mapSetError(out, ("AddGrain: "s + error).c_str()); vsapi->freeNode(d->node); return; } - vsapi->createFilter(in, out, "AddGrain", addgrainInit, addgrainGetFrame, addgrainFree, fmParallel, 0, d.release(), core); + VSFilterDependency deps[]{ {d->node, rpStrictSpatial} }; + vsapi->createVideoFilter(out, "AddGrain", d->vi, addgrainGetFrame, addgrainFree, fmParallel, deps, 1, d.get(), core); + d.release(); } ////////////////////////////////////////// // Init -VS_EXTERNAL_API(void) VapourSynthPluginInit(VSConfigPlugin configFunc, VSRegisterFunction registerFunc, VSPlugin * plugin) { - configFunc("com.holywu.addgrain", "grain", "Add some correlated color gaussian noise", VAPOURSYNTH_API_VERSION, 1, plugin); - registerFunc("Add", - "clip:clip;" - "var:float:opt;" - "uvar:float:opt;" - "hcorr:float:opt;" - "vcorr:float:opt;" - "seed:int:opt;" - "constant:int:opt;" - "opt:int:opt;", - addgrainCreate, nullptr, plugin); +VS_EXTERNAL_API(void) VapourSynthPluginInit2(VSPlugin* plugin, const VSPLUGINAPI* vspapi) { + vspapi->configPlugin("com.holywu.addgrain", "grain", "Random noise film grain generator", VS_MAKE_VERSION(8, 0), VAPOURSYNTH_API_VERSION, 0, plugin); + vspapi->registerFunction("Add", + "clip:vnode;" + "var:float:opt;" + "uvar:float:opt;" + "hcorr:float:opt;" + "vcorr:float:opt;" + "seed:int:opt;" + "constant:int:opt;" + "opt:int:opt;", + "clip:vnode;", + addgrainCreate, nullptr, plugin); } diff --git a/AddGrain/AddGrain.h b/AddGrain/AddGrain.h index 6793bc5..fc4a712 100644 --- a/AddGrain/AddGrain.h +++ b/AddGrain/AddGrain.h @@ -3,8 +3,8 @@ #include #include -#include -#include +#include +#include #ifdef ADDGRAIN_X86 #include "VCL2/vectorclass.h" @@ -17,8 +17,8 @@ static constexpr int MAXP = 2; static constexpr int OFFSET_FAKEPLANE = 32; struct AddGrainData final { - VSNodeRef * node; - const VSVideoInfo * vi; + VSNode* node; + const VSVideoInfo* vi; float var, uvar, hcorr, vcorr; bool constant; bool process[3]; @@ -26,6 +26,6 @@ struct AddGrainData final { std::vector pNoiseSeeds; long idum; int nStride[MAXP], nHeight[MAXP], nSize[MAXP]; - void * pN[MAXP]; - void (*updateFrame)(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept; + void* pN[MAXP]; + void (*updateFrame)(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept; }; diff --git a/AddGrain/AddGrain_AVX2.cpp b/AddGrain/AddGrain_AVX2.cpp index 8765b51..036ca05 100644 --- a/AddGrain/AddGrain_AVX2.cpp +++ b/AddGrain/AddGrain_AVX2.cpp @@ -2,34 +2,34 @@ #include "AddGrain.h" template -void updateFrame_avx2(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, - const AddGrainData * const VS_RESTRICT d) noexcept { - const pixel_t * srcp = reinterpret_cast(_srcp); - pixel_t * dstp = reinterpret_cast(_dstp); - const noise_t * pNW = reinterpret_cast(d->pN[noisePlane]) + noiseOffs; +void updateFrame_avx2(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, + const AddGrainData* const VS_RESTRICT d) noexcept { + auto srcp{ reinterpret_cast(_srcp) }; + auto dstp{ reinterpret_cast(_dstp) }; + auto pNW{ reinterpret_cast(d->pN[noisePlane]) + noiseOffs }; - for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x += d->step) { + for (auto y{ 0 }; y < height; y++) { + for (auto x{ 0 }; x < width; x += d->step) { if constexpr (std::is_same_v) { - const Vec32c sign = 0x80; - Vec32c src = Vec32c().load_a(srcp + x); - const Vec32c noise = Vec32c().load(pNW + x); - src ^= sign; - src = add_saturated(src, noise); - src ^= sign; - src.store_nt(dstp + x); + Vec32c sign{ -0x80 }; + auto val{ Vec32c().load_a(srcp + x) }; + auto nz{ Vec32c().load(pNW + x) }; + val ^= sign; + val = add_saturated(val, nz); + val ^= sign; + val.store_nt(dstp + x); } else if constexpr (std::is_same_v) { - const Vec16s sign = 0x8000; - Vec16s src = Vec16s().load_a(srcp + x); - const Vec16s noise = Vec16s().load(pNW + x); - src ^= sign; - src = add_saturated(src, noise); - src ^= sign; - min(Vec16us(src), d->peak).store_nt(dstp + x); + Vec16s sign{ -0x8000 }; + auto val{ Vec16s().load_a(srcp + x) }; + auto nz{ Vec16s().load(pNW + x) }; + val ^= sign; + val = add_saturated(val, nz); + val ^= sign; + min(Vec16us(val), d->peak).store_nt(dstp + x); } else { - Vec8f src = Vec8f().load_a(srcp + x); - const Vec8f noise = Vec8f().load(pNW + x); - (src + noise).store_nt(dstp + x); + auto val{ Vec8f().load_a(srcp + x) }; + auto nz{ Vec8f().load(pNW + x) }; + (val + nz).store_nt(dstp + x); } } @@ -39,7 +39,7 @@ void updateFrame_avx2(const void * _srcp, void * _dstp, const int width, const i } } -template void updateFrame_avx2(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept; -template void updateFrame_avx2(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept; -template void updateFrame_avx2(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept; +template void updateFrame_avx2(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept; +template void updateFrame_avx2(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept; +template void updateFrame_avx2(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept; #endif diff --git a/AddGrain/AddGrain_AVX512.cpp b/AddGrain/AddGrain_AVX512.cpp index e4186b1..5d54c1a 100644 --- a/AddGrain/AddGrain_AVX512.cpp +++ b/AddGrain/AddGrain_AVX512.cpp @@ -2,34 +2,34 @@ #include "AddGrain.h" template -void updateFrame_avx512(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, - const AddGrainData * const VS_RESTRICT d) noexcept { - const pixel_t * srcp = reinterpret_cast(_srcp); - pixel_t * dstp = reinterpret_cast(_dstp); - const noise_t * pNW = reinterpret_cast(d->pN[noisePlane]) + noiseOffs; +void updateFrame_avx512(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, + const AddGrainData* const VS_RESTRICT d) noexcept { + auto srcp{ reinterpret_cast(_srcp) }; + auto dstp{ reinterpret_cast(_dstp) }; + auto pNW{ reinterpret_cast(d->pN[noisePlane]) + noiseOffs }; - for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x += d->step) { + for (auto y{ 0 }; y < height; y++) { + for (auto x{ 0 }; x < width; x += d->step) { if constexpr (std::is_same_v) { - const Vec64c sign = 0x80; - Vec64c src = Vec64c().load_a(srcp + x); - const Vec64c noise = Vec64c().load(pNW + x); - src ^= sign; - src = add_saturated(src, noise); - src ^= sign; - src.store_nt(dstp + x); + Vec64c sign{ -0x80 }; + auto val{ Vec64c().load_a(srcp + x) }; + auto nz{ Vec64c().load(pNW + x) }; + val ^= sign; + val = add_saturated(val, nz); + val ^= sign; + val.store_nt(dstp + x); } else if constexpr (std::is_same_v) { - const Vec32s sign = 0x8000; - Vec32s src = Vec32s().load_a(srcp + x); - const Vec32s noise = Vec32s().load(pNW + x); - src ^= sign; - src = add_saturated(src, noise); - src ^= sign; - min(Vec32us(src), d->peak).store_nt(dstp + x); + Vec32s sign{ -0x8000 }; + auto val{ Vec32s().load_a(srcp + x) }; + auto nz{ Vec32s().load(pNW + x) }; + val ^= sign; + val = add_saturated(val, nz); + val ^= sign; + min(Vec32us(val), d->peak).store_nt(dstp + x); } else { - Vec16f src = Vec16f().load_a(srcp + x); - const Vec16f noise = Vec16f().load(pNW + x); - (src + noise).store_nt(dstp + x); + auto val{ Vec16f().load_a(srcp + x) }; + auto nz{ Vec16f().load(pNW + x) }; + (val + nz).store_nt(dstp + x); } } @@ -39,7 +39,7 @@ void updateFrame_avx512(const void * _srcp, void * _dstp, const int width, const } } -template void updateFrame_avx512(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept; -template void updateFrame_avx512(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept; -template void updateFrame_avx512(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept; +template void updateFrame_avx512(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept; +template void updateFrame_avx512(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept; +template void updateFrame_avx512(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept; #endif diff --git a/AddGrain/AddGrain_SSE2.cpp b/AddGrain/AddGrain_SSE2.cpp index c6f5878..588bf4d 100644 --- a/AddGrain/AddGrain_SSE2.cpp +++ b/AddGrain/AddGrain_SSE2.cpp @@ -2,34 +2,34 @@ #include "AddGrain.h" template -void updateFrame_sse2(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, - const AddGrainData * const VS_RESTRICT d) noexcept { - const pixel_t * srcp = reinterpret_cast(_srcp); - pixel_t * dstp = reinterpret_cast(_dstp); - const noise_t * pNW = reinterpret_cast(d->pN[noisePlane]) + noiseOffs; +void updateFrame_sse2(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, + const AddGrainData* const VS_RESTRICT d) noexcept { + auto srcp{ reinterpret_cast(_srcp) }; + auto dstp{ reinterpret_cast(_dstp) }; + auto pNW{ reinterpret_cast(d->pN[noisePlane]) + noiseOffs }; - for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x += d->step) { + for (auto y{ 0 }; y < height; y++) { + for (auto x{ 0 }; x < width; x += d->step) { if constexpr (std::is_same_v) { - const Vec16c sign = 0x80; - Vec16c src = Vec16c().load_a(srcp + x); - const Vec16c noise = Vec16c().load(pNW + x); - src ^= sign; - src = add_saturated(src, noise); - src ^= sign; - src.store_nt(dstp + x); + Vec16c sign{ -0x80 }; + auto val{ Vec16c().load_a(srcp + x) }; + auto nz{ Vec16c().load(pNW + x) }; + val ^= sign; + val = add_saturated(val, nz); + val ^= sign; + val.store_nt(dstp + x); } else if constexpr (std::is_same_v) { - const Vec8s sign = 0x8000; - Vec8s src = Vec8s().load_a(srcp + x); - const Vec8s noise = Vec8s().load(pNW + x); - src ^= sign; - src = add_saturated(src, noise); - src ^= sign; - min(Vec8us(src), d->peak).store_nt(dstp + x); + Vec8s sign{ -0x8000 }; + auto val{ Vec8s().load_a(srcp + x) }; + auto nz{ Vec8s().load(pNW + x) }; + val ^= sign; + val = add_saturated(val, nz); + val ^= sign; + min(Vec8us(val), d->peak).store_nt(dstp + x); } else { - Vec4f src = Vec4f().load_a(srcp + x); - const Vec4f noise = Vec4f().load(pNW + x); - (src + noise).store_nt(dstp + x); + auto val{ Vec4f().load_a(srcp + x) }; + auto nz{ Vec4f().load(pNW + x) }; + (val + nz).store_nt(dstp + x); } } @@ -39,7 +39,7 @@ void updateFrame_sse2(const void * _srcp, void * _dstp, const int width, const i } } -template void updateFrame_sse2(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept; -template void updateFrame_sse2(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept; -template void updateFrame_sse2(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept; +template void updateFrame_sse2(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept; +template void updateFrame_sse2(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept; +template void updateFrame_sse2(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept; #endif diff --git a/README.md b/README.md index 5f02541..00e0500 100644 --- a/README.md +++ b/README.md @@ -1,32 +1,28 @@ -Description -=========== - +# AddGrain AddGrain generates film like grain or other effects (like rain) by adding random noise to a video clip. This noise may optionally be horizontally or vertically correlated to cause streaking. Ported from AviSynth plugin http://forum.doom9.org/showthread.php?t=111849 -Usage -===== - - grain.Add(clip clip[, float var=1.0, float uvar=0.0, float hcorr=0.0, float vcorr=0.0, int seed=-1, bint constant=False, int opt=0]) +## Usage + grain.Add(vnode clip[, float var=1.0, float uvar=0.0, float hcorr=0.0, float vcorr=0.0, int seed=-1, bint constant=False, int opt=0]) -* clip: Clip to process. Any planar format with either integer sample type of 8-16 bit depth or float sample type of 32 bit depth is supported. +- clip: Clip to process. Any planar format with either integer sample type of 8-16 bit depth or float sample type of 32 bit depth is supported. -* var, uvar: The variance (strength) of the luma and chroma noise, 0 is disabled. `uvar` does nothing for GRAY and RGB formats. +- var, uvar: The variance (strength) of the luma and chroma noise, 0 is disabled. `uvar` does nothing for GRAY and RGB formats. -* hcorr, vcorr: Horizontal and vertical correlation, which causes a nifty streaking effect. Range 0.0-1.0 +- hcorr, vcorr: Horizontal and vertical correlation, which causes a nifty streaking effect. Range 0.0-1.0. -* seed: Specifies a repeatable grain sequence. Set to at least 0 to use. +- seed: Specifies a repeatable grain sequence. Set to at least 0 to use. -* constant: Specifies a constant grain pattern on every frame. +- constant: Specifies a constant grain pattern on every frame. -* opt: Sets which cpu optimizations to use. - * 0 = auto detect - * 1 = use c - * 2 = use sse2 - * 3 = use avx2 - * 4 = use avx512 +- opt: Sets which cpu optimizations to use. + - 0 = auto detect + - 1 = use c + - 2 = use sse2 + - 3 = use avx2 + - 4 = use avx512 The correlation factors are actually just implemented as exponential smoothing which give a weird side affect that I did not attempt to adjust. But this means that as you increase either corr factor you will have to also increase the stddev (grain amount) in order to get the same visible amount of grain, since it is being smooth out a bit. @@ -35,11 +31,8 @@ Increase both corr factors can somewhat give clumps, or larger grain size. And there is an interesting effect with, say, `grain.Add(var=800, hcorr=0, vcorr=0.9)` or any huge amount of strongly vertical grain. It can make a scene look like it is raining. -Compilation -=========== - +## Compilation ``` meson build -ninja -C build ninja -C build install ```