Skip to content

Commit

Permalink
Overhaul and migrate to VS API4
Browse files Browse the repository at this point in the history
  • Loading branch information
HolyWu committed Sep 22, 2021
1 parent bc0f42c commit f621810
Show file tree
Hide file tree
Showing 6 changed files with 221 additions and 231 deletions.
241 changes: 119 additions & 122 deletions AddGrain/AddGrain.cpp

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions AddGrain/AddGrain.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
#include <type_traits>
#include <vector>

#include <VapourSynth.h>
#include <VSHelper.h>
#include <VapourSynth4.h>
#include <VSHelper4.h>

#ifdef ADDGRAIN_X86
#include "VCL2/vectorclass.h"
Expand All @@ -17,15 +17,15 @@ static constexpr int MAXP = 2;
static constexpr int OFFSET_FAKEPLANE = 32;

struct AddGrainData final {
VSNodeRef * node;
const VSVideoInfo * vi;
VSNode* node;
const VSVideoInfo* vi;
float var, uvar, hcorr, vcorr;
bool constant;
bool process[3];
int storedFrames, step, peak;
std::vector<uint8_t> pNoiseSeeds;
long idum;
int nStride[MAXP], nHeight[MAXP], nSize[MAXP];
void * pN[MAXP];
void (*updateFrame)(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept;
void* pN[MAXP];
void (*updateFrame)(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept;
};
54 changes: 27 additions & 27 deletions AddGrain/AddGrain_AVX2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,34 @@
#include "AddGrain.h"

template<typename pixel_t, typename noise_t>
void updateFrame_avx2(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs,
const AddGrainData * const VS_RESTRICT d) noexcept {
const pixel_t * srcp = reinterpret_cast<const pixel_t *>(_srcp);
pixel_t * dstp = reinterpret_cast<pixel_t *>(_dstp);
const noise_t * pNW = reinterpret_cast<noise_t *>(d->pN[noisePlane]) + noiseOffs;
void updateFrame_avx2(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs,
const AddGrainData* const VS_RESTRICT d) noexcept {
auto srcp{ reinterpret_cast<const pixel_t*>(_srcp) };
auto dstp{ reinterpret_cast<pixel_t*>(_dstp) };
auto pNW{ reinterpret_cast<noise_t*>(d->pN[noisePlane]) + noiseOffs };

for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x += d->step) {
for (auto y{ 0 }; y < height; y++) {
for (auto x{ 0 }; x < width; x += d->step) {
if constexpr (std::is_same_v<pixel_t, uint8_t>) {
const Vec32c sign = 0x80;
Vec32c src = Vec32c().load_a(srcp + x);
const Vec32c noise = Vec32c().load(pNW + x);
src ^= sign;
src = add_saturated(src, noise);
src ^= sign;
src.store_nt(dstp + x);
Vec32c sign{ -0x80 };
auto val{ Vec32c().load_a(srcp + x) };
auto nz{ Vec32c().load(pNW + x) };
val ^= sign;
val = add_saturated(val, nz);
val ^= sign;
val.store_nt(dstp + x);
} else if constexpr (std::is_same_v<pixel_t, uint16_t>) {
const Vec16s sign = 0x8000;
Vec16s src = Vec16s().load_a(srcp + x);
const Vec16s noise = Vec16s().load(pNW + x);
src ^= sign;
src = add_saturated(src, noise);
src ^= sign;
min(Vec16us(src), d->peak).store_nt(dstp + x);
Vec16s sign{ -0x8000 };
auto val{ Vec16s().load_a(srcp + x) };
auto nz{ Vec16s().load(pNW + x) };
val ^= sign;
val = add_saturated(val, nz);
val ^= sign;
min(Vec16us(val), d->peak).store_nt(dstp + x);
} else {
Vec8f src = Vec8f().load_a(srcp + x);
const Vec8f noise = Vec8f().load(pNW + x);
(src + noise).store_nt(dstp + x);
auto val{ Vec8f().load_a(srcp + x) };
auto nz{ Vec8f().load(pNW + x) };
(val + nz).store_nt(dstp + x);
}
}

Expand All @@ -39,7 +39,7 @@ void updateFrame_avx2(const void * _srcp, void * _dstp, const int width, const i
}
}

template void updateFrame_avx2<uint8_t, int8_t>(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept;
template void updateFrame_avx2<uint16_t, int16_t>(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept;
template void updateFrame_avx2<float, float>(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept;
template void updateFrame_avx2<uint8_t, int8_t>(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept;
template void updateFrame_avx2<uint16_t, int16_t>(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept;
template void updateFrame_avx2<float, float>(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept;
#endif
54 changes: 27 additions & 27 deletions AddGrain/AddGrain_AVX512.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,34 @@
#include "AddGrain.h"

template<typename pixel_t, typename noise_t>
void updateFrame_avx512(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs,
const AddGrainData * const VS_RESTRICT d) noexcept {
const pixel_t * srcp = reinterpret_cast<const pixel_t *>(_srcp);
pixel_t * dstp = reinterpret_cast<pixel_t *>(_dstp);
const noise_t * pNW = reinterpret_cast<noise_t *>(d->pN[noisePlane]) + noiseOffs;
void updateFrame_avx512(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs,
const AddGrainData* const VS_RESTRICT d) noexcept {
auto srcp{ reinterpret_cast<const pixel_t*>(_srcp) };
auto dstp{ reinterpret_cast<pixel_t*>(_dstp) };
auto pNW{ reinterpret_cast<noise_t*>(d->pN[noisePlane]) + noiseOffs };

for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x += d->step) {
for (auto y{ 0 }; y < height; y++) {
for (auto x{ 0 }; x < width; x += d->step) {
if constexpr (std::is_same_v<pixel_t, uint8_t>) {
const Vec64c sign = 0x80;
Vec64c src = Vec64c().load_a(srcp + x);
const Vec64c noise = Vec64c().load(pNW + x);
src ^= sign;
src = add_saturated(src, noise);
src ^= sign;
src.store_nt(dstp + x);
Vec64c sign{ -0x80 };
auto val{ Vec64c().load_a(srcp + x) };
auto nz{ Vec64c().load(pNW + x) };
val ^= sign;
val = add_saturated(val, nz);
val ^= sign;
val.store_nt(dstp + x);
} else if constexpr (std::is_same_v<pixel_t, uint16_t>) {
const Vec32s sign = 0x8000;
Vec32s src = Vec32s().load_a(srcp + x);
const Vec32s noise = Vec32s().load(pNW + x);
src ^= sign;
src = add_saturated(src, noise);
src ^= sign;
min(Vec32us(src), d->peak).store_nt(dstp + x);
Vec32s sign{ -0x8000 };
auto val{ Vec32s().load_a(srcp + x) };
auto nz{ Vec32s().load(pNW + x) };
val ^= sign;
val = add_saturated(val, nz);
val ^= sign;
min(Vec32us(val), d->peak).store_nt(dstp + x);
} else {
Vec16f src = Vec16f().load_a(srcp + x);
const Vec16f noise = Vec16f().load(pNW + x);
(src + noise).store_nt(dstp + x);
auto val{ Vec16f().load_a(srcp + x) };
auto nz{ Vec16f().load(pNW + x) };
(val + nz).store_nt(dstp + x);
}
}

Expand All @@ -39,7 +39,7 @@ void updateFrame_avx512(const void * _srcp, void * _dstp, const int width, const
}
}

template void updateFrame_avx512<uint8_t, int8_t>(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept;
template void updateFrame_avx512<uint16_t, int16_t>(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept;
template void updateFrame_avx512<float, float>(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept;
template void updateFrame_avx512<uint8_t, int8_t>(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept;
template void updateFrame_avx512<uint16_t, int16_t>(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept;
template void updateFrame_avx512<float, float>(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept;
#endif
54 changes: 27 additions & 27 deletions AddGrain/AddGrain_SSE2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,34 @@
#include "AddGrain.h"

template<typename pixel_t, typename noise_t>
void updateFrame_sse2(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs,
const AddGrainData * const VS_RESTRICT d) noexcept {
const pixel_t * srcp = reinterpret_cast<const pixel_t *>(_srcp);
pixel_t * dstp = reinterpret_cast<pixel_t *>(_dstp);
const noise_t * pNW = reinterpret_cast<noise_t *>(d->pN[noisePlane]) + noiseOffs;
void updateFrame_sse2(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs,
const AddGrainData* const VS_RESTRICT d) noexcept {
auto srcp{ reinterpret_cast<const pixel_t*>(_srcp) };
auto dstp{ reinterpret_cast<pixel_t*>(_dstp) };
auto pNW{ reinterpret_cast<noise_t*>(d->pN[noisePlane]) + noiseOffs };

for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x += d->step) {
for (auto y{ 0 }; y < height; y++) {
for (auto x{ 0 }; x < width; x += d->step) {
if constexpr (std::is_same_v<pixel_t, uint8_t>) {
const Vec16c sign = 0x80;
Vec16c src = Vec16c().load_a(srcp + x);
const Vec16c noise = Vec16c().load(pNW + x);
src ^= sign;
src = add_saturated(src, noise);
src ^= sign;
src.store_nt(dstp + x);
Vec16c sign{ -0x80 };
auto val{ Vec16c().load_a(srcp + x) };
auto nz{ Vec16c().load(pNW + x) };
val ^= sign;
val = add_saturated(val, nz);
val ^= sign;
val.store_nt(dstp + x);
} else if constexpr (std::is_same_v<pixel_t, uint16_t>) {
const Vec8s sign = 0x8000;
Vec8s src = Vec8s().load_a(srcp + x);
const Vec8s noise = Vec8s().load(pNW + x);
src ^= sign;
src = add_saturated(src, noise);
src ^= sign;
min(Vec8us(src), d->peak).store_nt(dstp + x);
Vec8s sign{ -0x8000 };
auto val{ Vec8s().load_a(srcp + x) };
auto nz{ Vec8s().load(pNW + x) };
val ^= sign;
val = add_saturated(val, nz);
val ^= sign;
min(Vec8us(val), d->peak).store_nt(dstp + x);
} else {
Vec4f src = Vec4f().load_a(srcp + x);
const Vec4f noise = Vec4f().load(pNW + x);
(src + noise).store_nt(dstp + x);
auto val{ Vec4f().load_a(srcp + x) };
auto nz{ Vec4f().load(pNW + x) };
(val + nz).store_nt(dstp + x);
}
}

Expand All @@ -39,7 +39,7 @@ void updateFrame_sse2(const void * _srcp, void * _dstp, const int width, const i
}
}

template void updateFrame_sse2<uint8_t, int8_t>(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept;
template void updateFrame_sse2<uint16_t, int16_t>(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept;
template void updateFrame_sse2<float, float>(const void * _srcp, void * _dstp, const int width, const int height, const int stride, const int noisePlane, const int noiseOffs, const AddGrainData * const VS_RESTRICT d) noexcept;
template void updateFrame_sse2<uint8_t, int8_t>(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept;
template void updateFrame_sse2<uint16_t, int16_t>(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept;
template void updateFrame_sse2<float, float>(const void* _srcp, void* _dstp, const int width, const int height, const ptrdiff_t stride, const int noisePlane, const int noiseOffs, const AddGrainData* const VS_RESTRICT d) noexcept;
#endif
37 changes: 15 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,32 +1,28 @@
Description
===========

# AddGrain
AddGrain generates film-like grain or other effects (like rain) by adding random noise to a video clip. This noise may optionally be horizontally or vertically correlated to cause streaking.

Ported from AviSynth plugin http://forum.doom9.org/showthread.php?t=111849


Usage
=====

grain.Add(clip clip[, float var=1.0, float uvar=0.0, float hcorr=0.0, float vcorr=0.0, int seed=-1, bint constant=False, int opt=0])
## Usage
grain.Add(vnode clip[, float var=1.0, float uvar=0.0, float hcorr=0.0, float vcorr=0.0, int seed=-1, bint constant=False, int opt=0])

* clip: Clip to process. Any planar format with either integer sample type of 8-16 bit depth or float sample type of 32 bit depth is supported.
- clip: Clip to process. Any planar format with either integer sample type of 8-16 bit depth or float sample type of 32 bit depth is supported.

* var, uvar: The variance (strength) of the luma and chroma noise, 0 is disabled. `uvar` does nothing for GRAY and RGB formats.
- var, uvar: The variance (strength) of the luma and chroma noise, 0 is disabled. `uvar` does nothing for GRAY and RGB formats.

* hcorr, vcorr: Horizontal and vertical correlation, which causes a nifty streaking effect. Range 0.0-1.0
- hcorr, vcorr: Horizontal and vertical correlation, which causes a nifty streaking effect. Range 0.0-1.0.

* seed: Specifies a repeatable grain sequence. Set to at least 0 to use.
- seed: Specifies a repeatable grain sequence. Set to at least 0 to use.

* constant: Specifies a constant grain pattern on every frame.
- constant: Specifies a constant grain pattern on every frame.

* opt: Sets which cpu optimizations to use.
* 0 = auto detect
* 1 = use c
* 2 = use sse2
* 3 = use avx2
* 4 = use avx512
- opt: Sets which cpu optimizations to use.
- 0 = auto detect
- 1 = use c
- 2 = use sse2
- 3 = use avx2
- 4 = use avx512

The correlation factors are actually just implemented as exponential smoothing, which gives a weird side effect that I did not attempt to adjust. This means that as you increase either corr factor, you will also have to increase the stddev (grain amount) in order to get the same visible amount of grain, since it is being smoothed out a bit.

Expand All @@ -35,11 +31,8 @@ Increasing both corr factors can somewhat give clumps, or larger grain size.
And there is an interesting effect with, say, `grain.Add(var=800, hcorr=0, vcorr=0.9)` or any huge amount of strongly vertical grain. It can make a scene look like it is raining.


Compilation
===========

## Compilation
```
meson build
ninja -C build
ninja -C build install
```

0 comments on commit f621810

Please sign in to comment.