Skip to content

Commit

Permalink
Fix wrong code due to vec_merge + pcmp to blendvb splitter.
Browse files Browse the repository at this point in the history
gcc/ChangeLog:

	PR target/112443
	* config/i386/sse.md (*avx2_pcmp<mode>3_4): Fix swap condition
	from LT to GT since there's a NOT in the pattern.
	(*avx2_pcmp<mode>3_5): Ditto.

gcc/testsuite/ChangeLog:

	* g++.target/i386/pr112443.C: New test.

(cherry picked from commit 9a0cc04)
  • Loading branch information
algebra84 committed Nov 10, 2023
1 parent cc9d477 commit 1808ebf
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 2 deletions.
4 changes: 2 additions & 2 deletions gcc/config/i386/sse.md
Original file line number Diff line number Diff line change
Expand Up @@ -16358,7 +16358,7 @@
(match_dup 4))]
UNSPEC_BLENDV))]
{
if (INTVAL (operands[5]) == 1)
if (INTVAL (operands[5]) == 5)
std::swap (operands[1], operands[2]);
operands[3] = gen_lowpart (<MODE>mode, operands[3]);
})
Expand Down Expand Up @@ -16388,7 +16388,7 @@
(match_dup 4))]
UNSPEC_BLENDV))]
{
if (INTVAL (operands[5]) == 1)
if (INTVAL (operands[5]) == 5)
std::swap (operands[1], operands[2]);
})

Expand Down
108 changes: 108 additions & 0 deletions gcc/testsuite/g++.target/i386/pr112443.C
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/* { dg-do run } */
/* { dg-require-effective-target avx512bw } */
/* { dg-require-effective-target avx512vl } */
/* { dg-options "-O2 -std=c++17 -mavx512bw -mavx512vl" } */

#include <cstdint>
#include <x86intrin.h>
#include <functional>
#include <ostream>

#define AVX512BW
#define AVX512VL

#include "avx512f-helper.h"

// Minimal iterator stand-in: bundles an input and an output buffer and hands
// them to a caller-supplied callback.  Kept as a plain aggregate so it can be
// brace-initialised as TensorIteratorBase{in, out}.
struct TensorIteratorBase {
  char* in;   // forwarded to the callback as its second argument
  char* out;  // forwarded to the callback as its first argument

  // Calls `loop` exactly once, as loop(out, in, 32).
  void for_each(std::function<void(char*, char*, int64_t size)> loop) {
    constexpr int64_t kChunkSize = 32;
    loop(out, in, kChunkSize);
  }
};

// Thin wrapper around one 256-bit AVX integer register, treated as 32
// independent byte lanes.
//
// NOTE(review): this class is part of a compiler regression test; the exact
// intrinsic sequences below are presumably what the test is meant to
// exercise, so keep the code shape unchanged when editing.
class Vectorized {
protected:
// The wrapped register.
__m256i values;

// Bitwise NOT of `v`, implemented as XOR with an all-ones register.
static inline __m256i invert(const __m256i& v) {
const auto ones = _mm256_set1_epi64x(-1);
return _mm256_xor_si256(ones, v);
}
public:
// Implicit conversion back to the raw register so a Vectorized can be passed
// directly to intrinsics.
operator __m256i() const {
return values;
}

// Number of byte lanes handled per Vectorized (32 bytes == 256 bits).
static constexpr int size() {
return 32;
}

// Default constructor: leaves `values` uninitialised.
Vectorized() {}
// Wraps an existing register (implicit on purpose: intrinsics' results are
// returned directly from the member functions below).
Vectorized(__m256i v) : values(v) {}
// Broadcasts `v` into every byte lane.
Vectorized(uint8_t v) { values = _mm256_set1_epi8(v); }
// Per-byte select via _mm256_blendv_epi8: a lane is taken from `b` where the
// most significant bit of the corresponding `mask` byte is set, otherwise
// from `a`.
static Vectorized blendv(const Vectorized& a, const Vectorized& b,
const Vectorized& mask) {
return _mm256_blendv_epi8(a, b, mask);
}
// Loads 32 bytes from `ptr` using the unaligned-load intrinsic.
static Vectorized loadu(const void* ptr) {
return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(ptr));
}
// Stores the 32 bytes to `ptr` using the unaligned-store intrinsic.
void store(void* ptr) const {
_mm256_storeu_si256(reinterpret_cast<__m256i*>(ptr), values);
}

// Per-byte UNSIGNED less-than.  max(values, other) == values holds exactly
// where values >= other, so inverting that equality mask yields all-ones in
// the lanes where values < other and zero elsewhere.
Vectorized operator<(const Vectorized& other) const {
__m256i max = _mm256_max_epu8(values, other);
return invert(_mm256_cmpeq_epi8(max, values));
}
// Per-byte subtraction (values - b) via _mm256_sub_epi8.
Vectorized operator-(const Vectorized& b) {
return _mm256_sub_epi8(values, b);
}
};

std::ostream& operator<<(std::ostream& stream, const Vectorized& vec) {
uint8_t buf[Vectorized::size()];
vec.store(buf);
stream << "vec[";
for (int i = 0; i != Vectorized::size(); i++) {
if (i != 0)
stream << ", ";
stream << buf[i]*1;
}
stream << "]";
return stream;
}

// Kernel under test.  For every full 32-byte chunk of the input it computes,
// per byte lane x:
//   left  = (0 < x) ? 1 : 0
//   right = (x < 0) ? 1 : 0
//   out   = left - right
// where `<` is Vectorized::operator< (an unsigned byte compare), so `right`
// is always 0 and the result is 1 for non-zero bytes and 0 for zero bytes.
//
// NOTE(review): the blendv-of-a-comparison expressions are presumably the
// pattern that the regression test targets; keep their shape unchanged.
void run(TensorIteratorBase iter){
Vectorized zero_vec(0);
Vectorized one_vec(1);

// The constants are captured by copy ([=]) into the callback.
iter.for_each([=](char* out, char* in, int64_t size) {
// Only whole Vectorized::size()-byte chunks are processed; a trailing
// partial chunk, if any, is left untouched.
for (int64_t i = 0; i <= size - Vectorized::size(); i += Vectorized::size()) {
auto self_vec = Vectorized::loadu(in + i);
auto left = Vectorized::blendv(zero_vec, one_vec, zero_vec < self_vec);
auto right = Vectorized::blendv(zero_vec, one_vec, self_vec < zero_vec);
auto outv = left - right;
outv.store(out + i);
}
});
}

// 256-bit test entry point (presumably invoked by the harness pulled in via
// avx512f-helper.h -- confirm there).  Fills the input with 1s, runs the
// kernel, and aborts unless every output byte is 1.
void
test_256 (){
char in[32];
char out[32];
for(auto& x: in) x = 1;
run(TensorIteratorBase{in, out});
// NOTE(review): the result of this load is discarded; it is kept as-is
// because this file is a reproducer and its statements should not change.
Vectorized::loadu (out);
for (int i = 0; i != 32; i++)
if (out[i] != 1)
__builtin_abort ();
}

// Intentionally empty: this file has no 128-bit variant of the test.  The
// function presumably exists only because the harness in avx512f-helper.h
// expects a test_128 symbol -- confirm against that header.
void test_128() {}

0 comments on commit 1808ebf

Please sign in to comment.