From ac290150542f0ffea8f89c340e25de561e628b27 Mon Sep 17 00:00:00 2001 From: Laiho Date: Sun, 28 Jul 2024 20:16:05 +0300 Subject: [PATCH] unroll contains --- src/contains.rs | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/contains.rs b/src/contains.rs index 58355a8..e1a7eff 100644 --- a/src/contains.rs +++ b/src/contains.rs @@ -1,4 +1,5 @@ use crate::SIMD_LEN; +use crate::UNROLL_FACTOR; use std::simd::cmp::SimdPartialEq; use std::simd::Mask; use std::simd::{Simd, SimdElement}; @@ -19,16 +20,27 @@ where { fn contains_simd(&self, needle: &T) -> bool { let arr = self.as_slice(); - let (prefix, aligned_chunks, suffix) = arr.as_simd::(); + let (prefix, simd_data, suffix) = arr.as_simd::(); // Prefix if prefix.contains(&needle) { return true; } // SIMD let simd_needle = Simd::splat(*needle); - for chunk in aligned_chunks { - let mask = chunk.simd_eq(simd_needle).to_bitmask(); - if mask != 0 { + // Unrolled loops + let mut chunks_iter = simd_data.chunks_exact(UNROLL_FACTOR); + for chunks in chunks_iter.by_ref() { + let mut mask = Mask::default(); + for chunk in chunks { + mask |= chunk.simd_eq(simd_needle); + } + if mask.any() { + return true; + } + } + for chunk in chunks_iter.remainder() { + let mask = chunk.simd_eq(simd_needle); + if mask.any() { return true; } } @@ -57,7 +69,7 @@ mod tests { Simd: SimdPartialEq>, Standard: Distribution, { - for len in 0..100 { + for len in 0..500 { for _ in 0..5 { let mut v: Vec = vec![T::default(); len]; let mut rng = rand::thread_rng();