Skip to content

Commit

Permalink
Update core SIMD algorithm
Browse files Browse the repository at this point in the history
Takes us from 30,000 ns -> 18,900 ns
  • Loading branch information
AdamNiederer committed Jan 30, 2018
1 parent 9f3f8c0 commit 6def356
Showing 1 changed file with 13 additions and 12 deletions.
25 changes: 13 additions & 12 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,18 +235,19 @@ pub fn count(haystack: &[u8], needle: u8) -> usize {
naive_count(haystack, needle)
} else {
let mut ret = 0;
let mut i = 0;
let mut acc = u8s(0);
haystack.simd_iter().simd_for_each(u8s(needle.overflowing_add(1).0), |v| {
i += 1;
acc += (PackedEq::eq(&v, &u8s(needle)).be_u8s() & u8s(0x01));
if i == 255 {
ret += acc.scalar_reduce(0, |acc, s| acc + (s as usize));
acc = u8s(0);
i = 0;
}
});
ret + acc.scalar_reduce(0, |acc, s| acc + (s as usize))

for i in 0..haystack.len() / (u8s::WIDTH * 255) {
ret += (&haystack[i * u8s::WIDTH * 255..(i + 1) * u8s::WIDTH * 255])
.simd_iter()
.simd_reduce(u8s(0), u8s(needle.overflowing_add(1).0), |acc, v| {
acc + (PackedEq::eq(&v, &u8s(needle)).be_u8s() & u8s(0x01))
}).scalar_reduce(0, |acc, s| acc + (s as usize));
}
ret + (&haystack[haystack.len() - haystack.len() % (u8s::WIDTH * 255)..])
.simd_iter()
.simd_reduce(u8s(0), u8s(needle.overflowing_add(1).0), |acc, v| {
acc + (PackedEq::eq(&v, &u8s(needle)).be_u8s() & u8s(0x01))
}).scalar_reduce(0, |acc, s| acc + (s as usize))
}
}

Expand Down

0 comments on commit 6def356

Please sign in to comment.