From 91edfbde63f3cfd04c90f53631c67b10049be9c9 Mon Sep 17 00:00:00 2001 From: zhenfei Date: Thu, 26 Sep 2024 09:56:44 -0400 Subject: [PATCH 1/7] wip --- Cargo.toml | 1 - arith/Cargo.toml | 2 - arith/src/extension_field.rs | 4 - arith/src/extension_field/gf2_127.rs | 9 - arith/src/extension_field/gf2_127/avx.rs | 367 -------------- arith/src/extension_field/gf2_127/neon.rs | 0 arith/src/extension_field/gf2_128.rs | 13 +- .../gf2_128/{avx.rs => avx512.rs} | 0 arith/src/extension_field/gf2_128x4/avx256.rs | 466 ------------------ arith/src/extension_field/gf2_128x8.rs | 15 +- .../gf2_128x8/{avx.rs => avx512.rs} | 0 arith/src/field/m31.rs | 7 +- .../field/m31/{m31_avx.rs => m31_avx512.rs} | 0 arith/src/field/m31/m31x16.rs | 14 +- 14 files changed, 26 insertions(+), 872 deletions(-) delete mode 100644 arith/src/extension_field/gf2_127.rs delete mode 100644 arith/src/extension_field/gf2_127/avx.rs delete mode 100644 arith/src/extension_field/gf2_127/neon.rs rename arith/src/extension_field/gf2_128/{avx.rs => avx512.rs} (100%) delete mode 100644 arith/src/extension_field/gf2_128x4/avx256.rs rename arith/src/extension_field/gf2_128x8/{avx.rs => avx512.rs} (100%) rename arith/src/field/m31/{m31_avx.rs => m31_avx512.rs} (100%) diff --git a/Cargo.toml b/Cargo.toml index 4e78d72d..b052b802 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,7 +43,6 @@ path = "src/utils.rs" default = [] # default = [ "grinding" ] grinding = [] -avx256 = ["arith/avx256"] [workspace] members = ["arith", "bi-kzg"] diff --git a/arith/Cargo.toml b/arith/Cargo.toml index d05aaed5..f31a0bb0 100644 --- a/arith/Cargo.toml +++ b/arith/Cargo.toml @@ -27,5 +27,3 @@ harness = false name = "ext_field" harness = false -[features] -avx256 = [] diff --git a/arith/src/extension_field.rs b/arith/src/extension_field.rs index e0f3547b..36a9c30c 100644 --- a/arith/src/extension_field.rs +++ b/arith/src/extension_field.rs @@ -1,16 +1,12 @@ mod fr_ext; -// mod gf2_127; mod gf2_128; mod gf2_128x8; mod m31_ext; mod m31_ext3x16; use crate::{Field, FieldSerde}; -// pub use gf2_127::*; pub use gf2_128::*; pub use gf2_128x8::GF2_128x8; -#[cfg(target_arch = "x86_64")] -pub use gf2_128x8::GF2_128x8_256; pub use m31_ext::M31Ext3; pub use m31_ext3x16::M31Ext3x16; diff --git a/arith/src/extension_field/gf2_127.rs b/arith/src/extension_field/gf2_127.rs deleted file mode 100644 index 306fa594..00000000 --- a/arith/src/extension_field/gf2_127.rs +++ /dev/null @@ -1,9 +0,0 @@ -#[cfg(target_arch = "aarch64")] -pub(crate) mod neon; -#[cfg(target_arch = "aarch64")] -pub type GF2_127 = neon::NeonGF2_127; - -#[cfg(target_arch = "x86_64")] -mod avx; -#[cfg(target_arch = "x86_64")] -pub type GF2_127 = avx::AVX512GF2_127; diff --git a/arith/src/extension_field/gf2_127/avx.rs b/arith/src/extension_field/gf2_127/avx.rs deleted file mode 100644 index 75480457..00000000 --- a/arith/src/extension_field/gf2_127/avx.rs +++ /dev/null @@ -1,367 +0,0 @@ -use std::iter::{Product, Sum}; -use std::{ - arch::x86_64::*, - mem::transmute, - ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign}, -}; - -use crate::{field_common, ExtensionField, Field, FieldSerde, FieldSerdeResult, GF2}; - -#[derive(Debug, Clone, Copy)] -pub struct AVX512GF2_127 { - pub v: __m128i, -} - -field_common!(AVX512GF2_127); - -impl FieldSerde for AVX512GF2_127 { - const SERIALIZED_SIZE: usize = 16; - - #[inline(always)] - fn serialize_into(&self, mut writer: W) -> FieldSerdeResult<()> { - unsafe { writer.write_all(transmute::<__m128i, [u8; 16]>(self.v).as_ref())? }; - Ok(()) - } - - #[inline(always)] - fn deserialize_from(mut reader: R) -> FieldSerdeResult { - let mut u = [0u8; Self::SERIALIZED_SIZE]; - reader.read_exact(&mut u)?; - u[Self::SERIALIZED_SIZE - 1] &= 0x7F; // Should we do a modular operation here? - - unsafe { - Ok(AVX512GF2_127 { - v: transmute::<[u8; Self::SERIALIZED_SIZE], __m128i>(u), - }) - } - } - - #[inline(always)] - fn try_deserialize_from_ecc_format(mut reader: R) -> FieldSerdeResult { - let mut u = [0u8; 32]; - reader.read_exact(&mut u)?; - assert!(u[15] <= 0x7F); // and ignoring 16 - 31 - Ok(unsafe { - AVX512GF2_127 { - v: transmute::<[u8; 16], __m128i>(u[..16].try_into().unwrap()), - } - }) - } -} - -// mod x^127 + x + 1 -impl Field for AVX512GF2_127 { - const NAME: &'static str = "Galios Field 2^127"; - - const SIZE: usize = 128 / 8; - - const FIELD_SIZE: usize = 127; // in bits - - const ZERO: Self = AVX512GF2_127 { - v: unsafe { std::mem::zeroed() }, - }; - - const ONE: Self = AVX512GF2_127 { - v: unsafe { std::mem::transmute::<[i32; 4], __m128i>([1, 0, 0, 0]) }, - }; - - const INV_2: Self = AVX512GF2_127 { - v: unsafe { std::mem::zeroed() }, - }; // should not be used - - #[inline(always)] - fn zero() -> Self { - AVX512GF2_127 { - v: unsafe { std::mem::zeroed() }, - } - } - - #[inline(always)] - fn one() -> Self { - AVX512GF2_127 { - v: unsafe { std::mem::transmute::<[i32; 4], __m128i>([1, 0, 0, 0]) }, - } - } - - #[inline(always)] - fn random_unsafe(mut rng: impl rand::RngCore) -> Self { - let mut u = [0u8; 16]; - rng.fill_bytes(&mut u); - u[15] &= 0x7F; - unsafe { - AVX512GF2_127 { - v: *(u.as_ptr() as *const __m128i), - } - } - } - - #[inline(always)] - fn random_bool(mut rng: impl rand::RngCore) -> Self { - AVX512GF2_127 { - v: unsafe { std::mem::transmute::<[u32; 4], __m128i>([rng.next_u32() % 2, 0, 0, 0]) }, - } - } - - #[inline(always)] - fn is_zero(&self) -> bool { - unsafe { std::mem::transmute::<__m128i, [u8; 16]>(self.v) == [0; 16] } - } - - #[inline(always)] - fn exp(&self, exponent: u128) -> Self { - let mut e = exponent; - let mut res = Self::one(); - let mut t = *self; - while e > 0 { - if e & 1 == 1 { - res *= t; - } - t = t * t; - e >>= 1; - } - res - } - - #[inline(always)] - fn inv(&self) -> Option { - if self.is_zero() { - return None; - } - let p_m2 = (1u128 << 127) - 2; - Some(Self::exp(self, p_m2)) - } - - #[inline(always)] - fn square(&self) -> Self { - self * self - } - - #[inline(always)] - fn as_u32_unchecked(&self) -> u32 { - unimplemented!("u32 for GF127 doesn't make sense") - } - - #[inline(always)] - fn from_uniform_bytes(bytes: &[u8; 32]) -> Self { - let mut bytes = bytes.clone(); - bytes[15] &= 0x7F; - - unsafe { - AVX512GF2_127 { - v: transmute::<[u8; 16], __m128i>(bytes[..16].try_into().unwrap()), - } - } - } -} - -impl ExtensionField for AVX512GF2_127 { - const DEGREE: usize = 127; - - const W: u32 = 0x87; - - const X: Self = AVX512GF2_127 { - v: unsafe { std::mem::transmute::<[i32; 4], __m128i>([2, 0, 0, 0]) }, - }; - - type BaseField = GF2; - - #[inline(always)] - fn mul_by_base_field(&self, base: &Self::BaseField) -> Self { - if base.v == 0 { - Self::zero() - } else { - *self - } - } - - #[inline(always)] - fn add_by_base_field(&self, base: &Self::BaseField) -> Self { - let mut res = *self; - res.v = unsafe { _mm_xor_si128(res.v, _mm_set_epi64x(0, base.v as i64)) }; - res - } - - /// - #[inline] - fn mul_by_x(&self) -> Self { - unsafe { - // Shift left by 1 bit - let shifted = _mm_slli_epi64(self.v, 1); - - // Get the most significant bit and move it - let msb = _mm_srli_epi64(self.v, 63); - let msb_moved = _mm_slli_si128(msb, 8); - - // Combine the shifted value with the moved msb - let shifted_consolidated = _mm_or_si128(shifted, msb_moved); - - // Create the reduction value (0b11) and the comparison value (1) - let reduction = { - let multiplier = _mm_set_epi64x(0, 0b11); - let one = _mm_set_epi64x(0, 1); - - // Check if the MSB was 1 and create a mask - let mask = _mm_cmpeq_epi64( - _mm_srli_si128(_mm_srli_epi64(shifted, 63), 8), - one); - - _mm_and_si128(mask, multiplier) - }; - - // Apply the reduction conditionally - let res = _mm_xor_si128(shifted_consolidated, reduction); - - Self { v: res } - } - } -} - -impl From for AVX512GF2_127 { - #[inline(always)] - fn from(v: GF2) -> Self { - AVX512GF2_127 { - v: unsafe { _mm_set_epi64x(0, v.v as i64) }, - } - } -} - -const X0TO126_MASK: __m128i = unsafe { transmute::<[u8; 16], __m128i>( - [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F])}; -const X127_MASK: __m128i = unsafe { transmute::<[u8; 16], __m128i>( - [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80])}; -const X127_REMINDER: __m128i = unsafe { transmute::<[u8; 16], __m128i>( - [0b11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80])}; - - -#[inline(always)] -unsafe fn mm_bitshift_left(x: __m128i) -> __m128i -{ - let mut carry = _mm_bslli_si128(x, 8); - carry = _mm_srli_epi64(carry, 64 - count); - let x = _mm_slli_epi64(x, count); - _mm_or_si128(x, carry) -} - - -#[inline] -unsafe fn gfmul(a: __m128i, b: __m128i) -> __m128i { - let xmm_mask = _mm_setr_epi32((0xFFffffff_u32) as i32, 0x0, 0x0, 0x0); - - // a = a0|a1, b = b0|b1 - - let mut tmp3 = _mm_clmulepi64_si128(a, b, 0x00); // tmp3 = a0 * b0 - let mut tmp6 = _mm_clmulepi64_si128(a, b, 0x11); // tmp6 = a1 * b1 - - // 78 = 0b0100_1110 - let mut tmp4 = _mm_shuffle_epi32(a, 78); // tmp4 = a1|a0 - let mut tmp5 = _mm_shuffle_epi32(b, 78); // tmp5 = b1|b0 - tmp4 = _mm_xor_si128(tmp4, a); // tmp4 = (a0 + a1) | (a0 + a1) - tmp5 = _mm_xor_si128(tmp5, b); // tmp5 = (b0 + b1) | (b0 + b1) - - tmp4 = _mm_clmulepi64_si128(tmp4, tmp5, 0x00); // tmp4 = (a0 + a1) * (b0 + b1) - tmp4 = _mm_xor_si128(tmp4, tmp3); // tmp4 = (a0 + a1) * (b0 + b1) - a0 * b0 - tmp4 = _mm_xor_si128(tmp4, tmp6); // tmp4 = (a0 + a1) * (b0 + b1) - a0 * b0 - a1 * b1 = a0 * b1 + a1 * b0 - - // tmp4 = e1 | e0 - tmp5 = _mm_slli_si128(tmp4, 8); // tmp5 = e0 | 00 - tmp4 = _mm_srli_si128(tmp4, 8); // tmp4 = 00 | e1 - tmp3 = _mm_xor_si128(tmp3, tmp5); // the lower 128 bits, deg 0 - 127 - tmp6 = _mm_xor_si128(tmp6, tmp4); // the higher 128 bits, deg 128 - 252, the 124 least signicicant bits are non-zero - - // x^0 - x^126 - let x0to126 = _mm_and_si128(tmp3, X0TO126_MASK); - - // x^127 - tmp4 = _mm_and_si128(tmp3, X127_MASK); - tmp4 = _mm_cmpeq_epi64(tmp4, X127_MASK); - tmp4 = _mm_srli_si128(tmp4, 15); - let x127 = _mm_and_si128(tmp4, X127_REMINDER); - - // x^128 - x^252 - let x128to252 = - _mm_and_si128( - mm_bitshift_left::<2>(tmp6), - mm_bitshift_left::<1>(tmp6), - ); - - _mm_and_si128(_mm_and_si128(x0to126, x127), x128to252) - - // let mut tmp7 = _mm_srli_epi32(tmp6, 31); - // let mut tmp8 = _mm_srli_epi32(tmp6, 30); - // let tmp9 = _mm_srli_epi32(tmp6, 25); - - // tmp7 = _mm_xor_si128(tmp7, tmp8); - // tmp7 = _mm_xor_si128(tmp7, tmp9); - - // tmp8 = _mm_shuffle_epi32(tmp7, 147); - // tmp7 = _mm_and_si128(xmm_mask, tmp8); - // tmp8 = _mm_andnot_si128(xmm_mask, tmp8); - - // tmp3 = _mm_xor_si128(tmp3, tmp8); - // tmp6 = _mm_xor_si128(tmp6, tmp7); - - // let tmp10 = _mm_slli_epi32(tmp6, 1); - // tmp3 = _mm_xor_si128(tmp3, tmp10); - - // let tmp11 = _mm_slli_epi32(tmp6, 2); - // tmp3 = _mm_xor_si128(tmp3, tmp11); - - // let tmp12 = _mm_slli_epi32(tmp6, 7); - // tmp3 = _mm_xor_si128(tmp3, tmp12); - - // _mm_xor_si128(tmp3, tmp6) - -} - -impl Default for AVX512GF2_127 { - #[inline(always)] - fn default() -> Self { - Self::zero() - } -} - -impl PartialEq for AVX512GF2_127 { - #[inline(always)] - fn eq(&self, other: &Self) -> bool { - unsafe { _mm_test_all_ones(_mm_cmpeq_epi8(self.v, other.v)) == 1 } - } -} - -impl Neg for AVX512GF2_127 { - type Output = Self; - - #[inline(always)] - fn neg(self) -> Self { - self - } -} - -impl From for AVX512GF2_127 { - #[inline(always)] - fn from(v: u32) -> Self { - AVX512GF2_127 { - v: unsafe { std::mem::transmute::<[u32; 4], __m128i>([v, 0, 0, 0]) }, - } - } -} - -#[inline(always)] -fn add_internal(a: &AVX512GF2_127, b: &AVX512GF2_127) -> AVX512GF2_127 { - AVX512GF2_127 { - v: unsafe { _mm_xor_si128(a.v, b.v) }, - } -} - -#[inline(always)] -fn sub_internal(a: &AVX512GF2_127, b: &AVX512GF2_127) -> AVX512GF2_127 { - AVX512GF2_127 { - v: unsafe { _mm_xor_si128(a.v, b.v) }, - } -} - -#[inline(always)] -fn mul_internal(a: &AVX512GF2_127, b: &AVX512GF2_127) -> AVX512GF2_127 { - AVX512GF2_127 { - v: unsafe { gfmul(a.v, b.v) }, - } -} diff --git a/arith/src/extension_field/gf2_127/neon.rs b/arith/src/extension_field/gf2_127/neon.rs deleted file mode 100644 index e69de29b..00000000 diff --git a/arith/src/extension_field/gf2_128.rs b/arith/src/extension_field/gf2_128.rs index e5cb17bb..8ba6fc03 100644 --- a/arith/src/extension_field/gf2_128.rs +++ b/arith/src/extension_field/gf2_128.rs @@ -3,7 +3,12 @@ pub(crate) mod neon; #[cfg(target_arch = "aarch64")] pub type GF2_128 = neon::NeonGF2_128; -#[cfg(target_arch = "x86_64")] -mod avx; -#[cfg(target_arch = "x86_64")] -pub type GF2_128 = avx::AVX512GF2_128; +#[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] +mod avx512; +#[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] +pub type GF2_128 = avx512::AVX512GF2_128; + +#[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] +mod avx256; +#[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] +pub type GF2_128 = avx256::AVX512GF2_128; diff --git a/arith/src/extension_field/gf2_128/avx.rs b/arith/src/extension_field/gf2_128/avx512.rs similarity index 100% rename from arith/src/extension_field/gf2_128/avx.rs rename to arith/src/extension_field/gf2_128/avx512.rs diff --git a/arith/src/extension_field/gf2_128x4/avx256.rs b/arith/src/extension_field/gf2_128x4/avx256.rs deleted file mode 100644 index 1783f708..00000000 --- a/arith/src/extension_field/gf2_128x4/avx256.rs +++ /dev/null @@ -1,466 +0,0 @@ -use crate::field_common; - -use crate::{Field, FieldSerde, FieldSerdeResult, SimdField, GF2_128}; -use std::fmt::Debug; -use std::{ - arch::x86_64::*, - iter::{Product, Sum}, - mem::transmute, - ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign}, -}; - -#[derive(Clone, Copy)] -pub struct AVX256GF2_128x4 { - data: [__m256i; 2]; -} - -field_common!(AVX256GF2_128x4); - -impl AVX256GF2_128x4 { - #[inline(always)] - pub(crate) fn pack_full(data: __m128i) -> [__m256i; 2] { - unsafe { [_mm256_broadcast_i32x4(data), _mm256_broadcast_i32x4(data)] } - } -} - -impl FieldSerde for AVX256GF2_128x4 { - const SERIALIZED_SIZE: usize = 512 / 8; - - #[inline(always)] - fn serialize_into(&self, mut writer: W) -> FieldSerdeResult<()> { - unsafe { - let mut data = [0u8; 64]; - _mm256_storeu_si256(data.as_mut_ptr() as *mut i32, self.data[0]); - _mm256_storeu_si256(data.as_mut_ptr().add(32) as *mut i32, self.data[1]); - writer.write_all(&data)?; - } - Ok(()) - } - #[inline(always)] - fn deserialize_from(mut reader: R) -> FieldSerdeResult { - let mut data = [0u8; Self::SERIALIZED_SIZE]; - reader.read_exact(&mut data)?; - unsafe { - Ok(Self { - data: [_mm256_loadu_si256(data.as_ptr() as *const i32), _mm256_loadu_si256(data.as_ptr().add(8) as *const i32)], - }) - } - } - - #[inline(always)] - fn try_deserialize_from_ecc_format(mut reader: R) -> FieldSerdeResult { - let mut buf = [0u8; 32]; - reader.read_exact(&mut buf)?; - let data: __m128i = unsafe { _mm_loadu_si128(buf.as_ptr() as *const __m128i) }; - Ok(Self { - data: Self::pack_full(data), - }) - } -} - -const PACKED_0: __m256i = unsafe { transmute([0; 4]) }; - -const PACKED_INV_2: __m256i = unsafe { - transmute([ - 67_u64, - (1_u64) << 63, - 67_u64, - (1_u64) << 63, - ]) -}; - -// p(x) = x^128 + x^7 + x^2 + x + 1 -impl Field for AVX256GF2_128x4 { - const NAME: &'static str = "AVX256 Galios Field 2^128"; - - // size in bytes - const SIZE: usize = 512 / 8; - - const ZERO: Self = Self { data: PACKED_0 }; - - const INV_2: Self = Self { data: PACKED_INV_2 }; - - const FIELD_SIZE: usize = 128; - - #[inline(always)] - fn zero() -> Self { - unsafe { - let zero = _mm256_setzero_si256(); - Self { data: [zero, zero] } - } - } - - #[inline(always)] - fn is_zero(&self) -> bool { - unsafe { - let zero = _mm256_setzero_si256(); - let cmp = _mm256_cmpeq_epi64_mask(self.data[0], zero) & _mm256_cmpeq_epi64_mask(self.data[1], zero); - cmp == 0xFF // All 8 64-bit integers are equal (zero) - } - } - - #[inline(always)] - fn one() -> Self { - unsafe { - let one = _mm256_set_epi64(0, 1, 0, 1); - Self { data: [one, one] } - } - } - - #[inline(always)] - fn random_unsafe(mut rng: impl rand::RngCore) -> Self { - let data = unsafe { - _mm256_set_epi64( - rng.next_u64() as i64, - rng.next_u64() as i64, - rng.next_u64() as i64, - rng.next_u64() as i64, - ) - }; - Self { data } - } - - #[inline(always)] - fn random_bool(mut rng: impl rand::RngCore) -> Self { - let data = unsafe { - _mm256_set_epi64( - 0, - (rng.next_u64() % 2) as i64, - 0, - (rng.next_u64() % 2) as i64, - ) - }; - Self { data } - } - - #[inline(always)] - fn exp(&self, exponent: u128) -> Self { - let mut e = exponent; - let mut res = Self::one(); - let mut t = *self; - while e != 0 { - let b = e & 1; - if b == 1 { - res *= t; - } - t = t * t; - e >>= 1; - } - res - } - - #[inline(always)] - fn inv(&self) -> Option { - if self.is_zero() { - return None; - } - let p_m2 = !(0u128) - 1; - Some(Self::exp(self, p_m2)) - } - - #[inline(always)] - fn as_u32_unchecked(&self) -> u32 { - unimplemented!("self is a vector, cannot convert to u32") - } - - #[inline(always)] - fn from_uniform_bytes(_bytes: &[u8; 32]) -> Self { - todo!() - } - - #[inline(always)] - fn square(&self) -> Self { - *self * *self - } - - #[inline(always)] - fn double(&self) -> Self { - Self::ZERO - } - - #[inline(always)] - fn mul_by_2(&self) -> Self { - Self::ZERO - } - - #[inline(always)] - fn mul_by_3(&self) -> Self { - *self - } - - #[inline(always)] - fn mul_by_5(&self) -> Self { - *self - } - - #[inline(always)] - fn mul_by_6(&self) -> Self { - Self::ZERO - } -} -/* -credit to intel for the original implementation -void gfmul(__m128i a, __m128i b, __m128i *res) { - __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; - __m128i tmp7, tmp8, tmp9, tmp10, tmp11, tmp12; - __m128i XMMMASK = _mm_setr_epi32(0xffffffff, 0x0, 0x0, 0x0); - - // a = a0|a1, b = b0|b1 - - tmp3 = _mm_clmulepi64_si128(a, b, 0x00); // tmp3 = a0 * b0 - tmp6 = _mm_clmulepi64_si128(a, b, 0x11); // tmp6 = a1 * b1 - - tmp4 = _mm_shuffle_epi32(a, 78); // tmp4 = a1|a0 - tmp5 = _mm_shuffle_epi32(b, 78); // tmp5 = b1|b0 - tmp4 = _mm_xor_si128(tmp4, a); // tmp4 = (a0 + a1) | (a0 + a1) - tmp5 = _mm_xor_si128(tmp5, b); // tmp5 = (b0 + b1) | (b0 + b1) - - tmp4 = _mm_clmulepi64_si128(tmp4, tmp5, 0x00); // tmp4 = (a0 + a1) * (b0 + b1) - tmp4 = _mm_xor_si128(tmp4, tmp3); // tmp4 = (a0 + a1) * (b0 + b1) - a0 * b0 - tmp4 = _mm_xor_si128(tmp4, tmp6); // tmp4 = (a0 + a1) * (b0 + b1) - a0 * b0 - a1 * b1 = a0 * b1 + a1 * b0 - - tmp5 = _mm_slli_si128(tmp4, 8); - tmp4 = _mm_srli_si128(tmp4, 8); - tmp3 = _mm_xor_si128(tmp3, tmp5); - tmp6 = _mm_xor_si128(tmp6, tmp4); - - tmp7 = _mm_srli_epi32(tmp6, 31); - tmp8 = _mm_srli_epi32(tmp6, 30); - tmp9 = _mm_srli_epi32(tmp6, 25); - - tmp7 = _mm_xor_si128(tmp7, tmp8); - tmp7 = _mm_xor_si128(tmp7, tmp9); - - tmp8 = _mm_shuffle_epi32(tmp7, 147); - tmp7 = _mm_and_si128(XMMMASK, tmp8); - tmp8 = _mm_andnot_si128(XMMMASK, tmp8); - - tmp3 = _mm_xor_si128(tmp3, tmp8); - tmp6 = _mm_xor_si128(tmp6, tmp7); - - tmp10 = _mm_slli_epi32(tmp6, 1); - tmp3 = _mm_xor_si128(tmp3, tmp10); - - tmp11 = _mm_slli_epi32(tmp6, 2); - tmp3 = _mm_xor_si128(tmp3, tmp11); - - tmp12 = _mm_slli_epi32(tmp6, 7); - tmp3 = _mm_xor_si128(tmp3, tmp12); - - *res = _mm_xor_si128(tmp3, tmp6); -} - -*/ - -/* -AVX 512 version -void gfmul_avx512(__m512i a, __m512i b, __m512i *res) { - __m512i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; - __m512i tmp7, tmp8, tmp9, tmp10, tmp11, tmp12; - __m512i XMMMASK = _mm512_set_epi32( - 0, 0, 0, 0xffffffff, - 0, 0, 0, 0xffffffff, - 0, 0, 0, 0xffffffff, - 0, 0, 0, 0xffffffff - ); - - tmp3 = _mm512_clmulepi64_epi128(a, b, 0x00); - tmp6 = _mm512_clmulepi64_epi128(a, b, 0x11); - - tmp4 = _mm512_shuffle_epi32(a, _MM_PERM_BADC); - tmp5 = _mm512_shuffle_epi32(b, _MM_PERM_BADC); - tmp4 = _mm512_xor_si512(tmp4, a); - tmp5 = _mm512_xor_si512(tmp5, b); - - tmp4 = _mm512_clmulepi64_epi128(tmp4, tmp5, 0x00); - tmp4 = _mm512_xor_si512(tmp4, tmp3); - tmp4 = _mm512_xor_si512(tmp4, tmp6); - - tmp5 = _mm512_bslli_epi128(tmp4, 8); - tmp4 = _mm512_bsrli_epi128(tmp4, 8); - tmp3 = _mm512_xor_si512(tmp3, tmp5); - tmp6 = _mm512_xor_si512(tmp6, tmp4); - - tmp7 = _mm512_srli_epi32(tmp6, 31); - tmp8 = _mm512_srli_epi32(tmp6, 30); - tmp9 = _mm512_srli_epi32(tmp6, 25); - - tmp7 = _mm512_xor_si512(tmp7, tmp8); - tmp7 = _mm512_xor_si512(tmp7, tmp9); - - tmp8 = _mm512_shuffle_epi32(tmp7, _MM_PERM_ABCD); - tmp7 = _mm512_and_si512(XMMMASK, tmp8); - tmp8 = _mm512_andnot_si512(XMMMASK, tmp8); - - tmp3 = _mm512_xor_si512(tmp3, tmp8); - tmp6 = _mm512_xor_si512(tmp6, tmp7); - - tmp10 = _mm512_slli_epi32(tmp6, 1); - tmp3 = _mm512_xor_si512(tmp3, tmp10); - - tmp11 = _mm512_slli_epi32(tmp6, 2); - tmp3 = _mm512_xor_si512(tmp3, tmp11); - - tmp12 = _mm512_slli_epi32(tmp6, 7); - tmp3 = _mm512_xor_si512(tmp3, tmp12); - - *res = _mm512_xor_si512(tmp3, tmp6); -} - */ - -impl From for AVX256GF2_128x4 { - #[inline(always)] - fn from(v: u32) -> AVX256GF2_128x4 { - assert!(v < 2); // only 0 and 1 are allowed - let data = unsafe { [_mm256_set_epi64(0, v as i64, 0, v as i64, 0), _mm256_set_epi64(0, v as i64, 0, v as i64, 0)] }; - AVX256GF2_128x4 { data } - } -} - -impl Neg for AVX256GF2_128x4 { - type Output = AVX256GF2_128x4; - - #[inline(always)] - fn neg(self) -> AVX256GF2_128x4 { - self - } -} - -impl Debug for AVX256GF2_128x4 { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut data = [0u8; 64]; - unsafe { - _mm256_storeu_si256(data.as_mut_ptr() as *mut __m256i, self.data[0]); - _mm256_storeu_si256(data.as_mut_ptr().add(8) as *mut __m256i, self.data[1]); - } - f.debug_struct("AVX256GF2_128x4") - .field("data", &data) - .finish() - } -} - -impl PartialEq for AVX256GF2_128x4 { - #[inline(always)] - fn eq(&self, other: &Self) -> bool { - unsafe { - let cmp = _mm256_cmpeq_epi64_mask(self.data[0], other.data[0]) & _mm256_cmpeq_epi64_mask(self.data[1], other.data[1]); - cmp == 0xFF // All 8 64-bit integers are equal - } - } -} - -impl Default for AVX256GF2_128x4 { - #[inline(always)] - fn default() -> Self { - Self::zero() - } -} - -impl From for AVX256GF2_128x4 { - #[inline(always)] - fn from(v: GF2_128) -> AVX256GF2_128x4 { - unsafe { - let mut result = [_mm256_setzero_si256(), _mm256_setzero_si256()]; // Initialize a zeroed _m512i - result[0] = _mm256_inserti32x4(result[0], v.v, 0); // Insert `a` at position 0 - result[0] = _mm256_inserti32x4(result[0], v.v, 1); // Insert `b` at position 1 - result[1] = _mm256_inserti32x4(result[1], v.v, 2); // Insert `c` at position 2 - result[1] = _mm256_inserti32x4(result[1], v.v, 3); // Insert `d` at position 3 - AVX256GF2_128x4 { data: result } - } - } -} - -impl SimdField for AVX256GF2_128x4 { - #[inline(always)] - fn scale(&self, challenge: &Self::Scalar) -> Self { - let simd_challenge = AVX256GF2_128x4::from(*challenge); - *self * simd_challenge - } - type Scalar = GF2_128; - - #[inline(always)] - fn pack_size() -> usize { - 4 - } -} - -#[inline(always)] -fn add_internal(a: &AVX256GF2_128x4, b: &AVX256GF2_128x4) -> AVX256GF2_128x4 { - unsafe { - AVX256GF2_128x4 { - data: [_mm256_xor_si256(a.data[0], b.data[0]), _mm256_xor_si256(a.data[1], b.data[1])], - } - } -} - -#[inline(always)] -fn sub_internal(a: &AVX256GF2_128x4, b: &AVX256GF2_128x4) -> AVX256GF2_128x4 { - unsafe { - AVX256GF2_128x4 { - data: [_mm256_xor_si256(a.data[0], b.data[0]), _mm256_xor_si256(a.data[1], b.data[1])], - } - } -} - -#[inline] -fn mul_internal(a: &AVX256GF2_128x4, b: &AVX256GF2_128x4) -> AVX256GF2_128x4 { - unsafe { - let xmmmask = _mm256_set_epi32( - 0, - 0, - 0, - 0xffffffffu32 as i32, - 0, - 0, - 0, - 0xffffffffu32 as i32, - ); - let mut result = [_mm256_setzero_si256(), _mm256_setzero_si256()]; - for i in 0..2 { - - let mut tmp3 = _mm256_clmulepi64_epi128(a.data[i], b.data[i], 0x00); - let mut tmp6 = _mm256_clmulepi64_epi128(a.data[i], b.data[i], 0x11); - - let mut tmp4 = _mm256_shuffle_epi32(a.data[i], _MM_PERM_BADC); - let mut tmp5 = _mm256_shuffle_epi32(b.data[i], _MM_PERM_BADC); - tmp4 = _mm256_xor_si256(tmp4, a.data[i]); - tmp5 = _mm256_xor_si256(tmp5, b.data[i]); - - tmp4 = _mm256_clmulepi64_epi128(tmp4, tmp5, 0x00); - tmp4 = _mm256_xor_si256(tmp4, tmp3); - tmp4 = _mm256_xor_si256(tmp4, tmp6); - - tmp5 = _mm256_bslli_epi128(tmp4, 8); - tmp4 = _mm256_bsrli_epi128(tmp4, 8); - tmp3 = _mm256_xor_si256(tmp3, tmp5); - tmp6 = _mm256_xor_si256(tmp6, tmp4); - - let tmp7 = _mm256_srli_epi32(tmp6, 31); - let tmp8 = _mm256_srli_epi32(tmp6, 30); - let tmp9 = _mm256_srli_epi32(tmp6, 25); - - let mut tmp7 = _mm256_xor_si256(tmp7, tmp8); - tmp7 = _mm256_xor_si256(tmp7, tmp9); - - let mut tmp8 = _mm256_shuffle_epi32(tmp7, _MM_PERM_CBAD); - tmp7 = _mm256_and_si256(xmmmask, tmp8); - tmp8 = _mm256_andnot_si256(xmmmask, tmp8); - - tmp3 = _mm256_xor_si256(tmp3, tmp8); - tmp6 = _mm256_xor_si256(tmp6, tmp7); - - let tmp10 = _mm256_slli_epi32(tmp6, 1); - tmp3 = _mm256_xor_si256(tmp3, tmp10); - - let tmp11 = _mm256_slli_epi32(tmp6, 2); - tmp3 = _mm256_xor_si256(tmp3, tmp11); - - let tmp12 = _mm256_slli_epi32(tmp6, 7); - tmp3 = _mm256_xor_si256(tmp3, tmp12); - - result[i] = _mm256_xor_si256(tmp3, tmp6); - - } - AVX256GF2_128x4 { data: result } - } -} diff --git a/arith/src/extension_field/gf2_128x8.rs b/arith/src/extension_field/gf2_128x8.rs index ba393103..e250159d 100644 --- a/arith/src/extension_field/gf2_128x8.rs +++ b/arith/src/extension_field/gf2_128x8.rs @@ -3,13 +3,12 @@ pub(crate) mod neon; #[cfg(target_arch = "aarch64")] pub type GF2_128x8 = neon::NeonGF2_128x8; -#[cfg(target_arch = "x86_64")] -mod avx; -#[cfg(target_arch = "x86_64")] +#[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] +mod avx512; +#[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] +pub type GF2_128x8 = avx512::AVX512GF2_128x8; + +#[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] mod avx256; -#[cfg(target_arch = "x86_64")] -pub type GF2_128x8_256 = avx256::AVX256GF2_128x8; -#[cfg(all(target_arch = "x86_64", feature = "avx256"))] +#[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] pub type GF2_128x8 = avx256::AVX256GF2_128x8; -#[cfg(all(target_arch = "x86_64", not(feature = "avx256")))] -pub type GF2_128x8 = avx::AVX512GF2_128x8; diff --git a/arith/src/extension_field/gf2_128x8/avx.rs b/arith/src/extension_field/gf2_128x8/avx512.rs similarity index 100% rename from arith/src/extension_field/gf2_128x8/avx.rs rename to arith/src/extension_field/gf2_128x8/avx512.rs diff --git a/arith/src/field/m31.rs b/arith/src/field/m31.rs index 628f3add..6c12ff27 100644 --- a/arith/src/field/m31.rs +++ b/arith/src/field/m31.rs @@ -1,11 +1,12 @@ mod m31x16; pub use m31x16::M31x16; -#[cfg(target_arch = "x86_64")] -pub(crate) mod m31_avx; -#[cfg(target_arch = "x86_64")] +#[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] pub(crate) mod m31_avx256; +#[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] +pub(crate) mod m31_avx512; + #[cfg(target_arch = "x86_64")] pub type M31x16_256 = m31_avx256::AVXM31; diff --git a/arith/src/field/m31/m31_avx.rs b/arith/src/field/m31/m31_avx512.rs similarity index 100% rename from arith/src/field/m31/m31_avx.rs rename to arith/src/field/m31/m31_avx512.rs diff --git a/arith/src/field/m31/m31x16.rs b/arith/src/field/m31/m31x16.rs index c00456d2..b111b6e7 100644 --- a/arith/src/field/m31/m31x16.rs +++ b/arith/src/field/m31/m31x16.rs @@ -1,14 +1,12 @@ // A M31x16 stores 512 bits of data. // With AVX it stores a single __m512i element. // With NEON it stores four uint32x4_t elements. -#[cfg(target_arch = "x86_64")] -cfg_if::cfg_if! { - if #[cfg(feature = "avx256")] { - pub type M31x16 = super::m31_avx256::AVXM31; - } else { - pub type M31x16 = super::m31_avx::AVXM31; - } -} #[cfg(target_arch = "aarch64")] pub type M31x16 = super::m31_neon::NeonM31; + +#[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] +pub type M31x16 = super::m31_avx512::AVXM31; + +#[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] +pub type M31x16 = super::m31_avx256::AVXM31; From e5ab6982b89304d23bdc505242e45f9508154153 Mon Sep 17 00:00:00 2001 From: zhenfei Date: Thu, 26 Sep 2024 10:07:17 -0400 Subject: [PATCH 2/7] fix --- .github/workflows/ci.yml | 6 +-- arith/src/extension_field/gf2_128.rs | 4 +- arith/src/extension_field/gf2_128/avx256.rs | 58 ++++++++++----------- arith/src/extension_field/gf2_128x8.rs | 8 +-- readme.md | 11 +++- 5 files changed, 48 insertions(+), 39 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4dd75704..33a1f29d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -67,13 +67,13 @@ jobs: feature: avx2 field: gf2ext128 - os: 7950x3d - feature: avx512 + feature: avx512f field: m31ext3 - os: 7950x3d - feature: avx512 + feature: avx512f field: fr - os: 7950x3d - feature: avx512 + feature: avx512f field: gf2ext128 steps: - uses: actions/checkout@v4 diff --git a/arith/src/extension_field/gf2_128.rs b/arith/src/extension_field/gf2_128.rs index 8ba6fc03..334e8cd7 100644 --- a/arith/src/extension_field/gf2_128.rs +++ b/arith/src/extension_field/gf2_128.rs @@ -10,5 +10,5 @@ pub type GF2_128 = avx512::AVX512GF2_128; #[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] mod avx256; -#[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] -pub type GF2_128 = avx256::AVX512GF2_128; +#[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] +pub type GF2_128 = avx256::AVX256GF2_128; diff --git a/arith/src/extension_field/gf2_128/avx256.rs b/arith/src/extension_field/gf2_128/avx256.rs index f3984f88..ba090b8b 100644 --- a/arith/src/extension_field/gf2_128/avx256.rs +++ b/arith/src/extension_field/gf2_128/avx256.rs @@ -8,13 +8,13 @@ use std::{ use crate::{field_common, ExtensionField, Field, FieldSerde, FieldSerdeResult, GF2}; #[derive(Debug, Clone, Copy)] -pub struct AVX512GF2_128 { +pub struct AVX256GF2_128 { pub v: __m128i, } -field_common!(AVX512GF2_128); +field_common!(AVX256GF2_128); -impl FieldSerde for AVX512GF2_128 { +impl FieldSerde for AVX256GF2_128 { const SERIALIZED_SIZE: usize = 16; #[inline(always)] @@ -28,7 +28,7 @@ impl FieldSerde for AVX512GF2_128 { let mut u = [0u8; Self::SERIALIZED_SIZE]; reader.read_exact(&mut u)?; unsafe { - Ok(AVX512GF2_128 { + Ok(AVX256GF2_128 { v: transmute::<[u8; Self::SERIALIZED_SIZE], __m128i>(u), }) } @@ -39,42 +39,42 @@ impl FieldSerde for AVX512GF2_128 { let mut u = [0u8; 32]; reader.read_exact(&mut u)?; Ok(unsafe { - AVX512GF2_128 { + AVX256GF2_128 { v: transmute::<[u8; 16], __m128i>(u[..16].try_into().unwrap()), } }) } } -impl Field for AVX512GF2_128 { +impl Field for AVX256GF2_128 { const NAME: &'static str = "Galios Field 2^128"; const SIZE: usize = 128 / 8; const FIELD_SIZE: usize = 128; // in bits - const ZERO: Self = AVX512GF2_128 { + const ZERO: Self = AVX256GF2_128 { v: unsafe { std::mem::zeroed() }, }; - const ONE: Self = AVX512GF2_128 { + const ONE: Self = AVX256GF2_128 { v: unsafe { std::mem::transmute::<[i32; 4], __m128i>([1, 0, 0, 0]) }, }; - const INV_2: Self = AVX512GF2_128 { + const INV_2: Self = AVX256GF2_128 { v: unsafe { std::mem::zeroed() }, }; // should not be used #[inline(always)] fn zero() -> Self { - AVX512GF2_128 { + AVX256GF2_128 { v: unsafe { std::mem::zeroed() }, } } #[inline(always)] fn one() -> Self { - AVX512GF2_128 { + AVX256GF2_128 { // 1 in the first bit v: unsafe { std::mem::transmute::<[i32; 4], __m128i>([1, 0, 0, 0]) }, // TODO check bit order } @@ -85,7 +85,7 @@ impl Field for AVX512GF2_128 { let mut u = [0u8; 16]; rng.fill_bytes(&mut u); unsafe { - AVX512GF2_128 { + AVX256GF2_128 { v: *(u.as_ptr() as *const __m128i), } } @@ -93,7 +93,7 @@ impl Field for AVX512GF2_128 { #[inline(always)] fn random_bool(mut rng: impl rand::RngCore) -> Self { - AVX512GF2_128 { + AVX256GF2_128 { v: unsafe { std::mem::transmute::<[u32; 4], __m128i>([rng.next_u32() % 2, 0, 0, 0]) }, } } @@ -140,19 +140,19 @@ impl Field for AVX512GF2_128 { #[inline(always)] fn from_uniform_bytes(bytes: &[u8; 32]) -> Self { unsafe { - AVX512GF2_128 { + AVX256GF2_128 { v: transmute::<[u8; 16], __m128i>(bytes[..16].try_into().unwrap()), } } } } -impl ExtensionField for AVX512GF2_128 { +impl ExtensionField for AVX256GF2_128 { const DEGREE: usize = 128; const W: u32 = 0x87; - const X: Self = AVX512GF2_128 { + const X: Self = AVX256GF2_128 { v: unsafe { std::mem::transmute::<[i32; 4], __m128i>([2, 0, 0, 0]) }, }; @@ -206,10 +206,10 @@ impl ExtensionField for AVX512GF2_128 { } } -impl From for AVX512GF2_128 { +impl From for AVX256GF2_128 { #[inline(always)] fn from(v: GF2) -> Self { - AVX512GF2_128 { + AVX256GF2_128 { v: unsafe { _mm_set_epi64x(0, v.v as i64) }, } } @@ -264,21 +264,21 @@ unsafe fn gfmul(a: __m128i, b: __m128i) -> __m128i { _mm_xor_si128(tmp3, tmp6) } -impl Default for AVX512GF2_128 { +impl Default for AVX256GF2_128 { #[inline(always)] fn default() -> Self { Self::zero() } } -impl PartialEq for AVX512GF2_128 { +impl PartialEq for AVX256GF2_128 { #[inline(always)] fn eq(&self, other: &Self) -> bool { unsafe { _mm_test_all_ones(_mm_cmpeq_epi8(self.v, other.v)) == 1 } } } -impl Neg for AVX512GF2_128 { +impl Neg for AVX256GF2_128 { type Output = Self; #[inline(always)] @@ -287,32 +287,32 @@ impl Neg for AVX512GF2_128 { } } -impl From for AVX512GF2_128 { +impl From for AVX256GF2_128 { #[inline(always)] fn from(v: u32) -> Self { - AVX512GF2_128 { + AVX256GF2_128 { v: unsafe { std::mem::transmute::<[u32; 4], __m128i>([v, 0, 0, 0]) }, } } } #[inline(always)] -fn add_internal(a: &AVX512GF2_128, b: &AVX512GF2_128) -> AVX512GF2_128 { - AVX512GF2_128 { +fn add_internal(a: &AVX256GF2_128, b: &AVX256GF2_128) -> AVX256GF2_128 { + AVX256GF2_128 { v: unsafe { _mm_xor_si128(a.v, b.v) }, } } #[inline(always)] -fn sub_internal(a: &AVX512GF2_128, b: &AVX512GF2_128) -> AVX512GF2_128 { - AVX512GF2_128 { +fn sub_internal(a: &AVX256GF2_128, b: &AVX256GF2_128) -> AVX256GF2_128 { + AVX256GF2_128 { v: unsafe { _mm_xor_si128(a.v, b.v) }, } } #[inline(always)] -fn mul_internal(a: &AVX512GF2_128, b: &AVX512GF2_128) -> AVX512GF2_128 { - AVX512GF2_128 { +fn mul_internal(a: &AVX256GF2_128, b: &AVX256GF2_128) -> AVX256GF2_128 { + AVX256GF2_128 { v: unsafe { gfmul(a.v, b.v) }, } } diff --git a/arith/src/extension_field/gf2_128x8.rs b/arith/src/extension_field/gf2_128x8.rs index e250159d..e610ea12 100644 --- a/arith/src/extension_field/gf2_128x8.rs +++ b/arith/src/extension_field/gf2_128x8.rs @@ -4,11 +4,11 @@ pub(crate) mod neon; pub type GF2_128x8 = neon::NeonGF2_128x8; #[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] -mod avx512; +mod avx256; #[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] -pub type GF2_128x8 = avx512::AVX512GF2_128x8; +pub type GF2_128x8 = avx256::AVX256GF2_128x8; #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] -mod avx256; +mod avx512; #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] -pub type GF2_128x8 = avx256::AVX256GF2_128x8; +pub type GF2_128x8 = avx512::AVX512GF2_128x8; diff --git a/readme.md b/readme.md index 889fce3b..b0950da5 100644 --- a/readme.md +++ b/readme.md @@ -45,7 +45,16 @@ This compiler is your entry point for using our prover; the repository you have Please note that the witness generation process is not yet optimal, and we are actively working on improving it. ## AVX -We use AVX512 by default, if your CPU doesn't support AVX512, or you encountered illegal instruction error, please use `--features avx256` instead. +We use AVX2 by default. On an x86 or a mac, you can simply do +``` +RUSTFLAGS="-C target-cpu=native" cargo test --release --workspace +``` +For some platforms, if you do not indicate `target-cpu=native` it may simulate avx2 instructions, rather than use it directly, and this will cause performance decrease. + +Our code also supports `avx512`. This is not turned on by default. To use `avx512` +``` +RUSTFLAGS="-C target-cpu=native -C target-features=+avx512f" cargo test --release --workspace +``` ## Environment Setup From 936ad66f1be6dd873c6df92fb6eac9984c6a2605 Mon Sep 17 00:00:00 2001 From: zhenfei Date: Thu, 26 Sep 2024 10:10:09 -0400 Subject: [PATCH 3/7] fix --- arith/src/field/m31.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/arith/src/field/m31.rs b/arith/src/field/m31.rs index 6c12ff27..c263b955 100644 --- a/arith/src/field/m31.rs +++ b/arith/src/field/m31.rs @@ -7,9 +7,6 @@ pub(crate) mod m31_avx256; #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] pub(crate) mod m31_avx512; -#[cfg(target_arch = "x86_64")] -pub type M31x16_256 = m31_avx256::AVXM31; - #[cfg(target_arch = "aarch64")] pub mod m31_neon; From d008a82fbd661155dfa73e6d4e3f1c0da9e288cf Mon Sep 17 00:00:00 2001 From: zhenfei Date: Thu, 26 Sep 2024 10:14:02 -0400 Subject: [PATCH 4/7] simplify --- arith/src/extension_field/gf2_128.rs | 13 +- .../gf2_128/{avx256.rs => avx.rs} | 58 ++-- arith/src/extension_field/gf2_128/avx512.rs | 318 ------------------ 3 files changed, 33 insertions(+), 356 deletions(-) rename arith/src/extension_field/gf2_128/{avx256.rs => avx.rs} (88%) delete mode 100644 arith/src/extension_field/gf2_128/avx512.rs diff --git a/arith/src/extension_field/gf2_128.rs b/arith/src/extension_field/gf2_128.rs index 334e8cd7..431673f9 100644 --- a/arith/src/extension_field/gf2_128.rs +++ b/arith/src/extension_field/gf2_128.rs @@ -3,12 +3,7 @@ pub(crate) mod neon; #[cfg(target_arch = "aarch64")] pub type GF2_128 = neon::NeonGF2_128; -#[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] -mod avx512; -#[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] -pub type GF2_128 = avx512::AVX512GF2_128; - -#[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] -mod avx256; -#[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] -pub type GF2_128 = avx256::AVX256GF2_128; +#[cfg(target_arch = "x86_64")] +pub(crate) mod avx; +#[cfg(target_arch = "x86_64")] +pub type GF2_128 = avx::AVXGF2_128; diff --git a/arith/src/extension_field/gf2_128/avx256.rs b/arith/src/extension_field/gf2_128/avx.rs similarity index 88% rename from arith/src/extension_field/gf2_128/avx256.rs rename to arith/src/extension_field/gf2_128/avx.rs index ba090b8b..d37e3675 100644 --- a/arith/src/extension_field/gf2_128/avx256.rs +++ b/arith/src/extension_field/gf2_128/avx.rs @@ -8,13 +8,13 @@ use std::{ use crate::{field_common, ExtensionField, Field, FieldSerde, FieldSerdeResult, GF2}; #[derive(Debug, Clone, Copy)] -pub struct AVX256GF2_128 { +pub struct AVXGF2_128 { pub v: __m128i, } -field_common!(AVX256GF2_128); +field_common!(AVXGF2_128); -impl FieldSerde for AVX256GF2_128 { +impl FieldSerde for AVXGF2_128 { const SERIALIZED_SIZE: usize = 16; #[inline(always)] @@ -28,7 +28,7 @@ impl FieldSerde for AVX256GF2_128 { let mut u = [0u8; Self::SERIALIZED_SIZE]; reader.read_exact(&mut u)?; unsafe { - Ok(AVX256GF2_128 { + Ok(AVXGF2_128 { v: transmute::<[u8; Self::SERIALIZED_SIZE], __m128i>(u), }) } @@ -39,42 +39,42 @@ impl FieldSerde for AVX256GF2_128 { let mut u = [0u8; 32]; reader.read_exact(&mut u)?; Ok(unsafe { - AVX256GF2_128 { + AVXGF2_128 { v: transmute::<[u8; 16], __m128i>(u[..16].try_into().unwrap()), } }) } } -impl Field for AVX256GF2_128 { +impl Field for AVXGF2_128 { const NAME: &'static str = "Galios Field 2^128"; const SIZE: usize = 128 / 8; const FIELD_SIZE: usize = 128; // in bits - const ZERO: Self = AVX256GF2_128 { + const ZERO: Self = AVXGF2_128 { v: unsafe { std::mem::zeroed() }, }; - const ONE: Self = AVX256GF2_128 { + const ONE: Self = AVXGF2_128 { v: unsafe { std::mem::transmute::<[i32; 4], __m128i>([1, 0, 0, 0]) }, }; - const INV_2: Self = AVX256GF2_128 { + const INV_2: Self = AVXGF2_128 { v: unsafe { std::mem::zeroed() }, }; // should not be used #[inline(always)] fn zero() -> Self { - AVX256GF2_128 { + AVXGF2_128 { v: unsafe { std::mem::zeroed() }, } } #[inline(always)] fn one() -> Self { - AVX256GF2_128 { + AVXGF2_128 { // 1 in the first bit v: unsafe { std::mem::transmute::<[i32; 4], __m128i>([1, 0, 0, 0]) }, // TODO check bit order } @@ -85,7 +85,7 @@ impl Field for AVX256GF2_128 { let mut u = [0u8; 16]; rng.fill_bytes(&mut u); unsafe { - AVX256GF2_128 { + AVXGF2_128 { v: *(u.as_ptr() as *const __m128i), } } @@ -93,7 +93,7 @@ impl Field for AVX256GF2_128 { #[inline(always)] fn random_bool(mut rng: impl rand::RngCore) -> Self { - AVX256GF2_128 { + AVXGF2_128 { v: unsafe { std::mem::transmute::<[u32; 4], __m128i>([rng.next_u32() % 2, 0, 0, 0]) }, } } @@ -140,19 +140,19 @@ impl Field for AVX256GF2_128 { #[inline(always)] fn from_uniform_bytes(bytes: &[u8; 32]) -> Self { unsafe { - AVX256GF2_128 { + AVXGF2_128 { v: transmute::<[u8; 16], __m128i>(bytes[..16].try_into().unwrap()), } } } } -impl ExtensionField for AVX256GF2_128 { +impl ExtensionField for AVXGF2_128 { const DEGREE: usize = 128; const W: u32 = 0x87; - const X: Self = AVX256GF2_128 { + const X: Self = AVXGF2_128 { v: unsafe { std::mem::transmute::<[i32; 4], __m128i>([2, 0, 0, 0]) }, }; @@ -206,10 +206,10 @@ impl ExtensionField for AVX256GF2_128 { } } -impl From for AVX256GF2_128 { +impl From for AVXGF2_128 { #[inline(always)] fn from(v: GF2) -> Self { - AVX256GF2_128 { + AVXGF2_128 { v: unsafe { _mm_set_epi64x(0, v.v as i64) }, } } @@ -264,21 +264,21 @@ unsafe fn gfmul(a: __m128i, b: __m128i) -> __m128i { _mm_xor_si128(tmp3, tmp6) } -impl Default for AVX256GF2_128 { +impl Default for AVXGF2_128 { #[inline(always)] fn default() -> Self { Self::zero() } } -impl PartialEq for AVX256GF2_128 { +impl PartialEq for AVXGF2_128 { #[inline(always)] fn eq(&self, other: &Self) -> bool { unsafe { _mm_test_all_ones(_mm_cmpeq_epi8(self.v, other.v)) == 1 } } } -impl Neg for AVX256GF2_128 { +impl Neg for AVXGF2_128 { type Output = Self; #[inline(always)] @@ -287,32 +287,32 @@ impl Neg for AVX256GF2_128 { } } -impl From for AVX256GF2_128 { +impl From for AVXGF2_128 { #[inline(always)] fn from(v: u32) -> Self { - AVX256GF2_128 { + AVXGF2_128 { v: unsafe { std::mem::transmute::<[u32; 4], __m128i>([v, 0, 0, 0]) }, } } } #[inline(always)] -fn add_internal(a: &AVX256GF2_128, b: &AVX256GF2_128) -> AVX256GF2_128 { - AVX256GF2_128 { +fn add_internal(a: &AVXGF2_128, b: &AVXGF2_128) -> AVXGF2_128 { + AVXGF2_128 { v: unsafe { _mm_xor_si128(a.v, b.v) }, } } #[inline(always)] -fn sub_internal(a: &AVX256GF2_128, b: &AVX256GF2_128) -> AVX256GF2_128 { - AVX256GF2_128 { +fn sub_internal(a: &AVXGF2_128, b: &AVXGF2_128) -> AVXGF2_128 { + AVXGF2_128 { v: unsafe { _mm_xor_si128(a.v, b.v) }, } } #[inline(always)] -fn mul_internal(a: &AVX256GF2_128, b: &AVX256GF2_128) -> AVX256GF2_128 { - AVX256GF2_128 { +fn mul_internal(a: &AVXGF2_128, b: &AVXGF2_128) -> AVXGF2_128 { + AVXGF2_128 { v: unsafe { gfmul(a.v, b.v) }, } } diff --git a/arith/src/extension_field/gf2_128/avx512.rs b/arith/src/extension_field/gf2_128/avx512.rs deleted file mode 100644 index f3984f88..00000000 --- a/arith/src/extension_field/gf2_128/avx512.rs +++ /dev/null @@ -1,318 +0,0 @@ -use std::iter::{Product, Sum}; -use std::{ - arch::x86_64::*, - mem::transmute, - ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign}, -}; - -use crate::{field_common, ExtensionField, Field, FieldSerde, FieldSerdeResult, GF2}; - -#[derive(Debug, Clone, Copy)] -pub struct AVX512GF2_128 { - pub v: __m128i, -} - -field_common!(AVX512GF2_128); - -impl FieldSerde for AVX512GF2_128 { - const SERIALIZED_SIZE: usize = 16; - - #[inline(always)] - fn serialize_into(&self, mut writer: W) -> FieldSerdeResult<()> { - unsafe { writer.write_all(transmute::<__m128i, [u8; 16]>(self.v).as_ref())? }; - Ok(()) - } - - #[inline(always)] - fn deserialize_from(mut reader: R) -> FieldSerdeResult { - let mut u = [0u8; Self::SERIALIZED_SIZE]; - reader.read_exact(&mut u)?; - unsafe { - Ok(AVX512GF2_128 { - v: transmute::<[u8; Self::SERIALIZED_SIZE], __m128i>(u), - }) - } - } - - #[inline(always)] - fn try_deserialize_from_ecc_format(mut reader: R) -> FieldSerdeResult { - let mut u = [0u8; 32]; - reader.read_exact(&mut u)?; - Ok(unsafe { - AVX512GF2_128 { - v: transmute::<[u8; 16], __m128i>(u[..16].try_into().unwrap()), - } - }) - } -} - -impl Field for AVX512GF2_128 { - const NAME: &'static str = "Galios Field 2^128"; - - const SIZE: usize = 128 / 8; - - const FIELD_SIZE: usize = 128; // in bits - - const ZERO: Self = AVX512GF2_128 { - v: unsafe { std::mem::zeroed() }, - }; - - const ONE: Self = AVX512GF2_128 { - v: unsafe { std::mem::transmute::<[i32; 4], __m128i>([1, 0, 0, 0]) }, - }; - - const INV_2: Self = AVX512GF2_128 { - v: unsafe { std::mem::zeroed() }, - }; // should not be used - - #[inline(always)] - fn zero() -> Self { - AVX512GF2_128 { - v: unsafe { std::mem::zeroed() }, - } - } - - #[inline(always)] - fn one() -> Self { - AVX512GF2_128 { - // 1 in the first bit - v: unsafe { std::mem::transmute::<[i32; 4], __m128i>([1, 0, 0, 0]) }, // TODO check bit order - } - } - - #[inline(always)] - fn random_unsafe(mut rng: impl rand::RngCore) -> Self { - let mut u = [0u8; 16]; - rng.fill_bytes(&mut u); - unsafe { - AVX512GF2_128 { - v: *(u.as_ptr() as *const __m128i), - } - } - } - - #[inline(always)] - fn random_bool(mut rng: impl rand::RngCore) -> Self { - AVX512GF2_128 { - v: unsafe { std::mem::transmute::<[u32; 4], __m128i>([rng.next_u32() % 2, 0, 0, 0]) }, - } - } - - #[inline(always)] - fn is_zero(&self) -> bool { - unsafe { std::mem::transmute::<__m128i, [u8; 16]>(self.v) == [0; 16] } - } - - #[inline(always)] - fn exp(&self, exponent: u128) -> Self { - let mut e = exponent; - let mut res = Self::one(); - let mut t = *self; - while e > 0 { - if e & 1 == 1 { - res *= t; - } - t = t * t; - e >>= 1; - } - res - } - - #[inline(always)] - fn inv(&self) -> Option { - if self.is_zero() { - return None; - } - let p_m2 = !(0u128) - 1; - Some(Self::exp(self, p_m2)) - } - - #[inline(always)] - fn square(&self) -> Self { - self * self - } - - #[inline(always)] - fn as_u32_unchecked(&self) -> u32 { - unimplemented!("u32 for GF128 doesn't make sense") - } - - #[inline(always)] - fn from_uniform_bytes(bytes: &[u8; 32]) -> Self { - unsafe { - AVX512GF2_128 { - v: transmute::<[u8; 16], __m128i>(bytes[..16].try_into().unwrap()), - } - } - } -} - -impl ExtensionField for AVX512GF2_128 { - const DEGREE: usize = 128; - - const W: u32 = 0x87; - - const X: Self = AVX512GF2_128 { - v: unsafe { std::mem::transmute::<[i32; 4], __m128i>([2, 0, 0, 0]) }, - }; - - type BaseField = GF2; - - #[inline(always)] - fn mul_by_base_field(&self, base: &Self::BaseField) -> Self { - if base.v == 0 { - Self::zero() - } else { - *self - } - } - - #[inline(always)] - fn add_by_base_field(&self, base: &Self::BaseField) -> Self { - let mut res = *self; - res.v = unsafe { _mm_xor_si128(res.v, _mm_set_epi64x(0, base.v as i64)) }; - res - } - - #[inline] - fn mul_by_x(&self) -> Self { - unsafe { - // Shift left by 1 bit - let shifted = _mm_slli_epi64(self.v, 1); - - // Get the most significant bit and move it - let msb = _mm_srli_epi64(self.v, 63); - let msb_moved = _mm_slli_si128(msb, 8); - - // Combine the shifted value with the moved msb - let shifted_consolidated = _mm_or_si128(shifted, msb_moved); - - // Create the reduction value (0x87) and the comparison value (1) - let reduction = { - let multiplier = _mm_set_epi64x(0, 0x87); - let one = _mm_set_epi64x(0, 1); - - // Check if the MSB was 1 and create a mask - let mask = _mm_cmpeq_epi64(_mm_srli_si128(msb, 8), one); - - _mm_and_si128(mask, multiplier) - }; - - // Apply the reduction conditionally - let res = _mm_xor_si128(shifted_consolidated, reduction); - - Self { v: res } - } - } -} - -impl From for AVX512GF2_128 { - #[inline(always)] - fn from(v: GF2) -> Self { - AVX512GF2_128 { - v: unsafe { _mm_set_epi64x(0, v.v as i64) }, - } - } -} - -#[inline] -unsafe fn gfmul(a: __m128i, b: __m128i) -> __m128i { - let xmm_mask = _mm_setr_epi32((0xffffffff_u32) as i32, 0x0, 0x0, 0x0); - - // a = a0|a1, b = b0|b1 - - let mut tmp3 = _mm_clmulepi64_si128(a, b, 0x00); // tmp3 = a0 * b0 - let mut tmp6 = _mm_clmulepi64_si128(a, b, 0x11); // tmp6 = a1 * b1 - - let mut tmp4 = _mm_shuffle_epi32(a, 78); // tmp4 = a1|a0 - let mut tmp5 = _mm_shuffle_epi32(b, 78); // tmp5 = b1|b0 - tmp4 = _mm_xor_si128(tmp4, a); // tmp4 = (a0 + a1) | (a0 + a1) - tmp5 = _mm_xor_si128(tmp5, b); // tmp5 = (b0 + b1) | (b0 + b1) - - tmp4 = _mm_clmulepi64_si128(tmp4, tmp5, 0x00); // tmp4 = (a0 + a1) * (b0 + b1) - tmp4 = _mm_xor_si128(tmp4, tmp3); // tmp4 = (a0 + a1) * (b0 + b1) - a0 * b0 - tmp4 = _mm_xor_si128(tmp4, tmp6); // tmp4 = (a0 + a1) * (b0 + b1) - a0 * b0 - a1 * b1 = a0 * b1 + a1 * b0 - - let tmp5_shifted_left = _mm_slli_si128(tmp4, 8); - tmp4 = _mm_srli_si128(tmp4, 8); - tmp3 = _mm_xor_si128(tmp3, tmp5_shifted_left); - tmp6 = _mm_xor_si128(tmp6, tmp4); - - let mut tmp7 = _mm_srli_epi32(tmp6, 31); - let mut tmp8 = _mm_srli_epi32(tmp6, 30); - let tmp9 = _mm_srli_epi32(tmp6, 25); - - tmp7 = _mm_xor_si128(tmp7, tmp8); - tmp7 = _mm_xor_si128(tmp7, tmp9); - - tmp8 = _mm_shuffle_epi32(tmp7, 147); - tmp7 = _mm_and_si128(xmm_mask, tmp8); - tmp8 = _mm_andnot_si128(xmm_mask, tmp8); - - tmp3 = _mm_xor_si128(tmp3, tmp8); - tmp6 = _mm_xor_si128(tmp6, tmp7); - - let tmp10 = _mm_slli_epi32(tmp6, 1); - tmp3 = _mm_xor_si128(tmp3, tmp10); - - let tmp11 = _mm_slli_epi32(tmp6, 2); - tmp3 = _mm_xor_si128(tmp3, tmp11); - - let tmp12 = _mm_slli_epi32(tmp6, 7); - tmp3 = _mm_xor_si128(tmp3, tmp12); - - _mm_xor_si128(tmp3, tmp6) -} - -impl Default for AVX512GF2_128 { - #[inline(always)] - fn default() -> Self { - Self::zero() - } -} - -impl PartialEq for AVX512GF2_128 { - #[inline(always)] - fn eq(&self, other: &Self) -> bool { - unsafe { _mm_test_all_ones(_mm_cmpeq_epi8(self.v, other.v)) == 1 } - } -} - -impl Neg for AVX512GF2_128 { - type Output = Self; - - #[inline(always)] - fn neg(self) -> Self { - self - } -} - -impl From for AVX512GF2_128 { - #[inline(always)] - fn from(v: u32) -> Self { - AVX512GF2_128 { - v: unsafe { std::mem::transmute::<[u32; 4], __m128i>([v, 0, 0, 0]) }, - } - } -} - -#[inline(always)] -fn add_internal(a: &AVX512GF2_128, b: &AVX512GF2_128) -> AVX512GF2_128 { - AVX512GF2_128 { - v: unsafe { _mm_xor_si128(a.v, b.v) }, - } -} - -#[inline(always)] -fn sub_internal(a: &AVX512GF2_128, b: &AVX512GF2_128) -> AVX512GF2_128 { - AVX512GF2_128 { - v: unsafe { _mm_xor_si128(a.v, b.v) }, - } -} - -#[inline(always)] -fn mul_internal(a: &AVX512GF2_128, b: &AVX512GF2_128) -> AVX512GF2_128 { - AVX512GF2_128 { - v: unsafe { gfmul(a.v, b.v) }, - } -} From 8fc7519d4a15dad0321509ed7f521b59d108d405 Mon Sep 17 00:00:00 2001 From: zhenfei Date: Thu, 26 Sep 2024 10:21:18 -0400 Subject: [PATCH 5/7] system test --- tests/system.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 tests/system.rs diff --git a/tests/system.rs b/tests/system.rs new file mode 100644 index 00000000..9467dc54 --- /dev/null +++ b/tests/system.rs @@ -0,0 +1,21 @@ +#[test] +fn test_mutually_exclusive_flags() { + let mut enabled_ctr = 0; + + #[cfg(target_arch = "aarch64")] + { + enabled_ctr += 1; + } + + #[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] + { + enabled_ctr += 1; + } + + #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] + { + enabled_ctr += 1; + } + + assert_eq!(enabled_ctr, 1); +} From 83845aeeb97ed588c6165968904ed3612ba720c5 Mon Sep 17 00:00:00 2001 From: zhenfei Date: Thu, 26 Sep 2024 10:40:17 -0400 Subject: [PATCH 6/7] fix --- arith/benches/ext_field.rs | 4 ---- arith/benches/field.rs | 6 ------ arith/src/field/m31.rs | 20 ++++++++++++------- arith/src/field/m31/m31x16.rs | 12 ----------- .../m31/{m31_avx256.rs => m31x16_avx256.rs} | 0 .../m31/{m31_avx512.rs => m31x16_avx512.rs} | 0 .../field/m31/{m31_neon.rs => m31x16_neon.rs} | 0 7 files changed, 13 insertions(+), 29 deletions(-) delete mode 100644 arith/src/field/m31/m31x16.rs rename arith/src/field/m31/{m31_avx256.rs => m31x16_avx256.rs} (100%) rename arith/src/field/m31/{m31_avx512.rs => m31x16_avx512.rs} (100%) rename arith/src/field/m31/{m31_neon.rs => m31x16_neon.rs} (100%) diff --git a/arith/benches/ext_field.rs b/arith/benches/ext_field.rs index 681ed116..05e6d281 100644 --- a/arith/benches/ext_field.rs +++ b/arith/benches/ext_field.rs @@ -1,5 +1,3 @@ -#[cfg(target_arch = "x86_64")] -use arith::GF2_128x8_256; use arith::{ExtensionField, Field, GF2_128x8, M31Ext3, M31Ext3x16, GF2_128}; use ark_std::test_rng; use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; @@ -156,8 +154,6 @@ fn ext_by_base_benchmark(c: &mut Criterion) { bench_field::(c); bench_field::(c); bench_field::(c); - #[cfg(target_arch = "x86_64")] - bench_field::(c); } criterion_group!(ext_by_base_benches, ext_by_base_benchmark); diff --git a/arith/benches/field.rs b/arith/benches/field.rs index 5369b85e..c2904d39 100644 --- a/arith/benches/field.rs +++ b/arith/benches/field.rs @@ -1,8 +1,6 @@ // this module benchmarks the performance of different field operations use arith::{Field, GF2_128x8, GF2x8, M31Ext3, M31Ext3x16, M31x16, GF2, GF2_128, M31}; -#[cfg(target_arch = "x86_64")] -use arith::{GF2_128x8_256, M31x16_256}; use ark_std::test_rng; use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; use halo2curves::bn256::Fr; @@ -176,8 +174,6 @@ pub(crate) fn bench_field(c: &mut Criterion) { fn criterion_benchmark(c: &mut Criterion) { bench_field::(c); bench_field::(c); - #[cfg(target_arch = "x86_64")] - bench_field::(c); bench_field::(c); bench_field::(c); bench_field::(c); @@ -185,8 +181,6 @@ fn criterion_benchmark(c: &mut Criterion) { bench_field::(c); bench_field::(c); bench_field::(c); - #[cfg(target_arch = "x86_64")] - bench_field::(c); } criterion_group!(benches, criterion_benchmark); diff --git a/arith/src/field/m31.rs b/arith/src/field/m31.rs index c263b955..87a2c0ca 100644 --- a/arith/src/field/m31.rs +++ b/arith/src/field/m31.rs @@ -1,14 +1,20 @@ -mod m31x16; -pub use m31x16::M31x16; +#[cfg(target_arch = "aarch64")] +pub mod m31x16_neon; +#[cfg(target_arch = "aarch64")] +pub type M31x16 = m31x16_neon::NeonM31; -#[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] -pub(crate) mod m31_avx256; #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] -pub(crate) mod m31_avx512; +pub mod m31x16_avx512; +#[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] +pub type M31x16 = m31x16_avx512::AVXM31; + + +#[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] +pub mod m31x16_avx256; +#[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] +pub type M31x16 = m31x16_avx256::AVXM31; -#[cfg(target_arch = "aarch64")] -pub mod m31_neon; use rand::RngCore; diff --git a/arith/src/field/m31/m31x16.rs b/arith/src/field/m31/m31x16.rs deleted file mode 100644 index b111b6e7..00000000 --- a/arith/src/field/m31/m31x16.rs +++ /dev/null @@ -1,12 +0,0 @@ -// A M31x16 stores 512 bits of data. -// With AVX it stores a single __m512i element. -// With NEON it stores four uint32x4_t elements. - -#[cfg(target_arch = "aarch64")] -pub type M31x16 = super::m31_neon::NeonM31; - -#[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] -pub type M31x16 = super::m31_avx512::AVXM31; - -#[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] -pub type M31x16 = super::m31_avx256::AVXM31; diff --git a/arith/src/field/m31/m31_avx256.rs b/arith/src/field/m31/m31x16_avx256.rs similarity index 100% rename from arith/src/field/m31/m31_avx256.rs rename to arith/src/field/m31/m31x16_avx256.rs diff --git a/arith/src/field/m31/m31_avx512.rs b/arith/src/field/m31/m31x16_avx512.rs similarity index 100% rename from arith/src/field/m31/m31_avx512.rs rename to arith/src/field/m31/m31x16_avx512.rs diff --git a/arith/src/field/m31/m31_neon.rs b/arith/src/field/m31/m31x16_neon.rs similarity index 100% rename from arith/src/field/m31/m31_neon.rs rename to arith/src/field/m31/m31x16_neon.rs From 6bc1347f3c1516418177aeb66b08aab26e9e7472 Mon Sep 17 00:00:00 2001 From: zhenfei Date: Thu, 26 Sep 2024 11:04:49 -0400 Subject: [PATCH 7/7] Update m31.rs --- arith/src/field/m31.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/arith/src/field/m31.rs b/arith/src/field/m31.rs index 87a2c0ca..6ab8191a 100644 --- a/arith/src/field/m31.rs +++ b/arith/src/field/m31.rs @@ -3,19 +3,16 @@ pub mod m31x16_neon; #[cfg(target_arch = "aarch64")] pub type M31x16 = m31x16_neon::NeonM31; - #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] pub mod m31x16_avx512; #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] pub type M31x16 = m31x16_avx512::AVXM31; - #[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] pub mod m31x16_avx256; #[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] pub type M31x16 = m31x16_avx256::AVXM31; - use rand::RngCore; use crate::{field_common, Field, FieldForECC, FieldSerde, FieldSerdeResult};