use crate::{portable, CVWords, IncrementCounter, BLOCK_LEN};
use arrayref::{array_mut_ref, array_ref};
cfg_if::cfg_if! {
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
cfg_if::cfg_if! {
if #[cfg(blake3_avx512_ffi)] {
pub const MAX_SIMD_DEGREE: usize = 16;
} else {
pub const MAX_SIMD_DEGREE: usize = 8;
}
}
} else if #[cfg(blake3_neon)] {
pub const MAX_SIMD_DEGREE: usize = 4;
} else {
pub const MAX_SIMD_DEGREE: usize = 1;
}
}
cfg_if::cfg_if! {
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
cfg_if::cfg_if! {
if #[cfg(blake3_avx512_ffi)] {
pub const MAX_SIMD_DEGREE_OR_2: usize = 16;
} else {
pub const MAX_SIMD_DEGREE_OR_2: usize = 8;
}
}
} else if #[cfg(blake3_neon)] {
pub const MAX_SIMD_DEGREE_OR_2: usize = 4;
} else {
pub const MAX_SIMD_DEGREE_OR_2: usize = 2;
}
}
#[derive(Clone, Copy, Debug)]
pub enum Platform {
Portable,
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
SSE2,
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
SSE41,
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
AVX2,
#[cfg(blake3_avx512_ffi)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
AVX512,
#[cfg(blake3_neon)]
NEON,
}
impl Platform {
#[allow(unreachable_code)]
pub fn detect() -> Self {
#[cfg(miri)]
{
return Platform::Portable;
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
#[cfg(blake3_avx512_ffi)]
{
if avx512_detected() {
return Platform::AVX512;
}
}
if avx2_detected() {
return Platform::AVX2;
}
if sse41_detected() {
return Platform::SSE41;
}
if sse2_detected() {
return Platform::SSE2;
}
}
#[cfg(blake3_neon)]
{
return Platform::NEON;
}
Platform::Portable
}
pub fn simd_degree(&self) -> usize {
let degree = match self {
Platform::Portable => 1,
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::SSE2 => 4,
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::SSE41 => 4,
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX2 => 8,
#[cfg(blake3_avx512_ffi)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX512 => 16,
#[cfg(blake3_neon)]
Platform::NEON => 4,
};
debug_assert!(degree <= MAX_SIMD_DEGREE);
degree
}
pub fn compress_in_place(
&self,
cv: &mut CVWords,
block: &[u8; BLOCK_LEN],
block_len: u8,
counter: u64,
flags: u8,
) {
match self {
Platform::Portable => portable::compress_in_place(cv, block, block_len, counter, flags),
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::SSE2 => unsafe {
crate::sse2::compress_in_place(cv, block, block_len, counter, flags)
},
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::SSE41 | Platform::AVX2 => unsafe {
crate::sse41::compress_in_place(cv, block, block_len, counter, flags)
},
#[cfg(blake3_avx512_ffi)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX512 => unsafe {
crate::avx512::compress_in_place(cv, block, block_len, counter, flags)
},
#[cfg(blake3_neon)]
Platform::NEON => portable::compress_in_place(cv, block, block_len, counter, flags),
}
}
pub fn compress_xof(
&self,
cv: &CVWords,
block: &[u8; BLOCK_LEN],
block_len: u8,
counter: u64,
flags: u8,
) -> [u8; 64] {
match self {
Platform::Portable => portable::compress_xof(cv, block, block_len, counter, flags),
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::SSE2 => unsafe {
crate::sse2::compress_xof(cv, block, block_len, counter, flags)
},
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::SSE41 | Platform::AVX2 => unsafe {
crate::sse41::compress_xof(cv, block, block_len, counter, flags)
},
#[cfg(blake3_avx512_ffi)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX512 => unsafe {
crate::avx512::compress_xof(cv, block, block_len, counter, flags)
},
#[cfg(blake3_neon)]
Platform::NEON => portable::compress_xof(cv, block, block_len, counter, flags),
}
}
pub fn hash_many<const N: usize>(
&self,
inputs: &[&[u8; N]],
key: &CVWords,
counter: u64,
increment_counter: IncrementCounter,
flags: u8,
flags_start: u8,
flags_end: u8,
out: &mut [u8],
) {
match self {
Platform::Portable => portable::hash_many(
inputs,
key,
counter,
increment_counter,
flags,
flags_start,
flags_end,
out,
),
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::SSE2 => unsafe {
crate::sse2::hash_many(
inputs,
key,
counter,
increment_counter,
flags,
flags_start,
flags_end,
out,
)
},
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::SSE41 => unsafe {
crate::sse41::hash_many(
inputs,
key,
counter,
increment_counter,
flags,
flags_start,
flags_end,
out,
)
},
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX2 => unsafe {
crate::avx2::hash_many(
inputs,
key,
counter,
increment_counter,
flags,
flags_start,
flags_end,
out,
)
},
#[cfg(blake3_avx512_ffi)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX512 => unsafe {
crate::avx512::hash_many(
inputs,
key,
counter,
increment_counter,
flags,
flags_start,
flags_end,
out,
)
},
#[cfg(blake3_neon)]
Platform::NEON => unsafe {
crate::neon::hash_many(
inputs,
key,
counter,
increment_counter,
flags,
flags_start,
flags_end,
out,
)
},
}
}
pub fn xof_many(
&self,
cv: &CVWords,
block: &[u8; BLOCK_LEN],
block_len: u8,
mut counter: u64,
flags: u8,
out: &mut [u8],
) {
debug_assert_eq!(0, out.len() % BLOCK_LEN, "whole blocks only");
if out.is_empty() {
return;
}
match self {
#[cfg(blake3_avx512_ffi)]
#[cfg(unix)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX512 => unsafe {
crate::avx512::xof_many(cv, block, block_len, counter, flags, out)
},
_ => {
for out_block in out.chunks_exact_mut(BLOCK_LEN) {
let out_array: &mut [u8; BLOCK_LEN] = out_block.try_into().unwrap();
*out_array = self.compress_xof(cv, block, block_len, counter, flags);
counter += 1;
}
}
}
}
pub fn portable() -> Self {
Self::Portable
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub fn sse2() -> Option<Self> {
if sse2_detected() {
Some(Self::SSE2)
} else {
None
}
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub fn sse41() -> Option<Self> {
if sse41_detected() {
Some(Self::SSE41)
} else {
None
}
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub fn avx2() -> Option<Self> {
if avx2_detected() {
Some(Self::AVX2)
} else {
None
}
}
#[cfg(blake3_avx512_ffi)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub fn avx512() -> Option<Self> {
if avx512_detected() {
Some(Self::AVX512)
} else {
None
}
}
#[cfg(blake3_neon)]
pub fn neon() -> Option<Self> {
Some(Self::NEON)
}
}
#[cfg(blake3_avx512_ffi)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
#[allow(unreachable_code)]
pub fn avx512_detected() -> bool {
if cfg!(miri) {
return false;
}
if cfg!(feature = "no_avx512") {
return false;
}
#[cfg(all(target_feature = "avx512f", target_feature = "avx512vl"))]
{
return true;
}
#[cfg(feature = "std")]
{
if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
return true;
}
}
false
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
#[allow(unreachable_code)]
pub fn avx2_detected() -> bool {
if cfg!(miri) {
return false;
}
if cfg!(feature = "no_avx2") {
return false;
}
#[cfg(target_feature = "avx2")]
{
return true;
}
#[cfg(feature = "std")]
{
if is_x86_feature_detected!("avx2") {
return true;
}
}
false
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
#[allow(unreachable_code)]
pub fn sse41_detected() -> bool {
if cfg!(miri) {
return false;
}
if cfg!(feature = "no_sse41") {
return false;
}
#[cfg(target_feature = "sse4.1")]
{
return true;
}
#[cfg(feature = "std")]
{
if is_x86_feature_detected!("sse4.1") {
return true;
}
}
false
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
#[allow(unreachable_code)]
pub fn sse2_detected() -> bool {
if cfg!(miri) {
return false;
}
if cfg!(feature = "no_sse2") {
return false;
}
#[cfg(target_feature = "sse2")]
{
return true;
}
#[cfg(feature = "std")]
{
if is_x86_feature_detected!("sse2") {
return true;
}
}
false
}
#[inline(always)]
pub fn words_from_le_bytes_32(bytes: &[u8; 32]) -> [u32; 8] {
let mut out = [0; 8];
out[0] = u32::from_le_bytes(*array_ref!(bytes, 0 * 4, 4));
out[1] = u32::from_le_bytes(*array_ref!(bytes, 1 * 4, 4));
out[2] = u32::from_le_bytes(*array_ref!(bytes, 2 * 4, 4));
out[3] = u32::from_le_bytes(*array_ref!(bytes, 3 * 4, 4));
out[4] = u32::from_le_bytes(*array_ref!(bytes, 4 * 4, 4));
out[5] = u32::from_le_bytes(*array_ref!(bytes, 5 * 4, 4));
out[6] = u32::from_le_bytes(*array_ref!(bytes, 6 * 4, 4));
out[7] = u32::from_le_bytes(*array_ref!(bytes, 7 * 4, 4));
out
}
#[inline(always)]
pub fn words_from_le_bytes_64(bytes: &[u8; 64]) -> [u32; 16] {
let mut out = [0; 16];
out[0] = u32::from_le_bytes(*array_ref!(bytes, 0 * 4, 4));
out[1] = u32::from_le_bytes(*array_ref!(bytes, 1 * 4, 4));
out[2] = u32::from_le_bytes(*array_ref!(bytes, 2 * 4, 4));
out[3] = u32::from_le_bytes(*array_ref!(bytes, 3 * 4, 4));
out[4] = u32::from_le_bytes(*array_ref!(bytes, 4 * 4, 4));
out[5] = u32::from_le_bytes(*array_ref!(bytes, 5 * 4, 4));
out[6] = u32::from_le_bytes(*array_ref!(bytes, 6 * 4, 4));
out[7] = u32::from_le_bytes(*array_ref!(bytes, 7 * 4, 4));
out[8] = u32::from_le_bytes(*array_ref!(bytes, 8 * 4, 4));
out[9] = u32::from_le_bytes(*array_ref!(bytes, 9 * 4, 4));
out[10] = u32::from_le_bytes(*array_ref!(bytes, 10 * 4, 4));
out[11] = u32::from_le_bytes(*array_ref!(bytes, 11 * 4, 4));
out[12] = u32::from_le_bytes(*array_ref!(bytes, 12 * 4, 4));
out[13] = u32::from_le_bytes(*array_ref!(bytes, 13 * 4, 4));
out[14] = u32::from_le_bytes(*array_ref!(bytes, 14 * 4, 4));
out[15] = u32::from_le_bytes(*array_ref!(bytes, 15 * 4, 4));
out
}
#[inline(always)]
pub fn le_bytes_from_words_32(words: &[u32; 8]) -> [u8; 32] {
let mut out = [0; 32];
*array_mut_ref!(out, 0 * 4, 4) = words[0].to_le_bytes();
*array_mut_ref!(out, 1 * 4, 4) = words[1].to_le_bytes();
*array_mut_ref!(out, 2 * 4, 4) = words[2].to_le_bytes();
*array_mut_ref!(out, 3 * 4, 4) = words[3].to_le_bytes();
*array_mut_ref!(out, 4 * 4, 4) = words[4].to_le_bytes();
*array_mut_ref!(out, 5 * 4, 4) = words[5].to_le_bytes();
*array_mut_ref!(out, 6 * 4, 4) = words[6].to_le_bytes();
*array_mut_ref!(out, 7 * 4, 4) = words[7].to_le_bytes();
out
}
#[inline(always)]
pub fn le_bytes_from_words_64(words: &[u32; 16]) -> [u8; 64] {
let mut out = [0; 64];
*array_mut_ref!(out, 0 * 4, 4) = words[0].to_le_bytes();
*array_mut_ref!(out, 1 * 4, 4) = words[1].to_le_bytes();
*array_mut_ref!(out, 2 * 4, 4) = words[2].to_le_bytes();
*array_mut_ref!(out, 3 * 4, 4) = words[3].to_le_bytes();
*array_mut_ref!(out, 4 * 4, 4) = words[4].to_le_bytes();
*array_mut_ref!(out, 5 * 4, 4) = words[5].to_le_bytes();
*array_mut_ref!(out, 6 * 4, 4) = words[6].to_le_bytes();
*array_mut_ref!(out, 7 * 4, 4) = words[7].to_le_bytes();
*array_mut_ref!(out, 8 * 4, 4) = words[8].to_le_bytes();
*array_mut_ref!(out, 9 * 4, 4) = words[9].to_le_bytes();
*array_mut_ref!(out, 10 * 4, 4) = words[10].to_le_bytes();
*array_mut_ref!(out, 11 * 4, 4) = words[11].to_le_bytes();
*array_mut_ref!(out, 12 * 4, 4) = words[12].to_le_bytes();
*array_mut_ref!(out, 13 * 4, 4) = words[13].to_le_bytes();
*array_mut_ref!(out, 14 * 4, 4) = words[14].to_le_bytes();
*array_mut_ref!(out, 15 * 4, 4) = words[15].to_le_bytes();
out
}