// ring/cpu/intel.rs
// Copyright 2016-2021 Brian Smith.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
use cfg_if::cfg_if;
/// Compile-time checks of the target properties the rest of this file relies
/// on. Nothing here exists at run time; a violated assumption fails the build.
mod abi_assumptions {
    use core::mem;

    // TODO: Support targets that do not have SSE and SSE2 enabled, such as
    // x86_64-unknown-linux-none. See
    // https://github.com/briansmith/ring/issues/1793#issuecomment-1793243725,
    // https://github.com/briansmith/ring/issues/1832,
    // https://github.com/briansmith/ring/issues/1833.
    const _ASSUMES_SSE2: () = assert!(cfg!(all(
        target_feature = "sse",
        target_feature = "sse2"
    )));

    // Expected pointer width, in bytes, for each supported architecture.
    #[cfg(target_arch = "x86")]
    const _ASSUMED_POINTER_SIZE: usize = 4;
    #[cfg(target_arch = "x86_64")]
    const _ASSUMED_POINTER_SIZE: usize = 8;

    // Both `usize` and plain references must be exactly pointer-sized.
    const _ASSUMED_USIZE_SIZE: () = assert!(_ASSUMED_POINTER_SIZE == mem::size_of::<usize>());
    const _ASSUMED_REF_SIZE: () =
        assert!(_ASSUMED_POINTER_SIZE == mem::size_of::<&'static u8>());

    // Only little-endian targets are supported.
    const _ASSUMED_ENDIANNESS: () = assert!(cfg!(target_endian = "little"));
}
/// Storage for, and access to, the dynamically detected CPU feature bits.
pub(super) mod featureflags {
    use super::super::CAPS_STATIC;
    use crate::{
        cpu,
        polyfill::{once_cell::race, usize_from_u32},
    };
    use core::num::NonZeroUsize;

    /// Cache of the merged (static | detected) feature bits. It is written
    /// only through `FEATURES.get_or_init()` in `get_or_init()` below.
    static FEATURES: race::OnceNonZeroUsize = race::OnceNonZeroUsize::new();

    /// Ensures CPU feature detection has run and returns the `cpu::Features`
    /// token that witnesses it.
    pub(in super::super) fn get_or_init() -> cpu::Features {
        // SAFETY: `OPENSSL_cpuid_setup` must be called only from the
        // `FEATURES.get_or_init()` initializer below.
        prefixed_extern! {
            fn OPENSSL_cpuid_setup(out: &mut [u32; 4]);
        }

        let detect = || {
            let mut raw = [0u32; 4];
            // SAFETY: We assume that it is safe to execute CPUID and XGETBV.
            unsafe { OPENSSL_cpuid_setup(&mut raw) };

            let caps = CAPS_STATIC | super::cpuid_to_caps_and_set_c_flags(&raw);

            // The `Initialized` bit is always set, so the stored value is
            // never zero.
            let initialized: usize = 1 << (super::Shift::Initialized as u32);
            NonZeroUsize::new(usize_from_u32(caps) | initialized).unwrap()
        };
        let _: NonZeroUsize = FEATURES.get_or_init(detect);

        // SAFETY: We initialized the CPU features as required.
        // `FEATURES.get_or_init` has `happens-before` semantics.
        unsafe { cpu::Features::new_after_feature_flags_written_and_synced_unchecked() }
    }

    /// Returns the cached feature bits, or 0 if nothing has been stored.
    pub(in super::super) fn get(_cpu_features: cpu::Features) -> u32 {
        // SAFETY: Since only `get_or_init()` could have created
        // `_cpu_features`, and it only does so after `FEATURES.get_or_init()`,
        // we know we are reading from `FEATURES` after initializing it.
        //
        // Also, 0 means "no features detected" to users, which is designed to
        // be a safe configuration.
        let stored = FEATURES.get().map_or(0, NonZeroUsize::get);

        // The truncation is lossless, as we set the value with a u32.
        #[allow(clippy::cast_possible_truncation)]
        let bits = stored as u32;
        bits
    }

    /// Features known at compile time on 32-bit x86.
    #[cfg(target_arch = "x86")]
    #[rustfmt::skip]
    pub const STATIC_DETECTED: u32 =
        if cfg!(target_feature = "sse2") { super::Sse2::mask() } else { 0 };

    // Limited to x86_64-v2 features.
    // TODO: Add missing x86-64-v3 features if we find real-world use of x86-64-v3.
    // TODO: Add all features we use.
    /// Features known at compile time on x86_64.
    #[cfg(target_arch = "x86_64")]
    #[rustfmt::skip]
    pub const STATIC_DETECTED: u32 = {
        let sse41: u32 = if cfg!(target_feature = "sse4.1") { super::Sse41::mask() } else { 0 };
        let ssse3: u32 = if cfg!(target_feature = "ssse3") { super::Ssse3::mask() } else { 0 };
        sse41 | ssse3
    };

    pub const FORCE_DYNAMIC_DETECTION: u32 = 0;
}
/// Translates the raw words produced by `OPENSSL_cpuid_setup` into this
/// module's capability bitmask.
///
/// `cpuid` is read as follows:
/// * `cpuid[0]`: bit 30 is the "is Intel" flag synthesized by
///   `OPENSSL_cpuid_setup` (x86_64 only). On 32-bit x86 builds without
///   static SSE2 this word is also read as CPUID leaf 1 EDX.
/// * `cpuid[1]`: CPUID leaf 1 ECX.
/// * `cpuid[2]`, `cpuid[3]`: EBX and ECX of the structured extended feature
///   flags leaf (x86_64 only).
///
/// Returns a bitmask with one bit per detected feature, at the position given
/// by the corresponding `Shift` variant.
///
/// Side effects (x86_64 only): stores 1 into the C globals `avx2_available`
/// and `adx_bmi2_available` when the respective features are detected.
fn cpuid_to_caps_and_set_c_flags(cpuid: &[u32; 4]) -> u32 {
    // "Intel" citations are for "Intel 64 and IA-32 Architectures Software
    // Developer’s Manual", Combined Volumes, December 2024.
    // "AMD" citations are for "AMD64 Technology AMD64 Architecture
    // Programmer’s Manual, Volumes 1-5" Revision 4.08 April 2024.

    // The `prefixed_extern!` uses below assume this
    #[cfg(target_arch = "x86_64")]
    use core::{mem::align_of, sync::atomic::AtomicU32};
    #[cfg(target_arch = "x86_64")]
    const _ATOMIC32_ALIGNMENT_EQUALS_U32_ALIGNMENT: () =
        assert!(align_of::<AtomicU32>() == align_of::<u32>());

    // Returns whether bit number `bit` is set in `leaf`.
    fn check(leaf: u32, bit: u32) -> bool {
        let shifted = 1 << bit;
        (leaf & shifted) == shifted
    }

    // Sets the bit for `shift` in `out`; each feature is set at most once.
    fn set(out: &mut u32, shift: Shift) {
        let shifted = 1 << (shift as u32);
        debug_assert_eq!(*out & shifted, 0);
        *out |= shifted;
        debug_assert_eq!(*out & shifted, shifted);
    }

    #[cfg(target_arch = "x86_64")]
    let is_intel = check(cpuid[0], 30); // Synthesized by `OPENSSL_cpuid_setup`

    // CPUID leaf 1.
    let leaf1_ecx = cpuid[1];

    // Intel: "Structured Extended Feature Flags Enumeration Leaf"
    #[cfg(target_arch = "x86_64")]
    let (extended_features_ebx, extended_features_ecx) = (cpuid[2], cpuid[3]);

    let mut caps = 0;

    // AMD: "Collectively the SSE1, [...] are referred to as the legacy SSE
    // instructions. All legacy SSE instructions support 128-bit vector
    // operands."

    // Intel: "11.6.2 Checking for Intel SSE and SSE2 Support"
    // We have to assume the prerequisites for SSE/SSE2 are met since we're
    // already almost definitely using SSE registers if these target features
    // are enabled.
    //
    // These also seem to help ensure CMOV support; There doesn't seem to be
    // a `cfg!(target_feature = "cmov")`. It is likely that removing these
    // assertions will remove the requirement for CMOV. With or without
    // CMOV, it is likely that some of our timing side channel prevention does
    // not work. Presumably the people who delete these are verifying that it
    // all works fine.
    const _SSE_REQUIRED: () = assert!(cfg!(target_feature = "sse"));
    const _SSE2_REQUIRED: () = assert!(cfg!(target_feature = "sse2"));

    #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
    {
        // If somebody is trying to compile for an x86 target without SSE2
        // and they deleted the `_SSE2_REQUIRED` const assertion above then
        // they're probably trying to support a Linux/BSD/etc. distro that
        // tries to support ancient x86 systems without SSE/SSE2. Try to
        // reduce the harm caused, by implementing dynamic feature detection
        // for them so that most systems will work like normal.
        //
        // Note that usually an x86-64 target with SSE2 disabled by default,
        // usually `-none-` targets, will not support dynamically-detected use
        // of SIMD registers via CPUID. A whole different mechanism is needed
        // to support them. Same for i*86-*-none targets.
        let leaf1_edx = cpuid[0];
        let sse1_available = check(leaf1_edx, 25);
        let sse2_available = check(leaf1_edx, 26);
        if sse1_available && sse2_available {
            set(&mut caps, Shift::Sse2);
        }
    }

    // Sometimes people delete the `_SSE_REQUIRED`/`_SSE2_REQUIRED` const
    // assertions in an attempt to support pre-SSE2 32-bit x86 systems. If they
    // do, hopefully they won't delete these redundant assertions, so that
    // x86_64 isn't affected.
    #[cfg(target_arch = "x86_64")]
    const _SSE2_REQUIRED_X86_64: () = assert!(cfg!(target_feature = "sse2"));
    // This mirrors `_SSE_REQUIRED`, so it must check "sse", not "sse2".
    #[cfg(target_arch = "x86_64")]
    const _SSE_REQUIRED_X86_64: () = assert!(cfg!(target_feature = "sse"));

    // Intel: "12.7.2 Checking for SSSE3 Support"
    // If/when we support dynamic detection of SSE/SSE2, make this conditional
    // on SSE/SSE2.
    if check(leaf1_ecx, 9) {
        set(&mut caps, Shift::Ssse3);
    }

    // Intel: "12.12.2 Checking for Intel SSE4.1 Support"
    // If/when we support dynamic detection of SSE/SSE2, make this conditional
    // on SSE/SSE2.
    // XXX: We don't check for SSE3 and we're not sure if it is compatible for
    //      us to do so; does AMD advertise SSE3? TODO: address this.
    // XXX: We don't condition this on SSSE3 being available. TODO: address
    //      this.
    #[cfg(target_arch = "x86_64")]
    if check(leaf1_ecx, 19) {
        set(&mut caps, Shift::Sse41);
    }

    // AMD: "The extended SSE instructions include [...]."

    // Intel: "14.3 DETECTION OF INTEL AVX INSTRUCTIONS"
    // `OPENSSL_cpuid_setup` clears this bit when it detects the OS doesn't
    // support AVX state.
    let avx_available = check(leaf1_ecx, 28);
    if avx_available {
        set(&mut caps, Shift::Avx);
    }

    #[cfg(target_arch = "x86_64")]
    if avx_available {
        // The Intel docs don't seem to document the detection. The instruction
        // definitions of the VEX.256 instructions reference the
        // VAES/VPCLMULQDQ features and the documentation for the extended
        // features gives the values. We combine these into one feature because
        // we never use them independently.
        let vaes_available = check(extended_features_ecx, 9);
        let vclmul_available = check(extended_features_ecx, 10);
        if vaes_available && vclmul_available {
            set(&mut caps, Shift::VAesClmul);
        }
    }

    // "14.7.1 Detection of Intel AVX2 Hardware support"
    // XXX: We don't condition AVX2 on AVX. TODO: Address this.
    // `OPENSSL_cpuid_setup` clears this bit when it detects the OS doesn't
    // support AVX state.
    #[cfg(target_arch = "x86_64")]
    if check(extended_features_ebx, 5) {
        set(&mut caps, Shift::Avx2);

        // Declared as `uint32_t` in the C code.
        prefixed_extern! {
            static avx2_available: AtomicU32;
        }
        // SAFETY: The C code only reads `avx2_available`, and its reads are
        // synchronized through the `OnceNonZeroUsize` Acquire/Release
        // semantics as we ensure we have a `cpu::Features` instance before
        // calling into the C code.
        let flag = unsafe { &avx2_available };
        flag.store(1, core::sync::atomic::Ordering::Relaxed);
    }

    // Intel: "12.13.4 Checking for Intel AES-NI Support"
    // If/when we support dynamic detection of SSE/SSE2, revisit this.
    // TODO: Clarify "interesting" states like (!SSE && AVX && AES-NI)
    // and AES-NI & !AVX.
    // Each check of `ClMul`, `Aes`, and `Sha` must be paired with a check for
    // an AVX feature (e.g. `Avx`) or an SSE feature (e.g. `Ssse3`), as every
    // use will either be supported by SSE* or AVX* instructions. We then
    // assume that those supporting instructions' prerequisites (e.g. OS
    // support for AVX or SSE state, respectively) are the only prerequisites
    // for these features.
    if check(leaf1_ecx, 1) {
        set(&mut caps, Shift::ClMul);
    }
    if check(leaf1_ecx, 25) {
        set(&mut caps, Shift::Aes);
    }
    // See BoringSSL 69c26de93c82ad98daecaec6e0c8644cdf74b03f before enabling
    // static feature detection for this.
    #[cfg(target_arch = "x86_64")]
    if check(extended_features_ebx, 29) {
        set(&mut caps, Shift::Sha);
    }

    #[cfg(target_arch = "x86_64")]
    {
        if is_intel {
            set(&mut caps, Shift::IntelCpu);
        }

        if check(leaf1_ecx, 22) {
            set(&mut caps, Shift::Movbe);
        }

        let adx_available = check(extended_features_ebx, 19);
        if adx_available {
            set(&mut caps, Shift::Adx);
        }

        // Some 6th Generation (Skylake) CPUs claim to support BMI1 and BMI2
        // when they don't; see erratum "SKD052". The Intel document at
        // https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/6th-gen-core-u-y-spec-update.pdf
        // contains the footnote "Affects 6th Generation Intel Pentium processor
        // family and Intel Celeron processor family". Further research indicates
        // that Skylake Pentium/Celeron do not implement AVX or ADX. It turns
        // out that we only use BMI1 and BMI2 in combination with ADX and/or
        // AVX.
        //
        // rust `std::arch::is_x86_feature_detected` does a very similar thing
        // but only looks at AVX, not ADX. Note that they reference an older
        // version of the erratum labeled SKL052.
        let believe_bmi_bits = !is_intel || (adx_available || avx_available);

        if check(extended_features_ebx, 3) && believe_bmi_bits {
            set(&mut caps, Shift::Bmi1);
        }

        let bmi2_available = check(extended_features_ebx, 8) && believe_bmi_bits;
        if bmi2_available {
            set(&mut caps, Shift::Bmi2);
        }

        if adx_available && bmi2_available {
            // Declared as `uint32_t` in the C code.
            prefixed_extern! {
                static adx_bmi2_available: AtomicU32;
            }
            // SAFETY: The C code only reads `adx_bmi2_available`, and its
            // reads are synchronized through the `OnceNonZeroUsize`
            // Acquire/Release semantics as we ensure we have a
            // `cpu::Features` instance before calling into the C code.
            let flag = unsafe { &adx_bmi2_available };
            flag.store(1, core::sync::atomic::Ordering::Relaxed);
        }
    }

    caps
}
// Declares the CPU features this module can report. Each entry pairs the
// target architecture(s) it applies to with a feature name.
//
// NOTE(review): `impl_get_feature!` is defined outside this file. It
// presumably generates the per-feature types (e.g. `Sse2`, `Ssse3`, `Sse41`
// with their `mask()` functions) and the `Shift` variants used elsewhere in
// this file — confirm against the macro definition. The order of the entries
// may determine bit positions, so do not reorder them without checking.
impl_get_feature! {
features: [
{ ("x86_64") => VAesClmul },
{ ("x86", "x86_64") => ClMul },
{ ("x86", "x86_64") => Ssse3 },
{ ("x86_64") => Sse41 },
{ ("x86_64") => Movbe },
{ ("x86", "x86_64") => Aes },
{ ("x86", "x86_64") => Avx },
{ ("x86_64") => Bmi1 },
{ ("x86_64") => Avx2 },
{ ("x86_64") => Bmi2 },
{ ("x86_64") => Adx },
// See BoringSSL 69c26de93c82ad98daecaec6e0c8644cdf74b03f before enabling
// static feature detection for this.
{ ("x86_64") => Sha },
// x86_64 can just assume SSE2 is available.
{ ("x86") => Sse2 },
],
}
cfg_if! {
    if #[cfg(target_arch = "x86_64")] {
        /// Witness that the `IntelCpu` bit is set in the feature flags. That
        /// bit is set by `cpuid_to_caps_and_set_c_flags` when
        /// `OPENSSL_cpuid_setup` reports an Intel CPU.
        #[derive(Clone, Copy)]
        pub(crate) struct IntelCpu(super::Features);

        impl super::GetFeature<IntelCpu> for super::features::Values {
            /// Returns `Some` only when the `IntelCpu` bit is set.
            fn get_feature(&self) -> Option<IntelCpu> {
                const MASK: u32 = 1 << (Shift::IntelCpu as u32);

                let is_intel = (self.values() & MASK) == MASK;
                is_intel.then(|| IntelCpu(self.cpu()))
            }
        }
    }
}
#[cfg(test)]
mod tests {
    // Expected to pass on every x86 system apart from very, very old ones.
    #[cfg(target_arch = "x86")]
    #[test]
    fn x86_has_sse2() {
        use super::Sse2;
        use crate::cpu::{self, GetFeature as _};

        // The annotation selects the `Sse2` feature lookup.
        let sse2: Option<Sse2> = cpu::features().get_feature();
        assert!(sse2.is_some());
    }
}