Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
Merge tag 'fpsimd-on-stack-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux

Pull arm64 FPSIMD on-stack buffer updates from Eric Biggers:
 "This is a core arm64 change. However, I was asked to take this
  because most uses of kernel-mode FPSIMD are in crypto or CRC code.

  In v6.8, the size of task_struct on arm64 increased by 528 bytes due
  to the new 'kernel_fpsimd_state' field. This field was added to
  allow kernel-mode FPSIMD code to be preempted. Unfortunately, 528
  bytes is kind of a lot for task_struct. This regression in the
  task_struct size was noticed and reported.

  Recover that space by making this state be allocated on the stack at
  the beginning of each kernel-mode FPSIMD section. To make it easier
  for all the users of kernel-mode FPSIMD to do that correctly,
  introduce and use a 'scoped_ksimd' abstraction"

* tag 'fpsimd-on-stack-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux: (23 commits)
  lib/crypto: arm64: Move remaining algorithms to scoped ksimd API
  lib/crypto: arm/blake2b: Move to scoped ksimd API
  arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack
  arm64/fpu: Enforce task-context only for generic kernel mode FPU
  net/mlx5: Switch to more abstract scoped ksimd guard API on arm64
  arm64/xorblocks: Switch to 'ksimd' scoped guard API
  crypto/arm64: sm4 - Switch to 'ksimd' scoped guard API
  crypto/arm64: sm3 - Switch to 'ksimd' scoped guard API
  crypto/arm64: sha3 - Switch to 'ksimd' scoped guard API
  crypto/arm64: polyval - Switch to 'ksimd' scoped guard API
  crypto/arm64: nhpoly1305 - Switch to 'ksimd' scoped guard API
  crypto/arm64: aes-gcm - Switch to 'ksimd' scoped guard API
  crypto/arm64: aes-blk - Switch to 'ksimd' scoped guard API
  crypto/arm64: aes-ccm - Switch to 'ksimd' scoped guard API
  raid6: Move to more abstract 'ksimd' guard API
  crypto: aegis128-neon - Move to more abstract 'ksimd' guard API
  crypto/arm64: sm4-ce-gcm - Avoid pointless yield of the NEON unit
  crypto/arm64: sm4-ce-ccm - Avoid pointless yield of the NEON unit
  crypto/arm64: aes-ce-ccm - Avoid pointless yield of the NEON unit
  lib/crc: Switch ARM and arm64 to 'ksimd' scoped guard API
  ...
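Every hunk in the diffs below follows the same pattern: an open-coded kernel_neon_begin()/kernel_neon_end() pair becomes a scoped_ksimd() guard bracketing exactly the statement or block attached to it. As a rough sketch of the idiom only (hypothetical, for illustration: the names ksimd_guard, ksimd_guard_enter, and ksimd_guard_exit are invented here, and the kernel's real definition is built on its generic guard/cleanup infrastructure and additionally reserves the FP/SIMD save buffer on the caller's stack):

/*
 * Hypothetical sketch of a scoped SIMD guard -- not the kernel's
 * actual definition.
 */
struct ksimd_guard { bool active; };

static inline struct ksimd_guard ksimd_guard_enter(void)
{
	kernel_neon_begin();	/* enter the SIMD-capable region */
	return (struct ksimd_guard){ .active = true };
}

static inline void ksimd_guard_exit(struct ksimd_guard *g)
{
	kernel_neon_end();	/* leave the region on any scope exit */
}

/*
 * The for loop runs the attached statement or block exactly once; the
 * __cleanup__ attribute makes ksimd_guard_exit() run however the scope
 * is left, including the early "return" that some of the converted
 * call sites below perform inside the guarded block.
 */
#define scoped_ksimd()						\
	for (struct ksimd_guard __g				\
		__attribute__((__cleanup__(ksimd_guard_exit))) = \
			ksimd_guard_enter();			\
	     __g.active; __g.active = false)

Under such a definition, "scoped_ksimd() crc = crc32_pmull_le(p, n, crc);" would be equivalent to the begin/call/end triple it replaces, with the begin/end pair impossible to mismatch.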
@@ -5,7 +5,6 @@
  * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
  */
 
-#include <asm/neon.h>
 #include <asm/simd.h>
 
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
@@ -19,22 +18,16 @@ asmlinkage void crc_t10dif_pmull8(u16 init_crc, const u8 *buf, size_t len,
 
 static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length)
 {
-	if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) {
+	if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) {
 		if (static_branch_likely(&have_pmull)) {
-			if (likely(may_use_simd())) {
-				kernel_neon_begin();
-				crc = crc_t10dif_pmull64(crc, data, length);
-				kernel_neon_end();
-				return crc;
-			}
+			scoped_ksimd()
+				return crc_t10dif_pmull64(crc, data, length);
 		} else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
-			   static_branch_likely(&have_neon) &&
-			   likely(may_use_simd())) {
+			   static_branch_likely(&have_neon)) {
 			u8 buf[16] __aligned(16);
 
-			kernel_neon_begin();
-			crc_t10dif_pmull8(crc, data, length, buf);
-			kernel_neon_end();
+			scoped_ksimd()
+				crc_t10dif_pmull8(crc, data, length, buf);
 
 			return crc_t10dif_generic(0, buf, sizeof(buf));
 		}

@@ -8,7 +8,6 @@
 #include <linux/cpufeature.h>
 
 #include <asm/hwcap.h>
-#include <asm/neon.h>
 #include <asm/simd.h>
 
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32);
@@ -42,9 +41,8 @@ static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
 			len -= n;
 		}
 		n = round_down(len, 16);
-		kernel_neon_begin();
-		crc = crc32_pmull_le(p, n, crc);
-		kernel_neon_end();
+		scoped_ksimd()
+			crc = crc32_pmull_le(p, n, crc);
 		p += n;
 		len -= n;
 	}
@@ -71,9 +69,8 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
 			len -= n;
 		}
 		n = round_down(len, 16);
-		kernel_neon_begin();
-		crc = crc32c_pmull_le(p, n, crc);
-		kernel_neon_end();
+		scoped_ksimd()
+			crc = crc32c_pmull_le(p, n, crc);
 		p += n;
 		len -= n;
 	}

@@ -7,7 +7,6 @@
 
 #include <linux/cpufeature.h>
 
-#include <asm/neon.h>
 #include <asm/simd.h>
 
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_asimd);
@@ -21,22 +20,16 @@ asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len);
 
 static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length)
 {
-	if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) {
+	if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) {
 		if (static_branch_likely(&have_pmull)) {
-			if (likely(may_use_simd())) {
-				kernel_neon_begin();
-				crc = crc_t10dif_pmull_p64(crc, data, length);
-				kernel_neon_end();
-				return crc;
-			}
+			scoped_ksimd()
+				return crc_t10dif_pmull_p64(crc, data, length);
 		} else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
-			   static_branch_likely(&have_asimd) &&
-			   likely(may_use_simd())) {
+			   static_branch_likely(&have_asimd)) {
 			u8 buf[16];
 
-			kernel_neon_begin();
-			crc_t10dif_pmull_p8(crc, data, length, buf);
-			kernel_neon_end();
+			scoped_ksimd()
+				crc_t10dif_pmull_p8(crc, data, length, buf);
 
 			return crc_t10dif_generic(0, buf, sizeof(buf));
 		}

@@ -2,7 +2,6 @@
 
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
-#include <asm/neon.h>
 #include <asm/simd.h>
 
 // The minimum input length to consider the 4-way interleaved code path
@@ -23,9 +22,8 @@ static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
 
 	if (len >= min_len && cpu_have_named_feature(PMULL) &&
 	    likely(may_use_simd())) {
-		kernel_neon_begin();
-		crc = crc32_le_arm64_4way(crc, p, len);
-		kernel_neon_end();
+		scoped_ksimd()
+			crc = crc32_le_arm64_4way(crc, p, len);
 
 		p += round_down(len, 64);
 		len %= 64;
@@ -44,9 +42,8 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
 
 	if (len >= min_len && cpu_have_named_feature(PMULL) &&
 	    likely(may_use_simd())) {
-		kernel_neon_begin();
-		crc = crc32c_le_arm64_4way(crc, p, len);
-		kernel_neon_end();
+		scoped_ksimd()
+			crc = crc32c_le_arm64_4way(crc, p, len);
 
 		p += round_down(len, 64);
 		len %= 64;
@@ -65,9 +62,8 @@ static inline u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
 
 	if (len >= min_len && cpu_have_named_feature(PMULL) &&
 	    likely(may_use_simd())) {
-		kernel_neon_begin();
-		crc = crc32_be_arm64_4way(crc, p, len);
-		kernel_neon_end();
+		scoped_ksimd()
+			crc = crc32_be_arm64_4way(crc, p, len);
 
 		p += round_down(len, 64);
 		len %= 64;

@@ -24,9 +24,8 @@ static void blake2b_compress(struct blake2b_ctx *ctx,
 		const size_t blocks = min_t(size_t, nblocks,
 					    SZ_4K / BLAKE2B_BLOCK_SIZE);
 
-		kernel_neon_begin();
-		blake2b_compress_neon(ctx, data, blocks, inc);
-		kernel_neon_end();
+		scoped_ksimd()
+			blake2b_compress_neon(ctx, data, blocks, inc);
 
 		data += blocks * BLAKE2B_BLOCK_SIZE;
 		nblocks -= blocks;

@@ -12,7 +12,6 @@
 
 #include <asm/cputype.h>
 #include <asm/hwcap.h>
-#include <asm/neon.h>
 #include <asm/simd.h>
 
 asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
@@ -68,9 +67,8 @@ static void hchacha_block_arch(const struct chacha_state *state,
 	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
 		hchacha_block_arm(state, out, nrounds);
 	} else {
-		kernel_neon_begin();
-		hchacha_block_neon(state, out, nrounds);
-		kernel_neon_end();
+		scoped_ksimd()
+			hchacha_block_neon(state, out, nrounds);
 	}
 }
 
@@ -87,9 +85,8 @@ static void chacha_crypt_arch(struct chacha_state *state, u8 *dst,
 	do {
 		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
 
-		kernel_neon_begin();
-		chacha_doneon(state, dst, src, todo, nrounds);
-		kernel_neon_end();
+		scoped_ksimd()
+			chacha_doneon(state, dst, src, todo, nrounds);
 
 		bytes -= todo;
 		src += todo;

@@ -25,9 +25,8 @@ static void curve25519_arch(u8 out[CURVE25519_KEY_SIZE],
 			    const u8 point[CURVE25519_KEY_SIZE])
 {
 	if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
-		kernel_neon_begin();
-		curve25519_neon(out, scalar, point);
-		kernel_neon_end();
+		scoped_ksimd()
+			curve25519_neon(out, scalar, point);
 	} else {
 		curve25519_generic(out, scalar, point);
 	}

@@ -6,7 +6,6 @@
  */
 
 #include <asm/hwcap.h>
-#include <asm/neon.h>
 #include <asm/simd.h>
 #include <linux/cpufeature.h>
 #include <linux/jump_label.h>
@@ -32,9 +31,8 @@ static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src,
 	do {
 		unsigned int todo = min_t(unsigned int, len, SZ_4K);
 
-		kernel_neon_begin();
-		poly1305_blocks_neon(state, src, todo, padbit);
-		kernel_neon_end();
+		scoped_ksimd()
+			poly1305_blocks_neon(state, src, todo, padbit);
 
 		len -= todo;
 		src += todo;

@@ -4,7 +4,6 @@
  *
  * Copyright 2025 Google LLC
  */
-#include <asm/neon.h>
 #include <asm/simd.h>
 
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
@@ -22,12 +21,12 @@ static void sha1_blocks(struct sha1_block_state *state,
 {
 	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
 	    static_branch_likely(&have_neon) && likely(may_use_simd())) {
-		kernel_neon_begin();
-		if (static_branch_likely(&have_ce))
-			sha1_ce_transform(state, data, nblocks);
-		else
-			sha1_transform_neon(state, data, nblocks);
-		kernel_neon_end();
+		scoped_ksimd() {
+			if (static_branch_likely(&have_ce))
+				sha1_ce_transform(state, data, nblocks);
+			else
+				sha1_transform_neon(state, data, nblocks);
+		}
 	} else {
 		sha1_block_data_order(state, data, nblocks);
 	}

@@ -22,12 +22,12 @@ static void sha256_blocks(struct sha256_block_state *state,
 {
 	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
 	    static_branch_likely(&have_neon) && likely(may_use_simd())) {
-		kernel_neon_begin();
-		if (static_branch_likely(&have_ce))
-			sha256_ce_transform(state, data, nblocks);
-		else
-			sha256_block_data_order_neon(state, data, nblocks);
-		kernel_neon_end();
+		scoped_ksimd() {
+			if (static_branch_likely(&have_ce))
+				sha256_ce_transform(state, data, nblocks);
+			else
+				sha256_block_data_order_neon(state, data, nblocks);
+		}
 	} else {
 		sha256_block_data_order(state, data, nblocks);
 	}

@@ -19,9 +19,8 @@ static void sha512_blocks(struct sha512_block_state *state,
 {
 	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
 	    static_branch_likely(&have_neon) && likely(may_use_simd())) {
-		kernel_neon_begin();
-		sha512_block_data_order_neon(state, data, nblocks);
-		kernel_neon_end();
+		scoped_ksimd()
+			sha512_block_data_order_neon(state, data, nblocks);
 	} else {
 		sha512_block_data_order(state, data, nblocks);
 	}

@@ -23,7 +23,6 @@
 #include <linux/kernel.h>
 
 #include <asm/hwcap.h>
-#include <asm/neon.h>
 #include <asm/simd.h>
 
 asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
@@ -65,9 +64,8 @@ static void hchacha_block_arch(const struct chacha_state *state,
 	if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) {
 		hchacha_block_generic(state, out, nrounds);
 	} else {
-		kernel_neon_begin();
-		hchacha_block_neon(state, out, nrounds);
-		kernel_neon_end();
+		scoped_ksimd()
+			hchacha_block_neon(state, out, nrounds);
 	}
 }
 
@@ -81,9 +79,8 @@ static void chacha_crypt_arch(struct chacha_state *state, u8 *dst,
 	do {
 		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
 
-		kernel_neon_begin();
-		chacha_doneon(state, dst, src, todo, nrounds);
-		kernel_neon_end();
+		scoped_ksimd()
+			chacha_doneon(state, dst, src, todo, nrounds);
 
 		bytes -= todo;
 		src += todo;

@@ -6,7 +6,6 @@
  */
 
 #include <asm/hwcap.h>
-#include <asm/neon.h>
 #include <asm/simd.h>
 #include <linux/cpufeature.h>
 #include <linux/jump_label.h>
@@ -31,9 +30,8 @@ static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src,
 	do {
 		unsigned int todo = min_t(unsigned int, len, SZ_4K);
 
-		kernel_neon_begin();
-		poly1305_blocks_neon(state, src, todo, padbit);
-		kernel_neon_end();
+		scoped_ksimd()
+			poly1305_blocks_neon(state, src, todo, padbit);
 
 		len -= todo;
 		src += todo;

@@ -4,7 +4,6 @@
  *
  * Copyright 2025 Google LLC
  */
-#include <asm/neon.h>
 #include <asm/simd.h>
 #include <linux/cpufeature.h>
 
@@ -24,13 +23,14 @@ static void polyval_preparekey_arch(struct polyval_key *key,
 	static_assert(ARRAY_SIZE(key->h_powers) == NUM_H_POWERS);
 	memcpy(&key->h_powers[NUM_H_POWERS - 1], raw_key, POLYVAL_BLOCK_SIZE);
 	if (static_branch_likely(&have_pmull) && may_use_simd()) {
-		kernel_neon_begin();
-		for (int i = NUM_H_POWERS - 2; i >= 0; i--) {
-			key->h_powers[i] = key->h_powers[i + 1];
-			polyval_mul_pmull(&key->h_powers[i],
-					  &key->h_powers[NUM_H_POWERS - 1]);
+		scoped_ksimd() {
+			for (int i = NUM_H_POWERS - 2; i >= 0; i--) {
+				key->h_powers[i] = key->h_powers[i + 1];
+				polyval_mul_pmull(
+					&key->h_powers[i],
+					&key->h_powers[NUM_H_POWERS - 1]);
+			}
 		}
-		kernel_neon_end();
 	} else {
 		for (int i = NUM_H_POWERS - 2; i >= 0; i--) {
 			key->h_powers[i] = key->h_powers[i + 1];
@@ -44,9 +44,8 @@ static void polyval_mul_arch(struct polyval_elem *acc,
 			     const struct polyval_key *key)
 {
 	if (static_branch_likely(&have_pmull) && may_use_simd()) {
-		kernel_neon_begin();
-		polyval_mul_pmull(acc, &key->h_powers[NUM_H_POWERS - 1]);
-		kernel_neon_end();
+		scoped_ksimd()
+			polyval_mul_pmull(acc, &key->h_powers[NUM_H_POWERS - 1]);
 	} else {
 		polyval_mul_generic(acc, &key->h_powers[NUM_H_POWERS - 1]);
 	}
@@ -62,9 +61,8 @@ static void polyval_blocks_arch(struct polyval_elem *acc,
 			size_t n = min_t(size_t, nblocks,
 					 4096 / POLYVAL_BLOCK_SIZE);
 
-			kernel_neon_begin();
-			polyval_blocks_pmull(acc, key, data, n);
-			kernel_neon_end();
+			scoped_ksimd()
+				polyval_blocks_pmull(acc, key, data, n);
 			data += n * POLYVAL_BLOCK_SIZE;
 			nblocks -= n;
 		} while (nblocks);

@@ -4,7 +4,6 @@
  *
  * Copyright 2025 Google LLC
  */
-#include <asm/neon.h>
 #include <asm/simd.h>
 #include <linux/cpufeature.h>
 
@@ -20,9 +19,9 @@ static void sha1_blocks(struct sha1_block_state *state,
 		do {
 			size_t rem;
 
-			kernel_neon_begin();
-			rem = __sha1_ce_transform(state, data, nblocks);
-			kernel_neon_end();
+			scoped_ksimd()
+				rem = __sha1_ce_transform(state, data, nblocks);
 
 			data += (nblocks - rem) * SHA1_BLOCK_SIZE;
 			nblocks = rem;
 		} while (nblocks);

@@ -4,7 +4,6 @@
  *
  * Copyright 2025 Google LLC
  */
-#include <asm/neon.h>
 #include <asm/simd.h>
 #include <linux/cpufeature.h>
 
@@ -27,17 +26,16 @@ static void sha256_blocks(struct sha256_block_state *state,
 			do {
 				size_t rem;
 
-				kernel_neon_begin();
-				rem = __sha256_ce_transform(state,
-							    data, nblocks);
-				kernel_neon_end();
+				scoped_ksimd()
+					rem = __sha256_ce_transform(state, data,
+								    nblocks);
 
 				data += (nblocks - rem) * SHA256_BLOCK_SIZE;
 				nblocks = rem;
 			} while (nblocks);
 		} else {
-			kernel_neon_begin();
-			sha256_block_neon(state, data, nblocks);
-			kernel_neon_end();
+			scoped_ksimd()
+				sha256_block_neon(state, data, nblocks);
 		}
 	} else {
 		sha256_block_data_order(state, data, nblocks);
@@ -66,9 +64,8 @@ static bool sha256_finup_2x_arch(const struct __sha256_ctx *ctx,
 	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
 	    static_branch_likely(&have_ce) && len >= SHA256_BLOCK_SIZE &&
 	    len <= 65536 && likely(may_use_simd())) {
-		kernel_neon_begin();
-		sha256_ce_finup2x(ctx, data1, data2, len, out1, out2);
-		kernel_neon_end();
+		scoped_ksimd()
+			sha256_ce_finup2x(ctx, data1, data2, len, out1, out2);
 		kmsan_unpoison_memory(out1, SHA256_DIGEST_SIZE);
 		kmsan_unpoison_memory(out2, SHA256_DIGEST_SIZE);
 		return true;

@@ -7,7 +7,6 @@
  * published by the Free Software Foundation.
  */
 
-#include <asm/neon.h>
 #include <asm/simd.h>
 #include <linux/cpufeature.h>
 
@@ -23,10 +22,9 @@ static void sha3_absorb_blocks(struct sha3_state *state, const u8 *data,
 		do {
 			size_t rem;
 
-			kernel_neon_begin();
-			rem = sha3_ce_transform(state, data, nblocks,
-						block_size);
-			kernel_neon_end();
+			scoped_ksimd()
+				rem = sha3_ce_transform(state, data, nblocks,
+							block_size);
 			data += (nblocks - rem) * block_size;
 			nblocks = rem;
 		} while (nblocks);
@@ -46,9 +44,8 @@ static void sha3_keccakf(struct sha3_state *state)
 		 */
 		static const u8 zeroes[SHA3_512_BLOCK_SIZE];
 
-		kernel_neon_begin();
-		sha3_ce_transform(state, zeroes, 1, sizeof(zeroes));
-		kernel_neon_end();
+		scoped_ksimd()
+			sha3_ce_transform(state, zeroes, 1, sizeof(zeroes));
 	} else {
 		sha3_keccakf_generic(state);
 	}

@@ -4,7 +4,7 @@
  *
  * Copyright 2025 Google LLC
  */
-#include <asm/neon.h>
+
 #include <asm/simd.h>
 #include <linux/cpufeature.h>
 
@@ -24,9 +24,9 @@ static void sha512_blocks(struct sha512_block_state *state,
 		do {
 			size_t rem;
 
-			kernel_neon_begin();
-			rem = __sha512_ce_transform(state, data, nblocks);
-			kernel_neon_end();
+			scoped_ksimd()
+				rem = __sha512_ce_transform(state, data, nblocks);
 
 			data += (nblocks - rem) * SHA512_BLOCK_SIZE;
 			nblocks = rem;
 		} while (nblocks);

@@ -8,10 +8,9 @@
 #include <linux/raid/pq.h>
 
 #ifdef __KERNEL__
 #include <asm/neon.h>
 #include <asm/simd.h>
 #else
-#define kernel_neon_begin()
-#define kernel_neon_end()
+#define scoped_ksimd()
 #define cpu_has_neon() (1)
 #endif
@@ -32,10 +31,9 @@
 	{								\
 		void raid6_neon ## _n ## _gen_syndrome_real(int,	\
 						unsigned long, void**);	\
-		kernel_neon_begin();					\
-		raid6_neon ## _n ## _gen_syndrome_real(disks,		\
+		scoped_ksimd()						\
+			raid6_neon ## _n ## _gen_syndrome_real(disks,	\
 						(unsigned long)bytes, ptrs); \
-		kernel_neon_end();					\
 	}								\
 	static void raid6_neon ## _n ## _xor_syndrome(int disks,	\
 						      int start, int stop, \
@@ -43,10 +41,9 @@
 	{								\
 		void raid6_neon ## _n ## _xor_syndrome_real(int,	\
 					int, int, unsigned long, void**); \
-		kernel_neon_begin();					\
-		raid6_neon ## _n ## _xor_syndrome_real(disks,		\
-				start, stop, (unsigned long)bytes, ptrs); \
-		kernel_neon_end();					\
+		scoped_ksimd()						\
+			raid6_neon ## _n ## _xor_syndrome_real(disks,	\
+				start, stop, (unsigned long)bytes, ptrs);\
 	}								\
 	struct raid6_calls const raid6_neonx ## _n = {			\
 		raid6_neon ## _n ## _gen_syndrome,			\

@@ -7,11 +7,10 @@
 #include <linux/raid/pq.h>
 
 #ifdef __KERNEL__
 #include <asm/neon.h>
 #include <asm/simd.h>
 #include "neon.h"
 #else
-#define kernel_neon_begin()
-#define kernel_neon_end()
+#define scoped_ksimd()
 #define cpu_has_neon() (1)
 #endif
@@ -55,9 +54,8 @@ static void raid6_2data_recov_neon(int disks, size_t bytes, int faila,
 	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
 					raid6_gfexp[failb]]];
 
-	kernel_neon_begin();
-	__raid6_2data_recov_neon(bytes, p, q, dp, dq, pbmul, qmul);
-	kernel_neon_end();
+	scoped_ksimd()
+		__raid6_2data_recov_neon(bytes, p, q, dp, dq, pbmul, qmul);
 }
 
 static void raid6_datap_recov_neon(int disks, size_t bytes, int faila,
@@ -86,9 +84,8 @@ static void raid6_datap_recov_neon(int disks, size_t bytes, int faila,
 	/* Now, pick the proper data tables */
 	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
 
-	kernel_neon_begin();
-	__raid6_datap_recov_neon(bytes, p, q, dq, qmul);
-	kernel_neon_end();
+	scoped_ksimd()
+		__raid6_datap_recov_neon(bytes, p, q, dq, qmul);
 }
 
 const struct raid6_recov_calls raid6_recov_neon = {