Merge patch series "riscv: Unaligned access speed probing fixes and skipping"

Andrew Jones <ajones@ventanamicro.com> says:

The first six patches of this series are fixes and cleanups of the
unaligned access speed probing code. The next patch introduces kernel
command line parameters that allow the probing to be skipped. These
parameters take a different approach from Jesse's [1]: [1] takes a
cpu-list for a particular speed, supporting heterogeneous platforms,
whereas with this approach the kernel command line should only be used
for homogeneous platforms. [1] also only allowed 'fast' and 'slow' to
be selected; the parameters here additionally support 'unsupported',
which can be useful for testing code paths gated on that value. The
final patch adds the documentation (see the example below).

[1] https://lore.kernel.org/linux-riscv/20240805173816.3722002-1-jesse@rivosinc.com/
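
For example (illustrative only; the parameter names and values come from
the documentation patch below), probing can be skipped entirely on a
homogeneous platform by booting with:

    unaligned_scalar_speed=fast unaligned_vector_speed=fast

or with 'unsupported' in place of 'fast' to exercise the code paths that
are gated on unaligned accesses not being supported at all.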

* patches from https://lore.kernel.org/r/20250304120014.143628-10-ajones@ventanamicro.com:
  Documentation/kernel-parameters: Add riscv unaligned speed parameters
  riscv: Add parameter for skipping access speed tests
  riscv: Fix set up of vector cpu hotplug callback
  riscv: Fix set up of cpu hotplug callbacks
  riscv: Change check_unaligned_access_speed_all_cpus to void
  riscv: Fix check_unaligned_access_all_cpus
  riscv: Fix riscv_online_cpu_vec
  riscv: Annotate unaligned access init functions

Link: https://lore.kernel.org/r/20250304120014.143628-10-ajones@ventanamicro.com
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
4 changed files, 174 insertions(+), 109 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -7477,6 +7477,22 @@
 			Note that genuine overcurrent events won't be
 			reported either.
 
+	unaligned_scalar_speed=
+			[RISCV]
+			Format: {slow | fast | unsupported}
+			Allow skipping scalar unaligned access speed tests. This
+			is useful for testing alternative code paths and to skip
+			the tests in environments where they run too slowly. All
+			CPUs must have the same scalar unaligned access speed.
+
+	unaligned_vector_speed=
+			[RISCV]
+			Format: {slow | fast | unsupported}
+			Allow skipping vector unaligned access speed tests. This
+			is useful for testing alternative code paths and to skip
+			the tests in environments where they run too slowly. All
+			CPUs must have the same vector unaligned access speed.
+
 	unknown_nmi_panic
 			[X86] Cause panic on unknown NMI.
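
To illustrate the runtime effect (the message text is the pr_info()
format string from the final hunk below with its '%s' filled in),
booting with both parameters set to 'fast' should log:

    scalar unaligned access speed set to 'fast' by command line
    vector unaligned access speed set to 'fast' by command line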

diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -63,7 +63,7 @@ void __init riscv_user_isa_enable(void);
 #define __RISCV_ISA_EXT_SUPERSET_VALIDATE(_name, _id, _sub_exts, _validate) \
 	_RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts), _validate)
 
-bool check_unaligned_access_emulated_all_cpus(void);
+bool __init check_unaligned_access_emulated_all_cpus(void);
 #if defined(CONFIG_RISCV_SCALAR_MISALIGNED)
 void check_unaligned_access_emulated(struct work_struct *work __always_unused);
 void unaligned_emulation_finish(void);
@@ -76,7 +76,7 @@ static inline bool unaligned_ctl_available(void)
 }
 #endif
 
-bool check_vector_unaligned_access_emulated_all_cpus(void);
+bool __init check_vector_unaligned_access_emulated_all_cpus(void);
 #if defined(CONFIG_RISCV_VECTOR_MISALIGNED)
 void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused);
 DECLARE_PER_CPU(long, vector_misaligned_access);

diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -605,16 +605,10 @@ void check_vector_unaligned_access_emulated(struct work_struct *work __always_unused)
 	kernel_vector_end();
 }
 
-bool check_vector_unaligned_access_emulated_all_cpus(void)
+bool __init check_vector_unaligned_access_emulated_all_cpus(void)
 {
 	int cpu;
 
-	if (!has_vector()) {
-		for_each_online_cpu(cpu)
-			per_cpu(vector_misaligned_access, cpu) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
-		return false;
-	}
-
 	schedule_on_each_cpu(check_vector_unaligned_access_emulated);
 
 	for_each_online_cpu(cpu)
@@ -625,7 +619,7 @@ bool check_vector_unaligned_access_emulated_all_cpus(void)
 	return true;
 }
 #else
-bool check_vector_unaligned_access_emulated_all_cpus(void)
+bool __init check_vector_unaligned_access_emulated_all_cpus(void)
 {
 	return false;
 }
@@ -659,7 +653,7 @@ void check_unaligned_access_emulated(struct work_struct *work __always_unused)
 	}
 }
 
-bool check_unaligned_access_emulated_all_cpus(void)
+bool __init check_unaligned_access_emulated_all_cpus(void)
 {
 	int cpu;
 
@@ -684,7 +678,7 @@ bool unaligned_ctl_available(void)
 	return unaligned_ctl;
 }
 #else
-bool check_unaligned_access_emulated_all_cpus(void)
+bool __init check_unaligned_access_emulated_all_cpus(void)
 {
 	return false;
 }

diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -24,8 +24,12 @@
 DEFINE_PER_CPU(long, misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN;
 DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
 
-#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
+static long unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN;
+static long unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN;
+
 static cpumask_t fast_misaligned_access;
+
+#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
 static int check_unaligned_access(void *param)
 {
 	int cpu = smp_processor_id();
@@ -121,7 +125,7 @@ static int check_unaligned_access(void *param)
 	return 0;
 }
 
-static void check_unaligned_access_nonboot_cpu(void *param)
+static void __init check_unaligned_access_nonboot_cpu(void *param)
 {
 	unsigned int cpu = smp_processor_id();
 	struct page **pages = param;
@@ -130,6 +134,50 @@ static void check_unaligned_access_nonboot_cpu(void *param)
 	check_unaligned_access(pages[cpu]);
 }
 
+/* Measure unaligned access speed on all CPUs present at boot in parallel. */
+static void __init check_unaligned_access_speed_all_cpus(void)
+{
+	unsigned int cpu;
+	unsigned int cpu_count = num_possible_cpus();
+	struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL);
+
+	if (!bufs) {
+		pr_warn("Allocation failure, not measuring misaligned performance\n");
+		return;
+	}
+
+	/*
+	 * Allocate separate buffers for each CPU so there's no fighting over
+	 * cache lines.
+	 */
+	for_each_cpu(cpu, cpu_online_mask) {
+		bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
+		if (!bufs[cpu]) {
+			pr_warn("Allocation failure, not measuring misaligned performance\n");
+			goto out;
+		}
+	}
+
+	/* Check everybody except 0, who stays behind to tend jiffies. */
+	on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
+
+	/* Check core 0. */
+	smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
+
+out:
+	for_each_cpu(cpu, cpu_online_mask) {
+		if (bufs[cpu])
+			__free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
+	}
+
+	kfree(bufs);
+}
+#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
+static void __init check_unaligned_access_speed_all_cpus(void)
+{
+}
+#endif
+
 DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key);
 
 static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
@@ -175,7 +223,7 @@ static void set_unaligned_access_static_branches(void)
 	modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
 }
 
-static int lock_and_set_unaligned_access_static_branch(void)
+static int __init lock_and_set_unaligned_access_static_branch(void)
 {
 	cpus_read_lock();
 	set_unaligned_access_static_branches();
@@ -188,21 +236,29 @@ arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
 static int riscv_online_cpu(unsigned int cpu)
 {
-	static struct page *buf;
-
 	/* We are already set since the last check */
-	if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN)
+	if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) {
 		goto exit;
+	} else if (unaligned_scalar_speed_param != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) {
+		per_cpu(misaligned_access_speed, cpu) = unaligned_scalar_speed_param;
+		goto exit;
+	}
 
-	check_unaligned_access_emulated(NULL);
-	buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
-	if (!buf) {
-		pr_warn("Allocation failure, not measuring misaligned performance\n");
-		return -ENOMEM;
-	}
-
-	check_unaligned_access(buf);
-	__free_pages(buf, MISALIGNED_BUFFER_ORDER);
+#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
+	{
+		static struct page *buf;
+
+		check_unaligned_access_emulated(NULL);
+		buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
+		if (!buf) {
+			pr_warn("Allocation failure, not measuring misaligned performance\n");
+			return -ENOMEM;
+		}
+
+		check_unaligned_access(buf);
+		__free_pages(buf, MISALIGNED_BUFFER_ORDER);
+	}
+#endif
 
 exit:
 	set_unaligned_access_static_branches();
@@ -217,59 +273,6 @@ static int riscv_offline_cpu(unsigned int cpu)
 	return 0;
 }
 
-/* Measure unaligned access speed on all CPUs present at boot in parallel. */
-static int check_unaligned_access_speed_all_cpus(void)
-{
-	unsigned int cpu;
-	unsigned int cpu_count = num_possible_cpus();
-	struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL);
-
-	if (!bufs) {
-		pr_warn("Allocation failure, not measuring misaligned performance\n");
-		return 0;
-	}
-
-	/*
-	 * Allocate separate buffers for each CPU so there's no fighting over
-	 * cache lines.
-	 */
-	for_each_cpu(cpu, cpu_online_mask) {
-		bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
-		if (!bufs[cpu]) {
-			pr_warn("Allocation failure, not measuring misaligned performance\n");
-			goto out;
-		}
-	}
-
-	/* Check everybody except 0, who stays behind to tend jiffies. */
-	on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
-
-	/* Check core 0. */
-	smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
-
-	/*
-	 * Setup hotplug callbacks for any new CPUs that come online or go
-	 * offline.
-	 */
-	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
-				  riscv_online_cpu, riscv_offline_cpu);
-
-out:
-	for_each_cpu(cpu, cpu_online_mask) {
-		if (bufs[cpu])
-			__free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
-	}
-
-	kfree(bufs);
-	return 0;
-}
-#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
-static int check_unaligned_access_speed_all_cpus(void)
-{
-	return 0;
-}
-#endif
-
 #ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
 static void check_vector_unaligned_access(struct work_struct *work __always_unused)
 {
@@ -368,57 +371,109 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unused)
 	__free_pages(page, MISALIGNED_BUFFER_ORDER);
 }
 
+/* Measure unaligned access speed on all CPUs present at boot in parallel. */
+static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
+{
+	schedule_on_each_cpu(check_vector_unaligned_access);
+
+	return 0;
+}
+#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
+static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
+{
+	return 0;
+}
+#endif
+
 static int riscv_online_cpu_vec(unsigned int cpu)
 {
-	if (!has_vector())
+	if (unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) {
+		per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param;
 		return 0;
+	}
 
-	if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED)
+#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
+	if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
 		return 0;
 
 	check_vector_unaligned_access_emulated(NULL);
 	check_vector_unaligned_access(NULL);
+#endif
+
 	return 0;
 }
 
-/* Measure unaligned access speed on all CPUs present at boot in parallel. */
-static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
-{
-	schedule_on_each_cpu(check_vector_unaligned_access);
-
-	/*
-	 * Setup hotplug callbacks for any new CPUs that come online or go
-	 * offline.
-	 */
-	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
-				  riscv_online_cpu_vec, NULL);
-
-	return 0;
-}
-#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
-static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
-{
-	return 0;
-}
-#endif
-
-static int check_unaligned_access_all_cpus(void)
-{
-	bool all_cpus_emulated, all_cpus_vec_unsupported;
-
-	all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
-	all_cpus_vec_unsupported = check_vector_unaligned_access_emulated_all_cpus();
-
-	if (!all_cpus_vec_unsupported &&
-	    IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
-		kthread_run(vec_check_unaligned_access_speed_all_cpus,
-			    NULL, "vec_check_unaligned_access_speed_all_cpus");
-	}
-
-	if (!all_cpus_emulated)
-		return check_unaligned_access_speed_all_cpus();
+static const char * const speed_str[] __initconst = { NULL, NULL, "slow", "fast", "unsupported" };
+
+static int __init set_unaligned_scalar_speed_param(char *str)
+{
+	if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW]))
+		unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW;
+	else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_FAST]))
+		unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_FAST;
+	else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED]))
+		unaligned_scalar_speed_param = RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED;
+	else
+		return -EINVAL;
+
+	return 1;
+}
+__setup("unaligned_scalar_speed=", set_unaligned_scalar_speed_param);
+
+static int __init set_unaligned_vector_speed_param(char *str)
+{
+	if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW]))
+		unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW;
+	else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_FAST]))
+		unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_FAST;
+	else if (!strcmp(str, speed_str[RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED]))
+		unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
+	else
+		return -EINVAL;
+
+	return 1;
+}
+__setup("unaligned_vector_speed=", set_unaligned_vector_speed_param);
+
+static int __init check_unaligned_access_all_cpus(void)
+{
+	int cpu;
+
+	if (unaligned_scalar_speed_param == RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN &&
+	    !check_unaligned_access_emulated_all_cpus()) {
+		check_unaligned_access_speed_all_cpus();
+	} else {
+		pr_info("scalar unaligned access speed set to '%s' by command line\n",
+			speed_str[unaligned_scalar_speed_param]);
+		for_each_online_cpu(cpu)
+			per_cpu(misaligned_access_speed, cpu) = unaligned_scalar_speed_param;
+	}
+
+	if (!has_vector())
+		unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
+
+	if (unaligned_vector_speed_param == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN &&
+	    !check_vector_unaligned_access_emulated_all_cpus() &&
+	    IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
+		kthread_run(vec_check_unaligned_access_speed_all_cpus,
+			    NULL, "vec_check_unaligned_access_speed_all_cpus");
+	} else {
+		pr_info("vector unaligned access speed set to '%s' by command line\n",
+			speed_str[unaligned_vector_speed_param]);
+		for_each_online_cpu(cpu)
+			per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param;
+	}
+
+	/*
+	 * Setup hotplug callbacks for any new CPUs that come online or go
+	 * offline.
+	 */
+	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
+				  riscv_online_cpu, riscv_offline_cpu);
+	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
+				  riscv_online_cpu_vec, NULL);
 
 	return 0;
 }
 
 arch_initcall(check_unaligned_access_all_cpus);
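
The per-CPU values this series sets (misaligned_access_speed and
vector_misaligned_access) are what the riscv_hwprobe(2) syscall reports
to userspace, so the effect of the new parameters can be observed from a
user program. The following is a minimal sketch, not part of this
series; it assumes uapi headers new enough to define
RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF and
RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF:

  /* Illustrative userspace check; build natively on riscv64. */
  #include <stdio.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <asm/hwprobe.h>

  int main(void)
  {
  	/* Only .key is set; the kernel fills in .value. */
  	struct riscv_hwprobe pairs[] = {
  		{ .key = RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF },
  		{ .key = RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF },
  	};

  	/* cpusetsize == 0 and cpus == NULL query all online CPUs. */
  	if (syscall(__NR_riscv_hwprobe, pairs, 2, 0, NULL, 0))
  		return 1;

  	/*
  	 * Values are the RISCV_HWPROBE_MISALIGNED_{SCALAR,VECTOR}_* codes
  	 * used in the diffs above (2 = slow, 3 = fast, 4 = unsupported).
  	 */
  	printf("scalar: %llu, vector: %llu\n",
  	       (unsigned long long)pairs[0].value,
  	       (unsigned long long)pairs[1].value);
  	return 0;
  }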