From fc6f89dc707838564abbb8e22dad8e4d75c7fa26 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 8 Jul 2025 15:47:29 -0700 Subject: [PATCH 1/2] stop_machine: Improve kernel-doc function-header comments Add more detail to the kernel-doc function-header comments for stop_machine(), stop_machine_cpuslocked(), and stop_core_cpuslocked(). Signed-off-by: Paul E. McKenney --- include/linux/stop_machine.h | 64 +++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 3132262a404d..72820503514c 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -88,55 +88,73 @@ static inline void print_stop_info(const char *log_lvl, struct task_struct *task #endif /* CONFIG_SMP */ /* - * stop_machine "Bogolock": stop the entire machine, disable - * interrupts. This is a very heavy lock, which is equivalent to - * grabbing every spinlock (and more). So the "read" side to such a - * lock is anything which disables preemption. + * stop_machine "Bogolock": stop the entire machine, disable interrupts. + * This is a very heavy lock, which is equivalent to grabbing every raw + * spinlock (and more). So the "read" side to such a lock is anything + * which disables preemption. */ #if defined(CONFIG_SMP) || defined(CONFIG_HOTPLUG_CPU) /** * stop_machine: freeze the machine on all CPUs and run this function * @fn: the function to run - * @data: the data ptr for the @fn() - * @cpus: the cpus to run the @fn() on (NULL = any online cpu) + * @data: the data ptr to pass to @fn() + * @cpus: the cpus to run @fn() on (NULL = run on each online CPU) * - * Description: This causes a thread to be scheduled on every cpu, - * each of which disables interrupts. The result is that no one is - * holding a spinlock or inside any other preempt-disabled region when - * @fn() runs. + * Description: This causes a thread to be scheduled on every CPU, which + * will run with interrupts disabled. Each CPU specified by @cpus will + * run @fn. While @fn is executing, there will no other CPUs holding + * a raw spinlock or running within any other type of preempt-disabled + * region of code. * - * This can be thought of as a very heavy write lock, equivalent to - * grabbing every spinlock in the kernel. + * When @cpus specifies only a single CPU, this can be thought of as + * a reader-writer lock where readers disable preemption (for example, + * by holding a raw spinlock) and where the insanely heavy writers run + * @fn while also preventing any other CPU from doing any useful work. + * These writers can also be thought of as having implicitly grabbed every + * raw spinlock in the kernel. * - * Protects against CPU hotplug. + * When @fn is a no-op, this can be thought of as an RCU implementation + * where readers again disable preemption and writers use stop_machine() + * in place of synchronize_rcu(), albeit with orders of magnitude more + * disruption than even that of synchronize_rcu_expedited(). + * + * Although only one stop_machine() operation can proceed at a time, + * the possibility of blocking in cpus_read_lock() means that the caller + * cannot usefully rely on this serialization. + * + * Return: 0 if all invocations of @fn return zero. Otherwise, the + * value returned by an arbitrarily chosen member of the set of calls to + * @fn that returned non-zero. */ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); /** * stop_machine_cpuslocked: freeze the machine on all CPUs and run this function * @fn: the function to run - * @data: the data ptr for the @fn() - * @cpus: the cpus to run the @fn() on (NULL = any online cpu) + * @data: the data ptr to pass to @fn() + * @cpus: the cpus to run @fn() on (NULL = run on each online CPU) * - * Same as above. Must be called from with in a cpus_read_lock() protected - * region. Avoids nested calls to cpus_read_lock(). + * Same as above. Avoids nested calls to cpus_read_lock(). + * + * Context: Must be called from within a cpus_read_lock() protected region. */ int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); /** * stop_core_cpuslocked: - stop all threads on just one core * @cpu: any cpu in the targeted core - * @fn: the function to run - * @data: the data ptr for @fn() + * @fn: the function to run on each CPU in the core containing @cpu + * @data: the data ptr to pass to @fn() * - * Same as above, but instead of every CPU, only the logical CPUs of a - * single core are affected. + * Same as above, but instead of every CPU, only the logical CPUs of the + * single core containing @cpu are affected. * * Context: Must be called from within a cpus_read_lock() protected region. * - * Return: 0 if all executions of @fn returned 0, any non zero return - * value if any returned non zero. + * Return: 0 if all invocations of @fn return zero. Otherwise, the + * value returned by an arbitrarily chosen member of the set of calls to + * @fn that returned non-zero. */ int stop_core_cpuslocked(unsigned int cpu, cpu_stop_fn_t fn, void *data); From cf4fc66746e344181f41604066659073dbb8aaf0 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Tue, 15 Jul 2025 16:01:51 -0400 Subject: [PATCH 2/2] smp: Document preemption and stop_machine() mutual exclusion Recently while revising RCU's cpu online checks, there was some discussion around how IPIs synchronize with hotplug. Add comments explaining how preemption disable creates mutual exclusion with CPU hotplug's stop_machine mechanism. The key insight is that stop_machine() atomically updates CPU masks and flushes IPIs with interrupts disabled, and cannot proceed while any CPU (including the IPI sender) has preemption disabled. [ Apply peterz feedback. ] Cc: Andrea Righi Cc: Paul E. McKenney Cc: Frederic Weisbecker Cc: Peter Zijlstra (Intel) Cc: rcu@vger.kernel.org Acked-by: Paul E. McKenney Co-developed-by: Frederic Weisbecker Signed-off-by: Joel Fernandes Signed-off-by: Paul E. McKenney --- kernel/smp.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/kernel/smp.c b/kernel/smp.c index 974f3a3962e8..23d51a8e582d 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -86,13 +86,15 @@ int smpcfd_dead_cpu(unsigned int cpu) int smpcfd_dying_cpu(unsigned int cpu) { /* - * The IPIs for the smp-call-function callbacks queued by other - * CPUs might arrive late, either due to hardware latencies or - * because this CPU disabled interrupts (inside stop-machine) - * before the IPIs were sent. So flush out any pending callbacks - * explicitly (without waiting for the IPIs to arrive), to - * ensure that the outgoing CPU doesn't go offline with work - * still pending. + * The IPIs for the smp-call-function callbacks queued by other CPUs + * might arrive late, either due to hardware latencies or because this + * CPU disabled interrupts (inside stop-machine) before the IPIs were + * sent. So flush out any pending callbacks explicitly (without waiting + * for the IPIs to arrive), to ensure that the outgoing CPU doesn't go + * offline with work still pending. + * + * This runs with interrupts disabled inside the stopper task invoked by + * stop_machine(), ensuring mutually exclusive CPU offlining and IPI flush. */ __flush_smp_call_function_queue(false); irq_work_run(); @@ -418,6 +420,10 @@ void __smp_call_single_queue(int cpu, struct llist_node *node) */ static int generic_exec_single(int cpu, call_single_data_t *csd) { + /* + * Preemption already disabled here so stopper cannot run on this CPU, + * ensuring mutually exclusive CPU offlining and last IPI flush. + */ if (cpu == smp_processor_id()) { smp_call_func_t func = csd->func; void *info = csd->info; @@ -638,8 +644,10 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, int err; /* - * prevent preemption and reschedule on another processor, - * as well as CPU removal + * Prevent preemption and reschedule on another CPU, as well as CPU + * removal. This prevents stopper from running on this CPU, thus + * providing mutual exclusion of the below cpu_online() check and + * IPI sending ensuring IPI are not missed by CPU going offline. */ this_cpu = get_cpu();