From c14ecb555c3ee80eeb030a4e46d00e679537f03a Mon Sep 17 00:00:00 2001
From: Alexander Sverdlin
Date: Fri, 19 Sep 2025 11:12:38 +0200
Subject: [PATCH 01/19] locking/spinlock/debug: Fix data-race in do_raw_write_lock

KCSAN reports:

BUG: KCSAN: data-race in do_raw_write_lock / do_raw_write_lock

write (marked) to 0xffff800009cf504c of 4 bytes by task 1102 on cpu 1:
 do_raw_write_lock+0x120/0x204
 _raw_write_lock_irq
 do_exit
 call_usermodehelper_exec_async
 ret_from_fork

read to 0xffff800009cf504c of 4 bytes by task 1103 on cpu 0:
 do_raw_write_lock+0x88/0x204
 _raw_write_lock_irq
 do_exit
 call_usermodehelper_exec_async
 ret_from_fork

value changed: 0xffffffff -> 0x00000001

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 1103 Comm: kworker/u4:1 6.1.111

Commit 1a365e822372 ("locking/spinlock/debug: Fix various data races")
has addressed most of these races, but the conversion was not complete:
in do_raw_write_lock() only the debug_write_lock_after() part has been
converted, but not the debug_write_lock_before() part. Do it now.

Fixes: 1a365e822372 ("locking/spinlock/debug: Fix various data races")
Reported-by: Adrian Freihofer
Signed-off-by: Alexander Sverdlin
Signed-off-by: Boqun Feng
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Paul E. McKenney
Acked-by: Waiman Long
Cc: stable@vger.kernel.org
---
 kernel/locking/spinlock_debug.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
index 87b03d2e41db..2338b3adfb55 100644
--- a/kernel/locking/spinlock_debug.c
+++ b/kernel/locking/spinlock_debug.c
@@ -184,8 +184,8 @@ void do_raw_read_unlock(rwlock_t *lock)
 static inline void debug_write_lock_before(rwlock_t *lock)
 {
 	RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
-	RWLOCK_BUG_ON(lock->owner == current, lock, "recursion");
-	RWLOCK_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
+	RWLOCK_BUG_ON(READ_ONCE(lock->owner) == current, lock, "recursion");
+	RWLOCK_BUG_ON(READ_ONCE(lock->owner_cpu) == raw_smp_processor_id(),
 			lock, "cpu recursion");
 }

From da123f0ee40f0e5a3791bbaf58a1db1744c59f72 Mon Sep 17 00:00:00 2001
From: Daniel Almeida
Date: Fri, 19 Sep 2025 11:12:39 +0200
Subject: [PATCH 02/19] rust: lock: guard: Add T: Unpin bound to DerefMut

A core property of pinned types is not handing out a mutable reference
to the inner data in safe code, as this trivially allows that data to
be moved.

Enforce this condition by adding a bound on lock::Guard's DerefMut
implementation, so that it's only implemented for pinning-agnostic
types.

Suggested-by: Benno Lossin
Suggested-by: Boqun Feng
Signed-off-by: Daniel Almeida
Signed-off-by: Boqun Feng
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Benno Lossin
Reviewed-by: Alice Ryhl
Link: https://github.com/Rust-for-Linux/linux/issues/1181
---
 rust/kernel/sync/lock.rs        | 5 ++++-
 rust/kernel/sync/lock/global.rs | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/rust/kernel/sync/lock.rs b/rust/kernel/sync/lock.rs
index 27202beef90c..b482f34bf0ce 100644
--- a/rust/kernel/sync/lock.rs
+++ b/rust/kernel/sync/lock.rs
@@ -251,7 +251,10 @@ fn deref(&self) -> &Self::Target {
     }
 }
 
-impl<T: ?Sized, B: Backend> core::ops::DerefMut for Guard<'_, T, B> {
+impl<T: ?Sized, B: Backend> core::ops::DerefMut for Guard<'_, T, B>
+where
+    T: Unpin,
+{
     fn deref_mut(&mut self) -> &mut Self::Target {
         // SAFETY: The caller owns the lock, so it is safe to deref the protected data.
         unsafe { &mut *self.lock.data.get() }

diff --git a/rust/kernel/sync/lock/global.rs b/rust/kernel/sync/lock/global.rs
index d65f94b5caf2..38b448032799 100644
--- a/rust/kernel/sync/lock/global.rs
+++ b/rust/kernel/sync/lock/global.rs
@@ -106,7 +106,10 @@ fn deref(&self) -> &Self::Target {
     }
 }
 
-impl<B: GlobalLockBackend> core::ops::DerefMut for GlobalGuard<B> {
+impl<B: GlobalLockBackend> core::ops::DerefMut for GlobalGuard<B>
+where
+    B::Item: Unpin,
+{
     fn deref_mut(&mut self) -> &mut Self::Target {
         &mut self.inner
     }

From 2497a7116ff9a051d0e78885a27a52213bc2841d Mon Sep 17 00:00:00 2001
From: Daniel Almeida
Date: Fri, 19 Sep 2025 11:12:40 +0200
Subject: [PATCH 03/19] rust: lock: Pin the inner data

In preparation to support Lock<T> where T is pinned, the first thing
that needs to be done is to structurally pin the 'data' member.

This switches the 't' parameter in Lock::new() to take in an
impl PinInit<T> instead of a plain T. This in turn uses the blanket
implementation "impl PinInit<T> for T".

Subsequent patches will touch on Guard.

Suggested-by: Benno Lossin
Suggested-by: Boqun Feng
Signed-off-by: Daniel Almeida
Signed-off-by: Boqun Feng
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Benno Lossin
Reviewed-by: Alice Ryhl
Link: https://github.com/Rust-for-Linux/linux/issues/1181
---
 rust/kernel/sync/lock.rs | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/rust/kernel/sync/lock.rs b/rust/kernel/sync/lock.rs
index b482f34bf0ce..9242790d15db 100644
--- a/rust/kernel/sync/lock.rs
+++ b/rust/kernel/sync/lock.rs
@@ -11,7 +11,7 @@
     types::{NotThreadSafe, Opaque, ScopeGuard},
 };
 use core::{cell::UnsafeCell, marker::PhantomPinned, pin::Pin};
-use pin_init::{pin_data, pin_init, PinInit};
+use pin_init::{pin_data, pin_init, PinInit, Wrapper};
 
 pub mod mutex;
 pub mod spinlock;
@@ -115,6 +115,7 @@ pub struct Lock<T: ?Sized, B: Backend> {
     _pin: PhantomPinned,
 
     /// The data protected by the lock.
+    #[pin]
     pub(crate) data: UnsafeCell<T>,
 }
@@ -127,9 +128,13 @@ unsafe impl<T: ?Sized + Send, B: Backend> Sync for Lock<T, B> {}
 
 impl<T, B: Backend> Lock<T, B> {
     /// Constructs a new lock initialiser.
-    pub fn new(t: T, name: &'static CStr, key: Pin<&'static LockClassKey>) -> impl PinInit<Self> {
+    pub fn new(
+        t: impl PinInit<T>,
+        name: &'static CStr,
+        key: Pin<&'static LockClassKey>,
+    ) -> impl PinInit<Self> {
         pin_init!(Self {
-            data: UnsafeCell::new(t),
+            data <- UnsafeCell::pin_init(t),
             _pin: PhantomPinned,
             // SAFETY: `slot` is valid while the closure is called and both `name` and `key` have
             // static lifetimes so they live indefinitely.

From 66f1ea83d9f8346324fc50779944297d778cac95 Mon Sep 17 00:00:00 2001
From: Daniel Almeida
Date: Fri, 19 Sep 2025 11:12:41 +0200
Subject: [PATCH 04/19] rust: lock: Add a Pin<&mut T> accessor

In order for callers to be able to access the inner T safely if
T: !Unpin, there needs to be a way to get a Pin<&mut T>. Add this
accessor and a corresponding example to tell users how it works.

This requires the pin projection functionality [1] for better
ergonomics.
[boqun: Apply Daniel's fix to the code example, add the reference to
 pin projection patch and remove out-of-date part in the commit log]

Suggested-by: Benno Lossin
Suggested-by: Boqun Feng
Signed-off-by: Daniel Almeida
Signed-off-by: Boqun Feng
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Alice Ryhl
Reviewed-by: Benno Lossin
Link: https://github.com/Rust-for-Linux/linux/issues/1181
Link: https://lore.kernel.org/rust-for-linux/20250912174148.373530-1-lossin@kernel.org/ [1]
---
 rust/kernel/sync/lock.rs | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/rust/kernel/sync/lock.rs b/rust/kernel/sync/lock.rs
index 9242790d15db..cb00fdb94ffd 100644
--- a/rust/kernel/sync/lock.rs
+++ b/rust/kernel/sync/lock.rs
@@ -245,6 +245,31 @@ pub(crate) fn do_unlocked<U>(&mut self, cb: impl FnOnce() -> U) -> U {
 
         cb()
     }
+
+    /// Returns a pinned mutable reference to the protected data.
+    ///
+    /// The guard implements [`DerefMut`] when `T: Unpin`, so for [`Unpin`]
+    /// types [`DerefMut`] should be used instead of this function.
+    ///
+    /// [`DerefMut`]: core::ops::DerefMut
+    /// [`Unpin`]: core::marker::Unpin
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use kernel::sync::{Mutex, MutexGuard};
+    /// # use core::{pin::Pin, marker::PhantomPinned};
+    /// struct Data(PhantomPinned);
+    ///
+    /// fn example(mutex: &Mutex<Data>) {
+    ///     let mut data: MutexGuard<'_, Data> = mutex.lock();
+    ///     let mut data: Pin<&mut Data> = data.as_mut();
+    /// }
+    /// ```
+    pub fn as_mut(&mut self) -> Pin<&mut T> {
+        // SAFETY: `self.lock.data` is structurally pinned.
+        unsafe { Pin::new_unchecked(&mut *self.lock.data.get()) }
+    }
 }
 
 impl<T: ?Sized, B: Backend> core::ops::Deref for Guard<'_, T, B> {

From 44472d1b83127e579c798ff92a07ae86d98b61b9 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Mon, 6 Oct 2025 13:07:32 +0200
Subject: [PATCH 05/19] atomic: Skip alignment check for try_cmpxchg() old arg

The 'old' argument in atomic_try_cmpxchg() and related functions is a
pointer to a normal non-atomic integer, which is not required to be
naturally aligned, unlike the atomic_t/atomic64_t types themselves.

In order to allow adding an alignment check with CONFIG_DEBUG_ATOMIC
into the normal instrument_atomic_read_write() helper, change this
check to use the non-atomic instrument_read_write(), the same way that
was done earlier for try_cmpxchg() in commit ec570320b09f
("locking/atomic: Correct (cmp)xchg() instrumentation").

This prevents warnings on m68k, which calls the 32-bit
atomic_try_cmpxchg() with 16-bit aligned arguments, as well as on
several more architectures, including x86-32, when calling
atomic64_try_cmpxchg() with 32-bit aligned u64 arguments.
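For illustration, a sketch of the pattern the instrumentation has to
tolerate (the function and variable names below are made up for the
example; only atomic64_try_cmpxchg() itself is the real API): on x86-32
a stack-allocated s64 'old' may be only 32-bit aligned, while the
atomic64_t it is compared against is 64-bit aligned:

	static atomic64_t example_counter = ATOMIC64_INIT(0);

	static bool example_advance(s64 expected, s64 next)
	{
		/*
		 * 'old' is a plain integer on the stack: it is accessed
		 * non-atomically and needs no natural alignment, so an
		 * 8-byte alignment check on it would be a false positive.
		 */
		s64 old = expected;

		return atomic64_try_cmpxchg(&example_counter, &old, next);
	}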
Reported-by: Finn Thain Signed-off-by: Arnd Bergmann Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/cover.1757810729.git.fthain@linux-m68k.org/ --- include/linux/atomic/atomic-instrumented.h | 26 +++++++++++----------- scripts/atomic/gen-atomic-instrumented.sh | 11 +++++---- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h index 9409a6ddf3e0..37ab6314a9f7 100644 --- a/include/linux/atomic/atomic-instrumented.h +++ b/include/linux/atomic/atomic-instrumented.h @@ -1276,7 +1276,7 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic_try_cmpxchg(v, old, new); } @@ -1298,7 +1298,7 @@ static __always_inline bool atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) { instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic_try_cmpxchg_acquire(v, old, new); } @@ -1321,7 +1321,7 @@ atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic_try_cmpxchg_release(v, old, new); } @@ -1343,7 +1343,7 @@ static __always_inline bool atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new) { instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic_try_cmpxchg_relaxed(v, old, new); } @@ -2854,7 +2854,7 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic64_try_cmpxchg(v, old, new); } @@ -2876,7 +2876,7 @@ static __always_inline bool atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) { instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic64_try_cmpxchg_acquire(v, old, new); } @@ -2899,7 +2899,7 @@ atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic64_try_cmpxchg_release(v, old, new); } @@ -2921,7 +2921,7 @@ static __always_inline bool atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new) { instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic64_try_cmpxchg_relaxed(v, old, new); } @@ -4432,7 +4432,7 @@ atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new) { kcsan_mb(); instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic_long_try_cmpxchg(v, old, new); } @@ -4454,7 +4454,7 @@ static __always_inline bool atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new) { instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return 
raw_atomic_long_try_cmpxchg_acquire(v, old, new); } @@ -4477,7 +4477,7 @@ atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new) { kcsan_release(); instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic_long_try_cmpxchg_release(v, old, new); } @@ -4499,7 +4499,7 @@ static __always_inline bool atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new) { instrument_atomic_read_write(v, sizeof(*v)); - instrument_atomic_read_write(old, sizeof(*old)); + instrument_read_write(old, sizeof(*old)); return raw_atomic_long_try_cmpxchg_relaxed(v, old, new); } @@ -5050,4 +5050,4 @@ atomic_long_dec_if_positive(atomic_long_t *v) #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ -// 8829b337928e9508259079d32581775ececd415b +// f618ac667f868941a84ce0ab2242f1786e049ed4 diff --git a/scripts/atomic/gen-atomic-instrumented.sh b/scripts/atomic/gen-atomic-instrumented.sh index 592f3ec89b5f..9c1d53f81eb2 100755 --- a/scripts/atomic/gen-atomic-instrumented.sh +++ b/scripts/atomic/gen-atomic-instrumented.sh @@ -12,7 +12,7 @@ gen_param_check() local arg="$1"; shift local type="${arg%%:*}" local name="$(gen_param_name "${arg}")" - local rw="write" + local rw="atomic_write" case "${type#c}" in i) return;; @@ -20,14 +20,17 @@ gen_param_check() if [ ${type#c} != ${type} ]; then # We don't write to constant parameters. - rw="read" + rw="atomic_read" + elif [ "${type}" = "p" ] ; then + # The "old" argument in try_cmpxchg() gets accessed non-atomically + rw="read_write" elif [ "${meta}" != "s" ]; then # An atomic RMW: if this parameter is not a constant, and this atomic is # not just a 's'tore, this parameter is both read from and written to. - rw="read_write" + rw="atomic_read_write" fi - printf "\tinstrument_atomic_${rw}(${name}, sizeof(*${name}));\n" + printf "\tinstrument_${rw}(${name}, sizeof(*${name}));\n" } #gen_params_checks(meta, arg...) From 28a0ee311960baad97bf85e1e995aed4a71e22a2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 28 Sep 2025 18:20:29 +0200 Subject: [PATCH 06/19] documentation: seqlock: fix the wrong documentation of read_seqbegin_or_lock/need_seqretry The comments and pseudo code in Documentation/locking/seqlock.rst are wrong: int seq = 0; do { read_seqbegin_or_lock(&foo_seqlock, &seq); /* ... [[read-side critical section]] ... */ } while (need_seqretry(&foo_seqlock, seq)); read_seqbegin_or_lock() always returns with an even "seq" and need_seqretry() doesn't change this counter. This means that seq is always even and thus the locking pass is simply impossible. IOW, "_or_lock" has no effect and this code doesn't differ from do { seq = read_seqbegin(&foo_seqlock); /* ... [[read-side critical section]] ... */ } while (read_seqretry(&foo_seqlock, seq)); Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) --- Documentation/locking/seqlock.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Documentation/locking/seqlock.rst b/Documentation/locking/seqlock.rst index 3fb7ea3ab22a..9899871d3d9a 100644 --- a/Documentation/locking/seqlock.rst +++ b/Documentation/locking/seqlock.rst @@ -220,13 +220,14 @@ Read path, three categories: according to a passed marker. This is used to avoid lockless readers starvation (too much retry loops) in case of a sharp spike in write activity. First, a lockless read is tried (even marker passed). 
If
-   that trial fails (odd sequence counter is returned, which is used as
-   the next iteration marker), the lockless read is transformed to a
-   full locking read and no retry loop is necessary::
+   that trial fails (sequence counter doesn't match), make the marker
+   odd for the next iteration, the lockless read is transformed to a
+   full locking read and no retry loop is necessary, for example::
 
     /* marker; even initialization */
-    int seq = 0;
+    int seq = 1;
     do {
+        seq++; /* 2 on the 1st/lockless path, otherwise odd */
         read_seqbegin_or_lock(&foo_seqlock, &seq);
 
         /* ... [[read-side critical section]] ... */

From cc39f3872c0865bef992b713338df369554fa9e0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 9 Oct 2025 22:11:54 +0200
Subject: [PATCH 07/19] seqlock: Introduce scoped_seqlock_read()

The read_seqbegin/need_seqretry/done_seqretry API is cumbersome and
error prone. With the new helper the "typical" code like

	int seq, nextseq;
	unsigned long flags;

	nextseq = 0;
	do {
		seq = nextseq;
		flags = read_seqbegin_or_lock_irqsave(&seqlock, &seq);
		// read-side critical section
		nextseq = 1;
	} while (need_seqretry(&seqlock, seq));
	done_seqretry_irqrestore(&seqlock, seq, flags);

can be rewritten as

	scoped_seqlock_read (&seqlock, ss_lock_irqsave) {
		// read-side critical section
	}

Original idea by Oleg Nesterov; with contributions from Linus.

Originally-by: Oleg Nesterov
Signed-off-by: Peter Zijlstra (Intel)
---
 include/linux/seqlock.h | 111 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 5ce48eab7a2a..b7bcc4111e90 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -1209,4 +1209,115 @@ done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
 	if (seq & 1)
 		read_sequnlock_excl_irqrestore(lock, flags);
 }
+
+enum ss_state {
+	ss_done = 0,
+	ss_lock,
+	ss_lock_irqsave,
+	ss_lockless,
+};
+
+struct ss_tmp {
+	enum ss_state	state;
+	unsigned long	data;
+	spinlock_t	*lock;
+	spinlock_t	*lock_irqsave;
+};
+
+static inline void __scoped_seqlock_cleanup(struct ss_tmp *sst)
+{
+	if (sst->lock)
+		spin_unlock(sst->lock);
+	if (sst->lock_irqsave)
+		spin_unlock_irqrestore(sst->lock_irqsave, sst->data);
+}
+
+extern void __scoped_seqlock_invalid_target(void);
+
+#if defined(CONFIG_CC_IS_GCC) && CONFIG_GCC_VERSION < 90000
+/*
+ * For some reason some GCC-8 architectures (nios2, alpha) have trouble
+ * determining that the ss_done state is impossible in __scoped_seqlock_next()
+ * below.
+ */
+static inline void __scoped_seqlock_bug(void) { }
+#else
+/*
+ * Canary for compiler optimization -- if the compiler doesn't realize this is
+ * an impossible state, it very likely generates sub-optimal code here.
+ */ +extern void __scoped_seqlock_bug(void); +#endif + +static inline void +__scoped_seqlock_next(struct ss_tmp *sst, seqlock_t *lock, enum ss_state target) +{ + switch (sst->state) { + case ss_done: + __scoped_seqlock_bug(); + return; + + case ss_lock: + case ss_lock_irqsave: + sst->state = ss_done; + return; + + case ss_lockless: + if (!read_seqretry(lock, sst->data)) { + sst->state = ss_done; + return; + } + break; + } + + switch (target) { + case ss_done: + __scoped_seqlock_invalid_target(); + return; + + case ss_lock: + sst->lock = &lock->lock; + spin_lock(sst->lock); + sst->state = ss_lock; + return; + + case ss_lock_irqsave: + sst->lock_irqsave = &lock->lock; + spin_lock_irqsave(sst->lock_irqsave, sst->data); + sst->state = ss_lock_irqsave; + return; + + case ss_lockless: + sst->data = read_seqbegin(lock); + return; + } +} + +#define __scoped_seqlock_read(_seqlock, _target, _s) \ + for (struct ss_tmp _s __cleanup(__scoped_seqlock_cleanup) = \ + { .state = ss_lockless, .data = read_seqbegin(_seqlock) }; \ + _s.state != ss_done; \ + __scoped_seqlock_next(&_s, _seqlock, _target)) + +/** + * scoped_seqlock_read (lock, ss_state) - execute the read side critical + * section without manual sequence + * counter handling or calls to other + * helpers + * @lock: pointer to seqlock_t protecting the data + * @ss_state: one of {ss_lock, ss_lock_irqsave, ss_lockless} indicating + * the type of critical read section + * + * Example: + * + * scoped_seqlock_read (&lock, ss_lock) { + * // read-side critical section + * } + * + * Starts with a lockess pass first. If it fails, restarts the critical + * section with the lock held. + */ +#define scoped_seqlock_read(_seqlock, _target) \ + __scoped_seqlock_read(_seqlock, _target, __UNIQUE_ID(seqlock)) + #endif /* __LINUX_SEQLOCK_H */ From 488f48b32654dc6be04d9cc12f75ce030c9cb21b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Oct 2025 14:30:52 +0200 Subject: [PATCH 08/19] seqlock: Change thread_group_cputime() to use scoped_seqlock_read() To simplify the code and make it more readable. While at it, change thread_group_cputime() to use __for_each_thread(sig). [peterz: update to new interface] Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) --- kernel/sched/cputime.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 7097de2c8cda..4f97896887ec 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -313,10 +313,8 @@ static u64 read_sum_exec_runtime(struct task_struct *t) void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) { struct signal_struct *sig = tsk->signal; - u64 utime, stime; struct task_struct *t; - unsigned int seq, nextseq; - unsigned long flags; + u64 utime, stime; /* * Update current task runtime to account pending time since last @@ -329,27 +327,19 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) if (same_thread_group(current, tsk)) (void) task_sched_runtime(current); - rcu_read_lock(); - /* Attempt a lockless read on the first round. 
*/ - nextseq = 0; - do { - seq = nextseq; - flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); + guard(rcu)(); + scoped_seqlock_read (&sig->stats_lock, ss_lock_irqsave) { times->utime = sig->utime; times->stime = sig->stime; times->sum_exec_runtime = sig->sum_sched_runtime; - for_each_thread(tsk, t) { + __for_each_thread(sig, t) { task_cputime(t, &utime, &stime); times->utime += utime; times->stime += stime; times->sum_exec_runtime += read_sum_exec_runtime(t); } - /* If lockless access failed, take the lock. */ - nextseq = 1; - } while (need_seqretry(&sig->stats_lock, seq)); - done_seqretry_irqrestore(&sig->stats_lock, seq, flags); - rcu_read_unlock(); + } } #ifdef CONFIG_IRQ_TIME_ACCOUNTING From b76f72bea2c601afec81829ea427fc0d20f83216 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Oct 2025 14:30:59 +0200 Subject: [PATCH 09/19] seqlock: Change do_task_stat() to use scoped_seqlock_read() To simplify the code and make it more readable. [peterz: change to new interface] Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) --- fs/proc/array.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index 2ae63189091e..cbd4bc4a58e4 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -481,7 +481,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, unsigned long flags; int exit_code = task->exit_code; struct signal_struct *sig = task->signal; - unsigned int seq = 1; state = *get_task_state(task); vsize = eip = esp = 0; @@ -538,10 +537,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, if (permitted && (!whole || num_threads < 2)) wchan = !task_is_running(task); - do { - seq++; /* 2 on the 1st/lockless path, otherwise odd */ - flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); - + scoped_seqlock_read (&sig->stats_lock, ss_lock_irqsave) { cmin_flt = sig->cmin_flt; cmaj_flt = sig->cmaj_flt; cutime = sig->cutime; @@ -563,8 +559,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, } rcu_read_unlock(); } - } while (need_seqretry(&sig->stats_lock, seq)); - done_seqretry_irqrestore(&sig->stats_lock, seq, flags); + } if (whole) { thread_group_cputime_adjusted(task, &utime, &stime); From 795aab353d0650b2d04dc3aa2e22a51000cb2aaa Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 8 Oct 2025 14:31:05 +0200 Subject: [PATCH 10/19] seqlock: Change do_io_accounting() to use scoped_seqlock_read() To simplify the code and make it more readable. 
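For reference, the open-coded pattern being replaced (visible in the
removals below), shown here as a sketch with the declarations included:

	unsigned int seq = 1;
	unsigned long flags;

	rcu_read_lock();
	do {
		seq++; /* 2 on the 1st/lockless path, otherwise odd */
		flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);

		/* ... accumulate the io accounting ... */

	} while (need_seqretry(&sig->stats_lock, seq));
	done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
	rcu_read_unlock();

With guard(rcu)() and the new helper this becomes:

	guard(rcu)();
	scoped_seqlock_read (&sig->stats_lock, ss_lock_irqsave) {
		/* ... accumulate the io accounting ... */
	}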
[peterz: change to new interface]
Signed-off-by: Oleg Nesterov
Signed-off-by: Peter Zijlstra (Intel)
---
 fs/proc/base.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6299878e3d97..407b41cb6e7c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3043,21 +3043,14 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
 	if (whole) {
 		struct signal_struct *sig = task->signal;
 		struct task_struct *t;
-		unsigned int seq = 1;
-		unsigned long flags;
 
-		rcu_read_lock();
-		do {
-			seq++; /* 2 on the 1st/lockless path, otherwise odd */
-			flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
 
+		guard(rcu)();
+		scoped_seqlock_read (&sig->stats_lock, ss_lock_irqsave) {
 			acct = sig->ioac;
 			__for_each_thread(sig, t)
 				task_io_accounting_add(&acct, &t->ioac);
-
-		} while (need_seqretry(&sig->stats_lock, seq));
-		done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
-		rcu_read_unlock();
+		}
 	} else {
 		acct = task->ioac;
 	}

From 37d0472c8ac441af8bc10fc4959ad9d62dd5fa4c Mon Sep 17 00:00:00 2001
From: Boqun Feng
Date: Tue, 21 Oct 2025 23:42:37 -0400
Subject: [PATCH 11/19] rust: debugfs: Implement Reader for Mutex<T> only when
 T is Unpin

Since we are going to make `Mutex<T>` structurally pin the data
(i.e. `T`), the `.lock()` function will only return a `Guard` that can
hand out a mutable reference to `T` when `T` is `Unpin`, so restrict
the `impl Reader` block for `Mutex<T>` accordingly.

Signed-off-by: Boqun Feng
Signed-off-by: Peter Zijlstra (Intel)
Acked-by: Danilo Krummrich
Link: https://patch.msgid.link/20251022034237.70431-1-boqun.feng@gmail.com
---
 rust/kernel/debugfs/traits.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust/kernel/debugfs/traits.rs b/rust/kernel/debugfs/traits.rs
index ab009eb254b3..ba7ec5a900b8 100644
--- a/rust/kernel/debugfs/traits.rs
+++ b/rust/kernel/debugfs/traits.rs
@@ -50,7 +50,7 @@ pub trait Reader {
     fn read_from_slice(&self, reader: &mut UserSliceReader) -> Result;
 }
 
-impl<T: FromStr> Reader for Mutex<T> {
+impl<T: FromStr + Unpin> Reader for Mutex<T> {
     fn read_from_slice(&self, reader: &mut UserSliceReader) -> Result {
         let mut buf = [0u8; 128];
         if reader.len() > buf.len() {

From b94d45b6bbb42571ec225d3be0e7457c8765a5b4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 28 Oct 2025 09:56:38 +0100
Subject: [PATCH 12/19] seqlock: Allow KASAN to fail optimizing

Some KASAN builds are failing to properly optimize this code --
luckily we don't care about code quality for KASAN builds, so just
exclude it.

Reported-by: kernel test robot
Signed-off-by: Peter Zijlstra (Intel)
Closes: https://lore.kernel.org/oe-kbuild-all/202510251641.idrNXhv5-lkp@intel.com/
---
 include/linux/seqlock.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index b7bcc4111e90..a8a8661839b6 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -1234,11 +1234,14 @@ static inline void __scoped_seqlock_cleanup(struct ss_tmp *sst)
 
 extern void __scoped_seqlock_invalid_target(void);
 
-#if defined(CONFIG_CC_IS_GCC) && CONFIG_GCC_VERSION < 90000
+#if (defined(CONFIG_CC_IS_GCC) && CONFIG_GCC_VERSION < 90000) || defined(CONFIG_KASAN)
 /*
  * For some reason some GCC-8 architectures (nios2, alpha) have trouble
  * determining that the ss_done state is impossible in __scoped_seqlock_next()
  * below.
+ *
+ * Similarly KASAN is known to confuse compilers enough to break this. But we
+ * don't care about code quality for KASAN builds anyway.
 */
 static inline void __scoped_seqlock_bug(void) { }
 #else

From 14e9a18b07ec463a85094cc8942788336164319f Mon Sep 17 00:00:00 2001
From: Boqun Feng
Date: Tue, 21 Oct 2025 23:53:22 -0400
Subject: [PATCH 13/19] rust: sync: atomic: Make Atomic*Ops pub(crate)

In order to write code over a generic Atomic<T> we need to make
Atomic*Ops public so that functions like `.load()` and `.store()` are
available. Make these pub(crate) at the beginning so that the usage in
the kernel crate is supported.

Tested-by: David Gow
Acked-by: Greg Kroah-Hartman
Signed-off-by: Boqun Feng
Link: https://patch.msgid.link/20251022035324.70785-2-boqun.feng@gmail.com
---
 rust/kernel/sync/atomic.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/rust/kernel/sync/atomic.rs b/rust/kernel/sync/atomic.rs
index 016a6bcaf080..0bc6e7b04d67 100644
--- a/rust/kernel/sync/atomic.rs
+++ b/rust/kernel/sync/atomic.rs
@@ -22,9 +22,10 @@
 
 pub use internal::AtomicImpl;
 pub use ordering::{Acquire, Full, Relaxed, Release};
+pub(crate) use internal::{AtomicArithmeticOps, AtomicBasicOps, AtomicExchangeOps};
 
 use crate::build_error;
-use internal::{AtomicArithmeticOps, AtomicBasicOps, AtomicExchangeOps, AtomicRepr};
+use internal::AtomicRepr;
 use ordering::OrderingType;
 
 /// A memory location which can be safely modified from multiple execution contexts.

From 013f912eb5fa7c06b3648ca630acfc4ff26456fa Mon Sep 17 00:00:00 2001
From: Boqun Feng
Date: Tue, 21 Oct 2025 23:53:23 -0400
Subject: [PATCH 14/19] rust: sync: atomic: Implement Debug for Atomic<T>

If `Atomic<T>` is `Debug` then it's a `debugfs::Writer`, so make it so,
since 1) debugfs needs to support `Atomic<T>` and 2) it's rather
trivial to implement `Debug` for `Atomic<T>`.

Tested-by: David Gow
Acked-by: Greg Kroah-Hartman
Signed-off-by: Boqun Feng
Link: https://patch.msgid.link/20251022035324.70785-3-boqun.feng@gmail.com
---
 rust/kernel/sync/atomic.rs | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/rust/kernel/sync/atomic.rs b/rust/kernel/sync/atomic.rs
index 0bc6e7b04d67..3afc376be42d 100644
--- a/rust/kernel/sync/atomic.rs
+++ b/rust/kernel/sync/atomic.rs
@@ -307,6 +307,15 @@ pub fn store<Ordering: ReleaseOrRelaxed>(&self, v: T, _: Ordering) {
     }
 }
 
+impl<T: AtomicType + core::fmt::Debug> core::fmt::Debug for Atomic<T>
+where
+    T::Repr: AtomicBasicOps,
+{
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        core::fmt::Debug::fmt(&self.load(Relaxed), f)
+    }
+}
+
 impl<T: AtomicType> Atomic<T>
 where
     T::Repr: AtomicExchangeOps,

From f74cf399e02e24c544b0bd4b1fe8fa2c5ae30b18 Mon Sep 17 00:00:00 2001
From: Boqun Feng
Date: Tue, 21 Oct 2025 23:53:24 -0400
Subject: [PATCH 15/19] rust: debugfs: Replace the usage of Rust native atomics

Rust native atomics are not allowed to be used in the kernel due to the
mismatch between their memory model and the Linux kernel memory model,
hence remove the usage of Rust native atomics in debugfs.

Reviewed-by: Matthew Maurer
Acked-by: Danilo Krummrich
Tested-by: David Gow
Acked-by: Greg Kroah-Hartman
Signed-off-by: Boqun Feng
Link: https://patch.msgid.link/20251022035324.70785-4-boqun.feng@gmail.com
---
 rust/kernel/debugfs/traits.rs       | 53 +++++++++--------------------
 samples/rust/rust_debugfs.rs        | 12 +++----
 samples/rust/rust_debugfs_scoped.rs |  6 ++--
 3 files changed, 25 insertions(+), 46 deletions(-)

diff --git a/rust/kernel/debugfs/traits.rs b/rust/kernel/debugfs/traits.rs
index ba7ec5a900b8..92054fed2136 100644
--- a/rust/kernel/debugfs/traits.rs
+++ b/rust/kernel/debugfs/traits.rs
@@ -4,14 +4,11 @@
 //! Traits for rendering or updating values exported to DebugFS.
 use crate::prelude::*;
+use crate::sync::atomic::{Atomic, AtomicBasicOps, AtomicType, Relaxed};
 use crate::sync::Mutex;
 use crate::uaccess::UserSliceReader;
 use core::fmt::{self, Debug, Formatter};
 use core::str::FromStr;
-use core::sync::atomic::{
-    AtomicI16, AtomicI32, AtomicI64, AtomicI8, AtomicIsize, AtomicU16, AtomicU32, AtomicU64,
-    AtomicU8, AtomicUsize, Ordering,
-};
 
 /// A trait for types that can be written into a string.
 ///
@@ -66,37 +63,21 @@ fn read_from_slice(&self, reader: &mut UserSliceReader) -> Result {
     }
 }
 
-macro_rules! impl_reader_for_atomic {
-    ($(($atomic_type:ty, $int_type:ty)),*) => {
-        $(
-            impl Reader for $atomic_type {
-                fn read_from_slice(&self, reader: &mut UserSliceReader) -> Result {
-                    let mut buf = [0u8; 21]; // Enough for a 64-bit number.
-                    if reader.len() > buf.len() {
-                        return Err(EINVAL);
-                    }
-                    let n = reader.len();
-                    reader.read_slice(&mut buf[..n])?;
+impl<T: AtomicType + FromStr> Reader for Atomic<T>
+where
+    T::Repr: AtomicBasicOps,
+{
+    fn read_from_slice(&self, reader: &mut UserSliceReader) -> Result {
+        let mut buf = [0u8; 21]; // Enough for a 64-bit number.
+        if reader.len() > buf.len() {
+            return Err(EINVAL);
+        }
+        let n = reader.len();
+        reader.read_slice(&mut buf[..n])?;
 
-                    let s = core::str::from_utf8(&buf[..n]).map_err(|_| EINVAL)?;
-                    let val = s.trim().parse::<$int_type>().map_err(|_| EINVAL)?;
-                    self.store(val, Ordering::Relaxed);
-                    Ok(())
-                }
-            }
-        )*
-    };
+        let s = core::str::from_utf8(&buf[..n]).map_err(|_| EINVAL)?;
+        let val = s.trim().parse::<T>().map_err(|_| EINVAL)?;
+        self.store(val, Relaxed);
+        Ok(())
+    }
 }
-
-impl_reader_for_atomic!(
-    (AtomicI16, i16),
-    (AtomicI32, i32),
-    (AtomicI64, i64),
-    (AtomicI8, i8),
-    (AtomicIsize, isize),
-    (AtomicU16, u16),
-    (AtomicU32, u32),
-    (AtomicU64, u64),
-    (AtomicU8, u8),
-    (AtomicUsize, usize)
-);
diff --git a/samples/rust/rust_debugfs.rs b/samples/rust/rust_debugfs.rs
index 82b61a15a34b..711faa07bece 100644
--- a/samples/rust/rust_debugfs.rs
+++ b/samples/rust/rust_debugfs.rs
@@ -32,14 +32,12 @@
 //! ```
 
 use core::str::FromStr;
-use core::sync::atomic::AtomicUsize;
-use core::sync::atomic::Ordering;
 use kernel::c_str;
 use kernel::debugfs::{Dir, File};
 use kernel::new_mutex;
 use kernel::prelude::*;
+use kernel::sync::atomic::{Atomic, Relaxed};
 use kernel::sync::Mutex;
-
 use kernel::{acpi, device::Core, of, platform, str::CString, types::ARef};
 
 kernel::module_platform_driver! {
@@ -59,7 +57,7 @@ struct RustDebugFs {
     #[pin]
     _compatible: File<CString>,
     #[pin]
-    counter: File<AtomicUsize>,
+    counter: File<Atomic<usize>>,
     #[pin]
     inner: File<Mutex<Inner>>,
 }
@@ -109,7 +107,7 @@ fn probe(
     ) -> Result<Pin<KBox<Self>>> {
         let result = KBox::try_pin_init(RustDebugFs::new(pdev), GFP_KERNEL)?;
         // We can still mutate fields through the files which are atomic or mutexed:
-        result.counter.store(91, Ordering::Relaxed);
+        result.counter.store(91, Relaxed);
         {
             let mut guard = result.inner.lock();
             guard.x = guard.y;
@@ -120,8 +118,8 @@ fn probe(
 }
 
 impl RustDebugFs {
-    fn build_counter(dir: &Dir) -> impl PinInit<File<AtomicUsize>> + '_ {
-        dir.read_write_file(c_str!("counter"), AtomicUsize::new(0))
+    fn build_counter(dir: &Dir) -> impl PinInit<File<Atomic<usize>>> + '_ {
+        dir.read_write_file(c_str!("counter"), Atomic::<usize>::new(0))
     }
 
     fn build_inner(dir: &Dir) -> impl PinInit<File<Mutex<Inner>>> + '_ {
diff --git a/samples/rust/rust_debugfs_scoped.rs b/samples/rust/rust_debugfs_scoped.rs
index b0c4e76b123e..9f0ec5f24cda 100644
--- a/samples/rust/rust_debugfs_scoped.rs
+++ b/samples/rust/rust_debugfs_scoped.rs
@@ -6,9 +6,9 @@
 //! `Scope::dir` to create a variety of files without the need to separately
 //! track them all.
 
-use core::sync::atomic::AtomicUsize;
 use kernel::debugfs::{Dir, Scope};
 use kernel::prelude::*;
+use kernel::sync::atomic::Atomic;
 use kernel::sync::Mutex;
 use kernel::{c_str, new_mutex, str::CString};
 
@@ -62,7 +62,7 @@ fn create_file_write(
         let file_name = CString::try_from_fmt(fmt!("{name_str}"))?;
         for sub in items {
             nums.push(
-                AtomicUsize::new(sub.parse().map_err(|_| EINVAL)?),
+                Atomic::<usize>::new(sub.parse().map_err(|_| EINVAL)?),
                 GFP_KERNEL,
             )?;
         }
 
@@ -109,7 +109,7 @@ fn init(device_dir: Dir) -> impl PinInit {
 
 struct DeviceData {
     name: CString,
-    nums: KVec<AtomicUsize>,
+    nums: KVec<Atomic<usize>>,
 }
 
 fn init_control(base_dir: &Dir, dyn_dirs: Dir) -> impl PinInit> + '_ {

From b94d45b6bbb42571ec225d3be0e7457c8765a5b4 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Wed, 5 Nov 2025 15:23:50 +0100
Subject: [PATCH 16/19] locking/mutex: Redo __mutex_init() to reduce generated
 code size

mutex_init() invokes __mutex_init() providing the name of the lock and
a pointer to the lock class. With LOCKDEP enabled this information is
useful, but without LOCKDEP it is not used at all. Passing the pointer
to the lock class might be considered negligible, but the name of the
lock is passed as well and the string is stored. This information is
wasting storage.

Split __mutex_init() into a _generic() variant doing the initialisation
of the lock and a _lockdep() version which does _generic() plus the
lockdep bits. Restrict the lockdep version to lockdep enabled builds,
allowing the compiler to remove the unused parameters.

This results in the following size reduction:

   text      data     bss       dec       filename
 | 30237599  8161430  1176624   39575653  vmlinux.defconfig
 | 30233269  8149142  1176560   39558971  vmlinux.defconfig.patched
   -4.2KiB   -12KiB

 | 32455099  8471098  12934684  53860881  vmlinux.defconfig.lockdep
 | 32455100  8471098  12934684  53860882  vmlinux.defconfig.patched.lockdep

 | 27152407  7191822  2068040   36412269  vmlinux.defconfig.preempt_rt
 | 27145937  7183630  2067976   36397543  vmlinux.defconfig.patched.preempt_rt
   -6.3KiB   -8KiB

 | 29382020  7505742  13784608  50672370  vmlinux.defconfig.preempt_rt.lockdep
 | 29376229  7505742  13784544  50666515  vmlinux.defconfig.patched.preempt_rt.lockdep
   -5.6KiB

[peterz: folded fix from boqun]

Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Boqun Feng
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Ingo Molnar
Reviewed-by: Waiman Long
Link: https://lkml.kernel.org/r/20251125145425.68319-1-boqun.feng@gmail.com
Link: https://patch.msgid.link/20251105142350.Tfeevs2N@linutronix.de
---
 include/linux/mutex.h        | 45 ++++++++++++++++++++++++++++--------
 kernel/locking/mutex-debug.c | 10 +--------
 kernel/locking/mutex.c       | 28 +++++++++++++++++-----
 kernel/locking/mutex.h       |  5 ++--
 kernel/locking/rtmutex_api.c | 19 +++++++++++----
 5 files changed, 75 insertions(+), 32 deletions(-)

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 847b81ca6436..bf535f0118bb 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -86,8 +86,23 @@ do {									\
 #define DEFINE_MUTEX(mutexname) \
 	struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
 
-extern void __mutex_init(struct mutex *lock, const char *name,
-			 struct lock_class_key *key);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void mutex_init_lockep(struct mutex *lock, const char *name, struct lock_class_key *key);
+
+static inline void __mutex_init(struct mutex *lock, const char *name,
+				struct lock_class_key *key)
+{
+	mutex_init_lockep(lock, name, key);
+}
+#else
+extern void mutex_init_generic(struct mutex *lock);
+
+static inline void __mutex_init(struct mutex
*lock, const char *name, + struct lock_class_key *key) +{ + mutex_init_generic(lock); +} +#endif /* !CONFIG_DEBUG_LOCK_ALLOC */ /** * mutex_is_locked - is the mutex locked @@ -111,17 +126,27 @@ extern bool mutex_is_locked(struct mutex *lock); #define DEFINE_MUTEX(mutexname) \ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) -extern void __mutex_rt_init(struct mutex *lock, const char *name, - struct lock_class_key *key); - #define mutex_is_locked(l) rt_mutex_base_is_locked(&(l)->rtmutex) -#define __mutex_init(mutex, name, key) \ -do { \ - rt_mutex_base_init(&(mutex)->rtmutex); \ - __mutex_rt_init((mutex), name, key); \ -} while (0) +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern void mutex_rt_init_lockdep(struct mutex *mutex, const char *name, + struct lock_class_key *key); +static inline void __mutex_init(struct mutex *lock, const char *name, + struct lock_class_key *key) +{ + mutex_rt_init_lockdep(lock, name, key); +} + +#else +extern void mutex_rt_init_generic(struct mutex *mutex); + +static inline void __mutex_init(struct mutex *lock, const char *name, + struct lock_class_key *key) +{ + mutex_rt_init_generic(lock); +} +#endif /* !CONFIG_LOCKDEP */ #endif /* CONFIG_PREEMPT_RT */ #ifdef CONFIG_DEBUG_MUTEXES diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index 949103fd8e9b..2c6b02d4699b 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -78,16 +78,8 @@ void debug_mutex_unlock(struct mutex *lock) } } -void debug_mutex_init(struct mutex *lock, const char *name, - struct lock_class_key *key) +void debug_mutex_init(struct mutex *lock) { -#ifdef CONFIG_DEBUG_LOCK_ALLOC - /* - * Make sure we are not reinitializing a held lock: - */ - debug_check_no_locks_freed((void *)lock, sizeof(*lock)); - lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP); -#endif lock->magic = lock; } diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index de7d6702cd96..2a1d165b3167 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -43,8 +43,7 @@ # define MUTEX_WARN_ON(cond) #endif -void -__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) +static void __mutex_init_generic(struct mutex *lock) { atomic_long_set(&lock->owner, 0); raw_spin_lock_init(&lock->wait_lock); @@ -52,10 +51,8 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) #ifdef CONFIG_MUTEX_SPIN_ON_OWNER osq_lock_init(&lock->osq); #endif - - debug_mutex_init(lock, name, key); + debug_mutex_init(lock); } -EXPORT_SYMBOL(__mutex_init); static inline struct task_struct *__owner_task(unsigned long owner) { @@ -142,6 +139,11 @@ static inline bool __mutex_trylock(struct mutex *lock) * There is nothing that would stop spreading the lockdep annotations outwards * except more code. */ +void mutex_init_generic(struct mutex *lock) +{ + __mutex_init_generic(lock); +} +EXPORT_SYMBOL(mutex_init_generic); /* * Optimistic trylock that only works in the uncontended case. 
Make sure to @@ -166,7 +168,21 @@ static __always_inline bool __mutex_unlock_fast(struct mutex *lock) return atomic_long_try_cmpxchg_release(&lock->owner, &curr, 0UL); } -#endif + +#else /* !CONFIG_DEBUG_LOCK_ALLOC */ + +void mutex_init_lockep(struct mutex *lock, const char *name, struct lock_class_key *key) +{ + __mutex_init_generic(lock); + + /* + * Make sure we are not reinitializing a held lock: + */ + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); + lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP); +} +EXPORT_SYMBOL(mutex_init_lockep); +#endif /* !CONFIG_DEBUG_LOCK_ALLOC */ static inline void __mutex_set_flag(struct mutex *lock, unsigned long flag) { diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 2e8080a9bee3..9ad4da8cea00 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h @@ -59,8 +59,7 @@ extern void debug_mutex_add_waiter(struct mutex *lock, extern void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct task_struct *task); extern void debug_mutex_unlock(struct mutex *lock); -extern void debug_mutex_init(struct mutex *lock, const char *name, - struct lock_class_key *key); +extern void debug_mutex_init(struct mutex *lock); #else /* CONFIG_DEBUG_MUTEXES */ # define debug_mutex_lock_common(lock, waiter) do { } while (0) # define debug_mutex_wake_waiter(lock, waiter) do { } while (0) @@ -68,6 +67,6 @@ extern void debug_mutex_init(struct mutex *lock, const char *name, # define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0) # define debug_mutex_remove_waiter(lock, waiter, ti) do { } while (0) # define debug_mutex_unlock(lock) do { } while (0) -# define debug_mutex_init(lock, name, key) do { } while (0) +# define debug_mutex_init(lock) do { } while (0) #endif /* !CONFIG_DEBUG_MUTEXES */ #endif /* CONFIG_PREEMPT_RT */ diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c index bafd5af98eae..59dbd29cb219 100644 --- a/kernel/locking/rtmutex_api.c +++ b/kernel/locking/rtmutex_api.c @@ -515,13 +515,11 @@ void rt_mutex_debug_task_free(struct task_struct *task) #ifdef CONFIG_PREEMPT_RT /* Mutexes */ -void __mutex_rt_init(struct mutex *mutex, const char *name, - struct lock_class_key *key) +static void __mutex_rt_init_generic(struct mutex *mutex) { + rt_mutex_base_init(&mutex->rtmutex); debug_check_no_locks_freed((void *)mutex, sizeof(*mutex)); - lockdep_init_map_wait(&mutex->dep_map, name, key, 0, LD_WAIT_SLEEP); } -EXPORT_SYMBOL(__mutex_rt_init); static __always_inline int __mutex_lock_common(struct mutex *lock, unsigned int state, @@ -542,6 +540,13 @@ static __always_inline int __mutex_lock_common(struct mutex *lock, } #ifdef CONFIG_DEBUG_LOCK_ALLOC +void mutex_rt_init_lockdep(struct mutex *mutex, const char *name, struct lock_class_key *key) +{ + __mutex_rt_init_generic(mutex); + lockdep_init_map_wait(&mutex->dep_map, name, key, 0, LD_WAIT_SLEEP); +} +EXPORT_SYMBOL(mutex_rt_init_lockdep); + void __sched mutex_lock_nested(struct mutex *lock, unsigned int subclass) { __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_); @@ -598,6 +603,12 @@ int __sched _mutex_trylock_nest_lock(struct mutex *lock, EXPORT_SYMBOL_GPL(_mutex_trylock_nest_lock); #else /* CONFIG_DEBUG_LOCK_ALLOC */ +void mutex_rt_init_generic(struct mutex *mutex) +{ + __mutex_rt_init_generic(mutex); +} +EXPORT_SYMBOL(mutex_rt_init_generic); + void __sched mutex_lock(struct mutex *lock) { __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_); From 
52ed746147140e30419ee852c1916531b4ef9b0a Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Thu, 27 Nov 2025 15:41:39 +0100
Subject: [PATCH 17/19] locking/local_lock: Add the headers to MAINTAINERS

The local_lock_t headers were never added to the MAINTAINERS file since
their introduction. Add local_lock_t to the locking primitives section.

Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Ingo Molnar
Acked-by: Waiman Long
Link: https://patch.msgid.link/20251127144140.215722-2-bigeasy@linutronix.de
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 545a4776795e..a099b9b391b0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14517,6 +14517,7 @@ S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
 F:	Documentation/locking/
 F:	arch/*/include/asm/spinlock*.h
+F:	include/linux/local_lock*.h
 F:	include/linux/lockdep*.h
 F:	include/linux/mutex*.h
 F:	include/linux/rwlock*.h

From 719e357fc09c63238956eb7cd546627f9e050640 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol
Date: Thu, 27 Nov 2025 15:41:40 +0100
Subject: [PATCH 18/19] locking/local_lock: s/l/__l/ and s/tl/__tl/ to reduce
 the risk of shadowing

The Linux kernel coding style advises avoiding common variable names in
function-like macros to reduce the risk of namespace collisions.

Throughout local_lock_internal.h, several macros use the rather common
variable names 'l' and 'tl'. This already resulted in an actual
collision: the __local_lock_acquire() function-like macro is currently
shadowing the parameter 'l' of the:

	class_##_name##_t class_##_name##_constructor(_type *l)

function factory from <linux/cleanup.h>.

Rename the variable 'l' to '__l' and the variable 'tl' to '__tl'
throughout the file to fix the current namespace collision and to
prevent future ones.
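A stand-alone sketch of the failure mode (the macro and function names
below are made up for the example; only __local_lock_acquire() and the
constructor shape above are from this patch): when the macro argument
itself is named 'l', the macro-local 'l' shadows it from the point of
its own declaration onwards, so the initializer reads the uninitialized
shadow instead of the caller's variable:

	#define acquire_sketch(lock)					\
	do {								\
		/* macro-local 'l', like the old __local_lock_acquire() */ \
		int *l = (lock);					\
		*l += 1;						\
	} while (0)

	static int constructor_sketch(int *l)
	{
		/*
		 * Expands to 'int *l = (l);' -- the inner 'l' shadows
		 * the parameter and is initialized from itself:
		 * undefined behaviour, silently accepted by the
		 * compiler.
		 */
		acquire_sketch(l);
		return *l;
	}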
[ bigeasy: Rebase, update all l and tl instances in macros ] Signed-off-by: Vincent Mailhol Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Waiman Long Link: https://patch.msgid.link/20251127144140.215722-3-bigeasy@linutronix.de --- include/linux/local_lock_internal.h | 62 ++++++++++++++--------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index a4dc479157b5..8f82b4eb542f 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -99,18 +99,18 @@ do { \ #define __local_lock_acquire(lock) \ do { \ - local_trylock_t *tl; \ - local_lock_t *l; \ + local_trylock_t *__tl; \ + local_lock_t *__l; \ \ - l = (local_lock_t *)(lock); \ - tl = (local_trylock_t *)l; \ + __l = (local_lock_t *)(lock); \ + __tl = (local_trylock_t *)__l; \ _Generic((lock), \ local_trylock_t *: ({ \ - lockdep_assert(tl->acquired == 0); \ - WRITE_ONCE(tl->acquired, 1); \ + lockdep_assert(__tl->acquired == 0); \ + WRITE_ONCE(__tl->acquired, 1); \ }), \ local_lock_t *: (void)0); \ - local_lock_acquire(l); \ + local_lock_acquire(__l); \ } while (0) #define __local_lock(lock) \ @@ -133,36 +133,36 @@ do { \ #define __local_trylock(lock) \ ({ \ - local_trylock_t *tl; \ + local_trylock_t *__tl; \ \ preempt_disable(); \ - tl = (lock); \ - if (READ_ONCE(tl->acquired)) { \ + __tl = (lock); \ + if (READ_ONCE(__tl->acquired)) { \ preempt_enable(); \ - tl = NULL; \ + __tl = NULL; \ } else { \ - WRITE_ONCE(tl->acquired, 1); \ + WRITE_ONCE(__tl->acquired, 1); \ local_trylock_acquire( \ - (local_lock_t *)tl); \ + (local_lock_t *)__tl); \ } \ - !!tl; \ + !!__tl; \ }) #define __local_trylock_irqsave(lock, flags) \ ({ \ - local_trylock_t *tl; \ + local_trylock_t *__tl; \ \ local_irq_save(flags); \ - tl = (lock); \ - if (READ_ONCE(tl->acquired)) { \ + __tl = (lock); \ + if (READ_ONCE(__tl->acquired)) { \ local_irq_restore(flags); \ - tl = NULL; \ + __tl = NULL; \ } else { \ - WRITE_ONCE(tl->acquired, 1); \ + WRITE_ONCE(__tl->acquired, 1); \ local_trylock_acquire( \ - (local_lock_t *)tl); \ + (local_lock_t *)__tl); \ } \ - !!tl; \ + !!__tl; \ }) /* preemption or migration must be disabled before calling __local_lock_is_locked */ @@ -170,16 +170,16 @@ do { \ #define __local_lock_release(lock) \ do { \ - local_trylock_t *tl; \ - local_lock_t *l; \ + local_trylock_t *__tl; \ + local_lock_t *__l; \ \ - l = (local_lock_t *)(lock); \ - tl = (local_trylock_t *)l; \ - local_lock_release(l); \ + __l = (local_lock_t *)(lock); \ + __tl = (local_trylock_t *)__l; \ + local_lock_release(__l); \ _Generic((lock), \ local_trylock_t *: ({ \ - lockdep_assert(tl->acquired == 1); \ - WRITE_ONCE(tl->acquired, 0); \ + lockdep_assert(__tl->acquired == 1); \ + WRITE_ONCE(__tl->acquired, 0); \ }), \ local_lock_t *: (void)0); \ } while (0) @@ -223,12 +223,12 @@ typedef spinlock_t local_trylock_t; #define INIT_LOCAL_LOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) #define INIT_LOCAL_TRYLOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) -#define __local_lock_init(l) \ +#define __local_lock_init(__l) \ do { \ - local_spin_lock_init((l)); \ + local_spin_lock_init((__l)); \ } while (0) -#define __local_trylock_init(l) __local_lock_init(l) +#define __local_trylock_init(__l) __local_lock_init(__l) #define __local_lock(__lock) \ do { \ From 43decb6b628eb033a1b6188e5018773c0d38be1d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 27 Nov 2025 22:59:25 -0800 Subject: 
[PATCH 19/19] locking/local_lock: Fix all kernel-doc warnings Modify kernel-doc comments in local_lock.h to prevent warnings: Warning: include/linux/local_lock.h:9 function parameter 'lock' not described in 'local_lock_init' Warning: include/linux/local_lock.h:56 function parameter 'lock' not described in 'local_trylock_init' Warning: include/linux/local_lock.h:56 expecting prototype for local_lock_init(). Prototype was for local_trylock_init() instead Signed-off-by: Randy Dunlap Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://patch.msgid.link/20251128065925.917917-1-rdunlap@infradead.org --- include/linux/local_lock.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h index 0d91d060e3e9..b0e6ab329b00 100644 --- a/include/linux/local_lock.h +++ b/include/linux/local_lock.h @@ -6,6 +6,7 @@ /** * local_lock_init - Runtime initialize a lock instance + * @lock: The lock variable */ #define local_lock_init(lock) __local_lock_init(lock) @@ -52,7 +53,8 @@ __local_unlock_irqrestore(this_cpu_ptr(lock), flags) /** - * local_lock_init - Runtime initialize a lock instance + * local_trylock_init - Runtime initialize a lock instance + * @lock: The lock variable */ #define local_trylock_init(lock) __local_trylock_init(lock)