mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-03 21:45:08 -04:00
Merge patch series "membarrier: riscv: Core serializing command"
RISC-V was lacking a membarrier implementation for the store/fetch ordering, which is a bit tricky because of the deferred icache flushing we use in RISC-V.

* b4-shazam-merge:
  membarrier: riscv: Provide core serializing command
  locking: Introduce prepare_sync_core_cmd()
  membarrier: Create Documentation/scheduler/membarrier.rst
  membarrier: riscv: Add full memory barrier in switch_mm()

Link: https://lore.kernel.org/r/20240131144936.29190-1-parri.andrea@gmail.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
This commit is contained in:
@@ -10,6 +10,22 @@
|
||||
# Rely on implicit context synchronization as a result of exception return
|
||||
# when returning from IPI handler, and when returning to user-space.
|
||||
#
|
||||
# * riscv
|
||||
#
|
||||
# riscv uses xRET as return from interrupt and to return to user-space.
|
||||
#
|
||||
# Given that xRET is not core serializing, we rely on FENCE.I for providing
|
||||
# core serialization:
|
||||
#
|
||||
# - by calling sync_core_before_usermode() on return from interrupt (cf.
|
||||
# ipi_sync_core()),
|
||||
#
|
||||
# - via switch_mm() and sync_core_before_usermode() (respectively, for
|
||||
# uthread->uthread and kthread->uthread transitions) before returning
|
||||
# to user-space.
|
||||
#
|
||||
# The serialization in switch_mm() is activated by prepare_sync_core_cmd().
|
||||
#
|
||||
# * x86
|
||||
#
|
||||
# x86-32 uses IRET as return from interrupt, which takes care of the IPI.
|
||||
@@ -43,7 +59,7 @@
|
||||
| openrisc: | TODO |
|
||||
| parisc: | TODO |
|
||||
| powerpc: | ok |
|
||||
| riscv: | TODO |
|
||||
| riscv: | ok |
|
||||
| s390: | ok |
|
||||
| sh: | TODO |
|
||||
| sparc: | TODO |
|
||||
|
||||
@@ -7,6 +7,7 @@ Scheduler
|
||||
|
||||
|
||||
completion
|
||||
membarrier
|
||||
sched-arch
|
||||
sched-bwc
|
||||
sched-deadline
|
||||
|
||||
39
Documentation/scheduler/membarrier.rst
Normal file
39
Documentation/scheduler/membarrier.rst
Normal file
@@ -0,0 +1,39 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
========================
|
||||
membarrier() System Call
|
||||
========================
|
||||
|
||||
MEMBARRIER_CMD_{PRIVATE,GLOBAL}_EXPEDITED - Architecture requirements
|
||||
=====================================================================
|
||||
|
||||
Memory barriers before updating rq->curr
|
||||
----------------------------------------
|
||||
|
||||
The commands MEMBARRIER_CMD_PRIVATE_EXPEDITED and MEMBARRIER_CMD_GLOBAL_EXPEDITED
|
||||
require each architecture to have a full memory barrier after coming from
|
||||
user-space, before updating rq->curr. This barrier is implied by the sequence
|
||||
rq_lock(); smp_mb__after_spinlock() in __schedule(). The barrier matches a full
|
||||
barrier in the proximity of the membarrier system call exit, cf.
|
||||
membarrier_{private,global}_expedited().
|
||||
|
||||
Memory barriers after updating rq->curr
|
||||
---------------------------------------
|
||||
|
||||
The commands MEMBARRIER_CMD_PRIVATE_EXPEDITED and MEMBARRIER_CMD_GLOBAL_EXPEDITED
|
||||
require each architecture to have a full memory barrier after updating rq->curr,
|
||||
before returning to user-space. The schemes providing this barrier on the various
|
||||
architectures are as follows.
|
||||
|
||||
- alpha, arc, arm, hexagon, mips rely on the full barrier implied by
|
||||
spin_unlock() in finish_lock_switch().
|
||||
|
||||
- arm64 relies on the full barrier implied by switch_to().
|
||||
|
||||
- powerpc, riscv, s390, sparc, x86 rely on the full barrier implied by
|
||||
switch_mm(), if mm is not NULL; they rely on the full barrier implied
|
||||
by mmdrop(), otherwise. On powerpc and riscv, switch_mm() relies on
|
||||
membarrier_arch_switch_mm().
|
||||
|
||||
The barrier matches a full barrier in the proximity of the membarrier system call
|
||||
entry, cf. membarrier_{private,global}_expedited().
|
||||
@@ -14039,7 +14039,9 @@ M: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
|
||||
M: "Paul E. McKenney" <paulmck@kernel.org>
|
||||
L: linux-kernel@vger.kernel.org
|
||||
S: Supported
|
||||
F: arch/powerpc/include/asm/membarrier.h
|
||||
F: Documentation/scheduler/membarrier.rst
|
||||
F: arch/*/include/asm/membarrier.h
|
||||
F: arch/*/include/asm/sync_core.h
|
||||
F: include/uapi/linux/membarrier.h
|
||||
F: kernel/sched/membarrier.c
|
||||
|
||||
|
||||
@@ -27,14 +27,18 @@ config RISCV
|
||||
select ARCH_HAS_GCOV_PROFILE_ALL
|
||||
select ARCH_HAS_GIGANTIC_PAGE
|
||||
select ARCH_HAS_KCOV
|
||||
select ARCH_HAS_MEMBARRIER_CALLBACKS
|
||||
select ARCH_HAS_MEMBARRIER_SYNC_CORE
|
||||
select ARCH_HAS_MMIOWB
|
||||
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
|
||||
select ARCH_HAS_PMEM_API
|
||||
select ARCH_HAS_PREPARE_SYNC_CORE_CMD
|
||||
select ARCH_HAS_PTE_SPECIAL
|
||||
select ARCH_HAS_SET_DIRECT_MAP if MMU
|
||||
select ARCH_HAS_SET_MEMORY if MMU
|
||||
select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
|
||||
select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL
|
||||
select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
|
||||
select ARCH_HAS_SYSCALL_WRAPPER
|
||||
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
|
||||
select ARCH_HAS_UBSAN_SANITIZE_ALL
|
||||
|
||||
50
arch/riscv/include/asm/membarrier.h
Normal file
50
arch/riscv/include/asm/membarrier.h
Normal file
@@ -0,0 +1,50 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
#ifndef _ASM_RISCV_MEMBARRIER_H
|
||||
#define _ASM_RISCV_MEMBARRIER_H
|
||||
|
||||
static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
|
||||
struct mm_struct *next,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
/*
|
||||
* Only need the full barrier when switching between processes.
|
||||
* Barrier when switching from kernel to userspace is not
|
||||
* required here, given that it is implied by mmdrop(). Barrier
|
||||
* when switching from userspace to kernel is not needed after
|
||||
* store to rq->curr.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_SMP) &&
|
||||
likely(!(atomic_read(&next->membarrier_state) &
|
||||
(MEMBARRIER_STATE_PRIVATE_EXPEDITED |
|
||||
MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev))
|
||||
return;
|
||||
|
||||
/*
|
||||
* The membarrier system call requires a full memory barrier
|
||||
* after storing to rq->curr, before going back to user-space.
|
||||
*
|
||||
* This barrier is also needed for the SYNC_CORE command when
|
||||
* switching between processes; in particular, on a transition
|
||||
* from a thread belonging to another mm to a thread belonging
|
||||
* to the mm for which a membarrier SYNC_CORE is done on CPU0:
|
||||
*
|
||||
* - [CPU0] sets all bits in the mm icache_stale_mask (in
|
||||
* prepare_sync_core_cmd());
|
||||
*
|
||||
* - [CPU1] stores to rq->curr (by the scheduler);
|
||||
*
|
||||
* - [CPU0] loads rq->curr within membarrier and observes
|
||||
* cpu_rq(1)->curr->mm != mm, so the IPI is skipped on
|
||||
* CPU1; this means membarrier relies on switch_mm() to
|
||||
* issue the sync-core;
|
||||
*
|
||||
* - [CPU1] switch_mm() loads icache_stale_mask; if the bit
|
||||
* is zero, switch_mm() may incorrectly skip the sync-core.
|
||||
*
|
||||
* Matches a full barrier in the proximity of the membarrier
|
||||
* system call entry.
|
||||
*/
|
||||
smp_mb();
|
||||
}
|
||||
|
||||
#endif /* _ASM_RISCV_MEMBARRIER_H */
|
||||
29
arch/riscv/include/asm/sync_core.h
Normal file
29
arch/riscv/include/asm/sync_core.h
Normal file
@@ -0,0 +1,29 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _ASM_RISCV_SYNC_CORE_H
|
||||
#define _ASM_RISCV_SYNC_CORE_H
|
||||
|
||||
/*
 * On RISC-V the return to user-space is performed with an xRET
 * instruction, and xRET is not core serializing.  Execute an explicit
 * FENCE.I (also acting as a compiler memory barrier through the
 * "memory" clobber) before going back to user mode.
 */
static inline void sync_core_before_usermode(void)
{
	__asm__ __volatile__("fence.i" : : : "memory");
}
|
||||
|
||||
/*
 * Arrange for the next switch_mm() on every CPU to issue a core
 * serializing instruction for the given @mm.
 *
 * With CONFIG_SMP this sets all bits in the mm's icache_stale_mask;
 * without CONFIG_SMP the function is empty.
 */
static inline void prepare_sync_core_cmd(struct mm_struct *mm)
{
#ifdef CONFIG_SMP
	cpumask_setall(&mm->context.icache_stale_mask);
#endif /* CONFIG_SMP */
}
|
||||
|
||||
#endif /* _ASM_RISCV_SYNC_CORE_H */
|
||||
@@ -323,6 +323,8 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
|
||||
if (unlikely(prev == next))
|
||||
return;
|
||||
|
||||
membarrier_arch_switch_mm(prev, next, task);
|
||||
|
||||
/*
|
||||
* Mark the current MM context as inactive, and the next as
|
||||
* active. This is at least used by the icache flushing
|
||||
|
||||
@@ -17,5 +17,19 @@ static inline void sync_core_before_usermode(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _LINUX_SYNC_CORE_H */
|
||||
#ifndef CONFIG_ARCH_HAS_PREPARE_SYNC_CORE_CMD
/*
 * Default, empty prepare_sync_core_cmd().
 *
 * This stub is adequate for every architecture whose switch_mm()
 * unconditionally executes core serializing instructions.  An
 * architecture whose switch_mm() does not provide such instructions
 * may need to write its own version of this function.
 */
static inline void prepare_sync_core_cmd(struct mm_struct *mm)
{
}
#else
/* The architecture is expected to supply its own definition here. */
#include <asm/sync_core.h>
#endif /* !CONFIG_ARCH_HAS_PREPARE_SYNC_CORE_CMD */
|
||||
|
||||
#endif /* _LINUX_SYNC_CORE_H */
|
||||
|
||||
@@ -1970,6 +1970,9 @@ source "kernel/Kconfig.locks"
|
||||
config ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
|
||||
bool
|
||||
|
||||
config ARCH_HAS_PREPARE_SYNC_CORE_CMD
|
||||
bool
|
||||
|
||||
config ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
|
||||
bool
|
||||
|
||||
|
||||
@@ -6638,7 +6638,9 @@ static void __sched notrace __schedule(unsigned int sched_mode)
|
||||
* if (signal_pending_state()) if (p->state & @state)
|
||||
*
|
||||
* Also, the membarrier system call requires a full memory barrier
|
||||
* after coming from user-space, before storing to rq->curr.
|
||||
* after coming from user-space, before storing to rq->curr; this
|
||||
* barrier matches a full barrier in the proximity of the membarrier
|
||||
* system call exit.
|
||||
*/
|
||||
rq_lock(rq, &rf);
|
||||
smp_mb__after_spinlock();
|
||||
@@ -6709,12 +6711,20 @@ static void __sched notrace __schedule(unsigned int sched_mode)
|
||||
*
|
||||
* Here are the schemes providing that barrier on the
|
||||
* various architectures:
|
||||
* - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC.
|
||||
* switch_mm() rely on membarrier_arch_switch_mm() on PowerPC.
|
||||
* - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC,
|
||||
* RISC-V. switch_mm() relies on membarrier_arch_switch_mm()
|
||||
* on PowerPC and on RISC-V.
|
||||
* - finish_lock_switch() for weakly-ordered
|
||||
* architectures where spin_unlock is a full barrier,
|
||||
* - switch_to() for arm64 (weakly-ordered, spin_unlock
|
||||
* is a RELEASE barrier),
|
||||
*
|
||||
* The barrier matches a full barrier in the proximity of
|
||||
* the membarrier system call entry.
|
||||
*
|
||||
* On RISC-V, this barrier pairing is also needed for the
|
||||
* SYNC_CORE command when switching between processes, cf.
|
||||
* the inline comments in membarrier_arch_switch_mm().
|
||||
*/
|
||||
++*switch_count;
|
||||
|
||||
|
||||
@@ -251,7 +251,7 @@ static int membarrier_global_expedited(void)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Matches memory barriers around rq->curr modification in
|
||||
* Matches memory barriers after rq->curr modification in
|
||||
* scheduler.
|
||||
*/
|
||||
smp_mb(); /* system call entry is not a mb. */
|
||||
@@ -300,7 +300,7 @@ static int membarrier_global_expedited(void)
|
||||
|
||||
/*
|
||||
* Memory barrier on the caller thread _after_ we finished
|
||||
* waiting for the last IPI. Matches memory barriers around
|
||||
* waiting for the last IPI. Matches memory barriers before
|
||||
* rq->curr modification in scheduler.
|
||||
*/
|
||||
smp_mb(); /* exit from system call is not a mb */
|
||||
@@ -320,6 +320,7 @@ static int membarrier_private_expedited(int flags, int cpu_id)
|
||||
MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
|
||||
return -EPERM;
|
||||
ipi_func = ipi_sync_core;
|
||||
prepare_sync_core_cmd(mm);
|
||||
} else if (flags == MEMBARRIER_FLAG_RSEQ) {
|
||||
if (!IS_ENABLED(CONFIG_RSEQ))
|
||||
return -EINVAL;
|
||||
@@ -339,8 +340,12 @@ static int membarrier_private_expedited(int flags, int cpu_id)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Matches memory barriers around rq->curr modification in
|
||||
* Matches memory barriers after rq->curr modification in
|
||||
* scheduler.
|
||||
*
|
||||
* On RISC-V, this barrier pairing is also needed for the
|
||||
* SYNC_CORE command when switching between processes, cf.
|
||||
* the inline comments in membarrier_arch_switch_mm().
|
||||
*/
|
||||
smp_mb(); /* system call entry is not a mb. */
|
||||
|
||||
@@ -415,7 +420,7 @@ static int membarrier_private_expedited(int flags, int cpu_id)
|
||||
|
||||
/*
|
||||
* Memory barrier on the caller thread _after_ we finished
|
||||
* waiting for the last IPI. Matches memory barriers around
|
||||
* waiting for the last IPI. Matches memory barriers before
|
||||
* rq->curr modification in scheduler.
|
||||
*/
|
||||
smp_mb(); /* exit from system call is not a mb */
|
||||
|
||||
Reference in New Issue
Block a user