mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-01-14 01:40:56 -05:00
When Priority Mask Hint Enable (PMHE) == 0b1, the GIC may use the PMR value to determine whether to signal an IRQ to a PE, and consequently after a change to the PMR value, a DSB SY may be required to ensure that interrupts are signalled to a CPU in finite time. When PMHE == 0b0, interrupts are always signalled to the relevant PE, and all masking occurs locally, without requiring a DSB SY. Since commit f226650494 ("arm64: Relax ICC_PMR_EL1 accesses when ICC_CTLR_EL1.PMHE is clear") we handle this dynamically: in most cases a static key is used to determine whether to issue a DSB SY, but the entry code must read from ICC_CTLR_EL1 as static keys aren't accessible from plain assembly. It would be much nicer to use an alternative instruction sequence for the DSB, as this would avoid the need to read from ICC_CTLR_EL1 in the entry code, and for most other code this will result in simpler code generation with fewer instructions and fewer branches. This patch adds a new ARM64_HAS_GIC_PRIO_RELAXED_SYNC cpucap which is only set when ICC_CTLR_EL1.PMHE == 0b0 (and GIC priority masking is in use). This allows us to replace the existing users of the `gic_pmr_sync` static key with alternative sequences which default to a DSB SY and are relaxed to a NOP when PMHE is not in use. The entry assembly management of the PMR is slightly restructured to use a branch (rather than multiple NOPs) when priority masking is not in use. This is more in keeping with other alternatives in the entry assembly, and permits the use of a separate alternative for the PMHE-dependent DSB SY (and removal of the conditional branch this currently requires). For consistency I've adjusted both the save and restore paths.
According to bloat-o-meter, when building defconfig + CONFIG_ARM64_PSEUDO_NMI=y this shrinks the kernel text by ~4KiB: | add/remove: 4/2 grow/shrink: 42/310 up/down: 332/-5032 (-4700) The resulting vmlinux is ~66KiB smaller, though the resulting Image size is unchanged due to padding and alignment: | [mark@lakrids:~/src/linux]% ls -al vmlinux-* | -rwxr-xr-x 1 mark mark 137508344 Jan 17 14:11 vmlinux-after | -rwxr-xr-x 1 mark mark 137575440 Jan 17 13:49 vmlinux-before | [mark@lakrids:~/src/linux]% ls -al Image-* | -rw-r--r-- 1 mark mark 38777344 Jan 17 14:11 Image-after | -rw-r--r-- 1 mark mark 38777344 Jan 17 13:49 Image-before Prior to this patch we did not verify the state of ICC_CTLR_EL1.PMHE on secondary CPUs. As of this patch this is verified by the cpufeature code when using GIC priority masking (i.e. when using pseudo-NMIs). Note that since commit 7e3a57fa6c ("arm64: Document ICC_CTLR_EL3.PMHE setting requirements"), Documentation/arm64/booting.rst specifies: | - ICC_CTLR_EL3.PMHE (bit 6) must be set to the same value across | all CPUs the kernel is executing on, and must stay constant | for the lifetime of the kernel. ... so that should not adversely affect any compliant systems, and as we'll only check for the absence of PMHE when using pseudo-NMIs, this will only fire when such a mismatch would adversely affect the system. Signed-off-by: Mark Rutland <mark.rutland@arm.com> Reviewed-by: Marc Zyngier <maz@kernel.org> Cc: Mark Brown <broonie@kernel.org> Cc: Will Deacon <will@kernel.org> Link: https://lore.kernel.org/r/20230130145429.903791-5-mark.rutland@arm.com Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
220 lines
5.7 KiB
C
220 lines
5.7 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* Based on arch/arm/include/asm/barrier.h
|
|
*
|
|
* Copyright (C) 2012 ARM Ltd.
|
|
*/
|
|
#ifndef __ASM_BARRIER_H
|
|
#define __ASM_BARRIER_H
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#include <linux/kasan-checks.h>
|
|
|
|
#include <asm/alternative-macros.h>
|
|
|
|
/*
 * __nops(n) builds the assembler text for a run of n NOP instructions using
 * a .rept/.endr loop.  n is stringified as written, so it must be something
 * the assembler itself can evaluate.
 *
 * nops(n) emits that text as a standalone inline asm statement.
 */
#define __nops(n)	".rept " #n "\nnop\n.endr\n"
#define nops(n)		asm volatile(__nops(n))
|
|
|
|
/*
 * Event / interrupt wait primitives.
 *
 * Each macro expands to a single inline asm statement carrying a "memory"
 * clobber, so the compiler will not cache memory values in registers across
 * it or move memory accesses past it.
 *
 * wfet(val) and wfit(val) write val to the system-register encodings
 * s0_3_c1_c0_0 and s0_3_c1_c0_1 respectively.
 * NOTE(review): these look like the WFET/WFIT (wait with timeout)
 * instructions spelled by raw encoding -- confirm against the Arm ARM.
 */
#define sev()		asm volatile("sev" : : : "memory")
#define wfe()		asm volatile("wfe" : : : "memory")
#define wfet(val)	asm volatile("msr s0_3_c1_c0_0, %0"	\
				     : : "r" (val) : "memory")
#define wfi()		asm volatile("wfi" : : : "memory")
#define wfit(val)	asm volatile("msr s0_3_c1_c0_1, %0"	\
				     : : "r" (val) : "memory")
|
|
|
|
/*
 * Raw barrier instructions.
 *
 * isb() emits ISB.  dmb(opt) and dsb(opt) emit DMB/DSB with the option
 * given by opt, which is pasted into the instruction text verbatim
 * (e.g. dmb(ish), dsb(sy)).
 *
 * All three carry a "memory" clobber, so they also act as compiler barriers.
 */
#define isb()		asm volatile("isb" : : : "memory")
#define dmb(opt)	asm volatile("dmb " #opt : : : "memory")
#define dsb(opt)	asm volatile("dsb " #opt : : : "memory")
|
|
|
|
/*
 * Barriers that are emitted through the generic HINT instruction space
 * (hint #17, #18 and #20) rather than by mnemonic.  Each has a "memory"
 * clobber and so doubles as a compiler barrier.
 *
 * NOTE(review): going by the macro names these are PSB CSYNC, TSB CSYNC and
 * CSDB; the numeric form is presumably used so that assemblers without the
 * mnemonics still accept them -- confirm.
 */
#define psb_csync()	asm volatile("hint #17" : : : "memory")
#define __tsb_csync()	asm volatile("hint #18" : : : "memory")
#define csdb()		asm volatile("hint #20" : : : "memory")
|
|
|
|
/*
 * Data Gathering Hint:
 * This instruction prevents merging memory accesses with Normal-NC or
 * Device-GRE attributes before the hint instruction with any memory accesses
 * appearing after the hint instruction.
 *
 * Emitted as "hint #6" with a "memory" clobber, so the compiler will not
 * move memory accesses across it either.
 */
#define dgh()		asm volatile("hint #6" : : : "memory")
|
|
|
|
/*
 * pmr_sync() - synchronise a preceding change to the interrupt priority mask.
 *
 * With CONFIG_ARM64_PSEUDO_NMI this is an alternative sequence: the default
 * instruction is "dsb sy", and alt_cb_patch_nops is the patching callback
 * associated with the ARM64_HAS_GIC_PRIO_RELAXED_SYNC cpucap, i.e. the DSB is
 * relaxed to a NOP on systems that have that capability.
 *
 * Without CONFIG_ARM64_PSEUDO_NMI it expands to an empty statement.
 *
 * Note that the asm statement has no "memory" clobber.
 */
#ifdef CONFIG_ARM64_PSEUDO_NMI
#define pmr_sync()						\
	do {							\
		asm volatile(					\
		ALTERNATIVE_CB("dsb sy",			\
			       ARM64_HAS_GIC_PRIO_RELAXED_SYNC,	\
			       alt_cb_patch_nops)		\
		);						\
	} while (0)
#else
#define pmr_sync()	do {} while (0)
#endif
|
|
|
|
/*
 * Mandatory barriers: DSB with the full-system options (sy / ld / st) for
 * the full, read and write variants respectively.
 */
#define __mb()		dsb(sy)
#define __rmb()		dsb(ld)
#define __wmb()		dsb(st)

/*
 * DMA barriers: DMB with the outer-shareable options (osh / oshld / oshst)
 * for the full, read and write variants respectively.
 */
#define __dma_mb()	dmb(osh)
#define __dma_rmb()	dmb(oshld)
#define __dma_wmb()	dmb(oshst)

/* io_stop_wc() is implemented with the Data Gathering Hint, dgh(). */
#define io_stop_wc()	dgh()
|
|
|
|
/*
 * tsb_csync() - issue a trace synchronisation barrier.
 *
 * Always issues one __tsb_csync(); when the final system capabilities include
 * ARM64_WORKAROUND_TSB_FLUSH_FAILURE an additional one is issued first.
 */
#define tsb_csync()								\
	do {									\
		/*								\
		 * CPUs affected by Arm Erratum 2054223 or 2067961 need		\
		 * another TSB to ensure the trace is flushed. The barriers	\
		 * don't have to be strictly back to back, as long as the	\
		 * CPU is in trace prohibited state.				\
		 */								\
		if (cpus_have_final_cap(ARM64_WORKAROUND_TSB_FLUSH_FAILURE))	\
			__tsb_csync();						\
		__tsb_csync();							\
	} while (0)
|
|
|
|
/*
|
|
* Generate a mask for array_index__nospec() that is ~0UL when 0 <= idx < sz
|
|
* and 0 otherwise.
|
|
*/
|
|
#define array_index_mask_nospec array_index_mask_nospec
|
|
static inline unsigned long array_index_mask_nospec(unsigned long idx,
|
|
unsigned long sz)
|
|
{
|
|
unsigned long mask;
|
|
|
|
asm volatile(
|
|
" cmp %1, %2\n"
|
|
" sbc %0, xzr, xzr\n"
|
|
: "=r" (mask)
|
|
: "r" (idx), "Ir" (sz)
|
|
: "cc");
|
|
|
|
csdb();
|
|
return mask;
|
|
}
|
|
|
|
/*
 * Ensure that reads of the counter are treated the same as memory reads
 * for the purposes of ordering by subsequent memory barriers.
 *
 * This insanity brought to you by speculative system register reads,
 * out-of-order memory accesses, sequence locks and Thomas Gleixner.
 *
 * https://lore.kernel.org/r/alpine.DEB.2.21.1902081950260.1662@nanos.tec.linutronix.de/
 *
 * Mechanically: val is XORed with itself (always zero, but the result
 * depends on val), that zero is added to sp, and a load from the resulting
 * address is issued with xzr as the destination so the loaded data is
 * discarded.  val is evaluated exactly once.
 */
#define arch_counter_enforce_ordering(val) do {				\
	u64 tmp, _val = (val);						\
									\
	asm volatile(							\
	" eor %0, %1, %1\n"						\
	" add %0, sp, %0\n"						\
	" ldr xzr, [%0]"						\
	: "=r" (tmp) : "r" (_val));					\
} while (0)
|
|
|
|
/*
 * SMP barriers: DMB with the inner-shareable options (ish / ishld / ishst)
 * for the full, read and write variants respectively.
 */
#define __smp_mb()	dmb(ish)
#define __smp_rmb()	dmb(ishld)
#define __smp_wmb()	dmb(ishst)
|
|
|
|
/*
 * __smp_store_release(p, v) - store v to *p using a store-release instruction.
 *
 * v is converted to the unqualified scalar type of *p and parked in a union
 * so that it can be re-read as a plain __u8/__u16/__u32/__u64 matching
 * sizeof(*p).  The store is then a single STLRB, STLRH or STLR chosen by
 * that size.  Sizes other than 1, 2, 4 or 8 bytes match no case and store
 * nothing; compiletime_assert_atomic_type() is presumably what rejects them
 * at build time (its definition is not visible here).
 *
 * p is copied into __p once; every other use of p is an operand of
 * typeof()/sizeof() or of compiletime_assert_atomic_type().
 * kasan_check_write() is told about the access before it happens.
 */
#define __smp_store_release(p, v)					\
do {									\
	typeof(p) __p = (p);						\
	union { __unqual_scalar_typeof(*p) __val; char __c[1]; } __u =	\
		{ .__val = (__force __unqual_scalar_typeof(*p)) (v) };	\
	compiletime_assert_atomic_type(*p);				\
	kasan_check_write(__p, sizeof(*p));				\
	switch (sizeof(*p)) {						\
	case 1:								\
		asm volatile ("stlrb %w1, %0"				\
				: "=Q" (*__p)				\
				: "r" (*(__u8 *)__u.__c)		\
				: "memory");				\
		break;							\
	case 2:								\
		asm volatile ("stlrh %w1, %0"				\
				: "=Q" (*__p)				\
				: "r" (*(__u16 *)__u.__c)		\
				: "memory");				\
		break;							\
	case 4:								\
		asm volatile ("stlr %w1, %0"				\
				: "=Q" (*__p)				\
				: "r" (*(__u32 *)__u.__c)		\
				: "memory");				\
		break;							\
	case 8:								\
		asm volatile ("stlr %1, %0"				\
				: "=Q" (*__p)				\
				: "r" (*(__u64 *)__u.__c)		\
				: "memory");				\
		break;							\
	}								\
} while (0)
|
|
|
|
/*
 * __smp_load_acquire(p) - load *p using a load-acquire instruction.
 *
 * The load is a single LDARB, LDARH or LDAR chosen by sizeof(*p); the result
 * is written into a union through a plain __u8/__u16/__u32/__u64 view and
 * read back as the unqualified scalar type of *p, then cast to typeof(*p) as
 * the value of the statement expression.  Sizes other than 1, 2, 4 or 8
 * bytes match no case and leave the union unwritten;
 * compiletime_assert_atomic_type() is presumably what rejects them at build
 * time (its definition is not visible here).
 *
 * p is copied into __p once; every other use of p is an operand of
 * typeof()/sizeof() or of compiletime_assert_atomic_type().
 * kasan_check_read() is told about the access before it happens.
 */
#define __smp_load_acquire(p)						\
({									\
	union { __unqual_scalar_typeof(*p) __val; char __c[1]; } __u;	\
	typeof(p) __p = (p);						\
	compiletime_assert_atomic_type(*p);				\
	kasan_check_read(__p, sizeof(*p));				\
	switch (sizeof(*p)) {						\
	case 1:								\
		asm volatile ("ldarb %w0, %1"				\
			: "=r" (*(__u8 *)__u.__c)			\
			: "Q" (*__p) : "memory");			\
		break;							\
	case 2:								\
		asm volatile ("ldarh %w0, %1"				\
			: "=r" (*(__u16 *)__u.__c)			\
			: "Q" (*__p) : "memory");			\
		break;							\
	case 4:								\
		asm volatile ("ldar %w0, %1"				\
			: "=r" (*(__u32 *)__u.__c)			\
			: "Q" (*__p) : "memory");			\
		break;							\
	case 8:								\
		asm volatile ("ldar %0, %1"				\
			: "=r" (*(__u64 *)__u.__c)			\
			: "Q" (*__p) : "memory");			\
		break;							\
	}								\
	(typeof(*p))__u.__val;						\
})
|
|
|
|
/*
 * smp_cond_load_relaxed(ptr, cond_expr) - wait until cond_expr holds.
 *
 * Repeatedly loads *ptr with READ_ONCE() into the local VAL and evaluates
 * cond_expr, which refers to the freshly loaded value by the name VAL.
 * When cond_expr is false, __cmpwait_relaxed(__PTR, VAL) is called before
 * the next iteration.
 * NOTE(review): __cmpwait_relaxed() presumably waits until *ptr may have
 * changed from VAL -- its definition is not visible here.
 *
 * ptr is evaluated once.  The value of the expression is the VAL that
 * satisfied cond_expr, cast to typeof(*ptr).
 */
#define smp_cond_load_relaxed(ptr, cond_expr)				\
({									\
	typeof(ptr) __PTR = (ptr);					\
	__unqual_scalar_typeof(*ptr) VAL;				\
	for (;;) {							\
		VAL = READ_ONCE(*__PTR);				\
		if (cond_expr)						\
			break;						\
		__cmpwait_relaxed(__PTR, VAL);				\
	}								\
	(typeof(*ptr))VAL;						\
})
|
|
|
|
/*
 * smp_cond_load_acquire(ptr, cond_expr) - wait until cond_expr holds, loading
 * with acquire semantics.
 *
 * Repeatedly loads *ptr with smp_load_acquire() into the local VAL and
 * evaluates cond_expr, which refers to the freshly loaded value by the name
 * VAL.  When cond_expr is false, __cmpwait_relaxed(__PTR, VAL) is called
 * before the next iteration.
 * NOTE(review): __cmpwait_relaxed() presumably waits until *ptr may have
 * changed from VAL -- its definition is not visible here.
 *
 * ptr is evaluated once.  The value of the expression is the VAL that
 * satisfied cond_expr, cast to typeof(*ptr).
 */
#define smp_cond_load_acquire(ptr, cond_expr)				\
({									\
	typeof(ptr) __PTR = (ptr);					\
	__unqual_scalar_typeof(*ptr) VAL;				\
	for (;;) {							\
		VAL = smp_load_acquire(__PTR);				\
		if (cond_expr)						\
			break;						\
		__cmpwait_relaxed(__PTR, VAL);				\
	}								\
	(typeof(*ptr))VAL;						\
})
|
|
|
|
#include <asm-generic/barrier.h>
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
#endif /* __ASM_BARRIER_H */
|