Files
linux/arch/arm64/kernel/sdei.c
Catalin Marinas 17c05cb0ef Merge branches 'for-next/misc', 'for-next/kselftest', 'for-next/efi-preempt', 'for-next/assembler-macro', 'for-next/typos', 'for-next/sme-ptrace-disable', 'for-next/local-tlbi-page-reused', 'for-next/mpam', 'for-next/acpi' and 'for-next/documentation', remote-tracking branch 'arm64/for-next/perf' into for-next/core
* arm64/for-next/perf:
  perf: arm_spe: Add support for filtering on data source
  perf: Add perf_event_attr::config4
  perf/imx_ddr: Add support for PMU in DB (system interconnects)
  perf/imx_ddr: Get and enable optional clks
  perf/imx_ddr: Move ida_alloc() from ddr_perf_init() to ddr_perf_probe()
  dt-bindings: perf: fsl-imx-ddr: Add compatible string for i.MX8QM, i.MX8QXP and i.MX8DXL
  arch_topology: Provide a stub topology_core_has_smt() for !CONFIG_GENERIC_ARCH_TOPOLOGY
  perf/arm-ni: Fix and optimise register offset calculation
  perf: arm_pmuv3: Add new Cortex and C1 CPU PMUs
  perf: arm_cspmu: fix error handling in arm_cspmu_impl_unregister()
  perf/arm-ni: Add NoC S3 support
  perf/arm_cspmu: nvidia: Add pmevfiltr2 support
  perf/arm_cspmu: nvidia: Add revision id matching
  perf/arm_cspmu: Add pmpidr support
  perf/arm_cspmu: Add callback to reset filter config
  perf: arm_pmuv3: Don't use PMCCNTR_EL0 on SMT cores

* for-next/misc:
  : Miscellaneous patches
  arm64: atomics: lse: Remove unused parameters from ATOMIC_FETCH_OP_AND macros
  arm64: remove duplicate ARCH_HAS_MEM_ENCRYPT
  arm64: mm: use untagged address to calculate page index
  arm64: mm: make linear mapping permission update more robust for partial range
  arm64/mm: Elide TLB flush in certain pte protection transitions
  arm64/mm: Rename try_pgd_pgtable_alloc_init_mm
  arm64/mm: Allow __create_pgd_mapping() to propagate pgtable_alloc() errors
  arm64: add unlikely hint to MTE async fault check in el0_svc_common
  arm64: acpi: add newline to deferred APEI warning
  arm64: entry: Clean out some indirection
  arm64/mm: Ensure PGD_SIZE is aligned to 64 bytes when PA_BITS = 52
  arm64/mm: Drop cpu_set_[default|idmap]_tcr_t0sz()
  arm64: remove unused ARCH_PFN_OFFSET
  arm64: use SOFTIRQ_ON_OWN_STACK for enabling softirq stack
  arm64: Remove assertion on CONFIG_VMAP_STACK

* for-next/kselftest:
  : arm64 kselftest patches
  kselftest/arm64: Align zt-test register dumps

* for-next/efi-preempt:
  : arm64: Make EFI calls preemptible
  arm64/efi: Call EFI runtime services without disabling preemption
  arm64/efi: Move uaccess en/disable out of efi_set_pgd()
  arm64/efi: Drop efi_rt_lock spinlock from EFI arch wrapper
  arm64/fpsimd: Permit kernel mode NEON with IRQs off
  arm64/fpsimd: Don't warn when EFI execution context is preemptible
  efi/runtime-wrappers: Keep track of the efi_runtime_lock owner
  efi: Add missing static initializer for efi_mm::cpus_allowed_lock

* for-next/assembler-macro:
  : arm64: Replace __ASSEMBLY__ with __ASSEMBLER__ in headers
  arm64: Replace __ASSEMBLY__ with __ASSEMBLER__ in non-uapi headers
  arm64: Replace __ASSEMBLY__ with __ASSEMBLER__ in uapi headers

* for-next/typos:
  : Random typo/spelling fixes
  arm64: Fix double word in comments
  arm64: Fix typos and spelling errors in comments

* for-next/sme-ptrace-disable:
  : Support disabling streaming mode via ptrace on SME only systems
  kselftest/arm64: Cover disabling streaming mode without SVE in fp-ptrace
  kselftest/arm64: Test NT_ARM_SVE FPSIMD format writes on non-SVE systems
  arm64/sme: Support disabling streaming mode via ptrace on SME only systems

* for-next/local-tlbi-page-reused:
  : arm64, mm: avoid TLBI broadcast if page reused in write fault
  arm64, tlbflush: don't TLBI broadcast if page reused in write fault
  mm: add spurious fault fixing support for huge pmd

* for-next/mpam: (34 commits)
  : Basic Arm MPAM driver (more to follow)
  MAINTAINERS: new entry for MPAM Driver
  arm_mpam: Add kunit tests for props_mismatch()
  arm_mpam: Add kunit test for bitmap reset
  arm_mpam: Add helper to reset saved mbwu state
  arm_mpam: Use long MBWU counters if supported
  arm_mpam: Probe for long/lwd mbwu counters
  arm_mpam: Consider overflow in bandwidth counter state
  arm_mpam: Track bandwidth counter state for power management
  arm_mpam: Add mpam_msmon_read() to read monitor value
  arm_mpam: Add helpers to allocate monitors
  arm_mpam: Probe and reset the rest of the features
  arm_mpam: Allow configuration to be applied and restored during cpu online
  arm_mpam: Use a static key to indicate when mpam is enabled
  arm_mpam: Register and enable IRQs
  arm_mpam: Extend reset logic to allow devices to be reset any time
  arm_mpam: Add a helper to touch an MSC from any CPU
  arm_mpam: Reset MSC controls from cpuhp callbacks
  arm_mpam: Merge supported features during mpam_enable() into mpam_class
  arm_mpam: Probe the hardware features resctrl supports
  arm_mpam: Add helpers for managing the locking around the mon_sel registers
  ...

* for-next/acpi:
  : arm64 acpi updates
  ACPI: GTDT: Get rid of acpi_arch_timer_mem_init()

* for-next/documentation:
  : arm64 Documentation updates
  Documentation/arm64: Fix the typo of register names
2025-11-28 15:47:12 +00:00

260 lines
6.1 KiB
C

// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2017 Arm Ltd.
#define pr_fmt(fmt) "sdei: " fmt
#include <linux/arm-smccc.h>
#include <linux/arm_sdei.h>
#include <linux/hardirq.h>
#include <linux/irqflags.h>
#include <linux/sched/task_stack.h>
#include <linux/scs.h>
#include <linux/uaccess.h>
#include <asm/alternative.h>
#include <asm/exception.h>
#include <asm/kprobes.h>
#include <asm/mmu.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
#include <asm/stacktrace.h>
#include <asm/sysreg.h>
#include <asm/vmap_stack.h>
/*
 * Exit conduit recorded by sdei_arch_get_entry_point(): SDEI_EXIT_HVC when the
 * conduit is SMCCC_CONDUIT_HVC, SDEI_EXIT_SMC otherwise.
 * NOTE(review): no reader is visible in the C code here - presumably consumed
 * by the SDEI entry/exit assembly; confirm.
 */
unsigned long sdei_exit_mode;
/*
 * VMAP'd stacks check for stack overflow on exception entry using sp as a
 * scratch register, meaning SDEI has to switch to its own stack. We need two
 * stacks as a critical event may interrupt a normal event that has just taken
 * a synchronous exception, and is using sp as a scratch register. For a
 * critical event interrupting a normal event, we can't reliably tell if we
 * were on the sdei stack.
 * For now, we allocate stacks when the driver is probed.
 */
/*
 * Per-CPU pointers to the normal and critical SDEI stacks. Populated by
 * init_sdei_stacks() and reset to NULL by free_sdei_stacks().
 */
DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
DEFINE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
DEFINE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
/*
 * Per-CPU pointers to the normal and critical SDEI shadow call stacks. They
 * are declared unconditionally but only defined when CONFIG_SHADOW_CALL_STACK
 * is set; init_sdei_scs() allocates them only when scs_is_enabled().
 * NOTE(review): presumably the unconditional declarations let the helpers
 * below compile without CONFIG_SHADOW_CALL_STACK - confirm.
 */
DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr);
DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr);
#ifdef CONFIG_SHADOW_CALL_STACK
DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr);
DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr);
#endif
/*
 * Per-CPU pointers to a struct sdei_registered_event, one for each of the
 * normal and critical priorities.
 * NOTE(review): not referenced by the C code visible here - presumably
 * maintained by the SDEI entry assembly; confirm.
 */
DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event);
DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event);
/*
 * Release the SDEI stack recorded in the per-CPU slot @ptr for @cpu, if any.
 *
 * The slot is reset to NULL before the memory is passed to vfree(), so the
 * slot never refers to freed memory. An empty slot is left untouched.
 */
static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu)
{
	unsigned long *stack = per_cpu(*ptr, cpu);

	if (!stack)
		return;

	per_cpu(*ptr, cpu) = NULL;
	vfree(stack);
}
/*
 * Free the normal and the critical SDEI stack of every possible CPU.
 * CPUs whose slots are already empty are skipped by _free_sdei_stack().
 */
static void free_sdei_stacks(void)
{
	int this_cpu;

	for_each_possible_cpu(this_cpu) {
		_free_sdei_stack(&sdei_stack_normal_ptr, this_cpu);
		_free_sdei_stack(&sdei_stack_critical_ptr, this_cpu);
	}
}
/*
 * Allocate one SDEI stack of SDEI_STACK_SIZE bytes for @cpu and record it in
 * the per-CPU slot @ptr. The allocation is requested with cpu_to_node(cpu)
 * as the node argument.
 *
 * Returns 0 on success, or -ENOMEM if the allocation fails (the slot is not
 * written in that case).
 */
static int _init_sdei_stack(unsigned long * __percpu *ptr, int cpu)
{
	unsigned long *stack;

	stack = arch_alloc_vmap_stack(SDEI_STACK_SIZE, cpu_to_node(cpu));
	if (stack) {
		per_cpu(*ptr, cpu) = stack;
		return 0;
	}

	return -ENOMEM;
}
/*
 * Allocate a normal and a critical SDEI stack for every possible CPU.
 *
 * On the first allocation failure, everything allocated so far is released
 * via free_sdei_stacks() and the error is returned. Returns 0 on success.
 */
static int init_sdei_stacks(void)
{
	int cpu;
	int ret = 0;

	for_each_possible_cpu(cpu) {
		ret = _init_sdei_stack(&sdei_stack_normal_ptr, cpu);
		if (!ret)
			ret = _init_sdei_stack(&sdei_stack_critical_ptr, cpu);
		if (ret)
			goto unwind;
	}

	return 0;

unwind:
	free_sdei_stacks();
	return ret;
}
/*
 * Release the SDEI shadow call stack recorded in the per-CPU slot @ptr for
 * @cpu, if any.
 *
 * The slot is reset to NULL before the memory is passed to scs_free(). An
 * empty slot is left untouched.
 */
static void _free_sdei_scs(unsigned long * __percpu *ptr, int cpu)
{
	void *scs = per_cpu(*ptr, cpu);

	if (!scs)
		return;

	per_cpu(*ptr, cpu) = NULL;
	scs_free(scs);
}
/*
 * Free the normal and the critical SDEI shadow call stack of every possible
 * CPU. CPUs whose slots are already empty are skipped by _free_sdei_scs().
 */
static void free_sdei_scs(void)
{
	int this_cpu;

	for_each_possible_cpu(this_cpu) {
		_free_sdei_scs(&sdei_shadow_call_stack_normal_ptr, this_cpu);
		_free_sdei_scs(&sdei_shadow_call_stack_critical_ptr, this_cpu);
	}
}
/*
 * Allocate one shadow call stack for @cpu with scs_alloc(), passing
 * cpu_to_node(cpu) as the node argument, and record it in the per-CPU slot
 * @ptr.
 *
 * Returns 0 on success, or -ENOMEM if the allocation fails (the slot is not
 * written in that case).
 */
static int _init_sdei_scs(unsigned long * __percpu *ptr, int cpu)
{
	void *scs = scs_alloc(cpu_to_node(cpu));

	if (scs) {
		per_cpu(*ptr, cpu) = scs;
		return 0;
	}

	return -ENOMEM;
}
/*
 * Allocate a normal and a critical SDEI shadow call stack for every possible
 * CPU. Does nothing and returns 0 when scs_is_enabled() is false.
 *
 * On the first allocation failure, everything allocated so far is released
 * via free_sdei_scs() and the error is returned. Returns 0 on success.
 */
static int init_sdei_scs(void)
{
	int cpu;
	int ret = 0;

	if (!scs_is_enabled())
		return 0;

	for_each_possible_cpu(cpu) {
		ret = _init_sdei_scs(&sdei_shadow_call_stack_normal_ptr, cpu);
		if (!ret)
			ret = _init_sdei_scs(&sdei_shadow_call_stack_critical_ptr, cpu);
		if (ret)
			goto unwind;
	}

	return 0;

unwind:
	free_sdei_scs();
	return ret;
}
/*
 * Prepare the architecture side of SDEI and report the handler entry point.
 *
 * Allocates the per-CPU SDEI stacks (and shadow call stacks when enabled),
 * records in sdei_exit_mode whether @conduit is HVC or SMC, and returns the
 * address of the entry code: the trampoline alias when the kernel is unmapped
 * at EL0, otherwise __sdei_asm_handler.
 *
 * Returns 0 if SDEI cannot be supported in this boot configuration or if an
 * allocation fails; stacks allocated before the failure are freed again.
 */
unsigned long sdei_arch_get_entry_point(int conduit)
{
	/*
	 * SDEI works between adjacent exception levels. If we booted at EL1 we
	 * assume a hypervisor is marshalling events. If we booted at EL2 and
	 * dropped to EL1 because we don't support VHE, then we can't support
	 * SDEI.
	 */
	if (is_hyp_nvhe()) {
		pr_err("Not supported on this hardware/boot configuration\n");
		return 0;
	}

	if (init_sdei_stacks())
		return 0;

	if (init_sdei_scs()) {
		/* Undo the stack allocation done just above. */
		free_sdei_stacks();
		return 0;
	}

	if (conduit == SMCCC_CONDUIT_HVC)
		sdei_exit_mode = SDEI_EXIT_HVC;
	else
		sdei_exit_mode = SDEI_EXIT_SMC;

#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
	if (arm64_kernel_unmapped_at_el0()) {
		/* Offset of the SDEI trampoline within the trampoline text. */
		unsigned long offset = (unsigned long)__sdei_asm_entry_trampoline -
				       (unsigned long)__entry_tramp_text_start;

		return TRAMP_VALIAS + offset;
	}
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */

	return (unsigned long)__sdei_asm_handler;
}
/*
 * Run the registered handler for an SDEI event and decide where to resume.
 *
 * @regs: register state of the interrupted context; the first few entries of
 *        regs->regs are filled in here via sdei_api_event_context().
 * @arg:  the registered event, passed through to sdei_event_handler().
 *
 * do_sdei_event() returns one of:
 *  SDEI_EV_HANDLED - success, return to the interrupted context.
 *  SDEI_EV_FAILED  - failure, return this error code to firmware.
 *  virtual-address - success, return to this address.
 */
unsigned long __kprobes do_sdei_event(struct pt_regs *regs,
				      struct sdei_registered_event *arg)
{
	/* Snapshot the system registers before the handler gets to run. */
	const u64 entry_elr = read_sysreg(elr_el1);
	const u32 kernel_mode = read_sysreg(CurrentEL) | 1;	/* +SPSel */
	const unsigned long vbar = read_sysreg(vbar_el1);
	/* One extra register is fetched when the kernel is unmapped at EL0. */
	const int nr_clobbered = arm64_kernel_unmapped_at_el0() ? 5 : 4;
	u32 mode;
	int reg;

	/*
	 * Retrieve the missing registers values. From within the handler,
	 * this call always succeeds, so its result is not checked.
	 */
	for (reg = 0; reg < nr_clobbered; reg++)
		sdei_api_event_context(reg, &regs->regs[reg]);

	if (sdei_event_handler(regs, arg))
		return SDEI_EV_FAILED;

	/*
	 * A changed ELR means we took a synchronous exception from the SDEI
	 * handler. This could deadlock, and if you interrupt KVM it will
	 * hyp-panic instead.
	 */
	if (read_sysreg(elr_el1) != entry_elr)
		pr_warn("unsafe: exception during handler\n");

	mode = regs->pstate & (PSR_MODE32_BIT | PSR_MODE_MASK);

	/*
	 * Unless we interrupted the kernel with interrupts masked (handled
	 * below), we pretend this was an IRQ. This lets user space tasks
	 * receive signals before we return to them, and KVM to invoke its
	 * world switch to do the same.
	 *
	 * See DDI0487B.a Table D1-7 'Vector offsets from vector table base
	 * address'.
	 */
	if (mode != kernel_mode)
		return vbar + ((mode & PSR_MODE32_BIT) ? 0x680 : 0x480);

	/*
	 * If we interrupted the kernel with interrupts masked, we always go
	 * back to wherever we came from.
	 */
	if (regs_irqs_disabled(regs))
		return SDEI_EV_HANDLED;

	return vbar + 0x280;
}