The only remaining reason why EFI runtime services are invoked with preemption disabled is the fact that the mm is swapped out behind the back of the context switching code. The kernel no longer disables preemption in kernel_neon_begin(). Furthermore, the EFI spec is being clarified to explicitly state that only baseline FP/SIMD is permitted in EFI runtime service implementations, and so the existing kernel mode NEON context switching code is sufficient to preserve and restore the execution context of an in-progress EFI runtime service call.

Most EFI calls are made from the efi_rts_wq, which is serviced by a kthread. As kthreads never return to user space, they usually don't have an mm, and so we can use the existing infrastructure to swap in the efi_mm while the EFI call is in progress. This is visible to the scheduler, which will therefore reactivate the selected mm when switching out the kthread and back in again.

Given that the EFI spec explicitly permits runtime services to be called with interrupts enabled, firmware code is already required to tolerate interruptions. So rather than disable preemption, disable only migration so that EFI runtime services are less likely to cause scheduling delays.

To avoid potential issues where runtime services are interrupted while polling the secure firmware for async completions, keep migration disabled so that a runtime service invocation does not resume on a different CPU from the one it was started on. Note, though, that the firmware executes at the same privilege level as the kernel, and is therefore able to disable interrupts altogether.

Acked-by: Will Deacon <will@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
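For orientation, here is a minimal sketch of how a runtime service call ends up bracketed by the arch_efi_call_virt_setup()/arch_efi_call_virt_teardown() hooks defined near the end of this file. The helper name and the direct call through the runtime services table are illustrative only; in the kernel, dispatch goes through the generic EFI runtime wrappers and the efi_rts_wq worker described above.

/* Illustrative helper, not part of the kernel sources. */
static efi_status_t example_call_get_time(efi_runtime_services_t *rt,
					  efi_time_t *tm, efi_time_cap_t *tc)
{
	efi_status_t status;

	arch_efi_call_virt_setup();	/* switch to efi_mm; kthreads also disable migration */
	status = rt->get_time(tm, tc);	/* runs with preemption enabled */
	arch_efi_call_virt_teardown();	/* restore the mm, re-enable migration */

	return status;
}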
264 lines
7.2 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Extensible Firmware Interface
 *
 * Based on Extensible Firmware Interface Specification version 2.4
 *
 * Copyright (C) 2013, 2014 Linaro Ltd.
 */

#include <linux/efi.h>
#include <linux/init.h>
#include <linux/kmemleak.h>
#include <linux/kthread.h>
#include <linux/screen_info.h>
#include <linux/vmalloc.h>

#include <asm/efi.h>
#include <asm/stacktrace.h>
#include <asm/vmap_stack.h>

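/*
 * EFI memory descriptors are expressed in 4 KiB EFI pages. When the kernel
 * page size is larger (16K/64K), a region's start or size may not be aligned
 * to the kernel's page size; such regions need special treatment below.
 */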
static bool region_is_misaligned(const efi_memory_desc_t *md)
{
	if (PAGE_SIZE == EFI_PAGE_SIZE)
		return false;
	return !PAGE_ALIGNED(md->phys_addr) ||
	       !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT);
}

/*
 * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
 * executable, everything else can be mapped with the XN bits
 * set. Also take the new (optional) RO/XP bits into account.
 */
static __init ptdesc_t create_mapping_protection(efi_memory_desc_t *md)
{
	u64 attr = md->attribute;
	u32 type = md->type;

	if (type == EFI_MEMORY_MAPPED_IO) {
		pgprot_t prot = __pgprot(PROT_DEVICE_nGnRE);

		if (arm64_is_protected_mmio(md->phys_addr,
					    md->num_pages << EFI_PAGE_SHIFT))
			prot = pgprot_encrypted(prot);
		else
			prot = pgprot_decrypted(prot);
		return pgprot_val(prot);
	}

	if (region_is_misaligned(md)) {
		static bool __initdata code_is_misaligned;

		/*
		 * Regions that are not aligned to the OS page size cannot be
		 * mapped with strict permissions, as those might interfere
		 * with the permissions that are needed by the adjacent
		 * region's mapping. However, if we haven't encountered any
		 * misaligned runtime code regions so far, we can safely use
		 * non-executable permissions for non-code regions.
		 */
		code_is_misaligned |= (type == EFI_RUNTIME_SERVICES_CODE);

		return code_is_misaligned ? pgprot_val(PAGE_KERNEL_EXEC)
					  : pgprot_val(PAGE_KERNEL);
	}

	/* R-- */
	if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) ==
	    (EFI_MEMORY_XP | EFI_MEMORY_RO))
		return pgprot_val(PAGE_KERNEL_RO);

	/* R-X */
	if (attr & EFI_MEMORY_RO)
		return pgprot_val(PAGE_KERNEL_ROX);

	/* RW- */
	if (((attr & (EFI_MEMORY_RP | EFI_MEMORY_WP | EFI_MEMORY_XP)) ==
	     EFI_MEMORY_XP) ||
	    type != EFI_RUNTIME_SERVICES_CODE)
		return pgprot_val(PAGE_KERNEL);

	/* RWX */
	return pgprot_val(PAGE_KERNEL_EXEC);
}

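/*
 * Map a single EFI runtime region into the EFI page tables (efi_mm). Called
 * once per region when the virtual memory map is set up at boot, before any
 * runtime services are invoked.
 */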
int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
{
	ptdesc_t prot_val = create_mapping_protection(md);
	bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE ||
				   md->type == EFI_RUNTIME_SERVICES_DATA);

	/*
	 * If this region is not aligned to the page size used by the OS, the
	 * mapping will be rounded outwards, and may end up sharing a page
	 * frame with an adjacent runtime memory region. Given that the page
	 * table descriptor covering the shared page will be rewritten when the
	 * adjacent region gets mapped, we must avoid block mappings here so we
	 * don't have to worry about splitting them when that happens.
	 */
	if (region_is_misaligned(md))
		page_mappings_only = true;

	create_pgd_mapping(mm, md->phys_addr, md->virt_addr,
			   md->num_pages << EFI_PAGE_SHIFT,
			   __pgprot(prot_val | PTE_NG), page_mappings_only);
	return 0;
}

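/*
 * set_permissions() is the per-PTE callback passed to apply_to_page_range()
 * by efi_set_mapping_permissions() below. set_perm_data carries the memory
 * descriptor and a flag indicating whether BTI guarded pages may be used.
 */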
struct set_perm_data {
	const efi_memory_desc_t *md;
	bool			has_bti;
};

static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
{
	struct set_perm_data *spd = data;
	const efi_memory_desc_t *md = spd->md;
	pte_t pte = __ptep_get(ptep);

	if (md->attribute & EFI_MEMORY_RO)
		pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
	if (md->attribute & EFI_MEMORY_XP)
		pte = set_pte_bit(pte, __pgprot(PTE_PXN));
	else if (system_supports_bti_kernel() && spd->has_bti)
		pte = set_pte_bit(pte, __pgprot(PTE_GP));
	__set_pte(ptep, pte);
	return 0;
}

int __init efi_set_mapping_permissions(struct mm_struct *mm,
				       efi_memory_desc_t *md,
				       bool has_bti)
{
	struct set_perm_data data = { md, has_bti };

	BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE &&
	       md->type != EFI_RUNTIME_SERVICES_DATA);

	if (region_is_misaligned(md))
		return 0;

	/*
	 * Calling apply_to_page_range() is only safe on regions that are
	 * guaranteed to be mapped down to pages. Since we are only called
	 * for regions that have been mapped using efi_create_mapping() above
	 * (and this is checked by the generic Memory Attributes table parsing
	 * routines), there is no need to check that again here.
	 */
	return apply_to_page_range(mm, md->virt_addr,
				   md->num_pages << EFI_PAGE_SHIFT,
				   set_permissions, &data);
}

/*
 * UpdateCapsule() depends on the system being shutdown via
 * ResetSystem().
 */
bool efi_poweroff_required(void)
{
	return efi_enabled(EFI_RUNTIME_SERVICES);
}

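/*
 * Called from the EFI runtime service assembly wrapper when the firmware
 * returns with x18 clobbered; x18 is reserved by the kernel (and used as the
 * shadow call stack pointer when CONFIG_SHADOW_CALL_STACK=y). Log the
 * offending service and propagate the original return status unchanged.
 */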
asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f)
{
	pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f);
	return s;
}

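/*
 * arch_efi_call_virt_setup()/teardown() bracket every EFI runtime service
 * invocation made by the generic runtime wrappers. Calls issued from the
 * efi_rts_wq kthread borrow the efi_mm via kthread_use_mm() with migration
 * (but not preemption) disabled; other callers fall back to
 * efi_virtmap_load()/efi_virtmap_unload().
 */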
void arch_efi_call_virt_setup(void)
{
	efi_runtime_assert_lock_held();

	if (preemptible() && (current->flags & PF_KTHREAD)) {
		/*
		 * Disable migration to ensure that a preempted EFI runtime
		 * service call will be resumed on the same CPU. This avoids
		 * potential issues with EFI runtime calls that are preempted
		 * while polling for an asynchronous completion of a secure
		 * firmware call, which may not permit the CPU to change.
		 */
		migrate_disable();
		kthread_use_mm(&efi_mm);
	} else {
		efi_virtmap_load();
	}

	/*
	 * Enable access to the valid TTBR0_EL1 and invoke the errata
	 * workaround directly since there is no return from exception when
	 * invoking the EFI run-time services.
	 */
	uaccess_ttbr0_enable();
	post_ttbr_update_workaround();

	__efi_fpsimd_begin();
}

void arch_efi_call_virt_teardown(void)
{
	__efi_fpsimd_end();

	/*
	 * Defer the switch to the current thread's TTBR0_EL1 until
	 * uaccess_enable(). Do so before efi_virtmap_unload() updates the
	 * saved TTBR0 value, so the userland page tables are not activated
	 * inadvertently over the back of an exception.
	 */
	uaccess_ttbr0_disable();

	if (preemptible() && (current->flags & PF_KTHREAD)) {
		kthread_unuse_mm(&efi_mm);
		migrate_enable();
	} else {
		efi_virtmap_unload();
	}
}

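/*
 * Top of the dedicated stack that the assembly wrapper switches to before
 * entering the firmware, and the recovery stub that a faulting runtime
 * service call is diverted to (see efi_runtime_fixup_exception() below).
 */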
asmlinkage u64 *efi_rt_stack_top __ro_after_init;

asmlinkage efi_status_t __efi_rt_asm_recover(void);

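/*
 * Called from the synchronous exception handling code when a fault is taken
 * while the current task is executing firmware code. Rather than bringing
 * down the kernel, abort the interrupted call: report EFI_ABORTED to the
 * caller, disable runtime services, and redirect execution to the recovery
 * stub using the link register (and shadow call stack pointer) saved at the
 * top of the EFI runtime stack.
 */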
bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg)
{
	/* Check whether the exception occurred while running the firmware */
	if (!current_in_efi() || regs->pc >= TASK_SIZE_64)
		return false;

	pr_err(FW_BUG "Unable to handle %s in EFI runtime service\n", msg);
	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
	clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);

	regs->regs[0] = EFI_ABORTED;
	regs->regs[30] = efi_rt_stack_top[-1];
	regs->pc = (u64)__efi_rt_asm_recover;

	if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK))
		regs->regs[18] = efi_rt_stack_top[-2];

	return true;
}

/* EFI requires 8 KiB of stack space for runtime services */
static_assert(THREAD_SIZE >= SZ_8K);

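/*
 * Allocate the dedicated stack used for EFI runtime service calls and record
 * its top in efi_rt_stack_top; if the allocation fails, runtime services are
 * disabled.
 */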
static int __init arm64_efi_rt_init(void)
{
	void *p;

	if (!efi_enabled(EFI_RUNTIME_SERVICES))
		return 0;

	p = arch_alloc_vmap_stack(THREAD_SIZE, NUMA_NO_NODE);
	if (!p) {
		pr_warn("Failed to allocate EFI runtime stack\n");
		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
		return -ENOMEM;
	}

	kmemleak_not_leak(p);
	efi_rt_stack_top = p + THREAD_SIZE;
	return 0;
}
core_initcall(arm64_efi_rt_init);