From c0c7fa4e7a512006710c8e4d6b6f7b40c9f786cd Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 10 Jun 2025 14:09:35 +0200 Subject: [PATCH 01/67] docs: arm64: Fix ICC_SRE_EL2 register typo in booting.rst Fix trivial ICC_SRE_EL2 register spelling typo in booting.rst. Signed-off-by: Lorenzo Pieralisi Cc: Jonathan Corbet Cc: Will Deacon Cc: Catalin Marinas Link: https://lore.kernel.org/r/20250610120935.852034-1-lpieralisi@kernel.org Signed-off-by: Will Deacon --- Documentation/arch/arm64/booting.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst index dee7b6de864f..ee9b790c0d72 100644 --- a/Documentation/arch/arm64/booting.rst +++ b/Documentation/arch/arm64/booting.rst @@ -234,7 +234,7 @@ Before jumping into the kernel, the following conditions must be met: - If the kernel is entered at EL1: - - ICC.SRE_EL2.Enable (bit 3) must be initialised to 0b1 + - ICC_SRE_EL2.Enable (bit 3) must be initialised to 0b1 - ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b1. - The DT or ACPI tables must describe a GICv3 interrupt controller. From 650768c512faba8070bf4cfbb28c95eb5cd203f3 Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Tue, 27 May 2025 13:56:33 +0530 Subject: [PATCH 02/67] arm64: Restrict pagetable teardown to avoid false warning Commit 9c006972c3fe ("arm64: mmu: drop pXd_present() checks from pXd_free_pYd_table()") removes the pxd_present() checks because the caller checks pxd_present(). But, in the case of vmap_try_huge_pud(), the caller only checks pud_present(); pud_free_pmd_page() recurses on each pmd through pmd_free_pte_page(), wherein the pmd may be none. It is therefore possible to hit a warning in the latter, since pmd_none => !pmd_table(). Fix this by adding a pmd_present() check in pud_free_pmd_page(). This problem was found by code inspection. Fixes: 9c006972c3fe ("arm64: mmu: drop pXd_present() checks from pXd_free_pYd_table()") Cc: stable@vger.kernel.org Reported-by: Ryan Roberts Acked-by: David Hildenbrand Signed-off-by: Dev Jain Reviewed-by: Catalin Marinas Reviewed-by: Anshuman Khandual Reviewed-by: Ryan Roberts Link: https://lore.kernel.org/r/20250527082633.61073-1-dev.jain@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 8fcf59ba39db..00ab1d648db6 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1305,7 +1305,8 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr) next = addr; end = addr + PUD_SIZE; do { - pmd_free_pte_page(pmdp, next); + if (pmd_present(pmdp_get(pmdp))) + pmd_free_pte_page(pmdp, next); } while (pmdp++, next += PMD_SIZE, next != end); pud_clear(pudp); From d2be3270f40b1330f8c1c9512c59dd085a758ac0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 11 Jun 2025 17:28:13 +0100 Subject: [PATCH 03/67] arm64/gcs: Don't call gcs_free() during flush_gcs() Currently we call gcs_free() during flush_gcs() to reset the thread state for GCS. This includes unmapping any kernel-allocated GCS, but this is redundant when doing a flush_thread() since we are reinitialising the thread memory too. Inline the reinitialisation of the thread struct.
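For context, gcs_free() also unmaps any kernel-allocated GCS; a simplified sketch of it (details elided; the real implementation lives in arch/arm64/mm/gcs.c) shows why calling it here is redundant:

	/* Simplified sketch of gcs_free(), not part of this patch. The
	 * vm_munmap() step is the redundant one during flush_thread(),
	 * since the thread memory is being reinitialised anyway. */
	void gcs_free(struct task_struct *task)
	{
		if (task->thread.gcs_base)
			vm_munmap(task->thread.gcs_base, task->thread.gcs_size);

		task->thread.gcspr_el0 = 0;
		task->thread.gcs_base = 0;
		task->thread.gcs_size = 0;
	}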
Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20250611-arm64-gcs-flush-thread-v1-1-cc26feeddabd@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/process.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index a5ca15daeb8a..5954cec19660 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -288,7 +288,9 @@ static void flush_gcs(void) if (!system_supports_gcs()) return; - gcs_free(current); + current->thread.gcspr_el0 = 0; + current->thread.gcs_base = 0; + current->thread.gcs_size = 0; current->thread.gcs_el0_mode = 0; write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1); write_sysreg_s(0, SYS_GCSPR_EL0); From 39dfc971e42d886e7df01371cd1bef505076d84c Mon Sep 17 00:00:00 2001 From: Tengda Wu Date: Wed, 4 Jun 2025 00:55:33 +0000 Subject: [PATCH 04/67] arm64/ptrace: Fix stack-out-of-bounds read in regs_get_kernel_stack_nth() KASAN reports a stack-out-of-bounds read in regs_get_kernel_stack_nth(). Call Trace: [ 97.283505] BUG: KASAN: stack-out-of-bounds in regs_get_kernel_stack_nth+0xa8/0xc8 [ 97.284677] Read of size 8 at addr ffff800089277c10 by task 1.sh/2550 [ 97.285732] [ 97.286067] CPU: 7 PID: 2550 Comm: 1.sh Not tainted 6.6.0+ #11 [ 97.287032] Hardware name: linux,dummy-virt (DT) [ 97.287815] Call trace: [ 97.288279] dump_backtrace+0xa0/0x128 [ 97.288946] show_stack+0x20/0x38 [ 97.289551] dump_stack_lvl+0x78/0xc8 [ 97.290203] print_address_description.constprop.0+0x84/0x3c8 [ 97.291159] print_report+0xb0/0x280 [ 97.291792] kasan_report+0x84/0xd0 [ 97.292421] __asan_load8+0x9c/0xc0 [ 97.293042] regs_get_kernel_stack_nth+0xa8/0xc8 [ 97.293835] process_fetch_insn+0x770/0xa30 [ 97.294562] kprobe_trace_func+0x254/0x3b0 [ 97.295271] kprobe_dispatcher+0x98/0xe0 [ 97.295955] kprobe_breakpoint_handler+0x1b0/0x210 [ 97.296774] call_break_hook+0xc4/0x100 [ 97.297451] brk_handler+0x24/0x78 [ 97.298073] do_debug_exception+0xac/0x178 [ 97.298785] el1_dbg+0x70/0x90 [ 97.299344] el1h_64_sync_handler+0xcc/0xe8 [ 97.300066] el1h_64_sync+0x78/0x80 [ 97.300699] kernel_clone+0x0/0x500 [ 97.301331] __arm64_sys_clone+0x70/0x90 [ 97.302084] invoke_syscall+0x68/0x198 [ 97.302746] el0_svc_common.constprop.0+0x11c/0x150 [ 97.303569] do_el0_svc+0x38/0x50 [ 97.304164] el0_svc+0x44/0x1d8 [ 97.304749] el0t_64_sync_handler+0x100/0x130 [ 97.305500] el0t_64_sync+0x188/0x190 [ 97.306151] [ 97.306475] The buggy address belongs to stack of task 1.sh/2550 [ 97.307461] and is located at offset 0 in frame: [ 97.308257] __se_sys_clone+0x0/0x138 [ 97.308910] [ 97.309241] This frame has 1 object: [ 97.309873] [48, 184) 'args' [ 97.309876] [ 97.310749] The buggy address belongs to the virtual mapping at [ 97.310749] [ffff800089270000, ffff800089279000) created by: [ 97.310749] dup_task_struct+0xc0/0x2e8 [ 97.313347] [ 97.313674] The buggy address belongs to the physical page: [ 97.314604] page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x14f69a [ 97.315885] flags: 0x15ffffe00000000(node=1|zone=2|lastcpupid=0xfffff) [ 97.316957] raw: 015ffffe00000000 0000000000000000 dead000000000122 0000000000000000 [ 97.318207] raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000 [ 97.319445] page dumped because: kasan: bad access detected [ 97.320371] [ 97.320694] Memory state around the buggy address: [ 97.321511] ffff800089277b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 97.322681] ffff800089277b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 
97.323846] >ffff800089277c00: 00 00 f1 f1 f1 f1 f1 f1 00 00 00 00 00 00 00 00 [ 97.325023] ^ [ 97.325683] ffff800089277c80: 00 00 00 00 00 00 00 00 00 f3 f3 f3 f3 f3 f3 f3 [ 97.326856] ffff800089277d00: f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 00 00 This issue seems to be related to the behavior of some gcc compilers and was also fixed on the s390 architecture before: commit d93a855c31b7 ("s390/ptrace: Avoid KASAN false positives in regs_get_kernel_stack_nth()") As described in that commit, regs_get_kernel_stack_nth() has confirmed that `addr` is on the stack, so reading the value at `*addr` should be allowed. Use READ_ONCE_NOCHECK() helper to silence the KASAN check for this case. Fixes: 0a8ea52c3eb1 ("arm64: Add HAVE_REGS_AND_STACK_ACCESS_API feature") Signed-off-by: Tengda Wu Link: https://lore.kernel.org/r/20250604005533.1278992-1-wutengda@huaweicloud.com [will: Use '*addr' as the argument to READ_ONCE_NOCHECK()] Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index a360e52db02f..ee94b72bf8fb 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -141,7 +141,7 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) addr += n; if (regs_within_kernel_stack(regs, (unsigned long)addr)) - return *addr; + return READ_ONCE_NOCHECK(*addr); else return 0; } From 91b89a634487d5614e51ee773a889ed57f5551ca Mon Sep 17 00:00:00 2001 From: Dylan Hatch Date: Tue, 3 Jun 2025 22:34:17 +0000 Subject: [PATCH 05/67] arm64/module: Use text-poke API for late relocations. To enable late module patching, livepatch modules need to be able to apply some of their relocations well after being loaded. In this scenario however, the livepatch module text and data is already RX-only, so special treatment is needed to make the late relocations possible. To do this, use the text-poking API for these late relocations. This patch is partially based off commit 88fc078a7a8f6 ("x86/module: Use text_poke() for late relocations"). 
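The crux is routing every relocation write through a helper that picks the write mechanism based on module state; condensed, the WRITE_PLACE() helper added below does:

	/* Condensed from the WRITE_PLACE() macro in the diff below */
	if (mod->state == MODULE_STATE_UNFORMED)
		*place = val;		/* initial load: module text still writable */
	else
		aarch64_insn_copy(place, &val, sizeof(*place));	/* live module: text-poke */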
Signed-off-by: Dylan Hatch Acked-by: Song Liu Acked-by: Will Deacon Link: https://lore.kernel.org/r/20250603223417.3700218-1-dylanbhatch@google.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/module.c | 101 +++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 44 deletions(-) diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 06bb680bfe97..40148d2725ce 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -23,6 +23,7 @@ #include #include #include +#include enum aarch64_reloc_op { RELOC_OP_NONE, @@ -48,7 +49,17 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val) return 0; } -static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) +#define WRITE_PLACE(place, val, mod) do { \ + __typeof__(val) __val = (val); \ + \ + if (mod->state == MODULE_STATE_UNFORMED) \ + *(place) = __val; \ + else \ + aarch64_insn_copy(place, &(__val), sizeof(*place)); \ +} while (0) + +static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len, + struct module *me) { s64 sval = do_reloc(op, place, val); @@ -66,7 +77,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) switch (len) { case 16: - *(s16 *)place = sval; + WRITE_PLACE((s16 *)place, sval, me); switch (op) { case RELOC_OP_ABS: if (sval < 0 || sval > U16_MAX) @@ -82,7 +93,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) } break; case 32: - *(s32 *)place = sval; + WRITE_PLACE((s32 *)place, sval, me); switch (op) { case RELOC_OP_ABS: if (sval < 0 || sval > U32_MAX) @@ -98,7 +109,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) } break; case 64: - *(s64 *)place = sval; + WRITE_PLACE((s64 *)place, sval, me); break; default: pr_err("Invalid length (%d) for data relocation\n", len); @@ -113,7 +124,8 @@ enum aarch64_insn_movw_imm_type { }; static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val, - int lsb, enum aarch64_insn_movw_imm_type imm_type) + int lsb, enum aarch64_insn_movw_imm_type imm_type, + struct module *me) { u64 imm; s64 sval; @@ -145,7 +157,7 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val, /* Update the instruction with the new encoding. */ insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm); - *place = cpu_to_le32(insn); + WRITE_PLACE(place, cpu_to_le32(insn), me); if (imm > U16_MAX) return -ERANGE; @@ -154,7 +166,8 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val, } static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val, - int lsb, int len, enum aarch64_insn_imm_type imm_type) + int lsb, int len, enum aarch64_insn_imm_type imm_type, + struct module *me) { u64 imm, imm_mask; s64 sval; @@ -170,7 +183,7 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val, /* Update the instruction's immediate field. 
*/ insn = aarch64_insn_encode_immediate(imm_type, insn, imm); - *place = cpu_to_le32(insn); + WRITE_PLACE(place, cpu_to_le32(insn), me); /* * Extract the upper value bits (including the sign bit) and @@ -189,17 +202,17 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val, } static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs, - __le32 *place, u64 val) + __le32 *place, u64 val, struct module *me) { u32 insn; if (!is_forbidden_offset_for_adrp(place)) return reloc_insn_imm(RELOC_OP_PAGE, place, val, 12, 21, - AARCH64_INSN_IMM_ADR); + AARCH64_INSN_IMM_ADR, me); /* patch ADRP to ADR if it is in range */ if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21, - AARCH64_INSN_IMM_ADR)) { + AARCH64_INSN_IMM_ADR, me)) { insn = le32_to_cpu(*place); insn &= ~BIT(31); } else { @@ -211,7 +224,7 @@ static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs, AARCH64_INSN_BRANCH_NOLINK); } - *place = cpu_to_le32(insn); + WRITE_PLACE(place, cpu_to_le32(insn), me); return 0; } @@ -255,23 +268,23 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, /* Data relocations. */ case R_AARCH64_ABS64: overflow_check = false; - ovf = reloc_data(RELOC_OP_ABS, loc, val, 64); + ovf = reloc_data(RELOC_OP_ABS, loc, val, 64, me); break; case R_AARCH64_ABS32: - ovf = reloc_data(RELOC_OP_ABS, loc, val, 32); + ovf = reloc_data(RELOC_OP_ABS, loc, val, 32, me); break; case R_AARCH64_ABS16: - ovf = reloc_data(RELOC_OP_ABS, loc, val, 16); + ovf = reloc_data(RELOC_OP_ABS, loc, val, 16, me); break; case R_AARCH64_PREL64: overflow_check = false; - ovf = reloc_data(RELOC_OP_PREL, loc, val, 64); + ovf = reloc_data(RELOC_OP_PREL, loc, val, 64, me); break; case R_AARCH64_PREL32: - ovf = reloc_data(RELOC_OP_PREL, loc, val, 32); + ovf = reloc_data(RELOC_OP_PREL, loc, val, 32, me); break; case R_AARCH64_PREL16: - ovf = reloc_data(RELOC_OP_PREL, loc, val, 16); + ovf = reloc_data(RELOC_OP_PREL, loc, val, 16, me); break; /* MOVW instruction relocations. */ @@ -280,88 +293,88 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, fallthrough; case R_AARCH64_MOVW_UABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, - AARCH64_INSN_IMM_MOVKZ); + AARCH64_INSN_IMM_MOVKZ, me); break; case R_AARCH64_MOVW_UABS_G1_NC: overflow_check = false; fallthrough; case R_AARCH64_MOVW_UABS_G1: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, - AARCH64_INSN_IMM_MOVKZ); + AARCH64_INSN_IMM_MOVKZ, me); break; case R_AARCH64_MOVW_UABS_G2_NC: overflow_check = false; fallthrough; case R_AARCH64_MOVW_UABS_G2: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, - AARCH64_INSN_IMM_MOVKZ); + AARCH64_INSN_IMM_MOVKZ, me); break; case R_AARCH64_MOVW_UABS_G3: /* We're using the top bits so we can't overflow. 
*/ overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48, - AARCH64_INSN_IMM_MOVKZ); + AARCH64_INSN_IMM_MOVKZ, me); break; case R_AARCH64_MOVW_SABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, - AARCH64_INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ, me); break; case R_AARCH64_MOVW_SABS_G1: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, - AARCH64_INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ, me); break; case R_AARCH64_MOVW_SABS_G2: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, - AARCH64_INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ, me); break; case R_AARCH64_MOVW_PREL_G0_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, - AARCH64_INSN_IMM_MOVKZ); + AARCH64_INSN_IMM_MOVKZ, me); break; case R_AARCH64_MOVW_PREL_G0: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, - AARCH64_INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ, me); break; case R_AARCH64_MOVW_PREL_G1_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, - AARCH64_INSN_IMM_MOVKZ); + AARCH64_INSN_IMM_MOVKZ, me); break; case R_AARCH64_MOVW_PREL_G1: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, - AARCH64_INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ, me); break; case R_AARCH64_MOVW_PREL_G2_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, - AARCH64_INSN_IMM_MOVKZ); + AARCH64_INSN_IMM_MOVKZ, me); break; case R_AARCH64_MOVW_PREL_G2: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, - AARCH64_INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ, me); break; case R_AARCH64_MOVW_PREL_G3: /* We're using the top bits so we can't overflow. */ overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48, - AARCH64_INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ, me); break; /* Immediate instruction relocations. 
*/ case R_AARCH64_LD_PREL_LO19: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, - AARCH64_INSN_IMM_19); + AARCH64_INSN_IMM_19, me); break; case R_AARCH64_ADR_PREL_LO21: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, - AARCH64_INSN_IMM_ADR); + AARCH64_INSN_IMM_ADR, me); break; case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; fallthrough; case R_AARCH64_ADR_PREL_PG_HI21: - ovf = reloc_insn_adrp(me, sechdrs, loc, val); + ovf = reloc_insn_adrp(me, sechdrs, loc, val, me); if (ovf && ovf != -ERANGE) return ovf; break; @@ -369,46 +382,46 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_LDST8_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12, - AARCH64_INSN_IMM_12); + AARCH64_INSN_IMM_12, me); break; case R_AARCH64_LDST16_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11, - AARCH64_INSN_IMM_12); + AARCH64_INSN_IMM_12, me); break; case R_AARCH64_LDST32_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10, - AARCH64_INSN_IMM_12); + AARCH64_INSN_IMM_12, me); break; case R_AARCH64_LDST64_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9, - AARCH64_INSN_IMM_12); + AARCH64_INSN_IMM_12, me); break; case R_AARCH64_LDST128_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8, - AARCH64_INSN_IMM_12); + AARCH64_INSN_IMM_12, me); break; case R_AARCH64_TSTBR14: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14, - AARCH64_INSN_IMM_14); + AARCH64_INSN_IMM_14, me); break; case R_AARCH64_CONDBR19: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, - AARCH64_INSN_IMM_19); + AARCH64_INSN_IMM_19, me); break; case R_AARCH64_JUMP26: case R_AARCH64_CALL26: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, - AARCH64_INSN_IMM_26); + AARCH64_INSN_IMM_26, me); if (ovf == -ERANGE) { val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym); if (!val) return -ENOEXEC; ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, - 26, AARCH64_INSN_IMM_26); + 26, AARCH64_INSN_IMM_26, me); } break; From beecfd6a88a675e20987e70ec532ba734b230fa4 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 21 May 2025 12:09:59 +0100 Subject: [PATCH 06/67] arm64: stacktrace: Check kretprobe_find_ret_addr() return value If kretprobe_find_ret_addr() fails to find the original return address, it returns 0. Check for this case so that a reliable stacktrace won't silently ignore it. 
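Condensed, the recovery path becomes (full hunk below):

	orig_pc = kretprobe_find_ret_addr(state->task, (void *)state->common.fp,
					  &state->kr_cur);
	if (!orig_pc)
		return -EINVAL;	/* not found: treat the unwind as unreliable */
	state->common.pc = orig_pc;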
Signed-off-by: Mark Rutland Cc: Andrea della Porta Cc: Breno Leitao Cc: Josh Poimboeuf Cc: Miroslav Benes Cc: Petr Mladek Cc: Song Liu Cc: Will Deacon Reviewed-and-tested-by: Song Liu Link: https://lore.kernel.org/r/20250521111000.2237470-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/stacktrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 1d9d51d7627f..f6494c094214 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -152,6 +152,8 @@ kunwind_recover_return_address(struct kunwind_state *state) orig_pc = kretprobe_find_ret_addr(state->task, (void *)state->common.fp, &state->kr_cur); + if (!orig_pc) + return -EINVAL; state->common.pc = orig_pc; state->flags.kretprobe = 1; } From 805f13e403cd43bb88790642feef507108af6fc7 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 21 May 2025 12:10:00 +0100 Subject: [PATCH 07/67] arm64: stacktrace: Implement arch_stack_walk_reliable() Add arch_stack_walk_reliable(), which will be used during kernel live patching to detect when threads have completed executing old versions of functions. Note that arch_stack_walk_reliable() only needs to guarantee that it returns an error code when it cannot provide a reliable stacktrace. It is not required to provide a reliable stacktrace in all scenarios so long as it returns said error code. At present we can only reliably unwind up to an exception boundary. In future we should be able to improve this with additional data from the compiler (e.g. sframe). Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20250320171559.3423224-2-song@kernel.org [ Mark: Simplify logic, clarify commit message ] Signed-off-by: Mark Rutland Cc: Andrea della Porta Cc: Breno Leitao Cc: Josh Poimboeuf Cc: Miroslav Benes Cc: Petr Mladek Cc: Song Liu Cc: Will Deacon Link: https://lore.kernel.org/r/20250521111000.2237470-3-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 2 +- arch/arm64/kernel/stacktrace.c | 53 +++++++++++++++++++++++++++------- 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 55fc331af337..b7462424aa59 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -279,6 +279,7 @@ config ARM64 select HAVE_SOFTIRQ_ON_OWN_STACK select USER_STACKTRACE_SUPPORT select VDSO_GETRANDOM + select HAVE_RELIABLE_STACKTRACE help ARM 64-bit (AArch64) Linux support. 
@@ -2498,4 +2499,3 @@ endmenu # "CPU Power Management" source "drivers/acpi/Kconfig" source "arch/arm64/kvm/Kconfig" - diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index f6494c094214..acf682afbd47 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -279,21 +279,24 @@ kunwind_next(struct kunwind_state *state) typedef bool (*kunwind_consume_fn)(const struct kunwind_state *state, void *cookie); -static __always_inline void +static __always_inline int do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state, void *cookie) { - if (kunwind_recover_return_address(state)) - return; + int ret; + + ret = kunwind_recover_return_address(state); + if (ret) + return ret; while (1) { - int ret; - if (!consume_state(state, cookie)) - break; + return -EINVAL; ret = kunwind_next(state); + if (ret == -ENOENT) + return 0; if (ret < 0) - break; + return ret; } } @@ -326,7 +329,7 @@ do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state, : stackinfo_get_unknown(); \ }) -static __always_inline void +static __always_inline int kunwind_stack_walk(kunwind_consume_fn consume_state, void *cookie, struct task_struct *task, struct pt_regs *regs) @@ -354,7 +357,7 @@ kunwind_stack_walk(kunwind_consume_fn consume_state, if (regs) { if (task != current) - return; + return -EINVAL; kunwind_init_from_regs(&state, regs); } else if (task == current) { kunwind_init_from_caller(&state); @@ -362,7 +365,7 @@ kunwind_stack_walk(kunwind_consume_fn consume_state, kunwind_init_from_task(&state, task); } - do_kunwind(&state, consume_state, cookie); + return do_kunwind(&state, consume_state, cookie); } struct kunwind_consume_entry_data { @@ -389,6 +392,36 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs); } +static __always_inline bool +arch_reliable_kunwind_consume_entry(const struct kunwind_state *state, void *cookie) +{ + /* + * At an exception boundary we can reliably consume the saved PC. We do + * not know whether the LR was live when the exception was taken, and + * so we cannot perform the next unwind step reliably. + * + * All that matters is whether the *entire* unwind is reliable, so give + * up as soon as we hit an exception boundary. + */ + if (state->source == KUNWIND_SOURCE_REGS_PC) + return false; + + return arch_kunwind_consume_entry(state, cookie); +} + +noinline noinstr int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, + void *cookie, + struct task_struct *task) +{ + struct kunwind_consume_entry_data data = { + .consume_entry = consume_entry, + .cookie = cookie, + }; + + return kunwind_stack_walk(arch_reliable_kunwind_consume_entry, &data, + task, NULL); +} + struct bpf_unwind_consume_entry_data { bool (*consume_entry)(void *cookie, u64 ip, u64 sp, u64 fp); void *cookie; From 3eb06f6ce3af65df4e9b3d8fc1d711fbfe7673b3 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 25 Jun 2025 11:34:32 +0000 Subject: [PATCH 08/67] arm64: cpufeature: Introduce MATCH_ALL_EARLY_CPUS capability type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For system-wide capabilities, the kernel has the SCOPE_SYSTEM type. Such capabilities are checked once the SMP boot has completed using the sanitised ID registers. However, there is a need for a new capability type similar in scope to the system one but with checking performed locally on each CPU during boot (e.g. 
based on MIDR_EL1 which is not a sanitised register). Introduce ARM64_CPUCAP_MATCH_ALL_EARLY_CPUS which, together with ARM64_CPUCAP_SCOPE_LOCAL_CPU, ensures that such capability is enabled only if all early CPUs have it. For ease of use, define ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE which combines SCOPE_LOCAL_CPU, PERMITTED_FOR_LATE_CPUS and MATCH_ALL_EARLY_CPUS. Signed-off-by: Mikołaj Lenczewski Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250625113435.26849-2-miko.lenczewski@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 23 +++++++++++ arch/arm64/kernel/cpufeature.c | 60 +++++++++++++++++++++++------ 2 files changed, 72 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index c4326f1cb917..155ebd040c55 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -275,6 +275,14 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; #define ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU ((u16)BIT(5)) /* Panic when a conflict is detected */ #define ARM64_CPUCAP_PANIC_ON_CONFLICT ((u16)BIT(6)) +/* + * When paired with SCOPE_LOCAL_CPU, all early CPUs must satisfy the + * condition. This is different from SCOPE_SYSTEM where the check is performed + * only once at the end of the SMP boot on the sanitised ID registers. + * SCOPE_SYSTEM is not suitable for cases where the capability depends on + * properties local to a CPU like MIDR_EL1. + */ +#define ARM64_CPUCAP_MATCH_ALL_EARLY_CPUS ((u16)BIT(7)) /* * CPU errata workarounds that need to be enabled at boot time if one or @@ -304,6 +312,16 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU | \ ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU) +/* + * CPU feature detected at boot time and present on all early CPUs. Late CPUs + * are permitted to have the feature even if it hasn't been enabled, although + * the feature will not be used by Linux in this case. If all early CPUs have + * the feature, then every late CPU must have it. + */ +#define ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE \ + (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ + ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU | \ + ARM64_CPUCAP_MATCH_ALL_EARLY_CPUS) /* * CPU feature detected at boot time, on one or more CPUs. A late CPU @@ -391,6 +409,11 @@ static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap) return cap->type & ARM64_CPUCAP_SCOPE_MASK; } +static inline bool cpucap_match_all_early_cpus(const struct arm64_cpu_capabilities *cap) +{ + return cap->type & ARM64_CPUCAP_MATCH_ALL_EARLY_CPUS; +} + /* * Generic helper for handling capabilities with multiple (match,enable) pairs * of call backs, sharing the same capability bit. 
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b34044e20128..f9c947166322 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -3370,18 +3370,49 @@ static void update_cpu_capabilities(u16 scope_mask) scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (i = 0; i < ARM64_NCAPS; i++) { + bool match_all = false; + bool caps_set = false; + bool boot_cpu = false; + caps = cpucap_ptrs[i]; - if (!caps || !(caps->type & scope_mask) || - cpus_have_cap(caps->capability) || - !caps->matches(caps, cpucap_default_scope(caps))) + if (!caps || !(caps->type & scope_mask)) continue; - if (caps->desc && !caps->cpus) + match_all = cpucap_match_all_early_cpus(caps); + caps_set = cpus_have_cap(caps->capability); + boot_cpu = scope_mask & SCOPE_BOOT_CPU; + + /* + * Unless it's a match-all CPUs feature, avoid probing if + * already detected. + */ + if (!match_all && caps_set) + continue; + + /* + * A match-all CPUs capability is only set when probing the + * boot CPU. It may be cleared subsequently if not detected on + * secondary ones. + */ + if (match_all && !caps_set && !boot_cpu) + continue; + + if (!caps->matches(caps, cpucap_default_scope(caps))) { + if (match_all) + __clear_bit(caps->capability, system_cpucaps); + continue; + } + + /* + * Match-all CPUs capabilities are logged later when the + * system capabilities are finalised. + */ + if (!match_all && caps->desc && !caps->cpus) pr_info("detected: %s\n", caps->desc); __set_bit(caps->capability, system_cpucaps); - if ((scope_mask & SCOPE_BOOT_CPU) && (caps->type & SCOPE_BOOT_CPU)) + if (boot_cpu && (caps->type & SCOPE_BOOT_CPU)) set_bit(caps->capability, boot_cpucaps); } } @@ -3782,17 +3813,24 @@ static void __init setup_system_capabilities(void) enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU); apply_alternatives_all(); - /* - * Log any cpucaps with a cpumask as these aren't logged by - * update_cpu_capabilities(). - */ for (int i = 0; i < ARM64_NCAPS; i++) { const struct arm64_cpu_capabilities *caps = cpucap_ptrs[i]; - if (caps && caps->cpus && caps->desc && - cpumask_any(caps->cpus) < nr_cpu_ids) + if (!caps || !caps->desc) + continue; + + /* + * Log any cpucaps with a cpumask as these aren't logged by + * update_cpu_capabilities(). + */ + if (caps->cpus && cpumask_any(caps->cpus) < nr_cpu_ids) pr_info("detected: %s on CPU%*pbl\n", caps->desc, cpumask_pr_args(caps->cpus)); + + /* Log match-all CPUs capabilities */ + if (cpucap_match_all_early_cpus(caps) && + cpus_have_cap(caps->capability)) + pr_info("detected: %s\n", caps->desc); } /* From 5aa4b625762e5a1caf2e43aa52e55b7b507783e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Lenczewski?= Date: Wed, 25 Jun 2025 11:34:33 +0000 Subject: [PATCH 09/67] arm64: Add BBM Level 2 cpu feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Break-Before-Make cpu feature supports multiple levels (levels 0-2), and this commit adds a dedicated BBML2 cpufeature to test against support for. To support BBML2 in as wide a range of contexts as we can, we want not only the architectural guarantees that BBML2 makes, but additionally want BBML2 to not create TLB conflict aborts. Not causing aborts avoids us having to prove that no recursive faults can be induced in any path that uses BBML2, allowing its use for arbitrary kernel mappings. 
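For illustration, the caller-side pattern this enables is a simple gate on the new capability (contpte_convert(), later in this series, uses exactly this shape):

	/* Sketch: elide an intermediate TLB invalidation only when the
	 * cpufeature guarantees BBML2 without TLB conflict aborts. */
	if (!system_supports_bbml2_noabort())
		__flush_tlb_range(&vma, start_addr, addr, PAGE_SIZE, true, 3);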
This feature builds on the previous ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE, as all early cpus must support BBML2 for us to enable it (and any later cpus must also support it to be onlined). Not onlining late cpus that do not support BBML2 is unavoidable, as we might currently be using BBML2 semantics for kernel memory regions. This could cause faults in the late cpus, and would be difficult to unwind, so let us avoid the case altogether. Signed-off-by: Mikołaj Lenczewski Reviewed-by: Catalin Marinas Reviewed-by: Suzuki K Poulose Reviewed-by: Ryan Roberts Link: https://lore.kernel.org/r/20250625113435.26849-3-miko.lenczewski@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 5 ++++ arch/arm64/kernel/cpufeature.c | 38 +++++++++++++++++++++++++++++ arch/arm64/tools/cpucaps | 1 + 3 files changed, 44 insertions(+) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 155ebd040c55..bf13d676aae2 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -871,6 +871,11 @@ static inline bool system_supports_pmuv3(void) return cpus_have_final_cap(ARM64_HAS_PMUV3); } +static inline bool system_supports_bbml2_noabort(void) +{ + return alternative_has_cap_unlikely(ARM64_HAS_BBML2_NOABORT); +} + int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); bool try_emulate_mrs(struct pt_regs *regs, u32 isn); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f9c947166322..2b6e70bad72a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2213,6 +2213,38 @@ static bool hvhe_possible(const struct arm64_cpu_capabilities *entry, return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_HVHE); } +static bool has_bbml2_noabort(const struct arm64_cpu_capabilities *caps, int scope) +{ + /* + * We want to allow usage of BBML2 in as wide a range of kernel contexts + * as possible. This list is therefore an allow-list of known-good + * implementations that both support BBML2 and additionally, fulfill the + * extra constraint of never generating TLB conflict aborts when using + * the relaxed BBML2 semantics (such aborts make use of BBML2 in certain + * kernel contexts difficult to prove safe against recursive aborts). + * + * Note that implementations can only be considered "known-good" if their + * implementors attest to the fact that the implementation never raises + * TLB conflict aborts for BBML2 mapping granularity changes. + */ + static const struct midr_range supports_bbml2_noabort_list[] = { + MIDR_REV_RANGE(MIDR_CORTEX_X4, 0, 3, 0xf), + MIDR_REV_RANGE(MIDR_NEOVERSE_V3, 0, 2, 0xf), + {} + }; + + /* Does our cpu guarantee to never raise TLB conflict aborts? */ + if (!is_midr_in_range_list(supports_bbml2_noabort_list)) + return false; + + /* + * We currently ignore the ID_AA64MMFR2_EL1 register, and only care + * about whether the MIDR check passes. 
+ */ + + return true; +} + #ifdef CONFIG_ARM64_PAN static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) { @@ -2980,6 +3012,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP) }, + { + .desc = "BBM Level 2 without TLB conflict abort", + .capability = ARM64_HAS_BBML2_NOABORT, + .type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE, + .matches = has_bbml2_noabort, + }, { .desc = "52-bit Virtual Addressing for KVM (LPA2)", .capability = ARM64_HAS_LPA2, diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 10effd4cff6b..2bd2bfaeddcd 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -45,6 +45,7 @@ HAS_LPA2 HAS_LSE_ATOMICS HAS_MOPS HAS_NESTED_VIRT +HAS_BBML2_NOABORT HAS_PAN HAS_PMUV3 HAS_S1PIE From 212c439bdd8f99bcbec75adfca3297ae9319c2fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Lenczewski?= Date: Wed, 25 Jun 2025 11:34:34 +0000 Subject: [PATCH 10/67] iommu/arm: Add BBM Level 2 smmu feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For supporting BBM Level 2 for userspace mappings, we want to ensure that the smmu also supports its own version of BBM Level 2. Luckily, the smmu spec (IHI 0070G 3.21.1.3) is stricter than the aarch64 spec (DDI 0487K.a D8.16.2), so already guarantees that no aborts are raised when BBM level 2 is claimed. Add the feature and testing for it under arm_smmu_sva_supported(). Signed-off-by: Mikołaj Lenczewski Reviewed-by: Catalin Marinas Reviewed-by: Robin Murphy Reviewed-by: Ryan Roberts Link: https://lore.kernel.org/r/20250625113435.26849-4-miko.lenczewski@arm.com Signed-off-by: Catalin Marinas --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 3 +++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 3 +++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 2 ++ 3 files changed, 8 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 0601dece0a0d..59a480974d80 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -220,6 +220,9 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) feat_mask |= ARM_SMMU_FEAT_VAX; } + if (system_supports_bbml2_noabort()) + feat_mask |= ARM_SMMU_FEAT_BBML2; + if ((smmu->features & feat_mask) != feat_mask) return false; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 10cc6dc26b7b..39e933086f8f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -4457,6 +4457,9 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) if (FIELD_GET(IDR3_FWB, reg)) smmu->features |= ARM_SMMU_FEAT_S2FWB; + if (FIELD_GET(IDR3_BBM, reg) == 2) + smmu->features |= ARM_SMMU_FEAT_BBML2; + /* IDR5 */ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index ea41d790463e..a33bf520ba97 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -60,6 +60,7 @@ struct arm_smmu_device; #define ARM_SMMU_IDR3 0xc #define IDR3_FWB (1 << 8) #define IDR3_RIL (1 << 10) +#define IDR3_BBM GENMASK(12, 11) #define ARM_SMMU_IDR5 0x14 #define IDR5_STALL_MAX GENMASK(31, 16) @@ -755,6 +756,7 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_HA (1 << 21) #define 
ARM_SMMU_FEAT_HD (1 << 22) #define ARM_SMMU_FEAT_S2FWB (1 << 23) +#define ARM_SMMU_FEAT_BBML2 (1 << 24) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) From 83bbd6be7d1712471001c421298fb525b7abf69e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Lenczewski?= Date: Wed, 25 Jun 2025 11:34:35 +0000 Subject: [PATCH 11/67] arm64/mm: Elide tlbi in contpte_convert() under BBML2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When converting a region via contpte_convert() to use mTHP, we have two different goals. We have to mark each entry as contiguous, and we would like to smear the dirty and young (access) bits across all entries in the contiguous block. Currently, we do this by first accumulating the dirty and young bits in the block, using an atomic __ptep_get_and_clear() and the relevant pte_{dirty,young}() calls, performing a tlbi, and finally smearing the correct bits across the block using __set_ptes(). This approach works fine for BBM level 0, but with support for BBM level 2 we are allowed to reorder the tlbi to after setting the pagetable entries. We expect the time cost of a tlbi to be much greater than the cost of clearing and resetting the PTEs. As such, reordering the tlbi to outside the window where our PTEs are invalid greatly reduces the duration the PTEs are visibly invalid for other threads. This reduces the likelihood of a concurrent page walk finding an invalid PTE, reducing the likelihood of a fault in other threads, and improving performance (more so when there are more threads). Because we support, via an allowlist, only BBML2 implementations that never raise conflict aborts and instead invalidate the tlb entries automatically in hardware, we can avoid the final flush altogether. However, avoiding the intermediate tlbi+dsb must be carefully considered to ensure that we remain both correct and performant. We document our reasoning and the expected interactions further in the contpte_convert() source. To do so we rely on the aarch64 spec (DDI 0487L.a D8.7.1.1) requirements RNGLXZ and RJQQTC to provide guarantees that the elision is correct. Signed-off-by: Mikołaj Lenczewski Reviewed-by: Catalin Marinas Reviewed-by: Ryan Roberts Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250625113435.26849-5-miko.lenczewski@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/contpte.c | 139 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 138 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c index bcac4f55f9c1..203357061d0a 100644 --- a/arch/arm64/mm/contpte.c +++ b/arch/arm64/mm/contpte.c @@ -68,7 +68,144 @@ static void contpte_convert(struct mm_struct *mm, unsigned long addr, pte = pte_mkyoung(pte); } - __flush_tlb_range(&vma, start_addr, addr, PAGE_SIZE, true, 3); + /* + * On eliding the __tlb_flush_range() under BBML2+noabort: + * + * NOTE: Instead of using N=16 as the contiguous block length, we use + * N=4 for clarity. + * + * NOTE: 'n' and 'c' are used to denote the "contiguous bit" being + * unset and set, respectively. + * + * We worry about two cases where contiguous bit is used: + * - When folding N smaller non-contiguous ptes as 1 contiguous block. + * - When unfolding a contiguous block into N smaller non-contiguous ptes.
+ * + * Currently, the BBML0 folding case looks as follows: + * + * 0) Initial page-table layout: + * + * +----+----+----+----+ + * |RO,n|RO,n|RO,n|RW,n| <--- last page being set as RO + * +----+----+----+----+ + * + * 1) Aggregate AF + dirty flags using __ptep_get_and_clear(): + * + * +----+----+----+----+ + * | 0 | 0 | 0 | 0 | + * +----+----+----+----+ + * + * 2) __flush_tlb_range(): + * + * |____ tlbi + dsb ____| + * + * 3) __set_ptes() to repaint contiguous block: + * + * +----+----+----+----+ + * |RO,c|RO,c|RO,c|RO,c| + * +----+----+----+----+ + * + * 4) The kernel will eventually __flush_tlb() for changed page: + * + * |____| <--- tlbi + dsb + * + * As expected, the intermediate tlbi+dsb ensures that other PEs + * only ever see an invalid (0) entry, or the new contiguous TLB entry. + * The final tlbi+dsb will always throw away the newly installed + * contiguous TLB entry, which is a micro-optimisation opportunity, + * but does not affect correctness. + * + * In the BBML2 case, the change is avoiding the intermediate tlbi+dsb. + * This means a few things, but notably other PEs will still "see" any + * stale cached TLB entries. This could lead to a "contiguous bit + * misprogramming" issue until the final tlbi+dsb of the changed page, + * which would clear out both the stale (RW,n) entry and the new (RO,c) + * contiguous entry installed in its place. + * + * What this is saying, is the following: + * + * +----+----+----+----+ + * |RO,n|RO,n|RO,n|RW,n| <--- old page tables, all non-contiguous + * +----+----+----+----+ + * + * +----+----+----+----+ + * |RO,c|RO,c|RO,c|RO,c| <--- new page tables, all contiguous + * +----+----+----+----+ + * /\ + * || + * + * If both the old single (RW,n) and new contiguous (RO,c) TLB entries + * are present, and a write is made to this address, do we fault or + * is the write permitted (via amalgamation)? + * + * The relevant Arm ARM DDI 0487L.a requirements are RNGLXZ and RJQQTC, + * and together state that when BBML1 or BBML2 are implemented, either + * a TLB conflict abort is raised (which we expressly forbid), or will + * "produce an OA, access permissions, and memory attributes that are + * consistent with any of the programmed translation table values". + * + * That is to say, will either raise a TLB conflict, or produce one of + * the cached TLB entries, but never amalgamate. + * + * Thus, as the page tables are only considered "consistent" after + * the final tlbi+dsb (which evicts both the single stale (RW,n) TLB + * entry as well as the new contiguous (RO,c) TLB entry), omitting the + * initial tlbi+dsb is correct. + * + * It is also important to note that at the end of the BBML2 folding + * case, we are still left with potentially all N TLB entries still + * cached (the N-1 non-contiguous ptes, and the single contiguous + * block). However, over time, natural TLB pressure will cause the + * non-contiguous pte TLB entries to be flushed, leaving only the + * contiguous block TLB entry. This means that omitting the tlbi+dsb is + * not only correct, but also keeps our eventual performance benefits. 
+ * + * For the unfolding case, BBML0 looks as follows: + * + * 0) Initial page-table layout: + * + * +----+----+----+----+ + * |RW,c|RW,c|RW,c|RW,c| <--- last page being set as RO + * +----+----+----+----+ + * + * 1) Aggregate AF + dirty flags using __ptep_get_and_clear(): + * + * +----+----+----+----+ + * | 0 | 0 | 0 | 0 | + * +----+----+----+----+ + * + * 2) __flush_tlb_range(): + * + * |____ tlbi + dsb ____| + * + * 3) __set_ptes() to repaint as non-contiguous: + * + * +----+----+----+----+ + * |RW,n|RW,n|RW,n|RW,n| + * +----+----+----+----+ + * + * 4) Update changed page permissions: + * + * +----+----+----+----+ + * |RW,n|RW,n|RW,n|RO,n| <--- last page permissions set + * +----+----+----+----+ + * + * 5) The kernel will eventually __flush_tlb() for changed page: + * + * |____| <--- tlbi + dsb + * + * For BBML2, we again remove the intermediate tlbi+dsb. Here, there + * are no issues, as the final tlbi+dsb covering the changed page is + * guaranteed to remove the original large contiguous (RW,c) TLB entry, + * as well as the intermediate (RW,n) TLB entry; the next access will + * install the new (RO,n) TLB entry and the page tables are only + * considered "consistent" after the final tlbi+dsb, so software must + * be prepared for this inconsistency prior to finishing the mm dance + * regardless. + */ + + if (!system_supports_bbml2_noabort()) + __flush_tlb_range(&vma, start_addr, addr, PAGE_SIZE, true, 3); __set_ptes(mm, start_addr, start_ptep, pte, CONT_PTES); } From 6853acd39998a8aed9ac1552d0133640c02bfdfe Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 3 Jun 2025 03:09:35 +0900 Subject: [PATCH 12/67] arm64: pi: use 'targets' instead of extra-y in Makefile %.pi.o files are built as prerequisites of other objects. There is no need to use extra-y, which is planned for deprecation. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20250602180937.528459-1-masahiroy@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/pi/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile index 4d11a8c29181..211e1a79b07a 100644 --- a/arch/arm64/kernel/pi/Makefile +++ b/arch/arm64/kernel/pi/Makefile @@ -41,4 +41,4 @@ obj-y := idreg-override.pi.o \ obj-$(CONFIG_RELOCATABLE) += relocate.pi.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr_early.pi.o obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.pi.o -extra-y := $(patsubst %.pi.o,%.o,$(obj-y)) +targets := $(patsubst %.pi.o,%.o,$(obj-y)) From b9f58d3572a8e1ef707b941eae58ec4014b9269d Mon Sep 17 00:00:00 2001 From: Li Chen Date: Fri, 20 Jun 2025 21:13:07 +0800 Subject: [PATCH 13/67] ACPI: Return -ENODEV from acpi_parse_spcr() when SPCR support is disabled If CONFIG_ACPI_SPCR_TABLE is disabled, acpi_parse_spcr() currently returns 0, which may incorrectly suggest that SPCR parsing was successful. This patch changes the behavior to return -ENODEV to clearly indicate that SPCR support is not available. This prepares the codebase for future changes that depend on acpi_parse_spcr() failure detection, such as suppressing misleading console messages. 
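For illustration, this lets callers distinguish "no SPCR" from success; the next patch relies on exactly this pattern:

	ret = acpi_parse_spcr(earlycon_acpi_spcr_enable, !param_acpi_nospcr);
	if (ret)	/* -ENODEV: SPCR support disabled or table absent */
		pr_info("Use ACPI SPCR as default console: No\n");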
Signed-off-by: Li Chen Acked-by: Hanjun Guo Link: https://lore.kernel.org/r/20250620131309.126555-2-me@linux.beauty Signed-off-by: Catalin Marinas --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index f102c0fe3431..71e692f95290 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1503,7 +1503,7 @@ int acpi_parse_spcr(bool enable_earlycon, bool enable_console); #else static inline int acpi_parse_spcr(bool enable_earlycon, bool enable_console) { - return 0; + return -ENODEV; } #endif From bad3fa2fb9206f4dcec6ddef094ec2fbf6e8dcb2 Mon Sep 17 00:00:00 2001 From: Li Chen Date: Fri, 20 Jun 2025 21:13:08 +0800 Subject: [PATCH 14/67] ACPI: Suppress misleading SPCR console message when SPCR table is absent The kernel currently always prints "Use ACPI SPCR as default console: No/Yes" even on systems that lack an SPCR table. This can mislead users into thinking the SPCR table exists on machines without one. With this change, "Yes" is only printed if the SPCR table is present, successfully parsed, and param_acpi_nospcr is not set. This avoids user confusion on SPCR-less systems. Signed-off-by: Li Chen Acked-by: Hanjun Guo Link: https://lore.kernel.org/r/20250620131309.126555-3-me@linux.beauty Signed-off-by: Catalin Marinas --- arch/arm64/kernel/acpi.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index b9a66fc146c9..4d529ff7ba51 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -197,6 +197,8 @@ static int __init acpi_fadt_sanity_check(void) */ void __init acpi_boot_table_init(void) { + int ret; + /* * Enable ACPI instead of device tree unless * - ACPI has been disabled explicitly (acpi=off), or @@ -250,10 +252,12 @@ void __init acpi_boot_table_init(void) * behaviour, use acpi=nospcr to disable console in ACPI SPCR * table as default serial console. */ - acpi_parse_spcr(earlycon_acpi_spcr_enable, + ret = acpi_parse_spcr(earlycon_acpi_spcr_enable, !param_acpi_nospcr); - pr_info("Use ACPI SPCR as default console: %s\n", - param_acpi_nospcr ? "No" : "Yes"); + if (ret || param_acpi_nospcr || !IS_ENABLED(CONFIG_ACPI_SPCR_TABLE)) + pr_info("Use ACPI SPCR as default console: No\n"); + else + pr_info("Use ACPI SPCR as default console: Yes\n"); if (IS_ENABLED(CONFIG_ACPI_BGRT)) acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); From fd1e0fd71f6552238573efa92fd3e6c324986ee3 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 30 Jun 2025 10:45:02 -0700 Subject: [PATCH 15/67] arm64: Implement HAVE_LIVEPATCH Allocate a task flag used to represent the patch pending state for the task. When a livepatch is being loaded or unloaded, the livepatch code uses this flag to select the proper version of a function being patched for the current task. In arch/arm64/Kconfig, select HAVE_LIVEPATCH and include the proper Kconfig. This is largely based on [1] by Suraj Jitindar Singh.
[1] https://lore.kernel.org/all/20210604235930.603-1-surajjs@amazon.com/ Cc: Suraj Jitindar Singh Cc: Torsten Duwe Acked-by: Miroslav Benes Tested-by: Breno Leitao Tested-by: Andrea della Porta Signed-off-by: Song Liu Acked-by: Will Deacon Link: https://lore.kernel.org/r/20250630174502.842486-1-song@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 3 +++ arch/arm64/include/asm/thread_info.h | 5 ++++- arch/arm64/kernel/entry-common.c | 4 ++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b7462424aa59..110218542920 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -280,6 +280,7 @@ config ARM64 select USER_STACKTRACE_SUPPORT select VDSO_GETRANDOM select HAVE_RELIABLE_STACKTRACE + select HAVE_LIVEPATCH help ARM 64-bit (AArch64) Linux support. @@ -2499,3 +2500,5 @@ endmenu # "CPU Power Management" source "drivers/acpi/Kconfig" source "arch/arm64/kvm/Kconfig" + +source "kernel/livepatch/Kconfig" diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 1269c2487574..f241b8601ebd 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -70,6 +70,7 @@ void arch_setup_new_exec(void); #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ #define TIF_SECCOMP 11 /* syscall secure computing */ #define TIF_SYSCALL_EMU 12 /* syscall emulation active */ +#define TIF_PATCH_PENDING 13 /* pending live patching update */ #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_FREEZE 19 #define TIF_RESTORE_SIGMASK 20 @@ -96,6 +97,7 @@ void arch_setup_new_exec(void); #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) +#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_32BIT (1 << TIF_32BIT) @@ -107,7 +109,8 @@ void arch_setup_new_exec(void); #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \ - _TIF_NOTIFY_SIGNAL | _TIF_SIGPENDING) + _TIF_NOTIFY_SIGNAL | _TIF_SIGPENDING | \ + _TIF_PATCH_PENDING) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 7c1970b341b8..a56878d7c733 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -144,6 +145,9 @@ static void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) (void __user *)NULL, current); } + if (thread_flags & _TIF_PATCH_PENDING) + klp_update_patch_state(current); + if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs); From 66984536899f49d388424cf9b158b0a664217cf6 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 09:45:04 +0100 Subject: [PATCH 16/67] arm64/cpufeature: Add FEAT_MTE_TAGGED_FAR feature Add FEAT_MTE_TAGGED_FAR cpucap which makes FAR_ELx report all non-address bits on a synchronous MTE tag check fault since Armv8.9 Signed-off-by: Yeoreum Yun Acked-by: Yury Khrustalev Link: https://lore.kernel.org/r/20250618084513.1761345-2-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 8 ++++++++ arch/arm64/tools/cpucaps | 1 + 2 
files changed, 9 insertions(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b34044e20128..af6a6924a3e8 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -320,6 +320,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { static const struct arm64_ftr_bits ftr_id_aa64pfr2[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_FPMR_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_MTEFAR_SHIFT, 4, ID_AA64PFR2_EL1_MTEFAR_NI), ARM64_FTR_END, }; @@ -2874,6 +2875,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, MTE, MTE3) }, + { + .desc = "FAR on MTE Tag Check Fault", + .capability = ARM64_MTE_FAR, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, MTEFAR, IMP) + }, #endif /* CONFIG_ARM64_MTE */ { .desc = "RCpc load-acquire (LDAPR)", diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 10effd4cff6b..fe8f4f8ce95c 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -68,6 +68,7 @@ MPAM MPAM_HCR MTE MTE_ASYMM +MTE_FAR SME SME_FA64 SME2 From 7c7f55039b8d69567acc953964b656a8b126c30a Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 09:45:05 +0100 Subject: [PATCH 17/67] arm64: Report address tag when FEAT_MTE_TAGGED_FAR is supported If FEAT_MTE_TAGGED_FAR (Armv8.9) is supported, bits 63:60 of the fault address are preserved in response to synchronous tag check faults (SEGV_MTESERR). This patch makes the following changes to support this feature: - Use the original FAR_EL1 value when an MTE tag check fault occurs, if ARM64_MTE_FAR is supported, so that not only the logical tag (bits 59:56) but also the address tag (bits 63:60) is reported. - Add a HWCAP for mtefar to let userspace know that bits 63:60 include address tag information when FEAT_MTE_TAGGED_FAR is supported. Applications that require this information should install a signal handler with the SA_EXPOSE_TAGBITS flag. While this introduces a minor ABI change, most applications do not set this flag and therefore will not be affected. Signed-off-by: Yeoreum Yun Link: https://lore.kernel.org/r/20250618084513.1761345-3-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- Documentation/arch/arm64/elf_hwcaps.rst | 3 +++ Documentation/arch/arm64/tagged-pointers.rst | 11 ++++++----- arch/arm64/include/asm/hwcap.h | 1 + arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/kernel/cpufeature.c | 1 + arch/arm64/kernel/cpuinfo.c | 1 + arch/arm64/mm/fault.c | 7 +++++-- 7 files changed, 18 insertions(+), 7 deletions(-) diff --git a/Documentation/arch/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst index 69d7afe56853..358f5af035ff 100644 --- a/Documentation/arch/arm64/elf_hwcaps.rst +++ b/Documentation/arch/arm64/elf_hwcaps.rst @@ -435,6 +435,9 @@ HWCAP2_SME_SF8DP4 HWCAP2_POE Functionality implied by ID_AA64MMFR3_EL1.S1POE == 0b0001. +HWCAP3_MTE_FAR + Functionality implied by ID_AA64PFR2_EL1.MTEFAR == 0b0001. + 4.
Unused AT_HWCAP bits ----------------------- diff --git a/Documentation/arch/arm64/tagged-pointers.rst b/Documentation/arch/arm64/tagged-pointers.rst index 81b6c2a770dd..f87a925ca9a5 100644 --- a/Documentation/arch/arm64/tagged-pointers.rst +++ b/Documentation/arch/arm64/tagged-pointers.rst @@ -60,11 +60,12 @@ that signal handlers in applications making use of tags cannot rely on the tag information for user virtual addresses being maintained in these fields unless the flag was set. -Due to architecture limitations, bits 63:60 of the fault address -are not preserved in response to synchronous tag check faults -(SEGV_MTESERR) even if SA_EXPOSE_TAGBITS was set. Applications should -treat the values of these bits as undefined in order to accommodate -future architecture revisions which may preserve the bits. +If FEAT_MTE_TAGGED_FAR (Armv8.9) is supported, bits 63:60 of the fault address +are preserved in response to synchronous tag check faults (SEGV_MTESERR) +otherwise not preserved even if SA_EXPOSE_TAGBITS was set. +Applications should interpret the values of these bits based on +the support for the HWCAP3_MTE_FAR. If the support is not present, +the values of these bits should be considered as undefined otherwise valid. For signals raised in response to watchpoint debug exceptions, the tag information will be preserved regardless of the SA_EXPOSE_TAGBITS diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 1c3f9617d54f..28dd1ac29ecc 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -176,6 +176,7 @@ #define KERNEL_HWCAP_POE __khwcap2_feature(POE) #define __khwcap3_feature(x) (const_ilog2(HWCAP3_ ## x) + 128) +#define KERNEL_HWCAP_MTE_FAR __khwcap3_feature(MTE_FAR) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 705a7afa8e58..7d22527a7975 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -143,5 +143,6 @@ /* * HWCAP3 flags - for AT_HWCAP3 */ +#define HWCAP3_MTE_FAR (1UL << 0) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index af6a6924a3e8..8a5284c733b7 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -3219,6 +3219,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #ifdef CONFIG_ARM64_MTE HWCAP_CAP(ID_AA64PFR1_EL1, MTE, MTE2, CAP_HWCAP, KERNEL_HWCAP_MTE), HWCAP_CAP(ID_AA64PFR1_EL1, MTE, MTE3, CAP_HWCAP, KERNEL_HWCAP_MTE3), + HWCAP_CAP(ID_AA64PFR2_EL1, MTEFAR, IMP, CAP_HWCAP, KERNEL_HWCAP_MTE_FAR), #endif /* CONFIG_ARM64_MTE */ HWCAP_CAP(ID_AA64MMFR0_EL1, ECV, IMP, CAP_HWCAP, KERNEL_HWCAP_ECV), HWCAP_CAP(ID_AA64MMFR1_EL1, AFP, IMP, CAP_HWCAP, KERNEL_HWCAP_AFP), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index c1f2b6b04b41..e552cb305641 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -160,6 +160,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_SME_SFEXPA] = "smesfexpa", [KERNEL_HWCAP_SME_STMOP] = "smestmop", [KERNEL_HWCAP_SME_SMOP4] = "smesmop4", + [KERNEL_HWCAP_MTE_FAR] = "mtefar", }; #ifdef CONFIG_COMPAT diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index ec0a337891dd..832d8540e13b 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -837,9 +837,12 @@ static int do_tag_check_fault(unsigned long far, unsigned long esr, /* * The architecture specifies that bits 
63:60 of FAR_EL1 are UNKNOWN * for tag check faults. Set them to corresponding bits in the untagged - * address. + * address if ARM64_MTE_FAR isn't supported. + * Otherwise, bits 63:60 of FAR_EL1 are not UNKNOWN. */ - far = (__untagged_addr(far) & ~MTE_TAG_MASK) | (far & MTE_TAG_MASK); + if (!cpus_have_cap(ARM64_MTE_FAR)) + far = (__untagged_addr(far) & ~MTE_TAG_MASK) | (far & MTE_TAG_MASK); + do_bad_area(far, esr, regs); return 0; }

From 61eae495da68fe3d17ed6e8d3ebcdb8c9a2f7c44 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 09:45:06 +0100 Subject: [PATCH 18/67] KVM: arm64: Expose FEAT_MTE_TAGGED_FAR feature to guest

Expose the FEAT_MTE_TAGGED_FAR feature to the guest.

Signed-off-by: Yeoreum Yun Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20250618084513.1761345-4-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kvm/sys_regs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index a6cf2888d150..45457963e8b6 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1617,8 +1617,8 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MPAM_frac); break; case SYS_ID_AA64PFR2_EL1: - /* We only expose FPMR */ - val &= ID_AA64PFR2_EL1_FPMR; + val &= ID_AA64PFR2_EL1_FPMR | + (kvm_has_mte(vcpu->kvm) ? ID_AA64PFR2_EL1_MTEFAR : 0); break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) @@ -2876,7 +2876,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_AA64PFR1_EL1_MPAM_frac | ID_AA64PFR1_EL1_RAS_frac | ID_AA64PFR1_EL1_MTE)), - ID_WRITABLE(ID_AA64PFR2_EL1, ID_AA64PFR2_EL1_FPMR), + ID_WRITABLE(ID_AA64PFR2_EL1, + ID_AA64PFR2_EL1_FPMR | + ID_AA64PFR2_EL1_MTEFAR), ID_UNALLOCATED(4,3), ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0), ID_HIDDEN(ID_AA64SMFR0_EL1),

From 49a9942ff80c99dabdc4a76011d755524e72fd9d Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 09:45:07 +0100 Subject: [PATCH 19/67] kselftest/arm64: Add MTE_FAR hwcap test

Add an MTE_FAR hwcap test to kselftest.

Signed-off-by: Yeoreum Yun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250618084513.1761345-5-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/abi/hwcap.c | 6 ++++++ 1 file changed, 6 insertions(+)

diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 35f521e5f41c..e60bfb798ba2 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -1098,6 +1098,12 @@ static const struct hwcap_data { .sigill_fn = hbc_sigill, .sigill_reliable = true, }, + { + .name = "MTE_FAR", + .at_hwcap = AT_HWCAP3, + .hwcap_bit = HWCAP3_MTE_FAR, + .cpuinfo = "mtefar", + }, }; typedef void (*sighandler_fn)(int, siginfo_t *, void *);

From cfafa517c9e658756511e210e7288efe21554bcf Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 09:45:08 +0100 Subject: [PATCH 20/67] kselftest/arm64/mte: Register mte signal handler with SA_EXPOSE_TAGBITS

To test that the address tag [63:60] and the memory tag [59:56] are preserved when a memory tag fault happens, let mte_register_signal() register the signal handler with SA_EXPOSE_TAGBITS.
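For illustration, a minimal sketch of such a registration (the fallback define mirrors the one this patch adds for older libc headers; the handler body is a hypothetical placeholder):

  #include <signal.h>

  #ifndef SA_EXPOSE_TAGBITS
  #define SA_EXPOSE_TAGBITS 0x00000800
  #endif

  static void tag_handler(int sig, siginfo_t *si, void *uc)
  {
          /* With SA_EXPOSE_TAGBITS, si->si_addr keeps bits 63:56
           * (address tag and memory tag) for SEGV_MTESERR faults. */
  }

  static void register_tag_handler(void)
  {
          struct sigaction sa;

          sa.sa_sigaction = tag_handler;
          sa.sa_flags = SA_SIGINFO | SA_EXPOSE_TAGBITS;
          sigemptyset(&sa.sa_mask);
          sigaction(SIGSEGV, &sa, NULL);
  }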
Signed-off-by: Yeoreum Yun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250618084513.1761345-6-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/mte/check_buffer_fill.c | 2 +- .../testing/selftests/arm64/mte/check_child_memory.c | 4 ++-- .../selftests/arm64/mte/check_hugetlb_options.c | 4 ++-- tools/testing/selftests/arm64/mte/check_ksm_options.c | 4 ++-- .../testing/selftests/arm64/mte/check_mmap_options.c | 4 ++-- .../selftests/arm64/mte/check_tags_inclusion.c | 2 +- tools/testing/selftests/arm64/mte/check_user_mem.c | 2 +- tools/testing/selftests/arm64/mte/mte_common_util.c | 11 ++++++++++- tools/testing/selftests/arm64/mte/mte_common_util.h | 3 ++- 9 files changed, 23 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c index 2ee7f114d7fa..5248b5265aa4 100644 --- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -415,7 +415,7 @@ int main(int argc, char *argv[]) return err; /* Register SIGSEGV handler */ - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ ksft_set_plan(20); diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c index 7597fc632cad..b97ea3981c21 100644 --- a/tools/testing/selftests/arm64/mte/check_child_memory.c +++ b/tools/testing/selftests/arm64/mte/check_child_memory.c @@ -160,8 +160,8 @@ int main(int argc, char *argv[]) return err; /* Register SIGSEGV handler */ - mte_register_signal(SIGSEGV, mte_default_handler); - mte_register_signal(SIGBUS, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler, false); + mte_register_signal(SIGBUS, mte_default_handler, false); /* Set test plan */ ksft_set_plan(12); diff --git a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c index 3bfcd3848432..4e644a606394 100644 --- a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c +++ b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c @@ -235,8 +235,8 @@ int main(int argc, char *argv[]) return err; /* Register signal handlers */ - mte_register_signal(SIGBUS, mte_default_handler); - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGBUS, mte_default_handler, false); + mte_register_signal(SIGSEGV, mte_default_handler, false); allocate_hugetlb(); diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c index 88c74bc46d4f..afea4e381862 100644 --- a/tools/testing/selftests/arm64/mte/check_ksm_options.c +++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c @@ -141,8 +141,8 @@ int main(int argc, char *argv[]) return KSFT_FAIL; } /* Register signal handlers */ - mte_register_signal(SIGBUS, mte_default_handler); - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGBUS, mte_default_handler, false); + mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ ksft_set_plan(4); diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c index 17694caaff53..b37bf481c9f9 100644 --- a/tools/testing/selftests/arm64/mte/check_mmap_options.c +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -201,8 +201,8 @@ int main(int argc, 
char *argv[]) sizes[item - 1] = page_size + 1; /* Register signal handlers */ - mte_register_signal(SIGBUS, mte_default_handler); - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGBUS, mte_default_handler, false); + mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ ksft_set_plan(22);

diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c index a3d1e23fe02a..b96296ab9870 100644 --- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c +++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c @@ -180,7 +180,7 @@ int main(int argc, char *argv[]) return err; /* Register SIGSEGV handler */ - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ ksft_set_plan(4);

diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index f4ae5f87a3b7..d1d14aaaba16 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -211,7 +211,7 @@ int main(int argc, char *argv[]) return err; /* Register signal handlers */ - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ ksft_set_plan(64);

diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index a1dc2fe5285b..83240b980f9c 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -19,6 +19,10 @@ #include "mte_common_util.h" #include "mte_def.h" +#ifndef SA_EXPOSE_TAGBITS +#define SA_EXPOSE_TAGBITS 0x00000800 +#endif + #define INIT_BUFFER_SIZE 256 struct mte_fault_cxt cur_mte_cxt; @@ -79,12 +83,17 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc) } } -void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *)) +void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *), + bool export_tags) { struct sigaction sa; sa.sa_sigaction = handler; sa.sa_flags = SA_SIGINFO; + + if (export_tags && signal == SIGSEGV) + sa.sa_flags |= SA_EXPOSE_TAGBITS; + sigemptyset(&sa.sa_mask); sigaction(signal, &sa, NULL); }

diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h index a0017a303beb..6b109e84fa39 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.h +++ b/tools/testing/selftests/arm64/mte/mte_common_util.h @@ -40,7 +40,8 @@ extern struct mte_fault_cxt cur_mte_cxt; /* MTE utility functions */ void mte_default_handler(int signum, siginfo_t *si, void *uc); -void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *)); +void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *), + bool export_tags); void mte_wait_after_trig(void); void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags); void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping,

From 2e3e356560ef54605a1c779ae8dcd7889ff2875d Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 09:45:09 +0100 Subject: [PATCH 21/67] kselftest/arm64/mte: Check MTE_FAR feature is supported

To run the MTE_FAR tests only when the CPU supports the MTE_FAR feature, check whether the MTE_FAR feature is supported in the MTE tests.
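As a rough sketch, the runtime check boils down to the following (the AT_HWCAP3 fallback value of 29 is an assumption based on current uapi auxvec headers; HWCAP3_MTE_FAR is bit 0 as defined earlier in this series):

  #include <stdbool.h>
  #include <sys/auxv.h>

  #ifndef AT_HWCAP3
  #define AT_HWCAP3 29
  #endif
  #ifndef HWCAP3_MTE_FAR
  #define HWCAP3_MTE_FAR (1UL << 0)
  #endif

  static bool mte_far_supported(void)
  {
          /* getauxval() returns 0 when the kernel does not supply AT_HWCAP3 */
          return !!(getauxval(AT_HWCAP3) & HWCAP3_MTE_FAR);
  }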
Signed-off-by: Yeoreum Yun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250618084513.1761345-7-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/mte/mte_common_util.c | 4 ++++ tools/testing/selftests/arm64/mte/mte_common_util.h | 1 + 2 files changed, 5 insertions(+)

diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index 83240b980f9c..5c5680a87498 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -26,6 +26,7 @@ #define INIT_BUFFER_SIZE 256 struct mte_fault_cxt cur_mte_cxt; +bool mtefar_support; static unsigned int mte_cur_mode; static unsigned int mte_cur_pstate_tco; @@ -325,12 +326,15 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask) int mte_default_setup(void) { unsigned long hwcaps2 = getauxval(AT_HWCAP2); + unsigned long hwcaps3 = getauxval(AT_HWCAP3); unsigned long en = 0; int ret; if (!(hwcaps2 & HWCAP2_MTE)) ksft_exit_skip("MTE features unavailable\n"); + mtefar_support = !!(hwcaps3 & HWCAP3_MTE_FAR); + /* Get current mte mode */ ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0); if (ret < 0) {

diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h index 6b109e84fa39..4e1dd959df9b 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.h +++ b/tools/testing/selftests/arm64/mte/mte_common_util.h @@ -37,6 +37,7 @@ struct mte_fault_cxt { }; extern struct mte_fault_cxt cur_mte_cxt; +extern bool mtefar_support; /* MTE utility functions */ void mte_default_handler(int signum, siginfo_t *si, void *uc);

From ed434c6e081327e9d0685d4b5e4cd067246f8be6 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 09:45:10 +0100 Subject: [PATCH 22/67] kselftest/arm64/mte: Add address tag related macros and functions

Add address tag related macros and functions to test the MTE_FAR feature.

Signed-off-by: Yeoreum Yun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250618084513.1761345-8-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- .../selftests/arm64/mte/mte_common_util.c | 17 +++++++++++++++++ .../selftests/arm64/mte/mte_common_util.h | 2 ++ tools/testing/selftests/arm64/mte/mte_def.h | 8 ++++++++ 3 files changed, 27 insertions(+)

diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index 5c5680a87498..d9702a542cb6 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -130,6 +131,19 @@ void mte_clear_tags(void *ptr, size_t size) mte_clear_tag_address_range(ptr, size); } +void *mte_insert_atag(void *ptr) +{ + unsigned char atag; + + atag = mtefar_support ?
(random() % MT_ATAG_MASK) + 1 : 0; + return (void *)MT_SET_ATAG((unsigned long)ptr, atag); +} + +void *mte_clear_atag(void *ptr) +{ + return (void *)MT_CLEAR_ATAG((unsigned long)ptr); +} + static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping, size_t range_before, size_t range_after, bool tags, int fd) @@ -330,6 +344,9 @@ int mte_default_setup(void) unsigned long en = 0; int ret; + /* To generate random address tag */ + srandom(time(NULL)); + if (!(hwcaps2 & HWCAP2_MTE)) ksft_exit_skip("MTE features unavailable\n");

diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h index 4e1dd959df9b..045e4ad2f018 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.h +++ b/tools/testing/selftests/arm64/mte/mte_common_util.h @@ -56,6 +56,8 @@ void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type, size_t range_before, size_t range_after); void *mte_insert_tags(void *ptr, size_t size); void mte_clear_tags(void *ptr, size_t size); +void *mte_insert_atag(void *ptr); +void *mte_clear_atag(void *ptr); int mte_default_setup(void); void mte_restore_setup(void); int mte_switch_mode(int mte_option, unsigned long incl_mask);

diff --git a/tools/testing/selftests/arm64/mte/mte_def.h b/tools/testing/selftests/arm64/mte/mte_def.h index 9b188254b61a..6ad22f07c9b8 100644 --- a/tools/testing/selftests/arm64/mte/mte_def.h +++ b/tools/testing/selftests/arm64/mte/mte_def.h @@ -42,6 +42,8 @@ #define MT_TAG_COUNT 16 #define MT_INCLUDE_TAG_MASK 0xFFFF #define MT_EXCLUDE_TAG_MASK 0x0 +#define MT_ATAG_SHIFT 60 +#define MT_ATAG_MASK 0xFUL #define MT_ALIGN_GRANULE (MT_GRANULE_SIZE - 1) #define MT_CLEAR_TAG(x) ((x) & ~(MT_TAG_MASK << MT_TAG_SHIFT)) @@ -49,6 +51,12 @@ #define MT_FETCH_TAG(x) ((x >> MT_TAG_SHIFT) & (MT_TAG_MASK)) #define MT_ALIGN_UP(x) ((x + MT_ALIGN_GRANULE) & ~(MT_ALIGN_GRANULE)) +#define MT_CLEAR_ATAG(x) ((x) & ~(MT_TAG_MASK << MT_ATAG_SHIFT)) +#define MT_SET_ATAG(x, y) ((x) | (((y) & MT_ATAG_MASK) << MT_ATAG_SHIFT)) +#define MT_FETCH_ATAG(x) ((x >> MT_ATAG_SHIFT) & (MT_ATAG_MASK)) + +#define MT_CLEAR_TAGS(x) (MT_CLEAR_ATAG(MT_CLEAR_TAG(x))) + #define MT_PSTATE_TCO_SHIFT 25 #define MT_PSTATE_TCO_MASK ~(0x1 << MT_PSTATE_TCO_SHIFT) #define MT_PSTATE_TCO_EN 1

From 49cee364c8665c5951162a0e193d10a86e3e6011 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 09:45:11 +0100 Subject: [PATCH 23/67] kselftest/arm64/mte: Add verification for address tag in signal handler

Add verification of the address tag [63:60] when a synchronous MTE fault happens. When the signal handler is registered with SA_EXPOSE_TAGBITS, the address includes not only the memory tag [59:56] but also the address tag.
Therefore, when verifying the fault address location, remove both tags.

Signed-off-by: Yeoreum Yun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250618084513.1761345-9-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- .../selftests/arm64/mte/mte_common_util.c | 38 ++++++++++++++----- 1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index d9702a542cb6..10dcbc37e379 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -33,12 +33,25 @@ static unsigned int mte_cur_pstate_tco; void mte_default_handler(int signum, siginfo_t *si, void *uc) { + struct sigaction sa; unsigned long addr = (unsigned long)si->si_addr; + unsigned char si_tag, si_atag; + + sigaction(signum, NULL, &sa); + + if (sa.sa_flags & SA_EXPOSE_TAGBITS) { + si_tag = MT_FETCH_TAG(addr); + si_atag = MT_FETCH_ATAG(addr); + addr = MT_CLEAR_TAGS(addr); + } else { + si_tag = 0; + si_atag = 0; + } if (signum == SIGSEGV) { #ifdef DEBUG - ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx\n", - ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code); + ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx, si_tag=%x, si_atag=%x\n", + ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code, si_tag, si_atag); #endif if (si->si_code == SEGV_MTEAERR) { if (cur_mte_cxt.trig_si_code == si->si_code) @@ -51,13 +64,18 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc) } /* Compare the context for precise error */ else if (si->si_code == SEGV_MTESERR) { + if ((!mtefar_support && si_atag) || (si_atag != MT_FETCH_ATAG(cur_mte_cxt.trig_addr))) { + ksft_print_msg("Invalid MTE synchronous exception caught for address tag!
si_tag=%x, si_atag: %x\n", si_tag, si_atag); + exit(KSFT_FAIL); + } + if (cur_mte_cxt.trig_si_code == si->si_code && ((cur_mte_cxt.trig_range >= 0 && - addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && - addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || + addr >= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) && + addr <= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || (cur_mte_cxt.trig_range < 0 && - addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && - addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) { + addr <= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) && + addr >= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) { cur_mte_cxt.fault_valid = true; /* Adjust the pc by 4 */ ((ucontext_t *)uc)->uc_mcontext.pc += 4; @@ -73,11 +91,11 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc) ksft_print_msg("INFO: SIGBUS signal at pc=%llx, fault addr=%lx, si_code=%x\n", ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code); if ((cur_mte_cxt.trig_range >= 0 && - addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && - addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || + addr >= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) && + addr <= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || (cur_mte_cxt.trig_range < 0 && - addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && - addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) { + addr <= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) && + addr >= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) { cur_mte_cxt.fault_valid = true; /* Adjust the pc by 4 */ ((ucontext_t *)uc)->uc_mcontext.pc += 4;

From 64a64e5d12f070405800745c674916fbbf2b9283 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 09:45:12 +0100 Subject: [PATCH 24/67] kselftest/arm64/mte: Refactor check_mmap_option test

Before adding the mtefar test cases to check_mmap_options.c, refactor check_mmap_options: - build a test case suite array from the test options (mem_type, mte_sync type, etc.) so that a general test case pattern can be used, as sketched below - generate each test case name according to the test options.
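The refactored main loop then follows the usual table-driven shape; a condensed sketch (run_one_case() is a hypothetical stand-in for the per-check-type dispatch in the actual patch):

  for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
          struct check_mmap_testcase *tc = &test_cases[i];

          /* every option for a run lives in its table entry */
          evaluate_test(run_one_case(tc), format_test_name(tc));
  }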
Signed-off-by: Yeoreum Yun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250618084513.1761345-10-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- .../selftests/arm64/mte/check_mmap_options.c | 363 +++++++++++++++--- 1 file changed, 311 insertions(+), 52 deletions(-) diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c index b37bf481c9f9..0df7ce532465 100644 --- a/tools/testing/selftests/arm64/mte/check_mmap_options.c +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -3,6 +3,7 @@ #define _GNU_SOURCE +#include #include #include #include @@ -24,6 +25,23 @@ #define TAG_CHECK_ON 0 #define TAG_CHECK_OFF 1 +#define TEST_NAME_MAX 256 + +enum mte_mem_check_type { + CHECK_ANON_MEM = 0, + CHECK_FILE_MEM = 1, + CHECK_CLEAR_PROT_MTE = 2, +}; + +struct check_mmap_testcase { + int check_type; + int mem_type; + int mte_sync; + int mapping; + int tag_check; + bool enable_tco; +}; + static size_t page_size; static int sizes[] = { 1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE, @@ -183,10 +201,271 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) return KSFT_PASS; } +const char *format_test_name(struct check_mmap_testcase *tc) +{ + static char test_name[TEST_NAME_MAX]; + const char *check_type_str; + const char *mem_type_str; + const char *sync_str; + const char *mapping_str; + const char *tag_check_str; + + switch (tc->check_type) { + case CHECK_ANON_MEM: + check_type_str = "anonymous memory"; + break; + case CHECK_FILE_MEM: + check_type_str = "file memory"; + break; + case CHECK_CLEAR_PROT_MTE: + check_type_str = "clear PROT_MTE flags"; + break; + default: + assert(0); + break; + } + + switch (tc->mem_type) { + case USE_MMAP: + mem_type_str = "mmap"; + break; + case USE_MPROTECT: + mem_type_str = "mmap/mprotect"; + break; + default: + assert(0); + break; + } + + switch (tc->mte_sync) { + case MTE_NONE_ERR: + sync_str = "no error"; + break; + case MTE_SYNC_ERR: + sync_str = "sync error"; + break; + case MTE_ASYNC_ERR: + sync_str = "async error"; + break; + default: + assert(0); + break; + } + + switch (tc->mapping) { + case MAP_SHARED: + mapping_str = "shared"; + break; + case MAP_PRIVATE: + mapping_str = "private"; + break; + default: + assert(0); + break; + } + + switch (tc->tag_check) { + case TAG_CHECK_ON: + tag_check_str = "tag check on"; + break; + case TAG_CHECK_OFF: + tag_check_str = "tag check off"; + break; + default: + assert(0); + break; + } + + snprintf(test_name, sizeof(test_name), + "Check %s with %s mapping, %s mode, %s memory and %s\n", + check_type_str, mapping_str, sync_str, mem_type_str, + tag_check_str); + + return test_name; +} + int main(int argc, char *argv[]) { - int err; + int err, i; int item = ARRAY_SIZE(sizes); + struct check_mmap_testcase test_cases[]= { + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_OFF, + .enable_tco = true, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_OFF, + .enable_tco = true, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_NONE_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_OFF, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_NONE_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_OFF, + .enable_tco = false, + }, + { + 
.check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_CLEAR_PROT_MTE, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_CLEAR_PROT_MTE, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .enable_tco = false, + }, + }; err = mte_default_setup(); if (err) @@ -205,59 +484,39 @@ int main(int argc, char *argv[]) mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ - ksft_set_plan(22); + ksft_set_plan(ARRAY_SIZE(test_cases)); - mte_enable_pstate_tco(); + for (i = 0 ; i < ARRAY_SIZE(test_cases); i++) { + if (test_cases[i].enable_tco) + mte_enable_pstate_tco(); + else + mte_disable_pstate_tco(); - 
evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), - "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), - "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check off\n"); - - mte_disable_pstate_tco(); - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF), - "Check anonymous memory with private mapping, no error mode, mmap memory and tag check off\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF), - "Check file memory with private mapping, no error mode, mmap/mprotect memory and tag check off\n"); - - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check anonymous memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check anonymous memory with shared mapping, sync error mode, mmap memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check anonymous memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check anonymous memory with private mapping, async error mode, mmap memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check anonymous memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check anonymous memory with shared mapping, async error mode, mmap memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check anonymous memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n"); - - evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check file memory with private mapping, sync error mode, mmap memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check file memory with shared mapping, sync error mode, mmap memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check file memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check file memory with private mapping, async error mode, mmap memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check file memory 
with private mapping, async error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check file memory with shared mapping, async error mode, mmap memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check file memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n"); - - evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), - "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n"); - evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), - "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n"); + switch (test_cases[i].check_type) { + case CHECK_ANON_MEM: + evaluate_test(check_anonymous_memory_mapping(test_cases[i].mem_type, + test_cases[i].mte_sync, + test_cases[i].mapping, + test_cases[i].tag_check), + format_test_name(&test_cases[i])); + break; + case CHECK_FILE_MEM: + evaluate_test(check_file_memory_mapping(test_cases[i].mem_type, + test_cases[i].mte_sync, + test_cases[i].mapping, + test_cases[i].tag_check), + format_test_name(&test_cases[i])); + break; + case CHECK_CLEAR_PROT_MTE: + evaluate_test(check_clear_prot_mte_flag(test_cases[i].mem_type, + test_cases[i].mte_sync, + test_cases[i].mapping), + format_test_name(&test_cases[i])); + break; + default: + exit(KSFT_FAIL); + } + } mte_restore_setup(); ksft_print_cnts(); From d09674f98cdbf5dc2288cd5e2d0e63b6e5f723b2 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 18 Jun 2025 09:45:13 +0100 Subject: [PATCH 25/67] kselftest/arm64/mte: Add mtefar tests on check_mmap_options If FEAT_MTE_TAGGED_FAR (Armv8.9) is supported, bits 63:60 of the fault address are preserved in response to synchronous tag check faults (SEGV_MTESERR). This patch adds new test cases using address tags (bits 63:60), corresponding to each existing test in check_mmap_option. 
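A condensed sketch of what each new case exercises, using the helpers added earlier in this series (check_atag_case() is a hypothetical wrapper; mte_insert_atag() places a random non-zero tag in bits 63:60 when MTE_FAR is supported):

  static int check_atag_case(int size, int mem_type, int mapping)
  {
          char *ptr = mte_allocate_memory(size, mem_type, mapping, true);

          /* stack an address tag (bits 63:60) on top of the memory tag */
          ptr = mte_insert_atag(ptr);

          /* a synchronous tag check fault on this access should report
           * the address tag back in si_addr when SA_EXPOSE_TAGBITS is set */
          ptr[0] = '1';
          return 0;
  }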
Signed-off-by: Yeoreum Yun Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250618084513.1761345-11-yeoreum.yun@arm.com Signed-off-by: Catalin Marinas --- .../selftests/arm64/mte/check_mmap_options.c | 194 +++++++++++++++--- 1 file changed, 171 insertions(+), 23 deletions(-) diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c index 0df7ce532465..91a81b4a9bfa 100644 --- a/tools/testing/selftests/arm64/mte/check_mmap_options.c +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -24,6 +24,8 @@ #define OVERFLOW MT_GRANULE_SIZE #define TAG_CHECK_ON 0 #define TAG_CHECK_OFF 1 +#define ATAG_CHECK_ON 1 +#define ATAG_CHECK_OFF 0 #define TEST_NAME_MAX 256 @@ -39,6 +41,7 @@ struct check_mmap_testcase { int mte_sync; int mapping; int tag_check; + int atag_check; bool enable_tco; }; @@ -48,8 +51,14 @@ static int sizes[] = { /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0 }; -static int check_mte_memory(char *ptr, int size, int mode, int tag_check) +static int check_mte_memory(char *ptr, int size, int mode, int tag_check, int atag_check) { + if (!mtefar_support && atag_check == ATAG_CHECK_ON) + return KSFT_SKIP; + + if (atag_check == ATAG_CHECK_ON) + ptr = mte_insert_atag(ptr); + mte_initialize_current_context(mode, (uintptr_t)ptr, size); memset(ptr, '1', size); mte_wait_after_trig(); @@ -75,7 +84,7 @@ static int check_mte_memory(char *ptr, int size, int mode, int tag_check) return KSFT_PASS; } -static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check) +static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check, int atag_check) { char *ptr, *map_ptr; int run, result, map_size; @@ -97,16 +106,16 @@ static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, i munmap((void *)map_ptr, map_size); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, tag_check); + result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check); mte_clear_tags((void *)ptr, sizes[run]); mte_free_memory((void *)map_ptr, map_size, mem_type, false); - if (result == KSFT_FAIL) - return KSFT_FAIL; + if (result != KSFT_PASS) + return result; } return KSFT_PASS; } -static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check) +static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check, int atag_check) { char *ptr, *map_ptr; int run, fd, map_size; @@ -135,17 +144,17 @@ static int check_file_memory_mapping(int mem_type, int mode, int mapping, int ta close(fd); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, tag_check); + result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check); mte_clear_tags((void *)ptr, sizes[run]); munmap((void *)map_ptr, map_size); close(fd); - if (result == KSFT_FAIL) - break; + if (result != KSFT_PASS) + return result; } - return result; + return KSFT_PASS; } -static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) +static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping, int atag_check) { char *ptr, *map_ptr; int run, prot_flag, result, fd, map_size; @@ -168,7 +177,7 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n"); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON); + result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, 
atag_check); mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); if (result != KSFT_PASS) return KSFT_FAIL; @@ -192,11 +201,11 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) close(fd); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON); + result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, atag_check); mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); close(fd); if (result != KSFT_PASS) - return KSFT_FAIL; + return result; } return KSFT_PASS; } @@ -209,6 +218,7 @@ const char *format_test_name(struct check_mmap_testcase *tc) const char *sync_str; const char *mapping_str; const char *tag_check_str; + const char *atag_check_str; switch (tc->check_type) { case CHECK_ANON_MEM: @@ -276,10 +286,22 @@ const char *format_test_name(struct check_mmap_testcase *tc) break; } + switch (tc->atag_check) { + case ATAG_CHECK_ON: + atag_check_str = "with address tag [63:60]"; + break; + case ATAG_CHECK_OFF: + atag_check_str = "without address tag [63:60]"; + break; + default: + assert(0); + break; + } + snprintf(test_name, sizeof(test_name), - "Check %s with %s mapping, %s mode, %s memory and %s\n", + "Check %s with %s mapping, %s mode, %s memory and %s (%s)\n", check_type_str, mapping_str, sync_str, mem_type_str, - tag_check_str); + tag_check_str, atag_check_str); return test_name; } @@ -295,6 +317,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_OFF, + .atag_check = ATAG_CHECK_OFF, .enable_tco = true, }, { @@ -303,6 +326,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_OFF, + .atag_check = ATAG_CHECK_OFF, .enable_tco = true, }, { @@ -311,6 +335,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_NONE_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_OFF, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -319,6 +344,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_NONE_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_OFF, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -327,6 +353,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -335,6 +362,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -343,6 +371,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -351,6 +380,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -359,6 +389,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_ASYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -367,6 +398,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_ASYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -375,6 +407,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_ASYNC_ERR, .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -383,6 +416,7 @@ int main(int argc, char *argv[]) 
.mte_sync = MTE_ASYNC_ERR, .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -391,6 +425,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -399,6 +434,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -407,6 +443,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -415,6 +452,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -423,6 +461,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_ASYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -431,6 +470,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_ASYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -439,6 +479,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_ASYNC_ERR, .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -447,6 +488,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_ASYNC_ERR, .mapping = MAP_SHARED, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -455,6 +497,7 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, .enable_tco = false, }, { @@ -463,6 +506,106 @@ int main(int argc, char *argv[]) .mte_sync = MTE_SYNC_ERR, .mapping = MAP_PRIVATE, .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = 
MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_CLEAR_PROT_MTE, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .enable_tco = false, + }, + { + .check_type = CHECK_CLEAR_PROT_MTE, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, .enable_tco = false, }, }; @@ -479,14 +622,16 @@ int main(int argc, char *argv[]) sizes[item - 2] = page_size; sizes[item - 1] = page_size + 1; - /* Register signal handlers */ - mte_register_signal(SIGBUS, mte_default_handler, false); - mte_register_signal(SIGSEGV, mte_default_handler, false); - /* Set test plan */ ksft_set_plan(ARRAY_SIZE(test_cases)); for (i = 0 ; i < ARRAY_SIZE(test_cases); i++) { + /* Register signal handlers */ + mte_register_signal(SIGBUS, mte_default_handler, + test_cases[i].atag_check == ATAG_CHECK_ON); + mte_register_signal(SIGSEGV, mte_default_handler, + test_cases[i].atag_check == ATAG_CHECK_ON); + if (test_cases[i].enable_tco) mte_enable_pstate_tco(); else @@ -497,20 +642,23 @@ int main(int argc, char *argv[]) evaluate_test(check_anonymous_memory_mapping(test_cases[i].mem_type, test_cases[i].mte_sync, test_cases[i].mapping, - test_cases[i].tag_check), + test_cases[i].tag_check, + test_cases[i].atag_check), format_test_name(&test_cases[i])); break; case CHECK_FILE_MEM: evaluate_test(check_file_memory_mapping(test_cases[i].mem_type, test_cases[i].mte_sync, test_cases[i].mapping, - test_cases[i].tag_check), + test_cases[i].tag_check, + test_cases[i].atag_check), format_test_name(&test_cases[i])); break; case CHECK_CLEAR_PROT_MTE: evaluate_test(check_clear_prot_mte_flag(test_cases[i].mem_type, test_cases[i].mte_sync, - test_cases[i].mapping), + test_cases[i].mapping, + test_cases[i].atag_check), format_test_name(&test_cases[i])); break; default: From 6d80cb73131db19a9d625dad93d5dbc53513c6cb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 9 Jun 2025 14:29:10 +0100 Subject: [PATCH 26/67] kselftest/arm64: Convert tpidr2 test to use kselftest.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent work by Thomas Weißschuh means that it is now possible to use kselftest.h with nolibc. Convert the tpidr2 test which is nolibc specific to use kselftest.h, making it look more standard and ensuring it gets the benefit of any work done on kselftest.h. 
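The conversion follows the standard kselftest.h idioms; a stripped-down sketch of the resulting structure (feature_present() and do_test() are hypothetical placeholders):

  #include "kselftest.h"

  int main(void)
  {
          ksft_print_header();
          ksft_set_plan(1);

          if (feature_present())
                  ksft_test_result(do_test(), "do_test\n");
          else
                  ksft_test_result_skip("do_test\n");

          /* prints pass/fail/skip totals and exits */
          ksft_finished();
  }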
Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250609-kselftest-arm64-nolibc-header-v1-1-16ee1c6fbfed@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/abi/Makefile | 2 +- tools/testing/selftests/arm64/abi/tpidr2.c | 140 ++++++--------------- 2 files changed, 38 insertions(+), 104 deletions(-) diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile index a6d30c620908..483488f8c2ad 100644 --- a/tools/testing/selftests/arm64/abi/Makefile +++ b/tools/testing/selftests/arm64/abi/Makefile @@ -12,4 +12,4 @@ $(OUTPUT)/syscall-abi: syscall-abi.c syscall-abi-asm.S $(OUTPUT)/tpidr2: tpidr2.c $(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ -static -include ../../../../include/nolibc/nolibc.h \ - -ffreestanding -Wall $^ -o $@ -lgcc + -I../.. -ffreestanding -Wall $^ -o $@ -lgcc diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c index eb19dcc37a75..f58a9f89b952 100644 --- a/tools/testing/selftests/arm64/abi/tpidr2.c +++ b/tools/testing/selftests/arm64/abi/tpidr2.c @@ -3,31 +3,12 @@ #include #include +#include "kselftest.h" + #define SYS_TPIDR2 "S3_3_C13_C0_5" #define EXPECTED_TESTS 5 -static void putstr(const char *str) -{ - write(1, str, strlen(str)); -} - -static void putnum(unsigned int num) -{ - char c; - - if (num / 10) - putnum(num / 10); - - c = '0' + (num % 10); - write(1, &c, 1); -} - -static int tests_run; -static int tests_passed; -static int tests_failed; -static int tests_skipped; - static void set_tpidr2(uint64_t val) { asm volatile ( @@ -50,20 +31,6 @@ static uint64_t get_tpidr2(void) return val; } -static void print_summary(void) -{ - if (tests_passed + tests_failed + tests_skipped != EXPECTED_TESTS) - putstr("# UNEXPECTED TEST COUNT: "); - - putstr("# Totals: pass:"); - putnum(tests_passed); - putstr(" fail:"); - putnum(tests_failed); - putstr(" xfail:0 xpass:0 skip:"); - putnum(tests_skipped); - putstr(" error:0\n"); -} - /* Processes should start with TPIDR2 == 0 */ static int default_value(void) { @@ -105,9 +72,8 @@ static int write_fork_read(void) if (newpid == 0) { /* In child */ if (get_tpidr2() != oldpid) { - putstr("# TPIDR2 changed in child: "); - putnum(get_tpidr2()); - putstr("\n"); + ksft_print_msg("TPIDR2 changed in child: %llx\n", + get_tpidr2()); exit(0); } @@ -115,14 +81,12 @@ static int write_fork_read(void) if (get_tpidr2() == getpid()) { exit(1); } else { - putstr("# Failed to set TPIDR2 in child\n"); + ksft_print_msg("Failed to set TPIDR2 in child\n"); exit(0); } } if (newpid < 0) { - putstr("# fork() failed: -"); - putnum(-newpid); - putstr("\n"); + ksft_print_msg("fork() failed: %d\n", newpid); return 0; } @@ -132,23 +96,22 @@ static int write_fork_read(void) if (waiting < 0) { if (errno == EINTR) continue; - putstr("# waitpid() failed: "); - putnum(errno); - putstr("\n"); + ksft_print_msg("waitpid() failed: %d\n", errno); return 0; } if (waiting != newpid) { - putstr("# waitpid() returned wrong PID\n"); + ksft_print_msg("waitpid() returned wrong PID: %d != %d\n", + waiting, newpid); return 0; } if (!WIFEXITED(status)) { - putstr("# child did not exit\n"); + ksft_print_msg("child did not exit\n"); return 0; } if (getpid() != get_tpidr2()) { - putstr("# TPIDR2 corrupted in parent\n"); + ksft_print_msg("TPIDR2 corrupted in parent\n"); return 0; } @@ -188,35 +151,32 @@ static int write_clone_read(void) stack = malloc(__STACK_SIZE); if (!stack) { - putstr("# malloc() failed\n"); + ksft_print_msg("malloc() failed\n"); 
return 0; } ret = sys_clone(CLONE_VM, (unsigned long)stack + __STACK_SIZE, &parent_tid, 0, &child_tid); if (ret == -1) { - putstr("# clone() failed\n"); - putnum(errno); - putstr("\n"); + ksft_print_msg("clone() failed: %d\n", errno); return 0; } if (ret == 0) { /* In child */ if (get_tpidr2() != 0) { - putstr("# TPIDR2 non-zero in child: "); - putnum(get_tpidr2()); - putstr("\n"); + ksft_print_msg("TPIDR2 non-zero in child: %llx\n", + get_tpidr2()); exit(0); } if (gettid() == 0) - putstr("# Child TID==0\n"); + ksft_print_msg("Child TID==0\n"); set_tpidr2(gettid()); if (get_tpidr2() == gettid()) { exit(1); } else { - putstr("# Failed to set TPIDR2 in child\n"); + ksft_print_msg("Failed to set TPIDR2 in child\n"); exit(0); } } @@ -227,25 +187,22 @@ if (waiting < 0) { if (errno == EINTR) continue; - putstr("# wait4() failed: "); - putnum(errno); - putstr("\n"); + ksft_print_msg("wait4() failed: %d\n", errno); return 0; } if (waiting != ret) { - putstr("# wait4() returned wrong PID "); - putnum(waiting); - putstr("\n"); + ksft_print_msg("wait4() returned wrong PID %d\n", + waiting); return 0; } if (!WIFEXITED(status)) { - putstr("# child did not exit\n"); + ksft_print_msg("child did not exit\n"); return 0; } if (parent != get_tpidr2()) { - putstr("# TPIDR2 corrupted in parent\n"); + ksft_print_msg("TPIDR2 corrupted in parent\n"); return 0; } @@ -253,35 +210,14 @@ -#define run_test(name) \ - if (name()) { \ - tests_passed++; \ - } else { \ - tests_failed++; \ - putstr("not "); \ - } \ - putstr("ok "); \ - putnum(++tests_run); \ - putstr(" " #name "\n"); - -#define skip_test(name) \ - tests_skipped++; \ - putstr("ok "); \ - putnum(++tests_run); \ - putstr(" # SKIP " #name "\n"); - int main(int argc, char **argv) { int ret; - putstr("TAP version 13\n"); - putstr("1.."); - putnum(EXPECTED_TESTS); - putstr("\n"); + ksft_print_header(); + ksft_set_plan(5); - putstr("# PID: "); - putnum(getpid()); - putstr("\n"); + ksft_print_msg("PID: %d\n", getpid()); /* * This test is run with nolibc which doesn't support hwcap and * @@ -290,23 +226,21 @@ int main(int argc, char **argv) */ ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0); if (ret >= 0) { - run_test(default_value); - run_test(write_read); - run_test(write_sleep_read); - run_test(write_fork_read); - run_test(write_clone_read); + ksft_test_result(default_value(), "default_value\n"); + ksft_test_result(write_read(), "write_read\n"); + ksft_test_result(write_sleep_read(), "write_sleep_read\n"); + ksft_test_result(write_fork_read(), "write_fork_read\n"); + ksft_test_result(write_clone_read(), "write_clone_read\n"); } else { - putstr("# SME support not present\n"); + ksft_print_msg("SME support not present\n"); - skip_test(default_value); - skip_test(write_read); - skip_test(write_sleep_read); - skip_test(write_fork_read); - skip_test(write_clone_read); + ksft_test_result_skip("default_value\n"); + ksft_test_result_skip("write_read\n"); + ksft_test_result_skip("write_sleep_read\n"); + ksft_test_result_skip("write_fork_read\n"); + ksft_test_result_skip("write_clone_read\n"); } - print_summary(); - - return 0; + ksft_finished(); }

From 867446f090589626497638f70b10be5e61a0b925 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 9 Jun 2025 16:25:31 +0100 Subject: [PATCH 27/67] kselftest/arm64: Fix check for setting new VLs in sve-ptrace

The check that the new vector length we set was the expected one was typoed to an assignment statement which for some reason the compilers
didn't spot, most likely due to the macros involved. Fixes: a1d7111257cd ("selftests: arm64: More comprehensively test the SVE ptrace interface") Acked-by: Mark Rutland Acked-by: Dev Jain Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250609-kselftest-arm64-ssve-fixups-v2-1-998fcfa6f240@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 577b6e05e860..c499d5789dd5 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -253,7 +253,7 @@ static void ptrace_set_get_vl(pid_t child, const struct vec_type *type, return; } - ksft_test_result(new_sve->vl = prctl_vl, "Set %s VL %u\n", + ksft_test_result(new_sve->vl == prctl_vl, "Set %s VL %u\n", type->name, vl); free(new_sve); From 94ab150010f430a36c72e2fe5b7da44a625a6ad5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 9 Jun 2025 16:25:32 +0100 Subject: [PATCH 28/67] kselftest/arm64: Fix test for streaming FPSIMD write in sve-ptrace Since f916dd32a943 ("arm64/fpsimd: ptrace: Mandate SVE payload for streaming-mode state") we do not support writing FPSIMD payload data when writing NT_ARM_SSVE but the sve-ptrace test has an explicit test for this being supported which was not updated to reflect the new behaviour. Fix the test to expect a failure when writing FPSIMD data to the streaming mode register set. Acked-by: Mark Rutland Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250609-kselftest-arm64-ssve-fixups-v2-2-998fcfa6f240@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index c499d5789dd5..7e259907805b 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -301,8 +301,10 @@ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type) p[j] = j; } + /* This should only succeed for SVE */ ret = set_sve(child, type, sve); - ksft_test_result(ret == 0, "%s FPSIMD set via SVE: %d\n", + ksft_test_result((type->regset == NT_ARM_SVE) == (ret == 0), + "%s FPSIMD set via SVE: %d\n", type->name, ret); if (ret) goto out; From 9e8ebfe677f9101bbfe1f75d548a5aec581e8213 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 9 Jun 2025 16:25:33 +0100 Subject: [PATCH 29/67] kselftest/arm64: Specify SVE data when testing VL set in sve-ptrace Since f916dd32a943 ("arm64/fpsimd: ptrace: Mandate SVE payload for streaming-mode state") we reject attempts to write to the streaming mode regset even if there is no register data supplied, causing the tests for setting vector lengths and setting SVE_VL_INHERIT in sve-ptrace to spuriously fail. Set the flag to avoid the issue, we still support not supplying register data. 
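Concretely, a VL-only write now has to declare an (empty) SVE-format payload; a sketch mirroring the change below, where set_sve() is the test's own wrapper around ptrace(PTRACE_SETREGSET, ...):

  struct user_sve_header sve;

  memset(&sve, 0, sizeof(sve));
  sve.size = sizeof(sve);
  sve.vl = vl;
  /* declare SVE-format (but absent) register data so the kernel
   * accepts the write for the streaming-mode regset too */
  sve.flags = SVE_PT_REGS_SVE;
  ret = set_sve(child, type, &sve);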
Acked-by: Mark Rutland Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250609-kselftest-arm64-ssve-fixups-v2-3-998fcfa6f240@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 7e259907805b..7f9b6a61d369 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -170,7 +170,7 @@ static void ptrace_set_get_inherit(pid_t child, const struct vec_type *type) memset(&sve, 0, sizeof(sve)); sve.size = sizeof(sve); sve.vl = sve_vl_from_vq(SVE_VQ_MIN); - sve.flags = SVE_PT_VL_INHERIT; + sve.flags = SVE_PT_VL_INHERIT | SVE_PT_REGS_SVE; ret = set_sve(child, type, &sve); if (ret != 0) { ksft_test_result_fail("Failed to set %s SVE_PT_VL_INHERIT\n", @@ -235,6 +235,7 @@ static void ptrace_set_get_vl(pid_t child, const struct vec_type *type, /* Set the VL by doing a set with no register payload */ memset(&sve, 0, sizeof(sve)); sve.size = sizeof(sve); + sve.flags = SVE_PT_REGS_SVE; sve.vl = vl; ret = set_sve(child, type, &sve); if (ret != 0) {

From d3a80c5109a358943bde903f7dea3be469377ea5 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 13 Jun 2025 08:06:45 +0530 Subject: [PATCH 30/67] arm64/debug: Drop redundant DBG_MDSCR_* macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

MDSCR_EL1 has already been defined in tools sysreg format and hence can be used in all debug monitor related call paths. But using generated sysreg definitions causes build warnings because there is a mismatch between the mdscr variable (u32) and the GENMASK() based masks (long unsigned int). Convert all variables handling the MDSCR_EL1 register to u64, which also reflects the register's true width. -------------------------------------------------------------------------- arch/arm64/kernel/debug-monitors.c: In function ‘disable_debug_monitors’: arch/arm64/kernel/debug-monitors.c:108:13: warning: conversion from ‘long unsigned int’ to ‘u32’ {aka ‘unsigned int’} changes value from ‘18446744073709518847’ to ‘4294934527’ [-Woverflow] 108 | disable = ~MDSCR_EL1_MDE; | ^ -------------------------------------------------------------------------- While here, replace an open encoding with MDSCR_EL1_TDCC in __cpu_setup().
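A reduced example of the mismatch being fixed (MDSCR_EL1_MDE is bit 15, so its GENMASK()-derived complement is a 64-bit value):

  u64 mask64 = ~MDSCR_EL1_MDE;  /* 0xffffffffffff7fff, full register width */
  u32 mask32 = ~MDSCR_EL1_MDE;  /* truncated to 0xffff7fff, triggering the
                                 * -Woverflow warning quoted above */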
Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Ada Couprie Diaz Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20250613023646.1215700-2-anshuman.khandual@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 4 ++-- arch/arm64/include/asm/debug-monitors.h | 6 ------ arch/arm64/kernel/debug-monitors.c | 22 +++++++++++----------- arch/arm64/kernel/entry-common.c | 4 ++-- arch/arm64/mm/proc.S | 2 +- 5 files changed, 16 insertions(+), 22 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index ad63457a05c5..f229d96616e5 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -53,7 +53,7 @@ .macro disable_step_tsk, flgs, tmp tbz \flgs, #TIF_SINGLESTEP, 9990f mrs \tmp, mdscr_el1 - bic \tmp, \tmp, #DBG_MDSCR_SS + bic \tmp, \tmp, #MDSCR_EL1_SS msr mdscr_el1, \tmp isb // Take effect before a subsequent clear of DAIF.D 9990: @@ -63,7 +63,7 @@ .macro enable_step_tsk, flgs, tmp tbz \flgs, #TIF_SINGLESTEP, 9990f mrs \tmp, mdscr_el1 - orr \tmp, \tmp, #DBG_MDSCR_SS + orr \tmp, \tmp, #MDSCR_EL1_SS msr mdscr_el1, \tmp 9990: .endm diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 8f6ba31b8658..1f37dd01482b 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -13,14 +13,8 @@ #include /* Low-level stepping controls. */ -#define DBG_MDSCR_SS (1 << 0) #define DBG_SPSR_SS (1 << 21) -/* MDSCR_EL1 enabling bits */ -#define DBG_MDSCR_KDE (1 << 13) -#define DBG_MDSCR_MDE (1 << 15) -#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE) - #define DBG_ESR_EVT(x) (((x) >> 27) & 0x7) /* AArch64 */ diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 58f047de3e1c..08f1d02507cd 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -34,7 +34,7 @@ u8 debug_monitors_arch(void) /* * MDSCR access routines. 
*/ -static void mdscr_write(u32 mdscr) +static void mdscr_write(u64 mdscr) { unsigned long flags; flags = local_daif_save(); @@ -43,7 +43,7 @@ static void mdscr_write(u32 mdscr) } NOKPROBE_SYMBOL(mdscr_write); -static u32 mdscr_read(void) +static u64 mdscr_read(void) { return read_sysreg(mdscr_el1); } @@ -79,16 +79,16 @@ static DEFINE_PER_CPU(int, kde_ref_count); void enable_debug_monitors(enum dbg_active_el el) { - u32 mdscr, enable = 0; + u64 mdscr, enable = 0; WARN_ON(preemptible()); if (this_cpu_inc_return(mde_ref_count) == 1) - enable = DBG_MDSCR_MDE; + enable = MDSCR_EL1_MDE; if (el == DBG_ACTIVE_EL1 && this_cpu_inc_return(kde_ref_count) == 1) - enable |= DBG_MDSCR_KDE; + enable |= MDSCR_EL1_KDE; if (enable && debug_enabled) { mdscr = mdscr_read(); @@ -100,16 +100,16 @@ NOKPROBE_SYMBOL(enable_debug_monitors); void disable_debug_monitors(enum dbg_active_el el) { - u32 mdscr, disable = 0; + u64 mdscr, disable = 0; WARN_ON(preemptible()); if (this_cpu_dec_return(mde_ref_count) == 0) - disable = ~DBG_MDSCR_MDE; + disable = ~MDSCR_EL1_MDE; if (el == DBG_ACTIVE_EL1 && this_cpu_dec_return(kde_ref_count) == 0) - disable &= ~DBG_MDSCR_KDE; + disable &= ~MDSCR_EL1_KDE; if (disable) { mdscr = mdscr_read(); @@ -415,7 +415,7 @@ void kernel_enable_single_step(struct pt_regs *regs) { WARN_ON(!irqs_disabled()); set_regs_spsr_ss(regs); - mdscr_write(mdscr_read() | DBG_MDSCR_SS); + mdscr_write(mdscr_read() | MDSCR_EL1_SS); enable_debug_monitors(DBG_ACTIVE_EL1); } NOKPROBE_SYMBOL(kernel_enable_single_step); @@ -423,7 +423,7 @@ NOKPROBE_SYMBOL(kernel_enable_single_step); void kernel_disable_single_step(void) { WARN_ON(!irqs_disabled()); - mdscr_write(mdscr_read() & ~DBG_MDSCR_SS); + mdscr_write(mdscr_read() & ~MDSCR_EL1_SS); disable_debug_monitors(DBG_ACTIVE_EL1); } NOKPROBE_SYMBOL(kernel_disable_single_step); @@ -431,7 +431,7 @@ NOKPROBE_SYMBOL(kernel_disable_single_step); int kernel_active_single_step(void) { WARN_ON(!irqs_disabled()); - return mdscr_read() & DBG_MDSCR_SS; + return mdscr_read() & MDSCR_EL1_SS; } NOKPROBE_SYMBOL(kernel_active_single_step); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 7c1970b341b8..171f93f2494b 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -344,7 +344,7 @@ static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); static void cortex_a76_erratum_1463225_svc_handler(void) { - u32 reg, val; + u64 reg, val; if (!unlikely(test_thread_flag(TIF_SINGLESTEP))) return; @@ -354,7 +354,7 @@ static void cortex_a76_erratum_1463225_svc_handler(void) __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1); reg = read_sysreg(mdscr_el1); - val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE; + val = reg | MDSCR_EL1_SS | MDSCR_EL1_KDE; write_sysreg(val, mdscr_el1); asm volatile("msr daifclr, #8"); isb(); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 80d470aa469d..eabf97c71dbf 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -454,7 +454,7 @@ SYM_FUNC_START(__cpu_setup) dsb nsh msr cpacr_el1, xzr // Reset cpacr_el1 - mov x1, #1 << 12 // Reset mdscr_el1 and disable + mov x1, MDSCR_EL1_TDCC // Reset mdscr_el1 and disable msr mdscr_el1, x1 // access to the DCC from EL0 reset_pmuserenr_el0 x1 // Disable PMU access from EL0 reset_amuserenr_el0 x1 // Disable AMU access from EL0 From 30ff3c981e48b37a93249a96675b450469ac13a6 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 13 Jun 2025 08:06:46 +0530 Subject: [PATCH 31/67] KVM: selftests: Change MDSCR_EL1 register holding 
variables as uint64_t Change the MDSCR_EL1 register holding local variables to uint64_t, which reflects the register's true width. Cc: Oliver Upton Cc: Joey Gouly Cc: kvm@vger.kernel.org Cc: kvmarm@lists.linux.dev Cc: linux-kernel@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Acked-by: Marc Zyngier Reviewed-by: Ada Couprie Diaz Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20250613023646.1215700-3-anshuman.khandual@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/kvm/arm64/debug-exceptions.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c index c7fb55c9135b..e34963956fbc 100644 --- a/tools/testing/selftests/kvm/arm64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c @@ -140,7 +140,7 @@ static void enable_os_lock(void) static void enable_monitor_debug_exceptions(void) { - uint32_t mdscr; + uint64_t mdscr; asm volatile("msr daifclr, #8"); @@ -223,7 +223,7 @@ void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr, static void install_ss(void) { - uint32_t mdscr; + uint64_t mdscr; asm volatile("msr daifclr, #8"); From 093ae7a033cfde536db997e7dc4e829ce65fb38a Mon Sep 17 00:00:00 2001 From: Xavier Xia Date: Tue, 24 Jun 2025 23:25:49 +0800 Subject: [PATCH 32/67] arm64/mm: Optimize loop to reduce redundant operations of contpte_ptep_get This commit optimizes the contpte_ptep_get and contpte_ptep_get_lockless functions by adding early termination logic. It checks if the dirty and young bits of orig_pte are already set and skips redundant bit-setting operations during the loop. This reduces unnecessary iterations and improves performance. In order to verify the optimization performance, a test function has been designed. The function's execution time and instruction statistics have been traced using perf, and the following are the operation results on a certain Qualcomm mobile phone chip:

Test Code:

#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

#define PAGE_SIZE 4096
#define CONT_PTES 16
#define TEST_SIZE (4096 * CONT_PTES * PAGE_SIZE)
#define YOUNG_BIT 8

void rwdata(char *buf)
{
	for (size_t i = 0; i < TEST_SIZE; i += PAGE_SIZE) {
		buf[i] = 'a';
		volatile char c = buf[i];
	}
}

void clear_young_dirty(char *buf)
{
	if (madvise(buf, TEST_SIZE, MADV_FREE) == -1) {
		perror("madvise free failed");
		free(buf);
		exit(EXIT_FAILURE);
	}
	if (madvise(buf, TEST_SIZE, MADV_COLD) == -1) {
		perror("madvise cold failed");
		free(buf);
		exit(EXIT_FAILURE);
	}
}

void set_one_young(char *buf)
{
	for (size_t i = 0; i < TEST_SIZE; i += CONT_PTES * PAGE_SIZE) {
		volatile char c = buf[i + YOUNG_BIT * PAGE_SIZE];
	}
}

void test_contpte_perf(void)
{
	char *buf;
	int ret = posix_memalign((void **)&buf, CONT_PTES * PAGE_SIZE, TEST_SIZE);

	if ((ret != 0) || ((unsigned long)buf % (CONT_PTES * PAGE_SIZE))) {
		perror("posix_memalign failed");
		exit(EXIT_FAILURE);
	}

	rwdata(buf);
#if TEST_CASE2 || TEST_CASE3
	clear_young_dirty(buf);
#endif
#if TEST_CASE2
	set_one_young(buf);
#endif

	for (int j = 0; j < 500; j++) {
		mlock(buf, TEST_SIZE);
		munlock(buf, TEST_SIZE);
	}
	free(buf);
}

int main(void)
{
	test_contpte_perf();
	return 0;
}

Descriptions of the three test scenarios:

Scenario 1
The data of all 16 PTEs are both dirty and young.
#define TEST_CASE2 0
#define TEST_CASE3 0

Scenario 2
Among the 16 PTEs, only the 8th one is young, and there are no dirty ones.
#define TEST_CASE2 1
#define TEST_CASE3 0

Scenario 3
Among the 16 PTEs, there are neither young nor dirty ones.
#define TEST_CASE2 0
#define TEST_CASE3 1

Test results:

|Scenario 1         |       Original|       Optimized|
|-------------------|---------------|----------------|
|instructions       |    37912436160|     18731580031|
|test time          |         4.2797|          2.2949|
|overhead of        |               |                |
|contpte_ptep_get() |         21.31%|           4.80%|

|Scenario 2         |       Original|       Optimized|
|-------------------|---------------|----------------|
|instructions       |    36701270862|     36115790086|
|test time          |         3.2335|          3.0874|
|Overhead of        |               |                |
|contpte_ptep_get() |         32.26%|          33.57%|

|Scenario 3         |       Original|       Optimized|
|-------------------|---------------|----------------|
|instructions       |    36706279735|     36750881878|
|test time          |         3.2008|          3.1249|
|Overhead of        |               |                |
|contpte_ptep_get() |         31.94%|          34.59%|

For Scenario 1, the optimized code achieves an instruction benefit of 50.59% and a time benefit of 46.38%. For Scenario 2, the optimized code achieves an instruction count benefit of 1.6% and a time benefit of 4.5%. For Scenario 3, since all the PTEs have neither the young nor the dirty flag, the branches taken by the optimized code should be the same as those of the original code; in fact, the test results of the optimized code are close to those of the original code.

Ryan re-ran these tests on Apple M2 with 4K base pages + 64K mTHP:

Scenario 1: reduced to 56% of baseline execution time
Scenario 2: reduced to 89% of baseline execution time
Scenario 3: reduced to 91% of baseline execution time

The test function thus demonstrates that the optimization for contpte_ptep_get is effective. Since the logic of contpte_ptep_get_lockless is similar to that of contpte_ptep_get, the same optimization scheme is also adopted for it. Reviewed-by: Ryan Roberts Tested-by: Ryan Roberts Reviewed-by: Barry Song Signed-off-by: Xavier Xia Link: https://lore.kernel.org/r/20250624152549.2647828-1-xavier.qyxia@gmail.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/contpte.c | 72 +++++++++++++++++++++++++++++++++++------ 1 file changed, 63 insertions(+), 9 deletions(-) diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c index bcac4f55f9c1..71efe7dff0ad 100644 --- a/arch/arm64/mm/contpte.c +++ b/arch/arm64/mm/contpte.c @@ -169,17 +169,46 @@ pte_t contpte_ptep_get(pte_t *ptep, pte_t orig_pte) for (i = 0; i < CONT_PTES; i++, ptep++) { pte = __ptep_get(ptep); - if (pte_dirty(pte)) + if (pte_dirty(pte)) { orig_pte = pte_mkdirty(orig_pte); + for (; i < CONT_PTES; i++, ptep++) { + pte = __ptep_get(ptep); + if (pte_young(pte)) { + orig_pte = pte_mkyoung(orig_pte); + break; + } + } + break; + } - if (pte_young(pte)) + if (pte_young(pte)) { orig_pte = pte_mkyoung(orig_pte); + i++; + ptep++; + for (; i < CONT_PTES; i++, ptep++) { + pte = __ptep_get(ptep); + if (pte_dirty(pte)) { + orig_pte = pte_mkdirty(orig_pte); + break; + } + } + break; + } } return orig_pte; } EXPORT_SYMBOL_GPL(contpte_ptep_get); +static inline bool contpte_is_consistent(pte_t pte, unsigned long pfn, + pgprot_t orig_prot) +{ + pgprot_t prot = pte_pgprot(pte_mkold(pte_mkclean(pte))); + + return pte_valid_cont(pte) && pte_pfn(pte) == pfn && + pgprot_val(prot) == pgprot_val(orig_prot); +} + pte_t contpte_ptep_get_lockless(pte_t *orig_ptep) { /* @@ -202,7 +231,6 @@ pte_t contpte_ptep_get_lockless(pte_t *orig_ptep) pgprot_t orig_prot; unsigned long pfn; pte_t orig_pte; - pgprot_t prot; pte_t *ptep; pte_t pte; int i; @@ -219,18 +247,44 @@ pte_t contpte_ptep_get_lockless(pte_t *orig_ptep) for (i = 0; i < CONT_PTES; i++, ptep++, pfn++) { pte
= __ptep_get(ptep); - prot = pte_pgprot(pte_mkold(pte_mkclean(pte))); - if (!pte_valid_cont(pte) || - pte_pfn(pte) != pfn || - pgprot_val(prot) != pgprot_val(orig_prot)) + if (!contpte_is_consistent(pte, pfn, orig_prot)) goto retry; - if (pte_dirty(pte)) + if (pte_dirty(pte)) { orig_pte = pte_mkdirty(orig_pte); + for (; i < CONT_PTES; i++, ptep++, pfn++) { + pte = __ptep_get(ptep); - if (pte_young(pte)) + if (!contpte_is_consistent(pte, pfn, orig_prot)) + goto retry; + + if (pte_young(pte)) { + orig_pte = pte_mkyoung(orig_pte); + break; + } + } + break; + } + + if (pte_young(pte)) { orig_pte = pte_mkyoung(orig_pte); + i++; + ptep++; + pfn++; + for (; i < CONT_PTES; i++, ptep++, pfn++) { + pte = __ptep_get(ptep); + + if (!contpte_is_consistent(pte, pfn, orig_prot)) + goto retry; + + if (pte_dirty(pte)) { + orig_pte = pte_mkdirty(orig_pte); + break; + } + } + break; + } } return orig_pte; From ef8923e6c051a98164c2889db943df9695a39888 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 4 Jul 2025 05:47:07 -0700 Subject: [PATCH 33/67] arm64: efi: Fix KASAN false positive for EFI runtime stack KASAN reports invalid accesses during arch_stack_walk() for EFI runtime services due to vmalloc tagging[1]. The EFI runtime stack must be allocated with KASAN tags reset to avoid false positives. This patch uses arch_alloc_vmap_stack() instead of __vmalloc_node() for EFI stack allocation, which internally calls kasan_reset_tag() The changes ensure EFI runtime stacks are properly sanitized for KASAN while maintaining functional consistency. Link: https://lore.kernel.org/all/aFVVEgD0236LdrL6@gmail.com/ [1] Suggested-by: Andrey Konovalov Suggested-by: Catalin Marinas Reviewed-by: Catalin Marinas Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20250704-arm_kasan-v2-1-32ebb4fd7607@debian.org Signed-off-by: Will Deacon --- arch/arm64/kernel/efi.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 3857fd7ee8d4..62230d6dd919 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -15,6 +15,7 @@ #include #include +#include static bool region_is_misaligned(const efi_memory_desc_t *md) { @@ -214,9 +215,13 @@ static int __init arm64_efi_rt_init(void) if (!efi_enabled(EFI_RUNTIME_SERVICES)) return 0; - p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL, - NUMA_NO_NODE, &&l); -l: if (!p) { + if (!IS_ENABLED(CONFIG_VMAP_STACK)) { + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return -ENOMEM; + } + + p = arch_alloc_vmap_stack(THREAD_SIZE, NUMA_NO_NODE); + if (!p) { pr_warn("Failed to allocate EFI runtime stack\n"); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return -ENOMEM; From ad8b22648b7d0bc6f84230508436b1aafc2e2516 Mon Sep 17 00:00:00 2001 From: Ada Couprie Diaz Date: Mon, 7 Jul 2025 12:40:57 +0100 Subject: [PATCH 34/67] arm64: debug: clean up single_step_handler logic Remove the unnecessary boolean which always checks if the handler was found and return early instead. Signed-off-by: Ada Couprie Diaz Tested-by: Luis Claudio R. 
Goncalves Reviewed-by: Anshuman Khandual Acked-by: Mark Rutland Reviewed-by: Will Deacon Link: https://lore.kernel.org/r/20250707114109.35672-2-ada.coupriediaz@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 58f047de3e1c..676fa0231935 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -241,8 +241,6 @@ static void send_user_sigtrap(int si_code) static int single_step_handler(unsigned long unused, unsigned long esr, struct pt_regs *regs) { - bool handler_found = false; - /* * If we are stepping a pending breakpoint, call the hw_breakpoint * handler first. @@ -250,10 +248,10 @@ static int single_step_handler(unsigned long unused, unsigned long esr, if (!reinstall_suspended_bps(regs)) return 0; - if (!handler_found && call_step_hook(regs, esr) == DBG_HOOK_HANDLED) - handler_found = true; + if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED) + return 0; - if (!handler_found && user_mode(regs)) { + if (user_mode(regs)) { send_user_sigtrap(TRAP_TRACE); /* @@ -263,7 +261,7 @@ static int single_step_handler(unsigned long unused, unsigned long esr, * to the active-not-pending state). */ user_rewind_single_step(current); - } else if (!handler_found) { + } else { pr_warn("Unexpected kernel single-step exception at EL1\n"); /* * Re-enable stepping since we know that we will be From b1e2d95524e4d0f5b643394c739212869e95cf6a Mon Sep 17 00:00:00 2001 From: Ada Couprie Diaz Date: Mon, 7 Jul 2025 12:40:58 +0100 Subject: [PATCH 35/67] arm64: refactor aarch32_break_handler() `aarch32_break_handler()` is called in `do_el0_undef()` when we are trying to handle an exception whose Exception Syndrome is unknown. It checks if the instruction hit might be a 32-bit arm break (be it A32 or T2), and sends a SIGTRAP to userspace if it is so that it can be handled. However, this is badly represented in the naming of the function, and is not consistent with the other functions called with the same logic in `do_el0_undef()`. Rename it `try_handle_aarch32_break()` and change the return value to a boolean to align with the logic of the other tentative handlers in `do_el0_undef()`, the previous error code being ignored anyway. Signed-off-by: Ada Couprie Diaz Tested-by: Luis Claudio R. 
Goncalves Reviewed-by: Anshuman Khandual Acked-by: Mark Rutland Reviewed-by: Will Deacon Link: https://lore.kernel.org/r/20250707114109.35672-3-ada.coupriediaz@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/debug-monitors.h | 2 +- arch/arm64/kernel/debug-monitors.c | 10 +++++----- arch/arm64/kernel/traps.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 8f6ba31b8658..e1caf6a8380c 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -116,7 +116,7 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs) } #endif -int aarch32_break_handler(struct pt_regs *regs); +bool try_handle_aarch32_break(struct pt_regs *regs); void debug_traps_init(void); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 676fa0231935..9e69f2e915e8 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -334,7 +334,7 @@ static int brk_handler(unsigned long unused, unsigned long esr, } NOKPROBE_SYMBOL(brk_handler); -int aarch32_break_handler(struct pt_regs *regs) +bool try_handle_aarch32_break(struct pt_regs *regs) { u32 arm_instr; u16 thumb_instr; @@ -342,7 +342,7 @@ int aarch32_break_handler(struct pt_regs *regs) void __user *pc = (void __user *)instruction_pointer(regs); if (!compat_user_mode(regs)) - return -EFAULT; + return false; if (compat_thumb_mode(regs)) { /* get 16-bit Thumb instruction */ @@ -366,12 +366,12 @@ int aarch32_break_handler(struct pt_regs *regs) } if (!bp) - return -EFAULT; + return false; send_user_sigtrap(TRAP_BRKPT); - return 0; + return true; } -NOKPROBE_SYMBOL(aarch32_break_handler); +NOKPROBE_SYMBOL(try_handle_aarch32_break); void __init debug_traps_init(void) { diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 9bfa5c944379..685c11b2c553 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -454,7 +454,7 @@ void do_el0_undef(struct pt_regs *regs, unsigned long esr) u32 insn; /* check for AArch32 breakpoint instructions */ - if (!aarch32_break_handler(regs)) + if (try_handle_aarch32_break(regs)) return; if (user_insn_read(regs, &insn)) From 6adfdc5e2ef9c71a76d8d127a2eb54f0fbe9be5e Mon Sep 17 00:00:00 2001 From: Ada Couprie Diaz Date: Mon, 7 Jul 2025 12:40:59 +0100 Subject: [PATCH 36/67] arm64: debug: call software breakpoint handlers statically Software breakpoints pass an immediate value in ESR ("comment") that can be used to call a specialized handler (KGDB, KASAN...). We do so in two different ways : - During early boot, `early_brk64` statically checks against known immediates and calls the corresponding handler, - During init, handlers are dynamically registered into a list. When called, the generic software breakpoint handler will iterate over the list to find the appropriate handler. The dynamic registration does not provide any benefit here as it is not exported and all its uses are within the arm64 tree. It also depends on an RCU list, whose safe access currently relies on the non-preemptible state of `do_debug_exception`. Replace the list iteration logic in `call_break_hooks` to call the breakpoint handlers statically if they are enabled, like in `early_brk64`. Expose the handlers in their respective headers to be reachable from `arch/arm64/kernel/debug-monitors.c` at link time. 
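For background, the comment is simply the 16-bit immediate encoded in the BRK instruction, which the CPU reports in ESR_ELx. A reduced sketch of both sides of the pattern, with the planting side modelled on arch_kgdb_breakpoint(); the handler context (regs, esr) is assumed:

	/* Plant a breakpoint whose comment is BUG_BRK_IMM... */
	asm volatile("brk %0" : : "I" (BUG_BRK_IMM));

	/* ...then, in the BRK exception handler, dispatch on that comment: */
	if (esr_brk_comment(esr) == BUG_BRK_IMM)
		return bug_brk_handler(regs, esr);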
Unify the naming of the software breakpoint handlers to XXX_brk_handler(), making it clear they are related and to differentiate from the hardware breakpoints. Signed-off-by: Ada Couprie Diaz Tested-by: Luis Claudio R. Goncalves Reviewed-by: Will Deacon Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250707114109.35672-4-ada.coupriediaz@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/kgdb.h | 3 + arch/arm64/include/asm/kprobes.h | 8 +++ arch/arm64/include/asm/traps.h | 6 ++ arch/arm64/include/asm/uprobes.h | 2 + arch/arm64/kernel/debug-monitors.c | 54 +++++++++++++---- arch/arm64/kernel/kgdb.c | 22 ++----- arch/arm64/kernel/probes/kprobes.c | 31 ++-------- arch/arm64/kernel/probes/kprobes_trampoline.S | 2 +- arch/arm64/kernel/probes/uprobes.c | 9 +-- arch/arm64/kernel/traps.c | 59 ++++--------------- 10 files changed, 83 insertions(+), 113 deletions(-) diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h index 21fc85e9d2be..82a76b2102fb 100644 --- a/arch/arm64/include/asm/kgdb.h +++ b/arch/arm64/include/asm/kgdb.h @@ -24,6 +24,9 @@ static inline void arch_kgdb_breakpoint(void) extern void kgdb_handle_bus_error(void); extern int kgdb_fault_expected; +int kgdb_brk_handler(struct pt_regs *regs, unsigned long esr); +int kgdb_compiled_brk_handler(struct pt_regs *regs, unsigned long esr); + #endif /* !__ASSEMBLY__ */ /* diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index be7a3680dadf..f2782560647b 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -41,4 +41,12 @@ void __kretprobe_trampoline(void); void __kprobes *trampoline_probe_handler(struct pt_regs *regs); #endif /* CONFIG_KPROBES */ + +int __kprobes kprobe_brk_handler(struct pt_regs *regs, + unsigned long esr); +int __kprobes kprobe_ss_brk_handler(struct pt_regs *regs, + unsigned long esr); +int __kprobes kretprobe_brk_handler(struct pt_regs *regs, + unsigned long esr); + #endif /* _ARM_KPROBES_H */ diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 82cf1f879c61..e3e8944a71c3 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -29,6 +29,12 @@ void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey); void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str); void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str); +int bug_brk_handler(struct pt_regs *regs, unsigned long esr); +int cfi_brk_handler(struct pt_regs *regs, unsigned long esr); +int reserved_fault_brk_handler(struct pt_regs *regs, unsigned long esr); +int kasan_brk_handler(struct pt_regs *regs, unsigned long esr); +int ubsan_brk_handler(struct pt_regs *regs, unsigned long esr); + int early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs); /* diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h index 014b02897f8e..3659a79a9f32 100644 --- a/arch/arm64/include/asm/uprobes.h +++ b/arch/arm64/include/asm/uprobes.h @@ -28,4 +28,6 @@ struct arch_uprobe { bool simulate; }; +int uprobe_brk_handler(struct pt_regs *regs, unsigned long esr); + #endif diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 9e69f2e915e8..15d7158a7548 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -21,8 +21,11 @@ #include #include #include +#include +#include #include #include +#include /* Determine debug architecture. 
*/ u8 debug_monitors_arch(void) @@ -299,20 +302,47 @@ void unregister_kernel_break_hook(struct break_hook *hook) static int call_break_hook(struct pt_regs *regs, unsigned long esr) { - struct break_hook *hook; - struct list_head *list; - - list = user_mode(regs) ? &user_break_hook : &kernel_break_hook; - - /* - * Since brk exception disables interrupt, this function is - * entirely not preemptible, and we can use rcu list safely here. - */ - list_for_each_entry_rcu(hook, list, node) { - if ((esr_brk_comment(esr) & ~hook->mask) == hook->imm) - return hook->fn(regs, esr); + if (user_mode(regs)) { + if (IS_ENABLED(CONFIG_UPROBES) && + esr_brk_comment(esr) == UPROBES_BRK_IMM) + return uprobe_brk_handler(regs, esr); + return DBG_HOOK_ERROR; } + if (esr_brk_comment(esr) == BUG_BRK_IMM) + return bug_brk_handler(regs, esr); + + if (IS_ENABLED(CONFIG_CFI_CLANG) && esr_is_cfi_brk(esr)) + return cfi_brk_handler(regs, esr); + + if (esr_brk_comment(esr) == FAULT_BRK_IMM) + return reserved_fault_brk_handler(regs, esr); + + if (IS_ENABLED(CONFIG_KASAN_SW_TAGS) && + (esr_brk_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM) + return kasan_brk_handler(regs, esr); + + if (IS_ENABLED(CONFIG_UBSAN_TRAP) && esr_is_ubsan_brk(esr)) + return ubsan_brk_handler(regs, esr); + + if (IS_ENABLED(CONFIG_KGDB)) { + if (esr_brk_comment(esr) == KGDB_DYN_DBG_BRK_IMM) + return kgdb_brk_handler(regs, esr); + if (esr_brk_comment(esr) == KGDB_COMPILED_DBG_BRK_IMM) + return kgdb_compiled_brk_handler(regs, esr); + } + + if (IS_ENABLED(CONFIG_KPROBES)) { + if (esr_brk_comment(esr) == KPROBES_BRK_IMM) + return kprobe_brk_handler(regs, esr); + if (esr_brk_comment(esr) == KPROBES_BRK_SS_IMM) + return kprobe_ss_brk_handler(regs, esr); + } + + if (IS_ENABLED(CONFIG_KRETPROBES) && + esr_brk_comment(esr) == KRETPROBES_BRK_IMM) + return kretprobe_brk_handler(regs, esr); + return DBG_HOOK_ERROR; } NOKPROBE_SYMBOL(call_break_hook); diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index f3c4d3a8a20f..b5a3b9c85a65 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -234,21 +234,21 @@ int kgdb_arch_handle_exception(int exception_vector, int signo, return err; } -static int kgdb_brk_fn(struct pt_regs *regs, unsigned long esr) +int kgdb_brk_handler(struct pt_regs *regs, unsigned long esr) { kgdb_handle_exception(1, SIGTRAP, 0, regs); return DBG_HOOK_HANDLED; } -NOKPROBE_SYMBOL(kgdb_brk_fn) +NOKPROBE_SYMBOL(kgdb_brk_handler) -static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned long esr) +int kgdb_compiled_brk_handler(struct pt_regs *regs, unsigned long esr) { compiled_break = 1; kgdb_handle_exception(1, SIGTRAP, 0, regs); return DBG_HOOK_HANDLED; } -NOKPROBE_SYMBOL(kgdb_compiled_brk_fn); +NOKPROBE_SYMBOL(kgdb_compiled_brk_handler); static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr) { @@ -260,16 +260,6 @@ static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr) } NOKPROBE_SYMBOL(kgdb_step_brk_fn); -static struct break_hook kgdb_brkpt_hook = { - .fn = kgdb_brk_fn, - .imm = KGDB_DYN_DBG_BRK_IMM, -}; - -static struct break_hook kgdb_compiled_brkpt_hook = { - .fn = kgdb_compiled_brk_fn, - .imm = KGDB_COMPILED_DBG_BRK_IMM, -}; - static struct step_hook kgdb_step_hook = { .fn = kgdb_step_brk_fn }; @@ -316,8 +306,6 @@ int kgdb_arch_init(void) if (ret != 0) return ret; - register_kernel_break_hook(&kgdb_brkpt_hook); - register_kernel_break_hook(&kgdb_compiled_brkpt_hook); register_kernel_step_hook(&kgdb_step_hook); return 0; } @@ -329,8 +317,6 @@ int kgdb_arch_init(void) */ void 
kgdb_arch_exit(void) { - unregister_kernel_break_hook(&kgdb_brkpt_hook); - unregister_kernel_break_hook(&kgdb_compiled_brkpt_hook); unregister_kernel_step_hook(&kgdb_step_hook); unregister_die_notifier(&kgdb_notifier); } diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index d9e462eafb95..0c5d408afd95 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -292,8 +292,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) return 0; } -static int __kprobes -kprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr) +int __kprobes +kprobe_brk_handler(struct pt_regs *regs, unsigned long esr) { struct kprobe *p, *cur_kprobe; struct kprobe_ctlblk *kcb; @@ -336,13 +336,8 @@ kprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr) return DBG_HOOK_HANDLED; } -static struct break_hook kprobes_break_hook = { - .imm = KPROBES_BRK_IMM, - .fn = kprobe_breakpoint_handler, -}; - -static int __kprobes -kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr) +int __kprobes +kprobe_ss_brk_handler(struct pt_regs *regs, unsigned long esr) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); unsigned long addr = instruction_pointer(regs); @@ -360,13 +355,8 @@ kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr) return DBG_HOOK_ERROR; } -static struct break_hook kprobes_break_ss_hook = { - .imm = KPROBES_BRK_SS_IMM, - .fn = kprobe_breakpoint_ss_handler, -}; - -static int __kprobes -kretprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr) +int __kprobes +kretprobe_brk_handler(struct pt_regs *regs, unsigned long esr) { if (regs->pc != (unsigned long)__kretprobe_trampoline) return DBG_HOOK_ERROR; @@ -375,11 +365,6 @@ kretprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr) return DBG_HOOK_HANDLED; } -static struct break_hook kretprobes_break_hook = { - .imm = KRETPROBES_BRK_IMM, - .fn = kretprobe_breakpoint_handler, -}; - /* * Provide a blacklist of symbols identifying ranges which cannot be kprobed. * This blacklist is exposed to userspace via debugfs (kprobes/blacklist). @@ -422,9 +407,5 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p) int __init arch_init_kprobes(void) { - register_kernel_break_hook(&kprobes_break_hook); - register_kernel_break_hook(&kprobes_break_ss_hook); - register_kernel_break_hook(&kretprobes_break_hook); - return 0; } diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S index a362f3dbb3d1..b60739d3983f 100644 --- a/arch/arm64/kernel/probes/kprobes_trampoline.S +++ b/arch/arm64/kernel/probes/kprobes_trampoline.S @@ -12,7 +12,7 @@ SYM_CODE_START(__kretprobe_trampoline) /* * Trigger a breakpoint exception. The PC will be adjusted by - * kretprobe_breakpoint_handler(), and no subsequent instructions will + * kretprobe_brk_handler(), and no subsequent instructions will * be executed from the trampoline. 
*/ brk #KRETPROBES_BRK_IMM diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index cb3d05af36e3..ad68b4a5974d 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -173,7 +173,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, return NOTIFY_DONE; } -static int uprobe_breakpoint_handler(struct pt_regs *regs, +int uprobe_brk_handler(struct pt_regs *regs, unsigned long esr) { if (uprobe_pre_sstep_notifier(regs)) @@ -194,12 +194,6 @@ static int uprobe_single_step_handler(struct pt_regs *regs, return DBG_HOOK_ERROR; } -/* uprobe breakpoint handler hook */ -static struct break_hook uprobes_break_hook = { - .imm = UPROBES_BRK_IMM, - .fn = uprobe_breakpoint_handler, -}; - /* uprobe single step handler hook */ static struct step_hook uprobes_step_hook = { .fn = uprobe_single_step_handler, @@ -207,7 +201,6 @@ static struct step_hook uprobes_step_hook = { static int __init arch_init_uprobes(void) { - register_user_break_hook(&uprobes_break_hook); register_user_step_hook(&uprobes_step_hook); return 0; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 685c11b2c553..654c8ea46ec6 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -987,7 +987,7 @@ void do_serror(struct pt_regs *regs, unsigned long esr) int is_valid_bugaddr(unsigned long addr) { /* - * bug_handler() only called for BRK #BUG_BRK_IMM. + * bug_brk_handler() only called for BRK #BUG_BRK_IMM. * So the answer is trivial -- any spurious instances with no * bug table entry will be rejected by report_bug() and passed * back to the debug-monitors code and handled as a fatal @@ -997,7 +997,7 @@ int is_valid_bugaddr(unsigned long addr) } #endif -static int bug_handler(struct pt_regs *regs, unsigned long esr) +int bug_brk_handler(struct pt_regs *regs, unsigned long esr) { switch (report_bug(regs->pc, regs)) { case BUG_TRAP_TYPE_BUG: @@ -1017,13 +1017,8 @@ static int bug_handler(struct pt_regs *regs, unsigned long esr) return DBG_HOOK_HANDLED; } -static struct break_hook bug_break_hook = { - .fn = bug_handler, - .imm = BUG_BRK_IMM, -}; - #ifdef CONFIG_CFI_CLANG -static int cfi_handler(struct pt_regs *regs, unsigned long esr) +int cfi_brk_handler(struct pt_regs *regs, unsigned long esr) { unsigned long target; u32 type; @@ -1046,15 +1041,9 @@ static int cfi_handler(struct pt_regs *regs, unsigned long esr) arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); return DBG_HOOK_HANDLED; } - -static struct break_hook cfi_break_hook = { - .fn = cfi_handler, - .imm = CFI_BRK_IMM_BASE, - .mask = CFI_BRK_IMM_MASK, -}; #endif /* CONFIG_CFI_CLANG */ -static int reserved_fault_handler(struct pt_regs *regs, unsigned long esr) +int reserved_fault_brk_handler(struct pt_regs *regs, unsigned long esr) { pr_err("%s generated an invalid instruction at %pS!\n", "Kernel text patching", @@ -1064,11 +1053,6 @@ static int reserved_fault_handler(struct pt_regs *regs, unsigned long esr) return DBG_HOOK_ERROR; } -static struct break_hook fault_break_hook = { - .fn = reserved_fault_handler, - .imm = FAULT_BRK_IMM, -}; - #ifdef CONFIG_KASAN_SW_TAGS #define KASAN_ESR_RECOVER 0x20 @@ -1076,7 +1060,7 @@ static struct break_hook fault_break_hook = { #define KASAN_ESR_SIZE_MASK 0x0f #define KASAN_ESR_SIZE(esr) (1 << ((esr) & KASAN_ESR_SIZE_MASK)) -static int kasan_handler(struct pt_regs *regs, unsigned long esr) +int kasan_brk_handler(struct pt_regs *regs, unsigned long esr) { bool recover = esr & KASAN_ESR_RECOVER; bool write = esr & KASAN_ESR_WRITE; 
@@ -1107,26 +1091,14 @@ static int kasan_handler(struct pt_regs *regs, unsigned long esr) arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); return DBG_HOOK_HANDLED; } - -static struct break_hook kasan_break_hook = { - .fn = kasan_handler, - .imm = KASAN_BRK_IMM, - .mask = KASAN_BRK_MASK, -}; #endif #ifdef CONFIG_UBSAN_TRAP -static int ubsan_handler(struct pt_regs *regs, unsigned long esr) +int ubsan_brk_handler(struct pt_regs *regs, unsigned long esr) { die(report_ubsan_failure(esr & UBSAN_BRK_MASK), regs, esr); return DBG_HOOK_HANDLED; } - -static struct break_hook ubsan_break_hook = { - .fn = ubsan_handler, - .imm = UBSAN_BRK_IMM, - .mask = UBSAN_BRK_MASK, -}; #endif /* @@ -1138,31 +1110,20 @@ int __init early_brk64(unsigned long addr, unsigned long esr, { #ifdef CONFIG_CFI_CLANG if (esr_is_cfi_brk(esr)) - return cfi_handler(regs, esr) != DBG_HOOK_HANDLED; + return cfi_brk_handler(regs, esr) != DBG_HOOK_HANDLED; #endif #ifdef CONFIG_KASAN_SW_TAGS if ((esr_brk_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM) - return kasan_handler(regs, esr) != DBG_HOOK_HANDLED; + return kasan_brk_handler(regs, esr) != DBG_HOOK_HANDLED; #endif #ifdef CONFIG_UBSAN_TRAP if (esr_is_ubsan_brk(esr)) - return ubsan_handler(regs, esr) != DBG_HOOK_HANDLED; + return ubsan_brk_handler(regs, esr) != DBG_HOOK_HANDLED; #endif - return bug_handler(regs, esr) != DBG_HOOK_HANDLED; + return bug_brk_handler(regs, esr) != DBG_HOOK_HANDLED; } void __init trap_init(void) { - register_kernel_break_hook(&bug_break_hook); -#ifdef CONFIG_CFI_CLANG - register_kernel_break_hook(&cfi_break_hook); -#endif - register_kernel_break_hook(&fault_break_hook); -#ifdef CONFIG_KASAN_SW_TAGS - register_kernel_break_hook(&kasan_break_hook); -#endif -#ifdef CONFIG_UBSAN_TRAP - register_kernel_break_hook(&ubsan_break_hook); -#endif debug_traps_init(); } From 403b48aad5b3e857b8c2576ce6a421f3d23dd6a6 Mon Sep 17 00:00:00 2001 From: Ada Couprie Diaz Date: Mon, 7 Jul 2025 12:41:00 +0100 Subject: [PATCH 37/67] arm64: debug: call step handlers statically Software stepping checks for the correct handler by iterating over a list of dynamically registered handlers and calling all of them until one handles the exception. This is the only generic way to handle software stepping handlers in arm64 as the exception does not provide an immediate that could be checked, contrary to software breakpoints. However, the registration mechanism is not exported and has only two current users : the KGDB stepping handler, and the uprobe single step handler. Given that one comes from user mode and the other from kernel mode, call the appropriate one by checking the source EL of the exception. Add a stand-in that returns DBG_HOOK_ERROR when the configuration options are not enabled. Remove `arch_init_uprobes()` as it is not useful anymore and is specific to arm64. Unify the naming of the handler to XXX_single_step_handler(), making it clear they are related. Signed-off-by: Ada Couprie Diaz Tested-by: Luis Claudio R. 
Goncalves Reviewed-by: Will Deacon Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250707114109.35672-5-ada.coupriediaz@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/kgdb.h | 9 +++++++++ arch/arm64/include/asm/uprobes.h | 9 +++++++++ arch/arm64/kernel/debug-monitors.c | 25 ++++++------------------- arch/arm64/kernel/kgdb.c | 17 +++-------------- arch/arm64/kernel/probes/uprobes.c | 15 +-------------- 5 files changed, 28 insertions(+), 47 deletions(-) diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h index 82a76b2102fb..3184f5d1e3ae 100644 --- a/arch/arm64/include/asm/kgdb.h +++ b/arch/arm64/include/asm/kgdb.h @@ -26,6 +26,15 @@ extern int kgdb_fault_expected; int kgdb_brk_handler(struct pt_regs *regs, unsigned long esr); int kgdb_compiled_brk_handler(struct pt_regs *regs, unsigned long esr); +#ifdef CONFIG_KGDB +int kgdb_single_step_handler(struct pt_regs *regs, unsigned long esr); +#else +static inline int kgdb_single_step_handler(struct pt_regs *regs, + unsigned long esr) +{ + return DBG_HOOK_ERROR; +} +#endif #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h index 3659a79a9f32..89bfb0213a50 100644 --- a/arch/arm64/include/asm/uprobes.h +++ b/arch/arm64/include/asm/uprobes.h @@ -29,5 +29,14 @@ struct arch_uprobe { }; int uprobe_brk_handler(struct pt_regs *regs, unsigned long esr); +#ifdef CONFIG_UPROBES +int uprobe_single_step_handler(struct pt_regs *regs, unsigned long esr); +#else +static inline int uprobe_single_step_handler(struct pt_regs *regs, + unsigned long esr) +{ + return DBG_HOOK_ERROR; +} +#endif #endif diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 15d7158a7548..26c71b7edc26 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -200,30 +200,17 @@ void unregister_kernel_step_hook(struct step_hook *hook) } /* - * Call registered single step handlers + * Call single step handlers * There is no Syndrome info to check for determining the handler. - * So we call all the registered handlers, until the right handler is - * found which returns zero. + * However, there is only one possible handler for user and kernel modes, so + * check and call the appropriate one. */ static int call_step_hook(struct pt_regs *regs, unsigned long esr) { - struct step_hook *hook; - struct list_head *list; - int retval = DBG_HOOK_ERROR; + if (user_mode(regs)) + return uprobe_single_step_handler(regs, esr); - list = user_mode(regs) ? &user_step_hook : &kernel_step_hook; - - /* - * Since single-step exception disables interrupt, this function is - * entirely not preemptible, and we can use rcu list safely here. 
- */ - list_for_each_entry_rcu(hook, list, node) { - retval = hook->fn(regs, esr); - if (retval == DBG_HOOK_HANDLED) - break; - } - - return retval; + return kgdb_single_step_handler(regs, esr); } NOKPROBE_SYMBOL(call_step_hook); diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index b5a3b9c85a65..968324a79a89 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -250,7 +250,7 @@ int kgdb_compiled_brk_handler(struct pt_regs *regs, unsigned long esr) } NOKPROBE_SYMBOL(kgdb_compiled_brk_handler); -static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr) +int kgdb_single_step_handler(struct pt_regs *regs, unsigned long esr) { if (!kgdb_single_step) return DBG_HOOK_ERROR; @@ -258,11 +258,7 @@ static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr) kgdb_handle_exception(0, SIGTRAP, 0, regs); return DBG_HOOK_HANDLED; } -NOKPROBE_SYMBOL(kgdb_step_brk_fn); - -static struct step_hook kgdb_step_hook = { - .fn = kgdb_step_brk_fn -}; +NOKPROBE_SYMBOL(kgdb_single_step_handler); static int __kgdb_notify(struct die_args *args, unsigned long cmd) { @@ -301,13 +297,7 @@ static struct notifier_block kgdb_notifier = { */ int kgdb_arch_init(void) { - int ret = register_die_notifier(&kgdb_notifier); - - if (ret != 0) - return ret; - - register_kernel_step_hook(&kgdb_step_hook); - return 0; + return register_die_notifier(&kgdb_notifier); } /* @@ -317,7 +307,6 @@ int kgdb_arch_init(void) */ void kgdb_arch_exit(void) { - unregister_kernel_step_hook(&kgdb_step_hook); unregister_die_notifier(&kgdb_notifier); } diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index ad68b4a5974d..1f91fd2a8187 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -182,7 +182,7 @@ int uprobe_brk_handler(struct pt_regs *regs, return DBG_HOOK_ERROR; } -static int uprobe_single_step_handler(struct pt_regs *regs, +int uprobe_single_step_handler(struct pt_regs *regs, unsigned long esr) { struct uprobe_task *utask = current->utask; @@ -194,16 +194,3 @@ static int uprobe_single_step_handler(struct pt_regs *regs, return DBG_HOOK_ERROR; } -/* uprobe single step handler hook */ -static struct step_hook uprobes_step_hook = { - .fn = uprobe_single_step_handler, -}; - -static int __init arch_init_uprobes(void) -{ - register_user_step_hook(&uprobes_step_hook); - - return 0; -} - -device_initcall(arch_init_uprobes); From d4e0b12620946a4011ad695490211fc38bf5cb42 Mon Sep 17 00:00:00 2001 From: Ada Couprie Diaz Date: Mon, 7 Jul 2025 12:41:01 +0100 Subject: [PATCH 38/67] arm64: debug: remove break/step handler registration infrastructure Remove all infrastructure for the dynamic registration previously used by software breakpoints and stepping handlers. Signed-off-by: Ada Couprie Diaz Tested-by: Luis Claudio R. 
Goncalves Reviewed-by: Anshuman Khandual Reviewed-by: Will Deacon Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250707114109.35672-6-ada.coupriediaz@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/debug-monitors.h | 24 ---------- arch/arm64/kernel/debug-monitors.c | 63 ------------------------- 2 files changed, 87 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index e1caf6a8380c..caee1d923f9c 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -62,30 +62,6 @@ struct task_struct; #define DBG_HOOK_HANDLED 0 #define DBG_HOOK_ERROR 1 -struct step_hook { - struct list_head node; - int (*fn)(struct pt_regs *regs, unsigned long esr); -}; - -void register_user_step_hook(struct step_hook *hook); -void unregister_user_step_hook(struct step_hook *hook); - -void register_kernel_step_hook(struct step_hook *hook); -void unregister_kernel_step_hook(struct step_hook *hook); - -struct break_hook { - struct list_head node; - int (*fn)(struct pt_regs *regs, unsigned long esr); - u16 imm; - u16 mask; /* These bits are ignored when comparing with imm */ -}; - -void register_user_break_hook(struct break_hook *hook); -void unregister_user_break_hook(struct break_hook *hook); - -void register_kernel_break_hook(struct break_hook *hook); -void unregister_kernel_break_hook(struct break_hook *hook); - u8 debug_monitors_arch(void); enum dbg_active_el { diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 26c71b7edc26..46fb9359a554 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -159,46 +159,6 @@ NOKPROBE_SYMBOL(clear_user_regs_spsr_ss); #define set_regs_spsr_ss(r) set_user_regs_spsr_ss(&(r)->user_regs) #define clear_regs_spsr_ss(r) clear_user_regs_spsr_ss(&(r)->user_regs) -static DEFINE_SPINLOCK(debug_hook_lock); -static LIST_HEAD(user_step_hook); -static LIST_HEAD(kernel_step_hook); - -static void register_debug_hook(struct list_head *node, struct list_head *list) -{ - spin_lock(&debug_hook_lock); - list_add_rcu(node, list); - spin_unlock(&debug_hook_lock); - -} - -static void unregister_debug_hook(struct list_head *node) -{ - spin_lock(&debug_hook_lock); - list_del_rcu(node); - spin_unlock(&debug_hook_lock); - synchronize_rcu(); -} - -void register_user_step_hook(struct step_hook *hook) -{ - register_debug_hook(&hook->node, &user_step_hook); -} - -void unregister_user_step_hook(struct step_hook *hook) -{ - unregister_debug_hook(&hook->node); -} - -void register_kernel_step_hook(struct step_hook *hook) -{ - register_debug_hook(&hook->node, &kernel_step_hook); -} - -void unregister_kernel_step_hook(struct step_hook *hook) -{ - unregister_debug_hook(&hook->node); -} - /* * Call single step handlers * There is no Syndrome info to check for determining the handler. 
@@ -264,29 +224,6 @@ static int single_step_handler(unsigned long unused, unsigned long esr, } NOKPROBE_SYMBOL(single_step_handler); -static LIST_HEAD(user_break_hook); -static LIST_HEAD(kernel_break_hook); - -void register_user_break_hook(struct break_hook *hook) -{ - register_debug_hook(&hook->node, &user_break_hook); -} - -void unregister_user_break_hook(struct break_hook *hook) -{ - unregister_debug_hook(&hook->node); -} - -void register_kernel_break_hook(struct break_hook *hook) -{ - register_debug_hook(&hook->node, &kernel_break_hook); -} - -void unregister_kernel_break_hook(struct break_hook *hook) -{ - unregister_debug_hook(&hook->node); -} - static int call_break_hook(struct pt_regs *regs, unsigned long esr) { if (user_mode(regs)) { From eaff68b3286116d499a3d4e513a36d772faba587 Mon Sep 17 00:00:00 2001 From: Ada Couprie Diaz Date: Mon, 7 Jul 2025 12:41:02 +0100 Subject: [PATCH 39/67] arm64: entry: Add entry and exit functions for debug exceptions Move the `debug_exception_enter()` and `debug_exception_exit()` functions from mm/fault.c, as they are needed to split the debug exceptions entry paths from the current unified one. Make them externally visible in include/asm/exception.h until the caller in mm/fault.c is cleaned up. Signed-off-by: Ada Couprie Diaz Tested-by: Luis Claudio R. Goncalves Reviewed-by: Anshuman Khandual Reviewed-by: Will Deacon Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250707114109.35672-7-ada.coupriediaz@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/exception.h | 4 ++++ arch/arm64/kernel/entry-common.c | 22 ++++++++++++++++++++++ arch/arm64/mm/fault.c | 22 ---------------------- 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index d48fc16584cd..e54b5466fd2c 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -80,4 +80,8 @@ void do_serror(struct pt_regs *regs, unsigned long esr); void do_signal(struct pt_regs *regs); void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far); + +void debug_exception_enter(struct pt_regs *regs); +void debug_exception_exit(struct pt_regs *regs); + #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 7c1970b341b8..3bdfa5abaf7a 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -441,6 +441,28 @@ static __always_inline void fpsimd_syscall_exit(void) __this_cpu_write(fpsimd_last_state.to_save, FP_STATE_CURRENT); } +/* + * In debug exception context, we explicitly disable preemption despite + * having interrupts disabled. + * This serves two purposes: it makes it much less likely that we would + * accidentally schedule in exception context and it will force a warning + * if we somehow manage to schedule by accident. + */ +void debug_exception_enter(struct pt_regs *regs) +{ + preempt_disable(); + + /* This code is a bit fragile. Test it. 
*/ + RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work"); +} +NOKPROBE_SYMBOL(debug_exception_enter); + +void debug_exception_exit(struct pt_regs *regs) +{ + preempt_enable_no_resched(); +} +NOKPROBE_SYMBOL(debug_exception_exit); + UNHANDLED(el1t, 64, sync) UNHANDLED(el1t, 64, irq) UNHANDLED(el1t, 64, fiq) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index ec0a337891dd..d451d7d834f1 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -966,28 +966,6 @@ void __init hook_debug_fault_code(int nr, debug_fault_info[nr].name = name; } -/* - * In debug exception context, we explicitly disable preemption despite - * having interrupts disabled. - * This serves two purposes: it makes it much less likely that we would - * accidentally schedule in exception context and it will force a warning - * if we somehow manage to schedule by accident. - */ -static void debug_exception_enter(struct pt_regs *regs) -{ - preempt_disable(); - - /* This code is a bit fragile. Test it. */ - RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work"); -} -NOKPROBE_SYMBOL(debug_exception_enter); - -static void debug_exception_exit(struct pt_regs *regs) -{ - preempt_enable_no_resched(); -} -NOKPROBE_SYMBOL(debug_exception_exit); - void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr, struct pt_regs *regs) { From 43e2ae77fcab8a01101a2e5da528b5222b338e5f Mon Sep 17 00:00:00 2001 From: Ada Couprie Diaz Date: Mon, 7 Jul 2025 12:41:03 +0100 Subject: [PATCH 40/67] arm64: debug: split hardware breakpoint exception entry Currently all debug exceptions share common entry code and are routed to `do_debug_exception()`, which calls dynamically-registered handlers for each specific debug exception. This is unfortunate as different debug exceptions have different entry handling requirements, and it would be better to handle these distinct requirements earlier. Hardware breakpoints exceptions are generated by the hardware after user configuration. As such, they can be exploited when training branch predictors outside of the userspace VA range: they still need to call `arm64_apply_bp_hardening()` if needed to mitigate against this attack. However, they do not need to handle the Cortex-A76 erratum #1463225 as it only applies to single stepping exceptions. It does not set an address in FAR_EL1 either, only the hardware watchpoint does. As the hardware breakpoint handler only returns 0 and never triggers the call to `arm64_notify_die()`, we can call it directly from `entry-common.c`. Split the hardware breakpoint exception entry, adjust the function signature, and handling of the Cortex-A76 erratum to fit the behaviour of the exception. Move the call to `arm64_apply_bp_hardening()` to `entry-common.c` so that we can do it as early as possible, and only for the exceptions coming from EL0, where it is needed. This is safe to do as it is `noinstr`, as are all the functions it may call. `el0_ia()` and `el0_pc()` already call it this way. Signed-off-by: Ada Couprie Diaz Tested-by: Luis Claudio R. 
Goncalves Reviewed-by: Will Deacon Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250707114109.35672-8-ada.coupriediaz@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/exception.h | 5 +++++ arch/arm64/kernel/entry-common.c | 28 ++++++++++++++++++++++++++++ arch/arm64/kernel/hw_breakpoint.c | 16 ++++++---------- 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index e54b5466fd2c..8ec4e0ff49b7 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -61,6 +61,11 @@ void do_el0_gcs(struct pt_regs *regs, unsigned long esr); void do_el1_gcs(struct pt_regs *regs, unsigned long esr); void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr, struct pt_regs *regs); +#ifdef CONFIG_HAVE_HW_BREAKPOINT +void do_breakpoint(unsigned long esr, struct pt_regs *regs); +#else +static inline void do_breakpoint(unsigned long esr, struct pt_regs *regs) {} +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs); void do_sve_acc(unsigned long esr, struct pt_regs *regs); void do_sme_acc(unsigned long esr, struct pt_regs *regs); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 3bdfa5abaf7a..be2add6b4ae3 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -526,6 +526,15 @@ static void noinstr el1_mops(struct pt_regs *regs, unsigned long esr) exit_to_kernel_mode(regs); } +static void noinstr el1_breakpt(struct pt_regs *regs, unsigned long esr) +{ + arm64_enter_el1_dbg(regs); + debug_exception_enter(regs); + do_breakpoint(esr, regs); + debug_exception_exit(regs); + arm64_exit_el1_dbg(regs); +} + static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -575,6 +584,8 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs) el1_mops(regs, esr); break; case ESR_ELx_EC_BREAKPT_CUR: + el1_breakpt(regs, esr); + break; case ESR_ELx_EC_SOFTSTP_CUR: case ESR_ELx_EC_WATCHPT_CUR: case ESR_ELx_EC_BRK64: @@ -769,6 +780,19 @@ static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr) exit_to_user_mode(regs); } +static void noinstr el0_breakpt(struct pt_regs *regs, unsigned long esr) +{ + if (!is_ttbr0_addr(regs->pc)) + arm64_apply_bp_hardening(); + + enter_from_user_mode(regs); + debug_exception_enter(regs); + do_breakpoint(esr, regs); + debug_exception_exit(regs); + local_daif_restore(DAIF_PROCCTX); + exit_to_user_mode(regs); +} + static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr) { /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */ @@ -848,6 +872,8 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) el0_gcs(regs, esr); break; case ESR_ELx_EC_BREAKPT_LOW: + el0_breakpt(regs, esr); + break; case ESR_ELx_EC_SOFTSTP_LOW: case ESR_ELx_EC_WATCHPT_LOW: case ESR_ELx_EC_BRK64: @@ -968,6 +994,8 @@ asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs) el0_cp15(regs, esr); break; case ESR_ELx_EC_BREAKPT_LOW: + el0_breakpt(regs, esr); + break; case ESR_ELx_EC_SOFTSTP_LOW: case ESR_ELx_EC_WATCHPT_LOW: case ESR_ELx_EC_BKPT32: diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 722ac45f9f7b..d7eede5d869c 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -618,8 +619,7 @@ 
NOKPROBE_SYMBOL(toggle_bp_registers); /* * Debug exception handlers. */ -static int breakpoint_handler(unsigned long unused, unsigned long esr, - struct pt_regs *regs) +void do_breakpoint(unsigned long esr, struct pt_regs *regs) { int i, step = 0, *kernel_step; u32 ctrl_reg; @@ -662,7 +662,7 @@ static int breakpoint_handler(unsigned long unused, unsigned long esr, } if (!step) - return 0; + return; if (user_mode(regs)) { debug_info->bps_disabled = 1; @@ -670,7 +670,7 @@ static int breakpoint_handler(unsigned long unused, unsigned long esr, /* If we're already stepping a watchpoint, just return. */ if (debug_info->wps_disabled) - return 0; + return; if (test_thread_flag(TIF_SINGLESTEP)) debug_info->suspended_step = 1; @@ -681,7 +681,7 @@ static int breakpoint_handler(unsigned long unused, unsigned long esr, kernel_step = this_cpu_ptr(&stepping_kernel_bp); if (*kernel_step != ARM_KERNEL_STEP_NONE) - return 0; + return; if (kernel_active_single_step()) { *kernel_step = ARM_KERNEL_STEP_SUSPEND; @@ -690,10 +690,8 @@ static int breakpoint_handler(unsigned long unused, unsigned long esr, kernel_enable_single_step(regs); } } - - return 0; } -NOKPROBE_SYMBOL(breakpoint_handler); +NOKPROBE_SYMBOL(do_breakpoint); /* * Arm64 hardware does not always report a watchpoint hit address that matches @@ -988,8 +986,6 @@ static int __init arch_hw_breakpoint_init(void) core_num_brps, core_num_wrps); /* Register debug fault handlers. */ - hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP, - TRAP_HWBKPT, "hw-breakpoint handler"); hook_debug_fault_code(DBG_ESR_EVT_HWWP, watchpoint_handler, SIGTRAP, TRAP_HWBKPT, "hw-watchpoint handler"); From 80691d35523de3292b64c2ffa444aab3d55e51ba Mon Sep 17 00:00:00 2001 From: Ada Couprie Diaz Date: Mon, 7 Jul 2025 12:41:04 +0100 Subject: [PATCH 41/67] arm64: debug: refactor reinstall_suspended_bps() `reinstall_suspended_bps()` plays a key part in the stepping process when we have hardware breakpoints and watchpoints enabled. It checks if we need to step one, will re-enable it if it has been handled and will return whether or not we need to proceed with a single-step. However, the current naming and return values make it harder to understand the logic and goal of the function. Rename it `try_step_suspended_breakpoints()` and change the return value to a boolean, aligning it with similar functions used in `do_el0_undef()` like `try_emulate_mrs()`, and making its behaviour more obvious. Signed-off-by: Ada Couprie Diaz Tested-by: Luis Claudio R. 
Goncalves Reviewed-by: Anshuman Khandual Reviewed-by: Will Deacon Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250707114109.35672-9-ada.coupriediaz@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/debug-monitors.h | 6 +++--- arch/arm64/kernel/debug-monitors.c | 2 +- arch/arm64/kernel/hw_breakpoint.c | 25 ++++++++++++------------- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index caee1d923f9c..586290e95d87 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -84,11 +84,11 @@ void kernel_rewind_single_step(struct pt_regs *regs); void kernel_fastforward_single_step(struct pt_regs *regs); #ifdef CONFIG_HAVE_HW_BREAKPOINT -int reinstall_suspended_bps(struct pt_regs *regs); +bool try_step_suspended_breakpoints(struct pt_regs *regs); #else -static inline int reinstall_suspended_bps(struct pt_regs *regs) +static inline bool try_step_suspended_breakpoints(struct pt_regs *regs) { - return -ENODEV; + return false; } #endif diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 46fb9359a554..89264d1a9d65 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -195,7 +195,7 @@ static int single_step_handler(unsigned long unused, unsigned long esr, * If we are stepping a pending breakpoint, call the hw_breakpoint * handler first. */ - if (!reinstall_suspended_bps(regs)) + if (try_step_suspended_breakpoints(regs)) return 0; if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index d7eede5d869c..309ae24d4548 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -847,36 +847,35 @@ NOKPROBE_SYMBOL(watchpoint_handler); /* * Handle single-step exception. */ -int reinstall_suspended_bps(struct pt_regs *regs) +bool try_step_suspended_breakpoints(struct pt_regs *regs) { struct debug_info *debug_info = ¤t->thread.debug; - int handled_exception = 0, *kernel_step; - - kernel_step = this_cpu_ptr(&stepping_kernel_bp); + int *kernel_step = this_cpu_ptr(&stepping_kernel_bp); + bool handled_exception = false; /* * Called from single-step exception handler. - * Return 0 if execution can resume, 1 if a SIGTRAP should be - * reported. + * Return true if we stepped a breakpoint and can resume execution, + * false if we need to handle a single-step. */ if (user_mode(regs)) { if (debug_info->bps_disabled) { debug_info->bps_disabled = 0; toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 1); - handled_exception = 1; + handled_exception = true; } if (debug_info->wps_disabled) { debug_info->wps_disabled = 0; toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1); - handled_exception = 1; + handled_exception = true; } if (handled_exception) { if (debug_info->suspended_step) { debug_info->suspended_step = 0; /* Allow exception handling to fall-through. 
*/ - handled_exception = 0; + handled_exception = false; } else { user_disable_single_step(current); } @@ -890,17 +889,17 @@ int reinstall_suspended_bps(struct pt_regs *regs) if (*kernel_step != ARM_KERNEL_STEP_SUSPEND) { kernel_disable_single_step(); - handled_exception = 1; + handled_exception = true; } else { - handled_exception = 0; + handled_exception = false; } *kernel_step = ARM_KERNEL_STEP_NONE; } - return !handled_exception; + return handled_exception; } -NOKPROBE_SYMBOL(reinstall_suspended_bps); +NOKPROBE_SYMBOL(try_step_suspended_breakpoints); /* * Context-switcher for restoring suspended breakpoints. From 0ac7584c08ceff13fc1e3082a0104548688d6b00 Mon Sep 17 00:00:00 2001 From: Ada Couprie Diaz Date: Mon, 7 Jul 2025 12:41:05 +0100 Subject: [PATCH 42/67] arm64: debug: split single stepping exception entry Currently all debug exceptions share common entry code and are routed to `do_debug_exception()`, which calls dynamically-registered handlers for each specific debug exception. This is unfortunate as different debug exceptions have different entry handling requirements, and it would be better to handle these distinct requirements earlier. The single stepping exception has the most constraints: it can be exploited to train branch predictors and it needs special handling at EL1 for the Cortex-A76 erratum #1463225. We need to preserve all those mitigations. However, it does not write an address at FAR_EL1, as only hardware watchpoints do so. The single-step handler does its own signaling if it needs to and only returns 0, so we can call it directly from `entry-common.c`. Split the single stepping exception entry, adjust the function signature, and keep the security mitigation and erratum handling. Further, as the EL0 and EL1 code paths are cleanly separated, we can split `do_softstep()` into `do_el0_softstep()` and `do_el1_softstep()` and call them directly from the relevant entry paths. We can also remove `NOKPROBE_SYMBOL` for the EL0 path, as it cannot lead to a kprobe recursion. Move the call to `arm64_apply_bp_hardening()` to `entry-common.c` so that we can do it as early as possible, and only for the exceptions coming from EL0, where it is needed. This is safe to do as it is `noinstr`, as are all the functions it may call. `el0_ia()` and `el0_pc()` already call it this way. When taking a soft-step exception from EL0, most of the single stepping handling is safely preemptible: the only possible handler is `uprobe_single_step_handler()`. It only operates on task-local data and properly checks its validity, then raises a Thread Information Flag, processed before returning to userspace in `do_notify_resume()`, which is already preemptible. However, the soft-step handler first calls `reinstall_suspended_bps()` to check if there is any hardware breakpoint or watchpoint pending or already stepped through. This cannot be preempted as it manipulates the hardware breakpoint and watchpoint registers. Move the call to `try_step_suspended_breakpoints()` to `entry-common.c` and adjust the relevant comments. We can now safely unmask interrupts before handling the step itself, fixing a PREEMPT_RT issue where the handler could call a sleeping function with preemption disabled. Signed-off-by: Ada Couprie Diaz Closes: https://lore.kernel.org/linux-arm-kernel/Z6YW_Kx4S2tmj2BP@uudg.org/ Tested-by: Luis Claudio R.
Goncalves Reviewed-by: Will Deacon Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250707114109.35672-10-ada.coupriediaz@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/exception.h | 2 + arch/arm64/kernel/debug-monitors.c | 77 +++++++++++------------------- arch/arm64/kernel/entry-common.c | 43 +++++++++++++++++ arch/arm64/kernel/hw_breakpoint.c | 2 +- 4 files changed, 75 insertions(+), 49 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 8ec4e0ff49b7..c8e7c61b8ac4 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -66,6 +66,8 @@ void do_breakpoint(unsigned long esr, struct pt_regs *regs); #else static inline void do_breakpoint(unsigned long esr, struct pt_regs *regs) {} #endif /* CONFIG_HAVE_HW_BREAKPOINT */ +void do_el0_softstep(unsigned long esr, struct pt_regs *regs); +void do_el1_softstep(unsigned long esr, struct pt_regs *regs); void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs); void do_sve_acc(unsigned long esr, struct pt_regs *regs); void do_sme_acc(unsigned long esr, struct pt_regs *regs); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 89264d1a9d65..cb50d1f59798 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -159,21 +160,6 @@ NOKPROBE_SYMBOL(clear_user_regs_spsr_ss); #define set_regs_spsr_ss(r) set_user_regs_spsr_ss(&(r)->user_regs) #define clear_regs_spsr_ss(r) clear_user_regs_spsr_ss(&(r)->user_regs) -/* - * Call single step handlers - * There is no Syndrome info to check for determining the handler. - * However, there is only one possible handler for user and kernel modes, so - * check and call the appropriate one. - */ -static int call_step_hook(struct pt_regs *regs, unsigned long esr) -{ - if (user_mode(regs)) - return uprobe_single_step_handler(regs, esr); - - return kgdb_single_step_handler(regs, esr); -} -NOKPROBE_SYMBOL(call_step_hook); - static void send_user_sigtrap(int si_code) { struct pt_regs *regs = current_pt_regs(); @@ -188,41 +174,38 @@ static void send_user_sigtrap(int si_code) "User debug trap"); } -static int single_step_handler(unsigned long unused, unsigned long esr, - struct pt_regs *regs) +/* + * We have already unmasked interrupts and enabled preemption + * when calling do_el0_softstep() from entry-common.c. + */ +void do_el0_softstep(unsigned long esr, struct pt_regs *regs) { + if (uprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED) + return; + + send_user_sigtrap(TRAP_TRACE); /* - * If we are stepping a pending breakpoint, call the hw_breakpoint - * handler first. + * ptrace will disable single step unless explicitly + * asked to re-enable it. For other clients, it makes + * sense to leave it enabled (i.e. rewind the controls + * to the active-not-pending state). */ - if (try_step_suspended_breakpoints(regs)) - return 0; - - if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED) - return 0; - - if (user_mode(regs)) { - send_user_sigtrap(TRAP_TRACE); - - /* - * ptrace will disable single step unless explicitly - * asked to re-enable it. For other clients, it makes - * sense to leave it enabled (i.e. rewind the controls - * to the active-not-pending state). - */ - user_rewind_single_step(current); - } else { - pr_warn("Unexpected kernel single-step exception at EL1\n"); - /* - * Re-enable stepping since we know that we will be - * returning to regs. 
- */ - set_regs_spsr_ss(regs); - } - - return 0; + user_rewind_single_step(current); } -NOKPROBE_SYMBOL(single_step_handler); + +void do_el1_softstep(unsigned long esr, struct pt_regs *regs) +{ + if (kgdb_single_step_handler(regs, esr) == DBG_HOOK_HANDLED) + return; + + pr_warn("Unexpected kernel single-step exception at EL1\n"); + /* + * Re-enable stepping since we know that we will be + * returning to regs. + */ + set_regs_spsr_ss(regs); +} +NOKPROBE_SYMBOL(do_el1_softstep); static int call_break_hook(struct pt_regs *regs, unsigned long esr) { @@ -329,8 +312,6 @@ NOKPROBE_SYMBOL(try_handle_aarch32_break); void __init debug_traps_init(void) { - hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, - TRAP_TRACE, "single-step handler"); hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, TRAP_BRKPT, "BRK handler"); } diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index be2add6b4ae3..7265bef96672 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -535,6 +535,24 @@ static void noinstr el1_breakpt(struct pt_regs *regs, unsigned long esr) arm64_exit_el1_dbg(regs); } +static void noinstr el1_softstp(struct pt_regs *regs, unsigned long esr) +{ + arm64_enter_el1_dbg(regs); + if (!cortex_a76_erratum_1463225_debug_handler(regs)) { + debug_exception_enter(regs); + /* + * After handling a breakpoint, we suspend the breakpoint + * and use single-step to move to the next instruction. + * If we are stepping a suspended breakpoint there's nothing more to do: + * the single-step is complete. + */ + if (!try_step_suspended_breakpoints(regs)) + do_el1_softstep(esr, regs); + debug_exception_exit(regs); + } + arm64_exit_el1_dbg(regs); +} + static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -587,6 +605,8 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs) el1_breakpt(regs, esr); break; case ESR_ELx_EC_SOFTSTP_CUR: + el1_softstp(regs, esr); + break; case ESR_ELx_EC_WATCHPT_CUR: case ESR_ELx_EC_BRK64: el1_dbg(regs, esr); @@ -793,6 +813,25 @@ static void noinstr el0_breakpt(struct pt_regs *regs, unsigned long esr) exit_to_user_mode(regs); } +static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr) +{ + if (!is_ttbr0_addr(regs->pc)) + arm64_apply_bp_hardening(); + + enter_from_user_mode(regs); + /* + * After handling a breakpoint, we suspend the breakpoint + * and use single-step to move to the next instruction. + * If we are stepping a suspended breakpoint there's nothing more to do: + * the single-step is complete. 
+ */ + if (!try_step_suspended_breakpoints(regs)) { + local_daif_restore(DAIF_PROCCTX); + do_el0_softstep(esr, regs); + } + exit_to_user_mode(regs); +} + static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr) { /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */ @@ -875,6 +914,8 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) el0_breakpt(regs, esr); break; case ESR_ELx_EC_SOFTSTP_LOW: + el0_softstp(regs, esr); + break; case ESR_ELx_EC_WATCHPT_LOW: case ESR_ELx_EC_BRK64: el0_dbg(regs, esr); @@ -997,6 +1038,8 @@ asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs) el0_breakpt(regs, esr); break; case ESR_ELx_EC_SOFTSTP_LOW: + el0_softstp(regs, esr); + break; case ESR_ELx_EC_WATCHPT_LOW: case ESR_ELx_EC_BKPT32: el0_dbg(regs, esr); diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 309ae24d4548..8a80e13347c8 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -854,7 +854,7 @@ bool try_step_suspended_breakpoints(struct pt_regs *regs) bool handled_exception = false; /* - * Called from single-step exception handler. + * Called from single-step exception entry. * Return true if we stepped a breakpoint and can resume execution, * false if we need to handle a single-step. */ From 413f0bba005dacf2484bb8ecce212fab9be79d81 Mon Sep 17 00:00:00 2001 From: Ada Couprie Diaz Date: Mon, 7 Jul 2025 12:41:06 +0100 Subject: [PATCH 43/67] arm64: debug: split hardware watchpoint exception entry Currently all debug exceptions share common entry code and are routed to `do_debug_exception()`, which calls dynamically-registered handlers for each specific debug exception. This is unfortunate as different debug exceptions have different entry handling requirements, and it would be better to handle these distinct requirements earlier. Hardware watchpoints are the only debug exceptions that will write FAR_EL1, so we need to preserve it and pass it down. However, they cannot be used to maliciously train branch predictors, so we can omit calling `arm64_apply_bp_hardening()`; nor do they need to handle the Cortex-A76 erratum #1463225, as it only applies to single stepping exceptions. As the hardware watchpoint handler only returns 0 and never triggers the call to `arm64_notify_die()`, we can call it directly from `entry-common.c`. Split the hardware watchpoint exception entry and adjust the behaviour to match the lack of needed mitigations. Signed-off-by: Ada Couprie Diaz Tested-by: Luis Claudio R.
Goncalves Reviewed-by: Will Deacon Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250707114109.35672-11-ada.coupriediaz@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/exception.h | 4 ++++ arch/arm64/kernel/entry-common.c | 31 ++++++++++++++++++++++++++++++ arch/arm64/kernel/hw_breakpoint.c | 17 +++++----------- 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index c8e7c61b8ac4..0362fecc5f69 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -63,8 +63,12 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr, struct pt_regs *regs); #ifdef CONFIG_HAVE_HW_BREAKPOINT void do_breakpoint(unsigned long esr, struct pt_regs *regs); +void do_watchpoint(unsigned long addr, unsigned long esr, + struct pt_regs *regs); #else static inline void do_breakpoint(unsigned long esr, struct pt_regs *regs) {} +static inline void do_watchpoint(unsigned long addr, unsigned long esr, + struct pt_regs *regs) {} #endif /* CONFIG_HAVE_HW_BREAKPOINT */ void do_el0_softstep(unsigned long esr, struct pt_regs *regs); void do_el1_softstep(unsigned long esr, struct pt_regs *regs); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 7265bef96672..8a6d951c14cc 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -553,6 +553,18 @@ static void noinstr el1_softstp(struct pt_regs *regs, unsigned long esr) arm64_exit_el1_dbg(regs); } +static void noinstr el1_watchpt(struct pt_regs *regs, unsigned long esr) +{ + /* Watchpoints are the only debug exception to write FAR_EL1 */ + unsigned long far = read_sysreg(far_el1); + + arm64_enter_el1_dbg(regs); + debug_exception_enter(regs); + do_watchpoint(far, esr, regs); + debug_exception_exit(regs); + arm64_exit_el1_dbg(regs); +} + static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -608,6 +620,8 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs) el1_softstp(regs, esr); break; case ESR_ELx_EC_WATCHPT_CUR: + el1_watchpt(regs, esr); + break; case ESR_ELx_EC_BRK64: el1_dbg(regs, esr); break; @@ -832,6 +846,19 @@ static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr) exit_to_user_mode(regs); } +static void noinstr el0_watchpt(struct pt_regs *regs, unsigned long esr) +{ + /* Watchpoints are the only debug exception to write FAR_EL1 */ + unsigned long far = read_sysreg(far_el1); + + enter_from_user_mode(regs); + debug_exception_enter(regs); + do_watchpoint(far, esr, regs); + debug_exception_exit(regs); + local_daif_restore(DAIF_PROCCTX); + exit_to_user_mode(regs); +} + static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr) { /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */ @@ -917,6 +944,8 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) el0_softstp(regs, esr); break; case ESR_ELx_EC_WATCHPT_LOW: + el0_watchpt(regs, esr); + break; case ESR_ELx_EC_BRK64: el0_dbg(regs, esr); break; @@ -1041,6 +1070,8 @@ asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs) el0_softstp(regs, esr); break; case ESR_ELx_EC_WATCHPT_LOW: + el0_watchpt(regs, esr); + break; case ESR_ELx_EC_BKPT32: el0_dbg(regs, esr); break; diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 8a80e13347c8..ab76b36dce82 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c 
@@ -750,8 +750,7 @@ static int watchpoint_report(struct perf_event *wp, unsigned long addr, return step; } -static int watchpoint_handler(unsigned long addr, unsigned long esr, - struct pt_regs *regs) +void do_watchpoint(unsigned long addr, unsigned long esr, struct pt_regs *regs) { int i, step = 0, *kernel_step, access, closest_match = 0; u64 min_dist = -1, dist; @@ -806,7 +805,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr, rcu_read_unlock(); if (!step) - return 0; + return; /* * We always disable EL0 watchpoints because the kernel can @@ -819,7 +818,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr, /* If we're already stepping a breakpoint, just return. */ if (debug_info->bps_disabled) - return 0; + return; if (test_thread_flag(TIF_SINGLESTEP)) debug_info->suspended_step = 1; @@ -830,7 +829,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr, kernel_step = this_cpu_ptr(&stepping_kernel_bp); if (*kernel_step != ARM_KERNEL_STEP_NONE) - return 0; + return; if (kernel_active_single_step()) { *kernel_step = ARM_KERNEL_STEP_SUSPEND; @@ -839,10 +838,8 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr, kernel_enable_single_step(regs); } } - - return 0; } -NOKPROBE_SYMBOL(watchpoint_handler); +NOKPROBE_SYMBOL(do_watchpoint); /* * Handle single-step exception. @@ -984,10 +981,6 @@ static int __init arch_hw_breakpoint_init(void) pr_info("found %d breakpoint and %d watchpoint registers.\n", core_num_brps, core_num_wrps); - /* Register debug fault handlers. */ - hook_debug_fault_code(DBG_ESR_EVT_HWWP, watchpoint_handler, SIGTRAP, - TRAP_HWBKPT, "hw-watchpoint handler"); - /* * Reset the breakpoint resources. We assume that a halting * debugger will leave the world in a nice state for us. From 31575e11ecf7e44face72d1e624cb147a9283733 Mon Sep 17 00:00:00 2001 From: Ada Couprie Diaz Date: Mon, 7 Jul 2025 12:41:07 +0100 Subject: [PATCH 44/67] arm64: debug: split brk64 exception entry Currently all debug exceptions share common entry code and are routed to `do_debug_exception()`, which calls dynamically-registered handlers for each specific debug exception. This is unfortunate as different debug exceptions have different entry handling requirements, and it would be better to handle these distinct requirements earlier. The BRK64 exception can only be triggered by a BRK instruction. Thus, we know that the PC is a legitimate address and isn't being used to train a branch predictor with a bogus address: we don't need to call `arm64_apply_bp_hardening()`. We do not need to handle the Cortex-A76 erratum #1463225 either, as it is only relevant for single stepping at EL1. BRK64 does not write FAR_EL1 either, as only hardware watchpoints do so. Split the BRK64 exception entry, and adjust the function signature and behaviour to match the lack of needed mitigations. Further, as the EL0 and EL1 code paths are cleanly separated, we can split `do_brk64()` into `do_el0_brk64()` and `do_el1_brk64()`, and call them directly from the relevant entry paths. Use `die()` directly for the EL1 error path, as in `do_el1_bti()` and `do_el1_undef()`. We can also remove `NOKPROBE_SYMBOL` for the EL0 path, as it cannot lead to a kprobe recursion. When taking a BRK64 exception from EL0, the exception handling is safely preemptible: the only possible handler is `uprobe_brk_handler()`.
It only operates on task-local data and properly checks its validity, then raises a Thread Information Flag, processed before returning to userspace in `do_notify_resume()`, which is already preemptible. Thus we can safely unmask interrupts and enable preemption before handling the break itself, fixing a PREEMPT_RT issue where the handler could call a sleeping function with preemption disabled. Given that the break hook registration is handled statically in `call_break_hook` since (arm64: debug: call software break handlers statically) and that we now bypass the exception handler registration, this change renders `early_brk64` redundant: its functionality is now handled through the post-init path. This also removes the last usage of `el1_dbg()`, as well as the last usage of `el0_dbg()` without `CONFIG_COMPAT`. Mark it `__maybe_unused` to prevent a warning when building this patch without `CONFIG_COMPAT`, as the following patch removes `el0_dbg()`. Signed-off-by: Ada Couprie Diaz Tested-by: Luis Claudio R. Goncalves Reviewed-by: Will Deacon Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250707114109.35672-12-ada.coupriediaz@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/exception.h | 2 ++ arch/arm64/kernel/debug-monitors.c | 48 ++++++++++++++---------------- arch/arm64/kernel/entry-common.c | 24 ++++++++++----- 3 files changed, 40 insertions(+), 34 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 0362fecc5f69..ca556583b128 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -72,6 +72,8 @@ static inline void do_watchpoint(unsigned long addr, unsigned long esr, #endif /* CONFIG_HAVE_HW_BREAKPOINT */ void do_el0_softstep(unsigned long esr, struct pt_regs *regs); void do_el1_softstep(unsigned long esr, struct pt_regs *regs); +void do_el0_brk64(unsigned long esr, struct pt_regs *regs); +void do_el1_brk64(unsigned long esr, struct pt_regs *regs); void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs); void do_sve_acc(unsigned long esr, struct pt_regs *regs); void do_sme_acc(unsigned long esr, struct pt_regs *regs); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index cb50d1f59798..7c1c89bb9ad1 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -207,15 +207,8 @@ void do_el1_softstep(unsigned long esr, struct pt_regs *regs) } NOKPROBE_SYMBOL(do_el1_softstep); -static int call_break_hook(struct pt_regs *regs, unsigned long esr) +static int call_el1_break_hook(struct pt_regs *regs, unsigned long esr) { - if (user_mode(regs)) { - if (IS_ENABLED(CONFIG_UPROBES) && - esr_brk_comment(esr) == UPROBES_BRK_IMM) - return uprobe_brk_handler(regs, esr); - return DBG_HOOK_ERROR; - } - if (esr_brk_comment(esr) == BUG_BRK_IMM) return bug_brk_handler(regs, esr); @@ -252,24 +245,30 @@ static int call_break_hook(struct pt_regs *regs, unsigned long esr) return DBG_HOOK_ERROR; } -NOKPROBE_SYMBOL(call_break_hook); +NOKPROBE_SYMBOL(call_el1_break_hook); -static int brk_handler(unsigned long unused, unsigned long esr, - struct pt_regs *regs) +/* + * We have already unmasked interrupts and enabled preemption + * when calling do_el0_brk64() from entry-common.c.
+ */ +void do_el0_brk64(unsigned long esr, struct pt_regs *regs) { - if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED) - return 0; + if (IS_ENABLED(CONFIG_UPROBES) && + esr_brk_comment(esr) == UPROBES_BRK_IMM && + uprobe_brk_handler(regs, esr) == DBG_HOOK_HANDLED) + return; - if (user_mode(regs)) { - send_user_sigtrap(TRAP_BRKPT); - } else { - pr_warn("Unexpected kernel BRK exception at EL1\n"); - return -EFAULT; - } - - return 0; + send_user_sigtrap(TRAP_BRKPT); } -NOKPROBE_SYMBOL(brk_handler); + +void do_el1_brk64(unsigned long esr, struct pt_regs *regs) +{ + if (call_el1_break_hook(regs, esr) == DBG_HOOK_HANDLED) + return; + + die("Oops - BRK", regs, esr); +} +NOKPROBE_SYMBOL(do_el1_brk64); bool try_handle_aarch32_break(struct pt_regs *regs) { @@ -311,10 +310,7 @@ bool try_handle_aarch32_break(struct pt_regs *regs) NOKPROBE_SYMBOL(try_handle_aarch32_break); void __init debug_traps_init(void) -{ - hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, - TRAP_BRKPT, "BRK handler"); -} +{} /* Re-enable single step for syscall restarting. */ void user_rewind_single_step(struct task_struct *task) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 8a6d951c14cc..b2415c7b0743 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -565,13 +565,12 @@ static void noinstr el1_watchpt(struct pt_regs *regs, unsigned long esr) arm64_exit_el1_dbg(regs); } -static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_brk64(struct pt_regs *regs, unsigned long esr) { - unsigned long far = read_sysreg(far_el1); - arm64_enter_el1_dbg(regs); - if (!cortex_a76_erratum_1463225_debug_handler(regs)) - do_debug_exception(far, esr, regs); + debug_exception_enter(regs); + do_el1_brk64(esr, regs); + debug_exception_exit(regs); arm64_exit_el1_dbg(regs); } @@ -623,7 +622,7 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs) el1_watchpt(regs, esr); break; case ESR_ELx_EC_BRK64: - el1_dbg(regs, esr); + el1_brk64(regs, esr); break; case ESR_ELx_EC_FPAC: el1_fpac(regs, esr); @@ -859,7 +858,16 @@ static void noinstr el0_watchpt(struct pt_regs *regs, unsigned long esr) exit_to_user_mode(regs); } -static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_brk64(struct pt_regs *regs, unsigned long esr) +{ + enter_from_user_mode(regs); + local_daif_restore(DAIF_PROCCTX); + do_el0_brk64(esr, regs); + exit_to_user_mode(regs); +} + +static void noinstr __maybe_unused +el0_dbg(struct pt_regs *regs, unsigned long esr) { /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */ unsigned long far = read_sysreg(far_el1); @@ -947,7 +955,7 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) el0_watchpt(regs, esr); break; case ESR_ELx_EC_BRK64: - el0_dbg(regs, esr); + el0_brk64(regs, esr); break; case ESR_ELx_EC_FPAC: el0_fpac(regs, esr); From fc5e5d0477c532054ce8692fd16fdaab2cb8946f Mon Sep 17 00:00:00 2001 From: Ada Couprie Diaz Date: Mon, 7 Jul 2025 12:41:08 +0100 Subject: [PATCH 45/67] arm64: debug: split bkpt32 exception entry Currently all debug exceptions share common entry code and are routed to `do_debug_exception()`, which calls dynamically-registered handlers for each specific debug exception. This is unfortunate as different debug exceptions have different entry handling requirements, and it would be better to handle these distinct requirements earlier. The BKPT32 exception can only be triggered by a BKPT instruction. 
Thus, we know that the PC is a legitimate address and isn't being used to train a branch predictor with a bogus address: we don't need to call `arm64_apply_bp_hardening()`. The handler for this exception only pends a signal and doesn't depend on any per-CPU state: we don't need to inhibit preemption, nor do we need to keep the DAIF exceptions masked, so we can unmask them earlier. Split the BKPT32 exception entry and adjust the function signature and behaviour to match its relaxed constraints compared to other debug exceptions. We can also remove `NOKPROBE_SYMBOL`, as this cannot lead to a kprobe recursion. This replaces the last usage of `el0_dbg()`, so remove it. Signed-off-by: Ada Couprie Diaz Tested-by: Luis Claudio R. Goncalves Reviewed-by: Will Deacon Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250707114109.35672-13-ada.coupriediaz@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/exception.h | 1 + arch/arm64/kernel/debug-monitors.c | 7 +++++++ arch/arm64/kernel/entry-common.c | 22 +++++++++------------- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index ca556583b128..cdce3c713766 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -74,6 +74,7 @@ void do_el0_softstep(unsigned long esr, struct pt_regs *regs); void do_el1_softstep(unsigned long esr, struct pt_regs *regs); void do_el0_brk64(unsigned long esr, struct pt_regs *regs); void do_el1_brk64(unsigned long esr, struct pt_regs *regs); +void do_bkpt32(unsigned long esr, struct pt_regs *regs); void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs); void do_sve_acc(unsigned long esr, struct pt_regs *regs); void do_sme_acc(unsigned long esr, struct pt_regs *regs); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 7c1c89bb9ad1..43a612eaa7d2 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -270,6 +270,13 @@ void do_el1_brk64(unsigned long esr, struct pt_regs *regs) } NOKPROBE_SYMBOL(do_el1_brk64); +#ifdef CONFIG_COMPAT +void do_bkpt32(unsigned long esr, struct pt_regs *regs) +{ + arm64_notify_die("aarch32 BKPT", regs, SIGTRAP, TRAP_BRKPT, regs->pc, esr); +} +#endif /* CONFIG_COMPAT */ + bool try_handle_aarch32_break(struct pt_regs *regs) { u32 arm_instr; diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index b2415c7b0743..bbf58fcab142 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -866,18 +866,6 @@ static void noinstr el0_brk64(struct pt_regs *regs, unsigned long esr) exit_to_user_mode(regs); } -static void noinstr __maybe_unused -el0_dbg(struct pt_regs *regs, unsigned long esr) -{ - /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */ - unsigned long far = read_sysreg(far_el1); - - enter_from_user_mode(regs); - do_debug_exception(far, esr, regs); - local_daif_restore(DAIF_PROCCTX); - exit_to_user_mode(regs); -} - static void noinstr el0_svc(struct pt_regs *regs) { enter_from_user_mode(regs); @@ -1038,6 +1026,14 @@ static void noinstr el0_svc_compat(struct pt_regs *regs) exit_to_user_mode(regs); } +static void noinstr el0_bkpt32(struct pt_regs *regs, unsigned long esr) +{ + enter_from_user_mode(regs); + local_daif_restore(DAIF_PROCCTX); + do_bkpt32(esr, regs); + exit_to_user_mode(regs); +} + asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -1081,7
+1077,7 @@ asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs) el0_watchpt(regs, esr); break; case ESR_ELx_EC_BKPT32: - el0_dbg(regs, esr); + el0_bkpt32(regs, esr); break; default: el0_inv(regs, esr); From a8b8cce9d96d65dfe3d89abf02033151f8b7d670 Mon Sep 17 00:00:00 2001 From: Ada Couprie Diaz Date: Mon, 7 Jul 2025 12:41:09 +0100 Subject: [PATCH 46/67] arm64: debug: remove debug exception registration infrastructure Now that debug exceptions are handled individually and without the need for dynamic registration, remove the unused registration infrastructure. This removes the external caller for `debug_exception_enter()` and `debug_exception_exit()`. Make them static again and remove them from the header. Remove `early_brk64()` as it has been made redundant by (arm64: debug: split brk64 exception entry) and is not used anymore. Note : in `early_brk64()` `bug_brk_handler()` is called unconditionally as a fall-through, but now `call_break_hook()` only calls it if the immediate matches. This does not change the behaviour in early boot, as if `bug_brk_handler()` was called on a non-BUG immediate it would return DBG_HOOK_ERROR anyway, which `call_break_hook()` will do if no immediate matches. Remove `trap_init()`, as it would be empty and a weak definition already exists in `init/main.c`. Signed-off-by: Ada Couprie Diaz Tested-by: Luis Claudio R. Goncalves Reviewed-by: Will Deacon Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250707114109.35672-14-ada.coupriediaz@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/debug-monitors.h | 2 - arch/arm64/include/asm/exception.h | 6 --- arch/arm64/include/asm/system_misc.h | 4 -- arch/arm64/kernel/debug-monitors.c | 3 -- arch/arm64/kernel/entry-common.c | 4 +- arch/arm64/kernel/traps.c | 27 ------------- arch/arm64/mm/fault.c | 53 ------------------------- 7 files changed, 2 insertions(+), 97 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 586290e95d87..4dfc37dd1d96 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -94,7 +94,5 @@ static inline bool try_step_suspended_breakpoints(struct pt_regs *regs) bool try_handle_aarch32_break(struct pt_regs *regs); -void debug_traps_init(void); - #endif /* __ASSEMBLY */ #endif /* __ASM_DEBUG_MONITORS_H */ diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index cdce3c713766..e3874c4fc399 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -59,8 +59,6 @@ void do_el0_bti(struct pt_regs *regs); void do_el1_bti(struct pt_regs *regs, unsigned long esr); void do_el0_gcs(struct pt_regs *regs, unsigned long esr); void do_el1_gcs(struct pt_regs *regs, unsigned long esr); -void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr, - struct pt_regs *regs); #ifdef CONFIG_HAVE_HW_BREAKPOINT void do_breakpoint(unsigned long esr, struct pt_regs *regs); void do_watchpoint(unsigned long addr, unsigned long esr, @@ -94,8 +92,4 @@ void do_serror(struct pt_regs *regs, unsigned long esr); void do_signal(struct pt_regs *regs); void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far); - -void debug_exception_enter(struct pt_regs *regs); -void debug_exception_exit(struct pt_regs *regs); - #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index c34344256762..344b1c1a4bbb 100644 --- 
a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -25,10 +25,6 @@ void arm64_notify_die(const char *str, struct pt_regs *regs, int signo, int sicode, unsigned long far, unsigned long err); -void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned long, - struct pt_regs *), - int sig, int code, const char *name); - struct mm_struct; extern void __show_regs(struct pt_regs *); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 43a612eaa7d2..43f342a01fdf 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -316,9 +316,6 @@ bool try_handle_aarch32_break(struct pt_regs *regs) } NOKPROBE_SYMBOL(try_handle_aarch32_break); -void __init debug_traps_init(void) -{} - /* Re-enable single step for syscall restarting. */ void user_rewind_single_step(struct task_struct *task) { diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index bbf58fcab142..4b67312a88ad 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -448,7 +448,7 @@ static __always_inline void fpsimd_syscall_exit(void) * accidentally schedule in exception context and it will force a warning * if we somehow manage to schedule by accident. */ -void debug_exception_enter(struct pt_regs *regs) +static void debug_exception_enter(struct pt_regs *regs) { preempt_disable(); @@ -457,7 +457,7 @@ void debug_exception_enter(struct pt_regs *regs) } NOKPROBE_SYMBOL(debug_exception_enter); -void debug_exception_exit(struct pt_regs *regs) +static void debug_exception_exit(struct pt_regs *regs) { preempt_enable_no_resched(); } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 654c8ea46ec6..98b11da5a5ad 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -1100,30 +1100,3 @@ int ubsan_brk_handler(struct pt_regs *regs, unsigned long esr) return DBG_HOOK_HANDLED; } #endif - -/* - * Initial handler for AArch64 BRK exceptions - * This handler only used until debug_traps_init(). 
- */ -int __init early_brk64(unsigned long addr, unsigned long esr, - struct pt_regs *regs) -{ -#ifdef CONFIG_CFI_CLANG - if (esr_is_cfi_brk(esr)) - return cfi_brk_handler(regs, esr) != DBG_HOOK_HANDLED; -#endif -#ifdef CONFIG_KASAN_SW_TAGS - if ((esr_brk_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM) - return kasan_brk_handler(regs, esr) != DBG_HOOK_HANDLED; -#endif -#ifdef CONFIG_UBSAN_TRAP - if (esr_is_ubsan_brk(esr)) - return ubsan_brk_handler(regs, esr) != DBG_HOOK_HANDLED; -#endif - return bug_brk_handler(regs, esr) != DBG_HOOK_HANDLED; -} - -void __init trap_init(void) -{ - debug_traps_init(); -} diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index d451d7d834f1..bc7f554dea9f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -53,18 +53,12 @@ struct fault_info { }; static const struct fault_info fault_info[]; -static struct fault_info debug_fault_info[]; static inline const struct fault_info *esr_to_fault_info(unsigned long esr) { return fault_info + (esr & ESR_ELx_FSC); } -static inline const struct fault_info *esr_to_debug_fault_info(unsigned long esr) -{ - return debug_fault_info + DBG_ESR_EVT(esr); -} - static void data_abort_decode(unsigned long esr) { unsigned long iss2 = ESR_ELx_ISS2(esr); @@ -938,53 +932,6 @@ void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs) } NOKPROBE_SYMBOL(do_sp_pc_abort); -/* - * __refdata because early_brk64 is __init, but the reference to it is - * clobbered at arch_initcall time. - * See traps.c and debug-monitors.c:debug_traps_init(). - */ -static struct fault_info __refdata debug_fault_info[] = { - { do_bad, SIGTRAP, TRAP_HWBKPT, "hardware breakpoint" }, - { do_bad, SIGTRAP, TRAP_HWBKPT, "hardware single-step" }, - { do_bad, SIGTRAP, TRAP_HWBKPT, "hardware watchpoint" }, - { do_bad, SIGKILL, SI_KERNEL, "unknown 3" }, - { do_bad, SIGTRAP, TRAP_BRKPT, "aarch32 BKPT" }, - { do_bad, SIGKILL, SI_KERNEL, "aarch32 vector catch" }, - { early_brk64, SIGTRAP, TRAP_BRKPT, "aarch64 BRK" }, - { do_bad, SIGKILL, SI_KERNEL, "unknown 7" }, -}; - -void __init hook_debug_fault_code(int nr, - int (*fn)(unsigned long, unsigned long, struct pt_regs *), - int sig, int code, const char *name) -{ - BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info)); - - debug_fault_info[nr].fn = fn; - debug_fault_info[nr].sig = sig; - debug_fault_info[nr].code = code; - debug_fault_info[nr].name = name; -} - -void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr, - struct pt_regs *regs) -{ - const struct fault_info *inf = esr_to_debug_fault_info(esr); - unsigned long pc = instruction_pointer(regs); - - debug_exception_enter(regs); - - if (user_mode(regs) && !is_ttbr0_addr(pc)) - arm64_apply_bp_hardening(); - - if (inf->fn(addr_if_watchpoint, esr, regs)) { - arm64_notify_die(inf->name, regs, inf->sig, inf->code, pc, esr); - } - - debug_exception_exit(regs); -} -NOKPROBE_SYMBOL(do_debug_exception); - /* * Used during anonymous page fault handling. */ From ef6861b8e6dd7d933e29022b6620c42085b907f9 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 7 Jul 2025 09:01:01 -0700 Subject: [PATCH 47/67] arm64: Mandate VMAP_STACK On arm64, VMAP_STACK has been enabled by default for a while now, and the only reason to disable it was a historical lack of support for KASAN_VMALLOC. Today there's no good reason to disable VMAP_STACK. Mandate VMAP_STACK, which will allow code to be simplified in subsequent patches. 
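For readers unfamiliar with the mechanism being made mandatory here, the following stand-alone user-space C sketch (illustrative only; not kernel code, with hypothetical names and an assumed 4K page size) shows the guard-page idea that virtually mapped stacks rely on: an overflow runs into an inaccessible page and faults immediately, rather than silently corrupting whatever sits next to the stack.

    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>

    int main(void)
    {
    	const long page = 4096;	/* assumed page size, for brevity */
    	char *region = mmap(NULL, 3 * page, PROT_READ | PROT_WRITE,
    			    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    	if (region == MAP_FAILED)
    		return 1;

    	/* Make the lowest page an inaccessible guard page. */
    	if (mprotect(region, page, PROT_NONE))
    		return 1;

    	char *stack_top = region + 3 * page;	/* stacks grow downwards */

    	/* Using the two in-bounds pages is fine... */
    	memset(stack_top - 2 * page, 0, 2 * page);

    	/*
    	 * ...but overflowing into the guard would fault deterministically
    	 * instead of corrupting a neighbouring allocation, e.g.:
    	 *	stack_top[-(2 * page) - 1] = 0;	// SIGSEGV
    	 */
    	printf("stack [%p, %p), guard page below\n",
    	       (void *)(region + page), (void *)stack_top);
    	return 0;
    }

In the kernel itself the guard comes for free, since vmalloc leaves an unmapped gap between allocations; that is what lets VMAP'd stacks detect overflow at all.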
Suggested-by: Will Deacon Signed-off-by: Breno Leitao Acked-by: Ard Biesheuvel Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250707-arm64_vmap-v1-1-8de98ca0f91c@debian.org Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 55fc331af337..b38230a8cc7e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -279,6 +279,7 @@ config ARM64 select HAVE_SOFTIRQ_ON_OWN_STACK select USER_STACKTRACE_SUPPORT select VDSO_GETRANDOM + select VMAP_STACK help ARM 64-bit (AArch64) Linux support. From 63829521a8e8fd8852305b521ba8cb7b41722365 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 7 Jul 2025 09:01:02 -0700 Subject: [PATCH 48/67] arm64: efi: Remove CONFIG_VMAP_STACK check Remove the CONFIG_VMAP_STACK check in arm64_efi_rt_init() since VMAP_STACK is now always enabled on arm64. The arch_alloc_vmap_stack() call will fail to build if VMAP_STACK is not set, providing sufficient protection without the explicit runtime check. Signed-off-by: Breno Leitao Acked-by: Ard Biesheuvel Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250707-arm64_vmap-v1-2-8de98ca0f91c@debian.org Signed-off-by: Will Deacon --- arch/arm64/kernel/efi.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 62230d6dd919..6c371b158b99 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -215,11 +215,6 @@ static int __init arm64_efi_rt_init(void) if (!efi_enabled(EFI_RUNTIME_SERVICES)) return 0; - if (!IS_ENABLED(CONFIG_VMAP_STACK)) { - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return -ENOMEM; - } - p = arch_alloc_vmap_stack(THREAD_SIZE, NUMA_NO_NODE); if (!p) { pr_warn("Failed to allocate EFI runtime stack\n"); From 0909c719c17b7a3e88dd1ee231b4a136c946c39e Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 7 Jul 2025 09:01:03 -0700 Subject: [PATCH 49/67] arm64: Remove CONFIG_VMAP_STACK conditionals from THREAD_SHIFT and THREAD_ALIGN Now that VMAP_STACK is always enabled on arm64, remove the CONFIG_VMAP_STACK conditional logic from the definitions of THREAD_SHIFT and THREAD_ALIGN in arch/arm64/include/asm/memory.h. This simplifies the code by unconditionally setting THREAD_ALIGN to (2 * THREAD_SIZE) and adjusting the THREAD_SHIFT definition to only depend on MIN_THREAD_SHIFT and PAGE_SHIFT. This change reflects the updated arm64 stack model, where all kernel threads use virtually mapped stacks with guard pages, and ensures alignment and stack sizing are consistently handled. Signed-off-by: Breno Leitao Acked-by: Ard Biesheuvel Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250707-arm64_vmap-v1-3-8de98ca0f91c@debian.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 717829df294e..5213248e081b 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -118,7 +118,7 @@ * VMAP'd stacks are allocated at page granularity, so we must ensure that such * stacks are a multiple of page size. */ -#if defined(CONFIG_VMAP_STACK) && (MIN_THREAD_SHIFT < PAGE_SHIFT) +#if (MIN_THREAD_SHIFT < PAGE_SHIFT) #define THREAD_SHIFT PAGE_SHIFT #else #define THREAD_SHIFT MIN_THREAD_SHIFT @@ -135,11 +135,7 @@ * checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry * assembly. 
*/ -#ifdef CONFIG_VMAP_STACK #define THREAD_ALIGN (2 * THREAD_SIZE) -#else -#define THREAD_ALIGN THREAD_SIZE -#endif #define IRQ_STACK_SIZE THREAD_SIZE From c4a5699d5cefd9d6a6a1d326297d5de6e8e0adde Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 7 Jul 2025 09:01:04 -0700 Subject: [PATCH 50/67] arm64: remove CONFIG_VMAP_STACK conditionals from irq stack setup With VMAP_STACK always enabled on arm64, drop the CONFIG_VMAP_STACK checks and legacy irq stack allocation from arch/arm64/kernel/irq.c. The code now unconditionally uses the VMAP_STACK path for irq stack initialization, simplifying the logic. Signed-off-by: Breno Leitao Acked-by: Ard Biesheuvel Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250707-arm64_vmap-v1-4-8de98ca0f91c@debian.org Signed-off-by: Will Deacon --- arch/arm64/kernel/irq.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 85087e2df564..c0065a1d77cf 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -51,7 +51,6 @@ static void init_irq_scs(void) scs_alloc(early_cpu_to_node(cpu)); } -#ifdef CONFIG_VMAP_STACK static void __init init_irq_stacks(void) { int cpu; @@ -62,18 +61,6 @@ static void __init init_irq_stacks(void) per_cpu(irq_stack_ptr, cpu) = p; } } -#else -/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */ -DEFINE_PER_CPU_ALIGNED(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); - -static void init_irq_stacks(void) -{ - int cpu; - - for_each_possible_cpu(cpu) - per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack, cpu); -} -#endif #ifndef CONFIG_PREEMPT_RT static void ____do_softirq(struct pt_regs *regs) From e5692bba1e6667441836e13f723112e4aeec3028 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 7 Jul 2025 09:01:05 -0700 Subject: [PATCH 51/67] arm64: remove CONFIG_VMAP_STACK conditionals from traps overflow stack With VMAP_STACK now always enabled on arm64, remove the CONFIG_VMAP_STACK checks from overflow stack definitions and related code in arch/arm64/kernel/traps.c. The overflow_stack and panic_bad_stack() logic are now unconditionally included, simplifying the source and matching the mandatory stack model. Signed-off-by: Breno Leitao Acked-by: Ard Biesheuvel Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250707-arm64_vmap-v1-5-8de98ca0f91c@debian.org Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 9bfa5c944379..6acbcffca650 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -894,8 +894,6 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr) "Bad EL0 synchronous exception"); } -#ifdef CONFIG_VMAP_STACK - DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack) __aligned(16); @@ -927,7 +925,6 @@ void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigne nmi_panic(NULL, "kernel stack overflow"); cpu_park_loop(); } -#endif void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr) { From 907cb5cd8efdae8672b7fd45047d130a430179f6 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 7 Jul 2025 09:01:06 -0700 Subject: [PATCH 52/67] arm64: remove CONFIG_VMAP_STACK checks from stacktrace overflow logic With VMAP_STACK now always enabled on arm64, remove all CONFIG_VMAP_STACK conditionals from overflow stack handling in stacktrace code. 
This change unconditionally defines the per-CPU overflow_stack and stackinfo_get_overflow() helper in arch/arm64/include/asm/stacktrace.h, and always includes the overflow stack in the stack_info array in arch/arm64/kernel/stacktrace.c. Also, drop redundant CONFIG_VMAP_STACK checks from SDEI stack declarations. Signed-off-by: Breno Leitao Acked-by: Ard Biesheuvel Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250707-arm64_vmap-v1-6-8de98ca0f91c@debian.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/stacktrace.h | 6 +----- arch/arm64/kernel/stacktrace.c | 4 +--- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 66ec8caa6ac0..6d3280932bf5 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -59,7 +59,6 @@ static inline bool on_task_stack(const struct task_struct *tsk, #define on_thread_stack() (on_task_stack(current, current_stack_pointer, 1)) -#ifdef CONFIG_VMAP_STACK DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack); static inline struct stack_info stackinfo_get_overflow(void) @@ -72,11 +71,8 @@ static inline struct stack_info stackinfo_get_overflow(void) .high = high, }; } -#else -#define stackinfo_get_overflow() stackinfo_get_unknown() -#endif -#if defined(CONFIG_ARM_SDE_INTERFACE) && defined(CONFIG_VMAP_STACK) +#if defined(CONFIG_ARM_SDE_INTERFACE) DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr); DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr); diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 1d9d51d7627f..e823320fe031 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -332,10 +332,8 @@ kunwind_stack_walk(kunwind_consume_fn consume_state, struct stack_info stacks[] = { stackinfo_get_task(task), STACKINFO_CPU(irq), -#if defined(CONFIG_VMAP_STACK) STACKINFO_CPU(overflow), -#endif -#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_ARM_SDE_INTERFACE) +#if defined(CONFIG_ARM_SDE_INTERFACE) STACKINFO_SDEI(normal), STACKINFO_SDEI(critical), #endif From 3e72b9e9f01a4851640a095de688a031d9259f5d Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 7 Jul 2025 09:01:07 -0700 Subject: [PATCH 53/67] arm64: remove CONFIG_VMAP_STACK checks from SDEI stack handling With VMAP_STACK now always enabled on arm64, remove all CONFIG_VMAP_STACK conditionals from SDEI stack allocation and initialization in arch/arm64/kernel/sdei.c. This change unconditionally defines the SDEI stack pointers and replaces runtime checks with BUILD_BUG_ON() assertions, ensuring that the code is only built when VMAP_STACK is enabled. This simplifies the logic and reflects the mandatory use of VMAP_STACK for all arm64 kernel builds. 
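The runtime-check-to-build-time-assertion conversion described above boils down to the classic negative-array-size trick. A simplified, self-contained sketch follows, using stand-in names rather than the kernel's real macros or functions:

    #define CONFIG_VMAP_STACK_SKETCH 1	/* stand-in for the Kconfig symbol */

    /*
     * Same idea as the kernel's BUILD_BUG_ON(): a non-zero constant
     * condition yields a negative array size and fails the build; a zero
     * condition compiles away to nothing.
     */
    #define SKETCH_BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

    static int init_stacks_sketch(void)
    {
    	/* Old shape: silently skip the work at run time. */
    	/* if (!CONFIG_VMAP_STACK_SKETCH) return 0; */

    	/* New shape: refuse to build if the invariant is ever broken. */
    	SKETCH_BUILD_BUG_ON(!CONFIG_VMAP_STACK_SKETCH);

    	/* ... allocate the per-CPU stacks ... */
    	return 0;
    }

    int main(void)
    {
    	return init_stacks_sketch();
    }

Unlike the old runtime test, nothing of the assertion survives into the object code, and a misconfiguration is caught at build time instead of by a silently skipped initialisation.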
Signed-off-by: Breno Leitao Acked-by: Ard Biesheuvel Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250707-arm64_vmap-v1-7-8de98ca0f91c@debian.org Signed-off-by: Will Deacon --- arch/arm64/kernel/sdei.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 255d12f881c2..6f24a0251e18 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -34,10 +34,8 @@ unsigned long sdei_exit_mode; DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr); DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr); -#ifdef CONFIG_VMAP_STACK DEFINE_PER_CPU(unsigned long *, sdei_stack_normal_ptr); DEFINE_PER_CPU(unsigned long *, sdei_stack_critical_ptr); -#endif DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr); DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr); @@ -65,8 +63,7 @@ static void free_sdei_stacks(void) { int cpu; - if (!IS_ENABLED(CONFIG_VMAP_STACK)) - return; + BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK)); for_each_possible_cpu(cpu) { _free_sdei_stack(&sdei_stack_normal_ptr, cpu); @@ -91,8 +88,7 @@ static int init_sdei_stacks(void) int cpu; int err = 0; - if (!IS_ENABLED(CONFIG_VMAP_STACK)) - return 0; + BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK)); for_each_possible_cpu(cpu) { err = _init_sdei_stack(&sdei_stack_normal_ptr, cpu); From 9d1869f0f537d26005a521a141dde759fc3303f5 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 7 Jul 2025 09:01:08 -0700 Subject: [PATCH 54/67] arm64: remove CONFIG_VMAP_STACK checks from entry code With VMAP_STACK now always enabled on arm64, remove all CONFIG_VMAP_STACK conditionals from entry handling in arch/arm64/kernel/entry-common.c and arch/arm64/kernel/entry.S. This change unconditionally includes the bad stack handling and overflow detection logic, simplifying the code and reflecting the mandatory use of VMAP_STACK for all arm64 kernel builds. Signed-off-by: Breno Leitao Acked-by: Ard Biesheuvel Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250707-arm64_vmap-v1-8-8de98ca0f91c@debian.org Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-common.c | 2 -- arch/arm64/kernel/entry.S | 6 ------ 2 files changed, 8 deletions(-) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 7c1970b341b8..99a341ee7131 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -977,7 +977,6 @@ UNHANDLED(el0t, 32, fiq) UNHANDLED(el0t, 32, error) #endif /* CONFIG_COMPAT */ -#ifdef CONFIG_VMAP_STACK asmlinkage void noinstr __noreturn handle_bad_stack(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -986,7 +985,6 @@ asmlinkage void noinstr __noreturn handle_bad_stack(struct pt_regs *regs) arm64_enter_nmi(regs); panic_bad_stack(regs, esr, far); } -#endif /* CONFIG_VMAP_STACK */ #ifdef CONFIG_ARM_SDE_INTERFACE asmlinkage noinstr unsigned long diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 5ae2a34b50bd..ea74cb7aac5b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -55,7 +55,6 @@ .endif sub sp, sp, #PT_REGS_SIZE -#ifdef CONFIG_VMAP_STACK /* * Test whether the SP has overflowed, without corrupting a GPR. * Task and IRQ stacks are aligned so that SP & (1 << THREAD_SHIFT) @@ -97,7 +96,6 @@ /* We were already on the overflow stack. Restore sp/x0 and carry on. */ sub sp, sp, x0 mrs x0, tpidrro_el0 -#endif b el\el\ht\()_\regsize\()_\label .org .Lventry_start\@ + 128 // Did we overflow the ventry slot? 
.endm @@ -540,7 +538,6 @@ SYM_CODE_START(vectors) kernel_ventry 0, t, 32, error // Error 32-bit EL0 SYM_CODE_END(vectors) -#ifdef CONFIG_VMAP_STACK SYM_CODE_START_LOCAL(__bad_stack) /* * We detected an overflow in kernel_ventry, which switched to the @@ -568,7 +565,6 @@ SYM_CODE_START_LOCAL(__bad_stack) bl handle_bad_stack ASM_BUG() SYM_CODE_END(__bad_stack) -#endif /* CONFIG_VMAP_STACK */ .macro entry_handler el:req, ht:req, regsize:req, label:req @@ -1003,7 +999,6 @@ SYM_CODE_START(__sdei_asm_handler) 1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6 2: str x19, [x5] -#ifdef CONFIG_VMAP_STACK /* * entry.S may have been using sp as a scratch register, find whether * this is a normal or critical event and switch to the appropriate @@ -1016,7 +1011,6 @@ SYM_CODE_START(__sdei_asm_handler) 2: mov x6, #SDEI_STACK_SIZE add x5, x5, x6 mov sp, x5 -#endif #ifdef CONFIG_SHADOW_CALL_STACK /* Use a separate shadow call stack for normal and critical events */ From 344b6580472451390d070c65c27f59716a1deecb Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Jun 2025 21:55:20 +0900 Subject: [PATCH 55/67] arm64: fix unnecessary rebuilding when CONFIG_DEBUG_EFI=y When CONFIG_DEBUG_EFI is enabled, some objects are needlessly rebuilt. [Steps to reproduce] Enable CONFIG_DEBUG_EFI and run 'make' twice in a clean source tree. On the second run, arch/arm64/kernel/head.o is rebuilt even though no files have changed. $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- clean $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- [ snip ] $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- CALL scripts/checksyscalls.sh AS arch/arm64/kernel/head.o AR arch/arm64/kernel/built-in.a AR arch/arm64/built-in.a AR built-in.a [ snip ] The issue is caused by the use of the $(realpath ...) function. At the time arch/arm64/kernel/Makefile is parsed on the first run, $(objtree)/vmlinux does not exist. As a result, $(realpath $(objtree)/vmlinux) expands to an empty string. On the second run of Make, $(objtree)/vmlinux already exists, so $(realpath $(objtree)/vmlinux) expands to the absolute path of vmlinux. However, this change in the command line causes arch/arm64/kernel/head.o to be rebuilt. To address this issue, use $(abspath ...) instead, which does not require the file to exist. While $(abspath ...) does not resolve symlinks, this should be fine from a debugging perspective. The GNU Make manual [1] clearly explains the difference between the two: $(realpath names...) For each file name in names return the canonical absolute name. A canonical name does not contain any . or .. components, nor any repeated path separators (/) or symlinks. In case of a failure the empty string is returned. Consult the realpath(3) documentation for a list of possible failure causes. $(abspath names...) For each file name in names return an absolute name that does not contain any . or .. components, nor any repeated path separators (/). Note that, in contrast to realpath function, abspath does not resolve symlinks and does not require the file names to refer to an existing file or directory. Use the wildcard function to test for existence. The same problem exists in drivers/firmware/efi/libstub/Makefile.zboot. On the first run of Make, $(obj)/vmlinuz.efi.elf does not exist when the Makefile is parsed, so -DZBOOT_EFI_PATH is set to an empty string. Replace $(realpath ...) with $(abspath ...) there as well.
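The realpath(3) failure mode quoted from the manual is easy to see directly from C; a minimal sketch (the file name is hypothetical):

    #include <limits.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
    	char buf[PATH_MAX];

    	/*
    	 * Fails with ENOENT when the file does not exist yet, just as
    	 * $(realpath $(objtree)/vmlinux) expands to "" on the first run
    	 * of Make, before vmlinux has been linked.
    	 */
    	if (!realpath("vmlinux-not-linked-yet", buf))
    		perror("realpath");
    	else
    		printf("%s\n", buf);
    	return 0;
    }

$(abspath ...) sidesteps this because it is pure string manipulation: its expansion is stable whether or not the file exists yet.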
[1]: https://www.gnu.org/software/make/manual/make.html#File-Name-Functions

Fixes: 757b435aaabe ("efi: arm64: Add vmlinux debug link to the Image binary")
Fixes: a050910972bb ("efi/libstub: implement generic EFI zboot")
Signed-off-by: Masahiro Yamada
Acked-by: Ard Biesheuvel
Link: https://lore.kernel.org/r/20250625125555.2504734-1-masahiroy@kernel.org
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/Makefile                  | 2 +-
 drivers/firmware/efi/libstub/Makefile.zboot | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 2920b0a51403..a2604c33f35c 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -81,7 +81,7 @@ obj-y += head.o
 always-$(KBUILD_BUILTIN) += vmlinux.lds

 ifeq ($(CONFIG_DEBUG_EFI),y)
-AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
+AFLAGS_head.o += -DVMLINUX_PATH="\"$(abspath vmlinux)\""
 endif

 # for cleaning
diff --git a/drivers/firmware/efi/libstub/Makefile.zboot b/drivers/firmware/efi/libstub/Makefile.zboot
index 92e3c73502ba..832deee36e48 100644
--- a/drivers/firmware/efi/libstub/Makefile.zboot
+++ b/drivers/firmware/efi/libstub/Makefile.zboot
@@ -36,7 +36,7 @@ aflags-zboot-header-$(EFI_ZBOOT_FORWARD_CFI) := \
 	-DPE_DLL_CHAR_EX=IMAGE_DLLCHARACTERISTICS_EX_FORWARD_CFI_COMPAT

 AFLAGS_zboot-header.o += -DMACHINE_TYPE=IMAGE_FILE_MACHINE_$(EFI_ZBOOT_MACH_TYPE) \
-			 -DZBOOT_EFI_PATH="\"$(realpath $(obj)/vmlinuz.efi.elf)\"" \
+			 -DZBOOT_EFI_PATH="\"$(abspath $(obj)/vmlinuz.efi.elf)\"" \
 			 -DZBOOT_SIZE_LEN=$(zboot-size-len-y) \
 			 -DCOMP_TYPE="\"$(comp-type-y)\"" \
 			 $(aflags-zboot-header-y)

From 75fdf823f94b18fa29ecbad9f39ecf8c57e8b8c6 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Mon, 14 Jul 2025 12:21:27 +0100
Subject: [PATCH 56/67] arm64/gcs: Don't call gcs_free() when releasing task_struct

Currently we call gcs_free() when releasing task_struct, but this is
redundant: it attempts to deallocate any kernel-managed userspace GCS,
which should no longer be relevant, and it resets values in the very
struct we are in the process of freeing.

By the time arch_release_task_struct() is called the mm will have been
disassociated from the task, so the check for a mm in gcs_free() will
always be false. For threads that exit while leaving the mm active,
deactivate_mm() will already have been called and will have freed any
kernel-managed GCS.

Signed-off-by: Mark Brown
Link: https://lore.kernel.org/r/20250714-arm64-gcs-release-task-v2-1-8a83cadfc846@kernel.org
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/process.c | 1 -
 arch/arm64/mm/gcs.c         | 6 ------
 2 files changed, 7 deletions(-)

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index a5ca15daeb8a..f53608ecaf4b 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -339,7 +339,6 @@ void flush_thread(void)
 void arch_release_task_struct(struct task_struct *tsk)
 {
 	fpsimd_release_task(tsk);
-	gcs_free(tsk);
 }

 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
diff --git a/arch/arm64/mm/gcs.c b/arch/arm64/mm/gcs.c
index 5c46ec527b1c..6e93f78de79b 100644
--- a/arch/arm64/mm/gcs.c
+++ b/arch/arm64/mm/gcs.c
@@ -157,12 +157,6 @@ void gcs_free(struct task_struct *task)
 	if (!system_supports_gcs())
 		return;

-	/*
-	 * When fork() with CLONE_VM fails, the child (tsk) already
-	 * has a GCS allocated, and exit_thread() calls this function
-	 * to free it. In this case the parent (current) and the
-	 * child share the same mm struct.
-	 */
 	if (!task->mm || task->mm != current->mm)
 		return;

From d7ce7e3a84642aadf7c4787f7ec4f58eb163d129 Mon Sep 17 00:00:00 2001
From: Breno Leitao
Date: Wed, 16 Jul 2025 02:42:01 -0700
Subject: [PATCH 57/67] arm64: Mark kernel as tainted on SEA and SError panic

Set TAINT_MACHINE_CHECK when an SError interrupt or a Synchronous
External Abort (SEA) triggers a panic, to flag potential hardware
faults. This tainting mechanism aids in debugging and enables
correlation of hardware-related crashes in large-scale deployments.

This change aligns with similar patches[1] that mark machine check
events when the system crashes due to hardware errors.

Link: https://lore.kernel.org/all/20250702-add_tain-v1-1-9187b10914b9@debian.org/ [1]
Signed-off-by: Breno Leitao
Acked-by: Mark Rutland
Link: https://lore.kernel.org/r/20250716-vmcore_hw_error-v2-1-f187f7d62aba@debian.org
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/traps.c | 1 +
 arch/arm64/mm/fault.c     | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 9bfa5c944379..7468b22585ce 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -931,6 +931,7 @@ void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigne

 void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr)
 {
+	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK);
 	console_verbose();

 	pr_crit("SError Interrupt on CPU%d, code 0x%016lx -- %s\n",
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index ec0a337891dd..004106ff4bd0 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -826,6 +826,7 @@ static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs)
 		 */
 		siaddr = untagged_addr(far);
 	}
+	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK);
 	arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);

 	return 0;

From 54c605124da6fad3d6033ca822c551ce33982084 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Tue, 15 Jul 2025 22:11:41 +0100
Subject: [PATCH 58/67] kselftest/arm64: Provide local defines for AT_HWCAP3

Some build environments for the selftests are not picking up the newly
added AT_HWCAP3 when using the libc headers, even with headers_install
(which we require already for the arm64 selftests). As a quick fix,
add local definitions of the constant to the tools that use it; while
auxvec.h is installed with some toolchains, it needs some persuasion
to get picked up.

Signed-off-by: Mark Brown
Link: https://lore.kernel.org/r/20250715-arm64-selftest-bodge-hwcap3-v1-1-541b54bc43bb@kernel.org
Signed-off-by: Will Deacon
---
 tools/testing/selftests/arm64/abi/hwcap.c       | 4 ++++
 tools/testing/selftests/arm64/mte/check_prctl.c | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 35f521e5f41c..aa902408facd 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -21,6 +21,10 @@

 #define TESTS_PER_HWCAP 3

+#ifndef AT_HWCAP3
+#define AT_HWCAP3 29
+#endif
+
 /*
  * Function expected to generate exception when the feature is not
  * supported and return when it is supported. If the specific exception
diff --git a/tools/testing/selftests/arm64/mte/check_prctl.c b/tools/testing/selftests/arm64/mte/check_prctl.c
index 4c89e9538ca0..c36c4c49ff95 100644
--- a/tools/testing/selftests/arm64/mte/check_prctl.c
+++ b/tools/testing/selftests/arm64/mte/check_prctl.c
@@ -12,6 +12,10 @@

 #include "kselftest.h"

+#ifndef AT_HWCAP3
+#define AT_HWCAP3 29
+#endif
+
 static int set_tagged_addr_ctrl(int val)
 {
 	int ret;

From aa46e18836c07e4d81491d47f8deeba840e780f0 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual
Date: Wed, 16 Jul 2025 04:54:32 +0100
Subject: [PATCH 59/67] arm64/mm: Drop redundant addr increment in set_huge_pte_at()

The 'addr' need not be incremented in the loop because it is not going
to be used subsequently.

Cc: Catalin Marinas
Cc: Will Deacon
Cc: Ryan Roberts
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Dev Jain
Signed-off-by: Anshuman Khandual
Link: https://lore.kernel.org/r/20250716035432.293682-1-anshuman.khandual@arm.com
Signed-off-by: Will Deacon
---
 arch/arm64/mm/hugetlbpage.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 0c8737f4f2ce..1d90a7e75333 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -225,7 +225,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 	ncontig = num_contig_ptes(sz, &pgsize);

 	if (!pte_present(pte)) {
-		for (i = 0; i < ncontig; i++, ptep++, addr += pgsize)
+		for (i = 0; i < ncontig; i++, ptep++)
 			__set_ptes_anysz(mm, ptep, pte, 1, pgsize);
 		return;
 	}

From b5cebb5de9a8fce3f6e6451f6db8e5608c7f2261 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Fri, 18 Jul 2025 19:33:16 +0100
Subject: [PATCH 60/67] kselftest/arm64: Allow sve-ptrace to run on SME only systems

Currently the sve-ptrace test program only runs if the system supports
SVE, but since SME includes streaming SVE the tests it offers are
valid even on a system that only supports SME. Since the tests already
have individual hwcap checks, just remove the top-level check and rely
on those.

Signed-off-by: Mark Brown
Link: https://lore.kernel.org/r/20250718-arm64-sve-ptrace-sme-only-v1-1-2a1121e51b1d@kernel.org
Signed-off-by: Catalin Marinas
---
 tools/testing/selftests/arm64/fp/sve-ptrace.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index 7f9b6a61d369..b22303778fb0 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -753,9 +753,6 @@ int main(void)
 	ksft_print_header();
 	ksft_set_plan(EXPECTED_TESTS);

-	if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
-		ksft_exit_skip("SVE not available\n");
-
 	child = fork();
 	if (!child)
 		return do_child();

From b84d2b27954f83c95c4b984d4f85574e8f51caa5 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Fri, 18 Jul 2025 23:03:26 +0100
Subject: [PATCH 61/67] kselftest/arm64: Test FPSIMD format data writes via NT_ARM_SVE in fp-ptrace

The NT_ARM_SVE register set supports two data formats: the native SVE
one, and an alternative format where we embed a copy of
user_fpsimd_state, as used for NT_PRFPREG, in the SVE register set.
The register data is set as for a write to NT_PRFPREG, and changes in
vector length and streaming mode are handled as for any NT_ARM_SVE
write. This has not previously been tested by fp-ptrace; add coverage
of it.
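For reference, such a write looks roughly as follows from a tracer's
point of view. This is a simplified sketch rather than the fp-ptrace
code itself: the helper name is made up, error handling is trimmed,
and the constants come from <asm/ptrace.h> and <linux/elf.h>:

  #include <linux/elf.h>
  #include <stdlib.h>
  #include <string.h>
  #include <sys/ptrace.h>
  #include <sys/types.h>
  #include <sys/uio.h>
  #include <asm/ptrace.h>

  /*
   * Write FPSIMD-format data through NT_ARM_SVE: the header selects
   * SVE_PT_REGS_FPSIMD and an NT_PRFPREG-style user_fpsimd_state
   * payload follows at SVE_PT_REGS_OFFSET.
   */
  static long write_fpsimd_via_sve(pid_t child, unsigned int vl,
  				 const struct user_fpsimd_state *fp)
  {
  	unsigned int vq = sve_vq_from_vl(vl);
  	size_t len = SVE_PT_SVE_OFFSET +
  		     SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_FPSIMD);
  	struct user_sve_header *sve = calloc(1, len);
  	struct iovec iov = { .iov_base = sve, .iov_len = len };
  	long ret;

  	if (!sve)
  		return -1;

  	sve->size = len;
  	sve->vl = vl;
  	sve->flags = SVE_PT_REGS_FPSIMD;
  	memcpy((char *)sve + SVE_PT_REGS_OFFSET, fp, sizeof(*fp));

  	ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_SVE, &iov);
  	free(sve);
  	return ret;
  }

The buffer sizing mirrors the test code below (SVE_PT_SVE_SIZE with
the FPSIMD flag), which over-allocates slightly relative to the actual
payload; that is harmless, as the kernel only consumes what the header
describes.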
We do not support writes in FPSIMD format for NT_ARM_SSVE, so we skip
the test for anything that would leave us in streaming mode.

Signed-off-by: Mark Brown
Link: https://lore.kernel.org/r/20250718-arm64-fp-ptrace-sve-fpsimd-v1-1-7ecda32aa297@kernel.org
Signed-off-by: Catalin Marinas
---
 tools/testing/selftests/arm64/fp/fp-ptrace.c | 66 +++++++++++++++++++-
 1 file changed, 64 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c
index 191c47ca0ed8..c479c97dea1a 100644
--- a/tools/testing/selftests/arm64/fp/fp-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c
@@ -1066,6 +1066,23 @@ static bool sve_write_supported(struct test_config *config)
 	return true;
 }

+static bool sve_write_fpsimd_supported(struct test_config *config)
+{
+	if (!sve_supported())
+		return false;
+
+	if ((config->svcr_in & SVCR_ZA) != (config->svcr_expected & SVCR_ZA))
+		return false;
+
+	if (config->svcr_expected & SVCR_SM)
+		return false;
+
+	if (config->sme_vl_in != config->sme_vl_expected)
+		return false;
+
+	return true;
+}
+
 static void fpsimd_write_expected(struct test_config *config)
 {
 	int vl;
@@ -1152,7 +1169,7 @@ static void sve_write_expected(struct test_config *config)
 	}
 }

-static void sve_write(pid_t child, struct test_config *config)
+static void sve_write_sve(pid_t child, struct test_config *config)
 {
 	struct user_sve_header *sve;
 	struct iovec iov;
@@ -1195,6 +1212,45 @@ static void sve_write(pid_t child, struct test_config *config)
 	free(iov.iov_base);
 }

+static void sve_write_fpsimd(pid_t child, struct test_config *config)
+{
+	struct user_sve_header *sve;
+	struct user_fpsimd_state *fpsimd;
+	struct iovec iov;
+	int ret, vl, vq;
+
+	vl = vl_expected(config);
+	vq = __sve_vq_from_vl(vl);
+
+	if (!vl)
+		return;
+
+	iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq,
+							  SVE_PT_REGS_FPSIMD);
+	iov.iov_base = malloc(iov.iov_len);
+	if (!iov.iov_base) {
+		ksft_print_msg("Failed allocating %lu byte SVE write buffer\n",
+			       iov.iov_len);
+		return;
+	}
+	memset(iov.iov_base, 0, iov.iov_len);
+
+	sve = iov.iov_base;
+	sve->size = iov.iov_len;
+	sve->flags = SVE_PT_REGS_FPSIMD;
+	sve->vl = vl;
+
+	fpsimd = iov.iov_base + SVE_PT_REGS_OFFSET;
+	memcpy(&fpsimd->vregs, v_expected, sizeof(v_expected));
+
+	ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_SVE, &iov);
+	if (ret != 0)
+		ksft_print_msg("Failed to write SVE: %s (%d)\n",
+			       strerror(errno), errno);
+
+	free(iov.iov_base);
+}
+
 static bool za_write_supported(struct test_config *config)
 {
 	if ((config->svcr_in & SVCR_SM) != (config->svcr_expected & SVCR_SM))
@@ -1386,7 +1442,13 @@ static struct test_definition sve_test_defs[] = {
 		.name = "SVE write",
 		.supported = sve_write_supported,
 		.set_expected_values = sve_write_expected,
-		.modify_values = sve_write,
+		.modify_values = sve_write_sve,
 	},
+	{
+		.name = "SVE write FPSIMD format",
+		.supported = sve_write_fpsimd_supported,
+		.set_expected_values = fpsimd_write_expected,
+		.modify_values = sve_write_fpsimd,
+	},
 };

From b021f45d39f37f67005948a96abea84736890463 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Fri, 18 Jul 2025 23:14:50 +0100
Subject: [PATCH 62/67] kselftest/arm64: Test SME on SME only systems in fp-ptrace

When checking that the vector extensions are supported, fp-ptrace
currently only checks for SVE, which means that we get into a
confused, half-configured state on SME-only systems. Check for SME as
well.
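For context, both probes ultimately reduce to auxiliary vector checks
along these lines (a sketch; fp-ptrace's own helpers are the
authoritative versions). Note that the SME hwcap lives in AT_HWCAP2,
which is why consulting AT_HWCAP alone misses it:

  #include <stdbool.h>
  #include <sys/auxv.h>
  #include <asm/hwcap.h>

  static bool sve_supported(void)
  {
  	return getauxval(AT_HWCAP) & HWCAP_SVE;
  }

  static bool sme_supported(void)
  {
  	return getauxval(AT_HWCAP2) & HWCAP2_SME;
  }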
Signed-off-by: Mark Brown
Link: https://lore.kernel.org/r/20250718-arm64-fp-ptrace-sme-only-v1-1-3b96dd19a503@kernel.org
Signed-off-by: Catalin Marinas
---
 tools/testing/selftests/arm64/fp/fp-ptrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c
index c479c97dea1a..52d4d8fa2958 100644
--- a/tools/testing/selftests/arm64/fp/fp-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c
@@ -1669,7 +1669,7 @@ int main(void)
 	 * Run the test set if there is no SVE or SME, with those we
 	 * have to pick a VL for each run.
 	 */
-	if (!sve_supported()) {
+	if (!sve_supported() && !sme_supported()) {
 		test_config.sve_vl_in = 0;
 		test_config.sve_vl_expected = 0;
 		test_config.sme_vl_in = 0;

From aa7d3c8bc27d32dec940c924d6d270fa312e731f Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Fri, 18 Jul 2025 23:14:51 +0100
Subject: [PATCH 63/67] kselftest/arm64: Fix SVE write data generation for SME only systems

fp-ptrace does not handle SME-only systems correctly when generating
data: on SME-only systems, scenarios where we are not in streaming
mode will not have an expected vector length. This leads to attempts
to do memcpy()s of zero-byte arrays, which can crash. Fix this by
skipping generation of SVE data for cases where we do not expect to
have an active vector length.

Signed-off-by: Mark Brown
Link: https://lore.kernel.org/r/20250718-arm64-fp-ptrace-sme-only-v1-2-3b96dd19a503@kernel.org
Signed-off-by: Catalin Marinas
---
 tools/testing/selftests/arm64/fp/fp-ptrace.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c
index 52d4d8fa2958..a709485cf553 100644
--- a/tools/testing/selftests/arm64/fp/fp-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c
@@ -1151,6 +1151,9 @@ static void sve_write_expected(struct test_config *config)
 	int vl = vl_expected(config);
 	int sme_vq = __sve_vq_from_vl(config->sme_vl_expected);

+	if (!vl)
+		return;
+
 	fill_random(z_expected, __SVE_ZREGS_SIZE(__sve_vq_from_vl(vl)));
 	fill_random(p_expected, __SVE_PREGS_SIZE(__sve_vq_from_vl(vl)));
@@ -1178,6 +1181,9 @@ static void sve_write_sve(pid_t child, struct test_config *config)
 	vl = vl_expected(config);
 	vq = __sve_vq_from_vl(vl);

+	if (!vl)
+		return;
+
 	iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
 	iov.iov_base = malloc(iov.iov_len);
 	if (!iov.iov_base) {

From 4752dcc156f2090143296ff45f8e35c8ec3e1730 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Fri, 18 Jul 2025 23:14:52 +0100
Subject: [PATCH 64/67] kselftest/arm64: Handle attempts to disable SM on SME only systems

The ABI for disabling streaming mode via ptrace is to do a write via
the SVE register set. Following the recent round of fixes to the
ptrace code we don't support this operation on systems without SVE,
which fp-ptrace detects as failures. Update the program so that it
knows that this operation is not currently supported.
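To make the condition concrete: a request to enter or leave streaming
mode shows up as a difference in the SM bit of SVCR between the input
and expected configurations, roughly as below. This is a sketch with
bit positions per the SME architecture; the names mirror fp-ptrace's
config fields rather than being taken from its source:

  #include <stdbool.h>

  #define SVCR_SM	(1UL << 0)	/* PSTATE.SM: streaming mode */
  #define SVCR_ZA	(1UL << 1)	/* PSTATE.ZA: ZA storage enabled */

  /*
   * A write via NT_ARM_SVE can only change SM if the system has
   * non-streaming SVE, hence the added sve_supported() check.
   */
  static bool sm_change_requested(unsigned long svcr_in,
  				unsigned long svcr_expected)
  {
  	return (svcr_in & SVCR_SM) != (svcr_expected & SVCR_SM);
  }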
Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250718-arm64-fp-ptrace-sme-only-v1-3-3b96dd19a503@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/fp-ptrace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c index a709485cf553..124bc883365e 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.c +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c @@ -1061,6 +1061,9 @@ static bool sve_write_supported(struct test_config *config) if (config->sme_vl_in != config->sme_vl_expected) { return false; } + + if (!sve_supported()) + return false; } return true; From 1a665a71ef0fb043c6cfafbf6a6cc9cdc2357505 Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Thu, 19 Jun 2025 17:00:42 +0100 Subject: [PATCH 65/67] arm64: signal: Remove ISB when resetting POR_EL0 POR_EL0 is set to its most permissive value before setting up the signal frame, to ensure that uaccess succeeds regardless of the signal stack's pkey. We are now tolerant to spurious POE faults. This means that we do not strictly need to issue an ISB after updating POR_EL0, even when followed by uaccess. The question is whether a fault is likely to happen or not if the ISB is omitted; in this case the answer seems to be no. If the regular stack is used, then it should already be accessible. If the alternate signal stack is used, then a special (inaccessible) pkey may be used - the assumption is that this situation is very uncommon. Remove the ISB to speed up the regular path - this should not have any functional impact regardless of the scenario. Signed-off-by: Kevin Brodsky Link: https://lore.kernel.org/r/20250619160042.2499290-3-kevin.brodsky@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/signal.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 417140cd399b..db3f972f8cd9 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -95,8 +95,11 @@ static void save_reset_user_access_state(struct user_access_state *ua_state) ua_state->por_el0 = read_sysreg_s(SYS_POR_EL0); write_sysreg_s(por_enable_all, SYS_POR_EL0); - /* Ensure that any subsequent uaccess observes the updated value */ - isb(); + /* + * No ISB required as we can tolerate spurious Overlay faults - + * the fault handler will check again based on the new value + * of POR_EL0. + */ } } From 8e7a67ca5a8013ac6055c776d7d1ef4c4047ee48 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 22 Jul 2025 10:55:48 +0100 Subject: [PATCH 66/67] arm64: Kconfig: Keep selects somewhat alphabetically ordered Recent patches selecting HAVE_RELIABLE_STACKTRACE and HAVE_LIVEPATCH added them to the end of the ARM64 Kconfig select list. Move them around to keep this list nearly alphabetically ordered. 
Signed-off-by: Catalin Marinas
---
 arch/arm64/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 110218542920..29759cfa0ea9 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -234,6 +234,7 @@ config ARM64
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS
 	select HAVE_IOREMAP_PROT
 	select HAVE_IRQ_TIME_ACCOUNTING
+	select HAVE_LIVEPATCH
 	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_NMI
 	select HAVE_PERF_EVENTS
@@ -242,6 +243,7 @@ config ARM64
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_PREEMPT_DYNAMIC_KEY
 	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_RELIABLE_STACKTRACE
 	select HAVE_POSIX_CPU_TIMERS_TASK_WORK
 	select HAVE_FUNCTION_ARG_ACCESS_API
 	select MMU_GATHER_RCU_TABLE_FREE
@@ -279,8 +281,6 @@ config ARM64
 	select HAVE_SOFTIRQ_ON_OWN_STACK
 	select USER_STACKTRACE_SUPPORT
 	select VDSO_GETRANDOM
-	select HAVE_RELIABLE_STACKTRACE
-	select HAVE_LIVEPATCH
 	help
 	  ARM 64-bit (AArch64) Linux support.

From cbbcfb94c55c02a8c4ce52b5da0770b5591a314c Mon Sep 17 00:00:00 2001
From: Jeremy Linton
Date: Fri, 18 Jul 2025 23:37:33 -0500
Subject: [PATCH 67/67] arm64/gcs: task_gcs_el0_enable() should use passed task

Mark Rutland noticed that the task parameter is ignored and 'current'
is being used instead. Since this is usually what it's passed, it
hasn't yet been causing problems, but likely will as the code gets
more testing.

But, once this is fixed, it creates a new bug in copy_thread_gcs(),
since gcs_el0_mode isn't yet set for the task at the point where it's
checked. Move gcs_alloc_thread_stack() after the new task's
gcs_el0_mode initialization to avoid this.

Fixes: fc84bc5378a8 ("arm64/gcs: Context switch GCS state for EL0")
Signed-off-by: Jeremy Linton
Reviewed-by: Mark Brown
Link: https://lore.kernel.org/r/20250719043740.4548-2-jeremy.linton@arm.com
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/gcs.h | 2 +-
 arch/arm64/kernel/process.c  | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/gcs.h b/arch/arm64/include/asm/gcs.h
index f50660603ecf..5bc432234d3a 100644
--- a/arch/arm64/include/asm/gcs.h
+++ b/arch/arm64/include/asm/gcs.h
@@ -58,7 +58,7 @@ static inline u64 gcsss2(void)

 static inline bool task_gcs_el0_enabled(struct task_struct *task)
 {
-	return current->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE;
+	return task->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE;
 }

 void gcs_set_el0_mode(struct task_struct *task);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index f53608ecaf4b..665e1f96501b 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -305,13 +305,13 @@ static int copy_thread_gcs(struct task_struct *p,
 	p->thread.gcs_base = 0;
 	p->thread.gcs_size = 0;

+	p->thread.gcs_el0_mode = current->thread.gcs_el0_mode;
+	p->thread.gcs_el0_locked = current->thread.gcs_el0_locked;
+
 	gcs = gcs_alloc_thread_stack(p, args);
 	if (IS_ERR_VALUE(gcs))
 		return PTR_ERR((void *)gcs);

-	p->thread.gcs_el0_mode = current->thread.gcs_el0_mode;
-	p->thread.gcs_el0_locked = current->thread.gcs_el0_locked;
-
 	return 0;
 }
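The bug class fixed here is worth spelling out in isolation, since it
is easy to reintroduce. A self-contained sketch with stand-in types
(not kernel code): a per-task predicate that reads current instead of
its argument is only correct while task == current, and silently
misreads a not-yet-running child in copy_thread() paths.

  #include <stdbool.h>

  struct task { unsigned long gcs_el0_mode; };

  static struct task *current_task;	/* stand-in for 'current' */

  /* Buggy: ignores its parameter and answers for the caller. */
  static bool gcs_enabled_buggy(struct task *task)
  {
  	return current_task->gcs_el0_mode & 1;
  }

  /* Fixed: answers for the task actually passed in. */
  static bool gcs_enabled_fixed(struct task *task)
  {
  	return task->gcs_el0_mode & 1;
  }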