From 807e5d383ca8bf6c0ac0fddd5edf9dda92e97c5d Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Wed, 31 Dec 2025 15:19:10 +0800
Subject: [PATCH 01/14] LoongArch: Complete CPUCFG registers definition

According to the "LoongArch Reference Manual Volume 1: Basic
Architecture", beginning with the LA664 CPU core there are more
supported features, which are indicated in CPUCFG2 and CPUCFG3. This
patch completes their definitions so as to match the architecture
specification.

Signed-off-by: Bibo Mao
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/loongarch.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index e6b8ff61c8cc..553c4dc7a156 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -94,6 +94,12 @@
 #define CPUCFG2_LSPW BIT(21)
 #define CPUCFG2_LAM BIT(22)
 #define CPUCFG2_PTW BIT(24)
+#define CPUCFG2_FRECIPE BIT(25)
+#define CPUCFG2_DIV32 BIT(26)
+#define CPUCFG2_LAM_BH BIT(27)
+#define CPUCFG2_LAMCAS BIT(28)
+#define CPUCFG2_LLACQ_SCREL BIT(29)
+#define CPUCFG2_SCQ BIT(30)
 
 #define LOONGARCH_CPUCFG3 0x3
 #define CPUCFG3_CCDMA BIT(0)
@@ -108,6 +114,7 @@
 #define CPUCFG3_SPW_HG_HF BIT(11)
 #define CPUCFG3_RVA BIT(12)
 #define CPUCFG3_RVAMAX GENMASK(16, 13)
+#define CPUCFG3_DBAR_HINTS BIT(17)
 #define CPUCFG3_ALDORDER_CAP BIT(18) /* All address load ordered, capability */
 #define CPUCFG3_ASTORDER_CAP BIT(19) /* All address store ordered, capability */
 #define CPUCFG3_ALDORDER_STA BIT(20) /* All address load ordered, status */

From d5be446948b379f1d1a8e7bc6656d13f44c5c7b1 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Wed, 31 Dec 2025 15:19:10 +0800
Subject: [PATCH 02/14] LoongArch: Set correct protection_map[] for
 VM_NONE/VM_SHARED

On 32BIT platforms _PAGE_PROTNONE is 0, so setting a VMA to VM_NONE or
VM_SHARED makes its pages non-present and then causes an Oops with a
kernel page fault. Fix it by setting a correct protection_map[] for
VM_NONE/VM_SHARED, falling back from _PAGE_PROTNONE to _PAGE_PRESENT
when the former is 0.
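The fix uses the GNU C conditional with an omitted middle operand:
"a ? : b" means "a ? a : b", with "a" evaluated only once. A minimal
user-space sketch of the idiom (the values below are illustrative,
not the real page-table bits):

	#include <stdio.h>

	#define PAGE_PROTNONE	0		/* as on 32BIT, where the bit is absent */
	#define PAGE_PRESENT	(1 << 7)	/* illustrative value only */

	int main(void)
	{
		/* GNU extension: yields PAGE_PROTNONE when it is non-zero,
		 * otherwise falls back to PAGE_PRESENT. */
		unsigned long flags = PAGE_PROTNONE ? : PAGE_PRESENT;

		printf("flags = %#lx\n", flags);	/* prints 0x80 here */
		return 0;
	}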
Signed-off-by: Huacai Chen
---
 arch/loongarch/mm/cache.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/loongarch/mm/cache.c b/arch/loongarch/mm/cache.c
index 6be04d36ca07..496916845ff7 100644
--- a/arch/loongarch/mm/cache.c
+++ b/arch/loongarch/mm/cache.c
@@ -160,8 +160,8 @@ void cpu_cache_init(void)
 
 static const pgprot_t protection_map[16] = {
 	[VM_NONE] = __pgprot(_CACHE_CC | _PAGE_USER |
-			     _PAGE_PROTNONE | _PAGE_NO_EXEC |
-			     _PAGE_NO_READ),
+			     _PAGE_NO_EXEC | _PAGE_NO_READ |
+			     (_PAGE_PROTNONE ? : _PAGE_PRESENT)),
 	[VM_READ] = __pgprot(_CACHE_CC | _PAGE_VALID |
 			     _PAGE_USER | _PAGE_PRESENT |
 			     _PAGE_NO_EXEC),
@@ -180,8 +180,8 @@ static const pgprot_t protection_map[16] = {
 	[VM_EXEC | VM_WRITE | VM_READ] = __pgprot(_CACHE_CC | _PAGE_VALID |
 						  _PAGE_USER | _PAGE_PRESENT),
 	[VM_SHARED] = __pgprot(_CACHE_CC | _PAGE_USER |
-			       _PAGE_PROTNONE | _PAGE_NO_EXEC |
-			       _PAGE_NO_READ),
+			       _PAGE_NO_EXEC | _PAGE_NO_READ |
+			       (_PAGE_PROTNONE ? : _PAGE_PRESENT)),
 	[VM_SHARED | VM_READ] = __pgprot(_CACHE_CC | _PAGE_VALID |
 					 _PAGE_USER | _PAGE_PRESENT |
 					 _PAGE_NO_EXEC),

From 1b2f4706c6a20ab6ac41533dd8032d54322076e3 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang
Date: Wed, 31 Dec 2025 15:19:10 +0800
Subject: [PATCH 03/14] LoongArch: Use UNWIND_HINT_END_OF_STACK for entry
 points

kernel_entry() and smpboot_entry() are the last frames for the ORC
unwinder, so it is proper to use the UNWIND_HINT_END_OF_STACK
annotation for them.

Link: https://lore.kernel.org/lkml/ots6w2ntyudj5ucs5eowncta2vmfssatpcqwzpar3ekk577hxi@j45dd4dmwx6x/
Suggested-by: Josh Poimboeuf
Signed-off-by: Tiezhu Yang
Signed-off-by: Huacai Chen
---
 arch/loongarch/kernel/head.S | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index aba548db2446..ce7f6c04f4ab 100644
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -42,6 +42,7 @@ SYM_DATA(kernel_fsize, .long _kernel_fsize);
 	.align 12
 
 SYM_CODE_START(kernel_entry)	# kernel entry point
+	UNWIND_HINT_END_OF_STACK
 	SETUP_TWINS
 
 	SETUP_MODES	t0
@@ -113,6 +114,7 @@
  * function after setting up the stack and tp registers.
  */
 SYM_CODE_START(smpboot_entry)
+	UNWIND_HINT_END_OF_STACK
 	SETUP_TWINS
 
 	SETUP_MODES	t0

From 6e5416d63bcba2bcc280d170eaf66775771892be Mon Sep 17 00:00:00 2001
From: Tiezhu Yang
Date: Wed, 31 Dec 2025 15:19:10 +0800
Subject: [PATCH 04/14] LoongArch: Remove is_entry_func() and kernel_entry_end

For now, the code related to is_entry_func() is useless, so it can be
removed. The symbol kernel_entry_end is then no longer used, so it can
be removed too.

Link: https://lore.kernel.org/lkml/kjiyla6qj3l7ezspitulrdoc5laj2e6hoecvd254hssnpddczm@g6nkaombh6va/
Suggested-by: Josh Poimboeuf
Signed-off-by: Tiezhu Yang
---
 arch/loongarch/kernel/head.S       |  2 --
 arch/loongarch/kernel/unwind_orc.c | 11 -----------
 2 files changed, 13 deletions(-)

diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index ce7f6c04f4ab..7f288e89573b 100644
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -144,5 +144,3 @@ SYM_CODE_START(smpboot_entry)
 SYM_CODE_END(smpboot_entry)
 
 #endif /* CONFIG_SMP */
-
-SYM_ENTRY(kernel_entry_end, SYM_L_GLOBAL, SYM_A_NONE)
diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c
index 0d5fa64a2225..710f82d73797 100644
--- a/arch/loongarch/kernel/unwind_orc.c
+++ b/arch/loongarch/kernel/unwind_orc.c
@@ -348,14 +348,6 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
 }
 EXPORT_SYMBOL_GPL(unwind_start);
 
-static bool is_entry_func(unsigned long addr)
-{
-	extern u32 kernel_entry;
-	extern u32 kernel_entry_end;
-
-	return addr >= (unsigned long)&kernel_entry && addr < (unsigned long)&kernel_entry_end;
-}
-
 static inline unsigned long bt_address(unsigned long ra)
 {
 	extern unsigned long eentry;
@@ -402,9 +394,6 @@ bool unwind_next_frame(struct unwind_state *state)
 	/* Don't let modules unload while we're reading their ORC data. */
 	guard(rcu)();
 
-	if (is_entry_func(state->pc))
-		goto end;
-
 	orc = orc_find(state->pc);
 	if (!orc) {
 		/*

From 4cd641a79e69270a062777f64a0dd330abb9044a Mon Sep 17 00:00:00 2001
From: Tiezhu Yang
Date: Wed, 31 Dec 2025 15:19:19 +0800
Subject: [PATCH 05/14] LoongArch: Remove unnecessary checks for ORC unwinder

According to the following function definitions, __kernel_text_address()
already checks __module_text_address(), so at least the
__module_text_address() check in bt_address() can be removed.

int __kernel_text_address(unsigned long addr)
{
	if (kernel_text_address(addr))
		return 1;
	...
	return 0;
}

int kernel_text_address(unsigned long addr)
{
	bool no_rcu;
	int ret = 1;
	...
	if (is_module_text_address(addr))
		goto out;
	...
	return ret;
}

bool is_module_text_address(unsigned long addr)
{
	guard(rcu)();

	return __module_text_address(addr) != NULL;
}

Furthermore, there are two checks of __kernel_text_address(): one in
bt_address() and the other after calling bt_address(), which looks
redundant.

Handle the exception address first, and then use __kernel_text_address()
in bt_address() to validate either the calculated exception address or
the normal address; this way the __kernel_text_address() check after
calling bt_address() can be removed.

Just remove unnecessary checks, no functional changes intended.

Signed-off-by: Tiezhu Yang
Signed-off-by: Huacai Chen
---
 arch/loongarch/kernel/unwind_orc.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c
index 710f82d73797..8a6e3429a860 100644
--- a/arch/loongarch/kernel/unwind_orc.c
+++ b/arch/loongarch/kernel/unwind_orc.c
@@ -352,12 +352,6 @@ static inline unsigned long bt_address(unsigned long ra)
 {
 	extern unsigned long eentry;
 
-	if (__kernel_text_address(ra))
-		return ra;
-
-	if (__module_text_address(ra))
-		return ra;
-
 	if (ra >= eentry && ra < eentry + EXCCODE_INT_END * VECSIZE) {
 		unsigned long func;
 		unsigned long type = (ra - eentry) / VECSIZE;
@@ -375,10 +369,13 @@ static inline unsigned long bt_address(unsigned long ra)
 			break;
 		}
 
-		return func + offset;
+		ra = func + offset;
 	}
 
-	return ra;
+	if (__kernel_text_address(ra))
+		return ra;
+
+	return 0;
 }
 
 bool unwind_next_frame(struct unwind_state *state)
@@ -501,9 +498,6 @@ bool unwind_next_frame(struct unwind_state *state)
 		goto err;
 	}
 
-	if (!__kernel_text_address(state->pc))
-		goto err;
-
 	return true;
 
 err:

From 9bdc1ab5e4ce6f066119018d8f69631a46f9c5a0 Mon Sep 17 00:00:00 2001
From: Chenghao Duan
Date: Wed, 31 Dec 2025 15:19:20 +0800
Subject: [PATCH 06/14] LoongArch: Enable exception fixup for specific ADE
 subcode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch allows the LoongArch BPF JIT to handle recoverable memory
access errors generated by BPF_PROBE_MEM* instructions. When a BPF
program performs memory access operations, the instructions it executes
may trigger ADEM exceptions. The kernel’s built-in BPF exception table
mechanism (EX_TYPE_BPF) generates the corresponding exception fixup
entries during the JIT compilation phase; however, the
architecture-specific trap handler needs to proactively call the common
fixup routine to achieve exception recovery.

do_ade(): fix up EX_TYPE_BPF memory access exceptions for BPF programs
to ensure safe execution.

Relevant test cases: illegal address access tests in module_attach and
subprogs_extable of selftests/bpf.
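For a JITed program the effect mirrors bpf_probe_read_kernel()
semantics: a faulting BPF_PROBE_MEM load yields 0 and execution
continues instead of oopsing. A hedged user-space model of the
recovery semantics (not actual JIT output; the helper name is made up
for illustration):

	#include <stdint.h>

	/* Model: after the extable fixup, a faulting PROBE_MEM load behaves
	 * as if the destination register were zeroed and the load skipped. */
	static uint64_t probe_mem_load_model(const uint64_t *p, int faulted)
	{
		return faulted ? 0 : *p;
	}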
Signed-off-by: Chenghao Duan
Signed-off-by: Huacai Chen
---
 arch/loongarch/kernel/traps.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index 004b8ebf0051..5d49b742e3bf 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -535,10 +535,15 @@ asmlinkage void noinstr do_fpe(struct pt_regs *regs, unsigned long fcsr)
 asmlinkage void noinstr do_ade(struct pt_regs *regs)
 {
 	irqentry_state_t state = irqentry_enter(regs);
+	unsigned int esubcode = FIELD_GET(CSR_ESTAT_ESUBCODE, regs->csr_estat);
+
+	if ((esubcode == EXSUBCODE_ADEM) && fixup_exception(regs))
+		goto out;
 
 	die_if_kernel("Kernel ade access", regs);
 	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)regs->csr_badvaddr);
 
+out:
 	irqentry_exit(regs, state);
 }

From 45cb47c628dfbd1994c619f3eac271a780602826 Mon Sep 17 00:00:00 2001
From: Chenghao Duan
Date: Wed, 31 Dec 2025 15:19:20 +0800
Subject: [PATCH 07/14] LoongArch: Refactor register restoration in
 ftrace_common_return

Refactor the register restoration sequence in the ftrace_common_return
function to clearly distinguish between the logic of normal returns and
direct call returns in function tracing scenarios. The logic is as
follows:

1. In the case of a normal return, the execution flow returns to the
   traced function, and ftrace must ensure that the register data is
   consistent with the state when the function was entered.
   ra = parent return address; t0 = traced function return address.

2. In the case of a direct call return, the execution flow jumps to the
   custom trampoline function, and ftrace must ensure that the register
   data is consistent with the state when ftrace was entered.
   ra = traced function return address; t0 = parent return address.

Cc: stable@vger.kernel.org
Fixes: 9cdc3b6a299c ("LoongArch: ftrace: Add direct call support")
Signed-off-by: Chenghao Duan
Signed-off-by: Huacai Chen
---
 arch/loongarch/kernel/mcount_dyn.S | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S
index d6b474ad1d5e..5729c20e5b8b 100644
--- a/arch/loongarch/kernel/mcount_dyn.S
+++ b/arch/loongarch/kernel/mcount_dyn.S
@@ -94,7 +94,6 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
  * at the callsite, so there is no need to restore the T series regs.
  */
 ftrace_common_return:
-	PTR_L	ra, sp, PT_R1
 	PTR_L	a0, sp, PT_R4
 	PTR_L	a1, sp, PT_R5
 	PTR_L	a2, sp, PT_R6
@@ -104,12 +103,17 @@ ftrace_common_return:
 	PTR_L	a6, sp, PT_R10
 	PTR_L	a7, sp, PT_R11
 	PTR_L	fp, sp, PT_R22
-	PTR_L	t0, sp, PT_ERA
 	PTR_L	t1, sp, PT_R13
-	PTR_ADDI	sp, sp, PT_SIZE
 	bnez	t1, .Ldirect
+
+	PTR_L	ra, sp, PT_R1
+	PTR_L	t0, sp, PT_ERA
+	PTR_ADDI	sp, sp, PT_SIZE
 	jr	t0
 .Ldirect:
+	PTR_L	t0, sp, PT_R1
+	PTR_L	ra, sp, PT_ERA
+	PTR_ADDI	sp, sp, PT_SIZE
 	jr	t1
 SYM_CODE_END(ftrace_common)
@@ -161,6 +165,8 @@ SYM_CODE_END(return_to_handler)
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 SYM_CODE_START(ftrace_stub_direct_tramp)
 	UNWIND_HINT_UNDEFINED
-	jr	t0
+	move	t1, ra
+	move	ra, t0
+	jr	t1
 SYM_CODE_END(ftrace_stub_direct_tramp)
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */

From 3f5a238f24d7b75f9efe324d3539ad388f58536e Mon Sep 17 00:00:00 2001
From: Hengqi Chen
Date: Wed, 31 Dec 2025 15:19:20 +0800
Subject: [PATCH 08/14] LoongArch: BPF: Sign extend kfunc call arguments

The kfunc calls are native calls, so they should follow the LoongArch
calling convention. Sign-extend their arguments properly to avoid
kernel panics.
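For example (a hedged illustration; add_one() below stands in for any
kfunc taking an int), the LP64 calling convention expects a 32-bit
argument to arrive extended to 64 bits, while BPF 32-bit ALU operations
leave the upper half of the register zeroed:

	/* Native callee built for LoongArch LP64: it assumes the 'int'
	 * argument arrives sign-extended in the 64-bit argument register. */
	long add_one(int x)
	{
		return (long)x + 1;
	}

	/* A BPF program that computed x = -2 with 32-bit ALU ops hands over
	 * 0x00000000fffffffe (zero-extended). Without a sign-extending
	 * re-extension emitted before the call, add_one() may see a large
	 * positive value instead of -2. */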
This is done by adding a new emit_abi_ext() helper. The emit_abi_ext()
helper performs the extension in place, on a value already stored in
the target register. (Note: this is different from the existing
sign_extend() helper, so we can't reuse it.)

Cc: stable@vger.kernel.org
Fixes: 5dc615520c4d ("LoongArch: Add BPF JIT support")
Signed-off-by: Hengqi Chen
Signed-off-by: Huacai Chen
---
 arch/loongarch/net/bpf_jit.c | 16 ++++++++++++++++
 arch/loongarch/net/bpf_jit.h | 26 ++++++++++++++++++++++++++
 2 files changed, 42 insertions(+)

diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 8dc58781b8eb..5352d0c30fb2 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -950,6 +950,22 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 		emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, tcc_ptr_off);
 	}
 
+	if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+		const struct btf_func_model *m;
+		int i;
+
+		m = bpf_jit_find_kfunc_model(ctx->prog, insn);
+		if (!m)
+			return -EINVAL;
+
+		for (i = 0; i < m->nr_args; i++) {
+			u8 reg = regmap[BPF_REG_1 + i];
+			bool sign = m->arg_flags[i] & BTF_FMODEL_SIGNED_ARG;
+
+			emit_abi_ext(ctx, reg, m->arg_size[i], sign);
+		}
+	}
+
 	move_addr(ctx, t1, func_addr);
 	emit_insn(ctx, jirl, LOONGARCH_GPR_RA, t1, 0);
diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h
index 5697158fd164..75b6330030a9 100644
--- a/arch/loongarch/net/bpf_jit.h
+++ b/arch/loongarch/net/bpf_jit.h
@@ -88,6 +88,32 @@ static inline void emit_sext_32(struct jit_ctx *ctx, enum loongarch_gpr reg, boo
 	emit_insn(ctx, addiw, reg, reg, 0);
 }
 
+/* Emit proper extension according to ABI requirements.
+ * Note that it requires a value of size `size` already resides in register `reg`.
+ */
+static inline void emit_abi_ext(struct jit_ctx *ctx, int reg, u8 size, bool sign)
+{
+	/* ABI requires unsigned char/short to be zero-extended */
+	if (!sign && (size == 1 || size == 2))
+		return;
+
+	switch (size) {
+	case 1:
+		emit_insn(ctx, extwb, reg, reg);
+		break;
+	case 2:
+		emit_insn(ctx, extwh, reg, reg);
+		break;
+	case 4:
+		emit_insn(ctx, addiw, reg, reg, 0);
+		break;
+	case 8:
+		break;
+	default:
+		pr_warn("bpf_jit: invalid size %d for extension\n", size);
+	}
+}
+
 static inline void move_addr(struct jit_ctx *ctx, enum loongarch_gpr rd, u64 addr)
 {
 	u64 imm_11_0, imm_31_12, imm_51_32, imm_63_52;

From eb71f5c433e1c6dff089b315881dec40a88a7baf Mon Sep 17 00:00:00 2001
From: Hengqi Chen
Date: Wed, 31 Dec 2025 15:19:20 +0800
Subject: [PATCH 09/14] LoongArch: BPF: Zero-extend bpf_tail_call() index

The bpf_tail_call() index should be treated as a u32 value. Let's
zero-extend it to avoid calling wrong BPF progs. See similar fixes for
x86 ([1]) and arm64 ([2]) for more details.

[1]: https://github.com/torvalds/linux/commit/90caccdd8cc0215705f18b92771b449b01e2474a
[2]: https://github.com/torvalds/linux/commit/16338a9b3ac30740d49f5dfed81bac0ffa53b9c7
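A hedged user-space model of the hazard (not JIT output): stale upper
bits in the index register change the bounds check unless the value is
first truncated to u32, which is what the added emit_zext_32() call
enforces:

	#include <stdint.h>

	/* The tail-call key is defined as a u32, but the 64-bit register
	 * may carry stale upper bits from earlier program logic. */
	static int tail_call_allowed(uint64_t a2, uint32_t max_entries)
	{
		uint32_t idx = (uint32_t)a2;	/* the zero-extension step */

		return idx < max_entries;	/* compare as u32, like the interpreter */
	}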
Cc: stable@vger.kernel.org
Fixes: 5dc615520c4d ("LoongArch: Add BPF JIT support")
Signed-off-by: Hengqi Chen
Signed-off-by: Huacai Chen
---
 arch/loongarch/net/bpf_jit.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 5352d0c30fb2..766ded335fd8 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -280,6 +280,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn)
 	 * goto out;
 	 */
 	tc_ninsn = insn ? ctx->offset[insn+1] - ctx->offset[insn] : ctx->offset[0];
+	emit_zext_32(ctx, a2, true);
+
 	off = offsetof(struct bpf_array, map.max_entries);
 	emit_insn(ctx, ldwu, t1, a1, off);
 	/* bgeu $a2, $t1, jmp_offset */

From d314e1f48260cef3f869e3edc02a02c8a48b08e1 Mon Sep 17 00:00:00 2001
From: Chenghao Duan
Date: Wed, 31 Dec 2025 15:19:20 +0800
Subject: [PATCH 10/14] LoongArch: BPF: Save return address register ra to t0
 before trampoline

Modify the build_prologue() function to ensure the return address
register ra is saved to t0 before entering trampoline operations. This
change ensures accurate return address handling when a BPF program
calls another BPF program, preventing errors in the BPF-to-BPF call
chain.

Cc: stable@vger.kernel.org
Fixes: 677e6123e3d2 ("LoongArch: BPF: Disable trampoline for kernel module function trace")
Signed-off-by: Chenghao Duan
Signed-off-by: Huacai Chen
---
 arch/loongarch/net/bpf_jit.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 766ded335fd8..9729c0ff7bfc 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -139,6 +139,7 @@ static void build_prologue(struct jit_ctx *ctx)
 	stack_adjust = round_up(stack_adjust, 16);
 	stack_adjust += bpf_stack_adjust;
 
+	move_reg(ctx, LOONGARCH_GPR_T0, LOONGARCH_GPR_RA);
 	/* Reserve space for the move_imm + jirl instruction */
 	for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++)
 		emit_insn(ctx, nop);

From 61319d15a56093358c6822d30659fe2941f589f1 Mon Sep 17 00:00:00 2001
From: Chenghao Duan
Date: Wed, 31 Dec 2025 15:19:21 +0800
Subject: [PATCH 11/14] LoongArch: BPF: Adjust the jump offset of tail calls

Call the next bpf prog and skip the first instruction of TCC
initialization. A total of 7 instructions are skipped:

  'move t0, ra'            1 inst
  'move_imm + jirl'        5 inst
  'addid REG_TCC, zero, 0' 1 inst

Relevant test cases: the tailcalls test item in selftests/bpf.

Cc: stable@vger.kernel.org
Fixes: 677e6123e3d2 ("LoongArch: BPF: Disable trampoline for kernel module function trace")
Signed-off-by: Chenghao Duan
Signed-off-by: Huacai Chen
---
 arch/loongarch/net/bpf_jit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 9729c0ff7bfc..e6aeaec46969 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -239,7 +239,7 @@ static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
 		 * Call the next bpf prog and skip the first instruction
 		 * of TCC initialization.
 		 */
-		emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T3, 6);
+		emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T3, 7);
 	}
 }

From 26138762d9a27a7f1c33f467c4123c600f64a36e Mon Sep 17 00:00:00 2001
From: Chenghao Duan
Date: Wed, 31 Dec 2025 15:19:21 +0800
Subject: [PATCH 12/14] LoongArch: BPF: Enable trampoline-based tracing for
 module functions

Remove the previous restriction that blocked the tracing of kernel
module functions, and fix the issue that previously caused kernel
lockups when attempting to trace them.

Before entering the trampoline code, the return address register ra
holds the address of the next instruction after the 'bl trampoline'
instruction (i.e. an address inside the traced function), and register
t0 holds the parent function's return address. Refine the trampoline
return logic to ensure that the register data remains correct when
returning to both the traced function and the parent function.
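A sketch of the resulting register flow (instruction spellings
abbreviated; the two trampoline exit paths correspond to the diff
below):

	/*
	 * parent:
	 *     bl   traced_func        ; ra = parent return address
	 * traced_func:
	 *     move t0, ra             ; t0 = parent return address
	 *     bl   trampoline         ; ra = address inside traced_func
	 * trampoline exit:
	 *     BPF_TRAMP_F_SKIP_FRAME: move ra, t0; jr t0
	 *         (return straight to the parent, ra restored)
	 *     otherwise:              move t1, ra; move ra, t0; jr t1
	 *         (resume the traced function with ra = parent return address)
	 */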
Before this patch was applied, the module_attach test in selftests/bpf
encountered a deadlock issue. This was caused by an incorrect jump
address after the trampoline execution, which resulted in an infinite
loop within the module function.

Cc: stable@vger.kernel.org
Fixes: 677e6123e3d2 ("LoongArch: BPF: Disable trampoline for kernel module function trace")
Signed-off-by: Chenghao Duan
Signed-off-by: Huacai Chen
---
 arch/loongarch/net/bpf_jit.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index e6aeaec46969..9f6e93343b6e 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1284,7 +1284,7 @@ static int emit_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call)
 		return 0;
 	}
 
-	return emit_jump_and_link(&ctx, is_call ? LOONGARCH_GPR_T0 : LOONGARCH_GPR_ZERO, (u64)target);
+	return emit_jump_and_link(&ctx, is_call ? LOONGARCH_GPR_RA : LOONGARCH_GPR_ZERO, (u64)target);
 }
 
 static int emit_call(struct jit_ctx *ctx, u64 addr)
@@ -1641,14 +1641,12 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
 
 	/* To traced function */
 	/* Ftrace jump skips 2 NOP instructions */
-	if (is_kernel_text((unsigned long)orig_call))
+	if (is_kernel_text((unsigned long)orig_call) ||
+	    is_module_text_address((unsigned long)orig_call))
 		orig_call += LOONGARCH_FENTRY_NBYTES;
 	/* Direct jump skips 5 NOP instructions */
 	else if (is_bpf_text_address((unsigned long)orig_call))
 		orig_call += LOONGARCH_BPF_FENTRY_NBYTES;
-	/* Module tracing not supported - cause kernel lockups */
-	else if (is_module_text_address((unsigned long)orig_call))
-		return -ENOTSUPP;
 
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
 		move_addr(ctx, LOONGARCH_GPR_A0, (const u64)im);
@@ -1741,12 +1739,16 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
 	emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0);
 	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, 16);
 
-	if (flags & BPF_TRAMP_F_SKIP_FRAME)
+	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
 		/* return to parent function */
-		emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0);
-	else
-		/* return to traced function */
+		move_reg(ctx, LOONGARCH_GPR_RA, LOONGARCH_GPR_T0);
 		emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T0, 0);
+	} else {
+		/* return to traced function */
+		move_reg(ctx, LOONGARCH_GPR_T1, LOONGARCH_GPR_RA);
+		move_reg(ctx, LOONGARCH_GPR_RA, LOONGARCH_GPR_T0);
+		emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T1, 0);
+	}
 }
 
 ret = ctx->idx;

From 73721d8676771c6c7b06d4e636cc053fc76afefd Mon Sep 17 00:00:00 2001
From: Chenghao Duan
Date: Wed, 31 Dec 2025 15:19:21 +0800
Subject: [PATCH 13/14] LoongArch: BPF: Enhance the bpf_arch_text_poke()
 function

Enhance the bpf_arch_text_poke() function so that BPF program entry
points are located accurately. When modifying the entry point of a BPF
program, skip the "move t0, ra" instruction so that the jump address is
patched at the correct location.
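With the prologue changes from the previous patches, the BPF prog image
is expected to begin as follows (illustration; offsets assume 4-byte
instructions):

	/*
	 * image + 0:   move t0, ra             <- must stay intact
	 * image + 4:   nop   \
	 * ...                 | 5 insns reserved for move_imm + jirl
	 * image + 20:  nop   /
	 * image + 24:  addi.d REG_TCC, zero, 0  (TCC initialization)
	 *
	 * so for offset == 0 the poke target becomes
	 * ip = image + LOONGARCH_INSN_SIZE.
	 */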
Cc: stable@vger.kernel.org
Fixes: 677e6123e3d2 ("LoongArch: BPF: Disable trampoline for kernel module function trace")
Signed-off-by: Chenghao Duan
Signed-off-by: Huacai Chen
---
 arch/loongarch/net/bpf_jit.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 9f6e93343b6e..d1d5a65308b9 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1309,15 +1309,30 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
 {
 	int ret;
 	bool is_call;
+	unsigned long size = 0;
+	unsigned long offset = 0;
+	void *image = NULL;
+	char namebuf[KSYM_NAME_LEN];
 	u32 old_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
 	u32 new_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
 
 	/* Only poking bpf text is supported. Since kernel function entry
 	 * is set up by ftrace, we rely on ftrace to poke kernel functions.
 	 */
-	if (!is_bpf_text_address((unsigned long)ip))
+	if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf))
 		return -ENOTSUPP;
 
+	image = ip - offset;
+
+	/* zero offset means we're poking bpf prog entry */
+	if (offset == 0) {
+		/* skip to the nop instruction in bpf prog entry:
+		 * move t0, ra
+		 * nop
+		 */
+		ip = image + LOONGARCH_INSN_SIZE;
+	}
+
 	is_call = old_t == BPF_MOD_CALL;
 	ret = emit_jump_or_nops(old_addr, ip, old_insns, is_call);
 	if (ret)

From bb85d206be208bbf834883e948125a35ac59993a Mon Sep 17 00:00:00 2001
From: Chenghao Duan
Date: Wed, 31 Dec 2025 15:19:25 +0800
Subject: [PATCH 14/14] samples/ftrace: Adjust LoongArch register restore order
 in direct calls

Ensure that in the ftrace direct call logic, the CPU register state
(with ra = parent return address) is restored to the correct state
after the custom trampoline function has run and before returning to
the traced function. Additionally, guarantee the correctness of the
jump logic for jr t0 (traced function address).
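Concretely, for my_tramp in ftrace-direct.c (a sketch; per the
convention established in the preceding patches, ra holds the traced
function address and t0 the parent return address on entry):

	/*
	 * my_tramp:
	 *     st.d t0, sp, ...     ; save parent return address
	 *     st.d ra, sp, ...     ; save traced function address
	 *     bl   my_direct_func  ; clobbers ra
	 *     ld.d ra, sp, ...     ; ra <- parent return address (was t0)
	 *     ld.d t0, sp, ...     ; t0 <- traced function address (was ra)
	 *     jr   t0              ; resume traced function, returns via ra
	 */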
Cc: stable@vger.kernel.org
Fixes: 9cdc3b6a299c ("LoongArch: ftrace: Add direct call support")
Reported-by: Youling Tang
Acked-by: Steven Rostedt (Google)
Signed-off-by: Chenghao Duan
Signed-off-by: Huacai Chen
---
 samples/ftrace/ftrace-direct-modify.c       | 8 ++++----
 samples/ftrace/ftrace-direct-multi-modify.c | 8 ++++----
 samples/ftrace/ftrace-direct-multi.c        | 4 ++--
 samples/ftrace/ftrace-direct-too.c          | 4 ++--
 samples/ftrace/ftrace-direct.c              | 4 ++--
 5 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c
index da3a9f2091f5..1ba1927b548e 100644
--- a/samples/ftrace/ftrace-direct-modify.c
+++ b/samples/ftrace/ftrace-direct-modify.c
@@ -176,8 +176,8 @@ asm (
 "	st.d	$t0, $sp, 0\n"
 "	st.d	$ra, $sp, 8\n"
 "	bl	my_direct_func1\n"
-"	ld.d	$t0, $sp, 0\n"
-"	ld.d	$ra, $sp, 8\n"
+"	ld.d	$ra, $sp, 0\n"
+"	ld.d	$t0, $sp, 8\n"
 "	addi.d	$sp, $sp, 16\n"
 "	jr	$t0\n"
 "	.size		my_tramp1, .-my_tramp1\n"
@@ -189,8 +189,8 @@ asm (
 "	st.d	$t0, $sp, 0\n"
 "	st.d	$ra, $sp, 8\n"
 "	bl	my_direct_func2\n"
-"	ld.d	$t0, $sp, 0\n"
-"	ld.d	$ra, $sp, 8\n"
+"	ld.d	$ra, $sp, 0\n"
+"	ld.d	$t0, $sp, 8\n"
 "	addi.d	$sp, $sp, 16\n"
 "	jr	$t0\n"
 "	.size		my_tramp2, .-my_tramp2\n"
diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c
index 8f7986d698d8..7a7822dfeb50 100644
--- a/samples/ftrace/ftrace-direct-multi-modify.c
+++ b/samples/ftrace/ftrace-direct-multi-modify.c
@@ -199,8 +199,8 @@ asm (
 "	move	$a0, $t0\n"
 "	bl	my_direct_func1\n"
 "	ld.d	$a0, $sp, 0\n"
-"	ld.d	$t0, $sp, 8\n"
-"	ld.d	$ra, $sp, 16\n"
+"	ld.d	$ra, $sp, 8\n"
+"	ld.d	$t0, $sp, 16\n"
 "	addi.d	$sp, $sp, 32\n"
 "	jr	$t0\n"
 "	.size		my_tramp1, .-my_tramp1\n"
@@ -215,8 +215,8 @@ asm (
 "	move	$a0, $t0\n"
 "	bl	my_direct_func2\n"
 "	ld.d	$a0, $sp, 0\n"
-"	ld.d	$t0, $sp, 8\n"
-"	ld.d	$ra, $sp, 16\n"
+"	ld.d	$ra, $sp, 8\n"
+"	ld.d	$t0, $sp, 16\n"
 "	addi.d	$sp, $sp, 32\n"
 "	jr	$t0\n"
 "	.size		my_tramp2, .-my_tramp2\n"
diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c
index db326c81a27d..3fe6ddaf0b69 100644
--- a/samples/ftrace/ftrace-direct-multi.c
+++ b/samples/ftrace/ftrace-direct-multi.c
@@ -131,8 +131,8 @@ asm (
 "	move	$a0, $t0\n"
 "	bl	my_direct_func\n"
 "	ld.d	$a0, $sp, 0\n"
-"	ld.d	$t0, $sp, 8\n"
-"	ld.d	$ra, $sp, 16\n"
+"	ld.d	$ra, $sp, 8\n"
+"	ld.d	$t0, $sp, 16\n"
 "	addi.d	$sp, $sp, 32\n"
 "	jr	$t0\n"
 "	.size		my_tramp, .-my_tramp\n"
diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c
index 3d0fa260332d..bf2411aa6fd7 100644
--- a/samples/ftrace/ftrace-direct-too.c
+++ b/samples/ftrace/ftrace-direct-too.c
@@ -143,8 +143,8 @@ asm (
 "	ld.d	$a0, $sp, 0\n"
 "	ld.d	$a1, $sp, 8\n"
 "	ld.d	$a2, $sp, 16\n"
-"	ld.d	$t0, $sp, 24\n"
-"	ld.d	$ra, $sp, 32\n"
+"	ld.d	$ra, $sp, 24\n"
+"	ld.d	$t0, $sp, 32\n"
 "	addi.d	$sp, $sp, 48\n"
 "	jr	$t0\n"
 "	.size		my_tramp, .-my_tramp\n"
diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c
index 956834b0d19a..5368c8c39cbb 100644
--- a/samples/ftrace/ftrace-direct.c
+++ b/samples/ftrace/ftrace-direct.c
@@ -124,8 +124,8 @@ asm (
 "	st.d	$ra, $sp, 16\n"
 "	bl	my_direct_func\n"
 "	ld.d	$a0, $sp, 0\n"
-"	ld.d	$t0, $sp, 8\n"
-"	ld.d	$ra, $sp, 16\n"
+"	ld.d	$ra, $sp, 8\n"
+"	ld.d	$t0, $sp, 16\n"
 "	addi.d	$sp, $sp, 32\n"
 "	jr	$t0\n"
 "	.size		my_tramp, .-my_tramp\n"