From da7870162f176ea38bff7380d67222e0430b0e35 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 14 Mar 2025 08:10:09 +0100 Subject: [PATCH 001/279] xtensa: Replace __ASSEMBLY__ with __ASSEMBLER__ in uapi headers __ASSEMBLY__ is only defined by the Makefile of the kernel, so this is not really useful for uapi headers (unless the userspace Makefile defines it, too). Let's switch to __ASSEMBLER__ which gets set automatically by the compiler when compiling assembly code. This is a completely mechanical patch (done with a simple "sed -i" statement). Cc: Chris Zankel Cc: Max Filippov Signed-off-by: Thomas Huth Message-Id: <20250314071013.1575167-39-thuth@redhat.com> Signed-off-by: Max Filippov --- arch/xtensa/include/uapi/asm/ptrace.h | 2 +- arch/xtensa/include/uapi/asm/signal.h | 6 +++--- arch/xtensa/include/uapi/asm/types.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/xtensa/include/uapi/asm/ptrace.h b/arch/xtensa/include/uapi/asm/ptrace.h index 9115e86ebc75..6e89ea301438 100644 --- a/arch/xtensa/include/uapi/asm/ptrace.h +++ b/arch/xtensa/include/uapi/asm/ptrace.h @@ -42,7 +42,7 @@ #define PTRACE_GETFDPIC_EXEC 0 #define PTRACE_GETFDPIC_INTERP 1 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct user_pt_regs { __u32 pc; diff --git a/arch/xtensa/include/uapi/asm/signal.h b/arch/xtensa/include/uapi/asm/signal.h index b8c824dd4b74..8060f1914400 100644 --- a/arch/xtensa/include/uapi/asm/signal.h +++ b/arch/xtensa/include/uapi/asm/signal.h @@ -19,7 +19,7 @@ #define _NSIG_BPW 32 #define _NSIG_WORDS (_NSIG / _NSIG_BPW) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -77,7 +77,7 @@ typedef struct { #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -106,5 +106,5 @@ typedef struct sigaltstack { __kernel_size_t ss_size; } stack_t; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_XTENSA_SIGNAL_H */ diff --git a/arch/xtensa/include/uapi/asm/types.h b/arch/xtensa/include/uapi/asm/types.h index 12db8ac38750..2e9217a06ebf 100644 --- a/arch/xtensa/include/uapi/asm/types.h +++ b/arch/xtensa/include/uapi/asm/types.h @@ -14,7 +14,7 @@ #include -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ # define __XTENSA_UL(x) (x) # define __XTENSA_UL_CONST(x) x #else @@ -23,7 +23,7 @@ # define __XTENSA_UL_CONST(x) ___XTENSA_UL_CONST(x) #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #endif From 44a4ef59d5506c6dc7599d876a3a1014697ec480 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 14 Mar 2025 08:10:10 +0100 Subject: [PATCH 002/279] xtensa: Replace __ASSEMBLY__ with __ASSEMBLER__ in non-uapi headers While the GCC and Clang compilers already define __ASSEMBLER__ automatically when compiling assembly code, __ASSEMBLY__ is a macro that only gets defined by the Makefiles in the kernel. This can be very confusing when switching between userspace and kernelspace coding, or when dealing with uapi headers that rather should use __ASSEMBLER__ instead. So let's standardize on the __ASSEMBLER__ macro that is provided by the compilers now. This is a completely mechanical patch (done with a simple "sed -i" statement). Cc: Chris Zankel Cc: Max Filippov Signed-off-by: Thomas Huth Message-Id: <20250314071013.1575167-40-thuth@redhat.com> Signed-off-by: Max Filippov --- arch/xtensa/include/asm/bootparam.h | 2 +- arch/xtensa/include/asm/cmpxchg.h | 4 ++-- arch/xtensa/include/asm/coprocessor.h | 8 ++++---- arch/xtensa/include/asm/current.h | 2 +- arch/xtensa/include/asm/ftrace.h | 8 ++++---- arch/xtensa/include/asm/initialize_mmu.h | 4 ++-- arch/xtensa/include/asm/jump_label.h | 4 ++-- arch/xtensa/include/asm/kasan.h | 2 +- arch/xtensa/include/asm/kmem_layout.h | 2 +- arch/xtensa/include/asm/page.h | 4 ++-- arch/xtensa/include/asm/pgtable.h | 8 ++++---- arch/xtensa/include/asm/processor.h | 4 ++-- arch/xtensa/include/asm/ptrace.h | 6 +++--- arch/xtensa/include/asm/signal.h | 4 ++-- arch/xtensa/include/asm/thread_info.h | 8 ++++---- arch/xtensa/include/asm/tlbflush.h | 4 ++-- 16 files changed, 37 insertions(+), 37 deletions(-) diff --git a/arch/xtensa/include/asm/bootparam.h b/arch/xtensa/include/asm/bootparam.h index 6333bd1eb9d2..a459ffbaf7ab 100644 --- a/arch/xtensa/include/asm/bootparam.h +++ b/arch/xtensa/include/asm/bootparam.h @@ -27,7 +27,7 @@ #define BP_TAG_FIRST 0x7B0B /* first tag with a version number */ #define BP_TAG_LAST 0x7E0B /* last tag */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* All records are aligned to 4 bytes */ diff --git a/arch/xtensa/include/asm/cmpxchg.h b/arch/xtensa/include/asm/cmpxchg.h index 95e33a913962..b6db4838b175 100644 --- a/arch/xtensa/include/asm/cmpxchg.h +++ b/arch/xtensa/include/asm/cmpxchg.h @@ -11,7 +11,7 @@ #ifndef _XTENSA_CMPXCHG_H #define _XTENSA_CMPXCHG_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -220,6 +220,6 @@ __arch_xchg(unsigned long x, volatile void * ptr, int size) } } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _XTENSA_CMPXCHG_H */ diff --git a/arch/xtensa/include/asm/coprocessor.h b/arch/xtensa/include/asm/coprocessor.h index 3b1a0d5d2169..e0447bcc52c5 100644 --- a/arch/xtensa/include/asm/coprocessor.h +++ b/arch/xtensa/include/asm/coprocessor.h @@ -16,7 +16,7 @@ #include #include -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ # include .macro xchal_sa_start a b @@ -69,7 +69,7 @@ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* * XTENSA_HAVE_COPROCESSOR(x) returns 1 if coprocessor x is configured. @@ -87,7 +87,7 @@ #define XTENSA_HAVE_IO_PORTS \ XCHAL_CP_PORT_MASK -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Additional registers. @@ -151,5 +151,5 @@ void local_coprocessors_flush_release_all(void); #endif /* XTENSA_HAVE_COPROCESSORS */ -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _XTENSA_COPROCESSOR_H */ diff --git a/arch/xtensa/include/asm/current.h b/arch/xtensa/include/asm/current.h index df275d554788..7b483538f066 100644 --- a/arch/xtensa/include/asm/current.h +++ b/arch/xtensa/include/asm/current.h @@ -13,7 +13,7 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include diff --git a/arch/xtensa/include/asm/ftrace.h b/arch/xtensa/include/asm/ftrace.h index 0ea4f84cd558..f676d209d110 100644 --- a/arch/xtensa/include/asm/ftrace.h +++ b/arch/xtensa/include/asm/ftrace.h @@ -12,20 +12,20 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern unsigned long return_address(unsigned level); #define ftrace_return_address(n) return_address(n) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_ADDR ((unsigned long)(_mcount)) #define MCOUNT_INSN_SIZE 3 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern void _mcount(void); #define mcount _mcount -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_FUNCTION_TRACER */ #endif /* _XTENSA_FTRACE_H */ diff --git a/arch/xtensa/include/asm/initialize_mmu.h b/arch/xtensa/include/asm/initialize_mmu.h index 574795a20d6f..101bcb87e15b 100644 --- a/arch/xtensa/include/asm/initialize_mmu.h +++ b/arch/xtensa/include/asm/initialize_mmu.h @@ -34,7 +34,7 @@ #define CA_WRITEBACK (0x4) #endif -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define XTENSA_HWVERSION_RC_2009_0 230000 @@ -240,6 +240,6 @@ .endm -#endif /*__ASSEMBLY__*/ +#endif /*__ASSEMBLER__*/ #endif /* _XTENSA_INITIALIZE_MMU_H */ diff --git a/arch/xtensa/include/asm/jump_label.h b/arch/xtensa/include/asm/jump_label.h index 46c8596259d2..38e3e2a9b0fb 100644 --- a/arch/xtensa/include/asm/jump_label.h +++ b/arch/xtensa/include/asm/jump_label.h @@ -4,7 +4,7 @@ #ifndef _ASM_XTENSA_JUMP_LABEL_H #define _ASM_XTENSA_JUMP_LABEL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -61,5 +61,5 @@ struct jump_entry { jump_label_t key; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/xtensa/include/asm/kasan.h b/arch/xtensa/include/asm/kasan.h index 8d2b4248466f..0da91b64fab9 100644 --- a/arch/xtensa/include/asm/kasan.h +++ b/arch/xtensa/include/asm/kasan.h @@ -2,7 +2,7 @@ #ifndef __ASM_KASAN_H #define __ASM_KASAN_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_KASAN diff --git a/arch/xtensa/include/asm/kmem_layout.h b/arch/xtensa/include/asm/kmem_layout.h index 6fc05cba61a2..6949724625a0 100644 --- a/arch/xtensa/include/asm/kmem_layout.h +++ b/arch/xtensa/include/asm/kmem_layout.h @@ -80,7 +80,7 @@ #if (!XCHAL_HAVE_PTP_MMU || XCHAL_HAVE_SPANNING_WAY) && defined(CONFIG_USE_OF) #define XCHAL_KIO_PADDR xtensa_get_kio_paddr() -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern unsigned long xtensa_kio_paddr; static inline unsigned long xtensa_get_kio_paddr(void) diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h index 644413792bf3..20655174b111 100644 --- a/arch/xtensa/include/asm/page.h +++ b/arch/xtensa/include/asm/page.h @@ -80,7 +80,7 @@ #endif -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define __pgprot(x) (x) @@ -172,7 +172,7 @@ static inline unsigned long ___pa(unsigned long va) #define page_to_virt(page) __va(page_to_pfn(page) << PAGE_SHIFT) #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #include #endif /* _XTENSA_PAGE_H */ diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 1647a7cc3fbf..46b634934f13 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -203,7 +203,7 @@ * What follows is the closest we can get by reasonable means.. * See linux/mm/mmap.c for protection_map[] array that uses these definitions. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define pte_ERROR(e) \ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) @@ -372,10 +372,10 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) return pte; } -#endif /* !defined (__ASSEMBLY__) */ +#endif /* !defined (__ASSEMBLER__) */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* Assembly macro _PGD_INDEX is the same as C pgd_index(unsigned long), * _PGD_OFFSET as C pgd_offset(struct mm_struct*, unsigned long), @@ -414,7 +414,7 @@ void update_mmu_tlb_range(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, unsigned int nr); #define update_mmu_tlb_range update_mmu_tlb_range -#endif /* !defined (__ASSEMBLY__) */ +#endif /* !defined (__ASSEMBLER__) */ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG #define __HAVE_ARCH_PTEP_GET_AND_CLEAR diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 47b5df86ab5c..60a211356335 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -105,7 +105,7 @@ #error Unsupported xtensa ABI #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #if defined(__XTENSA_WINDOWED_ABI__) @@ -263,5 +263,5 @@ static inline unsigned long get_er(unsigned long addr) #endif /* XCHAL_HAVE_EXTERN_REGS */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _XTENSA_PROCESSOR_H */ diff --git a/arch/xtensa/include/asm/ptrace.h b/arch/xtensa/include/asm/ptrace.h index 86c70117371b..f0f5e7c224c9 100644 --- a/arch/xtensa/include/asm/ptrace.h +++ b/arch/xtensa/include/asm/ptrace.h @@ -41,7 +41,7 @@ #define NO_SYSCALL (-1) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -109,11 +109,11 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) int do_syscall_trace_enter(struct pt_regs *regs); void do_syscall_trace_leave(struct pt_regs *regs); -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ # include #define PT_REGS_OFFSET (KERNEL_STACK_SIZE - PT_USER_SIZE) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _XTENSA_PTRACE_H */ diff --git a/arch/xtensa/include/asm/signal.h b/arch/xtensa/include/asm/signal.h index de169b4eaeef..d301e68573cc 100644 --- a/arch/xtensa/include/asm/signal.h +++ b/arch/xtensa/include/asm/signal.h @@ -14,10 +14,10 @@ #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define __ARCH_HAS_SA_RESTORER #include -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _XTENSA_SIGNAL_H */ diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h index e0dffcc43b9e..5b74dfc35ef9 100644 --- a/arch/xtensa/include/asm/thread_info.h +++ b/arch/xtensa/include/asm/thread_info.h @@ -16,7 +16,7 @@ #define CURRENT_SHIFT KERNEL_STACK_SHIFT -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ # include #endif @@ -28,7 +28,7 @@ * must also be changed */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #if XTENSA_HAVE_COPROCESSORS @@ -80,7 +80,7 @@ struct thread_info { * macros/functions for gaining access to the thread information structure */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define INIT_THREAD_INFO(tsk) \ { \ @@ -99,7 +99,7 @@ static __always_inline struct thread_info *current_thread_info(void) return ti; } -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ /* how to get the thread information struct from ASM */ #define GET_THREAD_INFO(reg,sp) \ diff --git a/arch/xtensa/include/asm/tlbflush.h b/arch/xtensa/include/asm/tlbflush.h index 573df8cea200..3edaebeef423 100644 --- a/arch/xtensa/include/asm/tlbflush.h +++ b/arch/xtensa/include/asm/tlbflush.h @@ -20,7 +20,7 @@ #define ITLB_HIT_BIT 3 #define DTLB_HIT_BIT 4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* TLB flushing: * @@ -201,5 +201,5 @@ static inline unsigned long read_itlb_translation (int way) return tmp; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _XTENSA_TLBFLUSH_H */ From 5d939fbdd480cdf276eccc01eda3ed41e37d3f8a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Mon, 9 Jun 2025 23:34:04 -0400 Subject: [PATCH 003/279] tools/power turbostat: regression fix: --show C1E% The new default idle counter groupings broke "--show C1E%" (or any other C-state %) Also delete a stray debug printf from the same offending commit. Reported-by: Zhang Rui Fixes: ec4acd3166d8 ("tools/power turbostat: disable "cpuidle" invocation counters, by default") Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 5230e072e414..33a54a9e0781 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2429,7 +2429,6 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) } if (i == MAX_BIC) { - fprintf(stderr, "deferred %s\n", name_list); if (mode == SHOW_LIST) { deferred_add_names[deferred_add_index++] = name_list; if (deferred_add_index >= MAX_DEFERRED) { @@ -10537,9 +10536,6 @@ void probe_cpuidle_residency(void) int min_state = 1024, max_state = 0; char *sp; - if (!DO_BIC(BIC_pct_idle)) - return; - for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); From f393a761763c542761abcf978252d431269366d6 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Tue, 8 Jul 2025 14:56:23 +0200 Subject: [PATCH 004/279] efi: add ovmf debug log driver Recent OVMF versions (edk2-stable202508 + newer) can write their debug log to a memory buffer. This driver exposes the log content via sysfs (/sys/firmware/efi/ovmf_debug_log). Signed-off-by: Gerd Hoffmann Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/Kconfig | 8 ++ drivers/firmware/efi/Makefile | 1 + drivers/firmware/efi/efi.c | 8 ++ drivers/firmware/efi/ovmf-debug-log.c | 111 ++++++++++++++++++++++++++ include/linux/efi.h | 4 + 5 files changed, 132 insertions(+) create mode 100644 drivers/firmware/efi/ovmf-debug-log.c diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index db8c5c03d3a2..eb1bff6968a5 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -263,6 +263,14 @@ config EFI_COCO_SECRET virt/coco/efi_secret module to access the secrets, which in turn allows userspace programs to access the injected secrets. +config OVMF_DEBUG_LOG + bool "Expose OVMF firmware debug log via sysfs" + depends on EFI + help + Recent OVMF versions (edk2-stable202508 + newer) can write + their debug log to a memory buffer. This driver exposes the + log content via sysfs (/sys/firmware/efi/ovmf_debug_log). + config UNACCEPTED_MEMORY bool depends on EFI_STUB diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index a2d0009560d0..8efbcf699e4f 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_APPLE_PROPERTIES) += apple-properties.o obj-$(CONFIG_EFI_RCI2_TABLE) += rci2-table.o obj-$(CONFIG_EFI_EMBEDDED_FIRMWARE) += embedded-firmware.o obj-$(CONFIG_LOAD_UEFI_KEYS) += mokvar-table.o +obj-$(CONFIG_OVMF_DEBUG_LOG) += ovmf-debug-log.o obj-$(CONFIG_SYSFB) += sysfb_efi.o diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index e57bff702b5f..1ce428e2ac8a 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -45,6 +45,7 @@ struct efi __read_mostly efi = { .esrt = EFI_INVALID_TABLE_ADDR, .tpm_log = EFI_INVALID_TABLE_ADDR, .tpm_final_log = EFI_INVALID_TABLE_ADDR, + .ovmf_debug_log = EFI_INVALID_TABLE_ADDR, #ifdef CONFIG_LOAD_UEFI_KEYS .mokvar_table = EFI_INVALID_TABLE_ADDR, #endif @@ -473,6 +474,10 @@ static int __init efisubsys_init(void) platform_device_register_simple("efi_secret", 0, NULL, 0); #endif + if (IS_ENABLED(CONFIG_OVMF_DEBUG_LOG) && + efi.ovmf_debug_log != EFI_INVALID_TABLE_ADDR) + ovmf_log_probe(efi.ovmf_debug_log); + return 0; err_remove_group: @@ -617,6 +622,9 @@ static const efi_config_table_type_t common_tables[] __initconst = { {LINUX_EFI_MEMRESERVE_TABLE_GUID, &mem_reserve, "MEMRESERVE" }, {LINUX_EFI_INITRD_MEDIA_GUID, &initrd, "INITRD" }, {EFI_RT_PROPERTIES_TABLE_GUID, &rt_prop, "RTPROP" }, +#ifdef CONFIG_OVMF_DEBUG_LOG + {OVMF_MEMORY_LOG_TABLE_GUID, &efi.ovmf_debug_log, "OvmfDebugLog" }, +#endif #ifdef CONFIG_EFI_RCI2_TABLE {DELLEMC_EFI_RCI2_TABLE_GUID, &rci2_table_phys }, #endif diff --git a/drivers/firmware/efi/ovmf-debug-log.c b/drivers/firmware/efi/ovmf-debug-log.c new file mode 100644 index 000000000000..5b2471ffaeed --- /dev/null +++ b/drivers/firmware/efi/ovmf-debug-log.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include + +#define OVMF_DEBUG_LOG_MAGIC1 0x3167646d666d766f // "ovmfmdg1" +#define OVMF_DEBUG_LOG_MAGIC2 0x3267646d666d766f // "ovmfmdg2" + +struct ovmf_debug_log_header { + u64 magic1; + u64 magic2; + u64 hdr_size; + u64 log_size; + u64 lock; // edk2 spinlock + u64 head_off; + u64 tail_off; + u64 truncated; + u8 fw_version[128]; +}; + +static struct ovmf_debug_log_header *hdr; +static u8 *logbuf; +static u64 logbufsize; + +static ssize_t ovmf_log_read(struct file *filp, struct kobject *kobj, + const struct bin_attribute *attr, char *buf, + loff_t offset, size_t count) +{ + u64 start, end; + + start = hdr->head_off + offset; + if (hdr->head_off > hdr->tail_off && start >= hdr->log_size) + start -= hdr->log_size; + + end = start + count; + if (start > hdr->tail_off) { + if (end > hdr->log_size) + end = hdr->log_size; + } else { + if (end > hdr->tail_off) + end = hdr->tail_off; + } + + if (start > logbufsize || end > logbufsize) + return 0; + if (start >= end) + return 0; + + memcpy(buf, logbuf + start, end - start); + return end - start; +} + +static struct bin_attribute ovmf_log_bin_attr = { + .attr = { + .name = "ovmf_debug_log", + .mode = 0444, + }, + .read = ovmf_log_read, +}; + +int __init ovmf_log_probe(unsigned long ovmf_debug_log_table) +{ + int ret = -EINVAL; + u64 size; + + /* map + verify header */ + hdr = memremap(ovmf_debug_log_table, sizeof(*hdr), MEMREMAP_WB); + if (!hdr) { + pr_err("OVMF debug log: header map failed\n"); + return -EINVAL; + } + + if (hdr->magic1 != OVMF_DEBUG_LOG_MAGIC1 || + hdr->magic2 != OVMF_DEBUG_LOG_MAGIC2) { + printk(KERN_ERR "OVMF debug log: magic mismatch\n"); + goto err_unmap; + } + + size = hdr->hdr_size + hdr->log_size; + pr_info("OVMF debug log: firmware version: \"%s\"\n", hdr->fw_version); + pr_info("OVMF debug log: buffer size: %lluk\n", size / 1024); + + /* map complete log buffer */ + memunmap(hdr); + hdr = memremap(ovmf_debug_log_table, size, MEMREMAP_WB); + if (!hdr) { + pr_err("OVMF debug log: buffer map failed\n"); + return -EINVAL; + } + logbuf = (void *)hdr + hdr->hdr_size; + logbufsize = hdr->log_size; + + ovmf_log_bin_attr.size = size; + ret = sysfs_create_bin_file(efi_kobj, &ovmf_log_bin_attr); + if (ret != 0) { + pr_err("OVMF debug log: sysfs register failed\n"); + goto err_unmap; + } + + return 0; + +err_unmap: + memunmap(hdr); + return ret; +} diff --git a/include/linux/efi.h b/include/linux/efi.h index 7d63d1d75f22..50db7df0efab 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -439,6 +439,7 @@ void efi_native_runtime_setup(void); /* OVMF protocol GUIDs */ #define OVMF_SEV_MEMORY_ACCEPTANCE_PROTOCOL_GUID EFI_GUID(0xc5a010fe, 0x38a7, 0x4531, 0x8a, 0x4a, 0x05, 0x00, 0xd2, 0xfd, 0x16, 0x49) +#define OVMF_MEMORY_LOG_TABLE_GUID EFI_GUID(0x95305139, 0xb20f, 0x4723, 0x84, 0x25, 0x62, 0x7c, 0x88, 0x8f, 0xf1, 0x21) typedef struct { efi_guid_t guid; @@ -642,6 +643,7 @@ extern struct efi { unsigned long esrt; /* ESRT table */ unsigned long tpm_log; /* TPM2 Event Log table */ unsigned long tpm_final_log; /* TPM2 Final Events Log table */ + unsigned long ovmf_debug_log; unsigned long mokvar_table; /* MOK variable config table */ unsigned long coco_secret; /* Confidential computing secret table */ unsigned long unaccepted; /* Unaccepted memory table */ @@ -1344,6 +1346,8 @@ bool efi_config_table_is_usable(const efi_guid_t *guid, unsigned long table) umode_t efi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n); +int ovmf_log_probe(unsigned long ovmf_debug_log_table); + /* * efivar ops event type */ From 33927f3d0ecdcff06326d6e4edb6166aed42811c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 12 Jul 2025 06:02:31 +0100 Subject: [PATCH 005/279] habanalabs: fix UAF in export_dmabuf() As soon as we'd inserted a file reference into descriptor table, another thread could close it. That's fine for the case when all we are doing is returning that descriptor to userland (it's a race, but it's a userland race and there's nothing the kernel can do about it). However, if we follow fd_install() with any kind of access to objects that would be destroyed on close (be it the struct file itself or anything destroyed by its ->release()), we have a UAF. dma_buf_fd() is a combination of reserving a descriptor and fd_install(). habanalabs export_dmabuf() calls it and then proceeds to access the objects destroyed on close. In particular, it grabs an extra reference to another struct file that will be dropped as part of ->release() for ours; that "will be" is actually "might have already been". Fix that by reserving descriptor before anything else and do fd_install() only when everything had been set up. As a side benefit, we no longer have the failure exit with file already created, but reference to underlying file (as well as ->dmabuf_export_cnt, etc.) not grabbed yet; unlike dma_buf_fd(), fd_install() can't fail. Fixes: db1a8dd916aa ("habanalabs: add support for dma-buf exporter") Signed-off-by: Al Viro --- drivers/accel/habanalabs/common/memory.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c index 601fdbe70179..61472a381904 100644 --- a/drivers/accel/habanalabs/common/memory.c +++ b/drivers/accel/habanalabs/common/memory.c @@ -1829,9 +1829,6 @@ static void hl_release_dmabuf(struct dma_buf *dmabuf) struct hl_dmabuf_priv *hl_dmabuf = dmabuf->priv; struct hl_ctx *ctx; - if (!hl_dmabuf) - return; - ctx = hl_dmabuf->ctx; if (hl_dmabuf->memhash_hnode) @@ -1859,7 +1856,12 @@ static int export_dmabuf(struct hl_ctx *ctx, { DEFINE_DMA_BUF_EXPORT_INFO(exp_info); struct hl_device *hdev = ctx->hdev; - int rc, fd; + CLASS(get_unused_fd, fd)(flags); + + if (fd < 0) { + dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf, %d\n", fd); + return fd; + } exp_info.ops = &habanalabs_dmabuf_ops; exp_info.size = total_size; @@ -1872,13 +1874,6 @@ static int export_dmabuf(struct hl_ctx *ctx, return PTR_ERR(hl_dmabuf->dmabuf); } - fd = dma_buf_fd(hl_dmabuf->dmabuf, flags); - if (fd < 0) { - dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf, %d\n", fd); - rc = fd; - goto err_dma_buf_put; - } - hl_dmabuf->ctx = ctx; hl_ctx_get(hl_dmabuf->ctx); atomic_inc(&ctx->hdev->dmabuf_export_cnt); @@ -1890,13 +1885,9 @@ static int export_dmabuf(struct hl_ctx *ctx, get_file(ctx->hpriv->file_priv->filp); *dmabuf_fd = fd; + fd_install(take_fd(fd), hl_dmabuf->dmabuf->file); return 0; - -err_dma_buf_put: - hl_dmabuf->dmabuf->priv = NULL; - dma_buf_put(hl_dmabuf->dmabuf); - return rc; } static int validate_export_params_common(struct hl_device *hdev, u64 addr, u64 size, u64 offset) From ce60ab3964782df9ba34f0a64c0bc766dd508bde Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 29 May 2025 06:45:45 -0400 Subject: [PATCH 006/279] Expand the type of nfs_fattr->valid We need to be able to track more than 32 attributes per inode. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/1e3405fca54efd0be7c91c1da77917b94f5dfcc4.1748515333.git.bcodding@redhat.com Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 +- include/linux/nfs_fs_sb.h | 2 +- include/linux/nfs_xdr.h | 54 +++++++++++++++++++-------------------- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a2fa6bc4d74e..17f5dcda2a00 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2209,7 +2209,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) bool attr_changed = false; bool have_delegation; - dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", + dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%llx)\n", __func__, inode->i_sb->s_id, inode->i_ino, nfs_display_fhandle_hash(NFS_FH(inode)), atomic_read(&inode->i_count), fattr->valid); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 63141320c2a8..d7895eeccea3 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -172,8 +172,8 @@ struct nfs_server { #define NFS_MOUNT_FORCE_RDIRPLUS 0x20000000 #define NFS_MOUNT_NETUNREACH_FATAL 0x40000000 - unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ + __u64 fattr_valid; /* Valid attributes */ unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ unsigned int wsize; /* write size */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 67f6632f723b..9cacbbd14787 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -45,7 +45,7 @@ struct nfs4_threshold { }; struct nfs_fattr { - unsigned int valid; /* which fields are valid */ + __u64 valid; /* which fields are valid */ umode_t mode; __u32 nlink; kuid_t uid; @@ -80,32 +80,32 @@ struct nfs_fattr { struct nfs4_label *label; }; -#define NFS_ATTR_FATTR_TYPE (1U << 0) -#define NFS_ATTR_FATTR_MODE (1U << 1) -#define NFS_ATTR_FATTR_NLINK (1U << 2) -#define NFS_ATTR_FATTR_OWNER (1U << 3) -#define NFS_ATTR_FATTR_GROUP (1U << 4) -#define NFS_ATTR_FATTR_RDEV (1U << 5) -#define NFS_ATTR_FATTR_SIZE (1U << 6) -#define NFS_ATTR_FATTR_PRESIZE (1U << 7) -#define NFS_ATTR_FATTR_BLOCKS_USED (1U << 8) -#define NFS_ATTR_FATTR_SPACE_USED (1U << 9) -#define NFS_ATTR_FATTR_FSID (1U << 10) -#define NFS_ATTR_FATTR_FILEID (1U << 11) -#define NFS_ATTR_FATTR_ATIME (1U << 12) -#define NFS_ATTR_FATTR_MTIME (1U << 13) -#define NFS_ATTR_FATTR_CTIME (1U << 14) -#define NFS_ATTR_FATTR_PREMTIME (1U << 15) -#define NFS_ATTR_FATTR_PRECTIME (1U << 16) -#define NFS_ATTR_FATTR_CHANGE (1U << 17) -#define NFS_ATTR_FATTR_PRECHANGE (1U << 18) -#define NFS_ATTR_FATTR_V4_LOCATIONS (1U << 19) -#define NFS_ATTR_FATTR_V4_REFERRAL (1U << 20) -#define NFS_ATTR_FATTR_MOUNTPOINT (1U << 21) -#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID (1U << 22) -#define NFS_ATTR_FATTR_OWNER_NAME (1U << 23) -#define NFS_ATTR_FATTR_GROUP_NAME (1U << 24) -#define NFS_ATTR_FATTR_V4_SECURITY_LABEL (1U << 25) +#define NFS_ATTR_FATTR_TYPE BIT_ULL(0) +#define NFS_ATTR_FATTR_MODE BIT_ULL(1) +#define NFS_ATTR_FATTR_NLINK BIT_ULL(2) +#define NFS_ATTR_FATTR_OWNER BIT_ULL(3) +#define NFS_ATTR_FATTR_GROUP BIT_ULL(4) +#define NFS_ATTR_FATTR_RDEV BIT_ULL(5) +#define NFS_ATTR_FATTR_SIZE BIT_ULL(6) +#define NFS_ATTR_FATTR_PRESIZE BIT_ULL(7) +#define NFS_ATTR_FATTR_BLOCKS_USED BIT_ULL(8) +#define NFS_ATTR_FATTR_SPACE_USED BIT_ULL(9) +#define NFS_ATTR_FATTR_FSID BIT_ULL(10) +#define NFS_ATTR_FATTR_FILEID BIT_ULL(11) +#define NFS_ATTR_FATTR_ATIME BIT_ULL(12) +#define NFS_ATTR_FATTR_MTIME BIT_ULL(13) +#define NFS_ATTR_FATTR_CTIME BIT_ULL(14) +#define NFS_ATTR_FATTR_PREMTIME BIT_ULL(15) +#define NFS_ATTR_FATTR_PRECTIME BIT_ULL(16) +#define NFS_ATTR_FATTR_CHANGE BIT_ULL(17) +#define NFS_ATTR_FATTR_PRECHANGE BIT_ULL(18) +#define NFS_ATTR_FATTR_V4_LOCATIONS BIT_ULL(19) +#define NFS_ATTR_FATTR_V4_REFERRAL BIT_ULL(20) +#define NFS_ATTR_FATTR_MOUNTPOINT BIT_ULL(21) +#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID BIT_ULL(22) +#define NFS_ATTR_FATTR_OWNER_NAME BIT_ULL(23) +#define NFS_ATTR_FATTR_GROUP_NAME BIT_ULL(24) +#define NFS_ATTR_FATTR_V4_SECURITY_LABEL BIT_ULL(25) #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \ | NFS_ATTR_FATTR_MODE \ From 1c7ae2dd3f0e6d07ec0a5a348f2561f2171b9c81 Mon Sep 17 00:00:00 2001 From: Anne Marie Merritt Date: Thu, 29 May 2025 06:45:46 -0400 Subject: [PATCH 007/279] nfs: Add timecreate to nfs inode Add tracking of the create time (a.k.a. btime) along with corresponding bitfields, request, and decode xdr routines. Signed-off-by: Anne Marie Merritt Signed-off-by: Lance Shelton Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/1e3677b0655fa2bbaba0817b41d111d94a06e5ee.1748515333.git.bcodding@redhat.com Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 17 +++++++++++++++-- fs/nfs/nfs4proc.c | 14 +++++++++++++- fs/nfs/nfs4xdr.c | 24 ++++++++++++++++++++++++ fs/nfs/nfstrace.h | 3 ++- include/linux/nfs_fs.h | 8 ++++++++ include/linux/nfs_xdr.h | 3 +++ 6 files changed, 65 insertions(+), 4 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 17f5dcda2a00..c5462aed6bf5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -197,6 +197,7 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) if (!(flags & NFS_INO_REVAL_FORCED)) flags &= ~(NFS_INO_INVALID_MODE | NFS_INO_INVALID_OTHER | + NFS_INO_INVALID_BTIME | NFS_INO_INVALID_XATTR); flags &= ~(NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE); } @@ -522,6 +523,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode_set_atime(inode, 0, 0); inode_set_mtime(inode, 0, 0); inode_set_ctime(inode, 0, 0); + memset(&nfsi->btime, 0, sizeof(nfsi->btime)); inode_set_iversion_raw(inode, 0); inode->i_size = 0; clear_nlink(inode); @@ -545,6 +547,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode_set_ctime_to_ts(inode, fattr->ctime); else if (fattr_supported & NFS_ATTR_FATTR_CTIME) nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME); + if (fattr->valid & NFS_ATTR_FATTR_BTIME) + nfsi->btime = fattr->btime; + else if (fattr_supported & NFS_ATTR_FATTR_BTIME) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_BTIME); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) inode_set_iversion_raw(inode, fattr->change_attr); else @@ -1943,7 +1949,7 @@ static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr, NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE | NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_OTHER | - NFS_INO_INVALID_NLINK; + NFS_INO_INVALID_NLINK | NFS_INO_INVALID_BTIME; unsigned long cache_validity = NFS_I(inode)->cache_validity; enum nfs4_change_attr_type ctype = NFS_SERVER(inode)->change_attr_type; @@ -2304,7 +2310,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK | NFS_INO_INVALID_MODE - | NFS_INO_INVALID_OTHER; + | NFS_INO_INVALID_OTHER + | NFS_INO_INVALID_BTIME; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); attr_changed = true; @@ -2338,6 +2345,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_CTIME; + if (fattr->valid & NFS_ATTR_FATTR_BTIME) + nfsi->btime = fattr->btime; + else if (fattr_supported & NFS_ATTR_FATTR_BTIME) + nfsi->cache_validity |= + save_cache_validity & NFS_INO_INVALID_BTIME; + /* Check if our cached file size is stale */ if (fattr->valid & NFS_ATTR_FATTR_SIZE) { new_isize = nfs_size_to_loff_t(fattr->size); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 341740fa293d..92f1b2601b67 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -222,6 +222,7 @@ const u32 nfs4_fattr_bitmap[3] = { | FATTR4_WORD1_RAWDEV | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS + | FATTR4_WORD1_TIME_CREATE | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_MOUNTED_ON_FILEID, @@ -243,6 +244,7 @@ static const u32 nfs4_pnfs_open_bitmap[3] = { | FATTR4_WORD1_RAWDEV | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS + | FATTR4_WORD1_TIME_CREATE | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_MODIFY, FATTR4_WORD2_MDSTHRESHOLD @@ -323,6 +325,9 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src, if (!(cache_validity & NFS_INO_INVALID_OTHER)) dst[1] &= ~(FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP); + if (!(cache_validity & NFS_INO_INVALID_BTIME)) + dst[1] &= ~FATTR4_WORD1_TIME_CREATE; + if (nfs_have_delegated_mtime(inode)) { if (!(cache_validity & NFS_INO_INVALID_ATIME)) dst[1] &= ~(FATTR4_WORD1_TIME_ACCESS|FATTR4_WORD1_TIME_ACCESS_SET); @@ -1307,7 +1312,8 @@ nfs4_update_changeattr_locked(struct inode *inode, NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER | NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK | - NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR; + NFS_INO_INVALID_MODE | NFS_INO_INVALID_BTIME | + NFS_INO_INVALID_XATTR; nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); } nfsi->attrtimeo_timestamp = jiffies; @@ -4047,6 +4053,10 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f server->fattr_valid &= ~NFS_ATTR_FATTR_CTIME; if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY)) server->fattr_valid &= ~NFS_ATTR_FATTR_MTIME; + if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY)) + server->fattr_valid &= ~NFS_ATTR_FATTR_MTIME; + if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_CREATE)) + server->fattr_valid &= ~NFS_ATTR_FATTR_BTIME; memcpy(server->attr_bitmask_nl, res.attr_bitmask, sizeof(server->attr_bitmask)); server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL; @@ -5781,6 +5791,8 @@ void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[], bitmask[1] |= FATTR4_WORD1_TIME_MODIFY; if (cache_validity & NFS_INO_INVALID_BLOCKS) bitmask[1] |= FATTR4_WORD1_SPACE_USED; + if (cache_validity & NFS_INO_INVALID_BTIME) + bitmask[1] |= FATTR4_WORD1_TIME_CREATE; if (cache_validity & NFS_INO_INVALID_SIZE) bitmask[0] |= FATTR4_WORD0_SIZE; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 318afde38057..49ff98571fa5 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1623,6 +1623,7 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg | FATTR4_WORD1_RAWDEV | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS + | FATTR4_WORD1_TIME_CREATE | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_MODIFY; attrs[2] |= FATTR4_WORD2_SECURITY_LABEL; @@ -4207,6 +4208,24 @@ static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, str return status; } +static int decode_attr_time_create(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time) +{ + int status = 0; + + time->tv_sec = 0; + time->tv_nsec = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_CREATE - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_TIME_CREATE)) { + status = decode_attr_time(xdr, time); + if (status == 0) + status = NFS_ATTR_FATTR_BTIME; + bitmap[1] &= ~FATTR4_WORD1_TIME_CREATE; + } + dprintk("%s: btime=%lld\n", __func__, time->tv_sec); + return status; +} + static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec64 *time) { int status = 0; @@ -4781,6 +4800,11 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; + status = decode_attr_time_create(xdr, bitmap, &fattr->btime); + if (status < 0) + goto xdr_error; + fattr->valid |= status; + status = decode_attr_time_metadata(xdr, bitmap, &fattr->ctime); if (status < 0) goto xdr_error; diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 7a058bd8c566..f49f064c5ee5 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -32,7 +32,8 @@ { NFS_INO_INVALID_BLOCKS, "INVALID_BLOCKS" }, \ { NFS_INO_INVALID_XATTR, "INVALID_XATTR" }, \ { NFS_INO_INVALID_NLINK, "INVALID_NLINK" }, \ - { NFS_INO_INVALID_MODE, "INVALID_MODE" }) + { NFS_INO_INVALID_MODE, "INVALID_MODE" }, \ + { NFS_INO_INVALID_BTIME, "INVALID_BTIME" }) #define nfs_show_nfsi_flags(v) \ __print_flags(v, "|", \ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 67ae2c3f41d2..c585939b6cd6 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -160,6 +160,12 @@ struct nfs_inode { unsigned long flags; /* atomic bit ops */ unsigned long cache_validity; /* bit mask */ + /* + * NFS Attributes not included in struct inode + */ + + struct timespec64 btime; + /* * read_cache_jiffies is when we started read-caching this inode. * attrtimeo is for how long the cached information is assumed @@ -316,10 +322,12 @@ struct nfs4_copy_state { #define NFS_INO_INVALID_XATTR BIT(15) /* xattrs are invalid */ #define NFS_INO_INVALID_NLINK BIT(16) /* cached nlinks is invalid */ #define NFS_INO_INVALID_MODE BIT(17) /* cached mode is invalid */ +#define NFS_INO_INVALID_BTIME BIT(18) /* cached btime is invalid */ #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ | NFS_INO_INVALID_CTIME \ | NFS_INO_INVALID_MTIME \ + | NFS_INO_INVALID_BTIME \ | NFS_INO_INVALID_SIZE \ | NFS_INO_INVALID_NLINK \ | NFS_INO_INVALID_MODE \ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9cacbbd14787..ac4bff6e9913 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -67,6 +67,7 @@ struct nfs_fattr { struct timespec64 atime; struct timespec64 mtime; struct timespec64 ctime; + struct timespec64 btime; __u64 change_attr; /* NFSv4 change attribute */ __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ __u64 pre_size; /* pre_op_attr.size */ @@ -106,6 +107,7 @@ struct nfs_fattr { #define NFS_ATTR_FATTR_OWNER_NAME BIT_ULL(23) #define NFS_ATTR_FATTR_GROUP_NAME BIT_ULL(24) #define NFS_ATTR_FATTR_V4_SECURITY_LABEL BIT_ULL(25) +#define NFS_ATTR_FATTR_BTIME BIT_ULL(26) #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \ | NFS_ATTR_FATTR_MODE \ @@ -126,6 +128,7 @@ struct nfs_fattr { | NFS_ATTR_FATTR_SPACE_USED) #define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \ | NFS_ATTR_FATTR_SPACE_USED \ + | NFS_ATTR_FATTR_BTIME \ | NFS_ATTR_FATTR_V4_SECURITY_LABEL) /* From 4b5427414749233fb2315a89c9fa64328cf5ec03 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 29 May 2025 06:45:47 -0400 Subject: [PATCH 008/279] NFS: Return the file btime in the statx results when appropriate If the server supports the NFSv4.x "create_time" attribute, then return it as part of the statx results. Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/eae27d6467e08aaa67e0ac6ae7119263a0f83349.1748515333.git.bcodding@redhat.com Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 15 +++++++++++++-- fs/nfs/nfs4trace.h | 3 ++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c5462aed6bf5..4c7fa4f2bd5e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -937,6 +937,7 @@ static void nfs_readdirplus_parent_cache_hit(struct dentry *dentry) static u32 nfs_get_valid_attrmask(struct inode *inode) { + u64 fattr_valid = NFS_SERVER(inode)->fattr_valid; unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); u32 reply_mask = STATX_INO | STATX_TYPE; @@ -956,6 +957,9 @@ static u32 nfs_get_valid_attrmask(struct inode *inode) reply_mask |= STATX_UID | STATX_GID; if (!(cache_validity & NFS_INO_INVALID_BLOCKS)) reply_mask |= STATX_BLOCKS; + if (!(cache_validity & NFS_INO_INVALID_BTIME) && + (fattr_valid & NFS_ATTR_FATTR_BTIME)) + reply_mask |= STATX_BTIME; if (!(cache_validity & NFS_INO_INVALID_CHANGE)) reply_mask |= STATX_CHANGE_COOKIE; return reply_mask; @@ -966,6 +970,7 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, { struct inode *inode = d_inode(path->dentry); struct nfs_server *server = NFS_SERVER(inode); + u64 fattr_valid = server->fattr_valid; unsigned long cache_validity; int err = 0; bool force_sync = query_flags & AT_STATX_FORCE_SYNC; @@ -976,9 +981,12 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, request_mask &= STATX_TYPE | STATX_MODE | STATX_NLINK | STATX_UID | STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME | - STATX_INO | STATX_SIZE | STATX_BLOCKS | + STATX_INO | STATX_SIZE | STATX_BLOCKS | STATX_BTIME | STATX_CHANGE_COOKIE; + if (!(fattr_valid & NFS_ATTR_FATTR_BTIME)) + request_mask &= ~STATX_BTIME; + if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) { if (readdirplus_enabled) nfs_readdirplus_parent_cache_hit(path->dentry); @@ -1010,7 +1018,7 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, /* Is the user requesting attributes that might need revalidation? */ if (!(request_mask & (STATX_MODE|STATX_NLINK|STATX_ATIME|STATX_CTIME| STATX_MTIME|STATX_UID|STATX_GID| - STATX_SIZE|STATX_BLOCKS| + STATX_SIZE|STATX_BLOCKS|STATX_BTIME| STATX_CHANGE_COOKIE))) goto out_no_revalidate; @@ -1034,6 +1042,8 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, do_update |= cache_validity & NFS_INO_INVALID_OTHER; if (request_mask & STATX_BLOCKS) do_update |= cache_validity & NFS_INO_INVALID_BLOCKS; + if (request_mask & STATX_BTIME) + do_update |= cache_validity & NFS_INO_INVALID_BTIME; if (do_update) { if (readdirplus_enabled) @@ -1055,6 +1065,7 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC; if (S_ISDIR(inode->i_mode)) stat->blksize = NFS_SERVER(inode)->dtsize; + stat->btime = NFS_I(inode)->btime; out: trace_nfs_getattr_exit(inode, err); return err; diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index deab4c0e21a0..553e45502588 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -30,7 +30,8 @@ { NFS_ATTR_FATTR_CTIME, "CTIME" }, \ { NFS_ATTR_FATTR_CHANGE, "CHANGE" }, \ { NFS_ATTR_FATTR_OWNER_NAME, "OWNER_NAME" }, \ - { NFS_ATTR_FATTR_GROUP_NAME, "GROUP_NAME" }) + { NFS_ATTR_FATTR_GROUP_NAME, "GROUP_NAME" }, \ + { NFS_ATTR_FATTR_BTIME, "BTIME" }) DECLARE_EVENT_CLASS(nfs4_clientid_event, TP_PROTO( From c1b0b9d79fdf8a86451eac914fe6f5cf39bbfc5f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 9 Jun 2025 08:15:17 +1000 Subject: [PATCH 009/279] nfs: use lock_two_nondirectories() Rather than open-coding this function call it to make intention clear and to use "correct" nesting levels (parent and child are for directories). This is purely cosmetic with no expected change in behaviour. Signed-off-by: NeilBrown Link: https://lore.kernel.org/r/174942091741.608730.3327223511347232829@noble.neil.brown.name Signed-off-by: Trond Myklebust --- fs/nfs/nfs4file.c | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 5e9d66f3466c..53a958746bb0 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -253,7 +253,6 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off, struct nfs_server *server = NFS_SERVER(dst_inode); struct inode *src_inode = file_inode(src_file); unsigned int bs = server->clone_blksize; - bool same_inode = false; int ret; /* NFS does not support deduplication. */ @@ -275,20 +274,8 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off, goto out; } - if (src_inode == dst_inode) - same_inode = true; - /* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */ - if (same_inode) { - inode_lock(src_inode); - } else if (dst_inode < src_inode) { - inode_lock_nested(dst_inode, I_MUTEX_PARENT); - inode_lock_nested(src_inode, I_MUTEX_CHILD); - } else { - inode_lock_nested(src_inode, I_MUTEX_PARENT); - inode_lock_nested(dst_inode, I_MUTEX_CHILD); - } - + lock_two_nondirectories(src_inode, dst_inode); /* flush all pending writes on both src and dst so that server * has the latest data */ ret = nfs_sync_inode(src_inode); @@ -306,15 +293,7 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off, truncate_inode_pages_range(&dst_inode->i_data, dst_off, dst_off + count - 1); out_unlock: - if (same_inode) { - inode_unlock(src_inode); - } else if (dst_inode < src_inode) { - inode_unlock(src_inode); - inode_unlock(dst_inode); - } else { - inode_unlock(dst_inode); - inode_unlock(src_inode); - } + unlock_two_nondirectories(src_inode, dst_inode); out: return ret < 0 ? ret : count; } From a9e21837208d63f42a3387bdf826605c1904be9b Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Tue, 10 Jun 2025 17:12:46 +0200 Subject: [PATCH 010/279] pnfs: add pnfs_ds_connect trace point This tracepoint aims to expose pnfs DS connect status Signed-off-by: Tigran Mkrtchyan Reviewed-by: Benjamin Coddington Link: https://lore.kernel.org/r/20250610151246.9147-1-tigran.mkrtchyan@desy.de Signed-off-by: Trond Myklebust --- fs/nfs/nfs4trace.c | 1 + fs/nfs/nfs4trace.h | 26 ++++++++++++++++++++++++++ fs/nfs/pnfs_nfs.c | 14 +++++++++----- 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c index 389941ccc9c9..436763a559cd 100644 --- a/fs/nfs/nfs4trace.c +++ b/fs/nfs/nfs4trace.c @@ -26,6 +26,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_done); EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_done); EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_pagelist); EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_pagelist); +EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_ds_connect); EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_read_error); EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_write_error); diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 553e45502588..9f69d7e14925 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -274,6 +274,32 @@ TRACE_EVENT(nfs4_cb_offload, show_nfs_stable_how(__entry->cb_how) ) ); + +TRACE_EVENT(pnfs_ds_connect, + TP_PROTO( + char *ds_remotestr, + int status + ), + + TP_ARGS(ds_remotestr, status), + + TP_STRUCT__entry( + __string(ds_ips, ds_remotestr) + __field(int, status) + ), + + TP_fast_assign( + __assign_str(ds_ips); + __entry->status = status; + ), + + TP_printk( + "ds_ips=%s, status=%d", + __get_str(ds_ips), + __entry->status + ) +); + #endif /* CONFIG_NFS_V4_1 */ TRACE_EVENT(nfs4_setup_sequence, diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index b4ccdf78d4dd..7b32afb29782 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -17,6 +17,7 @@ #include "internal.h" #include "pnfs.h" #include "netns.h" +#include "nfs4trace.h" #define NFSDBG_FACILITY NFSDBG_PNFS @@ -1007,8 +1008,10 @@ int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, err = nfs4_wait_ds_connect(ds); if (err || ds->ds_clp) goto out; - if (nfs4_test_deviceid_unavailable(devid)) - return -ENODEV; + if (nfs4_test_deviceid_unavailable(devid)) { + err = -ENODEV; + goto out; + } } while (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) != 0); if (ds->ds_clp) @@ -1038,11 +1041,12 @@ int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) { WARN_ON_ONCE(ds->ds_clp || !nfs4_test_deviceid_unavailable(devid)); - return -EINVAL; - } - err = nfs_client_init_status(ds->ds_clp); + err = -EINVAL; + } else + err = nfs_client_init_status(ds->ds_clp); } + trace_pnfs_ds_connect(ds->ds_remotestr, err); return err; } EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect); From 0715a72ee9a38461eac4b34388b772914f269119 Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Fri, 13 Jun 2025 11:44:37 +0200 Subject: [PATCH 011/279] NFS: remove unused wpages field from struct nfs_server The wpages field is not serving any purpose since commit c63c7b051395 ("NFS: Fix a race when doing NFS write coalescing") which was merged in v2.6.22-rc1. Remove it completely. Signed-off-by: Anthony Iliopoulos Link: https://lore.kernel.org/r/20250613094439.82338-2-ailiop@suse.com Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 - include/linux/nfs_fs_sb.h | 1 - 2 files changed, 2 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index cf35ad3f818a..23dafc590476 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -814,7 +814,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->wsize = max_rpc_payload; if (server->wsize > NFS_MAX_FILE_IO_SIZE) server->wsize = NFS_MAX_FILE_IO_SIZE; - server->wpages = (server->wsize + PAGE_SIZE - 1) >> PAGE_SHIFT; server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d7895eeccea3..7048f9b867ab 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -177,7 +177,6 @@ struct nfs_server { unsigned int rsize; /* read size */ unsigned int rpages; /* read size (in pages) */ unsigned int wsize; /* write size */ - unsigned int wpages; /* write size (in pages) */ unsigned int wtmult; /* server disk block size */ unsigned int dtsize; /* readdir size */ unsigned short port; /* "port=" setting */ From 74a33326cfe8e62ebe0a65ba01ea8a8bceb532f8 Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Fri, 13 Jun 2025 11:44:38 +0200 Subject: [PATCH 012/279] NFS: remove unused time_delta field from struct nfs_server The last code that was using this was removed via commit ca0daa277aca ("NFS: Cache aggressively when file is open for writing") which was merged in v4.8-rc1, so it can be removed completely. Signed-off-by: Anthony Iliopoulos Link: https://lore.kernel.org/r/20250613094439.82338-3-ailiop@suse.com Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 - include/linux/nfs_fs_sb.h | 1 - 2 files changed, 2 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 23dafc590476..47258dc3af70 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -830,7 +830,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->maxfilesize = fsinfo->maxfilesize; - server->time_delta = fsinfo->time_delta; server->change_attr_type = fsinfo->change_attr_type; server->clone_blksize = fsinfo->clone_blksize; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 7048f9b867ab..e1b2cf57e765 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -202,7 +202,6 @@ struct nfs_server { struct nfs_fsid fsid; int s_sysfs_id; /* sysfs dentry index */ __u64 maxfilesize; /* maximum file size */ - struct timespec64 time_delta; /* smallest time granularity */ unsigned long mount_time; /* when this fs was mounted */ struct super_block *super; /* VFS super block */ dev_t s_dev; /* superblock dev numbers */ From 2c665d91c2a2d8b5bdf1374d1253b3c89fca4ede Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Fri, 13 Jun 2025 11:44:39 +0200 Subject: [PATCH 013/279] NFS: remove unused pnfs_ld_data field from struct nfs_server The last code that was using this was removed via commit 20d655d6197d ("pnfs/blocklayout: use the device id cache") which was merged in v3.18-rc1, so it can be removed completely. Signed-off-by: Anthony Iliopoulos Link: https://lore.kernel.org/r/20250613094439.82338-4-ailiop@suse.com Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index e1b2cf57e765..d2d36711a119 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -246,7 +246,6 @@ struct nfs_server { filesystem */ struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ struct rpc_wait_queue roc_rpcwaitq; - void *pnfs_ld_data; /* per mount point data */ /* the following fields are protected by nfs_client->cl_lock */ struct rb_root state_owners; From 8c206b0a121e4195f892f298628791f3b848fef0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 18 Jun 2025 09:19:12 -0400 Subject: [PATCH 014/279] nfs: add cache_validity to the nfs_inode_event tracepoints Managing the cache_validity flags is the deep voodoo of NFS cache coherency. Let's have a little extra visibility into that value via the nfs_inode_event tracepoints. Reviewed-by: Benjamin Coddington Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20250618-nfs-tracepoints-v2-1-540c9fb48da2@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfstrace.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index f49f064c5ee5..96b1323318c2 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -57,6 +57,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event, __field(u32, fhandle) __field(u64, fileid) __field(u64, version) + __field(unsigned long, cache_validity) ), TP_fast_assign( @@ -65,14 +66,17 @@ DECLARE_EVENT_CLASS(nfs_inode_event, __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); + __entry->cache_validity = nfsi->cache_validity; ), TP_printk( - "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu ", + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu cache_validity=0x%lx (%s)", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - (unsigned long long)__entry->version + (unsigned long long)__entry->version, + __entry->cache_validity, + nfs_show_cache_validity(__entry->cache_validity) ) ); From 0139a30ada76d154ff48dfe7a1d2056f7d7ae023 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 18 Jun 2025 09:19:13 -0400 Subject: [PATCH 015/279] nfs: add a tracepoint to nfs_inode_detach_delegation_locked We have tracepoints for setting a delegation and reclaiming them. Add a tracepoint for when the delegation is being detached from the inode. Reviewed-by: Benjamin Coddington Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20250618-nfs-tracepoints-v2-2-540c9fb48da2@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 2 ++ fs/nfs/nfs4trace.h | 1 + 2 files changed, 3 insertions(+) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 10ef46e29b25..78a97d340bbd 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -355,6 +355,8 @@ nfs_detach_delegation_locked(struct nfs_inode *nfsi, rcu_dereference_protected(nfsi->delegation, lockdep_is_held(&clp->cl_lock)); + trace_nfs4_detach_delegation(&nfsi->vfs_inode, delegation->type); + if (deleg_cur == NULL || delegation != deleg_cur) return NULL; diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 9f69d7e14925..674f15e91c54 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -983,6 +983,7 @@ DECLARE_EVENT_CLASS(nfs4_set_delegation_event, TP_ARGS(inode, fmode)) DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_set_delegation); DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_reclaim_delegation); +DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_detach_delegation); TRACE_EVENT(nfs4_delegreturn_exit, TP_PROTO( From 5dd03d14b3a9595ea320a55c499ebf85b422392f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 18 Jun 2025 09:19:14 -0400 Subject: [PATCH 016/279] nfs: new tracepoint in nfs_delegation_need_return Add a tracepoint in the function that decides whether to return a delegation to the server. Reviewed-by: Benjamin Coddington Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20250618-nfs-tracepoints-v2-3-540c9fb48da2@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 2 ++ fs/nfs/nfs4trace.h | 47 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 78a97d340bbd..6f136c47eed7 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -594,6 +594,8 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation) { bool ret = false; + trace_nfs_delegation_need_return(delegation); + if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) ret = true; if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) || diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 674f15e91c54..6ab05261ce34 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -14,6 +14,8 @@ #include #include +#include "delegation.h" + #define show_nfs_fattr_flags(valid) \ __print_flags((unsigned long)valid, "|", \ { NFS_ATTR_FATTR_TYPE, "TYPE" }, \ @@ -985,6 +987,51 @@ DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_set_delegation); DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_reclaim_delegation); DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_detach_delegation); +#define show_delegation_flags(flags) \ + __print_flags(flags, "|", \ + { BIT(NFS_DELEGATION_NEED_RECLAIM), "NEED_RECLAIM" }, \ + { BIT(NFS_DELEGATION_RETURN), "RETURN" }, \ + { BIT(NFS_DELEGATION_RETURN_IF_CLOSED), "RETURN_IF_CLOSED" }, \ + { BIT(NFS_DELEGATION_REFERENCED), "REFERENCED" }, \ + { BIT(NFS_DELEGATION_RETURNING), "RETURNING" }, \ + { BIT(NFS_DELEGATION_REVOKED), "REVOKED" }, \ + { BIT(NFS_DELEGATION_TEST_EXPIRED), "TEST_EXPIRED" }, \ + { BIT(NFS_DELEGATION_INODE_FREEING), "INODE_FREEING" }, \ + { BIT(NFS_DELEGATION_RETURN_DELAYED), "RETURN_DELAYED" }) + +DECLARE_EVENT_CLASS(nfs4_delegation_event, + TP_PROTO( + const struct nfs_delegation *delegation + ), + + TP_ARGS(delegation), + + TP_STRUCT__entry( + __field(u32, fhandle) + __field(unsigned int, fmode) + __field(unsigned long, flags) + ), + + TP_fast_assign( + __entry->fhandle = nfs_fhandle_hash(NFS_FH(delegation->inode)); + __entry->fmode = delegation->type; + __entry->flags = delegation->flags; + ), + + TP_printk( + "fhandle=0x%08x fmode=%s flags=%s", + __entry->fhandle, show_fs_fmode_flags(__entry->fmode), + show_delegation_flags(__entry->flags) + ) +); +#define DEFINE_NFS4_DELEGATION_EVENT(name) \ + DEFINE_EVENT(nfs4_delegation_event, name, \ + TP_PROTO( \ + const struct nfs_delegation *delegation \ + ), \ + TP_ARGS(delegation)) +DEFINE_NFS4_DELEGATION_EVENT(nfs_delegation_need_return); + TRACE_EVENT(nfs4_delegreturn_exit, TP_PROTO( const struct nfs4_delegreturnargs *args, From b0b7cdc99431655aec3f3afcf05e3eeca0f8dd79 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 18 Jun 2025 09:19:15 -0400 Subject: [PATCH 017/279] nfs: new tracepoint in match_stateid operation Add new tracepoints in the NFSv4 match_stateid minorversion op that show the info in both stateids. Reviewed-by: Benjamin Coddington Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20250618-nfs-tracepoints-v2-4-540c9fb48da2@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 ++++ fs/nfs/nfs4trace.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 92f1b2601b67..ef2077e185b6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10692,6 +10692,8 @@ nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) static bool nfs41_match_stateid(const nfs4_stateid *s1, const nfs4_stateid *s2) { + trace_nfs41_match_stateid(s1, s2); + if (s1->type != s2->type) return false; @@ -10709,6 +10711,8 @@ static bool nfs41_match_stateid(const nfs4_stateid *s1, static bool nfs4_match_stateid(const nfs4_stateid *s1, const nfs4_stateid *s2) { + trace_nfs4_match_stateid(s1, s2); + return nfs4_stateid_match(s1, s2); } diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 6ab05261ce34..fe419147f60f 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -1524,6 +1524,63 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_recall); DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_layoutrecall_file); +#define show_stateid_type(type) \ + __print_symbolic(type, \ + { NFS4_INVALID_STATEID_TYPE, "INVALID" }, \ + { NFS4_SPECIAL_STATEID_TYPE, "SPECIAL" }, \ + { NFS4_OPEN_STATEID_TYPE, "OPEN" }, \ + { NFS4_LOCK_STATEID_TYPE, "LOCK" }, \ + { NFS4_DELEGATION_STATEID_TYPE, "DELEGATION" }, \ + { NFS4_LAYOUT_STATEID_TYPE, "LAYOUT" }, \ + { NFS4_PNFS_DS_STATEID_TYPE, "PNFS_DS" }, \ + { NFS4_REVOKED_STATEID_TYPE, "REVOKED" }, \ + { NFS4_FREED_STATEID_TYPE, "FREED" }) + +DECLARE_EVENT_CLASS(nfs4_match_stateid_event, + TP_PROTO( + const nfs4_stateid *s1, + const nfs4_stateid *s2 + ), + + TP_ARGS(s1, s2), + + TP_STRUCT__entry( + __field(int, s1_seq) + __field(int, s2_seq) + __field(u32, s1_hash) + __field(u32, s2_hash) + __field(int, s1_type) + __field(int, s2_type) + ), + + TP_fast_assign( + __entry->s1_seq = s1->seqid; + __entry->s1_hash = nfs_stateid_hash(s1); + __entry->s1_type = s1->type; + __entry->s2_seq = s2->seqid; + __entry->s2_hash = nfs_stateid_hash(s2); + __entry->s2_type = s2->type; + ), + + TP_printk( + "s1=%s:%x:%u s2=%s:%x:%u", + show_stateid_type(__entry->s1_type), + __entry->s1_hash, __entry->s1_seq, + show_stateid_type(__entry->s2_type), + __entry->s2_hash, __entry->s2_seq + ) +); + +#define DEFINE_NFS4_MATCH_STATEID_EVENT(name) \ + DEFINE_EVENT(nfs4_match_stateid_event, name, \ + TP_PROTO( \ + const nfs4_stateid *s1, \ + const nfs4_stateid *s2 \ + ), \ + TP_ARGS(s1, s2)) +DEFINE_NFS4_MATCH_STATEID_EVENT(nfs41_match_stateid); +DEFINE_NFS4_MATCH_STATEID_EVENT(nfs4_match_stateid); + DECLARE_EVENT_CLASS(nfs4_idmap_event, TP_PROTO( const char *name, From 72508db0fe1762f2cfcff1cb4cf28a8e645bdd43 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 24 Jun 2025 17:32:09 -0400 Subject: [PATCH 018/279] NFS: Allow folio migration for the case of mode == MIGRATE_SYNC When the mode is MIGRATE_SYNC, we are allowed to call nfs_wb_folio() under the folio lock. Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 374fc6b34c79..0dd22983f0d0 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -2113,8 +2113,12 @@ int nfs_migrate_folio(struct address_space *mapping, struct folio *dst, * that we can safely release the inode reference while holding * the folio lock. */ - if (folio_test_private(src)) - return -EBUSY; + if (folio_test_private(src)) { + if (mode == MIGRATE_SYNC) + nfs_wb_folio(src->mapping->host, src); + if (folio_test_private(src)) + return -EBUSY; + } if (folio_test_private_2(src)) { /* [DEPRECATED] */ if (mode == MIGRATE_ASYNC) From 90c9550a8d65fb9b1bf87baf97a04ed91bf61b33 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 May 2025 13:50:55 +0200 Subject: [PATCH 019/279] NFS: support the kernel keyring for TLS Allow tlshd to use a per-mount key from the kernel keyring similar to NVMe over TCP. Note that tlshd expects keys and certificates stored in the kernel keyring to be in DER format, not the PEM format used for file based keys and certificates, so they need to be converted before they are added to the keyring, which is a bit unexpected. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Link: https://lore.kernel.org/r/20250515115107.33052-2-hch@lst.de Signed-off-by: Trond Myklebust --- fs/nfs/fs_context.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 13f71ca8c974..9e94d18448ff 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -96,6 +96,8 @@ enum nfs_param { Opt_wsize, Opt_write, Opt_xprtsec, + Opt_cert_serial, + Opt_privkey_serial, }; enum { @@ -221,6 +223,8 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = { fsparam_enum ("write", Opt_write, nfs_param_enums_write), fsparam_u32 ("wsize", Opt_wsize), fsparam_string("xprtsec", Opt_xprtsec), + fsparam_s32("cert_serial", Opt_cert_serial), + fsparam_s32("privkey_serial", Opt_privkey_serial), {} }; @@ -551,6 +555,32 @@ static int nfs_parse_version_string(struct fs_context *fc, return 0; } +#ifdef CONFIG_KEYS +static int nfs_tls_key_verify(key_serial_t key_id) +{ + struct key *key = key_lookup(key_id); + int error = 0; + + if (IS_ERR(key)) { + pr_err("key id %08x not found\n", key_id); + return PTR_ERR(key); + } + if (test_bit(KEY_FLAG_REVOKED, &key->flags) || + test_bit(KEY_FLAG_INVALIDATED, &key->flags)) { + pr_err("key id %08x revoked\n", key_id); + error = -EKEYREVOKED; + } + + key_put(key); + return error; +} +#else +static inline int nfs_tls_key_verify(key_serial_t key_id) +{ + return -ENOENT; +} +#endif /* CONFIG_KEYS */ + /* * Parse a single mount parameter. */ @@ -807,6 +837,18 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, if (ret < 0) return ret; break; + case Opt_cert_serial: + ret = nfs_tls_key_verify(result.int_32); + if (ret < 0) + return ret; + ctx->xprtsec.cert_serial = result.int_32; + break; + case Opt_privkey_serial: + ret = nfs_tls_key_verify(result.int_32); + if (ret < 0) + return ret; + ctx->xprtsec.privkey_serial = result.int_32; + break; case Opt_proto: if (!param->string) From 87268f7a4f1fb7243bba5a4aa6199720b54f72dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 May 2025 13:50:56 +0200 Subject: [PATCH 020/279] nfs: create a kernel keyring Create a kernel .nfs keyring similar to the nvme .nvme one. Unlike for a userspace-created keyrind, tlshd is a possesor of the keys with this and thus the keys don't need user read permissions. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Link: https://lore.kernel.org/r/20250515115107.33052-3-hch@lst.de Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 4c7fa4f2bd5e..60fa0c8ff04e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2649,6 +2649,35 @@ static struct pernet_operations nfs_net_ops = { .size = sizeof(struct nfs_net), }; +#ifdef CONFIG_KEYS +static struct key *nfs_keyring; + +static int __init nfs_init_keyring(void) +{ + nfs_keyring = keyring_alloc(".nfs", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, + current_cred(), + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + (KEY_USR_ALL & ~KEY_USR_SETATTR), + KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); + return PTR_ERR_OR_ZERO(nfs_keyring); +} + +static void __exit nfs_exit_keyring(void) +{ + key_put(nfs_keyring); +} +#else +static inline int nfs_init_keyring(void) +{ + return 0; +} + +static inline void nfs_exit_keyring(void) +{ +} +#endif /* CONFIG_KEYS */ + /* * Initialize NFS */ @@ -2656,6 +2685,10 @@ static int __init init_nfs_fs(void) { int err; + err = nfs_init_keyring(); + if (err) + return err; + err = nfs_sysfs_init(); if (err < 0) goto out10; @@ -2716,6 +2749,7 @@ static int __init init_nfs_fs(void) out9: nfs_sysfs_exit(); out10: + nfs_exit_keyring(); return err; } @@ -2731,6 +2765,7 @@ static void __exit exit_nfs_fs(void) nfs_fs_proc_exit(); nfsiod_stop(); nfs_sysfs_exit(); + nfs_exit_keyring(); } /* Not quite true; I just maintain it */ From 48693d119b2114f8eaf8b8f972b29e05ae581ad4 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 13 Jul 2025 00:30:06 +0100 Subject: [PATCH 021/279] SUNRPC: Remove unused xdr functions Remove a bunch of unused xdr_*decode* functions: The last use of xdr_decode_netobj() was removed in 2021 by: commit 7cf96b6d0104 ("lockd: Update the NLMv4 SHARE arguments decoder to use struct xdr_stream") The last use of xdr_decode_string_inplace() was removed in 2021 by: commit 3049e974a7c7 ("lockd: Update the NLMv4 FREE_ALL arguments decoder to use struct xdr_stream") The last use of xdr_stream_decode_opaque() was removed in 2024 by: commit fed8a17c61ff ("xdrgen: typedefs should use the built-in string and opaque functions") The functions xdr_stream_decode_string() and xdr_stream_decode_opaque_dup() were both added in 2018 by the commit 0e779aa70308 ("SUNRPC: Add helpers for decoding opaque and string types") but never used. Remove them. Signed-off-by: Dr. David Alan Gilbert Link: https://lore.kernel.org/r/20250712233006.403226-1-linux@treblig.org Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 9 --- net/sunrpc/xdr.c | 110 ------------------------------------- 2 files changed, 119 deletions(-) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index a2ab813a9800..e370886632b0 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -128,10 +128,7 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) __be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int len); __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int len); __be32 *xdr_encode_string(__be32 *p, const char *s); -__be32 *xdr_decode_string_inplace(__be32 *p, char **sp, unsigned int *lenp, - unsigned int maxlen); __be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *); -__be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); void xdr_inline_pages(struct xdr_buf *, unsigned int, struct page **, unsigned int, unsigned int); @@ -341,12 +338,6 @@ xdr_stream_remaining(const struct xdr_stream *xdr) return xdr->nwords << 2; } -ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr, - size_t size); -ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr, - size_t maxlen, gfp_t gfp_flags); -ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, - size_t size); ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, size_t maxlen, gfp_t gfp_flags); ssize_t xdr_stream_decode_opaque_auth(struct xdr_stream *xdr, u32 *flavor, diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 2ea00e354ba6..a0aae1144212 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -37,19 +37,6 @@ xdr_encode_netobj(__be32 *p, const struct xdr_netobj *obj) } EXPORT_SYMBOL_GPL(xdr_encode_netobj); -__be32 * -xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj) -{ - unsigned int len; - - if ((len = be32_to_cpu(*p++)) > XDR_MAX_NETOBJ) - return NULL; - obj->len = len; - obj->data = (u8 *) p; - return p + XDR_QUADLEN(len); -} -EXPORT_SYMBOL_GPL(xdr_decode_netobj); - /** * xdr_encode_opaque_fixed - Encode fixed length opaque data * @p: pointer to current position in XDR buffer. @@ -102,21 +89,6 @@ xdr_encode_string(__be32 *p, const char *string) } EXPORT_SYMBOL_GPL(xdr_encode_string); -__be32 * -xdr_decode_string_inplace(__be32 *p, char **sp, - unsigned int *lenp, unsigned int maxlen) -{ - u32 len; - - len = be32_to_cpu(*p++); - if (len > maxlen) - return NULL; - *lenp = len; - *sp = (char *) p; - return p + XDR_QUADLEN(len); -} -EXPORT_SYMBOL_GPL(xdr_decode_string_inplace); - /** * xdr_terminate_string - '\0'-terminate a string residing in an xdr_buf * @buf: XDR buffer where string resides @@ -2247,88 +2219,6 @@ int xdr_process_buf(const struct xdr_buf *buf, unsigned int offset, } EXPORT_SYMBOL_GPL(xdr_process_buf); -/** - * xdr_stream_decode_opaque - Decode variable length opaque - * @xdr: pointer to xdr_stream - * @ptr: location to store opaque data - * @size: size of storage buffer @ptr - * - * Return values: - * On success, returns size of object stored in *@ptr - * %-EBADMSG on XDR buffer overflow - * %-EMSGSIZE on overflow of storage buffer @ptr - */ -ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr, size_t size) -{ - ssize_t ret; - void *p; - - ret = xdr_stream_decode_opaque_inline(xdr, &p, size); - if (ret <= 0) - return ret; - memcpy(ptr, p, ret); - return ret; -} -EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque); - -/** - * xdr_stream_decode_opaque_dup - Decode and duplicate variable length opaque - * @xdr: pointer to xdr_stream - * @ptr: location to store pointer to opaque data - * @maxlen: maximum acceptable object size - * @gfp_flags: GFP mask to use - * - * Return values: - * On success, returns size of object stored in *@ptr - * %-EBADMSG on XDR buffer overflow - * %-EMSGSIZE if the size of the object would exceed @maxlen - * %-ENOMEM on memory allocation failure - */ -ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr, - size_t maxlen, gfp_t gfp_flags) -{ - ssize_t ret; - void *p; - - ret = xdr_stream_decode_opaque_inline(xdr, &p, maxlen); - if (ret > 0) { - *ptr = kmemdup(p, ret, gfp_flags); - if (*ptr != NULL) - return ret; - ret = -ENOMEM; - } - *ptr = NULL; - return ret; -} -EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque_dup); - -/** - * xdr_stream_decode_string - Decode variable length string - * @xdr: pointer to xdr_stream - * @str: location to store string - * @size: size of storage buffer @str - * - * Return values: - * On success, returns length of NUL-terminated string stored in *@str - * %-EBADMSG on XDR buffer overflow - * %-EMSGSIZE on overflow of storage buffer @str - */ -ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, size_t size) -{ - ssize_t ret; - void *p; - - ret = xdr_stream_decode_opaque_inline(xdr, &p, size); - if (ret > 0) { - memcpy(str, p, ret); - str[ret] = '\0'; - return strlen(str); - } - *str = '\0'; - return ret; -} -EXPORT_SYMBOL_GPL(xdr_stream_decode_string); - /** * xdr_stream_decode_string_dup - Decode and duplicate variable length string * @xdr: pointer to xdr_stream From 3b3bc9a1f730dc24f9765dc70de65ad10888333e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Tue, 18 Feb 2025 21:52:50 +0000 Subject: [PATCH 022/279] NFS: Remove unused function nfs_umount nfs_umount() has been unused since 2013's commit 4580a92d44e2 ("NFS: Use server-recommended security flavor by default (NFSv3)") Remove it. Signed-off-by: Dr. David Alan Gilbert Link: https://lore.kernel.org/r/20250218215250.263709-1-linux@treblig.org Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 1 - fs/nfs/mount_clnt.c | 68 --------------------------------------------- 2 files changed, 69 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 69c2c10ee658..d55dce8bf043 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -207,7 +207,6 @@ struct nfs_mount_request { }; extern int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans); -extern void nfs_umount(const struct nfs_mount_request *info); /* client.c */ extern const struct rpc_program nfs_program; diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 57c9dd700b58..db8dfb920394 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -223,74 +223,6 @@ int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans) goto out; } -/** - * nfs_umount - Notify a server that we have unmounted this export - * @info: pointer to umount request arguments - * - * MOUNTPROC_UMNT is advisory, so we set a short timeout, and always - * use UDP. - */ -void nfs_umount(const struct nfs_mount_request *info) -{ - static const struct rpc_timeout nfs_umnt_timeout = { - .to_initval = 1 * HZ, - .to_maxval = 3 * HZ, - .to_retries = 2, - }; - struct rpc_create_args args = { - .net = info->net, - .protocol = IPPROTO_UDP, - .address = (struct sockaddr *)info->sap, - .addrsize = info->salen, - .timeout = &nfs_umnt_timeout, - .servername = info->hostname, - .program = &mnt_program, - .version = info->version, - .authflavor = RPC_AUTH_UNIX, - .flags = RPC_CLNT_CREATE_NOPING, - .cred = current_cred(), - }; - struct rpc_message msg = { - .rpc_argp = info->dirpath, - }; - struct rpc_clnt *clnt; - int status; - - if (strlen(info->dirpath) > MNTPATHLEN) - return; - - if (info->noresvport) - args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; - - clnt = rpc_create(&args); - if (IS_ERR(clnt)) - goto out_clnt_err; - - dprintk("NFS: sending UMNT request for %s:%s\n", - (info->hostname ? info->hostname : "server"), info->dirpath); - - if (info->version == NFS_MNT3_VERSION) - msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC3_UMNT]; - else - msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC_UMNT]; - - status = rpc_call_sync(clnt, &msg, 0); - rpc_shutdown_client(clnt); - - if (unlikely(status < 0)) - goto out_call_err; - - return; - -out_clnt_err: - dprintk("NFS: failed to create UMNT RPC client, status=%ld\n", - PTR_ERR(clnt)); - return; - -out_call_err: - dprintk("NFS: UMNT request failed, status=%d\n", status); -} - /* * XDR encode/decode functions for MOUNT */ From 9768797c219326699778fba9cd3b607b2f1e7950 Mon Sep 17 00:00:00 2001 From: Sergey Bashirov Date: Mon, 30 Jun 2025 21:35:26 +0300 Subject: [PATCH 023/279] pNFS: Fix uninited ptr deref in block/scsi layout The error occurs on the third attempt to encode extents. When function ext_tree_prepare_commit() reallocates a larger buffer to retry encoding extents, the "layoutupdate_pages" page array is initialized only after the retry loop. But ext_tree_free_commitdata() is called on every iteration and tries to put pages in the array, thus dereferencing uninitialized pointers. An additional problem is that there is no limit on the maximum possible buffer_size. When there are too many extents, the client may create a layoutcommit that is larger than the maximum possible RPC size accepted by the server. During testing, we observed two typical scenarios. First, one memory page for extents is enough when we work with small files, append data to the end of the file, or preallocate extents before writing. But when we fill a new large file without preallocating, the number of extents can be huge, and counting the number of written extents in ext_tree_encode_commit() does not help much. Since this number increases even more between unlocking and locking of ext_tree, the reallocated buffer may not be large enough again and again. Co-developed-by: Konstantin Evtushenko Signed-off-by: Konstantin Evtushenko Signed-off-by: Sergey Bashirov Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250630183537.196479-2-sergeybashirov@gmail.com Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/extent_tree.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c index 8f7cff7a4293..0add0f329816 100644 --- a/fs/nfs/blocklayout/extent_tree.c +++ b/fs/nfs/blocklayout/extent_tree.c @@ -552,6 +552,15 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, return ret; } +/** + * ext_tree_prepare_commit - encode extents that need to be committed + * @arg: layout commit data + * + * Return values: + * %0: Success, all required extents are encoded + * %-ENOSPC: Some extents are encoded, but not all, due to RPC size limit + * %-ENOMEM: Out of memory, extents not encoded + */ int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) { @@ -568,12 +577,12 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) start_p = page_address(arg->layoutupdate_page); arg->layoutupdate_pages = &arg->layoutupdate_page; -retry: - ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten); + ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, + &count, &arg->lastbytewritten); if (unlikely(ret)) { ext_tree_free_commitdata(arg, buffer_size); - buffer_size = ext_tree_layoutupdate_size(bl, count); + buffer_size = NFS_SERVER(arg->inode)->wsize; count = 0; arg->layoutupdate_pages = @@ -588,7 +597,8 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) return -ENOMEM; } - goto retry; + ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, + &count, &arg->lastbytewritten); } *start_p = cpu_to_be32(count); @@ -608,7 +618,7 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) } dprintk("%s found %zu ranges\n", __func__, count); - return 0; + return ret; } void From d84c4754f8740915da9977a282f72a3b2b0e0ac9 Mon Sep 17 00:00:00 2001 From: Sergey Bashirov Date: Mon, 30 Jun 2025 21:35:27 +0300 Subject: [PATCH 024/279] pNFS: Fix extent encoding in block/scsi layout The ext_tree_encode_commit() function may be called multiple times for the same file, layout, and last written byte if the provided buffer is not large enough to encode all extents in it. The first problem is that the last written byte field must be zeroed only on a successful call, otherwise we will lose its actual value and get an integer overflow on the next encoding attempt. The second problem is that we can't count and encode in one pass. The extent state changes during encoding, so if we return -ENOSPC but have already encoded some extents into a small buffer, they will not be re-encoded into a new larger buffer on the next try. As a result, the client never commits these extents to the server. Co-developed-by: Konstantin Evtushenko Signed-off-by: Konstantin Evtushenko Signed-off-by: Sergey Bashirov Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250630183537.196479-3-sergeybashirov@gmail.com Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/extent_tree.c | 80 +++++++++++++++++++++++++++++--- 1 file changed, 74 insertions(+), 6 deletions(-) diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c index 0add0f329816..faccd5caa149 100644 --- a/fs/nfs/blocklayout/extent_tree.c +++ b/fs/nfs/blocklayout/extent_tree.c @@ -520,10 +520,71 @@ static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p) return xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT); } -static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, +/** + * ext_tree_try_encode_commit - try to encode all extents into the buffer + * @bl: pointer to the layout + * @p: pointer to the output buffer + * @buffer_size: size of the output buffer + * @count: output pointer to the number of encoded extents + * @lastbyte: output pointer to the last written byte + * + * Return values: + * %0: Success, all required extents encoded, outputs are valid + * %-ENOSPC: Buffer too small, nothing encoded, outputs are invalid + */ +static int +ext_tree_try_encode_commit(struct pnfs_block_layout *bl, __be32 *p, size_t buffer_size, size_t *count, __u64 *lastbyte) { struct pnfs_block_extent *be; + + spin_lock(&bl->bl_ext_lock); + for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) { + if (be->be_state != PNFS_BLOCK_INVALID_DATA || + be->be_tag != EXTENT_WRITTEN) + continue; + + (*count)++; + if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) { + spin_unlock(&bl->bl_ext_lock); + return -ENOSPC; + } + } + for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) { + if (be->be_state != PNFS_BLOCK_INVALID_DATA || + be->be_tag != EXTENT_WRITTEN) + continue; + + if (bl->bl_scsi_layout) + p = encode_scsi_range(be, p); + else + p = encode_block_extent(be, p); + be->be_tag = EXTENT_COMMITTING; + } + *lastbyte = (bl->bl_lwb != 0) ? bl->bl_lwb - 1 : U64_MAX; + bl->bl_lwb = 0; + spin_unlock(&bl->bl_ext_lock); + + return 0; +} + +/** + * ext_tree_encode_commit - encode as much as possible extents into the buffer + * @bl: pointer to the layout + * @p: pointer to the output buffer + * @buffer_size: size of the output buffer + * @count: output pointer to the number of encoded extents + * @lastbyte: output pointer to the last written byte + * + * Return values: + * %0: Success, all required extents encoded, outputs are valid + * %-ENOSPC: Buffer too small, some extents are encoded, outputs are valid + */ +static int +ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, + size_t buffer_size, size_t *count, __u64 *lastbyte) +{ + struct pnfs_block_extent *be, *be_prev; int ret = 0; spin_lock(&bl->bl_ext_lock); @@ -534,9 +595,9 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, (*count)++; if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) { - /* keep counting.. */ + (*count)--; ret = -ENOSPC; - continue; + break; } if (bl->bl_scsi_layout) @@ -544,9 +605,16 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, else p = encode_block_extent(be, p); be->be_tag = EXTENT_COMMITTING; + be_prev = be; + } + if (!ret) { + *lastbyte = (bl->bl_lwb != 0) ? bl->bl_lwb - 1 : U64_MAX; + bl->bl_lwb = 0; + } else { + *lastbyte = be_prev->be_f_offset + be_prev->be_length; + *lastbyte <<= SECTOR_SHIFT; + *lastbyte -= 1; } - *lastbyte = bl->bl_lwb - 1; - bl->bl_lwb = 0; spin_unlock(&bl->bl_ext_lock); return ret; @@ -577,7 +645,7 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) start_p = page_address(arg->layoutupdate_page); arg->layoutupdate_pages = &arg->layoutupdate_page; - ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, + ret = ext_tree_try_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten); if (unlikely(ret)) { ext_tree_free_commitdata(arg, buffer_size); From 66642bbee595e5fa8fc4ce7c8706c3697da239fe Mon Sep 17 00:00:00 2001 From: Sergey Bashirov Date: Mon, 30 Jun 2025 21:35:28 +0300 Subject: [PATCH 025/279] pNFS: Add prepare commit trace to block/scsi layout Replace dprintk with trace event in ext_tree_prepare_commit() function. Co-developed-by: Konstantin Evtushenko Signed-off-by: Konstantin Evtushenko Signed-off-by: Sergey Bashirov Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250630183537.196479-4-sergeybashirov@gmail.com Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/extent_tree.c | 6 +++--- fs/nfs/nfs4trace.c | 1 + fs/nfs/nfs4trace.h | 34 ++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c index faccd5caa149..315949a7e92d 100644 --- a/fs/nfs/blocklayout/extent_tree.c +++ b/fs/nfs/blocklayout/extent_tree.c @@ -6,6 +6,7 @@ #include #include "blocklayout.h" +#include "../nfs4trace.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -637,8 +638,6 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) __be32 *start_p; int ret; - dprintk("%s enter\n", __func__); - arg->layoutupdate_page = alloc_page(GFP_NOFS); if (!arg->layoutupdate_page) return -ENOMEM; @@ -685,7 +684,8 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) } } - dprintk("%s found %zu ranges\n", __func__, count); + trace_bl_ext_tree_prepare_commit(ret, count, + arg->lastbytewritten, !!ret); return ret; } diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c index 436763a559cd..987c92d6364b 100644 --- a/fs/nfs/nfs4trace.c +++ b/fs/nfs/nfs4trace.c @@ -32,6 +32,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_read_error); EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_write_error); EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_commit_error); +EXPORT_TRACEPOINT_SYMBOL_GPL(bl_ext_tree_prepare_commit); EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg); EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_reg_err); EXPORT_TRACEPOINT_SYMBOL_GPL(bl_pr_key_unreg); diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index fe419147f60f..9776d220cec3 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -2295,6 +2295,40 @@ TRACE_EVENT(ff_layout_commit_error, ) ); +TRACE_EVENT(bl_ext_tree_prepare_commit, + TP_PROTO( + int ret, + size_t count, + u64 lwb, + bool not_all_ranges + ), + + TP_ARGS(ret, count, lwb, not_all_ranges), + + TP_STRUCT__entry( + __field(int, ret) + __field(size_t, count) + __field(u64, lwb) + __field(bool, not_all_ranges) + ), + + TP_fast_assign( + __entry->ret = ret; + __entry->count = count; + __entry->lwb = lwb; + __entry->not_all_ranges = not_all_ranges; + ), + + TP_printk( + "ret=%d, found %zu ranges, lwb=%llu%s", + __entry->ret, + __entry->count, + __entry->lwb, + __entry->not_all_ranges ? ", not all ranges encoded" : + "" + ) +); + DECLARE_EVENT_CLASS(pnfs_bl_pr_key_class, TP_PROTO( const struct block_device *bdev, From d897d81671bc4615c80f4f3bd5e6b218f59df50c Mon Sep 17 00:00:00 2001 From: Sergey Bashirov Date: Mon, 30 Jun 2025 21:35:29 +0300 Subject: [PATCH 026/279] pNFS: Handle RPC size limit for layoutcommits When there are too many block extents for a layoutcommit, they may not all fit into the maximum-sized RPC. This patch allows the generic pnfs code to properly handle -ENOSPC returned by the block/scsi layout driver and trigger additional layoutcommits if necessary. Co-developed-by: Konstantin Evtushenko Signed-off-by: Konstantin Evtushenko Signed-off-by: Sergey Bashirov Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250630183537.196479-5-sergeybashirov@gmail.com Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 1a7ec68bde15..3fd0971bf16f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -3340,6 +3340,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) struct nfs_inode *nfsi = NFS_I(inode); loff_t end_pos; int status; + bool mark_as_dirty = false; if (!pnfs_layoutcommit_outstanding(inode)) return 0; @@ -3391,19 +3392,23 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) if (ld->prepare_layoutcommit) { status = ld->prepare_layoutcommit(&data->args); if (status) { - put_cred(data->cred); + if (status != -ENOSPC) + put_cred(data->cred); spin_lock(&inode->i_lock); set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); if (end_pos > nfsi->layout->plh_lwb) nfsi->layout->plh_lwb = end_pos; - goto out_unlock; + if (status != -ENOSPC) + goto out_unlock; + spin_unlock(&inode->i_lock); + mark_as_dirty = true; } } status = nfs4_proc_layoutcommit(data, sync); out: - if (status) + if (status || mark_as_dirty) mark_inode_dirty_sync(inode); dprintk("<-- %s status %d\n", __func__, status); return status; From 81438498a285759f31e843ac4800f82a5ce6521f Mon Sep 17 00:00:00 2001 From: Sergey Bashirov Date: Tue, 1 Jul 2025 15:21:48 +0300 Subject: [PATCH 027/279] pNFS: Fix stripe mapping in block/scsi layout Because of integer division, we need to carefully calculate the disk offset. Consider the example below for a stripe of 6 volumes, a chunk size of 4096, and an offset of 70000. chunk = div_u64(offset, dev->chunk_size) = 70000 / 4096 = 17 offset = chunk * dev->chunk_size = 17 * 4096 = 69632 disk_offset_wrong = div_u64(offset, dev->nr_children) = 69632 / 6 = 11605 disk_chunk = div_u64(chunk, dev->nr_children) = 17 / 6 = 2 disk_offset = disk_chunk * dev->chunk_size = 2 * 4096 = 8192 Signed-off-by: Sergey Bashirov Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250701122341.199112-1-sergeybashirov@gmail.com Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index cab8809f0e0f..44306ac22353 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -257,10 +257,11 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, struct pnfs_block_dev *child; u64 chunk; u32 chunk_idx; + u64 disk_chunk; u64 disk_offset; chunk = div_u64(offset, dev->chunk_size); - div_u64_rem(chunk, dev->nr_children, &chunk_idx); + disk_chunk = div_u64_rem(chunk, dev->nr_children, &chunk_idx); if (chunk_idx >= dev->nr_children) { dprintk("%s: invalid chunk idx %d (%lld/%lld)\n", @@ -273,7 +274,7 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, offset = chunk * dev->chunk_size; /* disk offset of the stripe */ - disk_offset = div_u64(offset, dev->nr_children); + disk_offset = disk_chunk * dev->chunk_size; child = &dev->children[chunk_idx]; child->map(child, disk_offset, map); From 7db6e66663681abda54f81d5916db3a3b8b1a13d Mon Sep 17 00:00:00 2001 From: Sergey Bashirov Date: Wed, 2 Jul 2025 16:32:21 +0300 Subject: [PATCH 028/279] pNFS: Fix disk addr range check in block/scsi layout At the end of the isect translation, disc_addr represents the physical disk offset. Thus, end calculated from disk_addr is also a physical disk offset. Therefore, range checking should be done using map->disk_offset, not map->start. Signed-off-by: Sergey Bashirov Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250702133226.212537-1-sergeybashirov@gmail.com Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 47189476b553..5d6edafbed20 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -149,8 +149,8 @@ do_add_page_to_bio(struct bio *bio, int npg, enum req_op op, sector_t isect, /* limit length to what the device mapping allows */ end = disk_addr + *len; - if (end >= map->start + map->len) - *len = map->start + map->len - disk_addr; + if (end >= map->disk_offset + map->len) + *len = map->disk_offset + map->len - disk_addr; retry: if (!bio) { From 86bc643afd72c28c25831c87df6e6d0b016c5004 Mon Sep 17 00:00:00 2001 From: Aaron Kling Date: Tue, 8 Jul 2025 02:30:42 -0500 Subject: [PATCH 029/279] efistub: Lower default log level Some uefi implementations will write the efistub logs to the display over a splash image. This is not desirable for debug and info logs, so lower the default efi log level to exclude them. Suggested-by: Ard Biesheuvel Signed-off-by: Aaron Kling Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/printk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/printk.c b/drivers/firmware/efi/libstub/printk.c index 3a67a2cea7bd..bc599212c05d 100644 --- a/drivers/firmware/efi/libstub/printk.c +++ b/drivers/firmware/efi/libstub/printk.c @@ -5,13 +5,13 @@ #include #include #include -#include /* For CONSOLE_LOGLEVEL_* */ +#include #include #include #include "efistub.h" -int efi_loglevel = CONSOLE_LOGLEVEL_DEFAULT; +int efi_loglevel = LOGLEVEL_NOTICE; /** * efi_char16_puts() - Write a UCS-2 encoded string to the console From 02eb7a8eee20b9ec6aafd5e17c5c41b53e8b13ef Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Fri, 11 Jul 2025 07:44:46 +0200 Subject: [PATCH 030/279] efi: add API doc entry for ovmf_debug_log Document the newly added sysfs ABI for accessing the in-memory debug log provided by OVMF EFI firmware (when enabled) Signed-off-by: Gerd Hoffmann Acked-by: Jonathan Corbet Signed-off-by: Ard Biesheuvel --- Documentation/ABI/testing/sysfs-firmware-efi | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-firmware-efi b/Documentation/ABI/testing/sysfs-firmware-efi index 5e4d0b27cdfe..927e362d4974 100644 --- a/Documentation/ABI/testing/sysfs-firmware-efi +++ b/Documentation/ABI/testing/sysfs-firmware-efi @@ -36,3 +36,10 @@ Description: Displays the content of the Runtime Configuration Interface Table version 2 on Dell EMC PowerEdge systems in binary format Users: It is used by Dell EMC OpenManage Server Administrator tool to populate BIOS setup page. + +What: /sys/firmware/efi/ovmf_debug_log +Date: July 2025 +Contact: Gerd Hoffmann , linux-efi@vger.kernel.org +Description: Displays the content of the OVMF debug log buffer. The file is + only present in case the firmware supports logging to a memory + buffer. From 0c927d478486fbc96a225aeae6705e7152700c87 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 1 Jul 2025 10:55:38 -0700 Subject: [PATCH 031/279] ARM: dts: broadcom: Fix bcm7445 memory controller compatible The memory controller node compatible string was incompletely specified and used the fallback compatible. After commit 501be7cecec9 ("dt-bindings: memory-controller: Define fallback compatible") however, we need to fully specify the compatible string. Fixes: 501be7cecec9 ("dt-bindings: memory-controller: Define fallback compatible") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507011302.ZqNlBKWX-lkp@intel.com/ Link: https://lore.kernel.org/r/20250701175538.1633435-1-florian.fainelli@broadcom.com Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/broadcom/bcm7445.dtsi | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/broadcom/bcm7445.dtsi b/arch/arm/boot/dts/broadcom/bcm7445.dtsi index 5ac2042515b8..c6307c7437e3 100644 --- a/arch/arm/boot/dts/broadcom/bcm7445.dtsi +++ b/arch/arm/boot/dts/broadcom/bcm7445.dtsi @@ -237,7 +237,8 @@ memc@0 { ranges = <0x0 0x0 0x80000>; memc-ddr@2000 { - compatible = "brcm,brcmstb-memc-ddr"; + compatible = "brcm,brcmstb-memc-ddr-rev-b.1.x", + "brcm,brcmstb-memc-ddr"; reg = <0x2000 0x800>; }; @@ -259,7 +260,8 @@ memc@80000 { ranges = <0x0 0x80000 0x80000>; memc-ddr@2000 { - compatible = "brcm,brcmstb-memc-ddr"; + compatible = "brcm,brcmstb-memc-ddr-rev-b.1.x", + "brcm,brcmstb-memc-ddr"; reg = <0x2000 0x800>; }; @@ -281,7 +283,8 @@ memc@100000 { ranges = <0x0 0x100000 0x80000>; memc-ddr@2000 { - compatible = "brcm,brcmstb-memc-ddr"; + compatible = "brcm,brcmstb-memc-ddr-rev-b.1.x", + "brcm,brcmstb-memc-ddr"; reg = <0x2000 0x800>; }; From f3fc8f06492693d4fcb32c9821fb465d4c7f5a97 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Jul 2025 08:30:45 +0200 Subject: [PATCH 032/279] NFS: pass struct nfs_client_initdata to nfs4_set_client Passed the partially filled out structure to nfs4_set_client instead of 11 arguments that then get stashed into the structure. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 151 ++++++++++++++++++++------------------------ 1 file changed, 68 insertions(+), 83 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 162c85a83a14..2e623da1a787 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -895,55 +895,40 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, * Set up an NFS4 client */ static int nfs4_set_client(struct nfs_server *server, - const char *hostname, - const struct sockaddr_storage *addr, - const size_t addrlen, - const char *ip_addr, - int proto, const struct rpc_timeout *timeparms, - u32 minorversion, unsigned int nconnect, - unsigned int max_connect, - struct net *net, - struct xprtsec_parms *xprtsec) + struct nfs_client_initdata *cl_init) { - struct nfs_client_initdata cl_init = { - .hostname = hostname, - .addr = addr, - .addrlen = addrlen, - .ip_addr = ip_addr, - .nfs_mod = &nfs_v4, - .proto = proto, - .minorversion = minorversion, - .net = net, - .timeparms = timeparms, - .cred = server->cred, - .xprtsec = *xprtsec, - }; struct nfs_client *clp; - if (minorversion == 0) - __set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags); - else - cl_init.max_connect = max_connect; - switch (proto) { + cl_init->nfs_mod = &nfs_v4; + cl_init->cred = server->cred; + + if (cl_init->minorversion == 0) { + __set_bit(NFS_CS_REUSEPORT, &cl_init->init_flags); + cl_init->max_connect = 0; + } + + switch (cl_init->proto) { case XPRT_TRANSPORT_RDMA: case XPRT_TRANSPORT_TCP: case XPRT_TRANSPORT_TCP_TLS: - cl_init.nconnect = nconnect; + break; + default: + cl_init->nconnect = 0; } if (server->flags & NFS_MOUNT_NORESVPORT) - __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + __set_bit(NFS_CS_NORESVPORT, &cl_init->init_flags); if (server->options & NFS_OPTION_MIGRATION) - __set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); + __set_bit(NFS_CS_MIGRATION, &cl_init->init_flags); if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status)) - __set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags); - server->port = rpc_get_port((struct sockaddr *)addr); + __set_bit(NFS_CS_TSM_POSSIBLE, &cl_init->init_flags); + server->port = rpc_get_port((struct sockaddr *)cl_init->addr); if (server->flags & NFS_MOUNT_NETUNREACH_FATAL) - __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags); + __set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init->init_flags); /* Allocate or find a client reference we can use */ - clp = nfs_get_client(&cl_init); + clp = nfs_get_client(cl_init); if (IS_ERR(clp)) return PTR_ERR(clp); @@ -1156,6 +1141,19 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); struct rpc_timeout timeparms; + struct nfs_client_initdata cl_init = { + .hostname = ctx->nfs_server.hostname, + .addr = &ctx->nfs_server._address, + .addrlen = ctx->nfs_server.addrlen, + .ip_addr = ctx->client_address, + .proto = ctx->nfs_server.protocol, + .minorversion = ctx->minorversion, + .net = fc->net_ns, + .timeparms = &timeparms, + .xprtsec = ctx->xprtsec, + .nconnect = ctx->nfs_server.nconnect, + .max_connect = ctx->nfs_server.max_connect, + }; int error; nfs_init_timeout_values(&timeparms, ctx->nfs_server.protocol, @@ -1175,18 +1173,7 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc) ctx->selected_flavor = RPC_AUTH_UNIX; /* Get a client record */ - error = nfs4_set_client(server, - ctx->nfs_server.hostname, - &ctx->nfs_server._address, - ctx->nfs_server.addrlen, - ctx->client_address, - ctx->nfs_server.protocol, - &timeparms, - ctx->minorversion, - ctx->nfs_server.nconnect, - ctx->nfs_server.max_connect, - fc->net_ns, - &ctx->xprtsec); + error = nfs4_set_client(server, &cl_init); if (error < 0) return error; @@ -1246,18 +1233,28 @@ struct nfs_server *nfs4_create_server(struct fs_context *fc) struct nfs_server *nfs4_create_referral_server(struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); - struct nfs_client *parent_client; - struct nfs_server *server, *parent_server; - int proto, error; + struct nfs_server *parent_server = NFS_SB(ctx->clone_data.sb); + struct nfs_client *parent_client = parent_server->nfs_client; + struct nfs_client_initdata cl_init = { + .hostname = ctx->nfs_server.hostname, + .addr = &ctx->nfs_server._address, + .addrlen = ctx->nfs_server.addrlen, + .ip_addr = parent_client->cl_ipaddr, + .minorversion = parent_client->cl_mvops->minor_version, + .net = parent_client->cl_net, + .timeparms = parent_server->client->cl_timeout, + .xprtsec = parent_client->cl_xprtsec, + .nconnect = parent_client->cl_nconnect, + .max_connect = parent_client->cl_max_connect, + }; + struct nfs_server *server; bool auth_probe; + int error; server = nfs_alloc_server(); if (!server) return ERR_PTR(-ENOMEM); - parent_server = NFS_SB(ctx->clone_data.sb); - parent_client = parent_server->nfs_client; - server->cred = get_cred(parent_server->cred); /* Initialise the client representation from the parent server */ @@ -1266,38 +1263,17 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc) /* Get a client representation */ #if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) rpc_set_port(&ctx->nfs_server.address, NFS_RDMA_PORT); - error = nfs4_set_client(server, - ctx->nfs_server.hostname, - &ctx->nfs_server._address, - ctx->nfs_server.addrlen, - parent_client->cl_ipaddr, - XPRT_TRANSPORT_RDMA, - parent_server->client->cl_timeout, - parent_client->cl_mvops->minor_version, - parent_client->cl_nconnect, - parent_client->cl_max_connect, - parent_client->cl_net, - &parent_client->cl_xprtsec); + cl_init.proto = XPRT_TRANSPORT_RDMA; + error = nfs4_set_client(server, &cl_init); if (!error) goto init_server; #endif /* IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) */ - proto = XPRT_TRANSPORT_TCP; + cl_init.proto = XPRT_TRANSPORT_TCP; if (parent_client->cl_xprtsec.policy != RPC_XPRTSEC_NONE) - proto = XPRT_TRANSPORT_TCP_TLS; + cl_init.proto = XPRT_TRANSPORT_TCP_TLS; rpc_set_port(&ctx->nfs_server.address, NFS_PORT); - error = nfs4_set_client(server, - ctx->nfs_server.hostname, - &ctx->nfs_server._address, - ctx->nfs_server.addrlen, - parent_client->cl_ipaddr, - proto, - parent_server->client->cl_timeout, - parent_client->cl_mvops->minor_version, - parent_client->cl_nconnect, - parent_client->cl_max_connect, - parent_client->cl_net, - &parent_client->cl_xprtsec); + error = nfs4_set_client(server, &cl_init); if (error < 0) goto error; @@ -1353,6 +1329,19 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, char buf[INET6_ADDRSTRLEN + 1]; struct sockaddr_storage address; struct sockaddr *localaddr = (struct sockaddr *)&address; + struct nfs_client_initdata cl_init = { + .hostname = hostname, + .addr = sap, + .addrlen = salen, + .ip_addr = buf, + .proto = clp->cl_proto, + .minorversion = clp->cl_minorversion, + .net = net, + .timeparms = clnt->cl_timeout, + .xprtsec = clp->cl_xprtsec, + .nconnect = clp->cl_nconnect, + .max_connect = clp->cl_max_connect, + }; int error; error = rpc_switch_client_transport(clnt, &xargs, clnt->cl_timeout); @@ -1368,11 +1357,7 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, nfs_server_remove_lists(server); set_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status); - error = nfs4_set_client(server, hostname, sap, salen, buf, - clp->cl_proto, clnt->cl_timeout, - clp->cl_minorversion, - clp->cl_nconnect, clp->cl_max_connect, - net, &clp->cl_xprtsec); + error = nfs4_set_client(server, &cl_init); clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status); if (error != 0) { nfs_server_insert_lists(server); From c262b444bd0d6bfbcda65130e6137952bef422f6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Jul 2025 08:24:50 +0200 Subject: [PATCH 033/279] NFS: drop __exit from nfs_exit_keyring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise built-in NFS can lead to sectіon mismatches. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250714062450.1468117-1-hch@lst.de Fixes: 87268f7a4f1f ("nfs: create a kernel keyring") Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 60fa0c8ff04e..338ef77ae423 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2663,7 +2663,7 @@ static int __init nfs_init_keyring(void) return PTR_ERR_OR_ZERO(nfs_keyring); } -static void __exit nfs_exit_keyring(void) +static void nfs_exit_keyring(void) { key_put(nfs_keyring); } From f06bedfa62d57f7b67d44aacd6badad2e13a803f Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Fri, 27 Jun 2025 09:17:51 +0200 Subject: [PATCH 034/279] pNFS/flexfiles: don't attempt pnfs on fatal DS errors When an applications get killed (SIGTERM/SIGINT) while pNFS client performs a connection to DS, client ends in an infinite loop of connect-disconnect. This source of the issue, it that flexfilelayoutdev#nfs4_ff_layout_prepare_ds gets an error on nfs4_pnfs_ds_connect with status ERESTARTSYS, which is set by rpc_signal_task, but the error is treated as transient, thus retried. The issue is reproducible with Ctrl+C the following script(there should be ~1000 files in a directory, client should must not have any connections to DSes): ``` echo 3 > /proc/sys/vm/drop_caches for i in * do head -1 $i done ``` The change aims to propagate the nfs4_ff_layout_prepare_ds error state to the caller that can decide whatever this is a retryable error or not. Signed-off-by: Tigran Mkrtchyan Link: https://lore.kernel.org/r/20250627071751.189663-1-tigran.mkrtchyan@desy.de Fixes: 260f32adb88d ("pNFS/flexfiles: Check the result of nfs4_pnfs_ds_connect") Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 26 ++++++++++++++--------- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 6 +++--- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 4bea008dbebd..8dc921d83538 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -762,14 +762,14 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, { struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); struct nfs4_ff_layout_mirror *mirror; - struct nfs4_pnfs_ds *ds; + struct nfs4_pnfs_ds *ds = ERR_PTR(-EAGAIN); u32 idx; /* mirrors are initially sorted by efficiency */ for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { mirror = FF_LAYOUT_COMP(lseg, idx); ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false); - if (!ds) + if (IS_ERR(ds)) continue; if (check_device && @@ -777,10 +777,10 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, continue; *best_idx = idx; - return ds; + break; } - return NULL; + return ds; } static struct nfs4_pnfs_ds * @@ -942,7 +942,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, for (i = 0; i < pgio->pg_mirror_count; i++) { mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, true); - if (!ds) { + if (IS_ERR(ds)) { if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) goto out_mds; pnfs_generic_pg_cleanup(pgio); @@ -1867,6 +1867,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) u32 idx = hdr->pgio_mirror_idx; int vers; struct nfs_fh *fh; + bool ds_fatal_error = false; dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n", __func__, hdr->inode->i_ino, @@ -1874,8 +1875,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) mirror = FF_LAYOUT_COMP(lseg, idx); ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false); - if (!ds) + if (IS_ERR(ds)) { + ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds)); goto out_failed; + } ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp, hdr->inode); @@ -1923,7 +1926,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) return PNFS_ATTEMPTED; out_failed: - if (ff_layout_avoid_mds_available_ds(lseg)) + if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error) return PNFS_TRY_AGAIN; trace_pnfs_mds_fallback_read_pagelist(hdr->inode, hdr->args.offset, hdr->args.count, @@ -1945,11 +1948,14 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) int vers; struct nfs_fh *fh; u32 idx = hdr->pgio_mirror_idx; + bool ds_fatal_error = false; mirror = FF_LAYOUT_COMP(lseg, idx); ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true); - if (!ds) + if (IS_ERR(ds)) { + ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds)); goto out_failed; + } ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp, hdr->inode); @@ -2000,7 +2006,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) return PNFS_ATTEMPTED; out_failed: - if (ff_layout_avoid_mds_available_ds(lseg)) + if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error) return PNFS_TRY_AGAIN; trace_pnfs_mds_fallback_write_pagelist(hdr->inode, hdr->args.offset, hdr->args.count, @@ -2043,7 +2049,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); mirror = FF_LAYOUT_COMP(lseg, idx); ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true); - if (!ds) + if (IS_ERR(ds)) goto out_err; ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp, diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 656d5c50bbce..30365ec782bb 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -370,11 +370,11 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, struct nfs4_ff_layout_mirror *mirror, bool fail_return) { - struct nfs4_pnfs_ds *ds = NULL; + struct nfs4_pnfs_ds *ds; struct inode *ino = lseg->pls_layout->plh_inode; struct nfs_server *s = NFS_SERVER(ino); unsigned int max_payload; - int status; + int status = -EAGAIN; if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror)) goto noconnect; @@ -418,7 +418,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, ff_layout_send_layouterror(lseg); if (fail_return || !ff_layout_has_available_ds(lseg)) pnfs_error_mark_layout_for_return(ino, lseg); - ds = NULL; + ds = ERR_PTR(status); out: return ds; } From 67173860a763b99317184bfaa821abd3578a4ce3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Jul 2025 10:14:46 +0200 Subject: [PATCH 035/279] NFS: cleanup error handling in nfs4_server_common_setup Return error directly instead of using a goto label for it. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250718081509.2607553-2-hch@lst.de Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 2e623da1a787..5943a192f36b 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1103,14 +1103,14 @@ static int nfs4_server_common_setup(struct nfs_server *server, /* We must ensure the session is initialised first */ error = nfs4_init_session(server->nfs_client); if (error < 0) - goto out; + return error; nfs4_server_set_init_caps(server); /* Probe the root fh to retrieve its FSID and filehandle */ error = nfs4_get_rootfh(server, mntfh, auth_probe); if (error < 0) - goto out; + return error; dprintk("Server FSID: %llx:%llx\n", (unsigned long long) server->fsid.major, @@ -1119,7 +1119,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, error = nfs_probe_server(server, mntfh); if (error < 0) - goto out; + return error; nfs4_session_limit_rwsize(server); nfs4_session_limit_xasize(server); @@ -1130,8 +1130,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, nfs_server_insert_lists(server); server->mount_time = jiffies; server->destroy = nfs4_destroy_server; -out: - return error; + return 0; } /* From 7375bbad467e9b1b101d591a458b49a0f3896641 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Jul 2025 10:14:47 +0200 Subject: [PATCH 036/279] NFS: cleanup nfs_inode_reclaim_delegation Reduce a level of indentation for most of the code in this function. Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/20250718081509.2607553-3-hch@lst.de Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 48 ++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 6f136c47eed7..568d2e6d65fa 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -237,34 +237,34 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); - if (delegation != NULL) { - spin_lock(&delegation->lock); - nfs4_stateid_copy(&delegation->stateid, stateid); - delegation->type = type; - delegation->pagemod_limit = pagemod_limit; - oldcred = delegation->cred; - delegation->cred = get_cred(cred); - switch (deleg_type) { - case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: - case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: - set_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags); - break; - default: - clear_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags); - } - clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); - if (test_and_clear_bit(NFS_DELEGATION_REVOKED, - &delegation->flags)) - atomic_long_inc(&nfs_active_delegations); - spin_unlock(&delegation->lock); - rcu_read_unlock(); - put_cred(oldcred); - trace_nfs4_reclaim_delegation(inode, type); - } else { + if (!delegation) { rcu_read_unlock(); nfs_inode_set_delegation(inode, cred, type, stateid, pagemod_limit, deleg_type); + return; } + + spin_lock(&delegation->lock); + nfs4_stateid_copy(&delegation->stateid, stateid); + delegation->type = type; + delegation->pagemod_limit = pagemod_limit; + oldcred = delegation->cred; + delegation->cred = get_cred(cred); + switch (deleg_type) { + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: + set_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags); + break; + default: + clear_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags); + } + clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); + if (test_and_clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) + atomic_long_inc(&nfs_active_delegations); + spin_unlock(&delegation->lock); + rcu_read_unlock(); + put_cred(oldcred); + trace_nfs4_reclaim_delegation(inode, type); } static int nfs_do_return_delegation(struct inode *inode, From aee077d8edc8b9772b205f4104686d676171e61f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Jul 2025 10:14:48 +0200 Subject: [PATCH 037/279] NFS: move the delegation_watermark module parameter Keep the module_param_named next to the variable declaration instead of somewhere unrelated, following the best practice in the rest of the kernel. Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/20250718081509.2607553-4-hch@lst.de Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 568d2e6d65fa..5f85966d7709 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -29,6 +29,7 @@ static atomic_long_t nfs_active_delegations; static unsigned nfs_delegation_watermark = NFS_DEFAULT_DELEGATION_WATERMARK; +module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644); static void __nfs_free_delegation(struct nfs_delegation *delegation) { @@ -1573,5 +1574,3 @@ bool nfs4_delegation_flush_on_close(const struct inode *inode) rcu_read_unlock(); return ret; } - -module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644); From 2fb4af5ea3c735a205d97de10f044f809b20af51 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Jul 2025 10:14:49 +0200 Subject: [PATCH 038/279] NFS: track active delegations per-server The active delegation watermark was added to avoid overloading servers. Track the active delegation per-server instead of globally so that clients talking to multiple servers aren't limited by the global limit. Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/20250718081509.2607553-5-hch@lst.de Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 + fs/nfs/delegation.c | 35 +++++++++++++++++++---------------- include/linux/nfs_fs_sb.h | 1 + 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 47258dc3af70..e13eb429b8b5 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1005,6 +1005,7 @@ struct nfs_server *nfs_alloc_server(void) INIT_LIST_HEAD(&server->ss_src_copies); atomic_set(&server->active, 0); + atomic_long_set(&server->nr_active_delegations, 0); server->io_stats = nfs_alloc_iostats(); if (!server->io_stats) { diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 5f85966d7709..ea96f77e38c2 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -27,7 +27,6 @@ #define NFS_DEFAULT_DELEGATION_WATERMARK (5000U) -static atomic_long_t nfs_active_delegations; static unsigned nfs_delegation_watermark = NFS_DEFAULT_DELEGATION_WATERMARK; module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644); @@ -38,11 +37,12 @@ static void __nfs_free_delegation(struct nfs_delegation *delegation) kfree_rcu(delegation, rcu); } -static void nfs_mark_delegation_revoked(struct nfs_delegation *delegation) +static void nfs_mark_delegation_revoked(struct nfs_server *server, + struct nfs_delegation *delegation) { if (!test_and_set_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { delegation->stateid.type = NFS4_INVALID_STATEID_TYPE; - atomic_long_dec(&nfs_active_delegations); + atomic_long_dec(&server->nr_active_delegations); if (!test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) nfs_clear_verifier_delegated(delegation->inode); } @@ -60,9 +60,10 @@ static void nfs_put_delegation(struct nfs_delegation *delegation) __nfs_free_delegation(delegation); } -static void nfs_free_delegation(struct nfs_delegation *delegation) +static void nfs_free_delegation(struct nfs_server *server, + struct nfs_delegation *delegation) { - nfs_mark_delegation_revoked(delegation); + nfs_mark_delegation_revoked(server, delegation); nfs_put_delegation(delegation); } @@ -261,7 +262,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, } clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); if (test_and_clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) - atomic_long_inc(&nfs_active_delegations); + atomic_long_inc(&NFS_SERVER(inode)->nr_active_delegations); spin_unlock(&delegation->lock); rcu_read_unlock(); put_cred(oldcred); @@ -413,7 +414,8 @@ nfs_update_delegation_cred(struct nfs_delegation *delegation, } static void -nfs_update_inplace_delegation(struct nfs_delegation *delegation, +nfs_update_inplace_delegation(struct nfs_server *server, + struct nfs_delegation *delegation, const struct nfs_delegation *update) { if (nfs4_stateid_is_newer(&update->stateid, &delegation->stateid)) { @@ -426,7 +428,7 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation, nfs_update_delegation_cred(delegation, update->cred); /* smp_mb__before_atomic() is implicit due to xchg() */ clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags); - atomic_long_inc(&nfs_active_delegations); + atomic_long_inc(&server->nr_active_delegations); } } } @@ -481,7 +483,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, if (nfs4_stateid_match_other(&old_delegation->stateid, &delegation->stateid)) { spin_lock(&old_delegation->lock); - nfs_update_inplace_delegation(old_delegation, + nfs_update_inplace_delegation(server, old_delegation, delegation); spin_unlock(&old_delegation->lock); goto out; @@ -530,7 +532,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, rcu_assign_pointer(nfsi->delegation, delegation); delegation = NULL; - atomic_long_inc(&nfs_active_delegations); + atomic_long_inc(&server->nr_active_delegations); trace_nfs4_set_delegation(inode, type); @@ -544,7 +546,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, __nfs_free_delegation(delegation); if (freeme != NULL) { nfs_do_return_delegation(inode, freeme, 0); - nfs_free_delegation(freeme); + nfs_free_delegation(server, freeme); } return status; } @@ -756,7 +758,7 @@ void nfs_inode_evict_delegation(struct inode *inode) set_bit(NFS_DELEGATION_RETURNING, &delegation->flags); set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags); nfs_do_return_delegation(inode, delegation, 1); - nfs_free_delegation(delegation); + nfs_free_delegation(NFS_SERVER(inode), delegation); } } @@ -842,7 +844,8 @@ void nfs4_inode_return_delegation_on_close(struct inode *inode) if (!delegation) goto out; if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) || - atomic_long_read(&nfs_active_delegations) >= nfs_delegation_watermark) { + atomic_long_read(&NFS_SERVER(inode)->nr_active_delegations) >= + nfs_delegation_watermark) { spin_lock(&delegation->lock); if (delegation->inode && list_empty(&NFS_I(inode)->open_files) && @@ -1018,7 +1021,7 @@ static void nfs_revoke_delegation(struct inode *inode, } spin_unlock(&delegation->lock); } - nfs_mark_delegation_revoked(delegation); + nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation); ret = true; out: rcu_read_unlock(); @@ -1050,7 +1053,7 @@ void nfs_delegation_mark_returned(struct inode *inode, delegation->stateid.seqid = stateid->seqid; } - nfs_mark_delegation_revoked(delegation); + nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation); clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags); spin_unlock(&delegation->lock); if (nfs_detach_delegation(NFS_I(inode), delegation, NFS_SERVER(inode))) @@ -1270,7 +1273,7 @@ static int nfs_server_reap_unclaimed_delegations(struct nfs_server *server, if (delegation != NULL) { if (nfs_detach_delegation(NFS_I(inode), delegation, server) != NULL) - nfs_free_delegation(delegation); + nfs_free_delegation(server, delegation); /* Match nfs_start_delegation_return_locked */ nfs_put_delegation(delegation); } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d2d36711a119..a9b44f12623f 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -254,6 +254,7 @@ struct nfs_server { struct list_head state_owners_lru; struct list_head layouts; struct list_head delegations; + atomic_long_t nr_active_delegations; struct list_head ss_copies; struct list_head ss_src_copies; From f5b3108e6a14418b120a3c38ca589b8d6cf87627 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Jul 2025 10:14:50 +0200 Subject: [PATCH 039/279] NFS: use a hash table for delegation lookup nfs_delegation_find_inode currently has to walk the entire list of delegations per inode, which can become pretty large, and can become even larger when increasing the delegation watermark. Add a hash table to speed up the delegation lookup, sized as a fraction of the delegation watermark. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250718081509.2607553-6-hch@lst.de Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 28 +++++++++++++++++++++++++++- fs/nfs/delegation.h | 3 +++ fs/nfs/nfs4client.c | 5 +++++ fs/nfs/nfs4proc.c | 22 +++++++++++++++++++++- include/linux/nfs_fs_sb.h | 2 ++ 5 files changed, 58 insertions(+), 2 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index ea96f77e38c2..9d3a5f29f17f 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -30,6 +30,13 @@ static unsigned nfs_delegation_watermark = NFS_DEFAULT_DELEGATION_WATERMARK; module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644); +static struct hlist_head *nfs_delegation_hash(struct nfs_server *server, + const struct nfs_fh *fhandle) +{ + return server->delegation_hash_table + + (nfs_fhandle_hash(fhandle) & server->delegation_hash_mask); +} + static void __nfs_free_delegation(struct nfs_delegation *delegation) { put_cred(delegation->cred); @@ -367,6 +374,7 @@ nfs_detach_delegation_locked(struct nfs_inode *nfsi, spin_unlock(&delegation->lock); return NULL; } + hlist_del_init_rcu(&delegation->hash); list_del_rcu(&delegation->super_list); delegation->inode = NULL; rcu_assign_pointer(nfsi->delegation, NULL); @@ -529,6 +537,8 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, spin_unlock(&inode->i_lock); list_add_tail_rcu(&delegation->super_list, &server->delegations); + hlist_add_head_rcu(&delegation->hash, + nfs_delegation_hash(server, &NFS_I(inode)->fh)); rcu_assign_pointer(nfsi->delegation, delegation); delegation = NULL; @@ -1166,11 +1176,12 @@ static struct inode * nfs_delegation_find_inode_server(struct nfs_server *server, const struct nfs_fh *fhandle) { + struct hlist_head *head = nfs_delegation_hash(server, fhandle); struct nfs_delegation *delegation; struct super_block *freeme = NULL; struct inode *res = NULL; - list_for_each_entry_rcu(delegation, &server->delegations, super_list) { + hlist_for_each_entry_rcu(delegation, head, hash) { spin_lock(&delegation->lock); if (delegation->inode != NULL && !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) && @@ -1577,3 +1588,18 @@ bool nfs4_delegation_flush_on_close(const struct inode *inode) rcu_read_unlock(); return ret; } + +int nfs4_delegation_hash_alloc(struct nfs_server *server) +{ + int delegation_buckets, i; + + delegation_buckets = roundup_pow_of_two(nfs_delegation_watermark / 16); + server->delegation_hash_mask = delegation_buckets - 1; + server->delegation_hash_table = kmalloc_array(delegation_buckets, + sizeof(*server->delegation_hash_table), GFP_KERNEL); + if (!server->delegation_hash_table) + return -ENOMEM; + for (i = 0; i < delegation_buckets; i++) + INIT_HLIST_HEAD(&server->delegation_hash_table[i]); + return 0; +} diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 8ff5ab9c5c25..08ec2e9c68a4 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -14,6 +14,7 @@ * NFSv4 delegation */ struct nfs_delegation { + struct hlist_node hash; struct list_head super_list; const struct cred *cred; struct inode *inode; @@ -123,4 +124,6 @@ static inline int nfs_have_delegated_mtime(struct inode *inode) NFS_DELEGATION_FLAG_TIME); } +int nfs4_delegation_hash_alloc(struct nfs_server *server); + #endif diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 5943a192f36b..2ea98f1f116f 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -802,6 +802,7 @@ static void nfs4_destroy_server(struct nfs_server *server) unset_pnfs_layoutdriver(server); nfs4_purge_state_owners(server, &freeme); nfs4_free_state_owners(&freeme); + kfree(server->delegation_hash_table); } /* @@ -1096,6 +1097,10 @@ static int nfs4_server_common_setup(struct nfs_server *server, { int error; + error = nfs4_delegation_hash_alloc(server); + if (error) + return error; + /* data servers support only a subset of NFSv4.1 */ if (is_ds_only_client(server->nfs_client)) return -EPROTONOSUPPORT; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ef2077e185b6..d8bebd757af3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10967,6 +10967,26 @@ static const struct inode_operations nfs4_file_inode_operations = { .listxattr = nfs4_listxattr, }; +static struct nfs_server *nfs4_clone_server(struct nfs_server *source, + struct nfs_fh *fh, struct nfs_fattr *fattr, + rpc_authflavor_t flavor) +{ + struct nfs_server *server; + int error; + + server = nfs_clone_server(source, fh, fattr, flavor); + if (IS_ERR(server)) + return server; + + error = nfs4_delegation_hash_alloc(server); + if (error) { + nfs_free_server(server); + return ERR_PTR(error); + } + + return server; +} + const struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ .dentry_ops = &nfs4_dentry_operations, @@ -11019,7 +11039,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .init_client = nfs4_init_client, .free_client = nfs4_free_client, .create_server = nfs4_create_server, - .clone_server = nfs_clone_server, + .clone_server = nfs4_clone_server, .discover_trunking = nfs4_discover_trunking, .enable_swap = nfs4_enable_swap, .disable_swap = nfs4_disable_swap, diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a9b44f12623f..d30c0245031c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -255,6 +255,8 @@ struct nfs_server { struct list_head layouts; struct list_head delegations; atomic_long_t nr_active_delegations; + unsigned int delegation_hash_mask; + struct hlist_head *delegation_hash_table; struct list_head ss_copies; struct list_head ss_src_copies; From 1db3a48e83bb64a70bf27263b7002585574a9c2d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Jul 2025 16:15:27 -0700 Subject: [PATCH 040/279] NFS: Fix wakeup of __nfs_lookup_revalidate() in unblock_revalidate() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use store_release_wake_up() to add the appropriate memory barrier before calling wake_up_var(&dentry->d_fsdata). Reported-by: Lukáš Hejtmánek Suggested-by: Santosh Pradhan Link: https://lore.kernel.org/all/18945D18-3EDB-4771-B019-0335CE671077@ics.muni.cz/ Fixes: 99bc9f2eb3f7 ("NFS: add barriers when testing for NFS_FSDATA_BLOCKED") Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index d0e0b435a843..d81217923936 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1828,9 +1828,7 @@ static void block_revalidate(struct dentry *dentry) static void unblock_revalidate(struct dentry *dentry) { - /* store_release ensures wait_var_event() sees the update */ - smp_store_release(&dentry->d_fsdata, NULL); - wake_up_var(&dentry->d_fsdata); + store_release_wake_up(&dentry->d_fsdata, NULL); } /* From ec0abdda891f082dcb95bfbe7fcc82b12342e506 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Jul 2025 16:44:03 -0700 Subject: [PATCH 041/279] NFS: Clean up pnfs_put_layout_hdr()/pnfs_destroy_layout_final() Use the wake_up_var_locked() and wait_var_event_spinlock() helpers. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3fd0971bf16f..a3135b5af7ee 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -306,7 +306,6 @@ void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) { struct inode *inode; - unsigned long i_state; if (!lo) return; @@ -317,12 +316,11 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) if (!list_empty(&lo->plh_segs)) WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); pnfs_detach_layout_hdr(lo); - i_state = inode->i_state; + /* Notify pnfs_destroy_layout_final() that we're done */ + if (inode->i_state & (I_FREEING | I_CLEAR)) + wake_up_var_locked(lo, &inode->i_lock); spin_unlock(&inode->i_lock); pnfs_free_layout_hdr(lo); - /* Notify pnfs_destroy_layout_final() that we're done */ - if (i_state & (I_FREEING | I_CLEAR)) - wake_up_var(lo); } } @@ -809,23 +807,17 @@ void pnfs_destroy_layout(struct nfs_inode *nfsi) } EXPORT_SYMBOL_GPL(pnfs_destroy_layout); -static bool pnfs_layout_removed(struct nfs_inode *nfsi, - struct pnfs_layout_hdr *lo) -{ - bool ret; - - spin_lock(&nfsi->vfs_inode.i_lock); - ret = nfsi->layout != lo; - spin_unlock(&nfsi->vfs_inode.i_lock); - return ret; -} - void pnfs_destroy_layout_final(struct nfs_inode *nfsi) { struct pnfs_layout_hdr *lo = __pnfs_destroy_layout(nfsi); + struct inode *inode = &nfsi->vfs_inode; - if (lo) - wait_var_event(lo, pnfs_layout_removed(nfsi, lo)); + if (lo) { + spin_lock(&inode->i_lock); + wait_var_event_spinlock(lo, nfsi->layout != lo, + &inode->i_lock); + spin_unlock(&inode->i_lock); + } } static bool From f66e6bffc531bafaeb067e6f6af56f52d5cd4ac2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Jul 2025 19:13:55 -0700 Subject: [PATCH 042/279] SUNRPC: Silence warnings about parameters not being described Warning: net/sunrpc/auth_gss/gss_krb5_crypto.c:902 function parameter 'len' not described in 'krb5_etm_decrypt' Warning: net/sunrpc/auth_gss/gss_krb5_crypto.c:902 function parameter 'buf' not described in 'krb5_etm_decrypt' Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 8f2d65c1e831..16dcf115de1e 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -875,8 +875,8 @@ krb5_etm_encrypt(struct krb5_ctx *kctx, u32 offset, * krb5_etm_decrypt - Decrypt using the RFC 8009 rules * @kctx: Kerberos context * @offset: starting offset of the ciphertext, in bytes - * @len: - * @buf: + * @len: size of ciphertext to unwrap + * @buf: ciphertext to unwrap * @headskip: OUT: the enctype's confounder length, in octets * @tailskip: OUT: the enctype's HMAC length, in octets * From ef93a685e01a281b5e2a25ce4e3428cf9371a205 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Jul 2025 09:24:58 -0400 Subject: [PATCH 043/279] NFS: Fix filehandle bounds checking in nfs_fh_to_dentry() The function needs to check the minimal filehandle length before it can access the embedded filehandle. Reported-by: zhangjian Fixes: 20fa19027286 ("nfs: add export operations") Signed-off-by: Trond Myklebust --- fs/nfs/export.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/nfs/export.c b/fs/nfs/export.c index e9c233b6fd20..a10dd5f9d078 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -66,14 +66,21 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid, { struct nfs_fattr *fattr = NULL; struct nfs_fh *server_fh = nfs_exp_embedfh(fid->raw); - size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size; + size_t fh_size = offsetof(struct nfs_fh, data); const struct nfs_rpc_ops *rpc_ops; struct dentry *dentry; struct inode *inode; - int len = EMBED_FH_OFF + XDR_QUADLEN(fh_size); + int len = EMBED_FH_OFF; u32 *p = fid->raw; int ret; + /* Initial check of bounds */ + if (fh_len < len + XDR_QUADLEN(fh_size) || + fh_len > XDR_QUADLEN(NFS_MAXFHSIZE)) + return NULL; + /* Calculate embedded filehandle size */ + fh_size += server_fh->size; + len += XDR_QUADLEN(fh_size); /* NULL translates to ESTALE */ if (fh_len < len || fh_type != len) return NULL; From 9acb237deff7667b0f6b10fe6b1b70c4429ea049 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 22 Jul 2025 16:56:41 -0400 Subject: [PATCH 044/279] NFSv4.2: another fix for listxattr Currently, when the server supports NFS4.1 security labels then security.selinux label in included twice. Instead, only add it when the server doesn't possess security label support. Fixes: 243fea134633 ("NFSv4.2: fix listxattr to return selinux security label") Signed-off-by: Olga Kornievskaia Link: https://lore.kernel.org/r/20250722205641.79394-1-okorniev@redhat.com Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d8bebd757af3..d7dc669d84c5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10883,7 +10883,7 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size) { - ssize_t error, error2, error3, error4; + ssize_t error, error2, error3, error4 = 0; size_t left = size; error = generic_listxattr(dentry, list, left); @@ -10911,9 +10911,11 @@ static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size) left -= error3; } - error4 = security_inode_listsecurity(d_inode(dentry), list, left); - if (error4 < 0) - return error4; + if (!nfs_server_capable(d_inode(dentry), NFS_CAP_SECURITY_LABEL)) { + error4 = security_inode_listsecurity(d_inode(dentry), list, left); + if (error4 < 0) + return error4; + } error += error2 + error3 + error4; if (size && error > size) From 99765233ab42bf7a4950377ad7894dce8a5c0e60 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 9 Jul 2025 21:47:43 -0400 Subject: [PATCH 045/279] NFS: Fixup allocation flags for nfsiod's __GFP_NORETRY If the NFS client is doing writeback from a workqueue context, avoid using __GFP_NORETRY for allocations if the task has set PF_MEMALLOC_NOIO or PF_MEMALLOC_NOFS. The combination of these flags makes memory allocation failures much more likely. We've seen those allocation failures show up when the loopback driver is doing writeback from a workqueue to a file on NFS, where memory allocation failure results in errors or corruption within the loopback device's filesystem. Suggested-by: Trond Myklebust Fixes: 0bae835b63c5 ("NFS: Avoid writeback threads getting stuck in mempool_alloc()") Signed-off-by: Benjamin Coddington Reviewed-by: Laurence Oberman Tested-by: Laurence Oberman Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/f83ac1155a4bc670f2663959a7a068571e06afd9.1752111622.git.bcodding@redhat.com Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d55dce8bf043..0143e0794d32 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -670,9 +670,12 @@ nfs_write_match_verf(const struct nfs_writeverf *verf, static inline gfp_t nfs_io_gfp_mask(void) { - if (current->flags & PF_WQ_WORKER) - return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; - return GFP_KERNEL; + gfp_t ret = current_gfp_context(GFP_KERNEL); + + /* For workers __GFP_NORETRY only with __GFP_IO or __GFP_FS */ + if ((current->flags & PF_WQ_WORKER) && ret == GFP_KERNEL) + ret |= __GFP_NORETRY | __GFP_NOWARN; + return ret; } /* From eb3bb145280b6c857a748731a229698e4a7cf37b Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Sat, 26 Jul 2025 00:02:54 +0500 Subject: [PATCH 046/279] ASoC: SOF: amd: acp-loader: Use GFP_KERNEL for DMA allocations in resume context Replace GFP_ATOMIC with GFP_KERNEL for dma_alloc_coherent() calls. This change improves memory allocation reliability during firmware loading, particularly during system resume when memory pressure is high. Because of using GFP_KERNEL, reclaim can happen which can reduce the probability of failure. Fixes memory allocation failures observed during system resume with fragmented memory conditions. snd_sof_amd_vangogh 0000:04:00.5: error: failed to load DSP firmware after resume -12 Fixes: 145d7e5ae8f4e ("ASoC: SOF: amd: add option to use sram for data bin loading") Fixes: 7e51a9e38ab20 ("ASoC: SOF: amd: Add fw loader and renoir dsp ops to load firmware") Cc: stable@vger.kernel.org Signed-off-by: Muhammad Usama Anjum Link: https://patch.msgid.link/20250725190254.1081184-1-usama.anjum@collabora.com Signed-off-by: Mark Brown --- sound/soc/sof/amd/acp-loader.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/sof/amd/acp-loader.c b/sound/soc/sof/amd/acp-loader.c index ea105227227d..98324bbade15 100644 --- a/sound/soc/sof/amd/acp-loader.c +++ b/sound/soc/sof/amd/acp-loader.c @@ -65,7 +65,7 @@ int acp_dsp_block_write(struct snd_sof_dev *sdev, enum snd_sof_fw_blk_type blk_t dma_size = page_count * ACP_PAGE_SIZE; adata->bin_buf = dma_alloc_coherent(&pci->dev, dma_size, &adata->sha_dma_addr, - GFP_ATOMIC); + GFP_KERNEL); if (!adata->bin_buf) return -ENOMEM; } @@ -77,7 +77,7 @@ int acp_dsp_block_write(struct snd_sof_dev *sdev, enum snd_sof_fw_blk_type blk_t adata->data_buf = dma_alloc_coherent(&pci->dev, ACP_DEFAULT_DRAM_LENGTH, &adata->dma_addr, - GFP_ATOMIC); + GFP_KERNEL); if (!adata->data_buf) return -ENOMEM; } @@ -90,7 +90,7 @@ int acp_dsp_block_write(struct snd_sof_dev *sdev, enum snd_sof_fw_blk_type blk_t adata->sram_data_buf = dma_alloc_coherent(&pci->dev, ACP_DEFAULT_SRAM_LENGTH, &adata->sram_dma_addr, - GFP_ATOMIC); + GFP_KERNEL); if (!adata->sram_data_buf) return -ENOMEM; } From 459779d04ae8dfd4083679a7bf9d72af165d1023 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 16 Jun 2025 15:28:56 +0900 Subject: [PATCH 047/279] block: Improve read ahead size for rotational devices For a device that does not advertize an optimal I/O size, the function blk_apply_bdi_limits() defaults to an initial setting of the ra_pages field of struct backing_dev_info to VM_READAHEAD_PAGES, that is, 128 KB. This low I/O size value is far from being optimal for hard-disk devices: when reading files from multiple contexts using buffered I/Os, the seek overhead between the small read commands generated to read-ahead multiple files will significantly limit the performance that can be achieved. This fact applies to all ATA devices as ATA does not define an optimal I/O size and the SCSI SAT specification does not define a default value to expose to the host. Modify blk_apply_bdi_limits() to use a device max_sectors limit to calculate the ra_pages field of struct backing_dev_info, when the device is a rotational one (BLK_FEAT_ROTATIONAL feature is set). For a SCSI disk, this defaults to 2560 KB, which significantly improve performance for buffered reads. Using XFS and sequentially reading randomly selected (large) files stored on a SATA HDD, the maximum throughput achieved with 8 readers reading files with 1MB buffered I/Os increases from 122 MB/s to 167 MB/s (+36%). The improvement is even larger when reading files using 128 KB buffered I/Os, with a throughput increasing from 57 MB/s to 165 MB/s (+189%). Signed-off-by: Damien Le Moal Reviewed-by: John Garry Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20250616062856.1629897-1-dlemoal@kernel.org Signed-off-by: Jens Axboe --- block/blk-settings.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 91449147bae9..608df4674245 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -62,16 +62,24 @@ EXPORT_SYMBOL(blk_set_stacking_limits); void blk_apply_bdi_limits(struct backing_dev_info *bdi, struct queue_limits *lim) { + u64 io_opt = lim->io_opt; + /* * For read-ahead of large files to be effective, we need to read ahead - * at least twice the optimal I/O size. + * at least twice the optimal I/O size. For rotational devices that do + * not report an optimal I/O size (e.g. ATA HDDs), use the maximum I/O + * size to avoid falling back to the (rather inefficient) small default + * read-ahead size. * * There is no hardware limitation for the read-ahead size and the user * might have increased the read-ahead size through sysfs, so don't ever * decrease it. */ + if (!io_opt && (lim->features & BLK_FEAT_ROTATIONAL)) + io_opt = (u64)lim->max_sectors << SECTOR_SHIFT; + bdi->ra_pages = max3(bdi->ra_pages, - lim->io_opt * 2 / PAGE_SIZE, + io_opt * 2 >> PAGE_SHIFT, VM_READAHEAD_PAGES); bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT; } From 448dfecc7ff807822ecd47a5c052acedca7d09e8 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 29 Jul 2025 09:14:47 +0000 Subject: [PATCH 048/279] block: avoid possible overflow for chunk_sectors check in blk_stack_limits() In blk_stack_limits(), we check that the t->chunk_sectors value is a multiple of the t->physical_block_size value. However, by finding the chunk_sectors value in bytes, we may overflow the unsigned int which holds chunk_sectors, so change the check to be based on sectors. Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Signed-off-by: John Garry Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20250729091448.1691334-2-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- block/blk-settings.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 608df4674245..bb192d776bdb 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -857,7 +857,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, } /* chunk_sectors a multiple of the physical block size? */ - if ((t->chunk_sectors << 9) & (t->physical_block_size - 1)) { + if (t->chunk_sectors % (t->physical_block_size >> SECTOR_SHIFT)) { t->chunk_sectors = 0; t->flags |= BLK_FLAG_MISALIGNED; ret = -1; From 1da67b5b1754713b8ea0c3dd847e04790cffd91f Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 29 Jul 2025 09:14:48 +0000 Subject: [PATCH 049/279] block: Enforce power-of-2 physical block size The merging/splitting code and other queue limits checking depends on the physical block size being a power-of-2, so enforce it. Reviewed-by: Martin K. Petersen Signed-off-by: John Garry Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20250729091448.1691334-3-john.g.garry@oracle.com [axboe: add missing braces] Signed-off-by: Jens Axboe --- block/blk-settings.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index bb192d776bdb..a7a794baba72 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -320,8 +320,12 @@ int blk_validate_limits(struct queue_limits *lim) pr_warn("Invalid logical block size (%d)\n", lim->logical_block_size); return -EINVAL; } - if (lim->physical_block_size < lim->logical_block_size) + if (lim->physical_block_size < lim->logical_block_size) { lim->physical_block_size = lim->logical_block_size; + } else if (!is_power_of_2(lim->physical_block_size)) { + pr_warn("Invalid physical block size (%d)\n", lim->physical_block_size); + return -EINVAL; + } /* * The minimum I/O size defaults to the physical block size unless From 5421681bc3ef13476bd9443379cd69381e8760fa Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 29 Jul 2025 10:32:29 +0800 Subject: [PATCH 050/279] blk-ioc: don't hold queue_lock for ioc_lookup_icq() Currently issue io can grab queue_lock three times from bfq_bio_merge(), bfq_limit_depth() and bfq_prepare_request(), the queue_lock is not necessary if icq is already created because both queue and ioc can't be freed before io issuing is done, hence remove the unnecessary queue_lock and use rcu to protect radix tree lookup. Noted this is also a prep patch to support request batch dispatching[1]. [1] https://lore.kernel.org/all/20250722072431.610354-1-yukuai1@huaweicloud.com/ Signed-off-by: Yu Kuai Reviewed-by: Damien Le Moal Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20250729023229.2944898-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 18 ++---------------- block/blk-ioc.c | 16 ++++++---------- 2 files changed, 8 insertions(+), 26 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 0cb1e9873aab..f71ec0887733 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -454,17 +454,10 @@ static struct bfq_io_cq *icq_to_bic(struct io_cq *icq) */ static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q) { - struct bfq_io_cq *icq; - unsigned long flags; - if (!current->io_context) return NULL; - spin_lock_irqsave(&q->queue_lock, flags); - icq = icq_to_bic(ioc_lookup_icq(q)); - spin_unlock_irqrestore(&q->queue_lock, flags); - - return icq; + return icq_to_bic(ioc_lookup_icq(q)); } /* @@ -2457,15 +2450,8 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { struct bfq_data *bfqd = q->elevator->elevator_data; - struct request *free = NULL; - /* - * bfq_bic_lookup grabs the queue_lock: invoke it now and - * store its return value for later use, to avoid nesting - * queue_lock inside the bfqd->lock. We assume that the bic - * returned by bfq_bic_lookup does not go away before - * bfqd->lock is taken. - */ struct bfq_io_cq *bic = bfq_bic_lookup(q); + struct request *free = NULL; bool ret; spin_lock_irq(&bfqd->lock); diff --git a/block/blk-ioc.c b/block/blk-ioc.c index ce82770c72ab..9fda3906e5f5 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -308,24 +308,23 @@ int __copy_io(unsigned long clone_flags, struct task_struct *tsk) #ifdef CONFIG_BLK_ICQ /** - * ioc_lookup_icq - lookup io_cq from ioc + * ioc_lookup_icq - lookup io_cq from ioc in io issue path * @q: the associated request_queue * * Look up io_cq associated with @ioc - @q pair from @ioc. Must be called - * with @q->queue_lock held. + * from io issue path, either return NULL if current issue io to @q for the + * first time, or return a valid icq. */ struct io_cq *ioc_lookup_icq(struct request_queue *q) { struct io_context *ioc = current->io_context; struct io_cq *icq; - lockdep_assert_held(&q->queue_lock); - /* * icq's are indexed from @ioc using radix tree and hint pointer, - * both of which are protected with RCU. All removals are done - * holding both q and ioc locks, and we're holding q lock - if we - * find a icq which points to us, it's guaranteed to be valid. + * both of which are protected with RCU, io issue path ensures that + * both request_queue and current task are valid, the found icq + * is guaranteed to be valid until the io is done. */ rcu_read_lock(); icq = rcu_dereference(ioc->icq_hint); @@ -419,10 +418,7 @@ struct io_cq *ioc_find_get_icq(struct request_queue *q) task_unlock(current); } else { get_io_context(ioc); - - spin_lock_irq(&q->queue_lock); icq = ioc_lookup_icq(q); - spin_unlock_irq(&q->queue_lock); } if (!icq) { From 10dfd36f078423c51602a9a21ed85e8e6c947a00 Mon Sep 17 00:00:00 2001 From: Romain Gantois Date: Tue, 29 Jul 2025 11:50:57 +0200 Subject: [PATCH 051/279] regulator: core: correct convergence check in regulator_set_voltage() The logic in regulator_set_voltage() which checks for a non-convergence condition on a stepped regulator is flawed. regulator_set_voltage() checks if the error in target voltage has increased or decreased, and returns -EWOULDBLOCK if the error has not decreased enough. The correct non-convergence condition is: new_delta - delta > -rdev->constraints->max_uV_step or equivalently: delta - new_delta < rdev->constraints->max_uV_step But the currently used condition is: new_delta - delta > rdev->constraints->max_uV_step Which may cause an infinite loop if the voltage error doesn't converge. Fix this by correcting the convergence condition. Suggested-by: Jon Hunter Fixes: d511206dc7443 ("regulator: core: repeat voltage setting request for stepped regulators") Signed-off-by: Romain Gantois Link: https://patch.msgid.link/20250729-b4-regulator-stepping-fix-v1-1-3f7b8c55d7d7@bootlin.com Tested-by: Jon Hunter Reviewed-by: Jon Hunter Signed-off-by: Mark Brown --- drivers/regulator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 8ed9b96518cf..554d83c4af0c 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -3884,7 +3884,7 @@ static int regulator_set_voltage_unlocked(struct regulator *regulator, new_delta = ret; /* check that voltage is converging quickly enough */ - if (new_delta - delta > rdev->constraints->max_uV_step) { + if (delta - new_delta < rdev->constraints->max_uV_step) { ret = -EWOULDBLOCK; goto out; } From 926406a85ad895fbe6ee4577cdbc4f55245a0742 Mon Sep 17 00:00:00 2001 From: Fabrizio Castro Date: Tue, 29 Jul 2025 15:51:10 +0100 Subject: [PATCH 052/279] MAINTAINERS: Add entries for the RZ/V2H(P) RSPI Add the MAINTAINERS entries for the Renesas RZ/V2H(P) RSPI driver. Signed-off-by: Fabrizio Castro Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20250729145110.37258-1-fabrizio.castro.jz@renesas.com Signed-off-by: Mark Brown --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index addb26082fa0..b1c081f9c567 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21208,6 +21208,14 @@ S: Maintained F: Documentation/devicetree/bindings/net/renesas,rzv2h-gbeth.yaml F: drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c +RENESAS RZ/V2H(P) RSPI DRIVER +M: Fabrizio Castro +L: linux-spi@vger.kernel.org +L: linux-renesas-soc@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/spi/renesas,rzv2h-rspi.yaml +F: drivers/spi/spi-rzv2h-rspi.c + RENESAS RZ/V2H(P) USB2PHY PORT RESET DRIVER M: Fabrizio Castro M: Lad Prabhakar From 8d452accd1380e1cb0b15a9876bcd19b14c5fabb Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Wed, 30 Jul 2025 14:40:54 +0800 Subject: [PATCH 053/279] ASoC: wm8962: Clear master mode when enter runtime suspend The enabled master mode causes power consumption to increase in idle state. Clear the MSTR bit in runtime supsend and recover it in runtime resume to reduce power. Signed-off-by: Shengjiu Wang Reviewed-by: Charles Keepax Link: https://patch.msgid.link/20250730064054.3006409-1-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm8962.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index d69aa8b15629..27b4326429a0 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -82,6 +82,7 @@ struct wm8962_priv { #endif int irq; + bool master_flag; }; /* We can't use the same notifier block for more than one supply and @@ -2715,6 +2716,7 @@ static int wm8962_set_dai_sysclk(struct snd_soc_dai *dai, int clk_id, static int wm8962_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) { struct snd_soc_component *component = dai->component; + struct wm8962_priv *wm8962 = snd_soc_component_get_drvdata(component); int aif0 = 0; switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { @@ -2761,9 +2763,11 @@ static int wm8962_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } + wm8962->master_flag = false; switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { case SND_SOC_DAIFMT_CBP_CFP: aif0 |= WM8962_MSTR; + wm8962->master_flag = true; break; case SND_SOC_DAIFMT_CBC_CFC: break; @@ -3903,6 +3907,9 @@ static int wm8962_runtime_resume(struct device *dev) WM8962_BIAS_ENA | WM8962_VMID_SEL_MASK, WM8962_BIAS_ENA | 0x180); + if (wm8962->master_flag) + regmap_update_bits(wm8962->regmap, WM8962_AUDIO_INTERFACE_0, + WM8962_MSTR, WM8962_MSTR); msleep(5); return 0; @@ -3916,6 +3923,10 @@ static int wm8962_runtime_suspend(struct device *dev) { struct wm8962_priv *wm8962 = dev_get_drvdata(dev); + if (wm8962->master_flag) + regmap_update_bits(wm8962->regmap, WM8962_AUDIO_INTERFACE_0, + WM8962_MSTR, 0); + regmap_update_bits(wm8962->regmap, WM8962_PWR_MGMT_1, WM8962_VMID_SEL_MASK | WM8962_BIAS_ENA, 0); From 49811586be373e26a3ab52f54e0dfa663c02fddd Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Wed, 30 Jul 2025 13:16:07 +0530 Subject: [PATCH 054/279] block: move elevator queue allocation logic into blk_mq_init_sched In preparation for allocating sched_tags before freezing the request queue and acquiring ->elevator_lock, move the elevator queue allocation logic from the elevator ops ->init_sched callback into blk_mq_init_sched. As elevator_alloc is now only invoked from block layer core, we don't need to export it, so unexport elevator_alloc function. This refactoring provides a centralized location for elevator queue initialization, which makes it easier to store pre-allocated sched_tags in the struct elevator_queue during later changes. Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Nilay Shroff Link: https://lore.kernel.org/r/20250730074614.2537382-2-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 13 +++---------- block/blk-mq-sched.c | 11 ++++++++--- block/elevator.c | 1 - block/elevator.h | 2 +- block/kyber-iosched.c | 11 ++--------- block/mq-deadline.c | 14 ++------------ 6 files changed, 16 insertions(+), 36 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index f71ec0887733..aca9886c9ee3 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -7218,22 +7218,16 @@ static void bfq_init_root_group(struct bfq_group *root_group, root_group->sched_data.bfq_class_idle_last_service = jiffies; } -static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) +static int bfq_init_queue(struct request_queue *q, struct elevator_queue *eq) { struct bfq_data *bfqd; - struct elevator_queue *eq; unsigned int i; struct blk_independent_access_ranges *ia_ranges = q->disk->ia_ranges; - eq = elevator_alloc(q, e); - if (!eq) + bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node); + if (!bfqd) return -ENOMEM; - bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node); - if (!bfqd) { - kobject_put(&eq->kobj); - return -ENOMEM; - } eq->elevator_data = bfqd; spin_lock_irq(&q->queue_lock); @@ -7391,7 +7385,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) out_free: kfree(bfqd); - kobject_put(&eq->kobj); return -ENOMEM; } diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 55a0fd105147..359e0704e09b 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -475,10 +475,14 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth, BLKDEV_DEFAULT_RQ); + eq = elevator_alloc(q, e); + if (!eq) + return -ENOMEM; + if (blk_mq_is_shared_tags(flags)) { ret = blk_mq_init_sched_shared_tags(q); if (ret) - return ret; + goto err_put_elevator; } queue_for_each_hw_ctx(q, hctx, i) { @@ -487,7 +491,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) goto err_free_map_and_rqs; } - ret = e->ops.init_sched(q, e); + ret = e->ops.init_sched(q, eq); if (ret) goto err_free_map_and_rqs; @@ -508,7 +512,8 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) err_free_map_and_rqs: blk_mq_sched_free_rqs(q); blk_mq_sched_tags_teardown(q, flags); - +err_put_elevator: + kobject_put(&eq->kobj); q->elevator = NULL; return ret; } diff --git a/block/elevator.c b/block/elevator.c index 88f8f36bed98..939b0c590fbe 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -148,7 +148,6 @@ struct elevator_queue *elevator_alloc(struct request_queue *q, return eq; } -EXPORT_SYMBOL(elevator_alloc); static void elevator_release(struct kobject *kobj) { diff --git a/block/elevator.h b/block/elevator.h index a07ce773a38f..a4de5f9ad790 100644 --- a/block/elevator.h +++ b/block/elevator.h @@ -24,7 +24,7 @@ struct blk_mq_alloc_data; struct blk_mq_hw_ctx; struct elevator_mq_ops { - int (*init_sched)(struct request_queue *, struct elevator_type *); + int (*init_sched)(struct request_queue *, struct elevator_queue *); void (*exit_sched)(struct elevator_queue *); int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int); void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int); diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 4dba8405bd01..7b6832cb3a8d 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -402,20 +402,13 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) return ERR_PTR(ret); } -static int kyber_init_sched(struct request_queue *q, struct elevator_type *e) +static int kyber_init_sched(struct request_queue *q, struct elevator_queue *eq) { struct kyber_queue_data *kqd; - struct elevator_queue *eq; - - eq = elevator_alloc(q, e); - if (!eq) - return -ENOMEM; kqd = kyber_queue_data_alloc(q); - if (IS_ERR(kqd)) { - kobject_put(&eq->kobj); + if (IS_ERR(kqd)) return PTR_ERR(kqd); - } blk_stat_enable_accounting(q); diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 2edf1cac06d5..7b6caf30e00a 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -568,20 +568,14 @@ static void dd_exit_sched(struct elevator_queue *e) /* * initialize elevator private data (deadline_data). */ -static int dd_init_sched(struct request_queue *q, struct elevator_type *e) +static int dd_init_sched(struct request_queue *q, struct elevator_queue *eq) { struct deadline_data *dd; - struct elevator_queue *eq; enum dd_prio prio; - int ret = -ENOMEM; - - eq = elevator_alloc(q, e); - if (!eq) - return ret; dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node); if (!dd) - goto put_eq; + return -ENOMEM; eq->elevator_data = dd; @@ -608,10 +602,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e) q->elevator = eq; return 0; - -put_eq: - kobject_put(&eq->kobj); - return ret; } /* From f5a6604f7a4405450e4a1f54e5430f47290c500f Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Wed, 30 Jul 2025 13:16:08 +0530 Subject: [PATCH 055/279] block: fix lockdep warning caused by lock dependency in elv_iosched_store Recent lockdep reports [1] have revealed a potential deadlock caused by a lock dependency between the percpu allocator lock and the elevator lock. This issue can be avoided by ensuring that the allocation and release of scheduler tags (sched_tags) are performed outside the elevator lock. Furthermore, the queue does not need to be remain frozen during these operations. To address this, move all sched_tags allocations and deallocations outside of both the ->elevator_lock and the ->freeze_lock. Since the lifetime of the elevator queue and its associated sched_tags is closely tied, the allocated sched_tags are now stored in the elevator queue structure. Then, during the actual elevator switch (which runs under ->freeze_lock and ->elevator_lock), the pre-allocated sched_tags are assigned to the appropriate q->hctx. Once the elevator switch is complete and the locks are released, the old elevator queue and its associated sched_tags are freed. This commit specifically addresses the allocation/deallocation of sched_ tags during elevator switching. Note that sched_tags may also be allocated in other contexts, such as during nr_hw_queues updates. Supporting that use case will require batch allocation/deallocation, which will be handled in a follow-up patch. This restructuring ensures that sched_tags memory management occurs entirely outside of the ->elevator_lock and ->freeze_lock context, eliminating the lock dependency problem seen during scheduler updates. [1] https://lore.kernel.org/all/0659ea8d-a463-47c8-9180-43c719e106eb@linux.ibm.com/ Reported-by: Stefan Haberland Closes: https://lore.kernel.org/all/0659ea8d-a463-47c8-9180-43c719e106eb@linux.ibm.com/ Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Nilay Shroff Link: https://lore.kernel.org/r/20250730074614.2537382-3-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 155 +++++++++++++++++++++++-------------------- block/blk-mq-sched.h | 8 ++- block/elevator.c | 40 +++++++++-- block/elevator.h | 14 +++- 4 files changed, 136 insertions(+), 81 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 359e0704e09b..2d6d1ebdd8fb 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -374,64 +374,17 @@ bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq, } EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge); -static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q, - struct blk_mq_hw_ctx *hctx, - unsigned int hctx_idx) -{ - if (blk_mq_is_shared_tags(q->tag_set->flags)) { - hctx->sched_tags = q->sched_shared_tags; - return 0; - } - - hctx->sched_tags = blk_mq_alloc_map_and_rqs(q->tag_set, hctx_idx, - q->nr_requests); - - if (!hctx->sched_tags) - return -ENOMEM; - return 0; -} - -static void blk_mq_exit_sched_shared_tags(struct request_queue *queue) -{ - blk_mq_free_rq_map(queue->sched_shared_tags); - queue->sched_shared_tags = NULL; -} - /* called in queue's release handler, tagset has gone away */ static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int flags) { struct blk_mq_hw_ctx *hctx; unsigned long i; - queue_for_each_hw_ctx(q, hctx, i) { - if (hctx->sched_tags) { - if (!blk_mq_is_shared_tags(flags)) - blk_mq_free_rq_map(hctx->sched_tags); - hctx->sched_tags = NULL; - } - } + queue_for_each_hw_ctx(q, hctx, i) + hctx->sched_tags = NULL; if (blk_mq_is_shared_tags(flags)) - blk_mq_exit_sched_shared_tags(q); -} - -static int blk_mq_init_sched_shared_tags(struct request_queue *queue) -{ - struct blk_mq_tag_set *set = queue->tag_set; - - /* - * Set initial depth at max so that we don't need to reallocate for - * updating nr_requests. - */ - queue->sched_shared_tags = blk_mq_alloc_map_and_rqs(set, - BLK_MQ_NO_HCTX_IDX, - MAX_SCHED_RQ); - if (!queue->sched_shared_tags) - return -ENOMEM; - - blk_mq_tag_update_sched_shared_tags(queue); - - return 0; + q->sched_shared_tags = NULL; } void blk_mq_sched_reg_debugfs(struct request_queue *q) @@ -458,8 +411,75 @@ void blk_mq_sched_unreg_debugfs(struct request_queue *q) mutex_unlock(&q->debugfs_mutex); } +void blk_mq_free_sched_tags(struct elevator_tags *et, + struct blk_mq_tag_set *set) +{ + unsigned long i; + + /* Shared tags are stored at index 0 in @tags. */ + if (blk_mq_is_shared_tags(set->flags)) + blk_mq_free_map_and_rqs(set, et->tags[0], BLK_MQ_NO_HCTX_IDX); + else { + for (i = 0; i < et->nr_hw_queues; i++) + blk_mq_free_map_and_rqs(set, et->tags[i], i); + } + + kfree(et); +} + +struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set, + unsigned int nr_hw_queues) +{ + unsigned int nr_tags; + int i; + struct elevator_tags *et; + gfp_t gfp = GFP_NOIO | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; + + if (blk_mq_is_shared_tags(set->flags)) + nr_tags = 1; + else + nr_tags = nr_hw_queues; + + et = kmalloc(sizeof(struct elevator_tags) + + nr_tags * sizeof(struct blk_mq_tags *), gfp); + if (!et) + return NULL; + /* + * Default to double of smaller one between hw queue_depth and + * 128, since we don't split into sync/async like the old code + * did. Additionally, this is a per-hw queue depth. + */ + et->nr_requests = 2 * min_t(unsigned int, set->queue_depth, + BLKDEV_DEFAULT_RQ); + et->nr_hw_queues = nr_hw_queues; + + if (blk_mq_is_shared_tags(set->flags)) { + /* Shared tags are stored at index 0 in @tags. */ + et->tags[0] = blk_mq_alloc_map_and_rqs(set, BLK_MQ_NO_HCTX_IDX, + MAX_SCHED_RQ); + if (!et->tags[0]) + goto out; + } else { + for (i = 0; i < et->nr_hw_queues; i++) { + et->tags[i] = blk_mq_alloc_map_and_rqs(set, i, + et->nr_requests); + if (!et->tags[i]) + goto out_unwind; + } + } + + return et; +out_unwind: + while (--i >= 0) + blk_mq_free_map_and_rqs(set, et->tags[i], i); +out: + kfree(et); + return NULL; +} + /* caller must have a reference to @e, will grab another one if successful */ -int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) +int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e, + struct elevator_tags *et) { unsigned int flags = q->tag_set->flags; struct blk_mq_hw_ctx *hctx; @@ -467,40 +487,33 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) unsigned long i; int ret; - /* - * Default to double of smaller one between hw queue_depth and 128, - * since we don't split into sync/async like the old code did. - * Additionally, this is a per-hw queue depth. - */ - q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth, - BLKDEV_DEFAULT_RQ); - - eq = elevator_alloc(q, e); + eq = elevator_alloc(q, e, et); if (!eq) return -ENOMEM; + q->nr_requests = et->nr_requests; + if (blk_mq_is_shared_tags(flags)) { - ret = blk_mq_init_sched_shared_tags(q); - if (ret) - goto err_put_elevator; + /* Shared tags are stored at index 0 in @et->tags. */ + q->sched_shared_tags = et->tags[0]; + blk_mq_tag_update_sched_shared_tags(q); } queue_for_each_hw_ctx(q, hctx, i) { - ret = blk_mq_sched_alloc_map_and_rqs(q, hctx, i); - if (ret) - goto err_free_map_and_rqs; + if (blk_mq_is_shared_tags(flags)) + hctx->sched_tags = q->sched_shared_tags; + else + hctx->sched_tags = et->tags[i]; } ret = e->ops.init_sched(q, eq); if (ret) - goto err_free_map_and_rqs; + goto out; queue_for_each_hw_ctx(q, hctx, i) { if (e->ops.init_hctx) { ret = e->ops.init_hctx(hctx, i); if (ret) { - eq = q->elevator; - blk_mq_sched_free_rqs(q); blk_mq_exit_sched(q, eq); kobject_put(&eq->kobj); return ret; @@ -509,10 +522,8 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) } return 0; -err_free_map_and_rqs: - blk_mq_sched_free_rqs(q); +out: blk_mq_sched_tags_teardown(q, flags); -err_put_elevator: kobject_put(&eq->kobj); q->elevator = NULL; return ret; diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 1326526bb733..0cde00cd1c47 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -18,10 +18,16 @@ void __blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx); void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx); -int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e); +int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e, + struct elevator_tags *et); void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e); void blk_mq_sched_free_rqs(struct request_queue *q); +struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set, + unsigned int nr_hw_queues); +void blk_mq_free_sched_tags(struct elevator_tags *et, + struct blk_mq_tag_set *set); + static inline void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx) { if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) diff --git a/block/elevator.c b/block/elevator.c index 939b0c590fbe..e9dc837b7b70 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -54,6 +54,8 @@ struct elv_change_ctx { struct elevator_queue *old; /* for registering new elevator */ struct elevator_queue *new; + /* holds sched tags data */ + struct elevator_tags *et; }; static DEFINE_SPINLOCK(elv_list_lock); @@ -132,7 +134,7 @@ static struct elevator_type *elevator_find_get(const char *name) static const struct kobj_type elv_ktype; struct elevator_queue *elevator_alloc(struct request_queue *q, - struct elevator_type *e) + struct elevator_type *e, struct elevator_tags *et) { struct elevator_queue *eq; @@ -145,6 +147,7 @@ struct elevator_queue *elevator_alloc(struct request_queue *q, kobject_init(&eq->kobj, &elv_ktype); mutex_init(&eq->sysfs_lock); hash_init(eq->hash); + eq->et = et; return eq; } @@ -165,7 +168,6 @@ static void elevator_exit(struct request_queue *q) lockdep_assert_held(&q->elevator_lock); ioc_clear_queue(q); - blk_mq_sched_free_rqs(q); mutex_lock(&e->sysfs_lock); blk_mq_exit_sched(q, e); @@ -591,7 +593,7 @@ static int elevator_switch(struct request_queue *q, struct elv_change_ctx *ctx) } if (new_e) { - ret = blk_mq_init_sched(q, new_e); + ret = blk_mq_init_sched(q, new_e, ctx->et); if (ret) goto out_unfreeze; ctx->new = q->elevator; @@ -626,8 +628,10 @@ static void elv_exit_and_release(struct request_queue *q) elevator_exit(q); mutex_unlock(&q->elevator_lock); blk_mq_unfreeze_queue(q, memflags); - if (e) + if (e) { + blk_mq_free_sched_tags(e->et, q->tag_set); kobject_put(&e->kobj); + } } static int elevator_change_done(struct request_queue *q, @@ -640,6 +644,7 @@ static int elevator_change_done(struct request_queue *q, &ctx->old->flags); elv_unregister_queue(q, ctx->old); + blk_mq_free_sched_tags(ctx->old->et, q->tag_set); kobject_put(&ctx->old->kobj); if (enable_wbt) wbt_enable_default(q->disk); @@ -658,9 +663,16 @@ static int elevator_change_done(struct request_queue *q, static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx) { unsigned int memflags; + struct blk_mq_tag_set *set = q->tag_set; int ret = 0; - lockdep_assert_held(&q->tag_set->update_nr_hwq_lock); + lockdep_assert_held(&set->update_nr_hwq_lock); + + if (strncmp(ctx->name, "none", 4)) { + ctx->et = blk_mq_alloc_sched_tags(set, set->nr_hw_queues); + if (!ctx->et) + return -ENOMEM; + } memflags = blk_mq_freeze_queue(q); /* @@ -680,6 +692,11 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx) blk_mq_unfreeze_queue(q, memflags); if (!ret) ret = elevator_change_done(q, ctx); + /* + * Free sched tags if it's allocated but we couldn't switch elevator. + */ + if (ctx->et && !ctx->new) + blk_mq_free_sched_tags(ctx->et, set); return ret; } @@ -690,6 +707,7 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx) */ void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e) { + struct blk_mq_tag_set *set = q->tag_set; struct elv_change_ctx ctx = {}; int ret = -ENODEV; @@ -697,15 +715,25 @@ void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e) if (e && !blk_queue_dying(q) && blk_queue_registered(q)) { ctx.name = e->elevator_name; - + ctx.et = blk_mq_alloc_sched_tags(set, set->nr_hw_queues); + if (!ctx.et) { + WARN_ON_ONCE(1); + goto unfreeze; + } mutex_lock(&q->elevator_lock); /* force to reattach elevator after nr_hw_queue is updated */ ret = elevator_switch(q, &ctx); mutex_unlock(&q->elevator_lock); } +unfreeze: blk_mq_unfreeze_queue_nomemrestore(q); if (!ret) WARN_ON_ONCE(elevator_change_done(q, &ctx)); + /* + * Free sched tags if it's allocated but we couldn't switch elevator. + */ + if (ctx.et && !ctx.new) + blk_mq_free_sched_tags(ctx.et, set); } /* diff --git a/block/elevator.h b/block/elevator.h index a4de5f9ad790..adc5c157e17e 100644 --- a/block/elevator.h +++ b/block/elevator.h @@ -23,6 +23,15 @@ enum elv_merge { struct blk_mq_alloc_data; struct blk_mq_hw_ctx; +struct elevator_tags { + /* num. of hardware queues for which tags are allocated */ + unsigned int nr_hw_queues; + /* depth used while allocating tags */ + unsigned int nr_requests; + /* shared tag is stored at index 0 */ + struct blk_mq_tags *tags[]; +}; + struct elevator_mq_ops { int (*init_sched)(struct request_queue *, struct elevator_queue *); void (*exit_sched)(struct elevator_queue *); @@ -113,6 +122,7 @@ struct request *elv_rqhash_find(struct request_queue *q, sector_t offset); struct elevator_queue { struct elevator_type *type; + struct elevator_tags *et; void *elevator_data; struct kobject kobj; struct mutex sysfs_lock; @@ -152,8 +162,8 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *page); ssize_t elv_iosched_store(struct gendisk *disk, const char *page, size_t count); extern bool elv_bio_merge_ok(struct request *, struct bio *); -extern struct elevator_queue *elevator_alloc(struct request_queue *, - struct elevator_type *); +struct elevator_queue *elevator_alloc(struct request_queue *, + struct elevator_type *, struct elevator_tags *); /* * Helper functions. From 04225d13aef11b2a539014def5e47d8c21fd74a5 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Wed, 30 Jul 2025 13:16:09 +0530 Subject: [PATCH 056/279] block: fix potential deadlock while running nr_hw_queue update Move scheduler tags (sched_tags) allocation and deallocation outside both the ->elevator_lock and ->freeze_lock when updating nr_hw_queues. This change breaks the dependency chain from the percpu allocator lock to the elevator lock, helping to prevent potential deadlocks, as observed in the reported lockdep splat[1]. This commit introduces batch allocation and deallocation helpers for sched_tags, which are now used from within __blk_mq_update_nr_hw_queues routine while iterating through the tagset. With this change, all sched_tags memory management is handled entirely outside the ->elevator_lock and the ->freeze_lock context, thereby eliminating the lock dependency that could otherwise manifest during nr_hw_queues updates. [1] https://lore.kernel.org/all/0659ea8d-a463-47c8-9180-43c719e106eb@linux.ibm.com/ Reported-by: Stefan Haberland Closes: https://lore.kernel.org/all/0659ea8d-a463-47c8-9180-43c719e106eb@linux.ibm.com/ Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Nilay Shroff Link: https://lore.kernel.org/r/20250730074614.2537382-4-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 65 ++++++++++++++++++++++++++++++++++++++++++++ block/blk-mq-sched.h | 4 +++ block/blk-mq.c | 16 +++++++---- block/blk.h | 4 ++- block/elevator.c | 15 ++++------ 5 files changed, 89 insertions(+), 15 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 2d6d1ebdd8fb..e2ce4a28e6c9 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -427,6 +427,32 @@ void blk_mq_free_sched_tags(struct elevator_tags *et, kfree(et); } +void blk_mq_free_sched_tags_batch(struct xarray *et_table, + struct blk_mq_tag_set *set) +{ + struct request_queue *q; + struct elevator_tags *et; + + lockdep_assert_held_write(&set->update_nr_hwq_lock); + + list_for_each_entry(q, &set->tag_list, tag_set_list) { + /* + * Accessing q->elevator without holding q->elevator_lock is + * safe because we're holding here set->update_nr_hwq_lock in + * the writer context. So, scheduler update/switch code (which + * acquires the same lock but in the reader context) can't run + * concurrently. + */ + if (q->elevator) { + et = xa_load(et_table, q->id); + if (unlikely(!et)) + WARN_ON_ONCE(1); + else + blk_mq_free_sched_tags(et, set); + } + } +} + struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set, unsigned int nr_hw_queues) { @@ -477,6 +503,45 @@ struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set, return NULL; } +int blk_mq_alloc_sched_tags_batch(struct xarray *et_table, + struct blk_mq_tag_set *set, unsigned int nr_hw_queues) +{ + struct request_queue *q; + struct elevator_tags *et; + gfp_t gfp = GFP_NOIO | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; + + lockdep_assert_held_write(&set->update_nr_hwq_lock); + + list_for_each_entry(q, &set->tag_list, tag_set_list) { + /* + * Accessing q->elevator without holding q->elevator_lock is + * safe because we're holding here set->update_nr_hwq_lock in + * the writer context. So, scheduler update/switch code (which + * acquires the same lock but in the reader context) can't run + * concurrently. + */ + if (q->elevator) { + et = blk_mq_alloc_sched_tags(set, nr_hw_queues); + if (!et) + goto out_unwind; + if (xa_insert(et_table, q->id, et, gfp)) + goto out_free_tags; + } + } + return 0; +out_free_tags: + blk_mq_free_sched_tags(et, set); +out_unwind: + list_for_each_entry_continue_reverse(q, &set->tag_list, tag_set_list) { + if (q->elevator) { + et = xa_load(et_table, q->id); + if (et) + blk_mq_free_sched_tags(et, set); + } + } + return -ENOMEM; +} + /* caller must have a reference to @e, will grab another one if successful */ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e, struct elevator_tags *et) diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 0cde00cd1c47..b554e1d55950 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -25,8 +25,12 @@ void blk_mq_sched_free_rqs(struct request_queue *q); struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set, unsigned int nr_hw_queues); +int blk_mq_alloc_sched_tags_batch(struct xarray *et_table, + struct blk_mq_tag_set *set, unsigned int nr_hw_queues); void blk_mq_free_sched_tags(struct elevator_tags *et, struct blk_mq_tag_set *set); +void blk_mq_free_sched_tags_batch(struct xarray *et_table, + struct blk_mq_tag_set *set); static inline void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx) { diff --git a/block/blk-mq.c b/block/blk-mq.c index 9692fa4c3ef2..b67d6c02eceb 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4974,12 +4974,13 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) * Switch back to the elevator type stored in the xarray. */ static void blk_mq_elv_switch_back(struct request_queue *q, - struct xarray *elv_tbl) + struct xarray *elv_tbl, struct xarray *et_tbl) { struct elevator_type *e = xa_load(elv_tbl, q->id); + struct elevator_tags *t = xa_load(et_tbl, q->id); /* The elv_update_nr_hw_queues unfreezes the queue. */ - elv_update_nr_hw_queues(q, e); + elv_update_nr_hw_queues(q, e, t); /* Drop the reference acquired in blk_mq_elv_switch_none. */ if (e) @@ -5031,7 +5032,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int prev_nr_hw_queues = set->nr_hw_queues; unsigned int memflags; int i; - struct xarray elv_tbl; + struct xarray elv_tbl, et_tbl; lockdep_assert_held(&set->tag_list_lock); @@ -5044,6 +5045,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, memflags = memalloc_noio_save(); + xa_init(&et_tbl); + if (blk_mq_alloc_sched_tags_batch(&et_tbl, set, nr_hw_queues) < 0) + goto out_memalloc_restore; + xa_init(&elv_tbl); list_for_each_entry(q, &set->tag_list, tag_set_list) { @@ -5087,7 +5092,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, switch_back: /* The blk_mq_elv_switch_back unfreezes queue for us. */ list_for_each_entry(q, &set->tag_list, tag_set_list) - blk_mq_elv_switch_back(q, &elv_tbl); + blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl); list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_sysfs_register_hctxs(q); @@ -5098,7 +5103,8 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, } xa_destroy(&elv_tbl); - + xa_destroy(&et_tbl); +out_memalloc_restore: memalloc_noio_restore(memflags); /* Free the excess tags when nr_hw_queues shrink. */ diff --git a/block/blk.h b/block/blk.h index 76901a39997f..0a2eccf28ca4 100644 --- a/block/blk.h +++ b/block/blk.h @@ -12,6 +12,7 @@ #include "blk-crypto-internal.h" struct elevator_type; +struct elevator_tags; /* * Default upper limit for the software max_sectors limit used for regular I/Os. @@ -330,7 +331,8 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list, bool blk_insert_flush(struct request *rq); -void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e); +void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e, + struct elevator_tags *t); void elevator_set_default(struct request_queue *q); void elevator_set_none(struct request_queue *q); diff --git a/block/elevator.c b/block/elevator.c index e9dc837b7b70..fe96c6f4753c 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -705,7 +705,8 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx) * The I/O scheduler depends on the number of hardware queues, this forces a * reattachment when nr_hw_queues changes. */ -void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e) +void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e, + struct elevator_tags *t) { struct blk_mq_tag_set *set = q->tag_set; struct elv_change_ctx ctx = {}; @@ -715,25 +716,21 @@ void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e) if (e && !blk_queue_dying(q) && blk_queue_registered(q)) { ctx.name = e->elevator_name; - ctx.et = blk_mq_alloc_sched_tags(set, set->nr_hw_queues); - if (!ctx.et) { - WARN_ON_ONCE(1); - goto unfreeze; - } + ctx.et = t; + mutex_lock(&q->elevator_lock); /* force to reattach elevator after nr_hw_queue is updated */ ret = elevator_switch(q, &ctx); mutex_unlock(&q->elevator_lock); } -unfreeze: blk_mq_unfreeze_queue_nomemrestore(q); if (!ret) WARN_ON_ONCE(elevator_change_done(q, &ctx)); /* * Free sched tags if it's allocated but we couldn't switch elevator. */ - if (ctx.et && !ctx.new) - blk_mq_free_sched_tags(ctx.et, set); + if (t && !ctx.new) + blk_mq_free_sched_tags(t, set); } /* From 11f74f48c14c1f4fe16541900ea5944c42e30ccf Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Wed, 30 Jul 2025 14:49:06 +0200 Subject: [PATCH 057/279] ASoC: Intel: avs: Fix uninitialized pointer error in probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If pcim_request_all_regions() fails, error path operates on uninitialized 'bus' pointer. Found out by Coverity static analyzer. Reviewed-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Link: https://patch.msgid.link/20250730124906.351798-1-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/avs/core.c b/sound/soc/intel/avs/core.c index 7af324753673..5ebadba07ecc 100644 --- a/sound/soc/intel/avs/core.c +++ b/sound/soc/intel/avs/core.c @@ -445,6 +445,8 @@ static int avs_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) adev = devm_kzalloc(dev, sizeof(*adev), GFP_KERNEL); if (!adev) return -ENOMEM; + bus = &adev->base.core; + ret = avs_bus_init(adev, pci, id); if (ret < 0) { dev_err(dev, "failed to init avs bus: %d\n", ret); @@ -455,7 +457,6 @@ static int avs_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) if (ret < 0) return ret; - bus = &adev->base.core; bus->addr = pci_resource_start(pci, 0); bus->remap_addr = pci_ioremap_bar(pci, 0); if (!bus->remap_addr) { From 6f02527729bd31ca4e473bff19fda4ccd5889148 Mon Sep 17 00:00:00 2001 From: Norman Maurer Date: Mon, 28 Jul 2025 20:59:53 -1000 Subject: [PATCH 058/279] io_uring/net: Allow to do vectorized send At the moment you have to use sendmsg for vectorized send. While this works it's suboptimal as it also means you need to allocate a struct msghdr that needs to be kept alive until a submission happens. We can remove this limitation by just allowing to use send directly. Signed-off-by: Norman Maurer Link: https://lore.kernel.org/r/20250729065952.26646-1-norman_maurer@apple.com [axboe: remove -EINVAL return for SENDMSG and SEND_VECTORIZED] [axboe: allow send_zc to set SEND_VECTORIZED too] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 4 ++++ io_uring/net.c | 9 +++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index b8a0e70ee2fd..6957dc539d83 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -392,12 +392,16 @@ enum io_uring_op { * the starting buffer ID in cqe->flags as per * usual for provided buffer usage. The buffers * will be contiguous from the starting buffer ID. + * + * IORING_SEND_VECTORIZED If set, SEND[_ZC] will take a pointer to a io_vec + * to allow vectorized send operations. */ #define IORING_RECVSEND_POLL_FIRST (1U << 0) #define IORING_RECV_MULTISHOT (1U << 1) #define IORING_RECVSEND_FIXED_BUF (1U << 2) #define IORING_SEND_ZC_REPORT_USAGE (1U << 3) #define IORING_RECVSEND_BUNDLE (1U << 4) +#define IORING_SEND_VECTORIZED (1U << 5) /* * cqe.res for IORING_CQE_F_NOTIF if diff --git a/io_uring/net.c b/io_uring/net.c index 35585bdc59f3..dd96e355982f 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -382,6 +382,10 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) } if (req->flags & REQ_F_BUFFER_SELECT) return 0; + + if (sr->flags & IORING_SEND_VECTORIZED) + return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE); + return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter); } @@ -409,7 +413,7 @@ static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE); } -#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE) +#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE | IORING_SEND_VECTORIZED) int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { @@ -1318,7 +1322,8 @@ void io_send_zc_cleanup(struct io_kiocb *req) } #define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF) -#define IO_ZC_FLAGS_VALID (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE) +#define IO_ZC_FLAGS_VALID (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE | \ + IORING_SEND_VECTORIZED) int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { From 3dca3d51b933beb3f35a60472ed2110d1bd7046a Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 30 Jul 2025 09:14:43 +0200 Subject: [PATCH 059/279] ARM: s3c/gpio: complete the conversion to new GPIO value setters Commit fb52f3226cab ("ARM: s3c/gpio: use new line value setter callbacks") correctly changed the assignment of the callback but missed the check one liner higher. Change it now too to using the recommended callback as the legacy one is going away soon. Fixes: fb52f3226cab ("ARM: s3c/gpio: use new line value setter callbacks") Signed-off-by: Bartosz Golaszewski Signed-off-by: Arnd Bergmann --- arch/arm/mach-s3c/gpio-samsung.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-s3c/gpio-samsung.c b/arch/arm/mach-s3c/gpio-samsung.c index 206a492fbaf5..3ee4ad969cc2 100644 --- a/arch/arm/mach-s3c/gpio-samsung.c +++ b/arch/arm/mach-s3c/gpio-samsung.c @@ -516,7 +516,7 @@ static void __init samsung_gpiolib_add(struct samsung_gpio_chip *chip) gc->direction_input = samsung_gpiolib_2bit_input; if (!gc->direction_output) gc->direction_output = samsung_gpiolib_2bit_output; - if (!gc->set) + if (!gc->set_rv) gc->set_rv = samsung_gpiolib_set; if (!gc->get) gc->get = samsung_gpiolib_get; From 1df1fc845d221eb646539836dbf509eb96b41afd Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 30 Jul 2025 15:33:21 +0800 Subject: [PATCH 060/279] md: fix create on open mddev lifetime regression Commit 9e59d609763f ("md: call del_gendisk in control path") moves setting MD_DELETED from __mddev_put() to do_md_stop(), however, for the case create on open, mddev can be freed without do_md_stop(): 1) open md_probe md_alloc_and_put md_alloc mddev_alloc atomic_set(&mddev->active, 1); mddev->hold_active = UNTIL_IOCTL mddev_put atomic_dec_and_test(&mddev->active) if (mddev->hold_active) -> active is 0, hold_active is set md_open mddev_get atomic_inc(&mddev->active); 2) ioctl that is not STOP_ARRAY, for example, GET_ARRAY_INFO: md_ioctl mddev->hold_active = 0 3) close md_release mddev_put(mddev); atomic_dec_and_lock(&mddev->active, &all_mddevs_lock) __mddev_put -> hold_active is cleared, mddev will be freed queue_work(md_misc_wq, &mddev->del_work) Now that MD_DELETED is not set, before mddev is freed by mddev_delayed_delete(), md_open can still succeed and break mddev lifetime, causing mddev->kobj refcount underflow or mddev uaf problem. Fix this problem by setting MD_DELETED before queuing del_work. Reported-by: syzbot+9921e319bd6168140b40@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68894408.a00a0220.26d0e1.0012.GAE@google.com/ Reported-by: syzbot+fa3a12519f0d3fd4ec16@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68894408.a00a0220.26d0e1.0013.GAE@google.com/ Fixes: 9e59d609763f ("md: call del_gendisk in control path") Link: https://lore.kernel.org/linux-raid/20250730073321.2583158-1-yukuai1@huaweicloud.com Signed-off-by: Yu Kuai Reviewed-by: Paul Menzel Reviewed-by: Xiao Ni --- drivers/md/md.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 046fe85c76fe..2716d5c59517 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -636,6 +636,12 @@ static void __mddev_put(struct mddev *mddev) mddev->ctime || mddev->hold_active) return; + /* + * If array is freed by stopping array, MD_DELETED is set by + * do_md_stop(), MD_DELETED is still set here in case mddev is freed + * directly by closing a mddev that is created by create_on_open. + */ + set_bit(MD_DELETED, &mddev->flags); /* * Call queue_work inside the spinlock so that flush_workqueue() after * mddev_find will succeed in waiting for the work to be done. From 948b1fe12005d39e2b49087b50e5ee55c9a8f76f Mon Sep 17 00:00:00 2001 From: Heming Zhao Date: Mon, 28 Jul 2025 12:21:40 +0800 Subject: [PATCH 061/279] md/md-cluster: handle REMOVE message earlier Commit a1fd37f97808 ("md: Don't wait for MD_RECOVERY_NEEDED for HOT_REMOVE_DISK ioctl") introduced a regression in the md_cluster module. (Failed cases 02r1_Manage_re-add & 02r10_Manage_re-add) Consider a 2-node cluster: - node1 set faulty & remove command on a disk. - node2 must correctly update the array metadata. Before a1fd37f97808, on node1, the delay between msg:METADATA_UPDATED (triggered by faulty) and msg:REMOVE was sufficient for node2 to reload the disk info (written by node1). After a1fd37f97808, node1 no longer waits between faulty and remove, causing it to send msg:REMOVE while node2 is still reloading disk info. This often results in node2 failing to remove the faulty disk. == how to trigger == set up a 2-node cluster (node1 & node2) with disks vdc & vdd. on node1: mdadm -CR /dev/md0 -l1 -b clustered -n2 /dev/vdc /dev/vdd --assume-clean ssh node2-ip mdadm -A /dev/md0 /dev/vdc /dev/vdd mdadm --manage /dev/md0 --fail /dev/vdc --remove /dev/vdc check array status on both nodes with "mdadm -D /dev/md0". node1 output: Number Major Minor RaidDevice State - 0 0 0 removed 1 254 48 1 active sync /dev/vdd node2 output: Number Major Minor RaidDevice State - 0 0 0 removed 1 254 48 1 active sync /dev/vdd 0 254 32 - faulty /dev/vdc Fixes: a1fd37f97808 ("md: Don't wait for MD_RECOVERY_NEEDED for HOT_REMOVE_DISK ioctl") Signed-off-by: Heming Zhao Reviewed-by: Su Yue Link: https://lore.kernel.org/linux-raid/20250728042145.9989-1-heming.zhao@suse.com Signed-off-by: Yu Kuai --- drivers/md/md.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 2716d5c59517..8af97ef80ec5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9777,8 +9777,8 @@ void md_check_recovery(struct mddev *mddev) * remove disk. */ rdev_for_each_safe(rdev, tmp, mddev) { - if (test_and_clear_bit(ClusterRemove, &rdev->flags) && - rdev->raid_disk < 0) + if (rdev->raid_disk < 0 && + test_and_clear_bit(ClusterRemove, &rdev->flags)) md_kick_rdev_from_array(rdev); } } @@ -10084,8 +10084,11 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev) /* Check for change of roles in the active devices */ rdev_for_each_safe(rdev2, tmp, mddev) { - if (test_bit(Faulty, &rdev2->flags)) + if (test_bit(Faulty, &rdev2->flags)) { + if (test_bit(ClusterRemove, &rdev2->flags)) + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); continue; + } /* Check if the roles changed */ role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]); From 907a99c314a5a695e35acff78ac61f4ec950a6d3 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Tue, 22 Jul 2025 11:33:40 +0800 Subject: [PATCH 062/279] md: rename recovery_cp to resync_offset 'recovery_cp' was used to represent the progress of sync, but its name contains recovery, which can cause confusion. Replaces 'recovery_cp' with 'resync_offset' for clarity. Signed-off-by: Li Nan Link: https://lore.kernel.org/linux-raid/20250722033340.1933388-1-linan666@huaweicloud.com Signed-off-by: Yu Kuai --- drivers/md/dm-raid.c | 42 ++++++++++++++-------------- drivers/md/md-bitmap.c | 8 +++--- drivers/md/md-cluster.c | 16 +++++------ drivers/md/md.c | 50 +++++++++++++++++----------------- drivers/md/md.h | 2 +- drivers/md/raid0.c | 6 ++-- drivers/md/raid1-10.c | 2 +- drivers/md/raid1.c | 10 +++---- drivers/md/raid10.c | 16 +++++------ drivers/md/raid5-ppl.c | 6 ++-- drivers/md/raid5.c | 30 ++++++++++---------- include/uapi/linux/raid/md_p.h | 2 +- 12 files changed, 95 insertions(+), 95 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index e8c0a8c6fb51..9835f2fe26e9 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -439,7 +439,7 @@ static bool rs_is_reshapable(struct raid_set *rs) /* Return true, if raid set in @rs is recovering */ static bool rs_is_recovering(struct raid_set *rs) { - return rs->md.recovery_cp < rs->md.dev_sectors; + return rs->md.resync_offset < rs->md.dev_sectors; } /* Return true, if raid set in @rs is reshaping */ @@ -769,7 +769,7 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r rs->md.layout = raid_type->algorithm; rs->md.new_layout = rs->md.layout; rs->md.delta_disks = 0; - rs->md.recovery_cp = MaxSector; + rs->md.resync_offset = MaxSector; for (i = 0; i < raid_devs; i++) md_rdev_init(&rs->dev[i].rdev); @@ -913,7 +913,7 @@ static int parse_dev_params(struct raid_set *rs, struct dm_arg_set *as) rs->md.external = 0; rs->md.persistent = 1; rs->md.major_version = 2; - } else if (rebuild && !rs->md.recovery_cp) { + } else if (rebuild && !rs->md.resync_offset) { /* * Without metadata, we will not be able to tell if the array * is in-sync or not - we must assume it is not. Therefore, @@ -1696,20 +1696,20 @@ static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors) { /* raid0 does not recover */ if (rs_is_raid0(rs)) - rs->md.recovery_cp = MaxSector; + rs->md.resync_offset = MaxSector; /* * A raid6 set has to be recovered either * completely or for the grown part to * ensure proper parity and Q-Syndrome */ else if (rs_is_raid6(rs)) - rs->md.recovery_cp = dev_sectors; + rs->md.resync_offset = dev_sectors; /* * Other raid set types may skip recovery * depending on the 'nosync' flag. */ else - rs->md.recovery_cp = test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags) + rs->md.resync_offset = test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags) ? MaxSector : dev_sectors; } @@ -2144,7 +2144,7 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev) sb->events = cpu_to_le64(mddev->events); sb->disk_recovery_offset = cpu_to_le64(rdev->recovery_offset); - sb->array_resync_offset = cpu_to_le64(mddev->recovery_cp); + sb->array_resync_offset = cpu_to_le64(mddev->resync_offset); sb->level = cpu_to_le32(mddev->level); sb->layout = cpu_to_le32(mddev->layout); @@ -2335,18 +2335,18 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) } if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) - mddev->recovery_cp = le64_to_cpu(sb->array_resync_offset); + mddev->resync_offset = le64_to_cpu(sb->array_resync_offset); /* * During load, we set FirstUse if a new superblock was written. * There are two reasons we might not have a superblock: * 1) The raid set is brand new - in which case, all of the * devices must have their In_sync bit set. Also, - * recovery_cp must be 0, unless forced. + * resync_offset must be 0, unless forced. * 2) This is a new device being added to an old raid set * and the new device needs to be rebuilt - in which * case the In_sync bit will /not/ be set and - * recovery_cp must be MaxSector. + * resync_offset must be MaxSector. * 3) This is/are a new device(s) being added to an old * raid set during takeover to a higher raid level * to provide capacity for redundancy or during reshape @@ -2391,8 +2391,8 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) new_devs > 1 ? "s" : ""); return -EINVAL; } else if (!test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags) && rs_is_recovering(rs)) { - DMERR("'rebuild' specified while raid set is not in-sync (recovery_cp=%llu)", - (unsigned long long) mddev->recovery_cp); + DMERR("'rebuild' specified while raid set is not in-sync (resync_offset=%llu)", + (unsigned long long) mddev->resync_offset); return -EINVAL; } else if (rs_is_reshaping(rs)) { DMERR("'rebuild' specified while raid set is being reshaped (reshape_position=%llu)", @@ -2697,11 +2697,11 @@ static int rs_adjust_data_offsets(struct raid_set *rs) } out: /* - * Raise recovery_cp in case data_offset != 0 to + * Raise resync_offset in case data_offset != 0 to * avoid false recovery positives in the constructor. */ - if (rs->md.recovery_cp < rs->md.dev_sectors) - rs->md.recovery_cp += rs->dev[0].rdev.data_offset; + if (rs->md.resync_offset < rs->md.dev_sectors) + rs->md.resync_offset += rs->dev[0].rdev.data_offset; /* Adjust data offsets on all rdevs but on any raid4/5/6 journal device */ rdev_for_each(rdev, &rs->md) { @@ -2756,7 +2756,7 @@ static int rs_setup_takeover(struct raid_set *rs) } clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags); - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; while (d--) { rdev = &rs->dev[d].rdev; @@ -2764,7 +2764,7 @@ static int rs_setup_takeover(struct raid_set *rs) if (test_bit(d, (void *) rs->rebuild_disks)) { clear_bit(In_sync, &rdev->flags); clear_bit(Faulty, &rdev->flags); - mddev->recovery_cp = rdev->recovery_offset = 0; + mddev->resync_offset = rdev->recovery_offset = 0; /* Bitmap has to be created when we do an "up" takeover */ set_bit(MD_ARRAY_FIRST_USE, &mddev->flags); } @@ -3222,7 +3222,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (r) goto bad; - rs_setup_recovery(rs, rs->md.recovery_cp < rs->md.dev_sectors ? rs->md.recovery_cp : rs->md.dev_sectors); + rs_setup_recovery(rs, rs->md.resync_offset < rs->md.dev_sectors ? rs->md.resync_offset : rs->md.dev_sectors); } else { /* This is no size change or it is shrinking, update size and record in superblocks */ r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false); @@ -3446,7 +3446,7 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, } else { if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery)) - r = mddev->recovery_cp; + r = mddev->resync_offset; else r = mddev->curr_resync_completed; @@ -4074,9 +4074,9 @@ static int raid_preresume(struct dm_target *ti) } /* Check for any resize/reshape on @rs and adjust/initiate */ - if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) { + if (mddev->resync_offset && mddev->resync_offset < MaxSector) { set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); - mddev->resync_min = mddev->recovery_cp; + mddev->resync_min = mddev->resync_offset; if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) mddev->resync_max_sectors = mddev->dev_sectors; } diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 7f524a26cebc..334b71404930 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -1987,12 +1987,12 @@ static void bitmap_dirty_bits(struct mddev *mddev, unsigned long s, md_bitmap_set_memory_bits(bitmap, sec, 1); md_bitmap_file_set_bit(bitmap, sec); - if (sec < bitmap->mddev->recovery_cp) + if (sec < bitmap->mddev->resync_offset) /* We are asserting that the array is dirty, - * so move the recovery_cp address back so + * so move the resync_offset address back so * that it is obvious that it is dirty */ - bitmap->mddev->recovery_cp = sec; + bitmap->mddev->resync_offset = sec; } } @@ -2258,7 +2258,7 @@ static int bitmap_load(struct mddev *mddev) || bitmap->events_cleared == mddev->events) /* no need to keep dirty bits to optimise a * re-add of a missing device */ - start = mddev->recovery_cp; + start = mddev->resync_offset; mutex_lock(&mddev->bitmap_info.mutex); err = md_bitmap_init_from_disk(bitmap, start); diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 94221d964d4f..5497eaee96e7 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -337,11 +337,11 @@ static void recover_bitmaps(struct md_thread *thread) md_wakeup_thread(mddev->sync_thread); if (hi > 0) { - if (lo < mddev->recovery_cp) - mddev->recovery_cp = lo; + if (lo < mddev->resync_offset) + mddev->resync_offset = lo; /* wake up thread to continue resync in case resync * is not finished */ - if (mddev->recovery_cp != MaxSector) { + if (mddev->resync_offset != MaxSector) { /* * clear the REMOTE flag since we will launch * resync thread in current node. @@ -863,9 +863,9 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots) lockres_free(bm_lockres); continue; } - if ((hi > 0) && (lo < mddev->recovery_cp)) { + if ((hi > 0) && (lo < mddev->resync_offset)) { set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - mddev->recovery_cp = lo; + mddev->resync_offset = lo; md_check_recovery(mddev); } @@ -1027,7 +1027,7 @@ static int leave(struct mddev *mddev) * Also, we should send BITMAP_NEEDS_SYNC message in * case reshaping is interrupted. */ - if ((cinfo->slot_number > 0 && mddev->recovery_cp != MaxSector) || + if ((cinfo->slot_number > 0 && mddev->resync_offset != MaxSector) || (mddev->reshape_position != MaxSector && test_bit(MD_CLOSING, &mddev->flags))) resync_bitmap(mddev); @@ -1605,8 +1605,8 @@ static int gather_bitmaps(struct md_rdev *rdev) pr_warn("md-cluster: Could not gather bitmaps from slot %d", sn); goto out; } - if ((hi > 0) && (lo < mddev->recovery_cp)) - mddev->recovery_cp = lo; + if ((hi > 0) && (lo < mddev->resync_offset)) + mddev->resync_offset = lo; } out: return err; diff --git a/drivers/md/md.c b/drivers/md/md.c index 8af97ef80ec5..9c7ed23c45ad 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1415,13 +1415,13 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, stru mddev->layout = -1; if (sb->state & (1<recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; else { if (sb->events_hi == sb->cp_events_hi && sb->events_lo == sb->cp_events_lo) { - mddev->recovery_cp = sb->recovery_cp; + mddev->resync_offset = sb->resync_offset; } else - mddev->recovery_cp = 0; + mddev->resync_offset = 0; } memcpy(mddev->uuid+0, &sb->set_uuid0, 4); @@ -1547,13 +1547,13 @@ static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev) mddev->minor_version = sb->minor_version; if (mddev->in_sync) { - sb->recovery_cp = mddev->recovery_cp; + sb->resync_offset = mddev->resync_offset; sb->cp_events_hi = (mddev->events>>32); sb->cp_events_lo = (u32)mddev->events; - if (mddev->recovery_cp == MaxSector) + if (mddev->resync_offset == MaxSector) sb->state = (1<< MD_SB_CLEAN); } else - sb->recovery_cp = 0; + sb->resync_offset = 0; sb->layout = mddev->layout; sb->chunk_size = mddev->chunk_sectors << 9; @@ -1901,7 +1901,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struc mddev->bitmap_info.default_space = (4096-1024) >> 9; mddev->reshape_backwards = 0; - mddev->recovery_cp = le64_to_cpu(sb->resync_offset); + mddev->resync_offset = le64_to_cpu(sb->resync_offset); memcpy(mddev->uuid, sb->set_uuid, 16); mddev->max_disks = (4096-256)/2; @@ -2087,7 +2087,7 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) sb->utime = cpu_to_le64((__u64)mddev->utime); sb->events = cpu_to_le64(mddev->events); if (mddev->in_sync) - sb->resync_offset = cpu_to_le64(mddev->recovery_cp); + sb->resync_offset = cpu_to_le64(mddev->resync_offset); else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags)) sb->resync_offset = cpu_to_le64(MaxSector); else @@ -2767,7 +2767,7 @@ void md_update_sb(struct mddev *mddev, int force_change) /* If this is just a dirty<->clean transition, and the array is clean * and 'events' is odd, we can roll back to the previous clean state */ if (nospares - && (mddev->in_sync && mddev->recovery_cp == MaxSector) + && (mddev->in_sync && mddev->resync_offset == MaxSector) && mddev->can_decrease_events && mddev->events != 1) { mddev->events--; @@ -4303,9 +4303,9 @@ __ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store); static ssize_t resync_start_show(struct mddev *mddev, char *page) { - if (mddev->recovery_cp == MaxSector) + if (mddev->resync_offset == MaxSector) return sprintf(page, "none\n"); - return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp); + return sprintf(page, "%llu\n", (unsigned long long)mddev->resync_offset); } static ssize_t @@ -4331,7 +4331,7 @@ resync_start_store(struct mddev *mddev, const char *buf, size_t len) err = -EBUSY; if (!err) { - mddev->recovery_cp = n; + mddev->resync_offset = n; if (mddev->pers) set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); } @@ -6423,7 +6423,7 @@ static void md_clean(struct mddev *mddev) mddev->external_size = 0; mddev->dev_sectors = 0; mddev->raid_disks = 0; - mddev->recovery_cp = 0; + mddev->resync_offset = 0; mddev->resync_min = 0; mddev->resync_max = MaxSector; mddev->reshape_position = MaxSector; @@ -7368,9 +7368,9 @@ int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info) * openned */ if (info->state & (1<recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; else - mddev->recovery_cp = 0; + mddev->resync_offset = 0; mddev->persistent = ! info->not_persistent; mddev->external = 0; @@ -8309,7 +8309,7 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev) seq_printf(seq, "\tresync=REMOTE"); return 1; } - if (mddev->recovery_cp < MaxSector) { + if (mddev->resync_offset < MaxSector) { seq_printf(seq, "\tresync=PENDING"); return 1; } @@ -8952,7 +8952,7 @@ static sector_t md_sync_position(struct mddev *mddev, enum sync_action action) return mddev->resync_min; case ACTION_RESYNC: if (!mddev->bitmap) - return mddev->recovery_cp; + return mddev->resync_offset; return 0; case ACTION_RESHAPE: /* @@ -9190,8 +9190,8 @@ void md_do_sync(struct md_thread *thread) atomic_read(&mddev->recovery_active) == 0); mddev->curr_resync_completed = j; if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && - j > mddev->recovery_cp) - mddev->recovery_cp = j; + j > mddev->resync_offset) + mddev->resync_offset = j; update_time = jiffies; set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); sysfs_notify_dirent_safe(mddev->sysfs_completed); @@ -9311,19 +9311,19 @@ void md_do_sync(struct md_thread *thread) mddev->curr_resync > MD_RESYNC_ACTIVE) { if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { - if (mddev->curr_resync >= mddev->recovery_cp) { + if (mddev->curr_resync >= mddev->resync_offset) { pr_debug("md: checkpointing %s of %s.\n", desc, mdname(mddev)); if (test_bit(MD_RECOVERY_ERROR, &mddev->recovery)) - mddev->recovery_cp = + mddev->resync_offset = mddev->curr_resync_completed; else - mddev->recovery_cp = + mddev->resync_offset = mddev->curr_resync; } } else - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; } else { if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) mddev->curr_resync = MaxSector; @@ -9539,7 +9539,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares) } /* Check if resync is in progress. */ - if (mddev->recovery_cp < MaxSector) { + if (mddev->resync_offset < MaxSector) { remove_spares(mddev, NULL); set_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); @@ -9720,7 +9720,7 @@ void md_check_recovery(struct mddev *mddev) test_bit(MD_RECOVERY_DONE, &mddev->recovery) || (mddev->external == 0 && mddev->safemode == 1) || (mddev->safemode == 2 - && !mddev->in_sync && mddev->recovery_cp == MaxSector) + && !mddev->in_sync && mddev->resync_offset == MaxSector) )) return; diff --git a/drivers/md/md.h b/drivers/md/md.h index 67b365621507..51af29a03079 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -523,7 +523,7 @@ struct mddev { unsigned long normal_io_events; /* IO event timestamp */ atomic_t recovery_active; /* blocks scheduled, but not written */ wait_queue_head_t recovery_wait; - sector_t recovery_cp; + sector_t resync_offset; sector_t resync_min; /* user requested sync * starts here */ sector_t resync_max; /* resync should pause diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index cbe2a9054cb9..f1d8811a542a 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -674,7 +674,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev) mddev->raid_disks--; mddev->delta_disks = -1; /* make sure it will be not marked as dirty */ - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS); create_strip_zones(mddev, &priv_conf); @@ -717,7 +717,7 @@ static void *raid0_takeover_raid10(struct mddev *mddev) mddev->raid_disks += mddev->delta_disks; mddev->degraded = 0; /* make sure it will be not marked as dirty */ - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS); create_strip_zones(mddev, &priv_conf); @@ -760,7 +760,7 @@ static void *raid0_takeover_raid1(struct mddev *mddev) mddev->delta_disks = 1 - mddev->raid_disks; mddev->raid_disks = 1; /* make sure it will be not marked as dirty */ - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS); create_strip_zones(mddev, &priv_conf); diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c index b8b3a9069701..52881e6032da 100644 --- a/drivers/md/raid1-10.c +++ b/drivers/md/raid1-10.c @@ -283,7 +283,7 @@ static inline int raid1_check_read_range(struct md_rdev *rdev, static inline bool raid1_should_read_first(struct mddev *mddev, sector_t this_sector, int len) { - if ((mddev->recovery_cp < this_sector + len)) + if ((mddev->resync_offset < this_sector + len)) return true; if (mddev_is_clustered(mddev) && diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 64b8176907a9..6cee738a645f 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2822,7 +2822,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, } if (mddev->bitmap == NULL && - mddev->recovery_cp == MaxSector && + mddev->resync_offset == MaxSector && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && conf->fullsync == 0) { *skipped = 1; @@ -3282,9 +3282,9 @@ static int raid1_run(struct mddev *mddev) } if (conf->raid_disks - mddev->degraded == 1) - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; - if (mddev->recovery_cp != MaxSector) + if (mddev->resync_offset != MaxSector) pr_info("md/raid1:%s: not clean -- starting background reconstruction\n", mdname(mddev)); pr_info("md/raid1:%s: active with %d out of %d mirrors\n", @@ -3345,8 +3345,8 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors) md_set_array_sectors(mddev, newsize); if (sectors > mddev->dev_sectors && - mddev->recovery_cp > mddev->dev_sectors) { - mddev->recovery_cp = mddev->dev_sectors; + mddev->resync_offset > mddev->dev_sectors) { + mddev->resync_offset = mddev->dev_sectors; set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } mddev->dev_sectors = sectors; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 95dc354a86a0..b60c30bfb6c7 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2117,7 +2117,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) int last = conf->geo.raid_disks - 1; struct raid10_info *p; - if (mddev->recovery_cp < MaxSector) + if (mddev->resync_offset < MaxSector) /* only hot-add to in-sync arrays, as recovery is * very different from resync */ @@ -3185,7 +3185,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, * of a clean array, like RAID1 does. */ if (mddev->bitmap == NULL && - mddev->recovery_cp == MaxSector && + mddev->resync_offset == MaxSector && mddev->reshape_position == MaxSector && !test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && @@ -4145,7 +4145,7 @@ static int raid10_run(struct mddev *mddev) disk->recovery_disabled = mddev->recovery_disabled - 1; } - if (mddev->recovery_cp != MaxSector) + if (mddev->resync_offset != MaxSector) pr_notice("md/raid10:%s: not clean -- starting background reconstruction\n", mdname(mddev)); pr_info("md/raid10:%s: active with %d out of %d devices\n", @@ -4245,8 +4245,8 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors) md_set_array_sectors(mddev, size); if (sectors > mddev->dev_sectors && - mddev->recovery_cp > oldsize) { - mddev->recovery_cp = oldsize; + mddev->resync_offset > oldsize) { + mddev->resync_offset = oldsize; set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } calc_sectors(conf, sectors); @@ -4275,7 +4275,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs) mddev->delta_disks = mddev->raid_disks; mddev->raid_disks *= 2; /* make sure it will be not marked as dirty */ - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; mddev->dev_sectors = size; conf = setup_conf(mddev); @@ -5087,8 +5087,8 @@ static void raid10_finish_reshape(struct mddev *mddev) return; if (mddev->delta_disks > 0) { - if (mddev->recovery_cp > mddev->resync_max_sectors) { - mddev->recovery_cp = mddev->resync_max_sectors; + if (mddev->resync_offset > mddev->resync_max_sectors) { + mddev->resync_offset = mddev->resync_max_sectors; set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } mddev->resync_max_sectors = mddev->array_sectors; diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index c0fb335311aa..56b234683ee6 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -1163,7 +1163,7 @@ static int ppl_load_distributed(struct ppl_log *log) le64_to_cpu(pplhdr->generation)); /* attempt to recover from log if we are starting a dirty array */ - if (pplhdr && !mddev->pers && mddev->recovery_cp != MaxSector) + if (pplhdr && !mddev->pers && mddev->resync_offset != MaxSector) ret = ppl_recover(log, pplhdr, pplhdr_offset); /* write empty header if we are starting the array */ @@ -1422,14 +1422,14 @@ int ppl_init_log(struct r5conf *conf) if (ret) { goto err; - } else if (!mddev->pers && mddev->recovery_cp == 0 && + } else if (!mddev->pers && mddev->resync_offset == 0 && ppl_conf->recovered_entries > 0 && ppl_conf->mismatch_count == 0) { /* * If we are starting a dirty array and the recovery succeeds * without any issues, set the array as clean. */ - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); } else if (mddev->pers && ppl_conf->mismatch_count > 0) { /* no mismatch allowed when enabling PPL for a running array */ diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7ec61ee7b218..023649fe2476 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3740,7 +3740,7 @@ static int want_replace(struct stripe_head *sh, int disk_idx) && !test_bit(Faulty, &rdev->flags) && !test_bit(In_sync, &rdev->flags) && (rdev->recovery_offset <= sh->sector - || rdev->mddev->recovery_cp <= sh->sector)) + || rdev->mddev->resync_offset <= sh->sector)) rv = 1; return rv; } @@ -3832,7 +3832,7 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s, * is missing/faulty, then we need to read everything we can. */ if (!force_rcw && - sh->sector < sh->raid_conf->mddev->recovery_cp) + sh->sector < sh->raid_conf->mddev->resync_offset) /* reconstruct-write isn't being forced */ return 0; for (i = 0; i < s->failed && i < 2; i++) { @@ -4097,7 +4097,7 @@ static int handle_stripe_dirtying(struct r5conf *conf, int disks) { int rmw = 0, rcw = 0, i; - sector_t recovery_cp = conf->mddev->recovery_cp; + sector_t resync_offset = conf->mddev->resync_offset; /* Check whether resync is now happening or should start. * If yes, then the array is dirty (after unclean shutdown or @@ -4107,14 +4107,14 @@ static int handle_stripe_dirtying(struct r5conf *conf, * generate correct data from the parity. */ if (conf->rmw_level == PARITY_DISABLE_RMW || - (recovery_cp < MaxSector && sh->sector >= recovery_cp && + (resync_offset < MaxSector && sh->sector >= resync_offset && s->failed == 0)) { /* Calculate the real rcw later - for now make it * look like rcw is cheaper */ rcw = 1; rmw = 2; - pr_debug("force RCW rmw_level=%u, recovery_cp=%llu sh->sector=%llu\n", - conf->rmw_level, (unsigned long long)recovery_cp, + pr_debug("force RCW rmw_level=%u, resync_offset=%llu sh->sector=%llu\n", + conf->rmw_level, (unsigned long long)resync_offset, (unsigned long long)sh->sector); } else for (i = disks; i--; ) { /* would I have to read this buffer for read_modify_write */ @@ -4770,14 +4770,14 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) if (test_bit(STRIPE_SYNCING, &sh->state)) { /* If there is a failed device being replaced, * we must be recovering. - * else if we are after recovery_cp, we must be syncing + * else if we are after resync_offset, we must be syncing * else if MD_RECOVERY_REQUESTED is set, we also are syncing. * else we can only be replacing * sync and recovery both need to read all devices, and so * use the same flag. */ if (do_recovery || - sh->sector >= conf->mddev->recovery_cp || + sh->sector >= conf->mddev->resync_offset || test_bit(MD_RECOVERY_REQUESTED, &(conf->mddev->recovery))) s->syncing = 1; else @@ -7780,7 +7780,7 @@ static int raid5_run(struct mddev *mddev) int first = 1; int ret = -EIO; - if (mddev->recovery_cp != MaxSector) + if (mddev->resync_offset != MaxSector) pr_notice("md/raid:%s: not clean -- starting background reconstruction\n", mdname(mddev)); @@ -7921,7 +7921,7 @@ static int raid5_run(struct mddev *mddev) mdname(mddev)); mddev->ro = 1; set_disk_ro(mddev->gendisk, 1); - } else if (mddev->recovery_cp == MaxSector) + } else if (mddev->resync_offset == MaxSector) set_bit(MD_JOURNAL_CLEAN, &mddev->flags); } @@ -7988,7 +7988,7 @@ static int raid5_run(struct mddev *mddev) mddev->resync_max_sectors = mddev->dev_sectors; if (mddev->degraded > dirty_parity_disks && - mddev->recovery_cp != MaxSector) { + mddev->resync_offset != MaxSector) { if (test_bit(MD_HAS_PPL, &mddev->flags)) pr_crit("md/raid:%s: starting dirty degraded array with PPL.\n", mdname(mddev)); @@ -8328,8 +8328,8 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors) md_set_array_sectors(mddev, newsize); if (sectors > mddev->dev_sectors && - mddev->recovery_cp > mddev->dev_sectors) { - mddev->recovery_cp = mddev->dev_sectors; + mddev->resync_offset > mddev->dev_sectors) { + mddev->resync_offset = mddev->dev_sectors; set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } mddev->dev_sectors = sectors; @@ -8423,7 +8423,7 @@ static int raid5_start_reshape(struct mddev *mddev) return -EINVAL; /* raid5 can't handle concurrent reshape and recovery */ - if (mddev->recovery_cp < MaxSector) + if (mddev->resync_offset < MaxSector) return -EBUSY; for (i = 0; i < conf->raid_disks; i++) if (conf->disks[i].replacement) @@ -8648,7 +8648,7 @@ static void *raid45_takeover_raid0(struct mddev *mddev, int level) mddev->raid_disks += 1; mddev->delta_disks = 1; /* make sure it will be not marked as dirty */ - mddev->recovery_cp = MaxSector; + mddev->resync_offset = MaxSector; return setup_conf(mddev); } diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index ff47b6f0ba0f..b13946287277 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -173,7 +173,7 @@ typedef struct mdp_superblock_s { #else #error unspecified endianness #endif - __u32 recovery_cp; /* 11 recovery checkpoint sector count */ + __u32 resync_offset; /* 11 resync checkpoint sector count */ /* There are only valid for minor_version > 90 */ __u64 reshape_position; /* 12,13 next address in array-space for reshape */ __u32 new_level; /* 14 new level we are reshaping to */ From c71fc0f457ca1c2cd4dff2d974df724beb14f67e Mon Sep 17 00:00:00 2001 From: Nitesh Shetty Date: Wed, 16 Jul 2025 19:09:44 +0530 Subject: [PATCH 063/279] nvmet: add support for FDP in fabrics passthru path Add support for admin_get_feature FDP(0x1d) feature id, thus enabling FDP at the initiator side for the target controller and namespaces attached to it. Signed-off-by: Nitesh Shetty Signed-off-by: Christoph Hellwig --- drivers/nvme/target/passthru.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 3b4b0df8f879..0c361b1e3566 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -533,6 +533,8 @@ u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req) case NVME_FEAT_HOST_ID: req->execute = nvmet_execute_get_features; return NVME_SC_SUCCESS; + case NVME_FEAT_FDP: + return nvmet_setup_passthru_command(req); default: return nvmet_passthru_get_set_features(req); } From e715b8733df60aa3280ab3e0de0560c8a72c5c1d Mon Sep 17 00:00:00 2001 From: Kamaljit Singh Date: Mon, 21 Jul 2025 10:36:59 -0700 Subject: [PATCH 064/279] nvme: add capability to connect to an administrative controller Add capability to connect to an administrative controller by preventing ioq creation for admin-controllers. Add a nvme_admin_ctrl() to check if a controller's CNTRLTYPE indicates that it is an administrative controller and override ctrl->queue_count to 1 for admin controllers, so that only the admin queue and no I/O queues are created for an administrative controller. This override is done in nvme_init_ctrl_finish() after ctrl->cntrltype has been initialized in nvme_init_identify() so nvme_admin_ctrl() will work correctly. Doing this override in generic code (nvme_init_ctrl_finish) makes it transport agnostic and will work properly for nvme/tcp as well as for nvme/rdma. Suggested-by: Niklas Cassel Reviewed-by: Damien Le Moal Reviewed-by: Niklas Cassel Signed-off-by: Kamaljit Singh Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9d988f4cb87a..812c1565114f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3158,6 +3158,11 @@ static inline bool nvme_discovery_ctrl(struct nvme_ctrl *ctrl) return ctrl->opts && ctrl->opts->discovery_nqn; } +static inline bool nvme_admin_ctrl(struct nvme_ctrl *ctrl) +{ + return ctrl->cntrltype == NVME_CTRL_ADMIN; +} + static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) { @@ -3670,6 +3675,17 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended) if (ret) return ret; + if (nvme_admin_ctrl(ctrl)) { + /* + * An admin controller has one admin queue, but no I/O queues. + * Override queue_count so it only creates an admin queue. + */ + dev_dbg(ctrl->device, + "Subsystem %s is an administrative controller", + ctrl->subsys->subnqn); + ctrl->queue_count = 1; + } + ret = nvme_configure_apst(ctrl); if (ret < 0) return ret; From 528589947c1802b9357c2a9b96d88cc4a11cd88b Mon Sep 17 00:00:00 2001 From: Mohamed Khalfella Date: Fri, 25 Jul 2025 13:50:05 -0700 Subject: [PATCH 065/279] nvmet: initialize discovery subsys after debugfs is initialized During nvme target initialization discovery subsystem is initialized before "nvmet" debugfs directory is created. This results in discovery subsystem debugfs directory to be created in debugfs root directory. nvmet_init() -> nvmet_init_discovery() -> nvmet_subsys_alloc() -> nvmet_debugfs_subsys_setup() In other words, the codepath above is exeucted before nvmet_debugfs is created. We get /sys/kernel/debug/nqn.2014-08.org.nvmexpress.discovery instead of /sys/kernel/debug/nvmet/nqn.2014-08.org.nvmexpress.discovery. Move nvmet_init_discovery() call after nvmet_init_debugfs() to fix it. Fixes: 649fd41420a8 ("nvmet: add debugfs support") Signed-off-by: Mohamed Khalfella Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Reviewed-by: Daniel Wagner Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 884286f90688..83f3d2f8ef2d 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1960,24 +1960,24 @@ static int __init nvmet_init(void) if (!nvmet_wq) goto out_free_buffered_work_queue; - error = nvmet_init_discovery(); + error = nvmet_init_debugfs(); if (error) goto out_free_nvmet_work_queue; - error = nvmet_init_debugfs(); - if (error) - goto out_exit_discovery; - - error = nvmet_init_configfs(); + error = nvmet_init_discovery(); if (error) goto out_exit_debugfs; + error = nvmet_init_configfs(); + if (error) + goto out_exit_discovery; + return 0; -out_exit_debugfs: - nvmet_exit_debugfs(); out_exit_discovery: nvmet_exit_discovery(); +out_exit_debugfs: + nvmet_exit_debugfs(); out_free_nvmet_work_queue: destroy_workqueue(nvmet_wq); out_free_buffered_work_queue: From 4e6e151cf92bbaa0622a4da351ff444e4fd9b865 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 29 Jul 2025 11:12:47 -0700 Subject: [PATCH 066/279] nvme-pci: fix leak on sgl setup error We need to free the descriptor that was allocated. We also don't necessarily need to unmap each sgl entry, which was previously being attempted unconditionally. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 071efec25346..2c6d9506b172 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -935,7 +935,7 @@ static blk_status_t nvme_pci_setup_data_sgl(struct request *req, nvme_pci_sgl_set_seg(&iod->cmd.common.dptr.sgl, sgl_dma, mapped); if (unlikely(iter->status)) - nvme_free_sgls(req); + nvme_unmap_data(req); return iter->status; } From b6160cd2c45c38d01405d8ee3758e9b8a6f8e595 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 25 Jul 2025 15:57:22 +0800 Subject: [PATCH 067/279] nvme-auth: remove unneeded semicolon No functional modification involved. ./drivers/nvme/host/auth.c:745:2-3: Unneeded semicolon. ./drivers/nvme/host/auth.c:755:2-3: Unneeded semicolon. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=22937 Signed-off-by: Jiapeng Chong Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/auth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index f6ddbe553289..201fc8809a62 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -742,7 +742,7 @@ static int nvme_auth_secure_concat(struct nvme_ctrl *ctrl, "%s: qid %d failed to generate digest, error %d\n", __func__, chap->qid, ret); goto out_free_psk; - }; + } dev_dbg(ctrl->device, "%s: generated digest %s\n", __func__, digest); ret = nvme_auth_derive_tls_psk(chap->hash_id, psk, psk_len, @@ -752,7 +752,7 @@ static int nvme_auth_secure_concat(struct nvme_ctrl *ctrl, "%s: qid %d failed to derive TLS psk, error %d\n", __func__, chap->qid, ret); goto out_free_digest; - }; + } tls_key = nvme_tls_psk_refresh(ctrl->opts->keyring, ctrl->opts->host->nqn, From 367c240b0a99c7ada700a44345dd3144a02b6164 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 30 Jul 2025 15:32:45 -0500 Subject: [PATCH 068/279] nvme: fix various comment typos Fix typos in comments. Signed-off-by: Bjorn Helgaas Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 4 ++-- drivers/nvme/host/tcp.c | 2 +- drivers/nvme/target/fc.c | 6 +++--- drivers/nvme/target/rdma.c | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 08a5ea3e9383..3e12d4683ac7 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1363,7 +1363,7 @@ nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) * down, and the related FC-NVME Association ID and Connection IDs * become invalid. * - * The behavior of the fc-nvme initiator is such that it's + * The behavior of the fc-nvme initiator is such that its * understanding of the association and connections will implicitly * be torn down. The action is implicit as it may be due to a loss of * connectivity with the fc-nvme target, so you may never get a @@ -2777,7 +2777,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, * as WRITE ZEROES will return a non-zero rq payload_bytes yet * there is no actual payload to be transferred. * To get it right, key data transmission on there being 1 or - * more physical segments in the sg list. If there is no + * more physical segments in the sg list. If there are no * physical segments, there is no payload. */ if (blk_rq_nr_phys_segments(rq)) { diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 9233f088fac8..c0fe8cfb7229 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2179,7 +2179,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) /* * Only start IO queues for which we have allocated the tagset - * and limitted it to the available queues. On reconnects, the + * and limited it to the available queues. On reconnects, the * queue number might have changed. */ nr_queues = min(ctrl->tagset->nr_hw_queues + 1, ctrl->queue_count); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 25598a46bf0d..a9b18c051f5b 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -459,7 +459,7 @@ nvmet_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) * down, and the related FC-NVME Association ID and Connection IDs * become invalid. * - * The behavior of the fc-nvme target is such that it's + * The behavior of the fc-nvme target is such that its * understanding of the association and connections will implicitly * be torn down. The action is implicit as it may be due to a loss of * connectivity with the fc-nvme host, so the target may never get a @@ -2313,7 +2313,7 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, ret = tgtport->ops->fcp_op(&tgtport->fc_target_port, fod->fcpreq); if (ret) { /* - * should be ok to set w/o lock as its in the thread of + * should be ok to set w/o lock as it's in the thread of * execution (not an async timer routine) and doesn't * contend with any clearing action */ @@ -2629,7 +2629,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, * and the api of the FC LLDD which may issue a hw command to send the * response, but the LLDD may not get the hw completion for that command * and upcall the nvmet_fc layer before a new command may be - * asynchronously received - its possible for a command to be received + * asynchronously received - it's possible for a command to be received * before the LLDD and nvmet_fc have recycled the job structure. It gives * the appearance of more commands received than fits in the sq. * To alleviate this scenario, a temporary queue is maintained in the diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 67f61c67c167..0485e25ab797 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1731,7 +1731,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, * We registered an ib_client to handle device removal for queues, * so we only need to handle the listening port cm_ids. In this case * we nullify the priv to prevent double cm_id destruction and destroying - * the cm_id implicitely by returning a non-zero rc to the callout. + * the cm_id implicitly by returning a non-zero rc to the callout. */ static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, struct nvmet_rdma_queue *queue) @@ -1742,7 +1742,7 @@ static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, /* * This is a queue cm_id. we have registered * an ib_client to handle queues removal - * so don't interfear and just return. + * so don't interfere and just return. */ return 0; } @@ -1760,7 +1760,7 @@ static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, /* * We need to return 1 so that the core will destroy - * it's own ID. What a great API design.. + * its own ID. What a great API design.. */ return 1; } From c36049da6c903b732f238eb6fd13c2051fac96cd Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 31 Jul 2025 18:18:02 +0200 Subject: [PATCH 069/279] arm64: tegra: Remove numa-node-id properties These were initially added because some software was checking for their presence. However, the device is not NUMA, so adding these is wrong and hence they should be removed. Signed-off-by: Thierry Reding --- arch/arm64/boot/dts/nvidia/tegra264.dtsi | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra264.dtsi b/arch/arm64/boot/dts/nvidia/tegra264.dtsi index 62c87a387b14..e02659efa233 100644 --- a/arch/arm64/boot/dts/nvidia/tegra264.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra264.dtsi @@ -11,7 +11,6 @@ / { interrupt-parent = <&gic>; #address-cells = <2>; #size-cells = <2>; - numa-node-id = <0>; reserved-memory { #address-cells = <2>; @@ -341,7 +340,6 @@ cpu0: cpu@0 { status = "okay"; enable-method = "psci"; - numa-node-id = <0>; i-cache-size = <65536>; i-cache-line-size = <64>; @@ -358,7 +356,6 @@ cpu1: cpu@1 { status = "okay"; enable-method = "psci"; - numa-node-id = <0>; i-cache-size = <65536>; i-cache-line-size = <64>; From e2ba58ccc9099514380c3300cbc0750b5055fc1c Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 30 Jul 2025 21:49:53 -0700 Subject: [PATCH 070/279] block: Fix default IO priority if there is no IO context Upstream commit 53889bcaf536 ("block: make __get_task_ioprio() easier to read") changes the IO priority returned to the caller if no IO context is defined for the task. Prior to this commit, the returned IO priority was determined by task_nice_ioclass() and task_nice_ioprio(). Now it is always IOPRIO_DEFAULT, which translates to IOPRIO_CLASS_NONE with priority 0. However, task_nice_ioclass() returns IOPRIO_CLASS_IDLE, IOPRIO_CLASS_RT, or IOPRIO_CLASS_BE depending on the task scheduling policy, and task_nice_ioprio() returns a value determined by task_nice(). This causes regressions in test code checking the IO priority and class of IO operations on tasks with no IO context. Fix the problem by returning the IO priority calculated from task_nice_ioclass() and task_nice_ioprio() if no IO context is defined to match earlier behavior. Fixes: 53889bcaf536 ("block: make __get_task_ioprio() easier to read") Cc: Jens Axboe Cc: Bart Van Assche Signed-off-by: Guenter Roeck Reviewed-by: Yu Kuai Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20250731044953.1852690-1-linux@roeck-us.net Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index b25377b6ea98..5210e8371238 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -60,7 +60,8 @@ static inline int __get_task_ioprio(struct task_struct *p) int prio; if (!ioc) - return IOPRIO_DEFAULT; + return IOPRIO_PRIO_VALUE(task_nice_ioclass(p), + task_nice_ioprio(p)); if (p != current) lockdep_assert_held(&p->alloc_lock); From 765761851d89c772f482494d452e266795460278 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Thu, 31 Jul 2025 20:07:45 +0900 Subject: [PATCH 071/279] zloop: fix KASAN use-after-free of tag set When a zoned loop device, or zloop device, is removed, KASAN enabled kernel reports "BUG KASAN use-after-free" in blk_mq_free_tag_set(). The BUG happens because zloop_ctl_remove() calls put_disk(), which invokes zloop_free_disk(). The zloop_free_disk() frees the memory allocated for the zlo pointer. However, after the memory is freed, zloop_ctl_remove() calls blk_mq_free_tag_set(&zlo->tag_set), which accesses the freed zlo. Hence the KASAN use-after-free. zloop_ctl_remove() put_disk(zlo->disk) put_device() kobject_put() ... zloop_free_disk() kvfree(zlo) blk_mq_free_tag_set(&zlo->tag_set) To avoid the BUG, move the call to blk_mq_free_tag_set(&zlo->tag_set) from zloop_ctl_remove() into zloop_free_disk(). This ensures that the tag_set is freed before the call to kvfree(zlo). Fixes: eb0570c7df23 ("block: new zoned loop block device driver") CC: stable@vger.kernel.org Signed-off-by: Shin'ichiro Kawasaki Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250731110745.165751-1-shinichiro.kawasaki@wdc.com Signed-off-by: Jens Axboe --- drivers/block/zloop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c index 553b1a713ab9..a423228e201b 100644 --- a/drivers/block/zloop.c +++ b/drivers/block/zloop.c @@ -700,6 +700,8 @@ static void zloop_free_disk(struct gendisk *disk) struct zloop_device *zlo = disk->private_data; unsigned int i; + blk_mq_free_tag_set(&zlo->tag_set); + for (i = 0; i < zlo->nr_zones; i++) { struct zloop_zone *zone = &zlo->zones[i]; @@ -1080,7 +1082,6 @@ static int zloop_ctl_remove(struct zloop_options *opts) del_gendisk(zlo->disk); put_disk(zlo->disk); - blk_mq_free_tag_set(&zlo->tag_set); pr_info("Removed device %d\n", opts->id); From fad6551fcf537375702b9af012508156a16a1ff7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 31 Jul 2025 08:22:28 -0700 Subject: [PATCH 072/279] block: ensure discard_granularity is zero when discard is not supported Documentation/ABI/stable/sysfs-block states: What: /sys/block//queue/discard_granularity [...] A discard_granularity of 0 means that the device does not support discard functionality. but this got broken when sorting out the block limits updates. Fix this by setting the discard_granularity limit to zero when the combined max_discard_sectors is zero. Fixes: 3c407dc723bb ("block: default the discard granularity to sector size") Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20250731152228.873923-1-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-settings.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index a7a794baba72..07874e9b609f 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -400,12 +400,19 @@ int blk_validate_limits(struct queue_limits *lim) lim->max_discard_sectors = min(lim->max_hw_discard_sectors, lim->max_user_discard_sectors); + /* + * When discard is not supported, discard_granularity should be reported + * as 0 to userspace. + */ + if (lim->max_discard_sectors) + lim->discard_granularity = + max(lim->discard_granularity, lim->physical_block_size); + else + lim->discard_granularity = 0; + if (!lim->max_discard_segments) lim->max_discard_segments = 1; - if (lim->discard_granularity < lim->physical_block_size) - lim->discard_granularity = lim->physical_block_size; - /* * By default there is no limit on the segment boundary alignment, * but if there is one it can't be smaller than the page size as From a967e758f8e9d8ce5ef096743393df5e6e51644b Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 31 Jul 2025 20:46:41 -0300 Subject: [PATCH 073/279] smb: client: set symlink type as native for POSIX mounts SMB3.1.1 POSIX mounts require symlinks to be created natively with IO_REPARSE_TAG_SYMLINK reparse point. Cc: linux-cifs@vger.kernel.org Cc: Ralph Boehme Cc: David Howells Cc: Reported-by: Matthew Richardson Closes: https://marc.info/?i=1124e7cd-6a46-40a6-9f44-b7664a66654b@ed.ac.uk Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/cifsfs.c | 2 +- fs/smb/client/fs_context.c | 18 ------------------ fs/smb/client/fs_context.h | 18 +++++++++++++++++- fs/smb/client/link.c | 11 +++-------- fs/smb/client/reparse.c | 2 +- 5 files changed, 22 insertions(+), 29 deletions(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 0fdadd668a81..31930b7266db 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -723,7 +723,7 @@ cifs_show_options(struct seq_file *s, struct dentry *root) else seq_puts(s, ",nativesocket"); seq_show_option(s, "symlink", - cifs_symlink_type_str(get_cifs_symlink_type(cifs_sb))); + cifs_symlink_type_str(cifs_symlink_type(cifs_sb))); seq_printf(s, ",rsize=%u", cifs_sb->ctx->rsize); seq_printf(s, ",wsize=%u", cifs_sb->ctx->wsize); diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 3f34bb07997b..cc8bd79ebca9 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1829,24 +1829,6 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, return -EINVAL; } -enum cifs_symlink_type get_cifs_symlink_type(struct cifs_sb_info *cifs_sb) -{ - if (cifs_sb->ctx->symlink_type == CIFS_SYMLINK_TYPE_DEFAULT) { - if (cifs_sb->ctx->mfsymlinks) - return CIFS_SYMLINK_TYPE_MFSYMLINKS; - else if (cifs_sb->ctx->sfu_emul) - return CIFS_SYMLINK_TYPE_SFU; - else if (cifs_sb->ctx->linux_ext && !cifs_sb->ctx->no_linux_ext) - return CIFS_SYMLINK_TYPE_UNIX; - else if (cifs_sb->ctx->reparse_type != CIFS_REPARSE_TYPE_NONE) - return CIFS_SYMLINK_TYPE_NATIVE; - else - return CIFS_SYMLINK_TYPE_NONE; - } else { - return cifs_sb->ctx->symlink_type; - } -} - int smb3_init_fs_context(struct fs_context *fc) { struct smb3_fs_context *ctx; diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index 9e83302ce4b8..b0fec6b9a23b 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -341,7 +341,23 @@ struct smb3_fs_context { extern const struct fs_parameter_spec smb3_fs_parameters[]; -extern enum cifs_symlink_type get_cifs_symlink_type(struct cifs_sb_info *cifs_sb); +static inline enum cifs_symlink_type cifs_symlink_type(struct cifs_sb_info *cifs_sb) +{ + bool posix = cifs_sb_master_tcon(cifs_sb)->posix_extensions; + + if (cifs_sb->ctx->symlink_type != CIFS_SYMLINK_TYPE_DEFAULT) + return cifs_sb->ctx->symlink_type; + + if (cifs_sb->ctx->mfsymlinks) + return CIFS_SYMLINK_TYPE_MFSYMLINKS; + else if (cifs_sb->ctx->sfu_emul) + return CIFS_SYMLINK_TYPE_SFU; + else if (cifs_sb->ctx->linux_ext && !cifs_sb->ctx->no_linux_ext) + return posix ? CIFS_SYMLINK_TYPE_NATIVE : CIFS_SYMLINK_TYPE_UNIX; + else if (cifs_sb->ctx->reparse_type != CIFS_REPARSE_TYPE_NONE) + return CIFS_SYMLINK_TYPE_NATIVE; + return CIFS_SYMLINK_TYPE_NONE; +} extern int smb3_init_fs_context(struct fs_context *fc); extern void smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx); diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c index 2ecd705e9e8c..afe76367d2c8 100644 --- a/fs/smb/client/link.c +++ b/fs/smb/client/link.c @@ -605,14 +605,7 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, /* BB what if DFS and this volume is on different share? BB */ rc = -EOPNOTSUPP; - switch (get_cifs_symlink_type(cifs_sb)) { - case CIFS_SYMLINK_TYPE_DEFAULT: - /* should not happen, get_cifs_symlink_type() resolves the default */ - break; - - case CIFS_SYMLINK_TYPE_NONE: - break; - + switch (cifs_symlink_type(cifs_sb)) { case CIFS_SYMLINK_TYPE_UNIX: #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY if (pTcon->unix_ext) { @@ -648,6 +641,8 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, goto symlink_exit; } break; + default: + break; } if (rc == 0) { diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index 33c1d970747c..7869cec58f52 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -38,7 +38,7 @@ int create_reparse_symlink(const unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, const char *symname) { - switch (get_cifs_symlink_type(CIFS_SB(inode->i_sb))) { + switch (cifs_symlink_type(CIFS_SB(inode->i_sb))) { case CIFS_SYMLINK_TYPE_NATIVE: return create_native_symlink(xid, inode, dentry, tcon, full_path, symname); case CIFS_SYMLINK_TYPE_NFS: From 55a984928bfa30c7877e28f16910e6de1c170f1f Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Fri, 1 Aug 2025 10:26:13 +0200 Subject: [PATCH 074/279] Revert "tty: vt: use _IO() to define ioctl numbers" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit f1180ca37abe3d117e4a19be12142fe722612a7c. Since the commit, the vt ioctl numbers are defined differently on platforms where _IOC_NONE is non-zero: alpha, mips, powerpc, sparc. Signed-off-by: "Jiri Slaby (SUSE)" Reported-by: Christophe Leroy Link: https://lore.kernel.org/all/436489B9-E67B-4630-909F-386C30A2AAC9@xenosoft.de/ Link: https://lore.kernel.org/all/97ec2636-915a-498c-903b-d66957420d21@csgroup.eu/ Cc: Nicolas Pitre Cc: Ilpo Järvinen Link: https://lore.kernel.org/r/20250801082613.2564584-1-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/vt.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/include/uapi/linux/vt.h b/include/uapi/linux/vt.h index b60fcdfb2746..714483d68c69 100644 --- a/include/uapi/linux/vt.h +++ b/include/uapi/linux/vt.h @@ -14,9 +14,9 @@ /* Note: the ioctl VT_GETSTATE does not work for consoles 16 and higher (since it returns a short) */ -/* 'V' to avoid collision with termios and kd */ +/* 0x56 is 'V', to avoid collision with termios and kd */ -#define VT_OPENQRY _IO('V', 0x00) /* find available vt */ +#define VT_OPENQRY 0x5600 /* find available vt */ struct vt_mode { __u8 mode; /* vt mode */ @@ -25,8 +25,8 @@ struct vt_mode { __s16 acqsig; /* signal to raise on acquisition */ __s16 frsig; /* unused (set to 0) */ }; -#define VT_GETMODE _IO('V', 0x01) /* get mode of active vt */ -#define VT_SETMODE _IO('V', 0x02) /* set mode of active vt */ +#define VT_GETMODE 0x5601 /* get mode of active vt */ +#define VT_SETMODE 0x5602 /* set mode of active vt */ #define VT_AUTO 0x00 /* auto vt switching */ #define VT_PROCESS 0x01 /* process controls switching */ #define VT_ACKACQ 0x02 /* acknowledge switch */ @@ -36,21 +36,21 @@ struct vt_stat { __u16 v_signal; /* signal to send */ __u16 v_state; /* vt bitmask */ }; -#define VT_GETSTATE _IO('V', 0x03) /* get global vt state info */ -#define VT_SENDSIG _IO('V', 0x04) /* signal to send to bitmask of vts */ +#define VT_GETSTATE 0x5603 /* get global vt state info */ +#define VT_SENDSIG 0x5604 /* signal to send to bitmask of vts */ -#define VT_RELDISP _IO('V', 0x05) /* release display */ +#define VT_RELDISP 0x5605 /* release display */ -#define VT_ACTIVATE _IO('V', 0x06) /* make vt active */ -#define VT_WAITACTIVE _IO('V', 0x07) /* wait for vt active */ -#define VT_DISALLOCATE _IO('V', 0x08) /* free memory associated to vt */ +#define VT_ACTIVATE 0x5606 /* make vt active */ +#define VT_WAITACTIVE 0x5607 /* wait for vt active */ +#define VT_DISALLOCATE 0x5608 /* free memory associated to vt */ struct vt_sizes { __u16 v_rows; /* number of rows */ __u16 v_cols; /* number of columns */ __u16 v_scrollsize; /* number of lines of scrollback */ }; -#define VT_RESIZE _IO('V', 0x09) /* set kernel's idea of screensize */ +#define VT_RESIZE 0x5609 /* set kernel's idea of screensize */ struct vt_consize { __u16 v_rows; /* number of rows */ @@ -60,10 +60,10 @@ struct vt_consize { __u16 v_vcol; /* number of pixel columns on screen */ __u16 v_ccol; /* number of pixel columns per character */ }; -#define VT_RESIZEX _IO('V', 0x0A) /* set kernel's idea of screensize + more */ -#define VT_LOCKSWITCH _IO('V', 0x0B) /* disallow vt switching */ -#define VT_UNLOCKSWITCH _IO('V', 0x0C) /* allow vt switching */ -#define VT_GETHIFONTMASK _IO('V', 0x0D) /* return hi font mask */ +#define VT_RESIZEX 0x560A /* set kernel's idea of screensize + more */ +#define VT_LOCKSWITCH 0x560B /* disallow vt switching */ +#define VT_UNLOCKSWITCH 0x560C /* allow vt switching */ +#define VT_GETHIFONTMASK 0x560D /* return hi font mask */ struct vt_event { __u32 event; @@ -77,14 +77,14 @@ struct vt_event { __u32 pad[4]; /* Padding for expansion */ }; -#define VT_WAITEVENT _IO('V', 0x0E) /* Wait for an event */ +#define VT_WAITEVENT 0x560E /* Wait for an event */ struct vt_setactivate { __u32 console; struct vt_mode mode; }; -#define VT_SETACTIVATE _IO('V', 0x0F) /* Activate and set the mode of a console */ +#define VT_SETACTIVATE 0x560F /* Activate and set the mode of a console */ /* get console size and cursor position */ struct vt_consizecsrpos { From 9d9b193ed73a65ec47cf1fd39925b09da8216461 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 31 Jul 2025 09:41:47 +0800 Subject: [PATCH 075/279] crypto: hash - Increase HASH_MAX_DESCSIZE for hmac(sha3-224-s390) The value of HASH_MAX_DESCSIZE is off by one for hmac(sha3-224-s390). Fix this so that hmac(sha3-224-s390) can be registered. Reported-by: Ingo Franzki Reported-by: Eric Biggers Fixes: 6f90ba706551 ("crypto: s390/sha3 - Use API partial block handling") Cc: Signed-off-by: Herbert Xu --- include/crypto/hash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 6f6b9de12cd3..ed63b904837d 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -184,7 +184,7 @@ struct shash_desc { * Worst case is hmac(sha3-224-s390). Its context is a nested 'shash_desc' * containing a 'struct s390_sha_ctx'. */ -#define HASH_MAX_DESCSIZE (sizeof(struct shash_desc) + 360) +#define HASH_MAX_DESCSIZE (sizeof(struct shash_desc) + 361) #define MAX_SYNC_HASH_REQSIZE (sizeof(struct ahash_request) + \ HASH_MAX_DESCSIZE) From 1da33858af6250184d2ef907494d698af03283de Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 31 Jul 2025 21:38:18 +0100 Subject: [PATCH 076/279] regmap: irq: Free the regmap-irq mutex We do not currently free the mutex allocated by regmap-irq, do so. Tested-by: Russell King (Oracle) Reviewed-by: Russell King (Oracle) Signed-off-by: Mark Brown Link: https://patch.msgid.link/20250731-regmap-irq-nesting-v1-1-98b4d1bf20f0@kernel.org Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-irq.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index d1585f073776..4aac12d38215 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -816,7 +816,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, d->mask_buf[i], chip->irq_drv_data); if (ret) - goto err_alloc; + goto err_mutex; } if (chip->mask_base && !chip->handle_mask_sync) { @@ -827,7 +827,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (ret) { dev_err(map->dev, "Failed to set masks in 0x%x: %d\n", reg, ret); - goto err_alloc; + goto err_mutex; } } @@ -838,7 +838,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (ret) { dev_err(map->dev, "Failed to set masks in 0x%x: %d\n", reg, ret); - goto err_alloc; + goto err_mutex; } } @@ -855,7 +855,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (ret != 0) { dev_err(map->dev, "Failed to read IRQ status: %d\n", ret); - goto err_alloc; + goto err_mutex; } } @@ -879,7 +879,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (ret != 0) { dev_err(map->dev, "Failed to ack 0x%x: %d\n", reg, ret); - goto err_alloc; + goto err_mutex; } } } @@ -901,7 +901,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (ret != 0) { dev_err(map->dev, "Failed to set masks in 0x%x: %d\n", reg, ret); - goto err_alloc; + goto err_mutex; } } } @@ -910,7 +910,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (chip->status_is_level) { ret = read_irq_data(d); if (ret < 0) - goto err_alloc; + goto err_mutex; memcpy(d->prev_status_buf, d->status_buf, array_size(d->chip->num_regs, sizeof(d->prev_status_buf[0]))); @@ -918,7 +918,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, ret = regmap_irq_create_domain(fwnode, irq_base, chip, d); if (ret) - goto err_alloc; + goto err_mutex; ret = request_threaded_irq(irq, NULL, regmap_irq_thread, irq_flags | IRQF_ONESHOT, @@ -935,6 +935,8 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, err_domain: /* Should really dispose of the domain but... */ +err_mutex: + mutex_destroy(&d->lock); err_alloc: kfree(d->type_buf); kfree(d->type_buf_def); @@ -1027,6 +1029,7 @@ void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *d) kfree(d->config_buf[i]); kfree(d->config_buf); } + mutex_destroy(&d->lock); kfree(d); } EXPORT_SYMBOL_GPL(regmap_del_irq_chip); From 76b6e14aa7b081337d118a82397d919b5e072bb4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 31 Jul 2025 21:38:19 +0100 Subject: [PATCH 077/279] regmap: irq: Avoid lockdep warnings with nested regmap-irq chips While handling interrupts through regmap-irq we use a mutex to protect the updates we are caching while genirq runs in atomic context. Russell King reported that while running on the nVidia Jetson Xavier NX this generates lockdep warnings since that platform has a regmap-irq for the max77686 RTC which is a child of a max77620 which also uses regmap-irq. [ 46.723127] rtcwake/3984 is trying to acquire lock: [ 46.723235] ffff0000813b2c68 (&d->lock){+.+.}-{4:4}, at: regmap_irq_lock+0x18/0x24 [ 46.723452] but task is already holding lock: [ 46.723556] ffff00008504dc68 (&d->lock){+.+.}-{4:4}, at: regmap_irq_lock+0x18/0x24 This happens because by default lockdep uses a single lockdep class for all mutexes initialised from a single mutex_init() call and is unable to tell that two distinct mutex are being taken and verify that the ordering of operations is safe. This should be a very rare situation since normally anything using regmap-irq will be a leaf interrupt controller due to being on a slow bus like I2C. We can avoid these warnings by providing the lockdep key for the regmap-irq explicitly, allocating one for each chip so that lockdep can distinguish between them. Thanks to Russell for the report and analysis. Reported-by: Russell King (Oracle) Tested-by: Russell King (Oracle) Reviewed-by: Russell King (Oracle) Signed-off-by: Mark Brown Link: https://patch.msgid.link/20250731-regmap-irq-nesting-v1-2-98b4d1bf20f0@kernel.org Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-irq.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 4aac12d38215..6112d942499b 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -21,6 +21,7 @@ struct regmap_irq_chip_data { struct mutex lock; + struct lock_class_key lock_key; struct irq_chip irq_chip; struct regmap *map; @@ -801,7 +802,13 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, goto err_alloc; } - mutex_init(&d->lock); + /* + * If one regmap-irq is the parent of another then we'll try + * to lock the child with the parent locked, use an explicit + * lock_key so lockdep can figure out what's going on. + */ + lockdep_register_key(&d->lock_key); + mutex_init_with_key(&d->lock, &d->lock_key); for (i = 0; i < chip->num_irqs; i++) d->mask_buf_def[chip->irqs[i].reg_offset / map->reg_stride] @@ -937,6 +944,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, /* Should really dispose of the domain but... */ err_mutex: mutex_destroy(&d->lock); + lockdep_unregister_key(&d->lock_key); err_alloc: kfree(d->type_buf); kfree(d->type_buf_def); @@ -1030,6 +1038,7 @@ void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *d) kfree(d->config_buf); } mutex_destroy(&d->lock); + lockdep_unregister_key(&d->lock_key); kfree(d); } EXPORT_SYMBOL_GPL(regmap_del_irq_chip); From 918b744af3d4d11a087814ebb6c390016e5242f2 Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Fri, 1 Aug 2025 11:51:35 +0530 Subject: [PATCH 078/279] ASoC: SOF: amd: Add sof audio support for acp7.2 platform Add pci revision id to support sof audio for acp7.2 platfom. Signed-off-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20250801062207.579388-2-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/sof/amd/acp.c | 8 ++++++++ sound/soc/sof/amd/acp.h | 1 + sound/soc/sof/amd/pci-acp70.c | 1 + 3 files changed, 10 insertions(+) diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c index 7132916aa253..71a18f156de2 100644 --- a/sound/soc/sof/amd/acp.c +++ b/sound/soc/sof/amd/acp.c @@ -59,6 +59,7 @@ static void init_dma_descriptor(struct acp_dev_data *adata) switch (acp_data->pci_rev) { case ACP70_PCI_ID: case ACP71_PCI_ID: + case ACP72_PCI_ID: acp_dma_desc_base_addr = ACP70_DMA_DESC_BASE_ADDR; acp_dma_desc_max_num_dscr = ACP70_DMA_DESC_MAX_NUM_DSCR; break; @@ -99,6 +100,7 @@ static int config_dma_channel(struct acp_dev_data *adata, unsigned int ch, switch (acp_data->pci_rev) { case ACP70_PCI_ID: case ACP71_PCI_ID: + case ACP72_PCI_ID: acp_dma_cntl_0 = ACP70_DMA_CNTL_0; acp_dma_ch_rst_sts = ACP70_DMA_CH_RST_STS; acp_dma_dscr_err_sts_0 = ACP70_DMA_ERR_STS_0; @@ -339,6 +341,7 @@ int acp_dma_status(struct acp_dev_data *adata, unsigned char ch) switch (adata->pci_rev) { case ACP70_PCI_ID: case ACP71_PCI_ID: + case ACP72_PCI_ID: acp_dma_ch_sts = ACP70_DMA_CH_STS; break; default: @@ -522,6 +525,7 @@ static irqreturn_t acp_irq_handler(int irq, void *dev_id) switch (adata->pci_rev) { case ACP70_PCI_ID: case ACP71_PCI_ID: + case ACP72_PCI_ID: wake_irq_flag = amd_sof_check_and_handle_acp70_sdw_wake_irq(sdev); break; } @@ -559,6 +563,7 @@ static int acp_power_on(struct snd_sof_dev *sdev) break; case ACP70_PCI_ID: case ACP71_PCI_ID: + case ACP72_PCI_ID: acp_pgfsm_status_mask = ACP70_PGFSM_STATUS_MASK; acp_pgfsm_cntl_mask = ACP70_PGFSM_CNTL_POWER_ON_MASK; break; @@ -661,6 +666,7 @@ static int acp_init(struct snd_sof_dev *sdev) switch (acp_data->pci_rev) { case ACP70_PCI_ID: case ACP71_PCI_ID: + case ACP72_PCI_ID: sdw0_wake_en = snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP70_SW0_WAKE_EN); sdw1_wake_en = snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP70_SW1_WAKE_EN); if (sdw0_wake_en || sdw1_wake_en) @@ -712,6 +718,7 @@ int amd_sof_acp_suspend(struct snd_sof_dev *sdev, u32 target_state) switch (acp_data->pci_rev) { case ACP70_PCI_ID: case ACP71_PCI_ID: + case ACP72_PCI_ID: enable = true; break; } @@ -738,6 +745,7 @@ int amd_sof_acp_resume(struct snd_sof_dev *sdev) switch (acp_data->pci_rev) { case ACP70_PCI_ID: case ACP71_PCI_ID: + case ACP72_PCI_ID: snd_sof_dsp_write(sdev, ACP_DSP_BAR, ACP70_PME_EN, 1); break; } diff --git a/sound/soc/sof/amd/acp.h b/sound/soc/sof/amd/acp.h index d3c5b2386cdf..2b7ea8c64106 100644 --- a/sound/soc/sof/amd/acp.h +++ b/sound/soc/sof/amd/acp.h @@ -75,6 +75,7 @@ #define ACP63_PCI_ID 0x63 #define ACP70_PCI_ID 0x70 #define ACP71_PCI_ID 0x71 +#define ACP72_PCI_ID 0x72 #define HOST_BRIDGE_CZN 0x1630 #define HOST_BRIDGE_VGH 0x1645 diff --git a/sound/soc/sof/amd/pci-acp70.c b/sound/soc/sof/amd/pci-acp70.c index 51d36d43c42b..3523c9a92a94 100644 --- a/sound/soc/sof/amd/pci-acp70.c +++ b/sound/soc/sof/amd/pci-acp70.c @@ -77,6 +77,7 @@ static int acp70_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_ switch (pci->revision) { case ACP70_PCI_ID: case ACP71_PCI_ID: + case ACP72_PCI_ID: break; default: return -ENODEV; From 60e5b2441d7c035e732e4a1166779c6cc316c46b Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Fri, 1 Aug 2025 11:51:36 +0530 Subject: [PATCH 079/279] ASoC: amd: ps: Add SoundWire pci and dma driver support for acp7.2 platform Add SoundWire pci and dma driver support for acp7.2 platform. Signed-off-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20250801062207.579388-3-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/ps/acp63.h | 1 + sound/soc/amd/ps/pci-ps.c | 4 ++++ sound/soc/amd/ps/ps-sdw-dma.c | 5 +++++ 3 files changed, 10 insertions(+) diff --git a/sound/soc/amd/ps/acp63.h b/sound/soc/amd/ps/acp63.h index d7c994e26e4d..90fc016dac0b 100644 --- a/sound/soc/amd/ps/acp63.h +++ b/sound/soc/amd/ps/acp63.h @@ -14,6 +14,7 @@ #define ACP63_PCI_REV 0x63 #define ACP70_PCI_REV 0x70 #define ACP71_PCI_REV 0x71 +#define ACP72_PCI_REV 0x72 #define ACP_SOFT_RESET_SOFTRESET_AUDDONE_MASK 0x00010001 #define ACP63_PGFSM_CNTL_POWER_ON_MASK 1 diff --git a/sound/soc/amd/ps/pci-ps.c b/sound/soc/amd/ps/pci-ps.c index 7936b3173632..c62299b29204 100644 --- a/sound/soc/amd/ps/pci-ps.c +++ b/sound/soc/amd/ps/pci-ps.c @@ -117,6 +117,7 @@ static short int check_and_handle_sdw_dma_irq(struct acp63_dev_data *adata, u32 break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: adata->acp70_sdw0_dma_intr_stat[stream_id] = 1; break; } @@ -141,6 +142,7 @@ static short int check_and_handle_sdw_dma_irq(struct acp63_dev_data *adata, u32 break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: if (ext_intr_stat1 & ACP70_P1_SDW_DMA_IRQ_MASK) { for (index = ACP70_P1_AUDIO2_RX_THRESHOLD; index <= ACP70_P1_AUDIO0_TX_THRESHOLD; index++) { @@ -552,6 +554,7 @@ static int acp_hw_init_ops(struct acp63_dev_data *adata, struct pci_dev *pci) break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: acp70_hw_init_ops(adata->hw_ops); break; default: @@ -581,6 +584,7 @@ static int snd_acp63_probe(struct pci_dev *pci, case ACP63_PCI_REV: case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: break; default: dev_dbg(&pci->dev, "acp63/acp70/acp71 pci device not found\n"); diff --git a/sound/soc/amd/ps/ps-sdw-dma.c b/sound/soc/amd/ps/ps-sdw-dma.c index 1b933a017c06..5449323e2728 100644 --- a/sound/soc/amd/ps/ps-sdw-dma.c +++ b/sound/soc/amd/ps/ps-sdw-dma.c @@ -269,6 +269,7 @@ static int acp63_configure_sdw_ringbuffer(void __iomem *acp_base, u32 stream_id, break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: switch (manager_instance) { case ACP_SDW0: reg_dma_size = acp70_sdw0_dma_reg[stream_id].reg_dma_size; @@ -382,6 +383,7 @@ static int acp63_sdw_dma_hw_params(struct snd_soc_component *component, break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: switch (stream->instance) { case ACP_SDW0: sdw_data->acp70_sdw0_dma_stream[stream_id] = substream; @@ -451,6 +453,7 @@ static u64 acp63_sdw_get_byte_count(struct acp_sdw_dma_stream *stream, void __io break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: switch (stream->instance) { case ACP_SDW0: pos_low_reg = acp70_sdw0_dma_reg[stream->stream_id].pos_low_reg; @@ -529,6 +532,7 @@ static int acp63_sdw_dma_close(struct snd_soc_component *component, break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: switch (stream->instance) { case ACP_SDW0: sdw_data->acp70_sdw0_dma_stream[stream->stream_id] = NULL; @@ -574,6 +578,7 @@ static int acp63_sdw_dma_enable(struct snd_pcm_substream *substream, break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: switch (stream->instance) { case ACP_SDW0: sdw_dma_en_reg = acp70_sdw0_dma_enable_reg[stream_id]; From 0df24f34794d2eea4bdc819fba0ba28f226286e6 Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Fri, 1 Aug 2025 11:51:37 +0530 Subject: [PATCH 080/279] ASoC: amd: acp: Add SoundWire legacy machine driver support for acp7.2 platform Add SoundWire legacy machine driver support for acp7.2 platform. Signed-off-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20250801062207.579388-4-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-sdw-legacy-mach.c | 3 +++ sound/soc/amd/acp/soc_amd_sdw_common.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/sound/soc/amd/acp/acp-sdw-legacy-mach.c b/sound/soc/amd/acp/acp-sdw-legacy-mach.c index 6c24f9d8694e..c2197b75a7dd 100644 --- a/sound/soc/amd/acp/acp-sdw-legacy-mach.c +++ b/sound/soc/amd/acp/acp-sdw-legacy-mach.c @@ -158,6 +158,7 @@ static int create_sdw_dailink(struct snd_soc_card *card, break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: ret = get_acp70_cpu_pin_id(ffs(soc_end->link_mask - 1), *be_id, &cpu_pin_id, dev); if (ret) @@ -264,6 +265,7 @@ static int create_sdw_dailinks(struct snd_soc_card *card, case ACP63_PCI_REV: case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: sdw_platform_component->name = "amd_ps_sdw_dma.0"; break; default: @@ -311,6 +313,7 @@ static int create_dmic_dailinks(struct snd_soc_card *card, case ACP63_PCI_REV: case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: pdm_cpu->name = "acp_ps_pdm_dma.0"; pdm_platform->name = "acp_ps_pdm_dma.0"; break; diff --git a/sound/soc/amd/acp/soc_amd_sdw_common.h b/sound/soc/amd/acp/soc_amd_sdw_common.h index 1f24e0e06487..3930cc46fa58 100644 --- a/sound/soc/amd/acp/soc_amd_sdw_common.h +++ b/sound/soc/amd/acp/soc_amd_sdw_common.h @@ -21,6 +21,8 @@ #define ACP63_PCI_REV 0x63 #define ACP70_PCI_REV 0x70 #define ACP71_PCI_REV 0x71 +#define ACP72_PCI_REV 0x72 + #define SOC_JACK_JDSRC(quirk) ((quirk) & GENMASK(3, 0)) #define ASOC_SDW_FOUR_SPK BIT(4) #define ASOC_SDW_ACP_DMIC BIT(5) From 1c4c768d068616fa8948826ab714a4ac1f3b9aa9 Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Fri, 1 Aug 2025 11:51:38 +0530 Subject: [PATCH 081/279] ASoC: amd: acp: Add SoundWire SOF machine driver support for acp7.2 platform Add SoundWire SOF machine driver support for acp7.2 platform. Signed-off-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20250801062207.579388-5-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-sdw-sof-mach.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/amd/acp/acp-sdw-sof-mach.c b/sound/soc/amd/acp/acp-sdw-sof-mach.c index 654fe78b2e2e..91d72d4bb9a2 100644 --- a/sound/soc/amd/acp/acp-sdw-sof-mach.c +++ b/sound/soc/amd/acp/acp-sdw-sof-mach.c @@ -130,6 +130,7 @@ static int create_sdw_dailink(struct snd_soc_card *card, break; case ACP70_PCI_REV: case ACP71_PCI_REV: + case ACP72_PCI_REV: ret = get_acp70_cpu_pin_id(ffs(sof_end->link_mask - 1), *be_id, &cpu_pin_id, dev); if (ret) From 9843cf7b6fd6f938c16fde51e86dd0e3ddbefb12 Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Fri, 1 Aug 2025 10:16:18 +0800 Subject: [PATCH 082/279] ASoC: tas2781: Fix the wrong step for TLV on tas2781 The step for TLV on tas2781, should be 50 (-0.5dB). Fixes: 678f38eba1f2 ("ASoC: tas2781: Add Header file for tas2781 driver") Signed-off-by: Baojun Xu Link: https://patch.msgid.link/20250801021618.64627-1-baojun.xu@ti.com Signed-off-by: Mark Brown --- include/sound/tas2781-tlv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sound/tas2781-tlv.h b/include/sound/tas2781-tlv.h index d87263e43fdb..ef9b9f19d212 100644 --- a/include/sound/tas2781-tlv.h +++ b/include/sound/tas2781-tlv.h @@ -15,7 +15,7 @@ #ifndef __TAS2781_TLV_H__ #define __TAS2781_TLV_H__ -static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0); +static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 50, 0); static const __maybe_unused DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0); #endif From 1b03391d073dad748636a1ad9668b837cce58265 Mon Sep 17 00:00:00 2001 From: Peter Jakubek Date: Thu, 31 Jul 2025 18:21:04 +0100 Subject: [PATCH 083/279] ASoC: Intel: sof_sdw: Add quirk for Alienware Area 51 (2025) 0CCC SKU Add DMI quirk entry for Alienware systems with SKU "0CCC" to enable proper speaker codec configuration (SOC_SDW_CODEC_SPKR). This system requires the same audio configuration as some existing Dell systems. Without this patch, the laptop's speakers and microphone will not work. Signed-off-by: Peter Jakubek Link: https://patch.msgid.link/20250731172104.2009007-1-peterjakubek@gmail.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index c639df2cacdd..f997b2dc221b 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -741,6 +741,14 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { }, .driver_data = (void *)(SOC_SDW_CODEC_SPKR), }, + { + .callback = sof_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0CCC") + }, + .driver_data = (void *)(SOC_SDW_CODEC_SPKR), + }, /* Pantherlake devices*/ { .callback = sof_sdw_quirk_cb, From ffcfd071eec7973e58c4ffff7da4cb0e9ca7b667 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Thu, 31 Jul 2025 16:01:09 +0000 Subject: [PATCH 084/279] spi: cs42l43: Property entry should be a null-terminated array The software node does not specify a count of property entries, so the array must be null-terminated. When unterminated, this can lead to a fault in the downstream cs35l56 amplifier driver, because the node parse walks off the end of the array into unknown memory. Fixes: 0ca645ab5b15 ("spi: cs42l43: Add speaker id support to the bridge configuration") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220371 Signed-off-by: Simon Trimmer Link: https://patch.msgid.link/20250731160109.1547131-1-simont@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/spi/spi-cs42l43.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-cs42l43.c b/drivers/spi/spi-cs42l43.c index b28a840b3b04..14307dd800b7 100644 --- a/drivers/spi/spi-cs42l43.c +++ b/drivers/spi/spi-cs42l43.c @@ -295,7 +295,7 @@ static struct spi_board_info *cs42l43_create_bridge_amp(struct cs42l43_spi *priv struct spi_board_info *info; if (spkid >= 0) { - props = devm_kmalloc(priv->dev, sizeof(*props), GFP_KERNEL); + props = devm_kcalloc(priv->dev, 2, sizeof(*props), GFP_KERNEL); if (!props) return NULL; From 49f848788a4d157bb6648a57963cb060fed3d56e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 30 Jul 2025 08:04:37 -0700 Subject: [PATCH 085/279] x86/cpu: Add new Intel CPU model numbers for Wildcatlake and Novalake Wildcatlake is a mobile CPU. Novalake has both desktop and mobile versions. [ bp: Merge into a single patch. ] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20250730150437.4701-1-tony.luck@intel.com --- arch/x86/include/asm/intel-family.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index be10c188614f..e345dbdf933e 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -150,6 +150,11 @@ #define INTEL_PANTHERLAKE_L IFM(6, 0xCC) /* Cougar Cove / Crestmont */ +#define INTEL_WILDCATLAKE_L IFM(6, 0xD5) + +#define INTEL_NOVALAKE IFM(18, 0x01) +#define INTEL_NOVALAKE_L IFM(18, 0x03) + /* "Small Core" Processors (Atom/E-Core) */ #define INTEL_ATOM_BONNELL IFM(6, 0x1C) /* Diamondville, Pineview */ From 83e6384374bac8a9da3411fae7f24376a7dbd2a3 Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Tue, 22 Jul 2025 09:18:18 -0700 Subject: [PATCH 086/279] smp: Fix spelling in on_each_cpu_cond_mask()'s doc-comment "boolean" is spelt as "blooean". Fix that. Signed-off-by: Roman Kisel Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250722161818.6139-1-romank@linux.microsoft.com --- kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/smp.c b/kernel/smp.c index 4649fa4872ff..56f83aa58ec8 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -1018,7 +1018,7 @@ void __init smp_init(void) * @cond_func: A callback function that is passed a cpu id and * the info parameter. The function is called * with preemption disabled. The function should - * return a blooean value indicating whether to IPI + * return a boolean value indicating whether to IPI * the specified CPU. * @func: The function to run on all applicable CPUs. * This must be fast and non-blocking. From e703b7e247503b8bf87b62c02a4392749b09eca8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jul 2025 21:44:55 +0200 Subject: [PATCH 087/279] futex: Move futex cleanup to __mmdrop() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Futex hash allocations are done in mm_init() and the cleanup happens in __mmput(). That works most of the time, but there are mm instances which are instantiated via mm_alloc() and freed via mmdrop(), which causes the futex hash to be leaked. Move the cleanup to __mmdrop(). Fixes: 56180dd20c19 ("futex: Use RCU-based per-CPU reference counting instead of rcuref_t") Reported-by: André Draszik Signed-off-by: Thomas Gleixner Tested-by: André Draszik Link: https://lore.kernel.org/all/87ldo5ihu0.ffs@tglx Closes: https://lore.kernel.org/all/0c8cc83bb73abf080faf584f319008b67d0931db.camel@linaro.org --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index f82b77eef7fe..1b0535ee5ffa 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -686,6 +686,7 @@ void __mmdrop(struct mm_struct *mm) mm_pasid_drop(mm); mm_destroy_cid(mm); percpu_counter_destroy_many(mm->rss_stat, NR_MM_COUNTERS); + futex_hash_free(mm); free_mm(mm); } @@ -1133,7 +1134,6 @@ static inline void __mmput(struct mm_struct *mm) if (mm->binfmt) module_put(mm->binfmt->module); lru_gen_del_mm(mm); - futex_hash_free(mm); mmdrop(mm); } From 0808da36b982442afc4a34555e492a16f2e5973e Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 1 Aug 2025 11:47:10 -0400 Subject: [PATCH 088/279] ALSA: usb-audio: Don't use printk_ratelimit for debug prints printk_ratelimit is deprecated, since it shares state with all other printk sites. Additionally, the suppression message is printed at warning level even though the actual messages are printed at debug and are (usually) invisible! This can result in thousands of messages like retire_capture_urb: 4992 callbacks suppressed in the console, and can inhibit debugging since it is unclear what the source of the suppressed callbacks is. Switch to dev_dbg_ratelimited which doesn't print anything unless debug is enabled. Signed-off-by: Sean Anderson Link: https://patch.msgid.link/20250801154710.739464-1-sean.anderson@linux.dev Signed-off-by: Takashi Iwai --- sound/usb/pcm.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index b24ee38fad72..bff92505e408 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -1336,11 +1336,10 @@ static void retire_capture_urb(struct snd_usb_substream *subs, for (i = 0; i < urb->number_of_packets; i++) { cp = (unsigned char *)urb->transfer_buffer + urb->iso_frame_desc[i].offset + subs->pkt_offset_adj; - if (urb->iso_frame_desc[i].status && printk_ratelimit()) { - dev_dbg(&subs->dev->dev, "frame %d active: %d\n", - i, urb->iso_frame_desc[i].status); - // continue; - } + if (urb->iso_frame_desc[i].status) + dev_dbg_ratelimited(&subs->dev->dev, + "frame %d active: %d\n", i, + urb->iso_frame_desc[i].status); bytes = urb->iso_frame_desc[i].actual_length; if (subs->stream_offset_adj > 0) { unsigned int adj = min(subs->stream_offset_adj, bytes); From 1b30d44417278196a90c79244bb43e8428586345 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 1 Aug 2025 16:23:30 -0700 Subject: [PATCH 089/279] bpf: Fix memory leak of bpf_scc_info objects env->scc_info array contains references to bpf_scc_info objects allocated lazily in verifier.c:scc_visit_alloc(). env->scc_cnt was supposed to track env->scc_info array size in order to free referenced objects in verifier.c:free_states(). Fix initialization of env->scc_cnt that was omitted in verifier.c:compute_scc(). To reproduce the bug: - build with CONFIG_DEBUG_KMEMLEAK - boot and load bpf program with loops, e.g.: ./veristat -q pyperf180.bpf.o - initiate memleak scan and check results: echo scan > /sys/kernel/debug/kmemleak cat /sys/kernel/debug/kmemleak Fixes: c9e31900b54c ("bpf: propagate read/precision marks over state graph backedges") Reported-by: Jens Axboe Closes: https://lore.kernel.org/bpf/CAADnVQKXUWg9uRCPD5ebRXwN4dmBCRUFFM7kN=GxymYz3zU25A@mail.gmail.com/T/ Suggested-by: Alexei Starovoitov Tested-by: Jens Axboe Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250801232330.1800436-1-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0806295945e4..c4f69a9e9af6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -23114,6 +23114,8 @@ static void free_states(struct bpf_verifier_env *env) for (i = 0; i < env->scc_cnt; ++i) { info = env->scc_info[i]; + if (!info) + continue; for (j = 0; j < info->num_visits; j++) free_backedges(&info->visits[j]); kvfree(info); @@ -24554,6 +24556,7 @@ static int compute_scc(struct bpf_verifier_env *env) err = -ENOMEM; goto exit; } + env->scc_cnt = next_scc_id; exit: kvfree(stack); kvfree(pre); From 987ca60637a46882a026ca9cc9f3e5f26e8aec28 Mon Sep 17 00:00:00 2001 From: Wang Jinchao Date: Mon, 7 Jul 2025 09:26:57 +0800 Subject: [PATCH 090/279] md/raid1: change r1conf->r1bio_pool to a pointer type In raid1_reshape(), newpool is a stack variable. mempool_init() initializes newpool->wait with the stack address. After assigning newpool to conf->r1bio_pool, the wait queue need to be reinitialized, which is not ideal. Change raid1_conf->r1bio_pool to a pointer type and replace mempool_init() with mempool_create_kmalloc_pool() to avoid referencing a stack-based wait queue. Signed-off-by: Wang Jinchao Link: https://lore.kernel.org/linux-raid/20250707012711.376844-2-yukuai1@huaweicloud.com Signed-off-by: Yu Kuai --- drivers/md/raid1.c | 39 ++++++++++++++++++--------------------- drivers/md/raid1.h | 2 +- 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 6cee738a645f..589223fef20a 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -255,7 +255,7 @@ static void free_r1bio(struct r1bio *r1_bio) struct r1conf *conf = r1_bio->mddev->private; put_all_bios(conf, r1_bio); - mempool_free(r1_bio, &conf->r1bio_pool); + mempool_free(r1_bio, conf->r1bio_pool); } static void put_buf(struct r1bio *r1_bio) @@ -1305,9 +1305,8 @@ alloc_r1bio(struct mddev *mddev, struct bio *bio) struct r1conf *conf = mddev->private; struct r1bio *r1_bio; - r1_bio = mempool_alloc(&conf->r1bio_pool, GFP_NOIO); - /* Ensure no bio records IO_BLOCKED */ - memset(r1_bio->bios, 0, conf->raid_disks * sizeof(r1_bio->bios[0])); + r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); + memset(r1_bio, 0, offsetof(struct r1bio, bios[conf->raid_disks * 2])); init_r1bio(r1_bio, mddev, bio); return r1_bio; } @@ -3085,6 +3084,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) int i; struct raid1_info *disk; struct md_rdev *rdev; + size_t r1bio_size; int err = -ENOMEM; conf = kzalloc(sizeof(struct r1conf), GFP_KERNEL); @@ -3125,9 +3125,10 @@ static struct r1conf *setup_conf(struct mddev *mddev) if (!conf->poolinfo) goto abort; conf->poolinfo->raid_disks = mddev->raid_disks * 2; - err = mempool_init(&conf->r1bio_pool, NR_RAID_BIOS, r1bio_pool_alloc, - rbio_pool_free, conf->poolinfo); - if (err) + + r1bio_size = offsetof(struct r1bio, bios[mddev->raid_disks * 2]); + conf->r1bio_pool = mempool_create_kmalloc_pool(NR_RAID_BIOS, r1bio_size); + if (!conf->r1bio_pool) goto abort; err = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0); @@ -3198,7 +3199,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) abort: if (conf) { - mempool_exit(&conf->r1bio_pool); + mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); safe_put_page(conf->tmppage); kfree(conf->poolinfo); @@ -3311,7 +3312,7 @@ static void raid1_free(struct mddev *mddev, void *priv) { struct r1conf *conf = priv; - mempool_exit(&conf->r1bio_pool); + mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); safe_put_page(conf->tmppage); kfree(conf->poolinfo); @@ -3367,17 +3368,14 @@ static int raid1_reshape(struct mddev *mddev) * At the same time, we "pack" the devices so that all the missing * devices have the higher raid_disk numbers. */ - mempool_t newpool, oldpool; + mempool_t *newpool, *oldpool; struct pool_info *newpoolinfo; + size_t new_r1bio_size; struct raid1_info *newmirrors; struct r1conf *conf = mddev->private; int cnt, raid_disks; unsigned long flags; int d, d2; - int ret; - - memset(&newpool, 0, sizeof(newpool)); - memset(&oldpool, 0, sizeof(oldpool)); /* Cannot change chunk_size, layout, or level */ if (mddev->chunk_sectors != mddev->new_chunk_sectors || @@ -3409,18 +3407,18 @@ static int raid1_reshape(struct mddev *mddev) newpoolinfo->mddev = mddev; newpoolinfo->raid_disks = raid_disks * 2; - ret = mempool_init(&newpool, NR_RAID_BIOS, r1bio_pool_alloc, - rbio_pool_free, newpoolinfo); - if (ret) { + new_r1bio_size = offsetof(struct r1bio, bios[raid_disks * 2]); + newpool = mempool_create_kmalloc_pool(NR_RAID_BIOS, new_r1bio_size); + if (!newpool) { kfree(newpoolinfo); - return ret; + return -ENOMEM; } newmirrors = kzalloc(array3_size(sizeof(struct raid1_info), raid_disks, 2), GFP_KERNEL); if (!newmirrors) { kfree(newpoolinfo); - mempool_exit(&newpool); + mempool_destroy(newpool); return -ENOMEM; } @@ -3429,7 +3427,6 @@ static int raid1_reshape(struct mddev *mddev) /* ok, everything is stopped */ oldpool = conf->r1bio_pool; conf->r1bio_pool = newpool; - init_waitqueue_head(&conf->r1bio_pool.wait); for (d = d2 = 0; d < conf->raid_disks; d++) { struct md_rdev *rdev = conf->mirrors[d].rdev; @@ -3461,7 +3458,7 @@ static int raid1_reshape(struct mddev *mddev) set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); - mempool_exit(&oldpool); + mempool_destroy(oldpool); return 0; } diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index 33f318fcc268..652c347b1a70 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -118,7 +118,7 @@ struct r1conf { * mempools - it changes when the array grows or shrinks */ struct pool_info *poolinfo; - mempool_t r1bio_pool; + mempool_t *r1bio_pool; mempool_t r1buf_pool; struct bio_set bio_split; From 178d1391c5ce0fc829f3e748058acab9bd9ca4f4 Mon Sep 17 00:00:00 2001 From: Wang Jinchao Date: Mon, 7 Jul 2025 09:26:58 +0800 Subject: [PATCH 091/279] md/raid1: remove struct pool_info and related code The struct pool_info was originally introduced mainly to support reshape operations, serving as a parameter for mempool_init() when raid_disks changes. Now that mempool_create_kmalloc_pool() is sufficient for this purpose, struct pool_info and its related code are no longer needed. Remove struct pool_info and all associated code. Signed-off-by: Wang Jinchao Link: https://lore.kernel.org/linux-raid/20250707012711.376844-3-yukuai1@huaweicloud.com Signed-off-by: Yu Kuai --- drivers/md/raid1.c | 49 +++++++++++++--------------------------------- drivers/md/raid1.h | 20 ------------------- 2 files changed, 14 insertions(+), 55 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 589223fef20a..408c26398321 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -127,10 +127,9 @@ static inline struct r1bio *get_resync_r1bio(struct bio *bio) return get_resync_pages(bio)->raid_bio; } -static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) +static void *r1bio_pool_alloc(gfp_t gfp_flags, struct r1conf *conf) { - struct pool_info *pi = data; - int size = offsetof(struct r1bio, bios[pi->raid_disks]); + int size = offsetof(struct r1bio, bios[conf->raid_disks * 2]); /* allocate a r1bio with room for raid_disks entries in the bios array */ return kzalloc(size, gfp_flags); @@ -145,18 +144,18 @@ static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) { - struct pool_info *pi = data; + struct r1conf *conf = data; struct r1bio *r1_bio; struct bio *bio; int need_pages; int j; struct resync_pages *rps; - r1_bio = r1bio_pool_alloc(gfp_flags, pi); + r1_bio = r1bio_pool_alloc(gfp_flags, conf); if (!r1_bio) return NULL; - rps = kmalloc_array(pi->raid_disks, sizeof(struct resync_pages), + rps = kmalloc_array(conf->raid_disks * 2, sizeof(struct resync_pages), gfp_flags); if (!rps) goto out_free_r1bio; @@ -164,7 +163,7 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) /* * Allocate bios : 1 for reading, n-1 for writing */ - for (j = pi->raid_disks ; j-- ; ) { + for (j = conf->raid_disks * 2; j-- ; ) { bio = bio_kmalloc(RESYNC_PAGES, gfp_flags); if (!bio) goto out_free_bio; @@ -177,11 +176,11 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) * If this is a user-requested check/repair, allocate * RESYNC_PAGES for each bio. */ - if (test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) - need_pages = pi->raid_disks; + if (test_bit(MD_RECOVERY_REQUESTED, &conf->mddev->recovery)) + need_pages = conf->raid_disks * 2; else need_pages = 1; - for (j = 0; j < pi->raid_disks; j++) { + for (j = 0; j < conf->raid_disks * 2; j++) { struct resync_pages *rp = &rps[j]; bio = r1_bio->bios[j]; @@ -207,7 +206,7 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) resync_free_pages(&rps[j]); out_free_bio: - while (++j < pi->raid_disks) { + while (++j < conf->raid_disks * 2) { bio_uninit(r1_bio->bios[j]); kfree(r1_bio->bios[j]); } @@ -220,12 +219,12 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) static void r1buf_pool_free(void *__r1_bio, void *data) { - struct pool_info *pi = data; + struct r1conf *conf = data; int i; struct r1bio *r1bio = __r1_bio; struct resync_pages *rp = NULL; - for (i = pi->raid_disks; i--; ) { + for (i = conf->raid_disks * 2; i--; ) { rp = get_resync_pages(r1bio->bios[i]); resync_free_pages(rp); bio_uninit(r1bio->bios[i]); @@ -2746,7 +2745,7 @@ static int init_resync(struct r1conf *conf) BUG_ON(mempool_initialized(&conf->r1buf_pool)); return mempool_init(&conf->r1buf_pool, buffs, r1buf_pool_alloc, - r1buf_pool_free, conf->poolinfo); + r1buf_pool_free, conf); } static struct r1bio *raid1_alloc_init_r1buf(struct r1conf *conf) @@ -2756,7 +2755,7 @@ static struct r1bio *raid1_alloc_init_r1buf(struct r1conf *conf) struct bio *bio; int i; - for (i = conf->poolinfo->raid_disks; i--; ) { + for (i = conf->raid_disks * 2; i--; ) { bio = r1bio->bios[i]; rps = bio->bi_private; bio_reset(bio, NULL, 0); @@ -3121,11 +3120,6 @@ static struct r1conf *setup_conf(struct mddev *mddev) if (!conf->tmppage) goto abort; - conf->poolinfo = kzalloc(sizeof(*conf->poolinfo), GFP_KERNEL); - if (!conf->poolinfo) - goto abort; - conf->poolinfo->raid_disks = mddev->raid_disks * 2; - r1bio_size = offsetof(struct r1bio, bios[mddev->raid_disks * 2]); conf->r1bio_pool = mempool_create_kmalloc_pool(NR_RAID_BIOS, r1bio_size); if (!conf->r1bio_pool) @@ -3135,8 +3129,6 @@ static struct r1conf *setup_conf(struct mddev *mddev) if (err) goto abort; - conf->poolinfo->mddev = mddev; - err = -EINVAL; spin_lock_init(&conf->device_lock); conf->raid_disks = mddev->raid_disks; @@ -3202,7 +3194,6 @@ static struct r1conf *setup_conf(struct mddev *mddev) mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); safe_put_page(conf->tmppage); - kfree(conf->poolinfo); kfree(conf->nr_pending); kfree(conf->nr_waiting); kfree(conf->nr_queued); @@ -3315,7 +3306,6 @@ static void raid1_free(struct mddev *mddev, void *priv) mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); safe_put_page(conf->tmppage); - kfree(conf->poolinfo); kfree(conf->nr_pending); kfree(conf->nr_waiting); kfree(conf->nr_queued); @@ -3369,7 +3359,6 @@ static int raid1_reshape(struct mddev *mddev) * devices have the higher raid_disk numbers. */ mempool_t *newpool, *oldpool; - struct pool_info *newpoolinfo; size_t new_r1bio_size; struct raid1_info *newmirrors; struct r1conf *conf = mddev->private; @@ -3401,23 +3390,15 @@ static int raid1_reshape(struct mddev *mddev) return -EBUSY; } - newpoolinfo = kmalloc(sizeof(*newpoolinfo), GFP_KERNEL); - if (!newpoolinfo) - return -ENOMEM; - newpoolinfo->mddev = mddev; - newpoolinfo->raid_disks = raid_disks * 2; - new_r1bio_size = offsetof(struct r1bio, bios[raid_disks * 2]); newpool = mempool_create_kmalloc_pool(NR_RAID_BIOS, new_r1bio_size); if (!newpool) { - kfree(newpoolinfo); return -ENOMEM; } newmirrors = kzalloc(array3_size(sizeof(struct raid1_info), raid_disks, 2), GFP_KERNEL); if (!newmirrors) { - kfree(newpoolinfo); mempool_destroy(newpool); return -ENOMEM; } @@ -3443,8 +3424,6 @@ static int raid1_reshape(struct mddev *mddev) } kfree(conf->mirrors); conf->mirrors = newmirrors; - kfree(conf->poolinfo); - conf->poolinfo = newpoolinfo; spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded += (raid_disks - conf->raid_disks); diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index 652c347b1a70..d236ef179cfb 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -49,22 +49,6 @@ struct raid1_info { sector_t seq_start; }; -/* - * memory pools need a pointer to the mddev, so they can force an unplug - * when memory is tight, and a count of the number of drives that the - * pool was allocated for, so they know how much to allocate and free. - * mddev->raid_disks cannot be used, as it can change while a pool is active - * These two datums are stored in a kmalloced struct. - * The 'raid_disks' here is twice the raid_disks in r1conf. - * This allows space for each 'real' device can have a replacement in the - * second half of the array. - */ - -struct pool_info { - struct mddev *mddev; - int raid_disks; -}; - struct r1conf { struct mddev *mddev; struct raid1_info *mirrors; /* twice 'raid_disks' to @@ -114,10 +98,6 @@ struct r1conf { */ int recovery_disabled; - /* poolinfo contains information about the content of the - * mempools - it changes when the array grows or shrinks - */ - struct pool_info *poolinfo; mempool_t *r1bio_pool; mempool_t r1buf_pool; From 13017b427118f4311471ee47df74872372ca8482 Mon Sep 17 00:00:00 2001 From: Yang Erkun Date: Thu, 31 Jul 2025 19:45:30 +0800 Subject: [PATCH 092/279] md: make rdev_addable usable for rcu mode Our testcase trigger panic: BUG: kernel NULL pointer dereference, address: 00000000000000e0 ... Oops: Oops: 0000 [#1] SMP NOPTI CPU: 2 UID: 0 PID: 85 Comm: kworker/2:1 Not tainted 6.16.0+ #94 PREEMPT(none) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.1-2.fc37 04/01/2014 Workqueue: md_misc md_start_sync RIP: 0010:rdev_addable+0x4d/0xf0 ... Call Trace: md_start_sync+0x329/0x480 process_one_work+0x226/0x6d0 worker_thread+0x19e/0x340 kthread+0x10f/0x250 ret_from_fork+0x14d/0x180 ret_from_fork_asm+0x1a/0x30 Modules linked in: raid10 CR2: 00000000000000e0 ---[ end trace 0000000000000000 ]--- RIP: 0010:rdev_addable+0x4d/0xf0 md_spares_need_change in md_start_sync will call rdev_addable which protected by rcu_read_lock/rcu_read_unlock. This rcu context will help protect rdev won't be released, but rdev->mddev will be set to NULL before we call synchronize_rcu in md_kick_rdev_from_array. Fix this by using READ_ONCE and check does rdev->mddev still alive. Fixes: bc08041b32ab ("md: suspend array in md_start_sync() if array need reconfiguration") Fixes: 570b9147deb6 ("md: use RCU lock to protect traversal in md_spares_need_change()") Signed-off-by: Yang Erkun Link: https://lore.kernel.org/linux-raid/20250731114530.776670-1-yangerkun@huawei.com Signed-off-by: Yu Kuai --- drivers/md/md.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 9c7ed23c45ad..ac85ec73a409 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9427,6 +9427,12 @@ static bool rdev_is_spare(struct md_rdev *rdev) static bool rdev_addable(struct md_rdev *rdev) { + struct mddev *mddev; + + mddev = READ_ONCE(rdev->mddev); + if (!mddev) + return false; + /* rdev is already used, don't add it again. */ if (test_bit(Candidate, &rdev->flags) || rdev->raid_disk >= 0 || test_bit(Faulty, &rdev->flags)) @@ -9437,7 +9443,7 @@ static bool rdev_addable(struct md_rdev *rdev) return true; /* Allow to add if array is read-write. */ - if (md_is_rdwr(rdev->mddev)) + if (md_is_rdwr(mddev)) return true; /* From b644c640923b625340c603cdb8d8f456406eb4de Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 1 Aug 2025 09:18:58 +0200 Subject: [PATCH 093/279] Revert "gpio: pxa: Make irq_chip immutable" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 20117cf426b6 ("gpio: pxa: Make irq_chip immutableas") as it caused a regression on samsung coreprimevelte and we've not been able to fix it so far. Cc: stable@vger.kernel.org # v6.16 Fixes: 20117cf426b6 ("gpio: pxa: Make irq_chip immutableas") Reported-by: Duje Mihanović Closes: https://lore.kernel.org/all/3367665.aeNJFYEL58@radijator/ Tested-by: Duje Mihanović Link: https://lore.kernel.org/r/20250801071858.7554-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pxa.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index 13f7da2a9486..cbcdd416f8b9 100644 --- a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -499,8 +499,6 @@ static void pxa_mask_muxed_gpio(struct irq_data *d) gfer = readl_relaxed(base + GFER_OFFSET) & ~GPIO_bit(gpio); writel_relaxed(grer, base + GRER_OFFSET); writel_relaxed(gfer, base + GFER_OFFSET); - - gpiochip_disable_irq(&pchip->chip, gpio); } static int pxa_gpio_set_wake(struct irq_data *d, unsigned int on) @@ -520,21 +518,17 @@ static void pxa_unmask_muxed_gpio(struct irq_data *d) unsigned int gpio = irqd_to_hwirq(d); struct pxa_gpio_bank *c = gpio_to_pxabank(&pchip->chip, gpio); - gpiochip_enable_irq(&pchip->chip, gpio); - c->irq_mask |= GPIO_bit(gpio); update_edge_detect(c); } -static const struct irq_chip pxa_muxed_gpio_chip = { +static struct irq_chip pxa_muxed_gpio_chip = { .name = "GPIO", .irq_ack = pxa_ack_muxed_gpio, .irq_mask = pxa_mask_muxed_gpio, .irq_unmask = pxa_unmask_muxed_gpio, .irq_set_type = pxa_gpio_irq_type, .irq_set_wake = pxa_gpio_set_wake, - .flags = IRQCHIP_IMMUTABLE, - GPIOCHIP_IRQ_RESOURCE_HELPERS, }; static int pxa_gpio_nums(struct platform_device *pdev) From 63c7bc53a35e785accdc2ceab8f72d94501931ab Mon Sep 17 00:00:00 2001 From: David Thompson Date: Mon, 28 Jul 2025 10:46:19 -0400 Subject: [PATCH 094/279] gpio: mlxbf2: use platform_get_irq_optional() The gpio-mlxbf2 driver interfaces with four GPIO controllers, device instances 0-3. There are two IRQ resources shared between the four controllers, and they are found in the ACPI table for instances 0 and 3. The driver should not use platform_get_irq(), otherwise this error is logged when probing instances 1 and 2: mlxbf2_gpio MLNXBF22:01: error -ENXIO: IRQ index 0 not found Fixes: 2b725265cb08 ("gpio: mlxbf2: Introduce IRQ support") Cc: stable@vger.kernel.org Signed-off-by: David Thompson Reviewed-by: Shravan Kumar Ramani Reviewed-by: Mika Westerberg Link: https://lore.kernel.org/r/20250728144619.29894-1-davthompson@nvidia.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mlxbf2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-mlxbf2.c b/drivers/gpio/gpio-mlxbf2.c index 6f3dda6b635f..390f2e74a9d8 100644 --- a/drivers/gpio/gpio-mlxbf2.c +++ b/drivers/gpio/gpio-mlxbf2.c @@ -397,7 +397,7 @@ mlxbf2_gpio_probe(struct platform_device *pdev) gc->ngpio = npins; gc->owner = THIS_MODULE; - irq = platform_get_irq(pdev, 0); + irq = platform_get_irq_optional(pdev, 0); if (irq >= 0) { girq = &gs->gc.irq; gpio_irq_chip_set_chip(girq, &mlxbf2_gpio_irq_chip); From 533210f23936a482010016ac3f57995046c58565 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 31 Jul 2025 16:10:38 +0800 Subject: [PATCH 095/279] nfs/localio: use read_seqbegin() rather than read_seqbegin_or_lock() The usage of read_seqbegin_or_lock() in nfs_copy_boot_verifier() is wrong. "seq" is always even and thus "or_lock" has no effect. nfs_copy_boot_verifier() just copies 8 bytes and is supposed to be very rare operation, so we do not need the adaptive locking in this case. Signed-off-by: Li RongQing Link: https://lore.kernel.org/r/20250731081038.3478-1-lirongqing@baidu.com Signed-off-by: Trond Myklebust --- fs/nfs/localio.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 510d0a16cfe9..bd5fca285899 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -500,14 +500,13 @@ nfs_copy_boot_verifier(struct nfs_write_verifier *verifier, struct inode *inode) { struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; u32 *verf = (u32 *)verifier->data; - int seq = 0; + unsigned int seq; do { - read_seqbegin_or_lock(&clp->cl_boot_lock, &seq); + seq = read_seqbegin(&clp->cl_boot_lock); verf[0] = (u32)clp->cl_nfssvc_boot.tv_sec; verf[1] = (u32)clp->cl_nfssvc_boot.tv_nsec; - } while (need_seqretry(&clp->cl_boot_lock, seq)); - done_seqretry(&clp->cl_boot_lock, seq); + } while (read_seqretry(&clp->cl_boot_lock, seq)); } static void From cc5d59081fa26506d02de2127ab822f40d88bc5a Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 31 Jul 2025 14:00:56 -0400 Subject: [PATCH 096/279] sunrpc: fix client side handling of tls alerts A security exploit was discovered in NFS over TLS in tls_alert_recv due to its assumption that there is valid data in the msghdr's iterator's kvec. Instead, this patch proposes the rework how control messages are setup and used by sock_recvmsg(). If no control message structure is setup, kTLS layer will read and process TLS data record types. As soon as it encounters a TLS control message, it would return an error. At that point, NFS can setup a kvec backed control buffer and read in the control message such as a TLS alert. Scott found that a msg iterator can advance the kvec pointer as a part of the copy process thus we need to revert the iterator before calling into the tls_alert_recv. Fixes: dea034b963c8 ("SUNRPC: Capture CMSG metadata on client-side receive") Suggested-by: Trond Myklebust Suggested-by: Scott Mayhew Signed-off-by: Olga Kornievskaia Link: https://lore.kernel.org/r/20250731180058.4669-3-okorniev@redhat.com Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 04ff66758fc3..c5f7bbf5775f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -358,7 +358,7 @@ xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp) static int xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg, - struct cmsghdr *cmsg, int ret) + unsigned int *msg_flags, struct cmsghdr *cmsg, int ret) { u8 content_type = tls_get_record_type(sock->sk, cmsg); u8 level, description; @@ -371,7 +371,7 @@ xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg, * record, even though there might be more frames * waiting to be decrypted. */ - msg->msg_flags &= ~MSG_EOR; + *msg_flags &= ~MSG_EOR; break; case TLS_RECORD_TYPE_ALERT: tls_alert_recv(sock->sk, msg, &level, &description); @@ -386,19 +386,33 @@ xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg, } static int -xs_sock_recv_cmsg(struct socket *sock, struct msghdr *msg, int flags) +xs_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags, int flags) { union { struct cmsghdr cmsg; u8 buf[CMSG_SPACE(sizeof(u8))]; } u; + u8 alert[2]; + struct kvec alert_kvec = { + .iov_base = alert, + .iov_len = sizeof(alert), + }; + struct msghdr msg = { + .msg_flags = *msg_flags, + .msg_control = &u, + .msg_controllen = sizeof(u), + }; int ret; - msg->msg_control = &u; - msg->msg_controllen = sizeof(u); - ret = sock_recvmsg(sock, msg, flags); - if (msg->msg_controllen != sizeof(u)) - ret = xs_sock_process_cmsg(sock, msg, &u.cmsg, ret); + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1, + alert_kvec.iov_len); + ret = sock_recvmsg(sock, &msg, flags); + if (ret > 0 && + tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) { + iov_iter_revert(&msg.msg_iter, ret); + ret = xs_sock_process_cmsg(sock, &msg, msg_flags, &u.cmsg, + -EAGAIN); + } return ret; } @@ -408,7 +422,13 @@ xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek) ssize_t ret; if (seek != 0) iov_iter_advance(&msg->msg_iter, seek); - ret = xs_sock_recv_cmsg(sock, msg, flags); + ret = sock_recvmsg(sock, msg, flags); + /* Handle TLS inband control message lazily */ + if (msg->msg_flags & MSG_CTRUNC) { + msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR); + if (ret == 0 || ret == -EIO) + ret = xs_sock_recv_cmsg(sock, &msg->msg_flags, flags); + } return ret > 0 ? ret + seek : ret; } @@ -434,7 +454,7 @@ xs_read_discard(struct socket *sock, struct msghdr *msg, int flags, size_t count) { iov_iter_discard(&msg->msg_iter, ITER_DEST, count); - return xs_sock_recv_cmsg(sock, msg, flags); + return xs_sock_recvmsg(sock, msg, flags, 0); } #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE From 407728da41cd6450cec6a4277027015a75744d56 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 2 Aug 2025 09:25:59 +0200 Subject: [PATCH 097/279] block, bfq: Reorder struct bfq_iocq_bfqq_data The size of struct bfq_iocq_bfqq_data can be reduced by moving a few fields around. On a x86_64, with allmodconfig, this shrinks the size from 144 to 128 bytes. The main benefit is to reduce the size of struct bfq_io_cq from 1360 to 1232. This structure is stored in a dedicated slab cache. So reducing its size improves cache usage. Signed-off-by: Christophe JAILLET Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/79394db1befaa658e8066b8e3348073ce27d9d26.1754119538.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jens Axboe --- block/bfq-iosched.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 687a3a7ba784..0b4704932d72 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -427,9 +427,6 @@ struct bfq_iocq_bfqq_data { */ bool saved_IO_bound; - u64 saved_io_start_time; - u64 saved_tot_idle_time; - /* * Same purpose as the previous fields for the values of the * field keeping the queue's belonging to a large burst @@ -450,6 +447,9 @@ struct bfq_iocq_bfqq_data { */ unsigned int saved_weight; + u64 saved_io_start_time; + u64 saved_tot_idle_time; + /* * Similar to previous fields: save wr information. */ @@ -457,13 +457,13 @@ struct bfq_iocq_bfqq_data { unsigned long saved_last_wr_start_finish; unsigned long saved_service_from_wr; unsigned long saved_wr_start_at_switch_to_srt; - unsigned int saved_wr_cur_max_time; struct bfq_ttime saved_ttime; + unsigned int saved_wr_cur_max_time; /* Save also injection state */ - u64 saved_last_serv_time_ns; unsigned int saved_inject_limit; unsigned long saved_decrease_time_jif; + u64 saved_last_serv_time_ns; /* candidate queue for a stable merge (due to close creation time) */ struct bfq_queue *stable_merge_bfqq; From b01f21cacde9f2878492cf318fee61bf4ccad323 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 3 Aug 2025 14:31:59 -0700 Subject: [PATCH 098/279] NFS: Fix the setting of capabilities when automounting a new filesystem Capabilities cannot be inherited when we cross into a new filesystem. They need to be reset to the minimal defaults, and then probed for again. Fixes: 54ceac451598 ("NFS: Share NFS superblocks per-protocol per-server per-FSID") Cc: stable@vger.kernel.org Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 44 ++++++++++++++++++++++++++++++++++++++++++-- fs/nfs/internal.h | 2 +- fs/nfs/nfs4client.c | 20 +------------------- fs/nfs/nfs4proc.c | 2 +- 4 files changed, 45 insertions(+), 23 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index e13eb429b8b5..8fb4a950dd55 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -682,6 +682,44 @@ struct nfs_client *nfs_init_client(struct nfs_client *clp, } EXPORT_SYMBOL_GPL(nfs_init_client); +static void nfs4_server_set_init_caps(struct nfs_server *server) +{ +#if IS_ENABLED(CONFIG_NFS_V4) + /* Set the basic capabilities */ + server->caps = server->nfs_client->cl_mvops->init_caps; + if (server->flags & NFS_MOUNT_NORDIRPLUS) + server->caps &= ~NFS_CAP_READDIRPLUS; + if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA) + server->caps &= ~NFS_CAP_READ_PLUS; + + /* + * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower + * authentication. + */ + if (nfs4_disable_idmapping && + server->client->cl_auth->au_flavor == RPC_AUTH_UNIX) + server->caps |= NFS_CAP_UIDGID_NOMAP; +#endif +} + +void nfs_server_set_init_caps(struct nfs_server *server) +{ + switch (server->nfs_client->rpc_ops->version) { + case 2: + server->caps = NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS; + break; + case 3: + server->caps = NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS; + if (!(server->flags & NFS_MOUNT_NORDIRPLUS)) + server->caps |= NFS_CAP_READDIRPLUS; + break; + default: + nfs4_server_set_init_caps(server); + break; + } +} +EXPORT_SYMBOL_GPL(nfs_server_set_init_caps); + /* * Create a version 2 or 3 client */ @@ -726,7 +764,6 @@ static int nfs_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = ctx->flags; server->options = ctx->options; - server->caps |= NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS; switch (clp->rpc_ops->version) { case 2: @@ -762,6 +799,8 @@ static int nfs_init_server(struct nfs_server *server, if (error < 0) goto error; + nfs_server_set_init_caps(server); + /* Preserve the values of mount_server-related mount options */ if (ctx->mount_server.addrlen) { memcpy(&server->mountd_address, &ctx->mount_server.address, @@ -934,7 +973,6 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour target->acregmax = source->acregmax; target->acdirmin = source->acdirmin; target->acdirmax = source->acdirmax; - target->caps = source->caps; target->options = source->options; target->auth_info = source->auth_info; target->port = source->port; @@ -1169,6 +1207,8 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, if (error < 0) goto out_free_server; + nfs_server_set_init_caps(server); + /* probe the filesystem info for this server filesystem */ error = nfs_probe_server(server, fh); if (error < 0) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 0143e0794d32..1a18d8d9be25 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -231,7 +231,7 @@ extern struct nfs_client * nfs4_find_client_sessionid(struct net *, const struct sockaddr *, struct nfs4_sessionid *, u32); extern struct nfs_server *nfs_create_server(struct fs_context *); -extern void nfs4_server_set_init_caps(struct nfs_server *); +extern void nfs_server_set_init_caps(struct nfs_server *); extern struct nfs_server *nfs4_create_server(struct fs_context *); extern struct nfs_server *nfs4_create_referral_server(struct fs_context *); extern int nfs4_update_server(struct nfs_server *server, const char *hostname, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 2ea98f1f116f..6fddf43d729c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1074,24 +1074,6 @@ static void nfs4_session_limit_xasize(struct nfs_server *server) #endif } -void nfs4_server_set_init_caps(struct nfs_server *server) -{ - /* Set the basic capabilities */ - server->caps |= server->nfs_client->cl_mvops->init_caps; - if (server->flags & NFS_MOUNT_NORDIRPLUS) - server->caps &= ~NFS_CAP_READDIRPLUS; - if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA) - server->caps &= ~NFS_CAP_READ_PLUS; - - /* - * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower - * authentication. - */ - if (nfs4_disable_idmapping && - server->client->cl_auth->au_flavor == RPC_AUTH_UNIX) - server->caps |= NFS_CAP_UIDGID_NOMAP; -} - static int nfs4_server_common_setup(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_probe) { @@ -1110,7 +1092,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, if (error < 0) return error; - nfs4_server_set_init_caps(server); + nfs_server_set_init_caps(server); /* Probe the root fh to retrieve its FSID and filehandle */ error = nfs4_get_rootfh(server, mntfh, auth_probe); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d7dc669d84c5..c7c7ec22f21d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4092,7 +4092,7 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) }; int err; - nfs4_server_set_init_caps(server); + nfs_server_set_init_caps(server); do { err = nfs4_handle_exception(server, _nfs4_server_capabilities(server, fhandle), From b9defd611abf3d24354e14c8d85da14c7abaa07e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 3 Aug 2025 16:32:00 -0700 Subject: [PATCH 099/279] NFSv4: Remove duplicate lookups, capability probes and fsinfo calls When crossing into a new filesystem, the NFSv4 client will look up the new directory, and then call nfs4_server_capabilities() as well as nfs4_do_fsinfo() at least twice. This patch removes the duplicate calls, and reduces the initial lookup to retrieve just a minimal set of attributes. Reviewed-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 5 ++- fs/nfs/nfs4getroot.c | 14 +++---- fs/nfs/nfs4proc.c | 87 ++++++++++++++++++++------------------------ 3 files changed, 48 insertions(+), 58 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index d3ca91f60fc1..c34c89af9c7d 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -63,7 +63,7 @@ struct nfs4_minor_version_ops { bool (*match_stateid)(const nfs4_stateid *, const nfs4_stateid *); int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, - struct nfs_fsinfo *); + struct nfs_fattr *); void (*free_lock_state)(struct nfs_server *, struct nfs4_lock_state *); int (*test_and_free_expired)(struct nfs_server *, @@ -296,7 +296,8 @@ extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int, int); extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, const struct cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, const struct cred *); -extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool); +extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, + struct nfs_fattr *, bool); extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, const struct cred *cred); extern int nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cred); extern int nfs4_destroy_clientid(struct nfs_client *clp); diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c index 1a69479a3a59..e67ea345de69 100644 --- a/fs/nfs/nfs4getroot.c +++ b/fs/nfs/nfs4getroot.c @@ -12,30 +12,28 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_probe) { - struct nfs_fsinfo fsinfo; + struct nfs_fattr *fattr = nfs_alloc_fattr(); int ret = -ENOMEM; - fsinfo.fattr = nfs_alloc_fattr(); - if (fsinfo.fattr == NULL) + if (fattr == NULL) goto out; /* Start by getting the root filehandle from the server */ - ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo, auth_probe); + ret = nfs4_proc_get_rootfh(server, mntfh, fattr, auth_probe); if (ret < 0) { dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret); goto out; } - if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE) - || !S_ISDIR(fsinfo.fattr->mode)) { + if (!(fattr->valid & NFS_ATTR_FATTR_TYPE) || !S_ISDIR(fattr->mode)) { printk(KERN_ERR "nfs4_get_rootfh:" " getroot encountered non-directory\n"); ret = -ENOTDIR; goto out; } - memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid)); + memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid)); out: - nfs_free_fattr(fsinfo.fattr); + nfs_free_fattr(fattr); return ret; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c7c7ec22f21d..7d2b67e06cc3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4240,15 +4240,18 @@ static int nfs4_discover_trunking(struct nfs_server *server, } static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fattr *fattr) { - u32 bitmask[3]; + u32 bitmask[3] = { + [0] = FATTR4_WORD0_TYPE | FATTR4_WORD0_CHANGE | + FATTR4_WORD0_SIZE | FATTR4_WORD0_FSID, + }; struct nfs4_lookup_root_arg args = { .bitmask = bitmask, }; struct nfs4_lookup_res res = { .server = server, - .fattr = info->fattr, + .fattr = fattr, .fh = fhandle, }; struct rpc_message msg = { @@ -4257,27 +4260,20 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_resp = &res, }; - bitmask[0] = nfs4_fattr_bitmap[0]; - bitmask[1] = nfs4_fattr_bitmap[1]; - /* - * Process the label in the upcoming getfattr - */ - bitmask[2] = nfs4_fattr_bitmap[2] & ~FATTR4_WORD2_SECURITY_LABEL; - - nfs_fattr_init(info->fattr); + nfs_fattr_init(fattr); return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); } static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fattr *fattr) { struct nfs4_exception exception = { .interruptible = true, }; int err; do { - err = _nfs4_lookup_root(server, fhandle, info); - trace_nfs4_lookup_root(server, fhandle, info->fattr, err); + err = _nfs4_lookup_root(server, fhandle, fattr); + trace_nfs4_lookup_root(server, fhandle, fattr, err); switch (err) { case 0: case -NFS4ERR_WRONGSEC: @@ -4290,8 +4286,9 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, return err; } -static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info, rpc_authflavor_t flavor) +static int nfs4_lookup_root_sec(struct nfs_server *server, + struct nfs_fh *fhandle, struct nfs_fattr *fattr, + rpc_authflavor_t flavor) { struct rpc_auth_create_args auth_args = { .pseudoflavor = flavor, @@ -4301,7 +4298,7 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl auth = rpcauth_create(&auth_args, server->client); if (IS_ERR(auth)) return -EACCES; - return nfs4_lookup_root(server, fhandle, info); + return nfs4_lookup_root(server, fhandle, fattr); } /* @@ -4314,7 +4311,7 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl * negative errno value. */ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fattr *fattr) { /* Per 3530bis 15.33.5 */ static const rpc_authflavor_t flav_array[] = { @@ -4330,8 +4327,9 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, if (server->auth_info.flavor_len > 0) { /* try each flavor specified by user */ for (i = 0; i < server->auth_info.flavor_len; i++) { - status = nfs4_lookup_root_sec(server, fhandle, info, - server->auth_info.flavors[i]); + status = nfs4_lookup_root_sec( + server, fhandle, fattr, + server->auth_info.flavors[i]); if (status == -NFS4ERR_WRONGSEC || status == -EACCES) continue; break; @@ -4339,7 +4337,7 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, } else { /* no flavors specified by user, try default list */ for (i = 0; i < ARRAY_SIZE(flav_array); i++) { - status = nfs4_lookup_root_sec(server, fhandle, info, + status = nfs4_lookup_root_sec(server, fhandle, fattr, flav_array[i]); if (status == -NFS4ERR_WRONGSEC || status == -EACCES) continue; @@ -4363,28 +4361,22 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, * nfs4_proc_get_rootfh - get file handle for server's pseudoroot * @server: initialized nfs_server handle * @fhandle: we fill in the pseudo-fs root file handle - * @info: we fill in an FSINFO struct + * @fattr: we fill in a bare bones struct fattr * @auth_probe: probe the auth flavours * * Returns zero on success, or a negative errno. */ int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info, - bool auth_probe) + struct nfs_fattr *fattr, bool auth_probe) { int status = 0; if (!auth_probe) - status = nfs4_lookup_root(server, fhandle, info); + status = nfs4_lookup_root(server, fhandle, fattr); if (auth_probe || status == NFS4ERR_WRONGSEC) - status = server->nfs_client->cl_mvops->find_root_sec(server, - fhandle, info); - - if (status == 0) - status = nfs4_server_capabilities(server, fhandle); - if (status == 0) - status = nfs4_do_fsinfo(server, fhandle, info); + status = server->nfs_client->cl_mvops->find_root_sec( + server, fhandle, fattr); return nfs4_map_errors(status); } @@ -10351,10 +10343,10 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) * Use the state managment nfs_client cl_rpcclient, which uses krb5i (if * possible) as per RFC3530bis and RFC5661 Security Considerations sections */ -static int -_nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info, - struct nfs4_secinfo_flavors *flavors, bool use_integrity) +static int _nfs41_proc_secinfo_no_name(struct nfs_server *server, + struct nfs_fh *fhandle, + struct nfs4_secinfo_flavors *flavors, + bool use_integrity) { struct nfs41_secinfo_no_name_args args = { .style = SECINFO_STYLE_CURRENT_FH, @@ -10398,9 +10390,9 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, return status; } -static int -nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) +static int nfs41_proc_secinfo_no_name(struct nfs_server *server, + struct nfs_fh *fhandle, + struct nfs4_secinfo_flavors *flavors) { struct nfs4_exception exception = { .interruptible = true, @@ -10412,7 +10404,7 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, /* try to use integrity protection with machine cred */ if (_nfs4_is_integrity_protected(server->nfs_client)) - err = _nfs41_proc_secinfo_no_name(server, fhandle, info, + err = _nfs41_proc_secinfo_no_name(server, fhandle, flavors, true); /* @@ -10422,7 +10414,7 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, * the current filesystem's rpc_client and the user cred. */ if (err == -NFS4ERR_WRONGSEC) - err = _nfs41_proc_secinfo_no_name(server, fhandle, info, + err = _nfs41_proc_secinfo_no_name(server, fhandle, flavors, false); switch (err) { @@ -10438,9 +10430,8 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, return err; } -static int -nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) +static int nfs41_find_root_sec(struct nfs_server *server, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { int err; struct page *page; @@ -10456,14 +10447,14 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, } flavors = page_address(page); - err = nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); + err = nfs41_proc_secinfo_no_name(server, fhandle, flavors); /* * Fall back on "guess and check" method if * the server doesn't support SECINFO_NO_NAME */ if (err == -NFS4ERR_WRONGSEC || err == -ENOTSUPP) { - err = nfs4_find_root_sec(server, fhandle, info); + err = nfs4_find_root_sec(server, fhandle, fattr); goto out_freepage; } if (err) @@ -10488,8 +10479,8 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, flavor = RPC_AUTH_MAXFLAVOR; if (flavor != RPC_AUTH_MAXFLAVOR) { - err = nfs4_lookup_root_sec(server, fhandle, - info, flavor); + err = nfs4_lookup_root_sec(server, fhandle, fattr, + flavor); if (!err) break; } From dbe05428c4e54068a86e7e02405f3b30b1d2b3dd Mon Sep 17 00:00:00 2001 From: Thomas Croft Date: Mon, 4 Aug 2025 09:12:07 -0600 Subject: [PATCH 100/279] ALSA: hda/realtek: add LG gram 16Z90R-A to alc269 fixup table Several months ago, Joshua Grisham submitted a patch [1] for several ALC298 based sound cards. The entry for the LG gram 16 in the alc269_fixup_tbl only matches the Subsystem ID for the 16Z90R-Q and 16Z90R-K models [2]. My 16Z90R-A has a different Subsystem ID [3]. I'm not sure why these IDs differ, but I speculate it's due to the NVIDIA GPU included in the 16Z90R-A model that isn't present in the other models. I applied the patch to the latest Arch Linux kernel and the card was initialized as expected. [1]: https://lore.kernel.org/linux-sound/20240909193000.838815-1-josh@joshuagrisham.com/ [2]: https://linux-hardware.org/?id=pci:8086-51ca-1854-0488 [3]: https://linux-hardware.org/?id=pci:8086-51ca-1854-0489 Signed-off-by: Thomas Croft Link: https://patch.msgid.link/20250804151457.134761-2-thomasmcft@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 2554b42eeb0f..e27a36e4e92a 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7110,6 +7110,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1854, 0x0440, "LG CQ6", ALC256_FIXUP_HEADPHONE_AMP_VOL), SND_PCI_QUIRK(0x1854, 0x0441, "LG CQ6 AIO", ALC256_FIXUP_HEADPHONE_AMP_VOL), SND_PCI_QUIRK(0x1854, 0x0488, "LG gram 16 (16Z90R)", ALC298_FIXUP_SAMSUNG_AMP_V2_4_AMPS), + SND_PCI_QUIRK(0x1854, 0x0489, "LG gram 16 (16Z90R-A)", ALC298_FIXUP_SAMSUNG_AMP_V2_4_AMPS), SND_PCI_QUIRK(0x1854, 0x048a, "LG gram 17 (17ZD90R)", ALC298_FIXUP_SAMSUNG_AMP_V2_4_AMPS), SND_PCI_QUIRK(0x19e5, 0x3204, "Huawei MACH-WX9", ALC256_FIXUP_HUAWEI_MACH_WX9_PINS), SND_PCI_QUIRK(0x19e5, 0x320f, "Huawei WRT-WX9 ", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), From ce0b5eedcb753697d43f61dd2e27d68eb5d3150f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 24 Jul 2025 12:49:30 +0200 Subject: [PATCH 101/279] x86/irq: Plug vector setup race Hogan reported a vector setup race, which overwrites the interrupt descriptor in the per CPU vector array resulting in a disfunctional device. CPU0 CPU1 interrupt is raised in APIC IRR but not handled free_irq() per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN; request_irq() common_interrupt() d = this_cpu_read(vector_irq[vector]); per_cpu(vector_irq, CPU1)[vector] = desc; if (d == VECTOR_SHUTDOWN) this_cpu_write(vector_irq[vector], VECTOR_UNUSED); free_irq() cannot observe the pending vector in the CPU1 APIC as there is no way to query the remote CPUs APIC IRR. This requires that request_irq() uses the same vector/CPU as the one which was freed, but this also can be triggered by a spurious interrupt. Interestingly enough this problem managed to be hidden for more than a decade. Prevent this by reevaluating vector_irq under the vector lock, which is held by the interrupt activation code when vector_irq is updated. To avoid ifdeffery or IS_ENABLED() nonsense, move the [un]lock_vector_lock() declarations out under the CONFIG_IRQ_DOMAIN_HIERARCHY guard as it's only provided when CONFIG_X86_LOCAL_APIC=y. The current CONFIG_IRQ_DOMAIN_HIERARCHY guard is selected by CONFIG_X86_LOCAL_APIC, but can also be selected by other parts of the Kconfig system, which makes 32-bit UP builds with CONFIG_X86_LOCAL_APIC=n fail. Can we just get rid of this !APIC nonsense once and forever? Fixes: 9345005f4eed ("x86/irq: Fix do_IRQ() interrupt warning for cpu hotplug retriggered irqs") Reported-by: Hogan Wang Signed-off-by: Thomas Gleixner Tested-by: Hogan Wang Link: https://lore.kernel.org/all/draft-87ikjhrhhh.ffs@tglx --- arch/x86/include/asm/hw_irq.h | 12 ++++--- arch/x86/kernel/irq.c | 63 ++++++++++++++++++++++++++--------- 2 files changed, 55 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 162ebd73a698..cbe19e669080 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -92,8 +92,6 @@ struct irq_cfg { extern struct irq_cfg *irq_cfg(unsigned int irq); extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); -extern void lock_vector_lock(void); -extern void unlock_vector_lock(void); #ifdef CONFIG_SMP extern void vector_schedule_cleanup(struct irq_cfg *); extern void irq_complete_move(struct irq_cfg *cfg); @@ -101,12 +99,16 @@ extern void irq_complete_move(struct irq_cfg *cfg); static inline void vector_schedule_cleanup(struct irq_cfg *c) { } static inline void irq_complete_move(struct irq_cfg *c) { } #endif - extern void apic_ack_edge(struct irq_data *data); -#else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ +#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ + +#ifdef CONFIG_X86_LOCAL_APIC +extern void lock_vector_lock(void); +extern void unlock_vector_lock(void); +#else static inline void lock_vector_lock(void) {} static inline void unlock_vector_lock(void) {} -#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ +#endif /* Statistics */ extern atomic_t irq_err_count; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 9ed29ff10e59..10721a125226 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -256,26 +256,59 @@ static __always_inline void handle_irq(struct irq_desc *desc, __handle_irq(desc, regs); } -static __always_inline int call_irq_handler(int vector, struct pt_regs *regs) +static struct irq_desc *reevaluate_vector(int vector) { - struct irq_desc *desc; - int ret = 0; + struct irq_desc *desc = __this_cpu_read(vector_irq[vector]); + + if (!IS_ERR_OR_NULL(desc)) + return desc; + + if (desc == VECTOR_UNUSED) + pr_emerg_ratelimited("No irq handler for %d.%u\n", smp_processor_id(), vector); + else + __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); + return NULL; +} + +static __always_inline bool call_irq_handler(int vector, struct pt_regs *regs) +{ + struct irq_desc *desc = __this_cpu_read(vector_irq[vector]); - desc = __this_cpu_read(vector_irq[vector]); if (likely(!IS_ERR_OR_NULL(desc))) { handle_irq(desc, regs); - } else { - ret = -EINVAL; - if (desc == VECTOR_UNUSED) { - pr_emerg_ratelimited("%s: %d.%u No irq handler for vector\n", - __func__, smp_processor_id(), - vector); - } else { - __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); - } + return true; } - return ret; + /* + * Reevaluate with vector_lock held to prevent a race against + * request_irq() setting up the vector: + * + * CPU0 CPU1 + * interrupt is raised in APIC IRR + * but not handled + * free_irq() + * per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN; + * + * request_irq() common_interrupt() + * d = this_cpu_read(vector_irq[vector]); + * + * per_cpu(vector_irq, CPU1)[vector] = desc; + * + * if (d == VECTOR_SHUTDOWN) + * this_cpu_write(vector_irq[vector], VECTOR_UNUSED); + * + * This requires that the same vector on the same target CPU is + * handed out or that a spurious interrupt hits that CPU/vector. + */ + lock_vector_lock(); + desc = reevaluate_vector(vector); + unlock_vector_lock(); + + if (!desc) + return false; + + handle_irq(desc, regs); + return true; } /* @@ -289,7 +322,7 @@ DEFINE_IDTENTRY_IRQ(common_interrupt) /* entry code tells RCU that we're not quiescent. Check it. */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); - if (unlikely(call_irq_handler(vector, regs))) + if (unlikely(!call_irq_handler(vector, regs))) apic_eoi(); set_irq_regs(old_regs); From 6b445309eec2bc0594f3e24c7777aeef891d386e Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 31 Jul 2025 20:46:42 -0300 Subject: [PATCH 102/279] smb: client: default to nonativesocket under POSIX mounts SMB3.1.1 POSIX mounts require sockets to be created with NFS reparse points. Cc: linux-cifs@vger.kernel.org Cc: Ralph Boehme Cc: David Howells Cc: Reported-by: Matthew Richardson Closes: https://marc.info/?i=1124e7cd-6a46-40a6-9f44-b7664a66654b@ed.ac.uk Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index cc8bd79ebca9..072383899e81 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1652,6 +1652,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, pr_warn_once("conflicting posix mount options specified\n"); ctx->linux_ext = 1; ctx->no_linux_ext = 0; + ctx->nonativesocket = 1; /* POSIX mounts use NFS style reparse points */ } break; case Opt_nocase: From 5b432ae5dff5eb2e6acd55473309fdd5c16ff779 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 31 Jul 2025 20:46:43 -0300 Subject: [PATCH 103/279] smb: client: fix creating symlinks under POSIX mounts SMB3.1.1 POSIX mounts support native symlinks that are created with IO_REPARSE_TAG_SYMLINK reparse points, so skip the checking of FILE_SUPPORTS_REPARSE_POINTS as some servers might not have it set. Cc: linux-cifs@vger.kernel.org Cc: Ralph Boehme Cc: David Howells Cc: Reported-by: Matthew Richardson Closes: https://marc.info/?i=1124e7cd-6a46-40a6-9f44-b7664a66654b@ed.ac.uk Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 5 +++++ fs/smb/client/cifssmb.c | 4 ++-- fs/smb/client/link.c | 2 +- fs/smb/client/smb1ops.c | 2 +- fs/smb/client/smb2inode.c | 5 ++--- fs/smb/client/smb2ops.c | 5 ++--- 6 files changed, 13 insertions(+), 10 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 19dd901fe8ab..a97e2cca2f53 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -2377,4 +2377,9 @@ static inline bool cifs_netbios_name(const char *name, size_t namelen) return ret; } +#define CIFS_REPARSE_SUPPORT(tcon) \ + ((tcon)->posix_extensions || \ + (le32_to_cpu((tcon)->fsAttrInfo.Attributes) & \ + FILE_SUPPORTS_REPARSE_POINTS)) + #endif /* _CIFS_GLOB_H */ diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 6c890db06593..d20766f664c4 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -2751,7 +2751,7 @@ int cifs_query_reparse_point(const unsigned int xid, if (cap_unix(tcon->ses)) return -EOPNOTSUPP; - if (!(le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS)) + if (!CIFS_REPARSE_SUPPORT(tcon)) return -EOPNOTSUPP; oparms = (struct cifs_open_parms) { @@ -2879,7 +2879,7 @@ struct inode *cifs_create_reparse_inode(struct cifs_open_info_data *data, * attempt to create reparse point. This will prevent creating unusable * empty object on the server. */ - if (!(le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS)) + if (!CIFS_REPARSE_SUPPORT(tcon)) return ERR_PTR(-EOPNOTSUPP); #ifndef CONFIG_CIFS_XATTR diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c index afe76367d2c8..fe80e711cd75 100644 --- a/fs/smb/client/link.c +++ b/fs/smb/client/link.c @@ -635,7 +635,7 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, case CIFS_SYMLINK_TYPE_NATIVE: case CIFS_SYMLINK_TYPE_NFS: case CIFS_SYMLINK_TYPE_WSL: - if (le32_to_cpu(pTcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS) { + if (CIFS_REPARSE_SUPPORT(pTcon)) { rc = create_reparse_symlink(xid, inode, direntry, pTcon, full_path, symname); goto symlink_exit; diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index e364b6515af3..f722c7f47b07 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -1272,7 +1272,7 @@ cifs_make_node(unsigned int xid, struct inode *inode, */ return cifs_sfu_make_node(xid, inode, dentry, tcon, full_path, mode, dev); - } else if (le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS) { + } else if (CIFS_REPARSE_SUPPORT(tcon)) { /* * mknod via reparse points requires server support for * storing reparse points, which is available since diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 69d251726c02..2a0316c514e4 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -1346,9 +1346,8 @@ struct inode *smb2_create_reparse_inode(struct cifs_open_info_data *data, * attempt to create reparse point. This will prevent creating unusable * empty object on the server. */ - if (!(le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS)) - if (!tcon->posix_extensions) - return ERR_PTR(-EOPNOTSUPP); + if (!CIFS_REPARSE_SUPPORT(tcon)) + return ERR_PTR(-EOPNOTSUPP); oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, SYNCHRONIZE | DELETE | diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 1b4a31894f43..bd6c1fb2a992 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -5260,10 +5260,9 @@ static int smb2_make_node(unsigned int xid, struct inode *inode, if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { rc = cifs_sfu_make_node(xid, inode, dentry, tcon, full_path, mode, dev); - } else if ((le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS) - || (tcon->posix_extensions)) { + } else if (CIFS_REPARSE_SUPPORT(tcon)) { rc = mknod_reparse(xid, inode, dentry, tcon, - full_path, mode, dev); + full_path, mode, dev); } return rc; } From cb9f6a40382ca7b7a81d6f52285f897b09b5851b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 2 Aug 2025 12:59:13 +0200 Subject: [PATCH 104/279] irqchip/riscv-imsic: Don't dereference before NULL pointer check smatch warns about a dereference before check: drivers/irqchip/irq-riscv-imsic-platform.c:317 imsic_irqdomain_init() warn: variable dereferenced before check 'imsic' (see line 311) Cure it by moving the firmware not assignement after the checks. Fixes: 59422904dd98 ("irqchip/riscv-imsic: Convert to msi_create_parent_irq_domain() helper") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Thomas Gleixner Closes: https://lore.kernel.org/r/202507311953.NFVZkr0a-lkp@intel.com/ --- drivers/irqchip/irq-riscv-imsic-platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- drivers/irqchip/irq-riscv-imsic-platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-riscv-imsic-platform.c b/drivers/irqchip/irq-riscv-imsic-platform.c index 74a2a28f9403..643c8e459611 100644 --- a/drivers/irqchip/irq-riscv-imsic-platform.c +++ b/drivers/irqchip/irq-riscv-imsic-platform.c @@ -308,7 +308,6 @@ static const struct msi_parent_ops imsic_msi_parent_ops = { int imsic_irqdomain_init(void) { struct irq_domain_info info = { - .fwnode = imsic->fwnode, .ops = &imsic_base_domain_ops, .host_data = imsic, }; @@ -325,6 +324,7 @@ int imsic_irqdomain_init(void) } /* Create Base IRQ domain */ + info.fwnode = imsic->fwnode, imsic->base_domain = msi_create_parent_irq_domain(&info, &imsic_msi_parent_ops); if (!imsic->base_domain) { pr_err("%pfwP: failed to create IMSIC base domain\n", imsic->fwnode); From 02cbf8e0692bd30717b35a3ff5e46460d1d5d471 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 4 Aug 2025 16:55:53 +0200 Subject: [PATCH 105/279] irqchip/msi-lib: Fix fwnode refcount in msi_lib_irq_domain_select() Commit 8b65db1e93a2 ("irqchip/msi-lib: Add IRQ_DOMAIN_FLAG_FWNODE_PARENT handling") added logic in msi_lib_irq_domain_select() to match the domain fwnode against the fwnode parent of the fwspec.fwnode. The fwnode_get_parent() caller must call fwnode_handle_put() on the returned pointer value, lest fwnode refcounting for the parent ends up being out of kilter. Fix this by relying on the fwnode_handle clean-up handlers and by incrementing the fwnode refcount regardless of whether parent matching is used or not (the domain selection code already holds a reference before calling msi_lib_irq_domain_select() but to make the exit path more uniform if IRQ_DOMAIN_FLAG_FWNODE_PARENT is not set fwnode_handle_get() is called again on fwspec.fwnode so that the clean-up code is the same for the two matching patterns). Fixes: 8b65db1e93a2 ("irqchip/msi-lib: Add IRQ_DOMAIN_FLAG_FWNODE_PARENT handling") Signed-off-by: Lorenzo Pieralisi Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250804145553.795065-1-lpieralisi@kernel.org --- drivers/irqchip/irq-msi-lib.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-msi-lib.c b/drivers/irqchip/irq-msi-lib.c index 454c7f16dd4d..908944009c21 100644 --- a/drivers/irqchip/irq-msi-lib.c +++ b/drivers/irqchip/irq-msi-lib.c @@ -133,13 +133,13 @@ int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec, { const struct msi_parent_ops *ops = d->msi_parent_ops; u32 busmask = BIT(bus_token); - struct fwnode_handle *fwh; if (!ops) return 0; - fwh = d->flags & IRQ_DOMAIN_FLAG_FWNODE_PARENT ? fwnode_get_parent(fwspec->fwnode) - : fwspec->fwnode; + struct fwnode_handle *fwh __free(fwnode_handle) = + d->flags & IRQ_DOMAIN_FLAG_FWNODE_PARENT ? fwnode_get_parent(fwspec->fwnode) + : fwnode_handle_get(fwspec->fwnode); if (fwh != d->fwnode || fwspec->param_count != 0) return 0; From 3c3d7dbab2c70a4bca47634d564bf659351c05ca Mon Sep 17 00:00:00 2001 From: Elad Nachman Date: Sun, 3 Aug 2025 13:25:48 +0300 Subject: [PATCH 106/279] irqchip/mvebu-gicp: Clear pending interrupts on init When a kexec'ed kernel boots up, there might be stale unhandled interrupts pending in the interrupt controller. These are delivered as spurious interrupts once the boot CPU enables interrupts. Clear all pending interrupts when the driver is initialized to prevent these spurious interrupts from locking the CPU in an endless loop. Signed-off-by: Elad Nachman Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250803102548.669682-2-enachman@marvell.com --- drivers/irqchip/irq-mvebu-gicp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/irqchip/irq-mvebu-gicp.c b/drivers/irqchip/irq-mvebu-gicp.c index d3232d6d8dce..fd85c845e015 100644 --- a/drivers/irqchip/irq-mvebu-gicp.c +++ b/drivers/irqchip/irq-mvebu-gicp.c @@ -177,6 +177,7 @@ static int mvebu_gicp_probe(struct platform_device *pdev) .ops = &gicp_domain_ops, }; struct mvebu_gicp *gicp; + void __iomem *base; int ret, i; gicp = devm_kzalloc(&pdev->dev, sizeof(*gicp), GFP_KERNEL); @@ -236,6 +237,15 @@ static int mvebu_gicp_probe(struct platform_device *pdev) return -ENODEV; } + base = ioremap(gicp->res->start, gicp->res->end - gicp->res->start); + if (IS_ERR(base)) { + dev_err(&pdev->dev, "ioremap() failed. Unable to clear pending interrupts.\n"); + } else { + for (i = 0; i < 64; i++) + writel(i, base + GICP_CLRSPI_NSR_OFFSET); + iounmap(base); + } + return msi_create_parent_irq_domain(&info, &gicp_msi_parent_ops) ? 0 : -ENOMEM; } From a8913d54ab1f9ed871b4e45a7c8a4f7a9949d071 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 1 Aug 2025 09:58:18 +0200 Subject: [PATCH 107/279] irqchip/gic-v5: iwb: Fix iounmap probe failure path The 0-day bot reported that on the failure path the driver iounmap()s IWB resources that are managed through devm_ioremap(), which is clearly wrong because the driver would end up unmapping the MMIO resource twice on probing failure. Fix this by removing the error path altogether and by letting devres manage the iounmapping on clean-up. Fixes: 695949d8b16f ("irqchip/gic-v5: Add GICv5 IWB support") Reported-by: kernel test robot Signed-off-by: Lorenzo Pieralisi Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/all/20250801-gic-v5-fixes-6-17-v1-1-4fcedaccf9e6@kernel.org Closes: https://lore.kernel.org/oe-kbuild-all/202508010038.N3r4ZmII-lkp@intel.com --- drivers/irqchip/irq-gic-v5-iwb.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/irqchip/irq-gic-v5-iwb.c b/drivers/irqchip/irq-gic-v5-iwb.c index ed72fbdd4900..ad9fdc14d1c6 100644 --- a/drivers/irqchip/irq-gic-v5-iwb.c +++ b/drivers/irqchip/irq-gic-v5-iwb.c @@ -241,7 +241,6 @@ static int gicv5_iwb_device_probe(struct platform_device *pdev) struct gicv5_iwb_chip_data *iwb_node; void __iomem *iwb_base; struct resource *res; - int ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) @@ -254,16 +253,10 @@ static int gicv5_iwb_device_probe(struct platform_device *pdev) } iwb_node = gicv5_iwb_init_bases(iwb_base, pdev); - if (IS_ERR(iwb_node)) { - ret = PTR_ERR(iwb_node); - goto out_unmap; - } + if (IS_ERR(iwb_node)) + return PTR_ERR(iwb_node); return 0; - -out_unmap: - iounmap(iwb_base); - return ret; } static const struct of_device_id gicv5_iwb_of_match[] = { From 9ba0a63badc8e74ac0d490f9113300dda0ce2c19 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 1 Aug 2025 09:58:20 +0200 Subject: [PATCH 108/279] irqchip/gic-v5: Remove IRQD_RESEND_WHEN_IN_PROGRESS for ITS IRQs GICv5 LPI interrupts have an active state hence they cannot retrigger while the interrupt is being handled. Therefore, setting the IRQD_RESEND_WHEN_IN_PROGRESS flag on LPIs is pointless, as the situation this flag caters for cannot happen. Remove it. Signed-off-by: Lorenzo Pieralisi Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/all/20250801-gic-v5-fixes-6-17-v1-3-4fcedaccf9e6@kernel.org --- drivers/irqchip/irq-gic-v5-its.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v5-its.c b/drivers/irqchip/irq-gic-v5-its.c index 340640fdbdf6..9290ac741949 100644 --- a/drivers/irqchip/irq-gic-v5-its.c +++ b/drivers/irqchip/irq-gic-v5-its.c @@ -973,7 +973,6 @@ static int gicv5_its_irq_domain_alloc(struct irq_domain *domain, unsigned int vi irqd = irq_get_irq_data(virq + i); irqd_set_single_target(irqd); irqd_set_affinity_on_activate(irqd); - irqd_set_resend_when_in_progress(irqd); } return 0; From f9a348e0de19226fc3c7e81de7677d3fa2c4b2d8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Jul 2025 09:34:29 -0400 Subject: [PATCH 109/279] nfsd: don't set the ctime on delegated atime updates Clients will typically precede a DELEGRETURN for a delegation with delegated timestamp with a SETATTR to set the timestamps on the server to match what the client has. knfsd implements this by using the nfsd_setattr() infrastructure, which will set ATTR_CTIME on any update that goes to notify_change(). This is problematic as it means that the client will get a spurious ctime update when updating the atime. POSIX unfortunately doesn't phrase it succinctly, but updating the atime due to reads should not update the ctime. In this case, the client is sending a SETATTR to update the atime on the server to match its latest value. The ctime should not be advanced in this case as that would incorrectly indicate a change to the inode. Fix this by not implicitly setting ATTR_CTIME when ATTR_DELEG is set in __nfsd_setattr(). The decoder for FATTR4_WORD2_TIME_DELEG_MODIFY already sets ATTR_CTIME, so this is sufficient to make it skip setting the ctime on atime-only updates. Fixes: 7e13f4f8d27d ("nfsd: handle delegated timestamps in SETATTR") Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/vfs.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ee78b6fb1709..eaf04751d07f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -470,7 +470,15 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap) if (!iap->ia_valid) return 0; - iap->ia_valid |= ATTR_CTIME; + /* + * If ATTR_DELEG is set, then this is an update from a client that + * holds a delegation. If this is an update for only the atime, the + * ctime should not be changed. If the update contains the mtime + * too, then ATTR_CTIME should already be set. + */ + if (!(iap->ia_valid & ATTR_DELEG)) + iap->ia_valid |= ATTR_CTIME; + return notify_change(&nop_mnt_idmap, dentry, iap, NULL); } From e5a73150776f18547ee685c9f6bfafe549714899 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 18 Jul 2025 11:26:14 +1000 Subject: [PATCH 110/279] nfsd: avoid ref leak in nfsd_open_local_fh() If two calls to nfsd_open_local_fh() race and both successfully call nfsd_file_acquire_local(), they will both get an extra reference to the net to accompany the file reference stored in *pnf. One of them will fail to store (using xchg()) the file reference in *pnf and will drop that reference but WON'T drop the accompanying reference to the net. This leak means that when the nfs server is shut down it will hang in nfsd_shutdown_net() waiting for &nn->nfsd_net_free_done. This patch adds the missing nfsd_net_put(). Reported-by: Mike Snitzer Fixes: e6f7e1487ab5 ("nfs_localio: simplify interface to nfsd for getting nfsd_file") Cc: stable@vger.kernel.org Signed-off-by: NeilBrown Tested-by: Mike Snitzer Reviewed-by: Mike Snitzer Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/localio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c index 4f6468eb2adf..cb237f1b902a 100644 --- a/fs/nfsd/localio.c +++ b/fs/nfsd/localio.c @@ -103,10 +103,11 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom, if (nfsd_file_get(new) == NULL) goto again; /* - * Drop the ref we were going to install and the - * one we were going to return. + * Drop the ref we were going to install (both file and + * net) and the one we were going to return (only file). */ nfsd_file_put(localio); + nfsd_net_put(net); nfsd_file_put(localio); localio = new; } From f3ba7c9b0421e3935998334a860bd88f2ffdb18e Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Mon, 4 Aug 2025 21:40:03 +0800 Subject: [PATCH 111/279] smb: client: rename server mid_lock to mid_queue_lock This is step 1/4 of a patch series to fix mid_q_entry memory leaks caused by race conditions in callback execution. The current mid_lock name is somewhat ambiguous about what it protects. To prepare for splitting this lock into separate, more granular locks, this patch renames mid_lock to mid_queue_lock to clearly indicate its specific responsibility for protecting the pending_mid_q list and related queue operations. No functional changes are made in this patch - it only prepares the codebase for the lock splitting that follows. - mid_queue_lock for queue operations - mid_counter_lock for mid counter operations - per-mid locks for individual mid state management Signed-off-by: Wang Zhaolong Acked-by: Enzo Matsumiya Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 8 ++++---- fs/smb/client/cifsglob.h | 4 ++-- fs/smb/client/connect.c | 20 +++++++++---------- fs/smb/client/smb1ops.c | 10 +++++----- fs/smb/client/smb2ops.c | 26 ++++++++++++------------- fs/smb/client/smb2transport.c | 4 ++-- fs/smb/client/transport.c | 36 +++++++++++++++++------------------ 7 files changed, 54 insertions(+), 54 deletions(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index f1cea365b6f1..80d6a51b8c11 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -60,7 +60,7 @@ void cifs_dump_mids(struct TCP_Server_Info *server) return; cifs_dbg(VFS, "Dump pending requests:\n"); - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) { cifs_dbg(VFS, "State: %d Cmd: %d Pid: %d Cbdata: %p Mid %llu\n", mid_entry->mid_state, @@ -83,7 +83,7 @@ void cifs_dump_mids(struct TCP_Server_Info *server) mid_entry->resp_buf, 62); } } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); #endif /* CONFIG_CIFS_DEBUG2 */ } @@ -672,7 +672,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, "\n\tServer ConnectionId: 0x%llx", chan_server->conn_id); - spin_lock(&chan_server->mid_lock); + spin_lock(&chan_server->mid_queue_lock); list_for_each_entry(mid_entry, &chan_server->pending_mid_q, qhead) { seq_printf(m, "\n\t\tState: %d com: %d pid: %d cbdata: %p mid %llu", mid_entry->mid_state, @@ -681,7 +681,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) mid_entry->callback_data, mid_entry->mid); } - spin_unlock(&chan_server->mid_lock); + spin_unlock(&chan_server->mid_queue_lock); } spin_unlock(&ses->chan_lock); seq_puts(m, "\n--\n"); diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index a97e2cca2f53..2dd1ef274250 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -732,7 +732,7 @@ struct TCP_Server_Info { #endif wait_queue_head_t response_q; wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ - spinlock_t mid_lock; /* protect mid queue and it's entries */ + spinlock_t mid_queue_lock; /* protect mid queue */ struct list_head pending_mid_q; bool noblocksnd; /* use blocking sendmsg */ bool noautotune; /* do not autotune send buf sizes */ @@ -2007,7 +2007,7 @@ require use of the stronger protocol */ * GlobalCurrentXid * GlobalTotalActiveXid * TCP_Server_Info->srv_lock (anything in struct not protected by another lock and can change) - * TCP_Server_Info->mid_lock TCP_Server_Info->pending_mid_q cifs_get_tcp_session + * TCP_Server_Info->mid_queue_lock TCP_Server_Info->pending_mid_q cifs_get_tcp_session * ->CurrentMid * (any changes in mid_q_entry fields) * TCP_Server_Info->req_lock TCP_Server_Info->in_flight cifs_get_tcp_session diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 5eec8957f2a9..e4b577ca48d5 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -321,7 +321,7 @@ cifs_abort_connection(struct TCP_Server_Info *server) /* mark submitted MIDs for retry and issue callback */ INIT_LIST_HEAD(&retry_list); cifs_dbg(FYI, "%s: moving mids to private list\n", __func__); - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); list_for_each_entry_safe(mid, nmid, &server->pending_mid_q, qhead) { kref_get(&mid->refcount); if (mid->mid_state == MID_REQUEST_SUBMITTED) @@ -329,7 +329,7 @@ cifs_abort_connection(struct TCP_Server_Info *server) list_move(&mid->qhead, &retry_list); mid->mid_flags |= MID_DELETED; } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); cifs_server_unlock(server); cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__); @@ -884,13 +884,13 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type) * server there should be exactly one pending mid * corresponding to SMB1/SMB2 Negotiate packet. */ - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); list_for_each_entry_safe(mid, nmid, &server->pending_mid_q, qhead) { kref_get(&mid->refcount); list_move(&mid->qhead, &dispose_list); mid->mid_flags |= MID_DELETED; } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); /* Now try to reconnect once with NetBIOS session. */ server->with_rfc1001 = true; @@ -957,7 +957,7 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed) #ifdef CONFIG_CIFS_STATS2 mid->when_received = jiffies; #endif - spin_lock(&mid->server->mid_lock); + spin_lock(&mid->server->mid_queue_lock); if (!malformed) mid->mid_state = MID_RESPONSE_RECEIVED; else @@ -967,12 +967,12 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed) * function has finished processing it is a bug. */ if (mid->mid_flags & MID_DELETED) { - spin_unlock(&mid->server->mid_lock); + spin_unlock(&mid->server->mid_queue_lock); pr_warn_once("trying to dequeue a deleted mid\n"); } else { list_del_init(&mid->qhead); mid->mid_flags |= MID_DELETED; - spin_unlock(&mid->server->mid_lock); + spin_unlock(&mid->server->mid_queue_lock); } } @@ -1101,7 +1101,7 @@ clean_demultiplex_info(struct TCP_Server_Info *server) struct list_head *tmp, *tmp2; LIST_HEAD(dispose_list); - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); cifs_dbg(FYI, "Clearing mid %llu\n", mid_entry->mid); @@ -1110,7 +1110,7 @@ clean_demultiplex_info(struct TCP_Server_Info *server) list_move(&mid_entry->qhead, &dispose_list); mid_entry->mid_flags |= MID_DELETED; } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); /* now walk dispose list and issue callbacks */ list_for_each_safe(tmp, tmp2, &dispose_list) { @@ -1822,7 +1822,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, tcp_ses->compression.requested = ctx->compress; spin_lock_init(&tcp_ses->req_lock); spin_lock_init(&tcp_ses->srv_lock); - spin_lock_init(&tcp_ses->mid_lock); + spin_lock_init(&tcp_ses->mid_queue_lock); INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); INIT_LIST_HEAD(&tcp_ses->smb_ses_list); INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request); diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index f722c7f47b07..e16566d3c319 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -95,17 +95,17 @@ cifs_find_mid(struct TCP_Server_Info *server, char *buffer) struct smb_hdr *buf = (struct smb_hdr *)buffer; struct mid_q_entry *mid; - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); list_for_each_entry(mid, &server->pending_mid_q, qhead) { if (compare_mid(mid->mid, buf) && mid->mid_state == MID_REQUEST_SUBMITTED && le16_to_cpu(mid->command) == buf->Command) { kref_get(&mid->refcount); - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); return mid; } } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); return NULL; } @@ -169,7 +169,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server) __u16 last_mid, cur_mid; bool collision, reconnect = false; - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); /* mid is 16 bit only for CIFS/SMB */ cur_mid = (__u16)((server->CurrentMid) & 0xffff); @@ -228,7 +228,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server) } cur_mid++; } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); if (reconnect) { cifs_signal_cifsd_for_reconnect(server, false); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index bd6c1fb2a992..7935f9b433ac 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -374,19 +374,19 @@ smb2_get_next_mid(struct TCP_Server_Info *server) { __u64 mid; /* for SMB2 we need the current value */ - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); mid = server->CurrentMid++; - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); return mid; } static void smb2_revert_current_mid(struct TCP_Server_Info *server, const unsigned int val) { - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); if (server->CurrentMid >= val) server->CurrentMid -= val; - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); } static struct mid_q_entry * @@ -401,7 +401,7 @@ __smb2_find_mid(struct TCP_Server_Info *server, char *buf, bool dequeue) return NULL; } - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); list_for_each_entry(mid, &server->pending_mid_q, qhead) { if ((mid->mid == wire_mid) && (mid->mid_state == MID_REQUEST_SUBMITTED) && @@ -411,11 +411,11 @@ __smb2_find_mid(struct TCP_Server_Info *server, char *buf, bool dequeue) list_del_init(&mid->qhead); mid->mid_flags |= MID_DELETED; } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); return mid; } } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); return NULL; } @@ -460,9 +460,9 @@ smb2_negotiate(const unsigned int xid, { int rc; - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); server->CurrentMid = 0; - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); rc = SMB2_negotiate(xid, ses, server); return rc; } @@ -4809,18 +4809,18 @@ static void smb2_decrypt_offload(struct work_struct *work) } else { spin_lock(&dw->server->srv_lock); if (dw->server->tcpStatus == CifsNeedReconnect) { - spin_lock(&dw->server->mid_lock); + spin_lock(&dw->server->mid_queue_lock); mid->mid_state = MID_RETRY_NEEDED; - spin_unlock(&dw->server->mid_lock); + spin_unlock(&dw->server->mid_queue_lock); spin_unlock(&dw->server->srv_lock); mid->callback(mid); } else { - spin_lock(&dw->server->mid_lock); + spin_lock(&dw->server->mid_queue_lock); mid->mid_state = MID_REQUEST_SUBMITTED; mid->mid_flags &= ~(MID_DELETED); list_add_tail(&mid->qhead, &dw->server->pending_mid_q); - spin_unlock(&dw->server->mid_lock); + spin_unlock(&dw->server->mid_queue_lock); spin_unlock(&dw->server->srv_lock); } } diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 475b36c27f65..ff9ef7fcd010 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -840,9 +840,9 @@ smb2_get_mid_entry(struct cifs_ses *ses, struct TCP_Server_Info *server, *mid = smb2_mid_entry_alloc(shdr, server); if (*mid == NULL) return -ENOMEM; - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); list_add_tail(&(*mid)->qhead, &server->pending_mid_q); - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); return 0; } diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 191783f553ce..12dc927aa4a2 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -160,12 +160,12 @@ void __release_mid(struct kref *refcount) void delete_mid(struct mid_q_entry *mid) { - spin_lock(&mid->server->mid_lock); + spin_lock(&mid->server->mid_queue_lock); if (!(mid->mid_flags & MID_DELETED)) { list_del_init(&mid->qhead); mid->mid_flags |= MID_DELETED; } - spin_unlock(&mid->server->mid_lock); + spin_unlock(&mid->server->mid_queue_lock); release_mid(mid); } @@ -716,9 +716,9 @@ static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, *ppmidQ = alloc_mid(in_buf, ses->server); if (*ppmidQ == NULL) return -ENOMEM; - spin_lock(&ses->server->mid_lock); + spin_lock(&ses->server->mid_queue_lock); list_add_tail(&(*ppmidQ)->qhead, &ses->server->pending_mid_q); - spin_unlock(&ses->server->mid_lock); + spin_unlock(&ses->server->mid_queue_lock); return 0; } @@ -819,9 +819,9 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, mid->mid_state = MID_REQUEST_SUBMITTED; /* put it on the pending_mid_q */ - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); list_add_tail(&mid->qhead, &server->pending_mid_q); - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); /* * Need to store the time in mid before calling I/O. For call_async, @@ -880,10 +880,10 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) cifs_dbg(FYI, "%s: cmd=%d mid=%llu state=%d\n", __func__, le16_to_cpu(mid->command), mid->mid, mid->mid_state); - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); switch (mid->mid_state) { case MID_RESPONSE_READY: - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); return rc; case MID_RETRY_NEEDED: rc = -EAGAIN; @@ -902,13 +902,13 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) list_del_init(&mid->qhead); mid->mid_flags |= MID_DELETED; } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n", __func__, mid->mid, mid->mid_state); rc = -EIO; goto sync_mid_done; } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); sync_mid_done: release_mid(mid); @@ -1213,7 +1213,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, cifs_server_dbg(FYI, "Cancelling wait for mid %llu cmd: %d\n", midQ[i]->mid, le16_to_cpu(midQ[i]->command)); send_cancel(server, &rqst[i], midQ[i]); - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); midQ[i]->mid_flags |= MID_WAIT_CANCELLED; if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED || midQ[i]->mid_state == MID_RESPONSE_RECEIVED) { @@ -1221,7 +1221,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, cancelled_mid[i] = true; credits[i].value = 0; } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); } } @@ -1423,16 +1423,16 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, rc = wait_for_response(server, midQ); if (rc != 0) { send_cancel(server, &rqst, midQ); - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); if (midQ->mid_state == MID_REQUEST_SUBMITTED || midQ->mid_state == MID_RESPONSE_RECEIVED) { /* no longer considered to be "in-flight" */ midQ->callback = release_mid; - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); add_credits(server, &credits, 0); return rc; } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); } rc = cifs_sync_mid_result(midQ, server); @@ -1605,15 +1605,15 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, rc = wait_for_response(server, midQ); if (rc) { send_cancel(server, &rqst, midQ); - spin_lock(&server->mid_lock); + spin_lock(&server->mid_queue_lock); if (midQ->mid_state == MID_REQUEST_SUBMITTED || midQ->mid_state == MID_RESPONSE_RECEIVED) { /* no longer considered to be "in-flight" */ midQ->callback = release_mid; - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); return rc; } - spin_unlock(&server->mid_lock); + spin_unlock(&server->mid_queue_lock); } /* We got the response - restart system call. */ From 9bd42798d5bf87f56d229a27e40140df95ef743d Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Mon, 4 Aug 2025 21:40:04 +0800 Subject: [PATCH 112/279] smb: client: add mid_counter_lock to protect the mid counter counter This is step 2/4 of a patch series to fix mid_q_entry memory leaks caused by race conditions in callback execution. Add a dedicated mid_counter_lock to protect current_mid counter, separating it from mid_queue_lock which protects pending_mid_q operations. This reduces lock contention and prepares for finer- grained locking in subsequent patches. Changes: - Add TCP_Server_Info->mid_counter_lock spinlock - Rename CurrentMid to current_mid for consistency - Use mid_counter_lock to protect current_mid access - Update locking documentation in cifsglob.h This separation allows mid allocation to proceed without blocking queue operations, improving performance under heavy load. Signed-off-by: Wang Zhaolong Acked-by: Enzo Matsumiya Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 5 +++-- fs/smb/client/connect.c | 5 +++-- fs/smb/client/smb1ops.c | 11 ++++++----- fs/smb/client/smb2ops.c | 40 +++++++++++++++++++-------------------- fs/smb/client/transport.c | 12 ++++++------ 5 files changed, 38 insertions(+), 35 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 2dd1ef274250..cfba226f3396 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -733,6 +733,7 @@ struct TCP_Server_Info { wait_queue_head_t response_q; wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ spinlock_t mid_queue_lock; /* protect mid queue */ + spinlock_t mid_counter_lock; struct list_head pending_mid_q; bool noblocksnd; /* use blocking sendmsg */ bool noautotune; /* do not autotune send buf sizes */ @@ -770,7 +771,7 @@ struct TCP_Server_Info { /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */ unsigned int capabilities; /* selective disabling of caps by smb sess */ int timeAdj; /* Adjust for difference in server time zone in sec */ - __u64 CurrentMid; /* multiplex id - rotating counter, protected by GlobalMid_Lock */ + __u64 current_mid; /* multiplex id - rotating counter, protected by mid_counter_lock */ char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */ /* 16th byte of RFC1001 workstation name is always null */ char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; @@ -2008,8 +2009,8 @@ require use of the stronger protocol */ * GlobalTotalActiveXid * TCP_Server_Info->srv_lock (anything in struct not protected by another lock and can change) * TCP_Server_Info->mid_queue_lock TCP_Server_Info->pending_mid_q cifs_get_tcp_session - * ->CurrentMid * (any changes in mid_q_entry fields) + * TCP_Server_Info->mid_counter_lock TCP_Server_Info->current_mid cifs_get_tcp_session * TCP_Server_Info->req_lock TCP_Server_Info->in_flight cifs_get_tcp_session * ->credits * ->echo_credits diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index e4b577ca48d5..74ad5881ee45 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -358,7 +358,7 @@ static bool cifs_tcp_ses_needs_reconnect(struct TCP_Server_Info *server, int num } cifs_dbg(FYI, "Mark tcp session as need reconnect\n"); - trace_smb3_reconnect(server->CurrentMid, server->conn_id, + trace_smb3_reconnect(server->current_mid, server->conn_id, server->hostname); server->tcpStatus = CifsNeedReconnect; @@ -1242,7 +1242,7 @@ smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server) spin_unlock(&server->req_lock); wake_up(&server->request_q); - trace_smb3_hdr_credits(server->CurrentMid, + trace_smb3_hdr_credits(server->current_mid, server->conn_id, server->hostname, scredits, le16_to_cpu(shdr->CreditRequest), in_flight); cifs_server_dbg(FYI, "%s: added %u credits total=%d\n", @@ -1823,6 +1823,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, spin_lock_init(&tcp_ses->req_lock); spin_lock_init(&tcp_ses->srv_lock); spin_lock_init(&tcp_ses->mid_queue_lock); + spin_lock_init(&tcp_ses->mid_counter_lock); INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); INIT_LIST_HEAD(&tcp_ses->smb_ses_list); INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request); diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index e16566d3c319..893a1ea8c000 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -169,10 +169,9 @@ cifs_get_next_mid(struct TCP_Server_Info *server) __u16 last_mid, cur_mid; bool collision, reconnect = false; - spin_lock(&server->mid_queue_lock); - + spin_lock(&server->mid_counter_lock); /* mid is 16 bit only for CIFS/SMB */ - cur_mid = (__u16)((server->CurrentMid) & 0xffff); + cur_mid = (__u16)((server->current_mid) & 0xffff); /* we do not want to loop forever */ last_mid = cur_mid; cur_mid++; @@ -198,6 +197,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server) cur_mid++; num_mids = 0; + spin_lock(&server->mid_queue_lock); list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) { ++num_mids; if (mid_entry->mid == cur_mid && @@ -207,6 +207,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server) break; } } + spin_unlock(&server->mid_queue_lock); /* * if we have more than 32k mids in the list, then something @@ -223,12 +224,12 @@ cifs_get_next_mid(struct TCP_Server_Info *server) if (!collision) { mid = (__u64)cur_mid; - server->CurrentMid = mid; + server->current_mid = mid; break; } cur_mid++; } - spin_unlock(&server->mid_queue_lock); + spin_unlock(&server->mid_counter_lock); if (reconnect) { cifs_signal_cifsd_for_reconnect(server, false); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 7935f9b433ac..ebaeb2993569 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -91,7 +91,7 @@ smb2_add_credits(struct TCP_Server_Info *server, if (*val > 65000) { *val = 65000; /* Don't get near 64K credits, avoid srv bugs */ pr_warn_once("server overflowed SMB3 credits\n"); - trace_smb3_overflow_credits(server->CurrentMid, + trace_smb3_overflow_credits(server->current_mid, server->conn_id, server->hostname, *val, add, server->in_flight); } @@ -136,7 +136,7 @@ smb2_add_credits(struct TCP_Server_Info *server, wake_up(&server->request_q); if (reconnect_detected) { - trace_smb3_reconnect_detected(server->CurrentMid, + trace_smb3_reconnect_detected(server->current_mid, server->conn_id, server->hostname, scredits, add, in_flight); cifs_dbg(FYI, "trying to put %d credits from the old server instance %d\n", @@ -144,7 +144,7 @@ smb2_add_credits(struct TCP_Server_Info *server, } if (reconnect_with_invalid_credits) { - trace_smb3_reconnect_with_invalid_credits(server->CurrentMid, + trace_smb3_reconnect_with_invalid_credits(server->current_mid, server->conn_id, server->hostname, scredits, add, in_flight); cifs_dbg(FYI, "Negotiate operation when server credits is non-zero. Optype: %d, server credits: %d, credits added: %d\n", optype, scredits, add); @@ -176,7 +176,7 @@ smb2_add_credits(struct TCP_Server_Info *server, break; } - trace_smb3_add_credits(server->CurrentMid, + trace_smb3_add_credits(server->current_mid, server->conn_id, server->hostname, scredits, add, in_flight); cifs_dbg(FYI, "%s: added %u credits total=%d\n", __func__, add, scredits); } @@ -203,7 +203,7 @@ smb2_set_credits(struct TCP_Server_Info *server, const int val) in_flight = server->in_flight; spin_unlock(&server->req_lock); - trace_smb3_set_credits(server->CurrentMid, + trace_smb3_set_credits(server->current_mid, server->conn_id, server->hostname, scredits, val, in_flight); cifs_dbg(FYI, "%s: set %u credits\n", __func__, val); @@ -288,7 +288,7 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, size_t size, in_flight = server->in_flight; spin_unlock(&server->req_lock); - trace_smb3_wait_credits(server->CurrentMid, + trace_smb3_wait_credits(server->current_mid, server->conn_id, server->hostname, scredits, -(credits->value), in_flight); cifs_dbg(FYI, "%s: removed %u credits total=%d\n", __func__, credits->value, scredits); @@ -316,7 +316,7 @@ smb2_adjust_credits(struct TCP_Server_Info *server, server->credits, server->in_flight, new_val - credits->value, cifs_trace_rw_credits_no_adjust_up); - trace_smb3_too_many_credits(server->CurrentMid, + trace_smb3_too_many_credits(server->current_mid, server->conn_id, server->hostname, 0, credits->value - new_val, 0); cifs_server_dbg(VFS, "R=%x[%x] request has less credits (%d) than required (%d)", subreq->rreq->debug_id, subreq->subreq.debug_index, @@ -338,7 +338,7 @@ smb2_adjust_credits(struct TCP_Server_Info *server, server->credits, server->in_flight, new_val - credits->value, cifs_trace_rw_credits_old_session); - trace_smb3_reconnect_detected(server->CurrentMid, + trace_smb3_reconnect_detected(server->current_mid, server->conn_id, server->hostname, scredits, credits->value - new_val, in_flight); cifs_server_dbg(VFS, "R=%x[%x] trying to return %d credits to old session\n", @@ -358,7 +358,7 @@ smb2_adjust_credits(struct TCP_Server_Info *server, spin_unlock(&server->req_lock); wake_up(&server->request_q); - trace_smb3_adj_credits(server->CurrentMid, + trace_smb3_adj_credits(server->current_mid, server->conn_id, server->hostname, scredits, credits->value - new_val, in_flight); cifs_dbg(FYI, "%s: adjust added %u credits total=%d\n", @@ -374,19 +374,19 @@ smb2_get_next_mid(struct TCP_Server_Info *server) { __u64 mid; /* for SMB2 we need the current value */ - spin_lock(&server->mid_queue_lock); - mid = server->CurrentMid++; - spin_unlock(&server->mid_queue_lock); + spin_lock(&server->mid_counter_lock); + mid = server->current_mid++; + spin_unlock(&server->mid_counter_lock); return mid; } static void smb2_revert_current_mid(struct TCP_Server_Info *server, const unsigned int val) { - spin_lock(&server->mid_queue_lock); - if (server->CurrentMid >= val) - server->CurrentMid -= val; - spin_unlock(&server->mid_queue_lock); + spin_lock(&server->mid_counter_lock); + if (server->current_mid >= val) + server->current_mid -= val; + spin_unlock(&server->mid_counter_lock); } static struct mid_q_entry * @@ -460,9 +460,9 @@ smb2_negotiate(const unsigned int xid, { int rc; - spin_lock(&server->mid_queue_lock); - server->CurrentMid = 0; - spin_unlock(&server->mid_queue_lock); + spin_lock(&server->mid_counter_lock); + server->current_mid = 0; + spin_unlock(&server->mid_counter_lock); rc = SMB2_negotiate(xid, ses, server); return rc; } @@ -2498,7 +2498,7 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server) spin_unlock(&server->req_lock); wake_up(&server->request_q); - trace_smb3_pend_credits(server->CurrentMid, + trace_smb3_pend_credits(server->current_mid, server->conn_id, server->hostname, scredits, le16_to_cpu(shdr->CreditRequest), in_flight); cifs_dbg(FYI, "%s: status pending add %u credits total=%d\n", diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 12dc927aa4a2..8037accc3987 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -397,7 +397,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, * socket so the server throws away the partial SMB */ cifs_signal_cifsd_for_reconnect(server, false); - trace_smb3_partial_send_reconnect(server->CurrentMid, + trace_smb3_partial_send_reconnect(server->current_mid, server->conn_id, server->hostname); } smbd_done: @@ -509,7 +509,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, in_flight = server->in_flight; spin_unlock(&server->req_lock); - trace_smb3_nblk_credits(server->CurrentMid, + trace_smb3_nblk_credits(server->current_mid, server->conn_id, server->hostname, scredits, -1, in_flight); cifs_dbg(FYI, "%s: remove %u credits total=%d\n", __func__, 1, scredits); @@ -542,7 +542,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, in_flight = server->in_flight; spin_unlock(&server->req_lock); - trace_smb3_credit_timeout(server->CurrentMid, + trace_smb3_credit_timeout(server->current_mid, server->conn_id, server->hostname, scredits, num_credits, in_flight); cifs_server_dbg(VFS, "wait timed out after %d ms\n", @@ -585,7 +585,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, spin_unlock(&server->req_lock); trace_smb3_credit_timeout( - server->CurrentMid, + server->current_mid, server->conn_id, server->hostname, scredits, num_credits, in_flight); cifs_server_dbg(VFS, "wait timed out after %d ms\n", @@ -615,7 +615,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, in_flight = server->in_flight; spin_unlock(&server->req_lock); - trace_smb3_waitff_credits(server->CurrentMid, + trace_smb3_waitff_credits(server->current_mid, server->conn_id, server->hostname, scredits, -(num_credits), in_flight); cifs_dbg(FYI, "%s: remove %u credits total=%d\n", @@ -666,7 +666,7 @@ wait_for_compound_request(struct TCP_Server_Info *server, int num, */ if (server->in_flight == 0) { spin_unlock(&server->req_lock); - trace_smb3_insufficient_credits(server->CurrentMid, + trace_smb3_insufficient_credits(server->current_mid, server->conn_id, server->hostname, scredits, num, in_flight); cifs_dbg(FYI, "%s: %d requests in flight, needed %d total=%d\n", From 3fd8ec2fc93b009e5288b123d77292b8b1b9e1e7 Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Mon, 4 Aug 2025 21:40:05 +0800 Subject: [PATCH 113/279] smb: client: smb: client: eliminate mid_flags field This is step 3/4 of a patch series to fix mid_q_entry memory leaks caused by race conditions in callback execution. Replace the mid_flags bitmask with dedicated boolean fields to simplify locking logic and improve code readability: - Replace MID_DELETED with bool deleted_from_q - Replace MID_WAIT_CANCELLED with bool wait_cancelled - Remove mid_flags field entirely The new boolean fields have clearer semantics: - deleted_from_q: whether mid has been removed from pending_mid_q - wait_cancelled: whether request was cancelled during wait This change reduces memory usage (from 4-byte bitmask to 2 boolean flags) and eliminates confusion about which lock protects which flag bits, preparing for per-mid locking in the next patch. Signed-off-by: Wang Zhaolong Acked-by: Enzo Matsumiya Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 9 +++------ fs/smb/client/connect.c | 10 +++++----- fs/smb/client/smb2ops.c | 4 ++-- fs/smb/client/transport.c | 12 ++++++------ 4 files changed, 16 insertions(+), 19 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index cfba226f3396..e6830ab3a546 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1730,9 +1730,10 @@ struct mid_q_entry { unsigned int resp_buf_size; int mid_state; /* wish this were enum but can not pass to wait_event */ int mid_rc; /* rc for MID_RC */ - unsigned int mid_flags; __le16 command; /* smb command code */ unsigned int optype; /* operation type */ + bool wait_cancelled:1; /* Cancelled while waiting for response */ + bool deleted_from_q:1; /* Whether Mid has been dequeued frem pending_mid_q */ bool large_buf:1; /* if valid response, is pointer to large buf */ bool multiRsp:1; /* multiple trans2 responses for one request */ bool multiEnd:1; /* both received */ @@ -1894,10 +1895,6 @@ static inline bool is_replayable_error(int error) #define MID_RESPONSE_READY 0x40 /* ready for other process handle the rsp */ #define MID_RC 0x80 /* mid_rc contains custom rc */ -/* Flags */ -#define MID_WAIT_CANCELLED 1 /* Cancelled while waiting for response */ -#define MID_DELETED 2 /* Mid has been dequeued/deleted */ - /* Types of response buffer returned from SendReceive2 */ #define CIFS_NO_BUFFER 0 /* Response buffer not returned */ #define CIFS_SMALL_BUFFER 1 @@ -2009,7 +2006,7 @@ require use of the stronger protocol */ * GlobalTotalActiveXid * TCP_Server_Info->srv_lock (anything in struct not protected by another lock and can change) * TCP_Server_Info->mid_queue_lock TCP_Server_Info->pending_mid_q cifs_get_tcp_session - * (any changes in mid_q_entry fields) + * mid_q_entry->deleted_from_q * TCP_Server_Info->mid_counter_lock TCP_Server_Info->current_mid cifs_get_tcp_session * TCP_Server_Info->req_lock TCP_Server_Info->in_flight cifs_get_tcp_session * ->credits diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 74ad5881ee45..587845a2452d 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -327,7 +327,7 @@ cifs_abort_connection(struct TCP_Server_Info *server) if (mid->mid_state == MID_REQUEST_SUBMITTED) mid->mid_state = MID_RETRY_NEEDED; list_move(&mid->qhead, &retry_list); - mid->mid_flags |= MID_DELETED; + mid->deleted_from_q = true; } spin_unlock(&server->mid_queue_lock); cifs_server_unlock(server); @@ -888,7 +888,7 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type) list_for_each_entry_safe(mid, nmid, &server->pending_mid_q, qhead) { kref_get(&mid->refcount); list_move(&mid->qhead, &dispose_list); - mid->mid_flags |= MID_DELETED; + mid->deleted_from_q = true; } spin_unlock(&server->mid_queue_lock); @@ -966,12 +966,12 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed) * Trying to handle/dequeue a mid after the send_recv() * function has finished processing it is a bug. */ - if (mid->mid_flags & MID_DELETED) { + if (mid->deleted_from_q == true) { spin_unlock(&mid->server->mid_queue_lock); pr_warn_once("trying to dequeue a deleted mid\n"); } else { list_del_init(&mid->qhead); - mid->mid_flags |= MID_DELETED; + mid->deleted_from_q = true; spin_unlock(&mid->server->mid_queue_lock); } } @@ -1108,7 +1108,7 @@ clean_demultiplex_info(struct TCP_Server_Info *server) kref_get(&mid_entry->refcount); mid_entry->mid_state = MID_SHUTDOWN; list_move(&mid_entry->qhead, &dispose_list); - mid_entry->mid_flags |= MID_DELETED; + mid_entry->deleted_from_q = true; } spin_unlock(&server->mid_queue_lock); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index ebaeb2993569..ad8947434b71 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -409,7 +409,7 @@ __smb2_find_mid(struct TCP_Server_Info *server, char *buf, bool dequeue) kref_get(&mid->refcount); if (dequeue) { list_del_init(&mid->qhead); - mid->mid_flags |= MID_DELETED; + mid->deleted_from_q = true; } spin_unlock(&server->mid_queue_lock); return mid; @@ -4817,7 +4817,7 @@ static void smb2_decrypt_offload(struct work_struct *work) } else { spin_lock(&dw->server->mid_queue_lock); mid->mid_state = MID_REQUEST_SUBMITTED; - mid->mid_flags &= ~(MID_DELETED); + mid->deleted_from_q = false; list_add_tail(&mid->qhead, &dw->server->pending_mid_q); spin_unlock(&dw->server->mid_queue_lock); diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 8037accc3987..ca9358c24ceb 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -89,7 +89,7 @@ void __release_mid(struct kref *refcount) #endif struct TCP_Server_Info *server = midEntry->server; - if (midEntry->resp_buf && (midEntry->mid_flags & MID_WAIT_CANCELLED) && + if (midEntry->resp_buf && (midEntry->wait_cancelled) && (midEntry->mid_state == MID_RESPONSE_RECEIVED || midEntry->mid_state == MID_RESPONSE_READY) && server->ops->handle_cancelled_mid) @@ -161,9 +161,9 @@ void delete_mid(struct mid_q_entry *mid) { spin_lock(&mid->server->mid_queue_lock); - if (!(mid->mid_flags & MID_DELETED)) { + if (mid->deleted_from_q == false) { list_del_init(&mid->qhead); - mid->mid_flags |= MID_DELETED; + mid->deleted_from_q = true; } spin_unlock(&mid->server->mid_queue_lock); @@ -898,9 +898,9 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) rc = mid->mid_rc; break; default: - if (!(mid->mid_flags & MID_DELETED)) { + if (mid->deleted_from_q == false) { list_del_init(&mid->qhead); - mid->mid_flags |= MID_DELETED; + mid->deleted_from_q = true; } spin_unlock(&server->mid_queue_lock); cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n", @@ -1214,7 +1214,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, midQ[i]->mid, le16_to_cpu(midQ[i]->command)); send_cancel(server, &rqst[i], midQ[i]); spin_lock(&server->mid_queue_lock); - midQ[i]->mid_flags |= MID_WAIT_CANCELLED; + midQ[i]->wait_cancelled = true; if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED || midQ[i]->mid_state == MID_RESPONSE_RECEIVED) { midQ[i]->callback = cifs_cancelled_callback; From 475356fe2814f2f0b188da8bf0f1fcc579d81272 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 28 Jul 2025 22:11:54 +0200 Subject: [PATCH 114/279] kasan/test: fix protection against compiler elision The kunit test is using assignments to "static volatile void *kasan_ptr_result" to prevent elision of memory loads, but that's not working: In this variable definition, the "volatile" applies to the "void", not to the pointer. To make "volatile" apply to the pointer as intended, it must follow after the "*". This makes the kasan_memchr test pass again on my system. The kasan_strings test is still failing because all the definitions of load_unaligned_zeropad() are lacking explicit instrumentation hooks and ASAN does not instrument asm() memory operands. Link: https://lkml.kernel.org/r/20250728-kasan-kunit-fix-volatile-v1-1-e7157c9af82d@google.com Fixes: 5f1c8108e7ad ("mm:kasan: fix sparse warnings: Should it be static?") Signed-off-by: Jann Horn Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitriy Vyukov Cc: Jann Horn Cc: Nihar Chaithanya Cc: Vincenzo Frascino Cc: Signed-off-by: Andrew Morton --- mm/kasan/kasan_test_c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kasan/kasan_test_c.c b/mm/kasan/kasan_test_c.c index 2aa12dfa427a..e0968acc03aa 100644 --- a/mm/kasan/kasan_test_c.c +++ b/mm/kasan/kasan_test_c.c @@ -47,7 +47,7 @@ static struct { * Some tests use these global variables to store return values from function * calls that could otherwise be eliminated by the compiler as dead code. */ -static volatile void *kasan_ptr_result; +static void *volatile kasan_ptr_result; static volatile int kasan_int_result; /* Probe for console output: obtains test_status lines of interest. */ From 47b0f6d8f0d2be4d311a49e13d2fd5f152f492b2 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 31 Jul 2025 02:57:18 -0700 Subject: [PATCH 115/279] mm/kmemleak: avoid deadlock by moving pr_warn() outside kmemleak_lock When netpoll is enabled, calling pr_warn_once() while holding kmemleak_lock in mem_pool_alloc() can cause a deadlock due to lock inversion with the netconsole subsystem. This occurs because pr_warn_once() may trigger netpoll, which eventually leads to __alloc_skb() and back into kmemleak code, attempting to reacquire kmemleak_lock. This is the path for the deadlock. mem_pool_alloc() -> raw_spin_lock_irqsave(&kmemleak_lock, flags); -> pr_warn_once() -> netconsole subsystem -> netpoll -> __alloc_skb -> __create_object -> raw_spin_lock_irqsave(&kmemleak_lock, flags); Fix this by setting a flag and issuing the pr_warn_once() after kmemleak_lock is released. Link: https://lkml.kernel.org/r/20250731-kmemleak_lock-v1-1-728fd470198f@debian.org Fixes: c5665868183f ("mm: kmemleak: use the memory pool for early allocations") Signed-off-by: Breno Leitao Reported-by: Jakub Kicinski Acked-by: Catalin Marinas Cc: Signed-off-by: Andrew Morton --- mm/kmemleak.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 8d588e685311..e0333455c738 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -470,6 +470,7 @@ static struct kmemleak_object *mem_pool_alloc(gfp_t gfp) { unsigned long flags; struct kmemleak_object *object; + bool warn = false; /* try the slab allocator first */ if (object_cache) { @@ -488,8 +489,10 @@ static struct kmemleak_object *mem_pool_alloc(gfp_t gfp) else if (mem_pool_free_count) object = &mem_pool[--mem_pool_free_count]; else - pr_warn_once("Memory pool empty, consider increasing CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE\n"); + warn = true; raw_spin_unlock_irqrestore(&kmemleak_lock, flags); + if (warn) + pr_warn_once("Memory pool empty, consider increasing CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE\n"); return object; } From 5a309dbf1f829de7f8dc84a518d0b6e7e9be9994 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 30 Jul 2025 23:25:08 +0900 Subject: [PATCH 116/279] MAINTAINERS: add Masami as a reviewer of hung task detector Since I'm actively working on hung task blocker detector, add myself to a reviewer of the HUNG TASK DETECTOR feature. Link: https://lkml.kernel.org/r/175388550841.627474.3260499035226455392.stgit@devnote2 Signed-off-by: Masami Hiramatsu (Google) Acked-by: Lance Yang Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1b57dd4fcf01..3f957983c192 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11438,6 +11438,7 @@ F: drivers/tty/hvc/ HUNG TASK DETECTOR M: Andrew Morton R: Lance Yang +R: Masami Hiramatsu L: linux-kernel@vger.kernel.org S: Maintained F: include/linux/hung_task.h From d1534ae23c2b6be350c8ab060803fbf6e9682adc Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 28 Jul 2025 15:02:48 -0400 Subject: [PATCH 117/279] mm/kmemleak: avoid soft lockup in __kmemleak_do_cleanup() A soft lockup warning was observed on a relative small system x86-64 system with 16 GB of memory when running a debug kernel with kmemleak enabled. watchdog: BUG: soft lockup - CPU#8 stuck for 33s! [kworker/8:1:134] The test system was running a workload with hot unplug happening in parallel. Then kemleak decided to disable itself due to its inability to allocate more kmemleak objects. The debug kernel has its CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE set to 40,000. The soft lockup happened in kmemleak_do_cleanup() when the existing kmemleak objects were being removed and deleted one-by-one in a loop via a workqueue. In this particular case, there are at least 40,000 objects that need to be processed and given the slowness of a debug kernel and the fact that a raw_spinlock has to be acquired and released in __delete_object(), it could take a while to properly handle all these objects. As kmemleak has been disabled in this case, the object removal and deletion process can be further optimized as locking isn't really needed. However, it is probably not worth the effort to optimize for such an edge case that should rarely happen. So the simple solution is to call cond_resched() at periodic interval in the iteration loop to avoid soft lockup. Link: https://lkml.kernel.org/r/20250728190248.605750-1-longman@redhat.com Signed-off-by: Waiman Long Acked-by: Catalin Marinas Cc: Signed-off-by: Andrew Morton --- mm/kmemleak.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e0333455c738..84265983f239 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -2184,6 +2184,7 @@ static const struct file_operations kmemleak_fops = { static void __kmemleak_do_cleanup(void) { struct kmemleak_object *object, *tmp; + unsigned int cnt = 0; /* * Kmemleak has already been disabled, no need for RCU list traversal @@ -2192,6 +2193,10 @@ static void __kmemleak_do_cleanup(void) list_for_each_entry_safe(object, tmp, &object_list, object_list) { __remove_object(object); __delete_object(object); + + /* Call cond_resched() once per 64 iterations to avoid soft lockup */ + if (!(++cnt & 0x3f)) + cond_resched(); } } From 366a4532d96fc357998465133db34d34edb79e4c Mon Sep 17 00:00:00 2001 From: Barry Song Date: Tue, 5 Aug 2025 11:54:47 +0800 Subject: [PATCH 118/279] mm: fix the race between collapse and PT_RECLAIM under per-vma lock The check_pmd_still_valid() call during collapse is currently only protected by the mmap_lock in write mode, which was sufficient when pt_reclaim always ran under mmap_lock in read mode. However, since madvise_dontneed can now execute under a per-VMA lock, this assumption is no longer valid. As a result, a race condition can occur between collapse and PT_RECLAIM, potentially leading to a kernel panic. [ 38.151897] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] SMP KASI [ 38.153519] KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f] [ 38.154605] CPU: 0 UID: 0 PID: 721 Comm: repro Not tainted 6.16.0-next-20250801-next-2025080 #1 PREEMPT(voluntary) [ 38.155929] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org4 [ 38.157418] RIP: 0010:kasan_byte_accessible+0x15/0x30 [ 38.158125] Code: 03 0f 1f 40 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00 48 b8 00 00 00 00 00 fc0 [ 38.160461] RSP: 0018:ffff88800feef678 EFLAGS: 00010286 [ 38.161220] RAX: dffffc0000000000 RBX: 0000000000000001 RCX: 1ffffffff0dde60c [ 38.162232] RDX: 0000000000000000 RSI: ffffffff85da1e18 RDI: dffffc0000000003 [ 38.163176] RBP: ffff88800feef698 R08: 0000000000000001 R09: 0000000000000000 [ 38.164195] R10: 0000000000000000 R11: ffff888016a8ba58 R12: 0000000000000018 [ 38.165189] R13: 0000000000000018 R14: ffffffff85da1e18 R15: 0000000000000000 [ 38.166100] FS: 0000000000000000(0000) GS:ffff8880e3b40000(0000) knlGS:0000000000000000 [ 38.167137] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 38.167891] CR2: 00007f97fadfe504 CR3: 0000000007088005 CR4: 0000000000770ef0 [ 38.168812] PKRU: 55555554 [ 38.169275] Call Trace: [ 38.169647] [ 38.169975] ? __kasan_check_byte+0x19/0x50 [ 38.170581] lock_acquire+0xea/0x310 [ 38.171083] ? rcu_is_watching+0x19/0xc0 [ 38.171615] ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20 [ 38.172343] ? __sanitizer_cov_trace_const_cmp8+0x1c/0x30 [ 38.173130] _raw_spin_lock+0x38/0x50 [ 38.173707] ? __pte_offset_map_lock+0x1a2/0x3c0 [ 38.174390] __pte_offset_map_lock+0x1a2/0x3c0 [ 38.174987] ? __pfx___pte_offset_map_lock+0x10/0x10 [ 38.175724] ? __pfx_pud_val+0x10/0x10 [ 38.176308] ? __sanitizer_cov_trace_const_cmp1+0x1e/0x30 [ 38.177183] unmap_page_range+0xb60/0x43e0 [ 38.177824] ? __pfx_unmap_page_range+0x10/0x10 [ 38.178485] ? mas_next_slot+0x133a/0x1a50 [ 38.179079] unmap_single_vma.constprop.0+0x15b/0x250 [ 38.179830] unmap_vmas+0x1fa/0x460 [ 38.180373] ? __pfx_unmap_vmas+0x10/0x10 [ 38.180994] ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20 [ 38.181877] exit_mmap+0x1a2/0xb40 [ 38.182396] ? lock_release+0x14f/0x2c0 [ 38.182929] ? __pfx_exit_mmap+0x10/0x10 [ 38.183474] ? __pfx___mutex_unlock_slowpath+0x10/0x10 [ 38.184188] ? mutex_unlock+0x16/0x20 [ 38.184704] mmput+0x132/0x370 [ 38.185208] do_exit+0x7e7/0x28c0 [ 38.185682] ? __this_cpu_preempt_check+0x21/0x30 [ 38.186328] ? do_group_exit+0x1d8/0x2c0 [ 38.186873] ? __pfx_do_exit+0x10/0x10 [ 38.187401] ? __this_cpu_preempt_check+0x21/0x30 [ 38.188036] ? _raw_spin_unlock_irq+0x2c/0x60 [ 38.188634] ? lockdep_hardirqs_on+0x89/0x110 [ 38.189313] do_group_exit+0xe4/0x2c0 [ 38.189831] __x64_sys_exit_group+0x4d/0x60 [ 38.190413] x64_sys_call+0x2174/0x2180 [ 38.190935] do_syscall_64+0x6d/0x2e0 [ 38.191449] entry_SYSCALL_64_after_hwframe+0x76/0x7e This patch moves the vma_start_write() call to precede check_pmd_still_valid(), ensuring that the check is also properly protected by the per-VMA lock. Link: https://lkml.kernel.org/r/20250805035447.7958-1-21cnbao@gmail.com Fixes: a6fde7add78d ("mm: use per_vma lock for MADV_DONTNEED") Signed-off-by: Barry Song Tested-by: "Lai, Yi" Reported-by: "Lai, Yi" Closes: https://lore.kernel.org/all/aJAFrYfyzGpbm+0m@ly-workstation/ Reviewed-by: Lorenzo Stoakes Cc: David Hildenbrand Cc: Lorenzo Stoakes Cc: Qi Zheng Cc: Vlastimil Babka Cc: Jann Horn Cc: Suren Baghdasaryan Cc: Lokesh Gidra Cc: Tangquan Zheng Cc: Lance Yang Cc: Zi Yan Cc: Baolin Wang Cc: Liam R. Howlett Cc: Nico Pache Cc: Ryan Roberts Cc: Dev Jain Signed-off-by: Andrew Morton --- mm/khugepaged.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 374a6a5193a7..6b40bdfd224c 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1172,11 +1172,11 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, if (result != SCAN_SUCCEED) goto out_up_write; /* check if the pmd is still valid */ + vma_start_write(vma); result = check_pmd_still_valid(mm, address, pmd); if (result != SCAN_SUCCEED) goto out_up_write; - vma_start_write(vma); anon_vma_lock_write(vma->anon_vma); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, address, From 45d19b4b6c2d422771c29b83462d84afcbb33f01 Mon Sep 17 00:00:00 2001 From: Jinjiang Tu Date: Thu, 24 Jul 2025 17:09:56 +0800 Subject: [PATCH 119/279] mm/smaps: fix race between smaps_hugetlb_range and migration smaps_hugetlb_range() handles the pte without holdling ptl, and may be concurrenct with migration, leaing to BUG_ON in pfn_swap_entry_to_page(). The race is as follows. smaps_hugetlb_range migrate_pages huge_ptep_get remove_migration_ptes folio_unlock pfn_swap_entry_folio BUG_ON To fix it, hold ptl lock in smaps_hugetlb_range(). Link: https://lkml.kernel.org/r/20250724090958.455887-1-tujinjiang@huawei.com Link: https://lkml.kernel.org/r/20250724090958.455887-2-tujinjiang@huawei.com Fixes: 25ee01a2fca0 ("mm: hugetlb: proc: add hugetlb-related fields to /proc/PID/smaps") Signed-off-by: Jinjiang Tu Acked-by: David Hildenbrand Cc: Andrei Vagin Cc: Andrii Nakryiko Cc: Baolin Wang Cc: Brahmajit Das Cc: Catalin Marinas Cc: Christophe Leroy Cc: David Rientjes Cc: Dev Jain Cc: Hugh Dickins Cc: Joern Engel Cc: Kefeng Wang Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Ryan Roberts Cc: Thiago Jung Bauermann Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 3d6d8a9f13fc..55bab10bc779 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1148,10 +1148,13 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; - pte_t ptent = huge_ptep_get(walk->mm, addr, pte); struct folio *folio = NULL; bool present = false; + spinlock_t *ptl; + pte_t ptent; + ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte); + ptent = huge_ptep_get(walk->mm, addr, pte); if (pte_present(ptent)) { folio = page_folio(pte_page(ptent)); present = true; @@ -1170,6 +1173,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, else mss->private_hugetlb += huge_page_size(hstate_vma(vma)); } + spin_unlock(ptl); return 0; } #else From aa5a10b070690225317ed4d85413d144abfff750 Mon Sep 17 00:00:00 2001 From: Jinjiang Tu Date: Thu, 24 Jul 2025 17:09:57 +0800 Subject: [PATCH 120/279] fs/proc/task_mmu: hold PTL in pagemap_hugetlb_range and gather_hugetlb_stats Hold PTL in pagemap_hugetlb_range() and gather_hugetlb_stats() to avoid operating on stale page, as pagemap_pmd_range() and gather_pte_stats() have done. Link: https://lkml.kernel.org/r/20250724090958.455887-3-tujinjiang@huawei.com Signed-off-by: Jinjiang Tu Acked-by: David Hildenbrand Cc: Andrei Vagin Cc: Andrii Nakryiko Cc: Baolin Wang Cc: Brahmajit Das Cc: Catalin Marinas Cc: Christophe Leroy Cc: David Rientjes Cc: Dev Jain Cc: Hugh Dickins Cc: Joern Engel Cc: Kefeng Wang Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Ryan Roberts Cc: Thiago Jung Bauermann Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 55bab10bc779..ee1e4ccd33bd 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -2021,12 +2021,14 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask, struct pagemapread *pm = walk->private; struct vm_area_struct *vma = walk->vma; u64 flags = 0, frame = 0; + spinlock_t *ptl; int err = 0; pte_t pte; if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; + ptl = huge_pte_lock(hstate_vma(vma), walk->mm, ptep); pte = huge_ptep_get(walk->mm, addr, ptep); if (pte_present(pte)) { struct folio *folio = page_folio(pte_page(pte)); @@ -2054,11 +2056,12 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask, err = add_to_pagemap(&pme, pm); if (err) - return err; + break; if (pm->show_pfn && (flags & PM_PRESENT)) frame++; } + spin_unlock(ptl); cond_resched(); return err; @@ -3132,17 +3135,22 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { - pte_t huge_pte = huge_ptep_get(walk->mm, addr, pte); + pte_t huge_pte; struct numa_maps *md; struct page *page; + spinlock_t *ptl; + ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte); + huge_pte = huge_ptep_get(walk->mm, addr, pte); if (!pte_present(huge_pte)) - return 0; + goto out; page = pte_page(huge_pte); md = walk->private; gather_stats(page, md, pte_dirty(huge_pte), 1); +out: + spin_unlock(ptl); return 0; } From e144d53cf21fb9d02626c669533788c6bdc61ce3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Jul 2025 12:43:41 -0700 Subject: [PATCH 121/279] NFS/localio: nfs_close_local_fh() fix check for file closed If the struct nfs_file_localio is closed, its list entry will be empty, but the nfs_uuid->files list might still contain other entries. Acked-by: Mike Snitzer Tested-by: Mike Snitzer Reviewed-by: NeilBrown Fixes: 21fb44034695 ("nfs_localio: protect race between nfs_uuid_put() and nfs_close_local_fh()") Signed-off-by: Trond Myklebust --- fs/nfs_common/nfslocalio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c index 05c7c16e37ab..64949c46c174 100644 --- a/fs/nfs_common/nfslocalio.c +++ b/fs/nfs_common/nfslocalio.c @@ -314,7 +314,7 @@ void nfs_close_local_fh(struct nfs_file_localio *nfl) rcu_read_unlock(); return; } - if (list_empty(&nfs_uuid->files)) { + if (list_empty(&nfl->list)) { /* nfs_uuid_put() has started closing files, wait for it * to finished */ From fdd015de767977f21892329af5e12276eb80375f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Jul 2025 12:49:00 -0700 Subject: [PATCH 122/279] NFS/localio: nfs_uuid_put() fix races with nfs_open/close_local_fh() In order for the wait in nfs_uuid_put() to be safe, it is necessary to ensure that nfs_uuid_add_file() doesn't add a new entry once the nfs_uuid->net has been NULLed out. Also fix up the wake_up_var_locked() / wait_var_event_spinlock() to both use the nfs_uuid address, since nfl, and &nfl->uuid could be used elsewhere. Acked-by: Mike Snitzer Tested-by: Mike Snitzer Link: https://lore.kernel.org/all/175262893035.2234665.1735173020338594784@noble.neil.brown.name/ Fixes: 21fb44034695 ("nfs_localio: protect race between nfs_uuid_put() and nfs_close_local_fh()") Signed-off-by: Trond Myklebust --- fs/nfs_common/nfslocalio.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c index 64949c46c174..f1f1592ac134 100644 --- a/fs/nfs_common/nfslocalio.c +++ b/fs/nfs_common/nfslocalio.c @@ -177,7 +177,7 @@ static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid) /* nfs_close_local_fh() is doing the * close and we must wait. until it unlinks */ - wait_var_event_spinlock(nfl, + wait_var_event_spinlock(nfs_uuid, list_first_entry_or_null( &nfs_uuid->files, struct nfs_file_localio, @@ -243,15 +243,20 @@ void nfs_localio_invalidate_clients(struct list_head *nn_local_clients, } EXPORT_SYMBOL_GPL(nfs_localio_invalidate_clients); -static void nfs_uuid_add_file(nfs_uuid_t *nfs_uuid, struct nfs_file_localio *nfl) +static int nfs_uuid_add_file(nfs_uuid_t *nfs_uuid, struct nfs_file_localio *nfl) { + int ret = 0; + /* Add nfl to nfs_uuid->files if it isn't already */ spin_lock(&nfs_uuid->lock); - if (list_empty(&nfl->list)) { + if (rcu_access_pointer(nfs_uuid->net) == NULL) { + ret = -ENXIO; + } else if (list_empty(&nfl->list)) { rcu_assign_pointer(nfl->nfs_uuid, nfs_uuid); list_add_tail(&nfl->list, &nfs_uuid->files); } spin_unlock(&nfs_uuid->lock); + return ret; } /* @@ -285,11 +290,13 @@ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid, } rcu_read_unlock(); /* We have an implied reference to net thanks to nfsd_net_try_get */ - localio = nfs_to->nfsd_open_local_fh(net, uuid->dom, rpc_clnt, - cred, nfs_fh, pnf, fmode); + localio = nfs_to->nfsd_open_local_fh(net, uuid->dom, rpc_clnt, cred, + nfs_fh, pnf, fmode); + if (!IS_ERR(localio) && nfs_uuid_add_file(uuid, nfl) < 0) { + /* Delete the cached file when racing with nfs_uuid_put() */ + nfs_to_nfsd_file_put_local(pnf); + } nfs_to_nfsd_net_put(net); - if (!IS_ERR(localio)) - nfs_uuid_add_file(uuid, nfl); return localio; } @@ -338,7 +345,7 @@ void nfs_close_local_fh(struct nfs_file_localio *nfl) */ spin_lock(&nfs_uuid->lock); list_del_init(&nfl->list); - wake_up_var_locked(&nfl->nfs_uuid, &nfs_uuid->lock); + wake_up_var_locked(nfs_uuid, &nfs_uuid->lock); spin_unlock(&nfs_uuid->lock); } EXPORT_SYMBOL_GPL(nfs_close_local_fh); From 4ec752ce6debd5a0e7e0febf6bcf780ccda6ab5e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Jul 2025 11:29:51 -0700 Subject: [PATCH 123/279] NFS/localio: nfs_uuid_put() fix the wake up after unlinking the file Use store_release_wake_up() instead of wake_up_var_locked(), because the waiter cannot retake the nfs_uuid->lock. Acked-by: Mike Snitzer Tested-by: Mike Snitzer Suggested-by: NeilBrown Link: https://lore.kernel.org/all/175262948827.2234665.1891349021754495573@noble.neil.brown.name/ Fixes: 21fb44034695 ("nfs_localio: protect race between nfs_uuid_put() and nfs_close_local_fh()") Signed-off-by: Trond Myklebust --- fs/nfs_common/nfslocalio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c index f1f1592ac134..dd715cdb6c04 100644 --- a/fs/nfs_common/nfslocalio.c +++ b/fs/nfs_common/nfslocalio.c @@ -198,8 +198,7 @@ static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid) /* Now we can allow racing nfs_close_local_fh() to * skip the locking. */ - RCU_INIT_POINTER(nfl->nfs_uuid, NULL); - wake_up_var_locked(&nfl->nfs_uuid, &nfs_uuid->lock); + store_release_wake_up(&nfl->nfs_uuid, RCU_INITIALIZER(NULL)); } /* Remove client from nn->local_clients */ From 8cbe564974248ee980562be02f2b1912769562c7 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 6 Aug 2025 01:41:53 +0200 Subject: [PATCH 124/279] ALSA: intel_hdmi: Fix off-by-one error in __hdmi_lpe_audio_probe() In __hdmi_lpe_audio_probe(), strscpy() is incorrectly called with the length of the source string (excluding the NUL terminator) rather than the size of the destination buffer. This results in one character less being copied from 'card->shortname' to 'pcm->name'. Use the destination buffer size instead to ensure the card name is copied correctly. Cc: stable@vger.kernel.org Fixes: 75b1a8f9d62e ("ALSA: Convert strlcpy to strscpy when return value is unused") Signed-off-by: Thorsten Blum Link: https://patch.msgid.link/20250805234156.60294-1-thorsten.blum@linux.dev Signed-off-by: Takashi Iwai --- sound/x86/intel_hdmi_audio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index cc54539c6030..01f49555c5f6 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1765,7 +1765,7 @@ static int __hdmi_lpe_audio_probe(struct platform_device *pdev) /* setup private data which can be retrieved when required */ pcm->private_data = ctx; pcm->info_flags = 0; - strscpy(pcm->name, card->shortname, strlen(card->shortname)); + strscpy(pcm->name, card->shortname, sizeof(pcm->name)); /* setup the ops for playback */ snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &had_pcm_ops); From cac5f2af13459f6258c4857d2e61ea53d0dfd751 Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Tue, 5 Aug 2025 15:09:45 +0800 Subject: [PATCH 125/279] ALSA: hda/tas2781: Support L"SmartAmpCalibrationData" to save calibrated data Some devices save the calibrated data into L"CALI_DATA", and others into L"SmartAmpCalibrationData". Driver code will support both. Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20250805070945.524-1-shenghao-ding@ti.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/side-codecs/tas2781_hda.c | 47 +++++++++++++++------- sound/hda/codecs/side-codecs/tas2781_hda.h | 2 +- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/sound/hda/codecs/side-codecs/tas2781_hda.c b/sound/hda/codecs/side-codecs/tas2781_hda.c index 34217ce9f28e..f46d2e06c64f 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda.c +++ b/sound/hda/codecs/side-codecs/tas2781_hda.c @@ -18,6 +18,8 @@ #include "tas2781_hda.h" +#define CALIBRATION_DATA_AREA_NUM 2 + const efi_guid_t tasdev_fct_efi_guid[] = { /* DELL */ EFI_GUID(0xcc92382d, 0x6337, 0x41cb, 0xa8, 0x8b, 0x8e, 0xce, 0x74, @@ -160,36 +162,51 @@ int tas2781_save_calibration(struct tas2781_hda *hda) * manufactory. */ efi_guid_t efi_guid = tasdev_fct_efi_guid[LENOVO]; - static efi_char16_t efi_name[] = TASDEVICE_CALIBRATION_DATA_NAME; + /* + * Some devices save the calibrated data into L"CALI_DATA", + * and others into L"SmartAmpCalibrationData". + */ + static efi_char16_t *efi_name[CALIBRATION_DATA_AREA_NUM] = { + L"CALI_DATA", + L"SmartAmpCalibrationData", + }; struct tasdevice_priv *p = hda->priv; struct calidata *cali_data = &p->cali_data; unsigned long total_sz = 0; unsigned int attr, size; unsigned char *data; efi_status_t status; + int i; if (hda->catlog_id < LENOVO) efi_guid = tasdev_fct_efi_guid[hda->catlog_id]; cali_data->cali_dat_sz_per_dev = 20; size = p->ndev * (cali_data->cali_dat_sz_per_dev + 1); - /* Get real size of UEFI variable */ - status = efi.get_variable(efi_name, &efi_guid, &attr, &total_sz, NULL); - cali_data->total_sz = total_sz > size ? total_sz : size; - if (status == EFI_BUFFER_TOO_SMALL) { - /* Allocate data buffer of data_size bytes */ - data = p->cali_data.data = devm_kzalloc(p->dev, - p->cali_data.total_sz, GFP_KERNEL); - if (!data) { - p->cali_data.total_sz = 0; - return -ENOMEM; + for (i = 0; i < CALIBRATION_DATA_AREA_NUM; i++) { + /* Get real size of UEFI variable */ + status = efi.get_variable(efi_name[i], &efi_guid, &attr, + &total_sz, NULL); + cali_data->total_sz = total_sz > size ? total_sz : size; + if (status == EFI_BUFFER_TOO_SMALL) { + /* Allocate data buffer of data_size bytes */ + data = cali_data->data = devm_kzalloc(p->dev, + cali_data->total_sz, GFP_KERNEL); + if (!data) { + status = -ENOMEM; + continue; + } + /* Get variable contents into buffer */ + status = efi.get_variable(efi_name[i], &efi_guid, + &attr, &cali_data->total_sz, data); } - /* Get variable contents into buffer */ - status = efi.get_variable(efi_name, &efi_guid, &attr, - &p->cali_data.total_sz, data); + /* Check whether get the calibrated data */ + if (status == EFI_SUCCESS) + break; } + if (status != EFI_SUCCESS) { - p->cali_data.total_sz = 0; + cali_data->total_sz = 0; return status; } diff --git a/sound/hda/codecs/side-codecs/tas2781_hda.h b/sound/hda/codecs/side-codecs/tas2781_hda.h index 575a701c8dfb..66188909a0bb 100644 --- a/sound/hda/codecs/side-codecs/tas2781_hda.h +++ b/sound/hda/codecs/side-codecs/tas2781_hda.h @@ -11,7 +11,7 @@ /* Flag of calibration registers address. */ #define TASDEV_UEFI_CALI_REG_ADDR_FLG BIT(7) -#define TASDEVICE_CALIBRATION_DATA_NAME L"CALI_DATA" + #define TASDEV_CALIB_N 5 /* From 42e42562c9cfcdacf000f1b42284a4fad24f8546 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 4 Aug 2025 11:05:43 +0200 Subject: [PATCH 126/279] xfrm: flush all states in xfrm_state_fini While reverting commit f75a2804da39 ("xfrm: destroy xfrm_state synchronously on net exit path"), I incorrectly changed xfrm_state_flush's "proto" argument back to IPSEC_PROTO_ANY. This reverts some of the changes in commit dbb2483b2a46 ("xfrm: clean up xfrm protocol checks"), and leads to some states not being removed when we exit the netns. Pass 0 instead of IPSEC_PROTO_ANY from both xfrm_state_fini xfrm6_tunnel_net_exit, so that xfrm_state_flush deletes all states. Fixes: 2a198bbec691 ("Revert "xfrm: destroy xfrm_state synchronously on net exit path"") Reported-by: syzbot+6641a61fe0e2e89ae8c5@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=6641a61fe0e2e89ae8c5 Tested-by: syzbot+6641a61fe0e2e89ae8c5@syzkaller.appspotmail.com Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_tunnel.c | 2 +- net/xfrm/xfrm_state.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 5120a763da0d..0a0eeaed0591 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -334,7 +334,7 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net) struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); unsigned int i; - xfrm_state_flush(net, IPSEC_PROTO_ANY, false); + xfrm_state_flush(net, 0, false); xfrm_flush_gc(); for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 77db3b5fe4ac..78fcbb89cf32 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -3297,7 +3297,7 @@ void xfrm_state_fini(struct net *net) unsigned int sz; flush_work(&net->xfrm.state_hash_work); - xfrm_state_flush(net, IPSEC_PROTO_ANY, false); + xfrm_state_flush(net, 0, false); flush_work(&xfrm_state_gc_work); WARN_ON(!list_empty(&net->xfrm.state_all)); From 5b65258229117995eb6c4bd74995e15fb5f2cfe3 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 5 Aug 2025 11:32:20 -0700 Subject: [PATCH 127/279] genirq/test: Resolve irq lock inversion warnings irq_shutdown_and_deactivate() is normally called with the descriptor lock held, and interrupts disabled. Nested a few levels down, it grabs the global irq_resend_lock. Lockdep rightfully complains when interrupts are not disabled: CPU0 CPU1 ---- ---- lock(irq_resend_lock); local_irq_disable(); lock(&irq_desc_lock_class); lock(irq_resend_lock); lock(&irq_desc_lock_class); ... _raw_spin_lock+0x2b/0x40 clear_irq_resend+0x14/0x70 irq_shutdown_and_deactivate+0x29/0x80 irq_shutdown_depth_test+0x1ce/0x600 kunit_try_run_case+0x90/0x120 Grab the descriptor lock and disable interrupts, to resolve the problem. Fixes: 66067c3c8a1e ("genirq: Add kunit tests for depth counts") Reported-by: Guenter Roeck Signed-off-by: Brian Norris Signed-off-by: Thomas Gleixner Tested-by: Guenter Roeck Link: https://lore.kernel.org/all/aJJONEIoIiTSDMqc@google.com Closes: https://lore.kernel.org/lkml/31a761e4-8f81-40cf-aaf5-d220ba11911c@roeck-us.net/ --- kernel/irq/irq_test.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/irq/irq_test.c b/kernel/irq/irq_test.c index 5161b56a12f9..a75abebed7f2 100644 --- a/kernel/irq/irq_test.c +++ b/kernel/irq/irq_test.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: LGPL-2.1+ +#include #include #include #include @@ -134,7 +135,8 @@ static void irq_shutdown_depth_test(struct kunit *test) disable_irq(virq); KUNIT_EXPECT_EQ(test, desc->depth, 1); - irq_shutdown_and_deactivate(desc); + scoped_guard(raw_spinlock_irqsave, &desc->lock) + irq_shutdown_and_deactivate(desc); KUNIT_EXPECT_FALSE(test, irqd_is_activated(data)); KUNIT_EXPECT_FALSE(test, irqd_is_started(data)); From 3b6a18f0da8720d612d8a682ea5c55870da068e0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Aug 2025 18:09:49 +0200 Subject: [PATCH 128/279] irqchip: Build IMX_MU_MSI only on ARM Compile-testing IMX_MU_MSI on x86 without PCI_MSI support results in a build failure: drivers/gpio/gpio-sprd.c:8: include/linux/gpio/driver.h:41:33: error: field 'msiinfo' has incomplete type drivers/iommu/iommufd/viommu.c:4: include/linux/msi.h:528:33: error: field 'alloc_info' has incomplete type Tighten the dependency further to only allow compile testing on Arm. This could be refined further to allow certain x86 configs. This was submitted before to address a different build failure, which was fixed differently, but the problem has now returned in a different form. Fixes: 70afdab904d2d1e6 ("irqchip: Add IMX MU MSI controller driver") Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250805160952.4006075-1-arnd@kernel.org Link: https://lore.kernel.org/all/20221215164109.761427-1-arnd@kernel.org/ --- drivers/irqchip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 39a6ae1d574b..6d12c6ab9ea4 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -554,6 +554,7 @@ config IMX_MU_MSI tristate "i.MX MU used as MSI controller" depends on OF && HAS_IOMEM depends on ARCH_MXC || COMPILE_TEST + depends on ARM || ARM64 default m if ARCH_MXC select IRQ_DOMAIN select IRQ_DOMAIN_HIERARCHY From 614d416dd8aee2675fb591c598308a901a660db8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Aug 2025 18:04:25 +0200 Subject: [PATCH 129/279] ASoC: SOF: Intel: hda-sdw-bpt: fix SND_SOF_SOF_HDA_SDW_BPT dependencies The hda-sdw-bpt code links against the soundwire driver, but that fails when trying to link from built-in code into loadable module: x86_64-linux-ld: vmlinux.o: in function `intel_ace2x_bpt_close_stream.isra.0': intel_ace2x.c:(.text+0x137a531): undefined reference to `hda_sdw_bpt_close' x86_64-linux-ld: vmlinux.o: in function `intel_ace2x_bpt_send_async': intel_ace2x.c:(.text+0x137aa45): undefined reference to `hda_sdw_bpt_open' x86_64-linux-ld: intel_ace2x.c:(.text+0x137ab67): undefined reference to `hda_sdw_bpt_close' x86_64-linux-ld: intel_ace2x.c:(.text+0x137ac30): undefined reference to `hda_sdw_bpt_send_async' x86_64-linux-ld: vmlinux.o: in function `intel_ace2x_bpt_wait': intel_ace2x.c:(.text+0x137aced): undefined reference to `hda_sdw_bpt_wait' Ensure that both SOUNDWIRE_INTEL and SND_SOF_SOF_HDA_SDW_BPT are selected at the same time by SND_SOC_SOF_INTEL_LNL, and that this happens even if SND_SOC_SOF_INTEL_SOUNDWIRE is a loadable module but SND_SOC_SOF_INTEL_LNL is built-in. This follows the same logic as commit c5a61db9bf89 ("ASoC: SOF: fix intel-soundwire link failure"). Fixes: 5d5cb86fb46e ("ASoC: SOF: Intel: hda-sdw-bpt: add helpers for SoundWire BPT DMA") Signed-off-by: Arnd Bergmann Reviewed-by: Bard Liao Link: https://patch.msgid.link/20250805160451.4004602-1-arnd@kernel.org Signed-off-by: Mark Brown --- sound/soc/sof/intel/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig index dc1d21de4ab7..4f27f8c8debf 100644 --- a/sound/soc/sof/intel/Kconfig +++ b/sound/soc/sof/intel/Kconfig @@ -266,9 +266,10 @@ config SND_SOC_SOF_METEORLAKE config SND_SOC_SOF_INTEL_LNL tristate + select SOUNDWIRE_INTEL if SND_SOC_SOF_INTEL_SOUNDWIRE != n select SND_SOC_SOF_HDA_GENERIC select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE - select SND_SOF_SOF_HDA_SDW_BPT if SND_SOC_SOF_INTEL_SOUNDWIRE + select SND_SOF_SOF_HDA_SDW_BPT if SND_SOC_SOF_INTEL_SOUNDWIRE != n select SND_SOC_SOF_IPC4 select SND_SOC_SOF_INTEL_MTL From 9f320dfb0ffc555aa2eac8331dee0c2c16f67633 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Aug 2025 11:44:22 +0200 Subject: [PATCH 130/279] ALSA: hda/ca0132: Fix missing error handling in ca0132_alt_select_out() There are a couple of cases where the error is ignored or the error code isn't propagated in ca0132_alt_select_out(). Fix those. Fixes: def3f0a5c700 ("ALSA: hda/ca0132 - Add quirk output selection structures.") Link: https://patch.msgid.link/20250806094423.8843-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/hda/codecs/ca0132.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/hda/codecs/ca0132.c b/sound/hda/codecs/ca0132.c index b716f721f25d..b7d456e16c93 100644 --- a/sound/hda/codecs/ca0132.c +++ b/sound/hda/codecs/ca0132.c @@ -4802,7 +4802,8 @@ static int ca0132_alt_select_out(struct hda_codec *codec) if (err < 0) goto exit; - if (ca0132_alt_select_out_quirk_set(codec) < 0) + err = ca0132_alt_select_out_quirk_set(codec); + if (err < 0) goto exit; switch (spec->cur_out_type) { @@ -4892,6 +4893,8 @@ static int ca0132_alt_select_out(struct hda_codec *codec) spec->bass_redirection_val); else err = ca0132_alt_surround_set_bass_redirection(codec, 0); + if (err < 0) + goto exit; /* Unmute DSP now that we're done with output selection. */ err = dspio_set_uint_param(codec, 0x96, From bee47cb026e762841f3faece47b51f985e215edb Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 29 Jul 2025 12:40:20 -0400 Subject: [PATCH 131/279] sunrpc: fix handling of server side tls alerts Scott Mayhew discovered a security exploit in NFS over TLS in tls_alert_recv() due to its assumption it can read data from the msg iterator's kvec.. kTLS implementation splits TLS non-data record payload between the control message buffer (which includes the type such as TLS aler or TLS cipher change) and the rest of the payload (say TLS alert's level/description) which goes into the msg payload buffer. This patch proposes to rework how control messages are setup and used by sock_recvmsg(). If no control message structure is setup, kTLS layer will read and process TLS data record types. As soon as it encounters a TLS control message, it would return an error. At that point, NFS can setup a kvec backed msg buffer and read in the control message such as a TLS alert. Msg iterator can advance the kvec pointer as a part of the copy process thus we need to revert the iterator before calling into the tls_alert_recv. Reported-by: Scott Mayhew Fixes: 5e052dda121e ("SUNRPC: Recognize control messages in server-side TCP socket code") Suggested-by: Trond Myklebust Cc: stable@vger.kernel.org Signed-off-by: Olga Kornievskaia Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 46c156b121db..e2c5e0e626f9 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -257,20 +257,47 @@ svc_tcp_sock_process_cmsg(struct socket *sock, struct msghdr *msg, } static int -svc_tcp_sock_recv_cmsg(struct svc_sock *svsk, struct msghdr *msg) +svc_tcp_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags) { union { struct cmsghdr cmsg; u8 buf[CMSG_SPACE(sizeof(u8))]; } u; - struct socket *sock = svsk->sk_sock; + u8 alert[2]; + struct kvec alert_kvec = { + .iov_base = alert, + .iov_len = sizeof(alert), + }; + struct msghdr msg = { + .msg_flags = *msg_flags, + .msg_control = &u, + .msg_controllen = sizeof(u), + }; int ret; - msg->msg_control = &u; - msg->msg_controllen = sizeof(u); + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1, + alert_kvec.iov_len); + ret = sock_recvmsg(sock, &msg, MSG_DONTWAIT); + if (ret > 0 && + tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) { + iov_iter_revert(&msg.msg_iter, ret); + ret = svc_tcp_sock_process_cmsg(sock, &msg, &u.cmsg, -EAGAIN); + } + return ret; +} + +static int +svc_tcp_sock_recvmsg(struct svc_sock *svsk, struct msghdr *msg) +{ + int ret; + struct socket *sock = svsk->sk_sock; + ret = sock_recvmsg(sock, msg, MSG_DONTWAIT); - if (unlikely(msg->msg_controllen != sizeof(u))) - ret = svc_tcp_sock_process_cmsg(sock, msg, &u.cmsg, ret); + if (msg->msg_flags & MSG_CTRUNC) { + msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR); + if (ret == 0 || ret == -EIO) + ret = svc_tcp_sock_recv_cmsg(sock, &msg->msg_flags); + } return ret; } @@ -321,7 +348,7 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen, iov_iter_advance(&msg.msg_iter, seek); buflen -= seek; } - len = svc_tcp_sock_recv_cmsg(svsk, &msg); + len = svc_tcp_sock_recvmsg(svsk, &msg); if (len > 0) svc_flush_bvec(bvec, len, seek); @@ -1018,7 +1045,7 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk, iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen; iov.iov_len = want; iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, want); - len = svc_tcp_sock_recv_cmsg(svsk, &msg); + len = svc_tcp_sock_recvmsg(svsk, &msg); if (len < 0) return len; svsk->sk_tcplen += len; From 9f7488f24c7571d349d938061e0ede7a39b65d6b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Aug 2025 16:53:18 +0200 Subject: [PATCH 132/279] irqchip/mvebu-gicp: Use resource_size() for ioremap() 0-day reported an off by one in the ioremap() sizing: drivers/irqchip/irq-mvebu-gicp.c:240:45-48: WARNING: Suspicious code. resource_size is maybe missing with gicp -> res Convert it to resource_size(), which does the right thing. Fixes: 3c3d7dbab2c7 ("irqchip/mvebu-gicp: Clear pending interrupts on init") Reported-by: kernel test robot Signed-off-by: Thomas Gleixner Closes: https://lore.kernel.org/oe-kbuild-all/202508062150.mtFQMTXc-lkp@intel.com/ --- drivers/irqchip/irq-mvebu-gicp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-mvebu-gicp.c b/drivers/irqchip/irq-mvebu-gicp.c index fd85c845e015..54833717f8a7 100644 --- a/drivers/irqchip/irq-mvebu-gicp.c +++ b/drivers/irqchip/irq-mvebu-gicp.c @@ -237,7 +237,7 @@ static int mvebu_gicp_probe(struct platform_device *pdev) return -ENODEV; } - base = ioremap(gicp->res->start, gicp->res->end - gicp->res->start); + base = ioremap(gicp->res->start, resource_size(gicp->res)); if (IS_ERR(base)) { dev_err(&pdev->dev, "ioremap() failed. Unable to clear pending interrupts.\n"); } else { From 1cdd5a2626d8c9eb059c6b93a628413da833df95 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 5 Aug 2025 17:56:33 -0500 Subject: [PATCH 133/279] cifs: Move the SMB1 transport code out of transport.c Shrink the size of cifs.ko when SMB1 is not enabled in the config by moving the SMB1 transport code to different file. Signed-off-by: David Howells cc: linux-cifs@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/Makefile | 2 +- fs/smb/client/cifsproto.h | 15 + fs/smb/client/cifstransport.c | 566 ++++++++++++++++++++++++++++++++++ fs/smb/client/transport.c | 558 +-------------------------------- 4 files changed, 588 insertions(+), 553 deletions(-) create mode 100644 fs/smb/client/cifstransport.c diff --git a/fs/smb/client/Makefile b/fs/smb/client/Makefile index 22023e30915b..4c97b31a25c2 100644 --- a/fs/smb/client/Makefile +++ b/fs/smb/client/Makefile @@ -32,6 +32,6 @@ cifs-$(CONFIG_CIFS_SMB_DIRECT) += smbdirect.o cifs-$(CONFIG_CIFS_ROOT) += cifsroot.o -cifs-$(CONFIG_CIFS_ALLOW_INSECURE_LEGACY) += smb1ops.o cifssmb.o +cifs-$(CONFIG_CIFS_ALLOW_INSECURE_LEGACY) += smb1ops.o cifssmb.o cifstransport.o cifs-$(CONFIG_CIFS_COMPRESSION) += compress.o compress/lz77.o diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 40ec0634377f..c34c533b2efa 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -116,16 +116,31 @@ extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *, int * /* bytes returned */ , const int); extern int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, char *in_buf, int flags); +int cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server); extern struct mid_q_entry *cifs_setup_request(struct cifs_ses *, struct TCP_Server_Info *, struct smb_rqst *); extern struct mid_q_entry *cifs_setup_async_request(struct TCP_Server_Info *, struct smb_rqst *); +int __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, + struct smb_rqst *rqst); extern int cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, bool log_error); +int wait_for_free_request(struct TCP_Server_Info *server, const int flags, + unsigned int *instance); extern int cifs_wait_mtu_credits(struct TCP_Server_Info *server, size_t size, size_t *num, struct cifs_credits *credits); + +static inline int +send_cancel(struct TCP_Server_Info *server, struct smb_rqst *rqst, + struct mid_q_entry *mid) +{ + return server->ops->send_cancel ? + server->ops->send_cancel(server, rqst, mid) : 0; +} + +int wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ); extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, struct kvec *, int /* nvec to send */, int * /* type of buf returned */, const int flags, diff --git a/fs/smb/client/cifstransport.c b/fs/smb/client/cifstransport.c new file mode 100644 index 000000000000..352dafb888dd --- /dev/null +++ b/fs/smb/client/cifstransport.c @@ -0,0 +1,566 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * + * Copyright (C) International Business Machines Corp., 2002,2008 + * Author(s): Steve French (sfrench@us.ibm.com) + * Jeremy Allison (jra@samba.org) 2006. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cifspdu.h" +#include "cifsglob.h" +#include "cifsproto.h" +#include "cifs_debug.h" +#include "smb2proto.h" +#include "smbdirect.h" +#include "compress.h" + +/* Max number of iovectors we can use off the stack when sending requests. */ +#define CIFS_MAX_IOV_SIZE 8 + +static struct mid_q_entry * +alloc_mid(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) +{ + struct mid_q_entry *temp; + + if (server == NULL) { + cifs_dbg(VFS, "%s: null TCP session\n", __func__); + return NULL; + } + + temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); + memset(temp, 0, sizeof(struct mid_q_entry)); + kref_init(&temp->refcount); + temp->mid = get_mid(smb_buffer); + temp->pid = current->pid; + temp->command = cpu_to_le16(smb_buffer->Command); + cifs_dbg(FYI, "For smb_command %d\n", smb_buffer->Command); + /* easier to use jiffies */ + /* when mid allocated can be before when sent */ + temp->when_alloc = jiffies; + temp->server = server; + + /* + * The default is for the mid to be synchronous, so the + * default callback just wakes up the current task. + */ + get_task_struct(current); + temp->creator = current; + temp->callback = cifs_wake_up_task; + temp->callback_data = current; + + atomic_inc(&mid_count); + temp->mid_state = MID_REQUEST_ALLOCATED; + return temp; +} + +int +smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer, + unsigned int smb_buf_length) +{ + struct kvec iov[2]; + struct smb_rqst rqst = { .rq_iov = iov, + .rq_nvec = 2 }; + + iov[0].iov_base = smb_buffer; + iov[0].iov_len = 4; + iov[1].iov_base = (char *)smb_buffer + 4; + iov[1].iov_len = smb_buf_length; + + return __smb_send_rqst(server, 1, &rqst); +} + +static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, + struct mid_q_entry **ppmidQ) +{ + spin_lock(&ses->ses_lock); + if (ses->ses_status == SES_NEW) { + if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && + (in_buf->Command != SMB_COM_NEGOTIATE)) { + spin_unlock(&ses->ses_lock); + return -EAGAIN; + } + /* else ok - we are setting up session */ + } + + if (ses->ses_status == SES_EXITING) { + /* check if SMB session is bad because we are setting it up */ + if (in_buf->Command != SMB_COM_LOGOFF_ANDX) { + spin_unlock(&ses->ses_lock); + return -EAGAIN; + } + /* else ok - we are shutting down session */ + } + spin_unlock(&ses->ses_lock); + + *ppmidQ = alloc_mid(in_buf, ses->server); + if (*ppmidQ == NULL) + return -ENOMEM; + spin_lock(&ses->server->mid_queue_lock); + list_add_tail(&(*ppmidQ)->qhead, &ses->server->pending_mid_q); + spin_unlock(&ses->server->mid_queue_lock); + return 0; +} + +struct mid_q_entry * +cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst) +{ + int rc; + struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base; + struct mid_q_entry *mid; + + if (rqst->rq_iov[0].iov_len != 4 || + rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base) + return ERR_PTR(-EIO); + + /* enable signing if server requires it */ + if (server->sign) + hdr->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; + + mid = alloc_mid(hdr, server); + if (mid == NULL) + return ERR_PTR(-ENOMEM); + + rc = cifs_sign_rqst(rqst, server, &mid->sequence_number); + if (rc) { + release_mid(mid); + return ERR_PTR(rc); + } + + return mid; +} + +/* + * + * Send an SMB Request. No response info (other than return code) + * needs to be parsed. + * + * flags indicate the type of request buffer and how long to wait + * and whether to log NT STATUS code (error) before mapping it to POSIX error + * + */ +int +SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, + char *in_buf, int flags) +{ + int rc; + struct kvec iov[1]; + struct kvec rsp_iov; + int resp_buf_type; + + iov[0].iov_base = in_buf; + iov[0].iov_len = get_rfc1002_length(in_buf) + 4; + flags |= CIFS_NO_RSP_BUF; + rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov); + cifs_dbg(NOISY, "SendRcvNoRsp flags %d rc %d\n", flags, rc); + + return rc; +} + +int +cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, + bool log_error) +{ + unsigned int len = get_rfc1002_length(mid->resp_buf) + 4; + + dump_smb(mid->resp_buf, min_t(u32, 92, len)); + + /* convert the length into a more usable form */ + if (server->sign) { + struct kvec iov[2]; + int rc = 0; + struct smb_rqst rqst = { .rq_iov = iov, + .rq_nvec = 2 }; + + iov[0].iov_base = mid->resp_buf; + iov[0].iov_len = 4; + iov[1].iov_base = (char *)mid->resp_buf + 4; + iov[1].iov_len = len - 4; + /* FIXME: add code to kill session */ + rc = cifs_verify_signature(&rqst, server, + mid->sequence_number); + if (rc) + cifs_server_dbg(VFS, "SMB signature verification returned error = %d\n", + rc); + } + + /* BB special case reconnect tid and uid here? */ + return map_and_check_smb_error(mid, log_error); +} + +struct mid_q_entry * +cifs_setup_request(struct cifs_ses *ses, struct TCP_Server_Info *ignored, + struct smb_rqst *rqst) +{ + int rc; + struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base; + struct mid_q_entry *mid; + + if (rqst->rq_iov[0].iov_len != 4 || + rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base) + return ERR_PTR(-EIO); + + rc = allocate_mid(ses, hdr, &mid); + if (rc) + return ERR_PTR(rc); + rc = cifs_sign_rqst(rqst, ses->server, &mid->sequence_number); + if (rc) { + delete_mid(mid); + return ERR_PTR(rc); + } + return mid; +} + +int +SendReceive2(const unsigned int xid, struct cifs_ses *ses, + struct kvec *iov, int n_vec, int *resp_buf_type /* ret */, + const int flags, struct kvec *resp_iov) +{ + struct smb_rqst rqst; + struct kvec s_iov[CIFS_MAX_IOV_SIZE], *new_iov; + int rc; + + if (n_vec + 1 > CIFS_MAX_IOV_SIZE) { + new_iov = kmalloc_array(n_vec + 1, sizeof(struct kvec), + GFP_KERNEL); + if (!new_iov) { + /* otherwise cifs_send_recv below sets resp_buf_type */ + *resp_buf_type = CIFS_NO_BUFFER; + return -ENOMEM; + } + } else + new_iov = s_iov; + + /* 1st iov is a RFC1001 length followed by the rest of the packet */ + memcpy(new_iov + 1, iov, (sizeof(struct kvec) * n_vec)); + + new_iov[0].iov_base = new_iov[1].iov_base; + new_iov[0].iov_len = 4; + new_iov[1].iov_base += 4; + new_iov[1].iov_len -= 4; + + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = new_iov; + rqst.rq_nvec = n_vec + 1; + + rc = cifs_send_recv(xid, ses, ses->server, + &rqst, resp_buf_type, flags, resp_iov); + if (n_vec + 1 > CIFS_MAX_IOV_SIZE) + kfree(new_iov); + return rc; +} + +int +SendReceive(const unsigned int xid, struct cifs_ses *ses, + struct smb_hdr *in_buf, struct smb_hdr *out_buf, + int *pbytes_returned, const int flags) +{ + int rc = 0; + struct mid_q_entry *midQ; + unsigned int len = be32_to_cpu(in_buf->smb_buf_length); + struct kvec iov = { .iov_base = in_buf, .iov_len = len }; + struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 }; + struct cifs_credits credits = { .value = 1, .instance = 0 }; + struct TCP_Server_Info *server; + + if (ses == NULL) { + cifs_dbg(VFS, "Null smb session\n"); + return -EIO; + } + server = ses->server; + if (server == NULL) { + cifs_dbg(VFS, "Null tcp session\n"); + return -EIO; + } + + spin_lock(&server->srv_lock); + if (server->tcpStatus == CifsExiting) { + spin_unlock(&server->srv_lock); + return -ENOENT; + } + spin_unlock(&server->srv_lock); + + /* Ensure that we do not send more than 50 overlapping requests + to the same server. We may make this configurable later or + use ses->maxReq */ + + if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { + cifs_server_dbg(VFS, "Invalid length, greater than maximum frame, %d\n", + len); + return -EIO; + } + + rc = wait_for_free_request(server, flags, &credits.instance); + if (rc) + return rc; + + /* make sure that we sign in the same order that we send on this socket + and avoid races inside tcp sendmsg code that could cause corruption + of smb data */ + + cifs_server_lock(server); + + rc = allocate_mid(ses, in_buf, &midQ); + if (rc) { + cifs_server_unlock(server); + /* Update # of requests on wire to server */ + add_credits(server, &credits, 0); + return rc; + } + + rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number); + if (rc) { + cifs_server_unlock(server); + goto out; + } + + midQ->mid_state = MID_REQUEST_SUBMITTED; + + rc = smb_send(server, in_buf, len); + cifs_save_when_sent(midQ); + + if (rc < 0) + server->sequence_number -= 2; + + cifs_server_unlock(server); + + if (rc < 0) + goto out; + + rc = wait_for_response(server, midQ); + if (rc != 0) { + send_cancel(server, &rqst, midQ); + spin_lock(&server->mid_queue_lock); + if (midQ->mid_state == MID_REQUEST_SUBMITTED || + midQ->mid_state == MID_RESPONSE_RECEIVED) { + /* no longer considered to be "in-flight" */ + midQ->callback = release_mid; + spin_unlock(&server->mid_queue_lock); + add_credits(server, &credits, 0); + return rc; + } + spin_unlock(&server->mid_queue_lock); + } + + rc = cifs_sync_mid_result(midQ, server); + if (rc != 0) { + add_credits(server, &credits, 0); + return rc; + } + + if (!midQ->resp_buf || !out_buf || + midQ->mid_state != MID_RESPONSE_READY) { + rc = -EIO; + cifs_server_dbg(VFS, "Bad MID state?\n"); + goto out; + } + + *pbytes_returned = get_rfc1002_length(midQ->resp_buf); + memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); + rc = cifs_check_receive(midQ, server, 0); +out: + delete_mid(midQ); + add_credits(server, &credits, 0); + + return rc; +} + +/* We send a LOCKINGX_CANCEL_LOCK to cause the Windows + blocking lock to return. */ + +static int +send_lock_cancel(const unsigned int xid, struct cifs_tcon *tcon, + struct smb_hdr *in_buf, + struct smb_hdr *out_buf) +{ + int bytes_returned; + struct cifs_ses *ses = tcon->ses; + LOCK_REQ *pSMB = (LOCK_REQ *)in_buf; + + /* We just modify the current in_buf to change + the type of lock from LOCKING_ANDX_SHARED_LOCK + or LOCKING_ANDX_EXCLUSIVE_LOCK to + LOCKING_ANDX_CANCEL_LOCK. */ + + pSMB->LockType = LOCKING_ANDX_CANCEL_LOCK|LOCKING_ANDX_LARGE_FILES; + pSMB->Timeout = 0; + pSMB->hdr.Mid = get_next_mid(ses->server); + + return SendReceive(xid, ses, in_buf, out_buf, + &bytes_returned, 0); +} + +int +SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, + struct smb_hdr *in_buf, struct smb_hdr *out_buf, + int *pbytes_returned) +{ + int rc = 0; + int rstart = 0; + struct mid_q_entry *midQ; + struct cifs_ses *ses; + unsigned int len = be32_to_cpu(in_buf->smb_buf_length); + struct kvec iov = { .iov_base = in_buf, .iov_len = len }; + struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 }; + unsigned int instance; + struct TCP_Server_Info *server; + + if (tcon == NULL || tcon->ses == NULL) { + cifs_dbg(VFS, "Null smb session\n"); + return -EIO; + } + ses = tcon->ses; + server = ses->server; + + if (server == NULL) { + cifs_dbg(VFS, "Null tcp session\n"); + return -EIO; + } + + spin_lock(&server->srv_lock); + if (server->tcpStatus == CifsExiting) { + spin_unlock(&server->srv_lock); + return -ENOENT; + } + spin_unlock(&server->srv_lock); + + /* Ensure that we do not send more than 50 overlapping requests + to the same server. We may make this configurable later or + use ses->maxReq */ + + if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { + cifs_tcon_dbg(VFS, "Invalid length, greater than maximum frame, %d\n", + len); + return -EIO; + } + + rc = wait_for_free_request(server, CIFS_BLOCKING_OP, &instance); + if (rc) + return rc; + + /* make sure that we sign in the same order that we send on this socket + and avoid races inside tcp sendmsg code that could cause corruption + of smb data */ + + cifs_server_lock(server); + + rc = allocate_mid(ses, in_buf, &midQ); + if (rc) { + cifs_server_unlock(server); + return rc; + } + + rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number); + if (rc) { + delete_mid(midQ); + cifs_server_unlock(server); + return rc; + } + + midQ->mid_state = MID_REQUEST_SUBMITTED; + rc = smb_send(server, in_buf, len); + cifs_save_when_sent(midQ); + + if (rc < 0) + server->sequence_number -= 2; + + cifs_server_unlock(server); + + if (rc < 0) { + delete_mid(midQ); + return rc; + } + + /* Wait for a reply - allow signals to interrupt. */ + rc = wait_event_interruptible(server->response_q, + (!(midQ->mid_state == MID_REQUEST_SUBMITTED || + midQ->mid_state == MID_RESPONSE_RECEIVED)) || + ((server->tcpStatus != CifsGood) && + (server->tcpStatus != CifsNew))); + + /* Were we interrupted by a signal ? */ + spin_lock(&server->srv_lock); + if ((rc == -ERESTARTSYS) && + (midQ->mid_state == MID_REQUEST_SUBMITTED || + midQ->mid_state == MID_RESPONSE_RECEIVED) && + ((server->tcpStatus == CifsGood) || + (server->tcpStatus == CifsNew))) { + spin_unlock(&server->srv_lock); + + if (in_buf->Command == SMB_COM_TRANSACTION2) { + /* POSIX lock. We send a NT_CANCEL SMB to cause the + blocking lock to return. */ + rc = send_cancel(server, &rqst, midQ); + if (rc) { + delete_mid(midQ); + return rc; + } + } else { + /* Windows lock. We send a LOCKINGX_CANCEL_LOCK + to cause the blocking lock to return. */ + + rc = send_lock_cancel(xid, tcon, in_buf, out_buf); + + /* If we get -ENOLCK back the lock may have + already been removed. Don't exit in this case. */ + if (rc && rc != -ENOLCK) { + delete_mid(midQ); + return rc; + } + } + + rc = wait_for_response(server, midQ); + if (rc) { + send_cancel(server, &rqst, midQ); + spin_lock(&server->mid_queue_lock); + if (midQ->mid_state == MID_REQUEST_SUBMITTED || + midQ->mid_state == MID_RESPONSE_RECEIVED) { + /* no longer considered to be "in-flight" */ + midQ->callback = release_mid; + spin_unlock(&server->mid_queue_lock); + return rc; + } + spin_unlock(&server->mid_queue_lock); + } + + /* We got the response - restart system call. */ + rstart = 1; + spin_lock(&server->srv_lock); + } + spin_unlock(&server->srv_lock); + + rc = cifs_sync_mid_result(midQ, server); + if (rc != 0) + return rc; + + /* rcvd frame is ok */ + if (out_buf == NULL || midQ->mid_state != MID_RESPONSE_READY) { + rc = -EIO; + cifs_tcon_dbg(VFS, "Bad MID state?\n"); + goto out; + } + + *pbytes_returned = get_rfc1002_length(midQ->resp_buf); + memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); + rc = cifs_check_receive(midQ, server, 0); +out: + delete_mid(midQ); + if (rstart && rc == -EACCES) + return -ERESTARTSYS; + return rc; +} diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index ca9358c24ceb..32d528b4dd83 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -30,9 +30,6 @@ #include "smbdirect.h" #include "compress.h" -/* Max number of iovectors we can use off the stack when sending requests. */ -#define CIFS_MAX_IOV_SIZE 8 - void cifs_wake_up_task(struct mid_q_entry *mid) { @@ -41,42 +38,6 @@ cifs_wake_up_task(struct mid_q_entry *mid) wake_up_process(mid->callback_data); } -static struct mid_q_entry * -alloc_mid(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) -{ - struct mid_q_entry *temp; - - if (server == NULL) { - cifs_dbg(VFS, "%s: null TCP session\n", __func__); - return NULL; - } - - temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); - memset(temp, 0, sizeof(struct mid_q_entry)); - kref_init(&temp->refcount); - temp->mid = get_mid(smb_buffer); - temp->pid = current->pid; - temp->command = cpu_to_le16(smb_buffer->Command); - cifs_dbg(FYI, "For smb_command %d\n", smb_buffer->Command); - /* easier to use jiffies */ - /* when mid allocated can be before when sent */ - temp->when_alloc = jiffies; - temp->server = server; - - /* - * The default is for the mid to be synchronous, so the - * default callback just wakes up the current task. - */ - get_task_struct(current); - temp->creator = current; - temp->callback = cifs_wake_up_task; - temp->callback_data = current; - - atomic_inc(&mid_count); - temp->mid_state = MID_REQUEST_ALLOCATED; - return temp; -} - void __release_mid(struct kref *refcount) { struct mid_q_entry *midEntry = @@ -269,9 +230,8 @@ smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst) return buflen; } -static int -__smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, - struct smb_rqst *rqst) +int __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, + struct smb_rqst *rqst) { int rc; struct kvec *iov; @@ -456,22 +416,6 @@ smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, return rc; } -int -smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer, - unsigned int smb_buf_length) -{ - struct kvec iov[2]; - struct smb_rqst rqst = { .rq_iov = iov, - .rq_nvec = 2 }; - - iov[0].iov_base = smb_buffer; - iov[0].iov_len = 4; - iov[1].iov_base = (char *)smb_buffer + 4; - iov[1].iov_len = smb_buf_length; - - return __smb_send_rqst(server, 1, &rqst); -} - static int wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, const int timeout, const int flags, @@ -626,9 +570,8 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, return 0; } -static int -wait_for_free_request(struct TCP_Server_Info *server, const int flags, - unsigned int *instance) +int wait_for_free_request(struct TCP_Server_Info *server, const int flags, + unsigned int *instance) { return wait_for_free_credits(server, 1, -1, flags, instance); @@ -690,40 +633,7 @@ cifs_wait_mtu_credits(struct TCP_Server_Info *server, size_t size, return 0; } -static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, - struct mid_q_entry **ppmidQ) -{ - spin_lock(&ses->ses_lock); - if (ses->ses_status == SES_NEW) { - if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && - (in_buf->Command != SMB_COM_NEGOTIATE)) { - spin_unlock(&ses->ses_lock); - return -EAGAIN; - } - /* else ok - we are setting up session */ - } - - if (ses->ses_status == SES_EXITING) { - /* check if SMB session is bad because we are setting it up */ - if (in_buf->Command != SMB_COM_LOGOFF_ANDX) { - spin_unlock(&ses->ses_lock); - return -EAGAIN; - } - /* else ok - we are shutting down session */ - } - spin_unlock(&ses->ses_lock); - - *ppmidQ = alloc_mid(in_buf, ses->server); - if (*ppmidQ == NULL) - return -ENOMEM; - spin_lock(&ses->server->mid_queue_lock); - list_add_tail(&(*ppmidQ)->qhead, &ses->server->pending_mid_q); - spin_unlock(&ses->server->mid_queue_lock); - return 0; -} - -static int -wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) +int wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) { int error; @@ -737,34 +647,6 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) return 0; } -struct mid_q_entry * -cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst) -{ - int rc; - struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base; - struct mid_q_entry *mid; - - if (rqst->rq_iov[0].iov_len != 4 || - rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base) - return ERR_PTR(-EIO); - - /* enable signing if server requires it */ - if (server->sign) - hdr->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; - - mid = alloc_mid(hdr, server); - if (mid == NULL) - return ERR_PTR(-ENOMEM); - - rc = cifs_sign_rqst(rqst, server, &mid->sequence_number); - if (rc) { - release_mid(mid); - return ERR_PTR(rc); - } - - return mid; -} - /* * Send a SMB request and set the callback function in the mid to handle * the result. Caller is responsible for dealing with timeouts. @@ -845,35 +727,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, return rc; } -/* - * - * Send an SMB Request. No response info (other than return code) - * needs to be parsed. - * - * flags indicate the type of request buffer and how long to wait - * and whether to log NT STATUS code (error) before mapping it to POSIX error - * - */ -int -SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, - char *in_buf, int flags) -{ - int rc; - struct kvec iov[1]; - struct kvec rsp_iov; - int resp_buf_type; - - iov[0].iov_base = in_buf; - iov[0].iov_len = get_rfc1002_length(in_buf) + 4; - flags |= CIFS_NO_RSP_BUF; - rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov); - cifs_dbg(NOISY, "SendRcvNoRsp flags %d rc %d\n", flags, rc); - - return rc; -} - -static int -cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) +int cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) { int rc = 0; @@ -915,68 +769,6 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) return rc; } -static inline int -send_cancel(struct TCP_Server_Info *server, struct smb_rqst *rqst, - struct mid_q_entry *mid) -{ - return server->ops->send_cancel ? - server->ops->send_cancel(server, rqst, mid) : 0; -} - -int -cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, - bool log_error) -{ - unsigned int len = get_rfc1002_length(mid->resp_buf) + 4; - - dump_smb(mid->resp_buf, min_t(u32, 92, len)); - - /* convert the length into a more usable form */ - if (server->sign) { - struct kvec iov[2]; - int rc = 0; - struct smb_rqst rqst = { .rq_iov = iov, - .rq_nvec = 2 }; - - iov[0].iov_base = mid->resp_buf; - iov[0].iov_len = 4; - iov[1].iov_base = (char *)mid->resp_buf + 4; - iov[1].iov_len = len - 4; - /* FIXME: add code to kill session */ - rc = cifs_verify_signature(&rqst, server, - mid->sequence_number); - if (rc) - cifs_server_dbg(VFS, "SMB signature verification returned error = %d\n", - rc); - } - - /* BB special case reconnect tid and uid here? */ - return map_and_check_smb_error(mid, log_error); -} - -struct mid_q_entry * -cifs_setup_request(struct cifs_ses *ses, struct TCP_Server_Info *ignored, - struct smb_rqst *rqst) -{ - int rc; - struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base; - struct mid_q_entry *mid; - - if (rqst->rq_iov[0].iov_len != 4 || - rqst->rq_iov[0].iov_base + 4 != rqst->rq_iov[1].iov_base) - return ERR_PTR(-EIO); - - rc = allocate_mid(ses, hdr, &mid); - if (rc) - return ERR_PTR(rc); - rc = cifs_sign_rqst(rqst, ses->server, &mid->sequence_number); - if (rc) { - delete_mid(mid); - return ERR_PTR(rc); - } - return mid; -} - static void cifs_compound_callback(struct mid_q_entry *mid) { @@ -1304,344 +1096,6 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses, rqst, resp_buf_type, resp_iov); } -int -SendReceive2(const unsigned int xid, struct cifs_ses *ses, - struct kvec *iov, int n_vec, int *resp_buf_type /* ret */, - const int flags, struct kvec *resp_iov) -{ - struct smb_rqst rqst; - struct kvec s_iov[CIFS_MAX_IOV_SIZE], *new_iov; - int rc; - - if (n_vec + 1 > CIFS_MAX_IOV_SIZE) { - new_iov = kmalloc_array(n_vec + 1, sizeof(struct kvec), - GFP_KERNEL); - if (!new_iov) { - /* otherwise cifs_send_recv below sets resp_buf_type */ - *resp_buf_type = CIFS_NO_BUFFER; - return -ENOMEM; - } - } else - new_iov = s_iov; - - /* 1st iov is a RFC1001 length followed by the rest of the packet */ - memcpy(new_iov + 1, iov, (sizeof(struct kvec) * n_vec)); - - new_iov[0].iov_base = new_iov[1].iov_base; - new_iov[0].iov_len = 4; - new_iov[1].iov_base += 4; - new_iov[1].iov_len -= 4; - - memset(&rqst, 0, sizeof(struct smb_rqst)); - rqst.rq_iov = new_iov; - rqst.rq_nvec = n_vec + 1; - - rc = cifs_send_recv(xid, ses, ses->server, - &rqst, resp_buf_type, flags, resp_iov); - if (n_vec + 1 > CIFS_MAX_IOV_SIZE) - kfree(new_iov); - return rc; -} - -int -SendReceive(const unsigned int xid, struct cifs_ses *ses, - struct smb_hdr *in_buf, struct smb_hdr *out_buf, - int *pbytes_returned, const int flags) -{ - int rc = 0; - struct mid_q_entry *midQ; - unsigned int len = be32_to_cpu(in_buf->smb_buf_length); - struct kvec iov = { .iov_base = in_buf, .iov_len = len }; - struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 }; - struct cifs_credits credits = { .value = 1, .instance = 0 }; - struct TCP_Server_Info *server; - - if (ses == NULL) { - cifs_dbg(VFS, "Null smb session\n"); - return -EIO; - } - server = ses->server; - if (server == NULL) { - cifs_dbg(VFS, "Null tcp session\n"); - return -EIO; - } - - spin_lock(&server->srv_lock); - if (server->tcpStatus == CifsExiting) { - spin_unlock(&server->srv_lock); - return -ENOENT; - } - spin_unlock(&server->srv_lock); - - /* Ensure that we do not send more than 50 overlapping requests - to the same server. We may make this configurable later or - use ses->maxReq */ - - if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { - cifs_server_dbg(VFS, "Invalid length, greater than maximum frame, %d\n", - len); - return -EIO; - } - - rc = wait_for_free_request(server, flags, &credits.instance); - if (rc) - return rc; - - /* make sure that we sign in the same order that we send on this socket - and avoid races inside tcp sendmsg code that could cause corruption - of smb data */ - - cifs_server_lock(server); - - rc = allocate_mid(ses, in_buf, &midQ); - if (rc) { - cifs_server_unlock(server); - /* Update # of requests on wire to server */ - add_credits(server, &credits, 0); - return rc; - } - - rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number); - if (rc) { - cifs_server_unlock(server); - goto out; - } - - midQ->mid_state = MID_REQUEST_SUBMITTED; - - rc = smb_send(server, in_buf, len); - cifs_save_when_sent(midQ); - - if (rc < 0) - server->sequence_number -= 2; - - cifs_server_unlock(server); - - if (rc < 0) - goto out; - - rc = wait_for_response(server, midQ); - if (rc != 0) { - send_cancel(server, &rqst, midQ); - spin_lock(&server->mid_queue_lock); - if (midQ->mid_state == MID_REQUEST_SUBMITTED || - midQ->mid_state == MID_RESPONSE_RECEIVED) { - /* no longer considered to be "in-flight" */ - midQ->callback = release_mid; - spin_unlock(&server->mid_queue_lock); - add_credits(server, &credits, 0); - return rc; - } - spin_unlock(&server->mid_queue_lock); - } - - rc = cifs_sync_mid_result(midQ, server); - if (rc != 0) { - add_credits(server, &credits, 0); - return rc; - } - - if (!midQ->resp_buf || !out_buf || - midQ->mid_state != MID_RESPONSE_READY) { - rc = -EIO; - cifs_server_dbg(VFS, "Bad MID state?\n"); - goto out; - } - - *pbytes_returned = get_rfc1002_length(midQ->resp_buf); - memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); - rc = cifs_check_receive(midQ, server, 0); -out: - delete_mid(midQ); - add_credits(server, &credits, 0); - - return rc; -} - -/* We send a LOCKINGX_CANCEL_LOCK to cause the Windows - blocking lock to return. */ - -static int -send_lock_cancel(const unsigned int xid, struct cifs_tcon *tcon, - struct smb_hdr *in_buf, - struct smb_hdr *out_buf) -{ - int bytes_returned; - struct cifs_ses *ses = tcon->ses; - LOCK_REQ *pSMB = (LOCK_REQ *)in_buf; - - /* We just modify the current in_buf to change - the type of lock from LOCKING_ANDX_SHARED_LOCK - or LOCKING_ANDX_EXCLUSIVE_LOCK to - LOCKING_ANDX_CANCEL_LOCK. */ - - pSMB->LockType = LOCKING_ANDX_CANCEL_LOCK|LOCKING_ANDX_LARGE_FILES; - pSMB->Timeout = 0; - pSMB->hdr.Mid = get_next_mid(ses->server); - - return SendReceive(xid, ses, in_buf, out_buf, - &bytes_returned, 0); -} - -int -SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, - struct smb_hdr *in_buf, struct smb_hdr *out_buf, - int *pbytes_returned) -{ - int rc = 0; - int rstart = 0; - struct mid_q_entry *midQ; - struct cifs_ses *ses; - unsigned int len = be32_to_cpu(in_buf->smb_buf_length); - struct kvec iov = { .iov_base = in_buf, .iov_len = len }; - struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 }; - unsigned int instance; - struct TCP_Server_Info *server; - - if (tcon == NULL || tcon->ses == NULL) { - cifs_dbg(VFS, "Null smb session\n"); - return -EIO; - } - ses = tcon->ses; - server = ses->server; - - if (server == NULL) { - cifs_dbg(VFS, "Null tcp session\n"); - return -EIO; - } - - spin_lock(&server->srv_lock); - if (server->tcpStatus == CifsExiting) { - spin_unlock(&server->srv_lock); - return -ENOENT; - } - spin_unlock(&server->srv_lock); - - /* Ensure that we do not send more than 50 overlapping requests - to the same server. We may make this configurable later or - use ses->maxReq */ - - if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { - cifs_tcon_dbg(VFS, "Invalid length, greater than maximum frame, %d\n", - len); - return -EIO; - } - - rc = wait_for_free_request(server, CIFS_BLOCKING_OP, &instance); - if (rc) - return rc; - - /* make sure that we sign in the same order that we send on this socket - and avoid races inside tcp sendmsg code that could cause corruption - of smb data */ - - cifs_server_lock(server); - - rc = allocate_mid(ses, in_buf, &midQ); - if (rc) { - cifs_server_unlock(server); - return rc; - } - - rc = cifs_sign_smb(in_buf, server, &midQ->sequence_number); - if (rc) { - delete_mid(midQ); - cifs_server_unlock(server); - return rc; - } - - midQ->mid_state = MID_REQUEST_SUBMITTED; - rc = smb_send(server, in_buf, len); - cifs_save_when_sent(midQ); - - if (rc < 0) - server->sequence_number -= 2; - - cifs_server_unlock(server); - - if (rc < 0) { - delete_mid(midQ); - return rc; - } - - /* Wait for a reply - allow signals to interrupt. */ - rc = wait_event_interruptible(server->response_q, - (!(midQ->mid_state == MID_REQUEST_SUBMITTED || - midQ->mid_state == MID_RESPONSE_RECEIVED)) || - ((server->tcpStatus != CifsGood) && - (server->tcpStatus != CifsNew))); - - /* Were we interrupted by a signal ? */ - spin_lock(&server->srv_lock); - if ((rc == -ERESTARTSYS) && - (midQ->mid_state == MID_REQUEST_SUBMITTED || - midQ->mid_state == MID_RESPONSE_RECEIVED) && - ((server->tcpStatus == CifsGood) || - (server->tcpStatus == CifsNew))) { - spin_unlock(&server->srv_lock); - - if (in_buf->Command == SMB_COM_TRANSACTION2) { - /* POSIX lock. We send a NT_CANCEL SMB to cause the - blocking lock to return. */ - rc = send_cancel(server, &rqst, midQ); - if (rc) { - delete_mid(midQ); - return rc; - } - } else { - /* Windows lock. We send a LOCKINGX_CANCEL_LOCK - to cause the blocking lock to return. */ - - rc = send_lock_cancel(xid, tcon, in_buf, out_buf); - - /* If we get -ENOLCK back the lock may have - already been removed. Don't exit in this case. */ - if (rc && rc != -ENOLCK) { - delete_mid(midQ); - return rc; - } - } - - rc = wait_for_response(server, midQ); - if (rc) { - send_cancel(server, &rqst, midQ); - spin_lock(&server->mid_queue_lock); - if (midQ->mid_state == MID_REQUEST_SUBMITTED || - midQ->mid_state == MID_RESPONSE_RECEIVED) { - /* no longer considered to be "in-flight" */ - midQ->callback = release_mid; - spin_unlock(&server->mid_queue_lock); - return rc; - } - spin_unlock(&server->mid_queue_lock); - } - - /* We got the response - restart system call. */ - rstart = 1; - spin_lock(&server->srv_lock); - } - spin_unlock(&server->srv_lock); - - rc = cifs_sync_mid_result(midQ, server); - if (rc != 0) - return rc; - - /* rcvd frame is ok */ - if (out_buf == NULL || midQ->mid_state != MID_RESPONSE_READY) { - rc = -EIO; - cifs_tcon_dbg(VFS, "Bad MID state?\n"); - goto out; - } - - *pbytes_returned = get_rfc1002_length(midQ->resp_buf); - memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); - rc = cifs_check_receive(midQ, server, 0); -out: - delete_mid(midQ); - if (rstart && rc == -EACCES) - return -ERESTARTSYS; - return rc; -} /* * Discard any remaining data in the current SMB. To do this, we borrow the From 7b306dfa326f70114312b320d083b21fa9481e1e Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 29 Jul 2025 13:41:29 -0500 Subject: [PATCH 134/279] x86/sev: Evict cache lines during SNP memory validation An SNP cache coherency vulnerability requires a cache line eviction mitigation when validating memory after a page state change to private. The specific mitigation is to touch the first and last byte of each 4K page that is being validated. There is no need to perform the mitigation when performing a page state change to shared and rescinding validation. CPUID bit Fn8000001F_EBX[31] defines the COHERENCY_SFW_NO CPUID bit that, when set, indicates that the software mitigation for this vulnerability is not needed. Implement the mitigation and invoke it when validating memory (making it private) and the COHERENCY_SFW_NO bit is not set, indicating the SNP guest is vulnerable. Co-developed-by: Michael Roth Signed-off-by: Michael Roth Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Acked-by: Thomas Gleixner --- arch/x86/boot/cpuflags.c | 13 +++++++++++++ arch/x86/boot/startup/sev-shared.c | 7 +++++++ arch/x86/coco/sev/core.c | 21 +++++++++++++++++++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/sev.h | 19 +++++++++++++++++++ arch/x86/kernel/cpu/scattered.c | 1 + 6 files changed, 62 insertions(+) diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c index 916bac09b464..63e037e94e4c 100644 --- a/arch/x86/boot/cpuflags.c +++ b/arch/x86/boot/cpuflags.c @@ -106,5 +106,18 @@ void get_cpuflags(void) cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6], &cpu.flags[1]); } + + if (max_amd_level >= 0x8000001f) { + u32 ebx; + + /* + * The X86_FEATURE_COHERENCY_SFW_NO feature bit is in + * the virtualization flags entry (word 8) and set by + * scattered.c, so the bit needs to be explicitly set. + */ + cpuid(0x8000001f, &ignored, &ebx, &ignored, &ignored); + if (ebx & BIT(31)) + set_bit(X86_FEATURE_COHERENCY_SFW_NO, cpu.flags); + } } } diff --git a/arch/x86/boot/startup/sev-shared.c b/arch/x86/boot/startup/sev-shared.c index 7a706db87b93..ac7dfd21ddd4 100644 --- a/arch/x86/boot/startup/sev-shared.c +++ b/arch/x86/boot/startup/sev-shared.c @@ -810,6 +810,13 @@ static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr, if (ret) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); } + + /* + * If validating memory (making it private) and affected by the + * cache-coherency vulnerability, perform the cache eviction mitigation. + */ + if (validate && !has_cpuflag(X86_FEATURE_COHERENCY_SFW_NO)) + sev_evict_cache((void *)vaddr, 1); } /* diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index fc59ce78c477..400a6ab75d45 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -358,10 +358,31 @@ static void svsm_pval_pages(struct snp_psc_desc *desc) static void pvalidate_pages(struct snp_psc_desc *desc) { + struct psc_entry *e; + unsigned int i; + if (snp_vmpl) svsm_pval_pages(desc); else pval_pages(desc); + + /* + * If not affected by the cache-coherency vulnerability there is no need + * to perform the cache eviction mitigation. + */ + if (cpu_feature_enabled(X86_FEATURE_COHERENCY_SFW_NO)) + return; + + for (i = 0; i <= desc->hdr.end_entry; i++) { + e = &desc->entries[i]; + + /* + * If validating memory (making it private) perform the cache + * eviction mitigation. + */ + if (e->operation == SNP_PAGE_STATE_PRIVATE) + sev_evict_cache(pfn_to_kaddr(e->gfn), e->pagesize ? 512 : 1); + } } static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 602957dd2609..06fc0479a23f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -218,6 +218,7 @@ #define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */ #define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */ #define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */ +#define X86_FEATURE_COHERENCY_SFW_NO ( 8*32+ 4) /* SNP cache coherency software work around not needed */ #define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */ #define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 89075ff19afa..02236962fdb1 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -619,6 +619,24 @@ int rmp_make_shared(u64 pfn, enum pg_level level); void snp_leak_pages(u64 pfn, unsigned int npages); void kdump_sev_callback(void); void snp_fixup_e820_tables(void); + +static inline void sev_evict_cache(void *va, int npages) +{ + volatile u8 val __always_unused; + u8 *bytes = va; + int page_idx; + + /* + * For SEV guests, a read from the first/last cache-lines of a 4K page + * using the guest key is sufficient to cause a flush of all cache-lines + * associated with that 4K page without incurring all the overhead of a + * full CLFLUSH sequence. + */ + for (page_idx = 0; page_idx < npages; page_idx++) { + val = bytes[page_idx * PAGE_SIZE]; + val = bytes[page_idx * PAGE_SIZE + PAGE_SIZE - 1]; + } +} #else static inline bool snp_probe_rmptable_info(void) { return false; } static inline int snp_rmptable_init(void) { return -ENOSYS; } @@ -634,6 +652,7 @@ static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV static inline void snp_leak_pages(u64 pfn, unsigned int npages) {} static inline void kdump_sev_callback(void) { } static inline void snp_fixup_e820_tables(void) {} +static inline void sev_evict_cache(void *va, int npages) {} #endif #endif diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index b4a1f6732a3a..6b868afb26c3 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -48,6 +48,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, { X86_FEATURE_AMD_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 }, { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, + { X86_FEATURE_COHERENCY_SFW_NO, CPUID_EBX, 31, 0x8000001f, 0 }, { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, From 472f8a3fccbb579cb98c1821da4cb9cbd51ee3e4 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 25 Jun 2025 11:15:01 +0300 Subject: [PATCH 135/279] mailbox: mtk-cmdq: Switch to pm_runtime_put_autosuspend() __pm_runtime_put_autosuspend() was meant to be used by callers that needed to put the Runtime PM usage_count without marking the device's last busy timestamp. It was however seen that the Runtime PM autosuspend related functions should include that call. Thus switch the driver to use pm_runtime_put_autosuspend(). Signed-off-by: Sakari Ailus Signed-off-by: Jassi Brar --- drivers/mailbox/mtk-cmdq-mailbox.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c index ab4e8d1954a1..532929916e99 100644 --- a/drivers/mailbox/mtk-cmdq-mailbox.c +++ b/drivers/mailbox/mtk-cmdq-mailbox.c @@ -390,7 +390,7 @@ static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data) task = kzalloc(sizeof(*task), GFP_ATOMIC); if (!task) { - __pm_runtime_put_autosuspend(cmdq->mbox.dev); + pm_runtime_put_autosuspend(cmdq->mbox.dev); return -ENOMEM; } @@ -440,7 +440,7 @@ static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data) list_move_tail(&task->list_entry, &thread->task_busy_list); pm_runtime_mark_last_busy(cmdq->mbox.dev); - __pm_runtime_put_autosuspend(cmdq->mbox.dev); + pm_runtime_put_autosuspend(cmdq->mbox.dev); return 0; } @@ -488,7 +488,7 @@ static void cmdq_mbox_shutdown(struct mbox_chan *chan) spin_unlock_irqrestore(&thread->chan->lock, flags); pm_runtime_mark_last_busy(cmdq->mbox.dev); - __pm_runtime_put_autosuspend(cmdq->mbox.dev); + pm_runtime_put_autosuspend(cmdq->mbox.dev); } static int cmdq_mbox_flush(struct mbox_chan *chan, unsigned long timeout) @@ -528,7 +528,7 @@ static int cmdq_mbox_flush(struct mbox_chan *chan, unsigned long timeout) out: spin_unlock_irqrestore(&thread->chan->lock, flags); pm_runtime_mark_last_busy(cmdq->mbox.dev); - __pm_runtime_put_autosuspend(cmdq->mbox.dev); + pm_runtime_put_autosuspend(cmdq->mbox.dev); return 0; @@ -543,7 +543,7 @@ static int cmdq_mbox_flush(struct mbox_chan *chan, unsigned long timeout) return -EFAULT; } pm_runtime_mark_last_busy(cmdq->mbox.dev); - __pm_runtime_put_autosuspend(cmdq->mbox.dev); + pm_runtime_put_autosuspend(cmdq->mbox.dev); return 0; } From fd3a4decb4f8ff1362db16fe42fc1af77d0259c3 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Wed, 11 Jun 2025 12:43:39 +0200 Subject: [PATCH 136/279] mailbox: Use dev_fwnode() irq_domain_create_simple() takes fwnode as the first argument. It can be extracted from the struct device using dev_fwnode() helper instead of using of_node with of_fwnode_handle(). So use the dev_fwnode() helper. Signed-off-by: Jiri Slaby (SUSE) Cc: Manivannan Sadhasivam Cc: Jassi Brar Signed-off-by: Jassi Brar --- drivers/mailbox/qcom-ipcc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mailbox/qcom-ipcc.c b/drivers/mailbox/qcom-ipcc.c index ea44ffb5ce1a..d957d989c0ce 100644 --- a/drivers/mailbox/qcom-ipcc.c +++ b/drivers/mailbox/qcom-ipcc.c @@ -312,8 +312,7 @@ static int qcom_ipcc_probe(struct platform_device *pdev) if (!name) return -ENOMEM; - ipcc->irq_domain = irq_domain_create_tree(of_fwnode_handle(pdev->dev.of_node), - &qcom_ipcc_irq_ops, ipcc); + ipcc->irq_domain = irq_domain_create_tree(dev_fwnode(&pdev->dev), &qcom_ipcc_irq_ops, ipcc); if (!ipcc->irq_domain) return -ENOMEM; From dfa477b6e674d51f87b342f1d31d9316e44d67d3 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Mon, 2 Jun 2025 15:23:10 -0700 Subject: [PATCH 137/279] dt-bindings: mailbox: Add support for bcm74110 Add devicetree YAML binding for brcmstb bcm74110 mailbox used for communicating with a co-processor. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Justin Chen Reviewed-by: Florian Fainelli Signed-off-by: Jassi Brar --- .../bindings/mailbox/brcm,bcm74110-mbox.yaml | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 Documentation/devicetree/bindings/mailbox/brcm,bcm74110-mbox.yaml diff --git a/Documentation/devicetree/bindings/mailbox/brcm,bcm74110-mbox.yaml b/Documentation/devicetree/bindings/mailbox/brcm,bcm74110-mbox.yaml new file mode 100644 index 000000000000..750cc96edb46 --- /dev/null +++ b/Documentation/devicetree/bindings/mailbox/brcm,bcm74110-mbox.yaml @@ -0,0 +1,64 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mailbox/brcm,bcm74110-mbox.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Broadcom BCM74110 Mailbox + +maintainers: + - Justin Chen + - Florian Fainelli + +description: Broadcom mailbox hardware first introduced with 74110 + +properties: + compatible: + enum: + - brcm,bcm74110-mbox + + reg: + maxItems: 1 + + interrupts: + items: + - description: RX doorbell and watermark interrupts + - description: TX doorbell and watermark interrupts + + "#mbox-cells": + const: 2 + description: + The first cell is channel type and second cell is shared memory slot + + brcm,rx: + $ref: /schemas/types.yaml#/definitions/uint32 + description: RX Mailbox number + + brcm,tx: + $ref: /schemas/types.yaml#/definitions/uint32 + description: TX Mailbox number + +required: + - compatible + - reg + - interrupts + - "#mbox-cells" + - brcm,rx + - brcm,tx + +additionalProperties: false + +examples: + - | + #include + #include + + mailbox@a552000 { + compatible = "brcm,bcm74110-mbox"; + reg = <0xa552000 0x1104>; + interrupts = , + ; + #mbox-cells = <0x2>; + brcm,rx = <0x7>; + brcm,tx = <0x6>; + }; From 52436007b862a90348ac8efc3a89eaceb2234f53 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Mon, 2 Jun 2025 15:23:11 -0700 Subject: [PATCH 138/279] mailbox: Add support for bcm74110 The bcm74110 mailbox driver is used to communicate with a co-processor for various power management and firmware related tasks. Signed-off-by: Justin Chen Reviewed-by: Florian Fainelli Tested-by: Florian Fainelli Signed-off-by: Jassi Brar --- drivers/mailbox/Kconfig | 10 + drivers/mailbox/Makefile | 2 + drivers/mailbox/bcm74110-mailbox.c | 656 +++++++++++++++++++++++++++++ 3 files changed, 668 insertions(+) create mode 100644 drivers/mailbox/bcm74110-mailbox.c diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig index 4fef4797b110..9abf193acd0b 100644 --- a/drivers/mailbox/Kconfig +++ b/drivers/mailbox/Kconfig @@ -350,4 +350,14 @@ config CIX_MBOX is unidirectional. Say Y here if you want to use the CIX Mailbox support. +config BCM74110_MAILBOX + tristate "Brcmstb BCM74110 Mailbox" + depends on ARCH_BRCMSTB || COMPILE_TEST + default ARCH_BRCMSTB + help + Broadcom STB mailbox driver present starting with brcmstb bcm74110 + SoCs. The mailbox is a communication channel between the host + processor and coprocessor that handles various power management task + and more. + endif diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile index 786a46587ba1..c0a4278aa129 100644 --- a/drivers/mailbox/Makefile +++ b/drivers/mailbox/Makefile @@ -74,3 +74,5 @@ obj-$(CONFIG_QCOM_IPCC) += qcom-ipcc.o obj-$(CONFIG_THEAD_TH1520_MBOX) += mailbox-th1520.o obj-$(CONFIG_CIX_MBOX) += cix-mailbox.o + +obj-$(CONFIG_BCM74110_MAILBOX) += bcm74110-mailbox.o diff --git a/drivers/mailbox/bcm74110-mailbox.c b/drivers/mailbox/bcm74110-mailbox.c new file mode 100644 index 000000000000..0680be8dc18f --- /dev/null +++ b/drivers/mailbox/bcm74110-mailbox.c @@ -0,0 +1,656 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Broadcom BCM74110 Mailbox Driver + * + * Copyright (c) 2025 Broadcom + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BCM_MBOX_BASE(sel) ((sel) * 0x40) +#define BCM_MBOX_IRQ_BASE(sel) (((sel) * 0x20) + 0x800) + +#define BCM_MBOX_CFGA 0x0 +#define BCM_MBOX_CFGB 0x4 +#define BCM_MBOX_CFGC 0x8 +#define BCM_MBOX_CFGD 0xc +#define BCM_MBOX_CTRL 0x10 +#define BCM_MBOX_CTRL_EN BIT(0) +#define BCM_MBOX_CTRL_CLR BIT(1) +#define BCM_MBOX_STATUS0 0x14 +#define BCM_MBOX_STATUS0_NOT_EMPTY BIT(28) +#define BCM_MBOX_STATUS0_FULL BIT(29) +#define BCM_MBOX_STATUS1 0x18 +#define BCM_MBOX_STATUS2 0x1c +#define BCM_MBOX_WDATA 0x20 +#define BCM_MBOX_RDATA 0x28 + +#define BCM_MBOX_IRQ_STATUS 0x0 +#define BCM_MBOX_IRQ_SET 0x4 +#define BCM_MBOX_IRQ_CLEAR 0x8 +#define BCM_MBOX_IRQ_MASK_STATUS 0xc +#define BCM_MBOX_IRQ_MASK_SET 0x10 +#define BCM_MBOX_IRQ_MASK_CLEAR 0x14 +#define BCM_MBOX_IRQ_TIMEOUT BIT(0) +#define BCM_MBOX_IRQ_NOT_EMPTY BIT(1) +#define BCM_MBOX_IRQ_FULL BIT(2) +#define BCM_MBOX_IRQ_LOW_WM BIT(3) +#define BCM_MBOX_IRQ_HIGH_WM BIT(4) + +#define BCM_LINK_CODE0 0xbe0 +#define BCM_LINK_CODE1 0xbe1 +#define BCM_LINK_CODE2 0xbe2 + +enum { + BCM_MSG_FUNC_LINK_START = 0, + BCM_MSG_FUNC_LINK_STOP, + BCM_MSG_FUNC_SHMEM_TX, + BCM_MSG_FUNC_SHMEM_RX, + BCM_MSG_FUNC_SHMEM_STOP, + BCM_MSG_FUNC_MAX, +}; + +enum { + BCM_MSG_SVC_INIT = 0, + BCM_MSG_SVC_PMC, + BCM_MSG_SVC_SCMI, + BCM_MSG_SVC_DPFE, + BCM_MSG_SVC_MAX, +}; + +struct bcm74110_mbox_msg { + struct list_head list_entry; +#define BCM_MSG_VERSION_MASK GENMASK(31, 29) +#define BCM_MSG_VERSION 0x1 +#define BCM_MSG_REQ_MASK BIT(28) +#define BCM_MSG_RPLY_MASK BIT(27) +#define BCM_MSG_SVC_MASK GENMASK(26, 24) +#define BCM_MSG_FUNC_MASK GENMASK(23, 16) +#define BCM_MSG_LENGTH_MASK GENMASK(15, 4) +#define BCM_MSG_SLOT_MASK GENMASK(3, 0) + +#define BCM_MSG_SET_FIELD(hdr, field, val) \ + do { \ + hdr &= ~BCM_MSG_##field##_MASK; \ + hdr |= FIELD_PREP(BCM_MSG_##field##_MASK, val); \ + } while (0) + +#define BCM_MSG_GET_FIELD(hdr, field) \ + FIELD_GET(BCM_MSG_##field##_MASK, hdr) + u32 msg; +}; + +struct bcm74110_mbox_chan { + struct bcm74110_mbox *mbox; + bool en; + int slot; + int type; +}; + +struct bcm74110_mbox { + struct platform_device *pdev; + void __iomem *base; + + int tx_chan; + int rx_chan; + int rx_irq; + struct list_head rx_svc_init_list; + spinlock_t rx_svc_list_lock; + + struct mbox_controller controller; + struct bcm74110_mbox_chan *mbox_chan; +}; + +#define BCM74110_OFFSET_IO_WRITEL_MACRO(name, offset_base) \ +static void bcm74110_##name##_writel(struct bcm74110_mbox *mbox,\ + u32 val, u32 off) \ +{ \ + writel_relaxed(val, mbox->base + offset_base + off); \ +} +BCM74110_OFFSET_IO_WRITEL_MACRO(tx, BCM_MBOX_BASE(mbox->tx_chan)); +BCM74110_OFFSET_IO_WRITEL_MACRO(irq, BCM_MBOX_IRQ_BASE(mbox->rx_chan)); + +#define BCM74110_OFFSET_IO_READL_MACRO(name, offset_base) \ +static u32 bcm74110_##name##_readl(struct bcm74110_mbox *mbox, \ + u32 off) \ +{ \ + return readl_relaxed(mbox->base + offset_base + off); \ +} +BCM74110_OFFSET_IO_READL_MACRO(tx, BCM_MBOX_BASE(mbox->tx_chan)); +BCM74110_OFFSET_IO_READL_MACRO(rx, BCM_MBOX_BASE(mbox->rx_chan)); +BCM74110_OFFSET_IO_READL_MACRO(irq, BCM_MBOX_IRQ_BASE(mbox->rx_chan)); + +static inline struct bcm74110_mbox *bcm74110_mbox_from_cntrl( + struct mbox_controller *cntrl) +{ + return container_of(cntrl, struct bcm74110_mbox, controller); +} + +static void bcm74110_rx_push_init_msg(struct bcm74110_mbox *mbox, u32 val) +{ + struct bcm74110_mbox_msg *msg; + + msg = kzalloc(sizeof(*msg), GFP_ATOMIC); + if (!msg) + return; + + INIT_LIST_HEAD(&msg->list_entry); + msg->msg = val; + + spin_lock(&mbox->rx_svc_list_lock); + list_add_tail(&msg->list_entry, &mbox->rx_svc_init_list); + spin_unlock(&mbox->rx_svc_list_lock); +} + +static void bcm74110_rx_process_msg(struct bcm74110_mbox *mbox) +{ + struct device *dev = &mbox->pdev->dev; + struct bcm74110_mbox_chan *chan_priv; + struct mbox_chan *chan; + u32 msg, status; + int type; + + do { + msg = bcm74110_rx_readl(mbox, BCM_MBOX_RDATA); + status = bcm74110_rx_readl(mbox, BCM_MBOX_STATUS0); + + dev_dbg(dev, "rx: [{req=%lu|rply=%lu|srv=%lu|fn=%lu|length=%lu|slot=%lu]\n", + BCM_MSG_GET_FIELD(msg, REQ), BCM_MSG_GET_FIELD(msg, RPLY), + BCM_MSG_GET_FIELD(msg, SVC), BCM_MSG_GET_FIELD(msg, FUNC), + BCM_MSG_GET_FIELD(msg, LENGTH), BCM_MSG_GET_FIELD(msg, SLOT)); + + type = BCM_MSG_GET_FIELD(msg, SVC); + switch (type) { + case BCM_MSG_SVC_INIT: + bcm74110_rx_push_init_msg(mbox, msg); + break; + case BCM_MSG_SVC_PMC: + case BCM_MSG_SVC_SCMI: + case BCM_MSG_SVC_DPFE: + chan = &mbox->controller.chans[type]; + chan_priv = chan->con_priv; + if (chan_priv->en) + mbox_chan_received_data(chan, NULL); + else + dev_warn(dev, "Channel not enabled\n"); + break; + default: + dev_warn(dev, "Unsupported msg received\n"); + } + } while (status & BCM_MBOX_STATUS0_NOT_EMPTY); +} + +static irqreturn_t bcm74110_mbox_isr(int irq, void *data) +{ + struct bcm74110_mbox *mbox = data; + u32 status; + + status = bcm74110_irq_readl(mbox, BCM_MBOX_IRQ_STATUS); + + bcm74110_irq_writel(mbox, 0xffffffff, BCM_MBOX_IRQ_CLEAR); + + if (status & BCM_MBOX_IRQ_NOT_EMPTY) + bcm74110_rx_process_msg(mbox); + else + dev_warn(&mbox->pdev->dev, "Spurious interrupt\n"); + + return IRQ_HANDLED; +} + +static void bcm74110_mbox_mask_and_clear(struct bcm74110_mbox *mbox) +{ + bcm74110_irq_writel(mbox, 0xffffffff, BCM_MBOX_IRQ_MASK_SET); + bcm74110_irq_writel(mbox, 0xffffffff, BCM_MBOX_IRQ_CLEAR); +} + +static int bcm74110_rx_pop_init_msg(struct bcm74110_mbox *mbox, u32 func_type, + u32 *val) +{ + struct bcm74110_mbox_msg *msg, *msg_tmp; + unsigned long flags; + bool found = false; + + spin_lock_irqsave(&mbox->rx_svc_list_lock, flags); + list_for_each_entry_safe(msg, msg_tmp, &mbox->rx_svc_init_list, + list_entry) { + if (BCM_MSG_GET_FIELD(msg->msg, FUNC) == func_type) { + list_del(&msg->list_entry); + found = true; + break; + } + } + spin_unlock_irqrestore(&mbox->rx_svc_list_lock, flags); + + if (!found) + return -EINVAL; + + *val = msg->msg; + kfree(msg); + + return 0; +} + +static void bcm74110_rx_flush_msg(struct bcm74110_mbox *mbox) +{ + struct bcm74110_mbox_msg *msg, *msg_tmp; + LIST_HEAD(list_temp); + unsigned long flags; + + spin_lock_irqsave(&mbox->rx_svc_list_lock, flags); + list_splice_init(&mbox->rx_svc_init_list, &list_temp); + spin_unlock_irqrestore(&mbox->rx_svc_list_lock, flags); + + list_for_each_entry_safe(msg, msg_tmp, &list_temp, list_entry) { + list_del(&msg->list_entry); + kfree(msg); + } +} + +#define BCM_DEQUEUE_TIMEOUT_MS 30 +static int bcm74110_rx_pop_init_msg_block(struct bcm74110_mbox *mbox, u32 func_type, + u32 *val) +{ + int ret, timeout = 0; + + do { + ret = bcm74110_rx_pop_init_msg(mbox, func_type, val); + + if (!ret) + return 0; + + /* TODO: Figure out what is a good sleep here. */ + usleep_range(1000, 2000); + timeout++; + } while (timeout < BCM_DEQUEUE_TIMEOUT_MS); + + dev_warn(&mbox->pdev->dev, "Timeout waiting for service init response\n"); + return -ETIMEDOUT; +} + +static int bcm74110_mbox_create_msg(int req, int rply, int svc, int func, + int length, int slot) +{ + u32 msg = 0; + + BCM_MSG_SET_FIELD(msg, REQ, req); + BCM_MSG_SET_FIELD(msg, RPLY, rply); + BCM_MSG_SET_FIELD(msg, SVC, svc); + BCM_MSG_SET_FIELD(msg, FUNC, func); + BCM_MSG_SET_FIELD(msg, LENGTH, length); + BCM_MSG_SET_FIELD(msg, SLOT, slot); + + return msg; +} + +static int bcm74110_mbox_tx_msg(struct bcm74110_mbox *mbox, u32 msg) +{ + int val; + + /* We can potentially poll with timeout here instead */ + val = bcm74110_tx_readl(mbox, BCM_MBOX_STATUS0); + if (val & BCM_MBOX_STATUS0_FULL) { + dev_err(&mbox->pdev->dev, "Mailbox full\n"); + return -EINVAL; + } + + dev_dbg(&mbox->pdev->dev, "tx: [{req=%lu|rply=%lu|srv=%lu|fn=%lu|length=%lu|slot=%lu]\n", + BCM_MSG_GET_FIELD(msg, REQ), BCM_MSG_GET_FIELD(msg, RPLY), + BCM_MSG_GET_FIELD(msg, SVC), BCM_MSG_GET_FIELD(msg, FUNC), + BCM_MSG_GET_FIELD(msg, LENGTH), BCM_MSG_GET_FIELD(msg, SLOT)); + + bcm74110_tx_writel(mbox, msg, BCM_MBOX_WDATA); + + return 0; +} + +#define BCM_MBOX_LINK_TRAINING_RETRIES 5 +static int bcm74110_mbox_link_training(struct bcm74110_mbox *mbox) +{ + int ret, retries = 0; + u32 msg = 0, orig_len = 0, len = BCM_LINK_CODE0; + + do { + switch (len) { + case 0: + retries++; + dev_warn(&mbox->pdev->dev, + "Link train failed, trying again... %d\n", + retries); + if (retries > BCM_MBOX_LINK_TRAINING_RETRIES) + return -EINVAL; + len = BCM_LINK_CODE0; + fallthrough; + case BCM_LINK_CODE0: + case BCM_LINK_CODE1: + case BCM_LINK_CODE2: + msg = bcm74110_mbox_create_msg(1, 0, BCM_MSG_SVC_INIT, + BCM_MSG_FUNC_LINK_START, + len, BCM_MSG_SVC_INIT); + break; + default: + break; + } + + bcm74110_mbox_tx_msg(mbox, msg); + + /* No response expected for LINK_CODE2 */ + if (len == BCM_LINK_CODE2) + return 0; + + orig_len = len; + + ret = bcm74110_rx_pop_init_msg_block(mbox, + BCM_MSG_GET_FIELD(msg, FUNC), + &msg); + if (ret) { + len = 0; + continue; + } + + if ((BCM_MSG_GET_FIELD(msg, SVC) != BCM_MSG_SVC_INIT) || + (BCM_MSG_GET_FIELD(msg, FUNC) != BCM_MSG_FUNC_LINK_START) || + (BCM_MSG_GET_FIELD(msg, SLOT) != 0) || + (BCM_MSG_GET_FIELD(msg, RPLY) != 1) || + (BCM_MSG_GET_FIELD(msg, REQ) != 0)) { + len = 0; + continue; + } + + len = BCM_MSG_GET_FIELD(msg, LENGTH); + + /* Make sure sequence is good */ + if (len != (orig_len + 1)) { + len = 0; + continue; + } + } while (1); + + return -EINVAL; +} + +static int bcm74110_mbox_tx_msg_and_wait_ack(struct bcm74110_mbox *mbox, u32 msg) +{ + int ret; + u32 recv_msg; + + ret = bcm74110_mbox_tx_msg(mbox, msg); + if (ret) + return ret; + + ret = bcm74110_rx_pop_init_msg_block(mbox, BCM_MSG_GET_FIELD(msg, FUNC), + &recv_msg); + if (ret) + return ret; + + /* + * Modify tx message to verify rx ack. + * Flip RPLY/REQ for synchronous messages + */ + if (BCM_MSG_GET_FIELD(msg, REQ) == 1) { + BCM_MSG_SET_FIELD(msg, RPLY, 1); + BCM_MSG_SET_FIELD(msg, REQ, 0); + } + + if (msg != recv_msg) { + dev_err(&mbox->pdev->dev, "Found ack, but ack is invalid\n"); + return -EINVAL; + } + + return 0; +} + +/* Each index points to 0x100 of HAB MEM. IDX size counts from 0 */ +#define BCM_MBOX_HAB_MEM_IDX_START 0x30 +#define BCM_MBOX_HAB_MEM_IDX_SIZE 0x0 +static int bcm74110_mbox_shmem_init(struct bcm74110_mbox *mbox) +{ + u32 msg = 0; + int ret; + + msg = bcm74110_mbox_create_msg(1, 0, BCM_MSG_SVC_INIT, + BCM_MSG_FUNC_SHMEM_STOP, + 0, BCM_MSG_SVC_INIT); + ret = bcm74110_mbox_tx_msg_and_wait_ack(mbox, msg); + if (ret) + return -EINVAL; + + msg = bcm74110_mbox_create_msg(1, 0, BCM_MSG_SVC_INIT, + BCM_MSG_FUNC_SHMEM_TX, + BCM_MBOX_HAB_MEM_IDX_START, + BCM_MBOX_HAB_MEM_IDX_SIZE); + ret = bcm74110_mbox_tx_msg_and_wait_ack(mbox, msg); + if (ret) + return -EINVAL; + + msg = bcm74110_mbox_create_msg(1, 0, BCM_MSG_SVC_INIT, + BCM_MSG_FUNC_SHMEM_RX, + BCM_MBOX_HAB_MEM_IDX_START, + BCM_MBOX_HAB_MEM_IDX_SIZE); + ret = bcm74110_mbox_tx_msg_and_wait_ack(mbox, msg); + if (ret) + return -EINVAL; + + return 0; +} + +static int bcm74110_mbox_init(struct bcm74110_mbox *mbox) +{ + int ret = 0; + + /* Disable queues tx/rx */ + bcm74110_tx_writel(mbox, 0x0, BCM_MBOX_CTRL); + + /* Clear status & restart tx/rx*/ + bcm74110_tx_writel(mbox, BCM_MBOX_CTRL_EN | BCM_MBOX_CTRL_CLR, + BCM_MBOX_CTRL); + + /* Unmask irq */ + bcm74110_irq_writel(mbox, BCM_MBOX_IRQ_NOT_EMPTY, BCM_MBOX_IRQ_MASK_CLEAR); + + ret = bcm74110_mbox_link_training(mbox); + if (ret) { + dev_err(&mbox->pdev->dev, "Training failed\n"); + return ret; + } + + return bcm74110_mbox_shmem_init(mbox); +} + +static int bcm74110_mbox_send_data(struct mbox_chan *chan, void *data) +{ + struct bcm74110_mbox_chan *chan_priv = chan->con_priv; + u32 msg; + + switch (chan_priv->type) { + case BCM_MSG_SVC_PMC: + case BCM_MSG_SVC_SCMI: + case BCM_MSG_SVC_DPFE: + msg = bcm74110_mbox_create_msg(1, 0, chan_priv->type, 0, + 128 + 28, chan_priv->slot); + break; + default: + return -EINVAL; + }; + + return bcm74110_mbox_tx_msg(chan_priv->mbox, msg); +} + +static int bcm74110_mbox_chan_startup(struct mbox_chan *chan) +{ + struct bcm74110_mbox_chan *chan_priv = chan->con_priv; + + chan_priv->en = true; + + return 0; +} + +static void bcm74110_mbox_chan_shutdown(struct mbox_chan *chan) +{ + struct bcm74110_mbox_chan *chan_priv = chan->con_priv; + + chan_priv->en = false; +} + +static const struct mbox_chan_ops bcm74110_mbox_chan_ops = { + .send_data = bcm74110_mbox_send_data, + .startup = bcm74110_mbox_chan_startup, + .shutdown = bcm74110_mbox_chan_shutdown, +}; + +static void bcm74110_mbox_shutdown(struct platform_device *pdev) +{ + struct bcm74110_mbox *mbox = dev_get_drvdata(&pdev->dev); + u32 msg; + + msg = bcm74110_mbox_create_msg(1, 0, BCM_MSG_SVC_INIT, + BCM_MSG_FUNC_LINK_STOP, + 0, 0); + + bcm74110_mbox_tx_msg_and_wait_ack(mbox, msg); + + /* Even if we don't receive ACK, lets shut it down */ + + bcm74110_mbox_mask_and_clear(mbox); + + /* Disable queues tx/rx */ + bcm74110_tx_writel(mbox, 0x0, BCM_MBOX_CTRL); + + /* Flush queues */ + bcm74110_rx_flush_msg(mbox); +} + +static struct mbox_chan *bcm74110_mbox_of_xlate(struct mbox_controller *cntrl, + const struct of_phandle_args *p) +{ + struct bcm74110_mbox *mbox = bcm74110_mbox_from_cntrl(cntrl); + struct device *dev = &mbox->pdev->dev; + struct bcm74110_mbox_chan *chan_priv; + int slot, type; + + if (p->args_count != 2) { + dev_err(dev, "Invalid arguments\n"); + return ERR_PTR(-EINVAL); + } + + type = p->args[0]; + slot = p->args[1]; + + switch (type) { + case BCM_MSG_SVC_PMC: + case BCM_MSG_SVC_SCMI: + case BCM_MSG_SVC_DPFE: + if (slot > BCM_MBOX_HAB_MEM_IDX_SIZE) { + dev_err(dev, "Not enough shared memory\n"); + return ERR_PTR(-EINVAL); + } + chan_priv = cntrl->chans[type].con_priv; + chan_priv->slot = slot; + chan_priv->type = type; + break; + default: + dev_err(dev, "Invalid channel type: %d\n", type); + return ERR_PTR(-EINVAL); + }; + + return &cntrl->chans[type]; +} + +static int bcm74110_mbox_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct bcm74110_mbox *mbox; + int i, ret; + + mbox = devm_kzalloc(dev, sizeof(*mbox), GFP_KERNEL); + if (!mbox) + return -ENOMEM; + + mbox->pdev = pdev; + platform_set_drvdata(pdev, mbox); + + mbox->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(mbox->base)) + return dev_err_probe(dev, PTR_ERR(mbox->base), "Failed to iomap\n"); + + ret = of_property_read_u32(dev->of_node, "brcm,tx", &mbox->tx_chan); + if (ret) + return dev_err_probe(dev, ret, "Failed to find tx channel\n"); + + ret = of_property_read_u32(dev->of_node, "brcm,rx", &mbox->rx_chan); + if (ret) + return dev_err_probe(dev, ret, "Failed to find rx channel\n"); + + mbox->rx_irq = platform_get_irq(pdev, 0); + if (mbox->rx_irq < 0) + return mbox->rx_irq; + + INIT_LIST_HEAD(&mbox->rx_svc_init_list); + spin_lock_init(&mbox->rx_svc_list_lock); + bcm74110_mbox_mask_and_clear(mbox); + + ret = devm_request_irq(dev, mbox->rx_irq, bcm74110_mbox_isr, + IRQF_NO_SUSPEND, pdev->name, mbox); + if (ret) + return dev_err_probe(dev, ret, "Failed to request irq\n"); + + mbox->controller.ops = &bcm74110_mbox_chan_ops; + mbox->controller.dev = dev; + mbox->controller.num_chans = BCM_MSG_SVC_MAX; + mbox->controller.of_xlate = &bcm74110_mbox_of_xlate; + mbox->controller.chans = devm_kcalloc(dev, BCM_MSG_SVC_MAX, + sizeof(*mbox->controller.chans), + GFP_KERNEL); + if (!mbox->controller.chans) + return -ENOMEM; + + mbox->mbox_chan = devm_kcalloc(dev, BCM_MSG_SVC_MAX, + sizeof(*mbox->mbox_chan), + GFP_KERNEL); + if (!mbox->mbox_chan) + return -ENOMEM; + + for (i = 0; i < BCM_MSG_SVC_MAX; i++) { + mbox->mbox_chan[i].mbox = mbox; + mbox->controller.chans[i].con_priv = &mbox->mbox_chan[i]; + } + + ret = devm_mbox_controller_register(dev, &mbox->controller); + if (ret) + return ret; + + ret = bcm74110_mbox_init(mbox); + if (ret) + return ret; + + return 0; +} + +static const struct of_device_id bcm74110_mbox_of_match[] = { + { .compatible = "brcm,bcm74110-mbox", }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, bcm74110_mbox_of_match); + +static struct platform_driver bcm74110_mbox_driver = { + .driver = { + .name = "bcm74110-mbox", + .of_match_table = bcm74110_mbox_of_match, + }, + .probe = bcm74110_mbox_probe, + .shutdown = bcm74110_mbox_shutdown, +}; +module_platform_driver(bcm74110_mbox_driver); + +MODULE_AUTHOR("Justin Chen "); +MODULE_DESCRIPTION("BCM74110 mailbox driver"); +MODULE_LICENSE("GPL"); From 9bdaf9a96d04c9c520e720ebb2211550331f15ac Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Sun, 13 Jul 2025 10:05:28 +0200 Subject: [PATCH 139/279] dt-bindings: mailbox: qcom-ipcc: document the Milos Inter-Processor Communication Controller Document the Inter-Processor Communication Controller on the Milos SoC. Signed-off-by: Luca Weiss Acked-by: Rob Herring (Arm) Signed-off-by: Jassi Brar --- Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml b/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml index f69c0ec5d19d..e5c423130db6 100644 --- a/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml +++ b/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml @@ -24,6 +24,7 @@ properties: compatible: items: - enum: + - qcom,milos-ipcc - qcom,qcs8300-ipcc - qcom,qdu1000-ipcc - qcom,sa8255p-ipcc From b8fa5e827f2056c207f5c447e48e33af28bc1c19 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Jun 2025 13:57:08 +0200 Subject: [PATCH 140/279] dt-bindings: mailbox: amlogic,meson-gxbb-mhu: Add missing interrupts maxItems Lists should have fixed constraint, so add missing maxItems to the "interrupts" property. Since minItems=maxItems, the minItems is implied by dtschema so can be dropped. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Martin Blumenstingl Acked-by: Rob Herring (Arm) Signed-off-by: Jassi Brar --- .../devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml b/Documentation/devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml index 385809ed1569..0849799ee0c5 100644 --- a/Documentation/devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml +++ b/Documentation/devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml @@ -27,7 +27,7 @@ properties: maxItems: 1 interrupts: - minItems: 3 + maxItems: 3 description: Contains the interrupt information corresponding to each of the 3 links of MHU. From f869e8f7da4d73a6d0d106d6891d81937b52de12 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Jun 2025 13:57:09 +0200 Subject: [PATCH 141/279] dt-bindings: mailbox: ti,secure-proxy: Add missing reg maxItems Lists should have fixed constraint, so add missing maxItems to the "reg" property. Since minItems=maxItems, the minItems is implied by dtschema so can be dropped. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Nishanth Menon Acked-by: Rob Herring (Arm) Signed-off-by: Jassi Brar --- Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml b/Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml index eea822861804..682ccd76f5c2 100644 --- a/Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml +++ b/Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml @@ -36,7 +36,7 @@ properties: - const: scfg reg: - minItems: 3 + maxItems: 3 interrupt-names: minItems: 1 From 5682a215daae506af20d2caf3e9811ccfb24caf3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Jun 2025 13:57:10 +0200 Subject: [PATCH 142/279] dt-bindings: mailbox: Correct example indentation DTS example in the bindings should be indented with 2- or 4-spaces, so correct a mixture of different styles to keep consistent 4-spaces. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Nishanth Menon Reviewed-by: Sven Peter Acked-by: Rob Herring (Arm) Signed-off-by: Jassi Brar --- .../mailbox/allwinner,sun6i-a31-msgbox.yaml | 14 +++++++------- .../bindings/mailbox/amlogic,meson-gxbb-mhu.yaml | 8 ++++---- .../bindings/mailbox/apple,mailbox.yaml | 16 ++++++++-------- .../bindings/mailbox/ti,secure-proxy.yaml | 16 ++++++++-------- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/Documentation/devicetree/bindings/mailbox/allwinner,sun6i-a31-msgbox.yaml b/Documentation/devicetree/bindings/mailbox/allwinner,sun6i-a31-msgbox.yaml index 75d5d97305e1..87d31963c1b7 100644 --- a/Documentation/devicetree/bindings/mailbox/allwinner,sun6i-a31-msgbox.yaml +++ b/Documentation/devicetree/bindings/mailbox/allwinner,sun6i-a31-msgbox.yaml @@ -68,13 +68,13 @@ examples: #include msgbox: mailbox@1c17000 { - compatible = "allwinner,sun8i-h3-msgbox", - "allwinner,sun6i-a31-msgbox"; - reg = <0x01c17000 0x1000>; - clocks = <&ccu CLK_BUS_MSGBOX>; - resets = <&ccu RST_BUS_MSGBOX>; - interrupts = ; - #mbox-cells = <1>; + compatible = "allwinner,sun8i-h3-msgbox", + "allwinner,sun6i-a31-msgbox"; + reg = <0x01c17000 0x1000>; + clocks = <&ccu CLK_BUS_MSGBOX>; + resets = <&ccu RST_BUS_MSGBOX>; + interrupts = ; + #mbox-cells = <1>; }; ... diff --git a/Documentation/devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml b/Documentation/devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml index 0849799ee0c5..79963c9878ba 100644 --- a/Documentation/devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml +++ b/Documentation/devicetree/bindings/mailbox/amlogic,meson-gxbb-mhu.yaml @@ -46,8 +46,8 @@ additionalProperties: false examples: - | mailbox@c883c404 { - compatible = "amlogic,meson-gxbb-mhu"; - reg = <0xc883c404 0x4c>; - interrupts = <208>, <209>, <210>; - #mbox-cells = <1>; + compatible = "amlogic,meson-gxbb-mhu"; + reg = <0xc883c404 0x4c>; + interrupts = <208>, <209>, <210>; + #mbox-cells = <1>; }; diff --git a/Documentation/devicetree/bindings/mailbox/apple,mailbox.yaml b/Documentation/devicetree/bindings/mailbox/apple,mailbox.yaml index 4c0668e5f0bd..474c1a0f99f3 100644 --- a/Documentation/devicetree/bindings/mailbox/apple,mailbox.yaml +++ b/Documentation/devicetree/bindings/mailbox/apple,mailbox.yaml @@ -78,11 +78,11 @@ additionalProperties: false examples: - | - mailbox@77408000 { - compatible = "apple,t8103-asc-mailbox", "apple,asc-mailbox-v4"; - reg = <0x77408000 0x4000>; - interrupts = <1 583 4>, <1 584 4>, <1 585 4>, <1 586 4>; - interrupt-names = "send-empty", "send-not-empty", - "recv-empty", "recv-not-empty"; - #mbox-cells = <0>; - }; + mailbox@77408000 { + compatible = "apple,t8103-asc-mailbox", "apple,asc-mailbox-v4"; + reg = <0x77408000 0x4000>; + interrupts = <1 583 4>, <1 584 4>, <1 585 4>, <1 586 4>; + interrupt-names = "send-empty", "send-not-empty", + "recv-empty", "recv-not-empty"; + #mbox-cells = <0>; + }; diff --git a/Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml b/Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml index 682ccd76f5c2..c321b69f0ccd 100644 --- a/Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml +++ b/Documentation/devicetree/bindings/mailbox/ti,secure-proxy.yaml @@ -68,12 +68,12 @@ examples: - | #include secure_proxy: mailbox@32c00000 { - compatible = "ti,am654-secure-proxy"; - #mbox-cells = <1>; - reg-names = "target_data", "rt", "scfg"; - reg = <0x32c00000 0x100000>, - <0x32400000 0x100000>, - <0x32800000 0x100000>; - interrupt-names = "rx_011"; - interrupts = ; + compatible = "ti,am654-secure-proxy"; + #mbox-cells = <1>; + reg-names = "target_data", "rt", "scfg"; + reg = <0x32c00000 0x100000>, + <0x32400000 0x100000>, + <0x32800000 0x100000>; + interrupt-names = "rx_011"; + interrupts = ; }; From cc0dce769bcedb621f2c5535c31570cd51bc0235 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Jun 2025 13:57:11 +0200 Subject: [PATCH 143/279] dt-bindings: mailbox: nvidia,tegra186-hsp: Use generic node name According to Devicetree specifications, device node names should be generic, thus Mailbox provider should be called "mailbox", not "hsp". Signed-off-by: Krzysztof Kozlowski Acked-by: Thierry Reding Acked-by: Rob Herring (Arm) Signed-off-by: Jassi Brar --- .../devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml b/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml index d97050e40fbf..307dea3fd83f 100644 --- a/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml +++ b/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml @@ -59,9 +59,6 @@ description: | properties: - $nodename: - pattern: "^hsp@[0-9a-f]+$" - compatible: oneOf: - enum: @@ -131,7 +128,7 @@ examples: #include #include - hsp_top0: hsp@3c00000 { + hsp_top0: mailbox@3c00000 { compatible = "nvidia,tegra186-hsp"; reg = <0x03c00000 0xa0000>; interrupts = ; From b92f05bc61e23de4f4bb83647c295a4b84047eef Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Jun 2025 13:57:12 +0200 Subject: [PATCH 144/279] dt-bindings: mailbox: Drop consumers example DTS Providers DTS examples should not contain consumer nodes, because they are completely redundant, obvious (defined in common schema) and add unnecessary bloat. Drop consumer examples and unneeded node labels. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Nishanth Menon Acked-by: Rob Herring (Arm) Signed-off-by: Jassi Brar --- .../bindings/mailbox/nvidia,tegra186-hsp.yaml | 6 +----- .../bindings/mailbox/qcom,apcs-kpss-global.yaml | 9 +-------- .../devicetree/bindings/mailbox/ti,omap-mailbox.yaml | 10 +++------- 3 files changed, 5 insertions(+), 20 deletions(-) diff --git a/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml b/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml index 307dea3fd83f..f833b845de0d 100644 --- a/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml +++ b/Documentation/devicetree/bindings/mailbox/nvidia,tegra186-hsp.yaml @@ -128,14 +128,10 @@ examples: #include #include - hsp_top0: mailbox@3c00000 { + mailbox@3c00000 { compatible = "nvidia,tegra186-hsp"; reg = <0x03c00000 0xa0000>; interrupts = ; interrupt-names = "doorbell"; #mbox-cells = <2>; }; - - client { - mboxes = <&hsp_top0 TEGRA_HSP_MBOX_TYPE_DB TEGRA_HSP_DB_MASTER_CCPLEX>; - }; diff --git a/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml b/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml index ac726136f7e5..615ed103b7e6 100644 --- a/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml +++ b/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml @@ -251,7 +251,7 @@ examples: # Example apcs with msm8996 - | #include - apcs_glb: mailbox@9820000 { + mailbox@9820000 { compatible = "qcom,msm8996-apcs-hmss-global"; reg = <0x9820000 0x1000>; @@ -259,13 +259,6 @@ examples: #clock-cells = <0>; }; - rpm-glink { - compatible = "qcom,glink-rpm"; - interrupts = ; - qcom,rpm-msg-ram = <&rpm_msg_ram>; - mboxes = <&apcs_glb 0>; - }; - # Example apcs with qcs404 - | #define GCC_APSS_AHB_CLK_SRC 1 diff --git a/Documentation/devicetree/bindings/mailbox/ti,omap-mailbox.yaml b/Documentation/devicetree/bindings/mailbox/ti,omap-mailbox.yaml index 1a2001e58880..8504ceb64806 100644 --- a/Documentation/devicetree/bindings/mailbox/ti,omap-mailbox.yaml +++ b/Documentation/devicetree/bindings/mailbox/ti,omap-mailbox.yaml @@ -242,7 +242,7 @@ examples: - | /* OMAP4 */ #include - mailbox: mailbox@4a0f4000 { + mailbox@4a0f4000 { compatible = "ti,omap4-mailbox"; reg = <0x4a0f4000 0x200>; interrupts = ; @@ -260,13 +260,9 @@ examples: }; }; - dsp { - mboxes = <&mailbox &mbox_dsp>; - }; - - | /* AM33xx */ - mailbox1: mailbox@480c8000 { + mailbox@480c8000 { compatible = "ti,omap4-mailbox"; reg = <0x480c8000 0x200>; interrupts = <77>; @@ -283,7 +279,7 @@ examples: - | /* AM65x */ - mailbox0_cluster0: mailbox@31f80000 { + mailbox@31f80000 { compatible = "ti,am654-mailbox"; reg = <0x31f80000 0x200>; #mbox-cells = <1>; From 7d33dd2d0e6825d4d2a61d06ae609dce17b56a3a Mon Sep 17 00:00:00 2001 From: Jammy Huang Date: Tue, 22 Jul 2025 09:31:16 +0800 Subject: [PATCH 145/279] dt-bindings: mailbox: Add ASPEED AST2700 series SoC Introduce the mailbox module for AST27XX series SoC, which is responsible for interchanging messages between asymmetric processors. Signed-off-by: Jammy Huang Reviewed-by: Krzysztof Kozlowski Reviewed-by: Andrew Jeffery Signed-off-by: Jassi Brar --- .../mailbox/aspeed,ast2700-mailbox.yaml | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 Documentation/devicetree/bindings/mailbox/aspeed,ast2700-mailbox.yaml diff --git a/Documentation/devicetree/bindings/mailbox/aspeed,ast2700-mailbox.yaml b/Documentation/devicetree/bindings/mailbox/aspeed,ast2700-mailbox.yaml new file mode 100644 index 000000000000..600e2d63fccd --- /dev/null +++ b/Documentation/devicetree/bindings/mailbox/aspeed,ast2700-mailbox.yaml @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mailbox/aspeed,ast2700-mailbox.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ASPEED AST2700 mailbox controller + +maintainers: + - Jammy Huang + +description: > + ASPEED AST2700 has multiple processors that need to communicate with each + other. The mailbox controller provides a way for these processors to send + messages to each other. It is a hardware-based inter-processor communication + mechanism that allows processors to send and receive messages through + dedicated channels. + + The mailbox's tx/rx are independent, meaning that one processor can send a + message while another processor is receiving a message simultaneously. + There are 4 channels available for both tx and rx operations. Each channel + has a FIFO buffer that can hold messages of a fixed size (32 bytes in this + case). + + The mailbox controller also supports interrupt generation, allowing + processors to notify each other when a message is available or when an event + occurs. + +properties: + compatible: + const: aspeed,ast2700-mailbox + + reg: + items: + - description: TX control register + - description: RX control register + + reg-names: + items: + - const: tx + - const: rx + + interrupts: + maxItems: 1 + + "#mbox-cells": + const: 1 + +required: + - compatible + - reg + - reg-names + - interrupts + - "#mbox-cells" + +additionalProperties: false + +examples: + - | + #include + + mailbox@12c1c200 { + compatible = "aspeed,ast2700-mailbox"; + reg = <0x12c1c200 0x100>, <0x12c1c300 0x100>; + reg-names = "tx", "rx"; + interrupts = ; + #mbox-cells = <1>; + }; From ae524eb766460a9f7957bf2db0968c9cccb71d90 Mon Sep 17 00:00:00 2001 From: Jammy Huang Date: Tue, 22 Jul 2025 09:31:17 +0800 Subject: [PATCH 146/279] mailbox: aspeed: add mailbox driver for AST27XX series SoC Add mailbox controller driver for AST27XX SoCs, which provides independent tx/rx mailbox between different processors. There are 4 channels for each tx/rx mailbox and each channel has an 32-byte FIFO. Signed-off-by: Jammy Huang Reviewed-by: Andrew Jeffery Signed-off-by: Jassi Brar --- drivers/mailbox/Kconfig | 9 ++ drivers/mailbox/Makefile | 2 + drivers/mailbox/ast2700-mailbox.c | 235 ++++++++++++++++++++++++++++++ 3 files changed, 246 insertions(+) create mode 100644 drivers/mailbox/ast2700-mailbox.c diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig index 9abf193acd0b..02432d4a5ccd 100644 --- a/drivers/mailbox/Kconfig +++ b/drivers/mailbox/Kconfig @@ -36,6 +36,15 @@ config ARM_MHU_V3 that provides different means of transports: supported extensions will be discovered and possibly managed at probe-time. +config AST2700_MBOX + tristate "ASPEED AST2700 IPC driver" + depends on ARCH_ASPEED || COMPILE_TEST + help + Mailbox driver implementation for ASPEED AST27XX SoCs. This driver + can be used to send message between different processors in SoC. + The driver provides mailbox support for sending interrupts to the + clients. Say Y here if you want to build this driver. + config CV1800_MBOX tristate "cv1800 mailbox" depends on ARCH_SOPHGO || COMPILE_TEST diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile index c0a4278aa129..98a68f838486 100644 --- a/drivers/mailbox/Makefile +++ b/drivers/mailbox/Makefile @@ -11,6 +11,8 @@ obj-$(CONFIG_ARM_MHU_V2) += arm_mhuv2.o obj-$(CONFIG_ARM_MHU_V3) += arm_mhuv3.o +obj-$(CONFIG_AST2700_MBOX) += ast2700-mailbox.o + obj-$(CONFIG_CV1800_MBOX) += cv1800-mailbox.o obj-$(CONFIG_EXYNOS_MBOX) += exynos-mailbox.o diff --git a/drivers/mailbox/ast2700-mailbox.c b/drivers/mailbox/ast2700-mailbox.c new file mode 100644 index 000000000000..83c6afe5411f --- /dev/null +++ b/drivers/mailbox/ast2700-mailbox.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright Aspeed Technology Inc. (C) 2025. All rights reserved + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Each bit in the register represents an IPC ID */ +#define IPCR_TX_TRIG 0x00 +#define IPCR_ENABLE 0x04 +#define IPCR_STATUS 0x08 +#define RX_IRQ(n) BIT(n) +#define RX_IRQ_MASK 0xf +#define IPCR_DATA 0x10 + +struct ast2700_mbox_data { + u8 num_chans; + u8 msg_size; +}; + +struct ast2700_mbox { + struct mbox_controller mbox; + u8 msg_size; + void __iomem *tx_regs; + void __iomem *rx_regs; + spinlock_t lock; +}; + +static inline int ch_num(struct mbox_chan *chan) +{ + return chan - chan->mbox->chans; +} + +static inline bool ast2700_mbox_tx_done(struct ast2700_mbox *mb, int idx) +{ + return !(readl(mb->tx_regs + IPCR_STATUS) & BIT(idx)); +} + +static irqreturn_t ast2700_mbox_irq(int irq, void *p) +{ + struct ast2700_mbox *mb = p; + void __iomem *data_reg; + int num_words = mb->msg_size / sizeof(u32); + u32 *word_data; + u32 status; + int n, i; + + /* Only examine channels that are currently enabled. */ + status = readl(mb->rx_regs + IPCR_ENABLE) & + readl(mb->rx_regs + IPCR_STATUS); + + if (!(status & RX_IRQ_MASK)) + return IRQ_NONE; + + for (n = 0; n < mb->mbox.num_chans; ++n) { + struct mbox_chan *chan = &mb->mbox.chans[n]; + + if (!(status & RX_IRQ(n))) + continue; + + data_reg = mb->rx_regs + IPCR_DATA + mb->msg_size * n; + word_data = chan->con_priv; + /* Read the message data */ + for (i = 0; i < num_words; i++) + word_data[i] = readl(data_reg + i * sizeof(u32)); + + mbox_chan_received_data(chan, chan->con_priv); + + /* The IRQ can be cleared only once the FIFO is empty. */ + writel(RX_IRQ(n), mb->rx_regs + IPCR_STATUS); + } + + return IRQ_HANDLED; +} + +static int ast2700_mbox_send_data(struct mbox_chan *chan, void *data) +{ + struct ast2700_mbox *mb = dev_get_drvdata(chan->mbox->dev); + int idx = ch_num(chan); + void __iomem *data_reg = mb->tx_regs + IPCR_DATA + mb->msg_size * idx; + u32 *word_data = data; + int num_words = mb->msg_size / sizeof(u32); + int i; + + if (!(readl(mb->tx_regs + IPCR_ENABLE) & BIT(idx))) { + dev_warn(mb->mbox.dev, "%s: Ch-%d not enabled yet\n", __func__, idx); + return -ENODEV; + } + + if (!(ast2700_mbox_tx_done(mb, idx))) { + dev_warn(mb->mbox.dev, "%s: Ch-%d last data has not finished\n", __func__, idx); + return -EBUSY; + } + + /* Write the message data */ + for (i = 0 ; i < num_words; i++) + writel(word_data[i], data_reg + i * sizeof(u32)); + + writel(BIT(idx), mb->tx_regs + IPCR_TX_TRIG); + dev_dbg(mb->mbox.dev, "%s: Ch-%d sent\n", __func__, idx); + + return 0; +} + +static int ast2700_mbox_startup(struct mbox_chan *chan) +{ + struct ast2700_mbox *mb = dev_get_drvdata(chan->mbox->dev); + int idx = ch_num(chan); + void __iomem *reg = mb->rx_regs + IPCR_ENABLE; + unsigned long flags; + + spin_lock_irqsave(&mb->lock, flags); + writel(readl(reg) | BIT(idx), reg); + spin_unlock_irqrestore(&mb->lock, flags); + + return 0; +} + +static void ast2700_mbox_shutdown(struct mbox_chan *chan) +{ + struct ast2700_mbox *mb = dev_get_drvdata(chan->mbox->dev); + int idx = ch_num(chan); + void __iomem *reg = mb->rx_regs + IPCR_ENABLE; + unsigned long flags; + + spin_lock_irqsave(&mb->lock, flags); + writel(readl(reg) & ~BIT(idx), reg); + spin_unlock_irqrestore(&mb->lock, flags); +} + +static bool ast2700_mbox_last_tx_done(struct mbox_chan *chan) +{ + struct ast2700_mbox *mb = dev_get_drvdata(chan->mbox->dev); + int idx = ch_num(chan); + + return ast2700_mbox_tx_done(mb, idx); +} + +static const struct mbox_chan_ops ast2700_mbox_chan_ops = { + .send_data = ast2700_mbox_send_data, + .startup = ast2700_mbox_startup, + .shutdown = ast2700_mbox_shutdown, + .last_tx_done = ast2700_mbox_last_tx_done, +}; + +static int ast2700_mbox_probe(struct platform_device *pdev) +{ + struct ast2700_mbox *mb; + const struct ast2700_mbox_data *dev_data; + struct device *dev = &pdev->dev; + int irq, ret; + + if (!pdev->dev.of_node) + return -ENODEV; + + dev_data = device_get_match_data(&pdev->dev); + + mb = devm_kzalloc(dev, sizeof(*mb), GFP_KERNEL); + if (!mb) + return -ENOMEM; + + mb->mbox.chans = devm_kcalloc(&pdev->dev, dev_data->num_chans, + sizeof(*mb->mbox.chans), GFP_KERNEL); + if (!mb->mbox.chans) + return -ENOMEM; + + /* con_priv of each channel is used to store the message received */ + for (int i = 0; i < dev_data->num_chans; i++) { + mb->mbox.chans[i].con_priv = devm_kcalloc(dev, dev_data->msg_size, + sizeof(u8), GFP_KERNEL); + if (!mb->mbox.chans[i].con_priv) + return -ENOMEM; + } + + platform_set_drvdata(pdev, mb); + + mb->tx_regs = devm_platform_ioremap_resource_byname(pdev, "tx"); + if (IS_ERR(mb->tx_regs)) + return PTR_ERR(mb->tx_regs); + + mb->rx_regs = devm_platform_ioremap_resource_byname(pdev, "rx"); + if (IS_ERR(mb->rx_regs)) + return PTR_ERR(mb->rx_regs); + + mb->msg_size = dev_data->msg_size; + mb->mbox.dev = dev; + mb->mbox.num_chans = dev_data->num_chans; + mb->mbox.ops = &ast2700_mbox_chan_ops; + mb->mbox.txdone_irq = false; + mb->mbox.txdone_poll = true; + mb->mbox.txpoll_period = 5; + spin_lock_init(&mb->lock); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + ret = devm_request_irq(dev, irq, ast2700_mbox_irq, 0, dev_name(dev), mb); + if (ret) + return ret; + + return devm_mbox_controller_register(dev, &mb->mbox); +} + +static const struct ast2700_mbox_data ast2700_dev_data = { + .num_chans = 4, + .msg_size = 0x20, +}; + +static const struct of_device_id ast2700_mbox_of_match[] = { + { .compatible = "aspeed,ast2700-mailbox", .data = &ast2700_dev_data }, + {} +}; +MODULE_DEVICE_TABLE(of, ast2700_mbox_of_match); + +static struct platform_driver ast2700_mbox_driver = { + .driver = { + .name = "ast2700-mailbox", + .of_match_table = ast2700_mbox_of_match, + }, + .probe = ast2700_mbox_probe, +}; +module_platform_driver(ast2700_mbox_driver); + +MODULE_AUTHOR("Jammy Huang "); +MODULE_DESCRIPTION("ASPEED AST2700 IPC driver"); +MODULE_LICENSE("GPL"); From 7fbb5a5672cce49dc0e1d54fd15621eec9d48448 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 25 Jul 2025 15:56:10 +0800 Subject: [PATCH 147/279] mailbox: bcm74110: remove unneeded semicolon No functional modification involved. ./drivers/mailbox/bcm74110-mailbox.c:483:2-3: Unneeded semicolon. ./drivers/mailbox/bcm74110-mailbox.c:563:2-3: Unneeded semicolon. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=22936 Signed-off-by: Jiapeng Chong Signed-off-by: Jassi Brar --- drivers/mailbox/bcm74110-mailbox.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mailbox/bcm74110-mailbox.c b/drivers/mailbox/bcm74110-mailbox.c index 0680be8dc18f..623ece0b2c1e 100644 --- a/drivers/mailbox/bcm74110-mailbox.c +++ b/drivers/mailbox/bcm74110-mailbox.c @@ -480,7 +480,7 @@ static int bcm74110_mbox_send_data(struct mbox_chan *chan, void *data) break; default: return -EINVAL; - }; + } return bcm74110_mbox_tx_msg(chan_priv->mbox, msg); } @@ -560,7 +560,7 @@ static struct mbox_chan *bcm74110_mbox_of_xlate(struct mbox_controller *cntrl, default: dev_err(dev, "Invalid channel type: %d\n", type); return ERR_PTR(-EINVAL); - }; + } return &cntrl->chans[type]; } From 75f1fbc9fd409a0c232dc78871ee7df186da9d57 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 23 Jul 2025 10:53:15 +0100 Subject: [PATCH 148/279] mailbox: bcm74110: Fix spelling mistake There is a spelling mistake in the author's email address. Fix it. Signed-off-by: Colin Ian King Reviewed-by: Justin Chen Signed-off-by: Jassi Brar --- drivers/mailbox/bcm74110-mailbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mailbox/bcm74110-mailbox.c b/drivers/mailbox/bcm74110-mailbox.c index 623ece0b2c1e..2e7e86f3e6a4 100644 --- a/drivers/mailbox/bcm74110-mailbox.c +++ b/drivers/mailbox/bcm74110-mailbox.c @@ -651,6 +651,6 @@ static struct platform_driver bcm74110_mbox_driver = { }; module_platform_driver(bcm74110_mbox_driver); -MODULE_AUTHOR("Justin Chen "); +MODULE_AUTHOR("Justin Chen "); MODULE_DESCRIPTION("BCM74110 mailbox driver"); MODULE_LICENSE("GPL"); From 01027a62b508c48c762096f347de925eedcbd008 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 4 Aug 2025 14:15:50 +0200 Subject: [PATCH 149/279] smb: server: remove separate empty_recvmsg_queue There's no need to maintain two lists, we can just have a single list of receive buffers, which are free to use. Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 60 +++++----------------------------- 1 file changed, 8 insertions(+), 52 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index c6cbe0d56e32..393254109fc4 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -129,9 +129,6 @@ struct smb_direct_transport { spinlock_t recvmsg_queue_lock; struct list_head recvmsg_queue; - spinlock_t empty_recvmsg_queue_lock; - struct list_head empty_recvmsg_queue; - int send_credit_target; atomic_t send_credits; spinlock_t lock_new_recv_credits; @@ -276,32 +273,6 @@ static void put_recvmsg(struct smb_direct_transport *t, spin_unlock(&t->recvmsg_queue_lock); } -static struct -smb_direct_recvmsg *get_empty_recvmsg(struct smb_direct_transport *t) -{ - struct smb_direct_recvmsg *recvmsg = NULL; - - spin_lock(&t->empty_recvmsg_queue_lock); - if (!list_empty(&t->empty_recvmsg_queue)) { - recvmsg = list_first_entry(&t->empty_recvmsg_queue, - struct smb_direct_recvmsg, list); - list_del(&recvmsg->list); - } - spin_unlock(&t->empty_recvmsg_queue_lock); - return recvmsg; -} - -static void put_empty_recvmsg(struct smb_direct_transport *t, - struct smb_direct_recvmsg *recvmsg) -{ - ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr, - recvmsg->sge.length, DMA_FROM_DEVICE); - - spin_lock(&t->empty_recvmsg_queue_lock); - list_add_tail(&recvmsg->list, &t->empty_recvmsg_queue); - spin_unlock(&t->empty_recvmsg_queue_lock); -} - static void enqueue_reassembly(struct smb_direct_transport *t, struct smb_direct_recvmsg *recvmsg, int data_length) @@ -386,9 +357,6 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) spin_lock_init(&t->recvmsg_queue_lock); INIT_LIST_HEAD(&t->recvmsg_queue); - spin_lock_init(&t->empty_recvmsg_queue_lock); - INIT_LIST_HEAD(&t->empty_recvmsg_queue); - init_waitqueue_head(&t->wait_send_pending); atomic_set(&t->send_pending, 0); @@ -554,7 +522,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) wc->opcode); smb_direct_disconnect_rdma_connection(t); } - put_empty_recvmsg(t, recvmsg); + put_recvmsg(t, recvmsg); return; } @@ -568,7 +536,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) switch (recvmsg->type) { case SMB_DIRECT_MSG_NEGOTIATE_REQ: if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) { - put_empty_recvmsg(t, recvmsg); + put_recvmsg(t, recvmsg); return; } t->negotiation_requested = true; @@ -585,7 +553,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) if (wc->byte_len < offsetof(struct smb_direct_data_transfer, padding)) { - put_empty_recvmsg(t, recvmsg); + put_recvmsg(t, recvmsg); return; } @@ -593,7 +561,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) if (data_length) { if (wc->byte_len < sizeof(struct smb_direct_data_transfer) + (u64)data_length) { - put_empty_recvmsg(t, recvmsg); + put_recvmsg(t, recvmsg); return; } @@ -613,7 +581,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) avail_recvmsg_count = t->count_avail_recvmsg; spin_unlock(&t->receive_credit_lock); } else { - put_empty_recvmsg(t, recvmsg); + put_recvmsg(t, recvmsg); spin_lock(&t->receive_credit_lock); receive_credits = --(t->recv_credits); @@ -811,7 +779,6 @@ static void smb_direct_post_recv_credits(struct work_struct *work) struct smb_direct_recvmsg *recvmsg; int receive_credits, credits = 0; int ret; - int use_free = 1; spin_lock(&t->receive_credit_lock); receive_credits = t->recv_credits; @@ -819,18 +786,9 @@ static void smb_direct_post_recv_credits(struct work_struct *work) if (receive_credits < t->recv_credit_target) { while (true) { - if (use_free) - recvmsg = get_free_recvmsg(t); - else - recvmsg = get_empty_recvmsg(t); - if (!recvmsg) { - if (use_free) { - use_free = 0; - continue; - } else { - break; - } - } + recvmsg = get_free_recvmsg(t); + if (!recvmsg) + break; recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER; recvmsg->first_segment = false; @@ -1806,8 +1764,6 @@ static void smb_direct_destroy_pools(struct smb_direct_transport *t) while ((recvmsg = get_free_recvmsg(t))) mempool_free(recvmsg, t->recvmsg_mempool); - while ((recvmsg = get_empty_recvmsg(t))) - mempool_free(recvmsg, t->recvmsg_mempool); mempool_destroy(t->recvmsg_mempool); t->recvmsg_mempool = NULL; From afb4108c92898350e66b9a009692230bcdd2ac73 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 4 Aug 2025 14:15:51 +0200 Subject: [PATCH 150/279] smb: server: make sure we call ib_dma_unmap_single() only if we called ib_dma_map_single already In case of failures either ib_dma_map_single() might not be called yet or ib_dma_unmap_single() was already called. We should make sure put_recvmsg() only calls ib_dma_unmap_single() if needed. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 393254109fc4..fac82e60ff80 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -265,8 +265,13 @@ smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t) static void put_recvmsg(struct smb_direct_transport *t, struct smb_direct_recvmsg *recvmsg) { - ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr, - recvmsg->sge.length, DMA_FROM_DEVICE); + if (likely(recvmsg->sge.length != 0)) { + ib_dma_unmap_single(t->cm_id->device, + recvmsg->sge.addr, + recvmsg->sge.length, + DMA_FROM_DEVICE); + recvmsg->sge.length = 0; + } spin_lock(&t->recvmsg_queue_lock); list_add(&recvmsg->list, &t->recvmsg_queue); @@ -638,6 +643,7 @@ static int smb_direct_post_recv(struct smb_direct_transport *t, ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr, recvmsg->sge.length, DMA_FROM_DEVICE); + recvmsg->sge.length = 0; smb_direct_disconnect_rdma_connection(t); return ret; } @@ -1819,6 +1825,7 @@ static int smb_direct_create_pools(struct smb_direct_transport *t) if (!recvmsg) goto err; recvmsg->transport = t; + recvmsg->sge.length = 0; list_add(&recvmsg->list, &t->recvmsg_queue); } t->count_avail_recvmsg = t->recv_credit_max; From cfe76fdbb9729c650f3505d9cfb2f70ddda2dbdc Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 4 Aug 2025 14:15:52 +0200 Subject: [PATCH 151/279] smb: server: let recv_done() consistently call put_recvmsg/smb_direct_disconnect_rdma_connection We should call put_recvmsg() before smb_direct_disconnect_rdma_connection() in order to call it before waking up the callers. In all error cases we should call smb_direct_disconnect_rdma_connection() in order to avoid stale connections. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index fac82e60ff80..cd8a92fe372b 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -521,13 +521,13 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) t = recvmsg->transport; if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { + put_recvmsg(t, recvmsg); if (wc->status != IB_WC_WR_FLUSH_ERR) { pr_err("Recv error. status='%s (%d)' opcode=%d\n", ib_wc_status_msg(wc->status), wc->status, wc->opcode); smb_direct_disconnect_rdma_connection(t); } - put_recvmsg(t, recvmsg); return; } @@ -542,6 +542,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) case SMB_DIRECT_MSG_NEGOTIATE_REQ: if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) { put_recvmsg(t, recvmsg); + smb_direct_disconnect_rdma_connection(t); return; } t->negotiation_requested = true; @@ -549,7 +550,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) t->status = SMB_DIRECT_CS_CONNECTED; enqueue_reassembly(t, recvmsg, 0); wake_up_interruptible(&t->wait_status); - break; + return; case SMB_DIRECT_MSG_DATA_TRANSFER: { struct smb_direct_data_transfer *data_transfer = (struct smb_direct_data_transfer *)recvmsg->packet; @@ -559,6 +560,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) if (wc->byte_len < offsetof(struct smb_direct_data_transfer, padding)) { put_recvmsg(t, recvmsg); + smb_direct_disconnect_rdma_connection(t); return; } @@ -567,6 +569,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) if (wc->byte_len < sizeof(struct smb_direct_data_transfer) + (u64)data_length) { put_recvmsg(t, recvmsg); + smb_direct_disconnect_rdma_connection(t); return; } @@ -609,11 +612,16 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count)) mod_delayed_work(smb_direct_wq, &t->post_recv_credits_work, 0); - break; + return; } - default: - break; } + + /* + * This is an internal error! + */ + WARN_ON_ONCE(recvmsg->type != SMB_DIRECT_MSG_DATA_TRANSFER); + put_recvmsg(t, recvmsg); + smb_direct_disconnect_rdma_connection(t); } static int smb_direct_post_recv(struct smb_direct_transport *t, From a6c015b7ac2d8c5233337e5793f50d04fac17669 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 4 Aug 2025 14:15:53 +0200 Subject: [PATCH 152/279] smb: server: let recv_done() avoid touching data_transfer after cleanup/move Calling enqueue_reassembly() and wake_up_interruptible(&t->wait_reassembly_queue) or put_receive_buffer() means the recvmsg/data_transfer pointer might get re-used by another thread, which means these should be the last operations before calling return. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index cd8a92fe372b..8d366db5f605 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -581,16 +581,11 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) else t->full_packet_received = true; - enqueue_reassembly(t, recvmsg, (int)data_length); - wake_up_interruptible(&t->wait_reassembly_queue); - spin_lock(&t->receive_credit_lock); receive_credits = --(t->recv_credits); avail_recvmsg_count = t->count_avail_recvmsg; spin_unlock(&t->receive_credit_lock); } else { - put_recvmsg(t, recvmsg); - spin_lock(&t->receive_credit_lock); receive_credits = --(t->recv_credits); avail_recvmsg_count = ++(t->count_avail_recvmsg); @@ -612,6 +607,13 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count)) mod_delayed_work(smb_direct_wq, &t->post_recv_credits_work, 0); + + if (data_length) { + enqueue_reassembly(t, recvmsg, (int)data_length); + wake_up_interruptible(&t->wait_reassembly_queue); + } else + put_recvmsg(t, recvmsg); + return; } } From b0b73329ebeeb727913f07b5b6bb85e66e03d156 Mon Sep 17 00:00:00 2001 From: Yunseong Kim Date: Wed, 6 Aug 2025 13:22:12 +0000 Subject: [PATCH 153/279] cifs: Fix null-ptr-deref by static initializing global lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A kernel panic can be triggered by reading /proc/fs/cifs/debug_dirs. The crash is a null-ptr-deref inside spin_lock(), caused by the use of the uninitialized global spinlock cifs_tcp_ses_lock. init_cifs() └── cifs_proc_init() └── // User can access /proc/fs/cifs/debug_dirs here └── cifs_debug_dirs_proc_show() └── spin_lock(&cifs_tcp_ses_lock); // Uninitialized! KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] Mem abort info: ESR = 0x0000000096000005 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x05: level 1 translation fault Data abort info: ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [dfff800000000000] address between user and kernel address ranges Internal error: Oops: 0000000096000005 [#1] SMP Modules linked in: CPU: 3 UID: 0 PID: 16435 Comm: stress-ng-procf Not tainted 6.16.0-10385-g79f14b5d84c6 #37 PREEMPT Hardware name: QEMU KVM Virtual Machine, BIOS 2025.02-8ubuntu1 06/11/2025 pstate: 23400005 (nzCv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--) pc : do_raw_spin_lock+0x84/0x2cc lr : _raw_spin_lock+0x24/0x34 sp : ffff8000966477e0 x29: ffff800096647860 x28: ffff800096647b88 x27: ffff0001c0c22070 x26: ffff0003eb2b60c8 x25: ffff0001c0c22018 x24: dfff800000000000 x23: ffff0000f624e000 x22: ffff0003eb2b6020 x21: ffff0000f624e768 x20: 0000000000000004 x19: 0000000000000000 x18: 0000000000000000 x17: 0000000000000000 x16: ffff8000804b9600 x15: ffff700012cc8f04 x14: 1ffff00012cc8f04 x13: 0000000000000004 x12: ffffffffffffffff x11: 1ffff00012cc8f00 x10: ffff80008d9af0d2 x9 : f3f3f304f1f1f1f1 x8 : 0000000000000000 x7 : 7365733c203e6469 x6 : 20656572743c2023 x5 : ffff0000e0ce0044 x4 : ffff80008a4deb6e x3 : ffff8000804b9718 x2 : 0000000000000001 x1 : 0000000000000000 x0 : 0000000000000000 Call trace: do_raw_spin_lock+0x84/0x2cc (P) _raw_spin_lock+0x24/0x34 cifs_debug_dirs_proc_show+0x1ac/0x4c0 seq_read_iter+0x3b0/0xc28 proc_reg_read_iter+0x178/0x2a8 vfs_read+0x5f8/0x88c ksys_read+0x120/0x210 __arm64_sys_read+0x7c/0x90 invoke_syscall+0x98/0x2b8 el0_svc_common+0x130/0x23c do_el0_svc+0x48/0x58 el0_svc+0x40/0x140 el0t_64_sync_handler+0x84/0x12c el0t_64_sync+0x1ac/0x1b0 Code: aa0003f3 f9000feb f2fe7e69 f8386969 (38f86908) ---[ end trace 0000000000000000 ]--- The root cause is an initialization order problem. The lock is declared as a global variable and intended to be initialized during module startup. However, the procfs entry that uses this lock can be accessed by userspace before the spin_lock_init() call has run. This creates a race window where reading the proc file will attempt to use the lock before it is initialized, leading to the crash. For a global lock with a static lifetime, the correct and robust approach is to use compile-time initialization. Fixes: 844e5c0eb176 ("smb3 client: add way to show directory leases for improved debugging") Signed-off-by: Yunseong Kim Signed-off-by: Steve French --- fs/smb/client/cifsfs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 31930b7266db..3bd85ab2deb1 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -77,7 +77,7 @@ unsigned int global_secflags = CIFSSEC_DEF; unsigned int GlobalCurrentXid; /* protected by GlobalMid_Lock */ unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Lock */ unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Lock */ -spinlock_t GlobalMid_Lock; /* protects above & list operations on midQ entries */ +DEFINE_SPINLOCK(GlobalMid_Lock); /* protects above & list operations on midQ entries */ /* * Global counters, updated atomically @@ -97,7 +97,7 @@ atomic_t total_buf_alloc_count; atomic_t total_small_buf_alloc_count; #endif/* STATS2 */ struct list_head cifs_tcp_ses_list; -spinlock_t cifs_tcp_ses_lock; +DEFINE_SPINLOCK(cifs_tcp_ses_lock); static const struct super_operations cifs_super_ops; unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE; module_param(CIFSMaxBufSize, uint, 0444); @@ -1863,8 +1863,6 @@ init_cifs(void) GlobalCurrentXid = 0; GlobalTotalActiveXid = 0; GlobalMaxActiveXid = 0; - spin_lock_init(&cifs_tcp_ses_lock); - spin_lock_init(&GlobalMid_Lock); cifs_lock_secret = get_random_u32(); From 5349ae5e05fa37409fd48a1eb483b199c32c889b Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 4 Aug 2025 14:10:12 +0200 Subject: [PATCH 154/279] smb: client: let send_done() cleanup before calling smbd_disconnect_rdma_connection() We should call ib_dma_unmap_single() and mempool_free() before calling smbd_disconnect_rdma_connection(). And smbd_disconnect_rdma_connection() needs to be the last function to call as all other state might already be gone after it returns. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: f198186aa9bb ("CIFS: SMBD: Establish SMB Direct connection") Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 754e94a0e07f..e99e783f1b0e 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -281,18 +281,20 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) log_rdma_send(INFO, "smbd_request 0x%p completed wc->status=%d\n", request, wc->status); - if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { - log_rdma_send(ERR, "wc->status=%d wc->opcode=%d\n", - wc->status, wc->opcode); - smbd_disconnect_rdma_connection(request->info); - } - for (i = 0; i < request->num_sge; i++) ib_dma_unmap_single(sc->ib.dev, request->sge[i].addr, request->sge[i].length, DMA_TO_DEVICE); + if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { + log_rdma_send(ERR, "wc->status=%d wc->opcode=%d\n", + wc->status, wc->opcode); + mempool_free(request, info->request_mempool); + smbd_disconnect_rdma_connection(info); + return; + } + if (atomic_dec_and_test(&request->info->send_pending)) wake_up(&request->info->wait_send_pending); From 24b6afc36db748467e853e166a385df07e443859 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 4 Aug 2025 14:10:13 +0200 Subject: [PATCH 155/279] smb: client: remove separate empty_packet_queue There's no need to maintain two lists, we can just have a single list of receive buffers, which are free to use. It just added unneeded complexity and resulted in ib_dma_unmap_single() not being called from recv_done() for empty keepalive packets. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: f198186aa9bb ("CIFS: SMBD: Establish SMB Direct connection") Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 6 ++-- fs/smb/client/smbdirect.c | 62 +++----------------------------------- fs/smb/client/smbdirect.h | 4 --- 3 files changed, 7 insertions(+), 65 deletions(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 80d6a51b8c11..65421703593a 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -481,10 +481,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) server->smbd_conn->receive_credit_target); seq_printf(m, "\nPending send_pending: %x ", atomic_read(&server->smbd_conn->send_pending)); - seq_printf(m, "\nReceive buffers count_receive_queue: %x " - "count_empty_packet_queue: %x", - server->smbd_conn->count_receive_queue, - server->smbd_conn->count_empty_packet_queue); + seq_printf(m, "\nReceive buffers count_receive_queue: %x ", + server->smbd_conn->count_receive_queue); seq_printf(m, "\nMR responder_resources: %x " "max_frmr_depth: %x mr_type: %x", server->smbd_conn->responder_resources, diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index e99e783f1b0e..0ab490c0a9b0 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -13,8 +13,6 @@ #include "cifsproto.h" #include "smb2proto.h" -static struct smbd_response *get_empty_queue_buffer( - struct smbd_connection *info); static struct smbd_response *get_receive_buffer( struct smbd_connection *info); static void put_receive_buffer( @@ -23,8 +21,6 @@ static void put_receive_buffer( static int allocate_receive_buffers(struct smbd_connection *info, int num_buf); static void destroy_receive_buffers(struct smbd_connection *info); -static void put_empty_packet( - struct smbd_connection *info, struct smbd_response *response); static void enqueue_reassembly( struct smbd_connection *info, struct smbd_response *response, int data_length); @@ -393,7 +389,6 @@ static bool process_negotiation_response( static void smbd_post_send_credits(struct work_struct *work) { int ret = 0; - int use_receive_queue = 1; int rc; struct smbd_response *response; struct smbd_connection *info = @@ -409,18 +404,9 @@ static void smbd_post_send_credits(struct work_struct *work) if (info->receive_credit_target > atomic_read(&info->receive_credits)) { while (true) { - if (use_receive_queue) - response = get_receive_buffer(info); - else - response = get_empty_queue_buffer(info); - if (!response) { - /* now switch to empty packet queue */ - if (use_receive_queue) { - use_receive_queue = 0; - continue; - } else - break; - } + response = get_receive_buffer(info); + if (!response) + break; response->type = SMBD_TRANSFER_DATA; response->first_segment = false; @@ -511,7 +497,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) response, data_length); } else - put_empty_packet(info, response); + put_receive_buffer(info, response); if (data_length) wake_up_interruptible(&info->wait_reassembly_queue); @@ -1115,17 +1101,6 @@ static int smbd_negotiate(struct smbd_connection *info) return rc; } -static void put_empty_packet( - struct smbd_connection *info, struct smbd_response *response) -{ - spin_lock(&info->empty_packet_queue_lock); - list_add_tail(&response->list, &info->empty_packet_queue); - info->count_empty_packet_queue++; - spin_unlock(&info->empty_packet_queue_lock); - - queue_work(info->workqueue, &info->post_send_credits_work); -} - /* * Implement Connection.FragmentReassemblyBuffer defined in [MS-SMBD] 3.1.1.1 * This is a queue for reassembling upper layer payload and present to upper @@ -1174,25 +1149,6 @@ static struct smbd_response *_get_first_reassembly(struct smbd_connection *info) return ret; } -static struct smbd_response *get_empty_queue_buffer( - struct smbd_connection *info) -{ - struct smbd_response *ret = NULL; - unsigned long flags; - - spin_lock_irqsave(&info->empty_packet_queue_lock, flags); - if (!list_empty(&info->empty_packet_queue)) { - ret = list_first_entry( - &info->empty_packet_queue, - struct smbd_response, list); - list_del(&ret->list); - info->count_empty_packet_queue--; - } - spin_unlock_irqrestore(&info->empty_packet_queue_lock, flags); - - return ret; -} - /* * Get a receive buffer * For each remote send, we need to post a receive. The receive buffers are @@ -1257,10 +1213,6 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) spin_lock_init(&info->receive_queue_lock); info->count_receive_queue = 0; - INIT_LIST_HEAD(&info->empty_packet_queue); - spin_lock_init(&info->empty_packet_queue_lock); - info->count_empty_packet_queue = 0; - init_waitqueue_head(&info->wait_receive_queues); for (i = 0; i < num_buf; i++) { @@ -1294,9 +1246,6 @@ static void destroy_receive_buffers(struct smbd_connection *info) while ((response = get_receive_buffer(info))) mempool_free(response, info->response_mempool); - - while ((response = get_empty_queue_buffer(info))) - mempool_free(response, info->response_mempool); } /* Implement idle connection timer [MS-SMBD] 3.1.6.2 */ @@ -1383,8 +1332,7 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "free receive buffers\n"); wait_event(info->wait_receive_queues, - info->count_receive_queue + info->count_empty_packet_queue - == sp->recv_credit_max); + info->count_receive_queue == sp->recv_credit_max); destroy_receive_buffers(info); /* diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 75b3f491c3ad..ea04ce8a9763 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -110,10 +110,6 @@ struct smbd_connection { int count_receive_queue; spinlock_t receive_queue_lock; - struct list_head empty_packet_queue; - int count_empty_packet_queue; - spinlock_t empty_packet_queue_lock; - wait_queue_head_t wait_receive_queues; /* Reassembly queue */ From 047682c370b6f18fec818b57b0ed8b501bdb79f8 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 4 Aug 2025 14:10:14 +0200 Subject: [PATCH 156/279] smb: client: make sure we call ib_dma_unmap_single() only if we called ib_dma_map_single already In case of failures either ib_dma_map_single() might not be called yet or ib_dma_unmap_single() was already called. We should make sure put_receive_buffer() only calls ib_dma_unmap_single() if needed. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: f198186aa9bb ("CIFS: SMBD: Establish SMB Direct connection") Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 0ab490c0a9b0..5690e8b3d101 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1057,6 +1057,7 @@ static int smbd_post_recv( if (rc) { ib_dma_unmap_single(sc->ib.dev, response->sge.addr, response->sge.length, DMA_FROM_DEVICE); + response->sge.length = 0; smbd_disconnect_rdma_connection(info); log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc); } @@ -1186,8 +1187,13 @@ static void put_receive_buffer( struct smbdirect_socket *sc = &info->socket; unsigned long flags; - ib_dma_unmap_single(sc->ib.dev, response->sge.addr, - response->sge.length, DMA_FROM_DEVICE); + if (likely(response->sge.length != 0)) { + ib_dma_unmap_single(sc->ib.dev, + response->sge.addr, + response->sge.length, + DMA_FROM_DEVICE); + response->sge.length = 0; + } spin_lock_irqsave(&info->receive_queue_lock, flags); list_add_tail(&response->list, &info->receive_queue); @@ -1221,6 +1227,7 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) goto allocate_failed; response->info = info; + response->sge.length = 0; list_add_tail(&response->list, &info->receive_queue); info->count_receive_queue++; } From bdd7afc6dca5e0ebbb75583484aa6ea9e03fbb13 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 4 Aug 2025 14:10:15 +0200 Subject: [PATCH 157/279] smb: client: let recv_done() cleanup before notifying the callers. We should call put_receive_buffer() before waking up the callers. For the internal error case of response->type being unexpected, we now also call smbd_disconnect_rdma_connection() instead of not waking up the callers at all. Note that the SMBD_TRANSFER_DATA case still has problems, which will be addressed in the next commit in order to make it easier to review this one. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: f198186aa9bb ("CIFS: SMBD: Establish SMB Direct connection") Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 5690e8b3d101..d26b8cef82d6 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -454,7 +454,6 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { log_rdma_recv(INFO, "wc->status=%d opcode=%d\n", wc->status, wc->opcode); - smbd_disconnect_rdma_connection(info); goto error; } @@ -471,8 +470,9 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) info->full_packet_received = true; info->negotiate_done = process_negotiation_response(response, wc->byte_len); + put_receive_buffer(info, response); complete(&info->negotiate_completion); - break; + return; /* SMBD data transfer packet */ case SMBD_TRANSFER_DATA: @@ -529,14 +529,16 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) } return; - - default: - log_rdma_recv(ERR, - "unexpected response type=%d\n", response->type); } + /* + * This is an internal error! + */ + log_rdma_recv(ERR, "unexpected response type=%d\n", response->type); + WARN_ON_ONCE(response->type != SMBD_TRANSFER_DATA); error: put_receive_buffer(info, response); + smbd_disconnect_rdma_connection(info); } static struct rdma_cm_id *smbd_create_id( From 24eff17887cb45c25a427e662dda352973c5c171 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 4 Aug 2025 14:10:16 +0200 Subject: [PATCH 158/279] smb: client: let recv_done() avoid touching data_transfer after cleanup/move Calling enqueue_reassembly() and wake_up_interruptible(&info->wait_reassembly_queue) or put_receive_buffer() means the response/data_transfer pointer might get re-used by another thread, which means these should be the last operations before calling return. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: f198186aa9bb ("CIFS: SMBD: Establish SMB Direct connection") Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index d26b8cef82d6..47f2a6cc1c0c 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -479,10 +479,6 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) data_transfer = smbd_response_payload(response); data_length = le32_to_cpu(data_transfer->data_length); - /* - * If this is a packet with data playload place the data in - * reassembly queue and wake up the reading thread - */ if (data_length) { if (info->full_packet_received) response->first_segment = true; @@ -491,16 +487,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) info->full_packet_received = false; else info->full_packet_received = true; - - enqueue_reassembly( - info, - response, - data_length); - } else - put_receive_buffer(info, response); - - if (data_length) - wake_up_interruptible(&info->wait_reassembly_queue); + } atomic_dec(&info->receive_credits); info->receive_credit_target = @@ -528,6 +515,16 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) info->keep_alive_requested = KEEP_ALIVE_PENDING; } + /* + * If this is a packet with data playload place the data in + * reassembly queue and wake up the reading thread + */ + if (data_length) { + enqueue_reassembly(info, response, data_length); + wake_up_interruptible(&info->wait_reassembly_queue); + } else + put_receive_buffer(info, response); + return; } From 0edf9fc0a34436e9f257e8508e795b2caddc74d6 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 5 Aug 2025 18:11:29 +0200 Subject: [PATCH 159/279] smb: client: remove unused smbd_connection->fragment_reassembly_remaining Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 2 -- fs/smb/client/smbdirect.h | 1 - 2 files changed, 3 deletions(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 65421703593a..c7cbaf12c16f 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -465,13 +465,11 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, "\nRead Queue count_reassembly_queue: %x " "count_enqueue_reassembly_queue: %x " "count_dequeue_reassembly_queue: %x " - "fragment_reassembly_remaining: %x " "reassembly_data_length: %x " "reassembly_queue_length: %x", server->smbd_conn->count_reassembly_queue, server->smbd_conn->count_enqueue_reassembly_queue, server->smbd_conn->count_dequeue_reassembly_queue, - server->smbd_conn->fragment_reassembly_remaining, server->smbd_conn->reassembly_data_length, server->smbd_conn->reassembly_queue_length); seq_printf(m, "\nCurrent Credits send_credits: %x " diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index ea04ce8a9763..a2026c542989 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -75,7 +75,6 @@ struct smbd_connection { atomic_t send_credits; atomic_t receive_credits; int receive_credit_target; - int fragment_reassembly_remaining; /* Memory registrations */ /* Maximum number of RDMA read/write outstanding on this connection */ From 33dd53a90e3419ea260e9ff2b4aa107385cdf7fa Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 5 Aug 2025 18:11:30 +0200 Subject: [PATCH 160/279] smb: smbdirect: introduce smbdirect_socket.recv_io.expected The expected message type can be global as they never change during the after negotiation process. This will replace smbd_response->type and smb_direct_recvmsg->type in future. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index e5b15cc44a7b..5db7815b614f 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -38,6 +38,20 @@ struct smbdirect_socket { } ib; struct smbdirect_socket_parameters parameters; + + /* + * The state for posted receive buffers + */ + struct { + /* + * The type of PDU we are expecting + */ + enum { + SMBDIRECT_EXPECT_NEGOTIATE_REQ = 1, + SMBDIRECT_EXPECT_NEGOTIATE_REP = 2, + SMBDIRECT_EXPECT_DATA_TRANSFER = 3, + } expected; + } recv_io; }; #endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */ From bbdbd9ae47155da65aa0c1641698a44d85c2faa2 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 5 Aug 2025 18:11:31 +0200 Subject: [PATCH 161/279] smb: client: make use of smbdirect_socket->recv_io.expected The expected incoming message type can be per connection. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 22 ++++++++++++++-------- fs/smb/client/smbdirect.h | 7 ------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 47f2a6cc1c0c..a189973df1be 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -383,6 +383,7 @@ static bool process_negotiation_response( info->max_frmr_depth * PAGE_SIZE); info->max_frmr_depth = sp->max_read_write_size / PAGE_SIZE; + sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER; return true; } @@ -408,7 +409,6 @@ static void smbd_post_send_credits(struct work_struct *work) if (!response) break; - response->type = SMBD_TRANSFER_DATA; response->first_segment = false; rc = smbd_post_recv(info, response); if (rc) { @@ -445,10 +445,11 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) struct smbd_response *response = container_of(wc->wr_cqe, struct smbd_response, cqe); struct smbd_connection *info = response->info; + struct smbdirect_socket *sc = &info->socket; int data_length = 0; log_rdma_recv(INFO, "response=0x%p type=%d wc status=%d wc opcode %d byte_len=%d pkey_index=%u\n", - response, response->type, wc->status, wc->opcode, + response, sc->recv_io.expected, wc->status, wc->opcode, wc->byte_len, wc->pkey_index); if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { @@ -463,9 +464,9 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) response->sge.length, DMA_FROM_DEVICE); - switch (response->type) { + switch (sc->recv_io.expected) { /* SMBD negotiation response */ - case SMBD_NEGOTIATE_RESP: + case SMBDIRECT_EXPECT_NEGOTIATE_REP: dump_smbdirect_negotiate_resp(smbd_response_payload(response)); info->full_packet_received = true; info->negotiate_done = @@ -475,7 +476,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) return; /* SMBD data transfer packet */ - case SMBD_TRANSFER_DATA: + case SMBDIRECT_EXPECT_DATA_TRANSFER: data_transfer = smbd_response_payload(response); data_length = le32_to_cpu(data_transfer->data_length); @@ -526,13 +527,17 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) put_receive_buffer(info, response); return; + + case SMBDIRECT_EXPECT_NEGOTIATE_REQ: + /* Only server... */ + break; } /* * This is an internal error! */ - log_rdma_recv(ERR, "unexpected response type=%d\n", response->type); - WARN_ON_ONCE(response->type != SMBD_TRANSFER_DATA); + log_rdma_recv(ERR, "unexpected response type=%d\n", sc->recv_io.expected); + WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_DATA_TRANSFER); error: put_receive_buffer(info, response); smbd_disconnect_rdma_connection(info); @@ -1067,10 +1072,11 @@ static int smbd_post_recv( /* Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2 */ static int smbd_negotiate(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; int rc; struct smbd_response *response = get_receive_buffer(info); - response->type = SMBD_NEGOTIATE_RESP; + sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REP; rc = smbd_post_recv(info, response); log_rdma_event(INFO, "smbd_post_recv rc=%d iov.addr=0x%llx iov.length=%u iov.lkey=0x%x\n", rc, response->sge.addr, diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index a2026c542989..dbb138900973 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -156,11 +156,6 @@ struct smbd_connection { unsigned int count_send_empty; }; -enum smbd_message_type { - SMBD_NEGOTIATE_RESP, - SMBD_TRANSFER_DATA, -}; - /* Maximum number of SGEs used by smbdirect.c in any send work request */ #define SMBDIRECT_MAX_SEND_SGE 6 @@ -186,8 +181,6 @@ struct smbd_response { struct ib_cqe cqe; struct ib_sge sge; - enum smbd_message_type type; - /* Link to receive queue or reassembly queue */ struct list_head list; From 60812d20da82606f0620904c281579a9af0ab452 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 5 Aug 2025 18:11:32 +0200 Subject: [PATCH 162/279] smb: smbdirect: introduce struct smbdirect_recv_io This will be used in client and server soon in order to replace smbd_response/smb_direct_recvmsg. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 5db7815b614f..a7ad31c471a7 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -54,4 +54,19 @@ struct smbdirect_socket { } recv_io; }; +struct smbdirect_recv_io { + struct smbdirect_socket *socket; + struct ib_cqe cqe; + struct ib_sge sge; + + /* Link to free or reassembly list */ + struct list_head list; + + /* Indicate if this is the 1st packet of a payload */ + bool first_segment; + + /* SMBD packet header and payload follows this structure */ + u8 packet[]; +}; + #endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */ From 5dddf0497445d247e995306daf3b76dd0633831c Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 5 Aug 2025 18:11:33 +0200 Subject: [PATCH 163/279] smb: client: make use of struct smbdirect_recv_io This is the shared structure that will be used in the server too and will allow us to move helper functions into common code soon. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 79 ++++++++++++++++++++------------------- fs/smb/client/smbdirect.h | 16 -------- 2 files changed, 41 insertions(+), 54 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index a189973df1be..2589834882cb 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -13,23 +13,23 @@ #include "cifsproto.h" #include "smb2proto.h" -static struct smbd_response *get_receive_buffer( +static struct smbdirect_recv_io *get_receive_buffer( struct smbd_connection *info); static void put_receive_buffer( struct smbd_connection *info, - struct smbd_response *response); + struct smbdirect_recv_io *response); static int allocate_receive_buffers(struct smbd_connection *info, int num_buf); static void destroy_receive_buffers(struct smbd_connection *info); static void enqueue_reassembly( struct smbd_connection *info, - struct smbd_response *response, int data_length); -static struct smbd_response *_get_first_reassembly( + struct smbdirect_recv_io *response, int data_length); +static struct smbdirect_recv_io *_get_first_reassembly( struct smbd_connection *info); static int smbd_post_recv( struct smbd_connection *info, - struct smbd_response *response); + struct smbdirect_recv_io *response); static int smbd_post_send_empty(struct smbd_connection *info); @@ -260,7 +260,7 @@ static inline void *smbd_request_payload(struct smbd_request *request) return (void *)request->packet; } -static inline void *smbd_response_payload(struct smbd_response *response) +static inline void *smbdirect_recv_io_payload(struct smbdirect_recv_io *response) { return (void *)response->packet; } @@ -315,12 +315,13 @@ static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp) * return value: true if negotiation is a success, false if failed */ static bool process_negotiation_response( - struct smbd_response *response, int packet_length) + struct smbdirect_recv_io *response, int packet_length) { - struct smbd_connection *info = response->info; - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket *sc = response->socket; + struct smbd_connection *info = + container_of(sc, struct smbd_connection, socket); struct smbdirect_socket_parameters *sp = &sc->parameters; - struct smbdirect_negotiate_resp *packet = smbd_response_payload(response); + struct smbdirect_negotiate_resp *packet = smbdirect_recv_io_payload(response); if (packet_length < sizeof(struct smbdirect_negotiate_resp)) { log_rdma_event(ERR, @@ -391,7 +392,7 @@ static void smbd_post_send_credits(struct work_struct *work) { int ret = 0; int rc; - struct smbd_response *response; + struct smbdirect_recv_io *response; struct smbd_connection *info = container_of(work, struct smbd_connection, post_send_credits_work); @@ -442,10 +443,11 @@ static void smbd_post_send_credits(struct work_struct *work) static void recv_done(struct ib_cq *cq, struct ib_wc *wc) { struct smbdirect_data_transfer *data_transfer; - struct smbd_response *response = - container_of(wc->wr_cqe, struct smbd_response, cqe); - struct smbd_connection *info = response->info; - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_recv_io *response = + container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); + struct smbdirect_socket *sc = response->socket; + struct smbd_connection *info = + container_of(sc, struct smbd_connection, socket); int data_length = 0; log_rdma_recv(INFO, "response=0x%p type=%d wc status=%d wc opcode %d byte_len=%d pkey_index=%u\n", @@ -467,7 +469,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) switch (sc->recv_io.expected) { /* SMBD negotiation response */ case SMBDIRECT_EXPECT_NEGOTIATE_REP: - dump_smbdirect_negotiate_resp(smbd_response_payload(response)); + dump_smbdirect_negotiate_resp(smbdirect_recv_io_payload(response)); info->full_packet_received = true; info->negotiate_done = process_negotiation_response(response, wc->byte_len); @@ -477,7 +479,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) /* SMBD data transfer packet */ case SMBDIRECT_EXPECT_DATA_TRANSFER: - data_transfer = smbd_response_payload(response); + data_transfer = smbdirect_recv_io_payload(response); data_length = le32_to_cpu(data_transfer->data_length); if (data_length) { @@ -1034,7 +1036,7 @@ static int smbd_post_send_full_iter(struct smbd_connection *info, * The interaction is controlled by send/receive credit system */ static int smbd_post_recv( - struct smbd_connection *info, struct smbd_response *response) + struct smbd_connection *info, struct smbdirect_recv_io *response) { struct smbdirect_socket *sc = &info->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; @@ -1074,7 +1076,7 @@ static int smbd_negotiate(struct smbd_connection *info) { struct smbdirect_socket *sc = &info->socket; int rc; - struct smbd_response *response = get_receive_buffer(info); + struct smbdirect_recv_io *response = get_receive_buffer(info); sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REP; rc = smbd_post_recv(info, response); @@ -1119,7 +1121,7 @@ static int smbd_negotiate(struct smbd_connection *info) */ static void enqueue_reassembly( struct smbd_connection *info, - struct smbd_response *response, + struct smbdirect_recv_io *response, int data_length) { spin_lock(&info->reassembly_queue_lock); @@ -1143,14 +1145,14 @@ static void enqueue_reassembly( * Caller is responsible for locking * return value: the first entry if any, NULL if queue is empty */ -static struct smbd_response *_get_first_reassembly(struct smbd_connection *info) +static struct smbdirect_recv_io *_get_first_reassembly(struct smbd_connection *info) { - struct smbd_response *ret = NULL; + struct smbdirect_recv_io *ret = NULL; if (!list_empty(&info->reassembly_queue)) { ret = list_first_entry( &info->reassembly_queue, - struct smbd_response, list); + struct smbdirect_recv_io, list); } return ret; } @@ -1161,16 +1163,16 @@ static struct smbd_response *_get_first_reassembly(struct smbd_connection *info) * pre-allocated in advance. * return value: the receive buffer, NULL if none is available */ -static struct smbd_response *get_receive_buffer(struct smbd_connection *info) +static struct smbdirect_recv_io *get_receive_buffer(struct smbd_connection *info) { - struct smbd_response *ret = NULL; + struct smbdirect_recv_io *ret = NULL; unsigned long flags; spin_lock_irqsave(&info->receive_queue_lock, flags); if (!list_empty(&info->receive_queue)) { ret = list_first_entry( &info->receive_queue, - struct smbd_response, list); + struct smbdirect_recv_io, list); list_del(&ret->list); info->count_receive_queue--; info->count_get_receive_buffer++; @@ -1187,7 +1189,7 @@ static struct smbd_response *get_receive_buffer(struct smbd_connection *info) * receive buffer is returned. */ static void put_receive_buffer( - struct smbd_connection *info, struct smbd_response *response) + struct smbd_connection *info, struct smbdirect_recv_io *response) { struct smbdirect_socket *sc = &info->socket; unsigned long flags; @@ -1212,8 +1214,9 @@ static void put_receive_buffer( /* Preallocate all receive buffer on transport establishment */ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) { + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_recv_io *response; int i; - struct smbd_response *response; INIT_LIST_HEAD(&info->reassembly_queue); spin_lock_init(&info->reassembly_queue_lock); @@ -1231,7 +1234,7 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) if (!response) goto allocate_failed; - response->info = info; + response->socket = sc; response->sge.length = 0; list_add_tail(&response->list, &info->receive_queue); info->count_receive_queue++; @@ -1243,7 +1246,7 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) while (!list_empty(&info->receive_queue)) { response = list_first_entry( &info->receive_queue, - struct smbd_response, list); + struct smbdirect_recv_io, list); list_del(&response->list); info->count_receive_queue--; @@ -1254,7 +1257,7 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) static void destroy_receive_buffers(struct smbd_connection *info) { - struct smbd_response *response; + struct smbdirect_recv_io *response; while ((response = get_receive_buffer(info))) mempool_free(response, info->response_mempool); @@ -1295,7 +1298,7 @@ void smbd_destroy(struct TCP_Server_Info *server) struct smbd_connection *info = server->smbd_conn; struct smbdirect_socket *sc; struct smbdirect_socket_parameters *sp; - struct smbd_response *response; + struct smbdirect_recv_io *response; unsigned long flags; if (!info) { @@ -1456,17 +1459,17 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) if (!info->request_mempool) goto out1; - scnprintf(name, MAX_NAME_LEN, "smbd_response_%p", info); + scnprintf(name, MAX_NAME_LEN, "smbdirect_recv_io_%p", info); struct kmem_cache_args response_args = { - .align = __alignof__(struct smbd_response), - .useroffset = (offsetof(struct smbd_response, packet) + + .align = __alignof__(struct smbdirect_recv_io), + .useroffset = (offsetof(struct smbdirect_recv_io, packet) + sizeof(struct smbdirect_data_transfer)), .usersize = sp->max_recv_size - sizeof(struct smbdirect_data_transfer), }; info->response_cache = kmem_cache_create(name, - sizeof(struct smbd_response) + sp->max_recv_size, + sizeof(struct smbdirect_recv_io) + sp->max_recv_size, &response_args, SLAB_HWCACHE_ALIGN); if (!info->response_cache) goto out2; @@ -1756,7 +1759,7 @@ struct smbd_connection *smbd_get_connection( int smbd_recv(struct smbd_connection *info, struct msghdr *msg) { struct smbdirect_socket *sc = &info->socket; - struct smbd_response *response; + struct smbdirect_recv_io *response; struct smbdirect_data_transfer *data_transfer; size_t size = iov_iter_count(&msg->msg_iter); int to_copy, to_read, data_read, offset; @@ -1792,7 +1795,7 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) offset = info->first_entry_offset; while (data_read < size) { response = _get_first_reassembly(info); - data_transfer = smbd_response_payload(response); + data_transfer = smbdirect_recv_io_payload(response); data_length = le32_to_cpu(data_transfer->data_length); remaining_data_length = le32_to_cpu( diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index dbb138900973..f53781f98e64 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -175,22 +175,6 @@ struct smbd_request { /* Maximum number of SGEs used by smbdirect.c in any receive work request */ #define SMBDIRECT_MAX_RECV_SGE 1 -/* The context for a SMBD response */ -struct smbd_response { - struct smbd_connection *info; - struct ib_cqe cqe; - struct ib_sge sge; - - /* Link to receive queue or reassembly queue */ - struct list_head list; - - /* Indicate if this is the 1st packet of a payload */ - bool first_segment; - - /* SMBD packet header and payload follows this structure */ - u8 packet[]; -}; - /* Create a SMBDirect session */ struct smbd_connection *smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr); From d0df32a3025c2de47d7eb8766aaee82644a53581 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 5 Aug 2025 18:11:34 +0200 Subject: [PATCH 164/279] smb: smbdirect: introduce smbdirect_socket.recv_io.free.{list,lock} This will allow the list of free smbdirect_recv_io messages including the spinlock to be in common between client and server in order to split out common helper functions in future. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index a7ad31c471a7..21a58e6078cb 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -51,6 +51,15 @@ struct smbdirect_socket { SMBDIRECT_EXPECT_NEGOTIATE_REP = 2, SMBDIRECT_EXPECT_DATA_TRANSFER = 3, } expected; + + /* + * The list of free smbdirect_recv_io + * structures + */ + struct { + struct list_head list; + spinlock_t lock; + } free; } recv_io; }; From 59500450843a5af9ca4fdba0ac2808e929a47584 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 5 Aug 2025 18:11:35 +0200 Subject: [PATCH 165/279] smb: client: make use of smb: smbdirect_socket.recv_io.free.{list,lock} This will be used by the server too in order to have common helper functions in future. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 25 +++++++++++++------------ fs/smb/client/smbdirect.h | 3 --- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 2589834882cb..073331080e3a 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1165,19 +1165,20 @@ static struct smbdirect_recv_io *_get_first_reassembly(struct smbd_connection *i */ static struct smbdirect_recv_io *get_receive_buffer(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; struct smbdirect_recv_io *ret = NULL; unsigned long flags; - spin_lock_irqsave(&info->receive_queue_lock, flags); - if (!list_empty(&info->receive_queue)) { + spin_lock_irqsave(&sc->recv_io.free.lock, flags); + if (!list_empty(&sc->recv_io.free.list)) { ret = list_first_entry( - &info->receive_queue, + &sc->recv_io.free.list, struct smbdirect_recv_io, list); list_del(&ret->list); info->count_receive_queue--; info->count_get_receive_buffer++; } - spin_unlock_irqrestore(&info->receive_queue_lock, flags); + spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); return ret; } @@ -1202,11 +1203,11 @@ static void put_receive_buffer( response->sge.length = 0; } - spin_lock_irqsave(&info->receive_queue_lock, flags); - list_add_tail(&response->list, &info->receive_queue); + spin_lock_irqsave(&sc->recv_io.free.lock, flags); + list_add_tail(&response->list, &sc->recv_io.free.list); info->count_receive_queue++; info->count_put_receive_buffer++; - spin_unlock_irqrestore(&info->receive_queue_lock, flags); + spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); queue_work(info->workqueue, &info->post_send_credits_work); } @@ -1223,8 +1224,8 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) info->reassembly_data_length = 0; info->reassembly_queue_length = 0; - INIT_LIST_HEAD(&info->receive_queue); - spin_lock_init(&info->receive_queue_lock); + INIT_LIST_HEAD(&sc->recv_io.free.list); + spin_lock_init(&sc->recv_io.free.lock); info->count_receive_queue = 0; init_waitqueue_head(&info->wait_receive_queues); @@ -1236,16 +1237,16 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) response->socket = sc; response->sge.length = 0; - list_add_tail(&response->list, &info->receive_queue); + list_add_tail(&response->list, &sc->recv_io.free.list); info->count_receive_queue++; } return 0; allocate_failed: - while (!list_empty(&info->receive_queue)) { + while (!list_empty(&sc->recv_io.free.list)) { response = list_first_entry( - &info->receive_queue, + &sc->recv_io.free.list, struct smbdirect_recv_io, list); list_del(&response->list); info->count_receive_queue--; diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index f53781f98e64..3381e01f5b83 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -105,10 +105,7 @@ struct smbd_connection { wait_queue_head_t wait_post_send; /* Receive queue */ - struct list_head receive_queue; int count_receive_queue; - spinlock_t receive_queue_lock; - wait_queue_head_t wait_receive_queues; /* Reassembly queue */ From b7ffb4d2a0360043d821b612040088ae9299aa8c Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 5 Aug 2025 18:11:36 +0200 Subject: [PATCH 166/279] smb: smbdirect: introduce smbdirect_socket.recv_io.reassembly.* This will be used in common between client and server soon. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 21a58e6078cb..3ae834ca3af1 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -60,6 +60,32 @@ struct smbdirect_socket { struct list_head list; spinlock_t lock; } free; + + /* + * The list of arrived non-empty smbdirect_recv_io + * structures + * + * This represents the reassembly queue. + */ + struct { + struct list_head list; + spinlock_t lock; + wait_queue_head_t wait_queue; + /* total data length of reassembly queue */ + int data_length; + int queue_length; + /* the offset to first buffer in reassembly queue */ + int first_entry_offset; + /* + * Indicate if we have received a full packet on the + * connection This is used to identify the first SMBD + * packet of a assembled payload (SMB packet) in + * reassembly queue so we can return a RFC1002 length to + * upper layer to indicate the length of the SMB packet + * received + */ + bool full_packet_received; + } reassembly; } recv_io; }; From 61b4918e4ea17f3ce14ad13e4c2757c0b6afe708 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 5 Aug 2025 18:11:37 +0200 Subject: [PATCH 167/279] smb: client: make use of smbdirect_socket.recv_io.reassembly.* This will be used by the server too and will allow us to create common helper functions. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 8 ++-- fs/smb/client/smbdirect.c | 79 ++++++++++++++++++++------------------ fs/smb/client/smbdirect.h | 20 ---------- 3 files changed, 46 insertions(+), 61 deletions(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index c7cbaf12c16f..beb4f18f05ef 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -412,6 +412,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { #ifdef CONFIG_CIFS_SMB_DIRECT + struct smbdirect_socket *sc; struct smbdirect_socket_parameters *sp; #endif @@ -436,7 +437,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, "\nSMBDirect transport not available"); goto skip_rdma; } - sp = &server->smbd_conn->socket.parameters; + sc = &server->smbd_conn->socket; + sp = &sc->parameters; seq_printf(m, "\nSMBDirect (in hex) protocol version: %x " "transport status: %x", @@ -470,8 +472,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) server->smbd_conn->count_reassembly_queue, server->smbd_conn->count_enqueue_reassembly_queue, server->smbd_conn->count_dequeue_reassembly_queue, - server->smbd_conn->reassembly_data_length, - server->smbd_conn->reassembly_queue_length); + sc->recv_io.reassembly.data_length, + sc->recv_io.reassembly.queue_length); seq_printf(m, "\nCurrent Credits send_credits: %x " "receive_credits: %x receive_credit_target: %x", atomic_read(&server->smbd_conn->send_credits), diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 073331080e3a..5217a8122a94 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -224,7 +224,7 @@ static int smbd_conn_upcall( sc->status = SMBDIRECT_SOCKET_DISCONNECTED; wake_up_interruptible(&info->disconn_wait); - wake_up_interruptible(&info->wait_reassembly_queue); + wake_up_interruptible(&sc->recv_io.reassembly.wait_queue); wake_up_interruptible_all(&info->wait_send_queue); break; @@ -470,7 +470,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) /* SMBD negotiation response */ case SMBDIRECT_EXPECT_NEGOTIATE_REP: dump_smbdirect_negotiate_resp(smbdirect_recv_io_payload(response)); - info->full_packet_received = true; + sc->recv_io.reassembly.full_packet_received = true; info->negotiate_done = process_negotiation_response(response, wc->byte_len); put_receive_buffer(info, response); @@ -483,13 +483,13 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) data_length = le32_to_cpu(data_transfer->data_length); if (data_length) { - if (info->full_packet_received) + if (sc->recv_io.reassembly.full_packet_received) response->first_segment = true; if (le32_to_cpu(data_transfer->remaining_data_length)) - info->full_packet_received = false; + sc->recv_io.reassembly.full_packet_received = false; else - info->full_packet_received = true; + sc->recv_io.reassembly.full_packet_received = true; } atomic_dec(&info->receive_credits); @@ -524,7 +524,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) */ if (data_length) { enqueue_reassembly(info, response, data_length); - wake_up_interruptible(&info->wait_reassembly_queue); + wake_up_interruptible(&sc->recv_io.reassembly.wait_queue); } else put_receive_buffer(info, response); @@ -1124,9 +1124,11 @@ static void enqueue_reassembly( struct smbdirect_recv_io *response, int data_length) { - spin_lock(&info->reassembly_queue_lock); - list_add_tail(&response->list, &info->reassembly_queue); - info->reassembly_queue_length++; + struct smbdirect_socket *sc = &info->socket; + + spin_lock(&sc->recv_io.reassembly.lock); + list_add_tail(&response->list, &sc->recv_io.reassembly.list); + sc->recv_io.reassembly.queue_length++; /* * Make sure reassembly_data_length is updated after list and * reassembly_queue_length are updated. On the dequeue side @@ -1134,8 +1136,8 @@ static void enqueue_reassembly( * if reassembly_queue_length and list is up to date */ virt_wmb(); - info->reassembly_data_length += data_length; - spin_unlock(&info->reassembly_queue_lock); + sc->recv_io.reassembly.data_length += data_length; + spin_unlock(&sc->recv_io.reassembly.lock); info->count_reassembly_queue++; info->count_enqueue_reassembly_queue++; } @@ -1147,11 +1149,12 @@ static void enqueue_reassembly( */ static struct smbdirect_recv_io *_get_first_reassembly(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; struct smbdirect_recv_io *ret = NULL; - if (!list_empty(&info->reassembly_queue)) { + if (!list_empty(&sc->recv_io.reassembly.list)) { ret = list_first_entry( - &info->reassembly_queue, + &sc->recv_io.reassembly.list, struct smbdirect_recv_io, list); } return ret; @@ -1219,10 +1222,10 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) struct smbdirect_recv_io *response; int i; - INIT_LIST_HEAD(&info->reassembly_queue); - spin_lock_init(&info->reassembly_queue_lock); - info->reassembly_data_length = 0; - info->reassembly_queue_length = 0; + INIT_LIST_HEAD(&sc->recv_io.reassembly.list); + spin_lock_init(&sc->recv_io.reassembly.lock); + sc->recv_io.reassembly.data_length = 0; + sc->recv_io.reassembly.queue_length = 0; INIT_LIST_HEAD(&sc->recv_io.free.list); spin_lock_init(&sc->recv_io.free.lock); @@ -1333,18 +1336,18 @@ void smbd_destroy(struct TCP_Server_Info *server) /* It's not possible for upper layer to get to reassembly */ log_rdma_event(INFO, "drain the reassembly queue\n"); do { - spin_lock_irqsave(&info->reassembly_queue_lock, flags); + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); response = _get_first_reassembly(info); if (response) { list_del(&response->list); spin_unlock_irqrestore( - &info->reassembly_queue_lock, flags); + &sc->recv_io.reassembly.lock, flags); put_receive_buffer(info, response); } else spin_unlock_irqrestore( - &info->reassembly_queue_lock, flags); + &sc->recv_io.reassembly.lock, flags); } while (response); - info->reassembly_data_length = 0; + sc->recv_io.reassembly.data_length = 0; log_rdma_event(INFO, "free receive buffers\n"); wait_event(info->wait_receive_queues, @@ -1639,7 +1642,7 @@ static struct smbd_connection *_smbd_get_connection( init_waitqueue_head(&info->conn_wait); init_waitqueue_head(&info->disconn_wait); - init_waitqueue_head(&info->wait_reassembly_queue); + init_waitqueue_head(&sc->recv_io.reassembly.wait_queue); rc = rdma_connect(sc->rdma.cm_id, &conn_param); if (rc) { log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc); @@ -1776,9 +1779,9 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) * the only one reading from the front of the queue. The transport * may add more entries to the back of the queue at the same time */ - log_read(INFO, "size=%zd info->reassembly_data_length=%d\n", size, - info->reassembly_data_length); - if (info->reassembly_data_length >= size) { + log_read(INFO, "size=%zd sc->recv_io.reassembly.data_length=%d\n", size, + sc->recv_io.reassembly.data_length); + if (sc->recv_io.reassembly.data_length >= size) { int queue_length; int queue_removed = 0; @@ -1790,10 +1793,10 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) * updated in SOFTIRQ as more data is received */ virt_rmb(); - queue_length = info->reassembly_queue_length; + queue_length = sc->recv_io.reassembly.queue_length; data_read = 0; to_read = size; - offset = info->first_entry_offset; + offset = sc->recv_io.reassembly.first_entry_offset; while (data_read < size) { response = _get_first_reassembly(info); data_transfer = smbdirect_recv_io_payload(response); @@ -1841,10 +1844,10 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) list_del(&response->list); else { spin_lock_irq( - &info->reassembly_queue_lock); + &sc->recv_io.reassembly.lock); list_del(&response->list); spin_unlock_irq( - &info->reassembly_queue_lock); + &sc->recv_io.reassembly.lock); } queue_removed++; info->count_reassembly_queue--; @@ -1863,23 +1866,23 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) to_read, data_read, offset); } - spin_lock_irq(&info->reassembly_queue_lock); - info->reassembly_data_length -= data_read; - info->reassembly_queue_length -= queue_removed; - spin_unlock_irq(&info->reassembly_queue_lock); + spin_lock_irq(&sc->recv_io.reassembly.lock); + sc->recv_io.reassembly.data_length -= data_read; + sc->recv_io.reassembly.queue_length -= queue_removed; + spin_unlock_irq(&sc->recv_io.reassembly.lock); - info->first_entry_offset = offset; + sc->recv_io.reassembly.first_entry_offset = offset; log_read(INFO, "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", - data_read, info->reassembly_data_length, - info->first_entry_offset); + data_read, sc->recv_io.reassembly.data_length, + sc->recv_io.reassembly.first_entry_offset); read_rfc1002_done: return data_read; } log_read(INFO, "wait_event on more data\n"); rc = wait_event_interruptible( - info->wait_reassembly_queue, - info->reassembly_data_length >= size || + sc->recv_io.reassembly.wait_queue, + sc->recv_io.reassembly.data_length >= size || sc->status != SMBDIRECT_SOCKET_CONNECTED); /* Don't return any data if interrupted */ if (rc) diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 3381e01f5b83..9df434f6bb8c 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -108,30 +108,10 @@ struct smbd_connection { int count_receive_queue; wait_queue_head_t wait_receive_queues; - /* Reassembly queue */ - struct list_head reassembly_queue; - spinlock_t reassembly_queue_lock; - wait_queue_head_t wait_reassembly_queue; - - /* total data length of reassembly queue */ - int reassembly_data_length; - int reassembly_queue_length; - /* the offset to first buffer in reassembly queue */ - int first_entry_offset; - bool send_immediate; wait_queue_head_t wait_send_queue; - /* - * Indicate if we have received a full packet on the connection - * This is used to identify the first SMBD packet of a assembled - * payload (SMB packet) in reassembly queue so we can return a - * RFC1002 length to upper layer to indicate the length of the SMB - * packet received - */ - bool full_packet_received; - struct workqueue_struct *workqueue; struct delayed_work idle_timer_work; From b126645b79547bd68fb8353ad27841b7408c08bb Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 6 Aug 2025 19:35:47 +0200 Subject: [PATCH 168/279] smb: client: remove unused enum smbd_connection_status Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 9df434f6bb8c..0463fde1bf26 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -33,16 +33,6 @@ enum keep_alive_status { KEEP_ALIVE_SENT, }; -enum smbd_connection_status { - SMBD_CREATED, - SMBD_CONNECTING, - SMBD_CONNECTED, - SMBD_NEGOTIATE_FAILED, - SMBD_DISCONNECTING, - SMBD_DISCONNECTED, - SMBD_DESTROYED -}; - /* * The context for the SMBDirect transport * Everything related to the transport is here. It has several logical parts From 3515aa6e43077157cf6070ad2cd181b179964856 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 6 Aug 2025 19:35:48 +0200 Subject: [PATCH 169/279] smb: smbdirect: add SMBDIRECT_RECV_IO_MAX_SGE This will allow the client and server specific defines to be replaced. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 3ae834ca3af1..7270fcee1048 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -92,6 +92,13 @@ struct smbdirect_socket { struct smbdirect_recv_io { struct smbdirect_socket *socket; struct ib_cqe cqe; + + /* + * For now we only use a single SGE + * as we have just one large buffer + * per posted recv. + */ +#define SMBDIRECT_RECV_IO_MAX_SGE 1 struct ib_sge sge; /* Link to free or reassembly list */ From 8b5964a1188f659afda0c72f7c7b7c9d54aa73cc Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 6 Aug 2025 19:35:49 +0200 Subject: [PATCH 170/279] smb: client: make use of SMBDIRECT_RECV_IO_MAX_SGE Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 4 ++-- fs/smb/client/smbdirect.h | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 5217a8122a94..5d1fa83583f6 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1563,7 +1563,7 @@ static struct smbd_connection *_smbd_get_connection( sp->keepalive_interval_msec = smbd_keep_alive_interval * 1000; if (sc->ib.dev->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE || - sc->ib.dev->attrs.max_recv_sge < SMBDIRECT_MAX_RECV_SGE) { + sc->ib.dev->attrs.max_recv_sge < SMBDIRECT_RECV_IO_MAX_SGE) { log_rdma_event(ERR, "device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", IB_DEVICE_NAME_MAX, @@ -1595,7 +1595,7 @@ static struct smbd_connection *_smbd_get_connection( qp_attr.cap.max_send_wr = sp->send_credit_target; qp_attr.cap.max_recv_wr = sp->recv_credit_max; qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SEND_SGE; - qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_RECV_SGE; + qp_attr.cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; qp_attr.cap.max_inline_data = 0; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; qp_attr.qp_type = IB_QPT_RC; diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 0463fde1bf26..81b55c0de552 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -139,9 +139,6 @@ struct smbd_request { u8 packet[]; }; -/* Maximum number of SGEs used by smbdirect.c in any receive work request */ -#define SMBDIRECT_MAX_RECV_SGE 1 - /* Create a SMBDirect session */ struct smbd_connection *smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr); From 92ac696be763461b575022f06bbd843aadbe9593 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 6 Aug 2025 19:35:50 +0200 Subject: [PATCH 171/279] smb: smbdirect: introduce struct smbdirect_send_io This will be used in client and server soon in order to replace smbd_request/smb_direct_sendmsg. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 7270fcee1048..4660c05c358f 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -89,6 +89,30 @@ struct smbdirect_socket { } recv_io; }; +struct smbdirect_send_io { + struct smbdirect_socket *socket; + struct ib_cqe cqe; + + /* + * The SGE entries for this work request + * + * The first points to the packet header + */ +#define SMBDIRECT_SEND_IO_MAX_SGE 6 + size_t num_sge; + struct ib_sge sge[SMBDIRECT_SEND_IO_MAX_SGE]; + + /* + * Link to the list of sibling smbdirect_send_io + * messages. + */ + struct list_head sibling_list; + struct ib_send_wr wr; + + /* SMBD packet header follows this structure */ + u8 packet[]; +}; + struct smbdirect_recv_io { struct smbdirect_socket *socket; struct ib_cqe cqe; From 977ea06fddda493a8e16e609a408fdd08233793d Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 6 Aug 2025 19:35:51 +0200 Subject: [PATCH 172/279] smb: client: make use of struct smbdirect_send_io The server will also use this soon, so that we can split out common helper functions in future. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 45 ++++++++++++++++++++------------------- fs/smb/client/smbdirect.h | 16 -------------- 2 files changed, 23 insertions(+), 38 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 5d1fa83583f6..c367efef8c7a 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -255,7 +255,7 @@ smbd_qp_async_error_upcall(struct ib_event *event, void *context) } } -static inline void *smbd_request_payload(struct smbd_request *request) +static inline void *smbdirect_send_io_payload(struct smbdirect_send_io *request) { return (void *)request->packet; } @@ -269,12 +269,13 @@ static inline void *smbdirect_recv_io_payload(struct smbdirect_recv_io *response static void send_done(struct ib_cq *cq, struct ib_wc *wc) { int i; - struct smbd_request *request = - container_of(wc->wr_cqe, struct smbd_request, cqe); - struct smbd_connection *info = request->info; - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_send_io *request = + container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); + struct smbdirect_socket *sc = request->socket; + struct smbd_connection *info = + container_of(sc, struct smbd_connection, socket); - log_rdma_send(INFO, "smbd_request 0x%p completed wc->status=%d\n", + log_rdma_send(INFO, "smbdirect_send_io 0x%p completed wc->status=%d\n", request, wc->status); for (i = 0; i < request->num_sge; i++) @@ -291,12 +292,12 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) return; } - if (atomic_dec_and_test(&request->info->send_pending)) - wake_up(&request->info->wait_send_pending); + if (atomic_dec_and_test(&info->send_pending)) + wake_up(&info->wait_send_pending); - wake_up(&request->info->wait_post_send); + wake_up(&info->wait_post_send); - mempool_free(request, request->info->request_mempool); + mempool_free(request, info->request_mempool); } static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp) @@ -688,16 +689,16 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) struct smbdirect_socket_parameters *sp = &sc->parameters; struct ib_send_wr send_wr; int rc = -ENOMEM; - struct smbd_request *request; + struct smbdirect_send_io *request; struct smbdirect_negotiate_req *packet; request = mempool_alloc(info->request_mempool, GFP_KERNEL); if (!request) return rc; - request->info = info; + request->socket = sc; - packet = smbd_request_payload(request); + packet = smbdirect_send_io_payload(request); packet->min_version = cpu_to_le16(SMBDIRECT_V1); packet->max_version = cpu_to_le16(SMBDIRECT_V1); packet->reserved = 0; @@ -794,7 +795,7 @@ static int manage_keep_alive_before_sending(struct smbd_connection *info) /* Post the send request */ static int smbd_post_send(struct smbd_connection *info, - struct smbd_request *request) + struct smbdirect_send_io *request) { struct smbdirect_socket *sc = &info->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; @@ -843,7 +844,7 @@ static int smbd_post_send_iter(struct smbd_connection *info, int i, rc; int header_length; int data_length; - struct smbd_request *request; + struct smbdirect_send_io *request; struct smbdirect_data_transfer *packet; int new_credits = 0; @@ -888,14 +889,14 @@ static int smbd_post_send_iter(struct smbd_connection *info, goto err_alloc; } - request->info = info; + request->socket = sc; memset(request->sge, 0, sizeof(request->sge)); /* Fill in the data payload to find out how much data we can add */ if (iter) { struct smb_extract_to_rdma extract = { .nr_sge = 1, - .max_sge = SMBDIRECT_MAX_SEND_SGE, + .max_sge = SMBDIRECT_SEND_IO_MAX_SGE, .sge = request->sge, .device = sc->ib.dev, .local_dma_lkey = sc->ib.pd->local_dma_lkey, @@ -917,7 +918,7 @@ static int smbd_post_send_iter(struct smbd_connection *info, } /* Fill in the packet header */ - packet = smbd_request_payload(request); + packet = smbdirect_send_io_payload(request); packet->credits_requested = cpu_to_le16(sp->send_credit_target); new_credits = manage_credits_prior_sending(info); @@ -1447,11 +1448,11 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) if (WARN_ON_ONCE(sp->max_recv_size < sizeof(struct smbdirect_data_transfer))) return -ENOMEM; - scnprintf(name, MAX_NAME_LEN, "smbd_request_%p", info); + scnprintf(name, MAX_NAME_LEN, "smbdirect_send_io_%p", info); info->request_cache = kmem_cache_create( name, - sizeof(struct smbd_request) + + sizeof(struct smbdirect_send_io) + sizeof(struct smbdirect_data_transfer), 0, SLAB_HWCACHE_ALIGN, NULL); if (!info->request_cache) @@ -1562,7 +1563,7 @@ static struct smbd_connection *_smbd_get_connection( sp->max_recv_size = smbd_max_receive_size; sp->keepalive_interval_msec = smbd_keep_alive_interval * 1000; - if (sc->ib.dev->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE || + if (sc->ib.dev->attrs.max_send_sge < SMBDIRECT_SEND_IO_MAX_SGE || sc->ib.dev->attrs.max_recv_sge < SMBDIRECT_RECV_IO_MAX_SGE) { log_rdma_event(ERR, "device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", @@ -1594,7 +1595,7 @@ static struct smbd_connection *_smbd_get_connection( qp_attr.qp_context = info; qp_attr.cap.max_send_wr = sp->send_credit_target; qp_attr.cap.max_recv_wr = sp->recv_credit_max; - qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SEND_SGE; + qp_attr.cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; qp_attr.cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; qp_attr.cap.max_inline_data = 0; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 81b55c0de552..a8380bccf623 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -123,22 +123,6 @@ struct smbd_connection { unsigned int count_send_empty; }; -/* Maximum number of SGEs used by smbdirect.c in any send work request */ -#define SMBDIRECT_MAX_SEND_SGE 6 - -/* The context for a SMBD request */ -struct smbd_request { - struct smbd_connection *info; - struct ib_cqe cqe; - - /* the SGE entries for this work request */ - struct ib_sge sge[SMBDIRECT_MAX_SEND_SGE]; - int num_sge; - - /* SMBD packet header follows this structure */ - u8 packet[]; -}; - /* Create a SMBDirect session */ struct smbd_connection *smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr); From 5ef8278e3734a8817fb0b8d302572dc2f2f5c46c Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 6 Aug 2025 19:35:52 +0200 Subject: [PATCH 173/279] smb: smbdirect: add smbdirect_socket.{send,recv}_io.mem.{cache,pool} This will be the common location memory caches and pools. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 4660c05c358f..3c4a8d627aa3 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -39,6 +39,20 @@ struct smbdirect_socket { struct smbdirect_socket_parameters parameters; + /* + * The state for posted send buffers + */ + struct { + /* + * Memory pools for preallocating + * smbdirect_send_io buffers + */ + struct { + struct kmem_cache *cache; + mempool_t *pool; + } mem; + } send_io; + /* * The state for posted receive buffers */ @@ -52,6 +66,15 @@ struct smbdirect_socket { SMBDIRECT_EXPECT_DATA_TRANSFER = 3, } expected; + /* + * Memory pools for preallocating + * smbdirect_recv_io buffers + */ + struct { + struct kmem_cache *cache; + mempool_t *pool; + } mem; + /* * The list of free smbdirect_recv_io * structures From bef82d5848dadb10c2671970fae2cc4cd2c6a123 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 6 Aug 2025 19:35:53 +0200 Subject: [PATCH 174/279] smb: client: make use of smbdirect_socket.{send,recv}_io.mem.{cache,pool} This will allow common helper functions to be created later. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 65 ++++++++++++++++++++------------------- fs/smb/client/smbdirect.h | 9 ------ 2 files changed, 34 insertions(+), 40 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index c367efef8c7a..6c2af00be44c 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -287,7 +287,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { log_rdma_send(ERR, "wc->status=%d wc->opcode=%d\n", wc->status, wc->opcode); - mempool_free(request, info->request_mempool); + mempool_free(request, sc->send_io.mem.pool); smbd_disconnect_rdma_connection(info); return; } @@ -297,7 +297,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) wake_up(&info->wait_post_send); - mempool_free(request, info->request_mempool); + mempool_free(request, sc->send_io.mem.pool); } static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp) @@ -692,7 +692,7 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) struct smbdirect_send_io *request; struct smbdirect_negotiate_req *packet; - request = mempool_alloc(info->request_mempool, GFP_KERNEL); + request = mempool_alloc(sc->send_io.mem.pool, GFP_KERNEL); if (!request) return rc; @@ -751,7 +751,7 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) smbd_disconnect_rdma_connection(info); dma_mapping_failed: - mempool_free(request, info->request_mempool); + mempool_free(request, sc->send_io.mem.pool); return rc; } @@ -883,7 +883,7 @@ static int smbd_post_send_iter(struct smbd_connection *info, goto wait_send_queue; } - request = mempool_alloc(info->request_mempool, GFP_KERNEL); + request = mempool_alloc(sc->send_io.mem.pool, GFP_KERNEL); if (!request) { rc = -ENOMEM; goto err_alloc; @@ -977,7 +977,7 @@ static int smbd_post_send_iter(struct smbd_connection *info, request->sge[i].addr, request->sge[i].length, DMA_TO_DEVICE); - mempool_free(request, info->request_mempool); + mempool_free(request, sc->send_io.mem.pool); /* roll back receive credits and credits to be offered */ spin_lock(&info->lock_new_credits_offered); @@ -1235,7 +1235,7 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) init_waitqueue_head(&info->wait_receive_queues); for (i = 0; i < num_buf; i++) { - response = mempool_alloc(info->response_mempool, GFP_KERNEL); + response = mempool_alloc(sc->recv_io.mem.pool, GFP_KERNEL); if (!response) goto allocate_failed; @@ -1255,17 +1255,18 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) list_del(&response->list); info->count_receive_queue--; - mempool_free(response, info->response_mempool); + mempool_free(response, sc->recv_io.mem.pool); } return -ENOMEM; } static void destroy_receive_buffers(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; struct smbdirect_recv_io *response; while ((response = get_receive_buffer(info))) - mempool_free(response, info->response_mempool); + mempool_free(response, sc->recv_io.mem.pool); } /* Implement idle connection timer [MS-SMBD] 3.1.6.2 */ @@ -1377,11 +1378,11 @@ void smbd_destroy(struct TCP_Server_Info *server) rdma_destroy_id(sc->rdma.cm_id); /* free mempools */ - mempool_destroy(info->request_mempool); - kmem_cache_destroy(info->request_cache); + mempool_destroy(sc->send_io.mem.pool); + kmem_cache_destroy(sc->send_io.mem.cache); - mempool_destroy(info->response_mempool); - kmem_cache_destroy(info->response_cache); + mempool_destroy(sc->recv_io.mem.pool); + kmem_cache_destroy(sc->recv_io.mem.cache); sc->status = SMBDIRECT_SOCKET_DESTROYED; @@ -1429,12 +1430,14 @@ int smbd_reconnect(struct TCP_Server_Info *server) static void destroy_caches_and_workqueue(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; + destroy_receive_buffers(info); destroy_workqueue(info->workqueue); - mempool_destroy(info->response_mempool); - kmem_cache_destroy(info->response_cache); - mempool_destroy(info->request_mempool); - kmem_cache_destroy(info->request_cache); + mempool_destroy(sc->recv_io.mem.pool); + kmem_cache_destroy(sc->recv_io.mem.cache); + mempool_destroy(sc->send_io.mem.pool); + kmem_cache_destroy(sc->send_io.mem.cache); } #define MAX_NAME_LEN 80 @@ -1449,19 +1452,19 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) return -ENOMEM; scnprintf(name, MAX_NAME_LEN, "smbdirect_send_io_%p", info); - info->request_cache = + sc->send_io.mem.cache = kmem_cache_create( name, sizeof(struct smbdirect_send_io) + sizeof(struct smbdirect_data_transfer), 0, SLAB_HWCACHE_ALIGN, NULL); - if (!info->request_cache) + if (!sc->send_io.mem.cache) return -ENOMEM; - info->request_mempool = + sc->send_io.mem.pool = mempool_create(sp->send_credit_target, mempool_alloc_slab, - mempool_free_slab, info->request_cache); - if (!info->request_mempool) + mempool_free_slab, sc->send_io.mem.cache); + if (!sc->send_io.mem.pool) goto out1; scnprintf(name, MAX_NAME_LEN, "smbdirect_recv_io_%p", info); @@ -1472,17 +1475,17 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) sizeof(struct smbdirect_data_transfer)), .usersize = sp->max_recv_size - sizeof(struct smbdirect_data_transfer), }; - info->response_cache = + sc->recv_io.mem.cache = kmem_cache_create(name, sizeof(struct smbdirect_recv_io) + sp->max_recv_size, &response_args, SLAB_HWCACHE_ALIGN); - if (!info->response_cache) + if (!sc->recv_io.mem.cache) goto out2; - info->response_mempool = + sc->recv_io.mem.pool = mempool_create(sp->recv_credit_max, mempool_alloc_slab, - mempool_free_slab, info->response_cache); - if (!info->response_mempool) + mempool_free_slab, sc->recv_io.mem.cache); + if (!sc->recv_io.mem.pool) goto out3; scnprintf(name, MAX_NAME_LEN, "smbd_%p", info); @@ -1501,13 +1504,13 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) out5: destroy_workqueue(info->workqueue); out4: - mempool_destroy(info->response_mempool); + mempool_destroy(sc->recv_io.mem.pool); out3: - kmem_cache_destroy(info->response_cache); + kmem_cache_destroy(sc->recv_io.mem.cache); out2: - mempool_destroy(info->request_mempool); + mempool_destroy(sc->send_io.mem.pool); out1: - kmem_cache_destroy(info->request_cache); + kmem_cache_destroy(sc->send_io.mem.cache); return -ENOMEM; } diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index a8380bccf623..0d4d45428c85 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -105,15 +105,6 @@ struct smbd_connection { struct workqueue_struct *workqueue; struct delayed_work idle_timer_work; - /* Memory pool for preallocating buffers */ - /* request pool for RDMA send */ - struct kmem_cache *request_cache; - mempool_t *request_mempool; - - /* response pool for RDMA receive */ - struct kmem_cache *response_cache; - mempool_t *response_mempool; - /* for debug purposes */ unsigned int count_get_receive_buffer; unsigned int count_put_receive_buffer; From da274853fe7dbc7124e2dd84dad802be52a09321 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 29 Jul 2025 15:12:32 -0400 Subject: [PATCH 175/279] cpu: Remove obsolete comment from takedown_cpu() takedown_cpu() has a comment about "all preempt/rcu users must observe !cpu_active()" which is kind of meaningless in this function. This comment was originally introduced by commit 6acce3ef8452 ("sched: Remove get_online_cpus() usage") when _cpu_down() was setting cpu_active_mask and synchronize_rcu()/synchronize_sched() were added after that. Later commit 40190a78f85f ("sched/hotplug: Convert cpu_[in]active notifiers to state machine") added a new CPUHP_AP_ACTIVE hotplug state to set/clear cpu_active_mask. The following commit b2454caa8977 ("sched/hotplug: Move sync_rcu to be with set_cpu_active(false)") move the synchronize_*() calls to sched_cpu_deactivate() associated with the new hotplug state, but left the comment behind. Remove this comment as it is no longer relevant in takedown_cpu(). Signed-off-by: Waiman Long Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250729191232.664931-1-longman@redhat.com --- kernel/cpu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index faf0f23fc5d8..db9f6c539b28 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1309,9 +1309,6 @@ static int takedown_cpu(unsigned int cpu) */ irq_lock_sparse(); - /* - * So now all preempt/rcu users must observe !cpu_active(). - */ err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu)); if (err) { /* CPU refused to die */ From 234d1eff5d4987024be9d40ac07b918a5ae8db1a Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 4 Aug 2025 11:26:25 +0200 Subject: [PATCH 176/279] xfrm: restore GSO for SW crypto Commit 49431af6c4ef incorrectly assumes that the GSO path is only used by HW offload, but it's also useful for SW crypto. This patch re-enables GSO for SW crypto. It's not an exact revert to preserve the other changes made to xfrm_dev_offload_ok afterwards, but it reverts all of its effects. Fixes: 49431af6c4ef ("xfrm: rely on XFRM offload") Signed-off-by: Sabrina Dubroca Reviewed-by: Leon Romanovsky Reviewed-by: Zhu Yanjun Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_device.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index d2819baea414..1f88472aaac0 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -415,10 +415,12 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) struct net_device *dev = x->xso.dev; bool check_tunnel_size; - if (x->xso.type == XFRM_DEV_OFFLOAD_UNSPECIFIED) + if (!x->type_offload || + (x->xso.type == XFRM_DEV_OFFLOAD_UNSPECIFIED && x->encap)) return false; - if ((dev == xfrm_dst_path(dst)->dev) && !xdst->child->xfrm) { + if ((!dev || dev == xfrm_dst_path(dst)->dev) && + !xdst->child->xfrm) { mtu = xfrm_state_mtu(x, xdst->child_mtu_cached); if (skb->len <= mtu) goto ok; @@ -430,6 +432,9 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) return false; ok: + if (!dev) + return true; + check_tunnel_size = x->xso.type == XFRM_DEV_OFFLOAD_PACKET && x->props.mode == XFRM_MODE_TUNNEL; switch (x->props.family) { From 65f079a6c446a939eefe71e6d5957d5d6365fcf9 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 4 Aug 2025 11:26:26 +0200 Subject: [PATCH 177/279] xfrm: bring back device check in validate_xmit_xfrm This is partial revert of commit d53dda291bbd993a29b84d358d282076e3d01506. This change causes traffic using GSO with SW crypto running through a NIC capable of HW offload to no longer get segmented during validate_xmit_xfrm, and is unrelated to the bonding use case mentioned in the commit. Fixes: d53dda291bbd ("xfrm: Remove unneeded device check from validate_xmit_xfrm") Signed-off-by: Sabrina Dubroca Reviewed-by: Cosmin Ratiu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 1f88472aaac0..c7a1f080d2de 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -155,7 +155,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur return skb; } - if (skb_is_gso(skb) && unlikely(xmit_xfrm_check_overflow(skb))) { + if (skb_is_gso(skb) && (unlikely(x->xso.dev != dev) || + unlikely(xmit_xfrm_check_overflow(skb)))) { struct sk_buff *segs; /* Packet got rerouted, fixup features and segment it. */ From 1118aaa3b35157777890fffab91d8c1da841b20b Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 4 Aug 2025 11:26:27 +0200 Subject: [PATCH 178/279] udp: also consider secpath when evaluating ipsec use for checksumming Commit b40c5f4fde22 ("udp: disable inner UDP checksum offloads in IPsec case") tried to fix checksumming in UFO when the packets are going through IPsec, so that we can't rely on offloads because the UDP header and payload will be encrypted. But when doing a TCP test over VXLAN going through IPsec transport mode with GSO enabled (esp4_offload module loaded), I'm seeing broken UDP checksums on the encap after successful decryption. The skbs get to udp4_ufo_fragment/__skb_udp_tunnel_segment via __dev_queue_xmit -> validate_xmit_skb -> skb_gso_segment and at this point we've already dropped the dst (unless the device sets IFF_XMIT_DST_RELEASE, which is not common), so need_ipsec is false and we proceed with checksum offload. Make need_ipsec also check the secpath, which is not dropped on this callpath. Fixes: b40c5f4fde22 ("udp: disable inner UDP checksum offloads in IPsec case") Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/ipv4/udp_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 5128e2a5b00a..b1f3fd302e9d 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -217,7 +217,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM); skb->remcsum_offload = remcsum; - need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb)); + need_ipsec = (skb_dst(skb) && dst_xfrm(skb_dst(skb))) || skb_sec_path(skb); /* Try to offload checksum if possible */ offload_csum = !!(need_csum && !need_ipsec && From 6bd05db76751c872970c63be41c97172cfbec4c1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Aug 2025 21:25:37 +0200 Subject: [PATCH 179/279] ALSA: hda/realtek: Restrict prompt only for CONFIG_EXPERT The split of Realtek HD-audio codec driver may cause confusions especially when migrating from the previous kernel configurations because it's hard to know which driver to be enabled. Although we've already set default=y for those codec drivers, it may still make people changing the stuff unnecessarily without knowing its side effect. This patch is for avoiding such pitfalls by marking the prompt of each Realtek codec driver with CONFIG_EXPERT. For "normal" users (that is, unless CONFIG_EXPERT is set), all Realtek HD-audio codecs are enabled together with CONFIG_SND_HDA_CODEC_REALTEK; this is the very same situation like the previous kernels, after all. For users who really care about the minimalistic configuration, they can turn each driver on/off individually after setting CONFIG_EXPERT=y. The patch also adds the missing help text to the top-level CONFIG_SND_HDA_CODEC_REALTEK together with the explanation of individual choices, too. Fixes: aeeb85f26c3b ("ALSA: hda: Split Realtek HD-audio codec driver") Link: https://lore.kernel.org/10172c80-daec-4e20-ab57-a483cf1afc02@molgen.mpg.de Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250806192541.21949-2-tiwai@suse.de --- sound/hda/codecs/realtek/Kconfig | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/sound/hda/codecs/realtek/Kconfig b/sound/hda/codecs/realtek/Kconfig index 20899f3fc051..cdc6d9509a01 100644 --- a/sound/hda/codecs/realtek/Kconfig +++ b/sound/hda/codecs/realtek/Kconfig @@ -2,6 +2,12 @@ menuconfig SND_HDA_CODEC_REALTEK tristate "Realtek HD-audio codec support" + help + Say Y or M here to include Realtek HD-audio codec support. + + This will enable all Realtek HD-audio codec drivers as default, + but you can enable/disable each codec driver individually, too + (only when CONFIG_EXPERT is set). if SND_HDA_CODEC_REALTEK @@ -12,7 +18,7 @@ config SND_HDA_CODEC_REALTEK_LIB select SND_HDA_SCODEC_COMPONENT config SND_HDA_CODEC_ALC260 - tristate "Build Realtek ALC260 HD-audio codec support" + tristate "Build Realtek ALC260 HD-audio codec support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y @@ -20,7 +26,7 @@ config SND_HDA_CODEC_ALC260 Say Y or M here to include Realtek ALC260 HD-audio codec support config SND_HDA_CODEC_ALC262 - tristate "Build Realtek ALC262 HD-audio codec support" + tristate "Build Realtek ALC262 HD-audio codec support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y @@ -28,7 +34,7 @@ config SND_HDA_CODEC_ALC262 Say Y or M here to include Realtek ALC262 HD-audio codec support config SND_HDA_CODEC_ALC268 - tristate "Build Realtek ALC268 HD-audio codec support" + tristate "Build Realtek ALC268 HD-audio codec support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y @@ -37,7 +43,7 @@ config SND_HDA_CODEC_ALC268 codec support config SND_HDA_CODEC_ALC269 - tristate "Build Realtek ALC269 HD-audio codecs support" + tristate "Build Realtek ALC269 HD-audio codecs support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y @@ -46,7 +52,7 @@ config SND_HDA_CODEC_ALC269 codec support config SND_HDA_CODEC_ALC662 - tristate "Build Realtek ALC662 HD-audio codecs support" + tristate "Build Realtek ALC662 HD-audio codecs support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y @@ -55,7 +61,7 @@ config SND_HDA_CODEC_ALC662 codec support config SND_HDA_CODEC_ALC680 - tristate "Build Realtek ALC680 HD-audio codecs support" + tristate "Build Realtek ALC680 HD-audio codecs support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y @@ -63,7 +69,7 @@ config SND_HDA_CODEC_ALC680 Say Y or M here to include Realtek ALC680 HD-audio codec support config SND_HDA_CODEC_ALC861 - tristate "Build Realtek ALC861 HD-audio codecs support" + tristate "Build Realtek ALC861 HD-audio codecs support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y @@ -71,7 +77,7 @@ config SND_HDA_CODEC_ALC861 Say Y or M here to include Realtek ALC861 HD-audio codec support config SND_HDA_CODEC_ALC861VD - tristate "Build Realtek ALC861-VD HD-audio codecs support" + tristate "Build Realtek ALC861-VD HD-audio codecs support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y @@ -79,7 +85,7 @@ config SND_HDA_CODEC_ALC861VD Say Y or M here to include Realtek ALC861-VD HD-audio codec support config SND_HDA_CODEC_ALC880 - tristate "Build Realtek ALC880 HD-audio codecs support" + tristate "Build Realtek ALC880 HD-audio codecs support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y @@ -87,7 +93,7 @@ config SND_HDA_CODEC_ALC880 Say Y or M here to include Realtek ALC880 HD-audio codec support config SND_HDA_CODEC_ALC882 - tristate "Build Realtek ALC882 HD-audio codecs support" + tristate "Build Realtek ALC882 HD-audio codecs support" if EXPERT depends on INPUT select SND_HDA_CODEC_REALTEK_LIB default y @@ -96,5 +102,3 @@ config SND_HDA_CODEC_ALC882 codec support endif - - From 606fcab9aa212ba7d99a259663411d665070e317 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Aug 2025 21:25:38 +0200 Subject: [PATCH 180/279] ALSA: hda/hdmi: Restrict prompt only for CONFIG_EXPERT The split of HDMI codec driver may confuse users when migrating from the previous kernel configs and leave some drivers disabled unexpectedly. Although we've already set y to all HDMI codec drivers as default, it's still safer to paper over the wrong choices. This patch marks the prompt of each HDMI codec driver with CONFIG_EXPERT, so that they are all enabled when the top-level CONFIG_SND_HDA_CODEC_HDMI is set. For users who really care about the minimalistic configuration, they can turn each driver on/off individually after setting CONFIG_EXPERT=y. The patch also adds the missing help text to the top-level CONFIG_SND_HDA_CODEC_HDMI together with the explanation of individual choices, too. Fixes: 73cd0490819d ("ALSA: hda/hdmi: Split vendor codec drivers") Link: https://lore.kernel.org/10172c80-daec-4e20-ab57-a483cf1afc02@molgen.mpg.de Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250806192541.21949-3-tiwai@suse.de --- sound/hda/codecs/hdmi/Kconfig | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/sound/hda/codecs/hdmi/Kconfig b/sound/hda/codecs/hdmi/Kconfig index 973ca4ca077b..6ea3553ba9f8 100644 --- a/sound/hda/codecs/hdmi/Kconfig +++ b/sound/hda/codecs/hdmi/Kconfig @@ -2,11 +2,17 @@ menuconfig SND_HDA_CODEC_HDMI tristate "HD-audio HDMI codec support" + help + Say Y or M here to include HD-audio HDMI/DislayPort codec support. + + This will enable all HDMI/DP codec drivers as default, but you can + enable/disable each codec driver individually, too (only when + CONFIG_EXPERT is set). if SND_HDA_CODEC_HDMI config SND_HDA_CODEC_HDMI_GENERIC - tristate "Generic HDMI/DisplayPort HD-audio codec support" + tristate "Generic HDMI/DisplayPort HD-audio codec support" if EXPERT select SND_DYNAMIC_MINORS select SND_PCM_ELD default y @@ -18,14 +24,14 @@ config SND_HDA_CODEC_HDMI_GENERIC to assure the multiple streams for DP-MST support. config SND_HDA_CODEC_HDMI_SIMPLE - tristate "Simple HDMI/DisplayPort HD-audio codec support" + tristate "Simple HDMI/DisplayPort HD-audio codec support" if EXPERT default y help Say Y or M here to include Simple HDMI and DisplayPort HD-audio codec support for VIA and other codecs. config SND_HDA_CODEC_HDMI_INTEL - tristate "Intel HDMI/DisplayPort HD-audio codec support" + tristate "Intel HDMI/DisplayPort HD-audio codec support" if EXPERT select SND_HDA_CODEC_HDMI_GENERIC default y help @@ -48,7 +54,7 @@ config SND_HDA_INTEL_HDMI_SILENT_STREAM are kept reserved both at transmitter and receiver. config SND_HDA_CODEC_HDMI_ATI - tristate "AMD/ATI HDMI/DisplayPort HD-audio codec support" + tristate "AMD/ATI HDMI/DisplayPort HD-audio codec support" if EXPERT select SND_HDA_CODEC_HDMI_GENERIC default y help @@ -56,7 +62,7 @@ config SND_HDA_CODEC_HDMI_ATI HD-audio codec support. config SND_HDA_CODEC_HDMI_NVIDIA - tristate "Nvidia HDMI/DisplayPort HD-audio codec support" + tristate "Nvidia HDMI/DisplayPort HD-audio codec support" if EXPERT select SND_HDA_CODEC_HDMI_GENERIC default y help @@ -64,7 +70,7 @@ config SND_HDA_CODEC_HDMI_NVIDIA support for the recent Nvidia graphics cards. config SND_HDA_CODEC_HDMI_NVIDIA_MCP - tristate "Legacy Nvidia HDMI/DisplayPort HD-audio codec support" + tristate "Legacy Nvidia HDMI/DisplayPort HD-audio codec support" if EXPERT select SND_HDA_CODEC_HDMI_SIMPLE default y help @@ -72,7 +78,7 @@ config SND_HDA_CODEC_HDMI_NVIDIA_MCP support for the legacy Nvidia graphics like MCP73, MCP67, MCP77/78. config SND_HDA_CODEC_HDMI_TEGRA - tristate "Nvidia Tegra HDMI/DisplayPort HD-audio codec support" + tristate "Nvidia Tegra HDMI/DisplayPort HD-audio codec support" if EXPERT select SND_HDA_CODEC_HDMI_GENERIC default y help From e8e4f3c242cc26de9d69bd8b3a678d1e50980abe Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 6 Aug 2025 21:25:39 +0200 Subject: [PATCH 181/279] ALSA: hda/cirrus: Restrict prompt only for CONFIG_EXPERT The split of Cirrus HD-audio codec driver may confuse users when migrating from the previous kernel configs and leave the needed drivers disabled. Although we've already set y as default, it's still safer to paper over the wrong choices. This patch marks the prompt of split CS420x and CS421x codec drivers with CONFIG_EXPERT, so that they are all enabled when the top-level CONFIG_SND_HDA_CODEC_CIRRUS is set. For users who really care about the minimalistic configuration, they can turn each driver on/off individually after setting CONFIG_EXPERT=y. This patch adds the missing help text to the top-level CONFIG_SND_HDA_CIRRUS_CODEC together with the explanation of individual choices, and corrects the help texts that don't fit well nowadays, too. Fixes: 1cb8744a36c7 ("ALSA: hda/cirrus: Split to cs420x and cs421x drivers") Link: https://lore.kernel.org/10172c80-daec-4e20-ab57-a483cf1afc02@molgen.mpg.de Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250806192541.21949-4-tiwai@suse.de --- sound/hda/codecs/cirrus/Kconfig | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/sound/hda/codecs/cirrus/Kconfig b/sound/hda/codecs/cirrus/Kconfig index 33cfe52713bc..ec6cbcaf64f0 100644 --- a/sound/hda/codecs/cirrus/Kconfig +++ b/sound/hda/codecs/cirrus/Kconfig @@ -2,27 +2,31 @@ menuconfig SND_HDA_CODEC_CIRRUS tristate "Cirrus Logic HD-audio codec support" + help + Say Y or M here to include Cirrus Logic HD-audio codec support. + + This will enable both CS420x and CS421x HD-audio codec drivers + as default, but you can enable/disable each codec driver + individually, too (only when CONFIG_EXPERT is set). if SND_HDA_CODEC_CIRRUS config SND_HDA_CODEC_CS420X - tristate "Build Cirrus Logic CS420x codec support" + tristate "Build Cirrus Logic CS420x codec support" if EXPERT select SND_HDA_GENERIC default y help - Say Y or M here to include Cirrus Logic CS420x codec support in - snd-hda-intel driver + Say Y or M here to include Cirrus Logic CS420x codec support comment "Set to Y if you want auto-loading the codec driver" depends on SND_HDA=y && SND_HDA_CODEC_CS420X=m config SND_HDA_CODEC_CS421X - tristate "Build Cirrus Logic CS421x codec support" + tristate "Build Cirrus Logic CS421x codec support" if EXPERT select SND_HDA_GENERIC default y help - Say Y or M here to include Cirrus Logic CS421x codec support in - snd-hda-intel driver + Say Y or M here to include Cirrus Logic CS421x codec support comment "Set to Y if you want auto-loading the codec driver" depends on SND_HDA=y && SND_HDA_CODEC_CS421X=m @@ -31,8 +35,8 @@ config SND_HDA_CODEC_CS8409 tristate "Build Cirrus Logic HDA bridge support" select SND_HDA_GENERIC help - Say Y or M here to include Cirrus Logic HDA bridge support in - snd-hda-intel driver, such as CS8409. + Say Y or M here to include Cirrus Logic HDA bridge support + such as CS8409. comment "Set to Y if you want auto-loading the codec driver" depends on SND_HDA=y && SND_HDA_CODEC_CS8409=m From 397a46c9aa3343e8efe6847bdaa124945bab1de4 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 25 Jul 2025 09:46:50 +0200 Subject: [PATCH 182/279] gpio: remove legacy GPIO line value setter callbacks With no more users of the legacy GPIO line value setters - .set() and .set_multiple() - we can now remove them from the kernel. Link: https://lore.kernel.org/r/20250725074651.14002-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 27 ++++++--------------------- include/linux/gpio/driver.h | 7 ------- 2 files changed, 6 insertions(+), 28 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index a93d2a9355e2..9ac4c23d656a 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1037,11 +1037,6 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, int base = 0; int ret; - /* Only allow one set() and one set_multiple(). */ - if ((gc->set && gc->set_rv) || - (gc->set_multiple && gc->set_multiple_rv)) - return -EINVAL; - /* * First: allocate and populate the internal stat container, and * set up the struct device. @@ -2891,19 +2886,14 @@ static int gpiochip_set(struct gpio_chip *gc, unsigned int offset, int value) lockdep_assert_held(&gc->gpiodev->srcu); - if (WARN_ON(unlikely(!gc->set && !gc->set_rv))) + if (WARN_ON(unlikely(!gc->set_rv))) return -EOPNOTSUPP; - if (gc->set_rv) { - ret = gc->set_rv(gc, offset, value); - if (ret > 0) - ret = -EBADE; + ret = gc->set_rv(gc, offset, value); + if (ret > 0) + ret = -EBADE; - return ret; - } - - gc->set(gc, offset, value); - return 0; + return ret; } static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value) @@ -2919,7 +2909,7 @@ static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value) * output-only, but if there is then not even a .set() operation it * is pretty tricky to drive the output line. */ - if (!guard.gc->set && !guard.gc->set_rv && !guard.gc->direction_output) { + if (!guard.gc->set_rv && !guard.gc->direction_output) { gpiod_warn(desc, "%s: missing set() and direction_output() operations\n", __func__); @@ -3673,11 +3663,6 @@ static int gpiochip_set_multiple(struct gpio_chip *gc, return ret; } - if (gc->set_multiple) { - gc->set_multiple(gc, mask, bits); - return 0; - } - /* set outputs if the corresponding mask bit is set */ for_each_set_bit(i, mask, gc->ngpio) { ret = gpiochip_set(gc, i, test_bit(i, bits)); diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 4b984e8f8fcd..90567dde7d8e 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -347,8 +347,6 @@ struct gpio_irq_chip { * @get: returns value for signal "offset", 0=low, 1=high, or negative error * @get_multiple: reads values for multiple signals defined by "mask" and * stores them in "bits", returns 0 on success or negative error - * @set: **DEPRECATED** - please use set_rv() instead - * @set_multiple: **DEPRECATED** - please use set_multiple_rv() instead * @set_rv: assigns output value for signal "offset", returns 0 on success or * negative error value * @set_multiple_rv: assigns output values for multiple signals defined by @@ -445,11 +443,6 @@ struct gpio_chip { int (*get_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); - void (*set)(struct gpio_chip *gc, - unsigned int offset, int value); - void (*set_multiple)(struct gpio_chip *gc, - unsigned long *mask, - unsigned long *bits); int (*set_rv)(struct gpio_chip *gc, unsigned int offset, int value); From d9d87d90cc0b10cd56ae353f50b11417e7d21712 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 17 Jul 2025 15:21:26 +0200 Subject: [PATCH 183/279] treewide: rename GPIO set callbacks back to their original names The conversion of all GPIO drivers to using the .set_rv() and .set_multiple_rv() callbacks from struct gpio_chip (which - unlike their predecessors - return an integer and allow the controller drivers to indicate failures to users) is now complete and the legacy ones have been removed. Rename the new callbacks back to their original names in one sweeping change. Signed-off-by: Bartosz Golaszewski --- arch/arm/common/sa1111.c | 4 ++-- arch/arm/common/scoop.c | 2 +- arch/arm/mach-s3c/gpio-samsung.c | 2 +- arch/arm/mach-sa1100/assabet.c | 2 +- arch/arm/mach-sa1100/neponset.c | 2 +- arch/arm/plat-orion/gpio.c | 2 +- arch/m68k/coldfire/gpio.c | 2 +- arch/mips/alchemy/common/gpiolib.c | 6 ++--- arch/mips/bcm63xx/gpio.c | 2 +- arch/mips/kernel/gpio_txx9.c | 2 +- arch/mips/rb532/gpio.c | 2 +- arch/mips/txx9/generic/setup.c | 2 +- arch/powerpc/platforms/44x/gpio.c | 2 +- arch/powerpc/platforms/52xx/mpc52xx_gpt.c | 2 +- .../powerpc/platforms/83xx/mcu_mpc8349emitx.c | 2 +- arch/powerpc/platforms/8xx/cpm1.c | 4 ++-- arch/powerpc/sysdev/cpm_common.c | 2 +- drivers/bcma/driver_gpio.c | 2 +- drivers/gpio/gpio-74x164.c | 4 ++-- drivers/gpio/gpio-adnp.c | 2 +- drivers/gpio/gpio-adp5520.c | 2 +- drivers/gpio/gpio-adp5585.c | 2 +- drivers/gpio/gpio-aggregator.c | 4 ++-- drivers/gpio/gpio-altera-a10sr.c | 2 +- drivers/gpio/gpio-altera.c | 2 +- drivers/gpio/gpio-amd-fch.c | 2 +- drivers/gpio/gpio-amd8111.c | 2 +- drivers/gpio/gpio-arizona.c | 2 +- drivers/gpio/gpio-aspeed-sgpio.c | 2 +- drivers/gpio/gpio-aspeed.c | 2 +- drivers/gpio/gpio-bcm-kona.c | 2 +- drivers/gpio/gpio-bd71815.c | 2 +- drivers/gpio/gpio-bd71828.c | 2 +- drivers/gpio/gpio-bd9571mwv.c | 2 +- drivers/gpio/gpio-bt8xx.c | 2 +- drivers/gpio/gpio-cgbc.c | 2 +- drivers/gpio/gpio-creg-snps.c | 2 +- drivers/gpio/gpio-cros-ec.c | 2 +- drivers/gpio/gpio-crystalcove.c | 2 +- drivers/gpio/gpio-cs5535.c | 2 +- drivers/gpio/gpio-da9052.c | 2 +- drivers/gpio/gpio-da9055.c | 2 +- drivers/gpio/gpio-davinci.c | 2 +- drivers/gpio/gpio-dln2.c | 2 +- drivers/gpio/gpio-eic-sprd.c | 2 +- drivers/gpio/gpio-em.c | 2 +- drivers/gpio/gpio-exar.c | 2 +- drivers/gpio/gpio-f7188x.c | 2 +- drivers/gpio/gpio-graniterapids.c | 2 +- drivers/gpio/gpio-gw-pld.c | 2 +- drivers/gpio/gpio-htc-egpio.c | 2 +- drivers/gpio/gpio-ich.c | 2 +- drivers/gpio/gpio-imx-scu.c | 2 +- drivers/gpio/gpio-it87.c | 2 +- drivers/gpio/gpio-janz-ttl.c | 2 +- drivers/gpio/gpio-kempld.c | 2 +- drivers/gpio/gpio-latch.c | 4 ++-- drivers/gpio/gpio-ljca.c | 2 +- drivers/gpio/gpio-logicvc.c | 2 +- drivers/gpio/gpio-loongson-64bit.c | 2 +- drivers/gpio/gpio-loongson.c | 2 +- drivers/gpio/gpio-lp3943.c | 2 +- drivers/gpio/gpio-lp873x.c | 2 +- drivers/gpio/gpio-lp87565.c | 2 +- drivers/gpio/gpio-lpc18xx.c | 2 +- drivers/gpio/gpio-lpc32xx.c | 10 ++++---- drivers/gpio/gpio-macsmc.c | 2 +- drivers/gpio/gpio-madera.c | 2 +- drivers/gpio/gpio-max730x.c | 2 +- drivers/gpio/gpio-max732x.c | 4 ++-- drivers/gpio/gpio-max77620.c | 2 +- drivers/gpio/gpio-max77650.c | 2 +- drivers/gpio/gpio-max77759.c | 2 +- drivers/gpio/gpio-mb86s7x.c | 2 +- drivers/gpio/gpio-mc33880.c | 2 +- drivers/gpio/gpio-ml-ioh.c | 2 +- drivers/gpio/gpio-mm-lantiq.c | 2 +- drivers/gpio/gpio-mmio.c | 24 +++++++++---------- drivers/gpio/gpio-mockup.c | 4 ++-- drivers/gpio/gpio-moxtet.c | 2 +- drivers/gpio/gpio-mpc5200.c | 4 ++-- drivers/gpio/gpio-mpfs.c | 2 +- drivers/gpio/gpio-mpsse.c | 4 ++-- drivers/gpio/gpio-msc313.c | 2 +- drivers/gpio/gpio-mvebu.c | 2 +- drivers/gpio/gpio-nomadik.c | 2 +- drivers/gpio/gpio-npcm-sgpio.c | 4 ++-- drivers/gpio/gpio-octeon.c | 2 +- drivers/gpio/gpio-omap.c | 4 ++-- drivers/gpio/gpio-palmas.c | 2 +- drivers/gpio/gpio-pca953x.c | 4 ++-- drivers/gpio/gpio-pca9570.c | 2 +- drivers/gpio/gpio-pcf857x.c | 4 ++-- drivers/gpio/gpio-pch.c | 2 +- drivers/gpio/gpio-pl061.c | 2 +- drivers/gpio/gpio-pxa.c | 2 +- drivers/gpio/gpio-raspberrypi-exp.c | 2 +- drivers/gpio/gpio-rc5t583.c | 2 +- drivers/gpio/gpio-rcar.c | 4 ++-- drivers/gpio/gpio-rdc321x.c | 2 +- drivers/gpio/gpio-reg.c | 6 ++--- drivers/gpio/gpio-regmap.c | 4 ++-- drivers/gpio/gpio-rockchip.c | 2 +- drivers/gpio/gpio-rtd.c | 2 +- drivers/gpio/gpio-sa1100.c | 2 +- drivers/gpio/gpio-sama5d2-piobu.c | 2 +- drivers/gpio/gpio-sch.c | 2 +- drivers/gpio/gpio-sch311x.c | 2 +- drivers/gpio/gpio-sim.c | 4 ++-- drivers/gpio/gpio-siox.c | 2 +- drivers/gpio/gpio-spear-spics.c | 2 +- drivers/gpio/gpio-sprd.c | 2 +- drivers/gpio/gpio-stmpe.c | 2 +- drivers/gpio/gpio-stp-xway.c | 2 +- drivers/gpio/gpio-syscon.c | 4 ++-- drivers/gpio/gpio-tangier.c | 2 +- drivers/gpio/gpio-tc3589x.c | 2 +- drivers/gpio/gpio-tegra.c | 2 +- drivers/gpio/gpio-tegra186.c | 2 +- drivers/gpio/gpio-thunderx.c | 4 ++-- drivers/gpio/gpio-timberdale.c | 2 +- drivers/gpio/gpio-tpic2810.c | 4 ++-- drivers/gpio/gpio-tps65086.c | 2 +- drivers/gpio/gpio-tps65218.c | 2 +- drivers/gpio/gpio-tps65219.c | 4 ++-- drivers/gpio/gpio-tps6586x.c | 2 +- drivers/gpio/gpio-tps65910.c | 2 +- drivers/gpio/gpio-tps65912.c | 2 +- drivers/gpio/gpio-tps68470.c | 2 +- drivers/gpio/gpio-tqmx86.c | 2 +- drivers/gpio/gpio-ts4900.c | 2 +- drivers/gpio/gpio-ts5500.c | 2 +- drivers/gpio/gpio-twl4030.c | 2 +- drivers/gpio/gpio-twl6040.c | 2 +- drivers/gpio/gpio-uniphier.c | 4 ++-- drivers/gpio/gpio-viperboard.c | 4 ++-- drivers/gpio/gpio-virtio.c | 2 +- drivers/gpio/gpio-vx855.c | 2 +- drivers/gpio/gpio-wcd934x.c | 2 +- drivers/gpio/gpio-wcove.c | 2 +- drivers/gpio/gpio-winbond.c | 2 +- drivers/gpio/gpio-wm831x.c | 2 +- drivers/gpio/gpio-wm8350.c | 2 +- drivers/gpio/gpio-wm8994.c | 2 +- drivers/gpio/gpio-xgene.c | 2 +- drivers/gpio/gpio-xilinx.c | 4 ++-- drivers/gpio/gpio-xlp.c | 2 +- drivers/gpio/gpio-xra1403.c | 2 +- drivers/gpio/gpio-xtensa.c | 2 +- drivers/gpio/gpio-zevio.c | 2 +- drivers/gpio/gpio-zynq.c | 2 +- drivers/gpio/gpio-zynqmp-modepin.c | 2 +- drivers/gpio/gpiolib.c | 10 ++++---- drivers/gpu/drm/bridge/ti-sn65dsi86.c | 2 +- drivers/hid/hid-cp2112.c | 2 +- drivers/hid/hid-mcp2200.c | 4 ++-- drivers/hid/hid-mcp2221.c | 2 +- drivers/hwmon/ltc2992.c | 4 ++-- drivers/hwmon/pmbus/ucd9000.c | 2 +- drivers/i2c/muxes/i2c-mux-ltc4306.c | 2 +- drivers/iio/adc/ad4130.c | 2 +- drivers/iio/adc/ad4170-4.c | 2 +- drivers/iio/adc/ad7768-1.c | 2 +- drivers/iio/adc/rohm-bd79124.c | 4 ++-- drivers/iio/adc/ti-ads7950.c | 2 +- drivers/iio/addac/ad74115.c | 2 +- drivers/iio/addac/ad74413r.c | 4 ++-- drivers/iio/dac/ad5592r-base.c | 2 +- drivers/input/keyboard/adp5588-keys.c | 2 +- drivers/input/touchscreen/ad7879.c | 2 +- drivers/leds/blink/leds-lgm-sso.c | 2 +- drivers/leds/leds-pca9532.c | 2 +- drivers/leds/leds-pca955x.c | 2 +- drivers/leds/leds-tca6507.c | 2 +- drivers/media/dvb-frontends/cxd2820r_core.c | 2 +- drivers/media/i2c/ds90ub913.c | 2 +- drivers/media/i2c/ds90ub953.c | 2 +- drivers/media/i2c/max9286.c | 2 +- drivers/media/i2c/max96717.c | 2 +- drivers/media/pci/solo6x10/solo6x10-gpio.c | 2 +- drivers/mfd/sm501.c | 2 +- drivers/mfd/tps65010.c | 2 +- drivers/mfd/ucb1x00-core.c | 2 +- .../misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c | 2 +- drivers/misc/ti_fpc202.c | 2 +- drivers/net/can/spi/mcp251x.c | 4 ++-- drivers/net/dsa/mt7530.c | 2 +- drivers/net/dsa/vitesse-vsc73xx-core.c | 2 +- drivers/net/phy/qcom/qca807x.c | 2 +- drivers/pinctrl/actions/pinctrl-owl.c | 2 +- drivers/pinctrl/bcm/pinctrl-bcm2835.c | 4 ++-- drivers/pinctrl/bcm/pinctrl-iproc-gpio.c | 2 +- drivers/pinctrl/bcm/pinctrl-nsp-gpio.c | 2 +- drivers/pinctrl/cirrus/pinctrl-cs42l43.c | 2 +- drivers/pinctrl/cirrus/pinctrl-lochnagar.c | 2 +- drivers/pinctrl/intel/pinctrl-baytrail.c | 2 +- drivers/pinctrl/intel/pinctrl-cherryview.c | 2 +- drivers/pinctrl/intel/pinctrl-intel.c | 2 +- drivers/pinctrl/intel/pinctrl-lynxpoint.c | 2 +- drivers/pinctrl/mediatek/pinctrl-airoha.c | 2 +- drivers/pinctrl/mediatek/pinctrl-moore.c | 2 +- drivers/pinctrl/mediatek/pinctrl-mtk-common.c | 2 +- drivers/pinctrl/mediatek/pinctrl-paris.c | 2 +- drivers/pinctrl/meson/pinctrl-amlogic-a4.c | 2 +- drivers/pinctrl/meson/pinctrl-meson.c | 2 +- drivers/pinctrl/mvebu/pinctrl-armada-37xx.c | 2 +- drivers/pinctrl/nomadik/pinctrl-abx500.c | 2 +- drivers/pinctrl/nuvoton/pinctrl-ma35.c | 2 +- drivers/pinctrl/pinctrl-amd.c | 2 +- drivers/pinctrl/pinctrl-amdisp.c | 2 +- drivers/pinctrl/pinctrl-apple-gpio.c | 2 +- drivers/pinctrl/pinctrl-as3722.c | 2 +- drivers/pinctrl/pinctrl-at91-pio4.c | 4 ++-- drivers/pinctrl/pinctrl-at91.c | 4 ++-- drivers/pinctrl/pinctrl-aw9523.c | 4 ++-- drivers/pinctrl/pinctrl-axp209.c | 4 ++-- drivers/pinctrl/pinctrl-cy8c95x0.c | 4 ++-- drivers/pinctrl/pinctrl-da9062.c | 2 +- drivers/pinctrl/pinctrl-digicolor.c | 2 +- drivers/pinctrl/pinctrl-ingenic.c | 2 +- drivers/pinctrl/pinctrl-keembay.c | 2 +- drivers/pinctrl/pinctrl-mcp23s08.c | 4 ++-- drivers/pinctrl/pinctrl-microchip-sgpio.c | 2 +- drivers/pinctrl/pinctrl-ocelot.c | 2 +- drivers/pinctrl/pinctrl-pic32.c | 2 +- drivers/pinctrl/pinctrl-pistachio.c | 2 +- drivers/pinctrl/pinctrl-rk805.c | 2 +- drivers/pinctrl/pinctrl-rp1.c | 2 +- drivers/pinctrl/pinctrl-st.c | 2 +- drivers/pinctrl/pinctrl-stmfx.c | 2 +- drivers/pinctrl/pinctrl-sx150x.c | 4 ++-- drivers/pinctrl/pinctrl-xway.c | 2 +- drivers/pinctrl/qcom/pinctrl-lpass-lpi.c | 2 +- drivers/pinctrl/qcom/pinctrl-msm.c | 2 +- drivers/pinctrl/qcom/pinctrl-spmi-gpio.c | 2 +- drivers/pinctrl/qcom/pinctrl-spmi-mpp.c | 2 +- drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c | 2 +- drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c | 2 +- drivers/pinctrl/renesas/gpio.c | 2 +- drivers/pinctrl/renesas/pinctrl-rza1.c | 2 +- drivers/pinctrl/renesas/pinctrl-rza2.c | 2 +- drivers/pinctrl/renesas/pinctrl-rzg2l.c | 2 +- drivers/pinctrl/renesas/pinctrl-rzv2m.c | 2 +- drivers/pinctrl/samsung/pinctrl-samsung.c | 2 +- drivers/pinctrl/spear/pinctrl-plgpio.c | 2 +- .../starfive/pinctrl-starfive-jh7100.c | 2 +- .../starfive/pinctrl-starfive-jh7110.c | 2 +- drivers/pinctrl/stm32/pinctrl-stm32.c | 2 +- drivers/pinctrl/sunplus/sppctl.c | 2 +- drivers/pinctrl/sunxi/pinctrl-sunxi.c | 2 +- drivers/pinctrl/vt8500/pinctrl-wmt.c | 2 +- .../platform/cznic/turris-omnia-mcu-gpio.c | 4 ++-- drivers/platform/x86/barco-p50-gpio.c | 2 +- drivers/platform/x86/intel/int0002_vgpio.c | 2 +- drivers/platform/x86/portwell-ec.c | 4 ++-- drivers/platform/x86/silicom-platform.c | 2 +- drivers/pwm/pwm-pca9685.c | 2 +- .../regulator/rpi-panel-attiny-regulator.c | 2 +- drivers/soc/fsl/qe/gpio.c | 4 ++-- drivers/soc/renesas/pwc-rzv2m.c | 2 +- drivers/spi/spi-xcomm.c | 2 +- drivers/ssb/driver_gpio.c | 4 ++-- drivers/staging/greybus/gpio.c | 2 +- drivers/tty/serial/max310x.c | 2 +- drivers/tty/serial/sc16is7xx.c | 2 +- drivers/usb/serial/cp210x.c | 2 +- drivers/usb/serial/ftdi_sio.c | 4 ++-- drivers/video/fbdev/via/via-gpio.c | 2 +- include/linux/gpio/driver.h | 19 +++++++-------- include/linux/gpio/generic.h | 4 ++-- .../codecs/side-codecs/cirrus_scodec_test.c | 2 +- sound/soc/codecs/idt821034.c | 2 +- sound/soc/codecs/peb2466.c | 2 +- sound/soc/codecs/rt5677.c | 2 +- sound/soc/codecs/tlv320adc3xxx.c | 2 +- sound/soc/codecs/wm5100.c | 2 +- sound/soc/codecs/wm8903.c | 2 +- sound/soc/codecs/wm8962.c | 2 +- sound/soc/codecs/wm8996.c | 2 +- sound/soc/codecs/zl38060.c | 2 +- sound/soc/soc-ac97.c | 2 +- sound/soc/ti/davinci-mcasp.c | 2 +- 282 files changed, 355 insertions(+), 356 deletions(-) diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index d7e2ea27ce59..3389a70e4d49 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -617,8 +617,8 @@ static int sa1111_setup_gpios(struct sa1111 *sachip) sachip->gc.direction_input = sa1111_gpio_direction_input; sachip->gc.direction_output = sa1111_gpio_direction_output; sachip->gc.get = sa1111_gpio_get; - sachip->gc.set_rv = sa1111_gpio_set; - sachip->gc.set_multiple_rv = sa1111_gpio_set_multiple; + sachip->gc.set = sa1111_gpio_set; + sachip->gc.set_multiple = sa1111_gpio_set_multiple; sachip->gc.to_irq = sa1111_gpio_to_irq; sachip->gc.base = -1; sachip->gc.ngpio = 18; diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c index 2d3ee76c8e17..dddb73c96826 100644 --- a/arch/arm/common/scoop.c +++ b/arch/arm/common/scoop.c @@ -218,7 +218,7 @@ static int scoop_probe(struct platform_device *pdev) devptr->gpio.label = dev_name(&pdev->dev); devptr->gpio.base = inf->gpio_base; devptr->gpio.ngpio = 12; /* PA11 = 0, PA12 = 1, etc. up to PA22 = 11 */ - devptr->gpio.set_rv = scoop_gpio_set; + devptr->gpio.set = scoop_gpio_set; devptr->gpio.get = scoop_gpio_get; devptr->gpio.direction_input = scoop_gpio_direction_input; devptr->gpio.direction_output = scoop_gpio_direction_output; diff --git a/arch/arm/mach-s3c/gpio-samsung.c b/arch/arm/mach-s3c/gpio-samsung.c index 206a492fbaf5..81e198e5a6d3 100644 --- a/arch/arm/mach-s3c/gpio-samsung.c +++ b/arch/arm/mach-s3c/gpio-samsung.c @@ -517,7 +517,7 @@ static void __init samsung_gpiolib_add(struct samsung_gpio_chip *chip) if (!gc->direction_output) gc->direction_output = samsung_gpiolib_2bit_output; if (!gc->set) - gc->set_rv = samsung_gpiolib_set; + gc->set = samsung_gpiolib_set; if (!gc->get) gc->get = samsung_gpiolib_get; diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c index bad8aa661e9d..2b833aa0212b 100644 --- a/arch/arm/mach-sa1100/assabet.c +++ b/arch/arm/mach-sa1100/assabet.c @@ -80,7 +80,7 @@ void ASSABET_BCR_frob(unsigned int mask, unsigned int val) { unsigned long m = mask, v = val; - assabet_bcr_gc->set_multiple_rv(assabet_bcr_gc, &m, &v); + assabet_bcr_gc->set_multiple(assabet_bcr_gc, &m, &v); } EXPORT_SYMBOL(ASSABET_BCR_frob); diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c index 6516598c8a71..88fe79f0a4ed 100644 --- a/arch/arm/mach-sa1100/neponset.c +++ b/arch/arm/mach-sa1100/neponset.c @@ -126,7 +126,7 @@ void neponset_ncr_frob(unsigned int mask, unsigned int val) unsigned long m = mask, v = val; if (nep) - n->gpio[0]->set_multiple_rv(n->gpio[0], &m, &v); + n->gpio[0]->set_multiple(n->gpio[0], &m, &v); else WARN(1, "nep unset\n"); } diff --git a/arch/arm/plat-orion/gpio.c b/arch/arm/plat-orion/gpio.c index 6f09f65e3d95..49e29b7894a3 100644 --- a/arch/arm/plat-orion/gpio.c +++ b/arch/arm/plat-orion/gpio.c @@ -540,7 +540,7 @@ void __init orion_gpio_init(int gpio_base, int ngpio, ochip->chip.direction_input = orion_gpio_direction_input; ochip->chip.get = orion_gpio_get; ochip->chip.direction_output = orion_gpio_direction_output; - ochip->chip.set_rv = orion_gpio_set; + ochip->chip.set = orion_gpio_set; ochip->chip.to_irq = orion_gpio_to_irq; ochip->chip.base = gpio_base; ochip->chip.ngpio = ngpio; diff --git a/arch/m68k/coldfire/gpio.c b/arch/m68k/coldfire/gpio.c index 30e5a4ed799d..e2f7af1facb2 100644 --- a/arch/m68k/coldfire/gpio.c +++ b/arch/m68k/coldfire/gpio.c @@ -160,7 +160,7 @@ static struct gpio_chip mcfgpio_chip = { .direction_input = mcfgpio_direction_input, .direction_output = mcfgpio_direction_output, .get = mcfgpio_get_value, - .set_rv = mcfgpio_set_value, + .set = mcfgpio_set_value, .to_irq = mcfgpio_to_irq, .base = 0, .ngpio = MCFGPIO_PIN_MAX, diff --git a/arch/mips/alchemy/common/gpiolib.c b/arch/mips/alchemy/common/gpiolib.c index 194034eba75f..e79e26ffac99 100644 --- a/arch/mips/alchemy/common/gpiolib.c +++ b/arch/mips/alchemy/common/gpiolib.c @@ -101,7 +101,7 @@ struct gpio_chip alchemy_gpio_chip[] = { .direction_input = gpio1_direction_input, .direction_output = gpio1_direction_output, .get = gpio1_get, - .set_rv = gpio1_set, + .set = gpio1_set, .to_irq = gpio1_to_irq, .base = ALCHEMY_GPIO1_BASE, .ngpio = ALCHEMY_GPIO1_NUM, @@ -111,7 +111,7 @@ struct gpio_chip alchemy_gpio_chip[] = { .direction_input = gpio2_direction_input, .direction_output = gpio2_direction_output, .get = gpio2_get, - .set_rv = gpio2_set, + .set = gpio2_set, .to_irq = gpio2_to_irq, .base = ALCHEMY_GPIO2_BASE, .ngpio = ALCHEMY_GPIO2_NUM, @@ -151,7 +151,7 @@ static struct gpio_chip au1300_gpiochip = { .direction_input = alchemy_gpic_dir_input, .direction_output = alchemy_gpic_dir_output, .get = alchemy_gpic_get, - .set_rv = alchemy_gpic_set, + .set = alchemy_gpic_set, .to_irq = alchemy_gpic_gpio_to_irq, .base = AU1300_GPIO_BASE, .ngpio = AU1300_GPIO_NUM, diff --git a/arch/mips/bcm63xx/gpio.c b/arch/mips/bcm63xx/gpio.c index e7a53cd0dec5..ff45a6989c3a 100644 --- a/arch/mips/bcm63xx/gpio.c +++ b/arch/mips/bcm63xx/gpio.c @@ -131,7 +131,7 @@ static struct gpio_chip bcm63xx_gpio_chip = { .direction_input = bcm63xx_gpio_direction_input, .direction_output = bcm63xx_gpio_direction_output, .get = bcm63xx_gpio_get, - .set_rv = bcm63xx_gpio_set, + .set = bcm63xx_gpio_set, .base = 0, }; diff --git a/arch/mips/kernel/gpio_txx9.c b/arch/mips/kernel/gpio_txx9.c index 027fb57d0d79..96ac40d20c23 100644 --- a/arch/mips/kernel/gpio_txx9.c +++ b/arch/mips/kernel/gpio_txx9.c @@ -70,7 +70,7 @@ static int txx9_gpio_dir_out(struct gpio_chip *chip, unsigned int offset, static struct gpio_chip txx9_gpio_chip = { .get = txx9_gpio_get, - .set_rv = txx9_gpio_set, + .set = txx9_gpio_set, .direction_input = txx9_gpio_dir_in, .direction_output = txx9_gpio_dir_out, .label = "TXx9", diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c index 0e47cd59b6cb..9aa5ef374465 100644 --- a/arch/mips/rb532/gpio.c +++ b/arch/mips/rb532/gpio.c @@ -164,7 +164,7 @@ static struct rb532_gpio_chip rb532_gpio_chip[] = { .direction_input = rb532_gpio_direction_input, .direction_output = rb532_gpio_direction_output, .get = rb532_gpio_get, - .set_rv = rb532_gpio_set, + .set = rb532_gpio_set, .to_irq = rb532_gpio_to_irq, .base = 0, .ngpio = 32, diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index 5a37e8b234a3..5dc867ea2c69 100644 --- a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -655,7 +655,7 @@ void __init txx9_iocled_init(unsigned long baseaddr, if (!iocled->mmioaddr) goto out_free; iocled->chip.get = txx9_iocled_get; - iocled->chip.set_rv = txx9_iocled_set; + iocled->chip.set = txx9_iocled_set; iocled->chip.direction_input = txx9_iocled_dir_in; iocled->chip.direction_output = txx9_iocled_dir_out; iocled->chip.label = "iocled"; diff --git a/arch/powerpc/platforms/44x/gpio.c b/arch/powerpc/platforms/44x/gpio.c index d540e261d85a..08ab76582568 100644 --- a/arch/powerpc/platforms/44x/gpio.c +++ b/arch/powerpc/platforms/44x/gpio.c @@ -180,7 +180,7 @@ static int __init ppc4xx_add_gpiochips(void) gc->direction_input = ppc4xx_gpio_dir_in; gc->direction_output = ppc4xx_gpio_dir_out; gc->get = ppc4xx_gpio_get; - gc->set_rv = ppc4xx_gpio_set; + gc->set = ppc4xx_gpio_set; ret = of_mm_gpiochip_add_data(np, mm_gc, ppc4xx_gc); if (ret) diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index bda707d848a6..7748b6641a3c 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -336,7 +336,7 @@ static void mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt) gpt->gc.direction_input = mpc52xx_gpt_gpio_dir_in; gpt->gc.direction_output = mpc52xx_gpt_gpio_dir_out; gpt->gc.get = mpc52xx_gpt_gpio_get; - gpt->gc.set_rv = mpc52xx_gpt_gpio_set; + gpt->gc.set = mpc52xx_gpt_gpio_set; gpt->gc.base = -1; gpt->gc.parent = gpt->dev; diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index 6e37dfc6c5c9..cb7b9498f291 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -126,7 +126,7 @@ static int mcu_gpiochip_add(struct mcu *mcu) gc->can_sleep = 1; gc->ngpio = MCU_NUM_GPIO; gc->base = -1; - gc->set_rv = mcu_gpio_set; + gc->set = mcu_gpio_set; gc->direction_output = mcu_gpio_dir_out; gc->parent = dev; diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index 7462c221115c..7433be7d66ee 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -499,7 +499,7 @@ int cpm1_gpiochip_add16(struct device *dev) gc->direction_input = cpm1_gpio16_dir_in; gc->direction_output = cpm1_gpio16_dir_out; gc->get = cpm1_gpio16_get; - gc->set_rv = cpm1_gpio16_set; + gc->set = cpm1_gpio16_set; gc->to_irq = cpm1_gpio16_to_irq; gc->parent = dev; gc->owner = THIS_MODULE; @@ -622,7 +622,7 @@ int cpm1_gpiochip_add32(struct device *dev) gc->direction_input = cpm1_gpio32_dir_in; gc->direction_output = cpm1_gpio32_dir_out; gc->get = cpm1_gpio32_get; - gc->set_rv = cpm1_gpio32_set; + gc->set = cpm1_gpio32_set; gc->parent = dev; gc->owner = THIS_MODULE; diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c index e22fc638dbc7..f469f6a9f6e0 100644 --- a/arch/powerpc/sysdev/cpm_common.c +++ b/arch/powerpc/sysdev/cpm_common.c @@ -210,7 +210,7 @@ int cpm2_gpiochip_add32(struct device *dev) gc->direction_input = cpm2_gpio32_dir_in; gc->direction_output = cpm2_gpio32_dir_out; gc->get = cpm2_gpio32_get; - gc->set_rv = cpm2_gpio32_set; + gc->set = cpm2_gpio32_set; gc->parent = dev; gc->owner = THIS_MODULE; diff --git a/drivers/bcma/driver_gpio.c b/drivers/bcma/driver_gpio.c index f021e27644e0..658c7e2ac8bf 100644 --- a/drivers/bcma/driver_gpio.c +++ b/drivers/bcma/driver_gpio.c @@ -186,7 +186,7 @@ int bcma_gpio_init(struct bcma_drv_cc *cc) chip->request = bcma_gpio_request; chip->free = bcma_gpio_free; chip->get = bcma_gpio_get_value; - chip->set_rv = bcma_gpio_set_value; + chip->set = bcma_gpio_set_value; chip->direction_input = bcma_gpio_direction_input; chip->direction_output = bcma_gpio_direction_output; chip->parent = bus->dev; diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c index 4dd5c2c330bb..c226524efeba 100644 --- a/drivers/gpio/gpio-74x164.c +++ b/drivers/gpio/gpio-74x164.c @@ -141,8 +141,8 @@ static int gen_74x164_probe(struct spi_device *spi) chip->gpio_chip.label = spi->modalias; chip->gpio_chip.direction_output = gen_74x164_direction_output; chip->gpio_chip.get = gen_74x164_get_value; - chip->gpio_chip.set_rv = gen_74x164_set_value; - chip->gpio_chip.set_multiple_rv = gen_74x164_set_multiple; + chip->gpio_chip.set = gen_74x164_set_value; + chip->gpio_chip.set_multiple = gen_74x164_set_multiple; chip->gpio_chip.base = -1; chip->gpio_chip.ngpio = GEN_74X164_NUMBER_GPIOS * chip->registers; chip->gpio_chip.can_sleep = true; diff --git a/drivers/gpio/gpio-adnp.c b/drivers/gpio/gpio-adnp.c index dc2b941c3726..e5ac2d211013 100644 --- a/drivers/gpio/gpio-adnp.c +++ b/drivers/gpio/gpio-adnp.c @@ -430,7 +430,7 @@ static int adnp_gpio_setup(struct adnp *adnp, unsigned int num_gpios, chip->direction_input = adnp_gpio_direction_input; chip->direction_output = adnp_gpio_direction_output; chip->get = adnp_gpio_get; - chip->set_rv = adnp_gpio_set; + chip->set = adnp_gpio_set; chip->can_sleep = true; if (IS_ENABLED(CONFIG_DEBUG_FS)) diff --git a/drivers/gpio/gpio-adp5520.c b/drivers/gpio/gpio-adp5520.c index 57d12c10cbda..6305c8b7dc05 100644 --- a/drivers/gpio/gpio-adp5520.c +++ b/drivers/gpio/gpio-adp5520.c @@ -122,7 +122,7 @@ static int adp5520_gpio_probe(struct platform_device *pdev) gc->direction_input = adp5520_gpio_direction_input; gc->direction_output = adp5520_gpio_direction_output; gc->get = adp5520_gpio_get_value; - gc->set_rv = adp5520_gpio_set_value; + gc->set = adp5520_gpio_set_value; gc->can_sleep = true; gc->base = pdata->gpio_start; diff --git a/drivers/gpio/gpio-adp5585.c b/drivers/gpio/gpio-adp5585.c index b2c8836c5f84..0fd3cc26d017 100644 --- a/drivers/gpio/gpio-adp5585.c +++ b/drivers/gpio/gpio-adp5585.c @@ -428,7 +428,7 @@ static int adp5585_gpio_probe(struct platform_device *pdev) gc->direction_input = adp5585_gpio_direction_input; gc->direction_output = adp5585_gpio_direction_output; gc->get = adp5585_gpio_get_value; - gc->set_rv = adp5585_gpio_set_value; + gc->set = adp5585_gpio_set_value; gc->set_config = adp5585_gpio_set_config; gc->request = adp5585_gpio_request; gc->free = adp5585_gpio_free; diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c index 6f941db02c04..af9d8b3a711d 100644 --- a/drivers/gpio/gpio-aggregator.c +++ b/drivers/gpio/gpio-aggregator.c @@ -534,8 +534,8 @@ static struct gpiochip_fwd *gpiochip_fwd_create(struct device *dev, chip->direction_output = gpio_fwd_direction_output; chip->get = gpio_fwd_get; chip->get_multiple = gpio_fwd_get_multiple_locked; - chip->set_rv = gpio_fwd_set; - chip->set_multiple_rv = gpio_fwd_set_multiple_locked; + chip->set = gpio_fwd_set; + chip->set_multiple = gpio_fwd_set_multiple_locked; chip->to_irq = gpio_fwd_to_irq; chip->base = -1; chip->ngpio = ngpios; diff --git a/drivers/gpio/gpio-altera-a10sr.c b/drivers/gpio/gpio-altera-a10sr.c index 77a674cf99e4..4524c18a87e7 100644 --- a/drivers/gpio/gpio-altera-a10sr.c +++ b/drivers/gpio/gpio-altera-a10sr.c @@ -69,7 +69,7 @@ static const struct gpio_chip altr_a10sr_gc = { .label = "altr_a10sr_gpio", .owner = THIS_MODULE, .get = altr_a10sr_gpio_get, - .set_rv = altr_a10sr_gpio_set, + .set = altr_a10sr_gpio_set, .direction_input = altr_a10sr_gpio_direction_input, .direction_output = altr_a10sr_gpio_direction_output, .can_sleep = true, diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c index 1b28525726d7..9508d764cce4 100644 --- a/drivers/gpio/gpio-altera.c +++ b/drivers/gpio/gpio-altera.c @@ -259,7 +259,7 @@ static int altera_gpio_probe(struct platform_device *pdev) altera_gc->gc.direction_input = altera_gpio_direction_input; altera_gc->gc.direction_output = altera_gpio_direction_output; altera_gc->gc.get = altera_gpio_get; - altera_gc->gc.set_rv = altera_gpio_set; + altera_gc->gc.set = altera_gpio_set; altera_gc->gc.owner = THIS_MODULE; altera_gc->gc.parent = &pdev->dev; altera_gc->gc.base = -1; diff --git a/drivers/gpio/gpio-amd-fch.c b/drivers/gpio/gpio-amd-fch.c index f8d0cea46049..e6c6c3ec7656 100644 --- a/drivers/gpio/gpio-amd-fch.c +++ b/drivers/gpio/gpio-amd-fch.c @@ -165,7 +165,7 @@ static int amd_fch_gpio_probe(struct platform_device *pdev) priv->gc.direction_output = amd_fch_gpio_direction_output; priv->gc.get_direction = amd_fch_gpio_get_direction; priv->gc.get = amd_fch_gpio_get; - priv->gc.set_rv = amd_fch_gpio_set; + priv->gc.set = amd_fch_gpio_set; spin_lock_init(&priv->lock); diff --git a/drivers/gpio/gpio-amd8111.c b/drivers/gpio/gpio-amd8111.c index 425d8472f744..15fd5e210d74 100644 --- a/drivers/gpio/gpio-amd8111.c +++ b/drivers/gpio/gpio-amd8111.c @@ -165,7 +165,7 @@ static struct amd_gpio gp = { .ngpio = 32, .request = amd_gpio_request, .free = amd_gpio_free, - .set_rv = amd_gpio_set, + .set = amd_gpio_set, .get = amd_gpio_get, .direction_output = amd_gpio_dirout, .direction_input = amd_gpio_dirin, diff --git a/drivers/gpio/gpio-arizona.c b/drivers/gpio/gpio-arizona.c index 89ffde693019..a7e98d395d8e 100644 --- a/drivers/gpio/gpio-arizona.c +++ b/drivers/gpio/gpio-arizona.c @@ -138,7 +138,7 @@ static const struct gpio_chip template_chip = { .direction_input = arizona_gpio_direction_in, .get = arizona_gpio_get, .direction_output = arizona_gpio_direction_out, - .set_rv = arizona_gpio_set, + .set = arizona_gpio_set, .can_sleep = true, }; diff --git a/drivers/gpio/gpio-aspeed-sgpio.c b/drivers/gpio/gpio-aspeed-sgpio.c index 00b31497ecff..7622f9e9f54a 100644 --- a/drivers/gpio/gpio-aspeed-sgpio.c +++ b/drivers/gpio/gpio-aspeed-sgpio.c @@ -596,7 +596,7 @@ static int __init aspeed_sgpio_probe(struct platform_device *pdev) gpio->chip.request = NULL; gpio->chip.free = NULL; gpio->chip.get = aspeed_sgpio_get; - gpio->chip.set_rv = aspeed_sgpio_set; + gpio->chip.set = aspeed_sgpio_set; gpio->chip.set_config = aspeed_sgpio_set_config; gpio->chip.label = dev_name(&pdev->dev); gpio->chip.base = -1; diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c index 2d340a343a17..7953a9c4e36d 100644 --- a/drivers/gpio/gpio-aspeed.c +++ b/drivers/gpio/gpio-aspeed.c @@ -1352,7 +1352,7 @@ static int aspeed_gpio_probe(struct platform_device *pdev) gpio->chip.request = aspeed_gpio_request; gpio->chip.free = aspeed_gpio_free; gpio->chip.get = aspeed_gpio_get; - gpio->chip.set_rv = aspeed_gpio_set; + gpio->chip.set = aspeed_gpio_set; gpio->chip.set_config = aspeed_gpio_set_config; gpio->chip.label = dev_name(&pdev->dev); gpio->chip.base = -1; diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index 8f22cb36004d..208b71c59d58 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -339,7 +339,7 @@ static const struct gpio_chip template_chip = { .direction_input = bcm_kona_gpio_direction_input, .get = bcm_kona_gpio_get, .direction_output = bcm_kona_gpio_direction_output, - .set_rv = bcm_kona_gpio_set, + .set = bcm_kona_gpio_set, .set_config = bcm_kona_gpio_set_config, .to_irq = bcm_kona_gpio_to_irq, .base = 0, diff --git a/drivers/gpio/gpio-bd71815.c b/drivers/gpio/gpio-bd71815.c index 36701500925e..afb18a5a9d79 100644 --- a/drivers/gpio/gpio-bd71815.c +++ b/drivers/gpio/gpio-bd71815.c @@ -85,7 +85,7 @@ static const struct gpio_chip bd71815gpo_chip = { .owner = THIS_MODULE, .get = bd71815gpo_get, .get_direction = bd71815gpo_direction_get, - .set_rv = bd71815gpo_set, + .set = bd71815gpo_set, .set_config = bd71815_gpio_set_config, .can_sleep = true, }; diff --git a/drivers/gpio/gpio-bd71828.c b/drivers/gpio/gpio-bd71828.c index 4ba151e5cf25..e439dbfffc62 100644 --- a/drivers/gpio/gpio-bd71828.c +++ b/drivers/gpio/gpio-bd71828.c @@ -109,7 +109,7 @@ static int bd71828_probe(struct platform_device *pdev) bdgpio->gpio.set_config = bd71828_gpio_set_config; bdgpio->gpio.can_sleep = true; bdgpio->gpio.get = bd71828_gpio_get; - bdgpio->gpio.set_rv = bd71828_gpio_set; + bdgpio->gpio.set = bd71828_gpio_set; bdgpio->gpio.base = -1; /* diff --git a/drivers/gpio/gpio-bd9571mwv.c b/drivers/gpio/gpio-bd9571mwv.c index 8df1361e3e84..7c95bb36511e 100644 --- a/drivers/gpio/gpio-bd9571mwv.c +++ b/drivers/gpio/gpio-bd9571mwv.c @@ -88,7 +88,7 @@ static const struct gpio_chip template_chip = { .direction_input = bd9571mwv_gpio_direction_input, .direction_output = bd9571mwv_gpio_direction_output, .get = bd9571mwv_gpio_get, - .set_rv = bd9571mwv_gpio_set, + .set = bd9571mwv_gpio_set, .base = -1, .ngpio = 2, .can_sleep = true, diff --git a/drivers/gpio/gpio-bt8xx.c b/drivers/gpio/gpio-bt8xx.c index 7c9e81fea37a..05401da03ca3 100644 --- a/drivers/gpio/gpio-bt8xx.c +++ b/drivers/gpio/gpio-bt8xx.c @@ -145,7 +145,7 @@ static void bt8xxgpio_gpio_setup(struct bt8xxgpio *bg) c->direction_input = bt8xxgpio_gpio_direction_input; c->get = bt8xxgpio_gpio_get; c->direction_output = bt8xxgpio_gpio_direction_output; - c->set_rv = bt8xxgpio_gpio_set; + c->set = bt8xxgpio_gpio_set; c->dbg_show = NULL; c->base = modparam_gpiobase; c->ngpio = BT8XXGPIO_NR_GPIOS; diff --git a/drivers/gpio/gpio-cgbc.c b/drivers/gpio/gpio-cgbc.c index 1495bec62456..0efa1b61001a 100644 --- a/drivers/gpio/gpio-cgbc.c +++ b/drivers/gpio/gpio-cgbc.c @@ -171,7 +171,7 @@ static int cgbc_gpio_probe(struct platform_device *pdev) chip->direction_output = cgbc_gpio_direction_output; chip->get_direction = cgbc_gpio_get_direction; chip->get = cgbc_gpio_get; - chip->set_rv = cgbc_gpio_set; + chip->set = cgbc_gpio_set; chip->ngpio = CGBC_GPIO_NGPIO; ret = devm_mutex_init(dev, &gpio->lock); diff --git a/drivers/gpio/gpio-creg-snps.c b/drivers/gpio/gpio-creg-snps.c index 8b49f02c7896..f8ea961fa1de 100644 --- a/drivers/gpio/gpio-creg-snps.c +++ b/drivers/gpio/gpio-creg-snps.c @@ -167,7 +167,7 @@ static int creg_gpio_probe(struct platform_device *pdev) hcg->gc.label = dev_name(dev); hcg->gc.base = -1; hcg->gc.ngpio = ngpios; - hcg->gc.set_rv = creg_gpio_set; + hcg->gc.set = creg_gpio_set; hcg->gc.direction_output = creg_gpio_dir_out; ret = devm_gpiochip_add_data(dev, &hcg->gc, hcg); diff --git a/drivers/gpio/gpio-cros-ec.c b/drivers/gpio/gpio-cros-ec.c index 53cd5ff6247b..435483826c6e 100644 --- a/drivers/gpio/gpio-cros-ec.c +++ b/drivers/gpio/gpio-cros-ec.c @@ -188,7 +188,7 @@ static int cros_ec_gpio_probe(struct platform_device *pdev) gc->can_sleep = true; gc->label = dev_name(dev); gc->base = -1; - gc->set_rv = cros_ec_gpio_set; + gc->set = cros_ec_gpio_set; gc->get = cros_ec_gpio_get; gc->get_direction = cros_ec_gpio_get_direction; diff --git a/drivers/gpio/gpio-crystalcove.c b/drivers/gpio/gpio-crystalcove.c index 8db7cca3a060..0fb5c06d0886 100644 --- a/drivers/gpio/gpio-crystalcove.c +++ b/drivers/gpio/gpio-crystalcove.c @@ -349,7 +349,7 @@ static int crystalcove_gpio_probe(struct platform_device *pdev) cg->chip.direction_input = crystalcove_gpio_dir_in; cg->chip.direction_output = crystalcove_gpio_dir_out; cg->chip.get = crystalcove_gpio_get; - cg->chip.set_rv = crystalcove_gpio_set; + cg->chip.set = crystalcove_gpio_set; cg->chip.base = -1; cg->chip.ngpio = CRYSTALCOVE_VGPIO_NUM; cg->chip.can_sleep = true; diff --git a/drivers/gpio/gpio-cs5535.c b/drivers/gpio/gpio-cs5535.c index 143d1f4173a6..8affe4e9f90e 100644 --- a/drivers/gpio/gpio-cs5535.c +++ b/drivers/gpio/gpio-cs5535.c @@ -296,7 +296,7 @@ static struct cs5535_gpio_chip cs5535_gpio_chip = { .request = chip_gpio_request, .get = chip_gpio_get, - .set_rv = chip_gpio_set, + .set = chip_gpio_set, .direction_input = chip_direction_input, .direction_output = chip_direction_output, diff --git a/drivers/gpio/gpio-da9052.c b/drivers/gpio/gpio-da9052.c index 6482c5b267db..495f0ee58505 100644 --- a/drivers/gpio/gpio-da9052.c +++ b/drivers/gpio/gpio-da9052.c @@ -172,7 +172,7 @@ static const struct gpio_chip reference_gp = { .label = "da9052-gpio", .owner = THIS_MODULE, .get = da9052_gpio_get, - .set_rv = da9052_gpio_set, + .set = da9052_gpio_set, .direction_input = da9052_gpio_direction_input, .direction_output = da9052_gpio_direction_output, .to_irq = da9052_gpio_to_irq, diff --git a/drivers/gpio/gpio-da9055.c b/drivers/gpio/gpio-da9055.c index 3d9d0c700100..a09bd6eb93cf 100644 --- a/drivers/gpio/gpio-da9055.c +++ b/drivers/gpio/gpio-da9055.c @@ -116,7 +116,7 @@ static const struct gpio_chip reference_gp = { .label = "da9055-gpio", .owner = THIS_MODULE, .get = da9055_gpio_get, - .set_rv = da9055_gpio_set, + .set = da9055_gpio_set, .direction_input = da9055_gpio_direction_input, .direction_output = da9055_gpio_direction_output, .to_irq = da9055_gpio_to_irq, diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c index 8f3a36d0191d..538f27209ce7 100644 --- a/drivers/gpio/gpio-davinci.c +++ b/drivers/gpio/gpio-davinci.c @@ -202,7 +202,7 @@ static int davinci_gpio_probe(struct platform_device *pdev) chips->chip.direction_input = davinci_direction_in; chips->chip.get = davinci_gpio_get; chips->chip.direction_output = davinci_direction_out; - chips->chip.set_rv = davinci_gpio_set; + chips->chip.set = davinci_gpio_set; chips->chip.ngpio = ngpio; chips->chip.base = -1; diff --git a/drivers/gpio/gpio-dln2.c b/drivers/gpio/gpio-dln2.c index 4bd3c47eaf93..4670ffd7ea7f 100644 --- a/drivers/gpio/gpio-dln2.c +++ b/drivers/gpio/gpio-dln2.c @@ -469,7 +469,7 @@ static int dln2_gpio_probe(struct platform_device *pdev) dln2->gpio.base = -1; dln2->gpio.ngpio = pins; dln2->gpio.can_sleep = true; - dln2->gpio.set_rv = dln2_gpio_set; + dln2->gpio.set = dln2_gpio_set; dln2->gpio.get = dln2_gpio_get; dln2->gpio.request = dln2_gpio_request; dln2->gpio.free = dln2_gpio_free; diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c index f2973d0b7138..50fafeda8d7e 100644 --- a/drivers/gpio/gpio-eic-sprd.c +++ b/drivers/gpio/gpio-eic-sprd.c @@ -663,7 +663,7 @@ static int sprd_eic_probe(struct platform_device *pdev) sprd_eic->chip.request = sprd_eic_request; sprd_eic->chip.free = sprd_eic_free; sprd_eic->chip.set_config = sprd_eic_set_config; - sprd_eic->chip.set_rv = sprd_eic_set; + sprd_eic->chip.set = sprd_eic_set; fallthrough; case SPRD_EIC_ASYNC: case SPRD_EIC_SYNC: diff --git a/drivers/gpio/gpio-em.c b/drivers/gpio/gpio-em.c index 015f1ac32dd9..a214b0672726 100644 --- a/drivers/gpio/gpio-em.c +++ b/drivers/gpio/gpio-em.c @@ -306,7 +306,7 @@ static int em_gio_probe(struct platform_device *pdev) gpio_chip->direction_input = em_gio_direction_input; gpio_chip->get = em_gio_get; gpio_chip->direction_output = em_gio_direction_output; - gpio_chip->set_rv = em_gio_set; + gpio_chip->set = em_gio_set; gpio_chip->to_irq = em_gio_to_irq; gpio_chip->request = pinctrl_gpio_request; gpio_chip->free = em_gio_free; diff --git a/drivers/gpio/gpio-exar.c b/drivers/gpio/gpio-exar.c index beb98286d13e..9053662f1817 100644 --- a/drivers/gpio/gpio-exar.c +++ b/drivers/gpio/gpio-exar.c @@ -211,7 +211,7 @@ static int gpio_exar_probe(struct platform_device *pdev) exar_gpio->gpio_chip.direction_input = exar_direction_input; exar_gpio->gpio_chip.get_direction = exar_get_direction; exar_gpio->gpio_chip.get = exar_get_value; - exar_gpio->gpio_chip.set_rv = exar_set_value; + exar_gpio->gpio_chip.set = exar_set_value; exar_gpio->gpio_chip.base = -1; exar_gpio->gpio_chip.ngpio = ngpios; exar_gpio->index = index; diff --git a/drivers/gpio/gpio-f7188x.c b/drivers/gpio/gpio-f7188x.c index dfcd3634f279..4d5b927ad70f 100644 --- a/drivers/gpio/gpio-f7188x.c +++ b/drivers/gpio/gpio-f7188x.c @@ -173,7 +173,7 @@ static int f7188x_gpio_set_config(struct gpio_chip *chip, unsigned offset, .direction_input = f7188x_gpio_direction_in, \ .get = f7188x_gpio_get, \ .direction_output = f7188x_gpio_direction_out, \ - .set_rv = f7188x_gpio_set, \ + .set = f7188x_gpio_set, \ .set_config = f7188x_gpio_set_config, \ .base = -1, \ .ngpio = _ngpio, \ diff --git a/drivers/gpio/gpio-graniterapids.c b/drivers/gpio/gpio-graniterapids.c index f25283e5239d..121bf29a27f5 100644 --- a/drivers/gpio/gpio-graniterapids.c +++ b/drivers/gpio/gpio-graniterapids.c @@ -159,7 +159,7 @@ static const struct gpio_chip gnr_gpio_chip = { .owner = THIS_MODULE, .request = gnr_gpio_request, .get = gnr_gpio_get, - .set_rv = gnr_gpio_set, + .set = gnr_gpio_set, .get_direction = gnr_gpio_get_direction, .direction_input = gnr_gpio_direction_input, .direction_output = gnr_gpio_direction_output, diff --git a/drivers/gpio/gpio-gw-pld.c b/drivers/gpio/gpio-gw-pld.c index a40ba99a3aea..2e5d97b7363f 100644 --- a/drivers/gpio/gpio-gw-pld.c +++ b/drivers/gpio/gpio-gw-pld.c @@ -86,7 +86,7 @@ static int gw_pld_probe(struct i2c_client *client) gw->chip.direction_input = gw_pld_input8; gw->chip.get = gw_pld_get8; gw->chip.direction_output = gw_pld_output8; - gw->chip.set_rv = gw_pld_set8; + gw->chip.set = gw_pld_set8; gw->client = client; /* diff --git a/drivers/gpio/gpio-htc-egpio.c b/drivers/gpio/gpio-htc-egpio.c index b1844a676c7c..2eaed83214d8 100644 --- a/drivers/gpio/gpio-htc-egpio.c +++ b/drivers/gpio/gpio-htc-egpio.c @@ -324,7 +324,7 @@ static int __init egpio_probe(struct platform_device *pdev) chip->parent = &pdev->dev; chip->owner = THIS_MODULE; chip->get = egpio_get; - chip->set_rv = egpio_set; + chip->set = egpio_set; chip->direction_input = egpio_direction_input; chip->direction_output = egpio_direction_output; chip->get_direction = egpio_get_direction; diff --git a/drivers/gpio/gpio-ich.c b/drivers/gpio/gpio-ich.c index 67089b2423d8..1802c9116ffe 100644 --- a/drivers/gpio/gpio-ich.c +++ b/drivers/gpio/gpio-ich.c @@ -273,7 +273,7 @@ static void ichx_gpiolib_setup(struct gpio_chip *chip) chip->get = ichx_priv.desc->get ? ichx_priv.desc->get : ichx_gpio_get; - chip->set_rv = ichx_gpio_set; + chip->set = ichx_gpio_set; chip->get_direction = ichx_gpio_get_direction; chip->direction_input = ichx_gpio_direction_input; chip->direction_output = ichx_gpio_direction_output; diff --git a/drivers/gpio/gpio-imx-scu.c b/drivers/gpio/gpio-imx-scu.c index 1693dbf1b777..0a75afecf9f8 100644 --- a/drivers/gpio/gpio-imx-scu.c +++ b/drivers/gpio/gpio-imx-scu.c @@ -102,7 +102,7 @@ static int imx_scu_gpio_probe(struct platform_device *pdev) gc->ngpio = ARRAY_SIZE(scu_rsrc_arr); gc->label = dev_name(dev); gc->get = imx_scu_gpio_get; - gc->set_rv = imx_scu_gpio_set; + gc->set = imx_scu_gpio_set; gc->get_direction = imx_scu_gpio_get_direction; platform_set_drvdata(pdev, priv); diff --git a/drivers/gpio/gpio-it87.c b/drivers/gpio/gpio-it87.c index d8184b527bac..5d677bcfccf2 100644 --- a/drivers/gpio/gpio-it87.c +++ b/drivers/gpio/gpio-it87.c @@ -267,7 +267,7 @@ static const struct gpio_chip it87_template_chip = { .request = it87_gpio_request, .get = it87_gpio_get, .direction_input = it87_gpio_direction_in, - .set_rv = it87_gpio_set, + .set = it87_gpio_set, .direction_output = it87_gpio_direction_out, .base = -1 }; diff --git a/drivers/gpio/gpio-janz-ttl.c b/drivers/gpio/gpio-janz-ttl.c index 9f548eda3888..b0c4a3346e7d 100644 --- a/drivers/gpio/gpio-janz-ttl.c +++ b/drivers/gpio/gpio-janz-ttl.c @@ -171,7 +171,7 @@ static int ttl_probe(struct platform_device *pdev) gpio->parent = &pdev->dev; gpio->label = pdev->name; gpio->get = ttl_get_value; - gpio->set_rv = ttl_set_value; + gpio->set = ttl_set_value; gpio->owner = THIS_MODULE; /* request dynamic allocation */ diff --git a/drivers/gpio/gpio-kempld.c b/drivers/gpio/gpio-kempld.c index e38e604baa22..923aad3ab4d4 100644 --- a/drivers/gpio/gpio-kempld.c +++ b/drivers/gpio/gpio-kempld.c @@ -169,7 +169,7 @@ static int kempld_gpio_probe(struct platform_device *pdev) chip->direction_output = kempld_gpio_direction_output; chip->get_direction = kempld_gpio_get_direction; chip->get = kempld_gpio_get; - chip->set_rv = kempld_gpio_set; + chip->set = kempld_gpio_set; chip->ngpio = kempld_gpio_pincount(pld); if (chip->ngpio == 0) { dev_err(dev, "No GPIO pins detected\n"); diff --git a/drivers/gpio/gpio-latch.c b/drivers/gpio/gpio-latch.c index 3d0ff09284fb..c64aaa896766 100644 --- a/drivers/gpio/gpio-latch.c +++ b/drivers/gpio/gpio-latch.c @@ -166,11 +166,11 @@ static int gpio_latch_probe(struct platform_device *pdev) if (gpio_latch_can_sleep(priv, n_latches)) { priv->gc.can_sleep = true; - priv->gc.set_rv = gpio_latch_set_can_sleep; + priv->gc.set = gpio_latch_set_can_sleep; mutex_init(&priv->mutex); } else { priv->gc.can_sleep = false; - priv->gc.set_rv = gpio_latch_set; + priv->gc.set = gpio_latch_set; spin_lock_init(&priv->spinlock); } diff --git a/drivers/gpio/gpio-ljca.c b/drivers/gpio/gpio-ljca.c index 61524a9ba765..3b4f8830c741 100644 --- a/drivers/gpio/gpio-ljca.c +++ b/drivers/gpio/gpio-ljca.c @@ -437,7 +437,7 @@ static int ljca_gpio_probe(struct auxiliary_device *auxdev, ljca_gpio->gc.direction_output = ljca_gpio_direction_output; ljca_gpio->gc.get_direction = ljca_gpio_get_direction; ljca_gpio->gc.get = ljca_gpio_get_value; - ljca_gpio->gc.set_rv = ljca_gpio_set_value; + ljca_gpio->gc.set = ljca_gpio_set_value; ljca_gpio->gc.set_config = ljca_gpio_set_config; ljca_gpio->gc.init_valid_mask = ljca_gpio_init_valid_mask; ljca_gpio->gc.can_sleep = true; diff --git a/drivers/gpio/gpio-logicvc.c b/drivers/gpio/gpio-logicvc.c index 19cd2847467c..cb9dbcc290ad 100644 --- a/drivers/gpio/gpio-logicvc.c +++ b/drivers/gpio/gpio-logicvc.c @@ -134,7 +134,7 @@ static int logicvc_gpio_probe(struct platform_device *pdev) logicvc->chip.ngpio = LOGICVC_CTRL_GPIO_BITS + LOGICVC_POWER_CTRL_GPIO_BITS; logicvc->chip.get = logicvc_gpio_get; - logicvc->chip.set_rv = logicvc_gpio_set; + logicvc->chip.set = logicvc_gpio_set; logicvc->chip.direction_output = logicvc_gpio_direction_output; return devm_gpiochip_add_data(dev, &logicvc->chip, logicvc); diff --git a/drivers/gpio/gpio-loongson-64bit.c b/drivers/gpio/gpio-loongson-64bit.c index add09971d26a..818c606fbc51 100644 --- a/drivers/gpio/gpio-loongson-64bit.c +++ b/drivers/gpio/gpio-loongson-64bit.c @@ -157,7 +157,7 @@ static int loongson_gpio_init(struct device *dev, struct loongson_gpio_chip *lgp lgpio->chip.get = loongson_gpio_get; lgpio->chip.get_direction = loongson_gpio_get_direction; lgpio->chip.direction_output = loongson_gpio_direction_output; - lgpio->chip.set_rv = loongson_gpio_set; + lgpio->chip.set = loongson_gpio_set; lgpio->chip.parent = dev; spin_lock_init(&lgpio->lock); } diff --git a/drivers/gpio/gpio-loongson.c b/drivers/gpio/gpio-loongson.c index 8f3668169ebf..f3e0559f969d 100644 --- a/drivers/gpio/gpio-loongson.c +++ b/drivers/gpio/gpio-loongson.c @@ -106,7 +106,7 @@ static int loongson_gpio_probe(struct platform_device *pdev) gc->base = 0; gc->ngpio = LOONGSON_N_GPIO; gc->get = loongson_gpio_get_value; - gc->set_rv = loongson_gpio_set_value; + gc->set = loongson_gpio_set_value; gc->direction_input = loongson_gpio_direction_input; gc->direction_output = loongson_gpio_direction_output; diff --git a/drivers/gpio/gpio-lp3943.c b/drivers/gpio/gpio-lp3943.c index 52ab3ac4844c..e8e00daff7df 100644 --- a/drivers/gpio/gpio-lp3943.c +++ b/drivers/gpio/gpio-lp3943.c @@ -184,7 +184,7 @@ static const struct gpio_chip lp3943_gpio_chip = { .direction_input = lp3943_gpio_direction_input, .get = lp3943_gpio_get, .direction_output = lp3943_gpio_direction_output, - .set_rv = lp3943_gpio_set, + .set = lp3943_gpio_set, .base = -1, .ngpio = LP3943_MAX_GPIO, .can_sleep = 1, diff --git a/drivers/gpio/gpio-lp873x.c b/drivers/gpio/gpio-lp873x.c index 1908ed302e92..5376708a81bf 100644 --- a/drivers/gpio/gpio-lp873x.c +++ b/drivers/gpio/gpio-lp873x.c @@ -124,7 +124,7 @@ static const struct gpio_chip template_chip = { .direction_input = lp873x_gpio_direction_input, .direction_output = lp873x_gpio_direction_output, .get = lp873x_gpio_get, - .set_rv = lp873x_gpio_set, + .set = lp873x_gpio_set, .set_config = lp873x_gpio_set_config, .base = -1, .ngpio = 2, diff --git a/drivers/gpio/gpio-lp87565.c b/drivers/gpio/gpio-lp87565.c index 8ea687d5d028..0f337c1283b2 100644 --- a/drivers/gpio/gpio-lp87565.c +++ b/drivers/gpio/gpio-lp87565.c @@ -139,7 +139,7 @@ static const struct gpio_chip template_chip = { .direction_input = lp87565_gpio_direction_input, .direction_output = lp87565_gpio_direction_output, .get = lp87565_gpio_get, - .set_rv = lp87565_gpio_set, + .set = lp87565_gpio_set, .set_config = lp87565_gpio_set_config, .base = -1, .ngpio = 3, diff --git a/drivers/gpio/gpio-lpc18xx.c b/drivers/gpio/gpio-lpc18xx.c index 2dbfbf90176c..37a2342eb2e6 100644 --- a/drivers/gpio/gpio-lpc18xx.c +++ b/drivers/gpio/gpio-lpc18xx.c @@ -327,7 +327,7 @@ static const struct gpio_chip lpc18xx_chip = { .free = gpiochip_generic_free, .direction_input = lpc18xx_gpio_direction_input, .direction_output = lpc18xx_gpio_direction_output, - .set_rv = lpc18xx_gpio_set, + .set = lpc18xx_gpio_set, .get = lpc18xx_gpio_get, .ngpio = LPC18XX_MAX_PORTS * LPC18XX_PINS_PER_PORT, .owner = THIS_MODULE, diff --git a/drivers/gpio/gpio-lpc32xx.c b/drivers/gpio/gpio-lpc32xx.c index 6668b8bd9f1e..37fc54fc7385 100644 --- a/drivers/gpio/gpio-lpc32xx.c +++ b/drivers/gpio/gpio-lpc32xx.c @@ -407,7 +407,7 @@ static struct lpc32xx_gpio_chip lpc32xx_gpiochip[] = { .direction_input = lpc32xx_gpio_dir_input_p012, .get = lpc32xx_gpio_get_value_p012, .direction_output = lpc32xx_gpio_dir_output_p012, - .set_rv = lpc32xx_gpio_set_value_p012, + .set = lpc32xx_gpio_set_value_p012, .request = lpc32xx_gpio_request, .to_irq = lpc32xx_gpio_to_irq_p01, .base = LPC32XX_GPIO_P0_GRP, @@ -423,7 +423,7 @@ static struct lpc32xx_gpio_chip lpc32xx_gpiochip[] = { .direction_input = lpc32xx_gpio_dir_input_p012, .get = lpc32xx_gpio_get_value_p012, .direction_output = lpc32xx_gpio_dir_output_p012, - .set_rv = lpc32xx_gpio_set_value_p012, + .set = lpc32xx_gpio_set_value_p012, .request = lpc32xx_gpio_request, .to_irq = lpc32xx_gpio_to_irq_p01, .base = LPC32XX_GPIO_P1_GRP, @@ -439,7 +439,7 @@ static struct lpc32xx_gpio_chip lpc32xx_gpiochip[] = { .direction_input = lpc32xx_gpio_dir_input_p012, .get = lpc32xx_gpio_get_value_p012, .direction_output = lpc32xx_gpio_dir_output_p012, - .set_rv = lpc32xx_gpio_set_value_p012, + .set = lpc32xx_gpio_set_value_p012, .request = lpc32xx_gpio_request, .base = LPC32XX_GPIO_P2_GRP, .ngpio = LPC32XX_GPIO_P2_MAX, @@ -454,7 +454,7 @@ static struct lpc32xx_gpio_chip lpc32xx_gpiochip[] = { .direction_input = lpc32xx_gpio_dir_input_p3, .get = lpc32xx_gpio_get_value_p3, .direction_output = lpc32xx_gpio_dir_output_p3, - .set_rv = lpc32xx_gpio_set_value_p3, + .set = lpc32xx_gpio_set_value_p3, .request = lpc32xx_gpio_request, .to_irq = lpc32xx_gpio_to_irq_gpio_p3, .base = LPC32XX_GPIO_P3_GRP, @@ -482,7 +482,7 @@ static struct lpc32xx_gpio_chip lpc32xx_gpiochip[] = { .chip = { .label = "gpo_p3", .direction_output = lpc32xx_gpio_dir_out_always, - .set_rv = lpc32xx_gpo_set_value, + .set = lpc32xx_gpo_set_value, .get = lpc32xx_gpo_get_value, .request = lpc32xx_gpio_request, .base = LPC32XX_GPO_P3_GRP, diff --git a/drivers/gpio/gpio-macsmc.c b/drivers/gpio/gpio-macsmc.c index 7570d9e89adf..30ef258e7655 100644 --- a/drivers/gpio/gpio-macsmc.c +++ b/drivers/gpio/gpio-macsmc.c @@ -261,7 +261,7 @@ static int macsmc_gpio_probe(struct platform_device *pdev) smcgp->gc.label = "macsmc-pmu-gpio"; smcgp->gc.owner = THIS_MODULE; smcgp->gc.get = macsmc_gpio_get; - smcgp->gc.set_rv = macsmc_gpio_set; + smcgp->gc.set = macsmc_gpio_set; smcgp->gc.get_direction = macsmc_gpio_get_direction; smcgp->gc.init_valid_mask = macsmc_gpio_init_valid_mask; smcgp->gc.can_sleep = true; diff --git a/drivers/gpio/gpio-madera.c b/drivers/gpio/gpio-madera.c index e73e72d62bc8..551faf9655b2 100644 --- a/drivers/gpio/gpio-madera.c +++ b/drivers/gpio/gpio-madera.c @@ -109,7 +109,7 @@ static const struct gpio_chip madera_gpio_chip = { .direction_input = madera_gpio_direction_in, .get = madera_gpio_get, .direction_output = madera_gpio_direction_out, - .set_rv = madera_gpio_set, + .set = madera_gpio_set, .set_config = gpiochip_generic_config, .can_sleep = true, }; diff --git a/drivers/gpio/gpio-max730x.c b/drivers/gpio/gpio-max730x.c index 75d414d8c992..84c7c2dca822 100644 --- a/drivers/gpio/gpio-max730x.c +++ b/drivers/gpio/gpio-max730x.c @@ -188,7 +188,7 @@ int __max730x_probe(struct max7301 *ts) ts->chip.direction_input = max7301_direction_input; ts->chip.get = max7301_get; ts->chip.direction_output = max7301_direction_output; - ts->chip.set_rv = max7301_set; + ts->chip.set = max7301_set; ts->chip.ngpio = PIN_NUMBER; ts->chip.can_sleep = true; diff --git a/drivers/gpio/gpio-max732x.c b/drivers/gpio/gpio-max732x.c index d5ffedb086af..a61d670ceeda 100644 --- a/drivers/gpio/gpio-max732x.c +++ b/drivers/gpio/gpio-max732x.c @@ -585,8 +585,8 @@ static int max732x_setup_gpio(struct max732x_chip *chip, gc->direction_input = max732x_gpio_direction_input; if (chip->dir_output) { gc->direction_output = max732x_gpio_direction_output; - gc->set_rv = max732x_gpio_set_value; - gc->set_multiple_rv = max732x_gpio_set_multiple; + gc->set = max732x_gpio_set_value; + gc->set_multiple = max732x_gpio_set_multiple; } gc->get = max732x_gpio_get_value; gc->can_sleep = true; diff --git a/drivers/gpio/gpio-max77620.c b/drivers/gpio/gpio-max77620.c index af7af8e40afe..02eca400b307 100644 --- a/drivers/gpio/gpio-max77620.c +++ b/drivers/gpio/gpio-max77620.c @@ -311,7 +311,7 @@ static int max77620_gpio_probe(struct platform_device *pdev) mgpio->gpio_chip.direction_input = max77620_gpio_dir_input; mgpio->gpio_chip.get = max77620_gpio_get; mgpio->gpio_chip.direction_output = max77620_gpio_dir_output; - mgpio->gpio_chip.set_rv = max77620_gpio_set; + mgpio->gpio_chip.set = max77620_gpio_set; mgpio->gpio_chip.set_config = max77620_gpio_set_config; mgpio->gpio_chip.ngpio = MAX77620_GPIO_NR; mgpio->gpio_chip.can_sleep = 1; diff --git a/drivers/gpio/gpio-max77650.c b/drivers/gpio/gpio-max77650.c index a553e141059f..4540da4c1418 100644 --- a/drivers/gpio/gpio-max77650.c +++ b/drivers/gpio/gpio-max77650.c @@ -166,7 +166,7 @@ static int max77650_gpio_probe(struct platform_device *pdev) chip->gc.direction_input = max77650_gpio_direction_input; chip->gc.direction_output = max77650_gpio_direction_output; - chip->gc.set_rv = max77650_gpio_set_value; + chip->gc.set = max77650_gpio_set_value; chip->gc.get = max77650_gpio_get_value; chip->gc.get_direction = max77650_gpio_get_direction; chip->gc.set_config = max77650_gpio_set_config; diff --git a/drivers/gpio/gpio-max77759.c b/drivers/gpio/gpio-max77759.c index 7fe8e6f697d0..5e48eb03e7b3 100644 --- a/drivers/gpio/gpio-max77759.c +++ b/drivers/gpio/gpio-max77759.c @@ -469,7 +469,7 @@ static int max77759_gpio_probe(struct platform_device *pdev) chip->gc.direction_input = max77759_gpio_direction_input; chip->gc.direction_output = max77759_gpio_direction_output; chip->gc.get = max77759_gpio_get_value; - chip->gc.set_rv = max77759_gpio_set_value; + chip->gc.set = max77759_gpio_set_value; girq = &chip->gc.irq; gpio_irq_chip_set_chip(girq, &max77759_gpio_irq_chip); diff --git a/drivers/gpio/gpio-mb86s7x.c b/drivers/gpio/gpio-mb86s7x.c index 5ee2991ecdfd..581a71872eab 100644 --- a/drivers/gpio/gpio-mb86s7x.c +++ b/drivers/gpio/gpio-mb86s7x.c @@ -180,7 +180,7 @@ static int mb86s70_gpio_probe(struct platform_device *pdev) gchip->gc.request = mb86s70_gpio_request; gchip->gc.free = mb86s70_gpio_free; gchip->gc.get = mb86s70_gpio_get; - gchip->gc.set_rv = mb86s70_gpio_set; + gchip->gc.set = mb86s70_gpio_set; gchip->gc.to_irq = mb86s70_gpio_to_irq; gchip->gc.label = dev_name(&pdev->dev); gchip->gc.ngpio = 32; diff --git a/drivers/gpio/gpio-mc33880.c b/drivers/gpio/gpio-mc33880.c index e68956104161..9a40e9579e95 100644 --- a/drivers/gpio/gpio-mc33880.c +++ b/drivers/gpio/gpio-mc33880.c @@ -103,7 +103,7 @@ static int mc33880_probe(struct spi_device *spi) mc->spi = spi; mc->chip.label = DRIVER_NAME; - mc->chip.set_rv = mc33880_set; + mc->chip.set = mc33880_set; mc->chip.base = pdata->base; mc->chip.ngpio = PIN_NUMBER; mc->chip.can_sleep = true; diff --git a/drivers/gpio/gpio-ml-ioh.c b/drivers/gpio/gpio-ml-ioh.c index 12cf36f9ca63..f6af81bf2b13 100644 --- a/drivers/gpio/gpio-ml-ioh.c +++ b/drivers/gpio/gpio-ml-ioh.c @@ -224,7 +224,7 @@ static void ioh_gpio_setup(struct ioh_gpio *chip, int num_port) gpio->direction_input = ioh_gpio_direction_input; gpio->get = ioh_gpio_get; gpio->direction_output = ioh_gpio_direction_output; - gpio->set_rv = ioh_gpio_set; + gpio->set = ioh_gpio_set; gpio->dbg_show = NULL; gpio->base = -1; gpio->ngpio = num_port; diff --git a/drivers/gpio/gpio-mm-lantiq.c b/drivers/gpio/gpio-mm-lantiq.c index 897a1e004681..8f1405733d98 100644 --- a/drivers/gpio/gpio-mm-lantiq.c +++ b/drivers/gpio/gpio-mm-lantiq.c @@ -111,7 +111,7 @@ static int ltq_mm_probe(struct platform_device *pdev) chip->mmchip.gc.ngpio = 16; chip->mmchip.gc.direction_output = ltq_mm_dir_out; - chip->mmchip.gc.set_rv = ltq_mm_set; + chip->mmchip.gc.set = ltq_mm_set; chip->mmchip.save_regs = ltq_mm_save_regs; /* store the shadow value if one was passed by the devicetree */ diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c index cf878c2ea6bf..021ad62778c2 100644 --- a/drivers/gpio/gpio-mmio.c +++ b/drivers/gpio/gpio-mmio.c @@ -367,7 +367,7 @@ static int bgpio_dir_out_err(struct gpio_chip *gc, unsigned int gpio, static int bgpio_simple_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) { - gc->set_rv(gc, gpio, val); + gc->set(gc, gpio, val); return bgpio_dir_return(gc, gpio, true); } @@ -432,14 +432,14 @@ static int bgpio_dir_out_dir_first(struct gpio_chip *gc, unsigned int gpio, int val) { bgpio_dir_out(gc, gpio, val); - gc->set_rv(gc, gpio, val); + gc->set(gc, gpio, val); return bgpio_dir_return(gc, gpio, true); } static int bgpio_dir_out_val_first(struct gpio_chip *gc, unsigned int gpio, int val) { - gc->set_rv(gc, gpio, val); + gc->set(gc, gpio, val); bgpio_dir_out(gc, gpio, val); return bgpio_dir_return(gc, gpio, true); } @@ -528,18 +528,18 @@ static int bgpio_setup_io(struct gpio_chip *gc, if (set && clr) { gc->reg_set = set; gc->reg_clr = clr; - gc->set_rv = bgpio_set_with_clear; - gc->set_multiple_rv = bgpio_set_multiple_with_clear; + gc->set = bgpio_set_with_clear; + gc->set_multiple = bgpio_set_multiple_with_clear; } else if (set && !clr) { gc->reg_set = set; - gc->set_rv = bgpio_set_set; - gc->set_multiple_rv = bgpio_set_multiple_set; + gc->set = bgpio_set_set; + gc->set_multiple = bgpio_set_multiple_set; } else if (flags & BGPIOF_NO_OUTPUT) { - gc->set_rv = bgpio_set_none; - gc->set_multiple_rv = NULL; + gc->set = bgpio_set_none; + gc->set_multiple = NULL; } else { - gc->set_rv = bgpio_set; - gc->set_multiple_rv = bgpio_set_multiple; + gc->set = bgpio_set; + gc->set_multiple = bgpio_set_multiple; } if (!(flags & BGPIOF_UNREADABLE_REG_SET) && @@ -676,7 +676,7 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev, } gc->bgpio_data = gc->read_reg(gc->reg_dat); - if (gc->set_rv == bgpio_set_set && + if (gc->set == bgpio_set_set && !(flags & BGPIOF_UNREADABLE_REG_SET)) gc->bgpio_data = gc->read_reg(gc->reg_set); diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c index 266c0953d914..a7d69f3835c1 100644 --- a/drivers/gpio/gpio-mockup.c +++ b/drivers/gpio/gpio-mockup.c @@ -449,9 +449,9 @@ static int gpio_mockup_probe(struct platform_device *pdev) gc->owner = THIS_MODULE; gc->parent = dev; gc->get = gpio_mockup_get; - gc->set_rv = gpio_mockup_set; + gc->set = gpio_mockup_set; gc->get_multiple = gpio_mockup_get_multiple; - gc->set_multiple_rv = gpio_mockup_set_multiple; + gc->set_multiple = gpio_mockup_set_multiple; gc->direction_output = gpio_mockup_dirout; gc->direction_input = gpio_mockup_dirin; gc->get_direction = gpio_mockup_get_direction; diff --git a/drivers/gpio/gpio-moxtet.c b/drivers/gpio/gpio-moxtet.c index 27dd9c3e7b77..4eb9f1a2779b 100644 --- a/drivers/gpio/gpio-moxtet.c +++ b/drivers/gpio/gpio-moxtet.c @@ -140,7 +140,7 @@ static int moxtet_gpio_probe(struct device *dev) chip->gpio_chip.direction_input = moxtet_gpio_direction_input; chip->gpio_chip.direction_output = moxtet_gpio_direction_output; chip->gpio_chip.get = moxtet_gpio_get_value; - chip->gpio_chip.set_rv = moxtet_gpio_set_value; + chip->gpio_chip.set = moxtet_gpio_set_value; chip->gpio_chip.base = -1; chip->gpio_chip.ngpio = MOXTET_GPIO_NGPIOS; diff --git a/drivers/gpio/gpio-mpc5200.c b/drivers/gpio/gpio-mpc5200.c index 40d587176a75..dad0eca1ca2e 100644 --- a/drivers/gpio/gpio-mpc5200.c +++ b/drivers/gpio/gpio-mpc5200.c @@ -153,7 +153,7 @@ static int mpc52xx_wkup_gpiochip_probe(struct platform_device *ofdev) gc->direction_input = mpc52xx_wkup_gpio_dir_in; gc->direction_output = mpc52xx_wkup_gpio_dir_out; gc->get = mpc52xx_wkup_gpio_get; - gc->set_rv = mpc52xx_wkup_gpio_set; + gc->set = mpc52xx_wkup_gpio_set; ret = of_mm_gpiochip_add_data(ofdev->dev.of_node, &chip->mmchip, chip); if (ret) @@ -315,7 +315,7 @@ static int mpc52xx_simple_gpiochip_probe(struct platform_device *ofdev) gc->direction_input = mpc52xx_simple_gpio_dir_in; gc->direction_output = mpc52xx_simple_gpio_dir_out; gc->get = mpc52xx_simple_gpio_get; - gc->set_rv = mpc52xx_simple_gpio_set; + gc->set = mpc52xx_simple_gpio_set; ret = of_mm_gpiochip_add_data(ofdev->dev.of_node, &chip->mmchip, chip); if (ret) diff --git a/drivers/gpio/gpio-mpfs.c b/drivers/gpio/gpio-mpfs.c index 3415cb7ebb0f..82d557a7e5d8 100644 --- a/drivers/gpio/gpio-mpfs.c +++ b/drivers/gpio/gpio-mpfs.c @@ -150,7 +150,7 @@ static int mpfs_gpio_probe(struct platform_device *pdev) mpfs_gpio->gc.direction_output = mpfs_gpio_direction_output; mpfs_gpio->gc.get_direction = mpfs_gpio_get_direction; mpfs_gpio->gc.get = mpfs_gpio_get; - mpfs_gpio->gc.set_rv = mpfs_gpio_set; + mpfs_gpio->gc.set = mpfs_gpio_set; mpfs_gpio->gc.base = -1; mpfs_gpio->gc.ngpio = ngpios; mpfs_gpio->gc.label = dev_name(dev); diff --git a/drivers/gpio/gpio-mpsse.c b/drivers/gpio/gpio-mpsse.c index b17de08e9e03..9f42bb30b4ec 100644 --- a/drivers/gpio/gpio-mpsse.c +++ b/drivers/gpio/gpio-mpsse.c @@ -448,9 +448,9 @@ static int gpio_mpsse_probe(struct usb_interface *interface, priv->gpio.direction_input = gpio_mpsse_direction_input; priv->gpio.direction_output = gpio_mpsse_direction_output; priv->gpio.get = gpio_mpsse_gpio_get; - priv->gpio.set_rv = gpio_mpsse_gpio_set; + priv->gpio.set = gpio_mpsse_gpio_set; priv->gpio.get_multiple = gpio_mpsse_get_multiple; - priv->gpio.set_multiple_rv = gpio_mpsse_set_multiple; + priv->gpio.set_multiple = gpio_mpsse_set_multiple; priv->gpio.base = -1; priv->gpio.ngpio = 16; priv->gpio.offset = priv->intf_id * priv->gpio.ngpio; diff --git a/drivers/gpio/gpio-msc313.c b/drivers/gpio/gpio-msc313.c index 992339a89d19..b0cccd856840 100644 --- a/drivers/gpio/gpio-msc313.c +++ b/drivers/gpio/gpio-msc313.c @@ -658,7 +658,7 @@ static int msc313_gpio_probe(struct platform_device *pdev) gpiochip->direction_input = msc313_gpio_direction_input; gpiochip->direction_output = msc313_gpio_direction_output; gpiochip->get = msc313_gpio_get; - gpiochip->set_rv = msc313_gpio_set; + gpiochip->set = msc313_gpio_set; gpiochip->base = -1; gpiochip->ngpio = gpio->gpio_data->num; gpiochip->names = gpio->gpio_data->names; diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 24792b8eb083..5e3f54cb8bc4 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -1168,7 +1168,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev) mvchip->chip.direction_input = mvebu_gpio_direction_input; mvchip->chip.get = mvebu_gpio_get; mvchip->chip.direction_output = mvebu_gpio_direction_output; - mvchip->chip.set_rv = mvebu_gpio_set; + mvchip->chip.set = mvebu_gpio_set; if (have_irqs) mvchip->chip.to_irq = mvebu_gpio_to_irq; mvchip->chip.base = id * MVEBU_MAX_GPIO_PER_BANK; diff --git a/drivers/gpio/gpio-nomadik.c b/drivers/gpio/gpio-nomadik.c index 296d13845b30..bcf4b07dd458 100644 --- a/drivers/gpio/gpio-nomadik.c +++ b/drivers/gpio/gpio-nomadik.c @@ -674,7 +674,7 @@ static int nmk_gpio_probe(struct platform_device *pdev) chip->direction_input = nmk_gpio_make_input; chip->get = nmk_gpio_get_input; chip->direction_output = nmk_gpio_make_output; - chip->set_rv = nmk_gpio_set_output; + chip->set = nmk_gpio_set_output; chip->dbg_show = nmk_gpio_dbg_show; chip->can_sleep = false; chip->owner = THIS_MODULE; diff --git a/drivers/gpio/gpio-npcm-sgpio.c b/drivers/gpio/gpio-npcm-sgpio.c index 25b203a89e38..83c77a2c0623 100644 --- a/drivers/gpio/gpio-npcm-sgpio.c +++ b/drivers/gpio/gpio-npcm-sgpio.c @@ -211,7 +211,7 @@ static int npcm_sgpio_dir_in(struct gpio_chip *gc, unsigned int offset) static int npcm_sgpio_dir_out(struct gpio_chip *gc, unsigned int offset, int val) { - return gc->set_rv(gc, offset, val); + return gc->set(gc, offset, val); } static int npcm_sgpio_get_direction(struct gpio_chip *gc, unsigned int offset) @@ -546,7 +546,7 @@ static int npcm_sgpio_probe(struct platform_device *pdev) gpio->chip.direction_output = npcm_sgpio_dir_out; gpio->chip.get_direction = npcm_sgpio_get_direction; gpio->chip.get = npcm_sgpio_get; - gpio->chip.set_rv = npcm_sgpio_set; + gpio->chip.set = npcm_sgpio_set; gpio->chip.label = dev_name(&pdev->dev); gpio->chip.base = -1; diff --git a/drivers/gpio/gpio-octeon.c b/drivers/gpio/gpio-octeon.c index 24966161742a..777e20c608dc 100644 --- a/drivers/gpio/gpio-octeon.c +++ b/drivers/gpio/gpio-octeon.c @@ -108,7 +108,7 @@ static int octeon_gpio_probe(struct platform_device *pdev) chip->direction_input = octeon_gpio_dir_in; chip->get = octeon_gpio_get; chip->direction_output = octeon_gpio_dir_out; - chip->set_rv = octeon_gpio_set; + chip->set = octeon_gpio_set; err = devm_gpiochip_add_data(&pdev->dev, chip, gpio); if (err) return err; diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index ed5c88a5c520..a268c76bdca6 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1046,8 +1046,8 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct device *pm_dev) bank->chip.get_multiple = omap_gpio_get_multiple; bank->chip.direction_output = omap_gpio_output; bank->chip.set_config = omap_gpio_set_config; - bank->chip.set_rv = omap_gpio_set; - bank->chip.set_multiple_rv = omap_gpio_set_multiple; + bank->chip.set = omap_gpio_set; + bank->chip.set_multiple = omap_gpio_set_multiple; if (bank->is_mpuio) { bank->chip.label = "mpuio"; if (bank->regs->wkup_en) diff --git a/drivers/gpio/gpio-palmas.c b/drivers/gpio/gpio-palmas.c index 9329d8ce8f59..e377f6dd4ccf 100644 --- a/drivers/gpio/gpio-palmas.c +++ b/drivers/gpio/gpio-palmas.c @@ -166,7 +166,7 @@ static int palmas_gpio_probe(struct platform_device *pdev) palmas_gpio->gpio_chip.direction_input = palmas_gpio_input; palmas_gpio->gpio_chip.direction_output = palmas_gpio_output; palmas_gpio->gpio_chip.to_irq = palmas_gpio_to_irq; - palmas_gpio->gpio_chip.set_rv = palmas_gpio_set; + palmas_gpio->gpio_chip.set = palmas_gpio_set; palmas_gpio->gpio_chip.get = palmas_gpio_get; palmas_gpio->gpio_chip.parent = &pdev->dev; diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 69906a9af7e6..b46927f55038 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -789,10 +789,10 @@ static void pca953x_setup_gpio(struct pca953x_chip *chip, int gpios) gc->direction_input = pca953x_gpio_direction_input; gc->direction_output = pca953x_gpio_direction_output; gc->get = pca953x_gpio_get_value; - gc->set_rv = pca953x_gpio_set_value; + gc->set = pca953x_gpio_set_value; gc->get_direction = pca953x_gpio_get_direction; gc->get_multiple = pca953x_gpio_get_multiple; - gc->set_multiple_rv = pca953x_gpio_set_multiple; + gc->set_multiple = pca953x_gpio_set_multiple; gc->set_config = pca953x_gpio_set_config; gc->can_sleep = true; diff --git a/drivers/gpio/gpio-pca9570.c b/drivers/gpio/gpio-pca9570.c index a33246f20fd8..c5a1287079a0 100644 --- a/drivers/gpio/gpio-pca9570.c +++ b/drivers/gpio/gpio-pca9570.c @@ -126,7 +126,7 @@ static int pca9570_probe(struct i2c_client *client) gpio->chip.owner = THIS_MODULE; gpio->chip.get_direction = pca9570_get_direction; gpio->chip.get = pca9570_get; - gpio->chip.set_rv = pca9570_set; + gpio->chip.set = pca9570_set; gpio->chip.base = -1; gpio->chip_data = device_get_match_data(&client->dev); gpio->chip.ngpio = gpio->chip_data->ngpio; diff --git a/drivers/gpio/gpio-pcf857x.c b/drivers/gpio/gpio-pcf857x.c index a04203680333..3b9de8c3d924 100644 --- a/drivers/gpio/gpio-pcf857x.c +++ b/drivers/gpio/gpio-pcf857x.c @@ -295,8 +295,8 @@ static int pcf857x_probe(struct i2c_client *client) gpio->chip.owner = THIS_MODULE; gpio->chip.get = pcf857x_get; gpio->chip.get_multiple = pcf857x_get_multiple; - gpio->chip.set_rv = pcf857x_set; - gpio->chip.set_multiple_rv = pcf857x_set_multiple; + gpio->chip.set = pcf857x_set; + gpio->chip.set_multiple = pcf857x_set_multiple; gpio->chip.direction_input = pcf857x_input; gpio->chip.direction_output = pcf857x_output; gpio->chip.ngpio = (uintptr_t)i2c_get_match_data(client); diff --git a/drivers/gpio/gpio-pch.c b/drivers/gpio/gpio-pch.c index c6f313342ba0..9925687e05fb 100644 --- a/drivers/gpio/gpio-pch.c +++ b/drivers/gpio/gpio-pch.c @@ -219,7 +219,7 @@ static void pch_gpio_setup(struct pch_gpio *chip) gpio->direction_input = pch_gpio_direction_input; gpio->get = pch_gpio_get; gpio->direction_output = pch_gpio_direction_output; - gpio->set_rv = pch_gpio_set; + gpio->set = pch_gpio_set; gpio->base = -1; gpio->ngpio = gpio_pins[chip->ioh]; gpio->can_sleep = false; diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c index 98cfac4eac85..02e4ffcf5a6f 100644 --- a/drivers/gpio/gpio-pl061.c +++ b/drivers/gpio/gpio-pl061.c @@ -330,7 +330,7 @@ static int pl061_probe(struct amba_device *adev, const struct amba_id *id) pl061->gc.direction_input = pl061_direction_input; pl061->gc.direction_output = pl061_direction_output; pl061->gc.get = pl061_get_value; - pl061->gc.set_rv = pl061_set_value; + pl061->gc.set = pl061_set_value; pl061->gc.ngpio = PL061_GPIO_NR; pl061->gc.label = dev_name(dev); pl061->gc.parent = dev; diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index cbcdd416f8b9..fa22f3faa163 100644 --- a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -355,7 +355,7 @@ static int pxa_init_gpio_chip(struct pxa_gpio_chip *pchip, int ngpio, void __iom pchip->chip.direction_input = pxa_gpio_direction_input; pchip->chip.direction_output = pxa_gpio_direction_output; pchip->chip.get = pxa_gpio_get; - pchip->chip.set_rv = pxa_gpio_set; + pchip->chip.set = pxa_gpio_set; pchip->chip.to_irq = pxa_gpio_to_irq; pchip->chip.ngpio = ngpio; pchip->chip.request = gpiochip_generic_request; diff --git a/drivers/gpio/gpio-raspberrypi-exp.c b/drivers/gpio/gpio-raspberrypi-exp.c index b4b607515a04..40413e06b69c 100644 --- a/drivers/gpio/gpio-raspberrypi-exp.c +++ b/drivers/gpio/gpio-raspberrypi-exp.c @@ -232,7 +232,7 @@ static int rpi_exp_gpio_probe(struct platform_device *pdev) rpi_gpio->gc.direction_output = rpi_exp_gpio_dir_out; rpi_gpio->gc.get_direction = rpi_exp_gpio_get_direction; rpi_gpio->gc.get = rpi_exp_gpio_get; - rpi_gpio->gc.set_rv = rpi_exp_gpio_set; + rpi_gpio->gc.set = rpi_exp_gpio_set; rpi_gpio->gc.can_sleep = true; return devm_gpiochip_add_data(dev, &rpi_gpio->gc, rpi_gpio); diff --git a/drivers/gpio/gpio-rc5t583.c b/drivers/gpio/gpio-rc5t583.c index cf3e91d235df..5a69e4534591 100644 --- a/drivers/gpio/gpio-rc5t583.c +++ b/drivers/gpio/gpio-rc5t583.c @@ -118,7 +118,7 @@ static int rc5t583_gpio_probe(struct platform_device *pdev) rc5t583_gpio->gpio_chip.free = rc5t583_gpio_free, rc5t583_gpio->gpio_chip.direction_input = rc5t583_gpio_dir_input, rc5t583_gpio->gpio_chip.direction_output = rc5t583_gpio_dir_output, - rc5t583_gpio->gpio_chip.set_rv = rc5t583_gpio_set, + rc5t583_gpio->gpio_chip.set = rc5t583_gpio_set, rc5t583_gpio->gpio_chip.get = rc5t583_gpio_get, rc5t583_gpio->gpio_chip.to_irq = rc5t583_gpio_to_irq, rc5t583_gpio->gpio_chip.ngpio = RC5T583_MAX_GPIO, diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index cd31580effa9..86777e097fd8 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -535,8 +535,8 @@ static int gpio_rcar_probe(struct platform_device *pdev) gpio_chip->get = gpio_rcar_get; gpio_chip->get_multiple = gpio_rcar_get_multiple; gpio_chip->direction_output = gpio_rcar_direction_output; - gpio_chip->set_rv = gpio_rcar_set; - gpio_chip->set_multiple_rv = gpio_rcar_set_multiple; + gpio_chip->set = gpio_rcar_set; + gpio_chip->set_multiple = gpio_rcar_set_multiple; gpio_chip->label = name; gpio_chip->parent = dev; gpio_chip->owner = THIS_MODULE; diff --git a/drivers/gpio/gpio-rdc321x.c b/drivers/gpio/gpio-rdc321x.c index a75ed8021de5..ba62b81aa8ae 100644 --- a/drivers/gpio/gpio-rdc321x.c +++ b/drivers/gpio/gpio-rdc321x.c @@ -159,7 +159,7 @@ static int rdc321x_gpio_probe(struct platform_device *pdev) rdc321x_gpio_dev->chip.direction_input = rdc_gpio_direction_input; rdc321x_gpio_dev->chip.direction_output = rdc_gpio_config; rdc321x_gpio_dev->chip.get = rdc_gpio_get_value; - rdc321x_gpio_dev->chip.set_rv = rdc_gpio_set_value; + rdc321x_gpio_dev->chip.set = rdc_gpio_set_value; rdc321x_gpio_dev->chip.base = 0; rdc321x_gpio_dev->chip.ngpio = pdata->max_gpios; diff --git a/drivers/gpio/gpio-reg.c b/drivers/gpio/gpio-reg.c index d8da99f97385..f2238196faf1 100644 --- a/drivers/gpio/gpio-reg.c +++ b/drivers/gpio/gpio-reg.c @@ -46,7 +46,7 @@ static int gpio_reg_direction_output(struct gpio_chip *gc, unsigned offset, if (r->direction & BIT(offset)) return -ENOTSUPP; - gc->set_rv(gc, offset, value); + gc->set(gc, offset, value); return 0; } @@ -161,9 +161,9 @@ struct gpio_chip *gpio_reg_init(struct device *dev, void __iomem *reg, r->gc.get_direction = gpio_reg_get_direction; r->gc.direction_input = gpio_reg_direction_input; r->gc.direction_output = gpio_reg_direction_output; - r->gc.set_rv = gpio_reg_set; + r->gc.set = gpio_reg_set; r->gc.get = gpio_reg_get; - r->gc.set_multiple_rv = gpio_reg_set_multiple; + r->gc.set_multiple = gpio_reg_set_multiple; if (irqs) r->gc.to_irq = gpio_reg_to_irq; r->gc.base = base; diff --git a/drivers/gpio/gpio-regmap.c b/drivers/gpio/gpio-regmap.c index 87c4225784cf..e8a32dfebdcb 100644 --- a/drivers/gpio/gpio-regmap.c +++ b/drivers/gpio/gpio-regmap.c @@ -260,9 +260,9 @@ struct gpio_regmap *gpio_regmap_register(const struct gpio_regmap_config *config chip->free = gpiochip_generic_free; chip->get = gpio_regmap_get; if (gpio->reg_set_base && gpio->reg_clr_base) - chip->set_rv = gpio_regmap_set_with_clear; + chip->set = gpio_regmap_set_with_clear; else if (gpio->reg_set_base) - chip->set_rv = gpio_regmap_set; + chip->set = gpio_regmap_set; chip->get_direction = gpio_regmap_get_direction; if (gpio->reg_dir_in_base || gpio->reg_dir_out_base) { diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c index ecd60ff9e1dd..bcfc323a8315 100644 --- a/drivers/gpio/gpio-rockchip.c +++ b/drivers/gpio/gpio-rockchip.c @@ -327,7 +327,7 @@ static int rockchip_gpio_to_irq(struct gpio_chip *gc, unsigned int offset) static const struct gpio_chip rockchip_gpiolib_chip = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, - .set_rv = rockchip_gpio_set, + .set = rockchip_gpio_set, .get = rockchip_gpio_get, .get_direction = rockchip_gpio_get_direction, .direction_input = rockchip_gpio_direction_input, diff --git a/drivers/gpio/gpio-rtd.c b/drivers/gpio/gpio-rtd.c index 25bbd749b019..d46b40dd5283 100644 --- a/drivers/gpio/gpio-rtd.c +++ b/drivers/gpio/gpio-rtd.c @@ -565,7 +565,7 @@ static int rtd_gpio_probe(struct platform_device *pdev) data->gpio_chip.get_direction = rtd_gpio_get_direction; data->gpio_chip.direction_input = rtd_gpio_direction_input; data->gpio_chip.direction_output = rtd_gpio_direction_output; - data->gpio_chip.set_rv = rtd_gpio_set; + data->gpio_chip.set = rtd_gpio_set; data->gpio_chip.get = rtd_gpio_get; data->gpio_chip.set_config = rtd_gpio_set_config; data->gpio_chip.parent = dev; diff --git a/drivers/gpio/gpio-sa1100.c b/drivers/gpio/gpio-sa1100.c index e9d054d78ccb..7f6a62f5d1ee 100644 --- a/drivers/gpio/gpio-sa1100.c +++ b/drivers/gpio/gpio-sa1100.c @@ -99,7 +99,7 @@ static struct sa1100_gpio_chip sa1100_gpio_chip = { .get_direction = sa1100_get_direction, .direction_input = sa1100_direction_input, .direction_output = sa1100_direction_output, - .set_rv = sa1100_gpio_set, + .set = sa1100_gpio_set, .get = sa1100_gpio_get, .to_irq = sa1100_to_irq, .base = 0, diff --git a/drivers/gpio/gpio-sama5d2-piobu.c b/drivers/gpio/gpio-sama5d2-piobu.c index c31244cf5e89..5005688f6e67 100644 --- a/drivers/gpio/gpio-sama5d2-piobu.c +++ b/drivers/gpio/gpio-sama5d2-piobu.c @@ -196,7 +196,7 @@ static int sama5d2_piobu_probe(struct platform_device *pdev) piobu->chip.direction_input = sama5d2_piobu_direction_input; piobu->chip.direction_output = sama5d2_piobu_direction_output; piobu->chip.get = sama5d2_piobu_get; - piobu->chip.set_rv = sama5d2_piobu_set; + piobu->chip.set = sama5d2_piobu_set; piobu->chip.base = -1; piobu->chip.ngpio = PIOBU_NUM; piobu->chip.can_sleep = 0; diff --git a/drivers/gpio/gpio-sch.c b/drivers/gpio/gpio-sch.c index 833ffdd98d74..966d16a6d515 100644 --- a/drivers/gpio/gpio-sch.c +++ b/drivers/gpio/gpio-sch.c @@ -167,7 +167,7 @@ static const struct gpio_chip sch_gpio_chip = { .direction_input = sch_gpio_direction_in, .get = sch_gpio_get, .direction_output = sch_gpio_direction_out, - .set_rv = sch_gpio_set, + .set = sch_gpio_set, .get_direction = sch_gpio_get_direction, }; diff --git a/drivers/gpio/gpio-sch311x.c b/drivers/gpio/gpio-sch311x.c index 44fb5fc21fb8..f95566998d30 100644 --- a/drivers/gpio/gpio-sch311x.c +++ b/drivers/gpio/gpio-sch311x.c @@ -297,7 +297,7 @@ static int sch311x_gpio_probe(struct platform_device *pdev) block->chip.get_direction = sch311x_gpio_get_direction; block->chip.set_config = sch311x_gpio_set_config; block->chip.get = sch311x_gpio_get; - block->chip.set_rv = sch311x_gpio_set; + block->chip.set = sch311x_gpio_set; block->chip.ngpio = 8; block->chip.parent = &pdev->dev; block->chip.base = sch311x_gpio_blocks[i].base; diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index 9503296422fd..050092583f79 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -486,9 +486,9 @@ static int gpio_sim_add_bank(struct fwnode_handle *swnode, struct device *dev) gc->parent = dev; gc->fwnode = swnode; gc->get = gpio_sim_get; - gc->set_rv = gpio_sim_set; + gc->set = gpio_sim_set; gc->get_multiple = gpio_sim_get_multiple; - gc->set_multiple_rv = gpio_sim_set_multiple; + gc->set_multiple = gpio_sim_set_multiple; gc->direction_output = gpio_sim_direction_output; gc->direction_input = gpio_sim_direction_input; gc->get_direction = gpio_sim_get_direction; diff --git a/drivers/gpio/gpio-siox.c b/drivers/gpio/gpio-siox.c index 95355dda621b..958034b9f3f3 100644 --- a/drivers/gpio/gpio-siox.c +++ b/drivers/gpio/gpio-siox.c @@ -237,7 +237,7 @@ static int gpio_siox_probe(struct siox_device *sdevice) gc->parent = dev; gc->owner = THIS_MODULE; gc->get = gpio_siox_get; - gc->set_rv = gpio_siox_set; + gc->set = gpio_siox_set; gc->direction_input = gpio_siox_direction_input; gc->direction_output = gpio_siox_direction_output; gc->get_direction = gpio_siox_get_direction; diff --git a/drivers/gpio/gpio-spear-spics.c b/drivers/gpio/gpio-spear-spics.c index 55f0e8afa291..96a0e1211500 100644 --- a/drivers/gpio/gpio-spear-spics.c +++ b/drivers/gpio/gpio-spear-spics.c @@ -140,7 +140,7 @@ static int spics_gpio_probe(struct platform_device *pdev) spics->chip.request = spics_request; spics->chip.free = spics_free; spics->chip.direction_output = spics_direction_output; - spics->chip.set_rv = spics_set_value; + spics->chip.set = spics_set_value; spics->chip.label = dev_name(&pdev->dev); spics->chip.parent = &pdev->dev; spics->chip.owner = THIS_MODULE; diff --git a/drivers/gpio/gpio-sprd.c b/drivers/gpio/gpio-sprd.c index bbd5bf51c088..413bcd0a4240 100644 --- a/drivers/gpio/gpio-sprd.c +++ b/drivers/gpio/gpio-sprd.c @@ -245,7 +245,7 @@ static int sprd_gpio_probe(struct platform_device *pdev) sprd_gpio->chip.request = sprd_gpio_request; sprd_gpio->chip.free = sprd_gpio_free; sprd_gpio->chip.get = sprd_gpio_get; - sprd_gpio->chip.set_rv = sprd_gpio_set; + sprd_gpio->chip.set = sprd_gpio_set; sprd_gpio->chip.direction_input = sprd_gpio_direction_input; sprd_gpio->chip.direction_output = sprd_gpio_direction_output; diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c index 0a270156e0be..5dd4c21a8e60 100644 --- a/drivers/gpio/gpio-stmpe.c +++ b/drivers/gpio/gpio-stmpe.c @@ -136,7 +136,7 @@ static const struct gpio_chip template_chip = { .direction_input = stmpe_gpio_direction_input, .get = stmpe_gpio_get, .direction_output = stmpe_gpio_direction_output, - .set_rv = stmpe_gpio_set, + .set = stmpe_gpio_set, .request = stmpe_gpio_request, .can_sleep = true, }; diff --git a/drivers/gpio/gpio-stp-xway.c b/drivers/gpio/gpio-stp-xway.c index fdda8de6ca36..493c027afdd6 100644 --- a/drivers/gpio/gpio-stp-xway.c +++ b/drivers/gpio/gpio-stp-xway.c @@ -249,7 +249,7 @@ static int xway_stp_probe(struct platform_device *pdev) chip->gc.label = "stp-xway"; chip->gc.direction_output = xway_stp_dir_out; chip->gc.get = xway_stp_get; - chip->gc.set_rv = xway_stp_set; + chip->gc.set = xway_stp_set; chip->gc.request = xway_stp_request; chip->gc.base = -1; chip->gc.owner = THIS_MODULE; diff --git a/drivers/gpio/gpio-syscon.c b/drivers/gpio/gpio-syscon.c index f86f78655c24..40064d4cf47f 100644 --- a/drivers/gpio/gpio-syscon.c +++ b/drivers/gpio/gpio-syscon.c @@ -115,7 +115,7 @@ static int syscon_gpio_dir_out(struct gpio_chip *chip, unsigned offset, int val) BIT(offs % SYSCON_REG_BITS)); } - return chip->set_rv(chip, offset, val); + return chip->set(chip, offset, val); } static const struct syscon_gpio_data clps711x_mctrl_gpio = { @@ -251,7 +251,7 @@ static int syscon_gpio_probe(struct platform_device *pdev) if (priv->data->flags & GPIO_SYSCON_FEAT_IN) priv->chip.direction_input = syscon_gpio_dir_in; if (priv->data->flags & GPIO_SYSCON_FEAT_OUT) { - priv->chip.set_rv = priv->data->set ? : syscon_gpio_set; + priv->chip.set = priv->data->set ? : syscon_gpio_set; priv->chip.direction_output = syscon_gpio_dir_out; } diff --git a/drivers/gpio/gpio-tangier.c b/drivers/gpio/gpio-tangier.c index ce17b98e0623..ba5a8ede8912 100644 --- a/drivers/gpio/gpio-tangier.c +++ b/drivers/gpio/gpio-tangier.c @@ -430,7 +430,7 @@ int devm_tng_gpio_probe(struct device *dev, struct tng_gpio *gpio) gpio->chip.direction_input = tng_gpio_direction_input; gpio->chip.direction_output = tng_gpio_direction_output; gpio->chip.get = tng_gpio_get; - gpio->chip.set_rv = tng_gpio_set; + gpio->chip.set = tng_gpio_set; gpio->chip.get_direction = tng_gpio_get_direction; gpio->chip.set_config = tng_gpio_set_config; gpio->chip.base = info->base; diff --git a/drivers/gpio/gpio-tc3589x.c b/drivers/gpio/gpio-tc3589x.c index 0bd32809fd68..90d048f9da08 100644 --- a/drivers/gpio/gpio-tc3589x.c +++ b/drivers/gpio/gpio-tc3589x.c @@ -149,7 +149,7 @@ static const struct gpio_chip template_chip = { .label = "tc3589x", .owner = THIS_MODULE, .get = tc3589x_gpio_get, - .set_rv = tc3589x_gpio_set, + .set = tc3589x_gpio_set, .direction_output = tc3589x_gpio_direction_output, .direction_input = tc3589x_gpio_direction_input, .get_direction = tc3589x_gpio_get_direction, diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index 126fd12550aa..15a5762a82c2 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -720,7 +720,7 @@ static int tegra_gpio_probe(struct platform_device *pdev) tgi->gc.direction_input = tegra_gpio_direction_input; tgi->gc.get = tegra_gpio_get; tgi->gc.direction_output = tegra_gpio_direction_output; - tgi->gc.set_rv = tegra_gpio_set; + tgi->gc.set = tegra_gpio_set; tgi->gc.get_direction = tegra_gpio_get_direction; tgi->gc.base = 0; tgi->gc.ngpio = tgi->bank_count * 32; diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c index f902da15c419..5fd3ec3e2c53 100644 --- a/drivers/gpio/gpio-tegra186.c +++ b/drivers/gpio/gpio-tegra186.c @@ -891,7 +891,7 @@ static int tegra186_gpio_probe(struct platform_device *pdev) gpio->gpio.direction_input = tegra186_gpio_direction_input; gpio->gpio.direction_output = tegra186_gpio_direction_output; gpio->gpio.get = tegra186_gpio_get; - gpio->gpio.set_rv = tegra186_gpio_set; + gpio->gpio.set = tegra186_gpio_set; gpio->gpio.set_config = tegra186_gpio_set_config; gpio->gpio.add_pin_ranges = tegra186_gpio_add_pin_ranges; gpio->gpio.init_valid_mask = tegra186_init_valid_mask; diff --git a/drivers/gpio/gpio-thunderx.c b/drivers/gpio/gpio-thunderx.c index eb6a1f0279c0..be96853063ba 100644 --- a/drivers/gpio/gpio-thunderx.c +++ b/drivers/gpio/gpio-thunderx.c @@ -533,8 +533,8 @@ static int thunderx_gpio_probe(struct pci_dev *pdev, chip->direction_input = thunderx_gpio_dir_in; chip->get = thunderx_gpio_get; chip->direction_output = thunderx_gpio_dir_out; - chip->set_rv = thunderx_gpio_set; - chip->set_multiple_rv = thunderx_gpio_set_multiple; + chip->set = thunderx_gpio_set; + chip->set_multiple = thunderx_gpio_set_multiple; chip->set_config = thunderx_gpio_set_config; girq = &chip->irq; gpio_irq_chip_set_chip(girq, &thunderx_gpio_irq_chip); diff --git a/drivers/gpio/gpio-timberdale.c b/drivers/gpio/gpio-timberdale.c index fbb883089189..679e27f00ff6 100644 --- a/drivers/gpio/gpio-timberdale.c +++ b/drivers/gpio/gpio-timberdale.c @@ -253,7 +253,7 @@ static int timbgpio_probe(struct platform_device *pdev) gc->direction_input = timbgpio_gpio_direction_input; gc->get = timbgpio_gpio_get; gc->direction_output = timbgpio_gpio_direction_output; - gc->set_rv = timbgpio_gpio_set; + gc->set = timbgpio_gpio_set; gc->to_irq = (irq >= 0 && tgpio->irq_base > 0) ? timbgpio_to_irq : NULL; gc->dbg_show = NULL; gc->base = pdata->gpio_base; diff --git a/drivers/gpio/gpio-tpic2810.c b/drivers/gpio/gpio-tpic2810.c index d5b8568ab061..866ff2d436d5 100644 --- a/drivers/gpio/gpio-tpic2810.c +++ b/drivers/gpio/gpio-tpic2810.c @@ -80,8 +80,8 @@ static const struct gpio_chip template_chip = { .owner = THIS_MODULE, .get_direction = tpic2810_get_direction, .direction_output = tpic2810_direction_output, - .set_rv = tpic2810_set, - .set_multiple_rv = tpic2810_set_multiple, + .set = tpic2810_set, + .set_multiple = tpic2810_set_multiple, .base = -1, .ngpio = 8, .can_sleep = true, diff --git a/drivers/gpio/gpio-tps65086.c b/drivers/gpio/gpio-tps65086.c index 08fa061b73ef..84b17b83476f 100644 --- a/drivers/gpio/gpio-tps65086.c +++ b/drivers/gpio/gpio-tps65086.c @@ -69,7 +69,7 @@ static const struct gpio_chip template_chip = { .direction_input = tps65086_gpio_direction_input, .direction_output = tps65086_gpio_direction_output, .get = tps65086_gpio_get, - .set_rv = tps65086_gpio_set, + .set = tps65086_gpio_set, .base = -1, .ngpio = 4, .can_sleep = true, diff --git a/drivers/gpio/gpio-tps65218.c b/drivers/gpio/gpio-tps65218.c index 49cd7754ed05..3b4c41f5ef55 100644 --- a/drivers/gpio/gpio-tps65218.c +++ b/drivers/gpio/gpio-tps65218.c @@ -169,7 +169,7 @@ static const struct gpio_chip template_chip = { .request = tps65218_gpio_request, .direction_output = tps65218_gpio_output, .get = tps65218_gpio_get, - .set_rv = tps65218_gpio_set, + .set = tps65218_gpio_set, .set_config = tps65218_gpio_set_config, .can_sleep = true, .ngpio = 3, diff --git a/drivers/gpio/gpio-tps65219.c b/drivers/gpio/gpio-tps65219.c index c0177088c54c..158f63bcf10c 100644 --- a/drivers/gpio/gpio-tps65219.c +++ b/drivers/gpio/gpio-tps65219.c @@ -203,7 +203,7 @@ static const struct gpio_chip tps65214_template_chip = { .direction_input = tps65219_gpio_direction_input, .direction_output = tps65219_gpio_direction_output, .get = tps65219_gpio_get, - .set_rv = tps65219_gpio_set, + .set = tps65219_gpio_set, .base = -1, .ngpio = 2, .can_sleep = true, @@ -216,7 +216,7 @@ static const struct gpio_chip tps65219_template_chip = { .direction_input = tps65219_gpio_direction_input, .direction_output = tps65219_gpio_direction_output, .get = tps65219_gpio_get, - .set_rv = tps65219_gpio_set, + .set = tps65219_gpio_set, .base = -1, .ngpio = 3, .can_sleep = true, diff --git a/drivers/gpio/gpio-tps6586x.c b/drivers/gpio/gpio-tps6586x.c index f1ced092f38a..aaacbb54bf5d 100644 --- a/drivers/gpio/gpio-tps6586x.c +++ b/drivers/gpio/gpio-tps6586x.c @@ -98,7 +98,7 @@ static int tps6586x_gpio_probe(struct platform_device *pdev) /* FIXME: add handling of GPIOs as dedicated inputs */ tps6586x_gpio->gpio_chip.direction_output = tps6586x_gpio_output; - tps6586x_gpio->gpio_chip.set_rv = tps6586x_gpio_set; + tps6586x_gpio->gpio_chip.set = tps6586x_gpio_set; tps6586x_gpio->gpio_chip.get = tps6586x_gpio_get; tps6586x_gpio->gpio_chip.to_irq = tps6586x_gpio_to_irq; diff --git a/drivers/gpio/gpio-tps65910.c b/drivers/gpio/gpio-tps65910.c index 3204f55394cf..25e9f41efe78 100644 --- a/drivers/gpio/gpio-tps65910.c +++ b/drivers/gpio/gpio-tps65910.c @@ -139,7 +139,7 @@ static int tps65910_gpio_probe(struct platform_device *pdev) tps65910_gpio->gpio_chip.can_sleep = true; tps65910_gpio->gpio_chip.direction_input = tps65910_gpio_input; tps65910_gpio->gpio_chip.direction_output = tps65910_gpio_output; - tps65910_gpio->gpio_chip.set_rv = tps65910_gpio_set; + tps65910_gpio->gpio_chip.set = tps65910_gpio_set; tps65910_gpio->gpio_chip.get = tps65910_gpio_get; tps65910_gpio->gpio_chip.parent = &pdev->dev; diff --git a/drivers/gpio/gpio-tps65912.c b/drivers/gpio/gpio-tps65912.c index d586ccfbfc56..7a2c5685c2fd 100644 --- a/drivers/gpio/gpio-tps65912.c +++ b/drivers/gpio/gpio-tps65912.c @@ -92,7 +92,7 @@ static const struct gpio_chip template_chip = { .direction_input = tps65912_gpio_direction_input, .direction_output = tps65912_gpio_direction_output, .get = tps65912_gpio_get, - .set_rv = tps65912_gpio_set, + .set = tps65912_gpio_set, .base = -1, .ngpio = 5, .can_sleep = true, diff --git a/drivers/gpio/gpio-tps68470.c b/drivers/gpio/gpio-tps68470.c index 3b8805c854f7..d4fbdf90e190 100644 --- a/drivers/gpio/gpio-tps68470.c +++ b/drivers/gpio/gpio-tps68470.c @@ -142,7 +142,7 @@ static int tps68470_gpio_probe(struct platform_device *pdev) tps68470_gpio->gc.direction_output = tps68470_gpio_output; tps68470_gpio->gc.get = tps68470_gpio_get; tps68470_gpio->gc.get_direction = tps68470_gpio_get_direction; - tps68470_gpio->gc.set_rv = tps68470_gpio_set; + tps68470_gpio->gc.set = tps68470_gpio_set; tps68470_gpio->gc.can_sleep = true; tps68470_gpio->gc.names = tps68470_names; tps68470_gpio->gc.ngpio = TPS68470_N_GPIO; diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c index 056799ecce6a..27dd09273292 100644 --- a/drivers/gpio/gpio-tqmx86.c +++ b/drivers/gpio/gpio-tqmx86.c @@ -370,7 +370,7 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) chip->direction_output = tqmx86_gpio_direction_output; chip->get_direction = tqmx86_gpio_get_direction; chip->get = tqmx86_gpio_get; - chip->set_rv = tqmx86_gpio_set; + chip->set = tqmx86_gpio_set; chip->ngpio = TQMX86_NGPIO; chip->parent = pdev->dev.parent; diff --git a/drivers/gpio/gpio-ts4900.c b/drivers/gpio/gpio-ts4900.c index 35dd2d09b4d4..d9ee8fc77ccd 100644 --- a/drivers/gpio/gpio-ts4900.c +++ b/drivers/gpio/gpio-ts4900.c @@ -119,7 +119,7 @@ static const struct gpio_chip template_chip = { .direction_input = ts4900_gpio_direction_input, .direction_output = ts4900_gpio_direction_output, .get = ts4900_gpio_get, - .set_rv = ts4900_gpio_set, + .set = ts4900_gpio_set, .base = -1, .can_sleep = true, }; diff --git a/drivers/gpio/gpio-ts5500.c b/drivers/gpio/gpio-ts5500.c index bb432ed73698..3c7f2efe10fd 100644 --- a/drivers/gpio/gpio-ts5500.c +++ b/drivers/gpio/gpio-ts5500.c @@ -340,7 +340,7 @@ static int ts5500_dio_probe(struct platform_device *pdev) priv->gpio_chip.direction_input = ts5500_gpio_input; priv->gpio_chip.direction_output = ts5500_gpio_output; priv->gpio_chip.get = ts5500_gpio_get; - priv->gpio_chip.set_rv = ts5500_gpio_set; + priv->gpio_chip.set = ts5500_gpio_set; priv->gpio_chip.to_irq = ts5500_gpio_to_irq; priv->gpio_chip.base = -1; diff --git a/drivers/gpio/gpio-twl4030.c b/drivers/gpio/gpio-twl4030.c index e39e39e3ef85..a33dc7c7e7a0 100644 --- a/drivers/gpio/gpio-twl4030.c +++ b/drivers/gpio/gpio-twl4030.c @@ -419,7 +419,7 @@ static const struct gpio_chip template_chip = { .direction_output = twl_direction_out, .get_direction = twl_get_direction, .get = twl_get, - .set_rv = twl_set, + .set = twl_set, .to_irq = twl_to_irq, .can_sleep = true, }; diff --git a/drivers/gpio/gpio-twl6040.c b/drivers/gpio/gpio-twl6040.c index b2196b62b528..4ec9bcd40439 100644 --- a/drivers/gpio/gpio-twl6040.c +++ b/drivers/gpio/gpio-twl6040.c @@ -69,7 +69,7 @@ static struct gpio_chip twl6040gpo_chip = { .get = twl6040gpo_get, .direction_output = twl6040gpo_direction_out, .get_direction = twl6040gpo_get_direction, - .set_rv = twl6040gpo_set, + .set = twl6040gpo_set, .can_sleep = true, }; diff --git a/drivers/gpio/gpio-uniphier.c b/drivers/gpio/gpio-uniphier.c index 8939556f42b6..197bb1d22b3c 100644 --- a/drivers/gpio/gpio-uniphier.c +++ b/drivers/gpio/gpio-uniphier.c @@ -386,8 +386,8 @@ static int uniphier_gpio_probe(struct platform_device *pdev) chip->direction_input = uniphier_gpio_direction_input; chip->direction_output = uniphier_gpio_direction_output; chip->get = uniphier_gpio_get; - chip->set_rv = uniphier_gpio_set; - chip->set_multiple_rv = uniphier_gpio_set_multiple; + chip->set = uniphier_gpio_set; + chip->set_multiple = uniphier_gpio_set_multiple; chip->to_irq = uniphier_gpio_to_irq; chip->base = -1; chip->ngpio = ngpios; diff --git a/drivers/gpio/gpio-viperboard.c b/drivers/gpio/gpio-viperboard.c index e8e906b54d51..15e495c109d2 100644 --- a/drivers/gpio/gpio-viperboard.c +++ b/drivers/gpio/gpio-viperboard.c @@ -408,7 +408,7 @@ static int vprbrd_gpio_probe(struct platform_device *pdev) vb_gpio->gpioa.base = -1; vb_gpio->gpioa.ngpio = 16; vb_gpio->gpioa.can_sleep = true; - vb_gpio->gpioa.set_rv = vprbrd_gpioa_set; + vb_gpio->gpioa.set = vprbrd_gpioa_set; vb_gpio->gpioa.get = vprbrd_gpioa_get; vb_gpio->gpioa.direction_input = vprbrd_gpioa_direction_input; vb_gpio->gpioa.direction_output = vprbrd_gpioa_direction_output; @@ -424,7 +424,7 @@ static int vprbrd_gpio_probe(struct platform_device *pdev) vb_gpio->gpiob.base = -1; vb_gpio->gpiob.ngpio = 16; vb_gpio->gpiob.can_sleep = true; - vb_gpio->gpiob.set_rv = vprbrd_gpiob_set; + vb_gpio->gpiob.set = vprbrd_gpiob_set; vb_gpio->gpiob.get = vprbrd_gpiob_get; vb_gpio->gpiob.direction_input = vprbrd_gpiob_direction_input; vb_gpio->gpiob.direction_output = vprbrd_gpiob_direction_output; diff --git a/drivers/gpio/gpio-virtio.c b/drivers/gpio/gpio-virtio.c index 07552611da98..17e040991e46 100644 --- a/drivers/gpio/gpio-virtio.c +++ b/drivers/gpio/gpio-virtio.c @@ -567,7 +567,7 @@ static int virtio_gpio_probe(struct virtio_device *vdev) vgpio->gc.direction_input = virtio_gpio_direction_input; vgpio->gc.direction_output = virtio_gpio_direction_output; vgpio->gc.get = virtio_gpio_get; - vgpio->gc.set_rv = virtio_gpio_set; + vgpio->gc.set = virtio_gpio_set; vgpio->gc.ngpio = ngpio; vgpio->gc.base = -1; /* Allocate base dynamically */ vgpio->gc.label = dev_name(dev); diff --git a/drivers/gpio/gpio-vx855.c b/drivers/gpio/gpio-vx855.c index a3bceac7854c..84b3a973a503 100644 --- a/drivers/gpio/gpio-vx855.c +++ b/drivers/gpio/gpio-vx855.c @@ -216,7 +216,7 @@ static void vx855gpio_gpio_setup(struct vx855_gpio *vg) c->direction_input = vx855gpio_direction_input; c->direction_output = vx855gpio_direction_output; c->get = vx855gpio_get; - c->set_rv = vx855gpio_set; + c->set = vx855gpio_set; c->set_config = vx855gpio_set_config; c->dbg_show = NULL; c->base = 0; diff --git a/drivers/gpio/gpio-wcd934x.c b/drivers/gpio/gpio-wcd934x.c index c89da9a22016..4af504c23e6f 100644 --- a/drivers/gpio/gpio-wcd934x.c +++ b/drivers/gpio/gpio-wcd934x.c @@ -98,7 +98,7 @@ static int wcd_gpio_probe(struct platform_device *pdev) chip->direction_output = wcd_gpio_direction_output; chip->get_direction = wcd_gpio_get_direction; chip->get = wcd_gpio_get; - chip->set_rv = wcd_gpio_set; + chip->set = wcd_gpio_set; chip->parent = dev; chip->base = -1; chip->ngpio = WCD934X_NPINS; diff --git a/drivers/gpio/gpio-wcove.c b/drivers/gpio/gpio-wcove.c index f7df3d5fc71c..4a5e20e936a9 100644 --- a/drivers/gpio/gpio-wcove.c +++ b/drivers/gpio/gpio-wcove.c @@ -439,7 +439,7 @@ static int wcove_gpio_probe(struct platform_device *pdev) wg->chip.direction_output = wcove_gpio_dir_out; wg->chip.get_direction = wcove_gpio_get_direction; wg->chip.get = wcove_gpio_get; - wg->chip.set_rv = wcove_gpio_set; + wg->chip.set = wcove_gpio_set; wg->chip.set_config = wcove_gpio_set_config; wg->chip.base = -1; wg->chip.ngpio = WCOVE_VGPIO_NUM; diff --git a/drivers/gpio/gpio-winbond.c b/drivers/gpio/gpio-winbond.c index 421655b5d4c2..dcfda738fd69 100644 --- a/drivers/gpio/gpio-winbond.c +++ b/drivers/gpio/gpio-winbond.c @@ -494,7 +494,7 @@ static struct gpio_chip winbond_gpio_chip = { .can_sleep = true, .get = winbond_gpio_get, .direction_input = winbond_gpio_direction_in, - .set_rv = winbond_gpio_set, + .set = winbond_gpio_set, .direction_output = winbond_gpio_direction_out, }; diff --git a/drivers/gpio/gpio-wm831x.c b/drivers/gpio/gpio-wm831x.c index ab58aa7c0b99..f03c0e808fab 100644 --- a/drivers/gpio/gpio-wm831x.c +++ b/drivers/gpio/gpio-wm831x.c @@ -253,7 +253,7 @@ static const struct gpio_chip template_chip = { .direction_input = wm831x_gpio_direction_in, .get = wm831x_gpio_get, .direction_output = wm831x_gpio_direction_out, - .set_rv = wm831x_gpio_set, + .set = wm831x_gpio_set, .to_irq = wm831x_gpio_to_irq, .set_config = wm831x_set_config, .dbg_show = wm831x_gpio_dbg_show, diff --git a/drivers/gpio/gpio-wm8350.c b/drivers/gpio/gpio-wm8350.c index 9a7677f841fc..46923b23a72e 100644 --- a/drivers/gpio/gpio-wm8350.c +++ b/drivers/gpio/gpio-wm8350.c @@ -93,7 +93,7 @@ static const struct gpio_chip template_chip = { .direction_input = wm8350_gpio_direction_in, .get = wm8350_gpio_get, .direction_output = wm8350_gpio_direction_out, - .set_rv = wm8350_gpio_set, + .set = wm8350_gpio_set, .to_irq = wm8350_gpio_to_irq, .can_sleep = true, }; diff --git a/drivers/gpio/gpio-wm8994.c b/drivers/gpio/gpio-wm8994.c index ccc005628dd2..df47a27f508d 100644 --- a/drivers/gpio/gpio-wm8994.c +++ b/drivers/gpio/gpio-wm8994.c @@ -256,7 +256,7 @@ static const struct gpio_chip template_chip = { .direction_input = wm8994_gpio_direction_in, .get = wm8994_gpio_get, .direction_output = wm8994_gpio_direction_out, - .set_rv = wm8994_gpio_set, + .set = wm8994_gpio_set, .set_config = wm8994_gpio_set_config, .to_irq = wm8994_gpio_to_irq, .dbg_show = wm8994_gpio_dbg_show, diff --git a/drivers/gpio/gpio-xgene.c b/drivers/gpio/gpio-xgene.c index 28f794e5eb26..4f627de3f56c 100644 --- a/drivers/gpio/gpio-xgene.c +++ b/drivers/gpio/gpio-xgene.c @@ -178,7 +178,7 @@ static int xgene_gpio_probe(struct platform_device *pdev) gpio->chip.direction_input = xgene_gpio_dir_in; gpio->chip.direction_output = xgene_gpio_dir_out; gpio->chip.get = xgene_gpio_get; - gpio->chip.set_rv = xgene_gpio_set; + gpio->chip.set = xgene_gpio_set; gpio->chip.label = dev_name(&pdev->dev); gpio->chip.base = -1; diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c index 36d91cacc2d9..83675ac81077 100644 --- a/drivers/gpio/gpio-xilinx.c +++ b/drivers/gpio/gpio-xilinx.c @@ -604,10 +604,10 @@ static int xgpio_probe(struct platform_device *pdev) chip->gc.direction_input = xgpio_dir_in; chip->gc.direction_output = xgpio_dir_out; chip->gc.get = xgpio_get; - chip->gc.set_rv = xgpio_set; + chip->gc.set = xgpio_set; chip->gc.request = xgpio_request; chip->gc.free = xgpio_free; - chip->gc.set_multiple_rv = xgpio_set_multiple; + chip->gc.set_multiple = xgpio_set_multiple; chip->gc.label = dev_name(dev); diff --git a/drivers/gpio/gpio-xlp.c b/drivers/gpio/gpio-xlp.c index bcd2dfec462d..aede6324387f 100644 --- a/drivers/gpio/gpio-xlp.c +++ b/drivers/gpio/gpio-xlp.c @@ -274,7 +274,7 @@ static int xlp_gpio_probe(struct platform_device *pdev) gc->ngpio = 70; gc->direction_output = xlp_gpio_dir_output; gc->direction_input = xlp_gpio_dir_input; - gc->set_rv = xlp_gpio_set; + gc->set = xlp_gpio_set; gc->get = xlp_gpio_get; spin_lock_init(&priv->lock); diff --git a/drivers/gpio/gpio-xra1403.c b/drivers/gpio/gpio-xra1403.c index 70402c6b5407..faadcb4b0b2d 100644 --- a/drivers/gpio/gpio-xra1403.c +++ b/drivers/gpio/gpio-xra1403.c @@ -164,7 +164,7 @@ static int xra1403_probe(struct spi_device *spi) xra->chip.direction_output = xra1403_direction_output; xra->chip.get_direction = xra1403_get_direction; xra->chip.get = xra1403_get; - xra->chip.set_rv = xra1403_set; + xra->chip.set = xra1403_set; xra->chip.dbg_show = xra1403_dbg_show; diff --git a/drivers/gpio/gpio-xtensa.c b/drivers/gpio/gpio-xtensa.c index e7ff3c60324d..4418947a10e5 100644 --- a/drivers/gpio/gpio-xtensa.c +++ b/drivers/gpio/gpio-xtensa.c @@ -132,7 +132,7 @@ static struct gpio_chip expstate_chip = { .ngpio = 32, .get_direction = xtensa_expstate_get_direction, .get = xtensa_expstate_get_value, - .set_rv = xtensa_expstate_set_value, + .set = xtensa_expstate_set_value, }; static int xtensa_gpio_probe(struct platform_device *pdev) diff --git a/drivers/gpio/gpio-zevio.c b/drivers/gpio/gpio-zevio.c index 0799f7976710..29375bea2289 100644 --- a/drivers/gpio/gpio-zevio.c +++ b/drivers/gpio/gpio-zevio.c @@ -161,7 +161,7 @@ static int zevio_gpio_to_irq(struct gpio_chip *chip, unsigned pin) static const struct gpio_chip zevio_gpio_chip = { .direction_input = zevio_gpio_direction_input, .direction_output = zevio_gpio_direction_output, - .set_rv = zevio_gpio_set, + .set = zevio_gpio_set, .get = zevio_gpio_get, .to_irq = zevio_gpio_to_irq, .base = 0, diff --git a/drivers/gpio/gpio-zynq.c b/drivers/gpio/gpio-zynq.c index b22b4e25c68d..0ffd76e8951f 100644 --- a/drivers/gpio/gpio-zynq.c +++ b/drivers/gpio/gpio-zynq.c @@ -932,7 +932,7 @@ static int zynq_gpio_probe(struct platform_device *pdev) chip->owner = THIS_MODULE; chip->parent = &pdev->dev; chip->get = zynq_gpio_get_value; - chip->set_rv = zynq_gpio_set_value; + chip->set = zynq_gpio_set_value; chip->request = zynq_gpio_request; chip->free = zynq_gpio_free; chip->direction_input = zynq_gpio_dir_in; diff --git a/drivers/gpio/gpio-zynqmp-modepin.c b/drivers/gpio/gpio-zynqmp-modepin.c index 6dc5d7acb89c..5e651482e985 100644 --- a/drivers/gpio/gpio-zynqmp-modepin.c +++ b/drivers/gpio/gpio-zynqmp-modepin.c @@ -130,7 +130,7 @@ static int modepin_gpio_probe(struct platform_device *pdev) chip->owner = THIS_MODULE; chip->parent = &pdev->dev; chip->get = modepin_gpio_get_value; - chip->set_rv = modepin_gpio_set_value; + chip->set = modepin_gpio_set_value; chip->direction_input = modepin_gpio_dir_in; chip->direction_output = modepin_gpio_dir_out; chip->label = dev_name(&pdev->dev); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 9ac4c23d656a..0d2b470a252e 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2886,10 +2886,10 @@ static int gpiochip_set(struct gpio_chip *gc, unsigned int offset, int value) lockdep_assert_held(&gc->gpiodev->srcu); - if (WARN_ON(unlikely(!gc->set_rv))) + if (WARN_ON(unlikely(!gc->set))) return -EOPNOTSUPP; - ret = gc->set_rv(gc, offset, value); + ret = gc->set(gc, offset, value); if (ret > 0) ret = -EBADE; @@ -2909,7 +2909,7 @@ static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value) * output-only, but if there is then not even a .set() operation it * is pretty tricky to drive the output line. */ - if (!guard.gc->set_rv && !guard.gc->direction_output) { + if (!guard.gc->set && !guard.gc->direction_output) { gpiod_warn(desc, "%s: missing set() and direction_output() operations\n", __func__); @@ -3655,8 +3655,8 @@ static int gpiochip_set_multiple(struct gpio_chip *gc, lockdep_assert_held(&gc->gpiodev->srcu); - if (gc->set_multiple_rv) { - ret = gc->set_multiple_rv(gc, mask, bits); + if (gc->set_multiple) { + ret = gc->set_multiple(gc, mask, bits); if (ret > 0) ret = -EBADE; diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index e3a8c0c0c945..464390372b34 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -1836,7 +1836,7 @@ static int ti_sn_gpio_probe(struct auxiliary_device *adev, pdata->gchip.direction_input = ti_sn_bridge_gpio_direction_input; pdata->gchip.direction_output = ti_sn_bridge_gpio_direction_output; pdata->gchip.get = ti_sn_bridge_gpio_get; - pdata->gchip.set_rv = ti_sn_bridge_gpio_set; + pdata->gchip.set = ti_sn_bridge_gpio_set; pdata->gchip.can_sleep = true; pdata->gchip.names = ti_sn_bridge_gpio_names; pdata->gchip.ngpio = SN_NUM_GPIOS; diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 234fa82eab07..482f62a78c41 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -1288,7 +1288,7 @@ static int cp2112_probe(struct hid_device *hdev, const struct hid_device_id *id) dev->gc.label = "cp2112_gpio"; dev->gc.direction_input = cp2112_gpio_direction_input; dev->gc.direction_output = cp2112_gpio_direction_output; - dev->gc.set_rv = cp2112_gpio_set; + dev->gc.set = cp2112_gpio_set; dev->gc.get = cp2112_gpio_get; dev->gc.base = -1; dev->gc.ngpio = CP2112_GPIO_MAX_GPIO; diff --git a/drivers/hid/hid-mcp2200.c b/drivers/hid/hid-mcp2200.c index e6ea0a2140eb..dafdd5b4a079 100644 --- a/drivers/hid/hid-mcp2200.c +++ b/drivers/hid/hid-mcp2200.c @@ -279,8 +279,8 @@ static const struct gpio_chip template_chip = { .get_direction = mcp_get_direction, .direction_input = mcp_direction_input, .direction_output = mcp_direction_output, - .set_rv = mcp_set, - .set_multiple_rv = mcp_set_multiple, + .set = mcp_set, + .set_multiple = mcp_set_multiple, .get = mcp_get, .get_multiple = mcp_get_multiple, .base = -1, diff --git a/drivers/hid/hid-mcp2221.c b/drivers/hid/hid-mcp2221.c index fcfe9370a887..475ac352df30 100644 --- a/drivers/hid/hid-mcp2221.c +++ b/drivers/hid/hid-mcp2221.c @@ -1298,7 +1298,7 @@ static int mcp2221_probe(struct hid_device *hdev, mcp->gc->direction_input = mcp_gpio_direction_input; mcp->gc->direction_output = mcp_gpio_direction_output; mcp->gc->get_direction = mcp_gpio_get_direction; - mcp->gc->set_rv = mcp_gpio_set; + mcp->gc->set = mcp_gpio_set; mcp->gc->get = mcp_gpio_get; mcp->gc->ngpio = MCP_NGPIO; mcp->gc->base = -1; diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c index a07e2eb93c71..1fcd320d6161 100644 --- a/drivers/hwmon/ltc2992.c +++ b/drivers/hwmon/ltc2992.c @@ -339,8 +339,8 @@ static int ltc2992_config_gpio(struct ltc2992_state *st) st->gc.ngpio = ARRAY_SIZE(st->gpio_names); st->gc.get = ltc2992_gpio_get; st->gc.get_multiple = ltc2992_gpio_get_multiple; - st->gc.set_rv = ltc2992_gpio_set; - st->gc.set_multiple_rv = ltc2992_gpio_set_multiple; + st->gc.set = ltc2992_gpio_set; + st->gc.set_multiple = ltc2992_gpio_set_multiple; ret = devm_gpiochip_add_data(&st->client->dev, &st->gc, st); if (ret) diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c index 52d4000902d5..55e7af3a5f98 100644 --- a/drivers/hwmon/pmbus/ucd9000.c +++ b/drivers/hwmon/pmbus/ucd9000.c @@ -364,7 +364,7 @@ static void ucd9000_probe_gpio(struct i2c_client *client, data->gpio.direction_input = ucd9000_gpio_direction_input; data->gpio.direction_output = ucd9000_gpio_direction_output; data->gpio.get = ucd9000_gpio_get; - data->gpio.set_rv = ucd9000_gpio_set; + data->gpio.set = ucd9000_gpio_set; data->gpio.can_sleep = true; data->gpio.base = -1; data->gpio.parent = &client->dev; diff --git a/drivers/i2c/muxes/i2c-mux-ltc4306.c b/drivers/i2c/muxes/i2c-mux-ltc4306.c index c688af270a11..50fbc0d06e62 100644 --- a/drivers/i2c/muxes/i2c-mux-ltc4306.c +++ b/drivers/i2c/muxes/i2c-mux-ltc4306.c @@ -164,7 +164,7 @@ static int ltc4306_gpio_init(struct ltc4306 *data) data->gpiochip.direction_input = ltc4306_gpio_direction_input; data->gpiochip.direction_output = ltc4306_gpio_direction_output; data->gpiochip.get = ltc4306_gpio_get; - data->gpiochip.set_rv = ltc4306_gpio_set; + data->gpiochip.set = ltc4306_gpio_set; data->gpiochip.set_config = ltc4306_gpio_set_config; data->gpiochip.owner = THIS_MODULE; diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c index 6cf790ff3eb5..dcdb5778f7d6 100644 --- a/drivers/iio/adc/ad4130.c +++ b/drivers/iio/adc/ad4130.c @@ -2064,7 +2064,7 @@ static int ad4130_probe(struct spi_device *spi) st->gc.can_sleep = true; st->gc.init_valid_mask = ad4130_gpio_init_valid_mask; st->gc.get_direction = ad4130_gpio_get_direction; - st->gc.set_rv = ad4130_gpio_set; + st->gc.set = ad4130_gpio_set; ret = devm_gpiochip_add_data(dev, &st->gc, st); if (ret) diff --git a/drivers/iio/adc/ad4170-4.c b/drivers/iio/adc/ad4170-4.c index 6cd84d6fb08b..efaed92191f1 100644 --- a/drivers/iio/adc/ad4170-4.c +++ b/drivers/iio/adc/ad4170-4.c @@ -1807,7 +1807,7 @@ static int ad4170_gpio_init(struct iio_dev *indio_dev) st->gpiochip.direction_input = ad4170_gpio_direction_input; st->gpiochip.direction_output = ad4170_gpio_direction_output; st->gpiochip.get = ad4170_gpio_get; - st->gpiochip.set_rv = ad4170_gpio_set; + st->gpiochip.set = ad4170_gpio_set; st->gpiochip.owner = THIS_MODULE; return devm_gpiochip_add_data(&st->spi->dev, &st->gpiochip, indio_dev); diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c index a2e061f0cb08..ca8fa91796ca 100644 --- a/drivers/iio/adc/ad7768-1.c +++ b/drivers/iio/adc/ad7768-1.c @@ -673,7 +673,7 @@ static int ad7768_gpio_init(struct iio_dev *indio_dev) .direction_input = ad7768_gpio_direction_input, .direction_output = ad7768_gpio_direction_output, .get = ad7768_gpio_get, - .set_rv = ad7768_gpio_set, + .set = ad7768_gpio_set, .owner = THIS_MODULE, }; diff --git a/drivers/iio/adc/rohm-bd79124.c b/drivers/iio/adc/rohm-bd79124.c index bb7c93ae4055..06c55c8da93f 100644 --- a/drivers/iio/adc/rohm-bd79124.c +++ b/drivers/iio/adc/rohm-bd79124.c @@ -246,8 +246,8 @@ static int bd79124_init_valid_mask(struct gpio_chip *gc, static const struct gpio_chip bd79124gpo_chip = { .label = "bd79124-gpo", .get_direction = bd79124gpo_direction_get, - .set_rv = bd79124gpo_set, - .set_multiple_rv = bd79124gpo_set_multiple, + .set = bd79124gpo_set, + .set_multiple = bd79124gpo_set_multiple, .init_valid_mask = bd79124_init_valid_mask, .can_sleep = true, .ngpio = 8, diff --git a/drivers/iio/adc/ti-ads7950.c b/drivers/iio/adc/ti-ads7950.c index 0356ccf23fea..bbe1ce577789 100644 --- a/drivers/iio/adc/ti-ads7950.c +++ b/drivers/iio/adc/ti-ads7950.c @@ -648,7 +648,7 @@ static int ti_ads7950_probe(struct spi_device *spi) st->chip.direction_input = ti_ads7950_direction_input; st->chip.direction_output = ti_ads7950_direction_output; st->chip.get = ti_ads7950_get; - st->chip.set_rv = ti_ads7950_set; + st->chip.set = ti_ads7950_set; ret = gpiochip_add_data(&st->chip, st); if (ret) { diff --git a/drivers/iio/addac/ad74115.c b/drivers/iio/addac/ad74115.c index 4d8b64048e4f..f8b04d86b01f 100644 --- a/drivers/iio/addac/ad74115.c +++ b/drivers/iio/addac/ad74115.c @@ -1577,7 +1577,7 @@ static int ad74115_setup_gpio_chip(struct ad74115_state *st) .direction_input = ad74115_gpio_direction_input, .direction_output = ad74115_gpio_direction_output, .get = ad74115_gpio_get, - .set_rv = ad74115_gpio_set, + .set = ad74115_gpio_set, }; return devm_gpiochip_add_data(dev, &st->gc, st); diff --git a/drivers/iio/addac/ad74413r.c b/drivers/iio/addac/ad74413r.c index a0bb1dbcb7ad..a20b4d48c5f7 100644 --- a/drivers/iio/addac/ad74413r.c +++ b/drivers/iio/addac/ad74413r.c @@ -1425,8 +1425,8 @@ static int ad74413r_probe(struct spi_device *spi) st->gpo_gpiochip.ngpio = st->num_gpo_gpios; st->gpo_gpiochip.parent = st->dev; st->gpo_gpiochip.can_sleep = true; - st->gpo_gpiochip.set_rv = ad74413r_gpio_set; - st->gpo_gpiochip.set_multiple_rv = ad74413r_gpio_set_multiple; + st->gpo_gpiochip.set = ad74413r_gpio_set; + st->gpo_gpiochip.set_multiple = ad74413r_gpio_set_multiple; st->gpo_gpiochip.set_config = ad74413r_gpio_set_gpo_config; st->gpo_gpiochip.get_direction = ad74413r_gpio_get_gpo_direction; diff --git a/drivers/iio/dac/ad5592r-base.c b/drivers/iio/dac/ad5592r-base.c index 5f2cd51723f6..4720733d66b2 100644 --- a/drivers/iio/dac/ad5592r-base.c +++ b/drivers/iio/dac/ad5592r-base.c @@ -129,7 +129,7 @@ static int ad5592r_gpio_init(struct ad5592r_state *st) st->gpiochip.direction_input = ad5592r_gpio_direction_input; st->gpiochip.direction_output = ad5592r_gpio_direction_output; st->gpiochip.get = ad5592r_gpio_get; - st->gpiochip.set_rv = ad5592r_gpio_set; + st->gpiochip.set = ad5592r_gpio_set; st->gpiochip.request = ad5592r_gpio_request; st->gpiochip.owner = THIS_MODULE; st->gpiochip.names = ad5592r_gpio_names; diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c index 2b2aca08423a..414fbef4abf9 100644 --- a/drivers/input/keyboard/adp5588-keys.c +++ b/drivers/input/keyboard/adp5588-keys.c @@ -425,7 +425,7 @@ static int adp5588_gpio_add(struct adp5588_kpad *kpad) kpad->gc.direction_input = adp5588_gpio_direction_input; kpad->gc.direction_output = adp5588_gpio_direction_output; kpad->gc.get = adp5588_gpio_get_value; - kpad->gc.set_rv = adp5588_gpio_set_value; + kpad->gc.set = adp5588_gpio_set_value; kpad->gc.set_config = adp5588_gpio_set_config; kpad->gc.can_sleep = 1; diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c index d2a3a5e016b6..8b4f3e3660b8 100644 --- a/drivers/input/touchscreen/ad7879.c +++ b/drivers/input/touchscreen/ad7879.c @@ -475,7 +475,7 @@ static int ad7879_gpio_add(struct ad7879 *ts) ts->gc.direction_input = ad7879_gpio_direction_input; ts->gc.direction_output = ad7879_gpio_direction_output; ts->gc.get = ad7879_gpio_get_value; - ts->gc.set_rv = ad7879_gpio_set_value; + ts->gc.set = ad7879_gpio_set_value; ts->gc.can_sleep = 1; ts->gc.base = -1; ts->gc.ngpio = 1; diff --git a/drivers/leds/blink/leds-lgm-sso.c b/drivers/leds/blink/leds-lgm-sso.c index c9027f9c4bb7..8923d2df4704 100644 --- a/drivers/leds/blink/leds-lgm-sso.c +++ b/drivers/leds/blink/leds-lgm-sso.c @@ -471,7 +471,7 @@ static int sso_gpio_gc_init(struct device *dev, struct sso_led_priv *priv) gc->get_direction = sso_gpio_get_dir; gc->direction_output = sso_gpio_dir_out; gc->get = sso_gpio_get; - gc->set_rv = sso_gpio_set; + gc->set = sso_gpio_set; gc->label = "lgm-sso"; gc->base = -1; diff --git a/drivers/leds/leds-pca9532.c b/drivers/leds/leds-pca9532.c index 7d4c071a6cd0..0344189bb991 100644 --- a/drivers/leds/leds-pca9532.c +++ b/drivers/leds/leds-pca9532.c @@ -473,7 +473,7 @@ static int pca9532_configure(struct i2c_client *client, data->gpio.label = "gpio-pca9532"; data->gpio.direction_input = pca9532_gpio_direction_input; data->gpio.direction_output = pca9532_gpio_direction_output; - data->gpio.set_rv = pca9532_gpio_set_value; + data->gpio.set = pca9532_gpio_set_value; data->gpio.get = pca9532_gpio_get_value; data->gpio.request = pca9532_gpio_request_pin; data->gpio.can_sleep = 1; diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c index 70d109246088..2007fe6217ec 100644 --- a/drivers/leds/leds-pca955x.c +++ b/drivers/leds/leds-pca955x.c @@ -737,7 +737,7 @@ static int pca955x_probe(struct i2c_client *client) pca955x->gpio.label = "gpio-pca955x"; pca955x->gpio.direction_input = pca955x_gpio_direction_input; pca955x->gpio.direction_output = pca955x_gpio_direction_output; - pca955x->gpio.set_rv = pca955x_gpio_set_value; + pca955x->gpio.set = pca955x_gpio_set_value; pca955x->gpio.get = pca955x_gpio_get_value; pca955x->gpio.request = pca955x_gpio_request_pin; pca955x->gpio.free = pca955x_gpio_free_pin; diff --git a/drivers/leds/leds-tca6507.c b/drivers/leds/leds-tca6507.c index 89c165c8ee9c..fd0e8bab9a4b 100644 --- a/drivers/leds/leds-tca6507.c +++ b/drivers/leds/leds-tca6507.c @@ -637,7 +637,7 @@ static int tca6507_probe_gpios(struct device *dev, tca->gpio.base = -1; tca->gpio.owner = THIS_MODULE; tca->gpio.direction_output = tca6507_gpio_direction_output; - tca->gpio.set_rv = tca6507_gpio_set_value; + tca->gpio.set = tca6507_gpio_set_value; tca->gpio.parent = dev; err = devm_gpiochip_add_data(dev, &tca->gpio, tca); if (err) { diff --git a/drivers/media/dvb-frontends/cxd2820r_core.c b/drivers/media/dvb-frontends/cxd2820r_core.c index a31a8a6a4946..5aa3d45a691a 100644 --- a/drivers/media/dvb-frontends/cxd2820r_core.c +++ b/drivers/media/dvb-frontends/cxd2820r_core.c @@ -651,7 +651,7 @@ static int cxd2820r_probe(struct i2c_client *client) priv->gpio_chip.parent = &client->dev; priv->gpio_chip.owner = THIS_MODULE; priv->gpio_chip.direction_output = cxd2820r_gpio_direction_output; - priv->gpio_chip.set_rv = cxd2820r_gpio_set; + priv->gpio_chip.set = cxd2820r_gpio_set; priv->gpio_chip.get = cxd2820r_gpio_get; priv->gpio_chip.base = -1; /* Dynamic allocation */ priv->gpio_chip.ngpio = GPIO_COUNT; diff --git a/drivers/media/i2c/ds90ub913.c b/drivers/media/i2c/ds90ub913.c index bc74499b0a96..a80da2b4a8fa 100644 --- a/drivers/media/i2c/ds90ub913.c +++ b/drivers/media/i2c/ds90ub913.c @@ -235,7 +235,7 @@ static int ub913_gpiochip_probe(struct ub913_data *priv) gc->ngpio = UB913_NUM_GPIOS; gc->get_direction = ub913_gpio_get_direction; gc->direction_output = ub913_gpio_direction_out; - gc->set_rv = ub913_gpio_set; + gc->set = ub913_gpio_set; gc->of_xlate = ub913_gpio_of_xlate; gc->of_gpio_n_cells = 2; diff --git a/drivers/media/i2c/ds90ub953.c b/drivers/media/i2c/ds90ub953.c index a865bfc89500..e3fc9d66970a 100644 --- a/drivers/media/i2c/ds90ub953.c +++ b/drivers/media/i2c/ds90ub953.c @@ -361,7 +361,7 @@ static int ub953_gpiochip_probe(struct ub953_data *priv) gc->direction_input = ub953_gpio_direction_in; gc->direction_output = ub953_gpio_direction_out; gc->get = ub953_gpio_get; - gc->set_rv = ub953_gpio_set; + gc->set = ub953_gpio_set; gc->of_xlate = ub953_gpio_of_xlate; gc->of_gpio_n_cells = 2; diff --git a/drivers/media/i2c/max9286.c b/drivers/media/i2c/max9286.c index 1d0b5f56f989..7c0961688d61 100644 --- a/drivers/media/i2c/max9286.c +++ b/drivers/media/i2c/max9286.c @@ -1220,7 +1220,7 @@ static int max9286_register_gpio(struct max9286_priv *priv) gpio->owner = THIS_MODULE; gpio->ngpio = 2; gpio->base = -1; - gpio->set_rv = max9286_gpiochip_set; + gpio->set = max9286_gpiochip_set; gpio->get = max9286_gpiochip_get; gpio->can_sleep = true; diff --git a/drivers/media/i2c/max96717.c b/drivers/media/i2c/max96717.c index 015e42fbe246..c8ae7890d9fa 100644 --- a/drivers/media/i2c/max96717.c +++ b/drivers/media/i2c/max96717.c @@ -355,7 +355,7 @@ static int max96717_gpiochip_probe(struct max96717_priv *priv) gc->get_direction = max96717_gpio_get_direction; gc->direction_input = max96717_gpio_direction_in; gc->direction_output = max96717_gpio_direction_out; - gc->set_rv = max96717_gpiochip_set; + gc->set = max96717_gpiochip_set; gc->get = max96717_gpiochip_get; /* Disable GPIO forwarding */ diff --git a/drivers/media/pci/solo6x10/solo6x10-gpio.c b/drivers/media/pci/solo6x10/solo6x10-gpio.c index b16a8453a62a..71848741c55c 100644 --- a/drivers/media/pci/solo6x10/solo6x10-gpio.c +++ b/drivers/media/pci/solo6x10/solo6x10-gpio.c @@ -158,7 +158,7 @@ int solo_gpio_init(struct solo_dev *solo_dev) solo_dev->gpio_dev.get_direction = solo_gpiochip_get_direction; solo_dev->gpio_dev.get = solo_gpiochip_get; - solo_dev->gpio_dev.set_rv = solo_gpiochip_set; + solo_dev->gpio_dev.set = solo_gpiochip_set; ret = gpiochip_add_data(&solo_dev->gpio_dev, solo_dev); diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c index a5f9241fa3f2..50bf3260f65d 100644 --- a/drivers/mfd/sm501.c +++ b/drivers/mfd/sm501.c @@ -965,7 +965,7 @@ static const struct gpio_chip gpio_chip_template = { .ngpio = 32, .direction_input = sm501_gpio_input, .direction_output = sm501_gpio_output, - .set_rv = sm501_gpio_set, + .set = sm501_gpio_set, .get = sm501_gpio_get, }; diff --git a/drivers/mfd/tps65010.c b/drivers/mfd/tps65010.c index 03bd5cd66798..8a144ec52201 100644 --- a/drivers/mfd/tps65010.c +++ b/drivers/mfd/tps65010.c @@ -620,7 +620,7 @@ static int tps65010_probe(struct i2c_client *client) tps->chip.parent = &client->dev; tps->chip.owner = THIS_MODULE; - tps->chip.set_rv = tps65010_gpio_set; + tps->chip.set = tps65010_gpio_set; tps->chip.direction_output = tps65010_output; /* NOTE: only partial support for inputs; nyet IRQs */ diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c index fd71ba29f6b5..4b450d78a65f 100644 --- a/drivers/mfd/ucb1x00-core.c +++ b/drivers/mfd/ucb1x00-core.c @@ -570,7 +570,7 @@ static int ucb1x00_probe(struct mcp *mcp) ucb->gpio.owner = THIS_MODULE; ucb->gpio.base = pdata->gpio_base; ucb->gpio.ngpio = 10; - ucb->gpio.set_rv = ucb1x00_gpio_set; + ucb->gpio.set = ucb1x00_gpio_set; ucb->gpio.get = ucb1x00_gpio_get; ucb->gpio.direction_input = ucb1x00_gpio_direction_input; ucb->gpio.direction_output = ucb1x00_gpio_direction_output; diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c index ff8f4404d10f..8eddbaa1fccd 100644 --- a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c +++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c @@ -438,7 +438,7 @@ static int pci1xxxx_gpio_setup(struct pci1xxxx_gpio *priv, int irq) gchip->direction_output = pci1xxxx_gpio_direction_output; gchip->get_direction = pci1xxxx_gpio_get_direction; gchip->get = pci1xxxx_gpio_get; - gchip->set_rv = pci1xxxx_gpio_set; + gchip->set = pci1xxxx_gpio_set; gchip->set_config = pci1xxxx_gpio_set_config; gchip->dbg_show = NULL; gchip->base = -1; diff --git a/drivers/misc/ti_fpc202.c b/drivers/misc/ti_fpc202.c index 0b1a6350c02b..7964e46c7448 100644 --- a/drivers/misc/ti_fpc202.c +++ b/drivers/misc/ti_fpc202.c @@ -333,7 +333,7 @@ static int fpc202_probe(struct i2c_client *client) priv->gpio.base = -1; priv->gpio.direction_input = fpc202_gpio_direction_input; priv->gpio.direction_output = fpc202_gpio_direction_output; - priv->gpio.set_rv = fpc202_gpio_set; + priv->gpio.set = fpc202_gpio_set; priv->gpio.get = fpc202_gpio_get; priv->gpio.ngpio = FPC202_GPIO_COUNT; priv->gpio.parent = dev; diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index 5a95877b7419..313e1d241f01 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -607,8 +607,8 @@ static int mcp251x_gpio_setup(struct mcp251x_priv *priv) gpio->get_direction = mcp251x_gpio_get_direction; gpio->get = mcp251x_gpio_get; gpio->get_multiple = mcp251x_gpio_get_multiple; - gpio->set_rv = mcp251x_gpio_set; - gpio->set_multiple_rv = mcp251x_gpio_set_multiple; + gpio->set = mcp251x_gpio_set; + gpio->set_multiple = mcp251x_gpio_set_multiple; gpio->base = -1; gpio->ngpio = ARRAY_SIZE(mcp251x_gpio_names); gpio->names = mcp251x_gpio_names; diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index e5bed4237ff4..548b85befbf4 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2187,7 +2187,7 @@ mt7530_setup_gpio(struct mt7530_priv *priv) gc->direction_input = mt7530_gpio_direction_input; gc->direction_output = mt7530_gpio_direction_output; gc->get = mt7530_gpio_get; - gc->set_rv = mt7530_gpio_set; + gc->set = mt7530_gpio_set; gc->base = -1; gc->ngpio = 15; gc->can_sleep = true; diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c index 4f9687ab3b2b..9d31b8258268 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-core.c +++ b/drivers/net/dsa/vitesse-vsc73xx-core.c @@ -2317,7 +2317,7 @@ static int vsc73xx_gpio_probe(struct vsc73xx *vsc) vsc->gc.parent = vsc->dev; vsc->gc.base = -1; vsc->gc.get = vsc73xx_gpio_get; - vsc->gc.set_rv = vsc73xx_gpio_set; + vsc->gc.set = vsc73xx_gpio_set; vsc->gc.direction_input = vsc73xx_gpio_direction_input; vsc->gc.direction_output = vsc73xx_gpio_direction_output; vsc->gc.get_direction = vsc73xx_gpio_get_direction; diff --git a/drivers/net/phy/qcom/qca807x.c b/drivers/net/phy/qcom/qca807x.c index 04e84ebb646c..070dc8c00835 100644 --- a/drivers/net/phy/qcom/qca807x.c +++ b/drivers/net/phy/qcom/qca807x.c @@ -427,7 +427,7 @@ static int qca807x_gpio(struct phy_device *phydev) gc->get_direction = qca807x_gpio_get_direction; gc->direction_output = qca807x_gpio_dir_out; gc->get = qca807x_gpio_get; - gc->set_rv = qca807x_gpio_set; + gc->set = qca807x_gpio_set; return devm_gpiochip_add_data(dev, gc, priv); } diff --git a/drivers/pinctrl/actions/pinctrl-owl.c b/drivers/pinctrl/actions/pinctrl-owl.c index 86f3d5c69e36..1f0ef4727ba7 100644 --- a/drivers/pinctrl/actions/pinctrl-owl.c +++ b/drivers/pinctrl/actions/pinctrl-owl.c @@ -962,7 +962,7 @@ int owl_pinctrl_probe(struct platform_device *pdev, pctrl->chip.direction_input = owl_gpio_direction_input; pctrl->chip.direction_output = owl_gpio_direction_output; pctrl->chip.get = owl_gpio_get; - pctrl->chip.set_rv = owl_gpio_set; + pctrl->chip.set = owl_gpio_set; pctrl->chip.request = owl_gpio_request; pctrl->chip.free = owl_gpio_free; diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c index 826827800474..7dbf079739bc 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c @@ -397,7 +397,7 @@ static const struct gpio_chip bcm2835_gpio_chip = { .direction_output = bcm2835_gpio_direction_output, .get_direction = bcm2835_gpio_get_direction, .get = bcm2835_gpio_get, - .set_rv = bcm2835_gpio_set, + .set = bcm2835_gpio_set, .set_config = gpiochip_generic_config, .base = -1, .ngpio = BCM2835_NUM_GPIOS, @@ -414,7 +414,7 @@ static const struct gpio_chip bcm2711_gpio_chip = { .direction_output = bcm2835_gpio_direction_output, .get_direction = bcm2835_gpio_get_direction, .get = bcm2835_gpio_get, - .set_rv = bcm2835_gpio_set, + .set = bcm2835_gpio_set, .set_config = gpiochip_generic_config, .base = -1, .ngpio = BCM2711_NUM_GPIOS, diff --git a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c index 1d08b8d4cdd7..8c353676f2af 100644 --- a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c @@ -865,7 +865,7 @@ static int iproc_gpio_probe(struct platform_device *pdev) gc->direction_input = iproc_gpio_direction_input; gc->direction_output = iproc_gpio_direction_output; gc->get_direction = iproc_gpio_get_direction; - gc->set_rv = iproc_gpio_set; + gc->set = iproc_gpio_set; gc->get = iproc_gpio_get; chip->pinmux_is_supported = of_property_read_bool(dev->of_node, diff --git a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c index b08f8480ddc6..b425ecacd1b0 100644 --- a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c @@ -656,7 +656,7 @@ static int nsp_gpio_probe(struct platform_device *pdev) gc->direction_input = nsp_gpio_direction_input; gc->direction_output = nsp_gpio_direction_output; gc->get_direction = nsp_gpio_get_direction; - gc->set_rv = nsp_gpio_set; + gc->set = nsp_gpio_set; gc->get = nsp_gpio_get; /* optional GPIO interrupt support */ diff --git a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c index 4e47710eb3d5..68abb6d6cecd 100644 --- a/drivers/pinctrl/cirrus/pinctrl-cs42l43.c +++ b/drivers/pinctrl/cirrus/pinctrl-cs42l43.c @@ -555,7 +555,7 @@ static int cs42l43_pin_probe(struct platform_device *pdev) priv->gpio_chip.direction_output = cs42l43_gpio_direction_out; priv->gpio_chip.add_pin_ranges = cs42l43_gpio_add_pin_ranges; priv->gpio_chip.get = cs42l43_gpio_get; - priv->gpio_chip.set_rv = cs42l43_gpio_set; + priv->gpio_chip.set = cs42l43_gpio_set; priv->gpio_chip.label = dev_name(priv->dev); priv->gpio_chip.parent = priv->dev; priv->gpio_chip.can_sleep = true; diff --git a/drivers/pinctrl/cirrus/pinctrl-lochnagar.c b/drivers/pinctrl/cirrus/pinctrl-lochnagar.c index dcc0a2f3c7dd..ca6ae566082b 100644 --- a/drivers/pinctrl/cirrus/pinctrl-lochnagar.c +++ b/drivers/pinctrl/cirrus/pinctrl-lochnagar.c @@ -1161,7 +1161,7 @@ static int lochnagar_pin_probe(struct platform_device *pdev) priv->gpio_chip.request = gpiochip_generic_request; priv->gpio_chip.free = gpiochip_generic_free; priv->gpio_chip.direction_output = lochnagar_gpio_direction_out; - priv->gpio_chip.set_rv = lochnagar_gpio_set; + priv->gpio_chip.set = lochnagar_gpio_set; priv->gpio_chip.can_sleep = true; priv->gpio_chip.parent = dev; priv->gpio_chip.base = -1; diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 6eb649f1ffd6..5fd107a00ef8 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -1231,7 +1231,7 @@ static const struct gpio_chip byt_gpio_chip = { .direction_input = byt_gpio_direction_input, .direction_output = byt_gpio_direction_output, .get = byt_gpio_get, - .set_rv = byt_gpio_set, + .set = byt_gpio_set, .set_config = gpiochip_generic_config, .dbg_show = byt_gpio_dbg_show, }; diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 769e8c4102a5..f81f7929cd3b 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1168,7 +1168,7 @@ static const struct gpio_chip chv_gpio_chip = { .direction_input = chv_gpio_direction_input, .direction_output = chv_gpio_direction_output, .get = chv_gpio_get, - .set_rv = chv_gpio_set, + .set = chv_gpio_set, }; static void chv_gpio_irq_ack(struct irq_data *d) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index f2ff71e5ea6f..d68cef4ec52a 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -1114,7 +1114,7 @@ static const struct gpio_chip intel_gpio_chip = { .direction_input = intel_gpio_direction_input, .direction_output = intel_gpio_direction_output, .get = intel_gpio_get, - .set_rv = intel_gpio_set, + .set = intel_gpio_set, .set_config = gpiochip_generic_config, }; diff --git a/drivers/pinctrl/intel/pinctrl-lynxpoint.c b/drivers/pinctrl/intel/pinctrl-lynxpoint.c index 5d4a5dd493d1..3fb628309fb2 100644 --- a/drivers/pinctrl/intel/pinctrl-lynxpoint.c +++ b/drivers/pinctrl/intel/pinctrl-lynxpoint.c @@ -777,7 +777,7 @@ static int lp_gpio_probe(struct platform_device *pdev) gc->direction_input = lp_gpio_direction_input; gc->direction_output = lp_gpio_direction_output; gc->get = lp_gpio_get; - gc->set_rv = lp_gpio_set; + gc->set = lp_gpio_set; gc->set_config = gpiochip_generic_config; gc->get_direction = lp_gpio_get_direction; gc->base = -1; diff --git a/drivers/pinctrl/mediatek/pinctrl-airoha.c b/drivers/pinctrl/mediatek/pinctrl-airoha.c index 1737b88530c3..5f1ec9e0de21 100644 --- a/drivers/pinctrl/mediatek/pinctrl-airoha.c +++ b/drivers/pinctrl/mediatek/pinctrl-airoha.c @@ -2418,7 +2418,7 @@ static int airoha_pinctrl_add_gpiochip(struct airoha_pinctrl *pinctrl, gc->free = gpiochip_generic_free; gc->direction_input = pinctrl_gpio_direction_input; gc->direction_output = airoha_gpio_direction_output; - gc->set_rv = airoha_gpio_set; + gc->set = airoha_gpio_set; gc->get = airoha_gpio_get; gc->base = -1; gc->ngpio = AIROHA_NUM_PINS; diff --git a/drivers/pinctrl/mediatek/pinctrl-moore.c b/drivers/pinctrl/mediatek/pinctrl-moore.c index ba0d6f880c6e..6e4f6c07a509 100644 --- a/drivers/pinctrl/mediatek/pinctrl-moore.c +++ b/drivers/pinctrl/mediatek/pinctrl-moore.c @@ -569,7 +569,7 @@ static int mtk_build_gpiochip(struct mtk_pinctrl *hw) chip->direction_input = pinctrl_gpio_direction_input; chip->direction_output = mtk_gpio_direction_output; chip->get = mtk_gpio_get; - chip->set_rv = mtk_gpio_set; + chip->set = mtk_gpio_set; chip->to_irq = mtk_gpio_to_irq; chip->set_config = mtk_gpio_set_config; chip->base = -1; diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index a4cb6d511fcd..d10306024111 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -898,7 +898,7 @@ static const struct gpio_chip mtk_gpio_chip = { .direction_input = pinctrl_gpio_direction_input, .direction_output = mtk_gpio_direction_output, .get = mtk_gpio_get, - .set_rv = mtk_gpio_set, + .set = mtk_gpio_set, .to_irq = mtk_gpio_to_irq, .set_config = mtk_gpio_set_config, }; diff --git a/drivers/pinctrl/mediatek/pinctrl-paris.c b/drivers/pinctrl/mediatek/pinctrl-paris.c index 89ef4e530fcc..3e714554789d 100644 --- a/drivers/pinctrl/mediatek/pinctrl-paris.c +++ b/drivers/pinctrl/mediatek/pinctrl-paris.c @@ -949,7 +949,7 @@ static int mtk_build_gpiochip(struct mtk_pinctrl *hw) chip->direction_input = mtk_gpio_direction_input; chip->direction_output = mtk_gpio_direction_output; chip->get = mtk_gpio_get; - chip->set_rv = mtk_gpio_set; + chip->set = mtk_gpio_set; chip->to_irq = mtk_gpio_to_irq; chip->set_config = mtk_gpio_set_config; chip->base = -1; diff --git a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c index c8958222df8c..e34e984c2b38 100644 --- a/drivers/pinctrl/meson/pinctrl-amlogic-a4.c +++ b/drivers/pinctrl/meson/pinctrl-amlogic-a4.c @@ -888,7 +888,7 @@ static const struct gpio_chip aml_gpio_template = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, .set_config = gpiochip_generic_config, - .set_rv = aml_gpio_set, + .set = aml_gpio_set, .get = aml_gpio_get, .direction_input = aml_gpio_direction_input, .direction_output = aml_gpio_direction_output, diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index f5be61f2ede4..277e9c40490d 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -616,7 +616,7 @@ static int meson_gpiolib_register(struct meson_pinctrl *pc) pc->chip.direction_input = meson_gpio_direction_input; pc->chip.direction_output = meson_gpio_direction_output; pc->chip.get = meson_gpio_get; - pc->chip.set_rv = meson_gpio_set; + pc->chip.set = meson_gpio_set; pc->chip.base = -1; pc->chip.ngpio = pc->data->num_pins; pc->chip.can_sleep = false; diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index a6b106984e12..881df5e08f61 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -518,7 +518,7 @@ static const struct pinmux_ops armada_37xx_pmx_ops = { static const struct gpio_chip armada_37xx_gpiolib_chip = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, - .set_rv = armada_37xx_gpio_set, + .set = armada_37xx_gpio_set, .get = armada_37xx_gpio_get, .get_direction = armada_37xx_gpio_get_direction, .direction_input = armada_37xx_gpio_direction_input, diff --git a/drivers/pinctrl/nomadik/pinctrl-abx500.c b/drivers/pinctrl/nomadik/pinctrl-abx500.c index 2f55f83127cf..7b5f94d8cb23 100644 --- a/drivers/pinctrl/nomadik/pinctrl-abx500.c +++ b/drivers/pinctrl/nomadik/pinctrl-abx500.c @@ -536,7 +536,7 @@ static const struct gpio_chip abx500gpio_chip = { .direction_input = abx500_gpio_direction_input, .get = abx500_gpio_get, .direction_output = abx500_gpio_direction_output, - .set_rv = abx500_gpio_set, + .set = abx500_gpio_set, .to_irq = abx500_gpio_to_irq, .dbg_show = abx500_gpio_dbg_show, }; diff --git a/drivers/pinctrl/nuvoton/pinctrl-ma35.c b/drivers/pinctrl/nuvoton/pinctrl-ma35.c index da5220da5149..54652bfbe6ac 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-ma35.c +++ b/drivers/pinctrl/nuvoton/pinctrl-ma35.c @@ -526,7 +526,7 @@ static int ma35_gpiolib_register(struct platform_device *pdev, struct ma35_pinct bank->chip.direction_input = ma35_gpio_core_direction_in; bank->chip.direction_output = ma35_gpio_core_direction_out; bank->chip.get = ma35_gpio_core_get; - bank->chip.set_rv = ma35_gpio_core_set; + bank->chip.set = ma35_gpio_core_set; bank->chip.base = -1; bank->chip.ngpio = bank->nr_pins; bank->chip.can_sleep = false; diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index b90ef3a26ae8..09a5425d54ba 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -1187,7 +1187,7 @@ static int amd_gpio_probe(struct platform_device *pdev) gpio_dev->gc.direction_input = amd_gpio_direction_input; gpio_dev->gc.direction_output = amd_gpio_direction_output; gpio_dev->gc.get = amd_gpio_get_value; - gpio_dev->gc.set_rv = amd_gpio_set_value; + gpio_dev->gc.set = amd_gpio_set_value; gpio_dev->gc.set_config = amd_gpio_set_config; gpio_dev->gc.dbg_show = amd_gpio_dbg_show; diff --git a/drivers/pinctrl/pinctrl-amdisp.c b/drivers/pinctrl/pinctrl-amdisp.c index 2e706bf8bcde..efbf40c776ea 100644 --- a/drivers/pinctrl/pinctrl-amdisp.c +++ b/drivers/pinctrl/pinctrl-amdisp.c @@ -151,7 +151,7 @@ static int amdisp_gpiochip_add(struct platform_device *pdev, gc->direction_input = amdisp_gpio_direction_input; gc->direction_output = amdisp_gpio_direction_output; gc->get = amdisp_gpio_get; - gc->set_rv = amdisp_gpio_set; + gc->set = amdisp_gpio_set; gc->base = -1; gc->ngpio = ARRAY_SIZE(amdisp_range_pins); diff --git a/drivers/pinctrl/pinctrl-apple-gpio.c b/drivers/pinctrl/pinctrl-apple-gpio.c index dcf3a921b4df..a09daa72bfe4 100644 --- a/drivers/pinctrl/pinctrl-apple-gpio.c +++ b/drivers/pinctrl/pinctrl-apple-gpio.c @@ -378,7 +378,7 @@ static int apple_gpio_register(struct apple_gpio_pinctrl *pctl) pctl->gpio_chip.direction_input = apple_gpio_direction_input; pctl->gpio_chip.direction_output = apple_gpio_direction_output; pctl->gpio_chip.get = apple_gpio_get; - pctl->gpio_chip.set_rv = apple_gpio_set; + pctl->gpio_chip.set = apple_gpio_set; pctl->gpio_chip.base = -1; pctl->gpio_chip.ngpio = pctl->pinctrl_desc.npins; pctl->gpio_chip.parent = pctl->dev; diff --git a/drivers/pinctrl/pinctrl-as3722.c b/drivers/pinctrl/pinctrl-as3722.c index 30ed758bbe9d..e713dea98aa8 100644 --- a/drivers/pinctrl/pinctrl-as3722.c +++ b/drivers/pinctrl/pinctrl-as3722.c @@ -529,7 +529,7 @@ static const struct gpio_chip as3722_gpio_chip = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, .get = as3722_gpio_get, - .set_rv = as3722_gpio_set, + .set = as3722_gpio_set, .direction_input = pinctrl_gpio_direction_input, .direction_output = as3722_gpio_direction_output, .to_irq = as3722_gpio_to_irq, diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c index 57f105ac962d..35ea3414cb96 100644 --- a/drivers/pinctrl/pinctrl-at91-pio4.c +++ b/drivers/pinctrl/pinctrl-at91-pio4.c @@ -442,8 +442,8 @@ static struct gpio_chip atmel_gpio_chip = { .get = atmel_gpio_get, .get_multiple = atmel_gpio_get_multiple, .direction_output = atmel_gpio_direction_output, - .set_rv = atmel_gpio_set, - .set_multiple_rv = atmel_gpio_set_multiple, + .set = atmel_gpio_set, + .set_multiple = atmel_gpio_set_multiple, .to_irq = atmel_gpio_to_irq, .base = 0, }; diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index 6c2727bd55bc..0a57ed51d4c9 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -1801,8 +1801,8 @@ static const struct gpio_chip at91_gpio_template = { .direction_input = at91_gpio_direction_input, .get = at91_gpio_get, .direction_output = at91_gpio_direction_output, - .set_rv = at91_gpio_set, - .set_multiple_rv = at91_gpio_set_multiple, + .set = at91_gpio_set, + .set_multiple = at91_gpio_set_multiple, .dbg_show = at91_gpio_dbg_show, .can_sleep = false, .ngpio = MAX_NB_GPIO_PER_BANK, diff --git a/drivers/pinctrl/pinctrl-aw9523.c b/drivers/pinctrl/pinctrl-aw9523.c index 9570ef346af6..890b83fddea3 100644 --- a/drivers/pinctrl/pinctrl-aw9523.c +++ b/drivers/pinctrl/pinctrl-aw9523.c @@ -785,8 +785,8 @@ static int aw9523_init_gpiochip(struct aw9523 *awi, unsigned int npins) gc->direction_output = aw9523_direction_output; gc->get = aw9523_gpio_get; gc->get_multiple = aw9523_gpio_get_multiple; - gc->set_rv = aw9523_gpio_set; - gc->set_multiple_rv = aw9523_gpio_set_multiple; + gc->set = aw9523_gpio_set; + gc->set_multiple = aw9523_gpio_set_multiple; gc->set_config = gpiochip_generic_config; gc->parent = dev; gc->owner = THIS_MODULE; diff --git a/drivers/pinctrl/pinctrl-axp209.c b/drivers/pinctrl/pinctrl-axp209.c index fff408b60c4a..2bd8487484a8 100644 --- a/drivers/pinctrl/pinctrl-axp209.c +++ b/drivers/pinctrl/pinctrl-axp209.c @@ -192,7 +192,7 @@ static int axp20x_gpio_get_direction(struct gpio_chip *chip, static int axp20x_gpio_output(struct gpio_chip *chip, unsigned int offset, int value) { - return chip->set_rv(chip, offset, value); + return chip->set(chip, offset, value); } static int axp20x_gpio_set(struct gpio_chip *chip, unsigned int offset, @@ -463,7 +463,7 @@ static int axp20x_pctl_probe(struct platform_device *pdev) pctl->chip.owner = THIS_MODULE; pctl->chip.get = axp20x_gpio_get; pctl->chip.get_direction = axp20x_gpio_get_direction; - pctl->chip.set_rv = axp20x_gpio_set; + pctl->chip.set = axp20x_gpio_set; pctl->chip.direction_input = pinctrl_gpio_direction_input; pctl->chip.direction_output = axp20x_gpio_output; diff --git a/drivers/pinctrl/pinctrl-cy8c95x0.c b/drivers/pinctrl/pinctrl-cy8c95x0.c index 8a2fd632bdd4..cf7f80497fde 100644 --- a/drivers/pinctrl/pinctrl-cy8c95x0.c +++ b/drivers/pinctrl/pinctrl-cy8c95x0.c @@ -939,10 +939,10 @@ static int cy8c95x0_setup_gpiochip(struct cy8c95x0_pinctrl *chip) gc->direction_input = cy8c95x0_gpio_direction_input; gc->direction_output = cy8c95x0_gpio_direction_output; gc->get = cy8c95x0_gpio_get_value; - gc->set_rv = cy8c95x0_gpio_set_value; + gc->set = cy8c95x0_gpio_set_value; gc->get_direction = cy8c95x0_gpio_get_direction; gc->get_multiple = cy8c95x0_gpio_get_multiple; - gc->set_multiple_rv = cy8c95x0_gpio_set_multiple; + gc->set_multiple = cy8c95x0_gpio_set_multiple; gc->set_config = gpiochip_generic_config; gc->can_sleep = true; gc->add_pin_ranges = cy8c95x0_add_pin_ranges; diff --git a/drivers/pinctrl/pinctrl-da9062.c b/drivers/pinctrl/pinctrl-da9062.c index 3295b09dfc3d..53298cbcc5cf 100644 --- a/drivers/pinctrl/pinctrl-da9062.c +++ b/drivers/pinctrl/pinctrl-da9062.c @@ -233,7 +233,7 @@ static int da9062_gpio_to_irq(struct gpio_chip *gc, unsigned int offset) static const struct gpio_chip reference_gc = { .owner = THIS_MODULE, .get = da9062_gpio_get, - .set_rv = da9062_gpio_set, + .set = da9062_gpio_set, .get_direction = da9062_gpio_get_direction, .direction_input = da9062_gpio_direction_input, .direction_output = da9062_gpio_direction_output, diff --git a/drivers/pinctrl/pinctrl-digicolor.c b/drivers/pinctrl/pinctrl-digicolor.c index 1676cb3cc4c9..2e16f09aeb47 100644 --- a/drivers/pinctrl/pinctrl-digicolor.c +++ b/drivers/pinctrl/pinctrl-digicolor.c @@ -248,7 +248,7 @@ static int dc_gpiochip_add(struct dc_pinmap *pmap) chip->direction_input = dc_gpio_direction_input; chip->direction_output = dc_gpio_direction_output; chip->get = dc_gpio_get; - chip->set_rv = dc_gpio_set; + chip->set = dc_gpio_set; chip->base = -1; chip->ngpio = PINS_COUNT; diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c index 79119cf20efc..2900513467fa 100644 --- a/drivers/pinctrl/pinctrl-ingenic.c +++ b/drivers/pinctrl/pinctrl-ingenic.c @@ -4451,7 +4451,7 @@ static int __init ingenic_gpio_probe(struct ingenic_pinctrl *jzpc, jzgc->gc.fwnode = fwnode; jzgc->gc.owner = THIS_MODULE; - jzgc->gc.set_rv = ingenic_gpio_set; + jzgc->gc.set = ingenic_gpio_set; jzgc->gc.get = ingenic_gpio_get; jzgc->gc.direction_input = pinctrl_gpio_direction_input; jzgc->gc.direction_output = ingenic_gpio_direction_output; diff --git a/drivers/pinctrl/pinctrl-keembay.c b/drivers/pinctrl/pinctrl-keembay.c index 30e641571cfe..60cf017498b3 100644 --- a/drivers/pinctrl/pinctrl-keembay.c +++ b/drivers/pinctrl/pinctrl-keembay.c @@ -1481,7 +1481,7 @@ static int keembay_gpiochip_probe(struct keembay_pinctrl *kpc, gc->direction_input = keembay_gpio_set_direction_in; gc->direction_output = keembay_gpio_set_direction_out; gc->get = keembay_gpio_get; - gc->set_rv = keembay_gpio_set; + gc->set = keembay_gpio_set; gc->set_config = gpiochip_generic_config; gc->base = -1; gc->ngpio = kpc->npins; diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c index c8027ef03ecc..a17fcaddf490 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08.c +++ b/drivers/pinctrl/pinctrl-mcp23s08.c @@ -632,8 +632,8 @@ int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, mcp->chip.get = mcp23s08_get; mcp->chip.get_multiple = mcp23s08_get_multiple; mcp->chip.direction_output = mcp23s08_direction_output; - mcp->chip.set_rv = mcp23s08_set; - mcp->chip.set_multiple_rv = mcp23s08_set_multiple; + mcp->chip.set = mcp23s08_set; + mcp->chip.set_multiple = mcp23s08_set_multiple; mcp->chip.base = base; mcp->chip.can_sleep = true; diff --git a/drivers/pinctrl/pinctrl-microchip-sgpio.c b/drivers/pinctrl/pinctrl-microchip-sgpio.c index 88c2f14cfc6b..6191e5c13815 100644 --- a/drivers/pinctrl/pinctrl-microchip-sgpio.c +++ b/drivers/pinctrl/pinctrl-microchip-sgpio.c @@ -858,7 +858,7 @@ static int microchip_sgpio_register_bank(struct device *dev, gc->direction_input = microchip_sgpio_direction_input; gc->direction_output = microchip_sgpio_direction_output; gc->get = microchip_sgpio_get_value; - gc->set_rv = microchip_sgpio_set_value; + gc->set = microchip_sgpio_set_value; gc->request = gpiochip_generic_request; gc->free = gpiochip_generic_free; gc->of_xlate = microchip_sgpio_of_xlate; diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c index fbb3d43746bb..b82bf83fed25 100644 --- a/drivers/pinctrl/pinctrl-ocelot.c +++ b/drivers/pinctrl/pinctrl-ocelot.c @@ -1997,7 +1997,7 @@ static int ocelot_gpio_direction_output(struct gpio_chip *chip, static const struct gpio_chip ocelot_gpiolib_chip = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, - .set_rv = ocelot_gpio_set, + .set = ocelot_gpio_set, .get = ocelot_gpio_get, .get_direction = ocelot_gpio_get_direction, .direction_input = pinctrl_gpio_direction_input, diff --git a/drivers/pinctrl/pinctrl-pic32.c b/drivers/pinctrl/pinctrl-pic32.c index 6d64cab97e81..37c2bf752154 100644 --- a/drivers/pinctrl/pinctrl-pic32.c +++ b/drivers/pinctrl/pinctrl-pic32.c @@ -2120,7 +2120,7 @@ static void pic32_gpio_irq_handler(struct irq_desc *desc) .direction_input = pic32_gpio_direction_input, \ .direction_output = pic32_gpio_direction_output, \ .get = pic32_gpio_get, \ - .set_rv = pic32_gpio_set, \ + .set = pic32_gpio_set, \ .ngpio = _npins, \ .base = GPIO_BANK_START(_bank), \ .owner = THIS_MODULE, \ diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c index 7f8b562c81c9..0b33b01dbaad 100644 --- a/drivers/pinctrl/pinctrl-pistachio.c +++ b/drivers/pinctrl/pinctrl-pistachio.c @@ -1331,7 +1331,7 @@ static void pistachio_gpio_irq_handler(struct irq_desc *desc) .direction_input = pistachio_gpio_direction_input, \ .direction_output = pistachio_gpio_direction_output, \ .get = pistachio_gpio_get, \ - .set_rv = pistachio_gpio_set, \ + .set = pistachio_gpio_set, \ .base = _pin_base, \ .ngpio = _npins, \ }, \ diff --git a/drivers/pinctrl/pinctrl-rk805.c b/drivers/pinctrl/pinctrl-rk805.c index fc0e330b1d11..3acf770316c1 100644 --- a/drivers/pinctrl/pinctrl-rk805.c +++ b/drivers/pinctrl/pinctrl-rk805.c @@ -378,7 +378,7 @@ static const struct gpio_chip rk805_gpio_chip = { .free = gpiochip_generic_free, .get_direction = rk805_gpio_get_direction, .get = rk805_gpio_get, - .set_rv = rk805_gpio_set, + .set = rk805_gpio_set, .direction_input = pinctrl_gpio_direction_input, .direction_output = rk805_gpio_direction_output, .can_sleep = true, diff --git a/drivers/pinctrl/pinctrl-rp1.c b/drivers/pinctrl/pinctrl-rp1.c index 6080b57a5d87..dadafc935dbb 100644 --- a/drivers/pinctrl/pinctrl-rp1.c +++ b/drivers/pinctrl/pinctrl-rp1.c @@ -851,7 +851,7 @@ static const struct gpio_chip rp1_gpio_chip = { .direction_output = rp1_gpio_direction_output, .get_direction = rp1_gpio_get_direction, .get = rp1_gpio_get, - .set_rv = rp1_gpio_set, + .set = rp1_gpio_set, .base = -1, .set_config = rp1_gpio_set_config, .ngpio = RP1_NUM_GPIOS, diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index 574fe2cbfbec..d3cea3437d7f 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -1467,7 +1467,7 @@ static const struct gpio_chip st_gpio_template = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, .get = st_gpio_get, - .set_rv = st_gpio_set, + .set = st_gpio_set, .direction_input = pinctrl_gpio_direction_input, .direction_output = st_gpio_direction_output, .get_direction = st_gpio_get_direction, diff --git a/drivers/pinctrl/pinctrl-stmfx.c b/drivers/pinctrl/pinctrl-stmfx.c index f4fdcaa043e6..c89b99003b71 100644 --- a/drivers/pinctrl/pinctrl-stmfx.c +++ b/drivers/pinctrl/pinctrl-stmfx.c @@ -697,7 +697,7 @@ static int stmfx_pinctrl_probe(struct platform_device *pdev) pctl->gpio_chip.direction_input = stmfx_gpio_direction_input; pctl->gpio_chip.direction_output = stmfx_gpio_direction_output; pctl->gpio_chip.get = stmfx_gpio_get; - pctl->gpio_chip.set_rv = stmfx_gpio_set; + pctl->gpio_chip.set = stmfx_gpio_set; pctl->gpio_chip.set_config = gpiochip_generic_config; pctl->gpio_chip.base = -1; pctl->gpio_chip.ngpio = pctl->pctl_desc.npins; diff --git a/drivers/pinctrl/pinctrl-sx150x.c b/drivers/pinctrl/pinctrl-sx150x.c index d3a12c1c0de2..53cf8168b274 100644 --- a/drivers/pinctrl/pinctrl-sx150x.c +++ b/drivers/pinctrl/pinctrl-sx150x.c @@ -1176,7 +1176,7 @@ static int sx150x_probe(struct i2c_client *client) pctl->gpio.direction_input = sx150x_gpio_direction_input; pctl->gpio.direction_output = sx150x_gpio_direction_output; pctl->gpio.get = sx150x_gpio_get; - pctl->gpio.set_rv = sx150x_gpio_set; + pctl->gpio.set = sx150x_gpio_set; pctl->gpio.set_config = gpiochip_generic_config; pctl->gpio.parent = dev; pctl->gpio.can_sleep = true; @@ -1191,7 +1191,7 @@ static int sx150x_probe(struct i2c_client *client) * would require locking that is not in place at this time. */ if (pctl->data->model != SX150X_789) - pctl->gpio.set_multiple_rv = sx150x_gpio_set_multiple; + pctl->gpio.set_multiple = sx150x_gpio_set_multiple; /* Add Interrupt support if an irq is specified */ if (client->irq > 0) { diff --git a/drivers/pinctrl/pinctrl-xway.c b/drivers/pinctrl/pinctrl-xway.c index 53c6c22ff24d..3d4ad61d0da9 100644 --- a/drivers/pinctrl/pinctrl-xway.c +++ b/drivers/pinctrl/pinctrl-xway.c @@ -1354,7 +1354,7 @@ static struct gpio_chip xway_chip = { .direction_input = xway_gpio_dir_in, .direction_output = xway_gpio_dir_out, .get = xway_gpio_get, - .set_rv = xway_gpio_set, + .set = xway_gpio_set, .request = gpiochip_generic_request, .free = gpiochip_generic_free, .to_irq = xway_gpio_to_irq, diff --git a/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c b/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c index 57fefeb603f0..54c77e0b96e9 100644 --- a/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c +++ b/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c @@ -398,7 +398,7 @@ static const struct gpio_chip lpi_gpio_template = { .direction_input = lpi_gpio_direction_input, .direction_output = lpi_gpio_direction_output, .get = lpi_gpio_get, - .set_rv = lpi_gpio_set, + .set = lpi_gpio_set, .request = gpiochip_generic_request, .free = gpiochip_generic_free, .dbg_show = lpi_gpio_dbg_show, diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index f713c80d7f3e..83eb075b6bfa 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -792,7 +792,7 @@ static const struct gpio_chip msm_gpio_template = { .direction_output = msm_gpio_direction_output, .get_direction = msm_gpio_get_direction, .get = msm_gpio_get, - .set_rv = msm_gpio_set, + .set = msm_gpio_set, .request = gpiochip_generic_request, .free = gpiochip_generic_free, .dbg_show = msm_gpio_dbg_show, diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c index 606becc160eb..b7b15874e488 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c @@ -802,7 +802,7 @@ static const struct gpio_chip pmic_gpio_gpio_template = { .direction_input = pmic_gpio_direction_input, .direction_output = pmic_gpio_direction_output, .get = pmic_gpio_get, - .set_rv = pmic_gpio_set, + .set = pmic_gpio_set, .request = gpiochip_generic_request, .free = gpiochip_generic_free, .of_xlate = pmic_gpio_of_xlate, diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c index ba9084978f90..22d76b1013a3 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c @@ -638,7 +638,7 @@ static const struct gpio_chip pmic_mpp_gpio_template = { .direction_input = pmic_mpp_direction_input, .direction_output = pmic_mpp_direction_output, .get = pmic_mpp_get, - .set_rv = pmic_mpp_set, + .set = pmic_mpp_set, .request = gpiochip_generic_request, .free = gpiochip_generic_free, .of_xlate = pmic_mpp_of_xlate, diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c index 3a8014ebf064..fb37b1c1acb4 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c @@ -597,7 +597,7 @@ static const struct gpio_chip pm8xxx_gpio_template = { .direction_input = pm8xxx_gpio_direction_input, .direction_output = pm8xxx_gpio_direction_output, .get = pm8xxx_gpio_get, - .set_rv = pm8xxx_gpio_set, + .set = pm8xxx_gpio_set, .of_xlate = pm8xxx_gpio_of_xlate, .dbg_show = pm8xxx_gpio_dbg_show, .owner = THIS_MODULE, diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c index 087c37d304fc..6103849af042 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c @@ -634,7 +634,7 @@ static const struct gpio_chip pm8xxx_mpp_template = { .direction_input = pm8xxx_mpp_direction_input, .direction_output = pm8xxx_mpp_direction_output, .get = pm8xxx_mpp_get, - .set_rv = pm8xxx_mpp_set, + .set = pm8xxx_mpp_set, .of_xlate = pm8xxx_mpp_of_xlate, .dbg_show = pm8xxx_mpp_dbg_show, .owner = THIS_MODULE, diff --git a/drivers/pinctrl/renesas/gpio.c b/drivers/pinctrl/renesas/gpio.c index 8efbdc1b0078..2293af642849 100644 --- a/drivers/pinctrl/renesas/gpio.c +++ b/drivers/pinctrl/renesas/gpio.c @@ -234,7 +234,7 @@ static int gpio_pin_setup(struct sh_pfc_chip *chip) gc->direction_input = gpio_pin_direction_input; gc->get = gpio_pin_get; gc->direction_output = gpio_pin_direction_output; - gc->set_rv = gpio_pin_set; + gc->set = gpio_pin_set; gc->to_irq = gpio_pin_to_irq; gc->label = pfc->info->name; diff --git a/drivers/pinctrl/renesas/pinctrl-rza1.c b/drivers/pinctrl/renesas/pinctrl-rza1.c index 3d8492c91710..23812116ef42 100644 --- a/drivers/pinctrl/renesas/pinctrl-rza1.c +++ b/drivers/pinctrl/renesas/pinctrl-rza1.c @@ -846,7 +846,7 @@ static const struct gpio_chip rza1_gpiochip_template = { .direction_input = rza1_gpio_direction_input, .direction_output = rza1_gpio_direction_output, .get = rza1_gpio_get, - .set_rv = rza1_gpio_set, + .set = rza1_gpio_set, }; /* ---------------------------------------------------------------------------- * pinctrl operations diff --git a/drivers/pinctrl/renesas/pinctrl-rza2.c b/drivers/pinctrl/renesas/pinctrl-rza2.c index 7a0b268d3eb9..b78b5b4ec5af 100644 --- a/drivers/pinctrl/renesas/pinctrl-rza2.c +++ b/drivers/pinctrl/renesas/pinctrl-rza2.c @@ -237,7 +237,7 @@ static struct gpio_chip chip = { .direction_input = rza2_chip_direction_input, .direction_output = rza2_chip_direction_output, .get = rza2_chip_get, - .set_rv = rza2_chip_set, + .set = rza2_chip_set, }; static int rza2_gpio_register(struct rza2_pinctrl_priv *priv) diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c index 2a10ae0bf5bd..c52263c2a7b0 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c +++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c @@ -2795,7 +2795,7 @@ static int rzg2l_gpio_register(struct rzg2l_pinctrl *pctrl) chip->direction_input = rzg2l_gpio_direction_input; chip->direction_output = rzg2l_gpio_direction_output; chip->get = rzg2l_gpio_get; - chip->set_rv = rzg2l_gpio_set; + chip->set = rzg2l_gpio_set; chip->label = name; chip->parent = pctrl->dev; chip->owner = THIS_MODULE; diff --git a/drivers/pinctrl/renesas/pinctrl-rzv2m.c b/drivers/pinctrl/renesas/pinctrl-rzv2m.c index a17b68b4c466..daaa986d994d 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzv2m.c +++ b/drivers/pinctrl/renesas/pinctrl-rzv2m.c @@ -957,7 +957,7 @@ static int rzv2m_gpio_register(struct rzv2m_pinctrl *pctrl) chip->direction_input = rzv2m_gpio_direction_input; chip->direction_output = rzv2m_gpio_direction_output; chip->get = rzv2m_gpio_get; - chip->set_rv = rzv2m_gpio_set; + chip->set = rzv2m_gpio_set; chip->label = name; chip->parent = pctrl->dev; chip->owner = THIS_MODULE; diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c index fe1ac82b9d79..24745e1d78ce 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.c +++ b/drivers/pinctrl/samsung/pinctrl-samsung.c @@ -1067,7 +1067,7 @@ static int samsung_gpio_set_config(struct gpio_chip *gc, unsigned int offset, static const struct gpio_chip samsung_gpiolib_chip = { .request = gpiochip_generic_request, .free = gpiochip_generic_free, - .set_rv = samsung_gpio_set, + .set = samsung_gpio_set, .get = samsung_gpio_get, .direction_input = samsung_gpio_direction_input, .direction_output = samsung_gpio_direction_output, diff --git a/drivers/pinctrl/spear/pinctrl-plgpio.c b/drivers/pinctrl/spear/pinctrl-plgpio.c index e8234d2156da..1ec22010a3f9 100644 --- a/drivers/pinctrl/spear/pinctrl-plgpio.c +++ b/drivers/pinctrl/spear/pinctrl-plgpio.c @@ -582,7 +582,7 @@ static int plgpio_probe(struct platform_device *pdev) plgpio->chip.direction_input = plgpio_direction_input; plgpio->chip.direction_output = plgpio_direction_output; plgpio->chip.get = plgpio_get_value; - plgpio->chip.set_rv = plgpio_set_value; + plgpio->chip.set = plgpio_set_value; plgpio->chip.label = dev_name(&pdev->dev); plgpio->chip.parent = &pdev->dev; plgpio->chip.owner = THIS_MODULE; diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c index b729ca4de422..7fa13f282b85 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c @@ -1302,7 +1302,7 @@ static int starfive_probe(struct platform_device *pdev) sfp->gc.direction_input = starfive_gpio_direction_input; sfp->gc.direction_output = starfive_gpio_direction_output; sfp->gc.get = starfive_gpio_get; - sfp->gc.set_rv = starfive_gpio_set; + sfp->gc.set = starfive_gpio_set; sfp->gc.set_config = starfive_gpio_set_config; sfp->gc.add_pin_ranges = starfive_gpio_add_pin_ranges; sfp->gc.base = -1; diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c index 082bb1c6cea9..05e3af75b09f 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c @@ -935,7 +935,7 @@ int jh7110_pinctrl_probe(struct platform_device *pdev) sfp->gc.direction_input = jh7110_gpio_direction_input; sfp->gc.direction_output = jh7110_gpio_direction_output; sfp->gc.get = jh7110_gpio_get; - sfp->gc.set_rv = jh7110_gpio_set; + sfp->gc.set = jh7110_gpio_set; sfp->gc.set_config = jh7110_gpio_set_config; sfp->gc.add_pin_ranges = jh7110_gpio_add_pin_ranges; sfp->gc.base = info->gc_base; diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index f47c4e6f12b4..823c8fe758e2 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -433,7 +433,7 @@ static const struct gpio_chip stm32_gpio_template = { .request = stm32_gpio_request, .free = stm32_gpio_free, .get = stm32_gpio_get, - .set_rv = stm32_gpio_set, + .set = stm32_gpio_set, .direction_input = pinctrl_gpio_direction_input, .direction_output = stm32_gpio_direction_output, .to_irq = stm32_gpio_to_irq, diff --git a/drivers/pinctrl/sunplus/sppctl.c b/drivers/pinctrl/sunplus/sppctl.c index 3c3357f80889..3e924aa86cc2 100644 --- a/drivers/pinctrl/sunplus/sppctl.c +++ b/drivers/pinctrl/sunplus/sppctl.c @@ -547,7 +547,7 @@ static int sppctl_gpio_new(struct platform_device *pdev, struct sppctl_pdata *pc gchip->direction_input = sppctl_gpio_direction_input; gchip->direction_output = sppctl_gpio_direction_output; gchip->get = sppctl_gpio_get; - gchip->set_rv = sppctl_gpio_set; + gchip->set = sppctl_gpio_set; gchip->set_config = sppctl_gpio_set_config; gchip->dbg_show = IS_ENABLED(CONFIG_DEBUG_FS) ? sppctl_gpio_dbg_show : NULL; diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index 0db8429a013f..0fb057a07dcc 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -1604,7 +1604,7 @@ int sunxi_pinctrl_init_with_flags(struct platform_device *pdev, pctl->chip->direction_input = sunxi_pinctrl_gpio_direction_input; pctl->chip->direction_output = sunxi_pinctrl_gpio_direction_output; pctl->chip->get = sunxi_pinctrl_gpio_get; - pctl->chip->set_rv = sunxi_pinctrl_gpio_set; + pctl->chip->set = sunxi_pinctrl_gpio_set; pctl->chip->of_xlate = sunxi_pinctrl_gpio_of_xlate; pctl->chip->to_irq = sunxi_pinctrl_gpio_to_irq; pctl->chip->of_gpio_n_cells = 3; diff --git a/drivers/pinctrl/vt8500/pinctrl-wmt.c b/drivers/pinctrl/vt8500/pinctrl-wmt.c index 767c6808a463..7213a8d4bf09 100644 --- a/drivers/pinctrl/vt8500/pinctrl-wmt.c +++ b/drivers/pinctrl/vt8500/pinctrl-wmt.c @@ -549,7 +549,7 @@ static const struct gpio_chip wmt_gpio_chip = { .direction_input = pinctrl_gpio_direction_input, .direction_output = wmt_gpio_direction_output, .get = wmt_gpio_get_value, - .set_rv = wmt_gpio_set_value, + .set = wmt_gpio_set_value, .can_sleep = false, }; diff --git a/drivers/platform/cznic/turris-omnia-mcu-gpio.c b/drivers/platform/cznic/turris-omnia-mcu-gpio.c index 77184c8b42ea..7f0ada4fa606 100644 --- a/drivers/platform/cznic/turris-omnia-mcu-gpio.c +++ b/drivers/platform/cznic/turris-omnia-mcu-gpio.c @@ -1024,8 +1024,8 @@ int omnia_mcu_register_gpiochip(struct omnia_mcu *mcu) mcu->gc.direction_output = omnia_gpio_direction_output; mcu->gc.get = omnia_gpio_get; mcu->gc.get_multiple = omnia_gpio_get_multiple; - mcu->gc.set_rv = omnia_gpio_set; - mcu->gc.set_multiple_rv = omnia_gpio_set_multiple; + mcu->gc.set = omnia_gpio_set; + mcu->gc.set_multiple = omnia_gpio_set_multiple; mcu->gc.init_valid_mask = omnia_gpio_init_valid_mask; mcu->gc.can_sleep = true; mcu->gc.names = omnia_mcu_gpio_names; diff --git a/drivers/platform/x86/barco-p50-gpio.c b/drivers/platform/x86/barco-p50-gpio.c index bb3393bbfb89..28012eebdb10 100644 --- a/drivers/platform/x86/barco-p50-gpio.c +++ b/drivers/platform/x86/barco-p50-gpio.c @@ -316,7 +316,7 @@ static int p50_gpio_probe(struct platform_device *pdev) p50->gc.base = -1; p50->gc.get_direction = p50_gpio_get_direction; p50->gc.get = p50_gpio_get; - p50->gc.set_rv = p50_gpio_set; + p50->gc.set = p50_gpio_set; /* reset mbox */ diff --git a/drivers/platform/x86/intel/int0002_vgpio.c b/drivers/platform/x86/intel/int0002_vgpio.c index 9bc24ed19c64..6f5629dc3f8d 100644 --- a/drivers/platform/x86/intel/int0002_vgpio.c +++ b/drivers/platform/x86/intel/int0002_vgpio.c @@ -193,7 +193,7 @@ static int int0002_probe(struct platform_device *pdev) chip->parent = dev; chip->owner = THIS_MODULE; chip->get = int0002_gpio_get; - chip->set_rv = int0002_gpio_set; + chip->set = int0002_gpio_set; chip->direction_input = int0002_gpio_get; chip->direction_output = int0002_gpio_direction_output; chip->base = -1; diff --git a/drivers/platform/x86/portwell-ec.c b/drivers/platform/x86/portwell-ec.c index 3e019c51913e..322f296e9315 100644 --- a/drivers/platform/x86/portwell-ec.c +++ b/drivers/platform/x86/portwell-ec.c @@ -86,7 +86,7 @@ static int pwec_gpio_get(struct gpio_chip *chip, unsigned int offset) return pwec_read(PORTWELL_GPIO_VAL_REG) & BIT(offset) ? 1 : 0; } -static int pwec_gpio_set_rv(struct gpio_chip *chip, unsigned int offset, int val) +static int pwec_gpio_set(struct gpio_chip *chip, unsigned int offset, int val) { u8 tmp = pwec_read(PORTWELL_GPIO_VAL_REG); @@ -130,7 +130,7 @@ static struct gpio_chip pwec_gpio_chip = { .direction_input = pwec_gpio_direction_input, .direction_output = pwec_gpio_direction_output, .get = pwec_gpio_get, - .set_rv = pwec_gpio_set_rv, + .set = pwec_gpio_set, .base = -1, .ngpio = PORTWELL_GPIO_PINS, }; diff --git a/drivers/platform/x86/silicom-platform.c b/drivers/platform/x86/silicom-platform.c index 63b5da410ed5..266f7bc5e416 100644 --- a/drivers/platform/x86/silicom-platform.c +++ b/drivers/platform/x86/silicom-platform.c @@ -466,7 +466,7 @@ static struct gpio_chip silicom_gpio_chip = { .direction_input = silicom_gpio_direction_input, .direction_output = silicom_gpio_direction_output, .get = silicom_gpio_get, - .set_rv = silicom_gpio_set, + .set = silicom_gpio_set, .base = -1, .ngpio = ARRAY_SIZE(plat_0222_gpio_channels), .names = plat_0222_gpio_names, diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index eb03ccd5b688..9ce75704a15f 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -323,7 +323,7 @@ static int pca9685_pwm_gpio_probe(struct pwm_chip *chip) pca->gpio.direction_input = pca9685_pwm_gpio_direction_input; pca->gpio.direction_output = pca9685_pwm_gpio_direction_output; pca->gpio.get = pca9685_pwm_gpio_get; - pca->gpio.set_rv = pca9685_pwm_gpio_set; + pca->gpio.set = pca9685_pwm_gpio_set; pca->gpio.base = -1; pca->gpio.ngpio = PCA9685_MAXCHAN; pca->gpio.can_sleep = true; diff --git a/drivers/regulator/rpi-panel-attiny-regulator.c b/drivers/regulator/rpi-panel-attiny-regulator.c index 58dbf8bffa5d..3020839b9ef1 100644 --- a/drivers/regulator/rpi-panel-attiny-regulator.c +++ b/drivers/regulator/rpi-panel-attiny-regulator.c @@ -351,7 +351,7 @@ static int attiny_i2c_probe(struct i2c_client *i2c) state->gc.base = -1; state->gc.ngpio = NUM_GPIO; - state->gc.set_rv = attiny_gpio_set; + state->gc.set = attiny_gpio_set; state->gc.get_direction = attiny_gpio_get_direction; state->gc.can_sleep = true; diff --git a/drivers/soc/fsl/qe/gpio.c b/drivers/soc/fsl/qe/gpio.c index 710a3a03758b..8df1e8fa86a5 100644 --- a/drivers/soc/fsl/qe/gpio.c +++ b/drivers/soc/fsl/qe/gpio.c @@ -321,8 +321,8 @@ static int __init qe_add_gpiochips(void) gc->direction_input = qe_gpio_dir_in; gc->direction_output = qe_gpio_dir_out; gc->get = qe_gpio_get; - gc->set_rv = qe_gpio_set; - gc->set_multiple_rv = qe_gpio_set_multiple; + gc->set = qe_gpio_set; + gc->set_multiple = qe_gpio_set_multiple; ret = of_mm_gpiochip_add_data(np, mm_gc, qe_gc); if (ret) diff --git a/drivers/soc/renesas/pwc-rzv2m.c b/drivers/soc/renesas/pwc-rzv2m.c index 4dbcb3d4a90c..6209168b3734 100644 --- a/drivers/soc/renesas/pwc-rzv2m.c +++ b/drivers/soc/renesas/pwc-rzv2m.c @@ -64,7 +64,7 @@ static const struct gpio_chip rzv2m_pwc_gc = { .label = "gpio_rzv2m_pwc", .owner = THIS_MODULE, .get = rzv2m_pwc_gpio_get, - .set_rv = rzv2m_pwc_gpio_set, + .set = rzv2m_pwc_gpio_set, .direction_output = rzv2m_pwc_gpio_direction_output, .can_sleep = false, .ngpio = 2, diff --git a/drivers/spi/spi-xcomm.c b/drivers/spi/spi-xcomm.c index 1a40c4866ce1..33b78c537520 100644 --- a/drivers/spi/spi-xcomm.c +++ b/drivers/spi/spi-xcomm.c @@ -70,7 +70,7 @@ static int spi_xcomm_gpio_add(struct spi_xcomm *spi_xcomm) return 0; spi_xcomm->gc.get_direction = spi_xcomm_gpio_get_direction; - spi_xcomm->gc.set_rv = spi_xcomm_gpio_set_value; + spi_xcomm->gc.set = spi_xcomm_gpio_set_value; spi_xcomm->gc.can_sleep = 1; spi_xcomm->gc.base = -1; spi_xcomm->gc.ngpio = 1; diff --git a/drivers/ssb/driver_gpio.c b/drivers/ssb/driver_gpio.c index e1f5f0a9c8a2..905657c925bc 100644 --- a/drivers/ssb/driver_gpio.c +++ b/drivers/ssb/driver_gpio.c @@ -225,7 +225,7 @@ static int ssb_gpio_chipco_init(struct ssb_bus *bus) chip->request = ssb_gpio_chipco_request; chip->free = ssb_gpio_chipco_free; chip->get = ssb_gpio_chipco_get_value; - chip->set_rv = ssb_gpio_chipco_set_value; + chip->set = ssb_gpio_chipco_set_value; chip->direction_input = ssb_gpio_chipco_direction_input; chip->direction_output = ssb_gpio_chipco_direction_output; #if IS_ENABLED(CONFIG_SSB_EMBEDDED) @@ -422,7 +422,7 @@ static int ssb_gpio_extif_init(struct ssb_bus *bus) chip->label = "ssb_extif_gpio"; chip->owner = THIS_MODULE; chip->get = ssb_gpio_extif_get_value; - chip->set_rv = ssb_gpio_extif_set_value; + chip->set = ssb_gpio_extif_set_value; chip->direction_input = ssb_gpio_extif_direction_input; chip->direction_output = ssb_gpio_extif_direction_output; #if IS_ENABLED(CONFIG_SSB_EMBEDDED) diff --git a/drivers/staging/greybus/gpio.c b/drivers/staging/greybus/gpio.c index 1280530c8987..ac62b932e6a4 100644 --- a/drivers/staging/greybus/gpio.c +++ b/drivers/staging/greybus/gpio.c @@ -551,7 +551,7 @@ static int gb_gpio_probe(struct gbphy_device *gbphy_dev, gpio->direction_input = gb_gpio_direction_input; gpio->direction_output = gb_gpio_direction_output; gpio->get = gb_gpio_get; - gpio->set_rv = gb_gpio_set; + gpio->set = gb_gpio_set; gpio->set_config = gb_gpio_set_config; gpio->base = -1; /* Allocate base dynamically */ gpio->ngpio = ggc->line_max + 1; diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 541c790c0109..ce260e9949c3 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1414,7 +1414,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty s->gpio.direction_input = max310x_gpio_direction_input; s->gpio.get = max310x_gpio_get; s->gpio.direction_output= max310x_gpio_direction_output; - s->gpio.set_rv = max310x_gpio_set; + s->gpio.set = max310x_gpio_set; s->gpio.set_config = max310x_gpio_set_config; s->gpio.base = -1; s->gpio.ngpio = devtype->nr * 4; diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 5ea8aadb6e69..3f38fba8f6ea 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1425,7 +1425,7 @@ static int sc16is7xx_setup_gpio_chip(struct sc16is7xx_port *s) s->gpio.direction_input = sc16is7xx_gpio_direction_input; s->gpio.get = sc16is7xx_gpio_get; s->gpio.direction_output = sc16is7xx_gpio_direction_output; - s->gpio.set_rv = sc16is7xx_gpio_set; + s->gpio.set = sc16is7xx_gpio_set; s->gpio.base = -1; s->gpio.ngpio = s->devtype->nr_gpio; s->gpio.can_sleep = 1; diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index cfa1d68c7919..36b25418b214 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -1962,7 +1962,7 @@ static int cp210x_gpio_init(struct usb_serial *serial) priv->gc.direction_input = cp210x_gpio_direction_input; priv->gc.direction_output = cp210x_gpio_direction_output; priv->gc.get = cp210x_gpio_get; - priv->gc.set_rv = cp210x_gpio_set; + priv->gc.set = cp210x_gpio_set; priv->gc.set_config = cp210x_gpio_set_config; priv->gc.init_valid_mask = cp210x_gpio_init_valid_mask; priv->gc.owner = THIS_MODULE; diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 7737285a84ba..49666c33b41f 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -2150,9 +2150,9 @@ static int ftdi_gpio_init(struct usb_serial_port *port) priv->gc.direction_output = ftdi_gpio_direction_output; priv->gc.init_valid_mask = ftdi_gpio_init_valid_mask; priv->gc.get = ftdi_gpio_get; - priv->gc.set_rv = ftdi_gpio_set; + priv->gc.set = ftdi_gpio_set; priv->gc.get_multiple = ftdi_gpio_get_multiple; - priv->gc.set_multiple_rv = ftdi_gpio_set_multiple; + priv->gc.set_multiple = ftdi_gpio_set_multiple; priv->gc.owner = THIS_MODULE; priv->gc.parent = &serial->interface->dev; priv->gc.base = -1; diff --git a/drivers/video/fbdev/via/via-gpio.c b/drivers/video/fbdev/via/via-gpio.c index 72302384bf77..45c0a4a6f85c 100644 --- a/drivers/video/fbdev/via/via-gpio.c +++ b/drivers/video/fbdev/via/via-gpio.c @@ -145,7 +145,7 @@ static struct viafb_gpio_cfg viafb_gpio_config = { .label = "VIAFB onboard GPIO", .owner = THIS_MODULE, .direction_output = via_gpio_dir_out, - .set_rv = via_gpio_set, + .set = via_gpio_set, .direction_input = via_gpio_dir_input, .get = via_gpio_get, .base = -1, diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 90567dde7d8e..667f8fd58a79 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -347,10 +347,10 @@ struct gpio_irq_chip { * @get: returns value for signal "offset", 0=low, 1=high, or negative error * @get_multiple: reads values for multiple signals defined by "mask" and * stores them in "bits", returns 0 on success or negative error - * @set_rv: assigns output value for signal "offset", returns 0 on success or - * negative error value - * @set_multiple_rv: assigns output values for multiple signals defined by - * "mask", returns 0 on success or negative error value + * @set: assigns output value for signal "offset", returns 0 on success or + * negative error value + * @set_multiple: assigns output values for multiple signals defined by + * "mask", returns 0 on success or negative error value * @set_config: optional hook for all kinds of settings. Uses the same * packed config format as generic pinconf. Must return 0 on success and * a negative error number on failure. @@ -443,12 +443,11 @@ struct gpio_chip { int (*get_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); - int (*set_rv)(struct gpio_chip *gc, - unsigned int offset, - int value); - int (*set_multiple_rv)(struct gpio_chip *gc, - unsigned long *mask, - unsigned long *bits); + int (*set)(struct gpio_chip *gc, + unsigned int offset, int value); + int (*set_multiple)(struct gpio_chip *gc, + unsigned long *mask, + unsigned long *bits); int (*set_config)(struct gpio_chip *gc, unsigned int offset, unsigned long config); diff --git a/include/linux/gpio/generic.h b/include/linux/gpio/generic.h index b511acd58ab0..f3a8db4598bb 100644 --- a/include/linux/gpio/generic.h +++ b/include/linux/gpio/generic.h @@ -88,10 +88,10 @@ static inline int gpio_generic_chip_set(struct gpio_generic_chip *chip, unsigned int offset, int value) { - if (WARN_ON(!chip->gc.set_rv)) + if (WARN_ON(!chip->gc.set)) return -EOPNOTSUPP; - return chip->gc.set_rv(&chip->gc, offset, value); + return chip->gc.set(&chip->gc, offset, value); } #define gpio_generic_chip_lock(gen_gc) \ diff --git a/sound/hda/codecs/side-codecs/cirrus_scodec_test.c b/sound/hda/codecs/side-codecs/cirrus_scodec_test.c index 93b9cbf1f08a..9ba14c09c07f 100644 --- a/sound/hda/codecs/side-codecs/cirrus_scodec_test.c +++ b/sound/hda/codecs/side-codecs/cirrus_scodec_test.c @@ -86,7 +86,7 @@ static const struct gpio_chip cirrus_scodec_test_gpio_chip = { .direction_input = cirrus_scodec_test_gpio_direction_in, .get = cirrus_scodec_test_gpio_get, .direction_output = cirrus_scodec_test_gpio_direction_out, - .set_rv = cirrus_scodec_test_gpio_set, + .set = cirrus_scodec_test_gpio_set, .set_config = cirrus_scodec_test_gpio_set_config, .base = -1, .ngpio = 32, diff --git a/sound/soc/codecs/idt821034.c b/sound/soc/codecs/idt821034.c index 55e90604bbaa..6738cf21983b 100644 --- a/sound/soc/codecs/idt821034.c +++ b/sound/soc/codecs/idt821034.c @@ -1117,7 +1117,7 @@ static int idt821034_gpio_init(struct idt821034 *idt821034) idt821034->gpio_chip.direction_input = idt821034_chip_direction_input; idt821034->gpio_chip.direction_output = idt821034_chip_direction_output; idt821034->gpio_chip.get = idt821034_chip_gpio_get; - idt821034->gpio_chip.set_rv = idt821034_chip_gpio_set; + idt821034->gpio_chip.set = idt821034_chip_gpio_set; idt821034->gpio_chip.can_sleep = true; return devm_gpiochip_add_data(&idt821034->spi->dev, &idt821034->gpio_chip, diff --git a/sound/soc/codecs/peb2466.c b/sound/soc/codecs/peb2466.c index b8905c03445e..c0c5b3c3e98b 100644 --- a/sound/soc/codecs/peb2466.c +++ b/sound/soc/codecs/peb2466.c @@ -1945,7 +1945,7 @@ static int peb2466_gpio_init(struct peb2466 *peb2466) peb2466->gpio.gpio_chip.direction_input = peb2466_chip_direction_input; peb2466->gpio.gpio_chip.direction_output = peb2466_chip_direction_output; peb2466->gpio.gpio_chip.get = peb2466_chip_gpio_get; - peb2466->gpio.gpio_chip.set_rv = peb2466_chip_gpio_set; + peb2466->gpio.gpio_chip.set = peb2466_chip_gpio_set; peb2466->gpio.gpio_chip.can_sleep = true; return devm_gpiochip_add_data(&peb2466->spi->dev, &peb2466->gpio.gpio_chip, diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c index 69a0fb8d7f77..6b6c690a9e45 100644 --- a/sound/soc/codecs/rt5677.c +++ b/sound/soc/codecs/rt5677.c @@ -4835,7 +4835,7 @@ static const struct gpio_chip rt5677_template_chip = { .label = RT5677_DRV_NAME, .owner = THIS_MODULE, .direction_output = rt5677_gpio_direction_out, - .set_rv = rt5677_gpio_set, + .set = rt5677_gpio_set, .direction_input = rt5677_gpio_direction_in, .get = rt5677_gpio_get, .to_irq = rt5677_to_irq, diff --git a/sound/soc/codecs/tlv320adc3xxx.c b/sound/soc/codecs/tlv320adc3xxx.c index 1035ba17dc5d..258fbcaf345a 100644 --- a/sound/soc/codecs/tlv320adc3xxx.c +++ b/sound/soc/codecs/tlv320adc3xxx.c @@ -1052,7 +1052,7 @@ static const struct gpio_chip adc3xxx_gpio_chip = { .owner = THIS_MODULE, .request = adc3xxx_gpio_request, .direction_output = adc3xxx_gpio_direction_out, - .set_rv = adc3xxx_gpio_set, + .set = adc3xxx_gpio_set, .get = adc3xxx_gpio_get, .can_sleep = 1, }; diff --git a/sound/soc/codecs/wm5100.c b/sound/soc/codecs/wm5100.c index fb5ed4ba7f60..2d0a20f2fd8c 100644 --- a/sound/soc/codecs/wm5100.c +++ b/sound/soc/codecs/wm5100.c @@ -2290,7 +2290,7 @@ static const struct gpio_chip wm5100_template_chip = { .label = "wm5100", .owner = THIS_MODULE, .direction_output = wm5100_gpio_direction_out, - .set_rv = wm5100_gpio_set, + .set = wm5100_gpio_set, .direction_input = wm5100_gpio_direction_in, .get = wm5100_gpio_get, .can_sleep = 1, diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index 2ed9f493d507..f7d726e3052c 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -1843,7 +1843,7 @@ static const struct gpio_chip wm8903_template_chip = { .direction_input = wm8903_gpio_direction_in, .get = wm8903_gpio_get, .direction_output = wm8903_gpio_direction_out, - .set_rv = wm8903_gpio_set, + .set = wm8903_gpio_set, .can_sleep = 1, }; diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index d69aa8b15629..7bf6b88c056c 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -3442,7 +3442,7 @@ static const struct gpio_chip wm8962_template_chip = { .owner = THIS_MODULE, .request = wm8962_gpio_request, .direction_output = wm8962_gpio_direction_out, - .set_rv = wm8962_gpio_set, + .set = wm8962_gpio_set, .can_sleep = 1, }; diff --git a/sound/soc/codecs/wm8996.c b/sound/soc/codecs/wm8996.c index e364d0da9044..459b39998307 100644 --- a/sound/soc/codecs/wm8996.c +++ b/sound/soc/codecs/wm8996.c @@ -2186,7 +2186,7 @@ static const struct gpio_chip wm8996_template_chip = { .label = "wm8996", .owner = THIS_MODULE, .direction_output = wm8996_gpio_direction_out, - .set_rv = wm8996_gpio_set, + .set = wm8996_gpio_set, .direction_input = wm8996_gpio_direction_in, .get = wm8996_gpio_get, .can_sleep = 1, diff --git a/sound/soc/codecs/zl38060.c b/sound/soc/codecs/zl38060.c index 180d45a349ac..7de4014e626d 100644 --- a/sound/soc/codecs/zl38060.c +++ b/sound/soc/codecs/zl38060.c @@ -440,7 +440,7 @@ static const struct gpio_chip template_chip = { .direction_input = chip_direction_input, .direction_output = chip_direction_output, .get = chip_gpio_get, - .set_rv = chip_gpio_set, + .set = chip_gpio_set, .can_sleep = true, }; diff --git a/sound/soc/soc-ac97.c b/sound/soc/soc-ac97.c index 29790807d785..37486d6a438e 100644 --- a/sound/soc/soc-ac97.c +++ b/sound/soc/soc-ac97.c @@ -125,7 +125,7 @@ static const struct gpio_chip snd_soc_ac97_gpio_chip = { .direction_input = snd_soc_ac97_gpio_direction_in, .get = snd_soc_ac97_gpio_get, .direction_output = snd_soc_ac97_gpio_direction_out, - .set_rv = snd_soc_ac97_gpio_set, + .set = snd_soc_ac97_gpio_set, .can_sleep = 1, }; diff --git a/sound/soc/ti/davinci-mcasp.c b/sound/soc/ti/davinci-mcasp.c index caf1887cc9d1..621a9d5f9377 100644 --- a/sound/soc/ti/davinci-mcasp.c +++ b/sound/soc/ti/davinci-mcasp.c @@ -2218,7 +2218,7 @@ static const struct gpio_chip davinci_mcasp_template_chip = { .request = davinci_mcasp_gpio_request, .free = davinci_mcasp_gpio_free, .direction_output = davinci_mcasp_gpio_direction_out, - .set_rv = davinci_mcasp_gpio_set, + .set = davinci_mcasp_gpio_set, .direction_input = davinci_mcasp_gpio_direction_in, .get = davinci_mcasp_gpio_get, .get_direction = davinci_mcasp_gpio_get_direction, From f752adfaf5f7d796007f9c1a867b9bdccc15cc2c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 25 Jul 2025 14:31:34 +0200 Subject: [PATCH 184/279] MAINTAINERS: resurrect my netfilter maintainer entry This reverts commit b5048d27872a9734d142540ea23c3e897e47e05c. Its been more than a year, hope my motivation lasts a bit longer than last time :-) Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index b968bc6959d1..cd9415702b28 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17313,6 +17313,7 @@ F: drivers/net/ethernet/neterion/ NETFILTER M: Pablo Neira Ayuso M: Jozsef Kadlecsik +M: Florian Westphal L: netfilter-devel@vger.kernel.org L: coreteam@netfilter.org S: Maintained From 25a8b88f000c33a1d580c317e93e40b953dc2fa5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 30 Jul 2025 23:45:32 +0200 Subject: [PATCH 185/279] netfilter: add back NETFILTER_XTABLES dependencies Some Kconfig symbols were changed to depend on the 'bool' symbol NETFILTER_XTABLES_LEGACY, which means they can now be set to built-in when the xtables code itself is in a loadable module: x86_64-linux-ld: vmlinux.o: in function `arpt_unregister_table_pre_exit': (.text+0x1831987): undefined reference to `xt_find_table' x86_64-linux-ld: vmlinux.o: in function `get_info.constprop.0': arp_tables.c:(.text+0x1831aab): undefined reference to `xt_request_find_table_lock' x86_64-linux-ld: arp_tables.c:(.text+0x1831bea): undefined reference to `xt_table_unlock' x86_64-linux-ld: vmlinux.o: in function `do_arpt_get_ctl': arp_tables.c:(.text+0x183205d): undefined reference to `xt_find_table_lock' x86_64-linux-ld: arp_tables.c:(.text+0x18320c1): undefined reference to `xt_table_unlock' x86_64-linux-ld: arp_tables.c:(.text+0x183219a): undefined reference to `xt_recseq' Change these to depend on both NETFILTER_XTABLES and NETFILTER_XTABLES_LEGACY. Fixes: 9fce66583f06 ("netfilter: Exclude LEGACY TABLES on PREEMPT_RT.") Signed-off-by: Arnd Bergmann Acked-by: Florian Westphal Tested-by: Breno Leitao Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/Kconfig | 1 + net/ipv4/netfilter/Kconfig | 3 +++ net/ipv6/netfilter/Kconfig | 1 + 3 files changed, 5 insertions(+) diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 60f28e4fb5c0..4fd5a6ea26b4 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -43,6 +43,7 @@ config NF_CONNTRACK_BRIDGE config BRIDGE_NF_EBTABLES_LEGACY tristate "Legacy EBTABLES support" depends on BRIDGE && NETFILTER_XTABLES_LEGACY + depends on NETFILTER_XTABLES default n help Legacy ebtables packet/frame classifier. diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 2c438b140e88..7dc9772fe2d8 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -14,6 +14,7 @@ config NF_DEFRAG_IPV4 config IP_NF_IPTABLES_LEGACY tristate "Legacy IP tables support" depends on NETFILTER_XTABLES_LEGACY + depends on NETFILTER_XTABLES default m if NETFILTER_XTABLES_LEGACY help iptables is a legacy packet classifier. @@ -326,6 +327,7 @@ endif # IP_NF_IPTABLES config IP_NF_ARPTABLES tristate "Legacy ARPTABLES support" depends on NETFILTER_XTABLES_LEGACY + depends on NETFILTER_XTABLES default n help arptables is a legacy packet classifier. @@ -343,6 +345,7 @@ config IP_NF_ARPFILTER select IP_NF_ARPTABLES select NETFILTER_FAMILY_ARP depends on NETFILTER_XTABLES_LEGACY + depends on NETFILTER_XTABLES help ARP packet filtering defines a table `filter', which has a series of rules for simple ARP packet filtering at local input and diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 276860f65baa..81daf82ddc2d 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -10,6 +10,7 @@ menu "IPv6: Netfilter Configuration" config IP6_NF_IPTABLES_LEGACY tristate "Legacy IP6 tables support" depends on INET && IPV6 && NETFILTER_XTABLES_LEGACY + depends on NETFILTER_XTABLES default m if NETFILTER_XTABLES_LEGACY help ip6tables is a legacy packet classifier. From de788b2e6227462b6dcd0e07474e72c089008f74 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Aug 2025 17:25:08 +0200 Subject: [PATCH 186/279] netfilter: ctnetlink: fix refcount leak on table dump There is a reference count leak in ctnetlink_dump_table(): if (res < 0) { nf_conntrack_get(&ct->ct_general); // HERE cb->args[1] = (unsigned long)ct; ... While its very unlikely, its possible that ct == last. If this happens, then the refcount of ct was already incremented. This 2nd increment is never undone. This prevents the conntrack object from being released, which in turn keeps prevents cnet->count from dropping back to 0. This will then block the netns dismantle (or conntrack rmmod) as nf_conntrack_cleanup_net_list() will wait forever. This can be reproduced by running conntrack_resize.sh selftest in a loop. It takes ~20 minutes for me on a preemptible kernel on average before I see a runaway kworker spinning in nf_conntrack_cleanup_net_list. One fix would to change this to: if (res < 0) { if (ct != last) nf_conntrack_get(&ct->ct_general); But this reference counting isn't needed in the first place. We can just store a cookie value instead. A followup patch will do the same for ctnetlink_exp_dump_table, it looks to me as if this has the same problem and like ctnetlink_dump_table, we only need a 'skip hint', not the actual object so we can apply the same cookie strategy there as well. Fixes: d205dc40798d ("[NETFILTER]: ctnetlink: fix deadlock in table dumping") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 486d52b45fe5..f403acd82437 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -884,8 +884,6 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) static int ctnetlink_done(struct netlink_callback *cb) { - if (cb->args[1]) - nf_ct_put((struct nf_conn *)cb->args[1]); kfree(cb->data); return 0; } @@ -1208,19 +1206,26 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) return 0; } +static unsigned long ctnetlink_get_id(const struct nf_conn *ct) +{ + unsigned long id = nf_ct_get_id(ct); + + return id ? id : 1; +} + static int ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { unsigned int flags = cb->data ? NLM_F_DUMP_FILTERED : 0; struct net *net = sock_net(skb->sk); - struct nf_conn *ct, *last; + unsigned long last_id = cb->args[1]; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; struct nf_conn *nf_ct_evict[8]; + struct nf_conn *ct; int res, i; spinlock_t *lockp; - last = (struct nf_conn *)cb->args[1]; i = 0; local_bh_disable(); @@ -1257,7 +1262,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) continue; if (cb->args[1]) { - if (ct != last) + if (ctnetlink_get_id(ct) != last_id) continue; cb->args[1] = 0; } @@ -1270,8 +1275,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) NFNL_MSG_TYPE(cb->nlh->nlmsg_type), ct, true, flags); if (res < 0) { - nf_conntrack_get(&ct->ct_general); - cb->args[1] = (unsigned long)ct; + cb->args[1] = ctnetlink_get_id(ct); spin_unlock(lockp); goto out; } @@ -1284,12 +1288,10 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) } out: local_bh_enable(); - if (last) { + if (last_id) { /* nf ct hash resize happened, now clear the leftover. */ - if ((struct nf_conn *)cb->args[1] == last) + if (cb->args[1] == last_id) cb->args[1] = 0; - - nf_ct_put(last); } while (i) { From 1492e3dcb2be3aa46d1963da96aa9593e4e4db5a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Aug 2025 17:25:09 +0200 Subject: [PATCH 187/279] netfilter: ctnetlink: remove refcounting in expectation dumpers Same pattern as previous patch: do not keep the expectation object alive via refcount, only store a cookie value and then use that as the skip hint for dump resumption. AFAICS this has the same issue as the one resolved in the conntrack dumper, when we do if (!refcount_inc_not_zero(&exp->use)) to increment the refcount, there is a chance that exp == last, which causes a double-increment of the refcount and subsequent memory leak. Fixes: cf6994c2b981 ("[NETFILTER]: nf_conntrack_netlink: sync expectation dumping with conntrack table dumping") Fixes: e844a928431f ("netfilter: ctnetlink: allow to dump expectation per master conntrack") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 41 ++++++++++++---------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index f403acd82437..50fd6809380f 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3170,23 +3170,27 @@ ctnetlink_expect_event(unsigned int events, const struct nf_exp_event *item) return 0; } #endif -static int ctnetlink_exp_done(struct netlink_callback *cb) + +static unsigned long ctnetlink_exp_id(const struct nf_conntrack_expect *exp) { - if (cb->args[1]) - nf_ct_expect_put((struct nf_conntrack_expect *)cb->args[1]); - return 0; + unsigned long id = (unsigned long)exp; + + id += nf_ct_get_id(exp->master); + id += exp->class; + + return id ? id : 1; } static int ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - struct nf_conntrack_expect *exp, *last; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); u_int8_t l3proto = nfmsg->nfgen_family; + unsigned long last_id = cb->args[1]; + struct nf_conntrack_expect *exp; rcu_read_lock(); - last = (struct nf_conntrack_expect *)cb->args[1]; for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { restart: hlist_for_each_entry_rcu(exp, &nf_ct_expect_hash[cb->args[0]], @@ -3198,7 +3202,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) continue; if (cb->args[1]) { - if (exp != last) + if (ctnetlink_exp_id(exp) != last_id) continue; cb->args[1] = 0; } @@ -3207,9 +3211,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp) < 0) { - if (!refcount_inc_not_zero(&exp->use)) - continue; - cb->args[1] = (unsigned long)exp; + cb->args[1] = ctnetlink_exp_id(exp); goto out; } } @@ -3220,32 +3222,30 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) } out: rcu_read_unlock(); - if (last) - nf_ct_expect_put(last); - return skb->len; } static int ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { - struct nf_conntrack_expect *exp, *last; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); struct nf_conn *ct = cb->data; struct nf_conn_help *help = nfct_help(ct); u_int8_t l3proto = nfmsg->nfgen_family; + unsigned long last_id = cb->args[1]; + struct nf_conntrack_expect *exp; if (cb->args[0]) return 0; rcu_read_lock(); - last = (struct nf_conntrack_expect *)cb->args[1]; + restart: hlist_for_each_entry_rcu(exp, &help->expectations, lnode) { if (l3proto && exp->tuple.src.l3num != l3proto) continue; if (cb->args[1]) { - if (exp != last) + if (ctnetlink_exp_id(exp) != last_id) continue; cb->args[1] = 0; } @@ -3253,9 +3253,7 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp) < 0) { - if (!refcount_inc_not_zero(&exp->use)) - continue; - cb->args[1] = (unsigned long)exp; + cb->args[1] = ctnetlink_exp_id(exp); goto out; } } @@ -3266,9 +3264,6 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0] = 1; out: rcu_read_unlock(); - if (last) - nf_ct_expect_put(last); - return skb->len; } @@ -3287,7 +3282,6 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl, struct nf_conntrack_zone zone; struct netlink_dump_control c = { .dump = ctnetlink_exp_ct_dump_table, - .done = ctnetlink_exp_done, }; err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, @@ -3337,7 +3331,6 @@ static int ctnetlink_get_expect(struct sk_buff *skb, else { struct netlink_dump_control c = { .dump = ctnetlink_exp_dump_table, - .done = ctnetlink_exp_done, }; return netlink_dump_start(info->sk, skb, info->nlh, &c); } From c8a7c2c608180f3b4e51dc958b3861242dcdd76d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 Aug 2025 12:10:41 +0200 Subject: [PATCH 188/279] netfilter: nft_set_pipapo: don't return bogus extension pointer Dan Carpenter says: Commit 17a20e09f086 ("netfilter: nft_set: remove one argument from lookup and update functions") [..] leads to the following Smatch static checker warning: net/netfilter/nft_set_pipapo_avx2.c:1269 nft_pipapo_avx2_lookup() error: uninitialized symbol 'ext'. Fix this by initing ext to NULL and set it only once we've found a match. Fixes: 17a20e09f086 ("netfilter: nft_set: remove one argument from lookup and update functions") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/netfilter-devel/aJBzc3V5wk-yPOnH@stanley.mountain/ Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo_avx2.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index db5d367e43c4..2f090e253caf 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -1150,12 +1150,12 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, const u32 *key) { struct nft_pipapo *priv = nft_set_priv(set); + const struct nft_set_ext *ext = NULL; struct nft_pipapo_scratch *scratch; u8 genmask = nft_genmask_cur(net); const struct nft_pipapo_match *m; const struct nft_pipapo_field *f; const u8 *rp = (const u8 *)key; - const struct nft_set_ext *ext; unsigned long *res, *fill; bool map_index; int i; @@ -1246,13 +1246,13 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, goto out; if (last) { - ext = &f->mt[ret].e->ext; - if (unlikely(nft_set_elem_expired(ext) || - !nft_set_elem_active(ext, genmask))) { - ext = NULL; - goto next_match; - } + const struct nft_set_ext *e = &f->mt[ret].e->ext; + if (unlikely(nft_set_elem_expired(e) || + !nft_set_elem_active(e, genmask))) + goto next_match; + + ext = e; goto out; } From f54186df806fb1e9cb262d553f4ff942f9467cf1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 4 Aug 2025 13:35:15 +0300 Subject: [PATCH 189/279] netfilter: conntrack: clean up returns in nf_conntrack_log_invalid_sysctl() Smatch complains that these look like error paths with missing error codes, especially the one where we return if nf_log_is_registered() is true: net/netfilter/nf_conntrack_standalone.c:575 nf_conntrack_log_invalid_sysctl() warn: missing error code? 'ret' In fact, all these return zero deliberately. Change them to return a literal instead which helps readability as well as silencing the warning. Fixes: e89a68046687 ("netfilter: load nf_log_syslog on enabling nf_conntrack_log_invalid") Signed-off-by: Dan Carpenter Acked-by: Lance Yang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 9b8b10a85233..1f14ef0436c6 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -567,16 +567,16 @@ nf_conntrack_log_invalid_sysctl(const struct ctl_table *table, int write, return ret; if (*(u8 *)table->data == 0) - return ret; + return 0; /* Load nf_log_syslog only if no logger is currently registered */ for (i = 0; i < NFPROTO_NUMPROTO; i++) { if (nf_log_is_registered(i)) - return ret; + return 0; } request_module("%s", "nf_log_syslog"); - return ret; + return 0; } static struct ctl_table_header *nf_ct_netfilter_header; From 1dee968d22eaeb3eede70df513ab3f8dd1712e3e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 7 Aug 2025 12:02:42 +0200 Subject: [PATCH 190/279] netfilter: nft_socket: remove WARN_ON_ONCE with huge level value syzbot managed to reach this WARN_ON_ONCE by passing a huge level value, remove it. WARNING: CPU: 0 PID: 5853 at net/netfilter/nft_socket.c:220 nft_socket_init+0x2f4/0x3d0 net/netfilter/nft_socket.c:220 Reported-by: syzbot+a225fea35d7baf8dbdc3@syzkaller.appspotmail.com Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 35d0409b0095..36affbb697c2 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -217,7 +217,7 @@ static int nft_socket_init(const struct nft_ctx *ctx, level += err; /* Implies a giant cgroup tree */ - if (WARN_ON_ONCE(level > 255)) + if (level > 255) return -EOPNOTSUPP; priv->level = level; From 80f21806b8e34ae1e24c0fc6a0f0dfd9b055e130 Mon Sep 17 00:00:00 2001 From: Mohamed Khalfella Date: Wed, 6 Aug 2025 22:35:07 -0700 Subject: [PATCH 191/279] nvmet: exit debugfs after discovery subsystem exits Commit 528589947c180 ("nvmet: initialize discovery subsys after debugfs is initialized") changed nvmet_init() to initialize nvme discovery after "nvmet" debugfs directory is initialized. The change broke nvmet_exit() because discovery subsystem now depends on debugfs. Debugfs should be destroyed after discovery subsystem. Fix nvmet_exit() to do that. Reported-by: Yi Zhang Closes: https://lore.kernel.org/all/CAHj4cs96AfFQpyDKF_MdfJsnOEo=2V7dQgqjFv+k3t7H-=yGhA@mail.gmail.com/ Fixes: 528589947c180 ("nvmet: initialize discovery subsys after debugfs is initialized") Signed-off-by: Mohamed Khalfella Reviewed-by: Hannes Reinecke Reviewed-by: Daniel Wagner Link: https://lore.kernel.org/r/20250807053507.2794335-1-mkhalfella@purestorage.com Signed-off-by: Jens Axboe --- drivers/nvme/target/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 83f3d2f8ef2d..0dd7bd99afa3 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1992,8 +1992,8 @@ static int __init nvmet_init(void) static void __exit nvmet_exit(void) { nvmet_exit_configfs(); - nvmet_exit_debugfs(); nvmet_exit_discovery(); + nvmet_exit_debugfs(); ida_destroy(&cntlid_ida); destroy_workqueue(nvmet_wq); destroy_workqueue(buffered_io_wq); From 42e6c6ce03fd3e41e39a0f93f9b1a1d9fa664338 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 7 Aug 2025 11:24:12 +0800 Subject: [PATCH 192/279] lib/sbitmap: convert shallow_depth from one word to the whole sbitmap Currently elevators will record internal 'async_depth' to throttle asynchronous requests, and they both calculate shallow_dpeth based on sb->shift, with the respect that sb->shift is the available tags in one word. However, sb->shift is not the availbale tags in the last word, see __map_depth: if (index == sb->map_nr - 1) return sb->depth - (index << sb->shift); For consequence, if the last word is used, more tags can be get than expected, for example, assume nr_requests=256 and there are four words, in the worst case if user set nr_requests=32, then the first word is the last word, and still use bits per word, which is 64, to calculate async_depth is wrong. One the ohter hand, due to cgroup qos, bfq can allow only one request to be allocated, and set shallow_dpeth=1 will still allow the number of words request to be allocated. Fix this problems by using shallow_depth to the whole sbitmap instead of per word, also change kyber, mq-deadline and bfq to follow this, a new helper __map_depth_with_shallow() is introduced to calculate available bits in each word. Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20250807032413.1469456-2-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 35 ++++++++++++-------------- block/bfq-iosched.h | 3 +-- block/kyber-iosched.c | 9 ++----- block/mq-deadline.c | 16 +----------- include/linux/sbitmap.h | 6 ++--- lib/sbitmap.c | 56 +++++++++++++++++++++-------------------- 6 files changed, 52 insertions(+), 73 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index aca9886c9ee3..3bf76902f07f 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -694,17 +694,13 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) { struct bfq_data *bfqd = data->q->elevator->elevator_data; struct bfq_io_cq *bic = bfq_bic_lookup(data->q); - int depth; - unsigned limit = data->q->nr_requests; - unsigned int act_idx; + unsigned int limit, act_idx; /* Sync reads have full depth available */ - if (op_is_sync(opf) && !op_is_write(opf)) { - depth = 0; - } else { - depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(opf)]; - limit = (limit * depth) >> bfqd->full_depth_shift; - } + if (op_is_sync(opf) && !op_is_write(opf)) + limit = data->q->nr_requests; + else + limit = bfqd->async_depths[!!bfqd->wr_busy_queues][op_is_sync(opf)]; for (act_idx = 0; bic && act_idx < bfqd->num_actuators; act_idx++) { /* Fast path to check if bfqq is already allocated. */ @@ -718,14 +714,16 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) * available requests and thus starve other entities. */ if (bfqq_request_over_limit(bfqd, bic, opf, act_idx, limit)) { - depth = 1; + limit = 1; break; } } + bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u", - __func__, bfqd->wr_busy_queues, op_is_sync(opf), depth); - if (depth) - data->shallow_depth = depth; + __func__, bfqd->wr_busy_queues, op_is_sync(opf), limit); + + if (limit < data->q->nr_requests) + data->shallow_depth = limit; } static struct bfq_queue * @@ -7114,9 +7112,8 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) */ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt) { - unsigned int depth = 1U << bt->sb.shift; + unsigned int nr_requests = bfqd->queue->nr_requests; - bfqd->full_depth_shift = bt->sb.shift; /* * In-word depths if no bfq_queue is being weight-raised: * leaving 25% of tags only for sync reads. @@ -7128,13 +7125,13 @@ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt) * limit 'something'. */ /* no more than 50% of tags for async I/O */ - bfqd->word_depths[0][0] = max(depth >> 1, 1U); + bfqd->async_depths[0][0] = max(nr_requests >> 1, 1U); /* * no more than 75% of tags for sync writes (25% extra tags * w.r.t. async I/O, to prevent async I/O from starving sync * writes) */ - bfqd->word_depths[0][1] = max((depth * 3) >> 2, 1U); + bfqd->async_depths[0][1] = max((nr_requests * 3) >> 2, 1U); /* * In-word depths in case some bfq_queue is being weight- @@ -7144,9 +7141,9 @@ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt) * shortage. */ /* no more than ~18% of tags for async I/O */ - bfqd->word_depths[1][0] = max((depth * 3) >> 4, 1U); + bfqd->async_depths[1][0] = max((nr_requests * 3) >> 4, 1U); /* no more than ~37% of tags for sync writes (~20% extra tags) */ - bfqd->word_depths[1][1] = max((depth * 6) >> 4, 1U); + bfqd->async_depths[1][1] = max((nr_requests * 6) >> 4, 1U); } static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx) diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 0b4704932d72..34a498e6b2a5 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -813,8 +813,7 @@ struct bfq_data { * Depth limits used in bfq_limit_depth (see comments on the * function) */ - unsigned int word_depths[2][2]; - unsigned int full_depth_shift; + unsigned int async_depths[2][2]; /* * Number of independent actuators. This is equal to 1 in diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 7b6832cb3a8d..70cbc7b2deb4 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -157,10 +157,7 @@ struct kyber_queue_data { */ struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS]; - /* - * Async request percentage, converted to per-word depth for - * sbitmap_get_shallow(). - */ + /* Number of allowed async requests. */ unsigned int async_depth; struct kyber_cpu_latency __percpu *cpu_latency; @@ -447,10 +444,8 @@ static void kyber_depth_updated(struct blk_mq_hw_ctx *hctx) { struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data; struct blk_mq_tags *tags = hctx->sched_tags; - unsigned int shift = tags->bitmap_tags.sb.shift; - - kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U; + kqd->async_depth = hctx->queue->nr_requests * KYBER_ASYNC_PERCENT / 100U; sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, kqd->async_depth); } diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 7b6caf30e00a..b9b7cdf1d3c9 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -487,20 +487,6 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) return rq; } -/* - * 'depth' is a number in the range 1..INT_MAX representing a number of - * requests. Scale it with a factor (1 << bt->sb.shift) / q->nr_requests since - * 1..(1 << bt->sb.shift) is the range expected by sbitmap_get_shallow(). - * Values larger than q->nr_requests have the same effect as q->nr_requests. - */ -static int dd_to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth) -{ - struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags; - const unsigned int nrr = hctx->queue->nr_requests; - - return ((qdepth << bt->sb.shift) + nrr - 1) / nrr; -} - /* * Called by __blk_mq_alloc_request(). The shallow_depth value set by this * function is used by __blk_mq_get_tag(). @@ -517,7 +503,7 @@ static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) * Throttle asynchronous requests and writes such that these requests * do not block the allocation of synchronous requests. */ - data->shallow_depth = dd_to_word_depth(data->hctx, dd->async_depth); + data->shallow_depth = dd->async_depth; } /* Called by blk_mq_update_nr_requests(). */ diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 189140bf11fc..4adf4b364fcd 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -213,12 +213,12 @@ int sbitmap_get(struct sbitmap *sb); * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, * limiting the depth used from each word. * @sb: Bitmap to allocate from. - * @shallow_depth: The maximum number of bits to allocate from a single word. + * @shallow_depth: The maximum number of bits to allocate from the bitmap. * * This rather specific operation allows for having multiple users with * different allocation limits. E.g., there can be a high-priority class that * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() - * with a @shallow_depth of (1 << (@sb->shift - 1)). Then, the low-priority + * with a @shallow_depth of (sb->depth >> 1). Then, the low-priority * class can only allocate half of the total bits in the bitmap, preventing it * from starving out the high-priority class. * @@ -478,7 +478,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, * sbitmap_queue, limiting the depth used from each word, with preemption * already disabled. * @sbq: Bitmap queue to allocate from. - * @shallow_depth: The maximum number of bits to allocate from a single word. + * @shallow_depth: The maximum number of bits to allocate from the queue. * See sbitmap_get_shallow(). * * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after diff --git a/lib/sbitmap.c b/lib/sbitmap.c index d3412984170c..c07e3cd82e29 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -208,8 +208,28 @@ static int sbitmap_find_bit_in_word(struct sbitmap_word *map, return nr; } +static unsigned int __map_depth_with_shallow(const struct sbitmap *sb, + int index, + unsigned int shallow_depth) +{ + u64 shallow_word_depth; + unsigned int word_depth, reminder; + + word_depth = __map_depth(sb, index); + if (shallow_depth >= sb->depth) + return word_depth; + + shallow_word_depth = word_depth * shallow_depth; + reminder = do_div(shallow_word_depth, sb->depth); + + if (reminder >= (index + 1) * word_depth) + shallow_word_depth++; + + return (unsigned int)shallow_word_depth; +} + static int sbitmap_find_bit(struct sbitmap *sb, - unsigned int depth, + unsigned int shallow_depth, unsigned int index, unsigned int alloc_hint, bool wrap) @@ -218,12 +238,12 @@ static int sbitmap_find_bit(struct sbitmap *sb, int nr = -1; for (i = 0; i < sb->map_nr; i++) { - nr = sbitmap_find_bit_in_word(&sb->map[index], - min_t(unsigned int, - __map_depth(sb, index), - depth), - alloc_hint, wrap); + unsigned int depth = __map_depth_with_shallow(sb, index, + shallow_depth); + if (depth) + nr = sbitmap_find_bit_in_word(&sb->map[index], depth, + alloc_hint, wrap); if (nr != -1) { nr += index << sb->shift; break; @@ -406,27 +426,9 @@ EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq, unsigned int depth) { - unsigned int wake_batch; - unsigned int shallow_depth; - - /* - * Each full word of the bitmap has bits_per_word bits, and there might - * be a partial word. There are depth / bits_per_word full words and - * depth % bits_per_word bits left over. In bitwise arithmetic: - * - * bits_per_word = 1 << shift - * depth / bits_per_word = depth >> shift - * depth % bits_per_word = depth & ((1 << shift) - 1) - * - * Each word can be limited to sbq->min_shallow_depth bits. - */ - shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth); - depth = ((depth >> sbq->sb.shift) * shallow_depth + - min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth)); - wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1, - SBQ_WAKE_BATCH); - - return wake_batch; + return clamp_t(unsigned int, + min(depth, sbq->min_shallow_depth) / SBQ_WAIT_QUEUES, + 1, SBQ_WAKE_BATCH); } int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, From 45fa9f97e65231a9fd4f9429489cb74c10ccd0fd Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 7 Aug 2025 11:24:13 +0800 Subject: [PATCH 193/279] lib/sbitmap: make sbitmap_get_shallow() internal Because it's only used in sbitmap.c Signed-off-by: Yu Kuai Reviewed-by: Damien Le Moal Reviewed-by: Jan Kara Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20250807032413.1469456-3-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 17 ----------------- lib/sbitmap.c | 18 ++++++++++++++++-- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 4adf4b364fcd..ffb9907c7070 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -209,23 +209,6 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth); */ int sbitmap_get(struct sbitmap *sb); -/** - * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, - * limiting the depth used from each word. - * @sb: Bitmap to allocate from. - * @shallow_depth: The maximum number of bits to allocate from the bitmap. - * - * This rather specific operation allows for having multiple users with - * different allocation limits. E.g., there can be a high-priority class that - * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() - * with a @shallow_depth of (sb->depth >> 1). Then, the low-priority - * class can only allocate half of the total bits in the bitmap, preventing it - * from starving out the high-priority class. - * - * Return: Non-negative allocated bit number if successful, -1 otherwise. - */ -int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth); - /** * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap. * @sb: Bitmap to check. diff --git a/lib/sbitmap.c b/lib/sbitmap.c index c07e3cd82e29..4d188d05db15 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -307,7 +307,22 @@ static int __sbitmap_get_shallow(struct sbitmap *sb, return sbitmap_find_bit(sb, shallow_depth, index, alloc_hint, true); } -int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth) +/** + * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, + * limiting the depth used from each word. + * @sb: Bitmap to allocate from. + * @shallow_depth: The maximum number of bits to allocate from the bitmap. + * + * This rather specific operation allows for having multiple users with + * different allocation limits. E.g., there can be a high-priority class that + * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() + * with a @shallow_depth of (sb->depth >> 1). Then, the low-priority + * class can only allocate half of the total bits in the bitmap, preventing it + * from starving out the high-priority class. + * + * Return: Non-negative allocated bit number if successful, -1 otherwise. + */ +static int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth) { int nr; unsigned int hint, depth; @@ -322,7 +337,6 @@ int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth) return nr; } -EXPORT_SYMBOL_GPL(sbitmap_get_shallow); bool sbitmap_any_bit_set(const struct sbitmap *sb) { From ad580dfa388fabb52af033e3f8cc5d04be985e54 Mon Sep 17 00:00:00 2001 From: Leo Martins Date: Mon, 21 Jul 2025 10:49:16 -0700 Subject: [PATCH 194/279] btrfs: fix subpage deadlock in try_release_subpage_extent_buffer() There is a potential deadlock that can happen in try_release_subpage_extent_buffer() because the irq-safe xarray spin lock fs_info->buffer_tree is being acquired before the irq-unsafe eb->refs_lock. This leads to the potential race: // T1 (random eb->refs user) // T2 (release folio) spin_lock(&eb->refs_lock); // interrupt end_bbio_meta_write() btrfs_meta_folio_clear_writeback() btree_release_folio() folio_test_writeback() //false try_release_extent_buffer() try_release_subpage_extent_buffer() xa_lock_irq(&fs_info->buffer_tree) spin_lock(&eb->refs_lock); // blocked; held by T1 buffer_tree_clear_mark() xas_lock_irqsave() // blocked; held by T2 I believe that the spin lock can safely be replaced by an rcu_read_lock. The xa_for_each loop does not need the spin lock as it's already internally protected by the rcu_read_lock. The extent buffer is also protected by the rcu_read_lock so it won't be freed before we take the eb->refs_lock and check the ref count. The rcu_read_lock is taken and released every iteration, just like the spin lock, which means we're not protected against concurrent insertions into the xarray. This is fine because we rely on folio->private to detect if there are any ebs remaining in the folio. There is already some precedent for this with find_extent_buffer_nolock, which loads an extent buffer from the xarray with only rcu_read_lock. lockdep warning: ===================================================== WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected 6.16.0-0_fbk701_debug_rc0_123_g4c06e63b9203 #1 Tainted: G E N ----------------------------------------------------- kswapd0/66 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: ffff000011ffd600 (&eb->refs_lock){+.+.}-{3:3}, at: try_release_extent_buffer+0x18c/0x560 and this task is already holding: ffff0000c1d91b88 (&buffer_xa_class){-.-.}-{3:3}, at: try_release_extent_buffer+0x13c/0x560 which would create a new lock dependency: (&buffer_xa_class){-.-.}-{3:3} -> (&eb->refs_lock){+.+.}-{3:3} but this new dependency connects a HARDIRQ-irq-safe lock: (&buffer_xa_class){-.-.}-{3:3} ... which became HARDIRQ-irq-safe at: lock_acquire+0x178/0x358 _raw_spin_lock_irqsave+0x60/0x88 buffer_tree_clear_mark+0xc4/0x160 end_bbio_meta_write+0x238/0x398 btrfs_bio_end_io+0x1f8/0x330 btrfs_orig_write_end_io+0x1c4/0x2c0 bio_endio+0x63c/0x678 blk_update_request+0x1c4/0xa00 blk_mq_end_request+0x54/0x88 virtblk_request_done+0x124/0x1d0 blk_mq_complete_request+0x84/0xa0 virtblk_done+0x130/0x238 vring_interrupt+0x130/0x288 __handle_irq_event_percpu+0x1e8/0x708 handle_irq_event+0x98/0x1b0 handle_fasteoi_irq+0x264/0x7c0 generic_handle_domain_irq+0xa4/0x108 gic_handle_irq+0x7c/0x1a0 do_interrupt_handler+0xe4/0x148 el1_interrupt+0x30/0x50 el1h_64_irq_handler+0x14/0x20 el1h_64_irq+0x6c/0x70 _raw_spin_unlock_irq+0x38/0x70 __run_timer_base+0xdc/0x5e0 run_timer_softirq+0xa0/0x138 handle_softirqs.llvm.13542289750107964195+0x32c/0xbd0 ____do_softirq.llvm.17674514681856217165+0x18/0x28 call_on_irq_stack+0x24/0x30 __irq_exit_rcu+0x164/0x430 irq_exit_rcu+0x18/0x88 el1_interrupt+0x34/0x50 el1h_64_irq_handler+0x14/0x20 el1h_64_irq+0x6c/0x70 arch_local_irq_enable+0x4/0x8 do_idle+0x1a0/0x3b8 cpu_startup_entry+0x60/0x80 rest_init+0x204/0x228 start_kernel+0x394/0x3f0 __primary_switched+0x8c/0x8958 to a HARDIRQ-irq-unsafe lock: (&eb->refs_lock){+.+.}-{3:3} ... which became HARDIRQ-irq-unsafe at: ... lock_acquire+0x178/0x358 _raw_spin_lock+0x4c/0x68 free_extent_buffer_stale+0x2c/0x170 btrfs_read_sys_array+0x1b0/0x338 open_ctree+0xeb0/0x1df8 btrfs_get_tree+0xb60/0x1110 vfs_get_tree+0x8c/0x250 fc_mount+0x20/0x98 btrfs_get_tree+0x4a4/0x1110 vfs_get_tree+0x8c/0x250 do_new_mount+0x1e0/0x6c0 path_mount+0x4ec/0xa58 __arm64_sys_mount+0x370/0x490 invoke_syscall+0x6c/0x208 el0_svc_common+0x14c/0x1b8 do_el0_svc+0x4c/0x60 el0_svc+0x4c/0x160 el0t_64_sync_handler+0x70/0x100 el0t_64_sync+0x168/0x170 other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&eb->refs_lock); local_irq_disable(); lock(&buffer_xa_class); lock(&eb->refs_lock); lock(&buffer_xa_class); *** DEADLOCK *** 2 locks held by kswapd0/66: #0: ffff800085506e40 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0xe8/0xe50 #1: ffff0000c1d91b88 (&buffer_xa_class){-.-.}-{3:3}, at: try_release_extent_buffer+0x13c/0x560 Link: https://www.kernel.org/doc/Documentation/locking/lockdep-design.rst#:~:text=Multi%2Dlock%20dependency%20rules%3A Fixes: 19d7f65f032f ("btrfs: convert the buffer_radix to an xarray") CC: stable@vger.kernel.org # 6.16+ Reviewed-by: Boris Burkov Reviewed-by: Qu Wenruo Signed-off-by: Leo Martins Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 835b0deef9bb..f23d75986947 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4331,15 +4331,18 @@ static int try_release_subpage_extent_buffer(struct folio *folio) unsigned long end = index + (PAGE_SIZE >> fs_info->nodesize_bits) - 1; int ret; - xa_lock_irq(&fs_info->buffer_tree); + rcu_read_lock(); xa_for_each_range(&fs_info->buffer_tree, index, eb, start, end) { /* * The same as try_release_extent_buffer(), to ensure the eb * won't disappear out from under us. */ spin_lock(&eb->refs_lock); + rcu_read_unlock(); + if (refcount_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) { spin_unlock(&eb->refs_lock); + rcu_read_lock(); continue; } @@ -4358,11 +4361,10 @@ static int try_release_subpage_extent_buffer(struct folio *folio) * check the folio private at the end. And * release_extent_buffer() will release the refs_lock. */ - xa_unlock_irq(&fs_info->buffer_tree); release_extent_buffer(eb); - xa_lock_irq(&fs_info->buffer_tree); + rcu_read_lock(); } - xa_unlock_irq(&fs_info->buffer_tree); + rcu_read_unlock(); /* * Finally to check if we have cleared folio private, as if we have @@ -4375,7 +4377,6 @@ static int try_release_subpage_extent_buffer(struct folio *folio) ret = 0; spin_unlock(&folio->mapping->i_private_lock); return ret; - } int try_release_extent_buffer(struct folio *folio) From 15fc0bec883c95007a4901fe75f247bd0ca21651 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sun, 20 Jul 2025 07:56:48 +0930 Subject: [PATCH 195/279] btrfs: make btrfs_cleanup_ordered_extents() support large folios When hitting a large folio, btrfs_cleanup_ordered_extents() will get the same large folio multiple times, and clearing the same range again and again. Thankfully this is not causing anything wrong, just inefficiency. This is caused by the fact that we're iterating folios using the old page index, thus can hit the same large folio again and again. Enhance it by increasing @index to the index of the folio end, and only increase @index by 1 if we failed to grab a folio. Reviewed-by: Boris Burkov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b77dd22b8cdb..a2de289e662b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -401,10 +401,12 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode, while (index <= end_index) { folio = filemap_get_folio(inode->vfs_inode.i_mapping, index); - index++; - if (IS_ERR(folio)) + if (IS_ERR(folio)) { + index++; continue; + } + index = folio_end(folio) >> PAGE_SHIFT; /* * Here we just clear all Ordered bits for every page in the * range, then btrfs_mark_ordered_io_finished() will handle From deaf895212da74635a7f0a420e1ecf8f5eca1fe5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sun, 20 Jul 2025 15:01:39 +0930 Subject: [PATCH 196/279] btrfs: fix wrong length parameter for btrfs_cleanup_ordered_extents() Inside nocow_one_range(), if the checksum cloning for data reloc inode failed, we call btrfs_cleanup_ordered_extents() to cleanup the just allocated ordered extents. But unlike extent_clear_unlock_delalloc(), btrfs_cleanup_ordered_extents() requires a length, not an inclusive end bytenr. This can be problematic, as the @end is normally way larger than @len. This means btrfs_cleanup_ordered_extents() can be called on folios out of the correct range, and if the out-of-range folio is under writeback, we can incorrectly clear the ordered flag of the folio, and trigger the DEBUG_WARN() inside btrfs_writepage_cow_fixup(). Fix the wrong parameter with correct length instead. Fixes: 94f6c5c17e52 ("btrfs: move ordered extent cleanup to where they are allocated") CC: stable@vger.kernel.org # 6.15+ Reviewed-by: Boris Burkov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a2de289e662b..d740910e071a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2015,7 +2015,7 @@ static int nocow_one_range(struct btrfs_inode *inode, struct folio *locked_folio * cleaered by the caller. */ if (ret < 0) - btrfs_cleanup_ordered_extents(inode, file_pos, end); + btrfs_cleanup_ordered_extents(inode, file_pos, len); return ret; } From fc5799986fbca957e2e3c0480027f249951b7bcf Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 16 Jul 2025 11:41:21 +0100 Subject: [PATCH 197/279] btrfs: error on missing block group when unaccounting log tree extent buffers Currently we only log an error message if we can't find the block group for a log tree extent buffer when unaccounting it (while freeing a log tree). A missing block group means something is seriously wrong and we end up leaking space from the metadata space info. So return -ENOENT in case we don't find the block group. CC: stable@vger.kernel.org # 6.12+ Reviewed-by: Boris Burkov Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 2186e87fb61b..69e11557fd13 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2605,14 +2605,14 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, /* * Correctly adjust the reserved bytes occupied by a log tree extent buffer */ -static void unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start) +static int unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start) { struct btrfs_block_group *cache; cache = btrfs_lookup_block_group(fs_info, start); if (!cache) { btrfs_err(fs_info, "unable to find block group for %llu", start); - return; + return -ENOENT; } spin_lock(&cache->space_info->lock); @@ -2623,27 +2623,22 @@ static void unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start) spin_unlock(&cache->space_info->lock); btrfs_put_block_group(cache); + + return 0; } static int clean_log_buffer(struct btrfs_trans_handle *trans, struct extent_buffer *eb) { - int ret; - btrfs_tree_lock(eb); btrfs_clear_buffer_dirty(trans, eb); wait_on_extent_buffer_writeback(eb); btrfs_tree_unlock(eb); - if (trans) { - ret = btrfs_pin_reserved_extent(trans, eb); - if (ret) - return ret; - } else { - unaccount_log_buffer(eb->fs_info, eb->start); - } + if (trans) + return btrfs_pin_reserved_extent(trans, eb); - return 0; + return unaccount_log_buffer(eb->fs_info, eb->start); } static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, From 4289b494ac553e74e86fed1c66b2bf9530bc1082 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 25 Jul 2025 20:33:25 +0930 Subject: [PATCH 198/279] btrfs: do not allow relocation of partially dropped subvolumes [BUG] There is an internal report that balance triggered transaction abort, with the following call trace: item 85 key (594509824 169 0) itemoff 12599 itemsize 33 extent refs 1 gen 197740 flags 2 ref#0: tree block backref root 7 item 86 key (594558976 169 0) itemoff 12566 itemsize 33 extent refs 1 gen 197522 flags 2 ref#0: tree block backref root 7 ... BTRFS error (device loop0): extent item not found for insert, bytenr 594526208 num_bytes 16384 parent 449921024 root_objectid 934 owner 1 offset 0 BTRFS error (device loop0): failed to run delayed ref for logical 594526208 num_bytes 16384 type 182 action 1 ref_mod 1: -117 ------------[ cut here ]------------ BTRFS: Transaction aborted (error -117) WARNING: CPU: 1 PID: 6963 at ../fs/btrfs/extent-tree.c:2168 btrfs_run_delayed_refs+0xfa/0x110 [btrfs] And btrfs check doesn't report anything wrong related to the extent tree. [CAUSE] The cause is a little complex, firstly the extent tree indeed doesn't have the backref for 594526208. The extent tree only have the following two backrefs around that bytenr on-disk: item 65 key (594509824 METADATA_ITEM 0) itemoff 13880 itemsize 33 refs 1 gen 197740 flags TREE_BLOCK tree block skinny level 0 (176 0x7) tree block backref root CSUM_TREE item 66 key (594558976 METADATA_ITEM 0) itemoff 13847 itemsize 33 refs 1 gen 197522 flags TREE_BLOCK tree block skinny level 0 (176 0x7) tree block backref root CSUM_TREE But the such missing backref item is not an corruption on disk, as the offending delayed ref belongs to subvolume 934, and that subvolume is being dropped: item 0 key (934 ROOT_ITEM 198229) itemoff 15844 itemsize 439 generation 198229 root_dirid 256 bytenr 10741039104 byte_limit 0 bytes_used 345571328 last_snapshot 198229 flags 0x1000000000001(RDONLY) refs 0 drop_progress key (206324 EXTENT_DATA 2711650304) drop_level 2 level 2 generation_v2 198229 And that offending tree block 594526208 is inside the dropped range of that subvolume. That explains why there is no backref item for that bytenr and why btrfs check is not reporting anything wrong. But this also shows another problem, as btrfs will do all the orphan subvolume cleanup at a read-write mount. So half-dropped subvolume should not exist after an RW mount, and balance itself is also exclusive to subvolume cleanup, meaning we shouldn't hit a subvolume half-dropped during relocation. The root cause is, there is no orphan item for this subvolume. In fact there are 5 subvolumes from around 2021 that have the same problem. It looks like the original report has some older kernels running, and caused those zombie subvolumes. Thankfully upstream commit 8d488a8c7ba2 ("btrfs: fix subvolume/snapshot deletion not triggered on mount") has long fixed the bug. [ENHANCEMENT] For repairing such old fs, btrfs-progs will be enhanced. Considering how delayed the problem will show up (at run delayed ref time) and at that time we have to abort transaction already, it is too late. Instead here we reject any half-dropped subvolume for reloc tree at the earliest time, preventing confusion and extra time wasted on debugging similar bugs. CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e58151933844..7256f6748c8f 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -602,6 +602,25 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, if (btrfs_root_id(root) == objectid) { u64 commit_root_gen; + /* + * Relocation will wait for cleaner thread, and any half-dropped + * subvolume will be fully cleaned up at mount time. + * So here we shouldn't hit a subvolume with non-zero drop_progress. + * + * If this isn't the case, error out since it can make us attempt to + * drop references for extents that were already dropped before. + */ + if (unlikely(btrfs_disk_key_objectid(&root->root_item.drop_progress))) { + struct btrfs_key cpu_key; + + btrfs_disk_key_to_cpu(&cpu_key, &root->root_item.drop_progress); + btrfs_err(fs_info, + "cannot relocate partially dropped subvolume %llu, drop progress key (%llu %u %llu)", + objectid, cpu_key.objectid, cpu_key.type, cpu_key.offset); + ret = -EUCLEAN; + goto fail; + } + /* called by btrfs_init_reloc_root */ ret = btrfs_copy_root(trans, root, root->commit_root, &eb, BTRFS_TREE_RELOC_OBJECTID); From 3a931e9b39c7ff8066657042f5f00d3b7e6ad315 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 16 Jul 2025 16:59:52 +0900 Subject: [PATCH 199/279] btrfs: zoned: do not select metadata BG as finish target We call btrfs_zone_finish_one_bg() to zone finish one block group and make room to activate another block group. Currently, we can choose a metadata block group as a target. But, as we reserve an active metadata block group, we no longer want to select a metadata block group. So, skip it in the loop. CC: stable@vger.kernel.org # 6.6+ Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 245e813ecd78..db11b5b5f0e6 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -2650,7 +2650,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info) spin_lock(&block_group->lock); if (block_group->reserved || block_group->alloc_offset == 0 || - (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) || + !(block_group->flags & BTRFS_BLOCK_GROUP_DATA) || test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) { spin_unlock(&block_group->lock); continue; From 7b632596188e1973c6b3ac1c9f8252f735e1039f Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Wed, 30 Jul 2025 09:29:23 -0700 Subject: [PATCH 200/279] btrfs: fix iteration bug in __qgroup_excl_accounting() __qgroup_excl_accounting() uses the qgroup iterator machinery to update the account of one qgroups usage for all its parent hierarchy, when we either add or remove a relation and have only exclusive usage. However, there is a small bug there: we loop with an extra iteration temporary qgroup called `cur` but never actually refer to that in the body of the loop. As a result, we redundantly account the same usage to the first qgroup in the list. This can be reproduced in the following way: mkfs.btrfs -f -O squota mount btrfs subvol create /sv dd if=/dev/zero of=/sv/f bs=1M count=1 sync btrfs qgroup create 1/100 btrfs qgroup create 2/200 btrfs qgroup assign 1/100 2/200 btrfs qgroup assign 0/256 1/100 btrfs qgroup show and the broken result is (note the 2MiB on 1/100 and 0Mib on 2/100): Qgroupid Referenced Exclusive Path -------- ---------- --------- ---- 0/5 16.00KiB 16.00KiB 0/256 1.02MiB 1.02MiB sv Qgroupid Referenced Exclusive Path -------- ---------- --------- ---- 0/5 16.00KiB 16.00KiB 0/256 1.02MiB 1.02MiB sv 1/100 2.03MiB 2.03MiB 2/100<1 member qgroup> 2/100 0.00B 0.00B <0 member qgroups> With this fix, which simply re-uses `qgroup` as the iteration variable, we see the expected result: Qgroupid Referenced Exclusive Path -------- ---------- --------- ---- 0/5 16.00KiB 16.00KiB 0/256 1.02MiB 1.02MiB sv Qgroupid Referenced Exclusive Path -------- ---------- --------- ---- 0/5 16.00KiB 16.00KiB 0/256 1.02MiB 1.02MiB sv 1/100 1.02MiB 1.02MiB 2/100<1 member qgroup> 2/100 1.02MiB 1.02MiB <0 member qgroups> The existing fstests did not exercise two layer inheritance so this bug was missed. I intend to add that testing there, as well. Fixes: a0bdc04b0732 ("btrfs: qgroup: use qgroup_iterator in __qgroup_excl_accounting()") CC: stable@vger.kernel.org # 6.12+ Reviewed-by: Filipe Manana Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1a5972178b3a..ccaa9a3cf1ce 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1453,7 +1453,6 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, u64 ref_root, struct btrfs_qgroup *src, int sign) { struct btrfs_qgroup *qgroup; - struct btrfs_qgroup *cur; LIST_HEAD(qgroup_list); u64 num_bytes = src->excl; int ret = 0; @@ -1463,7 +1462,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, u64 ref_root, goto out; qgroup_iterator_add(&qgroup_list, qgroup); - list_for_each_entry(cur, &qgroup_list, iterator) { + list_for_each_entry(qgroup, &qgroup_list, iterator) { struct btrfs_qgroup_list *glist; qgroup->rfer += sign * num_bytes; From 9474e27a24a41e55d0ac2b77d8171fddec7dbb87 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 6 Aug 2025 18:22:41 +0200 Subject: [PATCH 201/279] libbpf: Add the ability to suppress perf event enablement Automatically enabling a perf event after attaching a BPF prog to it is not always desirable. Add a new "dont_enable" field to struct bpf_perf_event_opts. While introducing "enable" instead would be nicer in that it would avoid a double negation in the implementation, it would make DECLARE_LIBBPF_OPTS() less efficient. Acked-by: Eduard Zingerman Suggested-by: Jiri Olsa Tested-by: Thomas Richter Co-developed-by: Thomas Richter Signed-off-by: Thomas Richter Signed-off-by: Ilya Leoshkevich Link: https://lore.kernel.org/r/20250806162417.19666-2-iii@linux.ibm.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 13 ++++++++----- tools/lib/bpf/libbpf.h | 4 +++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index fb4d92c5c339..8f5a81b672e1 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10965,11 +10965,14 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p } link->link.fd = pfd; } - if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { - err = -errno; - pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", - prog->name, pfd, errstr(err)); - goto err_out; + + if (!OPTS_GET(opts, dont_enable, false)) { + if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { + err = -errno; + pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", + prog->name, pfd, errstr(err)); + goto err_out; + } } return &link->link; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index d1cf813a057b..455a957cb702 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -499,9 +499,11 @@ struct bpf_perf_event_opts { __u64 bpf_cookie; /* don't use BPF link when attach BPF program */ bool force_ioctl_attach; + /* don't automatically enable the event */ + bool dont_enable; size_t :0; }; -#define bpf_perf_event_opts__last_field force_ioctl_attach +#define bpf_perf_event_opts__last_field dont_enable LIBBPF_API struct bpf_link * bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd); From 5e2ac8e8571df54d0a9c9d08f287e006269a6674 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 6 Aug 2025 18:22:42 +0200 Subject: [PATCH 202/279] perf bpf-filter: Enable events manually On s390, and, in general, on all platforms where the respective event supports auxiliary data gathering, the command: # ./perf record -u 0 -aB --synth=no -- ./perf test -w thloop [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.011 MB perf.data ] # ./perf report --stats | grep SAMPLE # does not generate samples in the perf.data file. On x86 the command: # sudo perf record -e intel_pt// -u 0 ls is broken too. Looking at the sequence of calls in 'perf record' reveals this behavior: 1. The event 'cycles' is created and enabled: record__open() +-> evlist__apply_filters() +-> perf_bpf_filter__prepare() +-> bpf_program.attach_perf_event() +-> bpf_program.attach_perf_event_opts() +-> __GI___ioctl(..., PERF_EVENT_IOC_ENABLE, ...) The event 'cycles' is enabled and active now. However the event's ring-buffer to store the samples generated by hardware is not allocated yet. 2. The event's fd is mmap()ed to create the ring buffer: record__open() +-> record__mmap() +-> record__mmap_evlist() +-> evlist__mmap_ex() +-> perf_evlist__mmap_ops() +-> mmap_per_cpu() +-> mmap_per_evsel() +-> mmap__mmap() +-> perf_mmap__mmap() +-> mmap() This allocates the ring buffer for the event 'cycles'. With mmap() the kernel creates the ring buffer: perf_mmap(): kernel function to create the event's ring | buffer to save the sampled data. | +-> ring_buffer_attach(): Allocates memory for ring buffer. | The PMU has auxiliary data setup function. The | has_aux(event) condition is true and the PMU's | stop() is called to stop sampling. It is not | restarted: | | if (has_aux(event)) | perf_event_stop(event, 0); | +-> cpumsf_pmu_stop(): Hardware sampling is stopped. No samples are generated and saved anymore. 3. After the event 'cycles' has been mapped, the event is enabled a second time in: __cmd_record() +-> evlist__enable() +-> __evlist__enable() +-> evsel__enable_cpu() +-> perf_evsel__enable_cpu() +-> perf_evsel__run_ioctl() +-> perf_evsel__ioctl() +-> __GI___ioctl(., PERF_EVENT_IOC_ENABLE, .) The second ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); is just a NOP in this case. The first invocation in (1.) sets the event::state to PERF_EVENT_STATE_ACTIVE. The kernel functions perf_ioctl() +-> _perf_ioctl() +-> _perf_event_enable() +-> __perf_event_enable() return immediately because event::state is already set to PERF_EVENT_STATE_ACTIVE. This happens on s390, because the event 'cycles' offers the possibility to save auxilary data. The PMU callbacks setup_aux() and free_aux() are defined. Without both callback functions, cpumsf_pmu_stop() is not invoked and sampling continues. To remedy this, remove the first invocation of ioctl(..., PERF_EVENT_IOC_ENABLE, ...). in step (1.) Create the event in step (1.) and enable it in step (3.) after the ring buffer has been mapped. Output after: # ./perf record -aB --synth=no -u 0 -- ./perf test -w thloop 2 [ perf record: Woken up 3 times to write data ] [ perf record: Captured and wrote 0.876 MB perf.data ] # ./perf report --stats | grep SAMPLE SAMPLE events: 16200 (99.5%) SAMPLE events: 16200 # The software event succeeded both before and after the patch: # ./perf record -e cpu-clock -aB --synth=no -u 0 -- \ ./perf test -w thloop 2 [ perf record: Woken up 7 times to write data ] [ perf record: Captured and wrote 2.870 MB perf.data ] # ./perf report --stats | grep SAMPLE SAMPLE events: 53506 (99.8%) SAMPLE events: 53506 # Fixes: b4c658d4d63d61 ("perf target: Remove uid from target") Suggested-by: Jiri Olsa Tested-by: Thomas Richter Acked-by: Namhyung Kim Co-developed-by: Thomas Richter Signed-off-by: Thomas Richter Signed-off-by: Ilya Leoshkevich Link: https://lore.kernel.org/r/20250806162417.19666-3-iii@linux.ibm.com Signed-off-by: Alexei Starovoitov --- tools/perf/util/bpf-filter.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c index d0e013eeb0f7..a0b11f35395f 100644 --- a/tools/perf/util/bpf-filter.c +++ b/tools/perf/util/bpf-filter.c @@ -451,6 +451,8 @@ int perf_bpf_filter__prepare(struct evsel *evsel, struct target *target) struct bpf_link *link; struct perf_bpf_filter_entry *entry; bool needs_idx_hash = !target__has_cpu(target); + DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts, + .dont_enable = true); entry = calloc(MAX_FILTERS, sizeof(*entry)); if (entry == NULL) @@ -522,7 +524,8 @@ int perf_bpf_filter__prepare(struct evsel *evsel, struct target *target) prog = skel->progs.perf_sample_filter; for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) { for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) { - link = bpf_program__attach_perf_event(prog, FD(evsel, x, y)); + link = bpf_program__attach_perf_event_opts(prog, FD(evsel, x, y), + &pe_opts); if (IS_ERR(link)) { pr_err("Failed to attach perf sample-filter program\n"); ret = PTR_ERR(link); From d5c647b08ee02cb7fa50d89414ed0f5dc7c1ca0e Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 7 Aug 2025 10:10:51 +0200 Subject: [PATCH 203/279] PCI: vmd: Fix wrong kfree() in vmd_msi_free() vmd_msi_alloc() allocates struct vmd_irq and stashes it into irq_data->chip_data associated with the VMD's interrupt domain. vmd_msi_free() extracts the pointer by calling irq_get_chip_data() and frees it. irq_get_chip_data() returns the chip_data associated with the top interrupt domain. This worked in the past because VMD's interrupt domain was the top domain. But d7d8ab87e3e7 ("PCI: vmd: Switch to msi_create_parent_irq_domain()") changed the interrupt domain hierarchy so VMD's interrupt domain is not the top domain anymore. irq_get_chip_data() now returns the chip_data at the MSI devices' interrupt domains. It is therefore broken for vmd_msi_free() to kfree() this chip_data. Fix by extracting the chip_data associated with the VMD's interrupt domain. Fixes: d7d8ab87e3e7 ("PCI: vmd: Switch to msi_create_parent_irq_domain()") Reported-by: Kenneth Crudup Closes: https://lore.kernel.org/linux-pci/dfa40e48-8840-4e61-9fda-25cdb3ad81c1@panix.com/ Reported-by: Ammar Faizi Closes: https://lore.kernel.org/linux-pci/ed53280ed15d1140700b96cca2734bf327ee92539e5eb68e80f5bbbf0f01@linux.gnuweeb.org/ Tested-by: Ammar Faizi Tested-by: Kenneth Crudup Signed-off-by: Nam Cao Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner Reviewed-by: Jinjie Ruan Acked-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250807081051.2253962-1-namcao@linutronix.de --- drivers/pci/controller/vmd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index 9bbb0ff4cc15..b679c7f28f51 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -280,10 +280,12 @@ static int vmd_msi_alloc(struct irq_domain *domain, unsigned int virq, static void vmd_msi_free(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { + struct irq_data *irq_data; struct vmd_irq *vmdirq; for (int i = 0; i < nr_irqs; ++i) { - vmdirq = irq_get_chip_data(virq + i); + irq_data = irq_domain_get_irq_data(domain, virq + i); + vmdirq = irq_data->chip_data; synchronize_srcu(&vmdirq->irq->srcu); From 03537826f77f1c829d0593d211b38b9c876c1722 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 7 Aug 2025 18:12:11 +0200 Subject: [PATCH 204/279] smb: client: return an error if rdma_connect does not return within 5 seconds This matches the timeout for tcp connections. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Fixes: f198186aa9bb ("CIFS: SMBD: Establish SMB Direct connection") Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 6c2af00be44c..181349eda7a3 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1653,8 +1653,10 @@ static struct smbd_connection *_smbd_get_connection( goto rdma_connect_failed; } - wait_event_interruptible( - info->conn_wait, sc->status != SMBDIRECT_SOCKET_CONNECTING); + wait_event_interruptible_timeout( + info->conn_wait, + sc->status != SMBDIRECT_SOCKET_CONNECTING, + msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT)); if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { log_rdma_event(ERR, "rdma_connect failed port=%d\n", port); From 761399745710fd37fa4312af1b675b2fe73c1c52 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 7 Aug 2025 18:12:12 +0200 Subject: [PATCH 205/279] smb: client: improve logging in smbd_conn_upcall() Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 181349eda7a3..8ed4ab6f1d3a 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -178,9 +178,10 @@ static int smbd_conn_upcall( { struct smbd_connection *info = id->context; struct smbdirect_socket *sc = &info->socket; + const char *event_name = rdma_event_msg(event->event); - log_rdma_event(INFO, "event=%d status=%d\n", - event->event, event->status); + log_rdma_event(INFO, "event=%s status=%d\n", + event_name, event->status); switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: @@ -190,17 +191,19 @@ static int smbd_conn_upcall( break; case RDMA_CM_EVENT_ADDR_ERROR: + log_rdma_event(ERR, "connecting failed event=%s\n", event_name); info->ri_rc = -EHOSTUNREACH; complete(&info->ri_done); break; case RDMA_CM_EVENT_ROUTE_ERROR: + log_rdma_event(ERR, "connecting failed event=%s\n", event_name); info->ri_rc = -ENETUNREACH; complete(&info->ri_done); break; case RDMA_CM_EVENT_ESTABLISHED: - log_rdma_event(INFO, "connected event=%d\n", event->event); + log_rdma_event(INFO, "connected event=%s\n", event_name); sc->status = SMBDIRECT_SOCKET_CONNECTED; wake_up_interruptible(&info->conn_wait); break; @@ -208,7 +211,7 @@ static int smbd_conn_upcall( case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_REJECTED: - log_rdma_event(INFO, "connecting failed event=%d\n", event->event); + log_rdma_event(ERR, "connecting failed event=%s\n", event_name); sc->status = SMBDIRECT_SOCKET_DISCONNECTED; wake_up_interruptible(&info->conn_wait); break; @@ -217,6 +220,7 @@ static int smbd_conn_upcall( case RDMA_CM_EVENT_DISCONNECTED: /* This happens when we fail the negotiation */ if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_FAILED) { + log_rdma_event(ERR, "event=%s during negotiation\n", event_name); sc->status = SMBDIRECT_SOCKET_DISCONNECTED; wake_up(&info->conn_wait); break; @@ -229,6 +233,8 @@ static int smbd_conn_upcall( break; default: + log_rdma_event(ERR, "unexpected event=%s status=%d\n", + event_name, event->status); break; } From 550a194c5998e4e77affc6235e80d3766dc2d27e Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 7 Aug 2025 18:12:13 +0200 Subject: [PATCH 206/279] smb: client: don't call init_waitqueue_head(&info->conn_wait) twice in _smbd_get_connection It is already called long before we may hit this cleanup code path. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 8ed4ab6f1d3a..c819cc6dcc4f 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1716,7 +1716,6 @@ static struct smbd_connection *_smbd_get_connection( cancel_delayed_work_sync(&info->idle_timer_work); destroy_caches_and_workqueue(info); sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; - init_waitqueue_head(&info->conn_wait); rdma_disconnect(sc->rdma.cm_id); wait_event(info->conn_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); From dfe6f14aedbf59bfb7145de5c7da908583ae50fd Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 7 Aug 2025 18:12:14 +0200 Subject: [PATCH 207/279] smb: client: only use a single wait_queue to monitor smbdirect connection status There's no need for separate conn_wait and disconn_wait queues. This will simplify the move to common code, the server code already a single wait_queue for this. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 17 ++++++++--------- fs/smb/client/smbdirect.h | 3 +-- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index c819cc6dcc4f..c628e91c328b 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -205,7 +205,7 @@ static int smbd_conn_upcall( case RDMA_CM_EVENT_ESTABLISHED: log_rdma_event(INFO, "connected event=%s\n", event_name); sc->status = SMBDIRECT_SOCKET_CONNECTED; - wake_up_interruptible(&info->conn_wait); + wake_up_interruptible(&info->status_wait); break; case RDMA_CM_EVENT_CONNECT_ERROR: @@ -213,7 +213,7 @@ static int smbd_conn_upcall( case RDMA_CM_EVENT_REJECTED: log_rdma_event(ERR, "connecting failed event=%s\n", event_name); sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - wake_up_interruptible(&info->conn_wait); + wake_up_interruptible(&info->status_wait); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: @@ -222,12 +222,12 @@ static int smbd_conn_upcall( if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_FAILED) { log_rdma_event(ERR, "event=%s during negotiation\n", event_name); sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - wake_up(&info->conn_wait); + wake_up(&info->status_wait); break; } sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - wake_up_interruptible(&info->disconn_wait); + wake_up_interruptible(&info->status_wait); wake_up_interruptible(&sc->recv_io.reassembly.wait_queue); wake_up_interruptible_all(&info->wait_send_queue); break; @@ -1325,7 +1325,7 @@ void smbd_destroy(struct TCP_Server_Info *server) rdma_disconnect(sc->rdma.cm_id); log_rdma_event(INFO, "wait for transport being disconnected\n"); wait_event_interruptible( - info->disconn_wait, + info->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); } @@ -1650,8 +1650,7 @@ static struct smbd_connection *_smbd_get_connection( log_rdma_event(INFO, "connecting to IP %pI4 port %d\n", &addr_in->sin_addr, port); - init_waitqueue_head(&info->conn_wait); - init_waitqueue_head(&info->disconn_wait); + init_waitqueue_head(&info->status_wait); init_waitqueue_head(&sc->recv_io.reassembly.wait_queue); rc = rdma_connect(sc->rdma.cm_id, &conn_param); if (rc) { @@ -1660,7 +1659,7 @@ static struct smbd_connection *_smbd_get_connection( } wait_event_interruptible_timeout( - info->conn_wait, + info->status_wait, sc->status != SMBDIRECT_SOCKET_CONNECTING, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT)); @@ -1717,7 +1716,7 @@ static struct smbd_connection *_smbd_get_connection( destroy_caches_and_workqueue(info); sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; rdma_disconnect(sc->rdma.cm_id); - wait_event(info->conn_wait, + wait_event(info->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); allocate_cache_failed: diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 0d4d45428c85..e45aa9ddd71d 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -47,8 +47,7 @@ struct smbd_connection { int ri_rc; struct completion ri_done; - wait_queue_head_t conn_wait; - wait_queue_head_t disconn_wait; + wait_queue_head_t status_wait; struct completion negotiate_completion; bool negotiate_done; From e6bb9193974059ddbb0ce7763fa3882bd60d4dc3 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 5 Aug 2025 18:13:13 +0900 Subject: [PATCH 208/279] ksmbd: limit repeated connections from clients with the same IP Repeated connections from clients with the same IP address may exhaust the max connections and prevent other normal client connections. This patch limit repeated connections from clients with the same IP. Reported-by: tianshuo han Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.h | 1 + fs/smb/server/transport_tcp.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index dd3e0e3f7bf0..31dd1caac1e8 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -46,6 +46,7 @@ struct ksmbd_conn { struct mutex srv_mutex; int status; unsigned int cli_cap; + __be32 inet_addr; char *request_buf; struct ksmbd_transport *transport; struct nls_table *local_nls; diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index 4e9f98db9ff4..d72588f33b9c 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -87,6 +87,7 @@ static struct tcp_transport *alloc_transport(struct socket *client_sk) return NULL; } + conn->inet_addr = inet_sk(client_sk->sk)->inet_daddr; conn->transport = KSMBD_TRANS(t); KSMBD_TRANS(t)->conn = conn; KSMBD_TRANS(t)->ops = &ksmbd_tcp_transport_ops; @@ -230,6 +231,8 @@ static int ksmbd_kthread_fn(void *p) { struct socket *client_sk = NULL; struct interface *iface = (struct interface *)p; + struct inet_sock *csk_inet; + struct ksmbd_conn *conn; int ret; while (!kthread_should_stop()) { @@ -248,6 +251,20 @@ static int ksmbd_kthread_fn(void *p) continue; } + /* + * Limits repeated connections from clients with the same IP. + */ + csk_inet = inet_sk(client_sk->sk); + down_read(&conn_list_lock); + list_for_each_entry(conn, &conn_list, conns_list) + if (csk_inet->inet_daddr == conn->inet_addr) { + ret = -EAGAIN; + break; + } + up_read(&conn_list_lock); + if (ret == -EAGAIN) + continue; + if (server_conf.max_connections && atomic_inc_return(&active_num_conn) >= server_conf.max_connections) { pr_info_ratelimited("Limit the maximum number of connections(%u)\n", From 8e7d178d06e8937454b6d2f2811fa6a15656a214 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 6 Aug 2025 03:03:49 +0200 Subject: [PATCH 209/279] smb: server: Fix extension string in ksmbd_extract_shortname() In ksmbd_extract_shortname(), strscpy() is incorrectly called with the length of the source string (excluding the NUL terminator) rather than the size of the destination buffer. This results in "__" being copied to 'extension' rather than "___" (two underscores instead of three). Use the destination buffer size instead to ensure that the string "___" (three underscores) is copied correctly. Cc: stable@vger.kernel.org Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Signed-off-by: Thorsten Blum Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index 425c756bcfb8..b23203a1c286 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -515,7 +515,7 @@ int ksmbd_extract_shortname(struct ksmbd_conn *conn, const char *longname, p = strrchr(longname, '.'); if (p == longname) { /*name starts with a dot*/ - strscpy(extension, "___", strlen("___")); + strscpy(extension, "___", sizeof(extension)); } else { if (p) { p++; From 5378bdf6a611a32500fccf13d14156f219bb0c85 Mon Sep 17 00:00:00 2001 From: Adam Young Date: Mon, 14 Jul 2025 20:10:07 -0400 Subject: [PATCH 210/279] mailbox/pcc: support mailbox management of the shared buffer Define a new, optional, callback that allows the driver to specify how the return data buffer is allocated. If that callback is set, mailbox/pcc.c is now responsible for reading from and writing to the PCC shared buffer. This also allows for proper checks of the Commnand complete flag between the PCC sender and receiver. For Type 4 channels, initialize the command complete flag prior to accepting messages. Since the mailbox does not know what memory allocation scheme to use for response messages, the client now has an optional callback that allows it to allocate the buffer for a response message. When an outbound message is written to the buffer, the mailbox checks for the flag indicating the client wants an tx complete notification via IRQ. Upon receipt of the interrupt It will pair it with the outgoing message. The expected use is to free the kernel memory buffer for the previous outgoing message. Signed-off-by: Adam Young Signed-off-by: Jassi Brar --- drivers/mailbox/pcc.c | 102 ++++++++++++++++++++++++++++++++++++++++-- include/acpi/pcc.h | 29 ++++++++++++ 2 files changed, 127 insertions(+), 4 deletions(-) diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c index f6714c233f5a..0a00719b2482 100644 --- a/drivers/mailbox/pcc.c +++ b/drivers/mailbox/pcc.c @@ -306,6 +306,22 @@ static void pcc_chan_acknowledge(struct pcc_chan_info *pchan) pcc_chan_reg_read_modify_write(&pchan->db); } +static void *write_response(struct pcc_chan_info *pchan) +{ + struct pcc_header pcc_header; + void *buffer; + int data_len; + + memcpy_fromio(&pcc_header, pchan->chan.shmem, + sizeof(pcc_header)); + data_len = pcc_header.length - sizeof(u32) + sizeof(struct pcc_header); + + buffer = pchan->chan.rx_alloc(pchan->chan.mchan->cl, data_len); + if (buffer != NULL) + memcpy_fromio(buffer, pchan->chan.shmem, data_len); + return buffer; +} + /** * pcc_mbox_irq - PCC mailbox interrupt handler * @irq: interrupt number @@ -317,6 +333,8 @@ static irqreturn_t pcc_mbox_irq(int irq, void *p) { struct pcc_chan_info *pchan; struct mbox_chan *chan = p; + struct pcc_header *pcc_header = chan->active_req; + void *handle = NULL; pchan = chan->con_priv; @@ -340,7 +358,17 @@ static irqreturn_t pcc_mbox_irq(int irq, void *p) * required to avoid any possible race in updatation of this flag. */ pchan->chan_in_use = false; - mbox_chan_received_data(chan, NULL); + + if (pchan->chan.rx_alloc) + handle = write_response(pchan); + + if (chan->active_req) { + pcc_header = chan->active_req; + if (pcc_header->flags & PCC_CMD_COMPLETION_NOTIFY) + mbox_chan_txdone(chan, 0); + } + + mbox_chan_received_data(chan, handle); pcc_chan_acknowledge(pchan); @@ -384,9 +412,24 @@ pcc_mbox_request_channel(struct mbox_client *cl, int subspace_id) pcc_mchan = &pchan->chan; pcc_mchan->shmem = acpi_os_ioremap(pcc_mchan->shmem_base_addr, pcc_mchan->shmem_size); - if (pcc_mchan->shmem) - return pcc_mchan; + if (!pcc_mchan->shmem) + goto err; + pcc_mchan->manage_writes = false; + + /* This indicates that the channel is ready to accept messages. + * This needs to happen after the channel has registered + * its callback. There is no access point to do that in + * the mailbox API. That implies that the mailbox client must + * have set the allocate callback function prior to + * sending any messages. + */ + if (pchan->type == ACPI_PCCT_TYPE_EXT_PCC_SLAVE_SUBSPACE) + pcc_chan_reg_read_modify_write(&pchan->cmd_update); + + return pcc_mchan; + +err: mbox_free_channel(chan); return ERR_PTR(-ENXIO); } @@ -417,8 +460,38 @@ void pcc_mbox_free_channel(struct pcc_mbox_chan *pchan) } EXPORT_SYMBOL_GPL(pcc_mbox_free_channel); +static int pcc_write_to_buffer(struct mbox_chan *chan, void *data) +{ + struct pcc_chan_info *pchan = chan->con_priv; + struct pcc_mbox_chan *pcc_mbox_chan = &pchan->chan; + struct pcc_header *pcc_header = data; + + if (!pchan->chan.manage_writes) + return 0; + + /* The PCC header length includes the command field + * but not the other values from the header. + */ + int len = pcc_header->length - sizeof(u32) + sizeof(struct pcc_header); + u64 val; + + pcc_chan_reg_read(&pchan->cmd_complete, &val); + if (!val) { + pr_info("%s pchan->cmd_complete not set", __func__); + return -1; + } + memcpy_toio(pcc_mbox_chan->shmem, data, len); + return 0; +} + + /** - * pcc_send_data - Called from Mailbox Controller code. Used + * pcc_send_data - Called from Mailbox Controller code. If + * pchan->chan.rx_alloc is set, then the command complete + * flag is checked and the data is written to the shared + * buffer io memory. + * + * If pchan->chan.rx_alloc is not set, then it is used * here only to ring the channel doorbell. The PCC client * specific read/write is done in the client driver in * order to maintain atomicity over PCC channel once @@ -434,17 +507,37 @@ static int pcc_send_data(struct mbox_chan *chan, void *data) int ret; struct pcc_chan_info *pchan = chan->con_priv; + ret = pcc_write_to_buffer(chan, data); + if (ret) + return ret; + ret = pcc_chan_reg_read_modify_write(&pchan->cmd_update); if (ret) return ret; ret = pcc_chan_reg_read_modify_write(&pchan->db); + if (!ret && pchan->plat_irq > 0) pchan->chan_in_use = true; return ret; } + +static bool pcc_last_tx_done(struct mbox_chan *chan) +{ + struct pcc_chan_info *pchan = chan->con_priv; + u64 val; + + pcc_chan_reg_read(&pchan->cmd_complete, &val); + if (!val) + return false; + else + return true; +} + + + /** * pcc_startup - Called from Mailbox Controller code. Used here * to request the interrupt. @@ -490,6 +583,7 @@ static const struct mbox_chan_ops pcc_chan_ops = { .send_data = pcc_send_data, .startup = pcc_startup, .shutdown = pcc_shutdown, + .last_tx_done = pcc_last_tx_done, }; /** diff --git a/include/acpi/pcc.h b/include/acpi/pcc.h index 840bfc95bae3..9af3b502f839 100644 --- a/include/acpi/pcc.h +++ b/include/acpi/pcc.h @@ -17,6 +17,35 @@ struct pcc_mbox_chan { u32 latency; u32 max_access_rate; u16 min_turnaround_time; + + /* Set to true to indicate that the mailbox should manage + * writing the dat to the shared buffer. This differs from + * the case where the drivesr are writing to the buffer and + * using send_data only to ring the doorbell. If this flag + * is set, then the void * data parameter of send_data must + * point to a kernel-memory buffer formatted in accordance with + * the PCC specification. + * + * The active buffer management will include reading the + * notify_on_completion flag, and will then + * call mbox_chan_txdone when the acknowledgment interrupt is + * received. + */ + bool manage_writes; + + /* Optional callback that allows the driver + * to allocate the memory used for receiving + * messages. The return value is the location + * inside the buffer where the mailbox should write the data. + */ + void *(*rx_alloc)(struct mbox_client *cl, int size); +}; + +struct pcc_header { + u32 signature; + u32 flags; + u32 length; + u32 command; }; /* Generic Communications Channel Shared Memory Region */ From 33503c083fda048c77903460ac0429e1e2c0e341 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 8 Aug 2025 06:35:14 -0600 Subject: [PATCH 211/279] io_uring/memmap: cast nr_pages to size_t before shifting If the allocated size exceeds UINT_MAX, then it's necessary to cast the mr->nr_pages value to size_t to prevent it from overflowing. In practice this isn't much of a concern as the required memory size will have been validated upfront, and accounted to the user. And > 4GB sizes will be necessary to make the lack of a cast a problem, which greatly exceeds normal user locked_vm settings that are generally in the kb to mb range. However, if root is used, then accounting isn't done, and then it's possible to hit this issue. Link: https://lore.kernel.org/all/6895b298.050a0220.7f033.0059.GAE@google.com/ Cc: stable@vger.kernel.org Reported-by: syzbot+23727438116feb13df15@syzkaller.appspotmail.com Fixes: 087f997870a9 ("io_uring/memmap: implement mmap for regions") Signed-off-by: Jens Axboe --- io_uring/memmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/memmap.c b/io_uring/memmap.c index 725dc0bec24c..2e99dffddfc5 100644 --- a/io_uring/memmap.c +++ b/io_uring/memmap.c @@ -156,7 +156,7 @@ static int io_region_allocate_pages(struct io_ring_ctx *ctx, unsigned long mmap_offset) { gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN; - unsigned long size = mr->nr_pages << PAGE_SHIFT; + size_t size = (size_t) mr->nr_pages << PAGE_SHIFT; unsigned long nr_allocated; struct page **pages; void *p; From c875503a9b9082928d7d3fc60b5400d16fbfae4e Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Wed, 6 Aug 2025 18:27:56 +0800 Subject: [PATCH 212/279] net: hibmcge: fix rtnl deadlock issue Currently, the hibmcge netdev acquires the rtnl_lock in pci_error_handlers.reset_prepare() and releases it in pci_error_handlers.reset_done(). However, in the PCI framework: pci_reset_bus - __pci_reset_slot - pci_slot_save_and_disable_locked - pci_dev_save_and_disable - err_handler->reset_prepare(dev); In pci_slot_save_and_disable_locked(): list_for_each_entry(dev, &slot->bus->devices, bus_list) { if (!dev->slot || dev->slot!= slot) continue; pci_dev_save_and_disable(dev); if (dev->subordinate) pci_bus_save_and_disable_locked(dev->subordinate); } This will iterate through all devices under the current bus and execute err_handler->reset_prepare(), causing two devices of the hibmcge driver to sequentially request the rtnl_lock, leading to a deadlock. Since the driver now executes netif_device_detach() before the reset process, it will not concurrently with other netdev APIs, so there is no need to hold the rtnl_lock now. Therefore, this patch removes the rtnl_lock during the reset process and adjusts the position of HBG_NIC_STATE_RESETTING to ensure that multiple resets are not executed concurrently. Fixes: 3f5a61f6d504f ("net: hibmcge: Add reset supported in this module") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c index 503cfbfb4a8a..83cf75bf7a17 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c @@ -53,9 +53,11 @@ static int hbg_reset_prepare(struct hbg_priv *priv, enum hbg_reset_type type) { int ret; - ASSERT_RTNL(); + if (test_and_set_bit(HBG_NIC_STATE_RESETTING, &priv->state)) + return -EBUSY; if (netif_running(priv->netdev)) { + clear_bit(HBG_NIC_STATE_RESETTING, &priv->state); dev_warn(&priv->pdev->dev, "failed to reset because port is up\n"); return -EBUSY; @@ -64,7 +66,6 @@ static int hbg_reset_prepare(struct hbg_priv *priv, enum hbg_reset_type type) netif_device_detach(priv->netdev); priv->reset_type = type; - set_bit(HBG_NIC_STATE_RESETTING, &priv->state); clear_bit(HBG_NIC_STATE_RESET_FAIL, &priv->state); ret = hbg_hw_event_notify(priv, HBG_HW_EVENT_RESET); if (ret) { @@ -84,29 +85,26 @@ static int hbg_reset_done(struct hbg_priv *priv, enum hbg_reset_type type) type != priv->reset_type) return 0; - ASSERT_RTNL(); - - clear_bit(HBG_NIC_STATE_RESETTING, &priv->state); ret = hbg_rebuild(priv); if (ret) { priv->stats.reset_fail_cnt++; set_bit(HBG_NIC_STATE_RESET_FAIL, &priv->state); + clear_bit(HBG_NIC_STATE_RESETTING, &priv->state); dev_err(&priv->pdev->dev, "failed to rebuild after reset\n"); return ret; } netif_device_attach(priv->netdev); + clear_bit(HBG_NIC_STATE_RESETTING, &priv->state); dev_info(&priv->pdev->dev, "reset done\n"); return ret; } -/* must be protected by rtnl lock */ int hbg_reset(struct hbg_priv *priv) { int ret; - ASSERT_RTNL(); ret = hbg_reset_prepare(priv, HBG_RESET_TYPE_FUNCTION); if (ret) return ret; @@ -171,7 +169,6 @@ static void hbg_pci_err_reset_prepare(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct hbg_priv *priv = netdev_priv(netdev); - rtnl_lock(); hbg_reset_prepare(priv, HBG_RESET_TYPE_FLR); } @@ -181,7 +178,6 @@ static void hbg_pci_err_reset_done(struct pci_dev *pdev) struct hbg_priv *priv = netdev_priv(netdev); hbg_reset_done(priv, HBG_RESET_TYPE_FLR); - rtnl_unlock(); } static const struct pci_error_handlers hbg_pci_err_handler = { From 7004b26f0b64331143eb0b312e77a357a11427ce Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Wed, 6 Aug 2025 18:27:57 +0800 Subject: [PATCH 213/279] net: hibmcge: fix the division by zero issue When the network port is down, the queue is released, and ring->len is 0. In debugfs, hbg_get_queue_used_num() will be called, which may lead to a division by zero issue. This patch adds a check, if ring->len is 0, hbg_get_queue_used_num() directly returns 0. Fixes: 40735e7543f9 ("net: hibmcge: Implement .ndo_start_xmit function") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.h index 2883a5899ae2..8b6110599e10 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.h @@ -29,7 +29,12 @@ static inline bool hbg_fifo_is_full(struct hbg_priv *priv, enum hbg_dir dir) static inline u32 hbg_get_queue_used_num(struct hbg_ring *ring) { - return (ring->ntu + ring->len - ring->ntc) % ring->len; + u32 len = READ_ONCE(ring->len); + + if (!len) + return 0; + + return (READ_ONCE(ring->ntu) + len - READ_ONCE(ring->ntc)) % len; } netdev_tx_t hbg_net_start_xmit(struct sk_buff *skb, struct net_device *netdev); From 62c50180ffda01468e640ac14925503796f255e2 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Wed, 6 Aug 2025 18:27:58 +0800 Subject: [PATCH 214/279] net: hibmcge: fix the np_link_fail error reporting issue Currently, after modifying device port mode, the np_link_ok state is immediately checked. At this point, the device may not yet ready, leading to the querying of an intermediate state. This patch will poll to check if np_link is ok after modifying device port mode, and only report np_link_fail upon timeout. Fixes: e0306637e85d ("net: hibmcge: Add support for mac link exception handling feature") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c index 8cca8316ba40..d0aa0661ecd4 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c @@ -12,6 +12,8 @@ #define HBG_HW_EVENT_WAIT_TIMEOUT_US (2 * 1000 * 1000) #define HBG_HW_EVENT_WAIT_INTERVAL_US (10 * 1000) +#define HBG_MAC_LINK_WAIT_TIMEOUT_US (500 * 1000) +#define HBG_MAC_LINK_WAIT_INTERVAL_US (5 * 1000) /* little endian or big endian. * ctrl means packet description, data means skb packet data */ @@ -228,6 +230,9 @@ void hbg_hw_fill_buffer(struct hbg_priv *priv, u32 buffer_dma_addr) void hbg_hw_adjust_link(struct hbg_priv *priv, u32 speed, u32 duplex) { + u32 link_status; + int ret; + hbg_hw_mac_enable(priv, HBG_STATUS_DISABLE); hbg_reg_write_field(priv, HBG_REG_PORT_MODE_ADDR, @@ -239,8 +244,14 @@ void hbg_hw_adjust_link(struct hbg_priv *priv, u32 speed, u32 duplex) hbg_hw_mac_enable(priv, HBG_STATUS_ENABLE); - if (!hbg_reg_read_field(priv, HBG_REG_AN_NEG_STATE_ADDR, - HBG_REG_AN_NEG_STATE_NP_LINK_OK_B)) + /* wait MAC link up */ + ret = readl_poll_timeout(priv->io_base + HBG_REG_AN_NEG_STATE_ADDR, + link_status, + FIELD_GET(HBG_REG_AN_NEG_STATE_NP_LINK_OK_B, + link_status), + HBG_MAC_LINK_WAIT_INTERVAL_US, + HBG_MAC_LINK_WAIT_TIMEOUT_US); + if (ret) hbg_np_link_fail_task_schedule(priv); } From 06feac15406f4f66f4c0c6ea60b10d44775d4133 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Tue, 5 Aug 2025 23:08:12 +0530 Subject: [PATCH 215/279] net: ti: icssg-prueth: Fix emac link speed handling When link settings are changed emac->speed is populated by emac_adjust_link(). The link speed and other settings are then written into the DRAM. However if both ports are brought down after this and brought up again or if the operating mode is changed and a firmware reload is needed, the DRAM is cleared by icssg_config(). As a result the link settings are lost. Fix this by calling emac_adjust_link() after icssg_config(). This re populates the settings in the DRAM after a new firmware load. Fixes: 9facce84f406 ("net: ti: icssg-prueth: Fix firmware load sequence.") Signed-off-by: MD Danish Anwar Reviewed-by: Andrew Lunn Message-ID: <20250805173812.2183161-1-danishanwar@ti.com> Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 2b973d6e2341..6c7d776ae4ee 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -50,6 +50,8 @@ /* CTRLMMR_ICSSG_RGMII_CTRL register bits */ #define ICSSG_CTRL_RGMII_ID_MODE BIT(24) +static void emac_adjust_link(struct net_device *ndev); + static int emac_get_tx_ts(struct prueth_emac *emac, struct emac_tx_ts_response *rsp) { @@ -229,6 +231,10 @@ static int prueth_emac_common_start(struct prueth *prueth) ret = icssg_config(prueth, emac, slice); if (ret) goto disable_class; + + mutex_lock(&emac->ndev->phydev->lock); + emac_adjust_link(emac->ndev); + mutex_unlock(&emac->ndev->phydev->lock); } ret = prueth_emac_start(prueth); From 64fdaa94bfe0cca3a0f4b2dd922486c5f59fe678 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Aug 2025 17:36:54 -0700 Subject: [PATCH 216/279] net: page_pool: allow enabling recycling late, fix false positive warning Page pool can have pages "directly" (locklessly) recycled to it, if the NAPI that owns the page pool is scheduled to run on the same CPU. To make this safe we check that the NAPI is disabled while we destroy the page pool. In most cases NAPI and page pool lifetimes are tied together so this happens naturally. The queue API expects the following order of calls: -> mem_alloc alloc new pp -> stop napi_disable -> start napi_enable -> mem_free free old pp Here we allocate the page pool in ->mem_alloc and free in ->mem_free. But the NAPIs are only stopped between ->stop and ->start. We created page_pool_disable_direct_recycling() to safely shut down the recycling in ->stop. This way the page_pool_destroy() call in ->mem_free doesn't have to worry about recycling any more. Unfortunately, the page_pool_disable_direct_recycling() is not enough to deal with failures which necessitate freeing the _new_ page pool. If we hit a failure in ->mem_alloc or ->stop the new page pool has to be freed while the NAPI is active (assuming driver attaches the page pool to an existing NAPI instance and doesn't reallocate NAPIs). Freeing the new page pool is technically safe because it hasn't been used for any packets, yet, so there can be no recycling. But the check in napi_assert_will_not_race() has no way of knowing that. We could check if page pool is empty but that'd make the check much less likely to trigger during development. Add page_pool_enable_direct_recycling(), pairing with page_pool_disable_direct_recycling(). It will allow us to create the new page pools in "disabled" state and only enable recycling when we know the reconfig operation will not fail. Coincidentally it will also let us re-enable the recycling for the old pool, if the reconfig failed: -> mem_alloc (new) -> stop (old) # disables direct recycling for old -> start (new) # fail!! -> start (old) # go back to old pp but direct recycling is lost :( -> mem_free (new) The new helper is idempotent to make the life easier for drivers, which can operate in HDS mode and support zero-copy Rx. The driver can call the helper twice whether there are two pools or it has multiple references to a single pool. Fixes: 40eca00ae605 ("bnxt_en: unlink page pool when stopping Rx queue") Tested-by: David Wei Link: https://patch.msgid.link/20250805003654.2944974-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 ++++++- include/net/page_pool/types.h | 2 ++ net/core/page_pool.c | 29 +++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5578ddcb465d..76a4c5ae8000 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3819,7 +3819,6 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp, if (BNXT_RX_PAGE_MODE(bp)) pp.pool_size += bp->rx_ring_size / rx_size_fac; pp.nid = numa_node; - pp.napi = &rxr->bnapi->napi; pp.netdev = bp->dev; pp.dev = &bp->pdev->dev; pp.dma_dir = bp->rx_dir; @@ -3851,6 +3850,12 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp, return PTR_ERR(pool); } +static void bnxt_enable_rx_page_pool(struct bnxt_rx_ring_info *rxr) +{ + page_pool_enable_direct_recycling(rxr->head_pool, &rxr->bnapi->napi); + page_pool_enable_direct_recycling(rxr->page_pool, &rxr->bnapi->napi); +} + static int bnxt_alloc_rx_agg_bmap(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) { u16 mem_size; @@ -3889,6 +3894,7 @@ static int bnxt_alloc_rx_rings(struct bnxt *bp) rc = bnxt_alloc_rx_page_pool(bp, rxr, cpu_node); if (rc) return rc; + bnxt_enable_rx_page_pool(rxr); rc = xdp_rxq_info_reg(&rxr->xdp_rxq, bp->dev, i, 0); if (rc < 0) @@ -16031,6 +16037,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) goto err_reset; } + bnxt_enable_rx_page_pool(rxr); napi_enable_locked(&bnapi->napi); bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons); diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 431b593de709..1509a536cb85 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -265,6 +265,8 @@ struct page_pool *page_pool_create_percpu(const struct page_pool_params *params, struct xdp_mem_info; #ifdef CONFIG_PAGE_POOL +void page_pool_enable_direct_recycling(struct page_pool *pool, + struct napi_struct *napi); void page_pool_disable_direct_recycling(struct page_pool *pool); void page_pool_destroy(struct page_pool *pool); void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 05e2e22a8f7c..343a6cac21e3 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -1201,6 +1201,35 @@ void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), pool->xdp_mem_id = mem->id; } +/** + * page_pool_enable_direct_recycling() - mark page pool as owned by NAPI + * @pool: page pool to modify + * @napi: NAPI instance to associate the page pool with + * + * Associate a page pool with a NAPI instance for lockless page recycling. + * This is useful when a new page pool has to be added to a NAPI instance + * without disabling that NAPI instance, to mark the point at which control + * path "hands over" the page pool to the NAPI instance. In most cases driver + * can simply set the @napi field in struct page_pool_params, and does not + * have to call this helper. + * + * The function is idempotent, but does not implement any refcounting. + * Single page_pool_disable_direct_recycling() will disable recycling, + * no matter how many times enable was called. + */ +void page_pool_enable_direct_recycling(struct page_pool *pool, + struct napi_struct *napi) +{ + if (READ_ONCE(pool->p.napi) == napi) + return; + WARN_ON(!napi || pool->p.napi); + + mutex_lock(&page_pools_lock); + WRITE_ONCE(pool->p.napi, napi); + mutex_unlock(&page_pools_lock); +} +EXPORT_SYMBOL(page_pool_enable_direct_recycling); + void page_pool_disable_direct_recycling(struct page_pool *pool) { /* Disable direct recycling based on pool->cpuid. From 5f1d1d14db7dabce9c815e7d7cd351f8d58b8585 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Tue, 5 Aug 2025 07:23:18 -0700 Subject: [PATCH 217/279] net: ti: icss-iep: Fix incorrect type for return value in extts_enable() The variable ret in icss_iep_extts_enable() was incorrectly declared as u32, while the function returns int and may return negative error codes. This will cause sign extension issues and incorrect error propagation. Update ret to be int to fix error handling. This change corrects the declaration to avoid potential type mismatch. Fixes: c1e0230eeaab ("net: ti: icss-iep: Add IEP driver") Signed-off-by: Alok Tiwari Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250805142323.1949406-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icss_iep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c index 50bfbc2779e4..d8c9fe1d98c4 100644 --- a/drivers/net/ethernet/ti/icssg/icss_iep.c +++ b/drivers/net/ethernet/ti/icssg/icss_iep.c @@ -621,7 +621,8 @@ static int icss_iep_pps_enable(struct icss_iep *iep, int on) static int icss_iep_extts_enable(struct icss_iep *iep, u32 index, int on) { - u32 val, cap, ret = 0; + u32 val, cap; + int ret = 0; mutex_lock(&iep->ptp_clk_mutex); From aba0c94f61ec05315fa7815d21aefa4c87f6a9f4 Mon Sep 17 00:00:00 2001 From: Budimir Markovic Date: Thu, 7 Aug 2025 04:18:11 +0000 Subject: [PATCH 218/279] vsock: Do not allow binding to VMADDR_PORT_ANY It is possible for a vsock to autobind to VMADDR_PORT_ANY. This can cause a use-after-free when a connection is made to the bound socket. The socket returned by accept() also has port VMADDR_PORT_ANY but is not on the list of unbound sockets. Binding it will result in an extra refcount decrement similar to the one fixed in fcdd2242c023 (vsock: Keep the binding until socket destruction). Modify the check in __vsock_bind_connectible() to also prevent binding to VMADDR_PORT_ANY. Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") Reported-by: Budimir Markovic Signed-off-by: Budimir Markovic Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20250807041811.678-1-markovicbudimir@gmail.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/af_vsock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index ead6a3c14b87..bebb355f3ffe 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -689,7 +689,8 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk, unsigned int i; for (i = 0; i < MAX_PORT_RETRIES; i++) { - if (port <= LAST_RESERVED_PORT) + if (port == VMADDR_PORT_ANY || + port <= LAST_RESERVED_PORT) port = LAST_RESERVED_PORT + 1; new_addr.svm_port = port++; From d44c40e4e30f6aa1ca8ed1c8b715d4c5829f0560 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 10 Jun 2025 18:07:49 -0400 Subject: [PATCH 219/279] tools/power turbostat: verify arguments to params --show and --hide $ sudo turbostat --quiet --show junk turbostat: Counter 'junk' can not be added. Previously, invalid arguments to --show and --hide were silently ignored Acked-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 33 +++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 33a54a9e0781..4056b7e26a0f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2310,6 +2310,8 @@ char *deferred_add_names[MAX_DEFERRED]; char *deferred_skip_names[MAX_DEFERRED]; int deferred_add_index; int deferred_skip_index; +unsigned int deferred_add_consumed; +unsigned int deferred_skip_consumed; /* * HIDE_LIST - hide this list of counters, show the rest [default] @@ -10512,8 +10514,10 @@ int is_deferred_add(char *name) int i; for (i = 0; i < deferred_add_index; ++i) - if (!strcmp(name, deferred_add_names[i])) + if (!strcmp(name, deferred_add_names[i])) { + deferred_add_consumed |= (1 << i); return 1; + } return 0; } @@ -10522,11 +10526,34 @@ int is_deferred_skip(char *name) int i; for (i = 0; i < deferred_skip_index; ++i) - if (!strcmp(name, deferred_skip_names[i])) + if (!strcmp(name, deferred_skip_names[i])) { + deferred_skip_consumed |= (1 << i); return 1; + } return 0; } +void verify_deferred_consumed(void) +{ + int i; + int fail = 0; + + for (i = 0; i < deferred_add_index; ++i) { + if (!(deferred_add_consumed & (1 << i))) { + warnx("Counter '%s' can not be added.", deferred_add_names[i]); + fail++; + } + } + for (i = 0; i < deferred_skip_index; ++i) { + if (!(deferred_skip_consumed & (1 << i))) { + warnx("Counter '%s' can not be skipped.", deferred_skip_names[i]); + fail++; + } + } + if (fail) + exit(-EINVAL); +} + void probe_cpuidle_residency(void) { char path[64]; @@ -10885,6 +10912,8 @@ int main(int argc, char **argv) probe_cpuidle_residency(); probe_cpuidle_counts(); + verify_deferred_consumed(); + if (!getuid()) set_rlimit(); From 6ea0ec1b958a84aff9f03fb0ae4613a4d5bed3ea Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Fri, 13 Jun 2025 09:54:23 -0700 Subject: [PATCH 220/279] tools/power turbostat: Fix build with musl turbostat.c: In function 'parse_int_file': turbostat.c:5567:19: error: 'PATH_MAX' undeclared (first use in this function) 5567 | char path[PATH_MAX]; | ^~~~~~~~ turbostat.c: In function 'probe_graphics': turbostat.c:6787:19: error: 'PATH_MAX' undeclared (first use in this function) 6787 | char path[PATH_MAX]; | ^~~~~~~~ Signed-off-by: Calvin Owens Reviewed-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4056b7e26a0f..778aee48c6e6 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -67,6 +67,7 @@ #include #include #include +#include #define UNUSED(x) (void)(x) From d34fe509f5f76d9dc36291242d67c6528027ebbd Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Fri, 13 Jun 2025 19:20:28 -0700 Subject: [PATCH 221/279] tools/power turbostat: Handle cap_get_proc() ENOSYS Kernels configured with CONFIG_MULTIUSER=n have no cap_get_proc(). Check for ENOSYS to recognize this case, and continue on to attempt to access the requested MSRs (such as temperature). Signed-off-by: Calvin Owens Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 778aee48c6e6..fa81e273d3a2 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -6574,8 +6574,16 @@ int check_for_cap_sys_rawio(void) int ret = 0; caps = cap_get_proc(); - if (caps == NULL) + if (caps == NULL) { + /* + * CONFIG_MULTIUSER=n kernels have no cap_get_proc() + * Allow them to continue and attempt to access MSRs + */ + if (errno == ENOSYS) + return 0; + return 1; + } if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) { ret = 1; From 44207567fa64e995d4f2ec2d45af4c947cb1a465 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 17 Jun 2025 20:48:59 +0800 Subject: [PATCH 222/279] tools/power turbostat: Fix bogus SysWatt for forked program Similar to delta_cpu(), delta_platform() is called in turbostat main loop. This ensures accurate SysWatt readings in periodic monitoring mode $ sudo turbostat -S -q --show power -i 1 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt RAMWatt PKG_% RAM_% SysWatt 60 61 6.21 1.13 0.16 0.00 0.00 0.00 13.07 58 61 6.00 1.07 0.18 0.00 0.00 0.00 12.75 58 61 5.74 1.05 0.17 0.00 0.00 0.00 12.22 58 60 6.27 1.11 0.24 0.00 0.00 0.00 13.55 However, delta_platform() is missing for forked program and causes bogus SysWatt reporting, $ sudo turbostat -S -q --show power sleep 1 1.004736 sec CoreTmp PkgTmp PkgWatt CorWatt GFXWatt RAMWatt PKG_% RAM_% SysWatt 57 58 6.05 1.02 0.16 0.00 0.00 0.00 0.03 Add missing delta_platform() for forked program. Fixes: e5f687b89bc2 ("tools/power turbostat: Add RAPL psys as a built-in counter") Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index fa81e273d3a2..4cb3f1aa3a88 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -9827,6 +9827,7 @@ int fork_it(char **argv) timersub(&tv_odd, &tv_even, &tv_delta); if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) fprintf(outf, "%s: Counter reset detected\n", progname); + delta_platform(&platform_counters_odd, &platform_counters_even); compute_average(EVEN_COUNTERS); format_all_counters(EVEN_COUNTERS); From fd60d8a086191fe33c2d719732d2482052fa6805 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 7 Aug 2025 15:40:11 -0400 Subject: [PATCH 223/279] sctp: linearize cloned gso packets in sctp_rcv A cloned head skb still shares these frag skbs in fraglist with the original head skb. It's not safe to access these frag skbs. syzbot reported two use-of-uninitialized-memory bugs caused by this: BUG: KMSAN: uninit-value in sctp_inq_pop+0x15b7/0x1920 net/sctp/inqueue.c:211 sctp_inq_pop+0x15b7/0x1920 net/sctp/inqueue.c:211 sctp_assoc_bh_rcv+0x1a7/0xc50 net/sctp/associola.c:998 sctp_inq_push+0x2ef/0x380 net/sctp/inqueue.c:88 sctp_backlog_rcv+0x397/0xdb0 net/sctp/input.c:331 sk_backlog_rcv+0x13b/0x420 include/net/sock.h:1122 __release_sock+0x1da/0x330 net/core/sock.c:3106 release_sock+0x6b/0x250 net/core/sock.c:3660 sctp_wait_for_connect+0x487/0x820 net/sctp/socket.c:9360 sctp_sendmsg_to_asoc+0x1ec1/0x1f00 net/sctp/socket.c:1885 sctp_sendmsg+0x32b9/0x4a80 net/sctp/socket.c:2031 inet_sendmsg+0x25a/0x280 net/ipv4/af_inet.c:851 sock_sendmsg_nosec net/socket.c:718 [inline] and BUG: KMSAN: uninit-value in sctp_assoc_bh_rcv+0x34e/0xbc0 net/sctp/associola.c:987 sctp_assoc_bh_rcv+0x34e/0xbc0 net/sctp/associola.c:987 sctp_inq_push+0x2a3/0x350 net/sctp/inqueue.c:88 sctp_backlog_rcv+0x3c7/0xda0 net/sctp/input.c:331 sk_backlog_rcv+0x142/0x420 include/net/sock.h:1148 __release_sock+0x1d3/0x330 net/core/sock.c:3213 release_sock+0x6b/0x270 net/core/sock.c:3767 sctp_wait_for_connect+0x458/0x820 net/sctp/socket.c:9367 sctp_sendmsg_to_asoc+0x223a/0x2260 net/sctp/socket.c:1886 sctp_sendmsg+0x3910/0x49f0 net/sctp/socket.c:2032 inet_sendmsg+0x269/0x2a0 net/ipv4/af_inet.c:851 sock_sendmsg_nosec net/socket.c:712 [inline] This patch fixes it by linearizing cloned gso packets in sctp_rcv(). Fixes: 90017accff61 ("sctp: Add GSO support") Reported-by: syzbot+773e51afe420baaf0e2b@syzkaller.appspotmail.com Reported-by: syzbot+70a42f45e76bede082be@syzkaller.appspotmail.com Signed-off-by: Xin Long Reviewed-by: Marcelo Ricardo Leitner Link: https://patch.msgid.link/dd7dc337b99876d4132d0961f776913719f7d225.1754595611.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/sctp/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 2dc2666988fb..7e99894778d4 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -117,7 +117,7 @@ int sctp_rcv(struct sk_buff *skb) * it's better to just linearize it otherwise crc computing * takes longer. */ - if ((!is_gso && skb_linearize(skb)) || + if (((!is_gso || skb_cloned(skb)) && skb_linearize(skb)) || !pskb_may_pull(skb, sizeof(struct sctphdr))) goto discard_it; From 829f45f9d992019b49f08ab425ca11288b084aed Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Wed, 6 Aug 2025 17:54:53 -0700 Subject: [PATCH 224/279] net: dsa: microchip: Fix KSZ8863 reset problem ksz8873_valid_regs[] was added for register access for KSZ8863/KSZ8873 switches, but the reset register is not in the list so ksz8_reset_switch() does not take any effect. Replace regmap_update_bits() using ksz_regmap_8 with ksz_rmw8() so that an error message will be given if the register is not defined. A side effect of not resetting the switch is the static MAC table is not cleared. Further additions to the table will show write error as there are only 8 entries in the table. Fixes: d0dec3333040 ("net: dsa: microchip: Add register access control for KSZ8873 chip") Signed-off-by: Tristram Ha Reviewed-by: Oleksij Rempel Link: https://patch.msgid.link/20250807005453.8306-1-Tristram.Ha@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz8.c | 20 +++++++++++--------- drivers/net/dsa/microchip/ksz_common.c | 1 + 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz8.c b/drivers/net/dsa/microchip/ksz8.c index 76e490070e9c..c354abdafc1b 100644 --- a/drivers/net/dsa/microchip/ksz8.c +++ b/drivers/net/dsa/microchip/ksz8.c @@ -36,15 +36,14 @@ static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set) { - regmap_update_bits(ksz_regmap_8(dev), addr, bits, set ? bits : 0); + ksz_rmw8(dev, addr, bits, set ? bits : 0); } static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits, bool set) { - regmap_update_bits(ksz_regmap_8(dev), - dev->dev_ops->get_port_addr(port, offset), - bits, set ? bits : 0); + ksz_rmw8(dev, dev->dev_ops->get_port_addr(port, offset), bits, + set ? bits : 0); } /** @@ -1955,16 +1954,19 @@ int ksz8_setup(struct dsa_switch *ds) ksz_cfg(dev, S_LINK_AGING_CTRL, SW_LINK_AUTO_AGING, true); /* Enable aggressive back off algorithm in half duplex mode. */ - regmap_update_bits(ksz_regmap_8(dev), REG_SW_CTRL_1, - SW_AGGR_BACKOFF, SW_AGGR_BACKOFF); + ret = ksz_rmw8(dev, REG_SW_CTRL_1, SW_AGGR_BACKOFF, SW_AGGR_BACKOFF); + if (ret) + return ret; /* * Make sure unicast VLAN boundary is set as default and * enable no excessive collision drop. */ - regmap_update_bits(ksz_regmap_8(dev), REG_SW_CTRL_2, - UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP, - UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP); + ret = ksz_rmw8(dev, REG_SW_CTRL_2, + UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP, + UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP); + if (ret) + return ret; ksz_cfg(dev, S_REPLACE_VID_CTRL, SW_REPLACE_VID, false); diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 7292bfe2f7ca..4cb14288ff0f 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -1447,6 +1447,7 @@ static const struct regmap_range ksz8873_valid_regs[] = { regmap_reg_range(0x3f, 0x3f), /* advanced control registers */ + regmap_reg_range(0x43, 0x43), regmap_reg_range(0x60, 0x6f), regmap_reg_range(0x70, 0x75), regmap_reg_range(0x76, 0x78), From 53898ebabe843bfa7baea9dae152797d5d0563c9 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 6 Aug 2025 14:37:25 -0700 Subject: [PATCH 225/279] net: lapbether: ignore ops-locked netdevs Syzkaller managed to trigger lock dependency in xsk_notify via register_netdevice. As discussed in [0], using register_netdevice in the notifiers is problematic so skip adding lapbeth for ops-locked devices. xsk_notifier+0xa4/0x280 net/xdp/xsk.c:1645 notifier_call_chain+0xbc/0x410 kernel/notifier.c:85 call_netdevice_notifiers_info+0xbe/0x140 net/core/dev.c:2230 call_netdevice_notifiers_extack net/core/dev.c:2268 [inline] call_netdevice_notifiers net/core/dev.c:2282 [inline] unregister_netdevice_many_notify+0xf9d/0x2700 net/core/dev.c:12077 unregister_netdevice_many net/core/dev.c:12140 [inline] unregister_netdevice_queue+0x305/0x3f0 net/core/dev.c:11984 register_netdevice+0x18f1/0x2270 net/core/dev.c:11149 lapbeth_new_device drivers/net/wan/lapbether.c:420 [inline] lapbeth_device_event+0x5b1/0xbe0 drivers/net/wan/lapbether.c:462 notifier_call_chain+0xbc/0x410 kernel/notifier.c:85 call_netdevice_notifiers_info+0xbe/0x140 net/core/dev.c:2230 call_netdevice_notifiers_extack net/core/dev.c:2268 [inline] call_netdevice_notifiers net/core/dev.c:2282 [inline] __dev_notify_flags+0x12c/0x2e0 net/core/dev.c:9497 netif_change_flags+0x108/0x160 net/core/dev.c:9526 dev_change_flags+0xba/0x250 net/core/dev_api.c:68 devinet_ioctl+0x11d5/0x1f50 net/ipv4/devinet.c:1200 inet_ioctl+0x3a7/0x3f0 net/ipv4/af_inet.c:1001 0: https://lore.kernel.org/netdev/20250625140357.6203d0af@kernel.org/ Fixes: 4c975fd70002 ("net: hold instance lock during NETDEV_REGISTER/UP") Suggested-by: Jakub Kicinski Reported-by: syzbot+e67ea9c235b13b4f0020@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e67ea9c235b13b4f0020 Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250806213726.1383379-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/net/wan/lapbether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 995a7207bdf8..f357a7ac70ac 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -81,7 +81,7 @@ static struct lapbethdev *lapbeth_get_x25_dev(struct net_device *dev) static __inline__ int dev_is_ethdev(struct net_device *dev) { - return dev->type == ARPHRD_ETHER && strncmp(dev->name, "dummy", 5); + return dev->type == ARPHRD_ETHER && !netdev_need_ops_lock(dev); } /* ------------------------------------------------------------------------ */ From c64237960819aee1766d03f446ae6de94b1e3f73 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 6 Aug 2025 14:37:26 -0700 Subject: [PATCH 226/279] hamradio: ignore ops-locked netdevs Syzkaller managed to trigger lock dependency in xsk_notify via register_netdevice. As discussed in [0], using register_netdevice in the notifiers is problematic so skip adding hamradio for ops-locked devices. xsk_notifier+0x89/0x230 net/xdp/xsk.c:1664 notifier_call_chain+0x1b6/0x3e0 kernel/notifier.c:85 call_netdevice_notifiers_extack net/core/dev.c:2267 [inline] call_netdevice_notifiers net/core/dev.c:2281 [inline] unregister_netdevice_many_notify+0x14d7/0x1ff0 net/core/dev.c:12156 unregister_netdevice_many net/core/dev.c:12219 [inline] unregister_netdevice_queue+0x33c/0x380 net/core/dev.c:12063 register_netdevice+0x1689/0x1ae0 net/core/dev.c:11241 bpq_new_device drivers/net/hamradio/bpqether.c:481 [inline] bpq_device_event+0x491/0x600 drivers/net/hamradio/bpqether.c:523 notifier_call_chain+0x1b6/0x3e0 kernel/notifier.c:85 call_netdevice_notifiers_extack net/core/dev.c:2267 [inline] call_netdevice_notifiers net/core/dev.c:2281 [inline] __dev_notify_flags+0x18d/0x2e0 net/core/dev.c:-1 netif_change_flags+0xe8/0x1a0 net/core/dev.c:9608 dev_change_flags+0x130/0x260 net/core/dev_api.c:68 devinet_ioctl+0xbb4/0x1b50 net/ipv4/devinet.c:1200 inet_ioctl+0x3c0/0x4c0 net/ipv4/af_inet.c:1001 0: https://lore.kernel.org/netdev/20250625140357.6203d0af@kernel.org/ Fixes: 4c975fd70002 ("net: hold instance lock during NETDEV_REGISTER/UP") Suggested-by: Jakub Kicinski Reported-by: syzbot+e6300f66a999a6612477@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e6300f66a999a6612477 Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250806213726.1383379-2-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/net/hamradio/bpqether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 0e0fe32d2da4..045c5177262e 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -138,7 +138,7 @@ static inline struct net_device *bpq_get_ax25_dev(struct net_device *dev) static inline int dev_is_ethdev(struct net_device *dev) { - return dev->type == ARPHRD_ETHER && strncmp(dev->name, "dummy", 5); + return dev->type == ARPHRD_ETHER && !netdev_need_ops_lock(dev); } /* ------------------------------------------------------------------------ */ From 33caa208dba6fa639e8a92fd0c8320b652e5550c Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Wed, 6 Aug 2025 13:21:51 -0700 Subject: [PATCH 227/279] hv_netvsc: Fix panic during namespace deletion with VF The existing code move the VF NIC to new namespace when NETDEV_REGISTER is received on netvsc NIC. During deletion of the namespace, default_device_exit_batch() >> default_device_exit_net() is called. When netvsc NIC is moved back and registered to the default namespace, it automatically brings VF NIC back to the default namespace. This will cause the default_device_exit_net() >> for_each_netdev_safe loop unable to detect the list end, and hit NULL ptr: [ 231.449420] mana 7870:00:00.0 enP30832s1: Moved VF to namespace with: eth0 [ 231.449656] BUG: kernel NULL pointer dereference, address: 0000000000000010 [ 231.450246] #PF: supervisor read access in kernel mode [ 231.450579] #PF: error_code(0x0000) - not-present page [ 231.450916] PGD 17b8a8067 P4D 0 [ 231.451163] Oops: Oops: 0000 [#1] SMP NOPTI [ 231.451450] CPU: 82 UID: 0 PID: 1394 Comm: kworker/u768:1 Not tainted 6.16.0-rc4+ #3 VOLUNTARY [ 231.452042] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 11/21/2024 [ 231.452692] Workqueue: netns cleanup_net [ 231.452947] RIP: 0010:default_device_exit_batch+0x16c/0x3f0 [ 231.453326] Code: c0 0c f5 b3 e8 d5 db fe ff 48 85 c0 74 15 48 c7 c2 f8 fd ca b2 be 10 00 00 00 48 8d 7d c0 e8 7b 77 25 00 49 8b 86 28 01 00 00 <48> 8b 50 10 4c 8b 2a 4c 8d 62 f0 49 83 ed 10 4c 39 e0 0f 84 d6 00 [ 231.454294] RSP: 0018:ff75fc7c9bf9fd00 EFLAGS: 00010246 [ 231.454610] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 61c8864680b583eb [ 231.455094] RDX: ff1fa9f71462d800 RSI: ff75fc7c9bf9fd38 RDI: 0000000030766564 [ 231.455686] RBP: ff75fc7c9bf9fd78 R08: 0000000000000000 R09: 0000000000000000 [ 231.456126] R10: 0000000000000001 R11: 0000000000000004 R12: ff1fa9f70088e340 [ 231.456621] R13: ff1fa9f70088e340 R14: ffffffffb3f50c20 R15: ff1fa9f7103e6340 [ 231.457161] FS: 0000000000000000(0000) GS:ff1faa6783a08000(0000) knlGS:0000000000000000 [ 231.457707] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 231.458031] CR2: 0000000000000010 CR3: 0000000179ab2006 CR4: 0000000000b73ef0 [ 231.458434] Call Trace: [ 231.458600] [ 231.458777] ops_undo_list+0x100/0x220 [ 231.459015] cleanup_net+0x1b8/0x300 [ 231.459285] process_one_work+0x184/0x340 To fix it, move the ns change to a workqueue, and take rtnl_lock to avoid changing the netdev list when default_device_exit_net() is using it. Cc: stable@vger.kernel.org Fixes: 4c262801ea60 ("hv_netvsc: Fix VF namespace also in synthetic NIC NETDEV_REGISTER event") Signed-off-by: Haiyang Zhang Link: https://patch.msgid.link/1754511711-11188-1-git-send-email-haiyangz@linux.microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/hyperv_net.h | 3 +++ drivers/net/hyperv/netvsc_drv.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index cb6f5482d203..7397c693f984 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -1061,6 +1061,7 @@ struct net_device_context { struct net_device __rcu *vf_netdev; struct netvsc_vf_pcpu_stats __percpu *vf_stats; struct delayed_work vf_takeover; + struct delayed_work vfns_work; /* 1: allocated, serial number is valid. 0: not allocated */ u32 vf_alloc; @@ -1075,6 +1076,8 @@ struct net_device_context { struct netvsc_device_info *saved_netvsc_dev_info; }; +void netvsc_vfns_work(struct work_struct *w); + /* Azure hosts don't support non-TCP port numbers in hashing for fragmented * packets. We can use ethtool to change UDP hash level when necessary. */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index f44753756358..39c892e46cb0 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2522,6 +2522,7 @@ static int netvsc_probe(struct hv_device *dev, spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup); + INIT_DELAYED_WORK(&net_device_ctx->vfns_work, netvsc_vfns_work); net_device_ctx->vf_stats = netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats); @@ -2666,6 +2667,8 @@ static void netvsc_remove(struct hv_device *dev) cancel_delayed_work_sync(&ndev_ctx->dwork); rtnl_lock(); + cancel_delayed_work_sync(&ndev_ctx->vfns_work); + nvdev = rtnl_dereference(ndev_ctx->nvdev); if (nvdev) { cancel_work_sync(&nvdev->subchan_work); @@ -2707,6 +2710,7 @@ static int netvsc_suspend(struct hv_device *dev) cancel_delayed_work_sync(&ndev_ctx->dwork); rtnl_lock(); + cancel_delayed_work_sync(&ndev_ctx->vfns_work); nvdev = rtnl_dereference(ndev_ctx->nvdev); if (nvdev == NULL) { @@ -2800,6 +2804,27 @@ static void netvsc_event_set_vf_ns(struct net_device *ndev) } } +void netvsc_vfns_work(struct work_struct *w) +{ + struct net_device_context *ndev_ctx = + container_of(w, struct net_device_context, vfns_work.work); + struct net_device *ndev; + + if (!rtnl_trylock()) { + schedule_delayed_work(&ndev_ctx->vfns_work, 1); + return; + } + + ndev = hv_get_drvdata(ndev_ctx->device_ctx); + if (!ndev) + goto out; + + netvsc_event_set_vf_ns(ndev); + +out: + rtnl_unlock(); +} + /* * On Hyper-V, every VF interface is matched with a corresponding * synthetic interface. The synthetic interface is presented first @@ -2810,10 +2835,12 @@ static int netvsc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + struct net_device_context *ndev_ctx; int ret = 0; if (event_dev->netdev_ops == &device_ops && event == NETDEV_REGISTER) { - netvsc_event_set_vf_ns(event_dev); + ndev_ctx = netdev_priv(event_dev); + schedule_delayed_work(&ndev_ctx->vfns_work, 0); return NOTIFY_DONE; } From d240b441b5cbb389f90fce37edb9ef76a3c9a42b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Mon, 23 Jun 2025 13:24:25 -0700 Subject: [PATCH 228/279] tools/power turbostat.8: Document Totl%C0, Any%C0, GFX%C0, CPUGFX% columns Explain the meaning of the Totl%C0, Any%C0, GFX%C0, CPUGFX% columns. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index fb11108aaf42..db3888b8af12 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -186,6 +186,14 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBSAMAMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/drm/card0/gt/gt1/rps_act_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/act_freq depending on the graphics driver being used. .PP +\fBTotl%C0\fP Weighted percentage of time that CPUs are busy. If N CPUs are busy during an interval, the percentage is N * 100%. +.PP +\fBAny%C0\fP Percentage of time that at least one CPU is busy. +.PP +\fBGFX%C0\fP Percentage of time that at least one GFX compute engine is busy. +.PP +\fBCPUGFX%\fP Percentage of time that at least one CPU is busy at the same time as at least one Graphics compute enginer is busy. +.PP \fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters. .PP \fBPkgWatt\fP Watts consumed by the whole package. From 8d14a098b47cc7e5cfa703b9e015d6ca1074489a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 12 Jul 2025 16:16:56 -0400 Subject: [PATCH 229/279] tools/power turbostat: Support more than 64 built-in-counters We have out-grown the ability to use a 64-bit memory location to inventory every possible built-in counter. Leverage the the CPU_SET(3) macros to break this barrier. Also, break the Joules & Watts counters into two, since we can no longer 'or' them together... Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 552 +++++++++++++++++++------- 1 file changed, 402 insertions(+), 150 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4cb3f1aa3a88..ee948e671741 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -210,91 +210,236 @@ struct msr_counter bic[] = { { 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 }, }; -#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) -#define BIC_USEC (1ULL << 0) -#define BIC_TOD (1ULL << 1) -#define BIC_Package (1ULL << 2) -#define BIC_Node (1ULL << 3) -#define BIC_Avg_MHz (1ULL << 4) -#define BIC_Busy (1ULL << 5) -#define BIC_Bzy_MHz (1ULL << 6) -#define BIC_TSC_MHz (1ULL << 7) -#define BIC_IRQ (1ULL << 8) -#define BIC_SMI (1ULL << 9) -#define BIC_cpuidle (1ULL << 10) -#define BIC_CPU_c1 (1ULL << 11) -#define BIC_CPU_c3 (1ULL << 12) -#define BIC_CPU_c6 (1ULL << 13) -#define BIC_CPU_c7 (1ULL << 14) -#define BIC_ThreadC (1ULL << 15) -#define BIC_CoreTmp (1ULL << 16) -#define BIC_CoreCnt (1ULL << 17) -#define BIC_PkgTmp (1ULL << 18) -#define BIC_GFX_rc6 (1ULL << 19) -#define BIC_GFXMHz (1ULL << 20) -#define BIC_Pkgpc2 (1ULL << 21) -#define BIC_Pkgpc3 (1ULL << 22) -#define BIC_Pkgpc6 (1ULL << 23) -#define BIC_Pkgpc7 (1ULL << 24) -#define BIC_Pkgpc8 (1ULL << 25) -#define BIC_Pkgpc9 (1ULL << 26) -#define BIC_Pkgpc10 (1ULL << 27) -#define BIC_CPU_LPI (1ULL << 28) -#define BIC_SYS_LPI (1ULL << 29) -#define BIC_PkgWatt (1ULL << 30) -#define BIC_CorWatt (1ULL << 31) -#define BIC_GFXWatt (1ULL << 32) -#define BIC_PkgCnt (1ULL << 33) -#define BIC_RAMWatt (1ULL << 34) -#define BIC_PKG__ (1ULL << 35) -#define BIC_RAM__ (1ULL << 36) -#define BIC_Pkg_J (1ULL << 37) -#define BIC_Cor_J (1ULL << 38) -#define BIC_GFX_J (1ULL << 39) -#define BIC_RAM_J (1ULL << 40) -#define BIC_Mod_c6 (1ULL << 41) -#define BIC_Totl_c0 (1ULL << 42) -#define BIC_Any_c0 (1ULL << 43) -#define BIC_GFX_c0 (1ULL << 44) -#define BIC_CPUGFX (1ULL << 45) -#define BIC_Core (1ULL << 46) -#define BIC_CPU (1ULL << 47) -#define BIC_APIC (1ULL << 48) -#define BIC_X2APIC (1ULL << 49) -#define BIC_Die (1ULL << 50) -#define BIC_GFXACTMHz (1ULL << 51) -#define BIC_IPC (1ULL << 52) -#define BIC_CORE_THROT_CNT (1ULL << 53) -#define BIC_UNCORE_MHZ (1ULL << 54) -#define BIC_SAM_mc6 (1ULL << 55) -#define BIC_SAMMHz (1ULL << 56) -#define BIC_SAMACTMHz (1ULL << 57) -#define BIC_Diec6 (1ULL << 58) -#define BIC_SysWatt (1ULL << 59) -#define BIC_Sys_J (1ULL << 60) -#define BIC_NMI (1ULL << 61) -#define BIC_CPU_c1e (1ULL << 62) -#define BIC_pct_idle (1ULL << 63) +/* n.b. bic_names must match the order in bic[], above */ +enum bic_names { + BIC_USEC, + BIC_TOD, + BIC_Package, + BIC_Node, + BIC_Avg_MHz, + BIC_Busy, + BIC_Bzy_MHz, + BIC_TSC_MHz, + BIC_IRQ, + BIC_SMI, + BIC_cpuidle, + BIC_CPU_c1, + BIC_CPU_c3, + BIC_CPU_c6, + BIC_CPU_c7, + BIC_ThreadC, + BIC_CoreTmp, + BIC_CoreCnt, + BIC_PkgTmp, + BIC_GFX_rc6, + BIC_GFXMHz, + BIC_Pkgpc2, + BIC_Pkgpc3, + BIC_Pkgpc6, + BIC_Pkgpc7, + BIC_Pkgpc8, + BIC_Pkgpc9, + BIC_Pkgpc10, + BIC_CPU_LPI, + BIC_SYS_LPI, + BIC_PkgWatt, + BIC_CorWatt, + BIC_GFXWatt, + BIC_PkgCnt, + BIC_RAMWatt, + BIC_PKG__, + BIC_RAM__, + BIC_Pkg_J, + BIC_Cor_J, + BIC_GFX_J, + BIC_RAM_J, + BIC_Mod_c6, + BIC_Totl_c0, + BIC_Any_c0, + BIC_GFX_c0, + BIC_CPUGFX, + BIC_Core, + BIC_CPU, + BIC_APIC, + BIC_X2APIC, + BIC_Die, + BIC_GFXACTMHz, + BIC_IPC, + BIC_CORE_THROT_CNT, + BIC_UNCORE_MHZ, + BIC_SAM_mc6, + BIC_SAMMHz, + BIC_SAMACTMHz, + BIC_Diec6, + BIC_SysWatt, + BIC_Sys_J, + BIC_NMI, + BIC_CPU_c1e, + BIC_pct_idle, + MAX_BIC +}; -#define BIC_GROUP_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) -#define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) -#define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_GROUP_HW_IDLE (BIC_Busy | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) -#define BIC_GROUP_SW_IDLE (BIC_Busy | BIC_cpuidle | BIC_pct_idle ) -#define BIC_GROUP_IDLE (BIC_GROUP_HW_IDLE | BIC_pct_idle) -#define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) +void print_bic_set(char *s, cpu_set_t *set) +{ + int i; -#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_cpuidle) + assert(MAX_BIC < CPU_SETSIZE); -unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); -unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_cpuidle | BIC_pct_idle | BIC_APIC | BIC_X2APIC; + printf("%s:", s); -#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) -#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) -#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) -#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) -#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) -#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT) + for (i = 0; i <= MAX_BIC; ++i) { + + if (CPU_ISSET(i, set)) { + assert(i < MAX_BIC); + printf(" %s", bic[i].name); + } + } + putchar('\n'); +} + +static cpu_set_t bic_group_topology; +static cpu_set_t bic_group_thermal_pwr; +static cpu_set_t bic_group_frequency; +static cpu_set_t bic_group_hw_idle; +static cpu_set_t bic_group_sw_idle; +static cpu_set_t bic_group_idle; +static cpu_set_t bic_group_other; +static cpu_set_t bic_group_disabled_by_default; +static cpu_set_t bic_enabled; +static cpu_set_t bic_present; + +/* modify */ +#define BIC_INIT(set) CPU_ZERO(set) + +#define SET_BIC(COUNTER_NUMBER, set) CPU_SET(COUNTER_NUMBER, set) +#define CLR_BIC(COUNTER_NUMBER, set) CPU_CLR(COUNTER_NUMBER, set) + +#define BIC_PRESENT(COUNTER_NUMBER) SET_BIC(COUNTER_NUMBER, &bic_present) +#define BIC_NOT_PRESENT(COUNTER_NUMBER) CPU_CLR(COUNTER_NUMBER, &bic_present) + +/* test */ +#define BIC_IS_ENABLED(COUNTER_NUMBER) CPU_ISSET(COUNTER_NUMBER, &bic_enabled) +#define DO_BIC_READ(COUNTER_NUMBER) CPU_ISSET(COUNTER_NUMBER, &bic_present) +#define DO_BIC(COUNTER_NUMBER) (CPU_ISSET(COUNTER_NUMBER, &bic_enabled) && CPU_ISSET(COUNTER_NUMBER, &bic_present)) + +static void bic_set_all(cpu_set_t *set) +{ + int i; + + assert(MAX_BIC < CPU_SETSIZE); + + for (i = 0; i < MAX_BIC; ++i) + SET_BIC(i, set); +} + +/* + * bic_clear_bits() + * clear all the bits from "clr" in "dst" + */ +static void bic_clear_bits(cpu_set_t *dst, cpu_set_t *clr) +{ + int i; + + assert(MAX_BIC < CPU_SETSIZE); + + for (i = 0; i < MAX_BIC; ++i) + if (CPU_ISSET(i, clr)) + CLR_BIC(i, dst); +} + +static void bic_groups_init(void) +{ + BIC_INIT(&bic_group_topology); + SET_BIC(BIC_Package, &bic_group_topology); + SET_BIC(BIC_Node, &bic_group_topology); + SET_BIC(BIC_CoreCnt, &bic_group_topology); + SET_BIC(BIC_PkgCnt, &bic_group_topology); + SET_BIC(BIC_Core, &bic_group_topology); + SET_BIC(BIC_CPU, &bic_group_topology); + SET_BIC(BIC_Die, &bic_group_topology); + + BIC_INIT(&bic_group_thermal_pwr); + SET_BIC(BIC_CoreTmp, &bic_group_thermal_pwr); + SET_BIC(BIC_PkgTmp, &bic_group_thermal_pwr); + SET_BIC(BIC_PkgWatt, &bic_group_thermal_pwr); + SET_BIC(BIC_CorWatt, &bic_group_thermal_pwr); + SET_BIC(BIC_GFXWatt, &bic_group_thermal_pwr); + SET_BIC(BIC_RAMWatt, &bic_group_thermal_pwr); + SET_BIC(BIC_PKG__, &bic_group_thermal_pwr); + SET_BIC(BIC_RAM__, &bic_group_thermal_pwr); + SET_BIC(BIC_SysWatt, &bic_group_thermal_pwr); + + BIC_INIT(&bic_group_frequency); + SET_BIC(BIC_Avg_MHz, &bic_group_frequency); + SET_BIC(BIC_Busy, &bic_group_frequency); + SET_BIC(BIC_Bzy_MHz, &bic_group_frequency); + SET_BIC(BIC_TSC_MHz, &bic_group_frequency); + SET_BIC(BIC_GFXMHz, &bic_group_frequency); + SET_BIC(BIC_GFXACTMHz, &bic_group_frequency); + SET_BIC(BIC_SAMMHz, &bic_group_frequency); + SET_BIC(BIC_SAMACTMHz, &bic_group_frequency); + SET_BIC(BIC_UNCORE_MHZ, &bic_group_frequency); + + BIC_INIT(&bic_group_hw_idle); + SET_BIC(BIC_Busy, &bic_group_hw_idle); + SET_BIC(BIC_CPU_c1, &bic_group_hw_idle); + SET_BIC(BIC_CPU_c3, &bic_group_hw_idle); + SET_BIC(BIC_CPU_c6, &bic_group_hw_idle); + SET_BIC(BIC_CPU_c7, &bic_group_hw_idle); + SET_BIC(BIC_GFX_rc6, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc2, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc3, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc6, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc7, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc8, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc9, &bic_group_hw_idle); + SET_BIC(BIC_Pkgpc10, &bic_group_hw_idle); + SET_BIC(BIC_CPU_LPI, &bic_group_hw_idle); + SET_BIC(BIC_SYS_LPI, &bic_group_hw_idle); + SET_BIC(BIC_Mod_c6, &bic_group_hw_idle); + SET_BIC(BIC_Totl_c0, &bic_group_hw_idle); + SET_BIC(BIC_Any_c0, &bic_group_hw_idle); + SET_BIC(BIC_GFX_c0, &bic_group_hw_idle); + SET_BIC(BIC_CPUGFX, &bic_group_hw_idle); + SET_BIC(BIC_SAM_mc6, &bic_group_hw_idle); + SET_BIC(BIC_Diec6, &bic_group_hw_idle); + + BIC_INIT(&bic_group_sw_idle); + SET_BIC(BIC_Busy, &bic_group_sw_idle); + SET_BIC(BIC_cpuidle, &bic_group_sw_idle); + SET_BIC(BIC_pct_idle, &bic_group_sw_idle); + + BIC_INIT(&bic_group_idle); + CPU_OR(&bic_group_idle, &bic_group_idle, &bic_group_hw_idle); + SET_BIC(BIC_pct_idle, &bic_group_idle); + + BIC_INIT(&bic_group_other); + SET_BIC(BIC_IRQ, &bic_group_other); + SET_BIC(BIC_NMI, &bic_group_other); + SET_BIC(BIC_SMI, &bic_group_other); + SET_BIC(BIC_ThreadC, &bic_group_other); + SET_BIC(BIC_CoreTmp, &bic_group_other); + SET_BIC(BIC_IPC, &bic_group_other); + + BIC_INIT(&bic_group_disabled_by_default); + SET_BIC(BIC_USEC, &bic_group_disabled_by_default); + SET_BIC(BIC_TOD, &bic_group_disabled_by_default); + SET_BIC(BIC_cpuidle, &bic_group_disabled_by_default); + SET_BIC(BIC_APIC, &bic_group_disabled_by_default); + SET_BIC(BIC_X2APIC, &bic_group_disabled_by_default); + + BIC_INIT(&bic_enabled); + bic_set_all(&bic_enabled); + bic_clear_bits(&bic_enabled, &bic_group_disabled_by_default); + + BIC_INIT(&bic_present); + SET_BIC(BIC_USEC, &bic_present); + SET_BIC(BIC_TOD, &bic_present); + SET_BIC(BIC_cpuidle, &bic_present); + SET_BIC(BIC_APIC, &bic_present); + SET_BIC(BIC_X2APIC, &bic_present); + SET_BIC(BIC_pct_idle, &bic_present); +} /* * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit: @@ -1205,7 +1350,7 @@ struct rapl_counter_arch_info { int msr_shift; /* Positive mean shift right, negative mean shift left */ double *platform_rapl_msr_scale; /* Scale applied to values read by MSR (platform dependent, filled at runtime) */ unsigned int rci_index; /* Maps data from perf counters to global variables */ - unsigned long long bic; + unsigned int bic_number; double compat_scale; /* Some counters require constant scaling to be in the same range as other, similar ones */ unsigned long long flags; }; @@ -1220,7 +1365,20 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_energy_units, .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, - .bic = BIC_PkgWatt | BIC_Pkg_J, + .bic_number = BIC_PkgWatt, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_PKG, + .perf_subsys = "power", + .perf_name = "energy-pkg", + .msr = MSR_PKG_ENERGY_STATUS, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, + .bic_number = BIC_Pkg_J, .compat_scale = 1.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1233,7 +1391,20 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_energy_units, .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, - .bic = BIC_PkgWatt | BIC_Pkg_J, + .bic_number = BIC_PkgWatt, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_AMD_F17H, + .perf_subsys = "power", + .perf_name = "energy-pkg", + .msr = MSR_PKG_ENERGY_STAT, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_ENERGY_PKG, + .bic_number = BIC_Pkg_J, .compat_scale = 1.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1246,7 +1417,20 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_energy_units, .rci_index = RAPL_RCI_INDEX_ENERGY_CORES, - .bic = BIC_CorWatt | BIC_Cor_J, + .bic_number = BIC_CorWatt, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_CORE_ENERGY_STATUS, + .perf_subsys = "power", + .perf_name = "energy-cores", + .msr = MSR_PP0_ENERGY_STATUS, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_ENERGY_CORES, + .bic_number = BIC_Cor_J, .compat_scale = 1.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1259,7 +1443,20 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_dram_energy_units, .rci_index = RAPL_RCI_INDEX_DRAM, - .bic = BIC_RAMWatt | BIC_RAM_J, + .bic_number = BIC_RAMWatt, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_DRAM, + .perf_subsys = "power", + .perf_name = "energy-ram", + .msr = MSR_DRAM_ENERGY_STATUS, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_dram_energy_units, + .rci_index = RAPL_RCI_INDEX_DRAM, + .bic_number = BIC_RAM_J, .compat_scale = 1.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1272,7 +1469,20 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_energy_units, .rci_index = RAPL_RCI_INDEX_GFX, - .bic = BIC_GFXWatt | BIC_GFX_J, + .bic_number = BIC_GFXWatt, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_GFX, + .perf_subsys = "power", + .perf_name = "energy-gpu", + .msr = MSR_PP1_ENERGY_STATUS, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_GFX, + .bic_number = BIC_GFX_J, .compat_scale = 1.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1285,7 +1495,7 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_time_units, .rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS, - .bic = BIC_PKG__, + .bic_number = BIC_PKG__, .compat_scale = 100.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1298,7 +1508,7 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_time_units, .rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS, - .bic = BIC_RAM__, + .bic_number = BIC_RAM__, .compat_scale = 100.0, .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1311,7 +1521,20 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_energy_units, .rci_index = RAPL_RCI_INDEX_CORE_ENERGY, - .bic = BIC_CorWatt | BIC_Cor_J, + .bic_number = BIC_CorWatt, + .compat_scale = 1.0, + .flags = 0, + }, + { + .feature_mask = RAPL_AMD_F17H, + .perf_subsys = NULL, + .perf_name = NULL, + .msr = MSR_CORE_ENERGY_STAT, + .msr_mask = 0xFFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_CORE_ENERGY, + .bic_number = BIC_Cor_J, .compat_scale = 1.0, .flags = 0, }, @@ -1324,7 +1547,20 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .msr_shift = 0, .platform_rapl_msr_scale = &rapl_psys_energy_units, .rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM, - .bic = BIC_SysWatt | BIC_Sys_J, + .bic_number = BIC_SysWatt, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_PLATFORM_COUNTER | RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, + { + .feature_mask = RAPL_PSYS, + .perf_subsys = "power", + .perf_name = "energy-psys", + .msr = MSR_PLATFORM_ENERGY_STATUS, + .msr_mask = 0x00000000FFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_psys_energy_units, + .rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM, + .bic_number = BIC_Sys_J, .compat_scale = 1.0, .flags = RAPL_COUNTER_FLAG_PLATFORM_COUNTER | RAPL_COUNTER_FLAG_USE_MSR_SUM, }, @@ -1373,7 +1609,7 @@ struct cstate_counter_arch_info { const char *perf_name; unsigned long long msr; unsigned int rci_index; /* Maps data from perf counters to global variables */ - unsigned long long bic; + unsigned int bic_number; unsigned long long flags; int pkg_cstate_limit; }; @@ -1385,7 +1621,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c1-residency", .msr = MSR_CORE_C1_RES, .rci_index = CCSTATE_RCI_INDEX_C1_RESIDENCY, - .bic = BIC_CPU_c1, + .bic_number = BIC_CPU_c1, .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD, .pkg_cstate_limit = 0, }, @@ -1395,7 +1631,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c3-residency", .msr = MSR_CORE_C3_RESIDENCY, .rci_index = CCSTATE_RCI_INDEX_C3_RESIDENCY, - .bic = BIC_CPU_c3, + .bic_number = BIC_CPU_c3, .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY, .pkg_cstate_limit = 0, }, @@ -1405,7 +1641,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c6-residency", .msr = MSR_CORE_C6_RESIDENCY, .rci_index = CCSTATE_RCI_INDEX_C6_RESIDENCY, - .bic = BIC_CPU_c6, + .bic_number = BIC_CPU_c6, .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY, .pkg_cstate_limit = 0, }, @@ -1415,7 +1651,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c7-residency", .msr = MSR_CORE_C7_RESIDENCY, .rci_index = CCSTATE_RCI_INDEX_C7_RESIDENCY, - .bic = BIC_CPU_c7, + .bic_number = BIC_CPU_c7, .flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY, .pkg_cstate_limit = 0, }, @@ -1425,7 +1661,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c2-residency", .msr = MSR_PKG_C2_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C2_RESIDENCY, - .bic = BIC_Pkgpc2, + .bic_number = BIC_Pkgpc2, .flags = 0, .pkg_cstate_limit = PCL__2, }, @@ -1435,7 +1671,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c3-residency", .msr = MSR_PKG_C3_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C3_RESIDENCY, - .bic = BIC_Pkgpc3, + .bic_number = BIC_Pkgpc3, .flags = 0, .pkg_cstate_limit = PCL__3, }, @@ -1445,7 +1681,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c6-residency", .msr = MSR_PKG_C6_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C6_RESIDENCY, - .bic = BIC_Pkgpc6, + .bic_number = BIC_Pkgpc6, .flags = 0, .pkg_cstate_limit = PCL__6, }, @@ -1455,7 +1691,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c7-residency", .msr = MSR_PKG_C7_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C7_RESIDENCY, - .bic = BIC_Pkgpc7, + .bic_number = BIC_Pkgpc7, .flags = 0, .pkg_cstate_limit = PCL__7, }, @@ -1465,7 +1701,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c8-residency", .msr = MSR_PKG_C8_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C8_RESIDENCY, - .bic = BIC_Pkgpc8, + .bic_number = BIC_Pkgpc8, .flags = 0, .pkg_cstate_limit = PCL__8, }, @@ -1475,7 +1711,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c9-residency", .msr = MSR_PKG_C9_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C9_RESIDENCY, - .bic = BIC_Pkgpc9, + .bic_number = BIC_Pkgpc9, .flags = 0, .pkg_cstate_limit = PCL__9, }, @@ -1485,7 +1721,7 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { .perf_name = "c10-residency", .msr = MSR_PKG_C10_RESIDENCY, .rci_index = PCSTATE_RCI_INDEX_C10_RESIDENCY, - .bic = BIC_Pkgpc10, + .bic_number = BIC_Pkgpc10, .flags = 0, .pkg_cstate_limit = PCL_10, }, @@ -2180,10 +2416,13 @@ int get_msr_fd(int cpu) static void bic_disable_msr_access(void) { - const unsigned long bic_msrs = BIC_Mod_c6 | BIC_CoreTmp | - BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_PkgTmp; - - bic_enabled &= ~bic_msrs; + CLR_BIC(BIC_Mod_c6, &bic_enabled); + CLR_BIC(BIC_CoreTmp, &bic_enabled); + CLR_BIC(BIC_Totl_c0, &bic_enabled); + CLR_BIC(BIC_Any_c0, &bic_enabled); + CLR_BIC(BIC_GFX_c0, &bic_enabled); + CLR_BIC(BIC_CPUGFX, &bic_enabled); + CLR_BIC(BIC_PkgTmp, &bic_enabled); free_sys_msr_counters(); } @@ -2383,10 +2622,9 @@ void help(void) * for all the strings in comma separate name_list, * set the approprate bit in return value. */ -unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) +void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode) { unsigned int i; - unsigned long long retval = 0; while (name_list) { char *comma; @@ -2398,38 +2636,37 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) for (i = 0; i < MAX_BIC; ++i) { if (!strcmp(name_list, bic[i].name)) { - retval |= (1ULL << i); + SET_BIC(i, ret_set); break; } if (!strcmp(name_list, "all")) { - retval |= ~0; + bic_set_all(ret_set); break; } else if (!strcmp(name_list, "topology")) { - retval |= BIC_GROUP_TOPOLOGY; + CPU_OR(ret_set, ret_set, &bic_group_topology); break; } else if (!strcmp(name_list, "power")) { - retval |= BIC_GROUP_THERMAL_PWR; + CPU_OR(ret_set, ret_set, &bic_group_thermal_pwr); break; } else if (!strcmp(name_list, "idle")) { - retval |= BIC_GROUP_IDLE; + CPU_OR(ret_set, ret_set, &bic_group_idle); break; } else if (!strcmp(name_list, "swidle")) { - retval |= BIC_GROUP_SW_IDLE; + CPU_OR(ret_set, ret_set, &bic_group_sw_idle); break; } else if (!strcmp(name_list, "sysfs")) { /* legacy compatibility */ - retval |= BIC_GROUP_SW_IDLE; + CPU_OR(ret_set, ret_set, &bic_group_sw_idle); break; } else if (!strcmp(name_list, "hwidle")) { - retval |= BIC_GROUP_HW_IDLE; + CPU_OR(ret_set, ret_set, &bic_group_hw_idle); break; } else if (!strcmp(name_list, "frequency")) { - retval |= BIC_GROUP_FREQUENCY; + CPU_OR(ret_set, ret_set, &bic_group_frequency); break; } else if (!strcmp(name_list, "other")) { - retval |= BIC_OTHER; + CPU_OR(ret_set, ret_set, &bic_group_other); break; } - } if (i == MAX_BIC) { if (mode == SHOW_LIST) { @@ -2458,7 +2695,6 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) name_list++; } - return retval; } void print_header(char *delim) @@ -7345,21 +7581,28 @@ void rapl_probe_intel(void) unsigned long long msr; unsigned int time_unit; double tdp; - const unsigned long long bic_watt_bits = BIC_SysWatt | BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt; - const unsigned long long bic_joules_bits = BIC_Sys_J | BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J; - if (rapl_joules) - bic_enabled &= ~bic_watt_bits; - else - bic_enabled &= ~bic_joules_bits; + if (rapl_joules) { + CLR_BIC(BIC_SysWatt, &bic_enabled); + CLR_BIC(BIC_PkgWatt, &bic_enabled); + CLR_BIC(BIC_CorWatt, &bic_enabled); + CLR_BIC(BIC_RAMWatt, &bic_enabled); + CLR_BIC(BIC_GFXWatt, &bic_enabled); + } else { + CLR_BIC(BIC_Sys_J, &bic_enabled); + CLR_BIC(BIC_Pkg_J, &bic_enabled); + CLR_BIC(BIC_Cor_J, &bic_enabled); + CLR_BIC(BIC_RAM_J, &bic_enabled); + CLR_BIC(BIC_GFX_J, &bic_enabled); + } if (!platform->rapl_msrs || no_msr) return; if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS)) - bic_enabled &= ~BIC_PKG__; + CLR_BIC(BIC_PKG__, &bic_enabled); if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)) - bic_enabled &= ~BIC_RAM__; + CLR_BIC(BIC_RAM__, &bic_enabled); /* units on package 0, verify later other packages match */ if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) @@ -7398,13 +7641,14 @@ void rapl_probe_amd(void) { unsigned long long msr; double tdp; - const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt; - const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J; - if (rapl_joules) - bic_enabled &= ~bic_watt_bits; - else - bic_enabled &= ~bic_joules_bits; + if (rapl_joules) { + CLR_BIC(BIC_SysWatt, &bic_enabled); + CLR_BIC(BIC_CorWatt, &bic_enabled); + } else { + CLR_BIC(BIC_Pkg_J, &bic_enabled); + CLR_BIC(BIC_Cor_J, &bic_enabled); + } if (!platform->rapl_msrs || no_msr) return; @@ -8151,7 +8395,7 @@ void rapl_perf_init(void) enum rapl_unit unit; unsigned int next_domain; - if (!BIC_IS_ENABLED(cai->bic)) + if (!BIC_IS_ENABLED(cai->bic_number)) continue; memset(domain_visited, 0, num_domains * sizeof(*domain_visited)); @@ -8215,7 +8459,7 @@ void rapl_perf_init(void) /* If any CPU has access to the counter, make it present */ if (has_counter) - BIC_PRESENT(cai->bic); + BIC_PRESENT(cai->bic_number); } free(domain_visited); @@ -8436,7 +8680,7 @@ void cstate_perf_init_(bool soft_c1) if (!per_core && pkg_visited[pkg_id]) continue; - const bool counter_needed = BIC_IS_ENABLED(cai->bic) || + const bool counter_needed = BIC_IS_ENABLED(cai->bic_number) || (soft_c1 && (cai->flags & CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY)); const bool counter_supported = (platform->supported_cstates & cai->feature_mask); @@ -8463,7 +8707,7 @@ void cstate_perf_init_(bool soft_c1) /* If any CPU has access to the counter, make it present */ if (has_counter) - BIC_PRESENT(cai->bic); + BIC_PRESENT(cai->bic_number); } free(cores_visited); @@ -9199,7 +9443,7 @@ void check_msr_access(void) void check_perf_access(void) { if (no_perf || !BIC_IS_ENABLED(BIC_IPC) || !has_instr_count_access()) - bic_enabled &= ~BIC_IPC; + CLR_BIC(BIC_IPC, &bic_enabled); } bool perf_has_hybrid_devices(void) @@ -9768,8 +10012,8 @@ void turbostat_init() * disable more BICs, since it can't be reported accurately. */ if (platform->enable_tsc_tweak && !has_base_hz) { - bic_enabled &= ~BIC_Busy; - bic_enabled &= ~BIC_Bzy_MHz; + CLR_BIC(BIC_Busy, &bic_enabled); + CLR_BIC(BIC_Bzy_MHz, &bic_enabled); } } @@ -10785,22 +11029,29 @@ void cmdline(int argc, char **argv) no_perf = 1; break; case 'e': - /* --enable specified counter */ - bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST); + /* --enable specified counter, without clearning existing list */ + bic_lookup(&bic_enabled, optarg, SHOW_LIST); break; case 'f': force_load++; break; case 'd': debug++; - ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); + bic_set_all(&bic_enabled); break; case 'H': /* * --hide: do not show those specified * multiple invocations simply clear more bits in enabled mask */ - bic_enabled &= ~bic_lookup(optarg, HIDE_LIST); + { + cpu_set_t bic_group_hide; + + BIC_INIT(&bic_group_hide); + + bic_lookup(&bic_group_hide, optarg, HIDE_LIST); + bic_clear_bits(&bic_enabled, &bic_group_hide); + } break; case 'h': default: @@ -10824,7 +11075,7 @@ void cmdline(int argc, char **argv) rapl_joules++; break; case 'l': - ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); + bic_set_all(&bic_enabled); list_header_only++; quiet++; break; @@ -10861,9 +11112,8 @@ void cmdline(int argc, char **argv) * subsequent invocations can add to it. */ if (shown == 0) - bic_enabled = bic_lookup(optarg, SHOW_LIST); - else - bic_enabled |= bic_lookup(optarg, SHOW_LIST); + BIC_INIT(&bic_enabled); + bic_lookup(&bic_enabled, optarg, SHOW_LIST); shown = 1; break; case 'S': @@ -10900,6 +11150,8 @@ int main(int argc, char **argv) { int fd, ret; + bic_groups_init(); + fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY); if (fd < 0) goto skip_cgroup_setting; From 5f961fb2a7d8f4d89d64a9e2cd584738de5f9c58 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Mon, 14 Jul 2025 23:33:55 -0400 Subject: [PATCH 230/279] tools/power turbostat: probe and display L3 cache topology Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 34 ++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ee948e671741..54f270226746 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -195,6 +195,7 @@ struct msr_counter bic[] = { { 0x0, "APIC", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "X2APIC", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "Die", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "L3", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "GFXAMHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "IPC", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CoreThr", NULL, 0, 0, 0, NULL, 0 }, @@ -263,6 +264,7 @@ enum bic_names { BIC_APIC, BIC_X2APIC, BIC_Die, + BIC_L3, BIC_GFXACTMHz, BIC_IPC, BIC_CORE_THROT_CNT, @@ -292,7 +294,7 @@ void print_bic_set(char *s, cpu_set_t *set) if (CPU_ISSET(i, set)) { assert(i < MAX_BIC); printf(" %s", bic[i].name); - } + } } putchar('\n'); } @@ -357,6 +359,7 @@ static void bic_groups_init(void) SET_BIC(BIC_Core, &bic_group_topology); SET_BIC(BIC_CPU, &bic_group_topology); SET_BIC(BIC_Die, &bic_group_topology); + SET_BIC(BIC_L3, &bic_group_topology); BIC_INIT(&bic_group_thermal_pwr); SET_BIC(BIC_CoreTmp, &bic_group_thermal_pwr); @@ -2273,6 +2276,7 @@ struct platform_counters { struct cpu_topology { int physical_package_id; int die_id; + int l3_id; int logical_cpu_id; int physical_node_id; int logical_node_id; /* 0-based count within the package */ @@ -2294,6 +2298,7 @@ struct topo_params { int max_core_id; int max_package_id; int max_die_id; + int max_l3_id; int max_node_num; int nodes_per_pkg; int cores_per_node; @@ -2712,6 +2717,8 @@ void print_header(char *delim) outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); if (DO_BIC(BIC_Die)) outp += sprintf(outp, "%sDie", (printed++ ? delim : "")); + if (DO_BIC(BIC_L3)) + outp += sprintf(outp, "%sL3", (printed++ ? delim : "")); if (DO_BIC(BIC_Node)) outp += sprintf(outp, "%sNode", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) @@ -3183,6 +3190,8 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Die)) outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + if (DO_BIC(BIC_L3)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Node)) outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) @@ -3206,6 +3215,12 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data else outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } + if (DO_BIC(BIC_L3)) { + if (c) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].l3_id); + else + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + } if (DO_BIC(BIC_Node)) { if (t) outp += sprintf(outp, "%s%d", @@ -5911,6 +5926,11 @@ int get_die_id(int cpu) return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu); } +int get_l3_id(int cpu) +{ + return parse_int_file("/sys/devices/system/cpu/cpu%d/cache/index3/id", cpu); +} + int get_core_id(int cpu) { return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); @@ -9203,6 +9223,11 @@ void topology_probe(bool startup) if (cpus[i].die_id > topo.max_die_id) topo.max_die_id = cpus[i].die_id; + /* get l3 information */ + cpus[i].l3_id = get_l3_id(i); + if (cpus[i].l3_id > topo.max_l3_id) + topo.max_l3_id = cpus[i].l3_id; + /* get numa node information */ cpus[i].physical_node_id = get_physical_node_id(&cpus[i]); if (cpus[i].physical_node_id > topo.max_node_num) @@ -9235,6 +9260,9 @@ void topology_probe(bool startup) if (!summary_only && topo.num_die > 1) BIC_PRESENT(BIC_Die); + if (!summary_only && topo.max_l3_id > 0) + BIC_PRESENT(BIC_L3); + topo.num_packages = max_package_id + 1; if (debug > 1) fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); @@ -9258,8 +9286,8 @@ void topology_probe(bool startup) if (cpu_is_not_present(i)) continue; fprintf(outf, - "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n", - i, cpus[i].physical_package_id, cpus[i].die_id, + "cpu %d pkg %d die %d l3 %d node %d lnode %d core %d thread %d\n", + i, cpus[i].physical_package_id, cpus[i].die_id, cpus[i].l3_id, cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id); } From a5015d945de6003cf813af2bb11189982f5b3d54 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 22 Jul 2025 00:17:04 -0400 Subject: [PATCH 231/279] tools/power turbostat: delete GET_PKG() pkg_base[pkg_id] is a simple array of structure pointers, let the compiler treat it that way. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 54f270226746..dd1160144625 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2080,8 +2080,6 @@ struct pkg_data { ((node_no) * topo.cores_per_node) + \ (core_no)) -#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) - /* * The accumulated sum of MSR is defined as a monotonic * increasing MSR, it will be accumulated periodically, @@ -2345,16 +2343,15 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { struct thread_data *t; struct core_data *c; - struct pkg_data *p; + t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); if (cpu_is_not_allowed(t->cpu_id)) continue; c = GET_CORE(core_base, core_no, node_no, pkg_no); - p = GET_PKG(pkg_base, pkg_no); - retval |= func(t, c, p); + retval |= func(t, c, &pkg_base[pkg_no]); } } } @@ -6119,7 +6116,6 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { struct thread_data *t, *t2; struct core_data *c, *c2; - struct pkg_data *p, *p2; t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); @@ -6131,10 +6127,7 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, c = GET_CORE(core_base, core_no, node_no, pkg_no); c2 = GET_CORE(core_base2, core_no, node_no, pkg_no); - p = GET_PKG(pkg_base, pkg_no); - p2 = GET_PKG(pkg_base2, pkg_no); - - retval |= func(t, c, p, t2, c2, p2); + retval |= func(t, c, &pkg_base[pkg_no], t2, c2, &pkg_base2[pkg_no]); } } } @@ -9342,7 +9335,6 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base, int thread_id = cpus[cpu_id].thread_id; struct thread_data *t; struct core_data *c; - struct pkg_data *p; /* Workaround for systems where physical_node_id==-1 * and logical_node_id==(-1 - topo.num_cpus) @@ -9352,18 +9344,17 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base, t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id); c = GET_CORE(core_base, core_id, node_id, pkg_id); - p = GET_PKG(pkg_base, pkg_id); t->cpu_id = cpu_id; if (!cpu_is_not_allowed(cpu_id)) { if (c->base_cpu < 0) c->base_cpu = t->cpu_id; - if (p->base_cpu < 0) - p->base_cpu = t->cpu_id; + if (pkg_base[pkg_id].base_cpu < 0) + pkg_base[pkg_id].base_cpu = t->cpu_id; } c->core_id = core_id; - p->package_id = pkg_id; + pkg_base[pkg_id].package_id = pkg_id; } int initialize_counters(int cpu_id) From dcd1c379b0f179763956e8596ad99912165a95ec Mon Sep 17 00:00:00 2001 From: Michael Hebenstreit Date: Fri, 8 Aug 2025 15:57:53 -0400 Subject: [PATCH 232/279] tools/power turbostat: add format "average" for external attributes External atributes with format "raw" are not printed in summary lines for nodes/packages (or with option -S). The new format "average" behaves like "raw" but also adds the summary data Signed-off-by: Michael Hebenstreit Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 3 ++- tools/power/x86/turbostat/turbostat.c | 30 ++++++++++++++++++--------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index db3888b8af12..3340def58d01 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -47,10 +47,11 @@ name as necessary to disambiguate it from others is necessary. Note that option MSRs are read as 64-bits, u32 truncates the displayed value to 32-bits. default: u64 - format: {\fBraw\fP | \fBdelta\fP | \fBpercent\fP} + format: {\fBraw\fP | \fBdelta\fP | \fBpercent\fP | \fBaverage\fP} 'raw' shows the MSR contents in hex. 'delta' shows the difference in values during the measurement interval. 'percent' shows the delta as a percentage of the cycles elapsed. + 'average' similar to raw, but also averaged for node/package summaries (or when using -S). default: delta name: "name_string" diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index dd1160144625..9ad3b1aa79ef 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2756,7 +2756,7 @@ void print_header(char *delim) for (mp = sys.tp; mp; mp = mp->next) { - if (mp->format == FORMAT_RAW) { + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { if (mp->width == 64) outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name); else @@ -2831,7 +2831,7 @@ void print_header(char *delim) } for (mp = sys.cp; mp; mp = mp->next) { - if (mp->format == FORMAT_RAW) { + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { if (mp->width == 64) outp += sprintf(outp, "%s%18.18s", delim, mp->name); else @@ -2961,7 +2961,7 @@ void print_header(char *delim) outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : "")); for (mp = sys.pp; mp; mp = mp->next) { - if (mp->format == FORMAT_RAW) { + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { if (mp->width == 64) outp += sprintf(outp, "%s%18.18s", delim, mp->name); else if (mp->width == 32) @@ -3282,7 +3282,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data /* Added counters */ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) { + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { if (mp->width == 32) outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]); @@ -3379,7 +3379,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) { + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { if (mp->width == 32) outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]); @@ -3578,7 +3578,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz); for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) { + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { if (mp->width == 32) outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]); @@ -3755,7 +3755,7 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value; for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) old->counter[i] = new->counter[i]; else if (mp->format == FORMAT_AVERAGE) old->counter[i] = new->counter[i]; @@ -3799,7 +3799,7 @@ void delta_core(struct core_data *new, struct core_data *old) DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) old->counter[i] = new->counter[i]; else old->counter[i] = new->counter[i] - old->counter[i]; @@ -3913,7 +3913,7 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d old->smi_count = new->smi_count - old->smi_count; for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW) + if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) old->counter[i] = new->counter[i]; else old->counter[i] = new->counter[i] - old->counter[i]; @@ -10419,6 +10419,10 @@ void parse_add_command_msr(char *add_command) format = FORMAT_RAW; goto next; } + if (!strncmp(add_command, "average", strlen("average"))) { + format = FORMAT_AVERAGE; + goto next; + } if (!strncmp(add_command, "delta", strlen("delta"))) { format = FORMAT_DELTA; goto next; @@ -10691,13 +10695,19 @@ void parse_add_command_pmt(char *add_command) has_format = true; } + if (strcmp("average", format_name) == 0) { + format = FORMAT_AVERAGE; + has_format = true; + } + if (strcmp("delta", format_name) == 0) { format = FORMAT_DELTA; has_format = true; } if (!has_format) { - fprintf(stderr, "%s: Invalid format %s. Expected raw or delta\n", __func__, format_name); + fprintf(stderr, "%s: Invalid format %s. Expected raw, average or delta\n", + __func__, format_name); exit(1); } } From 3a088b07c4f10bf577f4a2392111704195a794ba Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 11 Jun 2025 14:50:26 +0800 Subject: [PATCH 233/279] tools/power turbostat: Fix DMR support Together with the RAPL MSRs, there are more MSRs gone on DMR, including PLR (Perf Limit Reasons), and IRTL (Package cstate Interrupt Response Time Limit) MSRs. The configurable TDP info should also be retrieved from TPMI based Intel Speed Select Technology feature. Remove the access of these MSRs for DMR. Improve the DMR platform feature table to make it more readable at the same time. Fixes: 83075bd59de2 ("tools/power turbostat: Add initial support for DMR") Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 29 ++++++++++++++------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9ad3b1aa79ef..e540bb0bb093 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -989,20 +989,21 @@ static const struct platform_features spr_features = { }; static const struct platform_features dmr_features = { - .has_msr_misc_feature_control = spr_features.has_msr_misc_feature_control, - .has_msr_misc_pwr_mgmt = spr_features.has_msr_misc_pwr_mgmt, - .has_nhm_msrs = spr_features.has_nhm_msrs, - .has_config_tdp = spr_features.has_config_tdp, - .bclk_freq = spr_features.bclk_freq, - .supported_cstates = spr_features.supported_cstates, - .cst_limit = spr_features.cst_limit, - .has_msr_core_c1_res = spr_features.has_msr_core_c1_res, - .has_msr_module_c6_res_ms = 1, /* DMR has Dual Core Module and MC6 MSR */ - .has_irtl_msrs = spr_features.has_irtl_msrs, - .has_cst_prewake_bit = spr_features.has_cst_prewake_bit, - .has_fixed_rapl_psys_unit = spr_features.has_fixed_rapl_psys_unit, - .trl_msrs = spr_features.trl_msrs, - .rapl_msrs = 0, /* DMR does not have RAPL MSRs */ + .has_msr_misc_feature_control = spr_features.has_msr_misc_feature_control, + .has_msr_misc_pwr_mgmt = spr_features.has_msr_misc_pwr_mgmt, + .has_nhm_msrs = spr_features.has_nhm_msrs, + .bclk_freq = spr_features.bclk_freq, + .supported_cstates = spr_features.supported_cstates, + .cst_limit = spr_features.cst_limit, + .has_msr_core_c1_res = spr_features.has_msr_core_c1_res, + .has_cst_prewake_bit = spr_features.has_cst_prewake_bit, + .has_fixed_rapl_psys_unit = spr_features.has_fixed_rapl_psys_unit, + .trl_msrs = spr_features.trl_msrs, + .has_msr_module_c6_res_ms = 1, /* DMR has Dual-Core-Module and MC6 MSR */ + .rapl_msrs = 0, /* DMR does not have RAPL MSRs */ + .plr_msrs = 0, /* DMR does not have PLR MSRs */ + .has_irtl_msrs = 0, /* DMR does not have IRTL MSRs */ + .has_config_tdp = 0, /* DMR does not have CTDP MSRs */ }; static const struct platform_features srf_features = { From 378e901160256d2ab66e45ffb97afaca51e65706 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 8 Aug 2025 19:30:07 -0400 Subject: [PATCH 234/279] tools/power turbostat: standardize PER_THREAD_PARAMS use a macro for PER_THREAD_PARAMS to make adding one later more clear. no functional change Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 42 ++++++++++++++------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index e540bb0bb093..d65a504a6c5f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2331,6 +2331,8 @@ int cpu_is_not_allowed(int cpu) * skip non-present cpus */ +#define PER_THREAD_PARAMS struct thread_data *t, struct core_data *c, struct pkg_data *p + int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *), struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) { @@ -2360,21 +2362,21 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk return retval; } -int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int is_cpu_first_thread_in_core(PER_THREAD_PARAMS) { UNUSED(p); return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0); } -int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int is_cpu_first_core_in_package(PER_THREAD_PARAMS) { UNUSED(c); return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0); } -int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int is_cpu_first_thread_in_package(PER_THREAD_PARAMS) { return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p); } @@ -3020,7 +3022,7 @@ void print_header(char *delim) outp += sprintf(outp, "\n"); } -int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int dump_counters(PER_THREAD_PARAMS) { int i; struct msr_counter *mp; @@ -3135,7 +3137,7 @@ double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desir /* * column formatting convention & formats */ -int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int format_counters(PER_THREAD_PARAMS) { static int count; @@ -3677,7 +3679,7 @@ void flush_output_stderr(void) outp = output_buffer; } -void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +void format_all_counters(PER_THREAD_PARAMS) { static int count; @@ -3968,7 +3970,7 @@ void rapl_counter_clear(struct rapl_counter *c) c->unit = RAPL_UNIT_INVALID; } -void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +void clear_counters(PER_THREAD_PARAMS) { int i; struct msr_counter *mp; @@ -4065,7 +4067,7 @@ void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter dst->raw_value += src->raw_value; } -int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int sum_counters(PER_THREAD_PARAMS) { int i; struct msr_counter *mp; @@ -4213,7 +4215,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) * sum the counters for all cpus in the system * compute the weighted average */ -void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p) +void compute_average(PER_THREAD_PARAMS) { int i; struct msr_counter *mp; @@ -4796,7 +4798,7 @@ char *find_sysfs_path_by_id(struct sysfs_path *sp, int id) return NULL; } -int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_data *c, struct pkg_data *p) +int get_cstate_counters(unsigned int cpu, PER_THREAD_PARAMS) { /* * Overcommit memory a little bit here, @@ -5096,7 +5098,7 @@ static inline int get_rapl_domain_id(int cpu) * migrate to cpu * acquire and record local counters for that cpu */ -int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int get_counters(PER_THREAD_PARAMS) { int cpu = t->cpu_id; unsigned long long msr; @@ -6586,7 +6588,7 @@ int get_msr_sum(int cpu, off_t offset, unsigned long long *msr) timer_t timerid; /* Timer callback, update the sum of MSRs periodically. */ -static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p) +static int update_msr_sum(PER_THREAD_PARAMS) { int i, ret; int cpu = t->cpu_id; @@ -7332,7 +7334,7 @@ static void dump_sysfs_pstate_config(void) * print_epb() * Decode the ENERGY_PERF_BIAS MSR */ -int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int print_epb(PER_THREAD_PARAMS) { char *epb_string; int cpu, epb; @@ -7381,7 +7383,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) * print_hwp() * Decode the MSR_HWP_CAPABILITIES */ -int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int print_hwp(PER_THREAD_PARAMS) { unsigned long long msr; int cpu; @@ -7470,7 +7472,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) /* * print_perf_limit() */ -int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int print_perf_limit(PER_THREAD_PARAMS) { unsigned long long msr; int cpu; @@ -7845,7 +7847,7 @@ static int print_rapl_sysfs(void) return 0; } -int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int print_rapl(PER_THREAD_PARAMS) { unsigned long long msr; const char *msr_name; @@ -7999,7 +8001,7 @@ void probe_rapl(void) * below this value, including the Digital Thermal Sensor (DTS), * Package Thermal Management Sensor (PTM), and thermal event thresholds. */ -int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int set_temperature_target(PER_THREAD_PARAMS) { unsigned long long msr; unsigned int tcc_default, tcc_offset; @@ -8067,7 +8069,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk return 0; } -int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int print_thermal(PER_THREAD_PARAMS) { unsigned long long msr; unsigned int dts, dts2; @@ -8147,7 +8149,7 @@ void probe_thermal(void) for_all_cpus(print_thermal, ODD_COUNTERS); } -int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int get_cpu_type(PER_THREAD_PARAMS) { unsigned int eax, ebx, ecx, edx; @@ -9395,7 +9397,7 @@ void allocate_irq_buffers(void) err(-1, "calloc %d NMI", topo.max_cpu_num + 1); } -int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int update_topo(PER_THREAD_PARAMS) { topo.allowed_cpus++; if ((int)t->cpu_id == c->base_cpu) From e60a13bcef206795d3ddf82f130fe8f570176d06 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 9 Aug 2025 16:31:31 -0400 Subject: [PATCH 235/279] tools/power turbostat: Handle non-root legacy-uncore sysfs permissions /sys/devices/system/cpu/intel_uncore_frequency/package_X_die_Y/ may be readable by all, but /sys/devices/system/cpu/intel_uncore_frequency/package_X_die_Y/current_freq_khz may be readable only by root. Non-root turbostat users see complaints in this scenario. Fail probe of the interface if we can't read current_freq_khz. Reported-by: Artem Bityutskiy Original-patch-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d65a504a6c5f..76f4093959d5 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -7002,7 +7002,8 @@ static void probe_intel_uncore_frequency_legacy(void) sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i, j); - if (access(path_base, R_OK)) + sprintf(path, "%s/current_freq_khz", path_base); + if (access(path, R_OK)) continue; BIC_PRESENT(BIC_UNCORE_MHZ); From 5e98a5e73edcc4114c5ad10596db87e24f50ee4d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 9 Aug 2025 21:08:26 -0400 Subject: [PATCH 236/279] tools/power turbostat: version 2025.09.09 Probe and display L3 Cache topology Add ability to average an added counter (useful for pre-integrated "counters", such as Watts) Break the limit of 64 built-in counters. Assorted bug fixes and minor feature tweaks Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 76f4093959d5..72a280e7a9d5 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -10126,7 +10126,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2025.06.08 - Len Brown \n"); + fprintf(outf, "turbostat version 2025.09.09 - Len Brown \n"); } #define COMMAND_LINE_SIZE 2048 From 8f5ae30d69d7543eee0d70083daf4de8fe15d585 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 10 Aug 2025 19:41:16 +0300 Subject: [PATCH 237/279] Linux 6.17-rc1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 37e37565515e..6bfe776bf3c5 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 -PATCHLEVEL = 16 +PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Baby Opossum Posse # *DOCUMENTATION* From c6993c4cb91803fceb82d6b5e0ec5e0aec2d0ad6 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Mon, 4 Aug 2025 16:20:31 +0800 Subject: [PATCH 238/279] erofs: Fallback to normal access if DAX is not supported on extra device If using multiple devices, we should check if the extra device support DAX instead of checking the primary device when deciding if to use DAX to access a file. If an extra device does not support DAX we should fallback to normal access otherwise the data on that device will be inaccessible. Signed-off-by: Yuezhang Mo Reviewed-by: Friendy Su Reviewed-by: Jacky Cao Reviewed-by: Daniel Palmer Reviewed-by: Gao Xiang Reviewed-by: Hongbo Li Link: https://lore.kernel.org/r/20250804082030.3667257-2-Yuezhang.Mo@sony.com Signed-off-by: Gao Xiang --- fs/erofs/super.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index e1020aa60771..8c7a5985b4ee 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -174,6 +174,11 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, if (!erofs_is_fileio_mode(sbi)) { dif->dax_dev = fs_dax_get_by_bdev(file_bdev(file), &dif->dax_part_off, NULL, NULL); + if (!dif->dax_dev && test_opt(&sbi->opt, DAX_ALWAYS)) { + erofs_info(sb, "DAX unsupported by %s. Turning off DAX.", + dif->path); + clear_opt(&sbi->opt, DAX_ALWAYS); + } } else if (!S_ISREG(file_inode(file)->i_mode)) { fput(file); return -EINVAL; @@ -210,8 +215,13 @@ static int erofs_scan_devices(struct super_block *sb, ondisk_extradevs, sbi->devs->extra_devices); return -EINVAL; } - if (!ondisk_extradevs) + if (!ondisk_extradevs) { + if (test_opt(&sbi->opt, DAX_ALWAYS) && !sbi->dif0.dax_dev) { + erofs_info(sb, "DAX unsupported by block device. Turning off DAX."); + clear_opt(&sbi->opt, DAX_ALWAYS); + } return 0; + } if (!sbi->devs->extra_devices && !erofs_is_fscache_mode(sb)) sbi->devs->flatdev = true; @@ -338,7 +348,6 @@ static int erofs_read_superblock(struct super_block *sb) if (ret < 0) goto out; - /* handle multiple devices */ ret = erofs_scan_devices(sb, dsb); if (erofs_sb_has_48bit(sbi)) @@ -671,14 +680,9 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) return invalfc(fc, "cannot use fsoffset in fscache mode"); } - if (test_opt(&sbi->opt, DAX_ALWAYS)) { - if (!sbi->dif0.dax_dev) { - errorfc(fc, "DAX unsupported by block device. Turning off DAX."); - clear_opt(&sbi->opt, DAX_ALWAYS); - } else if (sbi->blkszbits != PAGE_SHIFT) { - errorfc(fc, "unsupported blocksize for DAX"); - clear_opt(&sbi->opt, DAX_ALWAYS); - } + if (test_opt(&sbi->opt, DAX_ALWAYS) && sbi->blkszbits != PAGE_SHIFT) { + erofs_info(sb, "unsupported blocksize for DAX"); + clear_opt(&sbi->opt, DAX_ALWAYS); } sb->s_time_gran = 1; From 74da24f0ac9b8aabfb8d7feeba6c32ddff3065e0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 30 Jul 2025 14:44:49 +0200 Subject: [PATCH 239/279] erofs: Do not select tristate symbols from bool symbols The EROFS filesystem has many configurable options, controlled through boolean Kconfig symbols. When enabled, these options may need to enable additional library functionality elsewhere. Currently this is done by selecting the symbol for the additional functionality. However, if EROFS_FS itself is modular, and the target symbol is a tristate symbol, the additional functionality is always forced built-in. Selecting tristate symbols from a tristate symbol does keep modular transitivity. Hence fix this by moving selects of tristate symbols to the main EROFS_FS symbol. Signed-off-by: Geert Uytterhoeven Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/da1b899e511145dd43fd2d398f64b2e03c6a39e7.1753879351.git.geert+renesas@glider.be Signed-off-by: Gao Xiang --- fs/erofs/Kconfig | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig index 7b26efc271ee..d81f3318417d 100644 --- a/fs/erofs/Kconfig +++ b/fs/erofs/Kconfig @@ -3,8 +3,18 @@ config EROFS_FS tristate "EROFS filesystem support" depends on BLOCK + select CACHEFILES if EROFS_FS_ONDEMAND select CRC32 + select CRYPTO if EROFS_FS_ZIP_ACCEL + select CRYPTO_DEFLATE if EROFS_FS_ZIP_ACCEL select FS_IOMAP + select LZ4_DECOMPRESS if EROFS_FS_ZIP + select NETFS_SUPPORT if EROFS_FS_ONDEMAND + select XXHASH if EROFS_FS_XATTR + select XZ_DEC if EROFS_FS_ZIP_LZMA + select XZ_DEC_MICROLZMA if EROFS_FS_ZIP_LZMA + select ZLIB_INFLATE if EROFS_FS_ZIP_DEFLATE + select ZSTD_DECOMPRESS if EROFS_FS_ZIP_ZSTD help EROFS (Enhanced Read-Only File System) is a lightweight read-only file system with modern designs (e.g. no buffer heads, inline @@ -38,7 +48,6 @@ config EROFS_FS_DEBUG config EROFS_FS_XATTR bool "EROFS extended attributes" depends on EROFS_FS - select XXHASH default y help Extended attributes are name:value pairs associated with inodes by @@ -94,7 +103,6 @@ config EROFS_FS_BACKED_BY_FILE config EROFS_FS_ZIP bool "EROFS Data Compression Support" depends on EROFS_FS - select LZ4_DECOMPRESS default y help Enable transparent compression support for EROFS file systems. @@ -104,8 +112,6 @@ config EROFS_FS_ZIP config EROFS_FS_ZIP_LZMA bool "EROFS LZMA compressed data support" depends on EROFS_FS_ZIP - select XZ_DEC - select XZ_DEC_MICROLZMA help Saying Y here includes support for reading EROFS file systems containing LZMA compressed data, specifically called microLZMA. It @@ -117,7 +123,6 @@ config EROFS_FS_ZIP_LZMA config EROFS_FS_ZIP_DEFLATE bool "EROFS DEFLATE compressed data support" depends on EROFS_FS_ZIP - select ZLIB_INFLATE help Saying Y here includes support for reading EROFS file systems containing DEFLATE compressed data. It gives better compression @@ -132,7 +137,6 @@ config EROFS_FS_ZIP_DEFLATE config EROFS_FS_ZIP_ZSTD bool "EROFS Zstandard compressed data support" depends on EROFS_FS_ZIP - select ZSTD_DECOMPRESS help Saying Y here includes support for reading EROFS file systems containing Zstandard compressed data. It gives better compression @@ -147,8 +151,6 @@ config EROFS_FS_ZIP_ZSTD config EROFS_FS_ZIP_ACCEL bool "EROFS hardware decompression support" depends on EROFS_FS_ZIP - select CRYPTO - select CRYPTO_DEFLATE help Saying Y here includes hardware accelerator support for reading EROFS file systems containing compressed data. It gives better @@ -163,9 +165,7 @@ config EROFS_FS_ZIP_ACCEL config EROFS_FS_ONDEMAND bool "EROFS fscache-based on-demand read support (deprecated)" depends on EROFS_FS - select NETFS_SUPPORT select FSCACHE - select CACHEFILES select CACHEFILES_ONDEMAND help This permits EROFS to use fscache-backed data blobs with on-demand From c99fab6e80b76422741d34aafc2f930a482afbdd Mon Sep 17 00:00:00 2001 From: Junli Liu Date: Tue, 5 Aug 2025 09:19:58 +0800 Subject: [PATCH 240/279] erofs: fix atomic context detection when !CONFIG_DEBUG_LOCK_ALLOC Since EROFS handles decompression in non-atomic contexts due to uncontrollable decompression latencies and vmap() usage, it tries to detect atomic contexts and only kicks off a kworker on demand in order to reduce unnecessary scheduling overhead. However, the current approach is insufficient and can lead to sleeping function calls in invalid contexts, causing kernel warnings and potential system instability. See the stacktrace [1] and previous discussion [2]. The current implementation only checks rcu_read_lock_any_held(), which behaves inconsistently across different kernel configurations: - When CONFIG_DEBUG_LOCK_ALLOC is enabled: correctly detects RCU critical sections by checking rcu_lock_map - When CONFIG_DEBUG_LOCK_ALLOC is disabled: compiles to "!preemptible()", which only checks preempt_count and misses RCU critical sections This patch introduces z_erofs_in_atomic() to provide comprehensive atomic context detection: 1. Check RCU preemption depth when CONFIG_PREEMPTION is enabled, as RCU critical sections may not affect preempt_count but still require atomic handling 2. Always use async processing when CONFIG_PREEMPT_COUNT is disabled, as preemption state cannot be reliably determined 3. Fall back to standard preemptible() check for remaining cases The function replaces the previous complex condition check and ensures that z_erofs always uses (kthread_)work in atomic contexts to minimize scheduling overhead and prevent sleeping in invalid contexts. [1] Problem stacktrace [ 61.266692] BUG: sleeping function called from invalid context at kernel/locking/rtmutex_api.c:510 [ 61.266702] in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 107, name: irq/54-ufshcd [ 61.266704] preempt_count: 0, expected: 0 [ 61.266705] RCU nest depth: 2, expected: 0 [ 61.266710] CPU: 0 UID: 0 PID: 107 Comm: irq/54-ufshcd Tainted: G W O 6.12.17 #1 [ 61.266714] Tainted: [W]=WARN, [O]=OOT_MODULE [ 61.266715] Hardware name: schumacher (DT) [ 61.266717] Call trace: [ 61.266718] dump_backtrace+0x9c/0x100 [ 61.266727] show_stack+0x20/0x38 [ 61.266728] dump_stack_lvl+0x78/0x90 [ 61.266734] dump_stack+0x18/0x28 [ 61.266736] __might_resched+0x11c/0x180 [ 61.266743] __might_sleep+0x64/0xc8 [ 61.266745] mutex_lock+0x2c/0xc0 [ 61.266748] z_erofs_decompress_queue+0xe8/0x978 [ 61.266753] z_erofs_decompress_kickoff+0xa8/0x190 [ 61.266756] z_erofs_endio+0x168/0x288 [ 61.266758] bio_endio+0x160/0x218 [ 61.266762] blk_update_request+0x244/0x458 [ 61.266766] scsi_end_request+0x38/0x278 [ 61.266770] scsi_io_completion+0x4c/0x600 [ 61.266772] scsi_finish_command+0xc8/0xe8 [ 61.266775] scsi_complete+0x88/0x148 [ 61.266777] blk_mq_complete_request+0x3c/0x58 [ 61.266780] scsi_done_internal+0xcc/0x158 [ 61.266782] scsi_done+0x1c/0x30 [ 61.266783] ufshcd_compl_one_cqe+0x12c/0x438 [ 61.266786] __ufshcd_transfer_req_compl+0x2c/0x78 [ 61.266788] ufshcd_poll+0xf4/0x210 [ 61.266789] ufshcd_transfer_req_compl+0x50/0x88 [ 61.266791] ufshcd_intr+0x21c/0x7c8 [ 61.266792] irq_forced_thread_fn+0x44/0xd8 [ 61.266796] irq_thread+0x1a4/0x358 [ 61.266799] kthread+0x12c/0x138 [ 61.266802] ret_from_fork+0x10/0x20 [2] https://lore.kernel.org/r/58b661d0-0ebb-4b45-a10d-c5927fb791cd@paulmck-laptop Signed-off-by: Junli Liu Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20250805011957.911186-1-liujunli@lixiang.com [ Gao Xiang: Use the original trace in v1. ] Signed-off-by: Gao Xiang --- fs/erofs/zdata.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 792f20888a8f..2d73297003d2 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1432,6 +1432,16 @@ static void z_erofs_decompressqueue_kthread_work(struct kthread_work *work) } #endif +/* Use (kthread_)work in atomic contexts to minimize scheduling overhead */ +static inline bool z_erofs_in_atomic(void) +{ + if (IS_ENABLED(CONFIG_PREEMPTION) && rcu_preempt_depth()) + return true; + if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) + return true; + return !preemptible(); +} + static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, int bios) { @@ -1446,8 +1456,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, if (atomic_add_return(bios, &io->pending_bios)) return; - /* Use (kthread_)work and sync decompression for atomic contexts only */ - if (!in_task() || irqs_disabled() || rcu_read_lock_any_held()) { + if (z_erofs_in_atomic()) { #ifdef CONFIG_EROFS_FS_PCPU_KTHREAD struct kthread_worker *worker; From 0b96d9bed324a1c1b7d02bfb9596351ef178428d Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Thu, 7 Aug 2025 16:20:19 +0800 Subject: [PATCH 241/279] erofs: fix block count report when 48-bit layout is on Fix incorrect shift order when combining the 48-bit block count. Fixes: 2e1473d5195f ("erofs: implement 48-bit block addressing for unencoded inodes") Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20250807082019.3093539-1-hsiangkao@linux.alibaba.com --- fs/erofs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 8c7a5985b4ee..1b529ace4db0 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -323,8 +323,8 @@ static int erofs_read_superblock(struct super_block *sb) sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact)); if (erofs_sb_has_48bit(sbi) && dsb->rootnid_8b) { sbi->root_nid = le64_to_cpu(dsb->rootnid_8b); - sbi->dif0.blocks = (sbi->dif0.blocks << 32) | - le16_to_cpu(dsb->rb.blocks_hi); + sbi->dif0.blocks = sbi->dif0.blocks | + ((u64)le16_to_cpu(dsb->rb.blocks_hi) << 32); } else { sbi->root_nid = le16_to_cpu(dsb->rb.rootnid_2b); } From 61399e0c5410567ef60cb1cda34cca42903842e3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 8 Aug 2025 19:03:22 +0200 Subject: [PATCH 242/279] rcu: Fix racy re-initialization of irq_work causing hangs RCU re-initializes the deferred QS irq work everytime before attempting to queue it. However there are situations where the irq work is attempted to be queued even though it is already queued. In that case re-initializing messes-up with the irq work queue that is about to be handled. The chances for that to happen are higher when the architecture doesn't support self-IPIs and irq work are then all lazy, such as with the following sequence: 1) rcu_read_unlock() is called when IRQs are disabled and there is a grace period involving blocked tasks on the node. The irq work is then initialized and queued. 2) The related tasks are unblocked and the CPU quiescent state is reported. rdp->defer_qs_iw_pending is reset to DEFER_QS_IDLE, allowing the irq work to be requeued in the future (note the previous one hasn't fired yet). 3) A new grace period starts and the node has blocked tasks. 4) rcu_read_unlock() is called when IRQs are disabled again. The irq work is re-initialized (but it's queued! and its node is cleared) and requeued. Which means it's requeued to itself. 5) The irq work finally fires with the tick. But since it was requeued to itself, it loops and hangs. Fix this with initializing the irq work only once before the CPU boots. Fixes: b41642c87716 ("rcu: Fix rcu_read_unlock() deadloop due to IRQ work") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202508071303.c1134cce-lkp@intel.com Signed-off-by: Frederic Weisbecker Reviewed-by: Joel Fernandes Signed-off-by: Neeraj Upadhyay (AMD) --- kernel/rcu/tree.c | 2 ++ kernel/rcu/tree.h | 1 + kernel/rcu/tree_plugin.h | 8 ++++++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 174ee243b349..8eff357b0436 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4262,6 +4262,8 @@ int rcutree_prepare_cpu(unsigned int cpu) rdp->rcu_iw_gp_seq = rdp->gp_seq - 1; trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl")); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + + rcu_preempt_deferred_qs_init(rdp); rcu_spawn_rnp_kthreads(rnp); rcu_spawn_cpu_nocb_kthread(cpu); ASSERT_EXCLUSIVE_WRITER(rcu_state.n_online_cpus); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index de6ca13a7b5f..b8bbe7960cda 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -488,6 +488,7 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp); static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); static void rcu_flavor_sched_clock_irq(int user); static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck); +static void rcu_preempt_deferred_qs_init(struct rcu_data *rdp); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); static bool rcu_is_callbacks_kthread(struct rcu_data *rdp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index fc14adf15cbb..4cd170b2d655 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -763,8 +763,6 @@ static void rcu_read_unlock_special(struct task_struct *t) cpu_online(rdp->cpu)) { // Get scheduler to re-evaluate and call hooks. // If !IRQ_WORK, FQS scan will eventually IPI. - rdp->defer_qs_iw = - IRQ_WORK_INIT_HARD(rcu_preempt_deferred_qs_handler); rdp->defer_qs_iw_pending = DEFER_QS_PENDING; irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu); } @@ -904,6 +902,10 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck) } } +static void rcu_preempt_deferred_qs_init(struct rcu_data *rdp) +{ + rdp->defer_qs_iw = IRQ_WORK_INIT_HARD(rcu_preempt_deferred_qs_handler); +} #else /* #ifdef CONFIG_PREEMPT_RCU */ /* @@ -1103,6 +1105,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck) WARN_ON_ONCE(!list_empty(&rnp->blkd_tasks)); } +static void rcu_preempt_deferred_qs_init(struct rcu_data *rdp) { } + #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ /* From 7573980c7049450a0af22acd4f8e96f37ea30c48 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 8 Aug 2025 10:45:05 -0700 Subject: [PATCH 243/279] MAINTAINERS: Mark Intel WWAN IOSM driver as orphaned This maintainer's email no longer works. Remove it from MAINTAINERS. I've been unable to locate a new maintainer for this at Intel. Mark the driver as Orphaned. Signed-off-by: Dave Hansen Cc: Loic Poulain Cc: Johannes Berg Cc: Andrew Lunn Acked-by: Sergey Ryazanov Link: https://patch.msgid.link/20250808174505.C9FF434F@davehans-spike.ostc.intel.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index eed16e53b9b2..ea82fc952c17 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12724,9 +12724,8 @@ S: Maintained F: drivers/platform/x86/intel/wmi/thunderbolt.c INTEL WWAN IOSM DRIVER -M: M Chetan Kumar L: netdev@vger.kernel.org -S: Maintained +S: Orphan F: drivers/net/wwan/iosm/ INTEL(R) FLEXIBLE RETURN AND EVENT DELIVERY From b56e9fb1c9669ce460dc899cecb09a54a6d71cf4 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 8 Aug 2025 10:53:24 -0700 Subject: [PATCH 244/279] MAINTAINERS: Mark Intel PTP DFL ToD as orphaned This maintainer's email no longer works. Remove it from MAINTAINERS. Also mark the code as an Orphan. Signed-off-by: Dave Hansen Cc: Richard Cochran Cc: Tianfei Zhang Cc: Andrew Lunn Link: https://patch.msgid.link/20250808175324.8C4B7354@davehans-spike.ostc.intel.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ea82fc952c17..3a46ef3f8a77 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12583,10 +12583,9 @@ S: Supported F: drivers/cpufreq/intel_pstate.c INTEL PTP DFL ToD DRIVER -M: Tianfei Zhang L: linux-fpga@vger.kernel.org L: netdev@vger.kernel.org -S: Maintained +S: Orphan F: drivers/ptp/ptp_dfl_tod.c INTEL QUADRATURE ENCODER PERIPHERAL DRIVER From b132a3b0c228125dfda03ba7c7903ef133e1ee21 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 8 Aug 2025 10:39:25 -0700 Subject: [PATCH 245/279] MAINTAINERS: Remove bouncing T7XX reviewer This reviewer's email no longer works. Remove it from MAINTAINERS. Signed-off-by: Dave Hansen Cc: Chandrashekar Devegowda Cc: Liu Haijun Cc: Ricardo Martinez Link: https://patch.msgid.link/20250808173925.FECE3782@davehans-spike.ostc.intel.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3a46ef3f8a77..6c7bc7b27eca 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15672,7 +15672,6 @@ MEDIATEK T7XX 5G WWAN MODEM DRIVER M: Chandrashekar Devegowda R: Chiranjeevi Rapolu R: Liu Haijun -R: M Chetan Kumar R: Ricardo Martinez L: netdev@vger.kernel.org S: Supported From 61aaca8b89fb98be58b8df19f01181bb983cccff Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Fri, 8 Aug 2025 15:31:08 +0200 Subject: [PATCH 246/279] net: usb: qmi_wwan: add Telit Cinterion FN990A w/audio composition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the following Telit Cinterion FN990A w/audio composition: 0x1077: tty (diag) + adb + rmnet + audio + tty (AT/NMEA) + tty (AT) + tty (AT) + tty (AT) T: Bus=01 Lev=01 Prnt=01 Port=09 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1077 Rev=05.04 S: Manufacturer=Telit Wireless Solutions S: Product=FN990 S: SerialNumber=67e04c35 C: #Ifs=10 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 0 Cls=01(audio) Sub=01 Prot=20 Driver=snd-usb-audio I: If#= 4 Alt= 1 #EPs= 1 Cls=01(audio) Sub=02 Prot=20 Driver=snd-usb-audio E: Ad=03(O) Atr=0d(Isoc) MxPS= 68 Ivl=1ms I: If#= 5 Alt= 1 #EPs= 1 Cls=01(audio) Sub=02 Prot=20 Driver=snd-usb-audio E: Ad=84(I) Atr=0d(Isoc) MxPS= 68 Ivl=1ms I: If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 7 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 8 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 9 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8c(I) Atr=03(Int.) MxPS= 10 Ivl=32ms Cc: stable@vger.kernel.org Signed-off-by: Fabio Porcedda Acked-by: Bjørn Mork Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index f5647ee0adde..e56901bb6ebc 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1361,6 +1361,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x1057, 2)}, /* Telit FN980 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990A */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1077, 2)}, /* Telit FN990A w/audio */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1080, 2)}, /* Telit FE990A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a0, 0)}, /* Telit FN920C04 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a4, 0)}, /* Telit FN920C04 */ From 52966bf71de98fef4ca7b3be1349adc7459d6d53 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 8 Aug 2025 07:45:23 -0400 Subject: [PATCH 247/279] ref_tracker: use %p instead of %px in debugfs dentry name As Kees points out, this is a kernel address leak, and debugging is not a sufficiently good reason to expose the real kernel address. Fixes: 65b584f53611 ("ref_tracker: automatically register a file in debugfs for a ref_tracker_dir") Reported-by: Kees Cook Closes: https://lore.kernel.org/netdev/202507301603.62E553F93@keescook/ Signed-off-by: Jeff Layton Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- lib/ref_tracker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c index a9e6ffcff04b..cce12287708e 100644 --- a/lib/ref_tracker.c +++ b/lib/ref_tracker.c @@ -434,7 +434,7 @@ void ref_tracker_dir_debugfs(struct ref_tracker_dir *dir) if (dentry && !xa_is_err(dentry)) return; - ret = snprintf(name, sizeof(name), "%s@%px", dir->class, dir); + ret = snprintf(name, sizeof(name), "%s@%p", dir->class, dir); name[sizeof(name) - 1] = '\0'; if (ret < sizeof(name)) { From de1e963ad064caf73ee2c7485b925f381a3aefbf Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 8 Aug 2025 13:16:34 +0100 Subject: [PATCH 248/279] net: stmmac: rk: put the PHY clock on remove The PHY clock (bsp_priv->clk_phy) is obtained using of_clk_get(), which doesn't take part in the devm release. Therefore, when a device is unbound, this clock needs to be explicitly put. Fix this. Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Fixes: fecd4d7eef8b ("net: stmmac: dwmac-rk: Add integrated PHY support") Link: https://patch.msgid.link/E1ukM1S-0086qo-PC@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 79b92130a03f..f6687c2f30f6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1765,11 +1765,15 @@ static int rk_gmac_probe(struct platform_device *pdev) static void rk_gmac_remove(struct platform_device *pdev) { - struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(&pdev->dev); + struct stmmac_priv *priv = netdev_priv(platform_get_drvdata(pdev)); + struct rk_priv_data *bsp_priv = priv->plat->bsp_priv; stmmac_dvr_remove(&pdev->dev); rk_gmac_powerdown(bsp_priv); + + if (priv->plat->phy_node && bsp_priv->integrated_phy) + clk_put(bsp_priv->clk_phy); } #ifdef CONFIG_PM_SLEEP From 89886abd073489e26614e4d80fb8eb70d3938a0b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 8 Aug 2025 13:16:39 +0100 Subject: [PATCH 249/279] net: stmmac: dwc-qos: fix clk prepare/enable leak on probe failure dwc_eth_dwmac_probe() gets bulk clocks, and then prepares and enables them. Unfortunately, if dwc_eth_dwmac_config_dt() or stmmac_dvr_probe() fail, we leave the clocks prepared and enabled. Fix this by using devm_clk_bulk_get_all_enabled() to combine the steps and provide devm based release of the prepare and enable state. This also fixes a similar leakin dwc_eth_dwmac_remove() which wasn't correctly retrieving the struct plat_stmmacenet_data. This becomes unnecessary. Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Fixes: a045e40645df ("net: stmmac: refactor clock management in EQoS driver") Link: https://patch.msgid.link/E1ukM1X-0086qu-Td@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index 09ae16e026eb..6c363f9b0ce2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -330,15 +330,11 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); - ret = devm_clk_bulk_get_all(&pdev->dev, &plat_dat->clks); + ret = devm_clk_bulk_get_all_enabled(&pdev->dev, &plat_dat->clks); if (ret < 0) - return dev_err_probe(&pdev->dev, ret, "Failed to retrieve all required clocks\n"); + return dev_err_probe(&pdev->dev, ret, "Failed to retrieve and enable all required clocks\n"); plat_dat->num_clks = ret; - ret = clk_bulk_prepare_enable(plat_dat->num_clks, plat_dat->clks); - if (ret) - return dev_err_probe(&pdev->dev, ret, "Failed to enable clocks\n"); - plat_dat->stmmac_clk = stmmac_pltfr_find_clk(plat_dat, data->stmmac_clk_name); @@ -346,7 +342,6 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) ret = data->probe(pdev, plat_dat, &stmmac_res); if (ret < 0) { dev_err_probe(&pdev->dev, ret, "failed to probe subdriver\n"); - clk_bulk_disable_unprepare(plat_dat->num_clks, plat_dat->clks); return ret; } @@ -370,15 +365,11 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) static void dwc_eth_dwmac_remove(struct platform_device *pdev) { const struct dwc_eth_dwmac_data *data = device_get_match_data(&pdev->dev); - struct plat_stmmacenet_data *plat_dat = dev_get_platdata(&pdev->dev); stmmac_dvr_remove(&pdev->dev); if (data->remove) data->remove(pdev); - - if (plat_dat) - clk_bulk_disable_unprepare(plat_dat->num_clks, plat_dat->clks); } static const struct of_device_id dwc_eth_dwmac_match[] = { From ab5ac789efa985e42bdb5b5e8a7a4ad84935d44e Mon Sep 17 00:00:00 2001 From: Sukrut Heroorkar Date: Tue, 5 Aug 2025 00:56:14 +0200 Subject: [PATCH 250/279] selftests/proc: fix string literal warning in proc-maps-race.c This change resolves non literal string format warning invoked for proc-maps-race.c while compiling. proc-maps-race.c:205:17: warning: format not a string literal and no format arguments [-Wformat-security] 205 | printf(text); | ^~~~~~ proc-maps-race.c:209:17: warning: format not a string literal and no format arguments [-Wformat-security] 209 | printf(text); | ^~~~~~ proc-maps-race.c: In function `print_last_lines': proc-maps-race.c:224:9: warning: format not a string literal and no format arguments [-Wformat-security] 224 | printf(start); | ^~~~~~ Add string format specifier %s for the printf calls in both print_first_lines() and print_last_lines() thus resolving the warnings. The test executes fine after this change thus causing no effect to the functional behavior of the test. Link: https://lkml.kernel.org/r/20250804225633.841777-1-hsukrut3@gmail.com Fixes: aadc099c480f ("selftests/proc: add verbose mode for /proc/pid/maps tearing tests") Signed-off-by: Sukrut Heroorkar Acked-by: Suren Baghdasaryan Cc: David Hunter Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/proc/proc-maps-race.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/proc/proc-maps-race.c b/tools/testing/selftests/proc/proc-maps-race.c index 66773685a047..94bba4553130 100644 --- a/tools/testing/selftests/proc/proc-maps-race.c +++ b/tools/testing/selftests/proc/proc-maps-race.c @@ -202,11 +202,11 @@ static void print_first_lines(char *text, int nr) int offs = end - text; text[offs] = '\0'; - printf(text); + printf("%s", text); text[offs] = '\n'; printf("\n"); } else { - printf(text); + printf("%s", text); } } @@ -221,7 +221,7 @@ static void print_last_lines(char *text, int nr) nr--; start--; } - printf(start); + printf("%s", start); } static void print_boundaries(const char *title, FIXTURE_DATA(proc_maps_race) *self) From cf1b80dc31a1137b8b4568c138b453bf7453204a Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Wed, 6 Aug 2025 20:26:11 +0530 Subject: [PATCH 251/279] mm: pass page directly instead of using folio_page In commit_anon_folio_batch(), we iterate over all pages pointed to by the PTE batch. Therefore we need to know the first page of the batch; currently we derive that via folio_page(folio, 0), but, that takes us to the first (head) page of the folio instead - our PTE batch may lie in the middle of the folio, leading to incorrectness. Bite the bullet and throw away the micro-optimization of reusing the folio in favour of code simplicity. Derive the page and the folio in change_pte_range, and pass the page too to commit_anon_folio_batch to fix the aforementioned issue. Link: https://lkml.kernel.org/r/20250806145611.3962-1-dev.jain@arm.com Fixes: cac1db8c3aad ("mm: optimize mprotect() by PTE batching") Reported-by: syzbot+57bcc752f0df8bb1365c@syzkaller.appspotmail.com Signed-off-by: Dev Jain Reviewed-by: Lorenzo Stoakes Debugged-by: David Hildenbrand Acked-by: David Hildenbrand Cc: Anshuman Khandual Cc: Barry Song Cc: Catalin Marinas Cc: Christophe Leroy Cc: Hugh Dickins Cc: Jann Horn Cc: Joey Gouly Cc: Kevin Brodsky Cc: Lance Yang Cc: Liam Howlett Cc: Matthew Wilcox (Oracle) Cc: Peter Xu Cc: Ryan Roberts Cc: Vlastimil Babka Cc: Will Deacon Cc: Yang Shi Cc: Yicong Yang Cc: Zhenhua Huang Cc: Zi Yan Signed-off-by: Andrew Morton --- mm/mprotect.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/mm/mprotect.c b/mm/mprotect.c index 78bded7acf79..113b48985834 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -120,9 +120,8 @@ static int mprotect_folio_pte_batch(struct folio *folio, pte_t *ptep, static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr, pte_t oldpte, pte_t *pte, int target_node, - struct folio **foliop) + struct folio *folio) { - struct folio *folio = NULL; bool ret = true; bool toptier; int nid; @@ -131,7 +130,6 @@ static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr, if (pte_protnone(oldpte)) goto skip; - folio = vm_normal_folio(vma, addr, oldpte); if (!folio) goto skip; @@ -173,7 +171,6 @@ static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr, folio_xchg_access_time(folio, jiffies_to_msecs(jiffies)); skip: - *foliop = folio; return ret; } @@ -231,10 +228,9 @@ static int page_anon_exclusive_sub_batch(int start_idx, int max_len, * retrieve sub-batches. */ static void commit_anon_folio_batch(struct vm_area_struct *vma, - struct folio *folio, unsigned long addr, pte_t *ptep, + struct folio *folio, struct page *first_page, unsigned long addr, pte_t *ptep, pte_t oldpte, pte_t ptent, int nr_ptes, struct mmu_gather *tlb) { - struct page *first_page = folio_page(folio, 0); bool expected_anon_exclusive; int sub_batch_idx = 0; int len; @@ -251,7 +247,7 @@ static void commit_anon_folio_batch(struct vm_area_struct *vma, } static void set_write_prot_commit_flush_ptes(struct vm_area_struct *vma, - struct folio *folio, unsigned long addr, pte_t *ptep, + struct folio *folio, struct page *page, unsigned long addr, pte_t *ptep, pte_t oldpte, pte_t ptent, int nr_ptes, struct mmu_gather *tlb) { bool set_write; @@ -270,7 +266,7 @@ static void set_write_prot_commit_flush_ptes(struct vm_area_struct *vma, /* idx = */ 0, set_write, tlb); return; } - commit_anon_folio_batch(vma, folio, addr, ptep, oldpte, ptent, nr_ptes, tlb); + commit_anon_folio_batch(vma, folio, page, addr, ptep, oldpte, ptent, nr_ptes, tlb); } static long change_pte_range(struct mmu_gather *tlb, @@ -305,15 +301,19 @@ static long change_pte_range(struct mmu_gather *tlb, const fpb_t flags = FPB_RESPECT_SOFT_DIRTY | FPB_RESPECT_WRITE; int max_nr_ptes = (end - addr) >> PAGE_SHIFT; struct folio *folio = NULL; + struct page *page; pte_t ptent; + page = vm_normal_page(vma, addr, oldpte); + if (page) + folio = page_folio(page); /* * Avoid trapping faults against the zero or KSM * pages. See similar comment in change_huge_pmd. */ if (prot_numa) { int ret = prot_numa_skip(vma, addr, oldpte, pte, - target_node, &folio); + target_node, folio); if (ret) { /* determine batch to skip */ @@ -323,9 +323,6 @@ static long change_pte_range(struct mmu_gather *tlb, } } - if (!folio) - folio = vm_normal_folio(vma, addr, oldpte); - nr_ptes = mprotect_folio_pte_batch(folio, pte, oldpte, max_nr_ptes, flags); oldpte = modify_prot_start_ptes(vma, addr, pte, nr_ptes); @@ -351,7 +348,7 @@ static long change_pte_range(struct mmu_gather *tlb, */ if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pte_write(ptent)) - set_write_prot_commit_flush_ptes(vma, folio, + set_write_prot_commit_flush_ptes(vma, folio, page, addr, pte, oldpte, ptent, nr_ptes, tlb); else prot_commit_flush_ptes(vma, addr, pte, oldpte, ptent, From aba6faec0103ed8f169be8dce2ead41fcb689446 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 6 Aug 2025 15:00:22 -0700 Subject: [PATCH 252/279] userfaultfd: fix a crash in UFFDIO_MOVE when PMD is a migration entry When UFFDIO_MOVE encounters a migration PMD entry, it proceeds with obtaining a folio and accessing it even though the entry is swp_entry_t. Add the missing check and let split_huge_pmd() handle migration entries. While at it also remove unnecessary folio check. [surenb@google.com: remove extra folio check, per David] Link: https://lkml.kernel.org/r/20250807200418.1963585-1-surenb@google.com Link: https://lkml.kernel.org/r/20250806220022.926763-1-surenb@google.com Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") Signed-off-by: Suren Baghdasaryan Reported-by: syzbot+b446dbe27035ef6bd6c2@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68794b5c.a70a0220.693ce.0050.GAE@google.com/ Reviewed-by: Peter Xu Acked-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Lokesh Gidra Cc: Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index cbed91b09640..45e6290e2e8b 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1821,13 +1821,16 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start, /* Check if we can move the pmd without splitting it. */ if (move_splits_huge_pmd(dst_addr, src_addr, src_start + len) || !pmd_none(dst_pmdval)) { - struct folio *folio = pmd_folio(*src_pmd); + /* Can be a migration entry */ + if (pmd_present(*src_pmd)) { + struct folio *folio = pmd_folio(*src_pmd); - if (!folio || (!is_huge_zero_folio(folio) && - !PageAnonExclusive(&folio->page))) { - spin_unlock(ptl); - err = -EBUSY; - break; + if (!is_huge_zero_folio(folio) && + !PageAnonExclusive(&folio->page)) { + spin_unlock(ptl); + err = -EBUSY; + break; + } } spin_unlock(ptl); From 0b5be138ce00f421bd7cc5a226061bd62c4ab850 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 7 Aug 2025 19:58:19 +0100 Subject: [PATCH 253/279] mm/mremap: avoid expensive folio lookup on mremap folio pte batch It was discovered in the attached report that commit f822a9a81a31 ("mm: optimize mremap() by PTE batching") introduced a significant performance regression on a number of metrics on x86-64, most notably stress-ng.bigheap.realloc_calls_per_sec - indicating a 37.3% regression in number of mremap() calls per second. I was able to reproduce this locally on an intel x86-64 raptor lake system, noting an average of 143,857 realloc calls/sec (with a stddev of 4,531 or 3.1%) prior to this patch being applied, and 81,503 afterwards (stddev of 2,131 or 2.6%) - a 43.3% regression. During testing I was able to determine that there was no meaningful difference in efforts to optimise the folio_pte_batch() operation, nor checking folio_test_large(). This is within expectation, as a regression this large is likely to indicate we are accessing memory that is not yet in a cache line (and perhaps may even cause a main memory fetch). The expectation by those discussing this from the start was that vm_normal_folio() (invoked by mremap_folio_pte_batch()) would likely be the culprit due to having to retrieve memory from the vmemmap (which mremap() page table moves does not otherwise do, meaning this is inevitably cold memory). I was able to definitively determine that this theory is indeed correct and the cause of the issue. The solution is to restore part of an approach previously discarded on review, that is to invoke pte_batch_hint() which explicitly determines, through reference to the PTE alone (thus no vmemmap lookup), what the PTE batch size may be. On platforms other than arm64 this is currently hardcoded to return 1, so this naturally resolves the issue for x86-64, and for arm64 introduces little to no overhead as the pte cache line will be hot. With this patch applied, we move from 81,503 realloc calls/sec to 138,701 (stddev of 496.1 or 0.4%), which is a -3.6% regression, however accounting for the variance in the original result, this is broadly restoring performance to its prior state. Link: https://lkml.kernel.org/r/20250807185819.199865-1-lorenzo.stoakes@oracle.com Fixes: f822a9a81a31 ("mm: optimize mremap() by PTE batching") Signed-off-by: Lorenzo Stoakes Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202508071609.4e743d7c-lkp@intel.com Acked-by: David Hildenbrand Acked-by: Pedro Falcato Reviewed-by: Barry Song Acked-by: Vlastimil Babka Reviewed-by: Dev Jain Cc: Ryan Roberts Cc: Barry Song Cc: Jann Horn Cc: Liam Howlett Signed-off-by: Andrew Morton --- mm/mremap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/mremap.c b/mm/mremap.c index 677a4d744df9..9afa8cd524f5 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -179,6 +179,10 @@ static int mremap_folio_pte_batch(struct vm_area_struct *vma, unsigned long addr if (max_nr == 1) return 1; + /* Avoid expensive folio lookup if we stand no chance of benefit. */ + if (pte_batch_hint(ptep, pte) == 1) + return 1; + folio = vm_normal_folio(vma, addr, pte); if (!folio || !folio_test_large(folio)) return 1; From c0e1b774f68bdbea1618e356e30672c7f1e32509 Mon Sep 17 00:00:00 2001 From: Jialin Wang Date: Fri, 8 Aug 2025 00:54:55 +0800 Subject: [PATCH 254/279] proc: proc_maps_open allow proc_mem_open to return NULL The commit 65c66047259f ("proc: fix the issue of proc_mem_open returning NULL") caused proc_maps_open() to return -ESRCH when proc_mem_open() returns NULL. This breaks legitimate /proc//maps access for kernel threads since kernel threads have NULL mm_struct. The regression causes perf to fail and exit when profiling a kernel thread: # perf record -v -g -p $(pgrep kswapd0) ... couldn't open /proc/65/task/65/maps This patch partially reverts the commit to fix it. Link: https://lkml.kernel.org/r/20250807165455.73656-1-wjl.linux@gmail.com Fixes: 65c66047259f ("proc: fix the issue of proc_mem_open returning NULL") Signed-off-by: Jialin Wang Cc: Penglei Jiang Cc: Signed-off-by: Andrew Morton --- fs/proc/task_mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ee1e4ccd33bd..29cca0e6d0ff 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -340,8 +340,8 @@ static int proc_maps_open(struct inode *inode, struct file *file, priv->inode = inode; priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); - if (IS_ERR_OR_NULL(priv->mm)) { - int err = priv->mm ? PTR_ERR(priv->mm) : -ESRCH; + if (IS_ERR(priv->mm)) { + int err = PTR_ERR(priv->mm); seq_release_private(inode, file); return err; From 8ee90742cf29427683294a6a80f1e2b7f4af1cff Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Thu, 7 Aug 2025 12:08:32 +0800 Subject: [PATCH 255/279] net: phy: nxp-c45-tja11xx: fix the PHY ID mismatch issue when using C45 TJA1103/04/20/21 support both C22 and C45 accessing methods. The TJA11xx driver has implemented the match_phy_device() API. However, it does not handle the C45 ID. If C45 was used to access TJA11xx, match_phy_device() would always return false due to phydev->phy_id only used by C22 being empty, resulting in the generic phy driver being used for TJA11xx PHYs. Therefore, check phydev->c45_ids.device_ids[MDIO_MMD_PMAPMD] when using C45. Fixes: 1b76b2497aba ("net: phy: nxp-c45-tja11xx: simplify .match_phy_device OP") Signed-off-by: Clark Wang Link: https://patch.msgid.link/20250807040832.2455306-1-xiaoning.wang@nxp.com Signed-off-by: Paolo Abeni --- drivers/net/phy/nxp-c45-tja11xx.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 4c6d905f0a9f..87adb6508017 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -1965,24 +1965,27 @@ static int nxp_c45_macsec_ability(struct phy_device *phydev) return macsec_ability; } +static bool tja11xx_phy_id_compare(struct phy_device *phydev, + const struct phy_driver *phydrv) +{ + u32 id = phydev->is_c45 ? phydev->c45_ids.device_ids[MDIO_MMD_PMAPMD] : + phydev->phy_id; + + return phy_id_compare(id, phydrv->phy_id, phydrv->phy_id_mask); +} + static int tja11xx_no_macsec_match_phy_device(struct phy_device *phydev, const struct phy_driver *phydrv) { - if (!phy_id_compare(phydev->phy_id, phydrv->phy_id, - phydrv->phy_id_mask)) - return 0; - - return !nxp_c45_macsec_ability(phydev); + return tja11xx_phy_id_compare(phydev, phydrv) && + !nxp_c45_macsec_ability(phydev); } static int tja11xx_macsec_match_phy_device(struct phy_device *phydev, const struct phy_driver *phydrv) { - if (!phy_id_compare(phydev->phy_id, phydrv->phy_id, - phydrv->phy_id_mask)) - return 0; - - return nxp_c45_macsec_ability(phydev); + return tja11xx_phy_id_compare(phydev, phydrv) && + nxp_c45_macsec_ability(phydev); } static const struct nxp_c45_regmap tja1120_regmap = { From 8ea25274ebaf2f6be8be374633b2ed8348ec0e70 Mon Sep 17 00:00:00 2001 From: Buday Csaba Date: Thu, 7 Aug 2025 15:54:49 +0200 Subject: [PATCH 256/279] net: mdiobus: release reset_gpio in mdiobus_unregister_device() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit reset_gpio is claimed in mdiobus_register_device(), but it is not released in mdiobus_unregister_device(). It is instead only released when the whole MDIO bus is unregistered. When a device uses the reset_gpio property, it becomes impossible to unregister it and register it again, because the GPIO remains claimed. This patch resolves that issue. Fixes: bafbdd527d56 ("phylib: Add device reset GPIO support") # see notes Reviewed-by: Andrew Lunn Cc: Csókás Bence [ csokas.bence: Resolve rebase conflict and clarify msg ] Signed-off-by: Buday Csaba Link: https://patch.msgid.link/20250807135449.254254-2-csokas.bence@prolan.hu Signed-off-by: Paolo Abeni --- drivers/net/phy/mdio_bus.c | 1 + drivers/net/phy/mdio_bus_provider.c | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index fda2e27c1810..cad6ed3aa10b 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -91,6 +91,7 @@ int mdiobus_unregister_device(struct mdio_device *mdiodev) if (mdiodev->bus->mdio_map[mdiodev->addr] != mdiodev) return -EINVAL; + gpiod_put(mdiodev->reset_gpio); reset_control_put(mdiodev->reset_ctrl); mdiodev->bus->mdio_map[mdiodev->addr] = NULL; diff --git a/drivers/net/phy/mdio_bus_provider.c b/drivers/net/phy/mdio_bus_provider.c index 48dc4bf85125..f43973e73ea3 100644 --- a/drivers/net/phy/mdio_bus_provider.c +++ b/drivers/net/phy/mdio_bus_provider.c @@ -443,9 +443,6 @@ void mdiobus_unregister(struct mii_bus *bus) if (!mdiodev) continue; - if (mdiodev->reset_gpio) - gpiod_put(mdiodev->reset_gpio); - mdiodev->device_remove(mdiodev); mdiodev->device_free(mdiodev); } From c8a9a619c072e9a45e9a9b4b035269427dc00aa8 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Fri, 8 Aug 2025 09:36:54 +0000 Subject: [PATCH 257/279] dt-bindings: net: thead,th1520-gmac: Describe APB interface clock Besides ones for GMAC core and peripheral registers, the TH1520 GMAC requires one more clock for configuring APB glue registers. Describe it in the binding. Fixes: f920ce04c399 ("dt-bindings: net: Add T-HEAD dwmac support") Signed-off-by: Yao Zi Acked-by: Krzysztof Kozlowski Reviewed-by: Drew Fustini Link: https://patch.msgid.link/20250808093655.48074-3-ziyao@disroot.org Signed-off-by: Paolo Abeni --- .../devicetree/bindings/net/thead,th1520-gmac.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml b/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml index 6d9de3303762..b3492a9aa4ef 100644 --- a/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml +++ b/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml @@ -62,11 +62,13 @@ properties: items: - description: GMAC main clock - description: Peripheral registers interface clock + - description: APB glue registers interface clock clock-names: items: - const: stmmaceth - const: pclk + - const: apb interrupts: items: @@ -88,8 +90,8 @@ examples: compatible = "thead,th1520-gmac", "snps,dwmac-3.70a"; reg = <0xe7070000 0x2000>, <0xec003000 0x1000>; reg-names = "dwmac", "apb"; - clocks = <&clk 1>, <&clk 2>; - clock-names = "stmmaceth", "pclk"; + clocks = <&clk 1>, <&clk 2>, <&clk 3>; + clock-names = "stmmaceth", "pclk", "apb"; interrupts = <66>; interrupt-names = "macirq"; phy-mode = "rgmii-id"; From 4cc339ce482ba78589a2d5cbe1c84b735d263383 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Fri, 8 Aug 2025 09:36:55 +0000 Subject: [PATCH 258/279] net: stmmac: thead: Get and enable APB clock on initialization It's necessary to adjust the MAC TX clock when the linkspeed changes, but it's noted such adjustment always fails on TH1520 SoC, and reading back from APB glue registers that control clock generation results in garbage, causing broken link. With some testing, it's found a clock must be ungated for access to APB glue registers. Without any consumer, the clock is automatically disabled during late kernel startup. Let's get and enable it if it's described in devicetree. For backward compatibility with older devicetrees, probing won't fail if the APB clock isn't found. In this case, we emit a warning since the link will break if the speed changes. Fixes: 33a1a01e3afa ("net: stmmac: Add glue layer for T-HEAD TH1520 SoC") Signed-off-by: Yao Zi Tested-by: Drew Fustini Reviewed-by: Drew Fustini Link: https://patch.msgid.link/20250808093655.48074-4-ziyao@disroot.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c index c72ee759aae5..f2946bea0bc2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c @@ -211,6 +211,7 @@ static int thead_dwmac_probe(struct platform_device *pdev) struct stmmac_resources stmmac_res; struct plat_stmmacenet_data *plat; struct thead_dwmac *dwmac; + struct clk *apb_clk; void __iomem *apb; int ret; @@ -224,6 +225,19 @@ static int thead_dwmac_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, PTR_ERR(plat), "dt configuration failed\n"); + /* + * The APB clock is essential for accessing glue registers. However, + * old devicetrees don't describe it correctly. We continue to probe + * and emit a warning if it isn't present. + */ + apb_clk = devm_clk_get_enabled(&pdev->dev, "apb"); + if (PTR_ERR(apb_clk) == -ENOENT) + dev_warn(&pdev->dev, + "cannot get apb clock, link may break after speed changes\n"); + else if (IS_ERR(apb_clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(apb_clk), + "failed to get apb clock\n"); + dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); if (!dwmac) return -ENOMEM; From a7f75e2883c4bd57b12c3be61bb926929adad9c0 Mon Sep 17 00:00:00 2001 From: Yao Zi Date: Fri, 8 Aug 2025 09:36:56 +0000 Subject: [PATCH 259/279] riscv: dts: thead: Add APB clocks for TH1520 GMACs Describe perisys-apb4-hclk as the APB clock for TH1520 SoC, which is essential for accessing GMAC glue registers. Fixes: 7e756671a664 ("riscv: dts: thead: Add TH1520 ethernet nodes") Signed-off-by: Yao Zi Reviewed-by: Drew Fustini Tested-by: Drew Fustini Link: https://patch.msgid.link/20250808093655.48074-5-ziyao@disroot.org Signed-off-by: Paolo Abeni --- arch/riscv/boot/dts/thead/th1520.dtsi | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/riscv/boot/dts/thead/th1520.dtsi b/arch/riscv/boot/dts/thead/th1520.dtsi index 42724bf7e90e..03f1d7319049 100644 --- a/arch/riscv/boot/dts/thead/th1520.dtsi +++ b/arch/riscv/boot/dts/thead/th1520.dtsi @@ -297,8 +297,9 @@ gmac1: ethernet@ffe7060000 { reg-names = "dwmac", "apb"; interrupts = <67 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq"; - clocks = <&clk CLK_GMAC_AXI>, <&clk CLK_GMAC1>; - clock-names = "stmmaceth", "pclk"; + clocks = <&clk CLK_GMAC_AXI>, <&clk CLK_GMAC1>, + <&clk CLK_PERISYS_APB4_HCLK>; + clock-names = "stmmaceth", "pclk", "apb"; snps,pbl = <32>; snps,fixed-burst; snps,multicast-filter-bins = <64>; @@ -319,8 +320,9 @@ gmac0: ethernet@ffe7070000 { reg-names = "dwmac", "apb"; interrupts = <66 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq"; - clocks = <&clk CLK_GMAC_AXI>, <&clk CLK_GMAC0>; - clock-names = "stmmaceth", "pclk"; + clocks = <&clk CLK_GMAC_AXI>, <&clk CLK_GMAC0>, + <&clk CLK_PERISYS_APB4_HCLK>; + clock-names = "stmmaceth", "pclk", "apb"; snps,pbl = <32>; snps,fixed-burst; snps,multicast-filter-bins = <64>; From e93f7af148222303c4632318536c0f649b4ee5b1 Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Fri, 8 Aug 2025 11:57:56 -0700 Subject: [PATCH 260/279] docs: Fix name for net.ipv4.udp_child_hash_entries udp_child_ehash_entries -> udp_child_hash_entries Fixes: 9804985bf27f ("udp: Introduce optional per-netns hash table.") Signed-off-by: Jordan Rife Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250808185800.1189042-1-jordan@jrife.io Signed-off-by: Paolo Abeni --- Documentation/networking/ip-sysctl.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index bb620f554598..9756d16e3df1 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1420,7 +1420,7 @@ udp_hash_entries - INTEGER A negative value means the networking namespace does not own its hash buckets and shares the initial networking namespace's one. -udp_child_ehash_entries - INTEGER +udp_child_hash_entries - INTEGER Control the number of hash buckets for UDP sockets in the child networking namespace, which must be set before clone() or unshare(). From bda053d6445717f8a4cd76f88caea2e39299fe07 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Aug 2025 17:12:03 -0700 Subject: [PATCH 261/279] selftests: drv-net: don't assume device has only 2 queues The test is implicitly assuming the device only has 2 queues. A real device will likely have more. The exact problem is that because NAPIs get added to the list from the head, the netlink dump reports them in reverse order. So the naive napis[0] will actually likely give us the _last_ NAPI, not the first one. Re-enable all the NAPIs instead of hard-coding 2 in the test. This way the NAPIs we operated on will always reappear, doesn't matter where they were in the registration order. Fixes: e6d76268813d ("net: Update threaded state in napi config in netif_set_threaded") Signed-off-by: Jakub Kicinski Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250809001205.1147153-2-kuba@kernel.org Signed-off-by: Paolo Abeni --- tools/testing/selftests/drivers/net/napi_threaded.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py index b2698db39817..9699a100a87d 100755 --- a/tools/testing/selftests/drivers/net/napi_threaded.py +++ b/tools/testing/selftests/drivers/net/napi_threaded.py @@ -35,6 +35,8 @@ def _setup_deferred_cleanup(cfg) -> None: threaded = cmd(f"cat /sys/class/net/{cfg.ifname}/threaded").stdout defer(_set_threaded_state, cfg, threaded) + return combined + def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: """ @@ -49,7 +51,7 @@ def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: napi0_id = napis[0]['id'] napi1_id = napis[1]['id'] - _setup_deferred_cleanup(cfg) + qcnt = _setup_deferred_cleanup(cfg) # set threaded _set_threaded_state(cfg, 1) @@ -62,7 +64,7 @@ def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: nl.napi_set({'id': napi1_id, 'threaded': 'disabled'}) cmd(f"ethtool -L {cfg.ifname} combined 1") - cmd(f"ethtool -L {cfg.ifname} combined 2") + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") _assert_napi_threaded_enabled(nl, napi0_id) _assert_napi_threaded_disabled(nl, napi1_id) @@ -80,7 +82,7 @@ def change_num_queues(cfg, nl) -> None: napi0_id = napis[0]['id'] napi1_id = napis[1]['id'] - _setup_deferred_cleanup(cfg) + qcnt = _setup_deferred_cleanup(cfg) # set threaded _set_threaded_state(cfg, 1) @@ -90,7 +92,7 @@ def change_num_queues(cfg, nl) -> None: _assert_napi_threaded_enabled(nl, napi1_id) cmd(f"ethtool -L {cfg.ifname} combined 1") - cmd(f"ethtool -L {cfg.ifname} combined 2") + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") # check napi threaded is set for both napis _assert_napi_threaded_enabled(nl, napi0_id) From ccba9f6baa900e31ad1a4c36e6f3c176694f9eac Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Aug 2025 17:12:04 -0700 Subject: [PATCH 262/279] net: update NAPI threaded config even for disabled NAPIs We have to make sure that all future NAPIs will have the right threaded state when the state is configured on the device level. We chose not to have an "unset" state for threaded, and not to wipe the NAPI config clean when channels are explicitly disabled. This means the persistent config structs "exist" even when their NAPIs are not instantiated. Differently put - the NAPI persistent state lives in the net_device (ncfg == struct napi_config): ,--- [napi 0] - [napi 1] [dev] | | `--- [ncfg 0] - [ncfg 1] so say we a device with 2 queues but only 1 enabled: ,--- [napi 0] [dev] | `--- [ncfg 0] - [ncfg 1] now we set the device to threaded=1: ,---------- [napi 0 (thr:1)] [dev(thr:1)] | `---------- [ncfg 0 (thr:1)] - [ncfg 1 (thr:?)] Since [ncfg 1] was not attached to a NAPI during configuration we skipped it. If we create a NAPI for it later it will have the old setting (presumably disabled). One could argue if this is right or not "in principle", but it's definitely not how things worked before per-NAPI config.. Fixes: 2677010e7793 ("Add support to set NAPI threaded for individual NAPI") Signed-off-by: Jakub Kicinski Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250809001205.1147153-3-kuba@kernel.org Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 5 ++++- net/core/dev.c | 7 ++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5e5de4b0a433..f3a3b761abfb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2071,6 +2071,8 @@ enum netdev_reg_state { * @max_pacing_offload_horizon: max EDT offload horizon in nsec. * @napi_config: An array of napi_config structures containing per-NAPI * settings. + * @num_napi_configs: number of allocated NAPI config structs, + * always >= max(num_rx_queues, num_tx_queues). * @gro_flush_timeout: timeout for GRO layer in NAPI * @napi_defer_hard_irqs: If not zero, provides a counter that would * allow to avoid NIC hard IRQ, on busy queues. @@ -2482,8 +2484,9 @@ struct net_device { u64 max_pacing_offload_horizon; struct napi_config *napi_config; - unsigned long gro_flush_timeout; + u32 num_napi_configs; u32 napi_defer_hard_irqs; + unsigned long gro_flush_timeout; /** * @up: copy of @state's IFF_UP, but safe to read with just @lock. diff --git a/net/core/dev.c b/net/core/dev.c index 68dc47d7e700..f180746382a1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6999,7 +6999,7 @@ int netif_set_threaded(struct net_device *dev, enum netdev_napi_threaded threaded) { struct napi_struct *napi; - int err = 0; + int i, err = 0; netdev_assert_locked_or_invisible(dev); @@ -7021,6 +7021,10 @@ int netif_set_threaded(struct net_device *dev, list_for_each_entry(napi, &dev->napi_list, dev_list) WARN_ON_ONCE(napi_set_threaded(napi, threaded)); + /* Override the config for all NAPIs even if currently not listed */ + for (i = 0; i < dev->num_napi_configs; i++) + dev->napi_config[i].threaded = threaded; + return err; } @@ -11873,6 +11877,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, goto free_all; dev->cfg_pending = dev->cfg; + dev->num_napi_configs = maxqs; napi_config_sz = array_size(maxqs, sizeof(*dev->napi_config)); dev->napi_config = kvzalloc(napi_config_sz, GFP_KERNEL_ACCOUNT); if (!dev->napi_config) From b3fc08ab9a565efb42fe08be046a0d203b82cdb8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Aug 2025 17:12:05 -0700 Subject: [PATCH 263/279] net: prevent deadlocks when enabling NAPIs with mixed kthread config The following order of calls currently deadlocks if: - device has threaded=1; and - NAPI has persistent config with threaded=0. netif_napi_add_weight_config() dev->threaded == 1 napi_kthread_create() napi_enable() napi_restore_config() napi_set_threaded(0) napi_stop_kthread() while (NAPIF_STATE_SCHED) msleep(20) We deadlock because disabled NAPI has STATE_SCHED set. Creating a thread in netif_napi_add() just to destroy it in napi_disable() is fairly ugly in the first place. Let's read both the device config and the NAPI config in netif_napi_add(). Fixes: e6d76268813d ("net: Update threaded state in napi config in netif_set_threaded") Signed-off-by: Jakub Kicinski Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250809001205.1147153-4-kuba@kernel.org Signed-off-by: Paolo Abeni --- net/core/dev.c | 5 +++-- net/core/dev.h | 8 ++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index f180746382a1..5a3c0f40a93f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7357,8 +7357,9 @@ void netif_napi_add_weight_locked(struct net_device *dev, * Clear dev->threaded if kthread creation failed so that * threaded mode will not be enabled in napi_enable(). */ - if (dev->threaded && napi_kthread_create(napi)) - dev->threaded = NETDEV_NAPI_THREADED_DISABLED; + if (napi_get_threaded_config(dev, napi)) + if (napi_kthread_create(napi)) + dev->threaded = NETDEV_NAPI_THREADED_DISABLED; netif_napi_set_irq_locked(napi, -1); } EXPORT_SYMBOL(netif_napi_add_weight_locked); diff --git a/net/core/dev.h b/net/core/dev.h index ab69edc0c3e3..d6b08d435479 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -323,6 +323,14 @@ static inline enum netdev_napi_threaded napi_get_threaded(struct napi_struct *n) return NETDEV_NAPI_THREADED_DISABLED; } +static inline enum netdev_napi_threaded +napi_get_threaded_config(struct net_device *dev, struct napi_struct *n) +{ + if (n->config) + return n->config->threaded; + return dev->threaded; +} + int napi_set_threaded(struct napi_struct *n, enum netdev_napi_threaded threaded); From c5ec7f49b480db0dfc83f395755b1c2a7c979920 Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Fri, 4 Jul 2025 13:17:47 +0200 Subject: [PATCH 264/279] devlink: let driver opt out of automatic phys_port_name generation Currently when adding devlink port, phys_port_name is automatically generated within devlink port initialization flow. As a result adding devlink port support to driver may result in forced changes of interface names, which breaks already existing network configs. This is an expected behavior but in some scenarios it would not be preferable to provide such limitation for legacy driver not being able to keep 'pre-devlink' interface name. Add flag no_phys_port_name to devlink_port_attrs struct which indicates if devlink should not alter name of interface. Suggested-by: Jiri Pirko Link: https://lore.kernel.org/all/nbwrfnjhvrcduqzjl4a2jafnvvud6qsbxlvxaxilnryglf4j7r@btuqrimnfuly/ Signed-off-by: Jedrzej Jagielski Signed-off-by: Tony Nguyen --- include/net/devlink.h | 6 +++++- net/devlink/port.c | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index 93640a29427c..b32c9ceeb81d 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -78,6 +78,9 @@ struct devlink_port_pci_sf_attrs { * @flavour: flavour of the port * @split: indicates if this is split port * @splittable: indicates if the port can be split. + * @no_phys_port_name: skip automatic phys_port_name generation; for + * compatibility only, newly added driver/port instance + * should never set this. * @lanes: maximum number of lanes the port supports. 0 value is not passed to netlink. * @switch_id: if the port is part of switch, this is buffer with ID, otherwise this is NULL * @phys: physical port attributes @@ -87,7 +90,8 @@ struct devlink_port_pci_sf_attrs { */ struct devlink_port_attrs { u8 split:1, - splittable:1; + splittable:1, + no_phys_port_name:1; u32 lanes; enum devlink_port_flavour flavour; struct netdev_phys_item_id switch_id; diff --git a/net/devlink/port.c b/net/devlink/port.c index 939081a0e615..cb8d4df61619 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -1519,7 +1519,7 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, struct devlink_port_attrs *attrs = &devlink_port->attrs; int n = 0; - if (!devlink_port->attrs_set) + if (!devlink_port->attrs_set || devlink_port->attrs.no_phys_port_name) return -EOPNOTSUPP; switch (attrs->flavour) { From e67a0bc3ed4fd8ee1697cb6d937e2b294ec13b5e Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Thu, 3 Jul 2025 12:41:15 +0200 Subject: [PATCH 265/279] ixgbe: prevent from unwanted interface name changes Users of the ixgbe driver report that after adding devlink support by the commit a0285236ab93 ("ixgbe: add initial devlink support") their configs got broken due to unwanted changes of interface names. It's caused by automatic phys_port_name generation during devlink port initialization flow. To prevent from that set no_phys_port_name flag for ixgbe devlink ports. Reported-by: David Howells Closes: https://lore.kernel.org/netdev/3452224.1745518016@warthog.procyon.org.uk/ Reported-by: David Kaplan Closes: https://lore.kernel.org/netdev/LV3PR12MB92658474624CCF60220157199470A@LV3PR12MB9265.namprd12.prod.outlook.com/ Fixes: a0285236ab93 ("ixgbe: add initial devlink support") Signed-off-by: Jedrzej Jagielski Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/devlink/devlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c b/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c index 54f1b83dfe42..d227f4d2a2d1 100644 --- a/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c @@ -543,6 +543,7 @@ int ixgbe_devlink_register_port(struct ixgbe_adapter *adapter) attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; attrs.phys.port_number = adapter->hw.bus.func; + attrs.no_phys_port_name = 1; ixgbe_devlink_set_switch_id(adapter, &attrs.switch_id); devlink_port_attrs_set(devlink_port, &attrs); From 2efe41234dbd0a83fdb7cd38226c2f70039a2cd3 Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Mon, 28 Jul 2025 15:26:49 +0900 Subject: [PATCH 266/279] ptp: prevent possible ABBA deadlock in ptp_clock_freerun() syzbot reported the following ABBA deadlock: CPU0 CPU1 ---- ---- n_vclocks_store() lock(&ptp->n_vclocks_mux) [1] (physical clock) pc_clock_adjtime() lock(&clk->rwsem) [2] (physical clock) ... ptp_clock_freerun() ptp_vclock_in_use() lock(&ptp->n_vclocks_mux) [3] (physical clock) ptp_clock_unregister() posix_clock_unregister() lock(&clk->rwsem) [4] (virtual clock) Since ptp virtual clock is registered only under ptp physical clock, both ptp_clock and posix_clock must be physical clocks for ptp_vclock_in_use() to lock &ptp->n_vclocks_mux and check ptp->n_vclocks. However, when unregistering vclocks in n_vclocks_store(), the locking ptp->n_vclocks_mux is a physical clock lock, but clk->rwsem of ptp_clock_unregister() called through device_for_each_child_reverse() is a virtual clock lock. Therefore, clk->rwsem used in CPU0 and clk->rwsem used in CPU1 are different locks, but in lockdep, a false positive occurs because the possibility of deadlock is determined through lock-class. To solve this, lock subclass annotation must be added to the posix_clock rwsem of the vclock. Reported-by: syzbot+7cfb66a237c4a5fb22ad@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7cfb66a237c4a5fb22ad Fixes: 73f37068d540 ("ptp: support ptp physical/virtual clocks conversion") Signed-off-by: Jeongjun Park Acked-by: Richard Cochran Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250728062649.469882-1-aha310510@gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_private.h | 5 +++++ drivers/ptp/ptp_vclock.c | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index a6aad743c282..b352df4cd3f9 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -24,6 +24,11 @@ #define PTP_DEFAULT_MAX_VCLOCKS 20 #define PTP_MAX_CHANNELS 2048 +enum { + PTP_LOCK_PHYSICAL = 0, + PTP_LOCK_VIRTUAL, +}; + struct timestamp_event_queue { struct ptp_extts_event buf[PTP_MAX_TIMESTAMPS]; int head; diff --git a/drivers/ptp/ptp_vclock.c b/drivers/ptp/ptp_vclock.c index 2fdeedd60e21..64c950456517 100644 --- a/drivers/ptp/ptp_vclock.c +++ b/drivers/ptp/ptp_vclock.c @@ -154,6 +154,11 @@ static long ptp_vclock_refresh(struct ptp_clock_info *ptp) return PTP_VCLOCK_REFRESH_INTERVAL; } +static void ptp_vclock_set_subclass(struct ptp_clock *ptp) +{ + lockdep_set_subclass(&ptp->clock.rwsem, PTP_LOCK_VIRTUAL); +} + static const struct ptp_clock_info ptp_vclock_info = { .owner = THIS_MODULE, .name = "ptp virtual clock", @@ -213,6 +218,8 @@ struct ptp_vclock *ptp_vclock_register(struct ptp_clock *pclock) return NULL; } + ptp_vclock_set_subclass(vclock->clock); + timecounter_init(&vclock->tc, &vclock->cc, 0); ptp_schedule_worker(vclock->clock, PTP_VCLOCK_REFRESH_INTERVAL); From 6db015fc4b5d5f63a64a193f65d98da3a7fc811d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 7 Aug 2025 16:29:06 -0700 Subject: [PATCH 267/279] tls: handle data disappearing from under the TLS ULP TLS expects that it owns the receive queue of the TCP socket. This cannot be guaranteed in case the reader of the TCP socket entered before the TLS ULP was installed, or uses some non-standard read API (eg. zerocopy ones). Replace the WARN_ON() and a buggy early exit (which leaves anchor pointing to a freed skb) with real error handling. Wipe the parsing state and tell the reader to retry. We already reload the anchor every time we (re)acquire the socket lock, so the only condition we need to avoid is an out of bounds read (not having enough bytes in the socket for previously parsed record len). If some data was read from under TLS but there's enough in the queue we'll reload and decrypt what is most likely not a valid TLS record. Leading to some undefined behavior from TLS perspective (corrupting a stream? missing an alert? missing an attack?) but no kernel crash should take place. Reported-by: William Liu Reported-by: Savino Dicanosa Link: https://lore.kernel.org/tFjq_kf7sWIG3A7CrCg_egb8CVsT_gsmHAK0_wxDPJXfIzxFAMxqmLwp3MlU5EHiet0AwwJldaaFdgyHpeIUCS-3m3llsmRzp9xIOBR4lAI=@syst3mfailure.io Fixes: 84c61fe1a75b ("tls: rx: do not use the standard strparser") Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250807232907.600366-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/tls/tls.h | 2 +- net/tls/tls_strp.c | 11 ++++++++--- net/tls/tls_sw.c | 3 ++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/net/tls/tls.h b/net/tls/tls.h index 774859b63f0d..4e077068e6d9 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -196,7 +196,7 @@ void tls_strp_msg_done(struct tls_strparser *strp); int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb); void tls_rx_msg_ready(struct tls_strparser *strp); -void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh); +bool tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh); int tls_strp_msg_cow(struct tls_sw_context_rx *ctx); struct sk_buff *tls_strp_msg_detach(struct tls_sw_context_rx *ctx); int tls_strp_msg_hold(struct tls_strparser *strp, struct sk_buff_head *dst); diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index 095cf31bae0b..d71643b494a1 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -475,7 +475,7 @@ static void tls_strp_load_anchor_with_queue(struct tls_strparser *strp, int len) strp->stm.offset = offset; } -void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) +bool tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) { struct strp_msg *rxm; struct tls_msg *tlm; @@ -484,8 +484,11 @@ void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) DEBUG_NET_WARN_ON_ONCE(!strp->stm.full_len); if (!strp->copy_mode && force_refresh) { - if (WARN_ON(tcp_inq(strp->sk) < strp->stm.full_len)) - return; + if (unlikely(tcp_inq(strp->sk) < strp->stm.full_len)) { + WRITE_ONCE(strp->msg_ready, 0); + memset(&strp->stm, 0, sizeof(strp->stm)); + return false; + } tls_strp_load_anchor_with_queue(strp, strp->stm.full_len); } @@ -495,6 +498,8 @@ void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) rxm->offset = strp->stm.offset; tlm = tls_msg(strp->anchor); tlm->control = strp->mark; + + return true; } /* Called with lock held on lower socket */ diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 549d1ea01a72..51c98a007dda 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1384,7 +1384,8 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, return sock_intr_errno(timeo); } - tls_strp_msg_load(&ctx->strp, released); + if (unlikely(!tls_strp_msg_load(&ctx->strp, released))) + return tls_rx_rec_wait(sk, psock, nonblock, false); return 1; } From d7e82594a45c5cb270940ac469846e8026c7db0f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 7 Aug 2025 16:29:07 -0700 Subject: [PATCH 268/279] selftests: tls: test TCP stealing data from under the TLS socket Check a race where data disappears from the TCP socket after TLS signaled that its ready to receive. ok 6 global.data_steal # RUN tls_basic.base_base ... # OK tls_basic.base_base Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250807232907.600366-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 63 +++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 5ded3b3a7538..d8cfcf9bb825 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -2708,6 +2708,69 @@ TEST(prequeue) { close(cfd); } +TEST(data_steal) { + struct tls_crypto_info_keys tls; + char buf[20000], buf2[20000]; + struct sockaddr_in addr; + int sfd, cfd, ret, fd; + int pid, status; + socklen_t len; + + len = sizeof(addr); + memrnd(buf, sizeof(buf)); + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls, 0); + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = 0; + + fd = socket(AF_INET, SOCK_STREAM, 0); + sfd = socket(AF_INET, SOCK_STREAM, 0); + + ASSERT_EQ(bind(sfd, &addr, sizeof(addr)), 0); + ASSERT_EQ(listen(sfd, 10), 0); + ASSERT_EQ(getsockname(sfd, &addr, &len), 0); + ASSERT_EQ(connect(fd, &addr, sizeof(addr)), 0); + ASSERT_GE(cfd = accept(sfd, &addr, &len), 0); + close(sfd); + + ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + if (ret) { + ASSERT_EQ(errno, ENOENT); + SKIP(return, "no TLS support"); + } + ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0); + + /* Spawn a child and get it into the read wait path of the underlying + * TCP socket. + */ + pid = fork(); + ASSERT_GE(pid, 0); + if (!pid) { + EXPECT_EQ(recv(cfd, buf, sizeof(buf), MSG_WAITALL), + sizeof(buf)); + exit(!__test_passed(_metadata)); + } + + usleep(2000); + ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0); + ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0); + + EXPECT_EQ(send(fd, buf, sizeof(buf), 0), sizeof(buf)); + usleep(2000); + EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT), -1); + /* Don't check errno, the error will be different depending + * on what random bytes TLS interpreted as the record length. + */ + + close(fd); + close(cfd); + + EXPECT_EQ(wait(&status), pid); + EXPECT_EQ(status, 0); +} + static void __attribute__((constructor)) fips_check(void) { int res; FILE *f; From 30c1d25b9870d551be42535067d5481668b5e6f3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Aug 2025 12:26:10 +0200 Subject: [PATCH 269/279] netfilter: nft_set_pipapo: fix null deref for empty set Blamed commit broke the check for a null scratch map: - if (unlikely(!m || !*raw_cpu_ptr(m->scratch))) + if (unlikely(!raw_cpu_ptr(m->scratch))) This should have been "if (!*raw_ ...)". Use the pattern of the avx2 version which is more readable. This can only be reproduced if avx2 support isn't available. Fixes: d8d871a35ca9 ("netfilter: nft_set_pipapo: merge pipapo_get/lookup") Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 1a19649c2851..9a10251228fd 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -426,10 +426,9 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, local_bh_disable(); - if (unlikely(!raw_cpu_ptr(m->scratch))) - goto out; - scratch = *raw_cpu_ptr(m->scratch); + if (unlikely(!scratch)) + goto out; map_index = scratch->map_index; From c0a23bbc98e93704a1f4fb5e7e7bb2d7c0fb6eb3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 29 Jul 2025 14:26:11 +0200 Subject: [PATCH 270/279] ipvs: Fix estimator kthreads preferred affinity The estimator kthreads' affinity are defined by sysctl overwritten preferences and applied through a plain call to the scheduler's affinity API. However since the introduction of managed kthreads preferred affinity, such a practice shortcuts the kthreads core code which eventually overwrites the target to the default unbound affinity. Fix this with using the appropriate kthread's API. Fixes: d1a89197589c ("kthread: Default affine kthread to its preferred NUMA node") Signed-off-by: Frederic Weisbecker Acked-by: Julian Anastasov Signed-off-by: Florian Westphal --- include/net/ip_vs.h | 13 +++++++++++++ kernel/kthread.c | 1 + net/netfilter/ipvs/ip_vs_est.c | 3 ++- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index ff406ef4fd4a..29a36709e7f3 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1163,6 +1163,14 @@ static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs) return housekeeping_cpumask(HK_TYPE_KTHREAD); } +static inline const struct cpumask *sysctl_est_preferred_cpulist(struct netns_ipvs *ipvs) +{ + if (ipvs->est_cpulist_valid) + return ipvs->sysctl_est_cpulist; + else + return NULL; +} + static inline int sysctl_est_nice(struct netns_ipvs *ipvs) { return ipvs->sysctl_est_nice; @@ -1270,6 +1278,11 @@ static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs) return housekeeping_cpumask(HK_TYPE_KTHREAD); } +static inline const struct cpumask *sysctl_est_preferred_cpulist(struct netns_ipvs *ipvs) +{ + return NULL; +} + static inline int sysctl_est_nice(struct netns_ipvs *ipvs) { return IPVS_EST_NICE; diff --git a/kernel/kthread.c b/kernel/kthread.c index 0e98b228a8ef..31b072e8d427 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -893,6 +893,7 @@ int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask) return ret; } +EXPORT_SYMBOL_GPL(kthread_affine_preferred); /* * Re-affine kthreads according to their preferences diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index f821ad2e19b3..15049b826732 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -265,7 +265,8 @@ int ip_vs_est_kthread_start(struct netns_ipvs *ipvs, } set_user_nice(kd->task, sysctl_est_nice(ipvs)); - set_cpus_allowed_ptr(kd->task, sysctl_est_cpulist(ipvs)); + if (sysctl_est_preferred_cpulist(ipvs)) + kthread_affine_preferred(kd->task, sysctl_est_preferred_cpulist(ipvs)); pr_info("starting estimator thread %d...\n", kd->id); wake_up_process(kd->task); From cf5fb87fcdaaaafec55dcc0dc5a9e15ead343973 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 13 Aug 2025 02:38:50 +0200 Subject: [PATCH 271/279] netfilter: nf_tables: reject duplicate device on updates A chain/flowtable update with duplicated devices in the same batch is possible. Unfortunately, netdev event path only removes the first device that is found, leaving unregistered the hook of the duplicated device. Check if a duplicated device exists in the transaction batch, bail out with EEXIST in such case. WARNING is hit when unregistering the hook: [49042.221275] WARNING: CPU: 4 PID: 8425 at net/netfilter/core.c:340 nf_hook_entry_head+0xaa/0x150 [49042.221375] CPU: 4 UID: 0 PID: 8425 Comm: nft Tainted: G S 6.16.0+ #170 PREEMPT(full) [...] [49042.221382] RIP: 0010:nf_hook_entry_head+0xaa/0x150 Fixes: 78d9f48f7f44 ("netfilter: nf_tables: add devices to existing flowtable") Fixes: b9703ed44ffb ("netfilter: nf_tables: support for adding new devices to an existing netdev chain") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 13d0ed9d1895..58c5425d61c2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2803,6 +2803,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, struct nft_chain *chain = ctx->chain; struct nft_chain_hook hook = {}; struct nft_stats __percpu *stats = NULL; + struct nftables_pernet *nft_net; struct nft_hook *h, *next; struct nf_hook_ops *ops; struct nft_trans *trans; @@ -2845,6 +2846,20 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, if (nft_hook_list_find(&basechain->hook_list, h)) { list_del(&h->list); nft_netdev_hook_free(h); + continue; + } + + nft_net = nft_pernet(ctx->net); + list_for_each_entry(trans, &nft_net->commit_list, list) { + if (trans->msg_type != NFT_MSG_NEWCHAIN || + trans->table != ctx->table || + !nft_trans_chain_update(trans)) + continue; + + if (nft_hook_list_find(&nft_trans_chain_hooks(trans), h)) { + nft_chain_release_hook(&hook); + return -EEXIST; + } } } } else { @@ -9060,6 +9075,7 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, { const struct nlattr * const *nla = ctx->nla; struct nft_flowtable_hook flowtable_hook; + struct nftables_pernet *nft_net; struct nft_hook *hook, *next; struct nf_hook_ops *ops; struct nft_trans *trans; @@ -9076,6 +9092,20 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, if (nft_hook_list_find(&flowtable->hook_list, hook)) { list_del(&hook->list); nft_netdev_hook_free(hook); + continue; + } + + nft_net = nft_pernet(ctx->net); + list_for_each_entry(trans, &nft_net->commit_list, list) { + if (trans->msg_type != NFT_MSG_NEWFLOWTABLE || + trans->table != ctx->table || + !nft_trans_flowtable_update(trans)) + continue; + + if (nft_hook_list_find(&nft_trans_flowtable_hooks(trans), hook)) { + err = -EEXIST; + goto err_flowtable_update_hook; + } } } From a58893aa173923fdc49c2d35d638d8133065e952 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Tue, 12 Aug 2025 13:08:58 +0800 Subject: [PATCH 272/279] net: mctp: Fix bad kfree_skb in bind lookup test The kunit test's skb_pkt is consumed by mctp_dst_input() so shouldn't be freed separately. Fixes: e6d8e7dbc5a3 ("net: mctp: Add bind lookup test") Reported-by: Alexandre Ghiti Closes: https://lore.kernel.org/all/734b02a3-1941-49df-a0da-ec14310d41e4@ghiti.fr/ Signed-off-by: Matt Johnston Tested-by: Alexandre Ghiti Link: https://patch.msgid.link/20250812-fix-mctp-bind-test-v1-1-5e2128664eb3@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- net/mctp/test/route-test.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index fb6b46a952cb..69a3ccfc6310 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -1586,7 +1586,6 @@ static void mctp_test_bind_lookup(struct kunit *test) cleanup: kfree_skb(skb_sock); - kfree_skb(skb_pkt); /* Drop all binds */ for (size_t i = 0; i < ARRAY_SIZE(lookup_binds); i++) From b2cafefaf0473bafb0c3502a8530167d35e06113 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 12 Aug 2025 16:21:26 +0000 Subject: [PATCH 273/279] netdevsim: Fix wild pointer access in nsim_queue_free(). syzbot reported the splat below. [0] When nsim_queue_uninit() is called from nsim_init_netdevsim(), register_netdevice() has not been called, thus dev->dstats has not been allocated. Let's not call dev_dstats_rx_dropped_add() in such a case. [0] BUG: unable to handle page fault for address: ffff88809782c020 PF: supervisor write access in kernel mode PF: error_code(0x0002) - not-present page PGD 1b401067 P4D 1b401067 PUD 0 Oops: Oops: 0002 [#1] SMP KASAN NOPTI CPU: 3 UID: 0 PID: 8476 Comm: syz.1.251 Not tainted 6.16.0-syzkaller-06699-ge8d780dcd957 #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:local_add arch/x86/include/asm/local.h:33 [inline] RIP: 0010:u64_stats_add include/linux/u64_stats_sync.h:89 [inline] RIP: 0010:dev_dstats_rx_dropped_add include/linux/netdevice.h:3027 [inline] RIP: 0010:nsim_queue_free+0xba/0x120 drivers/net/netdevsim/netdev.c:714 Code: 07 77 6c 4a 8d 3c ed 20 7e f1 8d 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 80 3c 02 00 75 46 4a 03 1c ed 20 7e f1 8d <4c> 01 63 20 be 00 02 00 00 48 8d 3d 00 00 00 00 e8 61 2f 58 fa 48 RSP: 0018:ffffc900044af150 EFLAGS: 00010286 RAX: dffffc0000000000 RBX: ffff88809782c000 RCX: 00000000000079c3 RDX: 1ffffffff1be2fc7 RSI: ffffffff8c15f380 RDI: ffffffff8df17e38 RBP: ffff88805f59d000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000000 R13: 0000000000000003 R14: ffff88806ceb3d00 R15: ffffed100dfd308e FS: 0000000000000000(0000) GS:ffff88809782c000(0063) knlGS:00000000f505db40 CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 CR2: ffff88809782c020 CR3: 000000006fc6a000 CR4: 0000000000352ef0 Call Trace: nsim_queue_uninit drivers/net/netdevsim/netdev.c:993 [inline] nsim_init_netdevsim drivers/net/netdevsim/netdev.c:1049 [inline] nsim_create+0xd0a/0x1260 drivers/net/netdevsim/netdev.c:1101 __nsim_dev_port_add+0x435/0x7d0 drivers/net/netdevsim/dev.c:1438 nsim_dev_port_add_all drivers/net/netdevsim/dev.c:1494 [inline] nsim_dev_reload_create drivers/net/netdevsim/dev.c:1546 [inline] nsim_dev_reload_up+0x5b8/0x860 drivers/net/netdevsim/dev.c:1003 devlink_reload+0x322/0x7c0 net/devlink/dev.c:474 devlink_nl_reload_doit+0xe31/0x1410 net/devlink/dev.c:584 genl_family_rcv_msg_doit+0x206/0x2f0 net/netlink/genetlink.c:1115 genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline] genl_rcv_msg+0x55c/0x800 net/netlink/genetlink.c:1210 netlink_rcv_skb+0x155/0x420 net/netlink/af_netlink.c:2552 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219 netlink_unicast_kernel net/netlink/af_netlink.c:1320 [inline] netlink_unicast+0x5aa/0x870 net/netlink/af_netlink.c:1346 netlink_sendmsg+0x8d1/0xdd0 net/netlink/af_netlink.c:1896 sock_sendmsg_nosec net/socket.c:714 [inline] __sock_sendmsg net/socket.c:729 [inline] ____sys_sendmsg+0xa95/0xc70 net/socket.c:2614 ___sys_sendmsg+0x134/0x1d0 net/socket.c:2668 __sys_sendmsg+0x16d/0x220 net/socket.c:2700 do_syscall_32_irqs_on arch/x86/entry/syscall_32.c:83 [inline] __do_fast_syscall_32+0x7c/0x3a0 arch/x86/entry/syscall_32.c:306 do_fast_syscall_32+0x32/0x80 arch/x86/entry/syscall_32.c:331 entry_SYSENTER_compat_after_hwframe+0x84/0x8e RIP: 0023:0xf708e579 Code: b8 01 10 06 03 74 b4 01 10 07 03 74 b0 01 10 08 03 74 d8 01 00 00 00 00 00 00 00 00 00 00 00 00 00 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d b4 26 00 00 00 00 8d b4 26 00 00 00 00 RSP: 002b:00000000f505d55c EFLAGS: 00000296 ORIG_RAX: 0000000000000172 RAX: ffffffffffffffda RBX: 0000000000000007 RCX: 0000000080000080 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000296 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Modules linked in: CR2: ffff88809782c020 Fixes: 2a68a22304f9 ("netdevsim: account dropped packet length in stats on queue free") Reported-by: syzbot+8aa80c6232008f7b957d@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/688bb9ca.a00a0220.26d0e1.0050.GAE@google.com/ Suggested-by: Jakub Kicinski Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250812162130.4129322-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/netdev.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 39fe28af48b9..0178219f0db5 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -710,9 +710,13 @@ static struct nsim_rq *nsim_queue_alloc(void) static void nsim_queue_free(struct net_device *dev, struct nsim_rq *rq) { hrtimer_cancel(&rq->napi_timer); - local_bh_disable(); - dev_dstats_rx_dropped_add(dev, rq->skb_queue.qlen); - local_bh_enable(); + + if (rq->skb_queue.qlen) { + local_bh_disable(); + dev_dstats_rx_dropped_add(dev, rq->skb_queue.qlen); + local_bh_enable(); + } + skb_queue_purge_reason(&rq->skb_queue, SKB_DROP_REASON_QUEUE_PURGE); kfree(rq); } From 39f8fcda2088382a4aa70b258d6f7225aa386f11 Mon Sep 17 00:00:00 2001 From: David Wei Date: Tue, 12 Aug 2025 11:29:07 -0700 Subject: [PATCH 274/279] bnxt: fill data page pool with frags if PAGE_SIZE > BNXT_RX_PAGE_SIZE The data page pool always fills the HW rx ring with pages. On arm64 with 64K pages, this will waste _at least_ 32K of memory per entry in the rx ring. Fix by fragmenting the pages if PAGE_SIZE > BNXT_RX_PAGE_SIZE. This makes the data page pool the same as the header pool. Tested with iperf3 with a small (64 entries) rx ring to encourage buffer circulation. Fixes: cd1fafe7da1f ("eth: bnxt: add support rx side device memory TCP") Reviewed-by: Michael Chan Signed-off-by: David Wei Link: https://patch.msgid.link/20250812182907.1540755-1-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 76a4c5ae8000..2800a90fba1f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -926,15 +926,21 @@ static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, static netmem_ref __bnxt_alloc_rx_netmem(struct bnxt *bp, dma_addr_t *mapping, struct bnxt_rx_ring_info *rxr, + unsigned int *offset, gfp_t gfp) { netmem_ref netmem; - netmem = page_pool_alloc_netmems(rxr->page_pool, gfp); + if (PAGE_SIZE > BNXT_RX_PAGE_SIZE) { + netmem = page_pool_alloc_frag_netmem(rxr->page_pool, offset, BNXT_RX_PAGE_SIZE, gfp); + } else { + netmem = page_pool_alloc_netmems(rxr->page_pool, gfp); + *offset = 0; + } if (!netmem) return 0; - *mapping = page_pool_get_dma_addr_netmem(netmem); + *mapping = page_pool_get_dma_addr_netmem(netmem) + *offset; return netmem; } @@ -1029,7 +1035,7 @@ static int bnxt_alloc_rx_netmem(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, dma_addr_t mapping; netmem_ref netmem; - netmem = __bnxt_alloc_rx_netmem(bp, &mapping, rxr, gfp); + netmem = __bnxt_alloc_rx_netmem(bp, &mapping, rxr, &offset, gfp); if (!netmem) return -ENOMEM; From 87c6efc5ce9c126ae4a781bc04504b83780e3650 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 12 Aug 2025 18:40:29 +0200 Subject: [PATCH 275/279] net/sched: ets: use old 'nbands' while purging unused classes Shuang reported sch_ets test-case [1] crashing in ets_class_qlen_notify() after recent changes from Lion [2]. The problem is: in ets_qdisc_change() we purge unused DWRR queues; the value of 'q->nbands' is the new one, and the cleanup should be done with the old one. The problem is here since my first attempts to fix ets_qdisc_change(), but it surfaced again after the recent qdisc len accounting fixes. Fix it purging idle DWRR queues before assigning a new value of 'q->nbands', so that all purge operations find a consistent configuration: - old 'q->nbands' because it's needed by ets_class_find() - old 'q->nstrict' because it's needed by ets_class_is_strict() BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] SMP NOPTI CPU: 62 UID: 0 PID: 39457 Comm: tc Kdump: loaded Not tainted 6.12.0-116.el10.x86_64 #1 PREEMPT(voluntary) Hardware name: Dell Inc. PowerEdge R640/06DKY5, BIOS 2.12.2 07/09/2021 RIP: 0010:__list_del_entry_valid_or_report+0x4/0x80 Code: ff 4c 39 c7 0f 84 39 19 8e ff b8 01 00 00 00 c3 cc cc cc cc 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa <48> 8b 17 48 8b 4f 08 48 85 d2 0f 84 56 19 8e ff 48 85 c9 0f 84 ab RSP: 0018:ffffba186009f400 EFLAGS: 00010202 RAX: 00000000000000d6 RBX: 0000000000000000 RCX: 0000000000000004 RDX: ffff9f0fa29b69c0 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffffffc12c2400 R08: 0000000000000008 R09: 0000000000000004 R10: ffffffffffffffff R11: 0000000000000004 R12: 0000000000000000 R13: ffff9f0f8cfe0000 R14: 0000000000100005 R15: 0000000000000000 FS: 00007f2154f37480(0000) GS:ffff9f269c1c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000001530be001 CR4: 00000000007726f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ets_class_qlen_notify+0x65/0x90 [sch_ets] qdisc_tree_reduce_backlog+0x74/0x110 ets_qdisc_change+0x630/0xa40 [sch_ets] __tc_modify_qdisc.constprop.0+0x216/0x7f0 tc_modify_qdisc+0x7c/0x120 rtnetlink_rcv_msg+0x145/0x3f0 netlink_rcv_skb+0x53/0x100 netlink_unicast+0x245/0x390 netlink_sendmsg+0x21b/0x470 ____sys_sendmsg+0x39d/0x3d0 ___sys_sendmsg+0x9a/0xe0 __sys_sendmsg+0x7a/0xd0 do_syscall_64+0x7d/0x160 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f2155114084 Code: 89 02 b8 ff ff ff ff eb bb 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 80 3d 25 f0 0c 00 00 74 13 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 48 83 ec 28 89 54 24 1c 48 89 RSP: 002b:00007fff1fd7a988 EFLAGS: 00000202 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000560ec063e5e0 RCX: 00007f2155114084 RDX: 0000000000000000 RSI: 00007fff1fd7a9f0 RDI: 0000000000000003 RBP: 00007fff1fd7aa60 R08: 0000000000000010 R09: 000000000000003f R10: 0000560ee9b3a010 R11: 0000000000000202 R12: 00007fff1fd7aae0 R13: 000000006891ccde R14: 0000560ec063e5e0 R15: 00007fff1fd7aad0 [1] https://lore.kernel.org/netdev/e08c7f4a6882f260011909a868311c6e9b54f3e4.1639153474.git.dcaratti@redhat.com/ [2] https://lore.kernel.org/netdev/d912cbd7-193b-4269-9857-525bee8bbb6a@gmail.com/ Cc: stable@vger.kernel.org Fixes: 103406b38c60 ("net/sched: Always pass notifications when child class becomes empty") Fixes: c062f2a0b04d ("net/sched: sch_ets: don't remove idle classes from the round-robin list") Fixes: dcc68b4d8084 ("net: sch_ets: Add a new Qdisc") Reported-by: Li Shuang Closes: https://issues.redhat.com/browse/RHEL-108026 Reviewed-by: Petr Machata Co-developed-by: Ivan Vecera Signed-off-by: Ivan Vecera Signed-off-by: Davide Caratti Link: https://patch.msgid.link/7928ff6d17db47a2ae7cc205c44777b1f1950545.1755016081.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- net/sched/sch_ets.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 037f764822b9..82635dd2cfa5 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -651,6 +651,12 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); + for (i = nbands; i < oldbands; i++) { + if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) + list_del_init(&q->classes[i].alist); + qdisc_purge_queue(q->classes[i].qdisc); + } + WRITE_ONCE(q->nbands, nbands); for (i = nstrict; i < q->nstrict; i++) { if (q->classes[i].qdisc->q.qlen) { @@ -658,11 +664,6 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, q->classes[i].deficit = quanta[i]; } } - for (i = q->nbands; i < oldbands; i++) { - if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) - list_del_init(&q->classes[i].alist); - qdisc_purge_queue(q->classes[i].qdisc); - } WRITE_ONCE(q->nstrict, nstrict); memcpy(q->prio2band, priomap, sizeof(priomap)); From 774a2ae6617b30a4dcc7ebaf178ef05da05b2a47 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 12 Aug 2025 18:40:30 +0200 Subject: [PATCH 276/279] selftests: net/forwarding: test purge of active DWRR classes Extend sch_ets.sh to add a reproducer for problematic list deletions when active DWRR class are purged by ets_qdisc_change() [1] [2]. [1] https://lore.kernel.org/netdev/e08c7f4a6882f260011909a868311c6e9b54f3e4.1639153474.git.dcaratti@redhat.com/ [2] https://lore.kernel.org/netdev/f3b9bacc73145f265c19ab80785933da5b7cbdec.1754581577.git.dcaratti@redhat.com/ Suggested-by: Victor Nogueira Signed-off-by: Davide Caratti Acked-by: Victor Nogueira Link: https://patch.msgid.link/489497cb781af7389011ca1591fb702a7391f5e7.1755016081.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/sch_ets.sh | 1 + tools/testing/selftests/net/forwarding/sch_ets_tests.sh | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh index 1f6f53e284b5..6269d5e23487 100755 --- a/tools/testing/selftests/net/forwarding/sch_ets.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets.sh @@ -11,6 +11,7 @@ ALL_TESTS=" ets_test_strict ets_test_mixed ets_test_dwrr + ets_test_plug classifier_mode ets_test_strict ets_test_mixed diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh index 08240d3e3c87..79d837a2868a 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh @@ -224,3 +224,11 @@ ets_test_dwrr() ets_set_dwrr_two_bands xfail_on_slow ets_dwrr_test_01 } + +ets_test_plug() +{ + ets_change_qdisc $put 2 "3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3" "1514 1514" + tc qdisc add dev $put handle 20: parent 10:4 plug + start_traffic_pktsize 100 $h1.10 192.0.2.1 192.0.2.2 00:c1:a0:c1:a0:00 "-c 1" + ets_qdisc_setup $put 2 +} From 52565a935213cd6a8662ddb8efe5b4219343a25d Mon Sep 17 00:00:00 2001 From: Sven Stegemann Date: Tue, 12 Aug 2025 21:18:03 +0200 Subject: [PATCH 277/279] net: kcm: Fix race condition in kcm_unattach() syzbot found a race condition when kcm_unattach(psock) and kcm_release(kcm) are executed at the same time. kcm_unattach() is missing a check of the flag kcm->tx_stopped before calling queue_work(). If the kcm has a reserved psock, kcm_unattach() might get executed between cancel_work_sync() and unreserve_psock() in kcm_release(), requeuing kcm->tx_work right before kcm gets freed in kcm_done(). Remove kcm->tx_stopped and replace it by the less error-prone disable_work_sync(). Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reported-by: syzbot+e62c9db591c30e174662@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e62c9db591c30e174662 Reported-by: syzbot+d199b52665b6c3069b94@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d199b52665b6c3069b94 Reported-by: syzbot+be6b1fdfeae512726b4e@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=be6b1fdfeae512726b4e Signed-off-by: Sven Stegemann Link: https://patch.msgid.link/20250812191810.27777-1-sven@stegemann.de Signed-off-by: Jakub Kicinski --- include/net/kcm.h | 1 - net/kcm/kcmsock.c | 10 ++-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/include/net/kcm.h b/include/net/kcm.h index 441e993be634..d9c35e71ecea 100644 --- a/include/net/kcm.h +++ b/include/net/kcm.h @@ -71,7 +71,6 @@ struct kcm_sock { struct list_head wait_psock_list; struct sk_buff *seq_skb; struct mutex tx_mutex; - u32 tx_stopped : 1; /* Don't use bit fields here, these are set under different locks */ bool tx_wait; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index a4971e6fa943..b4f01cb07561 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -430,7 +430,7 @@ static void psock_write_space(struct sock *sk) /* Check if the socket is reserved so someone is waiting for sending. */ kcm = psock->tx_kcm; - if (kcm && !unlikely(kcm->tx_stopped)) + if (kcm) queue_work(kcm_wq, &kcm->tx_work); spin_unlock_bh(&mux->lock); @@ -1693,12 +1693,6 @@ static int kcm_release(struct socket *sock) */ __skb_queue_purge(&sk->sk_write_queue); - /* Set tx_stopped. This is checked when psock is bound to a kcm and we - * get a writespace callback. This prevents further work being queued - * from the callback (unbinding the psock occurs after canceling work. - */ - kcm->tx_stopped = 1; - release_sock(sk); spin_lock_bh(&mux->lock); @@ -1714,7 +1708,7 @@ static int kcm_release(struct socket *sock) /* Cancel work. After this point there should be no outside references * to the kcm socket. */ - cancel_work_sync(&kcm->tx_work); + disable_work_sync(&kcm->tx_work); lock_sock(sk); psock = kcm->tx_psock; From e2f9ae91619add9884428d095c3c630b6b120a61 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 14 Aug 2025 11:38:58 +0900 Subject: [PATCH 278/279] MAINTAINERS: Remove bouncing kprobes maintainer The kprobes MAINTAINERS entry includes anil.s.keshavamurthy@intel.com. That address is bouncing. Remove it. This still leaves three other listed maintainers. Link: https://lore.kernel.org/all/20250808180124.7DDE2ECD@davehans-spike.ostc.intel.com/ Signed-off-by: Dave Hansen Cc: Naveen N Rao Cc: "David S. Miller" Cc: Masami Hiramatsu Cc: linux-trace-kernel@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fe168477caa4..a2bc2bb91970 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13686,7 +13686,6 @@ F: scripts/Makefile.kmsan KPROBES M: Naveen N Rao -M: Anil S Keshavamurthy M: "David S. Miller" M: Masami Hiramatsu L: linux-kernel@vger.kernel.org From 4faff70959d51078f9ee8372f8cff0d7045e4114 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 11 Aug 2025 17:29:31 +0800 Subject: [PATCH 279/279] net: usb: asix_devices: add phy_mask for ax88772 mdio bus Without setting phy_mask for ax88772 mdio bus, current driver may create at most 32 mdio phy devices with phy address range from 0x00 ~ 0x1f. DLink DUB-E100 H/W Ver B1 is such a device. However, only one main phy device will bind to net phy driver. This is creating issue during system suspend/resume since phy_polling_mode() in phy_state_machine() will directly deference member of phydev->drv for non-main phy devices. Then NULL pointer dereference issue will occur. Due to only external phy or internal phy is necessary, add phy_mask for ax88772 mdio bus to workarnoud the issue. Closes: https://lore.kernel.org/netdev/20250806082931.3289134-1-xu.yang_2@nxp.com Fixes: e532a096be0e ("net: usb: asix: ax88772: add phylib support") Cc: stable@vger.kernel.org Signed-off-by: Xu Yang Tested-by: Oleksij Rempel Reviewed-by: Oleksij Rempel Link: https://patch.msgid.link/20250811092931.860333-1-xu.yang_2@nxp.com Signed-off-by: Paolo Abeni --- drivers/net/usb/asix_devices.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 9b0318fb50b5..d9f5942ccc44 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -676,6 +676,7 @@ static int ax88772_init_mdio(struct usbnet *dev) priv->mdio->read = &asix_mdio_bus_read; priv->mdio->write = &asix_mdio_bus_write; priv->mdio->name = "Asix MDIO Bus"; + priv->mdio->phy_mask = ~(BIT(priv->phy_addr) | BIT(AX_EMBD_PHY_ADDR)); /* mii bus name is usb-- */ snprintf(priv->mdio->id, MII_BUS_ID_SIZE, "usb-%03d:%03d", dev->udev->bus->busnum, dev->udev->devnum);