From 595b2c5917d89f5aeeecb4c74e29c0ecfabdc7e9 Mon Sep 17 00:00:00 2001 From: Keguang Zhang Date: Wed, 30 Aug 2023 21:35:05 +0800 Subject: [PATCH 001/415] MIPS: loongson32: Remove dma.h and nand.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 7b3415f581c7 ("MIPS: Loongson32: Remove unused platform devices"), struct plat_ls1x_dma and plat_ls1x_nand are unused. Then, dma.h and nand.h are useless. Therefore, remove these useless header files. Signed-off-by: Keguang Zhang Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/mach-loongson32/dma.h | 21 --------------- arch/mips/include/asm/mach-loongson32/nand.h | 26 ------------------- .../include/asm/mach-loongson32/platform.h | 3 --- arch/mips/loongson32/common/platform.c | 2 -- arch/mips/loongson32/ls1b/board.c | 2 -- 5 files changed, 54 deletions(-) delete mode 100644 arch/mips/include/asm/mach-loongson32/dma.h delete mode 100644 arch/mips/include/asm/mach-loongson32/nand.h diff --git a/arch/mips/include/asm/mach-loongson32/dma.h b/arch/mips/include/asm/mach-loongson32/dma.h deleted file mode 100644 index e917b3ccb2c2..000000000000 --- a/arch/mips/include/asm/mach-loongson32/dma.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2015 Zhang, Keguang - * - * Loongson 1 NAND platform support. - */ - -#ifndef __ASM_MACH_LOONGSON32_DMA_H -#define __ASM_MACH_LOONGSON32_DMA_H - -#define LS1X_DMA_CHANNEL0 0 -#define LS1X_DMA_CHANNEL1 1 -#define LS1X_DMA_CHANNEL2 2 - -struct plat_ls1x_dma { - int nr_channels; -}; - -extern struct plat_ls1x_dma ls1b_dma_pdata; - -#endif /* __ASM_MACH_LOONGSON32_DMA_H */ diff --git a/arch/mips/include/asm/mach-loongson32/nand.h b/arch/mips/include/asm/mach-loongson32/nand.h deleted file mode 100644 index aaf5ed19d78d..000000000000 --- a/arch/mips/include/asm/mach-loongson32/nand.h +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2015 Zhang, Keguang - * - * Loongson 1 NAND platform support. - */ - -#ifndef __ASM_MACH_LOONGSON32_NAND_H -#define __ASM_MACH_LOONGSON32_NAND_H - -#include -#include - -struct plat_ls1x_nand { - struct mtd_partition *parts; - unsigned int nr_parts; - - int hold_cycle; - int wait_cycle; -}; - -extern struct plat_ls1x_nand ls1b_nand_pdata; - -bool ls1x_dma_filter_fn(struct dma_chan *chan, void *param); - -#endif /* __ASM_MACH_LOONGSON32_NAND_H */ diff --git a/arch/mips/include/asm/mach-loongson32/platform.h b/arch/mips/include/asm/mach-loongson32/platform.h index 2cdcfb5f6012..f74292b13bc3 100644 --- a/arch/mips/include/asm/mach-loongson32/platform.h +++ b/arch/mips/include/asm/mach-loongson32/platform.h @@ -8,9 +8,6 @@ #include -#include -#include - extern struct platform_device ls1x_uart_pdev; extern struct platform_device ls1x_eth0_pdev; extern struct platform_device ls1x_eth1_pdev; diff --git a/arch/mips/loongson32/common/platform.c b/arch/mips/loongson32/common/platform.c index 8075590a9f83..623eb4bc7b41 100644 --- a/arch/mips/loongson32/common/platform.c +++ b/arch/mips/loongson32/common/platform.c @@ -15,8 +15,6 @@ #include #include -#include -#include /* 8250/16550 compatible UART */ #define LS1X_UART(_id) \ diff --git a/arch/mips/loongson32/ls1b/board.c b/arch/mips/loongson32/ls1b/board.c index fed8d432ef20..fe115bdcb22c 100644 --- a/arch/mips/loongson32/ls1b/board.c +++ b/arch/mips/loongson32/ls1b/board.c @@ -8,8 +8,6 @@ #include #include -#include -#include #include static const struct gpio_led ls1x_gpio_leds[] __initconst = { From 04318868abaa82f7c6e9e0ce323aa5460b7fc9da Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 30 Aug 2023 17:11:55 +0200 Subject: [PATCH 002/415] mips: dts: ingenic: Remove unneeded probe-type properties The "probe-type" property was only needed when used with the (long obsolete) "direct-mapped" compatible value. Signed-off-by: Geert Uytterhoeven Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/dts/ingenic/jz4725b.dtsi | 1 - arch/mips/boot/dts/ingenic/jz4770.dtsi | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/mips/boot/dts/ingenic/jz4725b.dtsi b/arch/mips/boot/dts/ingenic/jz4725b.dtsi index acbbe8c4664c..c5c5a094c37d 100644 --- a/arch/mips/boot/dts/ingenic/jz4725b.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4725b.dtsi @@ -366,7 +366,6 @@ bch: ecc-controller@130d0000 { rom: memory@1fc00000 { compatible = "mtd-rom"; - probe-type = "map_rom"; reg = <0x1fc00000 0x2000>; bank-width = <4>; diff --git a/arch/mips/boot/dts/ingenic/jz4770.dtsi b/arch/mips/boot/dts/ingenic/jz4770.dtsi index 9c0099919db7..504e895e916e 100644 --- a/arch/mips/boot/dts/ingenic/jz4770.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4770.dtsi @@ -461,7 +461,6 @@ usb_otg: usb@13440000 { rom: memory@1fc00000 { compatible = "mtd-rom"; - probe-type = "map_rom"; reg = <0x1fc00000 0x2000>; bank-width = <4>; From b44ae980e9d026c41101d97cf96c0eb09d490b35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Sun, 17 Sep 2023 13:37:53 +0300 Subject: [PATCH 003/415] mips: dts: ralink: mt7621: define each reset as an item MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each item of the resets property should define a reset. Split the item with two resets on the ethernet node into two separate items. Sort the items of the clocks property to the same line as a trivial change. Signed-off-by: Arınç ÜNAL Acked-by: Sergio Paracuellos Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/dts/ralink/mt7621.dtsi | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/mips/boot/dts/ralink/mt7621.dtsi b/arch/mips/boot/dts/ralink/mt7621.dtsi index 7caed0d14f11..35a10258f235 100644 --- a/arch/mips/boot/dts/ralink/mt7621.dtsi +++ b/arch/mips/boot/dts/ralink/mt7621.dtsi @@ -300,14 +300,13 @@ ethernet: ethernet@1e100000 { compatible = "mediatek,mt7621-eth"; reg = <0x1e100000 0x10000>; - clocks = <&sysc MT7621_CLK_FE>, - <&sysc MT7621_CLK_ETH>; + clocks = <&sysc MT7621_CLK_FE>, <&sysc MT7621_CLK_ETH>; clock-names = "fe", "ethif"; #address-cells = <1>; #size-cells = <0>; - resets = <&sysc MT7621_RST_FE &sysc MT7621_RST_ETH>; + resets = <&sysc MT7621_RST_FE>, <&sysc MT7621_RST_ETH>; reset-names = "fe", "eth"; interrupt-parent = <&gic>; From 70f8cd94f2bc7da7577f7751d03e568006accb97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Mon, 18 Sep 2023 10:59:15 +0300 Subject: [PATCH 004/415] mips: dts: ralink: mt7621: rename to GnuBee GB-PC1 and GnuBee GB-PC2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename GB-PC1 to GnuBee GB-PC1, and GB-PC2 to GnuBee GB-PC2 to include brand and model name. Signed-off-by: Arınç ÜNAL Acked-by: Sergio Paracuellos Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc1.dts | 2 +- arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc2.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc1.dts b/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc1.dts index 129b6710b699..f9c262cc2e96 100644 --- a/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc1.dts +++ b/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc1.dts @@ -8,7 +8,7 @@ / { compatible = "gnubee,gb-pc1", "mediatek,mt7621-soc"; - model = "GB-PC1"; + model = "GnuBee GB-PC1"; memory@0 { device_type = "memory"; diff --git a/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc2.dts b/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc2.dts index f810cd10f4f4..b281e13f22ed 100644 --- a/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc2.dts +++ b/arch/mips/boot/dts/ralink/mt7621-gnubee-gb-pc2.dts @@ -8,7 +8,7 @@ / { compatible = "gnubee,gb-pc2", "mediatek,mt7621-soc"; - model = "GB-PC2"; + model = "GnuBee GB-PC2"; memory@0 { device_type = "memory"; From 4d0f332a6fb65aac2fa98e61bbed4ac9dfcf0a01 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 19 Sep 2023 19:05:01 +0800 Subject: [PATCH 005/415] MIPS: Remove dead code in relocate_new_kernel There are two adjacent "b" instructions, the second one is unreachable, it is dead code, just remove it. Signed-off-by: Tiezhu Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/relocate_kernel.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S index f5b2ef979b43..8f0a7263a9d6 100644 --- a/arch/mips/kernel/relocate_kernel.S +++ b/arch/mips/kernel/relocate_kernel.S @@ -66,7 +66,6 @@ copy_word: LONG_ADDIU s6, s6, -1 beq s6, zero, process_entry b copy_word - b process_entry done: #ifdef CONFIG_SMP From 83767a67e7b6a0291cde5681ec7e3708f3f8f877 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 10 Oct 2023 16:54:34 +0800 Subject: [PATCH 006/415] MIPS: KVM: Fix a build warning about variable set but not used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 411740f5422a ("KVM: MIPS/MMU: Implement KVM_CAP_SYNC_MMU") old_pte is no longer used in kvm_mips_map_page(). So remove it to fix a build warning about variable set but not used: arch/mips/kvm/mmu.c: In function 'kvm_mips_map_page': >> arch/mips/kvm/mmu.c:701:29: warning: variable 'old_pte' set but not used [-Wunused-but-set-variable] 701 | pte_t *ptep, entry, old_pte; | ^~~~~~~ Cc: stable@vger.kernel.org Fixes: 411740f5422a960 ("KVM: MIPS/MMU: Implement KVM_CAP_SYNC_MMU") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310070530.aARZCSfh-lkp@intel.com/ Signed-off-by: Huacai Chen Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Bogendoerfer --- arch/mips/kvm/mmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 7b2ac1319d70..467ee6b95ae1 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -592,7 +592,7 @@ static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, gfn_t gfn = gpa >> PAGE_SHIFT; int srcu_idx, err; kvm_pfn_t pfn; - pte_t *ptep, entry, old_pte; + pte_t *ptep, entry; bool writeable; unsigned long prot_bits; unsigned long mmu_seq; @@ -664,7 +664,6 @@ static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, entry = pfn_pte(pfn, __pgprot(prot_bits)); /* Write the PTE */ - old_pte = *ptep; set_pte(ptep, entry); err = 0; From f2f12cf4e5f6173febc8db8c3e533141b0e667a4 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 10 Oct 2023 09:34:05 -0500 Subject: [PATCH 007/415] MIPS: lantiq: Fix pcibios_plat_dev_init() "no previous prototype" warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After bbd8810d3998 ("PCI: Remove unused includes and superfluous struct declaration"), no longer includes , which provides the extern declarations for pcibios_plat_dev_init() and pcibios_map_irq() via . This results in these new warnings: arch/mips/pci/fixup-lantiq.c:13:5: warning: no previous prototype for 'pcibios_plat_dev_init' [-Wmissing-prototypes] arch/mips/pci/fixup-lantiq.c:24:5: warning: no previous prototype for 'pcibios_map_irq' [-Wmissing-prototypes] Include directly to get these declarations. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310070445.tzRBNYRC-lkp@intel.com/ Signed-off-by: Bjorn Helgaas Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Bogendoerfer --- arch/mips/pci/fixup-lantiq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/pci/fixup-lantiq.c b/arch/mips/pci/fixup-lantiq.c index 105569c1b712..8f5fb98b3984 100644 --- a/arch/mips/pci/fixup-lantiq.c +++ b/arch/mips/pci/fixup-lantiq.c @@ -6,6 +6,7 @@ #include #include +#include int (*ltq_pci_plat_arch_init)(struct pci_dev *dev) = NULL; int (*ltq_pci_plat_dev_init)(struct pci_dev *dev) = NULL; From aaf1f08ac0fb435100fa459a6ee5b292f3786dff Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 10 Oct 2023 09:34:06 -0500 Subject: [PATCH 008/415] MIPS: lantiq: Remove unnecessary include of MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/mips/pci/fixup-lantiq.c requires nothing from , so remove the include of it. Signed-off-by: Bjorn Helgaas Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Bogendoerfer --- arch/mips/pci/fixup-lantiq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/mips/pci/fixup-lantiq.c b/arch/mips/pci/fixup-lantiq.c index 8f5fb98b3984..13009666204f 100644 --- a/arch/mips/pci/fixup-lantiq.c +++ b/arch/mips/pci/fixup-lantiq.c @@ -4,7 +4,6 @@ * Copyright (C) 2012 John Crispin */ -#include #include #include From d5f4e1f2fcd85c3c361e73fa3c8f1b8fdb55ae0a Mon Sep 17 00:00:00 2001 From: Peter Lafreniere Date: Mon, 18 Sep 2023 17:57:10 +0000 Subject: [PATCH 009/415] arch: mips: remove ReiserFS from defconfig ReiserFS has been deprecated for a year and a half, yet is still built as part of a defconfig kernel. According to commit eb103a51640e ("reiserfs: Deprecate reiserfs"), the filesystem is slated to be removed in 2025. Remove it from the defconfig profiles now, as part of its deprecation process. Signed-off-by: Peter Lafreniere Signed-off-by: Thomas Bogendoerfer --- arch/mips/configs/fuloong2e_defconfig | 1 - arch/mips/configs/jazz_defconfig | 4 ---- arch/mips/configs/lemote2f_defconfig | 3 --- arch/mips/configs/malta_defconfig | 5 ----- arch/mips/configs/malta_kvm_defconfig | 5 ----- arch/mips/configs/maltaup_xpa_defconfig | 5 ----- arch/mips/configs/rm200_defconfig | 4 ---- 7 files changed, 27 deletions(-) diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index 1843468f84a3..00329bb5de5a 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -177,7 +177,6 @@ CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_REISERFS_FS=m CONFIG_AUTOFS_FS=y CONFIG_FUSE_FS=y CONFIG_ISO9660_FS=m diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig index fdf374574105..65adb538030d 100644 --- a/arch/mips/configs/jazz_defconfig +++ b/arch/mips/configs/jazz_defconfig @@ -70,10 +70,6 @@ CONFIG_FRAMEBUFFER_CONSOLE=y # CONFIG_HWMON is not set CONFIG_EXT2_FS=m CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_AUTOFS_FS=m diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index 83d9a8ff4270..38f17b658421 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -229,9 +229,6 @@ CONFIG_EXT2_FS=m CONFIG_EXT3_FS=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_XFS_FS=m diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index ae1a7793e810..6f8046024557 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -317,11 +317,6 @@ CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig index c07e30f63d8b..16a91eeff67f 100644 --- a/arch/mips/configs/malta_kvm_defconfig +++ b/arch/mips/configs/malta_kvm_defconfig @@ -323,11 +323,6 @@ CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig index 0a5701020d3f..264aba29ea4f 100644 --- a/arch/mips/configs/maltaup_xpa_defconfig +++ b/arch/mips/configs/maltaup_xpa_defconfig @@ -323,11 +323,6 @@ CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index 5c5e2186210c..08e1c1f2f4de 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -310,10 +310,6 @@ CONFIG_USB_LD=m CONFIG_USB_TEST=m CONFIG_EXT2_FS=m CONFIG_EXT3_FS=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_AUTOFS_FS=m From b7a10da0268a5d255bebe1d8697dc3f0b26fc3e2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 22 Sep 2023 08:15:22 +0200 Subject: [PATCH 010/415] serial: 8250: remove AR7 support AR7 is going to be removed from the Kernel, so remove its type definition from 8250 code. As with previous removals, I checked with Debian Code Search that 'PORT_AR7' is not used in userspace. Signed-off-by: Wolfram Sang Acked-by: Florian Fainelli Acked-by: Greg Kroah-Hartman Signed-off-by: Thomas Bogendoerfer --- drivers/tty/serial/8250/8250_port.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 141627370aab..0e3a9ec7af94 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -170,13 +170,6 @@ static const struct serial8250_config uart_config[] = { .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO, }, - [PORT_AR7] = { - .name = "AR7", - .fifo_size = 16, - .tx_loadsz = 16, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00, - .flags = UART_CAP_FIFO /* | UART_CAP_AFE */, - }, [PORT_U6_16550A] = { .name = "U6_16550A", .fifo_size = 64, From f10672800876be6e60b0b11d5939fcc9ab1b7050 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 22 Sep 2023 08:15:23 +0200 Subject: [PATCH 011/415] mtd: parsers: ar7: remove support AR7 is going to be removed from the Kernel, so remove its support for MTD. Signed-off-by: Wolfram Sang Acked-by: Florian Fainelli Acked-by: Miquel Raynal Signed-off-by: Thomas Bogendoerfer --- arch/arm/configs/pxa_defconfig | 1 - drivers/mtd/parsers/Kconfig | 5 -- drivers/mtd/parsers/Makefile | 1 - drivers/mtd/parsers/ar7part.c | 129 --------------------------------- 4 files changed, 136 deletions(-) delete mode 100644 drivers/mtd/parsers/ar7part.c diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 23c131b0854b..9e81b1849e4c 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -100,7 +100,6 @@ CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_CONNECTOR=y CONFIG_MTD=y -CONFIG_MTD_AR7_PARTS=m CONFIG_MTD_CMDLINE_PARTS=m CONFIG_MTD_OF_PARTS=m CONFIG_MTD_AFS_PARTS=m diff --git a/drivers/mtd/parsers/Kconfig b/drivers/mtd/parsers/Kconfig index 60738edcd5d5..da03ab6efe04 100644 --- a/drivers/mtd/parsers/Kconfig +++ b/drivers/mtd/parsers/Kconfig @@ -1,9 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -config MTD_AR7_PARTS - tristate "TI AR7 partitioning parser" - help - TI AR7 partitioning parser support - config MTD_BCM47XX_PARTS tristate "BCM47XX partitioning parser" depends on BCM47XX || ARCH_BCM_5301X diff --git a/drivers/mtd/parsers/Makefile b/drivers/mtd/parsers/Makefile index 0e70b621a1d8..9b00c62b837a 100644 --- a/drivers/mtd/parsers/Makefile +++ b/drivers/mtd/parsers/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_MTD_AR7_PARTS) += ar7part.o obj-$(CONFIG_MTD_BCM47XX_PARTS) += bcm47xxpart.o obj-$(CONFIG_MTD_BCM63XX_PARTS) += bcm63xxpart.o obj-$(CONFIG_MTD_BRCM_U_BOOT) += brcm_u-boot.o diff --git a/drivers/mtd/parsers/ar7part.c b/drivers/mtd/parsers/ar7part.c deleted file mode 100644 index 8cd683711ac6..000000000000 --- a/drivers/mtd/parsers/ar7part.c +++ /dev/null @@ -1,129 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright © 2007 Eugene Konev - * - * TI AR7 flash partition table. - * Based on ar7 map by Felix Fietkau - */ - -#include -#include - -#include -#include -#include -#include - -#include - -#define AR7_PARTS 4 -#define ROOT_OFFSET 0xe0000 - -#define LOADER_MAGIC1 le32_to_cpu(0xfeedfa42) -#define LOADER_MAGIC2 le32_to_cpu(0xfeed1281) - -struct ar7_bin_rec { - unsigned int checksum; - unsigned int length; - unsigned int address; -}; - -static int create_mtd_partitions(struct mtd_info *master, - const struct mtd_partition **pparts, - struct mtd_part_parser_data *data) -{ - struct ar7_bin_rec header; - unsigned int offset; - size_t len; - unsigned int pre_size = master->erasesize, post_size = 0; - unsigned int root_offset = ROOT_OFFSET; - - int retries = 10; - struct mtd_partition *ar7_parts; - - ar7_parts = kcalloc(AR7_PARTS, sizeof(*ar7_parts), GFP_KERNEL); - if (!ar7_parts) - return -ENOMEM; - ar7_parts[0].name = "loader"; - ar7_parts[0].offset = 0; - ar7_parts[0].size = master->erasesize; - ar7_parts[0].mask_flags = MTD_WRITEABLE; - - ar7_parts[1].name = "config"; - ar7_parts[1].offset = 0; - ar7_parts[1].size = master->erasesize; - ar7_parts[1].mask_flags = 0; - - do { /* Try 10 blocks starting from master->erasesize */ - offset = pre_size; - mtd_read(master, offset, sizeof(header), &len, - (uint8_t *)&header); - if (!strncmp((char *)&header, "TIENV0.8", 8)) - ar7_parts[1].offset = pre_size; - if (header.checksum == LOADER_MAGIC1) - break; - if (header.checksum == LOADER_MAGIC2) - break; - pre_size += master->erasesize; - } while (retries--); - - pre_size = offset; - - if (!ar7_parts[1].offset) { - ar7_parts[1].offset = master->size - master->erasesize; - post_size = master->erasesize; - } - - switch (header.checksum) { - case LOADER_MAGIC1: - while (header.length) { - offset += sizeof(header) + header.length; - mtd_read(master, offset, sizeof(header), &len, - (uint8_t *)&header); - } - root_offset = offset + sizeof(header) + 4; - break; - case LOADER_MAGIC2: - while (header.length) { - offset += sizeof(header) + header.length; - mtd_read(master, offset, sizeof(header), &len, - (uint8_t *)&header); - } - root_offset = offset + sizeof(header) + 4 + 0xff; - root_offset &= ~(uint32_t)0xff; - break; - default: - printk(KERN_WARNING "Unknown magic: %08x\n", header.checksum); - break; - } - - mtd_read(master, root_offset, sizeof(header), &len, (u8 *)&header); - if (header.checksum != SQUASHFS_MAGIC) { - root_offset += master->erasesize - 1; - root_offset &= ~(master->erasesize - 1); - } - - ar7_parts[2].name = "linux"; - ar7_parts[2].offset = pre_size; - ar7_parts[2].size = master->size - pre_size - post_size; - ar7_parts[2].mask_flags = 0; - - ar7_parts[3].name = "rootfs"; - ar7_parts[3].offset = root_offset; - ar7_parts[3].size = master->size - root_offset - post_size; - ar7_parts[3].mask_flags = 0; - - *pparts = ar7_parts; - return AR7_PARTS; -} - -static struct mtd_part_parser ar7_parser = { - .parse_fn = create_mtd_partitions, - .name = "ar7part", -}; -module_mtd_part_parser(ar7_parser); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR( "Felix Fietkau , " - "Eugene Konev "); -MODULE_DESCRIPTION("MTD partitioning for TI AR7"); From 9a6c782158f703fa65c7d52e2156b7d4e7cd41f5 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 22 Sep 2023 08:15:24 +0200 Subject: [PATCH 012/415] vlynq: remove bus driver There are no users with a vlynq_driver in the Kernel tree. Also, only the AR7 platform ever initialized a VLYNQ bus, but AR7 is going to be removed from the Kernel. OpenWRT had some out-of-tree drivers which they probably intended to upport, but AR7 devices are even there not supported anymore because they are "stuck with Kernel 3.18" [1]. This code can go. [1] https://openwrt.org/docs/techref/targets/ar7 Signed-off-by: Wolfram Sang Acked-by: Greg Kroah-Hartman Acked-by: Florian Fainelli Signed-off-by: Thomas Bogendoerfer --- MAINTAINERS | 7 - drivers/Kconfig | 2 - drivers/Makefile | 1 - drivers/vlynq/Kconfig | 21 -- drivers/vlynq/Makefile | 6 - drivers/vlynq/vlynq.c | 799 ----------------------------------------- include/linux/vlynq.h | 149 -------- 7 files changed, 985 deletions(-) delete mode 100644 drivers/vlynq/Kconfig delete mode 100644 drivers/vlynq/Makefile delete mode 100644 drivers/vlynq/vlynq.c delete mode 100644 include/linux/vlynq.h diff --git a/MAINTAINERS b/MAINTAINERS index 35977b269d5e..1cf130fce761 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22963,13 +22963,6 @@ W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git F: drivers/media/test-drivers/vivid/* -VLYNQ BUS -M: Florian Fainelli -L: openwrt-devel@lists.openwrt.org (subscribers-only) -S: Maintained -F: drivers/vlynq/vlynq.c -F: include/linux/vlynq.h - VM SOCKETS (AF_VSOCK) M: Stefano Garzarella L: virtualization@lists.linux-foundation.org diff --git a/drivers/Kconfig b/drivers/Kconfig index efb66e25fa2d..9c8b82ddebfe 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -135,8 +135,6 @@ source "drivers/uio/Kconfig" source "drivers/vfio/Kconfig" -source "drivers/vlynq/Kconfig" - source "drivers/virt/Kconfig" source "drivers/virtio/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 1bec7819a837..f7eeec79ef66 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -151,7 +151,6 @@ obj-$(CONFIG_BCMA) += bcma/ obj-$(CONFIG_VHOST_RING) += vhost/ obj-$(CONFIG_VHOST_IOTLB) += vhost/ obj-$(CONFIG_VHOST) += vhost/ -obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_GREYBUS) += greybus/ obj-$(CONFIG_COMEDI) += comedi/ obj-$(CONFIG_STAGING) += staging/ diff --git a/drivers/vlynq/Kconfig b/drivers/vlynq/Kconfig deleted file mode 100644 index e7f9492a0b04..000000000000 --- a/drivers/vlynq/Kconfig +++ /dev/null @@ -1,21 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -menu "TI VLYNQ" - depends on AR7 - -config VLYNQ - bool "TI VLYNQ bus support" - help - Support for Texas Instruments(R) VLYNQ bus. - The VLYNQ bus is a high-speed, serial and packetized - data bus which allows external peripherals of a SoC - to appear into the system's main memory. - - If unsure, say N - -config VLYNQ_DEBUG - bool "VLYNQ bus debug" - depends on VLYNQ && DEBUG_KERNEL - help - Turn on VLYNQ bus debugging. - -endmenu diff --git a/drivers/vlynq/Makefile b/drivers/vlynq/Makefile deleted file mode 100644 index d9ce5b2b5ce0..000000000000 --- a/drivers/vlynq/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for kernel vlynq drivers -# - -obj-$(CONFIG_VLYNQ) += vlynq.o diff --git a/drivers/vlynq/vlynq.c b/drivers/vlynq/vlynq.c deleted file mode 100644 index 4af6615808cc..000000000000 --- a/drivers/vlynq/vlynq.c +++ /dev/null @@ -1,799 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2006, 2007 Eugene Konev - * - * Parts of the VLYNQ specification can be found here: - * http://www.ti.com/litv/pdf/sprue36a - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#define VLYNQ_CTRL_PM_ENABLE 0x80000000 -#define VLYNQ_CTRL_CLOCK_INT 0x00008000 -#define VLYNQ_CTRL_CLOCK_DIV(x) (((x) & 7) << 16) -#define VLYNQ_CTRL_INT_LOCAL 0x00004000 -#define VLYNQ_CTRL_INT_ENABLE 0x00002000 -#define VLYNQ_CTRL_INT_VECTOR(x) (((x) & 0x1f) << 8) -#define VLYNQ_CTRL_INT2CFG 0x00000080 -#define VLYNQ_CTRL_RESET 0x00000001 - -#define VLYNQ_CTRL_CLOCK_MASK (0x7 << 16) - -#define VLYNQ_INT_OFFSET 0x00000014 -#define VLYNQ_REMOTE_OFFSET 0x00000080 - -#define VLYNQ_STATUS_LINK 0x00000001 -#define VLYNQ_STATUS_LERROR 0x00000080 -#define VLYNQ_STATUS_RERROR 0x00000100 - -#define VINT_ENABLE 0x00000100 -#define VINT_TYPE_EDGE 0x00000080 -#define VINT_LEVEL_LOW 0x00000040 -#define VINT_VECTOR(x) ((x) & 0x1f) -#define VINT_OFFSET(irq) (8 * ((irq) % 4)) - -#define VLYNQ_AUTONEGO_V2 0x00010000 - -struct vlynq_regs { - u32 revision; - u32 control; - u32 status; - u32 int_prio; - u32 int_status; - u32 int_pending; - u32 int_ptr; - u32 tx_offset; - struct vlynq_mapping rx_mapping[4]; - u32 chip; - u32 autonego; - u32 unused[6]; - u32 int_device[8]; -}; - -#ifdef CONFIG_VLYNQ_DEBUG -static void vlynq_dump_regs(struct vlynq_device *dev) -{ - int i; - - printk(KERN_DEBUG "VLYNQ local=%p remote=%p\n", - dev->local, dev->remote); - for (i = 0; i < 32; i++) { - printk(KERN_DEBUG "VLYNQ: local %d: %08x\n", - i + 1, ((u32 *)dev->local)[i]); - printk(KERN_DEBUG "VLYNQ: remote %d: %08x\n", - i + 1, ((u32 *)dev->remote)[i]); - } -} - -static void vlynq_dump_mem(u32 *base, int count) -{ - int i; - - for (i = 0; i < (count + 3) / 4; i++) { - if (i % 4 == 0) - printk(KERN_DEBUG "\nMEM[0x%04x]:", i * 4); - printk(KERN_DEBUG " 0x%08x", *(base + i)); - } - printk(KERN_DEBUG "\n"); -} -#endif - -/* Check the VLYNQ link status with a given device */ -static int vlynq_linked(struct vlynq_device *dev) -{ - int i; - - for (i = 0; i < 100; i++) - if (readl(&dev->local->status) & VLYNQ_STATUS_LINK) - return 1; - else - cpu_relax(); - - return 0; -} - -static void vlynq_reset(struct vlynq_device *dev) -{ - writel(readl(&dev->local->control) | VLYNQ_CTRL_RESET, - &dev->local->control); - - /* Wait for the devices to finish resetting */ - msleep(5); - - /* Remove reset bit */ - writel(readl(&dev->local->control) & ~VLYNQ_CTRL_RESET, - &dev->local->control); - - /* Give some time for the devices to settle */ - msleep(5); -} - -static void vlynq_irq_unmask(struct irq_data *d) -{ - struct vlynq_device *dev = irq_data_get_irq_chip_data(d); - int virq; - u32 val; - - BUG_ON(!dev); - virq = d->irq - dev->irq_start; - val = readl(&dev->remote->int_device[virq >> 2]); - val |= (VINT_ENABLE | virq) << VINT_OFFSET(virq); - writel(val, &dev->remote->int_device[virq >> 2]); -} - -static void vlynq_irq_mask(struct irq_data *d) -{ - struct vlynq_device *dev = irq_data_get_irq_chip_data(d); - int virq; - u32 val; - - BUG_ON(!dev); - virq = d->irq - dev->irq_start; - val = readl(&dev->remote->int_device[virq >> 2]); - val &= ~(VINT_ENABLE << VINT_OFFSET(virq)); - writel(val, &dev->remote->int_device[virq >> 2]); -} - -static int vlynq_irq_type(struct irq_data *d, unsigned int flow_type) -{ - struct vlynq_device *dev = irq_data_get_irq_chip_data(d); - int virq; - u32 val; - - BUG_ON(!dev); - virq = d->irq - dev->irq_start; - val = readl(&dev->remote->int_device[virq >> 2]); - switch (flow_type & IRQ_TYPE_SENSE_MASK) { - case IRQ_TYPE_EDGE_RISING: - case IRQ_TYPE_EDGE_FALLING: - case IRQ_TYPE_EDGE_BOTH: - val |= VINT_TYPE_EDGE << VINT_OFFSET(virq); - val &= ~(VINT_LEVEL_LOW << VINT_OFFSET(virq)); - break; - case IRQ_TYPE_LEVEL_HIGH: - val &= ~(VINT_TYPE_EDGE << VINT_OFFSET(virq)); - val &= ~(VINT_LEVEL_LOW << VINT_OFFSET(virq)); - break; - case IRQ_TYPE_LEVEL_LOW: - val &= ~(VINT_TYPE_EDGE << VINT_OFFSET(virq)); - val |= VINT_LEVEL_LOW << VINT_OFFSET(virq); - break; - default: - return -EINVAL; - } - writel(val, &dev->remote->int_device[virq >> 2]); - return 0; -} - -static void vlynq_local_ack(struct irq_data *d) -{ - struct vlynq_device *dev = irq_data_get_irq_chip_data(d); - u32 status = readl(&dev->local->status); - - pr_debug("%s: local status: 0x%08x\n", - dev_name(&dev->dev), status); - writel(status, &dev->local->status); -} - -static void vlynq_remote_ack(struct irq_data *d) -{ - struct vlynq_device *dev = irq_data_get_irq_chip_data(d); - u32 status = readl(&dev->remote->status); - - pr_debug("%s: remote status: 0x%08x\n", - dev_name(&dev->dev), status); - writel(status, &dev->remote->status); -} - -static irqreturn_t vlynq_irq(int irq, void *dev_id) -{ - struct vlynq_device *dev = dev_id; - u32 status; - int virq = 0; - - status = readl(&dev->local->int_status); - writel(status, &dev->local->int_status); - - if (unlikely(!status)) - spurious_interrupt(); - - while (status) { - if (status & 1) - do_IRQ(dev->irq_start + virq); - status >>= 1; - virq++; - } - - return IRQ_HANDLED; -} - -static struct irq_chip vlynq_irq_chip = { - .name = "vlynq", - .irq_unmask = vlynq_irq_unmask, - .irq_mask = vlynq_irq_mask, - .irq_set_type = vlynq_irq_type, -}; - -static struct irq_chip vlynq_local_chip = { - .name = "vlynq local error", - .irq_unmask = vlynq_irq_unmask, - .irq_mask = vlynq_irq_mask, - .irq_ack = vlynq_local_ack, -}; - -static struct irq_chip vlynq_remote_chip = { - .name = "vlynq local error", - .irq_unmask = vlynq_irq_unmask, - .irq_mask = vlynq_irq_mask, - .irq_ack = vlynq_remote_ack, -}; - -static int vlynq_setup_irq(struct vlynq_device *dev) -{ - u32 val; - int i, virq; - - if (dev->local_irq == dev->remote_irq) { - printk(KERN_ERR - "%s: local vlynq irq should be different from remote\n", - dev_name(&dev->dev)); - return -EINVAL; - } - - /* Clear local and remote error bits */ - writel(readl(&dev->local->status), &dev->local->status); - writel(readl(&dev->remote->status), &dev->remote->status); - - /* Now setup interrupts */ - val = VLYNQ_CTRL_INT_VECTOR(dev->local_irq); - val |= VLYNQ_CTRL_INT_ENABLE | VLYNQ_CTRL_INT_LOCAL | - VLYNQ_CTRL_INT2CFG; - val |= readl(&dev->local->control); - writel(VLYNQ_INT_OFFSET, &dev->local->int_ptr); - writel(val, &dev->local->control); - - val = VLYNQ_CTRL_INT_VECTOR(dev->remote_irq); - val |= VLYNQ_CTRL_INT_ENABLE; - val |= readl(&dev->remote->control); - writel(VLYNQ_INT_OFFSET, &dev->remote->int_ptr); - writel(val, &dev->remote->int_ptr); - writel(val, &dev->remote->control); - - for (i = dev->irq_start; i <= dev->irq_end; i++) { - virq = i - dev->irq_start; - if (virq == dev->local_irq) { - irq_set_chip_and_handler(i, &vlynq_local_chip, - handle_level_irq); - irq_set_chip_data(i, dev); - } else if (virq == dev->remote_irq) { - irq_set_chip_and_handler(i, &vlynq_remote_chip, - handle_level_irq); - irq_set_chip_data(i, dev); - } else { - irq_set_chip_and_handler(i, &vlynq_irq_chip, - handle_simple_irq); - irq_set_chip_data(i, dev); - writel(0, &dev->remote->int_device[virq >> 2]); - } - } - - if (request_irq(dev->irq, vlynq_irq, IRQF_SHARED, "vlynq", dev)) { - printk(KERN_ERR "%s: request_irq failed\n", - dev_name(&dev->dev)); - return -EAGAIN; - } - - return 0; -} - -static void vlynq_device_release(struct device *dev) -{ - struct vlynq_device *vdev = to_vlynq_device(dev); - kfree(vdev); -} - -static int vlynq_device_match(struct device *dev, - struct device_driver *drv) -{ - struct vlynq_device *vdev = to_vlynq_device(dev); - struct vlynq_driver *vdrv = to_vlynq_driver(drv); - struct vlynq_device_id *ids = vdrv->id_table; - - while (ids->id) { - if (ids->id == vdev->dev_id) { - vdev->divisor = ids->divisor; - vlynq_set_drvdata(vdev, ids); - printk(KERN_INFO "Driver found for VLYNQ " - "device: %08x\n", vdev->dev_id); - return 1; - } - printk(KERN_DEBUG "Not using the %08x VLYNQ device's driver" - " for VLYNQ device: %08x\n", ids->id, vdev->dev_id); - ids++; - } - return 0; -} - -static int vlynq_device_probe(struct device *dev) -{ - struct vlynq_device *vdev = to_vlynq_device(dev); - struct vlynq_driver *drv = to_vlynq_driver(dev->driver); - struct vlynq_device_id *id = vlynq_get_drvdata(vdev); - int result = -ENODEV; - - if (drv->probe) - result = drv->probe(vdev, id); - if (result) - put_device(dev); - return result; -} - -static void vlynq_device_remove(struct device *dev) -{ - struct vlynq_driver *drv = to_vlynq_driver(dev->driver); - - if (drv->remove) - drv->remove(to_vlynq_device(dev)); -} - -int __vlynq_register_driver(struct vlynq_driver *driver, struct module *owner) -{ - driver->driver.name = driver->name; - driver->driver.bus = &vlynq_bus_type; - return driver_register(&driver->driver); -} -EXPORT_SYMBOL(__vlynq_register_driver); - -void vlynq_unregister_driver(struct vlynq_driver *driver) -{ - driver_unregister(&driver->driver); -} -EXPORT_SYMBOL(vlynq_unregister_driver); - -/* - * A VLYNQ remote device can clock the VLYNQ bus master - * using a dedicated clock line. In that case, both the - * remove device and the bus master should have the same - * serial clock dividers configured. Iterate through the - * 8 possible dividers until we actually link with the - * device. - */ -static int __vlynq_try_remote(struct vlynq_device *dev) -{ - int i; - - vlynq_reset(dev); - for (i = dev->dev_id ? vlynq_rdiv2 : vlynq_rdiv8; dev->dev_id ? - i <= vlynq_rdiv8 : i >= vlynq_rdiv2; - dev->dev_id ? i++ : i--) { - - if (!vlynq_linked(dev)) - break; - - writel((readl(&dev->remote->control) & - ~VLYNQ_CTRL_CLOCK_MASK) | - VLYNQ_CTRL_CLOCK_INT | - VLYNQ_CTRL_CLOCK_DIV(i - vlynq_rdiv1), - &dev->remote->control); - writel((readl(&dev->local->control) - & ~(VLYNQ_CTRL_CLOCK_INT | - VLYNQ_CTRL_CLOCK_MASK)) | - VLYNQ_CTRL_CLOCK_DIV(i - vlynq_rdiv1), - &dev->local->control); - - if (vlynq_linked(dev)) { - printk(KERN_DEBUG - "%s: using remote clock divisor %d\n", - dev_name(&dev->dev), i - vlynq_rdiv1 + 1); - dev->divisor = i; - return 0; - } else { - vlynq_reset(dev); - } - } - - return -ENODEV; -} - -/* - * A VLYNQ remote device can be clocked by the VLYNQ bus - * master using a dedicated clock line. In that case, only - * the bus master configures the serial clock divider. - * Iterate through the 8 possible dividers until we - * actually get a link with the device. - */ -static int __vlynq_try_local(struct vlynq_device *dev) -{ - int i; - - vlynq_reset(dev); - - for (i = dev->dev_id ? vlynq_ldiv2 : vlynq_ldiv8; dev->dev_id ? - i <= vlynq_ldiv8 : i >= vlynq_ldiv2; - dev->dev_id ? i++ : i--) { - - writel((readl(&dev->local->control) & - ~VLYNQ_CTRL_CLOCK_MASK) | - VLYNQ_CTRL_CLOCK_INT | - VLYNQ_CTRL_CLOCK_DIV(i - vlynq_ldiv1), - &dev->local->control); - - if (vlynq_linked(dev)) { - printk(KERN_DEBUG - "%s: using local clock divisor %d\n", - dev_name(&dev->dev), i - vlynq_ldiv1 + 1); - dev->divisor = i; - return 0; - } else { - vlynq_reset(dev); - } - } - - return -ENODEV; -} - -/* - * When using external clocking method, serial clock - * is supplied by an external oscillator, therefore we - * should mask the local clock bit in the clock control - * register for both the bus master and the remote device. - */ -static int __vlynq_try_external(struct vlynq_device *dev) -{ - vlynq_reset(dev); - if (!vlynq_linked(dev)) - return -ENODEV; - - writel((readl(&dev->remote->control) & - ~VLYNQ_CTRL_CLOCK_INT), - &dev->remote->control); - - writel((readl(&dev->local->control) & - ~VLYNQ_CTRL_CLOCK_INT), - &dev->local->control); - - if (vlynq_linked(dev)) { - printk(KERN_DEBUG "%s: using external clock\n", - dev_name(&dev->dev)); - dev->divisor = vlynq_div_external; - return 0; - } - - return -ENODEV; -} - -static int __vlynq_enable_device(struct vlynq_device *dev) -{ - int result; - struct plat_vlynq_ops *ops = dev->dev.platform_data; - - result = ops->on(dev); - if (result) - return result; - - switch (dev->divisor) { - case vlynq_div_external: - case vlynq_div_auto: - /* When the device is brought from reset it should have clock - * generation negotiated by hardware. - * Check which device is generating clocks and perform setup - * accordingly */ - if (vlynq_linked(dev) && readl(&dev->remote->control) & - VLYNQ_CTRL_CLOCK_INT) { - if (!__vlynq_try_remote(dev) || - !__vlynq_try_local(dev) || - !__vlynq_try_external(dev)) - return 0; - } else { - if (!__vlynq_try_external(dev) || - !__vlynq_try_local(dev) || - !__vlynq_try_remote(dev)) - return 0; - } - break; - case vlynq_ldiv1: - case vlynq_ldiv2: - case vlynq_ldiv3: - case vlynq_ldiv4: - case vlynq_ldiv5: - case vlynq_ldiv6: - case vlynq_ldiv7: - case vlynq_ldiv8: - writel(VLYNQ_CTRL_CLOCK_INT | - VLYNQ_CTRL_CLOCK_DIV(dev->divisor - - vlynq_ldiv1), &dev->local->control); - writel(0, &dev->remote->control); - if (vlynq_linked(dev)) { - printk(KERN_DEBUG - "%s: using local clock divisor %d\n", - dev_name(&dev->dev), - dev->divisor - vlynq_ldiv1 + 1); - return 0; - } - break; - case vlynq_rdiv1: - case vlynq_rdiv2: - case vlynq_rdiv3: - case vlynq_rdiv4: - case vlynq_rdiv5: - case vlynq_rdiv6: - case vlynq_rdiv7: - case vlynq_rdiv8: - writel(0, &dev->local->control); - writel(VLYNQ_CTRL_CLOCK_INT | - VLYNQ_CTRL_CLOCK_DIV(dev->divisor - - vlynq_rdiv1), &dev->remote->control); - if (vlynq_linked(dev)) { - printk(KERN_DEBUG - "%s: using remote clock divisor %d\n", - dev_name(&dev->dev), - dev->divisor - vlynq_rdiv1 + 1); - return 0; - } - break; - } - - ops->off(dev); - return -ENODEV; -} - -int vlynq_enable_device(struct vlynq_device *dev) -{ - struct plat_vlynq_ops *ops = dev->dev.platform_data; - int result = -ENODEV; - - result = __vlynq_enable_device(dev); - if (result) - return result; - - result = vlynq_setup_irq(dev); - if (result) - ops->off(dev); - - dev->enabled = !result; - return result; -} -EXPORT_SYMBOL(vlynq_enable_device); - - -void vlynq_disable_device(struct vlynq_device *dev) -{ - struct plat_vlynq_ops *ops = dev->dev.platform_data; - - dev->enabled = 0; - free_irq(dev->irq, dev); - ops->off(dev); -} -EXPORT_SYMBOL(vlynq_disable_device); - -int vlynq_set_local_mapping(struct vlynq_device *dev, u32 tx_offset, - struct vlynq_mapping *mapping) -{ - int i; - - if (!dev->enabled) - return -ENXIO; - - writel(tx_offset, &dev->local->tx_offset); - for (i = 0; i < 4; i++) { - writel(mapping[i].offset, &dev->local->rx_mapping[i].offset); - writel(mapping[i].size, &dev->local->rx_mapping[i].size); - } - return 0; -} -EXPORT_SYMBOL(vlynq_set_local_mapping); - -int vlynq_set_remote_mapping(struct vlynq_device *dev, u32 tx_offset, - struct vlynq_mapping *mapping) -{ - int i; - - if (!dev->enabled) - return -ENXIO; - - writel(tx_offset, &dev->remote->tx_offset); - for (i = 0; i < 4; i++) { - writel(mapping[i].offset, &dev->remote->rx_mapping[i].offset); - writel(mapping[i].size, &dev->remote->rx_mapping[i].size); - } - return 0; -} -EXPORT_SYMBOL(vlynq_set_remote_mapping); - -int vlynq_set_local_irq(struct vlynq_device *dev, int virq) -{ - int irq = dev->irq_start + virq; - if (dev->enabled) - return -EBUSY; - - if ((irq < dev->irq_start) || (irq > dev->irq_end)) - return -EINVAL; - - if (virq == dev->remote_irq) - return -EINVAL; - - dev->local_irq = virq; - - return 0; -} -EXPORT_SYMBOL(vlynq_set_local_irq); - -int vlynq_set_remote_irq(struct vlynq_device *dev, int virq) -{ - int irq = dev->irq_start + virq; - if (dev->enabled) - return -EBUSY; - - if ((irq < dev->irq_start) || (irq > dev->irq_end)) - return -EINVAL; - - if (virq == dev->local_irq) - return -EINVAL; - - dev->remote_irq = virq; - - return 0; -} -EXPORT_SYMBOL(vlynq_set_remote_irq); - -static int vlynq_probe(struct platform_device *pdev) -{ - struct vlynq_device *dev; - struct resource *regs_res, *mem_res, *irq_res; - int len, result; - - regs_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); - if (!regs_res) - return -ENODEV; - - mem_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "mem"); - if (!mem_res) - return -ENODEV; - - irq_res = platform_get_resource_byname(pdev, IORESOURCE_IRQ, "devirq"); - if (!irq_res) - return -ENODEV; - - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) { - printk(KERN_ERR - "vlynq: failed to allocate device structure\n"); - return -ENOMEM; - } - - dev->id = pdev->id; - dev->dev.bus = &vlynq_bus_type; - dev->dev.parent = &pdev->dev; - dev_set_name(&dev->dev, "vlynq%d", dev->id); - dev->dev.platform_data = pdev->dev.platform_data; - dev->dev.release = vlynq_device_release; - - dev->regs_start = regs_res->start; - dev->regs_end = regs_res->end; - dev->mem_start = mem_res->start; - dev->mem_end = mem_res->end; - - len = resource_size(regs_res); - if (!request_mem_region(regs_res->start, len, dev_name(&dev->dev))) { - printk(KERN_ERR "%s: Can't request vlynq registers\n", - dev_name(&dev->dev)); - result = -ENXIO; - goto fail_request; - } - - dev->local = ioremap(regs_res->start, len); - if (!dev->local) { - printk(KERN_ERR "%s: Can't remap vlynq registers\n", - dev_name(&dev->dev)); - result = -ENXIO; - goto fail_remap; - } - - dev->remote = (struct vlynq_regs *)((void *)dev->local + - VLYNQ_REMOTE_OFFSET); - - dev->irq = platform_get_irq_byname(pdev, "irq"); - dev->irq_start = irq_res->start; - dev->irq_end = irq_res->end; - dev->local_irq = dev->irq_end - dev->irq_start; - dev->remote_irq = dev->local_irq - 1; - - if (device_register(&dev->dev)) - goto fail_register; - platform_set_drvdata(pdev, dev); - - printk(KERN_INFO "%s: regs 0x%p, irq %d, mem 0x%p\n", - dev_name(&dev->dev), (void *)dev->regs_start, dev->irq, - (void *)dev->mem_start); - - dev->dev_id = 0; - dev->divisor = vlynq_div_auto; - result = __vlynq_enable_device(dev); - if (result == 0) { - dev->dev_id = readl(&dev->remote->chip); - ((struct plat_vlynq_ops *)(dev->dev.platform_data))->off(dev); - } - if (dev->dev_id) - printk(KERN_INFO "Found a VLYNQ device: %08x\n", dev->dev_id); - - return 0; - -fail_register: - iounmap(dev->local); -fail_remap: -fail_request: - release_mem_region(regs_res->start, len); - kfree(dev); - return result; -} - -static int vlynq_remove(struct platform_device *pdev) -{ - struct vlynq_device *dev = platform_get_drvdata(pdev); - - device_unregister(&dev->dev); - iounmap(dev->local); - release_mem_region(dev->regs_start, - dev->regs_end - dev->regs_start + 1); - - kfree(dev); - - return 0; -} - -static struct platform_driver vlynq_platform_driver = { - .driver.name = "vlynq", - .probe = vlynq_probe, - .remove = vlynq_remove, -}; - -struct bus_type vlynq_bus_type = { - .name = "vlynq", - .match = vlynq_device_match, - .probe = vlynq_device_probe, - .remove = vlynq_device_remove, -}; -EXPORT_SYMBOL(vlynq_bus_type); - -static int vlynq_init(void) -{ - int res = 0; - - res = bus_register(&vlynq_bus_type); - if (res) - goto fail_bus; - - res = platform_driver_register(&vlynq_platform_driver); - if (res) - goto fail_platform; - - return 0; - -fail_platform: - bus_unregister(&vlynq_bus_type); -fail_bus: - return res; -} - -static void vlynq_exit(void) -{ - platform_driver_unregister(&vlynq_platform_driver); - bus_unregister(&vlynq_bus_type); -} - -module_init(vlynq_init); -module_exit(vlynq_exit); diff --git a/include/linux/vlynq.h b/include/linux/vlynq.h deleted file mode 100644 index e9c0cd36c48a..000000000000 --- a/include/linux/vlynq.h +++ /dev/null @@ -1,149 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2006, 2007 Eugene Konev - */ - -#ifndef __VLYNQ_H__ -#define __VLYNQ_H__ - -#include -#include - -struct module; - -#define VLYNQ_NUM_IRQS 32 - -struct vlynq_mapping { - u32 size; - u32 offset; -}; - -enum vlynq_divisor { - vlynq_div_auto = 0, - vlynq_ldiv1, - vlynq_ldiv2, - vlynq_ldiv3, - vlynq_ldiv4, - vlynq_ldiv5, - vlynq_ldiv6, - vlynq_ldiv7, - vlynq_ldiv8, - vlynq_rdiv1, - vlynq_rdiv2, - vlynq_rdiv3, - vlynq_rdiv4, - vlynq_rdiv5, - vlynq_rdiv6, - vlynq_rdiv7, - vlynq_rdiv8, - vlynq_div_external -}; - -struct vlynq_device_id { - u32 id; - enum vlynq_divisor divisor; - unsigned long driver_data; -}; - -struct vlynq_regs; -struct vlynq_device { - u32 id, dev_id; - int local_irq; - int remote_irq; - enum vlynq_divisor divisor; - u32 regs_start, regs_end; - u32 mem_start, mem_end; - u32 irq_start, irq_end; - int irq; - int enabled; - struct vlynq_regs *local; - struct vlynq_regs *remote; - struct device dev; -}; - -struct vlynq_driver { - char *name; - struct vlynq_device_id *id_table; - int (*probe)(struct vlynq_device *dev, struct vlynq_device_id *id); - void (*remove)(struct vlynq_device *dev); - struct device_driver driver; -}; - -struct plat_vlynq_ops { - int (*on)(struct vlynq_device *dev); - void (*off)(struct vlynq_device *dev); -}; - -static inline struct vlynq_driver *to_vlynq_driver(struct device_driver *drv) -{ - return container_of(drv, struct vlynq_driver, driver); -} - -static inline struct vlynq_device *to_vlynq_device(struct device *device) -{ - return container_of(device, struct vlynq_device, dev); -} - -extern struct bus_type vlynq_bus_type; - -extern int __vlynq_register_driver(struct vlynq_driver *driver, - struct module *owner); - -static inline int vlynq_register_driver(struct vlynq_driver *driver) -{ - return __vlynq_register_driver(driver, THIS_MODULE); -} - -static inline void *vlynq_get_drvdata(struct vlynq_device *dev) -{ - return dev_get_drvdata(&dev->dev); -} - -static inline void vlynq_set_drvdata(struct vlynq_device *dev, void *data) -{ - dev_set_drvdata(&dev->dev, data); -} - -static inline u32 vlynq_mem_start(struct vlynq_device *dev) -{ - return dev->mem_start; -} - -static inline u32 vlynq_mem_end(struct vlynq_device *dev) -{ - return dev->mem_end; -} - -static inline u32 vlynq_mem_len(struct vlynq_device *dev) -{ - return dev->mem_end - dev->mem_start + 1; -} - -static inline int vlynq_virq_to_irq(struct vlynq_device *dev, int virq) -{ - int irq = dev->irq_start + virq; - if ((irq < dev->irq_start) || (irq > dev->irq_end)) - return -EINVAL; - - return irq; -} - -static inline int vlynq_irq_to_virq(struct vlynq_device *dev, int irq) -{ - if ((irq < dev->irq_start) || (irq > dev->irq_end)) - return -EINVAL; - - return irq - dev->irq_start; -} - -extern void vlynq_unregister_driver(struct vlynq_driver *driver); -extern int vlynq_enable_device(struct vlynq_device *dev); -extern void vlynq_disable_device(struct vlynq_device *dev); -extern int vlynq_set_local_mapping(struct vlynq_device *dev, u32 tx_offset, - struct vlynq_mapping *mapping); -extern int vlynq_set_remote_mapping(struct vlynq_device *dev, u32 tx_offset, - struct vlynq_mapping *mapping); -extern int vlynq_set_local_irq(struct vlynq_device *dev, int virq); -extern int vlynq_set_remote_irq(struct vlynq_device *dev, int virq); - -#endif /* __VLYNQ_H__ */ From 1bc6e01372883354ffcd77714d8dc24c7fa4a2ed Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 22 Sep 2023 08:15:25 +0200 Subject: [PATCH 013/415] watchdog: ar7_wdt: remove driver to prepare for platform removal AR7 is going to be removed from the Kernel, so remove its watchdog support. This allows us to remove the platform because this driver includes a platform specific header. Signed-off-by: Wolfram Sang Acked-by: Florian Fainelli Acked-by: Guenter Roeck Signed-off-by: Thomas Bogendoerfer --- drivers/watchdog/Kconfig | 6 - drivers/watchdog/Makefile | 1 - drivers/watchdog/ar7_wdt.c | 315 ------------------------------------- 3 files changed, 322 deletions(-) delete mode 100644 drivers/watchdog/ar7_wdt.c diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 751458959411..b9f715d73fe8 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1768,12 +1768,6 @@ config SIBYTE_WDOG To compile this driver as a loadable module, choose M here. The module will be called sb_wdog. -config AR7_WDT - tristate "TI AR7 Watchdog Timer" - depends on AR7 || (MIPS && 32BIT && COMPILE_TEST) - help - Hardware driver for the TI AR7 Watchdog Timer. - config TXX9_WDT tristate "Toshiba TXx9 Watchdog Timer" depends on CPU_TX49XX || (MIPS && COMPILE_TEST) diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 7eab9de311cb..7cbc34514ec1 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -168,7 +168,6 @@ obj-$(CONFIG_INDYDOG) += indydog.o obj-$(CONFIG_JZ4740_WDT) += jz4740_wdt.o obj-$(CONFIG_WDT_MTX1) += mtx-1_wdt.o obj-$(CONFIG_SIBYTE_WDOG) += sb_wdog.o -obj-$(CONFIG_AR7_WDT) += ar7_wdt.o obj-$(CONFIG_TXX9_WDT) += txx9wdt.o obj-$(CONFIG_OCTEON_WDT) += octeon-wdt.o octeon-wdt-y := octeon-wdt-main.o octeon-wdt-nmi.o diff --git a/drivers/watchdog/ar7_wdt.c b/drivers/watchdog/ar7_wdt.c deleted file mode 100644 index cdcaeb0961ac..000000000000 --- a/drivers/watchdog/ar7_wdt.c +++ /dev/null @@ -1,315 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * drivers/watchdog/ar7_wdt.c - * - * Copyright (C) 2007 Nicolas Thill - * Copyright (c) 2005 Enrik Berkhan - * - * Some code taken from: - * National Semiconductor SCx200 Watchdog support - * Copyright (c) 2001,2002 Christer Weinigel - * - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#define LONGNAME "TI AR7 Watchdog Timer" - -MODULE_AUTHOR("Nicolas Thill "); -MODULE_DESCRIPTION(LONGNAME); -MODULE_LICENSE("GPL"); - -static int margin = 60; -module_param(margin, int, 0); -MODULE_PARM_DESC(margin, "Watchdog margin in seconds"); - -static bool nowayout = WATCHDOG_NOWAYOUT; -module_param(nowayout, bool, 0); -MODULE_PARM_DESC(nowayout, "Disable watchdog shutdown on close"); - -#define READ_REG(x) readl((void __iomem *)&(x)) -#define WRITE_REG(x, v) writel((v), (void __iomem *)&(x)) - -struct ar7_wdt { - u32 kick_lock; - u32 kick; - u32 change_lock; - u32 change; - u32 disable_lock; - u32 disable; - u32 prescale_lock; - u32 prescale; -}; - -static unsigned long wdt_is_open; -static unsigned expect_close; -static DEFINE_SPINLOCK(wdt_lock); - -/* XXX currently fixed, allows max margin ~68.72 secs */ -#define prescale_value 0xffff - -/* Pointer to the remapped WDT IO space */ -static struct ar7_wdt *ar7_wdt; - -static struct clk *vbus_clk; - -static void ar7_wdt_kick(u32 value) -{ - WRITE_REG(ar7_wdt->kick_lock, 0x5555); - if ((READ_REG(ar7_wdt->kick_lock) & 3) == 1) { - WRITE_REG(ar7_wdt->kick_lock, 0xaaaa); - if ((READ_REG(ar7_wdt->kick_lock) & 3) == 3) { - WRITE_REG(ar7_wdt->kick, value); - return; - } - } - pr_err("failed to unlock WDT kick reg\n"); -} - -static void ar7_wdt_prescale(u32 value) -{ - WRITE_REG(ar7_wdt->prescale_lock, 0x5a5a); - if ((READ_REG(ar7_wdt->prescale_lock) & 3) == 1) { - WRITE_REG(ar7_wdt->prescale_lock, 0xa5a5); - if ((READ_REG(ar7_wdt->prescale_lock) & 3) == 3) { - WRITE_REG(ar7_wdt->prescale, value); - return; - } - } - pr_err("failed to unlock WDT prescale reg\n"); -} - -static void ar7_wdt_change(u32 value) -{ - WRITE_REG(ar7_wdt->change_lock, 0x6666); - if ((READ_REG(ar7_wdt->change_lock) & 3) == 1) { - WRITE_REG(ar7_wdt->change_lock, 0xbbbb); - if ((READ_REG(ar7_wdt->change_lock) & 3) == 3) { - WRITE_REG(ar7_wdt->change, value); - return; - } - } - pr_err("failed to unlock WDT change reg\n"); -} - -static void ar7_wdt_disable(u32 value) -{ - WRITE_REG(ar7_wdt->disable_lock, 0x7777); - if ((READ_REG(ar7_wdt->disable_lock) & 3) == 1) { - WRITE_REG(ar7_wdt->disable_lock, 0xcccc); - if ((READ_REG(ar7_wdt->disable_lock) & 3) == 2) { - WRITE_REG(ar7_wdt->disable_lock, 0xdddd); - if ((READ_REG(ar7_wdt->disable_lock) & 3) == 3) { - WRITE_REG(ar7_wdt->disable, value); - return; - } - } - } - pr_err("failed to unlock WDT disable reg\n"); -} - -static void ar7_wdt_update_margin(int new_margin) -{ - u32 change; - u32 vbus_rate; - - vbus_rate = clk_get_rate(vbus_clk); - change = new_margin * (vbus_rate / prescale_value); - if (change < 1) - change = 1; - if (change > 0xffff) - change = 0xffff; - ar7_wdt_change(change); - margin = change * prescale_value / vbus_rate; - pr_info("timer margin %d seconds (prescale %d, change %d, freq %d)\n", - margin, prescale_value, change, vbus_rate); -} - -static void ar7_wdt_enable_wdt(void) -{ - pr_debug("enabling watchdog timer\n"); - ar7_wdt_disable(1); - ar7_wdt_kick(1); -} - -static void ar7_wdt_disable_wdt(void) -{ - pr_debug("disabling watchdog timer\n"); - ar7_wdt_disable(0); -} - -static int ar7_wdt_open(struct inode *inode, struct file *file) -{ - /* only allow one at a time */ - if (test_and_set_bit(0, &wdt_is_open)) - return -EBUSY; - ar7_wdt_enable_wdt(); - expect_close = 0; - - return stream_open(inode, file); -} - -static int ar7_wdt_release(struct inode *inode, struct file *file) -{ - if (!expect_close) - pr_warn("watchdog device closed unexpectedly, will not disable the watchdog timer\n"); - else if (!nowayout) - ar7_wdt_disable_wdt(); - clear_bit(0, &wdt_is_open); - return 0; -} - -static ssize_t ar7_wdt_write(struct file *file, const char *data, - size_t len, loff_t *ppos) -{ - /* check for a magic close character */ - if (len) { - size_t i; - - spin_lock(&wdt_lock); - ar7_wdt_kick(1); - spin_unlock(&wdt_lock); - - expect_close = 0; - for (i = 0; i < len; ++i) { - char c; - if (get_user(c, data + i)) - return -EFAULT; - if (c == 'V') - expect_close = 1; - } - - } - return len; -} - -static long ar7_wdt_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) -{ - static const struct watchdog_info ident = { - .identity = LONGNAME, - .firmware_version = 1, - .options = (WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | - WDIOF_MAGICCLOSE), - }; - int new_margin; - - switch (cmd) { - case WDIOC_GETSUPPORT: - if (copy_to_user((struct watchdog_info *)arg, &ident, - sizeof(ident))) - return -EFAULT; - return 0; - case WDIOC_GETSTATUS: - case WDIOC_GETBOOTSTATUS: - if (put_user(0, (int *)arg)) - return -EFAULT; - return 0; - case WDIOC_KEEPALIVE: - ar7_wdt_kick(1); - return 0; - case WDIOC_SETTIMEOUT: - if (get_user(new_margin, (int *)arg)) - return -EFAULT; - if (new_margin < 1) - return -EINVAL; - - spin_lock(&wdt_lock); - ar7_wdt_update_margin(new_margin); - ar7_wdt_kick(1); - spin_unlock(&wdt_lock); - fallthrough; - case WDIOC_GETTIMEOUT: - if (put_user(margin, (int *)arg)) - return -EFAULT; - return 0; - default: - return -ENOTTY; - } -} - -static const struct file_operations ar7_wdt_fops = { - .owner = THIS_MODULE, - .write = ar7_wdt_write, - .unlocked_ioctl = ar7_wdt_ioctl, - .compat_ioctl = compat_ptr_ioctl, - .open = ar7_wdt_open, - .release = ar7_wdt_release, - .llseek = no_llseek, -}; - -static struct miscdevice ar7_wdt_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &ar7_wdt_fops, -}; - -static int ar7_wdt_probe(struct platform_device *pdev) -{ - int rc; - - ar7_wdt = devm_platform_ioremap_resource_byname(pdev, "regs"); - if (IS_ERR(ar7_wdt)) - return PTR_ERR(ar7_wdt); - - vbus_clk = clk_get(NULL, "vbus"); - if (IS_ERR(vbus_clk)) { - pr_err("could not get vbus clock\n"); - return PTR_ERR(vbus_clk); - } - - ar7_wdt_disable_wdt(); - ar7_wdt_prescale(prescale_value); - ar7_wdt_update_margin(margin); - - rc = misc_register(&ar7_wdt_miscdev); - if (rc) { - pr_err("unable to register misc device\n"); - goto out; - } - return 0; - -out: - clk_put(vbus_clk); - vbus_clk = NULL; - return rc; -} - -static void ar7_wdt_remove(struct platform_device *pdev) -{ - misc_deregister(&ar7_wdt_miscdev); - clk_put(vbus_clk); - vbus_clk = NULL; -} - -static void ar7_wdt_shutdown(struct platform_device *pdev) -{ - if (!nowayout) - ar7_wdt_disable_wdt(); -} - -static struct platform_driver ar7_wdt_driver = { - .probe = ar7_wdt_probe, - .remove_new = ar7_wdt_remove, - .shutdown = ar7_wdt_shutdown, - .driver = { - .name = "ar7_wdt", - }, -}; - -module_platform_driver(ar7_wdt_driver); From 4b7d3ab445653336db9854eedad812607760c015 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 22 Sep 2023 08:15:27 +0200 Subject: [PATCH 014/415] MIPS: AR7: remove platform After a discussion about removing VLYNQ support from the Kernel, it was concluded that its only user, the AR7 platform can go [1]. Even OpenWRT has removed support because these devices are "stuck with 3.18" [2]. [1] https://lore.kernel.org/r/3395161f-2543-46f0-83d9-b918800305e1@gmail.com [2] https://openwrt.org/docs/techref/targets/ar7 Suggested-by: Jonas Gorski Signed-off-by: Wolfram Sang Acked-by: Florian Fainelli Signed-off-by: Thomas Bogendoerfer --- arch/mips/Kbuild.platforms | 1 - arch/mips/Kconfig | 22 - arch/mips/ar7/Makefile | 11 - arch/mips/ar7/Platform | 5 - arch/mips/ar7/clock.c | 439 -------------- arch/mips/ar7/gpio.c | 332 ----------- arch/mips/ar7/irq.c | 165 ------ arch/mips/ar7/memory.c | 51 -- arch/mips/ar7/platform.c | 722 ------------------------ arch/mips/ar7/prom.c | 256 --------- arch/mips/ar7/setup.c | 93 --- arch/mips/ar7/time.c | 31 - arch/mips/boot/compressed/uart-16550.c | 5 - arch/mips/configs/ar7_defconfig | 119 ---- arch/mips/include/asm/mach-ar7/ar7.h | 191 ------- arch/mips/include/asm/mach-ar7/irq.h | 16 - arch/mips/include/asm/mach-ar7/prom.h | 12 - arch/mips/include/asm/mach-ar7/spaces.h | 22 - 18 files changed, 2493 deletions(-) delete mode 100644 arch/mips/ar7/Makefile delete mode 100644 arch/mips/ar7/Platform delete mode 100644 arch/mips/ar7/clock.c delete mode 100644 arch/mips/ar7/gpio.c delete mode 100644 arch/mips/ar7/irq.c delete mode 100644 arch/mips/ar7/memory.c delete mode 100644 arch/mips/ar7/platform.c delete mode 100644 arch/mips/ar7/prom.c delete mode 100644 arch/mips/ar7/setup.c delete mode 100644 arch/mips/ar7/time.c delete mode 100644 arch/mips/configs/ar7_defconfig delete mode 100644 arch/mips/include/asm/mach-ar7/ar7.h delete mode 100644 arch/mips/include/asm/mach-ar7/irq.h delete mode 100644 arch/mips/include/asm/mach-ar7/prom.h delete mode 100644 arch/mips/include/asm/mach-ar7/spaces.h diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms index caad195ba5c1..a2311c4bce6a 100644 --- a/arch/mips/Kbuild.platforms +++ b/arch/mips/Kbuild.platforms @@ -2,7 +2,6 @@ # All platforms listed in alphabetic order platform-$(CONFIG_MIPS_ALCHEMY) += alchemy/ -platform-$(CONFIG_AR7) += ar7/ platform-$(CONFIG_ATH25) += ath25/ platform-$(CONFIG_ATH79) += ath79/ platform-$(CONFIG_BCM47XX) += bcm47xx/ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index bc8421859006..76db82542519 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -202,28 +202,6 @@ config MIPS_ALCHEMY select SYS_SUPPORTS_ZBOOT select COMMON_CLK -config AR7 - bool "Texas Instruments AR7" - select BOOT_ELF32 - select COMMON_CLK - select DMA_NONCOHERENT - select CEVT_R4K - select CSRC_R4K - select IRQ_MIPS_CPU - select NO_EXCEPT_FILL - select SWAP_IO_SPACE - select SYS_HAS_CPU_MIPS32_R1 - select SYS_HAS_EARLY_PRINTK - select SYS_SUPPORTS_32BIT_KERNEL - select SYS_SUPPORTS_LITTLE_ENDIAN - select SYS_SUPPORTS_MIPS16 - select SYS_SUPPORTS_ZBOOT_UART16550 - select GPIOLIB - select VLYNQ - help - Support for the Texas Instruments AR7 System-on-a-Chip - family: TNETD7100, 7200 and 7300. - config ATH25 bool "Atheros AR231x/AR531x SoC support" select CEVT_R4K diff --git a/arch/mips/ar7/Makefile b/arch/mips/ar7/Makefile deleted file mode 100644 index cd51c6c6e686..000000000000 --- a/arch/mips/ar7/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -obj-y := \ - prom.o \ - setup.o \ - memory.o \ - irq.o \ - time.o \ - platform.o \ - gpio.o \ - clock.o diff --git a/arch/mips/ar7/Platform b/arch/mips/ar7/Platform deleted file mode 100644 index a9257cc01c3c..000000000000 --- a/arch/mips/ar7/Platform +++ /dev/null @@ -1,5 +0,0 @@ -# -# Texas Instruments AR7 -# -cflags-$(CONFIG_AR7) += -I$(srctree)/arch/mips/include/asm/mach-ar7 -load-$(CONFIG_AR7) += 0xffffffff94100000 diff --git a/arch/mips/ar7/clock.c b/arch/mips/ar7/clock.c deleted file mode 100644 index c717acbc5506..000000000000 --- a/arch/mips/ar7/clock.c +++ /dev/null @@ -1,439 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Felix Fietkau - * Copyright (C) 2007 Eugene Konev - * Copyright (C) 2009 Florian Fainelli - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#define BOOT_PLL_SOURCE_MASK 0x3 -#define CPU_PLL_SOURCE_SHIFT 16 -#define BUS_PLL_SOURCE_SHIFT 14 -#define USB_PLL_SOURCE_SHIFT 18 -#define DSP_PLL_SOURCE_SHIFT 22 -#define BOOT_PLL_SOURCE_AFE 0 -#define BOOT_PLL_SOURCE_BUS 0 -#define BOOT_PLL_SOURCE_REF 1 -#define BOOT_PLL_SOURCE_XTAL 2 -#define BOOT_PLL_SOURCE_CPU 3 -#define BOOT_PLL_BYPASS 0x00000020 -#define BOOT_PLL_ASYNC_MODE 0x02000000 -#define BOOT_PLL_2TO1_MODE 0x00008000 - -#define TNETD7200_CLOCK_ID_CPU 0 -#define TNETD7200_CLOCK_ID_DSP 1 -#define TNETD7200_CLOCK_ID_USB 2 - -#define TNETD7200_DEF_CPU_CLK 211000000 -#define TNETD7200_DEF_DSP_CLK 125000000 -#define TNETD7200_DEF_USB_CLK 48000000 - -struct tnetd7300_clock { - u32 ctrl; -#define PREDIV_MASK 0x001f0000 -#define PREDIV_SHIFT 16 -#define POSTDIV_MASK 0x0000001f - u32 unused1[3]; - u32 pll; -#define MUL_MASK 0x0000f000 -#define MUL_SHIFT 12 -#define PLL_MODE_MASK 0x00000001 -#define PLL_NDIV 0x00000800 -#define PLL_DIV 0x00000002 -#define PLL_STATUS 0x00000001 - u32 unused2[3]; -}; - -struct tnetd7300_clocks { - struct tnetd7300_clock bus; - struct tnetd7300_clock cpu; - struct tnetd7300_clock usb; - struct tnetd7300_clock dsp; -}; - -struct tnetd7200_clock { - u32 ctrl; - u32 unused1[3]; -#define DIVISOR_ENABLE_MASK 0x00008000 - u32 mul; - u32 prediv; - u32 postdiv; - u32 postdiv2; - u32 unused2[6]; - u32 cmd; - u32 status; - u32 cmden; - u32 padding[15]; -}; - -struct tnetd7200_clocks { - struct tnetd7200_clock cpu; - struct tnetd7200_clock dsp; - struct tnetd7200_clock usb; -}; - -struct clk_rate { - u32 rate; -}; -static struct clk_rate bus_clk = { - .rate = 125000000, -}; - -static struct clk_rate cpu_clk = { - .rate = 150000000, -}; - -static void approximate(int base, int target, int *prediv, - int *postdiv, int *mul) -{ - int i, j, k, freq, res = target; - for (i = 1; i <= 16; i++) - for (j = 1; j <= 32; j++) - for (k = 1; k <= 32; k++) { - freq = abs(base / j * i / k - target); - if (freq < res) { - res = freq; - *mul = i; - *prediv = j; - *postdiv = k; - } - } -} - -static void calculate(int base, int target, int *prediv, int *postdiv, - int *mul) -{ - int tmp_gcd, tmp_base, tmp_freq; - - for (*prediv = 1; *prediv <= 32; (*prediv)++) { - tmp_base = base / *prediv; - tmp_gcd = gcd(target, tmp_base); - *mul = target / tmp_gcd; - *postdiv = tmp_base / tmp_gcd; - if ((*mul < 1) || (*mul >= 16)) - continue; - if ((*postdiv > 0) & (*postdiv <= 32)) - break; - } - - if (base / *prediv * *mul / *postdiv != target) { - approximate(base, target, prediv, postdiv, mul); - tmp_freq = base / *prediv * *mul / *postdiv; - printk(KERN_WARNING - "Adjusted requested frequency %d to %d\n", - target, tmp_freq); - } - - printk(KERN_DEBUG "Clocks: prediv: %d, postdiv: %d, mul: %d\n", - *prediv, *postdiv, *mul); -} - -static int tnetd7300_dsp_clock(void) -{ - u32 didr1, didr2; - u8 rev = ar7_chip_rev(); - didr1 = readl((void *)KSEG1ADDR(AR7_REGS_GPIO + 0x18)); - didr2 = readl((void *)KSEG1ADDR(AR7_REGS_GPIO + 0x1c)); - if (didr2 & (1 << 23)) - return 0; - if ((rev >= 0x23) && (rev != 0x57)) - return 250000000; - if ((((didr2 & 0x1fff) << 10) | ((didr1 & 0xffc00000) >> 22)) - > 4208000) - return 250000000; - return 0; -} - -static int tnetd7300_get_clock(u32 shift, struct tnetd7300_clock *clock, - u32 *bootcr, u32 bus_clock) -{ - int product; - int base_clock = AR7_REF_CLOCK; - u32 ctrl = readl(&clock->ctrl); - u32 pll = readl(&clock->pll); - int prediv = ((ctrl & PREDIV_MASK) >> PREDIV_SHIFT) + 1; - int postdiv = (ctrl & POSTDIV_MASK) + 1; - int divisor = prediv * postdiv; - int mul = ((pll & MUL_MASK) >> MUL_SHIFT) + 1; - - switch ((*bootcr & (BOOT_PLL_SOURCE_MASK << shift)) >> shift) { - case BOOT_PLL_SOURCE_BUS: - base_clock = bus_clock; - break; - case BOOT_PLL_SOURCE_REF: - base_clock = AR7_REF_CLOCK; - break; - case BOOT_PLL_SOURCE_XTAL: - base_clock = AR7_XTAL_CLOCK; - break; - case BOOT_PLL_SOURCE_CPU: - base_clock = cpu_clk.rate; - break; - } - - if (*bootcr & BOOT_PLL_BYPASS) - return base_clock / divisor; - - if ((pll & PLL_MODE_MASK) == 0) - return (base_clock >> (mul / 16 + 1)) / divisor; - - if ((pll & (PLL_NDIV | PLL_DIV)) == (PLL_NDIV | PLL_DIV)) { - product = (mul & 1) ? - (base_clock * mul) >> 1 : - (base_clock * (mul - 1)) >> 2; - return product / divisor; - } - - if (mul == 16) - return base_clock / divisor; - - return base_clock * mul / divisor; -} - -static void tnetd7300_set_clock(u32 shift, struct tnetd7300_clock *clock, - u32 *bootcr, u32 frequency) -{ - int prediv, postdiv, mul; - int base_clock = bus_clk.rate; - - switch ((*bootcr & (BOOT_PLL_SOURCE_MASK << shift)) >> shift) { - case BOOT_PLL_SOURCE_BUS: - base_clock = bus_clk.rate; - break; - case BOOT_PLL_SOURCE_REF: - base_clock = AR7_REF_CLOCK; - break; - case BOOT_PLL_SOURCE_XTAL: - base_clock = AR7_XTAL_CLOCK; - break; - case BOOT_PLL_SOURCE_CPU: - base_clock = cpu_clk.rate; - break; - } - - calculate(base_clock, frequency, &prediv, &postdiv, &mul); - - writel(((prediv - 1) << PREDIV_SHIFT) | (postdiv - 1), &clock->ctrl); - mdelay(1); - writel(4, &clock->pll); - while (readl(&clock->pll) & PLL_STATUS) - ; - writel(((mul - 1) << MUL_SHIFT) | (0xff << 3) | 0x0e, &clock->pll); - mdelay(75); -} - -static void __init tnetd7300_init_clocks(void) -{ - u32 *bootcr = (u32 *)ioremap(AR7_REGS_DCL, 4); - struct tnetd7300_clocks *clocks = - ioremap(UR8_REGS_CLOCKS, - sizeof(struct tnetd7300_clocks)); - u32 dsp_clk; - struct clk *clk; - - bus_clk.rate = tnetd7300_get_clock(BUS_PLL_SOURCE_SHIFT, - &clocks->bus, bootcr, AR7_AFE_CLOCK); - - if (*bootcr & BOOT_PLL_ASYNC_MODE) - cpu_clk.rate = tnetd7300_get_clock(CPU_PLL_SOURCE_SHIFT, - &clocks->cpu, bootcr, AR7_AFE_CLOCK); - else - cpu_clk.rate = bus_clk.rate; - - dsp_clk = tnetd7300_dsp_clock(); - if (dsp_clk == 250000000) - tnetd7300_set_clock(DSP_PLL_SOURCE_SHIFT, &clocks->dsp, - bootcr, dsp_clk); - - iounmap(clocks); - iounmap(bootcr); - - clk = clk_register_fixed_rate(NULL, "cpu", NULL, 0, cpu_clk.rate); - clkdev_create(clk, "cpu", NULL); - clk = clk_register_fixed_rate(NULL, "dsp", NULL, 0, dsp_clk); - clkdev_create(clk, "dsp", NULL); -} - -static void tnetd7200_set_clock(int base, struct tnetd7200_clock *clock, - int prediv, int postdiv, int postdiv2, int mul, u32 frequency) -{ - printk(KERN_INFO - "Clocks: base = %d, frequency = %u, prediv = %d, " - "postdiv = %d, postdiv2 = %d, mul = %d\n", - base, frequency, prediv, postdiv, postdiv2, mul); - - writel(0, &clock->ctrl); - writel(DIVISOR_ENABLE_MASK | ((prediv - 1) & 0x1F), &clock->prediv); - writel((mul - 1) & 0xF, &clock->mul); - - while (readl(&clock->status) & 0x1) - ; /* nop */ - - writel(DIVISOR_ENABLE_MASK | ((postdiv - 1) & 0x1F), &clock->postdiv); - - writel(readl(&clock->cmden) | 1, &clock->cmden); - writel(readl(&clock->cmd) | 1, &clock->cmd); - - while (readl(&clock->status) & 0x1) - ; /* nop */ - - writel(DIVISOR_ENABLE_MASK | ((postdiv2 - 1) & 0x1F), &clock->postdiv2); - - writel(readl(&clock->cmden) | 1, &clock->cmden); - writel(readl(&clock->cmd) | 1, &clock->cmd); - - while (readl(&clock->status) & 0x1) - ; /* nop */ - - writel(readl(&clock->ctrl) | 1, &clock->ctrl); -} - -static int tnetd7200_get_clock_base(int clock_id, u32 *bootcr) -{ - if (*bootcr & BOOT_PLL_ASYNC_MODE) - /* Async */ - switch (clock_id) { - case TNETD7200_CLOCK_ID_DSP: - return AR7_REF_CLOCK; - default: - return AR7_AFE_CLOCK; - } - else - /* Sync */ - if (*bootcr & BOOT_PLL_2TO1_MODE) - /* 2:1 */ - switch (clock_id) { - case TNETD7200_CLOCK_ID_DSP: - return AR7_REF_CLOCK; - default: - return AR7_AFE_CLOCK; - } - else - /* 1:1 */ - return AR7_REF_CLOCK; -} - - -static void __init tnetd7200_init_clocks(void) -{ - u32 *bootcr = (u32 *)ioremap(AR7_REGS_DCL, 4); - struct tnetd7200_clocks *clocks = - ioremap(AR7_REGS_CLOCKS, - sizeof(struct tnetd7200_clocks)); - int cpu_base, cpu_mul, cpu_prediv, cpu_postdiv; - int dsp_base, dsp_mul, dsp_prediv, dsp_postdiv; - int usb_base, usb_mul, usb_prediv, usb_postdiv; - struct clk *clk; - - cpu_base = tnetd7200_get_clock_base(TNETD7200_CLOCK_ID_CPU, bootcr); - dsp_base = tnetd7200_get_clock_base(TNETD7200_CLOCK_ID_DSP, bootcr); - - if (*bootcr & BOOT_PLL_ASYNC_MODE) { - printk(KERN_INFO "Clocks: Async mode\n"); - - printk(KERN_INFO "Clocks: Setting DSP clock\n"); - calculate(dsp_base, TNETD7200_DEF_DSP_CLK, - &dsp_prediv, &dsp_postdiv, &dsp_mul); - bus_clk.rate = - ((dsp_base / dsp_prediv) * dsp_mul) / dsp_postdiv; - tnetd7200_set_clock(dsp_base, &clocks->dsp, - dsp_prediv, dsp_postdiv * 2, dsp_postdiv, dsp_mul * 2, - bus_clk.rate); - - printk(KERN_INFO "Clocks: Setting CPU clock\n"); - calculate(cpu_base, TNETD7200_DEF_CPU_CLK, &cpu_prediv, - &cpu_postdiv, &cpu_mul); - cpu_clk.rate = - ((cpu_base / cpu_prediv) * cpu_mul) / cpu_postdiv; - tnetd7200_set_clock(cpu_base, &clocks->cpu, - cpu_prediv, cpu_postdiv, -1, cpu_mul, - cpu_clk.rate); - - } else - if (*bootcr & BOOT_PLL_2TO1_MODE) { - printk(KERN_INFO "Clocks: Sync 2:1 mode\n"); - - printk(KERN_INFO "Clocks: Setting CPU clock\n"); - calculate(cpu_base, TNETD7200_DEF_CPU_CLK, &cpu_prediv, - &cpu_postdiv, &cpu_mul); - cpu_clk.rate = ((cpu_base / cpu_prediv) * cpu_mul) - / cpu_postdiv; - tnetd7200_set_clock(cpu_base, &clocks->cpu, - cpu_prediv, cpu_postdiv, -1, cpu_mul, - cpu_clk.rate); - - printk(KERN_INFO "Clocks: Setting DSP clock\n"); - calculate(dsp_base, TNETD7200_DEF_DSP_CLK, &dsp_prediv, - &dsp_postdiv, &dsp_mul); - bus_clk.rate = cpu_clk.rate / 2; - tnetd7200_set_clock(dsp_base, &clocks->dsp, - dsp_prediv, dsp_postdiv * 2, dsp_postdiv, - dsp_mul * 2, bus_clk.rate); - } else { - printk(KERN_INFO "Clocks: Sync 1:1 mode\n"); - - printk(KERN_INFO "Clocks: Setting DSP clock\n"); - calculate(dsp_base, TNETD7200_DEF_DSP_CLK, &dsp_prediv, - &dsp_postdiv, &dsp_mul); - bus_clk.rate = ((dsp_base / dsp_prediv) * dsp_mul) - / dsp_postdiv; - tnetd7200_set_clock(dsp_base, &clocks->dsp, - dsp_prediv, dsp_postdiv * 2, dsp_postdiv, - dsp_mul * 2, bus_clk.rate); - - cpu_clk.rate = bus_clk.rate; - } - - printk(KERN_INFO "Clocks: Setting USB clock\n"); - usb_base = bus_clk.rate; - calculate(usb_base, TNETD7200_DEF_USB_CLK, &usb_prediv, - &usb_postdiv, &usb_mul); - tnetd7200_set_clock(usb_base, &clocks->usb, - usb_prediv, usb_postdiv, -1, usb_mul, - TNETD7200_DEF_USB_CLK); - - iounmap(clocks); - iounmap(bootcr); - - clk = clk_register_fixed_rate(NULL, "cpu", NULL, 0, cpu_clk.rate); - clkdev_create(clk, "cpu", NULL); - clkdev_create(clk, "dsp", NULL); -} - -void __init ar7_init_clocks(void) -{ - struct clk *clk; - - switch (ar7_chip_id()) { - case AR7_CHIP_7100: - case AR7_CHIP_7200: - tnetd7200_init_clocks(); - break; - case AR7_CHIP_7300: - tnetd7300_init_clocks(); - break; - default: - break; - } - clk = clk_register_fixed_rate(NULL, "bus", NULL, 0, bus_clk.rate); - clkdev_create(clk, "bus", NULL); - /* adjust vbus clock rate */ - clk = clk_register_fixed_factor(NULL, "vbus", "bus", 0, 1, 2); - clkdev_create(clk, "vbus", NULL); - clkdev_create(clk, "cpmac", "cpmac.1"); - clkdev_create(clk, "cpmac", "cpmac.1"); -} diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c deleted file mode 100644 index 4ed833b9cc2f..000000000000 --- a/arch/mips/ar7/gpio.c +++ /dev/null @@ -1,332 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Felix Fietkau - * Copyright (C) 2007 Eugene Konev - * Copyright (C) 2009-2010 Florian Fainelli - */ - -#include -#include -#include - -#include - -#define AR7_GPIO_MAX 32 -#define TITAN_GPIO_MAX 51 - -struct ar7_gpio_chip { - void __iomem *regs; - struct gpio_chip chip; -}; - -static int ar7_gpio_get_value(struct gpio_chip *chip, unsigned gpio) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_in = gpch->regs + AR7_GPIO_INPUT; - - return !!(readl(gpio_in) & (1 << gpio)); -} - -static int titan_gpio_get_value(struct gpio_chip *chip, unsigned gpio) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_in0 = gpch->regs + TITAN_GPIO_INPUT_0; - void __iomem *gpio_in1 = gpch->regs + TITAN_GPIO_INPUT_1; - - return readl(gpio >> 5 ? gpio_in1 : gpio_in0) & (1 << (gpio & 0x1f)); -} - -static void ar7_gpio_set_value(struct gpio_chip *chip, - unsigned gpio, int value) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_out = gpch->regs + AR7_GPIO_OUTPUT; - unsigned tmp; - - tmp = readl(gpio_out) & ~(1 << gpio); - if (value) - tmp |= 1 << gpio; - writel(tmp, gpio_out); -} - -static void titan_gpio_set_value(struct gpio_chip *chip, - unsigned gpio, int value) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_out0 = gpch->regs + TITAN_GPIO_OUTPUT_0; - void __iomem *gpio_out1 = gpch->regs + TITAN_GPIO_OUTPUT_1; - unsigned tmp; - - tmp = readl(gpio >> 5 ? gpio_out1 : gpio_out0) & ~(1 << (gpio & 0x1f)); - if (value) - tmp |= 1 << (gpio & 0x1f); - writel(tmp, gpio >> 5 ? gpio_out1 : gpio_out0); -} - -static int ar7_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_dir = gpch->regs + AR7_GPIO_DIR; - - writel(readl(gpio_dir) | (1 << gpio), gpio_dir); - - return 0; -} - -static int titan_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_dir0 = gpch->regs + TITAN_GPIO_DIR_0; - void __iomem *gpio_dir1 = gpch->regs + TITAN_GPIO_DIR_1; - - if (gpio >= TITAN_GPIO_MAX) - return -EINVAL; - - writel(readl(gpio >> 5 ? gpio_dir1 : gpio_dir0) | (1 << (gpio & 0x1f)), - gpio >> 5 ? gpio_dir1 : gpio_dir0); - return 0; -} - -static int ar7_gpio_direction_output(struct gpio_chip *chip, - unsigned gpio, int value) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_dir = gpch->regs + AR7_GPIO_DIR; - - ar7_gpio_set_value(chip, gpio, value); - writel(readl(gpio_dir) & ~(1 << gpio), gpio_dir); - - return 0; -} - -static int titan_gpio_direction_output(struct gpio_chip *chip, - unsigned gpio, int value) -{ - struct ar7_gpio_chip *gpch = gpiochip_get_data(chip); - void __iomem *gpio_dir0 = gpch->regs + TITAN_GPIO_DIR_0; - void __iomem *gpio_dir1 = gpch->regs + TITAN_GPIO_DIR_1; - - if (gpio >= TITAN_GPIO_MAX) - return -EINVAL; - - titan_gpio_set_value(chip, gpio, value); - writel(readl(gpio >> 5 ? gpio_dir1 : gpio_dir0) & ~(1 << - (gpio & 0x1f)), gpio >> 5 ? gpio_dir1 : gpio_dir0); - - return 0; -} - -static struct ar7_gpio_chip ar7_gpio_chip = { - .chip = { - .label = "ar7-gpio", - .direction_input = ar7_gpio_direction_input, - .direction_output = ar7_gpio_direction_output, - .set = ar7_gpio_set_value, - .get = ar7_gpio_get_value, - .base = 0, - .ngpio = AR7_GPIO_MAX, - } -}; - -static struct ar7_gpio_chip titan_gpio_chip = { - .chip = { - .label = "titan-gpio", - .direction_input = titan_gpio_direction_input, - .direction_output = titan_gpio_direction_output, - .set = titan_gpio_set_value, - .get = titan_gpio_get_value, - .base = 0, - .ngpio = TITAN_GPIO_MAX, - } -}; - -static inline int ar7_gpio_enable_ar7(unsigned gpio) -{ - void __iomem *gpio_en = ar7_gpio_chip.regs + AR7_GPIO_ENABLE; - - writel(readl(gpio_en) | (1 << gpio), gpio_en); - - return 0; -} - -static inline int ar7_gpio_enable_titan(unsigned gpio) -{ - void __iomem *gpio_en0 = titan_gpio_chip.regs + TITAN_GPIO_ENBL_0; - void __iomem *gpio_en1 = titan_gpio_chip.regs + TITAN_GPIO_ENBL_1; - - writel(readl(gpio >> 5 ? gpio_en1 : gpio_en0) | (1 << (gpio & 0x1f)), - gpio >> 5 ? gpio_en1 : gpio_en0); - - return 0; -} - -int ar7_gpio_enable(unsigned gpio) -{ - return ar7_is_titan() ? ar7_gpio_enable_titan(gpio) : - ar7_gpio_enable_ar7(gpio); -} -EXPORT_SYMBOL(ar7_gpio_enable); - -static inline int ar7_gpio_disable_ar7(unsigned gpio) -{ - void __iomem *gpio_en = ar7_gpio_chip.regs + AR7_GPIO_ENABLE; - - writel(readl(gpio_en) & ~(1 << gpio), gpio_en); - - return 0; -} - -static inline int ar7_gpio_disable_titan(unsigned gpio) -{ - void __iomem *gpio_en0 = titan_gpio_chip.regs + TITAN_GPIO_ENBL_0; - void __iomem *gpio_en1 = titan_gpio_chip.regs + TITAN_GPIO_ENBL_1; - - writel(readl(gpio >> 5 ? gpio_en1 : gpio_en0) & ~(1 << (gpio & 0x1f)), - gpio >> 5 ? gpio_en1 : gpio_en0); - - return 0; -} - -int ar7_gpio_disable(unsigned gpio) -{ - return ar7_is_titan() ? ar7_gpio_disable_titan(gpio) : - ar7_gpio_disable_ar7(gpio); -} -EXPORT_SYMBOL(ar7_gpio_disable); - -struct titan_gpio_cfg { - u32 reg; - u32 shift; - u32 func; -}; - -static const struct titan_gpio_cfg titan_gpio_table[] = { - /* reg, start bit, mux value */ - {4, 24, 1}, - {4, 26, 1}, - {4, 28, 1}, - {4, 30, 1}, - {5, 6, 1}, - {5, 8, 1}, - {5, 10, 1}, - {5, 12, 1}, - {7, 14, 3}, - {7, 16, 3}, - {7, 18, 3}, - {7, 20, 3}, - {7, 22, 3}, - {7, 26, 3}, - {7, 28, 3}, - {7, 30, 3}, - {8, 0, 3}, - {8, 2, 3}, - {8, 4, 3}, - {8, 10, 3}, - {8, 14, 3}, - {8, 16, 3}, - {8, 18, 3}, - {8, 20, 3}, - {9, 8, 3}, - {9, 10, 3}, - {9, 12, 3}, - {9, 14, 3}, - {9, 18, 3}, - {9, 20, 3}, - {9, 24, 3}, - {9, 26, 3}, - {9, 28, 3}, - {9, 30, 3}, - {10, 0, 3}, - {10, 2, 3}, - {10, 8, 3}, - {10, 10, 3}, - {10, 12, 3}, - {10, 14, 3}, - {13, 12, 3}, - {13, 14, 3}, - {13, 16, 3}, - {13, 18, 3}, - {13, 24, 3}, - {13, 26, 3}, - {13, 28, 3}, - {13, 30, 3}, - {14, 2, 3}, - {14, 6, 3}, - {14, 8, 3}, - {14, 12, 3} -}; - -static int titan_gpio_pinsel(unsigned gpio) -{ - struct titan_gpio_cfg gpio_cfg; - u32 mux_status, pin_sel_reg, tmp; - void __iomem *pin_sel = (void __iomem *)KSEG1ADDR(AR7_REGS_PINSEL); - - if (gpio >= ARRAY_SIZE(titan_gpio_table)) - return -EINVAL; - - gpio_cfg = titan_gpio_table[gpio]; - pin_sel_reg = gpio_cfg.reg - 1; - - mux_status = (readl(pin_sel + pin_sel_reg) >> gpio_cfg.shift) & 0x3; - - /* Check the mux status */ - if (!((mux_status == 0) || (mux_status == gpio_cfg.func))) - return 0; - - /* Set the pin sel value */ - tmp = readl(pin_sel + pin_sel_reg); - tmp |= ((gpio_cfg.func & 0x3) << gpio_cfg.shift); - writel(tmp, pin_sel + pin_sel_reg); - - return 0; -} - -/* Perform minimal Titan GPIO configuration */ -static void titan_gpio_init(void) -{ - unsigned i; - - for (i = 44; i < 48; i++) { - titan_gpio_pinsel(i); - ar7_gpio_enable_titan(i); - titan_gpio_direction_input(&titan_gpio_chip.chip, i); - } -} - -int __init ar7_gpio_init(void) -{ - int ret; - struct ar7_gpio_chip *gpch; - unsigned size; - - if (!ar7_is_titan()) { - gpch = &ar7_gpio_chip; - size = 0x10; - } else { - gpch = &titan_gpio_chip; - size = 0x1f; - } - - gpch->regs = ioremap(AR7_REGS_GPIO, size); - if (!gpch->regs) { - printk(KERN_ERR "%s: failed to ioremap regs\n", - gpch->chip.label); - return -ENOMEM; - } - - ret = gpiochip_add_data(&gpch->chip, gpch); - if (ret) { - printk(KERN_ERR "%s: failed to add gpiochip\n", - gpch->chip.label); - iounmap(gpch->regs); - return ret; - } - printk(KERN_INFO "%s: registered %d GPIOs\n", - gpch->chip.label, gpch->chip.ngpio); - - if (ar7_is_titan()) - titan_gpio_init(); - - return ret; -} diff --git a/arch/mips/ar7/irq.c b/arch/mips/ar7/irq.c deleted file mode 100644 index f0a7942d393e..000000000000 --- a/arch/mips/ar7/irq.c +++ /dev/null @@ -1,165 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2006,2007 Felix Fietkau - * Copyright (C) 2006,2007 Eugene Konev - */ - -#include -#include -#include - -#include -#include -#include - -#define EXCEPT_OFFSET 0x80 -#define PACE_OFFSET 0xA0 -#define CHNLS_OFFSET 0x200 - -#define REG_OFFSET(irq, reg) ((irq) / 32 * 0x4 + reg * 0x10) -#define SEC_REG_OFFSET(reg) (EXCEPT_OFFSET + reg * 0x8) -#define SEC_SR_OFFSET (SEC_REG_OFFSET(0)) /* 0x80 */ -#define CR_OFFSET(irq) (REG_OFFSET(irq, 1)) /* 0x10 */ -#define SEC_CR_OFFSET (SEC_REG_OFFSET(1)) /* 0x88 */ -#define ESR_OFFSET(irq) (REG_OFFSET(irq, 2)) /* 0x20 */ -#define SEC_ESR_OFFSET (SEC_REG_OFFSET(2)) /* 0x90 */ -#define ECR_OFFSET(irq) (REG_OFFSET(irq, 3)) /* 0x30 */ -#define SEC_ECR_OFFSET (SEC_REG_OFFSET(3)) /* 0x98 */ -#define PIR_OFFSET (0x40) -#define MSR_OFFSET (0x44) -#define PM_OFFSET(irq) (REG_OFFSET(irq, 5)) /* 0x50 */ -#define TM_OFFSET(irq) (REG_OFFSET(irq, 6)) /* 0x60 */ - -#define REG(addr) ((u32 *)(KSEG1ADDR(AR7_REGS_IRQ) + addr)) - -#define CHNL_OFFSET(chnl) (CHNLS_OFFSET + (chnl * 4)) - -static int ar7_irq_base; - -static void ar7_unmask_irq(struct irq_data *d) -{ - writel(1 << ((d->irq - ar7_irq_base) % 32), - REG(ESR_OFFSET(d->irq - ar7_irq_base))); -} - -static void ar7_mask_irq(struct irq_data *d) -{ - writel(1 << ((d->irq - ar7_irq_base) % 32), - REG(ECR_OFFSET(d->irq - ar7_irq_base))); -} - -static void ar7_ack_irq(struct irq_data *d) -{ - writel(1 << ((d->irq - ar7_irq_base) % 32), - REG(CR_OFFSET(d->irq - ar7_irq_base))); -} - -static void ar7_unmask_sec_irq(struct irq_data *d) -{ - writel(1 << (d->irq - ar7_irq_base - 40), REG(SEC_ESR_OFFSET)); -} - -static void ar7_mask_sec_irq(struct irq_data *d) -{ - writel(1 << (d->irq - ar7_irq_base - 40), REG(SEC_ECR_OFFSET)); -} - -static void ar7_ack_sec_irq(struct irq_data *d) -{ - writel(1 << (d->irq - ar7_irq_base - 40), REG(SEC_CR_OFFSET)); -} - -static struct irq_chip ar7_irq_type = { - .name = "AR7", - .irq_unmask = ar7_unmask_irq, - .irq_mask = ar7_mask_irq, - .irq_ack = ar7_ack_irq -}; - -static struct irq_chip ar7_sec_irq_type = { - .name = "AR7", - .irq_unmask = ar7_unmask_sec_irq, - .irq_mask = ar7_mask_sec_irq, - .irq_ack = ar7_ack_sec_irq, -}; - -static void __init ar7_irq_init(int base) -{ - int i; - /* - * Disable interrupts and clear pending - */ - writel(0xffffffff, REG(ECR_OFFSET(0))); - writel(0xff, REG(ECR_OFFSET(32))); - writel(0xffffffff, REG(SEC_ECR_OFFSET)); - writel(0xffffffff, REG(CR_OFFSET(0))); - writel(0xff, REG(CR_OFFSET(32))); - writel(0xffffffff, REG(SEC_CR_OFFSET)); - - ar7_irq_base = base; - - for (i = 0; i < 40; i++) { - writel(i, REG(CHNL_OFFSET(i))); - /* Primary IRQ's */ - irq_set_chip_and_handler(base + i, &ar7_irq_type, - handle_level_irq); - /* Secondary IRQ's */ - if (i < 32) - irq_set_chip_and_handler(base + i + 40, - &ar7_sec_irq_type, - handle_level_irq); - } - - if (request_irq(2, no_action, IRQF_NO_THREAD, "AR7 cascade interrupt", - NULL)) - pr_err("Failed to request irq 2 (AR7 cascade interrupt)\n"); - if (request_irq(ar7_irq_base, no_action, IRQF_NO_THREAD, - "AR7 cascade interrupt", NULL)) { - pr_err("Failed to request irq %d (AR7 cascade interrupt)\n", - ar7_irq_base); - } - set_c0_status(IE_IRQ0); -} - -void __init arch_init_irq(void) -{ - mips_cpu_irq_init(); - ar7_irq_init(8); -} - -static void ar7_cascade(void) -{ - u32 status; - int i, irq; - - /* Primary IRQ's */ - irq = readl(REG(PIR_OFFSET)) & 0x3f; - if (irq) { - do_IRQ(ar7_irq_base + irq); - return; - } - - /* Secondary IRQ's are cascaded through primary '0' */ - writel(1, REG(CR_OFFSET(irq))); - status = readl(REG(SEC_SR_OFFSET)); - for (i = 0; i < 32; i++) { - if (status & 1) { - do_IRQ(ar7_irq_base + i + 40); - return; - } - status >>= 1; - } - - spurious_interrupt(); -} - -asmlinkage void plat_irq_dispatch(void) -{ - unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM; - if (pending & STATUSF_IP7) /* cpu timer */ - do_IRQ(7); - else if (pending & STATUSF_IP2) /* int0 hardware line */ - ar7_cascade(); - else - spurious_interrupt(); -} diff --git a/arch/mips/ar7/memory.c b/arch/mips/ar7/memory.c deleted file mode 100644 index ce8024c1a54e..000000000000 --- a/arch/mips/ar7/memory.c +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Felix Fietkau - * Copyright (C) 2007 Eugene Konev - */ -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -static int __init memsize(void) -{ - u32 size = (64 << 20); - u32 *addr = (u32 *)KSEG1ADDR(AR7_SDRAM_BASE + size - 4); - u32 *kernel_end = (u32 *)KSEG1ADDR(CPHYSADDR((u32)&_end)); - u32 *tmpaddr = addr; - - while (tmpaddr > kernel_end) { - *tmpaddr = (u32)tmpaddr; - size >>= 1; - tmpaddr -= size >> 2; - } - - do { - tmpaddr += size >> 2; - if (*tmpaddr != (u32)tmpaddr) - break; - size <<= 1; - } while (size < (64 << 20)); - - writel((u32)tmpaddr, &addr); - - return size; -} - -void __init prom_meminit(void) -{ - unsigned long pages; - - pages = memsize() >> PAGE_SHIFT; - memblock_add(PHYS_OFFSET, pages << PAGE_SHIFT); -} diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c deleted file mode 100644 index 215149a85d83..000000000000 --- a/arch/mips/ar7/platform.c +++ /dev/null @@ -1,722 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2006,2007 Felix Fietkau - * Copyright (C) 2006,2007 Eugene Konev - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -/***************************************************************************** - * VLYNQ Bus - ****************************************************************************/ -struct plat_vlynq_data { - struct plat_vlynq_ops ops; - int gpio_bit; - int reset_bit; -}; - -static int vlynq_on(struct vlynq_device *dev) -{ - int ret; - struct plat_vlynq_data *pdata = dev->dev.platform_data; - - ret = gpio_request(pdata->gpio_bit, "vlynq"); - if (ret) - goto out; - - ar7_device_reset(pdata->reset_bit); - - ret = ar7_gpio_disable(pdata->gpio_bit); - if (ret) - goto out_enabled; - - ret = ar7_gpio_enable(pdata->gpio_bit); - if (ret) - goto out_enabled; - - ret = gpio_direction_output(pdata->gpio_bit, 0); - if (ret) - goto out_gpio_enabled; - - msleep(50); - - gpio_set_value(pdata->gpio_bit, 1); - - msleep(50); - - return 0; - -out_gpio_enabled: - ar7_gpio_disable(pdata->gpio_bit); -out_enabled: - ar7_device_disable(pdata->reset_bit); - gpio_free(pdata->gpio_bit); -out: - return ret; -} - -static void vlynq_off(struct vlynq_device *dev) -{ - struct plat_vlynq_data *pdata = dev->dev.platform_data; - - ar7_gpio_disable(pdata->gpio_bit); - gpio_free(pdata->gpio_bit); - ar7_device_disable(pdata->reset_bit); -} - -static struct resource vlynq_low_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_VLYNQ0, - .end = AR7_REGS_VLYNQ0 + 0xff, - }, - { - .name = "irq", - .flags = IORESOURCE_IRQ, - .start = 29, - .end = 29, - }, - { - .name = "mem", - .flags = IORESOURCE_MEM, - .start = 0x04000000, - .end = 0x04ffffff, - }, - { - .name = "devirq", - .flags = IORESOURCE_IRQ, - .start = 80, - .end = 111, - }, -}; - -static struct resource vlynq_high_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_VLYNQ1, - .end = AR7_REGS_VLYNQ1 + 0xff, - }, - { - .name = "irq", - .flags = IORESOURCE_IRQ, - .start = 33, - .end = 33, - }, - { - .name = "mem", - .flags = IORESOURCE_MEM, - .start = 0x0c000000, - .end = 0x0cffffff, - }, - { - .name = "devirq", - .flags = IORESOURCE_IRQ, - .start = 112, - .end = 143, - }, -}; - -static struct plat_vlynq_data vlynq_low_data = { - .ops = { - .on = vlynq_on, - .off = vlynq_off, - }, - .reset_bit = 20, - .gpio_bit = 18, -}; - -static struct plat_vlynq_data vlynq_high_data = { - .ops = { - .on = vlynq_on, - .off = vlynq_off, - }, - .reset_bit = 16, - .gpio_bit = 19, -}; - -static struct platform_device vlynq_low = { - .id = 0, - .name = "vlynq", - .dev = { - .platform_data = &vlynq_low_data, - }, - .resource = vlynq_low_res, - .num_resources = ARRAY_SIZE(vlynq_low_res), -}; - -static struct platform_device vlynq_high = { - .id = 1, - .name = "vlynq", - .dev = { - .platform_data = &vlynq_high_data, - }, - .resource = vlynq_high_res, - .num_resources = ARRAY_SIZE(vlynq_high_res), -}; - -/***************************************************************************** - * Flash - ****************************************************************************/ -static struct resource physmap_flash_resource = { - .name = "mem", - .flags = IORESOURCE_MEM, - .start = 0x10000000, - .end = 0x107fffff, -}; - -static const char *ar7_probe_types[] = { "ar7part", NULL }; - -static struct physmap_flash_data physmap_flash_data = { - .width = 2, - .part_probe_types = ar7_probe_types, -}; - -static struct platform_device physmap_flash = { - .name = "physmap-flash", - .dev = { - .platform_data = &physmap_flash_data, - }, - .resource = &physmap_flash_resource, - .num_resources = 1, -}; - -/***************************************************************************** - * Ethernet - ****************************************************************************/ -static struct resource cpmac_low_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_MAC0, - .end = AR7_REGS_MAC0 + 0x7ff, - }, - { - .name = "irq", - .flags = IORESOURCE_IRQ, - .start = 27, - .end = 27, - }, -}; - -static struct resource cpmac_high_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_MAC1, - .end = AR7_REGS_MAC1 + 0x7ff, - }, - { - .name = "irq", - .flags = IORESOURCE_IRQ, - .start = 41, - .end = 41, - }, -}; - -static struct fixed_phy_status fixed_phy_status __initdata = { - .link = 1, - .speed = 100, - .duplex = 1, -}; - -static struct plat_cpmac_data cpmac_low_data = { - .reset_bit = 17, - .power_bit = 20, - .phy_mask = 0x80000000, -}; - -static struct plat_cpmac_data cpmac_high_data = { - .reset_bit = 21, - .power_bit = 22, - .phy_mask = 0x7fffffff, -}; - -static u64 cpmac_dma_mask = DMA_BIT_MASK(32); - -static struct platform_device cpmac_low = { - .id = 0, - .name = "cpmac", - .dev = { - .dma_mask = &cpmac_dma_mask, - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &cpmac_low_data, - }, - .resource = cpmac_low_res, - .num_resources = ARRAY_SIZE(cpmac_low_res), -}; - -static struct platform_device cpmac_high = { - .id = 1, - .name = "cpmac", - .dev = { - .dma_mask = &cpmac_dma_mask, - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &cpmac_high_data, - }, - .resource = cpmac_high_res, - .num_resources = ARRAY_SIZE(cpmac_high_res), -}; - -static void __init cpmac_get_mac(int instance, unsigned char *dev_addr) -{ - char name[5], *mac; - - sprintf(name, "mac%c", 'a' + instance); - mac = prom_getenv(name); - if (!mac && instance) { - sprintf(name, "mac%c", 'a'); - mac = prom_getenv(name); - } - - if (mac) { - if (!mac_pton(mac, dev_addr)) { - pr_warn("cannot parse mac address, using random address\n"); - eth_random_addr(dev_addr); - } - } else - eth_random_addr(dev_addr); -} - -/***************************************************************************** - * USB - ****************************************************************************/ -static struct resource usb_res[] = { - { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = AR7_REGS_USB, - .end = AR7_REGS_USB + 0xff, - }, - { - .name = "irq", - .flags = IORESOURCE_IRQ, - .start = 32, - .end = 32, - }, - { - .name = "mem", - .flags = IORESOURCE_MEM, - .start = 0x03400000, - .end = 0x03401fff, - }, -}; - -static struct platform_device ar7_udc = { - .name = "ar7_udc", - .resource = usb_res, - .num_resources = ARRAY_SIZE(usb_res), -}; - -/***************************************************************************** - * LEDs - ****************************************************************************/ -static const struct gpio_led default_leds[] = { - { - .name = "status", - .gpio = 8, - .active_low = 1, - }, -}; - -static const struct gpio_led titan_leds[] = { - { .name = "status", .gpio = 8, .active_low = 1, }, - { .name = "wifi", .gpio = 13, .active_low = 1, }, -}; - -static const struct gpio_led dsl502t_leds[] = { - { - .name = "status", - .gpio = 9, - .active_low = 1, - }, - { - .name = "ethernet", - .gpio = 7, - .active_low = 1, - }, - { - .name = "usb", - .gpio = 12, - .active_low = 1, - }, -}; - -static const struct gpio_led dg834g_leds[] = { - { - .name = "ppp", - .gpio = 6, - .active_low = 1, - }, - { - .name = "status", - .gpio = 7, - .active_low = 1, - }, - { - .name = "adsl", - .gpio = 8, - .active_low = 1, - }, - { - .name = "wifi", - .gpio = 12, - .active_low = 1, - }, - { - .name = "power", - .gpio = 14, - .active_low = 1, - .default_trigger = "default-on", - }, -}; - -static const struct gpio_led fb_sl_leds[] = { - { - .name = "1", - .gpio = 7, - }, - { - .name = "2", - .gpio = 13, - .active_low = 1, - }, - { - .name = "3", - .gpio = 10, - .active_low = 1, - }, - { - .name = "4", - .gpio = 12, - .active_low = 1, - }, - { - .name = "5", - .gpio = 9, - .active_low = 1, - }, -}; - -static const struct gpio_led fb_fon_leds[] = { - { - .name = "1", - .gpio = 8, - }, - { - .name = "2", - .gpio = 3, - .active_low = 1, - }, - { - .name = "3", - .gpio = 5, - }, - { - .name = "4", - .gpio = 4, - .active_low = 1, - }, - { - .name = "5", - .gpio = 11, - .active_low = 1, - }, -}; - -static const struct gpio_led gt701_leds[] = { - { - .name = "inet:green", - .gpio = 13, - .active_low = 1, - }, - { - .name = "usb", - .gpio = 12, - .active_low = 1, - }, - { - .name = "inet:red", - .gpio = 9, - .active_low = 1, - }, - { - .name = "power:red", - .gpio = 7, - .active_low = 1, - }, - { - .name = "power:green", - .gpio = 8, - .active_low = 1, - .default_trigger = "default-on", - }, - { - .name = "ethernet", - .gpio = 10, - .active_low = 1, - }, -}; - -static struct gpio_led_platform_data ar7_led_data; - -static struct platform_device ar7_gpio_leds = { - .name = "leds-gpio", - .dev = { - .platform_data = &ar7_led_data, - } -}; - -static void __init detect_leds(void) -{ - char *prid, *usb_prod; - - /* Default LEDs */ - ar7_led_data.num_leds = ARRAY_SIZE(default_leds); - ar7_led_data.leds = default_leds; - - /* FIXME: the whole thing is unreliable */ - prid = prom_getenv("ProductID"); - usb_prod = prom_getenv("usb_prod"); - - /* If we can't get the product id from PROM, use the default LEDs */ - if (!prid) - return; - - if (strstr(prid, "Fritz_Box_FON")) { - ar7_led_data.num_leds = ARRAY_SIZE(fb_fon_leds); - ar7_led_data.leds = fb_fon_leds; - } else if (strstr(prid, "Fritz_Box_")) { - ar7_led_data.num_leds = ARRAY_SIZE(fb_sl_leds); - ar7_led_data.leds = fb_sl_leds; - } else if ((!strcmp(prid, "AR7RD") || !strcmp(prid, "AR7DB")) - && usb_prod != NULL && strstr(usb_prod, "DSL-502T")) { - ar7_led_data.num_leds = ARRAY_SIZE(dsl502t_leds); - ar7_led_data.leds = dsl502t_leds; - } else if (strstr(prid, "DG834")) { - ar7_led_data.num_leds = ARRAY_SIZE(dg834g_leds); - ar7_led_data.leds = dg834g_leds; - } else if (strstr(prid, "CYWM") || strstr(prid, "CYWL")) { - ar7_led_data.num_leds = ARRAY_SIZE(titan_leds); - ar7_led_data.leds = titan_leds; - } else if (strstr(prid, "GT701")) { - ar7_led_data.num_leds = ARRAY_SIZE(gt701_leds); - ar7_led_data.leds = gt701_leds; - } -} - -/***************************************************************************** - * Watchdog - ****************************************************************************/ -static struct resource ar7_wdt_res = { - .name = "regs", - .flags = IORESOURCE_MEM, - .start = -1, /* Filled at runtime */ - .end = -1, /* Filled at runtime */ -}; - -static struct platform_device ar7_wdt = { - .name = "ar7_wdt", - .resource = &ar7_wdt_res, - .num_resources = 1, -}; - -/***************************************************************************** - * Init - ****************************************************************************/ -static int __init ar7_register_uarts(void) -{ -#ifdef CONFIG_SERIAL_8250 - static struct uart_port uart_port __initdata; - struct clk *bus_clk; - int res; - - memset(&uart_port, 0, sizeof(struct uart_port)); - - bus_clk = clk_get(NULL, "bus"); - if (IS_ERR(bus_clk)) - panic("unable to get bus clk"); - - uart_port.type = PORT_AR7; - uart_port.uartclk = clk_get_rate(bus_clk) / 2; - uart_port.iotype = UPIO_MEM32; - uart_port.flags = UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF; - uart_port.regshift = 2; - - uart_port.line = 0; - uart_port.irq = AR7_IRQ_UART0; - uart_port.mapbase = AR7_REGS_UART0; - uart_port.membase = ioremap(uart_port.mapbase, 256); - - res = early_serial_setup(&uart_port); - if (res) - return res; - - /* Only TNETD73xx have a second serial port */ - if (ar7_has_second_uart()) { - uart_port.line = 1; - uart_port.irq = AR7_IRQ_UART1; - uart_port.mapbase = UR8_REGS_UART1; - uart_port.membase = ioremap(uart_port.mapbase, 256); - - res = early_serial_setup(&uart_port); - if (res) - return res; - } -#endif - - return 0; -} - -static void __init titan_fixup_devices(void) -{ - /* Set vlynq0 data */ - vlynq_low_data.reset_bit = 15; - vlynq_low_data.gpio_bit = 14; - - /* Set vlynq1 data */ - vlynq_high_data.reset_bit = 16; - vlynq_high_data.gpio_bit = 7; - - /* Set vlynq0 resources */ - vlynq_low_res[0].start = TITAN_REGS_VLYNQ0; - vlynq_low_res[0].end = TITAN_REGS_VLYNQ0 + 0xff; - vlynq_low_res[1].start = 33; - vlynq_low_res[1].end = 33; - vlynq_low_res[2].start = 0x0c000000; - vlynq_low_res[2].end = 0x0fffffff; - vlynq_low_res[3].start = 80; - vlynq_low_res[3].end = 111; - - /* Set vlynq1 resources */ - vlynq_high_res[0].start = TITAN_REGS_VLYNQ1; - vlynq_high_res[0].end = TITAN_REGS_VLYNQ1 + 0xff; - vlynq_high_res[1].start = 34; - vlynq_high_res[1].end = 34; - vlynq_high_res[2].start = 0x40000000; - vlynq_high_res[2].end = 0x43ffffff; - vlynq_high_res[3].start = 112; - vlynq_high_res[3].end = 143; - - /* Set cpmac0 data */ - cpmac_low_data.phy_mask = 0x40000000; - - /* Set cpmac1 data */ - cpmac_high_data.phy_mask = 0x80000000; - - /* Set cpmac0 resources */ - cpmac_low_res[0].start = TITAN_REGS_MAC0; - cpmac_low_res[0].end = TITAN_REGS_MAC0 + 0x7ff; - - /* Set cpmac1 resources */ - cpmac_high_res[0].start = TITAN_REGS_MAC1; - cpmac_high_res[0].end = TITAN_REGS_MAC1 + 0x7ff; -} - -static int __init ar7_register_devices(void) -{ - void __iomem *bootcr; - u32 val; - int res; - - res = ar7_gpio_init(); - if (res) - pr_warn("unable to register gpios: %d\n", res); - - res = ar7_register_uarts(); - if (res) - pr_err("unable to setup uart(s): %d\n", res); - - res = platform_device_register(&physmap_flash); - if (res) - pr_warn("unable to register physmap-flash: %d\n", res); - - if (ar7_is_titan()) - titan_fixup_devices(); - - ar7_device_disable(vlynq_low_data.reset_bit); - res = platform_device_register(&vlynq_low); - if (res) - pr_warn("unable to register vlynq-low: %d\n", res); - - if (ar7_has_high_vlynq()) { - ar7_device_disable(vlynq_high_data.reset_bit); - res = platform_device_register(&vlynq_high); - if (res) - pr_warn("unable to register vlynq-high: %d\n", res); - } - - if (ar7_has_high_cpmac()) { - res = fixed_phy_add(PHY_POLL, cpmac_high.id, - &fixed_phy_status); - if (!res) { - cpmac_get_mac(1, cpmac_high_data.dev_addr); - - res = platform_device_register(&cpmac_high); - if (res) - pr_warn("unable to register cpmac-high: %d\n", - res); - } else - pr_warn("unable to add cpmac-high phy: %d\n", res); - } else - cpmac_low_data.phy_mask = 0xffffffff; - - res = fixed_phy_add(PHY_POLL, cpmac_low.id, &fixed_phy_status); - if (!res) { - cpmac_get_mac(0, cpmac_low_data.dev_addr); - res = platform_device_register(&cpmac_low); - if (res) - pr_warn("unable to register cpmac-low: %d\n", res); - } else - pr_warn("unable to add cpmac-low phy: %d\n", res); - - detect_leds(); - res = platform_device_register(&ar7_gpio_leds); - if (res) - pr_warn("unable to register leds: %d\n", res); - - res = platform_device_register(&ar7_udc); - if (res) - pr_warn("unable to register usb slave: %d\n", res); - - /* Register watchdog only if enabled in hardware */ - bootcr = ioremap(AR7_REGS_DCL, 4); - val = readl(bootcr); - iounmap(bootcr); - if (val & AR7_WDT_HW_ENA) { - if (ar7_has_high_vlynq()) - ar7_wdt_res.start = UR8_REGS_WDT; - else - ar7_wdt_res.start = AR7_REGS_WDT; - - ar7_wdt_res.end = ar7_wdt_res.start + 0x20; - res = platform_device_register(&ar7_wdt); - if (res) - pr_warn("unable to register watchdog: %d\n", res); - } - - return 0; -} -device_initcall(ar7_register_devices); diff --git a/arch/mips/ar7/prom.c b/arch/mips/ar7/prom.c deleted file mode 100644 index 5810d3993fc6..000000000000 --- a/arch/mips/ar7/prom.c +++ /dev/null @@ -1,256 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * Putting things on the screen/serial line using YAMONs facilities. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#define MAX_ENTRY 80 - -struct env_var { - char *name; - char *value; -}; - -static struct env_var adam2_env[MAX_ENTRY]; - -char *prom_getenv(const char *name) -{ - int i; - - for (i = 0; (i < MAX_ENTRY) && adam2_env[i].name; i++) - if (!strcmp(name, adam2_env[i].name)) - return adam2_env[i].value; - - return NULL; -} -EXPORT_SYMBOL(prom_getenv); - -static void __init ar7_init_cmdline(int argc, char *argv[]) -{ - int i; - - for (i = 1; i < argc; i++) { - strlcat(arcs_cmdline, argv[i], COMMAND_LINE_SIZE); - if (i < (argc - 1)) - strlcat(arcs_cmdline, " ", COMMAND_LINE_SIZE); - } -} - -struct psbl_rec { - u32 psbl_size; - u32 env_base; - u32 env_size; - u32 ffs_base; - u32 ffs_size; -}; - -static const char psp_env_version[] __initconst = "TIENV0.8"; - -struct psp_env_chunk { - u8 num; - u8 ctrl; - u16 csum; - u8 len; - char data[11]; -} __packed; - -struct psp_var_map_entry { - u8 num; - char *value; -}; - -static const struct psp_var_map_entry psp_var_map[] = { - { 1, "cpufrequency" }, - { 2, "memsize" }, - { 3, "flashsize" }, - { 4, "modetty0" }, - { 5, "modetty1" }, - { 8, "maca" }, - { 9, "macb" }, - { 28, "sysfrequency" }, - { 38, "mipsfrequency" }, -}; - -/* - -Well-known variable (num is looked up in table above for matching variable name) -Example: cpufrequency=211968000 -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- -| 01 |CTRL|CHECKSUM | 01 | _2 | _1 | _1 | _9 | _6 | _8 | _0 | _0 | _0 | \0 | FF -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- - -Name=Value pair in a single chunk -Example: NAME=VALUE -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- -| 00 |CTRL|CHECKSUM | 01 | _N | _A | _M | _E | _0 | _V | _A | _L | _U | _E | \0 -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- - -Name=Value pair in 2 chunks (len is the number of chunks) -Example: bootloaderVersion=1.3.7.15 -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- -| 00 |CTRL|CHECKSUM | 02 | _b | _o | _o | _t | _l | _o | _a | _d | _e | _r | _V -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- -| _e | _r | _s | _i | _o | _n | \0 | _1 | _. | _3 | _. | _7 | _. | _1 | _5 | \0 -+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+--- - -Data is padded with 0xFF - -*/ - -#define PSP_ENV_SIZE 4096 - -static char psp_env_data[PSP_ENV_SIZE] = { 0, }; - -static char * __init lookup_psp_var_map(u8 num) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(psp_var_map); i++) - if (psp_var_map[i].num == num) - return psp_var_map[i].value; - - return NULL; -} - -static void __init add_adam2_var(char *name, char *value) -{ - int i; - - for (i = 0; i < MAX_ENTRY; i++) { - if (!adam2_env[i].name) { - adam2_env[i].name = name; - adam2_env[i].value = value; - return; - } else if (!strcmp(adam2_env[i].name, name)) { - adam2_env[i].value = value; - return; - } - } -} - -static int __init parse_psp_env(void *psp_env_base) -{ - int i, n; - char *name, *value; - struct psp_env_chunk *chunks = (struct psp_env_chunk *)psp_env_data; - - memcpy_fromio(chunks, psp_env_base, PSP_ENV_SIZE); - - i = 1; - n = PSP_ENV_SIZE / sizeof(struct psp_env_chunk); - while (i < n) { - if ((chunks[i].num == 0xff) || ((i + chunks[i].len) > n)) - break; - value = chunks[i].data; - if (chunks[i].num) { - name = lookup_psp_var_map(chunks[i].num); - } else { - name = value; - value += strlen(name) + 1; - } - if (name) - add_adam2_var(name, value); - i += chunks[i].len; - } - return 0; -} - -static void __init ar7_init_env(struct env_var *env) -{ - int i; - struct psbl_rec *psbl = (struct psbl_rec *)(KSEG1ADDR(0x14000300)); - void *psp_env = (void *)KSEG1ADDR(psbl->env_base); - - if (strcmp(psp_env, psp_env_version) == 0) { - parse_psp_env(psp_env); - } else { - for (i = 0; i < MAX_ENTRY; i++, env++) - if (env->name) - add_adam2_var(env->name, env->value); - } -} - -static void __init console_config(void) -{ -#ifdef CONFIG_SERIAL_8250_CONSOLE - char console_string[40]; - int baud = 0; - char parity = '\0', bits = '\0', flow = '\0'; - char *s, *p; - - if (strstr(arcs_cmdline, "console=")) - return; - - s = prom_getenv("modetty0"); - if (s) { - baud = simple_strtoul(s, &p, 10); - s = p; - if (*s == ',') - s++; - if (*s) - parity = *s++; - if (*s == ',') - s++; - if (*s) - bits = *s++; - if (*s == ',') - s++; - if (*s == 'h') - flow = 'r'; - } - - if (baud == 0) - baud = 38400; - if (parity != 'n' && parity != 'o' && parity != 'e') - parity = 'n'; - if (bits != '7' && bits != '8') - bits = '8'; - - if (flow == 'r') - sprintf(console_string, " console=ttyS0,%d%c%c%c", baud, - parity, bits, flow); - else - sprintf(console_string, " console=ttyS0,%d%c%c", baud, parity, - bits); - strlcat(arcs_cmdline, console_string, COMMAND_LINE_SIZE); -#endif -} - -void __init prom_init(void) -{ - ar7_init_cmdline(fw_arg0, (char **)fw_arg1); - ar7_init_env((struct env_var *)fw_arg2); - console_config(); -} - -#define PORT(offset) (KSEG1ADDR(AR7_REGS_UART0 + (offset * 4))) -static inline unsigned int serial_in(int offset) -{ - return readl((void *)PORT(offset)); -} - -static inline void serial_out(int offset, int value) -{ - writel(value, (void *)PORT(offset)); -} - -void prom_putchar(char c) -{ - while ((serial_in(UART_LSR) & UART_LSR_TEMT) == 0) - ; - serial_out(UART_TX, c); -} diff --git a/arch/mips/ar7/setup.c b/arch/mips/ar7/setup.c deleted file mode 100644 index 352d5dbc777c..000000000000 --- a/arch/mips/ar7/setup.c +++ /dev/null @@ -1,93 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. - */ -#include -#include -#include -#include - -#include -#include -#include - -static void ar7_machine_restart(char *command) -{ - u32 *softres_reg = ioremap(AR7_REGS_RESET + AR7_RESET_SOFTWARE, 1); - - writel(1, softres_reg); -} - -static void ar7_machine_halt(void) -{ - while (1) - ; -} - -static void ar7_machine_power_off(void) -{ - u32 *power_reg = (u32 *)ioremap(AR7_REGS_POWER, 1); - u32 power_state = readl(power_reg) | (3 << 30); - - writel(power_state, power_reg); - ar7_machine_halt(); -} - -const char *get_system_type(void) -{ - u16 chip_id = ar7_chip_id(); - u16 titan_variant_id = titan_chip_id(); - - switch (chip_id) { - case AR7_CHIP_7100: - return "TI AR7 (TNETD7100)"; - case AR7_CHIP_7200: - return "TI AR7 (TNETD7200)"; - case AR7_CHIP_7300: - return "TI AR7 (TNETD7300)"; - case AR7_CHIP_TITAN: - switch (titan_variant_id) { - case TITAN_CHIP_1050: - return "TI AR7 (TNETV1050)"; - case TITAN_CHIP_1055: - return "TI AR7 (TNETV1055)"; - case TITAN_CHIP_1056: - return "TI AR7 (TNETV1056)"; - case TITAN_CHIP_1060: - return "TI AR7 (TNETV1060)"; - } - fallthrough; - default: - return "TI AR7 (unknown)"; - } -} - -static int __init ar7_init_console(void) -{ - return 0; -} -console_initcall(ar7_init_console); - -/* - * Initializes basic routines and structures pointers, memory size (as - * given by the bios and saves the command line. - */ -void __init plat_mem_setup(void) -{ - unsigned long io_base; - - _machine_restart = ar7_machine_restart; - _machine_halt = ar7_machine_halt; - pm_power_off = ar7_machine_power_off; - - io_base = (unsigned long)ioremap(AR7_REGS_BASE, 0x10000); - if (!io_base) - panic("Can't remap IO base!"); - set_io_port_base(io_base); - - prom_meminit(); - - printk(KERN_INFO "%s, ID: 0x%04x, Revision: 0x%02x\n", - get_system_type(), ar7_chip_id(), ar7_chip_rev()); -} diff --git a/arch/mips/ar7/time.c b/arch/mips/ar7/time.c deleted file mode 100644 index 72aa77d7087b..000000000000 --- a/arch/mips/ar7/time.c +++ /dev/null @@ -1,31 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * Setting up the clock on the MIPS boards. - */ - -#include -#include -#include -#include - -#include -#include - -void __init plat_time_init(void) -{ - struct clk *cpu_clk; - - /* Initialize ar7 clocks so the CPU clock frequency is correct */ - ar7_init_clocks(); - - cpu_clk = clk_get(NULL, "cpu"); - if (IS_ERR(cpu_clk)) { - printk(KERN_ERR "unable to get cpu clock\n"); - return; - } - - mips_hpt_frequency = clk_get_rate(cpu_clk) / 2; -} diff --git a/arch/mips/boot/compressed/uart-16550.c b/arch/mips/boot/compressed/uart-16550.c index 96d28f211121..09dcd2c561d9 100644 --- a/arch/mips/boot/compressed/uart-16550.c +++ b/arch/mips/boot/compressed/uart-16550.c @@ -13,11 +13,6 @@ #define PORT(offset) (CKSEG1ADDR(UART_BASE) + (offset)) #endif -#ifdef CONFIG_AR7 -#include -#define PORT(offset) (CKSEG1ADDR(AR7_REGS_UART0) + (4 * offset)) -#endif - #ifdef CONFIG_MACH_INGENIC #define INGENIC_UART_BASE_ADDR (0x10030000 + 0x1000 * CONFIG_ZBOOT_INGENIC_UART) #define PORT(offset) (CKSEG1ADDR(INGENIC_UART_BASE_ADDR) + (4 * offset)) diff --git a/arch/mips/configs/ar7_defconfig b/arch/mips/configs/ar7_defconfig deleted file mode 100644 index 329c60aa570a..000000000000 --- a/arch/mips/configs/ar7_defconfig +++ /dev/null @@ -1,119 +0,0 @@ -# CONFIG_LOCALVERSION_AUTO is not set -CONFIG_KERNEL_LZMA=y -CONFIG_SYSVIPC=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_RELAY=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_EXPERT=y -# CONFIG_ELF_CORE is not set -# CONFIG_KALLSYMS is not set -# CONFIG_VM_EVENT_COUNTERS is not set -# CONFIG_COMPAT_BRK is not set -CONFIG_AR7=y -CONFIG_HZ_100=y -CONFIG_KEXEC=y -# CONFIG_SECCOMP is not set -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_PARTITION_ADVANCED=y -CONFIG_BSD_DISKLABEL=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_MROUTE=y -CONFIG_SYN_COOKIES=y -# CONFIG_INET_DIAG is not set -CONFIG_TCP_CONG_ADVANCED=y -# CONFIG_TCP_CONG_BIC is not set -# CONFIG_TCP_CONG_CUBIC is not set -CONFIG_TCP_CONG_WESTWOOD=y -# CONFIG_TCP_CONG_HTCP is not set -# CONFIG_IPV6 is not set -CONFIG_NETFILTER=y -# CONFIG_BRIDGE_NETFILTER is not set -CONFIG_NF_CONNTRACK=m -CONFIG_NF_CONNTRACK_MARK=y -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_RAW=m -CONFIG_ATM=m -CONFIG_ATM_BR2684=m -CONFIG_ATM_BR2684_IPFILTER=y -CONFIG_BRIDGE=y -CONFIG_VLAN_8021Q=y -CONFIG_NET_SCHED=y -CONFIG_NET_CLS_ACT=y -CONFIG_NET_ACT_POLICE=y -CONFIG_HAMRADIO=y -CONFIG_CFG80211=m -CONFIG_MAC80211=m -CONFIG_MTD=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_INTELEXT=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_CFI_STAA=y -CONFIG_MTD_COMPLEX_MAPPINGS=y -CONFIG_MTD_PHYSMAP=y -CONFIG_NETDEVICES=y -CONFIG_CPMAC=y -CONFIG_FIXED_PHY=y -CONFIG_PPP=m -CONFIG_PPP_FILTER=y -CONFIG_PPP_MULTILINK=y -CONFIG_PPPOATM=m -CONFIG_PPPOE=m -CONFIG_PPP_ASYNC=m -# CONFIG_INPUT is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -# CONFIG_LEGACY_PTYS is not set -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_NR_UARTS=2 -CONFIG_SERIAL_8250_RUNTIME_UARTS=2 -CONFIG_HW_RANDOM=y -CONFIG_GPIO_SYSFS=y -# CONFIG_HWMON is not set -CONFIG_WATCHDOG=y -CONFIG_AR7_WDT=y -# CONFIG_USB_SUPPORT is not set -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y -CONFIG_LEDS_GPIO=y -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_LEDS_TRIGGER_HEARTBEAT=y -CONFIG_LEDS_TRIGGER_DEFAULT_ON=y -# CONFIG_DNOTIFY is not set -CONFIG_PROC_KCORE=y -# CONFIG_PROC_PAGE_MONITOR is not set -CONFIG_TMPFS=y -CONFIG_JFFS2_FS=y -CONFIG_JFFS2_SUMMARY=y -CONFIG_JFFS2_COMPRESSION_OPTIONS=y -CONFIG_SQUASHFS=y -# CONFIG_CRYPTO_HW is not set -CONFIG_STRIP_ASM_SYMS=y -CONFIG_DEBUG_FS=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="rootfstype=squashfs,jffs2" diff --git a/arch/mips/include/asm/mach-ar7/ar7.h b/arch/mips/include/asm/mach-ar7/ar7.h deleted file mode 100644 index 1e8621a6afa3..000000000000 --- a/arch/mips/include/asm/mach-ar7/ar7.h +++ /dev/null @@ -1,191 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2006,2007 Felix Fietkau - * Copyright (C) 2006,2007 Eugene Konev - */ - -#ifndef __AR7_H__ -#define __AR7_H__ - -#include -#include -#include - -#include - -#define AR7_SDRAM_BASE 0x14000000 - -#define AR7_REGS_BASE 0x08610000 - -#define AR7_REGS_MAC0 (AR7_REGS_BASE + 0x0000) -#define AR7_REGS_GPIO (AR7_REGS_BASE + 0x0900) -/* 0x08610A00 - 0x08610BFF (512 bytes, 128 bytes / clock) */ -#define AR7_REGS_POWER (AR7_REGS_BASE + 0x0a00) -#define AR7_REGS_CLOCKS (AR7_REGS_POWER + 0x80) -#define UR8_REGS_CLOCKS (AR7_REGS_POWER + 0x20) -#define AR7_REGS_UART0 (AR7_REGS_BASE + 0x0e00) -#define AR7_REGS_USB (AR7_REGS_BASE + 0x1200) -#define AR7_REGS_RESET (AR7_REGS_BASE + 0x1600) -#define AR7_REGS_PINSEL (AR7_REGS_BASE + 0x160C) -#define AR7_REGS_VLYNQ0 (AR7_REGS_BASE + 0x1800) -#define AR7_REGS_DCL (AR7_REGS_BASE + 0x1a00) -#define AR7_REGS_VLYNQ1 (AR7_REGS_BASE + 0x1c00) -#define AR7_REGS_MDIO (AR7_REGS_BASE + 0x1e00) -#define AR7_REGS_IRQ (AR7_REGS_BASE + 0x2400) -#define AR7_REGS_MAC1 (AR7_REGS_BASE + 0x2800) - -#define AR7_REGS_WDT (AR7_REGS_BASE + 0x1f00) -#define UR8_REGS_WDT (AR7_REGS_BASE + 0x0b00) -#define UR8_REGS_UART1 (AR7_REGS_BASE + 0x0f00) - -/* Titan registers */ -#define TITAN_REGS_ESWITCH_BASE (0x08640000) -#define TITAN_REGS_MAC0 (TITAN_REGS_ESWITCH_BASE) -#define TITAN_REGS_MAC1 (TITAN_REGS_ESWITCH_BASE + 0x0800) -#define TITAN_REGS_MDIO (TITAN_REGS_ESWITCH_BASE + 0x02000) -#define TITAN_REGS_VLYNQ0 (AR7_REGS_BASE + 0x1c00) -#define TITAN_REGS_VLYNQ1 (AR7_REGS_BASE + 0x1300) - -#define AR7_RESET_PERIPHERAL 0x0 -#define AR7_RESET_SOFTWARE 0x4 -#define AR7_RESET_STATUS 0x8 - -#define AR7_RESET_BIT_CPMAC_LO 17 -#define AR7_RESET_BIT_CPMAC_HI 21 -#define AR7_RESET_BIT_MDIO 22 -#define AR7_RESET_BIT_EPHY 26 - -#define TITAN_RESET_BIT_EPHY1 28 - -/* GPIO control registers */ -#define AR7_GPIO_INPUT 0x0 -#define AR7_GPIO_OUTPUT 0x4 -#define AR7_GPIO_DIR 0x8 -#define AR7_GPIO_ENABLE 0xc -#define TITAN_GPIO_INPUT_0 0x0 -#define TITAN_GPIO_INPUT_1 0x4 -#define TITAN_GPIO_OUTPUT_0 0x8 -#define TITAN_GPIO_OUTPUT_1 0xc -#define TITAN_GPIO_DIR_0 0x10 -#define TITAN_GPIO_DIR_1 0x14 -#define TITAN_GPIO_ENBL_0 0x18 -#define TITAN_GPIO_ENBL_1 0x1c - -#define AR7_CHIP_7100 0x18 -#define AR7_CHIP_7200 0x2b -#define AR7_CHIP_7300 0x05 -#define AR7_CHIP_TITAN 0x07 -#define TITAN_CHIP_1050 0x0f -#define TITAN_CHIP_1055 0x0e -#define TITAN_CHIP_1056 0x0d -#define TITAN_CHIP_1060 0x07 - -/* Interrupts */ -#define AR7_IRQ_UART0 15 -#define AR7_IRQ_UART1 16 - -/* Clocks */ -#define AR7_AFE_CLOCK 35328000 -#define AR7_REF_CLOCK 25000000 -#define AR7_XTAL_CLOCK 24000000 - -/* DCL */ -#define AR7_WDT_HW_ENA 0x10 - -struct plat_cpmac_data { - int reset_bit; - int power_bit; - u32 phy_mask; - char dev_addr[6]; -}; - -struct plat_dsl_data { - int reset_bit_dsl; - int reset_bit_sar; -}; - -static inline int ar7_is_titan(void) -{ - return (readl((void *)KSEG1ADDR(AR7_REGS_GPIO + 0x24)) & 0xffff) == - AR7_CHIP_TITAN; -} - -static inline u16 ar7_chip_id(void) -{ - return ar7_is_titan() ? AR7_CHIP_TITAN : (readl((void *) - KSEG1ADDR(AR7_REGS_GPIO + 0x14)) & 0xffff); -} - -static inline u16 titan_chip_id(void) -{ - unsigned int val = readl((void *)KSEG1ADDR(AR7_REGS_GPIO + - TITAN_GPIO_INPUT_1)); - return ((val >> 12) & 0x0f); -} - -static inline u8 ar7_chip_rev(void) -{ - return (readl((void *)KSEG1ADDR(AR7_REGS_GPIO + (ar7_is_titan() ? 0x24 : - 0x14))) >> 16) & 0xff; -} - -static inline int ar7_has_high_cpmac(void) -{ - u16 chip_id = ar7_chip_id(); - switch (chip_id) { - case AR7_CHIP_7100: - case AR7_CHIP_7200: - return 0; - case AR7_CHIP_7300: - return 1; - default: - return -ENXIO; - } -} -#define ar7_has_high_vlynq ar7_has_high_cpmac -#define ar7_has_second_uart ar7_has_high_cpmac - -static inline void ar7_device_enable(u32 bit) -{ - void *reset_reg = - (void *)KSEG1ADDR(AR7_REGS_RESET + AR7_RESET_PERIPHERAL); - writel(readl(reset_reg) | (1 << bit), reset_reg); - msleep(20); -} - -static inline void ar7_device_disable(u32 bit) -{ - void *reset_reg = - (void *)KSEG1ADDR(AR7_REGS_RESET + AR7_RESET_PERIPHERAL); - writel(readl(reset_reg) & ~(1 << bit), reset_reg); - msleep(20); -} - -static inline void ar7_device_reset(u32 bit) -{ - ar7_device_disable(bit); - ar7_device_enable(bit); -} - -static inline void ar7_device_on(u32 bit) -{ - void *power_reg = (void *)KSEG1ADDR(AR7_REGS_POWER); - writel(readl(power_reg) | (1 << bit), power_reg); - msleep(20); -} - -static inline void ar7_device_off(u32 bit) -{ - void *power_reg = (void *)KSEG1ADDR(AR7_REGS_POWER); - writel(readl(power_reg) & ~(1 << bit), power_reg); - msleep(20); -} - -int __init ar7_gpio_init(void); -void __init ar7_init_clocks(void); - -/* Board specific GPIO functions */ -int ar7_gpio_enable(unsigned gpio); -int ar7_gpio_disable(unsigned gpio); - -#endif /* __AR7_H__ */ diff --git a/arch/mips/include/asm/mach-ar7/irq.h b/arch/mips/include/asm/mach-ar7/irq.h deleted file mode 100644 index 46bb730ea970..000000000000 --- a/arch/mips/include/asm/mach-ar7/irq.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Shamelessly copied from asm-mips/mach-emma2rh/ - * Copyright (C) 2003 by Ralf Baechle - */ -#ifndef __ASM_AR7_IRQ_H -#define __ASM_AR7_IRQ_H - -#define NR_IRQS 256 - -#include - -#endif /* __ASM_AR7_IRQ_H */ diff --git a/arch/mips/include/asm/mach-ar7/prom.h b/arch/mips/include/asm/mach-ar7/prom.h deleted file mode 100644 index 9e1d20b06f57..000000000000 --- a/arch/mips/include/asm/mach-ar7/prom.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2006, 2007 Florian Fainelli - */ - -#ifndef __PROM_H__ -#define __PROM_H__ - -extern char *prom_getenv(const char *name); -extern void prom_meminit(void); - -#endif /* __PROM_H__ */ diff --git a/arch/mips/include/asm/mach-ar7/spaces.h b/arch/mips/include/asm/mach-ar7/spaces.h deleted file mode 100644 index a004d94dfbdd..000000000000 --- a/arch/mips/include/asm/mach-ar7/spaces.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1994 - 1999, 2000, 03, 04 Ralf Baechle - * Copyright (C) 2000, 2002 Maciej W. Rozycki - * Copyright (C) 1990, 1999, 2000 Silicon Graphics, Inc. - */ -#ifndef _ASM_AR7_SPACES_H -#define _ASM_AR7_SPACES_H - -/* - * This handles the memory map. - * We handle pages at KSEG0 for kernels with 32 bit address space. - */ -#define PAGE_OFFSET _AC(0x94000000, UL) -#define PHYS_OFFSET _AC(0x14000000, UL) - -#include - -#endif /* __ASM_AR7_SPACES_H */ From cdcd6aef9db5797995d4153ea19fdf56d189f0e4 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 24 Oct 2023 12:56:40 +0200 Subject: [PATCH 015/415] drm/vc4: tests: Fix UAF in the mock helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VC4 mock helpers allocate the CRTC, encoders and connectors using a call to kunit_kzalloc(), but the DRM device they are attache to survives for longer than the test itself which leads to use-after-frees reported by KASAN. Switch to drmm_kzalloc to tie the lifetime of these objects to the main DRM device. Fixes: f759f5b53f1c ("drm/vc4: tests: Introduce a mocking infrastructure") Reported-by: Linux Kernel Functional Testing Closes: https://lore.kernel.org/all/CA+G9fYvJA2HGqzR9LGgq63v0SKaUejHAE6f7+z9cwWN-ourJ_g@mail.gmail.com/ Tested-by: Anders Roxell Reviewed-by: Maíra Canal Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20231024105640.352752-1-mripard@kernel.org --- drivers/gpu/drm/vc4/tests/vc4_mock_crtc.c | 2 +- drivers/gpu/drm/vc4/tests/vc4_mock_output.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vc4/tests/vc4_mock_crtc.c b/drivers/gpu/drm/vc4/tests/vc4_mock_crtc.c index 5d12d7beef0e..ade3309ae042 100644 --- a/drivers/gpu/drm/vc4/tests/vc4_mock_crtc.c +++ b/drivers/gpu/drm/vc4/tests/vc4_mock_crtc.c @@ -26,7 +26,7 @@ struct vc4_dummy_crtc *vc4_mock_pv(struct kunit *test, struct vc4_crtc *vc4_crtc; int ret; - dummy_crtc = kunit_kzalloc(test, sizeof(*dummy_crtc), GFP_KERNEL); + dummy_crtc = drmm_kzalloc(drm, sizeof(*dummy_crtc), GFP_KERNEL); KUNIT_ASSERT_NOT_NULL(test, dummy_crtc); vc4_crtc = &dummy_crtc->crtc; diff --git a/drivers/gpu/drm/vc4/tests/vc4_mock_output.c b/drivers/gpu/drm/vc4/tests/vc4_mock_output.c index 6e11fcc9ef45..e70d7c3076ac 100644 --- a/drivers/gpu/drm/vc4/tests/vc4_mock_output.c +++ b/drivers/gpu/drm/vc4/tests/vc4_mock_output.c @@ -32,7 +32,7 @@ struct vc4_dummy_output *vc4_dummy_output(struct kunit *test, struct drm_encoder *enc; int ret; - dummy_output = kunit_kzalloc(test, sizeof(*dummy_output), GFP_KERNEL); + dummy_output = drmm_kzalloc(drm, sizeof(*dummy_output), GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dummy_output); dummy_output->encoder.type = vc4_encoder_type; From 101c9f637efa1655f55876644d4439e552267527 Mon Sep 17 00:00:00 2001 From: Erik Kurzinger Date: Wed, 16 Aug 2023 09:26:05 -0700 Subject: [PATCH 016/415] drm/syncobj: fix DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE If DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT is invoked with the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE flag set but no fence has yet been submitted for the given timeline point the call will fail immediately with EINVAL. This does not match the intended behavior where the call should wait until the fence has been submitted (or the timeout expires). The following small example program illustrates the issue. It should wait for 5 seconds and then print ETIME, but instead it terminates right away after printing EINVAL. #include #include #include #include #include int main(void) { int fd = open("/dev/dri/card0", O_RDWR); uint32_t syncobj; drmSyncobjCreate(fd, 0, &syncobj); struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); uint64_t point = 1; if (drmSyncobjTimelineWait(fd, &syncobj, &point, 1, ts.tv_sec * 1000000000 + ts.tv_nsec + 5000000000, // 5s DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE, NULL)) { printf("drmSyncobjTimelineWait failed %d\n", errno); } } Fixes: 01d6c3578379 ("drm/syncobj: add support for timeline point wait v8") Signed-off-by: Erik Kurzinger Reviewed by: Simon Ser Signed-off-by: Simon Ser Link: https://patchwork.freedesktop.org/patch/msgid/1fac96f1-2f3f-f9f9-4eb0-340f27a8f6c0@nvidia.com --- drivers/gpu/drm/drm_syncobj.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index f7003d1ec5ef..01da6789d044 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1069,7 +1069,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, fence = drm_syncobj_fence_get(syncobjs[i]); if (!fence || dma_fence_chain_find_seqno(&fence, points[i])) { dma_fence_put(fence); - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) { continue; } else { timeout = -EINVAL; From 390001d648ffa027b750b7dceb5d43f4c1d1a39e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Oct 2023 22:10:04 +0200 Subject: [PATCH 017/415] drm/i915/mtl: avoid stringop-overflow warning The newly added memset() causes a warning for some reason I could not figure out: In file included from arch/x86/include/asm/string.h:3, from drivers/gpu/drm/i915/gt/intel_rc6.c:6: In function 'rc6_res_reg_init', inlined from 'intel_rc6_init' at drivers/gpu/drm/i915/gt/intel_rc6.c:610:2: arch/x86/include/asm/string_32.h:195:29: error: '__builtin_memset' writing 16 bytes into a region of size 0 overflows the destination [-Werror=stringop-overflow=] 195 | #define memset(s, c, count) __builtin_memset(s, c, count) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/gt/intel_rc6.c:584:9: note: in expansion of macro 'memset' 584 | memset(rc6->res_reg, INVALID_MMIO_REG.reg, sizeof(rc6->res_reg)); | ^~~~~~ In function 'intel_rc6_init': Change it to an normal initializer and an added memcpy() that does not have this problem. Fixes: 4bb9ca7ee074 ("drm/i915/mtl: C6 residency and C state type for MTL SAMedia") Signed-off-by: Arnd Bergmann Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231016201012.1022812-1-arnd@kernel.org (cherry picked from commit 0520b30b219053cd789909bca45b3c486ef3ee09) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_rc6.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 8b67abd720be..7090e4be29cb 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -581,19 +581,23 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6) static void rc6_res_reg_init(struct intel_rc6 *rc6) { - memset(rc6->res_reg, INVALID_MMIO_REG.reg, sizeof(rc6->res_reg)); + i915_reg_t res_reg[INTEL_RC6_RES_MAX] = { + [0 ... INTEL_RC6_RES_MAX - 1] = INVALID_MMIO_REG, + }; switch (rc6_to_gt(rc6)->type) { case GT_MEDIA: - rc6->res_reg[INTEL_RC6_RES_RC6] = MTL_MEDIA_MC6; + res_reg[INTEL_RC6_RES_RC6] = MTL_MEDIA_MC6; break; default: - rc6->res_reg[INTEL_RC6_RES_RC6_LOCKED] = GEN6_GT_GFX_RC6_LOCKED; - rc6->res_reg[INTEL_RC6_RES_RC6] = GEN6_GT_GFX_RC6; - rc6->res_reg[INTEL_RC6_RES_RC6p] = GEN6_GT_GFX_RC6p; - rc6->res_reg[INTEL_RC6_RES_RC6pp] = GEN6_GT_GFX_RC6pp; + res_reg[INTEL_RC6_RES_RC6_LOCKED] = GEN6_GT_GFX_RC6_LOCKED; + res_reg[INTEL_RC6_RES_RC6] = GEN6_GT_GFX_RC6; + res_reg[INTEL_RC6_RES_RC6p] = GEN6_GT_GFX_RC6p; + res_reg[INTEL_RC6_RES_RC6pp] = GEN6_GT_GFX_RC6pp; break; } + + memcpy(rc6->res_reg, res_reg, sizeof(res_reg)); } void intel_rc6_init(struct intel_rc6 *rc6) From 7d7a328d0e8d6edefb7b0d665185d468667588d0 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 18 Oct 2023 11:38:15 +0200 Subject: [PATCH 018/415] drm/i915: Flush WC GGTT only on required platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gen8_ggtt_invalidate() is only needed for limited set of platforms where GGTT is mapped as WC. This was added as way to fix WC based GGTT in commit 0f9b91c754b7 ("drm/i915: flush system agent TLBs on SNB") and there are no reference in HW docs that forces us to use this on non-WC backed GGTT. This can also cause unwanted side-effects on XE_HP platforms where GFX_FLSH_CNTL_GEN6 is not valid anymore. v2: Add a func to detect wc ggtt detection (Ville) v3: Improve commit log and add reference commit (Daniel) Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement") Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Jani Nikula Cc: Jonathan Cavitt Cc: John Harrison Cc: Andi Shyti Cc: Ville Syrjälä Cc: Daniel Vetter Cc: # v6.2+ Suggested-by: Matt Roper Signed-off-by: Nirmoy Das Reviewed-by: Matt Roper Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231018093815.1349-1-nirmoy.das@intel.com (cherry picked from commit 81de3e296b10a13e5c9f13172825b0d8d9495c68) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 35 +++++++++++++++++++--------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 1c93e84278a0..15fc8e4703f4 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -195,6 +195,21 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) spin_unlock_irq(&uncore->lock); } +static bool needs_wc_ggtt_mapping(struct drm_i915_private *i915) +{ + /* + * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range + * will be dropped. For WC mappings in general we have 64 byte burst + * writes when the WC buffer is flushed, so we can't use it, but have to + * resort to an uncached mapping. The WC issue is easily caught by the + * readback check when writing GTT PTE entries. + */ + if (!IS_GEN9_LP(i915) && GRAPHICS_VER(i915) < 11) + return true; + + return false; +} + static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) { struct intel_uncore *uncore = ggtt->vm.gt->uncore; @@ -202,8 +217,12 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) /* * Note that as an uncached mmio write, this will flush the * WCB of the writes into the GGTT before it triggers the invalidate. + * + * Only perform this when GGTT is mapped as WC, see ggtt_probe_common(). */ - intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + if (needs_wc_ggtt_mapping(ggtt->vm.i915)) + intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, + GFX_FLSH_CNTL_EN); } static void guc_ggtt_ct_invalidate(struct intel_gt *gt) @@ -1140,17 +1159,11 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915)); phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915); - /* - * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range - * will be dropped. For WC mappings in general we have 64 byte burst - * writes when the WC buffer is flushed, so we can't use it, but have to - * resort to an uncached mapping. The WC issue is easily caught by the - * readback check when writing GTT PTE entries. - */ - if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11) - ggtt->gsm = ioremap(phys_addr, size); - else + if (needs_wc_ggtt_mapping(i915)) ggtt->gsm = ioremap_wc(phys_addr, size); + else + ggtt->gsm = ioremap(phys_addr, size); + if (!ggtt->gsm) { drm_err(&i915->drm, "Failed to map the ggtt page table\n"); return -ENOMEM; From ce4941c2d6459664761c9854701015d8e99414fb Mon Sep 17 00:00:00 2001 From: Chaitanya Kumar Borah Date: Wed, 18 Oct 2023 17:06:22 +0530 Subject: [PATCH 019/415] drm/i915/mtl: Support HBR3 rate with C10 phy and eDP in MTL eDP specification supports HBR3 link rate since v1.4a. Moreover, C10 phy can support HBR3 link rate for both DP and eDP. Therefore, do not clamp the supported rates for eDP at 6.75Gbps. Cc: BSpec: 70073 74224 Signed-off-by: Chaitanya Kumar Borah Reviewed-by: Mika Kahola Signed-off-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20231018113622.2761997-1-chaitanya.kumar.borah@intel.com (cherry picked from commit a3431650f30a94b179d419ef87c21213655c28cd) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 1891c0cc187d..2c1034578984 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -430,7 +430,7 @@ static int mtl_max_source_rate(struct intel_dp *intel_dp) enum phy phy = intel_port_to_phy(i915, dig_port->base.port); if (intel_is_c10phy(i915, phy)) - return intel_dp_is_edp(intel_dp) ? 675000 : 810000; + return 810000; return 2000000; } From 471aa951bf1206d3c10d0daa67005b8e4db4ff83 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Fri, 27 Oct 2023 10:28:22 -0700 Subject: [PATCH 020/415] i915/perf: Fix NULL deref bugs with drm_dbg() calls When i915 perf interface is not available dereferencing it will lead to NULL dereferences. As returning -ENOTSUPP is pretty clear return when perf interface is not available. Fixes: 2fec539112e8 ("i915/perf: Replace DRM_DEBUG with driver specific drm_dbg call") Suggested-by: Tvrtko Ursulin Signed-off-by: Harshit Mogalapalli Reviewed-by: Tvrtko Ursulin Cc: # v6.0+ Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20231027172822.2753059-1-harshit.m.mogalapalli@oracle.com [tursulin: added stable tag] (cherry picked from commit 36f27350ff745bd228ab04d7845dfbffc177a889) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_perf.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 7178f298b3e6..f92c2a464ebe 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4227,11 +4227,8 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data, u32 known_open_flags; int ret; - if (!perf->i915) { - drm_dbg(&perf->i915->drm, - "i915 perf interface not available for this system\n"); + if (!perf->i915) return -ENOTSUPP; - } known_open_flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | @@ -4607,11 +4604,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, struct i915_oa_reg *regs; int err, id; - if (!perf->i915) { - drm_dbg(&perf->i915->drm, - "i915 perf interface not available for this system\n"); + if (!perf->i915) return -ENOTSUPP; - } if (!perf->metrics_kobj) { drm_dbg(&perf->i915->drm, @@ -4773,11 +4767,8 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, struct i915_oa_config *oa_config; int ret; - if (!perf->i915) { - drm_dbg(&perf->i915->drm, - "i915 perf interface not available for this system\n"); + if (!perf->i915) return -ENOTSUPP; - } if (i915_perf_stream_paranoid && !perfmon_capable()) { drm_dbg(&perf->i915->drm, From fbb74e56378d8306f214658e3d525a8b3f000c5a Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 30 Oct 2023 07:23:38 +0200 Subject: [PATCH 021/415] ASoC: ti: omap-mcbsp: Fix runtime PM underflow warnings We need to check for an active device as otherwise we get warnings for some mcbsp instances for "Runtime PM usage count underflow!". Reported-by: Andreas Kemnade Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20231030052340.13415-1-tony@atomide.com Signed-off-by: Mark Brown --- sound/soc/ti/omap-mcbsp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/ti/omap-mcbsp.c b/sound/soc/ti/omap-mcbsp.c index 7643a54592f5..2110ffe5281c 100644 --- a/sound/soc/ti/omap-mcbsp.c +++ b/sound/soc/ti/omap-mcbsp.c @@ -73,14 +73,16 @@ static int omap2_mcbsp_set_clks_src(struct omap_mcbsp *mcbsp, u8 fck_src_id) return 0; } - pm_runtime_put_sync(mcbsp->dev); + if (mcbsp->active) + pm_runtime_put_sync(mcbsp->dev); r = clk_set_parent(mcbsp->fclk, fck_src); if (r) dev_err(mcbsp->dev, "CLKS: could not clk_set_parent() to %s\n", src); - pm_runtime_get_sync(mcbsp->dev); + if (mcbsp->active) + pm_runtime_get_sync(mcbsp->dev); clk_put(fck_src); From 04f8c76de983a5c1e8913e442e791a9e8d0e0dfd Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 30 Oct 2023 09:23:38 -0500 Subject: [PATCH 022/415] ASoC: rockchip: Fix unused rockchip_i2s_tdm_match warning for !CONFIG_OF Commit 9958d85968ed ("ASoC: Use device_get_match_data()") dropped the unconditional use of rockchip_i2s_tdm_match resulting in this warning: sound/soc/rockchip/rockchip_i2s_tdm.c:1315:34: warning: 'rockchip_i2s_tdm_match' defined but not used [-Wunused-const-variable=] The fix is to drop of_match_ptr() which is not necessary because DT is always used for this driver. Fixes: 9958d85968ed ("ASoC: Use device_get_match_data()") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310121802.CDAGVdF2-lkp@intel.com/ Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20231030142337.814907-2-robh@kernel.org Signed-off-by: Mark Brown --- sound/soc/rockchip/rockchip_i2s_tdm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c index 5c51dbef6e86..860e66ec85e8 100644 --- a/sound/soc/rockchip/rockchip_i2s_tdm.c +++ b/sound/soc/rockchip/rockchip_i2s_tdm.c @@ -1757,7 +1757,7 @@ static struct platform_driver rockchip_i2s_tdm_driver = { .remove_new = rockchip_i2s_tdm_remove, .driver = { .name = DRV_NAME, - .of_match_table = of_match_ptr(rockchip_i2s_tdm_match), + .of_match_table = rockchip_i2s_tdm_match, .pm = &rockchip_i2s_tdm_pm_ops, }, }; From 1a3b7eab8500a6b923f7b62cc8aa4d832c7dfb3e Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Mon, 30 Oct 2023 18:36:44 +0800 Subject: [PATCH 023/415] ASoC: rt712-sdca: fix speaker route missing issue Sometimes the codec probe would be called earlier than the hardware initialization. Therefore, the speaker route should be added before the the first_hw_init check. Signed-off-by: Shuming Fan Fixes: f3da2ed110e2 ("ASoC: rt1712-sdca: enable pm_runtime in probe, keep status as 'suspended'")? Link: https://lore.kernel.org/r/20231030103644.1787948-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt712-sdca.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/rt712-sdca.c b/sound/soc/codecs/rt712-sdca.c index 7077ff6ba1f4..6954fbe7ec5f 100644 --- a/sound/soc/codecs/rt712-sdca.c +++ b/sound/soc/codecs/rt712-sdca.c @@ -963,13 +963,6 @@ static int rt712_sdca_probe(struct snd_soc_component *component) rt712_sdca_parse_dt(rt712, &rt712->slave->dev); rt712->component = component; - if (!rt712->first_hw_init) - return 0; - - ret = pm_runtime_resume(component->dev); - if (ret < 0 && ret != -EACCES) - return ret; - /* add SPK route */ if (rt712->hw_id != RT712_DEV_ID_713) { snd_soc_add_component_controls(component, @@ -980,6 +973,13 @@ static int rt712_sdca_probe(struct snd_soc_component *component) rt712_sdca_spk_dapm_routes, ARRAY_SIZE(rt712_sdca_spk_dapm_routes)); } + if (!rt712->first_hw_init) + return 0; + + ret = pm_runtime_resume(component->dev); + if (ret < 0 && ret != -EACCES) + return ret; + return 0; } From cba4590036855f4e3110d43c14385d2401080dbb Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 27 Oct 2023 09:54:25 -0700 Subject: [PATCH 024/415] ASoC: codecs: aw88399: Fix -Wuninitialized in aw_dev_set_vcalb() Clang warns (or errors with CONFIG_WERROR=y): sound/soc/codecs/aw88399.c:441:18: error: variable 'vsense_select' is uninitialized when used here [-Werror,-Wuninitialized] 441 | vsense_select = vsense_select & (~AW88399_VDSEL_MASK); | ^~~~~~~~~~~~~ sound/soc/codecs/aw88399.c:431:28: note: initialize the variable 'vsense_select' to silence this warning 431 | unsigned int vsense_select, vsense_value; | ^ | = 0 1 error generated. This clearly should have been using the value received from regmap_read(). Use the correct variable to resolve the warning. Closes: https://github.com/ClangBuiltLinux/linux/issues/1952 Fixes: 8ade6cc7e261 ("ASoC: codecs: Add aw88399 amplifier driver") Signed-off-by: Nathan Chancellor Reviewed-by: Weidong Wang Link: https://lore.kernel.org/r/20231027-asoc-aw88399-fix-wuninitialized-v1-1-b1044493e4cd@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/aw88399.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/aw88399.c b/sound/soc/codecs/aw88399.c index ce30bc7cdea9..47fe38d0def5 100644 --- a/sound/soc/codecs/aw88399.c +++ b/sound/soc/codecs/aw88399.c @@ -438,7 +438,7 @@ static int aw_dev_set_vcalb(struct aw88399 *aw88399) if (ret) return ret; - vsense_select = vsense_select & (~AW88399_VDSEL_MASK); + vsense_select = vsense_value & (~AW88399_VDSEL_MASK); ret = aw88399_dev_get_icalk(aw88399, &icalk); if (ret) { From ed2232d49187cebc007ecf4e6374069b11ab3219 Mon Sep 17 00:00:00 2001 From: Syed Saba Kareem Date: Tue, 31 Oct 2023 19:29:34 +0530 Subject: [PATCH 025/415] ASoC: amd: acp: fix for i2s mode register field update I2S mode register field will be set to 1 when tdm mode is enabled. Update the I2S mode field based on tdm_mode flag check. This will fix below smatch checker warning. sound/soc/amd/acp/acp-i2s.c:59 acp_set_i2s_clk() warn: odd binop '0x0 & 0x2' Fixes: 40f74d5f09d7 ("ASoC: amd: acp: refactor acp i2s clock generation code") Reported-By: Dan Carpenter Signed-off-by: Syed Saba Kareem Link: https://lore.kernel.org/r/20231031135949.1064581-3-Syed.SabaKareem@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-i2s.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/amd/acp/acp-i2s.c b/sound/soc/amd/acp/acp-i2s.c index 1185e5aac523..60cbc881be6e 100644 --- a/sound/soc/amd/acp/acp-i2s.c +++ b/sound/soc/amd/acp/acp-i2s.c @@ -26,7 +26,6 @@ #define DRV_NAME "acp_i2s_playcap" #define I2S_MASTER_MODE_ENABLE 1 -#define I2S_MODE_ENABLE 0 #define LRCLK_DIV_FIELD GENMASK(10, 2) #define BCLK_DIV_FIELD GENMASK(23, 11) #define ACP63_LRCLK_DIV_FIELD GENMASK(12, 2) @@ -56,7 +55,8 @@ static inline void acp_set_i2s_clk(struct acp_dev_data *adata, int dai_id) } val = I2S_MASTER_MODE_ENABLE; - val |= I2S_MODE_ENABLE & BIT(1); + if (adata->tdm_mode) + val |= BIT(1); switch (chip->acp_rev) { case ACP63_DEV: From c9e920ffa752b9047d65041cd70495409c768dd7 Mon Sep 17 00:00:00 2001 From: Weidong Wang Date: Wed, 1 Nov 2023 17:02:08 +0800 Subject: [PATCH 026/415] ASoC: codecs: Modify the maximum value of calib The maximum value that calib can set should be consistent with the maximum value of re. Signed-off-by: Weidong Wang Link: https://lore.kernel.org/r/20231101090211.177125-2-wangweidong.a@awinic.com Signed-off-by: Mark Brown --- sound/soc/codecs/aw88395/aw88395.c | 2 +- sound/soc/codecs/aw88399.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/aw88395/aw88395.c b/sound/soc/codecs/aw88395/aw88395.c index 77227c8f01f6..3c459a67ad0c 100644 --- a/sound/soc/codecs/aw88395/aw88395.c +++ b/sound/soc/codecs/aw88395/aw88395.c @@ -356,7 +356,7 @@ static const struct snd_kcontrol_new aw88395_controls[] = { aw88395_get_fade_in_time, aw88395_set_fade_in_time), SOC_SINGLE_EXT("Volume Ramp Down Step", 0, 0, FADE_TIME_MAX, FADE_TIME_MIN, aw88395_get_fade_out_time, aw88395_set_fade_out_time), - SOC_SINGLE_EXT("Calib", 0, 0, 100, 0, + SOC_SINGLE_EXT("Calib", 0, 0, AW88395_CALI_RE_MAX, 0, aw88395_re_get, aw88395_re_set), AW88395_PROFILE_EXT("Profile Set", aw88395_profile_info, aw88395_profile_get, aw88395_profile_set), diff --git a/sound/soc/codecs/aw88399.c b/sound/soc/codecs/aw88399.c index 47fe38d0def5..1d6228cb1cc2 100644 --- a/sound/soc/codecs/aw88399.c +++ b/sound/soc/codecs/aw88399.c @@ -1710,7 +1710,7 @@ static const struct snd_kcontrol_new aw88399_controls[] = { aw88399_get_fade_in_time, aw88399_set_fade_in_time), SOC_SINGLE_EXT("Volume Ramp Down Step", 0, 0, FADE_TIME_MAX, FADE_TIME_MIN, aw88399_get_fade_out_time, aw88399_set_fade_out_time), - SOC_SINGLE_EXT("Calib", 0, 0, 100, 0, + SOC_SINGLE_EXT("Calib", 0, 0, AW88399_CALI_RE_MAX, 0, aw88399_re_get, aw88399_re_set), AW88399_PROFILE_EXT("AW88399 Profile Set", aw88399_profile_info, aw88399_profile_get, aw88399_profile_set), From baf46c3c763809fbeabcff5ec6e2ff3081f755f2 Mon Sep 17 00:00:00 2001 From: Weidong Wang Date: Wed, 1 Nov 2023 17:02:09 +0800 Subject: [PATCH 027/415] ASoC: codecs: Modify the wrong judgment of re value An error code should be return when the re is greater than the maximum value or less than the minimum value Signed-off-by: Weidong Wang Link: https://lore.kernel.org/r/20231101090211.177125-3-wangweidong.a@awinic.com Signed-off-by: Mark Brown --- sound/soc/codecs/aw88399.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/aw88399.c b/sound/soc/codecs/aw88399.c index 1d6228cb1cc2..54f8457e8497 100644 --- a/sound/soc/codecs/aw88399.c +++ b/sound/soc/codecs/aw88399.c @@ -486,8 +486,8 @@ static int aw_dev_update_cali_re(struct aw_cali_desc *cali_desc) u32 cali_re; int ret; - if ((aw_dev->cali_desc.cali_re <= AW88399_CALI_RE_MAX) || - (aw_dev->cali_desc.cali_re >= AW88399_CALI_RE_MIN)) + if ((aw_dev->cali_desc.cali_re >= AW88399_CALI_RE_MAX) || + (aw_dev->cali_desc.cali_re <= AW88399_CALI_RE_MIN)) return -EINVAL; cali_re = AW88399_SHOW_RE_TO_DSP_RE((aw_dev->cali_desc.cali_re + From b729598c1747576bb9a4c997190be3f7c2915726 Mon Sep 17 00:00:00 2001 From: Weidong Wang Date: Wed, 1 Nov 2023 17:02:10 +0800 Subject: [PATCH 028/415] ASoC: codecs: Modify macro value error The value of vsense_select should be either 32 or 0 in both cases, so modify the AW88399_DEV_VDSEL_VSENSE macro to 32. Signed-off-by: Weidong Wang Link: https://lore.kernel.org/r/20231101090211.177125-4-wangweidong.a@awinic.com Signed-off-by: Mark Brown --- sound/soc/codecs/aw88399.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/aw88399.h b/sound/soc/codecs/aw88399.h index 8b3f1e101985..4f391099d0f2 100644 --- a/sound/soc/codecs/aw88399.h +++ b/sound/soc/codecs/aw88399.h @@ -522,7 +522,7 @@ enum { enum { AW88399_DEV_VDSEL_DAC = 0, - AW88399_DEV_VDSEL_VSENSE = 1, + AW88399_DEV_VDSEL_VSENSE = 32, }; enum { From ab5201e20c181563774631258f737caeefed2364 Mon Sep 17 00:00:00 2001 From: David Rau Date: Wed, 1 Nov 2023 10:25:07 +0800 Subject: [PATCH 029/415] ASoC: da7219: Improve system suspend and resume handling When DA7219 is suspended, prevent the AAD IRQ handler is unexpectedly executed and cause the I2C driver "Transfer while suspended" failure. Signed-off-by: David Rau Link: https://lore.kernel.org/r/20231101022507.6226-1-David.Rau.opensource@dm.renesas.com Signed-off-by: Mark Brown --- sound/soc/codecs/da7219-aad.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/da7219-aad.c b/sound/soc/codecs/da7219-aad.c index 3bbe85091649..4c4405942779 100644 --- a/sound/soc/codecs/da7219-aad.c +++ b/sound/soc/codecs/da7219-aad.c @@ -927,10 +927,15 @@ void da7219_aad_suspend(struct snd_soc_component *component) struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component); u8 micbias_ctrl; + disable_irq(da7219_aad->irq); + if (da7219_aad->jack) { /* Disable jack detection during suspend */ snd_soc_component_update_bits(component, DA7219_ACCDET_CONFIG_1, DA7219_ACCDET_EN_MASK, 0); + cancel_delayed_work_sync(&da7219_aad->jack_det_work); + /* Disable ground switch */ + snd_soc_component_update_bits(component, 0xFB, 0x01, 0x00); /* * If we have a 4-pole jack inserted, then micbias will be @@ -947,8 +952,6 @@ void da7219_aad_suspend(struct snd_soc_component *component) } } } - - synchronize_irq(da7219_aad->irq); } void da7219_aad_resume(struct snd_soc_component *component) @@ -971,6 +974,8 @@ void da7219_aad_resume(struct snd_soc_component *component) DA7219_ACCDET_EN_MASK, DA7219_ACCDET_EN_MASK); } + + enable_irq(da7219_aad->irq); } From 3503895788d402d6a3814085ed582c364ec3e903 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 31 Oct 2023 12:02:06 -0400 Subject: [PATCH 030/415] virtio_pci: move structure to a header These are guest/host interfaces, so they belong in the header where e.g. qemu will know to find them. Note: we added a new structure as opposed to extending existing one because someone might be relying on the size of the existing structure staying unchanged. Add a warning to avoid using sizeof. Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo --- drivers/virtio/virtio_pci_modern_dev.c | 7 ++++--- include/linux/virtio_pci_modern.h | 7 ------- include/uapi/linux/virtio_pci.h | 11 +++++++++++ 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index e2a1fe7bb66c..7de8b1ebabac 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c +++ b/drivers/virtio/virtio_pci_modern_dev.c @@ -294,9 +294,10 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev) err = -EINVAL; mdev->common = vp_modern_map_capability(mdev, common, - sizeof(struct virtio_pci_common_cfg), 4, - 0, sizeof(struct virtio_pci_modern_common_cfg), - &mdev->common_len, NULL); + sizeof(struct virtio_pci_common_cfg), 4, 0, + offsetofend(struct virtio_pci_modern_common_cfg, + queue_reset), + &mdev->common_len, NULL); if (!mdev->common) goto err_map_common; mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1, diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index d0f2797420f7..a09e13a577a9 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -5,13 +5,6 @@ #include #include -struct virtio_pci_modern_common_cfg { - struct virtio_pci_common_cfg cfg; - - __le16 queue_notify_data; /* read-write */ - __le16 queue_reset; /* read-write */ -}; - /** * struct virtio_pci_modern_device - info for modern PCI virtio * @pci_dev: Ptr to the PCI device struct diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index f703afc7ad31..44f4dd2add18 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -166,6 +166,17 @@ struct virtio_pci_common_cfg { __le32 queue_used_hi; /* read-write */ }; +/* + * Warning: do not use sizeof on this: use offsetofend for + * specific fields you need. + */ +struct virtio_pci_modern_common_cfg { + struct virtio_pci_common_cfg cfg; + + __le16 queue_notify_data; /* read-write */ + __le16 queue_reset; /* read-write */ +}; + /* Fields in VIRTIO_PCI_CAP_PCI_CFG: */ struct virtio_pci_cfg_cap { struct virtio_pci_cap cap; From 0d82410252ea324f0064e75b9865bb74cccc1dda Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 31 Oct 2023 15:43:39 +0100 Subject: [PATCH 031/415] vdpa_sim_blk: allocate the buffer zeroed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Deleting and recreating a device can lead to having the same content as the old device, so let's always allocate buffers completely zeroed out. Fixes: abebb16254b3 ("vdpa_sim_blk: support shared backend") Suggested-by: Qing Wang Signed-off-by: Stefano Garzarella Message-Id: <20231031144339.121453-1-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin Acked-by: Eugenio Pérez Acked-by: Jason Wang --- drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c index b3a3cb165795..b137f3679343 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c @@ -437,7 +437,7 @@ static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, if (blk->shared_backend) { blk->buffer = shared_buffer; } else { - blk->buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT, + blk->buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT, GFP_KERNEL); if (!blk->buffer) { ret = -ENOMEM; @@ -495,7 +495,7 @@ static int __init vdpasim_blk_init(void) goto parent_err; if (shared_backend) { - shared_buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT, + shared_buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT, GFP_KERNEL); if (!shared_buffer) { ret = -ENOMEM; From f2de37a572853d340f945a7748f74e3ed8c6b743 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Thu, 12 Oct 2023 12:28:52 +0200 Subject: [PATCH 032/415] riscv, qemu_fw_cfg: Add support for RISC-V architecture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Qemu fw_cfg support was missing for RISC-V, which made it hard to do proper vmcore dumps from qemu. Add the missing RISC-V arch-defines. You can now do vmcore dumps from qemu. Add "-device vmcoreinfo" to the qemu command-line. From the qemu monitor: (qemu) dump-guest-memory vmcore The vmcore can now be used, e.g., with the "crash" utility. Acked-by: "Michael S. Tsirkin" Acked-by: Alistair Francis Tested-by: Clément Léger Signed-off-by: Björn Töpel Message-Id: <20231012102852.234442-1-bjorn@kernel.org> Signed-off-by: Michael S. Tsirkin --- drivers/firmware/Kconfig | 2 +- drivers/firmware/qemu_fw_cfg.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index b59e3041fd62..f05ff56629b3 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -155,7 +155,7 @@ config RASPBERRYPI_FIRMWARE config FW_CFG_SYSFS tristate "QEMU fw_cfg device support in sysfs" - depends on SYSFS && (ARM || ARM64 || PARISC || PPC_PMAC || SPARC || X86) + depends on SYSFS && (ARM || ARM64 || PARISC || PPC_PMAC || RISCV || SPARC || X86) depends on HAS_IOPORT_MAP default n help diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index a69399a6b7c0..1448f61173b3 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -211,7 +211,7 @@ static void fw_cfg_io_cleanup(void) /* arch-specific ctrl & data register offsets are not available in ACPI, DT */ #if !(defined(FW_CFG_CTRL_OFF) && defined(FW_CFG_DATA_OFF)) -# if (defined(CONFIG_ARM) || defined(CONFIG_ARM64)) +# if (defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV)) # define FW_CFG_CTRL_OFF 0x08 # define FW_CFG_DATA_OFF 0x00 # define FW_CFG_DMA_OFF 0x10 From b2c8b644fac1087dfe69a1762c04df090178a5ae Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 25 Oct 2023 16:53:19 +0200 Subject: [PATCH 033/415] virtio_pci: Switch away from deprecated irq_set_affinity_hint Since commit 65c7cdedeb30 ("genirq: Provide new interfaces for affinity hints") irq_set_affinity_hint is being phased out. Switch to new interfaces for setting and applying irq affinity hints. Signed-off-by: Jakub Sitnicki Message-Id: <20231025145319.380775-1-jakub@cloudflare.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo --- drivers/virtio/virtio_pci_common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index c2524a7207cf..7a5593997e0e 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -242,7 +242,7 @@ void vp_del_vqs(struct virtio_device *vdev) if (v != VIRTIO_MSI_NO_VECTOR) { int irq = pci_irq_vector(vp_dev->pci_dev, v); - irq_set_affinity_hint(irq, NULL); + irq_update_affinity_hint(irq, NULL); free_irq(irq, vq); } } @@ -443,10 +443,10 @@ int vp_set_vq_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask) mask = vp_dev->msix_affinity_masks[info->msix_vector]; irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector); if (!cpu_mask) - irq_set_affinity_hint(irq, NULL); + irq_update_affinity_hint(irq, NULL); else { cpumask_copy(mask, cpu_mask); - irq_set_affinity_hint(irq, mask); + irq_set_affinity_and_hint(irq, mask); } } return 0; From e07754e0a1ea2d63fb29574253d1fd7405607343 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 27 Oct 2023 15:12:54 +0300 Subject: [PATCH 034/415] vhost-vdpa: fix use after free in vhost_vdpa_probe() The put_device() calls vhost_vdpa_release_dev() which calls ida_simple_remove() and frees "v". So this call to ida_simple_remove() is a use after free and a double free. Fixes: ebe6a354fa7e ("vhost-vdpa: Call ida_simple_remove() when failed") Signed-off-by: Dan Carpenter Message-Id: Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/vdpa.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 30df5c58db73..da7ec77cdaff 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1582,7 +1582,6 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) err: put_device(&v->dev); - ida_simple_remove(&vhost_vdpa_ida, v->minor); return r; } From f19c3b4239f5bfb69aacbaf75d4277c095e7aa7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 4 Oct 2023 17:13:58 +0200 Subject: [PATCH 035/415] riscv: remove unused functions in traps_misaligned.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace macros by the only two function calls that are done from this file, store_u8() and load_u8(). Signed-off-by: Clément Léger Link: https://lore.kernel.org/r/20231004151405.521596-2-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps_misaligned.c | 48 +++++----------------------- 1 file changed, 8 insertions(+), 40 deletions(-) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 378f5b151443..e7bfb33089c1 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -151,51 +151,19 @@ #define PRECISION_S 0 #define PRECISION_D 1 -#define DECLARE_UNPRIVILEGED_LOAD_FUNCTION(type, insn) \ -static inline type load_##type(const type *addr) \ -{ \ - type val; \ - asm (#insn " %0, %1" \ - : "=&r" (val) : "m" (*addr)); \ - return val; \ -} - -#define DECLARE_UNPRIVILEGED_STORE_FUNCTION(type, insn) \ -static inline void store_##type(type *addr, type val) \ -{ \ - asm volatile (#insn " %0, %1\n" \ - : : "r" (val), "m" (*addr)); \ -} - -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u8, lbu) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u16, lhu) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s8, lb) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s16, lh) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s32, lw) -DECLARE_UNPRIVILEGED_STORE_FUNCTION(u8, sb) -DECLARE_UNPRIVILEGED_STORE_FUNCTION(u16, sh) -DECLARE_UNPRIVILEGED_STORE_FUNCTION(u32, sw) -#if defined(CONFIG_64BIT) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u32, lwu) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u64, ld) -DECLARE_UNPRIVILEGED_STORE_FUNCTION(u64, sd) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(ulong, ld) -#else -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u32, lw) -DECLARE_UNPRIVILEGED_LOAD_FUNCTION(ulong, lw) - -static inline u64 load_u64(const u64 *addr) +static inline u8 load_u8(const u8 *addr) { - return load_u32((u32 *)addr) - + ((u64)load_u32((u32 *)addr + 1) << 32); + u8 val; + + asm volatile("lbu %0, %1" : "=&r" (val) : "m" (*addr)); + + return val; } -static inline void store_u64(u64 *addr, u64 val) +static inline void store_u8(u8 *addr, u8 val) { - store_u32((u32 *)addr, val); - store_u32((u32 *)addr + 1, val >> 32); + asm volatile ("sb %0, %1\n" : : "r" (val), "m" (*addr)); } -#endif static inline ulong get_insn(ulong mepc) { From 7c83232161f609bbc452a1255f823f41afc411dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 4 Oct 2023 17:13:59 +0200 Subject: [PATCH 036/415] riscv: add support for misaligned trap handling in S-mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Misalignment trap handling is only supported for M-mode and uses direct accesses to user memory. In S-mode, when handling usermode fault, this requires to use the get_user()/put_user() accessors. Implement load_u8(), store_u8() and get_insn() using these accessors for userspace and direct text access for kernel. Signed-off-by: Clément Léger Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20231004151405.521596-3-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 8 ++ arch/riscv/include/asm/entry-common.h | 14 +++ arch/riscv/kernel/Makefile | 2 +- arch/riscv/kernel/traps.c | 9 -- arch/riscv/kernel/traps_misaligned.c | 119 +++++++++++++++++++++++--- 5 files changed, 129 insertions(+), 23 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index d607ab0f7c6d..6e167358a897 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -636,6 +636,14 @@ config THREAD_SIZE_ORDER Specify the Pages of thread stack size (from 4KB to 64KB), which also affects irq stack size, which is equal to thread stack size. +config RISCV_MISALIGNED + bool "Support misaligned load/store traps for kernel and userspace" + default y + help + Say Y here if you want the kernel to embed support for misaligned + load/store for both kernel and userspace. When disable, misaligned + accesses will generate SIGBUS in userspace and panic in kernel. + endmenu # "Platform type" menu "Kernel features" diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h index 6e4dee49d84b..7ab5e34318c8 100644 --- a/arch/riscv/include/asm/entry-common.h +++ b/arch/riscv/include/asm/entry-common.h @@ -8,4 +8,18 @@ void handle_page_fault(struct pt_regs *regs); void handle_break(struct pt_regs *regs); +#ifdef CONFIG_RISCV_MISALIGNED +int handle_misaligned_load(struct pt_regs *regs); +int handle_misaligned_store(struct pt_regs *regs); +#else +static inline int handle_misaligned_load(struct pt_regs *regs) +{ + return -1; +} +static inline int handle_misaligned_store(struct pt_regs *regs) +{ + return -1; +} +#endif + #endif /* _ASM_RISCV_ENTRY_COMMON_H */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 95cf25d48405..0d874fb24b51 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -59,7 +59,7 @@ obj-y += patch.o obj-y += probes/ obj-$(CONFIG_MMU) += vdso.o vdso/ -obj-$(CONFIG_RISCV_M_MODE) += traps_misaligned.o +obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_RISCV_ISA_V) += vector.o obj-$(CONFIG_SMP) += smpboot.o diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 19807c4d3805..d69779e4b967 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -179,14 +179,6 @@ asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *re DO_ERROR_INFO(do_trap_load_fault, SIGSEGV, SEGV_ACCERR, "load access fault"); -#ifndef CONFIG_RISCV_M_MODE -DO_ERROR_INFO(do_trap_load_misaligned, - SIGBUS, BUS_ADRALN, "Oops - load address misaligned"); -DO_ERROR_INFO(do_trap_store_misaligned, - SIGBUS, BUS_ADRALN, "Oops - store (or AMO) address misaligned"); -#else -int handle_misaligned_load(struct pt_regs *regs); -int handle_misaligned_store(struct pt_regs *regs); asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs) { @@ -229,7 +221,6 @@ asmlinkage __visible __trap_section void do_trap_store_misaligned(struct pt_regs irqentry_nmi_exit(regs, state); } } -#endif DO_ERROR_INFO(do_trap_store_fault, SIGSEGV, SEGV_ACCERR, "store (or AMO) access fault"); DO_ERROR_INFO(do_trap_ecall_s, diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index e7bfb33089c1..9daed7d756ae 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -12,6 +12,7 @@ #include #include #include +#include #define INSN_MATCH_LB 0x3 #define INSN_MASK_LB 0x707f @@ -151,21 +152,25 @@ #define PRECISION_S 0 #define PRECISION_D 1 -static inline u8 load_u8(const u8 *addr) +#ifdef CONFIG_RISCV_M_MODE +static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val) { u8 val; asm volatile("lbu %0, %1" : "=&r" (val) : "m" (*addr)); + *r_val = val; - return val; + return 0; } -static inline void store_u8(u8 *addr, u8 val) +static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val) { asm volatile ("sb %0, %1\n" : : "r" (val), "m" (*addr)); + + return 0; } -static inline ulong get_insn(ulong mepc) +static inline int get_insn(struct pt_regs *regs, ulong mepc, ulong *r_insn) { register ulong __mepc asm ("a2") = mepc; ulong val, rvc_mask = 3, tmp; @@ -194,8 +199,86 @@ static inline ulong get_insn(ulong mepc) : [addr] "r" (__mepc), [rvc_mask] "r" (rvc_mask), [xlen_minus_16] "i" (XLEN_MINUS_16)); - return val; + *r_insn = val; + + return 0; } +#else +static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val) +{ + if (user_mode(regs)) { + return __get_user(*r_val, addr); + } else { + *r_val = *addr; + return 0; + } +} + +static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val) +{ + if (user_mode(regs)) { + return __put_user(val, addr); + } else { + *addr = val; + return 0; + } +} + +#define __read_insn(regs, insn, insn_addr) \ +({ \ + int __ret; \ + \ + if (user_mode(regs)) { \ + __ret = __get_user(insn, insn_addr); \ + } else { \ + insn = *insn_addr; \ + __ret = 0; \ + } \ + \ + __ret; \ +}) + +static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn) +{ + ulong insn = 0; + + if (epc & 0x2) { + ulong tmp = 0; + u16 __user *insn_addr = (u16 __user *)epc; + + if (__read_insn(regs, insn, insn_addr)) + return -EFAULT; + /* __get_user() uses regular "lw" which sign extend the loaded + * value make sure to clear higher order bits in case we "or" it + * below with the upper 16 bits half. + */ + insn &= GENMASK(15, 0); + if ((insn & __INSN_LENGTH_MASK) != __INSN_LENGTH_32) { + *r_insn = insn; + return 0; + } + insn_addr++; + if (__read_insn(regs, tmp, insn_addr)) + return -EFAULT; + *r_insn = (tmp << 16) | insn; + + return 0; + } else { + u32 __user *insn_addr = (u32 __user *)epc; + + if (__read_insn(regs, insn, insn_addr)) + return -EFAULT; + if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) { + *r_insn = insn; + return 0; + } + insn &= GENMASK(15, 0); + *r_insn = insn; + + return 0; + } +} +#endif union reg_data { u8 data_bytes[8]; @@ -207,10 +290,13 @@ int handle_misaligned_load(struct pt_regs *regs) { union reg_data val; unsigned long epc = regs->epc; - unsigned long insn = get_insn(epc); - unsigned long addr = csr_read(mtval); + unsigned long insn; + unsigned long addr = regs->badaddr; int i, fp = 0, shift = 0, len = 0; + if (get_insn(regs, epc, &insn)) + return -1; + regs->epc = 0; if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) { @@ -274,8 +360,10 @@ int handle_misaligned_load(struct pt_regs *regs) } val.data_u64 = 0; - for (i = 0; i < len; i++) - val.data_bytes[i] = load_u8((void *)(addr + i)); + for (i = 0; i < len; i++) { + if (load_u8(regs, (void *)(addr + i), &val.data_bytes[i])) + return -1; + } if (fp) return -1; @@ -290,10 +378,13 @@ int handle_misaligned_store(struct pt_regs *regs) { union reg_data val; unsigned long epc = regs->epc; - unsigned long insn = get_insn(epc); - unsigned long addr = csr_read(mtval); + unsigned long insn; + unsigned long addr = regs->badaddr; int i, len = 0; + if (get_insn(regs, epc, &insn)) + return -1; + regs->epc = 0; val.data_ulong = GET_RS2(insn, regs); @@ -327,8 +418,10 @@ int handle_misaligned_store(struct pt_regs *regs) return -1; } - for (i = 0; i < len; i++) - store_u8((void *)(addr + i), val.data_bytes[i]); + for (i = 0; i < len; i++) { + if (store_u8(regs, (void *)(addr + i), val.data_bytes[i])) + return -1; + } regs->epc = epc + INSN_LEN(insn); From 89c12fecdc4d46c1f08a81dab5d305304cc626eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 4 Oct 2023 17:14:00 +0200 Subject: [PATCH 037/415] riscv: report perf event for misaligned fault MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add missing calls to account for misaligned fault event using perf_sw_event(). Signed-off-by: Clément Léger Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20231004151405.521596-4-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps_misaligned.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 9daed7d756ae..804f6c5e0e44 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -294,6 +295,8 @@ int handle_misaligned_load(struct pt_regs *regs) unsigned long addr = regs->badaddr; int i, fp = 0, shift = 0, len = 0; + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); + if (get_insn(regs, epc, &insn)) return -1; @@ -382,6 +385,8 @@ int handle_misaligned_store(struct pt_regs *regs) unsigned long addr = regs->badaddr; int i, len = 0; + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); + if (get_insn(regs, epc, &insn)) return -1; From 7c586a555a48a952f64d883d2f20402fb61d9164 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 4 Oct 2023 17:14:01 +0200 Subject: [PATCH 038/415] riscv: add floating point insn support to misaligned access emulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This support is partially based of openSBI misaligned emulation floating point instruction support. It provides support for the existing floating point instructions (both for 32/64 bits as well as compressed ones). Since floating point registers are not part of the pt_regs struct, we need to modify them directly using some assembly. We also dirty the pt_regs status in case we modify them to be sure context switch will save FP state. With this support, Linux is on par with openSBI support. Signed-off-by: Clément Léger Link: https://lore.kernel.org/r/20231004151405.521596-5-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/fpu.S | 121 +++++++++++++++++++++ arch/riscv/kernel/traps_misaligned.c | 152 ++++++++++++++++++++++++++- 2 files changed, 269 insertions(+), 4 deletions(-) diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S index dd2205473de7..5dd3161a4dac 100644 --- a/arch/riscv/kernel/fpu.S +++ b/arch/riscv/kernel/fpu.S @@ -104,3 +104,124 @@ ENTRY(__fstate_restore) csrc CSR_STATUS, t1 ret ENDPROC(__fstate_restore) + +#define get_f32(which) fmv.x.s a0, which; j 2f +#define put_f32(which) fmv.s.x which, a1; j 2f +#if __riscv_xlen == 64 +# define get_f64(which) fmv.x.d a0, which; j 2f +# define put_f64(which) fmv.d.x which, a1; j 2f +#else +# define get_f64(which) fsd which, 0(a1); j 2f +# define put_f64(which) fld which, 0(a1); j 2f +#endif + +.macro fp_access_prologue + /* + * Compute jump offset to store the correct FP register since we don't + * have indirect FP register access + */ + sll t0, a0, 3 + la t2, 1f + add t0, t0, t2 + li t1, SR_FS + csrs CSR_STATUS, t1 + jr t0 +1: +.endm + +.macro fp_access_epilogue +2: + csrc CSR_STATUS, t1 + ret +.endm + +#define fp_access_body(__access_func) \ + __access_func(f0); \ + __access_func(f1); \ + __access_func(f2); \ + __access_func(f3); \ + __access_func(f4); \ + __access_func(f5); \ + __access_func(f6); \ + __access_func(f7); \ + __access_func(f8); \ + __access_func(f9); \ + __access_func(f10); \ + __access_func(f11); \ + __access_func(f12); \ + __access_func(f13); \ + __access_func(f14); \ + __access_func(f15); \ + __access_func(f16); \ + __access_func(f17); \ + __access_func(f18); \ + __access_func(f19); \ + __access_func(f20); \ + __access_func(f21); \ + __access_func(f22); \ + __access_func(f23); \ + __access_func(f24); \ + __access_func(f25); \ + __access_func(f26); \ + __access_func(f27); \ + __access_func(f28); \ + __access_func(f29); \ + __access_func(f30); \ + __access_func(f31) + + +#ifdef CONFIG_RISCV_MISALIGNED + +/* + * Disable compressed instructions set to keep a constant offset between FP + * load/store/move instructions + */ +.option norvc +/* + * put_f32_reg - Set a FP register from a register containing the value + * a0 = FP register index to be set + * a1 = value to be loaded in the FP register + */ +SYM_FUNC_START(put_f32_reg) + fp_access_prologue + fp_access_body(put_f32) + fp_access_epilogue +SYM_FUNC_END(put_f32_reg) + +/* + * get_f32_reg - Get a FP register value and return it + * a0 = FP register index to be retrieved + */ +SYM_FUNC_START(get_f32_reg) + fp_access_prologue + fp_access_body(get_f32) + fp_access_epilogue +SYM_FUNC_END(get_f32_reg) + +/* + * put_f64_reg - Set a 64 bits FP register from a value or a pointer. + * a0 = FP register index to be set + * a1 = value/pointer to be loaded in the FP register (when xlen == 32 bits, we + * load the value to a pointer). + */ +SYM_FUNC_START(put_f64_reg) + fp_access_prologue + fp_access_body(put_f64) + fp_access_epilogue +SYM_FUNC_END(put_f64_reg) + +/* + * put_f64_reg - Get a 64 bits FP register value and returned it or store it to + * a pointer. + * a0 = FP register index to be retrieved + * a1 = If xlen == 32, pointer which should be loaded with the FP register value + * or unused if xlen == 64. In which case the FP register value is returned + * through a0 + */ +SYM_FUNC_START(get_f64_reg) + fp_access_prologue + fp_access_body(get_f64) + fp_access_epilogue +SYM_FUNC_END(get_f64_reg) + +#endif /* CONFIG_RISCV_MISALIGNED */ diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 804f6c5e0e44..041fd2dbd955 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -153,6 +153,115 @@ #define PRECISION_S 0 #define PRECISION_D 1 +#ifdef CONFIG_FPU + +#define FP_GET_RD(insn) (insn >> 7 & 0x1F) + +extern void put_f32_reg(unsigned long fp_reg, unsigned long value); + +static int set_f32_rd(unsigned long insn, struct pt_regs *regs, + unsigned long val) +{ + unsigned long fp_reg = FP_GET_RD(insn); + + put_f32_reg(fp_reg, val); + regs->status |= SR_FS_DIRTY; + + return 0; +} + +extern void put_f64_reg(unsigned long fp_reg, unsigned long value); + +static int set_f64_rd(unsigned long insn, struct pt_regs *regs, u64 val) +{ + unsigned long fp_reg = FP_GET_RD(insn); + unsigned long value; + +#if __riscv_xlen == 32 + value = (unsigned long) &val; +#else + value = val; +#endif + put_f64_reg(fp_reg, value); + regs->status |= SR_FS_DIRTY; + + return 0; +} + +#if __riscv_xlen == 32 +extern void get_f64_reg(unsigned long fp_reg, u64 *value); + +static u64 get_f64_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) +{ + unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F; + u64 val; + + get_f64_reg(fp_reg, &val); + regs->status |= SR_FS_DIRTY; + + return val; +} +#else + +extern unsigned long get_f64_reg(unsigned long fp_reg); + +static unsigned long get_f64_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) +{ + unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F; + unsigned long val; + + val = get_f64_reg(fp_reg); + regs->status |= SR_FS_DIRTY; + + return val; +} + +#endif + +extern unsigned long get_f32_reg(unsigned long fp_reg); + +static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) +{ + unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F; + unsigned long val; + + val = get_f32_reg(fp_reg); + regs->status |= SR_FS_DIRTY; + + return val; +} + +#else /* CONFIG_FPU */ +static void set_f32_rd(unsigned long insn, struct pt_regs *regs, + unsigned long val) {} + +static void set_f64_rd(unsigned long insn, struct pt_regs *regs, u64 val) {} + +static unsigned long get_f64_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) +{ + return 0; +} + +static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset, + struct pt_regs *regs) +{ + return 0; +} + +#endif + +#define GET_F64_RS2(insn, regs) (get_f64_rs(insn, 20, regs)) +#define GET_F64_RS2C(insn, regs) (get_f64_rs(insn, 2, regs)) +#define GET_F64_RS2S(insn, regs) (get_f64_rs(RVC_RS2S(insn), 0, regs)) + +#define GET_F32_RS2(insn, regs) (get_f32_rs(insn, 20, regs)) +#define GET_F32_RS2C(insn, regs) (get_f32_rs(insn, 2, regs)) +#define GET_F32_RS2S(insn, regs) (get_f32_rs(RVC_RS2S(insn), 0, regs)) + #ifdef CONFIG_RISCV_M_MODE static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val) { @@ -362,15 +471,21 @@ int handle_misaligned_load(struct pt_regs *regs) return -1; } + if (!IS_ENABLED(CONFIG_FPU) && fp) + return -EOPNOTSUPP; + val.data_u64 = 0; for (i = 0; i < len; i++) { if (load_u8(regs, (void *)(addr + i), &val.data_bytes[i])) return -1; } - if (fp) - return -1; - SET_RD(insn, regs, val.data_ulong << shift >> shift); + if (!fp) + SET_RD(insn, regs, val.data_ulong << shift >> shift); + else if (len == 8) + set_f64_rd(insn, regs, val.data_u64); + else + set_f32_rd(insn, regs, val.data_ulong); regs->epc = epc + INSN_LEN(insn); @@ -383,7 +498,7 @@ int handle_misaligned_store(struct pt_regs *regs) unsigned long epc = regs->epc; unsigned long insn; unsigned long addr = regs->badaddr; - int i, len = 0; + int i, len = 0, fp = 0; perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); @@ -400,6 +515,14 @@ int handle_misaligned_store(struct pt_regs *regs) } else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) { len = 8; #endif + } else if ((insn & INSN_MASK_FSD) == INSN_MATCH_FSD) { + fp = 1; + len = 8; + val.data_u64 = GET_F64_RS2(insn, regs); + } else if ((insn & INSN_MASK_FSW) == INSN_MATCH_FSW) { + fp = 1; + len = 4; + val.data_ulong = GET_F32_RS2(insn, regs); } else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) { len = 2; #if defined(CONFIG_64BIT) @@ -418,11 +541,32 @@ int handle_misaligned_store(struct pt_regs *regs) ((insn >> SH_RD) & 0x1f)) { len = 4; val.data_ulong = GET_RS2C(insn, regs); + } else if ((insn & INSN_MASK_C_FSD) == INSN_MATCH_C_FSD) { + fp = 1; + len = 8; + val.data_u64 = GET_F64_RS2S(insn, regs); + } else if ((insn & INSN_MASK_C_FSDSP) == INSN_MATCH_C_FSDSP) { + fp = 1; + len = 8; + val.data_u64 = GET_F64_RS2C(insn, regs); +#if !defined(CONFIG_64BIT) + } else if ((insn & INSN_MASK_C_FSW) == INSN_MATCH_C_FSW) { + fp = 1; + len = 4; + val.data_ulong = GET_F32_RS2S(insn, regs); + } else if ((insn & INSN_MASK_C_FSWSP) == INSN_MATCH_C_FSWSP) { + fp = 1; + len = 4; + val.data_ulong = GET_F32_RS2C(insn, regs); +#endif } else { regs->epc = epc; return -1; } + if (!IS_ENABLED(CONFIG_FPU) && fp) + return -EOPNOTSUPP; + for (i = 0; i < len; i++) { if (store_u8(regs, (void *)(addr + i), val.data_bytes[i])) return -1; From bc38f61313d316d74c16ce7287d6dba2f42502c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 4 Oct 2023 17:14:02 +0200 Subject: [PATCH 039/415] riscv: add support for sysctl unaligned_enabled control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This sysctl tuning option allows the user to disable misaligned access handling globally on the system. This will also be used by misaligned detection code to temporarily disable misaligned access handling. Signed-off-by: Clément Léger Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20231004151405.521596-6-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/kernel/traps_misaligned.c | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 6e167358a897..1313f83bb0cb 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -638,6 +638,7 @@ config THREAD_SIZE_ORDER config RISCV_MISALIGNED bool "Support misaligned load/store traps for kernel and userspace" + select SYSCTL_ARCH_UNALIGN_ALLOW default y help Say Y here if you want the kernel to embed support for misaligned diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 041fd2dbd955..b5fb1ff078e3 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -396,6 +396,9 @@ union reg_data { u64 data_u64; }; +/* sysctl hooks */ +int unaligned_enabled __read_mostly = 1; /* Enabled by default */ + int handle_misaligned_load(struct pt_regs *regs) { union reg_data val; @@ -406,6 +409,9 @@ int handle_misaligned_load(struct pt_regs *regs) perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); + if (!unaligned_enabled) + return -1; + if (get_insn(regs, epc, &insn)) return -1; @@ -502,6 +508,9 @@ int handle_misaligned_store(struct pt_regs *regs) perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); + if (!unaligned_enabled) + return -1; + if (get_insn(regs, epc, &insn)) return -1; From 90b11b470b2e88ff583e04be109e6441cb69f54d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 4 Oct 2023 17:14:03 +0200 Subject: [PATCH 040/415] riscv: annotate check_unaligned_access_boot_cpu() with __init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function is solely called as an initcall, thus annotate it with __init. Signed-off-by: Clément Léger Reviewed-by: Evan Green Link: https://lore.kernel.org/r/20231004151405.521596-7-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 1cfbba65d11a..356e5677eeb1 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -645,7 +645,7 @@ void check_unaligned_access(int cpu) __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE)); } -static int check_unaligned_access_boot_cpu(void) +static int __init check_unaligned_access_boot_cpu(void) { check_unaligned_access(0); return 0; From 71c54b3d169db5569655cbd2a3616bc701fd5eec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 4 Oct 2023 17:14:04 +0200 Subject: [PATCH 041/415] riscv: report misaligned accesses emulation to hwprobe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hwprobe provides a way to report if misaligned access are emulated. In order to correctly populate that feature, we can check if it actually traps when doing a misaligned access. This can be checked using an exception table entry which will actually be used when a misaligned access is done from kernel mode. Signed-off-by: Clément Léger Link: https://lore.kernel.org/r/20231004151405.521596-8-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cpufeature.h | 18 +++++++++ arch/riscv/kernel/cpufeature.c | 4 ++ arch/riscv/kernel/smpboot.c | 2 +- arch/riscv/kernel/traps_misaligned.c | 56 ++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index d0345bd659c9..e4ae6af51876 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -32,4 +32,22 @@ extern struct riscv_isainfo hart_isa[NR_CPUS]; void check_unaligned_access(int cpu); +#ifdef CONFIG_RISCV_MISALIGNED +bool unaligned_ctl_available(void); +bool check_unaligned_access_emulated(int cpu); +void unaligned_emulation_finish(void); +#else +static inline bool unaligned_ctl_available(void) +{ + return false; +} + +static inline bool check_unaligned_access_emulated(int cpu) +{ + return false; +} + +static inline void unaligned_emulation_finish(void) {} +#endif + #endif diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 356e5677eeb1..fbbde800bc21 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -568,6 +568,9 @@ void check_unaligned_access(int cpu) void *src; long speed = RISCV_HWPROBE_MISALIGNED_SLOW; + if (check_unaligned_access_emulated(cpu)) + return; + page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE)); if (!page) { pr_warn("Can't alloc pages to measure memcpy performance"); @@ -648,6 +651,7 @@ void check_unaligned_access(int cpu) static int __init check_unaligned_access_boot_cpu(void) { check_unaligned_access(0); + unaligned_emulation_finish(); return 0; } diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 1b8da4e40a4d..5d9858d6ad26 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -245,8 +245,8 @@ asmlinkage __visible void smp_callin(void) riscv_ipi_enable(); numa_add_cpu(curr_cpuid); - set_cpu_online(curr_cpuid, 1); check_unaligned_access(curr_cpuid); + set_cpu_online(curr_cpuid, 1); if (has_vector()) { if (riscv_v_setup_vsize()) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index b5fb1ff078e3..d99b95084b6c 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #define INSN_MATCH_LB 0x3 #define INSN_MASK_LB 0x707f @@ -396,6 +398,8 @@ union reg_data { u64 data_u64; }; +static bool unaligned_ctl __read_mostly; + /* sysctl hooks */ int unaligned_enabled __read_mostly = 1; /* Enabled by default */ @@ -409,6 +413,8 @@ int handle_misaligned_load(struct pt_regs *regs) perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); + *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED; + if (!unaligned_enabled) return -1; @@ -585,3 +591,53 @@ int handle_misaligned_store(struct pt_regs *regs) return 0; } + +bool check_unaligned_access_emulated(int cpu) +{ + long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu); + unsigned long tmp_var, tmp_val; + bool misaligned_emu_detected; + + *mas_ptr = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + + __asm__ __volatile__ ( + " "REG_L" %[tmp], 1(%[ptr])\n" + : [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory"); + + misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_EMULATED); + /* + * If unaligned_ctl is already set, this means that we detected that all + * CPUS uses emulated misaligned access at boot time. If that changed + * when hotplugging the new cpu, this is something we don't handle. + */ + if (unlikely(unaligned_ctl && !misaligned_emu_detected)) { + pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n"); + while (true) + cpu_relax(); + } + + return misaligned_emu_detected; +} + +void __init unaligned_emulation_finish(void) +{ + int cpu; + + /* + * We can only support PR_UNALIGN controls if all CPUs have misaligned + * accesses emulated since tasks requesting such control can run on any + * CPU. + */ + for_each_present_cpu(cpu) { + if (per_cpu(misaligned_access_speed, cpu) != + RISCV_HWPROBE_MISALIGNED_EMULATED) { + return; + } + } + unaligned_ctl = true; +} + +bool unaligned_ctl_available(void) +{ + return unaligned_ctl; +} From 9f23a5d2f6b01c2ab91d791109731a0d87ec2239 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 4 Oct 2023 17:14:05 +0200 Subject: [PATCH 042/415] riscv: add support for PR_SET_UNALIGN and PR_GET_UNALIGN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that trap support is ready to handle misalignment errors in S-mode, allow the user to control the behavior of misaligned accesses using prctl(PR_SET_UNALIGN). Add an align_ctl flag in thread_struct which will be used to determine if we should SIGBUS the process or not on such fault. Signed-off-by: Clément Léger Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20231004151405.521596-9-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/processor.h | 9 +++++++++ arch/riscv/kernel/process.c | 18 ++++++++++++++++++ arch/riscv/kernel/traps_misaligned.c | 6 ++++++ 3 files changed, 33 insertions(+) diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 3e23e1786d05..adbe520d07c5 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -8,6 +8,7 @@ #include #include +#include #include @@ -82,6 +83,7 @@ struct thread_struct { unsigned long bad_cause; unsigned long vstate_ctrl; struct __riscv_v_ext_state vstate; + unsigned long align_ctl; }; /* Whitelist the fstate from the task_struct for hardened usercopy */ @@ -94,6 +96,7 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, #define INIT_THREAD { \ .sp = sizeof(init_stack) + (long)&init_stack, \ + .align_ctl = PR_UNALIGN_NOPRINT, \ } #define task_pt_regs(tsk) \ @@ -134,6 +137,12 @@ extern long riscv_v_vstate_ctrl_set_current(unsigned long arg); extern long riscv_v_vstate_ctrl_get_current(void); #endif /* CONFIG_RISCV_ISA_V */ +extern int get_unalign_ctl(struct task_struct *tsk, unsigned long addr); +extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val); + +#define GET_UNALIGN_CTL(tsk, addr) get_unalign_ctl((tsk), (addr)) +#define SET_UNALIGN_CTL(tsk, val) set_unalign_ctl((tsk), (val)) + #endif /* __ASSEMBLY__ */ #endif /* _ASM_RISCV_PROCESSOR_H */ diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index e32d737e039f..4f21d970a129 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -25,6 +25,7 @@ #include #include #include +#include register unsigned long gp_in_global __asm__("gp"); @@ -41,6 +42,23 @@ void arch_cpu_idle(void) cpu_do_idle(); } +int set_unalign_ctl(struct task_struct *tsk, unsigned int val) +{ + if (!unaligned_ctl_available()) + return -EINVAL; + + tsk->thread.align_ctl = val; + return 0; +} + +int get_unalign_ctl(struct task_struct *tsk, unsigned long adr) +{ + if (!unaligned_ctl_available()) + return -EINVAL; + + return put_user(tsk->thread.align_ctl, (unsigned long __user *)adr); +} + void __show_regs(struct pt_regs *regs) { show_regs_print_info(KERN_DEFAULT); diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index d99b95084b6c..bba301b5194d 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -418,6 +418,9 @@ int handle_misaligned_load(struct pt_regs *regs) if (!unaligned_enabled) return -1; + if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS)) + return -1; + if (get_insn(regs, epc, &insn)) return -1; @@ -517,6 +520,9 @@ int handle_misaligned_store(struct pt_regs *regs) if (!unaligned_enabled) return -1; + if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS)) + return -1; + if (get_insn(regs, epc, &insn)) return -1; From a5e3b127455d073f146a2a4ea3e7117635d34c5c Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Thu, 2 Nov 2023 10:36:49 +0100 Subject: [PATCH 043/415] swiotlb: do not free decrypted pages if dynamic Fix these two error paths: 1. When set_memory_decrypted() fails, pages may be left fully or partially decrypted. 2. Decrypted pages may be freed if swiotlb_alloc_tlb() determines that the physical address is too high. To fix the first issue, call set_memory_encrypted() on the allocated region after a failed decryption attempt. If that also fails, leak the pages. To fix the second issue, check that the TLB physical address is below the requested limit before decrypting. Let the caller differentiate between unsuitable physical address (=> retry from a lower zone) and allocation failures (=> no point in retrying). Cc: stable@vger.kernel.org Fixes: 79636caad361 ("swiotlb: if swiotlb is full, fall back to a transient memory pool") Signed-off-by: Petr Tesarik Reviewed-by: Rick Edgecombe Signed-off-by: Christoph Hellwig --- kernel/dma/swiotlb.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 26202274784f..71392c9fac10 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -558,29 +558,40 @@ void __init swiotlb_exit(void) * alloc_dma_pages() - allocate pages to be used for DMA * @gfp: GFP flags for the allocation. * @bytes: Size of the buffer. + * @phys_limit: Maximum allowed physical address of the buffer. * * Allocate pages from the buddy allocator. If successful, make the allocated * pages decrypted that they can be used for DMA. * - * Return: Decrypted pages, or %NULL on failure. + * Return: Decrypted pages, %NULL on allocation failure, or ERR_PTR(-EAGAIN) + * if the allocated physical address was above @phys_limit. */ -static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes) +static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit) { unsigned int order = get_order(bytes); struct page *page; + phys_addr_t paddr; void *vaddr; page = alloc_pages(gfp, order); if (!page) return NULL; - vaddr = page_address(page); + paddr = page_to_phys(page); + if (paddr + bytes - 1 > phys_limit) { + __free_pages(page, order); + return ERR_PTR(-EAGAIN); + } + + vaddr = phys_to_virt(paddr); if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes))) goto error; return page; error: - __free_pages(page, order); + /* Intentional leak if pages cannot be encrypted again. */ + if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) + __free_pages(page, order); return NULL; } @@ -618,11 +629,7 @@ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes, else if (phys_limit <= DMA_BIT_MASK(32)) gfp |= __GFP_DMA32; - while ((page = alloc_dma_pages(gfp, bytes)) && - page_to_phys(page) + bytes - 1 > phys_limit) { - /* allocated, but too high */ - __free_pages(page, get_order(bytes)); - + while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit))) { if (IS_ENABLED(CONFIG_ZONE_DMA32) && phys_limit < DMA_BIT_MASK(64) && !(gfp & (__GFP_DMA32 | __GFP_DMA))) From 71956d0cb56c1e5f9feeb4819db87a076418e930 Mon Sep 17 00:00:00 2001 From: Nitin Yadav Date: Thu, 26 Oct 2023 11:44:58 +0530 Subject: [PATCH 044/415] mmc: sdhci_am654: fix start loop index for TAP value parsing ti,otap-del-sel-legacy/ti,itap-del-sel-legacy passed from DT are currently ignored for all SD/MMC and eMMC modes. Fix this by making start loop index to MMC_TIMING_LEGACY. Fixes: 8ee5fc0e0b3b ("mmc: sdhci_am654: Update OTAPDLY writes") Signed-off-by: Nitin Yadav Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231026061458.1116276-1-n-yadav@ti.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci_am654.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index c125485ba80e..967bd2dfcda1 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -598,7 +598,7 @@ static int sdhci_am654_get_otap_delay(struct sdhci_host *host, return 0; } - for (i = MMC_TIMING_MMC_HS; i <= MMC_TIMING_MMC_HS400; i++) { + for (i = MMC_TIMING_LEGACY; i <= MMC_TIMING_MMC_HS400; i++) { ret = device_property_read_u32(dev, td[i].otap_binding, &sdhci_am654->otap_del_sel[i]); From ed9009ad300c0f15a3ecfe9613547b1962bde02c Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Mon, 30 Oct 2023 23:48:09 +0100 Subject: [PATCH 045/415] mmc: Add quirk MMC_QUIRK_BROKEN_CACHE_FLUSH for Micron eMMC Q2J54A Micron MTFC4GACAJCN eMMC supports cache but requires that flush cache operation be allowed only after a write has occurred. Otherwise, the cache flush command or subsequent commands will time out. Signed-off-by: Bean Huo Signed-off-by: Rafael Beims Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231030224809.59245-1-beanhuo@iokpp.de Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 4 +++- drivers/mmc/core/card.h | 4 ++++ drivers/mmc/core/mmc.c | 8 ++++++-- drivers/mmc/core/quirks.h | 7 ++++--- include/linux/mmc/card.h | 2 ++ 5 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 3a8f27c3e310..152dfe593c43 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2381,8 +2381,10 @@ enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req) } ret = mmc_blk_cqe_issue_flush(mq, req); break; - case REQ_OP_READ: case REQ_OP_WRITE: + card->written_flag = true; + fallthrough; + case REQ_OP_READ: if (host->cqe_enabled) ret = mmc_blk_cqe_issue_rw_rq(mq, req); else diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h index 4edf9057fa79..b7754a1b8d97 100644 --- a/drivers/mmc/core/card.h +++ b/drivers/mmc/core/card.h @@ -280,4 +280,8 @@ static inline int mmc_card_broken_sd_cache(const struct mmc_card *c) return c->quirks & MMC_QUIRK_BROKEN_SD_CACHE; } +static inline int mmc_card_broken_cache_flush(const struct mmc_card *c) +{ + return c->quirks & MMC_QUIRK_BROKEN_CACHE_FLUSH; +} #endif diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 8180983bd402..8ec19f9f1fd2 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -2086,13 +2086,17 @@ static int _mmc_flush_cache(struct mmc_host *host) { int err = 0; + if (mmc_card_broken_cache_flush(host->card) && !host->card->written_flag) + return 0; + if (_mmc_cache_enabled(host)) { err = mmc_switch(host->card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_FLUSH_CACHE, 1, CACHE_FLUSH_TIMEOUT_MS); if (err) - pr_err("%s: cache flush error %d\n", - mmc_hostname(host), err); + pr_err("%s: cache flush error %d\n", mmc_hostname(host), err); + else + host->card->written_flag = false; } return err; diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index 32b64b564fb1..cca71867bc4a 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -110,11 +110,12 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = { MMC_QUIRK_TRIM_BROKEN), /* - * Micron MTFC4GACAJCN-1M advertises TRIM but it does not seems to - * support being used to offload WRITE_ZEROES. + * Micron MTFC4GACAJCN-1M supports TRIM but does not appear to support + * WRITE_ZEROES offloading. It also supports caching, but the cache can + * only be flushed after a write has occurred. */ MMC_FIXUP("Q2J54A", CID_MANFID_MICRON, 0x014e, add_quirk_mmc, - MMC_QUIRK_TRIM_BROKEN), + MMC_QUIRK_TRIM_BROKEN | MMC_QUIRK_BROKEN_CACHE_FLUSH), /* * Kingston EMMC04G-M627 advertises TRIM but it does not seems to diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index daa2f40d9ce6..7b12eebc5586 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -295,7 +295,9 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */ #define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */ #define MMC_QUIRK_BROKEN_SD_CACHE (1<<15) /* Disable broken SD cache support */ +#define MMC_QUIRK_BROKEN_CACHE_FLUSH (1<<16) /* Don't flush cache until the write has occurred */ + bool written_flag; /* Indicates eMMC has been written since power on */ bool reenable_cmdq; /* Re-enable Command Queue */ unsigned int erase_size; /* erase size in sectors */ From b44f9da81783fda72632ef9b0d05ea3f3ca447a5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 2 Nov 2023 10:51:06 +0300 Subject: [PATCH 046/415] mmc: vub300: fix an error code This error path should return -EINVAL instead of success. Fixes: 88095e7b473a ("mmc: Add new VUB300 USB-to-SD/SDIO/MMC driver") Signed-off-by: Dan Carpenter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/0769d30c-ad80-421b-bf5d-7d6f5d85604e@moroto.mountain Signed-off-by: Ulf Hansson --- drivers/mmc/host/vub300.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c index de3f443f5fdc..fd67c0682b38 100644 --- a/drivers/mmc/host/vub300.c +++ b/drivers/mmc/host/vub300.c @@ -2309,6 +2309,7 @@ static int vub300_probe(struct usb_interface *interface, vub300->read_only = (0x0010 & vub300->system_port_status.port_flags) ? 1 : 0; } else { + retval = -EINVAL; goto error5; } usb_set_intfdata(interface, vub300); From 421b605edb1ce611dee06cf6fd9a1c1f2fd85ad0 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Fri, 3 Nov 2023 09:42:20 +0900 Subject: [PATCH 047/415] Revert "mmc: core: Capture correct oemid-bits for eMMC cards" This reverts commit 84ee19bffc9306128cd0f1c650e89767079efeff. The commit above made quirks with an OEMID fail to be applied, as they were checking card->cid.oemid for the full 16 bits defined in MMC_FIXUP macros but the field would only contain the bottom 8 bits. eMMC v5.1A might have bogus values in OEMID's higher bits so another fix will be made, but it has been decided to revert this until that is ready. Fixes: 84ee19bffc93 ("mmc: core: Capture correct oemid-bits for eMMC cards") Link: https://lkml.kernel.org/r/ZToJsSLHr8RnuTHz@codewreck.org Link: https://lkml.kernel.org/r/CAPDyKFqkKibcXnwjnhc3+W1iJBHLeqQ9BpcZrSwhW2u9K2oUtg@mail.gmail.com Signed-off-by: Dominique Martinet Cc: stable@vger.kernel.org Cc: Alex Fetters Reviewed-by: Avri Altman Link: https://lore.kernel.org/r/20231103004220.1666641-1-asmadeus@codewreck.org Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 8ec19f9f1fd2..705942edacc6 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -104,7 +104,7 @@ static int mmc_decode_cid(struct mmc_card *card) case 3: /* MMC v3.1 - v3.3 */ case 4: /* MMC v4 */ card->cid.manfid = UNSTUFF_BITS(resp, 120, 8); - card->cid.oemid = UNSTUFF_BITS(resp, 104, 8); + card->cid.oemid = UNSTUFF_BITS(resp, 104, 16); card->cid.prod_name[0] = UNSTUFF_BITS(resp, 96, 8); card->cid.prod_name[1] = UNSTUFF_BITS(resp, 88, 8); card->cid.prod_name[2] = UNSTUFF_BITS(resp, 80, 8); From 004fc58edea6f00db9ad07b40b882e8d976f7a54 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Tue, 31 Oct 2023 12:31:39 +0200 Subject: [PATCH 048/415] ASoC: mediatek: mt8186_mt6366_rt1019_rt5682s: trivial: fix error messages Property 'playback-codecs' is referenced as 'speaker-codec' in the error message, and this can lead to confusion. Correct the error message such that the correct property name is referenced. Fixes: 0da16e370dd7 ("ASoC: mediatek: mt8186: add machine driver with mt6366, rt1019 and rt5682s") Signed-off-by: Eugen Hristev Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231031103139.77395-1-eugen.hristev@collabora.com Signed-off-by: Mark Brown --- sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c b/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c index 6dfcfcf47cab..f78197c8e582 100644 --- a/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c +++ b/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c @@ -1216,7 +1216,7 @@ static int mt8186_mt6366_rt1019_rt5682s_dev_probe(struct platform_device *pdev) playback_codec = of_get_child_by_name(pdev->dev.of_node, "playback-codecs"); if (!playback_codec) { ret = -EINVAL; - dev_err_probe(&pdev->dev, ret, "Property 'speaker-codecs' missing or invalid\n"); + dev_err_probe(&pdev->dev, ret, "Property 'playback-codecs' missing or invalid\n"); goto err_playback_codec; } @@ -1230,7 +1230,7 @@ static int mt8186_mt6366_rt1019_rt5682s_dev_probe(struct platform_device *pdev) for_each_card_prelinks(card, i, dai_link) { ret = mt8186_mt6366_card_set_be_link(card, dai_link, playback_codec, "I2S3"); if (ret) { - dev_err_probe(&pdev->dev, ret, "%s set speaker_codec fail\n", + dev_err_probe(&pdev->dev, ret, "%s set playback_codec fail\n", dai_link->name); goto err_probe; } From 0df96fb71a395b4fc9c80180306420c743f395a8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 3 Nov 2023 09:26:13 -0600 Subject: [PATCH 049/415] io_uring/rw: don't attempt to allocate async data if opcode doesn't need it The new read multishot method doesn't need to allocate async data ever, as it doesn't do vectored IO and it must only be used with provided buffers. While it doesn't have ->prep_async() set, it also sets ->async_size to 0, which is different from any other read/write type we otherwise support. If it's used on a file type that isn't pollable, we do try and allocate this async data, and then try and use that data. But since we passed in a size of 0 for the data, we get a NULL back on data allocation. We then proceed to dereference that to copy state, and that obviously won't end well. Add a check in io_setup_async_rw() for this condition, and avoid copying state. Also add a check for whether or not buffer selection is specified in prep while at it. Fixes: fc68fcda0491 ("io_uring/rw: add support for IORING_OP_READ_MULTISHOT") Link: https://bugzilla.kernel.org/show_bug.cgi?id=218101 Signed-off-by: Jens Axboe --- io_uring/rw.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/io_uring/rw.c b/io_uring/rw.c index 3398e1d944c2..1c76de483ef6 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -131,6 +131,10 @@ int io_read_mshot_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { int ret; + /* must be used with provided buffers */ + if (!(req->flags & REQ_F_BUFFER_SELECT)) + return -EINVAL; + ret = io_prep_rw(req, sqe); if (unlikely(ret)) return ret; @@ -542,6 +546,9 @@ static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, { if (!force && !io_cold_defs[req->opcode].prep_async) return 0; + /* opcode type doesn't need async data */ + if (!io_cold_defs[req->opcode].async_size) + return 0; if (!req_has_async_data(req)) { struct io_async_rw *iorw; From dec96fc2dcb59723e041416b8dc53e011b4bfc2e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 13 Oct 2023 10:05:48 +0100 Subject: [PATCH 050/415] btrfs: use u64 for buffer sizes in the tree search ioctls In the tree search v2 ioctl we use the type size_t, which is an unsigned long, to track the buffer size in the local variable 'buf_size'. An unsigned long is 32 bits wide on a 32 bits architecture. The buffer size defined in struct btrfs_ioctl_search_args_v2 is a u64, so when we later try to copy the local variable 'buf_size' to the argument struct, when the search returns -EOVERFLOW, we copy only 32 bits which will be a problem on big endian systems. Fix this by using a u64 type for the buffer sizes, not only at btrfs_ioctl_tree_search_v2(), but also everywhere down the call chain so that we can use the u64 at btrfs_ioctl_tree_search_v2(). Fixes: cc68a8a5a433 ("btrfs: new ioctl TREE_SEARCH_V2") Reported-by: Dan Carpenter Link: https://lore.kernel.org/linux-btrfs/ce6f4bd6-9453-4ffe-ba00-cee35495e10f@moroto.mountain/ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 89cd212735ea..f7e94aff41ae 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1528,7 +1528,7 @@ static noinline int key_in_sk(struct btrfs_key *key, static noinline int copy_to_sk(struct btrfs_path *path, struct btrfs_key *key, struct btrfs_ioctl_search_key *sk, - size_t *buf_size, + u64 *buf_size, char __user *ubuf, unsigned long *sk_offset, int *num_found) @@ -1660,7 +1660,7 @@ static noinline int copy_to_sk(struct btrfs_path *path, static noinline int search_ioctl(struct inode *inode, struct btrfs_ioctl_search_key *sk, - size_t *buf_size, + u64 *buf_size, char __user *ubuf) { struct btrfs_fs_info *info = btrfs_sb(inode->i_sb); @@ -1733,7 +1733,7 @@ static noinline int btrfs_ioctl_tree_search(struct inode *inode, struct btrfs_ioctl_search_args __user *uargs = argp; struct btrfs_ioctl_search_key sk; int ret; - size_t buf_size; + u64 buf_size; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1763,8 +1763,8 @@ static noinline int btrfs_ioctl_tree_search_v2(struct inode *inode, struct btrfs_ioctl_search_args_v2 __user *uarg = argp; struct btrfs_ioctl_search_args_v2 args; int ret; - size_t buf_size; - const size_t buf_limit = SZ_16M; + u64 buf_size; + const u64 buf_limit = SZ_16M; if (!capable(CAP_SYS_ADMIN)) return -EPERM; From b8212814d1e8428a082234223105e4071b844fab Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 12 Oct 2023 12:42:55 +0300 Subject: [PATCH 051/415] btrfs: directly return 0 on no error code in btrfs_insert_raid_extent() It's more obvious to return a literal zero instead of "return ret;". Plus Smatch complains that ret could be uninitialized if the ordered_extent->bioc_list list is empty and this silences that warning. Signed-off-by: Dan Carpenter Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/raid-stripe-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c index 944e8f1862aa..9589362acfbf 100644 --- a/fs/btrfs/raid-stripe-tree.c +++ b/fs/btrfs/raid-stripe-tree.c @@ -145,7 +145,7 @@ int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans, btrfs_put_bioc(bioc); } - return ret; + return 0; } int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info, From dfcb03ae8a341600d72fbf3c79429f306764d653 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 17 Oct 2023 16:23:22 +0900 Subject: [PATCH 052/415] btrfs: zoned: drop no longer valid write pointer check There is a check of the write pointer vs the zone size to reject an invalid write pointer. However, as of now, we have RAID0/RAID10 on the zoned mode, we can have a block group whose size is larger than the zone size. As an equivalent check against the block group's zone_capacity is already there, we can just drop this invalid check. Fixes: 568220fa9657 ("btrfs: zoned: support RAID0/1/10 on top of raid stripe tree") Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 3504ade30cb0..188378ca19c7 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1661,13 +1661,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) } out: - if (cache->alloc_offset > fs_info->zone_size) { - btrfs_err(fs_info, - "zoned: invalid write pointer %llu in block group %llu", - cache->alloc_offset, cache->start); - ret = -EIO; - } - if (cache->alloc_offset > cache->zone_capacity) { btrfs_err(fs_info, "zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu", From 776a838f1fa95670c1c1cf7109a898090b473fa3 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 17 Oct 2023 17:00:31 +0900 Subject: [PATCH 053/415] btrfs: zoned: wait for data BG to be finished on direct IO allocation Running the fio command below on a ZNS device results in "Resource temporarily unavailable" error. $ sudo fio --name=w --directory=/mnt --filesize=1GB --bs=16MB --numjobs=16 \ --rw=write --ioengine=libaio --iodepth=128 --direct=1 fio: io_u error on file /mnt/w.2.0: Resource temporarily unavailable: write offset=117440512, buflen=16777216 fio: io_u error on file /mnt/w.2.0: Resource temporarily unavailable: write offset=134217728, buflen=16777216 ... This happens because -EAGAIN error returned from btrfs_reserve_extent() called from btrfs_new_extent_direct() is spilling over to the userland. btrfs_reserve_extent() returns -EAGAIN when there is no active zone available. Then, the caller should wait for some other on-going IO to finish a zone and retry the allocation. This logic is already implemented for buffered write in cow_file_range(), but it is missing for the direct IO counterpart. Implement the same logic for it. Reported-by: Shinichiro Kawasaki Fixes: 2ce543f47843 ("btrfs: zoned: wait until zone is finished when allocation didn't progress") CC: stable@vger.kernel.org # 6.1+ Tested-by: Shinichiro Kawasaki Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b388505c91cc..137c4824ff91 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6979,8 +6979,15 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode, int ret; alloc_hint = get_extent_allocation_hint(inode, start, len); +again: ret = btrfs_reserve_extent(root, len, len, fs_info->sectorsize, 0, alloc_hint, &ins, 1, 1); + if (ret == -EAGAIN) { + ASSERT(btrfs_is_zoned(fs_info)); + wait_on_bit_io(&inode->root->fs_info->flags, BTRFS_FS_NEED_ZONE_FINISH, + TASK_UNINTERRUPTIBLE); + goto again; + } if (ret) return ERR_PTR(ret); From d8ba2a91fc3cd0347823435971f58f473cbba7aa Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 13 Oct 2023 15:18:17 -0400 Subject: [PATCH 054/415] btrfs: get correct owning_root when dropping snapshot Dave reported a bug where we were aborting the transaction while trying to cleanup the squota reservation for an extent. This turned out to be because we're doing btrfs_header_owner(next) in do_walk_down when we decide to free the block. However in this code block we haven't explicitly read next, so it could be stale. We would then get whatever garbage happened to be in the pages at this point. The commit that introduced that is "btrfs: track owning root in btrfs_ref". Fix this by saving the owner_root when we do the btrfs_lookup_extent_info(). We always do this in do_walk_down, it is how we make the decision of whether or not to delete the block. This is cheap because we've already done the extent item lookup at this point, so it's straightforward to just grab the owner root as well. Then we can use this when deleting the metadata block without needing to force a read of the extent buffer to find the owner. This fixes the problem that Dave reported. Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 2 +- fs/btrfs/extent-tree.c | 25 +++++++++++++++++-------- fs/btrfs/extent-tree.h | 3 ++- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c0c5f2239820..14cefeaf9622 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -421,7 +421,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, if (btrfs_block_can_be_shared(root, buf)) { ret = btrfs_lookup_extent_info(trans, fs_info, buf->start, btrfs_header_level(buf), 1, - &refs, &flags); + &refs, &flags, NULL); if (ret) return ret; if (unlikely(refs == 0)) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c8e5b4715b49..0455935ff558 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -102,7 +102,8 @@ int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len) */ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, - u64 offset, int metadata, u64 *refs, u64 *flags) + u64 offset, int metadata, u64 *refs, u64 *flags, + u64 *owning_root) { struct btrfs_root *extent_root; struct btrfs_delayed_ref_head *head; @@ -114,6 +115,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, u32 item_size; u64 num_refs; u64 extent_flags; + u64 owner = 0; int ret; /* @@ -167,6 +169,8 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_extent_item); num_refs = btrfs_extent_refs(leaf, ei); extent_flags = btrfs_extent_flags(leaf, ei); + owner = btrfs_get_extent_owner_root(fs_info, leaf, + path->slots[0]); } else { ret = -EUCLEAN; btrfs_err(fs_info, @@ -226,6 +230,8 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, *refs = num_refs; if (flags) *flags = extent_flags; + if (owning_root) + *owning_root = owner; out_free: btrfs_free_path(path); return ret; @@ -5234,7 +5240,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, /* We don't lock the tree block, it's OK to be racy here */ ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, wc->level - 1, 1, &refs, - &flags); + &flags, NULL); /* We don't care about errors in readahead. */ if (ret < 0) continue; @@ -5301,7 +5307,8 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, ret = btrfs_lookup_extent_info(trans, fs_info, eb->start, level, 1, &wc->refs[level], - &wc->flags[level]); + &wc->flags[level], + NULL); BUG_ON(ret == -ENOMEM); if (ret) return ret; @@ -5391,6 +5398,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, u64 bytenr; u64 generation; u64 parent; + u64 owner_root = 0; struct btrfs_tree_parent_check check = { 0 }; struct btrfs_key key; struct btrfs_ref ref = { 0 }; @@ -5434,7 +5442,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1, &wc->refs[level - 1], - &wc->flags[level - 1]); + &wc->flags[level - 1], + &owner_root); if (ret < 0) goto out_unlock; @@ -5567,8 +5576,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, find_next_key(path, level, &wc->drop_progress); btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr, - fs_info->nodesize, parent, - btrfs_header_owner(next)); + fs_info->nodesize, parent, owner_root); btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid, 0, false); ret = btrfs_free_extent(trans, &ref); @@ -5635,7 +5643,8 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, ret = btrfs_lookup_extent_info(trans, fs_info, eb->start, level, 1, &wc->refs[level], - &wc->flags[level]); + &wc->flags[level], + NULL); if (ret < 0) { btrfs_tree_unlock_rw(eb, path->locks[level]); path->locks[level] = 0; @@ -5880,7 +5889,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc) ret = btrfs_lookup_extent_info(trans, fs_info, path->nodes[level]->start, level, 1, &wc->refs[level], - &wc->flags[level]); + &wc->flags[level], NULL); if (ret < 0) { err = ret; goto out_end_trans; diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h index 0716f65d9753..2e066035ccee 100644 --- a/fs/btrfs/extent-tree.h +++ b/fs/btrfs/extent-tree.h @@ -99,7 +99,8 @@ u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len); int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, - u64 offset, int metadata, u64 *refs, u64 *flags); + u64 offset, int metadata, u64 *refs, u64 *flags, + u64 *owner_root); int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num, int reserved); int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, From 47e2b06b7b5cb356a987ba3429550c3a89ea89d6 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 28 Oct 2023 13:28:45 +1030 Subject: [PATCH 055/415] btrfs: make found_logical_ret parameter mandatory for function queue_scrub_stripe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [BUG] There is a compilation warning reported on commit ae76d8e3e135 ("btrfs: scrub: fix grouping of read IO"), where gcc (14.0.0 20231022 experimental) is reporting the following uninitialized variable: fs/btrfs/scrub.c: In function ‘scrub_simple_mirror.isra’: fs/btrfs/scrub.c:2075:29: error: ‘found_logical’ may be used uninitialized [-Werror=maybe-uninitialized[https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wmaybe-uninitialized]] 2075 | cur_logical = found_logical + BTRFS_STRIPE_LEN; fs/btrfs/scrub.c:2040:21: note: ‘found_logical’ was declared here 2040 | u64 found_logical; | ^~~~~~~~~~~~~ [CAUSE] This is a false alert, as @found_logical is passed as parameter @found_logical_ret of function queue_scrub_stripe(). As long as queue_scrub_stripe() returned 0, we would update @found_logical_ret. And if queue_scrub_stripe() returned >0 or <0, the caller would not utilized @found_logical, thus there should be nothing wrong. Although the triggering gcc is still experimental, it looks like the extra check on "if (found_logical_ret)" can sometimes confuse the compiler. Meanwhile the only caller of queue_scrub_stripe() is always passing a valid pointer, there is no need for such check at all. [FIX] Although the report itself is a false alert, we can still make it more explicit by: - Replace the check for @found_logical_ret with ASSERT() - Initialize @found_logical to U64_MAX - Add one extra ASSERT() to make sure @found_logical got updated Link: https://lore.kernel.org/linux-btrfs/87fs1x1p93.fsf@gentoo.org/ Fixes: ae76d8e3e135 ("btrfs: scrub: fix grouping of read IO") Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 9ce5be21b036..f62a408671cb 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1868,6 +1868,9 @@ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group * */ ASSERT(sctx->cur_stripe < SCRUB_TOTAL_STRIPES); + /* @found_logical_ret must be specified. */ + ASSERT(found_logical_ret); + stripe = &sctx->stripes[sctx->cur_stripe]; scrub_reset_stripe(stripe); ret = scrub_find_fill_first_stripe(bg, &sctx->extent_path, @@ -1876,8 +1879,7 @@ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group * /* Either >0 as no more extents or <0 for error. */ if (ret) return ret; - if (found_logical_ret) - *found_logical_ret = stripe->logical; + *found_logical_ret = stripe->logical; sctx->cur_stripe++; /* We filled one group, submit it. */ @@ -2080,7 +2082,7 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx, /* Go through each extent items inside the logical range */ while (cur_logical < logical_end) { - u64 found_logical; + u64 found_logical = U64_MAX; u64 cur_physical = physical + cur_logical - logical_start; /* Canceled? */ @@ -2115,6 +2117,8 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx, if (ret < 0) break; + /* queue_scrub_stripe() returned 0, @found_logical must be updated. */ + ASSERT(found_logical != U64_MAX); cur_logical = found_logical + BTRFS_STRIPE_LEN; /* Don't hold CPU for too long time */ From cd63ffbd23edc176f09cac5c9287db732d7cbb73 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 30 Oct 2023 11:54:23 +0000 Subject: [PATCH 056/415] btrfs: fix error pointer dereference after failure to allocate fs devices At device_list_add() we allocate a btrfs_fs_devices structure and then before checking if the allocation failed (pointer is ERR_PTR(-ENOMEM)), we dereference the error pointer in a memcpy() argument if the feature BTRFS_FEATURE_INCOMPAT_METADATA_UUID is enabled. Fix this by checking for an allocation error before trying the memcpy(). Fixes: f7361d8c3fc3 ("btrfs: sipmlify uuid parameters of alloc_fs_devices()") Reviewed-by: Qu Wenruo Reviewed-by: Anand Jain Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1fdfa9153e30..dd279241f78c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -746,13 +746,13 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (!fs_devices) { fs_devices = alloc_fs_devices(disk_super->fsid); + if (IS_ERR(fs_devices)) + return ERR_CAST(fs_devices); + if (has_metadata_uuid) memcpy(fs_devices->metadata_uuid, disk_super->metadata_uuid, BTRFS_FSID_SIZE); - if (IS_ERR(fs_devices)) - return ERR_CAST(fs_devices); - if (same_fsid_diff_dev) { generate_random_uuid(fs_devices->fsid); fs_devices->temp_fsid = true; From f8f9ab2d98116e79d220f1d089df7464ad4e026d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 3 Nov 2023 10:35:40 -0600 Subject: [PATCH 057/415] io_uring/net: ensure socket is marked connected on connect retry io_uring does non-blocking connection attempts, which can yield some unexpected results if a connect request is re-attempted by an an application. This is equivalent to the following sync syscall sequence: sock = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP); connect(sock, &addr, sizeof(addr); ret == -1 and errno == EINPROGRESS expected here. Now poll for POLLOUT on sock, and when that returns, we expect the socket to be connected. But if we follow that procedure with: connect(sock, &addr, sizeof(addr)); you'd expect ret == -1 and errno == EISCONN here, but you actually get ret == 0. If we attempt the connection one more time, then we get EISCON as expected. io_uring used to do this, but turns out that bluetooth fails with EBADFD if you attempt to re-connect. Also looks like EISCONN _could_ occur with this sequence. Retain the ->in_progress logic, but work-around a potential EISCONN or EBADFD error and only in those cases look at the sock_error(). This should work in general and avoid the odd sequence of a repeated connect request returning success when the socket is already connected. This is all a side effect of the socket state being in a CONNECTING state when we get EINPROGRESS, and only a re-connect or other related operation will turn that into CONNECTED. Cc: stable@vger.kernel.org Fixes: 3fb1bd688172 ("io_uring/net: handle -EINPROGRESS correct for IORING_OP_CONNECT") Link: https://github.com/axboe/liburing/issues/980 Signed-off-by: Jens Axboe --- io_uring/net.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 7a8e298af81b..75d494dad7e2 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1461,16 +1461,6 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) int ret; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; - if (connect->in_progress) { - struct socket *socket; - - ret = -ENOTSOCK; - socket = sock_from_file(req->file); - if (socket) - ret = sock_error(socket->sk); - goto out; - } - if (req_has_async_data(req)) { io = req->async_data; } else { @@ -1490,9 +1480,7 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) && force_nonblock) { if (ret == -EINPROGRESS) { connect->in_progress = true; - return -EAGAIN; - } - if (ret == -ECONNABORTED) { + } else if (ret == -ECONNABORTED) { if (connect->seen_econnaborted) goto out; connect->seen_econnaborted = true; @@ -1506,6 +1494,16 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) memcpy(req->async_data, &__io, sizeof(__io)); return -EAGAIN; } + if (connect->in_progress) { + /* + * At least bluetooth will return -EBADFD on a re-connect + * attempt, and it's (supposedly) also valid to get -EISCONN + * which means the previous result is good. For both of these, + * grab the sock_error() and use that for the completion. + */ + if (ret == -EBADFD || ret == -EISCONN) + ret = sock_error(sock_from_file(req->file)->sk); + } if (ret == -ERESTARTSYS) ret = -EINTR; out: From 5c5f0d2b5f92c47baf82b9b211e27edd7d195158 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 9 Jun 2023 13:04:14 +0800 Subject: [PATCH 058/415] libceph: add doutc and *_client debug macros support This will help print the fsid and client's global_id in debug logs, and also print the function names. [ idryomov: %lld -> %llu, leading space for doutc(), don't include __func__ in pr_*() variants ] Link: https://tracker.ceph.com/issues/61590 Signed-off-by: Xiubo Li Reviewed-by: Patrick Donnelly Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_debug.h | 38 +++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h index d5a5da838caf..11a92a946016 100644 --- a/include/linux/ceph/ceph_debug.h +++ b/include/linux/ceph/ceph_debug.h @@ -19,12 +19,25 @@ pr_debug("%.*s %12.12s:%-4d : " fmt, \ 8 - (int)sizeof(KBUILD_MODNAME), " ", \ kbasename(__FILE__), __LINE__, ##__VA_ARGS__) +# define doutc(client, fmt, ...) \ + pr_debug("%.*s %12.12s:%-4d : [%pU %llu] " fmt, \ + 8 - (int)sizeof(KBUILD_MODNAME), " ", \ + kbasename(__FILE__), __LINE__, \ + &client->fsid, client->monc.auth->global_id, \ + ##__VA_ARGS__) # else /* faux printk call just to see any compiler warnings. */ # define dout(fmt, ...) do { \ if (0) \ printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ } while (0) +# define doutc(client, fmt, ...) do { \ + if (0) \ + printk(KERN_DEBUG "[%pU %llu] " fmt, \ + &client->fsid, \ + client->monc.auth->global_id, \ + ##__VA_ARGS__); \ + } while (0) # endif #else @@ -33,7 +46,32 @@ * or, just wrap pr_debug */ # define dout(fmt, ...) pr_debug(" " fmt, ##__VA_ARGS__) +# define doutc(client, fmt, ...) \ + pr_debug(" [%pU %llu] %s: " fmt, &client->fsid, \ + client->monc.auth->global_id, __func__, ##__VA_ARGS__) #endif +#define pr_notice_client(client, fmt, ...) \ + pr_notice("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_info_client(client, fmt, ...) \ + pr_info("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_client(client, fmt, ...) \ + pr_warn("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_once_client(client, fmt, ...) \ + pr_warn_once("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_err_client(client, fmt, ...) \ + pr_err("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_warn_ratelimited_client(client, fmt, ...) \ + pr_warn_ratelimited("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) +#define pr_err_ratelimited_client(client, fmt, ...) \ + pr_err_ratelimited("[%pU %llu]: " fmt, &client->fsid, \ + client->monc.auth->global_id, ##__VA_ARGS__) + #endif From 197b7d792d6aead2e30d4b2c054ffabae2ed73dc Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 9 Jun 2023 15:15:47 +0800 Subject: [PATCH 059/415] ceph: pass the mdsc to several helpers We will use the 'mdsc' to get the global_id in the following commits. Link: https://tracker.ceph.com/issues/61590 Signed-off-by: Xiubo Li Reviewed-by: Patrick Donnelly Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 15 ++++++++------ fs/ceph/debugfs.c | 4 ++-- fs/ceph/dir.c | 2 +- fs/ceph/file.c | 2 +- fs/ceph/mds_client.c | 39 +++++++++++++++++++++---------------- fs/ceph/mds_client.h | 3 ++- fs/ceph/mdsmap.c | 3 ++- fs/ceph/snap.c | 16 +++++++++------ fs/ceph/super.h | 3 ++- include/linux/ceph/mdsmap.h | 5 ++++- 10 files changed, 55 insertions(+), 37 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 14215ec646f7..c00f15b773f0 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1178,7 +1178,8 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) } } -void ceph_remove_cap(struct ceph_cap *cap, bool queue_release) +void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, + bool queue_release) { struct ceph_inode_info *ci = cap->ci; struct ceph_fs_client *fsc; @@ -1342,6 +1343,8 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg) */ void __ceph_remove_caps(struct ceph_inode_info *ci) { + struct inode *inode = &ci->netfs.inode; + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct rb_node *p; /* lock i_ceph_lock, because ceph_d_revalidate(..., LOOKUP_RCU) @@ -1351,7 +1354,7 @@ void __ceph_remove_caps(struct ceph_inode_info *ci) while (p) { struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); p = rb_next(p); - ceph_remove_cap(cap, true); + ceph_remove_cap(mdsc, cap, true); } spin_unlock(&ci->i_ceph_lock); } @@ -3999,7 +4002,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, goto out_unlock; if (target < 0) { - ceph_remove_cap(cap, false); + ceph_remove_cap(mdsc, cap, false); goto out_unlock; } @@ -4034,7 +4037,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, change_auth_cap_ses(ci, tcap->session); } } - ceph_remove_cap(cap, false); + ceph_remove_cap(mdsc, cap, false); goto out_unlock; } else if (tsession) { /* add placeholder for the export tagert */ @@ -4051,7 +4054,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, spin_unlock(&mdsc->cap_dirty_lock); } - ceph_remove_cap(cap, false); + ceph_remove_cap(mdsc, cap, false); goto out_unlock; } @@ -4164,7 +4167,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, ocap->mseq, mds, le32_to_cpu(ph->seq), le32_to_cpu(ph->mseq)); } - ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); + ceph_remove_cap(mdsc, ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); } *old_issued = issued; diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 3904333fa6c3..2f1e7498cd74 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -81,7 +81,7 @@ static int mdsc_show(struct seq_file *s, void *p) if (req->r_inode) { seq_printf(s, " #%llx", ceph_ino(req->r_inode)); } else if (req->r_dentry) { - path = ceph_mdsc_build_path(req->r_dentry, &pathlen, + path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen, &pathbase, 0); if (IS_ERR(path)) path = NULL; @@ -100,7 +100,7 @@ static int mdsc_show(struct seq_file *s, void *p) } if (req->r_old_dentry) { - path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen, + path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &pathlen, &pathbase, 0); if (IS_ERR(path)) path = NULL; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 854cbdd66661..fff5cb2df9a8 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1226,7 +1226,7 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, if (result) { int pathlen = 0; u64 base = 0; - char *path = ceph_mdsc_build_path(dentry, &pathlen, + char *path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &base, 0); /* mark error on parent + clear complete */ diff --git a/fs/ceph/file.c b/fs/ceph/file.c index b5f8038065d7..7c4d79a23506 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -574,7 +574,7 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc, if (result) { int pathlen = 0; u64 base = 0; - char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen, + char *path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen, &base, 0); pr_warn("async create failure path=(%llx)%s result=%d!\n", diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 293b93182955..284ba087c507 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2126,6 +2126,7 @@ static bool drop_negative_children(struct dentry *dentry) */ static int trim_caps_cb(struct inode *inode, int mds, void *arg) { + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); int *remaining = arg; struct ceph_inode_info *ci = ceph_inode(inode); int used, wanted, oissued, mine; @@ -2173,7 +2174,7 @@ static int trim_caps_cb(struct inode *inode, int mds, void *arg) if (oissued) { /* we aren't the only cap.. just remove us */ - ceph_remove_cap(cap, true); + ceph_remove_cap(mdsc, cap, true); (*remaining)--; } else { struct dentry *dentry; @@ -2588,6 +2589,7 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen) /** * ceph_mdsc_build_path - build a path string to a given dentry + * @mdsc: mds client * @dentry: dentry to which path should be built * @plen: returned length of string * @pbase: returned base inode number @@ -2607,8 +2609,8 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen) * Encode hidden .snap dirs as a double /, i.e. * foo/.snap/bar -> foo//bar */ -char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase, - int for_wire) +char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry, + int *plen, u64 *pbase, int for_wire) { struct dentry *cur; struct inode *inode; @@ -2726,9 +2728,9 @@ char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase, return path + pos; } -static int build_dentry_path(struct dentry *dentry, struct inode *dir, - const char **ppath, int *ppathlen, u64 *pino, - bool *pfreepath, bool parent_locked) +static int build_dentry_path(struct ceph_mds_client *mdsc, struct dentry *dentry, + struct inode *dir, const char **ppath, int *ppathlen, + u64 *pino, bool *pfreepath, bool parent_locked) { char *path; @@ -2744,7 +2746,7 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir, return 0; } rcu_read_unlock(); - path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1); + path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1); if (IS_ERR(path)) return PTR_ERR(path); *ppath = path; @@ -2756,6 +2758,7 @@ static int build_inode_path(struct inode *inode, const char **ppath, int *ppathlen, u64 *pino, bool *pfreepath) { + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct dentry *dentry; char *path; @@ -2765,7 +2768,7 @@ static int build_inode_path(struct inode *inode, return 0; } dentry = d_find_alias(inode); - path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1); + path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1); dput(dentry); if (IS_ERR(path)) return PTR_ERR(path); @@ -2778,10 +2781,11 @@ static int build_inode_path(struct inode *inode, * request arguments may be specified via an inode *, a dentry *, or * an explicit ino+path. */ -static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, - struct inode *rdiri, const char *rpath, - u64 rino, const char **ppath, int *pathlen, - u64 *ino, bool *freepath, bool parent_locked) +static int set_request_path_attr(struct ceph_mds_client *mdsc, struct inode *rinode, + struct dentry *rdentry, struct inode *rdiri, + const char *rpath, u64 rino, const char **ppath, + int *pathlen, u64 *ino, bool *freepath, + bool parent_locked) { int r = 0; @@ -2790,7 +2794,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode), ceph_snap(rinode)); } else if (rdentry) { - r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino, + r = build_dentry_path(mdsc, rdentry, rdiri, ppath, pathlen, ino, freepath, parent_locked); dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, *ppath); @@ -2877,7 +2881,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD, &session->s_features); - ret = set_request_path_attr(req->r_inode, req->r_dentry, + ret = set_request_path_attr(mdsc, req->r_inode, req->r_dentry, req->r_parent, req->r_path1, req->r_ino1.ino, &path1, &pathlen1, &ino1, &freepath1, test_bit(CEPH_MDS_R_PARENT_LOCKED, @@ -2891,7 +2895,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, if (req->r_old_dentry && !(req->r_old_dentry->d_flags & DCACHE_DISCONNECTED)) old_dentry = req->r_old_dentry; - ret = set_request_path_attr(NULL, old_dentry, + ret = set_request_path_attr(mdsc, NULL, old_dentry, req->r_old_dentry_dir, req->r_path2, req->r_ino2.ino, &path2, &pathlen2, &ino2, &freepath2, true); @@ -4290,6 +4294,7 @@ static struct dentry* d_find_primary(struct inode *inode) */ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) { + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); union { struct ceph_mds_cap_reconnect v2; struct ceph_mds_cap_reconnect_v1 v1; @@ -4307,7 +4312,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) dentry = d_find_primary(inode); if (dentry) { /* set pathbase to parent dir when msg_version >= 2 */ - path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, + path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, recon_state->msg_version >= 2); dput(dentry); if (IS_ERR(path)) { @@ -5662,7 +5667,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) return; } - newmap = ceph_mdsmap_decode(&p, end, ceph_msgr2(mdsc->fsc->client)); + newmap = ceph_mdsmap_decode(mdsc, &p, end, ceph_msgr2(mdsc->fsc->client)); if (IS_ERR(newmap)) { err = PTR_ERR(newmap); goto bad_unlock; diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 5a3714bdd64a..d930eb79dc38 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -581,7 +581,8 @@ static inline void ceph_mdsc_free_path(char *path, int len) __putname(path - (PATH_MAX - 1 - len)); } -extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, +extern char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, + struct dentry *dentry, int *plen, u64 *base, int for_wire); extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry); diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 7dac21ee6ce7..6cbec7aed5a0 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -114,7 +114,8 @@ static int __decode_and_drop_compat_set(void **p, void* end) * Ignore any fields we don't care about (there are quite a few of * them). */ -struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) +struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p, + void *end, bool msgr2) { struct ceph_mdsmap *m; const void *start = *p; diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 813f21add992..55090e6c9967 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -329,7 +329,8 @@ static int cmpu64_rev(const void *a, const void *b) /* * build the snap context for a given realm. */ -static int build_snap_context(struct ceph_snap_realm *realm, +static int build_snap_context(struct ceph_mds_client *mdsc, + struct ceph_snap_realm *realm, struct list_head *realm_queue, struct list_head *dirty_realms) { @@ -425,7 +426,8 @@ static int build_snap_context(struct ceph_snap_realm *realm, /* * rebuild snap context for the given realm and all of its children. */ -static void rebuild_snap_realms(struct ceph_snap_realm *realm, +static void rebuild_snap_realms(struct ceph_mds_client *mdsc, + struct ceph_snap_realm *realm, struct list_head *dirty_realms) { LIST_HEAD(realm_queue); @@ -451,7 +453,8 @@ static void rebuild_snap_realms(struct ceph_snap_realm *realm, continue; } - last = build_snap_context(_realm, &realm_queue, dirty_realms); + last = build_snap_context(mdsc, _realm, &realm_queue, + dirty_realms); dout("%s %llx %p, %s\n", __func__, _realm->ino, _realm, last > 0 ? "is deferred" : !last ? "succeeded" : "failed"); @@ -708,7 +711,8 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, * Queue cap_snaps for snap writeback for this realm and its children. * Called under snap_rwsem, so realm topology won't change. */ -static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) +static void queue_realm_cap_snaps(struct ceph_mds_client *mdsc, + struct ceph_snap_realm *realm) { struct ceph_inode_info *ci; struct inode *lastinode = NULL; @@ -855,7 +859,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, /* rebuild_snapcs when we reach the _end_ (root) of the trace */ if (realm_to_rebuild && p >= e) - rebuild_snap_realms(realm_to_rebuild, &dirty_realms); + rebuild_snap_realms(mdsc, realm_to_rebuild, &dirty_realms); if (!first_realm) first_realm = realm; @@ -873,7 +877,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, realm = list_first_entry(&dirty_realms, struct ceph_snap_realm, dirty_item); list_del_init(&realm->dirty_item); - queue_realm_cap_snaps(realm); + queue_realm_cap_snaps(mdsc, realm); } if (realm_ret) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 51c7f2b14f6f..09c262dd5bd3 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1223,7 +1223,8 @@ extern void ceph_add_cap(struct inode *inode, unsigned cap, unsigned seq, u64 realmino, int flags, struct ceph_cap **new_cap); extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); -extern void ceph_remove_cap(struct ceph_cap *cap, bool queue_release); +extern void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, + bool queue_release); extern void __ceph_remove_caps(struct ceph_inode_info *ci); extern void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap); diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index 4c3e0648dc27..89f1931f1ba6 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h @@ -5,6 +5,8 @@ #include #include +struct ceph_mds_client; + /* * mds map - describe servers in the mds cluster. * @@ -65,7 +67,8 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) } extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); -struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2); +struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p, + void *end, bool msgr2); extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m); From 5995d90d2d19f337df6a50bcf4699ef053214dac Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 12 Jun 2023 10:50:38 +0800 Subject: [PATCH 060/415] ceph: rename _to_client() to _to_fs_client() We need to covert the inode to ceph_client in the following commit, and will add one new helper for that, here we rename the old helper to _fs_client(). Link: https://tracker.ceph.com/issues/61590 Signed-off-by: Xiubo Li Reviewed-by: Patrick Donnelly Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 20 ++++++++++---------- fs/ceph/cache.c | 2 +- fs/ceph/caps.c | 40 ++++++++++++++++++++-------------------- fs/ceph/crypto.c | 2 +- fs/ceph/dir.c | 22 +++++++++++----------- fs/ceph/export.c | 10 +++++----- fs/ceph/file.c | 24 ++++++++++++------------ fs/ceph/inode.c | 14 +++++++------- fs/ceph/ioctl.c | 8 ++++---- fs/ceph/mds_client.c | 2 +- fs/ceph/snap.c | 2 +- fs/ceph/super.c | 22 +++++++++++----------- fs/ceph/super.h | 10 +++++----- fs/ceph/xattr.c | 12 ++++++------ 14 files changed, 95 insertions(+), 95 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index f4863078f7fe..28fa05a9d4d2 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -229,7 +229,7 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq) static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq) { struct inode *inode = subreq->rreq->inode; - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); u64 objno, objoff; u32 xlen; @@ -244,7 +244,7 @@ static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq) static void finish_netfs_read(struct ceph_osd_request *req) { struct inode *inode = req->r_inode; - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0); struct netfs_io_subrequest *subreq = req->r_priv; struct ceph_osd_req_op *op = &req->r_ops[0]; @@ -348,7 +348,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) struct netfs_io_request *rreq = subreq->rreq; struct inode *inode = rreq->inode; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_osd_request *req = NULL; struct ceph_vino vino = ceph_vino(inode); struct iov_iter iter; @@ -658,7 +658,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) struct folio *folio = page_folio(page); struct inode *inode = page->mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_snap_context *snapc, *oldest; loff_t page_off = page_offset(page); int err; @@ -803,7 +803,7 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc) ihold(inode); if (wbc->sync_mode == WB_SYNC_NONE && - ceph_inode_to_client(inode)->write_congested) + ceph_inode_to_fs_client(inode)->write_congested) return AOP_WRITEPAGE_ACTIVATE; wait_on_page_fscache(page); @@ -836,7 +836,7 @@ static void writepages_finish(struct ceph_osd_request *req) int rc = req->r_result; struct ceph_snap_context *snapc = req->r_snapc; struct address_space *mapping = inode->i_mapping; - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); unsigned int len = 0; bool remove_page; @@ -926,7 +926,7 @@ static int ceph_writepages_start(struct address_space *mapping, { struct inode *inode = mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_vino vino = ceph_vino(inode); pgoff_t index, start_index, end = -1; struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc; @@ -1823,7 +1823,7 @@ int ceph_uninline_data(struct file *file) { struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_osd_request *req = NULL; struct ceph_cap_flush *prealloc_cf = NULL; struct folio *folio = NULL; @@ -1977,7 +1977,7 @@ enum { static int __ceph_pool_perm_get(struct ceph_inode_info *ci, s64 pool, struct ceph_string *pool_ns) { - struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->netfs.inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(&ci->netfs.inode); struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_osd_request *rd_req = NULL, *wr_req = NULL; struct rb_node **p, *parent; @@ -2168,7 +2168,7 @@ int ceph_pool_perm_check(struct inode *inode, int need) return 0; } - if (ceph_test_mount_opt(ceph_inode_to_client(inode), + if (ceph_test_mount_opt(ceph_inode_to_fs_client(inode), NOPOOLPERM)) return 0; diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index de1dee46d3df..930fbd54d2c8 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -15,7 +15,7 @@ void ceph_fscache_register_inode_cookie(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); /* No caching for filesystem? */ if (!fsc->fscache) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index c00f15b773f0..e976f481d5df 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -635,7 +635,7 @@ void ceph_add_cap(struct inode *inode, unsigned seq, unsigned mseq, u64 realmino, int flags, struct ceph_cap **new_cap) { - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap *cap; int mds = session->s_mds; @@ -922,7 +922,7 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) int __ceph_caps_issued_mask_metric(struct ceph_inode_info *ci, int mask, int touch) { - struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb); int r; r = __ceph_caps_issued_mask(ci, mask, touch); @@ -996,7 +996,7 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci) const int WR_SHIFT = ffs(CEPH_FILE_MODE_WR); const int LAZY_SHIFT = ffs(CEPH_FILE_MODE_LAZY); struct ceph_mount_options *opt = - ceph_inode_to_client(&ci->netfs.inode)->mount_options; + ceph_inode_to_fs_client(&ci->netfs.inode)->mount_options; unsigned long used_cutoff = jiffies - opt->caps_wanted_delay_max * HZ; unsigned long idle_cutoff = jiffies - opt->caps_wanted_delay_min * HZ; @@ -1121,7 +1121,7 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) dout("__ceph_remove_cap %p from %p\n", cap, &ci->netfs.inode); - mdsc = ceph_inode_to_client(&ci->netfs.inode)->mdsc; + mdsc = ceph_inode_to_fs_client(&ci->netfs.inode)->mdsc; /* remove from inode's cap rbtree, and clear auth cap */ rb_erase(&cap->ci_node, &ci->i_caps); @@ -1192,7 +1192,7 @@ void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, lockdep_assert_held(&ci->i_ceph_lock); - fsc = ceph_inode_to_client(&ci->netfs.inode); + fsc = ceph_inode_to_fs_client(&ci->netfs.inode); WARN_ON_ONCE(ci->i_auth_cap == cap && !list_empty(&ci->i_dirty_item) && !fsc->blocklisted && @@ -1344,7 +1344,7 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg) void __ceph_remove_caps(struct ceph_inode_info *ci) { struct inode *inode = &ci->netfs.inode; - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; struct rb_node *p; /* lock i_ceph_lock, because ceph_d_revalidate(..., LOOKUP_RCU) @@ -1688,7 +1688,7 @@ void ceph_flush_snaps(struct ceph_inode_info *ci, struct ceph_mds_session **psession) { struct inode *inode = &ci->netfs.inode; - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; struct ceph_mds_session *session = NULL; bool need_put = false; int mds; @@ -1753,7 +1753,7 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask, struct ceph_cap_flush **pcf) { struct ceph_mds_client *mdsc = - ceph_sb_to_client(ci->netfs.inode.i_sb)->mdsc; + ceph_sb_to_fs_client(ci->netfs.inode.i_sb)->mdsc; struct inode *inode = &ci->netfs.inode; int was = ci->i_dirty_caps; int dirty = 0; @@ -1876,7 +1876,7 @@ static u64 __mark_caps_flushing(struct inode *inode, struct ceph_mds_session *session, bool wake, u64 *oldest_flush_tid) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap_flush *cf = NULL; int flushing; @@ -2235,7 +2235,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) */ static int try_flush_caps(struct inode *inode, u64 *ptid) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); int flushing = 0; u64 flush_tid = 0, oldest_flush_tid = 0; @@ -2313,7 +2313,7 @@ static int caps_are_flushed(struct inode *inode, u64 flush_tid) */ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL; int ret, err = 0; @@ -2496,7 +2496,7 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) caps_are_flushed(inode, flush_tid)); } else { struct ceph_mds_client *mdsc = - ceph_sb_to_client(inode->i_sb)->mdsc; + ceph_sb_to_fs_client(inode->i_sb)->mdsc; spin_lock(&ci->i_ceph_lock); if (__ceph_caps_dirty(ci)) @@ -2749,7 +2749,7 @@ static int try_get_cap_refs(struct inode *inode, int need, int want, loff_t endoff, int flags, int *got) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; int ret = 0; int have, implemented; bool snap_rwsem_locked = false; @@ -2967,7 +2967,7 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need, int want, loff_t endoff, int *got) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); int ret, _got, flags; ret = ceph_pool_perm_check(inode, need); @@ -3730,7 +3730,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, __releases(ci->i_ceph_lock) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; struct ceph_cap_flush *cf, *tmp_cf; LIST_HEAD(to_remove); unsigned seq = le32_to_cpu(m->seq); @@ -3836,7 +3836,7 @@ void __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap, bool *wake_ci, bool *wake_mdsc) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; bool ret; lockdep_assert_held(&ci->i_ceph_lock); @@ -3880,7 +3880,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, struct ceph_mds_session *session) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; u64 follows = le64_to_cpu(m->snap_follows); struct ceph_cap_snap *capsnap = NULL, *iter; bool wake_ci = false; @@ -3972,7 +3972,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, struct ceph_mds_cap_peer *ph, struct ceph_mds_session *session) { - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; struct ceph_mds_session *tsession = NULL; struct ceph_cap *cap, *tcap, *new_cap = NULL; struct ceph_inode_info *ci = ceph_inode(inode); @@ -4675,7 +4675,7 @@ int ceph_drop_caps_for_unlink(struct inode *inode) if (__ceph_caps_dirty(ci)) { struct ceph_mds_client *mdsc = - ceph_inode_to_client(inode)->mdsc; + ceph_inode_to_fs_client(inode)->mdsc; __cap_delay_requeue_front(mdsc, ci); } } @@ -4858,7 +4858,7 @@ static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode) int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate) { - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); bool is_auth; diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c index 5b5112c78462..08c385610731 100644 --- a/fs/ceph/crypto.c +++ b/fs/ceph/crypto.c @@ -129,7 +129,7 @@ static bool ceph_crypt_empty_dir(struct inode *inode) static const union fscrypt_policy *ceph_get_dummy_policy(struct super_block *sb) { - return ceph_sb_to_client(sb)->fsc_dummy_enc_policy.policy; + return ceph_sb_to_fs_client(sb)->fsc_dummy_enc_policy.policy; } static struct fscrypt_operations ceph_fscrypt_ops = { diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index fff5cb2df9a8..1395b71df5cc 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -310,7 +310,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) struct ceph_dir_file_info *dfi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_mds_client *mdsc = fsc->mdsc; int i; int err; @@ -703,7 +703,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req, struct dentry *dentry) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb); struct inode *parent = d_inode(dentry->d_parent); /* we hold i_rwsem */ /* .snap dir? */ @@ -771,7 +771,7 @@ static bool is_root_ceph_dentry(struct inode *inode, struct dentry *dentry) static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dir->i_sb); struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_mds_request *req; int op; @@ -1199,7 +1199,7 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { struct dentry *dentry = req->r_dentry; - struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb); struct ceph_dentry_info *di = ceph_dentry(dentry); int result = req->r_err ? req->r_err : le32_to_cpu(req->r_reply_info.head->result); @@ -1290,7 +1290,7 @@ static int get_caps_for_async_unlink(struct inode *dir, struct dentry *dentry) */ static int ceph_unlink(struct inode *dir, struct dentry *dentry) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; struct inode *inode = d_inode(dentry); struct ceph_mds_request *req; @@ -1469,7 +1469,7 @@ void __ceph_dentry_lease_touch(struct ceph_dentry_info *di) return; } - mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; + mdsc = ceph_sb_to_fs_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_list_lock); list_move_tail(&di->lease_list, &mdsc->dentry_leases); spin_unlock(&mdsc->dentry_list_lock); @@ -1516,7 +1516,7 @@ void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di) return; } - mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; + mdsc = ceph_sb_to_fs_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_list_lock); __dentry_dir_lease_touch(mdsc, di), spin_unlock(&mdsc->dentry_list_lock); @@ -1530,7 +1530,7 @@ static void __dentry_lease_unlist(struct ceph_dentry_info *di) if (list_empty(&di->lease_list)) return; - mdsc = ceph_sb_to_client(di->dentry->d_sb)->mdsc; + mdsc = ceph_sb_to_fs_client(di->dentry->d_sb)->mdsc; spin_lock(&mdsc->dentry_list_lock); list_del_init(&di->lease_list); spin_unlock(&mdsc->dentry_list_lock); @@ -1888,7 +1888,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) dentry, inode, ceph_dentry(dentry)->offset, !!(dentry->d_flags & DCACHE_NOKEY_NAME)); - mdsc = ceph_sb_to_client(dir->i_sb)->mdsc; + mdsc = ceph_sb_to_fs_client(dir->i_sb)->mdsc; /* always trust cached snapped dentries, snapdir dentry */ if (ceph_snap(dir) != CEPH_NOSNAP) { @@ -1995,7 +1995,7 @@ static int ceph_d_delete(const struct dentry *dentry) static void ceph_d_release(struct dentry *dentry) { struct ceph_dentry_info *di = ceph_dentry(dentry); - struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb); dout("d_release %p\n", dentry); @@ -2064,7 +2064,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, int left; const int bufsize = 1024; - if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) + if (!ceph_test_mount_opt(ceph_sb_to_fs_client(inode->i_sb), DIRSTAT)) return -EISDIR; if (!dfi->dir_info) { diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 8559990a59a5..52c4daf2447d 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -123,7 +123,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, static struct inode *__lookup_inode(struct super_block *sb, u64 ino) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc; struct inode *inode; struct ceph_vino vino; int err; @@ -205,7 +205,7 @@ static struct dentry *__snapfh_to_dentry(struct super_block *sb, struct ceph_nfs_snapfh *sfh, bool want_parent) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc; struct ceph_mds_request *req; struct inode *inode; struct ceph_vino vino; @@ -317,7 +317,7 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb, static struct dentry *__get_parent(struct super_block *sb, struct dentry *child, u64 ino) { - struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc; struct ceph_mds_request *req; struct inode *inode; int mask; @@ -439,7 +439,7 @@ static int __get_snap_name(struct dentry *parent, char *name, { struct inode *inode = d_inode(child); struct inode *dir = d_inode(parent); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_mds_request *req = NULL; char *last_name = NULL; unsigned next_offset = 2; @@ -544,7 +544,7 @@ static int ceph_get_name(struct dentry *parent, char *name, if (ceph_snap(inode) != CEPH_NOSNAP) return __get_snap_name(parent, name, child); - mdsc = ceph_inode_to_client(inode)->mdsc; + mdsc = ceph_inode_to_fs_client(inode)->mdsc; req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME, USE_ANY_MDS); if (IS_ERR(req)) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 7c4d79a23506..c74428099d6d 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -200,7 +200,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mount_options *opt = - ceph_inode_to_client(&ci->netfs.inode)->mount_options; + ceph_inode_to_fs_client(&ci->netfs.inode)->mount_options; struct ceph_file_info *fi; int ret; @@ -234,7 +234,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, spin_lock_init(&fi->rw_contexts_lock); INIT_LIST_HEAD(&fi->rw_contexts); - fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen); + fi->filp_gen = READ_ONCE(ceph_inode_to_fs_client(inode)->filp_gen); if ((file->f_mode & FMODE_WRITE) && ceph_has_inline_data(ci)) { ret = ceph_uninline_data(file); @@ -352,7 +352,7 @@ int ceph_renew_caps(struct inode *inode, int fmode) int ceph_open(struct inode *inode, struct file *file) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(inode->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; struct ceph_file_info *fi = file->private_data; @@ -730,7 +730,7 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned flags, umode_t mode) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; struct inode *new_inode = NULL; @@ -962,7 +962,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, u64 *last_objver) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_osd_client *osdc = &fsc->client->osdc; ssize_t ret; u64 off = *ki_pos; @@ -1256,7 +1256,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) if (aio_work) { INIT_WORK(&aio_work->work, ceph_aio_retry_work); aio_work->req = req; - queue_work(ceph_inode_to_client(inode)->inode_wq, + queue_work(ceph_inode_to_fs_client(inode)->inode_wq, &aio_work->work); return; } @@ -1386,7 +1386,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_client_metric *metric = &fsc->mdsc->metric; struct ceph_vino vino; struct ceph_osd_request *req; @@ -1610,7 +1610,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_osd_client *osdc = &fsc->client->osdc; struct ceph_osd_request *req; struct page **pages; @@ -2228,7 +2228,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) struct ceph_file_info *fi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_osd_client *osdc = &fsc->client->osdc; struct ceph_cap_flush *prealloc_cf; ssize_t count, written = 0; @@ -2462,7 +2462,7 @@ static int ceph_zero_partial_object(struct inode *inode, loff_t offset, loff_t *length) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_osd_request *req; int ret = 0; loff_t zero = 0; @@ -2845,7 +2845,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, struct ceph_inode_info *src_ci = ceph_inode(src_inode); struct ceph_inode_info *dst_ci = ceph_inode(dst_inode); struct ceph_cap_flush *prealloc_cf; - struct ceph_fs_client *src_fsc = ceph_inode_to_client(src_inode); + struct ceph_fs_client *src_fsc = ceph_inode_to_fs_client(src_inode); loff_t size; ssize_t ret = -EIO, bytes; u64 src_objnum, dst_objnum, src_objoff, dst_objoff; @@ -2853,7 +2853,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, int src_got = 0, dst_got = 0, err, dirty; if (src_inode->i_sb != dst_inode->i_sb) { - struct ceph_fs_client *dst_fsc = ceph_inode_to_client(dst_inode); + struct ceph_fs_client *dst_fsc = ceph_inode_to_fs_client(dst_inode); if (ceph_fsid_compare(&src_fsc->client->fsid, &dst_fsc->client->fsid)) { diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index b79100f720b3..db6977c15c28 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1489,7 +1489,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; struct inode *in = NULL; struct ceph_vino tvino, dvino; - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); int err = 0; dout("fill_trace %p is_dentry %d is_target %d\n", req, @@ -2079,7 +2079,7 @@ bool ceph_inode_set_size(struct inode *inode, loff_t size) void ceph_queue_inode_work(struct inode *inode, int work_bit) { - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); set_bit(work_bit, &ci->i_work_mask); @@ -2427,7 +2427,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr, struct ceph_inode_info *ci = ceph_inode(inode); unsigned int ia_valid = attr->ia_valid; struct ceph_mds_request *req; - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; struct ceph_cap_flush *prealloc_cf; loff_t isize = i_size_read(inode); int issued; @@ -2740,7 +2740,7 @@ int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); int err; if (ceph_snap(inode) != CEPH_NOSNAP) @@ -2810,7 +2810,7 @@ int ceph_try_to_choose_auth_mds(struct inode *inode, int mask) int __ceph_do_getattr(struct inode *inode, struct page *locked_page, int mask, bool force) { - struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(inode->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; int mode; @@ -2856,7 +2856,7 @@ int __ceph_do_getattr(struct inode *inode, struct page *locked_page, int ceph_do_getvxattr(struct inode *inode, const char *name, void *value, size_t size) { - struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(inode->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; int mode = USE_AUTH_MDS; @@ -3001,7 +3001,7 @@ int ceph_getattr(struct mnt_idmap *idmap, const struct path *path, stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0; if (S_ISDIR(inode->i_mode)) { - if (ceph_test_mount_opt(ceph_sb_to_client(sb), RBYTES)) { + if (ceph_test_mount_opt(ceph_sb_to_fs_client(sb), RBYTES)) { stat->size = ci->i_rbytes; } else if (ceph_snap(inode) == CEPH_SNAPDIR) { struct ceph_inode_info *pci; diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 91a84917d203..3f617146e4ad 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -65,7 +65,7 @@ static long __validate_layout(struct ceph_mds_client *mdsc, static long ceph_ioctl_set_layout(struct file *file, void __user *arg) { struct inode *inode = file_inode(file); - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; struct ceph_mds_request *req; struct ceph_ioctl_layout l; struct ceph_inode_info *ci = ceph_inode(file_inode(file)); @@ -140,7 +140,7 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg) struct ceph_mds_request *req; struct ceph_ioctl_layout l; int err; - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; /* copy and validate */ if (copy_from_user(&l, arg, sizeof(l))) @@ -183,7 +183,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_osd_client *osdc = - &ceph_sb_to_client(inode->i_sb)->client->osdc; + &ceph_sb_to_fs_client(inode->i_sb)->client->osdc; struct ceph_object_locator oloc; CEPH_DEFINE_OID_ONSTACK(oid); u32 xlen; @@ -244,7 +244,7 @@ static long ceph_ioctl_lazyio(struct file *file) struct ceph_file_info *fi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) { spin_lock(&ci->i_ceph_lock); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 284ba087c507..d009f1dff291 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -830,7 +830,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) */ int ceph_wait_on_conflict_unlink(struct dentry *dentry) { - struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb); struct dentry *pdentry = dentry->d_parent; struct dentry *udentry, *found = NULL; struct ceph_dentry_info *di; diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 55090e6c9967..d0d3612f28f0 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -964,7 +964,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc) void ceph_change_snap_realm(struct inode *inode, struct ceph_snap_realm *realm) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; struct ceph_snap_realm *oldrealm = ci->i_snap_realm; lockdep_assert_held(&ci->i_ceph_lock); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 2d7f5a8d4a92..52af90beab00 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -44,7 +44,7 @@ static LIST_HEAD(ceph_fsc_list); */ static void ceph_put_super(struct super_block *s) { - struct ceph_fs_client *fsc = ceph_sb_to_client(s); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s); dout("put_super\n"); ceph_fscrypt_free_dummy_policy(fsc); @@ -53,7 +53,7 @@ static void ceph_put_super(struct super_block *s) static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry)); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(d_inode(dentry)); struct ceph_mon_client *monc = &fsc->client->monc; struct ceph_statfs st; int i, err; @@ -118,7 +118,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) static int ceph_sync_fs(struct super_block *sb, int wait) { - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); if (!wait) { dout("sync_fs (non-blocking)\n"); @@ -684,7 +684,7 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt, */ static int ceph_show_options(struct seq_file *m, struct dentry *root) { - struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(root->d_sb); struct ceph_mount_options *fsopt = fsc->mount_options; size_t pos; int ret; @@ -1015,7 +1015,7 @@ static void __ceph_umount_begin(struct ceph_fs_client *fsc) */ void ceph_umount_begin(struct super_block *sb) { - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); dout("ceph_umount_begin - starting forced umount\n"); if (!fsc) @@ -1226,7 +1226,7 @@ static int ceph_compare_super(struct super_block *sb, struct fs_context *fc) struct ceph_fs_client *new = fc->s_fs_info; struct ceph_mount_options *fsopt = new->mount_options; struct ceph_options *opt = new->client->options; - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); dout("ceph_compare_super %p\n", sb); @@ -1322,9 +1322,9 @@ static int ceph_get_tree(struct fs_context *fc) goto out; } - if (ceph_sb_to_client(sb) != fsc) { + if (ceph_sb_to_fs_client(sb) != fsc) { destroy_fs_client(fsc); - fsc = ceph_sb_to_client(sb); + fsc = ceph_sb_to_fs_client(sb); dout("get_sb got existing client %p\n", fsc); } else { dout("get_sb using new client %p\n", fsc); @@ -1377,7 +1377,7 @@ static int ceph_reconfigure_fc(struct fs_context *fc) struct ceph_parse_opts_ctx *pctx = fc->fs_private; struct ceph_mount_options *fsopt = pctx->opts; struct super_block *sb = fc->root->d_sb; - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); err = ceph_apply_test_dummy_encryption(sb, fc, fsopt); if (err) @@ -1516,7 +1516,7 @@ void ceph_dec_osd_stopping_blocker(struct ceph_mds_client *mdsc) static void ceph_kill_sb(struct super_block *s) { - struct ceph_fs_client *fsc = ceph_sb_to_client(s); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s); struct ceph_mds_client *mdsc = fsc->mdsc; bool wait; @@ -1578,7 +1578,7 @@ MODULE_ALIAS_FS("ceph"); int ceph_force_reconnect(struct super_block *sb) { - struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); int err = 0; fsc->mount_state = CEPH_MOUNT_RECOVER; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 09c262dd5bd3..8efd4ba60774 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -488,13 +488,13 @@ ceph_inode(const struct inode *inode) } static inline struct ceph_fs_client * -ceph_inode_to_client(const struct inode *inode) +ceph_inode_to_fs_client(const struct inode *inode) { return (struct ceph_fs_client *)inode->i_sb->s_fs_info; } static inline struct ceph_fs_client * -ceph_sb_to_client(const struct super_block *sb) +ceph_sb_to_fs_client(const struct super_block *sb) { return (struct ceph_fs_client *)sb->s_fs_info; } @@ -502,7 +502,7 @@ ceph_sb_to_client(const struct super_block *sb) static inline struct ceph_mds_client * ceph_sb_to_mdsc(const struct super_block *sb) { - return (struct ceph_mds_client *)ceph_sb_to_client(sb)->mdsc; + return (struct ceph_mds_client *)ceph_sb_to_fs_client(sb)->mdsc; } static inline struct ceph_vino @@ -558,7 +558,7 @@ static inline u64 ceph_snap(struct inode *inode) */ static inline u64 ceph_present_ino(struct super_block *sb, u64 ino) { - if (unlikely(ceph_test_mount_opt(ceph_sb_to_client(sb), INO32))) + if (unlikely(ceph_test_mount_opt(ceph_sb_to_fs_client(sb), INO32))) return ceph_ino_to_ino32(ino); return ino; } @@ -1106,7 +1106,7 @@ void ceph_inode_shutdown(struct inode *inode); static inline bool ceph_inode_is_shutdown(struct inode *inode) { unsigned long flags = READ_ONCE(ceph_inode(inode)->i_ceph_flags); - struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); int state = READ_ONCE(fsc->mount_state); return (flags & CEPH_I_SHUTDOWN) || state >= CEPH_MOUNT_SHUTDOWN; diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 0deae4a0f5f1..558f64554b59 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -57,7 +57,7 @@ static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci) static ssize_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, size_t size) { - struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb); struct ceph_osd_client *osdc = &fsc->client->osdc; struct ceph_string *pool_ns; s64 pool = ci->i_layout.pool_id; @@ -161,7 +161,7 @@ static ssize_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci, char *val, size_t size) { ssize_t ret; - struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb); struct ceph_osd_client *osdc = &fsc->client->osdc; s64 pool = ci->i_layout.pool_id; const char *pool_name; @@ -313,7 +313,7 @@ static ssize_t ceph_vxattrcb_snap_btime(struct ceph_inode_info *ci, char *val, static ssize_t ceph_vxattrcb_cluster_fsid(struct ceph_inode_info *ci, char *val, size_t size) { - struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb); return ceph_fmt_xattr(val, size, "%pU", &fsc->client->fsid); } @@ -321,7 +321,7 @@ static ssize_t ceph_vxattrcb_cluster_fsid(struct ceph_inode_info *ci, static ssize_t ceph_vxattrcb_client_id(struct ceph_inode_info *ci, char *val, size_t size) { - struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb); return ceph_fmt_xattr(val, size, "client%lld", ceph_client_gid(fsc->client)); @@ -1094,7 +1094,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) static int ceph_sync_setxattr(struct inode *inode, const char *name, const char *value, size_t size, int flags) { - struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(inode->i_sb); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req; struct ceph_mds_client *mdsc = fsc->mdsc; @@ -1164,7 +1164,7 @@ int __ceph_setxattr(struct inode *inode, const char *name, { struct ceph_vxattr *vxattr; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; struct ceph_cap_flush *prealloc_cf = NULL; struct ceph_buffer *old_blob = NULL; int issued; From 38d46409c4639a1d659ebfa70e27a8bed6b8ee1d Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 12 Jun 2023 09:04:07 +0800 Subject: [PATCH 061/415] ceph: print cluster fsid and client global_id in all debug logs Multiple CephFS mounts on a host is increasingly common so disambiguating messages like this is necessary and will make it easier to debug issues. At the same this will improve the debug logs to make them easier to troubleshooting issues, such as print the ino# instead only printing the memory addresses of the corresponding inodes and print the dentry names instead of the corresponding memory addresses for the dentry,etc. Link: https://tracker.ceph.com/issues/61590 Signed-off-by: Xiubo Li Reviewed-by: Patrick Donnelly Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/acl.c | 6 +- fs/ceph/addr.c | 279 +++++++++-------- fs/ceph/caps.c | 708 +++++++++++++++++++++++++------------------ fs/ceph/crypto.c | 39 ++- fs/ceph/debugfs.c | 6 +- fs/ceph/dir.c | 218 +++++++------ fs/ceph/export.c | 39 +-- fs/ceph/file.c | 245 ++++++++------- fs/ceph/inode.c | 485 ++++++++++++++++------------- fs/ceph/ioctl.c | 13 +- fs/ceph/locks.c | 57 ++-- fs/ceph/mds_client.c | 558 +++++++++++++++++++--------------- fs/ceph/mdsmap.c | 24 +- fs/ceph/metric.c | 5 +- fs/ceph/quota.c | 29 +- fs/ceph/snap.c | 174 ++++++----- fs/ceph/super.c | 70 +++-- fs/ceph/super.h | 6 + fs/ceph/xattr.c | 96 +++--- 19 files changed, 1746 insertions(+), 1311 deletions(-) diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index c53a1d220622..e95841961397 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -15,6 +15,7 @@ #include #include "super.h" +#include "mds_client.h" static inline void ceph_set_cached_acl(struct inode *inode, int type, struct posix_acl *acl) @@ -31,6 +32,7 @@ static inline void ceph_set_cached_acl(struct inode *inode, struct posix_acl *ceph_get_acl(struct inode *inode, int type, bool rcu) { + struct ceph_client *cl = ceph_inode_to_client(inode); int size; unsigned int retry_cnt = 0; const char *name; @@ -72,8 +74,8 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type, bool rcu) } else if (size == -ENODATA || size == 0) { acl = NULL; } else { - pr_err_ratelimited("get acl %llx.%llx failed, err=%d\n", - ceph_vinop(inode), size); + pr_err_ratelimited_client(cl, "%llx.%llx failed, err=%d\n", + ceph_vinop(inode), size); acl = ERR_PTR(-EIO); } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 28fa05a9d4d2..63cb78fabebf 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -79,18 +79,18 @@ static inline struct ceph_snap_context *page_snap_context(struct page *page) */ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio) { - struct inode *inode; + struct inode *inode = mapping->host; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci; struct ceph_snap_context *snapc; if (folio_test_dirty(folio)) { - dout("%p dirty_folio %p idx %lu -- already dirty\n", - mapping->host, folio, folio->index); + doutc(cl, "%llx.%llx %p idx %lu -- already dirty\n", + ceph_vinop(inode), folio, folio->index); VM_BUG_ON_FOLIO(!folio_test_private(folio), folio); return false; } - inode = mapping->host; ci = ceph_inode(inode); /* dirty the head */ @@ -111,12 +111,12 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio) if (ci->i_wrbuffer_ref == 0) ihold(inode); ++ci->i_wrbuffer_ref; - dout("%p dirty_folio %p idx %lu head %d/%d -> %d/%d " - "snapc %p seq %lld (%d snaps)\n", - mapping->host, folio, folio->index, - ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1, - ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head, - snapc, snapc->seq, snapc->num_snaps); + doutc(cl, "%llx.%llx %p idx %lu head %d/%d -> %d/%d " + "snapc %p seq %lld (%d snaps)\n", + ceph_vinop(inode), folio, folio->index, + ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1, + ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head, + snapc, snapc->seq, snapc->num_snaps); spin_unlock(&ci->i_ceph_lock); /* @@ -137,23 +137,22 @@ static bool ceph_dirty_folio(struct address_space *mapping, struct folio *folio) static void ceph_invalidate_folio(struct folio *folio, size_t offset, size_t length) { - struct inode *inode; - struct ceph_inode_info *ci; + struct inode *inode = folio->mapping->host; + struct ceph_client *cl = ceph_inode_to_client(inode); + struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_snap_context *snapc; - inode = folio->mapping->host; - ci = ceph_inode(inode); if (offset != 0 || length != folio_size(folio)) { - dout("%p invalidate_folio idx %lu partial dirty page %zu~%zu\n", - inode, folio->index, offset, length); + doutc(cl, "%llx.%llx idx %lu partial dirty page %zu~%zu\n", + ceph_vinop(inode), folio->index, offset, length); return; } WARN_ON(!folio_test_locked(folio)); if (folio_test_private(folio)) { - dout("%p invalidate_folio idx %lu full dirty page\n", - inode, folio->index); + doutc(cl, "%llx.%llx idx %lu full dirty page\n", + ceph_vinop(inode), folio->index); snapc = folio_detach_private(folio); ceph_put_wrbuffer_cap_refs(ci, 1, snapc); @@ -166,10 +165,10 @@ static void ceph_invalidate_folio(struct folio *folio, size_t offset, static bool ceph_release_folio(struct folio *folio, gfp_t gfp) { struct inode *inode = folio->mapping->host; + struct ceph_client *cl = ceph_inode_to_client(inode); - dout("%llx:%llx release_folio idx %lu (%sdirty)\n", - ceph_vinop(inode), - folio->index, folio_test_dirty(folio) ? "" : "not "); + doutc(cl, "%llx.%llx idx %lu (%sdirty)\n", ceph_vinop(inode), + folio->index, folio_test_dirty(folio) ? "" : "not "); if (folio_test_private(folio)) return false; @@ -245,6 +244,7 @@ static void finish_netfs_read(struct ceph_osd_request *req) { struct inode *inode = req->r_inode; struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0); struct netfs_io_subrequest *subreq = req->r_priv; struct ceph_osd_req_op *op = &req->r_ops[0]; @@ -254,8 +254,8 @@ static void finish_netfs_read(struct ceph_osd_request *req) ceph_update_read_metrics(&fsc->mdsc->metric, req->r_start_latency, req->r_end_latency, osd_data->length, err); - dout("%s: result %d subreq->len=%zu i_size=%lld\n", __func__, req->r_result, - subreq->len, i_size_read(req->r_inode)); + doutc(cl, "result %d subreq->len=%zu i_size=%lld\n", req->r_result, + subreq->len, i_size_read(req->r_inode)); /* no object means success but no data */ if (err == -ENOENT) @@ -349,6 +349,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) struct inode *inode = rreq->inode; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_request *req = NULL; struct ceph_vino vino = ceph_vino(inode); struct iov_iter iter; @@ -383,7 +384,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) goto out; } - dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len); + doutc(cl, "%llx.%llx pos=%llu orig_len=%zu len=%llu\n", + ceph_vinop(inode), subreq->start, subreq->len, len); iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len); @@ -400,8 +402,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off); if (err < 0) { - dout("%s: iov_ter_get_pages_alloc returned %d\n", - __func__, err); + doutc(cl, "%llx.%llx failed to allocate pages, %d\n", + ceph_vinop(inode), err); goto out; } @@ -429,12 +431,13 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) ceph_osdc_put_request(req); if (err) netfs_subreq_terminated(subreq, err, false); - dout("%s: result %d\n", __func__, err); + doutc(cl, "%llx.%llx result %d\n", ceph_vinop(inode), err); } static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) { struct inode *inode = rreq->inode; + struct ceph_client *cl = ceph_inode_to_client(inode); int got = 0, want = CEPH_CAP_FILE_CACHE; struct ceph_netfs_request_data *priv; int ret = 0; @@ -466,12 +469,12 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) */ ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want, true, &got); if (ret < 0) { - dout("start_read %p, error getting cap\n", inode); + doutc(cl, "%llx.%llx, error getting cap\n", ceph_vinop(inode)); goto out; } if (!(got & want)) { - dout("start_read %p, no cache cap\n", inode); + doutc(cl, "%llx.%llx, no cache cap\n", ceph_vinop(inode)); ret = -EACCES; goto out; } @@ -563,13 +566,14 @@ get_oldest_context(struct inode *inode, struct ceph_writeback_ctl *ctl, struct ceph_snap_context *page_snapc) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_snap_context *snapc = NULL; struct ceph_cap_snap *capsnap = NULL; spin_lock(&ci->i_ceph_lock); list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { - dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap, - capsnap->context, capsnap->dirty_pages); + doutc(cl, " capsnap %p snapc %p has %d dirty pages\n", + capsnap, capsnap->context, capsnap->dirty_pages); if (!capsnap->dirty_pages) continue; @@ -601,8 +605,8 @@ get_oldest_context(struct inode *inode, struct ceph_writeback_ctl *ctl, } if (!snapc && ci->i_wrbuffer_ref_head) { snapc = ceph_get_snap_context(ci->i_head_snapc); - dout(" head snapc %p has %d dirty pages\n", - snapc, ci->i_wrbuffer_ref_head); + doutc(cl, " head snapc %p has %d dirty pages\n", snapc, + ci->i_wrbuffer_ref_head); if (ctl) { ctl->i_size = i_size_read(inode); ctl->truncate_size = ci->i_truncate_size; @@ -659,6 +663,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) struct inode *inode = page->mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_snap_context *snapc, *oldest; loff_t page_off = page_offset(page); int err; @@ -670,7 +675,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) bool caching = ceph_is_cache_enabled(inode); struct page *bounce_page = NULL; - dout("writepage %p idx %lu\n", page, page->index); + doutc(cl, "%llx.%llx page %p idx %lu\n", ceph_vinop(inode), page, + page->index); if (ceph_inode_is_shutdown(inode)) return -EIO; @@ -678,13 +684,14 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) /* verify this is a writeable snap context */ snapc = page_snap_context(page); if (!snapc) { - dout("writepage %p page %p not dirty?\n", inode, page); + doutc(cl, "%llx.%llx page %p not dirty?\n", ceph_vinop(inode), + page); return 0; } oldest = get_oldest_context(inode, &ceph_wbc, snapc); if (snapc->seq > oldest->seq) { - dout("writepage %p page %p snapc %p not writeable - noop\n", - inode, page, snapc); + doutc(cl, "%llx.%llx page %p snapc %p not writeable - noop\n", + ceph_vinop(inode), page, snapc); /* we should only noop if called by kswapd */ WARN_ON(!(current->flags & PF_MEMALLOC)); ceph_put_snap_context(oldest); @@ -695,8 +702,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) /* is this a partial page at end of file? */ if (page_off >= ceph_wbc.i_size) { - dout("folio at %lu beyond eof %llu\n", folio->index, - ceph_wbc.i_size); + doutc(cl, "%llx.%llx folio at %lu beyond eof %llu\n", + ceph_vinop(inode), folio->index, ceph_wbc.i_size); folio_invalidate(folio, 0, folio_size(folio)); return 0; } @@ -705,8 +712,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) len = ceph_wbc.i_size - page_off; wlen = IS_ENCRYPTED(inode) ? round_up(len, CEPH_FSCRYPT_BLOCK_SIZE) : len; - dout("writepage %p page %p index %lu on %llu~%llu snapc %p seq %lld\n", - inode, page, page->index, page_off, wlen, snapc, snapc->seq); + doutc(cl, "%llx.%llx page %p index %lu on %llu~%llu snapc %p seq %lld\n", + ceph_vinop(inode), page, page->index, page_off, wlen, snapc, + snapc->seq); if (atomic_long_inc_return(&fsc->writeback_count) > CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) @@ -747,8 +755,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) osd_req_op_extent_osd_data_pages(req, 0, bounce_page ? &bounce_page : &page, wlen, 0, false, false); - dout("writepage %llu~%llu (%llu bytes, %sencrypted)\n", - page_off, len, wlen, IS_ENCRYPTED(inode) ? "" : "not "); + doutc(cl, "%llx.%llx %llu~%llu (%llu bytes, %sencrypted)\n", + ceph_vinop(inode), page_off, len, wlen, + IS_ENCRYPTED(inode) ? "" : "not "); req->r_mtime = inode->i_mtime; ceph_osdc_start_request(osdc, req); @@ -767,19 +776,21 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) wbc = &tmp_wbc; if (err == -ERESTARTSYS) { /* killed by SIGKILL */ - dout("writepage interrupted page %p\n", page); + doutc(cl, "%llx.%llx interrupted page %p\n", + ceph_vinop(inode), page); redirty_page_for_writepage(wbc, page); end_page_writeback(page); return err; } if (err == -EBLOCKLISTED) fsc->blocklisted = true; - dout("writepage setting page/mapping error %d %p\n", - err, page); + doutc(cl, "%llx.%llx setting page/mapping error %d %p\n", + ceph_vinop(inode), err, page); mapping_set_error(&inode->i_data, err); wbc->pages_skipped++; } else { - dout("writepage cleaned page %p\n", page); + doutc(cl, "%llx.%llx cleaned page %p\n", + ceph_vinop(inode), page); err = 0; /* vfs expects us to return 0 */ } oldest = detach_page_private(page); @@ -829,6 +840,7 @@ static void writepages_finish(struct ceph_osd_request *req) { struct inode *inode = req->r_inode; struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_osd_data *osd_data; struct page *page; int num_pages, total_pages = 0; @@ -840,7 +852,7 @@ static void writepages_finish(struct ceph_osd_request *req) unsigned int len = 0; bool remove_page; - dout("writepages_finish %p rc %d\n", inode, rc); + doutc(cl, "%llx.%llx rc %d\n", ceph_vinop(inode), rc); if (rc < 0) { mapping_set_error(mapping, rc); ceph_set_error_write(ci); @@ -862,8 +874,10 @@ static void writepages_finish(struct ceph_osd_request *req) /* clean all pages */ for (i = 0; i < req->r_num_ops; i++) { if (req->r_ops[i].op != CEPH_OSD_OP_WRITE) { - pr_warn("%s incorrect op %d req %p index %d tid %llu\n", - __func__, req->r_ops[i].op, req, i, req->r_tid); + pr_warn_client(cl, + "%llx.%llx incorrect op %d req %p index %d tid %llu\n", + ceph_vinop(inode), req->r_ops[i].op, req, i, + req->r_tid); break; } @@ -890,7 +904,7 @@ static void writepages_finish(struct ceph_osd_request *req) ceph_put_snap_context(detach_page_private(page)); end_page_writeback(page); - dout("unlocking %p\n", page); + doutc(cl, "unlocking %p\n", page); if (remove_page) generic_error_remove_page(inode->i_mapping, @@ -898,8 +912,9 @@ static void writepages_finish(struct ceph_osd_request *req) unlock_page(page); } - dout("writepages_finish %p wrote %llu bytes cleaned %d pages\n", - inode, osd_data->length, rc >= 0 ? num_pages : 0); + doutc(cl, "%llx.%llx wrote %llu bytes cleaned %d pages\n", + ceph_vinop(inode), osd_data->length, + rc >= 0 ? num_pages : 0); release_pages(osd_data->pages, num_pages); } @@ -927,6 +942,7 @@ static int ceph_writepages_start(struct address_space *mapping, struct inode *inode = mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_vino vino = ceph_vino(inode); pgoff_t index, start_index, end = -1; struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc; @@ -944,15 +960,15 @@ static int ceph_writepages_start(struct address_space *mapping, fsc->write_congested) return 0; - dout("writepages_start %p (mode=%s)\n", inode, - wbc->sync_mode == WB_SYNC_NONE ? "NONE" : - (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); + doutc(cl, "%llx.%llx (mode=%s)\n", ceph_vinop(inode), + wbc->sync_mode == WB_SYNC_NONE ? "NONE" : + (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); if (ceph_inode_is_shutdown(inode)) { if (ci->i_wrbuffer_ref > 0) { - pr_warn_ratelimited( - "writepage_start %p %lld forced umount\n", - inode, ceph_ino(inode)); + pr_warn_ratelimited_client(cl, + "%llx.%llx %lld forced umount\n", + ceph_vinop(inode), ceph_ino(inode)); } mapping_set_error(mapping, -EIO); return -EIO; /* we're in a forced umount, don't write! */ @@ -976,11 +992,11 @@ static int ceph_writepages_start(struct address_space *mapping, if (!snapc) { /* hmm, why does writepages get called when there is no dirty data? */ - dout(" no snap context with dirty data?\n"); + doutc(cl, " no snap context with dirty data?\n"); goto out; } - dout(" oldest snapc is %p seq %lld (%d snaps)\n", - snapc, snapc->seq, snapc->num_snaps); + doutc(cl, " oldest snapc is %p seq %lld (%d snaps)\n", snapc, + snapc->seq, snapc->num_snaps); should_loop = false; if (ceph_wbc.head_snapc && snapc != last_snapc) { @@ -990,13 +1006,13 @@ static int ceph_writepages_start(struct address_space *mapping, end = -1; if (index > 0) should_loop = true; - dout(" cyclic, start at %lu\n", index); + doutc(cl, " cyclic, start at %lu\n", index); } else { index = wbc->range_start >> PAGE_SHIFT; end = wbc->range_end >> PAGE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = true; - dout(" not cyclic, %lu to %lu\n", index, end); + doutc(cl, " not cyclic, %lu to %lu\n", index, end); } } else if (!ceph_wbc.head_snapc) { /* Do not respect wbc->range_{start,end}. Dirty pages @@ -1005,7 +1021,7 @@ static int ceph_writepages_start(struct address_space *mapping, * associated with 'snapc' get written */ if (index > 0) should_loop = true; - dout(" non-head snapc, range whole\n"); + doutc(cl, " non-head snapc, range whole\n"); } if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) @@ -1028,12 +1044,12 @@ static int ceph_writepages_start(struct address_space *mapping, get_more_pages: nr_folios = filemap_get_folios_tag(mapping, &index, end, tag, &fbatch); - dout("pagevec_lookup_range_tag got %d\n", nr_folios); + doutc(cl, "pagevec_lookup_range_tag got %d\n", nr_folios); if (!nr_folios && !locked_pages) break; for (i = 0; i < nr_folios && locked_pages < max_pages; i++) { page = &fbatch.folios[i]->page; - dout("? %p idx %lu\n", page, page->index); + doutc(cl, "? %p idx %lu\n", page, page->index); if (locked_pages == 0) lock_page(page); /* first page */ else if (!trylock_page(page)) @@ -1042,15 +1058,15 @@ static int ceph_writepages_start(struct address_space *mapping, /* only dirty pages, or our accounting breaks */ if (unlikely(!PageDirty(page)) || unlikely(page->mapping != mapping)) { - dout("!dirty or !mapping %p\n", page); + doutc(cl, "!dirty or !mapping %p\n", page); unlock_page(page); continue; } /* only if matching snap context */ pgsnapc = page_snap_context(page); if (pgsnapc != snapc) { - dout("page snapc %p %lld != oldest %p %lld\n", - pgsnapc, pgsnapc->seq, snapc, snapc->seq); + doutc(cl, "page snapc %p %lld != oldest %p %lld\n", + pgsnapc, pgsnapc->seq, snapc, snapc->seq); if (!should_loop && !ceph_wbc.head_snapc && wbc->sync_mode != WB_SYNC_NONE) @@ -1061,8 +1077,8 @@ static int ceph_writepages_start(struct address_space *mapping, if (page_offset(page) >= ceph_wbc.i_size) { struct folio *folio = page_folio(page); - dout("folio at %lu beyond eof %llu\n", - folio->index, ceph_wbc.i_size); + doutc(cl, "folio at %lu beyond eof %llu\n", + folio->index, ceph_wbc.i_size); if ((ceph_wbc.size_stable || folio_pos(folio) >= i_size_read(inode)) && folio_clear_dirty_for_io(folio)) @@ -1072,23 +1088,23 @@ static int ceph_writepages_start(struct address_space *mapping, continue; } if (strip_unit_end && (page->index > strip_unit_end)) { - dout("end of strip unit %p\n", page); + doutc(cl, "end of strip unit %p\n", page); unlock_page(page); break; } if (PageWriteback(page) || PageFsCache(page)) { if (wbc->sync_mode == WB_SYNC_NONE) { - dout("%p under writeback\n", page); + doutc(cl, "%p under writeback\n", page); unlock_page(page); continue; } - dout("waiting on writeback %p\n", page); + doutc(cl, "waiting on writeback %p\n", page); wait_on_page_writeback(page); wait_on_page_fscache(page); } if (!clear_page_dirty_for_io(page)) { - dout("%p !clear_page_dirty_for_io\n", page); + doutc(cl, "%p !clear_page_dirty_for_io\n", page); unlock_page(page); continue; } @@ -1143,8 +1159,8 @@ static int ceph_writepages_start(struct address_space *mapping, } /* note position of first page in fbatch */ - dout("%p will write page %p idx %lu\n", - inode, page, page->index); + doutc(cl, "%llx.%llx will write page %p idx %lu\n", + ceph_vinop(inode), page, page->index); if (atomic_long_inc_return(&fsc->writeback_count) > CONGESTION_ON_THRESH( @@ -1158,8 +1174,9 @@ static int ceph_writepages_start(struct address_space *mapping, locked_pages ? GFP_NOWAIT : GFP_NOFS); if (IS_ERR(pages[locked_pages])) { if (PTR_ERR(pages[locked_pages]) == -EINVAL) - pr_err("%s: inode->i_blkbits=%hhu\n", - __func__, inode->i_blkbits); + pr_err_client(cl, + "inode->i_blkbits=%hhu\n", + inode->i_blkbits); /* better not fail on first page! */ BUG_ON(locked_pages == 0); pages[locked_pages] = NULL; @@ -1193,7 +1210,7 @@ static int ceph_writepages_start(struct address_space *mapping, if (nr_folios && i == nr_folios && locked_pages < max_pages) { - dout("reached end fbatch, trying for more\n"); + doutc(cl, "reached end fbatch, trying for more\n"); folio_batch_release(&fbatch); goto get_more_pages; } @@ -1254,8 +1271,8 @@ static int ceph_writepages_start(struct address_space *mapping, /* Start a new extent */ osd_req_op_extent_dup_last(req, op_idx, cur_offset - offset); - dout("writepages got pages at %llu~%llu\n", - offset, len); + doutc(cl, "got pages at %llu~%llu\n", offset, + len); osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len, 0, from_pool, false); @@ -1288,12 +1305,13 @@ static int ceph_writepages_start(struct address_space *mapping, if (IS_ENCRYPTED(inode)) len = round_up(len, CEPH_FSCRYPT_BLOCK_SIZE); - dout("writepages got pages at %llu~%llu\n", offset, len); + doutc(cl, "got pages at %llu~%llu\n", offset, len); if (IS_ENCRYPTED(inode) && ((offset | len) & ~CEPH_FSCRYPT_BLOCK_MASK)) - pr_warn("%s: bad encrypted write offset=%lld len=%llu\n", - __func__, offset, len); + pr_warn_client(cl, + "bad encrypted write offset=%lld len=%llu\n", + offset, len); osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len, 0, from_pool, false); @@ -1345,14 +1363,14 @@ static int ceph_writepages_start(struct address_space *mapping, done = true; release_folios: - dout("folio_batch release on %d folios (%p)\n", (int)fbatch.nr, - fbatch.nr ? fbatch.folios[0] : NULL); + doutc(cl, "folio_batch release on %d folios (%p)\n", + (int)fbatch.nr, fbatch.nr ? fbatch.folios[0] : NULL); folio_batch_release(&fbatch); } if (should_loop && !done) { /* more to do; loop back to beginning of file */ - dout("writepages looping back to beginning of file\n"); + doutc(cl, "looping back to beginning of file\n"); end = start_index - 1; /* OK even when start_index == 0 */ /* to write dirty pages associated with next snapc, @@ -1390,7 +1408,8 @@ static int ceph_writepages_start(struct address_space *mapping, out: ceph_osdc_put_request(req); ceph_put_snap_context(last_snapc); - dout("writepages dend - startone, rc = %d\n", rc); + doutc(cl, "%llx.%llx dend - startone, rc = %d\n", ceph_vinop(inode), + rc); return rc; } @@ -1424,11 +1443,12 @@ static struct ceph_snap_context * ceph_find_incompatible(struct page *page) { struct inode *inode = page->mapping->host; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); if (ceph_inode_is_shutdown(inode)) { - dout(" page %p %llx:%llx is shutdown\n", page, - ceph_vinop(inode)); + doutc(cl, " %llx.%llx page %p is shutdown\n", + ceph_vinop(inode), page); return ERR_PTR(-ESTALE); } @@ -1449,13 +1469,15 @@ ceph_find_incompatible(struct page *page) if (snapc->seq > oldest->seq) { /* not writeable -- return it for the caller to deal with */ ceph_put_snap_context(oldest); - dout(" page %p snapc %p not current or oldest\n", page, snapc); + doutc(cl, " %llx.%llx page %p snapc %p not current or oldest\n", + ceph_vinop(inode), page, snapc); return ceph_get_snap_context(snapc); } ceph_put_snap_context(oldest); /* yay, writeable, do it now (without dropping page lock) */ - dout(" page %p snapc %p not current, but oldest\n", page, snapc); + doutc(cl, " %llx.%llx page %p snapc %p not current, but oldest\n", + ceph_vinop(inode), page, snapc); if (clear_page_dirty_for_io(page)) { int r = writepage_nounlock(page, NULL); if (r < 0) @@ -1524,10 +1546,11 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, { struct folio *folio = page_folio(subpage); struct inode *inode = file_inode(file); + struct ceph_client *cl = ceph_inode_to_client(inode); bool check_cap = false; - dout("write_end file %p inode %p folio %p %d~%d (%d)\n", file, - inode, folio, (int)pos, (int)copied, (int)len); + doutc(cl, "%llx.%llx file %p folio %p %d~%d (%d)\n", ceph_vinop(inode), + file, folio, (int)pos, (int)copied, (int)len); if (!folio_test_uptodate(folio)) { /* just return that nothing was copied on a short copy */ @@ -1587,6 +1610,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_file_info *fi = vma->vm_file->private_data; loff_t off = (loff_t)vmf->pgoff << PAGE_SHIFT; int want, got, err; @@ -1598,8 +1622,8 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) ceph_block_sigs(&oldset); - dout("filemap_fault %p %llx.%llx %llu trying to get caps\n", - inode, ceph_vinop(inode), off); + doutc(cl, "%llx.%llx %llu trying to get caps\n", + ceph_vinop(inode), off); if (fi->fmode & CEPH_FILE_MODE_LAZY) want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; else @@ -1610,8 +1634,8 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) if (err < 0) goto out_restore; - dout("filemap_fault %p %llu got cap refs on %s\n", - inode, off, ceph_cap_string(got)); + doutc(cl, "%llx.%llx %llu got cap refs on %s\n", ceph_vinop(inode), + off, ceph_cap_string(got)); if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) || !ceph_has_inline_data(ci)) { @@ -1619,8 +1643,8 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) ceph_add_rw_context(fi, &rw_ctx); ret = filemap_fault(vmf); ceph_del_rw_context(fi, &rw_ctx); - dout("filemap_fault %p %llu drop cap refs %s ret %x\n", - inode, off, ceph_cap_string(got), ret); + doutc(cl, "%llx.%llx %llu drop cap refs %s ret %x\n", + ceph_vinop(inode), off, ceph_cap_string(got), ret); } else err = -EAGAIN; @@ -1661,8 +1685,8 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED; out_inline: filemap_invalidate_unlock_shared(mapping); - dout("filemap_fault %p %llu read inline data ret %x\n", - inode, off, ret); + doutc(cl, "%llx.%llx %llu read inline data ret %x\n", + ceph_vinop(inode), off, ret); } out_restore: ceph_restore_sigs(&oldset); @@ -1676,6 +1700,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct inode *inode = file_inode(vma->vm_file); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; struct ceph_cap_flush *prealloc_cf; @@ -1702,8 +1727,8 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) else len = offset_in_thp(page, size); - dout("page_mkwrite %p %llx.%llx %llu~%zd getting caps i_size %llu\n", - inode, ceph_vinop(inode), off, len, size); + doutc(cl, "%llx.%llx %llu~%zd getting caps i_size %llu\n", + ceph_vinop(inode), off, len, size); if (fi->fmode & CEPH_FILE_MODE_LAZY) want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; else @@ -1714,8 +1739,8 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) if (err < 0) goto out_free; - dout("page_mkwrite %p %llu~%zd got cap refs on %s\n", - inode, off, len, ceph_cap_string(got)); + doutc(cl, "%llx.%llx %llu~%zd got cap refs on %s\n", ceph_vinop(inode), + off, len, ceph_cap_string(got)); /* Update time before taking page lock */ file_update_time(vma->vm_file); @@ -1763,8 +1788,8 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) __mark_inode_dirty(inode, dirty); } - dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %x\n", - inode, off, len, ceph_cap_string(got), ret); + doutc(cl, "%llx.%llx %llu~%zd dropping cap refs on %s ret %x\n", + ceph_vinop(inode), off, len, ceph_cap_string(got), ret); ceph_put_cap_refs_async(ci, got); out_free: ceph_restore_sigs(&oldset); @@ -1778,6 +1803,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, char *data, size_t len) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct address_space *mapping = inode->i_mapping; struct page *page; @@ -1798,8 +1824,8 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, } } - dout("fill_inline_data %p %llx.%llx len %zu locked_page %p\n", - inode, ceph_vinop(inode), len, locked_page); + doutc(cl, "%p %llx.%llx len %zu locked_page %p\n", inode, + ceph_vinop(inode), len, locked_page); if (len > 0) { void *kaddr = kmap_atomic(page); @@ -1824,6 +1850,7 @@ int ceph_uninline_data(struct file *file) struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_request *req = NULL; struct ceph_cap_flush *prealloc_cf = NULL; struct folio *folio = NULL; @@ -1836,8 +1863,8 @@ int ceph_uninline_data(struct file *file) inline_version = ci->i_inline_version; spin_unlock(&ci->i_ceph_lock); - dout("uninline_data %p %llx.%llx inline_version %llu\n", - inode, ceph_vinop(inode), inline_version); + doutc(cl, "%llx.%llx inline_version %llu\n", ceph_vinop(inode), + inline_version); if (ceph_inode_is_shutdown(inode)) { err = -EIO; @@ -1949,8 +1976,8 @@ int ceph_uninline_data(struct file *file) } out: ceph_free_cap_flush(prealloc_cf); - dout("uninline_data %p %llx.%llx inline_version %llu = %d\n", - inode, ceph_vinop(inode), inline_version, err); + doutc(cl, "%llx.%llx inline_version %llu = %d\n", + ceph_vinop(inode), inline_version, err); return err; } @@ -1979,6 +2006,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, { struct ceph_fs_client *fsc = ceph_inode_to_fs_client(&ci->netfs.inode); struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *cl = fsc->client; struct ceph_osd_request *rd_req = NULL, *wr_req = NULL; struct rb_node **p, *parent; struct ceph_pool_perm *perm; @@ -2013,10 +2041,10 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, goto out; if (pool_ns) - dout("__ceph_pool_perm_get pool %lld ns %.*s no perm cached\n", - pool, (int)pool_ns->len, pool_ns->str); + doutc(cl, "pool %lld ns %.*s no perm cached\n", pool, + (int)pool_ns->len, pool_ns->str); else - dout("__ceph_pool_perm_get pool %lld no perm cached\n", pool); + doutc(cl, "pool %lld no perm cached\n", pool); down_write(&mdsc->pool_perm_rwsem); p = &mdsc->pool_perm_tree.rb_node; @@ -2141,15 +2169,16 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, if (!err) err = have; if (pool_ns) - dout("__ceph_pool_perm_get pool %lld ns %.*s result = %d\n", - pool, (int)pool_ns->len, pool_ns->str, err); + doutc(cl, "pool %lld ns %.*s result = %d\n", pool, + (int)pool_ns->len, pool_ns->str, err); else - dout("__ceph_pool_perm_get pool %lld result = %d\n", pool, err); + doutc(cl, "pool %lld result = %d\n", pool, err); return err; } int ceph_pool_perm_check(struct inode *inode, int need) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_string *pool_ns; s64 pool; @@ -2179,13 +2208,11 @@ int ceph_pool_perm_check(struct inode *inode, int need) check: if (flags & CEPH_I_POOL_PERM) { if ((need & CEPH_CAP_FILE_RD) && !(flags & CEPH_I_POOL_RD)) { - dout("ceph_pool_perm_check pool %lld no read perm\n", - pool); + doutc(cl, "pool %lld no read perm\n", pool); return -EPERM; } if ((need & CEPH_CAP_FILE_WR) && !(flags & CEPH_I_POOL_WR)) { - dout("ceph_pool_perm_check pool %lld no write perm\n", - pool); + doutc(cl, "pool %lld no write perm\n", pool); return -EPERM; } return 0; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index e976f481d5df..46b73a62024e 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -186,10 +186,10 @@ static void __ceph_unreserve_caps(struct ceph_mds_client *mdsc, int nr_caps) mdsc->caps_avail_count += nr_caps; } - dout("%s: caps %d = %d used + %d resv + %d avail\n", - __func__, - mdsc->caps_total_count, mdsc->caps_use_count, - mdsc->caps_reserve_count, mdsc->caps_avail_count); + doutc(mdsc->fsc->client, + "caps %d = %d used + %d resv + %d avail\n", + mdsc->caps_total_count, mdsc->caps_use_count, + mdsc->caps_reserve_count, mdsc->caps_avail_count); BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + mdsc->caps_reserve_count + mdsc->caps_avail_count); @@ -202,6 +202,7 @@ static void __ceph_unreserve_caps(struct ceph_mds_client *mdsc, int nr_caps) int ceph_reserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx, int need) { + struct ceph_client *cl = mdsc->fsc->client; int i, j; struct ceph_cap *cap; int have; @@ -212,7 +213,7 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *s; LIST_HEAD(newcaps); - dout("reserve caps ctx=%p need=%d\n", ctx, need); + doutc(cl, "ctx=%p need=%d\n", ctx, need); /* first reserve any caps that are already allocated */ spin_lock(&mdsc->caps_list_lock); @@ -272,8 +273,8 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, continue; } - pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n", - ctx, need, have + alloc); + pr_warn_client(cl, "ctx=%p ENOMEM need=%d got=%d\n", ctx, need, + have + alloc); err = -ENOMEM; break; } @@ -298,20 +299,21 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, spin_unlock(&mdsc->caps_list_lock); - dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", - ctx, mdsc->caps_total_count, mdsc->caps_use_count, - mdsc->caps_reserve_count, mdsc->caps_avail_count); + doutc(cl, "ctx=%p %d = %d used + %d resv + %d avail\n", ctx, + mdsc->caps_total_count, mdsc->caps_use_count, + mdsc->caps_reserve_count, mdsc->caps_avail_count); return err; } void ceph_unreserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx) { + struct ceph_client *cl = mdsc->fsc->client; bool reclaim = false; if (!ctx->count) return; - dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count); + doutc(cl, "ctx=%p count=%d\n", ctx, ctx->count); spin_lock(&mdsc->caps_list_lock); __ceph_unreserve_caps(mdsc, ctx->count); ctx->count = 0; @@ -328,6 +330,7 @@ void ceph_unreserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap *cap = NULL; /* temporary, until we do something about cap import/export */ @@ -359,9 +362,9 @@ struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, } spin_lock(&mdsc->caps_list_lock); - dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n", - ctx, ctx->count, mdsc->caps_total_count, mdsc->caps_use_count, - mdsc->caps_reserve_count, mdsc->caps_avail_count); + doutc(cl, "ctx=%p (%d) %d = %d used + %d resv + %d avail\n", ctx, + ctx->count, mdsc->caps_total_count, mdsc->caps_use_count, + mdsc->caps_reserve_count, mdsc->caps_avail_count); BUG_ON(!ctx->count); BUG_ON(ctx->count > mdsc->caps_reserve_count); BUG_ON(list_empty(&mdsc->caps_list)); @@ -382,10 +385,12 @@ struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap) { + struct ceph_client *cl = mdsc->fsc->client; + spin_lock(&mdsc->caps_list_lock); - dout("put_cap %p %d = %d used + %d resv + %d avail\n", - cap, mdsc->caps_total_count, mdsc->caps_use_count, - mdsc->caps_reserve_count, mdsc->caps_avail_count); + doutc(cl, "%p %d = %d used + %d resv + %d avail\n", cap, + mdsc->caps_total_count, mdsc->caps_use_count, + mdsc->caps_reserve_count, mdsc->caps_avail_count); mdsc->caps_use_count--; /* * Keep some preallocated caps around (ceph_min_count), to @@ -491,11 +496,13 @@ static void __insert_cap_node(struct ceph_inode_info *ci, static void __cap_set_timeouts(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) { + struct inode *inode = &ci->netfs.inode; struct ceph_mount_options *opt = mdsc->fsc->mount_options; + ci->i_hold_caps_max = round_jiffies(jiffies + opt->caps_wanted_delay_max * HZ); - dout("__cap_set_timeouts %p %lu\n", &ci->netfs.inode, - ci->i_hold_caps_max - jiffies); + doutc(mdsc->fsc->client, "%p %llx.%llx %lu\n", inode, + ceph_vinop(inode), ci->i_hold_caps_max - jiffies); } /* @@ -509,8 +516,11 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc, static void __cap_delay_requeue(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) { - dout("__cap_delay_requeue %p flags 0x%lx at %lu\n", &ci->netfs.inode, - ci->i_ceph_flags, ci->i_hold_caps_max); + struct inode *inode = &ci->netfs.inode; + + doutc(mdsc->fsc->client, "%p %llx.%llx flags 0x%lx at %lu\n", + inode, ceph_vinop(inode), ci->i_ceph_flags, + ci->i_hold_caps_max); if (!mdsc->stopping) { spin_lock(&mdsc->cap_delay_lock); if (!list_empty(&ci->i_cap_delay_list)) { @@ -533,7 +543,9 @@ static void __cap_delay_requeue(struct ceph_mds_client *mdsc, static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) { - dout("__cap_delay_requeue_front %p\n", &ci->netfs.inode); + struct inode *inode = &ci->netfs.inode; + + doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, ceph_vinop(inode)); spin_lock(&mdsc->cap_delay_lock); ci->i_ceph_flags |= CEPH_I_FLUSH; if (!list_empty(&ci->i_cap_delay_list)) @@ -550,7 +562,9 @@ static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc, static void __cap_delay_cancel(struct ceph_mds_client *mdsc, struct ceph_inode_info *ci) { - dout("__cap_delay_cancel %p\n", &ci->netfs.inode); + struct inode *inode = &ci->netfs.inode; + + doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, ceph_vinop(inode)); if (list_empty(&ci->i_cap_delay_list)) return; spin_lock(&mdsc->cap_delay_lock); @@ -562,6 +576,9 @@ static void __cap_delay_cancel(struct ceph_mds_client *mdsc, static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, unsigned issued) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); + unsigned had = __ceph_caps_issued(ci, NULL); lockdep_assert_held(&ci->i_ceph_lock); @@ -586,7 +603,7 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, if (issued & CEPH_CAP_FILE_SHARED) atomic_inc(&ci->i_shared_gen); if (S_ISDIR(ci->netfs.inode.i_mode)) { - dout(" marking %p NOT complete\n", &ci->netfs.inode); + doutc(cl, " marking %p NOT complete\n", inode); __ceph_dir_clear_complete(ci); } } @@ -636,6 +653,7 @@ void ceph_add_cap(struct inode *inode, struct ceph_cap **new_cap) { struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap *cap; int mds = session->s_mds; @@ -644,8 +662,9 @@ void ceph_add_cap(struct inode *inode, lockdep_assert_held(&ci->i_ceph_lock); - dout("add_cap %p mds%d cap %llx %s seq %d\n", inode, - session->s_mds, cap_id, ceph_cap_string(issued), seq); + doutc(cl, "%p %llx.%llx mds%d cap %llx %s seq %d\n", inode, + ceph_vinop(inode), session->s_mds, cap_id, + ceph_cap_string(issued), seq); gen = atomic_read(&session->s_cap_gen); @@ -723,9 +742,9 @@ void ceph_add_cap(struct inode *inode, actual_wanted = __ceph_caps_wanted(ci); if ((wanted & ~actual_wanted) || (issued & ~actual_wanted & CEPH_CAP_ANY_WR)) { - dout(" issued %s, mds wanted %s, actual %s, queueing\n", - ceph_cap_string(issued), ceph_cap_string(wanted), - ceph_cap_string(actual_wanted)); + doutc(cl, "issued %s, mds wanted %s, actual %s, queueing\n", + ceph_cap_string(issued), ceph_cap_string(wanted), + ceph_cap_string(actual_wanted)); __cap_delay_requeue(mdsc, ci); } @@ -742,9 +761,9 @@ void ceph_add_cap(struct inode *inode, WARN_ON(ci->i_auth_cap == cap); } - dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n", - inode, ceph_vinop(inode), cap, ceph_cap_string(issued), - ceph_cap_string(issued|cap->issued), seq, mds); + doutc(cl, "inode %p %llx.%llx cap %p %s now %s seq %d mds%d\n", + inode, ceph_vinop(inode), cap, ceph_cap_string(issued), + ceph_cap_string(issued|cap->issued), seq, mds); cap->cap_id = cap_id; cap->issued = issued; cap->implemented |= issued; @@ -766,6 +785,8 @@ void ceph_add_cap(struct inode *inode, */ static int __cap_is_valid(struct ceph_cap *cap) { + struct inode *inode = &cap->ci->netfs.inode; + struct ceph_client *cl = cap->session->s_mdsc->fsc->client; unsigned long ttl; u32 gen; @@ -773,9 +794,9 @@ static int __cap_is_valid(struct ceph_cap *cap) ttl = cap->session->s_cap_ttl; if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) { - dout("__cap_is_valid %p cap %p issued %s " - "but STALE (gen %u vs %u)\n", &cap->ci->netfs.inode, - cap, ceph_cap_string(cap->issued), cap->cap_gen, gen); + doutc(cl, "%p %llx.%llx cap %p issued %s but STALE (gen %u vs %u)\n", + inode, ceph_vinop(inode), cap, + ceph_cap_string(cap->issued), cap->cap_gen, gen); return 0; } @@ -789,6 +810,8 @@ static int __cap_is_valid(struct ceph_cap *cap) */ int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); int have = ci->i_snap_caps; struct ceph_cap *cap; struct rb_node *p; @@ -799,8 +822,8 @@ int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) cap = rb_entry(p, struct ceph_cap, ci_node); if (!__cap_is_valid(cap)) continue; - dout("__ceph_caps_issued %p cap %p issued %s\n", - &ci->netfs.inode, cap, ceph_cap_string(cap->issued)); + doutc(cl, "%p %llx.%llx cap %p issued %s\n", inode, + ceph_vinop(inode), cap, ceph_cap_string(cap->issued)); have |= cap->issued; if (implemented) *implemented |= cap->implemented; @@ -843,16 +866,18 @@ int __ceph_caps_issued_other(struct ceph_inode_info *ci, struct ceph_cap *ocap) */ static void __touch_cap(struct ceph_cap *cap) { + struct inode *inode = &cap->ci->netfs.inode; struct ceph_mds_session *s = cap->session; + struct ceph_client *cl = s->s_mdsc->fsc->client; spin_lock(&s->s_cap_lock); if (!s->s_cap_iterator) { - dout("__touch_cap %p cap %p mds%d\n", &cap->ci->netfs.inode, cap, - s->s_mds); + doutc(cl, "%p %llx.%llx cap %p mds%d\n", inode, + ceph_vinop(inode), cap, s->s_mds); list_move_tail(&cap->session_caps, &s->s_caps); } else { - dout("__touch_cap %p cap %p mds%d NOP, iterating over caps\n", - &cap->ci->netfs.inode, cap, s->s_mds); + doutc(cl, "%p %llx.%llx cap %p mds%d NOP, iterating over caps\n", + inode, ceph_vinop(inode), cap, s->s_mds); } spin_unlock(&s->s_cap_lock); } @@ -864,15 +889,16 @@ static void __touch_cap(struct ceph_cap *cap) */ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_cap *cap; struct rb_node *p; int have = ci->i_snap_caps; if ((have & mask) == mask) { - dout("__ceph_caps_issued_mask ino 0x%llx snap issued %s" - " (mask %s)\n", ceph_ino(&ci->netfs.inode), - ceph_cap_string(have), - ceph_cap_string(mask)); + doutc(cl, "mask %p %llx.%llx snap issued %s (mask %s)\n", + inode, ceph_vinop(inode), ceph_cap_string(have), + ceph_cap_string(mask)); return 1; } @@ -881,10 +907,10 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) if (!__cap_is_valid(cap)) continue; if ((cap->issued & mask) == mask) { - dout("__ceph_caps_issued_mask ino 0x%llx cap %p issued %s" - " (mask %s)\n", ceph_ino(&ci->netfs.inode), cap, - ceph_cap_string(cap->issued), - ceph_cap_string(mask)); + doutc(cl, "mask %p %llx.%llx cap %p issued %s (mask %s)\n", + inode, ceph_vinop(inode), cap, + ceph_cap_string(cap->issued), + ceph_cap_string(mask)); if (touch) __touch_cap(cap); return 1; @@ -893,10 +919,10 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) /* does a combination of caps satisfy mask? */ have |= cap->issued; if ((have & mask) == mask) { - dout("__ceph_caps_issued_mask ino 0x%llx combo issued %s" - " (mask %s)\n", ceph_ino(&ci->netfs.inode), - ceph_cap_string(cap->issued), - ceph_cap_string(mask)); + doutc(cl, "mask %p %llx.%llx combo issued %s (mask %s)\n", + inode, ceph_vinop(inode), + ceph_cap_string(cap->issued), + ceph_cap_string(mask)); if (touch) { struct rb_node *q; @@ -954,13 +980,14 @@ int __ceph_caps_revoking_other(struct ceph_inode_info *ci, int ceph_caps_revoking(struct ceph_inode_info *ci, int mask) { struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); int ret; spin_lock(&ci->i_ceph_lock); ret = __ceph_caps_revoking_other(ci, NULL, mask); spin_unlock(&ci->i_ceph_lock); - dout("ceph_caps_revoking %p %s = %d\n", inode, - ceph_cap_string(mask), ret); + doutc(cl, "%p %llx.%llx %s = %d\n", inode, ceph_vinop(inode), + ceph_cap_string(mask), ret); return ret; } @@ -1107,19 +1134,21 @@ int ceph_is_any_caps(struct inode *inode) void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) { struct ceph_mds_session *session = cap->session; + struct ceph_client *cl = session->s_mdsc->fsc->client; struct ceph_inode_info *ci = cap->ci; + struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc; int removed = 0; /* 'ci' being NULL means the remove have already occurred */ if (!ci) { - dout("%s: cap inode is NULL\n", __func__); + doutc(cl, "inode is NULL\n"); return; } lockdep_assert_held(&ci->i_ceph_lock); - dout("__ceph_remove_cap %p from %p\n", cap, &ci->netfs.inode); + doutc(cl, "%p from %p %llx.%llx\n", cap, inode, ceph_vinop(inode)); mdsc = ceph_inode_to_fs_client(&ci->netfs.inode)->mdsc; @@ -1132,8 +1161,8 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) spin_lock(&session->s_cap_lock); if (session->s_cap_iterator == cap) { /* not yet, we are iterating over this very cap */ - dout("__ceph_remove_cap delaying %p removal from session %p\n", - cap, cap->session); + doutc(cl, "delaying %p removal from session %p\n", cap, + cap->session); } else { list_del_init(&cap->session_caps); session->s_nr_caps--; @@ -1186,7 +1215,7 @@ void ceph_remove_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, /* 'ci' being NULL means the remove have already occurred */ if (!ci) { - dout("%s: cap inode is NULL\n", __func__); + doutc(mdsc->fsc->client, "inode is NULL\n"); return; } @@ -1228,15 +1257,19 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg) { struct ceph_mds_caps *fc; void *p; - struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc; + struct ceph_mds_client *mdsc = arg->session->s_mdsc; + struct ceph_osd_client *osdc = &mdsc->fsc->client->osdc; - dout("%s %s %llx %llx caps %s wanted %s dirty %s seq %u/%u tid %llu/%llu mseq %u follows %lld size %llu/%llu xattr_ver %llu xattr_len %d\n", - __func__, ceph_cap_op_name(arg->op), arg->cid, arg->ino, - ceph_cap_string(arg->caps), ceph_cap_string(arg->wanted), - ceph_cap_string(arg->dirty), arg->seq, arg->issue_seq, - arg->flush_tid, arg->oldest_flush_tid, arg->mseq, arg->follows, - arg->size, arg->max_size, arg->xattr_version, - arg->xattr_buf ? (int)arg->xattr_buf->vec.iov_len : 0); + doutc(mdsc->fsc->client, + "%s %llx %llx caps %s wanted %s dirty %s seq %u/%u" + " tid %llu/%llu mseq %u follows %lld size %llu/%llu" + " xattr_ver %llu xattr_len %d\n", + ceph_cap_op_name(arg->op), arg->cid, arg->ino, + ceph_cap_string(arg->caps), ceph_cap_string(arg->wanted), + ceph_cap_string(arg->dirty), arg->seq, arg->issue_seq, + arg->flush_tid, arg->oldest_flush_tid, arg->mseq, arg->follows, + arg->size, arg->max_size, arg->xattr_version, + arg->xattr_buf ? (int)arg->xattr_buf->vec.iov_len : 0); msg->hdr.version = cpu_to_le16(12); msg->hdr.tid = cpu_to_le64(arg->flush_tid); @@ -1373,6 +1406,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, { struct ceph_inode_info *ci = cap->ci; struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); int held, revoking; lockdep_assert_held(&ci->i_ceph_lock); @@ -1381,10 +1415,10 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, revoking = cap->implemented & ~cap->issued; retain &= ~revoking; - dout("%s %p cap %p session %p %s -> %s (revoking %s)\n", - __func__, inode, cap, cap->session, - ceph_cap_string(held), ceph_cap_string(held & retain), - ceph_cap_string(revoking)); + doutc(cl, "%p %llx.%llx cap %p session %p %s -> %s (revoking %s)\n", + inode, ceph_vinop(inode), cap, cap->session, + ceph_cap_string(held), ceph_cap_string(held & retain), + ceph_cap_string(revoking)); BUG_ON((retain & CEPH_CAP_PIN) == 0); ci->i_ceph_flags &= ~CEPH_I_FLUSH; @@ -1500,13 +1534,16 @@ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci) { struct ceph_msg *msg; struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, cap_msg_size(arg), GFP_NOFS, false); if (!msg) { - pr_err("error allocating cap msg: ino (%llx.%llx) flushing %s tid %llu, requeuing cap.\n", - ceph_vinop(inode), ceph_cap_string(arg->dirty), - arg->flush_tid); + pr_err_client(cl, + "error allocating cap msg: ino (%llx.%llx)" + " flushing %s tid %llu, requeuing cap.\n", + ceph_vinop(inode), ceph_cap_string(arg->dirty), + arg->flush_tid); spin_lock(&ci->i_ceph_lock); __cap_delay_requeue(arg->session->s_mdsc, ci); spin_unlock(&ci->i_ceph_lock); @@ -1595,11 +1632,13 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci, { struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap_snap *capsnap; u64 oldest_flush_tid = 0; u64 first_tid = 1, last_tid = 0; - dout("__flush_snaps %p session %p\n", inode, session); + doutc(cl, "%p %llx.%llx session %p\n", inode, ceph_vinop(inode), + session); list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { /* @@ -1614,7 +1653,7 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci, /* only flush each capsnap once */ if (capsnap->cap_flush.tid > 0) { - dout(" already flushed %p, skipping\n", capsnap); + doutc(cl, "already flushed %p, skipping\n", capsnap); continue; } @@ -1646,8 +1685,8 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci, int ret; if (!(cap && cap->session == session)) { - dout("__flush_snaps %p auth cap %p not mds%d, " - "stop\n", inode, cap, session->s_mds); + doutc(cl, "%p %llx.%llx auth cap %p not mds%d, stop\n", + inode, ceph_vinop(inode), cap, session->s_mds); break; } @@ -1668,15 +1707,17 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci, refcount_inc(&capsnap->nref); spin_unlock(&ci->i_ceph_lock); - dout("__flush_snaps %p capsnap %p tid %llu %s\n", - inode, capsnap, cf->tid, ceph_cap_string(capsnap->dirty)); + doutc(cl, "%p %llx.%llx capsnap %p tid %llu %s\n", inode, + ceph_vinop(inode), capsnap, cf->tid, + ceph_cap_string(capsnap->dirty)); ret = __send_flush_snap(inode, session, capsnap, cap->mseq, oldest_flush_tid); if (ret < 0) { - pr_err("__flush_snaps: error sending cap flushsnap, " - "ino (%llx.%llx) tid %llu follows %llu\n", - ceph_vinop(inode), cf->tid, capsnap->follows); + pr_err_client(cl, "error sending cap flushsnap, " + "ino (%llx.%llx) tid %llu follows %llu\n", + ceph_vinop(inode), cf->tid, + capsnap->follows); } ceph_put_cap_snap(capsnap); @@ -1689,27 +1730,28 @@ void ceph_flush_snaps(struct ceph_inode_info *ci, { struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_mds_session *session = NULL; bool need_put = false; int mds; - dout("ceph_flush_snaps %p\n", inode); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); if (psession) session = *psession; retry: spin_lock(&ci->i_ceph_lock); if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) { - dout(" no capsnap needs flush, doing nothing\n"); + doutc(cl, " no capsnap needs flush, doing nothing\n"); goto out; } if (!ci->i_auth_cap) { - dout(" no auth cap (migrating?), doing nothing\n"); + doutc(cl, " no auth cap (migrating?), doing nothing\n"); goto out; } mds = ci->i_auth_cap->session->s_mds; if (session && session->s_mds != mds) { - dout(" oops, wrong session %p mutex\n", session); + doutc(cl, " oops, wrong session %p mutex\n", session); ceph_put_mds_session(session); session = NULL; } @@ -1755,21 +1797,23 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask, struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb)->mdsc; struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); int was = ci->i_dirty_caps; int dirty = 0; lockdep_assert_held(&ci->i_ceph_lock); if (!ci->i_auth_cap) { - pr_warn("__mark_dirty_caps %p %llx mask %s, " - "but no auth cap (session was closed?)\n", - inode, ceph_ino(inode), ceph_cap_string(mask)); + pr_warn_client(cl, "%p %llx.%llx mask %s, " + "but no auth cap (session was closed?)\n", + inode, ceph_vinop(inode), + ceph_cap_string(mask)); return 0; } - dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->netfs.inode, - ceph_cap_string(mask), ceph_cap_string(was), - ceph_cap_string(was | mask)); + doutc(cl, "%p %llx.%llx %s dirty %s -> %s\n", inode, + ceph_vinop(inode), ceph_cap_string(mask), + ceph_cap_string(was), ceph_cap_string(was | mask)); ci->i_dirty_caps |= mask; if (was == 0) { struct ceph_mds_session *session = ci->i_auth_cap->session; @@ -1782,8 +1826,9 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask, ci->i_head_snapc = ceph_get_snap_context( ci->i_snap_realm->cached_context); } - dout(" inode %p now dirty snapc %p auth cap %p\n", - &ci->netfs.inode, ci->i_head_snapc, ci->i_auth_cap); + doutc(cl, "%p %llx.%llx now dirty snapc %p auth cap %p\n", + inode, ceph_vinop(inode), ci->i_head_snapc, + ci->i_auth_cap); BUG_ON(!list_empty(&ci->i_dirty_item)); spin_lock(&mdsc->cap_dirty_lock); list_add(&ci->i_dirty_item, &session->s_cap_dirty); @@ -1877,6 +1922,7 @@ static u64 __mark_caps_flushing(struct inode *inode, u64 *oldest_flush_tid) { struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap_flush *cf = NULL; int flushing; @@ -1887,13 +1933,13 @@ static u64 __mark_caps_flushing(struct inode *inode, BUG_ON(!ci->i_prealloc_cap_flush); flushing = ci->i_dirty_caps; - dout("__mark_caps_flushing flushing %s, flushing_caps %s -> %s\n", - ceph_cap_string(flushing), - ceph_cap_string(ci->i_flushing_caps), - ceph_cap_string(ci->i_flushing_caps | flushing)); + doutc(cl, "flushing %s, flushing_caps %s -> %s\n", + ceph_cap_string(flushing), + ceph_cap_string(ci->i_flushing_caps), + ceph_cap_string(ci->i_flushing_caps | flushing)); ci->i_flushing_caps |= flushing; ci->i_dirty_caps = 0; - dout(" inode %p now !dirty\n", inode); + doutc(cl, "%p %llx.%llx now !dirty\n", inode, ceph_vinop(inode)); swap(cf, ci->i_prealloc_cap_flush); cf->caps = flushing; @@ -1924,6 +1970,7 @@ static int try_nonblocking_invalidate(struct inode *inode) __releases(ci->i_ceph_lock) __acquires(ci->i_ceph_lock) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); u32 invalidating_gen = ci->i_rdcache_gen; @@ -1935,12 +1982,13 @@ static int try_nonblocking_invalidate(struct inode *inode) if (inode->i_data.nrpages == 0 && invalidating_gen == ci->i_rdcache_gen) { /* success. */ - dout("try_nonblocking_invalidate %p success\n", inode); + doutc(cl, "%p %llx.%llx success\n", inode, + ceph_vinop(inode)); /* save any racing async invalidate some trouble */ ci->i_rdcache_revoking = ci->i_rdcache_gen - 1; return 0; } - dout("try_nonblocking_invalidate %p failed\n", inode); + doutc(cl, "%p %llx.%llx failed\n", inode, ceph_vinop(inode)); return -1; } @@ -1972,6 +2020,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) { struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_cap *cap; u64 flush_tid, oldest_flush_tid; int file_wanted, used, cap_used; @@ -2046,9 +2095,9 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) } } - dout("check_caps %llx.%llx file_want %s used %s dirty %s flushing %s" - " issued %s revoking %s retain %s %s%s%s\n", ceph_vinop(inode), - ceph_cap_string(file_wanted), + doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s " + "flushing %s issued %s revoking %s retain %s %s%s%s\n", + inode, ceph_vinop(inode), ceph_cap_string(file_wanted), ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps), ceph_cap_string(ci->i_flushing_caps), ceph_cap_string(issued), ceph_cap_string(revoking), @@ -2069,10 +2118,10 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) (revoking & (CEPH_CAP_FILE_CACHE| CEPH_CAP_FILE_LAZYIO)) && /* or revoking cache */ !tried_invalidate) { - dout("check_caps trying to invalidate on %llx.%llx\n", - ceph_vinop(inode)); + doutc(cl, "trying to invalidate on %p %llx.%llx\n", + inode, ceph_vinop(inode)); if (try_nonblocking_invalidate(inode) < 0) { - dout("check_caps queuing invalidate\n"); + doutc(cl, "queuing invalidate\n"); queue_invalidate = true; ci->i_rdcache_revoking = ci->i_rdcache_gen; } @@ -2100,35 +2149,35 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) cap_used &= ~ci->i_auth_cap->issued; revoking = cap->implemented & ~cap->issued; - dout(" mds%d cap %p used %s issued %s implemented %s revoking %s\n", - cap->mds, cap, ceph_cap_string(cap_used), - ceph_cap_string(cap->issued), - ceph_cap_string(cap->implemented), - ceph_cap_string(revoking)); + doutc(cl, " mds%d cap %p used %s issued %s implemented %s revoking %s\n", + cap->mds, cap, ceph_cap_string(cap_used), + ceph_cap_string(cap->issued), + ceph_cap_string(cap->implemented), + ceph_cap_string(revoking)); if (cap == ci->i_auth_cap && (cap->issued & CEPH_CAP_FILE_WR)) { /* request larger max_size from MDS? */ if (ci->i_wanted_max_size > ci->i_max_size && ci->i_wanted_max_size > ci->i_requested_max_size) { - dout("requesting new max_size\n"); + doutc(cl, "requesting new max_size\n"); goto ack; } /* approaching file_max? */ if (__ceph_should_report_size(ci)) { - dout("i_size approaching max_size\n"); + doutc(cl, "i_size approaching max_size\n"); goto ack; } } /* flush anything dirty? */ if (cap == ci->i_auth_cap) { if ((flags & CHECK_CAPS_FLUSH) && ci->i_dirty_caps) { - dout("flushing dirty caps\n"); + doutc(cl, "flushing dirty caps\n"); goto ack; } if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) { - dout("flushing snap caps\n"); + doutc(cl, "flushing snap caps\n"); goto ack; } } @@ -2136,7 +2185,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) /* completed revocation? going down and there are no caps? */ if (revoking) { if ((revoking & cap_used) == 0) { - dout("completed revocation of %s\n", + doutc(cl, "completed revocation of %s\n", ceph_cap_string(cap->implemented & ~cap->issued)); goto ack; } @@ -2314,6 +2363,7 @@ static int caps_are_flushed(struct inode *inode, u64 flush_tid) static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) { struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL; int ret, err = 0; @@ -2403,8 +2453,9 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) kfree(sessions); } - dout("%s %p wait on tid %llu %llu\n", __func__, - inode, req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL); + doutc(cl, "%p %llx.%llx wait on tid %llu %llu\n", inode, + ceph_vinop(inode), req1 ? req1->r_tid : 0ULL, + req2 ? req2->r_tid : 0ULL); if (req1) { ret = !wait_for_completion_timeout(&req1->r_safe_completion, ceph_timeout_jiffies(req1->r_timeout)); @@ -2430,11 +2481,13 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); u64 flush_tid; int ret, err; int dirty; - dout("fsync %p%s\n", inode, datasync ? " datasync" : ""); + doutc(cl, "%p %llx.%llx%s\n", inode, ceph_vinop(inode), + datasync ? " datasync" : ""); ret = file_write_and_wait_range(file, start, end); if (datasync) @@ -2445,7 +2498,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) goto out; dirty = try_flush_caps(inode, &flush_tid); - dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); + doutc(cl, "dirty caps are %s\n", ceph_cap_string(dirty)); err = flush_mdlog_and_wait_inode_unsafe_requests(inode); @@ -2466,7 +2519,8 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) if (err < 0) ret = err; out: - dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret); + doutc(cl, "%p %llx.%llx%s result=%d\n", inode, ceph_vinop(inode), + datasync ? " datasync" : "", ret); return ret; } @@ -2479,12 +2533,13 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); u64 flush_tid; int err = 0; int dirty; int wait = (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync); - dout("write_inode %p wait=%d\n", inode, wait); + doutc(cl, "%p %llx.%llx wait=%d\n", inode, ceph_vinop(inode), wait); ceph_fscache_unpin_writeback(inode, wbc); if (wait) { err = ceph_wait_on_async_create(inode); @@ -2514,6 +2569,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, __acquires(ci->i_ceph_lock) { struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap *cap; struct ceph_cap_flush *cf; int ret; @@ -2539,8 +2595,8 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, cap = ci->i_auth_cap; if (!(cap && cap->session == session)) { - pr_err("%p auth cap %p not mds%d ???\n", - inode, cap, session->s_mds); + pr_err_client(cl, "%p auth cap %p not mds%d ???\n", + inode, cap, session->s_mds); break; } @@ -2549,8 +2605,9 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, if (!cf->is_capsnap) { struct cap_msg_args arg; - dout("kick_flushing_caps %p cap %p tid %llu %s\n", - inode, cap, cf->tid, ceph_cap_string(cf->caps)); + doutc(cl, "%p %llx.%llx cap %p tid %llu %s\n", + inode, ceph_vinop(inode), cap, cf->tid, + ceph_cap_string(cf->caps)); __prep_cap(&arg, cap, CEPH_CAP_OP_FLUSH, (cf->tid < last_snap_flush ? CEPH_CLIENT_CAPS_PENDING_CAPSNAP : 0), @@ -2564,9 +2621,9 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_cap_snap *capsnap = container_of(cf, struct ceph_cap_snap, cap_flush); - dout("kick_flushing_caps %p capsnap %p tid %llu %s\n", - inode, capsnap, cf->tid, - ceph_cap_string(capsnap->dirty)); + doutc(cl, "%p %llx.%llx capsnap %p tid %llu %s\n", + inode, ceph_vinop(inode), capsnap, cf->tid, + ceph_cap_string(capsnap->dirty)); refcount_inc(&capsnap->nref); spin_unlock(&ci->i_ceph_lock); @@ -2574,11 +2631,10 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, ret = __send_flush_snap(inode, session, capsnap, cap->mseq, oldest_flush_tid); if (ret < 0) { - pr_err("kick_flushing_caps: error sending " - "cap flushsnap, ino (%llx.%llx) " - "tid %llu follows %llu\n", - ceph_vinop(inode), cf->tid, - capsnap->follows); + pr_err_client(cl, "error sending cap flushsnap," + " %p %llx.%llx tid %llu follows %llu\n", + inode, ceph_vinop(inode), cf->tid, + capsnap->follows); } ceph_put_cap_snap(capsnap); @@ -2591,22 +2647,26 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct ceph_cap *cap; u64 oldest_flush_tid; - dout("early_kick_flushing_caps mds%d\n", session->s_mds); + doutc(cl, "mds%d\n", session->s_mds); spin_lock(&mdsc->cap_dirty_lock); oldest_flush_tid = __get_oldest_flush_tid(mdsc); spin_unlock(&mdsc->cap_dirty_lock); list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { + struct inode *inode = &ci->netfs.inode; + spin_lock(&ci->i_ceph_lock); cap = ci->i_auth_cap; if (!(cap && cap->session == session)) { - pr_err("%p auth cap %p not mds%d ???\n", - &ci->netfs.inode, cap, session->s_mds); + pr_err_client(cl, "%p %llx.%llx auth cap %p not mds%d ???\n", + inode, ceph_vinop(inode), cap, + session->s_mds); spin_unlock(&ci->i_ceph_lock); continue; } @@ -2639,24 +2699,28 @@ void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct ceph_cap *cap; u64 oldest_flush_tid; lockdep_assert_held(&session->s_mutex); - dout("kick_flushing_caps mds%d\n", session->s_mds); + doutc(cl, "mds%d\n", session->s_mds); spin_lock(&mdsc->cap_dirty_lock); oldest_flush_tid = __get_oldest_flush_tid(mdsc); spin_unlock(&mdsc->cap_dirty_lock); list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) { + struct inode *inode = &ci->netfs.inode; + spin_lock(&ci->i_ceph_lock); cap = ci->i_auth_cap; if (!(cap && cap->session == session)) { - pr_err("%p auth cap %p not mds%d ???\n", - &ci->netfs.inode, cap, session->s_mds); + pr_err_client(cl, "%p %llx.%llx auth cap %p not mds%d ???\n", + inode, ceph_vinop(inode), cap, + session->s_mds); spin_unlock(&ci->i_ceph_lock); continue; } @@ -2673,11 +2737,13 @@ void ceph_kick_flushing_inode_caps(struct ceph_mds_session *session, { struct ceph_mds_client *mdsc = session->s_mdsc; struct ceph_cap *cap = ci->i_auth_cap; + struct inode *inode = &ci->netfs.inode; lockdep_assert_held(&ci->i_ceph_lock); - dout("%s %p flushing %s\n", __func__, &ci->netfs.inode, - ceph_cap_string(ci->i_flushing_caps)); + doutc(mdsc->fsc->client, "%p %llx.%llx flushing %s\n", + inode, ceph_vinop(inode), + ceph_cap_string(ci->i_flushing_caps)); if (!list_empty(&ci->i_cap_flush_list)) { u64 oldest_flush_tid; @@ -2699,6 +2765,9 @@ void ceph_kick_flushing_inode_caps(struct ceph_mds_session *session, void ceph_take_cap_refs(struct ceph_inode_info *ci, int got, bool snap_rwsem_locked) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); + lockdep_assert_held(&ci->i_ceph_lock); if (got & CEPH_CAP_PIN) @@ -2719,10 +2788,10 @@ void ceph_take_cap_refs(struct ceph_inode_info *ci, int got, } if (got & CEPH_CAP_FILE_BUFFER) { if (ci->i_wb_ref == 0) - ihold(&ci->netfs.inode); + ihold(inode); ci->i_wb_ref++; - dout("%s %p wb %d -> %d (?)\n", __func__, - &ci->netfs.inode, ci->i_wb_ref-1, ci->i_wb_ref); + doutc(cl, "%p %llx.%llx wb %d -> %d (?)\n", inode, + ceph_vinop(inode), ci->i_wb_ref-1, ci->i_wb_ref); } } @@ -2750,19 +2819,22 @@ static int try_get_cap_refs(struct inode *inode, int need, int want, { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; + struct ceph_client *cl = ceph_inode_to_client(inode); int ret = 0; int have, implemented; bool snap_rwsem_locked = false; - dout("get_cap_refs %p need %s want %s\n", inode, - ceph_cap_string(need), ceph_cap_string(want)); + doutc(cl, "%p %llx.%llx need %s want %s\n", inode, + ceph_vinop(inode), ceph_cap_string(need), + ceph_cap_string(want)); again: spin_lock(&ci->i_ceph_lock); if ((flags & CHECK_FILELOCK) && (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK)) { - dout("try_get_cap_refs %p error filelock\n", inode); + doutc(cl, "%p %llx.%llx error filelock\n", inode, + ceph_vinop(inode)); ret = -EIO; goto out_unlock; } @@ -2782,8 +2854,8 @@ static int try_get_cap_refs(struct inode *inode, int need, int want, if (have & need & CEPH_CAP_FILE_WR) { if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) { - dout("get_cap_refs %p endoff %llu > maxsize %llu\n", - inode, endoff, ci->i_max_size); + doutc(cl, "%p %llx.%llx endoff %llu > maxsize %llu\n", + inode, ceph_vinop(inode), endoff, ci->i_max_size); if (endoff > ci->i_requested_max_size) ret = ci->i_auth_cap ? -EFBIG : -EUCLEAN; goto out_unlock; @@ -2793,7 +2865,8 @@ static int try_get_cap_refs(struct inode *inode, int need, int want, * can get a final snapshot value for size+mtime. */ if (__ceph_have_pending_cap_snap(ci)) { - dout("get_cap_refs %p cap_snap_pending\n", inode); + doutc(cl, "%p %llx.%llx cap_snap_pending\n", inode, + ceph_vinop(inode)); goto out_unlock; } } @@ -2811,9 +2884,9 @@ static int try_get_cap_refs(struct inode *inode, int need, int want, int not = want & ~(have & need); int revoking = implemented & ~have; int exclude = revoking & not; - dout("get_cap_refs %p have %s but not %s (revoking %s)\n", - inode, ceph_cap_string(have), ceph_cap_string(not), - ceph_cap_string(revoking)); + doutc(cl, "%p %llx.%llx have %s but not %s (revoking %s)\n", + inode, ceph_vinop(inode), ceph_cap_string(have), + ceph_cap_string(not), ceph_cap_string(revoking)); if (!exclude || !(exclude & CEPH_CAP_FILE_BUFFER)) { if (!snap_rwsem_locked && !ci->i_head_snapc && @@ -2853,28 +2926,31 @@ static int try_get_cap_refs(struct inode *inode, int need, int want, spin_unlock(&s->s_cap_lock); } if (session_readonly) { - dout("get_cap_refs %p need %s but mds%d readonly\n", - inode, ceph_cap_string(need), ci->i_auth_cap->mds); + doutc(cl, "%p %llx.%llx need %s but mds%d readonly\n", + inode, ceph_vinop(inode), ceph_cap_string(need), + ci->i_auth_cap->mds); ret = -EROFS; goto out_unlock; } if (ceph_inode_is_shutdown(inode)) { - dout("get_cap_refs %p inode is shutdown\n", inode); + doutc(cl, "%p %llx.%llx inode is shutdown\n", + inode, ceph_vinop(inode)); ret = -ESTALE; goto out_unlock; } mds_wanted = __ceph_caps_mds_wanted(ci, false); if (need & ~mds_wanted) { - dout("get_cap_refs %p need %s > mds_wanted %s\n", - inode, ceph_cap_string(need), - ceph_cap_string(mds_wanted)); + doutc(cl, "%p %llx.%llx need %s > mds_wanted %s\n", + inode, ceph_vinop(inode), ceph_cap_string(need), + ceph_cap_string(mds_wanted)); ret = -EUCLEAN; goto out_unlock; } - dout("get_cap_refs %p have %s need %s\n", inode, - ceph_cap_string(have), ceph_cap_string(need)); + doutc(cl, "%p %llx.%llx have %s need %s\n", inode, + ceph_vinop(inode), ceph_cap_string(have), + ceph_cap_string(need)); } out_unlock: @@ -2889,8 +2965,8 @@ static int try_get_cap_refs(struct inode *inode, int need, int want, else if (ret == 1) ceph_update_cap_hit(&mdsc->metric); - dout("get_cap_refs %p ret %d got %s\n", inode, - ret, ceph_cap_string(*got)); + doutc(cl, "%p %llx.%llx ret %d got %s\n", inode, + ceph_vinop(inode), ret, ceph_cap_string(*got)); return ret; } @@ -2902,13 +2978,14 @@ static int try_get_cap_refs(struct inode *inode, int need, int want, static void check_max_size(struct inode *inode, loff_t endoff) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); int check = 0; /* do we need to explicitly request a larger max_size? */ spin_lock(&ci->i_ceph_lock); if (endoff >= ci->i_max_size && endoff > ci->i_wanted_max_size) { - dout("write %p at large endoff %llu, req max_size\n", - inode, endoff); + doutc(cl, "write %p %llx.%llx at large endoff %llu, req max_size\n", + inode, ceph_vinop(inode), endoff); ci->i_wanted_max_size = endoff; } /* duplicate ceph_check_caps()'s logic */ @@ -3118,10 +3195,12 @@ void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps) static int ceph_try_drop_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap *capsnap) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); + if (!capsnap->need_flush && !capsnap->writing && !capsnap->dirty_pages) { - dout("dropping cap_snap %p follows %llu\n", - capsnap, capsnap->follows); + doutc(cl, "%p follows %llu\n", capsnap, capsnap->follows); BUG_ON(capsnap->cap_flush.tid > 0); ceph_put_snap_context(capsnap->context); if (!list_is_last(&capsnap->ci_item, &ci->i_cap_snaps)) @@ -3153,6 +3232,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had, enum put_cap_refs_mode mode) { struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); int last = 0, put = 0, flushsnaps = 0, wake = 0; bool check_flushsnaps = false; @@ -3175,8 +3255,8 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had, put++; check_flushsnaps = true; } - dout("put_cap_refs %p wb %d -> %d (?)\n", - inode, ci->i_wb_ref+1, ci->i_wb_ref); + doutc(cl, "%p %llx.%llx wb %d -> %d (?)\n", inode, + ceph_vinop(inode), ci->i_wb_ref+1, ci->i_wb_ref); } if (had & CEPH_CAP_FILE_WR) { if (--ci->i_wr_ref == 0) { @@ -3216,8 +3296,8 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had, } spin_unlock(&ci->i_ceph_lock); - dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had), - last ? " last" : "", put ? " put" : ""); + doutc(cl, "%p %llx.%llx had %s%s%s\n", inode, ceph_vinop(inode), + ceph_cap_string(had), last ? " last" : "", put ? " put" : ""); switch (mode) { case PUT_CAP_REFS_SYNC: @@ -3267,6 +3347,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, struct ceph_snap_context *snapc) { struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_cap_snap *capsnap = NULL, *iter; int put = 0; bool last = false; @@ -3290,11 +3371,10 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, ceph_put_snap_context(ci->i_head_snapc); ci->i_head_snapc = NULL; } - dout("put_wrbuffer_cap_refs on %p head %d/%d -> %d/%d %s\n", - inode, - ci->i_wrbuffer_ref+nr, ci->i_wrbuffer_ref_head+nr, - ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head, - last ? " LAST" : ""); + doutc(cl, "on %p %llx.%llx head %d/%d -> %d/%d %s\n", + inode, ceph_vinop(inode), ci->i_wrbuffer_ref+nr, + ci->i_wrbuffer_ref_head+nr, ci->i_wrbuffer_ref, + ci->i_wrbuffer_ref_head, last ? " LAST" : ""); } else { list_for_each_entry(iter, &ci->i_cap_snaps, ci_item) { if (iter->context == snapc) { @@ -3324,13 +3404,12 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, } } } - dout("put_wrbuffer_cap_refs on %p cap_snap %p " - " snap %lld %d/%d -> %d/%d %s%s\n", - inode, capsnap, capsnap->context->seq, - ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr, - ci->i_wrbuffer_ref, capsnap->dirty_pages, - last ? " (wrbuffer last)" : "", - complete_capsnap ? " (complete capsnap)" : ""); + doutc(cl, "%p %llx.%llx cap_snap %p snap %lld %d/%d -> %d/%d %s%s\n", + inode, ceph_vinop(inode), capsnap, capsnap->context->seq, + ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr, + ci->i_wrbuffer_ref, capsnap->dirty_pages, + last ? " (wrbuffer last)" : "", + complete_capsnap ? " (complete capsnap)" : ""); } unlock: @@ -3353,9 +3432,10 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, */ static void invalidate_aliases(struct inode *inode) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct dentry *dn, *prev = NULL; - dout("invalidate_aliases inode %p\n", inode); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); d_prune_aliases(inode); /* * For non-directory inode, d_find_alias() only returns @@ -3414,6 +3494,7 @@ static void handle_cap_grant(struct inode *inode, __releases(ci->i_ceph_lock) __releases(session->s_mdsc->snap_rwsem) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); int seq = le32_to_cpu(grant->seq); int newcaps = le32_to_cpu(grant->caps); @@ -3437,10 +3518,11 @@ static void handle_cap_grant(struct inode *inode, if (IS_ENCRYPTED(inode) && size) size = extra_info->fscrypt_file_size; - dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", - inode, cap, session->s_mds, seq, ceph_cap_string(newcaps)); - dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, - i_size_read(inode)); + doutc(cl, "%p %llx.%llx cap %p mds%d seq %d %s\n", inode, + ceph_vinop(inode), cap, session->s_mds, seq, + ceph_cap_string(newcaps)); + doutc(cl, " size %llu max_size %llu, i_size %llu\n", size, + max_size, i_size_read(inode)); /* @@ -3500,15 +3582,17 @@ static void handle_cap_grant(struct inode *inode, inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid)); inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid)); ci->i_btime = extra_info->btime; - dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode, - from_kuid(&init_user_ns, inode->i_uid), - from_kgid(&init_user_ns, inode->i_gid)); + doutc(cl, "%p %llx.%llx mode 0%o uid.gid %d.%d\n", inode, + ceph_vinop(inode), inode->i_mode, + from_kuid(&init_user_ns, inode->i_uid), + from_kgid(&init_user_ns, inode->i_gid)); #if IS_ENABLED(CONFIG_FS_ENCRYPTION) if (ci->fscrypt_auth_len != extra_info->fscrypt_auth_len || memcmp(ci->fscrypt_auth, extra_info->fscrypt_auth, ci->fscrypt_auth_len)) - pr_warn_ratelimited("%s: cap grant attempt to change fscrypt_auth on non-I_NEW inode (old len %d new len %d)\n", - __func__, ci->fscrypt_auth_len, + pr_warn_ratelimited_client(cl, + "cap grant attempt to change fscrypt_auth on non-I_NEW inode (old len %d new len %d)\n", + ci->fscrypt_auth_len, extra_info->fscrypt_auth_len); #endif } @@ -3526,8 +3610,8 @@ static void handle_cap_grant(struct inode *inode, u64 version = le64_to_cpu(grant->xattr_version); if (version > ci->i_xattrs.version) { - dout(" got new xattrs v%llu on %p len %d\n", - version, inode, len); + doutc(cl, " got new xattrs v%llu on %p %llx.%llx len %d\n", + version, inode, ceph_vinop(inode), len); if (ci->i_xattrs.blob) ceph_buffer_put(ci->i_xattrs.blob); ci->i_xattrs.blob = ceph_buffer_get(xattr_buf); @@ -3578,8 +3662,8 @@ static void handle_cap_grant(struct inode *inode, if (ci->i_auth_cap == cap && (newcaps & CEPH_CAP_ANY_FILE_WR)) { if (max_size != ci->i_max_size) { - dout("max_size %lld -> %llu\n", - ci->i_max_size, max_size); + doutc(cl, "max_size %lld -> %llu\n", ci->i_max_size, + max_size); ci->i_max_size = max_size; if (max_size >= ci->i_wanted_max_size) { ci->i_wanted_max_size = 0; /* reset */ @@ -3593,10 +3677,9 @@ static void handle_cap_grant(struct inode *inode, wanted = __ceph_caps_wanted(ci); used = __ceph_caps_used(ci); dirty = __ceph_caps_dirty(ci); - dout(" my wanted = %s, used = %s, dirty %s\n", - ceph_cap_string(wanted), - ceph_cap_string(used), - ceph_cap_string(dirty)); + doutc(cl, " my wanted = %s, used = %s, dirty %s\n", + ceph_cap_string(wanted), ceph_cap_string(used), + ceph_cap_string(dirty)); if ((was_stale || le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) && (wanted & ~(cap->mds_wanted | newcaps))) { @@ -3617,10 +3700,9 @@ static void handle_cap_grant(struct inode *inode, if (cap->issued & ~newcaps) { int revoking = cap->issued & ~newcaps; - dout("revocation: %s -> %s (revoking %s)\n", - ceph_cap_string(cap->issued), - ceph_cap_string(newcaps), - ceph_cap_string(revoking)); + doutc(cl, "revocation: %s -> %s (revoking %s)\n", + ceph_cap_string(cap->issued), ceph_cap_string(newcaps), + ceph_cap_string(revoking)); if (S_ISREG(inode->i_mode) && (revoking & used & CEPH_CAP_FILE_BUFFER)) writeback = true; /* initiate writeback; will delay ack */ @@ -3638,11 +3720,12 @@ static void handle_cap_grant(struct inode *inode, cap->issued = newcaps; cap->implemented |= newcaps; } else if (cap->issued == newcaps) { - dout("caps unchanged: %s -> %s\n", - ceph_cap_string(cap->issued), ceph_cap_string(newcaps)); + doutc(cl, "caps unchanged: %s -> %s\n", + ceph_cap_string(cap->issued), + ceph_cap_string(newcaps)); } else { - dout("grant: %s -> %s\n", ceph_cap_string(cap->issued), - ceph_cap_string(newcaps)); + doutc(cl, "grant: %s -> %s\n", ceph_cap_string(cap->issued), + ceph_cap_string(newcaps)); /* non-auth MDS is revoking the newly grant caps ? */ if (cap == ci->i_auth_cap && __ceph_caps_revoking_other(ci, cap, newcaps)) @@ -3731,6 +3814,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap_flush *cf, *tmp_cf; LIST_HEAD(to_remove); unsigned seq = le32_to_cpu(m->seq); @@ -3767,11 +3851,11 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, } } - dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s," - " flushing %s -> %s\n", - inode, session->s_mds, seq, ceph_cap_string(dirty), - ceph_cap_string(cleaned), ceph_cap_string(ci->i_flushing_caps), - ceph_cap_string(ci->i_flushing_caps & ~cleaned)); + doutc(cl, "%p %llx.%llx mds%d seq %d on %s cleaned %s, flushing %s -> %s\n", + inode, ceph_vinop(inode), session->s_mds, seq, + ceph_cap_string(dirty), ceph_cap_string(cleaned), + ceph_cap_string(ci->i_flushing_caps), + ceph_cap_string(ci->i_flushing_caps & ~cleaned)); if (list_empty(&to_remove) && !cleaned) goto out; @@ -3787,18 +3871,21 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, if (list_empty(&ci->i_cap_flush_list)) { list_del_init(&ci->i_flushing_item); if (!list_empty(&session->s_cap_flushing)) { - dout(" mds%d still flushing cap on %p\n", - session->s_mds, - &list_first_entry(&session->s_cap_flushing, - struct ceph_inode_info, - i_flushing_item)->netfs.inode); + struct inode *inode = + &list_first_entry(&session->s_cap_flushing, + struct ceph_inode_info, + i_flushing_item)->netfs.inode; + doutc(cl, " mds%d still flushing cap on %p %llx.%llx\n", + session->s_mds, inode, ceph_vinop(inode)); } } mdsc->num_cap_flushing--; - dout(" inode %p now !flushing\n", inode); + doutc(cl, " %p %llx.%llx now !flushing\n", inode, + ceph_vinop(inode)); if (ci->i_dirty_caps == 0) { - dout(" inode %p now clean\n", inode); + doutc(cl, " %p %llx.%llx now clean\n", inode, + ceph_vinop(inode)); BUG_ON(!list_empty(&ci->i_dirty_item)); drop = true; if (ci->i_wr_ref == 0 && @@ -3837,11 +3924,13 @@ void __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap, { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; bool ret; lockdep_assert_held(&ci->i_ceph_lock); - dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci); + doutc(cl, "removing capsnap %p, %p %llx.%llx ci %p\n", capsnap, + inode, ceph_vinop(inode), ci); list_del_init(&capsnap->ci_item); ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush); @@ -3881,28 +3970,30 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid, { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; u64 follows = le64_to_cpu(m->snap_follows); struct ceph_cap_snap *capsnap = NULL, *iter; bool wake_ci = false; bool wake_mdsc = false; - dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n", - inode, ci, session->s_mds, follows); + doutc(cl, "%p %llx.%llx ci %p mds%d follows %lld\n", inode, + ceph_vinop(inode), ci, session->s_mds, follows); spin_lock(&ci->i_ceph_lock); list_for_each_entry(iter, &ci->i_cap_snaps, ci_item) { if (iter->follows == follows) { if (iter->cap_flush.tid != flush_tid) { - dout(" cap_snap %p follows %lld tid %lld !=" - " %lld\n", iter, follows, - flush_tid, iter->cap_flush.tid); + doutc(cl, " cap_snap %p follows %lld " + "tid %lld != %lld\n", iter, + follows, flush_tid, + iter->cap_flush.tid); break; } capsnap = iter; break; } else { - dout(" skipping cap_snap %p follows %lld\n", - iter, iter->follows); + doutc(cl, " skipping cap_snap %p follows %lld\n", + iter, iter->follows); } } if (capsnap) @@ -3931,6 +4022,7 @@ static bool handle_cap_trunc(struct inode *inode, struct cap_extra_info *extra_info) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); int mds = session->s_mds; int seq = le32_to_cpu(trunc->seq); u32 truncate_seq = le32_to_cpu(trunc->truncate_seq); @@ -3953,8 +4045,8 @@ static bool handle_cap_trunc(struct inode *inode, if (IS_ENCRYPTED(inode) && size) size = extra_info->fscrypt_file_size; - dout("%s inode %p mds%d seq %d to %lld truncate seq %d\n", - __func__, inode, mds, seq, truncate_size, truncate_seq); + doutc(cl, "%p %llx.%llx mds%d seq %d to %lld truncate seq %d\n", + inode, ceph_vinop(inode), mds, seq, truncate_size, truncate_seq); queue_trunc = ceph_fill_file_size(inode, issued, truncate_seq, truncate_size, size); return queue_trunc; @@ -3973,6 +4065,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, struct ceph_mds_session *session) { struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_session *tsession = NULL; struct ceph_cap *cap, *tcap, *new_cap = NULL; struct ceph_inode_info *ci = ceph_inode(inode); @@ -3992,8 +4085,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, target = -1; } - dout("handle_cap_export inode %p ci %p mds%d mseq %d target %d\n", - inode, ci, mds, mseq, target); + doutc(cl, "%p %llx.%llx ci %p mds%d mseq %d target %d\n", + inode, ceph_vinop(inode), ci, mds, mseq, target); retry: down_read(&mdsc->snap_rwsem); spin_lock(&ci->i_ceph_lock); @@ -4013,12 +4106,13 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, issued = cap->issued; if (issued != cap->implemented) - pr_err_ratelimited("handle_cap_export: issued != implemented: " - "ino (%llx.%llx) mds%d seq %d mseq %d " - "issued %s implemented %s\n", - ceph_vinop(inode), mds, cap->seq, cap->mseq, - ceph_cap_string(issued), - ceph_cap_string(cap->implemented)); + pr_err_ratelimited_client(cl, "issued != implemented: " + "%p %llx.%llx mds%d seq %d mseq %d" + " issued %s implemented %s\n", + inode, ceph_vinop(inode), mds, + cap->seq, cap->mseq, + ceph_cap_string(issued), + ceph_cap_string(cap->implemented)); tcap = __get_cap_for_mds(ci, target); @@ -4026,7 +4120,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, /* already have caps from the target */ if (tcap->cap_id == t_cap_id && ceph_seq_cmp(tcap->seq, t_seq) < 0) { - dout(" updating import cap %p mds%d\n", tcap, target); + doutc(cl, " updating import cap %p mds%d\n", tcap, + target); tcap->cap_id = t_cap_id; tcap->seq = t_seq - 1; tcap->issue_seq = t_seq - 1; @@ -4107,6 +4202,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, struct ceph_cap **target_cap, int *old_issued) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap *cap, *ocap, *new_cap = NULL; int mds = session->s_mds; int issued; @@ -4127,8 +4223,8 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, peer = -1; } - dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n", - inode, ci, mds, mseq, peer); + doutc(cl, "%p %llx.%llx ci %p mds%d mseq %d peer %d\n", + inode, ceph_vinop(inode), ci, mds, mseq, peer); retry: cap = __get_cap_for_mds(ci, mds); if (!cap) { @@ -4154,17 +4250,17 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL; if (ocap && ocap->cap_id == p_cap_id) { - dout(" remove export cap %p mds%d flags %d\n", - ocap, peer, ph->flags); + doutc(cl, " remove export cap %p mds%d flags %d\n", + ocap, peer, ph->flags); if ((ph->flags & CEPH_CAP_FLAG_AUTH) && (ocap->seq != le32_to_cpu(ph->seq) || ocap->mseq != le32_to_cpu(ph->mseq))) { - pr_err_ratelimited("handle_cap_import: " - "mismatched seq/mseq: ino (%llx.%llx) " - "mds%d seq %d mseq %d importer mds%d " - "has peer seq %d mseq %d\n", - ceph_vinop(inode), peer, ocap->seq, - ocap->mseq, mds, le32_to_cpu(ph->seq), + pr_err_ratelimited_client(cl, "mismatched seq/mseq: " + "%p %llx.%llx mds%d seq %d mseq %d" + " importer mds%d has peer seq %d mseq %d\n", + inode, ceph_vinop(inode), peer, + ocap->seq, ocap->mseq, mds, + le32_to_cpu(ph->seq), le32_to_cpu(ph->mseq)); } ceph_remove_cap(mdsc, ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); @@ -4230,6 +4326,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, struct ceph_msg *msg) { struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct inode *inode; struct ceph_inode_info *ci; struct ceph_cap *cap; @@ -4248,7 +4345,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, bool close_sessions = false; bool do_cap_release = false; - dout("handle_caps from mds%d\n", session->s_mds); + doutc(cl, "from mds%d\n", session->s_mds); if (!ceph_inc_mds_stopping_blocker(mdsc, session)) return; @@ -4350,15 +4447,15 @@ void ceph_handle_caps(struct ceph_mds_session *session, /* lookup ino */ inode = ceph_find_inode(mdsc->fsc->sb, vino); - dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino, - vino.snap, inode); + doutc(cl, " op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), + vino.ino, vino.snap, inode); mutex_lock(&session->s_mutex); - dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, - (unsigned)seq); + doutc(cl, " mds%d seq %lld cap seq %u\n", session->s_mds, + session->s_seq, (unsigned)seq); if (!inode) { - dout(" i don't have ino %llx\n", vino.ino); + doutc(cl, " i don't have ino %llx\n", vino.ino); switch (op) { case CEPH_CAP_OP_IMPORT: @@ -4413,9 +4510,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ceph_inode(inode), session->s_mds); if (!cap) { - dout(" no cap on %p ino %llx.%llx from mds%d\n", - inode, ceph_ino(inode), ceph_snap(inode), - session->s_mds); + doutc(cl, " no cap on %p ino %llx.%llx from mds%d\n", + inode, ceph_ino(inode), ceph_snap(inode), + session->s_mds); spin_unlock(&ci->i_ceph_lock); switch (op) { case CEPH_CAP_OP_REVOKE: @@ -4453,8 +4550,8 @@ void ceph_handle_caps(struct ceph_mds_session *session, default: spin_unlock(&ci->i_ceph_lock); - pr_err("ceph_handle_caps: unknown cap op %d %s\n", op, - ceph_cap_op_name(op)); + pr_err_client(cl, "unknown cap op %d %s\n", op, + ceph_cap_op_name(op)); } done: @@ -4495,7 +4592,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, goto done; bad: - pr_err("ceph_handle_caps: corrupt message\n"); + pr_err_client(cl, "corrupt message\n"); ceph_msg_dump(msg); goto out; } @@ -4509,6 +4606,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, */ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; struct inode *inode; struct ceph_inode_info *ci; struct ceph_mount_options *opt = mdsc->fsc->mount_options; @@ -4516,14 +4614,14 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) unsigned long loop_start = jiffies; unsigned long delay = 0; - dout("check_delayed_caps\n"); + doutc(cl, "begin\n"); spin_lock(&mdsc->cap_delay_lock); while (!list_empty(&mdsc->cap_delay_list)) { ci = list_first_entry(&mdsc->cap_delay_list, struct ceph_inode_info, i_cap_delay_list); if (time_before(loop_start, ci->i_hold_caps_max - delay_max)) { - dout("%s caps added recently. Exiting loop", __func__); + doutc(cl, "caps added recently. Exiting loop"); delay = ci->i_hold_caps_max; break; } @@ -4535,13 +4633,15 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) inode = igrab(&ci->netfs.inode); if (inode) { spin_unlock(&mdsc->cap_delay_lock); - dout("check_delayed_caps on %p\n", inode); + doutc(cl, "on %p %llx.%llx\n", inode, + ceph_vinop(inode)); ceph_check_caps(ci, 0); iput(inode); spin_lock(&mdsc->cap_delay_lock); } } spin_unlock(&mdsc->cap_delay_lock); + doutc(cl, "done\n"); return delay; } @@ -4552,17 +4652,18 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) static void flush_dirty_session_caps(struct ceph_mds_session *s) { struct ceph_mds_client *mdsc = s->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct inode *inode; - dout("flush_dirty_caps\n"); + doutc(cl, "begin\n"); spin_lock(&mdsc->cap_dirty_lock); while (!list_empty(&s->s_cap_dirty)) { ci = list_first_entry(&s->s_cap_dirty, struct ceph_inode_info, i_dirty_item); inode = &ci->netfs.inode; ihold(inode); - dout("flush_dirty_caps %llx.%llx\n", ceph_vinop(inode)); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); spin_unlock(&mdsc->cap_dirty_lock); ceph_wait_on_async_create(inode); ceph_check_caps(ci, CHECK_CAPS_FLUSH); @@ -4570,7 +4671,7 @@ static void flush_dirty_session_caps(struct ceph_mds_session *s) spin_lock(&mdsc->cap_dirty_lock); } spin_unlock(&mdsc->cap_dirty_lock); - dout("flush_dirty_caps done\n"); + doutc(cl, "done\n"); } void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) @@ -4695,6 +4796,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode, int mds, int drop, int unless, int force) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_cap *cap; struct ceph_mds_request_release *rel = *p; int used, dirty; @@ -4704,9 +4806,9 @@ int ceph_encode_inode_release(void **p, struct inode *inode, used = __ceph_caps_used(ci); dirty = __ceph_caps_dirty(ci); - dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n", - inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop), - ceph_cap_string(unless)); + doutc(cl, "%p %llx.%llx mds%d used|dirty %s drop %s unless %s\n", + inode, ceph_vinop(inode), mds, ceph_cap_string(used|dirty), + ceph_cap_string(drop), ceph_cap_string(unless)); /* only drop unused, clean caps */ drop &= ~(used | dirty); @@ -4728,12 +4830,13 @@ int ceph_encode_inode_release(void **p, struct inode *inode, if (force || (cap->issued & drop)) { if (cap->issued & drop) { int wanted = __ceph_caps_wanted(ci); - dout("encode_inode_release %p cap %p " - "%s -> %s, wanted %s -> %s\n", inode, cap, - ceph_cap_string(cap->issued), - ceph_cap_string(cap->issued & ~drop), - ceph_cap_string(cap->mds_wanted), - ceph_cap_string(wanted)); + doutc(cl, "%p %llx.%llx cap %p %s -> %s, " + "wanted %s -> %s\n", inode, + ceph_vinop(inode), cap, + ceph_cap_string(cap->issued), + ceph_cap_string(cap->issued & ~drop), + ceph_cap_string(cap->mds_wanted), + ceph_cap_string(wanted)); cap->issued &= ~drop; cap->implemented &= ~drop; @@ -4742,9 +4845,9 @@ int ceph_encode_inode_release(void **p, struct inode *inode, !(wanted & CEPH_CAP_ANY_FILE_WR)) ci->i_requested_max_size = 0; } else { - dout("encode_inode_release %p cap %p %s" - " (force)\n", inode, cap, - ceph_cap_string(cap->issued)); + doutc(cl, "%p %llx.%llx cap %p %s (force)\n", + inode, ceph_vinop(inode), cap, + ceph_cap_string(cap->issued)); } rel->ino = cpu_to_le64(ceph_ino(inode)); @@ -4759,8 +4862,9 @@ int ceph_encode_inode_release(void **p, struct inode *inode, *p += sizeof(*rel); ret = 1; } else { - dout("encode_inode_release %p cap %p %s (noop)\n", - inode, cap, ceph_cap_string(cap->issued)); + doutc(cl, "%p %llx.%llx cap %p %s (noop)\n", + inode, ceph_vinop(inode), cap, + ceph_cap_string(cap->issued)); } } spin_unlock(&ci->i_ceph_lock); @@ -4786,6 +4890,7 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, struct dentry *parent = NULL; struct ceph_mds_request_release *rel = *p; struct ceph_dentry_info *di = ceph_dentry(dentry); + struct ceph_client *cl; int force = 0; int ret; @@ -4807,10 +4912,11 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force); dput(parent); + cl = ceph_inode_to_client(dir); spin_lock(&dentry->d_lock); if (ret && di->lease_session && di->lease_session->s_mds == mds) { - dout("encode_dentry_release %p mds%d seq %d\n", - dentry, mds, (int)di->lease_seq); + doutc(cl, "%p mds%d seq %d\n", dentry, mds, + (int)di->lease_seq); rel->dname_seq = cpu_to_le32(di->lease_seq); __ceph_mdsc_drop_dentry_lease(dentry); spin_unlock(&dentry->d_lock); @@ -4836,12 +4942,14 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_cap_snap *capsnap; int capsnap_release = 0; lockdep_assert_held(&ci->i_ceph_lock); - dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode); + doutc(cl, "removing capsnaps, ci is %p, %p %llx.%llx\n", + ci, inode, ceph_vinop(inode)); while (!list_empty(&ci->i_cap_snaps)) { capsnap = list_first_entry(&ci->i_cap_snaps, @@ -4860,6 +4968,7 @@ int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invali { struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *cl = fsc->client; struct ceph_inode_info *ci = ceph_inode(inode); bool is_auth; bool dirty_dropped = false; @@ -4867,8 +4976,8 @@ int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invali lockdep_assert_held(&ci->i_ceph_lock); - dout("removing cap %p, ci is %p, inode is %p\n", - cap, ci, &ci->netfs.inode); + doutc(cl, "removing cap %p, ci is %p, %p %llx.%llx\n", + cap, ci, inode, ceph_vinop(inode)); is_auth = (cap == ci->i_auth_cap); __ceph_remove_cap(cap, false); @@ -4895,19 +5004,19 @@ int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invali } if (!list_empty(&ci->i_dirty_item)) { - pr_warn_ratelimited( - " dropping dirty %s state for %p %lld\n", + pr_warn_ratelimited_client(cl, + " dropping dirty %s state for %p %llx.%llx\n", ceph_cap_string(ci->i_dirty_caps), - inode, ceph_ino(inode)); + inode, ceph_vinop(inode)); ci->i_dirty_caps = 0; list_del_init(&ci->i_dirty_item); dirty_dropped = true; } if (!list_empty(&ci->i_flushing_item)) { - pr_warn_ratelimited( - " dropping dirty+flushing %s state for %p %lld\n", + pr_warn_ratelimited_client(cl, + " dropping dirty+flushing %s state for %p %llx.%llx\n", ceph_cap_string(ci->i_flushing_caps), - inode, ceph_ino(inode)); + inode, ceph_vinop(inode)); ci->i_flushing_caps = 0; list_del_init(&ci->i_flushing_item); mdsc->num_cap_flushing--; @@ -4930,8 +5039,9 @@ int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invali if (atomic_read(&ci->i_filelock_ref) > 0) { /* make further file lock syscall return -EIO */ ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK; - pr_warn_ratelimited(" dropping file locks for %p %lld\n", - inode, ceph_ino(inode)); + pr_warn_ratelimited_client(cl, + " dropping file locks for %p %llx.%llx\n", + inode, ceph_vinop(inode)); } if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) { diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c index 08c385610731..d692ebfddedb 100644 --- a/fs/ceph/crypto.c +++ b/fs/ceph/crypto.c @@ -211,6 +211,7 @@ void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request *req, static struct inode *parse_longname(const struct inode *parent, const char *name, int *name_len) { + struct ceph_client *cl = ceph_inode_to_client(parent); struct inode *dir = NULL; struct ceph_vino vino = { .snap = CEPH_NOSNAP }; char *inode_number; @@ -222,12 +223,12 @@ static struct inode *parse_longname(const struct inode *parent, name++; name_end = strrchr(name, '_'); if (!name_end) { - dout("Failed to parse long snapshot name: %s\n", name); + doutc(cl, "failed to parse long snapshot name: %s\n", name); return ERR_PTR(-EIO); } *name_len = (name_end - name); if (*name_len <= 0) { - pr_err("Failed to parse long snapshot name\n"); + pr_err_client(cl, "failed to parse long snapshot name\n"); return ERR_PTR(-EIO); } @@ -239,7 +240,7 @@ static struct inode *parse_longname(const struct inode *parent, return ERR_PTR(-ENOMEM); ret = kstrtou64(inode_number, 10, &vino.ino); if (ret) { - dout("Failed to parse inode number: %s\n", name); + doutc(cl, "failed to parse inode number: %s\n", name); dir = ERR_PTR(ret); goto out; } @@ -250,7 +251,7 @@ static struct inode *parse_longname(const struct inode *parent, /* This can happen if we're not mounting cephfs on the root */ dir = ceph_get_inode(parent->i_sb, vino, NULL); if (IS_ERR(dir)) - dout("Can't find inode %s (%s)\n", inode_number, name); + doutc(cl, "can't find inode %s (%s)\n", inode_number, name); } out: @@ -261,6 +262,7 @@ static struct inode *parse_longname(const struct inode *parent, int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name, char *buf) { + struct ceph_client *cl = ceph_inode_to_client(parent); struct inode *dir = parent; struct qstr iname; u32 len; @@ -329,7 +331,7 @@ int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name, /* base64 encode the encrypted name */ elen = ceph_base64_encode(cryptbuf, len, buf); - dout("base64-encoded ciphertext name = %.*s\n", elen, buf); + doutc(cl, "base64-encoded ciphertext name = %.*s\n", elen, buf); /* To understand the 240 limit, see CEPH_NOHASH_NAME_MAX comments */ WARN_ON(elen > 240); @@ -504,7 +506,10 @@ int ceph_fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num) { - dout("%s: len %u offs %u blk %llu\n", __func__, len, offs, lblk_num); + struct ceph_client *cl = ceph_inode_to_client(inode); + + doutc(cl, "%p %llx.%llx len %u offs %u blk %llu\n", inode, + ceph_vinop(inode), len, offs, lblk_num); return fscrypt_decrypt_block_inplace(inode, page, len, offs, lblk_num); } @@ -513,7 +518,10 @@ int ceph_fscrypt_encrypt_block_inplace(const struct inode *inode, unsigned int offs, u64 lblk_num, gfp_t gfp_flags) { - dout("%s: len %u offs %u blk %llu\n", __func__, len, offs, lblk_num); + struct ceph_client *cl = ceph_inode_to_client(inode); + + doutc(cl, "%p %llx.%llx len %u offs %u blk %llu\n", inode, + ceph_vinop(inode), len, offs, lblk_num); return fscrypt_encrypt_block_inplace(inode, page, len, offs, lblk_num, gfp_flags); } @@ -582,6 +590,7 @@ int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page, u64 off, struct ceph_sparse_extent *map, u32 ext_cnt) { + struct ceph_client *cl = ceph_inode_to_client(inode); int i, ret = 0; struct ceph_inode_info *ci = ceph_inode(inode); u64 objno, objoff; @@ -589,7 +598,8 @@ int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page, /* Nothing to do for empty array */ if (ext_cnt == 0) { - dout("%s: empty array, ret 0\n", __func__); + doutc(cl, "%p %llx.%llx empty array, ret 0\n", inode, + ceph_vinop(inode)); return 0; } @@ -603,14 +613,17 @@ int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page, int fret; if ((ext->off | ext->len) & ~CEPH_FSCRYPT_BLOCK_MASK) { - pr_warn("%s: bad encrypted sparse extent idx %d off %llx len %llx\n", - __func__, i, ext->off, ext->len); + pr_warn_client(cl, + "%p %llx.%llx bad encrypted sparse extent " + "idx %d off %llx len %llx\n", + inode, ceph_vinop(inode), i, ext->off, + ext->len); return -EIO; } fret = ceph_fscrypt_decrypt_pages(inode, &page[pgidx], off + pgsoff, ext->len); - dout("%s: [%d] 0x%llx~0x%llx fret %d\n", __func__, i, - ext->off, ext->len, fret); + doutc(cl, "%p %llx.%llx [%d] 0x%llx~0x%llx fret %d\n", inode, + ceph_vinop(inode), i, ext->off, ext->len, fret); if (fret < 0) { if (ret == 0) ret = fret; @@ -618,7 +631,7 @@ int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page, } ret = pgsoff + fret; } - dout("%s: ret %d\n", __func__, ret); + doutc(cl, "ret %d\n", ret); return ret; } diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 2f1e7498cd74..24c08078f5aa 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -398,7 +398,7 @@ DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get, void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) { - dout("ceph_fs_debugfs_cleanup\n"); + doutc(fsc->client, "begin\n"); debugfs_remove(fsc->debugfs_bdi); debugfs_remove(fsc->debugfs_congestion_kb); debugfs_remove(fsc->debugfs_mdsmap); @@ -407,13 +407,14 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) debugfs_remove(fsc->debugfs_status); debugfs_remove(fsc->debugfs_mdsc); debugfs_remove_recursive(fsc->debugfs_metrics_dir); + doutc(fsc->client, "done\n"); } void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) { char name[100]; - dout("ceph_fs_debugfs_init\n"); + doutc(fsc->client, "begin\n"); fsc->debugfs_congestion_kb = debugfs_create_file("writeback_congestion_kb", 0600, @@ -469,6 +470,7 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) &metrics_size_fops); debugfs_create_file("caps", 0400, fsc->debugfs_metrics_dir, fsc, &metrics_caps_fops); + doutc(fsc->client, "done\n"); } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 1395b71df5cc..6bb95b4e93a8 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -109,7 +109,9 @@ static int fpos_cmp(loff_t l, loff_t r) * regardless of what dir changes take place on the * server. */ -static int note_last_dentry(struct ceph_dir_file_info *dfi, const char *name, +static int note_last_dentry(struct ceph_fs_client *fsc, + struct ceph_dir_file_info *dfi, + const char *name, int len, unsigned next_offset) { char *buf = kmalloc(len+1, GFP_KERNEL); @@ -120,7 +122,7 @@ static int note_last_dentry(struct ceph_dir_file_info *dfi, const char *name, memcpy(dfi->last_name, name, len); dfi->last_name[len] = 0; dfi->next_offset = next_offset; - dout("note_last_dentry '%s'\n", dfi->last_name); + doutc(fsc->client, "'%s'\n", dfi->last_name); return 0; } @@ -130,6 +132,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx, struct ceph_readdir_cache_control *cache_ctl) { struct inode *dir = d_inode(parent); + struct ceph_client *cl = ceph_inode_to_client(dir); struct dentry *dentry; unsigned idx_mask = (PAGE_SIZE / sizeof(struct dentry *)) - 1; loff_t ptr_pos = idx * sizeof(struct dentry *); @@ -142,7 +145,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx, ceph_readdir_cache_release(cache_ctl); cache_ctl->page = find_lock_page(&dir->i_data, ptr_pgoff); if (!cache_ctl->page) { - dout(" page %lu not found\n", ptr_pgoff); + doutc(cl, " page %lu not found\n", ptr_pgoff); return ERR_PTR(-EAGAIN); } /* reading/filling the cache are serialized by @@ -185,13 +188,16 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, struct ceph_dir_file_info *dfi = file->private_data; struct dentry *parent = file->f_path.dentry; struct inode *dir = d_inode(parent); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(dir); + struct ceph_client *cl = ceph_inode_to_client(dir); struct dentry *dentry, *last = NULL; struct ceph_dentry_info *di; struct ceph_readdir_cache_control cache_ctl = {}; u64 idx = 0; int err = 0; - dout("__dcache_readdir %p v%u at %llx\n", dir, (unsigned)shared_gen, ctx->pos); + doutc(cl, "%p %llx.%llx v%u at %llx\n", dir, ceph_vinop(dir), + (unsigned)shared_gen, ctx->pos); /* search start position */ if (ctx->pos > 2) { @@ -221,7 +227,8 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, dput(dentry); } - dout("__dcache_readdir %p cache idx %llu\n", dir, idx); + doutc(cl, "%p %llx.%llx cache idx %llu\n", dir, + ceph_vinop(dir), idx); } @@ -257,8 +264,8 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, spin_unlock(&dentry->d_lock); if (emit_dentry) { - dout(" %llx dentry %p %pd %p\n", di->offset, - dentry, dentry, d_inode(dentry)); + doutc(cl, " %llx dentry %p %pd %p\n", di->offset, + dentry, dentry, d_inode(dentry)); ctx->pos = di->offset; if (!dir_emit(ctx, dentry->d_name.name, dentry->d_name.len, ceph_present_inode(d_inode(dentry)), @@ -281,7 +288,8 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, if (last) { int ret; di = ceph_dentry(last); - ret = note_last_dentry(dfi, last->d_name.name, last->d_name.len, + ret = note_last_dentry(fsc, dfi, last->d_name.name, + last->d_name.len, fpos_off(di->offset) + 1); if (ret < 0) err = ret; @@ -312,18 +320,21 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client *cl = fsc->client; int i; int err; unsigned frag = -1; struct ceph_mds_reply_info_parsed *rinfo; - dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos); + doutc(cl, "%p %llx.%llx file %p pos %llx\n", inode, + ceph_vinop(inode), file, ctx->pos); if (dfi->file_info.flags & CEPH_F_ATEND) return 0; /* always start with . and .. */ if (ctx->pos == 0) { - dout("readdir off 0 -> '.'\n"); + doutc(cl, "%p %llx.%llx off 0 -> '.'\n", inode, + ceph_vinop(inode)); if (!dir_emit(ctx, ".", 1, ceph_present_inode(inode), inode->i_mode >> 12)) return 0; @@ -337,7 +348,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ino = ceph_present_inode(dentry->d_parent->d_inode); spin_unlock(&dentry->d_lock); - dout("readdir off 1 -> '..'\n"); + doutc(cl, "%p %llx.%llx off 1 -> '..'\n", inode, + ceph_vinop(inode)); if (!dir_emit(ctx, "..", 2, ino, inode->i_mode >> 12)) return 0; ctx->pos = 2; @@ -391,8 +403,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) frag = fpos_frag(ctx->pos); } - dout("readdir fetching %llx.%llx frag %x offset '%s'\n", - ceph_vinop(inode), frag, dfi->last_name); + doutc(cl, "fetching %p %llx.%llx frag %x offset '%s'\n", + inode, ceph_vinop(inode), frag, dfi->last_name); req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); if (IS_ERR(req)) return PTR_ERR(req); @@ -446,12 +458,12 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ceph_mdsc_put_request(req); return err; } - dout("readdir got and parsed readdir result=%d on " - "frag %x, end=%d, complete=%d, hash_order=%d\n", - err, frag, - (int)req->r_reply_info.dir_end, - (int)req->r_reply_info.dir_complete, - (int)req->r_reply_info.hash_order); + doutc(cl, "%p %llx.%llx got and parsed readdir result=%d" + "on frag %x, end=%d, complete=%d, hash_order=%d\n", + inode, ceph_vinop(inode), err, frag, + (int)req->r_reply_info.dir_end, + (int)req->r_reply_info.dir_complete, + (int)req->r_reply_info.hash_order); rinfo = &req->r_reply_info; if (le32_to_cpu(rinfo->dir_dir->frag) != frag) { @@ -481,7 +493,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) dfi->dir_ordered_count = req->r_dir_ordered_cnt; } } else { - dout("readdir !did_prepopulate\n"); + doutc(cl, "%p %llx.%llx !did_prepopulate\n", inode, + ceph_vinop(inode)); /* disable readdir cache */ dfi->readdir_cache_idx = -1; /* preclude from marking dir complete */ @@ -494,8 +507,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) rinfo->dir_entries + (rinfo->dir_nr-1); unsigned next_offset = req->r_reply_info.dir_end ? 2 : (fpos_off(rde->offset) + 1); - err = note_last_dentry(dfi, rde->name, rde->name_len, - next_offset); + err = note_last_dentry(fsc, dfi, rde->name, + rde->name_len, next_offset); if (err) { ceph_mdsc_put_request(dfi->last_readdir); dfi->last_readdir = NULL; @@ -508,9 +521,9 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) } rinfo = &dfi->last_readdir->r_reply_info; - dout("readdir frag %x num %d pos %llx chunk first %llx\n", - dfi->frag, rinfo->dir_nr, ctx->pos, - rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL); + doutc(cl, "%p %llx.%llx frag %x num %d pos %llx chunk first %llx\n", + inode, ceph_vinop(inode), dfi->frag, rinfo->dir_nr, ctx->pos, + rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL); i = 0; /* search start position */ @@ -530,8 +543,9 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; if (rde->offset < ctx->pos) { - pr_warn("%s: rde->offset 0x%llx ctx->pos 0x%llx\n", - __func__, rde->offset, ctx->pos); + pr_warn_client(cl, + "%p %llx.%llx rde->offset 0x%llx ctx->pos 0x%llx\n", + inode, ceph_vinop(inode), rde->offset, ctx->pos); return -EIO; } @@ -539,9 +553,9 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) return -EIO; ctx->pos = rde->offset; - dout("readdir (%d/%d) -> %llx '%.*s' %p\n", - i, rinfo->dir_nr, ctx->pos, - rde->name_len, rde->name, &rde->inode.in); + doutc(cl, "%p %llx.%llx (%d/%d) -> %llx '%.*s' %p\n", inode, + ceph_vinop(inode), i, rinfo->dir_nr, ctx->pos, + rde->name_len, rde->name, &rde->inode.in); if (!dir_emit(ctx, rde->name, rde->name_len, ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)), @@ -552,7 +566,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) * doesn't have enough memory, etc. So for next readdir * it will continue. */ - dout("filldir stopping us...\n"); + doutc(cl, "filldir stopping us...\n"); return 0; } @@ -583,7 +597,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) kfree(dfi->last_name); dfi->last_name = NULL; } - dout("readdir next frag is %x\n", frag); + doutc(cl, "%p %llx.%llx next frag is %x\n", inode, + ceph_vinop(inode), frag); goto more; } dfi->file_info.flags |= CEPH_F_ATEND; @@ -598,20 +613,23 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) spin_lock(&ci->i_ceph_lock); if (dfi->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) { - dout(" marking %p complete and ordered\n", inode); + doutc(cl, " marking %p %llx.%llx complete and ordered\n", + inode, ceph_vinop(inode)); /* use i_size to track number of entries in * readdir cache */ BUG_ON(dfi->readdir_cache_idx < 0); i_size_write(inode, dfi->readdir_cache_idx * sizeof(struct dentry*)); } else { - dout(" marking %p complete\n", inode); + doutc(cl, " marking %llx.%llx complete\n", + ceph_vinop(inode)); } __ceph_dir_set_complete(ci, dfi->dir_release_count, dfi->dir_ordered_count); spin_unlock(&ci->i_ceph_lock); } - dout("readdir %p file %p done.\n", inode, file); + doutc(cl, "%p %llx.%llx file %p done.\n", inode, ceph_vinop(inode), + file); return 0; } @@ -657,6 +675,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) { struct ceph_dir_file_info *dfi = file->private_data; struct inode *inode = file->f_mapping->host; + struct ceph_client *cl = ceph_inode_to_client(inode); loff_t retval; inode_lock(inode); @@ -676,7 +695,8 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) if (offset >= 0) { if (need_reset_readdir(dfi, offset)) { - dout("dir_llseek dropping %p content\n", file); + doutc(cl, "%p %llx.%llx dropping %p content\n", + inode, ceph_vinop(inode), file); reset_readdir(dfi); } else if (is_hash_order(offset) && offset > file->f_pos) { /* for hash offset, we don't know if a forward seek @@ -705,6 +725,7 @@ struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req, { struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb); struct inode *parent = d_inode(dentry->d_parent); /* we hold i_rwsem */ + struct ceph_client *cl = ceph_inode_to_client(parent); /* .snap dir? */ if (ceph_snap(parent) == CEPH_NOSNAP && @@ -713,8 +734,9 @@ struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req, struct inode *inode = ceph_get_snapdir(parent); res = d_splice_alias(inode, dentry); - dout("ENOENT on snapdir %p '%pd', linking to snapdir %p. Spliced dentry %p\n", - dentry, dentry, inode, res); + doutc(cl, "ENOENT on snapdir %p '%pd', linking to " + "snapdir %p %llx.%llx. Spliced dentry %p\n", + dentry, dentry, inode, ceph_vinop(inode), res); if (res) dentry = res; } @@ -735,12 +757,15 @@ struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req, struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, struct dentry *dentry, int err) { + struct ceph_client *cl = req->r_mdsc->fsc->client; + if (err == -ENOENT) { /* no trace? */ err = 0; if (!req->r_reply_info.head->is_dentry) { - dout("ENOENT and no trace, dentry %p inode %p\n", - dentry, d_inode(dentry)); + doutc(cl, + "ENOENT and no trace, dentry %p inode %llx.%llx\n", + dentry, ceph_vinop(d_inode(dentry))); if (d_really_is_positive(dentry)) { d_drop(dentry); err = -ENOENT; @@ -773,13 +798,14 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, { struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dir->i_sb); struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_request *req; int op; int mask; int err; - dout("lookup %p dentry %p '%pd'\n", - dir, dentry, dentry); + doutc(cl, "%p %llx.%llx/'%pd' dentry %p\n", dir, ceph_vinop(dir), + dentry, dentry); if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); @@ -802,7 +828,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, struct ceph_dentry_info *di = ceph_dentry(dentry); spin_lock(&ci->i_ceph_lock); - dout(" dir %p flags are 0x%lx\n", dir, ci->i_ceph_flags); + doutc(cl, " dir %llx.%llx flags are 0x%lx\n", + ceph_vinop(dir), ci->i_ceph_flags); if (strncmp(dentry->d_name.name, fsc->mount_options->snapdir_name, dentry->d_name.len) && @@ -812,7 +839,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, __ceph_caps_issued_mask_metric(ci, CEPH_CAP_FILE_SHARED, 1)) { __ceph_touch_fmode(ci, mdsc, CEPH_FILE_MODE_RD); spin_unlock(&ci->i_ceph_lock); - dout(" dir %p complete, -ENOENT\n", dir); + doutc(cl, " dir %llx.%llx complete, -ENOENT\n", + ceph_vinop(dir)); d_add(dentry, NULL); di->lease_shared_gen = atomic_read(&ci->i_shared_gen); return NULL; @@ -850,7 +878,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, } dentry = ceph_finish_lookup(req, dentry, err); ceph_mdsc_put_request(req); /* will dput(dentry) */ - dout("lookup result=%p\n", dentry); + doutc(cl, "result=%p\n", dentry); return dentry; } @@ -885,6 +913,7 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct ceph_acl_sec_ctx as_ctx = {}; int err; @@ -901,8 +930,8 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir, goto out; } - dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n", - dir, dentry, mode, rdev); + doutc(cl, "%p %llx.%llx/'%pd' dentry %p mode 0%ho rdev %d\n", + dir, ceph_vinop(dir), dentry, dentry, mode, rdev); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -993,6 +1022,7 @@ static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *dest) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct ceph_acl_sec_ctx as_ctx = {}; umode_t mode = S_IFLNK | 0777; @@ -1010,7 +1040,8 @@ static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir, goto out; } - dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest); + doutc(cl, "%p %llx.%llx/'%pd' to '%s'\n", dir, ceph_vinop(dir), dentry, + dest); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1064,6 +1095,7 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct ceph_acl_sec_ctx as_ctx = {}; int err; @@ -1076,10 +1108,11 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir, if (ceph_snap(dir) == CEPH_SNAPDIR) { /* mkdir .snap/foo is a MKSNAP */ op = CEPH_MDS_OP_MKSNAP; - dout("mksnap dir %p snap '%pd' dn %p\n", dir, - dentry, dentry); + doutc(cl, "mksnap %llx.%llx/'%pd' dentry %p\n", + ceph_vinop(dir), dentry, dentry); } else if (ceph_snap(dir) == CEPH_NOSNAP) { - dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode); + doutc(cl, "mkdir %llx.%llx/'%pd' dentry %p mode 0%ho\n", + ceph_vinop(dir), dentry, dentry, mode); op = CEPH_MDS_OP_MKDIR; } else { err = -EROFS; @@ -1144,6 +1177,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; int err; @@ -1161,8 +1195,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, if (err) return err; - dout("link in dir %p %llx.%llx old_dentry %p:'%pd' dentry %p:'%pd'\n", - dir, ceph_vinop(dir), old_dentry, old_dentry, dentry, dentry); + doutc(cl, "%p %llx.%llx/'%pd' to '%pd'\n", dir, ceph_vinop(dir), + old_dentry, dentry); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS); if (IS_ERR(req)) { d_drop(dentry); @@ -1200,13 +1234,15 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, { struct dentry *dentry = req->r_dentry; struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb); + struct ceph_client *cl = fsc->client; struct ceph_dentry_info *di = ceph_dentry(dentry); int result = req->r_err ? req->r_err : le32_to_cpu(req->r_reply_info.head->result); if (!test_bit(CEPH_DENTRY_ASYNC_UNLINK_BIT, &di->flags)) - pr_warn("%s dentry %p:%pd async unlink bit is not set\n", - __func__, dentry, dentry); + pr_warn_client(cl, + "dentry %p:%pd async unlink bit is not set\n", + dentry, dentry); spin_lock(&fsc->async_unlink_conflict_lock); hash_del_rcu(&di->hnode); @@ -1240,8 +1276,8 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, /* mark inode itself for an error (since metadata is bogus) */ mapping_set_error(req->r_old_inode->i_mapping, result); - pr_warn("async unlink failure path=(%llx)%s result=%d!\n", - base, IS_ERR(path) ? "<>" : path, result); + pr_warn_client(cl, "failure path=(%llx)%s result=%d!\n", + base, IS_ERR(path) ? "<>" : path, result); ceph_mdsc_free_path(path, pathlen); } out: @@ -1291,6 +1327,7 @@ static int get_caps_for_async_unlink(struct inode *dir, struct dentry *dentry) static int ceph_unlink(struct inode *dir, struct dentry *dentry) { struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dir->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct inode *inode = d_inode(dentry); struct ceph_mds_request *req; @@ -1300,11 +1337,12 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) if (ceph_snap(dir) == CEPH_SNAPDIR) { /* rmdir .snap/foo is RMSNAP */ - dout("rmsnap dir %p '%pd' dn %p\n", dir, dentry, dentry); + doutc(cl, "rmsnap %llx.%llx/'%pd' dn\n", ceph_vinop(dir), + dentry); op = CEPH_MDS_OP_RMSNAP; } else if (ceph_snap(dir) == CEPH_NOSNAP) { - dout("unlink/rmdir dir %p dn %p inode %p\n", - dir, dentry, inode); + doutc(cl, "unlink/rmdir %llx.%llx/'%pd' inode %llx.%llx\n", + ceph_vinop(dir), dentry, ceph_vinop(inode)); op = d_is_dir(dentry) ? CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; } else @@ -1327,9 +1365,9 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) (req->r_dir_caps = get_caps_for_async_unlink(dir, dentry))) { struct ceph_dentry_info *di = ceph_dentry(dentry); - dout("async unlink on %llu/%.*s caps=%s", ceph_ino(dir), - dentry->d_name.len, dentry->d_name.name, - ceph_cap_string(req->r_dir_caps)); + doutc(cl, "async unlink on %llx.%llx/'%pd' caps=%s", + ceph_vinop(dir), dentry, + ceph_cap_string(req->r_dir_caps)); set_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags); req->r_callback = ceph_async_unlink_cb; req->r_old_inode = d_inode(dentry); @@ -1384,6 +1422,7 @@ static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *new_dentry, unsigned int flags) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old_dir->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; int op = CEPH_MDS_OP_RENAME; int err; @@ -1413,8 +1452,9 @@ static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (err) return err; - dout("rename dir %p dentry %p to dir %p dentry %p\n", - old_dir, old_dentry, new_dir, new_dentry); + doutc(cl, "%llx.%llx/'%pd' to %llx.%llx/'%pd'\n", + ceph_vinop(old_dir), old_dentry, ceph_vinop(new_dir), + new_dentry); req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); if (IS_ERR(req)) return PTR_ERR(req); @@ -1459,9 +1499,10 @@ static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir, void __ceph_dentry_lease_touch(struct ceph_dentry_info *di) { struct dentry *dn = di->dentry; - struct ceph_mds_client *mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(dn->d_sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; - dout("dentry_lease_touch %p %p '%pd'\n", di, dn, dn); + doutc(cl, "%p %p '%pd'\n", di, dn, dn); di->flags |= CEPH_DENTRY_LEASE_LIST; if (di->flags & CEPH_DENTRY_SHRINK_LIST) { @@ -1469,7 +1510,6 @@ void __ceph_dentry_lease_touch(struct ceph_dentry_info *di) return; } - mdsc = ceph_sb_to_fs_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_list_lock); list_move_tail(&di->lease_list, &mdsc->dentry_leases); spin_unlock(&mdsc->dentry_list_lock); @@ -1493,10 +1533,10 @@ static void __dentry_dir_lease_touch(struct ceph_mds_client* mdsc, void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di) { struct dentry *dn = di->dentry; - struct ceph_mds_client *mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(dn->d_sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; - dout("dentry_dir_lease_touch %p %p '%pd' (offset 0x%llx)\n", - di, dn, dn, di->offset); + doutc(cl, "%p %p '%pd' (offset 0x%llx)\n", di, dn, dn, di->offset); if (!list_empty(&di->lease_list)) { if (di->flags & CEPH_DENTRY_LEASE_LIST) { @@ -1516,7 +1556,6 @@ void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di) return; } - mdsc = ceph_sb_to_fs_client(dn->d_sb)->mdsc; spin_lock(&mdsc->dentry_list_lock); __dentry_dir_lease_touch(mdsc, di), spin_unlock(&mdsc->dentry_list_lock); @@ -1757,6 +1796,8 @@ static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags) { struct ceph_dentry_info *di; struct ceph_mds_session *session = NULL; + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(dentry->d_sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; u32 seq = 0; int valid = 0; @@ -1789,7 +1830,7 @@ static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags) CEPH_MDS_LEASE_RENEW, seq); ceph_put_mds_session(session); } - dout("dentry_lease_is_valid - dentry %p = %d\n", dentry, valid); + doutc(cl, "dentry %p = %d\n", dentry, valid); return valid; } @@ -1832,6 +1873,7 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry, struct ceph_mds_client *mdsc) { struct ceph_inode_info *ci = ceph_inode(dir); + struct ceph_client *cl = mdsc->fsc->client; int valid; int shared_gen; @@ -1853,8 +1895,9 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry, valid = 0; spin_unlock(&dentry->d_lock); } - dout("dir_lease_is_valid dir %p v%u dentry %p = %d\n", - dir, (unsigned)atomic_read(&ci->i_shared_gen), dentry, valid); + doutc(cl, "dir %p %llx.%llx v%u dentry %p '%pd' = %d\n", dir, + ceph_vinop(dir), (unsigned)atomic_read(&ci->i_shared_gen), + dentry, dentry, valid); return valid; } @@ -1863,10 +1906,11 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry, */ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) { + struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(dentry->d_sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; int valid = 0; struct dentry *parent; struct inode *dir, *inode; - struct ceph_mds_client *mdsc; valid = fscrypt_d_revalidate(dentry, flags); if (valid <= 0) @@ -1884,16 +1928,16 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) inode = d_inode(dentry); } - dout("d_revalidate %p '%pd' inode %p offset 0x%llx nokey %d\n", dentry, - dentry, inode, ceph_dentry(dentry)->offset, - !!(dentry->d_flags & DCACHE_NOKEY_NAME)); + doutc(cl, "%p '%pd' inode %p offset 0x%llx nokey %d\n", + dentry, dentry, inode, ceph_dentry(dentry)->offset, + !!(dentry->d_flags & DCACHE_NOKEY_NAME)); mdsc = ceph_sb_to_fs_client(dir->i_sb)->mdsc; /* always trust cached snapped dentries, snapdir dentry */ if (ceph_snap(dir) != CEPH_NOSNAP) { - dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry, - dentry, inode); + doutc(cl, "%p '%pd' inode %p is SNAPPED\n", dentry, + dentry, inode); valid = 1; } else if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { valid = 1; @@ -1948,14 +1992,14 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) break; } ceph_mdsc_put_request(req); - dout("d_revalidate %p lookup result=%d\n", - dentry, err); + doutc(cl, "%p '%pd', lookup result=%d\n", dentry, + dentry, err); } } else { percpu_counter_inc(&mdsc->metric.d_lease_hit); } - dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid"); + doutc(cl, "%p '%pd' %s\n", dentry, dentry, valid ? "valid" : "invalid"); if (!valid) ceph_dir_clear_complete(dir); @@ -1997,7 +2041,7 @@ static void ceph_d_release(struct dentry *dentry) struct ceph_dentry_info *di = ceph_dentry(dentry); struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb); - dout("d_release %p\n", dentry); + doutc(fsc->client, "dentry %p '%pd'\n", dentry, dentry); atomic64_dec(&fsc->mdsc->metric.total_dentries); @@ -2018,10 +2062,12 @@ static void ceph_d_release(struct dentry *dentry) */ static void ceph_d_prune(struct dentry *dentry) { + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dentry->d_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *dir_ci; struct ceph_dentry_info *di; - dout("ceph_d_prune %pd %p\n", dentry, dentry); + doutc(cl, "dentry %p '%pd'\n", dentry, dentry); /* do we have a valid parent? */ if (IS_ROOT(dentry)) diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 52c4daf2447d..726af69d4d62 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -36,6 +36,7 @@ struct ceph_nfs_snapfh { static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len, struct inode *parent_inode) { + struct ceph_client *cl = ceph_inode_to_client(inode); static const int snap_handle_length = sizeof(struct ceph_nfs_snapfh) >> 2; struct ceph_nfs_snapfh *sfh = (void *)rawfh; @@ -79,13 +80,14 @@ static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len, *max_len = snap_handle_length; ret = FILEID_BTRFS_WITH_PARENT; out: - dout("encode_snapfh %llx.%llx ret=%d\n", ceph_vinop(inode), ret); + doutc(cl, "%p %llx.%llx ret=%d\n", inode, ceph_vinop(inode), ret); return ret; } static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, struct inode *parent_inode) { + struct ceph_client *cl = ceph_inode_to_client(inode); static const int handle_length = sizeof(struct ceph_nfs_fh) >> 2; static const int connected_handle_length = @@ -105,15 +107,15 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, if (parent_inode) { struct ceph_nfs_confh *cfh = (void *)rawfh; - dout("encode_fh %llx with parent %llx\n", - ceph_ino(inode), ceph_ino(parent_inode)); + doutc(cl, "%p %llx.%llx with parent %p %llx.%llx\n", inode, + ceph_vinop(inode), parent_inode, ceph_vinop(parent_inode)); cfh->ino = ceph_ino(inode); cfh->parent_ino = ceph_ino(parent_inode); *max_len = connected_handle_length; type = FILEID_INO32_GEN_PARENT; } else { struct ceph_nfs_fh *fh = (void *)rawfh; - dout("encode_fh %llx\n", ceph_ino(inode)); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); fh->ino = ceph_ino(inode); *max_len = handle_length; type = FILEID_INO32_GEN; @@ -206,6 +208,7 @@ static struct dentry *__snapfh_to_dentry(struct super_block *sb, bool want_parent) { struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct inode *inode; struct ceph_vino vino; @@ -278,11 +281,10 @@ static struct dentry *__snapfh_to_dentry(struct super_block *sb, ceph_mdsc_put_request(req); if (want_parent) { - dout("snapfh_to_parent %llx.%llx\n err=%d\n", - vino.ino, vino.snap, err); + doutc(cl, "%llx.%llx\n err=%d\n", vino.ino, vino.snap, err); } else { - dout("snapfh_to_dentry %llx.%llx parent %llx hash %x err=%d", - vino.ino, vino.snap, sfh->parent_ino, sfh->hash, err); + doutc(cl, "%llx.%llx parent %llx hash %x err=%d", vino.ino, + vino.snap, sfh->parent_ino, sfh->hash, err); } if (IS_ERR(inode)) return ERR_CAST(inode); @@ -297,6 +299,7 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); struct ceph_nfs_fh *fh = (void *)fid->raw; if (fh_type == FILEID_BTRFS_WITH_PARENT) { @@ -310,7 +313,7 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb, if (fh_len < sizeof(*fh) / 4) return NULL; - dout("fh_to_dentry %llx\n", fh->ino); + doutc(fsc->client, "%llx\n", fh->ino); return __fh_to_dentry(sb, fh->ino); } @@ -363,6 +366,7 @@ static struct dentry *__get_parent(struct super_block *sb, static struct dentry *ceph_get_parent(struct dentry *child) { struct inode *inode = d_inode(child); + struct ceph_client *cl = ceph_inode_to_client(inode); struct dentry *dn; if (ceph_snap(inode) != CEPH_NOSNAP) { @@ -402,8 +406,8 @@ static struct dentry *ceph_get_parent(struct dentry *child) dn = __get_parent(child->d_sb, child, 0); } out: - dout("get_parent %p ino %llx.%llx err=%ld\n", - child, ceph_vinop(inode), (long)PTR_ERR_OR_ZERO(dn)); + doutc(cl, "child %p %p %llx.%llx err=%ld\n", child, inode, + ceph_vinop(inode), (long)PTR_ERR_OR_ZERO(dn)); return dn; } @@ -414,6 +418,7 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); struct ceph_nfs_confh *cfh = (void *)fid->raw; struct dentry *dentry; @@ -427,7 +432,7 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, if (fh_len < sizeof(*cfh) / 4) return NULL; - dout("fh_to_parent %llx\n", cfh->parent_ino); + doutc(fsc->client, "%llx\n", cfh->parent_ino); dentry = __get_parent(sb, NULL, cfh->ino); if (unlikely(dentry == ERR_PTR(-ENOENT))) dentry = __fh_to_dentry(sb, cfh->parent_ino); @@ -526,8 +531,8 @@ static int __get_snap_name(struct dentry *parent, char *name, if (req) ceph_mdsc_put_request(req); kfree(last_name); - dout("get_snap_name %p ino %llx.%llx err=%d\n", - child, ceph_vinop(inode), err); + doutc(fsc->client, "child dentry %p %p %llx.%llx err=%d\n", child, + inode, ceph_vinop(inode), err); return err; } @@ -588,9 +593,9 @@ static int ceph_get_name(struct dentry *parent, char *name, ceph_fname_free_buffer(dir, &oname); } out: - dout("get_name %p ino %llx.%llx err %d %s%s\n", - child, ceph_vinop(inode), err, - err ? "" : "name ", err ? "" : name); + doutc(mdsc->fsc->client, "child dentry %p %p %llx.%llx err %d %s%s\n", + child, inode, ceph_vinop(inode), err, err ? "" : "name ", + err ? "" : name); ceph_mdsc_put_request(req); return err; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index c74428099d6d..f60164d9e0c8 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -19,8 +19,9 @@ #include "io.h" #include "metric.h" -static __le32 ceph_flags_sys2wire(u32 flags) +static __le32 ceph_flags_sys2wire(struct ceph_mds_client *mdsc, u32 flags) { + struct ceph_client *cl = mdsc->fsc->client; u32 wire_flags = 0; switch (flags & O_ACCMODE) { @@ -48,7 +49,7 @@ static __le32 ceph_flags_sys2wire(u32 flags) #undef ceph_sys2wire if (flags) - dout("unused open flags: %x\n", flags); + doutc(cl, "unused open flags: %x\n", flags); return cpu_to_le32(wire_flags); } @@ -189,7 +190,7 @@ prepare_open_request(struct super_block *sb, int flags, int create_mode) if (IS_ERR(req)) goto out; req->r_fmode = ceph_flags_to_mode(flags); - req->r_args.open.flags = ceph_flags_sys2wire(flags); + req->r_args.open.flags = ceph_flags_sys2wire(mdsc, flags); req->r_args.open.mode = cpu_to_le32(create_mode); out: return req; @@ -201,11 +202,12 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mount_options *opt = ceph_inode_to_fs_client(&ci->netfs.inode)->mount_options; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_file_info *fi; int ret; - dout("%s %p %p 0%o (%s)\n", __func__, inode, file, - inode->i_mode, isdir ? "dir" : "regular"); + doutc(cl, "%p %llx.%llx %p 0%o (%s)\n", inode, ceph_vinop(inode), + file, inode->i_mode, isdir ? "dir" : "regular"); BUG_ON(inode->i_fop->release != ceph_release); if (isdir) { @@ -259,6 +261,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, */ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) { + struct ceph_client *cl = ceph_inode_to_client(inode); int ret = 0; switch (inode->i_mode & S_IFMT) { @@ -271,13 +274,13 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) break; case S_IFLNK: - dout("init_file %p %p 0%o (symlink)\n", inode, file, - inode->i_mode); + doutc(cl, "%p %llx.%llx %p 0%o (symlink)\n", inode, + ceph_vinop(inode), file, inode->i_mode); break; default: - dout("init_file %p %p 0%o (special)\n", inode, file, - inode->i_mode); + doutc(cl, "%p %llx.%llx %p 0%o (special)\n", inode, + ceph_vinop(inode), file, inode->i_mode); /* * we need to drop the open ref now, since we don't * have .release set to ceph_release. @@ -296,6 +299,7 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) int ceph_renew_caps(struct inode *inode, int fmode) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req; int err, flags, wanted; @@ -307,8 +311,9 @@ int ceph_renew_caps(struct inode *inode, int fmode) (!(wanted & CEPH_CAP_ANY_WR) || ci->i_auth_cap)) { int issued = __ceph_caps_issued(ci, NULL); spin_unlock(&ci->i_ceph_lock); - dout("renew caps %p want %s issued %s updating mds_wanted\n", - inode, ceph_cap_string(wanted), ceph_cap_string(issued)); + doutc(cl, "%p %llx.%llx want %s issued %s updating mds_wanted\n", + inode, ceph_vinop(inode), ceph_cap_string(wanted), + ceph_cap_string(issued)); ceph_check_caps(ci, 0); return 0; } @@ -339,7 +344,8 @@ int ceph_renew_caps(struct inode *inode, int fmode) err = ceph_mdsc_do_request(mdsc, NULL, req); ceph_mdsc_put_request(req); out: - dout("renew caps %p open result=%d\n", inode, err); + doutc(cl, "%p %llx.%llx open result=%d\n", inode, ceph_vinop(inode), + err); return err < 0 ? err : 0; } @@ -353,6 +359,7 @@ int ceph_open(struct inode *inode, struct file *file) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_sb_to_fs_client(inode->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; struct ceph_file_info *fi = file->private_data; @@ -360,7 +367,7 @@ int ceph_open(struct inode *inode, struct file *file) int flags, fmode, wanted; if (fi) { - dout("open file %p is already opened\n", file); + doutc(cl, "file %p is already opened\n", file); return 0; } @@ -374,8 +381,8 @@ int ceph_open(struct inode *inode, struct file *file) return err; } - dout("open inode %p ino %llx.%llx file %p flags %d (%d)\n", inode, - ceph_vinop(inode), file, flags, file->f_flags); + doutc(cl, "%p %llx.%llx file %p flags %d (%d)\n", inode, + ceph_vinop(inode), file, flags, file->f_flags); fmode = ceph_flags_to_mode(flags); wanted = ceph_caps_for_mode(fmode); @@ -399,9 +406,9 @@ int ceph_open(struct inode *inode, struct file *file) int mds_wanted = __ceph_caps_mds_wanted(ci, true); int issued = __ceph_caps_issued(ci, NULL); - dout("open %p fmode %d want %s issued %s using existing\n", - inode, fmode, ceph_cap_string(wanted), - ceph_cap_string(issued)); + doutc(cl, "open %p fmode %d want %s issued %s using existing\n", + inode, fmode, ceph_cap_string(wanted), + ceph_cap_string(issued)); __ceph_touch_fmode(ci, mdsc, fmode); spin_unlock(&ci->i_ceph_lock); @@ -421,7 +428,7 @@ int ceph_open(struct inode *inode, struct file *file) spin_unlock(&ci->i_ceph_lock); - dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted)); + doutc(cl, "open fmode %d wants %s\n", fmode, ceph_cap_string(wanted)); req = prepare_open_request(inode->i_sb, flags, 0); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -435,7 +442,7 @@ int ceph_open(struct inode *inode, struct file *file) if (!err) err = ceph_init_file(inode, file, req->r_fmode); ceph_mdsc_put_request(req); - dout("open result=%d on %llx.%llx\n", err, ceph_vinop(inode)); + doutc(cl, "open result=%d on %llx.%llx\n", err, ceph_vinop(inode)); out: return err; } @@ -515,6 +522,7 @@ static int try_prep_async_create(struct inode *dir, struct dentry *dentry, static void restore_deleg_ino(struct inode *dir, u64 ino) { + struct ceph_client *cl = ceph_inode_to_client(dir); struct ceph_inode_info *ci = ceph_inode(dir); struct ceph_mds_session *s = NULL; @@ -525,7 +533,8 @@ static void restore_deleg_ino(struct inode *dir, u64 ino) if (s) { int err = ceph_restore_deleg_ino(s, ino); if (err) - pr_warn("ceph: unable to restore delegated ino 0x%llx to session: %d\n", + pr_warn_client(cl, + "unable to restore delegated ino 0x%llx to session: %d\n", ino, err); ceph_put_mds_session(s); } @@ -557,6 +566,7 @@ static void wake_async_create_waiters(struct inode *inode, static void ceph_async_create_cb(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { + struct ceph_client *cl = mdsc->fsc->client; struct dentry *dentry = req->r_dentry; struct inode *dinode = d_inode(dentry); struct inode *tinode = req->r_target_inode; @@ -577,7 +587,8 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc, char *path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen, &base, 0); - pr_warn("async create failure path=(%llx)%s result=%d!\n", + pr_warn_client(cl, + "async create failure path=(%llx)%s result=%d!\n", base, IS_ERR(path) ? "<>" : path, result); ceph_mdsc_free_path(path, pathlen); @@ -596,14 +607,15 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc, u64 ino = ceph_vino(tinode).ino; if (req->r_deleg_ino != ino) - pr_warn("%s: inode number mismatch! err=%d deleg_ino=0x%llx target=0x%llx\n", - __func__, req->r_err, req->r_deleg_ino, ino); + pr_warn_client(cl, + "inode number mismatch! err=%d deleg_ino=0x%llx target=0x%llx\n", + req->r_err, req->r_deleg_ino, ino); mapping_set_error(tinode->i_mapping, result); wake_async_create_waiters(tinode, req->r_session); } else if (!result) { - pr_warn("%s: no req->r_target_inode for 0x%llx\n", __func__, - req->r_deleg_ino); + pr_warn_client(cl, "no req->r_target_inode for 0x%llx\n", + req->r_deleg_ino); } out: ceph_mdsc_release_dir_caps(req); @@ -625,6 +637,7 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, struct timespec64 now; struct ceph_string *pool_ns; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_vino vino = { .ino = req->r_deleg_ino, .snap = CEPH_NOSNAP }; @@ -683,7 +696,7 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, req->r_fmode, NULL); up_read(&mdsc->snap_rwsem); if (ret) { - dout("%s failed to fill inode: %d\n", __func__, ret); + doutc(cl, "failed to fill inode: %d\n", ret); ceph_dir_clear_complete(dir); if (!d_unhashed(dentry)) d_drop(dentry); @@ -691,8 +704,8 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, } else { struct dentry *dn; - dout("%s d_adding new inode 0x%llx to 0x%llx/%s\n", __func__, - vino.ino, ceph_ino(dir), dentry->d_name.name); + doutc(cl, "d_adding new inode 0x%llx to 0x%llx/%s\n", + vino.ino, ceph_ino(dir), dentry->d_name.name); ceph_dir_clear_ordered(dir); ceph_init_inode_acls(inode, as_ctx); if (inode->i_state & I_NEW) { @@ -731,6 +744,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned flags, umode_t mode) { struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dir->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; struct inode *new_inode = NULL; @@ -740,9 +754,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, int mask; int err; - dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n", - dir, dentry, dentry, - d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode); + doutc(cl, "%p %llx.%llx dentry %p '%pd' %s flags %d mode 0%o\n", + dir, ceph_vinop(dir), dentry, dentry, + d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode); if (dentry->d_name.len > NAME_MAX) return -ENAMETOOLONG; @@ -880,17 +894,18 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, goto out_req; if (dn || d_really_is_negative(dentry) || d_is_symlink(dentry)) { /* make vfs retry on splice, ENOENT, or symlink */ - dout("atomic_open finish_no_open on dn %p\n", dn); + doutc(cl, "finish_no_open on dn %p\n", dn); err = finish_no_open(file, dn); } else { if (IS_ENCRYPTED(dir) && !fscrypt_has_permitted_context(dir, d_inode(dentry))) { - pr_warn("Inconsistent encryption context (parent %llx:%llx child %llx:%llx)\n", + pr_warn_client(cl, + "Inconsistent encryption context (parent %llx:%llx child %llx:%llx)\n", ceph_vinop(dir), ceph_vinop(d_inode(dentry))); goto out_req; } - dout("atomic_open finish_open on dn %p\n", dn); + doutc(cl, "finish_open on dn %p\n", dn); if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) { struct inode *newino = d_inode(dentry); @@ -905,17 +920,19 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, iput(new_inode); out_ctx: ceph_release_acl_sec_ctx(&as_ctx); - dout("atomic_open result=%d\n", err); + doutc(cl, "result=%d\n", err); return err; } int ceph_release(struct inode *inode, struct file *file) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); if (S_ISDIR(inode->i_mode)) { struct ceph_dir_file_info *dfi = file->private_data; - dout("release inode %p dir file %p\n", inode, file); + doutc(cl, "%p %llx.%llx dir file %p\n", inode, + ceph_vinop(inode), file); WARN_ON(!list_empty(&dfi->file_info.rw_contexts)); ceph_put_fmode(ci, dfi->file_info.fmode, 1); @@ -927,7 +944,8 @@ int ceph_release(struct inode *inode, struct file *file) kmem_cache_free(ceph_dir_file_cachep, dfi); } else { struct ceph_file_info *fi = file->private_data; - dout("release inode %p regular file %p\n", inode, file); + doutc(cl, "%p %llx.%llx regular file %p\n", inode, + ceph_vinop(inode), file); WARN_ON(!list_empty(&fi->rw_contexts)); ceph_fscache_unuse_cookie(inode, file->f_mode & FMODE_WRITE); @@ -963,6 +981,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_client *osdc = &fsc->client->osdc; ssize_t ret; u64 off = *ki_pos; @@ -971,7 +990,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD); u64 objver = 0; - dout("sync_read on inode %p %llx~%llx\n", inode, *ki_pos, len); + doutc(cl, "on inode %p %llx.%llx %llx~%llx\n", inode, + ceph_vinop(inode), *ki_pos, len); if (ceph_inode_is_shutdown(inode)) return -EIO; @@ -1005,8 +1025,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, /* determine new offset/length if encrypted */ ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len); - dout("sync_read orig %llu~%llu reading %llu~%llu", - off, len, read_off, read_len); + doutc(cl, "orig %llu~%llu reading %llu~%llu", off, len, + read_off, read_len); req = ceph_osdc_new_request(osdc, &ci->i_layout, ci->i_vino, read_off, &read_len, 0, 1, @@ -1059,8 +1079,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, objver = req->r_version; i_size = i_size_read(inode); - dout("sync_read %llu~%llu got %zd i_size %llu%s\n", - off, len, ret, i_size, (more ? " MORE" : "")); + doutc(cl, "%llu~%llu got %zd i_size %llu%s\n", off, len, + ret, i_size, (more ? " MORE" : "")); /* Fix it to go to end of extent map */ if (sparse && ret >= 0) @@ -1101,8 +1121,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, int zlen = min(len - ret, i_size - off - ret); int zoff = page_off + ret; - dout("sync_read zero gap %llu~%llu\n", - off + ret, off + ret + zlen); + doutc(cl, "zero gap %llu~%llu\n", off + ret, + off + ret + zlen); ceph_zero_page_vector_range(zoff, zlen, pages); ret += zlen; } @@ -1151,7 +1171,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, if (last_objver) *last_objver = objver; } - dout("sync_read result %zd retry_op %d\n", ret, *retry_op); + doutc(cl, "result %zd retry_op %d\n", ret, *retry_op); return ret; } @@ -1160,9 +1180,11 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); + struct ceph_client *cl = ceph_inode_to_client(inode); - dout("sync_read on file %p %llx~%zx %s\n", file, iocb->ki_pos, - iov_iter_count(to), (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); + doutc(cl, "on file %p %llx~%zx %s\n", file, iocb->ki_pos, + iov_iter_count(to), + (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); return __ceph_sync_read(inode, &iocb->ki_pos, to, retry_op, NULL); } @@ -1190,6 +1212,7 @@ static void ceph_aio_retry_work(struct work_struct *work); static void ceph_aio_complete(struct inode *inode, struct ceph_aio_request *aio_req) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); int ret; @@ -1203,7 +1226,7 @@ static void ceph_aio_complete(struct inode *inode, if (!ret) ret = aio_req->total_len; - dout("ceph_aio_complete %p rc %d\n", inode, ret); + doutc(cl, "%p %llx.%llx rc %d\n", inode, ceph_vinop(inode), ret); if (ret >= 0 && aio_req->write) { int dirty; @@ -1242,11 +1265,13 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) struct ceph_client_metric *metric = &ceph_sb_to_mdsc(inode->i_sb)->metric; unsigned int len = osd_data->bvec_pos.iter.bi_size; bool sparse = (op->op == CEPH_OSD_OP_SPARSE_READ); + struct ceph_client *cl = ceph_inode_to_client(inode); BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS); BUG_ON(!osd_data->num_bvecs); - dout("ceph_aio_complete_req %p rc %d bytes %u\n", inode, rc, len); + doutc(cl, "req %p inode %p %llx.%llx, rc %d bytes %u\n", req, + inode, ceph_vinop(inode), rc, len); if (rc == -EOLDSNAPC) { struct ceph_aio_work *aio_work; @@ -1387,6 +1412,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_client_metric *metric = &fsc->mdsc->metric; struct ceph_vino vino; struct ceph_osd_request *req; @@ -1405,9 +1431,9 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP) return -EROFS; - dout("sync_direct_%s on file %p %lld~%u snapc %p seq %lld\n", - (write ? "write" : "read"), file, pos, (unsigned)count, - snapc, snapc ? snapc->seq : 0); + doutc(cl, "sync_direct_%s on file %p %lld~%u snapc %p seq %lld\n", + (write ? "write" : "read"), file, pos, (unsigned)count, + snapc, snapc ? snapc->seq : 0); if (write) { int ret2; @@ -1418,7 +1444,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, pos >> PAGE_SHIFT, (pos + count - 1) >> PAGE_SHIFT); if (ret2 < 0) - dout("invalidate_inode_pages2_range returned %d\n", ret2); + doutc(cl, "invalidate_inode_pages2_range returned %d\n", + ret2); flags = /* CEPH_OSD_FLAG_ORDERSNAP | */ CEPH_OSD_FLAG_WRITE; } else { @@ -1611,6 +1638,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_client *osdc = &fsc->client->osdc; struct ceph_osd_request *req; struct page **pages; @@ -1625,8 +1653,8 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) return -EROFS; - dout("sync_write on file %p %lld~%u snapc %p seq %lld\n", - file, pos, (unsigned)count, snapc, snapc->seq); + doutc(cl, "on file %p %lld~%u snapc %p seq %lld\n", file, pos, + (unsigned)count, snapc, snapc->seq); ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + count - 1); @@ -1670,9 +1698,9 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, last = (pos + len) != (write_pos + write_len); rmw = first || last; - dout("sync_write ino %llx %lld~%llu adjusted %lld~%llu -- %srmw\n", - ci->i_vino.ino, pos, len, write_pos, write_len, - rmw ? "" : "no "); + doutc(cl, "ino %llx %lld~%llu adjusted %lld~%llu -- %srmw\n", + ci->i_vino.ino, pos, len, write_pos, write_len, + rmw ? "" : "no "); /* * The data is emplaced into the page as it would be if it were @@ -1881,7 +1909,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, left -= ret; } if (ret < 0) { - dout("sync_write write failed with %d\n", ret); + doutc(cl, "write failed with %d\n", ret); ceph_release_page_vector(pages, num_pages); break; } @@ -1891,7 +1919,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, write_pos, write_len, GFP_KERNEL); if (ret < 0) { - dout("encryption failed with %d\n", ret); + doutc(cl, "encryption failed with %d\n", ret); ceph_release_page_vector(pages, num_pages); break; } @@ -1910,7 +1938,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, break; } - dout("sync_write write op %lld~%llu\n", write_pos, write_len); + doutc(cl, "write op %lld~%llu\n", write_pos, write_len); osd_req_op_extent_osd_data_pages(req, rmw ? 1 : 0, pages, write_len, offset_in_page(write_pos), false, true); @@ -1941,7 +1969,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, req->r_end_latency, len, ret); ceph_osdc_put_request(req); if (ret != 0) { - dout("sync_write osd write returned %d\n", ret); + doutc(cl, "osd write returned %d\n", ret); /* Version changed! Must re-do the rmw cycle */ if ((assert_ver && (ret == -ERANGE || ret == -EOVERFLOW)) || (!assert_ver && ret == -EEXIST)) { @@ -1971,13 +1999,13 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, pos >> PAGE_SHIFT, (pos + len - 1) >> PAGE_SHIFT); if (ret < 0) { - dout("invalidate_inode_pages2_range returned %d\n", - ret); + doutc(cl, "invalidate_inode_pages2_range returned %d\n", + ret); ret = 0; } pos += len; written += len; - dout("sync_write written %d\n", written); + doutc(cl, "written %d\n", written); if (pos > i_size_read(inode)) { check_caps = ceph_inode_set_size(inode, pos); if (check_caps) @@ -1991,7 +2019,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, ret = written; iocb->ki_pos = pos; } - dout("sync_write returning %d\n", ret); + doutc(cl, "returning %d\n", ret); return ret; } @@ -2010,13 +2038,14 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(filp); struct ceph_inode_info *ci = ceph_inode(inode); bool direct_lock = iocb->ki_flags & IOCB_DIRECT; + struct ceph_client *cl = ceph_inode_to_client(inode); ssize_t ret; int want = 0, got = 0; int retry_op = 0, read = 0; again: - dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", - inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode); + doutc(cl, "%llu~%u trying to get caps on %p %llx.%llx\n", + iocb->ki_pos, (unsigned)len, inode, ceph_vinop(inode)); if (ceph_inode_is_shutdown(inode)) return -ESTALE; @@ -2044,9 +2073,9 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) { - dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n", - inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, - ceph_cap_string(got)); + doutc(cl, "sync %p %llx.%llx %llu~%u got cap refs on %s\n", + inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, + ceph_cap_string(got)); if (!ceph_has_inline_data(ci)) { if (!retry_op && @@ -2064,16 +2093,16 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) } } else { CEPH_DEFINE_RW_CONTEXT(rw_ctx, got); - dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", - inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, - ceph_cap_string(got)); + doutc(cl, "async %p %llx.%llx %llu~%u got cap refs on %s\n", + inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, + ceph_cap_string(got)); ceph_add_rw_context(fi, &rw_ctx); ret = generic_file_read_iter(iocb, to); ceph_del_rw_context(fi, &rw_ctx); } - dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", - inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); + doutc(cl, "%p %llx.%llx dropping cap refs on %s = %d\n", + inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); ceph_put_cap_refs(ci, got); if (direct_lock) @@ -2133,8 +2162,8 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) /* hit EOF or hole? */ if (retry_op == CHECK_EOF && iocb->ki_pos < i_size && ret < len) { - dout("sync_read hit hole, ppos %lld < size %lld" - ", reading more\n", iocb->ki_pos, i_size); + doutc(cl, "hit hole, ppos %lld < size %lld, reading more\n", + iocb->ki_pos, i_size); read += ret; len -= ret; @@ -2229,6 +2258,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_osd_client *osdc = &fsc->client->osdc; struct ceph_cap_flush *prealloc_cf; ssize_t count, written = 0; @@ -2296,8 +2326,9 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err) goto out; - dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n", - inode, ceph_vinop(inode), pos, count, i_size_read(inode)); + doutc(cl, "%p %llx.%llx %llu~%zd getting caps. i_size %llu\n", + inode, ceph_vinop(inode), pos, count, + i_size_read(inode)); if (!(fi->flags & CEPH_F_SYNC) && !direct_lock) want |= CEPH_CAP_FILE_BUFFER; if (fi->fmode & CEPH_FILE_MODE_LAZY) @@ -2313,8 +2344,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) inode_inc_iversion_raw(inode); - dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s\n", - inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); + doutc(cl, "%p %llx.%llx %llu~%zd got cap refs on %s\n", + inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC) || @@ -2374,14 +2405,14 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) ceph_check_caps(ci, CHECK_CAPS_FLUSH); } - dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", - inode, ceph_vinop(inode), pos, (unsigned)count, - ceph_cap_string(got)); + doutc(cl, "%p %llx.%llx %llu~%u dropping cap refs on %s\n", + inode, ceph_vinop(inode), pos, (unsigned)count, + ceph_cap_string(got)); ceph_put_cap_refs(ci, got); if (written == -EOLDSNAPC) { - dout("aio_write %p %llx.%llx %llu~%u" "got EOLDSNAPC, retrying\n", - inode, ceph_vinop(inode), pos, (unsigned)count); + doutc(cl, "%p %llx.%llx %llu~%u" "got EOLDSNAPC, retrying\n", + inode, ceph_vinop(inode), pos, (unsigned)count); goto retry_snap; } @@ -2553,14 +2584,15 @@ static long ceph_fallocate(struct file *file, int mode, struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_cap_flush *prealloc_cf; + struct ceph_client *cl = ceph_inode_to_client(inode); int want, got = 0; int dirty; int ret = 0; loff_t endoff = 0; loff_t size; - dout("%s %p %llx.%llx mode %x, offset %llu length %llu\n", __func__, - inode, ceph_vinop(inode), mode, offset, length); + doutc(cl, "%p %llx.%llx mode %x, offset %llu length %llu\n", + inode, ceph_vinop(inode), mode, offset, length); if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) return -EOPNOTSUPP; @@ -2689,6 +2721,7 @@ static void put_rd_wr_caps(struct ceph_inode_info *src_ci, int src_got, static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode, loff_t src_off, loff_t dst_off, size_t len) { + struct ceph_client *cl = ceph_inode_to_client(src_inode); loff_t size, endoff; size = i_size_read(src_inode); @@ -2699,8 +2732,8 @@ static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode, * inode. */ if (src_off + len > size) { - dout("Copy beyond EOF (%llu + %zu > %llu)\n", - src_off, len, size); + doutc(cl, "Copy beyond EOF (%llu + %zu > %llu)\n", src_off, + len, size); return -EOPNOTSUPP; } size = i_size_read(dst_inode); @@ -2776,6 +2809,7 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off u64 src_objnum, src_objoff, dst_objnum, dst_objoff; u32 src_objlen, dst_objlen; u32 object_size = src_ci->i_layout.object_size; + struct ceph_client *cl = fsc->client; int ret; src_oloc.pool = src_ci->i_layout.pool_id; @@ -2817,9 +2851,10 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off if (ret) { if (ret == -EOPNOTSUPP) { fsc->have_copy_from2 = false; - pr_notice("OSDs don't support copy-from2; disabling copy offload\n"); + pr_notice_client(cl, + "OSDs don't support copy-from2; disabling copy offload\n"); } - dout("ceph_osdc_copy_from returned %d\n", ret); + doutc(cl, "returned %d\n", ret); if (!bytes) bytes = ret; goto out; @@ -2846,6 +2881,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, struct ceph_inode_info *dst_ci = ceph_inode(dst_inode); struct ceph_cap_flush *prealloc_cf; struct ceph_fs_client *src_fsc = ceph_inode_to_fs_client(src_inode); + struct ceph_client *cl = src_fsc->client; loff_t size; ssize_t ret = -EIO, bytes; u64 src_objnum, dst_objnum, src_objoff, dst_objoff; @@ -2888,7 +2924,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, (src_ci->i_layout.stripe_count != 1) || (dst_ci->i_layout.stripe_count != 1) || (src_ci->i_layout.object_size != dst_ci->i_layout.object_size)) { - dout("Invalid src/dst files layout\n"); + doutc(cl, "Invalid src/dst files layout\n"); return -EOPNOTSUPP; } @@ -2906,12 +2942,12 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, /* Start by sync'ing the source and destination files */ ret = file_write_and_wait_range(src_file, src_off, (src_off + len)); if (ret < 0) { - dout("failed to write src file (%zd)\n", ret); + doutc(cl, "failed to write src file (%zd)\n", ret); goto out; } ret = file_write_and_wait_range(dst_file, dst_off, (dst_off + len)); if (ret < 0) { - dout("failed to write dst file (%zd)\n", ret); + doutc(cl, "failed to write dst file (%zd)\n", ret); goto out; } @@ -2923,7 +2959,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, err = get_rd_wr_caps(src_file, &src_got, dst_file, (dst_off + len), &dst_got); if (err < 0) { - dout("get_rd_wr_caps returned %d\n", err); + doutc(cl, "get_rd_wr_caps returned %d\n", err); ret = -EOPNOTSUPP; goto out; } @@ -2938,7 +2974,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, dst_off >> PAGE_SHIFT, (dst_off + len) >> PAGE_SHIFT); if (ret < 0) { - dout("Failed to invalidate inode pages (%zd)\n", ret); + doutc(cl, "Failed to invalidate inode pages (%zd)\n", + ret); ret = 0; /* XXX */ } ceph_calc_file_object_mapping(&src_ci->i_layout, src_off, @@ -2959,7 +2996,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, * starting at the src_off */ if (src_objoff) { - dout("Initial partial copy of %u bytes\n", src_objlen); + doutc(cl, "Initial partial copy of %u bytes\n", src_objlen); /* * we need to temporarily drop all caps as we'll be calling @@ -2970,7 +3007,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, &dst_off, src_objlen, flags); /* Abort on short copies or on error */ if (ret < (long)src_objlen) { - dout("Failed partial copy (%zd)\n", ret); + doutc(cl, "Failed partial copy (%zd)\n", ret); goto out; } len -= ret; @@ -2992,7 +3029,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, ret = bytes; goto out_caps; } - dout("Copied %zu bytes out of %zu\n", bytes, len); + doutc(cl, "Copied %zu bytes out of %zu\n", bytes, len); len -= bytes; ret += bytes; @@ -3020,13 +3057,13 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, * there were errors in remote object copies (len >= object_size). */ if (len && (len < src_ci->i_layout.object_size)) { - dout("Final partial copy of %zu bytes\n", len); + doutc(cl, "Final partial copy of %zu bytes\n", len); bytes = do_splice_direct(src_file, &src_off, dst_file, &dst_off, len, flags); if (bytes > 0) ret += bytes; else - dout("Failed partial copy (%zd)\n", bytes); + doutc(cl, "Failed partial copy (%zd)\n", bytes); } out: diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index db6977c15c28..808b5a69f028 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -129,6 +129,8 @@ void ceph_as_ctx_to_req(struct ceph_mds_request *req, struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino, struct inode *newino) { + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb); + struct ceph_client *cl = mdsc->fsc->client; struct inode *inode; if (ceph_vino_is_reserved(vino)) @@ -145,12 +147,13 @@ struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino, } if (!inode) { - dout("No inode found for %llx.%llx\n", vino.ino, vino.snap); + doutc(cl, "no inode found for %llx.%llx\n", vino.ino, vino.snap); return ERR_PTR(-ENOMEM); } - dout("get_inode on %llu=%llx.%llx got %p new %d\n", ceph_present_inode(inode), - ceph_vinop(inode), inode, !!(inode->i_state & I_NEW)); + doutc(cl, "on %llx=%llx.%llx got %p new %d\n", + ceph_present_inode(inode), ceph_vinop(inode), inode, + !!(inode->i_state & I_NEW)); return inode; } @@ -159,6 +162,7 @@ struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino, */ struct inode *ceph_get_snapdir(struct inode *parent) { + struct ceph_client *cl = ceph_inode_to_client(parent); struct ceph_vino vino = { .ino = ceph_ino(parent), .snap = CEPH_SNAPDIR, @@ -171,14 +175,14 @@ struct inode *ceph_get_snapdir(struct inode *parent) return inode; if (!S_ISDIR(parent->i_mode)) { - pr_warn_once("bad snapdir parent type (mode=0%o)\n", - parent->i_mode); + pr_warn_once_client(cl, "bad snapdir parent type (mode=0%o)\n", + parent->i_mode); goto err; } if (!(inode->i_state & I_NEW) && !S_ISDIR(inode->i_mode)) { - pr_warn_once("bad snapdir inode type (mode=0%o)\n", - inode->i_mode); + pr_warn_once_client(cl, "bad snapdir inode type (mode=0%o)\n", + inode->i_mode); goto err; } @@ -203,7 +207,7 @@ struct inode *ceph_get_snapdir(struct inode *parent) inode->i_flags |= S_ENCRYPTED; ci->fscrypt_auth_len = pci->fscrypt_auth_len; } else { - dout("Failed to alloc snapdir fscrypt_auth\n"); + doutc(cl, "Failed to alloc snapdir fscrypt_auth\n"); ret = -ENOMEM; goto err; } @@ -249,6 +253,8 @@ const struct inode_operations ceph_file_iops = { static struct ceph_inode_frag *__get_or_create_frag(struct ceph_inode_info *ci, u32 f) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); struct rb_node **p; struct rb_node *parent = NULL; struct ceph_inode_frag *frag; @@ -279,8 +285,7 @@ static struct ceph_inode_frag *__get_or_create_frag(struct ceph_inode_info *ci, rb_link_node(&frag->node, parent, p); rb_insert_color(&frag->node, &ci->i_fragtree); - dout("get_or_create_frag added %llx.%llx frag %x\n", - ceph_vinop(&ci->netfs.inode), f); + doutc(cl, "added %p %llx.%llx frag %x\n", inode, ceph_vinop(inode), f); return frag; } @@ -313,6 +318,7 @@ struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, u32 f) static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v, struct ceph_inode_frag *pfrag, int *found) { + struct ceph_client *cl = ceph_inode_to_client(&ci->netfs.inode); u32 t = ceph_frag_make(0, 0); struct ceph_inode_frag *frag; unsigned nway, i; @@ -336,8 +342,8 @@ static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v, /* choose child */ nway = 1 << frag->split_by; - dout("choose_frag(%x) %x splits by %d (%d ways)\n", v, t, - frag->split_by, nway); + doutc(cl, "frag(%x) %x splits by %d (%d ways)\n", v, t, + frag->split_by, nway); for (i = 0; i < nway; i++) { n = ceph_frag_make_child(t, frag->split_by, i); if (ceph_frag_contains_value(n, v)) { @@ -347,7 +353,7 @@ static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v, } BUG_ON(i == nway); } - dout("choose_frag(%x) = %x\n", v, t); + doutc(cl, "frag(%x) = %x\n", v, t); return t; } @@ -371,6 +377,7 @@ static int ceph_fill_dirfrag(struct inode *inode, struct ceph_mds_reply_dirfrag *dirinfo) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_frag *frag; u32 id = le32_to_cpu(dirinfo->frag); int mds = le32_to_cpu(dirinfo->auth); @@ -395,14 +402,14 @@ static int ceph_fill_dirfrag(struct inode *inode, goto out; if (frag->split_by == 0) { /* tree leaf, remove */ - dout("fill_dirfrag removed %llx.%llx frag %x" - " (no ref)\n", ceph_vinop(inode), id); + doutc(cl, "removed %p %llx.%llx frag %x (no ref)\n", + inode, ceph_vinop(inode), id); rb_erase(&frag->node, &ci->i_fragtree); kfree(frag); } else { /* tree branch, keep and clear */ - dout("fill_dirfrag cleared %llx.%llx frag %x" - " referral\n", ceph_vinop(inode), id); + doutc(cl, "cleared %p %llx.%llx frag %x referral\n", + inode, ceph_vinop(inode), id); frag->mds = -1; frag->ndist = 0; } @@ -415,8 +422,9 @@ static int ceph_fill_dirfrag(struct inode *inode, if (IS_ERR(frag)) { /* this is not the end of the world; we can continue with bad/inaccurate delegation info */ - pr_err("fill_dirfrag ENOMEM on mds ref %llx.%llx fg %x\n", - ceph_vinop(inode), le32_to_cpu(dirinfo->frag)); + pr_err_client(cl, "ENOMEM on mds ref %p %llx.%llx fg %x\n", + inode, ceph_vinop(inode), + le32_to_cpu(dirinfo->frag)); err = -ENOMEM; goto out; } @@ -425,8 +433,8 @@ static int ceph_fill_dirfrag(struct inode *inode, frag->ndist = min_t(u32, ndist, CEPH_MAX_DIRFRAG_REP); for (i = 0; i < frag->ndist; i++) frag->dist[i] = le32_to_cpu(dirinfo->dist[i]); - dout("fill_dirfrag %llx.%llx frag %x ndist=%d\n", - ceph_vinop(inode), frag->frag, frag->ndist); + doutc(cl, "%p %llx.%llx frag %x ndist=%d\n", inode, + ceph_vinop(inode), frag->frag, frag->ndist); out: mutex_unlock(&ci->i_fragtree_mutex); @@ -454,6 +462,7 @@ static int ceph_fill_fragtree(struct inode *inode, struct ceph_frag_tree_head *fragtree, struct ceph_mds_reply_dirfrag *dirinfo) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_frag *frag, *prev_frag = NULL; struct rb_node *rb_node; @@ -489,15 +498,15 @@ static int ceph_fill_fragtree(struct inode *inode, frag_tree_split_cmp, NULL); } - dout("fill_fragtree %llx.%llx\n", ceph_vinop(inode)); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); rb_node = rb_first(&ci->i_fragtree); for (i = 0; i < nsplits; i++) { id = le32_to_cpu(fragtree->splits[i].frag); split_by = le32_to_cpu(fragtree->splits[i].by); if (split_by == 0 || ceph_frag_bits(id) + split_by > 24) { - pr_err("fill_fragtree %llx.%llx invalid split %d/%u, " - "frag %x split by %d\n", ceph_vinop(inode), - i, nsplits, id, split_by); + pr_err_client(cl, "%p %llx.%llx invalid split %d/%u, " + "frag %x split by %d\n", inode, + ceph_vinop(inode), i, nsplits, id, split_by); continue; } frag = NULL; @@ -529,7 +538,7 @@ static int ceph_fill_fragtree(struct inode *inode, if (frag->split_by == 0) ci->i_fragtree_nsplits++; frag->split_by = split_by; - dout(" frag %x split by %d\n", frag->frag, frag->split_by); + doutc(cl, " frag %x split by %d\n", frag->frag, frag->split_by); prev_frag = frag; } while (rb_node) { @@ -554,6 +563,7 @@ static int ceph_fill_fragtree(struct inode *inode, */ struct inode *ceph_alloc_inode(struct super_block *sb) { + struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); struct ceph_inode_info *ci; int i; @@ -561,7 +571,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) if (!ci) return NULL; - dout("alloc_inode %p\n", &ci->netfs.inode); + doutc(fsc->client, "%p\n", &ci->netfs.inode); /* Set parameters for the netfs library */ netfs_inode_init(&ci->netfs, &ceph_netfs_ops); @@ -675,10 +685,11 @@ void ceph_evict_inode(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_frag *frag; struct rb_node *n; - dout("evict_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode)); + doutc(cl, "%p ino %llx.%llx\n", inode, ceph_vinop(inode)); percpu_counter_dec(&mdsc->metric.total_inodes); @@ -701,8 +712,8 @@ void ceph_evict_inode(struct inode *inode) */ if (ci->i_snap_realm) { if (ceph_snap(inode) == CEPH_NOSNAP) { - dout(" dropping residual ref to snap realm %p\n", - ci->i_snap_realm); + doutc(cl, " dropping residual ref to snap realm %p\n", + ci->i_snap_realm); ceph_change_snap_realm(inode, NULL); } else { ceph_put_snapid_map(mdsc, ci->i_snapid_map); @@ -743,15 +754,16 @@ static inline blkcnt_t calc_inode_blocks(u64 size) int ceph_fill_file_size(struct inode *inode, int issued, u32 truncate_seq, u64 truncate_size, u64 size) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); int queue_trunc = 0; loff_t isize = i_size_read(inode); if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 || (truncate_seq == ci->i_truncate_seq && size > isize)) { - dout("size %lld -> %llu\n", isize, size); + doutc(cl, "size %lld -> %llu\n", isize, size); if (size > 0 && S_ISDIR(inode->i_mode)) { - pr_err("fill_file_size non-zero size for directory\n"); + pr_err_client(cl, "non-zero size for directory\n"); size = 0; } i_size_write(inode, size); @@ -764,8 +776,8 @@ int ceph_fill_file_size(struct inode *inode, int issued, ceph_fscache_update(inode); ci->i_reported_size = size; if (truncate_seq != ci->i_truncate_seq) { - dout("%s truncate_seq %u -> %u\n", __func__, - ci->i_truncate_seq, truncate_seq); + doutc(cl, "truncate_seq %u -> %u\n", + ci->i_truncate_seq, truncate_seq); ci->i_truncate_seq = truncate_seq; /* the MDS should have revoked these caps */ @@ -794,14 +806,15 @@ int ceph_fill_file_size(struct inode *inode, int issued, * anyway. */ if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) >= 0) { - dout("%s truncate_size %lld -> %llu, encrypted %d\n", __func__, - ci->i_truncate_size, truncate_size, !!IS_ENCRYPTED(inode)); + doutc(cl, "truncate_size %lld -> %llu, encrypted %d\n", + ci->i_truncate_size, truncate_size, + !!IS_ENCRYPTED(inode)); ci->i_truncate_size = truncate_size; if (IS_ENCRYPTED(inode)) { - dout("%s truncate_pagecache_size %lld -> %llu\n", - __func__, ci->i_truncate_pagecache_size, size); + doutc(cl, "truncate_pagecache_size %lld -> %llu\n", + ci->i_truncate_pagecache_size, size); ci->i_truncate_pagecache_size = size; } else { ci->i_truncate_pagecache_size = truncate_size; @@ -814,6 +827,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, u64 time_warp_seq, struct timespec64 *ctime, struct timespec64 *mtime, struct timespec64 *atime) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct timespec64 ictime = inode_get_ctime(inode); int warn = 0; @@ -825,7 +839,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, CEPH_CAP_XATTR_EXCL)) { if (ci->i_version == 0 || timespec64_compare(ctime, &ictime) > 0) { - dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n", + doutc(cl, "ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n", ictime.tv_sec, ictime.tv_nsec, ctime->tv_sec, ctime->tv_nsec); inode_set_ctime_to_ts(inode, *ctime); @@ -833,11 +847,10 @@ void ceph_fill_file_time(struct inode *inode, int issued, if (ci->i_version == 0 || ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { /* the MDS did a utimes() */ - dout("mtime %lld.%09ld -> %lld.%09ld " - "tw %d -> %d\n", - inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, - mtime->tv_sec, mtime->tv_nsec, - ci->i_time_warp_seq, (int)time_warp_seq); + doutc(cl, "mtime %lld.%09ld -> %lld.%09ld tw %d -> %d\n", + inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, + mtime->tv_sec, mtime->tv_nsec, + ci->i_time_warp_seq, (int)time_warp_seq); inode->i_mtime = *mtime; inode->i_atime = *atime; @@ -845,17 +858,17 @@ void ceph_fill_file_time(struct inode *inode, int issued, } else if (time_warp_seq == ci->i_time_warp_seq) { /* nobody did utimes(); take the max */ if (timespec64_compare(mtime, &inode->i_mtime) > 0) { - dout("mtime %lld.%09ld -> %lld.%09ld inc\n", - inode->i_mtime.tv_sec, - inode->i_mtime.tv_nsec, - mtime->tv_sec, mtime->tv_nsec); + doutc(cl, "mtime %lld.%09ld -> %lld.%09ld inc\n", + inode->i_mtime.tv_sec, + inode->i_mtime.tv_nsec, + mtime->tv_sec, mtime->tv_nsec); inode->i_mtime = *mtime; } if (timespec64_compare(atime, &inode->i_atime) > 0) { - dout("atime %lld.%09ld -> %lld.%09ld inc\n", - inode->i_atime.tv_sec, - inode->i_atime.tv_nsec, - atime->tv_sec, atime->tv_nsec); + doutc(cl, "atime %lld.%09ld -> %lld.%09ld inc\n", + inode->i_atime.tv_sec, + inode->i_atime.tv_nsec, + atime->tv_sec, atime->tv_nsec); inode->i_atime = *atime; } } else if (issued & CEPH_CAP_FILE_EXCL) { @@ -875,13 +888,16 @@ void ceph_fill_file_time(struct inode *inode, int issued, } } if (warn) /* time_warp_seq shouldn't go backwards */ - dout("%p mds time_warp_seq %llu < %u\n", - inode, time_warp_seq, ci->i_time_warp_seq); + doutc(cl, "%p mds time_warp_seq %llu < %u\n", inode, + time_warp_seq, ci->i_time_warp_seq); } #if IS_ENABLED(CONFIG_FS_ENCRYPTION) -static int decode_encrypted_symlink(const char *encsym, int enclen, u8 **decsym) +static int decode_encrypted_symlink(struct ceph_mds_client *mdsc, + const char *encsym, + int enclen, u8 **decsym) { + struct ceph_client *cl = mdsc->fsc->client; int declen; u8 *sym; @@ -891,8 +907,9 @@ static int decode_encrypted_symlink(const char *encsym, int enclen, u8 **decsym) declen = ceph_base64_decode(encsym, enclen, sym); if (declen < 0) { - pr_err("%s: can't decode symlink (%d). Content: %.*s\n", - __func__, declen, enclen, encsym); + pr_err_client(cl, + "can't decode symlink (%d). Content: %.*s\n", + declen, enclen, encsym); kfree(sym); return -EIO; } @@ -901,7 +918,9 @@ static int decode_encrypted_symlink(const char *encsym, int enclen, u8 **decsym) return declen; } #else -static int decode_encrypted_symlink(const char *encsym, int symlen, u8 **decsym) +static int decode_encrypted_symlink(struct ceph_mds_client *mdsc, + const char *encsym, + int symlen, u8 **decsym) { return -EOPNOTSUPP; } @@ -918,6 +937,7 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, struct ceph_cap_reservation *caps_reservation) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_reply_inode *info = iinfo->in; struct ceph_inode_info *ci = ceph_inode(inode); int issued, new_issued, info_caps; @@ -936,25 +956,26 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, lockdep_assert_held(&mdsc->snap_rwsem); - dout("%s %p ino %llx.%llx v %llu had %llu\n", __func__, - inode, ceph_vinop(inode), le64_to_cpu(info->version), - ci->i_version); + doutc(cl, "%p ino %llx.%llx v %llu had %llu\n", inode, ceph_vinop(inode), + le64_to_cpu(info->version), ci->i_version); /* Once I_NEW is cleared, we can't change type or dev numbers */ if (inode->i_state & I_NEW) { inode->i_mode = mode; } else { if (inode_wrong_type(inode, mode)) { - pr_warn_once("inode type changed! (ino %llx.%llx is 0%o, mds says 0%o)\n", - ceph_vinop(inode), inode->i_mode, mode); + pr_warn_once_client(cl, + "inode type changed! (ino %llx.%llx is 0%o, mds says 0%o)\n", + ceph_vinop(inode), inode->i_mode, mode); return -ESTALE; } if ((S_ISCHR(mode) || S_ISBLK(mode)) && inode->i_rdev != rdev) { - pr_warn_once("dev inode rdev changed! (ino %llx.%llx is %u:%u, mds says %u:%u)\n", - ceph_vinop(inode), MAJOR(inode->i_rdev), - MINOR(inode->i_rdev), MAJOR(rdev), - MINOR(rdev)); + pr_warn_once_client(cl, + "dev inode rdev changed! (ino %llx.%llx is %u:%u, mds says %u:%u)\n", + ceph_vinop(inode), MAJOR(inode->i_rdev), + MINOR(inode->i_rdev), MAJOR(rdev), + MINOR(rdev)); return -ESTALE; } } @@ -976,8 +997,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, if (iinfo->xattr_len > 4) { xattr_blob = ceph_buffer_new(iinfo->xattr_len, GFP_NOFS); if (!xattr_blob) - pr_err("%s ENOMEM xattr blob %d bytes\n", __func__, - iinfo->xattr_len); + pr_err_client(cl, "ENOMEM xattr blob %d bytes\n", + iinfo->xattr_len); } if (iinfo->pool_ns_len > 0) @@ -1031,9 +1052,10 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, inode->i_mode = mode; inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid)); inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid)); - dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode, - from_kuid(&init_user_ns, inode->i_uid), - from_kgid(&init_user_ns, inode->i_gid)); + doutc(cl, "%p %llx.%llx mode 0%o uid.gid %d.%d\n", inode, + ceph_vinop(inode), inode->i_mode, + from_kuid(&init_user_ns, inode->i_uid), + from_kgid(&init_user_ns, inode->i_gid)); ceph_decode_timespec64(&ci->i_btime, &iinfo->btime); ceph_decode_timespec64(&ci->i_snap_btime, &iinfo->snap_btime); } @@ -1089,7 +1111,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, if (size == round_up(fsize, CEPH_FSCRYPT_BLOCK_SIZE)) { size = fsize; } else { - pr_warn("fscrypt size mismatch: size=%llu fscrypt_file=%llu, discarding fscrypt_file size.\n", + pr_warn_client(cl, + "fscrypt size mismatch: size=%llu fscrypt_file=%llu, discarding fscrypt_file size.\n", info->size, size); } } @@ -1101,8 +1124,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, /* only update max_size on auth cap */ if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && ci->i_max_size != le64_to_cpu(info->max_size)) { - dout("max_size %lld -> %llu\n", ci->i_max_size, - le64_to_cpu(info->max_size)); + doutc(cl, "max_size %lld -> %llu\n", + ci->i_max_size, le64_to_cpu(info->max_size)); ci->i_max_size = le64_to_cpu(info->max_size); } } @@ -1165,15 +1188,17 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, if (IS_ENCRYPTED(inode)) { if (symlen != i_size_read(inode)) - pr_err("%s %llx.%llx BAD symlink size %lld\n", - __func__, ceph_vinop(inode), + pr_err_client(cl, + "%p %llx.%llx BAD symlink size %lld\n", + inode, ceph_vinop(inode), i_size_read(inode)); - err = decode_encrypted_symlink(iinfo->symlink, + err = decode_encrypted_symlink(mdsc, iinfo->symlink, symlen, (u8 **)&sym); if (err < 0) { - pr_err("%s decoding encrypted symlink failed: %d\n", - __func__, err); + pr_err_client(cl, + "decoding encrypted symlink failed: %d\n", + err); goto out; } symlen = err; @@ -1181,8 +1206,9 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, inode->i_blocks = calc_inode_blocks(symlen); } else { if (symlen != i_size_read(inode)) { - pr_err("%s %llx.%llx BAD symlink size %lld\n", - __func__, ceph_vinop(inode), + pr_err_client(cl, + "%p %llx.%llx BAD symlink size %lld\n", + inode, ceph_vinop(inode), i_size_read(inode)); i_size_write(inode, symlen); inode->i_blocks = calc_inode_blocks(symlen); @@ -1217,8 +1243,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, inode->i_fop = &ceph_dir_fops; break; default: - pr_err("%s %llx.%llx BAD mode 0%o\n", __func__, - ceph_vinop(inode), inode->i_mode); + pr_err_client(cl, "%p %llx.%llx BAD mode 0%o\n", inode, + ceph_vinop(inode), inode->i_mode); } /* were we issued a capability? */ @@ -1239,7 +1265,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, (info_caps & CEPH_CAP_FILE_SHARED) && (issued & CEPH_CAP_FILE_EXCL) == 0 && !__ceph_dir_is_complete(ci)) { - dout(" marking %p complete (empty)\n", inode); + doutc(cl, " marking %p complete (empty)\n", + inode); i_size_write(inode, 0); __ceph_dir_set_complete(ci, atomic64_read(&ci->i_release_count), @@ -1248,8 +1275,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, wake = true; } else { - dout(" %p got snap_caps %s\n", inode, - ceph_cap_string(info_caps)); + doutc(cl, " %p got snap_caps %s\n", inode, + ceph_cap_string(info_caps)); ci->i_snap_caps |= info_caps; } } @@ -1265,8 +1292,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, if (cap_fmode >= 0) { if (!info_caps) - pr_warn("mds issued no caps on %llx.%llx\n", - ceph_vinop(inode)); + pr_warn_client(cl, "mds issued no caps on %llx.%llx\n", + ceph_vinop(inode)); __ceph_touch_fmode(ci, mdsc, cap_fmode); } @@ -1312,14 +1339,14 @@ static void __update_dentry_lease(struct inode *dir, struct dentry *dentry, unsigned long from_time, struct ceph_mds_session **old_lease_session) { + struct ceph_client *cl = ceph_inode_to_client(dir); struct ceph_dentry_info *di = ceph_dentry(dentry); unsigned mask = le16_to_cpu(lease->mask); long unsigned duration = le32_to_cpu(lease->duration_ms); long unsigned ttl = from_time + (duration * HZ) / 1000; long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000; - dout("update_dentry_lease %p duration %lu ms ttl %lu\n", - dentry, duration, ttl); + doutc(cl, "%p duration %lu ms ttl %lu\n", dentry, duration, ttl); /* only track leases on regular dentries */ if (ceph_snap(dir) != CEPH_NOSNAP) @@ -1420,6 +1447,7 @@ static void update_dentry_lease_careful(struct dentry *dentry, */ static int splice_dentry(struct dentry **pdn, struct inode *in) { + struct ceph_client *cl = ceph_inode_to_client(in); struct dentry *dn = *pdn; struct dentry *realdn; @@ -1451,23 +1479,21 @@ static int splice_dentry(struct dentry **pdn, struct inode *in) d_drop(dn); realdn = d_splice_alias(in, dn); if (IS_ERR(realdn)) { - pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", - PTR_ERR(realdn), dn, in, ceph_vinop(in)); + pr_err_client(cl, "error %ld %p inode %p ino %llx.%llx\n", + PTR_ERR(realdn), dn, in, ceph_vinop(in)); return PTR_ERR(realdn); } if (realdn) { - dout("dn %p (%d) spliced with %p (%d) " - "inode %p ino %llx.%llx\n", - dn, d_count(dn), - realdn, d_count(realdn), - d_inode(realdn), ceph_vinop(d_inode(realdn))); + doutc(cl, "dn %p (%d) spliced with %p (%d) inode %p ino %llx.%llx\n", + dn, d_count(dn), realdn, d_count(realdn), + d_inode(realdn), ceph_vinop(d_inode(realdn))); dput(dn); *pdn = realdn; } else { BUG_ON(!ceph_dentry(dn)); - dout("dn %p attached to %p ino %llx.%llx\n", - dn, d_inode(dn), ceph_vinop(d_inode(dn))); + doutc(cl, "dn %p attached to %p ino %llx.%llx\n", dn, + d_inode(dn), ceph_vinop(d_inode(dn))); } return 0; } @@ -1490,13 +1516,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) struct inode *in = NULL; struct ceph_vino tvino, dvino; struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); + struct ceph_client *cl = fsc->client; int err = 0; - dout("fill_trace %p is_dentry %d is_target %d\n", req, - rinfo->head->is_dentry, rinfo->head->is_target); + doutc(cl, "%p is_dentry %d is_target %d\n", req, + rinfo->head->is_dentry, rinfo->head->is_target); if (!rinfo->head->is_target && !rinfo->head->is_dentry) { - dout("fill_trace reply is empty!\n"); + doutc(cl, "reply is empty!\n"); if (rinfo->head->result == 0 && req->r_parent) ceph_invalidate_dir_request(req); return 0; @@ -1553,13 +1580,13 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); retry_lookup: dn = d_lookup(parent, &dname); - dout("d_lookup on parent=%p name=%.*s got %p\n", - parent, dname.len, dname.name, dn); + doutc(cl, "d_lookup on parent=%p name=%.*s got %p\n", + parent, dname.len, dname.name, dn); if (!dn) { dn = d_alloc(parent, &dname); - dout("d_alloc %p '%.*s' = %p\n", parent, - dname.len, dname.name, dn); + doutc(cl, "d_alloc %p '%.*s' = %p\n", parent, + dname.len, dname.name, dn); if (!dn) { dput(parent); ceph_fname_free_buffer(dir, &oname); @@ -1575,8 +1602,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) } else if (d_really_is_positive(dn) && (ceph_ino(d_inode(dn)) != tvino.ino || ceph_snap(d_inode(dn)) != tvino.snap)) { - dout(" dn %p points to wrong inode %p\n", - dn, d_inode(dn)); + doutc(cl, " dn %p points to wrong inode %p\n", + dn, d_inode(dn)); ceph_dir_clear_ordered(dir); d_delete(dn); dput(dn); @@ -1601,8 +1628,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) rinfo->head->result == 0) ? req->r_fmode : -1, &req->r_caps_reservation); if (err < 0) { - pr_err("ceph_fill_inode badness %p %llx.%llx\n", - in, ceph_vinop(in)); + pr_err_client(cl, "badness %p %llx.%llx\n", in, + ceph_vinop(in)); req->r_target_inode = NULL; if (in->i_state & I_NEW) discard_new_inode(in); @@ -1652,36 +1679,32 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) have_lease = have_dir_cap || le32_to_cpu(rinfo->dlease->duration_ms); if (!have_lease) - dout("fill_trace no dentry lease or dir cap\n"); + doutc(cl, "no dentry lease or dir cap\n"); /* rename? */ if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) { struct inode *olddir = req->r_old_dentry_dir; BUG_ON(!olddir); - dout(" src %p '%pd' dst %p '%pd'\n", - req->r_old_dentry, - req->r_old_dentry, - dn, dn); - dout("fill_trace doing d_move %p -> %p\n", - req->r_old_dentry, dn); + doutc(cl, " src %p '%pd' dst %p '%pd'\n", + req->r_old_dentry, req->r_old_dentry, dn, dn); + doutc(cl, "doing d_move %p -> %p\n", req->r_old_dentry, dn); /* d_move screws up sibling dentries' offsets */ ceph_dir_clear_ordered(dir); ceph_dir_clear_ordered(olddir); d_move(req->r_old_dentry, dn); - dout(" src %p '%pd' dst %p '%pd'\n", - req->r_old_dentry, - req->r_old_dentry, - dn, dn); + doutc(cl, " src %p '%pd' dst %p '%pd'\n", + req->r_old_dentry, req->r_old_dentry, dn, dn); /* ensure target dentry is invalidated, despite rehashing bug in vfs_rename_dir */ ceph_invalidate_dentry_lease(dn); - dout("dn %p gets new offset %lld\n", req->r_old_dentry, - ceph_dentry(req->r_old_dentry)->offset); + doutc(cl, "dn %p gets new offset %lld\n", + req->r_old_dentry, + ceph_dentry(req->r_old_dentry)->offset); /* swap r_dentry and r_old_dentry in case that * splice_dentry() gets called later. This is safe @@ -1693,9 +1716,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) /* null dentry? */ if (!rinfo->head->is_target) { - dout("fill_trace null dentry\n"); + doutc(cl, "null dentry\n"); if (d_really_is_positive(dn)) { - dout("d_delete %p\n", dn); + doutc(cl, "d_delete %p\n", dn); ceph_dir_clear_ordered(dir); d_delete(dn); } else if (have_lease) { @@ -1719,9 +1742,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) goto done; dn = req->r_dentry; /* may have spliced */ } else if (d_really_is_positive(dn) && d_inode(dn) != in) { - dout(" %p links to %p %llx.%llx, not %llx.%llx\n", - dn, d_inode(dn), ceph_vinop(d_inode(dn)), - ceph_vinop(in)); + doutc(cl, " %p links to %p %llx.%llx, not %llx.%llx\n", + dn, d_inode(dn), ceph_vinop(d_inode(dn)), + ceph_vinop(in)); d_invalidate(dn); have_lease = false; } @@ -1731,7 +1754,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) rinfo->dlease, session, req->r_request_started); } - dout(" final dn %p\n", dn); + doutc(cl, " final dn %p\n", dn); } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || req->r_op == CEPH_MDS_OP_MKSNAP) && test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) && @@ -1742,7 +1765,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) BUG_ON(!dir); BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR); BUG_ON(!req->r_dentry); - dout(" linking snapped dir %p to dn %p\n", in, req->r_dentry); + doutc(cl, " linking snapped dir %p to dn %p\n", in, + req->r_dentry); ceph_dir_clear_ordered(dir); ihold(in); err = splice_dentry(&req->r_dentry, in); @@ -1764,7 +1788,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) &dvino, ptvino); } done: - dout("fill_trace done err=%d\n", err); + doutc(cl, "done err=%d\n", err); return err; } @@ -1775,6 +1799,7 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, struct ceph_mds_session *session) { struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; + struct ceph_client *cl = session->s_mdsc->fsc->client; int i, err = 0; for (i = 0; i < rinfo->dir_nr; i++) { @@ -1789,14 +1814,14 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, in = ceph_get_inode(req->r_dentry->d_sb, vino, NULL); if (IS_ERR(in)) { err = PTR_ERR(in); - dout("new_inode badness got %d\n", err); + doutc(cl, "badness got %d\n", err); continue; } rc = ceph_fill_inode(in, NULL, &rde->inode, NULL, session, -1, &req->r_caps_reservation); if (rc < 0) { - pr_err("ceph_fill_inode badness on %p got %d\n", - in, rc); + pr_err_client(cl, "inode badness on %p got %d\n", in, + rc); err = rc; if (in->i_state & I_NEW) { ihold(in); @@ -1825,6 +1850,7 @@ static int fill_readdir_cache(struct inode *dir, struct dentry *dn, struct ceph_readdir_cache_control *ctl, struct ceph_mds_request *req) { + struct ceph_client *cl = ceph_inode_to_client(dir); struct ceph_inode_info *ci = ceph_inode(dir); unsigned nsize = PAGE_SIZE / sizeof(struct dentry*); unsigned idx = ctl->index % nsize; @@ -1850,11 +1876,11 @@ static int fill_readdir_cache(struct inode *dir, struct dentry *dn, if (req->r_dir_release_cnt == atomic64_read(&ci->i_release_count) && req->r_dir_ordered_cnt == atomic64_read(&ci->i_ordered_count)) { - dout("readdir cache dn %p idx %d\n", dn, ctl->index); + doutc(cl, "dn %p idx %d\n", dn, ctl->index); ctl->dentries[idx] = dn; ctl->index++; } else { - dout("disable readdir cache\n"); + doutc(cl, "disable readdir cache\n"); ctl->index = -1; } return 0; @@ -1867,6 +1893,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, struct inode *inode = d_inode(parent); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; + struct ceph_client *cl = session->s_mdsc->fsc->client; struct qstr dname; struct dentry *dn; struct inode *in; @@ -1894,19 +1921,18 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, if (rinfo->dir_dir && le32_to_cpu(rinfo->dir_dir->frag) != frag) { - dout("readdir_prepopulate got new frag %x -> %x\n", - frag, le32_to_cpu(rinfo->dir_dir->frag)); + doutc(cl, "got new frag %x -> %x\n", frag, + le32_to_cpu(rinfo->dir_dir->frag)); frag = le32_to_cpu(rinfo->dir_dir->frag); if (!rinfo->hash_order) req->r_readdir_offset = 2; } if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) { - dout("readdir_prepopulate %d items under SNAPDIR dn %p\n", - rinfo->dir_nr, parent); + doutc(cl, "%d items under SNAPDIR dn %p\n", + rinfo->dir_nr, parent); } else { - dout("readdir_prepopulate %d items under dn %p\n", - rinfo->dir_nr, parent); + doutc(cl, "%d items under dn %p\n", rinfo->dir_nr, parent); if (rinfo->dir_dir) ceph_fill_dirfrag(d_inode(parent), rinfo->dir_dir); @@ -1950,15 +1976,15 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, retry_lookup: dn = d_lookup(parent, &dname); - dout("d_lookup on parent=%p name=%.*s got %p\n", - parent, dname.len, dname.name, dn); + doutc(cl, "d_lookup on parent=%p name=%.*s got %p\n", + parent, dname.len, dname.name, dn); if (!dn) { dn = d_alloc(parent, &dname); - dout("d_alloc %p '%.*s' = %p\n", parent, - dname.len, dname.name, dn); + doutc(cl, "d_alloc %p '%.*s' = %p\n", parent, + dname.len, dname.name, dn); if (!dn) { - dout("d_alloc badness\n"); + doutc(cl, "d_alloc badness\n"); err = -ENOMEM; goto out; } @@ -1971,8 +1997,8 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, (ceph_ino(d_inode(dn)) != tvino.ino || ceph_snap(d_inode(dn)) != tvino.snap)) { struct ceph_dentry_info *di = ceph_dentry(dn); - dout(" dn %p points to wrong inode %p\n", - dn, d_inode(dn)); + doutc(cl, " dn %p points to wrong inode %p\n", + dn, d_inode(dn)); spin_lock(&dn->d_lock); if (di->offset > 0 && @@ -1994,7 +2020,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, } else { in = ceph_get_inode(parent->d_sb, tvino, NULL); if (IS_ERR(in)) { - dout("new_inode badness\n"); + doutc(cl, "new_inode badness\n"); d_drop(dn); dput(dn); err = PTR_ERR(in); @@ -2005,7 +2031,8 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, ret = ceph_fill_inode(in, NULL, &rde->inode, NULL, session, -1, &req->r_caps_reservation); if (ret < 0) { - pr_err("ceph_fill_inode badness on %p\n", in); + pr_err_client(cl, "badness on %p %llx.%llx\n", in, + ceph_vinop(in)); if (d_really_is_negative(dn)) { if (in->i_state & I_NEW) { ihold(in); @@ -2022,8 +2049,8 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, if (d_really_is_negative(dn)) { if (ceph_security_xattr_deadlock(in)) { - dout(" skip splicing dn %p to inode %p" - " (security xattr deadlock)\n", dn, in); + doutc(cl, " skip splicing dn %p to inode %p" + " (security xattr deadlock)\n", dn, in); iput(in); skipped++; goto next_item; @@ -2055,17 +2082,18 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, req->r_readdir_cache_idx = cache_ctl.index; } ceph_readdir_cache_release(&cache_ctl); - dout("readdir_prepopulate done\n"); + doutc(cl, "done\n"); return err; } bool ceph_inode_set_size(struct inode *inode, loff_t size) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); bool ret; spin_lock(&ci->i_ceph_lock); - dout("set_size %p %llu -> %llu\n", inode, i_size_read(inode), size); + doutc(cl, "set_size %p %llu -> %llu\n", inode, i_size_read(inode), size); i_size_write(inode, size); ceph_fscache_update(inode); inode->i_blocks = calc_inode_blocks(size); @@ -2080,21 +2108,24 @@ bool ceph_inode_set_size(struct inode *inode, loff_t size) void ceph_queue_inode_work(struct inode *inode, int work_bit) { struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); + struct ceph_client *cl = fsc->client; struct ceph_inode_info *ci = ceph_inode(inode); set_bit(work_bit, &ci->i_work_mask); ihold(inode); if (queue_work(fsc->inode_wq, &ci->i_work)) { - dout("queue_inode_work %p, mask=%lx\n", inode, ci->i_work_mask); + doutc(cl, "%p %llx.%llx mask=%lx\n", inode, + ceph_vinop(inode), ci->i_work_mask); } else { - dout("queue_inode_work %p already queued, mask=%lx\n", - inode, ci->i_work_mask); + doutc(cl, "%p %llx.%llx already queued, mask=%lx\n", + inode, ceph_vinop(inode), ci->i_work_mask); iput(inode); } } static void ceph_do_invalidate_pages(struct inode *inode) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); u32 orig_gen; int check = 0; @@ -2104,8 +2135,9 @@ static void ceph_do_invalidate_pages(struct inode *inode) mutex_lock(&ci->i_truncate_mutex); if (ceph_inode_is_shutdown(inode)) { - pr_warn_ratelimited("%s: inode %llx.%llx is shut down\n", - __func__, ceph_vinop(inode)); + pr_warn_ratelimited_client(cl, + "%p %llx.%llx is shut down\n", inode, + ceph_vinop(inode)); mapping_set_error(inode->i_mapping, -EIO); truncate_pagecache(inode, 0); mutex_unlock(&ci->i_truncate_mutex); @@ -2113,8 +2145,8 @@ static void ceph_do_invalidate_pages(struct inode *inode) } spin_lock(&ci->i_ceph_lock); - dout("invalidate_pages %p gen %d revoking %d\n", inode, - ci->i_rdcache_gen, ci->i_rdcache_revoking); + doutc(cl, "%p %llx.%llx gen %d revoking %d\n", inode, + ceph_vinop(inode), ci->i_rdcache_gen, ci->i_rdcache_revoking); if (ci->i_rdcache_revoking != ci->i_rdcache_gen) { if (__ceph_caps_revoking_other(ci, NULL, CEPH_CAP_FILE_CACHE)) check = 1; @@ -2126,21 +2158,21 @@ static void ceph_do_invalidate_pages(struct inode *inode) spin_unlock(&ci->i_ceph_lock); if (invalidate_inode_pages2(inode->i_mapping) < 0) { - pr_err("invalidate_inode_pages2 %llx.%llx failed\n", - ceph_vinop(inode)); + pr_err_client(cl, "invalidate_inode_pages2 %llx.%llx failed\n", + ceph_vinop(inode)); } spin_lock(&ci->i_ceph_lock); if (orig_gen == ci->i_rdcache_gen && orig_gen == ci->i_rdcache_revoking) { - dout("invalidate_pages %p gen %d successful\n", inode, - ci->i_rdcache_gen); + doutc(cl, "%p %llx.%llx gen %d successful\n", inode, + ceph_vinop(inode), ci->i_rdcache_gen); ci->i_rdcache_revoking--; check = 1; } else { - dout("invalidate_pages %p gen %d raced, now %d revoking %d\n", - inode, orig_gen, ci->i_rdcache_gen, - ci->i_rdcache_revoking); + doutc(cl, "%p %llx.%llx gen %d raced, now %d revoking %d\n", + inode, ceph_vinop(inode), orig_gen, ci->i_rdcache_gen, + ci->i_rdcache_revoking); if (__ceph_caps_revoking_other(ci, NULL, CEPH_CAP_FILE_CACHE)) check = 1; } @@ -2157,6 +2189,7 @@ static void ceph_do_invalidate_pages(struct inode *inode) */ void __ceph_do_pending_vmtruncate(struct inode *inode) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); u64 to; int wrbuffer_refs, finish = 0; @@ -2165,7 +2198,8 @@ void __ceph_do_pending_vmtruncate(struct inode *inode) retry: spin_lock(&ci->i_ceph_lock); if (ci->i_truncate_pending == 0) { - dout("%s %p none pending\n", __func__, inode); + doutc(cl, "%p %llx.%llx none pending\n", inode, + ceph_vinop(inode)); spin_unlock(&ci->i_ceph_lock); mutex_unlock(&ci->i_truncate_mutex); return; @@ -2177,7 +2211,8 @@ void __ceph_do_pending_vmtruncate(struct inode *inode) */ if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) { spin_unlock(&ci->i_ceph_lock); - dout("%s %p flushing snaps first\n", __func__, inode); + doutc(cl, "%p %llx.%llx flushing snaps first\n", inode, + ceph_vinop(inode)); filemap_write_and_wait_range(&inode->i_data, 0, inode->i_sb->s_maxbytes); goto retry; @@ -2188,8 +2223,8 @@ void __ceph_do_pending_vmtruncate(struct inode *inode) to = ci->i_truncate_pagecache_size; wrbuffer_refs = ci->i_wrbuffer_ref; - dout("%s %p (%d) to %lld\n", __func__, inode, - ci->i_truncate_pending, to); + doutc(cl, "%p %llx.%llx (%d) to %lld\n", inode, ceph_vinop(inode), + ci->i_truncate_pending, to); spin_unlock(&ci->i_ceph_lock); ceph_fscache_resize(inode, to); @@ -2217,9 +2252,10 @@ static void ceph_inode_work(struct work_struct *work) struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, i_work); struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); if (test_and_clear_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask)) { - dout("writeback %p\n", inode); + doutc(cl, "writeback %p %llx.%llx\n", inode, ceph_vinop(inode)); filemap_fdatawrite(&inode->i_data); } if (test_and_clear_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask)) @@ -2291,6 +2327,7 @@ static int fill_fscrypt_truncate(struct inode *inode, struct ceph_mds_request *req, struct iattr *attr) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); int boff = attr->ia_size % CEPH_FSCRYPT_BLOCK_SIZE; loff_t pos, orig_pos = round_down(attr->ia_size, @@ -2313,9 +2350,9 @@ static int fill_fscrypt_truncate(struct inode *inode, issued = __ceph_caps_issued(ci, NULL); - dout("%s size %lld -> %lld got cap refs on %s, issued %s\n", __func__, - i_size, attr->ia_size, ceph_cap_string(got), - ceph_cap_string(issued)); + doutc(cl, "size %lld -> %lld got cap refs on %s, issued %s\n", + i_size, attr->ia_size, ceph_cap_string(got), + ceph_cap_string(issued)); /* Try to writeback the dirty pagecaches */ if (issued & (CEPH_CAP_FILE_BUFFER)) { @@ -2370,8 +2407,7 @@ static int fill_fscrypt_truncate(struct inode *inode, * If the Rados object doesn't exist, it will be set to 0. */ if (!objver) { - dout("%s hit hole, ppos %lld < size %lld\n", __func__, - pos, i_size); + doutc(cl, "hit hole, ppos %lld < size %lld\n", pos, i_size); header.data_len = cpu_to_le32(8 + 8 + 4); header.file_offset = 0; @@ -2380,8 +2416,8 @@ static int fill_fscrypt_truncate(struct inode *inode, header.data_len = cpu_to_le32(8 + 8 + 4 + CEPH_FSCRYPT_BLOCK_SIZE); header.file_offset = cpu_to_le64(orig_pos); - dout("%s encrypt block boff/bsize %d/%lu\n", __func__, - boff, CEPH_FSCRYPT_BLOCK_SIZE); + doutc(cl, "encrypt block boff/bsize %d/%lu\n", boff, + CEPH_FSCRYPT_BLOCK_SIZE); /* truncate and zero out the extra contents for the last block */ memset(iov.iov_base + boff, 0, PAGE_SIZE - boff); @@ -2409,8 +2445,8 @@ static int fill_fscrypt_truncate(struct inode *inode, } req->r_pagelist = pagelist; out: - dout("%s %p size dropping cap refs on %s\n", __func__, - inode, ceph_cap_string(got)); + doutc(cl, "%p %llx.%llx size dropping cap refs on %s\n", inode, + ceph_vinop(inode), ceph_cap_string(got)); ceph_put_cap_refs(ci, got); if (iov.iov_base) kunmap_local(iov.iov_base); @@ -2428,6 +2464,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr, unsigned int ia_valid = attr->ia_valid; struct ceph_mds_request *req; struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_cap_flush *prealloc_cf; loff_t isize = i_size_read(inode); int issued; @@ -2466,7 +2503,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr, } } - dout("setattr %p issued %s\n", inode, ceph_cap_string(issued)); + doutc(cl, "%p %llx.%llx issued %s\n", inode, ceph_vinop(inode), + ceph_cap_string(issued)); #if IS_ENABLED(CONFIG_FS_ENCRYPTION) if (cia && cia->fscrypt_auth) { u32 len = ceph_fscrypt_auth_len(cia->fscrypt_auth); @@ -2477,8 +2515,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr, goto out; } - dout("setattr %llx:%llx fscrypt_auth len %u to %u)\n", - ceph_vinop(inode), ci->fscrypt_auth_len, len); + doutc(cl, "%p %llx.%llx fscrypt_auth len %u to %u)\n", inode, + ceph_vinop(inode), ci->fscrypt_auth_len, len); /* It should never be re-set once set */ WARN_ON_ONCE(ci->fscrypt_auth); @@ -2506,9 +2544,10 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr, #endif /* CONFIG_FS_ENCRYPTION */ if (ia_valid & ATTR_UID) { - dout("setattr %p uid %d -> %d\n", inode, - from_kuid(&init_user_ns, inode->i_uid), - from_kuid(&init_user_ns, attr->ia_uid)); + doutc(cl, "%p %llx.%llx uid %d -> %d\n", inode, + ceph_vinop(inode), + from_kuid(&init_user_ns, inode->i_uid), + from_kuid(&init_user_ns, attr->ia_uid)); if (issued & CEPH_CAP_AUTH_EXCL) { inode->i_uid = attr->ia_uid; dirtied |= CEPH_CAP_AUTH_EXCL; @@ -2521,9 +2560,10 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr, } } if (ia_valid & ATTR_GID) { - dout("setattr %p gid %d -> %d\n", inode, - from_kgid(&init_user_ns, inode->i_gid), - from_kgid(&init_user_ns, attr->ia_gid)); + doutc(cl, "%p %llx.%llx gid %d -> %d\n", inode, + ceph_vinop(inode), + from_kgid(&init_user_ns, inode->i_gid), + from_kgid(&init_user_ns, attr->ia_gid)); if (issued & CEPH_CAP_AUTH_EXCL) { inode->i_gid = attr->ia_gid; dirtied |= CEPH_CAP_AUTH_EXCL; @@ -2536,8 +2576,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr, } } if (ia_valid & ATTR_MODE) { - dout("setattr %p mode 0%o -> 0%o\n", inode, inode->i_mode, - attr->ia_mode); + doutc(cl, "%p %llx.%llx mode 0%o -> 0%o\n", inode, + ceph_vinop(inode), inode->i_mode, attr->ia_mode); if (issued & CEPH_CAP_AUTH_EXCL) { inode->i_mode = attr->ia_mode; dirtied |= CEPH_CAP_AUTH_EXCL; @@ -2551,9 +2591,10 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr, } if (ia_valid & ATTR_ATIME) { - dout("setattr %p atime %lld.%ld -> %lld.%ld\n", inode, - inode->i_atime.tv_sec, inode->i_atime.tv_nsec, - attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec); + doutc(cl, "%p %llx.%llx atime %lld.%ld -> %lld.%ld\n", + inode, ceph_vinop(inode), inode->i_atime.tv_sec, + inode->i_atime.tv_nsec, attr->ia_atime.tv_sec, + attr->ia_atime.tv_nsec); if (issued & CEPH_CAP_FILE_EXCL) { ci->i_time_warp_seq++; inode->i_atime = attr->ia_atime; @@ -2573,7 +2614,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr, } } if (ia_valid & ATTR_SIZE) { - dout("setattr %p size %lld -> %lld\n", inode, isize, attr->ia_size); + doutc(cl, "%p %llx.%llx size %lld -> %lld\n", inode, + ceph_vinop(inode), isize, attr->ia_size); /* * Only when the new size is smaller and not aligned to * CEPH_FSCRYPT_BLOCK_SIZE will the RMW is needed. @@ -2624,9 +2666,10 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr, } } if (ia_valid & ATTR_MTIME) { - dout("setattr %p mtime %lld.%ld -> %lld.%ld\n", inode, - inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, - attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec); + doutc(cl, "%p %llx.%llx mtime %lld.%ld -> %lld.%ld\n", + inode, ceph_vinop(inode), inode->i_mtime.tv_sec, + inode->i_mtime.tv_nsec, attr->ia_mtime.tv_sec, + attr->ia_mtime.tv_nsec); if (issued & CEPH_CAP_FILE_EXCL) { ci->i_time_warp_seq++; inode->i_mtime = attr->ia_mtime; @@ -2650,11 +2693,12 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr, if (ia_valid & ATTR_CTIME) { bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME| ATTR_MODE|ATTR_UID|ATTR_GID)) == 0; - dout("setattr %p ctime %lld.%ld -> %lld.%ld (%s)\n", inode, - inode_get_ctime(inode).tv_sec, - inode_get_ctime(inode).tv_nsec, - attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec, - only ? "ctime only" : "ignored"); + doutc(cl, "%p %llx.%llx ctime %lld.%ld -> %lld.%ld (%s)\n", + inode, ceph_vinop(inode), inode_get_ctime(inode).tv_sec, + inode_get_ctime(inode).tv_nsec, + attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec, + only ? "ctime only" : "ignored"); + if (only) { /* * if kernel wants to dirty ctime but nothing else, @@ -2672,7 +2716,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr, } } if (ia_valid & ATTR_FILE) - dout("setattr %p ATTR_FILE ... hrm!\n", inode); + doutc(cl, "%p %llx.%llx ATTR_FILE ... hrm!\n", inode, + ceph_vinop(inode)); if (dirtied) { inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied, @@ -2713,16 +2758,17 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr, */ err = ceph_mdsc_do_request(mdsc, NULL, req); if (err == -EAGAIN && truncate_retry--) { - dout("setattr %p result=%d (%s locally, %d remote), retry it!\n", - inode, err, ceph_cap_string(dirtied), mask); + doutc(cl, "%p %llx.%llx result=%d (%s locally, %d remote), retry it!\n", + inode, ceph_vinop(inode), err, + ceph_cap_string(dirtied), mask); ceph_mdsc_put_request(req); ceph_free_cap_flush(prealloc_cf); goto retry; } } out: - dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, - ceph_cap_string(dirtied), mask); + doutc(cl, "%p %llx.%llx result=%d (%s locally, %d remote)\n", inode, + ceph_vinop(inode), err, ceph_cap_string(dirtied), mask); ceph_mdsc_put_request(req); ceph_free_cap_flush(prealloc_cf); @@ -2811,18 +2857,20 @@ int __ceph_do_getattr(struct inode *inode, struct page *locked_page, int mask, bool force) { struct ceph_fs_client *fsc = ceph_sb_to_fs_client(inode->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; int mode; int err; if (ceph_snap(inode) == CEPH_SNAPDIR) { - dout("do_getattr inode %p SNAPDIR\n", inode); + doutc(cl, "inode %p %llx.%llx SNAPDIR\n", inode, + ceph_vinop(inode)); return 0; } - dout("do_getattr inode %p mask %s mode 0%o\n", - inode, ceph_cap_string(mask), inode->i_mode); + doutc(cl, "inode %p %llx.%llx mask %s mode 0%o\n", inode, + ceph_vinop(inode), ceph_cap_string(mask), inode->i_mode); if (!force && ceph_caps_issued_mask_metric(ceph_inode(inode), mask, 1)) return 0; @@ -2849,7 +2897,7 @@ int __ceph_do_getattr(struct inode *inode, struct page *locked_page, } } ceph_mdsc_put_request(req); - dout("do_getattr result=%d\n", err); + doutc(cl, "result=%d\n", err); return err; } @@ -2857,6 +2905,7 @@ int ceph_do_getvxattr(struct inode *inode, const char *name, void *value, size_t size) { struct ceph_fs_client *fsc = ceph_sb_to_fs_client(inode->i_sb); + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; int mode = USE_AUTH_MDS; @@ -2886,7 +2935,7 @@ int ceph_do_getvxattr(struct inode *inode, const char *name, void *value, xattr_value = req->r_reply_info.xattr_info.xattr_value; xattr_value_len = req->r_reply_info.xattr_info.xattr_value_len; - dout("do_getvxattr xattr_value_len:%zu, size:%zu\n", xattr_value_len, size); + doutc(cl, "xattr_value_len:%zu, size:%zu\n", xattr_value_len, size); err = (int)xattr_value_len; if (size == 0) @@ -2901,7 +2950,7 @@ int ceph_do_getvxattr(struct inode *inode, const char *name, void *value, put: ceph_mdsc_put_request(req); out: - dout("do_getvxattr result=%d\n", err); + doutc(cl, "result=%d\n", err); return err; } diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 3f617146e4ad..e861de3c79b9 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -245,6 +245,7 @@ static long ceph_ioctl_lazyio(struct file *file) struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; + struct ceph_client *cl = mdsc->fsc->client; if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) { spin_lock(&ci->i_ceph_lock); @@ -252,11 +253,13 @@ static long ceph_ioctl_lazyio(struct file *file) ci->i_nr_by_mode[ffs(CEPH_FILE_MODE_LAZY)]++; __ceph_touch_fmode(ci, mdsc, fi->fmode); spin_unlock(&ci->i_ceph_lock); - dout("ioctl_layzio: file %p marked lazy\n", file); + doutc(cl, "file %p %p %llx.%llx marked lazy\n", file, inode, + ceph_vinop(inode)); ceph_check_caps(ci, 0); } else { - dout("ioctl_layzio: file %p already lazy\n", file); + doutc(cl, "file %p %p %llx.%llx already lazy\n", file, inode, + ceph_vinop(inode)); } return 0; } @@ -355,10 +358,12 @@ static const char *ceph_ioctl_cmd_name(const unsigned int cmd) long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + struct inode *inode = file_inode(file); + struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); int ret; - dout("ioctl file %p cmd %s arg %lu\n", file, - ceph_ioctl_cmd_name(cmd), arg); + doutc(fsc->client, "file %p %p %llx.%llx cmd %s arg %lu\n", file, + inode, ceph_vinop(inode), ceph_ioctl_cmd_name(cmd), arg); switch (cmd) { case CEPH_IOC_GET_LAYOUT: return ceph_ioctl_get_layout(file, (void __user *)arg); diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index cb51c7e9c8e2..e07ad29ff8b9 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -77,6 +77,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, int cmd, u8 wait, struct file_lock *fl) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; int err; u64 length = 0; @@ -111,10 +112,10 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, owner = secure_addr(fl->fl_owner); - dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, " - "start: %llu, length: %llu, wait: %d, type: %d\n", (int)lock_type, - (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length, - wait, fl->fl_type); + doutc(cl, "rule: %d, op: %d, owner: %llx, pid: %llu, " + "start: %llu, length: %llu, wait: %d, type: %d\n", + (int)lock_type, (int)operation, owner, (u64)fl->fl_pid, + fl->fl_start, length, wait, fl->fl_type); req->r_args.filelock_change.rule = lock_type; req->r_args.filelock_change.type = cmd; @@ -147,16 +148,17 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, } ceph_mdsc_put_request(req); - dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " - "length: %llu, wait: %d, type: %d, err code %d\n", (int)lock_type, - (int)operation, (u64)fl->fl_pid, fl->fl_start, - length, wait, fl->fl_type, err); + doutc(cl, "rule: %d, op: %d, pid: %llu, start: %llu, " + "length: %llu, wait: %d, type: %d, err code %d\n", + (int)lock_type, (int)operation, (u64)fl->fl_pid, + fl->fl_start, length, wait, fl->fl_type, err); return err; } static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *intr_req; struct inode *inode = req->r_inode; int err, lock_type; @@ -174,8 +176,7 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, if (!err) return 0; - dout("ceph_lock_wait_for_completion: request %llu was interrupted\n", - req->r_tid); + doutc(cl, "request %llu was interrupted\n", req->r_tid); mutex_lock(&mdsc->mutex); if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) { @@ -246,6 +247,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) { struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); int err = 0; u16 op = CEPH_MDS_OP_SETFILELOCK; u8 wait = 0; @@ -257,7 +259,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) if (ceph_inode_is_shutdown(inode)) return -ESTALE; - dout("ceph_lock, fl_owner: %p\n", fl->fl_owner); + doutc(cl, "fl_owner: %p\n", fl->fl_owner); /* set wait bit as appropriate, then make command as Ceph expects it*/ if (IS_GETLK(cmd)) @@ -292,7 +294,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl); if (!err) { if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK != fl->fl_type) { - dout("mds locked, locking locally\n"); + doutc(cl, "locking locally\n"); err = posix_lock_file(file, fl, NULL); if (err) { /* undo! This should only happen if @@ -300,8 +302,8 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) * deadlock. */ ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, CEPH_LOCK_UNLOCK, 0, fl); - dout("got %d on posix_lock_file, undid lock\n", - err); + doutc(cl, "got %d on posix_lock_file, undid lock\n", + err); } } } @@ -312,6 +314,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) { struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); int err = 0; u8 wait = 0; u8 lock_cmd; @@ -322,7 +325,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) if (ceph_inode_is_shutdown(inode)) return -ESTALE; - dout("ceph_flock, fl_file: %p\n", fl->fl_file); + doutc(cl, "fl_file: %p\n", fl->fl_file); spin_lock(&ci->i_ceph_lock); if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { @@ -359,7 +362,8 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, inode, CEPH_LOCK_UNLOCK, 0, fl); - dout("got %d on locks_lock_file_wait, undid lock\n", err); + doutc(cl, "got %d on locks_lock_file_wait, undid lock\n", + err); } } return err; @@ -371,6 +375,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) */ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct file_lock *lock; struct file_lock_context *ctx; @@ -386,17 +391,20 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) ++(*flock_count); spin_unlock(&ctx->flc_lock); } - dout("counted %d flock locks and %d fcntl locks\n", - *flock_count, *fcntl_count); + doutc(cl, "counted %d flock locks and %d fcntl locks\n", + *flock_count, *fcntl_count); } /* * Given a pointer to a lock, convert it to a ceph filelock */ -static int lock_to_ceph_filelock(struct file_lock *lock, +static int lock_to_ceph_filelock(struct inode *inode, + struct file_lock *lock, struct ceph_filelock *cephlock) { + struct ceph_client *cl = ceph_inode_to_client(inode); int err = 0; + cephlock->start = cpu_to_le64(lock->fl_start); cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); cephlock->client = cpu_to_le64(0); @@ -414,7 +422,7 @@ static int lock_to_ceph_filelock(struct file_lock *lock, cephlock->type = CEPH_LOCK_UNLOCK; break; default: - dout("Have unknown lock type %d\n", lock->fl_type); + doutc(cl, "Have unknown lock type %d\n", lock->fl_type); err = -EINVAL; } @@ -432,13 +440,14 @@ int ceph_encode_locks_to_buffer(struct inode *inode, { struct file_lock *lock; struct file_lock_context *ctx = locks_inode_context(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); int err = 0; int seen_fcntl = 0; int seen_flock = 0; int l = 0; - dout("encoding %d flock and %d fcntl locks\n", num_flock_locks, - num_fcntl_locks); + doutc(cl, "encoding %d flock and %d fcntl locks\n", num_flock_locks, + num_fcntl_locks); if (!ctx) return 0; @@ -450,7 +459,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode, err = -ENOSPC; goto fail; } - err = lock_to_ceph_filelock(lock, &flocks[l]); + err = lock_to_ceph_filelock(inode, lock, &flocks[l]); if (err) goto fail; ++l; @@ -461,7 +470,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode, err = -ENOSPC; goto fail; } - err = lock_to_ceph_filelock(lock, &flocks[l]); + err = lock_to_ceph_filelock(inode, lock, &flocks[l]); if (err) goto fail; ++l; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index d009f1dff291..86c8b6859852 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -411,6 +411,7 @@ static int parse_reply_info_readdir(void **p, void *end, u64 features) { struct ceph_mds_reply_info_parsed *info = &req->r_reply_info; + struct ceph_client *cl = req->r_mdsc->fsc->client; u32 num, i = 0; int err; @@ -433,7 +434,7 @@ static int parse_reply_info_readdir(void **p, void *end, BUG_ON(!info->dir_entries); if ((unsigned long)(info->dir_entries + num) > (unsigned long)info->dir_entries + info->dir_buf_size) { - pr_err("dir contents are larger than expected\n"); + pr_err_client(cl, "dir contents are larger than expected\n"); WARN_ON(1); goto bad; } @@ -454,7 +455,7 @@ static int parse_reply_info_readdir(void **p, void *end, ceph_decode_need(p, end, _name_len, bad); _name = *p; *p += _name_len; - dout("parsed dir dname '%.*s'\n", _name_len, _name); + doutc(cl, "parsed dir dname '%.*s'\n", _name_len, _name); if (info->hash_order) rde->raw_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash, @@ -514,8 +515,8 @@ static int parse_reply_info_readdir(void **p, void *end, rde->is_nokey = false; err = ceph_fname_to_usr(&fname, &tname, &oname, &rde->is_nokey); if (err) { - pr_err("%s unable to decode %.*s, got %d\n", __func__, - _name_len, _name, err); + pr_err_client(cl, "unable to decode %.*s, got %d\n", + _name_len, _name, err); goto out_bad; } rde->name = oname.name; @@ -539,7 +540,7 @@ static int parse_reply_info_readdir(void **p, void *end, bad: err = -EIO; out_bad: - pr_err("problem parsing dir contents %d\n", err); + pr_err_client(cl, "problem parsing dir contents %d\n", err); return err; } @@ -570,10 +571,11 @@ static int parse_reply_info_filelock(void **p, void *end, static int ceph_parse_deleg_inos(void **p, void *end, struct ceph_mds_session *s) { + struct ceph_client *cl = s->s_mdsc->fsc->client; u32 sets; ceph_decode_32_safe(p, end, sets, bad); - dout("got %u sets of delegated inodes\n", sets); + doutc(cl, "got %u sets of delegated inodes\n", sets); while (sets--) { u64 start, len; @@ -582,8 +584,9 @@ static int ceph_parse_deleg_inos(void **p, void *end, /* Don't accept a delegation of system inodes */ if (start < CEPH_INO_SYSTEM_BASE) { - pr_warn_ratelimited("ceph: ignoring reserved inode range delegation (start=0x%llx len=0x%llx)\n", - start, len); + pr_warn_ratelimited_client(cl, + "ignoring reserved inode range delegation (start=0x%llx len=0x%llx)\n", + start, len); continue; } while (len--) { @@ -591,10 +594,10 @@ static int ceph_parse_deleg_inos(void **p, void *end, DELEGATED_INO_AVAILABLE, GFP_KERNEL); if (!err) { - dout("added delegated inode 0x%llx\n", - start - 1); + doutc(cl, "added delegated inode 0x%llx\n", start - 1); } else if (err == -EBUSY) { - pr_warn("MDS delegated inode 0x%llx more than once.\n", + pr_warn_client(cl, + "MDS delegated inode 0x%llx more than once.\n", start - 1); } else { return err; @@ -744,6 +747,7 @@ static int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg, struct ceph_mds_request *req, u64 features) { struct ceph_mds_reply_info_parsed *info = &req->r_reply_info; + struct ceph_client *cl = s->s_mdsc->fsc->client; void *p, *end; u32 len; int err; @@ -783,7 +787,7 @@ static int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg, bad: err = -EIO; out_bad: - pr_err("mds parse_reply err %d\n", err); + pr_err_client(cl, "mds parse_reply err %d\n", err); ceph_msg_dump(msg); return err; } @@ -831,6 +835,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) int ceph_wait_on_conflict_unlink(struct dentry *dentry) { struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dentry->d_sb); + struct ceph_client *cl = fsc->client; struct dentry *pdentry = dentry->d_parent; struct dentry *udentry, *found = NULL; struct ceph_dentry_info *di; @@ -855,8 +860,8 @@ int ceph_wait_on_conflict_unlink(struct dentry *dentry) goto next; if (!test_bit(CEPH_DENTRY_ASYNC_UNLINK_BIT, &di->flags)) - pr_warn("%s dentry %p:%pd async unlink bit is not set\n", - __func__, dentry, dentry); + pr_warn_client(cl, "dentry %p:%pd async unlink bit is not set\n", + dentry, dentry); if (!d_same_name(udentry, pdentry, &dname)) goto next; @@ -872,8 +877,8 @@ int ceph_wait_on_conflict_unlink(struct dentry *dentry) if (likely(!found)) return 0; - dout("%s dentry %p:%pd conflict with old %p:%pd\n", __func__, - dentry, dentry, found, found); + doutc(cl, "dentry %p:%pd conflict with old %p:%pd\n", dentry, dentry, + found, found); err = wait_on_bit(&di->flags, CEPH_DENTRY_ASYNC_UNLINK_BIT, TASK_KILLABLE); @@ -957,6 +962,7 @@ static int __verify_registered_session(struct ceph_mds_client *mdsc, static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, int mds) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_session *s; if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO) @@ -973,7 +979,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, int newmax = 1 << get_count_order(mds + 1); struct ceph_mds_session **sa; - dout("%s: realloc to %d\n", __func__, newmax); + doutc(cl, "realloc to %d\n", newmax); sa = kcalloc(newmax, sizeof(void *), GFP_NOFS); if (!sa) goto fail_realloc; @@ -986,7 +992,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, mdsc->max_sessions = newmax; } - dout("%s: mds%d\n", __func__, mds); + doutc(cl, "mds%d\n", mds); s->s_mdsc = mdsc; s->s_mds = mds; s->s_state = CEPH_MDS_SESSION_NEW; @@ -1029,7 +1035,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, static void __unregister_session(struct ceph_mds_client *mdsc, struct ceph_mds_session *s) { - dout("__unregister_session mds%d %p\n", s->s_mds, s); + doutc(mdsc->fsc->client, "mds%d %p\n", s->s_mds, s); BUG_ON(mdsc->sessions[s->s_mds] != s); mdsc->sessions[s->s_mds] = NULL; ceph_con_close(&s->s_con); @@ -1155,6 +1161,7 @@ static void __register_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req, struct inode *dir) { + struct ceph_client *cl = mdsc->fsc->client; int ret = 0; req->r_tid = ++mdsc->last_tid; @@ -1162,14 +1169,14 @@ static void __register_request(struct ceph_mds_client *mdsc, ret = ceph_reserve_caps(mdsc, &req->r_caps_reservation, req->r_num_caps); if (ret < 0) { - pr_err("__register_request %p " - "failed to reserve caps: %d\n", req, ret); + pr_err_client(cl, "%p failed to reserve caps: %d\n", + req, ret); /* set req->r_err to fail early from __do_request */ req->r_err = ret; return; } } - dout("__register_request %p tid %lld\n", req, req->r_tid); + doutc(cl, "%p tid %lld\n", req, req->r_tid); ceph_mdsc_get_request(req); insert_request(&mdsc->request_tree, req); @@ -1192,7 +1199,7 @@ static void __register_request(struct ceph_mds_client *mdsc, static void __unregister_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { - dout("__unregister_request %p tid %lld\n", req, req->r_tid); + doutc(mdsc->fsc->client, "%p tid %lld\n", req, req->r_tid); /* Never leave an unregistered request on an unsafe list! */ list_del_init(&req->r_unsafe_item); @@ -1278,6 +1285,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, int mds = -1; u32 hash = req->r_direct_hash; bool is_hash = test_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags); + struct ceph_client *cl = mdsc->fsc->client; if (random) *random = false; @@ -1289,8 +1297,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, if (req->r_resend_mds >= 0 && (__have_session(mdsc, req->r_resend_mds) || ceph_mdsmap_get_state(mdsc->mdsmap, req->r_resend_mds) > 0)) { - dout("%s using resend_mds mds%d\n", __func__, - req->r_resend_mds); + doutc(cl, "using resend_mds mds%d\n", req->r_resend_mds); return req->r_resend_mds; } @@ -1307,7 +1314,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc, rcu_read_lock(); inode = get_nonsnap_parent(req->r_dentry); rcu_read_unlock(); - dout("%s using snapdir's parent %p\n", __func__, inode); + doutc(cl, "using snapdir's parent %p %llx.%llx\n", + inode, ceph_vinop(inode)); } } else if (req->r_dentry) { /* ignore race with rename; old or new d_parent is okay */ @@ -1327,7 +1335,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc, /* direct snapped/virtual snapdir requests * based on parent dir inode */ inode = get_nonsnap_parent(parent); - dout("%s using nonsnap parent %p\n", __func__, inode); + doutc(cl, "using nonsnap parent %p %llx.%llx\n", + inode, ceph_vinop(inode)); } else { /* dentry target */ inode = d_inode(req->r_dentry); @@ -1343,10 +1352,11 @@ static int __choose_mds(struct ceph_mds_client *mdsc, rcu_read_unlock(); } - dout("%s %p is_hash=%d (0x%x) mode %d\n", __func__, inode, (int)is_hash, - hash, mode); if (!inode) goto random; + + doutc(cl, "%p %llx.%llx is_hash=%d (0x%x) mode %d\n", inode, + ceph_vinop(inode), (int)is_hash, hash, mode); ci = ceph_inode(inode); if (is_hash && S_ISDIR(inode->i_mode)) { @@ -1362,9 +1372,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc, get_random_bytes(&r, 1); r %= frag.ndist; mds = frag.dist[r]; - dout("%s %p %llx.%llx frag %u mds%d (%d/%d)\n", - __func__, inode, ceph_vinop(inode), - frag.frag, mds, (int)r, frag.ndist); + doutc(cl, "%p %llx.%llx frag %u mds%d (%d/%d)\n", + inode, ceph_vinop(inode), frag.frag, + mds, (int)r, frag.ndist); if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= CEPH_MDS_STATE_ACTIVE && !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds)) @@ -1377,9 +1387,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc, if (frag.mds >= 0) { /* choose auth mds */ mds = frag.mds; - dout("%s %p %llx.%llx frag %u mds%d (auth)\n", - __func__, inode, ceph_vinop(inode), - frag.frag, mds); + doutc(cl, "%p %llx.%llx frag %u mds%d (auth)\n", + inode, ceph_vinop(inode), frag.frag, mds); if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= CEPH_MDS_STATE_ACTIVE) { if (!ceph_mdsmap_is_laggy(mdsc->mdsmap, @@ -1403,9 +1412,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc, goto random; } mds = cap->session->s_mds; - dout("%s %p %llx.%llx mds%d (%scap %p)\n", __func__, - inode, ceph_vinop(inode), mds, - cap == ci->i_auth_cap ? "auth " : "", cap); + doutc(cl, "%p %llx.%llx mds%d (%scap %p)\n", inode, + ceph_vinop(inode), mds, + cap == ci->i_auth_cap ? "auth " : "", cap); spin_unlock(&ci->i_ceph_lock); out: iput(inode); @@ -1416,7 +1425,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, *random = true; mds = ceph_mdsmap_get_random_mds(mdsc->mdsmap); - dout("%s chose random mds%d\n", __func__, mds); + doutc(cl, "chose random mds%d\n", mds); return mds; } @@ -1529,6 +1538,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 int metadata_key_count = 0; struct ceph_options *opt = mdsc->fsc->client->options; struct ceph_mount_options *fsopt = mdsc->fsc->mount_options; + struct ceph_client *cl = mdsc->fsc->client; size_t size, count; void *p, *end; int ret; @@ -1567,7 +1577,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes, GFP_NOFS, false); if (!msg) { - pr_err("ENOMEM creating session open msg\n"); + pr_err_client(cl, "ENOMEM creating session open msg\n"); return ERR_PTR(-ENOMEM); } p = msg->front.iov_base; @@ -1607,14 +1617,14 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 ret = encode_supported_features(&p, end); if (ret) { - pr_err("encode_supported_features failed!\n"); + pr_err_client(cl, "encode_supported_features failed!\n"); ceph_msg_put(msg); return ERR_PTR(ret); } ret = encode_metric_spec(&p, end); if (ret) { - pr_err("encode_metric_spec failed!\n"); + pr_err_client(cl, "encode_metric_spec failed!\n"); ceph_msg_put(msg); return ERR_PTR(ret); } @@ -1642,8 +1652,8 @@ static int __open_session(struct ceph_mds_client *mdsc, /* wait for mds to go active? */ mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds); - dout("open_session to mds%d (%s)\n", mds, - ceph_mds_state_name(mstate)); + doutc(mdsc->fsc->client, "open_session to mds%d (%s)\n", mds, + ceph_mds_state_name(mstate)); session->s_state = CEPH_MDS_SESSION_OPENING; session->s_renew_requested = jiffies; @@ -1686,8 +1696,9 @@ struct ceph_mds_session * ceph_mdsc_open_export_target_session(struct ceph_mds_client *mdsc, int target) { struct ceph_mds_session *session; + struct ceph_client *cl = mdsc->fsc->client; - dout("open_export_target_session to mds%d\n", target); + doutc(cl, "to mds%d\n", target); mutex_lock(&mdsc->mutex); session = __open_export_target_session(mdsc, target); @@ -1702,13 +1713,14 @@ static void __open_export_target_sessions(struct ceph_mds_client *mdsc, struct ceph_mds_info *mi; struct ceph_mds_session *ts; int i, mds = session->s_mds; + struct ceph_client *cl = mdsc->fsc->client; if (mds >= mdsc->mdsmap->possible_max_rank) return; mi = &mdsc->mdsmap->m_info[mds]; - dout("open_export_target_sessions for mds%d (%d targets)\n", - session->s_mds, mi->num_export_targets); + doutc(cl, "for mds%d (%d targets)\n", session->s_mds, + mi->num_export_targets); for (i = 0; i < mi->num_export_targets; i++) { ts = __open_export_target_session(mdsc, mi->export_targets[i]); @@ -1731,11 +1743,13 @@ void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, static void detach_cap_releases(struct ceph_mds_session *session, struct list_head *target) { + struct ceph_client *cl = session->s_mdsc->fsc->client; + lockdep_assert_held(&session->s_cap_lock); list_splice_init(&session->s_cap_releases, target); session->s_num_cap_releases = 0; - dout("dispose_cap_releases mds%d\n", session->s_mds); + doutc(cl, "mds%d\n", session->s_mds); } static void dispose_cap_releases(struct ceph_mds_client *mdsc, @@ -1753,16 +1767,17 @@ static void dispose_cap_releases(struct ceph_mds_client *mdsc, static void cleanup_session_requests(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct rb_node *p; - dout("cleanup_session_requests mds%d\n", session->s_mds); + doutc(cl, "mds%d\n", session->s_mds); mutex_lock(&mdsc->mutex); while (!list_empty(&session->s_unsafe)) { req = list_first_entry(&session->s_unsafe, struct ceph_mds_request, r_unsafe_item); - pr_warn_ratelimited(" dropping unsafe request %llu\n", - req->r_tid); + pr_warn_ratelimited_client(cl, " dropping unsafe request %llu\n", + req->r_tid); if (req->r_target_inode) mapping_set_error(req->r_target_inode->i_mapping, -EIO); if (req->r_unsafe_dir) @@ -1791,13 +1806,14 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session, int (*cb)(struct inode *, int mds, void *), void *arg) { + struct ceph_client *cl = session->s_mdsc->fsc->client; struct list_head *p; struct ceph_cap *cap; struct inode *inode, *last_inode = NULL; struct ceph_cap *old_cap = NULL; int ret; - dout("iterate_session_caps %p mds%d\n", session, session->s_mds); + doutc(cl, "%p mds%d\n", session, session->s_mds); spin_lock(&session->s_cap_lock); p = session->s_caps.next; while (p != &session->s_caps) { @@ -1828,8 +1844,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session, spin_lock(&session->s_cap_lock); p = p->next; if (!cap->ci) { - dout("iterate_session_caps finishing cap %p removal\n", - cap); + doutc(cl, "finishing cap %p removal\n", cap); BUG_ON(cap->session != session); cap->session = NULL; list_del_init(&cap->session_caps); @@ -1858,6 +1873,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session, static int remove_session_caps_cb(struct inode *inode, int mds, void *arg) { struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_client *cl = ceph_inode_to_client(inode); bool invalidate = false; struct ceph_cap *cap; int iputs = 0; @@ -1865,8 +1881,8 @@ static int remove_session_caps_cb(struct inode *inode, int mds, void *arg) spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ci, mds); if (cap) { - dout(" removing cap %p, ci is %p, inode is %p\n", - cap, ci, &ci->netfs.inode); + doutc(cl, " removing cap %p, ci is %p, inode is %p\n", + cap, ci, &ci->netfs.inode); iputs = ceph_purge_inode_cap(inode, cap, &invalidate); } @@ -1890,7 +1906,7 @@ static void remove_session_caps(struct ceph_mds_session *session) struct super_block *sb = fsc->sb; LIST_HEAD(dispose); - dout("remove_session_caps on %p\n", session); + doutc(fsc->client, "on %p\n", session); ceph_iterate_session_caps(session, remove_session_caps_cb, fsc); wake_up_all(&fsc->mdsc->cap_flushing_wq); @@ -1971,7 +1987,9 @@ static int wake_up_session_cb(struct inode *inode, int mds, void *arg) static void wake_up_session_caps(struct ceph_mds_session *session, int ev) { - dout("wake_up_session_caps %p mds%d\n", session, session->s_mds); + struct ceph_client *cl = session->s_mdsc->fsc->client; + + doutc(cl, "session %p mds%d\n", session, session->s_mds); ceph_iterate_session_caps(session, wake_up_session_cb, (void *)(unsigned long)ev); } @@ -1985,25 +2003,26 @@ static void wake_up_session_caps(struct ceph_mds_session *session, int ev) static int send_renew_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *msg; int state; if (time_after_eq(jiffies, session->s_cap_ttl) && time_after_eq(session->s_cap_ttl, session->s_renew_requested)) - pr_info("mds%d caps stale\n", session->s_mds); + pr_info_client(cl, "mds%d caps stale\n", session->s_mds); session->s_renew_requested = jiffies; /* do not try to renew caps until a recovering mds has reconnected * with its clients. */ state = ceph_mdsmap_get_state(mdsc->mdsmap, session->s_mds); if (state < CEPH_MDS_STATE_RECONNECT) { - dout("send_renew_caps ignoring mds%d (%s)\n", - session->s_mds, ceph_mds_state_name(state)); + doutc(cl, "ignoring mds%d (%s)\n", session->s_mds, + ceph_mds_state_name(state)); return 0; } - dout("send_renew_caps to mds%d (%s)\n", session->s_mds, - ceph_mds_state_name(state)); + doutc(cl, "to mds%d (%s)\n", session->s_mds, + ceph_mds_state_name(state)); msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, ++session->s_renew_seq); if (!msg) @@ -2015,10 +2034,11 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, static int send_flushmsg_ack(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, u64 seq) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *msg; - dout("send_flushmsg_ack to mds%d (%s)s seq %lld\n", - session->s_mds, ceph_session_state_name(session->s_state), seq); + doutc(cl, "to mds%d (%s)s seq %lld\n", session->s_mds, + ceph_session_state_name(session->s_state), seq); msg = ceph_create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq); if (!msg) return -ENOMEM; @@ -2035,6 +2055,7 @@ static int send_flushmsg_ack(struct ceph_mds_client *mdsc, static void renewed_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, int is_renew) { + struct ceph_client *cl = mdsc->fsc->client; int was_stale; int wake = 0; @@ -2046,15 +2067,17 @@ static void renewed_caps(struct ceph_mds_client *mdsc, if (was_stale) { if (time_before(jiffies, session->s_cap_ttl)) { - pr_info("mds%d caps renewed\n", session->s_mds); + pr_info_client(cl, "mds%d caps renewed\n", + session->s_mds); wake = 1; } else { - pr_info("mds%d caps still stale\n", session->s_mds); + pr_info_client(cl, "mds%d caps still stale\n", + session->s_mds); } } - dout("renewed_caps mds%d ttl now %lu, was %s, now %s\n", - session->s_mds, session->s_cap_ttl, was_stale ? "stale" : "fresh", - time_before(jiffies, session->s_cap_ttl) ? "stale" : "fresh"); + doutc(cl, "mds%d ttl now %lu, was %s, now %s\n", session->s_mds, + session->s_cap_ttl, was_stale ? "stale" : "fresh", + time_before(jiffies, session->s_cap_ttl) ? "stale" : "fresh"); spin_unlock(&session->s_cap_lock); if (wake) @@ -2066,11 +2089,11 @@ static void renewed_caps(struct ceph_mds_client *mdsc, */ static int request_close_session(struct ceph_mds_session *session) { + struct ceph_client *cl = session->s_mdsc->fsc->client; struct ceph_msg *msg; - dout("request_close_session mds%d state %s seq %lld\n", - session->s_mds, ceph_session_state_name(session->s_state), - session->s_seq); + doutc(cl, "mds%d state %s seq %lld\n", session->s_mds, + ceph_session_state_name(session->s_state), session->s_seq); msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq); if (!msg) @@ -2127,6 +2150,7 @@ static bool drop_negative_children(struct dentry *dentry) static int trim_caps_cb(struct inode *inode, int mds, void *arg) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = mdsc->fsc->client; int *remaining = arg; struct ceph_inode_info *ci = ceph_inode(inode); int used, wanted, oissued, mine; @@ -2146,9 +2170,10 @@ static int trim_caps_cb(struct inode *inode, int mds, void *arg) wanted = __ceph_caps_file_wanted(ci); oissued = __ceph_caps_issued_other(ci, cap); - dout("trim_caps_cb %p cap %p mine %s oissued %s used %s wanted %s\n", - inode, cap, ceph_cap_string(mine), ceph_cap_string(oissued), - ceph_cap_string(used), ceph_cap_string(wanted)); + doutc(cl, "%p %llx.%llx cap %p mine %s oissued %s used %s wanted %s\n", + inode, ceph_vinop(inode), cap, ceph_cap_string(mine), + ceph_cap_string(oissued), ceph_cap_string(used), + ceph_cap_string(wanted)); if (cap == ci->i_auth_cap) { if (ci->i_dirty_caps || ci->i_flushing_caps || !list_empty(&ci->i_cap_snaps)) @@ -2188,8 +2213,8 @@ static int trim_caps_cb(struct inode *inode, int mds, void *arg) count = atomic_read(&inode->i_count); if (count == 1) (*remaining)--; - dout("trim_caps_cb %p cap %p pruned, count now %d\n", - inode, cap, count); + doutc(cl, "%p %llx.%llx cap %p pruned, count now %d\n", + inode, ceph_vinop(inode), cap, count); } else { dput(dentry); } @@ -2208,17 +2233,18 @@ int ceph_trim_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, int max_caps) { + struct ceph_client *cl = mdsc->fsc->client; int trim_caps = session->s_nr_caps - max_caps; - dout("trim_caps mds%d start: %d / %d, trim %d\n", - session->s_mds, session->s_nr_caps, max_caps, trim_caps); + doutc(cl, "mds%d start: %d / %d, trim %d\n", session->s_mds, + session->s_nr_caps, max_caps, trim_caps); if (trim_caps > 0) { int remaining = trim_caps; ceph_iterate_session_caps(session, trim_caps_cb, &remaining); - dout("trim_caps mds%d done: %d / %d, trimmed %d\n", - session->s_mds, session->s_nr_caps, max_caps, - trim_caps - remaining); + doutc(cl, "mds%d done: %d / %d, trimmed %d\n", + session->s_mds, session->s_nr_caps, max_caps, + trim_caps - remaining); } ceph_flush_cap_releases(mdsc, session); @@ -2228,6 +2254,7 @@ int ceph_trim_caps(struct ceph_mds_client *mdsc, static int check_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_tid) { + struct ceph_client *cl = mdsc->fsc->client; int ret = 1; spin_lock(&mdsc->cap_dirty_lock); @@ -2236,8 +2263,8 @@ static int check_caps_flush(struct ceph_mds_client *mdsc, list_first_entry(&mdsc->cap_flush_list, struct ceph_cap_flush, g_list); if (cf->tid <= want_flush_tid) { - dout("check_caps_flush still flushing tid " - "%llu <= %llu\n", cf->tid, want_flush_tid); + doutc(cl, "still flushing tid %llu <= %llu\n", + cf->tid, want_flush_tid); ret = 0; } } @@ -2253,12 +2280,14 @@ static int check_caps_flush(struct ceph_mds_client *mdsc, static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_tid) { - dout("check_caps_flush want %llu\n", want_flush_tid); + struct ceph_client *cl = mdsc->fsc->client; + + doutc(cl, "want %llu\n", want_flush_tid); wait_event(mdsc->cap_flushing_wq, check_caps_flush(mdsc, want_flush_tid)); - dout("check_caps_flush ok, flushed thru %llu\n", want_flush_tid); + doutc(cl, "ok, flushed thru %llu\n", want_flush_tid); } /* @@ -2267,6 +2296,7 @@ static void wait_caps_flush(struct ceph_mds_client *mdsc, static void ceph_send_cap_releases(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *msg = NULL; struct ceph_mds_cap_release *head; struct ceph_mds_cap_item *item; @@ -2325,7 +2355,7 @@ static void ceph_send_cap_releases(struct ceph_mds_client *mdsc, msg->front.iov_len += sizeof(*cap_barrier); msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); - dout("send_cap_releases mds%d %p\n", session->s_mds, msg); + doutc(cl, "mds%d %p\n", session->s_mds, msg); ceph_con_send(&session->s_con, msg); msg = NULL; } @@ -2345,13 +2375,13 @@ static void ceph_send_cap_releases(struct ceph_mds_client *mdsc, msg->front.iov_len += sizeof(*cap_barrier); msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); - dout("send_cap_releases mds%d %p\n", session->s_mds, msg); + doutc(cl, "mds%d %p\n", session->s_mds, msg); ceph_con_send(&session->s_con, msg); } return; out_err: - pr_err("send_cap_releases mds%d, failed to allocate message\n", - session->s_mds); + pr_err_client(cl, "mds%d, failed to allocate message\n", + session->s_mds); spin_lock(&session->s_cap_lock); list_splice(&tmp_list, &session->s_cap_releases); session->s_num_cap_releases += num_cap_releases; @@ -2374,16 +2404,17 @@ static void ceph_cap_release_work(struct work_struct *work) void ceph_flush_cap_releases(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; if (mdsc->stopping) return; ceph_get_mds_session(session); if (queue_work(mdsc->fsc->cap_wq, &session->s_cap_release_work)) { - dout("cap release work queued\n"); + doutc(cl, "cap release work queued\n"); } else { ceph_put_mds_session(session); - dout("failed to queue cap release work\n"); + doutc(cl, "failed to queue cap release work\n"); } } @@ -2411,13 +2442,14 @@ static void ceph_cap_reclaim_work(struct work_struct *work) void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; if (mdsc->stopping) return; if (queue_work(mdsc->fsc->cap_wq, &mdsc->cap_reclaim_work)) { - dout("caps reclaim work queued\n"); + doutc(cl, "caps reclaim work queued\n"); } else { - dout("failed to queue caps release work\n"); + doutc(cl, "failed to queue caps release work\n"); } } @@ -2612,6 +2644,7 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen) char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry, int *plen, u64 *pbase, int for_wire) { + struct ceph_client *cl = mdsc->fsc->client; struct dentry *cur; struct inode *inode; char *path; @@ -2637,8 +2670,7 @@ char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry, spin_lock(&cur->d_lock); inode = d_inode(cur); if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { - dout("build_path path+%d: %p SNAPDIR\n", - pos, cur); + doutc(cl, "path+%d: %p SNAPDIR\n", pos, cur); spin_unlock(&cur->d_lock); parent = dget_parent(cur); } else if (for_wire && inode && dentry != cur && @@ -2716,15 +2748,15 @@ char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry, * A rename didn't occur, but somehow we didn't end up where * we thought we would. Throw a warning and try again. */ - pr_warn("build_path did not end path lookup where expected (pos = %d)\n", - pos); + pr_warn_client(cl, "did not end path lookup where expected (pos = %d)\n", + pos); goto retry; } *pbase = base; *plen = PATH_MAX - 1 - pos; - dout("build_path on %p %d built %llx '%.*s'\n", - dentry, d_count(dentry), base, *plen, path + pos); + doutc(cl, "on %p %d built %llx '%.*s'\n", dentry, d_count(dentry), + base, *plen, path + pos); return path + pos; } @@ -2787,22 +2819,22 @@ static int set_request_path_attr(struct ceph_mds_client *mdsc, struct inode *rin int *pathlen, u64 *ino, bool *freepath, bool parent_locked) { + struct ceph_client *cl = mdsc->fsc->client; int r = 0; if (rinode) { r = build_inode_path(rinode, ppath, pathlen, ino, freepath); - dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode), - ceph_snap(rinode)); + doutc(cl, " inode %p %llx.%llx\n", rinode, ceph_ino(rinode), + ceph_snap(rinode)); } else if (rdentry) { r = build_dentry_path(mdsc, rdentry, rdiri, ppath, pathlen, ino, freepath, parent_locked); - dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, - *ppath); + doutc(cl, " dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, *ppath); } else if (rpath || rino) { *ino = rino; *ppath = rpath; *pathlen = rpath ? strlen(rpath) : 0; - dout(" path %.*s\n", *pathlen, rpath); + doutc(cl, " path %.*s\n", *pathlen, rpath); } return r; @@ -3103,6 +3135,7 @@ static int __prepare_send_request(struct ceph_mds_session *session, { int mds = session->s_mds; struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request_head_legacy *lhead; struct ceph_mds_request_head *nhead; struct ceph_msg *msg; @@ -3121,8 +3154,8 @@ static int __prepare_send_request(struct ceph_mds_session *session, old_max_retry = 1 << (old_max_retry * BITS_PER_BYTE); if ((old_version && req->r_attempts >= old_max_retry) || ((uint32_t)req->r_attempts >= U32_MAX)) { - pr_warn_ratelimited("%s request tid %llu seq overflow\n", - __func__, req->r_tid); + pr_warn_ratelimited_client(cl, "request tid %llu seq overflow\n", + req->r_tid); return -EMULTIHOP; } } @@ -3137,8 +3170,8 @@ static int __prepare_send_request(struct ceph_mds_session *session, else req->r_sent_on_mseq = -1; } - dout("%s %p tid %lld %s (attempt %d)\n", __func__, req, - req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); + doutc(cl, "%p tid %lld %s (attempt %d)\n", req, req->r_tid, + ceph_mds_op_name(req->r_op), req->r_attempts); if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { void *p; @@ -3206,7 +3239,7 @@ static int __prepare_send_request(struct ceph_mds_session *session, nhead->ext_num_retry = cpu_to_le32(req->r_attempts - 1); } - dout(" r_parent = %p\n", req->r_parent); + doutc(cl, " r_parent = %p\n", req->r_parent); return 0; } @@ -3234,6 +3267,7 @@ static int __send_request(struct ceph_mds_session *session, static void __do_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_session *session = NULL; int mds = -1; int err = 0; @@ -3246,29 +3280,29 @@ static void __do_request(struct ceph_mds_client *mdsc, } if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO) { - dout("do_request metadata corrupted\n"); + doutc(cl, "metadata corrupted\n"); err = -EIO; goto finish; } if (req->r_timeout && time_after_eq(jiffies, req->r_started + req->r_timeout)) { - dout("do_request timed out\n"); + doutc(cl, "timed out\n"); err = -ETIMEDOUT; goto finish; } if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { - dout("do_request forced umount\n"); + doutc(cl, "forced umount\n"); err = -EIO; goto finish; } if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) { if (mdsc->mdsmap_err) { err = mdsc->mdsmap_err; - dout("do_request mdsmap err %d\n", err); + doutc(cl, "mdsmap err %d\n", err); goto finish; } if (mdsc->mdsmap->m_epoch == 0) { - dout("do_request no mdsmap, waiting for map\n"); + doutc(cl, "no mdsmap, waiting for map\n"); list_add(&req->r_wait, &mdsc->waiting_for_map); return; } @@ -3289,7 +3323,7 @@ static void __do_request(struct ceph_mds_client *mdsc, err = -EJUKEBOX; goto finish; } - dout("do_request no mds or not active, waiting for map\n"); + doutc(cl, "no mds or not active, waiting for map\n"); list_add(&req->r_wait, &mdsc->waiting_for_map); return; } @@ -3305,8 +3339,8 @@ static void __do_request(struct ceph_mds_client *mdsc, } req->r_session = ceph_get_mds_session(session); - dout("do_request mds%d session %p state %s\n", mds, session, - ceph_session_state_name(session->s_state)); + doutc(cl, "mds%d session %p state %s\n", mds, session, + ceph_session_state_name(session->s_state)); /* * The old ceph will crash the MDSs when see unknown OPs @@ -3397,8 +3431,8 @@ static void __do_request(struct ceph_mds_client *mdsc, spin_lock(&ci->i_ceph_lock); cap = ci->i_auth_cap; if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE && mds != cap->mds) { - dout("do_request session changed for auth cap %d -> %d\n", - cap->session->s_mds, session->s_mds); + doutc(cl, "session changed for auth cap %d -> %d\n", + cap->session->s_mds, session->s_mds); /* Remove the auth cap from old session */ spin_lock(&cap->session->s_cap_lock); @@ -3425,7 +3459,7 @@ static void __do_request(struct ceph_mds_client *mdsc, ceph_put_mds_session(session); finish: if (err) { - dout("__do_request early error %d\n", err); + doutc(cl, "early error %d\n", err); req->r_err = err; complete_request(mdsc, req); __unregister_request(mdsc, req); @@ -3439,6 +3473,7 @@ static void __do_request(struct ceph_mds_client *mdsc, static void __wake_requests(struct ceph_mds_client *mdsc, struct list_head *head) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; LIST_HEAD(tmp_list); @@ -3448,7 +3483,8 @@ static void __wake_requests(struct ceph_mds_client *mdsc, req = list_entry(tmp_list.next, struct ceph_mds_request, r_wait); list_del_init(&req->r_wait); - dout(" wake request %p tid %llu\n", req, req->r_tid); + doutc(cl, " wake request %p tid %llu\n", req, + req->r_tid); __do_request(mdsc, req); } } @@ -3459,10 +3495,11 @@ static void __wake_requests(struct ceph_mds_client *mdsc, */ static void kick_requests(struct ceph_mds_client *mdsc, int mds) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct rb_node *p = rb_first(&mdsc->request_tree); - dout("kick_requests mds%d\n", mds); + doutc(cl, "kick_requests mds%d\n", mds); while (p) { req = rb_entry(p, struct ceph_mds_request, r_node); p = rb_next(p); @@ -3472,7 +3509,7 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds) continue; /* only new requests */ if (req->r_session && req->r_session->s_mds == mds) { - dout(" kicking tid %llu\n", req->r_tid); + doutc(cl, " kicking tid %llu\n", req->r_tid); list_del_init(&req->r_wait); __do_request(mdsc, req); } @@ -3482,6 +3519,7 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds) int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir, struct ceph_mds_request *req) { + struct ceph_client *cl = mdsc->fsc->client; int err = 0; /* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */ @@ -3503,8 +3541,7 @@ int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir, if (req->r_inode) { err = ceph_wait_on_async_create(req->r_inode); if (err) { - dout("%s: wait for async create returned: %d\n", - __func__, err); + doutc(cl, "wait for async create returned: %d\n", err); return err; } } @@ -3512,13 +3549,12 @@ int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir, if (!err && req->r_old_inode) { err = ceph_wait_on_async_create(req->r_old_inode); if (err) { - dout("%s: wait for async create returned: %d\n", - __func__, err); + doutc(cl, "wait for async create returned: %d\n", err); return err; } } - dout("submit_request on %p for inode %p\n", req, dir); + doutc(cl, "submit_request on %p for inode %p\n", req, dir); mutex_lock(&mdsc->mutex); __register_request(mdsc, req, dir); __do_request(mdsc, req); @@ -3531,10 +3567,11 @@ int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc, struct ceph_mds_request *req, ceph_mds_request_wait_callback_t wait_func) { + struct ceph_client *cl = mdsc->fsc->client; int err; /* wait */ - dout("do_request waiting\n"); + doutc(cl, "do_request waiting\n"); if (wait_func) { err = wait_func(mdsc, req); } else { @@ -3548,14 +3585,14 @@ int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc, else err = timeleft; /* killed */ } - dout("do_request waited, got %d\n", err); + doutc(cl, "do_request waited, got %d\n", err); mutex_lock(&mdsc->mutex); /* only abort if we didn't race with a real reply */ if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) { err = le32_to_cpu(req->r_reply_info.head->result); } else if (err < 0) { - dout("aborted request %lld with %d\n", req->r_tid, err); + doutc(cl, "aborted request %lld with %d\n", req->r_tid, err); /* * ensure we aren't running concurrently with @@ -3586,15 +3623,16 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, struct inode *dir, struct ceph_mds_request *req) { + struct ceph_client *cl = mdsc->fsc->client; int err; - dout("do_request on %p\n", req); + doutc(cl, "do_request on %p\n", req); /* issue */ err = ceph_mdsc_submit_request(mdsc, dir, req); if (!err) err = ceph_mdsc_wait_request(mdsc, req, NULL); - dout("do_request %p done, result %d\n", req, err); + doutc(cl, "do_request %p done, result %d\n", req, err); return err; } @@ -3606,8 +3644,10 @@ void ceph_invalidate_dir_request(struct ceph_mds_request *req) { struct inode *dir = req->r_parent; struct inode *old_dir = req->r_old_dentry_dir; + struct ceph_client *cl = req->r_mdsc->fsc->client; - dout("invalidate_dir_request %p %p (complete, lease(s))\n", dir, old_dir); + doutc(cl, "invalidate_dir_request %p %p (complete, lease(s))\n", + dir, old_dir); ceph_dir_clear_complete(dir); if (old_dir) @@ -3628,6 +3668,7 @@ void ceph_invalidate_dir_request(struct ceph_mds_request *req) static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) { struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; struct ceph_mds_reply_head *head = msg->front.iov_base; struct ceph_mds_reply_info_parsed *rinfo; /* parsed reply info */ @@ -3638,7 +3679,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) bool close_sessions = false; if (msg->front.iov_len < sizeof(*head)) { - pr_err("mdsc_handle_reply got corrupt (short) reply\n"); + pr_err_client(cl, "got corrupt (short) reply\n"); ceph_msg_dump(msg); return; } @@ -3648,17 +3689,17 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) mutex_lock(&mdsc->mutex); req = lookup_get_request(mdsc, tid); if (!req) { - dout("handle_reply on unknown tid %llu\n", tid); + doutc(cl, "on unknown tid %llu\n", tid); mutex_unlock(&mdsc->mutex); return; } - dout("handle_reply %p\n", req); + doutc(cl, "handle_reply %p\n", req); /* correct session? */ if (req->r_session != session) { - pr_err("mdsc_handle_reply got %llu on session mds%d" - " not mds%d\n", tid, session->s_mds, - req->r_session ? req->r_session->s_mds : -1); + pr_err_client(cl, "got %llu on session mds%d not mds%d\n", + tid, session->s_mds, + req->r_session ? req->r_session->s_mds : -1); mutex_unlock(&mdsc->mutex); goto out; } @@ -3666,14 +3707,14 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) /* dup? */ if ((test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags) && !head->safe) || (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags) && head->safe)) { - pr_warn("got a dup %s reply on %llu from mds%d\n", - head->safe ? "safe" : "unsafe", tid, mds); + pr_warn_client(cl, "got a dup %s reply on %llu from mds%d\n", + head->safe ? "safe" : "unsafe", tid, mds); mutex_unlock(&mdsc->mutex); goto out; } if (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags)) { - pr_warn("got unsafe after safe on %llu from mds%d\n", - tid, mds); + pr_warn_client(cl, "got unsafe after safe on %llu from mds%d\n", + tid, mds); mutex_unlock(&mdsc->mutex); goto out; } @@ -3696,7 +3737,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) * response. And even if it did, there is nothing * useful we could do with a revised return value. */ - dout("got safe reply %llu, mds%d\n", tid, mds); + doutc(cl, "got safe reply %llu, mds%d\n", tid, mds); mutex_unlock(&mdsc->mutex); goto out; @@ -3706,7 +3747,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe); } - dout("handle_reply tid %lld result %d\n", tid, result); + doutc(cl, "tid %lld result %d\n", tid, result); if (test_bit(CEPHFS_FEATURE_REPLY_ENCODING, &session->s_features)) err = parse_reply_info(session, msg, req, (u64)-1); else @@ -3746,7 +3787,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) mutex_lock(&session->s_mutex); if (err < 0) { - pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid); + pr_err_client(cl, "got corrupt reply mds%d(tid:%lld)\n", + mds, tid); ceph_msg_dump(msg); goto out_err; } @@ -3810,7 +3852,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) set_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags); } } else { - dout("reply arrived after request %lld was aborted\n", tid); + doutc(cl, "reply arrived after request %lld was aborted\n", tid); } mutex_unlock(&mdsc->mutex); @@ -3839,6 +3881,7 @@ static void handle_forward(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req; u64 tid = le64_to_cpu(msg->hdr.tid); u32 next_mds; @@ -3856,12 +3899,12 @@ static void handle_forward(struct ceph_mds_client *mdsc, req = lookup_get_request(mdsc, tid); if (!req) { mutex_unlock(&mdsc->mutex); - dout("forward tid %llu to mds%d - req dne\n", tid, next_mds); + doutc(cl, "forward tid %llu to mds%d - req dne\n", tid, next_mds); return; /* dup reply? */ } if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { - dout("forward tid %llu aborted, unregistering\n", tid); + doutc(cl, "forward tid %llu aborted, unregistering\n", tid); __unregister_request(mdsc, req); } else if (fwd_seq <= req->r_num_fwd || (uint32_t)fwd_seq >= U32_MAX) { /* @@ -3877,10 +3920,11 @@ static void handle_forward(struct ceph_mds_client *mdsc, set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags); mutex_unlock(&req->r_fill_mutex); aborted = true; - pr_warn_ratelimited("forward tid %llu seq overflow\n", tid); + pr_warn_ratelimited_client(cl, "forward tid %llu seq overflow\n", + tid); } else { /* resend. forward race not possible; mds would drop */ - dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds); + doutc(cl, "forward tid %llu to mds%d (we resend)\n", tid, next_mds); BUG_ON(req->r_err); BUG_ON(test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)); req->r_attempts = 0; @@ -3898,7 +3942,7 @@ static void handle_forward(struct ceph_mds_client *mdsc, return; bad: - pr_err("mdsc_handle_forward decode error err=%d\n", err); + pr_err_client(cl, "decode error err=%d\n", err); ceph_msg_dump(msg); } @@ -3937,6 +3981,7 @@ static void handle_session(struct ceph_mds_session *session, struct ceph_msg *msg) { struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; int mds = session->s_mds; int msg_version = le16_to_cpu(msg->hdr.version); void *p = msg->front.iov_base; @@ -3984,7 +4029,8 @@ static void handle_session(struct ceph_mds_session *session, /* version >= 5, flags */ ceph_decode_32_safe(&p, end, flags, bad); if (flags & CEPH_SESSION_BLOCKLISTED) { - pr_warn("mds%d session blocklisted\n", session->s_mds); + pr_warn_client(cl, "mds%d session blocklisted\n", + session->s_mds); blocklisted = true; } } @@ -4000,22 +4046,24 @@ static void handle_session(struct ceph_mds_session *session, mutex_lock(&session->s_mutex); - dout("handle_session mds%d %s %p state %s seq %llu\n", - mds, ceph_session_op_name(op), session, - ceph_session_state_name(session->s_state), seq); + doutc(cl, "mds%d %s %p state %s seq %llu\n", mds, + ceph_session_op_name(op), session, + ceph_session_state_name(session->s_state), seq); if (session->s_state == CEPH_MDS_SESSION_HUNG) { session->s_state = CEPH_MDS_SESSION_OPEN; - pr_info("mds%d came back\n", session->s_mds); + pr_info_client(cl, "mds%d came back\n", session->s_mds); } switch (op) { case CEPH_SESSION_OPEN: if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) - pr_info("mds%d reconnect success\n", session->s_mds); + pr_info_client(cl, "mds%d reconnect success\n", + session->s_mds); if (session->s_state == CEPH_MDS_SESSION_OPEN) { - pr_notice("mds%d is already opened\n", session->s_mds); + pr_notice_client(cl, "mds%d is already opened\n", + session->s_mds); } else { session->s_state = CEPH_MDS_SESSION_OPEN; session->s_features = features; @@ -4045,7 +4093,8 @@ static void handle_session(struct ceph_mds_session *session, case CEPH_SESSION_CLOSE: if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) - pr_info("mds%d reconnect denied\n", session->s_mds); + pr_info_client(cl, "mds%d reconnect denied\n", + session->s_mds); session->s_state = CEPH_MDS_SESSION_CLOSED; cleanup_session_requests(mdsc, session); remove_session_caps(session); @@ -4054,8 +4103,8 @@ static void handle_session(struct ceph_mds_session *session, break; case CEPH_SESSION_STALE: - pr_info("mds%d caps went stale, renewing\n", - session->s_mds); + pr_info_client(cl, "mds%d caps went stale, renewing\n", + session->s_mds); atomic_inc(&session->s_cap_gen); session->s_cap_ttl = jiffies - 1; send_renew_caps(mdsc, session); @@ -4076,7 +4125,7 @@ static void handle_session(struct ceph_mds_session *session, break; case CEPH_SESSION_FORCE_RO: - dout("force_session_readonly %p\n", session); + doutc(cl, "force_session_readonly %p\n", session); spin_lock(&session->s_cap_lock); session->s_readonly = true; spin_unlock(&session->s_cap_lock); @@ -4085,7 +4134,8 @@ static void handle_session(struct ceph_mds_session *session, case CEPH_SESSION_REJECT: WARN_ON(session->s_state != CEPH_MDS_SESSION_OPENING); - pr_info("mds%d rejected session\n", session->s_mds); + pr_info_client(cl, "mds%d rejected session\n", + session->s_mds); session->s_state = CEPH_MDS_SESSION_REJECTED; cleanup_session_requests(mdsc, session); remove_session_caps(session); @@ -4095,7 +4145,7 @@ static void handle_session(struct ceph_mds_session *session, break; default: - pr_err("mdsc_handle_session bad op %d mds%d\n", op, mds); + pr_err_client(cl, "bad op %d mds%d\n", op, mds); WARN_ON(1); } @@ -4112,30 +4162,32 @@ static void handle_session(struct ceph_mds_session *session, return; bad: - pr_err("mdsc_handle_session corrupt message mds%d len %d\n", mds, - (int)msg->front.iov_len); + pr_err_client(cl, "corrupt message mds%d len %d\n", mds, + (int)msg->front.iov_len); ceph_msg_dump(msg); return; } void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req) { + struct ceph_client *cl = req->r_mdsc->fsc->client; int dcaps; dcaps = xchg(&req->r_dir_caps, 0); if (dcaps) { - dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps)); + doutc(cl, "releasing r_dir_caps=%s\n", ceph_cap_string(dcaps)); ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps); } } void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req) { + struct ceph_client *cl = req->r_mdsc->fsc->client; int dcaps; dcaps = xchg(&req->r_dir_caps, 0); if (dcaps) { - dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps)); + doutc(cl, "releasing r_dir_caps=%s\n", ceph_cap_string(dcaps)); ceph_put_cap_refs_no_check_caps(ceph_inode(req->r_parent), dcaps); } @@ -4150,7 +4202,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, struct ceph_mds_request *req, *nreq; struct rb_node *p; - dout("replay_unsafe_requests mds%d\n", session->s_mds); + doutc(mdsc->fsc->client, "mds%d\n", session->s_mds); mutex_lock(&mdsc->mutex); list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) @@ -4295,6 +4347,7 @@ static struct dentry* d_find_primary(struct inode *inode) static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = ceph_inode_to_client(inode); union { struct ceph_mds_cap_reconnect v2; struct ceph_mds_cap_reconnect_v1 v1; @@ -4331,9 +4384,9 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) err = 0; goto out_err; } - dout(" adding %p ino %llx.%llx cap %p %lld %s\n", - inode, ceph_vinop(inode), cap, cap->cap_id, - ceph_cap_string(cap->issued)); + doutc(cl, " adding %p ino %llx.%llx cap %p %lld %s\n", inode, + ceph_vinop(inode), cap, cap->cap_id, + ceph_cap_string(cap->issued)); cap->seq = 0; /* reset cap seq */ cap->issue_seq = 0; /* and issue_seq */ @@ -4483,6 +4536,7 @@ static int encode_snap_realms(struct ceph_mds_client *mdsc, { struct rb_node *p; struct ceph_pagelist *pagelist = recon_state->pagelist; + struct ceph_client *cl = mdsc->fsc->client; int err = 0; if (recon_state->msg_version >= 4) { @@ -4521,8 +4575,8 @@ static int encode_snap_realms(struct ceph_mds_client *mdsc, ceph_pagelist_encode_32(pagelist, sizeof(sr_rec)); } - dout(" adding snap realm %llx seq %lld parent %llx\n", - realm->ino, realm->seq, realm->parent_ino); + doutc(cl, " adding snap realm %llx seq %lld parent %llx\n", + realm->ino, realm->seq, realm->parent_ino); sr_rec.ino = cpu_to_le64(realm->ino); sr_rec.seq = cpu_to_le64(realm->seq); sr_rec.parent = cpu_to_le64(realm->parent_ino); @@ -4551,6 +4605,7 @@ static int encode_snap_realms(struct ceph_mds_client *mdsc, static void send_mds_reconnect(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *reply; int mds = session->s_mds; int err = -ENOMEM; @@ -4559,7 +4614,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, }; LIST_HEAD(dispose); - pr_info("mds%d reconnect start\n", mds); + pr_info_client(cl, "mds%d reconnect start\n", mds); recon_state.pagelist = ceph_pagelist_alloc(GFP_NOFS); if (!recon_state.pagelist) @@ -4575,8 +4630,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, session->s_state = CEPH_MDS_SESSION_RECONNECTING; session->s_seq = 0; - dout("session %p state %s\n", session, - ceph_session_state_name(session->s_state)); + doutc(cl, "session %p state %s\n", session, + ceph_session_state_name(session->s_state)); atomic_inc(&session->s_cap_gen); @@ -4710,7 +4765,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, fail_nomsg: ceph_pagelist_release(recon_state.pagelist); fail_nopagelist: - pr_err("error %d preparing reconnect for mds%d\n", err, mds); + pr_err_client(cl, "error %d preparing reconnect for mds%d\n", + err, mds); return; } @@ -4729,9 +4785,9 @@ static void check_new_map(struct ceph_mds_client *mdsc, int oldstate, newstate; struct ceph_mds_session *s; unsigned long targets[DIV_ROUND_UP(CEPH_MAX_MDS, sizeof(unsigned long))] = {0}; + struct ceph_client *cl = mdsc->fsc->client; - dout("check_new_map new %u old %u\n", - newmap->m_epoch, oldmap->m_epoch); + doutc(cl, "new %u old %u\n", newmap->m_epoch, oldmap->m_epoch); if (newmap->m_info) { for (i = 0; i < newmap->possible_max_rank; i++) { @@ -4747,12 +4803,12 @@ static void check_new_map(struct ceph_mds_client *mdsc, oldstate = ceph_mdsmap_get_state(oldmap, i); newstate = ceph_mdsmap_get_state(newmap, i); - dout("check_new_map mds%d state %s%s -> %s%s (session %s)\n", - i, ceph_mds_state_name(oldstate), - ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "", - ceph_mds_state_name(newstate), - ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", - ceph_session_state_name(s->s_state)); + doutc(cl, "mds%d state %s%s -> %s%s (session %s)\n", + i, ceph_mds_state_name(oldstate), + ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "", + ceph_mds_state_name(newstate), + ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", + ceph_session_state_name(s->s_state)); if (i >= newmap->possible_max_rank) { /* force close session for stopped mds */ @@ -4805,7 +4861,8 @@ static void check_new_map(struct ceph_mds_client *mdsc, newstate >= CEPH_MDS_STATE_ACTIVE) { if (oldstate != CEPH_MDS_STATE_CREATING && oldstate != CEPH_MDS_STATE_STARTING) - pr_info("mds%d recovery completed\n", s->s_mds); + pr_info_client(cl, "mds%d recovery completed\n", + s->s_mds); kick_requests(mdsc, i); mutex_unlock(&mdsc->mutex); mutex_lock(&s->s_mutex); @@ -4849,12 +4906,13 @@ static void check_new_map(struct ceph_mds_client *mdsc, s = __open_export_target_session(mdsc, i); if (IS_ERR(s)) { err = PTR_ERR(s); - pr_err("failed to open export target session, err %d\n", - err); + pr_err_client(cl, + "failed to open export target session, err %d\n", + err); continue; } } - dout("send reconnect to export target mds.%d\n", i); + doutc(cl, "send reconnect to export target mds.%d\n", i); mutex_unlock(&mdsc->mutex); send_mds_reconnect(mdsc, s); ceph_put_mds_session(s); @@ -4870,8 +4928,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, if (s->s_state == CEPH_MDS_SESSION_OPEN || s->s_state == CEPH_MDS_SESSION_HUNG || s->s_state == CEPH_MDS_SESSION_CLOSING) { - dout(" connecting to export targets of laggy mds%d\n", - i); + doutc(cl, " connecting to export targets of laggy mds%d\n", i); __open_export_target_sessions(mdsc, s); } } @@ -4898,6 +4955,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg) { + struct ceph_client *cl = mdsc->fsc->client; struct super_block *sb = mdsc->fsc->sb; struct inode *inode; struct dentry *parent, *dentry; @@ -4909,7 +4967,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, struct qstr dname; int release = 0; - dout("handle_lease from mds%d\n", mds); + doutc(cl, "from mds%d\n", mds); if (!ceph_inc_mds_stopping_blocker(mdsc, session)) return; @@ -4927,20 +4985,19 @@ static void handle_lease(struct ceph_mds_client *mdsc, /* lookup inode */ inode = ceph_find_inode(sb, vino); - dout("handle_lease %s, ino %llx %p %.*s\n", - ceph_lease_op_name(h->action), vino.ino, inode, - dname.len, dname.name); + doutc(cl, "%s, ino %llx %p %.*s\n", ceph_lease_op_name(h->action), + vino.ino, inode, dname.len, dname.name); mutex_lock(&session->s_mutex); if (!inode) { - dout("handle_lease no inode %llx\n", vino.ino); + doutc(cl, "no inode %llx\n", vino.ino); goto release; } /* dentry */ parent = d_find_alias(inode); if (!parent) { - dout("no parent dentry on inode %p\n", inode); + doutc(cl, "no parent dentry on inode %p\n", inode); WARN_ON(1); goto release; /* hrm... */ } @@ -5000,7 +5057,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, bad: ceph_dec_mds_stopping_blocker(mdsc); - pr_err("corrupt lease message\n"); + pr_err_client(cl, "corrupt lease message\n"); ceph_msg_dump(msg); } @@ -5008,13 +5065,14 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, struct dentry *dentry, char action, u32 seq) { + struct ceph_client *cl = session->s_mdsc->fsc->client; struct ceph_msg *msg; struct ceph_mds_lease *lease; struct inode *dir; int len = sizeof(*lease) + sizeof(u32) + NAME_MAX; - dout("lease_send_msg identry %p %s to mds%d\n", - dentry, ceph_lease_op_name(action), session->s_mds); + doutc(cl, "identry %p %s to mds%d\n", dentry, ceph_lease_op_name(action), + session->s_mds); msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS, false); if (!msg) @@ -5047,6 +5105,7 @@ static void lock_unlock_session(struct ceph_mds_session *s) static void maybe_recover_session(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_fs_client *fsc = mdsc->fsc; if (!ceph_test_mount_opt(fsc, CLEANRECOVER)) @@ -5058,17 +5117,19 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc) if (!READ_ONCE(fsc->blocklisted)) return; - pr_info("auto reconnect after blocklisted\n"); + pr_info_client(cl, "auto reconnect after blocklisted\n"); ceph_force_reconnect(fsc->sb); } bool check_session_state(struct ceph_mds_session *s) { + struct ceph_client *cl = s->s_mdsc->fsc->client; + switch (s->s_state) { case CEPH_MDS_SESSION_OPEN: if (s->s_ttl && time_after(jiffies, s->s_ttl)) { s->s_state = CEPH_MDS_SESSION_HUNG; - pr_info("mds%d hung\n", s->s_mds); + pr_info_client(cl, "mds%d hung\n", s->s_mds); } break; case CEPH_MDS_SESSION_CLOSING: @@ -5088,6 +5149,8 @@ bool check_session_state(struct ceph_mds_session *s) */ void inc_session_sequence(struct ceph_mds_session *s) { + struct ceph_client *cl = s->s_mdsc->fsc->client; + lockdep_assert_held(&s->s_mutex); s->s_seq++; @@ -5095,11 +5158,11 @@ void inc_session_sequence(struct ceph_mds_session *s) if (s->s_state == CEPH_MDS_SESSION_CLOSING) { int ret; - dout("resending session close request for mds%d\n", s->s_mds); + doutc(cl, "resending session close request for mds%d\n", s->s_mds); ret = request_close_session(s); if (ret < 0) - pr_err("unable to close session to mds%d: %d\n", - s->s_mds, ret); + pr_err_client(cl, "unable to close session to mds%d: %d\n", + s->s_mds, ret); } } @@ -5128,7 +5191,7 @@ static void delayed_work(struct work_struct *work) int renew_caps; int i; - dout("mdsc delayed_work\n"); + doutc(mdsc->fsc->client, "mdsc delayed_work\n"); if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED) return; @@ -5257,6 +5320,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) */ static void wait_requests(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_options *opts = mdsc->fsc->client->options; struct ceph_mds_request *req; @@ -5264,25 +5328,25 @@ static void wait_requests(struct ceph_mds_client *mdsc) if (__get_oldest_req(mdsc)) { mutex_unlock(&mdsc->mutex); - dout("wait_requests waiting for requests\n"); + doutc(cl, "waiting for requests\n"); wait_for_completion_timeout(&mdsc->safe_umount_waiters, ceph_timeout_jiffies(opts->mount_timeout)); /* tear down remaining requests */ mutex_lock(&mdsc->mutex); while ((req = __get_oldest_req(mdsc))) { - dout("wait_requests timed out on tid %llu\n", - req->r_tid); + doutc(cl, "timed out on tid %llu\n", req->r_tid); list_del_init(&req->r_wait); __unregister_request(mdsc, req); } } mutex_unlock(&mdsc->mutex); - dout("wait_requests done\n"); + doutc(cl, "done\n"); } void send_flush_mdlog(struct ceph_mds_session *s) { + struct ceph_client *cl = s->s_mdsc->fsc->client; struct ceph_msg *msg; /* @@ -5292,13 +5356,13 @@ void send_flush_mdlog(struct ceph_mds_session *s) return; mutex_lock(&s->s_mutex); - dout("request mdlog flush to mds%d (%s)s seq %lld\n", s->s_mds, - ceph_session_state_name(s->s_state), s->s_seq); + doutc(cl, "request mdlog flush to mds%d (%s)s seq %lld\n", + s->s_mds, ceph_session_state_name(s->s_state), s->s_seq); msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_FLUSH_MDLOG, s->s_seq); if (!msg) { - pr_err("failed to request mdlog flush to mds%d (%s) seq %lld\n", - s->s_mds, ceph_session_state_name(s->s_state), s->s_seq); + pr_err_client(cl, "failed to request mdlog flush to mds%d (%s) seq %lld\n", + s->s_mds, ceph_session_state_name(s->s_state), s->s_seq); } else { ceph_con_send(&s->s_con, msg); } @@ -5311,7 +5375,7 @@ void send_flush_mdlog(struct ceph_mds_session *s) */ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) { - dout("pre_umount\n"); + doutc(mdsc->fsc->client, "begin\n"); mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN; ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true); @@ -5326,6 +5390,7 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) ceph_msgr_flush(); ceph_cleanup_quotarealms_inodes(mdsc); + doutc(mdsc->fsc->client, "done\n"); } /* @@ -5334,12 +5399,13 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) static void flush_mdlog_and_wait_mdsc_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_request *req = NULL, *nextreq; struct ceph_mds_session *last_session = NULL; struct rb_node *n; mutex_lock(&mdsc->mutex); - dout("%s want %lld\n", __func__, want_tid); + doutc(cl, "want %lld\n", want_tid); restart: req = __get_oldest_req(mdsc); while (req && req->r_tid <= want_tid) { @@ -5373,8 +5439,8 @@ static void flush_mdlog_and_wait_mdsc_unsafe_requests(struct ceph_mds_client *md } else { ceph_put_mds_session(s); } - dout("%s wait on %llu (want %llu)\n", __func__, - req->r_tid, want_tid); + doutc(cl, "wait on %llu (want %llu)\n", + req->r_tid, want_tid); wait_for_completion(&req->r_safe_completion); mutex_lock(&mdsc->mutex); @@ -5392,17 +5458,18 @@ static void flush_mdlog_and_wait_mdsc_unsafe_requests(struct ceph_mds_client *md } mutex_unlock(&mdsc->mutex); ceph_put_mds_session(last_session); - dout("%s done\n", __func__); + doutc(cl, "done\n"); } void ceph_mdsc_sync(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; u64 want_tid, want_flush; if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) return; - dout("sync\n"); + doutc(cl, "sync\n"); mutex_lock(&mdsc->mutex); want_tid = mdsc->last_tid; mutex_unlock(&mdsc->mutex); @@ -5418,8 +5485,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) } spin_unlock(&mdsc->cap_dirty_lock); - dout("sync want tid %lld flush_seq %lld\n", - want_tid, want_flush); + doutc(cl, "sync want tid %lld flush_seq %lld\n", want_tid, want_flush); flush_mdlog_and_wait_mdsc_unsafe_requests(mdsc, want_tid); wait_caps_flush(mdsc, want_flush); @@ -5441,11 +5507,12 @@ static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped) void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) { struct ceph_options *opts = mdsc->fsc->client->options; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_session *session; int i; int skipped = 0; - dout("close_sessions\n"); + doutc(cl, "begin\n"); /* close sessions */ mutex_lock(&mdsc->mutex); @@ -5463,7 +5530,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) } mutex_unlock(&mdsc->mutex); - dout("waiting for sessions to close\n"); + doutc(cl, "waiting for sessions to close\n"); wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc, skipped), ceph_timeout_jiffies(opts->mount_timeout)); @@ -5491,7 +5558,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) cancel_work_sync(&mdsc->cap_reclaim_work); cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ - dout("stopped\n"); + doutc(cl, "done\n"); } void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) @@ -5499,7 +5566,7 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) struct ceph_mds_session *session; int mds; - dout("force umount\n"); + doutc(mdsc->fsc->client, "force umount\n"); mutex_lock(&mdsc->mutex); for (mds = 0; mds < mdsc->max_sessions; mds++) { @@ -5530,7 +5597,7 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) { - dout("stop\n"); + doutc(mdsc->fsc->client, "stop\n"); /* * Make sure the delayed work stopped before releasing * the resources. @@ -5551,7 +5618,7 @@ static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) void ceph_mdsc_destroy(struct ceph_fs_client *fsc) { struct ceph_mds_client *mdsc = fsc->mdsc; - dout("mdsc_destroy %p\n", mdsc); + doutc(fsc->client, "%p\n", mdsc); if (!mdsc) return; @@ -5565,12 +5632,13 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc) fsc->mdsc = NULL; kfree(mdsc); - dout("mdsc_destroy %p done\n", mdsc); + doutc(fsc->client, "%p done\n", mdsc); } void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) { struct ceph_fs_client *fsc = mdsc->fsc; + struct ceph_client *cl = fsc->client; const char *mds_namespace = fsc->mount_options->mds_namespace; void *p = msg->front.iov_base; void *end = p + msg->front.iov_len; @@ -5582,7 +5650,7 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) ceph_decode_need(&p, end, sizeof(u32), bad); epoch = ceph_decode_32(&p); - dout("handle_fsmap epoch %u\n", epoch); + doutc(cl, "epoch %u\n", epoch); /* struct_v, struct_cv, map_len, epoch, legacy_client_fscid */ ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 3, bad); @@ -5627,7 +5695,8 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) return; bad: - pr_err("error decoding fsmap %d. Shutting down mount.\n", err); + pr_err_client(cl, "error decoding fsmap %d. Shutting down mount.\n", + err); ceph_umount_begin(mdsc->fsc->sb); ceph_msg_dump(msg); err_out: @@ -5642,6 +5711,7 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) */ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) { + struct ceph_client *cl = mdsc->fsc->client; u32 epoch; u32 maplen; void *p = msg->front.iov_base; @@ -5656,13 +5726,12 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) return; epoch = ceph_decode_32(&p); maplen = ceph_decode_32(&p); - dout("handle_map epoch %u len %d\n", epoch, (int)maplen); + doutc(cl, "epoch %u len %d\n", epoch, (int)maplen); /* do we need it? */ mutex_lock(&mdsc->mutex); if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) { - dout("handle_map epoch %u <= our %u\n", - epoch, mdsc->mdsmap->m_epoch); + doutc(cl, "epoch %u <= our %u\n", epoch, mdsc->mdsmap->m_epoch); mutex_unlock(&mdsc->mutex); return; } @@ -5696,7 +5765,8 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) bad_unlock: mutex_unlock(&mdsc->mutex); bad: - pr_err("error decoding mdsmap %d. Shutting down mount.\n", err); + pr_err_client(cl, "error decoding mdsmap %d. Shutting down mount.\n", + err); ceph_umount_begin(mdsc->fsc->sb); ceph_msg_dump(msg); return; @@ -5727,7 +5797,8 @@ static void mds_peer_reset(struct ceph_connection *con) struct ceph_mds_session *s = con->private; struct ceph_mds_client *mdsc = s->s_mdsc; - pr_warn("mds%d closed our session\n", s->s_mds); + pr_warn_client(mdsc->fsc->client, "mds%d closed our session\n", + s->s_mds); if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO) send_mds_reconnect(mdsc, s); } @@ -5736,6 +5807,7 @@ static void mds_dispatch(struct ceph_connection *con, struct ceph_msg *msg) { struct ceph_mds_session *s = con->private; struct ceph_mds_client *mdsc = s->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; int type = le16_to_cpu(msg->hdr.type); mutex_lock(&mdsc->mutex); @@ -5775,8 +5847,8 @@ static void mds_dispatch(struct ceph_connection *con, struct ceph_msg *msg) break; default: - pr_err("received unknown message type %d %s\n", type, - ceph_msg_type_name(type)); + pr_err_client(cl, "received unknown message type %d %s\n", + type, ceph_msg_type_name(type)); } out: ceph_msg_put(msg); diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 6cbec7aed5a0..fa080183ac1f 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -11,6 +11,7 @@ #include #include +#include "mds_client.h" #include "super.h" #define CEPH_MDS_IS_READY(i, ignore_laggy) \ @@ -117,6 +118,7 @@ static int __decode_and_drop_compat_set(void **p, void* end) struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p, void *end, bool msgr2) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mdsmap *m; const void *start = *p; int i, j, n; @@ -234,20 +236,18 @@ struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p, *p = info_end; } - dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s%s\n", - i+1, n, global_id, mds, inc, - ceph_pr_addr(&addr), - ceph_mds_state_name(state), - laggy ? "(laggy)" : ""); + doutc(cl, "%d/%d %lld mds%d.%d %s %s%s\n", i+1, n, global_id, + mds, inc, ceph_pr_addr(&addr), + ceph_mds_state_name(state), laggy ? "(laggy)" : ""); if (mds < 0 || mds >= m->possible_max_rank) { - pr_warn("mdsmap_decode got incorrect mds(%d)\n", mds); + pr_warn_client(cl, "got incorrect mds(%d)\n", mds); continue; } if (state <= 0) { - dout("mdsmap_decode got incorrect state(%s)\n", - ceph_mds_state_name(state)); + doutc(cl, "got incorrect state(%s)\n", + ceph_mds_state_name(state)); continue; } @@ -386,16 +386,16 @@ struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p, m->m_max_xattr_size = 0; } bad_ext: - dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n", - !!m->m_enabled, !!m->m_damaged, m->m_num_laggy); + doutc(cl, "m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n", + !!m->m_enabled, !!m->m_damaged, m->m_num_laggy); *p = end; - dout("mdsmap_decode success epoch %u\n", m->m_epoch); + doutc(cl, "success epoch %u\n", m->m_epoch); return m; nomem: err = -ENOMEM; goto out_err; corrupt: - pr_err("corrupt mdsmap\n"); + pr_err_client(cl, "corrupt mdsmap\n"); print_hex_dump(KERN_DEBUG, "mdsmap: ", DUMP_PREFIX_OFFSET, 16, 1, start, end - start, true); diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c index 6d3584f16f9a..871c1090e520 100644 --- a/fs/ceph/metric.c +++ b/fs/ceph/metric.c @@ -31,6 +31,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, struct ceph_client_metric *m = &mdsc->metric; u64 nr_caps = atomic64_read(&m->total_caps); u32 header_len = sizeof(struct ceph_metric_header); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *msg; s64 sum; s32 items = 0; @@ -51,8 +52,8 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true); if (!msg) { - pr_err("send metrics to mds%d, failed to allocate message\n", - s->s_mds); + pr_err_client(cl, "to mds%d, failed to allocate message\n", + s->s_mds); return false; } diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index f7fcf7f08ec6..9d36c3532de1 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -43,6 +43,7 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, { struct super_block *sb = mdsc->fsc->sb; struct ceph_mds_quota *h = msg->front.iov_base; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_vino vino; struct inode *inode; struct ceph_inode_info *ci; @@ -51,8 +52,8 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, return; if (msg->front.iov_len < sizeof(*h)) { - pr_err("%s corrupt message mds%d len %d\n", __func__, - session->s_mds, (int)msg->front.iov_len); + pr_err_client(cl, "corrupt message mds%d len %d\n", + session->s_mds, (int)msg->front.iov_len); ceph_msg_dump(msg); goto out; } @@ -62,7 +63,7 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, vino.snap = CEPH_NOSNAP; inode = ceph_find_inode(sb, vino); if (!inode) { - pr_warn("Failed to find inode %llu\n", vino.ino); + pr_warn_client(cl, "failed to find inode %llx\n", vino.ino); goto out; } ci = ceph_inode(inode); @@ -85,6 +86,7 @@ find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino) { struct ceph_quotarealm_inode *qri = NULL; struct rb_node **node, *parent = NULL; + struct ceph_client *cl = mdsc->fsc->client; mutex_lock(&mdsc->quotarealms_inodes_mutex); node = &(mdsc->quotarealms_inodes.rb_node); @@ -110,7 +112,7 @@ find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino) rb_link_node(&qri->node, parent, node); rb_insert_color(&qri->node, &mdsc->quotarealms_inodes); } else - pr_warn("Failed to alloc quotarealms_inode\n"); + pr_warn_client(cl, "Failed to alloc quotarealms_inode\n"); } mutex_unlock(&mdsc->quotarealms_inodes_mutex); @@ -129,6 +131,7 @@ static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc, struct super_block *sb, struct ceph_snap_realm *realm) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_quotarealm_inode *qri; struct inode *in; @@ -161,8 +164,8 @@ static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc, } if (IS_ERR(in)) { - dout("Can't lookup inode %llx (err: %ld)\n", - realm->ino, PTR_ERR(in)); + doutc(cl, "Can't lookup inode %llx (err: %ld)\n", realm->ino, + PTR_ERR(in)); qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */ } else { qri->timeout = 0; @@ -213,6 +216,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, enum quota_get_realm which_quota, bool retry) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci = NULL; struct ceph_snap_realm *realm, *next; struct inode *in; @@ -226,8 +230,9 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, if (realm) ceph_get_snap_realm(mdsc, realm); else - pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) " - "null i_snap_realm\n", ceph_vinop(inode)); + pr_err_ratelimited_client(cl, + "%p %llx.%llx null i_snap_realm\n", + inode, ceph_vinop(inode)); while (realm) { bool has_inode; @@ -317,6 +322,7 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, loff_t delta) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct ceph_snap_realm *realm, *next; struct inode *in; @@ -332,8 +338,9 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, if (realm) ceph_get_snap_realm(mdsc, realm); else - pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) " - "null i_snap_realm\n", ceph_vinop(inode)); + pr_err_ratelimited_client(cl, + "%p %llx.%llx null i_snap_realm\n", + inode, ceph_vinop(inode)); while (realm) { bool has_inode; @@ -383,7 +390,7 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, break; default: /* Shouldn't happen */ - pr_warn("Invalid quota check op (%d)\n", op); + pr_warn_client(cl, "Invalid quota check op (%d)\n", op); exceeded = true; /* Just break the loop */ } iput(in); diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index d0d3612f28f0..73c526954749 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -138,7 +138,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( __insert_snap_realm(&mdsc->snap_realms, realm); mdsc->num_snap_realms++; - dout("%s %llx %p\n", __func__, realm->ino, realm); + doutc(mdsc->fsc->client, "%llx %p\n", realm->ino, realm); return realm; } @@ -150,6 +150,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( static struct ceph_snap_realm *__lookup_snap_realm(struct ceph_mds_client *mdsc, u64 ino) { + struct ceph_client *cl = mdsc->fsc->client; struct rb_node *n = mdsc->snap_realms.rb_node; struct ceph_snap_realm *r; @@ -162,7 +163,7 @@ static struct ceph_snap_realm *__lookup_snap_realm(struct ceph_mds_client *mdsc, else if (ino > r->ino) n = n->rb_right; else { - dout("%s %llx %p\n", __func__, r->ino, r); + doutc(cl, "%llx %p\n", r->ino, r); return r; } } @@ -188,9 +189,10 @@ static void __put_snap_realm(struct ceph_mds_client *mdsc, static void __destroy_snap_realm(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm) { + struct ceph_client *cl = mdsc->fsc->client; lockdep_assert_held_write(&mdsc->snap_rwsem); - dout("%s %p %llx\n", __func__, realm, realm->ino); + doutc(cl, "%p %llx\n", realm, realm->ino); rb_erase(&realm->node, &mdsc->snap_realms); mdsc->num_snap_realms--; @@ -290,6 +292,7 @@ static int adjust_snap_realm_parent(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm, u64 parentino) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_snap_realm *parent; lockdep_assert_held_write(&mdsc->snap_rwsem); @@ -303,8 +306,8 @@ static int adjust_snap_realm_parent(struct ceph_mds_client *mdsc, if (IS_ERR(parent)) return PTR_ERR(parent); } - dout("%s %llx %p: %llx %p -> %llx %p\n", __func__, realm->ino, - realm, realm->parent_ino, realm->parent, parentino, parent); + doutc(cl, "%llx %p: %llx %p -> %llx %p\n", realm->ino, realm, + realm->parent_ino, realm->parent, parentino, parent); if (realm->parent) { list_del_init(&realm->child_item); ceph_put_snap_realm(mdsc, realm->parent); @@ -334,6 +337,7 @@ static int build_snap_context(struct ceph_mds_client *mdsc, struct list_head *realm_queue, struct list_head *dirty_realms) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_snap_realm *parent = realm->parent; struct ceph_snap_context *snapc; int err = 0; @@ -361,10 +365,10 @@ static int build_snap_context(struct ceph_mds_client *mdsc, realm->cached_context->seq == realm->seq && (!parent || realm->cached_context->seq >= parent->cached_context->seq)) { - dout("%s %llx %p: %p seq %lld (%u snaps) (unchanged)\n", - __func__, realm->ino, realm, realm->cached_context, - realm->cached_context->seq, - (unsigned int)realm->cached_context->num_snaps); + doutc(cl, "%llx %p: %p seq %lld (%u snaps) (unchanged)\n", + realm->ino, realm, realm->cached_context, + realm->cached_context->seq, + (unsigned int)realm->cached_context->num_snaps); return 0; } @@ -401,8 +405,8 @@ static int build_snap_context(struct ceph_mds_client *mdsc, sort(snapc->snaps, num, sizeof(u64), cmpu64_rev, NULL); snapc->num_snaps = num; - dout("%s %llx %p: %p seq %lld (%u snaps)\n", __func__, realm->ino, - realm, snapc, snapc->seq, (unsigned int) snapc->num_snaps); + doutc(cl, "%llx %p: %p seq %lld (%u snaps)\n", realm->ino, realm, + snapc, snapc->seq, (unsigned int) snapc->num_snaps); ceph_put_snap_context(realm->cached_context); realm->cached_context = snapc; @@ -419,7 +423,7 @@ static int build_snap_context(struct ceph_mds_client *mdsc, ceph_put_snap_context(realm->cached_context); realm->cached_context = NULL; } - pr_err("%s %llx %p fail %d\n", __func__, realm->ino, realm, err); + pr_err_client(cl, "%llx %p fail %d\n", realm->ino, realm, err); return err; } @@ -430,6 +434,7 @@ static void rebuild_snap_realms(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm, struct list_head *dirty_realms) { + struct ceph_client *cl = mdsc->fsc->client; LIST_HEAD(realm_queue); int last = 0; bool skip = false; @@ -455,8 +460,8 @@ static void rebuild_snap_realms(struct ceph_mds_client *mdsc, last = build_snap_context(mdsc, _realm, &realm_queue, dirty_realms); - dout("%s %llx %p, %s\n", __func__, _realm->ino, _realm, - last > 0 ? "is deferred" : !last ? "succeeded" : "failed"); + doutc(cl, "%llx %p, %s\n", realm->ino, realm, + last > 0 ? "is deferred" : !last ? "succeeded" : "failed"); /* is any child in the list ? */ list_for_each_entry(child, &_realm->children, child_item) { @@ -526,6 +531,7 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap **pcapsnap) { struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_snap_context *old_snapc, *new_snapc; struct ceph_cap_snap *capsnap = *pcapsnap; struct ceph_buffer *old_blob = NULL; @@ -551,14 +557,14 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci, as no new writes are allowed to start when pending, so any writes in progress now were started before the previous cap_snap. lucky us. */ - dout("%s %p %llx.%llx already pending\n", - __func__, inode, ceph_vinop(inode)); + doutc(cl, "%p %llx.%llx already pending\n", inode, + ceph_vinop(inode)); goto update_snapc; } if (ci->i_wrbuffer_ref_head == 0 && !(dirty & (CEPH_CAP_ANY_EXCL|CEPH_CAP_FILE_WR))) { - dout("%s %p %llx.%llx nothing dirty|writing\n", - __func__, inode, ceph_vinop(inode)); + doutc(cl, "%p %llx.%llx nothing dirty|writing\n", inode, + ceph_vinop(inode)); goto update_snapc; } @@ -578,15 +584,15 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci, } else { if (!(used & CEPH_CAP_FILE_WR) && ci->i_wrbuffer_ref_head == 0) { - dout("%s %p %llx.%llx no new_snap|dirty_page|writing\n", - __func__, inode, ceph_vinop(inode)); + doutc(cl, "%p %llx.%llx no new_snap|dirty_page|writing\n", + inode, ceph_vinop(inode)); goto update_snapc; } } - dout("%s %p %llx.%llx cap_snap %p queuing under %p %s %s\n", - __func__, inode, ceph_vinop(inode), capsnap, old_snapc, - ceph_cap_string(dirty), capsnap->need_flush ? "" : "no_flush"); + doutc(cl, "%p %llx.%llx cap_snap %p queuing under %p %s %s\n", + inode, ceph_vinop(inode), capsnap, old_snapc, + ceph_cap_string(dirty), capsnap->need_flush ? "" : "no_flush"); ihold(inode); capsnap->follows = old_snapc->seq; @@ -618,9 +624,9 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci, list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); if (used & CEPH_CAP_FILE_WR) { - dout("%s %p %llx.%llx cap_snap %p snapc %p seq %llu used WR," - " now pending\n", __func__, inode, ceph_vinop(inode), - capsnap, old_snapc, old_snapc->seq); + doutc(cl, "%p %llx.%llx cap_snap %p snapc %p seq %llu used WR," + " now pending\n", inode, ceph_vinop(inode), capsnap, + old_snapc, old_snapc->seq); capsnap->writing = 1; } else { /* note mtime, size NOW. */ @@ -637,7 +643,7 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci, ci->i_head_snapc = NULL; } else { ci->i_head_snapc = ceph_get_snap_context(new_snapc); - dout(" new snapc is %p\n", new_snapc); + doutc(cl, " new snapc is %p\n", new_snapc); } spin_unlock(&ci->i_ceph_lock); @@ -658,6 +664,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, { struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_client *cl = mdsc->fsc->client; BUG_ON(capsnap->writing); capsnap->size = i_size_read(inode); @@ -670,11 +677,12 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, capsnap->truncate_size = ci->i_truncate_size; capsnap->truncate_seq = ci->i_truncate_seq; if (capsnap->dirty_pages) { - dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu " - "still has %d dirty pages\n", __func__, inode, - ceph_vinop(inode), capsnap, capsnap->context, - capsnap->context->seq, ceph_cap_string(capsnap->dirty), - capsnap->size, capsnap->dirty_pages); + doutc(cl, "%p %llx.%llx cap_snap %p snapc %p %llu %s " + "s=%llu still has %d dirty pages\n", inode, + ceph_vinop(inode), capsnap, capsnap->context, + capsnap->context->seq, + ceph_cap_string(capsnap->dirty), + capsnap->size, capsnap->dirty_pages); return 0; } @@ -683,20 +691,20 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, * And trigger to flush the buffer immediately. */ if (ci->i_wrbuffer_ref) { - dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu " - "used WRBUFFER, delaying\n", __func__, inode, - ceph_vinop(inode), capsnap, capsnap->context, - capsnap->context->seq, ceph_cap_string(capsnap->dirty), - capsnap->size); + doutc(cl, "%p %llx.%llx cap_snap %p snapc %p %llu %s " + "s=%llu used WRBUFFER, delaying\n", inode, + ceph_vinop(inode), capsnap, capsnap->context, + capsnap->context->seq, ceph_cap_string(capsnap->dirty), + capsnap->size); ceph_queue_writeback(inode); return 0; } ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS; - dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu\n", - __func__, inode, ceph_vinop(inode), capsnap, capsnap->context, - capsnap->context->seq, ceph_cap_string(capsnap->dirty), - capsnap->size); + doutc(cl, "%p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu\n", + inode, ceph_vinop(inode), capsnap, capsnap->context, + capsnap->context->seq, ceph_cap_string(capsnap->dirty), + capsnap->size); spin_lock(&mdsc->snap_flush_lock); if (list_empty(&ci->i_snap_flush_item)) { @@ -714,11 +722,12 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, static void queue_realm_cap_snaps(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct inode *lastinode = NULL; struct ceph_cap_snap *capsnap = NULL; - dout("%s %p %llx inode\n", __func__, realm, realm->ino); + doutc(cl, "%p %llx inode\n", realm, realm->ino); spin_lock(&realm->inodes_with_caps_lock); list_for_each_entry(ci, &realm->inodes_with_caps, i_snap_realm_item) { @@ -737,8 +746,9 @@ static void queue_realm_cap_snaps(struct ceph_mds_client *mdsc, if (!capsnap) { capsnap = kmem_cache_zalloc(ceph_cap_snap_cachep, GFP_NOFS); if (!capsnap) { - pr_err("ENOMEM allocating ceph_cap_snap on %p\n", - inode); + pr_err_client(cl, + "ENOMEM allocating ceph_cap_snap on %p\n", + inode); return; } } @@ -756,7 +766,7 @@ static void queue_realm_cap_snaps(struct ceph_mds_client *mdsc, if (capsnap) kmem_cache_free(ceph_cap_snap_cachep, capsnap); - dout("%s %p %llx done\n", __func__, realm, realm->ino); + doutc(cl, "%p %llx done\n", realm, realm->ino); } /* @@ -770,6 +780,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, void *p, void *e, bool deletion, struct ceph_snap_realm **realm_ret) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_mds_snap_realm *ri; /* encoded */ __le64 *snaps; /* encoded */ __le64 *prior_parent_snaps; /* encoded */ @@ -784,7 +795,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, lockdep_assert_held_write(&mdsc->snap_rwsem); - dout("%s deletion=%d\n", __func__, deletion); + doutc(cl, "deletion=%d\n", deletion); more: realm = NULL; rebuild_snapcs = 0; @@ -814,8 +825,8 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, rebuild_snapcs += err; if (le64_to_cpu(ri->seq) > realm->seq) { - dout("%s updating %llx %p %lld -> %lld\n", __func__, - realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); + doutc(cl, "updating %llx %p %lld -> %lld\n", realm->ino, + realm, realm->seq, le64_to_cpu(ri->seq)); /* update realm parameters, snap lists */ realm->seq = le64_to_cpu(ri->seq); realm->created = le64_to_cpu(ri->created); @@ -838,16 +849,16 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, rebuild_snapcs = 1; } else if (!realm->cached_context) { - dout("%s %llx %p seq %lld new\n", __func__, - realm->ino, realm, realm->seq); + doutc(cl, "%llx %p seq %lld new\n", realm->ino, realm, + realm->seq); rebuild_snapcs = 1; } else { - dout("%s %llx %p seq %lld unchanged\n", __func__, - realm->ino, realm, realm->seq); + doutc(cl, "%llx %p seq %lld unchanged\n", realm->ino, realm, + realm->seq); } - dout("done with %llx %p, rebuild_snapcs=%d, %p %p\n", realm->ino, - realm, rebuild_snapcs, p, e); + doutc(cl, "done with %llx %p, rebuild_snapcs=%d, %p %p\n", realm->ino, + realm, rebuild_snapcs, p, e); /* * this will always track the uppest parent realm from which @@ -895,7 +906,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, ceph_put_snap_realm(mdsc, realm); if (first_realm) ceph_put_snap_realm(mdsc, first_realm); - pr_err("%s error %d\n", __func__, err); + pr_err_client(cl, "error %d\n", err); /* * When receiving a corrupted snap trace we don't know what @@ -909,11 +920,12 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, WRITE_ONCE(mdsc->fsc->mount_state, CEPH_MOUNT_FENCE_IO); ret = ceph_monc_blocklist_add(&client->monc, &client->msgr.inst.addr); if (ret) - pr_err("%s failed to blocklist %s: %d\n", __func__, - ceph_pr_addr(&client->msgr.inst.addr), ret); + pr_err_client(cl, "failed to blocklist %s: %d\n", + ceph_pr_addr(&client->msgr.inst.addr), ret); - WARN(1, "%s: %s%sdo remount to continue%s", - __func__, ret ? "" : ceph_pr_addr(&client->msgr.inst.addr), + WARN(1, "[client.%lld] %s %s%sdo remount to continue%s", + client->monc.auth->global_id, __func__, + ret ? "" : ceph_pr_addr(&client->msgr.inst.addr), ret ? "" : " was blocklisted, ", err == -EIO ? " after corrupted snaptrace is fixed" : ""); @@ -929,11 +941,12 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, */ static void flush_snaps(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_inode_info *ci; struct inode *inode; struct ceph_mds_session *session = NULL; - dout("%s\n", __func__); + doutc(cl, "begin\n"); spin_lock(&mdsc->snap_flush_lock); while (!list_empty(&mdsc->snap_flush_list)) { ci = list_first_entry(&mdsc->snap_flush_list, @@ -948,7 +961,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc) spin_unlock(&mdsc->snap_flush_lock); ceph_put_mds_session(session); - dout("%s done\n", __func__); + doutc(cl, "done\n"); } /** @@ -1004,6 +1017,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg) { + struct ceph_client *cl = mdsc->fsc->client; struct super_block *sb = mdsc->fsc->sb; int mds = session->s_mds; u64 split; @@ -1034,8 +1048,8 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, trace_len = le32_to_cpu(h->trace_len); p += sizeof(*h); - dout("%s from mds%d op %s split %llx tracelen %d\n", __func__, - mds, ceph_snap_op_name(op), split, trace_len); + doutc(cl, "from mds%d op %s split %llx tracelen %d\n", mds, + ceph_snap_op_name(op), split, trace_len); down_write(&mdsc->snap_rwsem); locked_rwsem = 1; @@ -1066,7 +1080,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, goto out; } - dout("splitting snap_realm %llx %p\n", realm->ino, realm); + doutc(cl, "splitting snap_realm %llx %p\n", realm->ino, realm); for (i = 0; i < num_split_inos; i++) { struct ceph_vino vino = { .ino = le64_to_cpu(split_inos[i]), @@ -1091,13 +1105,13 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, */ if (ci->i_snap_realm->created > le64_to_cpu(ri->created)) { - dout(" leaving %p %llx.%llx in newer realm %llx %p\n", - inode, ceph_vinop(inode), ci->i_snap_realm->ino, - ci->i_snap_realm); + doutc(cl, " leaving %p %llx.%llx in newer realm %llx %p\n", + inode, ceph_vinop(inode), ci->i_snap_realm->ino, + ci->i_snap_realm); goto skip_inode; } - dout(" will move %p %llx.%llx to split realm %llx %p\n", - inode, ceph_vinop(inode), realm->ino, realm); + doutc(cl, " will move %p %llx.%llx to split realm %llx %p\n", + inode, ceph_vinop(inode), realm->ino, realm); ceph_get_snap_realm(mdsc, realm); ceph_change_snap_realm(inode, realm); @@ -1158,7 +1172,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, return; bad: - pr_err("%s corrupt snap message from mds%d\n", __func__, mds); + pr_err_client(cl, "corrupt snap message from mds%d\n", mds); ceph_msg_dump(msg); out: if (locked_rwsem) @@ -1174,6 +1188,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, struct ceph_snapid_map* ceph_get_snapid_map(struct ceph_mds_client *mdsc, u64 snap) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_snapid_map *sm, *exist; struct rb_node **p, *parent; int ret; @@ -1196,8 +1211,8 @@ struct ceph_snapid_map* ceph_get_snapid_map(struct ceph_mds_client *mdsc, } spin_unlock(&mdsc->snapid_map_lock); if (exist) { - dout("%s found snapid map %llx -> %x\n", __func__, - exist->snap, exist->dev); + doutc(cl, "found snapid map %llx -> %x\n", exist->snap, + exist->dev); return exist; } @@ -1241,13 +1256,12 @@ struct ceph_snapid_map* ceph_get_snapid_map(struct ceph_mds_client *mdsc, if (exist) { free_anon_bdev(sm->dev); kfree(sm); - dout("%s found snapid map %llx -> %x\n", __func__, - exist->snap, exist->dev); + doutc(cl, "found snapid map %llx -> %x\n", exist->snap, + exist->dev); return exist; } - dout("%s create snapid map %llx -> %x\n", __func__, - sm->snap, sm->dev); + doutc(cl, "create snapid map %llx -> %x\n", sm->snap, sm->dev); return sm; } @@ -1272,6 +1286,7 @@ void ceph_put_snapid_map(struct ceph_mds_client* mdsc, void ceph_trim_snapid_map(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_snapid_map *sm; unsigned long now; LIST_HEAD(to_free); @@ -1293,7 +1308,7 @@ void ceph_trim_snapid_map(struct ceph_mds_client *mdsc) while (!list_empty(&to_free)) { sm = list_first_entry(&to_free, struct ceph_snapid_map, lru); list_del(&sm->lru); - dout("trim snapid map %llx -> %x\n", sm->snap, sm->dev); + doutc(cl, "trim snapid map %llx -> %x\n", sm->snap, sm->dev); free_anon_bdev(sm->dev); kfree(sm); } @@ -1301,6 +1316,7 @@ void ceph_trim_snapid_map(struct ceph_mds_client *mdsc) void ceph_cleanup_snapid_map(struct ceph_mds_client *mdsc) { + struct ceph_client *cl = mdsc->fsc->client; struct ceph_snapid_map *sm; struct rb_node *p; LIST_HEAD(to_free); @@ -1319,8 +1335,8 @@ void ceph_cleanup_snapid_map(struct ceph_mds_client *mdsc) list_del(&sm->lru); free_anon_bdev(sm->dev); if (WARN_ON_ONCE(atomic_read(&sm->ref))) { - pr_err("snapid map %llx -> %x still in use\n", - sm->snap, sm->dev); + pr_err_client(cl, "snapid map %llx -> %x still in use\n", + sm->snap, sm->dev); } kfree(sm); } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 52af90beab00..551fb686182b 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -46,9 +46,10 @@ static void ceph_put_super(struct super_block *s) { struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s); - dout("put_super\n"); + doutc(fsc->client, "begin\n"); ceph_fscrypt_free_dummy_policy(fsc); ceph_mdsc_close_sessions(fsc->mdsc); + doutc(fsc->client, "done\n"); } static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) @@ -59,13 +60,13 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) int i, err; u64 data_pool; + doutc(fsc->client, "begin\n"); if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; } else { data_pool = CEPH_NOPOOL; } - dout("statfs\n"); err = ceph_monc_do_statfs(monc, data_pool, &st); if (err < 0) return err; @@ -113,24 +114,26 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) /* fold the fs_cluster_id into the upper bits */ buf->f_fsid.val[1] = monc->fs_cluster_id; + doutc(fsc->client, "done\n"); return 0; } static int ceph_sync_fs(struct super_block *sb, int wait) { struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); + struct ceph_client *cl = fsc->client; if (!wait) { - dout("sync_fs (non-blocking)\n"); + doutc(cl, "(non-blocking)\n"); ceph_flush_dirty_caps(fsc->mdsc); - dout("sync_fs (non-blocking) done\n"); + doutc(cl, "(non-blocking) done\n"); return 0; } - dout("sync_fs (blocking)\n"); + doutc(cl, "(blocking)\n"); ceph_osdc_sync(&fsc->client->osdc); ceph_mdsc_sync(fsc->mdsc); - dout("sync_fs (blocking) done\n"); + doutc(cl, "(blocking) done\n"); return 0; } @@ -341,7 +344,7 @@ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) char *dev_name = param->string, *dev_name_end; int ret; - dout("%s '%s'\n", __func__, dev_name); + dout("'%s'\n", dev_name); if (!dev_name || !*dev_name) return invalfc(fc, "Empty source"); @@ -413,7 +416,7 @@ static int ceph_parse_mount_param(struct fs_context *fc, return ret; token = fs_parse(fc, ceph_mount_parameters, param, &result); - dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); + dout("%s: fs_parse '%s' token %d\n",__func__, param->key, token); if (token < 0) return token; @@ -881,7 +884,7 @@ static void flush_fs_workqueues(struct ceph_fs_client *fsc) static void destroy_fs_client(struct ceph_fs_client *fsc) { - dout("destroy_fs_client %p\n", fsc); + doutc(fsc->client, "%p\n", fsc); spin_lock(&ceph_fsc_lock); list_del(&fsc->metric_wakeup); @@ -896,7 +899,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) ceph_destroy_client(fsc->client); kfree(fsc); - dout("destroy_fs_client %p done\n", fsc); + dout("%s: %p done\n", __func__, fsc); } /* @@ -1017,7 +1020,7 @@ void ceph_umount_begin(struct super_block *sb) { struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); - dout("ceph_umount_begin - starting forced umount\n"); + doutc(fsc->client, "starting forced umount\n"); if (!fsc) return; fsc->mount_state = CEPH_MOUNT_SHUTDOWN; @@ -1045,13 +1048,14 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, const char *path, unsigned long started) { + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req = NULL; int err; struct dentry *root; /* open dir */ - dout("open_root_inode opening '%s'\n", path); + doutc(cl, "opening '%s'\n", path); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); if (IS_ERR(req)) return ERR_CAST(req); @@ -1071,13 +1075,13 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, if (err == 0) { struct inode *inode = req->r_target_inode; req->r_target_inode = NULL; - dout("open_root_inode success\n"); + doutc(cl, "success\n"); root = d_make_root(inode); if (!root) { root = ERR_PTR(-ENOMEM); goto out; } - dout("open_root_inode success, root dentry is %p\n", root); + doutc(cl, "success, root dentry is %p\n", root); } else { root = ERR_PTR(err); } @@ -1136,11 +1140,12 @@ static int ceph_apply_test_dummy_encryption(struct super_block *sb, static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, struct fs_context *fc) { + struct ceph_client *cl = fsc->client; int err; unsigned long started = jiffies; /* note the start time */ struct dentry *root; - dout("mount start %p\n", fsc); + doutc(cl, "mount start %p\n", fsc); mutex_lock(&fsc->client->mount_mutex); if (!fsc->sb->s_root) { @@ -1163,7 +1168,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, if (err) goto out; - dout("mount opening path '%s'\n", path); + doutc(cl, "mount opening path '%s'\n", path); ceph_fs_debugfs_init(fsc); @@ -1178,7 +1183,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, } fsc->mount_state = CEPH_MOUNT_MOUNTED; - dout("mount success\n"); + doutc(cl, "mount success\n"); mutex_unlock(&fsc->client->mount_mutex); return root; @@ -1191,9 +1196,10 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, static int ceph_set_super(struct super_block *s, struct fs_context *fc) { struct ceph_fs_client *fsc = s->s_fs_info; + struct ceph_client *cl = fsc->client; int ret; - dout("set_super %p\n", s); + doutc(cl, "%p\n", s); s->s_maxbytes = MAX_LFS_FILESIZE; @@ -1227,30 +1233,31 @@ static int ceph_compare_super(struct super_block *sb, struct fs_context *fc) struct ceph_mount_options *fsopt = new->mount_options; struct ceph_options *opt = new->client->options; struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb); + struct ceph_client *cl = fsc->client; - dout("ceph_compare_super %p\n", sb); + doutc(cl, "%p\n", sb); if (compare_mount_options(fsopt, opt, fsc)) { - dout("monitor(s)/mount options don't match\n"); + doutc(cl, "monitor(s)/mount options don't match\n"); return 0; } if ((opt->flags & CEPH_OPT_FSID) && ceph_fsid_compare(&opt->fsid, &fsc->client->fsid)) { - dout("fsid doesn't match\n"); + doutc(cl, "fsid doesn't match\n"); return 0; } if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) { - dout("flags differ\n"); + doutc(cl, "flags differ\n"); return 0; } if (fsc->blocklisted && !ceph_test_mount_opt(fsc, CLEANRECOVER)) { - dout("client is blocklisted (and CLEANRECOVER is not set)\n"); + doutc(cl, "client is blocklisted (and CLEANRECOVER is not set)\n"); return 0; } if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { - dout("client has been forcibly unmounted\n"); + doutc(cl, "client has been forcibly unmounted\n"); return 0; } @@ -1338,8 +1345,9 @@ static int ceph_get_tree(struct fs_context *fc) err = PTR_ERR(res); goto out_splat; } - dout("root %p inode %p ino %llx.%llx\n", res, - d_inode(res), ceph_vinop(d_inode(res))); + + doutc(fsc->client, "root %p inode %p ino %llx.%llx\n", res, + d_inode(res), ceph_vinop(d_inode(res))); fc->root = fsc->sb->s_root; return 0; @@ -1397,7 +1405,8 @@ static int ceph_reconfigure_fc(struct fs_context *fc) kfree(fsc->mount_options->mon_addr); fsc->mount_options->mon_addr = fsopt->mon_addr; fsopt->mon_addr = NULL; - pr_notice("ceph: monitor addresses recorded, but not used for reconnection"); + pr_notice_client(fsc->client, + "monitor addresses recorded, but not used for reconnection"); } sync_filesystem(sb); @@ -1517,10 +1526,11 @@ void ceph_dec_osd_stopping_blocker(struct ceph_mds_client *mdsc) static void ceph_kill_sb(struct super_block *s) { struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s); + struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; bool wait; - dout("kill_sb %p\n", s); + doutc(cl, "%p\n", s); ceph_mdsc_pre_umount(mdsc); flush_fs_workqueues(fsc); @@ -1551,9 +1561,9 @@ static void ceph_kill_sb(struct super_block *s) &mdsc->stopping_waiter, fsc->client->options->mount_timeout); if (!timeleft) /* timed out */ - pr_warn("umount timed out, %ld\n", timeleft); + pr_warn_client(cl, "umount timed out, %ld\n", timeleft); else if (timeleft < 0) /* killed */ - pr_warn("umount was killed, %ld\n", timeleft); + pr_warn_client(cl, "umount was killed, %ld\n", timeleft); } mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 8efd4ba60774..983bf9ab3bd5 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -505,6 +505,12 @@ ceph_sb_to_mdsc(const struct super_block *sb) return (struct ceph_mds_client *)ceph_sb_to_fs_client(sb)->mdsc; } +static inline struct ceph_client * +ceph_inode_to_client(const struct inode *inode) +{ + return (struct ceph_client *)ceph_inode_to_fs_client(inode)->client; +} + static inline struct ceph_vino ceph_vino(const struct inode *inode) { diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 558f64554b59..04d031467193 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -58,6 +58,7 @@ static ssize_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, size_t size) { struct ceph_fs_client *fsc = ceph_sb_to_fs_client(ci->netfs.inode.i_sb); + struct ceph_client *cl = fsc->client; struct ceph_osd_client *osdc = &fsc->client->osdc; struct ceph_string *pool_ns; s64 pool = ci->i_layout.pool_id; @@ -69,7 +70,7 @@ static ssize_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, pool_ns = ceph_try_get_string(ci->i_layout.pool_ns); - dout("ceph_vxattrcb_layout %p\n", &ci->netfs.inode); + doutc(cl, "%p\n", &ci->netfs.inode); down_read(&osdc->lock); pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); if (pool_name) { @@ -570,6 +571,8 @@ static int __set_xattr(struct ceph_inode_info *ci, int flags, int update_xattr, struct ceph_inode_xattr **newxattr) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); struct rb_node **p; struct rb_node *parent = NULL; struct ceph_inode_xattr *xattr = NULL; @@ -626,7 +629,7 @@ static int __set_xattr(struct ceph_inode_info *ci, xattr->should_free_name = update_xattr; ci->i_xattrs.count++; - dout("%s count=%d\n", __func__, ci->i_xattrs.count); + doutc(cl, "count=%d\n", ci->i_xattrs.count); } else { kfree(*newxattr); *newxattr = NULL; @@ -654,13 +657,13 @@ static int __set_xattr(struct ceph_inode_info *ci, if (new) { rb_link_node(&xattr->node, parent, p); rb_insert_color(&xattr->node, &ci->i_xattrs.index); - dout("%s p=%p\n", __func__, p); + doutc(cl, "p=%p\n", p); } - dout("%s added %llx.%llx xattr %p %.*s=%.*s%s\n", __func__, - ceph_vinop(&ci->netfs.inode), xattr, name_len, name, - min(val_len, MAX_XATTR_VAL_PRINT_LEN), val, - val_len > MAX_XATTR_VAL_PRINT_LEN ? "..." : ""); + doutc(cl, "added %p %llx.%llx xattr %p %.*s=%.*s%s\n", inode, + ceph_vinop(inode), xattr, name_len, name, min(val_len, + MAX_XATTR_VAL_PRINT_LEN), val, + val_len > MAX_XATTR_VAL_PRINT_LEN ? "..." : ""); return 0; } @@ -668,6 +671,7 @@ static int __set_xattr(struct ceph_inode_info *ci, static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci, const char *name) { + struct ceph_client *cl = ceph_inode_to_client(&ci->netfs.inode); struct rb_node **p; struct rb_node *parent = NULL; struct ceph_inode_xattr *xattr = NULL; @@ -688,13 +692,13 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci, else { int len = min(xattr->val_len, MAX_XATTR_VAL_PRINT_LEN); - dout("%s %s: found %.*s%s\n", __func__, name, len, - xattr->val, xattr->val_len > len ? "..." : ""); + doutc(cl, "%s found %.*s%s\n", name, len, xattr->val, + xattr->val_len > len ? "..." : ""); return xattr; } } - dout("%s %s: not found\n", __func__, name); + doutc(cl, "%s not found\n", name); return NULL; } @@ -735,19 +739,20 @@ static int __remove_xattr(struct ceph_inode_info *ci, static char *__copy_xattr_names(struct ceph_inode_info *ci, char *dest) { + struct ceph_client *cl = ceph_inode_to_client(&ci->netfs.inode); struct rb_node *p; struct ceph_inode_xattr *xattr = NULL; p = rb_first(&ci->i_xattrs.index); - dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count); + doutc(cl, "count=%d\n", ci->i_xattrs.count); while (p) { xattr = rb_entry(p, struct ceph_inode_xattr, node); memcpy(dest, xattr->name, xattr->name_len); dest[xattr->name_len] = '\0'; - dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name, - xattr->name_len, ci->i_xattrs.names_size); + doutc(cl, "dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name, + xattr->name_len, ci->i_xattrs.names_size); dest += xattr->name_len + 1; p = rb_next(p); @@ -758,19 +763,19 @@ static char *__copy_xattr_names(struct ceph_inode_info *ci, void __ceph_destroy_xattrs(struct ceph_inode_info *ci) { + struct ceph_client *cl = ceph_inode_to_client(&ci->netfs.inode); struct rb_node *p, *tmp; struct ceph_inode_xattr *xattr = NULL; p = rb_first(&ci->i_xattrs.index); - dout("__ceph_destroy_xattrs p=%p\n", p); + doutc(cl, "p=%p\n", p); while (p) { xattr = rb_entry(p, struct ceph_inode_xattr, node); tmp = p; p = rb_next(tmp); - dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p, - xattr->name_len, xattr->name); + doutc(cl, "next p=%p (%.*s)\n", p, xattr->name_len, xattr->name); rb_erase(tmp, &ci->i_xattrs.index); __free_xattr(xattr); @@ -787,6 +792,7 @@ static int __build_xattrs(struct inode *inode) __releases(ci->i_ceph_lock) __acquires(ci->i_ceph_lock) { + struct ceph_client *cl = ceph_inode_to_client(inode); u32 namelen; u32 numattr = 0; void *p, *end; @@ -798,8 +804,8 @@ static int __build_xattrs(struct inode *inode) int err = 0; int i; - dout("__build_xattrs() len=%d\n", - ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0); + doutc(cl, "len=%d\n", + ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0); if (ci->i_xattrs.index_version >= ci->i_xattrs.version) return 0; /* already built */ @@ -874,6 +880,8 @@ static int __build_xattrs(struct inode *inode) static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size, int val_size) { + struct ceph_client *cl = ceph_inode_to_client(&ci->netfs.inode); + /* * 4 bytes for the length, and additional 4 bytes per each xattr name, * 4 bytes per each value @@ -881,9 +889,8 @@ static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size, int size = 4 + ci->i_xattrs.count*(4 + 4) + ci->i_xattrs.names_size + ci->i_xattrs.vals_size; - dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n", - ci->i_xattrs.count, ci->i_xattrs.names_size, - ci->i_xattrs.vals_size); + doutc(cl, "c=%d names.size=%d vals.size=%d\n", ci->i_xattrs.count, + ci->i_xattrs.names_size, ci->i_xattrs.vals_size); if (name_size) size += 4 + 4 + name_size + val_size; @@ -899,12 +906,14 @@ static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size, */ struct ceph_buffer *__ceph_build_xattrs_blob(struct ceph_inode_info *ci) { + struct inode *inode = &ci->netfs.inode; + struct ceph_client *cl = ceph_inode_to_client(inode); struct rb_node *p; struct ceph_inode_xattr *xattr = NULL; struct ceph_buffer *old_blob = NULL; void *dest; - dout("__build_xattrs_blob %p\n", &ci->netfs.inode); + doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode)); if (ci->i_xattrs.dirty) { int need = __get_required_blob_size(ci, 0, 0); @@ -962,6 +971,7 @@ static inline int __get_request_mask(struct inode *in) { ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, size_t size) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_xattr *xattr; struct ceph_vxattr *vxattr; @@ -1000,8 +1010,9 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, req_mask = __get_request_mask(inode); spin_lock(&ci->i_ceph_lock); - dout("getxattr %p name '%s' ver=%lld index_ver=%lld\n", inode, name, - ci->i_xattrs.version, ci->i_xattrs.index_version); + doutc(cl, "%p %llx.%llx name '%s' ver=%lld index_ver=%lld\n", inode, + ceph_vinop(inode), name, ci->i_xattrs.version, + ci->i_xattrs.index_version); if (ci->i_xattrs.version == 0 || !((req_mask & CEPH_CAP_XATTR_SHARED) || @@ -1010,8 +1021,9 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, /* security module gets xattr while filling trace */ if (current->journal_info) { - pr_warn_ratelimited("sync getxattr %p " - "during filling trace\n", inode); + pr_warn_ratelimited_client(cl, + "sync %p %llx.%llx during filling trace\n", + inode, ceph_vinop(inode)); return -EBUSY; } @@ -1053,14 +1065,16 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) { struct inode *inode = d_inode(dentry); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); bool len_only = (size == 0); u32 namelen; int err; spin_lock(&ci->i_ceph_lock); - dout("listxattr %p ver=%lld index_ver=%lld\n", inode, - ci->i_xattrs.version, ci->i_xattrs.index_version); + doutc(cl, "%p %llx.%llx ver=%lld index_ver=%lld\n", inode, + ceph_vinop(inode), ci->i_xattrs.version, + ci->i_xattrs.index_version); if (ci->i_xattrs.version == 0 || !__ceph_caps_issued_mask_metric(ci, CEPH_CAP_XATTR_SHARED, 1)) { @@ -1095,6 +1109,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name, const char *value, size_t size, int flags) { struct ceph_fs_client *fsc = ceph_sb_to_fs_client(inode->i_sb); + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req; struct ceph_mds_client *mdsc = fsc->mdsc; @@ -1119,7 +1134,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name, flags |= CEPH_XATTR_REMOVE; } - dout("setxattr value size: %zu\n", size); + doutc(cl, "name %s value size %zu\n", name, size); /* do request */ req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); @@ -1148,10 +1163,10 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name, req->r_num_caps = 1; req->r_inode_drop = CEPH_CAP_XATTR_SHARED; - dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); + doutc(cl, "xattr.ver (before): %lld\n", ci->i_xattrs.version); err = ceph_mdsc_do_request(mdsc, NULL, req); ceph_mdsc_put_request(req); - dout("xattr.ver (after): %lld\n", ci->i_xattrs.version); + doutc(cl, "xattr.ver (after): %lld\n", ci->i_xattrs.version); out: if (pagelist) @@ -1162,6 +1177,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name, int __ceph_setxattr(struct inode *inode, const char *name, const void *value, size_t size, int flags) { + struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_vxattr *vxattr; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(inode->i_sb)->mdsc; @@ -1220,9 +1236,9 @@ int __ceph_setxattr(struct inode *inode, const char *name, required_blob_size = __get_required_blob_size(ci, name_len, val_len); if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) || (required_blob_size > mdsc->mdsmap->m_max_xattr_size)) { - dout("%s do sync setxattr: version: %llu size: %d max: %llu\n", - __func__, ci->i_xattrs.version, required_blob_size, - mdsc->mdsmap->m_max_xattr_size); + doutc(cl, "sync version: %llu size: %d max: %llu\n", + ci->i_xattrs.version, required_blob_size, + mdsc->mdsmap->m_max_xattr_size); goto do_sync; } @@ -1236,8 +1252,8 @@ int __ceph_setxattr(struct inode *inode, const char *name, } } - dout("setxattr %p name '%s' issued %s\n", inode, name, - ceph_cap_string(issued)); + doutc(cl, "%p %llx.%llx name '%s' issued %s\n", inode, + ceph_vinop(inode), name, ceph_cap_string(issued)); __build_xattrs(inode); if (!ci->i_xattrs.prealloc_blob || @@ -1246,7 +1262,8 @@ int __ceph_setxattr(struct inode *inode, const char *name, spin_unlock(&ci->i_ceph_lock); ceph_buffer_put(old_blob); /* Shouldn't be required */ - dout(" pre-allocating new blob size=%d\n", required_blob_size); + doutc(cl, " pre-allocating new blob size=%d\n", + required_blob_size); blob = ceph_buffer_new(required_blob_size, GFP_NOFS); if (!blob) goto do_sync_unlocked; @@ -1285,8 +1302,9 @@ int __ceph_setxattr(struct inode *inode, const char *name, /* security module set xattr while filling trace */ if (current->journal_info) { - pr_warn_ratelimited("sync setxattr %p " - "during filling trace\n", inode); + pr_warn_ratelimited_client(cl, + "sync %p %llx.%llx during filling trace\n", + inode, ceph_vinop(inode)); err = -EBUSY; } else { err = ceph_sync_setxattr(inode, name, value, size, flags); From 522dc5108f07ef30e2c7399e59b9547d382308ff Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 12 Jun 2023 15:41:10 +0800 Subject: [PATCH 062/415] libceph, ceph: move mdsmap.h to fs/ceph The mdsmap.h is only used by CephFS, so move it to fs/ceph. Signed-off-by: Xiubo Li Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.h | 2 +- fs/ceph/mdsmap.c | 2 +- {include/linux => fs}/ceph/mdsmap.h | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename {include/linux => fs}/ceph/mdsmap.h (100%) diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index d930eb79dc38..df9e3d668969 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -14,9 +14,9 @@ #include #include -#include #include +#include "mdsmap.h" #include "metric.h" #include "super.h" diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index fa080183ac1f..fae97c25ce58 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -7,10 +7,10 @@ #include #include -#include #include #include +#include "mdsmap.h" #include "mds_client.h" #include "super.h" diff --git a/include/linux/ceph/mdsmap.h b/fs/ceph/mdsmap.h similarity index 100% rename from include/linux/ceph/mdsmap.h rename to fs/ceph/mdsmap.h From 1b90344614cc5949666328b37f03edec1d4e2873 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 7 Aug 2023 15:26:15 +0200 Subject: [PATCH 063/415] fs: export mnt_idmap_get/mnt_idmap_put These helpers are required to support idmapped mounts in CephFS. Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Christian Brauner Signed-off-by: Ilya Dryomov --- fs/mnt_idmapping.c | 2 ++ include/linux/mnt_idmapping.h | 3 +++ 2 files changed, 5 insertions(+) diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c index 4905665c47d0..57d1dedf3f8f 100644 --- a/fs/mnt_idmapping.c +++ b/fs/mnt_idmapping.c @@ -256,6 +256,7 @@ struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap) return idmap; } +EXPORT_SYMBOL_GPL(mnt_idmap_get); /** * mnt_idmap_put - put a reference to an idmapping @@ -271,3 +272,4 @@ void mnt_idmap_put(struct mnt_idmap *idmap) kfree(idmap); } } +EXPORT_SYMBOL_GPL(mnt_idmap_put); diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index 057c89867aa2..b8da2db4ecd2 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -115,6 +115,9 @@ static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid) int vfsgid_in_group_p(vfsgid_t vfsgid); +struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap); +void mnt_idmap_put(struct mnt_idmap *idmap); + vfsuid_t make_vfsuid(struct mnt_idmap *idmap, struct user_namespace *fs_userns, kuid_t kuid); From 9c2df2271c6901ba67d3437494b3d889dc43645b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Aug 2023 15:26:16 +0200 Subject: [PATCH 064/415] ceph: stash idmapping in mdsc request When sending a mds request cephfs will send relevant data for the requested operation. For creation requests the caller's fs{g,u}id is used to set the ownership of the newly created filesystem object. For setattr requests the caller can pass in arbitrary {g,u}id values to which the relevant filesystem object is supposed to be changed. If the caller is performing the relevant operation via an idmapped mount cephfs simply needs to take the idmapping into account when it sends the relevant mds request. In order to support idmapped mounts for cephfs we stash the idmapping whenever they are relevant for the operation for the duration of the request. Since mds requests can be queued and performed asynchronously we make sure to keep the idmapping around and release it once the request has finished. In follow-up patches we will use this to send correct ownership information over the wire. This patch just adds the basic infrastructure to keep the idmapping around. The actual conversion patches are all fairly minimal. Signed-off-by: Christian Brauner Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 5 +++++ fs/ceph/mds_client.h | 1 + 2 files changed, 6 insertions(+) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 86c8b6859852..ff5a05346f52 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "super.h" #include "mds_client.h" @@ -1122,6 +1123,8 @@ void ceph_mdsc_release_request(struct kref *kref) kfree(req->r_path1); kfree(req->r_path2); put_cred(req->r_cred); + if (req->r_mnt_idmap) + mnt_idmap_put(req->r_mnt_idmap); if (req->r_pagelist) ceph_pagelist_release(req->r_pagelist); kfree(req->r_fscrypt_auth); @@ -1181,6 +1184,8 @@ static void __register_request(struct ceph_mds_client *mdsc, insert_request(&mdsc->request_tree, req); req->r_cred = get_current_cred(); + if (!req->r_mnt_idmap) + req->r_mnt_idmap = &nop_mnt_idmap; if (mdsc->oldest_tid == 0 && req->r_op != CEPH_MDS_OP_SETFILELOCK) mdsc->oldest_tid = req->r_tid; diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index df9e3d668969..eb5e76446046 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -300,6 +300,7 @@ struct ceph_mds_request { int r_fmode; /* file mode, if expecting cap */ int r_request_release_offset; const struct cred *r_cred; + struct mnt_idmap *r_mnt_idmap; struct timespec64 r_stamp; /* for choosing which mds to send this request to */ From 5ccd8530dd7ba97531a50ffa11eabe258d65a7af Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Aug 2023 15:26:17 +0200 Subject: [PATCH 065/415] ceph: handle idmapped mounts in create_request_message() Inode operations that create a new filesystem object such as ->mknod, ->create, ->mkdir() and others don't take a {g,u}id argument explicitly. Instead the caller's fs{g,u}id is used for the {g,u}id of the new filesystem object. In order to ensure that the correct {g,u}id is used map the caller's fs{g,u}id for creation requests. This doesn't require complex changes. It suffices to pass in the relevant idmapping recorded in the request message. If this request message was triggered from an inode operation that creates filesystem objects it will have passed down the relevant idmaping. If this is a request message that was triggered from an inode operation that doens't need to take idmappings into account the initial idmapping is passed down which is an identity mapping. This change uses a new cephfs protocol extension CEPHFS_FEATURE_HAS_OWNER_UIDGID which adds two new fields (owner_{u,g}id) to the request head structure. So, we need to ensure that MDS supports it otherwise we need to fail any IO that comes through an idmapped mount because we can't process it in a proper way. MDS server without such an extension will use caller_{u,g}id fields to set a new inode owner UID/GID which is incorrect because caller_{u,g}id values are unmapped. At the same time we can't map these fields with an idmapping as it can break UID/GID-based permission checks logic on the MDS side. This problem was described with a lot of details at [1], [2]. [1] https://lore.kernel.org/lkml/CAEivzxfw1fHO2TFA4dx3u23ZKK6Q+EThfzuibrhA3RKM=ZOYLg@mail.gmail.com/ [2] https://lore.kernel.org/all/20220104140414.155198-3-brauner@kernel.org/ Link: https://github.com/ceph/ceph/pull/52575 Link: https://tracker.ceph.com/issues/62217 Co-Developed-by: Alexander Mikhalitsyn Signed-off-by: Christian Brauner Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 58 ++++++++++++++++++++++++++++++++---- fs/ceph/mds_client.h | 5 +++- include/linux/ceph/ceph_fs.h | 10 ++++++- 3 files changed, 66 insertions(+), 7 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index ff5a05346f52..b97a16df7580 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2881,6 +2881,17 @@ static void encode_mclientrequest_tail(void **p, } } +static inline u16 mds_supported_head_version(struct ceph_mds_session *session) +{ + if (!test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD, &session->s_features)) + return 1; + + if (!test_bit(CEPHFS_FEATURE_HAS_OWNER_UIDGID, &session->s_features)) + return 2; + + return CEPH_MDS_REQUEST_HEAD_VERSION; +} + static struct ceph_mds_request_head_legacy * find_legacy_request_head(void *p, u64 features) { @@ -2902,6 +2913,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, { int mds = session->s_mds; struct ceph_mds_client *mdsc = session->s_mdsc; + struct ceph_client *cl = mdsc->fsc->client; struct ceph_msg *msg; struct ceph_mds_request_head_legacy *lhead; const char *path1 = NULL; @@ -2915,8 +2927,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, void *p, *end; int ret; bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME); - bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD, - &session->s_features); + u16 request_head_version = mds_supported_head_version(session); ret = set_request_path_attr(mdsc, req->r_inode, req->r_dentry, req->r_parent, req->r_path1, req->r_ino1.ino, @@ -2957,8 +2968,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, */ if (legacy) len = sizeof(struct ceph_mds_request_head_legacy); - else if (old_version) + else if (request_head_version == 1) len = sizeof(struct ceph_mds_request_head_old); + else if (request_head_version == 2) + len = offsetofend(struct ceph_mds_request_head, ext_num_fwd); else len = sizeof(struct ceph_mds_request_head); @@ -3008,6 +3021,18 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, lhead = find_legacy_request_head(msg->front.iov_base, session->s_con.peer_features); + if ((req->r_mnt_idmap != &nop_mnt_idmap) && + !test_bit(CEPHFS_FEATURE_HAS_OWNER_UIDGID, &session->s_features)) { + WARN_ON_ONCE(!IS_CEPH_MDS_OP_NEWINODE(req->r_op)); + + pr_err_ratelimited_client(cl, + "idmapped mount is used and CEPHFS_FEATURE_HAS_OWNER_UIDGID" + " is not supported by MDS. Fail request with -EIO.\n"); + + ret = -EIO; + goto out_err; + } + /* * The ceph_mds_request_head_legacy didn't contain a version field, and * one was added when we moved the message version from 3->4. @@ -3015,17 +3040,40 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, if (legacy) { msg->hdr.version = cpu_to_le16(3); p = msg->front.iov_base + sizeof(*lhead); - } else if (old_version) { + } else if (request_head_version == 1) { struct ceph_mds_request_head_old *ohead = msg->front.iov_base; msg->hdr.version = cpu_to_le16(4); ohead->version = cpu_to_le16(1); p = msg->front.iov_base + sizeof(*ohead); - } else { + } else if (request_head_version == 2) { struct ceph_mds_request_head *nhead = msg->front.iov_base; + msg->hdr.version = cpu_to_le16(6); + nhead->version = cpu_to_le16(2); + + p = msg->front.iov_base + offsetofend(struct ceph_mds_request_head, ext_num_fwd); + } else { + struct ceph_mds_request_head *nhead = msg->front.iov_base; + kuid_t owner_fsuid; + kgid_t owner_fsgid; + msg->hdr.version = cpu_to_le16(6); nhead->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION); + nhead->struct_len = cpu_to_le32(sizeof(struct ceph_mds_request_head)); + + if (IS_CEPH_MDS_OP_NEWINODE(req->r_op)) { + owner_fsuid = from_vfsuid(req->r_mnt_idmap, &init_user_ns, + VFSUIDT_INIT(req->r_cred->fsuid)); + owner_fsgid = from_vfsgid(req->r_mnt_idmap, &init_user_ns, + VFSGIDT_INIT(req->r_cred->fsgid)); + nhead->owner_uid = cpu_to_le32(from_kuid(&init_user_ns, owner_fsuid)); + nhead->owner_gid = cpu_to_le32(from_kgid(&init_user_ns, owner_fsgid)); + } else { + nhead->owner_uid = cpu_to_le32(-1); + nhead->owner_gid = cpu_to_le32(-1); + } + p = msg->front.iov_base + sizeof(*nhead); } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index eb5e76446046..7c942294e96b 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -33,8 +33,10 @@ enum ceph_feature_type { CEPHFS_FEATURE_NOTIFY_SESSION_STATE, CEPHFS_FEATURE_OP_GETVXATTR, CEPHFS_FEATURE_32BITS_RETRY_FWD, + CEPHFS_FEATURE_NEW_SNAPREALM_INFO, + CEPHFS_FEATURE_HAS_OWNER_UIDGID, - CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_32BITS_RETRY_FWD, + CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_HAS_OWNER_UIDGID, }; #define CEPHFS_FEATURES_CLIENT_SUPPORTED { \ @@ -49,6 +51,7 @@ enum ceph_feature_type { CEPHFS_FEATURE_NOTIFY_SESSION_STATE, \ CEPHFS_FEATURE_OP_GETVXATTR, \ CEPHFS_FEATURE_32BITS_RETRY_FWD, \ + CEPHFS_FEATURE_HAS_OWNER_UIDGID, \ } /* diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index f3b3593254b9..ee1d0e5f9789 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -357,6 +357,11 @@ enum { CEPH_MDS_OP_RENAMESNAP = 0x01403, }; +#define IS_CEPH_MDS_OP_NEWINODE(op) (op == CEPH_MDS_OP_CREATE || \ + op == CEPH_MDS_OP_MKNOD || \ + op == CEPH_MDS_OP_MKDIR || \ + op == CEPH_MDS_OP_SYMLINK) + extern const char *ceph_mds_op_name(int op); #define CEPH_SETATTR_MODE (1 << 0) @@ -497,7 +502,7 @@ struct ceph_mds_request_head_legacy { union ceph_mds_request_args args; } __attribute__ ((packed)); -#define CEPH_MDS_REQUEST_HEAD_VERSION 2 +#define CEPH_MDS_REQUEST_HEAD_VERSION 3 struct ceph_mds_request_head_old { __le16 version; /* struct version */ @@ -528,6 +533,9 @@ struct ceph_mds_request_head { __le32 ext_num_retry; /* new count retry attempts */ __le32 ext_num_fwd; /* new count fwd attempts */ + + __le32 struct_len; /* to store size of struct ceph_mds_request_head */ + __le32 owner_uid, owner_gid; /* used for OPs which create inodes */ } __attribute__ ((packed)); /* cap/lease release record */ From 673478b6e59b25079a590eb5ba89d7a3ec9c1c78 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 7 Aug 2023 15:26:18 +0200 Subject: [PATCH 066/415] ceph: add enable_unsafe_idmap module parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This parameter is used to decide if we allow to perform IO on idmapped mount in case when MDS lacks support of CEPHFS_FEATURE_HAS_OWNER_UIDGID feature. In this case we can't properly handle MDS permission checks and if UID/GID-based restrictions are enabled on the MDS side then IO requests which go through an idmapped mount may fail with -EACCESS/-EPERM. Fortunately, for most of users it's not a case and everything should work fine. But we put work "unsafe" in the module parameter name to warn users about possible problems with this feature and encourage update of cephfs MDS. Suggested-by: Stéphane Graber Signed-off-by: Alexander Mikhalitsyn Acked-by: Christian Brauner Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 28 +++++++++++++++++++++------- fs/ceph/mds_client.h | 2 ++ fs/ceph/super.c | 5 +++++ 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index b97a16df7580..7f4147bb98ef 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2928,6 +2928,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, int ret; bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME); u16 request_head_version = mds_supported_head_version(session); + kuid_t caller_fsuid = req->r_cred->fsuid; + kgid_t caller_fsgid = req->r_cred->fsgid; ret = set_request_path_attr(mdsc, req->r_inode, req->r_dentry, req->r_parent, req->r_path1, req->r_ino1.ino, @@ -3025,12 +3027,24 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, !test_bit(CEPHFS_FEATURE_HAS_OWNER_UIDGID, &session->s_features)) { WARN_ON_ONCE(!IS_CEPH_MDS_OP_NEWINODE(req->r_op)); - pr_err_ratelimited_client(cl, - "idmapped mount is used and CEPHFS_FEATURE_HAS_OWNER_UIDGID" - " is not supported by MDS. Fail request with -EIO.\n"); + if (enable_unsafe_idmap) { + pr_warn_once_client(cl, + "idmapped mount is used and CEPHFS_FEATURE_HAS_OWNER_UIDGID" + " is not supported by MDS. UID/GID-based restrictions may" + " not work properly.\n"); - ret = -EIO; - goto out_err; + caller_fsuid = from_vfsuid(req->r_mnt_idmap, &init_user_ns, + VFSUIDT_INIT(req->r_cred->fsuid)); + caller_fsgid = from_vfsgid(req->r_mnt_idmap, &init_user_ns, + VFSGIDT_INIT(req->r_cred->fsgid)); + } else { + pr_err_ratelimited_client(cl, + "idmapped mount is used and CEPHFS_FEATURE_HAS_OWNER_UIDGID" + " is not supported by MDS. Fail request with -EIO.\n"); + + ret = -EIO; + goto out_err; + } } /* @@ -3082,9 +3096,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, lhead->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); lhead->op = cpu_to_le32(req->r_op); lhead->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, - req->r_cred->fsuid)); + caller_fsuid)); lhead->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, - req->r_cred->fsgid)); + caller_fsgid)); lhead->ino = cpu_to_le64(req->r_deleg_ino); lhead->args = req->r_args; diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 7c942294e96b..2e6ddaa13d72 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -619,4 +619,6 @@ static inline int ceph_wait_on_async_create(struct inode *inode) extern int ceph_wait_on_conflict_unlink(struct dentry *dentry); extern u64 ceph_get_deleg_ino(struct ceph_mds_session *session); extern int ceph_restore_deleg_ino(struct ceph_mds_session *session, u64 ino); + +extern bool enable_unsafe_idmap; #endif diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 551fb686182b..33fedcafbab7 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1681,6 +1681,11 @@ static const struct kernel_param_ops param_ops_mount_syntax = { module_param_cb(mount_syntax_v1, ¶m_ops_mount_syntax, &mount_support, 0444); module_param_cb(mount_syntax_v2, ¶m_ops_mount_syntax, &mount_support, 0444); +bool enable_unsafe_idmap = false; +module_param(enable_unsafe_idmap, bool, 0644); +MODULE_PARM_DESC(enable_unsafe_idmap, + "Allow to use idmapped mounts with MDS without CEPHFS_FEATURE_HAS_OWNER_UIDGID"); + module_init(init_ceph); module_exit(exit_ceph); From 09838f1bfd40f483cd6442788e810ea042e91177 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Aug 2023 15:26:19 +0200 Subject: [PATCH 067/415] ceph: pass an idmapping to mknod/symlink/mkdir Enable mknod/symlink/mkdir iops to handle idmapped mounts. This is just a matter of passing down the mount's idmapping. Signed-off-by: Christian Brauner Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/dir.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 6bb95b4e93a8..91709934c8b1 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -953,6 +953,7 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir, req->r_parent = dir; ihold(dir); set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); + req->r_mnt_idmap = mnt_idmap_get(idmap); req->r_args.mknod.mode = cpu_to_le32(mode); req->r_args.mknod.rdev = cpu_to_le32(rdev); req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL | @@ -1071,6 +1072,7 @@ static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir, } set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); + req->r_mnt_idmap = mnt_idmap_get(idmap); req->r_dentry = dget(dentry); req->r_num_caps = 2; req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL | @@ -1150,6 +1152,8 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir, req->r_parent = dir; ihold(dir); set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); + if (op == CEPH_MDS_OP_MKDIR) + req->r_mnt_idmap = mnt_idmap_get(idmap); req->r_args.mkdir.mode = cpu_to_le32(mode); req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL | CEPH_CAP_XATTR_EXCL; From 0513043ec491500237d7215bf1ccfed54f292e86 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Aug 2023 15:26:20 +0200 Subject: [PATCH 068/415] ceph: allow idmapped getattr inode op Enable ceph_getattr() to handle idmapped mounts. This is just a matter of passing down the mount's idmapping. Signed-off-by: Christian Brauner Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 808b5a69f028..4e7610357d89 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -3027,7 +3027,7 @@ int ceph_getattr(struct mnt_idmap *idmap, const struct path *path, return err; } - generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); + generic_fillattr(idmap, request_mask, inode, stat); stat->ino = ceph_present_inode(inode); /* From 8995375fae40a9a014622fd2a0567fe8a2252e66 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Aug 2023 15:26:21 +0200 Subject: [PATCH 069/415] ceph: allow idmapped permission inode op Enable ceph_permission() to handle idmapped mounts. This is just a matter of passing down the mount's idmapping. Signed-off-by: Christian Brauner Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 4e7610357d89..1b0a1ac6ed74 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2970,7 +2970,7 @@ int ceph_permission(struct mnt_idmap *idmap, struct inode *inode, err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED, false); if (!err) - err = generic_permission(&nop_mnt_idmap, inode, mask); + err = generic_permission(idmap, inode, mask); return err; } From 79c66a0c8c4a5d341e948dd6a41111957b315f20 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 7 Aug 2023 15:26:22 +0200 Subject: [PATCH 070/415] ceph: pass idmap to __ceph_setattr Just pass down the mount's idmapping to __ceph_setattr, because we will need it later. Signed-off-by: Alexander Mikhalitsyn Acked-by: Christian Brauner Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/acl.c | 4 ++-- fs/ceph/crypto.c | 2 +- fs/ceph/inode.c | 6 +++--- fs/ceph/super.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index e95841961397..295db55e4eee 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -142,7 +142,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, newattrs.ia_ctime = current_time(inode); newattrs.ia_mode = new_mode; newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - ret = __ceph_setattr(inode, &newattrs, NULL); + ret = __ceph_setattr(idmap, inode, &newattrs, NULL); if (ret) goto out_free; } @@ -153,7 +153,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, newattrs.ia_ctime = old_ctime; newattrs.ia_mode = old_mode; newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - __ceph_setattr(inode, &newattrs, NULL); + __ceph_setattr(idmap, inode, &newattrs, NULL); } goto out_free; } diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c index d692ebfddedb..52b1219699aa 100644 --- a/fs/ceph/crypto.c +++ b/fs/ceph/crypto.c @@ -113,7 +113,7 @@ static int ceph_crypt_set_context(struct inode *inode, const void *ctx, cia.fscrypt_auth = cfa; - ret = __ceph_setattr(inode, &attr, &cia); + ret = __ceph_setattr(&nop_mnt_idmap, inode, &attr, &cia); if (ret == 0) inode_set_flags(inode, S_ENCRYPTED, S_ENCRYPTED); kfree(cia.fscrypt_auth); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 1b0a1ac6ed74..927a336350fc 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2457,8 +2457,8 @@ static int fill_fscrypt_truncate(struct inode *inode, return ret; } -int __ceph_setattr(struct inode *inode, struct iattr *attr, - struct ceph_iattr *cia) +int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, + struct iattr *attr, struct ceph_iattr *cia) { struct ceph_inode_info *ci = ceph_inode(inode); unsigned int ia_valid = attr->ia_valid; @@ -2811,7 +2811,7 @@ int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry, ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size)) return -EDQUOT; - err = __ceph_setattr(inode, attr, NULL); + err = __ceph_setattr(idmap, inode, attr, NULL); if (err >= 0 && (attr->ia_valid & ATTR_MODE)) err = posix_acl_chmod(&nop_mnt_idmap, dentry, attr->ia_mode); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 983bf9ab3bd5..504c6f9a5bea 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1100,8 +1100,8 @@ struct ceph_iattr { struct ceph_fscrypt_auth *fscrypt_auth; }; -extern int __ceph_setattr(struct inode *inode, struct iattr *attr, - struct ceph_iattr *cia); +extern int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, + struct iattr *attr, struct ceph_iattr *cia); extern int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); extern int ceph_getattr(struct mnt_idmap *idmap, From a04aff2588e05bdefc6abb3f16999312db14c333 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Aug 2023 15:26:23 +0200 Subject: [PATCH 071/415] ceph: allow idmapped setattr inode op Enable __ceph_setattr() to handle idmapped mounts. This is just a matter of passing down the mount's idmapping. [ aleksandr.mikhalitsyn: adapted to b27c82e12965 ("attr: port attribute changes to new types") ] Signed-off-by: Christian Brauner Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 927a336350fc..b1b92a948a3b 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2544,33 +2544,37 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, #endif /* CONFIG_FS_ENCRYPTION */ if (ia_valid & ATTR_UID) { + kuid_t fsuid = from_vfsuid(idmap, i_user_ns(inode), attr->ia_vfsuid); + doutc(cl, "%p %llx.%llx uid %d -> %d\n", inode, ceph_vinop(inode), from_kuid(&init_user_ns, inode->i_uid), from_kuid(&init_user_ns, attr->ia_uid)); if (issued & CEPH_CAP_AUTH_EXCL) { - inode->i_uid = attr->ia_uid; + inode->i_uid = fsuid; dirtied |= CEPH_CAP_AUTH_EXCL; } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || - !uid_eq(attr->ia_uid, inode->i_uid)) { + !uid_eq(fsuid, inode->i_uid)) { req->r_args.setattr.uid = cpu_to_le32( - from_kuid(&init_user_ns, attr->ia_uid)); + from_kuid(&init_user_ns, fsuid)); mask |= CEPH_SETATTR_UID; release |= CEPH_CAP_AUTH_SHARED; } } if (ia_valid & ATTR_GID) { + kgid_t fsgid = from_vfsgid(idmap, i_user_ns(inode), attr->ia_vfsgid); + doutc(cl, "%p %llx.%llx gid %d -> %d\n", inode, ceph_vinop(inode), from_kgid(&init_user_ns, inode->i_gid), from_kgid(&init_user_ns, attr->ia_gid)); if (issued & CEPH_CAP_AUTH_EXCL) { - inode->i_gid = attr->ia_gid; + inode->i_gid = fsgid; dirtied |= CEPH_CAP_AUTH_EXCL; } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || - !gid_eq(attr->ia_gid, inode->i_gid)) { + !gid_eq(fsgid, inode->i_gid)) { req->r_args.setattr.gid = cpu_to_le32( - from_kgid(&init_user_ns, attr->ia_gid)); + from_kgid(&init_user_ns, fsgid)); mask |= CEPH_SETATTR_GID; release |= CEPH_CAP_AUTH_SHARED; } @@ -2799,7 +2803,7 @@ int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (err) return err; - err = setattr_prepare(&nop_mnt_idmap, dentry, attr); + err = setattr_prepare(idmap, dentry, attr); if (err != 0) return err; @@ -2814,7 +2818,7 @@ int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry, err = __ceph_setattr(idmap, inode, attr, NULL); if (err >= 0 && (attr->ia_valid & ATTR_MODE)) - err = posix_acl_chmod(&nop_mnt_idmap, dentry, attr->ia_mode); + err = posix_acl_chmod(idmap, dentry, attr->ia_mode); return err; } From 2cce72dda2b5c494f6489ba78588c4ce191c6b61 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Aug 2023 15:26:24 +0200 Subject: [PATCH 072/415] ceph: allow idmapped set_acl inode op Enable ceph_set_acl() to handle idmapped mounts. This is just a matter of passing down the mount's idmapping. Signed-off-by: Christian Brauner Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/acl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 295db55e4eee..1564eacc253d 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -107,7 +107,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; if (acl) { - ret = posix_acl_update_mode(&nop_mnt_idmap, inode, + ret = posix_acl_update_mode(idmap, inode, &new_mode, &acl); if (ret) goto out; From 8a051b40abd6f948a599dc6328e9356f4697e77d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Aug 2023 15:26:25 +0200 Subject: [PATCH 073/415] ceph: allow idmapped atomic_open inode op Enable ceph_atomic_open() to handle idmapped mounts. This is just a matter of passing down the mount's idmapping. [ aleksandr.mikhalitsyn: adapted to 5fadbd9929 ("ceph: rely on vfs for setgid stripping") ] Signed-off-by: Christian Brauner Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index f60164d9e0c8..53789c5f2ebb 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -668,7 +668,9 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, in.truncate_seq = cpu_to_le32(1); in.truncate_size = cpu_to_le64(-1ULL); in.xattr_version = cpu_to_le64(1); - in.uid = cpu_to_le32(from_kuid(&init_user_ns, current_fsuid())); + in.uid = cpu_to_le32(from_kuid(&init_user_ns, + mapped_fsuid(req->r_mnt_idmap, + &init_user_ns))); if (dir->i_mode & S_ISGID) { in.gid = cpu_to_le32(from_kgid(&init_user_ns, dir->i_gid)); @@ -676,7 +678,9 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, if (S_ISDIR(mode)) mode |= S_ISGID; } else { - in.gid = cpu_to_le32(from_kgid(&init_user_ns, current_fsgid())); + in.gid = cpu_to_le32(from_kgid(&init_user_ns, + mapped_fsgid(req->r_mnt_idmap, + &init_user_ns))); } in.mode = cpu_to_le32((u32)mode); @@ -743,6 +747,7 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned flags, umode_t mode) { + struct mnt_idmap *idmap = file_mnt_idmap(file); struct ceph_fs_client *fsc = ceph_sb_to_fs_client(dir->i_sb); struct ceph_client *cl = fsc->client; struct ceph_mds_client *mdsc = fsc->mdsc; @@ -802,6 +807,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, mask |= CEPH_CAP_XATTR_SHARED; req->r_args.open.mask = cpu_to_le32(mask); req->r_parent = dir; + if (req->r_op == CEPH_MDS_OP_CREATE) + req->r_mnt_idmap = mnt_idmap_get(idmap); ihold(dir); if (IS_ENCRYPTED(dir)) { set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags); From 56d2e2cfa21315c12945c22e141c7e7ec8b0a630 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Aug 2023 15:26:26 +0200 Subject: [PATCH 074/415] ceph: allow idmapped mounts Now that we converted cephfs internally to account for idmapped mounts allow the creation of idmapped mounts on by setting the FS_ALLOW_IDMAP flag. Signed-off-by: Christian Brauner Signed-off-by: Alexander Mikhalitsyn Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 33fedcafbab7..5ec102f6b1ac 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1582,7 +1582,7 @@ static struct file_system_type ceph_fs_type = { .name = "ceph", .init_fs_context = ceph_init_fs_context, .kill_sb = ceph_kill_sb, - .fs_flags = FS_RENAME_DOES_D_MOVE, + .fs_flags = FS_RENAME_DOES_D_MOVE | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("ceph"); From ce4f78f1b53d3327fbd32764aa333bf05fb68818 Mon Sep 17 00:00:00 2001 From: Haorong Lu Date: Thu, 3 Aug 2023 15:44:54 -0700 Subject: [PATCH 075/415] riscv: signal: handle syscall restart before get_signal In the current riscv implementation, blocking syscalls like read() may not correctly restart after being interrupted by ptrace. This problem arises when the syscall restart process in arch_do_signal_or_restart() is bypassed due to changes to the regs->cause register, such as an ebreak instruction. Steps to reproduce: 1. Interrupt the tracee process with PTRACE_SEIZE & PTRACE_INTERRUPT. 2. Backup original registers and instruction at new_pc. 3. Change pc to new_pc, and inject an instruction (like ebreak) to this address. 4. Resume with PTRACE_CONT and wait for the process to stop again after executing ebreak. 5. Restore original registers and instructions, and detach from the tracee process. 6. Now the read() syscall in tracee will return -1 with errno set to ERESTARTSYS. Specifically, during an interrupt, the regs->cause changes from EXC_SYSCALL to EXC_BREAKPOINT due to the injected ebreak, which is inaccessible via ptrace so we cannot restore it. This alteration breaks the syscall restart condition and ends the read() syscall with an ERESTARTSYS error. According to include/linux/errno.h, it should never be seen by user programs. X86 can avoid this issue as it checks the syscall condition using a register (orig_ax) exposed to user space. Arm64 handles syscall restart before calling get_signal, where it could be paused and inspected by ptrace/debugger. This patch adjusts the riscv implementation to arm64 style, which also checks syscall using a kernel register (syscallno). It ensures the syscall restart process is not bypassed when changes to the cause register occur, providing more consistent behavior across various architectures. For a simplified reproduction program, feel free to visit: https://github.com/ancientmodern/riscv-ptrace-bug-demo. Signed-off-by: Haorong Lu Link: https://lore.kernel.org/r/20230803224458.4156006-1-ancientmodern4@gmail.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/signal.c | 95 ++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 44 deletions(-) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 180d951d3624..d2d7169048ea 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -391,30 +391,6 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) sigset_t *oldset = sigmask_to_save(); int ret; - /* Are we from a system call? */ - if (regs->cause == EXC_SYSCALL) { - /* Avoid additional syscall restarting via ret_from_exception */ - regs->cause = -1UL; - /* If so, check system call restarting.. */ - switch (regs->a0) { - case -ERESTART_RESTARTBLOCK: - case -ERESTARTNOHAND: - regs->a0 = -EINTR; - break; - - case -ERESTARTSYS: - if (!(ksig->ka.sa.sa_flags & SA_RESTART)) { - regs->a0 = -EINTR; - break; - } - fallthrough; - case -ERESTARTNOINTR: - regs->a0 = regs->orig_a0; - regs->epc -= 0x4; - break; - } - } - rseq_signal_deliver(ksig, regs); /* Set up the stack frame */ @@ -428,34 +404,65 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) void arch_do_signal_or_restart(struct pt_regs *regs) { + unsigned long continue_addr = 0, restart_addr = 0; + int retval = 0; struct ksignal ksig; + bool syscall = (regs->cause == EXC_SYSCALL); + /* If we were from a system call, check for system call restarting */ + if (syscall) { + continue_addr = regs->epc; + restart_addr = continue_addr - 4; + retval = regs->a0; + + /* Avoid additional syscall restarting via ret_from_exception */ + regs->cause = -1UL; + + /* + * Prepare for system call restart. We do this here so that a + * debugger will see the already changed PC. + */ + switch (retval) { + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + case -ERESTART_RESTARTBLOCK: + regs->a0 = regs->orig_a0; + regs->epc = restart_addr; + break; + } + } + + /* + * Get the signal to deliver. When running under ptrace, at this point + * the debugger may change all of our registers. + */ if (get_signal(&ksig)) { + /* + * Depending on the signal settings, we may need to revert the + * decision to restart the system call, but skip this if a + * debugger has chosen to restart at a different PC. + */ + if (regs->epc == restart_addr && + (retval == -ERESTARTNOHAND || + retval == -ERESTART_RESTARTBLOCK || + (retval == -ERESTARTSYS && + !(ksig.ka.sa.sa_flags & SA_RESTART)))) { + regs->a0 = -EINTR; + regs->epc = continue_addr; + } + /* Actually deliver the signal */ handle_signal(&ksig, regs); return; } - /* Did we come from a system call? */ - if (regs->cause == EXC_SYSCALL) { - /* Avoid additional syscall restarting via ret_from_exception */ - regs->cause = -1UL; - - /* Restart the system call - no handlers present */ - switch (regs->a0) { - case -ERESTARTNOHAND: - case -ERESTARTSYS: - case -ERESTARTNOINTR: - regs->a0 = regs->orig_a0; - regs->epc -= 0x4; - break; - case -ERESTART_RESTARTBLOCK: - regs->a0 = regs->orig_a0; - regs->a7 = __NR_restart_syscall; - regs->epc -= 0x4; - break; - } - } + /* + * Handle restarting a different system call. As above, if a debugger + * has chosen to restart at a different PC, ignore the restart. + */ + if (syscall && regs->epc == restart_addr && retval == -ERESTART_RESTARTBLOCK) + regs->a7 = __NR_restart_syscall; /* * If there is no signal to deliver, we just put the saved From 07863871dfb162965b21bb8c2e4861bdb0019da3 Mon Sep 17 00:00:00 2001 From: Jinyu Tang Date: Tue, 12 Sep 2023 21:31:28 +0800 Subject: [PATCH 076/415] riscv: defconfig : add CONFIG_MMC_DW for starfive If these config not set, mmc can't run for jh7110, rootfs can't be found when using SD card. So set CONFIG_MMC_DW=y like arm64 defconfig, and set CONFIG_MMC_DW_STARFIVE=y for starfive. Then starfive vf2 board can start SD card rootfs with mainline defconfig and dtb. Signed-off-by: Jinyu Tang Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230912133128.5247-1-tangjinyu@tinylab.org Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 1e81c865a271..d56d52e2ef87 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -214,6 +214,8 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_CADENCE=y CONFIG_MMC_SPI=y +CONFIG_MMC_DW=y +CONFIG_MMC_DW_STARFIVE=y CONFIG_MMC_SDHI=y CONFIG_MMC_SUNXI=y CONFIG_RTC_CLASS=y From c20d36cc2a2073d4cdcda92bd7a1bb9b3b3b7c79 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 12 Sep 2023 23:40:40 +0800 Subject: [PATCH 077/415] riscv: don't probe unaligned access speed if already done If misaligned_access_speed percpu var isn't so called "HWPROBE MISALIGNED UNKNOWN", it means the probe has happened(this is possible for example, hotplug off then hotplug on one cpu), and the percpu var has been set, don't probe again in this case. Signed-off-by: Jisheng Zhang Fixes: 584ea6564bca ("RISC-V: Probe for unaligned access speed") Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230912154040.3306-1-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 970a71422bee..6a01ded615cd 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -572,6 +572,10 @@ void check_unaligned_access(int cpu) if (check_unaligned_access_emulated(cpu)) return; + /* We are already set since the last check */ + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + return; + page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE)); if (!page) { pr_warn("Can't alloc pages to measure memcpy performance"); From d3eabf2f2c81f8b0de0b75c6f41b875c501eb33b Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Fri, 15 Sep 2023 16:40:44 +0100 Subject: [PATCH 078/415] RISC-V: capitalise CMO op macros The CMO op macros initially used lower case, as the original iteration of the ALT_CMO_OP alternative stringified the first parameter to finalise the assembly for the standard variant. As a knock-on, the T-Head versions of these CMOs had to use mixed case defines. Commit dd23e9535889 ("RISC-V: replace cbom instructions with an insn-def") removed the asm construction with stringify, replacing it an insn-def macro, rending the lower-case surplus to requirements. As far as I can tell from a brief check, CBO_zero does not see similar use and didn't require the mixed case define in the first place. Replace the lower case characters now for consistency with other insn-def macros in the standard and T-Head forms, and adjust the callsites. Suggested-by: Andrew Jones Signed-off-by: Conor Dooley Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20230915-aloe-dollar-994937477776@spud Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/errata_list.h | 6 +++--- arch/riscv/include/asm/insn-def.h | 8 +++---- arch/riscv/lib/clear_page.S | 32 ++++++++++++++-------------- arch/riscv/mm/dma-noncoherent.c | 8 +++---- arch/riscv/mm/pmem.c | 4 ++-- 5 files changed, 29 insertions(+), 29 deletions(-) diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 0ac18a4135be..83ed25e43553 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -117,9 +117,9 @@ asm volatile(ALTERNATIVE( \ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | * 0000000 11001 00000 000 00000 0001011 */ -#define THEAD_inval_A0 ".long 0x0265000b" -#define THEAD_clean_A0 ".long 0x0255000b" -#define THEAD_flush_A0 ".long 0x0275000b" +#define THEAD_INVAL_A0 ".long 0x0265000b" +#define THEAD_CLEAN_A0 ".long 0x0255000b" +#define THEAD_FLUSH_A0 ".long 0x0275000b" #define THEAD_SYNC_S ".long 0x0190000b" #define ALT_CMO_OP(_op, _start, _size, _cachesize) \ diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h index 6960beb75f32..e27179b26086 100644 --- a/arch/riscv/include/asm/insn-def.h +++ b/arch/riscv/include/asm/insn-def.h @@ -180,19 +180,19 @@ INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(51), \ __RD(0), RS1(gaddr), RS2(vmid)) -#define CBO_inval(base) \ +#define CBO_INVAL(base) \ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(0)) -#define CBO_clean(base) \ +#define CBO_CLEAN(base) \ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(1)) -#define CBO_flush(base) \ +#define CBO_FLUSH(base) \ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(2)) -#define CBO_zero(base) \ +#define CBO_ZERO(base) \ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(4)) diff --git a/arch/riscv/lib/clear_page.S b/arch/riscv/lib/clear_page.S index d7a256eb53f4..b22de1231144 100644 --- a/arch/riscv/lib/clear_page.S +++ b/arch/riscv/lib/clear_page.S @@ -29,41 +29,41 @@ SYM_FUNC_START(clear_page) lw a1, riscv_cboz_block_size add a2, a0, a2 .Lzero_loop: - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 CBOZ_ALT(11, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 CBOZ_ALT(10, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 CBOZ_ALT(9, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 CBOZ_ALT(8, "bltu a0, a2, .Lzero_loop; ret", "nop; nop") - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 - CBO_zero(a0) + CBO_ZERO(a0) add a0, a0, a1 bltu a0, a2, .Lzero_loop ret diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index b76e7e192eb1..607d5f47d437 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -31,7 +31,7 @@ static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size) return; } #endif - ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size); + ALT_CMO_OP(CLEAN, vaddr, size, riscv_cbom_block_size); } static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size) @@ -45,7 +45,7 @@ static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size) } #endif - ALT_CMO_OP(inval, vaddr, size, riscv_cbom_block_size); + ALT_CMO_OP(INVAL, vaddr, size, riscv_cbom_block_size); } static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size) @@ -59,7 +59,7 @@ static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size) } #endif - ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size); + ALT_CMO_OP(FLUSH, vaddr, size, riscv_cbom_block_size); } static inline bool arch_sync_dma_clean_before_fromdevice(void) @@ -131,7 +131,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size) } #endif - ALT_CMO_OP(flush, flush_addr, size, riscv_cbom_block_size); + ALT_CMO_OP(FLUSH, flush_addr, size, riscv_cbom_block_size); } void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, diff --git a/arch/riscv/mm/pmem.c b/arch/riscv/mm/pmem.c index c5fc5ec96f6d..370a422ede11 100644 --- a/arch/riscv/mm/pmem.c +++ b/arch/riscv/mm/pmem.c @@ -17,7 +17,7 @@ void arch_wb_cache_pmem(void *addr, size_t size) return; } #endif - ALT_CMO_OP(clean, addr, size, riscv_cbom_block_size); + ALT_CMO_OP(CLEAN, addr, size, riscv_cbom_block_size); } EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); @@ -29,6 +29,6 @@ void arch_invalidate_pmem(void *addr, size_t size) return; } #endif - ALT_CMO_OP(inval, addr, size, riscv_cbom_block_size); + ALT_CMO_OP(INVAL, addr, size, riscv_cbom_block_size); } EXPORT_SYMBOL_GPL(arch_invalidate_pmem); From d5d2c264d33b9acf1a0216861942176ee3569258 Mon Sep 17 00:00:00 2001 From: Yu Chien Peter Lin Date: Thu, 21 Sep 2023 10:50:20 +0800 Subject: [PATCH 079/415] riscv: Improve PTDUMP to show RSW with non-zero value RSW field can be used to encode 2 bits of software defined information. Currently, PTDUMP only prints "RSW" when its value is 1 or 3. To fix this issue and improve the debugging experience with PTDUMP, we redefine _PAGE_SPECIAL to its original value and use _PAGE_SOFT as the RSW mask, allow it to print the RSW with any non-zero value. This patch also removes the val from the struct prot_bits as it is no longer needed. Signed-off-by: Yu Chien Peter Lin Reviewed-by: Alexandre Ghiti Tested-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20230921025022.3989723-2-peterlin@andestech.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable-bits.h | 4 ++-- arch/riscv/mm/ptdump.c | 33 ++++++++++++--------------- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h index f896708e8331..179bd4afece4 100644 --- a/arch/riscv/include/asm/pgtable-bits.h +++ b/arch/riscv/include/asm/pgtable-bits.h @@ -16,9 +16,9 @@ #define _PAGE_GLOBAL (1 << 5) /* Global */ #define _PAGE_ACCESSED (1 << 6) /* Set by hardware on any access */ #define _PAGE_DIRTY (1 << 7) /* Set by hardware on any write */ -#define _PAGE_SOFT (1 << 8) /* Reserved for software */ +#define _PAGE_SOFT (3 << 8) /* Reserved for software */ -#define _PAGE_SPECIAL _PAGE_SOFT +#define _PAGE_SPECIAL (1 << 8) /* RSW: 0x1 */ #define _PAGE_TABLE _PAGE_PRESENT /* diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 20a9f991a6d7..57a0926c6627 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -129,7 +129,6 @@ static struct ptd_mm_info efi_ptd_info = { /* Page Table Entry */ struct prot_bits { u64 mask; - u64 val; const char *set; const char *clear; }; @@ -137,47 +136,38 @@ struct prot_bits { static const struct prot_bits pte_bits[] = { { .mask = _PAGE_SOFT, - .val = _PAGE_SOFT, - .set = "RSW", - .clear = " ", + .set = "RSW(%d)", + .clear = " .. ", }, { .mask = _PAGE_DIRTY, - .val = _PAGE_DIRTY, .set = "D", .clear = ".", }, { .mask = _PAGE_ACCESSED, - .val = _PAGE_ACCESSED, .set = "A", .clear = ".", }, { .mask = _PAGE_GLOBAL, - .val = _PAGE_GLOBAL, .set = "G", .clear = ".", }, { .mask = _PAGE_USER, - .val = _PAGE_USER, .set = "U", .clear = ".", }, { .mask = _PAGE_EXEC, - .val = _PAGE_EXEC, .set = "X", .clear = ".", }, { .mask = _PAGE_WRITE, - .val = _PAGE_WRITE, .set = "W", .clear = ".", }, { .mask = _PAGE_READ, - .val = _PAGE_READ, .set = "R", .clear = ".", }, { .mask = _PAGE_PRESENT, - .val = _PAGE_PRESENT, .set = "V", .clear = ".", } @@ -208,15 +198,20 @@ static void dump_prot(struct pg_state *st) unsigned int i; for (i = 0; i < ARRAY_SIZE(pte_bits); i++) { - const char *s; + char s[7]; + unsigned long val; - if ((st->current_prot & pte_bits[i].mask) == pte_bits[i].val) - s = pte_bits[i].set; - else - s = pte_bits[i].clear; + val = st->current_prot & pte_bits[i].mask; + if (val) { + if (pte_bits[i].mask == _PAGE_SOFT) + sprintf(s, pte_bits[i].set, val >> 8); + else + sprintf(s, "%s", pte_bits[i].set); + } else { + sprintf(s, "%s", pte_bits[i].clear); + } - if (s) - pt_dump_seq_printf(st->seq, " %s", s); + pt_dump_seq_printf(st->seq, " %s", s); } } From 0713ff337173884bcaf163e44cadd6a56bc8193a Mon Sep 17 00:00:00 2001 From: Yu Chien Peter Lin Date: Thu, 21 Sep 2023 10:50:21 +0800 Subject: [PATCH 080/415] riscv: Introduce PBMT field to PTDUMP This patch introduces the PBMT field to the PTDUMP, so it can display the memory attributes for NC or IO. Signed-off-by: Yu Chien Peter Lin Reviewed-by: Alexandre Ghiti Tested-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20230921025022.3989723-3-peterlin@andestech.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/ptdump.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 57a0926c6627..13997cf3fe36 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -135,6 +135,12 @@ struct prot_bits { static const struct prot_bits pte_bits[] = { { +#ifdef CONFIG_64BIT + .mask = _PAGE_MTMASK_SVPBMT, + .set = "MT(%s)", + .clear = " .. ", + }, { +#endif .mask = _PAGE_SOFT, .set = "RSW(%d)", .clear = " .. ", @@ -205,6 +211,16 @@ static void dump_prot(struct pg_state *st) if (val) { if (pte_bits[i].mask == _PAGE_SOFT) sprintf(s, pte_bits[i].set, val >> 8); +#ifdef CONFIG_64BIT + else if (pte_bits[i].mask == _PAGE_MTMASK_SVPBMT) { + if (val == _PAGE_NOCACHE_SVPBMT) + sprintf(s, pte_bits[i].set, "NC"); + else if (val == _PAGE_IO_SVPBMT) + sprintf(s, pte_bits[i].set, "IO"); + else + sprintf(s, pte_bits[i].set, "??"); + } +#endif else sprintf(s, "%s", pte_bits[i].set); } else { From 015c3c370472290e37143d1387c000db7ec273ad Mon Sep 17 00:00:00 2001 From: Yu Chien Peter Lin Date: Thu, 21 Sep 2023 10:50:22 +0800 Subject: [PATCH 081/415] riscv: Introduce NAPOT field to PTDUMP This patch introduces the NAPOT field to PTDUMP, allowing it to display the letter "N" for pages that have the 63rd bit set. Signed-off-by: Yu Chien Peter Lin Reviewed-by: Alexandre Ghiti Tested-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20230921025022.3989723-4-peterlin@andestech.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/ptdump.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 13997cf3fe36..b71f08b91e53 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -136,6 +136,10 @@ struct prot_bits { static const struct prot_bits pte_bits[] = { { #ifdef CONFIG_64BIT + .mask = _PAGE_NAPOT, + .set = "N", + .clear = ".", + }, { .mask = _PAGE_MTMASK_SVPBMT, .set = "MT(%s)", .clear = " .. ", From b701f9e726f0a30a94ea6af596b74c1f07b95b6b Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Tue, 29 Aug 2023 10:36:15 +0200 Subject: [PATCH 082/415] riscv: provide riscv-specific is_trap_insn() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit uprobes expects is_trap_insn() to return true for any trap instructions, not just the one used for installing uprobe. The current default implementation only returns true for 16-bit c.ebreak if C extension is enabled. This can confuse uprobes if a 32-bit ebreak generates a trap exception from userspace: uprobes asks is_trap_insn() who says there is no trap, so uprobes assume a probe was there before but has been removed, and return to the trap instruction. This causes an infinite loop of entering and exiting trap handler. Instead of using the default implementation, implement this function speficially for riscv with checks for both ebreak and c.ebreak. Fixes: 74784081aac8 ("riscv: Add uprobes supported") Signed-off-by: Nam Cao Tested-by: Björn Töpel Reviewed-by: Guo Ren Link: https://lore.kernel.org/r/20230829083614.117748-1-namcaov@gmail.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/probes/uprobes.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c index 194f166b2cc4..4b3dc8beaf77 100644 --- a/arch/riscv/kernel/probes/uprobes.c +++ b/arch/riscv/kernel/probes/uprobes.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "decode-insn.h" @@ -17,6 +18,11 @@ bool is_swbp_insn(uprobe_opcode_t *insn) #endif } +bool is_trap_insn(uprobe_opcode_t *insn) +{ + return riscv_insn_is_ebreak(*insn) || riscv_insn_is_c_ebreak(*insn); +} + unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) { return instruction_pointer(regs); From 8cb22bec142624d21bc85ff96b7bad10b6220e6a Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Tue, 29 Aug 2023 20:25:00 +0200 Subject: [PATCH 083/415] riscv: kprobes: allow writing to x0 Instructions can write to x0, so we should simulate these instructions normally. Currently, the kernel hangs if an instruction who writes to x0 is simulated. Fixes: c22b0bcb1dd0 ("riscv: Add kprobes supported") Cc: stable@vger.kernel.org Signed-off-by: Nam Cao Reviewed-by: Charlie Jenkins Acked-by: Guo Ren Link: https://lore.kernel.org/r/20230829182500.61875-1-namcaov@gmail.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/probes/simulate-insn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c index d3099d67816d..6c166029079c 100644 --- a/arch/riscv/kernel/probes/simulate-insn.c +++ b/arch/riscv/kernel/probes/simulate-insn.c @@ -24,7 +24,7 @@ static inline bool rv_insn_reg_set_val(struct pt_regs *regs, u32 index, unsigned long val) { if (index == 0) - return false; + return true; else if (index <= 31) *((unsigned long *)regs + index) = val; else From b8a03a634129b61a7ec69ece4460297ffbd790dc Mon Sep 17 00:00:00 2001 From: Yunhui Cui Date: Tue, 12 Sep 2023 10:13:49 +0800 Subject: [PATCH 084/415] riscv: add userland instruction dump to RISC-V splats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add userland instruction dump and rename dump_kernel_instr() to dump_instr(). An example: [ 0.822439] Freeing unused kernel image (initmem) memory: 6916K [ 0.823817] Run /init as init process [ 0.839411] init[1]: unhandled signal 4 code 0x1 at 0x000000000005be18 in bb[10000+5fb000] [ 0.840751] CPU: 0 PID: 1 Comm: init Not tainted 5.14.0-rc4-00049-gbd644290aa72-dirty #187 [ 0.841373] Hardware name: , BIOS [ 0.841743] epc : 000000000005be18 ra : 0000000000079e74 sp : 0000003fffcafda0 [ 0.842271] gp : ffffffff816e9dc8 tp : 0000000000000000 t0 : 0000000000000000 [ 0.842947] t1 : 0000003fffc9fdf0 t2 : 0000000000000000 s0 : 0000000000000000 [ 0.843434] s1 : 0000000000000000 a0 : 0000003fffca0190 a1 : 0000003fffcafe18 [ 0.843891] a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000 [ 0.844357] a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000000000000 [ 0.844803] s2 : 0000000000000000 s3 : 0000000000000000 s4 : 0000000000000000 [ 0.845253] s5 : 0000000000000000 s6 : 0000000000000000 s7 : 0000000000000000 [ 0.845722] s8 : 0000000000000000 s9 : 0000000000000000 s10: 0000000000000000 [ 0.846180] s11: 0000000000d144e0 t3 : 0000000000000000 t4 : 0000000000000000 [ 0.846616] t5 : 0000000000000000 t6 : 0000000000000000 [ 0.847204] status: 0000000200000020 badaddr: 00000000f0028053 cause: 0000000000000002 [ 0.848219] Code: f06f ff5f 3823 fa11 0113 fb01 2e23 0201 0293 0000 (8053) f002 [ 0.851016] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000004 Signed-off-by: Yunhui Cui Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20230912021349.28302-1-cuiyunhui@bytedance.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 8895c8197519..141036d06015 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -34,7 +34,21 @@ int show_unhandled_signals = 1; static DEFINE_SPINLOCK(die_lock); -static void dump_kernel_instr(const char *loglvl, struct pt_regs *regs) +static int copy_code(struct pt_regs *regs, u16 *val, const u16 *insns) +{ + const void __user *uaddr = (__force const void __user *)insns; + + if (!user_mode(regs)) + return get_kernel_nofault(*val, insns); + + /* The user space code from other tasks cannot be accessed. */ + if (regs != task_pt_regs(current)) + return -EPERM; + + return copy_from_user_nofault(val, uaddr, sizeof(*val)); +} + +static void dump_instr(const char *loglvl, struct pt_regs *regs) { char str[sizeof("0000 ") * 12 + 2 + 1], *p = str; const u16 *insns = (u16 *)instruction_pointer(regs); @@ -43,7 +57,7 @@ static void dump_kernel_instr(const char *loglvl, struct pt_regs *regs) int i; for (i = -10; i < 2; i++) { - bad = get_kernel_nofault(val, &insns[i]); + bad = copy_code(regs, &val, &insns[i]); if (!bad) { p += sprintf(p, i == 0 ? "(%04hx) " : "%04hx ", val); } else { @@ -72,7 +86,7 @@ void die(struct pt_regs *regs, const char *str) print_modules(); if (regs) { show_regs(regs); - dump_kernel_instr(KERN_EMERG, regs); + dump_instr(KERN_EMERG, regs); } cause = regs ? regs->cause : -1; @@ -105,6 +119,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) print_vma_addr(KERN_CONT " in ", instruction_pointer(regs)); pr_cont("\n"); __show_regs(regs); + dump_instr(KERN_EMERG, regs); } force_sig_fault(signo, code, (void __user *)addr); From ddcc7d9bf531b2e950bc4a745a41c825a4759ae6 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 12 Sep 2023 15:20:13 +0800 Subject: [PATCH 085/415] riscv: vdso.lds.S: drop __alt_start and __alt_end symbols These two symbols are not used, remove them. Signed-off-by: Jisheng Zhang Tested-by: Emil Renner Berthing Link: https://lore.kernel.org/r/20230912072015.2424-2-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/vdso.lds.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 82ce64900f3d..d43fd7c7dd11 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -42,9 +42,7 @@ SECTIONS . = ALIGN(4); .alternative : { - __alt_start = .; *(.alternative) - __alt_end = .; } .data : { From 49cfbdc21faf5fffbdaa8fd31e1451a4432cfdaa Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 12 Sep 2023 15:20:14 +0800 Subject: [PATCH 086/415] riscv: vdso.lds.S: merge .data section into .rodata section The .data section doesn't need to be separate from .rodata section, they are both readonly. Signed-off-by: Jisheng Zhang Tested-by: Emil Renner Berthing Link: https://lore.kernel.org/r/20230912072015.2424-3-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/vdso.lds.S | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index d43fd7c7dd11..671aa21769bc 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -29,7 +29,13 @@ SECTIONS .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr .eh_frame : { KEEP (*(.eh_frame)) } :text - .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata : { + *(.rodata .rodata.* .gnu.linkonce.r.*) + *(.got.plt) *(.got) + *(.data .data.* .gnu.linkonce.d.*) + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + } /* * This linker script is used both with -r and with -shared. @@ -44,13 +50,6 @@ SECTIONS .alternative : { *(.alternative) } - - .data : { - *(.got.plt) *(.got) - *(.data .data.* .gnu.linkonce.d.*) - *(.dynbss) - *(.bss .bss.* .gnu.linkonce.b.*) - } } /* From 8f8c1ff879fab60f80f3a7aec3000f47e5b03ba9 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 12 Sep 2023 15:20:15 +0800 Subject: [PATCH 087/415] riscv: vdso.lds.S: remove hardcoded 0x800 .text start addr I believe the hardcoded 0x800 and related comments come from the long history VDSO_TEXT_OFFSET in x86 vdso code, but commit 5b9304933730 ("x86 vDSO: generate vdso-syms.lds") and commit f6b46ebf904f ("x86 vDSO: new layout") removes the comment and hard coding for x86. Similar as x86 and other arch, riscv doesn't need the rigid layout using VDSO_TEXT_OFFSET since it "no longer matters to the kernel". so we could remove the hard coding now, and removing it brings a small vdso.so and aligns with other architectures. Also, having enough separation between data and text is important for I-cache, so similar as x86, move .note, .eh_frame_hdr, and .eh_frame between .rodata and .text. Signed-off-by: Jisheng Zhang Reviewed-by: Andrew Jones Tested-by: Emil Renner Berthing Link: https://lore.kernel.org/r/20230912072015.2424-4-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/vdso.lds.S | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 671aa21769bc..cbe2a179331d 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -23,12 +23,8 @@ SECTIONS .gnu.version_d : { *(.gnu.version_d) } .gnu.version_r : { *(.gnu.version_r) } - .note : { *(.note.*) } :text :note .dynamic : { *(.dynamic) } :text :dynamic - .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr - .eh_frame : { KEEP (*(.eh_frame)) } :text - .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) *(.got.plt) *(.got) @@ -37,13 +33,16 @@ SECTIONS *(.bss .bss.* .gnu.linkonce.b.*) } + .note : { *(.note.*) } :text :note + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + /* - * This linker script is used both with -r and with -shared. - * For the layouts to match, we need to skip more than enough - * space for the dynamic symbol table, etc. If this amount is - * insufficient, ld -shared will error; simply increase it here. + * Text is well-separated from actual data: there's plenty of + * stuff that isn't used at runtime in between. */ - . = 0x800; + . = ALIGN(16); .text : { *(.text .text.*) } :text . = ALIGN(4); From dbfbda3bd6bfb5189e05b9eab8dfaad2d1d23f62 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 12 Sep 2023 15:25:10 +0800 Subject: [PATCH 088/415] riscv: mm: update T-Head memory type definitions Update T-Head memory type definitions according to C910 doc [1] For NC and IO, SH property isn't configurable, hardcoded as SH, so set SH for NOCACHE and IO. And also set bit[61](Bufferable) for NOCACHE according to the table 6.1 in the doc [1]. Link: https://github.com/T-head-Semi/openc910 [1] Signed-off-by: Jisheng Zhang Reviewed-by: Guo Ren Tested-by: Drew Fustini Link: https://lore.kernel.org/r/20230912072510.2510-1-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable-64.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 7a5097202e15..9a2c780a11e9 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -126,14 +126,18 @@ enum napot_cont_order { /* * [63:59] T-Head Memory Type definitions: - * - * 00000 - NC Weakly-ordered, Non-cacheable, Non-bufferable, Non-shareable, Non-trustable + * bit[63] SO - Strong Order + * bit[62] C - Cacheable + * bit[61] B - Bufferable + * bit[60] SH - Shareable + * bit[59] Sec - Trustable + * 00110 - NC Weakly-ordered, Non-cacheable, Bufferable, Shareable, Non-trustable * 01110 - PMA Weakly-ordered, Cacheable, Bufferable, Shareable, Non-trustable - * 10000 - IO Strongly-ordered, Non-cacheable, Non-bufferable, Non-shareable, Non-trustable + * 10010 - IO Strongly-ordered, Non-cacheable, Non-bufferable, Shareable, Non-trustable */ #define _PAGE_PMA_THEAD ((1UL << 62) | (1UL << 61) | (1UL << 60)) -#define _PAGE_NOCACHE_THEAD 0UL -#define _PAGE_IO_THEAD (1UL << 63) +#define _PAGE_NOCACHE_THEAD ((1UL < 61) | (1UL << 60)) +#define _PAGE_IO_THEAD ((1UL << 63) | (1UL << 60)) #define _PAGE_MTMASK_THEAD (_PAGE_PMA_THEAD | _PAGE_IO_THEAD | (1UL << 59)) static inline u64 riscv_page_mtmask(void) From 322948c3198cf80e7c10d953ddad24ebd85757cd Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 22 Sep 2023 10:04:57 +0200 Subject: [PATCH 089/415] powerpc/machdep: Remove trailing whitespaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix coding style. No functional changes. Signed-off-by: Thomas Zimmermann Reviewed-by: Arnd Bergmann Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Michael Ellerman Link: https://msgid.link/20230922080636.26762-4-tzimmermann@suse.de --- arch/powerpc/include/asm/machdep.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 4f6e7d7ee388..933465ed4c43 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -10,7 +10,7 @@ #include struct pt_regs; -struct pci_bus; +struct pci_bus; struct device_node; struct iommu_table; struct rtc_time; @@ -78,8 +78,8 @@ struct machdep_calls { unsigned char (*nvram_read_val)(int addr); void (*nvram_write_val)(int addr, unsigned char val); ssize_t (*nvram_write)(char *buf, size_t count, loff_t *index); - ssize_t (*nvram_read)(char *buf, size_t count, loff_t *index); - ssize_t (*nvram_size)(void); + ssize_t (*nvram_read)(char *buf, size_t count, loff_t *index); + ssize_t (*nvram_size)(void); void (*nvram_sync)(void); /* Exception handlers */ @@ -102,9 +102,9 @@ struct machdep_calls { */ long (*feature_call)(unsigned int feature, ...); - /* Get legacy PCI/IDE interrupt mapping */ + /* Get legacy PCI/IDE interrupt mapping */ int (*pci_get_legacy_ide_irq)(struct pci_dev *dev, int channel); - + /* Get access protection for /dev/mem */ pgprot_t (*phys_mem_access_prot)(struct file *file, unsigned long pfn, From 1f92a844c35e483c00bab8a7b7d39c555ee799d8 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 22 Sep 2023 10:04:58 +0200 Subject: [PATCH 090/415] powerpc: Remove file parameter from phys_mem_access_prot() Remove 'file' parameter from struct machdep_calls.phys_mem_access_prot and its implementation in pci_phys_mem_access_prot(). The file is not used on PowerPC. By removing it, a later patch can simplify fbdev's mmap code, which uses phys_mem_access_prot() on PowerPC. Signed-off-by: Thomas Zimmermann Reviewed-by: Arnd Bergmann [mpe: Rebase on unrelated changes to phys_mem_access_prot()] Signed-off-by: Michael Ellerman Link: https://msgid.link/20230922080636.26762-5-tzimmermann@suse.de --- arch/powerpc/include/asm/machdep.h | 3 +-- arch/powerpc/include/asm/pci.h | 4 +--- arch/powerpc/include/asm/pgtable.h | 10 ++++++++-- arch/powerpc/kernel/pci-common.c | 3 +-- arch/powerpc/mm/mem.c | 8 ++++---- 5 files changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 933465ed4c43..d31a5ec1550d 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -106,8 +106,7 @@ struct machdep_calls { int (*pci_get_legacy_ide_irq)(struct pci_dev *dev, int channel); /* Get access protection for /dev/mem */ - pgprot_t (*phys_mem_access_prot)(struct file *file, - unsigned long pfn, + pgprot_t (*phys_mem_access_prot)(unsigned long pfn, unsigned long size, pgprot_t vma_prot); diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index f5078a7dd85a..46a9c4491ed0 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -105,9 +105,7 @@ extern void of_scan_pci_bridge(struct pci_dev *dev); extern void of_scan_bus(struct device_node *node, struct pci_bus *bus); extern void of_rescan_bus(struct device_node *node, struct pci_bus *bus); -struct file; -extern pgprot_t pci_phys_mem_access_prot(struct file *file, - unsigned long pfn, +extern pgprot_t pci_phys_mem_access_prot(unsigned long pfn, unsigned long size, pgprot_t prot); diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 2bfb7dd3b49e..9224f23065ff 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -120,9 +120,15 @@ static inline void mark_initmem_nx(void) { } int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t entry, int dirty); +pgprot_t __phys_mem_access_prot(unsigned long pfn, unsigned long size, + pgprot_t vma_prot); + struct file; -pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot); +static inline pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot) +{ + return __phys_mem_access_prot(pfn, size, vma_prot); +} #define __HAVE_PHYS_MEM_ACCESS_PROT void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 040255ddb569..d95a48eff412 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -521,8 +521,7 @@ int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma) * PCI device, it tries to find the PCI device first and calls the * above routine */ -pgprot_t pci_phys_mem_access_prot(struct file *file, - unsigned long pfn, +pgprot_t pci_phys_mem_access_prot(unsigned long pfn, unsigned long size, pgprot_t prot) { diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1717554b04b1..3a440004b97d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -35,18 +35,18 @@ unsigned long long memory_limit; unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); -pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot) +pgprot_t __phys_mem_access_prot(unsigned long pfn, unsigned long size, + pgprot_t vma_prot) { if (ppc_md.phys_mem_access_prot) - return ppc_md.phys_mem_access_prot(file, pfn, size, vma_prot); + return ppc_md.phys_mem_access_prot(pfn, size, vma_prot); if (!page_is_ram(pfn)) vma_prot = pgprot_noncached(vma_prot); return vma_prot; } -EXPORT_SYMBOL(phys_mem_access_prot); +EXPORT_SYMBOL(__phys_mem_access_prot); #ifdef CONFIG_MEMORY_HOTPLUG static DEFINE_MUTEX(linear_mapping_mutex); From deebe5f607d7f72f83c41163191ad0c1c4356385 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 22 Sep 2023 10:04:59 +0200 Subject: [PATCH 091/415] powerpc/fb: Call internal __phys_mem_access_prot() in fbdev code Call __phys_mem_access_prot() from the fbdev mmap helper pgprot_framebuffer(). Allows to avoid the file argument of NULL. Signed-off-by: Thomas Zimmermann Reviewed-by: Arnd Bergmann Signed-off-by: Michael Ellerman Link: https://msgid.link/20230922080636.26762-6-tzimmermann@suse.de --- arch/powerpc/include/asm/fb.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/fb.h b/arch/powerpc/include/asm/fb.h index 3cecf14d51de..c0c5d1df7ad1 100644 --- a/arch/powerpc/include/asm/fb.h +++ b/arch/powerpc/include/asm/fb.h @@ -8,12 +8,7 @@ static inline pgprot_t pgprot_framebuffer(pgprot_t prot, unsigned long vm_start, unsigned long vm_end, unsigned long offset) { - /* - * PowerPC's implementation of phys_mem_access_prot() does - * not use the file argument. Set it to NULL in preparation - * of later updates to the interface. - */ - return phys_mem_access_prot(NULL, PHYS_PFN(offset), vm_end - vm_start, prot); + return __phys_mem_access_prot(PHYS_PFN(offset), vm_end - vm_start, prot); } #define pgprot_framebuffer pgprot_framebuffer From 8ae0e970319ac0b516d285650a744bab4ed3dd37 Mon Sep 17 00:00:00 2001 From: Jia He Date: Sat, 28 Oct 2023 10:20:58 +0000 Subject: [PATCH 092/415] dma-mapping: move dma_addressing_limited() out of line This patch moves dma_addressing_limited() out of line, serving as a preliminary step to prevent the introduction of a new publicly accessible low-level helper when validating whether all system RAM is mapped within the DMA mapping range. Suggested-by: Christoph Hellwig Signed-off-by: Jia He Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 19 +++++-------------- kernel/dma/mapping.c | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index f0ccca16a0ac..4a658de44ee9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -144,6 +144,7 @@ bool dma_pci_p2pdma_supported(struct device *dev); int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); +bool dma_addressing_limited(struct device *dev); size_t dma_max_mapping_size(struct device *dev); size_t dma_opt_mapping_size(struct device *dev); bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); @@ -264,6 +265,10 @@ static inline u64 dma_get_required_mask(struct device *dev) { return 0; } +static inline bool dma_addressing_limited(struct device *dev) +{ + return false; +} static inline size_t dma_max_mapping_size(struct device *dev) { return 0; @@ -465,20 +470,6 @@ static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask) return dma_set_mask_and_coherent(dev, mask); } -/** - * dma_addressing_limited - return if the device is addressing limited - * @dev: device to check - * - * Return %true if the devices DMA mask is too small to address all memory in - * the system, else %false. Lack of addressing bits is the prime reason for - * bounce buffering, but might not be the only one. - */ -static inline bool dma_addressing_limited(struct device *dev) -{ - return min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < - dma_get_required_mask(dev); -} - static inline unsigned int dma_get_max_seg_size(struct device *dev) { if (dev->dma_parms && dev->dma_parms->max_segment_size) diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index e323ca48f7f2..7789c86f7ba3 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -793,6 +793,21 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_coherent_mask); +/** + * dma_addressing_limited - return if the device is addressing limited + * @dev: device to check + * + * Return %true if the devices DMA mask is too small to address all memory in + * the system, else %false. Lack of addressing bits is the prime reason for + * bounce buffering, but might not be the only one. + */ +bool dma_addressing_limited(struct device *dev) +{ + return min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < + dma_get_required_mask(dev); +} +EXPORT_SYMBOL_GPL(dma_addressing_limited); + size_t dma_max_mapping_size(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); From a409d9600959f3c4b2a48946304c8e01b8d04072 Mon Sep 17 00:00:00 2001 From: Jia He Date: Sat, 28 Oct 2023 10:20:59 +0000 Subject: [PATCH 093/415] dma-mapping: fix dma_addressing_limited() if dma_range_map can't cover all system RAM There is an unusual case that the range map covers right up to the top of system RAM, but leaves a hole somewhere lower down. Then it prevents the nvme device dma mapping in the checking path of phys_to_dma() and causes the hangs at boot. E.g. On an Armv8 Ampere server, the dsdt ACPI table is: Method (_DMA, 0, Serialized) // _DMA: Direct Memory Access { Name (RBUF, ResourceTemplate () { QWordMemory (ResourceConsumer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite, 0x0000000000000000, // Granularity 0x0000000000000000, // Range Minimum 0x00000000FFFFFFFF, // Range Maximum 0x0000000000000000, // Translation Offset 0x0000000100000000, // Length ,, , AddressRangeMemory, TypeStatic) QWordMemory (ResourceConsumer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite, 0x0000000000000000, // Granularity 0x0000006010200000, // Range Minimum 0x000000602FFFFFFF, // Range Maximum 0x0000000000000000, // Translation Offset 0x000000001FE00000, // Length ,, , AddressRangeMemory, TypeStatic) QWordMemory (ResourceConsumer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite, 0x0000000000000000, // Granularity 0x00000060F0000000, // Range Minimum 0x00000060FFFFFFFF, // Range Maximum 0x0000000000000000, // Translation Offset 0x0000000010000000, // Length ,, , AddressRangeMemory, TypeStatic) QWordMemory (ResourceConsumer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite, 0x0000000000000000, // Granularity 0x0000007000000000, // Range Minimum 0x000003FFFFFFFFFF, // Range Maximum 0x0000000000000000, // Translation Offset 0x0000039000000000, // Length ,, , AddressRangeMemory, TypeStatic) }) But the System RAM ranges are: cat /proc/iomem |grep -i ram 90000000-91ffffff : System RAM 92900000-fffbffff : System RAM 880000000-fffffffff : System RAM 8800000000-bff5990fff : System RAM bff59d0000-bff5a4ffff : System RAM bff8000000-bfffffffff : System RAM So some RAM ranges are out of dma_range_map. Fix it by checking whether each of the system RAM resources can be properly encompassed within the dma_range_map. Signed-off-by: Jia He Signed-off-by: Christoph Hellwig --- kernel/dma/direct.c | 40 ++++++++++++++++++++++++++++++++++++++++ kernel/dma/direct.h | 1 + kernel/dma/mapping.c | 11 +++++++++-- 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index ed3056eb20b8..73c95815789a 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -587,6 +587,46 @@ int dma_direct_supported(struct device *dev, u64 mask) return mask >= phys_to_dma_unencrypted(dev, min_mask); } +/* + * To check whether all ram resource ranges are covered by dma range map + * Returns 0 when further check is needed + * Returns 1 if there is some RAM range can't be covered by dma_range_map + */ +static int check_ram_in_range_map(unsigned long start_pfn, + unsigned long nr_pages, void *data) +{ + unsigned long end_pfn = start_pfn + nr_pages; + const struct bus_dma_region *bdr = NULL; + const struct bus_dma_region *m; + struct device *dev = data; + + while (start_pfn < end_pfn) { + for (m = dev->dma_range_map; PFN_DOWN(m->size); m++) { + unsigned long cpu_start_pfn = PFN_DOWN(m->cpu_start); + + if (start_pfn >= cpu_start_pfn && + start_pfn - cpu_start_pfn < PFN_DOWN(m->size)) { + bdr = m; + break; + } + } + if (!bdr) + return 1; + + start_pfn = PFN_DOWN(bdr->cpu_start) + PFN_DOWN(bdr->size); + } + + return 0; +} + +bool dma_direct_all_ram_mapped(struct device *dev) +{ + if (!dev->dma_range_map) + return true; + return !walk_system_ram_range(0, PFN_DOWN(ULONG_MAX) + 1, dev, + check_ram_in_range_map); +} + size_t dma_direct_max_mapping_size(struct device *dev) { /* If SWIOTLB is active, use its maximum mapping size */ diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h index 97ec892ea0b5..18d346118fe8 100644 --- a/kernel/dma/direct.h +++ b/kernel/dma/direct.h @@ -20,6 +20,7 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs); +bool dma_direct_all_ram_mapped(struct device *dev); size_t dma_direct_max_mapping_size(struct device *dev); #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 7789c86f7ba3..58db8fd70471 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -803,8 +803,15 @@ EXPORT_SYMBOL(dma_set_coherent_mask); */ bool dma_addressing_limited(struct device *dev) { - return min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < - dma_get_required_mask(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < + dma_get_required_mask(dev)) + return true; + + if (unlikely(ops)) + return false; + return !dma_direct_all_ram_mapped(dev); } EXPORT_SYMBOL_GPL(dma_addressing_limited); From 0e8b9f258baed25f1c5672613699247c76b007b5 Mon Sep 17 00:00:00 2001 From: Zongmin Zhou Date: Tue, 1 Aug 2023 10:53:09 +0800 Subject: [PATCH 094/415] drm/qxl: prevent memory leak The allocated memory for qdev->dumb_heads should be released in qxl_destroy_monitors_object before qxl suspend. otherwise,qxl_create_monitors_object will be called to reallocate memory for qdev->dumb_heads after qxl resume, it will cause memory leak. Signed-off-by: Zongmin Zhou Link: https://lore.kernel.org/r/20230801025309.4049813-1-zhouzongmin@kylinos.cn Reviewed-by: Dave Airlie Signed-off-by: Maxime Ripard --- drivers/gpu/drm/qxl/qxl_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 6492a70e3c39..404b0483bb7c 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -1229,6 +1229,9 @@ int qxl_destroy_monitors_object(struct qxl_device *qdev) if (!qdev->monitors_config_bo) return 0; + kfree(qdev->dumb_heads); + qdev->dumb_heads = NULL; + qdev->monitors_config = NULL; qdev->ram_header->monitors_config = 0; From 15be353d55f9e12e34f9a819f51eb41fdef5eda8 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 6 Nov 2023 11:40:11 +0100 Subject: [PATCH 095/415] ASoC: hdmi-codec: register hpd callback on component probe The HDMI hotplug callback to the hdmi-codec is currently registered when jack is set. The hotplug not only serves to report the ASoC jack state but also to get the ELD. It should be registered when the component probes instead, so it does not depend on the card driver registering a jack for the HDMI to properly report the ELD. Fixes: 25ce4f2b3593 ("ASoC: hdmi-codec: Get ELD in before reporting plugged event") Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20231106104013.704356-1-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/codecs/hdmi-codec.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index 09eef6042aad..20da1eaa4f1c 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -877,18 +877,13 @@ static int hdmi_codec_set_jack(struct snd_soc_component *component, void *data) { struct hdmi_codec_priv *hcp = snd_soc_component_get_drvdata(component); - int ret = -ENOTSUPP; if (hcp->hcd.ops->hook_plugged_cb) { hcp->jack = jack; - ret = hcp->hcd.ops->hook_plugged_cb(component->dev->parent, - hcp->hcd.data, - plugged_cb, - component->dev); - if (ret) - hcp->jack = NULL; + return 0; } - return ret; + + return -ENOTSUPP; } static int hdmi_dai_spdif_probe(struct snd_soc_dai *dai) @@ -982,6 +977,21 @@ static int hdmi_of_xlate_dai_id(struct snd_soc_component *component, return ret; } +static int hdmi_probe(struct snd_soc_component *component) +{ + struct hdmi_codec_priv *hcp = snd_soc_component_get_drvdata(component); + int ret = 0; + + if (hcp->hcd.ops->hook_plugged_cb) { + ret = hcp->hcd.ops->hook_plugged_cb(component->dev->parent, + hcp->hcd.data, + plugged_cb, + component->dev); + } + + return ret; +} + static void hdmi_remove(struct snd_soc_component *component) { struct hdmi_codec_priv *hcp = snd_soc_component_get_drvdata(component); @@ -992,6 +1002,7 @@ static void hdmi_remove(struct snd_soc_component *component) } static const struct snd_soc_component_driver hdmi_driver = { + .probe = hdmi_probe, .remove = hdmi_remove, .dapm_widgets = hdmi_widgets, .num_dapm_widgets = ARRAY_SIZE(hdmi_widgets), From 4bdcbc31ad2112385ad525b28972c45015e6ad70 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 6 Nov 2023 11:37:09 +0100 Subject: [PATCH 096/415] ASoC: dapm: fix clock get name The name currently used to get the clock includes the dapm prefix. It should use the name as provided to the widget, without the prefix. Fixes: 3caac759681e ("ASoC: soc-dapm.c: fixup snd_soc_dapm_new_control_unlocked() error handling") Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20231106103712.703962-1-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/soc-dapm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 4e2beda6f9bf..3844f777c87b 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3670,7 +3670,7 @@ snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm, dapm_pinctrl_event(w, NULL, SND_SOC_DAPM_POST_PMD); break; case snd_soc_dapm_clock_supply: - w->clk = devm_clk_get(dapm->dev, w->name); + w->clk = devm_clk_get(dapm->dev, widget->name); if (IS_ERR(w->clk)) { ret = PTR_ERR(w->clk); goto request_failed; From c2ded280a4b1b7bd93e53670528504be08d24967 Mon Sep 17 00:00:00 2001 From: Amit Kumar Mahapatra Date: Sat, 4 Nov 2023 00:13:51 +0530 Subject: [PATCH 097/415] spi: spi-zynq-qspi: add spi-mem to driver kconfig dependencies Zynq QSPI driver has been converted to use spi-mem framework so add spi-mem to driver kconfig dependencies. Fixes: 67dca5e580f1 ("spi: spi-mem: Add support for Zynq QSPI controller") Signed-off-by: Amit Kumar Mahapatra Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1699037031-702858-1-git-send-email-radhey.shyam.pandey@amd.com Signed-off-by: Mark Brown --- drivers/spi/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index d4ac184bce95..e128f38bd11f 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -1158,6 +1158,7 @@ config SPI_XTENSA_XTFPGA config SPI_ZYNQ_QSPI tristate "Xilinx Zynq QSPI controller" depends on ARCH_ZYNQ || COMPILE_TEST + depends on SPI_MEM help This enables support for the Zynq Quad SPI controller in master mode. From 0ad755fb88bdb7452f976d97847a47dbf7496763 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 25 Oct 2023 12:28:26 +0200 Subject: [PATCH 098/415] drm/i915/mtl: Apply notify_guc to all GTs Handle platforms with multiple GTs by iterate over all GTs. Add a Fixes commit so this gets propagated for MTL support. Fixes: 213c43676beb ("drm/i915/mtl: Remove the 'force_probe' requirement for Meteor Lake") Suggested-by: John Harrison Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: Andi Shyti Cc: Andrzej Hajda Signed-off-by: Nirmoy Das Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20231025102826.16955-1-nirmoy.das@intel.com (cherry picked from commit 949113d34fb82a5dc6f5dd3ad9168001b441792b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_debugfs_params.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs_params.c b/drivers/gpu/drm/i915/i915_debugfs_params.c index 614bde321589..8bca02025e09 100644 --- a/drivers/gpu/drm/i915/i915_debugfs_params.c +++ b/drivers/gpu/drm/i915/i915_debugfs_params.c @@ -38,10 +38,13 @@ static int i915_param_int_open(struct inode *inode, struct file *file) static int notify_guc(struct drm_i915_private *i915) { - int ret = 0; + struct intel_gt *gt; + int i, ret = 0; - if (intel_uc_uses_guc_submission(&to_gt(i915)->uc)) - ret = intel_guc_global_policies_update(&to_gt(i915)->uc.guc); + for_each_gt(gt, i915, i) { + if (intel_uc_uses_guc_submission(>->uc)) + ret = intel_guc_global_policies_update(>->uc.guc); + } return ret; } From 0cb89cd42fd22bbdec0b046c48f35775f5b88bdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 31 Oct 2023 18:08:00 +0200 Subject: [PATCH 099/415] drm/i915: Bump GLK CDCLK frequency when driving multiple pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On GLK CDCLK frequency needs to be at least 2*96 MHz when accessing the audio hardware. Currently we bump the CDCLK frequency up temporarily (if not high enough already) whenever audio hardware is being accessed, and drop it back down afterwards. With a single active pipe this works just fine as we can switch between all the valid CDCLK frequencies by changing the cd2x divider, which doesn't require a full modeset. However with multiple active pipes the cd2x divider trick no longer works, and thus we end up blinking all displays off and back on. To avoid this let's just bump the CDCLK frequency to >=2*96MHz whenever multiple pipes are active. The downside is slightly higher power consumption, but that seems like an acceptable tradeoff. With a single active pipe we can stick to the current more optiomal (from power comsumption POV) behaviour. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9599 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231031160800.18371-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 451eaa1a614c911f5a51078dcb68022874e4cb12) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_cdclk.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 6d7ba4d0f130..c4839c67cb0f 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2750,6 +2750,18 @@ static int intel_compute_min_cdclk(struct intel_cdclk_state *cdclk_state) for_each_pipe(dev_priv, pipe) min_cdclk = max(cdclk_state->min_cdclk[pipe], min_cdclk); + /* + * Avoid glk_force_audio_cdclk() causing excessive screen + * blinking when multiple pipes are active by making sure + * CDCLK frequency is always high enough for audio. With a + * single active pipe we can always change CDCLK frequency + * by changing the cd2x divider (see glk_cdclk_table[]) and + * thus a full modeset won't be needed then. + */ + if (IS_GEMINILAKE(dev_priv) && cdclk_state->active_pipes && + !is_power_of_2(cdclk_state->active_pipes)) + min_cdclk = max(2 * 96000, min_cdclk); + if (min_cdclk > dev_priv->display.cdclk.max_cdclk_freq) { drm_dbg_kms(&dev_priv->drm, "required cdclk (%d kHz) exceeds max (%d kHz)\n", From 1a8e9bad6ef563c28ab0f8619628d5511be55431 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 3 Nov 2023 11:09:22 +0000 Subject: [PATCH 100/415] drm/i915: Fix potential spectre vulnerability Fix smatch warning: drivers/gpu/drm/i915/gem/i915_gem_context.c:847 set_proto_ctx_sseu() warn: potential spectre issue 'pc->user_engines' [r] (local cap) Fixes: d4433c7600f7 ("drm/i915/gem: Use the proto-context to handle create parameters (v5)") Cc: # v5.15+ Signed-off-by: Kunwu Chan Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20231103110922.430122-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit 27b086382c22efb7e0a16442f7bdc2e120108ef3) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 9a9ff84c90d7..e38f06a6e56e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -844,6 +844,7 @@ static int set_proto_ctx_sseu(struct drm_i915_file_private *fpriv, if (idx >= pc->num_user_engines) return -EINVAL; + idx = array_index_nospec(idx, pc->num_user_engines); pe = &pc->user_engines[idx]; /* Only render engine supports RPCS configuration. */ From 9506fba463fcbdf8c8b7af3ec9ee34360df843fe Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Thu, 26 Oct 2023 14:56:36 +0200 Subject: [PATCH 101/415] drm/i915/tc: Fix -Wformat-truncation in intel_tc_port_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix below compiler warning: intel_tc.c:1879:11: error: ‘%d’ directive output may be truncated writing between 1 and 11 bytes into a region of size 3 [-Werror=format-truncation=] "%c/TC#%d", port_name(port), tc_port + 1); ^~ intel_tc.c:1878:2: note: ‘snprintf’ output between 7 and 17 bytes into a destination of size 8 snprintf(tc->port_name, sizeof(tc->port_name), ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "%c/TC#%d", port_name(port), tc_port + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ v2: use kasprintf(Imre) v3: use const for port_name, and fix tc mem leak(Imre) Fixes: 3eafcddf766b ("drm/i915/tc: Move TC port fields to a new intel_tc_port struct") Cc: Mika Kahola Cc: Imre Deak Cc: Jani Nikula Signed-off-by: Nirmoy Das Reviewed-by: Andrzej Hajda Reviewed-by: Imre Deak Reviewed-by: Mika Kahola Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231026125636.5080-1-nirmoy.das@intel.com (cherry picked from commit 70a3cbbe620ee66afb0c066624196077767e61b2) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_tc.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 37b0f8529b4f..f64d348a969e 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -58,7 +58,7 @@ struct intel_tc_port { struct delayed_work link_reset_work; int link_refcount; bool legacy_port:1; - char port_name[8]; + const char *port_name; enum tc_port_mode mode; enum tc_port_mode init_mode; enum phy_fia phy_fia; @@ -1875,8 +1875,12 @@ int intel_tc_port_init(struct intel_digital_port *dig_port, bool is_legacy) else tc->phy_ops = &icl_tc_phy_ops; - snprintf(tc->port_name, sizeof(tc->port_name), - "%c/TC#%d", port_name(port), tc_port + 1); + tc->port_name = kasprintf(GFP_KERNEL, "%c/TC#%d", port_name(port), + tc_port + 1); + if (!tc->port_name) { + kfree(tc); + return -ENOMEM; + } mutex_init(&tc->lock); /* TODO: Combine the two works */ @@ -1897,6 +1901,7 @@ void intel_tc_port_cleanup(struct intel_digital_port *dig_port) { intel_tc_port_suspend(dig_port); + kfree(dig_port->tc->port_name); kfree(dig_port->tc); dig_port->tc = NULL; } From 0e984ec88da9747549227900e5215c5e6a1b65ae Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 6 Nov 2023 07:41:17 -0700 Subject: [PATCH 102/415] io_uring/rw: add separate prep handler for readv/writev Rather than sprinkle opcode checks in the generic read/write prep handler, have a separate prep handler for the vectored readv/writev operation. Signed-off-by: Jens Axboe --- io_uring/opdef.c | 4 ++-- io_uring/rw.c | 22 +++++++++++++++------- io_uring/rw.h | 1 + 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 25a3515a177c..0521a26bc6cd 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -66,7 +66,7 @@ const struct io_issue_def io_issue_defs[] = { .iopoll = 1, .iopoll_queue = 1, .vectored = 1, - .prep = io_prep_rw, + .prep = io_prep_rwv, .issue = io_read, }, [IORING_OP_WRITEV] = { @@ -80,7 +80,7 @@ const struct io_issue_def io_issue_defs[] = { .iopoll = 1, .iopoll_queue = 1, .vectored = 1, - .prep = io_prep_rw, + .prep = io_prep_rwv, .issue = io_write, }, [IORING_OP_FSYNC] = { diff --git a/io_uring/rw.c b/io_uring/rw.c index 1c76de483ef6..63d343bae762 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -110,15 +110,23 @@ int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) rw->addr = READ_ONCE(sqe->addr); rw->len = READ_ONCE(sqe->len); rw->flags = READ_ONCE(sqe->rw_flags); + return 0; +} - /* Have to do this validation here, as this is in io_read() rw->len might - * have chanaged due to buffer selection +int io_prep_rwv(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + int ret; + + ret = io_prep_rw(req, sqe); + if (unlikely(ret)) + return ret; + + /* + * Have to do this validation here, as this is in io_read() rw->len + * might have chanaged due to buffer selection */ - if (req->opcode == IORING_OP_READV && req->flags & REQ_F_BUFFER_SELECT) { - ret = io_iov_buffer_select_prep(req); - if (ret) - return ret; - } + if (req->flags & REQ_F_BUFFER_SELECT) + return io_iov_buffer_select_prep(req); return 0; } diff --git a/io_uring/rw.h b/io_uring/rw.h index c5aed03d42a4..32aa7937513a 100644 --- a/io_uring/rw.h +++ b/io_uring/rw.h @@ -16,6 +16,7 @@ struct io_async_rw { }; int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe); +int io_prep_rwv(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_read(struct io_kiocb *req, unsigned int issue_flags); int io_readv_prep_async(struct io_kiocb *req); int io_write(struct io_kiocb *req, unsigned int issue_flags); From f688944cfb810986c626cb13d95bc666e5c8a36c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 6 Nov 2023 07:43:16 -0700 Subject: [PATCH 103/415] io_uring/rw: add separate prep handler for fixed read/write Rather than sprinkle opcode checks in the generic read/write prep handler, have a separate prep handler for the vectored readv/writev operation. Signed-off-by: Jens Axboe --- io_uring/opdef.c | 4 ++-- io_uring/rw.c | 30 ++++++++++++++++++------------ io_uring/rw.h | 1 + 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 0521a26bc6cd..799db44283c7 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -98,7 +98,7 @@ const struct io_issue_def io_issue_defs[] = { .ioprio = 1, .iopoll = 1, .iopoll_queue = 1, - .prep = io_prep_rw, + .prep = io_prep_rw_fixed, .issue = io_read, }, [IORING_OP_WRITE_FIXED] = { @@ -111,7 +111,7 @@ const struct io_issue_def io_issue_defs[] = { .ioprio = 1, .iopoll = 1, .iopoll_queue = 1, - .prep = io_prep_rw, + .prep = io_prep_rw_fixed, .issue = io_write, }, [IORING_OP_POLL_ADD] = { diff --git a/io_uring/rw.c b/io_uring/rw.c index 63d343bae762..9e3e56b74e35 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -83,18 +83,6 @@ int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) /* used for fixed read/write too - just read unconditionally */ req->buf_index = READ_ONCE(sqe->buf_index); - if (req->opcode == IORING_OP_READ_FIXED || - req->opcode == IORING_OP_WRITE_FIXED) { - struct io_ring_ctx *ctx = req->ctx; - u16 index; - - if (unlikely(req->buf_index >= ctx->nr_user_bufs)) - return -EFAULT; - index = array_index_nospec(req->buf_index, ctx->nr_user_bufs); - req->imu = ctx->user_bufs[index]; - io_req_set_rsrc_node(req, ctx, 0); - } - ioprio = READ_ONCE(sqe->ioprio); if (ioprio) { ret = ioprio_check_cap(ioprio); @@ -131,6 +119,24 @@ int io_prep_rwv(struct io_kiocb *req, const struct io_uring_sqe *sqe) return 0; } +int io_prep_rw_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_ring_ctx *ctx = req->ctx; + u16 index; + int ret; + + ret = io_prep_rw(req, sqe); + if (unlikely(ret)) + return ret; + + if (unlikely(req->buf_index >= ctx->nr_user_bufs)) + return -EFAULT; + index = array_index_nospec(req->buf_index, ctx->nr_user_bufs); + req->imu = ctx->user_bufs[index]; + io_req_set_rsrc_node(req, ctx, 0); + return 0; +} + /* * Multishot read is prepared just like a normal read/write request, only * difference is that we set the MULTISHOT flag. diff --git a/io_uring/rw.h b/io_uring/rw.h index 32aa7937513a..f9e89b4fe4da 100644 --- a/io_uring/rw.h +++ b/io_uring/rw.h @@ -17,6 +17,7 @@ struct io_async_rw { int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_prep_rwv(struct io_kiocb *req, const struct io_uring_sqe *sqe); +int io_prep_rw_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_read(struct io_kiocb *req, unsigned int issue_flags); int io_readv_prep_async(struct io_kiocb *req); int io_write(struct io_kiocb *req, unsigned int issue_flags); From c5e9b2c2ae82231d85d9650854e7b3e97dde33da Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 30 Oct 2023 14:30:25 +0100 Subject: [PATCH 104/415] riscv: Improve tlb_flush() For now, tlb_flush() simply calls flush_tlb_mm() which results in a flush of the whole TLB. So let's use mmu_gather fields to provide a more fine-grained flush of the TLB. Signed-off-by: Alexandre Ghiti Reviewed-by: Andrew Jones Reviewed-by: Samuel Holland Tested-by: Lad Prabhakar # On RZ/Five SMARC Link: https://lore.kernel.org/r/20231030133027.19542-2-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/tlb.h | 8 +++++++- arch/riscv/include/asm/tlbflush.h | 3 +++ arch/riscv/mm/tlbflush.c | 7 +++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h index 120bcf2ed8a8..1eb5682b2af6 100644 --- a/arch/riscv/include/asm/tlb.h +++ b/arch/riscv/include/asm/tlb.h @@ -15,7 +15,13 @@ static void tlb_flush(struct mmu_gather *tlb); static inline void tlb_flush(struct mmu_gather *tlb) { - flush_tlb_mm(tlb->mm); +#ifdef CONFIG_MMU + if (tlb->fullmm || tlb->need_flush_all) + flush_tlb_mm(tlb->mm); + else + flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, + tlb_get_unmap_size(tlb)); +#endif } #endif /* _ASM_RISCV_TLB_H */ diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index a09196f8de68..f5c4fb0ae642 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -32,6 +32,8 @@ static inline void local_flush_tlb_page(unsigned long addr) #if defined(CONFIG_SMP) && defined(CONFIG_MMU) void flush_tlb_all(void); void flush_tlb_mm(struct mm_struct *mm); +void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + unsigned long end, unsigned int page_size); void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr); void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -52,6 +54,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, } #define flush_tlb_mm(mm) flush_tlb_all() +#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() #endif /* !CONFIG_SMP || !CONFIG_MMU */ /* Flush a range of kernel pages */ diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 77be59aadc73..fa03289853d8 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -132,6 +132,13 @@ void flush_tlb_mm(struct mm_struct *mm) __flush_tlb_range(mm, 0, -1, PAGE_SIZE); } +void flush_tlb_mm_range(struct mm_struct *mm, + unsigned long start, unsigned long end, + unsigned int page_size) +{ + __flush_tlb_range(mm, start, end - start, page_size); +} + void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { __flush_tlb_range(vma->vm_mm, addr, PAGE_SIZE, PAGE_SIZE); From c962a6e7463980a513e990a7a8a9967e529ad467 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 30 Oct 2023 14:30:26 +0100 Subject: [PATCH 105/415] riscv: Improve flush_tlb_range() for hugetlb pages flush_tlb_range() uses a fixed stride of PAGE_SIZE and in its current form, when a hugetlb mapping needs to be flushed, flush_tlb_range() flushes the whole tlb: so set a stride of the size of the hugetlb mapping in order to only flush the hugetlb mapping. However, if the hugepage is a NAPOT region, all PTEs that constitute this mapping must be invalidated, so the stride size must actually be the size of the PTE. Note that THPs are directly handled by flush_pmd_tlb_range(). Signed-off-by: Alexandre Ghiti Reviewed-by: Samuel Holland Tested-by: Lad Prabhakar # On RZ/Five SMARC Link: https://lore.kernel.org/r/20231030133027.19542-3-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/tlbflush.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index fa03289853d8..b6d712a82306 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -147,7 +148,33 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __flush_tlb_range(vma->vm_mm, start, end - start, PAGE_SIZE); + unsigned long stride_size; + + if (!is_vm_hugetlb_page(vma)) { + stride_size = PAGE_SIZE; + } else { + stride_size = huge_page_size(hstate_vma(vma)); + + /* + * As stated in the privileged specification, every PTE in a + * NAPOT region must be invalidated, so reset the stride in that + * case. + */ + if (has_svnapot()) { + if (stride_size >= PGDIR_SIZE) + stride_size = PGDIR_SIZE; + else if (stride_size >= P4D_SIZE) + stride_size = P4D_SIZE; + else if (stride_size >= PUD_SIZE) + stride_size = PUD_SIZE; + else if (stride_size >= PMD_SIZE) + stride_size = PMD_SIZE; + else + stride_size = PAGE_SIZE; + } + } + + __flush_tlb_range(vma->vm_mm, start, end - start, stride_size); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, From 9d4e8d5fa7dbbb606b355f40d918a1feef821bc5 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 30 Oct 2023 14:30:27 +0100 Subject: [PATCH 106/415] riscv: Make __flush_tlb_range() loop over pte instead of flushing the whole tlb Currently, when the range to flush covers more than one page (a 4K page or a hugepage), __flush_tlb_range() flushes the whole tlb. Flushing the whole tlb comes with a greater cost than flushing a single entry so we should flush single entries up to a certain threshold so that: threshold * cost of flushing a single entry < cost of flushing the whole tlb. Co-developed-by: Mayuresh Chitale Signed-off-by: Mayuresh Chitale Signed-off-by: Alexandre Ghiti Reviewed-by: Andrew Jones Tested-by: Lad Prabhakar # On RZ/Five SMARC Reviewed-by: Samuel Holland Tested-by: Samuel Holland Link: https://lore.kernel.org/r/20231030133027.19542-4-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/sbi.h | 3 - arch/riscv/include/asm/tlbflush.h | 3 + arch/riscv/kernel/sbi.c | 32 +++------ arch/riscv/mm/tlbflush.c | 113 +++++++++++++++--------------- 4 files changed, 71 insertions(+), 80 deletions(-) diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 5b4a1bf5f439..b79d0228144f 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -273,9 +273,6 @@ void sbi_set_timer(uint64_t stime_value); void sbi_shutdown(void); void sbi_send_ipi(unsigned int cpu); int sbi_remote_fence_i(const struct cpumask *cpu_mask); -int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, - unsigned long start, - unsigned long size); int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask, unsigned long start, diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index f5c4fb0ae642..170a49c531c6 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -11,6 +11,9 @@ #include #include +#define FLUSH_TLB_MAX_SIZE ((unsigned long)-1) +#define FLUSH_TLB_NO_ASID ((unsigned long)-1) + #ifdef CONFIG_MMU extern unsigned long asid_mask; diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index c672c8ba9a2a..5a62ed1da453 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -11,6 +11,7 @@ #include #include #include +#include /* default SBI version is 0.1 */ unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT; @@ -376,32 +377,15 @@ int sbi_remote_fence_i(const struct cpumask *cpu_mask) } EXPORT_SYMBOL(sbi_remote_fence_i); -/** - * sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote - * harts for the specified virtual address range. - * @cpu_mask: A cpu mask containing all the target harts. - * @start: Start of the virtual address - * @size: Total size of the virtual address range. - * - * Return: 0 on success, appropriate linux error code otherwise. - */ -int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, - unsigned long start, - unsigned long size) -{ - return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, - cpu_mask, start, size, 0, 0); -} -EXPORT_SYMBOL(sbi_remote_sfence_vma); - /** * sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given - * remote harts for a virtual address range belonging to a specific ASID. + * remote harts for a virtual address range belonging to a specific ASID or not. * * @cpu_mask: A cpu mask containing all the target harts. * @start: Start of the virtual address * @size: Total size of the virtual address range. - * @asid: The value of address space identifier (ASID). + * @asid: The value of address space identifier (ASID), or FLUSH_TLB_NO_ASID + * for flushing all address spaces. * * Return: 0 on success, appropriate linux error code otherwise. */ @@ -410,8 +394,12 @@ int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask, unsigned long size, unsigned long asid) { - return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, - cpu_mask, start, size, asid, 0); + if (asid == FLUSH_TLB_NO_ASID) + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, + cpu_mask, start, size, 0, 0); + else + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, + cpu_mask, start, size, asid, 0); } EXPORT_SYMBOL(sbi_remote_sfence_vma_asid); diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index b6d712a82306..e46fefc70927 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -9,28 +9,50 @@ static inline void local_flush_tlb_all_asid(unsigned long asid) { - __asm__ __volatile__ ("sfence.vma x0, %0" - : - : "r" (asid) - : "memory"); + if (asid != FLUSH_TLB_NO_ASID) + __asm__ __volatile__ ("sfence.vma x0, %0" + : + : "r" (asid) + : "memory"); + else + local_flush_tlb_all(); } static inline void local_flush_tlb_page_asid(unsigned long addr, unsigned long asid) { - __asm__ __volatile__ ("sfence.vma %0, %1" - : - : "r" (addr), "r" (asid) - : "memory"); + if (asid != FLUSH_TLB_NO_ASID) + __asm__ __volatile__ ("sfence.vma %0, %1" + : + : "r" (addr), "r" (asid) + : "memory"); + else + local_flush_tlb_page(addr); } -static inline void local_flush_tlb_range(unsigned long start, - unsigned long size, unsigned long stride) +/* + * Flush entire TLB if number of entries to be flushed is greater + * than the threshold below. + */ +static unsigned long tlb_flush_all_threshold __read_mostly = 64; + +static void local_flush_tlb_range_threshold_asid(unsigned long start, + unsigned long size, + unsigned long stride, + unsigned long asid) { - if (size <= stride) - local_flush_tlb_page(start); - else - local_flush_tlb_all(); + unsigned long nr_ptes_in_range = DIV_ROUND_UP(size, stride); + int i; + + if (nr_ptes_in_range > tlb_flush_all_threshold) { + local_flush_tlb_all_asid(asid); + return; + } + + for (i = 0; i < nr_ptes_in_range; ++i) { + local_flush_tlb_page_asid(start, asid); + start += stride; + } } static inline void local_flush_tlb_range_asid(unsigned long start, @@ -38,8 +60,10 @@ static inline void local_flush_tlb_range_asid(unsigned long start, { if (size <= stride) local_flush_tlb_page_asid(start, asid); - else + else if (size == FLUSH_TLB_MAX_SIZE) local_flush_tlb_all_asid(asid); + else + local_flush_tlb_range_threshold_asid(start, size, stride, asid); } static void __ipi_flush_tlb_all(void *info) @@ -52,7 +76,7 @@ void flush_tlb_all(void) if (riscv_use_ipi_for_rfence()) on_each_cpu(__ipi_flush_tlb_all, NULL, 1); else - sbi_remote_sfence_vma(NULL, 0, -1); + sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID); } struct flush_tlb_range_data { @@ -69,18 +93,12 @@ static void __ipi_flush_tlb_range_asid(void *info) local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid); } -static void __ipi_flush_tlb_range(void *info) -{ - struct flush_tlb_range_data *d = info; - - local_flush_tlb_range(d->start, d->size, d->stride); -} - static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long size, unsigned long stride) { struct flush_tlb_range_data ftd; struct cpumask *cmask = mm_cpumask(mm); + unsigned long asid = FLUSH_TLB_NO_ASID; unsigned int cpuid; bool broadcast; @@ -90,39 +108,24 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, cpuid = get_cpu(); /* check if the tlbflush needs to be sent to other CPUs */ broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; - if (static_branch_unlikely(&use_asid_allocator)) { - unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask; - if (broadcast) { - if (riscv_use_ipi_for_rfence()) { - ftd.asid = asid; - ftd.start = start; - ftd.size = size; - ftd.stride = stride; - on_each_cpu_mask(cmask, - __ipi_flush_tlb_range_asid, - &ftd, 1); - } else - sbi_remote_sfence_vma_asid(cmask, - start, size, asid); - } else { - local_flush_tlb_range_asid(start, size, stride, asid); - } + if (static_branch_unlikely(&use_asid_allocator)) + asid = atomic_long_read(&mm->context.id) & asid_mask; + + if (broadcast) { + if (riscv_use_ipi_for_rfence()) { + ftd.asid = asid; + ftd.start = start; + ftd.size = size; + ftd.stride = stride; + on_each_cpu_mask(cmask, + __ipi_flush_tlb_range_asid, + &ftd, 1); + } else + sbi_remote_sfence_vma_asid(cmask, + start, size, asid); } else { - if (broadcast) { - if (riscv_use_ipi_for_rfence()) { - ftd.asid = 0; - ftd.start = start; - ftd.size = size; - ftd.stride = stride; - on_each_cpu_mask(cmask, - __ipi_flush_tlb_range, - &ftd, 1); - } else - sbi_remote_sfence_vma(cmask, start, size); - } else { - local_flush_tlb_range(start, size, stride); - } + local_flush_tlb_range_asid(start, size, stride, asid); } put_cpu(); @@ -130,7 +133,7 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, void flush_tlb_mm(struct mm_struct *mm) { - __flush_tlb_range(mm, 0, -1, PAGE_SIZE); + __flush_tlb_range(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); } void flush_tlb_mm_range(struct mm_struct *mm, From 5e22bfd520ea8740e9a20314d2a890baf304c9d2 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 30 Oct 2023 14:30:28 +0100 Subject: [PATCH 107/415] riscv: Improve flush_tlb_kernel_range() This function used to simply flush the whole tlb of all harts, be more subtile and try to only flush the range. The problem is that we can only use PAGE_SIZE as stride since we don't know the size of the underlying mapping and then this function will be improved only if the size of the region to flush is < threshold * PAGE_SIZE. Signed-off-by: Alexandre Ghiti Reviewed-by: Andrew Jones Tested-by: Lad Prabhakar # On RZ/Five SMARC Reviewed-by: Samuel Holland Tested-by: Samuel Holland Link: https://lore.kernel.org/r/20231030133027.19542-5-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/tlbflush.h | 11 +++++----- arch/riscv/mm/tlbflush.c | 34 ++++++++++++++++++++++--------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 170a49c531c6..8f3418c5f172 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -40,6 +40,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr); void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +void flush_tlb_kernel_range(unsigned long start, unsigned long end); #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, @@ -56,15 +57,15 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, local_flush_tlb_all(); } -#define flush_tlb_mm(mm) flush_tlb_all() -#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() -#endif /* !CONFIG_SMP || !CONFIG_MMU */ - /* Flush a range of kernel pages */ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - flush_tlb_all(); + local_flush_tlb_all(); } +#define flush_tlb_mm(mm) flush_tlb_all() +#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() +#endif /* !CONFIG_SMP || !CONFIG_MMU */ + #endif /* _ASM_RISCV_TLBFLUSH_H */ diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index e46fefc70927..e6659d7368b3 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -97,20 +97,27 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long size, unsigned long stride) { struct flush_tlb_range_data ftd; - struct cpumask *cmask = mm_cpumask(mm); + const struct cpumask *cmask; unsigned long asid = FLUSH_TLB_NO_ASID; - unsigned int cpuid; bool broadcast; - if (cpumask_empty(cmask)) - return; + if (mm) { + unsigned int cpuid; - cpuid = get_cpu(); - /* check if the tlbflush needs to be sent to other CPUs */ - broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; + cmask = mm_cpumask(mm); + if (cpumask_empty(cmask)) + return; - if (static_branch_unlikely(&use_asid_allocator)) - asid = atomic_long_read(&mm->context.id) & asid_mask; + cpuid = get_cpu(); + /* check if the tlbflush needs to be sent to other CPUs */ + broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; + + if (static_branch_unlikely(&use_asid_allocator)) + asid = atomic_long_read(&mm->context.id) & asid_mask; + } else { + cmask = cpu_online_mask; + broadcast = true; + } if (broadcast) { if (riscv_use_ipi_for_rfence()) { @@ -128,7 +135,8 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, local_flush_tlb_range_asid(start, size, stride, asid); } - put_cpu(); + if (mm) + put_cpu(); } void flush_tlb_mm(struct mm_struct *mm) @@ -179,6 +187,12 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, __flush_tlb_range(vma->vm_mm, start, end - start, stride_size); } + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + __flush_tlb_range(NULL, start, end - start, PAGE_SIZE); +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) From 744eac783f9e105358eed05b42dcc5c5789744b3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 28 Oct 2023 22:00:40 -0700 Subject: [PATCH 108/415] nvme-auth: use crypto_shash_tfm_digest() Simplify nvme_auth_augmented_challenge() by using crypto_shash_tfm_digest() instead of an alloc+init+update+final sequence. This should also improve performance. Signed-off-by: Eric Biggers Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/common/auth.c | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c index a8e87dfbeab2..a23ab5c968b9 100644 --- a/drivers/nvme/common/auth.c +++ b/drivers/nvme/common/auth.c @@ -341,7 +341,6 @@ int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len, u8 *challenge, u8 *aug, size_t hlen) { struct crypto_shash *tfm; - struct shash_desc *desc; u8 *hashed_key; const char *hmac_name; int ret; @@ -369,29 +368,11 @@ int nvme_auth_augmented_challenge(u8 hmac_id, u8 *skey, size_t skey_len, goto out_free_key; } - desc = kmalloc(sizeof(struct shash_desc) + crypto_shash_descsize(tfm), - GFP_KERNEL); - if (!desc) { - ret = -ENOMEM; - goto out_free_hash; - } - desc->tfm = tfm; - ret = crypto_shash_setkey(tfm, hashed_key, hlen); if (ret) - goto out_free_desc; + goto out_free_hash; - ret = crypto_shash_init(desc); - if (ret) - goto out_free_desc; - - ret = crypto_shash_update(desc, challenge, hlen); - if (ret) - goto out_free_desc; - - ret = crypto_shash_final(desc, aug); -out_free_desc: - kfree_sensitive(desc); + ret = crypto_shash_tfm_digest(tfm, challenge, hlen, aug); out_free_hash: crypto_free_shash(tfm); out_free_key: From 0e32fdd7968eb9a39aa4d4111aef0fda8684af9e Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 30 Oct 2023 15:49:28 +0100 Subject: [PATCH 109/415] nvme-tcp: Fix a memory leak All error handling path end to the error handling path, except this one. Go to the error handling branch as well here, otherwise 'icreq' and 'icresp' will leak. Fixes: 2837966ab2a8 ("nvme-tcp: control message handling for recvmsg()") Signed-off-by: Christophe JAILLET Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 4714a902f4ca..f97711fc9f9f 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1423,13 +1423,14 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) nvme_tcp_queue_id(queue), ret); goto free_icresp; } + ret = -ENOTCONN; if (queue->ctrl->ctrl.opts->tls) { ctype = tls_get_record_type(queue->sock->sk, (struct cmsghdr *)cbuf); if (ctype != TLS_RECORD_TYPE_DATA) { pr_err("queue %d: unhandled TLS record %d\n", nvme_tcp_queue_id(queue), ctype); - return -ENOTCONN; + goto free_icresp; } } ret = -EINVAL; From 983a338b96c8a25b81e773b643f80634358e81bc Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 30 Oct 2023 17:00:44 +0100 Subject: [PATCH 110/415] nvme: update firmware version after commit The firmware version sysfs entry needs to be updated after a successfully firmware activation. nvme-cli stopped issuing an Identify Controller command to list the current firmware information and relies on sysfs showing the current firmware version. Reported-by: Kenji Tomonaga Signed-off-by: Daniel Wagner Tested-by: Kenji Tomonaga Reviewed-by: Christoph Hellwig Reviewed-by: Niklas Cassel [fixed off-by one afi index] Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 62612f87aafa..97441d022926 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4074,8 +4074,21 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl) return; if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, NVME_CSI_NVM, - log, sizeof(*log), 0)) + log, sizeof(*log), 0)) { dev_warn(ctrl->device, "Get FW SLOT INFO log error\n"); + goto out_free_log; + } + + if (log->afi & 0x70 || !(log->afi & 0x7)) { + dev_info(ctrl->device, + "Firmware is activated after next Controller Level Reset\n"); + goto out_free_log; + } + + memcpy(ctrl->subsys->firmware_rev, &log->frs[(log->afi & 0x7) - 1], + sizeof(ctrl->subsys->firmware_rev)); + +out_free_log: kfree(log); } From 1147dd0503564fa0e03489a039f9e0c748a03db4 Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Thu, 19 Oct 2023 00:54:30 +0530 Subject: [PATCH 111/415] nvme: fix error-handling for io_uring nvme-passthrough Driver may return an error before submitting the command to the device. Ensure that such error is propagated up. Fixes: 456cba386e94 ("nvme: wire-up uring-cmd support for io-passthru on char-device.") Signed-off-by: Anuj Gupta Signed-off-by: Kanchan Joshi Reviewed-by: Niklas Cassel Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/ioctl.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 747c879e8982..529b9954d2b8 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -510,10 +510,13 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); req->bio = pdu->bio; - if (nvme_req(req)->flags & NVME_REQ_CANCELLED) + if (nvme_req(req)->flags & NVME_REQ_CANCELLED) { pdu->nvme_status = -EINTR; - else + } else { pdu->nvme_status = nvme_req(req)->status; + if (!pdu->nvme_status) + pdu->nvme_status = blk_status_to_errno(err); + } pdu->u.result = le64_to_cpu(nvme_req(req)->result.u64); /* From 75276847f4e262a52ccaf1a1c6b929280ddf77f6 Mon Sep 17 00:00:00 2001 From: Mark O'Donovan Date: Wed, 25 Oct 2023 10:51:23 +0000 Subject: [PATCH 112/415] nvme-auth: auth success1 msg always includes resp In cases where RVALID is false, the response is still transmitted, but is cleared to zero. Relevant extract from the spec: Response R2, if valid (i.e., if the RVALID field is set to 01h), cleared to 0h otherwise Signed-off-by: Mark O'Donovan Reviewed-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- drivers/nvme/host/auth.c | 5 +---- include/linux/nvme.h | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index eaefebb2a799..5885bb0d5a9a 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -339,10 +339,7 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl, struct nvme_dhchap_queue_context *chap) { struct nvmf_auth_dhchap_success1_data *data = chap->buf; - size_t size = sizeof(*data); - - if (chap->s2) - size += chap->hash_len; + size_t size = sizeof(*data) + chap->hash_len; if (size > CHAP_BUF_SIZE) { chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index a7ba74babad7..44325c068b6a 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1732,7 +1732,7 @@ struct nvmf_auth_dhchap_success1_data { __u8 rsvd2; __u8 rvalid; __u8 rsvd3[7]; - /* 'hl' bytes of response value if 'rvalid' is set */ + /* 'hl' bytes of response value */ __u8 rval[]; }; From fc1e03eacac8e5ff8664ce2ebadabba8604f09f4 Mon Sep 17 00:00:00 2001 From: Mark O'Donovan Date: Wed, 25 Oct 2023 10:51:24 +0000 Subject: [PATCH 113/415] nvme-auth: add flag for bi-directional auth Introduces an explicit variable for bi-directional auth. The currently used variable chap->s2 is incorrectly zeroed for uni-directional auth. That will be fixed in the next patch so this needs to change to avoid sending unexpected success2 messages Signed-off-by: Mark O'Donovan Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke --- drivers/nvme/host/auth.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 5885bb0d5a9a..bff2b2c3cd79 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -29,6 +29,7 @@ struct nvme_dhchap_queue_context { int error; u32 s1; u32 s2; + bool bi_directional; u16 transaction; u8 status; u8 dhgroup_id; @@ -312,6 +313,7 @@ static int nvme_auth_set_dhchap_reply_data(struct nvme_ctrl *ctrl, data->dhvlen = cpu_to_le16(chap->host_key_len); memcpy(data->rval, chap->response, chap->hash_len); if (ctrl->ctrl_key) { + chap->bi_directional = true; get_random_bytes(chap->c2, chap->hash_len); data->cvalid = 1; chap->s2 = nvme_auth_get_seqnum(); @@ -660,6 +662,7 @@ static void nvme_auth_reset_dhchap(struct nvme_dhchap_queue_context *chap) chap->error = 0; chap->s1 = 0; chap->s2 = 0; + chap->bi_directional = false; chap->transaction = 0; memset(chap->c1, 0, sizeof(chap->c1)); memset(chap->c2, 0, sizeof(chap->c2)); @@ -822,7 +825,7 @@ static void nvme_queue_auth_work(struct work_struct *work) goto fail2; } - if (chap->s2) { + if (chap->bi_directional) { /* DH-HMAC-CHAP Step 5: send success2 */ dev_dbg(ctrl->device, "%s: qid %d send success2\n", __func__, chap->qid); From 6f66d046eade7a5b979e349ac52026ddfe2776b3 Mon Sep 17 00:00:00 2001 From: Mark O'Donovan Date: Wed, 25 Oct 2023 10:51:25 +0000 Subject: [PATCH 114/415] nvme-auth: always set valid seq_num in dhchap reply Currently a seqnum of zero is sent during uni-directional authentication. The zero value is reserved for the secure channel feature which is not yet implemented. Relevant extract from the spec: The value 0h is used to indicate that bidirectional authentication is not performed, but a challenge value C2 is carried in order to generate a pre-shared key (PSK) for subsequent establishment of a secure channel Signed-off-by: Mark O'Donovan Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke --- drivers/nvme/host/auth.c | 3 +-- drivers/nvme/target/fabrics-cmd-auth.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index bff2b2c3cd79..48328e36e93b 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -316,15 +316,14 @@ static int nvme_auth_set_dhchap_reply_data(struct nvme_ctrl *ctrl, chap->bi_directional = true; get_random_bytes(chap->c2, chap->hash_len); data->cvalid = 1; - chap->s2 = nvme_auth_get_seqnum(); memcpy(data->rval + chap->hash_len, chap->c2, chap->hash_len); dev_dbg(ctrl->device, "%s: qid %d ctrl challenge %*ph\n", __func__, chap->qid, (int)chap->hash_len, chap->c2); } else { memset(chap->c2, 0, chap->hash_len); - chap->s2 = 0; } + chap->s2 = nvme_auth_get_seqnum(); data->seqnum = cpu_to_le32(chap->s2); if (chap->host_key_len) { dev_dbg(ctrl->device, "%s: qid %d host public key %*ph\n", diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index 1d9854484e2e..eb7785be0ca7 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -163,11 +163,11 @@ static u16 nvmet_auth_reply(struct nvmet_req *req, void *d) pr_debug("%s: ctrl %d qid %d challenge %*ph\n", __func__, ctrl->cntlid, req->sq->qid, data->hl, req->sq->dhchap_c2); - req->sq->dhchap_s2 = le32_to_cpu(data->seqnum); } else { req->sq->authenticated = true; req->sq->dhchap_c2 = NULL; } + req->sq->dhchap_s2 = le32_to_cpu(data->seqnum); return 0; } From fd1418de10b9ca03d78404cf00a95138689ea369 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 24 Oct 2023 08:13:35 +0200 Subject: [PATCH 115/415] nvme-tcp: avoid open-coding nvme_tcp_teardown_admin_queue() nvme_tcp_setup_ctrl() has an open-coded version of nvme_tcp_teardown_admin_queue(). Signed-off-by: Hannes Reinecke Tested-by: Mark O'Donovan Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index f97711fc9f9f..89661a9cf850 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2237,11 +2237,7 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) nvme_tcp_destroy_io_queues(ctrl, new); } destroy_admin: - nvme_quiesce_admin_queue(ctrl); - blk_sync_queue(ctrl->admin_q); - nvme_tcp_stop_queue(ctrl, 0); - nvme_cancel_admin_tagset(ctrl); - nvme_tcp_destroy_admin_queue(ctrl, new); + nvme_tcp_teardown_admin_queue(ctrl, false); return ret; } From 55adcdbbdd349de935de677ccb59ff8be8c67f6a Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 24 Oct 2023 08:13:36 +0200 Subject: [PATCH 116/415] nvme-loop: always quiesce and cancel commands before destroying admin q Once ->init_ctrl_finish() is called there may be commands outstanding, so we should quiesce the admin queue and cancel all commands prior to call nvme_loop_destroy_admin_queue(). Signed-off-by: Hannes Reinecke Tested-by: Mark O'Donovan Signed-off-by: Keith Busch --- drivers/nvme/target/loop.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 48d5df054cd0..9cb434c58075 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -466,6 +466,8 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) out_destroy_io: nvme_loop_destroy_io_queues(ctrl); out_destroy_admin: + nvme_quiesce_admin_queue(&ctrl->ctrl); + nvme_cancel_admin_tagset(&ctrl->ctrl); nvme_loop_destroy_admin_queue(ctrl); out_disable: dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); @@ -600,6 +602,8 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, return &ctrl->ctrl; out_remove_admin_queue: + nvme_quiesce_admin_queue(&ctrl->ctrl); + nvme_cancel_admin_tagset(&ctrl->ctrl); nvme_loop_destroy_admin_queue(ctrl); out_free_queues: kfree(ctrl->queues); From 4733b65d82bdb19bca5ba47ff6c9b24bce1b3f9f Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 24 Oct 2023 08:13:37 +0200 Subject: [PATCH 117/415] nvme: start keep-alive after admin queue setup Setting up I/O queues might take quite some time on larger and/or busy setups, so KATO might expire before all I/O queues could be set up. Fix this by start keep alive from the ->init_ctrl_finish() callback, and stopping it when calling nvme_cancel_admin_tagset(). Signed-off-by: Hannes Reinecke Tested-by: Mark O'Donovan [fixed nvme-fc compile error] Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 6 +++--- drivers/nvme/host/fc.c | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 97441d022926..75a1b58a7a43 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -483,6 +483,7 @@ EXPORT_SYMBOL_GPL(nvme_cancel_tagset); void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl) { + nvme_stop_keep_alive(ctrl); if (ctrl->admin_tagset) { blk_mq_tagset_busy_iter(ctrl->admin_tagset, nvme_cancel_request, ctrl); @@ -3200,6 +3201,8 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended) clear_bit(NVME_CTRL_DIRTY_CAPABILITY, &ctrl->flags); ctrl->identified = true; + nvme_start_keep_alive(ctrl); + return 0; } EXPORT_SYMBOL_GPL(nvme_init_ctrl_finish); @@ -4346,7 +4349,6 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) { nvme_mpath_stop(ctrl); nvme_auth_stop(ctrl); - nvme_stop_keep_alive(ctrl); nvme_stop_failfast_work(ctrl); flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fw_act_work); @@ -4357,8 +4359,6 @@ EXPORT_SYMBOL_GPL(nvme_stop_ctrl); void nvme_start_ctrl(struct nvme_ctrl *ctrl) { - nvme_start_keep_alive(ctrl); - nvme_enable_aen(ctrl); /* diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index a15b37750d6e..49c3e46eaa1e 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2530,6 +2530,12 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) * clean up the admin queue. Same thing as above. */ nvme_quiesce_admin_queue(&ctrl->ctrl); + + /* + * Open-coding nvme_cancel_admin_tagset() as fc + * is not using nvme_cancel_request(). + */ + nvme_stop_keep_alive(&ctrl->ctrl); blk_sync_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); From 57a4542cb7c9baa1509c3366b57a08d75b212ead Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 24 Oct 2023 16:53:18 +0200 Subject: [PATCH 118/415] riscv: boot: Fix creation of loader.bin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When flashing loader.bin for K210 using kflash:     [ERROR] This is an ELF file and cannot be programmed to flash directly: arch/riscv/boot/loader.bin Before, loader.bin relied on "OBJCOPYFLAGS := -O binary" in the main RISC-V Makefile to create a boot image with the right format. With this removed, the image is now created in the wrong (ELF) format. Fix this by adding an explicit rule. Fixes: 505b02957e74f0c5 ("riscv: Remove duplicate objcopy flag") Signed-off-by: Geert Uytterhoeven Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/1086025809583809538dfecaa899892218f44e7e.1698159066.git.geert+renesas@glider.be Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index 22b13947bd13..8e7fc0edf21d 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -17,6 +17,7 @@ KCOV_INSTRUMENT := n OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S +OBJCOPYFLAGS_loader.bin :=-O binary OBJCOPYFLAGS_xipImage :=-O binary -R .note -R .note.gnu.build-id -R .comment -S targets := Image Image.* loader loader.o loader.lds loader.bin From b18f7296fbfdb2ad0871f00f3042fc74663d52ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Tue, 24 Oct 2023 15:26:51 +0200 Subject: [PATCH 119/415] riscv: use ".L" local labels in assembly when applicable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For the sake of coherency, use local labels in assembly when applicable. This also avoid kprobes being confused when applying a kprobe since the size of function is computed by checking where the next visible symbol is located. This might end up in computing some function size to be way shorter than expected and thus failing to apply kprobes to the specified offset. Signed-off-by: Clément Léger Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20231024132655.730417-2-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 6 ++--- arch/riscv/kernel/head.S | 18 ++++++------- arch/riscv/kernel/mcount.S | 10 +++---- arch/riscv/lib/memmove.S | 54 +++++++++++++++++++------------------- 4 files changed, 44 insertions(+), 44 deletions(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 9f92c067f7e1..e48478eb6f2d 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -26,9 +26,9 @@ SYM_CODE_START(handle_exception) * register will contain 0, and we should continue on the current TP. */ csrrw tp, CSR_SCRATCH, tp - bnez tp, _save_context + bnez tp, .Lsave_context -_restore_kernel_tpsp: +.Lrestore_kernel_tpsp: csrr tp, CSR_SCRATCH REG_S sp, TASK_TI_KERNEL_SP(tp) @@ -40,7 +40,7 @@ _restore_kernel_tpsp: REG_L sp, TASK_TI_KERNEL_SP(tp) #endif -_save_context: +.Lsave_context: REG_S sp, TASK_TI_USER_SP(tp) REG_L sp, TASK_TI_KERNEL_SP(tp) addi sp, sp, -(PT_SIZE_ON_STACK) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 18f97ec0f7ed..0c0432eb57d7 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -164,12 +164,12 @@ secondary_start_sbi: XIP_FIXUP_OFFSET a0 call relocate_enable_mmu #endif - call setup_trap_vector + call .Lsetup_trap_vector tail smp_callin #endif /* CONFIG_SMP */ .align 2 -setup_trap_vector: +.Lsetup_trap_vector: /* Set trap vector to exception handler */ la a0, handle_exception csrw CSR_TVEC, a0 @@ -206,7 +206,7 @@ ENTRY(_start_kernel) * not implement PMPs, so we set up a quick trap handler to just skip * touching the PMPs on any trap. */ - la a0, pmp_done + la a0, .Lpmp_done csrw CSR_TVEC, a0 li a0, -1 @@ -214,7 +214,7 @@ ENTRY(_start_kernel) li a0, (PMP_A_NAPOT | PMP_R | PMP_W | PMP_X) csrw CSR_PMPCFG0, a0 .align 2 -pmp_done: +.Lpmp_done: /* * The hartid in a0 is expected later on, and we have no firmware @@ -275,12 +275,12 @@ pmp_done: /* Clear BSS for flat non-ELF images */ la a3, __bss_start la a4, __bss_stop - ble a4, a3, clear_bss_done -clear_bss: + ble a4, a3, .Lclear_bss_done +.Lclear_bss: REG_S zero, (a3) add a3, a3, RISCV_SZPTR - blt a3, a4, clear_bss -clear_bss_done: + blt a3, a4, .Lclear_bss +.Lclear_bss_done: #endif la a2, boot_cpu_hartid XIP_FIXUP_OFFSET a2 @@ -305,7 +305,7 @@ clear_bss_done: call relocate_enable_mmu #endif /* CONFIG_MMU */ - call setup_trap_vector + call .Lsetup_trap_vector /* Restore C environment */ la tp, init_task la sp, init_thread_union + THREAD_SIZE diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 8818a8fa9ff3..ab4dd0594fe7 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -85,16 +85,16 @@ ENTRY(MCOUNT_NAME) #ifdef CONFIG_FUNCTION_GRAPH_TRACER la t0, ftrace_graph_return REG_L t1, 0(t0) - bne t1, t4, do_ftrace_graph_caller + bne t1, t4, .Ldo_ftrace_graph_caller la t3, ftrace_graph_entry REG_L t2, 0(t3) la t6, ftrace_graph_entry_stub - bne t2, t6, do_ftrace_graph_caller + bne t2, t6, .Ldo_ftrace_graph_caller #endif la t3, ftrace_trace_function REG_L t5, 0(t3) - bne t5, t4, do_trace + bne t5, t4, .Ldo_trace ret #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -102,7 +102,7 @@ ENTRY(MCOUNT_NAME) * A pseudo representation for the function graph tracer: * prepare_to_return(&ra_to_caller_of_caller, ra_to_caller) */ -do_ftrace_graph_caller: +.Ldo_ftrace_graph_caller: addi a0, s0, -SZREG mv a1, ra #ifdef HAVE_FUNCTION_GRAPH_FP_TEST @@ -118,7 +118,7 @@ do_ftrace_graph_caller: * A pseudo representation for the function tracer: * (*ftrace_trace_function)(ra_to_caller, ra_to_caller_of_caller) */ -do_trace: +.Ldo_trace: REG_L a1, -SZREG(s0) mv a0, ra diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S index 838ff2022fe3..1930b388c3a0 100644 --- a/arch/riscv/lib/memmove.S +++ b/arch/riscv/lib/memmove.S @@ -26,8 +26,8 @@ SYM_FUNC_START_WEAK(memmove) */ /* Return if nothing to do */ - beq a0, a1, return_from_memmove - beqz a2, return_from_memmove + beq a0, a1, .Lreturn_from_memmove + beqz a2, .Lreturn_from_memmove /* * Register Uses @@ -60,7 +60,7 @@ SYM_FUNC_START_WEAK(memmove) * small enough not to bother. */ andi t0, a2, -(2 * SZREG) - beqz t0, byte_copy + beqz t0, .Lbyte_copy /* * Now solve for t5 and t6. @@ -87,14 +87,14 @@ SYM_FUNC_START_WEAK(memmove) */ xor t0, a0, a1 andi t1, t0, (SZREG - 1) - beqz t1, coaligned_copy + beqz t1, .Lcoaligned_copy /* Fall through to misaligned fixup copy */ -misaligned_fixup_copy: - bltu a1, a0, misaligned_fixup_copy_reverse +.Lmisaligned_fixup_copy: + bltu a1, a0, .Lmisaligned_fixup_copy_reverse -misaligned_fixup_copy_forward: - jal t0, byte_copy_until_aligned_forward +.Lmisaligned_fixup_copy_forward: + jal t0, .Lbyte_copy_until_aligned_forward andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */ slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */ @@ -153,10 +153,10 @@ misaligned_fixup_copy_forward: mv t3, t6 /* Fix the dest pointer in case the loop was broken */ add a1, t3, a5 /* Restore the src pointer */ - j byte_copy_forward /* Copy any remaining bytes */ + j .Lbyte_copy_forward /* Copy any remaining bytes */ -misaligned_fixup_copy_reverse: - jal t0, byte_copy_until_aligned_reverse +.Lmisaligned_fixup_copy_reverse: + jal t0, .Lbyte_copy_until_aligned_reverse andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */ slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */ @@ -215,18 +215,18 @@ misaligned_fixup_copy_reverse: mv t4, t5 /* Fix the dest pointer in case the loop was broken */ add a4, t4, a5 /* Restore the src pointer */ - j byte_copy_reverse /* Copy any remaining bytes */ + j .Lbyte_copy_reverse /* Copy any remaining bytes */ /* * Simple copy loops for SZREG co-aligned memory locations. * These also make calls to do byte copies for any unaligned * data at their terminations. */ -coaligned_copy: - bltu a1, a0, coaligned_copy_reverse +.Lcoaligned_copy: + bltu a1, a0, .Lcoaligned_copy_reverse -coaligned_copy_forward: - jal t0, byte_copy_until_aligned_forward +.Lcoaligned_copy_forward: + jal t0, .Lbyte_copy_until_aligned_forward 1: REG_L t1, ( 0 * SZREG)(a1) @@ -235,10 +235,10 @@ coaligned_copy_forward: REG_S t1, (-1 * SZREG)(t3) bne t3, t6, 1b - j byte_copy_forward /* Copy any remaining bytes */ + j .Lbyte_copy_forward /* Copy any remaining bytes */ -coaligned_copy_reverse: - jal t0, byte_copy_until_aligned_reverse +.Lcoaligned_copy_reverse: + jal t0, .Lbyte_copy_until_aligned_reverse 1: REG_L t1, (-1 * SZREG)(a4) @@ -247,7 +247,7 @@ coaligned_copy_reverse: REG_S t1, ( 0 * SZREG)(t4) bne t4, t5, 1b - j byte_copy_reverse /* Copy any remaining bytes */ + j .Lbyte_copy_reverse /* Copy any remaining bytes */ /* * These are basically sub-functions within the function. They @@ -258,7 +258,7 @@ coaligned_copy_reverse: * up from where they were left and we avoid code duplication * without any overhead except the call in and return jumps. */ -byte_copy_until_aligned_forward: +.Lbyte_copy_until_aligned_forward: beq t3, t5, 2f 1: lb t1, 0(a1) @@ -269,7 +269,7 @@ byte_copy_until_aligned_forward: 2: jalr zero, 0x0(t0) /* Return to multibyte copy loop */ -byte_copy_until_aligned_reverse: +.Lbyte_copy_until_aligned_reverse: beq t4, t6, 2f 1: lb t1, -1(a4) @@ -285,10 +285,10 @@ byte_copy_until_aligned_reverse: * These will byte copy until they reach the end of data to copy. * At that point, they will call to return from memmove. */ -byte_copy: - bltu a1, a0, byte_copy_reverse +.Lbyte_copy: + bltu a1, a0, .Lbyte_copy_reverse -byte_copy_forward: +.Lbyte_copy_forward: beq t3, t4, 2f 1: lb t1, 0(a1) @@ -299,7 +299,7 @@ byte_copy_forward: 2: ret -byte_copy_reverse: +.Lbyte_copy_reverse: beq t4, t3, 2f 1: lb t1, -1(a4) @@ -309,7 +309,7 @@ byte_copy_reverse: bne t4, t3, 1b 2: -return_from_memmove: +.Lreturn_from_memmove: ret SYM_FUNC_END(memmove) From 76329c693924d8f37afbf361f0d8daab594e1644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Tue, 24 Oct 2023 15:26:52 +0200 Subject: [PATCH 120/415] riscv: Use SYM_*() assembly macros instead of deprecated ones MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ENTRY()/END()/WEAK() macros are deprecated and we should make use of the new SYM_*() macros [1] for better annotation of symbols. Replace the deprecated ones with the new ones and fix wrong usage of END()/ENDPROC() to correctly describe the symbols. [1] https://docs.kernel.org/core-api/asm-annotations.html Signed-off-by: Clément Léger Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20231024132655.730417-3-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/copy-unaligned.S | 8 ++++---- arch/riscv/kernel/fpu.S | 8 ++++---- arch/riscv/kernel/head.S | 12 +++++------ arch/riscv/kernel/hibernate-asm.S | 12 +++++------ arch/riscv/kernel/mcount-dyn.S | 20 ++++++++----------- arch/riscv/kernel/mcount.S | 8 ++++---- arch/riscv/kernel/probes/rethook_trampoline.S | 4 ++-- arch/riscv/kernel/suspend_entry.S | 4 ++-- arch/riscv/kernel/vdso/flush_icache.S | 4 ++-- arch/riscv/kernel/vdso/getcpu.S | 4 ++-- arch/riscv/kernel/vdso/rt_sigreturn.S | 4 ++-- arch/riscv/kernel/vdso/sys_hwprobe.S | 4 ++-- arch/riscv/lib/memcpy.S | 6 +++--- arch/riscv/lib/memmove.S | 3 +-- arch/riscv/lib/memset.S | 6 +++--- arch/riscv/lib/uaccess.S | 11 +++++----- arch/riscv/purgatory/entry.S | 16 ++++----------- 17 files changed, 60 insertions(+), 74 deletions(-) diff --git a/arch/riscv/kernel/copy-unaligned.S b/arch/riscv/kernel/copy-unaligned.S index cfdecfbaad62..2b3d9398c113 100644 --- a/arch/riscv/kernel/copy-unaligned.S +++ b/arch/riscv/kernel/copy-unaligned.S @@ -9,7 +9,7 @@ /* void __riscv_copy_words_unaligned(void *, const void *, size_t) */ /* Performs a memcpy without aligning buffers, using word loads and stores. */ /* Note: The size is truncated to a multiple of 8 * SZREG */ -ENTRY(__riscv_copy_words_unaligned) +SYM_FUNC_START(__riscv_copy_words_unaligned) andi a4, a2, ~((8*SZREG)-1) beqz a4, 2f add a3, a1, a4 @@ -36,12 +36,12 @@ ENTRY(__riscv_copy_words_unaligned) 2: ret -END(__riscv_copy_words_unaligned) +SYM_FUNC_END(__riscv_copy_words_unaligned) /* void __riscv_copy_bytes_unaligned(void *, const void *, size_t) */ /* Performs a memcpy without aligning buffers, using only byte accesses. */ /* Note: The size is truncated to a multiple of 8 */ -ENTRY(__riscv_copy_bytes_unaligned) +SYM_FUNC_START(__riscv_copy_bytes_unaligned) andi a4, a2, ~(8-1) beqz a4, 2f add a3, a1, a4 @@ -68,4 +68,4 @@ ENTRY(__riscv_copy_bytes_unaligned) 2: ret -END(__riscv_copy_bytes_unaligned) +SYM_FUNC_END(__riscv_copy_bytes_unaligned) diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S index 5dd3161a4dac..2c543f130f93 100644 --- a/arch/riscv/kernel/fpu.S +++ b/arch/riscv/kernel/fpu.S @@ -19,7 +19,7 @@ #include #include -ENTRY(__fstate_save) +SYM_FUNC_START(__fstate_save) li a2, TASK_THREAD_F0 add a0, a0, a2 li t1, SR_FS @@ -60,9 +60,9 @@ ENTRY(__fstate_save) sw t0, TASK_THREAD_FCSR_F0(a0) csrc CSR_STATUS, t1 ret -ENDPROC(__fstate_save) +SYM_FUNC_END(__fstate_save) -ENTRY(__fstate_restore) +SYM_FUNC_START(__fstate_restore) li a2, TASK_THREAD_F0 add a0, a0, a2 li t1, SR_FS @@ -103,7 +103,7 @@ ENTRY(__fstate_restore) fscsr t0 csrc CSR_STATUS, t1 ret -ENDPROC(__fstate_restore) +SYM_FUNC_END(__fstate_restore) #define get_f32(which) fmv.x.s a0, which; j 2f #define put_f32(which) fmv.s.x which, a1; j 2f diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 0c0432eb57d7..b77397432403 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -19,7 +19,7 @@ #include "efi-header.S" __HEAD -ENTRY(_start) +SYM_CODE_START(_start) /* * Image header expected by Linux boot-loaders. The image header data * structure is described in asm/image.h. @@ -187,9 +187,9 @@ secondary_start_sbi: wfi j .Lsecondary_park -END(_start) +SYM_CODE_END(_start) -ENTRY(_start_kernel) +SYM_CODE_START(_start_kernel) /* Mask all interrupts */ csrw CSR_IE, zero csrw CSR_IP, zero @@ -348,10 +348,10 @@ ENTRY(_start_kernel) tail .Lsecondary_start_common #endif /* CONFIG_RISCV_BOOT_SPINWAIT */ -END(_start_kernel) +SYM_CODE_END(_start_kernel) #ifdef CONFIG_RISCV_M_MODE -ENTRY(reset_regs) +SYM_CODE_START_LOCAL(reset_regs) li sp, 0 li gp, 0 li tp, 0 @@ -449,5 +449,5 @@ ENTRY(reset_regs) .Lreset_regs_done_vector: #endif /* CONFIG_RISCV_ISA_V */ ret -END(reset_regs) +SYM_CODE_END(reset_regs) #endif /* CONFIG_RISCV_M_MODE */ diff --git a/arch/riscv/kernel/hibernate-asm.S b/arch/riscv/kernel/hibernate-asm.S index d698dd7df637..d040dcf4add4 100644 --- a/arch/riscv/kernel/hibernate-asm.S +++ b/arch/riscv/kernel/hibernate-asm.S @@ -21,7 +21,7 @@ * * Always returns 0 */ -ENTRY(__hibernate_cpu_resume) +SYM_FUNC_START(__hibernate_cpu_resume) /* switch to hibernated image's page table. */ csrw CSR_SATP, s0 sfence.vma @@ -34,7 +34,7 @@ ENTRY(__hibernate_cpu_resume) mv a0, zero ret -END(__hibernate_cpu_resume) +SYM_FUNC_END(__hibernate_cpu_resume) /* * Prepare to restore the image. @@ -42,7 +42,7 @@ END(__hibernate_cpu_resume) * a1: satp of temporary page tables. * a2: cpu_resume. */ -ENTRY(hibernate_restore_image) +SYM_FUNC_START(hibernate_restore_image) mv s0, a0 mv s1, a1 mv s2, a2 @@ -50,7 +50,7 @@ ENTRY(hibernate_restore_image) REG_L a1, relocated_restore_code jr a1 -END(hibernate_restore_image) +SYM_FUNC_END(hibernate_restore_image) /* * The below code will be executed from a 'safe' page. @@ -58,7 +58,7 @@ END(hibernate_restore_image) * back to the original memory location. Finally, it jumps to __hibernate_cpu_resume() * to restore the CPU context. */ -ENTRY(hibernate_core_restore_code) +SYM_FUNC_START(hibernate_core_restore_code) /* switch to temp page table. */ csrw satp, s1 sfence.vma @@ -73,4 +73,4 @@ ENTRY(hibernate_core_restore_code) bnez s4, .Lcopy jr s2 -END(hibernate_core_restore_code) +SYM_FUNC_END(hibernate_core_restore_code) diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 669b8697aa38..58dd96a2a153 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -82,7 +82,7 @@ .endm #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ -ENTRY(ftrace_caller) +SYM_FUNC_START(ftrace_caller) SAVE_ABI addi a0, t0, -FENTRY_RA_OFFSET @@ -91,8 +91,7 @@ ENTRY(ftrace_caller) mv a1, ra mv a3, sp -ftrace_call: - .global ftrace_call +SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) call ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -102,16 +101,15 @@ ftrace_call: #ifdef HAVE_FUNCTION_GRAPH_FP_TEST mv a2, s0 #endif -ftrace_graph_call: - .global ftrace_graph_call +SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) call ftrace_stub #endif RESTORE_ABI jr t0 -ENDPROC(ftrace_caller) +SYM_FUNC_END(ftrace_caller) #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -ENTRY(ftrace_regs_caller) +SYM_FUNC_START(ftrace_regs_caller) SAVE_ALL addi a0, t0, -FENTRY_RA_OFFSET @@ -120,8 +118,7 @@ ENTRY(ftrace_regs_caller) mv a1, ra mv a3, sp -ftrace_regs_call: - .global ftrace_regs_call +SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) call ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -131,12 +128,11 @@ ftrace_regs_call: #ifdef HAVE_FUNCTION_GRAPH_FP_TEST mv a2, s0 #endif -ftrace_graph_regs_call: - .global ftrace_graph_regs_call +SYM_INNER_LABEL(ftrace_graph_regs_call, SYM_L_GLOBAL) call ftrace_stub #endif RESTORE_ALL jr t0 -ENDPROC(ftrace_regs_caller) +SYM_FUNC_END(ftrace_regs_caller) #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index ab4dd0594fe7..b4dd9ed6849e 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -61,7 +61,7 @@ SYM_TYPED_FUNC_START(ftrace_stub_graph) ret SYM_FUNC_END(ftrace_stub_graph) -ENTRY(return_to_handler) +SYM_FUNC_START(return_to_handler) /* * On implementing the frame point test, the ideal way is to compare the * s0 (frame pointer, if enabled) on entry and the sp (stack pointer) on return. @@ -76,11 +76,11 @@ ENTRY(return_to_handler) mv a2, a0 RESTORE_RET_ABI_STATE jalr a2 -ENDPROC(return_to_handler) +SYM_FUNC_END(return_to_handler) #endif #ifndef CONFIG_DYNAMIC_FTRACE -ENTRY(MCOUNT_NAME) +SYM_FUNC_START(MCOUNT_NAME) la t4, ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER la t0, ftrace_graph_return @@ -126,6 +126,6 @@ ENTRY(MCOUNT_NAME) jalr t5 RESTORE_ABI_STATE ret -ENDPROC(MCOUNT_NAME) +SYM_FUNC_END(MCOUNT_NAME) #endif EXPORT_SYMBOL(MCOUNT_NAME) diff --git a/arch/riscv/kernel/probes/rethook_trampoline.S b/arch/riscv/kernel/probes/rethook_trampoline.S index 21bac92a170a..f2cd83d9b0f0 100644 --- a/arch/riscv/kernel/probes/rethook_trampoline.S +++ b/arch/riscv/kernel/probes/rethook_trampoline.S @@ -75,7 +75,7 @@ REG_L x31, PT_T6(sp) .endm -ENTRY(arch_rethook_trampoline) +SYM_CODE_START(arch_rethook_trampoline) addi sp, sp, -(PT_SIZE_ON_STACK) save_all_base_regs @@ -90,4 +90,4 @@ ENTRY(arch_rethook_trampoline) addi sp, sp, PT_SIZE_ON_STACK ret -ENDPROC(arch_rethook_trampoline) +SYM_CODE_END(arch_rethook_trampoline) diff --git a/arch/riscv/kernel/suspend_entry.S b/arch/riscv/kernel/suspend_entry.S index d5cf8b575777..2d54f309c140 100644 --- a/arch/riscv/kernel/suspend_entry.S +++ b/arch/riscv/kernel/suspend_entry.S @@ -16,7 +16,7 @@ .altmacro .option norelax -ENTRY(__cpu_suspend_enter) +SYM_FUNC_START(__cpu_suspend_enter) /* Save registers (except A0 and T0-T6) */ REG_S ra, (SUSPEND_CONTEXT_REGS + PT_RA)(a0) REG_S sp, (SUSPEND_CONTEXT_REGS + PT_SP)(a0) @@ -57,7 +57,7 @@ ENTRY(__cpu_suspend_enter) /* Return to C code */ ret -END(__cpu_suspend_enter) +SYM_FUNC_END(__cpu_suspend_enter) SYM_TYPED_FUNC_START(__cpu_resume_enter) /* Load the global pointer */ diff --git a/arch/riscv/kernel/vdso/flush_icache.S b/arch/riscv/kernel/vdso/flush_icache.S index 82f97d67c23e..8f884227e8bc 100644 --- a/arch/riscv/kernel/vdso/flush_icache.S +++ b/arch/riscv/kernel/vdso/flush_icache.S @@ -8,7 +8,7 @@ .text /* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */ -ENTRY(__vdso_flush_icache) +SYM_FUNC_START(__vdso_flush_icache) .cfi_startproc #ifdef CONFIG_SMP li a7, __NR_riscv_flush_icache @@ -19,4 +19,4 @@ ENTRY(__vdso_flush_icache) #endif ret .cfi_endproc -ENDPROC(__vdso_flush_icache) +SYM_FUNC_END(__vdso_flush_icache) diff --git a/arch/riscv/kernel/vdso/getcpu.S b/arch/riscv/kernel/vdso/getcpu.S index bb0c05e2ffba..9c1bd531907f 100644 --- a/arch/riscv/kernel/vdso/getcpu.S +++ b/arch/riscv/kernel/vdso/getcpu.S @@ -8,11 +8,11 @@ .text /* int __vdso_getcpu(unsigned *cpu, unsigned *node, void *unused); */ -ENTRY(__vdso_getcpu) +SYM_FUNC_START(__vdso_getcpu) .cfi_startproc /* For now, just do the syscall. */ li a7, __NR_getcpu ecall ret .cfi_endproc -ENDPROC(__vdso_getcpu) +SYM_FUNC_END(__vdso_getcpu) diff --git a/arch/riscv/kernel/vdso/rt_sigreturn.S b/arch/riscv/kernel/vdso/rt_sigreturn.S index 10438c7c626a..3dc022aa8931 100644 --- a/arch/riscv/kernel/vdso/rt_sigreturn.S +++ b/arch/riscv/kernel/vdso/rt_sigreturn.S @@ -7,10 +7,10 @@ #include .text -ENTRY(__vdso_rt_sigreturn) +SYM_FUNC_START(__vdso_rt_sigreturn) .cfi_startproc .cfi_signal_frame li a7, __NR_rt_sigreturn ecall .cfi_endproc -ENDPROC(__vdso_rt_sigreturn) +SYM_FUNC_END(__vdso_rt_sigreturn) diff --git a/arch/riscv/kernel/vdso/sys_hwprobe.S b/arch/riscv/kernel/vdso/sys_hwprobe.S index 4e704146c77a..77e57f830521 100644 --- a/arch/riscv/kernel/vdso/sys_hwprobe.S +++ b/arch/riscv/kernel/vdso/sys_hwprobe.S @@ -5,11 +5,11 @@ #include .text -ENTRY(riscv_hwprobe) +SYM_FUNC_START(riscv_hwprobe) .cfi_startproc li a7, __NR_riscv_hwprobe ecall ret .cfi_endproc -ENDPROC(riscv_hwprobe) +SYM_FUNC_END(riscv_hwprobe) diff --git a/arch/riscv/lib/memcpy.S b/arch/riscv/lib/memcpy.S index 1a40d01a9543..44e009ec5fef 100644 --- a/arch/riscv/lib/memcpy.S +++ b/arch/riscv/lib/memcpy.S @@ -7,8 +7,7 @@ #include /* void *memcpy(void *, const void *, size_t) */ -ENTRY(__memcpy) -WEAK(memcpy) +SYM_FUNC_START(__memcpy) move t6, a0 /* Preserve return value */ /* Defer to byte-oriented copy for small sizes */ @@ -105,6 +104,7 @@ WEAK(memcpy) bltu a1, a3, 5b 6: ret -END(__memcpy) +SYM_FUNC_END(__memcpy) +SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) SYM_FUNC_ALIAS(__pi_memcpy, __memcpy) SYM_FUNC_ALIAS(__pi___memcpy, __memcpy) diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S index 1930b388c3a0..cb3e2e7ef0ba 100644 --- a/arch/riscv/lib/memmove.S +++ b/arch/riscv/lib/memmove.S @@ -7,7 +7,6 @@ #include SYM_FUNC_START(__memmove) -SYM_FUNC_START_WEAK(memmove) /* * Returns * a0 - dest @@ -312,7 +311,7 @@ SYM_FUNC_START_WEAK(memmove) .Lreturn_from_memmove: ret -SYM_FUNC_END(memmove) SYM_FUNC_END(__memmove) +SYM_FUNC_ALIAS_WEAK(memmove, __memmove) SYM_FUNC_ALIAS(__pi_memmove, __memmove) SYM_FUNC_ALIAS(__pi___memmove, __memmove) diff --git a/arch/riscv/lib/memset.S b/arch/riscv/lib/memset.S index 34c5360c6705..35f358e70bdb 100644 --- a/arch/riscv/lib/memset.S +++ b/arch/riscv/lib/memset.S @@ -8,8 +8,7 @@ #include /* void *memset(void *, int, size_t) */ -ENTRY(__memset) -WEAK(memset) +SYM_FUNC_START(__memset) move t0, a0 /* Preserve return value */ /* Defer to byte-oriented fill for small sizes */ @@ -110,4 +109,5 @@ WEAK(memset) bltu t0, a3, 5b 6: ret -END(__memset) +SYM_FUNC_END(__memset) +SYM_FUNC_ALIAS_WEAK(memset, __memset) diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index 09b47ebacf2e..3ab438f30d13 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -10,8 +10,7 @@ _asm_extable 100b, \lbl .endm -ENTRY(__asm_copy_to_user) -ENTRY(__asm_copy_from_user) +SYM_FUNC_START(__asm_copy_to_user) /* Enable access to user memory */ li t6, SR_SUM @@ -181,13 +180,13 @@ ENTRY(__asm_copy_from_user) csrc CSR_STATUS, t6 sub a0, t5, a0 ret -ENDPROC(__asm_copy_to_user) -ENDPROC(__asm_copy_from_user) +SYM_FUNC_END(__asm_copy_to_user) EXPORT_SYMBOL(__asm_copy_to_user) +SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user) EXPORT_SYMBOL(__asm_copy_from_user) -ENTRY(__clear_user) +SYM_FUNC_START(__clear_user) /* Enable access to user memory */ li t6, SR_SUM @@ -233,5 +232,5 @@ ENTRY(__clear_user) csrc CSR_STATUS, t6 sub a0, a3, a0 ret -ENDPROC(__clear_user) +SYM_FUNC_END(__clear_user) EXPORT_SYMBOL(__clear_user) diff --git a/arch/riscv/purgatory/entry.S b/arch/riscv/purgatory/entry.S index 0194f4554130..5bcf3af903da 100644 --- a/arch/riscv/purgatory/entry.S +++ b/arch/riscv/purgatory/entry.S @@ -7,15 +7,11 @@ * Author: Li Zhengyu (lizhengyu3@huawei.com) * */ - -.macro size, sym:req - .size \sym, . - \sym -.endm +#include .text -.globl purgatory_start -purgatory_start: +SYM_CODE_START(purgatory_start) lla sp, .Lstack mv s0, a0 /* The hartid of the current hart */ @@ -28,8 +24,7 @@ purgatory_start: mv a1, s1 ld a2, riscv_kernel_entry jr a2 - -size purgatory_start +SYM_CODE_END(purgatory_start) .align 4 .rept 256 @@ -39,9 +34,6 @@ size purgatory_start .data -.globl riscv_kernel_entry -riscv_kernel_entry: - .quad 0 -size riscv_kernel_entry +SYM_DATA(riscv_kernel_entry, .quad 0) .end From 4cc0d8a3f109fbdd8100ed88fc9417203a5d5b4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Tue, 24 Oct 2023 15:26:53 +0200 Subject: [PATCH 121/415] riscv: kernel: Use correct SYM_DATA_*() macro for data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some data were incorrectly annotated with SYM_FUNC_*() instead of SYM_DATA_*() ones. Use the correct ones. Signed-off-by: Clément Léger Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20231024132655.730417-4-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index e48478eb6f2d..54ca4564a926 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -322,7 +322,7 @@ SYM_FUNC_END(__switch_to) .section ".rodata" .align LGREG /* Exception vector table */ -SYM_CODE_START(excp_vect_table) +SYM_DATA_START_LOCAL(excp_vect_table) RISCV_PTR do_trap_insn_misaligned ALT_INSN_FAULT(RISCV_PTR do_trap_insn_fault) RISCV_PTR do_trap_insn_illegal @@ -340,12 +340,11 @@ SYM_CODE_START(excp_vect_table) RISCV_PTR do_page_fault /* load page fault */ RISCV_PTR do_trap_unknown RISCV_PTR do_page_fault /* store page fault */ -excp_vect_table_end: -SYM_CODE_END(excp_vect_table) +SYM_DATA_END_LABEL(excp_vect_table, SYM_L_LOCAL, excp_vect_table_end) #ifndef CONFIG_MMU -SYM_CODE_START(__user_rt_sigreturn) +SYM_DATA_START(__user_rt_sigreturn) li a7, __NR_rt_sigreturn ecall -SYM_CODE_END(__user_rt_sigreturn) +SYM_DATA_END(__user_rt_sigreturn) #endif From 89d528ba2f8281de61163c6b62e598b64d832175 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Mon, 6 Nov 2023 20:39:07 +0000 Subject: [PATCH 122/415] io_uring: indicate if io_kbuf_recycle did recycle anything It can be useful to know if io_kbuf_recycle did actually recycle the buffer on the request, or if it left the request alone. Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20231106203909.197089-2-dyudaken@gmail.com Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 6 +++--- io_uring/kbuf.h | 13 ++++++++----- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index fea06810b43d..a1e4239c7d75 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -52,7 +52,7 @@ static int io_buffer_add_list(struct io_ring_ctx *ctx, return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL)); } -void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) +bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) { struct io_ring_ctx *ctx = req->ctx; struct io_buffer_list *bl; @@ -65,7 +65,7 @@ void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) * multiple use. */ if (req->flags & REQ_F_PARTIAL_IO) - return; + return false; io_ring_submit_lock(ctx, issue_flags); @@ -76,7 +76,7 @@ void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) req->buf_index = buf->bgid; io_ring_submit_unlock(ctx, issue_flags); - return; + return true; } unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags) diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index d14345ef61fc..f2d615236b2c 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -53,11 +53,11 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg); unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags); -void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); +bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid); -static inline void io_kbuf_recycle_ring(struct io_kiocb *req) +static inline bool io_kbuf_recycle_ring(struct io_kiocb *req) { /* * We don't need to recycle for REQ_F_BUFFER_RING, we can just clear @@ -80,8 +80,10 @@ static inline void io_kbuf_recycle_ring(struct io_kiocb *req) } else { req->buf_index = req->buf_list->bgid; req->flags &= ~REQ_F_BUFFER_RING; + return true; } } + return false; } static inline bool io_do_buffer_select(struct io_kiocb *req) @@ -91,12 +93,13 @@ static inline bool io_do_buffer_select(struct io_kiocb *req) return !(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)); } -static inline void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags) +static inline bool io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags) { if (req->flags & REQ_F_BUFFER_SELECTED) - io_kbuf_recycle_legacy(req, issue_flags); + return io_kbuf_recycle_legacy(req, issue_flags); if (req->flags & REQ_F_BUFFER_RING) - io_kbuf_recycle_ring(req); + return io_kbuf_recycle_ring(req); + return false; } static inline unsigned int __io_put_kbuf_list(struct io_kiocb *req, From 49fbe99486786661994a55ced855c31d966bbdf0 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Mon, 6 Nov 2023 20:39:08 +0000 Subject: [PATCH 123/415] io_uring: do not allow multishot read to set addr or len For addr: this field is not used, since buffer select is forced. But by forcing it to be zero it leaves open future uses of the field. len is actually usable, you could imagine that you want to receive multishot up to a certain length. However right now this is not how it is implemented, and it seems safer to force this to be zero. Fixes: fc68fcda0491 ("io_uring/rw: add support for IORING_OP_READ_MULTISHOT") Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20231106203909.197089-3-dyudaken@gmail.com Signed-off-by: Jens Axboe --- io_uring/rw.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/io_uring/rw.c b/io_uring/rw.c index 9e3e56b74e35..8321e004ab13 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -143,6 +143,7 @@ int io_prep_rw_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe) */ int io_read_mshot_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { + struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); int ret; /* must be used with provided buffers */ @@ -153,6 +154,9 @@ int io_read_mshot_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(ret)) return ret; + if (rw->addr || rw->len) + return -EINVAL; + req->flags |= REQ_F_APOLL_MULTISHOT; return 0; } From e53759298a7d7e98c3e5c2440d395d19cea7d6bf Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Mon, 6 Nov 2023 20:39:09 +0000 Subject: [PATCH 124/415] io_uring: do not clamp read length for multishot read When doing a multishot read, the code path reuses the old read paths. However this breaks an assumption built into those paths, namely that struct io_rw::len is available for reuse by __io_import_iovec. For multishot this results in len being set for the first receive call, and then subsequent calls are clamped to that buffer length incorrectly. Instead keep len as zero after recycling buffers, to reuse the full buffer size of the next selected buffer. Fixes: fc68fcda0491 ("io_uring/rw: add support for IORING_OP_READ_MULTISHOT") Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20231106203909.197089-4-dyudaken@gmail.com Signed-off-by: Jens Axboe --- io_uring/rw.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index 8321e004ab13..64390d4e20c1 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -912,6 +912,7 @@ int io_read(struct io_kiocb *req, unsigned int issue_flags) int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) { + struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); unsigned int cflags = 0; int ret; @@ -928,7 +929,12 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) * handling arm it. */ if (ret == -EAGAIN) { - io_kbuf_recycle(req, issue_flags); + /* + * Reset rw->len to 0 again to avoid clamping future mshot + * reads, in case the buffer size varies. + */ + if (io_kbuf_recycle(req, issue_flags)) + rw->len = 0; return -EAGAIN; } @@ -941,6 +947,7 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) * jump to the termination path. This request is then done. */ cflags = io_put_kbuf(req, issue_flags); + rw->len = 0; /* similarly to above, reset len to 0 */ if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER, From e0c0a7c35f67191152635e5913f76aa7094d967c Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Tue, 31 Oct 2023 12:40:47 +0100 Subject: [PATCH 125/415] riscv: select ARCH_PROC_KCORE_TEXT This adds a separate segment for kernel text in /proc/kcore, which has a different address than the direct linear map. Signed-off-by: Andreas Schwab Link: https://lore.kernel.org/r/mvmh6m758ao.fsf@suse.de Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 3f77530180b9..5b1e61aca6cf 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -916,6 +916,9 @@ config PORTABLE select MMU select OF +config ARCH_PROC_KCORE_TEXT + def_bool y + menu "Power management options" source "kernel/power/Kconfig" From 65083333d3d16b282674aeef5cce5c72226c05e0 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 6 Nov 2023 07:42:53 -0600 Subject: [PATCH 126/415] powerpc/pseries/rtas-work-area: Fix rtas_work_area_reserve_arena() kernel-doc >From a W=1 build: >> arch/powerpc/platforms/pseries/rtas-work-area.c:189: warning: Function parameter or member 'limit' not >> described in 'rtas_work_area_reserve_arena' Add the missing description of the limit parameter. Signed-off-by: Nathan Lynch Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309131221.Bm1pg96n-lkp@intel.com/ Signed-off-by: Michael Ellerman Link: https://msgid.link/20231106-rtas-trivial-v1-1-61847655c51f@linux.ibm.com --- arch/powerpc/platforms/pseries/rtas-work-area.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/pseries/rtas-work-area.c b/arch/powerpc/platforms/pseries/rtas-work-area.c index b37d52f40360..7fe34bee84d8 100644 --- a/arch/powerpc/platforms/pseries/rtas-work-area.c +++ b/arch/powerpc/platforms/pseries/rtas-work-area.c @@ -184,6 +184,7 @@ machine_arch_initcall(pseries, rtas_work_area_allocator_init); /** * rtas_work_area_reserve_arena() - Reserve memory suitable for RTAS work areas. + * @limit: Upper limit for memblock allocation. */ void __init rtas_work_area_reserve_arena(const phys_addr_t limit) { From 644b6025bcaff59737270d812c70302f5a8d4a8f Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 6 Nov 2023 07:42:54 -0600 Subject: [PATCH 127/415] powerpc/rtas: Fix ppc_rtas_rmo_buf_show() kernel-doc >From a W=1 build: >> arch/powerpc/kernel/rtas-proc.c:771: warning: Function parameter or member 'm' not described in >> 'ppc_rtas_rmo_buf_show' >> arch/powerpc/kernel/rtas-proc.c:771: warning: Function parameter or member 'v' not described in >> 'ppc_rtas_rmo_buf_show' Add the missing parameter descriptions. Signed-off-by: Nathan Lynch Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309211645.1Lvwmbv4-lkp@intel.com/ Signed-off-by: Michael Ellerman Link: https://msgid.link/20231106-rtas-trivial-v1-2-61847655c51f@linux.ibm.com --- arch/powerpc/kernel/rtas-proc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 9454b8395b6a..f38df72e64b8 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -752,6 +752,8 @@ static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v) /** * ppc_rtas_rmo_buf_show() - Describe RTAS-addressable region for user space. + * @m: seq_file output target. + * @v: Unused. * * Base + size description of a range of RTAS-addressable memory set * aside for user space to use as work area(s) for certain RTAS From 114d5c85a39a0e35024ff5156a160eef1907f3e6 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 30 Oct 2023 14:30:25 +0100 Subject: [PATCH 128/415] riscv: Improve tlb_flush() For now, tlb_flush() simply calls flush_tlb_mm() which results in a flush of the whole TLB. So let's use mmu_gather fields to provide a more fine-grained flush of the TLB. Signed-off-by: Alexandre Ghiti Reviewed-by: Andrew Jones Reviewed-by: Samuel Holland Tested-by: Lad Prabhakar # On RZ/Five SMARC Link: https://lore.kernel.org/r/20231030133027.19542-2-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/tlb.h | 8 +++++++- arch/riscv/include/asm/tlbflush.h | 3 +++ arch/riscv/mm/tlbflush.c | 7 +++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h index 120bcf2ed8a8..1eb5682b2af6 100644 --- a/arch/riscv/include/asm/tlb.h +++ b/arch/riscv/include/asm/tlb.h @@ -15,7 +15,13 @@ static void tlb_flush(struct mmu_gather *tlb); static inline void tlb_flush(struct mmu_gather *tlb) { - flush_tlb_mm(tlb->mm); +#ifdef CONFIG_MMU + if (tlb->fullmm || tlb->need_flush_all) + flush_tlb_mm(tlb->mm); + else + flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, + tlb_get_unmap_size(tlb)); +#endif } #endif /* _ASM_RISCV_TLB_H */ diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index a09196f8de68..f5c4fb0ae642 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -32,6 +32,8 @@ static inline void local_flush_tlb_page(unsigned long addr) #if defined(CONFIG_SMP) && defined(CONFIG_MMU) void flush_tlb_all(void); void flush_tlb_mm(struct mm_struct *mm); +void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + unsigned long end, unsigned int page_size); void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr); void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -52,6 +54,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, } #define flush_tlb_mm(mm) flush_tlb_all() +#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() #endif /* !CONFIG_SMP || !CONFIG_MMU */ /* Flush a range of kernel pages */ diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 77be59aadc73..fa03289853d8 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -132,6 +132,13 @@ void flush_tlb_mm(struct mm_struct *mm) __flush_tlb_range(mm, 0, -1, PAGE_SIZE); } +void flush_tlb_mm_range(struct mm_struct *mm, + unsigned long start, unsigned long end, + unsigned int page_size) +{ + __flush_tlb_range(mm, start, end - start, page_size); +} + void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { __flush_tlb_range(vma->vm_mm, addr, PAGE_SIZE, PAGE_SIZE); From 9e113064b4c291ad06a7a3864691288bd2cf014f Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 30 Oct 2023 14:30:26 +0100 Subject: [PATCH 129/415] riscv: Improve flush_tlb_range() for hugetlb pages flush_tlb_range() uses a fixed stride of PAGE_SIZE and in its current form, when a hugetlb mapping needs to be flushed, flush_tlb_range() flushes the whole tlb: so set a stride of the size of the hugetlb mapping in order to only flush the hugetlb mapping. However, if the hugepage is a NAPOT region, all PTEs that constitute this mapping must be invalidated, so the stride size must actually be the size of the PTE. Note that THPs are directly handled by flush_pmd_tlb_range(). Signed-off-by: Alexandre Ghiti Reviewed-by: Samuel Holland Tested-by: Lad Prabhakar # On RZ/Five SMARC Link: https://lore.kernel.org/r/20231030133027.19542-3-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/tlbflush.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index fa03289853d8..b6d712a82306 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -147,7 +148,33 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __flush_tlb_range(vma->vm_mm, start, end - start, PAGE_SIZE); + unsigned long stride_size; + + if (!is_vm_hugetlb_page(vma)) { + stride_size = PAGE_SIZE; + } else { + stride_size = huge_page_size(hstate_vma(vma)); + + /* + * As stated in the privileged specification, every PTE in a + * NAPOT region must be invalidated, so reset the stride in that + * case. + */ + if (has_svnapot()) { + if (stride_size >= PGDIR_SIZE) + stride_size = PGDIR_SIZE; + else if (stride_size >= P4D_SIZE) + stride_size = P4D_SIZE; + else if (stride_size >= PUD_SIZE) + stride_size = PUD_SIZE; + else if (stride_size >= PMD_SIZE) + stride_size = PMD_SIZE; + else + stride_size = PAGE_SIZE; + } + } + + __flush_tlb_range(vma->vm_mm, start, end - start, stride_size); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, From ba6f35964c518b4520bc3f2fe25d8457cb4a7be5 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 30 Oct 2023 14:30:27 +0100 Subject: [PATCH 130/415] riscv: Make __flush_tlb_range() loop over pte instead of flushing the whole tlb Currently, when the range to flush covers more than one page (a 4K page or a hugepage), __flush_tlb_range() flushes the whole tlb. Flushing the whole tlb comes with a greater cost than flushing a single entry so we should flush single entries up to a certain threshold so that: threshold * cost of flushing a single entry < cost of flushing the whole tlb. Co-developed-by: Mayuresh Chitale Signed-off-by: Mayuresh Chitale Signed-off-by: Alexandre Ghiti Reviewed-by: Andrew Jones Tested-by: Lad Prabhakar # On RZ/Five SMARC Reviewed-by: Samuel Holland Tested-by: Samuel Holland Link: https://lore.kernel.org/r/20231030133027.19542-4-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/sbi.h | 3 - arch/riscv/include/asm/tlbflush.h | 3 + arch/riscv/kernel/sbi.c | 32 +++------ arch/riscv/mm/tlbflush.c | 113 +++++++++++++++--------------- 4 files changed, 71 insertions(+), 80 deletions(-) diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 5b4a1bf5f439..b79d0228144f 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -273,9 +273,6 @@ void sbi_set_timer(uint64_t stime_value); void sbi_shutdown(void); void sbi_send_ipi(unsigned int cpu); int sbi_remote_fence_i(const struct cpumask *cpu_mask); -int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, - unsigned long start, - unsigned long size); int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask, unsigned long start, diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index f5c4fb0ae642..170a49c531c6 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -11,6 +11,9 @@ #include #include +#define FLUSH_TLB_MAX_SIZE ((unsigned long)-1) +#define FLUSH_TLB_NO_ASID ((unsigned long)-1) + #ifdef CONFIG_MMU extern unsigned long asid_mask; diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index c672c8ba9a2a..5a62ed1da453 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -11,6 +11,7 @@ #include #include #include +#include /* default SBI version is 0.1 */ unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT; @@ -376,32 +377,15 @@ int sbi_remote_fence_i(const struct cpumask *cpu_mask) } EXPORT_SYMBOL(sbi_remote_fence_i); -/** - * sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote - * harts for the specified virtual address range. - * @cpu_mask: A cpu mask containing all the target harts. - * @start: Start of the virtual address - * @size: Total size of the virtual address range. - * - * Return: 0 on success, appropriate linux error code otherwise. - */ -int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, - unsigned long start, - unsigned long size) -{ - return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, - cpu_mask, start, size, 0, 0); -} -EXPORT_SYMBOL(sbi_remote_sfence_vma); - /** * sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given - * remote harts for a virtual address range belonging to a specific ASID. + * remote harts for a virtual address range belonging to a specific ASID or not. * * @cpu_mask: A cpu mask containing all the target harts. * @start: Start of the virtual address * @size: Total size of the virtual address range. - * @asid: The value of address space identifier (ASID). + * @asid: The value of address space identifier (ASID), or FLUSH_TLB_NO_ASID + * for flushing all address spaces. * * Return: 0 on success, appropriate linux error code otherwise. */ @@ -410,8 +394,12 @@ int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask, unsigned long size, unsigned long asid) { - return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, - cpu_mask, start, size, asid, 0); + if (asid == FLUSH_TLB_NO_ASID) + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, + cpu_mask, start, size, 0, 0); + else + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, + cpu_mask, start, size, asid, 0); } EXPORT_SYMBOL(sbi_remote_sfence_vma_asid); diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index b6d712a82306..e46fefc70927 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -9,28 +9,50 @@ static inline void local_flush_tlb_all_asid(unsigned long asid) { - __asm__ __volatile__ ("sfence.vma x0, %0" - : - : "r" (asid) - : "memory"); + if (asid != FLUSH_TLB_NO_ASID) + __asm__ __volatile__ ("sfence.vma x0, %0" + : + : "r" (asid) + : "memory"); + else + local_flush_tlb_all(); } static inline void local_flush_tlb_page_asid(unsigned long addr, unsigned long asid) { - __asm__ __volatile__ ("sfence.vma %0, %1" - : - : "r" (addr), "r" (asid) - : "memory"); + if (asid != FLUSH_TLB_NO_ASID) + __asm__ __volatile__ ("sfence.vma %0, %1" + : + : "r" (addr), "r" (asid) + : "memory"); + else + local_flush_tlb_page(addr); } -static inline void local_flush_tlb_range(unsigned long start, - unsigned long size, unsigned long stride) +/* + * Flush entire TLB if number of entries to be flushed is greater + * than the threshold below. + */ +static unsigned long tlb_flush_all_threshold __read_mostly = 64; + +static void local_flush_tlb_range_threshold_asid(unsigned long start, + unsigned long size, + unsigned long stride, + unsigned long asid) { - if (size <= stride) - local_flush_tlb_page(start); - else - local_flush_tlb_all(); + unsigned long nr_ptes_in_range = DIV_ROUND_UP(size, stride); + int i; + + if (nr_ptes_in_range > tlb_flush_all_threshold) { + local_flush_tlb_all_asid(asid); + return; + } + + for (i = 0; i < nr_ptes_in_range; ++i) { + local_flush_tlb_page_asid(start, asid); + start += stride; + } } static inline void local_flush_tlb_range_asid(unsigned long start, @@ -38,8 +60,10 @@ static inline void local_flush_tlb_range_asid(unsigned long start, { if (size <= stride) local_flush_tlb_page_asid(start, asid); - else + else if (size == FLUSH_TLB_MAX_SIZE) local_flush_tlb_all_asid(asid); + else + local_flush_tlb_range_threshold_asid(start, size, stride, asid); } static void __ipi_flush_tlb_all(void *info) @@ -52,7 +76,7 @@ void flush_tlb_all(void) if (riscv_use_ipi_for_rfence()) on_each_cpu(__ipi_flush_tlb_all, NULL, 1); else - sbi_remote_sfence_vma(NULL, 0, -1); + sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID); } struct flush_tlb_range_data { @@ -69,18 +93,12 @@ static void __ipi_flush_tlb_range_asid(void *info) local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid); } -static void __ipi_flush_tlb_range(void *info) -{ - struct flush_tlb_range_data *d = info; - - local_flush_tlb_range(d->start, d->size, d->stride); -} - static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long size, unsigned long stride) { struct flush_tlb_range_data ftd; struct cpumask *cmask = mm_cpumask(mm); + unsigned long asid = FLUSH_TLB_NO_ASID; unsigned int cpuid; bool broadcast; @@ -90,39 +108,24 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, cpuid = get_cpu(); /* check if the tlbflush needs to be sent to other CPUs */ broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; - if (static_branch_unlikely(&use_asid_allocator)) { - unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask; - if (broadcast) { - if (riscv_use_ipi_for_rfence()) { - ftd.asid = asid; - ftd.start = start; - ftd.size = size; - ftd.stride = stride; - on_each_cpu_mask(cmask, - __ipi_flush_tlb_range_asid, - &ftd, 1); - } else - sbi_remote_sfence_vma_asid(cmask, - start, size, asid); - } else { - local_flush_tlb_range_asid(start, size, stride, asid); - } + if (static_branch_unlikely(&use_asid_allocator)) + asid = atomic_long_read(&mm->context.id) & asid_mask; + + if (broadcast) { + if (riscv_use_ipi_for_rfence()) { + ftd.asid = asid; + ftd.start = start; + ftd.size = size; + ftd.stride = stride; + on_each_cpu_mask(cmask, + __ipi_flush_tlb_range_asid, + &ftd, 1); + } else + sbi_remote_sfence_vma_asid(cmask, + start, size, asid); } else { - if (broadcast) { - if (riscv_use_ipi_for_rfence()) { - ftd.asid = 0; - ftd.start = start; - ftd.size = size; - ftd.stride = stride; - on_each_cpu_mask(cmask, - __ipi_flush_tlb_range, - &ftd, 1); - } else - sbi_remote_sfence_vma(cmask, start, size); - } else { - local_flush_tlb_range(start, size, stride); - } + local_flush_tlb_range_asid(start, size, stride, asid); } put_cpu(); @@ -130,7 +133,7 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, void flush_tlb_mm(struct mm_struct *mm) { - __flush_tlb_range(mm, 0, -1, PAGE_SIZE); + __flush_tlb_range(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); } void flush_tlb_mm_range(struct mm_struct *mm, From 62b78fd5fe39b5b82e4b4b8d0ba87ad40d1a99bb Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 30 Oct 2023 14:30:28 +0100 Subject: [PATCH 131/415] riscv: Improve flush_tlb_kernel_range() This function used to simply flush the whole tlb of all harts, be more subtile and try to only flush the range. The problem is that we can only use PAGE_SIZE as stride since we don't know the size of the underlying mapping and then this function will be improved only if the size of the region to flush is < threshold * PAGE_SIZE. Signed-off-by: Alexandre Ghiti Reviewed-by: Andrew Jones Tested-by: Lad Prabhakar # On RZ/Five SMARC Reviewed-by: Samuel Holland Tested-by: Samuel Holland Link: https://lore.kernel.org/r/20231030133027.19542-5-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/tlbflush.h | 11 +++++----- arch/riscv/mm/tlbflush.c | 34 ++++++++++++++++++++++--------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 170a49c531c6..8f3418c5f172 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -40,6 +40,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr); void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +void flush_tlb_kernel_range(unsigned long start, unsigned long end); #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, @@ -56,15 +57,15 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, local_flush_tlb_all(); } -#define flush_tlb_mm(mm) flush_tlb_all() -#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() -#endif /* !CONFIG_SMP || !CONFIG_MMU */ - /* Flush a range of kernel pages */ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - flush_tlb_all(); + local_flush_tlb_all(); } +#define flush_tlb_mm(mm) flush_tlb_all() +#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() +#endif /* !CONFIG_SMP || !CONFIG_MMU */ + #endif /* _ASM_RISCV_TLBFLUSH_H */ diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index e46fefc70927..e6659d7368b3 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -97,20 +97,27 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long size, unsigned long stride) { struct flush_tlb_range_data ftd; - struct cpumask *cmask = mm_cpumask(mm); + const struct cpumask *cmask; unsigned long asid = FLUSH_TLB_NO_ASID; - unsigned int cpuid; bool broadcast; - if (cpumask_empty(cmask)) - return; + if (mm) { + unsigned int cpuid; - cpuid = get_cpu(); - /* check if the tlbflush needs to be sent to other CPUs */ - broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; + cmask = mm_cpumask(mm); + if (cpumask_empty(cmask)) + return; - if (static_branch_unlikely(&use_asid_allocator)) - asid = atomic_long_read(&mm->context.id) & asid_mask; + cpuid = get_cpu(); + /* check if the tlbflush needs to be sent to other CPUs */ + broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; + + if (static_branch_unlikely(&use_asid_allocator)) + asid = atomic_long_read(&mm->context.id) & asid_mask; + } else { + cmask = cpu_online_mask; + broadcast = true; + } if (broadcast) { if (riscv_use_ipi_for_rfence()) { @@ -128,7 +135,8 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, local_flush_tlb_range_asid(start, size, stride, asid); } - put_cpu(); + if (mm) + put_cpu(); } void flush_tlb_mm(struct mm_struct *mm) @@ -179,6 +187,12 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, __flush_tlb_range(vma->vm_mm, start, end - start, stride_size); } + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + __flush_tlb_range(NULL, start, end - start, PAGE_SIZE); +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) From 6eeeb4c7e4b5e03405b335fa7ce340922b7ce089 Mon Sep 17 00:00:00 2001 From: Marielle Novastrider Date: Tue, 31 Oct 2023 20:08:38 +0000 Subject: [PATCH 132/415] Documentation/arm64: Fix typos in elf_hwcaps Small typos in register and field names. Signed-off-by: Marielle Novastrider Acked-by: Will Deacon Link: https://lore.kernel.org/r/20231031200838.55569-1-marielle@novastrider.com Signed-off-by: Catalin Marinas --- Documentation/arch/arm64/elf_hwcaps.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/arch/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst index 4b8399ac592b..ced7b335e2e0 100644 --- a/Documentation/arch/arm64/elf_hwcaps.rst +++ b/Documentation/arch/arm64/elf_hwcaps.rst @@ -174,7 +174,7 @@ HWCAP2_DCPODP Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010. HWCAP2_SVE2 - Functionality implied by ID_AA64ZFR0_EL1.SVEVer == 0b0001. + Functionality implied by ID_AA64ZFR0_EL1.SVEver == 0b0001. HWCAP2_SVEAES Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0001. @@ -222,7 +222,7 @@ HWCAP2_RNG Functionality implied by ID_AA64ISAR0_EL1.RNDR == 0b0001. HWCAP2_BTI - Functionality implied by ID_AA64PFR0_EL1.BT == 0b0001. + Functionality implied by ID_AA64PFR1_EL1.BT == 0b0001. HWCAP2_MTE Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0010, as described @@ -232,7 +232,7 @@ HWCAP2_ECV Functionality implied by ID_AA64MMFR0_EL1.ECV == 0b0001. HWCAP2_AFP - Functionality implied by ID_AA64MFR1_EL1.AFP == 0b0001. + Functionality implied by ID_AA64MMFR1_EL1.AFP == 0b0001. HWCAP2_RPRES Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001. From 15c7ef7341a2e54cfa12ac502c65d6fd2cce2b62 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Thu, 2 Nov 2023 17:16:54 -0700 Subject: [PATCH 133/415] perf: arm_cspmu: Reject events meant for other PMUs Coresight PMU driver didn't reject events meant for other PMUs. This caused some of the Core PMU events disappearing from the output of "perf list". In addition, trying to run e.g. $ perf stat -e r2 sleep 1 made Coresight PMU driver to handle the event instead of letting Core PMU driver to deal with it. Cc: stable@vger.kernel.org Fixes: e37dfd65731d ("perf: arm_cspmu: Add support for ARM CoreSight PMU driver") Signed-off-by: Ilkka Koskinen Acked-by: Will Deacon Reviewed-by: Besar Wicaksono Acked-by: Mark Rutland Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20231103001654.35565-1-ilkka@os.amperecomputing.com Signed-off-by: Catalin Marinas --- drivers/perf/arm_cspmu/arm_cspmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index 0e3fe00d741d..9f478f2c8554 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -676,6 +676,9 @@ static int arm_cspmu_event_init(struct perf_event *event) cspmu = to_arm_cspmu(event->pmu); + if (event->attr.type != event->pmu->type) + return -ENOENT; + /* * Following other "uncore" PMUs, we do not support sampling mode or * attach to a task (per-process mode). From 403edfa436286b21f5ffe6856ae5b36396e8966c Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Thu, 2 Nov 2023 11:30:12 -0700 Subject: [PATCH 134/415] arm64/arm: arm_pmuv3: perf: Don't truncate 64-bit registers The driver used to truncate several 64-bit registers such as PMCEID[n] registers used to describe whether architectural and microarchitectural events in range 0x4000-0x401f exist. Due to discarding the bits, the driver made the events invisible, even if they existed. Moreover, PMCCFILTR and PMCR registers have additional bits in the upper 32 bits. This patch makes them available although they aren't currently used. Finally, functions handling PMXEVCNTR and PMXEVTYPER registers are removed as they not being used at all. Fixes: df29ddf4f04b ("arm64: perf: Abstract system register accesses away") Reported-by: Carl Worth Signed-off-by: Ilkka Koskinen Acked-by: Will Deacon Closes: https://lore.kernel.org/.. Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20231102183012.1251410-1-ilkka@os.amperecomputing.com Signed-off-by: Catalin Marinas --- arch/arm/include/asm/arm_pmuv3.h | 48 ++++++++++++++---------------- arch/arm64/include/asm/arm_pmuv3.h | 25 ++++------------ drivers/perf/arm_pmuv3.c | 6 ++-- 3 files changed, 31 insertions(+), 48 deletions(-) diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h index 72529f5e2bed..a41b503b7dcd 100644 --- a/arch/arm/include/asm/arm_pmuv3.h +++ b/arch/arm/include/asm/arm_pmuv3.h @@ -23,6 +23,8 @@ #define PMUSERENR __ACCESS_CP15(c9, 0, c14, 0) #define PMINTENSET __ACCESS_CP15(c9, 0, c14, 1) #define PMINTENCLR __ACCESS_CP15(c9, 0, c14, 2) +#define PMCEID2 __ACCESS_CP15(c9, 0, c14, 4) +#define PMCEID3 __ACCESS_CP15(c9, 0, c14, 5) #define PMMIR __ACCESS_CP15(c9, 0, c14, 6) #define PMCCFILTR __ACCESS_CP15(c14, 0, c15, 7) @@ -150,21 +152,6 @@ static inline u64 read_pmccntr(void) return read_sysreg(PMCCNTR); } -static inline void write_pmxevcntr(u32 val) -{ - write_sysreg(val, PMXEVCNTR); -} - -static inline u32 read_pmxevcntr(void) -{ - return read_sysreg(PMXEVCNTR); -} - -static inline void write_pmxevtyper(u32 val) -{ - write_sysreg(val, PMXEVTYPER); -} - static inline void write_pmcntenset(u32 val) { write_sysreg(val, PMCNTENSET); @@ -205,16 +192,6 @@ static inline void write_pmuserenr(u32 val) write_sysreg(val, PMUSERENR); } -static inline u32 read_pmceid0(void) -{ - return read_sysreg(PMCEID0); -} - -static inline u32 read_pmceid1(void) -{ - return read_sysreg(PMCEID1); -} - static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} static inline void kvm_clr_pmu_events(u32 clr) {} static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr) @@ -231,6 +208,7 @@ static inline void kvm_vcpu_pmu_resync_el0(void) {} /* PMU Version in DFR Register */ #define ARMV8_PMU_DFR_VER_NI 0 +#define ARMV8_PMU_DFR_VER_V3P1 0x4 #define ARMV8_PMU_DFR_VER_V3P4 0x5 #define ARMV8_PMU_DFR_VER_V3P5 0x6 #define ARMV8_PMU_DFR_VER_IMP_DEF 0xF @@ -251,4 +229,24 @@ static inline bool is_pmuv3p5(int pmuver) return pmuver >= ARMV8_PMU_DFR_VER_V3P5; } +static inline u64 read_pmceid0(void) +{ + u64 val = read_sysreg(PMCEID0); + + if (read_pmuver() >= ARMV8_PMU_DFR_VER_V3P1) + val |= (u64)read_sysreg(PMCEID2) << 32; + + return val; +} + +static inline u64 read_pmceid1(void) +{ + u64 val = read_sysreg(PMCEID1); + + if (read_pmuver() >= ARMV8_PMU_DFR_VER_V3P1) + val |= (u64)read_sysreg(PMCEID3) << 32; + + return val; +} + #endif diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h index 18dc2fb3d7b7..c27404fa4418 100644 --- a/arch/arm64/include/asm/arm_pmuv3.h +++ b/arch/arm64/include/asm/arm_pmuv3.h @@ -46,12 +46,12 @@ static inline u32 read_pmuver(void) ID_AA64DFR0_EL1_PMUVer_SHIFT); } -static inline void write_pmcr(u32 val) +static inline void write_pmcr(u64 val) { write_sysreg(val, pmcr_el0); } -static inline u32 read_pmcr(void) +static inline u64 read_pmcr(void) { return read_sysreg(pmcr_el0); } @@ -71,21 +71,6 @@ static inline u64 read_pmccntr(void) return read_sysreg(pmccntr_el0); } -static inline void write_pmxevcntr(u32 val) -{ - write_sysreg(val, pmxevcntr_el0); -} - -static inline u32 read_pmxevcntr(void) -{ - return read_sysreg(pmxevcntr_el0); -} - -static inline void write_pmxevtyper(u32 val) -{ - write_sysreg(val, pmxevtyper_el0); -} - static inline void write_pmcntenset(u32 val) { write_sysreg(val, pmcntenset_el0); @@ -106,7 +91,7 @@ static inline void write_pmintenclr(u32 val) write_sysreg(val, pmintenclr_el1); } -static inline void write_pmccfiltr(u32 val) +static inline void write_pmccfiltr(u64 val) { write_sysreg(val, pmccfiltr_el0); } @@ -126,12 +111,12 @@ static inline void write_pmuserenr(u32 val) write_sysreg(val, pmuserenr_el0); } -static inline u32 read_pmceid0(void) +static inline u64 read_pmceid0(void) { return read_sysreg(pmceid0_el0); } -static inline u32 read_pmceid1(void) +static inline u64 read_pmceid1(void) { return read_sysreg(pmceid1_el0); } diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 4f6923ad4589..ee98e213a3de 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -428,12 +428,12 @@ static inline bool armv8pmu_event_is_chained(struct perf_event *event) #define ARMV8_IDX_TO_COUNTER(x) \ (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK) -static inline u32 armv8pmu_pmcr_read(void) +static inline u64 armv8pmu_pmcr_read(void) { return read_pmcr(); } -static inline void armv8pmu_pmcr_write(u32 val) +static inline void armv8pmu_pmcr_write(u64 val) { val &= ARMV8_PMU_PMCR_MASK; isb(); @@ -957,7 +957,7 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, static void armv8pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; - u32 pmcr; + u64 pmcr; /* The counter and interrupt enable registers are unknown at reset. */ armv8pmu_disable_counter(U32_MAX); From 45f2f28bd498fb697d07a38775d55f0f50fee5ca Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Tue, 31 Oct 2023 13:22:18 +0200 Subject: [PATCH 135/415] ASoC: SOF: sof-client: trivial: fix comment typo Fix typo s/depndent/dependent Fixes: 6e9548cdb30e ("ASoC: SOF: Convert the generic IPC flood test into SOF client") Signed-off-by: Eugen Hristev Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231031112218.79136-1-eugen.hristev@collabora.com Signed-off-by: Mark Brown --- sound/soc/sof/sof-client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/sof-client.c b/sound/soc/sof/sof-client.c index 9dce7f53b482..54dca91255a0 100644 --- a/sound/soc/sof/sof-client.c +++ b/sound/soc/sof/sof-client.c @@ -176,7 +176,7 @@ int sof_register_clients(struct snd_sof_dev *sdev) goto err_kernel_injector; } - /* Platform depndent client device registration */ + /* Platform dependent client device registration */ if (sof_ops(sdev) && sof_ops(sdev)->register_ipc_clients) ret = sof_ops(sdev)->register_ipc_clients(sdev); From 85dd3af64965c1c0eb7373b340a1b1f7773586b0 Mon Sep 17 00:00:00 2001 From: Victor Shih Date: Tue, 7 Nov 2023 17:57:41 +0800 Subject: [PATCH 136/415] mmc: sdhci-pci-gli: GL9755: Mask the replay timer timeout of AER Due to a flaw in the hardware design, the GL9755 replay timer frequently times out when ASPM is enabled. As a result, the warning messages will often appear in the system log when the system accesses the GL9755 PCI config. Therefore, the replay timer timeout must be masked. Fixes: 36ed2fd32b2c ("mmc: sdhci-pci-gli: A workaround to allow GL9755 to enter ASPM L1.2") Signed-off-by: Victor Shih Acked-by: Adrian Hunter Acked-by: Kai-Heng Feng Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231107095741.8832-3-victorshihgli@gmail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-gli.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index d83261e857a5..044b4704d5bb 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -152,6 +152,9 @@ #define PCI_GLI_9755_PM_CTRL 0xFC #define PCI_GLI_9755_PM_STATE GENMASK(1, 0) +#define PCI_GLI_9755_CORRERR_MASK 0x214 +#define PCI_GLI_9755_CORRERR_MASK_REPLAY_TIMER_TIMEOUT BIT(12) + #define SDHCI_GLI_9767_GM_BURST_SIZE 0x510 #define SDHCI_GLI_9767_GM_BURST_SIZE_AXI_ALWAYS_SET BIT(8) @@ -770,6 +773,11 @@ static void gl9755_hw_setting(struct sdhci_pci_slot *slot) value &= ~PCI_GLI_9755_PM_STATE; pci_write_config_dword(pdev, PCI_GLI_9755_PM_CTRL, value); + /* mask the replay timer timeout of AER */ + pci_read_config_dword(pdev, PCI_GLI_9755_CORRERR_MASK, &value); + value |= PCI_GLI_9755_CORRERR_MASK_REPLAY_TIMER_TIMEOUT; + pci_write_config_dword(pdev, PCI_GLI_9755_CORRERR_MASK, value); + gl9755_wt_off(pdev); } From 015c9cbcf0ad709079117d27c2094a46e0eadcdb Mon Sep 17 00:00:00 2001 From: Victor Shih Date: Tue, 7 Nov 2023 17:57:40 +0800 Subject: [PATCH 137/415] mmc: sdhci-pci-gli: GL9750: Mask the replay timer timeout of AER Due to a flaw in the hardware design, the GL9750 replay timer frequently times out when ASPM is enabled. As a result, the warning messages will often appear in the system log when the system accesses the GL9750 PCI config. Therefore, the replay timer timeout must be masked. Fixes: d7133797e9e1 ("mmc: sdhci-pci-gli: A workaround to allow GL9750 to enter ASPM L1.2") Signed-off-by: Victor Shih Acked-by: Adrian Hunter Acked-by: Kai-Heng Feng Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231107095741.8832-2-victorshihgli@gmail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-gli.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index 044b4704d5bb..d8a991b349a8 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -28,6 +28,9 @@ #define PCI_GLI_9750_PM_CTRL 0xFC #define PCI_GLI_9750_PM_STATE GENMASK(1, 0) +#define PCI_GLI_9750_CORRERR_MASK 0x214 +#define PCI_GLI_9750_CORRERR_MASK_REPLAY_TIMER_TIMEOUT BIT(12) + #define SDHCI_GLI_9750_CFG2 0x848 #define SDHCI_GLI_9750_CFG2_L1DLY GENMASK(28, 24) #define GLI_9750_CFG2_L1DLY_VALUE 0x1F @@ -564,6 +567,11 @@ static void gl9750_hw_setting(struct sdhci_host *host) value &= ~PCI_GLI_9750_PM_STATE; pci_write_config_dword(pdev, PCI_GLI_9750_PM_CTRL, value); + /* mask the replay timer timeout of AER */ + pci_read_config_dword(pdev, PCI_GLI_9750_CORRERR_MASK, &value); + value |= PCI_GLI_9750_CORRERR_MASK_REPLAY_TIMER_TIMEOUT; + pci_write_config_dword(pdev, PCI_GLI_9750_CORRERR_MASK, value); + gl9750_wt_off(host); } From e89a60ba93c266ee3606adcdd460412c1e7c0924 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 18 Oct 2023 06:37:50 +0300 Subject: [PATCH 138/415] fbdev: omapfb: Do not shadow error code from platform_get_irq() There is no point in shadowing the error codes from platform_get_irq(). Refactor omapfb_do_probe() accordingly. Signed-off-by: Andy Shevchenko Signed-off-by: Helge Deller --- drivers/video/fbdev/omap/omapfb_main.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c index 42c96f1cfc93..631076bf71f9 100644 --- a/drivers/video/fbdev/omap/omapfb_main.c +++ b/drivers/video/fbdev/omap/omapfb_main.c @@ -1643,17 +1643,16 @@ static int omapfb_do_probe(struct platform_device *pdev, r = -ENOMEM; goto cleanup; } - fbdev->int_irq = platform_get_irq(pdev, 0); - if (fbdev->int_irq < 0) { - r = -ENXIO; - goto cleanup; - } - fbdev->ext_irq = platform_get_irq(pdev, 1); - if (fbdev->ext_irq < 0) { - r = -ENXIO; + r = platform_get_irq(pdev, 0); + if (r < 0) goto cleanup; - } + fbdev->int_irq = r; + + r = platform_get_irq(pdev, 1); + if (r < 0) + goto cleanup; + fbdev->ext_irq = r; init_state++; From 7be6adf11370d58f9a7afa50fbf06801db04af5c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 18 Oct 2023 06:47:25 +0300 Subject: [PATCH 139/415] fbdev: omapfb: Replace custom memparse() implementation Our library has memparse() for parsing numbers with respective suffixes suitable for memory sizes. Use it instead of custom implementation. Signed-off-by: Andy Shevchenko Signed-off-by: Helge Deller --- drivers/video/fbdev/omap/omapfb_main.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c index 631076bf71f9..694cf6318782 100644 --- a/drivers/video/fbdev/omap/omapfb_main.c +++ b/drivers/video/fbdev/omap/omapfb_main.c @@ -1856,20 +1856,13 @@ static int __init omapfb_setup(char *options) if (!strncmp(this_opt, "accel", 5)) def_accel = 1; else if (!strncmp(this_opt, "vram:", 5)) { + unsigned long long vram; char *suffix; - unsigned long vram; - vram = (simple_strtoul(this_opt + 5, &suffix, 0)); + + vram = memparse(this_opt + 5, &suffix); switch (suffix[0]) { case '\0': break; - case 'm': - case 'M': - vram *= 1024; - fallthrough; - case 'k': - case 'K': - vram *= 1024; - break; default: pr_debug("omapfb: invalid vram suffix %c\n", suffix[0]); From 02d487fa30042fb68392e36f04e51fda266637e7 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 21 Oct 2023 08:53:37 +0200 Subject: [PATCH 140/415] fbdev: offb: Simplify offb_init_fb() Turn a strcpy()+strncat()+'\0' into an equivalent snprintf(). Signed-off-by: Christophe JAILLET Signed-off-by: Helge Deller --- drivers/video/fbdev/offb.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/video/fbdev/offb.c b/drivers/video/fbdev/offb.c index dcb1b81d35db..b421b46d88ef 100644 --- a/drivers/video/fbdev/offb.c +++ b/drivers/video/fbdev/offb.c @@ -423,11 +423,9 @@ static void offb_init_fb(struct platform_device *parent, const char *name, fix = &info->fix; var = &info->var; - if (name) { - strcpy(fix->id, "OFfb "); - strncat(fix->id, name, sizeof(fix->id) - sizeof("OFfb ")); - fix->id[sizeof(fix->id) - 1] = '\0'; - } else + if (name) + snprintf(fix->id, sizeof(fix->id), "OFfb %s", name); + else snprintf(fix->id, sizeof(fix->id), "OFfb %pOFn", dp); From fc6699d62f5f4facc3e934efd25892fc36050b70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 3 Nov 2023 18:35:58 +0100 Subject: [PATCH 141/415] fbdev: omapfb: Drop unused remove function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OMAP2_VRFB is a bool, so the vrfb driver can never be compiled as a module. With that __exit_p(vrfb_remove) always evaluates to NULL and vrfb_remove() is unused. If the driver was compilable as a module, it would fail to build because the type of vrfb_remove() isn't compatible with struct platform_driver::remove(). (The former returns void, the latter int.) Fixes: aa1e49a3752f ("OMAPDSS: VRFB: add omap_vrfb_supported()") Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/omap2/omapfb/vrfb.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/vrfb.c b/drivers/video/fbdev/omap2/omapfb/vrfb.c index ee0dd4c6a646..568e6e1eca62 100644 --- a/drivers/video/fbdev/omap2/omapfb/vrfb.c +++ b/drivers/video/fbdev/omap2/omapfb/vrfb.c @@ -368,17 +368,10 @@ static int __init vrfb_probe(struct platform_device *pdev) return 0; } -static void __exit vrfb_remove(struct platform_device *pdev) -{ - vrfb_loaded = false; -} - static struct platform_driver vrfb_driver = { .driver.name = "omapvrfb", - .remove = __exit_p(vrfb_remove), }; - -module_platform_driver_probe(vrfb_driver, vrfb_probe); +builtin_platform_driver_probe(vrfb_driver, vrfb_probe); MODULE_AUTHOR("Tomi Valkeinen "); MODULE_DESCRIPTION("OMAP VRFB"); From 3e91a38de1dce97b385d732a5f444264ea8fbd92 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Thu, 2 Nov 2023 20:24:03 +0100 Subject: [PATCH 142/415] fbdev: viafb: use new array-copying-wrapper viafbdev.c utilizes memdup_user() to copy an array from userspace. There is a new wrapper, specifically designed for copying arrays. Use this one instead. Suggested-by: Dave Airlie Signed-off-by: Philipp Stanner Signed-off-by: Helge Deller --- drivers/video/fbdev/via/viafbdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/via/viafbdev.c b/drivers/video/fbdev/via/viafbdev.c index 58868f8880d6..a52b1ba43a48 100644 --- a/drivers/video/fbdev/via/viafbdev.c +++ b/drivers/video/fbdev/via/viafbdev.c @@ -574,7 +574,7 @@ static int viafb_ioctl(struct fb_info *info, u_int cmd, u_long arg) break; case VIAFB_SET_GAMMA_LUT: - viafb_gamma_table = memdup_user(argp, 256 * sizeof(u32)); + viafb_gamma_table = memdup_array_user(argp, 256, sizeof(u32)); if (IS_ERR(viafb_gamma_table)) return PTR_ERR(viafb_gamma_table); viafb_set_gamma_table(viafb_bpp, viafb_gamma_table); From e08c30efda21ef4c0ec084a3a9581c220b442ba9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 27 Oct 2023 15:04:56 +0300 Subject: [PATCH 143/415] fbdev: imsttfb: fix double free in probe() The init_imstt() function calls framebuffer_release() on error and then the probe() function calls it again. It should only be done in probe. Fixes: 518ecb6a209f ("fbdev: imsttfb: Fix error path of imsttfb_probe()") Signed-off-by: Dan Carpenter Signed-off-by: Helge Deller --- drivers/video/fbdev/imsttfb.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c index e7e03e920729..acb943f85700 100644 --- a/drivers/video/fbdev/imsttfb.c +++ b/drivers/video/fbdev/imsttfb.c @@ -1421,7 +1421,6 @@ static int init_imstt(struct fb_info *info) if ((info->var.xres * info->var.yres) * (info->var.bits_per_pixel >> 3) > info->fix.smem_len || !(compute_imstt_regvals(par, info->var.xres, info->var.yres))) { printk("imsttfb: %ux%ux%u not supported\n", info->var.xres, info->var.yres, info->var.bits_per_pixel); - framebuffer_release(info); return -ENODEV; } @@ -1453,14 +1452,11 @@ static int init_imstt(struct fb_info *info) FBINFO_HWACCEL_FILLRECT | FBINFO_HWACCEL_YPAN; - if (fb_alloc_cmap(&info->cmap, 0, 0)) { - framebuffer_release(info); + if (fb_alloc_cmap(&info->cmap, 0, 0)) return -ENODEV; - } if (register_framebuffer(info) < 0) { fb_dealloc_cmap(&info->cmap); - framebuffer_release(info); return -ENODEV; } From aba6ab57a910ad4b940c2024d15f2cdbf5b7f76b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 27 Oct 2023 15:05:44 +0300 Subject: [PATCH 144/415] fbdev: imsttfb: fix a resource leak in probe I've re-written the error handling but the bug is that if init_imstt() fails we need to call iounmap(par->cmap_regs). Fixes: c75f5a550610 ("fbdev: imsttfb: Fix use after free bug in imsttfb_probe") Signed-off-by: Dan Carpenter Signed-off-by: Helge Deller --- drivers/video/fbdev/imsttfb.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c index acb943f85700..660499260f46 100644 --- a/drivers/video/fbdev/imsttfb.c +++ b/drivers/video/fbdev/imsttfb.c @@ -1496,8 +1496,8 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!request_mem_region(addr, size, "imsttfb")) { printk(KERN_ERR "imsttfb: Can't reserve memory region\n"); - framebuffer_release(info); - return -ENODEV; + ret = -ENODEV; + goto release_info; } switch (pdev->device) { @@ -1514,36 +1514,39 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) printk(KERN_INFO "imsttfb: Device 0x%x unknown, " "contact maintainer.\n", pdev->device); ret = -ENODEV; - goto error; + goto release_mem_region; } info->fix.smem_start = addr; info->screen_base = (__u8 *)ioremap(addr, par->ramdac == IBM ? 0x400000 : 0x800000); if (!info->screen_base) - goto error; + goto release_mem_region; info->fix.mmio_start = addr + 0x800000; par->dc_regs = ioremap(addr + 0x800000, 0x1000); if (!par->dc_regs) - goto error; + goto unmap_screen_base; par->cmap_regs_phys = addr + 0x840000; par->cmap_regs = (__u8 *)ioremap(addr + 0x840000, 0x1000); if (!par->cmap_regs) - goto error; + goto unmap_dc_regs; info->pseudo_palette = par->palette; ret = init_imstt(info); if (ret) - goto error; + goto unmap_cmap_regs; pci_set_drvdata(pdev, info); - return ret; + return 0; -error: - if (par->dc_regs) - iounmap(par->dc_regs); - if (info->screen_base) - iounmap(info->screen_base); +unmap_cmap_regs: + iounmap(par->cmap_regs); +unmap_dc_regs: + iounmap(par->dc_regs); +unmap_screen_base: + iounmap(info->screen_base); +release_mem_region: release_mem_region(addr, size); +release_info: framebuffer_release(info); return ret; } From 26fd31ef9c02a5e91cdb8eea127b056bd7cf0b3b Mon Sep 17 00:00:00 2001 From: Alex Spataru Date: Sat, 4 Nov 2023 16:01:52 -0500 Subject: [PATCH 145/415] ALSA: hda/realtek: Add quirk for ASUS UX7602ZM Enables the SPI-connected CSC35L41 audio amplifier for this laptop model. As of BIOS version 303 it's still necessary to modify the ACPI table to add the related _DSD properties: https://github.com/alex-spataru/asus_zenbook_ux7602zm_sound/ Signed-off-by: Alex Spataru Link: https://lore.kernel.org/r/DS7PR07MB7621BB5BB14F5473D181624CE3A4A@DS7PR07MB7621.namprd07.prod.outlook.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 58006c8bcfb9..415d3832952b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9929,6 +9929,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402ZA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x16a3, "ASUS UX3402VA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), From df42ee7e22f03de034939d582c4dff19e6030e4f Mon Sep 17 00:00:00 2001 From: Alexander Koskovich Date: Sun, 5 Nov 2023 15:29:29 +0000 Subject: [PATCH 146/415] ALSA: hda: Add ASRock X670E Taichi to denylist Recent AMD platforms expose an HD-audio bus but without any actual codecs, which is internally tied with a USB-audio device, supposedly. It results in "no codecs" error of HD-audio bus driver, and it's nothing but a waste of resources. snd_hda_intel 0000:59:00.6: no codecs found! Signed-off-by: Alexander Koskovich Link: https://lore.kernel.org/r/20231105152834.5620-1-akoskovich@pm.me Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 976e9d388cc7..e871afeeb383 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2074,6 +2074,7 @@ static const struct pci_device_id driver_denylist[] = { { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1043, 0x874f) }, /* ASUS ROG Zenith II / Strix */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb59) }, /* MSI TRX40 Creator */ { PCI_DEVICE_SUB(0x1022, 0x1487, 0x1462, 0xcb60) }, /* MSI TRX40 */ + { PCI_DEVICE_SUB(0x1022, 0x15e3, 0x1022, 0xd601) }, /* ASRock X670E Taichi */ {} }; From 327462725b0f759f093788dfbcb2f1fd132f956b Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Tue, 7 Nov 2023 18:34:35 +0800 Subject: [PATCH 147/415] nbd: fix uaf in nbd_open Commit 4af5f2e03013 ("nbd: use blk_mq_alloc_disk and blk_cleanup_disk") cleans up disk by blk_cleanup_disk() and it won't set disk->private_data as NULL as before. UAF may be triggered in nbd_open() if someone tries to open nbd device right after nbd_put() since nbd has been free in nbd_dev_remove(). Fix this by implementing ->free_disk and free private data in it. Fixes: 4af5f2e03013 ("nbd: use blk_mq_alloc_disk and blk_cleanup_disk") Signed-off-by: Li Lingfeng Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20231107103435.2074904-1-lilingfeng@huaweicloud.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 800f131222fc..855fdf5c3b4e 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -250,7 +250,6 @@ static void nbd_dev_remove(struct nbd_device *nbd) struct gendisk *disk = nbd->disk; del_gendisk(disk); - put_disk(disk); blk_mq_free_tag_set(&nbd->tag_set); /* @@ -261,7 +260,7 @@ static void nbd_dev_remove(struct nbd_device *nbd) idr_remove(&nbd_index_idr, nbd->index); mutex_unlock(&nbd_index_mutex); destroy_workqueue(nbd->recv_workq); - kfree(nbd); + put_disk(disk); } static void nbd_dev_remove_work(struct work_struct *work) @@ -1608,6 +1607,13 @@ static void nbd_release(struct gendisk *disk) nbd_put(nbd); } +static void nbd_free_disk(struct gendisk *disk) +{ + struct nbd_device *nbd = disk->private_data; + + kfree(nbd); +} + static const struct block_device_operations nbd_fops = { .owner = THIS_MODULE, @@ -1615,6 +1621,7 @@ static const struct block_device_operations nbd_fops = .release = nbd_release, .ioctl = nbd_ioctl, .compat_ioctl = nbd_ioctl, + .free_disk = nbd_free_disk, }; #if IS_ENABLED(CONFIG_DEBUG_FS) From 1b0a151c10a6d823f033023b9fdd9af72a89591b Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 7 Nov 2023 19:12:47 +0800 Subject: [PATCH 148/415] blk-core: use pr_warn_ratelimited() in bio_check_ro() If one of the underlying disks of raid or dm is set to read-only, then each io will generate new log, which will cause message storm. This environment is indeed problematic, however we can't make sure our naive custormer won't do this, hence use pr_warn_ratelimited() to prevent message storm in this case. Signed-off-by: Yu Kuai Fixes: 57e95e4670d1 ("block: fix and cleanup bio_check_ro") Signed-off-by: Ye Bin Link: https://lore.kernel.org/r/20231107111247.2157820-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- block/blk-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 9d51e9894ece..fdf25b8d6e78 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -501,8 +501,8 @@ static inline void bio_check_ro(struct bio *bio) if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) { if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) return; - pr_warn("Trying to write to read-only block-device %pg\n", - bio->bi_bdev); + pr_warn_ratelimited("Trying to write to read-only block-device %pg\n", + bio->bi_bdev); /* Older lvm-tools actually trigger this */ } } From 9256e8d47a2fa0bcb5d32e7fee8c674c476a480f Mon Sep 17 00:00:00 2001 From: Surbhi Kakarya Date: Mon, 25 Sep 2023 12:12:10 -0400 Subject: [PATCH 149/415] drm/amd: Disable XNACK on SRIOV environment The purpose of this patch is to disable XNACK or set XNACK OFF mode on SRIOV platform which doesn't support it. This will prevent user-space application to fail or result into unexpected behaviour whenever the application need to run test-case in XNACK ON mode. Signed-off-by: Surbhi Kakarya Reviewed-by: Shaoyun Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 5 ++++- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 10 ++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 7 ++++++- 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 2dce338b0f1e..5f71414190e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -826,7 +826,10 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) gc_ver == IP_VERSION(9, 4, 3) || gc_ver >= IP_VERSION(10, 3, 0)); - gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry; + if (!amdgpu_sriov_xnack_support(adev)) + gmc->noretry = 1; + else + gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry; } void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index a0aa624f5a92..f7d1d356ecdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -1093,3 +1093,13 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, else return RREG32(offset); } + +bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev) +{ + bool xnack_mode = true; + + if (amdgpu_sriov_vf(adev) && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + xnack_mode = false; + + return xnack_mode; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 858ef21ae515..935ca736300e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -365,4 +365,5 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id); void amdgpu_virt_post_reset(struct amdgpu_device *adev); +bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index fbf053001af9..7a33e06f5c90 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1416,8 +1416,13 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported) * per-process XNACK mode selection. But let the dev->noretry * setting still influence the default XNACK mode. */ - if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev)) + if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev)) { + if (!amdgpu_sriov_xnack_support(dev->kfd->adev)) { + pr_debug("SRIOV platform xnack not supported\n"); + return false; + } continue; + } /* GFXv10 and later GPUs do not support shader preemption * during page faults. This can lead to poor QoS for queue From 89830c62e677187a75b25202effbbf6611fc6552 Mon Sep 17 00:00:00 2001 From: Daniel Miess Date: Fri, 13 Oct 2023 10:38:09 -0400 Subject: [PATCH 150/415] drm/amd/display: On boot disable domain22 force power on [Why] HDCP2 enablement fails when domain22 is set to force power on [How] Disable force power on for domain22 on startup Reviewed-by: Nicholas Kazlauskas Acked-by: Hersen Wu Signed-off-by: Daniel Miess Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c | 10 +++++++++- drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h | 1 + .../gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 3 +++ drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h | 2 ++ 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c index 0f60c40e1fc5..46f71ff08fd1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c @@ -332,6 +332,13 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on) pg_cntl->pg_res_enable[PG_DCIO] = power_on; } +void pg_cntl35_set_force_poweron_domain22(struct pg_cntl *pg_cntl, bool power_on) +{ + struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl); + + REG_UPDATE(DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, power_on ? 1 : 0); +} + static bool pg_cntl35_plane_otg_status(struct pg_cntl *pg_cntl) { struct dcn_pg_cntl *pg_cntl_dcn = TO_DCN_PG_CNTL(pg_cntl); @@ -501,7 +508,8 @@ static const struct pg_cntl_funcs pg_cntl35_funcs = { .mpcc_pg_control = pg_cntl35_mpcc_pg_control, .opp_pg_control = pg_cntl35_opp_pg_control, .optc_pg_control = pg_cntl35_optc_pg_control, - .dwb_pg_control = pg_cntl35_dwb_pg_control + .dwb_pg_control = pg_cntl35_dwb_pg_control, + .set_force_poweron_domain22 = pg_cntl35_set_force_poweron_domain22 }; struct pg_cntl *pg_cntl35_create( diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h index 3de240884d22..069dae08e222 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.h @@ -183,6 +183,7 @@ void pg_cntl35_optc_pg_control(struct pg_cntl *pg_cntl, unsigned int optc_inst, bool power_on); void pg_cntl35_dwb_pg_control(struct pg_cntl *pg_cntl, bool power_on); void pg_cntl35_init_pg_status(struct pg_cntl *pg_cntl); +void pg_cntl35_set_force_poweron_domain22(struct pg_cntl *pg_cntl, bool power_on); struct pg_cntl *pg_cntl35_create( struct dc_context *ctx, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 34737d60b965..ff46e36cb254 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -311,6 +311,9 @@ void dcn35_init_hw(struct dc *dc) if (dc->res_pool->pg_cntl) { if (dc->res_pool->pg_cntl->funcs->init_pg_status) dc->res_pool->pg_cntl->funcs->init_pg_status(dc->res_pool->pg_cntl); + + if (dc->res_pool->pg_cntl->funcs->set_force_poweron_domain22) + dc->res_pool->pg_cntl->funcs->set_force_poweron_domain22(dc->res_pool->pg_cntl, false); } } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h index 00ea3864dd4d..b9812afb886b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h @@ -47,6 +47,8 @@ struct pg_cntl_funcs { void (*optc_pg_control)(struct pg_cntl *pg_cntl, unsigned int optc_inst, bool power_on); void (*dwb_pg_control)(struct pg_cntl *pg_cntl, bool power_on); void (*init_pg_status)(struct pg_cntl *pg_cntl); + + void (*set_force_poweron_domain22)(struct pg_cntl *pg_cntl, bool power_on); }; #endif //__DC_PG_CNTL_H__ From 13c84bbe0524e6a5c8a3d873152c1eaa295e3592 Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Sat, 14 Oct 2023 11:12:13 -0400 Subject: [PATCH 151/415] drm/amd/display: [FW Promotion] Release 0.0.189.0 - Minor formatting changes - Update defines to match the bit width of the field it is used for - Add new boot up bits to control HW sub block regions power down Reviewed-by: Aric Cyr Acked-by: Hersen Wu Signed-off-by: Anthony Koo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index bc907ae2052d..61b4009c3b76 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -583,6 +583,7 @@ union dmub_fw_boot_status { uint32_t fams_enabled : 1; /**< 1 if VBIOS data is deferred programmed */ uint32_t detection_required: 1; /**< if detection need to be triggered by driver */ uint32_t hw_power_init_done: 1; /**< 1 if hw power init is completed */ + uint32_t ono_regions_enabled: 1; /**< 1 if ONO regions are enabled */ } bits; /**< status bits */ uint32_t all; /**< 32-bit access to status bits */ }; @@ -599,6 +600,7 @@ enum dmub_fw_boot_status_bit { DMUB_FW_BOOT_STATUS_BIT_FAMS_ENABLED = (1 << 5), /**< 1 if FAMS is enabled*/ DMUB_FW_BOOT_STATUS_BIT_DETECTION_REQUIRED = (1 << 6), /**< 1 if detection need to be triggered by driver*/ DMUB_FW_BOOT_STATUS_BIT_HW_POWER_INIT_DONE = (1 << 7), /**< 1 if hw power init is completed */ + DMUB_FW_BOOT_STATUS_BIT_ONO_REGIONS_ENABLED = (1 << 8), /**< 1 if ONO regions are enabled */ }; /* Register bit definition for SCRATCH5 */ @@ -2098,7 +2100,7 @@ enum psr_version { /** * PSR not supported. */ - PSR_VERSION_UNSUPPORTED = 0xFFFFFFFF, + PSR_VERSION_UNSUPPORTED = 0xFF, // psr_version field is only 8 bits wide }; /** @@ -3620,7 +3622,6 @@ struct dmub_cmd_abm_pause_data { uint8_t pad[1]; }; - /** * Definition of a DMUB_CMD__ABM_PAUSE command. */ @@ -4046,6 +4047,7 @@ union dmub_rb_cmd { * Definition of a DMUB_CMD__MALL command. */ struct dmub_rb_cmd_mall mall; + /** * Definition of a DMUB_CMD__CAB command. */ @@ -4067,6 +4069,7 @@ union dmub_rb_cmd { * Definition of DMUB_CMD__PANEL_CNTL commands. */ struct dmub_rb_cmd_panel_cntl panel_cntl; + /** * Definition of a DMUB_CMD__ABM_SET_PIPE command. */ @@ -4470,10 +4473,6 @@ static inline void dmub_rb_flush_pending(const struct dmub_rb *rb) uint64_t *data = (uint64_t *)((uint8_t *)(rb->base_address) + rptr); uint8_t i; - /* Don't remove this. - * The contents need to actually be read from the ring buffer - * for this function to be effective. - */ for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) (void)READ_ONCE(*data++); @@ -4522,5 +4521,4 @@ static inline void dmub_rb_get_return_data(struct dmub_rb *rb, //============================================================================== //==================================================================== //============================================================================== - #endif /* _DMUB_CMD_H_ */ From 028bac5834495f4f4036bf8b3206fcdafe99a393 Mon Sep 17 00:00:00 2001 From: "JinZe.Xu" Date: Fri, 13 Oct 2023 09:50:00 +0800 Subject: [PATCH 152/415] drm/amd/display: decouple dmcub execution to reduce lock granularity [Why] On some systems dmub commands run at high IRQ, so long running commands will block other interrupts. [How] Decouple wait_for_idle from dmcub queue/execute/wait. Reviewed-by: Josip Pavic Acked-by: Hersen Wu Signed-off-by: JinZe.Xu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 74 ++++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h | 8 +++ 2 files changed, 82 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index ba142bef626b..e4c007203318 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -120,6 +120,80 @@ void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dc_dmub_srv, } } +bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, + unsigned int count, + union dmub_rb_cmd *cmd_list) +{ + struct dc_context *dc_ctx = dc_dmub_srv->ctx; + struct dmub_srv *dmub; + enum dmub_status status; + int i; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + dmub = dc_dmub_srv->dmub; + + for (i = 0 ; i < count; i++) { + // Queue command + status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); + + if (status == DMUB_STATUS_QUEUE_FULL) { + /* Execute and wait for queue to become empty again. */ + dmub_srv_cmd_execute(dmub); + dmub_srv_wait_for_idle(dmub, 100000); + + /* Requeue the command. */ + status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); + } + + if (status != DMUB_STATUS_OK) { + DC_ERROR("Error queueing DMUB command: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + return false; + } + } + + status = dmub_srv_cmd_execute(dmub); + if (status != DMUB_STATUS_OK) { + DC_ERROR("Error starting DMUB execution: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + return false; + } + + return true; +} + +bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, + enum dm_dmub_wait_type wait_type, + union dmub_rb_cmd *cmd_list) +{ + struct dmub_srv *dmub; + enum dmub_status status; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + dmub = dc_dmub_srv->dmub; + + // Wait for DMUB to process command + if (wait_type != DM_DMUB_WAIT_TYPE_NO_WAIT) { + status = dmub_srv_wait_for_idle(dmub, 100000); + + if (status != DMUB_STATUS_OK) { + DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + return false; + } + + // Copy data back from ring buffer into command + if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) + dmub_rb_get_return_data(&dmub->inbox1_rb, cmd_list); + } + + return true; +} + bool dc_dmub_srv_cmd_run(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type) { return dc_dmub_srv_cmd_run_list(dc_dmub_srv, 1, cmd, wait_type); diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 31150b214394..d4a60f53faab 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -56,6 +56,14 @@ void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv); bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv); +bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, + unsigned int count, + union dmub_rb_cmd *cmd_list); + +bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, + enum dm_dmub_wait_type wait_type, + union dmub_rb_cmd *cmd_list); + bool dc_dmub_srv_cmd_run(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd, enum dm_dmub_wait_type wait_type); bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list, enum dm_dmub_wait_type wait_type); From 5d71a8e336e1553aa685963ba362d951541ce082 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sun, 15 Oct 2023 14:28:01 -0400 Subject: [PATCH 153/415] drm/amd/display: 3.2.257 This version brings along following fixes: On boot disable domain22 force power on decouple dmcub execution to reduce lock granularity Enable fast update on blendTF change Fix blend LUT programming Program plane color setting correctly amend HPD handler for Replay Avoid NULL dereference of timing generator Acked-by: Hersen Wu Signed-off-by: Aric Cyr Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 6e54ca055fcb..db4b214c636d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -49,7 +49,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.256" +#define DC_VER "3.2.257" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 566f648c4e028ffd62f533d2e8d7e7f89d0e420c Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Fri, 13 Oct 2023 19:34:58 -0400 Subject: [PATCH 154/415] drm/amd/display: Fix missing blendTF programming [Why] When MPO surface pixel format is not ARGB8888, fast update can miss programming blendTF. [How] Set the gamma_change update flag on blend_tf change. Reviewed-by: Aric Cyr Acked-by: Hersen Wu Signed-off-by: Ilya Bakoulin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 4360a696f10a..940ceaca8545 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2582,6 +2582,9 @@ static enum surface_update_type det_surface_update(const struct dc *dc, if (u->gamut_remap_matrix) update_flags->bits.gamut_remap_change = 1; + if (u->blend_tf) + update_flags->bits.gamma_change = 1; + if (u->gamma) { enum surface_pixel_format format = SURFACE_PIXEL_FORMAT_GRPH_BEGIN; From f896cd2686817db915c265ff693a8dad7b6580dc Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Mon, 16 Oct 2023 15:23:16 -0400 Subject: [PATCH 155/415] drm/amd/display: Fix FRL assertion on boot [why] Make sure to ungate the clocks on boot so programming sequence is done successfully. [how] Move the ungate logic after bios init. Reviewed-by: Xi (Alex) Liu Acked-by: Hersen Wu Signed-off-by: Sung Joon Kim Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index ff46e36cb254..0569fa6f7600 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -138,16 +138,17 @@ void dcn35_init_hw(struct dc *dc) if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); - REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); - REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0x3F000000); - REG_WRITE(DCCG_GATE_DISABLE_CNTL5, 0x1f7c3fcf); - //dcn35_set_dmu_fgcg(hws, dc->debug.enable_fine_grain_clock_gating.bits.dmu); if (!dcb->funcs->is_accelerated_mode(dcb)) { /*this calls into dmubfw to do the init*/ hws->funcs.bios_golden_init(dc); } + + REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); + REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0x3F000000); + REG_WRITE(DCCG_GATE_DISABLE_CNTL5, 0x1f7c3fcf); + // Initialize the dccg if (res_pool->dccg->funcs->dccg_init) res_pool->dccg->funcs->dccg_init(res_pool->dccg); From eacfdc362d3c1eaab517f7c25b089f2536c010f1 Mon Sep 17 00:00:00 2001 From: Daniel Miess Date: Fri, 20 Oct 2023 12:31:09 -0400 Subject: [PATCH 156/415] drm/amd/display: Enable RCO options for dcn35 [Why & How] Enable root clock optimization options for dcn35 for power savings Reviewed-by: Nicholas Kazlauskas Acked-by: Hersen Wu Signed-off-by: Daniel Miess Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c | 1 + .../drm/amd/display/dc/dcn35/dcn35_resource.c | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c index addedcfd1238..277aae811eea 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c @@ -754,6 +754,7 @@ static const struct dccg_funcs dccg35_funcs = { .disable_symclk32_se = dccg31_disable_symclk32_se, .enable_symclk32_le = dccg31_enable_symclk32_le, .disable_symclk32_le = dccg31_disable_symclk32_le, + .set_symclk32_le_root_clock_gating = dccg31_set_symclk32_le_root_clock_gating, .set_physymclk = dccg35_set_physymclk, .set_dtbclk_dto = dccg35_set_dtbclk_dto, .set_audio_dtbclk_dto = dccg31_set_audio_dtbclk_dto, diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c index 3c7c810bab1f..9699adf52e15 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c @@ -719,14 +719,14 @@ static const struct dc_debug_options debug_defaults_drv = { .bits = { .dpp = true, .dsc = true,/*dscclk and dsc pg*/ - .hdmistream = false, - .hdmichar = false, - .dpstream = false, - .symclk32_se = false, - .symclk32_le = false, - .symclk_fe = false, - .physymclk = false, - .dpiasymclk = false, + .hdmistream = true, + .hdmichar = true, + .dpstream = true, + .symclk32_se = true, + .symclk32_le = true, + .symclk_fe = true, + .physymclk = false, // Prevents eDP light up + .dpiasymclk = true, } }, .seamless_boot_odm_combine = DML_FAIL_SOURCE_PIXEL_FORMAT, From 92e11f0159f6635bb8b0a7bb427ddb525bccbcb5 Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Mon, 16 Oct 2023 11:35:56 -0400 Subject: [PATCH 157/415] drm/amd/display: Enable more IPS options [why] To help isolate static screen and video playback tests, we want to enable an IPS option to allow IPS only on D3 cycle. [how] Add DISABLE_DYNAMIC and DISABLE_ALL IPS disable flags for user control. Reviewed-by: Jun Lei Acked-by: Hersen Wu Signed-off-by: Sung Joon Kim Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 7 ++++--- drivers/gpu/drm/amd/display/dc/core/dc.c | 4 ++-- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 9 ++++++--- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index f80917f6153b..1c05d12962e3 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -814,7 +814,8 @@ static void dcn35_set_idle_state(struct clk_mgr *clk_mgr_base, bool allow_idle) struct dc *dc = clk_mgr_base->ctx->dc; uint32_t val = dcn35_smu_read_ips_scratch(clk_mgr); - if (dc->config.disable_ips == 0) { + if (dc->config.disable_ips == DMUB_IPS_ENABLE || + dc->config.disable_ips == DMUB_IPS_DISABLE_DYNAMIC) { val |= DMUB_IPS1_ALLOW_MASK; val |= DMUB_IPS2_ALLOW_MASK; } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS1) { @@ -1114,7 +1115,7 @@ void dcn35_clk_mgr_construct( dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_FRAME_BUFFER, smu_dpm_clks.dpm_clks); - if (ctx->dc->config.disable_ips == 0) { + if (ctx->dc->config.disable_ips != DMUB_IPS_DISABLE_ALL) { bool ips_support = false; /*avoid call pmfw at init*/ @@ -1127,7 +1128,7 @@ void dcn35_clk_mgr_construct( ctx->dc->debug.disable_hpo_power_gate = false; } else { /*let's reset the config control flag*/ - ctx->dc->config.disable_ips = 1; /*pmfw not support it, disable it all*/ + ctx->dc->config.disable_ips = DMUB_IPS_DISABLE_ALL; /*pmfw not support it, disable it all*/ } } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 940ceaca8545..de8e5b18a12c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -4887,7 +4887,7 @@ void dc_allow_idle_optimizations(struct dc *dc, bool allow) if (dc->debug.disable_idle_power_optimizations) return; - if (dc->caps.ips_support && dc->config.disable_ips) + if (dc->caps.ips_support && (dc->config.disable_ips == DMUB_IPS_DISABLE_ALL)) return; if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->is_smu_present) @@ -4908,7 +4908,7 @@ bool dc_dmub_is_ips_idle_state(struct dc *dc) if (dc->debug.disable_idle_power_optimizations) return false; - if (!dc->caps.ips_support || dc->config.disable_ips) + if (!dc->caps.ips_support || (dc->config.disable_ips == DMUB_IPS_DISABLE_ALL)) return false; if (dc->hwss.get_idle_state) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 61b4009c3b76..98ce036368d5 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -619,9 +619,12 @@ enum dmub_lvtma_status_bit { }; enum dmub_ips_disable_type { - DMUB_IPS_DISABLE_IPS1 = 1, - DMUB_IPS_DISABLE_IPS2 = 2, - DMUB_IPS_DISABLE_IPS2_Z10 = 3, + DMUB_IPS_ENABLE = 0, + DMUB_IPS_DISABLE_ALL = 1, + DMUB_IPS_DISABLE_IPS1 = 2, + DMUB_IPS_DISABLE_IPS2 = 3, + DMUB_IPS_DISABLE_IPS2_Z10 = 4, + DMUB_IPS_DISABLE_DYNAMIC = 5, }; #define DMUB_IPS1_ALLOW_MASK 0x00000001 From 8df0d7d33a58d9394bd1240205e393d5f2bab6c7 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Wed, 18 Oct 2023 09:38:33 -0400 Subject: [PATCH 158/415] drm/amd/display: Allow 16 max_slices for DP2 DSC Enable 12 and 16 max_slices for DP2 DSC Reviewed-by: Alvin Lee Acked-by: Hersen Wu Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c | 10 +++++++++- drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 11 +++++++++++ drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h | 2 ++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c index 5eebe7f03ddc..c9ae2d8f0096 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c @@ -137,7 +137,15 @@ void dsc2_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz) dsc_enc_caps->max_total_throughput_mps = DCN20_MAX_DISPLAY_CLOCK_Mhz * 2; } - // TODO DSC: This is actually image width limitation, not a slice width. This should be added to the criteria to use ODM. + /* For pixel clock bigger than a single-pipe limit needing four engines ODM 4:1, which then quardruples our + * throughput and number of slices + */ + if (pixel_clock_100Hz > DCN20_MAX_PIXEL_CLOCK_Mhz*10000*2) { + dsc_enc_caps->slice_caps.bits.NUM_SLICES_12 = 1; + dsc_enc_caps->slice_caps.bits.NUM_SLICES_16 = 1; + dsc_enc_caps->max_total_throughput_mps = DCN20_MAX_DISPLAY_CLOCK_Mhz * 4; + } + dsc_enc_caps->max_slice_width = 5184; /* (including 64 overlap pixels for eDP MSO mode) */ dsc_enc_caps->bpp_increment_div = 16; /* 1/16th of a bit */ } diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index 3966845c7694..e8b5f17beb96 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -512,6 +512,11 @@ static bool intersect_dsc_caps( dsc_sink_caps->slice_caps1.bits.NUM_SLICES_4 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_4; dsc_common_caps->slice_caps.bits.NUM_SLICES_8 = dsc_sink_caps->slice_caps1.bits.NUM_SLICES_8 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_8; + dsc_common_caps->slice_caps.bits.NUM_SLICES_12 = + dsc_sink_caps->slice_caps1.bits.NUM_SLICES_12 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_12; + dsc_common_caps->slice_caps.bits.NUM_SLICES_16 = + dsc_sink_caps->slice_caps2.bits.NUM_SLICES_16 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_16; + if (!dsc_common_caps->slice_caps.raw) return false; @@ -703,6 +708,12 @@ static int get_available_dsc_slices(union dsc_enc_slice_caps slice_caps, int *av if (slice_caps.bits.NUM_SLICES_8) available_slices[idx++] = 8; + if (slice_caps.bits.NUM_SLICES_12) + available_slices[idx++] = 12; + + if (slice_caps.bits.NUM_SLICES_16) + available_slices[idx++] = 16; + return idx; } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h index d7b8d586b523..4b27f29d0d80 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h @@ -76,6 +76,8 @@ union dsc_enc_slice_caps { uint8_t NUM_SLICES_3 : 1; /* This one is not per DSC spec, but our encoder supports it */ uint8_t NUM_SLICES_4 : 1; uint8_t NUM_SLICES_8 : 1; + uint8_t NUM_SLICES_12 : 1; + uint8_t NUM_SLICES_16 : 1; } bits; uint8_t raw; }; From f031ba12082cadd1d827b36ba1d2c76a2395134d Mon Sep 17 00:00:00 2001 From: George Shen Date: Thu, 19 Oct 2023 22:03:41 -0400 Subject: [PATCH 159/415] drm/amd/display: Update test link rate DPCD bit field to match spec [Why] An SCR was made to the DP2.0 spec that updated the bit field definition for UHBR13.5 in the test link rate DPCD register. [How] Add new translation to match the SCR update. Keep old translation for backwards compatibility. Reviewed-by: Wenjing Liu Acked-by: Hersen Wu Signed-off-by: George Shen Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dp_types.h | 3 ++- .../gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 35ae245ef722..eeeeeef4d717 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -142,7 +142,8 @@ enum dp_test_link_rate { DP_TEST_LINK_RATE_HBR3 = 0x1E, DP_TEST_LINK_RATE_UHBR10 = 0x01, DP_TEST_LINK_RATE_UHBR20 = 0x02, - DP_TEST_LINK_RATE_UHBR13_5 = 0x03, + DP_TEST_LINK_RATE_UHBR13_5_LEGACY = 0x03, /* For backward compatibility*/ + DP_TEST_LINK_RATE_UHBR13_5 = 0x04, }; struct dc_link_settings { diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index 21a39afd274b..a3ae75761338 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -53,6 +53,7 @@ static enum dc_link_rate get_link_rate_from_test_link_rate(uint8_t test_rate) return LINK_RATE_UHBR10; case DP_TEST_LINK_RATE_UHBR20: return LINK_RATE_UHBR20; + case DP_TEST_LINK_RATE_UHBR13_5_LEGACY: case DP_TEST_LINK_RATE_UHBR13_5: return LINK_RATE_UHBR13_5; default: @@ -119,6 +120,11 @@ static void dp_test_send_link_training(struct dc_link *link) 1); link_settings.link_rate = get_link_rate_from_test_link_rate(test_rate); + if (link_settings.link_rate == LINK_RATE_UNKNOWN) { + DC_LOG_ERROR("%s: Invalid test link rate.", __func__); + ASSERT(0); + } + /* Set preferred link settings */ link->verified_link_cap.lane_count = link_settings.lane_count; link->verified_link_cap.link_rate = link_settings.link_rate; @@ -457,7 +463,7 @@ static void set_crtc_test_pattern(struct dc_link *link, controller_color_space = pipe_ctx->stream_res.test_pattern_params.color_space; if (controller_color_space == CONTROLLER_DP_COLOR_SPACE_UDEFINED) { - DC_LOG_WARNING("%s: Color space must be defined for test pattern", __func__); + DC_LOG_ERROR("%s: Color space must be defined for test pattern", __func__); ASSERT(0); } From 81df7271688cf04a502e3bbd19d0395a986a89e1 Mon Sep 17 00:00:00 2001 From: George Shen Date: Wed, 11 Oct 2023 16:32:39 -0400 Subject: [PATCH 160/415] drm/amd/display: Update DP HPO MSA with colorimetry from test request [Why] Some DP link layer tests request a different colorimetry than the default one that is used. Currently, our test automation logic does not update the MSA with the test request value for DP HPO case. [How] Update HPO MSA colorimetry with test automation request value. Reviewed-by: Wenjing Liu Acked-by: Hersen Wu Signed-off-by: George Shen Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/link/accessories/link_dp_cts.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index a3ae75761338..2d152b68a501 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -598,6 +598,7 @@ bool dp_set_test_pattern( const unsigned char *p_custom_pattern, unsigned int cust_pattern_size) { + const struct link_hwss *link_hwss; struct pipe_ctx *pipes = link->dc->current_state->res_ctx.pipe_ctx; struct pipe_ctx *pipe_ctx = NULL; unsigned int lane; @@ -834,11 +835,9 @@ bool dp_set_test_pattern( pipe_ctx->stream_res.tg->funcs->lock(pipe_ctx->stream_res.tg); /* update MSA to requested color space */ - pipe_ctx->stream_res.stream_enc->funcs->dp_set_stream_attribute(pipe_ctx->stream_res.stream_enc, - &pipe_ctx->stream->timing, - color_space, - pipe_ctx->stream->use_vsc_sdp_for_colorimetry, - link->dpcd_caps.dprx_feature.bits.SST_SPLIT_SDP_CAP); + link_hwss = get_link_hwss(link, &pipe_ctx->link_res); + pipe_ctx->stream->output_color_space = color_space; + link_hwss->setup_stream_attribute(pipe_ctx); if (pipe_ctx->stream->use_vsc_sdp_for_colorimetry) { if (test_pattern == DP_TEST_PATTERN_COLOR_SQUARES_CEA) From 85de32cd7b383f5d84195aed0c53e920e6786005 Mon Sep 17 00:00:00 2001 From: Yihan Zhu Date: Thu, 19 Oct 2023 14:23:17 -0400 Subject: [PATCH 161/415] drm/amd/display: DCN35 Disable cm power optimization [WHY & HOW] Enabling SCE after boot up will cause color distortion. Reviewed-by: Ovidiu Bunea Acked-by: Hersen Wu Signed-off-by: Yihan Zhu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c index 9699adf52e15..e35d4c028d01 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c @@ -708,7 +708,7 @@ static const struct dc_debug_options debug_defaults_drv = { .i2c = true, .dmcu = false, // This is previously known to cause hang on S3 cycles if enabled .dscl = true, - .cm = true, + .cm = false, .mpc = true, .optc = true, .vpg = true, From 39ad51cb61556892ce8af02b995136cd2711527b Mon Sep 17 00:00:00 2001 From: Dennis Chan Date: Wed, 3 May 2023 14:20:17 +0800 Subject: [PATCH 162/415] drm/amd/display: Introduce flag for disabling Replay desync recovery [why] It's useful to disable the recovery mechanism when debugging replay desync errors. Signed-off-by: Dennis Chan Acked-by: Hersen Wu Reviewed-by: Robin Chen Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_types.h | 3 ++- .../drm/amd/display/dc/link/protocols/link_dp_irq_handler.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 40dc51853d62..bb159b6b1b76 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1037,7 +1037,8 @@ struct replay_config { bool replay_smu_opt_supported; // SMU optimization is supported unsigned int replay_enable_option; // Replay enablement option uint32_t debug_flags; // Replay debug flags - bool replay_timing_sync_supported; // Replay desync is supported + bool replay_timing_sync_supported; // Replay desync is supported + bool force_disable_desync_error_check; // Replay desync is supported union replay_error_status replay_error_status; // Replay error status }; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c index 34bf8a9ef738..fc14c3644144 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c @@ -190,6 +190,9 @@ static bool handle_hpd_irq_replay_sink(struct dc_link *link) /*AMD Replay version reuse DP_PSR_ERROR_STATUS for REPLAY_ERROR status.*/ union psr_error_status replay_error_status; + if (link->replay_settings.config.force_disable_desync_error_check) + return true; + if (!link->replay_settings.replay_feature_enabled) return false; From ce3b32ec4aef7171277c7c8efc07861eac27998c Mon Sep 17 00:00:00 2001 From: ChunTao Tso Date: Tue, 3 Oct 2023 17:05:05 +0800 Subject: [PATCH 163/415] drm/amd/display: amend HPD handler for Replay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] For Replay, if we receive HPD, it doesn’t need to reboot the display. We don’t need to return anything exactly. [How] Return nothing just because we don’t need to reboot the display. Signed-off-by: ChunTao Tso Acked-by: Hersen Wu Reviewed-by: Jerry Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/link/protocols/link_dp_irq_handler.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c index fc14c3644144..06e829ef48af 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c @@ -184,17 +184,17 @@ static bool handle_hpd_irq_psr_sink(struct dc_link *link) return false; } -static bool handle_hpd_irq_replay_sink(struct dc_link *link) +static void handle_hpd_irq_replay_sink(struct dc_link *link) { union dpcd_replay_configuration replay_configuration; /*AMD Replay version reuse DP_PSR_ERROR_STATUS for REPLAY_ERROR status.*/ union psr_error_status replay_error_status; if (link->replay_settings.config.force_disable_desync_error_check) - return true; + return; if (!link->replay_settings.replay_feature_enabled) - return false; + return; dm_helpers_dp_read_dpcd( link->ctx, @@ -246,7 +246,6 @@ static bool handle_hpd_irq_replay_sink(struct dc_link *link) edp_set_replay_allow_active(link, &allow_active, true, false, NULL); } } - return true; } void dp_handle_link_loss(struct dc_link *link) @@ -427,9 +426,7 @@ bool dp_handle_hpd_rx_irq(struct dc_link *link, /* PSR-related error was detected and handled */ return true; - if (handle_hpd_irq_replay_sink(link)) - /* Replay-related error was detected and handled */ - return true; + handle_hpd_irq_replay_sink(link); /* If PSR-related error handled, Main link may be off, * so do not handle as a normal sink status change interrupt. From fd7cedccdde3ff9c7d31092787f280631da7b207 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Tue, 10 Oct 2023 13:31:19 -0400 Subject: [PATCH 164/415] drm/amd/display: Fix OTG disable workaround logic [Why] DENTIST was hanging when performing DISPCLK update with OTG enabled, as OTG disable workaround was not executing. [How] Workaround was checking against current_state before running, but when called from optimize_bandwidth (safe_to_lower), we should be checking against context instead. Reviewed-by: Nicholas Kazlauskas Acked-by: Hersen Wu Signed-off-by: Taimur Hassan Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 1c05d12962e3..085ac191c94f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -111,13 +111,16 @@ static int dcn35_get_active_display_cnt_wa( return display_count; } -static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable) +static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, + bool safe_to_lower, bool disable) { struct dc *dc = clk_mgr_base->ctx->dc; int i; for (i = 0; i < dc->res_pool->pipe_count; ++i) { - struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *pipe = safe_to_lower + ? &context->res_ctx.pipe_ctx[i] + : &dc->current_state->res_ctx.pipe_ctx[i]; if (pipe->top_pipe || pipe->prev_odm_pipe) continue; @@ -301,11 +304,11 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, } if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { - dcn35_disable_otg_wa(clk_mgr_base, context, true); + dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; dcn35_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); - dcn35_disable_otg_wa(clk_mgr_base, context, false); + dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); update_dispclk = true; } From f9e7d4fadc4fbd8083e8dec04fabf870f3f6ae39 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Thu, 19 Oct 2023 15:32:55 -0400 Subject: [PATCH 165/415] drm/amd/display: Add missing dml2 init value for dcn35 [Why] For lighting up, some dml2 params needs to be initialized. One of them escaped initial patch under: "drm/amd/display: Add DCN35 DML2 support" [How] Add missing initialization. Fixes: 115009d11ccf ("drm/amd/display: Add DCN35 DML2 support") Reviewed-by: Harry Wentland Acked-by: Hersen Wu Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 89836f175a13..159a0956893f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -231,6 +231,7 @@ void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, s out->num_chans = 4; out->round_trip_ping_latency_dcfclk_cycles = 106; out->smn_latency_us = 2; + out->dispclk_dppclk_vco_speed_mhz = 3600; break; case dml_project_dcn351: From 51131758c79f3f727318ee468bbb9c22666604e3 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sun, 22 Oct 2023 17:26:35 -0400 Subject: [PATCH 166/415] drm/amd/display: 3.2.258 This version brings along following fixes: Update test link rate DPCD bit field to match spec Enable RCO options for dcn35 Add missing dml2 init value for dcn35 Enable DCN clock gating DCN35 Disable cm power optimization Allow 16 max_slices for DP2 DSC Fix OTG disable workaround logic Enable more IPS options Fix FRL assertion on boot Fix missing blendTF programming Update DP HPO MSA with colorimetry from test request Fix handling duplicate planes on one stream Acked-by: Hersen Wu Signed-off-by: Aric Cyr Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index db4b214c636d..f02159853c46 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -49,7 +49,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.257" +#define DC_VER "3.2.258" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 77b2c07d7d3cc1ee11cb64d209d59e57b0ae649b Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Sat, 21 Oct 2023 03:51:08 -0400 Subject: [PATCH 167/415] drm/amd/display: [FW Promotion] Release 0.0.190.0 - Increase number of bits for IPS boot option Reviewed-by: Aric Cyr Acked-by: Hersen Wu Signed-off-by: Anthony Koo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 98ce036368d5..ed4379c04715 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -658,8 +658,8 @@ union dmub_fw_boot_options { uint32_t disable_clk_ds: 1; /* 1 if disallow dispclk_ds and dppclk_ds*/ uint32_t disable_timeout_recovery : 1; /* 1 if timeout recovery should be disabled */ uint32_t ips_pg_disable: 1; /* 1 to disable ONO domains power gating*/ - uint32_t ips_disable: 2; /* options to disable ips support*/ - uint32_t reserved : 10; /**< reserved */ + uint32_t ips_disable: 3; /* options to disable ips support*/ + uint32_t reserved : 9; /**< reserved */ } bits; /**< boot bits */ uint32_t all; /**< 32-bit access to bits */ }; From d0ef62bd109c2af3ba8dc16a6d5ad4a0f30e03dc Mon Sep 17 00:00:00 2001 From: Dennis Chan Date: Thu, 12 Oct 2023 23:08:25 +0800 Subject: [PATCH 168/415] drm/amd/display: Revise Replay Desync Error IRQ handle [Why] Current Desync IRQ handler will have some potential do not hit the desync error case. We change to check both desync error HPD and DPCD. Signed-off-by: Dennis Chan Acked-by: Hersen Wu Reviewed-by: Robin Chen Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_types.h | 1 + .../drm/amd/display/dc/link/protocols/link_dp_irq_handler.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index bb159b6b1b76..cea666ea66c6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1039,6 +1039,7 @@ struct replay_config { uint32_t debug_flags; // Replay debug flags bool replay_timing_sync_supported; // Replay desync is supported bool force_disable_desync_error_check; // Replay desync is supported + bool received_desync_error_hpd; //Replay Received Desync Error HPD. union replay_error_status replay_error_status; // Replay error status }; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c index 06e829ef48af..0c00e94e90b1 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c @@ -210,6 +210,9 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link) &replay_error_status.raw, sizeof(replay_error_status.raw)); + if (replay_configuration.bits.DESYNC_ERROR_STATUS) + link->replay_settings.config.received_desync_error_hpd = 1; + link->replay_settings.config.replay_error_status.bits.LINK_CRC_ERROR = replay_error_status.bits.LINK_CRC_ERROR; link->replay_settings.config.replay_error_status.bits.DESYNC_ERROR = From 3e18d4bd9ac627d8262661272ea1e60631c2608e Mon Sep 17 00:00:00 2001 From: Ovidiu Bunea Date: Wed, 25 Oct 2023 13:05:47 -0400 Subject: [PATCH 169/415] drm/amd/display: Disable OTG for mode timing switch on DCN35 [why] Doing a mode timing change causes a hang when OTG is not disabled. [how] Add link_enc null check in disable_otg_wa to cover this case. Reviewed-by: Nicholas Kazlauskas Acked-by: Hersen Wu Signed-off-by: Ovidiu Bunea Signed-off-by: Hersen Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 085ac191c94f..0fa4fcd00de2 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -124,7 +124,8 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state * if (pipe->top_pipe || pipe->prev_odm_pipe) continue; - if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) { + if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || + !pipe->stream->link_enc)) { struct stream_encoder *stream_enc = pipe->stream_res.stream_enc; if (disable) { From 60ccd588d5820fc270bdd75185b5dc0220019e35 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 20 Oct 2023 15:31:24 -0600 Subject: [PATCH 170/415] drm/amd/display: Create optc.h file For all the components that participate in DCN architecture, there is a header in the dc/inch/hw. For some reason, OPTC broke this pattern and added all the primary functions/structs associated with that in the dcn10_optc.h file. For consistency's sake, this commit introduces a new optc.h file and extracts the code from dcn10_optc to this new file. Reviewed-by: Hamza Mahfooz Acked-by: Hersen Wu Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/amdgpu_dm/amdgpu_dm_trace.h | 2 +- .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.h | 186 +-------------- drivers/gpu/drm/amd/display/dc/inc/hw/optc.h | 219 ++++++++++++++++++ 3 files changed, 221 insertions(+), 186 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/inc/hw/optc.h diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h index 0f580ea37576..133af994a08c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h @@ -37,7 +37,7 @@ #include #include #include -#include "dcn10/dcn10_optc.h" +#include "dc/inc/hw/optc.h" #include "dc/inc/core_types.h" diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index aaf6c981fd9e..ab81594a7fad 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -26,7 +26,7 @@ #ifndef __DC_TIMING_GENERATOR_DCN10_H__ #define __DC_TIMING_GENERATOR_DCN10_H__ -#include "timing_generator.h" +#include "optc.h" #define DCN10TG_FROM_TG(tg)\ container_of(tg, struct optc, base) @@ -594,190 +594,6 @@ struct dcn_optc_mask { TG_REG_FIELD_LIST_DCN3_5(uint32_t) }; -struct optc { - struct timing_generator base; - - const struct dcn_optc_registers *tg_regs; - const struct dcn_optc_shift *tg_shift; - const struct dcn_optc_mask *tg_mask; - - int opp_count; - - uint32_t max_h_total; - uint32_t max_v_total; - - uint32_t min_h_blank; - - uint32_t min_h_sync_width; - uint32_t min_v_sync_width; - uint32_t min_v_blank; - uint32_t min_v_blank_interlace; - - int vstartup_start; - int vupdate_offset; - int vupdate_width; - int vready_offset; - struct dc_crtc_timing orginal_patched_timing; - enum signal_type signal; -}; - void dcn10_timing_generator_init(struct optc *optc); -struct dcn_otg_state { - uint32_t v_blank_start; - uint32_t v_blank_end; - uint32_t v_sync_a_pol; - uint32_t v_total; - uint32_t v_total_max; - uint32_t v_total_min; - uint32_t v_total_min_sel; - uint32_t v_total_max_sel; - uint32_t v_sync_a_start; - uint32_t v_sync_a_end; - uint32_t h_blank_start; - uint32_t h_blank_end; - uint32_t h_sync_a_start; - uint32_t h_sync_a_end; - uint32_t h_sync_a_pol; - uint32_t h_total; - uint32_t underflow_occurred_status; - uint32_t otg_enabled; - uint32_t blank_enabled; - uint32_t vertical_interrupt1_en; - uint32_t vertical_interrupt1_line; - uint32_t vertical_interrupt2_en; - uint32_t vertical_interrupt2_line; -}; - -void optc1_read_otg_state(struct optc *optc1, - struct dcn_otg_state *s); - -bool optc1_get_hw_timing(struct timing_generator *tg, - struct dc_crtc_timing *hw_crtc_timing); - -bool optc1_validate_timing( - struct timing_generator *optc, - const struct dc_crtc_timing *timing); - -void optc1_program_timing( - struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing, - int vready_offset, - int vstartup_start, - int vupdate_offset, - int vupdate_width, - const enum signal_type signal, - bool use_vbios); - -void optc1_setup_vertical_interrupt0( - struct timing_generator *optc, - uint32_t start_line, - uint32_t end_line); -void optc1_setup_vertical_interrupt1( - struct timing_generator *optc, - uint32_t start_line); -void optc1_setup_vertical_interrupt2( - struct timing_generator *optc, - uint32_t start_line); - -void optc1_program_global_sync( - struct timing_generator *optc, - int vready_offset, - int vstartup_start, - int vupdate_offset, - int vupdate_width); - -bool optc1_disable_crtc(struct timing_generator *optc); - -bool optc1_is_counter_moving(struct timing_generator *optc); - -void optc1_get_position(struct timing_generator *optc, - struct crtc_position *position); - -uint32_t optc1_get_vblank_counter(struct timing_generator *optc); - -void optc1_get_crtc_scanoutpos( - struct timing_generator *optc, - uint32_t *v_blank_start, - uint32_t *v_blank_end, - uint32_t *h_position, - uint32_t *v_position); - -void optc1_set_early_control( - struct timing_generator *optc, - uint32_t early_cntl); - -void optc1_wait_for_state(struct timing_generator *optc, - enum crtc_state state); - -void optc1_set_blank(struct timing_generator *optc, - bool enable_blanking); - -bool optc1_is_blanked(struct timing_generator *optc); - -void optc1_program_blank_color( - struct timing_generator *optc, - const struct tg_color *black_color); - -bool optc1_did_triggered_reset_occur( - struct timing_generator *optc); - -void optc1_enable_reset_trigger(struct timing_generator *optc, int source_tg_inst); - -void optc1_disable_reset_trigger(struct timing_generator *optc); - -void optc1_lock(struct timing_generator *optc); - -void optc1_unlock(struct timing_generator *optc); - -void optc1_enable_optc_clock(struct timing_generator *optc, bool enable); - -void optc1_set_drr( - struct timing_generator *optc, - const struct drr_params *params); - -void optc1_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max); - -void optc1_set_static_screen_control( - struct timing_generator *optc, - uint32_t event_triggers, - uint32_t num_frames); - -void optc1_program_stereo(struct timing_generator *optc, - const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags); - -bool optc1_is_stereo_left_eye(struct timing_generator *optc); - -void optc1_clear_optc_underflow(struct timing_generator *optc); - -void optc1_tg_init(struct timing_generator *optc); - -bool optc1_is_tg_enabled(struct timing_generator *optc); - -bool optc1_is_optc_underflow_occurred(struct timing_generator *optc); - -void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable); - -void optc1_set_timing_double_buffer(struct timing_generator *optc, bool enable); - -bool optc1_get_otg_active_size(struct timing_generator *optc, - uint32_t *otg_active_width, - uint32_t *otg_active_height); - -void optc1_enable_crtc_reset( - struct timing_generator *optc, - int source_tg_inst, - struct crtc_trigger_info *crtc_tp); - -bool optc1_configure_crc(struct timing_generator *optc, - const struct crc_params *params); - -bool optc1_get_crc(struct timing_generator *optc, - uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); - -bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing); - -void optc1_set_vtg_params(struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing, bool program_fp2); - #endif /* __DC_TIMING_GENERATOR_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h new file mode 100644 index 000000000000..9a8bf6ec70ea --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +/** + * DOC: overview + * + * Output Pipe Timing Combiner (OPTC) includes two major functional blocks: + * Output Data Mapper (ODM) and Output Timing Generator (OTG). + * + * - ODM: It is Output Data Mapping block. It can combine input data from + * multiple OPP data pipes into one single data stream or split data from one + * OPP data pipe into multiple data streams or just bypass OPP data to DIO. + * - OTG: It is Output Timing Generator. It generates display timing signals to + * drive the display output. + */ + +#ifndef __DC_OPTC_H__ +#define __DC_OPTC_H__ + +#include "timing_generator.h" + +struct optc { + struct timing_generator base; + + const struct dcn_optc_registers *tg_regs; + const struct dcn_optc_shift *tg_shift; + const struct dcn_optc_mask *tg_mask; + + int opp_count; + + uint32_t max_h_total; + uint32_t max_v_total; + + uint32_t min_h_blank; + + uint32_t min_h_sync_width; + uint32_t min_v_sync_width; + uint32_t min_v_blank; + uint32_t min_v_blank_interlace; + + int vstartup_start; + int vupdate_offset; + int vupdate_width; + int vready_offset; + struct dc_crtc_timing orginal_patched_timing; + enum signal_type signal; +}; + +struct dcn_otg_state { + uint32_t v_blank_start; + uint32_t v_blank_end; + uint32_t v_sync_a_pol; + uint32_t v_total; + uint32_t v_total_max; + uint32_t v_total_min; + uint32_t v_total_min_sel; + uint32_t v_total_max_sel; + uint32_t v_sync_a_start; + uint32_t v_sync_a_end; + uint32_t h_blank_start; + uint32_t h_blank_end; + uint32_t h_sync_a_start; + uint32_t h_sync_a_end; + uint32_t h_sync_a_pol; + uint32_t h_total; + uint32_t underflow_occurred_status; + uint32_t otg_enabled; + uint32_t blank_enabled; + uint32_t vertical_interrupt1_en; + uint32_t vertical_interrupt1_line; + uint32_t vertical_interrupt2_en; + uint32_t vertical_interrupt2_line; +}; + +void optc1_read_otg_state(struct optc *optc1, struct dcn_otg_state *s); + +bool optc1_get_hw_timing(struct timing_generator *tg, struct dc_crtc_timing *hw_crtc_timing); + +bool optc1_validate_timing(struct timing_generator *optc, + const struct dc_crtc_timing *timing); + +void optc1_program_timing(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, + bool use_vbios); + +void optc1_setup_vertical_interrupt0(struct timing_generator *optc, + uint32_t start_line, + uint32_t end_line); + +void optc1_setup_vertical_interrupt1(struct timing_generator *optc, + uint32_t start_line); + +void optc1_setup_vertical_interrupt2(struct timing_generator *optc, + uint32_t start_line); + +void optc1_program_global_sync(struct timing_generator *optc, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width); + +bool optc1_disable_crtc(struct timing_generator *optc); + +bool optc1_is_counter_moving(struct timing_generator *optc); + +void optc1_get_position(struct timing_generator *optc, + struct crtc_position *position); + +uint32_t optc1_get_vblank_counter(struct timing_generator *optc); + +void optc1_get_crtc_scanoutpos(struct timing_generator *optc, + uint32_t *v_blank_start, + uint32_t *v_blank_end, + uint32_t *h_position, + uint32_t *v_position); + +void optc1_set_early_control(struct timing_generator *optc, + uint32_t early_cntl); + +void optc1_wait_for_state(struct timing_generator *optc, + enum crtc_state state); + +void optc1_set_blank(struct timing_generator *optc, + bool enable_blanking); + +bool optc1_is_blanked(struct timing_generator *optc); + +void optc1_program_blank_color(struct timing_generator *optc, + const struct tg_color *black_color); + +bool optc1_did_triggered_reset_occur(struct timing_generator *optc); + +void optc1_enable_reset_trigger(struct timing_generator *optc, int source_tg_inst); + +void optc1_disable_reset_trigger(struct timing_generator *optc); + +void optc1_lock(struct timing_generator *optc); + +void optc1_unlock(struct timing_generator *optc); + +void optc1_enable_optc_clock(struct timing_generator *optc, bool enable); + +void optc1_set_drr(struct timing_generator *optc, + const struct drr_params *params); + +void optc1_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max); + +void optc1_set_static_screen_control(struct timing_generator *optc, + uint32_t event_triggers, + uint32_t num_frames); + +void optc1_program_stereo(struct timing_generator *optc, + const struct dc_crtc_timing *timing, + struct crtc_stereo_flags *flags); + +bool optc1_is_stereo_left_eye(struct timing_generator *optc); + +void optc1_clear_optc_underflow(struct timing_generator *optc); + +void optc1_tg_init(struct timing_generator *optc); + +bool optc1_is_tg_enabled(struct timing_generator *optc); + +bool optc1_is_optc_underflow_occurred(struct timing_generator *optc); + +void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable); + +void optc1_set_timing_double_buffer(struct timing_generator *optc, bool enable); + +bool optc1_get_otg_active_size(struct timing_generator *optc, + uint32_t *otg_active_width, + uint32_t *otg_active_height); + +void optc1_enable_crtc_reset(struct timing_generator *optc, + int source_tg_inst, + struct crtc_trigger_info *crtc_tp); + +bool optc1_configure_crc(struct timing_generator *optc, const struct crc_params *params); + +bool optc1_get_crc(struct timing_generator *optc, + uint32_t *r_cr, + uint32_t *g_y, + uint32_t *b_cb); + +bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing); + +void optc1_set_vtg_params(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing, + bool program_fp2); + +#endif From 62893e9794c5ba237af93fa1f67cd04ca823405e Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Wed, 25 Oct 2023 01:18:04 -0400 Subject: [PATCH 171/415] drm/amd/display: Remove unused duplicate register definition [Why] DCN32 uses ABM register definitions in dcn32_resource.h, remove duplicate from dce_abm.h to avoid confusion. Reviewed-by: Dillon Varone Acked-by: Hersen Wu Signed-off-by: Joshua Aberback Signed-off-by: Hersen Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_abm.h | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h index c50aa30614be..051e4c2b4cf2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h @@ -128,21 +128,6 @@ SRI(DC_ABM1_ACE_THRES_12, ABM, id), \ NBIO_SR(BIOS_SCRATCH_2) -#define ABM_DCN32_REG_LIST(id)\ - SRI(DC_ABM1_HG_SAMPLE_RATE, ABM, id), \ - SRI(DC_ABM1_LS_SAMPLE_RATE, ABM, id), \ - SRI(BL1_PWM_BL_UPDATE_SAMPLE_RATE, ABM, id), \ - SRI(DC_ABM1_HG_MISC_CTRL, ABM, id), \ - SRI(DC_ABM1_IPCSC_COEFF_SEL, ABM, id), \ - SRI(BL1_PWM_CURRENT_ABM_LEVEL, ABM, id), \ - SRI(BL1_PWM_TARGET_ABM_LEVEL, ABM, id), \ - SRI(BL1_PWM_USER_LEVEL, ABM, id), \ - SRI(DC_ABM1_LS_MIN_MAX_PIXEL_VALUE_THRES, ABM, id), \ - SRI(DC_ABM1_HGLS_REG_READ_PROGRESS, ABM, id), \ - SRI(DC_ABM1_ACE_OFFSET_SLOPE_0, ABM, id), \ - SRI(DC_ABM1_ACE_THRES_12, ABM, id), \ - NBIO_SR(BIOS_SCRATCH_2) - #define ABM_SF(reg_name, field_name, post_fix)\ .field_name = reg_name ## __ ## field_name ## post_fix From fecbaa0a79adaa632e406ee5cffe5751e2d44fcb Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Fri, 20 Oct 2023 16:18:40 -0400 Subject: [PATCH 172/415] drm/amd/display: save and restore mall state when applying minimal transition [why] There is a case when we are switching from ODM combine to Subvp where minimal transition based off subvp state is required. In thise case, we need to save and restore mall state when applying minimal transition. Reviewed-by: Dillon Varone Acked-by: Hersen Wu Signed-off-by: Wenjing Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index de8e5b18a12c..7b9bf5cb4529 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -4116,8 +4116,17 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc, bool success = false; struct dc_state *minimal_transition_context; struct pipe_split_policy_backup policy; + struct mall_temp_config mall_temp_config; /* commit based on new context */ + /* Since all phantom pipes are removed in full validation, + * we have to save and restore the subvp/mall config when + * we do a minimal transition since the flags marking the + * pipe as subvp/phantom will be cleared (dc copy constructor + * creates a shallow copy). + */ + if (dc->res_pool->funcs->save_mall_state) + dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config); minimal_transition_context = create_minimal_transition_state(dc, context, &policy); if (minimal_transition_context) { @@ -4126,9 +4135,20 @@ static bool commit_minimal_transition_state_for_windowed_mpo_odm(struct dc *dc, dc->hwss.is_pipe_topology_transition_seamless( dc, minimal_transition_context, context)) { DC_LOG_DC("%s base = new state\n", __func__); + success = dc_commit_state_no_check(dc, minimal_transition_context) == DC_OK; } release_minimal_transition_state(dc, minimal_transition_context, &policy); + if (dc->res_pool->funcs->restore_mall_state) + dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config); + /* If we do a minimal transition with plane removal and the context + * has subvp we also have to retain back the phantom stream / planes + * since the refcount is decremented as part of the min transition + * (we commit a state with no subvp, so the phantom streams / planes + * had to be removed). + */ + if (dc->res_pool->funcs->retain_phantom_pipes) + dc->res_pool->funcs->retain_phantom_pipes(dc, context); } if (!success) { From 5c10147464fafbd3850d1f276a75a8825ecbbc0d Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sun, 29 Oct 2023 21:22:37 -0400 Subject: [PATCH 173/415] drm/amd/display: Promote DAL to 3.2.259 Summary: - Enable DCN35 physymclk root clock gating - Fix DP automation test pattern bug - Disable OTG for mode timing switch on DCN35 - Refactor DML2 - Revert Fix handling duplicate planes on one stream - Revert Enable DCN clock gating - Implement cursor P-State allow for SubVP - Optimize pipe otg allocation - Save and restore mall state while switching from ODM to Subvp Acked-by: Hersen Wu Signed-off-by: Aric Cyr Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index f02159853c46..9316b737a8ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -49,7 +49,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.258" +#define DC_VER "3.2.259" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 90f2f83352f7e85edb38cdb171627ded3d9c7040 Mon Sep 17 00:00:00 2001 From: Chaitanya Dhere Date: Tue, 24 Oct 2023 13:10:12 -0400 Subject: [PATCH 174/415] drm/amd/display: Remove references to unused dml arch version Clean-up the code to remove references of all unused dml architecture versions since only dml2 is actively used. Reviewed-by: Jun Lei Acked-by: Hersen Wu Signed-off-by: Chaitanya Dhere Signed-off-by: Hersen Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/dml2_dc_resource_mgmt.c | 16 ++-------------- .../amd/display/dc/dml2/dml2_internal_types.h | 1 - 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c index d2046e770c50..82a1152e18b5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c @@ -911,26 +911,14 @@ bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const s unsigned int stream_id; const unsigned int *ODMMode, *DPPPerSurface; - unsigned int odm_mode_array[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0}, dpp_per_surface_array[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0}; struct dc_pipe_mapping_scratch scratch; if (ctx->config.map_dc_pipes_with_callbacks) return map_dc_pipes_with_callbacks( ctx, state, disp_cfg, mapping, existing_state); - if (ctx->architecture == dml2_architecture_21) { - /* - * Extract ODM and DPP outputs from DML2.1 and map them in an array as required for pipe mapping in dml2_map_dc_pipes. - * As data cannot be directly extracted in const pointers, assign these arrays to const pointers before proceeding to - * maximize the reuse of existing code. Const pointers are required because dml2.0 dml_display_cfg_st is const. - * - */ - ODMMode = (const unsigned int *)odm_mode_array; - DPPPerSurface = (const unsigned int *)dpp_per_surface_array; - } else { - ODMMode = (unsigned int *)disp_cfg->hw.ODMMode; - DPPPerSurface = disp_cfg->hw.DPPPerSurface; - } + ODMMode = (unsigned int *)disp_cfg->hw.ODMMode; + DPPPerSurface = disp_cfg->hw.DPPPerSurface; for (stream_index = 0; stream_index < state->stream_count; stream_index++) { memset(&scratch, 0, sizeof(struct dc_pipe_mapping_scratch)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h index ed5b767d46e0..df46a866f801 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h @@ -104,7 +104,6 @@ struct dml2_helper_det_policy_scratch { enum dml2_architecture { dml2_architecture_20, - dml2_architecture_21 }; struct dml2_context { From e4c33fff2eae41d16d9760e56efc23dcc30c6b91 Mon Sep 17 00:00:00 2001 From: Daniel Miess Date: Thu, 26 Oct 2023 14:34:14 -0400 Subject: [PATCH 175/415] drm/amd/display: Enable physymclk RCO [Why] Enable the last of the RCO options for dcn35 [How] Breakout RCO from dccg35_set_physymclk so that physymclk RCO can be set in dccg_init without disabling physymclk Reviewed-by: Nicholas Kazlauskas Reviewed-by: Jun Lei Acked-by: Hersen Wu Signed-off-by: Daniel Miess Signed-off-by: Hersen Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c | 72 ++++++++++--------- .../drm/amd/display/dc/dcn35/dcn35_resource.c | 20 +++++- .../drm/amd/display/dc/hwss/dce/dce_hwseq.h | 18 ++++- .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 24 ++++++- drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h | 5 ++ 5 files changed, 102 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c index 277aae811eea..479f3683c0b7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dccg.c @@ -325,6 +325,43 @@ static void dccg35_set_dpstreamclk( } } +static void dccg35_set_physymclk_root_clock_gating( + struct dccg *dccg, + int phy_inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + return; + + switch (phy_inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYASYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYBSYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYCSYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYDSYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + break; + case 4: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYESYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + static void dccg35_set_physymclk( struct dccg *dccg, int phy_inst, @@ -340,16 +377,10 @@ static void dccg35_set_physymclk( REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_EN, 1, PHYASYMCLK_SRC_SEL, clk_src); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYASYMCLK_ROOT_GATE_DISABLE, 1); } else { REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_EN, 0, PHYASYMCLK_SRC_SEL, 0); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYASYMCLK_ROOT_GATE_DISABLE, 0); } break; case 1: @@ -357,16 +388,10 @@ static void dccg35_set_physymclk( REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_EN, 1, PHYBSYMCLK_SRC_SEL, clk_src); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYBSYMCLK_ROOT_GATE_DISABLE, 1); } else { REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_EN, 0, PHYBSYMCLK_SRC_SEL, 0); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYBSYMCLK_ROOT_GATE_DISABLE, 0); } break; case 2: @@ -374,16 +399,10 @@ static void dccg35_set_physymclk( REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_EN, 1, PHYCSYMCLK_SRC_SEL, clk_src); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYCSYMCLK_ROOT_GATE_DISABLE, 1); } else { REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_EN, 0, PHYCSYMCLK_SRC_SEL, 0); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYCSYMCLK_ROOT_GATE_DISABLE, 0); } break; case 3: @@ -391,16 +410,10 @@ static void dccg35_set_physymclk( REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_EN, 1, PHYDSYMCLK_SRC_SEL, clk_src); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYDSYMCLK_ROOT_GATE_DISABLE, 1); } else { REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_EN, 0, PHYDSYMCLK_SRC_SEL, 0); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYDSYMCLK_ROOT_GATE_DISABLE, 0); } break; case 4: @@ -408,16 +421,10 @@ static void dccg35_set_physymclk( REG_UPDATE_2(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_EN, 1, PHYESYMCLK_SRC_SEL, clk_src); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYESYMCLK_ROOT_GATE_DISABLE, 1); } else { REG_UPDATE_2(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_EN, 0, PHYESYMCLK_SRC_SEL, 0); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYESYMCLK_ROOT_GATE_DISABLE, 0); } break; default: @@ -490,8 +497,8 @@ void dccg35_init(struct dccg *dccg) if (dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) for (otg_inst = 0; otg_inst < 5; otg_inst++) - dccg35_set_physymclk(dccg, otg_inst, - PHYSYMCLK_FORCE_SRC_SYMCLK, false); + dccg35_set_physymclk_root_clock_gating(dccg, otg_inst, + false); /* dccg35_enable_global_fgcg_rep( dccg, dccg->ctx->dc->debug.enable_fine_grain_clock_gating.bits @@ -756,6 +763,7 @@ static const struct dccg_funcs dccg35_funcs = { .disable_symclk32_le = dccg31_disable_symclk32_le, .set_symclk32_le_root_clock_gating = dccg31_set_symclk32_le_root_clock_gating, .set_physymclk = dccg35_set_physymclk, + .set_physymclk_root_clock_gating = dccg35_set_physymclk_root_clock_gating, .set_dtbclk_dto = dccg35_set_dtbclk_dto, .set_audio_dtbclk_dto = dccg31_set_audio_dtbclk_dto, .set_fifo_errdet_ovr_en = dccg2_set_fifo_errdet_ovr_en, diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c index e35d4c028d01..9fb8d2fa5e53 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c @@ -610,7 +610,23 @@ static struct dce_hwseq_registers hwseq_reg; HWS_SF(, DMU_CLK_CNTL, LONO_FGCG_REP_DIS, mask_sh),\ HWS_SF(, DMU_CLK_CNTL, LONO_DISPCLK_GATE_DISABLE, mask_sh),\ HWS_SF(, DMU_CLK_CNTL, LONO_SOCCLK_GATE_DISABLE, mask_sh),\ - HWS_SF(, DMU_CLK_CNTL, LONO_DMCUBCLK_GATE_DISABLE, mask_sh) + HWS_SF(, DMU_CLK_CNTL, LONO_DMCUBCLK_GATE_DISABLE, mask_sh),\ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKA_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKB_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKC_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKD_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKE_FE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKA_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKB_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKC_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKD_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, SYMCLKE_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYBSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYCSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYDSYMCLK_ROOT_GATE_DISABLE, mask_sh), \ + HWS_SF(, DCCG_GATE_DISABLE_CNTL2, PHYESYMCLK_ROOT_GATE_DISABLE, mask_sh) static const struct dce_hwseq_shift hwseq_shift = { HWSEQ_DCN35_MASK_SH_LIST(__SHIFT) @@ -725,7 +741,7 @@ static const struct dc_debug_options debug_defaults_drv = { .symclk32_se = true, .symclk32_le = true, .symclk_fe = true, - .physymclk = false, // Prevents eDP light up + .physymclk = true, .dpiasymclk = true, } }, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h index 2fefdf40612d..44b4df6469d1 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h @@ -1183,7 +1183,23 @@ struct dce_hwseq_registers { type LONO_FGCG_REP_DIS;\ type LONO_DISPCLK_GATE_DISABLE;\ type LONO_SOCCLK_GATE_DISABLE;\ - type LONO_DMCUBCLK_GATE_DISABLE; + type LONO_DMCUBCLK_GATE_DISABLE;\ + type SYMCLKA_FE_GATE_DISABLE;\ + type SYMCLKB_FE_GATE_DISABLE;\ + type SYMCLKC_FE_GATE_DISABLE;\ + type SYMCLKD_FE_GATE_DISABLE;\ + type SYMCLKE_FE_GATE_DISABLE;\ + type HDMICHARCLK0_GATE_DISABLE;\ + type SYMCLKA_GATE_DISABLE;\ + type SYMCLKB_GATE_DISABLE;\ + type SYMCLKC_GATE_DISABLE;\ + type SYMCLKD_GATE_DISABLE;\ + type SYMCLKE_GATE_DISABLE;\ + type PHYASYMCLK_ROOT_GATE_DISABLE;\ + type PHYBSYMCLK_ROOT_GATE_DISABLE;\ + type PHYCSYMCLK_ROOT_GATE_DISABLE;\ + type PHYDSYMCLK_ROOT_GATE_DISABLE;\ + type PHYESYMCLK_ROOT_GATE_DISABLE; struct dce_hwseq_shift { HWSEQ_REG_FIELD_LIST(uint8_t) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 0569fa6f7600..5a8258287438 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -146,7 +146,15 @@ void dcn35_init_hw(struct dc *dc) } REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); - REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0x3F000000); + REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0); + + /* Disable gating for PHYASYMCLK. This will be enabled in dccg if needed */ + REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, 1, + PHYBSYMCLK_ROOT_GATE_DISABLE, 1, + PHYCSYMCLK_ROOT_GATE_DISABLE, 1, + PHYDSYMCLK_ROOT_GATE_DISABLE, 1, + PHYESYMCLK_ROOT_GATE_DISABLE, 1); + REG_WRITE(DCCG_GATE_DISABLE_CNTL5, 0x1f7c3fcf); // Initialize the dccg @@ -275,7 +283,19 @@ void dcn35_init_hw(struct dc *dc) if (!dc->debug.disable_clock_gate) { /* enable all DCN clock gating */ REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); - REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0); + + REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, SYMCLKA_FE_GATE_DISABLE, 0, + SYMCLKB_FE_GATE_DISABLE, 0, + SYMCLKC_FE_GATE_DISABLE, 0, + SYMCLKD_FE_GATE_DISABLE, 0, + SYMCLKE_FE_GATE_DISABLE, 0); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, 0); + REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, SYMCLKA_GATE_DISABLE, 0, + SYMCLKB_GATE_DISABLE, 0, + SYMCLKC_GATE_DISABLE, 0, + SYMCLKD_GATE_DISABLE, 0, + SYMCLKE_GATE_DISABLE, 0); + REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index 13f12f2a3f81..ce2f0c0e82bd 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -141,6 +141,11 @@ struct dccg_funcs { enum physymclk_clock_source clk_src, bool force_enable); + void (*set_physymclk_root_clock_gating)( + struct dccg *dccg, + int phy_inst, + bool enable); + void (*set_dtbclk_dto)( struct dccg *dccg, const struct dtbclk_dto_params *params); From ed6e2782e9747508888f671e1101250bb19045be Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Mon, 23 Oct 2023 14:33:16 -0400 Subject: [PATCH 176/415] drm/amd/display: For cursor P-State allow for SubVP [Description] - Similar to FPO, SubVP should also force cursor P-State allow instead of relying on natural assertion - Implement code path to force and unforce cursor P-State allow for SubVP Reviewed-by: Samson Tam Acked-by: Hersen Wu Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 1b9f21fd4f17..6a65af8c36b9 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -615,12 +615,6 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context) pipe->stream->fpo_in_use)) { if (hubp && hubp->funcs->hubp_update_force_pstate_disallow) hubp->funcs->hubp_update_force_pstate_disallow(hubp, false); - } - - /* Today only FPO uses cursor P-State force. Only clear cursor P-State force - * if it's not FPO. - */ - if (!pipe->stream || !pipe->stream->fpo_in_use) { if (hubp && hubp->funcs->hubp_update_force_cursor_pstate_disallow) hubp->funcs->hubp_update_force_cursor_pstate_disallow(hubp, false); } @@ -632,17 +626,10 @@ void dcn32_update_force_pstate(struct dc *dc, struct dc_state *context) struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; struct hubp *hubp = pipe->plane_res.hubp; - if (pipe->stream && pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + if (pipe->stream && (pipe->stream->mall_stream_config.type == SUBVP_MAIN || + pipe->stream->fpo_in_use)) { if (hubp && hubp->funcs->hubp_update_force_pstate_disallow) hubp->funcs->hubp_update_force_pstate_disallow(hubp, true); - } - - if (pipe->stream && pipe->stream->fpo_in_use) { - if (hubp && hubp->funcs->hubp_update_force_pstate_disallow) - hubp->funcs->hubp_update_force_pstate_disallow(hubp, true); - /* For now only force cursor p-state disallow for FPO - * Needs to be added for subvp once FW side gets updated - */ if (hubp && hubp->funcs->hubp_update_force_cursor_pstate_disallow) hubp->funcs->hubp_update_force_cursor_pstate_disallow(hubp, true); } From 35c1d9664cbfa3a592c208cff86353c7c7689eef Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Wed, 11 Oct 2023 17:12:05 -0400 Subject: [PATCH 177/415] drm/amd/display: Fix handling duplicate planes on one stream [why] DML2 does not handle the case when we have a single stream sourcing 2 or more planes that are duplicates of one another. To properly handle this scenario, pipe index to plane index mapping is used to decide which plane is being processed and programmed. [how] Create static array of pipe index to plane index map. Populate the array properly and use in appropriate places. Reviewed-by: Xi (Alex) Liu Acked-by: Hersen Wu Signed-off-by: Sung Joon Kim Signed-off-by: Hersen Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/dml2/dml2_dc_resource_mgmt.c | 45 ++++++++++------ .../amd/display/dc/dml2/dml2_internal_types.h | 3 ++ .../display/dc/dml2/dml2_translation_helper.c | 54 ++++++++++++++++--- .../display/dc/dml2/dml2_translation_helper.h | 2 +- .../gpu/drm/amd/display/dc/dml2/dml2_utils.c | 18 ++++--- .../drm/amd/display/dc/dml2/dml2_wrapper.c | 2 +- 6 files changed, 90 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c index 82a1152e18b5..1a2b24cc6b61 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c @@ -55,10 +55,11 @@ struct dc_pipe_mapping_scratch { struct dc_plane_pipe_pool pipe_pool; }; -static bool get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, - unsigned int stream_id, unsigned int *plane_id) +static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *state, const struct dc_plane_state *plane, + unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id) { int i, j; + bool is_plane_duplicate = dml2->v20.scratch.plane_duplicate_exists; if (!plane_id) return false; @@ -66,7 +67,8 @@ static bool get_plane_id(const struct dc_state *state, const struct dc_plane_sta for (i = 0; i < state->stream_count; i++) { if (state->streams[i]->stream_id == stream_id) { for (j = 0; j < state->stream_status[i].plane_count; j++) { - if (state->stream_status[i].plane_states[j] == plane) { + if (state->stream_status[i].plane_states[j] == plane && + (!is_plane_duplicate || (is_plane_duplicate && (j == plane_index)))) { *plane_id = (i << 16) | j; return true; } @@ -123,8 +125,9 @@ static struct pipe_ctx *find_master_pipe_of_plane(struct dml2_context *ctx, unsigned int plane_id_assigned_to_pipe; for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - if (state->res_ctx.pipe_ctx[i].plane_state && get_plane_id(state, state->res_ctx.pipe_ctx[i].plane_state, - state->res_ctx.pipe_ctx[i].stream->stream_id, &plane_id_assigned_to_pipe)) { + if (state->res_ctx.pipe_ctx[i].plane_state && get_plane_id(ctx, state, state->res_ctx.pipe_ctx[i].plane_state, + state->res_ctx.pipe_ctx[i].stream->stream_id, + ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[state->res_ctx.pipe_ctx[i].pipe_idx], &plane_id_assigned_to_pipe)) { if (plane_id_assigned_to_pipe == plane_id) return &state->res_ctx.pipe_ctx[i]; } @@ -141,8 +144,9 @@ static unsigned int find_pipes_assigned_to_plane(struct dml2_context *ctx, unsigned int plane_id_assigned_to_pipe; for (i = 0; i < ctx->config.dcn_pipe_count; i++) { - if (state->res_ctx.pipe_ctx[i].plane_state && get_plane_id(state, state->res_ctx.pipe_ctx[i].plane_state, - state->res_ctx.pipe_ctx[i].stream->stream_id, &plane_id_assigned_to_pipe)) { + if (state->res_ctx.pipe_ctx[i].plane_state && get_plane_id(ctx, state, state->res_ctx.pipe_ctx[i].plane_state, + state->res_ctx.pipe_ctx[i].stream->stream_id, + ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[state->res_ctx.pipe_ctx[i].pipe_idx], &plane_id_assigned_to_pipe)) { if (plane_id_assigned_to_pipe == plane_id) pipes[num_found++] = i; } @@ -609,6 +613,7 @@ static struct pipe_ctx *assign_pipes_to_plane(struct dml2_context *ctx, struct d const struct dc_plane_state *plane, int odm_factor, int mpc_factor, + int plane_index, struct dc_plane_pipe_pool *pipe_pool, const struct dc_state *existing_state) { @@ -620,7 +625,7 @@ static struct pipe_ctx *assign_pipes_to_plane(struct dml2_context *ctx, struct d unsigned int next_pipe_to_assign; int odm_slice, mpc_slice; - if (!get_plane_id(state, plane, stream->stream_id, &plane_id)) { + if (!get_plane_id(ctx, state, plane, stream->stream_id, plane_index, &plane_id)) { ASSERT(false); return master_pipe; } @@ -667,12 +672,16 @@ static void free_pipe(struct pipe_ctx *pipe) } static void free_unused_pipes_for_plane(struct dml2_context *ctx, struct dc_state *state, - const struct dc_plane_state *plane, const struct dc_plane_pipe_pool *pool, unsigned int stream_id) + const struct dc_plane_state *plane, const struct dc_plane_pipe_pool *pool, unsigned int stream_id, int plane_index) { int i; + bool is_plane_duplicate = ctx->v20.scratch.plane_duplicate_exists; + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { if (state->res_ctx.pipe_ctx[i].plane_state == plane && state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id && + (!is_plane_duplicate || (is_plane_duplicate && + ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[state->res_ctx.pipe_ctx[i].pipe_idx] == plane_index)) && !is_pipe_used(pool, state->res_ctx.pipe_ctx[i].pipe_idx)) { free_pipe(&state->res_ctx.pipe_ctx[i]); } @@ -717,19 +726,20 @@ static void map_pipes_for_stream(struct dml2_context *ctx, struct dc_state *stat } static void map_pipes_for_plane(struct dml2_context *ctx, struct dc_state *state, const struct dc_stream_state *stream, const struct dc_plane_state *plane, - struct dc_pipe_mapping_scratch *scratch, const struct dc_state *existing_state) + int plane_index, struct dc_pipe_mapping_scratch *scratch, const struct dc_state *existing_state) { int odm_slice_index; unsigned int plane_id; struct pipe_ctx *master_pipe = NULL; int i; - if (!get_plane_id(state, plane, stream->stream_id, &plane_id)) { + if (!get_plane_id(ctx, state, plane, stream->stream_id, plane_index, &plane_id)) { ASSERT(false); return; } - master_pipe = assign_pipes_to_plane(ctx, state, stream, plane, scratch->odm_info.odm_factor, scratch->mpc_info.mpc_factor, &scratch->pipe_pool, existing_state); + master_pipe = assign_pipes_to_plane(ctx, state, stream, plane, scratch->odm_info.odm_factor, + scratch->mpc_info.mpc_factor, plane_index, &scratch->pipe_pool, existing_state); sort_pipes_for_splitting(&scratch->pipe_pool); for (odm_slice_index = 0; odm_slice_index < scratch->odm_info.odm_factor; odm_slice_index++) { @@ -755,7 +765,7 @@ static void map_pipes_for_plane(struct dml2_context *ctx, struct dc_state *state } } - free_unused_pipes_for_plane(ctx, state, plane, &scratch->pipe_pool, stream->stream_id); + free_unused_pipes_for_plane(ctx, state, plane, &scratch->pipe_pool, stream->stream_id, plane_index); } static unsigned int get_mpc_factor(struct dml2_context *ctx, @@ -768,7 +778,7 @@ static unsigned int get_mpc_factor(struct dml2_context *ctx, unsigned int plane_id; unsigned int cfg_idx; - get_plane_id(state, status->plane_states[plane_idx], stream_id, &plane_id); + get_plane_id(ctx, state, status->plane_states[plane_idx], stream_id, plane_idx, &plane_id); cfg_idx = find_disp_cfg_idx_by_plane_id(mapping, plane_id); if (ctx->architecture == dml2_architecture_20) return (unsigned int)disp_cfg->hw.DPPPerSurface[cfg_idx]; @@ -946,8 +956,8 @@ bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const s for (plane_index = 0; plane_index < state->stream_status[stream_index].plane_count; plane_index++) { // Planes are ordered top to bottom. - if (get_plane_id(state, state->stream_status[stream_index].plane_states[plane_index], - stream_id, &plane_id)) { + if (get_plane_id(ctx, state, state->stream_status[stream_index].plane_states[plane_index], + stream_id, plane_index, &plane_id)) { plane_disp_cfg_index = find_disp_cfg_idx_by_plane_id(mapping, plane_id); // Setup mpc_info for this plane @@ -971,7 +981,8 @@ bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const s // Clear the pool assignment scratch (which is per plane) memset(&scratch.pipe_pool, 0, sizeof(struct dc_plane_pipe_pool)); - map_pipes_for_plane(ctx, state, state->streams[stream_index], state->stream_status[stream_index].plane_states[plane_index], &scratch, existing_state); + map_pipes_for_plane(ctx, state, state->streams[stream_index], + state->stream_status[stream_index].plane_states[plane_index], plane_index, &scratch, existing_state); } else { // Plane ID cannot be generated, therefore no DML mapping can be performed. ASSERT(false); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h index df46a866f801..1cf8a884c0fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h @@ -75,6 +75,8 @@ struct dml2_dml_to_dc_pipe_mapping { bool dml_pipe_idx_to_stream_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; unsigned int dml_pipe_idx_to_plane_id[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; bool dml_pipe_idx_to_plane_id_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + unsigned int dml_pipe_idx_to_plane_index[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; + bool dml_pipe_idx_to_plane_index_valid[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; }; struct dml2_wrapper_scratch { @@ -96,6 +98,7 @@ struct dml2_wrapper_scratch { struct dml2_dml_to_dc_pipe_mapping dml_to_dc_pipe_mapping; bool enable_flexible_pipe_mapping; + bool plane_duplicate_exists; }; struct dml2_helper_det_policy_scratch { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 159a0956893f..75171bee6f71 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -931,10 +931,11 @@ static unsigned int map_stream_to_dml_display_cfg(const struct dml2_context *dml return location; } -static bool get_plane_id(const struct dc_state *context, const struct dc_plane_state *plane, - unsigned int stream_id, unsigned int *plane_id) +static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *context, const struct dc_plane_state *plane, + unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id) { int i, j; + bool is_plane_duplicate = dml2->v20.scratch.plane_duplicate_exists; if (!plane_id) return false; @@ -942,7 +943,8 @@ static bool get_plane_id(const struct dc_state *context, const struct dc_plane_s for (i = 0; i < context->stream_count; i++) { if (context->streams[i]->stream_id == stream_id) { for (j = 0; j < context->stream_status[i].plane_count; j++) { - if (context->stream_status[i].plane_states[j] == plane) { + if (context->stream_status[i].plane_states[j] == plane && + (!is_plane_duplicate || (is_plane_duplicate && (j == plane_index)))) { *plane_id = (i << 16) | j; return true; } @@ -954,13 +956,13 @@ static bool get_plane_id(const struct dc_state *context, const struct dc_plane_s } static unsigned int map_plane_to_dml_display_cfg(const struct dml2_context *dml2, const struct dc_plane_state *plane, - const struct dc_state *context, const struct dml_display_cfg_st *dml_dispcfg, unsigned int stream_id) + const struct dc_state *context, const struct dml_display_cfg_st *dml_dispcfg, unsigned int stream_id, int plane_index) { unsigned int plane_id; int i = 0; int location = -1; - if (!get_plane_id(context, plane, stream_id, &plane_id)) { + if (!get_plane_id(context->bw_ctx.dml2, context, plane, stream_id, plane_index, &plane_id)) { ASSERT(false); return -1; } @@ -991,7 +993,41 @@ static void apply_legacy_svp_drr_settings(struct dml2_context *dml2, const struc } } -void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, const struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg) +static void dml2_populate_pipe_to_plane_index_mapping(struct dml2_context *dml2, struct dc_state *state) +{ + unsigned int i; + unsigned int pipe_index = 0; + unsigned int plane_index = 0; + struct dml2_dml_to_dc_pipe_mapping *dml_to_dc_pipe_mapping = &dml2->v20.scratch.dml_to_dc_pipe_mapping; + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index_valid[i] = false; + dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index[i] = 0; + } + + for (i = 0; i < __DML2_WRAPPER_MAX_STREAMS_PLANES__; i++) { + struct pipe_ctx *pipe = &state->res_ctx.pipe_ctx[i]; + + if (!pipe || !pipe->stream || !pipe->plane_state) + continue; + + while (pipe) { + pipe_index = pipe->pipe_idx; + + if (pipe->stream && dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index_valid[pipe_index] == false) { + dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index[pipe_index] = plane_index; + plane_index++; + dml_to_dc_pipe_mapping->dml_pipe_idx_to_plane_index_valid[pipe_index] = true; + } + + pipe = pipe->bottom_pipe; + } + + plane_index = 0; + } +} + +void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg) { int i = 0, j = 0; int disp_cfg_stream_location, disp_cfg_plane_location; @@ -1008,6 +1044,8 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, const struct d dml_dispcfg->plane.GPUVMMaxPageTableLevels = 4; dml_dispcfg->plane.HostVMEnable = false; + dml2_populate_pipe_to_plane_index_mapping(dml2, context); + for (i = 0; i < context->stream_count; i++) { disp_cfg_stream_location = map_stream_to_dml_display_cfg(dml2, context->streams[i], dml_dispcfg); @@ -1044,7 +1082,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, const struct d } else { for (j = 0; j < context->stream_status[i].plane_count; j++) { disp_cfg_plane_location = map_plane_to_dml_display_cfg(dml2, - context->stream_status[i].plane_states[j], context, dml_dispcfg, context->streams[i]->stream_id); + context->stream_status[i].plane_states[j], context, dml_dispcfg, context->streams[i]->stream_id, j); if (disp_cfg_plane_location < 0) disp_cfg_plane_location = dml_dispcfg->num_surfaces++; @@ -1068,7 +1106,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, const struct d dml_dispcfg->plane.BlendingAndTiming[disp_cfg_plane_location] = disp_cfg_stream_location; - if (get_plane_id(context, context->stream_status[i].plane_states[j], context->streams[i]->stream_id, + if (get_plane_id(dml2, context, context->stream_status[i].plane_states[j], context->streams[i]->stream_id, j, &dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h index dac6d27b14cd..d764773938f4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h @@ -34,7 +34,7 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, void dml2_translate_ip_params(const struct dc *in_dc, struct ip_params_st *out); void dml2_translate_socbb_params(const struct dc *in_dc, struct soc_bounding_box_st *out); void dml2_translate_soc_states(const struct dc *in_dc, struct soc_states_st *out, int num_states); -void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, const struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg); +void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg); void dml2_update_pipe_ctx_dchub_regs(struct _vcs_dpi_dml_display_rq_regs_st *rq_regs, struct _vcs_dpi_dml_display_dlg_regs_st *disp_dlg_regs, struct _vcs_dpi_dml_display_ttu_regs_st *disp_ttu_regs, struct pipe_ctx *out); bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 69fd96f4f3b0..2498b8341199 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -209,10 +209,11 @@ static int find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int return -1; } -static bool get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, - unsigned int stream_id, unsigned int *plane_id) +static bool get_plane_id(struct dml2_context *dml2, const struct dc_state *state, const struct dc_plane_state *plane, + unsigned int stream_id, unsigned int plane_index, unsigned int *plane_id) { int i, j; + bool is_plane_duplicate = dml2->v20.scratch.plane_duplicate_exists; if (!plane_id) return false; @@ -220,7 +221,8 @@ static bool get_plane_id(const struct dc_state *state, const struct dc_plane_sta for (i = 0; i < state->stream_count; i++) { if (state->streams[i]->stream_id == stream_id) { for (j = 0; j < state->stream_status[i].plane_count; j++) { - if (state->stream_status[i].plane_states[j] == plane) { + if (state->stream_status[i].plane_states[j] == plane && + (!is_plane_duplicate || (is_plane_duplicate && (j == plane_index)))) { *plane_id = (i << 16) | j; return true; } @@ -304,8 +306,9 @@ void dml2_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *cont * there is a need to know which DML pipe index maps to which DC pipe. The code below * finds a dml_pipe_index from the plane id if a plane is valid. If a plane is not valid then * it finds a dml_pipe_index from the stream id. */ - if (get_plane_id(context, context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state, - context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id, &plane_id)) { + if (get_plane_id(in_ctx, context, context->res_ctx.pipe_ctx[dc_pipe_ctx_index].plane_state, + context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id, + in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[context->res_ctx.pipe_ctx[dc_pipe_ctx_index].pipe_idx], &plane_id)) { dml_pipe_idx = find_dml_pipe_idx_by_plane_id(in_ctx, plane_id); } else { dml_pipe_idx = dml2_helper_find_dml_pipe_idx_by_stream_id(in_ctx, context->res_ctx.pipe_ctx[dc_pipe_ctx_index].stream->stream_id); @@ -445,8 +448,9 @@ bool dml2_verify_det_buffer_configuration(struct dml2_context *in_ctx, struct dc for (i = 0; i < MAX_PIPES; i++) { if (!display_state->res_ctx.pipe_ctx[i].stream) continue; - if (get_plane_id(display_state, display_state->res_ctx.pipe_ctx[i].plane_state, - display_state->res_ctx.pipe_ctx[i].stream->stream_id, &plane_id)) + if (get_plane_id(in_ctx, display_state, display_state->res_ctx.pipe_ctx[i].plane_state, + display_state->res_ctx.pipe_ctx[i].stream->stream_id, + in_ctx->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_index[display_state->res_ctx.pipe_ctx[i].pipe_idx], &plane_id)) dml_pipe_idx = find_dml_pipe_idx_by_plane_id(in_ctx, plane_id); else dml_pipe_idx = dml2_helper_find_dml_pipe_idx_by_stream_id(in_ctx, display_state->res_ctx.pipe_ctx[i].stream->stream_id); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 0a06bf3b135a..8f231418870f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -639,7 +639,7 @@ static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_s return result; } -static bool dml2_validate_only(const struct dc_state *context) +static bool dml2_validate_only(struct dc_state *context) { struct dml2_context *dml2 = context->bw_ctx.dml2; unsigned int result = 0; From d736c2e0744807e4cb12e84b179896c995a096f9 Mon Sep 17 00:00:00 2001 From: George Shen Date: Tue, 24 Oct 2023 21:47:57 -0400 Subject: [PATCH 178/415] drm/amd/display: Set stream's DP test pattern upon test request [Why] A recent refactor of DC's DP test pattern automation code requires the DC stream's test pattern and test pattern color space fields to be correctly populated before calling dc_link_dp_set_test_pattern. [How] Populate stream's test pattern type and color space fields before calling into DC to program DP test pattern. Reviewed-by: Aurabindo Pillai Acked-by: Hersen Wu Signed-off-by: George Shen Signed-off-by: Hersen Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 9cd83b780102..ed784cf27d39 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -1216,6 +1216,9 @@ bool dm_helpers_dp_handle_test_pattern_request( } + pipe_ctx->stream->test_pattern.type = test_pattern; + pipe_ctx->stream->test_pattern.color_space = test_pattern_color_space; + dc_link_dp_set_test_pattern( (struct dc_link *) link, test_pattern, From 9c561ca2d3ca99606034880f62791e866af35ef9 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Thu, 26 Oct 2023 17:08:49 +0800 Subject: [PATCH 179/415] drm/amdgpu/soc21: add mode2 asic reset for SMU IP v14.0.0 Set the default reset method to mode2 for SMU IP v14.0.0 Signed-off-by: Jiadong Zhu Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index d5083c549330..48c6efcdeac9 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -381,6 +381,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev) return AMD_RESET_METHOD_MODE1; case IP_VERSION(13, 0, 4): case IP_VERSION(13, 0, 11): + case IP_VERSION(14, 0, 0): return AMD_RESET_METHOD_MODE2; default: if (amdgpu_dpm_is_baco_supported(adev)) From 0553eb9f33aa1a89a788682c78bd9747d41e65cb Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 6 Nov 2023 09:06:58 +0530 Subject: [PATCH 180/415] drm/amdgpu: Fix sdma 4.4.2 doorbell rptr/wptr init Doorbell rptr/wptr can be set through multiple ways including direct register initialization. Disable doorbell during hw_fini once the ring is disabled so that during next module reload direct initialization takes effect. Also, move the direct initialization after minor update is set to 1 since rptr/wptr are reinitialized back to 0 which could be lower than the previous doorbell value (ex: cases like module reload). Signed-off-by: Lijo Lazar Reviewed-by: Le Ma Reviewed-by: Asad Kamal Tested-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 25 ++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index c46bc6aa4f48..bd65a62f8903 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -427,6 +427,7 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask) { struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; + u32 doorbell_offset, doorbell; u32 rb_cntl, ib_cntl; int i, unset = 0; @@ -444,6 +445,18 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0); WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl); + + if (sdma[i]->use_doorbell) { + doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); + doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET); + + doorbell = REG_SET_FIELD(doorbell, SDMA_GFX_DOORBELL, ENABLE, 0); + doorbell_offset = REG_SET_FIELD(doorbell_offset, + SDMA_GFX_DOORBELL_OFFSET, + OFFSET, 0); + WREG32_SDMA(i, regSDMA_GFX_DOORBELL, doorbell); + WREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET, doorbell_offset); + } } } @@ -631,12 +644,6 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl); WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl); - /* Initialize the ring buffer's read and write pointers */ - WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0); - /* set the wb address whether it's enabled or not */ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_ADDR_HI, upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); @@ -654,6 +661,12 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) /* before programing wptr to a less value, need set minor_ptr_update first */ WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1); + /* Initialize the ring buffer's read and write pointers */ + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0); + doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET); From 8cfd6a05750cd7aa84e7f1e5933fa7781006bfc3 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 31 Oct 2023 12:32:03 +0530 Subject: [PATCH 181/415] drm/amd/pm: Hide irrelevant pm device attributes Change return code to EOPNOTSUPP for unsupported functions. Use the error code information to hide sysfs nodes not valid for the SOC. Signed-off-by: Lijo Lazar Reviewed-by: Yang Wang Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 12 ++++++------ drivers/gpu/drm/amd/pm/amdgpu_pm.c | 12 ++++++++++++ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 4 ++-- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index acf3527fff2d..08cb79401410 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -491,7 +491,7 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso int amdgpu_dpm_get_apu_thermal_limit(struct amdgpu_device *adev, uint32_t *limit) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if (pp_funcs && pp_funcs->get_apu_thermal_limit) { mutex_lock(&adev->pm.mutex); @@ -505,7 +505,7 @@ int amdgpu_dpm_get_apu_thermal_limit(struct amdgpu_device *adev, uint32_t *limit int amdgpu_dpm_set_apu_thermal_limit(struct amdgpu_device *adev, uint32_t limit) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - int ret = -EINVAL; + int ret = -EOPNOTSUPP; if (pp_funcs && pp_funcs->set_apu_thermal_limit) { mutex_lock(&adev->pm.mutex); @@ -1182,7 +1182,7 @@ int amdgpu_dpm_get_sclk_od(struct amdgpu_device *adev) int ret = 0; if (!pp_funcs->get_sclk_od) - return 0; + return -EOPNOTSUPP; mutex_lock(&adev->pm.mutex); ret = pp_funcs->get_sclk_od(adev->powerplay.pp_handle); @@ -1196,7 +1196,7 @@ int amdgpu_dpm_set_sclk_od(struct amdgpu_device *adev, uint32_t value) const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; if (is_support_sw_smu(adev)) - return 0; + return -EOPNOTSUPP; mutex_lock(&adev->pm.mutex); if (pp_funcs->set_sclk_od) @@ -1219,7 +1219,7 @@ int amdgpu_dpm_get_mclk_od(struct amdgpu_device *adev) int ret = 0; if (!pp_funcs->get_mclk_od) - return 0; + return -EOPNOTSUPP; mutex_lock(&adev->pm.mutex); ret = pp_funcs->get_mclk_od(adev->powerplay.pp_handle); @@ -1233,7 +1233,7 @@ int amdgpu_dpm_set_mclk_od(struct amdgpu_device *adev, uint32_t value) const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; if (is_support_sw_smu(adev)) - return 0; + return -EOPNOTSUPP; mutex_lock(&adev->pm.mutex); if (pp_funcs->set_mclk_od) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 4ba9195c83c5..1f6d2dbcca4a 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2197,6 +2197,18 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ } else if (DEVICE_ATTR_IS(xgmi_plpd_policy)) { if (amdgpu_dpm_get_xgmi_plpd_mode(adev, NULL) == XGMI_PLPD_NONE) *states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pp_dpm_mclk_od)) { + if (amdgpu_dpm_get_mclk_od(adev) == -EOPNOTSUPP) + *states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pp_dpm_sclk_od)) { + if (amdgpu_dpm_get_sclk_od(adev) == -EOPNOTSUPP) + *states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(apu_thermal_cap)) { + u32 limit; + + if (amdgpu_dpm_get_apu_thermal_limit(adev, &limit) == + -EOPNOTSUPP) + *states = ATTR_STATE_UNSUPPORTED; } switch (gc_ver) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 23b00eddc1af..d86750df5940 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2747,7 +2747,7 @@ static int smu_read_sensor(void *handle, static int smu_get_apu_thermal_limit(void *handle, uint32_t *limit) { - int ret = -EINVAL; + int ret = -EOPNOTSUPP; struct smu_context *smu = handle; if (smu->ppt_funcs && smu->ppt_funcs->get_apu_thermal_limit) @@ -2758,7 +2758,7 @@ static int smu_get_apu_thermal_limit(void *handle, uint32_t *limit) static int smu_set_apu_thermal_limit(void *handle, uint32_t limit) { - int ret = -EINVAL; + int ret = -EOPNOTSUPP; struct smu_context *smu = handle; if (smu->ppt_funcs && smu->ppt_funcs->set_apu_thermal_limit) From c68b4550b6b432cbb05ad30f67178d2d3845d919 Mon Sep 17 00:00:00 2001 From: Bragatheswaran Manickavel Date: Tue, 24 Oct 2023 23:41:34 +0530 Subject: [PATCH 182/415] drm/amd/display: avoid variable reinitialization The member variable enable_hpo_pg_support is already initialized and hence the reinitialization instruction can be removed. Issue identified using the doubleinit.cocci Coccinelle semantic patch script. Signed-off-by: Bragatheswaran Manickavel Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c index 9fb8d2fa5e53..c7e011d26d41 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c @@ -757,7 +757,6 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_boot_optimizations = false, .disable_unbounded_requesting = false, .disable_mem_low_power = false, - .enable_hpo_pg_support = false, //must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions .enable_double_buffered_dsc_pg_support = true, .enable_dp_dig_pixel_rate_div_policy = 1, From d78fa1c309327cee1cfb7c608ec59f5a60ab94bd Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Thu, 19 Oct 2023 10:24:11 +0800 Subject: [PATCH 183/415] drm/amd/pm: not stop rlc for IMU enabled APUs when suspend For IMU enabled APUs, after sending the PrepareMp1ForUnload message to SMU in system_features_control, the RLC registers can't be touched. The driver to stop the rlc in suspending is no longer required. Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index d86750df5940..1ead323f1c78 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1711,6 +1711,7 @@ static int smu_disable_dpms(struct smu_context *smu) } if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2) && + !((adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs) && !amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs->stop) adev->gfx.rlc.funcs->stop(adev); From 61fe5536d06cf485d387c894d2083de883c81ad7 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 31 Oct 2023 10:39:51 +0800 Subject: [PATCH 184/415] drm/amdgpu: handle extra UE register entries for gfx v9_4_3 The UE registe list is larger than CE list. Reported-by: yipeng.chai@amd.com Signed-off-by: Tao Zhou Reviewed-by: Stanley.Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 38 +++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 41bbabd9ad4d..046ae95b366a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3799,6 +3799,27 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, } } + /* handle extra register entries of UE */ + for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) { + for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) { + /* no need to select if instance number is 1 */ + if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 || + gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1) + gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id); + + amdgpu_ras_inst_query_ras_error_count(adev, + &(gfx_v9_4_3_ue_reg_list[i].reg_entry), + 1, + gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].mem_id_ent, + gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].size, + GET_INST(GC, xcc_id), + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + &ue_count); + } + } + } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); mutex_unlock(&adev->grbm_idx_mutex); @@ -3838,6 +3859,23 @@ static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, } } + /* handle extra register entries of UE */ + for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) { + for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) { + /* no need to select if instance number is 1 */ + if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 || + gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1) + gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id); + + amdgpu_ras_inst_reset_ras_error_count(adev, + &(gfx_v9_4_3_ue_reg_list[i].reg_entry), + 1, + GET_INST(GC, xcc_id)); + } + } + } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); mutex_unlock(&adev->grbm_idx_mutex); From 20238a2cc9a6a926f9f47ae4ae9edd1bc98f278c Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Thu, 19 Oct 2023 16:01:07 +0800 Subject: [PATCH 185/415] drm/amdgpu: add RAS reset/query operations for XGMI v6_4 Reset/query RAS error status and count. v2: use XGMI IP version instead of WAFL version. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 46 ++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 9d5d742ee9d3..713e17cca071 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -103,6 +103,16 @@ static const int walf_pcs_err_noncorrectable_mask_reg_aldebaran[] = { smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000 }; +static const int xgmi3x16_pcs_err_status_reg_v6_4[] = { + smnPCS_XGMI3X16_PCS_ERROR_STATUS, + smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x100000 +}; + +static const int xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4[] = { + smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK, + smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000 +}; + static const struct amdgpu_pcs_ras_field xgmi_pcs_ras_fields[] = { {"XGMI PCS DataLossErr", SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataLossErr)}, @@ -952,6 +962,16 @@ static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) default: break; } + + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++) + pcs_clear_status(adev, + xgmi3x16_pcs_err_status_reg_v6_4[i]); + break; + default: + break; + } } static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, @@ -969,7 +989,9 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, if (is_xgmi_pcs) { if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == - IP_VERSION(6, 1, 0)) { + IP_VERSION(6, 1, 0) || + amdgpu_ip_version(adev, XGMI_HWIP, 0) == + IP_VERSION(6, 4, 0)) { pcs_ras_fields = &xgmi3x16_pcs_ras_fields[0]; field_array_size = ARRAY_SIZE(xgmi3x16_pcs_ras_fields); } else { @@ -1007,7 +1029,7 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - int i; + int i, supported = 1; uint32_t data, mask_data = 0; uint32_t ue_cnt = 0, ce_cnt = 0; @@ -1071,7 +1093,25 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, } break; default: - dev_warn(adev->dev, "XGMI RAS error query not supported"); + supported = 0; + break; + } + + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + /* check xgmi3x16 pcs error */ + for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++) { + data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_v6_4[i]); + mask_data = + RREG32_PCIE(xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4[i]); + if (data) + amdgpu_xgmi_query_pcs_error_status(adev, data, + mask_data, &ue_cnt, &ce_cnt, true, true); + } + break; + default: + if (!supported) + dev_warn(adev->dev, "XGMI RAS error query not supported"); break; } From 946bb33d330251966223f770f64885c79448b1a1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 28 Oct 2023 17:51:01 +0200 Subject: [PATCH 186/415] riscv: split cache ops out of dma-noncoherent.c The cache ops are also used by the pmem code which is unconditionally built into the kernel. Move them into a separate file that is built based on the correct config option. Fixes: fd962781270e ("riscv: RISCV_NONSTANDARD_CACHE_OPS shouldn't depend on RISCV_DMA_NONCOHERENT") Reported-by: kernel test robot Signed-off-by: Christoph Hellwig Reviewed-by: Conor Dooley Tested-by: Conor Dooley Reviewed-by: Lad Prabhakar Tested-by: Lad Prabhakar # Link: https://lore.kernel.org/r/20231028155101.1039049-1-hch@lst.de Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/Makefile | 1 + arch/riscv/mm/cache-ops.c | 17 +++++++++++++++++ arch/riscv/mm/dma-noncoherent.c | 15 --------------- 3 files changed, 18 insertions(+), 15 deletions(-) create mode 100644 arch/riscv/mm/cache-ops.c diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 9c454f90fd3d..3a4dfc8babcf 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -36,3 +36,4 @@ endif obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_RISCV_DMA_NONCOHERENT) += dma-noncoherent.o +obj-$(CONFIG_RISCV_NONSTANDARD_CACHE_OPS) += cache-ops.o diff --git a/arch/riscv/mm/cache-ops.c b/arch/riscv/mm/cache-ops.c new file mode 100644 index 000000000000..a993ad11d0ec --- /dev/null +++ b/arch/riscv/mm/cache-ops.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + */ + +#include + +struct riscv_nonstd_cache_ops noncoherent_cache_ops __ro_after_init; + +void +riscv_noncoherent_register_cache_ops(const struct riscv_nonstd_cache_ops *ops) +{ + if (!ops) + return; + noncoherent_cache_ops = *ops; +} +EXPORT_SYMBOL_GPL(riscv_noncoherent_register_cache_ops); diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index 607d5f47d437..4e4e469b8dd6 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -15,12 +15,6 @@ static bool noncoherent_supported __ro_after_init; int dma_cache_alignment __ro_after_init = ARCH_DMA_MINALIGN; EXPORT_SYMBOL_GPL(dma_cache_alignment); -struct riscv_nonstd_cache_ops noncoherent_cache_ops __ro_after_init = { - .wback = NULL, - .inv = NULL, - .wback_inv = NULL, -}; - static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size) { void *vaddr = phys_to_virt(paddr); @@ -162,12 +156,3 @@ void __init riscv_set_dma_cache_alignment(void) if (!noncoherent_supported) dma_cache_alignment = 1; } - -void riscv_noncoherent_register_cache_ops(const struct riscv_nonstd_cache_ops *ops) -{ - if (!ops) - return; - - noncoherent_cache_ops = *ops; -} -EXPORT_SYMBOL_GPL(riscv_noncoherent_register_cache_ops); From 6affe08aea5f3b630565676e227b41d55a6f009c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 26 Oct 2023 15:08:03 +0200 Subject: [PATCH 187/415] nvme: common: make keyring and auth separate modules When only the keyring module is included but auth is not, modpost complains about the lack of a module license tag: ERROR: modpost: missing MODULE_LICENSE() in drivers/nvme/common/nvme-common.o Address this by making both modules buildable standalone, removing the now unnecessary CONFIG_NVME_COMMON symbol in the process. Also, now that NVME_KEYRING config symbol can be either a module or built-in, the stubs need to check for '#if IS_ENABLED' rather than a simple '#ifdef'. Fixes: 9d77eb5277849 ("nvme-keyring: register '.nvme' keyring") Signed-off-by: Arnd Bergmann Signed-off-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/Makefile | 2 +- drivers/nvme/common/Kconfig | 7 ++----- drivers/nvme/common/Makefile | 7 ++++--- drivers/nvme/common/keyring.c | 2 ++ drivers/nvme/host/Kconfig | 2 -- drivers/nvme/target/Kconfig | 2 -- include/linux/nvme-keyring.h | 2 +- 7 files changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/Makefile b/drivers/nvme/Makefile index eedca8c72098..74f59ceed3d5 100644 --- a/drivers/nvme/Makefile +++ b/drivers/nvme/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_NVME_COMMON) += common/ +obj-y += common/ obj-y += host/ obj-y += target/ diff --git a/drivers/nvme/common/Kconfig b/drivers/nvme/common/Kconfig index 06c8df00d1e2..244432e0b73d 100644 --- a/drivers/nvme/common/Kconfig +++ b/drivers/nvme/common/Kconfig @@ -1,14 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only -config NVME_COMMON - tristate - config NVME_KEYRING - bool + tristate select KEYS config NVME_AUTH - bool + tristate select CRYPTO select CRYPTO_HMAC select CRYPTO_SHA256 diff --git a/drivers/nvme/common/Makefile b/drivers/nvme/common/Makefile index 0cbd0b0b8d49..681514cf2e2f 100644 --- a/drivers/nvme/common/Makefile +++ b/drivers/nvme/common/Makefile @@ -2,7 +2,8 @@ ccflags-y += -I$(src) -obj-$(CONFIG_NVME_COMMON) += nvme-common.o +obj-$(CONFIG_NVME_AUTH) += nvme-auth.o +obj-$(CONFIG_NVME_KEYRING) += nvme-keyring.o -nvme-common-$(CONFIG_NVME_AUTH) += auth.o -nvme-common-$(CONFIG_NVME_KEYRING) += keyring.o +nvme-auth-y += auth.o +nvme-keyring-y += keyring.o diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c index f8d9a208397b..46d7a537dbc2 100644 --- a/drivers/nvme/common/keyring.c +++ b/drivers/nvme/common/keyring.c @@ -180,3 +180,5 @@ void nvme_keyring_exit(void) key_put(nvme_keyring); } EXPORT_SYMBOL_GPL(nvme_keyring_exit); + +MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 48f7d72de5e9..8fe2dd619e80 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -95,7 +95,6 @@ config NVME_TCP config NVME_TCP_TLS bool "NVMe over Fabrics TCP TLS encryption support" depends on NVME_TCP - select NVME_COMMON select NVME_KEYRING select NET_HANDSHAKE select KEYS @@ -110,7 +109,6 @@ config NVME_TCP_TLS config NVME_HOST_AUTH bool "NVM Express over Fabrics In-Band Authentication" depends on NVME_CORE - select NVME_COMMON select NVME_AUTH help This provides support for NVMe over Fabrics In-Band Authentication. diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index fa479c9f5c3d..31633da9427c 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -87,7 +87,6 @@ config NVME_TARGET_TCP config NVME_TARGET_TCP_TLS bool "NVMe over Fabrics TCP target TLS encryption support" depends on NVME_TARGET_TCP - select NVME_COMMON select NVME_KEYRING select NET_HANDSHAKE select KEYS @@ -102,7 +101,6 @@ config NVME_TARGET_TCP_TLS config NVME_TARGET_AUTH bool "NVMe over Fabrics In-band Authentication support" depends on NVME_TARGET - select NVME_COMMON select NVME_AUTH help This enables support for NVMe over Fabrics In-band Authentication diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h index 4efea9dd967c..6cc0696625f3 100644 --- a/include/linux/nvme-keyring.h +++ b/include/linux/nvme-keyring.h @@ -6,7 +6,7 @@ #ifndef _NVME_KEYRING_H #define _NVME_KEYRING_H -#ifdef CONFIG_NVME_KEYRING +#if IS_ENABLED(CONFIG_NVME_KEYRING) key_serial_t nvme_tls_psk_default(struct key *keyring, const char *hostnqn, const char *subnqn); From 166b0110d1ee53290bd11618df6e3991c117495a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 7 Nov 2023 14:33:32 +0100 Subject: [PATCH 188/415] parisc/pgtable: Do not drop upper 5 address bits of physical address When calculating the pfn for the iitlbt/idtlbt instruction, do not drop the upper 5 address bits. This doesn't seem to have an effect on physical hardware which uses less physical address bits, but in qemu the missing bits are visible. Signed-off-by: Helge Deller Cc: --- arch/parisc/kernel/entry.S | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index cab1ec23e0d7..ab23e61a6f01 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -475,13 +475,13 @@ * to a CPU TLB 4k PFN (4k => 12 bits to shift) */ #define PAGE_ADD_SHIFT (PAGE_SHIFT-12) #define PAGE_ADD_HUGE_SHIFT (REAL_HPAGE_SHIFT-12) + #define PFN_START_BIT (63-ASM_PFN_PTE_SHIFT+(63-58)-PAGE_ADD_SHIFT) /* Drop prot bits and convert to page addr for iitlbt and idtlbt */ .macro convert_for_tlb_insert20 pte,tmp #ifdef CONFIG_HUGETLB_PAGE copy \pte,\tmp - extrd,u \tmp,(63-ASM_PFN_PTE_SHIFT)+(63-58)+PAGE_ADD_SHIFT,\ - 64-PAGE_SHIFT-PAGE_ADD_SHIFT,\pte + extrd,u \tmp,PFN_START_BIT,PFN_START_BIT+1,\pte depdi _PAGE_SIZE_ENCODING_DEFAULT,63,\ (63-58)+PAGE_ADD_SHIFT,\pte @@ -489,8 +489,7 @@ depdi _HUGE_PAGE_SIZE_ENCODING_DEFAULT,63,\ (63-58)+PAGE_ADD_HUGE_SHIFT,\pte #else /* Huge pages disabled */ - extrd,u \pte,(63-ASM_PFN_PTE_SHIFT)+(63-58)+PAGE_ADD_SHIFT,\ - 64-PAGE_SHIFT-PAGE_ADD_SHIFT,\pte + extrd,u \pte,PFN_START_BIT,PFN_START_BIT+1,\pte depdi _PAGE_SIZE_ENCODING_DEFAULT,63,\ (63-58)+PAGE_ADD_SHIFT,\pte #endif From 8cbe0accc4a6ba7ed34812a1c7e1ba67e7f7b2a4 Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Wed, 1 Nov 2023 11:32:59 -0700 Subject: [PATCH 189/415] riscv: Avoid unaligned access when relocating modules With the C-extension regular 32bit instructions are not necessarily aligned on 4-byte boundaries. RISC-V instructions are in fact an ordered list of 16bit little-endian "parcels", so access the instruction as such. This should also make the code work in case someone builds a big-endian RISC-V machine. Signed-off-by: Emil Renner Berthing Signed-off-by: Charlie Jenkins Link: https://lore.kernel.org/r/20231101-module_relocations-v9-1-8dfa3483c400@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 157 +++++++++++++++++++------------------ 1 file changed, 81 insertions(+), 76 deletions(-) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 7c651d55fcbd..8286d3bb04f4 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -27,68 +27,90 @@ static bool riscv_insn_valid_32bit_offset(ptrdiff_t val) #endif } -static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) +static int riscv_insn_rmw(void *location, u32 keep, u32 set) +{ + u16 *parcel = location; + u32 insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16; + + insn &= keep; + insn |= set; + + parcel[0] = cpu_to_le16(insn); + parcel[1] = cpu_to_le16(insn >> 16); + return 0; +} + +static int riscv_insn_rvc_rmw(void *location, u16 keep, u16 set) +{ + u16 *parcel = location; + u16 insn = le16_to_cpu(*parcel); + + insn &= keep; + insn |= set; + + *parcel = cpu_to_le16(insn); + return 0; +} + +static int apply_r_riscv_32_rela(struct module *me, void *location, Elf_Addr v) { if (v != (u32)v) { pr_err("%s: value %016llx out of range for 32-bit field\n", me->name, (long long)v); return -EINVAL; } - *location = v; + *(u32 *)location = v; return 0; } -static int apply_r_riscv_64_rela(struct module *me, u32 *location, Elf_Addr v) +static int apply_r_riscv_64_rela(struct module *me, void *location, Elf_Addr v) { *(u64 *)location = v; return 0; } -static int apply_r_riscv_branch_rela(struct module *me, u32 *location, +static int apply_r_riscv_branch_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u32 imm12 = (offset & 0x1000) << (31 - 12); u32 imm11 = (offset & 0x800) >> (11 - 7); u32 imm10_5 = (offset & 0x7e0) << (30 - 10); u32 imm4_1 = (offset & 0x1e) << (11 - 4); - *location = (*location & 0x1fff07f) | imm12 | imm11 | imm10_5 | imm4_1; - return 0; + return riscv_insn_rmw(location, 0x1fff07f, imm12 | imm11 | imm10_5 | imm4_1); } -static int apply_r_riscv_jal_rela(struct module *me, u32 *location, +static int apply_r_riscv_jal_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u32 imm20 = (offset & 0x100000) << (31 - 20); u32 imm19_12 = (offset & 0xff000); u32 imm11 = (offset & 0x800) << (20 - 11); u32 imm10_1 = (offset & 0x7fe) << (30 - 10); - *location = (*location & 0xfff) | imm20 | imm19_12 | imm11 | imm10_1; - return 0; + return riscv_insn_rmw(location, 0xfff, imm20 | imm19_12 | imm11 | imm10_1); } -static int apply_r_riscv_rvc_branch_rela(struct module *me, u32 *location, +static int apply_r_riscv_rvc_branch_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u16 imm8 = (offset & 0x100) << (12 - 8); u16 imm7_6 = (offset & 0xc0) >> (6 - 5); u16 imm5 = (offset & 0x20) >> (5 - 2); u16 imm4_3 = (offset & 0x18) << (12 - 5); u16 imm2_1 = (offset & 0x6) << (12 - 10); - *(u16 *)location = (*(u16 *)location & 0xe383) | - imm8 | imm7_6 | imm5 | imm4_3 | imm2_1; - return 0; + return riscv_insn_rvc_rmw(location, 0xe383, + imm8 | imm7_6 | imm5 | imm4_3 | imm2_1); } -static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location, +static int apply_r_riscv_rvc_jump_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u16 imm11 = (offset & 0x800) << (12 - 11); u16 imm10 = (offset & 0x400) >> (10 - 8); u16 imm9_8 = (offset & 0x300) << (12 - 11); @@ -98,16 +120,14 @@ static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location, u16 imm4 = (offset & 0x10) << (12 - 5); u16 imm3_1 = (offset & 0xe) << (12 - 10); - *(u16 *)location = (*(u16 *)location & 0xe003) | - imm11 | imm10 | imm9_8 | imm7 | imm6 | imm5 | imm4 | imm3_1; - return 0; + return riscv_insn_rvc_rmw(location, 0xe003, + imm11 | imm10 | imm9_8 | imm7 | imm6 | imm5 | imm4 | imm3_1); } -static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, +static int apply_r_riscv_pcrel_hi20_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; - s32 hi20; + ptrdiff_t offset = (void *)v - location; if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( @@ -116,23 +136,20 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, return -EINVAL; } - hi20 = (offset + 0x800) & 0xfffff000; - *location = (*location & 0xfff) | hi20; - return 0; + return riscv_insn_rmw(location, 0xfff, (offset + 0x800) & 0xfffff000); } -static int apply_r_riscv_pcrel_lo12_i_rela(struct module *me, u32 *location, +static int apply_r_riscv_pcrel_lo12_i_rela(struct module *me, void *location, Elf_Addr v) { /* * v is the lo12 value to fill. It is calculated before calling this * handler. */ - *location = (*location & 0xfffff) | ((v & 0xfff) << 20); - return 0; + return riscv_insn_rmw(location, 0xfffff, (v & 0xfff) << 20); } -static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, u32 *location, +static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, void *location, Elf_Addr v) { /* @@ -142,15 +159,12 @@ static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, u32 *location, u32 imm11_5 = (v & 0xfe0) << (31 - 11); u32 imm4_0 = (v & 0x1f) << (11 - 4); - *location = (*location & 0x1fff07f) | imm11_5 | imm4_0; - return 0; + return riscv_insn_rmw(location, 0x1fff07f, imm11_5 | imm4_0); } -static int apply_r_riscv_hi20_rela(struct module *me, u32 *location, +static int apply_r_riscv_hi20_rela(struct module *me, void *location, Elf_Addr v) { - s32 hi20; - if (IS_ENABLED(CONFIG_CMODEL_MEDLOW)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", @@ -158,22 +172,20 @@ static int apply_r_riscv_hi20_rela(struct module *me, u32 *location, return -EINVAL; } - hi20 = ((s32)v + 0x800) & 0xfffff000; - *location = (*location & 0xfff) | hi20; - return 0; + return riscv_insn_rmw(location, 0xfff, ((s32)v + 0x800) & 0xfffff000); } -static int apply_r_riscv_lo12_i_rela(struct module *me, u32 *location, +static int apply_r_riscv_lo12_i_rela(struct module *me, void *location, Elf_Addr v) { /* Skip medlow checking because of filtering by HI20 already */ s32 hi20 = ((s32)v + 0x800) & 0xfffff000; s32 lo12 = ((s32)v - hi20); - *location = (*location & 0xfffff) | ((lo12 & 0xfff) << 20); - return 0; + + return riscv_insn_rmw(location, 0xfffff, (lo12 & 0xfff) << 20); } -static int apply_r_riscv_lo12_s_rela(struct module *me, u32 *location, +static int apply_r_riscv_lo12_s_rela(struct module *me, void *location, Elf_Addr v) { /* Skip medlow checking because of filtering by HI20 already */ @@ -181,20 +193,18 @@ static int apply_r_riscv_lo12_s_rela(struct module *me, u32 *location, s32 lo12 = ((s32)v - hi20); u32 imm11_5 = (lo12 & 0xfe0) << (31 - 11); u32 imm4_0 = (lo12 & 0x1f) << (11 - 4); - *location = (*location & 0x1fff07f) | imm11_5 | imm4_0; - return 0; + + return riscv_insn_rmw(location, 0x1fff07f, imm11_5 | imm4_0); } -static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location, +static int apply_r_riscv_got_hi20_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; - s32 hi20; + ptrdiff_t offset = (void *)v - location; /* Always emit the got entry */ if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { - offset = module_emit_got_entry(me, v); - offset = (void *)offset - (void *)location; + offset = (void *)module_emit_got_entry(me, v) - location; } else { pr_err( "%s: can not generate the GOT entry for symbol = %016llx from PC = %p\n", @@ -202,22 +212,19 @@ static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location, return -EINVAL; } - hi20 = (offset + 0x800) & 0xfffff000; - *location = (*location & 0xfff) | hi20; - return 0; + return riscv_insn_rmw(location, 0xfff, (offset + 0x800) & 0xfffff000); } -static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, +static int apply_r_riscv_call_plt_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u32 hi20, lo12; if (!riscv_insn_valid_32bit_offset(offset)) { /* Only emit the plt entry if offset over 32-bit range */ if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { - offset = module_emit_plt_entry(me, v); - offset = (void *)offset - (void *)location; + offset = (void *)module_emit_plt_entry(me, v) - location; } else { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", @@ -228,15 +235,14 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, hi20 = (offset + 0x800) & 0xfffff000; lo12 = (offset - hi20) & 0xfff; - *location = (*location & 0xfff) | hi20; - *(location + 1) = (*(location + 1) & 0xfffff) | (lo12 << 20); - return 0; + riscv_insn_rmw(location, 0xfff, hi20); + return riscv_insn_rmw(location + 4, 0xfffff, lo12 << 20); } -static int apply_r_riscv_call_rela(struct module *me, u32 *location, +static int apply_r_riscv_call_rela(struct module *me, void *location, Elf_Addr v) { - ptrdiff_t offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - location; u32 hi20, lo12; if (!riscv_insn_valid_32bit_offset(offset)) { @@ -248,18 +254,17 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location, hi20 = (offset + 0x800) & 0xfffff000; lo12 = (offset - hi20) & 0xfff; - *location = (*location & 0xfff) | hi20; - *(location + 1) = (*(location + 1) & 0xfffff) | (lo12 << 20); - return 0; + riscv_insn_rmw(location, 0xfff, hi20); + return riscv_insn_rmw(location + 4, 0xfffff, lo12 << 20); } -static int apply_r_riscv_relax_rela(struct module *me, u32 *location, +static int apply_r_riscv_relax_rela(struct module *me, void *location, Elf_Addr v) { return 0; } -static int apply_r_riscv_align_rela(struct module *me, u32 *location, +static int apply_r_riscv_align_rela(struct module *me, void *location, Elf_Addr v) { pr_err( @@ -268,49 +273,49 @@ static int apply_r_riscv_align_rela(struct module *me, u32 *location, return -EINVAL; } -static int apply_r_riscv_add16_rela(struct module *me, u32 *location, +static int apply_r_riscv_add16_rela(struct module *me, void *location, Elf_Addr v) { *(u16 *)location += (u16)v; return 0; } -static int apply_r_riscv_add32_rela(struct module *me, u32 *location, +static int apply_r_riscv_add32_rela(struct module *me, void *location, Elf_Addr v) { *(u32 *)location += (u32)v; return 0; } -static int apply_r_riscv_add64_rela(struct module *me, u32 *location, +static int apply_r_riscv_add64_rela(struct module *me, void *location, Elf_Addr v) { *(u64 *)location += (u64)v; return 0; } -static int apply_r_riscv_sub16_rela(struct module *me, u32 *location, +static int apply_r_riscv_sub16_rela(struct module *me, void *location, Elf_Addr v) { *(u16 *)location -= (u16)v; return 0; } -static int apply_r_riscv_sub32_rela(struct module *me, u32 *location, +static int apply_r_riscv_sub32_rela(struct module *me, void *location, Elf_Addr v) { *(u32 *)location -= (u32)v; return 0; } -static int apply_r_riscv_sub64_rela(struct module *me, u32 *location, +static int apply_r_riscv_sub64_rela(struct module *me, void *location, Elf_Addr v) { *(u64 *)location -= (u64)v; return 0; } -static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, +static int (*reloc_handlers_rela[]) (struct module *me, void *location, Elf_Addr v) = { [R_RISCV_32] = apply_r_riscv_32_rela, [R_RISCV_64] = apply_r_riscv_64_rela, @@ -342,9 +347,9 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, struct module *me) { Elf_Rela *rel = (void *) sechdrs[relsec].sh_addr; - int (*handler)(struct module *me, u32 *location, Elf_Addr v); + int (*handler)(struct module *me, void *location, Elf_Addr v); Elf_Sym *sym; - u32 *location; + void *location; unsigned int i, type; Elf_Addr v; int res; From 8fd6c5142395a106b63c8668e9f4a7106b6a0772 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Wed, 1 Nov 2023 11:33:00 -0700 Subject: [PATCH 190/415] riscv: Add remaining module relocations Add all final module relocations and add error logs explaining the ones that are not supported. Implement overflow checks for ADD/SUB/SET/ULEB128 relocations. Signed-off-by: Charlie Jenkins Link: https://lore.kernel.org/r/20231101-module_relocations-v9-2-8dfa3483c400@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/uapi/asm/elf.h | 5 +- arch/riscv/kernel/module.c | 448 ++++++++++++++++++++++++++++-- 2 files changed, 423 insertions(+), 30 deletions(-) diff --git a/arch/riscv/include/uapi/asm/elf.h b/arch/riscv/include/uapi/asm/elf.h index d696d6610231..11a71b8533d5 100644 --- a/arch/riscv/include/uapi/asm/elf.h +++ b/arch/riscv/include/uapi/asm/elf.h @@ -49,6 +49,7 @@ typedef union __riscv_fp_state elf_fpregset_t; #define R_RISCV_TLS_DTPREL64 9 #define R_RISCV_TLS_TPREL32 10 #define R_RISCV_TLS_TPREL64 11 +#define R_RISCV_IRELATIVE 58 /* Relocation types not used by the dynamic linker */ #define R_RISCV_BRANCH 16 @@ -81,7 +82,6 @@ typedef union __riscv_fp_state elf_fpregset_t; #define R_RISCV_ALIGN 43 #define R_RISCV_RVC_BRANCH 44 #define R_RISCV_RVC_JUMP 45 -#define R_RISCV_LUI 46 #define R_RISCV_GPREL_I 47 #define R_RISCV_GPREL_S 48 #define R_RISCV_TPREL_I 49 @@ -93,6 +93,9 @@ typedef union __riscv_fp_state elf_fpregset_t; #define R_RISCV_SET16 55 #define R_RISCV_SET32 56 #define R_RISCV_32_PCREL 57 +#define R_RISCV_PLT32 59 +#define R_RISCV_SET_ULEB128 60 +#define R_RISCV_SUB_ULEB128 61 #endif /* _UAPI_ASM_RISCV_ELF_H */ diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 8286d3bb04f4..4b339729d5ec 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -7,6 +7,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -14,6 +17,38 @@ #include #include +struct used_bucket { + struct list_head head; + struct hlist_head *bucket; +}; + +struct relocation_head { + struct hlist_node node; + struct list_head *rel_entry; + void *location; +}; + +struct relocation_entry { + struct list_head head; + Elf_Addr value; + unsigned int type; +}; + +struct relocation_handlers { + int (*reloc_handler)(struct module *me, void *location, Elf_Addr v); + int (*accumulate_handler)(struct module *me, void *location, + long buffer); +}; + +unsigned int initialize_relocation_hashtable(unsigned int num_relocations); +void process_accumulated_relocations(struct module *me); +int add_relocation_to_accumulate(struct module *me, int type, void *location, + unsigned int hashtable_bits, Elf_Addr v); + +struct hlist_head *relocation_hashtable; + +struct list_head used_buckets_list; + /* * The auipc+jalr instruction pair can reach any PC-relative offset * in the range [-2^31 - 2^11, 2^31 - 2^11) @@ -273,6 +308,12 @@ static int apply_r_riscv_align_rela(struct module *me, void *location, return -EINVAL; } +static int apply_r_riscv_add8_rela(struct module *me, void *location, Elf_Addr v) +{ + *(u8 *)location += (u8)v; + return 0; +} + static int apply_r_riscv_add16_rela(struct module *me, void *location, Elf_Addr v) { @@ -294,6 +335,12 @@ static int apply_r_riscv_add64_rela(struct module *me, void *location, return 0; } +static int apply_r_riscv_sub8_rela(struct module *me, void *location, Elf_Addr v) +{ + *(u8 *)location -= (u8)v; + return 0; +} + static int apply_r_riscv_sub16_rela(struct module *me, void *location, Elf_Addr v) { @@ -315,33 +362,369 @@ static int apply_r_riscv_sub64_rela(struct module *me, void *location, return 0; } -static int (*reloc_handlers_rela[]) (struct module *me, void *location, - Elf_Addr v) = { - [R_RISCV_32] = apply_r_riscv_32_rela, - [R_RISCV_64] = apply_r_riscv_64_rela, - [R_RISCV_BRANCH] = apply_r_riscv_branch_rela, - [R_RISCV_JAL] = apply_r_riscv_jal_rela, - [R_RISCV_RVC_BRANCH] = apply_r_riscv_rvc_branch_rela, - [R_RISCV_RVC_JUMP] = apply_r_riscv_rvc_jump_rela, - [R_RISCV_PCREL_HI20] = apply_r_riscv_pcrel_hi20_rela, - [R_RISCV_PCREL_LO12_I] = apply_r_riscv_pcrel_lo12_i_rela, - [R_RISCV_PCREL_LO12_S] = apply_r_riscv_pcrel_lo12_s_rela, - [R_RISCV_HI20] = apply_r_riscv_hi20_rela, - [R_RISCV_LO12_I] = apply_r_riscv_lo12_i_rela, - [R_RISCV_LO12_S] = apply_r_riscv_lo12_s_rela, - [R_RISCV_GOT_HI20] = apply_r_riscv_got_hi20_rela, - [R_RISCV_CALL_PLT] = apply_r_riscv_call_plt_rela, - [R_RISCV_CALL] = apply_r_riscv_call_rela, - [R_RISCV_RELAX] = apply_r_riscv_relax_rela, - [R_RISCV_ALIGN] = apply_r_riscv_align_rela, - [R_RISCV_ADD16] = apply_r_riscv_add16_rela, - [R_RISCV_ADD32] = apply_r_riscv_add32_rela, - [R_RISCV_ADD64] = apply_r_riscv_add64_rela, - [R_RISCV_SUB16] = apply_r_riscv_sub16_rela, - [R_RISCV_SUB32] = apply_r_riscv_sub32_rela, - [R_RISCV_SUB64] = apply_r_riscv_sub64_rela, +static int dynamic_linking_not_supported(struct module *me, void *location, + Elf_Addr v) +{ + pr_err("%s: Dynamic linking not supported in kernel modules PC = %p\n", + me->name, location); + return -EINVAL; +} + +static int tls_not_supported(struct module *me, void *location, Elf_Addr v) +{ + pr_err("%s: Thread local storage not supported in kernel modules PC = %p\n", + me->name, location); + return -EINVAL; +} + +static int apply_r_riscv_sub6_rela(struct module *me, void *location, Elf_Addr v) +{ + u8 *byte = location; + u8 value = v; + + *byte = (*byte - (value & 0x3f)) & 0x3f; + return 0; +} + +static int apply_r_riscv_set6_rela(struct module *me, void *location, Elf_Addr v) +{ + u8 *byte = location; + u8 value = v; + + *byte = (*byte & 0xc0) | (value & 0x3f); + return 0; +} + +static int apply_r_riscv_set8_rela(struct module *me, void *location, Elf_Addr v) +{ + *(u8 *)location = (u8)v; + return 0; +} + +static int apply_r_riscv_set16_rela(struct module *me, void *location, + Elf_Addr v) +{ + *(u16 *)location = (u16)v; + return 0; +} + +static int apply_r_riscv_set32_rela(struct module *me, void *location, + Elf_Addr v) +{ + *(u32 *)location = (u32)v; + return 0; +} + +static int apply_r_riscv_32_pcrel_rela(struct module *me, void *location, + Elf_Addr v) +{ + *(u32 *)location = v - (uintptr_t)location; + return 0; +} + +static int apply_r_riscv_plt32_rela(struct module *me, void *location, + Elf_Addr v) +{ + ptrdiff_t offset = (void *)v - location; + + if (!riscv_insn_valid_32bit_offset(offset)) { + /* Only emit the plt entry if offset over 32-bit range */ + if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { + offset = (void *)module_emit_plt_entry(me, v) - location; + } else { + pr_err("%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", + me->name, (long long)v, location); + return -EINVAL; + } + } + + *(u32 *)location = (u32)offset; + return 0; +} + +static int apply_r_riscv_set_uleb128(struct module *me, void *location, Elf_Addr v) +{ + *(long *)location = v; + return 0; +} + +static int apply_r_riscv_sub_uleb128(struct module *me, void *location, Elf_Addr v) +{ + *(long *)location -= v; + return 0; +} + +static int apply_6_bit_accumulation(struct module *me, void *location, long buffer) +{ + u8 *byte = location; + u8 value = buffer; + + if (buffer > 0x3f) { + pr_err("%s: value %ld out of range for 6-bit relocation.\n", + me->name, buffer); + return -EINVAL; + } + + *byte = (*byte & 0xc0) | (value & 0x3f); + return 0; +} + +static int apply_8_bit_accumulation(struct module *me, void *location, long buffer) +{ + if (buffer > U8_MAX) { + pr_err("%s: value %ld out of range for 8-bit relocation.\n", + me->name, buffer); + return -EINVAL; + } + *(u8 *)location = (u8)buffer; + return 0; +} + +static int apply_16_bit_accumulation(struct module *me, void *location, long buffer) +{ + if (buffer > U16_MAX) { + pr_err("%s: value %ld out of range for 16-bit relocation.\n", + me->name, buffer); + return -EINVAL; + } + *(u16 *)location = (u16)buffer; + return 0; +} + +static int apply_32_bit_accumulation(struct module *me, void *location, long buffer) +{ + if (buffer > U32_MAX) { + pr_err("%s: value %ld out of range for 32-bit relocation.\n", + me->name, buffer); + return -EINVAL; + } + *(u32 *)location = (u32)buffer; + return 0; +} + +static int apply_64_bit_accumulation(struct module *me, void *location, long buffer) +{ + *(u64 *)location = (u64)buffer; + return 0; +} + +static int apply_uleb128_accumulation(struct module *me, void *location, long buffer) +{ + /* + * ULEB128 is a variable length encoding. Encode the buffer into + * the ULEB128 data format. + */ + u8 *p = location; + + while (buffer != 0) { + u8 value = buffer & 0x7f; + + buffer >>= 7; + value |= (!!buffer) << 7; + + *p++ = value; + } + return 0; +} + +/* + * Relocations defined in the riscv-elf-psabi-doc. + * This handles static linking only. + */ +static const struct relocation_handlers reloc_handlers[] = { + [R_RISCV_32] = { apply_r_riscv_32_rela }, + [R_RISCV_64] = { apply_r_riscv_64_rela }, + [R_RISCV_RELATIVE] = { dynamic_linking_not_supported }, + [R_RISCV_COPY] = { dynamic_linking_not_supported }, + [R_RISCV_JUMP_SLOT] = { dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPMOD32] = { dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPMOD64] = { dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPREL32] = { dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPREL64] = { dynamic_linking_not_supported }, + [R_RISCV_TLS_TPREL32] = { dynamic_linking_not_supported }, + [R_RISCV_TLS_TPREL64] = { dynamic_linking_not_supported }, + /* 12-15 undefined */ + [R_RISCV_BRANCH] = { apply_r_riscv_branch_rela }, + [R_RISCV_JAL] = { apply_r_riscv_jal_rela }, + [R_RISCV_CALL] = { apply_r_riscv_call_rela }, + [R_RISCV_CALL_PLT] = { apply_r_riscv_call_plt_rela }, + [R_RISCV_GOT_HI20] = { apply_r_riscv_got_hi20_rela }, + [R_RISCV_TLS_GOT_HI20] = { tls_not_supported }, + [R_RISCV_TLS_GD_HI20] = { tls_not_supported }, + [R_RISCV_PCREL_HI20] = { apply_r_riscv_pcrel_hi20_rela }, + [R_RISCV_PCREL_LO12_I] = { apply_r_riscv_pcrel_lo12_i_rela }, + [R_RISCV_PCREL_LO12_S] = { apply_r_riscv_pcrel_lo12_s_rela }, + [R_RISCV_HI20] = { apply_r_riscv_hi20_rela }, + [R_RISCV_LO12_I] = { apply_r_riscv_lo12_i_rela }, + [R_RISCV_LO12_S] = { apply_r_riscv_lo12_s_rela }, + [R_RISCV_TPREL_HI20] = { tls_not_supported }, + [R_RISCV_TPREL_LO12_I] = { tls_not_supported }, + [R_RISCV_TPREL_LO12_S] = { tls_not_supported }, + [R_RISCV_TPREL_ADD] = { tls_not_supported }, + [R_RISCV_ADD8] = { apply_r_riscv_add8_rela, apply_8_bit_accumulation }, + [R_RISCV_ADD16] = { apply_r_riscv_add16_rela, + apply_16_bit_accumulation }, + [R_RISCV_ADD32] = { apply_r_riscv_add32_rela, + apply_32_bit_accumulation }, + [R_RISCV_ADD64] = { apply_r_riscv_add64_rela, + apply_64_bit_accumulation }, + [R_RISCV_SUB8] = { apply_r_riscv_sub8_rela, apply_8_bit_accumulation }, + [R_RISCV_SUB16] = { apply_r_riscv_sub16_rela, + apply_16_bit_accumulation }, + [R_RISCV_SUB32] = { apply_r_riscv_sub32_rela, + apply_32_bit_accumulation }, + [R_RISCV_SUB64] = { apply_r_riscv_sub64_rela, + apply_64_bit_accumulation }, + /* 41-42 reserved for future standard use */ + [R_RISCV_ALIGN] = { apply_r_riscv_align_rela }, + [R_RISCV_RVC_BRANCH] = { apply_r_riscv_rvc_branch_rela }, + [R_RISCV_RVC_JUMP] = { apply_r_riscv_rvc_jump_rela }, + /* 46-50 reserved for future standard use */ + [R_RISCV_RELAX] = { apply_r_riscv_relax_rela }, + [R_RISCV_SUB6] = { apply_r_riscv_sub6_rela, apply_6_bit_accumulation }, + [R_RISCV_SET6] = { apply_r_riscv_set6_rela, apply_6_bit_accumulation }, + [R_RISCV_SET8] = { apply_r_riscv_set8_rela, apply_8_bit_accumulation }, + [R_RISCV_SET16] = { apply_r_riscv_set16_rela, + apply_16_bit_accumulation }, + [R_RISCV_SET32] = { apply_r_riscv_set32_rela, + apply_32_bit_accumulation }, + [R_RISCV_32_PCREL] = { apply_r_riscv_32_pcrel_rela }, + [R_RISCV_IRELATIVE] = { dynamic_linking_not_supported }, + [R_RISCV_PLT32] = { apply_r_riscv_plt32_rela }, + [R_RISCV_SET_ULEB128] = { apply_r_riscv_set_uleb128, + apply_uleb128_accumulation }, + [R_RISCV_SUB_ULEB128] = { apply_r_riscv_sub_uleb128, + apply_uleb128_accumulation }, + /* 62-191 reserved for future standard use */ + /* 192-255 nonstandard ABI extensions */ }; +void process_accumulated_relocations(struct module *me) +{ + /* + * Only ADD/SUB/SET/ULEB128 should end up here. + * + * Each bucket may have more than one relocation location. All + * relocations for a location are stored in a list in a bucket. + * + * Relocations are applied to a temp variable before being stored to the + * provided location to check for overflow. This also allows ULEB128 to + * properly decide how many entries are needed before storing to + * location. The final value is stored into location using the handler + * for the last relocation to an address. + * + * Three layers of indexing: + * - Each of the buckets in use + * - Groups of relocations in each bucket by location address + * - Each relocation entry for a location address + */ + struct used_bucket *bucket_iter; + struct relocation_head *rel_head_iter; + struct relocation_entry *rel_entry_iter; + int curr_type; + void *location; + long buffer; + + list_for_each_entry(bucket_iter, &used_buckets_list, head) { + hlist_for_each_entry(rel_head_iter, bucket_iter->bucket, node) { + buffer = 0; + location = rel_head_iter->location; + list_for_each_entry(rel_entry_iter, + rel_head_iter->rel_entry, head) { + curr_type = rel_entry_iter->type; + reloc_handlers[curr_type].reloc_handler( + me, &buffer, rel_entry_iter->value); + kfree(rel_entry_iter); + } + reloc_handlers[curr_type].accumulate_handler( + me, location, buffer); + kfree(rel_head_iter); + } + kfree(bucket_iter); + } + + kfree(relocation_hashtable); +} + +int add_relocation_to_accumulate(struct module *me, int type, void *location, + unsigned int hashtable_bits, Elf_Addr v) +{ + struct relocation_entry *entry; + struct relocation_head *rel_head; + struct hlist_head *current_head; + struct used_bucket *bucket; + unsigned long hash; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + INIT_LIST_HEAD(&entry->head); + entry->type = type; + entry->value = v; + + hash = hash_min((uintptr_t)location, hashtable_bits); + + current_head = &relocation_hashtable[hash]; + + /* Find matching location (if any) */ + bool found = false; + struct relocation_head *rel_head_iter; + + hlist_for_each_entry(rel_head_iter, current_head, node) { + if (rel_head_iter->location == location) { + found = true; + rel_head = rel_head_iter; + break; + } + } + + if (!found) { + rel_head = kmalloc(sizeof(*rel_head), GFP_KERNEL); + rel_head->rel_entry = + kmalloc(sizeof(struct list_head), GFP_KERNEL); + INIT_LIST_HEAD(rel_head->rel_entry); + rel_head->location = location; + INIT_HLIST_NODE(&rel_head->node); + if (!current_head->first) { + bucket = + kmalloc(sizeof(struct used_bucket), GFP_KERNEL); + INIT_LIST_HEAD(&bucket->head); + bucket->bucket = current_head; + list_add(&bucket->head, &used_buckets_list); + } + hlist_add_head(&rel_head->node, current_head); + } + + /* Add relocation to head of discovered rel_head */ + list_add_tail(&entry->head, rel_head->rel_entry); + + return 0; +} + +unsigned int initialize_relocation_hashtable(unsigned int num_relocations) +{ + /* Can safely assume that bits is not greater than sizeof(long) */ + unsigned long hashtable_size = roundup_pow_of_two(num_relocations); + unsigned int hashtable_bits = ilog2(hashtable_size); + + /* + * Double size of hashtable if num_relocations * 1.25 is greater than + * hashtable_size. + */ + int should_double_size = ((num_relocations + (num_relocations >> 2)) > (hashtable_size)); + + hashtable_bits += should_double_size; + + hashtable_size <<= should_double_size; + + relocation_hashtable = kmalloc_array(hashtable_size, + sizeof(*relocation_hashtable), + GFP_KERNEL); + __hash_init(relocation_hashtable, hashtable_size); + + INIT_LIST_HEAD(&used_buckets_list); + + return hashtable_bits; +} + int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, unsigned int symindex, unsigned int relsec, struct module *me) @@ -353,11 +736,13 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, unsigned int i, type; Elf_Addr v; int res; + unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel); + unsigned int hashtable_bits = initialize_relocation_hashtable(num_relocations); pr_debug("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); - for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + for (i = 0; i < num_relocations; i++) { /* This is where to make the change */ location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[i].r_offset; @@ -375,8 +760,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, type = ELF_RISCV_R_TYPE(rel[i].r_info); - if (type < ARRAY_SIZE(reloc_handlers_rela)) - handler = reloc_handlers_rela[type]; + if (type < ARRAY_SIZE(reloc_handlers)) + handler = reloc_handlers[type].reloc_handler; else handler = NULL; @@ -432,11 +817,16 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, } } - res = handler(me, location, v); + if (reloc_handlers[type].accumulate_handler) + res = add_relocation_to_accumulate(me, type, location, hashtable_bits, v); + else + res = handler(me, location, v); if (res) return res; } + process_accumulated_relocations(me); + return 0; } From af71bc194916b10f9b394f9b14419d99700a5e67 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Wed, 1 Nov 2023 11:33:01 -0700 Subject: [PATCH 191/415] riscv: Add tests for riscv module loading Add test cases for the two main groups of relocations added: SUB and SET, along with uleb128. Signed-off-by: Charlie Jenkins Link: https://lore.kernel.org/r/20231101-module_relocations-v9-3-8dfa3483c400@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig.debug | 1 + arch/riscv/kernel/Makefile | 1 + arch/riscv/kernel/tests/Kconfig.debug | 35 ++++++++ arch/riscv/kernel/tests/Makefile | 1 + arch/riscv/kernel/tests/module_test/Makefile | 15 ++++ .../module_test/test_module_linking_main.c | 88 +++++++++++++++++++ .../kernel/tests/module_test/test_set16.S | 23 +++++ .../kernel/tests/module_test/test_set32.S | 20 +++++ .../kernel/tests/module_test/test_set6.S | 23 +++++ .../kernel/tests/module_test/test_set8.S | 23 +++++ .../kernel/tests/module_test/test_sub16.S | 20 +++++ .../kernel/tests/module_test/test_sub32.S | 20 +++++ .../kernel/tests/module_test/test_sub6.S | 20 +++++ .../kernel/tests/module_test/test_sub64.S | 25 ++++++ .../kernel/tests/module_test/test_sub8.S | 20 +++++ .../kernel/tests/module_test/test_uleb128.S | 31 +++++++ 16 files changed, 366 insertions(+) create mode 100644 arch/riscv/kernel/tests/Kconfig.debug create mode 100644 arch/riscv/kernel/tests/Makefile create mode 100644 arch/riscv/kernel/tests/module_test/Makefile create mode 100644 arch/riscv/kernel/tests/module_test/test_module_linking_main.c create mode 100644 arch/riscv/kernel/tests/module_test/test_set16.S create mode 100644 arch/riscv/kernel/tests/module_test/test_set32.S create mode 100644 arch/riscv/kernel/tests/module_test/test_set6.S create mode 100644 arch/riscv/kernel/tests/module_test/test_set8.S create mode 100644 arch/riscv/kernel/tests/module_test/test_sub16.S create mode 100644 arch/riscv/kernel/tests/module_test/test_sub32.S create mode 100644 arch/riscv/kernel/tests/module_test/test_sub6.S create mode 100644 arch/riscv/kernel/tests/module_test/test_sub64.S create mode 100644 arch/riscv/kernel/tests/module_test/test_sub8.S create mode 100644 arch/riscv/kernel/tests/module_test/test_uleb128.S diff --git a/arch/riscv/Kconfig.debug b/arch/riscv/Kconfig.debug index e69de29bb2d1..eafe17ebf710 100644 --- a/arch/riscv/Kconfig.debug +++ b/arch/riscv/Kconfig.debug @@ -0,0 +1 @@ +source "arch/riscv/kernel/tests/Kconfig.debug" diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 95cf25d48405..bb99657252f4 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -57,6 +57,7 @@ obj-y += stacktrace.o obj-y += cacheinfo.o obj-y += patch.o obj-y += probes/ +obj-y += tests/ obj-$(CONFIG_MMU) += vdso.o vdso/ obj-$(CONFIG_RISCV_M_MODE) += traps_misaligned.o diff --git a/arch/riscv/kernel/tests/Kconfig.debug b/arch/riscv/kernel/tests/Kconfig.debug new file mode 100644 index 000000000000..5dba64e8e977 --- /dev/null +++ b/arch/riscv/kernel/tests/Kconfig.debug @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: GPL-2.0-only +menu "arch/riscv/kernel Testing and Coverage" + +config AS_HAS_ULEB128 + def_bool $(as-instr,.reloc label$(comma) R_RISCV_SET_ULEB128$(comma) 127\n.reloc label$(comma) R_RISCV_SUB_ULEB128$(comma) 127\nlabel:\n.word 0) + +menuconfig RUNTIME_KERNEL_TESTING_MENU + bool "arch/riscv/kernel runtime Testing" + def_bool y + help + Enable riscv kernel runtime testing. + +if RUNTIME_KERNEL_TESTING_MENU + +config RISCV_MODULE_LINKING_KUNIT + bool "KUnit test riscv module linking at runtime" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Enable this option to test riscv module linking at boot. This will + enable a module called "test_module_linking". + + KUnit tests run during boot and output the results to the debug log + in TAP format (http://testanything.org/). Only useful for kernel devs + running the KUnit test harness, and not intended for inclusion into a + production build. + + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + +endif # RUNTIME_TESTING_MENU + +endmenu # "arch/riscv/kernel runtime Testing" diff --git a/arch/riscv/kernel/tests/Makefile b/arch/riscv/kernel/tests/Makefile new file mode 100644 index 000000000000..7d6c76cffe20 --- /dev/null +++ b/arch/riscv/kernel/tests/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_RISCV_MODULE_LINKING_KUNIT) += module_test/ diff --git a/arch/riscv/kernel/tests/module_test/Makefile b/arch/riscv/kernel/tests/module_test/Makefile new file mode 100644 index 000000000000..d7a6fd8943de --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/Makefile @@ -0,0 +1,15 @@ +obj-m += test_module_linking.o + +test_sub := test_sub6.o test_sub8.o test_sub16.o test_sub32.o test_sub64.o + +test_set := test_set6.o test_set8.o test_set16.o test_set32.o + +test_module_linking-objs += $(test_sub) + +test_module_linking-objs += $(test_set) + +ifeq ($(CONFIG_AS_HAS_ULEB128),y) +test_module_linking-objs += test_uleb128.o +endif + +test_module_linking-objs += test_module_linking_main.o diff --git a/arch/riscv/kernel/tests/module_test/test_module_linking_main.c b/arch/riscv/kernel/tests/module_test/test_module_linking_main.c new file mode 100644 index 000000000000..8df5fa5b834e --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_module_linking_main.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Rivos Inc. + */ + +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Test module linking"); + +extern int test_set32(void); +extern int test_set16(void); +extern int test_set8(void); +extern int test_set6(void); +extern long test_sub64(void); +extern int test_sub32(void); +extern int test_sub16(void); +extern int test_sub8(void); +extern int test_sub6(void); + +#ifdef CONFIG_AS_HAS_ULEB128 +extern int test_uleb_basic(void); +extern int test_uleb_large(void); +#endif + +#define CHECK_EQ(lhs, rhs) KUNIT_ASSERT_EQ(test, lhs, rhs) + +void run_test_set(struct kunit *test); +void run_test_sub(struct kunit *test); +void run_test_uleb(struct kunit *test); + +void run_test_set(struct kunit *test) +{ + int val32 = test_set32(); + int val16 = test_set16(); + int val8 = test_set8(); + int val6 = test_set6(); + + CHECK_EQ(val32, 0); + CHECK_EQ(val16, 0); + CHECK_EQ(val8, 0); + CHECK_EQ(val6, 0); +} + +void run_test_sub(struct kunit *test) +{ + int val64 = test_sub64(); + int val32 = test_sub32(); + int val16 = test_sub16(); + int val8 = test_sub8(); + int val6 = test_sub6(); + + CHECK_EQ(val64, 0); + CHECK_EQ(val32, 0); + CHECK_EQ(val16, 0); + CHECK_EQ(val8, 0); + CHECK_EQ(val6, 0); +} + +#ifdef CONFIG_AS_HAS_ULEB128 +void run_test_uleb(struct kunit *test) +{ + int val_uleb = test_uleb_basic(); + int val_uleb2 = test_uleb_large(); + + CHECK_EQ(val_uleb, 0); + CHECK_EQ(val_uleb2, 0); +} +#endif + +static struct kunit_case __refdata riscv_module_linking_test_cases[] = { + KUNIT_CASE(run_test_set), + KUNIT_CASE(run_test_sub), +#ifdef CONFIG_AS_HAS_ULEB128 + KUNIT_CASE(run_test_uleb), +#endif + {} +}; + +static struct kunit_suite riscv_module_linking_test_suite = { + .name = "riscv_checksum", + .test_cases = riscv_module_linking_test_cases, +}; + +kunit_test_suites(&riscv_module_linking_test_suite); diff --git a/arch/riscv/kernel/tests/module_test/test_set16.S b/arch/riscv/kernel/tests/module_test/test_set16.S new file mode 100644 index 000000000000..2be0e441a12e --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_set16.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_set16 +test_set16: + lw a0, set16 + la t0, set16 +#ifdef CONFIG_32BIT + slli t0, t0, 16 + srli t0, t0, 16 +#else + slli t0, t0, 48 + srli t0, t0, 48 +#endif + sub a0, a0, t0 + ret +.data +set16: + .reloc set16, R_RISCV_SET16, set16 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_set32.S b/arch/riscv/kernel/tests/module_test/test_set32.S new file mode 100644 index 000000000000..de0444537e67 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_set32.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_set32 +test_set32: + lw a0, set32 + la t0, set32 +#ifndef CONFIG_32BIT + slli t0, t0, 32 + srli t0, t0, 32 +#endif + sub a0, a0, t0 + ret +.data +set32: + .reloc set32, R_RISCV_SET32, set32 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_set6.S b/arch/riscv/kernel/tests/module_test/test_set6.S new file mode 100644 index 000000000000..c39ce4c219eb --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_set6.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_set6 +test_set6: + lw a0, set6 + la t0, set6 +#ifdef CONFIG_32BIT + slli t0, t0, 26 + srli t0, t0, 26 +#else + slli t0, t0, 58 + srli t0, t0, 58 +#endif + sub a0, a0, t0 + ret +.data +set6: + .reloc set6, R_RISCV_SET6, set6 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_set8.S b/arch/riscv/kernel/tests/module_test/test_set8.S new file mode 100644 index 000000000000..a656173f6f99 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_set8.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_set8 +test_set8: + lw a0, set8 + la t0, set8 +#ifdef CONFIG_32BIT + slli t0, t0, 24 + srli t0, t0, 24 +#else + slli t0, t0, 56 + srli t0, t0, 56 +#endif + sub a0, a0, t0 + ret +.data +set8: + .reloc set8, R_RISCV_SET8, set8 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub16.S b/arch/riscv/kernel/tests/module_test/test_sub16.S new file mode 100644 index 000000000000..80f731d599ba --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub16.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_sub16 +test_sub16: + lh a0, sub16 + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub16: + .reloc sub16, R_RISCV_ADD16, second + .reloc sub16, R_RISCV_SUB16, first + .half 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub32.S b/arch/riscv/kernel/tests/module_test/test_sub32.S new file mode 100644 index 000000000000..a341686e12df --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub32.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_sub32 +test_sub32: + lw a0, sub32 + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub32: + .reloc sub32, R_RISCV_ADD32, second + .reloc sub32, R_RISCV_SUB32, first + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub6.S b/arch/riscv/kernel/tests/module_test/test_sub6.S new file mode 100644 index 000000000000..e8b61c1ec527 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub6.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_sub6 +test_sub6: + lb a0, sub6 + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub6: + .reloc sub6, R_RISCV_SET6, second + .reloc sub6, R_RISCV_SUB6, first + .byte 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub64.S b/arch/riscv/kernel/tests/module_test/test_sub64.S new file mode 100644 index 000000000000..a59e8afa88fd --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub64.S @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_sub64 +test_sub64: +#ifdef CONFIG_32BIT + lw a0, sub64 +#else + ld a0, sub64 +#endif + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub64: + .reloc sub64, R_RISCV_ADD64, second + .reloc sub64, R_RISCV_SUB64, first + .word 0 + .word 0 diff --git a/arch/riscv/kernel/tests/module_test/test_sub8.S b/arch/riscv/kernel/tests/module_test/test_sub8.S new file mode 100644 index 000000000000..ac5d0ec98de3 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_sub8.S @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_sub8 +test_sub8: + lb a0, sub8 + addi a0, a0, -32 + ret +first: + .space 32 +second: + +.data +sub8: + .reloc sub8, R_RISCV_ADD8, second + .reloc sub8, R_RISCV_SUB8, first + .byte 0 diff --git a/arch/riscv/kernel/tests/module_test/test_uleb128.S b/arch/riscv/kernel/tests/module_test/test_uleb128.S new file mode 100644 index 000000000000..90f22049d553 --- /dev/null +++ b/arch/riscv/kernel/tests/module_test/test_uleb128.S @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Rivos Inc. + */ + +.text +.global test_uleb_basic +test_uleb_basic: + ld a0, second + addi a0, a0, -127 + ret + +.global test_uleb_large +test_uleb_large: + ld a0, fourth + addi a0, a0, -0x07e8 + ret + +.data +first: + .space 127 +second: + .reloc second, R_RISCV_SET_ULEB128, second + .reloc second, R_RISCV_SUB_ULEB128, first + .dword 0 +third: + .space 1000 +fourth: + .reloc fourth, R_RISCV_SET_ULEB128, fourth + .reloc fourth, R_RISCV_SUB_ULEB128, third + .dword 0 From 28ea54bade76e2d47cb8cdb96e427cfb90d3eea7 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 7 Nov 2023 07:55:29 -0800 Subject: [PATCH 192/415] RISC-V: Don't rely on positional structure initialization Without this I get a bunch of warnings along the lines of arch/riscv/kernel/module.c:535:26: error: positional initialization of field in 'struct' declared with 'designated_init' attribute [-Werror=designated-init] 535 | [R_RISCV_32] = { apply_r_riscv_32_rela }, This just mades the member initializers explicit instead of positional. I also aligned some of the table, but mostly just to make the batch editing go faster. Fixes: b51fc88cb35e ("Merge patch series "riscv: Add remaining module relocations and tests"") Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20231107155529.8368-1-palmer@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 125 +++++++++++++++++++------------------ 1 file changed, 65 insertions(+), 60 deletions(-) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 4b339729d5ec..56a8c78e9e21 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -532,69 +532,74 @@ static int apply_uleb128_accumulation(struct module *me, void *location, long bu * This handles static linking only. */ static const struct relocation_handlers reloc_handlers[] = { - [R_RISCV_32] = { apply_r_riscv_32_rela }, - [R_RISCV_64] = { apply_r_riscv_64_rela }, - [R_RISCV_RELATIVE] = { dynamic_linking_not_supported }, - [R_RISCV_COPY] = { dynamic_linking_not_supported }, - [R_RISCV_JUMP_SLOT] = { dynamic_linking_not_supported }, - [R_RISCV_TLS_DTPMOD32] = { dynamic_linking_not_supported }, - [R_RISCV_TLS_DTPMOD64] = { dynamic_linking_not_supported }, - [R_RISCV_TLS_DTPREL32] = { dynamic_linking_not_supported }, - [R_RISCV_TLS_DTPREL64] = { dynamic_linking_not_supported }, - [R_RISCV_TLS_TPREL32] = { dynamic_linking_not_supported }, - [R_RISCV_TLS_TPREL64] = { dynamic_linking_not_supported }, + [R_RISCV_32] = { .reloc_handler = apply_r_riscv_32_rela }, + [R_RISCV_64] = { .reloc_handler = apply_r_riscv_64_rela }, + [R_RISCV_RELATIVE] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_COPY] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_JUMP_SLOT] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPMOD32] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPMOD64] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPREL32] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_DTPREL64] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_TPREL32] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_TLS_TPREL64] = { .reloc_handler = dynamic_linking_not_supported }, /* 12-15 undefined */ - [R_RISCV_BRANCH] = { apply_r_riscv_branch_rela }, - [R_RISCV_JAL] = { apply_r_riscv_jal_rela }, - [R_RISCV_CALL] = { apply_r_riscv_call_rela }, - [R_RISCV_CALL_PLT] = { apply_r_riscv_call_plt_rela }, - [R_RISCV_GOT_HI20] = { apply_r_riscv_got_hi20_rela }, - [R_RISCV_TLS_GOT_HI20] = { tls_not_supported }, - [R_RISCV_TLS_GD_HI20] = { tls_not_supported }, - [R_RISCV_PCREL_HI20] = { apply_r_riscv_pcrel_hi20_rela }, - [R_RISCV_PCREL_LO12_I] = { apply_r_riscv_pcrel_lo12_i_rela }, - [R_RISCV_PCREL_LO12_S] = { apply_r_riscv_pcrel_lo12_s_rela }, - [R_RISCV_HI20] = { apply_r_riscv_hi20_rela }, - [R_RISCV_LO12_I] = { apply_r_riscv_lo12_i_rela }, - [R_RISCV_LO12_S] = { apply_r_riscv_lo12_s_rela }, - [R_RISCV_TPREL_HI20] = { tls_not_supported }, - [R_RISCV_TPREL_LO12_I] = { tls_not_supported }, - [R_RISCV_TPREL_LO12_S] = { tls_not_supported }, - [R_RISCV_TPREL_ADD] = { tls_not_supported }, - [R_RISCV_ADD8] = { apply_r_riscv_add8_rela, apply_8_bit_accumulation }, - [R_RISCV_ADD16] = { apply_r_riscv_add16_rela, - apply_16_bit_accumulation }, - [R_RISCV_ADD32] = { apply_r_riscv_add32_rela, - apply_32_bit_accumulation }, - [R_RISCV_ADD64] = { apply_r_riscv_add64_rela, - apply_64_bit_accumulation }, - [R_RISCV_SUB8] = { apply_r_riscv_sub8_rela, apply_8_bit_accumulation }, - [R_RISCV_SUB16] = { apply_r_riscv_sub16_rela, - apply_16_bit_accumulation }, - [R_RISCV_SUB32] = { apply_r_riscv_sub32_rela, - apply_32_bit_accumulation }, - [R_RISCV_SUB64] = { apply_r_riscv_sub64_rela, - apply_64_bit_accumulation }, + [R_RISCV_BRANCH] = { .reloc_handler = apply_r_riscv_branch_rela }, + [R_RISCV_JAL] = { .reloc_handler = apply_r_riscv_jal_rela }, + [R_RISCV_CALL] = { .reloc_handler = apply_r_riscv_call_rela }, + [R_RISCV_CALL_PLT] = { .reloc_handler = apply_r_riscv_call_plt_rela }, + [R_RISCV_GOT_HI20] = { .reloc_handler = apply_r_riscv_got_hi20_rela }, + [R_RISCV_TLS_GOT_HI20] = { .reloc_handler = tls_not_supported }, + [R_RISCV_TLS_GD_HI20] = { .reloc_handler = tls_not_supported }, + [R_RISCV_PCREL_HI20] = { .reloc_handler = apply_r_riscv_pcrel_hi20_rela }, + [R_RISCV_PCREL_LO12_I] = { .reloc_handler = apply_r_riscv_pcrel_lo12_i_rela }, + [R_RISCV_PCREL_LO12_S] = { .reloc_handler = apply_r_riscv_pcrel_lo12_s_rela }, + [R_RISCV_HI20] = { .reloc_handler = apply_r_riscv_hi20_rela }, + [R_RISCV_LO12_I] = { .reloc_handler = apply_r_riscv_lo12_i_rela }, + [R_RISCV_LO12_S] = { .reloc_handler = apply_r_riscv_lo12_s_rela }, + [R_RISCV_TPREL_HI20] = { .reloc_handler = tls_not_supported }, + [R_RISCV_TPREL_LO12_I] = { .reloc_handler = tls_not_supported }, + [R_RISCV_TPREL_LO12_S] = { .reloc_handler = tls_not_supported }, + [R_RISCV_TPREL_ADD] = { .reloc_handler = tls_not_supported }, + [R_RISCV_ADD8] = { .reloc_handler = apply_r_riscv_add8_rela, + .accumulate_handler = apply_8_bit_accumulation }, + [R_RISCV_ADD16] = { .reloc_handler = apply_r_riscv_add16_rela, + .accumulate_handler = apply_16_bit_accumulation }, + [R_RISCV_ADD32] = { .reloc_handler = apply_r_riscv_add32_rela, + .accumulate_handler = apply_32_bit_accumulation }, + [R_RISCV_ADD64] = { .reloc_handler = apply_r_riscv_add64_rela, + .accumulate_handler = apply_64_bit_accumulation }, + [R_RISCV_SUB8] = { .reloc_handler = apply_r_riscv_sub8_rela, + .accumulate_handler = apply_8_bit_accumulation }, + [R_RISCV_SUB16] = { .reloc_handler = apply_r_riscv_sub16_rela, + .accumulate_handler = apply_16_bit_accumulation }, + [R_RISCV_SUB32] = { .reloc_handler = apply_r_riscv_sub32_rela, + .accumulate_handler = apply_32_bit_accumulation }, + [R_RISCV_SUB64] = { .reloc_handler = apply_r_riscv_sub64_rela, + .accumulate_handler = apply_64_bit_accumulation }, /* 41-42 reserved for future standard use */ - [R_RISCV_ALIGN] = { apply_r_riscv_align_rela }, - [R_RISCV_RVC_BRANCH] = { apply_r_riscv_rvc_branch_rela }, - [R_RISCV_RVC_JUMP] = { apply_r_riscv_rvc_jump_rela }, + [R_RISCV_ALIGN] = { .reloc_handler = apply_r_riscv_align_rela }, + [R_RISCV_RVC_BRANCH] = { .reloc_handler = apply_r_riscv_rvc_branch_rela }, + [R_RISCV_RVC_JUMP] = { .reloc_handler = apply_r_riscv_rvc_jump_rela }, /* 46-50 reserved for future standard use */ - [R_RISCV_RELAX] = { apply_r_riscv_relax_rela }, - [R_RISCV_SUB6] = { apply_r_riscv_sub6_rela, apply_6_bit_accumulation }, - [R_RISCV_SET6] = { apply_r_riscv_set6_rela, apply_6_bit_accumulation }, - [R_RISCV_SET8] = { apply_r_riscv_set8_rela, apply_8_bit_accumulation }, - [R_RISCV_SET16] = { apply_r_riscv_set16_rela, - apply_16_bit_accumulation }, - [R_RISCV_SET32] = { apply_r_riscv_set32_rela, - apply_32_bit_accumulation }, - [R_RISCV_32_PCREL] = { apply_r_riscv_32_pcrel_rela }, - [R_RISCV_IRELATIVE] = { dynamic_linking_not_supported }, - [R_RISCV_PLT32] = { apply_r_riscv_plt32_rela }, - [R_RISCV_SET_ULEB128] = { apply_r_riscv_set_uleb128, - apply_uleb128_accumulation }, - [R_RISCV_SUB_ULEB128] = { apply_r_riscv_sub_uleb128, - apply_uleb128_accumulation }, + [R_RISCV_RELAX] = { .reloc_handler = apply_r_riscv_relax_rela }, + [R_RISCV_SUB6] = { .reloc_handler = apply_r_riscv_sub6_rela, + .accumulate_handler = apply_6_bit_accumulation }, + [R_RISCV_SET6] = { .reloc_handler = apply_r_riscv_set6_rela, + .accumulate_handler = apply_6_bit_accumulation }, + [R_RISCV_SET8] = { .reloc_handler = apply_r_riscv_set8_rela, + .accumulate_handler = apply_8_bit_accumulation }, + [R_RISCV_SET16] = { .reloc_handler = apply_r_riscv_set16_rela, + .accumulate_handler = apply_16_bit_accumulation }, + [R_RISCV_SET32] = { .reloc_handler = apply_r_riscv_set32_rela, + .accumulate_handler = apply_32_bit_accumulation }, + [R_RISCV_32_PCREL] = { .reloc_handler = apply_r_riscv_32_pcrel_rela }, + [R_RISCV_IRELATIVE] = { .reloc_handler = dynamic_linking_not_supported }, + [R_RISCV_PLT32] = { .reloc_handler = apply_r_riscv_plt32_rela }, + [R_RISCV_SET_ULEB128] = { .reloc_handler = apply_r_riscv_set_uleb128, + .accumulate_handler = apply_uleb128_accumulation }, + [R_RISCV_SUB_ULEB128] = { .reloc_handler = apply_r_riscv_sub_uleb128, + .accumulate_handler = apply_uleb128_accumulation }, /* 62-191 reserved for future standard use */ /* 192-255 nonstandard ABI extensions */ }; From d3d2cf1acab1857ae1982d431be9d96dc0e0775c Mon Sep 17 00:00:00 2001 From: Evan Green Date: Mon, 6 Nov 2023 15:24:39 -0800 Subject: [PATCH 193/415] RISC-V: Show accurate per-hart isa in /proc/cpuinfo In /proc/cpuinfo, most of the information we show for each processor is specific to that hart: marchid, mvendorid, mimpid, processor, hart, compatible, and the mmu size. But the ISA string gets filtered through a lowest common denominator mask, so that if one CPU is missing an ISA extension, no CPUs will show it. Now that we track the ISA extensions for each hart, let's report ISA extension info accurately per-hart in /proc/cpuinfo. We cannot change the "isa:" line, as usermode may be relying on that line to show only the common set of extensions supported across all harts. Add a new "hart isa" line instead, which reports the true set of extensions for that hart. Signed-off-by: Evan Green Reviewed-by: Andrew Jones Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20231106232439.3176268-1-evan@rivosinc.com Signed-off-by: Palmer Dabbelt --- Documentation/riscv/uabi.rst | 20 ++++++++++++++++++++ arch/riscv/kernel/cpu.c | 22 ++++++++++++++++++---- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/Documentation/riscv/uabi.rst b/Documentation/riscv/uabi.rst index 8960fac42c40..54d199dce78b 100644 --- a/Documentation/riscv/uabi.rst +++ b/Documentation/riscv/uabi.rst @@ -42,6 +42,26 @@ An example string following the order is:: rv64imadc_zifoo_zigoo_zafoo_sbar_scar_zxmbaz_xqux_xrux +"isa" and "hart isa" lines in /proc/cpuinfo +------------------------------------------- + +The "isa" line in /proc/cpuinfo describes the lowest common denominator of +RISC-V ISA extensions recognized by the kernel and implemented on all harts. The +"hart isa" line, in contrast, describes the set of extensions recognized by the +kernel on the particular hart being described, even if those extensions may not +be present on all harts in the system. + +In both lines, the presence of an extension guarantees only that the hardware +has the described capability. Additional kernel support or policy changes may be +required before an extension's capability is fully usable by userspace programs. +Similarly, for S-mode extensions, presence in one of these lines does not +guarantee that the kernel is taking advantage of the extension, or that the +feature will be visible in guest VMs managed by this kernel. + +Inversely, the absence of an extension in these lines does not necessarily mean +the hardware does not support that feature. The running kernel may not recognize +the extension, or may have deliberately removed it from the listing. + Misaligned accesses ------------------- diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index c17dacb1141c..bcfc0c8dfd01 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -202,9 +202,8 @@ arch_initcall(riscv_cpuinfo_init); #ifdef CONFIG_PROC_FS -static void print_isa(struct seq_file *f) +static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap) { - seq_puts(f, "isa\t\t: "); if (IS_ENABLED(CONFIG_32BIT)) seq_write(f, "rv32", 4); @@ -212,7 +211,7 @@ static void print_isa(struct seq_file *f) seq_write(f, "rv64", 4); for (int i = 0; i < riscv_isa_ext_count; i++) { - if (!__riscv_isa_extension_available(NULL, riscv_isa_ext[i].id)) + if (!__riscv_isa_extension_available(isa_bitmap, riscv_isa_ext[i].id)) continue; /* Only multi-letter extensions are split by underscores */ @@ -276,7 +275,15 @@ static int c_show(struct seq_file *m, void *v) seq_printf(m, "processor\t: %lu\n", cpu_id); seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id)); - print_isa(m); + + /* + * For historical raisins, the isa: line is limited to the lowest common + * denominator of extensions supported across all harts. A true list of + * extensions supported on this hart is printed later in the hart isa: + * line. + */ + seq_puts(m, "isa\t\t: "); + print_isa(m, NULL); print_mmu(m); if (acpi_disabled) { @@ -292,6 +299,13 @@ static int c_show(struct seq_file *m, void *v) seq_printf(m, "mvendorid\t: 0x%lx\n", ci->mvendorid); seq_printf(m, "marchid\t\t: 0x%lx\n", ci->marchid); seq_printf(m, "mimpid\t\t: 0x%lx\n", ci->mimpid); + + /* + * Print the ISA extensions specific to this hart, which may show + * additional extensions not present across all harts. + */ + seq_puts(m, "hart isa\t: "); + print_isa(m, hart_isa[cpu_id].isa); seq_puts(m, "\n"); return 0; From 6eb7a6445b76a2fced91ebcf93d7a9073258c8cf Mon Sep 17 00:00:00 2001 From: Evan Green Date: Mon, 6 Nov 2023 15:11:05 -0800 Subject: [PATCH 194/415] RISC-V: Remove __init on unaligned_emulation_finish() This function shouldn't be __init, since it's called during hotplug. The warning says it well enough: WARNING: modpost: vmlinux: section mismatch in reference: check_unaligned_access_all_cpus+0x13a (section: .text) -> unaligned_emulation_finish (section: .init.text) Signed-off-by: Evan Green Fixes: 71c54b3d169d ("riscv: report misaligned accesses emulation to hwprobe") Link: https://lore.kernel.org/r/20231106231105.3141413-1-evan@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps_misaligned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index bba301b5194d..5eba37147caa 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -625,7 +625,7 @@ bool check_unaligned_access_emulated(int cpu) return misaligned_emu_detected; } -void __init unaligned_emulation_finish(void) +void unaligned_emulation_finish(void) { int cpu; From 55e0bf49a0d0387d682d696e41cada071f516075 Mon Sep 17 00:00:00 2001 From: Evan Green Date: Mon, 6 Nov 2023 14:58:55 -0800 Subject: [PATCH 195/415] RISC-V: Probe misaligned access speed in parallel Probing for misaligned access speed takes about 0.06 seconds. On a system with 64 cores, doing this in smp_callin() means it's done serially, extending boot time by 3.8 seconds. That's a lot of boot time. Instead of measuring each CPU serially, let's do the measurements on all CPUs in parallel. If we disable preemption on all CPUs, the jiffies stop ticking, so we can do this in stages of 1) everybody except core 0, then 2) core 0. The allocations are all done outside of on_each_cpu() to avoid calling alloc_pages() with interrupts disabled. For hotplugged CPUs that come in after the boot time measurement, register CPU hotplug callbacks, and do the measurement there. Interrupts are enabled in those callbacks, so they're fine to do alloc_pages() in. Reported-by: Jisheng Zhang Closes: https://lore.kernel.org/all/mhng-9359993d-6872-4134-83ce-c97debe1cf9a@palmer-ri-x1c9/T/#mae9b8f40016f9df428829d33360144dc5026bcbf Fixes: 584ea6564bca ("RISC-V: Probe for unaligned access speed") Signed-off-by: Evan Green Link: https://lore.kernel.org/r/20231106225855.3121724-1-evan@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cpufeature.h | 1 - arch/riscv/kernel/cpufeature.c | 106 +++++++++++++++++++++------- arch/riscv/kernel/smpboot.c | 1 - 3 files changed, 82 insertions(+), 26 deletions(-) diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 7f1e46a9d445..69f2cae96f0b 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -30,7 +30,6 @@ DECLARE_PER_CPU(long, misaligned_access_speed); /* Per-cpu ISA extensions. */ extern struct riscv_isainfo hart_isa[NR_CPUS]; -void check_unaligned_access(int cpu); void riscv_user_isa_enable(void); #ifdef CONFIG_RISCV_MISALIGNED diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 6a01ded615cd..fe59e18dbd5b 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -29,6 +30,7 @@ #define MISALIGNED_ACCESS_JIFFIES_LG2 1 #define MISALIGNED_BUFFER_SIZE 0x4000 +#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE) #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) unsigned long elf_hwcap __read_mostly; @@ -557,30 +559,21 @@ unsigned long riscv_get_elf_hwcap(void) return hwcap; } -void check_unaligned_access(int cpu) +static int check_unaligned_access(void *param) { + int cpu = smp_processor_id(); u64 start_cycles, end_cycles; u64 word_cycles; u64 byte_cycles; int ratio; unsigned long start_jiffies, now; - struct page *page; + struct page *page = param; void *dst; void *src; long speed = RISCV_HWPROBE_MISALIGNED_SLOW; if (check_unaligned_access_emulated(cpu)) - return; - - /* We are already set since the last check */ - if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) - return; - - page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE)); - if (!page) { - pr_warn("Can't alloc pages to measure memcpy performance"); - return; - } + return 0; /* Make an unaligned destination buffer. */ dst = (void *)((unsigned long)page_address(page) | 0x1); @@ -634,7 +627,7 @@ void check_unaligned_access(int cpu) pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", cpu); - goto out; + return 0; } if (word_cycles < byte_cycles) @@ -648,19 +641,84 @@ void check_unaligned_access(int cpu) (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); per_cpu(misaligned_access_speed, cpu) = speed; - -out: - __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE)); -} - -static int __init check_unaligned_access_boot_cpu(void) -{ - check_unaligned_access(0); - unaligned_emulation_finish(); return 0; } -arch_initcall(check_unaligned_access_boot_cpu); +static void check_unaligned_access_nonboot_cpu(void *param) +{ + unsigned int cpu = smp_processor_id(); + struct page **pages = param; + + if (smp_processor_id() != 0) + check_unaligned_access(pages[cpu]); +} + +static int riscv_online_cpu(unsigned int cpu) +{ + static struct page *buf; + + /* We are already set since the last check */ + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + return 0; + + buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!buf) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return -ENOMEM; + } + + check_unaligned_access(buf); + __free_pages(buf, MISALIGNED_BUFFER_ORDER); + return 0; +} + +/* Measure unaligned access on all CPUs present at boot in parallel. */ +static int check_unaligned_access_all_cpus(void) +{ + unsigned int cpu; + unsigned int cpu_count = num_possible_cpus(); + struct page **bufs = kzalloc(cpu_count * sizeof(struct page *), + GFP_KERNEL); + + if (!bufs) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return 0; + } + + /* + * Allocate separate buffers for each CPU so there's no fighting over + * cache lines. + */ + for_each_cpu(cpu, cpu_online_mask) { + bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!bufs[cpu]) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + goto out; + } + } + + /* Check everybody except 0, who stays behind to tend jiffies. */ + on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); + + /* Check core 0. */ + smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); + + /* Setup hotplug callback for any new CPUs that come online. */ + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu, NULL); + +out: + unaligned_emulation_finish(); + for_each_cpu(cpu, cpu_online_mask) { + if (bufs[cpu]) + __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); + } + + kfree(bufs); + return 0; +} + +arch_initcall(check_unaligned_access_all_cpus); void riscv_user_isa_enable(void) { diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index d69c628c24f4..d162bf339beb 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -247,7 +247,6 @@ asmlinkage __visible void smp_callin(void) riscv_ipi_enable(); numa_add_cpu(curr_cpuid); - check_unaligned_access(curr_cpuid); set_cpu_online(curr_cpuid, 1); if (has_vector()) { From fd3a6837d8e18cb7be80dcca1283276290336a7a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 6 Nov 2023 13:00:49 +0900 Subject: [PATCH 196/415] ata: libata-core: Fix ata_pci_shutdown_one() This reverts commit 5b6fba546da246b3d0dd8465c07783e22629cc53. Commit 5b6fba546da2 ("ata: libata-core: Detach a port devices on shutdown") modified the function ata_pci_shutdown_one() to stop (suspend) devices attached to the ports of a PCI AHCI adapter to ensure that drives are spun down before shutting down a system. However, this is done only for PCI adapters and not for other types of adapters. This limitation was addressed with commit 24eca2dce0f8 ("scsi: sd: Introduce manage_shutdown device flag"). With this, all ATA disks are spun down on system shutdown, which make the changes introduced with 5b6fba546da2 useless. Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel --- drivers/ata/libata-core.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 6fb4e8dc8c3c..09ed67772fae 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6180,24 +6180,10 @@ EXPORT_SYMBOL_GPL(ata_pci_remove_one); void ata_pci_shutdown_one(struct pci_dev *pdev) { struct ata_host *host = pci_get_drvdata(pdev); - struct ata_port *ap; - unsigned long flags; int i; - /* Tell EH to disable all devices */ for (i = 0; i < host->n_ports; i++) { - ap = host->ports[i]; - spin_lock_irqsave(ap->lock, flags); - ap->pflags |= ATA_PFLAG_UNLOADING; - ata_port_schedule_eh(ap); - spin_unlock_irqrestore(ap->lock, flags); - } - - for (i = 0; i < host->n_ports; i++) { - ap = host->ports[i]; - - /* Wait for EH to complete before freezing the port */ - ata_port_wait_eh(ap); + struct ata_port *ap = host->ports[i]; ap->pflags |= ATA_PFLAG_FROZEN; From 36f10a914a7b7169b5e399d1671ba2169c0801f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 5 Nov 2023 16:00:36 +0100 Subject: [PATCH 197/415] ata: pata_falcon: Stop using module_platform_driver_probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On today's platforms the benefit of platform_driver_probe() isn't that relevant any more. It allows to drop some code after booting (or module loading) for .probe() and discard the .remove() function completely if the driver is built-in. This typically saves a few 100k. The downside of platform_driver_probe() is that the driver cannot be bound and unbound at runtime which is ancient and so slightly complicates testing. There are also thoughts to deprecate platform_driver_probe() because it adds some complexity in the driver core for little gain. Also many drivers don't use it correctly. This driver for example misses to mark the driver struct with __ref which is needed to suppress a (W=1) modpost warning. Signed-off-by: Uwe Kleine-König Reviewed-by: Sergey Shtylyov Reviewed-by: Niklas Cassel Signed-off-by: Damien Le Moal --- drivers/ata/pata_falcon.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/ata/pata_falcon.c b/drivers/ata/pata_falcon.c index 0c2ae430f5aa..9dfc2df47cf8 100644 --- a/drivers/ata/pata_falcon.c +++ b/drivers/ata/pata_falcon.c @@ -121,7 +121,7 @@ static struct ata_port_operations pata_falcon_ops = { .set_mode = pata_falcon_set_mode, }; -static int __init pata_falcon_init_one(struct platform_device *pdev) +static int pata_falcon_init_one(struct platform_device *pdev) { struct resource *base_mem_res, *ctl_mem_res; struct resource *base_res, *ctl_res, *irq_res; @@ -216,7 +216,7 @@ static int __init pata_falcon_init_one(struct platform_device *pdev) IRQF_SHARED, &pata_falcon_sht); } -static int __exit pata_falcon_remove_one(struct platform_device *pdev) +static int pata_falcon_remove_one(struct platform_device *pdev) { struct ata_host *host = platform_get_drvdata(pdev); @@ -226,13 +226,14 @@ static int __exit pata_falcon_remove_one(struct platform_device *pdev) } static struct platform_driver pata_falcon_driver = { - .remove = __exit_p(pata_falcon_remove_one), + .probe = pata_falcon_init_one, + .remove = pata_falcon_remove_one, .driver = { .name = "atari-falcon-ide", }, }; -module_platform_driver_probe(pata_falcon_driver, pata_falcon_init_one); +module_platform_driver(pata_falcon_driver); MODULE_AUTHOR("Bartlomiej Zolnierkiewicz"); MODULE_DESCRIPTION("low-level driver for Atari Falcon PATA"); From 0b2771dd52570698c1b92e428f8bf2a224e7a2c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 5 Nov 2023 16:00:37 +0100 Subject: [PATCH 198/415] ata: pata_gayle: Stop using module_platform_driver_probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On today's platforms the benefit of platform_driver_probe() isn't that relevant any more. It allows to drop some code after booting (or module loading) for .probe() and discard the .remove() function completely if the driver is built-in. This typically saves a few 100k. The downside of platform_driver_probe() is that the driver cannot be bound and unbound at runtime which is ancient and so slightly complicates testing. There are also thoughts to deprecate platform_driver_probe() because it adds some complexity in the driver core for little gain. Also many drivers don't use it correctly. This driver for example misses to mark the driver struct with __ref which is needed to suppress a (W=1) modpost warning. Signed-off-by: Uwe Kleine-König Reviewed-by: Sergey Shtylyov Reviewed-by: Niklas Cassel Signed-off-by: Damien Le Moal --- drivers/ata/pata_gayle.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/ata/pata_gayle.c b/drivers/ata/pata_gayle.c index 3bdbe2b65a2b..e10411b02047 100644 --- a/drivers/ata/pata_gayle.c +++ b/drivers/ata/pata_gayle.c @@ -124,7 +124,7 @@ static struct ata_port_operations pata_gayle_a4000_ops = { .set_mode = pata_gayle_set_mode, }; -static int __init pata_gayle_init_one(struct platform_device *pdev) +static int pata_gayle_init_one(struct platform_device *pdev) { struct resource *res; struct gayle_ide_platform_data *pdata; @@ -193,7 +193,7 @@ static int __init pata_gayle_init_one(struct platform_device *pdev) return 0; } -static int __exit pata_gayle_remove_one(struct platform_device *pdev) +static int pata_gayle_remove_one(struct platform_device *pdev) { struct ata_host *host = platform_get_drvdata(pdev); @@ -203,13 +203,14 @@ static int __exit pata_gayle_remove_one(struct platform_device *pdev) } static struct platform_driver pata_gayle_driver = { - .remove = __exit_p(pata_gayle_remove_one), + .probe = pata_gayle_init_one, + .remove = pata_gayle_remove_one, .driver = { .name = "amiga-gayle-ide", }, }; -module_platform_driver_probe(pata_gayle_driver, pata_gayle_init_one); +module_platform_driver(pata_gayle_driver); MODULE_AUTHOR("Bartlomiej Zolnierkiewicz"); MODULE_DESCRIPTION("low-level driver for Amiga Gayle PATA"); From 47d4708dfa54ad0e84e47bc681c133c2146b0f56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 5 Nov 2023 16:00:38 +0100 Subject: [PATCH 199/415] ata: pata_falcon: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Reviewed-by: Sergey Shtylyov Reviewed-by: Niklas Cassel Signed-off-by: Damien Le Moal --- drivers/ata/pata_falcon.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/ata/pata_falcon.c b/drivers/ata/pata_falcon.c index 9dfc2df47cf8..18ceefd176df 100644 --- a/drivers/ata/pata_falcon.c +++ b/drivers/ata/pata_falcon.c @@ -216,18 +216,16 @@ static int pata_falcon_init_one(struct platform_device *pdev) IRQF_SHARED, &pata_falcon_sht); } -static int pata_falcon_remove_one(struct platform_device *pdev) +static void pata_falcon_remove_one(struct platform_device *pdev) { struct ata_host *host = platform_get_drvdata(pdev); ata_host_detach(host); - - return 0; } static struct platform_driver pata_falcon_driver = { .probe = pata_falcon_init_one, - .remove = pata_falcon_remove_one, + .remove_new = pata_falcon_remove_one, .driver = { .name = "atari-falcon-ide", }, From 99bce5182d8fe90e5c57e9d99f831baaa94f90cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 5 Nov 2023 16:00:39 +0100 Subject: [PATCH 200/415] ata: pata_gayle: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Reviewed-by: Sergey Shtylyov Reviewed-by: Niklas Cassel Signed-off-by: Damien Le Moal --- drivers/ata/pata_gayle.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/ata/pata_gayle.c b/drivers/ata/pata_gayle.c index e10411b02047..94df60ac2307 100644 --- a/drivers/ata/pata_gayle.c +++ b/drivers/ata/pata_gayle.c @@ -193,18 +193,16 @@ static int pata_gayle_init_one(struct platform_device *pdev) return 0; } -static int pata_gayle_remove_one(struct platform_device *pdev) +static void pata_gayle_remove_one(struct platform_device *pdev) { struct ata_host *host = platform_get_drvdata(pdev); ata_host_detach(host); - - return 0; } static struct platform_driver pata_gayle_driver = { .probe = pata_gayle_init_one, - .remove = pata_gayle_remove_one, + .remove_new = pata_gayle_remove_one, .driver = { .name = "amiga-gayle-ide", }, From eebff19acaa35820cb09ce2ccb3d21bee2156ffb Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 5 Nov 2023 12:46:24 +0900 Subject: [PATCH 201/415] ksmbd: fix slab out of bounds write in smb_inherit_dacl() slab out-of-bounds write is caused by that offsets is bigger than pntsd allocation size. This patch add the check to validate 3 offsets using allocation size. Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-22271 Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smbacl.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 6c0305be895e..51b8bfab7481 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -1107,6 +1107,7 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, struct smb_acl *pdacl; struct smb_sid *powner_sid = NULL, *pgroup_sid = NULL; int powner_sid_size = 0, pgroup_sid_size = 0, pntsd_size; + int pntsd_alloc_size; if (parent_pntsd->osidoffset) { powner_sid = (struct smb_sid *)((char *)parent_pntsd + @@ -1119,9 +1120,10 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, pgroup_sid_size = 1 + 1 + 6 + (pgroup_sid->num_subauth * 4); } - pntsd = kzalloc(sizeof(struct smb_ntsd) + powner_sid_size + - pgroup_sid_size + sizeof(struct smb_acl) + - nt_size, GFP_KERNEL); + pntsd_alloc_size = sizeof(struct smb_ntsd) + powner_sid_size + + pgroup_sid_size + sizeof(struct smb_acl) + nt_size; + + pntsd = kzalloc(pntsd_alloc_size, GFP_KERNEL); if (!pntsd) { rc = -ENOMEM; goto free_aces_base; @@ -1136,6 +1138,27 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, pntsd->gsidoffset = parent_pntsd->gsidoffset; pntsd->dacloffset = parent_pntsd->dacloffset; + if ((u64)le32_to_cpu(pntsd->osidoffset) + powner_sid_size > + pntsd_alloc_size) { + rc = -EINVAL; + kfree(pntsd); + goto free_aces_base; + } + + if ((u64)le32_to_cpu(pntsd->gsidoffset) + pgroup_sid_size > + pntsd_alloc_size) { + rc = -EINVAL; + kfree(pntsd); + goto free_aces_base; + } + + if ((u64)le32_to_cpu(pntsd->dacloffset) + sizeof(struct smb_acl) + nt_size > + pntsd_alloc_size) { + rc = -EINVAL; + kfree(pntsd); + goto free_aces_base; + } + if (pntsd->osidoffset) { struct smb_sid *owner_sid = (struct smb_sid *)((char *)pntsd + le32_to_cpu(pntsd->osidoffset)); From f6049712e520287ad695e9d4f1572ab76807fa0c Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 5 Nov 2023 12:47:53 +0900 Subject: [PATCH 202/415] ksmbd: fix kernel-doc comment of ksmbd_vfs_kern_path_locked() Fix argument list that the kdoc format and script verified in ksmbd_vfs_kern_path_locked(). fs/smb/server/vfs.c:1207: warning: Function parameter or member 'parent_path' not described in 'ksmbd_vfs_kern_path_locked' Reported-by: kernel test robot Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 1053127f71ad..c53dea5598fc 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -1177,9 +1177,10 @@ static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name, /** * ksmbd_vfs_kern_path_locked() - lookup a file and get path info - * @name: file path that is relative to share - * @flags: lookup flags - * @path: if lookup succeed, return path info + * @name: file path that is relative to share + * @flags: lookup flags + * @parent_path: if lookup succeed, return parent_path info + * @path: if lookup succeed, return path info * @caseless: caseless filename lookup * * Return: 0 on success, otherwise error From 5a5409d90bd05f87fe5623a749ccfbf3f7c7d400 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 7 Nov 2023 21:04:31 +0900 Subject: [PATCH 203/415] ksmbd: handle malformed smb1 message If set_smb1_rsp_status() is not implemented, It will cause NULL pointer dereferece error when client send malformed smb1 message. This patch add set_smb1_rsp_status() to ignore malformed smb1 message. Cc: stable@vger.kernel.org Reported-by: Robert Morris Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb_common.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index e6ba1e9b8589..6691ae68af0c 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -366,11 +366,22 @@ static int smb1_allocate_rsp_buf(struct ksmbd_work *work) return 0; } +/** + * set_smb1_rsp_status() - set error type in smb response header + * @work: smb work containing smb response header + * @err: error code to set in response + */ +static void set_smb1_rsp_status(struct ksmbd_work *work, __le32 err) +{ + work->send_no_response = 1; +} + static struct smb_version_ops smb1_server_ops = { .get_cmd_val = get_smb1_cmd_val, .init_rsp_hdr = init_smb1_rsp_hdr, .allocate_rsp_buf = smb1_allocate_rsp_buf, .check_user_session = smb1_check_user_session, + .set_rsp_status = set_smb1_rsp_status, }; static int smb1_negotiate(struct ksmbd_work *work) From 80c7889de7a8246e44a9632a2b7d15b41ab3fe41 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 8 Nov 2023 14:12:01 +0800 Subject: [PATCH 204/415] LoongArch: Support PREEMPT_DYNAMIC with static keys Since commit 4e90d0522a688371402c ("riscv: support PREEMPT_DYNAMIC with static keys"), the infrastructure is complete and we can simply select HAVE_PREEMPT_DYNAMIC_KEY to enable PREEMPT_DYNAMIC on LoongArch because we already support static keys. Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index e14396a2ddcb..86fdc8c55608 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -135,6 +135,7 @@ config LOONGARCH select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_DYNAMIC_KEY select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RETHOOK select HAVE_RSEQ From 21eb2bfe2748b238f06983e4308cb30611371605 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Wed, 8 Nov 2023 14:12:07 +0800 Subject: [PATCH 205/415] LoongArch: Disable module from accessing external data directly The distance between vmlinux and the module is too far so that PC-REL cannot be accessed directly, only GOT. When compiling module with GCC, the option `-mdirect-extern-access` is disabled by default. The Clang option `-fdirect-access-external-data` is enabled by default, so it needs to be explicitly disabled. Signed-off-by: WANG Rui Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index fb0fada43197..c31a62521e6b 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -68,6 +68,8 @@ LDFLAGS_vmlinux += -static -n -nostdlib ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS cflags-y += $(call cc-option,-mexplicit-relocs) KBUILD_CFLAGS_KERNEL += $(call cc-option,-mdirect-extern-access) +KBUILD_AFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data) +KBUILD_CFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data) KBUILD_AFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) else From 71945968d8b128c955204baa33ec03bdd91bdc26 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 8 Nov 2023 14:12:15 +0800 Subject: [PATCH 206/415] LoongArch: Mark __percpu functions as always inline A recent change to the optimization pipeline in LLVM reveals some fragility around the inlining of LoongArch's __percpu functions, which manifests as a BUILD_BUG() failure: In file included from kernel/sched/build_policy.c:17: In file included from include/linux/sched/cputime.h:5: In file included from include/linux/sched/signal.h:5: In file included from include/linux/rculist.h:11: In file included from include/linux/rcupdate.h:26: In file included from include/linux/irqflags.h:18: arch/loongarch/include/asm/percpu.h:97:3: error: call to '__compiletime_assert_51' declared with 'error' attribute: BUILD_BUG failed 97 | BUILD_BUG(); | ^ include/linux/build_bug.h:59:21: note: expanded from macro 'BUILD_BUG' 59 | #define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed") | ^ include/linux/build_bug.h:39:37: note: expanded from macro 'BUILD_BUG_ON_MSG' 39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) | ^ include/linux/compiler_types.h:425:2: note: expanded from macro 'compiletime_assert' 425 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | ^ include/linux/compiler_types.h:413:2: note: expanded from macro '_compiletime_assert' 413 | __compiletime_assert(condition, msg, prefix, suffix) | ^ include/linux/compiler_types.h:406:4: note: expanded from macro '__compiletime_assert' 406 | prefix ## suffix(); \ | ^ :86:1: note: expanded from here 86 | __compiletime_assert_51 | ^ 1 error generated. If these functions are not inlined (which the compiler is free to do even with functions marked with the standard 'inline' keyword), the BUILD_BUG() in the default case cannot be eliminated since the compiler cannot prove it is never used, resulting in a build failure due to the error attribute. Mark these functions as __always_inline to guarantee inlining so that the BUILD_BUG() only triggers when the default case genuinely cannot be eliminated due to an unexpected size. Cc: Closes: https://github.com/ClangBuiltLinux/linux/issues/1955 Fixes: 46859ac8af52 ("LoongArch: Add multi-processor (SMP) support") Link: https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e Suggested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/percpu.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index b9f567e66016..ed5da02b1cf6 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -32,7 +32,7 @@ static inline void set_my_cpu_offset(unsigned long off) #define __my_cpu_offset __my_cpu_offset #define PERCPU_OP(op, asm_op, c_op) \ -static inline unsigned long __percpu_##op(void *ptr, \ +static __always_inline unsigned long __percpu_##op(void *ptr, \ unsigned long val, int size) \ { \ unsigned long ret; \ @@ -63,7 +63,7 @@ PERCPU_OP(and, and, &) PERCPU_OP(or, or, |) #undef PERCPU_OP -static inline unsigned long __percpu_read(void *ptr, int size) +static __always_inline unsigned long __percpu_read(void *ptr, int size) { unsigned long ret; @@ -100,7 +100,7 @@ static inline unsigned long __percpu_read(void *ptr, int size) return ret; } -static inline void __percpu_write(void *ptr, unsigned long val, int size) +static __always_inline void __percpu_write(void *ptr, unsigned long val, int size) { switch (size) { case 1: @@ -132,8 +132,8 @@ static inline void __percpu_write(void *ptr, unsigned long val, int size) } } -static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, - int size) +static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, + int size) { switch (size) { case 1: From affef66b65889a0ea0060e13e5f7fe569897d787 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Wed, 8 Nov 2023 14:12:15 +0800 Subject: [PATCH 207/415] LoongArch: Relax memory ordering for atomic operations This patch relaxes the implementation while satisfying the memory ordering requirements for atomic operations, which will help improve performance on LA664+. Unixbench with full threads (8) before after Dhrystone 2 using register variables 203910714.2 203909539.8 0.00% Double-Precision Whetstone 37930.9 37931 0.00% Execl Throughput 29431.5 29545.8 0.39% File Copy 1024 bufsize 2000 maxblocks 6645759.5 6676320 0.46% File Copy 256 bufsize 500 maxblocks 2138772.4 2144182.4 0.25% File Copy 4096 bufsize 8000 maxblocks 11640698.4 11602703 -0.33% Pipe Throughput 8849077.7 8917009.4 0.77% Pipe-based Context Switching 1255108.5 1287277.3 2.56% Process Creation 50825.9 50442.1 -0.76% Shell Scripts (1 concurrent) 25795.8 25942.3 0.57% Shell Scripts (8 concurrent) 3812.6 3835.2 0.59% System Call Overhead 9248212.6 9353348.6 1.14% ======= System Benchmarks Index Score 8076.6 8114.4 0.47% Signed-off-by: WANG Rui Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/atomic.h | 88 ++++++++++++++++++++++------- 1 file changed, 68 insertions(+), 20 deletions(-) diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h index e27f0c72d324..99af8b3160a8 100644 --- a/arch/loongarch/include/asm/atomic.h +++ b/arch/loongarch/include/asm/atomic.h @@ -36,19 +36,19 @@ static inline void arch_atomic_##op(int i, atomic_t *v) \ { \ __asm__ __volatile__( \ - "am"#asm_op"_db.w" " $zero, %1, %0 \n" \ + "am"#asm_op".w" " $zero, %1, %0 \n" \ : "+ZB" (v->counter) \ : "r" (I) \ : "memory"); \ } -#define ATOMIC_OP_RETURN(op, I, asm_op, c_op) \ -static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \ +#define ATOMIC_OP_RETURN(op, I, asm_op, c_op, mb, suffix) \ +static inline int arch_atomic_##op##_return##suffix(int i, atomic_t *v) \ { \ int result; \ \ __asm__ __volatile__( \ - "am"#asm_op"_db.w" " %1, %2, %0 \n" \ + "am"#asm_op#mb".w" " %1, %2, %0 \n" \ : "+ZB" (v->counter), "=&r" (result) \ : "r" (I) \ : "memory"); \ @@ -56,13 +56,13 @@ static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \ return result c_op I; \ } -#define ATOMIC_FETCH_OP(op, I, asm_op) \ -static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ +#define ATOMIC_FETCH_OP(op, I, asm_op, mb, suffix) \ +static inline int arch_atomic_fetch_##op##suffix(int i, atomic_t *v) \ { \ int result; \ \ __asm__ __volatile__( \ - "am"#asm_op"_db.w" " %1, %2, %0 \n" \ + "am"#asm_op#mb".w" " %1, %2, %0 \n" \ : "+ZB" (v->counter), "=&r" (result) \ : "r" (I) \ : "memory"); \ @@ -72,29 +72,53 @@ static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ #define ATOMIC_OPS(op, I, asm_op, c_op) \ ATOMIC_OP(op, I, asm_op) \ - ATOMIC_OP_RETURN(op, I, asm_op, c_op) \ - ATOMIC_FETCH_OP(op, I, asm_op) + ATOMIC_OP_RETURN(op, I, asm_op, c_op, _db, ) \ + ATOMIC_OP_RETURN(op, I, asm_op, c_op, , _relaxed) \ + ATOMIC_FETCH_OP(op, I, asm_op, _db, ) \ + ATOMIC_FETCH_OP(op, I, asm_op, , _relaxed) ATOMIC_OPS(add, i, add, +) ATOMIC_OPS(sub, -i, add, +) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_add_return_acquire arch_atomic_add_return +#define arch_atomic_add_return_release arch_atomic_add_return #define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_sub_return_acquire arch_atomic_sub_return +#define arch_atomic_sub_return_release arch_atomic_sub_return #define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_add_acquire arch_atomic_fetch_add +#define arch_atomic_fetch_add_release arch_atomic_fetch_add #define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed +#define arch_atomic_fetch_sub arch_atomic_fetch_sub +#define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub +#define arch_atomic_fetch_sub_release arch_atomic_fetch_sub #define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed #undef ATOMIC_OPS #define ATOMIC_OPS(op, I, asm_op) \ ATOMIC_OP(op, I, asm_op) \ - ATOMIC_FETCH_OP(op, I, asm_op) + ATOMIC_FETCH_OP(op, I, asm_op, _db, ) \ + ATOMIC_FETCH_OP(op, I, asm_op, , _relaxed) ATOMIC_OPS(and, i, and) ATOMIC_OPS(or, i, or) ATOMIC_OPS(xor, i, xor) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_and_acquire arch_atomic_fetch_and +#define arch_atomic_fetch_and_release arch_atomic_fetch_and #define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_or_acquire arch_atomic_fetch_or +#define arch_atomic_fetch_or_release arch_atomic_fetch_or #define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed +#define arch_atomic_fetch_xor arch_atomic_fetch_xor +#define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor +#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor #define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed #undef ATOMIC_OPS @@ -172,18 +196,18 @@ static inline int arch_atomic_sub_if_positive(int i, atomic_t *v) static inline void arch_atomic64_##op(long i, atomic64_t *v) \ { \ __asm__ __volatile__( \ - "am"#asm_op"_db.d " " $zero, %1, %0 \n" \ + "am"#asm_op".d " " $zero, %1, %0 \n" \ : "+ZB" (v->counter) \ : "r" (I) \ : "memory"); \ } -#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \ -static inline long arch_atomic64_##op##_return_relaxed(long i, atomic64_t *v) \ +#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op, mb, suffix) \ +static inline long arch_atomic64_##op##_return##suffix(long i, atomic64_t *v) \ { \ long result; \ __asm__ __volatile__( \ - "am"#asm_op"_db.d " " %1, %2, %0 \n" \ + "am"#asm_op#mb".d " " %1, %2, %0 \n" \ : "+ZB" (v->counter), "=&r" (result) \ : "r" (I) \ : "memory"); \ @@ -191,13 +215,13 @@ static inline long arch_atomic64_##op##_return_relaxed(long i, atomic64_t *v) \ return result c_op I; \ } -#define ATOMIC64_FETCH_OP(op, I, asm_op) \ -static inline long arch_atomic64_fetch_##op##_relaxed(long i, atomic64_t *v) \ +#define ATOMIC64_FETCH_OP(op, I, asm_op, mb, suffix) \ +static inline long arch_atomic64_fetch_##op##suffix(long i, atomic64_t *v) \ { \ long result; \ \ __asm__ __volatile__( \ - "am"#asm_op"_db.d " " %1, %2, %0 \n" \ + "am"#asm_op#mb".d " " %1, %2, %0 \n" \ : "+ZB" (v->counter), "=&r" (result) \ : "r" (I) \ : "memory"); \ @@ -207,29 +231,53 @@ static inline long arch_atomic64_fetch_##op##_relaxed(long i, atomic64_t *v) \ #define ATOMIC64_OPS(op, I, asm_op, c_op) \ ATOMIC64_OP(op, I, asm_op) \ - ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \ - ATOMIC64_FETCH_OP(op, I, asm_op) + ATOMIC64_OP_RETURN(op, I, asm_op, c_op, _db, ) \ + ATOMIC64_OP_RETURN(op, I, asm_op, c_op, , _relaxed) \ + ATOMIC64_FETCH_OP(op, I, asm_op, _db, ) \ + ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed) ATOMIC64_OPS(add, i, add, +) ATOMIC64_OPS(sub, -i, add, +) +#define arch_atomic64_add_return arch_atomic64_add_return +#define arch_atomic64_add_return_acquire arch_atomic64_add_return +#define arch_atomic64_add_return_release arch_atomic64_add_return #define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed +#define arch_atomic64_sub_return arch_atomic64_sub_return +#define arch_atomic64_sub_return_acquire arch_atomic64_sub_return +#define arch_atomic64_sub_return_release arch_atomic64_sub_return #define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed +#define arch_atomic64_fetch_add arch_atomic64_fetch_add +#define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add +#define arch_atomic64_fetch_add_release arch_atomic64_fetch_add #define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed +#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub +#define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub +#define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub #define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed #undef ATOMIC64_OPS #define ATOMIC64_OPS(op, I, asm_op) \ ATOMIC64_OP(op, I, asm_op) \ - ATOMIC64_FETCH_OP(op, I, asm_op) + ATOMIC64_FETCH_OP(op, I, asm_op, _db, ) \ + ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed) ATOMIC64_OPS(and, i, and) ATOMIC64_OPS(or, i, or) ATOMIC64_OPS(xor, i, xor) +#define arch_atomic64_fetch_and arch_atomic64_fetch_and +#define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and +#define arch_atomic64_fetch_and_release arch_atomic64_fetch_and #define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed +#define arch_atomic64_fetch_or arch_atomic64_fetch_or +#define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or +#define arch_atomic64_fetch_or_release arch_atomic64_fetch_or #define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed +#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor +#define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor +#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor #define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed #undef ATOMIC64_OPS From a2ccf46333d7b2cf9658f0d82ac74097c1542fae Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 8 Nov 2023 14:12:15 +0800 Subject: [PATCH 208/415] LoongArch/smp: Call rcutree_report_cpu_starting() earlier rcutree_report_cpu_starting() must be called before cpu_probe() to avoid the following lockdep splat that triggered by calling __alloc_pages() when CONFIG_PROVE_RCU_LIST=y: ============================= WARNING: suspicious RCU usage 6.6.0+ #980 Not tainted ----------------------------- kernel/locking/lockdep.c:3761 RCU-list traversed in non-reader section!! other info that might help us debug this: RCU used illegally from offline CPU! rcu_scheduler_active = 1, debug_locks = 1 1 lock held by swapper/1/0: #0: 900000000c82ef98 (&pcp->lock){+.+.}-{2:2}, at: get_page_from_freelist+0x894/0x1790 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.0+ #980 Stack : 0000000000000001 9000000004f79508 9000000004893670 9000000100310000 90000001003137d0 0000000000000000 90000001003137d8 9000000004f79508 0000000000000000 0000000000000001 0000000000000000 90000000048a3384 203a656d616e2065 ca43677b3687e616 90000001002c3480 0000000000000008 000000000000009d 0000000000000000 0000000000000001 80000000ffffe0b8 000000000000000d 0000000000000033 0000000007ec0000 13bbf50562dad831 9000000005140748 0000000000000000 9000000004f79508 0000000000000004 0000000000000000 9000000005140748 90000001002bad40 0000000000000000 90000001002ba400 0000000000000000 9000000003573ec8 0000000000000000 00000000000000b0 0000000000000004 0000000000000000 0000000000070000 ... Call Trace: [<9000000003573ec8>] show_stack+0x38/0x150 [<9000000004893670>] dump_stack_lvl+0x74/0xa8 [<900000000360d2bc>] lockdep_rcu_suspicious+0x14c/0x190 [<900000000361235c>] __lock_acquire+0xd0c/0x2740 [<90000000036146f4>] lock_acquire+0x104/0x2c0 [<90000000048a955c>] _raw_spin_lock_irqsave+0x5c/0x90 [<900000000381cd5c>] rmqueue_bulk+0x6c/0x950 [<900000000381fc0c>] get_page_from_freelist+0xd4c/0x1790 [<9000000003821c6c>] __alloc_pages+0x1bc/0x3e0 [<9000000003583b40>] tlb_init+0x150/0x2a0 [<90000000035742a0>] per_cpu_trap_init+0xf0/0x110 [<90000000035712fc>] cpu_probe+0x3dc/0x7a0 [<900000000357ed20>] start_secondary+0x40/0xb0 [<9000000004897138>] smpboot_entry+0x54/0x58 raw_smp_processor_id() is required in order to avoid calling into lockdep before RCU has declared the CPU to be watched for readers. See also commit 29368e093921 ("x86/smpboot: Move rcu_cpu_starting() earlier"), commit de5d9dae150c ("s390/smp: move rcu_cpu_starting() earlier") and commit 99f070b62322 ("powerpc/smp: Call rcu_cpu_starting() earlier"). Signed-off-by: Huacai Chen --- arch/loongarch/kernel/smp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index ef35c871244f..5bca12d16e06 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -504,8 +504,9 @@ asmlinkage void start_secondary(void) unsigned int cpu; sync_counter(); - cpu = smp_processor_id(); + cpu = raw_smp_processor_id(); set_my_cpu_offset(per_cpu_offset(cpu)); + rcutree_report_cpu_starting(cpu); cpu_probe(); constant_clockevent_init(); From add28024405ed600afaa02749989d4fd119f9057 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Wed, 8 Nov 2023 14:12:15 +0800 Subject: [PATCH 209/415] LoongArch: Add more instruction opcodes and emit_* helpers This patch adds more instruction opcodes and their corresponding emit_* helpers which will be used in later patches. Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/inst.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 71e1ed4165c8..5350ae9ee380 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -65,6 +65,8 @@ enum reg2_op { revbd_op = 0x0f, revh2w_op = 0x10, revhd_op = 0x11, + extwh_op = 0x16, + extwb_op = 0x17, }; enum reg2i5_op { @@ -556,6 +558,8 @@ static inline void emit_##NAME(union loongarch_instruction *insn, \ DEF_EMIT_REG2_FORMAT(revb2h, revb2h_op) DEF_EMIT_REG2_FORMAT(revb2w, revb2w_op) DEF_EMIT_REG2_FORMAT(revbd, revbd_op) +DEF_EMIT_REG2_FORMAT(extwh, extwh_op) +DEF_EMIT_REG2_FORMAT(extwb, extwb_op) #define DEF_EMIT_REG2I5_FORMAT(NAME, OP) \ static inline void emit_##NAME(union loongarch_instruction *insn, \ @@ -607,6 +611,9 @@ DEF_EMIT_REG2I12_FORMAT(lu52id, lu52id_op) DEF_EMIT_REG2I12_FORMAT(andi, andi_op) DEF_EMIT_REG2I12_FORMAT(ori, ori_op) DEF_EMIT_REG2I12_FORMAT(xori, xori_op) +DEF_EMIT_REG2I12_FORMAT(ldb, ldb_op) +DEF_EMIT_REG2I12_FORMAT(ldh, ldh_op) +DEF_EMIT_REG2I12_FORMAT(ldw, ldw_op) DEF_EMIT_REG2I12_FORMAT(ldbu, ldbu_op) DEF_EMIT_REG2I12_FORMAT(ldhu, ldhu_op) DEF_EMIT_REG2I12_FORMAT(ldwu, ldwu_op) @@ -685,9 +692,12 @@ static inline void emit_##NAME(union loongarch_instruction *insn, \ insn->reg3_format.rk = rk; \ } +DEF_EMIT_REG3_FORMAT(addw, addw_op) DEF_EMIT_REG3_FORMAT(addd, addd_op) DEF_EMIT_REG3_FORMAT(subd, subd_op) DEF_EMIT_REG3_FORMAT(muld, muld_op) +DEF_EMIT_REG3_FORMAT(divd, divd_op) +DEF_EMIT_REG3_FORMAT(modd, modd_op) DEF_EMIT_REG3_FORMAT(divdu, divdu_op) DEF_EMIT_REG3_FORMAT(moddu, moddu_op) DEF_EMIT_REG3_FORMAT(and, and_op) @@ -699,6 +709,9 @@ DEF_EMIT_REG3_FORMAT(srlw, srlw_op) DEF_EMIT_REG3_FORMAT(srld, srld_op) DEF_EMIT_REG3_FORMAT(sraw, sraw_op) DEF_EMIT_REG3_FORMAT(srad, srad_op) +DEF_EMIT_REG3_FORMAT(ldxb, ldxb_op) +DEF_EMIT_REG3_FORMAT(ldxh, ldxh_op) +DEF_EMIT_REG3_FORMAT(ldxw, ldxw_op) DEF_EMIT_REG3_FORMAT(ldxbu, ldxbu_op) DEF_EMIT_REG3_FORMAT(ldxhu, ldxhu_op) DEF_EMIT_REG3_FORMAT(ldxwu, ldxwu_op) From 7111afe8fb5f15e11b8eff90d7aed1c58e3b1167 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Wed, 8 Nov 2023 14:12:16 +0800 Subject: [PATCH 210/415] LoongArch: BPF: Support sign-extension load instructions Add support for sign-extension load instructions. Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 49 ++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index db9342b2d0e6..0c2bbca527ef 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -411,7 +411,11 @@ static int add_exception_handler(const struct bpf_insn *insn, off_t offset; struct exception_table_entry *ex; - if (!ctx->image || !ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM) + if (!ctx->image || !ctx->prog->aux->extable) + return 0; + + if (BPF_MODE(insn->code) != BPF_PROBE_MEM && + BPF_MODE(insn->code) != BPF_PROBE_MEMSX) return 0; if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries)) @@ -450,7 +454,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext { u8 tm = -1; u64 func_addr; - bool func_addr_fixed; + bool func_addr_fixed, sign_extend; int i = insn - ctx->prog->insnsi; int ret, jmp_offset; const u8 code = insn->code; @@ -879,31 +883,56 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_PROBE_MEM | BPF_H: case BPF_LDX | BPF_PROBE_MEM | BPF_B: + /* dst_reg = (s64)*(signed size *)(src_reg + off) */ + case BPF_LDX | BPF_MEMSX | BPF_B: + case BPF_LDX | BPF_MEMSX | BPF_H: + case BPF_LDX | BPF_MEMSX | BPF_W: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: + sign_extend = BPF_MODE(insn->code) == BPF_MEMSX || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX; switch (BPF_SIZE(code)) { case BPF_B: if (is_signed_imm12(off)) { - emit_insn(ctx, ldbu, dst, src, off); + if (sign_extend) + emit_insn(ctx, ldb, dst, src, off); + else + emit_insn(ctx, ldbu, dst, src, off); } else { move_imm(ctx, t1, off, is32); - emit_insn(ctx, ldxbu, dst, src, t1); + if (sign_extend) + emit_insn(ctx, ldxb, dst, src, t1); + else + emit_insn(ctx, ldxbu, dst, src, t1); } break; case BPF_H: if (is_signed_imm12(off)) { - emit_insn(ctx, ldhu, dst, src, off); + if (sign_extend) + emit_insn(ctx, ldh, dst, src, off); + else + emit_insn(ctx, ldhu, dst, src, off); } else { move_imm(ctx, t1, off, is32); - emit_insn(ctx, ldxhu, dst, src, t1); + if (sign_extend) + emit_insn(ctx, ldxh, dst, src, t1); + else + emit_insn(ctx, ldxhu, dst, src, t1); } break; case BPF_W: if (is_signed_imm12(off)) { - emit_insn(ctx, ldwu, dst, src, off); - } else if (is_signed_imm14(off)) { - emit_insn(ctx, ldptrw, dst, src, off); + if (sign_extend) + emit_insn(ctx, ldw, dst, src, off); + else + emit_insn(ctx, ldwu, dst, src, off); } else { move_imm(ctx, t1, off, is32); - emit_insn(ctx, ldxwu, dst, src, t1); + if (sign_extend) + emit_insn(ctx, ldxw, dst, src, t1); + else + emit_insn(ctx, ldxwu, dst, src, t1); } break; case BPF_DW: From f48012f161508c743e1b39c3521a2b285d19c6aa Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Wed, 8 Nov 2023 14:12:16 +0800 Subject: [PATCH 211/415] LoongArch: BPF: Support sign-extension mov instructions Add support for sign-extension mov instructions. Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 0c2bbca527ef..ac9edf02675c 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -472,8 +472,23 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: case BPF_ALU64 | BPF_MOV | BPF_X: - move_reg(ctx, dst, src); - emit_zext_32(ctx, dst, is32); + switch (off) { + case 0: + move_reg(ctx, dst, src); + emit_zext_32(ctx, dst, is32); + break; + case 8: + move_reg(ctx, t1, src); + emit_insn(ctx, extwb, dst, t1); + break; + case 16: + move_reg(ctx, t1, src); + emit_insn(ctx, extwh, dst, t1); + break; + case 32: + emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO); + break; + } break; /* dst = imm */ From 4ebf9216e7dff0b38e350007da3b03afb15c816b Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Wed, 8 Nov 2023 14:12:16 +0800 Subject: [PATCH 212/415] LoongArch: BPF: Support unconditional bswap instructions Add support for unconditional bswap instruction. Since LoongArch is always little-endian, just treat unconditional bswap the same as big- endian conversion. Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index ac9edf02675c..a8be6d4b058c 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -731,6 +731,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext break; case BPF_ALU | BPF_END | BPF_FROM_BE: + case BPF_ALU64 | BPF_END | BPF_FROM_LE: switch (imm) { case 16: emit_insn(ctx, revb2h, dst, dst); From 9ddd2b8d1a8b566195c196fe4249d04cd75cc73c Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Wed, 8 Nov 2023 14:12:16 +0800 Subject: [PATCH 213/415] LoongArch: BPF: Support 32-bit offset jmp instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for 32-bit offset jmp instruction. Currently, we use b instruction which supports range within ±128MB for such jumps. This should be large enough for BPF progs. Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index a8be6d4b058c..050fcf233a34 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -848,7 +848,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext /* PC += off */ case BPF_JMP | BPF_JA: - jmp_offset = bpf2la_offset(i, off, ctx); + case BPF_JMP32 | BPF_JA: + if (BPF_CLASS(code) == BPF_JMP) + jmp_offset = bpf2la_offset(i, off, ctx); + else + jmp_offset = bpf2la_offset(i, imm, ctx); if (emit_uncond_jmp(ctx, jmp_offset) < 0) goto toofar; break; From 2425c9e002d2a1fdca34261b2fa6713eafef2163 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Wed, 8 Nov 2023 14:12:21 +0800 Subject: [PATCH 214/415] LoongArch: BPF: Support signed div instructions Add support for signed div instructions. Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 050fcf233a34..7c0d129b82a4 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -553,20 +553,36 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext /* dst = dst / src */ case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: - emit_zext_32(ctx, dst, is32); - move_reg(ctx, t1, src); - emit_zext_32(ctx, t1, is32); - emit_insn(ctx, divdu, dst, dst, t1); - emit_zext_32(ctx, dst, is32); + if (!off) { + emit_zext_32(ctx, dst, is32); + move_reg(ctx, t1, src); + emit_zext_32(ctx, t1, is32); + emit_insn(ctx, divdu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + } else { + emit_sext_32(ctx, dst, is32); + move_reg(ctx, t1, src); + emit_sext_32(ctx, t1, is32); + emit_insn(ctx, divd, dst, dst, t1); + emit_sext_32(ctx, dst, is32); + } break; /* dst = dst / imm */ case BPF_ALU | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_K: - move_imm(ctx, t1, imm, is32); - emit_zext_32(ctx, dst, is32); - emit_insn(ctx, divdu, dst, dst, t1); - emit_zext_32(ctx, dst, is32); + if (!off) { + move_imm(ctx, t1, imm, is32); + emit_zext_32(ctx, dst, is32); + emit_insn(ctx, divdu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + } else { + move_imm(ctx, t1, imm, false); + emit_sext_32(ctx, t1, is32); + emit_sext_32(ctx, dst, is32); + emit_insn(ctx, divd, dst, dst, t1); + emit_sext_32(ctx, dst, is32); + } break; /* dst = dst % src */ From 7b6b13d32965ad7f1eb889d1a7058868a88eb29f Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Wed, 8 Nov 2023 14:12:21 +0800 Subject: [PATCH 215/415] LoongArch: BPF: Support signed mod instructions Add support for signed mod instructions. Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 7c0d129b82a4..169ff8b3915e 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -588,20 +588,36 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext /* dst = dst % src */ case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_X: - emit_zext_32(ctx, dst, is32); - move_reg(ctx, t1, src); - emit_zext_32(ctx, t1, is32); - emit_insn(ctx, moddu, dst, dst, t1); - emit_zext_32(ctx, dst, is32); + if (!off) { + emit_zext_32(ctx, dst, is32); + move_reg(ctx, t1, src); + emit_zext_32(ctx, t1, is32); + emit_insn(ctx, moddu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + } else { + emit_sext_32(ctx, dst, is32); + move_reg(ctx, t1, src); + emit_sext_32(ctx, t1, is32); + emit_insn(ctx, modd, dst, dst, t1); + emit_sext_32(ctx, dst, is32); + } break; /* dst = dst % imm */ case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_K: - move_imm(ctx, t1, imm, is32); - emit_zext_32(ctx, dst, is32); - emit_insn(ctx, moddu, dst, dst, t1); - emit_zext_32(ctx, dst, is32); + if (!off) { + move_imm(ctx, t1, imm, is32); + emit_zext_32(ctx, dst, is32); + emit_insn(ctx, moddu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + } else { + move_imm(ctx, t1, imm, false); + emit_sext_32(ctx, t1, is32); + emit_sext_32(ctx, dst, is32); + emit_insn(ctx, modd, dst, dst, t1); + emit_sext_32(ctx, dst, is32); + } break; /* dst = -dst */ From 1d375d65466e5c8d7a9406826d80d475a22e8c6d Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Wed, 8 Nov 2023 14:12:21 +0800 Subject: [PATCH 216/415] selftests/bpf: Enable cpu v4 tests for LoongArch Enable the cpu v4 tests for LoongArch. Currently, we don't have BPF trampoline in LoongArch JIT, so the fentry test `test_ptr_struct_arg` still failed, will followup. Test result attached below: # ./test_progs -t verifier_sdiv,verifier_movsx,verifier_ldsx,verifier_gotol,verifier_bswap #316/1 verifier_bswap/BSWAP, 16:OK #316/2 verifier_bswap/BSWAP, 16 @unpriv:OK #316/3 verifier_bswap/BSWAP, 32:OK #316/4 verifier_bswap/BSWAP, 32 @unpriv:OK #316/5 verifier_bswap/BSWAP, 64:OK #316/6 verifier_bswap/BSWAP, 64 @unpriv:OK #316 verifier_bswap:OK #330/1 verifier_gotol/gotol, small_imm:OK #330/2 verifier_gotol/gotol, small_imm @unpriv:OK #330 verifier_gotol:OK #338/1 verifier_ldsx/LDSX, S8:OK #338/2 verifier_ldsx/LDSX, S8 @unpriv:OK #338/3 verifier_ldsx/LDSX, S16:OK #338/4 verifier_ldsx/LDSX, S16 @unpriv:OK #338/5 verifier_ldsx/LDSX, S32:OK #338/6 verifier_ldsx/LDSX, S32 @unpriv:OK #338/7 verifier_ldsx/LDSX, S8 range checking, privileged:OK #338/8 verifier_ldsx/LDSX, S16 range checking:OK #338/9 verifier_ldsx/LDSX, S16 range checking @unpriv:OK #338/10 verifier_ldsx/LDSX, S32 range checking:OK #338/11 verifier_ldsx/LDSX, S32 range checking @unpriv:OK #338 verifier_ldsx:OK #349/1 verifier_movsx/MOV32SX, S8:OK #349/2 verifier_movsx/MOV32SX, S8 @unpriv:OK #349/3 verifier_movsx/MOV32SX, S16:OK #349/4 verifier_movsx/MOV32SX, S16 @unpriv:OK #349/5 verifier_movsx/MOV64SX, S8:OK #349/6 verifier_movsx/MOV64SX, S8 @unpriv:OK #349/7 verifier_movsx/MOV64SX, S16:OK #349/8 verifier_movsx/MOV64SX, S16 @unpriv:OK #349/9 verifier_movsx/MOV64SX, S32:OK #349/10 verifier_movsx/MOV64SX, S32 @unpriv:OK #349/11 verifier_movsx/MOV32SX, S8, range_check:OK #349/12 verifier_movsx/MOV32SX, S8, range_check @unpriv:OK #349/13 verifier_movsx/MOV32SX, S16, range_check:OK #349/14 verifier_movsx/MOV32SX, S16, range_check @unpriv:OK #349/15 verifier_movsx/MOV32SX, S16, range_check 2:OK #349/16 verifier_movsx/MOV32SX, S16, range_check 2 @unpriv:OK #349/17 verifier_movsx/MOV64SX, S8, range_check:OK #349/18 verifier_movsx/MOV64SX, S8, range_check @unpriv:OK #349/19 verifier_movsx/MOV64SX, S16, range_check:OK #349/20 verifier_movsx/MOV64SX, S16, range_check @unpriv:OK #349/21 verifier_movsx/MOV64SX, S32, range_check:OK #349/22 verifier_movsx/MOV64SX, S32, range_check @unpriv:OK #349/23 verifier_movsx/MOV64SX, S16, R10 Sign Extension:OK #349/24 verifier_movsx/MOV64SX, S16, R10 Sign Extension @unpriv:OK #349 verifier_movsx:OK #361/1 verifier_sdiv/SDIV32, non-zero imm divisor, check 1:OK #361/2 verifier_sdiv/SDIV32, non-zero imm divisor, check 1 @unpriv:OK #361/3 verifier_sdiv/SDIV32, non-zero imm divisor, check 2:OK #361/4 verifier_sdiv/SDIV32, non-zero imm divisor, check 2 @unpriv:OK #361/5 verifier_sdiv/SDIV32, non-zero imm divisor, check 3:OK #361/6 verifier_sdiv/SDIV32, non-zero imm divisor, check 3 @unpriv:OK #361/7 verifier_sdiv/SDIV32, non-zero imm divisor, check 4:OK #361/8 verifier_sdiv/SDIV32, non-zero imm divisor, check 4 @unpriv:OK #361/9 verifier_sdiv/SDIV32, non-zero imm divisor, check 5:OK #361/10 verifier_sdiv/SDIV32, non-zero imm divisor, check 5 @unpriv:OK #361/11 verifier_sdiv/SDIV32, non-zero imm divisor, check 6:OK #361/12 verifier_sdiv/SDIV32, non-zero imm divisor, check 6 @unpriv:OK #361/13 verifier_sdiv/SDIV32, non-zero imm divisor, check 7:OK #361/14 verifier_sdiv/SDIV32, non-zero imm divisor, check 7 @unpriv:OK #361/15 verifier_sdiv/SDIV32, non-zero imm divisor, check 8:OK #361/16 verifier_sdiv/SDIV32, non-zero imm divisor, check 8 @unpriv:OK #361/17 verifier_sdiv/SDIV32, non-zero reg divisor, check 1:OK #361/18 verifier_sdiv/SDIV32, non-zero reg divisor, check 1 @unpriv:OK #361/19 verifier_sdiv/SDIV32, non-zero reg divisor, check 2:OK #361/20 verifier_sdiv/SDIV32, non-zero reg divisor, check 2 @unpriv:OK #361/21 verifier_sdiv/SDIV32, non-zero reg divisor, check 3:OK #361/22 verifier_sdiv/SDIV32, non-zero reg divisor, check 3 @unpriv:OK #361/23 verifier_sdiv/SDIV32, non-zero reg divisor, check 4:OK #361/24 verifier_sdiv/SDIV32, non-zero reg divisor, check 4 @unpriv:OK #361/25 verifier_sdiv/SDIV32, non-zero reg divisor, check 5:OK #361/26 verifier_sdiv/SDIV32, non-zero reg divisor, check 5 @unpriv:OK #361/27 verifier_sdiv/SDIV32, non-zero reg divisor, check 6:OK #361/28 verifier_sdiv/SDIV32, non-zero reg divisor, check 6 @unpriv:OK #361/29 verifier_sdiv/SDIV32, non-zero reg divisor, check 7:OK #361/30 verifier_sdiv/SDIV32, non-zero reg divisor, check 7 @unpriv:OK #361/31 verifier_sdiv/SDIV32, non-zero reg divisor, check 8:OK #361/32 verifier_sdiv/SDIV32, non-zero reg divisor, check 8 @unpriv:OK #361/33 verifier_sdiv/SDIV64, non-zero imm divisor, check 1:OK #361/34 verifier_sdiv/SDIV64, non-zero imm divisor, check 1 @unpriv:OK #361/35 verifier_sdiv/SDIV64, non-zero imm divisor, check 2:OK #361/36 verifier_sdiv/SDIV64, non-zero imm divisor, check 2 @unpriv:OK #361/37 verifier_sdiv/SDIV64, non-zero imm divisor, check 3:OK #361/38 verifier_sdiv/SDIV64, non-zero imm divisor, check 3 @unpriv:OK #361/39 verifier_sdiv/SDIV64, non-zero imm divisor, check 4:OK #361/40 verifier_sdiv/SDIV64, non-zero imm divisor, check 4 @unpriv:OK #361/41 verifier_sdiv/SDIV64, non-zero imm divisor, check 5:OK #361/42 verifier_sdiv/SDIV64, non-zero imm divisor, check 5 @unpriv:OK #361/43 verifier_sdiv/SDIV64, non-zero imm divisor, check 6:OK #361/44 verifier_sdiv/SDIV64, non-zero imm divisor, check 6 @unpriv:OK #361/45 verifier_sdiv/SDIV64, non-zero reg divisor, check 1:OK #361/46 verifier_sdiv/SDIV64, non-zero reg divisor, check 1 @unpriv:OK #361/47 verifier_sdiv/SDIV64, non-zero reg divisor, check 2:OK #361/48 verifier_sdiv/SDIV64, non-zero reg divisor, check 2 @unpriv:OK #361/49 verifier_sdiv/SDIV64, non-zero reg divisor, check 3:OK #361/50 verifier_sdiv/SDIV64, non-zero reg divisor, check 3 @unpriv:OK #361/51 verifier_sdiv/SDIV64, non-zero reg divisor, check 4:OK #361/52 verifier_sdiv/SDIV64, non-zero reg divisor, check 4 @unpriv:OK #361/53 verifier_sdiv/SDIV64, non-zero reg divisor, check 5:OK #361/54 verifier_sdiv/SDIV64, non-zero reg divisor, check 5 @unpriv:OK #361/55 verifier_sdiv/SDIV64, non-zero reg divisor, check 6:OK #361/56 verifier_sdiv/SDIV64, non-zero reg divisor, check 6 @unpriv:OK #361/57 verifier_sdiv/SMOD32, non-zero imm divisor, check 1:OK #361/58 verifier_sdiv/SMOD32, non-zero imm divisor, check 1 @unpriv:OK #361/59 verifier_sdiv/SMOD32, non-zero imm divisor, check 2:OK #361/60 verifier_sdiv/SMOD32, non-zero imm divisor, check 2 @unpriv:OK #361/61 verifier_sdiv/SMOD32, non-zero imm divisor, check 3:OK #361/62 verifier_sdiv/SMOD32, non-zero imm divisor, check 3 @unpriv:OK #361/63 verifier_sdiv/SMOD32, non-zero imm divisor, check 4:OK #361/64 verifier_sdiv/SMOD32, non-zero imm divisor, check 4 @unpriv:OK #361/65 verifier_sdiv/SMOD32, non-zero imm divisor, check 5:OK #361/66 verifier_sdiv/SMOD32, non-zero imm divisor, check 5 @unpriv:OK #361/67 verifier_sdiv/SMOD32, non-zero imm divisor, check 6:OK #361/68 verifier_sdiv/SMOD32, non-zero imm divisor, check 6 @unpriv:OK #361/69 verifier_sdiv/SMOD32, non-zero reg divisor, check 1:OK #361/70 verifier_sdiv/SMOD32, non-zero reg divisor, check 1 @unpriv:OK #361/71 verifier_sdiv/SMOD32, non-zero reg divisor, check 2:OK #361/72 verifier_sdiv/SMOD32, non-zero reg divisor, check 2 @unpriv:OK #361/73 verifier_sdiv/SMOD32, non-zero reg divisor, check 3:OK #361/74 verifier_sdiv/SMOD32, non-zero reg divisor, check 3 @unpriv:OK #361/75 verifier_sdiv/SMOD32, non-zero reg divisor, check 4:OK #361/76 verifier_sdiv/SMOD32, non-zero reg divisor, check 4 @unpriv:OK #361/77 verifier_sdiv/SMOD32, non-zero reg divisor, check 5:OK #361/78 verifier_sdiv/SMOD32, non-zero reg divisor, check 5 @unpriv:OK #361/79 verifier_sdiv/SMOD32, non-zero reg divisor, check 6:OK #361/80 verifier_sdiv/SMOD32, non-zero reg divisor, check 6 @unpriv:OK #361/81 verifier_sdiv/SMOD64, non-zero imm divisor, check 1:OK #361/82 verifier_sdiv/SMOD64, non-zero imm divisor, check 1 @unpriv:OK #361/83 verifier_sdiv/SMOD64, non-zero imm divisor, check 2:OK #361/84 verifier_sdiv/SMOD64, non-zero imm divisor, check 2 @unpriv:OK #361/85 verifier_sdiv/SMOD64, non-zero imm divisor, check 3:OK #361/86 verifier_sdiv/SMOD64, non-zero imm divisor, check 3 @unpriv:OK #361/87 verifier_sdiv/SMOD64, non-zero imm divisor, check 4:OK #361/88 verifier_sdiv/SMOD64, non-zero imm divisor, check 4 @unpriv:OK #361/89 verifier_sdiv/SMOD64, non-zero imm divisor, check 5:OK #361/90 verifier_sdiv/SMOD64, non-zero imm divisor, check 5 @unpriv:OK #361/91 verifier_sdiv/SMOD64, non-zero imm divisor, check 6:OK #361/92 verifier_sdiv/SMOD64, non-zero imm divisor, check 6 @unpriv:OK #361/93 verifier_sdiv/SMOD64, non-zero imm divisor, check 7:OK #361/94 verifier_sdiv/SMOD64, non-zero imm divisor, check 7 @unpriv:OK #361/95 verifier_sdiv/SMOD64, non-zero imm divisor, check 8:OK #361/96 verifier_sdiv/SMOD64, non-zero imm divisor, check 8 @unpriv:OK #361/97 verifier_sdiv/SMOD64, non-zero reg divisor, check 1:OK #361/98 verifier_sdiv/SMOD64, non-zero reg divisor, check 1 @unpriv:OK #361/99 verifier_sdiv/SMOD64, non-zero reg divisor, check 2:OK #361/100 verifier_sdiv/SMOD64, non-zero reg divisor, check 2 @unpriv:OK #361/101 verifier_sdiv/SMOD64, non-zero reg divisor, check 3:OK #361/102 verifier_sdiv/SMOD64, non-zero reg divisor, check 3 @unpriv:OK #361/103 verifier_sdiv/SMOD64, non-zero reg divisor, check 4:OK #361/104 verifier_sdiv/SMOD64, non-zero reg divisor, check 4 @unpriv:OK #361/105 verifier_sdiv/SMOD64, non-zero reg divisor, check 5:OK #361/106 verifier_sdiv/SMOD64, non-zero reg divisor, check 5 @unpriv:OK #361/107 verifier_sdiv/SMOD64, non-zero reg divisor, check 6:OK #361/108 verifier_sdiv/SMOD64, non-zero reg divisor, check 6 @unpriv:OK #361/109 verifier_sdiv/SMOD64, non-zero reg divisor, check 7:OK #361/110 verifier_sdiv/SMOD64, non-zero reg divisor, check 7 @unpriv:OK #361/111 verifier_sdiv/SMOD64, non-zero reg divisor, check 8:OK #361/112 verifier_sdiv/SMOD64, non-zero reg divisor, check 8 @unpriv:OK #361/113 verifier_sdiv/SDIV32, zero divisor:OK #361/114 verifier_sdiv/SDIV32, zero divisor @unpriv:OK #361/115 verifier_sdiv/SDIV64, zero divisor:OK #361/116 verifier_sdiv/SDIV64, zero divisor @unpriv:OK #361/117 verifier_sdiv/SMOD32, zero divisor:OK #361/118 verifier_sdiv/SMOD32, zero divisor @unpriv:OK #361/119 verifier_sdiv/SMOD64, zero divisor:OK #361/120 verifier_sdiv/SMOD64, zero divisor @unpriv:OK #361 verifier_sdiv:OK Summary: 5/163 PASSED, 0 SKIPPED, 0 FAILED # ./test_progs -t ldsx_insn test_map_val_and_probed_memory:PASS:test_ldsx_insn__open 0 nsec test_map_val_and_probed_memory:PASS:test_ldsx_insn__load 0 nsec libbpf: prog 'test_ptr_struct_arg': failed to attach: ERROR: strerror_r(-524)=22 libbpf: prog 'test_ptr_struct_arg': failed to auto-attach: -524 test_map_val_and_probed_memory:FAIL:test_ldsx_insn__attach unexpected error: -524 (errno 524) #116/1 ldsx_insn/map_val and probed_memory:FAIL #116/2 ldsx_insn/ctx_member_sign_ext:OK #116/3 ldsx_insn/ctx_member_narrow_sign_ext:OK #116 ldsx_insn:FAIL All error logs: test_map_val_and_probed_memory:PASS:test_ldsx_insn__open 0 nsec test_map_val_and_probed_memory:PASS:test_ldsx_insn__load 0 nsec libbpf: prog 'test_ptr_struct_arg': failed to attach: ERROR: strerror_r(-524)=22 libbpf: prog 'test_ptr_struct_arg': failed to auto-attach: -524 test_map_val_and_probed_memory:FAIL:test_ldsx_insn__attach unexpected error: -524 (errno 524) #116/1 ldsx_insn/map_val and probed_memory:FAIL #116 ldsx_insn:FAIL Summary: 0/2 PASSED, 0 SKIPPED, 1 FAILED Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- tools/testing/selftests/bpf/progs/test_ldsx_insn.c | 3 ++- tools/testing/selftests/bpf/progs/verifier_bswap.c | 3 ++- tools/testing/selftests/bpf/progs/verifier_gotol.c | 3 ++- tools/testing/selftests/bpf/progs/verifier_ldsx.c | 3 ++- tools/testing/selftests/bpf/progs/verifier_movsx.c | 3 ++- tools/testing/selftests/bpf/progs/verifier_sdiv.c | 3 ++- 6 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c index 3ddcb3777912..2a2a942737d7 100644 --- a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c +++ b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c @@ -7,7 +7,8 @@ #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ - defined(__TARGET_ARCH_s390)) && __clang_major__ >= 18 + defined(__TARGET_ARCH_s390) || defined(__TARGET_ARCH_loongarch)) && \ + __clang_major__ >= 18 const volatile int skip = 0; #else const volatile int skip = 1; diff --git a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c index 107525fb4a6a..e61755656e8d 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bswap.c +++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c @@ -6,7 +6,8 @@ #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ - defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ + defined(__TARGET_ARCH_loongarch)) && \ __clang_major__ >= 18 SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c index 9f202eda952f..d1edbcff9a18 100644 --- a/tools/testing/selftests/bpf/progs/verifier_gotol.c +++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c @@ -6,7 +6,8 @@ #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ - defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ + defined(__TARGET_ARCH_loongarch)) && \ __clang_major__ >= 18 SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c index 375525329637..d4427d8e1217 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c @@ -6,7 +6,8 @@ #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ - defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ + defined(__TARGET_ARCH_loongarch)) && \ __clang_major__ >= 18 SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c index b2a04d1179d0..cbb9d6714f53 100644 --- a/tools/testing/selftests/bpf/progs/verifier_movsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c @@ -6,7 +6,8 @@ #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ - defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ + defined(__TARGET_ARCH_loongarch)) && \ __clang_major__ >= 18 SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_sdiv.c b/tools/testing/selftests/bpf/progs/verifier_sdiv.c index 8fc5174808b2..2a2271cf0294 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sdiv.c +++ b/tools/testing/selftests/bpf/progs/verifier_sdiv.c @@ -6,7 +6,8 @@ #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ - defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ + defined(__TARGET_ARCH_loongarch)) && \ __clang_major__ >= 18 SEC("socket") From f0d9da19d7de9e845e7a93a901c4b9658df6b492 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 8 Nov 2023 15:45:00 +0800 Subject: [PATCH 217/415] ALSA: hda/realtek: Add support dual speaker for Dell Dell new platform support dual speaker. But BIOS verb table only show one speaker. It will fill verb table for second speaker. Then bind with CS AMP model. Fixes: de90f5165b1c ("ALSA: hda/realtek: Add support for DELL Oasis 13/14/16 laptops") Signed-off-by: Kailang Yang Link: https://lore.kernel.org/r/4dd390a77bf742b8a518ac2deee00b0f@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 40 +++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 415d3832952b..0df44b1876f3 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7343,8 +7343,10 @@ enum { ALC256_FIXUP_ASUS_MIC_NO_PRESENCE, ALC299_FIXUP_PREDATOR_SPK, ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, + ALC289_FIXUP_DELL_SPK1, ALC289_FIXUP_DELL_SPK2, ALC289_FIXUP_DUAL_SPK, + ALC289_FIXUP_RTK_AMP_DUAL_SPK, ALC294_FIXUP_SPK2_TO_DAC1, ALC294_FIXUP_ASUS_DUAL_SPK, ALC285_FIXUP_THINKPAD_X1_GEN7, @@ -7444,6 +7446,7 @@ enum { ALC287_FIXUP_THINKPAD_I2S_SPK, ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD, ALC2XX_FIXUP_HEADSET_MIC, + ALC289_FIXUP_DELL_CS35L41_SPI_2, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -8670,6 +8673,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE }, + [ALC289_FIXUP_DELL_SPK1] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x14, 0x90170140 }, + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE + }, [ALC289_FIXUP_DELL_SPK2] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -8685,6 +8697,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC289_FIXUP_DELL_SPK2 }, + [ALC289_FIXUP_RTK_AMP_DUAL_SPK] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_speaker2_to_dac1, + .chained = true, + .chain_id = ALC289_FIXUP_DELL_SPK1 + }, [ALC294_FIXUP_SPK2_TO_DAC1] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_speaker2_to_dac1, @@ -9552,6 +9570,12 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mic, }, + [ALC289_FIXUP_DELL_CS35L41_SPI_2] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35l41_fixup_spi_two, + .chained = true, + .chain_id = ALC289_FIXUP_DUAL_SPK + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -9662,13 +9686,15 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0c1c, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1d, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1e, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS), - SND_PCI_QUIRK(0x1028, 0x0cbd, "Dell Oasis 13 CS MTL-U", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1028, 0x0cbe, "Dell Oasis 13 2-IN-1 MTL-U", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1028, 0x0cbf, "Dell Oasis 13 Low Weight MTU-L", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1028, 0x0cc1, "Dell Oasis 14 MTL-H/U", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1028, 0x0cc2, "Dell Oasis 14 2-in-1 MTL-H/U", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1028, 0x0cc3, "Dell Oasis 14 Low Weight MTL-U", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1028, 0x0cc4, "Dell Oasis 16 MTL-H/U", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cbd, "Dell Oasis 13 CS MTL-U", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cbe, "Dell Oasis 13 2-IN-1 MTL-U", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cbf, "Dell Oasis 13 Low Weight MTU-L", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cc0, "Dell Oasis 13", ALC289_FIXUP_RTK_AMP_DUAL_SPK), + SND_PCI_QUIRK(0x1028, 0x0cc1, "Dell Oasis 14 MTL-H/U", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cc2, "Dell Oasis 14 2-in-1 MTL-H/U", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cc3, "Dell Oasis 14 Low Weight MTL-U", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cc4, "Dell Oasis 16 MTL-H/U", ALC289_FIXUP_DELL_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1028, 0x0cc5, "Dell Oasis 14", ALC289_FIXUP_RTK_AMP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), From a60a609b7f548050d1e84c7aa1c0a57d5d7e22d5 Mon Sep 17 00:00:00 2001 From: David Lin Date: Wed, 8 Nov 2023 14:16:59 +0800 Subject: [PATCH 218/415] ASoC: nau8540: Add self recovery to improve capture quility Reading the peak data to detect abnormal data in the ADC channel. If abnormal data occurs, the driver takes recovery actions to refresh the ADC channel. Signed-off-by: David Lin Link: https://lore.kernel.org/r/20231108061658.1265065-1-CTLIN0@nuvoton.com Signed-off-by: Mark Brown --- sound/soc/codecs/nau8540.c | 49 ++++++++++++++++++++++++++++++++++++++ sound/soc/codecs/nau8540.h | 15 ++++++++++++ 2 files changed, 64 insertions(+) diff --git a/sound/soc/codecs/nau8540.c b/sound/soc/codecs/nau8540.c index 5cf28d034f09..f66417a0f29f 100644 --- a/sound/soc/codecs/nau8540.c +++ b/sound/soc/codecs/nau8540.c @@ -530,12 +530,61 @@ static int nau8540_set_tdm_slot(struct snd_soc_dai *dai, return 0; } +static int nau8540_dai_trigger(struct snd_pcm_substream *substream, + int cmd, struct snd_soc_dai *dai) +{ + struct snd_soc_component *component = dai->component; + struct nau8540 *nau8540 = snd_soc_component_get_drvdata(component); + struct regmap *regmap = nau8540->regmap; + unsigned int val; + int ret = 0; + + /* Reading the peak data to detect abnormal data in the ADC channel. + * If abnormal data happens, the driver takes recovery actions to + * refresh the ADC channel. + */ + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + regmap_update_bits(regmap, NAU8540_REG_CLOCK_CTRL, + NAU8540_CLK_AGC_EN, NAU8540_CLK_AGC_EN); + regmap_update_bits(regmap, NAU8540_REG_ALC_CONTROL_3, + NAU8540_ALC_CH_ALL_EN, NAU8540_ALC_CH_ALL_EN); + + regmap_read(regmap, NAU8540_REG_PEAK_CH1, &val); + dev_dbg(nau8540->dev, "1.ADC CH1 peak data %x", val); + if (!val) { + regmap_update_bits(regmap, NAU8540_REG_MUTE, + NAU8540_PGA_CH_ALL_MUTE, NAU8540_PGA_CH_ALL_MUTE); + regmap_update_bits(regmap, NAU8540_REG_MUTE, + NAU8540_PGA_CH_ALL_MUTE, 0); + regmap_write(regmap, NAU8540_REG_RST, 0x1); + regmap_write(regmap, NAU8540_REG_RST, 0); + regmap_read(regmap, NAU8540_REG_PEAK_CH1, &val); + dev_dbg(nau8540->dev, "2.ADC CH1 peak data %x", val); + if (!val) { + dev_err(nau8540->dev, "Channel recovery failed!!"); + ret = -EIO; + } + } + regmap_update_bits(regmap, NAU8540_REG_CLOCK_CTRL, + NAU8540_CLK_AGC_EN, 0); + regmap_update_bits(regmap, NAU8540_REG_ALC_CONTROL_3, + NAU8540_ALC_CH_ALL_EN, 0); + break; + + default: + break; + } + + return ret; +} static const struct snd_soc_dai_ops nau8540_dai_ops = { .startup = nau8540_dai_startup, .hw_params = nau8540_hw_params, .set_fmt = nau8540_set_fmt, .set_tdm_slot = nau8540_set_tdm_slot, + .trigger = nau8540_dai_trigger, }; #define NAU8540_RATES SNDRV_PCM_RATE_8000_48000 diff --git a/sound/soc/codecs/nau8540.h b/sound/soc/codecs/nau8540.h index 305ea9207cf0..2ce6063d462b 100644 --- a/sound/soc/codecs/nau8540.h +++ b/sound/soc/codecs/nau8540.h @@ -85,6 +85,7 @@ /* CLOCK_CTRL (0x02) */ #define NAU8540_CLK_ADC_EN (0x1 << 15) +#define NAU8540_CLK_AGC_EN (0x1 << 3) #define NAU8540_CLK_I2S_EN (0x1 << 1) /* CLOCK_SRC (0x03) */ @@ -168,6 +169,13 @@ #define NAU8540_TDM_OFFSET_EN (0x1 << 14) #define NAU8540_TDM_TX_MASK 0xf +/* ALC_CONTROL_3 (0x22) */ +#define NAU8540_ALC_CH1_EN (0x1 << 12) +#define NAU8540_ALC_CH2_EN (0x1 << 13) +#define NAU8540_ALC_CH3_EN (0x1 << 14) +#define NAU8540_ALC_CH4_EN (0x1 << 15) +#define NAU8540_ALC_CH_ALL_EN (0xf << 12) + /* ADC_SAMPLE_RATE (0x3A) */ #define NAU8540_CH_SYNC (0x1 << 14) #define NAU8540_ADC_OSR_MASK 0x3 @@ -181,6 +189,13 @@ #define NAU8540_VMID_SEL_SFT 4 #define NAU8540_VMID_SEL_MASK (0x3 << NAU8540_VMID_SEL_SFT) +/* MUTE (0x61) */ +#define NAU8540_PGA_CH1_MUTE 0x1 +#define NAU8540_PGA_CH2_MUTE 0x2 +#define NAU8540_PGA_CH3_MUTE 0x4 +#define NAU8540_PGA_CH4_MUTE 0x8 +#define NAU8540_PGA_CH_ALL_MUTE 0xf + /* MIC_BIAS (0x67) */ #define NAU8540_PU_PRE (0x1 << 8) From 629db01c64ff6cea08fc61b52426362689ef8618 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 8 Nov 2023 08:59:29 +0100 Subject: [PATCH 219/415] riscv: Don't use PGD entries for the linear mapping Propagating changes at this level is cumbersome as we need to go through all the page tables when that happens (either when changing the permissions or when splitting the mapping). Note that this prevents the use of 4MB mapping for sv32 and 1GB mapping for sv39 in the linear mapping. Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20231108075930.7157-2-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 0798bd861dcb..6dc61d3c392f 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -664,16 +664,16 @@ void __init create_pgd_mapping(pgd_t *pgdp, static uintptr_t __init best_map_size(phys_addr_t pa, uintptr_t va, phys_addr_t size) { - if (!(pa & (PGDIR_SIZE - 1)) && !(va & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE) - return PGDIR_SIZE; - - if (!(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE) + if (pgtable_l5_enabled && + !(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE) return P4D_SIZE; - if (!(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE) + if (pgtable_l4_enabled && + !(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE) return PUD_SIZE; - if (!(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE) + if (IS_ENABLED(CONFIG_64BIT) && + !(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE) return PMD_SIZE; return PAGE_SIZE; From 311cd2f6e25380cff0abc2884dc6a3d33bc9b5c3 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 8 Nov 2023 08:59:30 +0100 Subject: [PATCH 220/415] riscv: Fix set_memory_XX() and set_direct_map_XX() by splitting huge linear mappings When STRICT_KERNEL_RWX is set, any change of permissions on any kernel mapping (vmalloc/modules/kernel text...etc) should be applied on its linear mapping alias. The problem is that the riscv kernel uses huge mappings for the linear mapping and walk_page_range_novma() does not split those huge mappings. So this patchset implements such split in order to apply fine-grained permissions on the linear mapping. Below is the difference before and after (the first PUD mapping is split into PTE/PMD mappings): Before: ---[ Linear mapping ]--- 0xffffaf8000080000-0xffffaf8000200000 0x0000000080080000 1536K PTE D A G . . W R V 0xffffaf8000200000-0xffffaf8077c00000 0x0000000080200000 1914M PMD D A G . . W R V 0xffffaf8077c00000-0xffffaf8078800000 0x00000000f7c00000 12M PMD D A G . . . R V 0xffffaf8078800000-0xffffaf8078c00000 0x00000000f8800000 4M PMD D A G . . W R V 0xffffaf8078c00000-0xffffaf8079200000 0x00000000f8c00000 6M PMD D A G . . . R V 0xffffaf8079200000-0xffffaf807e600000 0x00000000f9200000 84M PMD D A G . . W R V 0xffffaf807e600000-0xffffaf807e716000 0x00000000fe600000 1112K PTE D A G . . W R V 0xffffaf807e717000-0xffffaf807e71a000 0x00000000fe717000 12K PTE D A G . . W R V 0xffffaf807e71d000-0xffffaf807e71e000 0x00000000fe71d000 4K PTE D A G . . W R V 0xffffaf807e722000-0xffffaf807e800000 0x00000000fe722000 888K PTE D A G . . W R V 0xffffaf807e800000-0xffffaf807fe00000 0x00000000fe800000 22M PMD D A G . . W R V 0xffffaf807fe00000-0xffffaf807ff54000 0x00000000ffe00000 1360K PTE D A G . . W R V 0xffffaf807ff55000-0xffffaf8080000000 0x00000000fff55000 684K PTE D A G . . W R V 0xffffaf8080000000-0xffffaf8400000000 0x0000000100000000 14G PUD D A G . . W R V After: ---[ Linear mapping ]--- 0xffffaf8000080000-0xffffaf8000200000 0x0000000080080000 1536K PTE D A G . . W R V 0xffffaf8000200000-0xffffaf8077c00000 0x0000000080200000 1914M PMD D A G . . W R V 0xffffaf8077c00000-0xffffaf8078800000 0x00000000f7c00000 12M PMD D A G . . . R V 0xffffaf8078800000-0xffffaf8078a00000 0x00000000f8800000 2M PMD D A G . . W R V 0xffffaf8078a00000-0xffffaf8078c00000 0x00000000f8a00000 2M PTE D A G . . W R V 0xffffaf8078c00000-0xffffaf8079200000 0x00000000f8c00000 6M PMD D A G . . . R V 0xffffaf8079200000-0xffffaf807e600000 0x00000000f9200000 84M PMD D A G . . W R V 0xffffaf807e600000-0xffffaf807e716000 0x00000000fe600000 1112K PTE D A G . . W R V 0xffffaf807e717000-0xffffaf807e71a000 0x00000000fe717000 12K PTE D A G . . W R V 0xffffaf807e71d000-0xffffaf807e71e000 0x00000000fe71d000 4K PTE D A G . . W R V 0xffffaf807e722000-0xffffaf807e800000 0x00000000fe722000 888K PTE D A G . . W R V 0xffffaf807e800000-0xffffaf807fe00000 0x00000000fe800000 22M PMD D A G . . W R V 0xffffaf807fe00000-0xffffaf807ff54000 0x00000000ffe00000 1360K PTE D A G . . W R V 0xffffaf807ff55000-0xffffaf8080000000 0x00000000fff55000 684K PTE D A G . . W R V 0xffffaf8080000000-0xffffaf8080800000 0x0000000100000000 8M PMD D A G . . W R V 0xffffaf8080800000-0xffffaf8080af6000 0x0000000100800000 3032K PTE D A G . . W R V 0xffffaf8080af6000-0xffffaf8080af8000 0x0000000100af6000 8K PTE D A G . X . R V 0xffffaf8080af8000-0xffffaf8080c00000 0x0000000100af8000 1056K PTE D A G . . W R V 0xffffaf8080c00000-0xffffaf8081a00000 0x0000000100c00000 14M PMD D A G . . W R V 0xffffaf8081a00000-0xffffaf8081a40000 0x0000000101a00000 256K PTE D A G . . W R V 0xffffaf8081a40000-0xffffaf8081a44000 0x0000000101a40000 16K PTE D A G . X . R V 0xffffaf8081a44000-0xffffaf8081a52000 0x0000000101a44000 56K PTE D A G . . W R V 0xffffaf8081a52000-0xffffaf8081a54000 0x0000000101a52000 8K PTE D A G . X . R V ... 0xffffaf809e800000-0xffffaf80c0000000 0x000000011e800000 536M PMD D A G . . W R V 0xffffaf80c0000000-0xffffaf8400000000 0x0000000140000000 13G PUD D A G . . W R V Note that this also fixes memfd_secret() syscall which uses set_direct_map_invalid_noflush() and set_direct_map_default_noflush() to remove the pages from the linear mapping. Below is the kernel page table while a memfd_secret() syscall is running, you can see all the !valid page table entries in the linear mapping: ... 0xffffaf8082240000-0xffffaf8082241000 0x0000000102240000 4K PTE D A G . . W R . 0xffffaf8082241000-0xffffaf8082250000 0x0000000102241000 60K PTE D A G . . W R V 0xffffaf8082250000-0xffffaf8082252000 0x0000000102250000 8K PTE D A G . . W R . 0xffffaf8082252000-0xffffaf8082256000 0x0000000102252000 16K PTE D A G . . W R V 0xffffaf8082256000-0xffffaf8082257000 0x0000000102256000 4K PTE D A G . . W R . 0xffffaf8082257000-0xffffaf8082258000 0x0000000102257000 4K PTE D A G . . W R V 0xffffaf8082258000-0xffffaf8082259000 0x0000000102258000 4K PTE D A G . . W R . 0xffffaf8082259000-0xffffaf808225a000 0x0000000102259000 4K PTE D A G . . W R V 0xffffaf808225a000-0xffffaf808225c000 0x000000010225a000 8K PTE D A G . . W R . 0xffffaf808225c000-0xffffaf8082266000 0x000000010225c000 40K PTE D A G . . W R V 0xffffaf8082266000-0xffffaf8082268000 0x0000000102266000 8K PTE D A G . . W R . 0xffffaf8082268000-0xffffaf8082284000 0x0000000102268000 112K PTE D A G . . W R V 0xffffaf8082284000-0xffffaf8082288000 0x0000000102284000 16K PTE D A G . . W R . 0xffffaf8082288000-0xffffaf808229c000 0x0000000102288000 80K PTE D A G . . W R V 0xffffaf808229c000-0xffffaf80822a0000 0x000000010229c000 16K PTE D A G . . W R . 0xffffaf80822a0000-0xffffaf80822a5000 0x00000001022a0000 20K PTE D A G . . W R V 0xffffaf80822a5000-0xffffaf80822a6000 0x00000001022a5000 4K PTE D A G . . . R V 0xffffaf80822a6000-0xffffaf80822ab000 0x00000001022a6000 20K PTE D A G . . W R V ... And when the memfd_secret() fd is released, the linear mapping is correctly reset: ... 0xffffaf8082240000-0xffffaf80822a5000 0x0000000102240000 404K PTE D A G . . W R V 0xffffaf80822a5000-0xffffaf80822a6000 0x00000001022a5000 4K PTE D A G . . . R V 0xffffaf80822a6000-0xffffaf80822af000 0x00000001022a6000 36K PTE D A G . . W R V ... Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20231108075930.7157-3-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/pageattr.c | 270 +++++++++++++++++++++++++++++++++------ 1 file changed, 230 insertions(+), 40 deletions(-) diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c index 161d0b34c2cb..fc5fc4f785c4 100644 --- a/arch/riscv/mm/pageattr.c +++ b/arch/riscv/mm/pageattr.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -25,19 +26,6 @@ static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk) return new_val; } -static int pageattr_pgd_entry(pgd_t *pgd, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - pgd_t val = READ_ONCE(*pgd); - - if (pgd_leaf(val)) { - val = __pgd(set_pageattr_masks(pgd_val(val), walk)); - set_pgd(pgd, val); - } - - return 0; -} - static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr, unsigned long next, struct mm_walk *walk) { @@ -96,7 +84,6 @@ static int pageattr_pte_hole(unsigned long addr, unsigned long next, } static const struct mm_walk_ops pageattr_ops = { - .pgd_entry = pageattr_pgd_entry, .p4d_entry = pageattr_p4d_entry, .pud_entry = pageattr_pud_entry, .pmd_entry = pageattr_pmd_entry, @@ -105,12 +92,181 @@ static const struct mm_walk_ops pageattr_ops = { .walk_lock = PGWALK_RDLOCK, }; +#ifdef CONFIG_64BIT +static int __split_linear_mapping_pmd(pud_t *pudp, + unsigned long vaddr, unsigned long end) +{ + pmd_t *pmdp; + unsigned long next; + + pmdp = pmd_offset(pudp, vaddr); + + do { + next = pmd_addr_end(vaddr, end); + + if (next - vaddr >= PMD_SIZE && + vaddr <= (vaddr & PMD_MASK) && end >= next) + continue; + + if (pmd_leaf(*pmdp)) { + struct page *pte_page; + unsigned long pfn = _pmd_pfn(*pmdp); + pgprot_t prot = __pgprot(pmd_val(*pmdp) & ~_PAGE_PFN_MASK); + pte_t *ptep_new; + int i; + + pte_page = alloc_page(GFP_KERNEL); + if (!pte_page) + return -ENOMEM; + + ptep_new = (pte_t *)page_address(pte_page); + for (i = 0; i < PTRS_PER_PTE; ++i, ++ptep_new) + set_pte(ptep_new, pfn_pte(pfn + i, prot)); + + smp_wmb(); + + set_pmd(pmdp, pfn_pmd(page_to_pfn(pte_page), PAGE_TABLE)); + } + } while (pmdp++, vaddr = next, vaddr != end); + + return 0; +} + +static int __split_linear_mapping_pud(p4d_t *p4dp, + unsigned long vaddr, unsigned long end) +{ + pud_t *pudp; + unsigned long next; + int ret; + + pudp = pud_offset(p4dp, vaddr); + + do { + next = pud_addr_end(vaddr, end); + + if (next - vaddr >= PUD_SIZE && + vaddr <= (vaddr & PUD_MASK) && end >= next) + continue; + + if (pud_leaf(*pudp)) { + struct page *pmd_page; + unsigned long pfn = _pud_pfn(*pudp); + pgprot_t prot = __pgprot(pud_val(*pudp) & ~_PAGE_PFN_MASK); + pmd_t *pmdp_new; + int i; + + pmd_page = alloc_page(GFP_KERNEL); + if (!pmd_page) + return -ENOMEM; + + pmdp_new = (pmd_t *)page_address(pmd_page); + for (i = 0; i < PTRS_PER_PMD; ++i, ++pmdp_new) + set_pmd(pmdp_new, + pfn_pmd(pfn + ((i * PMD_SIZE) >> PAGE_SHIFT), prot)); + + smp_wmb(); + + set_pud(pudp, pfn_pud(page_to_pfn(pmd_page), PAGE_TABLE)); + } + + ret = __split_linear_mapping_pmd(pudp, vaddr, next); + if (ret) + return ret; + } while (pudp++, vaddr = next, vaddr != end); + + return 0; +} + +static int __split_linear_mapping_p4d(pgd_t *pgdp, + unsigned long vaddr, unsigned long end) +{ + p4d_t *p4dp; + unsigned long next; + int ret; + + p4dp = p4d_offset(pgdp, vaddr); + + do { + next = p4d_addr_end(vaddr, end); + + /* + * If [vaddr; end] contains [vaddr & P4D_MASK; next], we don't + * need to split, we'll change the protections on the whole P4D. + */ + if (next - vaddr >= P4D_SIZE && + vaddr <= (vaddr & P4D_MASK) && end >= next) + continue; + + if (p4d_leaf(*p4dp)) { + struct page *pud_page; + unsigned long pfn = _p4d_pfn(*p4dp); + pgprot_t prot = __pgprot(p4d_val(*p4dp) & ~_PAGE_PFN_MASK); + pud_t *pudp_new; + int i; + + pud_page = alloc_page(GFP_KERNEL); + if (!pud_page) + return -ENOMEM; + + /* + * Fill the pud level with leaf puds that have the same + * protections as the leaf p4d. + */ + pudp_new = (pud_t *)page_address(pud_page); + for (i = 0; i < PTRS_PER_PUD; ++i, ++pudp_new) + set_pud(pudp_new, + pfn_pud(pfn + ((i * PUD_SIZE) >> PAGE_SHIFT), prot)); + + /* + * Make sure the pud filling is not reordered with the + * p4d store which could result in seeing a partially + * filled pud level. + */ + smp_wmb(); + + set_p4d(p4dp, pfn_p4d(page_to_pfn(pud_page), PAGE_TABLE)); + } + + ret = __split_linear_mapping_pud(p4dp, vaddr, next); + if (ret) + return ret; + } while (p4dp++, vaddr = next, vaddr != end); + + return 0; +} + +static int __split_linear_mapping_pgd(pgd_t *pgdp, + unsigned long vaddr, + unsigned long end) +{ + unsigned long next; + int ret; + + do { + next = pgd_addr_end(vaddr, end); + /* We never use PGD mappings for the linear mapping */ + ret = __split_linear_mapping_p4d(pgdp, vaddr, next); + if (ret) + return ret; + } while (pgdp++, vaddr = next, vaddr != end); + + return 0; +} + +static int split_linear_mapping(unsigned long start, unsigned long end) +{ + return __split_linear_mapping_pgd(pgd_offset_k(start), start, end); +} +#endif /* CONFIG_64BIT */ + static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, pgprot_t clear_mask) { int ret; unsigned long start = addr; unsigned long end = start + PAGE_SIZE * numpages; + unsigned long __maybe_unused lm_start; + unsigned long __maybe_unused lm_end; struct pageattr_masks masks = { .set_mask = set_mask, .clear_mask = clear_mask @@ -120,11 +276,67 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, return 0; mmap_write_lock(&init_mm); + +#ifdef CONFIG_64BIT + /* + * We are about to change the permissions of a kernel mapping, we must + * apply the same changes to its linear mapping alias, which may imply + * splitting a huge mapping. + */ + + if (is_vmalloc_or_module_addr((void *)start)) { + struct vm_struct *area = NULL; + int i, page_start; + + area = find_vm_area((void *)start); + page_start = (start - (unsigned long)area->addr) >> PAGE_SHIFT; + + for (i = page_start; i < page_start + numpages; ++i) { + lm_start = (unsigned long)page_address(area->pages[i]); + lm_end = lm_start + PAGE_SIZE; + + ret = split_linear_mapping(lm_start, lm_end); + if (ret) + goto unlock; + + ret = walk_page_range_novma(&init_mm, lm_start, lm_end, + &pageattr_ops, NULL, &masks); + if (ret) + goto unlock; + } + } else if (is_kernel_mapping(start) || is_linear_mapping(start)) { + lm_start = (unsigned long)lm_alias(start); + lm_end = (unsigned long)lm_alias(end); + + ret = split_linear_mapping(lm_start, lm_end); + if (ret) + goto unlock; + + ret = walk_page_range_novma(&init_mm, lm_start, lm_end, + &pageattr_ops, NULL, &masks); + if (ret) + goto unlock; + } + ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, &masks); + +unlock: + mmap_write_unlock(&init_mm); + + /* + * We can't use flush_tlb_kernel_range() here as we may have split a + * hugepage that is larger than that, so let's flush everything. + */ + flush_tlb_all(); +#else + ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, + &masks); + mmap_write_unlock(&init_mm); flush_tlb_kernel_range(start, end); +#endif return ret; } @@ -159,36 +371,14 @@ int set_memory_nx(unsigned long addr, int numpages) int set_direct_map_invalid_noflush(struct page *page) { - int ret; - unsigned long start = (unsigned long)page_address(page); - unsigned long end = start + PAGE_SIZE; - struct pageattr_masks masks = { - .set_mask = __pgprot(0), - .clear_mask = __pgprot(_PAGE_PRESENT) - }; - - mmap_read_lock(&init_mm); - ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); - mmap_read_unlock(&init_mm); - - return ret; + return __set_memory((unsigned long)page_address(page), 1, + __pgprot(0), __pgprot(_PAGE_PRESENT)); } int set_direct_map_default_noflush(struct page *page) { - int ret; - unsigned long start = (unsigned long)page_address(page); - unsigned long end = start + PAGE_SIZE; - struct pageattr_masks masks = { - .set_mask = PAGE_KERNEL, - .clear_mask = __pgprot(0) - }; - - mmap_read_lock(&init_mm); - ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); - mmap_read_unlock(&init_mm); - - return ret; + return __set_memory((unsigned long)page_address(page), 1, + PAGE_KERNEL, __pgprot(0)); } #ifdef CONFIG_DEBUG_PAGEALLOC From 53c87e846e335e3c18044c397cc35178163d7827 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Wed, 8 Nov 2023 12:12:49 +0100 Subject: [PATCH 221/415] swiotlb: fix out-of-bounds TLB allocations with CONFIG_SWIOTLB_DYNAMIC Limit the free list length to the size of the IO TLB. Transient pool can be smaller than IO_TLB_SEGSIZE, but the free list is initialized with the assumption that the total number of slots is a multiple of IO_TLB_SEGSIZE. As a result, swiotlb_area_find_slots() may allocate slots past the end of a transient IO TLB buffer. Reported-by: Niklas Schnelle Closes: https://lore.kernel.org/linux-iommu/104a8c8fedffd1ff8a2890983e2ec1c26bff6810.camel@linux.ibm.com/ Fixes: 79636caad361 ("swiotlb: if swiotlb is full, fall back to a transient memory pool") Cc: stable@vger.kernel.org Signed-off-by: Petr Tesarik Reviewed-by: Halil Pasic Signed-off-by: Christoph Hellwig --- kernel/dma/swiotlb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 71392c9fac10..33d942615be5 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -283,7 +283,8 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start, } for (i = 0; i < mem->nslabs; i++) { - mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i); + mem->slots[i].list = min(IO_TLB_SEGSIZE - io_tlb_offset(i), + mem->nslabs - i); mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; } From 1d816ba168ea1999afe8cd2ccbe66b0e762bf455 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 7 Nov 2023 07:26:56 -0800 Subject: [PATCH 222/415] arm64: Move MediaTek GIC quirk handling from irqchip to core In commit 44bd78dd2b88 ("irqchip/gic-v3: Disable pseudo NMIs on MediaTek devices w/ firmware issues") we added a method for detecting MediaTek devices with broken firmware and disabled pseudo-NMI. While that worked, it didn't address the problem at a deep enough level. The fundamental issue with this broken firmware is that it's not saving and restoring several important GICR registers. The current list is believed to be: * GICR_NUM_IPRIORITYR * GICR_CTLR * GICR_ISPENDR0 * GICR_ISACTIVER0 * GICR_NSACR Pseudo-NMI didn't work because it was the only thing (currently) in the kernel that relied on the broken registers, so forcing pseudo-NMI off was an effective fix. However, it could be observed that calling system_uses_irq_prio_masking() on these systems still returned "true". That caused confusion and led to the need for commit a07a59415217 ("arm64: smp: avoid NMI IPIs with broken MediaTek FW"). It's worried that the incorrect value returned by system_uses_irq_prio_masking() on these systems will continue to confuse future developers. Let's fix the issue a little more completely by disabling IRQ priorities at a deeper level in the kernel. Once we do this we can revert some of the other bits of code dealing with this quirk. This includes a partial revert of commit 44bd78dd2b88 ("irqchip/gic-v3: Disable pseudo NMIs on MediaTek devices w/ firmware issues"). This isn't a full revert because it leaves some of the changes to the "quirks" structure around in case future code needs it. Suggested-by: Mark Rutland Signed-off-by: Douglas Anderson Reviewed-by: AngeloGioacchino Del Regno Tested-by: AngeloGioacchino Del Regno Acked-by: Mark Rutland Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20231107072651.v2.1.Ide945748593cffd8ff0feb9ae22b795935b944d6@changeid Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 46 ++++++++++++++++++++++++++++------ drivers/irqchip/irq-gic-v3.c | 22 +--------------- 2 files changed, 39 insertions(+), 29 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f6b2e2906fc9..646591c67e7a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -999,6 +999,37 @@ static void init_32bit_cpu_features(struct cpuinfo_32bit *info) init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); } +#ifdef CONFIG_ARM64_PSEUDO_NMI +static bool enable_pseudo_nmi; + +static int __init early_enable_pseudo_nmi(char *p) +{ + return kstrtobool(p, &enable_pseudo_nmi); +} +early_param("irqchip.gicv3_pseudo_nmi", early_enable_pseudo_nmi); + +static __init void detect_system_supports_pseudo_nmi(void) +{ + struct device_node *np; + + if (!enable_pseudo_nmi) + return; + + /* + * Detect broken MediaTek firmware that doesn't properly save and + * restore GIC priorities. + */ + np = of_find_compatible_node(NULL, NULL, "arm,gic-v3"); + if (np && of_property_read_bool(np, "mediatek,broken-save-restore-fw")) { + pr_info("Pseudo-NMI disabled due to MediaTek Chromebook GICR save problem\n"); + enable_pseudo_nmi = false; + } + of_node_put(np); +} +#else /* CONFIG_ARM64_PSEUDO_NMI */ +static inline void detect_system_supports_pseudo_nmi(void) { } +#endif + void __init init_cpu_features(struct cpuinfo_arm64 *info) { /* Before we start using the tables, make sure it is sorted */ @@ -1057,6 +1088,13 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) */ init_cpucap_indirect_list(); + /* + * Detect broken pseudo-NMI. Must be called _before_ the call to + * setup_boot_cpu_capabilities() since it interacts with + * can_use_gic_priorities(). + */ + detect_system_supports_pseudo_nmi(); + /* * Detect and enable early CPU capabilities based on the boot CPU, * after we have initialised the CPU feature infrastructure. @@ -2085,14 +2123,6 @@ static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) #endif /* CONFIG_ARM64_E0PD */ #ifdef CONFIG_ARM64_PSEUDO_NMI -static bool enable_pseudo_nmi; - -static int __init early_enable_pseudo_nmi(char *p) -{ - return kstrtobool(p, &enable_pseudo_nmi); -} -early_param("irqchip.gicv3_pseudo_nmi", early_enable_pseudo_nmi); - static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry, int scope) { diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 00060db0dbe2..552a3fed7a89 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -39,8 +39,7 @@ #define FLAGS_WORKAROUND_GICR_WAKER_MSM8996 (1ULL << 0) #define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539 (1ULL << 1) -#define FLAGS_WORKAROUND_MTK_GICR_SAVE (1ULL << 2) -#define FLAGS_WORKAROUND_ASR_ERRATUM_8601001 (1ULL << 3) +#define FLAGS_WORKAROUND_ASR_ERRATUM_8601001 (1ULL << 2) #define GIC_IRQ_TYPE_PARTITION (GIC_IRQ_TYPE_LPI + 1) @@ -1779,15 +1778,6 @@ static bool gic_enable_quirk_msm8996(void *data) return true; } -static bool gic_enable_quirk_mtk_gicr(void *data) -{ - struct gic_chip_data *d = data; - - d->flags |= FLAGS_WORKAROUND_MTK_GICR_SAVE; - - return true; -} - static bool gic_enable_quirk_cavium_38539(void *data) { struct gic_chip_data *d = data; @@ -1880,11 +1870,6 @@ static const struct gic_quirk gic_quirks[] = { .compatible = "asr,asr8601-gic-v3", .init = gic_enable_quirk_asr8601, }, - { - .desc = "GICv3: Mediatek Chromebook GICR save problem", - .property = "mediatek,broken-save-restore-fw", - .init = gic_enable_quirk_mtk_gicr, - }, { .desc = "GICv3: HIP06 erratum 161010803", .iidr = 0x0204043b, @@ -1946,11 +1931,6 @@ static void gic_enable_nmi_support(void) if (!gic_prio_masking_enabled()) return; - if (gic_data.flags & FLAGS_WORKAROUND_MTK_GICR_SAVE) { - pr_warn("Skipping NMI enable due to firmware issues\n"); - return; - } - rdist_nmi_refs = kcalloc(gic_data.ppi_nr + SGI_NR, sizeof(*rdist_nmi_refs), GFP_KERNEL); if (!rdist_nmi_refs) From 4bb49009e07175266e8c5ffbd5d4c4b241c97f19 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 7 Nov 2023 07:26:57 -0800 Subject: [PATCH 223/415] Revert "arm64: smp: avoid NMI IPIs with broken MediaTek FW" This reverts commit a07a594152173a3dd3bdd12fc7d73dbba54cdbca. This is no longer needed after the patch ("arm64: Move MediaTek GIC quirk handling from irqchip to core). Signed-off-by: Douglas Anderson Acked-by: Mark Rutland Acked-by: Marc Zyngier Tested-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231107072651.v2.2.I2c5fa192e767eb3ee233bc28eb60e2f8656c29a6@changeid Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 5 +---- drivers/irqchip/irq-gic-v3.c | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 13fa576dd2c6..03204e841172 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -965,10 +965,7 @@ static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) static bool ipi_should_be_nmi(enum ipi_msg_type ipi) { - DECLARE_STATIC_KEY_FALSE(supports_pseudo_nmis); - - if (!system_uses_irq_prio_masking() || - !static_branch_likely(&supports_pseudo_nmis)) + if (!system_uses_irq_prio_masking()) return false; switch (ipi) { diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 552a3fed7a89..dd4163554738 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -105,7 +105,7 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); * - Figure 4-7 Secure read of the priority field for a Non-secure Group 1 * interrupt. */ -DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); +static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); DEFINE_STATIC_KEY_FALSE(gic_nonsecure_priorities); EXPORT_SYMBOL(gic_nonsecure_priorities); From 37de5a80e932f828c34abeaae63170d73930dca3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 6 Nov 2023 14:40:11 +0000 Subject: [PATCH 224/415] cifs: Fix encryption of cleared, but unset rq_iter data buffers Each smb_rqst struct contains two things: an array of kvecs (rq_iov) that contains the protocol data for an RPC op and an iterator (rq_iter) that contains the data payload of an RPC op. When an smb_rqst is allocated rq_iter is it always cleared, but we don't set it up unless we're going to use it. The functions that determines the size of the ciphertext buffer that will be needed to encrypt a request, cifs_get_num_sgs(), assumes that rq_iter is always initialised - and employs user_backed_iter() to check that the iterator isn't user-backed. This used to incidentally work, because ->user_backed was set to false because the iterator has never been initialised, but with commit f1b4cb650b9a0eeba206d8f069fcdc532bfbcd74[1] which changes user_backed_iter() to determine this based on the iterator type insted, a warning is now emitted: WARNING: CPU: 7 PID: 4584 at fs/smb/client/cifsglob.h:2165 smb2_get_aead_req+0x3fc/0x420 [cifs] ... RIP: 0010:smb2_get_aead_req+0x3fc/0x420 [cifs] ... crypt_message+0x33e/0x550 [cifs] smb3_init_transform_rq+0x27d/0x3f0 [cifs] smb_send_rqst+0xc7/0x160 [cifs] compound_send_recv+0x3ca/0x9f0 [cifs] cifs_send_recv+0x25/0x30 [cifs] SMB2_tcon+0x38a/0x820 [cifs] cifs_get_smb_ses+0x69c/0xee0 [cifs] cifs_mount_get_session+0x76/0x1d0 [cifs] dfs_mount_share+0x74/0x9d0 [cifs] cifs_mount+0x6e/0x2e0 [cifs] cifs_smb3_do_mount+0x143/0x300 [cifs] smb3_get_tree+0x15e/0x290 [cifs] vfs_get_tree+0x2d/0xe0 do_new_mount+0x124/0x340 __se_sys_mount+0x143/0x1a0 The problem is that rq_iter was never set, so the type is 0 (ie. ITER_UBUF) which causes user_backed_iter() to return true. The code doesn't malfunction because it checks the size of the iterator - which is 0. Fix cifs_get_num_sgs() to ignore rq_iter if its count is 0, thereby bypassing the warnings. It might be better to explicitly initialise rq_iter to a zero-length ITER_BVEC, say, as it can always be reinitialised later. Fixes: d08089f649a0 ("cifs: Change the I/O paths to use an iterator rather than a page list") Reported-by: Damian Tometzki Closes: https://lore.kernel.org/r/ZUfQo47uo0p2ZsYg@fedora.fritz.box/ Tested-by: Damian Tometzki Cc: stable@vger.kernel.org cc: Eric Biggers cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=f1b4cb650b9a0eeba206d8f069fcdc532bfbcd74 [1] Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: David Howells Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 02082621d8e0..e55f49e278a2 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -2143,6 +2143,7 @@ static inline int cifs_get_num_sgs(const struct smb_rqst *rqst, unsigned int len, skip; unsigned int nents = 0; unsigned long addr; + size_t data_size; int i, j; /* @@ -2158,17 +2159,21 @@ static inline int cifs_get_num_sgs(const struct smb_rqst *rqst, * rqst[1+].rq_iov[0+] data to be encrypted/decrypted */ for (i = 0; i < num_rqst; i++) { + data_size = iov_iter_count(&rqst[i].rq_iter); + /* We really don't want a mixture of pinned and unpinned pages * in the sglist. It's hard to keep track of which is what. * Instead, we convert to a BVEC-type iterator higher up. */ - if (WARN_ON_ONCE(user_backed_iter(&rqst[i].rq_iter))) + if (data_size && + WARN_ON_ONCE(user_backed_iter(&rqst[i].rq_iter))) return -EIO; /* We also don't want to have any extra refs or pins to clean * up in the sglist. */ - if (WARN_ON_ONCE(iov_iter_extract_will_pin(&rqst[i].rq_iter))) + if (data_size && + WARN_ON_ONCE(iov_iter_extract_will_pin(&rqst[i].rq_iter))) return -EIO; for (j = 0; j < rqst[i].rq_nvec; j++) { @@ -2184,7 +2189,8 @@ static inline int cifs_get_num_sgs(const struct smb_rqst *rqst, } skip = 0; } - nents += iov_iter_npages(&rqst[i].rq_iter, INT_MAX); + if (data_size) + nents += iov_iter_npages(&rqst[i].rq_iter, INT_MAX); } nents += DIV_ROUND_UP(offset_in_page(sig) + SMB2_SIGNATURE_SIZE, PAGE_SIZE); return nents; From 706add13676da7ad213b65e92b94af5efc8c4131 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 26 Oct 2023 15:08:04 +0200 Subject: [PATCH 225/415] nvme: keyring: fix conditional compilation The keyring and auth functions can be called from both the host and the target side and are controlled by Kconfig options for each of the combinations, but the declarations are controlled by #ifdef checks on the shared Kconfig symbols. This leads to link failures in combinations where one of the frontends is built-in and the other one is a module, and the keyring code ends up in a module that is not reachable from the builtin code: ld: drivers/nvme/host/core.o: in function `nvme_core_exit': core.c:(.exit.text+0x4): undefined reference to `nvme_keyring_exit' ld: drivers/nvme/host/core.o: in function `nvme_core_init': core.c:(.init.text+0x94): undefined reference to `nvme_keyring_init ld: drivers/nvme/host/tcp.o: in function `nvme_tcp_setup_ctrl': tcp.c:(.text+0x4c18): undefined reference to `nvme_tls_psk_default' Address this by moving nvme_keyring_init()/nvme_keyring_exit() into module init/exit functions for the keyring module. Fixes: be8e82caa6859 ("nvme-tcp: enable TLS handshake upcall") Signed-off-by: Hannes Reinecke Cc: Arnd Bergmann Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/common/keyring.c | 9 +++++---- drivers/nvme/host/core.c | 9 +-------- include/linux/nvme-keyring.h | 8 -------- 3 files changed, 6 insertions(+), 20 deletions(-) diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c index 46d7a537dbc2..ee341b83eeba 100644 --- a/drivers/nvme/common/keyring.c +++ b/drivers/nvme/common/keyring.c @@ -151,7 +151,7 @@ key_serial_t nvme_tls_psk_default(struct key *keyring, } EXPORT_SYMBOL_GPL(nvme_tls_psk_default); -int nvme_keyring_init(void) +static int __init nvme_keyring_init(void) { int err; @@ -171,14 +171,15 @@ int nvme_keyring_init(void) } return 0; } -EXPORT_SYMBOL_GPL(nvme_keyring_init); -void nvme_keyring_exit(void) +static void __exit nvme_keyring_exit(void) { unregister_key_type(&nvme_tls_psk_key_type); key_revoke(nvme_keyring); key_put(nvme_keyring); } -EXPORT_SYMBOL_GPL(nvme_keyring_exit); MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Hannes Reinecke "); +module_init(nvme_keyring_init); +module_exit(nvme_keyring_exit); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 75a1b58a7a43..88b54cdcbd68 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -25,7 +25,6 @@ #include "nvme.h" #include "fabrics.h" #include -#include #define CREATE_TRACE_POINTS #include "trace.h" @@ -4737,16 +4736,11 @@ static int __init nvme_core_init(void) result = PTR_ERR(nvme_ns_chr_class); goto unregister_generic_ns; } - result = nvme_keyring_init(); - if (result) - goto destroy_ns_chr; result = nvme_init_auth(); if (result) - goto keyring_exit; + goto destroy_ns_chr; return 0; -keyring_exit: - nvme_keyring_exit(); destroy_ns_chr: class_destroy(nvme_ns_chr_class); unregister_generic_ns: @@ -4770,7 +4764,6 @@ static int __init nvme_core_init(void) static void __exit nvme_core_exit(void) { nvme_exit_auth(); - nvme_keyring_exit(); class_destroy(nvme_ns_chr_class); class_destroy(nvme_subsys_class); class_destroy(nvme_class); diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h index 6cc0696625f3..e10333d78dbb 100644 --- a/include/linux/nvme-keyring.h +++ b/include/linux/nvme-keyring.h @@ -12,8 +12,6 @@ key_serial_t nvme_tls_psk_default(struct key *keyring, const char *hostnqn, const char *subnqn); key_serial_t nvme_keyring_id(void); -int nvme_keyring_init(void); -void nvme_keyring_exit(void); #else @@ -26,11 +24,5 @@ static inline key_serial_t nvme_keyring_id(void) { return 0; } -static inline int nvme_keyring_init(void) -{ - return 0; -} -static inline void nvme_keyring_exit(void) {} - #endif /* !CONFIG_NVME_KEYRING */ #endif /* _NVME_KEYRING_H */ From 65120498aaf8d7320647a8b6d6de7db42e74ea52 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 8 Nov 2023 13:58:27 +0100 Subject: [PATCH 226/415] stackleak: add declarations for global functions With -Wmissing-prototypes enabled, the stackleak code produces a couple of warnings that have no declarations because they are only called from assembler: stackleak.c:127:25: error: no previous prototype for 'stackleak_erase' [-Werror=missing-prototypes] stackleak.c:139:25: error: no previous prototype for 'stackleak_erase_on_task_stack' [-Werror=missing-prototypes] stackleak.c:151:25: error: no previous prototype for 'stackleak_erase_off_task_stack' [-Werror=missing-prototypes] stackleak.c:159:49: error: no previous prototype for 'stackleak_track_stack' [-Werror=missing-prototypes] Add declarations to the stackleak header to shut up the warnings. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20231108125843.3806765-7-arnd@kernel.org Signed-off-by: Kees Cook --- include/linux/stackleak.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h index c36e7a3b45e7..3be2cb564710 100644 --- a/include/linux/stackleak.h +++ b/include/linux/stackleak.h @@ -14,6 +14,7 @@ #ifdef CONFIG_GCC_PLUGIN_STACKLEAK #include +#include /* * The lowest address on tsk's stack which we can plausibly erase. @@ -76,6 +77,11 @@ static inline void stackleak_task_init(struct task_struct *t) # endif } +asmlinkage void noinstr stackleak_erase(void); +asmlinkage void noinstr stackleak_erase_on_task_stack(void); +asmlinkage void noinstr stackleak_erase_off_task_stack(void); +void __no_caller_saved_registers noinstr stackleak_track_stack(void); + #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */ static inline void stackleak_task_init(struct task_struct *t) { } #endif From 866d648059d5faf53f1cd960b43fe8365ad93ea7 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Tue, 31 Oct 2023 13:32:06 +0100 Subject: [PATCH 227/415] lsm: fix default return value for vm_enough_memory 1 is the return value that implements a "no-op" hook, not 0. Cc: stable@vger.kernel.org Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks") Signed-off-by: Ondrej Mosnacek Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 99b8176c3738..4dd55fdfec26 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -48,7 +48,7 @@ LSM_HOOK(int, 0, quota_on, struct dentry *dentry) LSM_HOOK(int, 0, syslog, int type) LSM_HOOK(int, 0, settime, const struct timespec64 *ts, const struct timezone *tz) -LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages) +LSM_HOOK(int, 1, vm_enough_memory, struct mm_struct *mm, long pages) LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm) LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file) LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) From b36995b8609a5a8fe5cf259a1ee768fcaed919f8 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Tue, 31 Oct 2023 13:32:07 +0100 Subject: [PATCH 228/415] lsm: fix default return value for inode_getsecctx -EOPNOTSUPP is the return value that implements a "no-op" hook, not 0. Without this fix having only the BPF LSM enabled (with no programs attached) can cause uninitialized variable reads in nfsd4_encode_fattr(), because the BPF hook returns 0 without touching the 'ctxlen' variable and the corresponding 'contextlen' variable in nfsd4_encode_fattr() remains uninitialized, yet being treated as valid based on the 0 return value. Cc: stable@vger.kernel.org Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks") Reported-by: Benjamin Coddington Signed-off-by: Ondrej Mosnacek Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 4dd55fdfec26..ff217a5ce552 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -273,7 +273,7 @@ LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen) LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode) LSM_HOOK(int, 0, inode_notifysecctx, struct inode *inode, void *ctx, u32 ctxlen) LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen) -LSM_HOOK(int, 0, inode_getsecctx, struct inode *inode, void **ctx, +LSM_HOOK(int, -EOPNOTSUPP, inode_getsecctx, struct inode *inode, void **ctx, u32 *ctxlen) #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) From 1ee60356c2dca938362528404af95b8ef3e49b6a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 4 Nov 2023 13:43:37 -0700 Subject: [PATCH 229/415] gcc-plugins: randstruct: Only warn about true flexible arrays The randstruct GCC plugin tried to discover "fake" flexible arrays to issue warnings about them in randomized structs. In the future LSM overhead reduction series, it would be legal to have a randomized struct with a 1-element array, and this should _not_ be treated as a flexible array, especially since commit df8fc4e934c1 ("kbuild: Enable -fstrict-flex-arrays=3"). Disable the 0-sized and 1-element array discovery logic in the plugin, but keep the "true" flexible array check. Cc: KP Singh Cc: linux-hardening@vger.kernel.org Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311021532.iBwuZUZ0-lkp@intel.com/ Fixes: df8fc4e934c1 ("kbuild: Enable -fstrict-flex-arrays=3") Reviewed-by: Bill Wendling Acked-by: "Gustavo A. R. Silva" Link: https://lore.kernel.org/r/20231104204334.work.160-kees@kernel.org Signed-off-by: Kees Cook --- scripts/gcc-plugins/randomize_layout_plugin.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index 366395cab490..910bd21d08f4 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -278,8 +278,6 @@ static bool is_flexible_array(const_tree field) { const_tree fieldtype; const_tree typesize; - const_tree elemtype; - const_tree elemsize; fieldtype = TREE_TYPE(field); typesize = TYPE_SIZE(fieldtype); @@ -287,20 +285,12 @@ static bool is_flexible_array(const_tree field) if (TREE_CODE(fieldtype) != ARRAY_TYPE) return false; - elemtype = TREE_TYPE(fieldtype); - elemsize = TYPE_SIZE(elemtype); - /* size of type is represented in bits */ if (typesize == NULL_TREE && TYPE_DOMAIN(fieldtype) != NULL_TREE && TYPE_MAX_VALUE(TYPE_DOMAIN(fieldtype)) == NULL_TREE) return true; - if (typesize != NULL_TREE && - (TREE_CONSTANT(typesize) && (!tree_to_uhwi(typesize) || - tree_to_uhwi(typesize) == tree_to_uhwi(elemsize)))) - return true; - return false; } From c4676f8dc1e12e68d6511f9ed89707fdad4c962c Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Fri, 27 Oct 2023 21:12:53 +0530 Subject: [PATCH 230/415] RISC-V: Don't fail in riscv_of_parent_hartid() for disabled HARTs The riscv_of_processor_hartid() used by riscv_of_parent_hartid() fails for HARTs disabled in the DT. This results in the following warning thrown by the RISC-V INTC driver for the E-core on SiFive boards: [ 0.000000] riscv-intc: unable to find hart id for /cpus/cpu@0/interrupt-controller The riscv_of_parent_hartid() is only expected to read the hartid from the DT so we directly call of_get_cpu_hwid() instead of calling riscv_of_processor_hartid(). Fixes: ad635e723e17 ("riscv: cpu: Add 64bit hartid support on RV64") Signed-off-by: Anup Patel Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20231027154254.355853-2-apatel@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpu.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index c17dacb1141c..157ace8b262c 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -125,13 +125,14 @@ int __init riscv_early_of_processor_hartid(struct device_node *node, unsigned lo */ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid) { - int rc; - for (; node; node = node->parent) { if (of_device_is_compatible(node, "riscv")) { - rc = riscv_of_processor_hartid(node, hartid); - if (!rc) - return 0; + *hartid = (unsigned long)of_get_cpu_hwid(node, 0); + if (*hartid == ~0UL) { + pr_warn("Found CPU without hart ID\n"); + return -ENODEV; + } + return 0; } } From c5e4ce9db635fef5ebffdfba1e7d80710474b176 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Fri, 27 Oct 2023 21:12:54 +0530 Subject: [PATCH 231/415] of: property: Add fw_devlink support for msi-parent This allows fw_devlink to create device links between consumers of a MSI and the supplier of the MSI. Signed-off-by: Anup Patel Acked-by: Rob Herring Reviewed-by: Saravana Kannan Link: https://lore.kernel.org/r/20231027154254.355853-3-apatel@ventanamicro.com Signed-off-by: Palmer Dabbelt --- drivers/of/property.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/of/property.c b/drivers/of/property.c index cf8dacf3e3b8..afdaefbd03f6 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1267,6 +1267,7 @@ DEFINE_SIMPLE_PROP(resets, "resets", "#reset-cells") DEFINE_SIMPLE_PROP(leds, "leds", NULL) DEFINE_SIMPLE_PROP(backlight, "backlight", NULL) DEFINE_SIMPLE_PROP(panel, "panel", NULL) +DEFINE_SIMPLE_PROP(msi_parent, "msi-parent", "#msi-cells") DEFINE_SUFFIX_PROP(regulators, "-supply", NULL) DEFINE_SUFFIX_PROP(gpio, "-gpio", "#gpio-cells") @@ -1356,6 +1357,7 @@ static const struct supplier_bindings of_supplier_bindings[] = { { .parse_prop = parse_leds, }, { .parse_prop = parse_backlight, }, { .parse_prop = parse_panel, }, + { .parse_prop = parse_msi_parent, }, { .parse_prop = parse_gpio_compat, }, { .parse_prop = parse_interrupts, }, { .parse_prop = parse_regulators, }, From 6c8e69e4a702b072206f166111c003d704de15d9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 3 Nov 2023 12:26:25 +0000 Subject: [PATCH 232/415] btrfs: fix race between accounting qgroup extents and removing a qgroup When doing qgroup accounting for an extent, we take the spinlock fs_info->qgroup_lock and then add qgroups to the local list (iterator) named "qgroups". These qgroups are found in the fs_info->qgroup_tree rbtree. After we're done, we unlock fs_info->qgroup_lock and then call qgroup_iterator_nested_clean(), which will iterate over all the qgroups added to the local list "qgroups" and then delete them from the list. Deleting a qgroup from the list can however result in a use-after-free if a qgroup remove operation happens after we unlock fs_info->qgroup_lock and before or while we are at qgroup_iterator_nested_clean(). Fix this by calling qgroup_iterator_nested_clean() while still holding the lock fs_info->qgroup_lock - we don't need it under the 'out' label since before taking the lock the "qgroups" list is always empty. This guarantees safety because btrfs_remove_qgroup() takes that lock before removing a qgroup from the rbtree fs_info->qgroup_tree. This was reported by syzbot with the following stack traces: BUG: KASAN: slab-use-after-free in __list_del_entry_valid_or_report+0x2f/0x130 lib/list_debug.c:49 Read of size 8 at addr ffff888027e420b0 by task kworker/u4:3/48 CPU: 1 PID: 48 Comm: kworker/u4:3 Not tainted 6.6.0-syzkaller-10396-g4652b8e4f3ff #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 Workqueue: btrfs-qgroup-rescan btrfs_work_helper Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e7/0x2d0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:364 [inline] print_report+0x163/0x540 mm/kasan/report.c:475 kasan_report+0x175/0x1b0 mm/kasan/report.c:588 __list_del_entry_valid_or_report+0x2f/0x130 lib/list_debug.c:49 __list_del_entry_valid include/linux/list.h:124 [inline] __list_del_entry include/linux/list.h:215 [inline] list_del_init include/linux/list.h:287 [inline] qgroup_iterator_nested_clean fs/btrfs/qgroup.c:2623 [inline] btrfs_qgroup_account_extent+0x18b/0x1150 fs/btrfs/qgroup.c:2883 qgroup_rescan_leaf fs/btrfs/qgroup.c:3543 [inline] btrfs_qgroup_rescan_worker+0x1078/0x1c60 fs/btrfs/qgroup.c:3604 btrfs_work_helper+0x37c/0xbd0 fs/btrfs/async-thread.c:315 process_one_work kernel/workqueue.c:2630 [inline] process_scheduled_works+0x90f/0x1400 kernel/workqueue.c:2703 worker_thread+0xa5f/0xff0 kernel/workqueue.c:2784 kthread+0x2d3/0x370 kernel/kthread.c:388 ret_from_fork+0x48/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242 Allocated by task 6355: kasan_save_stack mm/kasan/common.c:45 [inline] kasan_set_track+0x4f/0x70 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:374 [inline] __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:383 kmalloc include/linux/slab.h:600 [inline] kzalloc include/linux/slab.h:721 [inline] btrfs_quota_enable+0xee9/0x2060 fs/btrfs/qgroup.c:1209 btrfs_ioctl_quota_ctl+0x143/0x190 fs/btrfs/ioctl.c:3705 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl+0xf8/0x170 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Freed by task 6355: kasan_save_stack mm/kasan/common.c:45 [inline] kasan_set_track+0x4f/0x70 mm/kasan/common.c:52 kasan_save_free_info+0x28/0x40 mm/kasan/generic.c:522 ____kasan_slab_free+0xd6/0x120 mm/kasan/common.c:236 kasan_slab_free include/linux/kasan.h:164 [inline] slab_free_hook mm/slub.c:1800 [inline] slab_free_freelist_hook mm/slub.c:1826 [inline] slab_free mm/slub.c:3809 [inline] __kmem_cache_free+0x263/0x3a0 mm/slub.c:3822 btrfs_remove_qgroup+0x764/0x8c0 fs/btrfs/qgroup.c:1787 btrfs_ioctl_qgroup_create+0x185/0x1e0 fs/btrfs/ioctl.c:3811 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl+0xf8/0x170 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Last potentially related work creation: kasan_save_stack+0x3f/0x60 mm/kasan/common.c:45 __kasan_record_aux_stack+0xad/0xc0 mm/kasan/generic.c:492 __call_rcu_common kernel/rcu/tree.c:2667 [inline] call_rcu+0x167/0xa70 kernel/rcu/tree.c:2781 kthread_worker_fn+0x4ba/0xa90 kernel/kthread.c:823 kthread+0x2d3/0x370 kernel/kthread.c:388 ret_from_fork+0x48/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242 Second to last potentially related work creation: kasan_save_stack+0x3f/0x60 mm/kasan/common.c:45 __kasan_record_aux_stack+0xad/0xc0 mm/kasan/generic.c:492 __call_rcu_common kernel/rcu/tree.c:2667 [inline] call_rcu+0x167/0xa70 kernel/rcu/tree.c:2781 kthread_worker_fn+0x4ba/0xa90 kernel/kthread.c:823 kthread+0x2d3/0x370 kernel/kthread.c:388 ret_from_fork+0x48/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242 The buggy address belongs to the object at ffff888027e42000 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 176 bytes inside of freed 512-byte region [ffff888027e42000, ffff888027e42200) The buggy address belongs to the physical page: page:ffffea00009f9000 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x27e40 head:ffffea00009f9000 order:2 entire_mapcount:0 nr_pages_mapped:0 pincount:0 flags: 0xfff00000000840(slab|head|node=0|zone=1|lastcpupid=0x7ff) page_type: 0xffffffff() raw: 00fff00000000840 ffff888012c41c80 ffffea0000a5ba00 dead000000000002 raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 2, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 4514, tgid 4514 (udevadm), ts 24598439480, free_ts 23755696267 set_page_owner include/linux/page_owner.h:31 [inline] post_alloc_hook+0x1e6/0x210 mm/page_alloc.c:1536 prep_new_page mm/page_alloc.c:1543 [inline] get_page_from_freelist+0x31db/0x3360 mm/page_alloc.c:3170 __alloc_pages+0x255/0x670 mm/page_alloc.c:4426 alloc_slab_page+0x6a/0x160 mm/slub.c:1870 allocate_slab mm/slub.c:2017 [inline] new_slab+0x84/0x2f0 mm/slub.c:2070 ___slab_alloc+0xc85/0x1310 mm/slub.c:3223 __slab_alloc mm/slub.c:3322 [inline] __slab_alloc_node mm/slub.c:3375 [inline] slab_alloc_node mm/slub.c:3468 [inline] __kmem_cache_alloc_node+0x19d/0x270 mm/slub.c:3517 kmalloc_trace+0x2a/0xe0 mm/slab_common.c:1098 kmalloc include/linux/slab.h:600 [inline] kzalloc include/linux/slab.h:721 [inline] kernfs_fop_open+0x3e7/0xcc0 fs/kernfs/file.c:670 do_dentry_open+0x8fd/0x1590 fs/open.c:948 do_open fs/namei.c:3622 [inline] path_openat+0x2845/0x3280 fs/namei.c:3779 do_filp_open+0x234/0x490 fs/namei.c:3809 do_sys_openat2+0x13e/0x1d0 fs/open.c:1440 do_sys_open fs/open.c:1455 [inline] __do_sys_openat fs/open.c:1471 [inline] __se_sys_openat fs/open.c:1466 [inline] __x64_sys_openat+0x247/0x290 fs/open.c:1466 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1136 [inline] free_unref_page_prepare+0x8c3/0x9f0 mm/page_alloc.c:2312 free_unref_page+0x37/0x3f0 mm/page_alloc.c:2405 discard_slab mm/slub.c:2116 [inline] __unfreeze_partials+0x1dc/0x220 mm/slub.c:2655 put_cpu_partial+0x17b/0x250 mm/slub.c:2731 __slab_free+0x2b6/0x390 mm/slub.c:3679 qlink_free mm/kasan/quarantine.c:166 [inline] qlist_free_all+0x75/0xe0 mm/kasan/quarantine.c:185 kasan_quarantine_reduce+0x14b/0x160 mm/kasan/quarantine.c:292 __kasan_slab_alloc+0x23/0x70 mm/kasan/common.c:305 kasan_slab_alloc include/linux/kasan.h:188 [inline] slab_post_alloc_hook+0x67/0x3d0 mm/slab.h:762 slab_alloc_node mm/slub.c:3478 [inline] slab_alloc mm/slub.c:3486 [inline] __kmem_cache_alloc_lru mm/slub.c:3493 [inline] kmem_cache_alloc+0x104/0x2c0 mm/slub.c:3502 getname_flags+0xbc/0x4f0 fs/namei.c:140 do_sys_openat2+0xd2/0x1d0 fs/open.c:1434 do_sys_open fs/open.c:1455 [inline] __do_sys_openat fs/open.c:1471 [inline] __se_sys_openat fs/open.c:1466 [inline] __x64_sys_openat+0x247/0x290 fs/open.c:1466 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Memory state around the buggy address: ffff888027e41f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff888027e42000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff888027e42080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888027e42100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888027e42180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Reported-by: syzbot+e0b615318f8fcfc01ceb@syzkaller.appspotmail.com Fixes: dce28769a33a ("btrfs: qgroup: use qgroup_iterator_nested to in qgroup_update_refcnt()") CC: stable@vger.kernel.org # 6.6 Link: https://lore.kernel.org/linux-btrfs/00000000000091a5b2060936bf6d@google.com/ Reviewed-by: Josef Bacik Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index edb84cc03237..e48eba7e9379 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2874,13 +2874,19 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr, qgroup_update_counters(fs_info, &qgroups, nr_old_roots, nr_new_roots, num_bytes, seq); + /* + * We're done using the iterator, release all its qgroups while holding + * fs_info->qgroup_lock so that we don't race with btrfs_remove_qgroup() + * and trigger use-after-free accesses to qgroups. + */ + qgroup_iterator_nested_clean(&qgroups); + /* * Bump qgroup_seq to avoid seq overlap */ fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1; spin_unlock(&fs_info->qgroup_lock); out_free: - qgroup_iterator_nested_clean(&qgroups); ulist_free(old_roots); ulist_free(new_roots); return ret; From 609d99379736aa6c5b0658654084198aa808035a Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 6 Nov 2023 20:17:37 +0000 Subject: [PATCH 233/415] btrfs: fix qgroup record leaks when using simple quotas When using simple quotas we are not supposed to allocate qgroup records when adding delayed references. However we allocate them if either mode of quotas is enabled (the new simple one or the old one), but then we never free them because running the accounting, which frees the records, is only run when using the old quotas (at btrfs_qgroup_account_extents()), resulting in a memory leak of the records allocated when adding delayed references. Fix this by allocating the records only if the old quotas mode is enabled. Also fix btrfs_qgroup_trace_extent_nolock() to return 1 if the old quotas mode is not enabled - meaning the caller has to free the record. Fixes: 182940f4f4db ("btrfs: qgroup: add new quota mode for simple quotas") Reported-by: syzbot+d3ddc6dcc6386dea398b@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/00000000000004769106097f9a34@google.com/ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.c | 4 ++-- fs/btrfs/qgroup.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 9223934d95f4..891ea2fa263c 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -1041,7 +1041,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, return -ENOMEM; } - if (btrfs_qgroup_enabled(fs_info) && !generic_ref->skip_qgroup) { + if (btrfs_qgroup_full_accounting(fs_info) && !generic_ref->skip_qgroup) { record = kzalloc(sizeof(*record), GFP_NOFS); if (!record) { kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); @@ -1144,7 +1144,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, return -ENOMEM; } - if (btrfs_qgroup_enabled(fs_info) && !generic_ref->skip_qgroup) { + if (btrfs_qgroup_full_accounting(fs_info) && !generic_ref->skip_qgroup) { record = kzalloc(sizeof(*record), GFP_NOFS); if (!record) { kmem_cache_free(btrfs_delayed_data_ref_cachep, ref); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index e48eba7e9379..ce446d9d7f23 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1888,7 +1888,7 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, u64 bytenr = record->bytenr; if (!btrfs_qgroup_full_accounting(fs_info)) - return 0; + return 1; lockdep_assert_held(&delayed_refs->lock); trace_btrfs_qgroup_trace_extent(fs_info, record); From d3933152442b7f94419e9ea71835d71b620baf0e Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Fri, 3 Nov 2023 11:38:04 -0700 Subject: [PATCH 234/415] btrfs: make OWNER_REF_KEY type value smallest among inline refs BTRFS_EXTENT_OWNER_REF_KEY is the type of simple quotas extent owner refs. This special inline ref goes in front of all other inline refs. In general, inline refs have a required sorted order s.t. type never decreases (among other requirements). This was recently reified into a tree-checker and fsck rule, which broke simple quotas. To be fair, though, in a sense, the new owner ref item had also violated that not yet fully enforced requirement. This fix brings the owner ref item into compliance with the requirement that inline ref type never decrease. btrfs/301 exercises this behavior and should pass again with this fix. Fixes: d9a620f77e33 ("btrfs: new inline ref storing owning subvol of data extents") Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index c25fc9614594..d24e8e121507 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -219,6 +219,22 @@ */ #define BTRFS_METADATA_ITEM_KEY 169 +/* + * Special inline ref key which stores the id of the subvolume which originally + * created the extent. This subvolume owns the extent permanently from the + * perspective of simple quotas. Needed to know which subvolume to free quota + * usage from when the extent is deleted. + * + * Stored as an inline ref rather to avoid wasting space on a separate item on + * top of the existing extent item. However, unlike the other inline refs, + * there is one one owner ref per extent rather than one per extent. + * + * Because of this, it goes at the front of the list of inline refs, and thus + * must have a lower type value than any other inline ref type (to satisfy the + * disk format rule that inline refs have non-decreasing type). + */ +#define BTRFS_EXTENT_OWNER_REF_KEY 172 + #define BTRFS_TREE_BLOCK_REF_KEY 176 #define BTRFS_EXTENT_DATA_REF_KEY 178 @@ -233,14 +249,6 @@ #define BTRFS_SHARED_DATA_REF_KEY 184 -/* - * Special inline ref key which stores the id of the subvolume which originally - * created the extent. This subvolume owns the extent permanently from the - * perspective of simple quotas. Needed to know which subvolume to free quota - * usage from when the extent is deleted. - */ -#define BTRFS_EXTENT_OWNER_REF_KEY 188 - /* * block groups give us hints into the extent allocation trees. Which * blocks are free etc etc From c6e316ac05532febb0c966fa9b55f5258ed037be Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Thu, 9 Nov 2023 09:21:28 +0100 Subject: [PATCH 235/415] drivers: perf: Check find_first_bit() return value We must check the return value of find_first_bit() before using the return value as an index array since it happens to overflow the array and then panic: [ 107.318430] Kernel BUG [#1] [ 107.319434] CPU: 3 PID: 1238 Comm: kill Tainted: G E 6.6.0-rc6ubuntu-defconfig #2 [ 107.319465] Hardware name: riscv-virtio,qemu (DT) [ 107.319551] epc : pmu_sbi_ovf_handler+0x3a4/0x3ae [ 107.319840] ra : pmu_sbi_ovf_handler+0x52/0x3ae [ 107.319868] epc : ffffffff80a0a77c ra : ffffffff80a0a42a sp : ffffaf83fecda350 [ 107.319884] gp : ffffffff823961a8 tp : ffffaf8083db1dc0 t0 : ffffaf83fecda480 [ 107.319899] t1 : ffffffff80cafe62 t2 : 000000000000ff00 s0 : ffffaf83fecda520 [ 107.319921] s1 : ffffaf83fecda380 a0 : 00000018fca29df0 a1 : ffffffffffffffff [ 107.319936] a2 : 0000000001073734 a3 : 0000000000000004 a4 : 0000000000000000 [ 107.319951] a5 : 0000000000000040 a6 : 000000001d1c8774 a7 : 0000000000504d55 [ 107.319965] s2 : ffffffff82451f10 s3 : ffffffff82724e70 s4 : 000000000000003f [ 107.319980] s5 : 0000000000000011 s6 : ffffaf8083db27c0 s7 : 0000000000000000 [ 107.319995] s8 : 0000000000000001 s9 : 00007fffb45d6558 s10: 00007fffb45d81a0 [ 107.320009] s11: ffffaf7ffff60000 t3 : 0000000000000004 t4 : 0000000000000000 [ 107.320023] t5 : ffffaf7f80000000 t6 : ffffaf8000000000 [ 107.320037] status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003 [ 107.320081] [] pmu_sbi_ovf_handler+0x3a4/0x3ae [ 107.320112] [] handle_percpu_devid_irq+0x9e/0x1a0 [ 107.320131] [] generic_handle_domain_irq+0x28/0x36 [ 107.320148] [] riscv_intc_irq+0x36/0x4e [ 107.320166] [] handle_riscv_irq+0x54/0x86 [ 107.320189] [] do_irq+0x64/0x96 [ 107.320271] Code: 85a6 855e b097 ff7f 80e7 9220 b709 9002 4501 bbd9 (9002) 6097 [ 107.320585] ---[ end trace 0000000000000000 ]--- [ 107.320704] Kernel panic - not syncing: Fatal exception in interrupt [ 107.320775] SMP: stopping secondary CPUs [ 107.321219] Kernel Offset: 0x0 from 0xffffffff80000000 [ 107.333051] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Fixes: 4905ec2fb7e6 ("RISC-V: Add sscofpmf extension support") Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20231109082128.40777-1-alexghiti@rivosinc.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- drivers/perf/riscv_pmu_sbi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 9a51053b1f99..0b5053152ee6 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -685,6 +685,11 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) /* Firmware counter don't support overflow yet */ fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS); + if (fidx == RISCV_MAX_COUNTERS) { + csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); + return IRQ_NONE; + } + event = cpu_hw_evt->events[fidx]; if (!event) { csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); From 61e3d993c8bd3e80f8f1363ed5e04f88ab531b72 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Thu, 26 Oct 2023 10:40:10 +0200 Subject: [PATCH 236/415] drivers: perf: Do not broadcast to other cpus when starting a counter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This command: $ perf record -e cycles:k -e instructions:k -c 10000 -m 64M dd if=/dev/zero of=/dev/null count=1000 gives rise to this kernel warning: [ 444.364395] WARNING: CPU: 0 PID: 104 at kernel/smp.c:775 smp_call_function_many_cond+0x42c/0x436 [ 444.364515] Modules linked in: [ 444.364657] CPU: 0 PID: 104 Comm: perf-exec Not tainted 6.6.0-rc6-00051-g391df82e8ec3-dirty #73 [ 444.364771] Hardware name: riscv-virtio,qemu (DT) [ 444.364868] epc : smp_call_function_many_cond+0x42c/0x436 [ 444.364917] ra : on_each_cpu_cond_mask+0x20/0x32 [ 444.364948] epc : ffffffff8009f9e0 ra : ffffffff8009fa5a sp : ff20000000003800 [ 444.364966] gp : ffffffff81500aa0 tp : ff60000002b83000 t0 : ff200000000038c0 [ 444.364982] t1 : ffffffff815021f0 t2 : 000000000000001f s0 : ff200000000038b0 [ 444.364998] s1 : ff60000002c54d98 a0 : ff60000002a73940 a1 : 0000000000000000 [ 444.365013] a2 : 0000000000000000 a3 : 0000000000000003 a4 : 0000000000000100 [ 444.365029] a5 : 0000000000010100 a6 : 0000000000f00000 a7 : 0000000000000000 [ 444.365044] s2 : 0000000000000000 s3 : ffffffffffffffff s4 : ff60000002c54d98 [ 444.365060] s5 : ffffffff81539610 s6 : ffffffff80c20c48 s7 : 0000000000000000 [ 444.365075] s8 : 0000000000000000 s9 : 0000000000000001 s10: 0000000000000001 [ 444.365090] s11: ffffffff80099394 t3 : 0000000000000003 t4 : 00000000eac0c6e6 [ 444.365104] t5 : 0000000400000000 t6 : ff60000002e010d0 [ 444.365120] status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003 [ 444.365226] [] smp_call_function_many_cond+0x42c/0x436 [ 444.365295] [] on_each_cpu_cond_mask+0x20/0x32 [ 444.365311] [] pmu_sbi_ctr_start+0x7a/0xaa [ 444.365327] [] riscv_pmu_start+0x48/0x66 [ 444.365339] [] perf_adjust_freq_unthr_context+0x196/0x1ac [ 444.365356] [] perf_event_task_tick+0x78/0x8c [ 444.365368] [] scheduler_tick+0xe6/0x25e [ 444.365383] [] update_process_times+0x80/0x96 [ 444.365398] [] tick_sched_handle+0x26/0x52 [ 444.365410] [] tick_sched_timer+0x50/0x98 [ 444.365422] [] __hrtimer_run_queues+0x126/0x18a [ 444.365433] [] hrtimer_interrupt+0xce/0x1da [ 444.365444] [] riscv_timer_interrupt+0x30/0x3a [ 444.365457] [] handle_percpu_devid_irq+0x80/0x114 [ 444.365470] [] generic_handle_domain_irq+0x1c/0x2a [ 444.365483] [] riscv_intc_irq+0x2e/0x46 [ 444.365497] [] handle_riscv_irq+0x4a/0x74 [ 444.365521] [] do_irq+0x7c/0x7e [ 444.365796] ---[ end trace 0000000000000000 ]--- That's because the fix in commit 3fec323339a4 ("drivers: perf: Fix panic in riscv SBI mmap support") was wrong since there is no need to broadcast to other cpus when starting a counter, that's only needed in mmap when the counters could have already been started on other cpus, so simply remove this broadcast. Fixes: 3fec323339a4 ("drivers: perf: Fix panic in riscv SBI mmap support") Signed-off-by: Alexandre Ghiti Tested-by: Clément Léger Tested-by: Yu Chien Peter Lin Tested-by: Lad Prabhakar #On Link: https://lore.kernel.org/r/20231026084010.11888-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- drivers/perf/riscv_pmu_sbi.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 96c7f670c8f0..fcb0c70ca222 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -543,8 +543,7 @@ static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival) if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) && (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT)) - on_each_cpu_mask(mm_cpumask(event->owner->mm), - pmu_sbi_set_scounteren, (void *)event, 1); + pmu_sbi_set_scounteren((void *)event); } static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag) @@ -554,8 +553,7 @@ static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag) if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) && (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT)) - on_each_cpu_mask(mm_cpumask(event->owner->mm), - pmu_sbi_reset_scounteren, (void *)event, 1); + pmu_sbi_reset_scounteren((void *)event); ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0); if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) && From c7a60651953359f98dbf24b43e1bf561e1573ed4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 9 Nov 2023 15:19:54 +0100 Subject: [PATCH 237/415] ALSA: info: Fix potential deadlock at disconnection As reported recently, ALSA core info helper may cause a deadlock at the forced device disconnection during the procfs operation. The proc_remove() (that is called from the snd_card_disconnect() helper) has a synchronization of the pending procfs accesses via wait_for_completion(). Meanwhile, ALSA procfs helper takes the global mutex_lock(&info_mutex) at both the proc_open callback and snd_card_info_disconnect() helper. Since the proc_open can't finish due to the mutex lock, wait_for_completion() never returns, either, hence it deadlocks. TASK#1 TASK#2 proc_reg_open() takes use_pde() snd_info_text_entry_open() snd_card_disconnect() snd_info_card_disconnect() takes mutex_lock(&info_mutex) proc_remove() wait_for_completion(unused_pde) ... waiting task#1 closes mutex_lock(&info_mutex) => DEADLOCK This patch is a workaround for avoiding the deadlock scenario above. The basic strategy is to move proc_remove() call outside the mutex lock. proc_remove() can work gracefully without extra locking, and it can delete the tree recursively alone. So, we call proc_remove() at snd_info_card_disconnection() at first, then delete the rest resources recursively within the info_mutex lock. After the change, the function snd_info_disconnect() doesn't do disconnection by itself any longer, but it merely clears the procfs pointer. So rename the function to snd_info_clear_entries() for avoiding confusion. The similar change is applied to snd_info_free_entry(), too. Since the proc_remove() is called only conditionally with the non-NULL entry->p, it's skipped after the snd_info_clear_entries() call. Reported-by: Shinhyung Kang Closes: https://lore.kernel.org/r/664457955.21699345385931.JavaMail.epsvc@epcpadp4 Reviewed-by: Jaroslav Kysela Cc: Link: https://lore.kernel.org/r/20231109141954.4283-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/info.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/sound/core/info.c b/sound/core/info.c index 0b2f04dcb589..e2f302e55bbb 100644 --- a/sound/core/info.c +++ b/sound/core/info.c @@ -56,7 +56,7 @@ struct snd_info_private_data { }; static int snd_info_version_init(void); -static void snd_info_disconnect(struct snd_info_entry *entry); +static void snd_info_clear_entries(struct snd_info_entry *entry); /* @@ -569,11 +569,16 @@ void snd_info_card_disconnect(struct snd_card *card) { if (!card) return; - mutex_lock(&info_mutex); + proc_remove(card->proc_root_link); - card->proc_root_link = NULL; if (card->proc_root) - snd_info_disconnect(card->proc_root); + proc_remove(card->proc_root->p); + + mutex_lock(&info_mutex); + if (card->proc_root) + snd_info_clear_entries(card->proc_root); + card->proc_root_link = NULL; + card->proc_root = NULL; mutex_unlock(&info_mutex); } @@ -745,15 +750,14 @@ struct snd_info_entry *snd_info_create_card_entry(struct snd_card *card, } EXPORT_SYMBOL(snd_info_create_card_entry); -static void snd_info_disconnect(struct snd_info_entry *entry) +static void snd_info_clear_entries(struct snd_info_entry *entry) { struct snd_info_entry *p; if (!entry->p) return; list_for_each_entry(p, &entry->children, list) - snd_info_disconnect(p); - proc_remove(entry->p); + snd_info_clear_entries(p); entry->p = NULL; } @@ -770,8 +774,9 @@ void snd_info_free_entry(struct snd_info_entry * entry) if (!entry) return; if (entry->p) { + proc_remove(entry->p); mutex_lock(&info_mutex); - snd_info_disconnect(entry); + snd_info_clear_entries(entry); mutex_unlock(&info_mutex); } From 6ae90e906aed727759b88eb2b000fcdc8fcd94a3 Mon Sep 17 00:00:00 2001 From: Vitalii Torshyn Date: Thu, 9 Nov 2023 01:13:54 +0200 Subject: [PATCH 238/415] ALSA: hda: ASUS UM5302LA: Added quirks for cs35L41/10431A83 on i2c bus Proposed patch fixes initialization of CSC3551 on the UM5302LA laptop. Patching DSDT table is not required since ASUS did added _DSD entry. Nothing new introduced but reused work started by Stefan B. Currently there is no official firmware available for 10431A83 on cirrus git unfortunately. For testing used 104317f3 (which is also seems on i2c bus): $ cd /lib/firmware/cirrus/ && \ for fw in $(find ./ -name '*104317f3*'); do newfw=$(echo $fw | sed 's/104317f3/10431a83/g'); echo echo "$fw -> $newfw"; ln -s $f $newfw; done With the patch applied to 6.6.0 and obviously symlinks to 104317F3 FW, speakers works and to my susrprise they sound quite good and loud without distortion. Probably confirmation from cirrus team is needed on firmware. Signed-off-by: Vitalii Torshyn Link: https://bugzilla.kernel.org/show_bug.cgi?id=218119 Link: https://lore.kernel.org/r/CAHiQ-bCMPpCJ8eOYAaVVoqGkFixS1qTgSS4xfbZvL4oZV9LYew@mail.gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0df44b1876f3..669ae3d6e447 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7447,6 +7447,7 @@ enum { ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD, ALC2XX_FIXUP_HEADSET_MIC, ALC289_FIXUP_DELL_CS35L41_SPI_2, + ALC294_FIXUP_CS35L41_I2C_2, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -9576,6 +9577,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC289_FIXUP_DUAL_SPK }, + [ALC294_FIXUP_CS35L41_I2C_2] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35l41_fixup_i2c_two, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -9939,6 +9944,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B), SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC), From 43960dc2328e554c4c61b22c47e77e8b1c48d854 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 6 Nov 2023 13:31:45 -0600 Subject: [PATCH 239/415] smb3: minor RDMA cleanup Some minor smbdirect debug cleanup spotted by checkpatch Cc: Long Li Reviewed-by: Bharath SM Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 6d8804fb6535..0d09bc3ea283 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -746,14 +746,14 @@ static ssize_t name##_write(struct file *file, const char __user *buffer, \ size_t count, loff_t *ppos) \ { \ int rc; \ - rc = kstrtoint_from_user(buffer, count, 10, & name); \ + rc = kstrtoint_from_user(buffer, count, 10, &name); \ if (rc) \ return rc; \ return count; \ } \ static int name##_proc_show(struct seq_file *m, void *v) \ { \ - seq_printf(m, "%d\n", name ); \ + seq_printf(m, "%d\n", name); \ return 0; \ } \ static int name##_open(struct inode *inode, struct file *file) \ From 1bc081b67a79b6e75fae686e98048cea1038ae31 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 6 Nov 2023 22:40:38 -0600 Subject: [PATCH 240/415] smb3: more minor cleanups for session handling routines Some trivial cleanup pointed out by checkpatch Reviewed-by: Bharath SM Signed-off-by: Steve French --- fs/smb/client/sess.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index cd474cf98f30..829c7688a7b7 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -659,8 +659,7 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, if (WARN_ON_ONCE(len < 0)) len = CIFS_MAX_DOMAINNAME_LEN - 1; bcc_ptr += len; - } /* else we will send a null domain name - so the server will default to its own domain */ + } /* else we send a null domain name so server will default to its own domain */ *bcc_ptr = 0; bcc_ptr++; @@ -756,11 +755,14 @@ static void decode_ascii_ssetup(char **pbcc_area, __u16 bleft, if (len > bleft) return; - /* No domain field in LANMAN case. Domain is - returned by old servers in the SMB negprot response */ - /* BB For newer servers which do not support Unicode, - but thus do return domain here we could add parsing - for it later, but it is not very important */ + /* + * No domain field in LANMAN case. Domain is + * returned by old servers in the SMB negprot response + * + * BB For newer servers which do not support Unicode, + * but thus do return domain here, we could add parsing + * for it later, but it is not very important + */ cifs_dbg(FYI, "ascii: bytes left %d\n", bleft); } #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ @@ -816,9 +818,12 @@ int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, ses->ntlmssp->server_flags = server_flags; memcpy(ses->ntlmssp->cryptkey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE); - /* In particular we can examine sign flags */ - /* BB spec says that if AvId field of MsvAvTimestamp is populated then - we must set the MIC field of the AUTHENTICATE_MESSAGE */ + /* + * In particular we can examine sign flags + * + * BB spec says that if AvId field of MsvAvTimestamp is populated then + * we must set the MIC field of the AUTHENTICATE_MESSAGE + */ tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset); tilen = le16_to_cpu(pblob->TargetInfoArray.Length); From 0c51cc6f2cb0108e7d49805f6e089cd85caab279 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 13 Oct 2023 09:25:30 +0000 Subject: [PATCH 241/415] cifs: handle cases where a channel is closed So far, SMB multichannel could only scale up, but not scale down the number of channels. In this series of patch, we now allow the client to deal with the case of multichannel disabled on the server when the share is mounted. With that change, we now need the ability to scale down the channels. This change allows the client to deal with cases of missing channels more gracefully. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 5 +++++ fs/smb/client/cifsglob.h | 1 + fs/smb/client/cifsproto.h | 2 +- fs/smb/client/connect.c | 6 +++++- fs/smb/client/sess.c | 28 ++++++++++++++++++++++++---- fs/smb/client/smb2transport.c | 8 +++++++- 6 files changed, 43 insertions(+), 7 deletions(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 0d09bc3ea283..772d4226957f 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -136,6 +136,11 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan) { struct TCP_Server_Info *server = chan->server; + if (!server) { + seq_printf(m, "\n\n\t\tChannel: %d DISABLED", i+1); + return; + } + seq_printf(m, "\n\n\t\tChannel: %d ConnectionId: 0x%llx" "\n\t\tNumber of credits: %d,%d,%d Dialect 0x%x" "\n\t\tTCP status: %d Instance: %d" diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index e55f49e278a2..eae138040edf 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1050,6 +1050,7 @@ struct cifs_ses { spinlock_t chan_lock; /* ========= begin: protected by chan_lock ======== */ #define CIFS_MAX_CHANNELS 16 +#define CIFS_INVAL_CHAN_INDEX (-1) #define CIFS_ALL_CHANNELS_SET(ses) \ ((1UL << (ses)->chan_count) - 1) #define CIFS_ALL_CHANS_GOOD(ses) \ diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 890ceddae07e..c1f71c6be7e3 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -616,7 +616,7 @@ bool is_server_using_iface(struct TCP_Server_Info *server, bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface); void cifs_ses_mark_for_reconnect(struct cifs_ses *ses); -unsigned int +int cifs_ses_get_chan_index(struct cifs_ses *ses, struct TCP_Server_Info *server); void diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 1a137b33858a..3ff82f0aa00e 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -173,8 +173,12 @@ cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { spin_lock(&ses->chan_lock); for (i = 0; i < ses->chan_count; i++) { + if (!ses->chans[i].server) + continue; + spin_lock(&ses->chans[i].server->srv_lock); - ses->chans[i].server->tcpStatus = CifsNeedReconnect; + if (ses->chans[i].server->tcpStatus != CifsExiting) + ses->chans[i].server->tcpStatus = CifsNeedReconnect; spin_unlock(&ses->chans[i].server->srv_lock); } spin_unlock(&ses->chan_lock); diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 829c7688a7b7..d13a24613710 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -69,7 +69,7 @@ bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface) /* channel helper functions. assumed that chan_lock is held by caller. */ -unsigned int +int cifs_ses_get_chan_index(struct cifs_ses *ses, struct TCP_Server_Info *server) { @@ -85,14 +85,17 @@ cifs_ses_get_chan_index(struct cifs_ses *ses, cifs_dbg(VFS, "unable to get chan index for server: 0x%llx", server->conn_id); WARN_ON(1); - return 0; + return CIFS_INVAL_CHAN_INDEX; } void cifs_chan_set_in_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server) { - unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + int chan_index = cifs_ses_get_chan_index(ses, server); + + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return; ses->chans[chan_index].in_reconnect = true; } @@ -102,6 +105,8 @@ cifs_chan_clear_in_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return; ses->chans[chan_index].in_reconnect = false; } @@ -111,6 +116,8 @@ cifs_chan_in_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return true; /* err on the safer side */ return CIFS_CHAN_IN_RECONNECT(ses, chan_index); } @@ -120,6 +127,8 @@ cifs_chan_set_need_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return; set_bit(chan_index, &ses->chans_need_reconnect); cifs_dbg(FYI, "Set reconnect bitmask for chan %u; now 0x%lx\n", @@ -131,6 +140,8 @@ cifs_chan_clear_need_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return; clear_bit(chan_index, &ses->chans_need_reconnect); cifs_dbg(FYI, "Cleared reconnect bitmask for chan %u; now 0x%lx\n", @@ -142,6 +153,8 @@ cifs_chan_needs_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return true; /* err on the safer side */ return CIFS_CHAN_NEEDS_RECONNECT(ses, chan_index); } @@ -151,6 +164,8 @@ cifs_chan_is_iface_active(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) + return true; /* err on the safer side */ return ses->chans[chan_index].iface && ses->chans[chan_index].iface->is_active; @@ -269,7 +284,7 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) spin_lock(&ses->chan_lock); chan_index = cifs_ses_get_chan_index(ses, server); - if (!chan_index) { + if (chan_index == CIFS_INVAL_CHAN_INDEX) { spin_unlock(&ses->chan_lock); return 0; } @@ -319,6 +334,11 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) spin_lock(&ses->chan_lock); chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) { + spin_unlock(&ses->chan_lock); + return 0; + } + ses->chans[chan_index].iface = iface; /* No iface is found. if secondary chan, drop connection */ diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 23c50ed7d4b5..84ea67301303 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -413,7 +413,13 @@ generate_smb3signingkey(struct cifs_ses *ses, ses->ses_status == SES_GOOD); chan_index = cifs_ses_get_chan_index(ses, server); - /* TODO: introduce ref counting for channels when the can be freed */ + if (chan_index == CIFS_INVAL_CHAN_INDEX) { + spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); + + return -EINVAL; + } + spin_unlock(&ses->chan_lock); spin_unlock(&ses->ses_lock); From a6d8fb54a515f0546ffdb7870102b1238917e567 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 26 Dec 2022 11:24:56 +0000 Subject: [PATCH 242/415] cifs: distribute channels across interfaces based on speed Today, if the server interfaces RSS capable, we simply choose the fastest interface to setup a channel. This is not a scalable approach, and does not make a lot of attempt to distribute the connections. This change does a weighted distribution of channels across all the available server interfaces, where the weight is a function of the advertised interface speed. Also make sure that we don't mix rdma and non-rdma for channels. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 16 ++++++++ fs/smb/client/cifsglob.h | 2 + fs/smb/client/sess.c | 84 +++++++++++++++++++++++++++++++------- 3 files changed, 88 insertions(+), 14 deletions(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 772d4226957f..5596c9f30ccb 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -284,6 +284,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) struct cifs_ses *ses; struct cifs_tcon *tcon; struct cifs_server_iface *iface; + size_t iface_weight = 0, iface_min_speed = 0; + struct cifs_server_iface *last_iface = NULL; int c, i, j; seq_puts(m, @@ -549,11 +551,25 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) "\tLast updated: %lu seconds ago", ses->iface_count, (jiffies - ses->iface_last_update) / HZ); + + last_iface = list_last_entry(&ses->iface_list, + struct cifs_server_iface, + iface_head); + iface_min_speed = last_iface->speed; + j = 0; list_for_each_entry(iface, &ses->iface_list, iface_head) { seq_printf(m, "\n\t%d)", ++j); cifs_dump_iface(m, iface); + + iface_weight = iface->speed / iface_min_speed; + seq_printf(m, "\t\tWeight (cur,total): (%zu,%zu)" + "\n\t\tAllocated channels: %u\n", + iface->weight_fulfilled, + iface_weight, + iface->num_channels); + if (is_ses_using_iface(ses, iface)) seq_puts(m, "\t\t[CONNECTED]\n"); } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index eae138040edf..30763c68cc39 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -969,6 +969,8 @@ struct cifs_server_iface { struct list_head iface_head; struct kref refcount; size_t speed; + size_t weight_fulfilled; + unsigned int num_channels; unsigned int rdma_capable : 1; unsigned int rss_capable : 1; unsigned int is_active : 1; /* unset if non existent */ diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index d13a24613710..336b64d93e41 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -179,7 +179,9 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) int left; int rc = 0; int tries = 0; + size_t iface_weight = 0, iface_min_speed = 0; struct cifs_server_iface *iface = NULL, *niface = NULL; + struct cifs_server_iface *last_iface = NULL; spin_lock(&ses->chan_lock); @@ -207,21 +209,11 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) } spin_unlock(&ses->chan_lock); - /* - * Keep connecting to same, fastest, iface for all channels as - * long as its RSS. Try next fastest one if not RSS or channel - * creation fails. - */ - spin_lock(&ses->iface_lock); - iface = list_first_entry(&ses->iface_list, struct cifs_server_iface, - iface_head); - spin_unlock(&ses->iface_lock); - while (left > 0) { tries++; if (tries > 3*ses->chan_max) { - cifs_dbg(FYI, "too many channel open attempts (%d channels left to open)\n", + cifs_dbg(VFS, "too many channel open attempts (%d channels left to open)\n", left); break; } @@ -229,17 +221,35 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) spin_lock(&ses->iface_lock); if (!ses->iface_count) { spin_unlock(&ses->iface_lock); + cifs_dbg(VFS, "server %s does not advertise interfaces\n", + ses->server->hostname); break; } + if (!iface) + iface = list_first_entry(&ses->iface_list, struct cifs_server_iface, + iface_head); + last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface, + iface_head); + iface_min_speed = last_iface->speed; + list_for_each_entry_safe_from(iface, niface, &ses->iface_list, iface_head) { + /* do not mix rdma and non-rdma interfaces */ + if (iface->rdma_capable != ses->server->rdma) + continue; + /* skip ifaces that are unusable */ if (!iface->is_active || (is_ses_using_iface(ses, iface) && - !iface->rss_capable)) { + !iface->rss_capable)) + continue; + + /* check if we already allocated enough channels */ + iface_weight = iface->speed / iface_min_speed; + + if (iface->weight_fulfilled >= iface_weight) continue; - } /* take ref before unlock */ kref_get(&iface->refcount); @@ -256,10 +266,21 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) continue; } - cifs_dbg(FYI, "successfully opened new channel on iface:%pIS\n", + iface->num_channels++; + iface->weight_fulfilled++; + cifs_dbg(VFS, "successfully opened new channel on iface:%pIS\n", &iface->sockaddr); break; } + + /* reached end of list. reset weight_fulfilled and start over */ + if (list_entry_is_head(iface, &ses->iface_list, iface_head)) { + list_for_each_entry(iface, &ses->iface_list, iface_head) + iface->weight_fulfilled = 0; + spin_unlock(&ses->iface_lock); + iface = NULL; + continue; + } spin_unlock(&ses->iface_lock); left--; @@ -278,8 +299,10 @@ int cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index; + size_t iface_weight = 0, iface_min_speed = 0; struct cifs_server_iface *iface = NULL; struct cifs_server_iface *old_iface = NULL; + struct cifs_server_iface *last_iface = NULL; int rc = 0; spin_lock(&ses->chan_lock); @@ -299,13 +322,34 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) spin_unlock(&ses->chan_lock); spin_lock(&ses->iface_lock); + if (!ses->iface_count) { + spin_unlock(&ses->iface_lock); + cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname); + return 0; + } + + last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface, + iface_head); + iface_min_speed = last_iface->speed; + /* then look for a new one */ list_for_each_entry(iface, &ses->iface_list, iface_head) { + /* do not mix rdma and non-rdma interfaces */ + if (iface->rdma_capable != server->rdma) + continue; + if (!iface->is_active || (is_ses_using_iface(ses, iface) && !iface->rss_capable)) { continue; } + + /* check if we already allocated enough channels */ + iface_weight = iface->speed / iface_min_speed; + + if (iface->weight_fulfilled >= iface_weight) + continue; + kref_get(&iface->refcount); break; } @@ -321,10 +365,22 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n", &old_iface->sockaddr, &iface->sockaddr); + + old_iface->num_channels--; + if (old_iface->weight_fulfilled) + old_iface->weight_fulfilled--; + iface->num_channels++; + iface->weight_fulfilled++; + kref_put(&old_iface->refcount, release_iface); } else if (old_iface) { cifs_dbg(FYI, "releasing ref to iface: %pIS\n", &old_iface->sockaddr); + + old_iface->num_channels--; + if (old_iface->weight_fulfilled) + old_iface->weight_fulfilled--; + kref_put(&old_iface->refcount, release_iface); } else { WARN_ON(!iface); From fa1d0508bdd4a68c5e40f85f635712af8c12f180 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Tue, 14 Mar 2023 11:14:58 +0000 Subject: [PATCH 243/415] cifs: account for primary channel in the interface list The refcounting of server interfaces should account for the primary channel too. Although this is not strictly necessary, doing so will account for the primary channel in DebugData. Cc: stable@vger.kernel.org Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/sess.c | 28 ++++++++++++++++++++++++++++ fs/smb/client/smb2ops.c | 6 ++++++ 2 files changed, 34 insertions(+) diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 336b64d93e41..e716d046fb5f 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -303,6 +303,7 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) struct cifs_server_iface *iface = NULL; struct cifs_server_iface *old_iface = NULL; struct cifs_server_iface *last_iface = NULL; + struct sockaddr_storage ss; int rc = 0; spin_lock(&ses->chan_lock); @@ -321,6 +322,10 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) } spin_unlock(&ses->chan_lock); + spin_lock(&server->srv_lock); + ss = server->dstaddr; + spin_unlock(&server->srv_lock); + spin_lock(&ses->iface_lock); if (!ses->iface_count) { spin_unlock(&ses->iface_lock); @@ -334,6 +339,16 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) /* then look for a new one */ list_for_each_entry(iface, &ses->iface_list, iface_head) { + if (!chan_index) { + /* if we're trying to get the updated iface for primary channel */ + if (!cifs_match_ipaddr((struct sockaddr *) &ss, + (struct sockaddr *) &iface->sockaddr)) + continue; + + kref_get(&iface->refcount); + break; + } + /* do not mix rdma and non-rdma interfaces */ if (iface->rdma_capable != server->rdma) continue; @@ -360,6 +375,13 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) cifs_dbg(FYI, "unable to find a suitable iface\n"); } + if (!chan_index && !iface) { + cifs_dbg(FYI, "unable to get the interface matching: %pIS\n", + &ss); + spin_unlock(&ses->iface_lock); + return 0; + } + /* now drop the ref to the current iface */ if (old_iface && iface) { cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n", @@ -382,6 +404,12 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) old_iface->weight_fulfilled--; kref_put(&old_iface->refcount, release_iface); + } else if (!chan_index) { + /* special case: update interface for primary channel */ + cifs_dbg(FYI, "referencing primary channel iface: %pIS\n", + &iface->sockaddr); + iface->num_channels++; + iface->weight_fulfilled++; } else { WARN_ON(!iface); cifs_dbg(FYI, "adding new iface: %pIS\n", &iface->sockaddr); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 601e7a187f87..a959ed2c9b22 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -756,6 +756,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon, bool in_ unsigned int ret_data_len = 0; struct network_interface_info_ioctl_rsp *out_buf = NULL; struct cifs_ses *ses = tcon->ses; + struct TCP_Server_Info *pserver; /* do not query too frequently */ if (ses->iface_last_update && @@ -780,6 +781,11 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon, bool in_ if (rc) goto out; + /* check if iface is still active */ + pserver = ses->chans[0].server; + if (pserver && !cifs_chan_is_iface_active(ses, pserver)) + cifs_chan_update_iface(ses, pserver); + out: kfree(out_buf); return rc; From 9599d59eb8fc0c0fd9480c4f22901533d08965ee Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 6 Nov 2023 16:22:11 +0000 Subject: [PATCH 244/415] cifs: do not pass cifs_sb when trying to add channels The only reason why cifs_sb gets passed today to cifs_try_adding_channels is to pass the local_nls field for the new channels and binding session. However, the ses struct already has local_nls field that is setup during the first cifs_setup_session. So there is no need to pass cifs_sb. This change removes cifs_sb from the arg list for this and the functions that it calls and uses ses->local_nls instead. Cc: stable@vger.kernel.org Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/client/cifsproto.h | 2 +- fs/smb/client/connect.c | 2 +- fs/smb/client/sess.c | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index c1f71c6be7e3..eed8dbb6b2fb 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -610,7 +610,7 @@ void cifs_free_hash(struct shash_desc **sdesc); struct cifs_chan * cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server); -int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses); +int cifs_try_adding_channels(struct cifs_ses *ses); bool is_server_using_iface(struct TCP_Server_Info *server, struct cifs_server_iface *iface); bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface); diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 3ff82f0aa00e..947e3c362beb 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -3564,7 +3564,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) ctx->prepath = NULL; out: - cifs_try_adding_channels(cifs_sb, mnt_ctx.ses); + cifs_try_adding_channels(mnt_ctx.ses); rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); if (rc) goto error; diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index e716d046fb5f..fe45ccfdc802 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -24,7 +24,7 @@ #include "fs_context.h" static int -cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, +cifs_ses_add_channel(struct cifs_ses *ses, struct cifs_server_iface *iface); bool @@ -172,7 +172,7 @@ cifs_chan_is_iface_active(struct cifs_ses *ses, } /* returns number of channels added */ -int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) +int cifs_try_adding_channels(struct cifs_ses *ses) { struct TCP_Server_Info *server = ses->server; int old_chan_count, new_chan_count; @@ -255,7 +255,7 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) kref_get(&iface->refcount); spin_unlock(&ses->iface_lock); - rc = cifs_ses_add_channel(cifs_sb, ses, iface); + rc = cifs_ses_add_channel(ses, iface); spin_lock(&ses->iface_lock); if (rc) { @@ -458,7 +458,7 @@ cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server) } static int -cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, +cifs_ses_add_channel(struct cifs_ses *ses, struct cifs_server_iface *iface) { struct TCP_Server_Info *chan_server; @@ -537,7 +537,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, * This will be used for encoding/decoding user/domain/pw * during sess setup auth. */ - ctx->local_nls = cifs_sb->local_nls; + ctx->local_nls = ses->local_nls; /* Use RDMA if possible */ ctx->rdma = iface->rdma_capable; @@ -583,7 +583,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, rc = cifs_negotiate_protocol(xid, ses, chan->server); if (!rc) - rc = cifs_setup_session(xid, ses, chan->server, cifs_sb->local_nls); + rc = cifs_setup_session(xid, ses, chan->server, ses->local_nls); mutex_unlock(&ses->session_mutex); From 19a4b9d6c372cab6a3b2c9a061a236136fe95274 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 13 Oct 2023 11:43:09 +0000 Subject: [PATCH 245/415] cifs: reconnect work should have reference on server struct The delayed work for reconnect takes server struct as a parameter. But it does so without holding a ref to it. Normally, this may not show a problem as the reconnect work is only cancelled on umount. However, since we now plan to support scaling down of channels, and the scale down can happen from reconnect work itself, we need to fix it. This change takes a reference on the server struct before it is passed to the delayed work. And drops the reference in the delayed work itself. Or if the delayed work is successfully cancelled, by the process that cancels it. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/connect.c | 27 +++++++++++++++++++++------ fs/smb/client/smb2pdu.c | 23 +++++++++++++---------- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 947e3c362beb..b514b41cc9f0 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -389,7 +389,13 @@ static int __cifs_reconnect(struct TCP_Server_Info *server, spin_unlock(&server->srv_lock); cifs_swn_reset_server_dstaddr(server); cifs_server_unlock(server); - mod_delayed_work(cifsiod_wq, &server->reconnect, 0); + + /* increase ref count which reconnect work will drop */ + spin_lock(&cifs_tcp_ses_lock); + server->srv_count++; + spin_unlock(&cifs_tcp_ses_lock); + if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0)) + cifs_put_tcp_session(server, false); } } while (server->tcpStatus == CifsNeedReconnect); @@ -519,7 +525,13 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server) spin_unlock(&server->srv_lock); cifs_swn_reset_server_dstaddr(server); cifs_server_unlock(server); - mod_delayed_work(cifsiod_wq, &server->reconnect, 0); + + /* increase ref count which reconnect work will drop */ + spin_lock(&cifs_tcp_ses_lock); + server->srv_count++; + spin_unlock(&cifs_tcp_ses_lock); + if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0)) + cifs_put_tcp_session(server, false); } while (server->tcpStatus == CifsNeedReconnect); mutex_lock(&server->refpath_lock); @@ -1601,16 +1613,19 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) cancel_delayed_work_sync(&server->echo); - if (from_reconnect) + if (from_reconnect) { /* * Avoid deadlock here: reconnect work calls * cifs_put_tcp_session() at its end. Need to be sure * that reconnect work does nothing with server pointer after * that step. */ - cancel_delayed_work(&server->reconnect); - else - cancel_delayed_work_sync(&server->reconnect); + if (cancel_delayed_work(&server->reconnect)) + cifs_put_tcp_session(server, from_reconnect); + } else { + if (cancel_delayed_work_sync(&server->reconnect)) + cifs_put_tcp_session(server, from_reconnect); + } spin_lock(&server->srv_lock); server->tcpStatus = CifsExiting; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index c75a80bb6d9e..b7665155f4e2 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -3852,12 +3852,6 @@ void smb2_reconnect_server(struct work_struct *work) } spin_unlock(&ses->chan_lock); } - /* - * Get the reference to server struct to be sure that the last call of - * cifs_put_tcon() in the loop below won't release the server pointer. - */ - if (tcon_exist || ses_exist) - server->srv_count++; spin_unlock(&cifs_tcp_ses_lock); @@ -3905,13 +3899,17 @@ void smb2_reconnect_server(struct work_struct *work) done: cifs_dbg(FYI, "Reconnecting tcons and channels finished\n"); - if (resched) + if (resched) { queue_delayed_work(cifsiod_wq, &server->reconnect, 2 * HZ); + mutex_unlock(&pserver->reconnect_mutex); + + /* no need to put tcp session as we're retrying */ + return; + } mutex_unlock(&pserver->reconnect_mutex); /* now we can safely release srv struct */ - if (tcon_exist || ses_exist) - cifs_put_tcp_session(server, 1); + cifs_put_tcp_session(server, true); } int @@ -3931,7 +3929,12 @@ SMB2_echo(struct TCP_Server_Info *server) server->ops->need_neg(server)) { spin_unlock(&server->srv_lock); /* No need to send echo on newly established connections */ - mod_delayed_work(cifsiod_wq, &server->reconnect, 0); + spin_lock(&cifs_tcp_ses_lock); + server->srv_count++; + spin_unlock(&cifs_tcp_ses_lock); + if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0)) + cifs_put_tcp_session(server, false); + return rc; } spin_unlock(&server->srv_lock); From f72d96507640835726d4f5ba26c1c11acbe1bc97 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 6 Nov 2023 15:37:03 -0600 Subject: [PATCH 246/415] smb3: minor cleanup of session handling code Minor cleanup of style issues found by checkpatch Reviewed-by: Bharath SM Signed-off-by: Steve French --- fs/smb/client/sess.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index fe45ccfdc802..4e4cc0cd5206 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -105,6 +105,7 @@ cifs_chan_clear_in_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) return; @@ -116,6 +117,7 @@ cifs_chan_in_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) return true; /* err on the safer side */ @@ -127,6 +129,7 @@ cifs_chan_set_need_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) return; @@ -140,6 +143,7 @@ cifs_chan_clear_need_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) return; @@ -153,6 +157,7 @@ cifs_chan_needs_reconnect(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) return true; /* err on the safer side */ @@ -164,6 +169,7 @@ cifs_chan_is_iface_active(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) return true; /* err on the safer side */ @@ -639,8 +645,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, /* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */ - /* BB verify whether signing required on neg or just on auth frame - (and NTLM case) */ + /* BB verify whether signing required on neg or just auth frame (and NTLM case) */ capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS | CAP_LARGE_WRITE_X | CAP_LARGE_READ_X; @@ -697,8 +702,10 @@ static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses, /* copy domain */ if (ses->domainName == NULL) { - /* Sending null domain better than using a bogus domain name (as - we did briefly in 2.6.18) since server will use its default */ + /* + * Sending null domain better than using a bogus domain name (as + * we did briefly in 2.6.18) since server will use its default + */ *bcc_ptr = 0; *(bcc_ptr+1) = 0; bytes_ret = 0; @@ -717,8 +724,7 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, char *bcc_ptr = *pbcc_area; int bytes_ret = 0; - /* BB FIXME add check that strings total less - than 335 or will need to send them as arrays */ + /* BB FIXME add check that strings less than 335 or will need to send as arrays */ /* copy user */ if (ses->user_name == NULL) { From 5923d6686a100c2b4cabd4c2ca9d5a12579c7614 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 7 Nov 2023 21:38:13 -0600 Subject: [PATCH 247/415] smb3: fix caching of ctime on setxattr Fixes xfstest generic/728 which had been failing due to incorrect ctime after setxattr and removexattr Update ctime on successful set of xattr Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/xattr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/xattr.c b/fs/smb/client/xattr.c index ac199160bce6..6780aa3e98a1 100644 --- a/fs/smb/client/xattr.c +++ b/fs/smb/client/xattr.c @@ -150,10 +150,13 @@ static int cifs_xattr_set(const struct xattr_handler *handler, if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) goto out; - if (pTcon->ses->server->ops->set_EA) + if (pTcon->ses->server->ops->set_EA) { rc = pTcon->ses->server->ops->set_EA(xid, pTcon, full_path, name, value, (__u16)size, cifs_sb->local_nls, cifs_sb); + if (rc == 0) + inode_set_ctime_current(inode); + } break; case XATTR_CIFS_ACL: From bce36aa682da7ca996d4a02636ebfb6b5f2c3f83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 16:12:24 +0100 Subject: [PATCH 248/415] OSS: dmasound/paula: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20231107151223.3971602-2-u.kleine-koenig@pengutronix.de Signed-off-by: Takashi Iwai --- sound/oss/dmasound/dmasound_paula.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sound/oss/dmasound/dmasound_paula.c b/sound/oss/dmasound/dmasound_paula.c index 23cf8284ce36..0ba8f0c4cd99 100644 --- a/sound/oss/dmasound/dmasound_paula.c +++ b/sound/oss/dmasound/dmasound_paula.c @@ -720,15 +720,14 @@ static int __init amiga_audio_probe(struct platform_device *pdev) return dmasound_init(); } -static int __exit amiga_audio_remove(struct platform_device *pdev) +static void __exit amiga_audio_remove(struct platform_device *pdev) { dmasound_deinit(); - return 0; } static struct platform_driver amiga_audio_driver = { - .remove = __exit_p(amiga_audio_remove), - .driver = { + .remove_new = __exit_p(amiga_audio_remove), + .driver = { .name = "amiga-audio", }, }; From f86128050d2d854035bfa461aadf36e6951b2bac Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Thu, 9 Nov 2023 14:11:53 +0000 Subject: [PATCH 249/415] arm64/syscall: Remove duplicate declaration Commit 6ac19f96515e ("arm64: avoid prototype warnings for syscalls") added missing declarations to various syscall wrapper macros. It however proved a little too zealous in __SYSCALL_DEFINEx(), as a declaration for __arm64_sys##name was already present. A declaration is required before the call to ALLOW_ERROR_INJECTION(), so keep the original one and remove the new one. Fixes: 6ac19f96515e ("arm64: avoid prototype warnings for syscalls") Signed-off-by: Kevin Brodsky Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20231109141153.250046-1-kevin.brodsky@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/syscall_wrapper.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index 17f687510c48..d977713ec0ba 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -54,7 +54,6 @@ ALLOW_ERROR_INJECTION(__arm64_sys##name, ERRNO); \ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ - asmlinkage long __arm64_sys##name(const struct pt_regs *regs); \ asmlinkage long __arm64_sys##name(const struct pt_regs *regs) \ { \ return __se_sys##name(SC_ARM64_REGS_TO_ARGS(x,__VA_ARGS__)); \ From e72c4333d2f2e7f2200f71a88c0480fd2a769a64 Mon Sep 17 00:00:00 2001 From: Xiao Wang Date: Tue, 31 Oct 2023 14:45:52 +0800 Subject: [PATCH 250/415] riscv: Rearrange hwcap.h and cpufeature.h Now hwcap.h and cpufeature.h are mutually including each other, and most of the variable/API declarations in hwcap.h are implemented in cpufeature.c, so, it's better to move them into cpufeature.h and leave only macros for ISA extension logical IDs in hwcap.h. BTW, the riscv_isa_extension_mask macro is not used now, so this patch removes it. Suggested-by: Andrew Jones Signed-off-by: Xiao Wang Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20231031064553.2319688-2-xiao.w.wang@intel.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cpufeature.h | 83 ++++++++++++++++++++++++++ arch/riscv/include/asm/elf.h | 2 +- arch/riscv/include/asm/hwcap.h | 91 ----------------------------- arch/riscv/include/asm/pgtable.h | 1 + arch/riscv/include/asm/switch_to.h | 2 +- arch/riscv/include/asm/vector.h | 2 +- arch/riscv/kvm/aia.c | 2 +- arch/riscv/kvm/main.c | 2 +- arch/riscv/kvm/tlb.c | 2 +- arch/riscv/kvm/vcpu_fp.c | 2 +- arch/riscv/kvm/vcpu_onereg.c | 2 +- arch/riscv/kvm/vcpu_vector.c | 2 +- drivers/clocksource/timer-riscv.c | 2 +- drivers/perf/riscv_pmu_sbi.c | 2 +- 14 files changed, 95 insertions(+), 102 deletions(-) diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 69f2cae96f0b..a418c3112cd6 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -7,7 +7,10 @@ #define _ASM_CPUFEATURE_H #include +#include #include +#include +#include /* * These are probed via a device_initcall(), via either the SBI or directly @@ -50,4 +53,84 @@ static inline bool check_unaligned_access_emulated(int cpu) static inline void unaligned_emulation_finish(void) {} #endif +unsigned long riscv_get_elf_hwcap(void); + +struct riscv_isa_ext_data { + const unsigned int id; + const char *name; + const char *property; +}; + +extern const struct riscv_isa_ext_data riscv_isa_ext[]; +extern const size_t riscv_isa_ext_count; +extern bool riscv_isa_fallback; + +unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); + +bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit); +#define riscv_isa_extension_available(isa_bitmap, ext) \ + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) + +static __always_inline bool +riscv_has_extension_likely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, + "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + asm_volatile_goto( + ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) + : + : [ext] "i" (ext) + : + : l_no); + } else { + if (!__riscv_isa_extension_available(NULL, ext)) + goto l_no; + } + + return true; +l_no: + return false; +} + +static __always_inline bool +riscv_has_extension_unlikely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, + "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + asm_volatile_goto( + ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) + : + : [ext] "i" (ext) + : + : l_yes); + } else { + if (__riscv_isa_extension_available(NULL, ext)) + goto l_yes; + } + + return false; +l_yes: + return true; +} + +static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext) +{ + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) + return true; + + return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); +} + +static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext) +{ + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) + return true; + + return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); +} + #endif diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h index b3b2dfbdf945..06c236bfab53 100644 --- a/arch/riscv/include/asm/elf.h +++ b/arch/riscv/include/asm/elf.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include /* * These are used to set parameters in the core dumps. diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 31774bcdf1c6..141b7109c25c 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -8,9 +8,6 @@ #ifndef _ASM_RISCV_HWCAP_H #define _ASM_RISCV_HWCAP_H -#include -#include -#include #include #define RISCV_ISA_EXT_a ('a' - 'a') @@ -67,92 +64,4 @@ #define RISCV_ISA_EXT_SxAIA RISCV_ISA_EXT_SSAIA #endif -#ifndef __ASSEMBLY__ - -#include -#include - -unsigned long riscv_get_elf_hwcap(void); - -struct riscv_isa_ext_data { - const unsigned int id; - const char *name; - const char *property; -}; - -extern const struct riscv_isa_ext_data riscv_isa_ext[]; -extern const size_t riscv_isa_ext_count; -extern bool riscv_isa_fallback; - -unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); - -#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext) - -bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit); -#define riscv_isa_extension_available(isa_bitmap, ext) \ - __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) - -static __always_inline bool -riscv_has_extension_likely(const unsigned long ext) -{ - compiletime_assert(ext < RISCV_ISA_EXT_MAX, - "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm_volatile_goto( - ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_no); - } else { - if (!__riscv_isa_extension_available(NULL, ext)) - goto l_no; - } - - return true; -l_no: - return false; -} - -static __always_inline bool -riscv_has_extension_unlikely(const unsigned long ext) -{ - compiletime_assert(ext < RISCV_ISA_EXT_MAX, - "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm_volatile_goto( - ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_yes); - } else { - if (__riscv_isa_extension_available(NULL, ext)) - goto l_yes; - } - - return false; -l_yes: - return true; -} - -static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext) -{ - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) - return true; - - return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); -} - -static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext) -{ - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) - return true; - - return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); -} -#endif - #endif /* _ASM_RISCV_HWCAP_H */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index c8e8867c42f6..294044429e8e 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -291,6 +291,7 @@ static inline pte_t pud_pte(pud_t pud) } #ifdef CONFIG_RISCV_ISA_SVNAPOT +#include static __always_inline bool has_svnapot(void) { diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index a727be723c56..f90d8e42f3c7 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index c5ee07b3df07..87aaef656257 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index 74bb27440527..a944294f6f23 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include struct aia_hgei_control { diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 48ae0d4b3932..225a435d9c9a 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include long kvm_arch_dev_ioctl(struct file *filp, diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c index 44bc324aeeb0..23c0e82b5103 100644 --- a/arch/riscv/kvm/tlb.c +++ b/arch/riscv/kvm/tlb.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #define has_svinval() riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL) diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c index 08ba48a395aa..030904d82b58 100644 --- a/arch/riscv/kvm/vcpu_fp.c +++ b/arch/riscv/kvm/vcpu_fp.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #ifdef CONFIG_FPU void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu) diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index 1b7e9fa265cb..b03e0c879dab 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c index b430cbb69521..b339a2682f25 100644 --- a/arch/riscv/kvm/vcpu_vector.c +++ b/arch/riscv/kvm/vcpu_vector.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index 9c8f3e2decc2..e0333142c18c 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index cd8a2b9efd78..16acd4dcdb96 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -22,7 +22,7 @@ #include #include -#include +#include #define SYSCTL_NO_USER_ACCESS 0 #define SYSCTL_USER_ACCESS 1 From 457926b253200bd9bdfae9a016a3b1d1dc661d55 Mon Sep 17 00:00:00 2001 From: Xiao Wang Date: Tue, 31 Oct 2023 14:45:53 +0800 Subject: [PATCH 251/415] riscv: Optimize bitops with Zbb extension This patch leverages the alternative mechanism to dynamically optimize bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When Zbb ext is not supported by the runtime CPU, legacy implementation is used. If Zbb is supported, then the optimized variants will be selected via alternative patching. The legacy bitops support is taken from the generic C implementation as fallback. If the parameter is a build-time constant, we leverage compiler builtin to calculate the result directly, this approach is inspired by x86 bitops implementation. EFI stub runs before the kernel, so alternative mechanism should not be used there, this patch introduces a macro NO_ALTERNATIVE for this purpose. Signed-off-by: Xiao Wang Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20231031064553.2319688-3-xiao.w.wang@intel.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/bitops.h | 254 +++++++++++++++++++++++++- drivers/firmware/efi/libstub/Makefile | 2 +- 2 files changed, 252 insertions(+), 4 deletions(-) diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 3540b690944b..b212c2708cda 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -15,13 +15,261 @@ #include #include +#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE) #include -#include -#include #include +#include +#include + +#else +#include +#include + +#if (BITS_PER_LONG == 64) +#define CTZW "ctzw " +#define CLZW "clzw " +#elif (BITS_PER_LONG == 32) +#define CTZW "ctz " +#define CLZW "clz " +#else +#error "Unexpected BITS_PER_LONG" +#endif + +static __always_inline unsigned long variable__ffs(unsigned long word) +{ + int num; + + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm volatile (".option push\n" + ".option arch,+zbb\n" + "ctz %0, %1\n" + ".option pop\n" + : "=r" (word) : "r" (word) :); + + return word; + +legacy: + num = 0; +#if BITS_PER_LONG == 64 + if ((word & 0xffffffff) == 0) { + num += 32; + word >>= 32; + } +#endif + if ((word & 0xffff) == 0) { + num += 16; + word >>= 16; + } + if ((word & 0xff) == 0) { + num += 8; + word >>= 8; + } + if ((word & 0xf) == 0) { + num += 4; + word >>= 4; + } + if ((word & 0x3) == 0) { + num += 2; + word >>= 2; + } + if ((word & 0x1) == 0) + num += 1; + return num; +} + +/** + * __ffs - find first set bit in a long word + * @word: The word to search + * + * Undefined if no set bit exists, so code should check against 0 first. + */ +#define __ffs(word) \ + (__builtin_constant_p(word) ? \ + (unsigned long)__builtin_ctzl(word) : \ + variable__ffs(word)) + +static __always_inline unsigned long variable__fls(unsigned long word) +{ + int num; + + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm volatile (".option push\n" + ".option arch,+zbb\n" + "clz %0, %1\n" + ".option pop\n" + : "=r" (word) : "r" (word) :); + + return BITS_PER_LONG - 1 - word; + +legacy: + num = BITS_PER_LONG - 1; +#if BITS_PER_LONG == 64 + if (!(word & (~0ul << 32))) { + num -= 32; + word <<= 32; + } +#endif + if (!(word & (~0ul << (BITS_PER_LONG - 16)))) { + num -= 16; + word <<= 16; + } + if (!(word & (~0ul << (BITS_PER_LONG - 8)))) { + num -= 8; + word <<= 8; + } + if (!(word & (~0ul << (BITS_PER_LONG - 4)))) { + num -= 4; + word <<= 4; + } + if (!(word & (~0ul << (BITS_PER_LONG - 2)))) { + num -= 2; + word <<= 2; + } + if (!(word & (~0ul << (BITS_PER_LONG - 1)))) + num -= 1; + return num; +} + +/** + * __fls - find last set bit in a long word + * @word: the word to search + * + * Undefined if no set bit exists, so code should check against 0 first. + */ +#define __fls(word) \ + (__builtin_constant_p(word) ? \ + (unsigned long)(BITS_PER_LONG - 1 - __builtin_clzl(word)) : \ + variable__fls(word)) + +static __always_inline int variable_ffs(int x) +{ + int r; + + if (!x) + return 0; + + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm volatile (".option push\n" + ".option arch,+zbb\n" + CTZW "%0, %1\n" + ".option pop\n" + : "=r" (r) : "r" (x) :); + + return r + 1; + +legacy: + r = 1; + if (!(x & 0xffff)) { + x >>= 16; + r += 16; + } + if (!(x & 0xff)) { + x >>= 8; + r += 8; + } + if (!(x & 0xf)) { + x >>= 4; + r += 4; + } + if (!(x & 3)) { + x >>= 2; + r += 2; + } + if (!(x & 1)) { + x >>= 1; + r += 1; + } + return r; +} + +/** + * ffs - find first set bit in a word + * @x: the word to search + * + * This is defined the same way as the libc and compiler builtin ffs routines. + * + * ffs(value) returns 0 if value is 0 or the position of the first set bit if + * value is nonzero. The first (least significant) bit is at position 1. + */ +#define ffs(x) (__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x)) + +static __always_inline int variable_fls(unsigned int x) +{ + int r; + + if (!x) + return 0; + + asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBB, 1) + : : : : legacy); + + asm volatile (".option push\n" + ".option arch,+zbb\n" + CLZW "%0, %1\n" + ".option pop\n" + : "=r" (r) : "r" (x) :); + + return 32 - r; + +legacy: + r = 32; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + return r; +} + +/** + * fls - find last set bit in a word + * @x: the word to search + * + * This is defined in a similar way as ffs, but returns the position of the most + * significant set bit. + * + * fls(value) returns 0 if value is 0 or the position of the last set bit if + * value is nonzero. The last (most significant) bit is at position 32. + */ +#define fls(x) \ +({ \ + typeof(x) x_ = (x); \ + __builtin_constant_p(x_) ? \ + (int)((x_ != 0) ? (32 - __builtin_clz(x_)) : 0) \ + : \ + variable_fls(x_); \ +}) + +#endif /* !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE) */ + +#include #include #include -#include #include diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index a1157c2a7170..d68cacd4e3af 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -28,7 +28,7 @@ cflags-$(CONFIG_ARM) += -DEFI_HAVE_STRLEN -DEFI_HAVE_STRNLEN \ -DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \ -DEFI_HAVE_STRCMP -fno-builtin -fpic \ $(call cc-option,-mno-single-pic-base) -cflags-$(CONFIG_RISCV) += -fpic +cflags-$(CONFIG_RISCV) += -fpic -DNO_ALTERNATIVE cflags-$(CONFIG_LOONGARCH) += -fpie cflags-$(CONFIG_EFI_PARAMS_FROM_FDT) += -I$(srctree)/scripts/dtc/libfdt From 5e2fd17f434d2fed78efb123e2fc6711e4f598f1 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 9 Nov 2023 12:01:48 -0300 Subject: [PATCH 252/415] smb: client: fix mount when dns_resolver key is not available There was a wrong assumption that with CONFIG_CIFS_DFS_UPCALL=y there would always be a dns_resolver key set up so we could unconditionally upcall to resolve UNC hostname rather than using the value provided by mount(2). Only require it when performing automount of junctions within a DFS share so users that don't have dns_resolver key still can mount their regular shares with server hostname resolved by mount.cifs(8). Fixes: 348a04a8d113 ("smb: client: get rid of dfs code dep in namespace.c") Cc: stable@vger.kernel.org Tested-by: Eduard Bachmakov Reported-by: Eduard Bachmakov Closes: https://lore.kernel.org/all/CADCRUiNvZuiUZ0VGZZO9HRyPyw6x92kiA7o7Q4tsX5FkZqUkKg@mail.gmail.com/ Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/client/dfs.c | 18 +++++++++++++----- fs/smb/client/fs_context.h | 1 + fs/smb/client/namespace.c | 17 +++++++++++++++-- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c index 81b84151450d..a8a1d386da65 100644 --- a/fs/smb/client/dfs.c +++ b/fs/smb/client/dfs.c @@ -263,15 +263,23 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) return rc; } -/* Resolve UNC hostname in @ctx->source and set ip addr in @ctx->dstaddr */ +/* + * If @ctx->dfs_automount, then update @ctx->dstaddr earlier with the DFS root + * server from where we'll start following any referrals. Otherwise rely on the + * value provided by mount(2) as the user might not have dns_resolver key set up + * and therefore failing to upcall to resolve UNC hostname under @ctx->source. + */ static int update_fs_context_dstaddr(struct smb3_fs_context *ctx) { struct sockaddr *addr = (struct sockaddr *)&ctx->dstaddr; - int rc; + int rc = 0; - rc = dns_resolve_server_name_to_ip(ctx->source, addr, NULL); - if (!rc) - cifs_set_port(addr, ctx->port); + if (!ctx->nodfs && ctx->dfs_automount) { + rc = dns_resolve_server_name_to_ip(ctx->source, addr, NULL); + if (!rc) + cifs_set_port(addr, ctx->port); + ctx->dfs_automount = false; + } return rc; } diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index 9d8d34af0211..cf46916286d0 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -268,6 +268,7 @@ struct smb3_fs_context { bool witness:1; /* use witness protocol */ char *leaf_fullpath; struct cifs_ses *dfs_root_ses; + bool dfs_automount:1; /* set for dfs automount only */ }; extern const struct fs_parameter_spec smb3_fs_parameters[]; diff --git a/fs/smb/client/namespace.c b/fs/smb/client/namespace.c index c8f5ed8a69f1..a6968573b775 100644 --- a/fs/smb/client/namespace.c +++ b/fs/smb/client/namespace.c @@ -117,6 +117,18 @@ cifs_build_devname(char *nodename, const char *prepath) return dev; } +static bool is_dfs_mount(struct dentry *dentry) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + bool ret; + + spin_lock(&tcon->tc_lock); + ret = !!tcon->origin_fullpath; + spin_unlock(&tcon->tc_lock); + return ret; +} + /* Return full path out of a dentry set for automount */ static char *automount_fullpath(struct dentry *dentry, void *page) { @@ -212,8 +224,9 @@ static struct vfsmount *cifs_do_automount(struct path *path) ctx->source = NULL; goto out; } - cifs_dbg(FYI, "%s: ctx: source=%s UNC=%s prepath=%s\n", - __func__, ctx->source, ctx->UNC, ctx->prepath); + ctx->dfs_automount = is_dfs_mount(mntpt); + cifs_dbg(FYI, "%s: ctx: source=%s UNC=%s prepath=%s dfs_automount=%d\n", + __func__, ctx->source, ctx->UNC, ctx->prepath, ctx->dfs_automount); mnt = fc_mount(fc); out: From bff3315ba8b1d81655743136bfc38514e820a739 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 7 Nov 2023 14:07:44 -0500 Subject: [PATCH 253/415] drm/amdgpu: fix AGP init order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default AGP settings were overwriting the IP selected ones since the default was getting set after the IP ones were selected. Fixes: de59b69932e6 ("drm/amdgpu/gmc: set a default disable value for AGP") Link: https://lists.freedesktop.org/archives/amd-gfx/2023-November/100966.html Tested-by: Mikhail Gavrilov Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: Mikhail Gavrilov --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 ++ 7 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 0dcb6c36b02c..cef920a93924 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1062,9 +1062,6 @@ static const char * const amdgpu_vram_names[] = { */ int amdgpu_bo_init(struct amdgpu_device *adev) { - /* set the default AGP aperture state */ - amdgpu_gmc_set_agp_default(adev, &adev->gmc); - /* On A+A platform, VRAM can be mapped as WB */ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { /* reserve PAT memory space to WC for VRAM */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index d8a4fddab9c1..ef80ea0929fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -672,6 +672,7 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, &adev->gmc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); if (!amdgpu_sriov_vf(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 4713a62ad586..5f794a907945 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -637,6 +637,7 @@ static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev, base = adev->mmhub.funcs->get_fb_location(adev); + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, &adev->gmc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_HIGH); if (!amdgpu_sriov_vf(adev) || diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 7f66954fd302..42e103d7077d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -211,6 +211,7 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev, base <<= 24; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 61ca1a82b651..efc16e580f1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -239,6 +239,7 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev, base <<= 24; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index fa59749c2aef..ff4ae73d27ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -413,6 +413,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index b66c5f7e1c56..fe52d132b629 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1614,6 +1614,8 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, { u64 base = adev->mmhub.funcs->get_fb_location(adev); + amdgpu_gmc_set_agp_default(adev, mc); + /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; if (adev->gmc.xgmi.connected_to_cpu) { From 12f76050d8d4d10dab96333656b821bd4620d103 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 9 Nov 2023 10:12:39 +0100 Subject: [PATCH 254/415] drm/amdgpu: fix error handling in amdgpu_bo_list_get() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We should not leak the pointer where we couldn't grab the reference on to the caller because it can be that the error handling still tries to put the reference then. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 781e5c5ce04d..702f6610d024 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -172,6 +172,7 @@ int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, } rcu_read_unlock(); + *result = NULL; return -ENOENT; } From 17daf01ab4e3e5a5929747aa05cc15eb2bad5438 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 9 Nov 2023 10:14:14 +0100 Subject: [PATCH 255/415] drm/amdgpu: lower CS errors to debug severity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise userspace can spam the logs by using incorrect input values. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 0245de81cabd..1e35e9c06b09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1414,7 +1414,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r == -ENOMEM) DRM_ERROR("Not enough memory for command submission!\n"); else if (r != -ERESTARTSYS && r != -EAGAIN) - DRM_ERROR("Failed to process the buffer list %d!\n", r); + DRM_DEBUG("Failed to process the buffer list %d!\n", r); goto error_fini; } From 61d7052216214e828b71407172aa85031cf138a9 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Tue, 31 Oct 2023 11:09:30 +0800 Subject: [PATCH 256/415] drm/amdgpu: Don't warn for unsupported set_xgmi_plpd_mode set_xgmi_plpd_mode may be unsupported and this isn't error, no need to print warning for it. v2: add ret2 to save the status of psp_ras_trigger_error. Suggested-by: lijo.lazar@amd.com Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 713e17cca071..1eeb0a517d80 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1125,28 +1125,30 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *inject_if, uint32_t instance_mask) { - int ret = 0; + int ret1, ret2; struct ta_ras_trigger_error_input *block_info = (struct ta_ras_trigger_error_input *)inject_if; if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) dev_warn(adev->dev, "Failed to disallow df cstate"); - if (amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW)) + ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW); + if (ret1 && ret1 != -EOPNOTSUPP) dev_warn(adev->dev, "Failed to disallow XGMI power down"); - ret = psp_ras_trigger_error(&adev->psp, block_info, instance_mask); + ret2 = psp_ras_trigger_error(&adev->psp, block_info, instance_mask); if (amdgpu_ras_intr_triggered()) - return ret; + return ret2; - if (amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT)) + ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT); + if (ret1 && ret1 != -EOPNOTSUPP) dev_warn(adev->dev, "Failed to allow XGMI power down"); if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW)) dev_warn(adev->dev, "Failed to allow df cstate"); - return ret; + return ret2; } struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = { From 8abf799ea4d58e7d0522bd6e4bb070be3de3ed62 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 3 Nov 2023 11:24:44 +0530 Subject: [PATCH 257/415] drm/amd/pm: Hide pp_dpm_pcie device attribute Hide PCIe DPM attribute on SOCs with GC v9.4.2 and GC v9.4.3. Signed-off-by: Lijo Lazar Reviewed-by: Yang Wang Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 1f6d2dbcca4a..8257be1b9464 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2209,6 +2209,10 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ if (amdgpu_dpm_get_apu_thermal_limit(adev, &limit) == -EOPNOTSUPP) *states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pp_dpm_pcie)) { + if (gc_ver == IP_VERSION(9, 4, 2) || + gc_ver == IP_VERSION(9, 4, 3)) + *states = ATTR_STATE_UNSUPPORTED; } switch (gc_ver) { From 4eaa007c739991b08b6343453035e5d1dfe2bd98 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 31 Oct 2023 10:48:50 +0800 Subject: [PATCH 258/415] drm/amdgpu: correct amdgpu ip block rev info correct following amdgpu ip block version information: - gfx_v9_4_3 - sdma_v4_4_2 Signed-off-by: Yang Wang Reviewed-by: Kenneth Feng Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 046ae95b366a..b194b4472943 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -4338,7 +4338,7 @@ const struct amdgpu_ip_block_version gfx_v9_4_3_ip_block = { .type = AMD_IP_BLOCK_TYPE_GFX, .major = 9, .minor = 4, - .rev = 0, + .rev = 3, .funcs = &gfx_v9_4_3_ip_funcs, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index bd65a62f8903..0f24af6f2810 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -2061,7 +2061,7 @@ const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, .major = 4, .minor = 4, - .rev = 0, + .rev = 2, .funcs = &sdma_v4_4_2_ip_funcs, }; From 5a2913aadabc4711e98fb48d56e5c5f5728bbc33 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Tue, 7 Nov 2023 18:10:29 +0800 Subject: [PATCH 259/415] drm/amd/pm: raise the deep sleep clock threshold for smu 13.0.6 The DS clock may exceed the limit as sclk dfll divider is 16 to target freq. Signed-off-by: Le Ma Reviewed-by: Lijo Lazar Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 20f66e696f87..83e1228e6eee 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -94,7 +94,7 @@ MODULE_FIRMWARE("amdgpu/smu_13_0_6.bin"); #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0x5 #define LINK_SPEED_MAX 4 -#define SMU_13_0_6_DSCLK_THRESHOLD 100 +#define SMU_13_0_6_DSCLK_THRESHOLD 140 #define MCA_BANK_IPID(_ip, _hwid, _type) \ [AMDGPU_MCA_IP_##_ip] = { .hwid = _hwid, .mcatype = _type, } From f64c3fce460469cd356ccb5c91d0bcbd1b9bc403 Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Tue, 8 Aug 2023 15:11:19 -0400 Subject: [PATCH 260/415] drm/amdgpu: Add flag to enable indirect RLCG access for gfx v9.4.3 The "rlcg_reg_access_supported" flag is missing. Add it back in. Signed-off-by: Victor Lu Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index b194b4472943..fc08104a7944 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1102,6 +1102,7 @@ static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX); reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SPARE_INT); } + adev->gfx.rlc.rlcg_reg_access_supported = true; } static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) From bc3c566071c8504f5d7c73a4171ead394f097639 Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Tue, 8 Aug 2023 13:57:16 -0400 Subject: [PATCH 261/415] drm/amdgpu: Add xcc param to SRIOV kiq write and WREG32_SOC15_IP_NO_KIQ (v4) WREG32/RREG32_SOC15_IP_NO_KIQ and amdgpu_virt_kiq_reg_write_reg_wait are not using the correct rlcg interface or mec engine, respectively. Add xcc instance parameter to them. v4: Use GET_INST and squash commit with: "drm/amdgpu: Add xcc_inst param to amdgpu_virt_kiq_reg_write_reg_wait" v3: xcc not needed for MMMHUB v2: rebase Signed-off-by: Victor Lu Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 3 ++- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 26 +++++++++++++---------- drivers/gpu/drm/amd/amdgpu/soc15_common.h | 6 +++--- 6 files changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index f7d1d356ecdd..468b251429d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -73,9 +73,10 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t reg1, - uint32_t ref, uint32_t mask) + uint32_t ref, uint32_t mask, + uint32_t xcc_inst) { - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst]; struct amdgpu_ring *ring = &kiq->ring; signed long r, cnt = 0; unsigned long flags; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 935ca736300e..73523633bd8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -334,7 +334,8 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t rreg1, - uint32_t ref, uint32_t mask); + uint32_t ref, uint32_t mask, + uint32_t xcc_inst); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index ef80ea0929fe..0ec7b061d7c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -268,7 +268,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); + 1 << vmid, GET_INST(GC, 0)); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 5f794a907945..6dce9b29f675 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -229,7 +229,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); + 1 << vmid, GET_INST(GC, 0)); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index fe52d132b629..05fb57540e35 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -817,7 +817,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type) { bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub); - u32 j, inv_req, tmp, sem, req, ack; + u32 j, inv_req, tmp, sem, req, ack, inst; const unsigned int eng = 17; struct amdgpu_vmhub *hub; @@ -832,13 +832,17 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for a HW workaround under SRIOV as well * as GFXOFF under bare metal */ - if (adev->gfx.kiq[0].ring.sched.ready && + if (vmhub >= AMDGPU_MMHUB0(0)) + inst = GET_INST(GC, 0); + else + inst = vmhub; + if (adev->gfx.kiq[inst].ring.sched.ready && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng; uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); + 1 << vmid, inst); return; } @@ -856,9 +860,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, for (j = 0; j < adev->usec_timeout; j++) { /* a read return value of 1 means semaphore acquire */ if (vmhub >= AMDGPU_MMHUB0(0)) - tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem); + tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, inst); else - tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem); + tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, inst); if (tmp & 0x1) break; udelay(1); @@ -869,9 +873,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } if (vmhub >= AMDGPU_MMHUB0(0)) - WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req); + WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req, inst); else - WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req); + WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req, inst); /* * Issue a dummy read to wait for the ACK register to @@ -884,9 +888,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, for (j = 0; j < adev->usec_timeout; j++) { if (vmhub >= AMDGPU_MMHUB0(0)) - tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack); + tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack, inst); else - tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack); + tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack, inst); if (tmp & (1 << vmid)) break; udelay(1); @@ -899,9 +903,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * write with 0 means semaphore release */ if (vmhub >= AMDGPU_MMHUB0(0)) - WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0); + WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, inst); else - WREG32_SOC15_IP_NO_KIQ(GC, sem, 0); + WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, inst); } spin_unlock(&adev->gmc.invalidate_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index da683afa0222..c75e9cd5c98b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -69,7 +69,7 @@ #define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP, 0) -#define RREG32_SOC15_IP_NO_KIQ(ip, reg) __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0) +#define RREG32_SOC15_IP_NO_KIQ(ip, reg, inst) __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst) #define RREG32_SOC15_NO_KIQ(ip, inst, reg) \ __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \ @@ -86,8 +86,8 @@ #define WREG32_SOC15_IP(ip, reg, value) \ __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP, 0) -#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value) \ - __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0) +#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value, inst) \ + __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst) #define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \ __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \ From bf13da6ae1a0097cf2ff4fba1e3236aaa3fa3a7a Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 24 Oct 2023 14:00:39 +0800 Subject: [PATCH 262/415] drm/amdgpu: correct smu v13.0.6 umc ras error check correct smu v13.0.0 umc ras error check Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/umc_v12_0.h | 3 +++ .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 16 +++++++++++----- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 770b4b4e3138..e9c2ff74f0bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -88,7 +88,7 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev) umc_v12_0_reset_error_count_per_channel, NULL); } -static bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status) +bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status) { return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || @@ -96,7 +96,7 @@ static bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status) REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)); } -static bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status) +bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status) { return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 || diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index 4885b9fff272..b34b1e358f8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -117,6 +117,9 @@ (pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \ } while (0) +bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status); +bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status); + extern const uint32_t umc_v12_0_channel_idx_tbl[] [UMC_V12_0_UMC_INSTANCE_NUM] diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 83e1228e6eee..a6b57a5d0c77 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -48,6 +48,7 @@ #include "smu_cmn.h" #include "mp/mp_13_0_6_offset.h" #include "mp/mp_13_0_6_sh_mask.h" +#include "umc_v12_0.h" #undef MP1_Public #undef smnMP1_FIRMWARE_FLAGS @@ -2481,7 +2482,7 @@ static int mca_decode_mca_ipid(struct amdgpu_device *adev, enum amdgpu_mca_error return 0; } -static int mca_normal_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, +static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, enum amdgpu_mca_error_type type, int idx, uint32_t *count) { uint64_t status0; @@ -2491,10 +2492,15 @@ static int mca_normal_mca_get_err_count(const struct mca_ras_info *mca_ras, stru if (ret) return ret; - if (REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) - *count = 1; - else + if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { *count = 0; + return 0; + } + + if (type == AMDGPU_MCA_ERROR_TYPE_UE && umc_v12_0_is_uncorrectable_error(status0)) + *count = 1; + else if (type == AMDGPU_MCA_ERROR_TYPE_CE && umc_v12_0_is_correctable_error(status0)) + *count = 1; return 0; } @@ -2608,7 +2614,7 @@ static const struct mca_ras_info mca_ras_table[] = { { .blkid = AMDGPU_RAS_BLOCK__UMC, .ip = AMDGPU_MCA_IP_UMC, - .get_err_count = mca_normal_mca_get_err_count, + .get_err_count = mca_umc_mca_get_err_count, }, { .blkid = AMDGPU_RAS_BLOCK__GFX, .ip = AMDGPU_MCA_IP_MP5, From bb619539629cee523df886705d6ef866e099640a Mon Sep 17 00:00:00 2001 From: Hunter Chasens Date: Tue, 7 Nov 2023 11:28:30 -0500 Subject: [PATCH 263/415] drm: amd: Resolve Sphinx unexpected indentation warning Resolves Sphinx unexpected indentation warning when compiling documentation (e.g. `make htmldocs`). Replaces tabs with spaces and adds a literal block to keep vertical formatting of the example power state list. Reviewed-by: Lijo Lazar Reviewed-by: Bagas Sanjaya (v2) Acked-by: Randy Dunlap (v2) Signed-off-by: Hunter Chasens Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 8257be1b9464..ca2ece24e1e0 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -989,12 +989,13 @@ static ssize_t amdgpu_get_pp_features(struct device *dev, * Reading back the files will show you the available power levels within * the power state and the clock information for those levels. If deep sleep is * applied to a clock, the level will be denoted by a special level 'S:' - * E.g., - * S: 19Mhz * - * 0: 615Mhz - * 1: 800Mhz - * 2: 888Mhz - * 3: 1000Mhz + * E.g., :: + * + * S: 19Mhz * + * 0: 615Mhz + * 1: 800Mhz + * 2: 888Mhz + * 3: 1000Mhz * * * To manually adjust these states, first select manual using From a78b4814697251419f3460bb124aaa5689e65055 Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Wed, 4 Oct 2023 15:52:57 -0400 Subject: [PATCH 264/415] drm/amdgpu: Skip PCTL0_MMHUB_DEEPSLEEP_IB write in jpegv4.0.3 under SRIOV PCTL0_MMHUB_DEEPSLEEP_IB is blocked for VF access Signed-off-by: Victor Lu Reviewed-by: Samir Dhume Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 9a8ec4d7e333..82b6b62c170b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -654,9 +654,11 @@ static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring) */ static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) { - amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */ + if (!amdgpu_sriov_vf(ring->adev)) { + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */ + } amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0, 0, PACKETJ_TYPE0)); @@ -672,9 +674,11 @@ static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) */ static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) { - amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x62a04); + if (!amdgpu_sriov_vf(ring->adev)) { + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x62a04); + } amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0, 0, PACKETJ_TYPE0)); From 0b1695710ab8be263a5c19f17240c6a44b4b0a3e Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Wed, 11 Oct 2023 16:27:59 -0400 Subject: [PATCH 265/415] drm/amdgpu: Do not program PF-only regs in hdp_v4_0.c under SRIOV (v2) The following regs can only be programmed by the PF: HDP_MISC_CNTL HDP_NONSURFACE_BASE HDP_NONSURFACE_BASE_HI v2: update commit message Signed-off-by: Victor Lu Reviewed-by: Samir Dhume Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 3f3a6445c006..49e934975719 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -145,6 +145,10 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev) break; } + /* Do not program registers if VF */ + if (amdgpu_sriov_vf(adev)) + return; + WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1); if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 0)) From 07c1db70364671eea4e84befe43ac91941153a43 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 2 Nov 2023 14:17:04 +0800 Subject: [PATCH 266/415] drm/amdgpu: refine smu v13.0.6 mca dump driver refine smu mca driver to support query ras error from pmfw path. - correct gfx smu bank hwid (from mp5 to smu bank) - retire unused callback function in amdgpu_mca_smu_funcs{} - add new mca_bank_set{} structure to collect mca bank - move enum mca_reg_idx into amdgpu_mca.h header - add mca status register field decode macro Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 164 ++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | 61 +++- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 320 +++++++----------- 3 files changed, 341 insertions(+), 204 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 5a828c175e3a..3302f4a29387 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -143,6 +143,46 @@ int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev) return 0; } +void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set) +{ + if (!mca_set) + return; + + memset(mca_set, 0, sizeof(*mca_set)); + INIT_LIST_HEAD(&mca_set->list); +} + +int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry) +{ + struct mca_bank_node *node; + + if (!entry) + return -EINVAL; + + node = kvzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + memcpy(&node->entry, entry, sizeof(*entry)); + + INIT_LIST_HEAD(&node->node); + list_add_tail(&node->node, &mca_set->list); + + mca_set->nr_entries++; + + return 0; +} + +void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set) +{ + struct mca_bank_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &mca_set->list, node) { + list_del(&node->node); + kvfree(node); + } +} + void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs) { struct amdgpu_mca *mca = &adev->mca; @@ -160,6 +200,58 @@ int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) return -EOPNOTSUPP; } +static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(entry->regs); i++) + dev_dbg(adev->dev, "mca entry[%02d].regs[%02d]=0x%016llx\n", idx, i, entry->regs[i]); +} + +int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data) +{ + struct amdgpu_smuio_mcm_config_info mcm_info; + struct mca_bank_set mca_set; + struct mca_bank_node *node; + struct mca_bank_entry *entry; + uint32_t count; + int ret, i = 0; + + amdgpu_mca_bank_set_init(&mca_set); + + ret = amdgpu_mca_smu_get_mca_set(adev, blk, type, &mca_set); + if (ret) + goto out_mca_release; + + list_for_each_entry(node, &mca_set.list, node) { + entry = &node->entry; + + amdgpu_mca_smu_mca_bank_dump(adev, i++, entry); + + count = 0; + ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, entry, &count); + if (ret) + goto out_mca_release; + + if (!count) + continue; + + mcm_info.socket_id = entry->info.socket_id; + mcm_info.die_id = entry->info.aid; + + if (type == AMDGPU_MCA_ERROR_TYPE_UE) + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, (uint64_t)count); + else + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, (uint64_t)count); + } + +out_mca_release: + amdgpu_mca_bank_set_release(&mca_set); + + return ret; +} + + int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count) { const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; @@ -173,17 +265,77 @@ int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_m return -EOPNOTSUPP; } -int amdgpu_mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, uint32_t *count) +int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, uint32_t *total) { const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; - if (!count) + struct mca_bank_set mca_set; + struct mca_bank_node *node; + struct mca_bank_entry *entry; + uint32_t count; + int ret; + + if (!total) return -EINVAL; - if (mca_funcs && mca_funcs->mca_get_error_count) - return mca_funcs->mca_get_error_count(adev, blk, type, count); + if (!mca_funcs) + return -EOPNOTSUPP; - return -EOPNOTSUPP; + if (!mca_funcs->mca_get_ras_mca_set || !mca_funcs->mca_get_valid_mca_count) + return -EOPNOTSUPP; + + amdgpu_mca_bank_set_init(&mca_set); + + ret = mca_funcs->mca_get_ras_mca_set(adev, blk, type, &mca_set); + if (ret) + goto err_mca_set_release; + + *total = 0; + list_for_each_entry(node, &mca_set.list, node) { + entry = &node->entry; + + count = 0; + ret = mca_funcs->mca_parse_mca_error_count(adev, blk, type, entry, &count); + if (ret) + goto err_mca_set_release; + + *total += count; + } + +err_mca_set_release: + amdgpu_mca_bank_set_release(&mca_set); + + return ret; +} + +int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) +{ + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + if (!count || !entry) + return -EINVAL; + + if (!mca_funcs || !mca_funcs->mca_parse_mca_error_count) + return -EOPNOTSUPP; + + + return mca_funcs->mca_parse_mca_error_count(adev, blk, type, entry, count); +} + +int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set) +{ + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + + if (!mca_set) + return -EINVAL; + + if (!mca_funcs || !mca_funcs->mca_get_ras_mca_set) + return -EOPNOTSUPP; + + WARN_ON(!list_empty(&mca_set->list)); + + return mca_funcs->mca_get_ras_mca_set(adev, blk, type, mca_set); } int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index 28ad463cf5c9..45f90edf2283 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -25,6 +25,27 @@ #define MCA_MAX_REGS_COUNT (16) +#define MCA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l) +#define MCA_REG__STATUS__VAL(x) MCA_REG_FIELD(x, 63, 63) +#define MCA_REG__STATUS__OVERFLOW(x) MCA_REG_FIELD(x, 62, 62) +#define MCA_REG__STATUS__UC(x) MCA_REG_FIELD(x, 61, 61) +#define MCA_REG__STATUS__EN(x) MCA_REG_FIELD(x, 60, 60) +#define MCA_REG__STATUS__MISCV(x) MCA_REG_FIELD(x, 59, 59) +#define MCA_REG__STATUS__ADDRV(x) MCA_REG_FIELD(x, 58, 58) +#define MCA_REG__STATUS__PCC(x) MCA_REG_FIELD(x, 57, 57) +#define MCA_REG__STATUS__ERRCOREIDVAL(x) MCA_REG_FIELD(x, 56, 56) +#define MCA_REG__STATUS__TCC(x) MCA_REG_FIELD(x, 55, 55) +#define MCA_REG__STATUS__SYNDV(x) MCA_REG_FIELD(x, 53, 53) +#define MCA_REG__STATUS__CECC(x) MCA_REG_FIELD(x, 46, 46) +#define MCA_REG__STATUS__UECC(x) MCA_REG_FIELD(x, 45, 45) +#define MCA_REG__STATUS__DEFERRED(x) MCA_REG_FIELD(x, 44, 44) +#define MCA_REG__STATUS__POISON(x) MCA_REG_FIELD(x, 43, 43) +#define MCA_REG__STATUS__SCRUB(x) MCA_REG_FIELD(x, 40, 40) +#define MCA_REG__STATUS__ERRCOREID(x) MCA_REG_FIELD(x, 37, 32) +#define MCA_REG__STATUS__ADDRLSB(x) MCA_REG_FIELD(x, 29, 24) +#define MCA_REG__STATUS__ERRORCODEEXT(x) MCA_REG_FIELD(x, 21, 16) +#define MCA_REG__STATUS__ERRORCODE(x) MCA_REG_FIELD(x, 15, 0) + enum amdgpu_mca_ip { AMDGPU_MCA_IP_UNKNOW = -1, AMDGPU_MCA_IP_PSP = 0, @@ -57,6 +78,17 @@ struct amdgpu_mca { const struct amdgpu_mca_smu_funcs *mca_funcs; }; +enum mca_reg_idx { + MCA_REG_IDX_CONTROL = 0, + MCA_REG_IDX_STATUS = 1, + MCA_REG_IDX_ADDR = 2, + MCA_REG_IDX_MISC0 = 3, + MCA_REG_IDX_CONFIG = 4, + MCA_REG_IDX_IPID = 5, + MCA_REG_IDX_SYND = 6, + MCA_REG_IDX_COUNT = 16, +}; + struct mca_bank_info { int socket_id; int aid; @@ -72,18 +104,28 @@ struct mca_bank_entry { uint64_t regs[MCA_MAX_REGS_COUNT]; }; +struct mca_bank_node { + struct mca_bank_entry entry; + struct list_head node; +}; + +struct mca_bank_set { + int nr_entries; + struct list_head list; +}; + struct amdgpu_mca_smu_funcs { int max_ue_count; int max_ce_count; int (*mca_set_debug_mode)(struct amdgpu_device *adev, bool enable); - int (*mca_get_error_count)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, uint32_t *count); + int (*mca_get_ras_mca_set)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct mca_bank_set *mca_set); + int (*mca_parse_mca_error_count)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct mca_bank_entry *entry, uint32_t *count); int (*mca_get_valid_mca_count)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count); int (*mca_get_mca_entry)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, int idx, struct mca_bank_entry *entry); - int (*mca_get_ras_mca_idx_array)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, int *idx_array, int *idx_array_size); }; void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev, @@ -107,11 +149,22 @@ int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev); void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs); int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable); int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count); +int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, uint32_t *total); int amdgpu_mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, uint32_t *count); +int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count); +int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set); int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, int idx, struct mca_bank_entry *entry); void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root); +void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set); +int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry); +void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set); +int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data); + #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index a6b57a5d0c77..f0cddbc78496 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -100,17 +100,6 @@ MODULE_FIRMWARE("amdgpu/smu_13_0_6.bin"); #define MCA_BANK_IPID(_ip, _hwid, _type) \ [AMDGPU_MCA_IP_##_ip] = { .hwid = _hwid, .mcatype = _type, } -enum mca_reg_idx { - MCA_REG_IDX_CONTROL = 0, - MCA_REG_IDX_STATUS = 1, - MCA_REG_IDX_ADDR = 2, - MCA_REG_IDX_MISC0 = 3, - MCA_REG_IDX_CONFIG = 4, - MCA_REG_IDX_IPID = 5, - MCA_REG_IDX_SYND = 6, - MCA_REG_IDX_COUNT = 16, -}; - struct mca_bank_ipid { enum amdgpu_mca_ip ip; uint16_t hwid; @@ -123,7 +112,9 @@ struct mca_ras_info { int *err_code_array; int err_code_count; int (*get_err_count)(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, - enum amdgpu_mca_error_type type, int idx, uint32_t *count); + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count); + bool (*bank_is_valid)(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry); }; #define P2S_TABLE_ID_A 0x50325341 @@ -2449,48 +2440,34 @@ static int mca_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_t return 0; } -static int mca_decode_mca_ipid(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, int idx, int *ip) +static int mca_decode_ipid_to_hwip(uint64_t val) { const struct mca_bank_ipid *ipid; - uint64_t val; uint16_t hwid, mcatype; - int i, ret; - - ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_IPID, &val); - if (ret) - return ret; + int i; hwid = REG_GET_FIELD(val, MCMP1_IPIDT0, HardwareID); mcatype = REG_GET_FIELD(val, MCMP1_IPIDT0, McaType); - if (hwid) { - for (i = 0; i < ARRAY_SIZE(smu_v13_0_6_mca_ipid_table); i++) { - ipid = &smu_v13_0_6_mca_ipid_table[i]; + for (i = 0; i < ARRAY_SIZE(smu_v13_0_6_mca_ipid_table); i++) { + ipid = &smu_v13_0_6_mca_ipid_table[i]; - if (!ipid->hwid) - continue; + if (!ipid->hwid) + continue; - if (ipid->hwid == hwid && ipid->mcatype == mcatype) { - *ip = i; - return 0; - } - } + if (ipid->hwid == hwid && ipid->mcatype == mcatype) + return i; } - *ip = AMDGPU_MCA_IP_UNKNOW; - - return 0; + return AMDGPU_MCA_IP_UNKNOW; } static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, - enum amdgpu_mca_error_type type, int idx, uint32_t *count) + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) { uint64_t status0; - int ret; - ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_STATUS, &status0); - if (ret) - return ret; + status0 = entry->regs[MCA_REG_IDX_STATUS]; if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { *count = 0; @@ -2521,70 +2498,41 @@ static bool mca_smu_check_error_code(struct amdgpu_device *adev, const struct mc return false; } -static int mca_mp5_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, - enum amdgpu_mca_error_type type, int idx, uint32_t *count) +static int mca_gfx_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) { - uint64_t status0 = 0, misc0 = 0; - uint32_t errcode; - int ret; - - if (mca_ras->ip != AMDGPU_MCA_IP_MP5) - return -EINVAL; - - ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_STATUS, &status0); - if (ret) - return ret; + uint64_t status0, misc0; + status0 = entry->regs[MCA_REG_IDX_STATUS]; if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { *count = 0; return 0; } - errcode = REG_GET_FIELD(status0, MCMP1_STATUST0, ErrorCode); - if (!mca_smu_check_error_code(adev, mca_ras, errcode)) - return 0; - if (type == AMDGPU_MCA_ERROR_TYPE_UE && REG_GET_FIELD(status0, MCMP1_STATUST0, UC) == 1 && REG_GET_FIELD(status0, MCMP1_STATUST0, PCC) == 1) { - if (count) - *count = 1; + *count = 1; return 0; - } - - ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_MISC0, &misc0); - if (ret) - return ret; - - if (count) + } else { + misc0 = entry->regs[MCA_REG_IDX_MISC0]; *count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt); + } return 0; } static int mca_smu_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, - enum amdgpu_mca_error_type type, int idx, uint32_t *count) + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) { - uint64_t status0 = 0, misc0 = 0; - uint32_t errcode; - int ret; - - if (mca_ras->ip != AMDGPU_MCA_IP_SMU) - return -EINVAL; - - ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_STATUS, &status0); - if (ret) - return ret; + uint64_t status0, misc0; + status0 = entry->regs[MCA_REG_IDX_STATUS]; if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { *count = 0; return 0; } - errcode = REG_GET_FIELD(status0, MCMP1_STATUST0, ErrorCode); - if (!mca_smu_check_error_code(adev, mca_ras, errcode)) - return 0; - if (type == AMDGPU_MCA_ERROR_TYPE_UE && REG_GET_FIELD(status0, MCMP1_STATUST0, UC) == 1 && REG_GET_FIELD(status0, MCMP1_STATUST0, PCC) == 1) { @@ -2593,16 +2541,43 @@ static int mca_smu_mca_get_err_count(const struct mca_ras_info *mca_ras, struct return 0; } - ret = mca_bank_read_reg(adev, type, idx, MCA_REG_IDX_MISC0, &misc0); - if (ret) - return ret; - - if (count) - *count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt); + misc0 = entry->regs[MCA_REG_IDX_MISC0]; + *count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt); return 0; } +static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) +{ + uint32_t instlo; + + instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); + switch (instlo) { + case 0x36430400: /* SMNAID XCD 0 */ + case 0x38430400: /* SMNAID XCD 1 */ + case 0x40430400: /* SMNXCD XCD 0, NOTE: FIXME: fix this error later */ + return true; + default: + return false; + } + + return false; +}; + +static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) +{ + uint32_t errcode, instlo; + + instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); + if (instlo != 0x03b30400) + return false; + + errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode); + return mca_smu_check_error_code(adev, mca_ras, errcode); +} + static int sdma_err_codes[] = { CODE_SDMA0, CODE_SDMA1, CODE_SDMA2, CODE_SDMA3 }; static int mmhub_err_codes[] = { CODE_DAGB0, CODE_DAGB0 + 1, CODE_DAGB0 + 2, CODE_DAGB0 + 3, CODE_DAGB0 + 4, /* DAGB0-4 */ @@ -2617,20 +2592,23 @@ static const struct mca_ras_info mca_ras_table[] = { .get_err_count = mca_umc_mca_get_err_count, }, { .blkid = AMDGPU_RAS_BLOCK__GFX, - .ip = AMDGPU_MCA_IP_MP5, - .get_err_count = mca_mp5_mca_get_err_count, + .ip = AMDGPU_MCA_IP_SMU, + .get_err_count = mca_gfx_mca_get_err_count, + .bank_is_valid = mca_gfx_smu_bank_is_valid, }, { .blkid = AMDGPU_RAS_BLOCK__SDMA, .ip = AMDGPU_MCA_IP_SMU, .err_code_array = sdma_err_codes, .err_code_count = ARRAY_SIZE(sdma_err_codes), .get_err_count = mca_smu_mca_get_err_count, + .bank_is_valid = mca_smu_bank_is_valid, }, { .blkid = AMDGPU_RAS_BLOCK__MMHUB, .ip = AMDGPU_MCA_IP_SMU, .err_code_array = mmhub_err_codes, .err_code_count = ARRAY_SIZE(mmhub_err_codes), .get_err_count = mca_smu_mca_get_err_count, + .bank_is_valid = mca_smu_bank_is_valid, }, }; @@ -2665,130 +2643,84 @@ static int mca_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_e } static bool mca_bank_is_valid(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, - enum amdgpu_mca_error_type type, int idx) + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) { - int ret, ip = AMDGPU_MCA_IP_UNKNOW; - - ret = mca_decode_mca_ipid(adev, type, idx, &ip); - if (ret) + if (mca_decode_ipid_to_hwip(entry->regs[MCA_REG_IDX_IPID]) != mca_ras->ip) return false; - if (ip == AMDGPU_MCA_IP_UNKNOW) - return false; + if (mca_ras->bank_is_valid) + return mca_ras->bank_is_valid(mca_ras, adev, type, entry); - return ip == mca_ras->ip; + return true; } -static int mca_get_valid_mca_idx(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, - enum amdgpu_mca_error_type type, - uint32_t mca_cnt, int *idx_array, int idx_array_size) +static int __mca_smu_get_ras_mca_set(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, + enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set) { - int i, idx_cnt = 0; - - for (i = 0; i < mca_cnt; i++) { - if (!mca_bank_is_valid(adev, mca_ras, type, i)) - continue; - - if (idx_array) { - if (idx_cnt < idx_array_size) - idx_array[idx_cnt] = i; - else - return -EINVAL; - } - - idx_cnt++; - } - - return idx_cnt; -} - -static int __mca_smu_get_error_count(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, enum amdgpu_mca_error_type type, uint32_t *count) -{ - uint32_t result, mca_cnt, total = 0; - int idx_array[16]; - int i, ret, idx_cnt = 0; + struct mca_bank_entry entry; + uint32_t mca_cnt; + int i, ret; ret = mca_get_valid_mca_count(adev, type, &mca_cnt); if (ret) return ret; /* if valid mca bank count is 0, the driver can return 0 directly */ - if (!mca_cnt) { + if (!mca_cnt) + return 0; + + for (i = 0; i < mca_cnt; i++) { + memset(&entry, 0, sizeof(entry)); + ret = mca_get_mca_entry(adev, type, i, &entry); + if (ret) + return ret; + + if (mca_ras && !mca_bank_is_valid(adev, mca_ras, type, &entry)) + continue; + + ret = amdgpu_mca_bank_set_add_entry(mca_set, &entry); + if (ret) + return ret; + } + + return 0; +} + +static int mca_smu_get_ras_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set) +{ + const struct mca_ras_info *mca_ras = NULL; + + if (!mca_set) + return -EINVAL; + + if (blk != AMDGPU_RAS_BLOCK_COUNT) { + mca_ras = mca_get_mca_ras_info(adev, blk); + if (!mca_ras) + return -EOPNOTSUPP; + } + + return __mca_smu_get_ras_mca_set(adev, mca_ras, type, mca_set); +} + +static int mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct mca_bank_entry *entry, uint32_t *count) +{ + const struct mca_ras_info *mca_ras; + + if (!entry || !count) + return -EINVAL; + + mca_ras = mca_get_mca_ras_info(adev, blk); + if (!mca_ras) + return -EOPNOTSUPP; + + if (!mca_bank_is_valid(adev, mca_ras, type, entry)) { *count = 0; return 0; } - if (!mca_ras->get_err_count) - return -EINVAL; - - idx_cnt = mca_get_valid_mca_idx(adev, mca_ras, type, mca_cnt, idx_array, ARRAY_SIZE(idx_array)); - if (idx_cnt < 0) - return -EINVAL; - - for (i = 0; i < idx_cnt; i++) { - result = 0; - ret = mca_ras->get_err_count(mca_ras, adev, type, idx_array[i], &result); - if (ret) - return ret; - - total += result; - } - - *count = total; - - return 0; -} - -static int mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, uint32_t *count) -{ - const struct mca_ras_info *mca_ras; - - if (!count) - return -EINVAL; - - mca_ras = mca_get_mca_ras_info(adev, blk); - if (!mca_ras) - return -EOPNOTSUPP; - - return __mca_smu_get_error_count(adev, mca_ras, type, count); -} - -static int __mca_smu_get_ras_mca_idx_array(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, - enum amdgpu_mca_error_type type, int *idx_array, int *idx_array_size) -{ - uint32_t mca_cnt = 0; - int ret, idx_cnt = 0; - - ret = mca_get_valid_mca_count(adev, type, &mca_cnt); - if (ret) - return ret; - - /* if valid mca bank count is 0, the driver can return 0 directly */ - if (!mca_cnt) { - *idx_array_size = 0; - return 0; - } - - idx_cnt = mca_get_valid_mca_idx(adev, mca_ras, type, mca_cnt, idx_array, *idx_array_size); - if (idx_cnt < 0) - return -EINVAL; - - *idx_array_size = idx_cnt; - - return 0; -} - -static int mca_smu_get_ras_mca_idx_array(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, int *idx_array, int *idx_array_size) -{ - const struct mca_ras_info *mca_ras; - - mca_ras = mca_get_mca_ras_info(adev, blk); - if (!mca_ras) - return -EOPNOTSUPP; - - return __mca_smu_get_ras_mca_idx_array(adev, mca_ras, type, idx_array, idx_array_size); + return mca_ras->get_err_count(mca_ras, adev, type, entry, count); } static int mca_smu_get_mca_entry(struct amdgpu_device *adev, @@ -2807,10 +2739,10 @@ static const struct amdgpu_mca_smu_funcs smu_v13_0_6_mca_smu_funcs = { .max_ue_count = 12, .max_ce_count = 12, .mca_set_debug_mode = mca_smu_set_debug_mode, - .mca_get_error_count = mca_smu_get_error_count, + .mca_get_ras_mca_set = mca_smu_get_ras_mca_set, + .mca_parse_mca_error_count = mca_smu_parse_mca_error_count, .mca_get_mca_entry = mca_smu_get_mca_entry, .mca_get_valid_mca_count = mca_smu_get_valid_mca_count, - .mca_get_ras_mca_idx_array = mca_smu_get_ras_mca_idx_array, }; static int smu_v13_0_6_select_xgmi_plpd_policy(struct smu_context *smu, From 8cc0f5669eb6d4f156c721956da67560c9319317 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Wed, 8 Nov 2023 16:07:45 +0800 Subject: [PATCH 267/415] drm/amdgpu: Support multiple error query modes Direct error query mode and firmware error query mode are supported for now. Signed-off-by: Hawking Zhang Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 93 +++++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 8 +++ 2 files changed, 78 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index b7fe5951b166..a5c14dbb00b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1165,13 +1165,53 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s } } -/* query/inject/cure begin */ -int amdgpu_ras_query_error_status(struct amdgpu_device *adev, - struct ras_query_if *info) +static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, + struct ras_query_if *info, + struct ras_err_data *err_data, + unsigned int error_query_mode) { + enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT; struct amdgpu_ras_block_object *block_obj = NULL; + + if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY) + return -EINVAL; + + if (error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) { + if (info->head.block == AMDGPU_RAS_BLOCK__UMC) { + amdgpu_ras_get_ecc_info(adev, err_data); + } else { + block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0); + if (!block_obj || !block_obj->hw_ops) { + dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", + get_ras_block_str(&info->head)); + return -EINVAL; + } + + if (block_obj->hw_ops->query_ras_error_count) + block_obj->hw_ops->query_ras_error_count(adev, &err_data); + + if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) || + (info->head.block == AMDGPU_RAS_BLOCK__GFX) || + (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) { + if (block_obj->hw_ops->query_ras_error_status) + block_obj->hw_ops->query_ras_error_status(adev); + } + } + } else { + /* FIXME: add code to check return value later */ + amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data); + amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data); + } + + return 0; +} + +/* query/inject/cure begin */ +int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info) +{ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); struct ras_err_data err_data; + unsigned int error_query_mode; int ret; if (!obj) @@ -1181,27 +1221,14 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, if (ret) return ret; - if (info->head.block == AMDGPU_RAS_BLOCK__UMC) { - amdgpu_ras_get_ecc_info(adev, &err_data); - } else { - block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0); - if (!block_obj || !block_obj->hw_ops) { - dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", - get_ras_block_str(&info->head)); - ret = -EINVAL; - goto out_fini_err_data; - } + if (!amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) + return -EINVAL; - if (block_obj->hw_ops->query_ras_error_count) - block_obj->hw_ops->query_ras_error_count(adev, &err_data); - - if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) || - (info->head.block == AMDGPU_RAS_BLOCK__GFX) || - (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) { - if (block_obj->hw_ops->query_ras_error_status) - block_obj->hw_ops->query_ras_error_status(adev); - } - } + ret = amdgpu_ras_query_error_status_helper(adev, info, + &err_data, + error_query_mode); + if (ret) + goto out_fini_err_data; amdgpu_rasmgr_error_data_statistic_update(obj, &err_data); @@ -3397,6 +3424,26 @@ bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev) return true; } +bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev, + unsigned int *error_query_mode) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + + if (!con) { + *error_query_mode = AMDGPU_RAS_INVALID_ERROR_QUERY; + return false; + } + + if (mca_funcs && mca_funcs->mca_set_debug_mode) + *error_query_mode = + (con->is_mca_debug_mode) ? AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY; + else + *error_query_mode = AMDGPU_RAS_DIRECT_ERROR_QUERY; + + return true; +} + /* Register each ip ras block into amdgpu ras */ int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object *ras_block_obj) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 665414c22ca9..19161916ac46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -320,6 +320,12 @@ enum amdgpu_ras_ret { AMDGPU_RAS_PT, }; +enum amdgpu_ras_error_query_mode { + AMDGPU_RAS_INVALID_ERROR_QUERY = 0, + AMDGPU_RAS_DIRECT_ERROR_QUERY = 1, + AMDGPU_RAS_FIRMWARE_ERROR_QUERY = 2, +}; + /* ras error status reisger fields */ #define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG__SHIFT 0x0 #define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG_MASK 0x00000001L @@ -769,6 +775,8 @@ int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_co void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable); bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev); +bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev, + unsigned int *mode); int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object *ras_block_obj); From 61e0a98200f49d0b78e17aa2ccd71967cd92f2ab Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 7 Nov 2023 09:34:33 +0800 Subject: [PATCH 268/415] drm/amdgpu: disable smu v13.0.6 mca debug mode by default disable mca debug mode for smu v13.0.6 by default. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index f0cddbc78496..45483bc91cbe 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2297,7 +2297,7 @@ static int smu_v13_0_6_post_init(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; if (!amdgpu_sriov_vf(adev) && adev->ras_enabled) - return smu_v13_0_6_mca_set_debug_mode(smu, true); + return smu_v13_0_6_mca_set_debug_mode(smu, false); return 0; } From 4abf0b0bdf5ffe7e79e6416cc2c1b7f018b71c79 Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Wed, 8 Nov 2023 02:32:13 +0000 Subject: [PATCH 269/415] drm/amdgpu: Change extended-scope MTYPE on GC 9.4.3 Change local memory type to MTYPE_UC on revision id 0 Signed-off-by: David Yat Sin Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 7 +++++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 8 +++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 05fb57540e35..bde25eb4ed8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1180,7 +1180,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, if (uncached) { mtype = MTYPE_UC; } else if (ext_coherent) { - mtype = is_local ? MTYPE_CC : MTYPE_UC; + if (adev->rev_id) + mtype = is_local ? MTYPE_CC : MTYPE_UC; + else + mtype = MTYPE_UC; } else if (adev->flags & AMD_IS_APU) { mtype = is_local ? mtype_local : MTYPE_NC; } else { @@ -1301,7 +1304,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | AMDGPU_PTE_MTYPE_VG10(mtype_local); - } else { + } else if (adev->rev_id) { /* MTYPE_UC case */ *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | AMDGPU_PTE_MTYPE_VG10(MTYPE_CC); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index e67d06a42809..f2f3c338fd94 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1255,9 +1255,11 @@ svm_range_get_pte_flags(struct kfd_node *node, } break; case IP_VERSION(9, 4, 3): - mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : - (amdgpu_mtype_local == 2 || ext_coherent ? - AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW); + if (ext_coherent) + mtype_local = node->adev->rev_id ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_UC; + else + mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : + amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; snoop = true; if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; From 27d80f7d68185a62e101575d302539353622e523 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 3 Nov 2023 17:00:10 +0800 Subject: [PATCH 270/415] drm/amdgpu: add pcs xgmi v6.4.0 ras support add pcs xgmi v6.4.0 ras support Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 158 +++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/soc15_common.h | 6 + 2 files changed, 161 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 1eeb0a517d80..bd20cb3b9819 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -113,6 +113,43 @@ static const int xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4[] = { smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000 }; +static const u64 xgmi_v6_4_0_mca_base_array[] = { + 0x11a09200, + 0x11b09200, +}; + +static const char *xgmi_v6_4_0_ras_error_code_ext[32] = { + [0x00] = "XGMI PCS DataLossErr", + [0x01] = "XGMI PCS TrainingErr", + [0x02] = "XGMI PCS FlowCtrlAckErr", + [0x03] = "XGMI PCS RxFifoUnderflowErr", + [0x04] = "XGMI PCS RxFifoOverflowErr", + [0x05] = "XGMI PCS CRCErr", + [0x06] = "XGMI PCS BERExceededErr", + [0x07] = "XGMI PCS TxMetaDataErr", + [0x08] = "XGMI PCS ReplayBufParityErr", + [0x09] = "XGMI PCS DataParityErr", + [0x0a] = "XGMI PCS ReplayFifoOverflowErr", + [0x0b] = "XGMI PCS ReplayFifoUnderflowErr", + [0x0c] = "XGMI PCS ElasticFifoOverflowErr", + [0x0d] = "XGMI PCS DeskewErr", + [0x0e] = "XGMI PCS FlowCtrlCRCErr", + [0x0f] = "XGMI PCS DataStartupLimitErr", + [0x10] = "XGMI PCS FCInitTimeoutErr", + [0x11] = "XGMI PCS RecoveryTimeoutErr", + [0x12] = "XGMI PCS ReadySerialTimeoutErr", + [0x13] = "XGMI PCS ReadySerialAttemptErr", + [0x14] = "XGMI PCS RecoveryAttemptErr", + [0x15] = "XGMI PCS RecoveryRelockAttemptErr", + [0x16] = "XGMI PCS ReplayAttemptErr", + [0x17] = "XGMI PCS SyncHdrErr", + [0x18] = "XGMI PCS TxReplayTimeoutErr", + [0x19] = "XGMI PCS RxReplayTimeoutErr", + [0x1a] = "XGMI PCS LinkSubTxTimeoutErr", + [0x1b] = "XGMI PCS LinkSubRxTimeoutErr", + [0x1c] = "XGMI PCS RxCMDPktErr", +}; + static const struct amdgpu_pcs_ras_field xgmi_pcs_ras_fields[] = { {"XGMI PCS DataLossErr", SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataLossErr)}, @@ -936,7 +973,7 @@ static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg WREG32_PCIE(pcs_status_reg, 0); } -static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) +static void amdgpu_xgmi_legacy_reset_ras_error_count(struct amdgpu_device *adev) { uint32_t i; @@ -974,6 +1011,39 @@ static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) } } +static void __xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst, u64 mca_base) +{ + WREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS, 0ULL); +} + +static void xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(xgmi_v6_4_0_mca_base_array); i++) + __xgmi_v6_4_0_reset_error_count(adev, xgmi_inst, xgmi_v6_4_0_mca_base_array[i]); +} + +static void xgmi_v6_4_0_reset_ras_error_count(struct amdgpu_device *adev) +{ + int i; + + for_each_inst(i, adev->aid_mask) + xgmi_v6_4_0_reset_error_count(adev, i); +} + +static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + xgmi_v6_4_0_reset_ras_error_count(adev); + break; + default: + amdgpu_xgmi_legacy_reset_ras_error_count(adev); + break; + } +} + static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, uint32_t value, uint32_t mask_value, @@ -1025,8 +1095,8 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, return 0; } -static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, - void *ras_error_status) +static void amdgpu_xgmi_legacy_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; int i, supported = 1; @@ -1121,6 +1191,88 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, err_data->ce_count += ce_cnt; } +static enum amdgpu_mca_error_type xgmi_v6_4_0_pcs_mca_get_error_type(struct amdgpu_device *adev, u64 status) +{ + const char *error_str; + int ext_error_code; + + ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(status); + + error_str = ext_error_code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext) ? + xgmi_v6_4_0_ras_error_code_ext[ext_error_code] : NULL; + if (error_str) + dev_info(adev->dev, "%s detected\n", error_str); + + switch (ext_error_code) { + case 0: + return AMDGPU_MCA_ERROR_TYPE_UE; + case 6: + return AMDGPU_MCA_ERROR_TYPE_CE; + default: + return -EINVAL; + } + + return -EINVAL; +} + +static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 mca_base, struct ras_err_data *err_data) +{ + int xgmi_inst = mcm_info->die_id; + u64 status = 0; + + status = RREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS); + if (!MCA_REG__STATUS__VAL(status)) + return; + + switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) { + case AMDGPU_MCA_ERROR_TYPE_UE: + amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, 1ULL); + break; + case AMDGPU_MCA_ERROR_TYPE_CE: + amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, 1ULL); + break; + default: + break; + } + + WREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS, 0ULL); +} + +static void xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, int xgmi_inst, struct ras_err_data *err_data) +{ + struct amdgpu_smuio_mcm_config_info mcm_info = { + .socket_id = adev->smuio.funcs->get_socket_id(adev), + .die_id = xgmi_inst, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(xgmi_v6_4_0_mca_base_array); i++) + __xgmi_v6_4_0_query_error_count(adev, &mcm_info, xgmi_v6_4_0_mca_base_array[i], err_data); +} + +static void xgmi_v6_4_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + int i; + + for_each_inst(i, adev->aid_mask) + xgmi_v6_4_0_query_error_count(adev, i, err_data); +} + +static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + xgmi_v6_4_0_query_ras_error_count(adev, ras_error_status); + break; + default: + amdgpu_xgmi_legacy_query_ras_error_count(adev, ras_error_status); + break; + } +} + /* Trigger XGMI/WAFL error */ static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *inject_if, uint32_t instance_mask) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index c75e9cd5c98b..6775cce08090 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -204,4 +204,10 @@ + adev->asic_funcs->encode_ext_smn_addressing(ext), \ value) \ +#define RREG64_MCA(ext, mca_base, idx) \ + RREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8)) + +#define WREG64_MCA(ext, mca_base, idx, val) \ + WREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8), val) + #endif From d406aec8dc2a001d4a91f786b525b3b4ea7fa1ef Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Wed, 8 Nov 2023 17:08:49 +0800 Subject: [PATCH 271/415] drm/amdgpu: correct acclerator check architecutre dump So driver doesn't touch invalid aca entries. Signed-off-by: Hawking Zhang Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 15 +++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | 2 -- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 3302f4a29387..65926f3c708a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -202,10 +202,17 @@ int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry) { - int i; - - for (i = 0; i < ARRAY_SIZE(entry->regs); i++) - dev_dbg(adev->dev, "mca entry[%02d].regs[%02d]=0x%016llx\n", idx, i, entry->regs[i]); + dev_info(adev->dev, "[Hardware error] Accelerator Check Architecture events logged\n"); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].STATUS=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_STATUS]); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].ADDR=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_ADDR]); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].MISC0=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_MISC0]); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].IPID=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_IPID]); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].SYND=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_SYND]); } int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index 45f90edf2283..e4f30d20170f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -79,11 +79,9 @@ struct amdgpu_mca { }; enum mca_reg_idx { - MCA_REG_IDX_CONTROL = 0, MCA_REG_IDX_STATUS = 1, MCA_REG_IDX_ADDR = 2, MCA_REG_IDX_MISC0 = 3, - MCA_REG_IDX_CONFIG = 4, MCA_REG_IDX_IPID = 5, MCA_REG_IDX_SYND = 6, MCA_REG_IDX_COUNT = 16, From 8140b07b0a69a7e8d5d764237c68af7942c4bfdd Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 8 Nov 2023 20:35:52 +0800 Subject: [PATCH 272/415] drm/amdgpu: correct mca debugfs dump reg list avoid driver to touch invalid mca reg. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 65926f3c708a..cf33eb219e25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -389,14 +389,21 @@ static int amdgpu_mca_smu_debug_mode_set(void *data, u64 val) static void mca_dump_entry(struct seq_file *m, struct mca_bank_entry *entry) { int i, idx = entry->idx; + int reg_idx_array[] = { + MCA_REG_IDX_STATUS, + MCA_REG_IDX_ADDR, + MCA_REG_IDX_MISC0, + MCA_REG_IDX_IPID, + MCA_REG_IDX_SYND, + }; seq_printf(m, "mca entry[%d].type: %s\n", idx, entry->type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE"); seq_printf(m, "mca entry[%d].ip: %d\n", idx, entry->ip); seq_printf(m, "mca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n", idx, entry->info.socket_id, entry->info.aid, entry->info.hwid, entry->info.mcatype); - for (i = 0; i < ARRAY_SIZE(entry->regs); i++) - seq_printf(m, "mca entry[%d].regs[%d]: 0x%016llx\n", idx, i, entry->regs[i]); + for (i = 0; i < ARRAY_SIZE(reg_idx_array); i++) + seq_printf(m, "mca entry[%d].regs[%d]: 0x%016llx\n", idx, reg_idx_array[i], entry->regs[reg_idx_array[i]]); } static int mca_dump_show(struct seq_file *m, enum amdgpu_mca_error_type type) From 36e0d7088555a6a32664635eebe372452027bc6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Pekkarinen?= Date: Sun, 29 Oct 2023 11:39:26 +0200 Subject: [PATCH 273/415] drm/amd/display: remove duplicated argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spotted by coccicheck, there is a redundant check for v->SourcePixelFormat[k] != dm_444_16. This patch will remove it. The corresponding output follows. drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c:5130:86-122: duplicated argument to && or || Signed-off-by: José Pekkarinen Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index ad741a723c0e..3686f1e7de3a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -5128,7 +5128,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l ViewportExceedsSurface = true; if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 - && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) { + && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) { if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) { ViewportExceedsSurface = true; } From 4638e0c29a3f2294d5de0d052a4b8c9f33ccb957 Mon Sep 17 00:00:00 2001 From: Vitaly Prosyak Date: Wed, 11 Oct 2023 19:31:48 -0400 Subject: [PATCH 274/415] drm/amdgpu: fix software pci_unplug on some chips When software 'pci unplug' using IGT is executed we got a sysfs directory entry is NULL for differant ras blocks like hdp, umc, etc. Before call 'sysfs_remove_file_from_group' and 'sysfs_remove_group' check that 'sd' is not NULL. [ +0.000001] RIP: 0010:sysfs_remove_group+0x83/0x90 [ +0.000002] Code: 31 c0 31 d2 31 f6 31 ff e9 9a a8 b4 00 4c 89 e7 e8 f2 a2 ff ff eb c2 49 8b 55 00 48 8b 33 48 c7 c7 80 65 94 82 e8 cd 82 bb ff <0f> 0b eb cc 66 0f 1f 84 00 00 00 00 00 90 90 90 90 90 90 90 90 90 [ +0.000001] RSP: 0018:ffffc90002067c90 EFLAGS: 00010246 [ +0.000002] RAX: 0000000000000000 RBX: ffffffff824ea180 RCX: 0000000000000000 [ +0.000001] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ +0.000001] RBP: ffffc90002067ca8 R08: 0000000000000000 R09: 0000000000000000 [ +0.000001] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ +0.000001] R13: ffff88810a395f48 R14: ffff888101aab0d0 R15: 0000000000000000 [ +0.000001] FS: 00007f5ddaa43a00(0000) GS:ffff88841e800000(0000) knlGS:0000000000000000 [ +0.000002] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ +0.000001] CR2: 00007f8ffa61ba50 CR3: 0000000106432000 CR4: 0000000000350ef0 [ +0.000001] Call Trace: [ +0.000001] [ +0.000001] ? show_regs+0x72/0x90 [ +0.000002] ? sysfs_remove_group+0x83/0x90 [ +0.000002] ? __warn+0x8d/0x160 [ +0.000001] ? sysfs_remove_group+0x83/0x90 [ +0.000001] ? report_bug+0x1bb/0x1d0 [ +0.000003] ? handle_bug+0x46/0x90 [ +0.000001] ? exc_invalid_op+0x19/0x80 [ +0.000002] ? asm_exc_invalid_op+0x1b/0x20 [ +0.000003] ? sysfs_remove_group+0x83/0x90 [ +0.000001] dpm_sysfs_remove+0x61/0x70 [ +0.000002] device_del+0xa3/0x3d0 [ +0.000002] ? ktime_get_mono_fast_ns+0x46/0xb0 [ +0.000002] device_unregister+0x18/0x70 [ +0.000001] i2c_del_adapter+0x26d/0x330 [ +0.000002] arcturus_i2c_control_fini+0x25/0x50 [amdgpu] [ +0.000236] smu_sw_fini+0x38/0x260 [amdgpu] [ +0.000241] amdgpu_device_fini_sw+0x116/0x670 [amdgpu] [ +0.000186] ? mutex_lock+0x13/0x50 [ +0.000003] amdgpu_driver_release_kms+0x16/0x40 [amdgpu] [ +0.000192] drm_minor_release+0x4f/0x80 [drm] [ +0.000025] drm_release+0xfe/0x150 [drm] [ +0.000027] __fput+0x9f/0x290 [ +0.000002] ____fput+0xe/0x20 [ +0.000002] task_work_run+0x61/0xa0 [ +0.000002] exit_to_user_mode_prepare+0x150/0x170 [ +0.000002] syscall_exit_to_user_mode+0x2a/0x50 Cc: Hawking Zhang Cc: Luben Tuikov Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Vitaly Prosyak Reviewed-by: Luben Tuikov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index a5c14dbb00b7..84e5987b14e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1564,7 +1564,8 @@ static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - sysfs_remove_file_from_group(&adev->dev->kobj, + if (adev->dev->kobj.sd) + sysfs_remove_file_from_group(&adev->dev->kobj, &con->badpages_attr.attr, RAS_FS_NAME); } @@ -1583,7 +1584,8 @@ static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev) .attrs = attrs, }; - sysfs_remove_group(&adev->dev->kobj, &group); + if (adev->dev->kobj.sd) + sysfs_remove_group(&adev->dev->kobj, &group); return 0; } @@ -1630,7 +1632,8 @@ int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev, if (!obj || !obj->attr_inuse) return -EINVAL; - sysfs_remove_file_from_group(&adev->dev->kobj, + if (adev->dev->kobj.sd) + sysfs_remove_file_from_group(&adev->dev->kobj, &obj->sysfs_attr.attr, RAS_FS_NAME); obj->attr_inuse = 0; From 76d2da18afde2c78e9fc1fbcc9dc57c27ac77ac5 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 7 Nov 2023 18:03:45 +0800 Subject: [PATCH 275/415] drm/amdgpu: add smu v13.0.6 pcs xgmi ras error query support add pcs xgmi ras error query support for smu v13.0.6. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | 1 + .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 21 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index e4f30d20170f..2b488fcf2f95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -54,6 +54,7 @@ enum amdgpu_mca_ip { AMDGPU_MCA_IP_SMU, AMDGPU_MCA_IP_MP5, AMDGPU_MCA_IP_UMC, + AMDGPU_MCA_IP_PCS_XGMI, AMDGPU_MCA_IP_COUNT, }; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 45483bc91cbe..891605d4975f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2379,6 +2379,7 @@ static const struct mca_bank_ipid smu_v13_0_6_mca_ipid_table[AMDGPU_MCA_IP_COUNT MCA_BANK_IPID(UMC, 0x96, 0x0), MCA_BANK_IPID(SMU, 0x01, 0x1), MCA_BANK_IPID(MP5, 0x01, 0x2), + MCA_BANK_IPID(PCS_XGMI, 0x50, 0x0), }; static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_bank_info *info) @@ -2482,6 +2483,22 @@ static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct return 0; } +static int mca_pcs_xgmi_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, + uint32_t *count) +{ + u32 ext_error_code; + + ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(entry->regs[MCA_REG_IDX_STATUS]); + + if (type == AMDGPU_MCA_ERROR_TYPE_UE && ext_error_code == 0) + *count = 1; + else if (type == AMDGPU_MCA_ERROR_TYPE_CE && ext_error_code == 6) + *count = 1; + + return 0; +} + static bool mca_smu_check_error_code(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, uint32_t errcode) { @@ -2609,6 +2626,10 @@ static const struct mca_ras_info mca_ras_table[] = { .err_code_count = ARRAY_SIZE(mmhub_err_codes), .get_err_count = mca_smu_mca_get_err_count, .bank_is_valid = mca_smu_bank_is_valid, + }, { + .blkid = AMDGPU_RAS_BLOCK__XGMI_WAFL, + .ip = AMDGPU_MCA_IP_PCS_XGMI, + .get_err_count = mca_pcs_xgmi_mca_get_err_count, }, }; From 85150626ea0423fd0adb5ac9b5ab4bbaff9aa30b Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Tue, 3 Oct 2023 16:15:52 -0400 Subject: [PATCH 276/415] drm/amdgpu: Use correct KIQ MEC engine for gfx9.4.3 (v5) amdgpu_kiq_wreg/rreg is hardcoded to use MEC engine 0. Add an xcc_id parameter to amdgpu_kiq_wreg/rreg, define W/RREG32_XCC and amdgpu_device_xcc_wreg/rreg to use the new xcc_id parameter. Using amdgpu_sriov_runtime to determine whether to access via kiq or RLC is sufficient for now. v5: add condition in amdgpu_device_xcc_w/rreg, remove trace func call v4: avoid using amdgpu_sriov_w/rreg v3: use W/RREG32_XCC to handle non-kiq case v2: define amdgpu_device_xcc_wreg/rreg instead of changing parameters of amdgpu_device_wreg/rreg Signed-off-by: Victor Lu Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 13 ++- .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 2 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 89 ++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 4 + drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 +- 9 files changed, 116 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8b4ca2576a3d..afec09930efa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1159,11 +1159,18 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags); u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, u64 reg_addr); +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t acc_flags, + uint32_t xcc_id); void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags); void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, u64 reg_addr, u32 reg_data); +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t v, + uint32_t acc_flags, + uint32_t xcc_id); void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id); void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value); @@ -1204,8 +1211,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) #define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) -#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg)) -#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v)) +#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0) +#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0) #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) @@ -1215,6 +1222,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0) #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) +#define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst) +#define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, inst) #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg)) #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v)) #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index 490c8f5ddb60..80309d39737a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -300,7 +300,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI); for (reg = hqd_base; reg <= hqd_end; reg++) - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); + WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst); /* Activate doorbell logic before triggering WPTR poll. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 51011e8ee90d..9285789b3a42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -239,7 +239,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, for (reg = hqd_base; reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++) - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); + WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst); /* Activate doorbell logic before triggering WPTR poll. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fd8cd8e2d3f2..d93240e8cb6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -73,6 +73,7 @@ #include "amdgpu_pmu.h" #include "amdgpu_fru_eeprom.h" #include "amdgpu_reset.h" +#include "amdgpu_virt.h" #include #include @@ -472,7 +473,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && down_read_trylock(&adev->reset_domain->sem)) { - ret = amdgpu_kiq_rreg(adev, reg); + ret = amdgpu_kiq_rreg(adev, reg, 0); up_read(&adev->reset_domain->sem); } else { ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); @@ -509,6 +510,49 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) BUG(); } + +/** + * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC + * + * @adev: amdgpu_device pointer + * @reg: dword aligned register offset + * @acc_flags: access flags which require special behavior + * @xcc_id: xcc accelerated compute core id + * + * Returns the 32 bit value from the offset specified. + */ +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t acc_flags, + uint32_t xcc_id) +{ + uint32_t ret, rlcg_flag; + + if (amdgpu_device_skip_hw_access(adev)) + return 0; + + if ((reg * 4) < adev->rmmio_size) { + if (amdgpu_sriov_vf(adev) && + !amdgpu_sriov_runtime(adev) && + adev->gfx.rlc.rlcg_reg_access_supported && + amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, + GC_HWIP, false, + &rlcg_flag)) { + ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id); + } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && + amdgpu_sriov_runtime(adev) && + down_read_trylock(&adev->reset_domain->sem)) { + ret = amdgpu_kiq_rreg(adev, reg, xcc_id); + up_read(&adev->reset_domain->sem); + } else { + ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); + } + } else { + ret = adev->pcie_rreg(adev, reg * 4); + } + + return ret; +} + /* * MMIO register write with bytes helper functions * @offset:bytes offset from MMIO start @@ -556,7 +600,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev, if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && down_read_trylock(&adev->reset_domain->sem)) { - amdgpu_kiq_wreg(adev, reg, v); + amdgpu_kiq_wreg(adev, reg, v, 0); up_read(&adev->reset_domain->sem); } else { writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); @@ -597,6 +641,47 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, } } +/** + * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC + * + * @adev: amdgpu_device pointer + * @reg: dword aligned register offset + * @v: 32 bit value to write to the register + * @acc_flags: access flags which require special behavior + * @xcc_id: xcc accelerated compute core id + * + * Writes the value specified to the offset specified. + */ +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t v, + uint32_t acc_flags, uint32_t xcc_id) +{ + uint32_t rlcg_flag; + + if (amdgpu_device_skip_hw_access(adev)) + return; + + if ((reg * 4) < adev->rmmio_size) { + if (amdgpu_sriov_vf(adev) && + !amdgpu_sriov_runtime(adev) && + adev->gfx.rlc.rlcg_reg_access_supported && + amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, + GC_HWIP, true, + &rlcg_flag)) { + amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id); + } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && + amdgpu_sriov_runtime(adev) && + down_read_trylock(&adev->reset_domain->sem)) { + amdgpu_kiq_wreg(adev, reg, v, xcc_id); + up_read(&adev->reset_domain->sem); + } else { + writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); + } + } else { + adev->pcie_wreg(adev, reg * 4, v); + } +} + /** * amdgpu_device_indirect_rreg - read an indirect register * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a2a29dcb2422..b9674c57c436 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -931,12 +931,12 @@ void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev, func(adev, ras_error_status, i); } -uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id) { signed long r, cnt = 0; unsigned long flags; uint32_t seq, reg_val_offs = 0, value = 0; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; struct amdgpu_ring *ring = &kiq->ring; if (amdgpu_device_skip_hw_access(adev)) @@ -999,12 +999,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) return ~0; } -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id) { signed long r, cnt = 0; unsigned long flags; uint32_t seq; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_wreg); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 7088c5015675..f23bafec71c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -521,8 +521,8 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); -uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id); +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id); int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev); void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 468b251429d2..3a632c3b1a2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -943,7 +943,7 @@ void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev, } } -static bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, +bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, u32 acc_flags, u32 hwip, bool write, u32 *rlcg_flag) { @@ -976,7 +976,7 @@ static bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, return ret; } -static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id) +u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id) { struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; uint32_t timeout = 50000; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 73523633bd8c..d4207e44141f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -367,4 +367,8 @@ bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id); void amdgpu_virt_post_reset(struct amdgpu_device *adev); bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev); +bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, + u32 acc_flags, u32 hwip, + bool write, u32 *rlcg_flag); +u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index fc08104a7944..40d06d32bb74 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2739,16 +2739,16 @@ static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id); mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 0); - WREG32(mec_int_cntl_reg, mec_int_cntl); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id); break; case AMDGPU_IRQ_STATE_ENABLE: - mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id); mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 1); - WREG32(mec_int_cntl_reg, mec_int_cntl); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id); break; default: break; From 197264284303b30b26e885d83680f594e69840e5 Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Tue, 8 Aug 2023 14:28:56 -0400 Subject: [PATCH 277/415] drm/amdgpu: Change WREG32_RLC to WREG32_SOC15_RLC where inst != 0 (v2) W/RREG32_RLC is hardedcoded to use instance 0. W/RREG32_SOC15_RLC should be used instead when inst != 0. v2: rebase Signed-off-by: Victor Lu Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 38 ++++++++---------- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 40 +++++++++---------- drivers/gpu/drm/amd/amdgpu/soc15_common.h | 2 +- 3 files changed, 37 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index 80309d39737a..f6598b9e4faa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -306,8 +306,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, /* Activate doorbell logic before triggering WPTR poll. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL), - data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL, data); if (wptr) { /* Don't read wptr with get_user because the user @@ -336,27 +335,24 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO), - lower_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI), - upper_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR), - lower_32_bits((uintptr_t)wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), - regCP_HQD_PQ_WPTR_POLL_ADDR_HI), + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO, + lower_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI, + upper_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR, + lower_32_bits((uintptr_t)wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR_HI, upper_32_bits((uintptr_t)wptr)); - WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1), - (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, - queue_id)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1, + (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR), - REG_SET_FIELD(m->cp_hqd_eop_rptr, - CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR, + REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE, data); kgd_gfx_v9_release_queue(adev, inst); @@ -494,15 +490,15 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch( VALID, 1); - WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), + WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regTCP_WATCH0_ADDR_H) + (watch_id * TCP_WATCH_STRIDE)), - watch_address_high); + watch_address_high, inst); - WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), + WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regTCP_WATCH0_ADDR_L) + (watch_id * TCP_WATCH_STRIDE)), - watch_address_low); + watch_address_low, inst); return watch_address_cntl; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 9285789b3a42..00fbc0f44c92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -91,8 +91,8 @@ void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi { kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG), sh_mem_config); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_BASES), sh_mem_bases); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG, sh_mem_config); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmSH_MEM_BASES, sh_mem_bases); /* APE1 no longer exists on GFX9 */ kgd_gfx_v9_unlock_srbm(adev, inst); @@ -245,8 +245,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, /* Activate doorbell logic before triggering WPTR poll. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL), - data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL, data); if (wptr) { /* Don't read wptr with get_user because the user @@ -275,25 +274,24 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO), - lower_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI), - upper_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR), - lower_32_bits((uintptr_t)wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), - upper_32_bits((uintptr_t)wptr)); - WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1, - (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO, + lower_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI, + upper_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR, + lower_32_bits((uintptr_t)wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, + upper_32_bits((uintptr_t)wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1, + (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR), - REG_SET_FIELD(m->cp_hqd_eop_rptr, - CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR, + REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE), data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE, data); kgd_gfx_v9_release_queue(adev, inst); @@ -556,7 +554,7 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, break; } - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST), type); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST, type); end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { @@ -908,8 +906,8 @@ void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t inst) { - *wait_times = RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), - mmCP_IQ_WAIT_TIME2)); + *wait_times = RREG32_SOC15_RLC(GC, GET_INST(GC, inst), + mmCP_IQ_WAIT_TIME2); } void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 6775cce08090..242b24f73c17 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -140,7 +140,7 @@ /* for GC only */ #define RREG32_RLC(reg) \ - __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP) + __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP, 0) #define WREG32_RLC_NO_KIQ(reg, value, hwip) \ __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip, 0) From fe69a1b1b6ed9ffc2c578c63f526026a8ab74f0c Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Thu, 9 Nov 2023 18:43:28 +0100 Subject: [PATCH 278/415] selftests: bpf: xskxceiver: ksft_print_msg: fix format type error Crossbuilding selftests/bpf for architecture arm64, format specifies type error show up like. xskxceiver.c:912:34: error: format specifies type 'int' but the argument has type '__u64' (aka 'unsigned long long') [-Werror,-Wformat] ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n", ~~ %llu __func__, pkt->pkt_nb, meta->count); ^~~~~~~~~~~ xskxceiver.c:929:55: error: format specifies type 'unsigned long long' but the argument has type 'u64' (aka 'unsigned long') [-Werror,-Wformat] ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len); ~~~~ ^~~~ Fixing the issues by casting to (unsigned long long) and changing the specifiers to be %llu from %d and %u, since with u64s it might be %llx or %lx, depending on architecture. Signed-off-by: Anders Roxell Link: https://lore.kernel.org/r/20231109174328.1774571-1-anders.roxell@linaro.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/xskxceiver.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 591ca9637b23..b604c570309a 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -908,8 +908,9 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr) struct xdp_info *meta = data - sizeof(struct xdp_info); if (meta->count != pkt->pkt_nb) { - ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n", - __func__, pkt->pkt_nb, meta->count); + ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n", + __func__, pkt->pkt_nb, + (unsigned long long)meta->count); return false; } @@ -926,11 +927,13 @@ static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 exp if (addr >= umem->num_frames * umem->frame_size || addr + len > umem->num_frames * umem->frame_size) { - ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len); + ksft_print_msg("Frag invalid addr: %llx len: %u\n", + (unsigned long long)addr, len); return false; } if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) { - ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", addr, len); + ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", + (unsigned long long)addr, len); return false; } @@ -1029,7 +1032,8 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size) u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1); ksft_print_msg("[%s] Too many packets completed\n", __func__); - ksft_print_msg("Last completion address: %llx\n", addr); + ksft_print_msg("Last completion address: %llx\n", + (unsigned long long)addr); return TEST_FAILURE; } @@ -1513,8 +1517,9 @@ static int validate_tx_invalid_descs(struct ifobject *ifobject) } if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) { - ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n", - __func__, stats.tx_invalid_descs, + ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n", + __func__, + (unsigned long long)stats.tx_invalid_descs, ifobject->xsk->pkt_stream->nb_pkts); return TEST_FAILURE; } From 3feb263bb516ee7e1da0acd22b15afbb9a7daa19 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 9 Nov 2023 16:26:36 -0800 Subject: [PATCH 279/415] bpf: handle ldimm64 properly in check_cfg() ldimm64 instructions are 16-byte long, and so have to be handled appropriately in check_cfg(), just like the rest of BPF verifier does. This has implications in three places: - when determining next instruction for non-jump instructions; - when determining next instruction for callback address ldimm64 instructions (in visit_func_call_insn()); - when checking for unreachable instructions, where second half of ldimm64 is expected to be unreachable; We take this also as an opportunity to report jump into the middle of ldimm64. And adjust few test_verifier tests accordingly. Acked-by: Eduard Zingerman Reported-by: Hao Sun Fixes: 475fb78fbf48 ("bpf: verifier (add branch/goto checks)") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231110002638.4168352-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 8 ++++-- kernel/bpf/verifier.c | 27 ++++++++++++++----- .../testing/selftests/bpf/verifier/ld_imm64.c | 8 +++--- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b4825d3cdb29..35bff17396c0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -909,10 +909,14 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) aux->ctx_field_size = size; } +static bool bpf_is_ldimm64(const struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW); +} + static inline bool bpf_pseudo_func(const struct bpf_insn *insn) { - return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && - insn->src_reg == BPF_PSEUDO_FUNC; + return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC; } struct bpf_prog_ops { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bd1c42eb540f..b87715b364fd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -15439,15 +15439,16 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns, struct bpf_verifier_env *env, bool visit_callee) { - int ret; + int ret, insn_sz; - ret = push_insn(t, t + 1, FALLTHROUGH, env, false); + insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1; + ret = push_insn(t, t + insn_sz, FALLTHROUGH, env, false); if (ret) return ret; - mark_prune_point(env, t + 1); + mark_prune_point(env, t + insn_sz); /* when we exit from subprog, we need to record non-linear history */ - mark_jmp_point(env, t + 1); + mark_jmp_point(env, t + insn_sz); if (visit_callee) { mark_prune_point(env, t); @@ -15469,15 +15470,17 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns, static int visit_insn(int t, struct bpf_verifier_env *env) { struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t]; - int ret, off; + int ret, off, insn_sz; if (bpf_pseudo_func(insn)) return visit_func_call_insn(t, insns, env, true); /* All non-branch instructions have a single fall-through edge. */ if (BPF_CLASS(insn->code) != BPF_JMP && - BPF_CLASS(insn->code) != BPF_JMP32) - return push_insn(t, t + 1, FALLTHROUGH, env, false); + BPF_CLASS(insn->code) != BPF_JMP32) { + insn_sz = bpf_is_ldimm64(insn) ? 2 : 1; + return push_insn(t, t + insn_sz, FALLTHROUGH, env, false); + } switch (BPF_OP(insn->code)) { case BPF_EXIT: @@ -15607,11 +15610,21 @@ static int check_cfg(struct bpf_verifier_env *env) } for (i = 0; i < insn_cnt; i++) { + struct bpf_insn *insn = &env->prog->insnsi[i]; + if (insn_state[i] != EXPLORED) { verbose(env, "unreachable insn %d\n", i); ret = -EINVAL; goto err_free; } + if (bpf_is_ldimm64(insn)) { + if (insn_state[i + 1] != 0) { + verbose(env, "jump into the middle of ldimm64 insn %d\n", i); + ret = -EINVAL; + goto err_free; + } + i++; /* skip second half of ldimm64 */ + } } ret = 0; /* cfg looks good */ diff --git a/tools/testing/selftests/bpf/verifier/ld_imm64.c b/tools/testing/selftests/bpf/verifier/ld_imm64.c index f9297900cea6..78f19c255f20 100644 --- a/tools/testing/selftests/bpf/verifier/ld_imm64.c +++ b/tools/testing/selftests/bpf/verifier/ld_imm64.c @@ -9,8 +9,8 @@ BPF_MOV64_IMM(BPF_REG_0, 2), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_LD_IMM insn", - .errstr_unpriv = "R1 pointer comparison", + .errstr = "jump into the middle of ldimm64 insn 1", + .errstr_unpriv = "jump into the middle of ldimm64 insn 1", .result = REJECT, }, { @@ -23,8 +23,8 @@ BPF_LD_IMM64(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_LD_IMM insn", - .errstr_unpriv = "R1 pointer comparison", + .errstr = "jump into the middle of ldimm64 insn 1", + .errstr_unpriv = "jump into the middle of ldimm64 insn 1", .result = REJECT, }, { From 4bb7ea946a370707315ab774432963ce47291946 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 9 Nov 2023 16:26:37 -0800 Subject: [PATCH 280/415] bpf: fix precision backtracking instruction iteration Fix an edge case in __mark_chain_precision() which prematurely stops backtracking instructions in a state if it happens that state's first and last instruction indexes are the same. This situations doesn't necessarily mean that there were no instructions simulated in a state, but rather that we starting from the instruction, jumped around a bit, and then ended up at the same instruction before checkpointing or marking precision. To distinguish between these two possible situations, we need to consult jump history. If it's empty or contain a single record "bridging" parent state and first instruction of processed state, then we indeed backtracked all instructions in this state. But if history is not empty, we are definitely not done yet. Move this logic inside get_prev_insn_idx() to contain it more nicely. Use -ENOENT return code to denote "we are out of instructions" situation. This bug was exposed by verifier_loop1.c's bounded_recursion subtest, once the next fix in this patch set is applied. Acked-by: Eduard Zingerman Fixes: b5dc0163d8fd ("bpf: precise scalar_value tracking") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231110002638.4168352-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b87715b364fd..484c742f733e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3516,12 +3516,29 @@ static int push_jmp_history(struct bpf_verifier_env *env, /* Backtrack one insn at a time. If idx is not at the top of recorded * history then previous instruction came from straight line execution. + * Return -ENOENT if we exhausted all instructions within given state. + * + * It's legal to have a bit of a looping with the same starting and ending + * insn index within the same state, e.g.: 3->4->5->3, so just because current + * instruction index is the same as state's first_idx doesn't mean we are + * done. If there is still some jump history left, we should keep going. We + * need to take into account that we might have a jump history between given + * state's parent and itself, due to checkpointing. In this case, we'll have + * history entry recording a jump from last instruction of parent state and + * first instruction of given state. */ static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, u32 *history) { u32 cnt = *history; + if (i == st->first_insn_idx) { + if (cnt == 0) + return -ENOENT; + if (cnt == 1 && st->jmp_history[0].idx == i) + return -ENOENT; + } + if (cnt && st->jmp_history[cnt - 1].idx == i) { i = st->jmp_history[cnt - 1].prev_idx; (*history)--; @@ -4401,10 +4418,10 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) * Nothing to be tracked further in the parent state. */ return 0; - if (i == first_idx) - break; subseq_idx = i; i = get_prev_insn_idx(st, i, &history); + if (i == -ENOENT) + break; if (i >= env->prog->len) { /* This can happen if backtracking reached insn 0 * and there are still reg_mask or stack_mask From 62ccdb11d3c63dc697dea1fd92b3496fe43dcc1e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 9 Nov 2023 16:26:38 -0800 Subject: [PATCH 281/415] selftests/bpf: add edge case backtracking logic test Add a dedicated selftests to try to set up conditions to have a state with same first and last instruction index, but it actually is a loop 3->4->1->2->3. This confuses mark_chain_precision() if verifier doesn't take into account jump history. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231110002638.4168352-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_precision.c | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c index 193c0f8272d0..6b564d4c0986 100644 --- a/tools/testing/selftests/bpf/progs/verifier_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_precision.c @@ -91,3 +91,43 @@ __naked int bpf_end_bswap(void) } #endif /* v4 instruction */ + +SEC("?raw_tp") +__success __log_level(2) +/* + * Without the bug fix there will be no history between "last_idx 3 first_idx 3" + * and "parent state regs=" lines. "R0_w=6" parts are here to help anchor + * expected log messages to the one specific mark_chain_precision operation. + * + * This is quite fragile: if verifier checkpointing heuristic changes, this + * might need adjusting. + */ +__msg("2: (07) r0 += 1 ; R0_w=6") +__msg("3: (35) if r0 >= 0xa goto pc+1") +__msg("mark_precise: frame0: last_idx 3 first_idx 3 subseq_idx -1") +__msg("mark_precise: frame0: regs=r0 stack= before 2: (07) r0 += 1") +__msg("mark_precise: frame0: regs=r0 stack= before 1: (07) r0 += 1") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (05) goto pc-4") +__msg("mark_precise: frame0: regs=r0 stack= before 3: (35) if r0 >= 0xa goto pc+1") +__msg("mark_precise: frame0: parent state regs= stack=: R0_rw=P4") +__msg("3: R0_w=6") +__naked int state_loop_first_last_equal(void) +{ + asm volatile ( + "r0 = 0;" + "l0_%=:" + "r0 += 1;" + "r0 += 1;" + /* every few iterations we'll have a checkpoint here with + * first_idx == last_idx, potentially confusing precision + * backtracking logic + */ + "if r0 >= 10 goto l1_%=;" /* checkpoint + mark_precise */ + "goto l0_%=;" + "l1_%=:" + "exit;" + ::: __clobber_common + ); +} + +char _license[] SEC("license") = "GPL"; From 13c8fb98b7bd3872e75f89c86a2453b0e4a99401 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:17:43 +0100 Subject: [PATCH 282/415] fbdev: atmel_lcdfb: Stop using platform_driver_probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On today's platforms the benefit of platform_driver_probe() isn't that relevant any more. It allows to drop some code after booting (or module loading) for .probe() and discard the .remove() function completely if the driver is built-in. This typically saves a few 100k. The downside of platform_driver_probe() is that the driver cannot be bound and unbound at runtime which is ancient and also slightly complicates testing. There are also thoughts to deprecate platform_driver_probe() because it adds some complexity in the driver core for little gain. Also many drivers don't use it correctly. This driver for example misses to mark the driver struct with __refdata which is needed to suppress a (W=1) modpost warning: WARNING: modpost: drivers/video/fbdev/atmel_lcdfb: section mismatch in reference: atmel_lcdfb_driver+0x4 (section: .data) -> atmel_lcdfb_remove (section: .exit.text) [folded in patch by Nathan Chancellor] Signed-off-by: Uwe Kleine-König Signed-off-by: Nathan Chancellor Signed-off-by: Helge Deller --- drivers/video/fbdev/atmel_lcdfb.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/video/fbdev/atmel_lcdfb.c b/drivers/video/fbdev/atmel_lcdfb.c index a908db233409..00b643483a4e 100644 --- a/drivers/video/fbdev/atmel_lcdfb.c +++ b/drivers/video/fbdev/atmel_lcdfb.c @@ -220,7 +220,7 @@ static inline void atmel_lcdfb_power_control(struct atmel_lcdfb_info *sinfo, int } } -static const struct fb_fix_screeninfo atmel_lcdfb_fix __initconst = { +static const struct fb_fix_screeninfo atmel_lcdfb_fix = { .type = FB_TYPE_PACKED_PIXELS, .visual = FB_VISUAL_TRUECOLOR, .xpanstep = 0, @@ -841,7 +841,7 @@ static void atmel_lcdfb_task(struct work_struct *work) atmel_lcdfb_reset(sinfo); } -static int __init atmel_lcdfb_init_fbinfo(struct atmel_lcdfb_info *sinfo) +static int atmel_lcdfb_init_fbinfo(struct atmel_lcdfb_info *sinfo) { struct fb_info *info = sinfo->info; int ret = 0; @@ -1017,7 +1017,7 @@ static int atmel_lcdfb_of_init(struct atmel_lcdfb_info *sinfo) return ret; } -static int __init atmel_lcdfb_probe(struct platform_device *pdev) +static int atmel_lcdfb_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct fb_info *info; @@ -1223,7 +1223,7 @@ static int __init atmel_lcdfb_probe(struct platform_device *pdev) return ret; } -static int __exit atmel_lcdfb_remove(struct platform_device *pdev) +static int atmel_lcdfb_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct fb_info *info = dev_get_drvdata(dev); @@ -1301,7 +1301,8 @@ static int atmel_lcdfb_resume(struct platform_device *pdev) #endif static struct platform_driver atmel_lcdfb_driver = { - .remove = __exit_p(atmel_lcdfb_remove), + .probe = atmel_lcdfb_probe, + .remove = atmel_lcdfb_remove, .suspend = atmel_lcdfb_suspend, .resume = atmel_lcdfb_resume, .driver = { @@ -1309,8 +1310,7 @@ static struct platform_driver atmel_lcdfb_driver = { .of_match_table = atmel_lcdfb_dt_ids, }, }; - -module_platform_driver_probe(atmel_lcdfb_driver, atmel_lcdfb_probe); +module_platform_driver(atmel_lcdfb_driver); MODULE_DESCRIPTION("AT91 LCD Controller framebuffer driver"); MODULE_AUTHOR("Nicolas Ferre "); From 7fbbc0868ca473f9ec97754acb21ae0e4ddb3d89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:17:44 +0100 Subject: [PATCH 283/415] fbdev: omapfb/analog-tv: Don't put .remove() in .exit.text and drop suppress_bind_attrs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On today's platforms the memory savings of putting the remove function in .exit isn't that relevant any more. It only matters for built-in drivers and typically saves a few 100k. The downside is that the driver cannot be unbound at runtime which is ancient and also slightly complicates testing. Also it requires to mark the driver struct with __refdata which is needed to suppress a (W=1) modpost warning: WARNING: modpost: drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv: section mismatch in reference: tvc_connector_driver+0x4 (section: .data) -> tvc_remove (section: .exit.text) To simplify matters, move the remove callback to .text and drop .suppress_bind_attrs = true. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- .../video/fbdev/omap2/omapfb/displays/connector-analog-tv.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c b/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c index 0daaf9f89bab..85fa58f48a81 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c @@ -221,7 +221,7 @@ static int tvc_probe(struct platform_device *pdev) return r; } -static int __exit tvc_remove(struct platform_device *pdev) +static int tvc_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -247,11 +247,10 @@ MODULE_DEVICE_TABLE(of, tvc_of_match); static struct platform_driver tvc_connector_driver = { .probe = tvc_probe, - .remove = __exit_p(tvc_remove), + .remove = tvc_remove, .driver = { .name = "connector-analog-tv", .of_match_table = tvc_of_match, - .suppress_bind_attrs = true, }, }; From bfaee69738bcae5c3dd0aaccae4f55dc381a9b36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:17:45 +0100 Subject: [PATCH 284/415] fbdev: omapfb/dpi: Don't put .remove() in .exit.text and drop suppress_bind_attrs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On today's platforms the memory savings of putting the remove function in .exit isn't that relevant any more. It only matters for built-in drivers and typically saves a few 100k. The downside is that the driver cannot be unbound at runtime which is ancient and also slightly complicates testing. Also it requires to mark the driver struct with __refdata which is needed to suppress a (W=1) modpost warning: WARNING: modpost: drivers/video/fbdev/omap2/omapfb/displays/panel-dpi: section mismatch in reference: panel_dpi_driver+0x4 (section: .data) -> panel_dpi_remove (section: .exit.text) To simplify matters, move the remove callback to .text and drop .suppress_bind_attrs = true. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c index 9790053c5877..aa6faa5ba158 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c @@ -211,7 +211,7 @@ static int panel_dpi_probe(struct platform_device *pdev) return r; } -static int __exit panel_dpi_remove(struct platform_device *pdev) +static int panel_dpi_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -236,11 +236,10 @@ MODULE_DEVICE_TABLE(of, panel_dpi_of_match); static struct platform_driver panel_dpi_driver = { .probe = panel_dpi_probe, - .remove = __exit_p(panel_dpi_remove), + .remove = panel_dpi_remove, .driver = { .name = "panel-dpi", .of_match_table = panel_dpi_of_match, - .suppress_bind_attrs = true, }, }; From b02e6f70f8d87d8cfc8306fcb38120bf58d5e6cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:17:46 +0100 Subject: [PATCH 285/415] fbdev: omapfb/dsi-cm: Don't put .remove() in .exit.text and drop suppress_bind_attrs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On today's platforms the memory savings of putting the remove function in .exit isn't that relevant any more. It only matters for built-in drivers and typically saves a few 100k. The downside is that the driver cannot be unbound at runtime which is ancient and also slightly complicates testing. Also it requires to mark the driver struct with __refdata which is needed to suppress a (W=1) modpost warning: WARNING: modpost: drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm: section mismatch in reference: dsicm_driver+0x4 (section: .data) -> dsicm_remove (section: .exit.text) To simplify matters, move the remove callback to .text and drop .suppress_bind_attrs = true. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c index 77fce1223a64..3d0978167144 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c @@ -1241,7 +1241,7 @@ static int dsicm_probe(struct platform_device *pdev) return r; } -static int __exit dsicm_remove(struct platform_device *pdev) +static int dsicm_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -1282,11 +1282,10 @@ MODULE_DEVICE_TABLE(of, dsicm_of_match); static struct platform_driver dsicm_driver = { .probe = dsicm_probe, - .remove = __exit_p(dsicm_remove), + .remove = dsicm_remove, .driver = { .name = "panel-dsi-cm", .of_match_table = dsicm_of_match, - .suppress_bind_attrs = true, }, }; From f004d91130595a01203272be12a8764788b68109 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:17:47 +0100 Subject: [PATCH 286/415] fbdev: omapfb/dvi: Don't put .remove() in .exit.text and drop suppress_bind_attrs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On today's platforms the memory savings of putting the remove function in .exit isn't that relevant any more. It only matters for built-in drivers and typically saves a few 100k. The downside is that the driver cannot be unbound at runtime which is ancient and also slightly complicates testing. Also it requires to mark the driver struct with __refdata which is needed to suppress a (W=1) modpost warning: WARNING: modpost: drivers/video/fbdev/omap2/omapfb/displays/connector-dvi: section mismatch in reference: dvi_connector_driver+0x4 (section: .data) -> dvic_remove (section: .exit.text) To simplify matters, move the remove callback to .text and drop .suppress_bind_attrs = true. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c index c8ad3ef42bd3..2a5824fe8ea0 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c @@ -303,7 +303,7 @@ static int dvic_probe(struct platform_device *pdev) return r; } -static int __exit dvic_remove(struct platform_device *pdev) +static int dvic_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -330,11 +330,10 @@ MODULE_DEVICE_TABLE(of, dvic_of_match); static struct platform_driver dvi_connector_driver = { .probe = dvic_probe, - .remove = __exit_p(dvic_remove), + .remove = dvic_remove, .driver = { .name = "connector-dvi", .of_match_table = dvic_of_match, - .suppress_bind_attrs = true, }, }; From 1fc9ea058089f27a5f4c5f04927daed1bb7668d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:17:48 +0100 Subject: [PATCH 287/415] fbdev: omapfb/hdmi: Don't put .remove() in .exit.text and drop suppress_bind_attrs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On today's platforms the memory savings of putting the remove function in .exit isn't that relevant any more. It only matters for built-in drivers and typically saves a few 100k. The downside is that the driver cannot be unbound at runtime which is ancient and also slightly complicates testing. Also it requires to mark the driver struct with __refdata which is needed to suppress a (W=1) modpost warning: WARNING: modpost: drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi: section mismatch in reference: hdmi_connector_driver+0x4 (section: .data) -> hdmic_remove (section: .exit.text) To simplify matters, move the remove callback to .text and drop .suppress_bind_attrs = true. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c b/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c index 8f9ff9fb4ca4..f76664c69481 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c @@ -249,7 +249,7 @@ static int hdmic_probe(struct platform_device *pdev) return r; } -static int __exit hdmic_remove(struct platform_device *pdev) +static int hdmic_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -274,11 +274,10 @@ MODULE_DEVICE_TABLE(of, hdmic_of_match); static struct platform_driver hdmi_connector_driver = { .probe = hdmic_probe, - .remove = __exit_p(hdmic_remove), + .remove = hdmic_remove, .driver = { .name = "connector-hdmi", .of_match_table = hdmic_of_match, - .suppress_bind_attrs = true, }, }; From 7462e46054c5ce07639aabec5b605d6a481368b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:17:49 +0100 Subject: [PATCH 288/415] fbdev: omapfb/opa362: Don't put .remove() in .exit.text and drop suppress_bind_attrs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On today's platforms the memory savings of putting the remove function in .exit isn't that relevant any more. It only matters for built-in drivers and typically saves a few 100k. The downside is that the driver cannot be unbound at runtime which is ancient and also slightly complicates testing. Also it requires to mark the driver struct with __refdata which is needed to suppress a (W=1) modpost warning: WARNING: modpost: drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410: section mismatch in reference: tfp410_driver+0x4 (section: .data) -> tfp410_remove (section: .exit.text) To simplify matters, move the remove callback to .text and drop .suppress_bind_attrs = true. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c b/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c index dd29dc5c77ec..866d71489358 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c @@ -231,7 +231,7 @@ static int opa362_probe(struct platform_device *pdev) return r; } -static int __exit opa362_remove(struct platform_device *pdev) +static int opa362_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -260,11 +260,10 @@ MODULE_DEVICE_TABLE(of, opa362_of_match); static struct platform_driver opa362_driver = { .probe = opa362_probe, - .remove = __exit_p(opa362_remove), + .remove = opa362_remove, .driver = { .name = "amplifier-opa362", .of_match_table = opa362_of_match, - .suppress_bind_attrs = true, }, }; From 6a06fc772fa53a436e50aef44f4163253dbb089f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:17:50 +0100 Subject: [PATCH 289/415] fbdev: omapfb/sharp-ls037v7dw01: Don't put .remove() in .exit.text and drop suppress_bind_attrs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On today's platforms the memory savings of putting the remove function in .exit isn't that relevant any more. It only matters for built-in drivers and typically saves a few 100k. The downside is that the driver cannot be unbound at runtime which is ancient and also slightly complicates testing. Also it requires to mark the driver struct with __refdata which is needed to suppress a (W=1) modpost warning: WARNING: modpost: drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01: section mismatch in reference: sharp_ls_driver+0x4 (section: .data) -> sharp_ls_remove (section: .exit.text) To simplify matters, move the remove callback to .text and drop .suppress_bind_attrs = true. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- .../fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c index cc30758300e2..d228d74f3bd5 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c @@ -292,7 +292,7 @@ static int sharp_ls_probe(struct platform_device *pdev) return r; } -static int __exit sharp_ls_remove(struct platform_device *pdev) +static int sharp_ls_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -317,11 +317,10 @@ MODULE_DEVICE_TABLE(of, sharp_ls_of_match); static struct platform_driver sharp_ls_driver = { .probe = sharp_ls_probe, - .remove = __exit_p(sharp_ls_remove), + .remove = sharp_ls_remove, .driver = { .name = "panel-sharp-ls037v7dw01", .of_match_table = sharp_ls_of_match, - .suppress_bind_attrs = true, }, }; From 20bcc282d7ccf78bd2dd24a518210baf8763ffaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:17:51 +0100 Subject: [PATCH 290/415] fbdev: omapfb/tfp410: Don't put .remove() in .exit.text and drop suppress_bind_attrs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On today's platforms the memory savings of putting the remove function in .exit isn't that relevant any more. It only matters for built-in drivers and typically saves a few 100k. The downside is that the driver cannot be unbound at runtime which is ancient and also slightly complicates testing. Also it requires to mark the driver struct with __refdata which is needed to suppress a (W=1) modpost warning: WARNING: modpost: drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410: section mismatch in reference: tfp410_driver+0x4 (section: .data) -> tfp410_remove (section: .exit.text) To simplify matters, move the remove callback to .text and drop .suppress_bind_attrs = true. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c index 7bac420169a6..6aa21afc8b21 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c @@ -217,7 +217,7 @@ static int tfp410_probe(struct platform_device *pdev) return r; } -static int __exit tfp410_remove(struct platform_device *pdev) +static int tfp410_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -247,11 +247,10 @@ MODULE_DEVICE_TABLE(of, tfp410_of_match); static struct platform_driver tfp410_driver = { .probe = tfp410_probe, - .remove = __exit_p(tfp410_remove), + .remove = tfp410_remove, .driver = { .name = "tfp410", .of_match_table = tfp410_of_match, - .suppress_bind_attrs = true, }, }; From 3e9ec97706fd8bc06ed41ff85ff168537618879f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:17:52 +0100 Subject: [PATCH 291/415] fbdev: omapfb/tpd12s015: Don't put .remove() in .exit.text and drop suppress_bind_attrs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On today's platforms the memory savings of putting the remove function in .exit isn't that relevant any more. It only matters for built-in drivers and typically saves a few 100k. The downside is that the driver cannot be unbound at runtime which is ancient and also slightly complicates testing. Also it requires to mark the driver struct with __refdata which is needed to suppress a (W=1) modpost warning: WARNING: modpost: drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015: section mismatch in reference: tpd_driver+0x4 (section: .data) -> tpd_remove (section: .exit.text) To simplify matters, move the remove callback to .text and drop .suppress_bind_attrs = true. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- .../video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c index 67f0c9250e9e..0bdedc0f6527 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c @@ -283,7 +283,7 @@ static int tpd_probe(struct platform_device *pdev) return r; } -static int __exit tpd_remove(struct platform_device *pdev) +static int tpd_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -313,11 +313,10 @@ MODULE_DEVICE_TABLE(of, tpd_of_match); static struct platform_driver tpd_driver = { .probe = tpd_probe, - .remove = __exit_p(tpd_remove), + .remove = tpd_remove, .driver = { .name = "tpd12s015", .of_match_table = tpd_of_match, - .suppress_bind_attrs = true, }, }; From 42a0148ab7f85f4e102630e4cdbb5d336056aedf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:17:54 +0100 Subject: [PATCH 292/415] fbdev: atmel_lcdfb: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/atmel_lcdfb.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/video/fbdev/atmel_lcdfb.c b/drivers/video/fbdev/atmel_lcdfb.c index 00b643483a4e..9e391e5eaf9d 100644 --- a/drivers/video/fbdev/atmel_lcdfb.c +++ b/drivers/video/fbdev/atmel_lcdfb.c @@ -1223,14 +1223,14 @@ static int atmel_lcdfb_probe(struct platform_device *pdev) return ret; } -static int atmel_lcdfb_remove(struct platform_device *pdev) +static void atmel_lcdfb_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct fb_info *info = dev_get_drvdata(dev); struct atmel_lcdfb_info *sinfo; if (!info || !info->par) - return 0; + return; sinfo = info->par; cancel_work_sync(&sinfo->task); @@ -1252,8 +1252,6 @@ static int atmel_lcdfb_remove(struct platform_device *pdev) } framebuffer_release(info); - - return 0; } #ifdef CONFIG_PM @@ -1302,7 +1300,7 @@ static int atmel_lcdfb_resume(struct platform_device *pdev) static struct platform_driver atmel_lcdfb_driver = { .probe = atmel_lcdfb_probe, - .remove = atmel_lcdfb_remove, + .remove_new = atmel_lcdfb_remove, .suspend = atmel_lcdfb_suspend, .resume = atmel_lcdfb_resume, .driver = { From 26553eb8df2dbfbc075619abd53618efc8c5586a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:17:55 +0100 Subject: [PATCH 293/415] fbdev: omapfb/analog-tv: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- .../video/fbdev/omap2/omapfb/displays/connector-analog-tv.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c b/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c index 85fa58f48a81..c6786726a1af 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c @@ -221,7 +221,7 @@ static int tvc_probe(struct platform_device *pdev) return r; } -static int tvc_remove(struct platform_device *pdev) +static void tvc_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -233,8 +233,6 @@ static int tvc_remove(struct platform_device *pdev) tvc_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } static const struct of_device_id tvc_of_match[] = { @@ -247,7 +245,7 @@ MODULE_DEVICE_TABLE(of, tvc_of_match); static struct platform_driver tvc_connector_driver = { .probe = tvc_probe, - .remove = tvc_remove, + .remove_new = tvc_remove, .driver = { .name = "connector-analog-tv", .of_match_table = tvc_of_match, From d5d8b9df6b6d72841f827327b37de2b0b7af88f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:17:56 +0100 Subject: [PATCH 294/415] fbdev: omapfb/dpi: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c index aa6faa5ba158..937f9091274f 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c @@ -211,7 +211,7 @@ static int panel_dpi_probe(struct platform_device *pdev) return r; } -static int panel_dpi_remove(struct platform_device *pdev) +static void panel_dpi_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -223,8 +223,6 @@ static int panel_dpi_remove(struct platform_device *pdev) panel_dpi_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } static const struct of_device_id panel_dpi_of_match[] = { @@ -236,7 +234,7 @@ MODULE_DEVICE_TABLE(of, panel_dpi_of_match); static struct platform_driver panel_dpi_driver = { .probe = panel_dpi_probe, - .remove = panel_dpi_remove, + .remove_new = panel_dpi_remove, .driver = { .name = "panel-dpi", .of_match_table = panel_dpi_of_match, From da21ff3954ed31e04f8280b3ef573484c38e01ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:17:57 +0100 Subject: [PATCH 295/415] fbdev: omapfb/dsi-cm: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c index 3d0978167144..adb8881bac28 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c @@ -1241,7 +1241,7 @@ static int dsicm_probe(struct platform_device *pdev) return r; } -static int dsicm_remove(struct platform_device *pdev) +static void dsicm_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -1269,8 +1269,6 @@ static int dsicm_remove(struct platform_device *pdev) /* reset, to be sure that the panel is in a valid state */ dsicm_hw_reset(ddata); - - return 0; } static const struct of_device_id dsicm_of_match[] = { @@ -1282,7 +1280,7 @@ MODULE_DEVICE_TABLE(of, dsicm_of_match); static struct platform_driver dsicm_driver = { .probe = dsicm_probe, - .remove = dsicm_remove, + .remove_new = dsicm_remove, .driver = { .name = "panel-dsi-cm", .of_match_table = dsicm_of_match, From ebfb1334cf430ef36059c5bed35e9cfbab23ff11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:17:58 +0100 Subject: [PATCH 296/415] fbdev: omapfb/dvi: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c index 2a5824fe8ea0..0cc9294f89b4 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-dvi.c @@ -303,7 +303,7 @@ static int dvic_probe(struct platform_device *pdev) return r; } -static int dvic_remove(struct platform_device *pdev) +static void dvic_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -317,8 +317,6 @@ static int dvic_remove(struct platform_device *pdev) omap_dss_put_device(in); i2c_put_adapter(ddata->i2c_adapter); - - return 0; } static const struct of_device_id dvic_of_match[] = { @@ -330,7 +328,7 @@ MODULE_DEVICE_TABLE(of, dvic_of_match); static struct platform_driver dvi_connector_driver = { .probe = dvic_probe, - .remove = dvic_remove, + .remove_new = dvic_remove, .driver = { .name = "connector-dvi", .of_match_table = dvic_of_match, From 34948a36a4bbf583bf39b5c9783aa9e872c7910d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:17:59 +0100 Subject: [PATCH 297/415] fbdev: omapfb/hdmi: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c b/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c index f76664c69481..b862a32670ae 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c @@ -249,7 +249,7 @@ static int hdmic_probe(struct platform_device *pdev) return r; } -static int hdmic_remove(struct platform_device *pdev) +static void hdmic_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -261,8 +261,6 @@ static int hdmic_remove(struct platform_device *pdev) hdmic_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } static const struct of_device_id hdmic_of_match[] = { @@ -274,7 +272,7 @@ MODULE_DEVICE_TABLE(of, hdmic_of_match); static struct platform_driver hdmi_connector_driver = { .probe = hdmic_probe, - .remove = hdmic_remove, + .remove_new = hdmic_remove, .driver = { .name = "connector-hdmi", .of_match_table = hdmic_of_match, From fe83fc52db06aa0632e53b3b27184e26971b555d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:18:00 +0100 Subject: [PATCH 298/415] fbdev: omapfb/opa362: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c b/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c index 866d71489358..f0d3eb581166 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c @@ -231,7 +231,7 @@ static int opa362_probe(struct platform_device *pdev) return r; } -static int opa362_remove(struct platform_device *pdev) +static void opa362_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -248,8 +248,6 @@ static int opa362_remove(struct platform_device *pdev) opa362_disconnect(dssdev, dssdev->dst); omap_dss_put_device(in); - - return 0; } static const struct of_device_id opa362_of_match[] = { @@ -260,7 +258,7 @@ MODULE_DEVICE_TABLE(of, opa362_of_match); static struct platform_driver opa362_driver = { .probe = opa362_probe, - .remove = opa362_remove, + .remove_new = opa362_remove, .driver = { .name = "amplifier-opa362", .of_match_table = opa362_of_match, From b0d61e8c04965373b74d719a460153cb700dab80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:18:01 +0100 Subject: [PATCH 299/415] fbdev: omapfb/sharp-ls037v7dw01: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- .../fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c index d228d74f3bd5..e37268cf8dca 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c @@ -292,7 +292,7 @@ static int sharp_ls_probe(struct platform_device *pdev) return r; } -static int sharp_ls_remove(struct platform_device *pdev) +static void sharp_ls_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -304,8 +304,6 @@ static int sharp_ls_remove(struct platform_device *pdev) sharp_ls_disconnect(dssdev); omap_dss_put_device(in); - - return 0; } static const struct of_device_id sharp_ls_of_match[] = { @@ -317,7 +315,7 @@ MODULE_DEVICE_TABLE(of, sharp_ls_of_match); static struct platform_driver sharp_ls_driver = { .probe = sharp_ls_probe, - .remove = sharp_ls_remove, + .remove_new = sharp_ls_remove, .driver = { .name = "panel-sharp-ls037v7dw01", .of_match_table = sharp_ls_of_match, From a23f29d057d034c77bd6689a5c328a6933255bf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:18:02 +0100 Subject: [PATCH 300/415] fbdev: omapfb/tfp410: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c index 6aa21afc8b21..c8aca4592949 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c @@ -217,7 +217,7 @@ static int tfp410_probe(struct platform_device *pdev) return r; } -static int tfp410_remove(struct platform_device *pdev) +static void tfp410_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -234,8 +234,6 @@ static int tfp410_remove(struct platform_device *pdev) tfp410_disconnect(dssdev, dssdev->dst); omap_dss_put_device(in); - - return 0; } static const struct of_device_id tfp410_of_match[] = { @@ -247,7 +245,7 @@ MODULE_DEVICE_TABLE(of, tfp410_of_match); static struct platform_driver tfp410_driver = { .probe = tfp410_probe, - .remove = tfp410_remove, + .remove_new = tfp410_remove, .driver = { .name = "tfp410", .of_match_table = tfp410_of_match, From 2e2389ca5d7efd2f029b138d78e38200d5ba125b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 7 Nov 2023 10:18:03 +0100 Subject: [PATCH 301/415] fbdev: omapfb/tpd12s015: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- .../video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c index 0bdedc0f6527..eb3926d0361b 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c @@ -283,7 +283,7 @@ static int tpd_probe(struct platform_device *pdev) return r; } -static int tpd_remove(struct platform_device *pdev) +static void tpd_remove(struct platform_device *pdev) { struct panel_drv_data *ddata = platform_get_drvdata(pdev); struct omap_dss_device *dssdev = &ddata->dssdev; @@ -300,8 +300,6 @@ static int tpd_remove(struct platform_device *pdev) tpd_disconnect(dssdev, dssdev->dst); omap_dss_put_device(in); - - return 0; } static const struct of_device_id tpd_of_match[] = { @@ -313,7 +311,7 @@ MODULE_DEVICE_TABLE(of, tpd_of_match); static struct platform_driver tpd_driver = { .probe = tpd_probe, - .remove = tpd_remove, + .remove_new = tpd_remove, .driver = { .name = "tpd12s015", .of_match_table = tpd_of_match, From cca202a5e59578247cd7f0496056884f5c9cc338 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 8 Nov 2023 15:58:13 +0100 Subject: [PATCH 302/415] fbdev: hyperv_fb: fix uninitialized local variable use When CONFIG_SYSFB is disabled, the hyperv_fb driver can now run into undefined behavior on a gen2 VM, as indicated by this smatch warning: drivers/video/fbdev/hyperv_fb.c:1077 hvfb_getmem() error: uninitialized symbol 'base'. drivers/video/fbdev/hyperv_fb.c:1077 hvfb_getmem() error: uninitialized symbol 'size'. Since there is no way to know the actual framebuffer in this configuration, just return an allocation failure here, which should avoid the build warning and the undefined behavior. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202311070802.YCpvehaz-lkp@intel.com/ Fixes: a07b50d80ab6 ("hyperv: avoid dependency on screen_info") Signed-off-by: Arnd Bergmann Signed-off-by: Helge Deller --- drivers/video/fbdev/hyperv_fb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index bf59daf862fc..a80939fe2ee6 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -1013,6 +1013,8 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) } else if (IS_ENABLED(CONFIG_SYSFB)) { base = screen_info.lfb_base; size = screen_info.lfb_size; + } else { + goto err1; } /* From 10e14e9652bf9e8104151bfd9200433083deae3d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 9 Nov 2023 22:14:10 -0800 Subject: [PATCH 303/415] bpf: fix control-flow graph checking in privileged mode When BPF program is verified in privileged mode, BPF verifier allows bounded loops. This means that from CFG point of view there are definitely some back-edges. Original commit adjusted check_cfg() logic to not detect back-edges in control flow graph if they are resulting from conditional jumps, which the idea that subsequent full BPF verification process will determine whether such loops are bounded or not, and either accept or reject the BPF program. At least that's my reading of the intent. Unfortunately, the implementation of this idea doesn't work correctly in all possible situations. Conditional jump might not result in immediate back-edge, but just a few unconditional instructions later we can arrive at back-edge. In such situations check_cfg() would reject BPF program even in privileged mode, despite it might be bounded loop. Next patch adds one simple program demonstrating such scenario. To keep things simple, instead of trying to detect back edges in privileged mode, just assume every back edge is valid and let subsequent BPF verification prove or reject bounded loops. Note a few test changes. For unknown reason, we have a few tests that are specified to detect a back-edge in a privileged mode, but looking at their code it seems like the right outcome is passing check_cfg() and letting subsequent verification to make a decision about bounded or not bounded looping. Bounded recursion case is also interesting. The example should pass, as recursion is limited to just a few levels and so we never reach maximum number of nested frames and never exhaust maximum stack depth. But the way that max stack depth logic works today it falsely detects this as exceeding max nested frame count. This patch series doesn't attempt to fix this orthogonal problem, so we just adjust expected verifier failure. Suggested-by: Alexei Starovoitov Fixes: 2589726d12a1 ("bpf: introduce bounded loops") Reported-by: Hao Sun Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231110061412.2995786-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 23 +++++++------------ .../selftests/bpf/progs/verifier_loops1.c | 9 +++++--- tools/testing/selftests/bpf/verifier/calls.c | 6 ++--- 3 files changed, 17 insertions(+), 21 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 484c742f733e..a2267d5ed14e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -15403,8 +15403,7 @@ enum { * w - next instruction * e - edge */ -static int push_insn(int t, int w, int e, struct bpf_verifier_env *env, - bool loop_ok) +static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) { int *insn_stack = env->cfg.insn_stack; int *insn_state = env->cfg.insn_state; @@ -15436,7 +15435,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env, insn_stack[env->cfg.cur_stack++] = w; return KEEP_EXPLORING; } else if ((insn_state[w] & 0xF0) == DISCOVERED) { - if (loop_ok && env->bpf_capable) + if (env->bpf_capable) return DONE_EXPLORING; verbose_linfo(env, t, "%d: ", t); verbose_linfo(env, w, "%d: ", w); @@ -15459,7 +15458,7 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns, int ret, insn_sz; insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1; - ret = push_insn(t, t + insn_sz, FALLTHROUGH, env, false); + ret = push_insn(t, t + insn_sz, FALLTHROUGH, env); if (ret) return ret; @@ -15469,12 +15468,7 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns, if (visit_callee) { mark_prune_point(env, t); - ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env, - /* It's ok to allow recursion from CFG point of - * view. __check_func_call() will do the actual - * check. - */ - bpf_pseudo_func(insns + t)); + ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env); } return ret; } @@ -15496,7 +15490,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env) if (BPF_CLASS(insn->code) != BPF_JMP && BPF_CLASS(insn->code) != BPF_JMP32) { insn_sz = bpf_is_ldimm64(insn) ? 2 : 1; - return push_insn(t, t + insn_sz, FALLTHROUGH, env, false); + return push_insn(t, t + insn_sz, FALLTHROUGH, env); } switch (BPF_OP(insn->code)) { @@ -15543,8 +15537,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env) off = insn->imm; /* unconditional jump with single edge */ - ret = push_insn(t, t + off + 1, FALLTHROUGH, env, - true); + ret = push_insn(t, t + off + 1, FALLTHROUGH, env); if (ret) return ret; @@ -15557,11 +15550,11 @@ static int visit_insn(int t, struct bpf_verifier_env *env) /* conditional jump with two edges */ mark_prune_point(env, t); - ret = push_insn(t, t + 1, FALLTHROUGH, env, true); + ret = push_insn(t, t + 1, FALLTHROUGH, env); if (ret) return ret; - return push_insn(t, t + insn->off + 1, BRANCH, env, true); + return push_insn(t, t + insn->off + 1, BRANCH, env); } } diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c index 5bc86af80a9a..71735dbf33d4 100644 --- a/tools/testing/selftests/bpf/progs/verifier_loops1.c +++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c @@ -75,9 +75,10 @@ l0_%=: r0 += 1; \ " ::: __clobber_all); } -SEC("tracepoint") +SEC("socket") __description("bounded loop, start in the middle") -__failure __msg("back-edge") +__success +__failure_unpriv __msg_unpriv("back-edge") __naked void loop_start_in_the_middle(void) { asm volatile (" \ @@ -136,7 +137,9 @@ l0_%=: exit; \ SEC("tracepoint") __description("bounded recursion") -__failure __msg("back-edge") +__failure +/* verifier limitation in detecting max stack depth */ +__msg("the call stack of 8 frames is too deep !") __naked void bounded_recursion(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 1bdf2b43e49e..3d5cd51071f0 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -442,7 +442,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge from insn 0 to 0", + .errstr = "the call stack of 9 frames is too deep", .result = REJECT, }, { @@ -799,7 +799,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge", + .errstr = "the call stack of 9 frames is too deep", .result = REJECT, }, { @@ -811,7 +811,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge", + .errstr = "the call stack of 9 frames is too deep", .result = REJECT, }, { From e2e57d637aa5da0a2f49d83ad44e9febf95df7b4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 9 Nov 2023 22:14:11 -0800 Subject: [PATCH 304/415] selftests/bpf: add more test cases for check_cfg() Add a few more simple cases to validate proper privileged vs unprivileged loop detection behavior. conditional_loop2 is the one reported by Hao Sun that triggered this set of fixes. Acked-by: Eduard Zingerman Suggested-by: Hao Sun Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231110061412.2995786-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_cfg.c | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_cfg.c b/tools/testing/selftests/bpf/progs/verifier_cfg.c index df7697b94007..c1f55e1d80a4 100644 --- a/tools/testing/selftests/bpf/progs/verifier_cfg.c +++ b/tools/testing/selftests/bpf/progs/verifier_cfg.c @@ -97,4 +97,66 @@ l0_%=: r2 = r0; \ " ::: __clobber_all); } +SEC("socket") +__description("conditional loop (2)") +__success +__failure_unpriv __msg_unpriv("back-edge from insn 10 to 11") +__naked void conditional_loop2(void) +{ + asm volatile (" \ + r9 = 2 ll; \ + r3 = 0x20 ll; \ + r4 = 0x35 ll; \ + r8 = r4; \ + goto l1_%=; \ +l0_%=: r9 -= r3; \ + r9 -= r4; \ + r9 -= r8; \ +l1_%=: r8 += r4; \ + if r8 < 0x64 goto l0_%=; \ + r0 = r9; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("unconditional loop after conditional jump") +__failure __msg("infinite loop detected") +__failure_unpriv __msg_unpriv("back-edge from insn 3 to 2") +__naked void uncond_loop_after_cond_jmp(void) +{ + asm volatile (" \ + r0 = 0; \ + if r0 > 0 goto l1_%=; \ +l0_%=: r0 = 1; \ + goto l0_%=; \ +l1_%=: exit; \ +" ::: __clobber_all); +} + + +__naked __noinline __used +static unsigned long never_ending_subprog() +{ + asm volatile (" \ + r0 = r1; \ + goto -1; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("unconditional loop after conditional jump") +/* infinite loop is detected *after* check_cfg() */ +__failure __msg("infinite loop detected") +__naked void uncond_loop_in_subprog_after_cond_jmp(void) +{ + asm volatile (" \ + r0 = 0; \ + if r0 > 0 goto l1_%=; \ +l0_%=: r0 += 1; \ + call never_ending_subprog; \ +l1_%=: exit; \ +" ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; From 8e8e46a6036f5e0eefdbf6e0ed43b3581d488aa7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 8 Nov 2023 13:58:26 +0100 Subject: [PATCH 305/415] parport: gsc: mark init function static This is only used locally, so mark it static to avoid a warning: drivers/parport/parport_gsc.c:395:5: error: no previous prototype for 'parport_gsc_init' [-Werror=missing-prototypes] Acked-by: Helge Deller Acked-by: Sudip Mukherjee Signed-off-by: Arnd Bergmann Signed-off-by: Helge Deller --- drivers/parport/parport_gsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/parport/parport_gsc.c b/drivers/parport/parport_gsc.c index 5e4475254bd0..c7e18382dc01 100644 --- a/drivers/parport/parport_gsc.c +++ b/drivers/parport/parport_gsc.c @@ -392,7 +392,7 @@ static struct parisc_driver parport_driver __refdata = { .remove = __exit_p(parport_remove_chip), }; -int parport_gsc_init(void) +static int parport_gsc_init(void) { return register_parisc_driver(&parport_driver); } From de4eceab578ead12a71e5b5588a57e142bbe8ceb Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 9 Nov 2023 15:28:12 -0600 Subject: [PATCH 306/415] smb3: allow dumping session and tcon id to improve stats analysis and debugging When multiple mounts are to the same share from the same client it was not possible to determine which section of /proc/fs/cifs/Stats (and DebugData) correspond to that mount. In some recent examples this turned out to be a significant problem when trying to analyze performance data - since there are many cases where unless we know the tree id and session id we can't figure out which stats (e.g. number of SMB3.1.1 requests by type, the total time they take, which is slowest, how many fail etc.) apply to which mount. The only existing loosely related ioctl CIFS_IOC_GET_MNT_INFO does not return the information needed to uniquely identify which tcon is which mount although it does return various flags and device info. Add a cifs.ko ioctl CIFS_IOC_GET_TCON_INFO (0x800ccf0c) to return tid, session id, tree connect count. Cc: stable@vger.kernel.org Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cifs_ioctl.h | 6 ++++++ fs/smb/client/ioctl.c | 25 +++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/fs/smb/client/cifs_ioctl.h b/fs/smb/client/cifs_ioctl.h index 332588e77c31..26327442e383 100644 --- a/fs/smb/client/cifs_ioctl.h +++ b/fs/smb/client/cifs_ioctl.h @@ -26,6 +26,11 @@ struct smb_mnt_fs_info { __u64 cifs_posix_caps; } __packed; +struct smb_mnt_tcon_info { + __u32 tid; + __u64 session_id; +} __packed; + struct smb_snapshot_array { __u32 number_of_snapshots; __u32 number_of_snapshots_returned; @@ -108,6 +113,7 @@ struct smb3_notify_info { #define CIFS_IOC_NOTIFY _IOW(CIFS_IOCTL_MAGIC, 9, struct smb3_notify) #define CIFS_DUMP_FULL_KEY _IOWR(CIFS_IOCTL_MAGIC, 10, struct smb3_full_key_debug_info) #define CIFS_IOC_NOTIFY_INFO _IOWR(CIFS_IOCTL_MAGIC, 11, struct smb3_notify_info) +#define CIFS_IOC_GET_TCON_INFO _IOR(CIFS_IOCTL_MAGIC, 12, struct smb_mnt_tcon_info) #define CIFS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index f7160003e0ed..73ededa8eba5 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -117,6 +117,20 @@ static long cifs_ioctl_copychunk(unsigned int xid, struct file *dst_file, return rc; } +static long smb_mnt_get_tcon_info(struct cifs_tcon *tcon, void __user *arg) +{ + int rc = 0; + struct smb_mnt_tcon_info tcon_inf; + + tcon_inf.tid = tcon->tid; + tcon_inf.session_id = tcon->ses->Suid; + + if (copy_to_user(arg, &tcon_inf, sizeof(struct smb_mnt_tcon_info))) + rc = -EFAULT; + + return rc; +} + static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon, void __user *arg) { @@ -414,6 +428,17 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) tcon = tlink_tcon(pSMBFile->tlink); rc = smb_mnt_get_fsinfo(xid, tcon, (void __user *)arg); break; + case CIFS_IOC_GET_TCON_INFO: + cifs_sb = CIFS_SB(inode->i_sb); + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) { + rc = PTR_ERR(tlink); + break; + } + tcon = tlink_tcon(tlink); + rc = smb_mnt_get_tcon_info(tcon, (void __user *)arg); + cifs_put_tlink(tlink); + break; case CIFS_ENUMERATE_SNAPSHOTS: if (pSMBFile == NULL) break; From 67e1ab5bb58a787809772eba14e7e758c765e2fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 9 Nov 2023 23:01:53 +0100 Subject: [PATCH 307/415] fbdev: amifb: Mark driver struct with __refdata to prevent section mismatch warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As described in the added code comment, a reference to .exit.text is ok for drivers registered via module_platform_driver_probe(). Make this explicit to prevent a section mismatch warning. Signed-off-by: Uwe Kleine-König Reviewed-by: Geert Uytterhoeven Signed-off-by: Helge Deller --- drivers/video/fbdev/amifb.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/amifb.c b/drivers/video/fbdev/amifb.c index b18c6b4f129a..4a1bc693cebd 100644 --- a/drivers/video/fbdev/amifb.c +++ b/drivers/video/fbdev/amifb.c @@ -3768,7 +3768,13 @@ static int __exit amifb_remove(struct platform_device *pdev) return 0; } -static struct platform_driver amifb_driver = { +/* + * amifb_remove() lives in .exit.text. For drivers registered via + * module_platform_driver_probe() this ok because they cannot get unboud at + * runtime. The driver needs to be marked with __refdata, otherwise modpost + * triggers a section mismatch warning. + */ +static struct platform_driver amifb_driver __refdata = { .remove = __exit_p(amifb_remove), .driver = { .name = "amiga-video", From dce217780270300db7931d0fd73796aa64fea237 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 9 Nov 2023 23:01:54 +0100 Subject: [PATCH 308/415] fbdev: amifb: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is ignored (apart from emitting a warning) and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new(), which already returns void. Eventually after all drivers are converted, .remove_new() will be renamed to .remove(). Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Reviewed-by: Geert Uytterhoeven Signed-off-by: Helge Deller --- drivers/video/fbdev/amifb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/amifb.c b/drivers/video/fbdev/amifb.c index 4a1bc693cebd..305f396c764c 100644 --- a/drivers/video/fbdev/amifb.c +++ b/drivers/video/fbdev/amifb.c @@ -3752,7 +3752,7 @@ static int __init amifb_probe(struct platform_device *pdev) } -static int __exit amifb_remove(struct platform_device *pdev) +static void __exit amifb_remove(struct platform_device *pdev) { struct fb_info *info = platform_get_drvdata(pdev); @@ -3765,7 +3765,6 @@ static int __exit amifb_remove(struct platform_device *pdev) chipfree(); framebuffer_release(info); amifb_video_off(); - return 0; } /* @@ -3775,7 +3774,7 @@ static int __exit amifb_remove(struct platform_device *pdev) * triggers a section mismatch warning. */ static struct platform_driver amifb_driver __refdata = { - .remove = __exit_p(amifb_remove), + .remove_new = __exit_p(amifb_remove), .driver = { .name = "amiga-video", }, From a5035c81847430dfa3482807b07325f29e9e8c09 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 8 Nov 2023 13:58:42 +0100 Subject: [PATCH 309/415] fbdev: fsl-diu-fb: mark wr_reg_wa() static wr_reg_wa() is not an appropriate name for a global function, and doesn't need to be global anyway, so mark it static and avoid the warning: drivers/video/fbdev/fsl-diu-fb.c:493:6: error: no previous prototype for 'wr_reg_wa' [-Werror=missing-prototypes] Fixes: 0d9dab39fbbe ("powerpc/5121: fsl-diu-fb: fix issue with re-enabling DIU area descriptor") Signed-off-by: Arnd Bergmann Signed-off-by: Helge Deller --- drivers/video/fbdev/fsl-diu-fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/fsl-diu-fb.c b/drivers/video/fbdev/fsl-diu-fb.c index 7fbd9f069ac2..0bced82fa494 100644 --- a/drivers/video/fbdev/fsl-diu-fb.c +++ b/drivers/video/fbdev/fsl-diu-fb.c @@ -490,7 +490,7 @@ static enum fsl_diu_monitor_port fsl_diu_name_to_port(const char *s) * Workaround for failed writing desc register of planes. * Needed with MPC5121 DIU rev 2.0 silicon. */ -void wr_reg_wa(u32 *reg, u32 val) +static void wr_reg_wa(u32 *reg, u32 val) { do { out_be32(reg, val); From a2da597ff6f5504fc99163de4f15b7f3a129894e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 25 Oct 2023 14:57:34 +0300 Subject: [PATCH 310/415] pwm: samsung: Fix a bit test in pwm_samsung_resume() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PWMF_REQUESTED enum is supposed to be used with test_bit() and not used as in a bitwise AND. In this specific code the flag will never be set so the function is effectively a no-op. Fixes: e3fe982b2e4e ("pwm: samsung: Put per-channel data into driver data") Signed-off-by: Dan Carpenter Reviewed-by: Uwe Kleine-König Reviewed-by: Sam Protsenko Signed-off-by: Thierry Reding --- drivers/pwm/pwm-samsung.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-samsung.c b/drivers/pwm/pwm-samsung.c index 568491ed6829..69d9f4577b34 100644 --- a/drivers/pwm/pwm-samsung.c +++ b/drivers/pwm/pwm-samsung.c @@ -631,7 +631,7 @@ static int pwm_samsung_resume(struct device *dev) struct pwm_device *pwm = &chip->pwms[i]; struct samsung_pwm_channel *chan = &our_chip->channel[i]; - if (!(pwm->flags & PWMF_REQUESTED)) + if (!test_bit(PWMF_REQUESTED, &pwm->flags)) continue; if (our_chip->variant.output_mask & BIT(i)) From d27abbfd4888d79dd24baf50e774631046ac4732 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 25 Oct 2023 14:58:18 +0300 Subject: [PATCH 311/415] pwm: Fix double shift bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These enums are passed to set/test_bit(). The set/test_bit() functions take a bit number instead of a shifted value. Passing a shifted value is a double shift bug like doing BIT(BIT(1)). The double shift bug doesn't cause a problem here because we are only checking 0 and 1 but if the value was 5 or above then it can lead to a buffer overflow. Signed-off-by: Dan Carpenter Reviewed-by: Uwe Kleine-König Reviewed-by: Sam Protsenko Signed-off-by: Thierry Reding --- include/linux/pwm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/pwm.h b/include/linux/pwm.h index e3b437587b32..cda3597b84f2 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -41,8 +41,8 @@ struct pwm_args { }; enum { - PWMF_REQUESTED = 1 << 0, - PWMF_EXPORTED = 1 << 1, + PWMF_REQUESTED = 0, + PWMF_EXPORTED = 1, }; /* From 8a4f030dbced6fc255cbe67b2d0a129947e18493 Mon Sep 17 00:00:00 2001 From: Yuran Pereira Date: Wed, 8 Nov 2023 02:18:36 +0530 Subject: [PATCH 312/415] ptp: Fixes a null pointer dereference in ptp_ioctl Syzkaller found a null pointer dereference in ptp_ioctl originating from the lack of a null check for tsevq. ``` general protection fault, probably for non-canonical address 0xdffffc000000020b: 0000 [#1] PREEMPT SMP KASAN KASAN: probably user-memory-access in range [0x0000000000001058-0x000000000000105f] CPU: 0 PID: 5053 Comm: syz-executor353 Not tainted 6.6.0-syzkaller-10396-g4652b8e4f3ff #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 RIP: 0010:ptp_ioctl+0xcb7/0x1d10 drivers/ptp/ptp_chardev.c:476 ... Call Trace: posix_clock_ioctl+0xf8/0x160 kernel/time/posix-clock.c:86 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl fs/ioctl.c:857 [inline] __x64_sys_ioctl+0x18f/0x210 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x3f/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b ``` This patch fixes the issue by adding a check for tsevq and ensuring ptp_ioctl returns with an error if tsevq is null. Reported-by: syzbot+8a78ecea7ac1a2ea26e5@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=8a78ecea7ac1a2ea26e5 Fixes: c5a445b1e934 ("ptp: support event queue reader channel masks") Signed-off-by: Yuran Pereira Reviewed-by: Przemek Kitszel Signed-off-by: David S. Miller --- drivers/ptp/ptp_chardev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 3f7a74788802..e95a6ed130ef 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -176,6 +176,8 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, int enable, err = 0; tsevq = pccontext->private_clkdata; + if (!tsevq) + return -EINVAL; switch (cmd) { From 871019b22d1bcc9fab2d1feba1b9a564acbb6e99 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 8 Nov 2023 13:13:25 -0800 Subject: [PATCH 313/415] net: set SOCK_RCU_FREE before inserting socket into hashtable We've started to see the following kernel traces: WARNING: CPU: 83 PID: 0 at net/core/filter.c:6641 sk_lookup+0x1bd/0x1d0 Call Trace: __bpf_skc_lookup+0x10d/0x120 bpf_sk_lookup+0x48/0xd0 bpf_sk_lookup_tcp+0x19/0x20 bpf_prog_+0x37c/0x16a3 cls_bpf_classify+0x205/0x2e0 tcf_classify+0x92/0x160 __netif_receive_skb_core+0xe52/0xf10 __netif_receive_skb_list_core+0x96/0x2b0 napi_complete_done+0x7b5/0xb70 _poll+0x94/0xb0 net_rx_action+0x163/0x1d70 __do_softirq+0xdc/0x32e asm_call_irq_on_stack+0x12/0x20 do_softirq_own_stack+0x36/0x50 do_softirq+0x44/0x70 __inet_hash can race with lockless (rcu) readers on the other cpus: __inet_hash __sk_nulls_add_node_rcu <- (bpf triggers here) sock_set_flag(SOCK_RCU_FREE) Let's move the SOCK_RCU_FREE part up a bit, before we are inserting the socket into hashtables. Note, that the race is really harmless; the bpf callers are handling this situation (where listener socket doesn't have SOCK_RCU_FREE set) correctly, so the only annoyance is a WARN_ONCE. More details from Eric regarding SOCK_RCU_FREE timeline: Commit 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood") added SOCK_RCU_FREE. At that time, the precise location of sock_set_flag(sk, SOCK_RCU_FREE) did not matter, because the thread calling __inet_hash() owns a reference on sk. SOCK_RCU_FREE was only tested at dismantle time. Commit 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF") started checking SOCK_RCU_FREE _after_ the lookup to infer whether the refcount has been taken care of. Fixes: 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF") Reviewed-by: Eric Dumazet Signed-off-by: Stanislav Fomichev Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 598c1b114d2c..a532f749e477 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -751,12 +751,12 @@ int __inet_hash(struct sock *sk, struct sock *osk) if (err) goto unlock; } + sock_set_flag(sk, SOCK_RCU_FREE); if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && sk->sk_family == AF_INET6) __sk_nulls_add_node_tail_rcu(sk, &ilb2->nulls_head); else __sk_nulls_add_node_rcu(sk, &ilb2->nulls_head); - sock_set_flag(sk, SOCK_RCU_FREE); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); unlock: spin_unlock(&ilb2->lock); From cbe9e68e1e0f965fd3b1caf975eb29083c65e6b0 Mon Sep 17 00:00:00 2001 From: Ravi Gunasekaran Date: Fri, 10 Nov 2023 14:57:49 +0530 Subject: [PATCH 314/415] MAINTAINERS: net: Update reviewers for TI's Ethernet drivers Grygorii is no longer associated with TI and messages addressed to him bounce. Add Siddharth, Roger and myself as reviewers. Signed-off-by: Ravi Gunasekaran Signed-off-by: David S. Miller --- MAINTAINERS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 350d00657f6b..e3acb36989f0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21769,7 +21769,9 @@ F: Documentation/devicetree/bindings/counter/ti-eqep.yaml F: drivers/counter/ti-eqep.c TI ETHERNET SWITCH DRIVER (CPSW) -R: Grygorii Strashko +R: Siddharth Vadapalli +R: Ravi Gunasekaran +R: Roger Quadros L: linux-omap@vger.kernel.org L: netdev@vger.kernel.org S: Maintained From b2a866975f6cd5859c746f1da39c8f5736c8def2 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 10 Nov 2023 19:59:03 +0900 Subject: [PATCH 315/415] Documentation: tracing: Add a note about argument and retval access Add a note about the argument and return value accecss will be best effort. Depending on the type, it will be passed via stack or a pair of the registers, but $argN and $retval only support the single register access. Link: https://lore.kernel.org/all/169556269377.146934.14829235476649685954.stgit@devnote2/ Suggested-by: Alexei Starovoitov Signed-off-by: Masami Hiramatsu (Google) --- Documentation/trace/fprobetrace.rst | 8 ++++++-- Documentation/trace/kprobetrace.rst | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/Documentation/trace/fprobetrace.rst b/Documentation/trace/fprobetrace.rst index 8e9bebcf0a2e..e35e6b18df40 100644 --- a/Documentation/trace/fprobetrace.rst +++ b/Documentation/trace/fprobetrace.rst @@ -59,8 +59,12 @@ Synopsis of fprobe-events and bitfield are supported. (\*1) This is available only when BTF is enabled. - (\*2) only for the probe on function entry (offs == 0). - (\*3) only for return probe. + (\*2) only for the probe on function entry (offs == 0). Note, this argument access + is best effort, because depending on the argument type, it may be passed on + the stack. But this only support the arguments via registers. + (\*3) only for return probe. Note that this is also best effort. Depending on the + return value type, it might be passed via a pair of registers. But this only + accesses one register. (\*4) this is useful for fetching a field of data structures. (\*5) "u" means user-space dereference. diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index 8a2dfee38145..bf9cecb69fc9 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -61,8 +61,12 @@ Synopsis of kprobe_events (x8/x16/x32/x64), "char", "string", "ustring", "symbol", "symstr" and bitfield are supported. - (\*1) only for the probe on function entry (offs == 0). - (\*2) only for return probe. + (\*1) only for the probe on function entry (offs == 0). Note, this argument access + is best effort, because depending on the argument type, it may be passed on + the stack. But this only support the arguments via registers. + (\*2) only for return probe. Note that this is also best effort. Depending on the + return value type, it might be passed via a pair of registers. But this only + accesses one register. (\*3) this is useful for fetching a field of data structures. (\*4) "u" means user-space dereference. See :ref:`user_mem_access`. From 3afe73372966d4d8b40922d7229c8ecb1c2ca287 Mon Sep 17 00:00:00 2001 From: "wuqiang.matt" Date: Fri, 10 Nov 2023 19:59:04 +0900 Subject: [PATCH 316/415] lib: test_objpool: make global variables static Kernel test robot reported build warnings that structures g_ot_sync_ops, g_ot_async_ops and g_testcases should be static. These definitions are only used in test_objpool.c, so make them static Link: https://lore.kernel.org/all/20231108012248.313574-1-wuqiang.matt@bytedance.com/ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311071229.WGrWUjM1-lkp@intel.com/ Signed-off-by: wuqiang.matt Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- lib/test_objpool.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/test_objpool.c b/lib/test_objpool.c index a94078402138..bfdb81599832 100644 --- a/lib/test_objpool.c +++ b/lib/test_objpool.c @@ -311,7 +311,7 @@ static void ot_fini_sync(struct ot_context *sop) ot_kfree(sop->test, sop, sizeof(*sop)); } -struct { +static struct { struct ot_context * (*init)(struct ot_test *oc); void (*fini)(struct ot_context *sop); } g_ot_sync_ops[] = { @@ -475,7 +475,7 @@ static struct ot_context *ot_init_async_m0(struct ot_test *test) return sop; } -struct { +static struct { struct ot_context * (*init)(struct ot_test *oc); void (*fini)(struct ot_context *sop); } g_ot_async_ops[] = { @@ -632,7 +632,7 @@ static int ot_start_async(struct ot_test *test) #define NODE_COMPACT sizeof(struct ot_node) #define NODE_VMALLOC (512) -struct ot_test g_testcases[] = { +static struct ot_test g_testcases[] = { /* sync & normal */ {0, 0, NODE_COMPACT, 1000, 0, 1, 0, 0, "sync: percpu objpool"}, From abc28463c81853e4fdf8d009f71b2a3ce62a6f40 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 10 Nov 2023 19:59:05 +0900 Subject: [PATCH 317/415] kprobes: unify kprobes_exceptions_nofify() prototypes Most architectures that support kprobes declare this function in their own asm/kprobes.h header and provide an override, but some are missing the prototype, which causes a warning for the __weak stub implementation: kernel/kprobes.c:1865:12: error: no previous prototype for 'kprobe_exceptions_notify' [-Werror=missing-prototypes] 1865 | int __weak kprobe_exceptions_notify(struct notifier_block *self, Move the prototype into linux/kprobes.h so it is visible to all the definitions. Link: https://lore.kernel.org/all/20231108125843.3806765-4-arnd@kernel.org/ Acked-by: Masami Hiramatsu (Google) Signed-off-by: Arnd Bergmann Signed-off-by: Masami Hiramatsu (Google) --- arch/arc/include/asm/kprobes.h | 3 --- arch/arm/include/asm/kprobes.h | 2 -- arch/arm64/include/asm/kprobes.h | 2 -- arch/mips/include/asm/kprobes.h | 2 -- arch/powerpc/include/asm/kprobes.h | 2 -- arch/s390/include/asm/kprobes.h | 2 -- arch/sh/include/asm/kprobes.h | 2 -- arch/sparc/include/asm/kprobes.h | 2 -- arch/x86/include/asm/kprobes.h | 2 -- include/linux/kprobes.h | 4 ++++ 10 files changed, 4 insertions(+), 19 deletions(-) diff --git a/arch/arc/include/asm/kprobes.h b/arch/arc/include/asm/kprobes.h index de1566e32cb8..68e8301c0df2 100644 --- a/arch/arc/include/asm/kprobes.h +++ b/arch/arc/include/asm/kprobes.h @@ -32,9 +32,6 @@ struct kprobe; void arch_remove_kprobe(struct kprobe *p); -int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); - struct prev_kprobe { struct kprobe *kp; unsigned long status; diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h index e26a278d301a..5b8dbf1b0be4 100644 --- a/arch/arm/include/asm/kprobes.h +++ b/arch/arm/include/asm/kprobes.h @@ -40,8 +40,6 @@ struct kprobe_ctlblk { void arch_remove_kprobe(struct kprobe *); int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); -int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); /* optinsn template addresses */ extern __visible kprobe_opcode_t optprobe_template_entry[]; diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index 05cd82eeca13..be7a3680dadf 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -37,8 +37,6 @@ struct kprobe_ctlblk { void arch_remove_kprobe(struct kprobe *); int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); -int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); void __kretprobe_trampoline(void); void __kprobes *trampoline_probe_handler(struct pt_regs *regs); diff --git a/arch/mips/include/asm/kprobes.h b/arch/mips/include/asm/kprobes.h index 68b1e5d458cf..bc27d99c9436 100644 --- a/arch/mips/include/asm/kprobes.h +++ b/arch/mips/include/asm/kprobes.h @@ -71,8 +71,6 @@ struct kprobe_ctlblk { struct prev_kprobe prev_kprobe; }; -extern int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); #endif /* CONFIG_KPROBES */ #endif /* _ASM_KPROBES_H */ diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index c8e4b4fd4e33..4525a9c68260 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -84,8 +84,6 @@ struct arch_optimized_insn { kprobe_opcode_t *insn; }; -extern int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_handler(struct pt_regs *regs); extern int kprobe_post_handler(struct pt_regs *regs); diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 21b9e5290c04..01f1682a73b7 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -73,8 +73,6 @@ struct kprobe_ctlblk { void arch_remove_kprobe(struct kprobe *p); int kprobe_fault_handler(struct pt_regs *regs, int trapnr); -int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); #define flush_insn_slot(p) do { } while (0) diff --git a/arch/sh/include/asm/kprobes.h b/arch/sh/include/asm/kprobes.h index eeba83e0a7d2..65d4c3316a5b 100644 --- a/arch/sh/include/asm/kprobes.h +++ b/arch/sh/include/asm/kprobes.h @@ -46,8 +46,6 @@ struct kprobe_ctlblk { }; extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); -extern int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); extern int kprobe_handle_illslot(unsigned long pc); #else diff --git a/arch/sparc/include/asm/kprobes.h b/arch/sparc/include/asm/kprobes.h index 06c2bc767ef7..aec742cd898f 100644 --- a/arch/sparc/include/asm/kprobes.h +++ b/arch/sparc/include/asm/kprobes.h @@ -47,8 +47,6 @@ struct kprobe_ctlblk { struct prev_kprobe prev_kprobe; }; -int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); int kprobe_fault_handler(struct pt_regs *regs, int trapnr); asmlinkage void __kprobes kprobe_trap(unsigned long trap_level, struct pt_regs *regs); diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index a2e9317aad49..5939694dfb28 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -113,8 +113,6 @@ struct kprobe_ctlblk { }; extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); -extern int kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data); extern int kprobe_int3_handler(struct pt_regs *regs); #else diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 365eb092e9c4..ab1da3142b06 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -445,6 +445,10 @@ int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type, int arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value, char *type, char *sym); + +int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + #else /* !CONFIG_KPROBES: */ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) From 18f039428c7df183b09c69ebf10ffd4e521035d2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Nov 2023 15:22:41 +0000 Subject: [PATCH 318/415] ipvlan: add ipvlan_route_v6_outbound() helper Inspired by syzbot reports using a stack of multiple ipvlan devices. Reduce stack size needed in ipvlan_process_v6_outbound() by moving the flowi6 struct used for the route lookup in an non inlined helper. ipvlan_route_v6_outbound() needs 120 bytes on the stack, immediately reclaimed. Also make sure ipvlan_process_v4_outbound() is not inlined. We might also have to lower MAX_NEST_DEV, because only syzbot uses setups with more than four stacked devices. BUG: TASK stack guard page was hit at ffffc9000e803ff8 (stack is ffffc9000e804000..ffffc9000e808000) stack guard page: 0000 [#1] SMP KASAN CPU: 0 PID: 13442 Comm: syz-executor.4 Not tainted 6.1.52-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 RIP: 0010:kasan_check_range+0x4/0x2a0 mm/kasan/generic.c:188 Code: 48 01 c6 48 89 c7 e8 db 4e c1 03 31 c0 5d c3 cc 0f 0b eb 02 0f 0b b8 ea ff ff ff 5d c3 cc 00 00 cc cc 00 00 cc cc 55 48 89 e5 <41> 57 41 56 41 55 41 54 53 b0 01 48 85 f6 0f 84 a4 01 00 00 48 89 RSP: 0018:ffffc9000e804000 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff817e5bf2 RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffffffff887c6568 RBP: ffffc9000e804000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: dffffc0000000001 R12: 1ffff92001d0080c R13: dffffc0000000000 R14: ffffffff87e6b100 R15: 0000000000000000 FS: 00007fd0c55826c0(0000) GS:ffff8881f6800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffc9000e803ff8 CR3: 0000000170ef7000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <#DF> [] __kasan_check_read+0x11/0x20 mm/kasan/shadow.c:31 [] instrument_atomic_read include/linux/instrumented.h:72 [inline] [] _test_bit include/asm-generic/bitops/instrumented-non-atomic.h:141 [inline] [] cpumask_test_cpu include/linux/cpumask.h:506 [inline] [] cpu_online include/linux/cpumask.h:1092 [inline] [] trace_lock_acquire include/trace/events/lock.h:24 [inline] [] lock_acquire+0xe2/0x590 kernel/locking/lockdep.c:5632 [] rcu_lock_acquire+0x2e/0x40 include/linux/rcupdate.h:306 [] rcu_read_lock include/linux/rcupdate.h:747 [inline] [] ip6_pol_route+0x15d/0x1440 net/ipv6/route.c:2221 [] ip6_pol_route_output+0x50/0x80 net/ipv6/route.c:2606 [] pol_lookup_func include/net/ip6_fib.h:584 [inline] [] fib6_rule_lookup+0x265/0x620 net/ipv6/fib6_rules.c:116 [] ip6_route_output_flags_noref+0x2d9/0x3a0 net/ipv6/route.c:2638 [] ip6_route_output_flags+0xca/0x340 net/ipv6/route.c:2651 [] ip6_route_output include/net/ip6_route.h:100 [inline] [] ipvlan_process_v6_outbound drivers/net/ipvlan/ipvlan_core.c:473 [inline] [] ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:529 [inline] [] ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline] [] ipvlan_queue_xmit+0xc33/0x1be0 drivers/net/ipvlan/ipvlan_core.c:677 [] ipvlan_start_xmit+0x49/0x100 drivers/net/ipvlan/ipvlan_main.c:229 [] netdev_start_xmit include/linux/netdevice.h:4966 [inline] [] xmit_one net/core/dev.c:3644 [inline] [] dev_hard_start_xmit+0x320/0x980 net/core/dev.c:3660 [] __dev_queue_xmit+0x16b2/0x3370 net/core/dev.c:4324 [] dev_queue_xmit include/linux/netdevice.h:3067 [inline] [] neigh_hh_output include/net/neighbour.h:529 [inline] [] neigh_output include/net/neighbour.h:543 [inline] [] ip6_finish_output2+0x160d/0x1ae0 net/ipv6/ip6_output.c:139 [] __ip6_finish_output net/ipv6/ip6_output.c:200 [inline] [] ip6_finish_output+0x6c6/0xb10 net/ipv6/ip6_output.c:211 [] NF_HOOK_COND include/linux/netfilter.h:298 [inline] [] ip6_output+0x2bc/0x3d0 net/ipv6/ip6_output.c:232 [] dst_output include/net/dst.h:444 [inline] [] ip6_local_out+0x10f/0x140 net/ipv6/output_core.c:161 [] ipvlan_process_v6_outbound drivers/net/ipvlan/ipvlan_core.c:483 [inline] [] ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:529 [inline] [] ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline] [] ipvlan_queue_xmit+0x1174/0x1be0 drivers/net/ipvlan/ipvlan_core.c:677 [] ipvlan_start_xmit+0x49/0x100 drivers/net/ipvlan/ipvlan_main.c:229 [] netdev_start_xmit include/linux/netdevice.h:4966 [inline] [] xmit_one net/core/dev.c:3644 [inline] [] dev_hard_start_xmit+0x320/0x980 net/core/dev.c:3660 [] __dev_queue_xmit+0x16b2/0x3370 net/core/dev.c:4324 [] dev_queue_xmit include/linux/netdevice.h:3067 [inline] [] neigh_hh_output include/net/neighbour.h:529 [inline] [] neigh_output include/net/neighbour.h:543 [inline] [] ip6_finish_output2+0x160d/0x1ae0 net/ipv6/ip6_output.c:139 [] __ip6_finish_output net/ipv6/ip6_output.c:200 [inline] [] ip6_finish_output+0x6c6/0xb10 net/ipv6/ip6_output.c:211 [] NF_HOOK_COND include/linux/netfilter.h:298 [inline] [] ip6_output+0x2bc/0x3d0 net/ipv6/ip6_output.c:232 [] dst_output include/net/dst.h:444 [inline] [] ip6_local_out+0x10f/0x140 net/ipv6/output_core.c:161 [] ipvlan_process_v6_outbound drivers/net/ipvlan/ipvlan_core.c:483 [inline] [] ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:529 [inline] [] ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline] [] ipvlan_queue_xmit+0x1174/0x1be0 drivers/net/ipvlan/ipvlan_core.c:677 [] ipvlan_start_xmit+0x49/0x100 drivers/net/ipvlan/ipvlan_main.c:229 [] netdev_start_xmit include/linux/netdevice.h:4966 [inline] [] xmit_one net/core/dev.c:3644 [inline] [] dev_hard_start_xmit+0x320/0x980 net/core/dev.c:3660 [] __dev_queue_xmit+0x16b2/0x3370 net/core/dev.c:4324 [] dev_queue_xmit include/linux/netdevice.h:3067 [inline] [] neigh_hh_output include/net/neighbour.h:529 [inline] [] neigh_output include/net/neighbour.h:543 [inline] [] ip6_finish_output2+0x160d/0x1ae0 net/ipv6/ip6_output.c:139 [] __ip6_finish_output net/ipv6/ip6_output.c:200 [inline] [] ip6_finish_output+0x6c6/0xb10 net/ipv6/ip6_output.c:211 [] NF_HOOK_COND include/linux/netfilter.h:298 [inline] [] ip6_output+0x2bc/0x3d0 net/ipv6/ip6_output.c:232 [] dst_output include/net/dst.h:444 [inline] [] ip6_local_out+0x10f/0x140 net/ipv6/output_core.c:161 [] ipvlan_process_v6_outbound drivers/net/ipvlan/ipvlan_core.c:483 [inline] [] ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:529 [inline] [] ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline] [] ipvlan_queue_xmit+0x1174/0x1be0 drivers/net/ipvlan/ipvlan_core.c:677 [] ipvlan_start_xmit+0x49/0x100 drivers/net/ipvlan/ipvlan_main.c:229 [] netdev_start_xmit include/linux/netdevice.h:4966 [inline] [] xmit_one net/core/dev.c:3644 [inline] [] dev_hard_start_xmit+0x320/0x980 net/core/dev.c:3660 [] __dev_queue_xmit+0x16b2/0x3370 net/core/dev.c:4324 [] dev_queue_xmit include/linux/netdevice.h:3067 [inline] [] neigh_hh_output include/net/neighbour.h:529 [inline] [] neigh_output include/net/neighbour.h:543 [inline] [] ip6_finish_output2+0x160d/0x1ae0 net/ipv6/ip6_output.c:139 [] __ip6_finish_output net/ipv6/ip6_output.c:200 [inline] [] ip6_finish_output+0x6c6/0xb10 net/ipv6/ip6_output.c:211 [] NF_HOOK_COND include/linux/netfilter.h:298 [inline] [] ip6_output+0x2bc/0x3d0 net/ipv6/ip6_output.c:232 [] dst_output include/net/dst.h:444 [inline] [] ip6_local_out+0x10f/0x140 net/ipv6/output_core.c:161 [] ipvlan_process_v6_outbound drivers/net/ipvlan/ipvlan_core.c:483 [inline] [] ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:529 [inline] [] ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline] [] ipvlan_queue_xmit+0x1174/0x1be0 drivers/net/ipvlan/ipvlan_core.c:677 [] ipvlan_start_xmit+0x49/0x100 drivers/net/ipvlan/ipvlan_main.c:229 [] netdev_start_xmit include/linux/netdevice.h:4966 [inline] [] xmit_one net/core/dev.c:3644 [inline] [] dev_hard_start_xmit+0x320/0x980 net/core/dev.c:3660 [] __dev_queue_xmit+0x16b2/0x3370 net/core/dev.c:4324 [] dev_queue_xmit include/linux/netdevice.h:3067 [inline] [] neigh_resolve_output+0x64e/0x750 net/core/neighbour.c:1560 [] neigh_output include/net/neighbour.h:545 [inline] [] ip6_finish_output2+0x1643/0x1ae0 net/ipv6/ip6_output.c:139 [] __ip6_finish_output net/ipv6/ip6_output.c:200 [inline] [] ip6_finish_output+0x6c6/0xb10 net/ipv6/ip6_output.c:211 [] NF_HOOK_COND include/linux/netfilter.h:298 [inline] [] ip6_output+0x2bc/0x3d0 net/ipv6/ip6_output.c:232 [] dst_output include/net/dst.h:444 [inline] [] NF_HOOK include/linux/netfilter.h:309 [inline] [] ip6_xmit+0x11a4/0x1b20 net/ipv6/ip6_output.c:352 [] sctp_v6_xmit+0x9ae/0x1230 net/sctp/ipv6.c:250 [] sctp_packet_transmit+0x25de/0x2bc0 net/sctp/output.c:653 [] sctp_packet_singleton+0x202/0x310 net/sctp/outqueue.c:783 [] sctp_outq_flush_ctrl net/sctp/outqueue.c:914 [inline] [] sctp_outq_flush+0x661/0x3d40 net/sctp/outqueue.c:1212 [] sctp_outq_uncork+0x79/0xb0 net/sctp/outqueue.c:764 [] sctp_side_effects net/sctp/sm_sideeffect.c:1199 [inline] [] sctp_do_sm+0x55c0/0x5c30 net/sctp/sm_sideeffect.c:1170 [] sctp_primitive_ASSOCIATE+0x97/0xc0 net/sctp/primitive.c:73 [] sctp_sendmsg_to_asoc+0xf62/0x17b0 net/sctp/socket.c:1839 [] sctp_sendmsg+0x212e/0x33b0 net/sctp/socket.c:2029 [] inet_sendmsg+0x149/0x310 net/ipv4/af_inet.c:849 [] sock_sendmsg_nosec net/socket.c:716 [inline] [] sock_sendmsg net/socket.c:736 [inline] [] ____sys_sendmsg+0x572/0x8c0 net/socket.c:2504 [] ___sys_sendmsg net/socket.c:2558 [inline] [] __sys_sendmsg+0x271/0x360 net/socket.c:2587 [] __do_sys_sendmsg net/socket.c:2596 [inline] [] __se_sys_sendmsg net/socket.c:2594 [inline] [] __x64_sys_sendmsg+0x7f/0x90 net/socket.c:2594 [] do_syscall_x64 arch/x86/entry/common.c:51 [inline] [] do_syscall_64+0x53/0x80 arch/x86/entry/common.c:84 [] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Mahesh Bandewar Cc: Willem de Bruijn Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_core.c | 41 +++++++++++++++++++------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 21e9cac73121..2d5b021b4ea6 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -411,7 +411,7 @@ struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h, return addr; } -static int ipvlan_process_v4_outbound(struct sk_buff *skb) +static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb) { const struct iphdr *ip4h = ip_hdr(skb); struct net_device *dev = skb->dev; @@ -453,13 +453,11 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb) } #if IS_ENABLED(CONFIG_IPV6) -static int ipvlan_process_v6_outbound(struct sk_buff *skb) + +static noinline_for_stack int +ipvlan_route_v6_outbound(struct net_device *dev, struct sk_buff *skb) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); - struct net_device *dev = skb->dev; - struct net *net = dev_net(dev); - struct dst_entry *dst; - int err, ret = NET_XMIT_DROP; struct flowi6 fl6 = { .flowi6_oif = dev->ifindex, .daddr = ip6h->daddr, @@ -469,27 +467,38 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb) .flowi6_mark = skb->mark, .flowi6_proto = ip6h->nexthdr, }; + struct dst_entry *dst; + int err; - dst = ip6_route_output(net, NULL, &fl6); - if (dst->error) { - ret = dst->error; + dst = ip6_route_output(dev_net(dev), NULL, &fl6); + err = dst->error; + if (err) { dst_release(dst); - goto err; + return err; } skb_dst_set(skb, dst); + return 0; +} + +static int ipvlan_process_v6_outbound(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + int err, ret = NET_XMIT_DROP; + + err = ipvlan_route_v6_outbound(dev, skb); + if (unlikely(err)) { + DEV_STATS_INC(dev, tx_errors); + kfree_skb(skb); + return err; + } memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); - err = ip6_local_out(net, skb->sk, skb); + err = ip6_local_out(dev_net(dev), skb->sk, skb); if (unlikely(net_xmit_eval(err))) DEV_STATS_INC(dev, tx_errors); else ret = NET_XMIT_SUCCESS; - goto out; -err: - DEV_STATS_INC(dev, tx_errors); - kfree_skb(skb); -out: return ret; } #else From ce51e6153f7781bcde0f8bb4c81d6fd85ee422e6 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 8 Nov 2023 21:12:39 +0900 Subject: [PATCH 319/415] tracing: fprobe-event: Fix to check tracepoint event and return Fix to check the tracepoint event is not valid with $retval. The commit 08c9306fc2e3 ("tracing/fprobe-event: Assume fprobe is a return event by $retval") introduced automatic return probe conversion with $retval. But since tracepoint event does not support return probe, $retval is not acceptable. Without this fix, ftracetest, tprobe_syntax_errors.tc fails; [22] Tracepoint probe event parser error log check [FAIL] ---- # tail 22-tprobe_syntax_errors.tc-log.mRKroL + ftrace_errlog_check trace_fprobe t kfree ^$retval dynamic_events + printf %s t kfree + wc -c + pos=8 + printf %s t kfree ^$retval + tr -d ^ + command=t kfree $retval + echo Test command: t kfree $retval Test command: t kfree $retval + echo ---- So 't kfree $retval' should fail (tracepoint doesn't support return probe) but passed it. Link: https://lore.kernel.org/all/169944555933.45057.12831706585287704173.stgit@devnote2/ Fixes: 08c9306fc2e3 ("tracing/fprobe-event: Assume fprobe is a return event by $retval") Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_fprobe.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 8bfe23af9c73..7d2ddbcfa377 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -927,11 +927,12 @@ static int parse_symbol_and_return(int argc, const char *argv[], for (i = 2; i < argc; i++) { tmp = strstr(argv[i], "$retval"); if (tmp && !isalnum(tmp[7]) && tmp[7] != '_') { + if (is_tracepoint) { + trace_probe_log_set_index(i); + trace_probe_log_err(tmp - argv[i], RETVAL_ON_PROBE); + return -EINVAL; + } *is_return = true; - /* - * NOTE: Don't check is_tracepoint here, because it will - * be checked when the argument is parsed. - */ break; } } From 719639853d88071dfdfd8d9971eca9c283ff314c Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Thu, 9 Nov 2023 00:44:20 +0900 Subject: [PATCH 320/415] tty: Fix uninit-value access in ppp_sync_receive() KMSAN reported the following uninit-value access issue: ===================================================== BUG: KMSAN: uninit-value in ppp_sync_input drivers/net/ppp/ppp_synctty.c:690 [inline] BUG: KMSAN: uninit-value in ppp_sync_receive+0xdc9/0xe70 drivers/net/ppp/ppp_synctty.c:334 ppp_sync_input drivers/net/ppp/ppp_synctty.c:690 [inline] ppp_sync_receive+0xdc9/0xe70 drivers/net/ppp/ppp_synctty.c:334 tiocsti+0x328/0x450 drivers/tty/tty_io.c:2295 tty_ioctl+0x808/0x1920 drivers/tty/tty_io.c:2694 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl+0x211/0x400 fs/ioctl.c:857 __x64_sys_ioctl+0x97/0xe0 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: __alloc_pages+0x75d/0xe80 mm/page_alloc.c:4591 __alloc_pages_node include/linux/gfp.h:238 [inline] alloc_pages_node include/linux/gfp.h:261 [inline] __page_frag_cache_refill+0x9a/0x2c0 mm/page_alloc.c:4691 page_frag_alloc_align+0x91/0x5d0 mm/page_alloc.c:4722 page_frag_alloc include/linux/gfp.h:322 [inline] __netdev_alloc_skb+0x215/0x6d0 net/core/skbuff.c:728 netdev_alloc_skb include/linux/skbuff.h:3225 [inline] dev_alloc_skb include/linux/skbuff.h:3238 [inline] ppp_sync_input drivers/net/ppp/ppp_synctty.c:669 [inline] ppp_sync_receive+0x237/0xe70 drivers/net/ppp/ppp_synctty.c:334 tiocsti+0x328/0x450 drivers/tty/tty_io.c:2295 tty_ioctl+0x808/0x1920 drivers/tty/tty_io.c:2694 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl+0x211/0x400 fs/ioctl.c:857 __x64_sys_ioctl+0x97/0xe0 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b CPU: 0 PID: 12950 Comm: syz-executor.1 Not tainted 6.6.0-14500-g1c41041124bd #10 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014 ===================================================== ppp_sync_input() checks the first 2 bytes of the data are PPP_ALLSTATIONS and PPP_UI. However, if the data length is 1 and the first byte is PPP_ALLSTATIONS, an access to an uninitialized value occurs when checking PPP_UI. This patch resolves this issue by checking the data length. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Shigeru Yoshida Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_synctty.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c index ebcdffdf4f0e..ea261a628786 100644 --- a/drivers/net/ppp/ppp_synctty.c +++ b/drivers/net/ppp/ppp_synctty.c @@ -687,7 +687,7 @@ ppp_sync_input(struct syncppp *ap, const u8 *buf, const u8 *flags, int count) /* strip address/control field if present */ p = skb->data; - if (p[0] == PPP_ALLSTATIONS && p[1] == PPP_UI) { + if (skb->len >= 2 && p[0] == PPP_ALLSTATIONS && p[1] == PPP_UI) { /* chop off address/control */ if (skb->len < 3) goto err; From bef4a48f4ef798c4feddf045d49e53c8a97d5e37 Mon Sep 17 00:00:00 2001 From: Mark Hasemeyer Date: Tue, 7 Nov 2023 14:47:43 -0700 Subject: [PATCH 321/415] spi: Fix null dereference on suspend A race condition exists where a synchronous (noqueue) transfer can be active during a system suspend. This can cause a null pointer dereference exception to occur when the system resumes. Example order of events leading to the exception: 1. spi_sync() calls __spi_transfer_message_noqueue() which sets ctlr->cur_msg 2. Spi transfer begins via spi_transfer_one_message() 3. System is suspended interrupting the transfer context 4. System is resumed 6. spi_controller_resume() calls spi_start_queue() which resets cur_msg to NULL 7. Spi transfer context resumes and spi_finalize_current_message() is called which dereferences cur_msg (which is now NULL) Wait for synchronous transfers to complete before suspending by acquiring the bus mutex and setting/checking a suspend flag. Signed-off-by: Mark Hasemeyer Link: https://lore.kernel.org/r/20231107144743.v1.1.I7987f05f61901f567f7661763646cb7d7919b528@changeid Signed-off-by: Mark Brown Cc: stable@kernel.org --- drivers/spi/spi.c | 56 ++++++++++++++++++++++++++++------------- include/linux/spi/spi.h | 1 + 2 files changed, 40 insertions(+), 17 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 791df0e69105..8ead7acb99f3 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -3317,33 +3317,52 @@ void spi_unregister_controller(struct spi_controller *ctlr) } EXPORT_SYMBOL_GPL(spi_unregister_controller); +static inline int __spi_check_suspended(const struct spi_controller *ctlr) +{ + return ctlr->flags & SPI_CONTROLLER_SUSPENDED ? -ESHUTDOWN : 0; +} + +static inline void __spi_mark_suspended(struct spi_controller *ctlr) +{ + mutex_lock(&ctlr->bus_lock_mutex); + ctlr->flags |= SPI_CONTROLLER_SUSPENDED; + mutex_unlock(&ctlr->bus_lock_mutex); +} + +static inline void __spi_mark_resumed(struct spi_controller *ctlr) +{ + mutex_lock(&ctlr->bus_lock_mutex); + ctlr->flags &= ~SPI_CONTROLLER_SUSPENDED; + mutex_unlock(&ctlr->bus_lock_mutex); +} + int spi_controller_suspend(struct spi_controller *ctlr) { - int ret; + int ret = 0; /* Basically no-ops for non-queued controllers */ - if (!ctlr->queued) - return 0; - - ret = spi_stop_queue(ctlr); - if (ret) - dev_err(&ctlr->dev, "queue stop failed\n"); + if (ctlr->queued) { + ret = spi_stop_queue(ctlr); + if (ret) + dev_err(&ctlr->dev, "queue stop failed\n"); + } + __spi_mark_suspended(ctlr); return ret; } EXPORT_SYMBOL_GPL(spi_controller_suspend); int spi_controller_resume(struct spi_controller *ctlr) { - int ret; + int ret = 0; - if (!ctlr->queued) - return 0; - - ret = spi_start_queue(ctlr); - if (ret) - dev_err(&ctlr->dev, "queue restart failed\n"); + __spi_mark_resumed(ctlr); + if (ctlr->queued) { + ret = spi_start_queue(ctlr); + if (ret) + dev_err(&ctlr->dev, "queue restart failed\n"); + } return ret; } EXPORT_SYMBOL_GPL(spi_controller_resume); @@ -4147,8 +4166,7 @@ static void __spi_transfer_message_noqueue(struct spi_controller *ctlr, struct s ctlr->cur_msg = msg; ret = __spi_pump_transfer_message(ctlr, msg, was_busy); if (ret) - goto out; - + dev_err(&ctlr->dev, "noqueue transfer failed\n"); ctlr->cur_msg = NULL; ctlr->fallback = false; @@ -4164,7 +4182,6 @@ static void __spi_transfer_message_noqueue(struct spi_controller *ctlr, struct s spi_idle_runtime_pm(ctlr); } -out: mutex_unlock(&ctlr->io_mutex); } @@ -4187,6 +4204,11 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message) int status; struct spi_controller *ctlr = spi->controller; + if (__spi_check_suspended(ctlr)) { + dev_warn_once(&spi->dev, "Attempted to sync while suspend\n"); + return -ESHUTDOWN; + } + status = __spi_validate(spi, message); if (status != 0) return status; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 86825c88b576..255a0562aea5 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -566,6 +566,7 @@ struct spi_controller { #define SPI_CONTROLLER_MUST_RX BIT(3) /* Requires rx */ #define SPI_CONTROLLER_MUST_TX BIT(4) /* Requires tx */ #define SPI_CONTROLLER_GPIO_SS BIT(5) /* GPIO CS must select slave */ +#define SPI_CONTROLLER_SUSPENDED BIT(6) /* Currently suspended */ /* Flag indicating if the allocation of this struct is devres-managed */ bool devm_allocated; From a406b8b424fa01f244c1aab02ba186258448c36b Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 10 Nov 2023 16:13:15 +0100 Subject: [PATCH 322/415] parisc: Prevent booting 64-bit kernels on PA1.x machines Bail out early with error message when trying to boot a 64-bit kernel on 32-bit machines. This fixes the previous commit to include the check for true 64-bit kernels as well. Signed-off-by: Helge Deller Fixes: 591d2108f3abc ("parisc: Add runtime check to prevent PA2.0 kernels on PA1.x machines") Cc: # v6.0+ --- arch/parisc/kernel/head.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index a171bf3c6b31..96e0264ac961 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -70,9 +70,8 @@ $bss_loop: stw,ma %arg2,4(%r1) stw,ma %arg3,4(%r1) -#if !defined(CONFIG_64BIT) && defined(CONFIG_PA20) - /* This 32-bit kernel was compiled for PA2.0 CPUs. Check current CPU - * and halt kernel if we detect a PA1.x CPU. */ +#if defined(CONFIG_PA20) + /* check for 64-bit capable CPU as required by current kernel */ ldi 32,%r10 mtctl %r10,%cr11 .level 2.0 From 784e0e20b4c97c270b2892f677d3fad658e2c1d5 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 10 Nov 2023 01:24:16 -0600 Subject: [PATCH 323/415] Missing field not being returned in ioctl CIFS_IOC_GET_MNT_INFO The tcon_flags field was always being set to zero in the information about the mount returned by the ioctl CIFS_IOC_GET_MNT_INFO instead of being set to the value of the Flags field in the tree connection structure as intended. Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/ioctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index 73ededa8eba5..e2f92c21fff5 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -143,6 +143,7 @@ static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon, fsinf->version = 1; fsinf->protocol_id = tcon->ses->server->vals->protocol_id; + fsinf->tcon_flags = tcon->Flags; fsinf->device_characteristics = le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics); fsinf->device_type = le32_to_cpu(tcon->fsDevInfo.DeviceType); From 705fc522fe9d58848c253ee0948567060f36e2a7 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 13 Oct 2023 11:33:21 +0000 Subject: [PATCH 324/415] cifs: handle when server starts supporting multichannel When the user mounts with multichannel option, but the server does not support it, there can be a time in future where it can be supported. With this change, such a case is handled. Signed-off-by: Shyam Prasad N --- fs/smb/client/cifsproto.h | 1 + fs/smb/client/connect.c | 3 +++ fs/smb/client/smb2pdu.c | 32 ++++++++++++++++++++++++++++++-- 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index eed8dbb6b2fb..00ad41633e26 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -132,6 +132,7 @@ extern int SendReceiveBlockingLock(const unsigned int xid, struct smb_hdr *in_buf, struct smb_hdr *out_buf, int *bytes_returned); + void cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, bool all_channels); diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index b514b41cc9f0..1490547ec493 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -132,6 +132,9 @@ static void smb2_query_server_interfaces(struct work_struct *work) free_xid(xid); if (rc) { + if (rc == -EOPNOTSUPP) + return; + cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n", __func__, rc); } diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index b7665155f4e2..0d4351e43069 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -163,6 +163,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, int rc = 0; struct nls_table *nls_codepage = NULL; struct cifs_ses *ses; + int xid; /* * SMB2s NegProt, SessSetup, Logoff do not have tcon yet so @@ -307,17 +308,44 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, tcon->need_reopen_files = true; rc = cifs_tree_connect(0, tcon, nls_codepage); - mutex_unlock(&ses->session_mutex); cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); if (rc) { /* If sess reconnected but tcon didn't, something strange ... */ + mutex_unlock(&ses->session_mutex); cifs_dbg(VFS, "reconnect tcon failed rc = %d\n", rc); goto out; } + if (!rc && + (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { + mutex_unlock(&ses->session_mutex); + + /* + * query server network interfaces, in case they change + */ + xid = get_xid(); + rc = SMB3_request_interfaces(xid, tcon, false); + free_xid(xid); + + if (rc) + cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n", + __func__, rc); + + if (ses->chan_max > ses->chan_count && + !SERVER_IS_CHAN(server)) { + if (ses->chan_count == 1) + cifs_server_dbg(VFS, "supports multichannel now\n"); + + cifs_try_adding_channels(ses); + } + } else { + mutex_unlock(&ses->session_mutex); + } + if (smb2_command != SMB2_INTERNAL_CMD) - mod_delayed_work(cifsiod_wq, &server->reconnect, 0); + if (mod_delayed_work(cifsiod_wq, &server->reconnect, 0)) + cifs_put_tcp_session(server, false); atomic_inc(&tconInfoReconnectCount); out: From ee1d21794e55ab76505745d24101331552182002 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 13 Oct 2023 11:40:09 +0000 Subject: [PATCH 325/415] cifs: handle when server stops supporting multichannel When a server stops supporting multichannel, we will keep attempting reconnects to the secondary channels today. Avoid this by freeing extra channels when negotiate returns no multichannel support. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 1 + fs/smb/client/cifsproto.h | 2 ++ fs/smb/client/connect.c | 10 ++++++ fs/smb/client/sess.c | 64 ++++++++++++++++++++++++++++----- fs/smb/client/smb2pdu.c | 76 ++++++++++++++++++++++++++++++++++++++- fs/smb/client/transport.c | 2 +- 6 files changed, 145 insertions(+), 10 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 30763c68cc39..6ffbd81bd109 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -650,6 +650,7 @@ struct TCP_Server_Info { bool noautotune; /* do not autotune send buf sizes */ bool nosharesock; bool tcp_nodelay; + bool terminate; unsigned int credits; /* send no more requests at once */ unsigned int max_credits; /* can override large 32000 default at mnt */ unsigned int in_flight; /* number of requests on the wire to server */ diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 00ad41633e26..d87e2c26cce2 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -641,6 +641,8 @@ cifs_chan_needs_reconnect(struct cifs_ses *ses, bool cifs_chan_is_iface_active(struct cifs_ses *ses, struct TCP_Server_Info *server); +void +cifs_disable_secondary_channels(struct cifs_ses *ses); int cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server); int diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 1490547ec493..57c2a7df3457 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -219,6 +219,14 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, spin_lock(&cifs_tcp_ses_lock); list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) { + /* + * if channel has been marked for termination, nothing to do + * for the channel. in fact, we cannot find the channel for the + * server. So safe to exit here + */ + if (server->terminate) + break; + /* check if iface is still active */ if (!cifs_chan_is_iface_active(ses, server)) cifs_chan_update_iface(ses, server); @@ -253,6 +261,8 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, spin_lock(&tcon->tc_lock); tcon->status = TID_NEED_RECON; spin_unlock(&tcon->tc_lock); + + cancel_delayed_work(&tcon->query_interfaces); } if (ses->tcon_ipc) { ses->tcon_ipc->need_reconnect = true; diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 4e4cc0cd5206..0bb2ac929061 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -296,6 +296,60 @@ int cifs_try_adding_channels(struct cifs_ses *ses) return new_chan_count - old_chan_count; } +/* + * called when multichannel is disabled by the server. + * this always gets called from smb2_reconnect + * and cannot get called in parallel threads. + */ +void +cifs_disable_secondary_channels(struct cifs_ses *ses) +{ + int i, chan_count; + struct TCP_Server_Info *server; + struct cifs_server_iface *iface; + + spin_lock(&ses->chan_lock); + chan_count = ses->chan_count; + if (chan_count == 1) + goto done; + + ses->chan_count = 1; + + /* for all secondary channels reset the need reconnect bit */ + ses->chans_need_reconnect &= 1; + + for (i = 1; i < chan_count; i++) { + iface = ses->chans[i].iface; + server = ses->chans[i].server; + + if (iface) { + spin_lock(&ses->iface_lock); + kref_put(&iface->refcount, release_iface); + ses->chans[i].iface = NULL; + iface->num_channels--; + if (iface->weight_fulfilled) + iface->weight_fulfilled--; + spin_unlock(&ses->iface_lock); + } + + spin_unlock(&ses->chan_lock); + if (server && !server->terminate) { + server->terminate = true; + cifs_signal_cifsd_for_reconnect(server, false); + } + spin_lock(&ses->chan_lock); + + if (server) { + ses->chans[i].server = NULL; + cifs_put_tcp_session(server, false); + } + + } + +done: + spin_unlock(&ses->chan_lock); +} + /* * update the iface for the channel if necessary. * will return 0 when iface is updated, 1 if removed, 2 otherwise @@ -595,14 +649,10 @@ cifs_ses_add_channel(struct cifs_ses *ses, out: if (rc && chan->server) { - /* - * we should avoid race with these delayed works before we - * remove this channel - */ - cancel_delayed_work_sync(&chan->server->echo); - cancel_delayed_work_sync(&chan->server->reconnect); + cifs_put_tcp_session(chan->server, 0); spin_lock(&ses->chan_lock); + /* we rely on all bits beyond chan_count to be clear */ cifs_chan_clear_need_reconnect(ses, chan->server); ses->chan_count--; @@ -612,8 +662,6 @@ cifs_ses_add_channel(struct cifs_ses *ses, */ WARN_ON(ses->chan_count < 1); spin_unlock(&ses->chan_lock); - - cifs_put_tcp_session(chan->server, 0); } kfree(ctx->UNC); diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 0d4351e43069..2eb29fa278c3 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -164,6 +164,8 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, struct nls_table *nls_codepage = NULL; struct cifs_ses *ses; int xid; + struct TCP_Server_Info *pserver; + unsigned int chan_index; /* * SMB2s NegProt, SessSetup, Logoff do not have tcon yet so @@ -224,6 +226,12 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, return -EAGAIN; } } + + /* if server is marked for termination, cifsd will cleanup */ + if (server->terminate) { + spin_unlock(&server->srv_lock); + return -EHOSTDOWN; + } spin_unlock(&server->srv_lock); again: @@ -242,12 +250,24 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, tcon->need_reconnect); mutex_lock(&ses->session_mutex); + /* + * if this is called by delayed work, and the channel has been disabled + * in parallel, the delayed work can continue to execute in parallel + * there's a chance that this channel may not exist anymore + */ + spin_lock(&server->srv_lock); + if (server->tcpStatus == CifsExiting) { + spin_unlock(&server->srv_lock); + mutex_unlock(&ses->session_mutex); + rc = -EHOSTDOWN; + goto out; + } + /* * Recheck after acquire mutex. If another thread is negotiating * and the server never sends an answer the socket will be closed * and tcpStatus set to reconnect. */ - spin_lock(&server->srv_lock); if (server->tcpStatus == CifsNeedReconnect) { spin_unlock(&server->srv_lock); mutex_unlock(&ses->session_mutex); @@ -284,6 +304,53 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, rc = cifs_negotiate_protocol(0, ses, server); if (!rc) { + /* + * if server stopped supporting multichannel + * and the first channel reconnected, disable all the others. + */ + if (ses->chan_count > 1 && + !(server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { + if (SERVER_IS_CHAN(server)) { + cifs_dbg(VFS, "server %s does not support " \ + "multichannel anymore. skipping secondary channel\n", + ses->server->hostname); + + spin_lock(&ses->chan_lock); + chan_index = cifs_ses_get_chan_index(ses, server); + if (chan_index == CIFS_INVAL_CHAN_INDEX) { + spin_unlock(&ses->chan_lock); + goto skip_terminate; + } + + ses->chans[chan_index].server = NULL; + spin_unlock(&ses->chan_lock); + + /* + * the above reference of server by channel + * needs to be dropped without holding chan_lock + * as cifs_put_tcp_session takes a higher lock + * i.e. cifs_tcp_ses_lock + */ + cifs_put_tcp_session(server, 1); + + server->terminate = true; + cifs_signal_cifsd_for_reconnect(server, false); + + /* mark primary server as needing reconnect */ + pserver = server->primary_server; + cifs_signal_cifsd_for_reconnect(pserver, false); + +skip_terminate: + mutex_unlock(&ses->session_mutex); + rc = -EHOSTDOWN; + goto out; + } else { + cifs_server_dbg(VFS, "does not support " \ + "multichannel anymore. disabling all other channels\n"); + cifs_disable_secondary_channels(ses); + } + } + rc = cifs_setup_session(0, ses, server, nls_codepage); if ((rc == -EACCES) && !tcon->retry) { mutex_unlock(&ses->session_mutex); @@ -3836,6 +3903,13 @@ void smb2_reconnect_server(struct work_struct *work) /* Prevent simultaneous reconnects that can corrupt tcon->rlist list */ mutex_lock(&pserver->reconnect_mutex); + /* if the server is marked for termination, drop the ref count here */ + if (server->terminate) { + cifs_put_tcp_session(server, true); + mutex_unlock(&pserver->reconnect_mutex); + return; + } + INIT_LIST_HEAD(&tmp_list); INIT_LIST_HEAD(&tmp_ses_list); cifs_dbg(FYI, "Reconnecting tcons and channels\n"); diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index d553b7a54621..4f717ad7c21b 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1023,7 +1023,7 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) spin_lock(&ses->chan_lock); for (i = 0; i < ses->chan_count; i++) { server = ses->chans[i].server; - if (!server) + if (!server || server->terminate) continue; /* From fd2bd7c0539e28f267a84da8d68f9378511b50a7 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 20 Jul 2023 08:30:32 -0500 Subject: [PATCH 326/415] cifs: update internal module version number for cifs.ko From 2.45 to 2.46 Signed-off-by: Steve French --- fs/smb/client/cifsfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index 8ca3d7606bb4..3adea10aa9da 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -152,6 +152,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 45 -#define CIFS_VERSION "2.45" +#define SMB3_PRODUCT_BUILD 46 +#define CIFS_VERSION "2.46" #endif /* _CIFSFS_H */ From e4c44b1a19625348fc004ce8c5f828d5d80d037e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 9 Nov 2023 11:23:46 -0600 Subject: [PATCH 327/415] drm/amd: Explicitly check for GFXOFF to be enabled for s0ix If a user has disabled GFXOFF this may cause problems for the suspend sequence. Ensure that it is enabled in amdgpu_acpi_is_s0ix_active(). The system won't reach the deepest state but it also won't hang. Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index d62e49758635..e550067e5c5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1497,6 +1497,9 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) if (adev->asic_type < CHIP_RAVEN) return false; + if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) + return false; + /* * If ACPI_FADT_LOW_POWER_S0 is not set in the FADT, it is generally * risky to do any special firmware-related preparations for entering From 8ed79c409ecb216ee2b0ec334568a1104505c62a Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Thu, 19 Oct 2023 15:50:43 +0800 Subject: [PATCH 328/415] drm/amdgpu: move kfd_resume before the ip late init The kfd_resume needs to touch GC registers to enable the interrupts, it needs to be done before GFXOFF is enabled to ensure that the GFX is not off and GC registers can be touched. So move kfd_resume before the amdgpu_device_ip_late_init which enables the CGPG/GFXOFF. Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d93240e8cb6a..e8a26852eb32 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4571,19 +4571,18 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) } amdgpu_fence_driver_hw_init(adev); - r = amdgpu_device_ip_late_init(adev); - if (r) - goto exit; - - queue_delayed_work(system_wq, &adev->delayed_init_work, - msecs_to_jiffies(AMDGPU_RESUME_MS)); - if (!adev->in_s0ix) { r = amdgpu_amdkfd_resume(adev, adev->in_runpm); if (r) goto exit; } + r = amdgpu_device_ip_late_init(adev); + if (r) + goto exit; + + queue_delayed_work(system_wq, &adev->delayed_init_work, + msecs_to_jiffies(AMDGPU_RESUME_MS)); exit: if (amdgpu_sriov_vf(adev)) { amdgpu_virt_init_data_exchange(adev); From 037b98a2312e2587163de14afae8ae1b64b67dda Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Nov 2023 09:40:44 -0500 Subject: [PATCH 329/415] drm/amdgpu: move UVD and VCE sched entity init after sched init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need kernel scheduling entities to deal with handle clean up if apps are not cleaned up properly. With commit 56e449603f0ac5 ("drm/sched: Convert the GPU scheduler to variable number of run-queues") the scheduler entities have to be created after scheduler init, so change the ordering to fix this. v2: Leave logic in UVD and VCE code Fixes: 56e449603f0a ("drm/sched: Convert the GPU scheduler to variable number of run-queues") Reviewed-by: Christian König Reviewed-by: Luben Tuikov Signed-off-by: Alex Deucher Cc: ltuikov89@gmail.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 22 +++++++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 22 +++++++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 2 +- drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c | 2 -- drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 2 -- drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 2 -- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 2 -- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 4 ---- drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 2 -- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 2 -- drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 5 ----- 13 files changed, 36 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e8a26852eb32..7eeaf0aa7f81 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2584,6 +2584,18 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) ring->name); return r; } + r = amdgpu_uvd_entity_init(adev, ring); + if (r) { + DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n", + ring->name); + return r; + } + r = amdgpu_vce_entity_init(adev, ring); + if (r) { + DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n", + ring->name); + return r; + } } amdgpu_xcp_update_partition_sched_list(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 815b7c34ed33..65949cc7abb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -399,20 +399,20 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * + * Initialize the entity used for handle management in the kernel driver. */ -int amdgpu_uvd_entity_init(struct amdgpu_device *adev) +int amdgpu_uvd_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - struct amdgpu_ring *ring; - struct drm_gpu_scheduler *sched; - int r; + if (ring == &adev->uvd.inst[0].ring) { + struct drm_gpu_scheduler *sched = &ring->sched; + int r; - ring = &adev->uvd.inst[0].ring; - sched = &ring->sched; - r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL, - &sched, 1, NULL); - if (r) { - DRM_ERROR("Failed setting up UVD kernel entity.\n"); - return r; + r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); + if (r) { + DRM_ERROR("Failed setting up UVD kernel entity.\n"); + return r; + } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index a9f342537c68..9dfad2f48ef4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -73,7 +73,7 @@ struct amdgpu_uvd { int amdgpu_uvd_sw_init(struct amdgpu_device *adev); int amdgpu_uvd_sw_fini(struct amdgpu_device *adev); -int amdgpu_uvd_entity_init(struct amdgpu_device *adev); +int amdgpu_uvd_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); int amdgpu_uvd_prepare_suspend(struct amdgpu_device *adev); int amdgpu_uvd_suspend(struct amdgpu_device *adev); int amdgpu_uvd_resume(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 1904edf68407..0954447f689d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -231,20 +231,20 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * + * Initialize the entity used for handle management in the kernel driver. */ -int amdgpu_vce_entity_init(struct amdgpu_device *adev) +int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - struct amdgpu_ring *ring; - struct drm_gpu_scheduler *sched; - int r; + if (ring == &adev->vce.ring[0]) { + struct drm_gpu_scheduler *sched = &ring->sched; + int r; - ring = &adev->vce.ring[0]; - sched = &ring->sched; - r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL, - &sched, 1, NULL); - if (r != 0) { - DRM_ERROR("Failed setting up VCE run queue.\n"); - return r; + r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); + if (r != 0) { + DRM_ERROR("Failed setting up VCE run queue.\n"); + return r; + } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index ea680fc9a6c3..6e53f872d084 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -55,7 +55,7 @@ struct amdgpu_vce { int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size); int amdgpu_vce_sw_fini(struct amdgpu_device *adev); -int amdgpu_vce_entity_init(struct amdgpu_device *adev); +int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); int amdgpu_vce_suspend(struct amdgpu_device *adev); int amdgpu_vce_resume(struct amdgpu_device *adev); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index 58a8f78c003c..a6006f231c65 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -577,8 +577,6 @@ static int uvd_v3_1_sw_init(void *handle) ptr += ucode_len; memcpy(&adev->uvd.keyselect, ptr, 4); - r = amdgpu_uvd_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index d3b1e31f5450..1aa09ad7bbe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -127,8 +127,6 @@ static int uvd_v4_2_sw_init(void *handle) if (r) return r; - r = amdgpu_uvd_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 5a8116437abf..f8b229b75435 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -125,8 +125,6 @@ static int uvd_v5_0_sw_init(void *handle) if (r) return r; - r = amdgpu_uvd_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 74c09230aeb3..a9a6880f44e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -432,8 +432,6 @@ static int uvd_v6_0_sw_init(void *handle) } } - r = amdgpu_uvd_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 1c42cf10cc29..6068b784dc69 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -480,10 +480,6 @@ static int uvd_v7_0_sw_init(void *handle) if (r) return r; - r = amdgpu_uvd_entity_init(adev); - if (r) - return r; - r = amdgpu_virt_alloc_mm_table(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 67eb01fef789..a08e7abca423 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -441,8 +441,6 @@ static int vce_v2_0_sw_init(void *handle) return r; } - r = amdgpu_vce_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 18f6e62af339..f4760748d349 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -450,8 +450,6 @@ static int vce_v3_0_sw_init(void *handle) return r; } - r = amdgpu_vce_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index e0b70cd3b697..06d787385ad4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -486,11 +486,6 @@ static int vce_v4_0_sw_init(void *handle) return r; } - - r = amdgpu_vce_entity_init(adev); - if (r) - return r; - r = amdgpu_virt_alloc_mm_table(adev); if (r) return r; From 256503071c2de2b5b5c20e06654aa9a44f13aa62 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 31 Oct 2023 13:30:00 -0400 Subject: [PATCH 330/415] drm/amdgpu: Fix possible null pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mem = bo->tbo.resource may be NULL in amdgpu_vm_bo_update. Fixes: 180253782038 ("drm/ttm: stop allocating dummy resources during BO creation") Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 904252456d25..100497cb19e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1098,8 +1098,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bo = gem_to_amdgpu_bo(gobj); } mem = bo->tbo.resource; - if (mem->mem_type == TTM_PL_TT || - mem->mem_type == AMDGPU_PL_PREEMPT) + if (mem && (mem->mem_type == TTM_PL_TT || + mem->mem_type == AMDGPU_PL_PREEMPT)) pages_addr = bo->tbo.ttm->dma_address; } From 8473bfdcb5b1a32fd05629c4535ccacd73bc5567 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 31 Oct 2023 15:35:27 +0100 Subject: [PATCH 331/415] drm/amdgpu: fix error handling in amdgpu_vm_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When clearing the root PD fails we need to properly release it again. Signed-off-by: Christian König Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 33 +++++++++++++------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 100497cb19e6..d1b8afd105c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2139,7 +2139,8 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) * Returns: * 0 for success, error for failure. */ -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id) +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int32_t xcp_id) { struct amdgpu_bo *root_bo; struct amdgpu_bo_vm *root; @@ -2158,6 +2159,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp INIT_LIST_HEAD(&vm->done); INIT_LIST_HEAD(&vm->pt_freed); INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work); + INIT_KFIFO(vm->faults); r = amdgpu_vm_init_entities(adev, vm); if (r) @@ -2192,34 +2194,33 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp false, &root, xcp_id); if (r) goto error_free_delayed; - root_bo = &root->bo; + + root_bo = amdgpu_bo_ref(&root->bo); r = amdgpu_bo_reserve(root_bo, true); + if (r) { + amdgpu_bo_unref(&root->shadow); + amdgpu_bo_unref(&root_bo); + goto error_free_delayed; + } + + amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); + r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1); if (r) goto error_free_root; - r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1); - if (r) - goto error_unreserve; - - amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); - r = amdgpu_vm_pt_clear(adev, vm, root, false); if (r) - goto error_unreserve; + goto error_free_root; amdgpu_bo_unreserve(vm->root.bo); - - INIT_KFIFO(vm->faults); + amdgpu_bo_unref(&root_bo); return 0; -error_unreserve: - amdgpu_bo_unreserve(vm->root.bo); - error_free_root: - amdgpu_bo_unref(&root->shadow); + amdgpu_vm_pt_free_root(adev, vm); + amdgpu_bo_unreserve(vm->root.bo); amdgpu_bo_unref(&root_bo); - vm->root.bo = NULL; error_free_delayed: dma_fence_put(vm->last_tlb_flush); From f032c53bea6d2057c14553832d846be2f151cfb2 Mon Sep 17 00:00:00 2001 From: Yujie Liu Date: Tue, 31 Oct 2023 12:13:05 +0800 Subject: [PATCH 332/415] tracing/kprobes: Fix the order of argument descriptions The order of descriptions should be consistent with the argument list of the function, so "kretprobe" should be the second one. int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe, const char *name, const char *loc, ...) Link: https://lore.kernel.org/all/20231031041305.3363712-1-yujie.liu@intel.com/ Fixes: 2a588dd1d5d6 ("tracing: Add kprobe event command generation functions") Suggested-by: Mukesh Ojha Signed-off-by: Yujie Liu Reviewed-by: Mukesh Ojha Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_kprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index a3442db35670..52f8b537dd0a 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1020,9 +1020,9 @@ EXPORT_SYMBOL_GPL(kprobe_event_cmd_init); /** * __kprobe_event_gen_cmd_start - Generate a kprobe event command from arg list * @cmd: A pointer to the dynevent_cmd struct representing the new event + * @kretprobe: Is this a return probe? * @name: The name of the kprobe event * @loc: The location of the kprobe event - * @kretprobe: Is this a return probe? * @...: Variable number of arg (pairs), one pair for each field * * NOTE: Users normally won't want to call this function directly, but From e409d7346648c9acff84c3cc8d291767ee2d5326 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 10 Nov 2023 17:13:02 +0100 Subject: [PATCH 333/415] net: ti: icssg-prueth: Add missing icss_iep_put to error path Analogously to prueth_remove, just also taking care for NULL'ing the iep pointers. Fixes: 186734c15886 ("net: ti: icssg-prueth: add packet timestamping and ptp support") Fixes: 443a2367ba3c ("net: ti: icssg-prueth: am65x SR2.0 add 10M full duplex support") Signed-off-by: Jan Kiszka Reviewed-by: Wojciech Drewek Reviewed-by: MD Danish Anwar Reviewed-by: Roger Quadros Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 6c4b64227ac8..3abbeba26f1b 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -2105,10 +2105,7 @@ static int prueth_probe(struct platform_device *pdev) prueth->iep1 = icss_iep_get_idx(np, 1); if (IS_ERR(prueth->iep1)) { ret = dev_err_probe(dev, PTR_ERR(prueth->iep1), "iep1 get failed\n"); - icss_iep_put(prueth->iep0); - prueth->iep0 = NULL; - prueth->iep1 = NULL; - goto free_pool; + goto put_iep0; } if (prueth->pdata.quirk_10m_link_issue) { @@ -2205,6 +2202,12 @@ static int prueth_probe(struct platform_device *pdev) exit_iep: if (prueth->pdata.quirk_10m_link_issue) icss_iep_exit_fw(prueth->iep1); + icss_iep_put(prueth->iep1); + +put_iep0: + icss_iep_put(prueth->iep0); + prueth->iep0 = NULL; + prueth->iep1 = NULL; free_pool: gen_pool_free(prueth->sram_pool, From 2bd5b559a1f391f05927bbb0b31381fa71c61e26 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 10 Nov 2023 17:13:08 +0100 Subject: [PATCH 334/415] net: ti: icssg-prueth: Fix error cleanup on failing pruss_request_mem_region We were just continuing in this case, surely not desired. Fixes: 128d5874c082 ("net: ti: icssg-prueth: Add ICSSG ethernet driver") Signed-off-by: Jan Kiszka Reviewed-by: Wojciech Drewek Reviewed-by: Roger Quadros Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 3abbeba26f1b..411898a4f38c 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -2063,7 +2063,7 @@ static int prueth_probe(struct platform_device *pdev) &prueth->shram); if (ret) { dev_err(dev, "unable to get PRUSS SHRD RAM2: %d\n", ret); - pruss_put(prueth->pruss); + goto put_pruss; } prueth->sram_pool = of_gen_pool_get(np, "sram", 0); @@ -2215,6 +2215,8 @@ static int prueth_probe(struct platform_device *pdev) put_mem: pruss_release_mem_region(prueth->pruss, &prueth->shram); + +put_pruss: pruss_put(prueth->pruss); put_cores: From e257da5715365b853439243f89cf5d8a9d382355 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Sun, 12 Nov 2023 16:36:20 +0200 Subject: [PATCH 335/415] wifi: iwlwifi: fix system commands group ordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commands should be sorted inside the group definition. Fix the ordering so we won't get following warning: WARN_ON(iwl_cmd_groups_verify_sorted(trans_cfg)) Link: https://lore.kernel.org/regressions/2fa930bb-54dd-4942-a88d-05a47c8e9731@gmail.com/ Link: https://lore.kernel.org/linux-wireless/CAHk-=wix6kqQ5vHZXjOPpZBfM7mMm9bBZxi2Jh7XnaKCqVf94w@mail.gmail.com/ Fixes: b6e3d1ba4fcf ("wifi: iwlwifi: mvm: implement new firmware API for statistics") Tested-by: Niklāvs Koļesņikovs Tested-by: Damian Tometzki Acked-by: Kalle Valo Signed-off-by: Miri Korenblit Signed-off-by: Emmanuel Grumbach Signed-off-by: Linus Torvalds --- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index fef86a8b4163..1627b2f819db 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -550,9 +550,9 @@ static const struct iwl_hcmd_names iwl_mvm_system_names[] = { HCMD_NAME(RFI_CONFIG_CMD), HCMD_NAME(RFI_GET_FREQ_TABLE_CMD), HCMD_NAME(SYSTEM_FEATURES_CONTROL_CMD), - HCMD_NAME(RFI_DEACTIVATE_NOTIF), HCMD_NAME(SYSTEM_STATISTICS_CMD), HCMD_NAME(SYSTEM_STATISTICS_END_NOTIF), + HCMD_NAME(RFI_DEACTIVATE_NOTIF), }; /* Please keep this array *SORTED* by hex value. From b85ea95d086471afb4ad062012a4d73cd328fa86 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Nov 2023 16:19:07 -0800 Subject: [PATCH 336/415] Linux 6.7-rc1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index a82587fbdad3..ede0bd241056 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 -PATCHLEVEL = 6 +PATCHLEVEL = 7 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From e64e7c74b99ec9e439abca75f522f4b98f220bd1 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 24 Oct 2023 13:51:36 +0200 Subject: [PATCH 337/415] xen/events: avoid using info_for_irq() in xen_send_IPI_one() xen_send_IPI_one() is being used by cpuhp_report_idle_dead() after it calls rcu_report_dead(), meaning that any RCU usage by xen_send_IPI_one() is a bad idea. Unfortunately xen_send_IPI_one() is using notify_remote_via_irq() today, which is using irq_get_chip_data() via info_for_irq(). And irq_get_chip_data() in turn is using a maple-tree lookup requiring RCU. Avoid this problem by caching the ipi event channels in another percpu variable, allowing the use notify_remote_via_evtchn() in xen_send_IPI_one(). Fixes: 721255b9826b ("genirq: Use a maple tree for interrupt descriptor management") Reported-by: David Woodhouse Signed-off-by: Juergen Gross Tested-by: David Woodhouse Acked-by: Stefano Stabellini Signed-off-by: Juergen Gross --- drivers/xen/events/events_base.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 6de6b084ea60..237e6b884f72 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -164,6 +164,8 @@ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1}; /* IRQ <-> IPI mapping */ static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1}; +/* Cache for IPI event channels - needed for hot cpu unplug (avoid RCU usage). */ +static DEFINE_PER_CPU(evtchn_port_t [XEN_NR_IPIS], ipi_to_evtchn) = {[0 ... XEN_NR_IPIS-1] = 0}; /* Event channel distribution data */ static atomic_t channels_on_cpu[NR_CPUS]; @@ -366,6 +368,7 @@ static int xen_irq_info_ipi_setup(unsigned cpu, info->u.ipi = ipi; per_cpu(ipi_to_irq, cpu)[ipi] = irq; + per_cpu(ipi_to_evtchn, cpu)[ipi] = evtchn; return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0); } @@ -981,6 +984,7 @@ static void __unbind_from_irq(unsigned int irq) break; case IRQT_IPI: per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1; + per_cpu(ipi_to_evtchn, cpu)[ipi_from_irq(irq)] = 0; break; case IRQT_EVTCHN: dev = info->u.interdomain; @@ -1632,7 +1636,7 @@ EXPORT_SYMBOL_GPL(evtchn_put); void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) { - int irq; + evtchn_port_t evtchn; #ifdef CONFIG_X86 if (unlikely(vector == XEN_NMI_VECTOR)) { @@ -1643,9 +1647,9 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) return; } #endif - irq = per_cpu(ipi_to_irq, cpu)[vector]; - BUG_ON(irq < 0); - notify_remote_via_irq(irq); + evtchn = per_cpu(ipi_to_evtchn, cpu)[vector]; + BUG_ON(evtchn == 0); + notify_remote_via_evtchn(evtchn); } struct evtchn_loop_ctrl { From bfa993b355d33a438a746523e7129391c8664e8a Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 8 Nov 2023 16:25:15 -0500 Subject: [PATCH 338/415] acpi/processor: sanitize _OSC/_PDC capabilities for Xen dom0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Processor capability bits notify ACPI of the OS capabilities, and so ACPI can adjust the return of other Processor methods taking the OS capabilities into account. When Linux is running as a Xen dom0, the hypervisor is the entity in charge of processor power management, and hence Xen needs to make sure the capabilities reported by _OSC/_PDC match the capabilities of the driver in Xen. Introduce a small helper to sanitize the buffer when running as Xen dom0. When Xen supports HWP, this serves as the equivalent of commit a21211672c9a ("ACPI / processor: Request native thermal interrupt handling via _OSC") to avoid SMM crashes. Xen will set bit ACPI_PROC_CAP_COLLAB_PROC_PERF (bit 12) in the capability bits and the _OSC/_PDC call will apply it. [ jandryuk: Mention Xen HWP's need. Support _OSC & _PDC ] Signed-off-by: Roger Pau Monné Cc: stable@vger.kernel.org Signed-off-by: Jason Andryuk Reviewed-by: Michal Wilczynski Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20231108212517.72279-1-jandryuk@gmail.com Signed-off-by: Juergen Gross --- arch/x86/include/asm/acpi.h | 14 ++++++++++++++ arch/x86/include/asm/xen/hypervisor.h | 9 +++++++++ drivers/xen/pcpu.c | 22 ++++++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index c8a7fc23f63c..f896eed4516c 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -16,6 +16,9 @@ #include #include #include +#include + +#include #ifdef CONFIG_ACPI_APEI # include @@ -127,6 +130,17 @@ static inline void arch_acpi_set_proc_cap_bits(u32 *cap) if (!cpu_has(c, X86_FEATURE_MWAIT) || boot_option_idle_override == IDLE_NOMWAIT) *cap &= ~(ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH); + + if (xen_initial_domain()) { + /* + * When Linux is running as Xen dom0, the hypervisor is the + * entity in charge of the processor power management, and so + * Xen needs to check the OS capabilities reported in the + * processor capabilities buffer matches what the hypervisor + * driver supports. + */ + xen_sanitize_proc_cap_bits(cap); + } } static inline bool acpi_has_cpu_in_madt(void) diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 7048dfacc04b..a9088250770f 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -100,4 +100,13 @@ static inline void leave_lazy(enum xen_lazy_mode mode) enum xen_lazy_mode xen_get_lazy_mode(void); +#if defined(CONFIG_XEN_DOM0) && defined(CONFIG_ACPI) +void xen_sanitize_proc_cap_bits(uint32_t *buf); +#else +static inline void xen_sanitize_proc_cap_bits(uint32_t *buf) +{ + BUG(); +} +#endif + #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c index b3e3d1bb37f3..508655273145 100644 --- a/drivers/xen/pcpu.c +++ b/drivers/xen/pcpu.c @@ -47,6 +47,9 @@ #include #include +#ifdef CONFIG_ACPI +#include +#endif /* * @cpu_id: Xen physical cpu logic number @@ -400,4 +403,23 @@ bool __init xen_processor_present(uint32_t acpi_id) return online; } + +void xen_sanitize_proc_cap_bits(uint32_t *cap) +{ + struct xen_platform_op op = { + .cmd = XENPF_set_processor_pminfo, + .u.set_pminfo.id = -1, + .u.set_pminfo.type = XEN_PM_PDC, + }; + u32 buf[3] = { ACPI_PDC_REVISION_ID, 1, *cap }; + int ret; + + set_xen_guest_handle(op.u.set_pminfo.pdc, buf); + ret = HYPERVISOR_platform_op(&op); + if (ret) + pr_err("sanitize of _PDC buffer bits from Xen failed: %d\n", + ret); + else + *cap = buf[2]; +} #endif From 50e865a56876bd7a74a79c1778025631150f104a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 5 Nov 2023 21:56:31 -0800 Subject: [PATCH 339/415] xen/shbuf: eliminate 17 kernel-doc warnings Don't use kernel-doc markers ("/**") for comments that are not in kernel-doc format. This prevents multiple kernel-doc warnings: xen-front-pgdir-shbuf.c:25: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * This structure represents the structure of a shared page xen-front-pgdir-shbuf.c:37: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Shared buffer ops which are differently implemented xen-front-pgdir-shbuf.c:65: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Get granted reference to the very first page of the xen-front-pgdir-shbuf.c:85: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Map granted references of the shared buffer. xen-front-pgdir-shbuf.c:106: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Unmap granted references of the shared buffer. xen-front-pgdir-shbuf.c:127: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Free all the resources of the shared buffer. xen-front-pgdir-shbuf.c:154: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Get the number of pages the page directory consumes itself. xen-front-pgdir-shbuf.c:164: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Calculate the number of grant references needed to share the buffer xen-front-pgdir-shbuf.c:176: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Calculate the number of grant references needed to share the buffer xen-front-pgdir-shbuf.c:194: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Unmap the buffer previously mapped with grant references xen-front-pgdir-shbuf.c:242: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Map the buffer with grant references provided by the backend. xen-front-pgdir-shbuf.c:324: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Fill page directory with grant references to the pages of the xen-front-pgdir-shbuf.c:354: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Fill page directory with grant references to the pages of the xen-front-pgdir-shbuf.c:393: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Grant references to the frontend's buffer pages. xen-front-pgdir-shbuf.c:422: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Grant all the references needed to share the buffer. xen-front-pgdir-shbuf.c:470: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Allocate all required structures to mange shared buffer. xen-front-pgdir-shbuf.c:510: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Allocate a new instance of a shared buffer. Signed-off-by: Randy Dunlap Reported-by: kernel test robot Closes: lore.kernel.org/r/202311060203.yQrpPZhm-lkp@intel.com Acked-by: Juergen Gross Cc: Juergen Gross Cc: Stefano Stabellini Cc: Oleksandr Tyshchenko Cc: xen-devel@lists.xenproject.org Link: https://lore.kernel.org/r/20231106055631.21520-1-rdunlap@infradead.org Signed-off-by: Juergen Gross --- drivers/xen/xen-front-pgdir-shbuf.c | 34 ++++++++++++++--------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/xen/xen-front-pgdir-shbuf.c b/drivers/xen/xen-front-pgdir-shbuf.c index b52e0fa595a9..223870a0111b 100644 --- a/drivers/xen/xen-front-pgdir-shbuf.c +++ b/drivers/xen/xen-front-pgdir-shbuf.c @@ -21,7 +21,7 @@ #include -/** +/* * This structure represents the structure of a shared page * that contains grant references to the pages of the shared * buffer. This structure is common to many Xen para-virtualized @@ -33,7 +33,7 @@ struct xen_page_directory { grant_ref_t gref[]; /* Variable length */ }; -/** +/* * Shared buffer ops which are differently implemented * depending on the allocation mode, e.g. if the buffer * is allocated by the corresponding backend or frontend. @@ -61,7 +61,7 @@ struct xen_front_pgdir_shbuf_ops { int (*unmap)(struct xen_front_pgdir_shbuf *buf); }; -/** +/* * Get granted reference to the very first page of the * page directory. Usually this is passed to the backend, * so it can find/fill the grant references to the buffer's @@ -81,7 +81,7 @@ xen_front_pgdir_shbuf_get_dir_start(struct xen_front_pgdir_shbuf *buf) } EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_get_dir_start); -/** +/* * Map granted references of the shared buffer. * * Depending on the shared buffer mode of allocation @@ -102,7 +102,7 @@ int xen_front_pgdir_shbuf_map(struct xen_front_pgdir_shbuf *buf) } EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_map); -/** +/* * Unmap granted references of the shared buffer. * * Depending on the shared buffer mode of allocation @@ -123,7 +123,7 @@ int xen_front_pgdir_shbuf_unmap(struct xen_front_pgdir_shbuf *buf) } EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_unmap); -/** +/* * Free all the resources of the shared buffer. * * \param buf shared buffer which resources to be freed. @@ -150,7 +150,7 @@ EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_free); offsetof(struct xen_page_directory, \ gref)) / sizeof(grant_ref_t)) -/** +/* * Get the number of pages the page directory consumes itself. * * \param buf shared buffer. @@ -160,7 +160,7 @@ static int get_num_pages_dir(struct xen_front_pgdir_shbuf *buf) return DIV_ROUND_UP(buf->num_pages, XEN_NUM_GREFS_PER_PAGE); } -/** +/* * Calculate the number of grant references needed to share the buffer * and its pages when backend allocates the buffer. * @@ -172,7 +172,7 @@ static void backend_calc_num_grefs(struct xen_front_pgdir_shbuf *buf) buf->num_grefs = get_num_pages_dir(buf); } -/** +/* * Calculate the number of grant references needed to share the buffer * and its pages when frontend allocates the buffer. * @@ -190,7 +190,7 @@ static void guest_calc_num_grefs(struct xen_front_pgdir_shbuf *buf) #define xen_page_to_vaddr(page) \ ((uintptr_t)pfn_to_kaddr(page_to_xen_pfn(page))) -/** +/* * Unmap the buffer previously mapped with grant references * provided by the backend. * @@ -238,7 +238,7 @@ static int backend_unmap(struct xen_front_pgdir_shbuf *buf) return ret; } -/** +/* * Map the buffer with grant references provided by the backend. * * \param buf shared buffer. @@ -320,7 +320,7 @@ static int backend_map(struct xen_front_pgdir_shbuf *buf) return ret; } -/** +/* * Fill page directory with grant references to the pages of the * page directory itself. * @@ -350,7 +350,7 @@ static void backend_fill_page_dir(struct xen_front_pgdir_shbuf *buf) page_dir->gref_dir_next_page = XEN_GREF_LIST_END; } -/** +/* * Fill page directory with grant references to the pages of the * page directory and the buffer we share with the backend. * @@ -389,7 +389,7 @@ static void guest_fill_page_dir(struct xen_front_pgdir_shbuf *buf) } } -/** +/* * Grant references to the frontend's buffer pages. * * These will be shared with the backend, so it can @@ -418,7 +418,7 @@ static int guest_grant_refs_for_buffer(struct xen_front_pgdir_shbuf *buf, return 0; } -/** +/* * Grant all the references needed to share the buffer. * * Grant references to the page directory pages and, if @@ -466,7 +466,7 @@ static int grant_references(struct xen_front_pgdir_shbuf *buf) return 0; } -/** +/* * Allocate all required structures to mange shared buffer. * * \param buf shared buffer. @@ -506,7 +506,7 @@ static const struct xen_front_pgdir_shbuf_ops local_ops = { .grant_refs_for_buffer = guest_grant_refs_for_buffer, }; -/** +/* * Allocate a new instance of a shared buffer. * * \param cfg configuration to be used while allocating a new shared buffer. From 472a2ff63efb30234cbf6b2cdaf8117f21b4f8bc Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Fri, 10 Nov 2023 17:37:07 +0800 Subject: [PATCH 340/415] net: hns3: fix add VLAN fail issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hclge_sync_vlan_filter is called in periodic task, trying to remove VLAN from vlan_del_fail_bmap. It can be concurrence with VLAN adding operation from user. So once user failed to delete a VLAN id, and add it again soon, it may be removed by the periodic task, which may cause the software configuration being inconsistent with hardware. So add mutex handling to avoid this. user hns3 driver periodic task │ add vlan 10 ───── hns3_vlan_rx_add_vid │ │ (suppose success) │ │ │ del vlan 10 ───── hns3_vlan_rx_kill_vid │ │ (suppose fail,add to │ │ vlan_del_fail_bmap) │ │ │ add vlan 10 ───── hns3_vlan_rx_add_vid │ (suppose success) │ foreach vlan_del_fail_bmp del vlan 10 Fixes: fe4144d47eef ("net: hns3: sync VLAN filter entries when kill VLAN ID failed") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_main.c | 28 +++++++++++++------ .../hisilicon/hns3/hns3vf/hclgevf_main.c | 11 ++++++-- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 66e5807903a0..e22279e5d43f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -10025,8 +10025,6 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, struct hclge_vport_vlan_cfg *vlan, *tmp; struct hclge_dev *hdev = vport->back; - mutex_lock(&hdev->vport_lock); - list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { if (vlan->vlan_id == vlan_id) { if (is_write_tbl && vlan->hd_tbl_status) @@ -10041,8 +10039,6 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, break; } } - - mutex_unlock(&hdev->vport_lock); } void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list) @@ -10451,11 +10447,16 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, * handle mailbox. Just record the vlan id, and remove it after * reset finished. */ + mutex_lock(&hdev->vport_lock); if ((test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || test_bit(HCLGE_STATE_RST_FAIL, &hdev->state)) && is_kill) { set_bit(vlan_id, vport->vlan_del_fail_bmap); + mutex_unlock(&hdev->vport_lock); return -EBUSY; + } else if (!is_kill && test_bit(vlan_id, vport->vlan_del_fail_bmap)) { + clear_bit(vlan_id, vport->vlan_del_fail_bmap); } + mutex_unlock(&hdev->vport_lock); /* when port base vlan enabled, we use port base vlan as the vlan * filter entry. In this case, we don't update vlan filter table @@ -10470,17 +10471,22 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, } if (!ret) { - if (!is_kill) + if (!is_kill) { hclge_add_vport_vlan_table(vport, vlan_id, writen_to_tbl); - else if (is_kill && vlan_id != 0) + } else if (is_kill && vlan_id != 0) { + mutex_lock(&hdev->vport_lock); hclge_rm_vport_vlan_table(vport, vlan_id, false); + mutex_unlock(&hdev->vport_lock); + } } else if (is_kill) { /* when remove hw vlan filter failed, record the vlan id, * and try to remove it from hw later, to be consistence * with stack */ + mutex_lock(&hdev->vport_lock); set_bit(vlan_id, vport->vlan_del_fail_bmap); + mutex_unlock(&hdev->vport_lock); } hclge_set_vport_vlan_fltr_change(vport); @@ -10520,6 +10526,7 @@ static void hclge_sync_vlan_filter(struct hclge_dev *hdev) int i, ret, sync_cnt = 0; u16 vlan_id; + mutex_lock(&hdev->vport_lock); /* start from vport 1 for PF is always alive */ for (i = 0; i < hdev->num_alloc_vport; i++) { struct hclge_vport *vport = &hdev->vport[i]; @@ -10530,21 +10537,26 @@ static void hclge_sync_vlan_filter(struct hclge_dev *hdev) ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q), vport->vport_id, vlan_id, true); - if (ret && ret != -EINVAL) + if (ret && ret != -EINVAL) { + mutex_unlock(&hdev->vport_lock); return; + } clear_bit(vlan_id, vport->vlan_del_fail_bmap); hclge_rm_vport_vlan_table(vport, vlan_id, false); hclge_set_vport_vlan_fltr_change(vport); sync_cnt++; - if (sync_cnt >= HCLGE_MAX_SYNC_COUNT) + if (sync_cnt >= HCLGE_MAX_SYNC_COUNT) { + mutex_unlock(&hdev->vport_lock); return; + } vlan_id = find_first_bit(vport->vlan_del_fail_bmap, VLAN_N_VID); } } + mutex_unlock(&hdev->vport_lock); hclge_sync_vlan_fltr_state(hdev); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index a4d68fb216fb..1c62e58ff6d8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1206,6 +1206,8 @@ static int hclgevf_set_vlan_filter(struct hnae3_handle *handle, test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state)) && is_kill) { set_bit(vlan_id, hdev->vlan_del_fail_bmap); return -EBUSY; + } else if (!is_kill && test_bit(vlan_id, hdev->vlan_del_fail_bmap)) { + clear_bit(vlan_id, hdev->vlan_del_fail_bmap); } hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_VLAN, @@ -1233,20 +1235,25 @@ static void hclgevf_sync_vlan_filter(struct hclgevf_dev *hdev) int ret, sync_cnt = 0; u16 vlan_id; + if (bitmap_empty(hdev->vlan_del_fail_bmap, VLAN_N_VID)) + return; + + rtnl_lock(); vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID); while (vlan_id != VLAN_N_VID) { ret = hclgevf_set_vlan_filter(handle, htons(ETH_P_8021Q), vlan_id, true); if (ret) - return; + break; clear_bit(vlan_id, hdev->vlan_del_fail_bmap); sync_cnt++; if (sync_cnt >= HCLGEVF_MAX_SYNC_COUNT) - return; + break; vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID); } + rtnl_unlock(); } static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) From ac92c0a9a0603fb448e60f38e63302e4eebb8035 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Fri, 10 Nov 2023 17:37:08 +0800 Subject: [PATCH 341/415] net: hns3: add barrier in vf mailbox reply process In hclgevf_mbx_handler() and hclgevf_get_mbx_resp() functions, there is a typical store-store and load-load scenario between received_resp and additional_info. This patch adds barrier to fix the problem. Fixes: 4671042f1ef0 ("net: hns3: add match_id to check mailbox response from PF to VF") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index bbf7b14079de..85c2a634c8f9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -63,6 +63,9 @@ static int hclgevf_get_mbx_resp(struct hclgevf_dev *hdev, u16 code0, u16 code1, i++; } + /* ensure additional_info will be seen after received_resp */ + smp_rmb(); + if (i >= HCLGEVF_MAX_TRY_TIMES) { dev_err(&hdev->pdev->dev, "VF could not get mbx(%u,%u) resp(=%d) from PF in %d tries\n", @@ -178,6 +181,10 @@ static void hclgevf_handle_mbx_response(struct hclgevf_dev *hdev, resp->resp_status = hclgevf_resp_to_errno(resp_status); memcpy(resp->additional_info, req->msg.resp_data, HCLGE_MBX_MAX_RESP_DATA_SIZE * sizeof(u8)); + + /* ensure additional_info will be seen before setting received_resp */ + smp_wmb(); + if (match_id) { /* If match_id is not zero, it means PF support match_id. * if the match_id is right, VF get the right response, or From 75b247b57d8b71bcb679e4cb37d0db104848806c Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Fri, 10 Nov 2023 17:37:09 +0800 Subject: [PATCH 342/415] net: hns3: fix incorrect capability bit display for copper port Currently, the FEC capability bit is default set for device version V2. It's incorrect for the copper port. Eventhough it doesn't make the nic work abnormal, but the capability information display in debugfs may confuse user. So clear it when driver get the port type inforamtion. Fixes: 433ccce83504 ("net: hns3: use FEC capability queried from firmware") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index e22279e5d43f..c393b4ee4a32 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -11663,6 +11663,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) goto err_msi_irq_uninit; if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) { + clear_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps); if (hnae3_dev_phy_imp_supported(hdev)) ret = hclge_update_tp_port_info(hdev); else From 53aba458f23846112c0d44239580ff59bc5c36c3 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Fri, 10 Nov 2023 17:37:10 +0800 Subject: [PATCH 343/415] net: hns3: fix out-of-bounds access may occur when coalesce info is read via debugfs The hns3 driver define an array of string to show the coalesce info, but if the kernel adds a new mode or a new state, out-of-bounds access may occur when coalesce info is read via debugfs, this patch fix the problem. Fixes: c99fead7cb07 ("net: hns3: add debugfs support for interrupt coalesce") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 0b138635bafa..c083d1d10767 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -503,11 +503,14 @@ static void hns3_get_coal_info(struct hns3_enet_tqp_vector *tqp_vector, } sprintf(result[j++], "%d", i); - sprintf(result[j++], "%s", dim_state_str[dim->state]); + sprintf(result[j++], "%s", dim->state < ARRAY_SIZE(dim_state_str) ? + dim_state_str[dim->state] : "unknown"); sprintf(result[j++], "%u", dim->profile_ix); - sprintf(result[j++], "%s", dim_cqe_mode_str[dim->mode]); + sprintf(result[j++], "%s", dim->mode < ARRAY_SIZE(dim_cqe_mode_str) ? + dim_cqe_mode_str[dim->mode] : "unknown"); sprintf(result[j++], "%s", - dim_tune_stat_str[dim->tune_state]); + dim->tune_state < ARRAY_SIZE(dim_tune_stat_str) ? + dim_tune_stat_str[dim->tune_state] : "unknown"); sprintf(result[j++], "%u", dim->steps_left); sprintf(result[j++], "%u", dim->steps_right); sprintf(result[j++], "%u", dim->tired); From dbd2f3b20c6ae425665b6975d766e3653d453e73 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Fri, 10 Nov 2023 17:37:11 +0800 Subject: [PATCH 344/415] net: hns3: fix variable may not initialized problem in hns3_init_mac_addr() When a VF is calling hns3_init_mac_addr(), get_mac_addr() may return fail, then the value of mac_addr_temp is not initialized. Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 06117502001f..b618797a7e8d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -5139,7 +5139,7 @@ static int hns3_init_mac_addr(struct net_device *netdev) struct hns3_nic_priv *priv = netdev_priv(netdev); char format_mac_addr[HNAE3_FORMAT_MAC_ADDR_LEN]; struct hnae3_handle *h = priv->ae_handle; - u8 mac_addr_temp[ETH_ALEN]; + u8 mac_addr_temp[ETH_ALEN] = {0}; int ret = 0; if (h->ae_algo->ops->get_mac_addr) From 65e98bb56fa3ce2edb400930c05238c9b380500e Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Fri, 10 Nov 2023 17:37:12 +0800 Subject: [PATCH 345/415] net: hns3: fix VF reset fail issue Currently the reset process in hns3 and firmware watchdog init process is asynchronous. We think firmware watchdog initialization is completed before VF clear the interrupt source. However, firmware initialization may not complete early. So VF will receive multiple reset interrupts and fail to reset. So we add delay before VF interrupt source and 5 ms delay is enough to avoid second reset interrupt. Fixes: 427900d27d86 ("net: hns3: fix the timing issue of VF clearing interrupt sources") Signed-off-by: Jijie Shao Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 14 +++++++++++++- .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 1c62e58ff6d8..0aa9beefd1c7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1981,8 +1981,18 @@ static enum hclgevf_evt_cause hclgevf_check_evt_cause(struct hclgevf_dev *hdev, return HCLGEVF_VECTOR0_EVENT_OTHER; } +static void hclgevf_reset_timer(struct timer_list *t) +{ + struct hclgevf_dev *hdev = from_timer(hdev, t, reset_timer); + + hclgevf_clear_event_cause(hdev, HCLGEVF_VECTOR0_EVENT_RST); + hclgevf_reset_task_schedule(hdev); +} + static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data) { +#define HCLGEVF_RESET_DELAY 5 + enum hclgevf_evt_cause event_cause; struct hclgevf_dev *hdev = data; u32 clearval; @@ -1994,7 +2004,8 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data) switch (event_cause) { case HCLGEVF_VECTOR0_EVENT_RST: - hclgevf_reset_task_schedule(hdev); + mod_timer(&hdev->reset_timer, + jiffies + msecs_to_jiffies(HCLGEVF_RESET_DELAY)); break; case HCLGEVF_VECTOR0_EVENT_MBX: hclgevf_mbx_handler(hdev); @@ -2937,6 +2948,7 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) HCLGEVF_DRIVER_NAME); hclgevf_task_schedule(hdev, round_jiffies_relative(HZ)); + timer_setup(&hdev->reset_timer, hclgevf_reset_timer, 0); return 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 81c16b8c8da2..a73f2bf3a56a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -219,6 +219,7 @@ struct hclgevf_dev { enum hnae3_reset_type reset_level; unsigned long reset_pending; enum hnae3_reset_type reset_type; + struct timer_list reset_timer; #define HCLGEVF_RESET_REQUESTED 0 #define HCLGEVF_RESET_PENDING 1 From dff655e82faffc287d4a72a59f66fa120bf904e4 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Fri, 10 Nov 2023 17:37:13 +0800 Subject: [PATCH 346/415] net: hns3: fix VF wrong speed and duplex issue If PF is down, firmware will returns 10 Mbit/s rate and half-duplex mode when PF queries the port information from firmware. After imp reset command is executed, PF status changes to down, and PF will query link status and updates port information from firmware in a periodic scheduled task. However, there is a low probability that port information is updated when PF is down, and then PF link status changes to up. In this case, PF synchronizes incorrect rate and duplex mode to VF. This patch fixes it by updating port information before PF synchronizes the rate and duplex to the VF when PF changes to up. Fixes: 18b6e31f8bf4 ("net: hns3: PF add support for pushing link status to VFs") Signed-off-by: Jijie Shao Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c393b4ee4a32..5ea9e59569ef 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -61,6 +61,7 @@ static void hclge_sync_fd_table(struct hclge_dev *hdev); static void hclge_update_fec_stats(struct hclge_dev *hdev); static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret, int wait_cnt); +static int hclge_update_port_info(struct hclge_dev *hdev); static struct hnae3_ae_algo ae_algo; @@ -3041,6 +3042,9 @@ static void hclge_update_link_status(struct hclge_dev *hdev) if (state != hdev->hw.mac.link) { hdev->hw.mac.link = state; + if (state == HCLGE_LINK_STATUS_UP) + hclge_update_port_info(hdev); + client->ops->link_status_change(handle, state); hclge_config_mac_tnl_int(hdev, state); if (rclient && rclient->ops->link_status_change) From 6979a51ecaec4dfb7c768eb2a77b77df73a74c8e Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Mon, 13 Nov 2023 15:16:56 +0530 Subject: [PATCH 347/415] MAINTAINERS: add entry for TI ICSSG Ethernet driver Add record for TI Industrial Communication Subsystem - Gigabit (ICSSG) Ethernet driver. Also add Roger and myself as maintainer. Signed-off-by: MD Danish Anwar Signed-off-by: David S. Miller --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index e3acb36989f0..0443f4d9f736 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21795,6 +21795,15 @@ F: Documentation/devicetree/bindings/media/i2c/ti,ds90* F: drivers/media/i2c/ds90* F: include/media/i2c/ds90* +TI ICSSG ETHERNET DRIVER (ICSSG) +R: MD Danish Anwar +R: Roger Quadros +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +L: netdev@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/net/ti,icss*.yaml +F: drivers/net/ethernet/ti/icssg/* + TI J721E CSI2RX DRIVER M: Jai Luthra L: linux-media@vger.kernel.org From 438cbcdf105d84449fceb39a2d0e16d0ec20708f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Fri, 10 Nov 2023 13:05:46 +0100 Subject: [PATCH 348/415] net: mdio: fix typo in header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The quotes symbol in "EEE "link partner ability 1 should be at the end of the register name "EEE link partner ability 1" Signed-off-by: Marek Behún Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/mdio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 8fa23bdcedbf..007fd9c3e4b6 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -420,7 +420,7 @@ static inline u32 linkmode_adv_to_mii_t1_adv_m_t(unsigned long *advertising) * A function that translates value of following registers to the linkmode: * IEEE 802.3-2018 45.2.3.10 "EEE control and capability 1" register (3.20) * IEEE 802.3-2018 45.2.7.13 "EEE advertisement 1" register (7.60) - * IEEE 802.3-2018 45.2.7.14 "EEE "link partner ability 1 register (7.61) + * IEEE 802.3-2018 45.2.7.14 "EEE link partner ability 1" register (7.61) */ static inline void mii_eee_cap1_mod_linkmode_t(unsigned long *adv, u32 val) { From e6daf129ccb79d3781129f623f82bc676f2cb02c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 10 Nov 2023 10:36:00 -0500 Subject: [PATCH 349/415] net: gso_test: support CONFIG_MAX_SKB_FRAGS up to 45 The test allocs a single page to hold all the frag_list skbs. This is insufficient on kernels with CONFIG_MAX_SKB_FRAGS=45, due to the increased skb_shared_info frags[] array length. gso_test_func: ASSERTION FAILED at net/core/gso_test.c:210 Expected alloc_size <= ((1UL) << 12), but alloc_size == 5075 (0x13d3) ((1UL) << 12) == 4096 (0x1000) Simplify the logic. Just allocate a page for each frag_list skb. Fixes: 4688ecb1385f ("net: expand skb_segment unit test with frag_list coverage") Signed-off-by: Willem de Bruijn Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/core/gso_test.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/net/core/gso_test.c b/net/core/gso_test.c index ceb684be4cbf..4c2e77bd12f4 100644 --- a/net/core/gso_test.c +++ b/net/core/gso_test.c @@ -180,18 +180,17 @@ static void gso_test_func(struct kunit *test) } if (tcase->frag_skbs) { - unsigned int total_size = 0, total_true_size = 0, alloc_size = 0; + unsigned int total_size = 0, total_true_size = 0; struct sk_buff *frag_skb, *prev = NULL; - page = alloc_page(GFP_KERNEL); - KUNIT_ASSERT_NOT_NULL(test, page); - page_ref_add(page, tcase->nr_frag_skbs - 1); - for (i = 0; i < tcase->nr_frag_skbs; i++) { unsigned int frag_size; + page = alloc_page(GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, page); + frag_size = tcase->frag_skbs[i]; - frag_skb = build_skb(page_address(page) + alloc_size, + frag_skb = build_skb(page_address(page), frag_size + shinfo_size); KUNIT_ASSERT_NOT_NULL(test, frag_skb); __skb_put(frag_skb, frag_size); @@ -204,11 +203,8 @@ static void gso_test_func(struct kunit *test) total_size += frag_size; total_true_size += frag_skb->truesize; - alloc_size += frag_size + shinfo_size; } - KUNIT_ASSERT_LE(test, alloc_size, PAGE_SIZE); - skb->len += total_size; skb->data_len += total_size; skb->truesize += total_true_size; From fb317eb23b5ee4c37b0656a9a52a3db58d9dd072 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Sat, 11 Nov 2023 01:39:47 +0900 Subject: [PATCH 350/415] tipc: Fix kernel-infoleak due to uninitialized TLV value KMSAN reported the following kernel-infoleak issue: ===================================================== BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:114 [inline] BUG: KMSAN: kernel-infoleak in copy_to_user_iter lib/iov_iter.c:24 [inline] BUG: KMSAN: kernel-infoleak in iterate_ubuf include/linux/iov_iter.h:29 [inline] BUG: KMSAN: kernel-infoleak in iterate_and_advance2 include/linux/iov_iter.h:245 [inline] BUG: KMSAN: kernel-infoleak in iterate_and_advance include/linux/iov_iter.h:271 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x4ec/0x2bc0 lib/iov_iter.c:186 instrument_copy_to_user include/linux/instrumented.h:114 [inline] copy_to_user_iter lib/iov_iter.c:24 [inline] iterate_ubuf include/linux/iov_iter.h:29 [inline] iterate_and_advance2 include/linux/iov_iter.h:245 [inline] iterate_and_advance include/linux/iov_iter.h:271 [inline] _copy_to_iter+0x4ec/0x2bc0 lib/iov_iter.c:186 copy_to_iter include/linux/uio.h:197 [inline] simple_copy_to_iter net/core/datagram.c:532 [inline] __skb_datagram_iter.5+0x148/0xe30 net/core/datagram.c:420 skb_copy_datagram_iter+0x52/0x210 net/core/datagram.c:546 skb_copy_datagram_msg include/linux/skbuff.h:3960 [inline] netlink_recvmsg+0x43d/0x1630 net/netlink/af_netlink.c:1967 sock_recvmsg_nosec net/socket.c:1044 [inline] sock_recvmsg net/socket.c:1066 [inline] __sys_recvfrom+0x476/0x860 net/socket.c:2246 __do_sys_recvfrom net/socket.c:2264 [inline] __se_sys_recvfrom net/socket.c:2260 [inline] __x64_sys_recvfrom+0x130/0x200 net/socket.c:2260 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: slab_post_alloc_hook+0x103/0x9e0 mm/slab.h:768 slab_alloc_node mm/slub.c:3478 [inline] kmem_cache_alloc_node+0x5f7/0xb50 mm/slub.c:3523 kmalloc_reserve+0x13c/0x4a0 net/core/skbuff.c:560 __alloc_skb+0x2fd/0x770 net/core/skbuff.c:651 alloc_skb include/linux/skbuff.h:1286 [inline] tipc_tlv_alloc net/tipc/netlink_compat.c:156 [inline] tipc_get_err_tlv+0x90/0x5d0 net/tipc/netlink_compat.c:170 tipc_nl_compat_recv+0x1042/0x15d0 net/tipc/netlink_compat.c:1324 genl_family_rcv_msg_doit net/netlink/genetlink.c:972 [inline] genl_family_rcv_msg net/netlink/genetlink.c:1052 [inline] genl_rcv_msg+0x1220/0x12c0 net/netlink/genetlink.c:1067 netlink_rcv_skb+0x4a4/0x6a0 net/netlink/af_netlink.c:2545 genl_rcv+0x41/0x60 net/netlink/genetlink.c:1076 netlink_unicast_kernel net/netlink/af_netlink.c:1342 [inline] netlink_unicast+0xf4b/0x1230 net/netlink/af_netlink.c:1368 netlink_sendmsg+0x1242/0x1420 net/netlink/af_netlink.c:1910 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] ____sys_sendmsg+0x997/0xd60 net/socket.c:2588 ___sys_sendmsg+0x271/0x3b0 net/socket.c:2642 __sys_sendmsg net/socket.c:2671 [inline] __do_sys_sendmsg net/socket.c:2680 [inline] __se_sys_sendmsg net/socket.c:2678 [inline] __x64_sys_sendmsg+0x2fa/0x4a0 net/socket.c:2678 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Bytes 34-35 of 36 are uninitialized Memory access of size 36 starts at ffff88802d464a00 Data copied to user address 00007ff55033c0a0 CPU: 0 PID: 30322 Comm: syz-executor.0 Not tainted 6.6.0-14500-g1c41041124bd #10 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014 ===================================================== tipc_add_tlv() puts TLV descriptor and value onto `skb`. This size is calculated with TLV_SPACE() macro. It adds the size of struct tlv_desc and the length of TLV value passed as an argument, and aligns the result to a multiple of TLV_ALIGNTO, i.e., a multiple of 4 bytes. If the size of struct tlv_desc plus the length of TLV value is not aligned, the current implementation leaves the remaining bytes uninitialized. This is the cause of the above kernel-infoleak issue. This patch resolves this issue by clearing data up to an aligned size. Fixes: d0796d1ef63d ("tipc: convert legacy nl bearer dump to nl compat") Signed-off-by: Shigeru Yoshida Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 5bc076f2fa74..c763008a8adb 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -102,6 +102,7 @@ static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) return -EMSGSIZE; skb_put(skb, TLV_SPACE(len)); + memset(tlv, 0, TLV_SPACE(len)); tlv->tlv_type = htons(type); tlv->tlv_len = htons(TLV_LENGTH(len)); if (len && data) From ca8add922f9c7f6e2e3c71039da8e0dcc64b87ed Mon Sep 17 00:00:00 2001 From: Sven Auhagen Date: Sat, 11 Nov 2023 05:41:12 +0100 Subject: [PATCH 351/415] net: mvneta: fix calls to page_pool_get_stats Calling page_pool_get_stats in the mvneta driver without checks leads to kernel crashes. First the page pool is only available if the bm is not used. The page pool is also not allocated when the port is stopped. It can also be not allocated in case of errors. The current implementation leads to the following crash calling ethstats on a port that is down or when calling it at the wrong moment: ble to handle kernel NULL pointer dereference at virtual address 00000070 [00000070] *pgd=00000000 Internal error: Oops: 5 [#1] SMP ARM Hardware name: Marvell Armada 380/385 (Device Tree) PC is at page_pool_get_stats+0x18/0x1cc LR is at mvneta_ethtool_get_stats+0xa0/0xe0 [mvneta] pc : [] lr : [] psr: a0000013 sp : f1439d48 ip : f1439dc0 fp : 0000001d r10: 00000100 r9 : c4816b80 r8 : f0d75150 r7 : bf0b400c r6 : c238f000 r5 : 00000000 r4 : f1439d68 r3 : c2091040 r2 : ffffffd8 r1 : f1439d68 r0 : 00000000 Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: 066b004a DAC: 00000051 Register r0 information: NULL pointer Register r1 information: 2-page vmalloc region starting at 0xf1438000 allocated at kernel_clone+0x9c/0x390 Register r2 information: non-paged memory Register r3 information: slab kmalloc-2k start c2091000 pointer offset 64 size 2048 Register r4 information: 2-page vmalloc region starting at 0xf1438000 allocated at kernel_clone+0x9c/0x390 Register r5 information: NULL pointer Register r6 information: slab kmalloc-cg-4k start c238f000 pointer offset 0 size 4096 Register r7 information: 15-page vmalloc region starting at 0xbf0a8000 allocated at load_module+0xa30/0x219c Register r8 information: 1-page vmalloc region starting at 0xf0d75000 allocated at ethtool_get_stats+0x138/0x208 Register r9 information: slab task_struct start c4816b80 pointer offset 0 Register r10 information: non-paged memory Register r11 information: non-paged memory Register r12 information: 2-page vmalloc region starting at 0xf1438000 allocated at kernel_clone+0x9c/0x390 Process snmpd (pid: 733, stack limit = 0x38de3a88) Stack: (0xf1439d48 to 0xf143a000) 9d40: 000000c0 00000001 c238f000 bf0b400c f0d75150 c4816b80 9d60: 00000100 bf0a98d8 00000000 00000000 00000000 00000000 00000000 00000000 9d80: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 9da0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 9dc0: 00000dc0 5335509c 00000035 c238f000 bf0b2214 01067f50 f0d75000 c0b9b9c8 9de0: 0000001d 00000035 c2212094 5335509c c4816b80 c238f000 c5ad6e00 01067f50 9e00: c1b0be80 c4816b80 00014813 c0b9d7f0 00000000 00000000 0000001d 0000001d 9e20: 00000000 00001200 00000000 00000000 c216ed90 c73943b8 00000000 00000000 9e40: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 9e60: 00000000 c0ad9034 00000000 00000000 00000000 00000000 00000000 00000000 9e80: 00000000 00000000 00000000 5335509c c1b0be80 f1439ee4 00008946 c1b0be80 9ea0: 01067f50 f1439ee3 00000000 00000046 b6d77ae0 c0b383f0 00008946 becc83e8 9ec0: c1b0be80 00000051 0000000b c68ca480 c7172d00 c0ad8ff0 f1439ee3 cf600e40 9ee0: 01600e40 32687465 00000000 00000000 00000000 01067f50 00000000 00000000 9f00: 00000000 5335509c 00008946 00008946 00000000 c68ca480 becc83e8 c05e2de0 9f20: f1439fb0 c03002f0 00000006 5ac3c35a c4816b80 00000006 b6d77ae0 c030caf0 9f40: c4817350 00000014 f1439e1c 0000000c 00000000 00000051 01000000 00000014 9f60: 00003fec f1439edc 00000001 c0372abc b6d77ae0 c0372abc cf600e40 5335509c 9f80: c21e6800 01015c9c 0000000b 00008946 00000036 c03002f0 c4816b80 00000036 9fa0: b6d77ae0 c03000c0 01015c9c 0000000b 0000000b 00008946 becc83e8 00000000 9fc0: 01015c9c 0000000b 00008946 00000036 00000035 010678a0 b6d797ec b6d77ae0 9fe0: b6dbf738 becc838c b6d186d7 b6baa858 40000030 0000000b 00000000 00000000 page_pool_get_stats from mvneta_ethtool_get_stats+0xa0/0xe0 [mvneta] mvneta_ethtool_get_stats [mvneta] from ethtool_get_stats+0x154/0x208 ethtool_get_stats from dev_ethtool+0xf48/0x2480 dev_ethtool from dev_ioctl+0x538/0x63c dev_ioctl from sock_ioctl+0x49c/0x53c sock_ioctl from sys_ioctl+0x134/0xbd8 sys_ioctl from ret_fast_syscall+0x0/0x1c Exception stack(0xf1439fa8 to 0xf1439ff0) 9fa0: 01015c9c 0000000b 0000000b 00008946 becc83e8 00000000 9fc0: 01015c9c 0000000b 00008946 00000036 00000035 010678a0 b6d797ec b6d77ae0 9fe0: b6dbf738 becc838c b6d186d7 b6baa858 Code: e28dd004 e1a05000 e2514000 0a00006a (e5902070) This commit adds the proper checks before calling page_pool_get_stats. Fixes: b3fc79225f05 ("net: mvneta: add support for page_pool_get_stats") Signed-off-by: Sven Auhagen Reported-by: Paulo Da Silva Acked-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 28 +++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 90817136808d..29aac327574d 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4790,14 +4790,17 @@ static void mvneta_ethtool_get_strings(struct net_device *netdev, u32 sset, u8 *data) { if (sset == ETH_SS_STATS) { + struct mvneta_port *pp = netdev_priv(netdev); int i; for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++) memcpy(data + i * ETH_GSTRING_LEN, mvneta_statistics[i].name, ETH_GSTRING_LEN); - data += ETH_GSTRING_LEN * ARRAY_SIZE(mvneta_statistics); - page_pool_ethtool_stats_get_strings(data); + if (!pp->bm_priv) { + data += ETH_GSTRING_LEN * ARRAY_SIZE(mvneta_statistics); + page_pool_ethtool_stats_get_strings(data); + } } } @@ -4915,8 +4918,10 @@ static void mvneta_ethtool_pp_stats(struct mvneta_port *pp, u64 *data) struct page_pool_stats stats = {}; int i; - for (i = 0; i < rxq_number; i++) - page_pool_get_stats(pp->rxqs[i].page_pool, &stats); + for (i = 0; i < rxq_number; i++) { + if (pp->rxqs[i].page_pool) + page_pool_get_stats(pp->rxqs[i].page_pool, &stats); + } page_pool_ethtool_stats_get(data, &stats); } @@ -4932,14 +4937,21 @@ static void mvneta_ethtool_get_stats(struct net_device *dev, for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++) *data++ = pp->ethtool_stats[i]; - mvneta_ethtool_pp_stats(pp, data); + if (!pp->bm_priv) + mvneta_ethtool_pp_stats(pp, data); } static int mvneta_ethtool_get_sset_count(struct net_device *dev, int sset) { - if (sset == ETH_SS_STATS) - return ARRAY_SIZE(mvneta_statistics) + - page_pool_ethtool_stats_get_count(); + if (sset == ETH_SS_STATS) { + int count = ARRAY_SIZE(mvneta_statistics); + struct mvneta_port *pp = netdev_priv(dev); + + if (!pp->bm_priv) + count += page_pool_ethtool_stats_get_count(); + + return count; + } return -EOPNOTSUPP; } From c0a2a1b0d631fc460d830f52d06211838874d655 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sun, 12 Nov 2023 22:16:32 -0500 Subject: [PATCH 352/415] ppp: limit MRU to 64K ppp_sync_ioctl allows setting device MRU, but does not sanity check this input. Limit to a sane upper bound of 64KB. No implementation I could find generates larger than 64KB frames. RFC 2823 mentions an upper bound of PPP over SDL of 64KB based on the 16-bit length field. Other protocols will be smaller, such as PPPoE (9KB jumbo frame) and PPPoA (18190 maximum CPCS-SDU size, RFC 2364). PPTP and L2TP encapsulate in IP. Syzbot managed to trigger alloc warning in __alloc_pages: if (WARN_ON_ONCE_GFP(order > MAX_ORDER, gfp)) WARNING: CPU: 1 PID: 37 at mm/page_alloc.c:4544 __alloc_pages+0x3ab/0x4a0 mm/page_alloc.c:4544 __alloc_skb+0x12b/0x330 net/core/skbuff.c:651 __netdev_alloc_skb+0x72/0x3f0 net/core/skbuff.c:715 netdev_alloc_skb include/linux/skbuff.h:3225 [inline] dev_alloc_skb include/linux/skbuff.h:3238 [inline] ppp_sync_input drivers/net/ppp/ppp_synctty.c:669 [inline] ppp_sync_receive+0xff/0x680 drivers/net/ppp/ppp_synctty.c:334 tty_ldisc_receive_buf+0x14c/0x180 drivers/tty/tty_buffer.c:390 tty_port_default_receive_buf+0x70/0xb0 drivers/tty/tty_port.c:37 receive_buf drivers/tty/tty_buffer.c:444 [inline] flush_to_ldisc+0x261/0x780 drivers/tty/tty_buffer.c:494 process_one_work+0x884/0x15c0 kernel/workqueue.c:2630 With call ioctl$PPPIOCSMRU1(r1, 0x40047452, &(0x7f0000000100)=0x5e6417a8) Similar code exists in other drivers that implement ppp_channel_ops ioctl PPPIOCSMRU. Those might also be in scope. Notably excluded from this are pppol2tp_ioctl and pppoe_ioctl. This code goes back to the start of git history. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+6177e1f90d92583bcc58@syzkaller.appspotmail.com Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_synctty.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c index ea261a628786..52d05ce4a281 100644 --- a/drivers/net/ppp/ppp_synctty.c +++ b/drivers/net/ppp/ppp_synctty.c @@ -453,6 +453,10 @@ ppp_sync_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg) case PPPIOCSMRU: if (get_user(val, (int __user *) argp)) break; + if (val > U16_MAX) { + err = -EINVAL; + break; + } if (val < PPP_MRU) val = PPP_MRU; ap->mru = val; From 47d970204054f859f35a2237baa75c2d84fcf436 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 25 Sep 2023 17:54:13 +0200 Subject: [PATCH 353/415] xen/events: fix delayed eoi list handling When delaying eoi handling of events, the related elements are queued into the percpu lateeoi list. In case the list isn't empty, the elements should be sorted by the time when eoi handling is to happen. Unfortunately a new element will never be queued at the start of the list, even if it has a handling time lower than all other list elements. Fix that by handling that case the same way as for an empty list. Fixes: e99502f76271 ("xen/events: defer eoi in case of excessive number of events") Reported-by: Jan Beulich Signed-off-by: Juergen Gross Reviewed-by: Oleksandr Tyshchenko Signed-off-by: Juergen Gross --- drivers/xen/events/events_base.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 237e6b884f72..cd33a418344a 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -604,7 +604,9 @@ static void lateeoi_list_add(struct irq_info *info) spin_lock_irqsave(&eoi->eoi_list_lock, flags); - if (list_empty(&eoi->eoi_list)) { + elem = list_first_entry_or_null(&eoi->eoi_list, struct irq_info, + eoi_list); + if (!elem || info->eoi_time < elem->eoi_time) { list_add(&info->eoi_list, &eoi->eoi_list); mod_delayed_work_on(info->eoi_cpu, system_wq, &eoi->delayed, delay); From f96c6c588ca81255566a5168e51c9cbbe7b86def Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 26 Sep 2023 14:29:54 +0200 Subject: [PATCH 354/415] xen/events: remove unused functions There are no users of xen_irq_from_pirq() and xen_set_irq_pending(). Remove those functions. Signed-off-by: Juergen Gross Reviewed-by: Oleksandr Tyshchenko Signed-off-by: Juergen Gross --- drivers/xen/events/events_base.c | 30 ------------------------------ include/xen/events.h | 4 ---- 2 files changed, 34 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index cd33a418344a..c5d86128eb73 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1169,29 +1169,6 @@ int xen_destroy_irq(int irq) return rc; } -int xen_irq_from_pirq(unsigned pirq) -{ - int irq; - - struct irq_info *info; - - mutex_lock(&irq_mapping_update_lock); - - list_for_each_entry(info, &xen_irq_list_head, list) { - if (info->type != IRQT_PIRQ) - continue; - irq = info->irq; - if (info->u.pirq.pirq == pirq) - goto out; - } - irq = -1; -out: - mutex_unlock(&irq_mapping_update_lock); - - return irq; -} - - int xen_pirq_from_irq(unsigned irq) { return pirq_from_irq(irq); @@ -2031,13 +2008,6 @@ void xen_clear_irq_pending(int irq) event_handler_exit(info); } EXPORT_SYMBOL(xen_clear_irq_pending); -void xen_set_irq_pending(int irq) -{ - evtchn_port_t evtchn = evtchn_from_irq(irq); - - if (VALID_EVTCHN(evtchn)) - set_evtchn(evtchn); -} bool xen_test_irq_pending(int irq) { diff --git a/include/xen/events.h b/include/xen/events.h index 23932b0673dc..a129cafa80ed 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -88,7 +88,6 @@ void xen_irq_resume(void); /* Clear an irq's pending state, in preparation for polling on it */ void xen_clear_irq_pending(int irq); -void xen_set_irq_pending(int irq); bool xen_test_irq_pending(int irq); /* Poll waiting for an irq to become pending. In the usual case, the @@ -122,9 +121,6 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, /* De-allocates the above mentioned physical interrupt. */ int xen_destroy_irq(int irq); -/* Return irq from pirq */ -int xen_irq_from_pirq(unsigned pirq); - /* Return the pirq allocated to the irq. */ int xen_pirq_from_irq(unsigned irq); From 58c09cad1754c56cb000ef07477e8781e3fad4d3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 13 Nov 2023 08:52:24 -0800 Subject: [PATCH 355/415] drm/ci: make github dependabot happy again The drm CI scripts for gitlab have a requirements file that makes the github 'dependabot' worry about a few of the required tooling versions. It wants to update the pip requirements from 23.2.1 to 23.3: "When installing a package from a Mercurial VCS URL, e.g. pip install hg+..., with pip prior to v23.3, the specified Mercurial revision could be used to inject arbitrary configuration options to the hg clone call (e.g. --config). Controlling the Mercurial configuration can modify how and which repository is installed. This vulnerability does not affect users who aren't installing from Mercurial" and upgrade the urllib3 requirements from 2.0.4 to 2.0.7 due to two issues: "urllib3's request body not stripped after redirect from 303 status changes request method to GET" "`Cookie` HTTP header isn't stripped on cross-origin redirects" The file also ends up not having a newline at the end, that my editor ends up wanting to fix automatically. Link: https://github.com/dependabot Tested-by: Helen Koike Signed-off-by: Linus Torvalds --- drivers/gpu/drm/ci/xfails/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ci/xfails/requirements.txt b/drivers/gpu/drm/ci/xfails/requirements.txt index d8856d1581fd..e9994c9db799 100644 --- a/drivers/gpu/drm/ci/xfails/requirements.txt +++ b/drivers/gpu/drm/ci/xfails/requirements.txt @@ -5,7 +5,7 @@ termcolor==2.3.0 certifi==2023.7.22 charset-normalizer==3.2.0 idna==3.4 -pip==23.2.1 +pip==23.3 python-gitlab==3.15.0 requests==2.31.0 requests-toolbelt==1.0.0 @@ -13,5 +13,5 @@ ruamel.yaml==0.17.32 ruamel.yaml.clib==0.2.7 setuptools==68.0.0 tenacity==8.2.3 -urllib3==2.0.4 -wheel==0.41.1 \ No newline at end of file +urllib3==2.0.7 +wheel==0.41.1 From 7a1aba89ac54ccf6cad23a91a34c0ab24b1d7997 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Fri, 13 Oct 2023 12:25:10 +0200 Subject: [PATCH 356/415] ice: dpll: fix initial lock status of dpll When dpll device is registered and dpll subsystem performs notify of a new device, the lock state value provided to dpll subsystem equals 0 which is invalid value for the `enum dpll_lock_status`. Provide correct value by obtaining it from firmware before registering the dpll device. Fixes: d7999f5ea64b ("ice: implement dpll interface to control cgu") Signed-off-by: Aleksandr Loktionov Signed-off-by: Arkadiusz Kubalewski Tested-by: Sunitha Mekala (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_dpll.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index 835c419ccc74..607f534055b6 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -1756,6 +1756,7 @@ ice_dpll_init_dpll(struct ice_pf *pf, struct ice_dpll *d, bool cgu, } d->pf = pf; if (cgu) { + ice_dpll_update_state(pf, d, true); ret = dpll_device_register(d->dpll, type, &ice_dpll_ops, d); if (ret) { dpll_device_put(d->dpll); @@ -1796,8 +1797,6 @@ static int ice_dpll_init_worker(struct ice_pf *pf) struct ice_dplls *d = &pf->dplls; struct kthread_worker *kworker; - ice_dpll_update_state(pf, &d->eec, true); - ice_dpll_update_state(pf, &d->pps, true); kthread_init_delayed_work(&d->work, ice_dpll_periodic_work); kworker = kthread_create_worker(0, "ice-dplls-%s", dev_name(ice_pf_to_dev(pf))); From 4a4027f25dc3f39c2aafb3bf8926125c5378c9dc Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Tue, 31 Oct 2023 18:06:54 +0100 Subject: [PATCH 357/415] ice: dpll: fix check for dpll input priority range Supported priority value for input pins may differ with regard of NIC firmware version. E810T NICs with 3.20/4.00 FW versions would accept priority range 0-31, where firmware 4.10+ would support the range 0-9 and extra value of 255. Remove the in-range check as the driver has no information on supported values from the running firmware, let firmware decide if given value is correct and return extack error if the value is not supported. Fixes: d7999f5ea64b ("ice: implement dpll interface to control cgu") Reviewed-by: Przemek Kitszel Reviewed-by: Jacob Keller Signed-off-by: Arkadiusz Kubalewski Tested-by: Sunitha Mekala (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_dpll.c | 6 ------ drivers/net/ethernet/intel/ice/ice_dpll.h | 1 - 2 files changed, 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index 607f534055b6..831ba6683962 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -815,12 +815,6 @@ ice_dpll_input_prio_set(const struct dpll_pin *pin, void *pin_priv, struct ice_pf *pf = d->pf; int ret; - if (prio > ICE_DPLL_PRIO_MAX) { - NL_SET_ERR_MSG_FMT(extack, "prio out of supported range 0-%d", - ICE_DPLL_PRIO_MAX); - return -EINVAL; - } - mutex_lock(&pf->dplls.lock); ret = ice_dpll_hw_input_prio_set(pf, d, p, prio, extack); mutex_unlock(&pf->dplls.lock); diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.h b/drivers/net/ethernet/intel/ice/ice_dpll.h index bb32b6d88373..93172e93995b 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.h +++ b/drivers/net/ethernet/intel/ice/ice_dpll.h @@ -6,7 +6,6 @@ #include "ice.h" -#define ICE_DPLL_PRIO_MAX 0xF #define ICE_DPLL_RCLK_NUM_MAX 4 /** ice_dpll_pin - store info about pins From 6db5f2cd9ebb12e930a82c01714a6589576cd50f Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Tue, 31 Oct 2023 18:08:00 +0100 Subject: [PATCH 358/415] ice: dpll: fix output pin capabilities The dpll output pins which are used to feed clock signal of PHY and MAC circuits cannot be disconnected, those integrated circuits require clock signal for operation. By stopping assignment of DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE pin capability, prevent the user from invoking the state set callback on those pins, setting the state on those pins already returns error, as firmware doesn't allow the change of their state. Fixes: d7999f5ea64b ("ice: implement dpll interface to control cgu") Fixes: 8a3a565ff210 ("ice: add admin commands to access cgu configuration") Reviewed-by: Andrii Staikov Signed-off-by: Arkadiusz Kubalewski Tested-by: Sunitha Mekala (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_dpll.c | 12 +++-- drivers/net/ethernet/intel/ice/ice_ptp_hw.c | 54 +++++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_ptp_hw.h | 2 + 3 files changed, 64 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index 831ba6683962..86b180cb32a0 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -1823,6 +1823,7 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf, int num_pins, i, ret = -EINVAL; struct ice_hw *hw = &pf->hw; struct ice_dpll_pin *pins; + unsigned long caps; u8 freq_supp_num; bool input; @@ -1842,6 +1843,7 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf, } for (i = 0; i < num_pins; i++) { + caps = 0; pins[i].idx = i; pins[i].prop.board_label = ice_cgu_get_pin_name(hw, i, input); pins[i].prop.type = ice_cgu_get_pin_type(hw, i, input); @@ -1854,8 +1856,8 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf, &dp->input_prio[i]); if (ret) return ret; - pins[i].prop.capabilities |= - DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE; + caps |= (DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE | + DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE); pins[i].prop.phase_range.min = pf->dplls.input_phase_adj_max; pins[i].prop.phase_range.max = @@ -1865,9 +1867,11 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf, pf->dplls.output_phase_adj_max; pins[i].prop.phase_range.max = -pf->dplls.output_phase_adj_max; + ret = ice_cgu_get_output_pin_state_caps(hw, i, &caps); + if (ret) + return ret; } - pins[i].prop.capabilities |= - DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE; + pins[i].prop.capabilities = caps; ret = ice_dpll_pin_state_update(pf, &pins[i], pin_type, NULL); if (ret) return ret; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index 6d573908de7a..a00b55e14aac 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -3961,3 +3961,57 @@ int ice_get_cgu_rclk_pin_info(struct ice_hw *hw, u8 *base_idx, u8 *pin_num) return ret; } + +/** + * ice_cgu_get_output_pin_state_caps - get output pin state capabilities + * @hw: pointer to the hw struct + * @pin_id: id of a pin + * @caps: capabilities to modify + * + * Return: + * * 0 - success, state capabilities were modified + * * negative - failure, capabilities were not modified + */ +int ice_cgu_get_output_pin_state_caps(struct ice_hw *hw, u8 pin_id, + unsigned long *caps) +{ + bool can_change = true; + + switch (hw->device_id) { + case ICE_DEV_ID_E810C_SFP: + if (pin_id == ZL_OUT2 || pin_id == ZL_OUT3) + can_change = false; + break; + case ICE_DEV_ID_E810C_QSFP: + if (pin_id == ZL_OUT2 || pin_id == ZL_OUT3 || pin_id == ZL_OUT4) + can_change = false; + break; + case ICE_DEV_ID_E823L_10G_BASE_T: + case ICE_DEV_ID_E823L_1GBE: + case ICE_DEV_ID_E823L_BACKPLANE: + case ICE_DEV_ID_E823L_QSFP: + case ICE_DEV_ID_E823L_SFP: + case ICE_DEV_ID_E823C_10G_BASE_T: + case ICE_DEV_ID_E823C_BACKPLANE: + case ICE_DEV_ID_E823C_QSFP: + case ICE_DEV_ID_E823C_SFP: + case ICE_DEV_ID_E823C_SGMII: + if (hw->cgu_part_number == + ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032 && + pin_id == ZL_OUT2) + can_change = false; + else if (hw->cgu_part_number == + ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384 && + pin_id == SI_OUT1) + can_change = false; + break; + default: + return -EINVAL; + } + if (can_change) + *caps |= DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE; + else + *caps &= ~DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE; + + return 0; +} diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index 36aeeef99ec0..cf76701566c7 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -282,6 +282,8 @@ int ice_get_cgu_state(struct ice_hw *hw, u8 dpll_idx, int ice_get_cgu_rclk_pin_info(struct ice_hw *hw, u8 *base_idx, u8 *pin_num); void ice_ptp_init_phy_model(struct ice_hw *hw); +int ice_cgu_get_output_pin_state_caps(struct ice_hw *hw, u8 pin_id, + unsigned long *caps); #define PFTSYN_SEM_BYTES 4 From a778616e4cc2d5e3a253c7d8959aafa5218fc5e4 Mon Sep 17 00:00:00 2001 From: Dan Nowlin Date: Tue, 7 Nov 2023 12:32:27 -0500 Subject: [PATCH 359/415] ice: fix DDP package download for packages without signature segment Commit 3cbdb0343022 ("ice: Add support for E830 DDP package segment") incorrectly removed support for package download for packages without a signature segment. These packages include the signature buffer inline in the configurations buffers, and not in a signature segment. Fix package download by providing download support for both packages with (ice_download_pkg_with_sig_seg()) and without signature segment (ice_download_pkg_without_sig_seg()). Fixes: 3cbdb0343022 ("ice: Add support for E830 DDP package segment") Reported-by: Maciej Fijalkowski Closes: https://lore.kernel.org/netdev/ZUT50a94kk2pMGKb@boxer/ Tested-by: Maciej Fijalkowski Reviewed-by: Wojciech Drewek Reviewed-by: Jacob Keller Signed-off-by: Dan Nowlin Signed-off-by: Paul Greenwalt Reviewed-by: Simon Horman Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ddp.c | 103 ++++++++++++++++++++++- 1 file changed, 100 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c index cfb1580f5850..8b7504a9df31 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.c +++ b/drivers/net/ethernet/intel/ice/ice_ddp.c @@ -1479,14 +1479,14 @@ ice_post_dwnld_pkg_actions(struct ice_hw *hw) } /** - * ice_download_pkg + * ice_download_pkg_with_sig_seg * @hw: pointer to the hardware structure * @pkg_hdr: pointer to package header * * Handles the download of a complete package. */ static enum ice_ddp_state -ice_download_pkg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr) +ice_download_pkg_with_sig_seg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr) { enum ice_aq_err aq_err = hw->adminq.sq_last_status; enum ice_ddp_state state = ICE_DDP_PKG_ERR; @@ -1519,6 +1519,103 @@ ice_download_pkg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr) state = ice_post_dwnld_pkg_actions(hw); ice_release_global_cfg_lock(hw); + + return state; +} + +/** + * ice_dwnld_cfg_bufs + * @hw: pointer to the hardware structure + * @bufs: pointer to an array of buffers + * @count: the number of buffers in the array + * + * Obtains global config lock and downloads the package configuration buffers + * to the firmware. + */ +static enum ice_ddp_state +ice_dwnld_cfg_bufs(struct ice_hw *hw, struct ice_buf *bufs, u32 count) +{ + enum ice_ddp_state state; + struct ice_buf_hdr *bh; + int status; + + if (!bufs || !count) + return ICE_DDP_PKG_ERR; + + /* If the first buffer's first section has its metadata bit set + * then there are no buffers to be downloaded, and the operation is + * considered a success. + */ + bh = (struct ice_buf_hdr *)bufs; + if (le32_to_cpu(bh->section_entry[0].type) & ICE_METADATA_BUF) + return ICE_DDP_PKG_SUCCESS; + + status = ice_acquire_global_cfg_lock(hw, ICE_RES_WRITE); + if (status) { + if (status == -EALREADY) + return ICE_DDP_PKG_ALREADY_LOADED; + return ice_map_aq_err_to_ddp_state(hw->adminq.sq_last_status); + } + + state = ice_dwnld_cfg_bufs_no_lock(hw, bufs, 0, count, true); + if (!state) + state = ice_post_dwnld_pkg_actions(hw); + + ice_release_global_cfg_lock(hw); + + return state; +} + +/** + * ice_download_pkg_without_sig_seg + * @hw: pointer to the hardware structure + * @ice_seg: pointer to the segment of the package to be downloaded + * + * Handles the download of a complete package without signature segment. + */ +static enum ice_ddp_state +ice_download_pkg_without_sig_seg(struct ice_hw *hw, struct ice_seg *ice_seg) +{ + struct ice_buf_table *ice_buf_tbl; + + ice_debug(hw, ICE_DBG_PKG, "Segment format version: %d.%d.%d.%d\n", + ice_seg->hdr.seg_format_ver.major, + ice_seg->hdr.seg_format_ver.minor, + ice_seg->hdr.seg_format_ver.update, + ice_seg->hdr.seg_format_ver.draft); + + ice_debug(hw, ICE_DBG_PKG, "Seg: type 0x%X, size %d, name %s\n", + le32_to_cpu(ice_seg->hdr.seg_type), + le32_to_cpu(ice_seg->hdr.seg_size), ice_seg->hdr.seg_id); + + ice_buf_tbl = ice_find_buf_table(ice_seg); + + ice_debug(hw, ICE_DBG_PKG, "Seg buf count: %d\n", + le32_to_cpu(ice_buf_tbl->buf_count)); + + return ice_dwnld_cfg_bufs(hw, ice_buf_tbl->buf_array, + le32_to_cpu(ice_buf_tbl->buf_count)); +} + +/** + * ice_download_pkg + * @hw: pointer to the hardware structure + * @pkg_hdr: pointer to package header + * @ice_seg: pointer to the segment of the package to be downloaded + * + * Handles the download of a complete package. + */ +static enum ice_ddp_state +ice_download_pkg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr, + struct ice_seg *ice_seg) +{ + enum ice_ddp_state state; + + if (hw->pkg_has_signing_seg) + state = ice_download_pkg_with_sig_seg(hw, pkg_hdr); + else + state = ice_download_pkg_without_sig_seg(hw, ice_seg); + ice_post_pkg_dwnld_vlan_mode_cfg(hw); return state; @@ -2083,7 +2180,7 @@ enum ice_ddp_state ice_init_pkg(struct ice_hw *hw, u8 *buf, u32 len) /* initialize package hints and then download package */ ice_init_pkg_hints(hw, seg); - state = ice_download_pkg(hw, pkg); + state = ice_download_pkg(hw, pkg, seg); if (state == ICE_DDP_PKG_ALREADY_LOADED) { ice_debug(hw, ICE_DBG_INIT, "package previously loaded - no work.\n"); From 4b3812d90b2c93723adf4b6ce99240d301f7d5f9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 13 Nov 2023 20:49:39 -0800 Subject: [PATCH 360/415] Revert "ptp: Fixes a null pointer dereference in ptp_ioctl" This reverts commit 8a4f030dbced6fc255cbe67b2d0a129947e18493. Richard says: The test itself is harmless, but keeping it will make people think, "oh this pointer can be invalid." In fact the core stack ensures that ioctl() can't be invoked after release(), otherwise Bad Stuff happens. Fixes: 8a4f030dbced ("ptp: Fixes a null pointer dereference in ptp_ioctl") Link: https://lore.kernel.org/all/ZVAf_qdRfDAQYUt-@hoboy.vegasvil.org/ Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_chardev.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index e95a6ed130ef..3f7a74788802 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -176,8 +176,6 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, int enable, err = 0; tsevq = pccontext->private_clkdata; - if (!tsevq) - return -EINVAL; switch (cmd) { From 73bde5a3294853947252cd9092a3517c7cb0cd2d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Nov 2023 17:48:59 +0000 Subject: [PATCH 361/415] ptp: annotate data-race around q->head and q->tail As I was working on a syzbot report, I found that KCSAN would probably complain that reading q->head or q->tail without barriers could lead to invalid results. Add corresponding READ_ONCE() and WRITE_ONCE() to avoid load-store tearing. Fixes: d94ba80ebbea ("ptp: Added a brand new class driver for ptp clocks.") Signed-off-by: Eric Dumazet Acked-by: Richard Cochran Link: https://lore.kernel.org/r/20231109174859.3995880-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_chardev.c | 3 ++- drivers/ptp/ptp_clock.c | 5 +++-- drivers/ptp/ptp_private.h | 8 ++++++-- drivers/ptp/ptp_sysfs.c | 3 ++- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 3f7a74788802..7513018c9f9a 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -572,7 +572,8 @@ ssize_t ptp_read(struct posix_clock_context *pccontext, uint rdflags, for (i = 0; i < cnt; i++) { event[i] = queue->buf[queue->head]; - queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS; + /* Paired with READ_ONCE() in queue_cnt() */ + WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS); } spin_unlock_irqrestore(&queue->lock, flags); diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 3134568af622..15b804ba4868 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -57,10 +57,11 @@ static void enqueue_external_timestamp(struct timestamp_event_queue *queue, dst->t.sec = seconds; dst->t.nsec = remainder; + /* Both WRITE_ONCE() are paired with READ_ONCE() in queue_cnt() */ if (!queue_free(queue)) - queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS; + WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS); - queue->tail = (queue->tail + 1) % PTP_MAX_TIMESTAMPS; + WRITE_ONCE(queue->tail, (queue->tail + 1) % PTP_MAX_TIMESTAMPS); spin_unlock_irqrestore(&queue->lock, flags); } diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 35fde0a05746..45f9002a5dca 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -85,9 +85,13 @@ struct ptp_vclock { * that a writer might concurrently increment the tail does not * matter, since the queue remains nonempty nonetheless. */ -static inline int queue_cnt(struct timestamp_event_queue *q) +static inline int queue_cnt(const struct timestamp_event_queue *q) { - int cnt = q->tail - q->head; + /* + * Paired with WRITE_ONCE() in enqueue_external_timestamp(), + * ptp_read(), extts_fifo_show(). + */ + int cnt = READ_ONCE(q->tail) - READ_ONCE(q->head); return cnt < 0 ? PTP_MAX_TIMESTAMPS + cnt : cnt; } diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c index 7d023d9d0acb..f7a499a1bd39 100644 --- a/drivers/ptp/ptp_sysfs.c +++ b/drivers/ptp/ptp_sysfs.c @@ -94,7 +94,8 @@ static ssize_t extts_fifo_show(struct device *dev, qcnt = queue_cnt(queue); if (qcnt) { event = queue->buf[queue->head]; - queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS; + /* Paired with READ_ONCE() in queue_cnt() */ + WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS); } spin_unlock_irqrestore(&queue->lock, flags); From 3cffa2ddc4d3fcf70cde361236f5a614f81a09b2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Nov 2023 18:01:02 +0000 Subject: [PATCH 362/415] bonding: stop the device in bond_setup_by_slave() Commit 9eed321cde22 ("net: lapbether: only support ethernet devices") has been able to keep syzbot away from net/lapb, until today. In the following splat [1], the issue is that a lapbether device has been created on a bonding device without members. Then adding a non ARPHRD_ETHER member forced the bonding master to change its type. The fix is to make sure we call dev_close() in bond_setup_by_slave() so that the potential linked lapbether devices (or any other devices having assumptions on the physical device) are removed. A similar bug has been addressed in commit 40baec225765 ("bonding: fix panic on non-ARPHRD_ETHER enslave failure") [1] skbuff: skb_under_panic: text:ffff800089508810 len:44 put:40 head:ffff0000c78e7c00 data:ffff0000c78e7bea tail:0x16 end:0x140 dev:bond0 kernel BUG at net/core/skbuff.c:192 ! Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 6007 Comm: syz-executor383 Not tainted 6.6.0-rc3-syzkaller-gbf6547d8715b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/04/2023 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : skb_panic net/core/skbuff.c:188 [inline] pc : skb_under_panic+0x13c/0x140 net/core/skbuff.c:202 lr : skb_panic net/core/skbuff.c:188 [inline] lr : skb_under_panic+0x13c/0x140 net/core/skbuff.c:202 sp : ffff800096a06aa0 x29: ffff800096a06ab0 x28: ffff800096a06ba0 x27: dfff800000000000 x26: ffff0000ce9b9b50 x25: 0000000000000016 x24: ffff0000c78e7bea x23: ffff0000c78e7c00 x22: 000000000000002c x21: 0000000000000140 x20: 0000000000000028 x19: ffff800089508810 x18: ffff800096a06100 x17: 0000000000000000 x16: ffff80008a629a3c x15: 0000000000000001 x14: 1fffe00036837a32 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000201 x10: 0000000000000000 x9 : cb50b496c519aa00 x8 : cb50b496c519aa00 x7 : 0000000000000001 x6 : 0000000000000001 x5 : ffff800096a063b8 x4 : ffff80008e280f80 x3 : ffff8000805ad11c x2 : 0000000000000001 x1 : 0000000100000201 x0 : 0000000000000086 Call trace: skb_panic net/core/skbuff.c:188 [inline] skb_under_panic+0x13c/0x140 net/core/skbuff.c:202 skb_push+0xf0/0x108 net/core/skbuff.c:2446 ip6gre_header+0xbc/0x738 net/ipv6/ip6_gre.c:1384 dev_hard_header include/linux/netdevice.h:3136 [inline] lapbeth_data_transmit+0x1c4/0x298 drivers/net/wan/lapbether.c:257 lapb_data_transmit+0x8c/0xb0 net/lapb/lapb_iface.c:447 lapb_transmit_buffer+0x178/0x204 net/lapb/lapb_out.c:149 lapb_send_control+0x220/0x320 net/lapb/lapb_subr.c:251 __lapb_disconnect_request+0x9c/0x17c net/lapb/lapb_iface.c:326 lapb_device_event+0x288/0x4e0 net/lapb/lapb_iface.c:492 notifier_call_chain+0x1a4/0x510 kernel/notifier.c:93 raw_notifier_call_chain+0x3c/0x50 kernel/notifier.c:461 call_netdevice_notifiers_info net/core/dev.c:1970 [inline] call_netdevice_notifiers_extack net/core/dev.c:2008 [inline] call_netdevice_notifiers net/core/dev.c:2022 [inline] __dev_close_many+0x1b8/0x3c4 net/core/dev.c:1508 dev_close_many+0x1e0/0x470 net/core/dev.c:1559 dev_close+0x174/0x250 net/core/dev.c:1585 lapbeth_device_event+0x2e4/0x958 drivers/net/wan/lapbether.c:466 notifier_call_chain+0x1a4/0x510 kernel/notifier.c:93 raw_notifier_call_chain+0x3c/0x50 kernel/notifier.c:461 call_netdevice_notifiers_info net/core/dev.c:1970 [inline] call_netdevice_notifiers_extack net/core/dev.c:2008 [inline] call_netdevice_notifiers net/core/dev.c:2022 [inline] __dev_close_many+0x1b8/0x3c4 net/core/dev.c:1508 dev_close_many+0x1e0/0x470 net/core/dev.c:1559 dev_close+0x174/0x250 net/core/dev.c:1585 bond_enslave+0x2298/0x30cc drivers/net/bonding/bond_main.c:2332 bond_do_ioctl+0x268/0xc64 drivers/net/bonding/bond_main.c:4539 dev_ifsioc+0x754/0x9ac dev_ioctl+0x4d8/0xd34 net/core/dev_ioctl.c:786 sock_do_ioctl+0x1d4/0x2d0 net/socket.c:1217 sock_ioctl+0x4e8/0x834 net/socket.c:1322 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl fs/ioctl.c:857 [inline] __arm64_sys_ioctl+0x14c/0x1c8 fs/ioctl.c:857 __invoke_syscall arch/arm64/kernel/syscall.c:37 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:51 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:136 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:155 el0_svc+0x58/0x16c arch/arm64/kernel/entry-common.c:678 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:696 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:591 Code: aa1803e6 aa1903e7 a90023f5 94785b8b (d4210000) Fixes: 872254dd6b1f ("net/bonding: Enable bonding to enslave non ARPHRD_ETHER") Reported-by: syzbot Signed-off-by: Eric Dumazet Acked-by: Jay Vosburgh Reviewed-by: Hangbin Liu Link: https://lore.kernel.org/r/20231109180102.4085183-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 51d47eda1c87..8e6cc0e133b7 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1500,6 +1500,10 @@ static void bond_compute_features(struct bonding *bond) static void bond_setup_by_slave(struct net_device *bond_dev, struct net_device *slave_dev) { + bool was_up = !!(bond_dev->flags & IFF_UP); + + dev_close(bond_dev); + bond_dev->header_ops = slave_dev->header_ops; bond_dev->type = slave_dev->type; @@ -1514,6 +1518,8 @@ static void bond_setup_by_slave(struct net_device *bond_dev, bond_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST); bond_dev->flags |= (IFF_POINTOPOINT | IFF_NOARP); } + if (was_up) + dev_open(bond_dev, NULL); } /* On bonding slaves other than the currently active slave, suppress From 510e35fb931ffc3b100e5d5ae4595cd3beca9f1a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 9 Nov 2023 10:03:12 +0100 Subject: [PATCH 363/415] net: ethernet: cortina: Fix max RX frame define Enumerator 3 is 1548 bytes according to the datasheet. Not 1542. Fixes: 4d5ae32f5e1e ("net: ethernet: Add a driver for Gemini gigabit ethernet") Reviewed-by: Andrew Lunn Signed-off-by: Linus Walleij Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20231109-gemini-largeframe-fix-v4-1-6e611528db08@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cortina/gemini.c | 4 ++-- drivers/net/ethernet/cortina/gemini.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 5423fe26b4ef..562b0917316a 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -432,8 +432,8 @@ static const struct gmac_max_framelen gmac_maxlens[] = { .val = CONFIG0_MAXLEN_1536, }, { - .max_l3_len = 1542, - .val = CONFIG0_MAXLEN_1542, + .max_l3_len = 1548, + .val = CONFIG0_MAXLEN_1548, }, { .max_l3_len = 9212, diff --git a/drivers/net/ethernet/cortina/gemini.h b/drivers/net/ethernet/cortina/gemini.h index 9fdf77d5eb37..99efb1155743 100644 --- a/drivers/net/ethernet/cortina/gemini.h +++ b/drivers/net/ethernet/cortina/gemini.h @@ -787,7 +787,7 @@ union gmac_config0 { #define CONFIG0_MAXLEN_1536 0 #define CONFIG0_MAXLEN_1518 1 #define CONFIG0_MAXLEN_1522 2 -#define CONFIG0_MAXLEN_1542 3 +#define CONFIG0_MAXLEN_1548 3 #define CONFIG0_MAXLEN_9k 4 /* 9212 */ #define CONFIG0_MAXLEN_10k 5 /* 10236 */ #define CONFIG0_MAXLEN_1518__6 6 From d4d0c5b4d279bfe3585fbd806efefd3e51c82afa Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 9 Nov 2023 10:03:13 +0100 Subject: [PATCH 364/415] net: ethernet: cortina: Handle large frames The Gemini ethernet controller provides hardware checksumming for frames up to 1514 bytes including ethernet headers but not FCS. If we start sending bigger frames (after first bumping up the MTU on both interfaces sending and receiving the frames), truncated packets start to appear on the target such as in this tcpdump resulting from ping -s 1474: 23:34:17.241983 14:d6:4d:a8:3c:4f (oui Unknown) > bc:ae:c5:6b:a8:3d (oui Unknown), ethertype IPv4 (0x0800), length 1514: truncated-ip - 2 bytes missing! (tos 0x0, ttl 64, id 32653, offset 0, flags [DF], proto ICMP (1), length 1502) OpenWrt.lan > Fecusia: ICMP echo request, id 1672, seq 50, length 1482 If we bypass the hardware checksumming and provide a software fallback, everything starts working fine up to the max TX MTU of 2047 bytes, for example ping -s2000 192.168.1.2: 00:44:29.587598 bc:ae:c5:6b:a8:3d (oui Unknown) > 14:d6:4d:a8:3c:4f (oui Unknown), ethertype IPv4 (0x0800), length 2042: (tos 0x0, ttl 64, id 51828, offset 0, flags [none], proto ICMP (1), length 2028) Fecusia > OpenWrt.lan: ICMP echo reply, id 1683, seq 4, length 2008 The bit enabling to bypass hardware checksum (or any of the "TSS" bits) are undocumented in the hardware reference manual. The entire hardware checksum unit appears undocumented. The conclusion that we need to use the "bypass" bit was found by trial-and-error. Since no hardware checksum will happen, we slot in a software checksum fallback. Check for the condition where we need to compute checksum on the skb with either hardware or software using == CHECKSUM_PARTIAL instead of != CHECKSUM_NONE which is an incomplete check according to . On the D-Link DIR-685 router this fixes a bug on the conduit interface to the RTL8366RB DSA switch: as the switch needs to add space for its tag it increases the MTU on the conduit interface to 1504 and that means that when the router sends packages of 1500 bytes these get an extra 4 bytes of DSA tag and the transfer fails because of the erroneous hardware checksumming, affecting such basic functionality as the LuCI web interface. Fixes: 4d5ae32f5e1e ("net: ethernet: Add a driver for Gemini gigabit ethernet") Signed-off-by: Linus Walleij Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20231109-gemini-largeframe-fix-v4-2-6e611528db08@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cortina/gemini.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 562b0917316a..e5e73dee13ac 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -1145,6 +1145,7 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb, dma_addr_t mapping; unsigned short mtu; void *buffer; + int ret; mtu = ETH_HLEN; mtu += netdev->mtu; @@ -1159,9 +1160,30 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb, word3 |= mtu; } - if (skb->ip_summed != CHECKSUM_NONE) { + if (skb->len >= ETH_FRAME_LEN) { + /* Hardware offloaded checksumming isn't working on frames + * bigger than 1514 bytes. A hypothesis about this is that the + * checksum buffer is only 1518 bytes, so when the frames get + * bigger they get truncated, or the last few bytes get + * overwritten by the FCS. + * + * Just use software checksumming and bypass on bigger frames. + */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + ret = skb_checksum_help(skb); + if (ret) + return ret; + } + word1 |= TSS_BYPASS_BIT; + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { int tcp = 0; + /* We do not switch off the checksumming on non TCP/UDP + * frames: as is shown from tests, the checksumming engine + * is smart enough to see that a frame is not actually TCP + * or UDP and then just pass it through without any changes + * to the frame. + */ if (skb->protocol == htons(ETH_P_IP)) { word1 |= TSS_IP_CHKSUM_BIT; tcp = ip_hdr(skb)->protocol == IPPROTO_TCP; From dc6c0bfbaa947dd7976e30e8c29b10c868b6fa42 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 9 Nov 2023 10:03:14 +0100 Subject: [PATCH 365/415] net: ethernet: cortina: Fix MTU max setting The RX max frame size is over 10000 for the Gemini ethernet, but the TX max frame size is actually just 2047 (0x7ff after checking the datasheet). Reflect this in what we offer to Linux, cap the MTU at the TX max frame minus ethernet headers. We delete the code disabling the hardware checksum for large MTUs as netdev->mtu can no longer be larger than netdev->max_mtu meaning the if()-clause in gmac_fix_features() is never true. Fixes: 4d5ae32f5e1e ("net: ethernet: Add a driver for Gemini gigabit ethernet") Reviewed-by: Andrew Lunn Signed-off-by: Linus Walleij Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20231109-gemini-largeframe-fix-v4-3-6e611528db08@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cortina/gemini.c | 17 ++++------------- drivers/net/ethernet/cortina/gemini.h | 2 +- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index e5e73dee13ac..78287cfcbf63 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -2000,15 +2000,6 @@ static int gmac_change_mtu(struct net_device *netdev, int new_mtu) return 0; } -static netdev_features_t gmac_fix_features(struct net_device *netdev, - netdev_features_t features) -{ - if (netdev->mtu + ETH_HLEN + VLAN_HLEN > MTU_SIZE_BIT_MASK) - features &= ~GMAC_OFFLOAD_FEATURES; - - return features; -} - static int gmac_set_features(struct net_device *netdev, netdev_features_t features) { @@ -2234,7 +2225,6 @@ static const struct net_device_ops gmac_351x_ops = { .ndo_set_mac_address = gmac_set_mac_address, .ndo_get_stats64 = gmac_get_stats64, .ndo_change_mtu = gmac_change_mtu, - .ndo_fix_features = gmac_fix_features, .ndo_set_features = gmac_set_features, }; @@ -2486,11 +2476,12 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev) netdev->hw_features = GMAC_OFFLOAD_FEATURES; netdev->features |= GMAC_OFFLOAD_FEATURES | NETIF_F_GRO; - /* We can handle jumbo frames up to 10236 bytes so, let's accept - * payloads of 10236 bytes minus VLAN and ethernet header + /* We can receive jumbo frames up to 10236 bytes but only + * transmit 2047 bytes so, let's accept payloads of 2047 + * bytes minus VLAN and ethernet header */ netdev->min_mtu = ETH_MIN_MTU; - netdev->max_mtu = 10236 - VLAN_ETH_HLEN; + netdev->max_mtu = MTU_SIZE_BIT_MASK - VLAN_ETH_HLEN; port->freeq_refill = 0; netif_napi_add(netdev, &port->napi, gmac_napi_poll); diff --git a/drivers/net/ethernet/cortina/gemini.h b/drivers/net/ethernet/cortina/gemini.h index 99efb1155743..24bb989981f2 100644 --- a/drivers/net/ethernet/cortina/gemini.h +++ b/drivers/net/ethernet/cortina/gemini.h @@ -502,7 +502,7 @@ union gmac_txdesc_3 { #define SOF_BIT 0x80000000 #define EOF_BIT 0x40000000 #define EOFIE_BIT BIT(29) -#define MTU_SIZE_BIT_MASK 0x1fff +#define MTU_SIZE_BIT_MASK 0x7ff /* Max MTU 2047 bytes */ /* GMAC Tx Descriptor */ struct gmac_txdesc { From 0ab0c45d8aaea5192328bfa6989673aceafc767c Mon Sep 17 00:00:00 2001 From: ChunHao Lin Date: Fri, 10 Nov 2023 01:33:59 +0800 Subject: [PATCH 366/415] r8169: add handling DASH when DASH is disabled For devices that support DASH, even DASH is disabled, there may still exist a default firmware that will influence device behavior. So driver needs to handle DASH for devices that support DASH, no matter the DASH status is. This patch also prepares for "fix network lost after resume on DASH systems". Fixes: ee7a1beb9759 ("r8169:call "rtl8168_driver_start" "rtl8168_driver_stop" only when hardware dash function is enabled") Cc: stable@vger.kernel.org Signed-off-by: ChunHao Lin Reviewed-by: Heiner Kallweit Link: https://lore.kernel.org/r/20231109173400.4573-2-hau@realtek.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 36 ++++++++++++++++------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 0c76c162b8a9..cfcb40d90920 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -624,6 +624,7 @@ struct rtl8169_private { unsigned supports_gmii:1; unsigned aspm_manageable:1; + unsigned dash_enabled:1; dma_addr_t counters_phys_addr; struct rtl8169_counters *counters; struct rtl8169_tc_offsets tc_offset; @@ -1253,14 +1254,26 @@ static bool r8168ep_check_dash(struct rtl8169_private *tp) return r8168ep_ocp_read(tp, 0x128) & BIT(0); } -static enum rtl_dash_type rtl_check_dash(struct rtl8169_private *tp) +static bool rtl_dash_is_enabled(struct rtl8169_private *tp) +{ + switch (tp->dash_type) { + case RTL_DASH_DP: + return r8168dp_check_dash(tp); + case RTL_DASH_EP: + return r8168ep_check_dash(tp); + default: + return false; + } +} + +static enum rtl_dash_type rtl_get_dash_type(struct rtl8169_private *tp) { switch (tp->mac_version) { case RTL_GIGA_MAC_VER_28: case RTL_GIGA_MAC_VER_31: - return r8168dp_check_dash(tp) ? RTL_DASH_DP : RTL_DASH_NONE; + return RTL_DASH_DP; case RTL_GIGA_MAC_VER_51 ... RTL_GIGA_MAC_VER_53: - return r8168ep_check_dash(tp) ? RTL_DASH_EP : RTL_DASH_NONE; + return RTL_DASH_EP; default: return RTL_DASH_NONE; } @@ -1453,7 +1466,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) device_set_wakeup_enable(tp_to_dev(tp), wolopts); - if (tp->dash_type == RTL_DASH_NONE) { + if (!tp->dash_enabled) { rtl_set_d3_pll_down(tp, !wolopts); tp->dev->wol_enabled = wolopts ? 1 : 0; } @@ -2512,7 +2525,7 @@ static void rtl_wol_enable_rx(struct rtl8169_private *tp) static void rtl_prepare_power_down(struct rtl8169_private *tp) { - if (tp->dash_type != RTL_DASH_NONE) + if (tp->dash_enabled) return; if (tp->mac_version == RTL_GIGA_MAC_VER_32 || @@ -4869,7 +4882,7 @@ static int rtl8169_runtime_idle(struct device *device) { struct rtl8169_private *tp = dev_get_drvdata(device); - if (tp->dash_type != RTL_DASH_NONE) + if (tp->dash_enabled) return -EBUSY; if (!netif_running(tp->dev) || !netif_carrier_ok(tp->dev)) @@ -4895,8 +4908,7 @@ static void rtl_shutdown(struct pci_dev *pdev) /* Restore original MAC address */ rtl_rar_set(tp, tp->dev->perm_addr); - if (system_state == SYSTEM_POWER_OFF && - tp->dash_type == RTL_DASH_NONE) { + if (system_state == SYSTEM_POWER_OFF && !tp->dash_enabled) { pci_wake_from_d3(pdev, tp->saved_wolopts); pci_set_power_state(pdev, PCI_D3hot); } @@ -5254,7 +5266,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1); tp->aspm_manageable = !rc; - tp->dash_type = rtl_check_dash(tp); + tp->dash_type = rtl_get_dash_type(tp); + tp->dash_enabled = rtl_dash_is_enabled(tp); tp->cp_cmd = RTL_R16(tp, CPlusCmd) & CPCMD_MASK; @@ -5325,7 +5338,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* configure chip for default features */ rtl8169_set_features(dev, dev->features); - if (tp->dash_type == RTL_DASH_NONE) { + if (!tp->dash_enabled) { rtl_set_d3_pll_down(tp, true); } else { rtl_set_d3_pll_down(tp, false); @@ -5365,7 +5378,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) "ok" : "ko"); if (tp->dash_type != RTL_DASH_NONE) { - netdev_info(dev, "DASH enabled\n"); + netdev_info(dev, "DASH %s\n", + tp->dash_enabled ? "enabled" : "disabled"); rtl8168_driver_start(tp); } From 868c3b95afef4883bfb66c9397482da6840b5baf Mon Sep 17 00:00:00 2001 From: ChunHao Lin Date: Fri, 10 Nov 2023 01:34:00 +0800 Subject: [PATCH 367/415] r8169: fix network lost after resume on DASH systems Device that support DASH may be reseted or powered off during suspend. So driver needs to handle DASH during system suspend and resume. Or DASH firmware will influence device behavior and causes network lost. Fixes: b646d90053f8 ("r8169: magic.") Cc: stable@vger.kernel.org Reviewed-by: Heiner Kallweit Signed-off-by: ChunHao Lin Link: https://lore.kernel.org/r/20231109173400.4573-3-hau@realtek.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index cfcb40d90920..b9bb1d2f0237 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4661,10 +4661,16 @@ static void rtl8169_down(struct rtl8169_private *tp) rtl8169_cleanup(tp); rtl_disable_exit_l1(tp); rtl_prepare_power_down(tp); + + if (tp->dash_type != RTL_DASH_NONE) + rtl8168_driver_stop(tp); } static void rtl8169_up(struct rtl8169_private *tp) { + if (tp->dash_type != RTL_DASH_NONE) + rtl8168_driver_start(tp); + pci_set_master(tp->pci_dev); phy_init_hw(tp->phydev); phy_resume(tp->phydev); From 686464514fbebb6c8de4415238319e414c3500a4 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 27 Sep 2023 08:58:05 +0200 Subject: [PATCH 368/415] xen/events: reduce externally visible helper functions get_evtchn_to_irq() has only one external user while irq_from_evtchn() provides the same functionality and is exported for a wider user base. Modify the only external user of get_evtchn_to_irq() to use irq_from_evtchn() instead and make get_evtchn_to_irq() static. evtchn_from_irq() and irq_from_virq() have a single external user and can easily be combined to a new helper irq_evtchn_from_virq() allowing to drop irq_from_virq() and to make evtchn_from_irq() static. Signed-off-by: Juergen Gross Reviewed-by: Oleksandr Tyshchenko Signed-off-by: Juergen Gross --- drivers/xen/events/events_2l.c | 8 ++++---- drivers/xen/events/events_base.c | 13 +++++++++---- drivers/xen/events/events_internal.h | 1 - include/xen/events.h | 4 ++-- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c index b8f2f971c2f0..e3585330cf98 100644 --- a/drivers/xen/events/events_2l.c +++ b/drivers/xen/events/events_2l.c @@ -171,11 +171,11 @@ static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl) int i; struct shared_info *s = HYPERVISOR_shared_info; struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); + evtchn_port_t evtchn; /* Timer interrupt has highest priority. */ - irq = irq_from_virq(cpu, VIRQ_TIMER); + irq = irq_evtchn_from_virq(cpu, VIRQ_TIMER, &evtchn); if (irq != -1) { - evtchn_port_t evtchn = evtchn_from_irq(irq); word_idx = evtchn / BITS_PER_LONG; bit_idx = evtchn % BITS_PER_LONG; if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx)) @@ -328,9 +328,9 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id) for (i = 0; i < EVTCHN_2L_NR_CHANNELS; i++) { if (sync_test_bit(i, BM(sh->evtchn_pending))) { int word_idx = i / BITS_PER_EVTCHN_WORD; - printk(" %d: event %d -> irq %d%s%s%s\n", + printk(" %d: event %d -> irq %u%s%s%s\n", cpu_from_evtchn(i), i, - get_evtchn_to_irq(i), + irq_from_evtchn(i), sync_test_bit(word_idx, BM(&v->evtchn_pending_sel)) ? "" : " l2-clear", !sync_test_bit(i, BM(sh->evtchn_mask)) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index c5d86128eb73..a74ea28dcc3d 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -248,7 +248,7 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq) return 0; } -int get_evtchn_to_irq(evtchn_port_t evtchn) +static int get_evtchn_to_irq(evtchn_port_t evtchn) { if (evtchn >= xen_evtchn_max_channels()) return -1; @@ -415,7 +415,7 @@ static void xen_irq_info_cleanup(struct irq_info *info) /* * Accessors for packed IRQ information. */ -evtchn_port_t evtchn_from_irq(unsigned irq) +static evtchn_port_t evtchn_from_irq(unsigned int irq) { const struct irq_info *info = NULL; @@ -433,9 +433,14 @@ unsigned int irq_from_evtchn(evtchn_port_t evtchn) } EXPORT_SYMBOL_GPL(irq_from_evtchn); -int irq_from_virq(unsigned int cpu, unsigned int virq) +int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq, + evtchn_port_t *evtchn) { - return per_cpu(virq_to_irq, cpu)[virq]; + int irq = per_cpu(virq_to_irq, cpu)[virq]; + + *evtchn = evtchn_from_irq(irq); + + return irq; } static enum ipi_vector ipi_from_irq(unsigned irq) diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h index 4d3398eff9cd..19ae31695edc 100644 --- a/drivers/xen/events/events_internal.h +++ b/drivers/xen/events/events_internal.h @@ -33,7 +33,6 @@ struct evtchn_ops { extern const struct evtchn_ops *evtchn_ops; -int get_evtchn_to_irq(evtchn_port_t evtchn); void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl); unsigned int cpu_from_evtchn(evtchn_port_t evtchn); diff --git a/include/xen/events.h b/include/xen/events.h index a129cafa80ed..3b07409f8032 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -100,8 +100,8 @@ void xen_poll_irq_timeout(int irq, u64 timeout); /* Determine the IRQ which is bound to an event channel */ unsigned int irq_from_evtchn(evtchn_port_t evtchn); -int irq_from_virq(unsigned int cpu, unsigned int virq); -evtchn_port_t evtchn_from_irq(unsigned irq); +int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq, + evtchn_port_t *evtchn); int xen_set_callback_via(uint64_t via); int xen_evtchn_do_upcall(void); From 3bdb0ac350fe5e6301562143e4573971dd01ae0b Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 27 Sep 2023 08:24:46 +0200 Subject: [PATCH 369/415] xen/events: remove some simple helpers from events_base.c The helper functions type_from_irq() and cpu_from_irq() are just one line functions used only internally. Open code them where needed. At the same time modify and rename get_evtchn_to_irq() to return a struct irq_info instead of the IRQ number. Signed-off-by: Juergen Gross Reviewed-by: Oleksandr Tyshchenko Signed-off-by: Juergen Gross --- drivers/xen/events/events_base.c | 97 +++++++++++++------------------- 1 file changed, 38 insertions(+), 59 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index a74ea28dcc3d..a810e8904fbf 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -248,15 +248,6 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq) return 0; } -static int get_evtchn_to_irq(evtchn_port_t evtchn) -{ - if (evtchn >= xen_evtchn_max_channels()) - return -1; - if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) - return -1; - return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]); -} - /* Get info for IRQ */ static struct irq_info *info_for_irq(unsigned irq) { @@ -274,6 +265,19 @@ static void set_info_for_irq(unsigned int irq, struct irq_info *info) irq_set_chip_data(irq, info); } +static struct irq_info *evtchn_to_info(evtchn_port_t evtchn) +{ + int irq; + + if (evtchn >= xen_evtchn_max_channels()) + return NULL; + if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) + return NULL; + irq = READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]); + + return (irq < 0) ? NULL : info_for_irq(irq); +} + /* Per CPU channel accounting */ static void channels_on_cpu_dec(struct irq_info *info) { @@ -429,7 +433,9 @@ static evtchn_port_t evtchn_from_irq(unsigned int irq) unsigned int irq_from_evtchn(evtchn_port_t evtchn) { - return get_evtchn_to_irq(evtchn); + struct irq_info *info = evtchn_to_info(evtchn); + + return info ? info->irq : -1; } EXPORT_SYMBOL_GPL(irq_from_evtchn); @@ -473,25 +479,11 @@ static unsigned pirq_from_irq(unsigned irq) return info->u.pirq.pirq; } -static enum xen_irq_type type_from_irq(unsigned irq) -{ - return info_for_irq(irq)->type; -} - -static unsigned cpu_from_irq(unsigned irq) -{ - return info_for_irq(irq)->cpu; -} - unsigned int cpu_from_evtchn(evtchn_port_t evtchn) { - int irq = get_evtchn_to_irq(evtchn); - unsigned ret = 0; + struct irq_info *info = evtchn_to_info(evtchn); - if (irq != -1) - ret = cpu_from_irq(irq); - - return ret; + return info ? info->cpu : 0; } static void do_mask(struct irq_info *info, u8 reason) @@ -540,13 +532,12 @@ static bool pirq_needs_eoi_flag(unsigned irq) static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu, bool force_affinity) { - int irq = get_evtchn_to_irq(evtchn); - struct irq_info *info = info_for_irq(irq); + struct irq_info *info = evtchn_to_info(evtchn); - BUG_ON(irq == -1); + BUG_ON(info == NULL); if (IS_ENABLED(CONFIG_SMP) && force_affinity) { - struct irq_data *data = irq_get_irq_data(irq); + struct irq_data *data = irq_get_irq_data(info->irq); irq_data_update_affinity(data, cpumask_of(cpu)); irq_data_update_effective_affinity(data, cpumask_of(cpu)); @@ -979,13 +970,13 @@ static void __unbind_from_irq(unsigned int irq) } if (VALID_EVTCHN(evtchn)) { - unsigned int cpu = cpu_from_irq(irq); + unsigned int cpu = info->cpu; struct xenbus_device *dev; if (!info->is_static) xen_evtchn_close(evtchn); - switch (type_from_irq(irq)) { + switch (info->type) { case IRQT_VIRQ: per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1; break; @@ -1185,15 +1176,16 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, { int irq; int ret; + struct irq_info *info; if (evtchn >= xen_evtchn_max_channels()) return -ENOMEM; mutex_lock(&irq_mapping_update_lock); - irq = get_evtchn_to_irq(evtchn); + info = evtchn_to_info(evtchn); - if (irq == -1) { + if (!info) { irq = xen_allocate_irq_dynamic(); if (irq < 0) goto out; @@ -1216,9 +1208,9 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, */ bind_evtchn_to_cpu(evtchn, 0, false); } else { - struct irq_info *info = info_for_irq(irq); - if (!WARN_ON(!info || info->type != IRQT_EVTCHN)) + if (!WARN_ON(info->type != IRQT_EVTCHN)) info->refcnt++; + irq = info->irq; } out: @@ -1556,13 +1548,7 @@ EXPORT_SYMBOL_GPL(xen_set_irq_priority); int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static) { - int irq = get_evtchn_to_irq(evtchn); - struct irq_info *info; - - if (irq == -1) - return -ENOENT; - - info = info_for_irq(irq); + struct irq_info *info = evtchn_to_info(evtchn); if (!info) return -ENOENT; @@ -1578,7 +1564,6 @@ EXPORT_SYMBOL_GPL(evtchn_make_refcounted); int evtchn_get(evtchn_port_t evtchn) { - int irq; struct irq_info *info; int err = -ENOENT; @@ -1587,11 +1572,7 @@ int evtchn_get(evtchn_port_t evtchn) mutex_lock(&irq_mapping_update_lock); - irq = get_evtchn_to_irq(evtchn); - if (irq == -1) - goto done; - - info = info_for_irq(irq); + info = evtchn_to_info(evtchn); if (!info) goto done; @@ -1611,10 +1592,11 @@ EXPORT_SYMBOL_GPL(evtchn_get); void evtchn_put(evtchn_port_t evtchn) { - int irq = get_evtchn_to_irq(evtchn); - if (WARN_ON(irq == -1)) + struct irq_info *info = evtchn_to_info(evtchn); + + if (WARN_ON(!info)) return; - unbind_from_irq(irq); + unbind_from_irq(info->irq); } EXPORT_SYMBOL_GPL(evtchn_put); @@ -1644,12 +1626,10 @@ struct evtchn_loop_ctrl { void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) { - int irq; - struct irq_info *info; + struct irq_info *info = evtchn_to_info(port); struct xenbus_device *dev; - irq = get_evtchn_to_irq(port); - if (irq == -1) + if (!info) return; /* @@ -1674,7 +1654,6 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) } } - info = info_for_irq(irq); if (xchg_acquire(&info->is_active, 1)) return; @@ -1688,7 +1667,7 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) info->eoi_time = get_jiffies_64() + event_eoi_delay; } - generic_handle_irq(irq); + generic_handle_irq(info->irq); } int xen_evtchn_do_upcall(void) @@ -1746,7 +1725,7 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) mutex_lock(&irq_mapping_update_lock); /* After resume the irq<->evtchn mappings are all cleared out */ - BUG_ON(get_evtchn_to_irq(evtchn) != -1); + BUG_ON(evtchn_to_info(evtchn)); /* Expect irq to have been bound before, so there should be a proper type */ BUG_ON(info->type == IRQT_UNBOUND); From 4b7b492615cf3017190f55444f7016812b66611d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Nov 2023 13:49:38 +0000 Subject: [PATCH 370/415] af_unix: fix use-after-free in unix_stream_read_actor() syzbot reported the following crash [1] After releasing unix socket lock, u->oob_skb can be changed by another thread. We must temporarily increase skb refcount to make sure this other thread will not free the skb under us. [1] BUG: KASAN: slab-use-after-free in unix_stream_read_actor+0xa7/0xc0 net/unix/af_unix.c:2866 Read of size 4 at addr ffff88801f3b9cc4 by task syz-executor107/5297 CPU: 1 PID: 5297 Comm: syz-executor107 Not tainted 6.6.0-syzkaller-15910-gb8e3a87a627b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:364 [inline] print_report+0xc4/0x620 mm/kasan/report.c:475 kasan_report+0xda/0x110 mm/kasan/report.c:588 unix_stream_read_actor+0xa7/0xc0 net/unix/af_unix.c:2866 unix_stream_recv_urg net/unix/af_unix.c:2587 [inline] unix_stream_read_generic+0x19a5/0x2480 net/unix/af_unix.c:2666 unix_stream_recvmsg+0x189/0x1b0 net/unix/af_unix.c:2903 sock_recvmsg_nosec net/socket.c:1044 [inline] sock_recvmsg+0xe2/0x170 net/socket.c:1066 ____sys_recvmsg+0x21f/0x5c0 net/socket.c:2803 ___sys_recvmsg+0x115/0x1a0 net/socket.c:2845 __sys_recvmsg+0x114/0x1e0 net/socket.c:2875 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x3f/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b RIP: 0033:0x7fc67492c559 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 51 18 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fc6748ab228 EFLAGS: 00000246 ORIG_RAX: 000000000000002f RAX: ffffffffffffffda RBX: 000000000000001c RCX: 00007fc67492c559 RDX: 0000000040010083 RSI: 0000000020000140 RDI: 0000000000000004 RBP: 00007fc6749b6348 R08: 00007fc6748ab6c0 R09: 00007fc6748ab6c0 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fc6749b6340 R13: 00007fc6749b634c R14: 00007ffe9fac52a0 R15: 00007ffe9fac5388 Allocated by task 5295: kasan_save_stack+0x33/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 __kasan_slab_alloc+0x81/0x90 mm/kasan/common.c:328 kasan_slab_alloc include/linux/kasan.h:188 [inline] slab_post_alloc_hook mm/slab.h:763 [inline] slab_alloc_node mm/slub.c:3478 [inline] kmem_cache_alloc_node+0x180/0x3c0 mm/slub.c:3523 __alloc_skb+0x287/0x330 net/core/skbuff.c:641 alloc_skb include/linux/skbuff.h:1286 [inline] alloc_skb_with_frags+0xe4/0x710 net/core/skbuff.c:6331 sock_alloc_send_pskb+0x7e4/0x970 net/core/sock.c:2780 sock_alloc_send_skb include/net/sock.h:1884 [inline] queue_oob net/unix/af_unix.c:2147 [inline] unix_stream_sendmsg+0xb5f/0x10a0 net/unix/af_unix.c:2301 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 ____sys_sendmsg+0x6ac/0x940 net/socket.c:2584 ___sys_sendmsg+0x135/0x1d0 net/socket.c:2638 __sys_sendmsg+0x117/0x1e0 net/socket.c:2667 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x3f/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Freed by task 5295: kasan_save_stack+0x33/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2b/0x40 mm/kasan/generic.c:522 ____kasan_slab_free mm/kasan/common.c:236 [inline] ____kasan_slab_free+0x15b/0x1b0 mm/kasan/common.c:200 kasan_slab_free include/linux/kasan.h:164 [inline] slab_free_hook mm/slub.c:1800 [inline] slab_free_freelist_hook+0x114/0x1e0 mm/slub.c:1826 slab_free mm/slub.c:3809 [inline] kmem_cache_free+0xf8/0x340 mm/slub.c:3831 kfree_skbmem+0xef/0x1b0 net/core/skbuff.c:1015 __kfree_skb net/core/skbuff.c:1073 [inline] consume_skb net/core/skbuff.c:1288 [inline] consume_skb+0xdf/0x170 net/core/skbuff.c:1282 queue_oob net/unix/af_unix.c:2178 [inline] unix_stream_sendmsg+0xd49/0x10a0 net/unix/af_unix.c:2301 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 ____sys_sendmsg+0x6ac/0x940 net/socket.c:2584 ___sys_sendmsg+0x135/0x1d0 net/socket.c:2638 __sys_sendmsg+0x117/0x1e0 net/socket.c:2667 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x3f/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b The buggy address belongs to the object at ffff88801f3b9c80 which belongs to the cache skbuff_head_cache of size 240 The buggy address is located 68 bytes inside of freed 240-byte region [ffff88801f3b9c80, ffff88801f3b9d70) The buggy address belongs to the physical page: page:ffffea00007cee40 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1f3b9 flags: 0xfff00000000800(slab|node=0|zone=1|lastcpupid=0x7ff) page_type: 0xffffffff() raw: 00fff00000000800 ffff888142a60640 dead000000000122 0000000000000000 raw: 0000000000000000 00000000000c000c 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x12cc0(GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY), pid 5299, tgid 5283 (syz-executor107), ts 103803840339, free_ts 103600093431 set_page_owner include/linux/page_owner.h:31 [inline] post_alloc_hook+0x2cf/0x340 mm/page_alloc.c:1537 prep_new_page mm/page_alloc.c:1544 [inline] get_page_from_freelist+0xa25/0x36c0 mm/page_alloc.c:3312 __alloc_pages+0x1d0/0x4a0 mm/page_alloc.c:4568 alloc_pages_mpol+0x258/0x5f0 mm/mempolicy.c:2133 alloc_slab_page mm/slub.c:1870 [inline] allocate_slab+0x251/0x380 mm/slub.c:2017 new_slab mm/slub.c:2070 [inline] ___slab_alloc+0x8c7/0x1580 mm/slub.c:3223 __slab_alloc.constprop.0+0x56/0xa0 mm/slub.c:3322 __slab_alloc_node mm/slub.c:3375 [inline] slab_alloc_node mm/slub.c:3468 [inline] kmem_cache_alloc_node+0x132/0x3c0 mm/slub.c:3523 __alloc_skb+0x287/0x330 net/core/skbuff.c:641 alloc_skb include/linux/skbuff.h:1286 [inline] alloc_skb_with_frags+0xe4/0x710 net/core/skbuff.c:6331 sock_alloc_send_pskb+0x7e4/0x970 net/core/sock.c:2780 sock_alloc_send_skb include/net/sock.h:1884 [inline] queue_oob net/unix/af_unix.c:2147 [inline] unix_stream_sendmsg+0xb5f/0x10a0 net/unix/af_unix.c:2301 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 ____sys_sendmsg+0x6ac/0x940 net/socket.c:2584 ___sys_sendmsg+0x135/0x1d0 net/socket.c:2638 __sys_sendmsg+0x117/0x1e0 net/socket.c:2667 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1137 [inline] free_unref_page_prepare+0x4f8/0xa90 mm/page_alloc.c:2347 free_unref_page+0x33/0x3b0 mm/page_alloc.c:2487 __unfreeze_partials+0x21d/0x240 mm/slub.c:2655 qlink_free mm/kasan/quarantine.c:168 [inline] qlist_free_all+0x6a/0x170 mm/kasan/quarantine.c:187 kasan_quarantine_reduce+0x18e/0x1d0 mm/kasan/quarantine.c:294 __kasan_slab_alloc+0x65/0x90 mm/kasan/common.c:305 kasan_slab_alloc include/linux/kasan.h:188 [inline] slab_post_alloc_hook mm/slab.h:763 [inline] slab_alloc_node mm/slub.c:3478 [inline] slab_alloc mm/slub.c:3486 [inline] __kmem_cache_alloc_lru mm/slub.c:3493 [inline] kmem_cache_alloc+0x15d/0x380 mm/slub.c:3502 vm_area_dup+0x21/0x2f0 kernel/fork.c:500 __split_vma+0x17d/0x1070 mm/mmap.c:2365 split_vma mm/mmap.c:2437 [inline] vma_modify+0x25d/0x450 mm/mmap.c:2472 vma_modify_flags include/linux/mm.h:3271 [inline] mprotect_fixup+0x228/0xc80 mm/mprotect.c:635 do_mprotect_pkey+0x852/0xd60 mm/mprotect.c:809 __do_sys_mprotect mm/mprotect.c:830 [inline] __se_sys_mprotect mm/mprotect.c:827 [inline] __x64_sys_mprotect+0x78/0xb0 mm/mprotect.c:827 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x3f/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Memory state around the buggy address: ffff88801f3b9b80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88801f3b9c00: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc >ffff88801f3b9c80: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88801f3b9d00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc ffff88801f3b9d80: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb Fixes: 876c14ad014d ("af_unix: fix holding spinlock in oob handling") Reported-and-tested-by: syzbot+7a2d546fa43e49315ed3@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Cc: Rao Shoaib Reviewed-by: Rao shoaib Link: https://lore.kernel.org/r/20231113134938.168151-1-edumazet@google.com Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 45506a95b25f..a357dc5f2404 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2581,15 +2581,16 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) if (!(state->flags & MSG_PEEK)) WRITE_ONCE(u->oob_skb, NULL); - + else + skb_get(oob_skb); unix_state_unlock(sk); chunk = state->recv_actor(oob_skb, 0, chunk, state); - if (!(state->flags & MSG_PEEK)) { + if (!(state->flags & MSG_PEEK)) UNIXCB(oob_skb).consumed += 1; - kfree_skb(oob_skb); - } + + consume_skb(oob_skb); mutex_unlock(&u->iolock); From 67059b61597c004e23b074547b40604222bee3a0 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 1 Nov 2023 09:33:51 +0800 Subject: [PATCH 371/415] netfilter: nft_set_rbtree: Remove unused variable nft_net MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code that uses nft_net has been removed, and the nft_pernet function is merely obtaining a reference to shared data through the net pointer. The content of the net pointer is not modified or changed, so both of them should be removed. silence the warning: net/netfilter/nft_set_rbtree.c:627:26: warning: variable ‘nft_net’ set but not used Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7103 Signed-off-by: Yang Li Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_rbtree.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 6f1186abd47b..baa3fea4fe65 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -624,14 +624,12 @@ static void nft_rbtree_gc(struct nft_set *set) { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe, *rbe_end = NULL; - struct nftables_pernet *nft_net; struct rb_node *node, *next; struct nft_trans_gc *gc; struct net *net; set = nft_set_container_of(priv); net = read_pnet(&set->net); - nft_net = nft_pernet(net); gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL); if (!gc) From a44af08e3d4d7566eeea98d7a29fe06e7b9de944 Mon Sep 17 00:00:00 2001 From: Linkui Xiao Date: Wed, 1 Nov 2023 11:20:18 +0800 Subject: [PATCH 372/415] netfilter: nf_conntrack_bridge: initialize err to 0 K2CI reported a problem: consume_skb(skb); return err; [nf_br_ip_fragment() error] uninitialized symbol 'err'. err is not initialized, because returning 0 is expected, initialize err to 0. Fixes: 3c171f496ef5 ("netfilter: bridge: add connection tracking system") Reported-by: k2ci Signed-off-by: Linkui Xiao Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nf_conntrack_bridge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index b5c406a6e765..abb090f94ed2 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -37,7 +37,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, ktime_t tstamp = skb->tstamp; struct ip_frag_state state; struct iphdr *iph; - int err; + int err = 0; /* for offloaded checksums cleanup checksum before fragmentation */ if (skb->ip_summed == CHECKSUM_PARTIAL && From c301f0981fdd3fd1ffac6836b423c4d7a8e0eb63 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 Nov 2023 09:42:51 +0300 Subject: [PATCH 373/415] netfilter: nf_tables: fix pointer math issue in nft_byteorder_eval() The problem is in nft_byteorder_eval() where we are iterating through a loop and writing to dst[0], dst[1], dst[2] and so on... On each iteration we are writing 8 bytes. But dst[] is an array of u32 so each element only has space for 4 bytes. That means that every iteration overwrites part of the previous element. I spotted this bug while reviewing commit caf3ef7468f7 ("netfilter: nf_tables: prevent OOB access in nft_byteorder_eval") which is a related issue. I think that the reason we have not detected this bug in testing is that most of time we only write one element. Fixes: ce1e7989d989 ("netfilter: nft_byteorder: provide 64bit le/be conversion") Signed-off-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++-- net/netfilter/nft_byteorder.c | 5 +++-- net/netfilter/nft_meta.c | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3bbd13ab1ecf..b157c5cafd14 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -178,9 +178,9 @@ static inline __be32 nft_reg_load_be32(const u32 *sreg) return *(__force __be32 *)sreg; } -static inline void nft_reg_store64(u32 *dreg, u64 val) +static inline void nft_reg_store64(u64 *dreg, u64 val) { - put_unaligned(val, (u64 *)dreg); + put_unaligned(val, dreg); } static inline u64 nft_reg_load64(const u32 *sreg) diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index e596d1a842f7..f6e791a68101 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -38,13 +38,14 @@ void nft_byteorder_eval(const struct nft_expr *expr, switch (priv->size) { case 8: { + u64 *dst64 = (void *)dst; u64 src64; switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 8; i++) { src64 = nft_reg_load64(&src[i]); - nft_reg_store64(&dst[i], + nft_reg_store64(&dst64[i], be64_to_cpu((__force __be64)src64)); } break; @@ -52,7 +53,7 @@ void nft_byteorder_eval(const struct nft_expr *expr, for (i = 0; i < priv->len / 8; i++) { src64 = (__force __u64) cpu_to_be64(nft_reg_load64(&src[i])); - nft_reg_store64(&dst[i], src64); + nft_reg_store64(&dst64[i], src64); } break; } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index f7da7c43333b..ba0d3683a45d 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -63,7 +63,7 @@ nft_meta_get_eval_time(enum nft_meta_keys key, { switch (key) { case NFT_META_TIME_NS: - nft_reg_store64(dest, ktime_get_real_ns()); + nft_reg_store64((u64 *)dest, ktime_get_real_ns()); break; case NFT_META_TIME_DAY: nft_reg_store8(dest, nft_meta_weekday()); From a7d5a955bfa854ac6b0c53aaf933394b4e6139e4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 13 Nov 2023 20:34:56 +0100 Subject: [PATCH 374/415] netfilter: nf_tables: bogus ENOENT when destroying element which does not exist destroy element command bogusly reports ENOENT in case a set element does not exist. ENOENT errors are skipped, however, err is still set and propagated to userspace. # nft destroy element ip raw BLACKLIST { 1.2.3.4 } Error: Could not process rule: No such file or directory destroy element ip raw BLACKLIST { 1.2.3.4 } ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Fixes: f80a612dd77c ("netfilter: nf_tables: add support to destroy operation") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a761ee6796f6..debea1c67701 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7263,10 +7263,11 @@ static int nf_tables_delsetelem(struct sk_buff *skb, if (err < 0) { NL_SET_BAD_ATTR(extack, attr); - break; + return err; } } - return err; + + return 0; } /* From 28628fa952fefc7f2072ce6e8016968cc452b1ba Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 13 Nov 2023 21:13:23 +0100 Subject: [PATCH 375/415] netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test Linkui Xiao reported that there's a race condition when ipset swap and destroy is called, which can lead to crash in add/del/test element operations. Swap then destroy are usual operations to replace a set with another one in a production system. The issue can in some cases be reproduced with the script: ipset create hash_ip1 hash:net family inet hashsize 1024 maxelem 1048576 ipset add hash_ip1 172.20.0.0/16 ipset add hash_ip1 192.168.0.0/16 iptables -A INPUT -m set --match-set hash_ip1 src -j ACCEPT while [ 1 ] do # ... Ongoing traffic... ipset create hash_ip2 hash:net family inet hashsize 1024 maxelem 1048576 ipset add hash_ip2 172.20.0.0/16 ipset swap hash_ip1 hash_ip2 ipset destroy hash_ip2 sleep 0.05 done In the race case the possible order of the operations are CPU0 CPU1 ip_set_test ipset swap hash_ip1 hash_ip2 ipset destroy hash_ip2 hash_net_kadt Swap replaces hash_ip1 with hash_ip2 and then destroy removes hash_ip2 which is the original hash_ip1. ip_set_test was called on hash_ip1 and because destroy removed it, hash_net_kadt crashes. The fix is to force ip_set_swap() to wait for all readers to finish accessing the old set pointers by calling synchronize_rcu(). The first version of the patch was written by Linkui Xiao . v2: synchronize_rcu() is moved into ip_set_swap() in order not to burden ip_set_destroy() unnecessarily when all sets are destroyed. v3: Florian Westphal pointed out that all netfilter hooks run with rcu_read_lock() held and em_ipset.c wraps the entire ip_set_test() in rcu read lock/unlock pair. So there's no need to extend the rcu read locked area in ipset itself. Closes: https://lore.kernel.org/all/69e7963b-e7f8-3ad0-210-7b86eebf7f78@netfilter.org/ Reported by: Linkui Xiao Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 35d2f9c9ada0..4c133e06be1d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -61,6 +61,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); ip_set_dereference((inst)->ip_set_list)[id] #define ip_set_ref_netlink(inst,id) \ rcu_dereference_raw((inst)->ip_set_list)[id] +#define ip_set_dereference_nfnl(p) \ + rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) /* The set types are implemented in modules and registered set types * can be found in ip_set_type_list. Adding/deleting types is @@ -708,15 +710,10 @@ __ip_set_put_netlink(struct ip_set *set) static struct ip_set * ip_set_rcu_get(struct net *net, ip_set_id_t index) { - struct ip_set *set; struct ip_set_net *inst = ip_set_pernet(net); - rcu_read_lock(); - /* ip_set_list itself needs to be protected */ - set = rcu_dereference(inst->ip_set_list)[index]; - rcu_read_unlock(); - - return set; + /* ip_set_list and the set pointer need to be protected */ + return ip_set_dereference_nfnl(inst->ip_set_list)[index]; } static inline void @@ -1397,6 +1394,9 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info, ip_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); + /* Make sure all readers of the old set pointers are completed. */ + synchronize_rcu(); + return 0; } From 8837ba3e58ea1e3d09ae36db80b1e80853aada95 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 13 Nov 2023 21:13:31 +0100 Subject: [PATCH 376/415] netfilter: nf_tables: split async and sync catchall in two functions list_for_each_entry_safe() does not work for the async case which runs under RCU, therefore, split GC logic for catchall in two functions instead, one for each of the sync and async GC variants. The catchall sync GC variant never sees a _DEAD bit set on ever, thus, this handling is removed in such case, moreover, allocate GC sync batch via GFP_KERNEL. Fixes: 93995bf4af2c ("netfilter: nf_tables: remove catchall element in GC sync path") Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 55 +++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index debea1c67701..c0a42989b982 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9680,16 +9680,14 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans) call_rcu(&trans->rcu, nft_trans_gc_trans_free); } -static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, - unsigned int gc_seq, - bool sync) +struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc, + unsigned int gc_seq) { - struct nft_set_elem_catchall *catchall, *next; + struct nft_set_elem_catchall *catchall; const struct nft_set *set = gc->set; - struct nft_elem_priv *elem_priv; struct nft_set_ext *ext; - list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { + list_for_each_entry_rcu(catchall, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); if (!nft_set_elem_expired(ext)) @@ -9699,35 +9697,42 @@ static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, nft_set_elem_dead(ext); dead_elem: - if (sync) - gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); - else - gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); - + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); if (!gc) return NULL; - elem_priv = catchall->elem; - if (sync) { - nft_setelem_data_deactivate(gc->net, gc->set, elem_priv); - nft_setelem_catchall_destroy(catchall); - } - - nft_trans_gc_elem_add(gc, elem_priv); + nft_trans_gc_elem_add(gc, catchall->elem); } return gc; } -struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc, - unsigned int gc_seq) -{ - return nft_trans_gc_catchall(gc, gc_seq, false); -} - struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc) { - return nft_trans_gc_catchall(gc, 0, true); + struct nft_set_elem_catchall *catchall, *next; + const struct nft_set *set = gc->set; + struct nft_elem_priv *elem_priv; + struct nft_set_ext *ext; + + WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)); + + list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { + ext = nft_set_elem_ext(set, catchall->elem); + + if (!nft_set_elem_expired(ext)) + continue; + + gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); + if (!gc) + return NULL; + + elem_priv = catchall->elem; + nft_setelem_data_deactivate(gc->net, gc->set, elem_priv); + nft_setelem_catchall_destroy(catchall); + nft_trans_gc_elem_add(gc, elem_priv); + } + + return gc; } static void nf_tables_module_autoload_cleanup(struct net *net) From 2e6ef8aaba6b709ce91164401fa1c12668510360 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 14 Nov 2023 07:57:53 -0600 Subject: [PATCH 377/415] Remove myself as maintainer of GFS2 I am retiring from Red Hat and will no longer be a maintainer of the gfs2 file system. Signed-off-by: Bob Peterson Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 97f51d5ec1cf..5c9f868e13b6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8950,7 +8950,6 @@ S: Maintained F: scripts/get_maintainer.pl GFS2 FILE SYSTEM -M: Bob Peterson M: Andreas Gruenbacher L: gfs2@lists.linux.dev S: Supported From 782ce431613cf08c3a00dca42ad925c3b1108d09 Mon Sep 17 00:00:00 2001 From: Konstantin Runov Date: Mon, 30 Oct 2023 12:45:08 +0300 Subject: [PATCH 378/415] gcc-plugins: latent_entropy: Fix typo (args -> argc) in plugin description Fix the typo in the plugin description comment. Clearly, "argc" should be used. Signed-off-by: Konstantin Runov Link: https://lore.kernel.org/r/20231030094508.245432-1-runebone1@gmail.com Signed-off-by: Kees Cook --- scripts/gcc-plugins/latent_entropy_plugin.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c index 39e86be60dd2..ff0b192be91f 100644 --- a/scripts/gcc-plugins/latent_entropy_plugin.c +++ b/scripts/gcc-plugins/latent_entropy_plugin.c @@ -17,7 +17,7 @@ * if (argc <= 1) * printf("%s: no command arguments :(\n", *argv); * else - * printf("%s: %d command arguments!\n", *argv, args - 1); + * printf("%s: %d command arguments!\n", *argv, argc - 1); * } * * after: @@ -47,7 +47,7 @@ * // perturb_local_entropy() * } else { * local_entropy ^= 3896280633962944730; - * printf("%s: %d command arguments!\n", *argv, args - 1); + * printf("%s: %d command arguments!\n", *argv, argc - 1); * } * * // latent_entropy_execute() 4. From 77618db346455129424fadbbaec596a09feaf3bb Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Thu, 12 Oct 2023 12:55:34 -0700 Subject: [PATCH 379/415] zstd: Fix array-index-out-of-bounds UBSAN warning Zstd used an array of length 1 to mean a flexible array for C89 compatibility. Switch to a C99 flexible array to fix the UBSAN warning. Tested locally by booting the kernel and writing to and reading from a BtrFS filesystem with zstd compression enabled. I was unable to reproduce the issue before the fix, however it is a trivial change. Link: https://lkml.kernel.org/r/20231012213428.1390905-1-nickrterrell@gmail.com Reported-by: syzbot+1f2eb3e8cd123ffce499@syzkaller.appspotmail.com Reported-by: Eric Biggers Reported-by: Kees Cook Signed-off-by: Nick Terrell Reviewed-by: Kees Cook --- lib/zstd/common/fse_decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/zstd/common/fse_decompress.c b/lib/zstd/common/fse_decompress.c index a0d06095be83..8dcb8ca39767 100644 --- a/lib/zstd/common/fse_decompress.c +++ b/lib/zstd/common/fse_decompress.c @@ -312,7 +312,7 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size typedef struct { short ncount[FSE_MAX_SYMBOL_VALUE + 1]; - FSE_DTable dtable[1]; /* Dynamically sized */ + FSE_DTable dtable[]; /* Dynamically sized */ } FSE_DecompressWksp; From fa02de9e75889915b554eda1964a631fd019973b Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 13 Nov 2023 19:42:49 +0200 Subject: [PATCH 380/415] net: stmmac: fix rx budget limit check The while loop condition verifies 'count < limit'. Neither value change before the 'count >= limit' check. As is this check is dead code. But code inspection reveals a code path that modifies 'count' and then goto 'drain_data' and back to 'read_again'. So there is a need to verify count value sanity after 'read_again'. Move 'read_again' up to fix the count limit check. Fixes: ec222003bd94 ("net: stmmac: Prepare to add Split Header support") Signed-off-by: Baruch Siach Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/d9486296c3b6b12ab3a0515fcd47d56447a07bfc.1699897370.git.baruch@tkos.co.il Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3e50fd53a617..f28838c8cdb3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5328,10 +5328,10 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) len = 0; } +read_again: if (count >= limit) break; -read_again: buf1_len = 0; buf2_len = 0; entry = next_entry; From b6cb4541853c7ee512111b0e7ddf3cb66c99c137 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 13 Nov 2023 19:42:50 +0200 Subject: [PATCH 381/415] net: stmmac: avoid rx queue overrun dma_rx_size can be set as low as 64. Rx budget might be higher than that. Make sure to not overrun allocated rx buffers when budget is larger. Leave one descriptor unused to avoid wrap around of 'dirty_rx' vs 'cur_rx'. Signed-off-by: Baruch Siach Reviewed-by: Serge Semin Fixes: 47dd7a540b8a ("net: add support for STMicroelectronics Ethernet controllers.") Link: https://lore.kernel.org/r/d95413e44c97d4692e72cec13a75f894abeb6998.1699897370.git.baruch@tkos.co.il Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f28838c8cdb3..2afb2bd25977 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5293,6 +5293,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) dma_dir = page_pool_get_dma_dir(rx_q->page_pool); buf_sz = DIV_ROUND_UP(priv->dma_conf.dma_buf_sz, PAGE_SIZE) * PAGE_SIZE; + limit = min(priv->dma_conf.dma_rx_size - 1, (unsigned int)limit); if (netif_msg_rx_status(priv)) { void *rx_head; From 907d1bdb8b2cc0357d03a1c34d2a08d9943760b1 Mon Sep 17 00:00:00 2001 From: Alex Pakhunov Date: Mon, 13 Nov 2023 10:23:49 -0800 Subject: [PATCH 382/415] tg3: Move the [rt]x_dropped counters to tg3_napi This change moves [rt]x_dropped counters to tg3_napi so that they can be updated by a single writer, race-free. Signed-off-by: Alex Pakhunov Signed-off-by: Vincent Wong Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/20231113182350.37472-1-alexey.pakhunov@spacex.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/tg3.c | 38 +++++++++++++++++++++++++---- drivers/net/ethernet/broadcom/tg3.h | 4 +-- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 1dee27349367..884e9eecc471 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6889,7 +6889,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) desc_idx, *post_ptr); drop_it_no_recycle: /* Other statistics kept track of by card. */ - tp->rx_dropped++; + tnapi->rx_dropped++; goto next_pkt; } @@ -8190,7 +8190,7 @@ static netdev_tx_t __tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) drop: dev_kfree_skb_any(skb); drop_nofree: - tp->tx_dropped++; + tnapi->tx_dropped++; return NETDEV_TX_OK; } @@ -9405,7 +9405,7 @@ static void __tg3_set_rx_mode(struct net_device *); /* tp->lock is held. */ static int tg3_halt(struct tg3 *tp, int kind, bool silent) { - int err; + int err, i; tg3_stop_fw(tp); @@ -9426,6 +9426,13 @@ static int tg3_halt(struct tg3 *tp, int kind, bool silent) /* And make sure the next sample is new data */ memset(tp->hw_stats, 0, sizeof(struct tg3_hw_stats)); + + for (i = 0; i < TG3_IRQ_MAX_VECS; ++i) { + struct tg3_napi *tnapi = &tp->napi[i]; + + tnapi->rx_dropped = 0; + tnapi->tx_dropped = 0; + } } return err; @@ -11975,6 +11982,9 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats) { struct rtnl_link_stats64 *old_stats = &tp->net_stats_prev; struct tg3_hw_stats *hw_stats = tp->hw_stats; + unsigned long rx_dropped; + unsigned long tx_dropped; + int i; stats->rx_packets = old_stats->rx_packets + get_stat64(&hw_stats->rx_ucast_packets) + @@ -12021,8 +12031,26 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats) stats->rx_missed_errors = old_stats->rx_missed_errors + get_stat64(&hw_stats->rx_discards); - stats->rx_dropped = tp->rx_dropped; - stats->tx_dropped = tp->tx_dropped; + /* Aggregate per-queue counters. The per-queue counters are updated + * by a single writer, race-free. The result computed by this loop + * might not be 100% accurate (counters can be updated in the middle of + * the loop) but the next tg3_get_nstats() will recompute the current + * value so it is acceptable. + * + * Note that these counters wrap around at 4G on 32bit machines. + */ + rx_dropped = (unsigned long)(old_stats->rx_dropped); + tx_dropped = (unsigned long)(old_stats->tx_dropped); + + for (i = 0; i < tp->irq_cnt; i++) { + struct tg3_napi *tnapi = &tp->napi[i]; + + rx_dropped += tnapi->rx_dropped; + tx_dropped += tnapi->tx_dropped; + } + + stats->rx_dropped = rx_dropped; + stats->tx_dropped = tx_dropped; } static int tg3_get_regs_len(struct net_device *dev) diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index ae5c01bd1110..5016475e5005 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -3018,6 +3018,7 @@ struct tg3_napi { u16 *rx_rcb_prod_idx; struct tg3_rx_prodring_set prodring; struct tg3_rx_buffer_desc *rx_rcb; + unsigned long rx_dropped; u32 tx_prod ____cacheline_aligned; u32 tx_cons; @@ -3026,6 +3027,7 @@ struct tg3_napi { u32 prodmbox; struct tg3_tx_buffer_desc *tx_ring; struct tg3_tx_ring_info *tx_buffers; + unsigned long tx_dropped; dma_addr_t status_mapping; dma_addr_t rx_rcb_mapping; @@ -3220,8 +3222,6 @@ struct tg3 { /* begin "everything else" cacheline(s) section */ - unsigned long rx_dropped; - unsigned long tx_dropped; struct rtnl_link_stats64 net_stats_prev; struct tg3_ethtool_stats estats_prev; From 17dd5efe5f36a96bd78012594fabe21efb01186b Mon Sep 17 00:00:00 2001 From: Alex Pakhunov Date: Mon, 13 Nov 2023 10:23:50 -0800 Subject: [PATCH 383/415] tg3: Increment tx_dropped in tg3_tso_bug() tg3_tso_bug() drops a packet if it cannot be segmented for any reason. The number of discarded frames should be incremented accordingly. Signed-off-by: Alex Pakhunov Signed-off-by: Vincent Wong Reviewed-by: Pavan Chebbi Link: https://lore.kernel.org/r/20231113182350.37472-2-alexey.pakhunov@spacex.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/tg3.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 884e9eecc471..48b6191efa56 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -7918,8 +7918,10 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi, segs = skb_gso_segment(skb, tp->dev->features & ~(NETIF_F_TSO | NETIF_F_TSO6)); - if (IS_ERR(segs) || !segs) + if (IS_ERR(segs) || !segs) { + tnapi->tx_dropped++; goto tg3_tso_bug_end; + } skb_list_walk_safe(segs, seg, next) { skb_mark_not_on_list(seg); From 09d4c14c6c5e6e781a3879fed7f8e116a18b8c65 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 13 Nov 2023 10:32:56 -0800 Subject: [PATCH 384/415] pds_core: use correct index to mask irq Use the qcq's interrupt index, not the irq number, to mask the interrupt. Since the irq number can be out of range from the number of possible interrupts, we can end up accessing and potentially scribbling on out-of-range and/or unmapped memory, making the kernel angry. [ 3116.039364] BUG: unable to handle page fault for address: ffffbeea1c3edf84 [ 3116.047059] #PF: supervisor write access in kernel mode [ 3116.052895] #PF: error_code(0x0002) - not-present page [ 3116.058636] PGD 100000067 P4D 100000067 PUD 1001f2067 PMD 10f82e067 PTE 0 [ 3116.066221] Oops: 0002 [#1] SMP NOPTI [ 3116.092948] RIP: 0010:iowrite32+0x9/0x76 [ 3116.190452] Call Trace: [ 3116.193185] [ 3116.195430] ? show_trace_log_lvl+0x1d6/0x2f9 [ 3116.200298] ? show_trace_log_lvl+0x1d6/0x2f9 [ 3116.205166] ? pdsc_adminq_isr+0x43/0x55 [pds_core] [ 3116.210618] ? __die_body.cold+0x8/0xa [ 3116.214806] ? page_fault_oops+0x16d/0x1ac [ 3116.219382] ? exc_page_fault+0xbe/0x13b [ 3116.223764] ? asm_exc_page_fault+0x22/0x27 [ 3116.228440] ? iowrite32+0x9/0x76 [ 3116.232143] pdsc_adminq_isr+0x43/0x55 [pds_core] [ 3116.237627] __handle_irq_event_percpu+0x3a/0x184 [ 3116.243088] handle_irq_event+0x57/0xb0 [ 3116.247575] handle_edge_irq+0x87/0x225 [ 3116.252062] __common_interrupt+0x3e/0xbc [ 3116.256740] common_interrupt+0x7b/0x98 [ 3116.261216] [ 3116.263745] [ 3116.266268] asm_common_interrupt+0x22/0x27 Reported-by: Joao Martins Fixes: 01ba61b55b20 ("pds_core: Add adminq processing and commands") Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20231113183257.71110-2-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/adminq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c index 045fe133f6ee..5beadabc2136 100644 --- a/drivers/net/ethernet/amd/pds_core/adminq.c +++ b/drivers/net/ethernet/amd/pds_core/adminq.c @@ -146,7 +146,7 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data) } queue_work(pdsc->wq, &qcq->work); - pds_core_intr_mask(&pdsc->intr_ctrl[irq], PDS_CORE_INTR_MASK_CLEAR); + pds_core_intr_mask(&pdsc->intr_ctrl[qcq->intx], PDS_CORE_INTR_MASK_CLEAR); return IRQ_HANDLED; } From 7c02f6ae676a954216a192612040f9a0cde3adf7 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 13 Nov 2023 10:32:57 -0800 Subject: [PATCH 385/415] pds_core: fix up some format-truncation complaints Our friendly kernel test robot pointed out a couple of potential string truncation issues. None of which were we worried about, but can be relatively easily fixed to quiet the complaints. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310211736.66syyDpp-lkp@intel.com/ Fixes: 45d76f492938 ("pds_core: set up device and adminq") Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20231113183257.71110-3-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/core.h | 2 +- drivers/net/ethernet/amd/pds_core/dev.c | 8 ++++++-- drivers/net/ethernet/amd/pds_core/devlink.c | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index f3a7deda9972..e35d3e7006bf 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -15,7 +15,7 @@ #define PDSC_DRV_DESCRIPTION "AMD/Pensando Core Driver" #define PDSC_WATCHDOG_SECS 5 -#define PDSC_QUEUE_NAME_MAX_SZ 32 +#define PDSC_QUEUE_NAME_MAX_SZ 16 #define PDSC_ADMINQ_MIN_LENGTH 16 /* must be a power of two */ #define PDSC_NOTIFYQ_LENGTH 64 /* must be a power of two */ #define PDSC_TEARDOWN_RECOVERY false diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index 7c1b965d61a9..31940b857e0e 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -261,10 +261,14 @@ static int pdsc_identify(struct pdsc *pdsc) struct pds_core_drv_identity drv = {}; size_t sz; int err; + int n; drv.drv_type = cpu_to_le32(PDS_DRIVER_LINUX); - snprintf(drv.driver_ver_str, sizeof(drv.driver_ver_str), - "%s %s", PDS_CORE_DRV_NAME, utsname()->release); + /* Catching the return quiets a Wformat-truncation complaint */ + n = snprintf(drv.driver_ver_str, sizeof(drv.driver_ver_str), + "%s %s", PDS_CORE_DRV_NAME, utsname()->release); + if (n > sizeof(drv.driver_ver_str)) + dev_dbg(pdsc->dev, "release name truncated, don't care\n"); /* Next let's get some info about the device * We use the devcmd_lock at this level in order to diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c index 57f88c8b37de..e9948ea5bbcd 100644 --- a/drivers/net/ethernet/amd/pds_core/devlink.c +++ b/drivers/net/ethernet/amd/pds_core/devlink.c @@ -104,7 +104,7 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req, struct pds_core_fw_list_info fw_list; struct pdsc *pdsc = devlink_priv(dl); union pds_core_dev_comp comp; - char buf[16]; + char buf[32]; int listlen; int err; int i; From 278a370c1766060d2144d6cf0b06c101e1043b6d Mon Sep 17 00:00:00 2001 From: Ziwei Xiao Date: Mon, 13 Nov 2023 16:41:44 -0800 Subject: [PATCH 386/415] gve: Fixes for napi_poll when budget is 0 Netpoll will explicilty pass the polling call with a budget of 0 to indicate it's clearing the Tx path only. For the gve_rx_poll and gve_xdp_poll, they were mistakenly taking the 0 budget as the indication to do all the work. Add check to avoid the rx path and xdp path being called when budget is 0. And also avoid napi_complete_done being called when budget is 0 for netpoll. Fixes: f5cedc84a30d ("gve: Add transmit and receive support") Signed-off-by: Ziwei Xiao Link: https://lore.kernel.org/r/20231114004144.2022268-1-ziweixiao@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_main.c | 8 +++++++- drivers/net/ethernet/google/gve/gve_rx.c | 4 ---- drivers/net/ethernet/google/gve/gve_tx.c | 4 ---- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 276f996f95dc..2d42e733837b 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -254,10 +254,13 @@ static int gve_napi_poll(struct napi_struct *napi, int budget) if (block->tx) { if (block->tx->q_num < priv->tx_cfg.num_queues) reschedule |= gve_tx_poll(block, budget); - else + else if (budget) reschedule |= gve_xdp_poll(block, budget); } + if (!budget) + return 0; + if (block->rx) { work_done = gve_rx_poll(block, budget); reschedule |= work_done == budget; @@ -298,6 +301,9 @@ static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) if (block->tx) reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); + if (!budget) + return 0; + if (block->rx) { work_done = gve_rx_poll_dqo(block, budget); reschedule |= work_done == budget; diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index e84a066aa1a4..73655347902d 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -1007,10 +1007,6 @@ int gve_rx_poll(struct gve_notify_block *block, int budget) feat = block->napi.dev->features; - /* If budget is 0, do all the work */ - if (budget == 0) - budget = INT_MAX; - if (budget > 0) work_done = gve_clean_rx_done(rx, budget, feat); diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 6957a865cff3..9f6ffc4a54f0 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -925,10 +925,6 @@ bool gve_xdp_poll(struct gve_notify_block *block, int budget) bool repoll; u32 to_do; - /* If budget is 0, do all the work */ - if (budget == 0) - budget = INT_MAX; - /* Find out how much work there is to be done */ nic_done = gve_tx_load_event_counter(priv, tx); to_do = min_t(u32, (nic_done - tx->done), budget); From 9fce92f050f448a0d1ddd9083ef967d9930f1e52 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 14 Nov 2023 00:16:13 +0100 Subject: [PATCH 387/415] mptcp: deal with large GSO size After the blamed commit below, the TCP sockets (and the MPTCP subflows) can build egress packets larger than 64K. That exceeds the maximum DSS data size, the length being misrepresent on the wire and the stream being corrupted, as later observed on the receiver: WARNING: CPU: 0 PID: 9696 at net/mptcp/protocol.c:705 __mptcp_move_skbs_from_subflow+0x2604/0x26e0 CPU: 0 PID: 9696 Comm: syz-executor.7 Not tainted 6.6.0-rc5-gcd8bdf563d46 #45 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 netlink: 8 bytes leftover after parsing attributes in process `syz-executor.4'. RIP: 0010:__mptcp_move_skbs_from_subflow+0x2604/0x26e0 net/mptcp/protocol.c:705 RSP: 0018:ffffc90000006e80 EFLAGS: 00010246 RAX: ffffffff83e9f674 RBX: ffff88802f45d870 RCX: ffff888102ad0000 netlink: 8 bytes leftover after parsing attributes in process `syz-executor.4'. RDX: 0000000080000303 RSI: 0000000000013908 RDI: 0000000000003908 RBP: ffffc90000007110 R08: ffffffff83e9e078 R09: 1ffff1100e548c8a R10: dffffc0000000000 R11: ffffed100e548c8b R12: 0000000000013908 R13: dffffc0000000000 R14: 0000000000003908 R15: 000000000031cf29 FS: 00007f239c47e700(0000) GS:ffff88811b200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f239c45cd78 CR3: 000000006a66c006 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600 PKRU: 55555554 Call Trace: mptcp_data_ready+0x263/0xac0 net/mptcp/protocol.c:819 subflow_data_ready+0x268/0x6d0 net/mptcp/subflow.c:1409 tcp_data_queue+0x21a1/0x7a60 net/ipv4/tcp_input.c:5151 tcp_rcv_established+0x950/0x1d90 net/ipv4/tcp_input.c:6098 tcp_v6_do_rcv+0x554/0x12f0 net/ipv6/tcp_ipv6.c:1483 tcp_v6_rcv+0x2e26/0x3810 net/ipv6/tcp_ipv6.c:1749 ip6_protocol_deliver_rcu+0xd6b/0x1ae0 net/ipv6/ip6_input.c:438 ip6_input+0x1c5/0x470 net/ipv6/ip6_input.c:483 ipv6_rcv+0xef/0x2c0 include/linux/netfilter.h:304 __netif_receive_skb+0x1ea/0x6a0 net/core/dev.c:5532 process_backlog+0x353/0x660 net/core/dev.c:5974 __napi_poll+0xc6/0x5a0 net/core/dev.c:6536 net_rx_action+0x6a0/0xfd0 net/core/dev.c:6603 __do_softirq+0x184/0x524 kernel/softirq.c:553 do_softirq+0xdd/0x130 kernel/softirq.c:454 Address the issue explicitly bounding the maximum GSO size to what MPTCP actually allows. Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/450 Fixes: 7c4e983c4f3c ("net: allow gso_max_size to exceed 65536") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Link: https://lore.kernel.org/r/20231114-upstream-net-20231113-mptcp-misc-fixes-6-7-rc2-v1-1-7b9cd6a7b7f4@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a0b8356cd8c5..66e947054945 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1230,6 +1230,8 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk, mptcp_do_fallback(ssk); } +#define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1)) + static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, struct mptcp_data_frag *dfrag, struct mptcp_sendmsg_info *info) @@ -1256,6 +1258,8 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, return -EAGAIN; /* compute send limit */ + if (unlikely(ssk->sk_gso_max_size > MPTCP_MAX_GSO_SIZE)) + ssk->sk_gso_max_size = MPTCP_MAX_GSO_SIZE; info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags); copy = info->size_goal; From d109a7767273d1706b541c22b83a0323823dfde4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 14 Nov 2023 00:16:14 +0100 Subject: [PATCH 388/415] mptcp: fix possible NULL pointer dereference on close After the blamed commit below, the MPTCP release callback can dereference the first subflow pointer via __mptcp_set_connected() and send buffer auto-tuning. Such pointer is always expected to be valid, except at socket destruction time, when the first subflow is deleted and the pointer zeroed. If the connect event is handled by the release callback while the msk socket is finally released, MPTCP hits the following splat: general protection fault, probably for non-canonical address 0xdffffc00000000f2: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000790-0x0000000000000797] CPU: 1 PID: 26719 Comm: syz-executor.2 Not tainted 6.6.0-syzkaller-10102-gff269e2cd5ad #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 RIP: 0010:mptcp_subflow_ctx net/mptcp/protocol.h:542 [inline] RIP: 0010:__mptcp_propagate_sndbuf net/mptcp/protocol.h:813 [inline] RIP: 0010:__mptcp_set_connected+0x57/0x3e0 net/mptcp/subflow.c:424 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff8a62323c RDX: 00000000000000f2 RSI: ffffffff8a630116 RDI: 0000000000000790 RBP: ffff88803334b100 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000034 R12: ffff88803334b198 R13: ffff888054f0b018 R14: 0000000000000000 R15: ffff88803334b100 FS: 0000000000000000(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fbcb4f75198 CR3: 000000006afb5000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: mptcp_release_cb+0xa2c/0xc40 net/mptcp/protocol.c:3405 release_sock+0xba/0x1f0 net/core/sock.c:3537 mptcp_close+0x32/0xf0 net/mptcp/protocol.c:3084 inet_release+0x132/0x270 net/ipv4/af_inet.c:433 inet6_release+0x4f/0x70 net/ipv6/af_inet6.c:485 __sock_release+0xae/0x260 net/socket.c:659 sock_close+0x1c/0x20 net/socket.c:1419 __fput+0x270/0xbb0 fs/file_table.c:394 task_work_run+0x14d/0x240 kernel/task_work.c:180 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0xa92/0x2a20 kernel/exit.c:876 do_group_exit+0xd4/0x2a0 kernel/exit.c:1026 get_signal+0x23ba/0x2790 kernel/signal.c:2900 arch_do_signal_or_restart+0x90/0x7f0 arch/x86/kernel/signal.c:309 exit_to_user_mode_loop kernel/entry/common.c:168 [inline] exit_to_user_mode_prepare+0x11f/0x240 kernel/entry/common.c:204 __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] syscall_exit_to_user_mode+0x1d/0x60 kernel/entry/common.c:296 do_syscall_64+0x4b/0x110 arch/x86/entry/common.c:88 entry_SYSCALL_64_after_hwframe+0x63/0x6b RIP: 0033:0x7fb515e7cae9 Code: Unable to access opcode bytes at 0x7fb515e7cabf. RSP: 002b:00007fb516c560c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: 000000000000003c RBX: 00007fb515f9c120 RCX: 00007fb515e7cae9 RDX: 0000000000000000 RSI: 0000000020000140 RDI: 0000000000000006 RBP: 00007fb515ec847a R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000006e R14: 00007fb515f9c120 R15: 00007ffc631eb968 To avoid sparkling unneeded conditionals, address the issue explicitly checking msk->first only in the critical place. Fixes: 8005184fd1ca ("mptcp: refactor sndbuf auto-tuning") Cc: stable@vger.kernel.org Reported-by: Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/454 Reported-by: Eric Dumazet Closes: https://lore.kernel.org/netdev/CANn89iLZUA6S2a=K8GObnS62KK6Jt4B7PsAs7meMFooM8xaTgw@mail.gmail.com/ Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Link: https://lore.kernel.org/r/20231114-upstream-net-20231113-mptcp-misc-fixes-6-7-rc2-v1-2-7b9cd6a7b7f4@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 66e947054945..bc81ea53a049 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3402,10 +3402,11 @@ static void mptcp_release_cb(struct sock *sk) if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags)) __mptcp_clean_una_wakeup(sk); if (unlikely(msk->cb_flags)) { - /* be sure to set the current sk state before tacking actions - * depending on sk_state, that is processing MPTCP_ERROR_REPORT + /* be sure to set the current sk state before taking actions + * depending on sk_state (MPTCP_ERROR_REPORT) + * On sk release avoid actions depending on the first subflow */ - if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags)) + if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags) && msk->first) __mptcp_set_connected(sk); if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags)) __mptcp_error_report(sk); From 8df220b29282e8b450ea57be62e1eccd4996837c Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 14 Nov 2023 00:16:15 +0100 Subject: [PATCH 389/415] mptcp: add validity check for sending RM_ADDR This patch adds the validity check for sending RM_ADDRs for userspace PM in mptcp_pm_remove_addrs(), only send a RM_ADDR when the address is in the anno_list or conn_list. Fixes: 8b1c94da1e48 ("mptcp: only send RM_ADDR in nl_cmd_remove") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Link: https://lore.kernel.org/r/20231114-upstream-net-20231113-mptcp-misc-fixes-6-7-rc2-v1-3-7b9cd6a7b7f4@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 1529ec358815..bf4d96f6f99a 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1515,8 +1515,9 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) struct mptcp_pm_addr_entry *entry; list_for_each_entry(entry, rm_list, list) { - remove_anno_list_by_saddr(msk, &entry->addr); - if (alist.nr < MPTCP_RM_IDS_MAX) + if ((remove_anno_list_by_saddr(msk, &entry->addr) || + lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) && + alist.nr < MPTCP_RM_IDS_MAX) alist.ids[alist.nr++] = entry->addr.id; } From 7679d34f97b7a09fd565f5729f79fd61b7c55329 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 14 Nov 2023 00:16:16 +0100 Subject: [PATCH 390/415] mptcp: fix setsockopt(IP_TOS) subflow locking The MPTCP implementation of the IP_TOS socket option uses the lockless variant of the TOS manipulation helper and does not hold such lock at the helper invocation time. Add the required locking. Fixes: ffcacff87cd6 ("mptcp: Support for IP_TOS for MPTCP setsockopt()") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/457 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Link: https://lore.kernel.org/r/20231114-upstream-net-20231113-mptcp-misc-fixes-6-7-rc2-v1-4-7b9cd6a7b7f4@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/sockopt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 77f5e8932abf..353680733700 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -738,8 +738,11 @@ static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, val = READ_ONCE(inet_sk(sk)->tos); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow; + slow = lock_sock_fast(ssk); __ip_sock_set_tos(ssk, val); + unlock_sock_fast(ssk, slow); } release_sock(sk); From 7cefbe5e1dacc7236caa77e9d072423f21422fe2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 14 Nov 2023 00:16:17 +0100 Subject: [PATCH 391/415] selftests: mptcp: fix fastclose with csum failure Running the mp_join selftest manually with the following command line: ./mptcp_join.sh -z -C leads to some failures: 002 fastclose server test # ... rtx [fail] got 1 MP_RST[s] TX expected 0 # ... rstrx [fail] got 1 MP_RST[s] RX expected 0 The problem is really in the wrong expectations for the RST checks implied by the csum validation. Note that the same check is repeated explicitly in the same test-case, with the correct expectation and pass successfully. Address the issue explicitly setting the correct expectation for the failing checks. Reported-by: Xiumei Mu Fixes: 6bf41020b72b ("selftests: mptcp: update and extend fastclose test-cases") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Link: https://lore.kernel.org/r/20231114-upstream-net-20231113-mptcp-misc-fixes-6-7-rc2-v1-5-7b9cd6a7b7f4@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 75a2438efdf3..3c94f2f194d6 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3240,7 +3240,7 @@ fastclose_tests() if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then test_linkfail=1024 fastclose=server \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 + chk_join_nr 0 0 0 0 0 0 1 chk_fclose_nr 1 1 invert chk_rst_nr 1 1 fi From 5dd9ad32d7758b1a76742f394acf0eb3ac8a636a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 27 Sep 2023 10:29:02 +0200 Subject: [PATCH 392/415] xen/events: drop xen_allocate_irqs_dynamic() Instead of having a common function for allocating a single IRQ or a consecutive number of IRQs, split up the functionality into the callers of xen_allocate_irqs_dynamic(). This allows to handle any allocation error in xen_irq_init() gracefully instead of panicing the system. Let xen_irq_init() return the irq_info pointer or NULL in case of an allocation error. Additionally set the IRQ into irq_info already at allocation time, as otherwise the IRQ would be '0' (which is a valid IRQ number) until being set. Signed-off-by: Juergen Gross Reviewed-by: Oleksandr Tyshchenko Signed-off-by: Juergen Gross --- drivers/xen/events/events_base.c | 78 +++++++++++++++++++------------- 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index a810e8904fbf..75f8d1dcbbb7 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -304,6 +304,13 @@ static void channels_on_cpu_inc(struct irq_info *info) info->is_accounted = 1; } +static void xen_irq_free_desc(unsigned int irq) +{ + /* Legacy IRQ descriptors are managed by the arch. */ + if (irq >= nr_legacy_irqs()) + irq_free_desc(irq); +} + static void delayed_free_irq(struct work_struct *work) { struct irq_info *info = container_of(to_rcu_work(work), struct irq_info, @@ -315,9 +322,7 @@ static void delayed_free_irq(struct work_struct *work) kfree(info); - /* Legacy IRQ descriptors are managed by the arch. */ - if (irq >= nr_legacy_irqs()) - irq_free_desc(irq); + xen_irq_free_desc(irq); } /* Constructors for packed IRQ information. */ @@ -332,7 +337,6 @@ static int xen_irq_info_common_setup(struct irq_info *info, BUG_ON(info->type != IRQT_UNBOUND && info->type != type); info->type = type; - info->irq = irq; info->evtchn = evtchn; info->cpu = cpu; info->mask_reason = EVT_MASK_REASON_EXPLICIT; @@ -733,45 +737,43 @@ void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) } EXPORT_SYMBOL_GPL(xen_irq_lateeoi); -static void xen_irq_init(unsigned irq) +static struct irq_info *xen_irq_init(unsigned int irq) { struct irq_info *info; info = kzalloc(sizeof(*info), GFP_KERNEL); - if (info == NULL) - panic("Unable to allocate metadata for IRQ%d\n", irq); + if (info) { + info->irq = irq; + info->type = IRQT_UNBOUND; + info->refcnt = -1; + INIT_RCU_WORK(&info->rwork, delayed_free_irq); - info->type = IRQT_UNBOUND; - info->refcnt = -1; - INIT_RCU_WORK(&info->rwork, delayed_free_irq); + set_info_for_irq(irq, info); + /* + * Interrupt affinity setting can be immediate. No point + * in delaying it until an interrupt is handled. + */ + irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - set_info_for_irq(irq, info); - /* - * Interrupt affinity setting can be immediate. No point - * in delaying it until an interrupt is handled. - */ - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - - INIT_LIST_HEAD(&info->eoi_list); - list_add_tail(&info->list, &xen_irq_list_head); -} - -static int __must_check xen_allocate_irqs_dynamic(int nvec) -{ - int i, irq = irq_alloc_descs(-1, 0, nvec, -1); - - if (irq >= 0) { - for (i = 0; i < nvec; i++) - xen_irq_init(irq + i); + INIT_LIST_HEAD(&info->eoi_list); + list_add_tail(&info->list, &xen_irq_list_head); } - return irq; + return info; } static inline int __must_check xen_allocate_irq_dynamic(void) { + int irq = irq_alloc_desc_from(0, -1); - return xen_allocate_irqs_dynamic(1); + if (irq >= 0) { + if (!xen_irq_init(irq)) { + xen_irq_free_desc(irq); + irq = -1; + } + } + + return irq; } static int __must_check xen_allocate_irq_gsi(unsigned gsi) @@ -793,7 +795,10 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) else irq = irq_alloc_desc_at(gsi, -1); - xen_irq_init(irq); + if (!xen_irq_init(irq)) { + xen_irq_free_desc(irq); + irq = -1; + } return irq; } @@ -963,6 +968,11 @@ static void __unbind_from_irq(unsigned int irq) evtchn_port_t evtchn = evtchn_from_irq(irq); struct irq_info *info = info_for_irq(irq); + if (!info) { + xen_irq_free_desc(irq); + return; + } + if (info->refcnt > 0) { info->refcnt--; if (info->refcnt != 0) @@ -1101,11 +1111,14 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, mutex_lock(&irq_mapping_update_lock); - irq = xen_allocate_irqs_dynamic(nvec); + irq = irq_alloc_descs(-1, 0, nvec, -1); if (irq < 0) goto out; for (i = 0; i < nvec; i++) { + if (!xen_irq_init(irq + i)) + goto error_irq; + irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name); ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid, @@ -1730,6 +1743,7 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) so there should be a proper type */ BUG_ON(info->type == IRQT_UNBOUND); + info->irq = irq; (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL); mutex_unlock(&irq_mapping_update_lock); From 3fcdaf3d7634338c3f5cbfa7451eb0b6b0024844 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 28 Sep 2023 09:09:52 +0200 Subject: [PATCH 393/415] xen/events: modify internal [un]bind interfaces Modify the internal bind- and unbind-interfaces to take a struct irq_info parameter. When allocating a new IRQ pass the pointer from the allocating function further up. This will reduce the number of info_for_irq() calls and make the code more efficient. Signed-off-by: Juergen Gross Reviewed-by: Oleksandr Tyshchenko Signed-off-by: Juergen Gross --- drivers/xen/events/events_base.c | 259 +++++++++++++++---------------- 1 file changed, 124 insertions(+), 135 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 75f8d1dcbbb7..88f0c80d0f87 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -327,7 +327,6 @@ static void delayed_free_irq(struct work_struct *work) /* Constructors for packed IRQ information. */ static int xen_irq_info_common_setup(struct irq_info *info, - unsigned irq, enum xen_irq_type type, evtchn_port_t evtchn, unsigned short cpu) @@ -342,23 +341,22 @@ static int xen_irq_info_common_setup(struct irq_info *info, info->mask_reason = EVT_MASK_REASON_EXPLICIT; raw_spin_lock_init(&info->lock); - ret = set_evtchn_to_irq(evtchn, irq); + ret = set_evtchn_to_irq(evtchn, info->irq); if (ret < 0) return ret; - irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN); + irq_clear_status_flags(info->irq, IRQ_NOREQUEST | IRQ_NOAUTOEN); return xen_evtchn_port_setup(evtchn); } -static int xen_irq_info_evtchn_setup(unsigned irq, +static int xen_irq_info_evtchn_setup(struct irq_info *info, evtchn_port_t evtchn, struct xenbus_device *dev) { - struct irq_info *info = info_for_irq(irq); int ret; - ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0); + ret = xen_irq_info_common_setup(info, IRQT_EVTCHN, evtchn, 0); info->u.interdomain = dev; if (dev) atomic_inc(&dev->event_channels); @@ -366,50 +364,37 @@ static int xen_irq_info_evtchn_setup(unsigned irq, return ret; } -static int xen_irq_info_ipi_setup(unsigned cpu, - unsigned irq, - evtchn_port_t evtchn, - enum ipi_vector ipi) +static int xen_irq_info_ipi_setup(struct irq_info *info, unsigned int cpu, + evtchn_port_t evtchn, enum ipi_vector ipi) { - struct irq_info *info = info_for_irq(irq); - info->u.ipi = ipi; - per_cpu(ipi_to_irq, cpu)[ipi] = irq; + per_cpu(ipi_to_irq, cpu)[ipi] = info->irq; per_cpu(ipi_to_evtchn, cpu)[ipi] = evtchn; - return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0); + return xen_irq_info_common_setup(info, IRQT_IPI, evtchn, 0); } -static int xen_irq_info_virq_setup(unsigned cpu, - unsigned irq, - evtchn_port_t evtchn, - unsigned virq) +static int xen_irq_info_virq_setup(struct irq_info *info, unsigned int cpu, + evtchn_port_t evtchn, unsigned int virq) { - struct irq_info *info = info_for_irq(irq); - info->u.virq = virq; - per_cpu(virq_to_irq, cpu)[virq] = irq; + per_cpu(virq_to_irq, cpu)[virq] = info->irq; - return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0); + return xen_irq_info_common_setup(info, IRQT_VIRQ, evtchn, 0); } -static int xen_irq_info_pirq_setup(unsigned irq, - evtchn_port_t evtchn, - unsigned pirq, - unsigned gsi, - uint16_t domid, - unsigned char flags) +static int xen_irq_info_pirq_setup(struct irq_info *info, evtchn_port_t evtchn, + unsigned int pirq, unsigned int gsi, + uint16_t domid, unsigned char flags) { - struct irq_info *info = info_for_irq(irq); - info->u.pirq.pirq = pirq; info->u.pirq.gsi = gsi; info->u.pirq.domid = domid; info->u.pirq.flags = flags; - return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0); + return xen_irq_info_common_setup(info, IRQT_PIRQ, evtchn, 0); } static void xen_irq_info_cleanup(struct irq_info *info) @@ -453,20 +438,16 @@ int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq, return irq; } -static enum ipi_vector ipi_from_irq(unsigned irq) +static enum ipi_vector ipi_from_irq(struct irq_info *info) { - struct irq_info *info = info_for_irq(irq); - BUG_ON(info == NULL); BUG_ON(info->type != IRQT_IPI); return info->u.ipi; } -static unsigned virq_from_irq(unsigned irq) +static unsigned int virq_from_irq(struct irq_info *info) { - struct irq_info *info = info_for_irq(irq); - BUG_ON(info == NULL); BUG_ON(info->type != IRQT_VIRQ); @@ -533,13 +514,9 @@ static bool pirq_needs_eoi_flag(unsigned irq) return info->u.pirq.flags & PIRQ_NEEDS_EOI; } -static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu, +static void bind_evtchn_to_cpu(struct irq_info *info, unsigned int cpu, bool force_affinity) { - struct irq_info *info = evtchn_to_info(evtchn); - - BUG_ON(info == NULL); - if (IS_ENABLED(CONFIG_SMP) && force_affinity) { struct irq_data *data = irq_get_irq_data(info->irq); @@ -547,7 +524,7 @@ static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu, irq_data_update_effective_affinity(data, cpumask_of(cpu)); } - xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu); + xen_evtchn_port_bind_to_cpu(info->evtchn, cpu, info->cpu); channels_on_cpu_dec(info); info->cpu = cpu; @@ -762,23 +739,24 @@ static struct irq_info *xen_irq_init(unsigned int irq) return info; } -static inline int __must_check xen_allocate_irq_dynamic(void) +static struct irq_info *xen_allocate_irq_dynamic(void) { int irq = irq_alloc_desc_from(0, -1); + struct irq_info *info = NULL; if (irq >= 0) { - if (!xen_irq_init(irq)) { + info = xen_irq_init(irq); + if (!info) xen_irq_free_desc(irq); - irq = -1; - } } - return irq; + return info; } -static int __must_check xen_allocate_irq_gsi(unsigned gsi) +static struct irq_info *xen_allocate_irq_gsi(unsigned int gsi) { int irq; + struct irq_info *info; /* * A PV guest has no concept of a GSI (since it has no ACPI @@ -795,18 +773,15 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) else irq = irq_alloc_desc_at(gsi, -1); - if (!xen_irq_init(irq)) { + info = xen_irq_init(irq); + if (!info) xen_irq_free_desc(irq); - irq = -1; - } - return irq; + return info; } -static void xen_free_irq(unsigned irq) +static void xen_free_irq(struct irq_info *info) { - struct irq_info *info = info_for_irq(irq); - if (WARN_ON(!info)) return; @@ -897,7 +872,7 @@ static unsigned int __startup_pirq(unsigned int irq) goto err; info->evtchn = evtchn; - bind_evtchn_to_cpu(evtchn, 0, false); + bind_evtchn_to_cpu(info, 0, false); rc = xen_evtchn_port_setup(evtchn); if (rc) @@ -963,10 +938,9 @@ int xen_irq_from_gsi(unsigned gsi) } EXPORT_SYMBOL_GPL(xen_irq_from_gsi); -static void __unbind_from_irq(unsigned int irq) +static void __unbind_from_irq(struct irq_info *info, unsigned int irq) { - evtchn_port_t evtchn = evtchn_from_irq(irq); - struct irq_info *info = info_for_irq(irq); + evtchn_port_t evtchn; if (!info) { xen_irq_free_desc(irq); @@ -979,6 +953,8 @@ static void __unbind_from_irq(unsigned int irq) return; } + evtchn = info->evtchn; + if (VALID_EVTCHN(evtchn)) { unsigned int cpu = info->cpu; struct xenbus_device *dev; @@ -988,11 +964,11 @@ static void __unbind_from_irq(unsigned int irq) switch (info->type) { case IRQT_VIRQ: - per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1; + per_cpu(virq_to_irq, cpu)[virq_from_irq(info)] = -1; break; case IRQT_IPI: - per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1; - per_cpu(ipi_to_evtchn, cpu)[ipi_from_irq(irq)] = 0; + per_cpu(ipi_to_irq, cpu)[ipi_from_irq(info)] = -1; + per_cpu(ipi_to_evtchn, cpu)[ipi_from_irq(info)] = 0; break; case IRQT_EVTCHN: dev = info->u.interdomain; @@ -1006,7 +982,7 @@ static void __unbind_from_irq(unsigned int irq) xen_irq_info_cleanup(info); } - xen_free_irq(irq); + xen_free_irq(info); } /* @@ -1022,24 +998,24 @@ static void __unbind_from_irq(unsigned int irq) int xen_bind_pirq_gsi_to_irq(unsigned gsi, unsigned pirq, int shareable, char *name) { - int irq; + struct irq_info *info; struct physdev_irq irq_op; int ret; mutex_lock(&irq_mapping_update_lock); - irq = xen_irq_from_gsi(gsi); - if (irq != -1) { + ret = xen_irq_from_gsi(gsi); + if (ret != -1) { pr_info("%s: returning irq %d for gsi %u\n", - __func__, irq, gsi); + __func__, ret, gsi); goto out; } - irq = xen_allocate_irq_gsi(gsi); - if (irq < 0) + info = xen_allocate_irq_gsi(gsi); + if (!info) goto out; - irq_op.irq = irq; + irq_op.irq = info->irq; irq_op.vector = 0; /* Only the privileged domain can do this. For non-priv, the pcifront @@ -1047,20 +1023,19 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, * this in the priv domain. */ if (xen_initial_domain() && HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { - xen_free_irq(irq); - irq = -ENOSPC; + xen_free_irq(info); + ret = -ENOSPC; goto out; } - ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF, + ret = xen_irq_info_pirq_setup(info, 0, pirq, gsi, DOMID_SELF, shareable ? PIRQ_SHAREABLE : 0); if (ret < 0) { - __unbind_from_irq(irq); - irq = ret; + __unbind_from_irq(info, info->irq); goto out; } - pirq_query_unmask(irq); + pirq_query_unmask(info->irq); /* We try to use the handler with the appropriate semantic for the * type of interrupt: if the interrupt is an edge triggered * interrupt we use handle_edge_irq. @@ -1077,16 +1052,18 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, * is the right choice either way. */ if (shareable) - irq_set_chip_and_handler_name(irq, &xen_pirq_chip, + irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip, handle_fasteoi_irq, name); else - irq_set_chip_and_handler_name(irq, &xen_pirq_chip, + irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip, handle_edge_irq, name); + ret = info->irq; + out: mutex_unlock(&irq_mapping_update_lock); - return irq; + return ret; } #ifdef CONFIG_PCI_MSI @@ -1108,6 +1085,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, int pirq, int nvec, const char *name, domid_t domid) { int i, irq, ret; + struct irq_info *info; mutex_lock(&irq_mapping_update_lock); @@ -1116,12 +1094,13 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, goto out; for (i = 0; i < nvec; i++) { - if (!xen_irq_init(irq + i)) + info = xen_irq_init(irq + i); + if (!info) goto error_irq; irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name); - ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid, + ret = xen_irq_info_pirq_setup(info, 0, pirq + i, 0, domid, i == 0 ? 0 : PIRQ_MSI_GROUP); if (ret < 0) goto error_irq; @@ -1133,9 +1112,12 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, out: mutex_unlock(&irq_mapping_update_lock); return irq; + error_irq: - while (nvec--) - __unbind_from_irq(irq + nvec); + while (nvec--) { + info = info_for_irq(irq + nvec); + __unbind_from_irq(info, irq + nvec); + } mutex_unlock(&irq_mapping_update_lock); return ret; } @@ -1171,7 +1153,7 @@ int xen_destroy_irq(int irq) } } - xen_free_irq(irq); + xen_free_irq(info); out: mutex_unlock(&irq_mapping_update_lock); @@ -1187,8 +1169,7 @@ EXPORT_SYMBOL_GPL(xen_pirq_from_irq); static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, struct xenbus_device *dev) { - int irq; - int ret; + int ret = -ENOMEM; struct irq_info *info; if (evtchn >= xen_evtchn_max_channels()) @@ -1199,17 +1180,16 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, info = evtchn_to_info(evtchn); if (!info) { - irq = xen_allocate_irq_dynamic(); - if (irq < 0) + info = xen_allocate_irq_dynamic(); + if (!info) goto out; - irq_set_chip_and_handler_name(irq, chip, + irq_set_chip_and_handler_name(info->irq, chip, handle_edge_irq, "event"); - ret = xen_irq_info_evtchn_setup(irq, evtchn, dev); + ret = xen_irq_info_evtchn_setup(info, evtchn, dev); if (ret < 0) { - __unbind_from_irq(irq); - irq = ret; + __unbind_from_irq(info, info->irq); goto out; } /* @@ -1219,17 +1199,17 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, * affinity setting is not invoked on them so nothing would * bind the channel. */ - bind_evtchn_to_cpu(evtchn, 0, false); - } else { - if (!WARN_ON(info->type != IRQT_EVTCHN)) - info->refcnt++; - irq = info->irq; + bind_evtchn_to_cpu(info, 0, false); + } else if (!WARN_ON(info->type != IRQT_EVTCHN)) { + info->refcnt++; } + ret = info->irq; + out: mutex_unlock(&irq_mapping_update_lock); - return irq; + return ret; } int bind_evtchn_to_irq(evtchn_port_t evtchn) @@ -1248,18 +1228,19 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) { struct evtchn_bind_ipi bind_ipi; evtchn_port_t evtchn; - int ret, irq; + struct irq_info *info; + int ret; mutex_lock(&irq_mapping_update_lock); - irq = per_cpu(ipi_to_irq, cpu)[ipi]; + ret = per_cpu(ipi_to_irq, cpu)[ipi]; - if (irq == -1) { - irq = xen_allocate_irq_dynamic(); - if (irq < 0) + if (ret == -1) { + info = xen_allocate_irq_dynamic(); + if (!info) goto out; - irq_set_chip_and_handler_name(irq, &xen_percpu_chip, + irq_set_chip_and_handler_name(info->irq, &xen_percpu_chip, handle_percpu_irq, "ipi"); bind_ipi.vcpu = xen_vcpu_nr(cpu); @@ -1268,25 +1249,25 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) BUG(); evtchn = bind_ipi.port; - ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi); + ret = xen_irq_info_ipi_setup(info, cpu, evtchn, ipi); if (ret < 0) { - __unbind_from_irq(irq); - irq = ret; + __unbind_from_irq(info, info->irq); goto out; } /* * Force the affinity mask to the target CPU so proc shows * the correct target. */ - bind_evtchn_to_cpu(evtchn, cpu, true); + bind_evtchn_to_cpu(info, cpu, true); + ret = info->irq; } else { - struct irq_info *info = info_for_irq(irq); + info = info_for_irq(ret); WARN_ON(info == NULL || info->type != IRQT_IPI); } out: mutex_unlock(&irq_mapping_update_lock); - return irq; + return ret; } static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev, @@ -1354,22 +1335,23 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) { struct evtchn_bind_virq bind_virq; evtchn_port_t evtchn = 0; - int irq, ret; + struct irq_info *info; + int ret; mutex_lock(&irq_mapping_update_lock); - irq = per_cpu(virq_to_irq, cpu)[virq]; + ret = per_cpu(virq_to_irq, cpu)[virq]; - if (irq == -1) { - irq = xen_allocate_irq_dynamic(); - if (irq < 0) + if (ret == -1) { + info = xen_allocate_irq_dynamic(); + if (!info) goto out; if (percpu) - irq_set_chip_and_handler_name(irq, &xen_percpu_chip, + irq_set_chip_and_handler_name(info->irq, &xen_percpu_chip, handle_percpu_irq, "virq"); else - irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, + irq_set_chip_and_handler_name(info->irq, &xen_dynamic_chip, handle_edge_irq, "virq"); bind_virq.virq = virq; @@ -1384,10 +1366,9 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) BUG_ON(ret < 0); } - ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq); + ret = xen_irq_info_virq_setup(info, cpu, evtchn, virq); if (ret < 0) { - __unbind_from_irq(irq); - irq = ret; + __unbind_from_irq(info, info->irq); goto out; } @@ -1395,22 +1376,26 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) * Force the affinity mask for percpu interrupts so proc * shows the correct target. */ - bind_evtchn_to_cpu(evtchn, cpu, percpu); + bind_evtchn_to_cpu(info, cpu, percpu); + ret = info->irq; } else { - struct irq_info *info = info_for_irq(irq); + info = info_for_irq(ret); WARN_ON(info == NULL || info->type != IRQT_VIRQ); } out: mutex_unlock(&irq_mapping_update_lock); - return irq; + return ret; } static void unbind_from_irq(unsigned int irq) { + struct irq_info *info; + mutex_lock(&irq_mapping_update_lock); - __unbind_from_irq(irq); + info = info_for_irq(irq); + __unbind_from_irq(info, irq); mutex_unlock(&irq_mapping_update_lock); } @@ -1744,11 +1729,11 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) BUG_ON(info->type == IRQT_UNBOUND); info->irq = irq; - (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL); + (void)xen_irq_info_evtchn_setup(info, evtchn, NULL); mutex_unlock(&irq_mapping_update_lock); - bind_evtchn_to_cpu(evtchn, info->cpu, false); + bind_evtchn_to_cpu(info, info->cpu, false); /* Unmask the event channel. */ enable_irq(irq); @@ -1782,7 +1767,7 @@ static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu) * it, but don't do the xenlinux-level rebind in that case. */ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) - bind_evtchn_to_cpu(evtchn, tcpu, false); + bind_evtchn_to_cpu(info, tcpu, false); do_unmask(info, EVT_MASK_REASON_TEMPORARY); @@ -1933,7 +1918,7 @@ static void restore_pirqs(void) if (rc) { pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n", gsi, irq, pirq, rc); - xen_free_irq(irq); + xen_free_irq(info); continue; } @@ -1947,13 +1932,15 @@ static void restore_cpu_virqs(unsigned int cpu) { struct evtchn_bind_virq bind_virq; evtchn_port_t evtchn; + struct irq_info *info; int virq, irq; for (virq = 0; virq < NR_VIRQS; virq++) { if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) continue; + info = info_for_irq(irq); - BUG_ON(virq_from_irq(irq) != virq); + BUG_ON(virq_from_irq(info) != virq); /* Get a new binding from Xen. */ bind_virq.virq = virq; @@ -1964,9 +1951,9 @@ static void restore_cpu_virqs(unsigned int cpu) evtchn = bind_virq.port; /* Record the new mapping. */ - (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq); + xen_irq_info_virq_setup(info, cpu, evtchn, virq); /* The affinity mask is still valid */ - bind_evtchn_to_cpu(evtchn, cpu, false); + bind_evtchn_to_cpu(info, cpu, false); } } @@ -1974,13 +1961,15 @@ static void restore_cpu_ipis(unsigned int cpu) { struct evtchn_bind_ipi bind_ipi; evtchn_port_t evtchn; + struct irq_info *info; int ipi, irq; for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) { if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) continue; + info = info_for_irq(irq); - BUG_ON(ipi_from_irq(irq) != ipi); + BUG_ON(ipi_from_irq(info) != ipi); /* Get a new binding from Xen. */ bind_ipi.vcpu = xen_vcpu_nr(cpu); @@ -1990,9 +1979,9 @@ static void restore_cpu_ipis(unsigned int cpu) evtchn = bind_ipi.port; /* Record the new mapping. */ - (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi); + xen_irq_info_ipi_setup(info, cpu, evtchn, ipi); /* The affinity mask is still valid */ - bind_evtchn_to_cpu(evtchn, cpu, false); + bind_evtchn_to_cpu(info, cpu, false); } } From cee96422e863f0b0e9d3d0c2d617271ef2255858 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 28 Sep 2023 11:25:32 +0200 Subject: [PATCH 394/415] xen/events: remove some info_for_irq() calls in pirq handling Instead of the IRQ number user the struct irq_info pointer as parameter in the internal pirq related functions. This allows to drop some calls of info_for_irq(). Signed-off-by: Juergen Gross Reviewed-by: Oleksandr Tyshchenko Signed-off-by: Juergen Gross --- drivers/xen/events/events_base.c | 129 ++++++++++++++++++------------- 1 file changed, 74 insertions(+), 55 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 88f0c80d0f87..f5edb9e27e3c 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -174,7 +174,7 @@ static int **evtchn_to_irq; #ifdef CONFIG_X86 static unsigned long *pirq_eoi_map; #endif -static bool (*pirq_needs_eoi)(unsigned irq); +static bool (*pirq_needs_eoi)(struct irq_info *info); #define EVTCHN_ROW(e) (e / (PAGE_SIZE/sizeof(**evtchn_to_irq))) #define EVTCHN_COL(e) (e % (PAGE_SIZE/sizeof(**evtchn_to_irq))) @@ -190,7 +190,6 @@ static struct irq_chip xen_lateeoi_chip; static struct irq_chip xen_percpu_chip; static struct irq_chip xen_pirq_chip; static void enable_dynirq(struct irq_data *data); -static void disable_dynirq(struct irq_data *data); static DEFINE_PER_CPU(unsigned int, irq_epoch); @@ -454,10 +453,8 @@ static unsigned int virq_from_irq(struct irq_info *info) return info->u.virq; } -static unsigned pirq_from_irq(unsigned irq) +static unsigned int pirq_from_irq(struct irq_info *info) { - struct irq_info *info = info_for_irq(irq); - BUG_ON(info == NULL); BUG_ON(info->type != IRQT_PIRQ); @@ -500,15 +497,14 @@ static void do_unmask(struct irq_info *info, u8 reason) } #ifdef CONFIG_X86 -static bool pirq_check_eoi_map(unsigned irq) +static bool pirq_check_eoi_map(struct irq_info *info) { - return test_bit(pirq_from_irq(irq), pirq_eoi_map); + return test_bit(pirq_from_irq(info), pirq_eoi_map); } #endif -static bool pirq_needs_eoi_flag(unsigned irq) +static bool pirq_needs_eoi_flag(struct irq_info *info) { - struct irq_info *info = info_for_irq(irq); BUG_ON(info->type != IRQT_PIRQ); return info->u.pirq.flags & PIRQ_NEEDS_EOI; @@ -802,14 +798,11 @@ static void event_handler_exit(struct irq_info *info) clear_evtchn(info->evtchn); } -static void pirq_query_unmask(int irq) +static void pirq_query_unmask(struct irq_info *info) { struct physdev_irq_status_query irq_status; - struct irq_info *info = info_for_irq(irq); - BUG_ON(info->type != IRQT_PIRQ); - - irq_status.irq = pirq_from_irq(irq); + irq_status.irq = pirq_from_irq(info); if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) irq_status.flags = 0; @@ -818,56 +811,76 @@ static void pirq_query_unmask(int irq) info->u.pirq.flags |= PIRQ_NEEDS_EOI; } -static void eoi_pirq(struct irq_data *data) +static void do_eoi_pirq(struct irq_info *info) { - struct irq_info *info = info_for_irq(data->irq); - evtchn_port_t evtchn = info ? info->evtchn : 0; - struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) }; + struct physdev_eoi eoi = { .irq = pirq_from_irq(info) }; int rc = 0; - if (!VALID_EVTCHN(evtchn)) + if (!VALID_EVTCHN(info->evtchn)) return; event_handler_exit(info); - if (pirq_needs_eoi(data->irq)) { + if (pirq_needs_eoi(info)) { rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); WARN_ON(rc); } } -static void mask_ack_pirq(struct irq_data *data) +static void eoi_pirq(struct irq_data *data) { - disable_dynirq(data); - eoi_pirq(data); + struct irq_info *info = info_for_irq(data->irq); + + do_eoi_pirq(info); } -static unsigned int __startup_pirq(unsigned int irq) +static void do_disable_dynirq(struct irq_info *info) +{ + if (VALID_EVTCHN(info->evtchn)) + do_mask(info, EVT_MASK_REASON_EXPLICIT); +} + +static void disable_dynirq(struct irq_data *data) +{ + struct irq_info *info = info_for_irq(data->irq); + + if (info) + do_disable_dynirq(info); +} + +static void mask_ack_pirq(struct irq_data *data) +{ + struct irq_info *info = info_for_irq(data->irq); + + if (info) { + do_disable_dynirq(info); + do_eoi_pirq(info); + } +} + +static unsigned int __startup_pirq(struct irq_info *info) { struct evtchn_bind_pirq bind_pirq; - struct irq_info *info = info_for_irq(irq); - evtchn_port_t evtchn = evtchn_from_irq(irq); + evtchn_port_t evtchn = info->evtchn; int rc; - BUG_ON(info->type != IRQT_PIRQ); - if (VALID_EVTCHN(evtchn)) goto out; - bind_pirq.pirq = pirq_from_irq(irq); + bind_pirq.pirq = pirq_from_irq(info); /* NB. We are happy to share unless we are probing. */ bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ? BIND_PIRQ__WILL_SHARE : 0; rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq); if (rc != 0) { - pr_warn("Failed to obtain physical IRQ %d\n", irq); + pr_warn("Failed to obtain physical IRQ %d\n", info->irq); return 0; } evtchn = bind_pirq.port; - pirq_query_unmask(irq); + pirq_query_unmask(info); - rc = set_evtchn_to_irq(evtchn, irq); + rc = set_evtchn_to_irq(evtchn, info->irq); if (rc) goto err; @@ -881,26 +894,28 @@ static unsigned int __startup_pirq(unsigned int irq) out: do_unmask(info, EVT_MASK_REASON_EXPLICIT); - eoi_pirq(irq_get_irq_data(irq)); + do_eoi_pirq(info); return 0; err: - pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc); + pr_err("irq%d: Failed to set port to irq mapping (%d)\n", info->irq, + rc); xen_evtchn_close(evtchn); return 0; } static unsigned int startup_pirq(struct irq_data *data) { - return __startup_pirq(data->irq); + struct irq_info *info = info_for_irq(data->irq); + + return __startup_pirq(info); } static void shutdown_pirq(struct irq_data *data) { - unsigned int irq = data->irq; - struct irq_info *info = info_for_irq(irq); - evtchn_port_t evtchn = evtchn_from_irq(irq); + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info->evtchn; BUG_ON(info->type != IRQT_PIRQ); @@ -1035,7 +1050,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, goto out; } - pirq_query_unmask(info->irq); + pirq_query_unmask(info); /* We try to use the handler with the appropriate semantic for the * type of interrupt: if the interrupt is an edge triggered * interrupt we use handle_edge_irq. @@ -1162,7 +1177,9 @@ int xen_destroy_irq(int irq) int xen_pirq_from_irq(unsigned irq) { - return pirq_from_irq(irq); + struct irq_info *info = info_for_irq(irq); + + return pirq_from_irq(info); } EXPORT_SYMBOL_GPL(xen_pirq_from_irq); @@ -1824,28 +1841,30 @@ static void enable_dynirq(struct irq_data *data) do_unmask(info, EVT_MASK_REASON_EXPLICIT); } -static void disable_dynirq(struct irq_data *data) +static void do_ack_dynirq(struct irq_info *info) { - struct irq_info *info = info_for_irq(data->irq); - evtchn_port_t evtchn = info ? info->evtchn : 0; - - if (VALID_EVTCHN(evtchn)) - do_mask(info, EVT_MASK_REASON_EXPLICIT); -} - -static void ack_dynirq(struct irq_data *data) -{ - struct irq_info *info = info_for_irq(data->irq); - evtchn_port_t evtchn = info ? info->evtchn : 0; + evtchn_port_t evtchn = info->evtchn; if (VALID_EVTCHN(evtchn)) event_handler_exit(info); } +static void ack_dynirq(struct irq_data *data) +{ + struct irq_info *info = info_for_irq(data->irq); + + if (info) + do_ack_dynirq(info); +} + static void mask_ack_dynirq(struct irq_data *data) { - disable_dynirq(data); - ack_dynirq(data); + struct irq_info *info = info_for_irq(data->irq); + + if (info) { + do_disable_dynirq(info); + do_ack_dynirq(info); + } } static void lateeoi_ack_dynirq(struct irq_data *data) @@ -1924,7 +1943,7 @@ static void restore_pirqs(void) printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); - __startup_pirq(irq); + __startup_pirq(info); } } From 9e2e7efbbbff69d8340abb56d375dd79d1f5770f Mon Sep 17 00:00:00 2001 From: Johnathan Mantey Date: Mon, 13 Nov 2023 08:30:29 -0800 Subject: [PATCH 395/415] Revert ncsi: Propagate carrier gain/loss events to the NCSI controller This reverts commit 3780bb29311eccb7a1c9641032a112eed237f7e3. The cited commit introduced unwanted behavior. The intent for the commit was to be able to detect carrier loss/gain for just the NIC connected to the BMC. The unwanted effect is a carrier loss for auxiliary paths also causes the BMC to lose carrier. The BMC never regains carrier despite the secondary NIC regaining a link. This change, when merged, needs to be backported to stable kernels. 5.4-stable, 5.10-stable, 5.15-stable, 6.1-stable, 6.5-stable Fixes: 3780bb29311e ("ncsi: Propagate carrier gain/loss events to the NCSI controller") CC: stable@vger.kernel.org Signed-off-by: Johnathan Mantey Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/ncsi/ncsi-aen.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index f8854bff286c..62fb1031763d 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -89,11 +89,6 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp, if ((had_link == has_link) || chained) return 0; - if (had_link) - netif_carrier_off(ndp->ndev.dev); - else - netif_carrier_on(ndp->ndev.dev); - if (!ndp->multi_package && !nc->package->multi_channel) { if (had_link) { ndp->flags |= NCSI_DEV_RESHUFFLE; From efc0c8363bc6dab2cd540acc886e6097deee8bb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Mon, 13 Nov 2023 17:44:12 +0100 Subject: [PATCH 396/415] dt-bindings: net: ethernet-controller: Fix formatting error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When moving the *-internal-delay-ps properties to only apply for RGMII interface modes there where a typo in the text formatting. Signed-off-by: Niklas Söderlund Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- .../devicetree/bindings/net/ethernet-controller.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index 9f6a5ccbcefe..d14d123ad7a0 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -275,12 +275,12 @@ allOf: properties: rx-internal-delay-ps: description: - RGMII Receive Clock Delay defined in pico seconds.This is used for + RGMII Receive Clock Delay defined in pico seconds. This is used for controllers that have configurable RX internal delays. If this property is present then the MAC applies the RX delay. tx-internal-delay-ps: description: - RGMII Transmit Clock Delay defined in pico seconds.This is used for + RGMII Transmit Clock Delay defined in pico seconds. This is used for controllers that have configurable TX internal delays. If this property is present then the MAC applies the TX delay. From 674e318089468ece99aef4796eaef7add57f36b2 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 14 Nov 2023 09:56:18 +0200 Subject: [PATCH 397/415] net: Fix undefined behavior in netdev name allocation Cited commit removed the strscpy() call and kept the snprintf() only. It is common to use 'dev->name' as the format string before a netdev is registered, this results in 'res' and 'name' pointers being equal. According to POSIX, if copying takes place between objects that overlap as a result of a call to sprintf() or snprintf(), the results are undefined. Add back the strscpy() and use 'buf' as an intermediate buffer. Fixes: 7ad17b04dc7b ("net: trust the bitmap in __dev_alloc_name()") Cc: Jakub Kicinski Reviewed-by: Vlad Buslov Signed-off-by: Gal Pressman Reviewed-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 0d548431f3fa..af53f6d838ce 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1119,7 +1119,9 @@ static int __dev_alloc_name(struct net *net, const char *name, char *res) if (i == max_netdevices) return -ENFILE; - snprintf(res, IFNAMSIZ, name, i); + /* 'res' and 'name' could overlap, use 'buf' as an intermediate buffer */ + strscpy(buf, name, IFNAMSIZ); + snprintf(res, IFNAMSIZ, buf, i); return i; } From 1fda5bb66ad8fb24ecb3858e61a13a6548428898 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 10 Nov 2023 17:39:28 -0800 Subject: [PATCH 398/415] bpf: Do not allocate percpu memory at init stage Kirill Shutemov reported significant percpu memory consumption increase after booting in 288-cpu VM ([1]) due to commit 41a5db8d8161 ("bpf: Add support for non-fix-size percpu mem allocation"). The percpu memory consumption is increased from 111MB to 969MB. The number is from /proc/meminfo. I tried to reproduce the issue with my local VM which at most supports upto 255 cpus. With 252 cpus, without the above commit, the percpu memory consumption immediately after boot is 57MB while with the above commit the percpu memory consumption is 231MB. This is not good since so far percpu memory from bpf memory allocator is not widely used yet. Let us change pre-allocation in init stage to on-demand allocation when verifier detects there is a need of percpu memory for bpf program. With this change, percpu memory consumption after boot can be reduced signicantly. [1] https://lore.kernel.org/lkml/20231109154934.4saimljtqx625l3v@box.shutemov.name/ Fixes: 41a5db8d8161 ("bpf: Add support for non-fix-size percpu mem allocation") Reported-and-tested-by: Kirill A. Shutemov Signed-off-by: Yonghong Song Acked-by: Hou Tao Link: https://lore.kernel.org/r/20231111013928.948838-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- kernel/bpf/core.c | 8 +++----- kernel/bpf/verifier.c | 20 ++++++++++++++++++-- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 35bff17396c0..6762dac3ef76 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -56,7 +56,7 @@ extern struct idr btf_idr; extern spinlock_t btf_idr_lock; extern struct kobject *btf_kobj; extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma; -extern bool bpf_global_ma_set, bpf_global_percpu_ma_set; +extern bool bpf_global_ma_set; typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64); typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 08626b519ce2..cd3afe57ece3 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -64,8 +64,8 @@ #define OFF insn->off #define IMM insn->imm -struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma; -bool bpf_global_ma_set, bpf_global_percpu_ma_set; +struct bpf_mem_alloc bpf_global_ma; +bool bpf_global_ma_set; /* No hurry in this branch * @@ -2934,9 +2934,7 @@ static int __init bpf_global_ma_init(void) ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false); bpf_global_ma_set = !ret; - ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true); - bpf_global_percpu_ma_set = !ret; - return !bpf_global_ma_set || !bpf_global_percpu_ma_set; + return ret; } late_initcall(bpf_global_ma_init); #endif diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a2267d5ed14e..6da370a047fe 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "disasm.h" @@ -41,6 +42,9 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { #undef BPF_LINK_TYPE }; +struct bpf_mem_alloc bpf_global_percpu_ma; +static bool bpf_global_percpu_ma_set; + /* bpf_check() is a static code analyzer that walks eBPF program * instruction by instruction and updates register/stack state. * All paths of conditional branches are analyzed until 'bpf_exit' insn. @@ -336,6 +340,7 @@ struct bpf_kfunc_call_arg_meta { struct btf *btf_vmlinux; static DEFINE_MUTEX(bpf_verifier_lock); +static DEFINE_MUTEX(bpf_percpu_ma_lock); static const struct bpf_line_info * find_linfo(const struct bpf_verifier_env *env, u32 insn_off) @@ -12091,8 +12096,19 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set) return -ENOMEM; - if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && !bpf_global_percpu_ma_set) - return -ENOMEM; + if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) { + if (!bpf_global_percpu_ma_set) { + mutex_lock(&bpf_percpu_ma_lock); + if (!bpf_global_percpu_ma_set) { + err = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true); + if (!err) + bpf_global_percpu_ma_set = true; + } + mutex_unlock(&bpf_percpu_ma_lock); + if (err) + return err; + } + } if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) { verbose(env, "local type ID argument must be in range [0, U32_MAX]\n"); From df3aafe501853c92bc9e25b05dcb030fee072962 Mon Sep 17 00:00:00 2001 From: Itamar Gozlan Date: Tue, 14 Nov 2023 13:58:32 -0800 Subject: [PATCH 399/415] Revert "net/mlx5: DR, Supporting inline WQE when possible" This reverts commit 95c337cce0e11d06a715da73e6796ade9216637f. The revert is required due to the suspicion it cause some tests fail and will be moved to further investigation. Fixes: 95c337cce0e1 ("net/mlx5: DR, Supporting inline WQE when possible") Signed-off-by: Itamar Gozlan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-2-saeed@kernel.org Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/steering/dr_send.c | 115 ++---------------- 1 file changed, 13 insertions(+), 102 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 4e8527a724f5..6fa06ba2d346 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -52,7 +52,6 @@ struct dr_qp_init_attr { u32 cqn; u32 pdn; u32 max_send_wr; - u32 max_send_sge; struct mlx5_uars_page *uar; u8 isolate_vl_tc:1; }; @@ -247,37 +246,6 @@ static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne) return err == CQ_POLL_ERR ? err : npolled; } -static int dr_qp_get_args_update_send_wqe_size(struct dr_qp_init_attr *attr) -{ - return roundup_pow_of_two(sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_flow_update_ctrl_seg) + - sizeof(struct mlx5_wqe_header_modify_argument_update_seg)); -} - -/* We calculate for specific RC QP with the required functionality */ -static int dr_qp_calc_rc_send_wqe(struct dr_qp_init_attr *attr) -{ - int update_arg_size; - int inl_size = 0; - int tot_size; - int size; - - update_arg_size = dr_qp_get_args_update_send_wqe_size(attr); - - size = sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg); - inl_size = size + ALIGN(sizeof(struct mlx5_wqe_inline_seg) + - DR_STE_SIZE, 16); - - size += attr->max_send_sge * sizeof(struct mlx5_wqe_data_seg); - - size = max(size, update_arg_size); - - tot_size = max(size, inl_size); - - return ALIGN(tot_size, MLX5_SEND_WQE_BB); -} - static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, struct dr_qp_init_attr *attr) { @@ -285,7 +253,6 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {}; struct mlx5_wq_param wqp; struct mlx5dr_qp *dr_qp; - int wqe_size; int inlen; void *qpc; void *in; @@ -365,15 +332,6 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, if (err) goto err_in; dr_qp->uar = attr->uar; - wqe_size = dr_qp_calc_rc_send_wqe(attr); - dr_qp->max_inline_data = min(wqe_size - - (sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_inline_seg)), - (2 * MLX5_SEND_WQE_BB - - (sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_inline_seg)))); return dr_qp; @@ -437,48 +395,8 @@ dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl, MLX5_SEND_WQE_DS; } -static int dr_set_data_inl_seg(struct mlx5dr_qp *dr_qp, - struct dr_data_seg *data_seg, void *wqe) -{ - int inline_header_size = sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_inline_seg); - struct mlx5_wqe_inline_seg *seg; - int left_space; - int inl = 0; - void *addr; - int len; - int idx; - - seg = wqe; - wqe += sizeof(*seg); - addr = (void *)(unsigned long)(data_seg->addr); - len = data_seg->length; - inl += len; - left_space = MLX5_SEND_WQE_BB - inline_header_size; - - if (likely(len > left_space)) { - memcpy(wqe, addr, left_space); - len -= left_space; - addr += left_space; - idx = (dr_qp->sq.pc + 1) & (dr_qp->sq.wqe_cnt - 1); - wqe = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx); - } - - memcpy(wqe, addr, len); - - if (likely(inl)) { - seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG); - return DIV_ROUND_UP(inl + sizeof(seg->byte_count), - MLX5_SEND_WQE_DS); - } else { - return 0; - } -} - static void -dr_rdma_handle_icm_write_segments(struct mlx5dr_qp *dr_qp, - struct mlx5_wqe_ctrl_seg *wq_ctrl, +dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl, u64 remote_addr, u32 rkey, struct dr_data_seg *data_seg, @@ -494,17 +412,15 @@ dr_rdma_handle_icm_write_segments(struct mlx5dr_qp *dr_qp, wq_raddr->reserved = 0; wq_dseg = (void *)(wq_raddr + 1); - /* WQE ctrl segment + WQE remote addr segment */ - *size = (sizeof(*wq_ctrl) + sizeof(*wq_raddr)) / MLX5_SEND_WQE_DS; - if (data_seg->send_flags & IB_SEND_INLINE) { - *size += dr_set_data_inl_seg(dr_qp, data_seg, wq_dseg); - } else { - wq_dseg->byte_count = cpu_to_be32(data_seg->length); - wq_dseg->lkey = cpu_to_be32(data_seg->lkey); - wq_dseg->addr = cpu_to_be64(data_seg->addr); - *size += sizeof(*wq_dseg) / MLX5_SEND_WQE_DS; /* WQE data segment */ - } + wq_dseg->byte_count = cpu_to_be32(data_seg->length); + wq_dseg->lkey = cpu_to_be32(data_seg->lkey); + wq_dseg->addr = cpu_to_be64(data_seg->addr); + + *size = (sizeof(*wq_ctrl) + /* WQE ctrl segment */ + sizeof(*wq_dseg) + /* WQE data segment */ + sizeof(*wq_raddr)) / /* WQE remote addr segment */ + MLX5_SEND_WQE_DS; } static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl, @@ -535,7 +451,7 @@ static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr, switch (opcode) { case MLX5_OPCODE_RDMA_READ: case MLX5_OPCODE_RDMA_WRITE: - dr_rdma_handle_icm_write_segments(dr_qp, wq_ctrl, remote_addr, + dr_rdma_handle_icm_write_segments(wq_ctrl, remote_addr, rkey, data_seg, &size); break; case MLX5_OPCODE_FLOW_TBL_ACCESS: @@ -656,7 +572,7 @@ static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring, if (send_ring->pending_wqe % send_ring->signal_th == 0) send_info->write.send_flags |= IB_SEND_SIGNALED; else - send_info->write.send_flags &= ~IB_SEND_SIGNALED; + send_info->write.send_flags = 0; } static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn, @@ -680,13 +596,9 @@ static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn, } send_ring->pending_wqe++; - if (!send_info->write.lkey) - send_info->write.send_flags |= IB_SEND_INLINE; if (send_ring->pending_wqe % send_ring->signal_th == 0) send_info->write.send_flags |= IB_SEND_SIGNALED; - else - send_info->write.send_flags &= ~IB_SEND_SIGNALED; send_ring->pending_wqe++; send_info->read.length = send_info->write.length; @@ -696,9 +608,9 @@ static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn, send_info->read.lkey = send_ring->sync_mr->mkey; if (send_ring->pending_wqe % send_ring->signal_th == 0) - send_info->read.send_flags |= IB_SEND_SIGNALED; + send_info->read.send_flags = IB_SEND_SIGNALED; else - send_info->read.send_flags &= ~IB_SEND_SIGNALED; + send_info->read.send_flags = 0; } static void dr_fill_data_segs(struct mlx5dr_domain *dmn, @@ -1345,7 +1257,6 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn) dmn->send_ring->cq->qp = dmn->send_ring->qp; dmn->info.max_send_wr = QUEUE_SIZE; - init_attr.max_send_sge = 1; dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data, DR_STE_SIZE); From 7d2f74d1d4385a5bcf90618537f16a45121c30ae Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Tue, 14 Nov 2023 13:58:33 -0800 Subject: [PATCH 400/415] net/mlx5: Free used cpus mask when an IRQ is released Each EQ table maintains a cpumask of the already used CPUs that are mapped to IRQs to ensure that each IRQ gets mapped to a unique CPU. However, on IRQ release, the said cpumask is not updated by clearing the CPU from the mask to allow future IRQ request, causing the following error when a SF is reloaded after it has utilized all CPUs for its IRQs: mlx5_irq_affinity_request:135:(pid 306010): Didn't find a matching IRQ. err = -28 Thus, when releasing an IRQ, clear its mapped CPU from the used CPUs mask, to prevent the case described above. While at it, move the used cpumask update to the EQ layer as it is more fitting and preserves symmetricity of the IRQ request/release API. Fixes: a1772de78d73 ("net/mlx5: Refactor completion IRQ request/release API") Signed-off-by: Maher Sanalla Reviewed-by: Tariq Toukan Reviewed-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-3-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 25 ++++++++--- .../mellanox/mlx5/core/irq_affinity.c | 42 ------------------- 2 files changed, 19 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index ea0405e0a43f..40a6cb052a2d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -885,11 +885,14 @@ static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_irq *irq; + int cpu; irq = xa_load(&table->comp_irqs, vecidx); if (!irq) return; + cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq)); + cpumask_clear_cpu(cpu, &table->used_cpus); xa_erase(&table->comp_irqs, vecidx); mlx5_irq_affinity_irq_release(dev, irq); } @@ -897,16 +900,26 @@ static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx) static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx) { struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); + struct irq_affinity_desc af_desc = {}; struct mlx5_irq *irq; - irq = mlx5_irq_affinity_irq_request_auto(dev, &table->used_cpus, vecidx); - if (IS_ERR(irq)) { - /* In case SF irq pool does not exist, fallback to the PF irqs*/ - if (PTR_ERR(irq) == -ENOENT) - return comp_irq_request_pci(dev, vecidx); + /* In case SF irq pool does not exist, fallback to the PF irqs*/ + if (!mlx5_irq_pool_is_sf_pool(pool)) + return comp_irq_request_pci(dev, vecidx); + af_desc.is_managed = 1; + cpumask_copy(&af_desc.mask, cpu_online_mask); + cpumask_andnot(&af_desc.mask, &af_desc.mask, &table->used_cpus); + irq = mlx5_irq_affinity_request(pool, &af_desc); + if (IS_ERR(irq)) return PTR_ERR(irq); - } + + cpumask_or(&table->used_cpus, &table->used_cpus, mlx5_irq_get_affinity_mask(irq)); + mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", + pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)), + cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), + mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index 047d5fed5f89..612e666ec263 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -168,45 +168,3 @@ void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *i if (pool->irqs_per_cpu) cpu_put(pool, cpu); } - -/** - * mlx5_irq_affinity_irq_request_auto - request one IRQ for mlx5 device. - * @dev: mlx5 device that is requesting the IRQ. - * @used_cpus: cpumask of bounded cpus by the device - * @vecidx: vector index to request an IRQ for. - * - * Each IRQ is bounded to at most 1 CPU. - * This function is requesting an IRQ according to the default assignment. - * The default assignment policy is: - * - request the least loaded IRQ which is not bound to any - * CPU of the previous IRQs requested. - * - * On success, this function updates used_cpus mask and returns an irq pointer. - * In case of an error, an appropriate error pointer is returned. - */ -struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev, - struct cpumask *used_cpus, u16 vecidx) -{ - struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); - struct irq_affinity_desc af_desc = {}; - struct mlx5_irq *irq; - - if (!mlx5_irq_pool_is_sf_pool(pool)) - return ERR_PTR(-ENOENT); - - af_desc.is_managed = 1; - cpumask_copy(&af_desc.mask, cpu_online_mask); - cpumask_andnot(&af_desc.mask, &af_desc.mask, used_cpus); - irq = mlx5_irq_affinity_request(pool, &af_desc); - - if (IS_ERR(irq)) - return irq; - - cpumask_or(used_cpus, used_cpus, mlx5_irq_get_affinity_mask(irq)); - mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", - pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)), - cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), - mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); - - return irq; -} From ad4d82c3eacdd500a246af736e6e01d96484e35e Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Tue, 14 Nov 2023 13:58:34 -0800 Subject: [PATCH 401/415] net/mlx5: DR, Allow old devices to use multi destination FTE The current check isn't aware of old devices that don't have the relevant FW capability. This patch allows multi destination FTE in old cards, as it was before this check. Fixes: f6f46e7173cb ("net/mlx5: DR, Add check for multi destination FTE") Signed-off-by: Erez Shitrit Reviewed-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-4-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 6ea88a581804..e3ec559369fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -57,7 +57,8 @@ static const char *dr_action_id_to_str(enum mlx5dr_action_type action_id) static bool mlx5dr_action_supp_fwd_fdb_multi_ft(struct mlx5_core_dev *dev) { - return (MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_any_table_limit_regc) || + return (MLX5_CAP_GEN(dev, steering_format_version) < MLX5_STEERING_FORMAT_CONNECTX_6DX || + MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_any_table_limit_regc) || MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_any_table)); } From fd64fd13c49a53012ce9170449dcd9eb71c11284 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 14 Nov 2023 13:58:35 -0800 Subject: [PATCH 402/415] net/mlx5: Decouple PHC .adjtime and .adjphase implementations When running a phase adjustment operation, the free running clock should not be modified at all. The phase control keyword is intended to trigger an internal servo on the device that will converge to the provided delta. A free running counter cannot implement phase adjustment. Fixes: 8e11a68e2e8a ("net/mlx5: Add adjphase function to support hardware-only offset control") Signed-off-by: Rahul Rameshbabu Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-5-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index aa29f09e8356..0c83ef174275 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -384,7 +384,12 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta) { - return mlx5_ptp_adjtime(ptp, delta); + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; + + mdev = container_of(clock, struct mlx5_core_dev, clock); + + return mlx5_ptp_adjtime_real_time(mdev, delta); } static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_ppm) From 6f9b1a0731662648949a1c0587f6acb3b7f8acf1 Mon Sep 17 00:00:00 2001 From: Dust Li Date: Tue, 14 Nov 2023 13:58:36 -0800 Subject: [PATCH 403/415] net/mlx5e: fix double free of encap_header When mlx5_packet_reformat_alloc() fails, the encap_header allocated in mlx5e_tc_tun_create_header_ipv4{6} will be released within it. However, e->encap_header is already set to the previously freed encap_header before mlx5_packet_reformat_alloc(). As a result, the later mlx5e_encap_put() will free e->encap_header again, causing a double free issue. mlx5e_encap_put() --> mlx5e_encap_dealloc() --> kfree(e->encap_header) This happens when cmd: MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT fail. This patch fix it by not setting e->encap_header until mlx5_packet_reformat_alloc() success. Fixes: d589e785baf5e ("net/mlx5e: Allow concurrent creation of encap entries") Reported-by: Cruz Zhao Reported-by: Tianchen Ding Signed-off-by: Dust Li Reviewed-by: Wojciech Drewek Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 00a04fdd756f..8bca696b6658 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -300,9 +300,6 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, if (err) goto destroy_neigh_entry; - e->encap_size = ipv4_encap_size; - e->encap_header = encap_header; - if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event @@ -322,6 +319,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, goto destroy_neigh_entry; } + e->encap_size = ipv4_encap_size; + e->encap_header = encap_header; e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv4_put(&attr); @@ -568,9 +567,6 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, if (err) goto destroy_neigh_entry; - e->encap_size = ipv6_encap_size; - e->encap_header = encap_header; - if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event @@ -590,6 +586,8 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, goto destroy_neigh_entry; } + e->encap_size = ipv6_encap_size; + e->encap_header = encap_header; e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv6_put(&attr); From 3a4aa3cb83563df942be49d145ee3b7ddf17d6bb Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Tue, 14 Nov 2023 13:58:37 -0800 Subject: [PATCH 404/415] net/mlx5e: fix double free of encap_header in update funcs Follow up to the previous patch to fix the same issue for mlx5e_tc_tun_update_header_ipv4{6} when mlx5_packet_reformat_alloc() fails. When mlx5_packet_reformat_alloc() fails, the encap_header allocated in mlx5e_tc_tun_update_header_ipv4{6} will be released within it. However, e->encap_header is already set to the previously freed encap_header before mlx5_packet_reformat_alloc(). As a result, the later mlx5e_encap_put() will free e->encap_header again, causing a double free issue. mlx5e_encap_put() --> mlx5e_encap_dealloc() --> kfree(e->encap_header) This patch fix it by not setting e->encap_header until mlx5_packet_reformat_alloc() success. Fixes: a54e20b4fcae ("net/mlx5e: Add basic TC tunnel set action for SRIOV offloads") Signed-off-by: Gavin Li Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-7-saeed@kernel.org Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 8bca696b6658..668da5c70e63 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -403,16 +403,12 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, if (err) goto free_encap; - e->encap_size = ipv4_encap_size; - kfree(e->encap_header); - e->encap_header = encap_header; - if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event * and not used before that. */ - goto release_neigh; + goto free_encap; } memset(&reformat_params, 0, sizeof(reformat_params)); @@ -426,6 +422,10 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, goto free_encap; } + e->encap_size = ipv4_encap_size; + kfree(e->encap_header); + e->encap_header = encap_header; + e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv4_put(&attr); @@ -669,16 +669,12 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, if (err) goto free_encap; - e->encap_size = ipv6_encap_size; - kfree(e->encap_header); - e->encap_header = encap_header; - if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event * and not used before that. */ - goto release_neigh; + goto free_encap; } memset(&reformat_params, 0, sizeof(reformat_params)); @@ -692,6 +688,10 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, goto free_encap; } + e->encap_size = ipv6_encap_size; + kfree(e->encap_header); + e->encap_header = encap_header; + e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv6_put(&attr); From 0c101a23ca7eaf00eef1328eefb04b3a93401cc8 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 14 Nov 2023 13:58:38 -0800 Subject: [PATCH 405/415] net/mlx5e: Fix pedit endianness Referenced commit addressed endianness issue in mlx5 pedit implementation in ad hoc manner instead of systematically treating integer values according to their types which left pedit fields of sizes not equal to 4 and where the bytes being modified are not least significant ones broken on big endian machines since wrong bits will be consumed during parsing which leads to following example error when applying pedit to source and destination MAC addresses: [Wed Oct 18 12:52:42 2023] mlx5_core 0001:00:00.1 p1v3_r: attempt to offload an unsupported field (cmd 0) [Wed Oct 18 12:52:42 2023] mask: 00000000330c5b68: 00 00 00 00 ff ff 00 00 00 00 ff ff 00 00 00 00 ................ [Wed Oct 18 12:52:42 2023] mask: 0000000017d22fd9: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [Wed Oct 18 12:52:42 2023] mask: 000000008186d717: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [Wed Oct 18 12:52:42 2023] mask: 0000000029eb6149: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [Wed Oct 18 12:52:42 2023] mask: 000000007ed103e4: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [Wed Oct 18 12:52:42 2023] mask: 00000000db8101a6: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [Wed Oct 18 12:52:42 2023] mask: 00000000ec3c08a9: 00 00 00 00 00 00 00 00 00 00 00 00 ............ Treat masks and values of pedit and filter match as network byte order, refactor pointers to them to void pointers instead of confusing u32 pointers and only cast to pointer-to-integer when reading a value from them. Treat pedit mlx5_fields->field_mask as host byte order according to its type u32, change the constants in fields array accordingly. Fixes: 82198d8bcdef ("net/mlx5e: Fix endianness when calculating pedit mask first bit") Signed-off-by: Vlad Buslov Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-8-saeed@kernel.org Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 60 ++++++++++--------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9a5a5c2c7da9..7ca9e5b86778 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3147,7 +3147,7 @@ static struct mlx5_fields fields[] = { OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0, dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]), OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit), - OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp), + OFFLOAD(IP_DSCP, 16, 0x0fc0, ip6, 0, ip_dscp), OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport), OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport), @@ -3158,21 +3158,31 @@ static struct mlx5_fields fields[] = { OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport), }; -static unsigned long mask_to_le(unsigned long mask, int size) +static u32 mask_field_get(void *mask, struct mlx5_fields *f) { - __be32 mask_be32; - __be16 mask_be16; - - if (size == 32) { - mask_be32 = (__force __be32)(mask); - mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32)); - } else if (size == 16) { - mask_be32 = (__force __be32)(mask); - mask_be16 = *(__be16 *)&mask_be32; - mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16)); + switch (f->field_bsize) { + case 32: + return be32_to_cpu(*(__be32 *)mask) & f->field_mask; + case 16: + return be16_to_cpu(*(__be16 *)mask) & (u16)f->field_mask; + default: + return *(u8 *)mask & (u8)f->field_mask; } +} - return mask; +static void mask_field_clear(void *mask, struct mlx5_fields *f) +{ + switch (f->field_bsize) { + case 32: + *(__be32 *)mask &= ~cpu_to_be32(f->field_mask); + break; + case 16: + *(__be16 *)mask &= ~cpu_to_be16((u16)f->field_mask); + break; + default: + *(u8 *)mask &= ~(u8)f->field_mask; + break; + } } static int offload_pedit_fields(struct mlx5e_priv *priv, @@ -3184,11 +3194,12 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; struct pedit_headers_action *hdrs = parse_attr->hdrs; void *headers_c, *headers_v, *action, *vals_p; - u32 *s_masks_p, *a_masks_p, s_mask, a_mask; struct mlx5e_tc_mod_hdr_acts *mod_acts; - unsigned long mask, field_mask; + void *s_masks_p, *a_masks_p; int i, first, last, next_z; struct mlx5_fields *f; + unsigned long mask; + u32 s_mask, a_mask; u8 cmd; mod_acts = &parse_attr->mod_hdr_acts; @@ -3204,15 +3215,11 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, bool skip; f = &fields[i]; - /* avoid seeing bits set from previous iterations */ - s_mask = 0; - a_mask = 0; - s_masks_p = (void *)set_masks + f->offset; a_masks_p = (void *)add_masks + f->offset; - s_mask = *s_masks_p & f->field_mask; - a_mask = *a_masks_p & f->field_mask; + s_mask = mask_field_get(s_masks_p, f); + a_mask = mask_field_get(a_masks_p, f); if (!s_mask && !a_mask) /* nothing to offload here */ continue; @@ -3239,22 +3246,20 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, match_mask, f->field_bsize)) skip = true; /* clear to denote we consumed this field */ - *s_masks_p &= ~f->field_mask; + mask_field_clear(s_masks_p, f); } else { cmd = MLX5_ACTION_TYPE_ADD; mask = a_mask; vals_p = (void *)add_vals + f->offset; /* add 0 is no change */ - if ((*(u32 *)vals_p & f->field_mask) == 0) + if (!mask_field_get(vals_p, f)) skip = true; /* clear to denote we consumed this field */ - *a_masks_p &= ~f->field_mask; + mask_field_clear(a_masks_p, f); } if (skip) continue; - mask = mask_to_le(mask, f->field_bsize); - first = find_first_bit(&mask, f->field_bsize); next_z = find_next_zero_bit(&mask, f->field_bsize, first); last = find_last_bit(&mask, f->field_bsize); @@ -3281,10 +3286,9 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, MLX5_SET(set_action_in, action, field, f->field); if (cmd == MLX5_ACTION_TYPE_SET) { + unsigned long field_mask = f->field_mask; int start; - field_mask = mask_to_le(f->field_mask, f->field_bsize); - /* if field is bit sized it can start not from first bit */ start = find_first_bit(&field_mask, f->field_bsize); From bdf788cf224f61c20a01c58c00685d394d57887f Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 14 Nov 2023 13:58:39 -0800 Subject: [PATCH 406/415] net/mlx5e: Don't modify the peer sent-to-vport rules for IPSec offload As IPSec packet offload in switchdev mode is not supported with LAG, it's unnecessary to modify those sent-to-vport rules to the peer eswitch. Fixes: c6c2bf5db4ea ("net/mlx5e: Support IPsec packet offload for TX in switchdev mode") Signed-off-by: Jianbo Liu Reviewed-by: Leon Romanovsky Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-9-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b296ac52a439..88236e75fd90 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -984,7 +984,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - if (rep->vport == MLX5_VPORT_UPLINK && on_esw->offloads.ft_ipsec_tx_pol) { + if (rep->vport == MLX5_VPORT_UPLINK && + on_esw == from_esw && on_esw->offloads.ft_ipsec_tx_pol) { dest.ft = on_esw->offloads.ft_ipsec_tx_pol; flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; From 64f14d16eef1f939000f2617b50c7c996b5117d4 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 14 Nov 2023 13:58:40 -0800 Subject: [PATCH 407/415] net/mlx5e: Avoid referencing skb after free-ing in drop path of mlx5e_sq_xmit_wqe When SQ is a port timestamping SQ for PTP, do not access tx flags of skb after free-ing the skb. Free the skb only after all references that depend on it have been handled in the dropped WQE path. Fixes: 3178308ad4ca ("net/mlx5e: Make tx_port_ts logic resilient to out-of-order CQEs") Signed-off-by: Rahul Rameshbabu Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-10-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index d41435c22ce5..19f2c25b05a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -494,10 +494,10 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, err_drop: stats->dropped++; - dev_kfree_skb_any(skb); if (unlikely(sq->ptpsq && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) mlx5e_ptp_metadata_fifo_push(&sq->ptpsq->metadata_freelist, be32_to_cpu(eseg->flow_table_metadata)); + dev_kfree_skb_any(skb); mlx5e_tx_flush(sq); } From 7e3f3ba97e6cc6fce5bf62df2ca06c8e59040167 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 14 Nov 2023 13:58:41 -0800 Subject: [PATCH 408/415] net/mlx5e: Track xmit submission to PTP WQ after populating metadata map Ensure the skb is available in metadata mapping to skbs before tracking the metadata index for detecting undelivered CQEs. If the metadata index is put in the tracking list before putting the skb in the map, the metadata index might be used for detecting undelivered CQEs before the relevant skb is available in the map, which can lead to a null-ptr-deref. Log: general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f] CPU: 0 PID: 1243 Comm: kworker/0:2 Not tainted 6.6.0-rc4+ #108 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: events mlx5e_rx_dim_work [mlx5_core] RIP: 0010:mlx5e_ptp_napi_poll+0x9a4/0x2290 [mlx5_core] Code: 8c 24 38 cc ff ff 4c 8d 3c c1 4c 89 f9 48 c1 e9 03 42 80 3c 31 00 0f 85 97 0f 00 00 4d 8b 3f 49 8d 7f 28 48 89 f9 48 c1 e9 03 <42> 80 3c 31 00 0f 85 8b 0f 00 00 49 8b 47 28 48 85 c0 0f 84 05 07 RSP: 0018:ffff8884d3c09c88 EFLAGS: 00010206 RAX: 0000000000000069 RBX: ffff8881160349d8 RCX: 0000000000000005 RDX: ffffed10218f48cf RSI: 0000000000000004 RDI: 0000000000000028 RBP: ffff888122707700 R08: 0000000000000001 R09: ffffed109a781383 R10: 0000000000000003 R11: 0000000000000003 R12: ffff88810c7a7a40 R13: ffff888122707700 R14: dffffc0000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8884d3c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f4f878dd6e0 CR3: 000000014d108002 CR4: 0000000000370eb0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? die_addr+0x3c/0xa0 ? exc_general_protection+0x144/0x210 ? asm_exc_general_protection+0x22/0x30 ? mlx5e_ptp_napi_poll+0x9a4/0x2290 [mlx5_core] ? mlx5e_ptp_napi_poll+0x8f6/0x2290 [mlx5_core] __napi_poll.constprop.0+0xa4/0x580 net_rx_action+0x460/0xb80 ? _raw_spin_unlock_irqrestore+0x32/0x60 ? __napi_poll.constprop.0+0x580/0x580 ? tasklet_action_common.isra.0+0x2ef/0x760 __do_softirq+0x26c/0x827 irq_exit_rcu+0xc2/0x100 common_interrupt+0x7f/0xa0 asm_common_interrupt+0x22/0x40 RIP: 0010:__kmem_cache_alloc_node+0xb/0x330 Code: 41 5d 41 5e 41 5f c3 8b 44 24 14 8b 4c 24 10 09 c8 eb d5 e8 b7 43 ca 01 0f 1f 80 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 57 <41> 56 41 89 d6 41 55 41 89 f5 41 54 49 89 fc 53 48 83 e4 f0 48 83 RSP: 0018:ffff88812c4079c0 EFLAGS: 00000246 RAX: 1ffffffff083c7fe RBX: ffff888100042dc0 RCX: 0000000000000218 RDX: 00000000ffffffff RSI: 0000000000000dc0 RDI: ffff888100042dc0 RBP: ffff88812c4079c8 R08: ffffffffa0289f96 R09: ffffed1025880ea9 R10: ffff888138839f80 R11: 0000000000000002 R12: 0000000000000dc0 R13: 0000000000000100 R14: 000000000000008c R15: ffff8881271fc450 ? cmd_exec+0x796/0x2200 [mlx5_core] kmalloc_trace+0x26/0xc0 cmd_exec+0x796/0x2200 [mlx5_core] mlx5_cmd_do+0x22/0xc0 [mlx5_core] mlx5_cmd_exec+0x17/0x30 [mlx5_core] mlx5_core_modify_cq_moderation+0x139/0x1b0 [mlx5_core] ? mlx5_add_cq_to_tasklet+0x280/0x280 [mlx5_core] ? lockdep_set_lock_cmp_fn+0x190/0x190 ? process_one_work+0x659/0x1220 mlx5e_rx_dim_work+0x9d/0x100 [mlx5_core] process_one_work+0x730/0x1220 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? max_active_store+0xf0/0xf0 ? assign_work+0x168/0x240 worker_thread+0x70f/0x12d0 ? __kthread_parkme+0xd1/0x1d0 ? process_one_work+0x1220/0x1220 kthread+0x2d9/0x3b0 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x2d/0x70 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork_asm+0x11/0x20 Modules linked in: xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay mlx5_ib ib_uverbs ib_core zram zsmalloc mlx5_core fuse ---[ end trace 0000000000000000 ]--- Fixes: 3178308ad4ca ("net/mlx5e: Make tx_port_ts logic resilient to out-of-order CQEs") Signed-off-by: Rahul Rameshbabu Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-11-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 19f2c25b05a0..f0b506e562df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -399,9 +399,9 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata); mlx5e_skb_cb_hwtstamp_init(skb); - mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index); mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb, metadata_index); + mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index); if (!netif_tx_queue_stopped(sq->txq) && mlx5e_ptpsq_metadata_freelist_empty(sq->ptpsq)) { netif_tx_stop_queue(sq->txq); From 92214be5979c0961a471b7eaaaeacab41bdf456c Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 14 Nov 2023 13:58:42 -0800 Subject: [PATCH 409/415] net/mlx5e: Update doorbell for port timestamping CQ before the software counter Previously, mlx5e_ptp_poll_ts_cq would update the device doorbell with the incremented consumer index after the relevant software counters in the kernel were updated. In the mlx5e_sq_xmit_wqe context, this would lead to either overrunning the device CQ or exceeding the expected software buffer size in the device CQ if the device CQ size was greater than the software buffer size. Update the relevant software counter only after updating the device CQ consumer index in the port timestamping napi_poll context. Log: mlx5_core 0000:08:00.0: cq_err_event_notifier:517:(pid 0): CQ error on CQN 0x487, syndrome 0x1 mlx5_core 0000:08:00.0 eth2: mlx5e_cq_error_event: cqn=0x000487 event=0x04 Fixes: 1880bc4e4a96 ("net/mlx5e: Add TX port timestamp support") Signed-off-by: Rahul Rameshbabu Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-12-saeed@kernel.org Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en/ptp.c | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index bb11e644d24f..af3928eddafd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -177,6 +177,8 @@ static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq, static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq, struct mlx5_cqe64 *cqe, + u8 *md_buff, + u8 *md_buff_sz, int budget) { struct mlx5e_ptp_port_ts_cqe_list *pending_cqe_list = ptpsq->ts_cqe_pending_list; @@ -211,19 +213,24 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq, mlx5e_ptpsq_mark_ts_cqes_undelivered(ptpsq, hwtstamp); out: napi_consume_skb(skb, budget); - mlx5e_ptp_metadata_fifo_push(&ptpsq->metadata_freelist, metadata_id); + md_buff[*md_buff_sz++] = metadata_id; if (unlikely(mlx5e_ptp_metadata_map_unhealthy(&ptpsq->metadata_map)) && !test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) queue_work(ptpsq->txqsq.priv->wq, &ptpsq->report_unhealthy_work); } -static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget) +static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int napi_budget) { struct mlx5e_ptpsq *ptpsq = container_of(cq, struct mlx5e_ptpsq, ts_cq); - struct mlx5_cqwq *cqwq = &cq->wq; + int budget = min(napi_budget, MLX5E_TX_CQ_POLL_BUDGET); + u8 metadata_buff[MLX5E_TX_CQ_POLL_BUDGET]; + u8 metadata_buff_sz = 0; + struct mlx5_cqwq *cqwq; struct mlx5_cqe64 *cqe; int work_done = 0; + cqwq = &cq->wq; + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &ptpsq->txqsq.state))) return false; @@ -234,7 +241,8 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget) do { mlx5_cqwq_pop(cqwq); - mlx5e_ptp_handle_ts_cqe(ptpsq, cqe, budget); + mlx5e_ptp_handle_ts_cqe(ptpsq, cqe, + metadata_buff, &metadata_buff_sz, napi_budget); } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq))); mlx5_cqwq_update_db_record(cqwq); @@ -242,6 +250,10 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget) /* ensure cq space is freed before enabling more cqes */ wmb(); + while (metadata_buff_sz > 0) + mlx5e_ptp_metadata_fifo_push(&ptpsq->metadata_freelist, + metadata_buff[--metadata_buff_sz]); + mlx5e_txqsq_wake(&ptpsq->txqsq); return work_done == budget; From 3338bebfc26a1e2cebbba82a1cf12c0159608e73 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 14 Nov 2023 13:58:43 -0800 Subject: [PATCH 410/415] net/mlx5: Increase size of irq name buffer Without increased buffer size, will trigger -Wformat-truncation with W=1 for the snprintf operation writing to the buffer. drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c: In function 'mlx5_irq_alloc': drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c:296:7: error: '@pci:' directive output may be truncated writing 5 bytes into a region of size between 1 and 32 [-Werror=format-truncation=] 296 | "%s@pci:%s", name, pci_name(dev->pdev)); | ^~~~~ drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c:295:2: note: 'snprintf' output 6 or more bytes (assuming 37) into a destination of size 32 295 | snprintf(irq->name, MLX5_MAX_IRQ_NAME, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 296 | "%s@pci:%s", name, pci_name(dev->pdev)); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: ada9f5d00797 ("IB/mlx5: Fix eq names to display nicely in /proc/interrupts") Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6d4ab2e97dcfbcd748ae71761a9d8e5e41cc732c Signed-off-by: Rahul Rameshbabu Reviewed-by: Dragos Tatulea Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-13-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 653648216730..4dcf995cb1a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -28,7 +28,7 @@ struct mlx5_irq { struct atomic_notifier_head nh; cpumask_var_t mask; - char name[MLX5_MAX_IRQ_NAME]; + char name[MLX5_MAX_IRQ_FORMATTED_NAME]; struct mlx5_irq_pool *pool; int refcount; struct msi_map map; @@ -292,8 +292,8 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, else irq_sf_set_name(pool, name, i); ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh); - snprintf(irq->name, MLX5_MAX_IRQ_NAME, - "%s@pci:%s", name, pci_name(dev->pdev)); + snprintf(irq->name, MLX5_MAX_IRQ_FORMATTED_NAME, + MLX5_IRQ_NAME_FORMAT_STR, name, pci_name(dev->pdev)); err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name, &irq->nh); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h index d3a77a0ab848..c4d377f8df30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h @@ -7,6 +7,9 @@ #include #define MLX5_MAX_IRQ_NAME (32) +#define MLX5_IRQ_NAME_FORMAT_STR ("%s@pci:%s") +#define MLX5_MAX_IRQ_FORMATTED_NAME \ + (MLX5_MAX_IRQ_NAME + sizeof(MLX5_IRQ_NAME_FORMAT_STR)) /* max irq_index is 2047, so four chars */ #define MLX5_MAX_IRQ_IDX_CHARS (4) #define MLX5_EQ_REFS_PER_IRQ (2) From dce94142842e119b982c27c1b62bd20890c7fd21 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 14 Nov 2023 13:58:44 -0800 Subject: [PATCH 411/415] net/mlx5e: Reduce the size of icosq_str icosq_str size is unnecessarily too long, and it causes a build warning -Wformat-truncation with W=1. Looking closely, It doesn't need to be 255B, hence this patch reduces the size to 32B which should be more than enough to host the string: "ICOSQ: 0x%x, ". While here, add a missing space in the formatted string. This fixes the following build warning: $ KCFLAGS='-Wall -Werror' $ make O=/tmp/kbuild/linux W=1 -s -j12 drivers/net/ethernet/mellanox/mlx5/core/ drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c: In function 'mlx5e_reporter_rx_timeout': drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c:718:56: error: ', CQ: 0x' directive output may be truncated writing 8 bytes into a region of size between 0 and 255 [-Werror=format-truncation=] 718 | "RX timeout on channel: %d, %sRQ: 0x%x, CQ: 0x%x", | ^~~~~~~~ drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c:717:9: note: 'snprintf' output between 43 and 322 bytes into a destination of size 288 717 | snprintf(err_str, sizeof(err_str), | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 718 | "RX timeout on channel: %d, %sRQ: 0x%x, CQ: 0x%x", | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 719 | rq->ix, icosq_str, rq->rqn, rq->cq.mcq.cqn); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: 521f31af004a ("net/mlx5e: Allow RQ outside of channel context") Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6d4ab2e97dcfbcd748ae71761a9d8e5e41cc732c Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-14-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index fea8c0a5fe89..4358798d6ce1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -492,11 +492,11 @@ static int mlx5e_rx_reporter_dump(struct devlink_health_reporter *reporter, void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq) { - char icosq_str[MLX5E_REPORTER_PER_Q_MAX_LEN] = {}; char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; struct mlx5e_icosq *icosq = rq->icosq; struct mlx5e_priv *priv = rq->priv; struct mlx5e_err_ctx err_ctx = {}; + char icosq_str[32] = {}; err_ctx.ctx = rq; err_ctx.recover = mlx5e_rx_reporter_timeout_recover; @@ -505,7 +505,7 @@ void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq) if (icosq) snprintf(icosq_str, sizeof(icosq_str), "ICOSQ: 0x%x, ", icosq->sqn); snprintf(err_str, sizeof(err_str), - "RX timeout on channel: %d, %sRQ: 0x%x, CQ: 0x%x", + "RX timeout on channel: %d, %s RQ: 0x%x, CQ: 0x%x", rq->ix, icosq_str, rq->rqn, rq->cq.mcq.cqn); mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); From 41e63c2baa11dc2aa71df5dd27a5bd87d11b6bbb Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 14 Nov 2023 13:58:45 -0800 Subject: [PATCH 412/415] net/mlx5e: Check return value of snprintf writing to fw_version buffer Treat the operation as an error case when the return value is equivalent to the size of the name buffer. Failed to write null terminator to the name buffer, making the string malformed and should not be used. Provide a string with only the firmware version when forming the string with the board id fails. Without check, will trigger -Wformat-truncation with W=1. drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c: In function 'mlx5e_ethtool_get_drvinfo': drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c:49:31: warning: '%.16s' directive output may be truncated writing up to 16 bytes into a region of size between 13 and 22 [-Wformat-truncation=] 49 | "%d.%d.%04d (%.16s)", | ^~~~~ drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c:48:9: note: 'snprintf' output between 12 and 37 bytes into a destination of size 32 48 | snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 49 | "%d.%d.%04d (%.16s)", | ~~~~~~~~~~~~~~~~~~~~~ 50 | fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 51 | mdev->board_id); | ~~~~~~~~~~~~~~~ Fixes: 84e11edb71de ("net/mlx5e: Show board id in ethtool driver information") Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6d4ab2e97dcfbcd748ae71761a9d8e5e41cc732c Signed-off-by: Rahul Rameshbabu Reviewed-by: Dragos Tatulea Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 215261a69255..792a0ea544cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -43,12 +43,17 @@ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, struct ethtool_drvinfo *drvinfo) { struct mlx5_core_dev *mdev = priv->mdev; + int count; strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); - snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), - "%d.%d.%04d (%.16s)", - fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), - mdev->board_id); + count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); + if (count == sizeof(drvinfo->fw_version)) + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev)); + strscpy(drvinfo->bus_info, dev_name(mdev->device), sizeof(drvinfo->bus_info)); } From 1b2bd0c0264febcd8d47209079a6671c38e6558b Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 14 Nov 2023 13:58:46 -0800 Subject: [PATCH 413/415] net/mlx5e: Check return value of snprintf writing to fw_version buffer for representors Treat the operation as an error case when the return value is equivalent to the size of the name buffer. Failed to write null terminator to the name buffer, making the string malformed and should not be used. Provide a string with only the firmware version when forming the string with the board id fails. This logic for representors is identical to normal flow with ethtool. Without check, will trigger -Wformat-truncation with W=1. drivers/net/ethernet/mellanox/mlx5/core/en_rep.c: In function 'mlx5e_rep_get_drvinfo': drivers/net/ethernet/mellanox/mlx5/core/en_rep.c:78:31: warning: '%.16s' directive output may be truncated writing up to 16 bytes into a region of size between 13 and 22 [-Wformat-truncation=] 78 | "%d.%d.%04d (%.16s)", | ^~~~~ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c:77:9: note: 'snprintf' output between 12 and 37 bytes into a destination of size 32 77 | snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 78 | "%d.%d.%04d (%.16s)", | ~~~~~~~~~~~~~~~~~~~~~ 79 | fw_rev_maj(mdev), fw_rev_min(mdev), | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 80 | fw_rev_sub(mdev), mdev->board_id); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: cf83c8fdcd47 ("net/mlx5e: Add missing ethtool driver info for representors") Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6d4ab2e97dcfbcd748ae71761a9d8e5e41cc732c Signed-off-by: Rahul Rameshbabu Reviewed-by: Dragos Tatulea Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-16-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 693e55b010d9..3ab682bbcf86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -71,13 +71,17 @@ static void mlx5e_rep_get_drvinfo(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; + int count; strscpy(drvinfo->driver, mlx5e_rep_driver_name, sizeof(drvinfo->driver)); - snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), - "%d.%d.%04d (%.16s)", - fw_rev_maj(mdev), fw_rev_min(mdev), - fw_rev_sub(mdev), mdev->board_id); + count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); + if (count == sizeof(drvinfo->fw_version)) + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev)); } static const struct counter_desc sw_rep_stats_desc[] = { From 7cd5af0e937a197295f3aa3721031f0fbae49cff Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 13 Nov 2023 12:53:28 -0500 Subject: [PATCH 414/415] net: sched: do not offload flows with a helper in act_ct There is no hardware supporting ct helper offload. However, prior to this patch, a flower filter with a helper in the ct action can be successfully set into the HW, for example (eth1 is a bnxt NIC): # tc qdisc add dev eth1 ingress_block 22 ingress # tc filter add block 22 proto ip flower skip_sw ip_proto tcp \ dst_port 21 ct_state -trk action ct helper ipv4-tcp-ftp # tc filter show dev eth1 ingress filter block 22 protocol ip pref 49152 flower chain 0 handle 0x1 eth_type ipv4 ip_proto tcp dst_port 21 ct_state -trk skip_sw in_hw in_hw_count 1 <---- action order 1: ct zone 0 helper ipv4-tcp-ftp pipe index 2 ref 1 bind 1 used_hw_stats delayed This might cause the flower filter not to work as expected in the HW. This patch avoids this problem by simply returning -EOPNOTSUPP in tcf_ct_offload_act_setup() to not allow to offload flows with a helper in act_ct. Fixes: a21b06e73191 ("net: sched: add helper support in act_ct") Signed-off-by: Xin Long Reviewed-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/f8685ec7702c4a448a1371a8b34b43217b583b9d.1699898008.git.lucien.xin@gmail.com Signed-off-by: Paolo Abeni --- include/net/tc_act/tc_ct.h | 9 +++++++++ net/sched/act_ct.c | 3 +++ 2 files changed, 12 insertions(+) diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h index 8a6dbfb23336..77f87c622a2e 100644 --- a/include/net/tc_act/tc_ct.h +++ b/include/net/tc_act/tc_ct.h @@ -58,6 +58,11 @@ static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a) return to_ct_params(a)->nf_ft; } +static inline struct nf_conntrack_helper *tcf_ct_helper(const struct tc_action *a) +{ + return to_ct_params(a)->helper; +} + #else static inline uint16_t tcf_ct_zone(const struct tc_action *a) { return 0; } static inline int tcf_ct_action(const struct tc_action *a) { return 0; } @@ -65,6 +70,10 @@ static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a) { return NULL; } +static inline struct nf_conntrack_helper *tcf_ct_helper(const struct tc_action *a) +{ + return NULL; +} #endif /* CONFIG_NF_CONNTRACK */ #if IS_ENABLED(CONFIG_NET_ACT_CT) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 0db0ecf1d110..b3f4a503ee2b 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1549,6 +1549,9 @@ static int tcf_ct_offload_act_setup(struct tc_action *act, void *entry_data, if (bind) { struct flow_action_entry *entry = entry_data; + if (tcf_ct_helper(act)) + return -EOPNOTSUPP; + entry->id = FLOW_ACTION_CT; entry->ct.action = tcf_ct_action(act); entry->ct.zone = tcf_ct_zone(act); From 7e1caeace0418381f36b3aa8403dfd82fc57fc53 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 14 Nov 2023 18:59:15 +0100 Subject: [PATCH 415/415] macvlan: Don't propagate promisc change to lower dev in passthru Macvlan device in passthru mode sets its lower device promiscuous mode according to its MACVLAN_FLAG_NOPROMISC flag instead of synchronizing it to its own promiscuity setting. However, macvlan_change_rx_flags() function doesn't check the mode before propagating such changes to the lower device which can cause net_device->promiscuity counter overflow as illustrated by reproduction example [0] and resulting dmesg log [1]. Fix the issue by first verifying the mode in macvlan_change_rx_flags() function before propagating promiscuous mode change to the lower device. [0]: ip link add macvlan1 link enp8s0f0 type macvlan mode passthru ip link set macvlan1 promisc on ip l set dev macvlan1 up ip link set macvlan1 promisc off ip l set dev macvlan1 down ip l set dev macvlan1 up [1]: [ 5156.281724] macvlan1: entered promiscuous mode [ 5156.285467] mlx5_core 0000:08:00.0 enp8s0f0: entered promiscuous mode [ 5156.287639] macvlan1: left promiscuous mode [ 5156.288339] mlx5_core 0000:08:00.0 enp8s0f0: left promiscuous mode [ 5156.290907] mlx5_core 0000:08:00.0 enp8s0f0: entered promiscuous mode [ 5156.317197] mlx5_core 0000:08:00.0 enp8s0f0: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken. Fixes: efdbd2b30caa ("macvlan: Propagate promiscuity setting to lower devices.") Reviewed-by: Gal Pressman Signed-off-by: Vlad Buslov Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231114175915.1649154-1-vladbu@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/macvlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 02bd201bc7e5..c8da94af4161 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -780,7 +780,7 @@ static void macvlan_change_rx_flags(struct net_device *dev, int change) if (dev->flags & IFF_UP) { if (change & IFF_ALLMULTI) dev_set_allmulti(lowerdev, dev->flags & IFF_ALLMULTI ? 1 : -1); - if (change & IFF_PROMISC) + if (!macvlan_passthru(vlan->port) && change & IFF_PROMISC) dev_set_promiscuity(lowerdev, dev->flags & IFF_PROMISC ? 1 : -1);