From 80e4e1f472889f31a4dcaea3a4eb7a565296f1f3 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Sun, 20 Feb 2022 19:19:10 +1300 Subject: [PATCH 1/3] irqchip/gic-v3: Use dsb(ishst) to order writes with ICC_SGI1R_EL1 accesses A dsb(ishst) barrier should be enough to order previous writes with the system register generating the SGI, as we only need to guarantee the visibility of data to other CPUs in the inner shareable domain before we send the SGI. A micro-benchmark is written to verify the performance impact on kunpeng920 machine with 2 sockets, each socket has 2 dies, and each die has 24 CPUs, so totally the system has 2 * 2 * 24 = 96 CPUs. ~2% performance improvement can be seen by this benchmark. The code of benchmark module: #include #include volatile int data0 ____cacheline_aligned; volatile int data1 ____cacheline_aligned; volatile int data2 ____cacheline_aligned; volatile int data3 ____cacheline_aligned; volatile int data4 ____cacheline_aligned; volatile int data5 ____cacheline_aligned; volatile int data6 ____cacheline_aligned; static void ipi_latency_func(void *val) { } static int __init ipi_latency_init(void) { ktime_t stime, etime, delta; int cpu, i; int start = smp_processor_id(); stime = ktime_get(); for ( i = 0; i < 1000; i++) for (cpu = 0; cpu < 96; cpu++) { data0 = data1 = data2 = data3 = data4 = data5 = data6 = cpu; smp_call_function_single(cpu, ipi_latency_func, NULL, 1); } etime = ktime_get(); delta = ktime_sub(etime, stime); printk("%s ipi from cpu%d to cpu0-95 delta of 1000times:%lld\n", __func__, start, delta); return 0; } module_init(ipi_latency_init); static void ipi_latency_exit(void) { } module_exit(ipi_latency_exit); MODULE_DESCRIPTION("IPI benchmark"); MODULE_LICENSE("GPL"); run the below commands 10 times on both Vanilla and the kernel with this patch: # taskset -c 0 insmod test.ko # rmmod test The result on vanilla: ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126757449 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126784249 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126177703 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:127022281 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126184883 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:127374585 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:125778089 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126974441 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:127357625 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126228184 The result on the kernel with this patch: ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:124467401 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123474209 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123558497 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:122993951 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:122984223 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123323609 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:124507583 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123386963 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123340664 ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123285324 Signed-off-by: Barry Song [maz: tidied up commit message] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220220061910.6155-1-21cnbao@gmail.com --- drivers/irqchip/irq-gic-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 5e935d97207d..0efe1a9a9f3b 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1211,7 +1211,7 @@ static void gic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask) * Ensure that stores to Normal memory are visible to the * other CPUs before issuing the IPI. */ - wmb(); + dsb(ishst); for_each_cpu(cpu, mask) { u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(cpu_logical_map(cpu)); From e414c25e3399b2b3d7337dc47abccab5c71b7c8f Mon Sep 17 00:00:00 2001 From: "Souptick Joarder (HPE)" Date: Fri, 18 Feb 2022 22:03:03 +0530 Subject: [PATCH 2/3] irqchip/nvic: Release nvic_base upon failure smatch warning was reported as below -> smatch warnings: drivers/irqchip/irq-nvic.c:131 nvic_of_init() warn: 'nvic_base' not released on lines: 97. Release nvic_base upon failure. Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Souptick Joarder (HPE) Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220218163303.33344-1-jrdr.linux@gmail.com --- drivers/irqchip/irq-nvic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/irqchip/irq-nvic.c b/drivers/irqchip/irq-nvic.c index ba4759b3e269..94230306e0ee 100644 --- a/drivers/irqchip/irq-nvic.c +++ b/drivers/irqchip/irq-nvic.c @@ -107,6 +107,7 @@ static int __init nvic_of_init(struct device_node *node, if (!nvic_irq_domain) { pr_warn("Failed to allocate irq domain\n"); + iounmap(nvic_base); return -ENOMEM; } @@ -116,6 +117,7 @@ static int __init nvic_of_init(struct device_node *node, if (ret) { pr_warn("Failed to allocate irq chips\n"); irq_domain_remove(nvic_irq_domain); + iounmap(nvic_base); return ret; } From 1e364921b0085780153b1d03640ca41e1be83e8b Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Fri, 4 Mar 2022 08:51:29 +0100 Subject: [PATCH 3/3] irqchip/xilinx: Switch to GENERIC_IRQ_MULTI_HANDLER Register the Xilinx driver as the root interrupt controller using the GENERIC_IRQ_MULTI_HANDLER API, instead of the arch-specific hack. Signed-off-by: Michal Simek Reviewed-by: Stefan Asserhall [maz: repainted commit message] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/e6c6595a81f662bf839cee3109d0fa58a596ea47.1646380284.git.michal.simek@xilinx.com --- arch/microblaze/Kconfig | 2 ++ arch/microblaze/include/asm/irq.h | 3 --- arch/microblaze/kernel/irq.c | 16 +--------------- drivers/irqchip/irq-xilinx-intc.c | 30 ++++++++++++++++-------------- 4 files changed, 19 insertions(+), 32 deletions(-) diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 59798e43cdb0..da568e981604 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -45,6 +45,8 @@ config MICROBLAZE select SET_FS select ZONE_DMA select TRACE_IRQFLAGS_SUPPORT + select GENERIC_IRQ_MULTI_HANDLER + select HANDLE_DOMAIN_IRQ # Endianness selection choice diff --git a/arch/microblaze/include/asm/irq.h b/arch/microblaze/include/asm/irq.h index 0a28e80bbab0..cb6ab55d1d01 100644 --- a/arch/microblaze/include/asm/irq.h +++ b/arch/microblaze/include/asm/irq.h @@ -11,7 +11,4 @@ struct pt_regs; extern void do_IRQ(struct pt_regs *regs); -/* should be defined in each interrupt controller driver */ -extern unsigned int xintc_get_irq(void); - #endif /* _ASM_MICROBLAZE_IRQ_H */ diff --git a/arch/microblaze/kernel/irq.c b/arch/microblaze/kernel/irq.c index 903dad822fad..1f8cb4c4f74f 100644 --- a/arch/microblaze/kernel/irq.c +++ b/arch/microblaze/kernel/irq.c @@ -20,27 +20,13 @@ #include #include -static u32 concurrent_irq; - void __irq_entry do_IRQ(struct pt_regs *regs) { - unsigned int irq; struct pt_regs *old_regs = set_irq_regs(regs); trace_hardirqs_off(); irq_enter(); - irq = xintc_get_irq(); -next_irq: - BUG_ON(!irq); - generic_handle_irq(irq); - - irq = xintc_get_irq(); - if (irq != -1U) { - pr_debug("next irq: %d\n", irq); - ++concurrent_irq; - goto next_irq; - } - + handle_arch_irq(regs); irq_exit(); set_irq_regs(old_regs); trace_hardirqs_on(); diff --git a/drivers/irqchip/irq-xilinx-intc.c b/drivers/irqchip/irq-xilinx-intc.c index 356a59755d63..238d3d344949 100644 --- a/drivers/irqchip/irq-xilinx-intc.c +++ b/drivers/irqchip/irq-xilinx-intc.c @@ -32,6 +32,8 @@ #define MER_ME (1<<0) #define MER_HIE (1<<1) +#define SPURIOUS_IRQ (-1U) + static DEFINE_STATIC_KEY_FALSE(xintc_is_be); struct xintc_irq_chip { @@ -110,20 +112,6 @@ static struct irq_chip intc_dev = { .irq_mask_ack = intc_mask_ack, }; -unsigned int xintc_get_irq(void) -{ - unsigned int irq = -1; - u32 hwirq; - - hwirq = xintc_read(primary_intc, IVR); - if (hwirq != -1U) - irq = irq_find_mapping(primary_intc->root_domain, hwirq); - - pr_debug("irq-xilinx: hwirq=%d, irq=%d\n", hwirq, irq); - - return irq; -} - static int xintc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { struct xintc_irq_chip *irqc = d->host_data; @@ -164,6 +152,19 @@ static void xil_intc_irq_handler(struct irq_desc *desc) chained_irq_exit(chip, desc); } +static void xil_intc_handle_irq(struct pt_regs *regs) +{ + u32 hwirq; + + do { + hwirq = xintc_read(primary_intc, IVR); + if (unlikely(hwirq == SPURIOUS_IRQ)) + break; + + generic_handle_domain_irq(primary_intc->root_domain, hwirq); + } while (true); +} + static int __init xilinx_intc_of_init(struct device_node *intc, struct device_node *parent) { @@ -233,6 +234,7 @@ static int __init xilinx_intc_of_init(struct device_node *intc, } else { primary_intc = irqc; irq_set_default_host(primary_intc->root_domain); + set_handle_irq(xil_intc_handle_irq); } return 0;