From 0cfb4a1af386427cdaba98f18f501eb074985cfd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 15 Jun 2023 14:58:52 +0100 Subject: [PATCH 1/4] genirq: Use BIT() for the IRQD_* state flags As we're about to use the last bit available in the IRQD_* state flags, rewrite these flags with BIT(), which ensures that these constant do not represent a signed value. Signed-off-by: Marc Zyngier --- include/linux/irq.h | 46 ++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index b1b28affb32a..d9c86db69982 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -226,29 +226,29 @@ struct irq_data { */ enum { IRQD_TRIGGER_MASK = 0xf, - IRQD_SETAFFINITY_PENDING = (1 << 8), - IRQD_ACTIVATED = (1 << 9), - IRQD_NO_BALANCING = (1 << 10), - IRQD_PER_CPU = (1 << 11), - IRQD_AFFINITY_SET = (1 << 12), - IRQD_LEVEL = (1 << 13), - IRQD_WAKEUP_STATE = (1 << 14), - IRQD_MOVE_PCNTXT = (1 << 15), - IRQD_IRQ_DISABLED = (1 << 16), - IRQD_IRQ_MASKED = (1 << 17), - IRQD_IRQ_INPROGRESS = (1 << 18), - IRQD_WAKEUP_ARMED = (1 << 19), - IRQD_FORWARDED_TO_VCPU = (1 << 20), - IRQD_AFFINITY_MANAGED = (1 << 21), - IRQD_IRQ_STARTED = (1 << 22), - IRQD_MANAGED_SHUTDOWN = (1 << 23), - IRQD_SINGLE_TARGET = (1 << 24), - IRQD_DEFAULT_TRIGGER_SET = (1 << 25), - IRQD_CAN_RESERVE = (1 << 26), - IRQD_MSI_NOMASK_QUIRK = (1 << 27), - IRQD_HANDLE_ENFORCE_IRQCTX = (1 << 28), - IRQD_AFFINITY_ON_ACTIVATE = (1 << 29), - IRQD_IRQ_ENABLED_ON_SUSPEND = (1 << 30), + IRQD_SETAFFINITY_PENDING = BIT(8), + IRQD_ACTIVATED = BIT(9), + IRQD_NO_BALANCING = BIT(10), + IRQD_PER_CPU = BIT(11), + IRQD_AFFINITY_SET = BIT(12), + IRQD_LEVEL = BIT(13), + IRQD_WAKEUP_STATE = BIT(14), + IRQD_MOVE_PCNTXT = BIT(15), + IRQD_IRQ_DISABLED = BIT(16), + IRQD_IRQ_MASKED = BIT(17), + IRQD_IRQ_INPROGRESS = BIT(18), + IRQD_WAKEUP_ARMED = BIT(19), + IRQD_FORWARDED_TO_VCPU = BIT(20), + IRQD_AFFINITY_MANAGED = BIT(21), + IRQD_IRQ_STARTED = BIT(22), + IRQD_MANAGED_SHUTDOWN = BIT(23), + IRQD_SINGLE_TARGET = BIT(24), + IRQD_DEFAULT_TRIGGER_SET = BIT(25), + IRQD_CAN_RESERVE = BIT(26), + IRQD_MSI_NOMASK_QUIRK = BIT(27), + IRQD_HANDLE_ENFORCE_IRQCTX = BIT(28), + IRQD_AFFINITY_ON_ACTIVATE = BIT(29), + IRQD_IRQ_ENABLED_ON_SUSPEND = BIT(30), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) From 7cc148a32f1e7496e22c0005dd113a31d4a3b3d4 Mon Sep 17 00:00:00 2001 From: James Gowans Date: Thu, 8 Jun 2023 14:00:19 +0200 Subject: [PATCH 2/4] genirq: Expand doc for PENDING and REPLAY flags Adding a bit more info about what the flags are used for may help future code readers. Signed-off-by: James Gowans Cc: Thomas Gleixner Cc: Marc Zyngier Cc: Liao Chang Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230608120021.3273400-2-jgowans@amazon.com --- kernel/irq/internals.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 5fdc0b557579..c443a0ddc07e 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -47,9 +47,12 @@ enum { * detection * IRQS_POLL_INPROGRESS - polling in progress * IRQS_ONESHOT - irq is not unmasked in primary handler - * IRQS_REPLAY - irq is replayed + * IRQS_REPLAY - irq has been resent and will not be resent + * again until the handler has run and cleared + * this flag. * IRQS_WAITING - irq is waiting - * IRQS_PENDING - irq is pending and replayed later + * IRQS_PENDING - irq needs to be resent and should be resent + * at the next available opportunity. * IRQS_SUSPENDED - irq is suspended * IRQS_NMI - irq line is used to deliver NMIs * IRQS_SYSFS - descriptor has been added to sysfs From 9c15eeb5362c48dd27d51bd72e8873341fa9383c Mon Sep 17 00:00:00 2001 From: James Gowans Date: Thu, 8 Jun 2023 14:00:20 +0200 Subject: [PATCH 3/4] genirq: Allow fasteoi handler to resend interrupts on concurrent handling There is a class of interrupt controllers out there that, once they have signalled a given interrupt number, will still signal incoming instances of the *same* interrupt despite the original interrupt not having been EOIed yet. As long as the new interrupt reaches the *same* CPU, nothing bad happens, as that CPU still has its interrupts globally disabled, and we will only take the new interrupt once the interrupt has been EOIed. However, things become more "interesting" if an affinity change comes in while the interrupt is being handled. More specifically, while the per-irq lock is being dropped. This results in the affinity change taking place immediately. At this point, there is nothing that prevents the interrupt from firing on the new target CPU. We end-up with the interrupt running concurrently on two CPUs, which isn't a good thing. And that's where things become worse: the new CPU notices that the interrupt handling is in progress (irq_may_run() return false), and *drops the interrupt on the floor*. The whole race looks like this: CPU 0 | CPU 1 -----------------------------|----------------------------- interrupt start | handle_fasteoi_irq | set_affinity(CPU 1) handler | ... | interrupt start ... | handle_fasteoi_irq -> early out handle_fasteoi_irq return | interrupt end interrupt end | If the interrupt was an edge, too bad. The interrupt is lost, and the system will eventually die one way or another. Not great. A way to avoid this situation is to detect this problem at the point we handle the interrupt on the new target. Instead of dropping the interrupt, use the resend mechanism to force it to be replayed. Also, in order to limit the impact of this workaround to the pathetic architectures that require it, gate it behind a new irq flag aptly named IRQD_RESEND_WHEN_IN_PROGRESS. Suggested-by: Marc Zyngier Signed-off-by: James Gowans Cc: Thomas Gleixner Cc: Marc Zyngier Cc: KarimAllah Raslan Cc: Yipeng Zou Cc: Zhang Jianhua [maz: reworded commit mesage] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230608120021.3273400-3-jgowans@amazon.com --- include/linux/irq.h | 13 +++++++++++++ kernel/irq/chip.c | 16 +++++++++++++++- kernel/irq/debugfs.c | 2 ++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index d9c86db69982..d8a6fdce9373 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -223,6 +223,8 @@ struct irq_data { * irq_chip::irq_set_affinity() when deactivated. * IRQD_IRQ_ENABLED_ON_SUSPEND - Interrupt is enabled on suspend by irq pm if * irqchip have flag IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND set. + * IRQD_RESEND_WHEN_IN_PROGRESS - Interrupt may fire when already in progress in which + * case it must be resent at the next available opportunity. */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -249,6 +251,7 @@ enum { IRQD_HANDLE_ENFORCE_IRQCTX = BIT(28), IRQD_AFFINITY_ON_ACTIVATE = BIT(29), IRQD_IRQ_ENABLED_ON_SUSPEND = BIT(30), + IRQD_RESEND_WHEN_IN_PROGRESS = BIT(31), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -448,6 +451,16 @@ static inline bool irqd_affinity_on_activate(struct irq_data *d) return __irqd_to_state(d) & IRQD_AFFINITY_ON_ACTIVATE; } +static inline void irqd_set_resend_when_in_progress(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_RESEND_WHEN_IN_PROGRESS; +} + +static inline bool irqd_needs_resend_when_in_progress(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_RESEND_WHEN_IN_PROGRESS; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 49e7bc871fec..57cd8f475302 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -692,8 +692,16 @@ void handle_fasteoi_irq(struct irq_desc *desc) raw_spin_lock(&desc->lock); - if (!irq_may_run(desc)) + /* + * When an affinity change races with IRQ handling, the next interrupt + * can arrive on the new CPU before the original CPU has completed + * handling the previous one - it may need to be resent. + */ + if (!irq_may_run(desc)) { + if (irqd_needs_resend_when_in_progress(&desc->irq_data)) + desc->istate |= IRQS_PENDING; goto out; + } desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); @@ -715,6 +723,12 @@ void handle_fasteoi_irq(struct irq_desc *desc) cond_unmask_eoi_irq(desc, chip); + /* + * When the race described above happens this will resend the interrupt. + */ + if (unlikely(desc->istate & IRQS_PENDING)) + check_irq_resend(desc, false); + raw_spin_unlock(&desc->lock); return; out: diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index bbcaac64038e..5971a66be034 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -133,6 +133,8 @@ static const struct irq_bit_descr irqdata_states[] = { BIT_MASK_DESCR(IRQD_HANDLE_ENFORCE_IRQCTX), BIT_MASK_DESCR(IRQD_IRQ_ENABLED_ON_SUSPEND), + + BIT_MASK_DESCR(IRQD_RESEND_WHEN_IN_PROGRESS), }; static const struct irq_bit_descr irqdesc_states[] = { From 8f4b589595d01f882d63d21efe15af4a5ad7c59b Mon Sep 17 00:00:00 2001 From: James Gowans Date: Thu, 8 Jun 2023 14:00:21 +0200 Subject: [PATCH 4/4] irqchip/gic-v3-its: Enable RESEND_WHEN_IN_PROGRESS for LPIs GICv3 LPIs are impacted by an architectural design issue: they do not have a global active state and as such a given LPI can be delivered to a new CPU after an affinity change while the previous instance of the same LPI handler has not yet completed on the original CPU. If LPIs had an active state, this second LPI would not be delivered until the first CPU deactivated the initial LPI, just like SPIs. To solve this issue, use the newly introduced IRQD_RESEND_WHEN_IN_PROGRESS flag, ensuring that we do not lose an LPI being delivered during that window by getting the GIC to resend it. This workaround gets enabled for all LPIs, including the VPE doorbells. Suggested-by: Marc Zyngier Signed-off-by: James Gowans Cc: Thomas Gleixner Cc: Marc Zyngier Cc: KarimAllah Raslan Cc: Yipeng Zou Cc: Zhang Jianhua [maz: massaged commit message] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230608120021.3273400-4-jgowans@amazon.com --- drivers/irqchip/irq-gic-v3-its.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 0ec2b1e1df75..1994541eaef8 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3585,6 +3585,7 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, irqd = irq_get_irq_data(virq + i); irqd_set_single_target(irqd); irqd_set_affinity_on_activate(irqd); + irqd_set_resend_when_in_progress(irqd); pr_debug("ID:%d pID:%d vID:%d\n", (int)(hwirq + i - its_dev->event_map.lpi_base), (int)(hwirq + i), virq + i); @@ -4523,6 +4524,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq irq_domain_set_hwirq_and_chip(domain, virq + i, i, irqchip, vm->vpes[i]); set_bit(i, bitmap); + irqd_set_resend_when_in_progress(irq_get_irq_data(virq + i)); } if (err) {