x86/irq: Plug vector setup race

Hogan reported a vector setup race, which overwrites the interrupt
descriptor in the per CPU vector array resulting in a disfunctional device.

CPU0				CPU1
				interrupt is raised in APIC IRR
				but not handled
  free_irq()
    per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN;

  request_irq()			common_interrupt()
  				  d = this_cpu_read(vector_irq[vector]);

    per_cpu(vector_irq, CPU1)[vector] = desc;

    				  if (d == VECTOR_SHUTDOWN)
				    this_cpu_write(vector_irq[vector], VECTOR_UNUSED);

free_irq() cannot observe the pending vector in the CPU1 APIC as there is
no way to query the remote CPUs APIC IRR.

This requires that request_irq() uses the same vector/CPU as the one which
was freed, but this also can be triggered by a spurious interrupt.

Interestingly enough this problem managed to be hidden for more than a
decade.

Prevent this by reevaluating vector_irq under the vector lock, which is
held by the interrupt activation code when vector_irq is updated.

To avoid ifdeffery or IS_ENABLED() nonsense, move the
[un]lock_vector_lock() declarations out under the
CONFIG_IRQ_DOMAIN_HIERARCHY guard as it's only provided when
CONFIG_X86_LOCAL_APIC=y.

The current CONFIG_IRQ_DOMAIN_HIERARCHY guard is selected by
CONFIG_X86_LOCAL_APIC, but can also be selected by other parts of the
Kconfig system, which makes 32-bit UP builds with CONFIG_X86_LOCAL_APIC=n
fail.

Can we just get rid of this !APIC nonsense once and forever?

Fixes: 9345005f4e ("x86/irq: Fix do_IRQ() interrupt warning for cpu hotplug retriggered irqs")
Reported-by: Hogan Wang <hogan.wang@huawei.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Hogan Wang <hogan.wang@huawei.com>
Link: https://lore.kernel.org/all/draft-87ikjhrhhh.ffs@tglx
This commit is contained in:
Thomas Gleixner
2025-07-24 12:49:30 +02:00
parent 49f848788a
commit ce0b5eedcb
2 changed files with 55 additions and 20 deletions

View File

@@ -92,8 +92,6 @@ struct irq_cfg {
extern struct irq_cfg *irq_cfg(unsigned int irq);
extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data);
extern void lock_vector_lock(void);
extern void unlock_vector_lock(void);
#ifdef CONFIG_SMP
extern void vector_schedule_cleanup(struct irq_cfg *);
extern void irq_complete_move(struct irq_cfg *cfg);
@@ -101,12 +99,16 @@ extern void irq_complete_move(struct irq_cfg *cfg);
static inline void vector_schedule_cleanup(struct irq_cfg *c) { }
static inline void irq_complete_move(struct irq_cfg *c) { }
#endif
extern void apic_ack_edge(struct irq_data *data);
#else /* CONFIG_IRQ_DOMAIN_HIERARCHY */
#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
#ifdef CONFIG_X86_LOCAL_APIC
extern void lock_vector_lock(void);
extern void unlock_vector_lock(void);
#else
static inline void lock_vector_lock(void) {}
static inline void unlock_vector_lock(void) {}
#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
#endif
/* Statistics */
extern atomic_t irq_err_count;

View File

@@ -256,26 +256,59 @@ static __always_inline void handle_irq(struct irq_desc *desc,
__handle_irq(desc, regs);
}
static __always_inline int call_irq_handler(int vector, struct pt_regs *regs)
static struct irq_desc *reevaluate_vector(int vector)
{
struct irq_desc *desc;
int ret = 0;
struct irq_desc *desc = __this_cpu_read(vector_irq[vector]);
if (!IS_ERR_OR_NULL(desc))
return desc;
if (desc == VECTOR_UNUSED)
pr_emerg_ratelimited("No irq handler for %d.%u\n", smp_processor_id(), vector);
else
__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
return NULL;
}
static __always_inline bool call_irq_handler(int vector, struct pt_regs *regs)
{
struct irq_desc *desc = __this_cpu_read(vector_irq[vector]);
desc = __this_cpu_read(vector_irq[vector]);
if (likely(!IS_ERR_OR_NULL(desc))) {
handle_irq(desc, regs);
} else {
ret = -EINVAL;
if (desc == VECTOR_UNUSED) {
pr_emerg_ratelimited("%s: %d.%u No irq handler for vector\n",
__func__, smp_processor_id(),
vector);
} else {
__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
}
return true;
}
return ret;
/*
* Reevaluate with vector_lock held to prevent a race against
* request_irq() setting up the vector:
*
* CPU0 CPU1
* interrupt is raised in APIC IRR
* but not handled
* free_irq()
* per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN;
*
* request_irq() common_interrupt()
* d = this_cpu_read(vector_irq[vector]);
*
* per_cpu(vector_irq, CPU1)[vector] = desc;
*
* if (d == VECTOR_SHUTDOWN)
* this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
*
* This requires that the same vector on the same target CPU is
* handed out or that a spurious interrupt hits that CPU/vector.
*/
lock_vector_lock();
desc = reevaluate_vector(vector);
unlock_vector_lock();
if (!desc)
return false;
handle_irq(desc, regs);
return true;
}
/*
@@ -289,7 +322,7 @@ DEFINE_IDTENTRY_IRQ(common_interrupt)
/* entry code tells RCU that we're not quiescent. Check it. */
RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU");
if (unlikely(call_irq_handler(vector, regs)))
if (unlikely(!call_irq_handler(vector, regs)))
apic_eoi();
set_irq_regs(old_regs);