Merge tag 'irq-msi-2026-02-09' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull MSI updates from Thomas Gleixner:
 "Updates for the [PCI] MSI subsystem:

   - Add interrupt redirection infrastructure

     Some PCI controllers use a single demultiplexing interrupt for the
     MSI interrupts of subordinate devices.

      This prevents setting the affinity of the individual device
      interrupts, so all of them are delivered to the single CPU which
      handles the demultiplexing interrupt. That is obviously
      counterproductive for multi-queue devices and interrupt balancing.

      To work around this limitation, the new infrastructure installs a
      dummy irq_set_affinity() callback which captures the affinity mask
      and picks a redirection target CPU out of that mask.

      When the PCI controller demultiplexes the interrupts it invokes a
      new handling function in the core, which either runs the interrupt
      handler directly when the current CPU is part of the affinity mask,
      or delegates it to irq_work on the target CPU (a sketch of such a
      demultiplex handler follows the list below).

   - Utilize the interrupt redirection mechanism in the PCI DWC host
     controller driver.

      This allows affinity control for the subordinate device MSI
      interrupts, whose handlers would otherwise always run on whichever
      CPU executes the demultiplex handler (see the irq_chip sketch
      below).

   - Replace the binary 64-bit MSI flag with a DMA mask

      Some PCI devices advertise PCI_MSI_FLAGS_64BIT in the MSI
      capability, but implement fewer than 64 address bits. This breaks
      on platforms where such a device is assigned an MSI address higher
      than what the device actually supports.

      With the binary 64-bit flag the only choice is to disable 64-bit
      MSI support completely, which leaves the device dysfunctional.

      By using a DMA mask the address limit of a device can be described
      precisely, which covers the above scenario (a driver-side sketch
      follows the list below).

   - Make use of the DMA mask based address limit in the hda/intel and
     radeon drivers to enable them on affected platforms

   - The usual small cleanups and improvements"
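
For illustration, a minimal sketch of how a demultiplexing MSI controller
driver dispatches its vectors through the new core entry point. The
controller structure, register offset and vector count (my_msi_ctrl,
MY_MSI_STATUS, MY_MSI_NR_VECTORS) are hypothetical; only
generic_handle_demux_domain_irq() comes from this series.

#include <linux/bitops.h>
#include <linux/io.h>
#include <linux/irqdomain.h>

#define MY_MSI_STATUS		0x0	/* hypothetical status register */
#define MY_MSI_NR_VECTORS	32	/* hypothetical vector count */

struct my_msi_ctrl {
	void __iomem		*base;
	struct irq_domain	*domain;
};

/* Demultiplex handler: one controller interrupt fans out to many MSIs */
static void my_msi_demux(struct my_msi_ctrl *ctrl)
{
	unsigned long status = readl(ctrl->base + MY_MSI_STATUS);
	unsigned int bit;

	for_each_set_bit(bit, &status, MY_MSI_NR_VECTORS) {
		/*
		 * Runs the handler right away when the current CPU is in
		 * the interrupt's affinity mask, otherwise queues irq_work
		 * on the redirection target CPU picked at set_affinity time.
		 */
		generic_handle_demux_domain_irq(ctrl->domain, bit);
	}
}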
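
The driver-side wiring follows the pattern of the DWC conversion further
down: under CONFIG_SMP the real ack moves to irq_pre_redirect() so it runs
before the irq_work is queued, irq_ack becomes a no-op to satisfy the edge
flow handler, and the affinity callback is the generic redirect helper. The
my_msi_* names are hypothetical.

#include <linux/irq.h>

static void my_msi_noop_ack(struct irq_data *d) { }

static void my_msi_ack(struct irq_data *d)
{
	/* hypothetical: clear the per-vector status bit in the controller */
}

static struct irq_chip my_msi_bottom_chip = {
	.name			= "MY-MSI",
#ifdef CONFIG_SMP
	.irq_ack		= my_msi_noop_ack,
	.irq_pre_redirect	= my_msi_ack,
	.irq_set_affinity	= irq_chip_redirect_set_affinity,
#else
	.irq_ack		= my_msi_ack,
#endif
};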
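
On the address-limit side, a driver whose device advertises 64-bit MSI but
only wires up, say, 40 address bits now describes that limit instead of
setting the removed no_64bit_msi flag. The 40-bit value and the function
name are made up for illustration; msi_verify_entries() rejects any
assigned address above the mask.

#include <linux/dma-mapping.h>
#include <linux/pci.h>

static int my_driver_init_msi(struct pci_dev *pdev)
{
	/*
	 * The default set up by pci_setup_device() is DMA_BIT_MASK(64);
	 * narrow it to what the device can actually generate.
	 */
	pdev->msi_addr_mask = DMA_BIT_MASK(40);

	return pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
}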

* tag 'irq-msi-2026-02-09' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  ALSA: hda/intel: Make MSI address limit based on the device DMA limit
  drm/radeon: Make MSI address limit based on the device DMA limit
  PCI/MSI: Check the device specific address mask in msi_verify_entries()
  PCI/MSI: Convert the boolean no_64bit_msi flag to a DMA address mask
  genirq/redirect: Prevent writing MSI message on affinity change
  PCI/MSI: Unmap MSI-X region on error
  genirq: Update effective affinity for redirected interrupts
  PCI: dwc: Enable MSI affinity support
  PCI: dwc: Code cleanup
  genirq: Add interrupt redirection infrastructure
  genirq/msi: Correct kernel-doc in <linux/msi.h>
Linus Torvalds
2026-02-10 16:30:29 -08:00
18 changed files with 247 additions and 112 deletions


@@ -1666,7 +1666,7 @@ static int __pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
return -ENXIO;
/* Force 32-bit MSI on some broken devices */
if (dev->no_64bit_msi)
if (dev->msi_addr_mask < DMA_BIT_MASK(64))
is_64 = 0;
/* Assign XIVE to PE */


@@ -383,7 +383,7 @@ static int rtas_prepare_msi_irqs(struct pci_dev *pdev, int nvec_in, int type,
*/
again:
if (type == PCI_CAP_ID_MSI) {
if (pdev->no_64bit_msi) {
if (pdev->msi_addr_mask < DMA_BIT_MASK(64)) {
rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSI_FN, nvec);
if (rc < 0) {
/*
@@ -409,7 +409,7 @@ static int rtas_prepare_msi_irqs(struct pci_dev *pdev, int nvec_in, int type,
if (use_32bit_msi_hack && rc > 0)
rtas_hack_32bit_msi_gen2(pdev);
} else {
if (pdev->no_64bit_msi)
if (pdev->msi_addr_mask < DMA_BIT_MASK(64))
rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSIX_FN, nvec);
else
rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec);


@@ -1374,6 +1374,7 @@ int radeon_device_init(struct radeon_device *rdev,
pr_warn("radeon: No suitable DMA available\n");
return r;
}
rdev->pdev->msi_addr_mask = DMA_BIT_MASK(dma_bits);
rdev->need_swiotlb = drm_need_swiotlb(dma_bits);
/* Registers mapping */


@@ -245,16 +245,6 @@ static bool radeon_msi_ok(struct radeon_device *rdev)
if (rdev->flags & RADEON_IS_AGP)
return false;
/*
* Older chips have a HW limitation, they can only generate 40 bits
* of address for "64-bit" MSIs which breaks on some platforms, notably
* IBM POWER servers, so we limit them
*/
if (rdev->family < CHIP_BONAIRE) {
dev_info(rdev->dev, "radeon: MSI limited to 32-bit\n");
rdev->pdev->no_64bit_msi = 1;
}
/* force MSI on */
if (radeon_msi == 1)
return true;


@@ -331,7 +331,7 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
#ifdef CONFIG_PPC64
/* Ensure MSI/MSI-X interrupts lie within addressable physical memory */
pdev->no_64bit_msi = 1;
pdev->msi_addr_mask = DMA_BIT_MASK(32);
#endif
err = ionic_setup_one(ionic);


@@ -26,9 +26,27 @@ static struct pci_ops dw_pcie_ops;
static struct pci_ops dw_pcie_ecam_ops;
static struct pci_ops dw_child_pcie_ops;
#ifdef CONFIG_SMP
static void dw_irq_noop(struct irq_data *d) { }
#endif
static bool dw_pcie_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
struct irq_domain *real_parent, struct msi_domain_info *info)
{
if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info))
return false;
#ifdef CONFIG_SMP
info->chip->irq_ack = dw_irq_noop;
info->chip->irq_pre_redirect = irq_chip_pre_redirect_parent;
#else
info->chip->irq_ack = irq_chip_ack_parent;
#endif
return true;
}
#define DW_PCIE_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \
MSI_FLAG_USE_DEF_CHIP_OPS | \
MSI_FLAG_NO_AFFINITY | \
MSI_FLAG_PCI_MSI_MASK_PARENT)
#define DW_PCIE_MSI_FLAGS_SUPPORTED (MSI_FLAG_MULTI_PCI_MSI | \
MSI_FLAG_PCI_MSIX | \
@@ -40,41 +58,30 @@ static const struct msi_parent_ops dw_pcie_msi_parent_ops = {
.required_flags = DW_PCIE_MSI_FLAGS_REQUIRED,
.supported_flags = DW_PCIE_MSI_FLAGS_SUPPORTED,
.bus_select_token = DOMAIN_BUS_PCI_MSI,
.chip_flags = MSI_CHIP_FLAG_SET_ACK,
.prefix = "DW-",
.init_dev_msi_info = msi_lib_init_dev_msi_info,
.init_dev_msi_info = dw_pcie_init_dev_msi_info,
};
/* MSI int handler */
irqreturn_t dw_handle_msi_irq(struct dw_pcie_rp *pp)
void dw_handle_msi_irq(struct dw_pcie_rp *pp)
{
int i, pos;
unsigned long val;
u32 status, num_ctrls;
irqreturn_t ret = IRQ_NONE;
struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
unsigned int i, num_ctrls;
num_ctrls = pp->num_vectors / MAX_MSI_IRQS_PER_CTRL;
for (i = 0; i < num_ctrls; i++) {
status = dw_pcie_readl_dbi(pci, PCIE_MSI_INTR0_STATUS +
(i * MSI_REG_CTRL_BLOCK_SIZE));
unsigned int reg_off = i * MSI_REG_CTRL_BLOCK_SIZE;
unsigned int irq_off = i * MAX_MSI_IRQS_PER_CTRL;
unsigned long status, pos;
status = dw_pcie_readl_dbi(pci, PCIE_MSI_INTR0_STATUS + reg_off);
if (!status)
continue;
ret = IRQ_HANDLED;
val = status;
pos = 0;
while ((pos = find_next_bit(&val, MAX_MSI_IRQS_PER_CTRL,
pos)) != MAX_MSI_IRQS_PER_CTRL) {
generic_handle_domain_irq(pp->irq_domain,
(i * MAX_MSI_IRQS_PER_CTRL) +
pos);
pos++;
}
for_each_set_bit(pos, &status, MAX_MSI_IRQS_PER_CTRL)
generic_handle_demux_domain_irq(pp->irq_domain, irq_off + pos);
}
return ret;
}
/* Chained MSI interrupt service routine */
@@ -95,13 +102,10 @@ static void dw_pci_setup_msi_msg(struct irq_data *d, struct msi_msg *msg)
{
struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(d);
struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
u64 msi_target;
msi_target = (u64)pp->msi_data;
u64 msi_target = (u64)pp->msi_data;
msg->address_lo = lower_32_bits(msi_target);
msg->address_hi = upper_32_bits(msi_target);
msg->data = d->hwirq;
dev_dbg(pci->dev, "msi#%d address_hi %#x address_lo %#x\n",
@@ -113,18 +117,14 @@ static void dw_pci_bottom_mask(struct irq_data *d)
struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(d);
struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
unsigned int res, bit, ctrl;
unsigned long flags;
raw_spin_lock_irqsave(&pp->lock, flags);
guard(raw_spinlock)(&pp->lock);
ctrl = d->hwirq / MAX_MSI_IRQS_PER_CTRL;
res = ctrl * MSI_REG_CTRL_BLOCK_SIZE;
bit = d->hwirq % MAX_MSI_IRQS_PER_CTRL;
pp->irq_mask[ctrl] |= BIT(bit);
dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK + res, pp->irq_mask[ctrl]);
raw_spin_unlock_irqrestore(&pp->lock, flags);
}
static void dw_pci_bottom_unmask(struct irq_data *d)
@@ -132,18 +132,14 @@ static void dw_pci_bottom_unmask(struct irq_data *d)
struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(d);
struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
unsigned int res, bit, ctrl;
unsigned long flags;
raw_spin_lock_irqsave(&pp->lock, flags);
guard(raw_spinlock)(&pp->lock);
ctrl = d->hwirq / MAX_MSI_IRQS_PER_CTRL;
res = ctrl * MSI_REG_CTRL_BLOCK_SIZE;
bit = d->hwirq % MAX_MSI_IRQS_PER_CTRL;
pp->irq_mask[ctrl] &= ~BIT(bit);
dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK + res, pp->irq_mask[ctrl]);
raw_spin_unlock_irqrestore(&pp->lock, flags);
}
static void dw_pci_bottom_ack(struct irq_data *d)
@@ -160,54 +156,48 @@ static void dw_pci_bottom_ack(struct irq_data *d)
}
static struct irq_chip dw_pci_msi_bottom_irq_chip = {
.name = "DWPCI-MSI",
.irq_ack = dw_pci_bottom_ack,
.irq_compose_msi_msg = dw_pci_setup_msi_msg,
.irq_mask = dw_pci_bottom_mask,
.irq_unmask = dw_pci_bottom_unmask,
.name = "DWPCI-MSI",
.irq_compose_msi_msg = dw_pci_setup_msi_msg,
.irq_mask = dw_pci_bottom_mask,
.irq_unmask = dw_pci_bottom_unmask,
#ifdef CONFIG_SMP
.irq_ack = dw_irq_noop,
.irq_pre_redirect = dw_pci_bottom_ack,
.irq_set_affinity = irq_chip_redirect_set_affinity,
#else
.irq_ack = dw_pci_bottom_ack,
#endif
};
static int dw_pcie_irq_domain_alloc(struct irq_domain *domain,
unsigned int virq, unsigned int nr_irqs,
void *args)
static int dw_pcie_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *args)
{
struct dw_pcie_rp *pp = domain->host_data;
unsigned long flags;
u32 i;
int bit;
raw_spin_lock_irqsave(&pp->lock, flags);
bit = bitmap_find_free_region(pp->msi_irq_in_use, pp->num_vectors,
order_base_2(nr_irqs));
raw_spin_unlock_irqrestore(&pp->lock, flags);
scoped_guard (raw_spinlock_irq, &pp->lock) {
bit = bitmap_find_free_region(pp->msi_irq_in_use, pp->num_vectors,
order_base_2(nr_irqs));
}
if (bit < 0)
return -ENOSPC;
for (i = 0; i < nr_irqs; i++)
irq_domain_set_info(domain, virq + i, bit + i,
pp->msi_irq_chip,
pp, handle_edge_irq,
NULL, NULL);
for (unsigned int i = 0; i < nr_irqs; i++) {
irq_domain_set_info(domain, virq + i, bit + i, pp->msi_irq_chip,
pp, handle_edge_irq, NULL, NULL);
}
return 0;
}
static void dw_pcie_irq_domain_free(struct irq_domain *domain,
unsigned int virq, unsigned int nr_irqs)
static void dw_pcie_irq_domain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs)
{
struct irq_data *d = irq_domain_get_irq_data(domain, virq);
struct dw_pcie_rp *pp = domain->host_data;
unsigned long flags;
raw_spin_lock_irqsave(&pp->lock, flags);
bitmap_release_region(pp->msi_irq_in_use, d->hwirq,
order_base_2(nr_irqs));
raw_spin_unlock_irqrestore(&pp->lock, flags);
guard(raw_spinlock_irq)(&pp->lock);
bitmap_release_region(pp->msi_irq_in_use, d->hwirq, order_base_2(nr_irqs));
}
static const struct irq_domain_ops dw_pcie_msi_domain_ops = {
@@ -241,8 +231,7 @@ void dw_pcie_free_msi(struct dw_pcie_rp *pp)
for (ctrl = 0; ctrl < MAX_MSI_CTRLS; ctrl++) {
if (pp->msi_irq[ctrl] > 0)
irq_set_chained_handler_and_data(pp->msi_irq[ctrl],
NULL, NULL);
irq_set_chained_handler_and_data(pp->msi_irq[ctrl], NULL, NULL);
}
irq_domain_remove(pp->irq_domain);


@@ -821,7 +821,7 @@ static inline enum dw_pcie_ltssm dw_pcie_get_ltssm(struct dw_pcie *pci)
#ifdef CONFIG_PCIE_DW_HOST
int dw_pcie_suspend_noirq(struct dw_pcie *pci);
int dw_pcie_resume_noirq(struct dw_pcie *pci);
irqreturn_t dw_handle_msi_irq(struct dw_pcie_rp *pp);
void dw_handle_msi_irq(struct dw_pcie_rp *pp);
void dw_pcie_msi_init(struct dw_pcie_rp *pp);
int dw_pcie_msi_host_init(struct dw_pcie_rp *pp);
void dw_pcie_free_msi(struct dw_pcie_rp *pp);
@@ -842,10 +842,7 @@ static inline int dw_pcie_resume_noirq(struct dw_pcie *pci)
return 0;
}
static inline irqreturn_t dw_handle_msi_irq(struct dw_pcie_rp *pp)
{
return IRQ_NONE;
}
static inline void dw_handle_msi_irq(struct dw_pcie_rp *pp) { }
static inline void dw_pcie_msi_init(struct dw_pcie_rp *pp)
{ }


@@ -321,14 +321,16 @@ static int msi_setup_msi_desc(struct pci_dev *dev, int nvec,
static int msi_verify_entries(struct pci_dev *dev)
{
struct msi_desc *entry;
u64 address;
if (!dev->no_64bit_msi)
if (dev->msi_addr_mask == DMA_BIT_MASK(64))
return 0;
msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
if (entry->msg.address_hi) {
pci_err(dev, "arch assigned 64-bit MSI address %#x%08x but device only supports 32 bits\n",
entry->msg.address_hi, entry->msg.address_lo);
address = (u64)entry->msg.address_hi << 32 | entry->msg.address_lo;
if (address & ~dev->msi_addr_mask) {
pci_err(dev, "arch assigned 64-bit MSI address %#llx above device MSI address mask %#llx\n",
address, dev->msi_addr_mask);
break;
}
}
@@ -737,7 +739,7 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
ret = msix_setup_interrupts(dev, entries, nvec, affd);
if (ret)
goto out_disable;
goto out_unmap;
/* Disable INTX */
pci_intx_for_msi(dev, 0);
@@ -758,6 +760,8 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
pcibios_free_irq(dev);
return 0;
out_unmap:
iounmap(dev->msix_base);
out_disable:
dev->msix_enabled = 0;
pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE, 0);


@@ -24,7 +24,7 @@ void pci_msi_init(struct pci_dev *dev)
}
if (!(ctrl & PCI_MSI_FLAGS_64BIT))
dev->no_64bit_msi = 1;
dev->msi_addr_mask = DMA_BIT_MASK(32);
}
void pci_msix_init(struct pci_dev *dev)


@@ -2047,6 +2047,13 @@ int pci_setup_device(struct pci_dev *dev)
*/
dev->dma_mask = 0xffffffff;
/*
* Assume 64-bit addresses for MSI initially. Will be changed to 32-bit
* if MSI (rather than MSI-X) capability does not have
* PCI_MSI_FLAGS_64BIT. Can also be overridden by driver.
*/
dev->msi_addr_mask = DMA_BIT_MASK(64);
dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus),
dev->bus->number, PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn));


@@ -459,6 +459,8 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
* checks against the supplied affinity mask are not
* required. This is used for CPU hotplug where the
* target CPU is not yet set in the cpu_online_mask.
* @irq_pre_redirect: Optional function to be invoked before redirecting
* an interrupt via irq_work. Called only on CONFIG_SMP.
* @irq_retrigger: resend an IRQ to the CPU
* @irq_set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ
* @irq_set_wake: enable/disable power-management wake-on of an IRQ
@@ -503,6 +505,7 @@ struct irq_chip {
void (*irq_eoi)(struct irq_data *data);
int (*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force);
void (*irq_pre_redirect)(struct irq_data *data);
int (*irq_retrigger)(struct irq_data *data);
int (*irq_set_type)(struct irq_data *data, unsigned int flow_type);
int (*irq_set_wake)(struct irq_data *data, unsigned int on);
@@ -684,6 +687,13 @@ extern int irq_chip_set_vcpu_affinity_parent(struct irq_data *data,
extern int irq_chip_set_type_parent(struct irq_data *data, unsigned int type);
extern int irq_chip_request_resources_parent(struct irq_data *data);
extern void irq_chip_release_resources_parent(struct irq_data *data);
#ifdef CONFIG_SMP
void irq_chip_pre_redirect_parent(struct irq_data *data);
#endif
#endif
#ifdef CONFIG_SMP
int irq_chip_redirect_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force);
#endif
/* Disable or mask interrupts during a kernel kexec */


@@ -2,9 +2,10 @@
#ifndef _LINUX_IRQDESC_H
#define _LINUX_IRQDESC_H
#include <linux/rcupdate.h>
#include <linux/irq_work.h>
#include <linux/kobject.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
/*
* Core internal functions to deal with irq descriptors
@@ -29,6 +30,17 @@ struct irqstat {
#endif
};
/**
* struct irq_redirect - interrupt redirection metadata
* @work: Hard irq_work item for handler execution on a different CPU
* @target_cpu: CPU to run irq handler on in case the current CPU is not part
* of the irq affinity mask
*/
struct irq_redirect {
struct irq_work work;
unsigned int target_cpu;
};
/**
* struct irq_desc - interrupt descriptor
* @irq_common_data: per irq and chip data passed down to chip functions
@@ -46,6 +58,7 @@ struct irqstat {
* @threads_handled: stats field for deferred spurious detection of threaded handlers
* @threads_handled_last: comparator field for deferred spurious detection of threaded handlers
* @lock: locking for SMP
* @redirect: Facility for redirecting interrupts via irq_work
* @affinity_hint: hint to user space for preferred irq affinity
* @affinity_notify: context for notification of affinity changes
* @pending_mask: pending rebalanced interrupts
@@ -83,6 +96,7 @@ struct irq_desc {
raw_spinlock_t lock;
struct cpumask *percpu_enabled;
#ifdef CONFIG_SMP
struct irq_redirect redirect;
const struct cpumask *affinity_hint;
struct irq_affinity_notify *affinity_notify;
#ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -185,6 +199,7 @@ int generic_handle_irq_safe(unsigned int irq);
int generic_handle_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq);
int generic_handle_domain_irq_safe(struct irq_domain *domain, irq_hw_number_t hwirq);
int generic_handle_domain_nmi(struct irq_domain *domain, irq_hw_number_t hwirq);
bool generic_handle_demux_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq);
#endif
/* Test to see if a driver has successfully requested an irq */


@@ -49,12 +49,12 @@ typedef struct arch_msi_msg_data {
#endif
/**
* msi_msg - Representation of a MSI message
* struct msi_msg - Representation of a MSI message
* @address_lo: Low 32 bits of msi message address
* @arch_addrlo: Architecture specific shadow of @address_lo
* @arch_addr_lo: Architecture specific shadow of @address_lo
* @address_hi: High 32 bits of msi message address
* (only used when device supports it)
* @arch_addrhi: Architecture specific shadow of @address_hi
* @arch_addr_hi: Architecture specific shadow of @address_hi
* @data: MSI message data (usually 16 bits)
* @arch_data: Architecture specific shadow of @data
*/
@@ -91,7 +91,7 @@ typedef void (*irq_write_msi_msg_t)(struct msi_desc *desc,
struct msi_msg *msg);
/**
* pci_msi_desc - PCI/MSI specific MSI descriptor data
* struct pci_msi_desc - PCI/MSI specific MSI descriptor data
*
* @msi_mask: [PCI MSI] MSI cached mask bits
* @msix_ctrl: [PCI MSI-X] MSI-X cached per vector control bits
@@ -101,6 +101,7 @@ typedef void (*irq_write_msi_msg_t)(struct msi_desc *desc,
* @can_mask: [PCI MSI/X] Masking supported?
* @is_64: [PCI MSI/X] Address size: 0=32bit 1=64bit
* @default_irq:[PCI MSI/X] The default pre-assigned non-MSI irq
* @msi_attrib: [PCI MSI/X] Compound struct of MSI/X attributes
* @mask_pos: [PCI MSI] Mask register position
* @mask_base: [PCI MSI-X] Mask register base address
*/
@@ -169,7 +170,7 @@ struct msi_desc_data {
* Only used if iommu_msi_shift != 0
* @iommu_msi_shift: Indicates how many bits of the original address should be
* preserved when using iommu_msi_iova.
* @sysfs_attr: Pointer to sysfs device attribute
* @sysfs_attrs: Pointer to sysfs device attribute
*
* @write_msi_msg: Callback that may be called when the MSI message
* address or data changes
@@ -220,7 +221,7 @@ enum msi_desc_filter {
/**
* struct msi_dev_domain - The internals of MSI domain info per device
* @store: Xarray for storing MSI descriptor pointers
* @irqdomain: Pointer to a per device interrupt domain
* @domain: Pointer to a per device interrupt domain
*/
struct msi_dev_domain {
struct xarray store;


@@ -377,6 +377,13 @@ struct pci_dev {
0xffffffff. You only need to change
this if your device has broken DMA
or supports 64-bit transfers. */
u64 msi_addr_mask; /* Mask of the bits of bus address for
MSI that this device implements.
Normally set based on device
capabilities. You only need to
change this if your device claims
to support 64-bit MSI but implements
fewer than 64 address bits. */
struct device_dma_parameters dma_parms;
@@ -441,7 +448,6 @@ struct pci_dev {
unsigned int is_busmaster:1; /* Is busmaster */
unsigned int no_msi:1; /* May not use MSI */
unsigned int no_64bit_msi:1; /* May only use 32-bit MSIs */
unsigned int block_cfg_access:1; /* Config space access blocked */
unsigned int broken_parity_status:1; /* Generates false positive parity */
unsigned int irq_reroute_variant:2; /* Needs IRQ rerouting variant */


@@ -1122,7 +1122,7 @@ void irq_cpu_offline(void)
}
#endif
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
#ifdef CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS
/**
@@ -1194,6 +1194,15 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_mask_irq);
#endif /* CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS */
#ifdef CONFIG_SMP
void irq_chip_pre_redirect_parent(struct irq_data *data)
{
data = data->parent_data;
data->chip->irq_pre_redirect(data);
}
EXPORT_SYMBOL_GPL(irq_chip_pre_redirect_parent);
#endif
/**
* irq_chip_set_parent_state - set the state of a parent interrupt.
*
@@ -1476,6 +1485,19 @@ void irq_chip_release_resources_parent(struct irq_data *data)
data->chip->irq_release_resources(data);
}
EXPORT_SYMBOL_GPL(irq_chip_release_resources_parent);
#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
#ifdef CONFIG_SMP
int irq_chip_redirect_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force)
{
struct irq_redirect *redir = &irq_data_to_desc(data)->redirect;
WRITE_ONCE(redir->target_cpu, cpumask_first(dest));
irq_data_update_effective_affinity(data, dest);
return IRQ_SET_MASK_OK_DONE;
}
EXPORT_SYMBOL_GPL(irq_chip_redirect_set_affinity);
#endif
/**


@@ -78,8 +78,12 @@ static int alloc_masks(struct irq_desc *desc, int node)
return 0;
}
static void desc_smp_init(struct irq_desc *desc, int node,
const struct cpumask *affinity)
static void irq_redirect_work(struct irq_work *work)
{
handle_irq_desc(container_of(work, struct irq_desc, redirect.work));
}
static void desc_smp_init(struct irq_desc *desc, int node, const struct cpumask *affinity)
{
if (!affinity)
affinity = irq_default_affinity;
@@ -91,6 +95,7 @@ static void desc_smp_init(struct irq_desc *desc, int node,
#ifdef CONFIG_NUMA
desc->irq_common_data.node = node;
#endif
desc->redirect.work = IRQ_WORK_INIT_HARD(irq_redirect_work);
}
static void free_masks(struct irq_desc *desc)
@@ -767,6 +772,83 @@ int generic_handle_domain_nmi(struct irq_domain *domain, irq_hw_number_t hwirq)
WARN_ON_ONCE(!in_nmi());
return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
}
#ifdef CONFIG_SMP
static bool demux_redirect_remote(struct irq_desc *desc)
{
guard(raw_spinlock)(&desc->lock);
const struct cpumask *m = irq_data_get_effective_affinity_mask(&desc->irq_data);
unsigned int target_cpu = READ_ONCE(desc->redirect.target_cpu);
if (desc->irq_data.chip->irq_pre_redirect)
desc->irq_data.chip->irq_pre_redirect(&desc->irq_data);
/*
* If the interrupt handler is already running on a CPU that's included
* in the interrupt's affinity mask, redirection is not necessary.
*/
if (cpumask_test_cpu(smp_processor_id(), m))
return false;
/*
* The desc->action check protects against IRQ shutdown: __free_irq() sets
* desc->action to NULL while holding desc->lock, which we also hold.
*
* Calling irq_work_queue_on() here is safe w.r.t. CPU unplugging:
* - takedown_cpu() schedules multi_cpu_stop() on all active CPUs,
* including the one that's taken down.
* - multi_cpu_stop() acts like a barrier, which means all active
* CPUs go through MULTI_STOP_DISABLE_IRQ and disable hard IRQs
* *before* the dying CPU runs take_cpu_down() in MULTI_STOP_RUN.
* - Hard IRQs are re-enabled at the end of multi_cpu_stop(), *after*
* the dying CPU has run take_cpu_down() in MULTI_STOP_RUN.
* - Since we run in hard IRQ context, we run either before or after
* take_cpu_down() but never concurrently.
* - If we run before take_cpu_down(), the dying CPU hasn't been marked
* offline yet (it's marked via take_cpu_down() -> __cpu_disable()),
* so the WARN in irq_work_queue_on() can't occur.
* - Furthermore, the work item we queue will be flushed later via
* take_cpu_down() -> cpuhp_invoke_callback_range_nofail() ->
* smpcfd_dying_cpu() -> irq_work_run().
* - If we run after take_cpu_down(), target_cpu has been already
* updated via take_cpu_down() -> __cpu_disable(), which eventually
* calls irq_do_set_affinity() during IRQ migration. So, target_cpu
* no longer points to the dying CPU in this case.
*/
if (desc->action)
irq_work_queue_on(&desc->redirect.work, target_cpu);
return true;
}
#else /* CONFIG_SMP */
static bool demux_redirect_remote(struct irq_desc *desc)
{
return false;
}
#endif
/**
* generic_handle_demux_domain_irq - Invoke the handler for a hardware interrupt
* of a demultiplexing domain.
* @domain: The domain where to perform the lookup
* @hwirq: The hardware interrupt number to convert to a logical one
*
* Returns: True on success, or false if lookup has failed
*/
bool generic_handle_demux_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq)
{
struct irq_desc *desc = irq_resolve_mapping(domain, hwirq);
if (unlikely(!desc))
return false;
if (demux_redirect_remote(desc))
return true;
return !handle_irq_desc(desc);
}
EXPORT_SYMBOL_GPL(generic_handle_demux_domain_irq);
#endif
/* Dynamic interrupt handling */


@@ -35,6 +35,16 @@ static int __init setup_forced_irqthreads(char *arg)
early_param("threadirqs", setup_forced_irqthreads);
#endif
#ifdef CONFIG_SMP
static inline void synchronize_irqwork(struct irq_desc *desc)
{
/* Synchronize pending or on the fly redirect work */
irq_work_sync(&desc->redirect.work);
}
#else
static inline void synchronize_irqwork(struct irq_desc *desc) { }
#endif
static int __irq_get_irqchip_state(struct irq_data *d, enum irqchip_irq_state which, bool *state);
static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip)
@@ -107,7 +117,9 @@ EXPORT_SYMBOL(synchronize_hardirq);
static void __synchronize_irq(struct irq_desc *desc)
{
synchronize_irqwork(desc);
__synchronize_hardirq(desc, true);
/*
* We made sure that no hardirq handler is running. Now verify that no
* threaded handlers are active.
@@ -217,8 +229,7 @@ static inline void irq_validate_effective_affinity(struct irq_data *data) { }
static DEFINE_PER_CPU(struct cpumask, __tmp_mask);
int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
bool force)
int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
{
struct cpumask *tmp_mask = this_cpu_ptr(&__tmp_mask);
struct irq_desc *desc = irq_data_to_desc(data);


@@ -1903,11 +1903,6 @@ static int azx_first_init(struct azx *chip)
chip->gts_present = true;
#endif
if (chip->msi && chip->driver_caps & AZX_DCAPS_NO_MSI64) {
dev_dbg(card->dev, "Disabling 64bit MSI\n");
pci->no_64bit_msi = true;
}
pci_set_master(pci);
gcap = azx_readw(chip, GCAP);
@@ -1958,6 +1953,11 @@ static int azx_first_init(struct azx *chip)
dma_set_mask_and_coherent(&pci->dev, DMA_BIT_MASK(32));
dma_set_max_seg_size(&pci->dev, UINT_MAX);
if (chip->msi && chip->driver_caps & AZX_DCAPS_NO_MSI64) {
dev_dbg(card->dev, "Restricting MSI to %u-bit\n", dma_bits);
pci->msi_addr_mask = DMA_BIT_MASK(dma_bits);
}
/* read number of streams from GCAP register instead of using
* hardcoded value
*/