diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index b0c1d9d16fb5..1c78fdfb7b03 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1666,7 +1666,7 @@ static int __pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
 		return -ENXIO;
 
 	/* Force 32-bit MSI on some broken devices */
-	if (dev->no_64bit_msi)
+	if (dev->msi_addr_mask < DMA_BIT_MASK(64))
 		is_64 = 0;
 
 	/* Assign XIVE to PE */
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index a82aaa786e9e..7473c7ca1db0 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -383,7 +383,7 @@ static int rtas_prepare_msi_irqs(struct pci_dev *pdev, int nvec_in, int type,
 	 */
 again:
 	if (type == PCI_CAP_ID_MSI) {
-		if (pdev->no_64bit_msi) {
+		if (pdev->msi_addr_mask < DMA_BIT_MASK(64)) {
 			rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSI_FN, nvec);
 			if (rc < 0) {
 				/*
@@ -409,7 +409,7 @@ static int rtas_prepare_msi_irqs(struct pci_dev *pdev, int nvec_in, int type,
 		if (use_32bit_msi_hack && rc > 0)
 			rtas_hack_32bit_msi_gen2(pdev);
 	} else {
-		if (pdev->no_64bit_msi)
+		if (pdev->msi_addr_mask < DMA_BIT_MASK(64))
 			rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSIX_FN, nvec);
 		else
 			rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 60afaa8e56b4..5faae0361361 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1374,6 +1374,7 @@ int radeon_device_init(struct radeon_device *rdev,
 		pr_warn("radeon: No suitable DMA available\n");
 		return r;
 	}
+	rdev->pdev->msi_addr_mask = DMA_BIT_MASK(dma_bits);
 	rdev->need_swiotlb = drm_need_swiotlb(dma_bits);
 
 	/* Registers mapping */
diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c
index 9961251b44ba..839d619e5602 100644
--- a/drivers/gpu/drm/radeon/radeon_irq_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c
@@ -245,16 +245,6 @@ static bool radeon_msi_ok(struct radeon_device *rdev)
 	if (rdev->flags & RADEON_IS_AGP)
 		return false;
 
-	/*
-	 * Older chips have a HW limitation, they can only generate 40 bits
-	 * of address for "64-bit" MSIs which breaks on some platforms, notably
-	 * IBM POWER servers, so we limit them
-	 */
-	if (rdev->family < CHIP_BONAIRE) {
-		dev_info(rdev->dev, "radeon: MSI limited to 32-bit\n");
-		rdev->pdev->no_64bit_msi = 1;
-	}
-
	/* force MSI on */
 	if (radeon_msi == 1)
 		return true;
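The old no_64bit_msi flag could only say "32-bit only"; msi_addr_mask lets a driver state exactly how many MSI address bits the hardware drives (the removed radeon quirk describes parts that wire up only 40 bits). A minimal, hypothetical probe fragment under the new scheme; FOO_MAX_VECS and the surrounding error handling are assumptions, not part of this series:

	/* Hypothetical: device claims 64-bit MSI but only drives 40 address bits. */
	pdev->msi_addr_mask = DMA_BIT_MASK(40);

	nvec = pci_alloc_irq_vectors(pdev, 1, FOO_MAX_VECS, PCI_IRQ_MSI);
	if (nvec < 0)
		return nvec;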
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
index 70d86c5f52fb..0671deae9a28 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
@@ -331,7 +331,7 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 #ifdef CONFIG_PPC64
 	/* Ensure MSI/MSI-X interrupts lie within addressable physical memory */
-	pdev->no_64bit_msi = 1;
+	pdev->msi_addr_mask = DMA_BIT_MASK(32);
 #endif
 
 	err = ionic_setup_one(ionic);
diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
index 372207c33a85..f116591975ff 100644
--- a/drivers/pci/controller/dwc/pcie-designware-host.c
+++ b/drivers/pci/controller/dwc/pcie-designware-host.c
@@ -26,9 +26,27 @@
 static struct pci_ops dw_pcie_ops;
 static struct pci_ops dw_pcie_ecam_ops;
 static struct pci_ops dw_child_pcie_ops;
 
+#ifdef CONFIG_SMP
+static void dw_irq_noop(struct irq_data *d) { }
+#endif
+
+static bool dw_pcie_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
+				      struct irq_domain *real_parent, struct msi_domain_info *info)
+{
+	if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info))
+		return false;
+
+#ifdef CONFIG_SMP
+	info->chip->irq_ack = dw_irq_noop;
+	info->chip->irq_pre_redirect = irq_chip_pre_redirect_parent;
+#else
+	info->chip->irq_ack = irq_chip_ack_parent;
+#endif
+	return true;
+}
+
 #define DW_PCIE_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \
 				    MSI_FLAG_USE_DEF_CHIP_OPS | \
-				    MSI_FLAG_NO_AFFINITY | \
 				    MSI_FLAG_PCI_MSI_MASK_PARENT)
 #define DW_PCIE_MSI_FLAGS_SUPPORTED (MSI_FLAG_MULTI_PCI_MSI | \
 				     MSI_FLAG_PCI_MSIX | \
@@ -40,41 +58,30 @@ static const struct msi_parent_ops dw_pcie_msi_parent_ops = {
 	.required_flags = DW_PCIE_MSI_FLAGS_REQUIRED,
 	.supported_flags = DW_PCIE_MSI_FLAGS_SUPPORTED,
 	.bus_select_token = DOMAIN_BUS_PCI_MSI,
-	.chip_flags = MSI_CHIP_FLAG_SET_ACK,
 	.prefix = "DW-",
-	.init_dev_msi_info = msi_lib_init_dev_msi_info,
+	.init_dev_msi_info = dw_pcie_init_dev_msi_info,
 };
 
 /* MSI int handler */
-irqreturn_t dw_handle_msi_irq(struct dw_pcie_rp *pp)
+void dw_handle_msi_irq(struct dw_pcie_rp *pp)
 {
-	int i, pos;
-	unsigned long val;
-	u32 status, num_ctrls;
-	irqreturn_t ret = IRQ_NONE;
 	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	unsigned int i, num_ctrls;
 
 	num_ctrls = pp->num_vectors / MAX_MSI_IRQS_PER_CTRL;
 	for (i = 0; i < num_ctrls; i++) {
-		status = dw_pcie_readl_dbi(pci, PCIE_MSI_INTR0_STATUS +
-					   (i * MSI_REG_CTRL_BLOCK_SIZE));
+		unsigned int reg_off = i * MSI_REG_CTRL_BLOCK_SIZE;
+		unsigned int irq_off = i * MAX_MSI_IRQS_PER_CTRL;
+		unsigned long status, pos;
+
+		status = dw_pcie_readl_dbi(pci, PCIE_MSI_INTR0_STATUS + reg_off);
 		if (!status)
 			continue;
 
-		ret = IRQ_HANDLED;
-		val = status;
-		pos = 0;
-		while ((pos = find_next_bit(&val, MAX_MSI_IRQS_PER_CTRL,
-					    pos)) != MAX_MSI_IRQS_PER_CTRL) {
-			generic_handle_domain_irq(pp->irq_domain,
-						  (i * MAX_MSI_IRQS_PER_CTRL) +
-						  pos);
-			pos++;
-		}
+		for_each_set_bit(pos, &status, MAX_MSI_IRQS_PER_CTRL)
+			generic_handle_demux_domain_irq(pp->irq_domain, irq_off + pos);
 	}
-
-	return ret;
 }
 
 /* Chained MSI interrupt service routine */
@@ -95,13 +102,10 @@ static void dw_pci_setup_msi_msg(struct irq_data *d, struct msi_msg *msg)
 {
 	struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(d);
 	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
-	u64 msi_target;
-
-	msi_target = (u64)pp->msi_data;
+	u64 msi_target = (u64)pp->msi_data;
 
 	msg->address_lo = lower_32_bits(msi_target);
 	msg->address_hi = upper_32_bits(msi_target);
-
 	msg->data = d->hwirq;
 
 	dev_dbg(pci->dev, "msi#%d address_hi %#x address_lo %#x\n",
@@ -113,18 +117,14 @@ static void dw_pci_bottom_mask(struct irq_data *d)
 	struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(d);
 	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
 	unsigned int res, bit, ctrl;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&pp->lock, flags);
+	guard(raw_spinlock)(&pp->lock);
 
 	ctrl = d->hwirq / MAX_MSI_IRQS_PER_CTRL;
 	res = ctrl * MSI_REG_CTRL_BLOCK_SIZE;
 	bit = d->hwirq % MAX_MSI_IRQS_PER_CTRL;
 
 	pp->irq_mask[ctrl] |= BIT(bit);
 	dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK + res, pp->irq_mask[ctrl]);
-
-	raw_spin_unlock_irqrestore(&pp->lock, flags);
 }
 
 static void dw_pci_bottom_unmask(struct irq_data *d)
@@ -132,18 +132,14 @@ static void dw_pci_bottom_unmask(struct irq_data *d)
 	struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(d);
 	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
 	unsigned int res, bit, ctrl;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&pp->lock, flags);
+	guard(raw_spinlock)(&pp->lock);
 
 	ctrl = d->hwirq / MAX_MSI_IRQS_PER_CTRL;
 	res = ctrl * MSI_REG_CTRL_BLOCK_SIZE;
 	bit = d->hwirq % MAX_MSI_IRQS_PER_CTRL;
 
 	pp->irq_mask[ctrl] &= ~BIT(bit);
 	dw_pcie_writel_dbi(pci, PCIE_MSI_INTR0_MASK + res, pp->irq_mask[ctrl]);
-
-	raw_spin_unlock_irqrestore(&pp->lock, flags);
 }
 
 static void dw_pci_bottom_ack(struct irq_data *d)
@@ -160,54 +156,48 @@ static void dw_pci_bottom_ack(struct irq_data *d)
 }
 
 static struct irq_chip dw_pci_msi_bottom_irq_chip = {
-	.name = "DWPCI-MSI",
-	.irq_ack = dw_pci_bottom_ack,
-	.irq_compose_msi_msg = dw_pci_setup_msi_msg,
-	.irq_mask = dw_pci_bottom_mask,
-	.irq_unmask = dw_pci_bottom_unmask,
+	.name = "DWPCI-MSI",
+	.irq_compose_msi_msg = dw_pci_setup_msi_msg,
+	.irq_mask = dw_pci_bottom_mask,
+	.irq_unmask = dw_pci_bottom_unmask,
+#ifdef CONFIG_SMP
+	.irq_ack = dw_irq_noop,
+	.irq_pre_redirect = dw_pci_bottom_ack,
+	.irq_set_affinity = irq_chip_redirect_set_affinity,
+#else
+	.irq_ack = dw_pci_bottom_ack,
+#endif
 };
 
-static int dw_pcie_irq_domain_alloc(struct irq_domain *domain,
-				    unsigned int virq, unsigned int nr_irqs,
-				    void *args)
+static int dw_pcie_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				    unsigned int nr_irqs, void *args)
 {
 	struct dw_pcie_rp *pp = domain->host_data;
-	unsigned long flags;
-	u32 i;
 	int bit;
 
-	raw_spin_lock_irqsave(&pp->lock, flags);
-
-	bit = bitmap_find_free_region(pp->msi_irq_in_use, pp->num_vectors,
-				      order_base_2(nr_irqs));
-
-	raw_spin_unlock_irqrestore(&pp->lock, flags);
+	scoped_guard(raw_spinlock_irq, &pp->lock) {
+		bit = bitmap_find_free_region(pp->msi_irq_in_use, pp->num_vectors,
+					      order_base_2(nr_irqs));
+	}
 
 	if (bit < 0)
 		return -ENOSPC;
 
-	for (i = 0; i < nr_irqs; i++)
-		irq_domain_set_info(domain, virq + i, bit + i,
-				    pp->msi_irq_chip,
-				    pp, handle_edge_irq,
-				    NULL, NULL);
-
+	for (unsigned int i = 0; i < nr_irqs; i++) {
+		irq_domain_set_info(domain, virq + i, bit + i, pp->msi_irq_chip,
+				    pp, handle_edge_irq, NULL, NULL);
+	}
 	return 0;
 }
 
-static void dw_pcie_irq_domain_free(struct irq_domain *domain,
-				    unsigned int virq, unsigned int nr_irqs)
+static void dw_pcie_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+				    unsigned int nr_irqs)
 {
 	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
 	struct dw_pcie_rp *pp = domain->host_data;
-	unsigned long flags;
 
-	raw_spin_lock_irqsave(&pp->lock, flags);
-
-	bitmap_release_region(pp->msi_irq_in_use, d->hwirq,
-			      order_base_2(nr_irqs));
-
-	raw_spin_unlock_irqrestore(&pp->lock, flags);
+	guard(raw_spinlock_irq)(&pp->lock);
+	bitmap_release_region(pp->msi_irq_in_use, d->hwirq, order_base_2(nr_irqs));
 }
 
 static const struct irq_domain_ops dw_pcie_msi_domain_ops = {
@@ -241,8 +231,7 @@ void dw_pcie_free_msi(struct dw_pcie_rp *pp)
 
 	for (ctrl = 0; ctrl < MAX_MSI_CTRLS; ctrl++) {
 		if (pp->msi_irq[ctrl] > 0)
-			irq_set_chained_handler_and_data(pp->msi_irq[ctrl],
-							 NULL, NULL);
+			irq_set_chained_handler_and_data(pp->msi_irq[ctrl], NULL, NULL);
 	}
 
 	irq_domain_remove(pp->irq_domain);
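For controllers outside the DesignWare family the conversion pattern is the same: demultiplex in the chained handler and let generic_handle_demux_domain_irq() decide whether the per-vector handler runs locally or is redirected. A sketch under assumed names (foo_port, FOO_MSI_STATUS and msi_domain are hypothetical, not from this series):

static void foo_msi_demux_handler(struct irq_desc *desc)
{
	struct foo_port *fp = irq_desc_get_handler_data(desc);
	struct irq_chip *chip = irq_desc_get_chip(desc);
	unsigned long status, pos;

	chained_irq_enter(chip, desc);
	/* FOO_MSI_STATUS: hypothetical 32-bit per-vector status register */
	status = readl(fp->base + FOO_MSI_STATUS);
	for_each_set_bit(pos, &status, 32)
		generic_handle_demux_domain_irq(fp->msi_domain, pos);
	chained_irq_exit(chip, desc);
}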
diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
index 31685951a080..403f6cfe8191 100644
--- a/drivers/pci/controller/dwc/pcie-designware.h
+++ b/drivers/pci/controller/dwc/pcie-designware.h
@@ -821,7 +821,7 @@ static inline enum dw_pcie_ltssm dw_pcie_get_ltssm(struct dw_pcie *pci)
 #ifdef CONFIG_PCIE_DW_HOST
 int dw_pcie_suspend_noirq(struct dw_pcie *pci);
 int dw_pcie_resume_noirq(struct dw_pcie *pci);
-irqreturn_t dw_handle_msi_irq(struct dw_pcie_rp *pp);
+void dw_handle_msi_irq(struct dw_pcie_rp *pp);
 void dw_pcie_msi_init(struct dw_pcie_rp *pp);
 int dw_pcie_msi_host_init(struct dw_pcie_rp *pp);
 void dw_pcie_free_msi(struct dw_pcie_rp *pp);
@@ -842,10 +842,7 @@ static inline int dw_pcie_resume_noirq(struct dw_pcie *pci)
 	return 0;
 }
 
-static inline irqreturn_t dw_handle_msi_irq(struct dw_pcie_rp *pp)
-{
-	return IRQ_NONE;
-}
+static inline void dw_handle_msi_irq(struct dw_pcie_rp *pp) { }
 
 static inline void dw_pcie_msi_init(struct dw_pcie_rp *pp) { }
diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c
index 34d664139f48..e2412175d7af 100644
--- a/drivers/pci/msi/msi.c
+++ b/drivers/pci/msi/msi.c
@@ -321,14 +321,16 @@ static int msi_setup_msi_desc(struct pci_dev *dev, int nvec,
 static int msi_verify_entries(struct pci_dev *dev)
 {
 	struct msi_desc *entry;
+	u64 address;
 
-	if (!dev->no_64bit_msi)
+	if (dev->msi_addr_mask == DMA_BIT_MASK(64))
 		return 0;
 
 	msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
-		if (entry->msg.address_hi) {
-			pci_err(dev, "arch assigned 64-bit MSI address %#x%08x but device only supports 32 bits\n",
-				entry->msg.address_hi, entry->msg.address_lo);
+		address = (u64)entry->msg.address_hi << 32 | entry->msg.address_lo;
+		if (address & ~dev->msi_addr_mask) {
+			pci_err(dev, "arch assigned 64-bit MSI address %#llx above device MSI address mask %#llx\n",
+				address, dev->msi_addr_mask);
 			break;
 		}
 	}
@@ -737,7 +739,7 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
 
 	ret = msix_setup_interrupts(dev, entries, nvec, affd);
 	if (ret)
-		goto out_disable;
+		goto out_unmap;
 
 	/* Disable INTX */
 	pci_intx_for_msi(dev, 0);
@@ -758,6 +760,8 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
 	pcibios_free_irq(dev);
 	return 0;
 
+out_unmap:
+	iounmap(dev->msix_base);
 out_disable:
 	dev->msix_enabled = 0;
 	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE, 0);
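A worked example of the new range check in msi_verify_entries(): with msi_addr_mask = DMA_BIT_MASK(40), any arch-assigned address that sets bit 40 or above trips the "address & ~mask" test.

	u64 mask = DMA_BIT_MASK(40);		/* 0x000000ffffffffff */
	u64 ok   = 0x000000fe00000000ULL;	/* bits 33-39 set: ok & ~mask == 0, accepted */
	u64 bad  = 0x0000010000000000ULL;	/* bit 40 set: bad & ~mask != 0, rejected */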
diff --git a/drivers/pci/msi/pcidev_msi.c b/drivers/pci/msi/pcidev_msi.c
index 5520aff53b56..0b0346813092 100644
--- a/drivers/pci/msi/pcidev_msi.c
+++ b/drivers/pci/msi/pcidev_msi.c
@@ -24,7 +24,7 @@ void pci_msi_init(struct pci_dev *dev)
 	}
 
 	if (!(ctrl & PCI_MSI_FLAGS_64BIT))
-		dev->no_64bit_msi = 1;
+		dev->msi_addr_mask = DMA_BIT_MASK(32);
 }
 
 void pci_msix_init(struct pci_dev *dev)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 41183aed8f5d..a2bff57176a3 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2047,6 +2047,13 @@ int pci_setup_device(struct pci_dev *dev)
 	 */
 	dev->dma_mask = 0xffffffff;
 
+	/*
+	 * Assume 64-bit addresses for MSI initially. This is changed to
+	 * 32-bit if the MSI (rather than MSI-X) capability does not have
+	 * PCI_MSI_FLAGS_64BIT set. It can also be overridden by the driver.
+	 */
+	dev->msi_addr_mask = DMA_BIT_MASK(64);
+
 	dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus),
 		     dev->bus->number, PCI_SLOT(dev->devfn),
 		     PCI_FUNC(dev->devfn));
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 91afd25faa28..951acbdb9f84 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -459,6 +459,8 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
 *				checks against the supplied affinity mask are not
 *				required. This is used for CPU hotplug where the
 *				target CPU is not yet set in the cpu_online_mask.
+ * @irq_pre_redirect:	Optional function to be invoked before redirecting
+ *			an interrupt via irq_work. Called only on CONFIG_SMP.
 * @irq_retrigger:	resend an IRQ to the CPU
 * @irq_set_type:	set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ
 * @irq_set_wake:	enable/disable power-management wake-on of an IRQ
@@ -503,6 +505,7 @@ struct irq_chip {
 	void		(*irq_eoi)(struct irq_data *data);
 
 	int		(*irq_set_affinity)(struct irq_data *data, const struct cpumask *dest, bool force);
+	void		(*irq_pre_redirect)(struct irq_data *data);
 	int		(*irq_retrigger)(struct irq_data *data);
 	int		(*irq_set_type)(struct irq_data *data, unsigned int flow_type);
 	int		(*irq_set_wake)(struct irq_data *data, unsigned int on);
@@ -684,6 +687,13 @@ extern int irq_chip_set_vcpu_affinity_parent(struct irq_data *data,
 extern int irq_chip_set_type_parent(struct irq_data *data, unsigned int type);
 extern int irq_chip_request_resources_parent(struct irq_data *data);
 extern void irq_chip_release_resources_parent(struct irq_data *data);
+#ifdef CONFIG_SMP
+void irq_chip_pre_redirect_parent(struct irq_data *data);
+#endif
+#endif
+
+#ifdef CONFIG_SMP
+int irq_chip_redirect_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force);
 #endif
 
 /* Disable or mask interrupts during a kernel kexec */
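A child MSI chip opts into redirection the same way the DesignWare bottom chip does above: the real ack moves to irq_pre_redirect so it runs before the irq_work hand-off, and affinity changes are absorbed by irq_chip_redirect_set_affinity(). A sketch with hypothetical foo_* callbacks:

static struct irq_chip foo_msi_bottom_chip = {
	.name			= "FOO-MSI",
	.irq_compose_msi_msg	= foo_msi_compose_msg,
	.irq_mask		= foo_msi_mask,
	.irq_unmask		= foo_msi_unmask,
#ifdef CONFIG_SMP
	.irq_ack		= foo_msi_noop_ack,	/* nothing left to do at ack time */
	.irq_pre_redirect	= foo_msi_ack,		/* clear status before the hand-off */
	.irq_set_affinity	= irq_chip_redirect_set_affinity,
#else
	.irq_ack		= foo_msi_ack,
#endif
};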
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 17902861de76..dae9a9b93665 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -2,9 +2,10 @@
 #ifndef _LINUX_IRQDESC_H
 #define _LINUX_IRQDESC_H
 
-#include <linux/rcupdate.h>
+#include <linux/irq_work.h>
 #include <linux/kobject.h>
 #include <linux/mutex.h>
+#include <linux/rcupdate.h>
 
 /*
  * Core internal functions to deal with irq descriptors
@@ -29,6 +30,17 @@ struct irqstat {
 #endif
 };
 
+/**
+ * struct irq_redirect - interrupt redirection metadata
+ * @work:	Hard irq_work item for handler execution on a different CPU
+ * @target_cpu:	CPU to run the irq handler on in case the current CPU is not
+ *		part of the irq affinity mask
+ */
+struct irq_redirect {
+	struct irq_work		work;
+	unsigned int		target_cpu;
+};
+
 /**
  * struct irq_desc - interrupt descriptor
  * @irq_common_data:	per irq and chip data passed down to chip functions
@@ -46,6 +58,7 @@ struct irqstat {
 * @threads_handled:	stats field for deferred spurious detection of threaded handlers
 * @threads_handled_last: comparator field for deferred spurious detection of threaded handlers
 * @lock:		locking for SMP
+ * @redirect:		Facility for redirecting interrupts via irq_work
 * @affinity_hint:	hint to user space for preferred irq affinity
 * @affinity_notify:	context for notification of affinity changes
 * @pending_mask:	pending rebalanced interrupts
@@ -83,6 +96,7 @@ struct irq_desc {
 	raw_spinlock_t		lock;
 	struct cpumask		*percpu_enabled;
 #ifdef CONFIG_SMP
+	struct irq_redirect	redirect;
 	const struct cpumask	*affinity_hint;
 	struct irq_affinity_notify *affinity_notify;
 #ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -185,6 +199,7 @@ int generic_handle_irq_safe(unsigned int irq);
 int generic_handle_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq);
 int generic_handle_domain_irq_safe(struct irq_domain *domain, irq_hw_number_t hwirq);
 int generic_handle_domain_nmi(struct irq_domain *domain, irq_hw_number_t hwirq);
+bool generic_handle_demux_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq);
 #endif
 
 /* Test to see if a driver has successfully requested an irq */
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 8ddb05d5c96a..fa41eed62868 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -49,12 +49,12 @@ typedef struct arch_msi_msg_data {
 #endif
 
 /**
- * msi_msg - Representation of a MSI message
+ * struct msi_msg - Representation of a MSI message
 * @address_lo:	Low 32 bits of msi message address
- * @arch_addrlo:	Architecture specific shadow of @address_lo
+ * @arch_addr_lo:	Architecture specific shadow of @address_lo
 * @address_hi:	High 32 bits of msi message address
 *		(only used when device supports it)
- * @arch_addrhi:	Architecture specific shadow of @address_hi
+ * @arch_addr_hi:	Architecture specific shadow of @address_hi
 * @data:	MSI message data (usually 16 bits)
 * @arch_data:	Architecture specific shadow of @data
 */
@@ -91,7 +91,7 @@ typedef void (*irq_write_msi_msg_t)(struct msi_desc *desc,
 				    struct msi_msg *msg);
 
 /**
- * pci_msi_desc - PCI/MSI specific MSI descriptor data
+ * struct pci_msi_desc - PCI/MSI specific MSI descriptor data
 *
 * @msi_mask:	[PCI MSI]   MSI cached mask bits
 * @msix_ctrl:	[PCI MSI-X] MSI-X cached per vector control bits
@@ -101,6 +101,7 @@ typedef void (*irq_write_msi_msg_t)(struct msi_desc *desc,
 * @can_mask:	[PCI MSI/X] Masking supported?
 * @is_64:	[PCI MSI/X] Address size: 0=32bit 1=64bit
 * @default_irq:[PCI MSI/X] The default pre-assigned non-MSI irq
+ * @msi_attrib:	[PCI MSI/X] Compound struct of MSI/X attributes
 * @mask_pos:	[PCI MSI]   Mask register position
 * @mask_base:	[PCI MSI-X] Mask register base address
 */
@@ -169,7 +170,7 @@ struct msi_desc_data {
 *			Only used if iommu_msi_shift != 0
 * @iommu_msi_shift:	Indicates how many bits of the original address should be
 *			preserved when using iommu_msi_iova.
- * @sysfs_attr:		Pointer to sysfs device attribute
+ * @sysfs_attrs:	Pointer to sysfs device attribute
 *
 * @write_msi_msg:	Callback that may be called when the MSI message
 *			address or data changes
@@ -220,7 +221,7 @@ enum msi_desc_filter {
 /**
  * struct msi_dev_domain - The internals of MSI domain info per device
 * @store:	Xarray for storing MSI descriptor pointers
- * @irqdomain:	Pointer to a per device interrupt domain
+ * @domain:	Pointer to a per device interrupt domain
 */
 struct msi_dev_domain {
 	struct xarray		store;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9357e9b00e1c..b30631673b5b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -377,6 +377,13 @@ struct pci_dev {
 					   0xffffffff. You only need to change
 					   this if your device has broken DMA
 					   or supports 64-bit transfers.  */
+	u64		msi_addr_mask;	/* Mask of the bits of bus address for
+					   MSI that this device implements.
+					   Normally set based on device
+					   capabilities. You only need to
+					   change this if your device claims
+					   to support 64-bit MSI but implements
+					   fewer than 64 address bits. */
 
 	struct device_dma_parameters dma_parms;
 
@@ -441,7 +448,6 @@ struct pci_dev {
 
 	unsigned int	is_busmaster:1;		/* Is busmaster */
 	unsigned int	no_msi:1;		/* May not use MSI */
-	unsigned int	no_64bit_msi:1;		/* May only use 32-bit MSIs */
 	unsigned int	block_cfg_access:1;	/* Config space access blocked */
 	unsigned int	broken_parity_status:1;	/* Generates false positive parity */
 	unsigned int	irq_reroute_variant:2;	/* Needs IRQ rerouting variant */
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 23f22f3d5207..6147a07d0127 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -1122,7 +1122,7 @@ void irq_cpu_offline(void)
 }
 #endif
 
-#ifdef  CONFIG_IRQ_DOMAIN_HIERARCHY
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
 
 #ifdef CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS
 /**
@@ -1194,6 +1194,15 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_mask_irq);
 
 #endif /* CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS */
 
+#ifdef CONFIG_SMP
+void irq_chip_pre_redirect_parent(struct irq_data *data)
+{
+	data = data->parent_data;
+	data->chip->irq_pre_redirect(data);
+}
+EXPORT_SYMBOL_GPL(irq_chip_pre_redirect_parent);
+#endif
+
 /**
  * irq_chip_set_parent_state - set the state of a parent interrupt.
  *
@@ -1476,6 +1485,19 @@ void irq_chip_release_resources_parent(struct irq_data *data)
 		data->chip->irq_release_resources(data);
 }
 EXPORT_SYMBOL_GPL(irq_chip_release_resources_parent);
+#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
+
+#ifdef CONFIG_SMP
+int irq_chip_redirect_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force)
+{
+	struct irq_redirect *redir = &irq_data_to_desc(data)->redirect;
+
+	WRITE_ONCE(redir->target_cpu, cpumask_first(dest));
+	irq_data_update_effective_affinity(data, dest);
+
+	return IRQ_SET_MASK_OK_DONE;
+}
+EXPORT_SYMBOL_GPL(irq_chip_redirect_set_affinity);
 #endif
 
 /**
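Note the affinity semantics this buys: irq_chip_redirect_set_affinity() never touches hardware. It records the first CPU of the requested mask as the redirect target and returns IRQ_SET_MASK_OK_DONE, which tells the core that everything, including the effective-affinity update, has already been taken care of. From a driver, steering such an interrupt is the usual call:

	/* Ends up in irq_chip_redirect_set_affinity(): target_cpu becomes 2. */
	ret = irq_set_affinity(irq, cpumask_of(2));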
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index c3bc00e08c58..022b3741dd7a 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -78,8 +78,12 @@ static int alloc_masks(struct irq_desc *desc, int node)
 	return 0;
 }
 
-static void desc_smp_init(struct irq_desc *desc, int node,
-			  const struct cpumask *affinity)
+static void irq_redirect_work(struct irq_work *work)
+{
+	handle_irq_desc(container_of(work, struct irq_desc, redirect.work));
+}
+
+static void desc_smp_init(struct irq_desc *desc, int node, const struct cpumask *affinity)
 {
 	if (!affinity)
 		affinity = irq_default_affinity;
@@ -91,6 +95,7 @@ static void desc_smp_init(struct irq_desc *desc, int node,
 #ifdef CONFIG_NUMA
 	desc->irq_common_data.node = node;
 #endif
+	desc->redirect.work = IRQ_WORK_INIT_HARD(irq_redirect_work);
 }
 
 static void free_masks(struct irq_desc *desc)
@@ -767,6 +772,83 @@ int generic_handle_domain_nmi(struct irq_domain *domain, irq_hw_number_t hwirq)
 	WARN_ON_ONCE(!in_nmi());
 	return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
 }
+
+#ifdef CONFIG_SMP
+static bool demux_redirect_remote(struct irq_desc *desc)
+{
+	guard(raw_spinlock)(&desc->lock);
+	const struct cpumask *m = irq_data_get_effective_affinity_mask(&desc->irq_data);
+	unsigned int target_cpu = READ_ONCE(desc->redirect.target_cpu);
+
+	if (desc->irq_data.chip->irq_pre_redirect)
+		desc->irq_data.chip->irq_pre_redirect(&desc->irq_data);
+
+	/*
+	 * If the interrupt handler is already running on a CPU that's included
+	 * in the interrupt's affinity mask, redirection is not necessary.
+	 */
+	if (cpumask_test_cpu(smp_processor_id(), m))
+		return false;
+
+	/*
+	 * The desc->action check protects against IRQ shutdown: __free_irq()
+	 * sets desc->action to NULL while holding desc->lock, which we also
+	 * hold.
+	 *
+	 * Calling irq_work_queue_on() here is safe w.r.t. CPU unplugging:
+	 * - takedown_cpu() schedules multi_cpu_stop() on all active CPUs,
+	 *   including the one that's taken down.
+	 * - multi_cpu_stop() acts like a barrier, which means all active
+	 *   CPUs go through MULTI_STOP_DISABLE_IRQ and disable hard IRQs
+	 *   *before* the dying CPU runs take_cpu_down() in MULTI_STOP_RUN.
+	 * - Hard IRQs are re-enabled at the end of multi_cpu_stop(), *after*
+	 *   the dying CPU has run take_cpu_down() in MULTI_STOP_RUN.
+	 * - Since we run in hard IRQ context, we run either before or after
+	 *   take_cpu_down() but never concurrently.
+	 * - If we run before take_cpu_down(), the dying CPU hasn't been marked
+	 *   offline yet (it's marked via take_cpu_down() -> __cpu_disable()),
+	 *   so the WARN in irq_work_queue_on() can't occur.
+	 * - Furthermore, the work item we queue will be flushed later via
+	 *   take_cpu_down() -> cpuhp_invoke_callback_range_nofail() ->
+	 *   smpcfd_dying_cpu() -> irq_work_run().
+	 * - If we run after take_cpu_down(), target_cpu has already been
+	 *   updated via take_cpu_down() -> __cpu_disable(), which eventually
+	 *   calls irq_do_set_affinity() during IRQ migration. So, target_cpu
+	 *   no longer points to the dying CPU in this case.
+	 */
+	if (desc->action)
+		irq_work_queue_on(&desc->redirect.work, target_cpu);
+
+	return true;
+}
+#else /* CONFIG_SMP */
+static bool demux_redirect_remote(struct irq_desc *desc)
+{
+	return false;
+}
+#endif
+
+/**
+ * generic_handle_demux_domain_irq - Invoke the handler for a hardware interrupt
+ *				     of a demultiplexing domain
+ * @domain:	The domain where to perform the lookup
+ * @hwirq:	The hardware interrupt number to convert to a logical one
+ *
+ * Returns: True on success, or false if the lookup has failed
+ */
+bool generic_handle_demux_domain_irq(struct irq_domain *domain, irq_hw_number_t hwirq)
+{
+	struct irq_desc *desc = irq_resolve_mapping(domain, hwirq);
+
+	if (unlikely(!desc))
+		return false;
+
+	if (demux_redirect_remote(desc))
+		return true;
+
+	return !handle_irq_desc(desc);
+}
+EXPORT_SYMBOL_GPL(generic_handle_demux_domain_irq);
+
 #endif
 
 /* Dynamic interrupt handling */
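The redirect machinery is plain irq_work underneath: a hard irq_work item queued on the target CPU runs its callback from that CPU's hard interrupt context, which is what lets the redirected handler keep its hardirq execution environment. A stripped-down sketch of the same mechanism (the foo_* names are illustrative only):

static void foo_redirect_fn(struct irq_work *work)
{
	/* Runs in hard interrupt context on the CPU it was queued on. */
}

static struct irq_work foo_redirect = IRQ_WORK_INIT_HARD(foo_redirect_fn);

static void foo_kick(unsigned int target_cpu)
{
	/* Returns false if the item is still pending from an earlier kick. */
	irq_work_queue_on(&foo_redirect, target_cpu);
}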
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 9cc2a37f21a9..cded3d960eb7 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -35,6 +35,16 @@ static int __init setup_forced_irqthreads(char *arg)
 early_param("threadirqs", setup_forced_irqthreads);
 #endif
 
+#ifdef CONFIG_SMP
+static inline void synchronize_irqwork(struct irq_desc *desc)
+{
+	/* Synchronize pending or on the fly redirect work */
+	irq_work_sync(&desc->redirect.work);
+}
+#else
+static inline void synchronize_irqwork(struct irq_desc *desc) { }
+#endif
+
 static int __irq_get_irqchip_state(struct irq_data *d, enum irqchip_irq_state which, bool *state);
 
 static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip)
@@ -107,7 +117,9 @@ EXPORT_SYMBOL(synchronize_hardirq);
 
 static void __synchronize_irq(struct irq_desc *desc)
 {
+	synchronize_irqwork(desc);
 	__synchronize_hardirq(desc, true);
+
 	/*
 	 * We made sure that no hardirq handler is running. Now verify that no
 	 * threaded handlers are active.
@@ -217,8 +229,7 @@ static inline void irq_validate_effective_affinity(struct irq_data *data) { }
 
 static DEFINE_PER_CPU(struct cpumask, __tmp_mask);
 
-int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
-			bool force)
+int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
 {
 	struct cpumask *tmp_mask = this_cpu_ptr(&__tmp_mask);
 	struct irq_desc *desc = irq_data_to_desc(data);
diff --git a/sound/hda/controllers/intel.c b/sound/hda/controllers/intel.c
index 1e8e3d61291a..a44de2306a2b 100644
--- a/sound/hda/controllers/intel.c
+++ b/sound/hda/controllers/intel.c
@@ -1903,11 +1903,6 @@ static int azx_first_init(struct azx *chip)
 		chip->gts_present = true;
 #endif
 
-	if (chip->msi && chip->driver_caps & AZX_DCAPS_NO_MSI64) {
-		dev_dbg(card->dev, "Disabling 64bit MSI\n");
-		pci->no_64bit_msi = true;
-	}
-
 	pci_set_master(pci);
 
 	gcap = azx_readw(chip, GCAP);
@@ -1958,6 +1953,11 @@ static int azx_first_init(struct azx *chip)
 	dma_set_mask_and_coherent(&pci->dev, DMA_BIT_MASK(32));
 	dma_set_max_seg_size(&pci->dev, UINT_MAX);
 
+	if (chip->msi && chip->driver_caps & AZX_DCAPS_NO_MSI64) {
+		dev_dbg(card->dev, "Restricting MSI to %u-bit\n", dma_bits);
+		pci->msi_addr_mask = DMA_BIT_MASK(dma_bits);
+	}
+
 	/* read number of streams from GCAP register instead of using
 	 * hardcoded value
 	 */
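One ordering caveat for drivers adopting msi_addr_mask, visible in both the ionic and HDA conversions above: the mask has to be final before interrupt vectors are allocated, because msi_verify_entries() checks the arch-assigned address at setup time. A hypothetical probe-ordering sketch:

	/* Set the mask first ... */
	pdev->msi_addr_mask = DMA_BIT_MASK(32);

	/* ... then allocate vectors, so msi_verify_entries() can police
	 * the arch-assigned MSI address against it. */
	nvec = pci_alloc_irq_vectors(pdev, 1, 8, PCI_IRQ_MSI | PCI_IRQ_MSIX);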