Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:

 - Several missed error unwinds on system call paths in mlx5, mana,
   ocrdma, vmw_pvrdma, mlx4, and hns

 - More rxe bugs processing network packets

 - User triggerable races in mlx5 when destroying and creating the same
   object when the FW returns the same object ID

 - Incorrect passing of an IPv6 address through netlink
   RDMA_NL_LS_OP_IP_RESOLVE

 - Add memory ordering for mlx5's lock avoidance pattern

 - Protect mana from kernel memory overflow

 - Use safe patterns for xarray/radix_tree lookups in mlx5 and hns

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (24 commits)
  RDMA/hns: Fix unlocked call to hns_roce_qp_remove()
  RDMA/hns: Fix xarray race in hns_roce_create_qp_common()
  RDMA/hns: Fix xarray race in hns_roce_create_srq()
  RDMA/mlx4: Fix mis-use of RCU in mlx4_srq_event()
  RDMA/mlx4: Fix resource leak on error in mlx4_ib_create_srq()
  RDMA/vmw_pvrdma: Fix double free on pvrdma_alloc_ucontext() error path
  RDMA/ocrdma: Don't NULL deref uctx on errors in ocrdma_copy_pd_uresp()
  RDMA/ocrdma: Clarify the mm_head searching
  RDMA/mana: Fix error unwind in mana_ib_create_qp_rss()
  RDMA/mana: Fix mana_destroy_wq_obj() cleanup in mana_ib_create_qp_rss()
  RDMA/mana: Remove user triggerable WARN_ON() in mana_ib_create_qp_rss()
  RDMA/mana: Validate rx_hash_key_len
  RDMA/mlx5: Add missing store/release for lock elision pattern
  RDMA/mlx5: Restore zero-init to mlx5_ib_modify_qp() ucmd
  RDMA/ionic: Fix typo in format string
  RDMA/mlx5: Fix null-ptr-deref in Raw Packet QP creation
  RDMA/core: Fix rereg_mr use-after-free race
  IB/core: Fix IPv6 netlink message size in ib_nl_ip_send_msg()
  RDMA/mlx5: Fix UAF in DCT destroy due to race with create
  RDMA/mlx5: Fix UAF in SRQ destroy due to race with create
  ...
Linus Torvalds
2026-05-05 09:11:52 -07:00
20 changed files with 113 additions and 45 deletions

View File

@@ -149,7 +149,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6;
}
len = nla_total_size(sizeof(size));
len = nla_total_size(size);
len += NLMSG_ALIGN(sizeof(*header));
skb = nlmsg_new(len, GFP_KERNEL);
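
The bug: sizeof(size) measures the length variable itself, not the address
payload it describes, so an IPv6 RDMA_NL_LS_OP_IP_RESOLVE message was sized
for the variable rather than the 16 address bytes it must carry. A minimal
sizing sketch with hypothetical names; nla_total_size() must see the
attribute payload length (4 for IPv4, 16 for IPv6), not the size of the
variable that stores it:

#include <net/netlink.h>

static size_t ip_resolve_msg_len(size_t addr_len, size_t header_len)
{
	return nla_total_size(addr_len) + NLMSG_ALIGN(header_len);
}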

View File

@@ -778,6 +778,7 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
struct ib_pd *orig_pd;
struct ib_pd *new_pd;
struct ib_mr *new_mr;
u32 lkey, rkey;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
@@ -846,6 +847,8 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
new_mr->uobject = uobj;
atomic_inc(&new_pd->usecnt);
new_uobj->object = new_mr;
lkey = new_mr->lkey;
rkey = new_mr->rkey;
rdma_restrack_new(&new_mr->res, RDMA_RESTRACK_MR);
rdma_restrack_set_name(&new_mr->res, NULL);
@@ -871,11 +874,13 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
mr->iova = cmd.hca_va;
mr->length = cmd.length;
}
lkey = mr->lkey;
rkey = mr->rkey;
}
memset(&resp, 0, sizeof(resp));
resp.lkey = mr->lkey;
resp.rkey = mr->rkey;
resp.lkey = lkey;
resp.rkey = rkey;
ret = uverbs_response(attrs, &resp, sizeof(resp));
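
The race being fixed: once the re-registered MR is reachable by other
threads, a concurrent destroy can free it before the reply is written, so
lkey/rkey are snapshotted while the MR is known valid and the response is
built from those copies. Illustrative fragment, not the exact uverbs flow:

	u32 lkey, rkey;

	/* capture while the MR cannot yet be torn down by another thread */
	lkey = mr->lkey;
	rkey = mr->rkey;

	/* ... point after which a racing destroy may free 'mr' ... */

	resp.lkey = lkey;	/* no further 'mr' dereference */
	resp.rkey = rkey;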

View File

@@ -1942,13 +1942,16 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
void free_pio_map(struct hfi1_devdata *dd)
{
struct pio_vl_map *map;
/* Free PIO map if allocated */
if (rcu_access_pointer(dd->pio_map)) {
spin_lock_irq(&dd->pio_map_lock);
pio_map_free(rcu_access_pointer(dd->pio_map));
map = rcu_access_pointer(dd->pio_map);
RCU_INIT_POINTER(dd->pio_map, NULL);
spin_unlock_irq(&dd->pio_map_lock);
synchronize_rcu();
pio_map_free(map);
}
kfree(dd->kernel_send_context);
dd->kernel_send_context = NULL;
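
Both hfi1 hunks (this one and the sdma one that follows) restore the
canonical RCU update-side ordering: detach the pointer while holding the
lock, wait for a grace period, and only then free, so a reader that picked
up the old pointer under rcu_read_lock() can never dereference freed
memory. A minimal, self-contained sketch of that pattern with illustrative
names:

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct cfg { int data; };

static struct cfg __rcu *active_cfg;
static DEFINE_SPINLOCK(cfg_lock);

static void retire_cfg(void)
{
	struct cfg *old;

	spin_lock_irq(&cfg_lock);
	old = rcu_dereference_protected(active_cfg,
					lockdep_is_held(&cfg_lock));
	RCU_INIT_POINTER(active_cfg, NULL);
	spin_unlock_irq(&cfg_lock);

	synchronize_rcu();	/* all pre-existing readers have finished */
	kfree(old);		/* safe: no reader can still see 'old' */
}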

View File

@@ -1255,6 +1255,7 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
{
size_t i;
struct sdma_engine *sde;
struct sdma_vl_map *map;
if (dd->sdma_pad_dma) {
dma_free_coherent(&dd->pcidev->dev, SDMA_PAD,
@@ -1291,10 +1292,11 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
}
if (rcu_access_pointer(dd->sdma_map)) {
spin_lock_irq(&dd->sde_map_lock);
sdma_map_free(rcu_access_pointer(dd->sdma_map));
map = rcu_access_pointer(dd->sdma_map);
RCU_INIT_POINTER(dd->sdma_map, NULL);
spin_unlock_irq(&dd->sde_map_lock);
synchronize_rcu();
sdma_map_free(map);
}
kfree(dd->per_sdma);
dd->per_sdma = NULL;

View File

@@ -47,8 +47,8 @@ static struct hns_roce_qp *hns_roce_qp_lookup(struct hns_roce_dev *hr_dev,
xa_lock_irqsave(&hr_dev->qp_table_xa, flags);
qp = __hns_roce_qp_lookup(hr_dev, qpn);
if (qp)
refcount_inc(&qp->refcount);
if (qp && !refcount_inc_not_zero(&qp->refcount))
qp = NULL;
xa_unlock_irqrestore(&hr_dev->qp_table_xa, flags);
if (!qp)
@@ -1171,6 +1171,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
struct hns_roce_ib_create_qp_resp resp = {};
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_ib_create_qp ucmd = {};
unsigned long flags;
int ret;
mutex_init(&hr_qp->mutex);
@@ -1251,13 +1252,19 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
hr_qp->ibqp.qp_num = hr_qp->qpn;
hr_qp->event = hns_roce_ib_qp_event;
refcount_set(&hr_qp->refcount, 1);
init_completion(&hr_qp->free);
refcount_set_release(&hr_qp->refcount, 1);
return 0;
err_flow_ctrl:
spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
hns_roce_lock_cqs(init_attr->send_cq ? to_hr_cq(init_attr->send_cq) : NULL,
init_attr->recv_cq ? to_hr_cq(init_attr->recv_cq) : NULL);
hns_roce_qp_remove(hr_dev, hr_qp);
hns_roce_unlock_cqs(init_attr->send_cq ? to_hr_cq(init_attr->send_cq) : NULL,
init_attr->recv_cq ? to_hr_cq(init_attr->recv_cq) : NULL);
spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
err_store:
free_qpc(hr_dev, hr_qp);
err_qpc:
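
Two related changes here, and the same pattern repeats in the hns SRQ and
mlx4 SRQ hunks further down. On the lookup side, an object still present in
the xarray may already be dropping its final reference on the destroy path,
so only a successful refcount_inc_not_zero() hands out a usable reference.
On the create side, init_completion() now happens before the refcount is
published with refcount_set_release(), so the event handler can never see a
half-initialised object. A sketch of the lookup half, with hypothetical
names:

#include <linux/refcount.h>
#include <linux/xarray.h>

struct obj {
	refcount_t refcount;
};

static struct obj *obj_get(struct xarray *xa, unsigned long id)
{
	struct obj *obj;

	xa_lock(xa);
	obj = xa_load(xa, id);
	if (obj && !refcount_inc_not_zero(&obj->refcount))
		obj = NULL;	/* found, but already on its way out */
	xa_unlock(xa);

	return obj;
}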

View File

@@ -16,8 +16,8 @@ void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
xa_lock(&srq_table->xa);
srq = xa_load(&srq_table->xa, srqn & (hr_dev->caps.num_srqs - 1));
if (srq)
refcount_inc(&srq->refcount);
if (srq && !refcount_inc_not_zero(&srq->refcount))
srq = NULL;
xa_unlock(&srq_table->xa);
if (!srq) {
@@ -470,6 +470,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
if (ret)
goto err_srqn;
srq->event = hns_roce_ib_srq_event;
init_completion(&srq->free);
refcount_set_release(&srq->refcount, 1);
if (udata) {
resp.cap_flags = srq->cap_flags;
resp.srqn = srq->srqn;
@@ -480,10 +484,6 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
}
}
srq->event = hns_roce_ib_srq_event;
refcount_set(&srq->refcount, 1);
init_completion(&srq->free);
return 0;
err_srqc:

View File

@@ -185,7 +185,7 @@ static ssize_t hca_type_show(struct device *device,
struct ionic_ibdev *dev =
rdma_device_to_drv_device(device, struct ionic_ibdev, ibdev);
return sysfs_emit(buf, "%s.64\n", dev->ibdev.node_desc);
return sysfs_emit(buf, "%.64s\n", dev->ibdev.node_desc);
}
static DEVICE_ATTR_RO(hca_type);
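
"%s.64" prints the whole string followed by the literal characters ".64";
the intended "%.64s" is a precision that caps the read at 64 bytes, which
matters because node_desc is a fixed 64-byte field that need not be
NUL-terminated. Illustrative contrast with a hypothetical buffer:

	char node_desc[64];	/* fixed-size, possibly without a trailing NUL */

	sysfs_emit(buf, "%.64s\n", node_desc);	/* reads at most 64 bytes */
	sysfs_emit(buf, "%s.64\n", node_desc);	/* reads until a NUL, then appends ".64" */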

View File

@@ -137,8 +137,9 @@ int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
if (cq->queue.id >= gc->max_num_cqs)
return -EINVAL;
/* Create CQ table entry */
WARN_ON(gc->cq_table[cq->queue.id]);
/* Create CQ table entry, sharing a CQ between WQs is not supported */
if (gc->cq_table[cq->queue.id])
return -EINVAL;
if (cq->queue.kmem)
gdma_cq = cq->queue.kmem;
else

View File

@@ -21,6 +21,9 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
gc = mdev_to_gc(dev);
if (rx_hash_key_len > sizeof(req->hashkey))
return -EINVAL;
req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_DEF_SIZE);
req = kzalloc(req_buf_size, GFP_KERNEL);
if (!req)
@@ -173,11 +176,8 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ,
&wq_spec, &cq_spec, &wq->rx_object);
if (ret) {
/* Do cleanup starting with index i-1 */
i--;
if (ret)
goto fail;
}
/* The GDMA regions are now owned by the WQ object */
wq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
@@ -197,8 +197,10 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
/* Create CQ table entry */
ret = mana_ib_install_cq_cb(mdev, cq);
if (ret)
if (ret) {
mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
goto fail;
}
}
resp.num_entries = i;
@@ -215,13 +217,15 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
ibdev_dbg(&mdev->ib_dev,
"Failed to copy to udata create rss-qp, %d\n",
ret);
goto fail;
goto err_disable_vport_rx;
}
kfree(mana_ind_table);
return 0;
err_disable_vport_rx:
mana_disable_vport_rx(mpc);
fail:
while (i-- > 0) {
ibwq = ind_tbl->ind_tbl[i];
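
Three unwind problems are fixed in mana_ib_create_qp_rss(): the bogus i--
before goto fail (entry i was never created, and the fail loop already
counts down), the missing mana_destroy_wq_obj() when installing the CQ
callback fails after the current WQ object already exists, and a dedicated
err_disable_vport_rx label so the vport RX state is undone exactly once. A
self-contained sketch of the first and third rules, with all names made up:

#include <linux/errno.h>

static int setup_one(int i)	{ return i == 2 ? -EINVAL : 0; }
static void teardown_one(int i)	{ /* undo entry i */ }
static int enable_rx(void)	{ return 0; }
static void disable_rx(void)	{ /* undo enable_rx() */ }
static int reply_to_user(void)	{ return -EFAULT; }

static int setup_all(int n)
{
	int i, ret;

	for (i = 0; i < n; i++) {
		ret = setup_one(i);
		if (ret)
			goto fail;	/* entry i was never created: no i-- */
	}

	ret = enable_rx();
	if (ret)
		goto fail;		/* i == n: all entries torn down */

	ret = reply_to_user();
	if (ret)
		goto err_disable_rx;	/* must also undo enable_rx() */

	return 0;

err_disable_rx:
	disable_rx();
fail:
	while (i-- > 0)			/* tears down entries i-1 ... 0 */
		teardown_one(i);
	return ret;
}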

View File

@@ -194,13 +194,15 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq,
if (udata)
if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
err = -EFAULT;
goto err_wrid;
goto err_srq;
}
init_attr->attr.max_wr = srq->msrq.max - 1;
return 0;
err_srq:
mlx4_srq_free(dev->dev, &srq->msrq);
err_wrid:
if (udata)
mlx4_ib_db_unmap_user(ucontext, &srq->db);

View File

@@ -3310,7 +3310,7 @@ int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev)
* devr->c0 is set once, never changed until device unload.
* Avoid taking the mutex if initialization is already done.
*/
if (devr->c0)
if (smp_load_acquire(&devr->c0))
return 0;
mutex_lock(&devr->cq_lock);
@@ -3336,7 +3336,7 @@ int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev)
}
devr->p0 = pd;
devr->c0 = cq;
smp_store_release(&devr->c0, cq);
unlock:
mutex_unlock(&devr->cq_lock);
@@ -3354,7 +3354,7 @@ int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev)
* devr->s1 is set once, never changed until device unload.
* Avoid taking the mutex if initialization is already done.
*/
if (devr->s1)
if (smp_load_acquire(&devr->s1))
return 0;
mutex_lock(&devr->srq_lock);
@@ -3392,10 +3392,11 @@ int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev)
"Couldn't create SRQ 1 for res init, err=%pe\n",
s1);
ib_destroy_srq(s0);
goto unlock;
}
devr->s0 = s0;
devr->s1 = s1;
smp_store_release(&devr->s1, s1);
unlock:
mutex_unlock(&devr->srq_lock);
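
This is the lock-elision (double-checked initialisation) pattern the pull
message refers to: the lock-free fast path must load the pointer with
acquire semantics, and the slow path must publish the fully initialised
object with release semantics, otherwise a CPU that sees the pointer may
not yet see the stores that initialised what it points to. The same fix is
applied to the UMR QP further down. A minimal sketch with illustrative
names:

#include <asm/barrier.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct res { int ready; };

static struct res *res0;
static DEFINE_MUTEX(res_lock);

static struct res *get_res(void)
{
	struct res *r = smp_load_acquire(&res0);	/* lock-free fast path */

	if (r)
		return r;

	mutex_lock(&res_lock);
	r = res0;				/* re-check under the lock */
	if (!r) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r) {
			r->ready = 1;
			/* publish only after initialisation is complete */
			smp_store_release(&res0, r);
		}
	}
	mutex_unlock(&res_lock);
	return r;
}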

View File

@@ -1603,6 +1603,11 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
}
if (qp->rq.wqe_cnt) {
if (!rq->base.ubuffer.umem) {
err = -EINVAL;
goto err_destroy_sq;
}
rq->base.container_mibqp = qp;
if (qp->flags & IB_QP_CREATE_CVLAN_STRIPPING)
@@ -4692,7 +4697,7 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_ib_modify_qp_resp resp = {};
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_ib_modify_qp ucmd;
struct mlx5_ib_modify_qp ucmd = {};
enum ib_qp_type qp_type;
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
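
Two independent fixes in this file: the Raw Packet QP path now fails
cleanly instead of dereferencing a NULL RQ umem, and the ucmd in
mlx5_ib_modify_qp() gets its zero initializer back. The latter matters
because older user space may pass a command shorter than the kernel's
struct, so the copy fills only part of it and any field the user did not
supply must read as zero rather than stack garbage. A hedged sketch with a
hypothetical command struct:

#include <rdma/ib_verbs.h>

/* hypothetical stand-in for the real modify-QP user command */
struct modify_qp_ucmd {
	__u32 comp_mask;
	__u32 ece_options;
};

static int copy_modify_qp_ucmd(struct ib_udata *udata,
			       struct modify_qp_ucmd *ucmd)
{
	size_t len = min(udata->inlen, sizeof(*ucmd));

	memset(ucmd, 0, sizeof(*ucmd));	/* unsupplied fields stay zero */
	return ib_copy_from_udata(ucmd, udata, len);
}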

View File

@@ -314,7 +314,14 @@ int mlx5_core_destroy_dct(struct mlx5_ib_dev *dev,
xa_cmpxchg_irq(&table->dct_xa, dct->mqp.qpn, XA_ZERO_ENTRY, dct, 0);
return err;
}
xa_erase_irq(&table->dct_xa, dct->mqp.qpn);
/*
* A race can occur where a concurrent create gets the same dctn
* (after hardware released it) and overwrites XA_ZERO_ENTRY with
* its new DCT before we reach here. In that case, we must not erase
* the entry as it now belongs to the new DCT.
*/
xa_cmpxchg_irq(&table->dct_xa, dct->mqp.qpn, XA_ZERO_ENTRY, NULL, 0);
return 0;
}
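
The DCT number can be recycled by the firmware as soon as the destroy
command completes, so a concurrent create may legitimately claim the same
index and replace the XA_ZERO_ENTRY placeholder before the destroyer gets
back to the xarray; an unconditional xa_erase_irq() would then delete the
new owner's entry. The cmpxchg only clears the slot if it still holds the
placeholder we parked there. The identical fix is applied to SRQs in the
next hunk. An illustrative end-to-end destroy flow with hypothetical
helpers:

#include <linux/errno.h>
#include <linux/xarray.h>

static int fw_destroy(unsigned long id)
{
	return 0;	/* stand-in for the firmware destroy command */
}

static int destroy_obj(struct xarray *xa, void *obj, unsigned long id)
{
	int err;

	/* park the slot so lookups miss while the FW command runs */
	if (xa_cmpxchg_irq(xa, id, obj, XA_ZERO_ENTRY, 0) != obj)
		return -EINVAL;

	err = fw_destroy(id);
	if (err) {
		/* we still own the id: put our entry back */
		xa_cmpxchg_irq(xa, id, XA_ZERO_ENTRY, obj, 0);
		return err;
	}

	/* only clear the slot if no concurrent create reused the id */
	xa_cmpxchg_irq(xa, id, XA_ZERO_ENTRY, NULL, 0);
	return 0;
}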

View File

@@ -683,7 +683,14 @@ int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
xa_cmpxchg_irq(&table->array, srq->srqn, XA_ZERO_ENTRY, srq, 0);
return err;
}
xa_erase_irq(&table->array, srq->srqn);
/*
* A race can occur where a concurrent create gets the same srqn
* (after hardware released it) and overwrites XA_ZERO_ENTRY with
* its new SRQ before we reach here. In that case, we must not erase
* the entry as it now belongs to the new SRQ.
*/
xa_cmpxchg_irq(&table->array, srq->srqn, XA_ZERO_ENTRY, NULL, 0);
mlx5_core_res_put(&srq->common);
wait_for_completion(&srq->common.free);

View File

@@ -147,7 +147,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
* UMR qp is set once, never changed until device unload.
* Avoid taking the mutex if initialization is already done.
*/
if (dev->umrc.qp)
if (smp_load_acquire(&dev->umrc.qp))
return 0;
mutex_lock(&dev->umrc.init_lock);
@@ -185,7 +185,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
sema_init(&dev->umrc.sem, MAX_UMR_WR);
mutex_init(&dev->umrc.lock);
dev->umrc.state = MLX5_UMR_STATE_ACTIVE;
dev->umrc.qp = qp;
smp_store_release(&dev->umrc.qp, qp);
mutex_unlock(&dev->umrc.init_lock);
return 0;

View File

@@ -215,7 +215,7 @@ static void ocrdma_del_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
mutex_lock(&uctx->mm_list_lock);
list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
if (len != mm->key.len && phy_addr != mm->key.phy_addr)
if (len != mm->key.len || phy_addr != mm->key.phy_addr)
continue;
list_del(&mm->entry);
@@ -233,7 +233,7 @@ static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
mutex_lock(&uctx->mm_list_lock);
list_for_each_entry(mm, &uctx->mm_head, entry) {
if (len != mm->key.len && phy_addr != mm->key.phy_addr)
if (len != mm->key.len || phy_addr != mm->key.phy_addr)
continue;
found = true;
@@ -620,9 +620,9 @@ static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
ucopy_err:
if (pd->dpp_enabled)
ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE);
ocrdma_del_mmap(uctx, dpp_page_addr, PAGE_SIZE);
dpp_map_err:
ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size);
ocrdma_del_mmap(uctx, db_page_addr, db_page_size);
return status;
}
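
The mm-list changes are about match logic: an entry matches only when both
the length and the physical address are equal, so the loop must skip when
either field differs; with &&, an entry agreeing on just one field was
treated as a match. By De Morgan, !(a == x && b == y) is
(a != x || b != y). A hypothetical predicate making the intent explicit:

#include <linux/types.h>

static bool mm_key_matches(u64 key_phy_addr, unsigned long key_len,
			   u64 phy_addr, unsigned long len)
{
	return key_phy_addr == phy_addr && key_len == len;
}

The ocrdma_copy_pd_uresp() hunk is independent: per the commit ("Don't NULL
deref uctx on errors"), the error unwind now uses the local uctx pointer
rather than pd->uctx, which cannot be relied on at that point.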

View File

@@ -322,7 +322,7 @@ int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
uresp.qp_tab_size = vdev->dsr->caps.max_qp;
ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
if (ret) {
pvrdma_uar_free(vdev, &context->uar);
/* pvrdma_dealloc_ucontext() also frees the UAR */
pvrdma_dealloc_ucontext(&context->ibucontext);
return -EFAULT;
}

View File

@@ -330,6 +330,17 @@ void rxe_rcv(struct sk_buff *skb)
pkt->qp = NULL;
pkt->mask |= rxe_opcode[pkt->opcode].mask;
/*
* Unknown opcodes have a zero-initialized rxe_opcode[] entry, so
* both mask and length are 0. Reject them before any length math:
* rxe_icrc_hdr() would otherwise compute length - RXE_BTH_BYTES
* and pass the underflowed value to rxe_crc32(), producing an
* out-of-bounds read.
*/
if (unlikely(!rxe_opcode[pkt->opcode].mask ||
!rxe_opcode[pkt->opcode].length))
goto drop;
if (unlikely(pkt->paylen < header_size(pkt) + bth_pad(pkt) +
RXE_ICRC_SIZE))
goto drop;

View File

@@ -540,7 +540,19 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
}
skip_check_range:
if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
/* IBA oA19-28: ATOMIC_WRITE payload is exactly 8 bytes.
* Reject any other length before the responder reads
* sizeof(u64) bytes from payload_addr(pkt); a shorter
* payload would read past the logical end of the packet
* into skb->head tailroom.
*/
if (resid != sizeof(u64) || pktlen != sizeof(u64) ||
bth_pad(pkt)) {
state = RESPST_ERR_LENGTH;
goto err;
}
} else if (pkt->mask & RXE_WRITE_MASK) {
if (resid > mtu) {
if (pktlen != mtu || bth_pad(pkt)) {
state = RESPST_ERR_LENGTH;

View File

@@ -44,13 +44,14 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type)
{
struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
struct mlx4_srq *srq;
unsigned long flags;
rcu_read_lock();
spin_lock_irqsave(&srq_table->lock, flags);
srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1));
rcu_read_unlock();
if (srq)
refcount_inc(&srq->refcount);
else {
if (!srq || !refcount_inc_not_zero(&srq->refcount))
srq = NULL;
spin_unlock_irqrestore(&srq_table->lock, flags);
if (!srq) {
mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn);
return;
}
@@ -203,8 +204,8 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
if (err)
goto err_radix;
refcount_set(&srq->refcount, 1);
init_completion(&srq->free);
refcount_set_release(&srq->refcount, 1);
return 0;