Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:

 - Fix a mlx5 malfunction if the UMR QP gets an error

 - Return the correct port number to userspace for a mlx5 DCT

 - Don't cause a UMR QP error if DMABUF teardown races with invalidation

 - Fix a WARN splat when unregistering some mlx5 device memory MR types

 - Use the correct alignment for the mana doorbell so that two processes
   do not share the same physical page on non-4k page systems

 - MAINTAINERS updates for MANA

 - Retry failed HNS FW commands because some can take a long time

 - Cast the void * handle to the correct type in bnxt to fix corruption

 - Avoid a NULL pointer crash in bnxt_re

 - Fix skipped ib_device_unregister() for bnxt_re due to some earlier
   rework

 - Correctly detect if the bnxt device supports extended statistics

 - Fix a refcount leak in mlx5 ODP introduced by a previous fix

 - Map the FW result for the port rate to the userspace values properly
   in mlx5, so correct values are returned for newer 800G ports

 - Don't wrongly destroy counter objects that were not automatically
   created during mlx5 bind qp

 - Set the page size/shift members of kernel-owned SRQs to fix a crash
   in the nvme target

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/bnxt_re: Fix the page details for the srq created by kernel consumers
  RDMA/mlx5: Fix bind QP error cleanup flow
  RDMA/mlx5: Fix AH static rate parsing
  RDMA/mlx5: Fix implicit ODP hang on parent deregistration
  RDMA/bnxt_re: Fix the statistics for Gen P7 VF
  RDMA/bnxt_re: Fix issue in the unload path
  RDMA/bnxt_re: Add sanity checks on rdev validity
  RDMA/bnxt_re: Fix an issue in bnxt_re_async_notifier
  RDMA/hns: Fix mbox timing out by adding retry mechanism
  MAINTAINERS: update maintainer for Microsoft MANA RDMA driver
  RDMA/mana_ib: Allocate PAGE aligned doorbell index
  RDMA/mlx5: Fix a WARN during dereg_mr for DM type
  RDMA/mlx5: Fix a race for DMABUF MR which can lead to CQE with error
  IB/mlx5: Set and get correct qp_num for a DCT QP
  RDMA/mlx5: Fix the recovery flow of the UMR QP
@@ -15680,7 +15680,7 @@ F: include/uapi/linux/cciss*.h
 
 MICROSOFT MANA RDMA DRIVER
 M:	Long Li <longli@microsoft.com>
-M:	Ajay Sharma <sharmaajay@microsoft.com>
+M:	Konstantin Taranov <kotaranov@microsoft.com>
 L:	linux-rdma@vger.kernel.org
 S:	Supported
 F:	drivers/infiniband/hw/mana/
@@ -187,7 +187,6 @@ struct bnxt_re_dev {
 #define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
 	struct net_device *netdev;
 	struct auxiliary_device *adev;
-	struct notifier_block nb;
 	unsigned int version, major, minor;
 	struct bnxt_qplib_chip_ctx *chip_ctx;
 	struct bnxt_en_dev *en_dev;
@@ -348,8 +348,8 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
 			goto done;
 	}
 	bnxt_re_copy_err_stats(rdev, stats, err_s);
-	if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags) &&
-	    !rdev->is_virtfn) {
+	if (bnxt_ext_stats_supported(rdev->chip_ctx, rdev->dev_attr->dev_cap_flags,
+				     rdev->is_virtfn)) {
 		rc = bnxt_re_get_ext_stat(rdev, stats);
 		if (rc) {
 			clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS,
@@ -1870,6 +1870,8 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
 	srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit;
 	srq->srq_limit = srq_init_attr->attr.srq_limit;
 	srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id;
+	srq->qplib_srq.sg_info.pgsize = PAGE_SIZE;
+	srq->qplib_srq.sg_info.pgshft = PAGE_SHIFT;
 	nq = &rdev->nqr->nq[0];
 
 	if (udata) {
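The two added lines above restore an invariant worth spelling out: pgsize and pgshft describe the same granularity (pgsize == 1 << pgshft), and kernel consumers such as the nvme target pass no udata, so neither field was ever filled in and later ring-sizing math divided by zero. A minimal userspace sketch of that invariant, with hypothetical stand-ins for the driver's sg_info fields:

#include <assert.h>
#include <stdio.h>

/* illustrative stand-ins for the driver's sg_info fields */
struct sg_info {
	unsigned long pgsize;	/* bytes per page */
	unsigned long pgshft;	/* log2(pgsize) */
};

int main(void)
{
	struct sg_info sg = { 0, 0 };	/* kernel consumer: no udata fills this */
	unsigned long nentries = 256, stride = 64;

	/* the fix: default to the system page size */
	sg.pgsize = 4096;	/* PAGE_SIZE on a 4K-page kernel */
	sg.pgshft = 12;		/* PAGE_SHIFT */
	assert(sg.pgsize == 1UL << sg.pgshft);

	/* ring sizing math of this shape is what crashed with pgsize == 0 */
	printf("pages needed: %lu\n",
	       (nentries * stride + sg.pgsize - 1) / sg.pgsize);
	return 0;
}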
@@ -396,11 +396,16 @@ static void bnxt_re_dcb_wq_task(struct work_struct *work)
 
 static void bnxt_re_async_notifier(void *handle, struct hwrm_async_event_cmpl *cmpl)
 {
-	struct bnxt_re_dev *rdev = (struct bnxt_re_dev *)handle;
+	struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
 	struct bnxt_re_dcb_work *dcb_work;
+	struct bnxt_re_dev *rdev;
 	u32 data1, data2;
 	u16 event_id;
 
+	rdev = en_info->rdev;
+	if (!rdev)
+		return;
+
 	event_id = le16_to_cpu(cmpl->event_id);
 	data1 = le32_to_cpu(cmpl->event_data1);
 	data2 = le32_to_cpu(cmpl->event_data2);
@@ -433,6 +438,8 @@ static void bnxt_re_stop_irq(void *handle, bool reset)
 	int indx;
 
 	rdev = en_info->rdev;
+	if (!rdev)
+		return;
 	rcfw = &rdev->rcfw;
 
 	if (reset) {
@@ -461,6 +468,8 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
 	int indx, rc;
 
 	rdev = en_info->rdev;
+	if (!rdev)
+		return;
 	msix_ent = rdev->nqr->msix_entries;
 	rcfw = &rdev->rcfw;
 	if (!ent) {
@@ -1350,7 +1359,6 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct auxiliary_device *adev,
 		return NULL;
 	}
 	/* Default values */
-	rdev->nb.notifier_call = NULL;
 	rdev->netdev = en_dev->net;
 	rdev->en_dev = en_dev;
 	rdev->adev = adev;
@@ -2345,15 +2353,6 @@ static int bnxt_re_add_device(struct auxiliary_device *adev, u8 op_type)
 static void bnxt_re_remove_device(struct bnxt_re_dev *rdev, u8 op_type,
 				  struct auxiliary_device *aux_dev)
 {
-	if (rdev->nb.notifier_call) {
-		unregister_netdevice_notifier(&rdev->nb);
-		rdev->nb.notifier_call = NULL;
-	} else {
-		/* If notifier is null, we should have already done a
-		 * clean up before coming here.
-		 */
-		return;
-	}
 	bnxt_re_setup_cc(rdev, false);
 	ib_unregister_device(&rdev->ibdev);
 	bnxt_re_dev_uninit(rdev, op_type);
@@ -2433,6 +2432,7 @@ static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state)
 	ibdev_info(&rdev->ibdev, "%s: L2 driver notified to stop en_state 0x%lx",
 		   __func__, en_dev->en_state);
 	bnxt_re_remove_device(rdev, BNXT_RE_PRE_RECOVERY_REMOVE, adev);
+	bnxt_re_update_en_info_rdev(NULL, en_info, adev);
 	mutex_unlock(&bnxt_re_mutex);
 
 	return 0;
@@ -547,6 +547,14 @@ static inline bool _is_ext_stats_supported(u16 dev_cap_flags)
 	       CREQ_QUERY_FUNC_RESP_SB_EXT_STATS;
 }
 
+static inline int bnxt_ext_stats_supported(struct bnxt_qplib_chip_ctx *ctx,
+					   u16 flags, bool virtfn)
+{
+	/* ext stats supported if cap flag is set AND is a PF OR a Thor2 VF */
+	return (_is_ext_stats_supported(flags) &&
+		((virtfn && bnxt_qplib_is_chip_gen_p7(ctx)) || (!virtfn)));
+}
+
 static inline bool _is_hw_retx_supported(u16 dev_cap_flags)
 {
 	return dev_cap_flags &
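A small sketch (plain C, not driver code) of the truth table bnxt_ext_stats_supported() above encodes; the helper names and the CAP_EXT_STATS bit below are stand-ins, assuming only that extended stats need the capability flag plus either a PF or a Gen P7 (Thor2) VF:

#include <stdbool.h>
#include <stdio.h>

#define CAP_EXT_STATS 0x1	/* stand-in for the dev_cap_flags bit */

/* mirrors the helper's shape: cap flag AND (PF OR Gen P7 VF) */
static bool ext_stats_supported(unsigned flags, bool virtfn, bool chip_is_p7)
{
	return (flags & CAP_EXT_STATS) &&
	       ((virtfn && chip_is_p7) || !virtfn);
}

int main(void)
{
	/* the case the fix enables: a VF on a Gen P7 device */
	printf("P7 VF: %d\n", ext_stats_supported(CAP_EXT_STATS, true, true));
	/* a pre-P7 VF is still correctly rejected */
	printf("P5 VF: %d\n", ext_stats_supported(CAP_EXT_STATS, true, false));
	printf("PF:    %d\n", ext_stats_supported(CAP_EXT_STATS, false, false));
	return 0;
}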
@@ -1286,10 +1286,8 @@ static u32 hns_roce_cmdq_tx_timeout(u16 opcode, u32 tx_timeout)
 	return tx_timeout;
 }
 
-static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode)
+static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u32 tx_timeout)
 {
-	struct hns_roce_v2_priv *priv = hr_dev->priv;
-	u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout);
 	u32 timeout = 0;
 
 	do {
@@ -1299,8 +1297,9 @@ static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode)
 	} while (++timeout < tx_timeout);
 }
 
-static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
-			       struct hns_roce_cmq_desc *desc, int num)
+static int __hns_roce_cmq_send_one(struct hns_roce_dev *hr_dev,
+				   struct hns_roce_cmq_desc *desc,
+				   int num, u32 tx_timeout)
 {
 	struct hns_roce_v2_priv *priv = hr_dev->priv;
 	struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
@@ -1309,8 +1308,6 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
 	int ret;
 	int i;
 
-	spin_lock_bh(&csq->lock);
-
 	tail = csq->head;
 
 	for (i = 0; i < num; i++) {
@@ -1324,22 +1321,17 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
 
 	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]);
 
-	hns_roce_wait_csq_done(hr_dev, le16_to_cpu(desc->opcode));
+	hns_roce_wait_csq_done(hr_dev, tx_timeout);
 	if (hns_roce_cmq_csq_done(hr_dev)) {
 		ret = 0;
 		for (i = 0; i < num; i++) {
 			/* check the result of hardware write back */
-			desc[i] = csq->desc[tail++];
+			desc_ret = le16_to_cpu(csq->desc[tail++].retval);
 			if (tail == csq->desc_num)
 				tail = 0;
 
-			desc_ret = le16_to_cpu(desc[i].retval);
 			if (likely(desc_ret == CMD_EXEC_SUCCESS))
 				continue;
-
-			dev_err_ratelimited(hr_dev->dev,
-					    "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n",
-					    desc->opcode, desc_ret);
 			ret = hns_roce_cmd_err_convert_errno(desc_ret);
 		}
 	} else {
@@ -1354,14 +1346,54 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
 		ret = -EAGAIN;
 	}
 
-	spin_unlock_bh(&csq->lock);
-
 	if (ret)
 		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]);
 
 	return ret;
 }
 
+static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
+			       struct hns_roce_cmq_desc *desc, int num)
+{
+	struct hns_roce_v2_priv *priv = hr_dev->priv;
+	struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
+	u16 opcode = le16_to_cpu(desc->opcode);
+	u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout);
+	u8 try_cnt = HNS_ROCE_OPC_POST_MB_TRY_CNT;
+	u32 rsv_tail;
+	int ret;
+	int i;
+
+	while (try_cnt) {
+		try_cnt--;
+
+		spin_lock_bh(&csq->lock);
+		rsv_tail = csq->head;
+		ret = __hns_roce_cmq_send_one(hr_dev, desc, num, tx_timeout);
+		if (opcode == HNS_ROCE_OPC_POST_MB && ret == -ETIME &&
+		    try_cnt) {
+			spin_unlock_bh(&csq->lock);
+			mdelay(HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC);
+			continue;
+		}
+
+		for (i = 0; i < num; i++) {
+			desc[i] = csq->desc[rsv_tail++];
+			if (rsv_tail == csq->desc_num)
+				rsv_tail = 0;
+		}
+		spin_unlock_bh(&csq->lock);
+		break;
+	}
+
+	if (ret)
+		dev_err_ratelimited(hr_dev->dev,
+				    "Cmdq IO error, opcode = 0x%x, return = %d.\n",
+				    opcode, ret);
+
+	return ret;
+}
+
 static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
 			     struct hns_roce_cmq_desc *desc, int num)
 {
@@ -230,6 +230,8 @@ enum hns_roce_opcode_type {
 };
 
 #define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000
+#define HNS_ROCE_OPC_POST_MB_TRY_CNT 8
+#define HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC 5
 struct hns_roce_cmdq_tx_timeout_map {
	u16 opcode;
	u32 tx_timeout;
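Together with the constants above, the new __hns_roce_cmq_send() wrapper retries only the long-running mailbox opcode when it times out, dropping the lock and sleeping between attempts. A hedged userspace model of that retry shape, assuming a send_one() stand-in for __hns_roce_cmq_send_one():

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

#define TRY_CNT        8
#define RETRY_GAP_MSEC 5

/* stand-in for the real send: firmware is busy twice, then answers */
static int send_one(void)
{
	static int calls;
	return ++calls < 3 ? -ETIME : 0;
}

static int send_with_retry(void)
{
	int try_cnt = TRY_CNT;
	int ret;

	while (try_cnt--) {
		ret = send_one();
		if (ret == -ETIME && try_cnt) {
			usleep(RETRY_GAP_MSEC * 1000);
			continue;	/* timed out; try again */
		}
		break;		/* success, or a non-retryable error */
	}
	return ret;
}

int main(void)
{
	printf("send_with_retry() = %d\n", send_with_retry());
	return 0;
}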
@@ -174,7 +174,7 @@ static int mana_gd_allocate_doorbell_page(struct gdma_context *gc,
 
 	req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE;
 	req.num_resources = 1;
-	req.alignment = 1;
+	req.alignment = PAGE_SIZE / MANA_PAGE_SIZE;
 
 	/* Have GDMA start searching from 0 */
 	req.allocated_resources = 0;
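The one-line change above matters only when the kernel page size exceeds the 4K MANA doorbell stride: with alignment 1, two doorbell indices could land inside one system page and two processes would end up sharing a mapping. A hedged sketch of the arithmetic (userspace C, illustrative values only):

#include <stdio.h>

#define MANA_PAGE_SIZE 4096UL	/* hardware doorbell stride */

static unsigned long align_up(unsigned long x, unsigned long a)
{
	return (x + a - 1) & ~(a - 1);
}

int main(void)
{
	unsigned long page_size = 65536;	/* e.g. an arm64 64K-page kernel */
	unsigned long alignment = page_size / MANA_PAGE_SIZE;	/* 16 */
	unsigned long next_free = 5;	/* hypothetical allocator state */

	/* alignment == 1 (the old behaviour) could hand out index 5, whose
	 * 4K doorbell shares the 64K system page of indices 0..15 */
	printf("unaligned index: %lu\n", next_free);
	/* the fix: each allocation starts on a fresh system page */
	printf("aligned index:   %lu\n", align_up(next_free, alignment));
	return 0;
}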
@@ -67,7 +67,8 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
 		ah->av.tclass = grh->traffic_class;
 	}
 
-	ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4);
+	ah->av.stat_rate_sl =
+		(mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr)) << 4);
 
 	if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
 		if (init_attr->xmit_slave)
@@ -546,6 +546,7 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
 				   struct ib_qp *qp)
 {
 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
+	bool new = false;
 	int err;
 
 	if (!counter->id) {
@@ -560,6 +561,7 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
 			return err;
 		counter->id =
 			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+		new = true;
 	}
 
 	err = mlx5_ib_qp_set_counter(qp, counter);
@@ -569,8 +571,10 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
 	return 0;
 
 fail_set_counter:
-	mlx5_ib_counter_dealloc(counter);
-	counter->id = 0;
+	if (new) {
+		mlx5_ib_counter_dealloc(counter);
+		counter->id = 0;
+	}
 
 	return err;
 }
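The three hunks above thread a `new` flag through mlx5_ib_counter_bind_qp() so the failure path frees only a counter this call allocated, never a pre-existing user-created one. A minimal model of that ownership-flag pattern (plain C, hypothetical names):

#include <stdbool.h>
#include <stdio.h>

struct counter { unsigned id; };

static int bind_qp(struct counter *c, bool fail_bind)
{
	bool new = false;

	if (!c->id) {		/* no counter yet: allocate one here */
		c->id = 42;	/* stand-in for firmware allocation */
		new = true;
	}

	if (fail_bind) {	/* models mlx5_ib_qp_set_counter() failing */
		if (new)	/* undo only what we created ourselves */
			c->id = 0;
		return -1;
	}
	return 0;
}

int main(void)
{
	struct counter user = { .id = 7 };	/* pre-existing, user-owned */

	bind_qp(&user, true);
	/* the user's counter must survive the failed bind */
	printf("user counter id after failure: %u\n", user.id);
	return 0;
}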
@@ -1550,7 +1550,7 @@ static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
 
 	dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
 
-	if (!umem_dmabuf->sgt)
+	if (!umem_dmabuf->sgt || !mr)
 		return;
 
 	mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
@@ -1935,7 +1935,8 @@ mlx5_alloc_priv_descs(struct ib_device *device,
 static void
 mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
 {
-	if (!mr->umem && !mr->data_direct && mr->descs) {
+	if (!mr->umem && !mr->data_direct &&
+	    mr->ibmr.type != IB_MR_TYPE_DM && mr->descs) {
 		struct ib_device *device = mr->ibmr.device;
 		int size = mr->max_descs * mr->desc_size;
 		struct mlx5_ib_dev *dev = to_mdev(device);
@@ -2022,11 +2023,16 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
 	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
 	struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
 	bool is_odp = is_odp_mr(mr);
+	bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
+			      !to_ib_umem_dmabuf(mr->umem)->pinned;
 	int ret = 0;
 
 	if (is_odp)
 		mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
 
+	if (is_odp_dma_buf)
+		dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, NULL);
+
 	if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) {
 		ent = mr->mmkey.cache_ent;
 		/* upon storing to a clean temp entry - schedule its cleanup */
@@ -2054,6 +2060,12 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
 		mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex);
 	}
 
+	if (is_odp_dma_buf) {
+		if (!ret)
+			to_ib_umem_dmabuf(mr->umem)->private = NULL;
+		dma_resv_unlock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
+	}
+
 	return ret;
 }
 
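The two mr.c hunks above close a teardown/invalidation race: revoke clears the dmabuf's private MR pointer while holding dma_resv, and the invalidate callback re-checks the pointer under the same lock. A hedged pthread model of that handshake (not driver code; the mutex stands in for dma_resv):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t resv = PTHREAD_MUTEX_INITIALIZER;
struct mr { int alive; };
static struct mr *private_mr;	/* analogue of umem_dmabuf->private */

static void *invalidate_cb(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&resv);
	if (private_mr)		/* the added "!mr"-style check */
		printf("invalidate: zapping live MR\n");
	else
		printf("invalidate: MR already revoked, nothing to do\n");
	pthread_mutex_unlock(&resv);
	return NULL;
}

int main(void)
{
	struct mr m = { .alive = 1 };
	pthread_t t;

	private_mr = &m;

	/* teardown path: clear the back-pointer under the lock */
	pthread_mutex_lock(&resv);
	private_mr = NULL;
	pthread_mutex_unlock(&resv);

	pthread_create(&t, NULL, invalidate_cb, NULL);
	pthread_join(t, NULL);
	return 0;
}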
@@ -242,6 +242,7 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
 	if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_KERNEL) !=
 	    mr) {
 		xa_unlock(&imr->implicit_children);
+		mlx5r_deref_odp_mkey(&imr->mmkey);
 		return;
 	}
 
@@ -3447,11 +3447,11 @@ static int ib_to_mlx5_rate_map(u8 rate)
 	return 0;
 }
 
-static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
+int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate)
 {
 	u32 stat_rate_support;
 
-	if (rate == IB_RATE_PORT_CURRENT)
+	if (rate == IB_RATE_PORT_CURRENT || rate == IB_RATE_800_GBPS)
 		return 0;
 
 	if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_800_GBPS)
@@ -3596,7 +3596,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 			sizeof(grh->dgid.raw));
 	}
 
-	err = ib_rate_to_mlx5(dev, rdma_ah_get_static_rate(ah));
+	err = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah));
 	if (err < 0)
 		return err;
 	MLX5_SET(ads, path, stat_rate, err);
@@ -4579,6 +4579,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
 		set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1);
 		MLX5_SET(dctc, dctc, counter_set_id, set_id);
+
+		qp->port = attr->port_num;
 	} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
 		struct mlx5_ib_modify_qp_resp resp = {};
 		u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {};
@@ -5074,7 +5076,7 @@ static int mlx5_ib_dct_query_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *mqp,
 	}
 
 	if (qp_attr_mask & IB_QP_PORT)
-		qp_attr->port_num = MLX5_GET(dctc, dctc, port);
+		qp_attr->port_num = mqp->port;
 	if (qp_attr_mask & IB_QP_MIN_RNR_TIMER)
 		qp_attr->min_rnr_timer = MLX5_GET(dctc, dctc, min_rnr_nak);
 	if (qp_attr_mask & IB_QP_AV) {
@@ -56,4 +56,5 @@ int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn);
 int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
 int mlx5_ib_qp_event_init(void);
 void mlx5_ib_qp_event_cleanup(void);
+int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate);
 #endif /* _MLX5_IB_QP_H */
@@ -231,30 +231,6 @@ void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev)
 	ib_dealloc_pd(dev->umrc.pd);
 }
 
-static int mlx5r_umr_recover(struct mlx5_ib_dev *dev)
-{
-	struct umr_common *umrc = &dev->umrc;
-	struct ib_qp_attr attr;
-	int err;
-
-	attr.qp_state = IB_QPS_RESET;
-	err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
-	if (err) {
-		mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
-		goto err;
-	}
-
-	err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
-	if (err)
-		goto err;
-
-	umrc->state = MLX5_UMR_STATE_ACTIVE;
-	return 0;
-
-err:
-	umrc->state = MLX5_UMR_STATE_ERR;
-	return err;
-}
-
 static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
 			       struct mlx5r_umr_wqe *wqe, bool with_data)
@@ -302,6 +278,61 @@ static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
 	return err;
 }
 
+static int mlx5r_umr_recover(struct mlx5_ib_dev *dev, u32 mkey,
+			     struct mlx5r_umr_context *umr_context,
+			     struct mlx5r_umr_wqe *wqe, bool with_data)
+{
+	struct umr_common *umrc = &dev->umrc;
+	struct ib_qp_attr attr;
+	int err;
+
+	mutex_lock(&umrc->lock);
+	/* Preventing any further WRs to be sent now */
+	if (umrc->state != MLX5_UMR_STATE_RECOVER) {
+		mlx5_ib_warn(dev, "UMR recovery encountered an unexpected state=%d\n",
+			     umrc->state);
+		umrc->state = MLX5_UMR_STATE_RECOVER;
+	}
+	mutex_unlock(&umrc->lock);
+
+	/* Sending a final/barrier WR (the failed one) and wait for its completion.
+	 * This will ensure that all the previous WRs got a completion before
+	 * we set the QP state to RESET.
+	 */
+	err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context->cqe, wqe,
+				  with_data);
+	if (err) {
+		mlx5_ib_warn(dev, "UMR recovery post send failed, err %d\n", err);
+		goto err;
+	}
+
+	/* Since the QP is in an error state, it will only receive
+	 * IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier
+	 * we don't care about its status.
+	 */
+	wait_for_completion(&umr_context->done);
+
+	attr.qp_state = IB_QPS_RESET;
+	err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
+	if (err) {
+		mlx5_ib_warn(dev, "Couldn't modify UMR QP to RESET, err=%d\n", err);
+		goto err;
+	}
+
+	err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
+	if (err) {
+		mlx5_ib_warn(dev, "Couldn't modify UMR QP to RTS, err=%d\n", err);
+		goto err;
+	}
+
+	umrc->state = MLX5_UMR_STATE_ACTIVE;
+	return 0;
+
+err:
+	umrc->state = MLX5_UMR_STATE_ERR;
+	return err;
+}
+
 static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct mlx5_ib_umr_context *context =
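The rewritten mlx5r_umr_recover() above leans on one ordering fact: a QP in the error state flushes completions in posting order, so waiting for the re-posted (barrier) WR's completion proves every earlier WR has already completed, making the ERR -> RESET -> RTS transition safe. A hedged userspace model of that ordering argument (the array below is a stand-in for the CQ, not verbs code):

#include <stdio.h>

#define QDEPTH 8

/* stand-in CQ: completions pop in the order WRs were posted */
static int cq[QDEPTH], head, tail;

static void post_wr(int wr_id) { cq[tail++ % QDEPTH] = wr_id; }
static int poll_cqe(void)      { return cq[head++ % QDEPTH]; }

int main(void)
{
	int barrier_id = 99, cqe;

	/* WRs that were in flight when the QP went into error */
	post_wr(1);
	post_wr(2);
	post_wr(3);

	/* the failed WR, re-posted purely as a barrier */
	post_wr(barrier_id);

	/* waiting for the barrier's CQE drains everything before it */
	do {
		cqe = poll_cqe();
		printf("flushed wr %d\n", cqe);
	} while (cqe != barrier_id);

	printf("all prior WRs flushed; QP may go ERR -> RESET -> RTS\n");
	return 0;
}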
@@ -366,9 +397,7 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
 		mlx5_ib_warn(dev,
 			     "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs, mkey = %u\n",
 			     umr_context.status, mkey);
-		mutex_lock(&umrc->lock);
-		err = mlx5r_umr_recover(dev);
-		mutex_unlock(&umrc->lock);
+		err = mlx5r_umr_recover(dev, mkey, &umr_context, wqe, with_data);
 		if (err)
 			mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n",
 				     err);