mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-06 07:34:03 -04:00
RDMA/hns: Fix missing flush CQE for DWQE
Flush CQE handler has not been called if QP state gets into errored
mode in DWQE path. So, the new added outstanding WQEs will never be
flushed.
It leads to a hung task timeout when using NFS over RDMA:
__switch_to+0x7c/0xd0
__schedule+0x350/0x750
schedule+0x50/0xf0
schedule_timeout+0x2c8/0x340
wait_for_common+0xf4/0x2b0
wait_for_completion+0x20/0x40
__ib_drain_sq+0x140/0x1d0 [ib_core]
ib_drain_sq+0x98/0xb0 [ib_core]
rpcrdma_xprt_disconnect+0x68/0x270 [rpcrdma]
xprt_rdma_close+0x20/0x60 [rpcrdma]
xprt_autoclose+0x64/0x1cc [sunrpc]
process_one_work+0x1d8/0x4e0
worker_thread+0x154/0x420
kthread+0x108/0x150
ret_from_fork+0x10/0x18
Fixes: 01584a5edc ("RDMA/hns: Add support of direct wqe")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Link: https://patch.msgid.link/20241220055249.146943-5-huangjunxian6@hisilicon.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
This commit is contained in:
committed by
Leon Romanovsky
parent
fa5c4ba8cd
commit
e3debdd484
@@ -670,6 +670,10 @@ static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
|
||||
#define HNS_ROCE_SL_SHIFT 2
|
||||
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
|
||||
|
||||
if (unlikely(qp->state == IB_QPS_ERR)) {
|
||||
flush_cqe(hr_dev, qp);
|
||||
return;
|
||||
}
|
||||
/* All kinds of DirectWQE have the same header field layout */
|
||||
hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_FLAG);
|
||||
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_L, qp->sl);
|
||||
|
||||
Reference in New Issue
Block a user