mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-10 09:09:55 -04:00
RDMA/rtrs-clt: Avoid running destroy_con_cq_qp/create_con_cq_qp in parallel
Two kworkers can race with each other:
        CPU0                            CPU1
addr_resolver kworker           reconnect kworker
rtrs_clt_rdma_cm_handler
rtrs_rdma_addr_resolved
create_con_cq_qp: s.dev_ref++
"s.dev_ref is 1"
                                wait in create_cm fails with TIMEOUT
                                destroy_con_cq_qp: --s.dev_ref
                                "s.dev_ref is 0"
                                destroy_con_cq_qp: sess->s.dev = NULL
rtrs_cq_qp_create -> create_qp(con, sess->dev->ib_pd...)
sess->dev is NULL, panic.
To fix the problem, use a mutex to serialize create_con_cq_qp and
destroy_con_cq_qp.
Fixes: 6a98d71dae ("RDMA/rtrs: client: main functionality")
Link: https://lore.kernel.org/r/20201023074353.21946-4-jinpu.wang@cloud.ionos.com
Signed-off-by: Jack Wang <jinpu.wang@cloud.ionos.com>
Reviewed-by: Gioh Kim <gi-oh.kim@cloud.ionos.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
This commit is contained in:
committed by
Jason Gunthorpe
parent
73385fdbc4
commit
fcf2959da6
@@ -1499,6 +1499,7 @@ static int create_con(struct rtrs_clt_sess *sess, unsigned int cid)
|
||||
con->c.cid = cid;
|
||||
con->c.sess = &sess->s;
|
||||
atomic_set(&con->io_cnt, 0);
|
||||
mutex_init(&con->con_mutex);
|
||||
|
||||
sess->s.con[cid] = &con->c;
|
||||
|
||||
@@ -1510,6 +1511,7 @@ static void destroy_con(struct rtrs_clt_con *con)
|
||||
struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
|
||||
|
||||
sess->s.con[con->c.cid] = NULL;
|
||||
mutex_destroy(&con->con_mutex);
|
||||
kfree(con);
|
||||
}
|
||||
|
||||
@@ -1520,6 +1522,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
|
||||
int err, cq_vector;
|
||||
struct rtrs_msg_rkey_rsp *rsp;
|
||||
|
||||
lockdep_assert_held(&con->con_mutex);
|
||||
if (con->c.cid == 0) {
|
||||
/*
|
||||
* One completion for each receive and two for each send
|
||||
@@ -1593,7 +1596,7 @@ static void destroy_con_cq_qp(struct rtrs_clt_con *con)
|
||||
* Be careful here: destroy_con_cq_qp() can be called even
|
||||
* create_con_cq_qp() failed, see comments there.
|
||||
*/
|
||||
|
||||
lockdep_assert_held(&con->con_mutex);
|
||||
rtrs_cq_qp_destroy(&con->c);
|
||||
if (con->rsp_ius) {
|
||||
rtrs_iu_free(con->rsp_ius, DMA_FROM_DEVICE,
|
||||
@@ -1625,7 +1628,9 @@ static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con)
|
||||
struct rtrs_sess *s = con->c.sess;
|
||||
int err;
|
||||
|
||||
mutex_lock(&con->con_mutex);
|
||||
err = create_con_cq_qp(con);
|
||||
mutex_unlock(&con->con_mutex);
|
||||
if (err) {
|
||||
rtrs_err(s, "create_con_cq_qp(), err: %d\n", err);
|
||||
return err;
|
||||
@@ -1938,8 +1943,9 @@ static int create_cm(struct rtrs_clt_con *con)
|
||||
|
||||
errr:
|
||||
stop_cm(con);
|
||||
/* Is safe to call destroy if cq_qp is not inited */
|
||||
mutex_lock(&con->con_mutex);
|
||||
destroy_con_cq_qp(con);
|
||||
mutex_unlock(&con->con_mutex);
|
||||
destroy_cm:
|
||||
destroy_cm(con);
|
||||
|
||||
@@ -2046,7 +2052,9 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess)
|
||||
if (!sess->s.con[cid])
|
||||
break;
|
||||
con = to_clt_con(sess->s.con[cid]);
|
||||
mutex_lock(&con->con_mutex);
|
||||
destroy_con_cq_qp(con);
|
||||
mutex_unlock(&con->con_mutex);
|
||||
destroy_cm(con);
|
||||
destroy_con(con);
|
||||
}
|
||||
@@ -2213,7 +2221,10 @@ static int init_conns(struct rtrs_clt_sess *sess)
|
||||
struct rtrs_clt_con *con = to_clt_con(sess->s.con[cid]);
|
||||
|
||||
stop_cm(con);
|
||||
|
||||
mutex_lock(&con->con_mutex);
|
||||
destroy_con_cq_qp(con);
|
||||
mutex_unlock(&con->con_mutex);
|
||||
destroy_cm(con);
|
||||
destroy_con(con);
|
||||
}
|
||||
|
||||
@@ -72,6 +72,7 @@ struct rtrs_clt_con {
|
||||
struct rtrs_iu *rsp_ius;
|
||||
u32 queue_size;
|
||||
unsigned int cpu;
|
||||
struct mutex con_mutex;
|
||||
atomic_t io_cnt;
|
||||
int cm_err;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user