diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index e1adb0d20a0a..54fd30abf2b8 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -316,7 +316,7 @@ struct io_ring_ctx {
 		 * manipulate the list, hence no extra locking is needed there.
 		 */
 		bool			poll_multi_queue;
-		struct io_wq_work_list	iopoll_list;
+		struct list_head	iopoll_list;
 
 		struct io_file_table	file_table;
 		struct io_rsrc_data	buf_table;
@@ -708,7 +708,16 @@ struct io_kiocb {
 	atomic_t			refs;
 	bool				cancel_seq_set;
-	struct io_task_work		io_task_work;
+
+	/*
+	 * IOPOLL doesn't use task_work, so use the ->iopoll_node list
+	 * entry to manage pending iopoll requests.
+	 */
+	union {
+		struct io_task_work	io_task_work;
+		struct list_head	iopoll_node;
+	};
+
 	union {
 		/*
 		 * for polled requests, i.e. IORING_OP_POLL_ADD and async armed
diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index ca12ac10c0ae..4136bf04464a 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -534,7 +534,7 @@ __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 	/* SQPOLL thread does its own polling */
 	if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) ||
 	    is_sqpoll_thread) {
-		while (!wq_list_empty(&ctx->iopoll_list)) {
+		while (!list_empty(&ctx->iopoll_list)) {
 			io_iopoll_try_reap_events(ctx);
 			ret = true;
 			cond_resched();
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 6cb24cdf8e68..05a660c97316 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -334,7 +334,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	init_waitqueue_head(&ctx->poll_wq);
 	spin_lock_init(&ctx->completion_lock);
 	raw_spin_lock_init(&ctx->timeout_lock);
-	INIT_WQ_LIST(&ctx->iopoll_list);
+	INIT_LIST_HEAD(&ctx->iopoll_list);
 	INIT_LIST_HEAD(&ctx->defer_list);
 	INIT_LIST_HEAD(&ctx->timeout_list);
 	INIT_LIST_HEAD(&ctx->ltimeout_list);
@@ -1561,7 +1561,7 @@ __cold void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
 		return;
 
 	mutex_lock(&ctx->uring_lock);
-	while (!wq_list_empty(&ctx->iopoll_list)) {
+	while (!list_empty(&ctx->iopoll_list)) {
 		/* let it sleep and repeat later if can't complete a request */
 		if (io_do_iopoll(ctx, true) == 0)
 			break;
@@ -1626,21 +1626,18 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned int min_events)
 		 * forever, while the workqueue is stuck trying to acquire the
 		 * very same mutex.
 		 */
-		if (wq_list_empty(&ctx->iopoll_list) ||
-		    io_task_work_pending(ctx)) {
+		if (list_empty(&ctx->iopoll_list) || io_task_work_pending(ctx)) {
 			u32 tail = ctx->cached_cq_tail;
 
 			(void) io_run_local_work_locked(ctx, min_events);
 
-			if (task_work_pending(current) ||
-			    wq_list_empty(&ctx->iopoll_list)) {
+			if (task_work_pending(current) || list_empty(&ctx->iopoll_list)) {
 				mutex_unlock(&ctx->uring_lock);
 				io_run_task_work();
 				mutex_lock(&ctx->uring_lock);
 			}
 			/* some requests don't go through iopoll_list */
-			if (tail != ctx->cached_cq_tail ||
-			    wq_list_empty(&ctx->iopoll_list))
+			if (tail != ctx->cached_cq_tail || list_empty(&ctx->iopoll_list))
 				break;
 		}
 		ret = io_do_iopoll(ctx, !min_events);
@@ -1683,25 +1680,17 @@ static void io_iopoll_req_issued(struct io_kiocb *req, unsigned int issue_flags)
 	 * how we do polling eventually, not spinning if we're on potentially
 	 * different devices.
 	 */
-	if (wq_list_empty(&ctx->iopoll_list)) {
+	if (list_empty(&ctx->iopoll_list)) {
 		ctx->poll_multi_queue = false;
 	} else if (!ctx->poll_multi_queue) {
 		struct io_kiocb *list_req;
 
-		list_req = container_of(ctx->iopoll_list.first, struct io_kiocb,
-					comp_list);
+		list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb, iopoll_node);
 		if (list_req->file != req->file)
 			ctx->poll_multi_queue = true;
 	}
 
-	/*
-	 * For fast devices, IO may have already completed. If it has, add
-	 * it to the front so we find it first.
-	 */
-	if (READ_ONCE(req->iopoll_completed))
-		wq_list_add_head(&req->comp_list, &ctx->iopoll_list);
-	else
-		wq_list_add_tail(&req->comp_list, &ctx->iopoll_list);
+	list_add_tail(&req->iopoll_node, &ctx->iopoll_list);
 
 	if (unlikely(needs_lock)) {
 		/*
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 70ca88cc1f54..307f1f39d9f3 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -1315,9 +1315,9 @@ static int io_uring_hybrid_poll(struct io_kiocb *req,
 
 int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 {
-	struct io_wq_work_node *pos, *start, *prev;
 	unsigned int poll_flags = 0;
 	DEFINE_IO_COMP_BATCH(iob);
+	struct io_kiocb *req, *tmp;
 	int nr_events = 0;
 
 	/*
@@ -1327,8 +1327,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 	if (ctx->poll_multi_queue || force_nonspin)
 		poll_flags |= BLK_POLL_ONESHOT;
 
-	wq_list_for_each(pos, start, &ctx->iopoll_list) {
-		struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list);
+	list_for_each_entry(req, &ctx->iopoll_list, iopoll_node) {
 		int ret;
 
 		/*
@@ -1357,31 +1356,20 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 
 	if (!rq_list_empty(&iob.req_list))
 		iob.complete(&iob);
-	else if (!pos)
-		return 0;
-
-	prev = start;
-	wq_list_for_each_resume(pos, prev) {
-		struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list);
 
+	list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, iopoll_node) {
 		/* order with io_complete_rw_iopoll(), e.g. ->result updates */
 		if (!smp_load_acquire(&req->iopoll_completed))
-			break;
+			continue;
+
+		list_del(&req->iopoll_node);
+		wq_list_add_tail(&req->comp_list, &ctx->submit_state.compl_reqs);
 		nr_events++;
 		req->cqe.flags = io_put_kbuf(req, req->cqe.res, NULL);
 		if (req->opcode != IORING_OP_URING_CMD)
 			io_req_rw_cleanup(req, 0);
 	}
-	if (unlikely(!nr_events))
-		return 0;
-
-	pos = start ? start->next : ctx->iopoll_list.first;
-	wq_list_cut(&ctx->iopoll_list, prev, start);
-
-	if (WARN_ON_ONCE(!wq_list_empty(&ctx->submit_state.compl_reqs)))
-		return 0;
-	ctx->submit_state.compl_reqs.first = pos;
-	__io_submit_flush_completions(ctx);
+	if (nr_events)
+		__io_submit_flush_completions(ctx);
 
 	return nr_events;
 }
diff --git a/io_uring/slist.h b/io_uring/slist.h
index 7ef747442754..0aec51076bad 100644
--- a/io_uring/slist.h
+++ b/io_uring/slist.h
@@ -9,9 +9,6 @@
 #define wq_list_for_each(pos, prv, head) \
 	for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next)
 
-#define wq_list_for_each_resume(pos, prv) \
-	for (; pos; prv = pos, pos = (pos)->next)
-
 #define wq_list_empty(list) (READ_ONCE((list)->first) == NULL)
 
 #define INIT_WQ_LIST(list) do { \
@@ -43,15 +40,6 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node,
 	}
 }
 
-static inline void wq_list_add_head(struct io_wq_work_node *node,
-				    struct io_wq_work_list *list)
-{
-	node->next = list->first;
-	if (!node->next)
-		list->last = node;
-	WRITE_ONCE(list->first, node);
-}
-
 static inline void wq_list_cut(struct io_wq_work_list *list,
			       struct io_wq_work_node *last,
			       struct io_wq_work_node *prev)
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index 74c1a130cd87..becdfdd323a9 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -212,7 +212,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, struct io_sq_data *sqd,
 	if (cap_entries && to_submit > IORING_SQPOLL_CAP_ENTRIES_VALUE)
 		to_submit = IORING_SQPOLL_CAP_ENTRIES_VALUE;
 
-	if (to_submit || !wq_list_empty(&ctx->iopoll_list)) {
+	if (to_submit || !list_empty(&ctx->iopoll_list)) {
 		const struct cred *creds = NULL;
 
 		io_sq_start_worktime(ist);
@@ -221,7 +221,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, struct io_sq_data *sqd,
 			creds = override_creds(ctx->sq_creds);
 
 		mutex_lock(&ctx->uring_lock);
-		if (!wq_list_empty(&ctx->iopoll_list))
+		if (!list_empty(&ctx->iopoll_list))
 			io_do_iopoll(ctx, true);
 
 		/*
@@ -344,7 +344,7 @@ static int io_sq_thread(void *data)
 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
 			int ret = __io_sq_thread(ctx, sqd, cap_entries, &ist);
 
-			if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list)))
+			if (!sqt_spin && (ret > 0 || !list_empty(&ctx->iopoll_list)))
 				sqt_spin = true;
 		}
 		if (io_sq_tw(&retry_list, IORING_TW_CAP_ENTRIES_VALUE))
@@ -379,7 +379,7 @@ static int io_sq_thread(void *data)
 				atomic_or(IORING_SQ_NEED_WAKEUP,
					  &ctx->rings->sq_flags);
 				if ((ctx->flags & IORING_SETUP_IOPOLL) &&
-				    !wq_list_empty(&ctx->iopoll_list)) {
+				    !list_empty(&ctx->iopoll_list)) {
 					needs_sched = false;
 					break;
 				}
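
An illustrative aside, not part of the patch: the core of the change is that the pending-iopoll queue becomes a doubly linked list_head, so io_do_iopoll() can walk it with a safe iterator, skip requests that are still in flight, and unlink completed ones in place with list_del(), instead of cutting a leading run off the singly linked io_wq_work_list with wq_list_cut(). The sketch below is a minimal, self-contained userspace illustration of that tail-add plus safe-reap pattern only: it is not kernel code, the list helpers are reimplemented locally rather than taken from <linux/list.h>, and "struct req" with its "completed" flag is a hypothetical stand-in for struct io_kiocb and ->iopoll_completed.

/* Userspace sketch of the reap pattern; compile with any C99 compiler. */
#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };

#define LIST_HEAD_INIT(name) { &(name), &(name) }

static void list_add_tail(struct list_head *new, struct list_head *head)
{
	/* insert before head, i.e. at the tail of the circular list */
	new->prev = head->prev;
	new->next = head;
	head->prev->next = new;
	head->prev = new;
}

static void list_del(struct list_head *entry)
{
	/* O(1) unlink, regardless of where the entry sits in the list */
	entry->prev->next = entry->next;
	entry->next->prev = entry->prev;
}

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Hypothetical request: only the fields the sketch needs. */
struct req {
	int id;
	int completed;			/* set by the completion/polling side */
	struct list_head iopoll_node;	/* pending-iopoll membership */
};

int main(void)
{
	struct list_head iopoll_list = LIST_HEAD_INIT(iopoll_list);
	struct req reqs[4] = {
		{ .id = 0, .completed = 1 },
		{ .id = 1, .completed = 0 },
		{ .id = 2, .completed = 1 },
		{ .id = 3, .completed = 0 },
	};
	struct list_head *pos, *tmp;
	int nr_events = 0;

	/* issue side: always append to the tail, no completed-first ordering */
	for (int i = 0; i < 4; i++)
		list_add_tail(&reqs[i].iopoll_node, &iopoll_list);

	/*
	 * reap side: safe iteration so the current entry may be unlinked.
	 * Still-inflight requests are skipped; completed ones are removed
	 * wherever they sit, which a singly linked list cut cannot do.
	 */
	for (pos = iopoll_list.next, tmp = pos->next; pos != &iopoll_list;
	     pos = tmp, tmp = pos->next) {
		struct req *r = container_of(pos, struct req, iopoll_node);

		if (!r->completed)
			continue;
		list_del(&r->iopoll_node);
		nr_events++;
		printf("completed req %d\n", r->id);
	}
	printf("nr_events=%d\n", nr_events);
	return 0;
}

The trade-off visible in the diff mirrors the sketch: the issue side always appends with list_add_tail() (the completed-first wq_list_add_head() fast path goes away), while the reap side drops the wq_list_for_each_resume()/wq_list_cut() bookkeeping and simply moves each completed request onto submit_state.compl_reqs as it is found.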