From ccd25890f73c082fe2657ed227b497d6ac5fdc40 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 May 2026 10:19:09 -0600 Subject: [PATCH 1/5] io_uring/net: punt IORING_OP_BIND async if it needs file create For two reasons: 1) An opcode cannot block inside io_uring_enter() doing submissions, as it'll stall the submission side pipeline. 2) Ending up in sb_start_write() -> __sb_start_write() -> percpu_down_read_freezable() introduces a new lockdep edge, which it correctly complains about. Check if the socket type is AF_UNIX and has a non-empty pathname. If it does, mark it REQ_F_FORCE_ASYNC to punt the submission to io-wq rather than attempt to do it inline. Fixes: 7481fd93fa0a ("io_uring: Introduce IORING_OP_BIND") Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Jens Axboe --- io_uring/net.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/io_uring/net.c b/io_uring/net.c index 30cd22c0b934..8df15b639358 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -1799,11 +1800,29 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) return IOU_COMPLETE; } +/* + * Check if bind request would potentially end up with filename_create(), + * which in turn end up in mnt_want_write() which will grab the fs + * percpu start write sem. This can trigger a lockdep warning. + */ +static int io_bind_file_create(const struct io_async_msghdr *io, int addr_len) +{ + const struct sockaddr_un *sun; + + if (io->addr.ss_family != AF_UNIX) + return 0; + if (addr_len <= offsetof(struct sockaddr_un, sun_path)) + return 0; + sun = (const struct sockaddr_un *) &io->addr; + return sun->sun_path[0] != '\0'; +} + int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind); struct sockaddr __user *uaddr; struct io_async_msghdr *io; + int ret; if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) return -EINVAL; @@ -1814,7 +1833,12 @@ int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) io = io_msg_alloc_async(req); if (unlikely(!io)) return -ENOMEM; - return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr); + ret = move_addr_to_kernel(uaddr, bind->addr_len, &io->addr); + if (unlikely(ret)) + return ret; + if (io_bind_file_create(io, bind->addr_len)) + req->flags |= REQ_F_FORCE_ASYNC; + return 0; } int io_bind(struct io_kiocb *req, unsigned int issue_flags) From 93d93f5f8da791e98159795c6ef683f45bd95d13 Mon Sep 17 00:00:00 2001 From: Heechan Kang Date: Sun, 17 May 2026 03:47:09 +0900 Subject: [PATCH 2/5] io_uring/waitid: clear waitid info before copying it to userspace IORING_OP_WAITID stores its result fields in struct io_waitid::info and later copies them to userspace siginfo. The prep path initializes the request arguments, but it does not initialize info itself. If the wait operation completes without reporting a child event, the common wait code can return without writing wo_info. In that case io_waitid_finish() still copies iw->info to userspace, exposing stale bytes from the reused io_kiocb command storage. Clear the result storage during prep so the io_uring path matches the regular waitid syscall, which uses a zero-initialized struct waitid_info. Fixes: f31ecf671ddc ("io_uring: add IORING_OP_WAITID support") Cc: stable@vger.kernel.org # 6.7+ Signed-off-by: Heechan Kang Link: https://patch.msgid.link/20260516184709.852814-1-gganji11@naver.com Signed-off-by: Jens Axboe --- io_uring/waitid.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/waitid.c b/io_uring/waitid.c index d25d60aed6af..32f68fd7fcdd 100644 --- a/io_uring/waitid.c +++ b/io_uring/waitid.c @@ -275,6 +275,7 @@ int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) iw->options = READ_ONCE(sqe->file_index); iw->head = NULL; iw->infop = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + memset(&iw->info, 0, sizeof(iw->info)); return 0; } From cf18e36455603d65d4745de83e2d1743c54ada47 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sun, 17 May 2026 17:30:10 -0400 Subject: [PATCH 3/5] io_uring: propagate array_index_nospec opcode into req->opcode Commit 1e988c3fe126 ("io_uring: prevent opcode speculation") added array_index_nospec() to io_init_req(), but applied it only to a local opcode variable. req->opcode is initialized from sqe->opcode before the bounds check and remains the raw value. Keep req->opcode as the canonical opcode in io_init_req(): reject out-of-range values architecturally, then write the array_index_nospec() result back to req->opcode before any table lookup. This keeps downstream users of req->opcode from observing the raw user byte on a mispredicted path. No functional change: array_index_nospec() is a no-op for opcodes in [0, IORING_OP_LAST), and out-of-range opcodes are still rejected at the bounds check above the assignment. Fixes: 1e988c3fe126 ("io_uring: prevent opcode speculation") Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Michael Bommarito Link: https://patch.msgid.link/20260517213010.696135-1-michael.bommarito@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 036145ee466c..103b6c88f252 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1738,10 +1738,9 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, const struct io_issue_def *def; unsigned int sqe_flags; int personality; - u8 opcode; req->ctx = ctx; - req->opcode = opcode = READ_ONCE(sqe->opcode); + req->opcode = READ_ONCE(sqe->opcode); /* same numerical values with corresponding REQ_F_*, safe to copy */ sqe_flags = READ_ONCE(sqe->flags); req->flags = (__force io_req_flags_t) sqe_flags; @@ -1751,13 +1750,13 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, req->cancel_seq_set = false; req->async_data = NULL; - if (unlikely(opcode >= IORING_OP_LAST)) { + if (unlikely(req->opcode >= IORING_OP_LAST)) { req->opcode = 0; return io_init_fail_req(req, -EINVAL); } - opcode = array_index_nospec(opcode, IORING_OP_LAST); + req->opcode = array_index_nospec(req->opcode, IORING_OP_LAST); - def = &io_issue_defs[opcode]; + def = &io_issue_defs[req->opcode]; if (def->is_128 && !(ctx->flags & IORING_SETUP_SQE128)) { /* * A 128b op on a non-128b SQ requires mixed SQE support as From 3d879647fb03dab6fe6e1dd9404a2dd324096218 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 May 2026 10:02:58 -0600 Subject: [PATCH 4/5] io_uring/timeout: splice timed out link in timeout handler A previous commit deferred this to the task_work part of it, so it could be protected by ->uring_lock. But that's actually not necessary here, and in fact the head clearing is not enough to make that safe. For those two reasons, just re-instate the local splicing. Fixes: 49ae66eb8c27 ("io_uring: defer linked-timeout chain splice out of hrtimer context") Signed-off-by: Jens Axboe --- io_uring/timeout.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 6353a4d979dc..c4dd26cf342d 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -417,8 +417,10 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) * done in io_req_task_link_timeout(), if needed. */ if (prev) { - if (!req_ref_inc_not_zero(prev)) + if (!req_ref_inc_not_zero(prev)) { + io_remove_next_linked(prev); prev = NULL; + } } list_del(&timeout->list); timeout->prev = prev; From e97ff8b62d4690c69297f0f6de874f0564cc01a4 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Wed, 20 May 2026 20:00:44 +0200 Subject: [PATCH 5/5] io_uring/nop: pass all errors to userspace This fixes an inconsistency where io_nop() called req_set_fail() based on ret, but passed just nop->result to userspace. Originally, ret is a even copy of nop->result, but is set to an error when such happens subsequently. Now that's also passed to userspace. Fixes: a85f31052bce ("io_uring/nop: add support for testing registered files and buffers") Signed-off-by: Alexander A. Klimov Link: https://patch.msgid.link/20260520180045.538533-1-grandmaster@al2klimov.de Signed-off-by: Jens Axboe --- io_uring/nop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/nop.c b/io_uring/nop.c index 3caf07878f8a..f5c9969e7f64 100644 --- a/io_uring/nop.c +++ b/io_uring/nop.c @@ -79,9 +79,9 @@ int io_nop(struct io_kiocb *req, unsigned int issue_flags) if (ret < 0) req_set_fail(req); if (nop->flags & IORING_NOP_CQE32) - io_req_set_res32(req, nop->result, 0, nop->extra1, nop->extra2); + io_req_set_res32(req, ret, 0, nop->extra1, nop->extra2); else - io_req_set_res(req, nop->result, 0); + io_req_set_res(req, ret, 0); if (nop->flags & IORING_NOP_TW) { req->io_task_work.func = io_req_task_complete; io_req_task_work_add(req);