From 845db023a8aeba8b14315a846dcfba31ee727fb1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 1 May 2026 19:23:12 +0800 Subject: [PATCH 1/4] ublk: don't issue uring_cmd from fallback task work When ublk_ch_uring_cmd_cb() runs as fallback task work (e.g., because the submitting task is exiting), the command should not be issued as current is a kworker, not the daemon task. This can cause io->task to capture the wrong task in __ublk_fetch(), leading to a task mismatch warning in ublk_uring_cmd_cancel_fn(). Check tw.cancel and return -ECANCELED instead of issuing the command from fallback context. Fixes: 3421c7f68bba ("ublk: make sure io cmd handled in submitter task context") Signed-off-by: Ming Lei Link: https://patch.msgid.link/20260501112312.947327-1-tom.leiming@gmail.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 8e5f3738c203..d10460d29e4a 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -3496,8 +3496,10 @@ static void ublk_ch_uring_cmd_cb(struct io_tw_req tw_req, io_tw_token_t tw) { unsigned int issue_flags = IO_URING_CMD_TASK_WORK_ISSUE_FLAGS; struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req); - int ret = ublk_ch_uring_cmd_local(cmd, issue_flags); + int ret = -ECANCELED; + if (!tw.cancel) + ret = ublk_ch_uring_cmd_local(cmd, issue_flags); if (ret != -EIOCBQUEUED) io_uring_cmd_done(cmd, ret, issue_flags); } From 212ec34e4e726e8cd4af7bea4740db24de8a9dab Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 4 May 2026 08:34:32 -0600 Subject: [PATCH 2/4] block: only read from sqe on initial invocation of blkdev_uring_cmd() This passthrough helper currently only supports discards. Part of that command is the start and length, which is read from the SQE. It does so on every invocation, where it really should just make it stable on the first invocation. This avoids needing to copy the SQE upfront, as we only really need those two 8b values stored in our per-req payload. Cc: stable@vger.kernel.org # 6.17+ Signed-off-by: Jens Axboe --- block/ioctl.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/block/ioctl.c b/block/ioctl.c index fc3be0549aa7..ab2c9ed79946 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -857,6 +857,8 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) #endif struct blk_iou_cmd { + u64 start; + u64 len; int res; bool nowait; }; @@ -946,23 +948,27 @@ int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) { struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host); struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); - const struct io_uring_sqe *sqe = cmd->sqe; u32 cmd_op = cmd->cmd_op; - uint64_t start, len; - if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || - sqe->rw_flags || sqe->file_index)) - return -EINVAL; + /* Read what we need from the SQE on the first issue */ + if (!(issue_flags & IORING_URING_CMD_REISSUE)) { + const struct io_uring_sqe *sqe = cmd->sqe; + + if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || + sqe->rw_flags || sqe->file_index)) + return -EINVAL; + + bic->start = READ_ONCE(sqe->addr); + bic->len = READ_ONCE(sqe->addr3); + } bic->res = 0; bic->nowait = issue_flags & IO_URING_F_NONBLOCK; - start = READ_ONCE(sqe->addr); - len = READ_ONCE(sqe->addr3); - switch (cmd_op) { case BLOCK_URING_CMD_DISCARD: - return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait); + return blkdev_cmd_discard(cmd, bdev, bic->start, bic->len, + bic->nowait); } return -EINVAL; } From 86f33ca9bea30cf011f2b1edad4593faea9c6e98 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 6 May 2026 16:22:38 +0800 Subject: [PATCH 3/4] ublk: validate physical_bs_shift, io_min_shift and io_opt_shift ublk_validate_params() checks logical_bs_shift is within [9, PAGE_SHIFT] but has no upper bound for physical_bs_shift, io_min_shift, or io_opt_shift. A malicious userspace can set any of these to a large value (e.g., 44), causing undefined behavior from `1 << shift` in ublk_ctrl_start_dev() since the result is stored in 32-bit unsigned int. Cap all three at ilog2(SZ_256M) (28). 256M is big enough to cover all practical block sizes, and originates from the maximum physical block size possible in NVMe (lba_size * (1 + npwg), where npwg is 16-bit). Also zero out ub->params with memset() when copy_from_user() fails or ublk_validate_params() returns error, so that no stale or partial params survive for a subsequent START_DEV to consume. Fixes: 71f28f3136af ("ublk_drv: add io_uring based userspace block driver") Signed-off-by: Ming Lei Link: https://patch.msgid.link/20260506082238.22363-1-tom.leiming@gmail.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index d10460d29e4a..57ec900f0ce0 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -900,6 +900,20 @@ static int ublk_validate_params(const struct ublk_device *ub) if (p->logical_bs_shift > PAGE_SHIFT || p->logical_bs_shift < 9) return -EINVAL; + /* + * 256M is a reasonable upper bound for physical block size, + * io_min and io_opt; it aligns with the maximum physical + * block size possible in NVMe. + */ + if (p->physical_bs_shift > ilog2(SZ_256M)) + return -EINVAL; + + if (p->io_min_shift > ilog2(SZ_256M)) + return -EINVAL; + + if (p->io_opt_shift > ilog2(SZ_256M)) + return -EINVAL; + if (p->logical_bs_shift > p->physical_bs_shift) return -EINVAL; @@ -4992,13 +5006,15 @@ static int ublk_ctrl_set_params(struct ublk_device *ub, */ ret = -EACCES; } else if (copy_from_user(&ub->params, argp, ph.len)) { + /* zero out partial copy so no stale params survive */ + memset(&ub->params, 0, sizeof(ub->params)); ret = -EFAULT; } else { /* clear all we don't support yet */ ub->params.types &= UBLK_PARAM_TYPE_ALL; ret = ublk_validate_params(ub); if (ret) - ub->params.types = 0; + memset(&ub->params, 0, sizeof(ub->params)); } mutex_unlock(&ub->mutex); From f7700a4415afb3ac1767a556094e4ef8bd440e41 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 8 May 2026 20:37:46 +0800 Subject: [PATCH 4/4] ublk: fix use-after-free in ublk_cancel_cmd() When ublk_reset_ch_dev() clears io->cmd via ublk_queue_reinit() concurrently with ublk_cancel_cmd(), ublk_cancel_cmd() can read a stale pointer and pass it to io_uring_cmd_done(), causing a use-after-free. Fix by synchronizing the two paths with ubq->cancel_lock: - ublk_cancel_cmd(): read and clear io->cmd under cancel_lock, then call io_uring_cmd_done() on the saved local copy outside the lock. - ublk_reset_ch_dev(): hold cancel_lock across ublk_queue_reinit() so that io->cmd and io->flags are cleared atomically with respect to ublk_cancel_cmd(). Fixes: 216c8f5ef0f2 ("ublk: replace monitor with cancelable uring_cmd") Signed-off-by: Ming Lei Link: https://patch.msgid.link/20260508123746.242018-1-tom.leiming@gmail.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 57ec900f0ce0..6d13f1481de0 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2411,8 +2411,14 @@ static void ublk_reset_ch_dev(struct ublk_device *ub) { int i; - for (i = 0; i < ub->dev_info.nr_hw_queues; i++) - ublk_queue_reinit(ub, ublk_get_queue(ub, i)); + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { + struct ublk_queue *ubq = ublk_get_queue(ub, i); + + /* Sync with ublk_cancel_cmd() */ + spin_lock(&ubq->cancel_lock); + ublk_queue_reinit(ub, ubq); + spin_unlock(&ubq->cancel_lock); + } /* set to NULL, otherwise new tasks cannot mmap io_cmd_buf */ ub->mm = NULL; @@ -2753,6 +2759,7 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag, { struct ublk_io *io = &ubq->ios[tag]; struct ublk_device *ub = ubq->dev; + struct io_uring_cmd *cmd = NULL; struct request *req; bool done; @@ -2775,12 +2782,15 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag, spin_lock(&ubq->cancel_lock); done = !!(io->flags & UBLK_IO_FLAG_CANCELED); - if (!done) + if (!done) { io->flags |= UBLK_IO_FLAG_CANCELED; + cmd = io->cmd; + io->cmd = NULL; + } spin_unlock(&ubq->cancel_lock); - if (!done) - io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, issue_flags); + if (!done && cmd) + io_uring_cmd_done(cmd, UBLK_IO_RES_ABORT, issue_flags); } /*