io_uring/timeout: immediate timeout arg

One of the things the user always has to keep in mind is that any user
pointers they put into an SQE are not going to be read by the kernel
until submission happens, and the user has to ensure the pointee stays
alive until then. For example, the snippet below will lead to a UAF of
the on-stack variable ts. Instead of passing the timeout value as a
pointer, allow storing it as an immediate value in the SQE. The user has
to set a new flag called IORING_TIMEOUT_IMMEDIATE_ARG, in which case
sqe->addr for timeout or sqe->addr2 for timeout update requests will be
interpreted as a time value in nanoseconds.

void prep_timeout(struct io_uring_sqe *sqe) {
    struct __kernel_timespec ts = {...};
    prep_timeout(sqe, &ts);
}

void submit() {
    sqe = get_sqe();
    prep_timeout(sqe);
    io_uring_submit();
}

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Pavel Begunkov
2026-03-02 13:10:37 +00:00
committed by Jens Axboe
parent 0e78aa188c
commit d8345a2190
2 changed files with 20 additions and 5 deletions

View File

@@ -343,6 +343,10 @@ enum io_uring_op {
/*
* sqe->timeout_flags
*
* IORING_TIMEOUT_IMMEDIATE_ARG: If set, sqe->addr stores the timeout
* value in nanoseconds instead of
* pointing to a timespec.
*/
#define IORING_TIMEOUT_ABS (1U << 0)
#define IORING_TIMEOUT_UPDATE (1U << 1)
@@ -351,6 +355,7 @@ enum io_uring_op {
#define IORING_LINK_TIMEOUT_UPDATE (1U << 4)
#define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5)
#define IORING_TIMEOUT_MULTISHOT (1U << 6)
#define IORING_TIMEOUT_IMMEDIATE_ARG (1U << 7)
#define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
#define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
/*

View File

@@ -35,10 +35,17 @@ struct io_timeout_rem {
bool ltimeout;
};
static int io_parse_user_time(ktime_t *time, u64 arg)
static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags)
{
struct timespec64 ts;
if (flags & IORING_TIMEOUT_IMMEDIATE_ARG) {
*time = ns_to_ktime(arg);
if (*time < 0)
return -EINVAL;
return 0;
}
if (get_timespec64(&ts, u64_to_user_ptr(arg)))
return -EFAULT;
if (ts.tv_sec < 0 || ts.tv_nsec < 0)
@@ -475,9 +482,11 @@ int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -EINVAL;
if (tr->flags & IORING_LINK_TIMEOUT_UPDATE)
tr->ltimeout = true;
if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS))
if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK |
IORING_TIMEOUT_ABS |
IORING_TIMEOUT_IMMEDIATE_ARG))
return -EINVAL;
ret = io_parse_user_time(&tr->time, READ_ONCE(sqe->addr2));
ret = io_parse_user_time(&tr->time, READ_ONCE(sqe->addr2), tr->flags);
if (ret)
return ret;
} else if (tr->flags) {
@@ -545,7 +554,8 @@ static int __io_timeout_prep(struct io_kiocb *req,
flags = READ_ONCE(sqe->timeout_flags);
if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK |
IORING_TIMEOUT_ETIME_SUCCESS |
IORING_TIMEOUT_MULTISHOT))
IORING_TIMEOUT_MULTISHOT |
IORING_TIMEOUT_IMMEDIATE_ARG))
return -EINVAL;
/* more than one clock specified is invalid, obviously */
if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
@@ -574,7 +584,7 @@ static int __io_timeout_prep(struct io_kiocb *req,
data->req = req;
data->flags = flags;
ret = io_parse_user_time(&data->time, READ_ONCE(sqe->addr));
ret = io_parse_user_time(&data->time, READ_ONCE(sqe->addr), flags);
if (ret)
return ret;