mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-26 17:44:08 -04:00
Merge tag 'for-6.8/io_uring-2024-01-08' of git://git.kernel.dk/linux
Pull io_uring updates from Jens Axboe:
"Mostly just come fixes and cleanups, but one feature as well. In
detail:
- Harden the check for handling IOPOLL based on return (Pavel)
- Various minor optimizations (Pavel)
- Drop remnants of SCM_RIGHTS fd passing support, now that it's no
longer supported since 6.7 (me)
- Fix for a case where bytes_done wasn't initialized properly on a
failure condition for read/write requests (me)
- Move the register related code to a separate file (me)
- Add support for returning the provided ring buffer head (me)
- Add support for adding a direct descriptor to the normal file table
(me, Christian Brauner)
- Fix for ensuring pending task_work for a ring with DEFER_TASKRUN is
run even if we timeout waiting (me)"
* tag 'for-6.8/io_uring-2024-01-08' of git://git.kernel.dk/linux:
io_uring: ensure local task_work is run on wait timeout
io_uring/kbuf: add method for returning provided buffer ring head
io_uring/rw: ensure io->bytes_done is always initialized
io_uring: drop any code related to SCM_RIGHTS
io_uring/unix: drop usage of io_uring socket
io_uring/register: move io_uring_register(2) related code to register.c
io_uring/openclose: add support for IORING_OP_FIXED_FD_INSTALL
io_uring/cmd: inline io_uring_cmd_get_task
io_uring/cmd: inline io_uring_cmd_do_in_task_lazy
io_uring: split out cmd api into a separate header
io_uring: optimise ltimeout for inline execution
io_uring: don't check iopoll if request completes
This commit is contained in:
@@ -6,66 +6,13 @@
|
||||
#include <linux/xarray.h>
|
||||
#include <uapi/linux/io_uring.h>
|
||||
|
||||
enum io_uring_cmd_flags {
|
||||
IO_URING_F_COMPLETE_DEFER = 1,
|
||||
IO_URING_F_UNLOCKED = 2,
|
||||
/* the request is executed from poll, it should not be freed */
|
||||
IO_URING_F_MULTISHOT = 4,
|
||||
/* executed by io-wq */
|
||||
IO_URING_F_IOWQ = 8,
|
||||
/* int's last bit, sign checks are usually faster than a bit test */
|
||||
IO_URING_F_NONBLOCK = INT_MIN,
|
||||
|
||||
/* ctx state flags, for URING_CMD */
|
||||
IO_URING_F_SQE128 = (1 << 8),
|
||||
IO_URING_F_CQE32 = (1 << 9),
|
||||
IO_URING_F_IOPOLL = (1 << 10),
|
||||
|
||||
/* set when uring wants to cancel a previously issued command */
|
||||
IO_URING_F_CANCEL = (1 << 11),
|
||||
IO_URING_F_COMPAT = (1 << 12),
|
||||
};
|
||||
|
||||
/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */
|
||||
#define IORING_URING_CMD_CANCELABLE (1U << 30)
|
||||
|
||||
struct io_uring_cmd {
|
||||
struct file *file;
|
||||
const struct io_uring_sqe *sqe;
|
||||
/* callback to defer completions to task context */
|
||||
void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
|
||||
u32 cmd_op;
|
||||
u32 flags;
|
||||
u8 pdu[32]; /* available inline for free use */
|
||||
};
|
||||
|
||||
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
|
||||
{
|
||||
return sqe->cmd;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_IO_URING)
|
||||
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd);
|
||||
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
|
||||
unsigned issue_flags);
|
||||
struct sock *io_uring_get_socket(struct file *file);
|
||||
void __io_uring_cancel(bool cancel_all);
|
||||
void __io_uring_free(struct task_struct *tsk);
|
||||
void io_uring_unreg_ringfd(void);
|
||||
const char *io_uring_get_opcode(u8 opcode);
|
||||
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
|
||||
unsigned flags);
|
||||
/* users should follow semantics of IOU_F_TWQ_LAZY_WAKE */
|
||||
void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned));
|
||||
|
||||
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0);
|
||||
}
|
||||
int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags);
|
||||
bool io_is_uring_fops(struct file *file);
|
||||
|
||||
static inline void io_uring_files_cancel(void)
|
||||
{
|
||||
@@ -84,32 +31,7 @@ static inline void io_uring_free(struct task_struct *tsk)
|
||||
if (tsk->io_uring)
|
||||
__io_uring_free(tsk);
|
||||
}
|
||||
int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags);
|
||||
void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags);
|
||||
struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd);
|
||||
#else
|
||||
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
|
||||
ssize_t ret2, unsigned issue_flags)
|
||||
{
|
||||
}
|
||||
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
}
|
||||
static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
}
|
||||
static inline struct sock *io_uring_get_socket(struct file *file)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
static inline void io_uring_task_cancel(void)
|
||||
{
|
||||
}
|
||||
@@ -128,13 +50,9 @@ static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd,
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags)
|
||||
static inline bool io_is_uring_fops(struct file *file)
|
||||
{
|
||||
}
|
||||
static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
|
||||
{
|
||||
return NULL;
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
77
include/linux/io_uring/cmd.h
Normal file
77
include/linux/io_uring/cmd.h
Normal file
@@ -0,0 +1,77 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
#ifndef _LINUX_IO_URING_CMD_H
|
||||
#define _LINUX_IO_URING_CMD_H
|
||||
|
||||
#include <uapi/linux/io_uring.h>
|
||||
#include <linux/io_uring_types.h>
|
||||
|
||||
/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */
|
||||
#define IORING_URING_CMD_CANCELABLE (1U << 30)
|
||||
|
||||
struct io_uring_cmd {
|
||||
struct file *file;
|
||||
const struct io_uring_sqe *sqe;
|
||||
/* callback to defer completions to task context */
|
||||
void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
|
||||
u32 cmd_op;
|
||||
u32 flags;
|
||||
u8 pdu[32]; /* available inline for free use */
|
||||
};
|
||||
|
||||
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
|
||||
{
|
||||
return sqe->cmd;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_IO_URING)
|
||||
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd);
|
||||
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
|
||||
unsigned issue_flags);
|
||||
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
|
||||
unsigned flags);
|
||||
|
||||
void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags);
|
||||
|
||||
#else
|
||||
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
|
||||
ssize_t ret2, unsigned issue_flags)
|
||||
{
|
||||
}
|
||||
static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
|
||||
unsigned flags)
|
||||
{
|
||||
}
|
||||
static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/* users must follow the IOU_F_TWQ_LAZY_WAKE semantics */
|
||||
static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, IOU_F_TWQ_LAZY_WAKE);
|
||||
}
|
||||
|
||||
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0);
|
||||
}
|
||||
|
||||
static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
|
||||
{
|
||||
return cmd_to_io_kiocb(cmd)->task;
|
||||
}
|
||||
|
||||
#endif /* _LINUX_IO_URING_CMD_H */
|
||||
@@ -7,6 +7,37 @@
|
||||
#include <linux/llist.h>
|
||||
#include <uapi/linux/io_uring.h>
|
||||
|
||||
enum {
|
||||
/*
|
||||
* A hint to not wake right away but delay until there are enough of
|
||||
* tw's queued to match the number of CQEs the task is waiting for.
|
||||
*
|
||||
* Must not be used wirh requests generating more than one CQE.
|
||||
* It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
|
||||
*/
|
||||
IOU_F_TWQ_LAZY_WAKE = 1,
|
||||
};
|
||||
|
||||
enum io_uring_cmd_flags {
|
||||
IO_URING_F_COMPLETE_DEFER = 1,
|
||||
IO_URING_F_UNLOCKED = 2,
|
||||
/* the request is executed from poll, it should not be freed */
|
||||
IO_URING_F_MULTISHOT = 4,
|
||||
/* executed by io-wq */
|
||||
IO_URING_F_IOWQ = 8,
|
||||
/* int's last bit, sign checks are usually faster than a bit test */
|
||||
IO_URING_F_NONBLOCK = INT_MIN,
|
||||
|
||||
/* ctx state flags, for URING_CMD */
|
||||
IO_URING_F_SQE128 = (1 << 8),
|
||||
IO_URING_F_CQE32 = (1 << 9),
|
||||
IO_URING_F_IOPOLL = (1 << 10),
|
||||
|
||||
/* set when uring wants to cancel a previously issued command */
|
||||
IO_URING_F_CANCEL = (1 << 11),
|
||||
IO_URING_F_COMPAT = (1 << 12),
|
||||
};
|
||||
|
||||
struct io_wq_work_node {
|
||||
struct io_wq_work_node *next;
|
||||
};
|
||||
@@ -358,9 +389,6 @@ struct io_ring_ctx {
|
||||
struct wait_queue_head rsrc_quiesce_wq;
|
||||
unsigned rsrc_quiesce;
|
||||
|
||||
#if defined(CONFIG_UNIX)
|
||||
struct socket *ring_sock;
|
||||
#endif
|
||||
/* hashed buffered write serialization */
|
||||
struct io_wq_hash *hash_map;
|
||||
|
||||
|
||||
@@ -71,6 +71,7 @@ struct io_uring_sqe {
|
||||
__u32 uring_cmd_flags;
|
||||
__u32 waitid_flags;
|
||||
__u32 futex_flags;
|
||||
__u32 install_fd_flags;
|
||||
};
|
||||
__u64 user_data; /* data to be passed back at completion time */
|
||||
/* pack this to avoid bogus arm OABI complaints */
|
||||
@@ -253,6 +254,7 @@ enum io_uring_op {
|
||||
IORING_OP_FUTEX_WAIT,
|
||||
IORING_OP_FUTEX_WAKE,
|
||||
IORING_OP_FUTEX_WAITV,
|
||||
IORING_OP_FIXED_FD_INSTALL,
|
||||
|
||||
/* this goes last, obviously */
|
||||
IORING_OP_LAST,
|
||||
@@ -386,6 +388,13 @@ enum {
|
||||
/* Pass through the flags from sqe->file_index to cqe->flags */
|
||||
#define IORING_MSG_RING_FLAGS_PASS (1U << 1)
|
||||
|
||||
/*
|
||||
* IORING_OP_FIXED_FD_INSTALL flags (sqe->install_fd_flags)
|
||||
*
|
||||
* IORING_FIXED_FD_NO_CLOEXEC Don't mark the fd as O_CLOEXEC
|
||||
*/
|
||||
#define IORING_FIXED_FD_NO_CLOEXEC (1U << 0)
|
||||
|
||||
/*
|
||||
* IO completion data structure (Completion Queue Entry)
|
||||
*/
|
||||
@@ -558,6 +567,9 @@ enum {
|
||||
/* register a range of fixed file slots for automatic slot allocation */
|
||||
IORING_REGISTER_FILE_ALLOC_RANGE = 25,
|
||||
|
||||
/* return status information for a buffer group */
|
||||
IORING_REGISTER_PBUF_STATUS = 26,
|
||||
|
||||
/* this goes last */
|
||||
IORING_REGISTER_LAST,
|
||||
|
||||
@@ -684,6 +696,13 @@ struct io_uring_buf_reg {
|
||||
__u64 resv[3];
|
||||
};
|
||||
|
||||
/* argument for IORING_REGISTER_PBUF_STATUS */
|
||||
struct io_uring_buf_status {
|
||||
__u32 buf_group; /* input */
|
||||
__u32 head; /* output */
|
||||
__u32 resv[8];
|
||||
};
|
||||
|
||||
/*
|
||||
* io_uring_restriction->opcode values
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user