Merge tag 'for-6.8/io_uring-2024-01-08' of git://git.kernel.dk/linux

Pull io_uring updates from Jens Axboe:
 "Mostly just come fixes and cleanups, but one feature as well. In
  detail:

   - Harden the check for handling IOPOLL based on return (Pavel)

   - Various minor optimizations (Pavel)

   - Drop remnants of SCM_RIGHTS fd passing support, now that it's no
     longer supported since 6.7 (me)

   - Fix for a case where bytes_done wasn't initialized properly on a
     failure condition for read/write requests (me)

   - Move the register related code to a separate file (me)

   - Add support for returning the provided ring buffer head (me)

   - Add support for adding a direct descriptor to the normal file table
     (me, Christian Brauner)

   - Fix for ensuring pending task_work for a ring with DEFER_TASKRUN is
     run even if we timeout waiting (me)"

* tag 'for-6.8/io_uring-2024-01-08' of git://git.kernel.dk/linux:
  io_uring: ensure local task_work is run on wait timeout
  io_uring/kbuf: add method for returning provided buffer ring head
  io_uring/rw: ensure io->bytes_done is always initialized
  io_uring: drop any code related to SCM_RIGHTS
  io_uring/unix: drop usage of io_uring socket
  io_uring/register: move io_uring_register(2) related code to register.c
  io_uring/openclose: add support for IORING_OP_FIXED_FD_INSTALL
  io_uring/cmd: inline io_uring_cmd_get_task
  io_uring/cmd: inline io_uring_cmd_do_in_task_lazy
  io_uring: split out cmd api into a separate header
  io_uring: optimise ltimeout for inline execution
  io_uring: don't check iopoll if request completes
This commit is contained in:
Linus Torvalds
2024-01-11 14:19:23 -08:00
26 changed files with 890 additions and 947 deletions

View File

@@ -6,66 +6,13 @@
#include <linux/xarray.h>
#include <uapi/linux/io_uring.h>
enum io_uring_cmd_flags {
IO_URING_F_COMPLETE_DEFER = 1,
IO_URING_F_UNLOCKED = 2,
/* the request is executed from poll, it should not be freed */
IO_URING_F_MULTISHOT = 4,
/* executed by io-wq */
IO_URING_F_IOWQ = 8,
/* int's last bit, sign checks are usually faster than a bit test */
IO_URING_F_NONBLOCK = INT_MIN,
/* ctx state flags, for URING_CMD */
IO_URING_F_SQE128 = (1 << 8),
IO_URING_F_CQE32 = (1 << 9),
IO_URING_F_IOPOLL = (1 << 10),
/* set when uring wants to cancel a previously issued command */
IO_URING_F_CANCEL = (1 << 11),
IO_URING_F_COMPAT = (1 << 12),
};
/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */
#define IORING_URING_CMD_CANCELABLE (1U << 30)
struct io_uring_cmd {
struct file *file;
const struct io_uring_sqe *sqe;
/* callback to defer completions to task context */
void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
u32 cmd_op;
u32 flags;
u8 pdu[32]; /* available inline for free use */
};
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
{
return sqe->cmd;
}
#if defined(CONFIG_IO_URING)
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd);
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
unsigned issue_flags);
struct sock *io_uring_get_socket(struct file *file);
void __io_uring_cancel(bool cancel_all);
void __io_uring_free(struct task_struct *tsk);
void io_uring_unreg_ringfd(void);
const char *io_uring_get_opcode(u8 opcode);
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
unsigned flags);
/* users should follow semantics of IOU_F_TWQ_LAZY_WAKE */
void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned));
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
{
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0);
}
int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags);
bool io_is_uring_fops(struct file *file);
static inline void io_uring_files_cancel(void)
{
@@ -84,32 +31,7 @@ static inline void io_uring_free(struct task_struct *tsk)
if (tsk->io_uring)
__io_uring_free(tsk);
}
int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags);
void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
unsigned int issue_flags);
struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd);
#else
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd)
{
return -EOPNOTSUPP;
}
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
ssize_t ret2, unsigned issue_flags)
{
}
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
{
}
static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
{
}
static inline struct sock *io_uring_get_socket(struct file *file)
{
return NULL;
}
static inline void io_uring_task_cancel(void)
{
}
@@ -128,13 +50,9 @@ static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd,
{
return -EOPNOTSUPP;
}
static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
unsigned int issue_flags)
static inline bool io_is_uring_fops(struct file *file)
{
}
static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
{
return NULL;
return false;
}
#endif

View File

@@ -0,0 +1,77 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _LINUX_IO_URING_CMD_H
#define _LINUX_IO_URING_CMD_H
#include <uapi/linux/io_uring.h>
#include <linux/io_uring_types.h>
/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */
#define IORING_URING_CMD_CANCELABLE (1U << 30)
struct io_uring_cmd {
struct file *file;
const struct io_uring_sqe *sqe;
/* callback to defer completions to task context */
void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
u32 cmd_op;
u32 flags;
u8 pdu[32]; /* available inline for free use */
};
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
{
return sqe->cmd;
}
#if defined(CONFIG_IO_URING)
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd);
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
unsigned issue_flags);
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
unsigned flags);
void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
unsigned int issue_flags);
#else
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd)
{
return -EOPNOTSUPP;
}
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
ssize_t ret2, unsigned issue_flags)
{
}
static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
unsigned flags)
{
}
static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
}
#endif
/* users must follow the IOU_F_TWQ_LAZY_WAKE semantics */
static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
{
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, IOU_F_TWQ_LAZY_WAKE);
}
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
{
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0);
}
static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
{
return cmd_to_io_kiocb(cmd)->task;
}
#endif /* _LINUX_IO_URING_CMD_H */

View File

@@ -7,6 +7,37 @@
#include <linux/llist.h>
#include <uapi/linux/io_uring.h>
enum {
/*
* A hint to not wake right away but delay until there are enough of
* tw's queued to match the number of CQEs the task is waiting for.
*
* Must not be used wirh requests generating more than one CQE.
* It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
*/
IOU_F_TWQ_LAZY_WAKE = 1,
};
enum io_uring_cmd_flags {
IO_URING_F_COMPLETE_DEFER = 1,
IO_URING_F_UNLOCKED = 2,
/* the request is executed from poll, it should not be freed */
IO_URING_F_MULTISHOT = 4,
/* executed by io-wq */
IO_URING_F_IOWQ = 8,
/* int's last bit, sign checks are usually faster than a bit test */
IO_URING_F_NONBLOCK = INT_MIN,
/* ctx state flags, for URING_CMD */
IO_URING_F_SQE128 = (1 << 8),
IO_URING_F_CQE32 = (1 << 9),
IO_URING_F_IOPOLL = (1 << 10),
/* set when uring wants to cancel a previously issued command */
IO_URING_F_CANCEL = (1 << 11),
IO_URING_F_COMPAT = (1 << 12),
};
struct io_wq_work_node {
struct io_wq_work_node *next;
};
@@ -358,9 +389,6 @@ struct io_ring_ctx {
struct wait_queue_head rsrc_quiesce_wq;
unsigned rsrc_quiesce;
#if defined(CONFIG_UNIX)
struct socket *ring_sock;
#endif
/* hashed buffered write serialization */
struct io_wq_hash *hash_map;

View File

@@ -71,6 +71,7 @@ struct io_uring_sqe {
__u32 uring_cmd_flags;
__u32 waitid_flags;
__u32 futex_flags;
__u32 install_fd_flags;
};
__u64 user_data; /* data to be passed back at completion time */
/* pack this to avoid bogus arm OABI complaints */
@@ -253,6 +254,7 @@ enum io_uring_op {
IORING_OP_FUTEX_WAIT,
IORING_OP_FUTEX_WAKE,
IORING_OP_FUTEX_WAITV,
IORING_OP_FIXED_FD_INSTALL,
/* this goes last, obviously */
IORING_OP_LAST,
@@ -386,6 +388,13 @@ enum {
/* Pass through the flags from sqe->file_index to cqe->flags */
#define IORING_MSG_RING_FLAGS_PASS (1U << 1)
/*
* IORING_OP_FIXED_FD_INSTALL flags (sqe->install_fd_flags)
*
* IORING_FIXED_FD_NO_CLOEXEC Don't mark the fd as O_CLOEXEC
*/
#define IORING_FIXED_FD_NO_CLOEXEC (1U << 0)
/*
* IO completion data structure (Completion Queue Entry)
*/
@@ -558,6 +567,9 @@ enum {
/* register a range of fixed file slots for automatic slot allocation */
IORING_REGISTER_FILE_ALLOC_RANGE = 25,
/* return status information for a buffer group */
IORING_REGISTER_PBUF_STATUS = 26,
/* this goes last */
IORING_REGISTER_LAST,
@@ -684,6 +696,13 @@ struct io_uring_buf_reg {
__u64 resv[3];
};
/* argument for IORING_REGISTER_PBUF_STATUS */
struct io_uring_buf_status {
__u32 buf_group; /* input */
__u32 head; /* output */
__u32 resv[8];
};
/*
* io_uring_restriction->opcode values
*/