mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-16 11:21:26 -04:00
ublk: add UBLK_U_CMD_REG_BUF/UNREG_BUF control commands
Add control commands for registering and unregistering shared memory buffers for zero-copy I/O: - UBLK_U_CMD_REG_BUF (0x18): pins pages from userspace, inserts PFN ranges into a per-device maple tree for O(log n) lookup during I/O. Buffer pointers are tracked in a per-device xarray. Returns the assigned buffer index. - UBLK_U_CMD_UNREG_BUF (0x19): removes PFN entries and unpins pages. Queue freeze/unfreeze is handled internally so userspace need not quiesce the device during registration. Also adds: - UBLK_IO_F_SHMEM_ZC flag and addr encoding helpers in UAPI header (16-bit buffer index supporting up to 65536 buffers) - Data structures (ublk_buf, ublk_buf_range) and xarray/maple tree - __ublk_ctrl_reg_buf() helper for PFN insertion with error unwinding - __ublk_ctrl_unreg_buf() helper for cleanup reuse - ublk_support_shmem_zc() / ublk_dev_support_shmem_zc() stubs (returning false — feature not enabled yet) Signed-off-by: Ming Lei <ming.lei@redhat.com> Link: https://patch.msgid.link/20260331153207.3635125-2-ming.lei@redhat.com [axboe: fixup ublk_buf_reg -> ublk_shmem_buf_reg errors, comments] Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
@@ -57,6 +57,44 @@
|
||||
_IOWR('u', 0x16, struct ublksrv_ctrl_cmd)
|
||||
#define UBLK_U_CMD_TRY_STOP_DEV \
|
||||
_IOWR('u', 0x17, struct ublksrv_ctrl_cmd)
|
||||
/*
|
||||
* Register a shared memory buffer for zero-copy I/O.
|
||||
* Input: ctrl_cmd.addr points to struct ublk_shmem_buf_reg (buffer VA + size)
|
||||
* ctrl_cmd.len = sizeof(struct ublk_shmem_buf_reg)
|
||||
* Result: >= 0 is the assigned buffer index, < 0 is error
|
||||
*
|
||||
* The kernel pins pages from the calling process's address space
|
||||
* and inserts PFN ranges into a per-device maple tree. When a block
|
||||
* request's pages match registered pages, the driver sets
|
||||
* UBLK_IO_F_SHMEM_ZC and encodes the buffer index + offset in addr,
|
||||
* allowing the server to access the data via its own mapping of the
|
||||
* same shared memory — true zero copy.
|
||||
*
|
||||
* The memory can be backed by memfd, hugetlbfs, or any GUP-compatible
|
||||
* shared mapping. Queue freeze is handled internally.
|
||||
*
|
||||
* The buffer VA and size are passed via a user buffer (not inline in
|
||||
* ctrl_cmd) so that unprivileged devices can prepend the device path
|
||||
* to ctrl_cmd.addr without corrupting the VA.
|
||||
*/
|
||||
#define UBLK_U_CMD_REG_BUF \
|
||||
_IOWR('u', 0x18, struct ublksrv_ctrl_cmd)
|
||||
/*
|
||||
* Unregister a shared memory buffer.
|
||||
* Input: ctrl_cmd.data[0] = buffer index
|
||||
*/
|
||||
#define UBLK_U_CMD_UNREG_BUF \
|
||||
_IOWR('u', 0x19, struct ublksrv_ctrl_cmd)
|
||||
|
||||
/* Parameter buffer for UBLK_U_CMD_REG_BUF, pointed to by ctrl_cmd.addr */
|
||||
struct ublk_shmem_buf_reg {
|
||||
__u64 addr; /* userspace virtual address of shared memory */
|
||||
__u32 len; /* buffer size in bytes (page-aligned, max 4GB) */
|
||||
__u32 flags;
|
||||
};
|
||||
|
||||
/* Pin pages without FOLL_WRITE; usable with write-sealed memfd */
|
||||
#define UBLK_SHMEM_BUF_READ_ONLY (1U << 0)
|
||||
/*
|
||||
* 64bits are enough now, and it should be easy to extend in case of
|
||||
* running out of feature flags
|
||||
@@ -370,6 +408,7 @@
|
||||
/* Disable automatic partition scanning when device is started */
|
||||
#define UBLK_F_NO_AUTO_PART_SCAN (1ULL << 18)
|
||||
|
||||
|
||||
/* device state */
|
||||
#define UBLK_S_DEV_DEAD 0
|
||||
#define UBLK_S_DEV_LIVE 1
|
||||
@@ -469,6 +508,12 @@ struct ublksrv_ctrl_dev_info {
|
||||
#define UBLK_IO_F_NEED_REG_BUF (1U << 17)
|
||||
/* Request has an integrity data buffer */
|
||||
#define UBLK_IO_F_INTEGRITY (1UL << 18)
|
||||
/*
|
||||
* I/O buffer is in a registered shared memory buffer. When set, the addr
|
||||
* field in ublksrv_io_desc encodes buffer index and byte offset instead
|
||||
* of a userspace virtual address.
|
||||
*/
|
||||
#define UBLK_IO_F_SHMEM_ZC (1U << 19)
|
||||
|
||||
/*
|
||||
* io cmd is described by this structure, and stored in share memory, indexed
|
||||
@@ -743,4 +788,31 @@ struct ublk_params {
|
||||
struct ublk_param_integrity integrity;
|
||||
};
|
||||
|
||||
/*
|
||||
* Shared memory zero-copy addr encoding for UBLK_IO_F_SHMEM_ZC.
|
||||
*
|
||||
* When UBLK_IO_F_SHMEM_ZC is set, ublksrv_io_desc.addr is encoded as:
|
||||
* bits [0:31] = byte offset within the buffer (up to 4GB)
|
||||
* bits [32:47] = buffer index (up to 65536)
|
||||
* bits [48:63] = reserved (must be zero)
|
||||
*/
|
||||
#define UBLK_SHMEM_ZC_OFF_MASK 0xffffffffULL
|
||||
#define UBLK_SHMEM_ZC_IDX_OFF 32
|
||||
#define UBLK_SHMEM_ZC_IDX_MASK 0xffffULL
|
||||
|
||||
static inline __u64 ublk_shmem_zc_addr(__u16 index, __u32 offset)
|
||||
{
|
||||
return ((__u64)index << UBLK_SHMEM_ZC_IDX_OFF) | offset;
|
||||
}
|
||||
|
||||
static inline __u16 ublk_shmem_zc_index(__u64 addr)
|
||||
{
|
||||
return (addr >> UBLK_SHMEM_ZC_IDX_OFF) & UBLK_SHMEM_ZC_IDX_MASK;
|
||||
}
|
||||
|
||||
static inline __u32 ublk_shmem_zc_offset(__u64 addr)
|
||||
{
|
||||
return (__u32)(addr & UBLK_SHMEM_ZC_OFF_MASK);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user