io_uring/zcrx: add copy fallback
There are scenarios in which the zerocopy path can get a kernel buffer
instead of a net_iov and needs to copy it to the user, whether because
of mis-steering or simply because the skb has data in its linear part.
In this case, grab a net_iov, copy into it, and return it to the user
as normal.

At the moment the user doesn't get any indication of whether there was
a copy or not, which is left for follow-up work.

Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: David Wei <dw@davidwei.uk>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20250215000947.789731-10-dw@davidwei.uk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
commit bc57c7d36c (parent 931dfae190)
committed by Jens Axboe

 io_uring/zcrx.c | 120
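At its core, the fallback is one bounce-copy per page: take a spare
net_iov off the area's freelist, memcpy into its backing page, and
complete it exactly like a zero-copy buffer. A condensed, single-iteration
sketch of what io_zcrx_copy_chunk() in the diff below does; the wrapper
name is hypothetical, all other names come from the patch, and the
chunking loop plus the linear-skb source variant are elided:

/*
 * Sketch only, condensed from the diff below: one bounce-copy of up to
 * PAGE_SIZE bytes. 'len' must fit in one page and 'src_off + len' must
 * not cross the source page boundary.
 */
static ssize_t copy_fallback_once(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
				  struct page *src_page, unsigned int src_off,
				  size_t len)
{
	struct net_iov *niov = io_zcrx_alloc_fallback(ifq->area);
	void *dst, *src;

	if (!niov)
		return -ENOMEM;			/* area freelist exhausted */

	dst = kmap_local_page(io_zcrx_iov_page(niov));
	src = kmap_local_page(src_page);
	memcpy(dst, src + src_off, len);
	kunmap_local(src);			/* LIFO unmap order */
	kunmap_local(dst);

	/* the CQE is indistinguishable from a zero-copy completion */
	if (!io_zcrx_queue_cqe(req, niov, ifq, 0, len)) {
		io_zcrx_return_niov(niov);
		return -ENOSPC;
	}
	io_zcrx_get_niov_uref(niov);		/* user now owns a reference */
	return len;
}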
@@ -7,6 +7,7 @@
 #include <linux/io_uring.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
+#include <linux/skbuff_ref.h>
 
 #include <net/page_pool/helpers.h>
 #include <net/page_pool/memory_provider.h>
@@ -134,6 +135,13 @@ static void io_zcrx_get_niov_uref(struct net_iov *niov)
 	atomic_inc(io_get_user_counter(niov));
 }
 
+static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
+{
+	struct io_zcrx_area *area = io_zcrx_iov_to_area(niov);
+
+	return area->pages[net_iov_idx(niov)];
+}
+
 static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
 				 struct io_uring_zcrx_ifq_reg *reg,
 				 struct io_uring_region_desc *rd)
@@ -448,6 +456,11 @@ static void io_zcrx_return_niov(struct net_iov *niov)
 {
 	netmem_ref netmem = net_iov_to_netmem(niov);
 
+	if (!niov->pp) {
+		/* copy fallback allocated niovs */
+		io_zcrx_return_niov_freelist(niov);
+		return;
+	}
 	page_pool_put_unrefed_netmem(niov->pp, netmem, -1, false);
 }
 
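The new !niov->pp check works because fallback niovs never enter a page
pool: io_zcrx_alloc_fallback() (added below) pulls one straight off the
area freelist and only biases its reference count, leaving niov->pp
NULL. A minimal sketch of the paired lifecycle, using only functions
from this commit (the wrapper name is hypothetical):

/* Sketch: round trip of a copy-fallback niov, names from this commit. */
static void fallback_niov_roundtrip(struct io_zcrx_area *area)
{
	struct net_iov *niov = io_zcrx_alloc_fallback(area);

	if (!niov)
		return;		/* freelist empty; callers see -ENOMEM */
	/*
	 * page_pool_fragment_netmem(netmem, 1) gave it one reference,
	 * but niov->pp is still NULL, so ...
	 */
	io_zcrx_return_niov(niov);
	/* ... io_zcrx_return_niov() routes it back to the freelist. */
}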
@@ -686,13 +699,93 @@ static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
 	return true;
 }
 
+static struct net_iov *io_zcrx_alloc_fallback(struct io_zcrx_area *area)
+{
+	struct net_iov *niov = NULL;
+
+	spin_lock_bh(&area->freelist_lock);
+	if (area->free_count)
+		niov = __io_zcrx_get_free_niov(area);
+	spin_unlock_bh(&area->freelist_lock);
+
+	if (niov)
+		page_pool_fragment_netmem(net_iov_to_netmem(niov), 1);
+	return niov;
+}
+
+static ssize_t io_zcrx_copy_chunk(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
+				  void *src_base, struct page *src_page,
+				  unsigned int src_offset, size_t len)
+{
+	struct io_zcrx_area *area = ifq->area;
+	size_t copied = 0;
+	int ret = 0;
+
+	while (len) {
+		size_t copy_size = min_t(size_t, PAGE_SIZE, len);
+		const int dst_off = 0;
+		struct net_iov *niov;
+		struct page *dst_page;
+		void *dst_addr;
+
+		niov = io_zcrx_alloc_fallback(area);
+		if (!niov) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		dst_page = io_zcrx_iov_page(niov);
+		dst_addr = kmap_local_page(dst_page);
+		if (src_page)
+			src_base = kmap_local_page(src_page);
+
+		memcpy(dst_addr, src_base + src_offset, copy_size);
+
+		if (src_page)
+			kunmap_local(src_base);
+		kunmap_local(dst_addr);
+
+		if (!io_zcrx_queue_cqe(req, niov, ifq, dst_off, copy_size)) {
+			io_zcrx_return_niov(niov);
+			ret = -ENOSPC;
+			break;
+		}
+
+		io_zcrx_get_niov_uref(niov);
+		src_offset += copy_size;
+		len -= copy_size;
+		copied += copy_size;
+	}
+
+	return copied ? copied : ret;
+}
+
+static int io_zcrx_copy_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
+			     const skb_frag_t *frag, int off, int len)
+{
+	struct page *page = skb_frag_page(frag);
+	u32 p_off, p_len, t, copied = 0;
+	int ret = 0;
+
+	off += skb_frag_off(frag);
+
+	skb_frag_foreach_page(frag, off, len,
+			      page, p_off, p_len, t) {
+		ret = io_zcrx_copy_chunk(req, ifq, NULL, page, p_off, p_len);
+		if (ret < 0)
+			return copied ? copied : ret;
+		copied += ret;
+	}
+	return copied;
+}
+
 static int io_zcrx_recv_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
 			     const skb_frag_t *frag, int off, int len)
 {
 	struct net_iov *niov;
 
 	if (unlikely(!skb_frag_is_net_iov(frag)))
-		return -EOPNOTSUPP;
+		return io_zcrx_copy_frag(req, ifq, frag, off, len);
 
 	niov = netmem_to_net_iov(frag->netmem);
 	if (niov->pp->mp_ops != &io_uring_pp_zc_ops ||
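io_zcrx_copy_frag() above relies on skb_frag_foreach_page() to split a
frag that may span several (possibly highmem) pages into kmap-able
per-page chunks, which is what lets io_zcrx_copy_chunk() treat each
source range as fitting one page. A minimal sketch of the iteration
pattern (the function name is hypothetical; the macro is the standard
one from <linux/skbuff.h>):

/* Sketch: walk a frag page by page, as io_zcrx_copy_frag() does. */
static size_t frag_len_by_pages(const skb_frag_t *frag, int off, int len)
{
	struct page *page = skb_frag_page(frag);
	size_t total = 0;
	u32 p_off, p_len, t;

	off += skb_frag_off(frag);
	skb_frag_foreach_page(frag, off, len, page, p_off, p_len, t) {
		/* each pass yields one page and an in-page [p_off, p_len) */
		total += p_len;
	}
	return total;		/* equals len: the chunks tile the range */
}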
@@ -719,18 +812,33 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
 	struct io_zcrx_ifq *ifq = args->ifq;
 	struct io_kiocb *req = args->req;
 	struct sk_buff *frag_iter;
-	unsigned start, start_off;
+	unsigned start, start_off = offset;
 	int i, copy, end, off;
 	int ret = 0;
 
 	if (unlikely(args->nr_skbs++ > IO_SKBS_PER_CALL_LIMIT))
 		return -EAGAIN;
 
-	start = skb_headlen(skb);
-	start_off = offset;
+	if (unlikely(offset < skb_headlen(skb))) {
+		ssize_t copied;
+		size_t to_copy;
 
-	if (offset < start)
-		return -EOPNOTSUPP;
+		to_copy = min_t(size_t, skb_headlen(skb) - offset, len);
+		copied = io_zcrx_copy_chunk(req, ifq, skb->data, NULL,
+					    offset, to_copy);
+		if (copied < 0) {
+			ret = copied;
+			goto out;
+		}
+		offset += copied;
+		len -= copied;
+		if (!len)
+			goto out;
+		if (offset != skb_headlen(skb))
+			goto out;
+	}
+
+	start = skb_headlen(skb);
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		const skb_frag_t *frag;
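From userspace the fallback is invisible, as the commit message notes:
a completion for copied data carries an area offset exactly like a
zero-copy one. A consumer-side sketch, assuming the zcrx uAPI from this
series (32-byte CQEs with a trailing struct io_uring_zcrx_cqe and
IORING_ZCRX_AREA_MASK for the non-offset bits); the consume()/refill()
hooks are illustrative assumptions, not part of this patch:

/*
 * Consumer-side sketch. Assumes IORING_SETUP_CQE32 and the zcrx uAPI
 * from this series; area_base is the mmap()ed buffer area, and
 * consume()/refill() are hypothetical application hooks.
 */
static void handle_zcrx_cqe(struct io_uring_cqe *cqe, char *area_base)
{
	struct io_uring_zcrx_cqe *rcqe = (struct io_uring_zcrx_cqe *)(cqe + 1);
	__u64 off = rcqe->off & ~IORING_ZCRX_AREA_MASK;

	/* same path whether the kernel zero-copied or bounce-copied */
	consume(area_base + off, cqe->res);
	refill(rcqe->off, cqe->res);	/* hand the buffer back via the refill ring */
}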