Merge branch 'xdp-a-fistful-of-generic-changes-pt-ii'

Alexander Lobakin says:

====================
xdp: a fistful of generic changes pt. II (part)

XDP for idpf is currently 5.5 chapters:
* convert Rx to libeth;
* convert Tx and stats to libeth;
* generic XDP and XSk code changes;
* generic XDP and XSk code additions (you are here);
* actual XDP for idpf via new libeth_xdp;
* XSk for idpf (via ^).

Part III.2.1 does the following:
* allows mixing pages from several Page Pools within one XDP frame;
* optimizes the &xdp_frame structure and removes a no-longer-used field.

Everything here is a prerequisite for libeth_xdp, but is useful standalone
as well: faster xdp_return_frame_bulk() and faster access to xdp_frame fields.
====================

Link: https://patch.msgid.link/20241211172649.761483-1-aleksander.lobakin@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Committed by Jakub Kicinski, 2024-12-12 18:23:08 -08:00
10 changed files with 158 additions and 135 deletions

View File

@@ -2281,7 +2281,7 @@ static int dpaa_a050385_wa_xdpf(struct dpaa_priv *priv,
new_xdpf->len = xdpf->len;
new_xdpf->headroom = priv->tx_headroom;
new_xdpf->frame_sz = DPAA_BP_RAW_SIZE;
new_xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
new_xdpf->mem_type = MEM_TYPE_PAGE_ORDER0;
/* Release the initial buffer */
xdp_return_frame_rx_napi(xdpf);

View File

@@ -634,7 +634,7 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
break;
case XDP_TX:
orig_frame = *frame;
xdp->rxq->mem = frame->mem;
xdp->rxq->mem.type = frame->mem_type;
if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) {
trace_xdp_exception(rq->dev, xdp_prog, act);
frame = &orig_frame;
@@ -646,7 +646,7 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
goto xdp_xmit;
case XDP_REDIRECT:
orig_frame = *frame;
xdp->rxq->mem = frame->mem;
xdp->rxq->mem.type = frame->mem_type;
if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
frame = &orig_frame;
stats->rx_drops++;

View File

@@ -3674,7 +3674,7 @@ static inline void skb_frag_page_copy(skb_frag_t *fragto,
bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio);
/**
* skb_frag_dma_map - maps a paged fragment via the DMA API
* __skb_frag_dma_map - maps a paged fragment via the DMA API
* @dev: the device to map the fragment to
* @frag: the paged fragment to map
* @offset: the offset within the fragment (starting at the
@@ -3684,15 +3684,36 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio);
*
* Maps the page associated with @frag to @device.
*/
static inline dma_addr_t skb_frag_dma_map(struct device *dev,
const skb_frag_t *frag,
size_t offset, size_t size,
enum dma_data_direction dir)
static inline dma_addr_t __skb_frag_dma_map(struct device *dev,
const skb_frag_t *frag,
size_t offset, size_t size,
enum dma_data_direction dir)
{
return dma_map_page(dev, skb_frag_page(frag),
skb_frag_off(frag) + offset, size, dir);
}
#define skb_frag_dma_map(dev, frag, ...) \
CONCATENATE(_skb_frag_dma_map, \
COUNT_ARGS(__VA_ARGS__))(dev, frag, ##__VA_ARGS__)
#define __skb_frag_dma_map1(dev, frag, offset, uf, uo) ({ \
const skb_frag_t *uf = (frag); \
size_t uo = (offset); \
\
__skb_frag_dma_map(dev, uf, uo, skb_frag_size(uf) - uo, \
DMA_TO_DEVICE); \
})
#define _skb_frag_dma_map1(dev, frag, offset) \
__skb_frag_dma_map1(dev, frag, offset, __UNIQUE_ID(frag_), \
__UNIQUE_ID(offset_))
#define _skb_frag_dma_map0(dev, frag) \
_skb_frag_dma_map1(dev, frag, 0)
#define _skb_frag_dma_map2(dev, frag, offset, size) \
__skb_frag_dma_map(dev, frag, offset, size, DMA_TO_DEVICE)
#define _skb_frag_dma_map3(dev, frag, offset, size, dir) \
__skb_frag_dma_map(dev, frag, offset, size, dir)
static inline struct sk_buff *pskb_copy(struct sk_buff *skb,
gfp_t gfp_mask)
{
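
For illustration only (not part of the patch): a minimal sketch of how a caller
might use the argument-count-based skb_frag_dma_map() forms defined above. The
helper name example_map_frag() is hypothetical; both call forms expand to the
macros shown in the hunk.

#include <linux/dma-mapping.h>
#include <linux/skbuff.h>

static int example_map_frag(struct device *dev, const skb_frag_t *frag)
{
	dma_addr_t dma;

	/* Two-argument form: whole fragment, offset 0, DMA_TO_DEVICE */
	dma = skb_frag_dma_map(dev, frag);
	if (dma_mapping_error(dev, dma))
		return -ENOMEM;
	dma_unmap_page(dev, dma, skb_frag_size(frag), DMA_TO_DEVICE);

	/* Five-argument form: same semantics as the old skb_frag_dma_map() */
	dma = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag),
			       DMA_TO_DEVICE);
	if (dma_mapping_error(dev, dma))
		return -ENOMEM;
	dma_unmap_page(dev, dma, skb_frag_size(frag), DMA_TO_DEVICE);

	return 0;
}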

View File

@@ -259,8 +259,7 @@ void page_pool_disable_direct_recycling(struct page_pool *pool);
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
const struct xdp_mem_info *mem);
void page_pool_put_netmem_bulk(struct page_pool *pool, netmem_ref *data,
u32 count);
void page_pool_put_netmem_bulk(netmem_ref *data, u32 count);
#else
static inline void page_pool_destroy(struct page_pool *pool)
{
@@ -272,8 +271,7 @@ static inline void page_pool_use_xdp_mem(struct page_pool *pool,
{
}
static inline void page_pool_put_netmem_bulk(struct page_pool *pool,
netmem_ref *data, u32 count)
static inline void page_pool_put_netmem_bulk(netmem_ref *data, u32 count)
{
}
#endif

View File

@@ -11,6 +11,8 @@
#include <linux/netdevice.h>
#include <linux/skbuff.h> /* skb_shared_info */
#include <net/page_pool/types.h>
/**
* DOC: XDP RX-queue information
*
@@ -167,13 +169,13 @@ xdp_get_buff_len(const struct xdp_buff *xdp)
struct xdp_frame {
void *data;
u16 len;
u16 headroom;
u32 len;
u32 headroom;
u32 metasize; /* uses lower 8-bits */
/* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time,
* while mem info is valid on remote CPU.
* while mem_type is valid on remote CPU.
*/
struct xdp_mem_info mem;
enum xdp_mem_type mem_type:32;
struct net_device *dev_rx; /* used by cpumap */
u32 frame_sz;
u32 flags; /* supported values defined in xdp_buff_flags */
@@ -193,14 +195,12 @@ xdp_frame_is_frag_pfmemalloc(const struct xdp_frame *frame)
#define XDP_BULK_QUEUE_SIZE 16
struct xdp_frame_bulk {
int count;
void *xa;
netmem_ref q[XDP_BULK_QUEUE_SIZE];
};
static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq)
{
/* bq->count will be zero'ed when bq->xa gets updated */
bq->xa = NULL;
bq->count = 0;
}
static inline struct skb_shared_info *
@@ -306,21 +306,29 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
if (unlikely(xdp_update_frame_from_buff(xdp, xdp_frame) < 0))
return NULL;
/* rxq only valid until napi_schedule ends, convert to xdp_mem_info */
xdp_frame->mem = xdp->rxq->mem;
/* rxq only valid until napi_schedule ends, convert to xdp_mem_type */
xdp_frame->mem_type = xdp->rxq->mem.type;
return xdp_frame;
}
void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
struct xdp_buff *xdp);
void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type,
bool napi_direct, struct xdp_buff *xdp);
void xdp_return_frame(struct xdp_frame *xdpf);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp);
void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
struct xdp_frame_bulk *bq);
static inline void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq)
{
if (unlikely(!bq->count))
return;
page_pool_put_netmem_bulk(bq->q, bq->count);
bq->count = 0;
}
static __always_inline unsigned int
xdp_get_frame_len(const struct xdp_frame *xdpf)
{
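
A hedged usage sketch, not taken from the patch (the function and variable
names are invented): a Tx-completion path keeps the same init/return/flush
pattern, only now &xdp_frame_bulk carries no allocator pointer and the flush
is a plain page_pool call.

#include <linux/rcupdate.h>
#include <net/xdp.h>

/* Illustrative completion loop; 'frames' and 'n' come from the caller. */
static void example_tx_clean(struct xdp_frame **frames, u32 n)
{
	struct xdp_frame_bulk bq;
	u32 i;

	xdp_frame_bulk_init(&bq);	/* now only zeroes bq.count */

	rcu_read_lock();		/* still required by xdp_return_frame_bulk() */
	for (i = 0; i < n; i++)
		xdp_return_frame_bulk(frames[i], &bq);
	xdp_flush_frame_bulk(&bq);	/* inline page_pool_put_netmem_bulk() */
	rcu_read_unlock();
}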

View File

@@ -190,7 +190,7 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
int err;
rxq.dev = xdpf->dev_rx;
rxq.mem = xdpf->mem;
rxq.mem.type = xdpf->mem_type;
/* TODO: report queue_index to xdp_rxq_info */
xdp_convert_frame_to_buff(xdpf, &xdp);

View File

@@ -153,7 +153,7 @@ static void xdp_test_run_init_page(netmem_ref netmem, void *arg)
new_ctx->data = new_ctx->data_meta + meta_len;
xdp_update_frame_from_buff(new_ctx, frm);
frm->mem = new_ctx->rxq->mem;
frm->mem_type = new_ctx->rxq->mem.type;
memcpy(&head->orig_ctx, new_ctx, sizeof(head->orig_ctx));
}
@@ -246,7 +246,7 @@ static void reset_ctx(struct xdp_page_head *head)
head->ctx.data_meta = head->orig_ctx.data_meta;
head->ctx.data_end = head->orig_ctx.data_end;
xdp_update_frame_from_buff(&head->ctx, head->frame);
head->frame->mem = head->orig_ctx.rxq->mem;
head->frame->mem_type = head->orig_ctx.rxq->mem.type;
}
static int xdp_recv_frames(struct xdp_frame **frames, int nframes,

View File

@@ -4119,13 +4119,13 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
}
static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
struct xdp_mem_info *mem_info, bool release)
enum xdp_mem_type mem_type, bool release)
{
struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp);
if (release) {
xsk_buff_del_tail(zc_frag);
__xdp_return(NULL, mem_info, false, zc_frag);
__xdp_return(0, mem_type, false, zc_frag);
} else {
zc_frag->data_end -= shrink;
}
@@ -4134,19 +4134,16 @@ static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
int shrink)
{
struct xdp_mem_info *mem_info = &xdp->rxq->mem;
enum xdp_mem_type mem_type = xdp->rxq->mem.type;
bool release = skb_frag_size(frag) == shrink;
if (mem_info->type == MEM_TYPE_XSK_BUFF_POOL) {
bpf_xdp_shrink_data_zc(xdp, shrink, mem_info, release);
if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
bpf_xdp_shrink_data_zc(xdp, shrink, mem_type, release);
goto out;
}
if (release) {
struct page *page = skb_frag_page(frag);
__xdp_return(page_address(page), mem_info, false, NULL);
}
if (release)
__xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);
out:
return release;

View File

@@ -839,9 +839,41 @@ void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
}
EXPORT_SYMBOL(page_pool_put_unrefed_page);
static void page_pool_recycle_ring_bulk(struct page_pool *pool,
netmem_ref *bulk,
u32 bulk_len)
{
bool in_softirq;
u32 i;
/* Bulk produce into ptr_ring page_pool cache */
in_softirq = page_pool_producer_lock(pool);
for (i = 0; i < bulk_len; i++) {
if (__ptr_ring_produce(&pool->ring, (__force void *)bulk[i])) {
/* ring full */
recycle_stat_inc(pool, ring_full);
break;
}
}
page_pool_producer_unlock(pool, in_softirq);
recycle_stat_add(pool, ring, i);
/* Hopefully all pages were returned into ptr_ring */
if (likely(i == bulk_len))
return;
/*
* ptr_ring cache is full, free remaining pages outside producer lock
* since put_page() with refcnt == 1 can be an expensive operation.
*/
for (; i < bulk_len; i++)
page_pool_return_page(pool, bulk[i]);
}
/**
* page_pool_put_netmem_bulk() - release references on multiple netmems
* @pool: pool from which pages were allocated
* @data: array holding netmem references
* @count: number of entries in @data
*
@@ -854,52 +886,55 @@ EXPORT_SYMBOL(page_pool_put_unrefed_page);
* Please note the caller must not use data area after running
* page_pool_put_netmem_bulk(), as this function overwrites it.
*/
void page_pool_put_netmem_bulk(struct page_pool *pool, netmem_ref *data,
u32 count)
void page_pool_put_netmem_bulk(netmem_ref *data, u32 count)
{
int i, bulk_len = 0;
bool allow_direct;
bool in_softirq;
u32 bulk_len = 0;
allow_direct = page_pool_napi_local(pool);
for (i = 0; i < count; i++) {
for (u32 i = 0; i < count; i++) {
netmem_ref netmem = netmem_compound_head(data[i]);
/* It is not the last user for the page frag case */
if (!page_pool_is_last_ref(netmem))
continue;
netmem = __page_pool_put_page(pool, netmem, -1, allow_direct);
/* Approved for bulk recycling in ptr_ring cache */
if (netmem)
if (page_pool_is_last_ref(netmem))
data[bulk_len++] = netmem;
}
if (!bulk_len)
return;
count = bulk_len;
while (count) {
netmem_ref bulk[XDP_BULK_QUEUE_SIZE];
struct page_pool *pool = NULL;
bool allow_direct;
u32 foreign = 0;
/* Bulk producer into ptr_ring page_pool cache */
in_softirq = page_pool_producer_lock(pool);
for (i = 0; i < bulk_len; i++) {
if (__ptr_ring_produce(&pool->ring, (__force void *)data[i])) {
/* ring full */
recycle_stat_inc(pool, ring_full);
break;
bulk_len = 0;
for (u32 i = 0; i < count; i++) {
struct page_pool *netmem_pp;
netmem_ref netmem = data[i];
netmem_pp = netmem_get_pp(netmem);
if (unlikely(!pool)) {
pool = netmem_pp;
allow_direct = page_pool_napi_local(pool);
} else if (netmem_pp != pool) {
/*
* If the netmem belongs to a different
* page_pool, save it for another round.
*/
data[foreign++] = netmem;
continue;
}
netmem = __page_pool_put_page(pool, netmem, -1,
allow_direct);
/* Approved for bulk recycling in ptr_ring cache */
if (netmem)
bulk[bulk_len++] = netmem;
}
if (bulk_len)
page_pool_recycle_ring_bulk(pool, bulk, bulk_len);
count = foreign;
}
recycle_stat_add(pool, ring, i);
page_pool_producer_unlock(pool, in_softirq);
/* Hopefully all pages was return into ptr_ring */
if (likely(i == bulk_len))
return;
/* ptr_ring cache full, free remaining pages outside producer lock
* since put_page() with refcnt == 1 can be an expensive operation
*/
for (; i < bulk_len; i++)
page_pool_return_page(pool, data[i]);
}
EXPORT_SYMBOL(page_pool_put_netmem_bulk);
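
For illustration (an assumed caller, not code from the patch): the bulk-free
entry point can now be handed netmems that originate from different page_pools
in a single call. The helper name is hypothetical; the count is expected to
stay within what xdp_flush_frame_bulk() passes.

#include <net/page_pool/types.h>
#include <net/xdp.h>	/* XDP_BULK_QUEUE_SIZE */

/* Hypothetical caller: 'netmems' may mix entries owned by different
 * page_pools; 'count' is assumed to be at most XDP_BULK_QUEUE_SIZE,
 * matching the in-tree caller.
 */
static void example_free_collected(netmem_ref *netmems, u32 count)
{
	/* One recycling round per distinct pool; entries belonging to a
	 * different pool are deferred to the next round. The array is
	 * overwritten in the process.
	 */
	page_pool_put_netmem_bulk(netmems, count);
}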

View File

@@ -430,27 +430,25 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_attach_page_pool);
* is used for those calls sites. Thus, allowing for faster recycling
* of xdp_frames/pages in those cases.
*/
void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
struct xdp_buff *xdp)
void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type,
bool napi_direct, struct xdp_buff *xdp)
{
struct page *page;
switch (mem->type) {
switch (mem_type) {
case MEM_TYPE_PAGE_POOL:
page = virt_to_head_page(data);
netmem = netmem_compound_head(netmem);
if (napi_direct && xdp_return_frame_no_direct())
napi_direct = false;
/* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE)
* as mem->type knows this a page_pool page
*/
page_pool_put_full_page(page->pp, page, napi_direct);
page_pool_put_full_netmem(netmem_get_pp(netmem), netmem,
napi_direct);
break;
case MEM_TYPE_PAGE_SHARED:
page_frag_free(data);
page_frag_free(__netmem_address(netmem));
break;
case MEM_TYPE_PAGE_ORDER0:
page = virt_to_page(data); /* Assumes order0 page*/
put_page(page);
put_page(__netmem_to_page(netmem));
break;
case MEM_TYPE_XSK_BUFF_POOL:
/* NB! Only valid from an xdp_buff! */
@@ -458,7 +456,7 @@ void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
break;
default:
/* Not possible, checked in xdp_rxq_info_reg_mem_model() */
WARN(1, "Incorrect XDP memory type (%d) usage", mem->type);
WARN(1, "Incorrect XDP memory type (%d) usage", mem_type);
break;
}
}
@@ -466,38 +464,34 @@ void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
void xdp_return_frame(struct xdp_frame *xdpf)
{
struct skb_shared_info *sinfo;
int i;
if (likely(!xdp_frame_has_frags(xdpf)))
goto out;
sinfo = xdp_get_shared_info_from_frame(xdpf);
for (i = 0; i < sinfo->nr_frags; i++) {
struct page *page = skb_frag_page(&sinfo->frags[i]);
for (u32 i = 0; i < sinfo->nr_frags; i++)
__xdp_return(skb_frag_netmem(&sinfo->frags[i]), xdpf->mem_type,
false, NULL);
__xdp_return(page_address(page), &xdpf->mem, false, NULL);
}
out:
__xdp_return(xdpf->data, &xdpf->mem, false, NULL);
__xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, false, NULL);
}
EXPORT_SYMBOL_GPL(xdp_return_frame);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
struct skb_shared_info *sinfo;
int i;
if (likely(!xdp_frame_has_frags(xdpf)))
goto out;
sinfo = xdp_get_shared_info_from_frame(xdpf);
for (i = 0; i < sinfo->nr_frags; i++) {
struct page *page = skb_frag_page(&sinfo->frags[i]);
for (u32 i = 0; i < sinfo->nr_frags; i++)
__xdp_return(skb_frag_netmem(&sinfo->frags[i]), xdpf->mem_type,
true, NULL);
__xdp_return(page_address(page), &xdpf->mem, true, NULL);
}
out:
__xdp_return(xdpf->data, &xdpf->mem, true, NULL);
__xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, true, NULL);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
@@ -511,46 +505,19 @@ EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
* xdp_frame_bulk is usually stored/allocated on the function
* call-stack to avoid locking penalties.
*/
void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq)
{
struct xdp_mem_allocator *xa = bq->xa;
if (unlikely(!xa || !bq->count))
return;
page_pool_put_netmem_bulk(xa->page_pool, bq->q, bq->count);
/* bq->xa is not cleared to save lookup, if mem.id same in next bulk */
bq->count = 0;
}
EXPORT_SYMBOL_GPL(xdp_flush_frame_bulk);
/* Must be called with rcu_read_lock held */
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
struct xdp_frame_bulk *bq)
{
struct xdp_mem_info *mem = &xdpf->mem;
struct xdp_mem_allocator *xa;
if (mem->type != MEM_TYPE_PAGE_POOL) {
if (xdpf->mem_type != MEM_TYPE_PAGE_POOL) {
xdp_return_frame(xdpf);
return;
}
xa = bq->xa;
if (unlikely(!xa)) {
xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
bq->count = 0;
bq->xa = xa;
}
if (bq->count == XDP_BULK_QUEUE_SIZE)
xdp_flush_frame_bulk(bq);
if (unlikely(mem->id != xa->mem.id)) {
xdp_flush_frame_bulk(bq);
bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
}
if (unlikely(xdp_frame_has_frags(xdpf))) {
struct skb_shared_info *sinfo;
int i;
@@ -571,19 +538,17 @@ EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
void xdp_return_buff(struct xdp_buff *xdp)
{
struct skb_shared_info *sinfo;
int i;
if (likely(!xdp_buff_has_frags(xdp)))
goto out;
sinfo = xdp_get_shared_info_from_buff(xdp);
for (i = 0; i < sinfo->nr_frags; i++) {
struct page *page = skb_frag_page(&sinfo->frags[i]);
for (u32 i = 0; i < sinfo->nr_frags; i++)
__xdp_return(skb_frag_netmem(&sinfo->frags[i]),
xdp->rxq->mem.type, true, xdp);
__xdp_return(page_address(page), &xdp->rxq->mem, true, xdp);
}
out:
__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
__xdp_return(virt_to_netmem(xdp->data), xdp->rxq->mem.type, true, xdp);
}
EXPORT_SYMBOL_GPL(xdp_return_buff);
@@ -629,7 +594,7 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
xdpf->headroom = 0;
xdpf->metasize = metasize;
xdpf->frame_sz = PAGE_SIZE;
xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
xdpf->mem_type = MEM_TYPE_PAGE_ORDER0;
xsk_buff_free(xdp);
return xdpf;
@@ -699,7 +664,7 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
* - RX ring dev queue index (skb_record_rx_queue)
*/
if (xdpf->mem.type == MEM_TYPE_PAGE_POOL)
if (xdpf->mem_type == MEM_TYPE_PAGE_POOL)
skb_mark_for_recycle(skb);
/* Allow SKB to reuse area used by xdp_frame */
@@ -746,8 +711,7 @@ struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf)
nxdpf = addr;
nxdpf->data = addr + headroom;
nxdpf->frame_sz = PAGE_SIZE;
nxdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
nxdpf->mem.id = 0;
nxdpf->mem_type = MEM_TYPE_PAGE_ORDER0;
return nxdpf;
}
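
A small illustrative sketch (names invented, not from the patch) of the path
these changes serve: a driver converts an Rx xdp_buff into an xdp_frame for
XDP_TX, and the single mem_type value copied at conversion time later selects
the correct return path.

#include <linux/errno.h>
#include <net/xdp.h>

/* Hypothetical driver Tx hook; returns 0 on success. Hard-coded to fail
 * here purely to exercise the free path below.
 */
static int example_ring_xmit(struct xdp_frame *xdpf)
{
	return -ENOSPC;
}

static int example_xdp_tx(struct xdp_buff *xdp)
{
	struct xdp_frame *xdpf;

	xdpf = xdp_convert_buff_to_frame(xdp);	/* copies only rxq->mem.type */
	if (unlikely(!xdpf))
		return -ENOMEM;

	if (example_ring_xmit(xdpf)) {
		/* xdpf->mem_type steers recycling: page_pool, put_page(), ... */
		xdp_return_frame_rx_napi(xdpf);
		return -EBUSY;
	}

	return 0;
}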