net: mana: Use page pool fragments for RX buffers instead of full pages to improve memory efficiency.
This patch enhances RX buffer handling in the mana driver by allocating pages from a page pool and slicing them into MTU-sized fragments, rather than dedicating a full page per packet. This approach is especially beneficial on systems with large base page sizes like 64KB.

Key improvements:

- Proper integration of the page pool for RX buffer allocations.
- MTU-sized buffer slicing to improve memory utilization.
- Reduced overall per-RX-queue memory footprint.
- Automatic fallback to full-page buffers when:
  * Jumbo frames are enabled (MTU > PAGE_SIZE / 2).
  * The XDP path is active, to avoid complexities with fragment reuse.

Testing on VMs with 64KB pages shows around a 200% throughput improvement, and memory efficiency is significantly improved due to reduced wastage in page allocations. Example: we can now fit 35 RX buffers in a single 64KB page for an MTU of 1500, instead of 1 RX buffer per page previously.

Tested:

- iperf3, iperf2, and nttcp benchmarks.
- Jumbo frames with MTU 9000.
- Native XDP programs (XDP_PASS, XDP_DROP, XDP_TX, XDP_REDIRECT) to exercise the driver's XDP path.
- Memory leak detection (kmemleak).
- Driver load/unload, reboot, and stress scenarios.

Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Saurabh Sengar <ssengar@linux.microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
Link: https://patch.msgid.link/20250814140410.GA22089@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
commit 730ff06d3f
parent a8bdd935d1
committed by Paolo Abeni
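Before the hunks, a quick illustration of the sizing math the commit message describes. The following is a minimal userspace sketch in C, not driver code: MANA_RX_FRAG_ALIGNMENT and the rounding steps mirror the patch, while PAGE_SIZE (64KB), SMP_CACHE_BYTES, ETH_HLEN and the skb_shared_info size behind MANA_RXBUF_PAD are assumed stand-in values for a 64-bit kernel, so the exact numbers are illustrative only.

/* Hedged sketch of the per-page fragment sizing this patch introduces.
 * The kernel macros are replaced with simplified stand-ins; only the
 * arithmetic is meant to follow the new mana_get_rxbuf_cfg() logic.
 */
#include <stdio.h>

#define PAGE_SIZE              (64u * 1024)  /* 64KB base pages (assumption) */
#define SMP_CACHE_BYTES        64u           /* typical cache line (assumption) */
#define ETH_HLEN               14u
#define SKB_SHARED_INFO_SIZE   320u          /* approx sizeof(struct skb_shared_info) */
#define MANA_RX_FRAG_ALIGNMENT 64u           /* value added by this patch */

#define ALIGN(x, a)       (((x) + (a) - 1) & ~((a) - 1))
#define SKB_DATA_ALIGN(x) ALIGN((x), SMP_CACHE_BYTES)
#define MANA_RXBUF_PAD    (SKB_DATA_ALIGN(SKB_SHARED_INFO_SIZE) + ETH_HLEN)

int main(void)
{
        unsigned int mtu = 1500;
        unsigned int headroom = 0; /* non-XDP fragment path: no XDP headroom */
        unsigned int len = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + headroom);
        unsigned int buf_size = ALIGN(len, MANA_RX_FRAG_ALIGNMENT);
        unsigned int frag_count = PAGE_SIZE / buf_size;

        /* With the assumed values this prints buf_size=1856 frag_count=35,
         * i.e. about 35 RX buffers per 64KB page instead of one per page.
         */
        printf("buf_size=%u frag_count=%u\n", buf_size, frag_count);
        return 0;
}

Jumbo MTUs (mtu + MANA_RXBUF_PAD > PAGE_SIZE / 2) and an attached XDP program take the one-fragment-per-page fallback instead, as the first hunks below show.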
@@ -174,6 +174,7 @@ static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog,
         struct mana_port_context *apc = netdev_priv(ndev);
         struct bpf_prog *old_prog;
         struct gdma_context *gc;
+        int err;
 
         gc = apc->ac->gdma_dev->gdma_context;
 
@@ -195,18 +196,57 @@ static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog,
          */
         apc->bpf_prog = prog;
 
+        if (apc->port_is_up) {
+                /* Re-create rxq's after xdp prog was loaded or unloaded.
+                 * Ex: re create rxq's to switch from full pages to smaller
+                 * size page fragments when xdp prog is unloaded and
+                 * vice-versa.
+                 */
+
+                /* Pre-allocate buffers to prevent failure in mana_attach */
+                err = mana_pre_alloc_rxbufs(apc, ndev->mtu, apc->num_queues);
+                if (err) {
+                        NL_SET_ERR_MSG_MOD(extack,
+                                           "XDP: Insufficient memory for tx/rx re-config");
+                        return err;
+                }
+
+                err = mana_detach(ndev, false);
+                if (err) {
+                        netdev_err(ndev,
+                                   "mana_detach failed at xdp set: %d\n", err);
+                        NL_SET_ERR_MSG_MOD(extack,
+                                           "XDP: Re-config failed at detach");
+                        goto err_dealloc_rxbuffs;
+                }
+
+                err = mana_attach(ndev);
+                if (err) {
+                        netdev_err(ndev,
+                                   "mana_attach failed at xdp set: %d\n", err);
+                        NL_SET_ERR_MSG_MOD(extack,
+                                           "XDP: Re-config failed at attach");
+                        goto err_dealloc_rxbuffs;
+                }
+
+                mana_chn_setxdp(apc, prog);
+                mana_pre_dealloc_rxbufs(apc);
+        }
+
         if (old_prog)
                 bpf_prog_put(old_prog);
 
-        if (apc->port_is_up)
-                mana_chn_setxdp(apc, prog);
-
         if (prog)
                 ndev->max_mtu = MANA_XDP_MTU_MAX;
         else
                 ndev->max_mtu = gc->adapter_mtu - ETH_HLEN;
 
         return 0;
+
+err_dealloc_rxbuffs:
+        apc->bpf_prog = old_prog;
+        mana_pre_dealloc_rxbufs(apc);
+        return err;
 }
 
 int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf)
@@ -57,6 +57,15 @@ static bool mana_en_need_log(struct mana_port_context *apc, int err)
         return true;
 }
 
+static void mana_put_rx_page(struct mana_rxq *rxq, struct page *page,
+                             bool from_pool)
+{
+        if (from_pool)
+                page_pool_put_full_page(rxq->page_pool, page, false);
+        else
+                put_page(page);
+}
+
 /* Microsoft Azure Network Adapter (MANA) functions */
 
 static int mana_open(struct net_device *ndev)
@@ -630,21 +639,40 @@ static void *mana_get_rxbuf_pre(struct mana_rxq *rxq, dma_addr_t *da)
 }
 
 /* Get RX buffer's data size, alloc size, XDP headroom based on MTU */
-static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size,
-                               u32 *headroom)
+static void mana_get_rxbuf_cfg(struct mana_port_context *apc,
+                               int mtu, u32 *datasize, u32 *alloc_size,
+                               u32 *headroom, u32 *frag_count)
 {
-        if (mtu > MANA_XDP_MTU_MAX)
-                *headroom = 0; /* no support for XDP */
-        else
-                *headroom = XDP_PACKET_HEADROOM;
-
-        *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom);
-
-        /* Using page pool in this case, so alloc_size is PAGE_SIZE */
-        if (*alloc_size < PAGE_SIZE)
-                *alloc_size = PAGE_SIZE;
+        u32 len, buf_size;
 
+        /* Calculate datasize first (consistent across all cases) */
         *datasize = mtu + ETH_HLEN;
+
+        /* For xdp and jumbo frames make sure only one packet fits per page */
+        if (mtu + MANA_RXBUF_PAD > PAGE_SIZE / 2 || mana_xdp_get(apc)) {
+                if (mana_xdp_get(apc)) {
+                        *headroom = XDP_PACKET_HEADROOM;
+                        *alloc_size = PAGE_SIZE;
+                } else {
+                        *headroom = 0; /* no support for XDP */
+                        *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD +
+                                                     *headroom);
+                }
+
+                *frag_count = 1;
+                return;
+        }
+
+        /* Standard MTU case - optimize for multiple packets per page */
+        *headroom = 0;
+
+        /* Calculate base buffer size needed */
+        len = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom);
+        buf_size = ALIGN(len, MANA_RX_FRAG_ALIGNMENT);
+
+        /* Calculate how many packets can fit in a page */
+        *frag_count = PAGE_SIZE / buf_size;
+        *alloc_size = buf_size;
 }
 
 int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_queues)
@@ -656,8 +684,9 @@ int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_queues)
         void *va;
         int i;
 
-        mana_get_rxbuf_cfg(new_mtu, &mpc->rxbpre_datasize,
-                           &mpc->rxbpre_alloc_size, &mpc->rxbpre_headroom);
+        mana_get_rxbuf_cfg(mpc, new_mtu, &mpc->rxbpre_datasize,
+                           &mpc->rxbpre_alloc_size, &mpc->rxbpre_headroom,
+                           &mpc->rxbpre_frag_count);
 
         dev = mpc->ac->gdma_dev->gdma_context->dev;
 
@@ -1842,8 +1871,11 @@ static void mana_rx_skb(void *buf_va, bool from_pool,
 
 drop:
         if (from_pool) {
-                page_pool_recycle_direct(rxq->page_pool,
-                                         virt_to_head_page(buf_va));
+                if (rxq->frag_count == 1)
+                        page_pool_recycle_direct(rxq->page_pool,
+                                                 virt_to_head_page(buf_va));
+                else
+                        page_pool_free_va(rxq->page_pool, buf_va, true);
         } else {
                 WARN_ON_ONCE(rxq->xdp_save_va);
                 /* Save for reuse */
@@ -1859,33 +1891,46 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
                              dma_addr_t *da, bool *from_pool)
 {
         struct page *page;
+        u32 offset;
         void *va;
 
         *from_pool = false;
 
-        /* Reuse XDP dropped page if available */
-        if (rxq->xdp_save_va) {
-                va = rxq->xdp_save_va;
-                rxq->xdp_save_va = NULL;
-        } else {
-                page = page_pool_dev_alloc_pages(rxq->page_pool);
-                if (!page)
-                        return NULL;
-
-                *from_pool = true;
-                va = page_to_virt(page);
-        }
-
-        *da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
-                             DMA_FROM_DEVICE);
-        if (dma_mapping_error(dev, *da)) {
-                if (*from_pool)
-                        page_pool_put_full_page(rxq->page_pool, page, false);
-                else
-                        put_page(virt_to_head_page(va));
-
-                return NULL;
-        }
+        /* Don't use fragments for jumbo frames or XDP where it's 1 fragment
+         * per page.
+         */
+        if (rxq->frag_count == 1) {
+                /* Reuse XDP dropped page if available */
+                if (rxq->xdp_save_va) {
+                        va = rxq->xdp_save_va;
+                        page = virt_to_head_page(va);
+                        rxq->xdp_save_va = NULL;
+                } else {
+                        page = page_pool_dev_alloc_pages(rxq->page_pool);
+                        if (!page)
+                                return NULL;
+
+                        *from_pool = true;
+                        va = page_to_virt(page);
+                }
+
+                *da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
+                                     DMA_FROM_DEVICE);
+                if (dma_mapping_error(dev, *da)) {
+                        mana_put_rx_page(rxq, page, *from_pool);
+                        return NULL;
+                }
+
+                return va;
+        }
+
+        page = page_pool_dev_alloc_frag(rxq->page_pool, &offset,
+                                        rxq->alloc_size);
+        if (!page)
+                return NULL;
+
+        va = page_to_virt(page) + offset;
+        *da = page_pool_get_dma_addr(page) + offset + rxq->headroom;
+        *from_pool = true;
 
         return va;
 }
@@ -1902,9 +1947,9 @@ static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq,
         va = mana_get_rxfrag(rxq, dev, &da, &from_pool);
         if (!va)
                 return;
 
-        dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize,
-                         DMA_FROM_DEVICE);
+        if (!rxoob->from_pool || rxq->frag_count == 1)
+                dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize,
+                                 DMA_FROM_DEVICE);
         *old_buf = rxoob->buf_va;
         *old_fp = rxoob->from_pool;
 
@@ -2315,15 +2360,15 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
                 if (!rx_oob->buf_va)
                         continue;
 
-                dma_unmap_single(dev, rx_oob->sgl[0].address,
-                                 rx_oob->sgl[0].size, DMA_FROM_DEVICE);
-
                 page = virt_to_head_page(rx_oob->buf_va);
 
-                if (rx_oob->from_pool)
-                        page_pool_put_full_page(rxq->page_pool, page, false);
-                else
-                        put_page(page);
+                if (rxq->frag_count == 1 || !rx_oob->from_pool) {
+                        dma_unmap_single(dev, rx_oob->sgl[0].address,
+                                         rx_oob->sgl[0].size, DMA_FROM_DEVICE);
+                        mana_put_rx_page(rxq, page, rx_oob->from_pool);
+                } else {
+                        page_pool_free_va(rxq->page_pool, rx_oob->buf_va, true);
+                }
 
                 rx_oob->buf_va = NULL;
         }
@@ -2429,11 +2474,22 @@ static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc)
         struct page_pool_params pprm = {};
         int ret;
 
-        pprm.pool_size = mpc->rx_queue_size;
+        pprm.pool_size = mpc->rx_queue_size / rxq->frag_count + 1;
         pprm.nid = gc->numa_node;
         pprm.napi = &rxq->rx_cq.napi;
         pprm.netdev = rxq->ndev;
         pprm.order = get_order(rxq->alloc_size);
         pprm.queue_idx = rxq->rxq_idx;
+        pprm.dev = gc->dev;
+
+        /* Let the page pool do the dma map when page sharing with multiple
+         * fragments enabled for rx buffers.
+         */
+        if (rxq->frag_count > 1) {
+                pprm.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
+                pprm.max_len = PAGE_SIZE;
+                pprm.dma_dir = DMA_FROM_DEVICE;
+        }
 
         rxq->page_pool = page_pool_create(&pprm);
@@ -2472,9 +2528,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
         rxq->rxq_idx = rxq_idx;
         rxq->rxobj = INVALID_MANA_HANDLE;
 
-        mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size,
-                           &rxq->headroom);
-
+        mana_get_rxbuf_cfg(apc, ndev->mtu, &rxq->datasize, &rxq->alloc_size,
+                           &rxq->headroom, &rxq->frag_count);
         /* Create page pool for RX queue */
         err = mana_create_page_pool(rxq, gc);
         if (err) {
@@ -65,6 +65,8 @@ enum TRI_STATE {
 #define MANA_STATS_RX_COUNT 5
 #define MANA_STATS_TX_COUNT 11
 
+#define MANA_RX_FRAG_ALIGNMENT 64
+
 struct mana_stats_rx {
         u64 packets;
         u64 bytes;
@@ -328,6 +330,7 @@ struct mana_rxq {
         u32 datasize;
         u32 alloc_size;
         u32 headroom;
+        u32 frag_count;
 
         mana_handle_t rxobj;
 
@@ -510,6 +513,7 @@ struct mana_port_context {
         u32 rxbpre_datasize;
         u32 rxbpre_alloc_size;
         u32 rxbpre_headroom;
+        u32 rxbpre_frag_count;
 
         struct bpf_prog *bpf_prog;
 