From d8ea89fe8a49bfa18b009b16b66c137dba263f87 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Wed, 9 Jun 2021 15:47:13 +0200 Subject: [PATCH 1/2] mvpp2: prefetch right address In the RX buffer, the received data starts after a headroom used to align the IP header and to allow prepending headers efficiently. The prefetch() should take this into account, and prefetch from the very start of the received data. We can see that ether_addr_equal_64bits(), which is the first function to access the data, drops from the top of the perf top output. prefetch(data): Overhead Shared Object Symbol 11.64% [kernel] [k] eth_type_trans prefetch(data + MVPP2_MH_SIZE + MVPP2_SKB_HEADROOM): Overhead Shared Object Symbol 13.42% [kernel] [k] build_skb 10.35% [mvpp2] [k] mvpp2_rx 9.35% [kernel] [k] __netif_receive_skb_core 8.24% [kernel] [k] kmem_cache_free 7.97% [kernel] [k] dev_gro_receive 7.68% [kernel] [k] page_pool_put_page 7.32% [kernel] [k] kmem_cache_alloc 7.09% [mvpp2] [k] mvpp2_bm_pool_put 3.36% [kernel] [k] eth_type_trans Also, move the eth_type_trans() call a bit down, to give the RAM more time to prefetch the data. Signed-off-by: Matteo Croce Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 5663c1b21870..07d8f3e31b52 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -3938,7 +3938,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, goto err_drop_frame; /* Prefetch header */ - prefetch(data); + prefetch(data + MVPP2_MH_SIZE + MVPP2_SKB_HEADROOM); if (bm_pool->frag_size > PAGE_SIZE) frag_size = 0; @@ -4008,8 +4008,8 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, skb_reserve(skb, MVPP2_MH_SIZE + MVPP2_SKB_HEADROOM); skb_put(skb, rx_bytes); - skb->protocol = eth_type_trans(skb, dev); mvpp2_rx_csum(port, rx_status, skb); + skb->protocol = eth_type_trans(skb, dev); napi_gro_receive(napi, skb); continue; From 2f128eb3308a74ef478286b75e26aa6d0ed3c6a6 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Wed, 9 Jun 2021 15:47:14 +0200 Subject: [PATCH 2/2] mvpp2: prefetch page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most of the RX time is spent in the compound_head() call done at the end of the RX loop: │ build_skb(): [...] │ static inline struct page *compound_head(struct page *page) │ { │ unsigned long head = READ_ONCE(page->compound_head); 65.23 │ ldr x2, [x1, #8] Prefetch the page struct as soon as possible, to speed up the RX path noticeably, by ~3-4% packet rate in a drop test. │ build_skb(): [...] │ static inline struct page *compound_head(struct page *page) │ { │ unsigned long head = READ_ONCE(page->compound_head); 17.92 │ ldr x2, [x1, #8] Signed-off-by: Matteo Croce Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 07d8f3e31b52..9bca8c8f9f8d 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -3900,15 +3900,19 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, phys_addr_t phys_addr; u32 rx_status, timestamp; int pool, rx_bytes, err, ret; + struct page *page; void *data; + phys_addr = mvpp2_rxdesc_cookie_get(port, rx_desc); + data = (void *)phys_to_virt(phys_addr); + page = virt_to_page(data); + prefetch(page); + rx_done++; rx_status = mvpp2_rxdesc_status_get(port, rx_desc); rx_bytes = mvpp2_rxdesc_size_get(port, rx_desc); rx_bytes -= MVPP2_MH_SIZE; dma_addr = mvpp2_rxdesc_dma_addr_get(port, rx_desc); - phys_addr = mvpp2_rxdesc_cookie_get(port, rx_desc); - data = (void *)phys_to_virt(phys_addr); pool = (rx_status & MVPP2_RXD_BM_POOL_ID_MASK) >> MVPP2_RXD_BM_POOL_ID_OFFS; @@ -3997,7 +4001,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, } if (pp) - skb_mark_for_recycle(skb, virt_to_page(data), pp); + skb_mark_for_recycle(skb, page, pp); else dma_unmap_single_attrs(dev->dev.parent, dma_addr, bm_pool->buf_size, DMA_FROM_DEVICE,