From bc97f9c6f988b31b728eb47a94ca825401dbeffe Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 8 Dec 2021 15:06:54 +0100 Subject: [PATCH 1/9] i40e: don't reserve excessive XDP_PACKET_HEADROOM on XSK Rx to skb {__,}napi_alloc_skb() allocates and reserves additional NET_SKB_PAD + NET_IP_ALIGN for any skb. OTOH, i40e_construct_skb_zc() currently allocates and reserves additional `xdp->data - xdp->data_hard_start`, which is XDP_PACKET_HEADROOM for XSK frames. There's no need for that at all as the frame is post-XDP and will go only to the networking stack core. Pass the size of the actual data only to __napi_alloc_skb() and don't reserve anything. This will give enough headroom for stack processing. Fixes: 0a714186d3c0 ("i40e: add AF_XDP zero-copy Rx support") Signed-off-by: Alexander Lobakin Reviewed-by: Michal Swiatkowski Acked-by: Jesper Dangaard Brouer Tested-by: Kiran Bhandare Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 945b1bb9c6f4..a449c84fe357 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -246,13 +246,11 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring, struct sk_buff *skb; /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - xdp->data_end - xdp->data_hard_start, + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) goto out; - skb_reserve(skb, xdp->data - xdp->data_hard_start); memcpy(__skb_put(skb, datasize), xdp->data, datasize); if (metasize) skb_metadata_set(skb, metasize); From 6dba29537c0f639b482bd8f8bbd50ab4ae74b48d Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 8 Dec 2021 15:06:55 +0100 Subject: [PATCH 2/9] i40e: respect metadata on XSK Rx to skb For now, if the XDP prog returns XDP_PASS on XSK, the metadata will be lost as it doesn't get copied to the skb. Copy it along with the frame headers. Account its size on skb allocation, and when copying just treat it as a part of the frame and do a pull after to "move" it to the "reserved" zone. net_prefetch() xdp->data_meta and align the copy size to speed-up memcpy() a little and better match i40e_construct_skb(). Fixes: 0a714186d3c0 ("i40e: add AF_XDP zero-copy Rx support") Suggested-by: Jesper Dangaard Brouer Suggested-by: Maciej Fijalkowski Signed-off-by: Alexander Lobakin Reviewed-by: Michal Swiatkowski Tested-by: Kiran Bhandare Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index a449c84fe357..67e9844e2076 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -241,19 +241,25 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count) static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp) { + unsigned int totalsize = xdp->data_end - xdp->data_meta; unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int datasize = xdp->data_end - xdp->data; struct sk_buff *skb; + net_prefetch(xdp->data_meta); + /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize, + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) goto out; - memcpy(__skb_put(skb, datasize), xdp->data, datasize); - if (metasize) + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + + if (metasize) { skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } out: xsk_buff_free(xdp); From ee803dca967ae5bd360e0e090a3bfb3ad09e8ecf Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 8 Dec 2021 15:06:56 +0100 Subject: [PATCH 3/9] ice: respect metadata in legacy-rx/ice_construct_skb() In "legacy-rx" mode represented by ice_construct_skb(), we can still use XDP (and XDP metadata), but after XDP_PASS the metadata will be lost as it doesn't get copied to the skb. Copy it along with the frame headers. Account its size on skb allocation, and when copying just treat it as a part of the frame and do a pull after to "move" it to the "reserved" zone. Point net_prefetch() to xdp->data_meta instead of data. This won't change anything when the meta is not here, but will save some cache misses otherwise. Suggested-by: Jesper Dangaard Brouer Suggested-by: Maciej Fijalkowski Signed-off-by: Alexander Lobakin Reviewed-by: Michal Swiatkowski Tested-by: Kiran Bhandare Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 3e38695f1c9d..c2258bee8ecb 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -983,15 +983,17 @@ static struct sk_buff * ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, struct xdp_buff *xdp) { + unsigned int metasize = xdp->data - xdp->data_meta; unsigned int size = xdp->data_end - xdp->data; unsigned int headlen; struct sk_buff *skb; /* prefetch first cache line of first page */ - net_prefetch(xdp->data); + net_prefetch(xdp->data_meta); /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE, + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, + ICE_RX_HDR_SIZE + metasize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; @@ -1003,8 +1005,13 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, - sizeof(long))); + memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta, + ALIGN(headlen + metasize, sizeof(long))); + + if (metasize) { + skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } /* if we exhaust the linear part then add what is left as a frag */ size -= headlen; From dc44572d195e10ec41a03e09b3b5addab4af5cea Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 8 Dec 2021 15:06:57 +0100 Subject: [PATCH 4/9] ice: don't reserve excessive XDP_PACKET_HEADROOM on XSK Rx to skb {__,}napi_alloc_skb() allocates and reserves additional NET_SKB_PAD + NET_IP_ALIGN for any skb. OTOH, ice_construct_skb_zc() currently allocates and reserves additional `xdp->data - xdp->data_hard_start`, which is XDP_PACKET_HEADROOM for XSK frames. There's no need for that at all as the frame is post-XDP and will go only to the networking stack core. Pass the size of the actual data only to __napi_alloc_skb() and don't reserve anything. This will give enough headroom for stack processing. Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Signed-off-by: Alexander Lobakin Reviewed-by: Michal Swiatkowski Tested-by: Kiran Bhandare Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_xsk.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 2388837d6d6c..8fd8052edf09 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -428,17 +428,15 @@ static void ice_bump_ntc(struct ice_rx_ring *rx_ring) static struct sk_buff * ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) { - unsigned int datasize_hard = xdp->data_end - xdp->data_hard_start; unsigned int metasize = xdp->data - xdp->data_meta; unsigned int datasize = xdp->data_end - xdp->data; struct sk_buff *skb; - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard, + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; - skb_reserve(skb, xdp->data - xdp->data_hard_start); memcpy(__skb_put(skb, datasize), xdp->data, datasize); if (metasize) skb_metadata_set(skb, metasize); From 45a34ca68070e34e09d5bf4309f7f1f286a27fc7 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 8 Dec 2021 15:06:58 +0100 Subject: [PATCH 5/9] ice: respect metadata on XSK Rx to skb For now, if the XDP prog returns XDP_PASS on XSK, the metadata will be lost as it doesn't get copied to the skb. Copy it along with the frame headers. Account its size on skb allocation, and when copying just treat it as a part of the frame and do a pull after to "move" it to the "reserved" zone. net_prefetch() xdp->data_meta and align the copy size to speed-up memcpy() a little and better match ice_construct_skb(). Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Suggested-by: Jesper Dangaard Brouer Suggested-by: Maciej Fijalkowski Signed-off-by: Alexander Lobakin Reviewed-by: Michal Swiatkowski Tested-by: Kiran Bhandare Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_xsk.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 8fd8052edf09..feb874bde171 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -428,18 +428,24 @@ static void ice_bump_ntc(struct ice_rx_ring *rx_ring) static struct sk_buff * ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) { + unsigned int totalsize = xdp->data_end - xdp->data_meta; unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int datasize = xdp->data_end - xdp->data; struct sk_buff *skb; - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize, + net_prefetch(xdp->data_meta); + + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; - memcpy(__skb_put(skb, datasize), xdp->data, datasize); - if (metasize) + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + + if (metasize) { skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } xsk_buff_free(xdp); return skb; From f9e61d365bafdee40fe2586fc6be490c3e824dad Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 8 Dec 2021 15:06:59 +0100 Subject: [PATCH 6/9] igc: don't reserve excessive XDP_PACKET_HEADROOM on XSK Rx to skb {__,}napi_alloc_skb() allocates and reserves additional NET_SKB_PAD + NET_IP_ALIGN for any skb. OTOH, igc_construct_skb_zc() currently allocates and reserves additional `xdp->data_meta - xdp->data_hard_start`, which is about XDP_PACKET_HEADROOM for XSK frames. There's no need for that at all as the frame is post-XDP and will go only to the networking stack core. Pass the size of the actual data only (+ meta) to __napi_alloc_skb() and don't reserve anything. This will give enough headroom for stack processing. Also, net_prefetch() xdp->data_meta and align the copy size to speed-up memcpy() a little and better match igc_construct_skb(). Fixes: fc9df2a0b520 ("igc: Enable RX via AF_XDP zero-copy") Signed-off-by: Alexander Lobakin Reviewed-by: Michal Swiatkowski Tested-by: Nechama Kraus Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 6b51baadee3d..b965fb809d84 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2446,19 +2446,20 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, struct xdp_buff *xdp) { + unsigned int totalsize = xdp->data_end - xdp->data_meta; unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int datasize = xdp->data_end - xdp->data; - unsigned int totalsize = metasize + datasize; struct sk_buff *skb; - skb = __napi_alloc_skb(&ring->q_vector->napi, - xdp->data_end - xdp->data_hard_start, + net_prefetch(xdp->data_meta); + + skb = __napi_alloc_skb(&ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; - skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); - memcpy(__skb_put(skb, totalsize), xdp->data_meta, totalsize); + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + if (metasize) { skb_metadata_set(skb, metasize); __skb_pull(skb, metasize); From 1fbdaa13386804a31eefd3db3c5fe00e80ce9bc3 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 8 Dec 2021 15:07:00 +0100 Subject: [PATCH 7/9] ixgbe: pass bi->xdp to ixgbe_construct_skb_zc() directly To not dereference bi->xdp each time in ixgbe_construct_skb_zc(), pass bi->xdp as an argument instead of bi. We can also call xsk_buff_free() outside of the function as well as assign bi->xdp to NULL, which seems to make it closer to its name. Suggested-by: Maciej Fijalkowski Signed-off-by: Alexander Lobakin Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index b3fd8e5cd85b..5447bfb31838 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -207,26 +207,24 @@ bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count) } static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring, - struct ixgbe_rx_buffer *bi) + const struct xdp_buff *xdp) { - unsigned int metasize = bi->xdp->data - bi->xdp->data_meta; - unsigned int datasize = bi->xdp->data_end - bi->xdp->data; + unsigned int metasize = xdp->data - xdp->data_meta; + unsigned int datasize = xdp->data_end - xdp->data; struct sk_buff *skb; /* allocate a skb to store the frags */ skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - bi->xdp->data_end - bi->xdp->data_hard_start, + xdp->data_end - xdp->data_hard_start, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; - skb_reserve(skb, bi->xdp->data - bi->xdp->data_hard_start); - memcpy(__skb_put(skb, datasize), bi->xdp->data, datasize); + skb_reserve(skb, xdp->data - xdp->data_hard_start); + memcpy(__skb_put(skb, datasize), xdp->data, datasize); if (metasize) skb_metadata_set(skb, metasize); - xsk_buff_free(bi->xdp); - bi->xdp = NULL; return skb; } @@ -317,12 +315,15 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector, } /* XDP_PASS path */ - skb = ixgbe_construct_skb_zc(rx_ring, bi); + skb = ixgbe_construct_skb_zc(rx_ring, bi->xdp); if (!skb) { rx_ring->rx_stats.alloc_rx_buff_failed++; break; } + xsk_buff_free(bi->xdp); + bi->xdp = NULL; + cleaned_count++; ixgbe_inc_ntc(rx_ring); From 8f405221a73a53234486c185d8ef647377a53cc6 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 8 Dec 2021 15:07:01 +0100 Subject: [PATCH 8/9] ixgbe: don't reserve excessive XDP_PACKET_HEADROOM on XSK Rx to skb {__,}napi_alloc_skb() allocates and reserves additional NET_SKB_PAD + NET_IP_ALIGN for any skb. OTOH, ixgbe_construct_skb_zc() currently allocates and reserves additional `xdp->data - xdp->data_hard_start`, which is XDP_PACKET_HEADROOM for XSK frames. There's no need for that at all as the frame is post-XDP and will go only to the networking stack core. Pass the size of the actual data only to __napi_alloc_skb() and don't reserve anything. This will give enough headroom for stack processing. Fixes: d0bcacd0a130 ("ixgbe: add AF_XDP zero-copy Rx support") Signed-off-by: Alexander Lobakin Reviewed-by: Michal Swiatkowski Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 5447bfb31838..80078762ed24 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -214,13 +214,11 @@ static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring, struct sk_buff *skb; /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - xdp->data_end - xdp->data_hard_start, + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; - skb_reserve(skb, xdp->data - xdp->data_hard_start); memcpy(__skb_put(skb, datasize), xdp->data, datasize); if (metasize) skb_metadata_set(skb, metasize); From f322a620be69e95594eda89502b478aa7dbf6ec2 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 8 Dec 2021 15:07:02 +0100 Subject: [PATCH 9/9] ixgbe: respect metadata on XSK Rx to skb For now, if the XDP prog returns XDP_PASS on XSK, the metadata will be lost as it doesn't get copied to the skb. Copy it along with the frame headers. Account its size on skb allocation, and when copying just treat it as a part of the frame and do a pull after to "move" it to the "reserved" zone. net_prefetch() xdp->data_meta and align the copy size to speed-up memcpy() a little and better match ixgbe_construct_skb(). Fixes: d0bcacd0a130 ("ixgbe: add AF_XDP zero-copy Rx support") Suggested-by: Jesper Dangaard Brouer Suggested-by: Maciej Fijalkowski Signed-off-by: Alexander Lobakin Reviewed-by: Michal Swiatkowski Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 80078762ed24..ee28929b9c5f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -209,19 +209,25 @@ bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count) static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring, const struct xdp_buff *xdp) { + unsigned int totalsize = xdp->data_end - xdp->data_meta; unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int datasize = xdp->data_end - xdp->data; struct sk_buff *skb; + net_prefetch(xdp->data_meta); + /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize, + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; - memcpy(__skb_put(skb, datasize), xdp->data, datasize); - if (metasize) + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + + if (metasize) { skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } return skb; }