mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-02 15:43:35 -04:00
Merge branch 'skb-gro-optimize'
Paolo Abeni says: ==================== sk_buff: optimize GRO for the common case This is a trimmed-down revision of "sk_buff: optimize layout for GRO", specifically dropping the changes to the sk_buff layout[1]. This series tries to accomplish 2 goals: - optimize the GRO stage for the most common scenario, avoiding a bunch of conditionals and some more code - let owned skbs enter the GRO engine, allowing backpressure in the veth GRO forward path. A new sk_buff flag (!!!) is introduced and maintained for GRO's sake. Such field uses an existing hole, so there is no change to the sk_buff size. [1] two main reasons: - moving the skb->inner_ fields requires some extra care, as some in-kernel users access the fields regardless of skb->encapsulation. - extending the secmark size clashes with the ct and nft uAPIs. Addressing all the above is possible, I think, but for sure not in a single series. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
@@ -713,7 +713,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
|
||||
int mac_len, delta, off;
|
||||
struct xdp_buff xdp;
|
||||
|
||||
skb_orphan_partial(skb);
|
||||
skb_prepare_for_gro(skb);
|
||||
|
||||
rcu_read_lock();
|
||||
xdp_prog = rcu_dereference(rq->xdp_prog);
|
||||
|
||||
@@ -689,6 +689,7 @@ typedef unsigned char *sk_buff_data_t;
|
||||
* CHECKSUM_UNNECESSARY (max 3)
|
||||
* @dst_pending_confirm: need to confirm neighbour
|
||||
* @decrypted: Decrypted SKB
|
||||
* @slow_gro: state present at GRO time, slower prepare step required
|
||||
* @napi_id: id of the NAPI struct this skb came from
|
||||
* @sender_cpu: (aka @napi_id) source CPU in XPS
|
||||
* @secmark: security marking
|
||||
@@ -870,6 +871,7 @@ struct sk_buff {
|
||||
#ifdef CONFIG_TLS_DEVICE
|
||||
__u8 decrypted:1;
|
||||
#endif
|
||||
__u8 slow_gro:1;
|
||||
|
||||
#ifdef CONFIG_NET_SCHED
|
||||
__u16 tc_index; /* traffic control index */
|
||||
@@ -990,6 +992,7 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
|
||||
*/
|
||||
static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
|
||||
{
|
||||
skb->slow_gro |= !!dst;
|
||||
skb->_skb_refdst = (unsigned long)dst;
|
||||
}
|
||||
|
||||
@@ -1006,6 +1009,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
|
||||
static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
|
||||
{
|
||||
WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
|
||||
skb->slow_gro = !!dst;
|
||||
skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
|
||||
}
|
||||
|
||||
@@ -4216,6 +4220,7 @@ static inline unsigned long skb_get_nfct(const struct sk_buff *skb)
|
||||
static inline void skb_set_nfct(struct sk_buff *skb, unsigned long nfct)
|
||||
{
|
||||
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
|
||||
skb->slow_gro |= !!nfct;
|
||||
skb->_nfct = nfct;
|
||||
#endif
|
||||
}
|
||||
@@ -4375,6 +4380,7 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
|
||||
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
|
||||
nf_conntrack_put(skb_nfct(dst));
|
||||
#endif
|
||||
dst->slow_gro = src->slow_gro;
|
||||
__nf_copy(dst, src, true);
|
||||
}
|
||||
|
||||
|
||||
@@ -277,6 +277,7 @@ static inline void skb_dst_drop(struct sk_buff *skb)
|
||||
|
||||
static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst)
|
||||
{
|
||||
nskb->slow_gro |= !!refdst;
|
||||
nskb->_skb_refdst = refdst;
|
||||
if (!(nskb->_skb_refdst & SKB_DST_NOREF))
|
||||
dst_clone(skb_dst(nskb));
|
||||
@@ -316,6 +317,7 @@ static inline bool skb_dst_force(struct sk_buff *skb)
|
||||
dst = NULL;
|
||||
|
||||
skb->_skb_refdst = (unsigned long)dst;
|
||||
skb->slow_gro |= !!dst;
|
||||
}
|
||||
|
||||
return skb->_skb_refdst != 0UL;
|
||||
|
||||
@@ -2249,6 +2249,15 @@ static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struc
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void skb_prepare_for_gro(struct sk_buff *skb)
|
||||
{
|
||||
if (skb->destructor != sock_wfree) {
|
||||
skb_orphan(skb);
|
||||
return;
|
||||
}
|
||||
skb->slow_gro = 1;
|
||||
}
|
||||
|
||||
void sk_reset_timer(struct sock *sk, struct timer_list *timer,
|
||||
unsigned long expires);
|
||||
|
||||
|
||||
@@ -6014,7 +6014,6 @@ static void gro_list_prepare(const struct list_head *head,
|
||||
diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb);
|
||||
if (skb_vlan_tag_present(p))
|
||||
diffs |= skb_vlan_tag_get(p) ^ skb_vlan_tag_get(skb);
|
||||
diffs |= skb_metadata_dst_cmp(p, skb);
|
||||
diffs |= skb_metadata_differs(p, skb);
|
||||
if (maclen == ETH_HLEN)
|
||||
diffs |= compare_ether_header(skb_mac_header(p),
|
||||
@@ -6024,17 +6023,30 @@ static void gro_list_prepare(const struct list_head *head,
|
||||
skb_mac_header(skb),
|
||||
maclen);
|
||||
|
||||
diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb);
|
||||
/* in most common scenarios _state is 0
|
||||
* otherwise we are already on some slower paths
|
||||
* either skip all the infrequent tests altogether or
|
||||
* avoid trying too hard to skip each of them individually
|
||||
*/
|
||||
if (!diffs && unlikely(skb->slow_gro | p->slow_gro)) {
|
||||
#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
|
||||
if (!diffs) {
|
||||
struct tc_skb_ext *skb_ext = skb_ext_find(skb, TC_SKB_EXT);
|
||||
struct tc_skb_ext *p_ext = skb_ext_find(p, TC_SKB_EXT);
|
||||
struct tc_skb_ext *skb_ext;
|
||||
struct tc_skb_ext *p_ext;
|
||||
#endif
|
||||
|
||||
diffs |= p->sk != skb->sk;
|
||||
diffs |= skb_metadata_dst_cmp(p, skb);
|
||||
diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb);
|
||||
|
||||
#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
|
||||
skb_ext = skb_ext_find(skb, TC_SKB_EXT);
|
||||
p_ext = skb_ext_find(p, TC_SKB_EXT);
|
||||
|
||||
diffs |= (!!p_ext) ^ (!!skb_ext);
|
||||
if (!diffs && unlikely(skb_ext))
|
||||
diffs |= p_ext->chain ^ skb_ext->chain;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
NAPI_GRO_CB(p)->same_flow = !diffs;
|
||||
}
|
||||
@@ -6299,8 +6311,12 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
|
||||
skb->encapsulation = 0;
|
||||
skb_shinfo(skb)->gso_type = 0;
|
||||
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
|
||||
skb_ext_reset(skb);
|
||||
nf_reset_ct(skb);
|
||||
if (unlikely(skb->slow_gro)) {
|
||||
skb_orphan(skb);
|
||||
skb_ext_reset(skb);
|
||||
nf_reset_ct(skb);
|
||||
skb->slow_gro = 0;
|
||||
}
|
||||
|
||||
napi->skb = skb;
|
||||
}
|
||||
|
||||
@@ -954,9 +954,13 @@ void __kfree_skb_defer(struct sk_buff *skb)
|
||||
|
||||
void napi_skb_free_stolen_head(struct sk_buff *skb)
|
||||
{
|
||||
nf_reset_ct(skb);
|
||||
skb_dst_drop(skb);
|
||||
skb_ext_put(skb);
|
||||
if (unlikely(skb->slow_gro)) {
|
||||
nf_reset_ct(skb);
|
||||
skb_dst_drop(skb);
|
||||
skb_ext_put(skb);
|
||||
skb_orphan(skb);
|
||||
skb->slow_gro = 0;
|
||||
}
|
||||
napi_skb_cache_put(skb);
|
||||
}
|
||||
|
||||
@@ -3889,6 +3893,9 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
|
||||
NAPI_GRO_CB(p)->last = skb;
|
||||
NAPI_GRO_CB(p)->count++;
|
||||
p->data_len += skb->len;
|
||||
|
||||
/* sk ownership - if any - completely transferred to the aggregated packet */
|
||||
skb->destructor = NULL;
|
||||
p->truesize += skb->truesize;
|
||||
p->len += skb->len;
|
||||
|
||||
@@ -4256,6 +4263,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
|
||||
unsigned int headlen = skb_headlen(skb);
|
||||
unsigned int len = skb_gro_len(skb);
|
||||
unsigned int delta_truesize;
|
||||
unsigned int new_truesize;
|
||||
struct sk_buff *lp;
|
||||
|
||||
if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush))
|
||||
@@ -4287,10 +4295,10 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
|
||||
skb_frag_size_sub(frag, offset);
|
||||
|
||||
/* all fragments truesize : remove (head size + sk_buff) */
|
||||
delta_truesize = skb->truesize -
|
||||
SKB_TRUESIZE(skb_end_offset(skb));
|
||||
new_truesize = SKB_TRUESIZE(skb_end_offset(skb));
|
||||
delta_truesize = skb->truesize - new_truesize;
|
||||
|
||||
skb->truesize -= skb->data_len;
|
||||
skb->truesize = new_truesize;
|
||||
skb->len -= skb->data_len;
|
||||
skb->data_len = 0;
|
||||
|
||||
@@ -4319,12 +4327,16 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
|
||||
memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
|
||||
/* We dont need to clear skbinfo->nr_frags here */
|
||||
|
||||
delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
|
||||
new_truesize = SKB_TRUESIZE(sizeof(struct sk_buff));
|
||||
delta_truesize = skb->truesize - new_truesize;
|
||||
skb->truesize = new_truesize;
|
||||
NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
|
||||
goto done;
|
||||
}
|
||||
|
||||
merge:
|
||||
/* sk ownership - if any - completely transferred to the aggregated packet */
|
||||
skb->destructor = NULL;
|
||||
delta_truesize = skb->truesize;
|
||||
if (offset > headlen) {
|
||||
unsigned int eat = offset - headlen;
|
||||
@@ -6449,6 +6461,7 @@ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
|
||||
new->chunks = newlen;
|
||||
new->offset[id] = newoff;
|
||||
set_active:
|
||||
skb->slow_gro = 1;
|
||||
skb->extensions = new;
|
||||
skb->active_extensions |= 1 << id;
|
||||
return skb_ext_get_ptr(new, id);
|
||||
|
||||
Reference in New Issue
Block a user