From 41b3caa7c0761141aa6d508924b9d23db57a17bc Mon Sep 17 00:00:00 2001 From: Gilad Naaman Date: Thu, 7 Nov 2024 16:04:38 +0000 Subject: [PATCH 1/6] neighbour: Add hlist_node to struct neighbour Add a doubly-linked node to neighbours, so that they can be deleted without iterating the entire bucket they're in. Signed-off-by: Gilad Naaman Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241107160444.2913124-2-gnaaman@drivenets.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 2 ++ net/core/neighbour.c | 20 +++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 3887ed9e5026..0402447854c7 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -136,6 +136,7 @@ struct neigh_statistics { struct neighbour { struct neighbour __rcu *next; + struct hlist_node hash; struct neigh_table *tbl; struct neigh_parms *parms; unsigned long confirmed; @@ -191,6 +192,7 @@ struct pneigh_entry { struct neigh_hash_table { struct neighbour __rcu **hash_buckets; + struct hlist_head *hash_heads; unsigned int hash_shift; __u32 hash_rnd[NEIGH_NUM_HASH_RND]; struct rcu_head rcu; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 4b871cecd2ce..5552e6b05c82 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -216,6 +216,7 @@ static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, neigh = rcu_dereference_protected(n->next, lockdep_is_held(&tbl->lock)); rcu_assign_pointer(*np, neigh); + hlist_del_rcu(&n->hash); neigh_mark_dead(n); retval = true; } @@ -402,6 +403,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, rcu_assign_pointer(*np, rcu_dereference_protected(n->next, lockdep_is_held(&tbl->lock))); + hlist_del_rcu(&n->hash); write_lock(&n->lock); neigh_del_timer(n); neigh_mark_dead(n); @@ -529,20 +531,30 @@ static void neigh_get_hash_rnd(u32 *x) static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) { + size_t hash_heads_size = (1 << shift) * sizeof(struct hlist_head); size_t size = (1 << shift) * sizeof(struct neighbour *); - struct neigh_hash_table *ret; struct neighbour __rcu **buckets; + struct hlist_head *hash_heads; + struct neigh_hash_table *ret; int i; ret = kmalloc(sizeof(*ret), GFP_ATOMIC); if (!ret) return NULL; + buckets = kvzalloc(size, GFP_ATOMIC); if (!buckets) { kfree(ret); return NULL; } + hash_heads = kvzalloc(hash_heads_size, GFP_ATOMIC); + if (!hash_heads) { + kvfree(buckets); + kfree(ret); + return NULL; + } ret->hash_buckets = buckets; + ret->hash_heads = hash_heads; ret->hash_shift = shift; for (i = 0; i < NEIGH_NUM_HASH_RND; i++) neigh_get_hash_rnd(&ret->hash_rnd[i]); @@ -556,6 +568,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head) rcu); kvfree(nht->hash_buckets); + kvfree(nht->hash_heads); kfree(nht); } @@ -592,6 +605,8 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl, new_nht->hash_buckets[hash], lockdep_is_held(&tbl->lock))); rcu_assign_pointer(new_nht->hash_buckets[hash], n); + hlist_del_rcu(&n->hash); + hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]); } } @@ -702,6 +717,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, rcu_dereference_protected(nht->hash_buckets[hash_val], lockdep_is_held(&tbl->lock))); rcu_assign_pointer(nht->hash_buckets[hash_val], n); + hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]); write_unlock_bh(&tbl->lock); neigh_dbg(2, "neigh %p is created\n", n); rc = n; @@ -987,6 +1003,7 @@ static void neigh_periodic_work(struct work_struct *work) rcu_assign_pointer(*np, rcu_dereference_protected(n->next, lockdep_is_held(&tbl->lock))); + hlist_del_rcu(&n->hash); neigh_mark_dead(n); write_unlock(&n->lock); neigh_cleanup_and_release(n); @@ -3116,6 +3133,7 @@ void __neigh_for_each_release(struct neigh_table *tbl, rcu_assign_pointer(*np, rcu_dereference_protected(n->next, lockdep_is_held(&tbl->lock))); + hlist_del_rcu(&n->hash); neigh_mark_dead(n); } else np = &n->next; From d7ddee1a522ddf5b28e2a3f7093cf238c96f492a Mon Sep 17 00:00:00 2001 From: Gilad Naaman Date: Thu, 7 Nov 2024 16:04:39 +0000 Subject: [PATCH 2/6] neighbour: Define neigh_for_each_in_bucket Introduce neigh_for_each_in_bucket in neighbour.h, to help iterate over the neighbour table more succinctly. Signed-off-by: Gilad Naaman Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241107160444.2913124-3-gnaaman@drivenets.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 0402447854c7..4b9068c5e668 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -277,6 +277,12 @@ static inline void *neighbour_priv(const struct neighbour *n) extern const struct nla_policy nda_policy[]; +#define neigh_for_each_in_bucket(pos, head) hlist_for_each_entry(pos, head, hash) +#define neigh_for_each_in_bucket_rcu(pos, head) \ + hlist_for_each_entry_rcu(pos, head, hash) +#define neigh_for_each_in_bucket_safe(pos, tmp, head) \ + hlist_for_each_entry_safe(pos, tmp, head, hash) + static inline bool neigh_key_eq32(const struct neighbour *n, const void *pkey) { return *(const u32 *)n->primary_key == *(const u32 *)pkey; From 00df5e1a3fdf36624d59ef6ab09010ebaee6e66a Mon Sep 17 00:00:00 2001 From: Gilad Naaman Date: Thu, 7 Nov 2024 16:04:40 +0000 Subject: [PATCH 3/6] neighbour: Convert seq_file functions to use hlist Convert seq_file-related neighbour functionality to use neighbour::hash and the related for_each macro. Signed-off-by: Gilad Naaman Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241107160444.2913124-4-gnaaman@drivenets.com Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 104 ++++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 56 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5552e6b05c82..3485d6b3ba99 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3193,43 +3193,53 @@ EXPORT_SYMBOL(neigh_xmit); #ifdef CONFIG_PROC_FS -static struct neighbour *neigh_get_first(struct seq_file *seq) +static struct neighbour *neigh_get_valid(struct seq_file *seq, + struct neighbour *n, + loff_t *pos) { struct neigh_seq_state *state = seq->private; struct net *net = seq_file_net(seq); + + if (!net_eq(dev_net(n->dev), net)) + return NULL; + + if (state->neigh_sub_iter) { + loff_t fakep = 0; + void *v; + + v = state->neigh_sub_iter(state, n, pos ? pos : &fakep); + if (!v) + return NULL; + if (pos) + return v; + } + + if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) + return n; + + if (READ_ONCE(n->nud_state) & ~NUD_NOARP) + return n; + + return NULL; +} + +static struct neighbour *neigh_get_first(struct seq_file *seq) +{ + struct neigh_seq_state *state = seq->private; struct neigh_hash_table *nht = state->nht; - struct neighbour *n = NULL; - int bucket; + struct neighbour *n, *tmp; state->flags &= ~NEIGH_SEQ_IS_PNEIGH; - for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) { - n = rcu_dereference(nht->hash_buckets[bucket]); - while (n) { - if (!net_eq(dev_net(n->dev), net)) - goto next; - if (state->neigh_sub_iter) { - loff_t fakep = 0; - void *v; - - v = state->neigh_sub_iter(state, n, &fakep); - if (!v) - goto next; - } - if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) - break; - if (READ_ONCE(n->nud_state) & ~NUD_NOARP) - break; -next: - n = rcu_dereference(n->next); + while (++state->bucket < (1 << nht->hash_shift)) { + neigh_for_each_in_bucket(n, &nht->hash_heads[state->bucket]) { + tmp = neigh_get_valid(seq, n, NULL); + if (tmp) + return tmp; } - - if (n) - break; } - state->bucket = bucket; - return n; + return NULL; } static struct neighbour *neigh_get_next(struct seq_file *seq, @@ -3237,46 +3247,28 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, loff_t *pos) { struct neigh_seq_state *state = seq->private; - struct net *net = seq_file_net(seq); - struct neigh_hash_table *nht = state->nht; + struct neighbour *tmp; if (state->neigh_sub_iter) { void *v = state->neigh_sub_iter(state, n, pos); + if (v) return n; } - n = rcu_dereference(n->next); - while (1) { - while (n) { - if (!net_eq(dev_net(n->dev), net)) - goto next; - if (state->neigh_sub_iter) { - void *v = state->neigh_sub_iter(state, n, pos); - if (v) - return n; - goto next; - } - if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) - break; - - if (READ_ONCE(n->nud_state) & ~NUD_NOARP) - break; -next: - n = rcu_dereference(n->next); + hlist_for_each_entry_continue(n, hash) { + tmp = neigh_get_valid(seq, n, pos); + if (tmp) { + n = tmp; + goto out; } - - if (n) - break; - - if (++state->bucket >= (1 << nht->hash_shift)) - break; - - n = rcu_dereference(nht->hash_buckets[state->bucket]); } + n = neigh_get_first(seq); +out: if (n && pos) --(*pos); + return n; } @@ -3379,7 +3371,7 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl struct neigh_seq_state *state = seq->private; state->tbl = tbl; - state->bucket = 0; + state->bucket = -1; state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); rcu_read_lock(); From 0e3bcb0f78a0ca7cfdb7906dc79d922e19ba09b5 Mon Sep 17 00:00:00 2001 From: Gilad Naaman Date: Thu, 7 Nov 2024 16:04:41 +0000 Subject: [PATCH 4/6] neighbour: Convert iteration to use hlist+macro Remove all usage of the bare neighbour::next pointer, replacing them with neighbour::hash and its for_each macro. Signed-off-by: Gilad Naaman Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241107160444.2913124-5-gnaaman@drivenets.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 5 +---- net/core/neighbour.c | 49 ++++++++++++++++------------------------- 2 files changed, 20 insertions(+), 34 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 4b9068c5e668..94cf4f8c118f 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -311,12 +311,9 @@ static inline struct neighbour *___neigh_lookup_noref( u32 hash_val; hash_val = hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); - for (n = rcu_dereference(nht->hash_buckets[hash_val]); - n != NULL; - n = rcu_dereference(n->next)) { + neigh_for_each_in_bucket_rcu(n, &nht->hash_heads[hash_val]) if (n->dev == dev && key_eq(n, pkey)) return n; - } return NULL; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3485d6b3ba99..f99354d768c2 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -387,11 +387,11 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, lockdep_is_held(&tbl->lock)); for (i = 0; i < (1 << nht->hash_shift); i++) { - struct neighbour *n; struct neighbour __rcu **np = &nht->hash_buckets[i]; + struct hlist_node *tmp; + struct neighbour *n; - while ((n = rcu_dereference_protected(*np, - lockdep_is_held(&tbl->lock))) != NULL) { + neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) { if (dev && n->dev != dev) { np = &n->next; continue; @@ -587,18 +587,14 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl, return old_nht; for (i = 0; i < (1 << old_nht->hash_shift); i++) { - struct neighbour *n, *next; + struct hlist_node *tmp; + struct neighbour *n; - for (n = rcu_dereference_protected(old_nht->hash_buckets[i], - lockdep_is_held(&tbl->lock)); - n != NULL; - n = next) { + neigh_for_each_in_bucket_safe(n, tmp, &old_nht->hash_heads[i]) { hash = tbl->hash(n->primary_key, n->dev, new_nht->hash_rnd); hash >>= (32 - new_nht->hash_shift); - next = rcu_dereference_protected(n->next, - lockdep_is_held(&tbl->lock)); rcu_assign_pointer(n->next, rcu_dereference_protected( @@ -693,11 +689,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, goto out_tbl_unlock; } - for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val], - lockdep_is_held(&tbl->lock)); - n1 != NULL; - n1 = rcu_dereference_protected(n1->next, - lockdep_is_held(&tbl->lock))) { + neigh_for_each_in_bucket(n1, &nht->hash_heads[hash_val]) { if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) { if (want_ref) neigh_hold(n1); @@ -949,10 +941,11 @@ static void neigh_connect(struct neighbour *neigh) static void neigh_periodic_work(struct work_struct *work) { struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work); - struct neighbour *n; - struct neighbour __rcu **np; - unsigned int i; struct neigh_hash_table *nht; + struct neighbour __rcu **np; + struct hlist_node *tmp; + struct neighbour *n; + unsigned int i; NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); @@ -979,8 +972,7 @@ static void neigh_periodic_work(struct work_struct *work) for (i = 0 ; i < (1 << nht->hash_shift); i++) { np = &nht->hash_buckets[i]; - while ((n = rcu_dereference_protected(*np, - lockdep_is_held(&tbl->lock))) != NULL) { + neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) { unsigned int state; write_lock(&n->lock); @@ -2730,9 +2722,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, for (h = s_h; h < (1 << nht->hash_shift); h++) { if (h > s_h) s_idx = 0; - for (n = rcu_dereference(nht->hash_buckets[h]), idx = 0; - n != NULL; - n = rcu_dereference(n->next)) { + idx = 0; + neigh_for_each_in_bucket_rcu(n, &nht->hash_heads[h]) { if (idx < s_idx || !net_eq(dev_net(n->dev), net)) goto next; if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || @@ -3099,9 +3090,7 @@ void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void for (chain = 0; chain < (1 << nht->hash_shift); chain++) { struct neighbour *n; - for (n = rcu_dereference(nht->hash_buckets[chain]); - n != NULL; - n = rcu_dereference(n->next)) + neigh_for_each_in_bucket(n, &nht->hash_heads[chain]) cb(n, cookie); } read_unlock_bh(&tbl->lock); @@ -3113,18 +3102,18 @@ EXPORT_SYMBOL(neigh_for_each); void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *)) { - int chain; struct neigh_hash_table *nht; + int chain; nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); for (chain = 0; chain < (1 << nht->hash_shift); chain++) { - struct neighbour *n; struct neighbour __rcu **np; + struct hlist_node *tmp; + struct neighbour *n; np = &nht->hash_buckets[chain]; - while ((n = rcu_dereference_protected(*np, - lockdep_is_held(&tbl->lock))) != NULL) { + neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[chain]) { int release; write_lock(&n->lock); From a01a67ab2fffa7458354f0a666a6d550fa2b82fc Mon Sep 17 00:00:00 2001 From: Gilad Naaman Date: Thu, 7 Nov 2024 16:04:42 +0000 Subject: [PATCH 5/6] neighbour: Remove bare neighbour::next pointer Remove the now-unused neighbour::next pointer, leaving struct neighbour solely with the hlist_node implementation. Signed-off-by: Gilad Naaman Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241107160444.2913124-6-gnaaman@drivenets.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 4 +- net/core/neighbour.c | 90 +++++------------------------------------ net/ipv4/arp.c | 2 +- 3 files changed, 12 insertions(+), 84 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 94cf4f8c118f..40aac1e24c68 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -135,7 +135,6 @@ struct neigh_statistics { #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field) struct neighbour { - struct neighbour __rcu *next; struct hlist_node hash; struct neigh_table *tbl; struct neigh_parms *parms; @@ -191,7 +190,6 @@ struct pneigh_entry { #define NEIGH_NUM_HASH_RND 4 struct neigh_hash_table { - struct neighbour __rcu **hash_buckets; struct hlist_head *hash_heads; unsigned int hash_shift; __u32 hash_rnd[NEIGH_NUM_HASH_RND]; @@ -354,7 +352,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid); void __neigh_set_probe_once(struct neighbour *neigh); -bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl); +bool neigh_remove_one(struct neighbour *ndel); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev); int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f99354d768c2..59f359c7b5e3 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -204,18 +204,12 @@ static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify, } } -static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, - struct neigh_table *tbl) +bool neigh_remove_one(struct neighbour *n) { bool retval = false; write_lock(&n->lock); if (refcount_read(&n->refcnt) == 1) { - struct neighbour *neigh; - - neigh = rcu_dereference_protected(n->next, - lockdep_is_held(&tbl->lock)); - rcu_assign_pointer(*np, neigh); hlist_del_rcu(&n->hash); neigh_mark_dead(n); retval = true; @@ -226,29 +220,6 @@ static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, return retval; } -bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl) -{ - struct neigh_hash_table *nht; - void *pkey = ndel->primary_key; - u32 hash_val; - struct neighbour *n; - struct neighbour __rcu **np; - - nht = rcu_dereference_protected(tbl->nht, - lockdep_is_held(&tbl->lock)); - hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd); - hash_val = hash_val >> (32 - nht->hash_shift); - - np = &nht->hash_buckets[hash_val]; - while ((n = rcu_dereference_protected(*np, - lockdep_is_held(&tbl->lock)))) { - if (n == ndel) - return neigh_del(n, np, tbl); - np = &n->next; - } - return false; -} - static int neigh_forced_gc(struct neigh_table *tbl) { int max_clean = atomic_read(&tbl->gc_entries) - @@ -276,7 +247,7 @@ static int neigh_forced_gc(struct neigh_table *tbl) remove = true; write_unlock(&n->lock); - if (remove && neigh_remove_one(n, tbl)) + if (remove && neigh_remove_one(n)) shrunk++; if (shrunk >= max_clean) break; @@ -387,22 +358,15 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, lockdep_is_held(&tbl->lock)); for (i = 0; i < (1 << nht->hash_shift); i++) { - struct neighbour __rcu **np = &nht->hash_buckets[i]; struct hlist_node *tmp; struct neighbour *n; neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) { - if (dev && n->dev != dev) { - np = &n->next; + if (dev && n->dev != dev) continue; - } - if (skip_perm && n->nud_state & NUD_PERMANENT) { - np = &n->next; + if (skip_perm && n->nud_state & NUD_PERMANENT) continue; - } - rcu_assign_pointer(*np, - rcu_dereference_protected(n->next, - lockdep_is_held(&tbl->lock))); + hlist_del_rcu(&n->hash); write_lock(&n->lock); neigh_del_timer(n); @@ -531,9 +495,7 @@ static void neigh_get_hash_rnd(u32 *x) static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) { - size_t hash_heads_size = (1 << shift) * sizeof(struct hlist_head); - size_t size = (1 << shift) * sizeof(struct neighbour *); - struct neighbour __rcu **buckets; + size_t size = (1 << shift) * sizeof(struct hlist_head); struct hlist_head *hash_heads; struct neigh_hash_table *ret; int i; @@ -542,18 +504,11 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) if (!ret) return NULL; - buckets = kvzalloc(size, GFP_ATOMIC); - if (!buckets) { - kfree(ret); - return NULL; - } - hash_heads = kvzalloc(hash_heads_size, GFP_ATOMIC); + hash_heads = kvzalloc(size, GFP_ATOMIC); if (!hash_heads) { - kvfree(buckets); kfree(ret); return NULL; } - ret->hash_buckets = buckets; ret->hash_heads = hash_heads; ret->hash_shift = shift; for (i = 0; i < NEIGH_NUM_HASH_RND; i++) @@ -567,7 +522,6 @@ static void neigh_hash_free_rcu(struct rcu_head *head) struct neigh_hash_table, rcu); - kvfree(nht->hash_buckets); kvfree(nht->hash_heads); kfree(nht); } @@ -596,11 +550,6 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl, hash >>= (32 - new_nht->hash_shift); - rcu_assign_pointer(n->next, - rcu_dereference_protected( - new_nht->hash_buckets[hash], - lockdep_is_held(&tbl->lock))); - rcu_assign_pointer(new_nht->hash_buckets[hash], n); hlist_del_rcu(&n->hash); hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]); } @@ -705,10 +654,6 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, list_add_tail(&n->managed_list, &n->tbl->managed_list); if (want_ref) neigh_hold(n); - rcu_assign_pointer(n->next, - rcu_dereference_protected(nht->hash_buckets[hash_val], - lockdep_is_held(&tbl->lock))); - rcu_assign_pointer(nht->hash_buckets[hash_val], n); hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]); write_unlock_bh(&tbl->lock); neigh_dbg(2, "neigh %p is created\n", n); @@ -942,7 +887,6 @@ static void neigh_periodic_work(struct work_struct *work) { struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work); struct neigh_hash_table *nht; - struct neighbour __rcu **np; struct hlist_node *tmp; struct neighbour *n; unsigned int i; @@ -970,8 +914,6 @@ static void neigh_periodic_work(struct work_struct *work) goto out; for (i = 0 ; i < (1 << nht->hash_shift); i++) { - np = &nht->hash_buckets[i]; - neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) { unsigned int state; @@ -981,7 +923,7 @@ static void neigh_periodic_work(struct work_struct *work) if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) || (n->flags & NTF_EXT_LEARNED)) { write_unlock(&n->lock); - goto next_elt; + continue; } if (time_before(n->used, n->confirmed) && @@ -992,9 +934,6 @@ static void neigh_periodic_work(struct work_struct *work) (state == NUD_FAILED || !time_in_range_open(jiffies, n->used, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { - rcu_assign_pointer(*np, - rcu_dereference_protected(n->next, - lockdep_is_held(&tbl->lock))); hlist_del_rcu(&n->hash); neigh_mark_dead(n); write_unlock(&n->lock); @@ -1002,9 +941,6 @@ static void neigh_periodic_work(struct work_struct *work) continue; } write_unlock(&n->lock); - -next_elt: - np = &n->next; } /* * It's fine to release lock here, even if hash table @@ -1951,7 +1887,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, NETLINK_CB(skb).portid, extack); write_lock_bh(&tbl->lock); neigh_release(neigh); - neigh_remove_one(neigh, tbl); + neigh_remove_one(neigh); write_unlock_bh(&tbl->lock); out: @@ -3108,24 +3044,18 @@ void __neigh_for_each_release(struct neigh_table *tbl, nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); for (chain = 0; chain < (1 << nht->hash_shift); chain++) { - struct neighbour __rcu **np; struct hlist_node *tmp; struct neighbour *n; - np = &nht->hash_buckets[chain]; neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[chain]) { int release; write_lock(&n->lock); release = cb(n); if (release) { - rcu_assign_pointer(*np, - rcu_dereference_protected(n->next, - lockdep_is_held(&tbl->lock))); hlist_del_rcu(&n->hash); neigh_mark_dead(n); - } else - np = &n->next; + } write_unlock(&n->lock); if (release) neigh_cleanup_and_release(n); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 11c1519b3699..cb9a7ed8abd3 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1215,7 +1215,7 @@ int arp_invalidate(struct net_device *dev, __be32 ip, bool force) NEIGH_UPDATE_F_ADMIN, 0); write_lock_bh(&tbl->lock); neigh_release(neigh); - neigh_remove_one(neigh, tbl); + neigh_remove_one(neigh); write_unlock_bh(&tbl->lock); } From f7f52738637f4361c108cad36e23ee98959a9006 Mon Sep 17 00:00:00 2001 From: Gilad Naaman Date: Thu, 7 Nov 2024 16:04:43 +0000 Subject: [PATCH 6/6] neighbour: Create netdev->neighbour association Create a mapping between a netdev and its neighoburs, allowing for much cheaper flushes. Signed-off-by: Gilad Naaman Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241107160444.2913124-7-gnaaman@drivenets.com Signed-off-by: Jakub Kicinski --- .../networking/net_cachelines/net_device.rst | 1 + include/linux/netdevice.h | 7 ++ include/net/neighbour.h | 9 +- include/net/neighbour_tables.h | 12 +++ net/core/neighbour.c | 96 +++++++++++-------- 5 files changed, 80 insertions(+), 45 deletions(-) create mode 100644 include/net/neighbour_tables.h diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index ade50d4e67cf..15e31ece675f 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -188,4 +188,5 @@ u64 max_pacing_offload_horizon struct_napi_config* napi_config unsigned_long gro_flush_timeout u32 napi_defer_hard_irqs +struct hlist_head neighbours[2] =================================== =========================== =================== =================== =================================================================================== diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3c552b648b27..df4483598628 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -52,6 +52,7 @@ #include #include #include +#include struct netpoll_info; struct device; @@ -2032,6 +2033,9 @@ enum netdev_reg_state { * @napi_defer_hard_irqs: If not zero, provides a counter that would * allow to avoid NIC hard IRQ, on busy queues. * + * @neighbours: List heads pointing to this device's neighbours' + * dev_list, one per address-family. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2440,6 +2444,9 @@ struct net_device { */ struct net_shaper_hierarchy *net_shaper_hierarchy; #endif + + struct hlist_head neighbours[NEIGH_NR_TABLES]; + u8 priv[] ____cacheline_aligned __counted_by(priv_len); } ____cacheline_aligned; diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 40aac1e24c68..9a832cab5b1d 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -29,6 +29,7 @@ #include #include #include +#include /* * NUD stands for "neighbor unreachability detection" @@ -136,6 +137,7 @@ struct neigh_statistics { struct neighbour { struct hlist_node hash; + struct hlist_node dev_list; struct neigh_table *tbl; struct neigh_parms *parms; unsigned long confirmed; @@ -236,13 +238,6 @@ struct neigh_table { struct pneigh_entry **phash_buckets; }; -enum { - NEIGH_ARP_TABLE = 0, - NEIGH_ND_TABLE = 1, - NEIGH_NR_TABLES, - NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */ -}; - static inline int neigh_parms_family(struct neigh_parms *p) { return p->tbl->family; diff --git a/include/net/neighbour_tables.h b/include/net/neighbour_tables.h new file mode 100644 index 000000000000..bcffbe8f7601 --- /dev/null +++ b/include/net/neighbour_tables.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NET_NEIGHBOUR_TABLES_H +#define _NET_NEIGHBOUR_TABLES_H + +enum { + NEIGH_ARP_TABLE = 0, + NEIGH_ND_TABLE = 1, + NEIGH_NR_TABLES, + NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */ +}; + +#endif diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 59f359c7b5e3..5e572f6eaf2c 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -60,6 +60,25 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, static const struct seq_operations neigh_stat_seq_ops; #endif +static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family) +{ + int i; + + switch (family) { + default: + DEBUG_NET_WARN_ON_ONCE(1); + fallthrough; /* to avoid panic by null-ptr-deref */ + case AF_INET: + i = NEIGH_ARP_TABLE; + break; + case AF_INET6: + i = NEIGH_ND_TABLE; + break; + } + + return &dev->neighbours[i]; +} + /* Neighbour hash table buckets are protected with rwlock tbl->lock. @@ -211,6 +230,7 @@ bool neigh_remove_one(struct neighbour *n) write_lock(&n->lock); if (refcount_read(&n->refcnt) == 1) { hlist_del_rcu(&n->hash); + hlist_del_rcu(&n->dev_list); neigh_mark_dead(n); retval = true; } @@ -351,48 +371,42 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net, static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, bool skip_perm) { - int i; - struct neigh_hash_table *nht; + struct hlist_head *dev_head; + struct hlist_node *tmp; + struct neighbour *n; - nht = rcu_dereference_protected(tbl->nht, - lockdep_is_held(&tbl->lock)); + dev_head = neigh_get_dev_table(dev, tbl->family); - for (i = 0; i < (1 << nht->hash_shift); i++) { - struct hlist_node *tmp; - struct neighbour *n; + hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) { + if (skip_perm && n->nud_state & NUD_PERMANENT) + continue; - neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) { - if (dev && n->dev != dev) - continue; - if (skip_perm && n->nud_state & NUD_PERMANENT) - continue; - - hlist_del_rcu(&n->hash); - write_lock(&n->lock); - neigh_del_timer(n); - neigh_mark_dead(n); - if (refcount_read(&n->refcnt) != 1) { - /* The most unpleasant situation. - We must destroy neighbour entry, - but someone still uses it. - - The destroy will be delayed until - the last user releases us, but - we must kill timers etc. and move - it to safe state. - */ - __skb_queue_purge(&n->arp_queue); - n->arp_queue_len_bytes = 0; - WRITE_ONCE(n->output, neigh_blackhole); - if (n->nud_state & NUD_VALID) - n->nud_state = NUD_NOARP; - else - n->nud_state = NUD_NONE; - neigh_dbg(2, "neigh %p is stray\n", n); - } - write_unlock(&n->lock); - neigh_cleanup_and_release(n); + hlist_del_rcu(&n->hash); + hlist_del_rcu(&n->dev_list); + write_lock(&n->lock); + neigh_del_timer(n); + neigh_mark_dead(n); + if (refcount_read(&n->refcnt) != 1) { + /* The most unpleasant situation. + * We must destroy neighbour entry, + * but someone still uses it. + * + * The destroy will be delayed until + * the last user releases us, but + * we must kill timers etc. and move + * it to safe state. + */ + __skb_queue_purge(&n->arp_queue); + n->arp_queue_len_bytes = 0; + WRITE_ONCE(n->output, neigh_blackhole); + if (n->nud_state & NUD_VALID) + n->nud_state = NUD_NOARP; + else + n->nud_state = NUD_NONE; + neigh_dbg(2, "neigh %p is stray\n", n); } + write_unlock(&n->lock); + neigh_cleanup_and_release(n); } } @@ -655,6 +669,10 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, if (want_ref) neigh_hold(n); hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]); + + hlist_add_head_rcu(&n->dev_list, + neigh_get_dev_table(dev, tbl->family)); + write_unlock_bh(&tbl->lock); neigh_dbg(2, "neigh %p is created\n", n); rc = n; @@ -935,6 +953,7 @@ static void neigh_periodic_work(struct work_struct *work) !time_in_range_open(jiffies, n->used, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { hlist_del_rcu(&n->hash); + hlist_del_rcu(&n->dev_list); neigh_mark_dead(n); write_unlock(&n->lock); neigh_cleanup_and_release(n); @@ -3054,6 +3073,7 @@ void __neigh_for_each_release(struct neigh_table *tbl, release = cb(n); if (release) { hlist_del_rcu(&n->hash); + hlist_del_rcu(&n->dev_list); neigh_mark_dead(n); } write_unlock(&n->lock);