Merge branch 'ipv4-preliminary-work-for-per-netns-rtnl'

Eric Dumazet says:

====================
ipv4: preliminary work for per-netns RTNL

Inspired by 9b8ca04854 ("ipv4: avoid quadratic behavior in
FIB insertion of common address") and by the per-netns RTNL
conversion started by Kuniyuki this week.

ip_fib_check_default() can use RCU instead of a shared spinlock.

fib_info_lock can be removed because its remaining users already run
under RTNL.

fib_info_devhash[] can be removed in favor of a per-device list head
(fib_nh_head, a single pointer) in net_device.
====================

Link: https://patch.msgid.link/20241004134720.579244-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2024-10-07 16:46:32 -07:00
3 changed files with 32 additions and 49 deletions

View File

@@ -83,6 +83,7 @@ unsigned_int allmulti
bool uc_promisc
unsigned_char nested_level
struct_in_device* ip_ptr read_mostly read_mostly __in_dev_get
struct hlist_head fib_nh_head
struct_inet6_dev* ip6_ptr read_mostly read_mostly __in6_dev_get
struct_vlan_info* vlan_info
struct_dsa_port* dsa_ptr

View File

@@ -2211,6 +2211,9 @@ struct net_device {
/* Protocol-specific pointers */
struct in_device __rcu *ip_ptr;
/** @fib_nh_head: nexthops associated with this netdev */
struct hlist_head fib_nh_head;
#if IS_ENABLED(CONFIG_VLAN_8021Q)
struct vlan_info __rcu *vlan_info;
#endif

View File

@@ -50,17 +50,12 @@
#include "fib_lookup.h"
static DEFINE_SPINLOCK(fib_info_lock);
static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_info_hash_size;
static unsigned int fib_info_hash_bits;
static unsigned int fib_info_cnt;
#define DEVINDEX_HASHBITS 8
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
/* for_nexthops and change_nexthops only used when nexthop object
* is not set in a fib_info. The logic within can reference fib_nh.
*/
@@ -260,12 +255,11 @@ EXPORT_SYMBOL_GPL(free_fib_info);
void fib_release_info(struct fib_info *fi)
{
spin_lock_bh(&fib_info_lock);
ASSERT_RTNL();
if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
hlist_del(&fi->fib_hash);
/* Paired with READ_ONCE() in fib_create_info(). */
WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1);
fib_info_cnt--;
if (fi->fib_prefsrc)
hlist_del(&fi->fib_lhash);
@@ -275,14 +269,13 @@ void fib_release_info(struct fib_info *fi)
change_nexthops(fi) {
if (!nexthop_nh->fib_nh_dev)
continue;
hlist_del(&nexthop_nh->nh_hash);
hlist_del_rcu(&nexthop_nh->nh_hash);
} endfor_nexthops(fi)
}
/* Paired with READ_ONCE() from fib_table_lookup() */
WRITE_ONCE(fi->fib_dead, 1);
fib_info_put(fi);
}
spin_unlock_bh(&fib_info_lock);
}
static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
@@ -322,17 +315,9 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
return 0;
}
static inline unsigned int fib_devindex_hashfn(unsigned int val)
static struct hlist_head *fib_nh_head(struct net_device *dev)
{
return hash_32(val, DEVINDEX_HASHBITS);
}
static struct hlist_head *
fib_info_devhash_bucket(const struct net_device *dev)
{
u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex;
return &fib_info_devhash[fib_devindex_hashfn(val)];
return &dev->fib_nh_head;
}
static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
@@ -362,10 +347,10 @@ static inline unsigned int fib_info_hashfn(struct fib_info *fi)
fi->fib_priority);
if (fi->nh) {
val ^= fib_devindex_hashfn(fi->nh->id);
val ^= fi->nh->id;
} else {
for_nexthops(fi) {
val ^= fib_devindex_hashfn(nh->fib_nh_oif);
val ^= nh->fib_nh_oif;
} endfor_nexthops(fi)
}
@@ -380,7 +365,7 @@ static struct fib_info *fib_find_info_nh(struct net *net,
struct fib_info *fi;
unsigned int hash;
hash = fib_info_hashfn_1(fib_devindex_hashfn(cfg->fc_nh_id),
hash = fib_info_hashfn_1(cfg->fc_nh_id,
cfg->fc_protocol, cfg->fc_scope,
(__force u32)cfg->fc_prefsrc,
cfg->fc_priority);
@@ -436,28 +421,23 @@ static struct fib_info *fib_find_info(struct fib_info *nfi)
}
/* Check, that the gateway is already configured.
* Used only by redirect accept routine.
* Used only by redirect accept routine, under rcu_read_lock();
*/
int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
struct hlist_head *head;
struct fib_nh *nh;
spin_lock(&fib_info_lock);
head = fib_nh_head(dev);
head = fib_info_devhash_bucket(dev);
hlist_for_each_entry(nh, head, nh_hash) {
if (nh->fib_nh_dev == dev &&
nh->fib_nh_gw4 == gw &&
hlist_for_each_entry_rcu(nh, head, nh_hash) {
DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
if (nh->fib_nh_gw4 == gw &&
!(nh->fib_nh_flags & RTNH_F_DEAD)) {
spin_unlock(&fib_info_lock);
return 0;
}
}
spin_unlock(&fib_info_lock);
return -1;
}
@@ -1276,7 +1256,7 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
unsigned int old_size = fib_info_hash_size;
unsigned int i;
spin_lock_bh(&fib_info_lock);
ASSERT_RTNL();
old_info_hash = fib_info_hash;
old_laddrhash = fib_info_laddrhash;
fib_info_hash_size = new_size;
@@ -1313,8 +1293,6 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
}
}
spin_unlock_bh(&fib_info_lock);
kvfree(old_info_hash);
kvfree(old_laddrhash);
}
@@ -1390,6 +1368,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
int nhs = 1;
struct net *net = cfg->fc_nlinfo.nl_net;
ASSERT_RTNL();
if (cfg->fc_type > RTN_MAX)
goto err_inval;
@@ -1432,8 +1411,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
err = -ENOBUFS;
/* Paired with WRITE_ONCE() in fib_release_info() */
if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) {
if (fib_info_cnt >= fib_info_hash_size) {
unsigned int new_size = fib_info_hash_size << 1;
struct hlist_head *new_info_hash;
struct hlist_head *new_laddrhash;
@@ -1592,7 +1570,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
refcount_set(&fi->fib_treeref, 1);
refcount_set(&fi->fib_clntref, 1);
spin_lock_bh(&fib_info_lock);
fib_info_cnt++;
hlist_add_head(&fi->fib_hash,
&fib_info_hash[fib_info_hashfn(fi)]);
@@ -1610,11 +1588,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
if (!nexthop_nh->fib_nh_dev)
continue;
head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev);
hlist_add_head(&nexthop_nh->nh_hash, head);
head = fib_nh_head(nexthop_nh->fib_nh_dev);
hlist_add_head_rcu(&nexthop_nh->nh_hash, head);
} endfor_nexthops(fi)
}
spin_unlock_bh(&fib_info_lock);
return fi;
err_inval:
@@ -1964,12 +1941,12 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
{
struct hlist_head *head = fib_info_devhash_bucket(dev);
struct hlist_head *head = fib_nh_head(dev);
struct fib_nh *nh;
hlist_for_each_entry(nh, head, nh_hash) {
if (nh->fib_nh_dev == dev)
fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
}
}
@@ -1983,7 +1960,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
*/
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
struct hlist_head *head = fib_info_devhash_bucket(dev);
struct hlist_head *head = fib_nh_head(dev);
struct fib_info *prev_fi = NULL;
int scope = RT_SCOPE_NOWHERE;
struct fib_nh *nh;
@@ -1997,7 +1974,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
int dead;
BUG_ON(!fi->fib_nhs);
if (nh->fib_nh_dev != dev || fi == prev_fi)
DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
if (fi == prev_fi)
continue;
prev_fi = fi;
dead = 0;
@@ -2147,7 +2125,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
}
prev_fi = NULL;
head = fib_info_devhash_bucket(dev);
head = fib_nh_head(dev);
ret = 0;
hlist_for_each_entry(nh, head, nh_hash) {
@@ -2155,7 +2133,8 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
int alive;
BUG_ON(!fi->fib_nhs);
if (nh->fib_nh_dev != dev || fi == prev_fi)
DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
if (fi == prev_fi)
continue;
prev_fi = fi;