Merge branch 'net-hold-instance-lock-during-netdev_up-register'

Stanislav Fomichev says:

====================
net: hold instance lock during NETDEV_UP/REGISTER

Solving the issue reported by Cosmin in [0] requires consistent
lock during NETDEV_UP/REGISTER notifiers. This series
addresses that (along with some other fixes in net/ipv4/devinet.c
and net/ipv6/addrconf.c) and appends the patches from Jakub
that were conditional on consistent locking in NETDEV_UNREGISTER.

0: https://lore.kernel.org/700fa36b94cbd57cfea2622029b087643c80cbc9.camel@nvidia.com
====================

Link: https://patch.msgid.link/20250401163452.622454-1-sdf@fomichev.me
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2025-04-03 15:32:20 -07:00
16 changed files with 125 additions and 36 deletions

View File

@@ -343,6 +343,29 @@ there are two sets of interfaces: ``dev_xxx`` and ``netif_xxx`` (e.g.,
acquiring the instance lock themselves, while the ``netif_xxx`` functions
assume that the driver has already acquired the instance lock.
Notifiers and netdev instance lock
==================================
For device drivers that implement shaping or queue management APIs,
some of the notifiers (``enum netdev_cmd``) are running under the netdev
instance lock.
For devices with locked ops, currently only the following notifiers are
running under the lock:
* ``NETDEV_REGISTER``
* ``NETDEV_UP``
The following notifiers are running without the lock:
* ``NETDEV_UNREGISTER``
There are no clear expectations for the remaining notifiers. Notifiers not on
the list may run with or without the instance lock, potentially even invoking
the same notifier type with and without the lock from different code paths.
The goal is to eventually ensure that all (or most, with a few documented
exceptions) notifiers run under the instance lock. Please extend this
documentation whenever you make explicit assumption about lock being held
from a notifier.
NETDEV_INTERNAL symbol namespace
================================

View File

@@ -105,6 +105,7 @@ static void dummy_setup(struct net_device *dev)
dev->netdev_ops = &dummy_netdev_ops;
dev->ethtool_ops = &dummy_ethtool_ops;
dev->needs_free_netdev = true;
dev->request_ops_lock = true;
/* Fill in device structure with ethernet-generic values. */
dev->flags |= IFF_NOARP;

View File

@@ -939,6 +939,7 @@ static int nsim_init_netdevsim(struct netdevsim *ns)
ns->netdev->netdev_ops = &nsim_netdev_ops;
ns->netdev->stat_ops = &nsim_stat_ops;
ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops;
netdev_lockdep_set_classes(ns->netdev);
err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev);
if (err)
@@ -960,6 +961,14 @@ static int nsim_init_netdevsim(struct netdevsim *ns)
if (err)
goto err_ipsec_teardown;
rtnl_unlock();
if (IS_ENABLED(CONFIG_DEBUG_NET)) {
ns->nb.notifier_call = netdev_debug_event;
if (register_netdevice_notifier_dev_net(ns->netdev, &ns->nb,
&ns->nn))
ns->nb.notifier_call = NULL;
}
return 0;
err_ipsec_teardown:
@@ -1043,6 +1052,10 @@ void nsim_destroy(struct netdevsim *ns)
debugfs_remove(ns->qr_dfs);
debugfs_remove(ns->pp_dfs);
if (ns->nb.notifier_call)
unregister_netdevice_notifier_dev_net(ns->netdev, &ns->nb,
&ns->nn);
rtnl_lock();
peer = rtnl_dereference(ns->peer);
if (peer)

View File

@@ -144,6 +144,9 @@ struct netdevsim {
struct nsim_ethtool ethtool;
struct netdevsim __rcu *peer;
struct notifier_block nb;
struct netdev_net_notifier nn;
};
struct netdevsim *

View File

@@ -4192,7 +4192,7 @@ int dev_change_flags(struct net_device *dev, unsigned int flags,
int netif_set_alias(struct net_device *dev, const char *alias, size_t len);
int dev_set_alias(struct net_device *, const char *, size_t);
int dev_get_alias(const struct net_device *, char *, size_t);
int netif_change_net_namespace(struct net_device *dev, struct net *net,
int __dev_change_net_namespace(struct net_device *dev, struct net *net,
const char *pat, int new_ifindex,
struct netlink_ext_ack *extack);
int dev_change_net_namespace(struct net_device *dev, struct net *net,

View File

@@ -667,14 +667,6 @@ static inline void ip_ipgre_mc_map(__be32 naddr, const unsigned char *broadcast,
memcpy(buf, &naddr, sizeof(naddr));
}
#if IS_MODULE(CONFIG_IPV6)
#define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X)
#define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X)
#else
#define EXPORT_IPV6_MOD(X)
#define EXPORT_IPV6_MOD_GPL(X)
#endif
#if IS_ENABLED(CONFIG_IPV6)
#include <linux/ipv6.h>
#endif
@@ -694,6 +686,14 @@ static __inline__ void inet_reset_saddr(struct sock *sk)
#endif
#if IS_MODULE(CONFIG_IPV6)
#define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X)
#define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X)
#else
#define EXPORT_IPV6_MOD(X)
#define EXPORT_IPV6_MOD_GPL(X)
#endif
static inline unsigned int ipv4_addr_hash(__be32 ip)
{
return (__force unsigned int) ip;

View File

@@ -98,4 +98,7 @@ static inline int netdev_lock_cmp_fn(const struct lockdep_map *a,
&qdisc_xmit_lock_key); \
}
int netdev_debug_event(struct notifier_block *nb, unsigned long event,
void *ptr);
#endif

View File

@@ -45,5 +45,5 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
obj-$(CONFIG_OF) += of_net.o
obj-$(CONFIG_NET_TEST) += net_test.o
obj-$(CONFIG_NET_DEVMEM) += devmem.o
obj-$(CONFIG_DEBUG_NET_SMALL_RTNL) += rtnl_net_debug.o
obj-$(CONFIG_DEBUG_NET) += lock_debug.o
obj-$(CONFIG_FAIL_SKB_REALLOC) += skb_fault_injection.o

View File

@@ -1771,6 +1771,7 @@ void netif_disable_lro(struct net_device *dev)
netdev_unlock_ops(lower_dev);
}
}
EXPORT_IPV6_MOD(netif_disable_lro);
/**
* dev_disable_gro_hw - disable HW Generic Receive Offload on a device
@@ -1858,7 +1859,9 @@ static int call_netdevice_register_net_notifiers(struct notifier_block *nb,
int err;
for_each_netdev(net, dev) {
netdev_lock_ops(dev);
err = call_netdevice_register_notifiers(nb, dev);
netdev_unlock_ops(dev);
if (err)
goto rollback;
}
@@ -11047,7 +11050,9 @@ int register_netdevice(struct net_device *dev)
memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
/* Notify protocols, that a new device appeared. */
netdev_lock_ops(dev);
ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
netdev_unlock_ops(dev);
ret = notifier_to_errno(ret);
if (ret) {
/* Expect explicit free_netdev() on failure */
@@ -12059,7 +12064,7 @@ void unregister_netdev(struct net_device *dev)
}
EXPORT_SYMBOL(unregister_netdev);
int netif_change_net_namespace(struct net_device *dev, struct net *net,
int __dev_change_net_namespace(struct net_device *dev, struct net *net,
const char *pat, int new_ifindex,
struct netlink_ext_ack *extack)
{
@@ -12144,11 +12149,12 @@ int netif_change_net_namespace(struct net_device *dev, struct net *net,
* And now a mini version of register_netdevice unregister_netdevice.
*/
netdev_lock_ops(dev);
/* If device is running close it first. */
netif_close(dev);
/* And unlink it from device chain */
unlist_netdevice(dev);
netdev_unlock_ops(dev);
synchronize_net();
@@ -12210,11 +12216,12 @@ int netif_change_net_namespace(struct net_device *dev, struct net *net,
err = netdev_change_owner(dev, net_old, net);
WARN_ON(err);
netdev_lock_ops(dev);
/* Add the device back in the hashes */
list_netdevice(dev);
/* Notify protocols, that a new device appeared. */
call_netdevice_notifiers(NETDEV_REGISTER, dev);
netdev_unlock_ops(dev);
/*
* Prevent userspace races by waiting until the network

View File

@@ -117,13 +117,7 @@ EXPORT_SYMBOL(dev_set_mac_address_user);
int dev_change_net_namespace(struct net_device *dev, struct net *net,
const char *pat)
{
int ret;
netdev_lock_ops(dev);
ret = netif_change_net_namespace(dev, net, pat, 0, NULL);
netdev_unlock_ops(dev);
return ret;
return __dev_change_net_namespace(dev, net, pat, 0, NULL);
}
EXPORT_SYMBOL_GPL(dev_change_net_namespace);

View File

@@ -6,10 +6,11 @@
#include <linux/notifier.h>
#include <linux/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/netdev_lock.h>
#include <net/netns/generic.h>
static int rtnl_net_debug_event(struct notifier_block *nb,
unsigned long event, void *ptr)
int netdev_debug_event(struct notifier_block *nb, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
@@ -17,11 +18,13 @@ static int rtnl_net_debug_event(struct notifier_block *nb,
/* Keep enum and don't add default to trigger -Werror=switch */
switch (cmd) {
case NETDEV_REGISTER:
case NETDEV_UP:
netdev_ops_assert_locked(dev);
fallthrough;
case NETDEV_DOWN:
case NETDEV_REBOOT:
case NETDEV_CHANGE:
case NETDEV_REGISTER:
case NETDEV_UNREGISTER:
case NETDEV_CHANGEMTU:
case NETDEV_CHANGEADDR:
@@ -66,6 +69,7 @@ static int rtnl_net_debug_event(struct notifier_block *nb,
return NOTIFY_DONE;
}
EXPORT_SYMBOL_NS_GPL(netdev_debug_event, "NETDEV_INTERNAL");
static int rtnl_net_debug_net_id;
@@ -74,7 +78,7 @@ static int __net_init rtnl_net_debug_net_init(struct net *net)
struct notifier_block *nb;
nb = net_generic(net, rtnl_net_debug_net_id);
nb->notifier_call = rtnl_net_debug_event;
nb->notifier_call = netdev_debug_event;
return register_netdevice_notifier_net(net, nb);
}
@@ -95,7 +99,7 @@ static struct pernet_operations rtnl_net_debug_net_ops __net_initdata = {
};
static struct notifier_block rtnl_net_debug_block = {
.notifier_call = rtnl_net_debug_event,
.notifier_call = netdev_debug_event,
};
static int __init rtnl_net_debug_init(void)

View File

@@ -3025,8 +3025,6 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev,
char ifname[IFNAMSIZ];
int err;
netdev_lock_ops(dev);
err = validate_linkmsg(dev, tb, extack);
if (err < 0)
goto errout;
@@ -3042,14 +3040,16 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev,
new_ifindex = nla_get_s32_default(tb[IFLA_NEW_IFINDEX], 0);
err = netif_change_net_namespace(dev, tgt_net, pat,
err = __dev_change_net_namespace(dev, tgt_net, pat,
new_ifindex, extack);
if (err)
goto errout;
return err;
status |= DO_SETLINK_MODIFIED;
}
netdev_lock_ops(dev);
if (tb[IFLA_MAP]) {
struct rtnl_link_ifmap *u_map;
struct ifmap k_map;

View File

@@ -281,7 +281,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
if (!in_dev->arp_parms)
goto out_kfree;
if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
dev_disable_lro(dev);
netif_disable_lro(dev);
/* Reference in_dev->dev */
netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
/* Account for reference dev->ip_ptr (below) */

View File

@@ -80,6 +80,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/l3mdev.h>
#include <net/netdev_lock.h>
#include <linux/if_tunnel.h>
#include <linux/rtnetlink.h>
#include <linux/netconf.h>
@@ -377,6 +378,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
int err = -ENOMEM;
ASSERT_RTNL();
netdev_ops_assert_locked(dev);
if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev)
return ERR_PTR(-EINVAL);
@@ -402,7 +404,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
return ERR_PTR(err);
}
if (ndev->cnf.forwarding)
dev_disable_lro(dev);
netif_disable_lro(dev);
/* We refer to the device */
netdev_hold(dev, &ndev->dev_tracker, GFP_KERNEL);
@@ -3152,10 +3154,12 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
rtnl_net_lock(net);
dev = __dev_get_by_index(net, ireq.ifr6_ifindex);
netdev_lock_ops(dev);
if (dev)
err = inet6_addr_add(net, dev, &cfg, 0, 0, NULL);
else
err = -ENODEV;
netdev_unlock_ops(dev);
rtnl_net_unlock(net);
return err;
}
@@ -5026,9 +5030,10 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!dev) {
NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface");
err = -ENODEV;
goto unlock;
goto unlock_rtnl;
}
netdev_lock_ops(dev);
idev = ipv6_find_idev(dev);
if (IS_ERR(idev)) {
err = PTR_ERR(idev);
@@ -5065,6 +5070,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
in6_ifa_put(ifa);
unlock:
netdev_unlock_ops(dev);
unlock_rtnl:
rtnl_net_unlock(net);
return err;
@@ -6516,7 +6523,9 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write,
if (idev->cnf.addr_gen_mode != new_val) {
WRITE_ONCE(idev->cnf.addr_gen_mode, new_val);
netdev_lock_ops(idev->dev);
addrconf_init_auto_addrs(idev->dev);
netdev_unlock_ops(idev->dev);
}
} else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
struct net_device *dev;
@@ -6528,7 +6537,9 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write,
idev->cnf.addr_gen_mode != new_val) {
WRITE_ONCE(idev->cnf.addr_gen_mode,
new_val);
netdev_lock_ops(idev->dev);
addrconf_init_auto_addrs(idev->dev);
netdev_unlock_ops(idev->dev);
}
}
}

View File

@@ -222,6 +222,31 @@ setup_ns()
NS_LIST+=("${ns_list[@]}")
}
# Create netdevsim with given id and net namespace.
create_netdevsim() {
local id="$1"
local ns="$2"
modprobe netdevsim &> /dev/null
udevadm settle
echo "$id 1" | ip netns exec $ns tee /sys/bus/netdevsim/new_device >/dev/null
local dev=$(ip netns exec $ns ls /sys/bus/netdevsim/devices/netdevsim$id/net)
ip -netns $ns link set dev $dev name nsim$id
ip -netns $ns link set dev nsim$id up
echo nsim$id
}
# Remove netdevsim with given id.
cleanup_netdevsim() {
local id="$1"
if [ -d "/sys/bus/netdevsim/devices/netdevsim$id/net" ]; then
echo "$id" > /sys/bus/netdevsim/del_device
fi
}
tc_rule_stats_get()
{
local dev=$1; shift

View File

@@ -7,10 +7,12 @@ set -o pipefail
DEV=dummy-dev0
DEV2=dummy-dev1
ALT_NAME=some-alt-name
NSIM_ADDR=2025
RET_CODE=0
cleanup() {
cleanup_netdevsim $NSIM_ADDR
cleanup_ns $NS $test_ns
}
@@ -25,12 +27,15 @@ setup_ns NS test_ns
#
# Test basic move without a rename
# Use netdevsim because it has extra asserts for notifiers.
#
ip -netns $NS link add name $DEV type dummy || fail
ip -netns $NS link set dev $DEV netns $test_ns ||
nsim=$(create_netdevsim $NSIM_ADDR $NS)
ip -netns $NS link set dev $nsim netns $test_ns ||
fail "Can't perform a netns move"
ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move"
ip -netns $test_ns link del $DEV || fail
ip -netns $test_ns link show dev $nsim >> /dev/null ||
fail "Device not found after move"
cleanup_netdevsim $NSIM_ADDR
#
# Test move with a conflict