mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-10 06:49:29 -04:00
Merge branch 'net-hold-instance-lock-during-netdev_up-register'
Stanislav Fomichev says: ==================== net: hold instance lock during NETDEV_UP/REGISTER Solving the issue reported by Cosmin in [0] requires consistent lock during NETDEV_UP/REGISTER notifiers. This series addresses that (along with some other fixes in net/ipv4/devinet.c and net/ipv6/addrconf.c) and appends the patches from Jakub that were conditional on consistent locking in NETDEV_UNREGISTER. 0: https://lore.kernel.org/700fa36b94cbd57cfea2622029b087643c80cbc9.camel@nvidia.com ==================== Link: https://patch.msgid.link/20250401163452.622454-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
@@ -343,6 +343,29 @@ there are two sets of interfaces: ``dev_xxx`` and ``netif_xxx`` (e.g.,
|
||||
acquiring the instance lock themselves, while the ``netif_xxx`` functions
|
||||
assume that the driver has already acquired the instance lock.
|
||||
|
||||
Notifiers and netdev instance lock
|
||||
==================================
|
||||
|
||||
For device drivers that implement shaping or queue management APIs,
|
||||
some of the notifiers (``enum netdev_cmd``) are running under the netdev
|
||||
instance lock.
|
||||
|
||||
For devices with locked ops, currently only the following notifiers are
|
||||
running under the lock:
|
||||
* ``NETDEV_REGISTER``
|
||||
* ``NETDEV_UP``
|
||||
|
||||
The following notifiers are running without the lock:
|
||||
* ``NETDEV_UNREGISTER``
|
||||
|
||||
There are no clear expectations for the remaining notifiers. Notifiers not on
|
||||
the list may run with or without the instance lock, potentially even invoking
|
||||
the same notifier type with and without the lock from different code paths.
|
||||
The goal is to eventually ensure that all (or most, with a few documented
|
||||
exceptions) notifiers run under the instance lock. Please extend this
|
||||
documentation whenever you make explicit assumption about lock being held
|
||||
from a notifier.
|
||||
|
||||
NETDEV_INTERNAL symbol namespace
|
||||
================================
|
||||
|
||||
|
||||
@@ -105,6 +105,7 @@ static void dummy_setup(struct net_device *dev)
|
||||
dev->netdev_ops = &dummy_netdev_ops;
|
||||
dev->ethtool_ops = &dummy_ethtool_ops;
|
||||
dev->needs_free_netdev = true;
|
||||
dev->request_ops_lock = true;
|
||||
|
||||
/* Fill in device structure with ethernet-generic values. */
|
||||
dev->flags |= IFF_NOARP;
|
||||
|
||||
@@ -939,6 +939,7 @@ static int nsim_init_netdevsim(struct netdevsim *ns)
|
||||
ns->netdev->netdev_ops = &nsim_netdev_ops;
|
||||
ns->netdev->stat_ops = &nsim_stat_ops;
|
||||
ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops;
|
||||
netdev_lockdep_set_classes(ns->netdev);
|
||||
|
||||
err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev);
|
||||
if (err)
|
||||
@@ -960,6 +961,14 @@ static int nsim_init_netdevsim(struct netdevsim *ns)
|
||||
if (err)
|
||||
goto err_ipsec_teardown;
|
||||
rtnl_unlock();
|
||||
|
||||
if (IS_ENABLED(CONFIG_DEBUG_NET)) {
|
||||
ns->nb.notifier_call = netdev_debug_event;
|
||||
if (register_netdevice_notifier_dev_net(ns->netdev, &ns->nb,
|
||||
&ns->nn))
|
||||
ns->nb.notifier_call = NULL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err_ipsec_teardown:
|
||||
@@ -1043,6 +1052,10 @@ void nsim_destroy(struct netdevsim *ns)
|
||||
debugfs_remove(ns->qr_dfs);
|
||||
debugfs_remove(ns->pp_dfs);
|
||||
|
||||
if (ns->nb.notifier_call)
|
||||
unregister_netdevice_notifier_dev_net(ns->netdev, &ns->nb,
|
||||
&ns->nn);
|
||||
|
||||
rtnl_lock();
|
||||
peer = rtnl_dereference(ns->peer);
|
||||
if (peer)
|
||||
|
||||
@@ -144,6 +144,9 @@ struct netdevsim {
|
||||
|
||||
struct nsim_ethtool ethtool;
|
||||
struct netdevsim __rcu *peer;
|
||||
|
||||
struct notifier_block nb;
|
||||
struct netdev_net_notifier nn;
|
||||
};
|
||||
|
||||
struct netdevsim *
|
||||
|
||||
@@ -4192,7 +4192,7 @@ int dev_change_flags(struct net_device *dev, unsigned int flags,
|
||||
int netif_set_alias(struct net_device *dev, const char *alias, size_t len);
|
||||
int dev_set_alias(struct net_device *, const char *, size_t);
|
||||
int dev_get_alias(const struct net_device *, char *, size_t);
|
||||
int netif_change_net_namespace(struct net_device *dev, struct net *net,
|
||||
int __dev_change_net_namespace(struct net_device *dev, struct net *net,
|
||||
const char *pat, int new_ifindex,
|
||||
struct netlink_ext_ack *extack);
|
||||
int dev_change_net_namespace(struct net_device *dev, struct net *net,
|
||||
|
||||
@@ -667,14 +667,6 @@ static inline void ip_ipgre_mc_map(__be32 naddr, const unsigned char *broadcast,
|
||||
memcpy(buf, &naddr, sizeof(naddr));
|
||||
}
|
||||
|
||||
#if IS_MODULE(CONFIG_IPV6)
|
||||
#define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X)
|
||||
#define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X)
|
||||
#else
|
||||
#define EXPORT_IPV6_MOD(X)
|
||||
#define EXPORT_IPV6_MOD_GPL(X)
|
||||
#endif
|
||||
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
#include <linux/ipv6.h>
|
||||
#endif
|
||||
@@ -694,6 +686,14 @@ static __inline__ void inet_reset_saddr(struct sock *sk)
|
||||
|
||||
#endif
|
||||
|
||||
#if IS_MODULE(CONFIG_IPV6)
|
||||
#define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X)
|
||||
#define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X)
|
||||
#else
|
||||
#define EXPORT_IPV6_MOD(X)
|
||||
#define EXPORT_IPV6_MOD_GPL(X)
|
||||
#endif
|
||||
|
||||
static inline unsigned int ipv4_addr_hash(__be32 ip)
|
||||
{
|
||||
return (__force unsigned int) ip;
|
||||
|
||||
@@ -98,4 +98,7 @@ static inline int netdev_lock_cmp_fn(const struct lockdep_map *a,
|
||||
&qdisc_xmit_lock_key); \
|
||||
}
|
||||
|
||||
int netdev_debug_event(struct notifier_block *nb, unsigned long event,
|
||||
void *ptr);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -45,5 +45,5 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
|
||||
obj-$(CONFIG_OF) += of_net.o
|
||||
obj-$(CONFIG_NET_TEST) += net_test.o
|
||||
obj-$(CONFIG_NET_DEVMEM) += devmem.o
|
||||
obj-$(CONFIG_DEBUG_NET_SMALL_RTNL) += rtnl_net_debug.o
|
||||
obj-$(CONFIG_DEBUG_NET) += lock_debug.o
|
||||
obj-$(CONFIG_FAIL_SKB_REALLOC) += skb_fault_injection.o
|
||||
|
||||
@@ -1771,6 +1771,7 @@ void netif_disable_lro(struct net_device *dev)
|
||||
netdev_unlock_ops(lower_dev);
|
||||
}
|
||||
}
|
||||
EXPORT_IPV6_MOD(netif_disable_lro);
|
||||
|
||||
/**
|
||||
* dev_disable_gro_hw - disable HW Generic Receive Offload on a device
|
||||
@@ -1858,7 +1859,9 @@ static int call_netdevice_register_net_notifiers(struct notifier_block *nb,
|
||||
int err;
|
||||
|
||||
for_each_netdev(net, dev) {
|
||||
netdev_lock_ops(dev);
|
||||
err = call_netdevice_register_notifiers(nb, dev);
|
||||
netdev_unlock_ops(dev);
|
||||
if (err)
|
||||
goto rollback;
|
||||
}
|
||||
@@ -11047,7 +11050,9 @@ int register_netdevice(struct net_device *dev)
|
||||
memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
|
||||
|
||||
/* Notify protocols, that a new device appeared. */
|
||||
netdev_lock_ops(dev);
|
||||
ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
|
||||
netdev_unlock_ops(dev);
|
||||
ret = notifier_to_errno(ret);
|
||||
if (ret) {
|
||||
/* Expect explicit free_netdev() on failure */
|
||||
@@ -12059,7 +12064,7 @@ void unregister_netdev(struct net_device *dev)
|
||||
}
|
||||
EXPORT_SYMBOL(unregister_netdev);
|
||||
|
||||
int netif_change_net_namespace(struct net_device *dev, struct net *net,
|
||||
int __dev_change_net_namespace(struct net_device *dev, struct net *net,
|
||||
const char *pat, int new_ifindex,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
@@ -12144,11 +12149,12 @@ int netif_change_net_namespace(struct net_device *dev, struct net *net,
|
||||
* And now a mini version of register_netdevice unregister_netdevice.
|
||||
*/
|
||||
|
||||
netdev_lock_ops(dev);
|
||||
/* If device is running close it first. */
|
||||
netif_close(dev);
|
||||
|
||||
/* And unlink it from device chain */
|
||||
unlist_netdevice(dev);
|
||||
netdev_unlock_ops(dev);
|
||||
|
||||
synchronize_net();
|
||||
|
||||
@@ -12210,11 +12216,12 @@ int netif_change_net_namespace(struct net_device *dev, struct net *net,
|
||||
err = netdev_change_owner(dev, net_old, net);
|
||||
WARN_ON(err);
|
||||
|
||||
netdev_lock_ops(dev);
|
||||
/* Add the device back in the hashes */
|
||||
list_netdevice(dev);
|
||||
|
||||
/* Notify protocols, that a new device appeared. */
|
||||
call_netdevice_notifiers(NETDEV_REGISTER, dev);
|
||||
netdev_unlock_ops(dev);
|
||||
|
||||
/*
|
||||
* Prevent userspace races by waiting until the network
|
||||
|
||||
@@ -117,13 +117,7 @@ EXPORT_SYMBOL(dev_set_mac_address_user);
|
||||
int dev_change_net_namespace(struct net_device *dev, struct net *net,
|
||||
const char *pat)
|
||||
{
|
||||
int ret;
|
||||
|
||||
netdev_lock_ops(dev);
|
||||
ret = netif_change_net_namespace(dev, net, pat, 0, NULL);
|
||||
netdev_unlock_ops(dev);
|
||||
|
||||
return ret;
|
||||
return __dev_change_net_namespace(dev, net, pat, 0, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dev_change_net_namespace);
|
||||
|
||||
|
||||
@@ -6,10 +6,11 @@
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/rtnetlink.h>
|
||||
#include <net/net_namespace.h>
|
||||
#include <net/netdev_lock.h>
|
||||
#include <net/netns/generic.h>
|
||||
|
||||
static int rtnl_net_debug_event(struct notifier_block *nb,
|
||||
unsigned long event, void *ptr)
|
||||
int netdev_debug_event(struct notifier_block *nb, unsigned long event,
|
||||
void *ptr)
|
||||
{
|
||||
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
|
||||
struct net *net = dev_net(dev);
|
||||
@@ -17,11 +18,13 @@ static int rtnl_net_debug_event(struct notifier_block *nb,
|
||||
|
||||
/* Keep enum and don't add default to trigger -Werror=switch */
|
||||
switch (cmd) {
|
||||
case NETDEV_REGISTER:
|
||||
case NETDEV_UP:
|
||||
netdev_ops_assert_locked(dev);
|
||||
fallthrough;
|
||||
case NETDEV_DOWN:
|
||||
case NETDEV_REBOOT:
|
||||
case NETDEV_CHANGE:
|
||||
case NETDEV_REGISTER:
|
||||
case NETDEV_UNREGISTER:
|
||||
case NETDEV_CHANGEMTU:
|
||||
case NETDEV_CHANGEADDR:
|
||||
@@ -66,6 +69,7 @@ static int rtnl_net_debug_event(struct notifier_block *nb,
|
||||
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(netdev_debug_event, "NETDEV_INTERNAL");
|
||||
|
||||
static int rtnl_net_debug_net_id;
|
||||
|
||||
@@ -74,7 +78,7 @@ static int __net_init rtnl_net_debug_net_init(struct net *net)
|
||||
struct notifier_block *nb;
|
||||
|
||||
nb = net_generic(net, rtnl_net_debug_net_id);
|
||||
nb->notifier_call = rtnl_net_debug_event;
|
||||
nb->notifier_call = netdev_debug_event;
|
||||
|
||||
return register_netdevice_notifier_net(net, nb);
|
||||
}
|
||||
@@ -95,7 +99,7 @@ static struct pernet_operations rtnl_net_debug_net_ops __net_initdata = {
|
||||
};
|
||||
|
||||
static struct notifier_block rtnl_net_debug_block = {
|
||||
.notifier_call = rtnl_net_debug_event,
|
||||
.notifier_call = netdev_debug_event,
|
||||
};
|
||||
|
||||
static int __init rtnl_net_debug_init(void)
|
||||
@@ -3025,8 +3025,6 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev,
|
||||
char ifname[IFNAMSIZ];
|
||||
int err;
|
||||
|
||||
netdev_lock_ops(dev);
|
||||
|
||||
err = validate_linkmsg(dev, tb, extack);
|
||||
if (err < 0)
|
||||
goto errout;
|
||||
@@ -3042,14 +3040,16 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev,
|
||||
|
||||
new_ifindex = nla_get_s32_default(tb[IFLA_NEW_IFINDEX], 0);
|
||||
|
||||
err = netif_change_net_namespace(dev, tgt_net, pat,
|
||||
err = __dev_change_net_namespace(dev, tgt_net, pat,
|
||||
new_ifindex, extack);
|
||||
if (err)
|
||||
goto errout;
|
||||
return err;
|
||||
|
||||
status |= DO_SETLINK_MODIFIED;
|
||||
}
|
||||
|
||||
netdev_lock_ops(dev);
|
||||
|
||||
if (tb[IFLA_MAP]) {
|
||||
struct rtnl_link_ifmap *u_map;
|
||||
struct ifmap k_map;
|
||||
|
||||
@@ -281,7 +281,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
|
||||
if (!in_dev->arp_parms)
|
||||
goto out_kfree;
|
||||
if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
|
||||
dev_disable_lro(dev);
|
||||
netif_disable_lro(dev);
|
||||
/* Reference in_dev->dev */
|
||||
netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
|
||||
/* Account for reference dev->ip_ptr (below) */
|
||||
|
||||
@@ -80,6 +80,7 @@
|
||||
#include <net/netlink.h>
|
||||
#include <net/pkt_sched.h>
|
||||
#include <net/l3mdev.h>
|
||||
#include <net/netdev_lock.h>
|
||||
#include <linux/if_tunnel.h>
|
||||
#include <linux/rtnetlink.h>
|
||||
#include <linux/netconf.h>
|
||||
@@ -377,6 +378,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
|
||||
int err = -ENOMEM;
|
||||
|
||||
ASSERT_RTNL();
|
||||
netdev_ops_assert_locked(dev);
|
||||
|
||||
if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev)
|
||||
return ERR_PTR(-EINVAL);
|
||||
@@ -402,7 +404,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
if (ndev->cnf.forwarding)
|
||||
dev_disable_lro(dev);
|
||||
netif_disable_lro(dev);
|
||||
/* We refer to the device */
|
||||
netdev_hold(dev, &ndev->dev_tracker, GFP_KERNEL);
|
||||
|
||||
@@ -3152,10 +3154,12 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
|
||||
|
||||
rtnl_net_lock(net);
|
||||
dev = __dev_get_by_index(net, ireq.ifr6_ifindex);
|
||||
netdev_lock_ops(dev);
|
||||
if (dev)
|
||||
err = inet6_addr_add(net, dev, &cfg, 0, 0, NULL);
|
||||
else
|
||||
err = -ENODEV;
|
||||
netdev_unlock_ops(dev);
|
||||
rtnl_net_unlock(net);
|
||||
return err;
|
||||
}
|
||||
@@ -5026,9 +5030,10 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
|
||||
if (!dev) {
|
||||
NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface");
|
||||
err = -ENODEV;
|
||||
goto unlock;
|
||||
goto unlock_rtnl;
|
||||
}
|
||||
|
||||
netdev_lock_ops(dev);
|
||||
idev = ipv6_find_idev(dev);
|
||||
if (IS_ERR(idev)) {
|
||||
err = PTR_ERR(idev);
|
||||
@@ -5065,6 +5070,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
|
||||
|
||||
in6_ifa_put(ifa);
|
||||
unlock:
|
||||
netdev_unlock_ops(dev);
|
||||
unlock_rtnl:
|
||||
rtnl_net_unlock(net);
|
||||
|
||||
return err;
|
||||
@@ -6516,7 +6523,9 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write,
|
||||
|
||||
if (idev->cnf.addr_gen_mode != new_val) {
|
||||
WRITE_ONCE(idev->cnf.addr_gen_mode, new_val);
|
||||
netdev_lock_ops(idev->dev);
|
||||
addrconf_init_auto_addrs(idev->dev);
|
||||
netdev_unlock_ops(idev->dev);
|
||||
}
|
||||
} else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
|
||||
struct net_device *dev;
|
||||
@@ -6528,7 +6537,9 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write,
|
||||
idev->cnf.addr_gen_mode != new_val) {
|
||||
WRITE_ONCE(idev->cnf.addr_gen_mode,
|
||||
new_val);
|
||||
netdev_lock_ops(idev->dev);
|
||||
addrconf_init_auto_addrs(idev->dev);
|
||||
netdev_unlock_ops(idev->dev);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -222,6 +222,31 @@ setup_ns()
|
||||
NS_LIST+=("${ns_list[@]}")
|
||||
}
|
||||
|
||||
# Create netdevsim with given id and net namespace.
|
||||
create_netdevsim() {
|
||||
local id="$1"
|
||||
local ns="$2"
|
||||
|
||||
modprobe netdevsim &> /dev/null
|
||||
udevadm settle
|
||||
|
||||
echo "$id 1" | ip netns exec $ns tee /sys/bus/netdevsim/new_device >/dev/null
|
||||
local dev=$(ip netns exec $ns ls /sys/bus/netdevsim/devices/netdevsim$id/net)
|
||||
ip -netns $ns link set dev $dev name nsim$id
|
||||
ip -netns $ns link set dev nsim$id up
|
||||
|
||||
echo nsim$id
|
||||
}
|
||||
|
||||
# Remove netdevsim with given id.
|
||||
cleanup_netdevsim() {
|
||||
local id="$1"
|
||||
|
||||
if [ -d "/sys/bus/netdevsim/devices/netdevsim$id/net" ]; then
|
||||
echo "$id" > /sys/bus/netdevsim/del_device
|
||||
fi
|
||||
}
|
||||
|
||||
tc_rule_stats_get()
|
||||
{
|
||||
local dev=$1; shift
|
||||
|
||||
@@ -7,10 +7,12 @@ set -o pipefail
|
||||
DEV=dummy-dev0
|
||||
DEV2=dummy-dev1
|
||||
ALT_NAME=some-alt-name
|
||||
NSIM_ADDR=2025
|
||||
|
||||
RET_CODE=0
|
||||
|
||||
cleanup() {
|
||||
cleanup_netdevsim $NSIM_ADDR
|
||||
cleanup_ns $NS $test_ns
|
||||
}
|
||||
|
||||
@@ -25,12 +27,15 @@ setup_ns NS test_ns
|
||||
|
||||
#
|
||||
# Test basic move without a rename
|
||||
# Use netdevsim because it has extra asserts for notifiers.
|
||||
#
|
||||
ip -netns $NS link add name $DEV type dummy || fail
|
||||
ip -netns $NS link set dev $DEV netns $test_ns ||
|
||||
|
||||
nsim=$(create_netdevsim $NSIM_ADDR $NS)
|
||||
ip -netns $NS link set dev $nsim netns $test_ns ||
|
||||
fail "Can't perform a netns move"
|
||||
ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move"
|
||||
ip -netns $test_ns link del $DEV || fail
|
||||
ip -netns $test_ns link show dev $nsim >> /dev/null ||
|
||||
fail "Device not found after move"
|
||||
cleanup_netdevsim $NSIM_ADDR
|
||||
|
||||
#
|
||||
# Test move with a conflict
|
||||
|
||||
Reference in New Issue
Block a user