Merge branch 'udp_tunnel-remove-rtnl_lock-dependency'

Stanislav Fomichev says:

====================
udp_tunnel: remove rtnl_lock dependency

Recently bnxt had to grow back a bunch of rtnl dependencies because
of udp_tunnel's infra. Add separate (global) mutext to protect
udp_tunnel state.
====================

Link: https://patch.msgid.link/20250616162117.287806-1-stfomichev@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2025-06-18 18:53:53 -07:00
22 changed files with 176 additions and 172 deletions

View File

@@ -10219,8 +10219,7 @@ static int bnx2x_udp_tunnel_sync(struct net_device *netdev, unsigned int table)
static const struct udp_tunnel_nic_info bnx2x_udp_tunnels = {
.sync_table = bnx2x_udp_tunnel_sync,
.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
.flags = UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
.tables = {
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, },

View File

@@ -14055,28 +14055,13 @@ static void bnxt_unlock_sp(struct bnxt *bp)
netdev_unlock(bp->dev);
}
/* Same as bnxt_lock_sp() with additional rtnl_lock */
static void bnxt_rtnl_lock_sp(struct bnxt *bp)
{
clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
rtnl_lock();
netdev_lock(bp->dev);
}
static void bnxt_rtnl_unlock_sp(struct bnxt *bp)
{
set_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
netdev_unlock(bp->dev);
rtnl_unlock();
}
/* Only called from bnxt_sp_task() */
static void bnxt_reset(struct bnxt *bp, bool silent)
{
bnxt_rtnl_lock_sp(bp);
bnxt_lock_sp(bp);
if (test_bit(BNXT_STATE_OPEN, &bp->state))
bnxt_reset_task(bp, silent);
bnxt_rtnl_unlock_sp(bp);
bnxt_unlock_sp(bp);
}
/* Only called from bnxt_sp_task() */
@@ -14084,9 +14069,9 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
{
int i;
bnxt_rtnl_lock_sp(bp);
bnxt_lock_sp(bp);
if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
bnxt_rtnl_unlock_sp(bp);
bnxt_unlock_sp(bp);
return;
}
/* Disable and flush TPA before resetting the RX ring */
@@ -14125,7 +14110,7 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
}
if (bp->flags & BNXT_FLAG_TPA)
bnxt_set_tpa(bp, true);
bnxt_rtnl_unlock_sp(bp);
bnxt_unlock_sp(bp);
}
static void bnxt_fw_fatal_close(struct bnxt *bp)
@@ -15017,17 +15002,15 @@ static void bnxt_fw_reset_task(struct work_struct *work)
bp->fw_reset_state = BNXT_FW_RESET_STATE_OPENING;
fallthrough;
case BNXT_FW_RESET_STATE_OPENING:
while (!rtnl_trylock()) {
while (!netdev_trylock(bp->dev)) {
bnxt_queue_fw_reset_work(bp, HZ / 10);
return;
}
netdev_lock(bp->dev);
rc = bnxt_open(bp->dev);
if (rc) {
netdev_err(bp->dev, "bnxt_open() failed during FW reset\n");
bnxt_fw_reset_abort(bp, rc);
netdev_unlock(bp->dev);
rtnl_unlock();
goto ulp_start;
}
@@ -15047,7 +15030,6 @@ static void bnxt_fw_reset_task(struct work_struct *work)
bnxt_dl_health_fw_status_update(bp, true);
}
netdev_unlock(bp->dev);
rtnl_unlock();
bnxt_ulp_start(bp, 0);
bnxt_reenable_sriov(bp);
netdev_lock(bp->dev);
@@ -15573,8 +15555,7 @@ static int bnxt_udp_tunnel_unset_port(struct net_device *netdev, unsigned int ta
static const struct udp_tunnel_nic_info bnxt_udp_tunnels = {
.set_port = bnxt_udp_tunnel_set_port,
.unset_port = bnxt_udp_tunnel_unset_port,
.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
.flags = UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
.tables = {
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, },
@@ -15582,8 +15563,7 @@ static const struct udp_tunnel_nic_info bnxt_udp_tunnels = {
}, bnxt_udp_tunnels_p7 = {
.set_port = bnxt_udp_tunnel_set_port,
.unset_port = bnxt_udp_tunnel_unset_port,
.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
.flags = UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
.tables = {
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, },
@@ -15998,7 +15978,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx)
rc);
napi_enable_locked(&bnapi->napi);
bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons);
netif_close(dev);
bnxt_reset_task(bp, true);
return rc;
}
@@ -16814,7 +16794,6 @@ static int bnxt_resume(struct device *device)
struct bnxt *bp = netdev_priv(dev);
int rc = 0;
rtnl_lock();
netdev_lock(dev);
rc = pci_enable_device(bp->pdev);
if (rc) {
@@ -16859,7 +16838,6 @@ static int bnxt_resume(struct device *device)
resume_exit:
netdev_unlock(bp->dev);
rtnl_unlock();
bnxt_ulp_start(bp, rc);
if (!rc)
bnxt_reenable_sriov(bp);
@@ -17025,7 +17003,6 @@ static void bnxt_io_resume(struct pci_dev *pdev)
int err;
netdev_info(bp->dev, "PCI Slot Resume\n");
rtnl_lock();
netdev_lock(netdev);
err = bnxt_hwrm_func_qcaps(bp);
@@ -17043,7 +17020,6 @@ static void bnxt_io_resume(struct pci_dev *pdev)
netif_device_attach(netdev);
netdev_unlock(netdev);
rtnl_unlock();
bnxt_ulp_start(bp, err);
if (!err)
bnxt_reenable_sriov(bp);

View File

@@ -4031,8 +4031,7 @@ static int be_vxlan_unset_port(struct net_device *netdev, unsigned int table,
static const struct udp_tunnel_nic_info be_udp_tunnels = {
.set_port = be_vxlan_set_port,
.unset_port = be_vxlan_unset_port,
.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
.flags = UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
.tables = {
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
},

View File

@@ -15895,7 +15895,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pf->udp_tunnel_nic.set_port = i40e_udp_tunnel_set_port;
pf->udp_tunnel_nic.unset_port = i40e_udp_tunnel_unset_port;
pf->udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
pf->udp_tunnel_nic.shared = &pf->udp_tunnel_shared;
pf->udp_tunnel_nic.tables[0].n_entries = I40E_MAX_PF_UDP_OFFLOAD_PORTS;
pf->udp_tunnel_nic.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN |

View File

@@ -4767,7 +4767,6 @@ int ice_init_dev(struct ice_pf *pf)
pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port;
pf->hw.udp_tunnel_nic.unset_port = ice_udp_tunnel_unset_port;
pf->hw.udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
pf->hw.udp_tunnel_nic.shared = &pf->hw.udp_tunnel_shared;
if (pf->hw.tnl.valid_count[TNL_VXLAN]) {
pf->hw.udp_tunnel_nic.tables[0].n_entries =

View File

@@ -2670,8 +2670,7 @@ static int mlx4_udp_tunnel_sync(struct net_device *dev, unsigned int table)
static const struct udp_tunnel_nic_info mlx4_udp_tunnels = {
.sync_table = mlx4_udp_tunnel_sync,
.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
UDP_TUNNEL_NIC_INFO_IPV4_ONLY,
.flags = UDP_TUNNEL_NIC_INFO_IPV4_ONLY,
.tables = {
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
},

View File

@@ -5351,8 +5351,7 @@ void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv)
priv->nic_info.set_port = mlx5e_vxlan_set_port;
priv->nic_info.unset_port = mlx5e_vxlan_unset_port;
priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN;
priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN;
priv->nic_info.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN;
/* Don't count the space hard-coded to the IANA port */
priv->nic_info.tables[0].n_entries =

View File

@@ -2394,8 +2394,7 @@ static int nfp_udp_tunnel_sync(struct net_device *netdev, unsigned int table)
static const struct udp_tunnel_nic_info nfp_udp_tunnels = {
.sync_table = nfp_udp_tunnel_sync,
.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
.flags = UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
.tables = {
{
.n_entries = NFP_NET_N_VXLAN_PORTS,

View File

@@ -987,20 +987,17 @@ static int qede_udp_tunnel_sync(struct net_device *dev, unsigned int table)
static const struct udp_tunnel_nic_info qede_udp_tunnels_both = {
.sync_table = qede_udp_tunnel_sync,
.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP,
.tables = {
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, },
},
}, qede_udp_tunnels_vxlan = {
.sync_table = qede_udp_tunnel_sync,
.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP,
.tables = {
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
},
}, qede_udp_tunnels_geneve = {
.sync_table = qede_udp_tunnel_sync,
.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP,
.tables = {
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, },
},

View File

@@ -486,7 +486,6 @@ static int qlcnic_udp_tunnel_sync(struct net_device *dev, unsigned int table)
static const struct udp_tunnel_nic_info qlcnic_udp_tunnels = {
.sync_table = qlcnic_udp_tunnel_sync,
.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP,
.tables = {
{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
},

View File

@@ -3985,7 +3985,6 @@ static int efx_ef10_udp_tnl_unset_port(struct net_device *dev,
static const struct udp_tunnel_nic_info efx_ef10_udp_tunnels = {
.set_port = efx_ef10_udp_tnl_set_port,
.unset_port = efx_ef10_udp_tnl_unset_port,
.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP,
.tables = {
{
.n_entries = 16,

View File

@@ -41,6 +41,7 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
/* per-network namespace private data for this module */
struct geneve_net {
struct list_head geneve_list;
/* sock_list is protected by rtnl lock */
struct list_head sock_list;
};
@@ -1180,8 +1181,9 @@ static void geneve_offload_rx_ports(struct net_device *dev, bool push)
struct geneve_net *gn = net_generic(net, geneve_net_id);
struct geneve_sock *gs;
rcu_read_lock();
list_for_each_entry_rcu(gs, &gn->sock_list, list) {
ASSERT_RTNL();
list_for_each_entry(gs, &gn->sock_list, list) {
if (push) {
udp_tunnel_push_rx_port(dev, gs->sock,
UDP_TUNNEL_TYPE_GENEVE);
@@ -1190,7 +1192,6 @@ static void geneve_offload_rx_ports(struct net_device *dev, bool push)
UDP_TUNNEL_TYPE_GENEVE);
}
}
rcu_read_unlock();
}
/* Initialize the device structure. */

View File

@@ -131,7 +131,6 @@ struct netdevsim {
struct nsim_macsec macsec;
struct {
u32 inject_error;
u32 sleep;
u32 __ports[2][NSIM_UDP_TUNNEL_N_PORTS];
u32 (*ports)[NSIM_UDP_TUNNEL_N_PORTS];
struct dentry *ddir;
@@ -342,7 +341,6 @@ struct nsim_dev {
bool ipv4_only;
bool shared;
bool static_iana_vxlan;
u32 sleep;
} udp_ports;
struct nsim_dev_psample *psample;
u16 esw_mode;

View File

@@ -18,9 +18,6 @@ nsim_udp_tunnel_set_port(struct net_device *dev, unsigned int table,
ret = -ns->udp_ports.inject_error;
ns->udp_ports.inject_error = 0;
if (ns->udp_ports.sleep)
msleep(ns->udp_ports.sleep);
if (!ret) {
if (ns->udp_ports.ports[table][entry]) {
WARN(1, "entry already in use\n");
@@ -47,8 +44,6 @@ nsim_udp_tunnel_unset_port(struct net_device *dev, unsigned int table,
ret = -ns->udp_ports.inject_error;
ns->udp_ports.inject_error = 0;
if (ns->udp_ports.sleep)
msleep(ns->udp_ports.sleep);
if (!ret) {
u32 val = be16_to_cpu(ti->port) << 16 | ti->type;
@@ -112,12 +107,10 @@ nsim_udp_tunnels_info_reset_write(struct file *file, const char __user *data,
struct net_device *dev = file->private_data;
struct netdevsim *ns = netdev_priv(dev);
rtnl_lock();
if (dev->reg_state == NETREG_REGISTERED) {
memset(ns->udp_ports.ports, 0, sizeof(ns->udp_ports.__ports));
udp_tunnel_nic_reset_ntf(dev);
}
rtnl_unlock();
return count;
}
@@ -172,7 +165,6 @@ int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev,
GFP_KERNEL);
if (!info)
return -ENOMEM;
ns->udp_ports.sleep = nsim_dev->udp_ports.sleep;
if (nsim_dev->udp_ports.sync_all) {
info->set_port = NULL;
@@ -181,8 +173,6 @@ int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev,
info->sync_table = NULL;
}
if (ns->udp_ports.sleep)
info->flags |= UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
if (nsim_dev->udp_ports.open_only)
info->flags |= UDP_TUNNEL_NIC_INFO_OPEN_ONLY;
if (nsim_dev->udp_ports.ipv4_only)
@@ -217,6 +207,4 @@ void nsim_udp_tunnels_debugfs_create(struct nsim_dev *nsim_dev)
&nsim_dev->udp_ports.shared);
debugfs_create_bool("udp_ports_static_iana_vxlan", 0600, nsim_dev->ddir,
&nsim_dev->udp_ports.static_iana_vxlan);
debugfs_create_u32("udp_ports_sleep", 0600, nsim_dev->ddir,
&nsim_dev->udp_ports.sleep);
}

View File

@@ -1485,21 +1485,18 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev,
static bool __vxlan_sock_release_prep(struct vxlan_sock *vs)
{
struct vxlan_net *vn;
ASSERT_RTNL();
if (!vs)
return false;
if (!refcount_dec_and_test(&vs->refcnt))
return false;
vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id);
spin_lock(&vn->sock_lock);
hlist_del_rcu(&vs->hlist);
udp_tunnel_notify_del_rx_port(vs->sock,
(vs->flags & VXLAN_F_GPE) ?
UDP_TUNNEL_TYPE_VXLAN_GPE :
UDP_TUNNEL_TYPE_VXLAN);
spin_unlock(&vn->sock_lock);
return true;
}
@@ -2857,26 +2854,23 @@ static void vxlan_cleanup(struct timer_list *t)
static void vxlan_vs_del_dev(struct vxlan_dev *vxlan)
{
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
ASSERT_RTNL();
spin_lock(&vn->sock_lock);
hlist_del_init_rcu(&vxlan->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
hlist_del_init_rcu(&vxlan->hlist6.hlist);
#endif
spin_unlock(&vn->sock_lock);
}
static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan,
struct vxlan_dev_node *node)
{
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
__be32 vni = vxlan->default_dst.remote_vni;
ASSERT_RTNL();
node->vxlan = vxlan;
spin_lock(&vn->sock_lock);
hlist_add_head_rcu(&node->hlist, vni_head(vs, vni));
spin_unlock(&vn->sock_lock);
}
/* Setup stats when device is created */
@@ -3301,9 +3295,10 @@ static void vxlan_offload_rx_ports(struct net_device *dev, bool push)
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
unsigned int i;
spin_lock(&vn->sock_lock);
ASSERT_RTNL();
for (i = 0; i < PORT_HASH_SIZE; ++i) {
hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
hlist_for_each_entry(vs, &vn->sock_list[i], hlist) {
unsigned short type;
if (vs->flags & VXLAN_F_GPE)
@@ -3317,7 +3312,6 @@ static void vxlan_offload_rx_ports(struct net_device *dev, bool push)
udp_tunnel_drop_rx_port(dev, vs->sock, type);
}
}
spin_unlock(&vn->sock_lock);
}
/* Initialize the device structure. */
@@ -3548,12 +3542,13 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
__be16 port, u32 flags,
int ifindex)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_sock *vs;
struct socket *sock;
unsigned int h;
struct udp_tunnel_sock_cfg tunnel_cfg;
ASSERT_RTNL();
vs = kzalloc(sizeof(*vs), GFP_KERNEL);
if (!vs)
return ERR_PTR(-ENOMEM);
@@ -3571,13 +3566,11 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
refcount_set(&vs->refcnt, 1);
vs->flags = (flags & VXLAN_F_RCV_FLAGS);
spin_lock(&vn->sock_lock);
hlist_add_head_rcu(&vs->hlist, vs_head(net, port));
udp_tunnel_notify_add_rx_port(sock,
(vs->flags & VXLAN_F_GPE) ?
UDP_TUNNEL_TYPE_VXLAN_GPE :
UDP_TUNNEL_TYPE_VXLAN);
spin_unlock(&vn->sock_lock);
/* Mark socket as an encapsulation socket. */
memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
@@ -3601,26 +3594,27 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
{
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
bool metadata = vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA;
struct vxlan_sock *vs = NULL;
struct vxlan_dev_node *node;
int l3mdev_index = 0;
ASSERT_RTNL();
if (vxlan->cfg.remote_ifindex)
l3mdev_index = l3mdev_master_upper_ifindex_by_index(
vxlan->net, vxlan->cfg.remote_ifindex);
if (!vxlan->cfg.no_share) {
spin_lock(&vn->sock_lock);
rcu_read_lock();
vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
vxlan->cfg.dst_port, vxlan->cfg.flags,
l3mdev_index);
if (vs && !refcount_inc_not_zero(&vs->refcnt)) {
spin_unlock(&vn->sock_lock);
rcu_read_unlock();
return -EBUSY;
}
spin_unlock(&vn->sock_lock);
rcu_read_unlock();
}
if (!vs)
vs = vxlan_socket_create(vxlan->net, ipv6,
@@ -4894,7 +4888,6 @@ static __net_init int vxlan_init_net(struct net *net)
unsigned int h;
INIT_LIST_HEAD(&vn->vxlan_list);
spin_lock_init(&vn->sock_lock);
vn->nexthop_notifier_block.notifier_call = vxlan_nexthop_event;
for (h = 0; h < PORT_HASH_SIZE; ++h)

View File

@@ -19,8 +19,8 @@ extern const struct rhashtable_params vxlan_vni_rht_params;
/* per-network namespace private data for this module */
struct vxlan_net {
struct list_head vxlan_list;
/* sock_list is protected by rtnl lock */
struct hlist_head sock_list[PORT_HASH_SIZE];
spinlock_t sock_lock;
struct notifier_block nexthop_notifier_block;
};

View File

@@ -40,11 +40,11 @@ static void vxlan_vs_add_del_vninode(struct vxlan_dev *vxlan,
struct vxlan_vni_node *v,
bool del)
{
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
struct vxlan_dev_node *node;
struct vxlan_sock *vs;
spin_lock(&vn->sock_lock);
ASSERT_RTNL();
if (del) {
if (!hlist_unhashed(&v->hlist4.hlist))
hlist_del_init_rcu(&v->hlist4.hlist);
@@ -52,7 +52,7 @@ static void vxlan_vs_add_del_vninode(struct vxlan_dev *vxlan,
if (!hlist_unhashed(&v->hlist6.hlist))
hlist_del_init_rcu(&v->hlist6.hlist);
#endif
goto out;
return;
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -67,23 +67,21 @@ static void vxlan_vs_add_del_vninode(struct vxlan_dev *vxlan,
node = &v->hlist4;
hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
}
out:
spin_unlock(&vn->sock_lock);
}
void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan,
struct vxlan_sock *vs,
bool ipv6)
{
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
struct vxlan_vni_node *v, *tmp;
struct vxlan_dev_node *node;
ASSERT_RTNL();
if (!vg)
return;
spin_lock(&vn->sock_lock);
list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
#if IS_ENABLED(CONFIG_IPV6)
if (ipv6)
@@ -94,26 +92,24 @@ void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan,
node->vxlan = vxlan;
hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
}
spin_unlock(&vn->sock_lock);
}
void vxlan_vs_del_vnigrp(struct vxlan_dev *vxlan)
{
struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
struct vxlan_vni_node *v, *tmp;
ASSERT_RTNL();
if (!vg)
return;
spin_lock(&vn->sock_lock);
list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
hlist_del_init_rcu(&v->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
hlist_del_init_rcu(&v->hlist6.hlist);
#endif
}
spin_unlock(&vn->sock_lock);
}
static void vxlan_vnifilter_stats_get(const struct vxlan_vni_node *vninode,

View File

@@ -130,22 +130,6 @@ void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock,
void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type);
void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type);
static inline void udp_tunnel_get_rx_info(struct net_device *dev)
{
ASSERT_RTNL();
if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
return;
call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev);
}
static inline void udp_tunnel_drop_rx_info(struct net_device *dev)
{
ASSERT_RTNL();
if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
return;
call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev);
}
/* Transmit the skb using UDP encapsulation. */
void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl,
@@ -222,19 +206,17 @@ static inline void udp_tunnel_encap_enable(struct sock *sk)
#define UDP_TUNNEL_NIC_MAX_TABLES 4
enum udp_tunnel_nic_info_flags {
/* Device callbacks may sleep */
UDP_TUNNEL_NIC_INFO_MAY_SLEEP = BIT(0),
/* Device only supports offloads when it's open, all ports
* will be removed before close and re-added after open.
*/
UDP_TUNNEL_NIC_INFO_OPEN_ONLY = BIT(1),
UDP_TUNNEL_NIC_INFO_OPEN_ONLY = BIT(0),
/* Device supports only IPv4 tunnels */
UDP_TUNNEL_NIC_INFO_IPV4_ONLY = BIT(2),
UDP_TUNNEL_NIC_INFO_IPV4_ONLY = BIT(1),
/* Device has hard-coded the IANA VXLAN port (4789) as VXLAN.
* This port must not be counted towards n_entries of any table.
* Driver will not receive any callback associated with port 4789.
*/
UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(3),
UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(2),
};
struct udp_tunnel_nic;
@@ -325,6 +307,9 @@ struct udp_tunnel_nic_ops {
size_t (*dump_size)(struct net_device *dev, unsigned int table);
int (*dump_write)(struct net_device *dev, unsigned int table,
struct sk_buff *skb);
void (*assert_locked)(struct net_device *dev);
void (*lock)(struct net_device *dev);
void (*unlock)(struct net_device *dev);
};
#ifdef CONFIG_INET
@@ -353,8 +338,29 @@ static inline void
udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table,
unsigned int idx, u8 priv)
{
if (udp_tunnel_nic_ops)
if (udp_tunnel_nic_ops) {
udp_tunnel_nic_ops->lock(dev);
udp_tunnel_nic_ops->set_port_priv(dev, table, idx, priv);
udp_tunnel_nic_ops->unlock(dev);
}
}
static inline void udp_tunnel_nic_assert_locked(struct net_device *dev)
{
if (udp_tunnel_nic_ops)
udp_tunnel_nic_ops->assert_locked(dev);
}
static inline void udp_tunnel_nic_lock(struct net_device *dev)
{
if (udp_tunnel_nic_ops)
udp_tunnel_nic_ops->lock(dev);
}
static inline void udp_tunnel_nic_unlock(struct net_device *dev)
{
if (udp_tunnel_nic_ops)
udp_tunnel_nic_ops->unlock(dev);
}
static inline void
@@ -396,17 +402,50 @@ static inline void udp_tunnel_nic_reset_ntf(struct net_device *dev)
static inline size_t
udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table)
{
size_t ret;
if (!udp_tunnel_nic_ops)
return 0;
return udp_tunnel_nic_ops->dump_size(dev, table);
udp_tunnel_nic_ops->lock(dev);
ret = udp_tunnel_nic_ops->dump_size(dev, table);
udp_tunnel_nic_ops->unlock(dev);
return ret;
}
static inline int
udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table,
struct sk_buff *skb)
{
int ret;
if (!udp_tunnel_nic_ops)
return 0;
return udp_tunnel_nic_ops->dump_write(dev, table, skb);
udp_tunnel_nic_ops->lock(dev);
ret = udp_tunnel_nic_ops->dump_write(dev, table, skb);
udp_tunnel_nic_ops->unlock(dev);
return ret;
}
static inline void udp_tunnel_get_rx_info(struct net_device *dev)
{
ASSERT_RTNL();
if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
return;
udp_tunnel_nic_assert_locked(dev);
call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev);
}
static inline void udp_tunnel_drop_rx_info(struct net_device *dev)
{
ASSERT_RTNL();
if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
return;
udp_tunnel_nic_assert_locked(dev);
call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev);
}
#endif

View File

@@ -3179,7 +3179,6 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
if (dev->reg_state == NETREG_REGISTERED ||
dev->reg_state == NETREG_UNREGISTERING) {
ASSERT_RTNL();
netdev_ops_assert_locked(dev);
rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
@@ -3229,7 +3228,6 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
return -EINVAL;
if (dev->reg_state == NETREG_REGISTERED) {
ASSERT_RTNL();
netdev_ops_assert_locked(dev);
rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
@@ -10771,12 +10769,14 @@ int __netdev_update_features(struct net_device *dev)
* *before* calling udp_tunnel_get_rx_info,
* but *after* calling udp_tunnel_drop_rx_info.
*/
udp_tunnel_nic_lock(dev);
if (features & NETIF_F_RX_UDP_TUNNEL_PORT) {
dev->features = features;
udp_tunnel_get_rx_info(dev);
} else {
udp_tunnel_drop_rx_info(dev);
}
udp_tunnel_nic_unlock(dev);
}
if (diff & NETIF_F_HW_VLAN_CTAG_FILTER) {

View File

@@ -134,15 +134,17 @@ void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type)
struct udp_tunnel_info ti;
struct net_device *dev;
ASSERT_RTNL();
ti.type = type;
ti.sa_family = sk->sk_family;
ti.port = inet_sk(sk)->inet_sport;
rcu_read_lock();
for_each_netdev_rcu(net, dev) {
for_each_netdev(net, dev) {
udp_tunnel_nic_lock(dev);
udp_tunnel_nic_add_port(dev, &ti);
udp_tunnel_nic_unlock(dev);
}
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(udp_tunnel_notify_add_rx_port);
@@ -154,15 +156,17 @@ void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type)
struct udp_tunnel_info ti;
struct net_device *dev;
ASSERT_RTNL();
ti.type = type;
ti.sa_family = sk->sk_family;
ti.port = inet_sk(sk)->inet_sport;
rcu_read_lock();
for_each_netdev_rcu(net, dev) {
for_each_netdev(net, dev) {
udp_tunnel_nic_lock(dev);
udp_tunnel_nic_del_port(dev, &ti);
udp_tunnel_nic_unlock(dev);
}
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(udp_tunnel_notify_del_rx_port);

View File

@@ -29,6 +29,7 @@ struct udp_tunnel_nic_table_entry {
* struct udp_tunnel_nic - UDP tunnel port offload state
* @work: async work for talking to hardware from process context
* @dev: netdev pointer
* @lock: protects all fields
* @need_sync: at least one port start changed
* @need_replay: space was freed, we need a replay of all ports
* @work_pending: @work is currently scheduled
@@ -41,6 +42,8 @@ struct udp_tunnel_nic {
struct net_device *dev;
struct mutex lock;
u8 need_sync:1;
u8 need_replay:1;
u8 work_pending:1;
@@ -298,22 +301,11 @@ __udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
static void
udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
{
const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
bool may_sleep;
if (!utn->need_sync)
return;
/* Drivers which sleep in the callback need to update from
* the workqueue, if we come from the tunnel driver's notification.
*/
may_sleep = info->flags & UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
if (!may_sleep)
__udp_tunnel_nic_device_sync(dev, utn);
if (may_sleep || utn->need_replay) {
queue_work(udp_tunnel_nic_workqueue, &utn->work);
utn->work_pending = 1;
}
queue_work(udp_tunnel_nic_workqueue, &utn->work);
utn->work_pending = 1;
}
static bool
@@ -554,12 +546,12 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev)
struct udp_tunnel_nic *utn;
unsigned int i, j;
ASSERT_RTNL();
utn = dev->udp_tunnel_nic;
if (!utn)
return;
mutex_lock(&utn->lock);
utn->need_sync = false;
for (i = 0; i < utn->n_tables; i++)
for (j = 0; j < info->tables[i].n_entries; j++) {
@@ -569,7 +561,7 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev)
entry->flags &= ~(UDP_TUNNEL_NIC_ENTRY_DEL |
UDP_TUNNEL_NIC_ENTRY_OP_FAIL);
/* We don't release rtnl across ops */
/* We don't release utn lock across ops */
WARN_ON(entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN);
if (!entry->use_cnt)
continue;
@@ -579,6 +571,8 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev)
}
__udp_tunnel_nic_device_sync(dev, utn);
mutex_unlock(&utn->lock);
}
static size_t
@@ -643,6 +637,33 @@ __udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table,
return -EMSGSIZE;
}
static void __udp_tunnel_nic_assert_locked(struct net_device *dev)
{
struct udp_tunnel_nic *utn;
utn = dev->udp_tunnel_nic;
if (utn)
lockdep_assert_held(&utn->lock);
}
static void __udp_tunnel_nic_lock(struct net_device *dev)
{
struct udp_tunnel_nic *utn;
utn = dev->udp_tunnel_nic;
if (utn)
mutex_lock(&utn->lock);
}
static void __udp_tunnel_nic_unlock(struct net_device *dev)
{
struct udp_tunnel_nic *utn;
utn = dev->udp_tunnel_nic;
if (utn)
mutex_unlock(&utn->lock);
}
static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = {
.get_port = __udp_tunnel_nic_get_port,
.set_port_priv = __udp_tunnel_nic_set_port_priv,
@@ -651,6 +672,9 @@ static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = {
.reset_ntf = __udp_tunnel_nic_reset_ntf,
.dump_size = __udp_tunnel_nic_dump_size,
.dump_write = __udp_tunnel_nic_dump_write,
.assert_locked = __udp_tunnel_nic_assert_locked,
.lock = __udp_tunnel_nic_lock,
.unlock = __udp_tunnel_nic_unlock,
};
static void
@@ -710,11 +734,15 @@ static void udp_tunnel_nic_device_sync_work(struct work_struct *work)
container_of(work, struct udp_tunnel_nic, work);
rtnl_lock();
mutex_lock(&utn->lock);
utn->work_pending = 0;
__udp_tunnel_nic_device_sync(utn->dev, utn);
if (utn->need_replay)
udp_tunnel_nic_replay(utn->dev, utn);
mutex_unlock(&utn->lock);
rtnl_unlock();
}
@@ -730,6 +758,7 @@ udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info,
return NULL;
utn->n_tables = n_tables;
INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work);
mutex_init(&utn->lock);
for (i = 0; i < n_tables; i++) {
utn->entries[i] = kcalloc(info->tables[i].n_entries,
@@ -821,8 +850,11 @@ static int udp_tunnel_nic_register(struct net_device *dev)
dev_hold(dev);
dev->udp_tunnel_nic = utn;
if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) {
udp_tunnel_nic_lock(dev);
udp_tunnel_get_rx_info(dev);
udp_tunnel_nic_unlock(dev);
}
return 0;
}
@@ -832,6 +864,8 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
{
const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
udp_tunnel_nic_lock(dev);
/* For a shared table remove this dev from the list of sharing devices
* and if there are other devices just detach.
*/
@@ -841,8 +875,10 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
list_for_each_entry(node, &info->shared->devices, list)
if (node->dev == dev)
break;
if (list_entry_is_head(node, &info->shared->devices, list))
if (list_entry_is_head(node, &info->shared->devices, list)) {
udp_tunnel_nic_unlock(dev);
return;
}
list_del(&node->list);
kfree(node);
@@ -852,6 +888,7 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
if (first) {
udp_tunnel_drop_rx_info(dev);
utn->dev = first->dev;
udp_tunnel_nic_unlock(dev);
goto release_dev;
}
@@ -862,6 +899,7 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
* from the work which we will boot immediately.
*/
udp_tunnel_nic_flush(dev, utn);
udp_tunnel_nic_unlock(dev);
/* Wait for the work to be done using the state, netdev core will
* retry unregister until we give up our reference on this device.
@@ -910,12 +948,16 @@ udp_tunnel_nic_netdevice_event(struct notifier_block *unused,
return NOTIFY_DONE;
if (event == NETDEV_UP) {
udp_tunnel_nic_lock(dev);
WARN_ON(!udp_tunnel_nic_is_empty(dev, utn));
udp_tunnel_get_rx_info(dev);
udp_tunnel_nic_unlock(dev);
return NOTIFY_OK;
}
if (event == NETDEV_GOING_DOWN) {
udp_tunnel_nic_lock(dev);
udp_tunnel_nic_flush(dev, utn);
udp_tunnel_nic_unlock(dev);
return NOTIFY_OK;
}

View File

@@ -266,7 +266,6 @@ for port in 0 1; do
echo $NSIM_ID > /sys/bus/netdevsim/new_device
else
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
echo 1 > $NSIM_DEV_SYS/new_port
fi
NSIM_NETDEV=`get_netdev_name old_netdevs`
@@ -350,23 +349,11 @@ old_netdevs=$(ls /sys/class/net)
port=0
echo $NSIM_ID > /sys/bus/netdevsim/new_device
echo 0 > $NSIM_DEV_SYS/del_port
echo 1000 > $NSIM_DEV_DFS/udp_ports_sleep
echo 0 > $NSIM_DEV_SYS/new_port
NSIM_NETDEV=`get_netdev_name old_netdevs`
msg="create VxLANs"
exp0=( 0 0 0 0 ) # sleep is longer than out wait
new_vxlan vxlan0 10000 $NSIM_NETDEV
modprobe -r vxlan
modprobe -r udp_tunnel
msg="remove tunnels"
exp0=( 0 0 0 0 )
check_tables
msg="create VxLANs"
exp0=( 0 0 0 0 ) # sleep is longer than out wait
exp0=( `mke 10000 1` 0 0 0 )
new_vxlan vxlan0 10000 $NSIM_NETDEV
exp0=( 0 0 0 0 )
@@ -428,7 +415,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -486,7 +472,6 @@ echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -543,7 +528,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -573,7 +557,6 @@ echo 1 > $NSIM_DEV_DFS/udp_ports_ipv4_only
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -634,7 +617,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -690,7 +672,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -750,7 +731,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -809,7 +789,6 @@ echo $NSIM_ID > /sys/bus/netdevsim/new_device
echo 0 > $NSIM_DEV_SYS/del_port
echo 0 > $NSIM_DEV_DFS/udp_ports_open_only
echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
echo 1 > $NSIM_DEV_DFS/udp_ports_shared
old_netdevs=$(ls /sys/class/net)