Merge branch 'vxlan-join-leave-mc-group-when-reconfigured'

Petr Machata says:

====================
vxlan: Join / leave MC group when reconfigured

When a vxlan netdevice is brought up, if its default remote is a multicast
address, the device joins the indicated group.

Therefore when the multicast remote address changes, the device should
leave the current group and subscribe to the new one. Similarly when the
interface used for endpoint communication is changed in a situation when
multicast remote is configured. This is currently not done.

Both vxlan_igmp_join() and vxlan_igmp_leave() can however fail. So it is
possible that with such fix, the netdevice will end up in an inconsistent
situation where the old group is not joined anymore, but joining the
new group fails. Should we join the new group first, and leave the old one
second, we might end up in the opposite situation, where both groups are
joined. Undoing any of this during rollback is going to be similarly
problematic.

One solution would be to just forbid the change when the netdevice is up.
However in vnifilter mode, changing the group address is allowed, and these
problems are simply ignored (see vxlan_vni_update_group()):

 # ip link add name br up type bridge vlan_filtering 1
 # ip link add vx1 up master br type vxlan external vnifilter local 192.0.2.1 dev lo dstport 4789
 # bridge vni add dev vx1 vni 200 group 224.0.0.1
 # tcpdump -i lo &
 # bridge vni add dev vx1 vni 200 group 224.0.0.2
 18:55:46.523438 IP 0.0.0.0 > 224.0.0.22: igmp v3 report, 1 group record(s)
 18:55:46.943447 IP 0.0.0.0 > 224.0.0.22: igmp v3 report, 1 group record(s)
 # bridge vni
 dev               vni                group/remote
 vx1               200                224.0.0.2

Having two different modes of operation for conceptually the same interface
is silly, so in this patchset, just do what the vnifilter code does and
deal with the errors by crossing fingers real hard.
====================

Link: https://patch.msgid.link/cover.1739548836.git.petrm@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Paolo Abeni
2025-02-18 13:06:50 +01:00
4 changed files with 136 additions and 28 deletions

View File

@@ -3936,7 +3936,7 @@ static void vxlan_config_apply(struct net_device *dev,
}
static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
struct vxlan_config *conf, bool changelink,
struct vxlan_config *conf,
struct netlink_ext_ack *extack)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
@@ -3947,7 +3947,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
if (ret)
return ret;
vxlan_config_apply(dev, conf, lowerdev, src_net, changelink);
vxlan_config_apply(dev, conf, lowerdev, src_net, false);
return 0;
}
@@ -3965,7 +3965,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
int err;
dst = &vxlan->default_dst;
err = vxlan_dev_configure(net, dev, conf, false, extack);
err = vxlan_dev_configure(net, dev, conf, extack);
if (err)
return err;
@@ -4419,6 +4419,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
bool rem_ip_changed, change_igmp;
struct net_device *lowerdev;
struct vxlan_config conf;
struct vxlan_rdst *dst;
@@ -4442,8 +4443,13 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
if (err)
return err;
rem_ip_changed = !vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip);
change_igmp = vxlan->dev->flags & IFF_UP &&
(rem_ip_changed ||
dst->remote_ifindex != conf.remote_ifindex);
/* handle default dst entry */
if (!vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip)) {
if (rem_ip_changed) {
u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, conf.vni);
spin_lock_bh(&vxlan->hash_lock[hash_index]);
@@ -4487,6 +4493,9 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
}
}
if (change_igmp && vxlan_addr_multicast(&dst->remote_ip))
err = vxlan_multicast_leave(vxlan);
if (conf.age_interval != vxlan->cfg.age_interval)
mod_timer(&vxlan->age_timer, jiffies);
@@ -4494,7 +4503,12 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
if (lowerdev && lowerdev != dst->remote_dev)
dst->remote_dev = lowerdev;
vxlan_config_apply(dev, &conf, lowerdev, vxlan->net, true);
return 0;
if (!err && change_igmp &&
vxlan_addr_multicast(&dst->remote_ip))
err = vxlan_multicast_join(vxlan);
return err;
}
static void vxlan_dellink(struct net_device *dev, struct list_head *head)

View File

@@ -291,16 +291,6 @@ if [[ "$CHECK_TC" = "yes" ]]; then
check_tc_version
fi
require_command()
{
local cmd=$1; shift
if [[ ! -x "$(command -v "$cmd")" ]]; then
echo "SKIP: $cmd not installed"
exit $ksft_skip
fi
}
# IPv6 support was added in v3.0
check_mtools_version()
{

View File

@@ -450,6 +450,25 @@ kill_process()
{ kill $pid && wait $pid; } 2>/dev/null
}
check_command()
{
local cmd=$1; shift
if [[ ! -x "$(command -v "$cmd")" ]]; then
log_test_skip "$cmd not installed"
return $EXIT_STATUS
fi
}
require_command()
{
local cmd=$1; shift
if ! check_command "$cmd"; then
exit $EXIT_STATUS
fi
}
ip_link_add()
{
local name=$1; shift

View File

@@ -1,29 +1,114 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Check FDB default-remote handling across "ip link set".
ALL_TESTS="
test_set_remote
test_change_mc_remote
"
source lib.sh
check_remotes()
{
local what=$1; shift
local N=$(bridge fdb sh dev vx | grep 00:00:00:00:00:00 | wc -l)
echo -ne "expected two remotes after $what\t"
if [[ $N != 2 ]]; then
echo "[FAIL]"
EXIT_STATUS=1
((N == 2))
check_err $? "expected 2 remotes after $what, got $N"
}
# Check FDB default-remote handling across "ip link set".
test_set_remote()
{
RET=0
ip_link_add vx up type vxlan id 2000 dstport 4789
bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent
bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent
check_remotes "fdb append"
ip link set dev vx type vxlan remote 192.0.2.30
check_remotes "link set"
log_test 'FDB default-remote handling across "ip link set"'
}
fmt_remote()
{
local addr=$1; shift
if [[ $addr == 224.* ]]; then
echo "group $addr"
else
echo "[ OK ]"
echo "remote $addr"
fi
}
ip link add name vx up type vxlan id 2000 dstport 4789
bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent
bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent
check_remotes "fdb append"
change_remote()
{
local remote=$1; shift
ip link set dev vx type vxlan remote 192.0.2.30
check_remotes "link set"
ip link set dev vx type vxlan $(fmt_remote $remote) dev v1
}
check_membership()
{
local check_vec=("$@")
local memberships
memberships=$(
netstat -n --groups |
sed -n '/^v1\b/p' |
grep -o '[^ ]*$'
)
check_err $? "Couldn't obtain group memberships"
local item
for item in "${check_vec[@]}"; do
eval "local $item"
echo "$memberships" | grep -q "\b$group\b"
check_err_fail $fail $? "$group is_ex reported in IGMP query response"
done
}
test_change_mc_remote()
{
check_command netstat || return
ip_link_add v1 up type veth peer name v2
ip_link_set_up v2
RET=0
ip_link_add vx up type vxlan dstport 4789 \
local 192.0.2.1 $(fmt_remote 224.1.1.1) dev v1 vni 1000
check_membership "group=224.1.1.1 fail=0" \
"group=224.1.1.2 fail=1" \
"group=224.1.1.3 fail=1"
log_test "MC group report after VXLAN creation"
RET=0
change_remote 224.1.1.2
check_membership "group=224.1.1.1 fail=1" \
"group=224.1.1.2 fail=0" \
"group=224.1.1.3 fail=1"
log_test "MC group report after changing VXLAN remote MC->MC"
RET=0
change_remote 192.0.2.2
check_membership "group=224.1.1.1 fail=1" \
"group=224.1.1.2 fail=1" \
"group=224.1.1.3 fail=1"
log_test "MC group report after changing VXLAN remote MC->UC"
}
trap defer_scopes_cleanup EXIT
tests_run
ip link del dev vx
exit $EXIT_STATUS