mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-07 20:58:14 -04:00
Merge branch 'vxlan-fix-npds-when-using-nexthop-objects'
Ido Schimmel says:
====================
vxlan: Fix NPDs when using nexthop objects
With FDB nexthop groups, VXLAN FDB entries do not necessarily point to
a remote destination but rather to an FDB nexthop group. This means that
first_remote_{rcu,rtnl}() can return NULL and a few places in the driver
were not ready for that, resulting in NULL pointer dereferences.
Patches #1-#2 fix these NPDs.
Note that vxlan_fdb_find_uc() still dereferences the remote returned by
first_remote_rcu() without checking that it is not NULL, but this
function is only invoked by a single driver which vetoes the creation of
FDB nexthop groups. I will patch this in net-next to make the code less
fragile.
Patch #3 adds a selftests which exercises these code paths and tests
basic Tx functionality with FDB nexthop groups. I verified that the test
crashes the kernel without the first two patches.
====================
Link: https://patch.msgid.link/20250901065035.159644-1-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
@@ -1445,6 +1445,10 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev,
|
||||
if (READ_ONCE(f->updated) != now)
|
||||
WRITE_ONCE(f->updated, now);
|
||||
|
||||
/* Don't override an fdb with nexthop with a learnt entry */
|
||||
if (rcu_access_pointer(f->nh))
|
||||
return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS;
|
||||
|
||||
if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) &&
|
||||
rdst->remote_ifindex == ifindex))
|
||||
return SKB_NOT_DROPPED_YET;
|
||||
@@ -1453,10 +1457,6 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev,
|
||||
if (f->state & (NUD_PERMANENT | NUD_NOARP))
|
||||
return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS;
|
||||
|
||||
/* Don't override an fdb with nexthop with a learnt entry */
|
||||
if (rcu_access_pointer(f->nh))
|
||||
return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS;
|
||||
|
||||
if (net_ratelimit())
|
||||
netdev_info(dev,
|
||||
"%pM migrated from %pIS to %pIS\n",
|
||||
@@ -1877,6 +1877,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
|
||||
n = neigh_lookup(&arp_tbl, &tip, dev);
|
||||
|
||||
if (n) {
|
||||
struct vxlan_rdst *rdst = NULL;
|
||||
struct vxlan_fdb *f;
|
||||
struct sk_buff *reply;
|
||||
|
||||
@@ -1887,7 +1888,9 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
|
||||
|
||||
rcu_read_lock();
|
||||
f = vxlan_find_mac_tx(vxlan, n->ha, vni);
|
||||
if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
|
||||
if (f)
|
||||
rdst = first_remote_rcu(f);
|
||||
if (rdst && vxlan_addr_any(&rdst->remote_ip)) {
|
||||
/* bridge-local neighbor */
|
||||
neigh_release(n);
|
||||
rcu_read_unlock();
|
||||
@@ -2044,6 +2047,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
|
||||
n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, dev);
|
||||
|
||||
if (n) {
|
||||
struct vxlan_rdst *rdst = NULL;
|
||||
struct vxlan_fdb *f;
|
||||
struct sk_buff *reply;
|
||||
|
||||
@@ -2053,7 +2057,9 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
|
||||
}
|
||||
|
||||
f = vxlan_find_mac_tx(vxlan, n->ha, vni);
|
||||
if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
|
||||
if (f)
|
||||
rdst = first_remote_rcu(f);
|
||||
if (rdst && vxlan_addr_any(&rdst->remote_ip)) {
|
||||
/* bridge-local neighbor */
|
||||
neigh_release(n);
|
||||
goto out;
|
||||
|
||||
@@ -61,9 +61,7 @@ static inline struct hlist_head *vs_head(struct net *net, __be16 port)
|
||||
return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
|
||||
}
|
||||
|
||||
/* First remote destination for a forwarding entry.
|
||||
* Guaranteed to be non-NULL because remotes are never deleted.
|
||||
*/
|
||||
/* First remote destination for a forwarding entry. */
|
||||
static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb)
|
||||
{
|
||||
if (rcu_access_pointer(fdb->nh))
|
||||
|
||||
@@ -99,6 +99,7 @@ TEST_GEN_PROGS += bind_wildcard
|
||||
TEST_GEN_PROGS += bind_timewait
|
||||
TEST_PROGS += test_vxlan_mdb.sh
|
||||
TEST_PROGS += test_bridge_neigh_suppress.sh
|
||||
TEST_PROGS += test_vxlan_nh.sh
|
||||
TEST_PROGS += test_vxlan_nolocalbypass.sh
|
||||
TEST_PROGS += test_bridge_backup_port.sh
|
||||
TEST_PROGS += test_neigh.sh
|
||||
|
||||
223
tools/testing/selftests/net/test_vxlan_nh.sh
Executable file
223
tools/testing/selftests/net/test_vxlan_nh.sh
Executable file
@@ -0,0 +1,223 @@
|
||||
#!/bin/bash
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
source lib.sh
|
||||
TESTS="
|
||||
basic_tx_ipv4
|
||||
basic_tx_ipv6
|
||||
learning
|
||||
proxy_ipv4
|
||||
proxy_ipv6
|
||||
"
|
||||
VERBOSE=0
|
||||
|
||||
################################################################################
|
||||
# Utilities
|
||||
|
||||
run_cmd()
|
||||
{
|
||||
local cmd="$1"
|
||||
local out
|
||||
local stderr="2>/dev/null"
|
||||
|
||||
if [ "$VERBOSE" = "1" ]; then
|
||||
echo "COMMAND: $cmd"
|
||||
stderr=
|
||||
fi
|
||||
|
||||
out=$(eval "$cmd" "$stderr")
|
||||
rc=$?
|
||||
if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then
|
||||
echo " $out"
|
||||
fi
|
||||
|
||||
return $rc
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Cleanup
|
||||
|
||||
exit_cleanup_all()
|
||||
{
|
||||
cleanup_all_ns
|
||||
exit "${EXIT_STATUS}"
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Tests
|
||||
|
||||
nh_stats_get()
|
||||
{
|
||||
ip -n "$ns1" -s -j nexthop show id 10 | jq ".[][\"group_stats\"][][\"packets\"]"
|
||||
}
|
||||
|
||||
tc_stats_get()
|
||||
{
|
||||
tc_rule_handle_stats_get "dev dummy1 egress" 101 ".packets" "-n $ns1"
|
||||
}
|
||||
|
||||
basic_tx_common()
|
||||
{
|
||||
local af_str=$1; shift
|
||||
local proto=$1; shift
|
||||
local local_addr=$1; shift
|
||||
local plen=$1; shift
|
||||
local remote_addr=$1; shift
|
||||
|
||||
RET=0
|
||||
|
||||
# Test basic Tx functionality. Check that stats are incremented on
|
||||
# both the FDB nexthop group and the egress device.
|
||||
|
||||
run_cmd "ip -n $ns1 link add name dummy1 up type dummy"
|
||||
run_cmd "ip -n $ns1 route add $remote_addr/$plen dev dummy1"
|
||||
run_cmd "tc -n $ns1 qdisc add dev dummy1 clsact"
|
||||
run_cmd "tc -n $ns1 filter add dev dummy1 egress proto $proto pref 1 handle 101 flower ip_proto udp dst_ip $remote_addr dst_port 4789 action pass"
|
||||
|
||||
run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo"
|
||||
|
||||
run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb"
|
||||
run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
|
||||
|
||||
run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789"
|
||||
run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10"
|
||||
|
||||
run_cmd "ip netns exec $ns1 mausezahn vx0 -a own -b 00:11:22:33:44:55 -c 1 -q"
|
||||
|
||||
busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" nh_stats_get > /dev/null
|
||||
check_err $? "FDB nexthop group stats did not increase"
|
||||
|
||||
busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" tc_stats_get > /dev/null
|
||||
check_err $? "tc filter stats did not increase"
|
||||
|
||||
log_test "VXLAN FDB nexthop: $af_str basic Tx"
|
||||
}
|
||||
|
||||
basic_tx_ipv4()
|
||||
{
|
||||
basic_tx_common "IPv4" ipv4 192.0.2.1 32 192.0.2.2
|
||||
}
|
||||
|
||||
basic_tx_ipv6()
|
||||
{
|
||||
basic_tx_common "IPv6" ipv6 2001:db8:1::1 128 2001:db8:1::2
|
||||
}
|
||||
|
||||
learning()
|
||||
{
|
||||
RET=0
|
||||
|
||||
# When learning is enabled on the VXLAN device, an incoming packet
|
||||
# might try to refresh an FDB entry that points to an FDB nexthop group
|
||||
# instead of an ordinary remote destination. Check that the kernel does
|
||||
# not crash in this situation.
|
||||
|
||||
run_cmd "ip -n $ns1 address add 192.0.2.1/32 dev lo"
|
||||
run_cmd "ip -n $ns1 address add 192.0.2.2/32 dev lo"
|
||||
|
||||
run_cmd "ip -n $ns1 nexthop add id 1 via 192.0.2.3 fdb"
|
||||
run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
|
||||
|
||||
run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 12345 localbypass"
|
||||
run_cmd "ip -n $ns1 link add name vx1 up type vxlan id 10020 local 192.0.2.2 dstport 54321 learning"
|
||||
|
||||
run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static dst 192.0.2.2 port 54321 vni 10020"
|
||||
run_cmd "bridge -n $ns1 fdb add 00:aa:bb:cc:dd:ee dev vx1 self static nhid 10"
|
||||
|
||||
run_cmd "ip netns exec $ns1 mausezahn vx0 -a 00:aa:bb:cc:dd:ee -b 00:11:22:33:44:55 -c 1 -q"
|
||||
|
||||
log_test "VXLAN FDB nexthop: learning"
|
||||
}
|
||||
|
||||
proxy_common()
|
||||
{
|
||||
local af_str=$1; shift
|
||||
local local_addr=$1; shift
|
||||
local plen=$1; shift
|
||||
local remote_addr=$1; shift
|
||||
local neigh_addr=$1; shift
|
||||
local ping_cmd=$1; shift
|
||||
|
||||
RET=0
|
||||
|
||||
# When the "proxy" option is enabled on the VXLAN device, the device
|
||||
# will suppress ARP requests and IPv6 Neighbor Solicitation messages if
|
||||
# it is able to reply on behalf of the remote host. That is, if a
|
||||
# matching and valid neighbor entry is configured on the VXLAN device
|
||||
# whose MAC address is not behind the "any" remote (0.0.0.0 / ::). The
|
||||
# FDB entry for the neighbor's MAC address might point to an FDB
|
||||
# nexthop group instead of an ordinary remote destination. Check that
|
||||
# the kernel does not crash in this situation.
|
||||
|
||||
run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo"
|
||||
|
||||
run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb"
|
||||
run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
|
||||
|
||||
run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789 proxy"
|
||||
|
||||
run_cmd "ip -n $ns1 neigh add $neigh_addr lladdr 00:11:22:33:44:55 nud perm dev vx0"
|
||||
|
||||
run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10"
|
||||
|
||||
run_cmd "ip netns exec $ns1 $ping_cmd"
|
||||
|
||||
log_test "VXLAN FDB nexthop: $af_str proxy"
|
||||
}
|
||||
|
||||
proxy_ipv4()
|
||||
{
|
||||
proxy_common "IPv4" 192.0.2.1 32 192.0.2.2 192.0.2.3 \
|
||||
"arping -b -c 1 -s 192.0.2.1 -I vx0 192.0.2.3"
|
||||
}
|
||||
|
||||
proxy_ipv6()
|
||||
{
|
||||
proxy_common "IPv6" 2001:db8:1::1 128 2001:db8:1::2 2001:db8:1::3 \
|
||||
"ndisc6 -r 1 -s 2001:db8:1::1 -w 1 2001:db8:1::3 vx0"
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Usage
|
||||
|
||||
usage()
|
||||
{
|
||||
cat <<EOF
|
||||
usage: ${0##*/} OPTS
|
||||
|
||||
-t <test> Test(s) to run (default: all)
|
||||
(options: $TESTS)
|
||||
-p Pause on fail
|
||||
-v Verbose mode (show commands and output)
|
||||
EOF
|
||||
}
|
||||
|
||||
################################################################################
|
||||
# Main
|
||||
|
||||
while getopts ":t:pvh" opt; do
|
||||
case $opt in
|
||||
t) TESTS=$OPTARG;;
|
||||
p) PAUSE_ON_FAIL=yes;;
|
||||
v) VERBOSE=$((VERBOSE + 1));;
|
||||
h) usage; exit 0;;
|
||||
*) usage; exit 1;;
|
||||
esac
|
||||
done
|
||||
|
||||
require_command mausezahn
|
||||
require_command arping
|
||||
require_command ndisc6
|
||||
require_command jq
|
||||
|
||||
if ! ip nexthop help 2>&1 | grep -q "stats"; then
|
||||
echo "SKIP: iproute2 ip too old, missing nexthop stats support"
|
||||
exit "$ksft_skip"
|
||||
fi
|
||||
|
||||
trap exit_cleanup_all EXIT
|
||||
|
||||
for t in $TESTS
|
||||
do
|
||||
setup_ns ns1; $t; cleanup_all_ns;
|
||||
done
|
||||
Reference in New Issue
Block a user