Merge branch 'mptcp-pm-in-kernel-fullmesh-endp-nb-bind-cases'

Matthieu Baerts says:

====================
mptcp: pm: in-kernel: fullmesh endp nb + bind cases

Here is a small optimisation for the in-kernel PM, accompanied by a small
behavioural change to avoid confusion, and followed by a few more
tests.

- Patch 1: record the number of fullmesh endpoints, to avoid iterating
  over all endpoints to check whether one is marked as fullmesh.

- Patch 2: when at least one endpoint is marked as fullmesh, only use
  these endpoints when reacting to an ADD_ADDR, even if there are no
  endpoints for this IP family: this is less confusing.

- Patch 3: reduce duplicated code to prepare the next patch.

- Patch 4: extra "bind" cases: the listening socket restricts the bind to
  one IP address, not allowing MP_JOIN to extra IP addresses, unless
  another listening socket accepts them.
====================

Link: https://patch.msgid.link/20251101-net-next-mptcp-fm-endp-nb-bind-v1-0-b4166772d6bb@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2025-11-04 17:16:06 -08:00
6 changed files with 213 additions and 26 deletions

View File

@@ -70,7 +70,8 @@ struct mptcp_info {
__u64 mptcpi_bytes_acked;
__u8 mptcpi_subflows_total;
__u8 mptcpi_endp_laminar_max;
__u8 reserved[2];
__u8 mptcpi_endp_fullmesh_max;
__u8 reserved;
__u32 mptcpi_last_data_sent;
__u32 mptcpi_last_data_recv;
__u32 mptcpi_last_ack_recv;

View File

@@ -22,6 +22,7 @@ struct pm_nl_pernet {
u8 endp_signal_max;
u8 endp_subflow_max;
u8 endp_laminar_max;
u8 endp_fullmesh_max;
u8 limit_add_addr_accepted;
u8 limit_extra_subflows;
u8 next_id;
@@ -70,6 +71,14 @@ u8 mptcp_pm_get_endp_laminar_max(const struct mptcp_sock *msk)
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_laminar_max);
/* Return the endp_fullmesh_max counter of the in-kernel PM data attached to
 * @msk. The value is fetched with READ_ONCE(), so no lock is taken here.
 */
u8 mptcp_pm_get_endp_fullmesh_max(const struct mptcp_sock *msk)
{
	struct pm_nl_pernet *pm_net = pm_nl_get_pernet_from_msk(msk);
	u8 nr_fullmesh = READ_ONCE(pm_net->endp_fullmesh_max);

	return nr_fullmesh;
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_fullmesh_max);
u8 mptcp_pm_get_limit_add_addr_accepted(const struct mptcp_sock *msk)
{
struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
@@ -600,12 +609,11 @@ fill_local_addresses_vec(struct mptcp_sock *msk, struct mptcp_addr_info *remote,
struct mptcp_pm_local *locals)
{
bool c_flag_case = remote->id && mptcp_pm_add_addr_c_flag_case(msk);
int i;
/* If there is at least one MPTCP endpoint with a fullmesh flag */
i = fill_local_addresses_vec_fullmesh(msk, remote, locals, c_flag_case);
if (i)
return i;
if (mptcp_pm_get_endp_fullmesh_max(msk))
return fill_local_addresses_vec_fullmesh(msk, remote, locals,
c_flag_case);
/* If there is at least one MPTCP endpoint with a laminar flag */
if (mptcp_pm_get_endp_laminar_max(msk))
@@ -790,6 +798,10 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
addr_max = pernet->endp_laminar_max;
WRITE_ONCE(pernet->endp_laminar_max, addr_max + 1);
}
if (entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
addr_max = pernet->endp_fullmesh_max;
WRITE_ONCE(pernet->endp_fullmesh_max, addr_max + 1);
}
pernet->endpoints++;
if (!entry->addr.port)
@@ -1187,6 +1199,10 @@ int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info)
addr_max = pernet->endp_laminar_max;
WRITE_ONCE(pernet->endp_laminar_max, addr_max - 1);
}
if (entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
addr_max = pernet->endp_fullmesh_max;
WRITE_ONCE(pernet->endp_fullmesh_max, addr_max - 1);
}
pernet->endpoints--;
list_del_rcu(&entry->list);
@@ -1502,6 +1518,18 @@ int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local,
changed = (local->flags ^ entry->flags) & mask;
entry->flags = (entry->flags & ~mask) | (local->flags & mask);
*local = *entry;
if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH) {
u8 addr_max = pernet->endp_fullmesh_max;
if (entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)
addr_max++;
else
addr_max--;
WRITE_ONCE(pernet->endp_fullmesh_max, addr_max);
}
spin_unlock_bh(&pernet->lock);
mptcp_pm_nl_set_flags_all(net, local, changed);

View File

@@ -1183,6 +1183,7 @@ void __mptcp_pm_kernel_worker(struct mptcp_sock *msk);
u8 mptcp_pm_get_endp_signal_max(const struct mptcp_sock *msk);
u8 mptcp_pm_get_endp_subflow_max(const struct mptcp_sock *msk);
u8 mptcp_pm_get_endp_laminar_max(const struct mptcp_sock *msk);
u8 mptcp_pm_get_endp_fullmesh_max(const struct mptcp_sock *msk);
u8 mptcp_pm_get_limit_add_addr_accepted(const struct mptcp_sock *msk);
u8 mptcp_pm_get_limit_extra_subflows(const struct mptcp_sock *msk);

View File

@@ -982,6 +982,8 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
mptcp_pm_get_endp_subflow_max(msk);
info->mptcpi_endp_laminar_max =
mptcp_pm_get_endp_laminar_max(msk);
info->mptcpi_endp_fullmesh_max =
mptcp_pm_get_endp_fullmesh_max(msk);
}
if (__mptcp_check_fallback(msk))

View File

@@ -1064,6 +1064,8 @@ static void check_getpeername_connect(int fd)
socklen_t salen = sizeof(ss);
char a[INET6_ADDRSTRLEN];
char b[INET6_ADDRSTRLEN];
const char *iface;
size_t len;
if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) {
perror("getpeername");
@@ -1073,7 +1075,13 @@ static void check_getpeername_connect(int fd)
xgetnameinfo((struct sockaddr *)&ss, salen,
a, sizeof(a), b, sizeof(b));
if (strcmp(cfg_host, a) || strcmp(cfg_port, b))
iface = strchr(cfg_host, '%');
if (iface)
len = iface - cfg_host;
else
len = strlen(cfg_host) + 1;
if (strncmp(cfg_host, a, len) || strcmp(cfg_port, b))
fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__,
cfg_host, a, cfg_port, b);
}

View File

@@ -62,6 +62,7 @@ unset sflags
unset fastclose
unset fullmesh
unset speed
unset bind_addr
unset join_syn_rej
unset join_csum_ns1
unset join_csum_ns2
@@ -645,6 +646,27 @@ wait_mpj()
done
}
# Poll until no IPv6 link-local (fe80) address in the given netns is still
# reported as "tentative", giving up silently after 50 attempts of 0.1s.
#   $1: network namespace name
wait_ll_ready()
{
	local netns="${1}"
	local attempt

	for attempt in $(seq 50); do
		if ! ip -n "${netns}" -6 addr show scope link |
		     grep "inet6 fe80" |
		     grep -qw "tentative"; then
			break
		fi
		sleep 0.1
	done
}
# Print the IPv6 link-local (fe80::) address(es) listed for one device, with
# the trailing "/<prefix length>" part removed.
#   $1: network namespace name
#   $2: interface name in that namespace
get_ll_addr()
{
	local netns="${1}"
	local dev="${2}"

	ip -n "${netns}" -6 addr show dev "${dev}" scope link \
		| grep "inet6 fe80" \
		| sed 's#.*\(fe80::.*\)/.*#\1#'
}
kill_events_pids()
{
mptcp_lib_kill_wait $evts_ns1_pid
@@ -951,6 +973,9 @@ do_transfer()
local FAILING_LINKS=${FAILING_LINKS:-""}
local fastclose=${fastclose:-""}
local speed=${speed:-"fast"}
local bind_addr=${bind_addr:-"::"}
local listener_in="${sin}"
local connector_in="${cin}"
port=$(get_port)
:> "$cout"
@@ -999,16 +1024,12 @@ do_transfer()
extra_srv_args="$extra_args $extra_srv_args"
if [ "$test_linkfail" -gt 1 ];then
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
$extra_srv_args "::" < "$sinfail" > "$sout" &
else
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
$extra_srv_args "::" < "$sin" > "$sout" &
listener_in="${sinfail}"
fi
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
./mptcp_connect -t ${timeout_poll} -l -p ${port} -s ${srv_proto} \
${extra_srv_args} "${bind_addr}" < "${listener_in}" > "${sout}" &
local spid=$!
mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
@@ -1020,6 +1041,7 @@ do_transfer()
./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
$extra_cl_args $connect_addr < "$cin" > "$cout" &
elif [ "$test_linkfail" -eq 1 ] || [ "$test_linkfail" -eq 2 ];then
connector_in="${cinsent}"
( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
tee "$cinsent" | \
timeout ${timeout_test} \
@@ -1027,6 +1049,7 @@ do_transfer()
./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
$extra_cl_args $connect_addr > "$cout" &
else
connector_in="${cinsent}"
tee "$cinsent" < "$cinfail" | \
timeout ${timeout_test} \
ip netns exec ${connector_ns} \
@@ -1057,17 +1080,9 @@ do_transfer()
return 1
fi
if [ "$test_linkfail" -gt 1 ];then
check_transfer $sinfail $cout "file received by client" $trunc_size
else
check_transfer $sin $cout "file received by client" $trunc_size
fi
check_transfer $listener_in $cout "file received by client" $trunc_size
retc=$?
if [ "$test_linkfail" -eq 0 ];then
check_transfer $cin $sout "file received by server" $trunc_size
else
check_transfer $cinsent $sout "file received by server" $trunc_size
fi
check_transfer $connector_in $sout "file received by server" $trunc_size
rets=$?
[ $retc -eq 0 ] && [ $rets -eq 0 ]
@@ -2952,7 +2967,11 @@ mixed_tests()
pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
speed=slow \
run_tests $ns1 $ns2 dead:beef:2::1
chk_join_nr 1 1 1
if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_fullmesh_max$"; then
chk_join_nr 0 0 0
else
chk_join_nr 1 1 1
fi
fi
# fullmesh still tries to create all the possibly subflows with
@@ -3233,6 +3252,133 @@ add_addr_ports_tests()
fi
}
# Test group: the listener is bound to one specific address (via bind_addr)
# instead of the wildcard, while ns1 also has a second address added as a
# "signal" endpoint. Each case then checks the join counters with chk_join_nr
# and the ADD_ADDR counters with chk_add_nr.
# NOTE(review): the meaning of the numeric arguments of pm_nl_set_limits,
# chk_join_nr and chk_add_nr is defined by helpers outside this block.
bind_tests()
{
# bind to one address should not allow extra subflows to other addresses
if reset "bind main address v4, no join v4"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
# listener bound only to 10.0.1.1, the address the client connects to
bind_addr="10.0.1.1" \
run_tests $ns1 $ns2 10.0.1.1
# join_syn_tx=1 is passed to chk_join_nr only (env prefix on that command)
join_syn_tx=1 \
chk_join_nr 0 0 0
chk_add_nr 1 1
fi
# bind to one address should not allow extra subflows to other addresses
if reset "bind main address v6, no join v6"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
bind_addr="dead:beef:1::1" \
run_tests $ns1 $ns2 dead:beef:1::1
join_syn_tx=1 \
chk_join_nr 0 0 0
chk_add_nr 1 1
fi
# multiple binds to allow extra subflows to other addresses
if reset "multiple bind to allow joins v4"; then
local extra_bind
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
# Launching another app listening on a different address
# Note: it could be a totally different app, e.g. nc, socat, ...
# NOTE(review): "$(get_port)" presumably yields the same port the main
# transfer will use, and "-t -1" presumably disables the timeout -- confirm.
ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
-s MPTCP 10.0.2.1 &
# PID of the background listener, killed once the checks are done
extra_bind=$!
bind_addr="10.0.1.1" \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 1 1
kill ${extra_bind}
fi
# multiple binds to allow extra subflows to other addresses
if reset "multiple bind to allow joins v6"; then
local extra_bind
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
# Launching another app listening on a different address
# Note: it could be a totally different app, e.g. nc, socat, ...
ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
-s MPTCP dead:beef:2::1 &
extra_bind=$!
bind_addr="dead:beef:1::1" \
run_tests $ns1 $ns2 dead:beef:1::1
chk_join_nr 1 1 1
chk_add_nr 1 1
kill ${extra_bind}
fi
# multiple binds to allow extra subflows to other addresses: v6 LL case
if reset "multiple bind to allow joins v6 link-local routing"; then
local extra_bind ns1ll1 ns1ll2
# link-local addresses of the two ns1 interfaces used below
ns1ll1="$(get_ll_addr $ns1 ns1eth1)"
ns1ll2="$(get_ll_addr $ns1 ns1eth2)"
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal
wait_ll_ready $ns1 # to be able to bind
wait_ll_ready $ns2 # also needed to bind on the client side
# link-local addresses are given with a "%<interface>" suffix
ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
-s MPTCP "${ns1ll2}%ns1eth2" &
extra_bind=$!
# same address on both sides, but each side names its own interface
bind_addr="${ns1ll1}%ns1eth1" \
run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1"
# it is not possible to connect to the announced LL addr without
# specifying the outgoing interface.
join_connect_err=1 \
chk_join_nr 0 0 0
chk_add_nr 1 1
kill ${extra_bind}
fi
# multiple binds to allow extra subflows to v6 LL addresses: laminar
# NOTE(review): continue_if presumably skips this case when the kernel
# does not expose the laminar symbol -- confirm against its definition.
if reset "multiple bind to allow joins v6 link-local laminar" &&
continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
local extra_bind ns1ll1 ns1ll2 ns2ll2
ns1ll1="$(get_ll_addr $ns1 ns1eth1)"
ns1ll2="$(get_ll_addr $ns1 ns1eth2)"
ns2ll2="$(get_ll_addr $ns2 ns2eth2)"
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal
# unlike the previous case, the client gets a laminar endpoint tied to ns2eth2
pm_nl_add_endpoint $ns2 "${ns2ll2}" flags laminar dev ns2eth2
wait_ll_ready $ns1 # to be able to bind
wait_ll_ready $ns2 # also needed to bind on the client side
ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
-s MPTCP "${ns1ll2}%ns1eth2" &
extra_bind=$!
bind_addr="${ns1ll1}%ns1eth1" \
run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1"
chk_join_nr 1 1 1
chk_add_nr 1 1
kill ${extra_bind}
fi
}
syncookies_tests()
{
# single subflow, syncookies
@@ -4187,6 +4333,7 @@ all_tests_sorted=(
M@mixed_tests
b@backup_tests
p@add_addr_ports_tests
B@bind_tests
k@syncookies_tests
S@checksum_tests
d@deny_join_id0_tests