From f5360e9b314caed58970e811ae80a4c351e2ce8a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 11 Jul 2022 12:16:29 -0700 Subject: [PATCH 1/5] mptcp: introduce and use mptcp_pm_send_ack() The in-kernel PM has a bit of duplicate code related to ack generation. Create a new helper factoring out the PM-specific needs and use it in a couple of places. As a bonus, mptcp_subflow_send_ack() is not used anymore outside its own compilation unit and can become static. Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 56 +++++++++++++++++++++++++----------------- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 1 - 3 files changed, 35 insertions(+), 24 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 5bdb559d5242..8e1d3aec94da 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -463,6 +463,37 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm return i; } +static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, + bool prio, bool backup) +{ + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow; + + pr_debug("send ack for %s", + prio ? "mp_prio" : (mptcp_pm_should_add_signal(msk) ? 
"add_addr" : "rm_addr")); + + slow = lock_sock_fast(ssk); + if (prio) { + if (subflow->backup != backup) + msk->last_snd = NULL; + + subflow->send_mp_prio = 1; + subflow->backup = backup; + subflow->request_bkup = backup; + } + + __mptcp_subflow_send_ack(ssk); + unlock_sock_fast(ssk, slow); +} + +static void mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, + bool prio, bool backup) +{ + spin_unlock_bh(&msk->pm.lock); + __mptcp_pm_send_ack(msk, subflow, prio, backup); + spin_lock_bh(&msk->pm.lock); +} + static struct mptcp_pm_addr_entry * __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) { @@ -705,16 +736,8 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) return; subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node); - if (subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - spin_unlock_bh(&msk->pm.lock); - pr_debug("send ack for %s", - mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr"); - - mptcp_subflow_send_ack(ssk); - spin_lock_bh(&msk->pm.lock); - } + if (subflow) + mptcp_pm_send_ack(msk, subflow, false, false); } int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, @@ -729,7 +752,6 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct mptcp_addr_info local, remote; - bool slow; local_address((struct sock_common *)ssk, &local); if (!mptcp_addresses_equal(&local, addr, addr->port)) @@ -741,17 +763,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, continue; } - slow = lock_sock_fast(ssk); - if (subflow->backup != bkup) - msk->last_snd = NULL; - subflow->backup = bkup; - subflow->send_mp_prio = 1; - subflow->request_bkup = bkup; - - pr_debug("send ack for mp_prio"); - __mptcp_subflow_send_ack(ssk); - unlock_sock_fast(ssk, slow); - + __mptcp_pm_send_ack(msk, subflow, true, bkup); return 0; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c 
index 2caad4a3adea..6cf5fa191b12 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -508,7 +508,7 @@ void __mptcp_subflow_send_ack(struct sock *ssk) tcp_send_ack(ssk); } -void mptcp_subflow_send_ack(struct sock *ssk) +static void mptcp_subflow_send_ack(struct sock *ssk) { bool slow; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 07871e10e510..e38b861263ce 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -607,7 +607,6 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how); void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void __mptcp_subflow_send_ack(struct sock *ssk); -void mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); void mptcp_subflow_queue_clean(struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); From bedee0b561138346967cf1443f2afd1b48b3148f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 11 Jul 2022 12:16:30 -0700 Subject: [PATCH 2/5] mptcp: address lookup improvements When looking up a socket address in the endpoint list, we must prefer port-based matches over address-only matches. Ensure that port-based endpoints are listed first, using head insertion for them. Additionally, ensure that only port-based endpoints carry a non-zero port number. 
Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 8e1d3aec94da..fe8e22aff7d2 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -413,7 +413,7 @@ static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int i; for (i = 0; i < nr; i++) { - if (mptcp_addresses_equal(&addrs[i], addr, addr->port)) + if (addrs[i].id == addr->id) return true; } @@ -449,7 +449,8 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm mptcp_for_each_subflow(msk, subflow) { ssk = mptcp_subflow_tcp_sock(subflow); remote_address((struct sock_common *)ssk, &addrs[i]); - if (deny_id0 && mptcp_addresses_equal(&addrs[i], &remote, false)) + addrs[i].id = subflow->remote_id; + if (deny_id0 && !addrs[i].id) continue; if (!lookup_address_in_vec(addrs, i, &addrs[i]) && @@ -919,10 +920,11 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, /* do not insert duplicate address, differentiate on port only * singled addresses */ + if (!address_use_port(entry)) + entry->addr.port = 0; list_for_each_entry(cur, &pernet->local_addr_list, list) { if (mptcp_addresses_equal(&cur->addr, &entry->addr, - address_use_port(entry) && - address_use_port(cur))) { + cur->addr.port || entry->addr.port)) { /* allow replacing the exiting endpoint only if such * endpoint is an implicit one and the user-space * did not provide an endpoint id @@ -968,7 +970,10 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, } pernet->addrs++; - list_add_tail_rcu(&entry->list, &pernet->local_addr_list); + if (!entry->addr.port) + list_add_tail_rcu(&entry->list, &pernet->local_addr_list); + else + list_add_rcu(&entry->list, &pernet->local_addr_list); ret = entry->addr.id; out: From 
c157bbe776b799fba885577e193e94068cefe9c7 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 11 Jul 2022 12:16:31 -0700 Subject: [PATCH 3/5] mptcp: allow the in kernel PM to set MPC subflow priority Any local endpoints configured on the address matching the MPC subflow are currently ignored. Specifically, setting a backup flag on them has no effect on the first subflow, as the MPC handshake can't carry such info. This change refactors the MPC endpoint id accounting to additionally fetch the priority info from the relevant endpoint and eventually trigger the MP_PRIO handshake as needed. As a result, the MPC subflow now switches to backup priority after the MPTCP socket is fully established, according to the local endpoint configuration. Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index fe8e22aff7d2..b767a336ad98 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -514,30 +514,14 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info, struct mptcp_pm_addr_entry *entry; list_for_each_entry(entry, &pernet->local_addr_list, list) { - if ((!lookup_by_id && mptcp_addresses_equal(&entry->addr, info, true)) || + if ((!lookup_by_id && + mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) || (lookup_by_id && entry->addr.id == info->id)) return entry; } return NULL; } -static int -lookup_id_by_addr(const struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr) -{ - const struct mptcp_pm_addr_entry *entry; - int ret = -1; - - rcu_read_lock(); - list_for_each_entry(entry, &pernet->local_addr_list, list) { - if (mptcp_addresses_equal(&entry->addr, addr, entry->addr.port)) { - ret = entry->addr.id; - break; - } - } - rcu_read_unlock(); - return ret; -} - static void 
mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; @@ -555,13 +539,22 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) /* do lazy endpoint usage accounting for the MPC subflows */ if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(msk->first); + struct mptcp_pm_addr_entry *entry; struct mptcp_addr_info mpc_addr; - int mpc_id; + bool backup = false; local_address((struct sock_common *)msk->first, &mpc_addr); - mpc_id = lookup_id_by_addr(pernet, &mpc_addr); - if (mpc_id >= 0) - __clear_bit(mpc_id, msk->pm.id_avail_bitmap); + rcu_read_lock(); + entry = __lookup_addr(pernet, &mpc_addr, false); + if (entry) { + __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap); + backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); + } + rcu_read_unlock(); + + if (backup) + mptcp_pm_send_ack(msk, subflow, true, backup); msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED); } From 3ad14f54bd7448384458e69f0183843f683ecce8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 11 Jul 2022 12:16:32 -0700 Subject: [PATCH 4/5] mptcp: more accurate MPC endpoint tracking Currently the id accounting for the ID 0 subflow is not correct: at creation time we (correctly) mark as unavailable the endpoint id corresponding to the MPC subflow source address, while at subflow removal time we mark id 0 as available. With this change we explicitly track the endpoint id corresponding to the MPC subflow so that we can mark it as available at removal time. Additionally, this allows deleting the initial subflow via the NL PM by specifying the corresponding endpoint id. 
Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 21 ++++++++++++++------- net/mptcp/protocol.h | 1 + 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index b767a336ad98..291b5da42fdb 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -549,6 +549,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) entry = __lookup_addr(pernet, &mpc_addr, false); if (entry) { __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap); + msk->mpc_endpoint_id = entry->addr.id; backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); } rcu_read_unlock(); @@ -764,6 +765,11 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, return -EINVAL; } +static bool mptcp_local_id_match(const struct mptcp_sock *msk, u8 local_id, u8 id) +{ + return local_id == id || (!local_id && msk->mpc_endpoint_id == id); +} + static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list, enum linux_mptcp_mib_field rm_type) @@ -787,6 +793,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, return; for (i = 0; i < rm_list->nr; i++) { + u8 rm_id = rm_list->ids[i]; bool removed = false; list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { @@ -794,15 +801,15 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, int how = RCV_SHUTDOWN | SEND_SHUTDOWN; u8 id = subflow->local_id; - if (rm_type == MPTCP_MIB_RMADDR) - id = subflow->remote_id; - - if (rm_list->ids[i] != id) + if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id) + continue; + if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id)) continue; - pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u", + pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", rm_type == MPTCP_MIB_RMADDR ? 
"address" : "subflow", - i, rm_list->ids[i], subflow->local_id, subflow->remote_id); + i, rm_id, subflow->local_id, subflow->remote_id, + msk->mpc_endpoint_id); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); @@ -814,7 +821,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, __MPTCP_INC_STATS(sock_net(sk), rm_type); } if (rm_type == MPTCP_MIB_RMSUBFLOW) - __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap); + __set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap); if (!removed) continue; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index e38b861263ce..5d6043c16b09 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -282,6 +282,7 @@ struct mptcp_sock { bool use_64bit_ack; /* Set when we received a 64-bit DSN */ bool csum_enabled; bool allow_infinite_fallback; + u8 mpc_endpoint_id; u8 recvmsg_inq:1, cork:1, nodelay:1; From 914f6a59b10f41a8baf62d625087e6586d4762af Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 11 Jul 2022 12:16:33 -0700 Subject: [PATCH 5/5] selftests: mptcp: add MPC backup tests Add a couple of test-cases covering the newly introduced features - priority update for the MPC subflow. 
Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 55efe2aafb84..ff83ef426df5 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2428,6 +2428,36 @@ backup_tests() chk_add_nr 1 1 chk_prio_nr 1 1 fi + + if reset "mpc backup"; then + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + chk_join_nr 0 0 0 + chk_prio_nr 0 1 + fi + + if reset "mpc backup both sides"; then + pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + chk_join_nr 0 0 0 + chk_prio_nr 1 1 + fi + + if reset "mpc switch to backup"; then + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + chk_join_nr 0 0 0 + chk_prio_nr 0 1 + fi + + if reset "mpc switch to backup both sides"; then + pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + chk_join_nr 0 0 0 + chk_prio_nr 1 1 + fi } add_addr_ports_tests()