From 0c14846032f2c0a3b63234e1fc2759f4155b6067 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 16 Dec 2020 12:48:32 +0100 Subject: [PATCH 1/4] mptcp: fix security context on server socket Currently MPTCP is not propagating the security context from the ingress request socket to newly created msk at clone time. Address the issue invoking the missing security helper. Fixes: cf7da0d66cc1 ("mptcp: Create SUBFLOW socket for incoming connections") Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b812aaae8044..d24243a28fce 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2699,6 +2699,8 @@ struct sock *mptcp_sk_clone(const struct sock *sk, sock_reset_flag(nsk, SOCK_RCU_FREE); /* will be fully established after successful MPC subflow creation */ inet_sk_state_store(nsk, TCP_SYN_RECV); + + security_inet_csk_clone(nsk, req); bh_unlock_sock(nsk); /* keep a single reference */ From 3f8b2667f257c21a992bda33bfb919ee164a429c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 16 Dec 2020 12:48:33 +0100 Subject: [PATCH 2/4] mptcp: properly annotate nested lock MPTCP closes the subflows while holding the msk-level lock. While acquiring the subflow socket lock we need to use the correct nested annotation, or we can hit a lockdep splat at runtime. Reported-and-tested-by: Geliang Tang Fixes: e16163b6e2b7 ("mptcp: refactor shutdown and close") Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index d24243a28fce..64c0c54c80e8 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2119,7 +2119,7 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, list_del(&subflow->node); - lock_sock(ssk); + lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); /* if we are invoked by the msk cleanup code, the subflow is * already orphaned From 219d04992b689e0498ece02d2a451f2b6e2563a9 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 16 Dec 2020 12:48:34 +0100 Subject: [PATCH 3/4] mptcp: push pending frames when subflow has free space When multiple subflows are active, we can receive a window update on subflow with no write space available. MPTCP will try to push frames on such subflow and will fail. Pending frames will be pushed only after receiving a window update on a subflow with some wspace available. Overall the above could lead to suboptimal aggregate bandwidth usage. Instead, we should try to push pending frames as soon as the subflow reaches both conditions mentioned above. We can finally enable self-tests with asymmetric links, as the above makes them finally pass. Fixes: 6f8a612a33e4 ("mptcp: keep track of advertised windows right edge") Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Jakub Kicinski --- net/mptcp/options.c | 13 ++++++++----- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 2 +- tools/testing/selftests/net/mptcp/simult_flows.sh | 6 +++--- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index e8a1adf299d8..e0d21c0607e5 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -875,10 +875,13 @@ static void ack_update_msk(struct mptcp_sock *msk, new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd; - if (after64(new_wnd_end, msk->wnd_end)) { + if (after64(new_wnd_end, msk->wnd_end)) msk->wnd_end = new_wnd_end; - __mptcp_wnd_updated(sk, ssk); - } + + /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */ + if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)) && + sk_stream_memory_free(ssk)) + __mptcp_check_push(sk, ssk); if (after64(new_snd_una, old_snd_una)) { msk->snd_una = new_snd_una; @@ -944,8 +947,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) * helpers are cheap. */ mptcp_data_lock(subflow->conn); - if (mptcp_send_head(subflow->conn)) - __mptcp_wnd_updated(subflow->conn, sk); + if (sk_stream_memory_free(sk)) + __mptcp_check_push(subflow->conn, sk); __mptcp_data_acked(subflow->conn); mptcp_data_unlock(subflow->conn); return; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 64c0c54c80e8..b53a91801a6c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2915,7 +2915,7 @@ void __mptcp_data_acked(struct sock *sk) mptcp_schedule_work(sk); } -void __mptcp_wnd_updated(struct sock *sk, struct sock *ssk) +void __mptcp_check_push(struct sock *sk, struct sock *ssk) { if (!mptcp_send_head(sk)) return; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 7cf9d110b85f..d67de793d363 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -503,7 +503,7 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk); void mptcp_data_ready(struct sock *sk, struct sock *ssk); bool mptcp_finish_join(struct sock *sk); bool mptcp_schedule_work(struct sock *sk); -void __mptcp_wnd_updated(struct sock *sk, struct sock *ssk); +void __mptcp_check_push(struct sock *sk, struct sock *ssk); void __mptcp_data_acked(struct sock *sk); void mptcp_subflow_eof(struct sock *sk); bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 2f649b431456..f039ee57eb3c 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -287,7 +287,7 @@ run_test 10 10 0 0 "balanced bwidth" run_test 10 10 1 50 "balanced bwidth with unbalanced delay" # we still need some additional infrastructure to pass the following test-cases -# run_test 30 10 0 0 "unbalanced bwidth" -# run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" -# run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" +run_test 30 10 0 0 "unbalanced bwidth" +run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" +run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" exit $ret From 13e1603739e58e94e7a3c24191fa2dcd1a8a5df3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 16 Dec 2020 12:48:35 +0100 Subject: [PATCH 4/4] mptcp: fix pending data accounting When sendmsg() needs to wait for memory, the pending data is not updated. That causes a drift in forward memory allocation, leading to stall and/or warnings at socket close time. This change addresses the above issue moving the pending data counter update inside the sendmsg() main loop. Fixes: 6e628cd3a8f7 ("mptcp: use mptcp release_cb for delayed tasks") Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b53a91801a6c..09b19aa2f205 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1658,6 +1658,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) frag_truesize += psize; pfrag->offset += frag_truesize; WRITE_ONCE(msk->write_seq, msk->write_seq + psize); + msk->tx_pending_data += psize; /* charge data on mptcp pending queue to the msk socket * Note: we charge such data both to sk and ssk @@ -1683,10 +1684,8 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto out; } - if (copied) { - msk->tx_pending_data += copied; + if (copied) mptcp_push_pending(sk, msg->msg_flags); - } out: release_sock(sk);