mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-04-29 09:22:53 -04:00
Merge branch 'mptcp-another-set-of-miscellaneous-mptcp-fixes'
Mat Martineau says: ==================== mptcp: Another set of miscellaneous MPTCP fixes This is another collection of MPTCP fixes and enhancements that we have tested in the MPTCP tree: Patch 1 cleans up cgroup attachment for in-kernel subflow sockets. Patches 2 and 3 make sure that deletion of advertised addresses by an MPTCP path manager when flushing all addresses behaves similarly to the remove-single-address operation, and add related tests. Patches 4 and 8 do some minor cleanup. Patches 5-7 add MPTCP_FASTCLOSE functionality. Note that patch 6 adds MPTCP option parsing to tcp_reset(). Patch 9 optimizes skb size for outgoing MPTCP packets. ==================== Link: https://lore.kernel.org/r/20201210222506.222251-1-mathew.j.martineau@linux.intel.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
@@ -611,7 +611,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
|
||||
/* tcp_input.c */
|
||||
void tcp_rearm_rto(struct sock *sk);
|
||||
void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
|
||||
void tcp_reset(struct sock *sk);
|
||||
void tcp_reset(struct sock *sk, struct sk_buff *skb);
|
||||
void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
|
||||
void tcp_fin(struct sock *sk);
|
||||
|
||||
|
||||
@@ -4218,10 +4218,13 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
|
||||
}
|
||||
|
||||
/* When we get a reset we do this. */
|
||||
void tcp_reset(struct sock *sk)
|
||||
void tcp_reset(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
trace_tcp_receive_reset(sk);
|
||||
|
||||
if (sk_is_mptcp(sk))
|
||||
mptcp_incoming_options(sk, skb);
|
||||
|
||||
/* We want the right error as BSD sees it (and indeed as we do). */
|
||||
switch (sk->sk_state) {
|
||||
case TCP_SYN_SENT:
|
||||
@@ -5604,7 +5607,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
|
||||
&tp->last_oow_ack_time))
|
||||
tcp_send_dupack(sk, skb);
|
||||
} else if (tcp_reset_check(sk, skb)) {
|
||||
tcp_reset(sk);
|
||||
tcp_reset(sk, skb);
|
||||
}
|
||||
goto discard;
|
||||
}
|
||||
@@ -5640,7 +5643,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
|
||||
}
|
||||
|
||||
if (rst_seq_match)
|
||||
tcp_reset(sk);
|
||||
tcp_reset(sk, skb);
|
||||
else {
|
||||
/* Disable TFO if RST is out-of-order
|
||||
* and no data has been received
|
||||
@@ -6077,7 +6080,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
|
||||
*/
|
||||
|
||||
if (th->rst) {
|
||||
tcp_reset(sk);
|
||||
tcp_reset(sk, skb);
|
||||
goto discard;
|
||||
}
|
||||
|
||||
@@ -6519,7 +6522,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
|
||||
if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
|
||||
after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
|
||||
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
|
||||
tcp_reset(sk);
|
||||
tcp_reset(sk, skb);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -801,7 +801,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
|
||||
req->rsk_ops->send_reset(sk, skb);
|
||||
} else if (fastopen) { /* received a valid RST pkt */
|
||||
reqsk_fastopen_remove(sk, req, true);
|
||||
tcp_reset(sk);
|
||||
tcp_reset(sk, skb);
|
||||
}
|
||||
if (!fastopen) {
|
||||
inet_csk_reqsk_queue_drop(sk, req);
|
||||
|
||||
@@ -282,6 +282,16 @@ static void mptcp_parse_option(const struct sk_buff *skb,
|
||||
pr_debug("RM_ADDR: id=%d", mp_opt->rm_id);
|
||||
break;
|
||||
|
||||
case MPTCPOPT_MP_FASTCLOSE:
|
||||
if (opsize != TCPOLEN_MPTCP_FASTCLOSE)
|
||||
break;
|
||||
|
||||
ptr += 2;
|
||||
mp_opt->rcvr_key = get_unaligned_be64(ptr);
|
||||
ptr += 8;
|
||||
mp_opt->fastclose = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -299,6 +309,7 @@ void mptcp_get_options(const struct sk_buff *skb,
|
||||
mp_opt->mp_join = 0;
|
||||
mp_opt->add_addr = 0;
|
||||
mp_opt->ahmac = 0;
|
||||
mp_opt->fastclose = 0;
|
||||
mp_opt->port = 0;
|
||||
mp_opt->rm_addr = 0;
|
||||
mp_opt->dss = 0;
|
||||
@@ -942,6 +953,12 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
|
||||
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
|
||||
return;
|
||||
|
||||
if (mp_opt.fastclose &&
|
||||
msk->local_key == mp_opt.rcvr_key) {
|
||||
WRITE_ONCE(msk->rcv_fastclose, true);
|
||||
mptcp_schedule_work((struct sock *)msk);
|
||||
}
|
||||
|
||||
if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
|
||||
struct mptcp_addr_info addr;
|
||||
|
||||
|
||||
@@ -135,7 +135,7 @@ select_local_address(const struct pm_nl_pernet *pernet,
|
||||
struct mptcp_pm_addr_entry *entry, *ret = NULL;
|
||||
|
||||
rcu_read_lock();
|
||||
spin_lock_bh(&msk->join_list_lock);
|
||||
__mptcp_flush_join_list(msk);
|
||||
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
|
||||
if (!(entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
|
||||
continue;
|
||||
@@ -144,13 +144,11 @@ select_local_address(const struct pm_nl_pernet *pernet,
|
||||
* pending join
|
||||
*/
|
||||
if (entry->addr.family == ((struct sock *)msk)->sk_family &&
|
||||
!lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) &&
|
||||
!lookup_subflow_by_saddr(&msk->join_list, &entry->addr)) {
|
||||
!lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) {
|
||||
ret = entry;
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock_bh(&msk->join_list_lock);
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
@@ -867,13 +865,14 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __flush_addrs(struct pm_nl_pernet *pernet)
|
||||
static void __flush_addrs(struct net *net, struct list_head *list)
|
||||
{
|
||||
while (!list_empty(&pernet->local_addr_list)) {
|
||||
while (!list_empty(list)) {
|
||||
struct mptcp_pm_addr_entry *cur;
|
||||
|
||||
cur = list_entry(pernet->local_addr_list.next,
|
||||
cur = list_entry(list->next,
|
||||
struct mptcp_pm_addr_entry, list);
|
||||
mptcp_nl_remove_subflow_and_signal_addr(net, &cur->addr);
|
||||
list_del_rcu(&cur->list);
|
||||
kfree_rcu(cur, rcu);
|
||||
}
|
||||
@@ -890,11 +889,13 @@ static void __reset_counters(struct pm_nl_pernet *pernet)
|
||||
static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info)
|
||||
{
|
||||
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
|
||||
LIST_HEAD(free_list);
|
||||
|
||||
spin_lock_bh(&pernet->lock);
|
||||
__flush_addrs(pernet);
|
||||
list_splice_init(&pernet->local_addr_list, &free_list);
|
||||
__reset_counters(pernet);
|
||||
spin_unlock_bh(&pernet->lock);
|
||||
__flush_addrs(sock_net(skb->sk), &free_list);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1156,10 +1157,12 @@ static void __net_exit pm_nl_exit_net(struct list_head *net_list)
|
||||
struct net *net;
|
||||
|
||||
list_for_each_entry(net, net_list, exit_list) {
|
||||
struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);
|
||||
|
||||
/* net is removed from namespace list, can't race with
|
||||
* other modifiers
|
||||
*/
|
||||
__flush_addrs(net_generic(net, pm_nl_pernet_id));
|
||||
__flush_addrs(net, &pernet->local_addr_list);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1256,6 +1256,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
|
||||
struct mptcp_ext *mpext = NULL;
|
||||
struct sk_buff *skb, *tail;
|
||||
bool can_collapse = false;
|
||||
int size_bias = 0;
|
||||
int avail_size;
|
||||
size_t ret = 0;
|
||||
|
||||
@@ -1277,10 +1278,12 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
|
||||
mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
|
||||
can_collapse = (info->size_goal - skb->len > 0) &&
|
||||
mptcp_skb_can_collapse_to(data_seq, skb, mpext);
|
||||
if (!can_collapse)
|
||||
if (!can_collapse) {
|
||||
TCP_SKB_CB(skb)->eor = 1;
|
||||
else
|
||||
} else {
|
||||
size_bias = skb->len;
|
||||
avail_size = info->size_goal - skb->len;
|
||||
}
|
||||
}
|
||||
|
||||
/* Zero window and all data acked? Probe. */
|
||||
@@ -1300,8 +1303,8 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
|
||||
return 0;
|
||||
|
||||
ret = info->limit - info->sent;
|
||||
tail = tcp_build_frag(ssk, avail_size, info->flags, dfrag->page,
|
||||
dfrag->offset + info->sent, &ret);
|
||||
tail = tcp_build_frag(ssk, avail_size + size_bias, info->flags,
|
||||
dfrag->page, dfrag->offset + info->sent, &ret);
|
||||
if (!tail) {
|
||||
tcp_remove_empty_skb(sk, tcp_write_queue_tail(ssk));
|
||||
return -ENOMEM;
|
||||
@@ -1310,8 +1313,9 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
|
||||
/* if the tail skb is still the cached one, collapsing really happened.
|
||||
*/
|
||||
if (skb == tail) {
|
||||
WARN_ON_ONCE(!can_collapse);
|
||||
TCP_SKB_CB(tail)->tcp_flags &= ~TCPHDR_PSH;
|
||||
mpext->data_len += ret;
|
||||
WARN_ON_ONCE(!can_collapse);
|
||||
WARN_ON_ONCE(zero_window_probe);
|
||||
goto out;
|
||||
}
|
||||
@@ -2217,6 +2221,36 @@ static bool mptcp_check_close_timeout(const struct sock *sk)
|
||||
return true;
|
||||
}
|
||||
|
||||
static void mptcp_check_fastclose(struct mptcp_sock *msk)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow, *tmp;
|
||||
struct sock *sk = &msk->sk.icsk_inet.sk;
|
||||
|
||||
if (likely(!READ_ONCE(msk->rcv_fastclose)))
|
||||
return;
|
||||
|
||||
mptcp_token_destroy(msk);
|
||||
|
||||
list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
|
||||
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
|
||||
|
||||
lock_sock(tcp_sk);
|
||||
if (tcp_sk->sk_state != TCP_CLOSE) {
|
||||
tcp_send_active_reset(tcp_sk, GFP_ATOMIC);
|
||||
tcp_set_state(tcp_sk, TCP_CLOSE);
|
||||
}
|
||||
release_sock(tcp_sk);
|
||||
}
|
||||
|
||||
inet_sk_state_store(sk, TCP_CLOSE);
|
||||
sk->sk_shutdown = SHUTDOWN_MASK;
|
||||
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
|
||||
set_bit(MPTCP_DATA_READY, &msk->flags);
|
||||
set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
|
||||
|
||||
mptcp_close_wake_up(sk);
|
||||
}
|
||||
|
||||
static void mptcp_worker(struct work_struct *work)
|
||||
{
|
||||
struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
|
||||
@@ -2233,6 +2267,9 @@ static void mptcp_worker(struct work_struct *work)
|
||||
|
||||
mptcp_check_data_fin_ack(sk);
|
||||
__mptcp_flush_join_list(msk);
|
||||
|
||||
mptcp_check_fastclose(msk);
|
||||
|
||||
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
|
||||
__mptcp_close_subflow(msk);
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
#define OPTION_MPTCP_ADD_ADDR BIT(6)
|
||||
#define OPTION_MPTCP_ADD_ADDR6 BIT(7)
|
||||
#define OPTION_MPTCP_RM_ADDR BIT(8)
|
||||
#define OPTION_MPTCP_FASTCLOSE BIT(9)
|
||||
|
||||
/* MPTCP option subtypes */
|
||||
#define MPTCPOPT_MP_CAPABLE 0
|
||||
@@ -58,6 +59,7 @@
|
||||
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 24
|
||||
#define TCPOLEN_MPTCP_PORT_LEN 4
|
||||
#define TCPOLEN_MPTCP_RM_ADDR_BASE 4
|
||||
#define TCPOLEN_MPTCP_FASTCLOSE 12
|
||||
|
||||
/* MPTCP MP_JOIN flags */
|
||||
#define MPTCPOPT_BACKUP BIT(0)
|
||||
@@ -110,6 +112,7 @@ struct mptcp_options_received {
|
||||
u16 data_len;
|
||||
u16 mp_capable : 1,
|
||||
mp_join : 1,
|
||||
fastclose : 1,
|
||||
dss : 1,
|
||||
add_addr : 1,
|
||||
rm_addr : 1,
|
||||
@@ -119,7 +122,7 @@ struct mptcp_options_received {
|
||||
u32 token;
|
||||
u32 nonce;
|
||||
u64 thmac;
|
||||
u8 hmac[20];
|
||||
u8 hmac[MPTCPOPT_HMAC_LEN];
|
||||
u8 join_id;
|
||||
u8 use_map:1,
|
||||
dsn64:1,
|
||||
@@ -237,6 +240,7 @@ struct mptcp_sock {
|
||||
bool fully_established;
|
||||
bool rcv_data_fin;
|
||||
bool snd_data_fin_enable;
|
||||
bool rcv_fastclose;
|
||||
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
|
||||
spinlock_t join_list_lock;
|
||||
struct sock *ack_hint;
|
||||
|
||||
@@ -313,12 +313,17 @@ void mptcp_subflow_reset(struct sock *ssk)
|
||||
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
|
||||
struct sock *sk = subflow->conn;
|
||||
|
||||
/* must hold: tcp_done() could drop last reference on parent */
|
||||
sock_hold(sk);
|
||||
|
||||
tcp_set_state(ssk, TCP_CLOSE);
|
||||
tcp_send_active_reset(ssk, GFP_ATOMIC);
|
||||
tcp_done(ssk);
|
||||
if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
|
||||
schedule_work(&mptcp_sk(sk)->work))
|
||||
sock_hold(sk);
|
||||
return; /* worker will put sk for us */
|
||||
|
||||
sock_put(sk);
|
||||
}
|
||||
|
||||
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
|
||||
@@ -1167,6 +1172,30 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Make @child use the same cgroup data as @parent.
 *
 * Compiled to an empty function without CONFIG_SOCK_CGROUP_DATA. When the
 * two sockets already report the same cgroup id nothing is touched.
 * Otherwise the child's cgroup data is freed, overwritten with a copy of
 * the parent's and then cloned; with CONFIG_MEMCG the child's memcg is
 * freed as well and replaced by the parent's when a css reference can be
 * taken.
 */
static void mptcp_attach_cgroup(struct sock *parent, struct sock *child)
{
#ifdef CONFIG_SOCK_CGROUP_DATA
	struct sock_cgroup_data *parent_skcd = &parent->sk_cgrp_data;
	struct sock_cgroup_data *child_skcd = &child->sk_cgrp_data;
#ifdef CONFIG_MEMCG
	struct mem_cgroup *memcg;
#endif /* CONFIG_MEMCG */

	/* only the additional subflows created by kworkers have to be modified */
	if (cgroup_id(sock_cgroup_ptr(parent_skcd)) ==
	    cgroup_id(sock_cgroup_ptr(child_skcd)))
		return;

#ifdef CONFIG_MEMCG
	memcg = parent->sk_memcg;

	mem_cgroup_sk_free(child);
	/* adopt the parent's memcg only if a reference on it was obtained */
	if (memcg && css_tryget(&memcg->css))
		child->sk_memcg = memcg;
#endif /* CONFIG_MEMCG */

	cgroup_sk_free(child_skcd);
	*child_skcd = *parent_skcd;
	cgroup_sk_clone(child_skcd);
#endif /* CONFIG_SOCK_CGROUP_DATA */
}
|
||||
|
||||
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
|
||||
{
|
||||
struct mptcp_subflow_context *subflow;
|
||||
@@ -1187,6 +1216,9 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
|
||||
|
||||
lock_sock(sf->sk);
|
||||
|
||||
/* the newly created socket has to be in the same cgroup as its parent */
|
||||
mptcp_attach_cgroup(sk, sf->sk);
|
||||
|
||||
/* kernel sockets do not by default acquire net ref, but TCP timer
|
||||
* needs it.
|
||||
*/
|
||||
|
||||
@@ -264,27 +264,37 @@ do_transfer()
|
||||
cpid=$!
|
||||
|
||||
if [ $rm_nr_ns1 -gt 0 ]; then
|
||||
counter=1
|
||||
sleep 1
|
||||
|
||||
while [ $counter -le $rm_nr_ns1 ]
|
||||
do
|
||||
ip netns exec ${listener_ns} ./pm_nl_ctl del $counter
|
||||
if [ $rm_nr_ns1 -lt 8 ]; then
|
||||
counter=1
|
||||
sleep 1
|
||||
let counter+=1
|
||||
done
|
||||
|
||||
while [ $counter -le $rm_nr_ns1 ]
|
||||
do
|
||||
ip netns exec ${listener_ns} ./pm_nl_ctl del $counter
|
||||
sleep 1
|
||||
let counter+=1
|
||||
done
|
||||
else
|
||||
sleep 1
|
||||
ip netns exec ${listener_ns} ./pm_nl_ctl flush
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $rm_nr_ns2 -gt 0 ]; then
|
||||
counter=1
|
||||
sleep 1
|
||||
|
||||
while [ $counter -le $rm_nr_ns2 ]
|
||||
do
|
||||
ip netns exec ${connector_ns} ./pm_nl_ctl del $counter
|
||||
if [ $rm_nr_ns2 -lt 8 ]; then
|
||||
counter=1
|
||||
sleep 1
|
||||
let counter+=1
|
||||
done
|
||||
|
||||
while [ $counter -le $rm_nr_ns2 ]
|
||||
do
|
||||
ip netns exec ${connector_ns} ./pm_nl_ctl del $counter
|
||||
sleep 1
|
||||
let counter+=1
|
||||
done
|
||||
else
|
||||
sleep 1
|
||||
ip netns exec ${connector_ns} ./pm_nl_ctl flush
|
||||
fi
|
||||
fi
|
||||
|
||||
wait $cpid
|
||||
@@ -663,6 +673,18 @@ chk_join_nr "remove subflows and signal" 3 3 3
|
||||
chk_add_nr 1 1
|
||||
chk_rm_nr 2 2
|
||||
|
||||
# subflows and signal, flush
|
||||
reset
|
||||
ip netns exec $ns1 ./pm_nl_ctl limits 0 3
|
||||
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
|
||||
ip netns exec $ns2 ./pm_nl_ctl limits 1 3
|
||||
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
|
||||
ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
|
||||
run_tests $ns1 $ns2 10.0.1.1 0 8 8 slow
|
||||
chk_join_nr "flush subflows and signal" 3 3 3
|
||||
chk_add_nr 1 1
|
||||
chk_rm_nr 2 2
|
||||
|
||||
# subflow IPv6
|
||||
reset
|
||||
ip netns exec $ns1 ./pm_nl_ctl limits 0 1
|
||||
|
||||
Reference in New Issue
Block a user