diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index b9e451197902..82ec15bcfd7f 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c @@ -32,7 +32,8 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf /* dequeue the skb from sk receive queue */ __skb_unlink(skb, &ssk->sk_receive_queue); skb_ext_reset(skb); - skb_orphan(skb); + + mptcp_subflow_lend_fwdmem(subflow, skb); /* We copy the fastopen data, but that don't belong to the mptcp sequence * space, need to offset it in the subflow sequence, see mptcp_subflow_get_map_offset() @@ -50,6 +51,7 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf mptcp_data_lock(sk); DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk)); + mptcp_borrow_fwdmem(sk, skb); skb_set_owner_r(skb, sk); __skb_queue_tail(&sk->sk_receive_queue, skb); mptcp_sk(sk)->bytes_received += skb->len; diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 171643815076..f23fda0c55a7 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -71,7 +71,6 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPFastcloseRx", MPTCP_MIB_MPFASTCLOSERX), SNMP_MIB_ITEM("MPRstTx", MPTCP_MIB_MPRSTTX), SNMP_MIB_ITEM("MPRstRx", MPTCP_MIB_MPRSTRX), - SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED), SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE), SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER), SNMP_MIB_ITEM("SndWndShared", MPTCP_MIB_SNDWNDSHARED), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index a1d3e9369fbb..812218b5ed2b 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -70,7 +70,6 @@ enum linux_mptcp_mib_field { MPTCP_MIB_MPFASTCLOSERX, /* Received a MP_FASTCLOSE */ MPTCP_MIB_MPRSTTX, /* Transmit a MP_RST */ MPTCP_MIB_MPRSTRX, /* Received a MP_RST */ - MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */ MPTCP_MIB_SUBFLOWSTALE, /* Subflows entered 'stale' status */ MPTCP_MIB_SUBFLOWRECOVER, /* Subflows returned to active status after being stale */ MPTCP_MIB_SNDWNDSHARED, /* Subflow snd wnd is overridden by msk's one */ diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index d22f792f4760..f5526855a2e5 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -358,7 +358,7 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb) static void mptcp_init_skb(struct sock *ssk, struct sk_buff *skb, int offset, int copy_len) { - const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); bool has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; /* the skb map_seq accounts for the skb offset: @@ -383,11 +383,7 @@ static bool __mptcp_move_skb(struct sock *sk, struct sk_buff *skb) struct mptcp_sock *msk = mptcp_sk(sk); struct sk_buff *tail; - /* try to fetch required memory from subflow */ - if (!sk_rmem_schedule(sk, skb, skb->truesize)) { - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); - goto drop; - } + mptcp_borrow_fwdmem(sk, skb); if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) { /* in sequence */ @@ -409,7 +405,6 @@ static bool __mptcp_move_skb(struct sock *sk, struct sk_buff *skb) * will retransmit as needed, if needed. */ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA); -drop: mptcp_drop(sk, skb); return false; } @@ -710,7 +705,7 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, size_t len = skb->len - offset; mptcp_init_skb(ssk, skb, offset, len); - skb_orphan(skb); + mptcp_subflow_lend_fwdmem(subflow, skb); ret = __mptcp_move_skb(sk, skb) || ret; seq += len; @@ -2436,6 +2431,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, { struct mptcp_sock *msk = mptcp_sk(sk); bool dispose_it, need_push = false; + int fwd_remaining; /* Do not pass RX data to the msk, even if the subflow socket is not * going to be freed (i.e. even for the first subflow on graceful @@ -2444,6 +2440,17 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); subflow->closing = 1; + /* Borrow the fwd allocated page left-over; fwd memory for the subflow + * could be negative at this point, but will be reach zero soon - when + * the data allocated using such fragment will be freed. + */ + if (subflow->lent_mem_frag) { + fwd_remaining = PAGE_SIZE - subflow->lent_mem_frag; + sk_forward_alloc_add(sk, fwd_remaining); + sk_forward_alloc_add(ssk, -fwd_remaining); + subflow->lent_mem_frag = 0; + } + /* If the first subflow moved to a close state before accept, e.g. due * to an incoming reset or listener shutdown, the subflow socket is * already deleted by inet_child_forget() and the mptcp socket can't diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index d30806b287d2..5e2749d92a49 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -547,6 +547,7 @@ struct mptcp_subflow_context { bool scheduled; bool pm_listener; /* a listener managed by the kernel PM? */ bool fully_established; /* path validated */ + u32 lent_mem_frag; u32 remote_nonce; u64 thmac; u32 local_nonce; @@ -646,6 +647,33 @@ mptcp_send_active_reset_reason(struct sock *sk) tcp_send_active_reset(sk, GFP_ATOMIC, reason); } +/* Made the fwd mem carried by the given skb available to the msk, + * To be paired with a previous mptcp_subflow_lend_fwdmem() before freeing + * the skb or setting the skb ownership. + */ +static inline void mptcp_borrow_fwdmem(struct sock *sk, struct sk_buff *skb) +{ + struct sock *ssk = skb->sk; + + /* The subflow just lend the skb fwd memory, and we know that the skb + * is only accounted on the incoming subflow rcvbuf. + */ + DEBUG_NET_WARN_ON_ONCE(skb->destructor); + skb->sk = NULL; + sk_forward_alloc_add(sk, skb->truesize); + atomic_sub(skb->truesize, &ssk->sk_rmem_alloc); +} + +static inline void +mptcp_subflow_lend_fwdmem(struct mptcp_subflow_context *subflow, + struct sk_buff *skb) +{ + int frag = (subflow->lent_mem_frag + skb->truesize) & (PAGE_SIZE - 1); + + skb->destructor = NULL; + subflow->lent_mem_frag = frag; +} + static inline u64 mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow) {