From 63f367d9de77b30f58722c1be9e334fb0f5f342d Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 27 Jul 2021 10:42:57 -0400 Subject: [PATCH 1/2] tcp: more accurately detect spurious TLP probes Previously TLP is considered spurious if the sender receives any DSACK during a TLP episode. This patch further checks the DSACK sequences match the TLP's to improve accuracy. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Priyaranjan Jha Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 501d8d4d4ba4..98408d520c32 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -100,6 +100,7 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE; #define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ #define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ #define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */ +#define FLAG_DSACK_TLP 0x20000 /* DSACK for tail loss probe */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -991,6 +992,8 @@ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq, return 0; if (seq_len > tp->mss_cache) dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache); + else if (tp->tlp_high_seq && tp->tlp_high_seq == end_seq) + state->flag |= FLAG_DSACK_TLP; tp->dsack_dups += dup_segs; /* Skip the DSACK if dup segs weren't retransmitted by sender */ @@ -3650,7 +3653,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) if (!tp->tlp_retrans) { /* TLP of new data has been acknowledged */ tp->tlp_high_seq = 0; - } else if (flag & FLAG_DSACKING_ACK) { + } else if (flag & FLAG_DSACK_TLP) { /* This DSACK means original and TLP probe arrived; no loss */ tp->tlp_high_seq = 0; } else if (after(ack, tp->tlp_high_seq)) { From a657db0350bb8f568897835b6189c84a89f13292 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Tue, 27 Jul 2021 10:42:58 -0400 Subject: [PATCH 2/2] tcp: more accurately check DSACKs to grow RACK reordering window Previously, a DSACK could expand the RACK reordering window when no reordering has been seen, and/or when the DSACK was due to an unnecessary TLP retransmit (rather than a spurious fast recovery due to reordering). This could result in unnecessarily growing the RACK reordering window and thus unnecessarily delaying RACK-based fast recovery episodes. To avoid these issues, this commit tightens the conditions under which a DSACK triggers the RACK reordering window to grow, so that a connection only expands its RACK reordering window if: (a) reordering has been seen in the connection (b) a DSACKed range does not match the most recent TLP retransmit Signed-off-by: Neal Cardwell Acked-by: Yuchung Cheng Acked-by: Priyaranjan Jha Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 9 ++++++++- net/ipv4/tcp_recovery.c | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 98408d520c32..3f7bd7ae7d7a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1001,7 +1001,14 @@ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq, return 0; tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; - tp->rack.dsack_seen = 1; + /* We increase the RACK ordering window in rounds where we receive + * DSACKs that may have been due to reordering causing RACK to trigger + * a spurious fast recovery. Thus RACK ignores DSACKs that happen + * without having seen reordering, or that match TLP probes (TLP + * is timer-driven, not triggered by RACK). + */ + if (tp->reord_seen && !(state->flag & FLAG_DSACK_TLP)) + tp->rack.dsack_seen = 1; state->flag |= FLAG_DSACKING_ACK; /* A spurious retransmission is delivered */ diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 6f1b4ac7fe99..fd113f6226ef 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -172,7 +172,8 @@ void tcp_rack_reo_timeout(struct sock *sk) /* Updates the RACK's reo_wnd based on DSACK and no. of recoveries. * - * If DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded + * If a DSACK is received that seems like it may have been due to reordering + * triggering fast recovery, increment reo_wnd by min_rtt/4 (upper bounded * by srtt), since there is possibility that spurious retransmission was * due to reordering delay longer than reo_wnd. *