tcp: try to avoid safer when ACKs are thinned

Add newly acked pkts EWMA. When ACK thinning occurs, select
between safer and unsafe cep delta in AccECN processing based
on it. If the packets ACKed per ACK tends to be large, don't
conservatively assume ACE field overflow.

This patch uses the existing 2-byte holes in the rx group for new
u16 variables withtout creating more holes. Below are the pahole
outcomes before and after this patch:

[BEFORE THIS PATCH]
struct tcp_sock {
    [...]
    u32                        delivered_ecn_bytes[3]; /*  2744    12 */
    /* XXX 4 bytes hole, try to pack */

    [...]
    __cacheline_group_end__tcp_sock_write_rx[0];       /*  2816     0 */

    [...]
    /* size: 3264, cachelines: 51, members: 177 */
}

[AFTER THIS PATCH]
struct tcp_sock {
    [...]
    u32                        delivered_ecn_bytes[3]; /*  2744    12 */
    u16                        pkts_acked_ewma;        /*  2756     2 */
    /* XXX 2 bytes hole, try to pack */

    [...]
    __cacheline_group_end__tcp_sock_write_rx[0];       /*  2816     0 */

    [...]
    /* size: 3264, cachelines: 51, members: 178 */
}

Signed-off-by: Ilpo Järvinen <ij@kernel.org>
Co-developed-by: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
Signed-off-by: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260131222515.8485-2-chia-yu.chang@nokia-bell-labs.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Ilpo Järvinen
2026-01-31 23:25:01 +01:00
committed by Paolo Abeni
parent 72163a1909
commit 7885ce0147
4 changed files with 23 additions and 1 deletions

View File

@@ -105,6 +105,7 @@ u32 received_ce read_mostly read_w
u32[3] received_ecn_bytes read_mostly read_write
u8:4 received_ce_pending read_mostly read_write
u32[3] delivered_ecn_bytes read_write
u16 pkts_acked_ewma read_write
u8:2 syn_ect_snt write_mostly read_write
u8:2 syn_ect_rcv read_mostly read_write
u8:2 accecn_minlen write_mostly read_write

View File

@@ -342,6 +342,7 @@ struct tcp_sock {
u32 rate_interval_us; /* saved rate sample: time elapsed */
u32 rcv_rtt_last_tsecr;
u32 delivered_ecn_bytes[3];
u16 pkts_acked_ewma;/* Pkts acked EWMA for AccECN cep heuristic */
u64 first_tx_mstamp; /* start of window send phase */
u64 delivered_mstamp; /* time we reached "delivered" */
u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked

View File

@@ -3470,6 +3470,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_accecn_init_counters(tp);
tp->prev_ecnfield = 0;
tp->accecn_opt_tstamp = 0;
tp->pkts_acked_ewma = 0;
if (icsk->icsk_ca_initialized && icsk->icsk_ca_ops->release)
icsk->icsk_ca_ops->release(sk);
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
@@ -5243,6 +5244,7 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_interval_us);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_last_tsecr);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, delivered_ecn_bytes);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, pkts_acked_ewma);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, first_tx_mstamp);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, delivered_mstamp);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_acked);

View File

@@ -488,6 +488,10 @@ static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
tcp_count_delivered_ce(tp, delivered);
}
#define PKTS_ACKED_WEIGHT 6
#define PKTS_ACKED_PREC 6
#define ACK_COMP_THRESH 4
/* Returns the ECN CE delta */
static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb,
u32 delivered_pkts, u32 delivered_bytes,
@@ -499,6 +503,7 @@ static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb,
u32 delta, safe_delta, d_ceb;
bool opt_deltas_valid;
u32 corrected_ace;
u32 ewma;
/* Reordered ACK or uncertain due to lack of data to send and ts */
if (!(flag & (FLAG_FORWARD_PROGRESS | FLAG_TS_PROGRESS)))
@@ -507,6 +512,18 @@ static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb,
opt_deltas_valid = tcp_accecn_process_option(tp, skb,
delivered_bytes, flag);
if (delivered_pkts) {
if (!tp->pkts_acked_ewma) {
ewma = delivered_pkts << PKTS_ACKED_PREC;
} else {
ewma = tp->pkts_acked_ewma;
ewma = (((ewma << PKTS_ACKED_WEIGHT) - ewma) +
(delivered_pkts << PKTS_ACKED_PREC)) >>
PKTS_ACKED_WEIGHT;
}
tp->pkts_acked_ewma = min_t(u32, ewma, 0xFFFFU);
}
if (!(flag & FLAG_SLOWPATH)) {
/* AccECN counter might overflow on large ACKs */
if (delivered_pkts <= TCP_ACCECN_CEP_ACE_MASK)
@@ -555,7 +572,8 @@ static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb,
if (d_ceb <
safe_delta * tp->mss_cache >> TCP_ACCECN_SAFETY_SHIFT)
return delta;
}
} else if (tp->pkts_acked_ewma > (ACK_COMP_THRESH << PKTS_ACKED_PREC))
return delta;
return safe_delta;
}