tcp: accecn: add AccECN rx byte counters

These three byte counters track IP ECN field payload byte sums for
all arriving (acceptable) packets for ECT0, ECT1, and CE. The
AccECN option (added by a later patch in the series) echoes these
counters back to the sender side; therefore, they are placed within
the tcp_sock_write_txrx group.

Below are the pahole outputs before and after this patch. The size of
the tcp_sock_write_txrx group increases from 95 + 4 to 107 + 4, and an
extra 4-byte hole is created; that hole will be used by later patches
in the series:

[BEFORE THIS PATCH]
struct tcp_sock {
    [...]
    u32                        delivered_ce;         /*  2576     4 */
    u32                        received_ce;          /*  2580     4 */
    u32                        app_limited;          /*  2584     4 */
    u32                        rcv_wnd;              /*  2588     4 */
    struct tcp_options_received rx_opt;              /*  2592    24 */
    __cacheline_group_end__tcp_sock_write_txrx[0];   /*  2616     0 */

    [...]
    /* size: 3200, cachelines: 50, members: 166 */
}

[AFTER THIS PATCH]
struct tcp_sock {
    [...]
    u32                        delivered_ce;         /*  2576     4 */
    u32                        received_ce;          /*  2580     4 */
    u32                        received_ecn_bytes[3];/*  2584    12 */
    u32                        app_limited;          /*  2596     4 */
    u32                        rcv_wnd;              /*  2600     4 */
    struct tcp_options_received rx_opt;              /*  2604    24 */
    __cacheline_group_end__tcp_sock_write_txrx[0];   /*  2628     0 */
    /* XXX 4 bytes hole, try to pack */

    [...]
    /* size: 3200, cachelines: 50, members: 167 */
}

Signed-off-by: Ilpo Järvinen <ij@kernel.org>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Co-developed-by: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
Signed-off-by: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20250916082434.100722-4-chia-yu.chang@nokia-bell-labs.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Ilpo Järvinen
2025-09-16 10:24:27 +02:00
committed by Paolo Abeni
parent 3cae34274c
commit 9a01127744
6 changed files with 40 additions and 6 deletions

View File

@@ -102,6 +102,7 @@ u32 prr_out read_mostly read_m
u32 delivered read_mostly read_write tcp_rate_skb_sent, tcp_newly_delivered(tx);tcp_ack, tcp_rate_gen, tcp_clean_rtx_queue (rx)
u32 delivered_ce read_mostly read_write tcp_rate_skb_sent(tx);tcp_rate_gen(rx)
u32 received_ce read_mostly read_write
u32[3] received_ecn_bytes read_mostly read_write
u8:4 received_ce_pending read_mostly read_write
u8:2 syn_ect_snt write_mostly read_write
u8:2 syn_ect_rcv read_mostly read_write

View File

@@ -306,6 +306,10 @@ struct tcp_sock {
u32 delivered; /* Total data packets delivered incl. rexmits */
u32 delivered_ce; /* Like the above but only ECE marked packets */
u32 received_ce; /* Like the above but for rcvd CE marked pkts */
u32 received_ecn_bytes[3]; /* received byte counters for three ECN
* types: INET_ECN_ECT_1, INET_ECN_ECT_0,
* and INET_ECN_CE
*/
u32 app_limited; /* limited until "delivered" reaches this val */
u32 rcv_wnd; /* Current receiver window */
/*

View File

@@ -171,7 +171,7 @@ static inline void tcp_accecn_third_ack(struct sock *sk,
/* Updates Accurate ECN received counters from the received IP ECN field */
static inline void tcp_ecn_received_counters(struct sock *sk,
const struct sk_buff *skb)
const struct sk_buff *skb, u32 len)
{
u8 ecnfield = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK;
u8 is_ce = INET_ECN_is_ce(ecnfield);
@@ -191,9 +191,24 @@ static inline void tcp_ecn_received_counters(struct sock *sk,
tp->received_ce += pcount;
tp->received_ce_pending = min(tp->received_ce_pending + pcount,
0xfU);
if (len > 0)
tp->received_ecn_bytes[ecnfield - 1] += len;
}
}
/* AccECN specification, 2.2: [...] A Data Receiver maintains four counters
 * initialized at the start of the half-connection. [...] These byte counters
 * reflect only the TCP payload length, excluding TCP header and TCP options.
 *
 * Convenience wrapper around tcp_ecn_received_counters(): it assumes
 * skb->data points at the TCP header, takes th->doff * 4 as the header
 * length in bytes and passes the remainder of skb->len as the payload
 * length.
 */
static inline void tcp_ecn_received_counters_payload(struct sock *sk,
						     const struct sk_buff *skb)
{
	const struct tcphdr *tcph = (const struct tcphdr *)skb->data;
	unsigned int hdr_bytes = tcph->doff * 4;

	tcp_ecn_received_counters(sk, skb, skb->len - hdr_bytes);
}
/* AccECN specification, 5.1: [...] a server can determine that it
* negotiated AccECN as [...] if the ACK contains an ACE field with
* the value 0b010 to 0b111 (decimal 2 to 7).
@@ -232,10 +247,22 @@ static inline bool tcp_accecn_syn_requested(const struct tcphdr *th)
return ace && ace != 0x3;
}
/* Zero the three per-codepoint byte counters stored in @counter_array.
 *
 * The counter for an ECN codepoint lives at index (codepoint - 1); the
 * BUILD_BUG_ON()s pin ECT(1), ECT(0) and CE to the values 1, 2 and 3 so
 * that the loop below covers exactly indices 0, 1 and 2.
 *
 * NOTE(review): the parameter is declared int * while
 * tcp_accecn_init_counters() passes a u32 array - confirm the signedness
 * mismatch is intentional.
 */
static inline void __tcp_accecn_init_bytes_counters(int *counter_array)
{
	int ecn;

	BUILD_BUG_ON(INET_ECN_ECT_1 != 0x1);
	BUILD_BUG_ON(INET_ECN_ECT_0 != 0x2);
	BUILD_BUG_ON(INET_ECN_CE != 0x3);

	for (ecn = INET_ECN_ECT_1; ecn <= INET_ECN_CE; ecn++)
		counter_array[ecn - 1] = 0;
}
/* Reset the AccECN receive-side counters of @tp: the per-codepoint byte
 * counters, the pending CE count and the received CE packet counter.
 */
static inline void tcp_accecn_init_counters(struct tcp_sock *tp)
{
	__tcp_accecn_init_bytes_counters(tp->received_ecn_bytes);
	tp->received_ce_pending = 0;
	tp->received_ce = 0;
}
/* Used for make_synack to form the ACE flags */

View File

@@ -5142,6 +5142,7 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered_ce);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ce);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ecn_bytes);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt);
@@ -5149,7 +5150,7 @@ static void __init tcp_struct_check(void)
/* 32bit arches with 8byte alignment on u64 fields might need padding
* before tcp_clock_cache.
*/
CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 95 + 4);
CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 107 + 4);
/* RX read-write hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received);

View File

@@ -6163,7 +6163,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
flag |= __tcp_replace_ts_recent(tp,
delta);
tcp_ecn_received_counters(sk, skb);
tcp_ecn_received_counters(sk, skb, 0);
/* We know that such packets are checksummed
* on entry.
@@ -6213,7 +6213,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
/* Bulk data transfer: receiver */
tcp_cleanup_skb(skb);
__skb_pull(skb, tcp_header_len);
tcp_ecn_received_counters(sk, skb);
tcp_ecn_received_counters(sk, skb,
len - tcp_header_len);
eaten = tcp_queue_rcv(sk, skb, &fragstolen);
tcp_event_data_recv(sk, skb);
@@ -6254,7 +6255,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
return;
step5:
tcp_ecn_received_counters(sk, skb);
tcp_ecn_received_counters_payload(sk, skb);
reason = tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT);
if ((int)reason < 0) {

View File

@@ -463,7 +463,7 @@ static void tcp_ecn_openreq_child(struct sock *sk,
tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
tp->syn_ect_snt = treq->syn_ect_snt;
tcp_accecn_third_ack(sk, skb, treq->syn_ect_snt);
tcp_ecn_received_counters(sk, skb);
tcp_ecn_received_counters_payload(sk, skb);
} else {
tcp_ecn_mode_set(tp, inet_rsk(req)->ecn_ok ?
TCP_ECN_MODE_RFC3168 :