mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-04-04 13:34:11 -04:00
tcp: accecn: add AccECN rx byte counters
These three byte counters track the sums of payload bytes, keyed by the
IP ECN field, for all arriving (acceptable) packets marked ECT0, ECT1, or
CE. The AccECN option (added by a later patch in the series) echoes these
counters back to the sender; therefore, they are placed within the
tcp_sock_write_txrx group.
Below are the pahole outcomes before and after this patch. The size of
the tcp_sock_write_txrx group increases from 95 + 4 to 107 + 4, and an
extra 4-byte hole is created, which later patches in the series will
make use of:
[BEFORE THIS PATCH]
struct tcp_sock {
[...]
u32 delivered_ce; /* 2576 4 */
u32 received_ce; /* 2580 4 */
u32 app_limited; /* 2584 4 */
u32 rcv_wnd; /* 2588 4 */
struct tcp_options_received rx_opt; /* 2592 24 */
__cacheline_group_end__tcp_sock_write_txrx[0]; /* 2616 0 */
[...]
/* size: 3200, cachelines: 50, members: 166 */
}
[AFTER THIS PATCH]
struct tcp_sock {
[...]
u32 delivered_ce; /* 2576 4 */
u32 received_ce; /* 2580 4 */
u32 received_ecn_bytes[3];/* 2584 12 */
u32 app_limited; /* 2596 4 */
u32 rcv_wnd; /* 2600 4 */
struct tcp_options_received rx_opt; /* 2604 24 */
__cacheline_group_end__tcp_sock_write_txrx[0]; /* 2628 0 */
/* XXX 4 bytes hole, try to pack */
[...]
/* size: 3200, cachelines: 50, members: 167 */
}
Signed-off-by: Ilpo Järvinen <ij@kernel.org>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Co-developed-by: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
Signed-off-by: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20250916082434.100722-4-chia-yu.chang@nokia-bell-labs.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
committed by
Paolo Abeni
parent
3cae34274c
commit
9a01127744
@@ -102,6 +102,7 @@ u32 prr_out read_mostly read_m
|
||||
u32 delivered read_mostly read_write tcp_rate_skb_sent, tcp_newly_delivered(tx);tcp_ack, tcp_rate_gen, tcp_clean_rtx_queue (rx)
|
||||
u32 delivered_ce read_mostly read_write tcp_rate_skb_sent(tx);tcp_rate_gen(rx)
|
||||
u32 received_ce read_mostly read_write
|
||||
u32[3] received_ecn_bytes read_mostly read_write
|
||||
u8:4 received_ce_pending read_mostly read_write
|
||||
u8:2 syn_ect_snt write_mostly read_write
|
||||
u8:2 syn_ect_rcv read_mostly read_write
|
||||
|
||||
@@ -306,6 +306,10 @@ struct tcp_sock {
|
||||
u32 delivered; /* Total data packets delivered incl. rexmits */
|
||||
u32 delivered_ce; /* Like the above but only ECE marked packets */
|
||||
u32 received_ce; /* Like the above but for rcvd CE marked pkts */
|
||||
u32 received_ecn_bytes[3]; /* received byte counters for three ECN
|
||||
* types: INET_ECN_ECT_1, INET_ECN_ECT_0,
|
||||
* and INET_ECN_CE
|
||||
*/
|
||||
u32 app_limited; /* limited until "delivered" reaches this val */
|
||||
u32 rcv_wnd; /* Current receiver window */
|
||||
/*
|
||||
|
||||
@@ -171,7 +171,7 @@ static inline void tcp_accecn_third_ack(struct sock *sk,
|
||||
|
||||
/* Updates Accurate ECN received counters from the received IP ECN field */
|
||||
static inline void tcp_ecn_received_counters(struct sock *sk,
|
||||
const struct sk_buff *skb)
|
||||
const struct sk_buff *skb, u32 len)
|
||||
{
|
||||
u8 ecnfield = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK;
|
||||
u8 is_ce = INET_ECN_is_ce(ecnfield);
|
||||
@@ -191,9 +191,24 @@ static inline void tcp_ecn_received_counters(struct sock *sk,
|
||||
tp->received_ce += pcount;
|
||||
tp->received_ce_pending = min(tp->received_ce_pending + pcount,
|
||||
0xfU);
|
||||
|
||||
if (len > 0)
|
||||
tp->received_ecn_bytes[ecnfield - 1] += len;
|
||||
}
|
||||
}
|
||||
|
||||
/* AccECN specification, 2.2: [...] A Data Receiver maintains four counters
|
||||
* initialized at the start of the half-connection. [...] These byte counters
|
||||
* reflect only the TCP payload length, excluding TCP header and TCP options.
|
||||
*/
|
||||
static inline void tcp_ecn_received_counters_payload(struct sock *sk,
|
||||
const struct sk_buff *skb)
|
||||
{
|
||||
const struct tcphdr *th = (const struct tcphdr *)skb->data;
|
||||
|
||||
tcp_ecn_received_counters(sk, skb, skb->len - th->doff * 4);
|
||||
}
|
||||
|
||||
/* AccECN specification, 5.1: [...] a server can determine that it
|
||||
* negotiated AccECN as [...] if the ACK contains an ACE field with
|
||||
* the value 0b010 to 0b111 (decimal 2 to 7).
|
||||
@@ -232,10 +247,22 @@ static inline bool tcp_accecn_syn_requested(const struct tcphdr *th)
|
||||
return ace && ace != 0x3;
|
||||
}
|
||||
|
||||
static inline void __tcp_accecn_init_bytes_counters(int *counter_array)
|
||||
{
|
||||
BUILD_BUG_ON(INET_ECN_ECT_1 != 0x1);
|
||||
BUILD_BUG_ON(INET_ECN_ECT_0 != 0x2);
|
||||
BUILD_BUG_ON(INET_ECN_CE != 0x3);
|
||||
|
||||
counter_array[INET_ECN_ECT_1 - 1] = 0;
|
||||
counter_array[INET_ECN_ECT_0 - 1] = 0;
|
||||
counter_array[INET_ECN_CE - 1] = 0;
|
||||
}
|
||||
|
||||
static inline void tcp_accecn_init_counters(struct tcp_sock *tp)
|
||||
{
|
||||
tp->received_ce = 0;
|
||||
tp->received_ce_pending = 0;
|
||||
__tcp_accecn_init_bytes_counters(tp->received_ecn_bytes);
|
||||
}
|
||||
|
||||
/* Used for make_synack to form the ACE flags */
|
||||
|
||||
@@ -5142,6 +5142,7 @@ static void __init tcp_struct_check(void)
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered_ce);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ce);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ecn_bytes);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd);
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt);
|
||||
@@ -5149,7 +5150,7 @@ static void __init tcp_struct_check(void)
|
||||
/* 32bit arches with 8byte alignment on u64 fields might need padding
|
||||
* before tcp_clock_cache.
|
||||
*/
|
||||
CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 95 + 4);
|
||||
CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 107 + 4);
|
||||
|
||||
/* RX read-write hotpath cache lines */
|
||||
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received);
|
||||
|
||||
@@ -6163,7 +6163,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
|
||||
flag |= __tcp_replace_ts_recent(tp,
|
||||
delta);
|
||||
|
||||
tcp_ecn_received_counters(sk, skb);
|
||||
tcp_ecn_received_counters(sk, skb, 0);
|
||||
|
||||
/* We know that such packets are checksummed
|
||||
* on entry.
|
||||
@@ -6213,7 +6213,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
|
||||
/* Bulk data transfer: receiver */
|
||||
tcp_cleanup_skb(skb);
|
||||
__skb_pull(skb, tcp_header_len);
|
||||
tcp_ecn_received_counters(sk, skb);
|
||||
tcp_ecn_received_counters(sk, skb,
|
||||
len - tcp_header_len);
|
||||
eaten = tcp_queue_rcv(sk, skb, &fragstolen);
|
||||
|
||||
tcp_event_data_recv(sk, skb);
|
||||
@@ -6254,7 +6255,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
|
||||
return;
|
||||
|
||||
step5:
|
||||
tcp_ecn_received_counters(sk, skb);
|
||||
tcp_ecn_received_counters_payload(sk, skb);
|
||||
|
||||
reason = tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT);
|
||||
if ((int)reason < 0) {
|
||||
|
||||
@@ -463,7 +463,7 @@ static void tcp_ecn_openreq_child(struct sock *sk,
|
||||
tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
|
||||
tp->syn_ect_snt = treq->syn_ect_snt;
|
||||
tcp_accecn_third_ack(sk, skb, treq->syn_ect_snt);
|
||||
tcp_ecn_received_counters(sk, skb);
|
||||
tcp_ecn_received_counters_payload(sk, skb);
|
||||
} else {
|
||||
tcp_ecn_mode_set(tp, inet_rsk(req)->ecn_ok ?
|
||||
TCP_ECN_MODE_RFC3168 :
|
||||
|
||||
Reference in New Issue
Block a user