mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-03 19:26:01 -04:00
Merge branch 'tcp-accecn'
Chia-Yu Chang says: ==================== AccECN protocol preparation patch series Please find v7 below. v7 (03-Mar-2025) - Move 2 new patches added in v6 to the next AccECN patch series v6 (27-Dec-2024) - Avoid removing the potential CA_ACK_WIN_UPDATE in ack_ev_flags of patch #1 (Eric Dumazet <edumazet@google.com>) - Add reviewed-by tag in patches #2, #3, #4, #5, #6, #7, #8, #12, #14 - Following 2 new patches are added after patch #9 (Patch that adds SKB_GSO_TCP_ACCECN) * New patch #10 to replace existing SKB_GSO_TCP_ECN with SKB_GSO_TCP_ACCECN in the driver to avoid CWR flag corruption * New patch #11 adds AccECN for virtio by adding new negotiation flag (VIRTIO_NET_F_HOST/GUEST_ACCECN) in feature handshake and translating Accurate ECN GSO flag between virtio_net_hdr (VIRTIO_NET_HDR_GSO_ACCECN) and skb header (SKB_GSO_TCP_ACCECN) - Add detailed changelog and comments in #13 (Eric Dumazet <edumazet@google.com>) - Move patch #14 to the next AccECN patch series (Eric Dumazet <edumazet@google.com>) v5 (5-Nov-2024) - Add helper function "tcp_flags_ntohs" to preserve last 2 bytes of TCP flags of patch #4 (Paolo Abeni <pabeni@redhat.com>) - Fix reverse X-mas tree order of patches #4, #11 (Paolo Abeni <pabeni@redhat.com>) - Rename variable "delta" as "timestamp_delta" of patch #2 for clarity - Remove patch #14 in this series (Paolo Abeni <pabeni@redhat.com>, Joel Granados <joel.granados@kernel.org>) v4 (21-Oct-2024) - Fix line length warning of patches #2, #4, #8, #10, #11, #14 - Fix spaces preferred around '|' (ctx:VxV) warning of patch #7 - Add missing CC'ed of patches #4, #12, #14 v3 (19-Oct-2024) - Fix build error in v2 v2 (18-Oct-2024) - Fix warning caused by NETIF_F_GSO_ACCECN_BIT in patch #9 (Jakub Kicinski <kuba@kernel.org>) The full patch series can be found in https://github.com/L4STeam/linux-net-next/commits/upstream_l4steam/ The Accurate ECN draft can be found in https://datatracker.ietf.org/doc/html/draft-ietf-tcpm-accurate-ecn-28 ==================== 
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
@@ -53,12 +53,12 @@ enum {
|
||||
NETIF_F_GSO_UDP_BIT, /* ... UFO, deprecated except tuntap */
|
||||
NETIF_F_GSO_UDP_L4_BIT, /* ... UDP payload GSO (not UFO) */
|
||||
NETIF_F_GSO_FRAGLIST_BIT, /* ... Fraglist GSO */
|
||||
NETIF_F_GSO_ACCECN_BIT, /* TCP AccECN w/ TSO (no clear CWR) */
|
||||
/**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */
|
||||
NETIF_F_GSO_FRAGLIST_BIT,
|
||||
NETIF_F_GSO_ACCECN_BIT,
|
||||
|
||||
NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */
|
||||
NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */
|
||||
__UNUSED_NETIF_F_37,
|
||||
NETIF_F_NTUPLE_BIT, /* N-tuple filters supported */
|
||||
NETIF_F_RXHASH_BIT, /* Receive hashing offload */
|
||||
NETIF_F_RXCSUM_BIT, /* Receive checksumming offload */
|
||||
@@ -128,6 +128,7 @@ enum {
|
||||
#define NETIF_F_SG __NETIF_F(SG)
|
||||
#define NETIF_F_TSO6 __NETIF_F(TSO6)
|
||||
#define NETIF_F_TSO_ECN __NETIF_F(TSO_ECN)
|
||||
#define NETIF_F_GSO_ACCECN __NETIF_F(GSO_ACCECN)
|
||||
#define NETIF_F_TSO __NETIF_F(TSO)
|
||||
#define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED)
|
||||
#define NETIF_F_RXFCS __NETIF_F(RXFCS)
|
||||
@@ -210,7 +211,8 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start)
|
||||
NETIF_F_TSO_ECN | NETIF_F_TSO_MANGLEID)
|
||||
|
||||
/* List of features with software fallbacks. */
|
||||
#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_GSO_SCTP | \
|
||||
#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | \
|
||||
NETIF_F_GSO_ACCECN | NETIF_F_GSO_SCTP | \
|
||||
NETIF_F_GSO_UDP_L4 | NETIF_F_GSO_FRAGLIST)
|
||||
|
||||
/*
|
||||
|
||||
@@ -5269,6 +5269,8 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
|
||||
BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT));
|
||||
BUILD_BUG_ON(SKB_GSO_UDP_L4 != (NETIF_F_GSO_UDP_L4 >> NETIF_F_GSO_SHIFT));
|
||||
BUILD_BUG_ON(SKB_GSO_FRAGLIST != (NETIF_F_GSO_FRAGLIST >> NETIF_F_GSO_SHIFT));
|
||||
BUILD_BUG_ON(SKB_GSO_TCP_ACCECN !=
|
||||
(NETIF_F_GSO_ACCECN >> NETIF_F_GSO_SHIFT));
|
||||
|
||||
return (features & feature) == feature;
|
||||
}
|
||||
|
||||
@@ -708,6 +708,8 @@ enum {
|
||||
SKB_GSO_UDP_L4 = 1 << 17,
|
||||
|
||||
SKB_GSO_FRAGLIST = 1 << 18,
|
||||
|
||||
SKB_GSO_TCP_ACCECN = 1 << 19,
|
||||
};
|
||||
|
||||
#if BITS_PER_LONG > 32
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include <linux/kref.h>
|
||||
#include <linux/ktime.h>
|
||||
#include <linux/indirect_call_wrapper.h>
|
||||
#include <linux/bits.h>
|
||||
|
||||
#include <net/inet_connection_sock.h>
|
||||
#include <net/inet_timewait_sock.h>
|
||||
@@ -373,16 +374,53 @@ static inline void tcp_dec_quickack_mode(struct sock *sk)
|
||||
}
|
||||
}
|
||||
|
||||
#define TCP_ECN_OK 1
|
||||
#define TCP_ECN_QUEUE_CWR 2
|
||||
#define TCP_ECN_DEMAND_CWR 4
|
||||
#define TCP_ECN_SEEN 8
|
||||
#define TCP_ECN_MODE_RFC3168 BIT(0)
|
||||
#define TCP_ECN_QUEUE_CWR BIT(1)
|
||||
#define TCP_ECN_DEMAND_CWR BIT(2)
|
||||
#define TCP_ECN_SEEN BIT(3)
|
||||
#define TCP_ECN_MODE_ACCECN BIT(4)
|
||||
|
||||
#define TCP_ECN_DISABLED 0
|
||||
#define TCP_ECN_MODE_PENDING (TCP_ECN_MODE_RFC3168 | TCP_ECN_MODE_ACCECN)
|
||||
#define TCP_ECN_MODE_ANY (TCP_ECN_MODE_RFC3168 | TCP_ECN_MODE_ACCECN)
|
||||
|
||||
static inline bool tcp_ecn_mode_any(const struct tcp_sock *tp)
|
||||
{
|
||||
return tp->ecn_flags & TCP_ECN_MODE_ANY;
|
||||
}
|
||||
|
||||
static inline bool tcp_ecn_mode_rfc3168(const struct tcp_sock *tp)
|
||||
{
|
||||
return (tp->ecn_flags & TCP_ECN_MODE_ANY) == TCP_ECN_MODE_RFC3168;
|
||||
}
|
||||
|
||||
static inline bool tcp_ecn_mode_accecn(const struct tcp_sock *tp)
|
||||
{
|
||||
return (tp->ecn_flags & TCP_ECN_MODE_ANY) == TCP_ECN_MODE_ACCECN;
|
||||
}
|
||||
|
||||
static inline bool tcp_ecn_disabled(const struct tcp_sock *tp)
|
||||
{
|
||||
return !tcp_ecn_mode_any(tp);
|
||||
}
|
||||
|
||||
static inline bool tcp_ecn_mode_pending(const struct tcp_sock *tp)
|
||||
{
|
||||
return (tp->ecn_flags & TCP_ECN_MODE_PENDING) == TCP_ECN_MODE_PENDING;
|
||||
}
|
||||
|
||||
static inline void tcp_ecn_mode_set(struct tcp_sock *tp, u8 mode)
|
||||
{
|
||||
tp->ecn_flags &= ~TCP_ECN_MODE_ANY;
|
||||
tp->ecn_flags |= mode;
|
||||
}
|
||||
|
||||
enum tcp_tw_status {
|
||||
TCP_TW_SUCCESS = 0,
|
||||
TCP_TW_RST = 1,
|
||||
TCP_TW_ACK = 2,
|
||||
TCP_TW_SYN = 3
|
||||
TCP_TW_SYN = 3,
|
||||
TCP_TW_ACK_OOW = 4
|
||||
};
|
||||
|
||||
|
||||
@@ -669,7 +707,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority,
|
||||
enum sk_rst_reason reason);
|
||||
int tcp_send_synack(struct sock *);
|
||||
void tcp_push_one(struct sock *, unsigned int mss_now);
|
||||
void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
|
||||
void __tcp_send_ack(struct sock *sk, u32 rcv_nxt, u16 flags);
|
||||
void tcp_send_ack(struct sock *sk);
|
||||
void tcp_send_delayed_ack(struct sock *sk);
|
||||
void tcp_send_loss_probe(struct sock *sk);
|
||||
@@ -934,15 +972,22 @@ static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq)
|
||||
|
||||
#define tcp_flag_byte(th) (((u_int8_t *)th)[13])
|
||||
|
||||
#define TCPHDR_FIN 0x01
|
||||
#define TCPHDR_SYN 0x02
|
||||
#define TCPHDR_RST 0x04
|
||||
#define TCPHDR_PSH 0x08
|
||||
#define TCPHDR_ACK 0x10
|
||||
#define TCPHDR_URG 0x20
|
||||
#define TCPHDR_ECE 0x40
|
||||
#define TCPHDR_CWR 0x80
|
||||
#define TCPHDR_FIN BIT(0)
|
||||
#define TCPHDR_SYN BIT(1)
|
||||
#define TCPHDR_RST BIT(2)
|
||||
#define TCPHDR_PSH BIT(3)
|
||||
#define TCPHDR_ACK BIT(4)
|
||||
#define TCPHDR_URG BIT(5)
|
||||
#define TCPHDR_ECE BIT(6)
|
||||
#define TCPHDR_CWR BIT(7)
|
||||
#define TCPHDR_AE BIT(8)
|
||||
#define TCPHDR_FLAGS_MASK (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST | \
|
||||
TCPHDR_PSH | TCPHDR_ACK | TCPHDR_URG | \
|
||||
TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE)
|
||||
#define tcp_flags_ntohs(th) (ntohs(*(__be16 *)&tcp_flag_word(th)) & \
|
||||
TCPHDR_FLAGS_MASK)
|
||||
|
||||
#define TCPHDR_ACE (TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE)
|
||||
#define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR)
|
||||
|
||||
/* State flags for sacked in struct tcp_skb_cb */
|
||||
@@ -977,7 +1022,7 @@ struct tcp_skb_cb {
|
||||
u16 tcp_gso_size;
|
||||
};
|
||||
};
|
||||
__u8 tcp_flags; /* TCP header flags. (tcp[13]) */
|
||||
__u16 tcp_flags; /* TCP header flags (tcp[12-13])*/
|
||||
|
||||
__u8 sacked; /* State flags for SACK. */
|
||||
__u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */
|
||||
@@ -1132,9 +1177,9 @@ enum tcp_ca_ack_event_flags {
|
||||
#define TCP_CA_UNSPEC 0
|
||||
|
||||
/* Algorithm can be set on socket without CAP_NET_ADMIN privileges */
|
||||
#define TCP_CONG_NON_RESTRICTED 0x1
|
||||
#define TCP_CONG_NON_RESTRICTED BIT(0)
|
||||
/* Requires ECN/ECT set on all packets */
|
||||
#define TCP_CONG_NEEDS_ECN 0x2
|
||||
#define TCP_CONG_NEEDS_ECN BIT(1)
|
||||
#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
|
||||
|
||||
union tcp_cc_info;
|
||||
|
||||
@@ -28,7 +28,8 @@ struct tcphdr {
|
||||
__be32 seq;
|
||||
__be32 ack_seq;
|
||||
#if defined(__LITTLE_ENDIAN_BITFIELD)
|
||||
__u16 res1:4,
|
||||
__u16 ae:1,
|
||||
res1:3,
|
||||
doff:4,
|
||||
fin:1,
|
||||
syn:1,
|
||||
@@ -40,7 +41,8 @@ struct tcphdr {
|
||||
cwr:1;
|
||||
#elif defined(__BIG_ENDIAN_BITFIELD)
|
||||
__u16 doff:4,
|
||||
res1:4,
|
||||
res1:3,
|
||||
ae:1,
|
||||
cwr:1,
|
||||
ece:1,
|
||||
urg:1,
|
||||
@@ -70,6 +72,7 @@ union tcp_word_hdr {
|
||||
#define tcp_flag_word(tp) (((union tcp_word_hdr *)(tp))->words[3])
|
||||
|
||||
enum {
|
||||
TCP_FLAG_AE = __constant_cpu_to_be32(0x01000000),
|
||||
TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000),
|
||||
TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000),
|
||||
TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000),
|
||||
@@ -78,7 +81,7 @@ enum {
|
||||
TCP_FLAG_RST = __constant_cpu_to_be32(0x00040000),
|
||||
TCP_FLAG_SYN = __constant_cpu_to_be32(0x00020000),
|
||||
TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000),
|
||||
TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000),
|
||||
TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0E000000),
|
||||
TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000)
|
||||
};
|
||||
|
||||
|
||||
@@ -36,6 +36,7 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
|
||||
[NETIF_F_TSO_BIT] = "tx-tcp-segmentation",
|
||||
[NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust",
|
||||
[NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation",
|
||||
[NETIF_F_GSO_ACCECN_BIT] = "tx-tcp-accecn-segmentation",
|
||||
[NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation",
|
||||
[NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
|
||||
[NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
|
||||
|
||||
@@ -121,7 +121,7 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
|
||||
BPF_CALL_2(bpf_tcp_send_ack, struct tcp_sock *, tp, u32, rcv_nxt)
|
||||
{
|
||||
/* bpf_tcp_ca prog cannot have NULL tp */
|
||||
__tcp_send_ack((struct sock *)tp, rcv_nxt);
|
||||
__tcp_send_ack((struct sock *)tp, rcv_nxt, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -75,7 +75,6 @@
|
||||
#include <net/checksum.h>
|
||||
#include <net/gso.h>
|
||||
#include <net/inetpeer.h>
|
||||
#include <net/inet_ecn.h>
|
||||
#include <net/lwtunnel.h>
|
||||
#include <net/inet_dscp.h>
|
||||
#include <linux/bpf-cgroup.h>
|
||||
@@ -1640,7 +1639,7 @@ void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk,
|
||||
if (IS_ERR(rt))
|
||||
return;
|
||||
|
||||
inet_sk(sk)->tos = arg->tos & ~INET_ECN_MASK;
|
||||
inet_sk(sk)->tos = arg->tos;
|
||||
|
||||
sk->sk_protocol = ip_hdr(skb)->protocol;
|
||||
sk->sk_bound_dev_if = arg->bound_dev_if;
|
||||
|
||||
@@ -4138,7 +4138,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
|
||||
info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
|
||||
}
|
||||
|
||||
if (tp->ecn_flags & TCP_ECN_OK)
|
||||
if (tcp_ecn_mode_any(tp))
|
||||
info->tcpi_options |= TCPI_OPT_ECN;
|
||||
if (tp->ecn_flags & TCP_ECN_SEEN)
|
||||
info->tcpi_options |= TCPI_OPT_ECN_SEEN;
|
||||
|
||||
@@ -90,7 +90,7 @@ __bpf_kfunc static void dctcp_init(struct sock *sk)
|
||||
{
|
||||
const struct tcp_sock *tp = tcp_sk(sk);
|
||||
|
||||
if ((tp->ecn_flags & TCP_ECN_OK) ||
|
||||
if (tcp_ecn_mode_any(tp) ||
|
||||
(sk->sk_state == TCP_LISTEN ||
|
||||
sk->sk_state == TCP_CLOSE)) {
|
||||
struct dctcp *ca = inet_csk_ca(sk);
|
||||
|
||||
@@ -28,7 +28,7 @@ static inline void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
|
||||
*/
|
||||
if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
|
||||
dctcp_ece_ack_cwr(sk, *ce_state);
|
||||
__tcp_send_ack(sk, *prior_rcv_nxt);
|
||||
__tcp_send_ack(sk, *prior_rcv_nxt, 0);
|
||||
}
|
||||
inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
|
||||
}
|
||||
|
||||
@@ -102,6 +102,7 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
|
||||
#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */
|
||||
#define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */
|
||||
#define FLAG_DSACK_TLP 0x20000 /* DSACK for tail loss probe */
|
||||
#define FLAG_TS_PROGRESS 0x40000 /* Positive timestamp delta */
|
||||
|
||||
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
|
||||
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
|
||||
@@ -341,7 +342,7 @@ static bool tcp_in_quickack_mode(struct sock *sk)
|
||||
|
||||
static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
|
||||
{
|
||||
if (tp->ecn_flags & TCP_ECN_OK)
|
||||
if (tcp_ecn_mode_rfc3168(tp))
|
||||
tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
|
||||
}
|
||||
|
||||
@@ -364,10 +365,13 @@ static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
|
||||
tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
|
||||
}
|
||||
|
||||
static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
|
||||
static void tcp_data_ecn_check(struct sock *sk, const struct sk_buff *skb)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
|
||||
if (tcp_ecn_disabled(tp))
|
||||
return;
|
||||
|
||||
switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
|
||||
case INET_ECN_NOT_ECT:
|
||||
/* Funny extension: if ECT is not set on a segment,
|
||||
@@ -396,31 +400,39 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
|
||||
}
|
||||
}
|
||||
|
||||
static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
|
||||
{
|
||||
if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK)
|
||||
__tcp_ecn_check_ce(sk, skb);
|
||||
}
|
||||
|
||||
static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
|
||||
{
|
||||
if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
|
||||
tp->ecn_flags &= ~TCP_ECN_OK;
|
||||
if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || th->cwr))
|
||||
tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
|
||||
}
|
||||
|
||||
static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
|
||||
{
|
||||
if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
|
||||
tp->ecn_flags &= ~TCP_ECN_OK;
|
||||
if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr))
|
||||
tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
|
||||
}
|
||||
|
||||
static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
|
||||
{
|
||||
if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
|
||||
if (th->ece && !th->syn && tcp_ecn_mode_rfc3168(tp))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static void tcp_count_delivered_ce(struct tcp_sock *tp, u32 ecn_count)
|
||||
{
|
||||
tp->delivered_ce += ecn_count;
|
||||
}
|
||||
|
||||
/* Updates the delivered and delivered_ce counts */
|
||||
static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
|
||||
bool ece_ack)
|
||||
{
|
||||
tp->delivered += delivered;
|
||||
if (ece_ack)
|
||||
tcp_count_delivered_ce(tp, delivered);
|
||||
}
|
||||
|
||||
/* Buffer size and advertised window tuning.
|
||||
*
|
||||
* 1. Tuning sk->sk_sndbuf, when connection enters established state.
|
||||
@@ -859,7 +871,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
|
||||
icsk->icsk_ack.lrcvtime = now;
|
||||
tcp_save_lrcv_flowlabel(sk, skb);
|
||||
|
||||
tcp_ecn_check_ce(sk, skb);
|
||||
tcp_data_ecn_check(sk, skb);
|
||||
|
||||
if (skb->len >= 128)
|
||||
tcp_grow_window(sk, skb, true);
|
||||
@@ -1156,15 +1168,6 @@ void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
|
||||
}
|
||||
}
|
||||
|
||||
/* Updates the delivered and delivered_ce counts */
|
||||
static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
|
||||
bool ece_ack)
|
||||
{
|
||||
tp->delivered += delivered;
|
||||
if (ece_ack)
|
||||
tp->delivered_ce += delivered;
|
||||
}
|
||||
|
||||
/* This procedure tags the retransmission queue when SACKs arrive.
|
||||
*
|
||||
* We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
|
||||
@@ -3816,8 +3819,16 @@ static void tcp_store_ts_recent(struct tcp_sock *tp)
|
||||
tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
|
||||
}
|
||||
|
||||
static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
|
||||
static int __tcp_replace_ts_recent(struct tcp_sock *tp, s32 tstamp_delta)
|
||||
{
|
||||
tcp_store_ts_recent(tp);
|
||||
return tstamp_delta > 0 ? FLAG_TS_PROGRESS : 0;
|
||||
}
|
||||
|
||||
static int tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
|
||||
{
|
||||
s32 delta;
|
||||
|
||||
if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
|
||||
/* PAWS bug workaround wrt. ACK frames, the PAWS discard
|
||||
* extra check below makes sure this can only happen
|
||||
@@ -3826,9 +3837,13 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
|
||||
* Not only, also it occurs for expired timestamps.
|
||||
*/
|
||||
|
||||
if (tcp_paws_check(&tp->rx_opt, 0))
|
||||
tcp_store_ts_recent(tp);
|
||||
if (tcp_paws_check(&tp->rx_opt, 0)) {
|
||||
delta = tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent;
|
||||
return __tcp_replace_ts_recent(tp, delta);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* This routine deals with acks during a TLP episode and ends an episode by
|
||||
@@ -3864,12 +3879,23 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
|
||||
}
|
||||
}
|
||||
|
||||
static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
|
||||
static void tcp_in_ack_event(struct sock *sk, int flag)
|
||||
{
|
||||
const struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
|
||||
if (icsk->icsk_ca_ops->in_ack_event)
|
||||
icsk->icsk_ca_ops->in_ack_event(sk, flags);
|
||||
if (icsk->icsk_ca_ops->in_ack_event) {
|
||||
u32 ack_ev_flags = 0;
|
||||
|
||||
if (flag & FLAG_WIN_UPDATE)
|
||||
ack_ev_flags |= CA_ACK_WIN_UPDATE;
|
||||
if (flag & FLAG_SLOWPATH) {
|
||||
ack_ev_flags |= CA_ACK_SLOWPATH;
|
||||
if (flag & FLAG_ECE)
|
||||
ack_ev_flags |= CA_ACK_ECE;
|
||||
}
|
||||
|
||||
icsk->icsk_ca_ops->in_ack_event(sk, ack_ev_flags);
|
||||
}
|
||||
}
|
||||
|
||||
/* Congestion control has updated the cwnd already. So if we're in
|
||||
@@ -3974,7 +4000,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
|
||||
* is in window.
|
||||
*/
|
||||
if (flag & FLAG_UPDATE_TS_RECENT)
|
||||
tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
|
||||
flag |= tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
|
||||
|
||||
if ((flag & (FLAG_SLOWPATH | FLAG_SND_UNA_ADVANCED)) ==
|
||||
FLAG_SND_UNA_ADVANCED) {
|
||||
@@ -3986,12 +4012,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
|
||||
tcp_snd_una_update(tp, ack);
|
||||
flag |= FLAG_WIN_UPDATE;
|
||||
|
||||
tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
|
||||
|
||||
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
|
||||
} else {
|
||||
u32 ack_ev_flags = CA_ACK_SLOWPATH;
|
||||
|
||||
if (ack_seq != TCP_SKB_CB(skb)->end_seq)
|
||||
flag |= FLAG_DATA;
|
||||
else
|
||||
@@ -4003,19 +4025,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
|
||||
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
|
||||
&sack_state);
|
||||
|
||||
if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
|
||||
if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb)))
|
||||
flag |= FLAG_ECE;
|
||||
ack_ev_flags |= CA_ACK_ECE;
|
||||
}
|
||||
|
||||
if (sack_state.sack_delivered)
|
||||
tcp_count_delivered(tp, sack_state.sack_delivered,
|
||||
flag & FLAG_ECE);
|
||||
|
||||
if (flag & FLAG_WIN_UPDATE)
|
||||
ack_ev_flags |= CA_ACK_WIN_UPDATE;
|
||||
|
||||
tcp_in_ack_event(sk, ack_ev_flags);
|
||||
}
|
||||
|
||||
/* This is a deviation from RFC3168 since it states that:
|
||||
@@ -4042,6 +4057,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
|
||||
|
||||
tcp_rack_update_reo_wnd(sk, &rs);
|
||||
|
||||
tcp_in_ack_event(sk, flag);
|
||||
|
||||
if (tp->tlp_high_seq)
|
||||
tcp_process_tlp_ack(sk, ack, flag);
|
||||
|
||||
@@ -4073,6 +4090,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
|
||||
return 1;
|
||||
|
||||
no_queue:
|
||||
tcp_in_ack_event(sk, flag);
|
||||
/* If data was DSACKed, see if we can undo a cwnd reduction. */
|
||||
if (flag & FLAG_DSACKING_ACK) {
|
||||
tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
|
||||
@@ -5020,7 +5038,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
|
||||
bool fragstolen;
|
||||
|
||||
tcp_save_lrcv_flowlabel(sk, skb);
|
||||
tcp_ecn_check_ce(sk, skb);
|
||||
tcp_data_ecn_check(sk, skb);
|
||||
|
||||
if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
|
||||
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
|
||||
@@ -6157,6 +6175,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
|
||||
TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
|
||||
!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
|
||||
int tcp_header_len = tp->tcp_header_len;
|
||||
s32 delta = 0;
|
||||
int flag = 0;
|
||||
|
||||
/* Timestamp header prediction: tcp_header_len
|
||||
* is automatically equal to th->doff*4 due to pred_flags
|
||||
@@ -6169,8 +6189,10 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
|
||||
if (!tcp_parse_aligned_timestamp(tp, th))
|
||||
goto slow_path;
|
||||
|
||||
delta = tp->rx_opt.rcv_tsval -
|
||||
tp->rx_opt.ts_recent;
|
||||
/* If PAWS failed, check it more carefully in slow path */
|
||||
if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
|
||||
if (delta < 0)
|
||||
goto slow_path;
|
||||
|
||||
/* DO NOT update ts_recent here, if checksum fails
|
||||
@@ -6190,12 +6212,13 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
|
||||
if (tcp_header_len ==
|
||||
(sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
|
||||
tp->rcv_nxt == tp->rcv_wup)
|
||||
tcp_store_ts_recent(tp);
|
||||
flag |= __tcp_replace_ts_recent(tp,
|
||||
delta);
|
||||
|
||||
/* We know that such packets are checksummed
|
||||
* on entry.
|
||||
*/
|
||||
tcp_ack(sk, skb, 0);
|
||||
tcp_ack(sk, skb, flag);
|
||||
__kfree_skb(skb);
|
||||
tcp_data_snd_check(sk);
|
||||
/* When receiving pure ack in fast path, update
|
||||
@@ -6226,7 +6249,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
|
||||
if (tcp_header_len ==
|
||||
(sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
|
||||
tp->rcv_nxt == tp->rcv_wup)
|
||||
tcp_store_ts_recent(tp);
|
||||
flag |= __tcp_replace_ts_recent(tp,
|
||||
delta);
|
||||
|
||||
tcp_rcv_rtt_measure_ts(sk, skb);
|
||||
|
||||
@@ -6241,7 +6265,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
|
||||
|
||||
if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
|
||||
/* Well, only one small jumplet in fast path... */
|
||||
tcp_ack(sk, skb, FLAG_DATA);
|
||||
tcp_ack(sk, skb, flag | FLAG_DATA);
|
||||
tcp_data_snd_check(sk);
|
||||
if (!inet_csk_ack_scheduled(sk))
|
||||
goto no_ack;
|
||||
|
||||
@@ -66,6 +66,7 @@
|
||||
#include <net/transp_v6.h>
|
||||
#include <net/ipv6.h>
|
||||
#include <net/inet_common.h>
|
||||
#include <net/inet_ecn.h>
|
||||
#include <net/timewait_sock.h>
|
||||
#include <net/xfrm.h>
|
||||
#include <net/secure_seq.h>
|
||||
@@ -887,7 +888,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
|
||||
BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
|
||||
offsetof(struct inet_timewait_sock, tw_bound_dev_if));
|
||||
|
||||
arg.tos = ip_hdr(skb)->tos;
|
||||
/* ECN bits of TW reset are cleared */
|
||||
arg.tos = ip_hdr(skb)->tos & ~INET_ECN_MASK;
|
||||
arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
|
||||
local_bh_disable();
|
||||
local_lock_nested_bh(&ipv4_tcp_sk.bh_lock);
|
||||
@@ -1033,11 +1035,21 @@ static void tcp_v4_send_ack(const struct sock *sk,
|
||||
local_bh_enable();
|
||||
}
|
||||
|
||||
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
|
||||
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb,
|
||||
enum tcp_tw_status tw_status)
|
||||
{
|
||||
struct inet_timewait_sock *tw = inet_twsk(sk);
|
||||
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
|
||||
struct tcp_key key = {};
|
||||
u8 tos = tw->tw_tos;
|
||||
|
||||
/* Cleaning only ECN bits of TW ACKs of oow data or is paws_reject,
|
||||
* while not cleaning ECN bits of other TW ACKs to avoid these ACKs
|
||||
* being placed in a different service queues (Classic rather than L4S)
|
||||
*/
|
||||
if (tw_status == TCP_TW_ACK_OOW)
|
||||
tos &= ~INET_ECN_MASK;
|
||||
|
||||
#ifdef CONFIG_TCP_AO
|
||||
struct tcp_ao_info *ao_info;
|
||||
|
||||
@@ -1081,7 +1093,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
|
||||
READ_ONCE(tcptw->tw_ts_recent),
|
||||
tw->tw_bound_dev_if, &key,
|
||||
tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
|
||||
tw->tw_tos,
|
||||
tos,
|
||||
tw->tw_txhash);
|
||||
|
||||
inet_twsk_put(tw);
|
||||
@@ -1151,6 +1163,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
|
||||
key.type = TCP_KEY_MD5;
|
||||
}
|
||||
|
||||
/* Cleaning ECN bits of TW ACKs of oow data or is paws_reject */
|
||||
tcp_v4_send_ack(sk, skb, seq,
|
||||
tcp_rsk(req)->rcv_nxt,
|
||||
tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
|
||||
@@ -1158,7 +1171,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
|
||||
req->ts_recent,
|
||||
0, &key,
|
||||
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
|
||||
ip_hdr(skb)->tos,
|
||||
ip_hdr(skb)->tos & ~INET_ECN_MASK,
|
||||
READ_ONCE(tcp_rsk(req)->txhash));
|
||||
if (tcp_key_is_ao(&key))
|
||||
kfree(key.traffic_key);
|
||||
@@ -2051,7 +2064,8 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
|
||||
!((TCP_SKB_CB(tail)->tcp_flags &
|
||||
TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) ||
|
||||
((TCP_SKB_CB(tail)->tcp_flags ^
|
||||
TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
|
||||
TCP_SKB_CB(skb)->tcp_flags) &
|
||||
(TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE)) ||
|
||||
!tcp_skb_can_collapse_rx(tail, skb) ||
|
||||
thtail->doff != th->doff ||
|
||||
memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
|
||||
@@ -2159,7 +2173,7 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
|
||||
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
|
||||
skb->len - th->doff * 4);
|
||||
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
|
||||
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
|
||||
TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th);
|
||||
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
|
||||
TCP_SKB_CB(skb)->sacked = 0;
|
||||
TCP_SKB_CB(skb)->has_rxtstamp =
|
||||
@@ -2174,6 +2188,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
|
||||
{
|
||||
struct net *net = dev_net_rcu(skb->dev);
|
||||
enum skb_drop_reason drop_reason;
|
||||
enum tcp_tw_status tw_status;
|
||||
int sdif = inet_sdif(skb);
|
||||
int dif = inet_iif(skb);
|
||||
const struct iphdr *iph;
|
||||
@@ -2401,7 +2416,9 @@ int tcp_v4_rcv(struct sk_buff *skb)
|
||||
inet_twsk_put(inet_twsk(sk));
|
||||
goto csum_error;
|
||||
}
|
||||
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
|
||||
|
||||
tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn);
|
||||
switch (tw_status) {
|
||||
case TCP_TW_SYN: {
|
||||
struct sock *sk2 = inet_lookup_listener(net,
|
||||
net->ipv4.tcp_death_row.hashinfo,
|
||||
@@ -2422,7 +2439,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
|
||||
/* to ACK */
|
||||
fallthrough;
|
||||
case TCP_TW_ACK:
|
||||
tcp_v4_timewait_ack(sk, skb);
|
||||
case TCP_TW_ACK_OOW:
|
||||
tcp_v4_timewait_ack(sk, skb, tw_status);
|
||||
break;
|
||||
case TCP_TW_RST:
|
||||
tcp_v4_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
|
||||
|
||||
@@ -44,7 +44,7 @@ tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw,
|
||||
/* Send ACK. Note, we do not put the bucket,
|
||||
* it will be released by caller.
|
||||
*/
|
||||
return TCP_TW_ACK;
|
||||
return TCP_TW_ACK_OOW;
|
||||
}
|
||||
|
||||
/* We are rate-limiting, so just release the tw sock and drop skb. */
|
||||
@@ -461,7 +461,9 @@ void tcp_openreq_init_rwin(struct request_sock *req,
|
||||
static void tcp_ecn_openreq_child(struct tcp_sock *tp,
|
||||
const struct request_sock *req)
|
||||
{
|
||||
tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0;
|
||||
tcp_ecn_mode_set(tp, inet_rsk(req)->ecn_ok ?
|
||||
TCP_ECN_MODE_RFC3168 :
|
||||
TCP_ECN_DISABLED);
|
||||
}
|
||||
|
||||
void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
|
||||
|
||||
@@ -142,6 +142,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
|
||||
struct sk_buff *gso_skb = skb;
|
||||
__sum16 newcheck;
|
||||
bool ooo_okay, copy_destructor;
|
||||
bool ecn_cwr_mask;
|
||||
__wsum delta;
|
||||
|
||||
th = tcp_hdr(skb);
|
||||
@@ -201,6 +202,8 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
|
||||
|
||||
newcheck = ~csum_fold(csum_add(csum_unfold(th->check), delta));
|
||||
|
||||
ecn_cwr_mask = !!(skb_shinfo(gso_skb)->gso_type & SKB_GSO_TCP_ACCECN);
|
||||
|
||||
while (skb->next) {
|
||||
th->fin = th->psh = 0;
|
||||
th->check = newcheck;
|
||||
@@ -220,7 +223,8 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
|
||||
th = tcp_hdr(skb);
|
||||
|
||||
th->seq = htonl(seq);
|
||||
th->cwr = 0;
|
||||
|
||||
th->cwr &= ecn_cwr_mask;
|
||||
}
|
||||
|
||||
/* Following permits TCP Small Queues to work well with GSO :
|
||||
@@ -328,7 +332,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
|
||||
th2 = tcp_hdr(p);
|
||||
flush = (__force int)(flags & TCP_FLAG_CWR);
|
||||
flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
|
||||
~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
|
||||
~(TCP_FLAG_FIN | TCP_FLAG_PSH));
|
||||
flush |= (__force int)(th->ack_seq ^ th2->ack_seq);
|
||||
for (i = sizeof(*th); i < thlen; i += 4)
|
||||
flush |= *(u32 *)((u8 *)th + i) ^
|
||||
@@ -404,7 +408,7 @@ void tcp_gro_complete(struct sk_buff *skb)
|
||||
shinfo->gso_segs = NAPI_GRO_CB(skb)->count;
|
||||
|
||||
if (th->cwr)
|
||||
shinfo->gso_type |= SKB_GSO_TCP_ECN;
|
||||
shinfo->gso_type |= SKB_GSO_TCP_ACCECN;
|
||||
}
|
||||
EXPORT_SYMBOL(tcp_gro_complete);
|
||||
|
||||
|
||||
@@ -325,7 +325,7 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
|
||||
const struct tcp_sock *tp = tcp_sk(sk);
|
||||
|
||||
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
|
||||
if (!(tp->ecn_flags & TCP_ECN_OK))
|
||||
if (tcp_ecn_disabled(tp))
|
||||
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
|
||||
else if (tcp_ca_needs_ecn(sk) ||
|
||||
tcp_bpf_ca_needs_ecn(sk))
|
||||
@@ -351,7 +351,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
|
||||
|
||||
if (use_ecn) {
|
||||
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
|
||||
tp->ecn_flags = TCP_ECN_OK;
|
||||
tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
|
||||
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
|
||||
INET_ECN_xmit(sk);
|
||||
}
|
||||
@@ -381,7 +381,7 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
|
||||
if (tp->ecn_flags & TCP_ECN_OK) {
|
||||
if (tcp_ecn_mode_rfc3168(tp)) {
|
||||
/* Not-retransmitted data segment: set ECT and inject CWR. */
|
||||
if (skb->len != tcp_header_len &&
|
||||
!before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
|
||||
@@ -403,7 +403,7 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
|
||||
/* Constructs common control bits of non-data skb. If SYN/FIN is present,
|
||||
* auto increment end seqno.
|
||||
*/
|
||||
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
|
||||
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u16 flags)
|
||||
{
|
||||
skb->ip_summed = CHECKSUM_PARTIAL;
|
||||
|
||||
@@ -1395,7 +1395,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
|
||||
th->seq = htonl(tcb->seq);
|
||||
th->ack_seq = htonl(rcv_nxt);
|
||||
*(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
|
||||
tcb->tcp_flags);
|
||||
(tcb->tcp_flags & TCPHDR_FLAGS_MASK));
|
||||
|
||||
th->check = 0;
|
||||
th->urg_ptr = 0;
|
||||
@@ -1616,8 +1616,8 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
|
||||
struct sk_buff *buff;
|
||||
int old_factor;
|
||||
long limit;
|
||||
u16 flags;
|
||||
int nlen;
|
||||
u8 flags;
|
||||
|
||||
if (WARN_ON(len > skb->len))
|
||||
return -EINVAL;
|
||||
@@ -2171,7 +2171,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
|
||||
{
|
||||
int nlen = skb->len - len;
|
||||
struct sk_buff *buff;
|
||||
u8 flags;
|
||||
u16 flags;
|
||||
|
||||
/* All of a TSO frame must be composed of paged data. */
|
||||
DEBUG_NET_WARN_ON_ONCE(skb->len != skb->data_len);
|
||||
@@ -4240,7 +4240,7 @@ void tcp_send_delayed_ack(struct sock *sk)
|
||||
}
|
||||
|
||||
/* This routine sends an ack and also updates the window. */
|
||||
void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
|
||||
void __tcp_send_ack(struct sock *sk, u32 rcv_nxt, u16 flags)
|
||||
{
|
||||
struct sk_buff *buff;
|
||||
|
||||
@@ -4269,7 +4269,7 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
|
||||
|
||||
/* Reserve space for headers and prepare control bits. */
|
||||
skb_reserve(buff, MAX_TCP_HEADER);
|
||||
tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
|
||||
tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK | flags);
|
||||
|
||||
/* We do not want pure acks influencing TCP Small Queues or fq/pacing
|
||||
* too much.
|
||||
@@ -4284,7 +4284,7 @@ EXPORT_SYMBOL_GPL(__tcp_send_ack);
|
||||
|
||||
void tcp_send_ack(struct sock *sk)
|
||||
{
|
||||
__tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
|
||||
__tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt, 0);
|
||||
}
|
||||
|
||||
/* This routine sends a packet with an out of date sequence
|
||||
|
||||
@@ -999,7 +999,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
|
||||
if (!IS_ERR(dst)) {
|
||||
skb_dst_set(buff, dst);
|
||||
ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
|
||||
tclass & ~INET_ECN_MASK, priority);
|
||||
tclass, priority);
|
||||
TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
|
||||
if (rst)
|
||||
TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
|
||||
@@ -1135,7 +1135,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
|
||||
trace_tcp_send_reset(sk, skb, reason);
|
||||
|
||||
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
|
||||
ipv6_get_dsfield(ipv6h), label, priority, txhash,
|
||||
ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK,
|
||||
label, priority, txhash,
|
||||
&key);
|
||||
|
||||
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
|
||||
@@ -1155,11 +1156,16 @@ static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
|
||||
tclass, label, priority, txhash, key);
|
||||
}
|
||||
|
||||
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
|
||||
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb,
|
||||
enum tcp_tw_status tw_status)
|
||||
{
|
||||
struct inet_timewait_sock *tw = inet_twsk(sk);
|
||||
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
|
||||
u8 tclass = tw->tw_tclass;
|
||||
struct tcp_key key = {};
|
||||
|
||||
if (tw_status == TCP_TW_ACK_OOW)
|
||||
tclass &= ~INET_ECN_MASK;
|
||||
#ifdef CONFIG_TCP_AO
|
||||
struct tcp_ao_info *ao_info;
|
||||
|
||||
@@ -1203,7 +1209,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
|
||||
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
|
||||
tcp_tw_tsval(tcptw),
|
||||
READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
|
||||
&key, tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel),
|
||||
&key, tclass, cpu_to_be32(tw->tw_flowlabel),
|
||||
tw->tw_priority, tw->tw_txhash);
|
||||
|
||||
#ifdef CONFIG_TCP_AO
|
||||
@@ -1280,7 +1286,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
|
||||
tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
|
||||
tcp_rsk_tsval(tcp_rsk(req)),
|
||||
req->ts_recent, sk->sk_bound_dev_if,
|
||||
&key, ipv6_get_dsfield(ipv6_hdr(skb)), 0,
|
||||
&key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK,
|
||||
0,
|
||||
READ_ONCE(sk->sk_priority),
|
||||
READ_ONCE(tcp_rsk(req)->txhash));
|
||||
if (tcp_key_is_ao(&key))
|
||||
@@ -1731,7 +1738,7 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
|
||||
TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
|
||||
skb->len - th->doff*4);
|
||||
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
|
||||
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
|
||||
TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th);
|
||||
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
|
||||
TCP_SKB_CB(skb)->sacked = 0;
|
||||
TCP_SKB_CB(skb)->has_rxtstamp =
|
||||
@@ -1742,6 +1749,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
|
||||
{
|
||||
struct net *net = dev_net_rcu(skb->dev);
|
||||
enum skb_drop_reason drop_reason;
|
||||
enum tcp_tw_status tw_status;
|
||||
int sdif = inet6_sdif(skb);
|
||||
int dif = inet6_iif(skb);
|
||||
const struct tcphdr *th;
|
||||
@@ -1962,7 +1970,8 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
|
||||
goto csum_error;
|
||||
}
|
||||
|
||||
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
|
||||
tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn);
|
||||
switch (tw_status) {
|
||||
case TCP_TW_SYN:
|
||||
{
|
||||
struct sock *sk2;
|
||||
@@ -1987,7 +1996,8 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
|
||||
/* to ACK */
|
||||
fallthrough;
|
||||
case TCP_TW_ACK:
|
||||
tcp_v6_timewait_ack(sk, skb);
|
||||
case TCP_TW_ACK_OOW:
|
||||
tcp_v6_timewait_ack(sk, skb, tw_status);
|
||||
break;
|
||||
case TCP_TW_RST:
|
||||
tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
|
||||
|
||||
@@ -216,7 +216,9 @@ nf_log_dump_tcp_header(struct nf_log_buf *m,
|
||||
/* Max length: 9 "RES=0x3C " */
|
||||
nf_log_buf_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) &
|
||||
TCP_RESERVED_BITS) >> 22));
|
||||
/* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
|
||||
/* Max length: 35 "AE CWR ECE URG ACK PSH RST SYN FIN " */
|
||||
if (th->ae)
|
||||
nf_log_buf_add(m, "AE ");
|
||||
if (th->cwr)
|
||||
nf_log_buf_add(m, "CWR ");
|
||||
if (th->ece)
|
||||
@@ -516,7 +518,7 @@ dump_ipv4_packet(struct net *net, struct nf_log_buf *m,
|
||||
|
||||
/* Proto Max log string length */
|
||||
/* IP: 40+46+6+11+127 = 230 */
|
||||
/* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */
|
||||
/* TCP: 10+max(25,20+30+13+9+35+11+127) = 255 */
|
||||
/* UDP: 10+max(25,20) = 35 */
|
||||
/* UDPLITE: 14+max(25,20) = 39 */
|
||||
/* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
|
||||
@@ -526,7 +528,7 @@ dump_ipv4_packet(struct net *net, struct nf_log_buf *m,
|
||||
|
||||
/* (ICMP allows recursion one level deep) */
|
||||
/* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */
|
||||
/* maxlen = 230+ 91 + 230 + 252 = 803 */
|
||||
/* maxlen = 230+ 91 + 230 + 255 = 806 */
|
||||
}
|
||||
|
||||
static noinline_for_stack void
|
||||
|
||||
Reference in New Issue
Block a user