mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-01 03:44:27 -04:00
Merge branch 'tcp-ts-usec-resolution'
Eric Dumazet says: ==================== tcp: add optional usec resolution to TCP TS As discussed in various public places in 2016, Google adopted usec resolution in RFC 7323 TS values, at Van Jacobson's suggestion. Goals were : 1) better observability of delays in networking stacks/fabrics. 2) better disambiguation of events based on TSval/ecr values. 3) building block for congestion control modules needing usec resolution. Back then we implemented a scheme based on private SYN options to safely negotiate the feature. For upstream submission, we chose to use a much simpler route attribute because this feature is probably going to be used in private networks. ip route add 10/8 ... features tcp_usec_ts References: https://www.ietf.org/proceedings/97/slides/slides-97-tcpm-tcp-options-for-low-latency-00.pdf https://datatracker.ietf.org/doc/draft-wang-tcpm-low-latency-opt/ First two patches are fixing old minor bugs and might be taken by stable teams (thanks to appropriate Fixes: tags) ==================== Acked-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
@@ -2259,7 +2259,7 @@ static void chtls_rx_ack(struct sock *sk, struct sk_buff *skb)
|
||||
|
||||
if (tp->snd_una != snd_una) {
|
||||
tp->snd_una = snd_una;
|
||||
tp->rcv_tstamp = tcp_time_stamp(tp);
|
||||
tp->rcv_tstamp = tcp_jiffies32;
|
||||
if (tp->snd_una == tp->snd_nxt &&
|
||||
!csk_flag_nochk(csk, CSK_TX_FAILOVER))
|
||||
csk_reset_flag(csk, CSK_TX_WAIT_IDLE);
|
||||
|
||||
@@ -152,6 +152,7 @@ struct tcp_request_sock {
|
||||
u64 snt_synack; /* first SYNACK sent time */
|
||||
bool tfo_listener;
|
||||
bool is_mptcp;
|
||||
s8 req_usec_ts;
|
||||
#if IS_ENABLED(CONFIG_MPTCP)
|
||||
bool drop_req;
|
||||
#endif
|
||||
@@ -257,7 +258,8 @@ struct tcp_sock {
|
||||
u8 compressed_ack;
|
||||
u8 dup_ack_counter:2,
|
||||
tlp_retrans:1, /* TLP is a retransmission */
|
||||
unused:5;
|
||||
tcp_usec_ts:1, /* TSval values in usec */
|
||||
unused:4;
|
||||
u32 chrono_start; /* Start time in jiffies of a TCP chrono */
|
||||
u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
|
||||
u8 chrono_type:2, /* current chronograph type */
|
||||
@@ -576,4 +578,9 @@ void tcp_sock_set_quickack(struct sock *sk, int val);
|
||||
int tcp_sock_set_syncnt(struct sock *sk, int val);
|
||||
int tcp_sock_set_user_timeout(struct sock *sk, int val);
|
||||
|
||||
static inline bool dst_tcp_usec_ts(const struct dst_entry *dst)
|
||||
{
|
||||
return dst_feature(dst, RTAX_FEATURE_TCP_USEC_TS);
|
||||
}
|
||||
|
||||
#endif /* _LINUX_TCP_H */
|
||||
|
||||
@@ -67,7 +67,8 @@ struct inet_timewait_sock {
|
||||
/* And these are ours. */
|
||||
unsigned int tw_transparent : 1,
|
||||
tw_flowlabel : 20,
|
||||
tw_pad : 3, /* 3 bits hole */
|
||||
tw_usec_ts : 1,
|
||||
tw_pad : 2, /* 2 bits hole */
|
||||
tw_tos : 8;
|
||||
u32 tw_txhash;
|
||||
u32 tw_priority;
|
||||
|
||||
@@ -166,7 +166,12 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX);
|
||||
#define MAX_TCP_KEEPCNT 127
|
||||
#define MAX_TCP_SYNCNT 127
|
||||
|
||||
#define TCP_PAWS_24DAYS (60 * 60 * 24 * 24)
|
||||
/* Ensure that TCP PAWS checks are relaxed after ~2147 seconds
|
||||
* to avoid overflows. This assumes a clock smaller than 1 Mhz.
|
||||
* Default clock is 1 Khz, tcp_usec_ts uses 1 Mhz.
|
||||
*/
|
||||
#define TCP_PAWS_WRAP (INT_MAX / USEC_PER_SEC)
|
||||
|
||||
#define TCP_PAWS_MSL 60 /* Per-host timestamps are invalidated
|
||||
* after this time. It should be equal
|
||||
* (or greater than) TCP_TIMEWAIT_LEN
|
||||
@@ -798,22 +803,31 @@ static inline u64 tcp_clock_us(void)
|
||||
return div_u64(tcp_clock_ns(), NSEC_PER_USEC);
|
||||
}
|
||||
|
||||
/* This should only be used in contexts where tp->tcp_mstamp is up to date */
|
||||
static inline u32 tcp_time_stamp(const struct tcp_sock *tp)
|
||||
static inline u64 tcp_clock_ms(void)
|
||||
{
|
||||
return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ);
|
||||
return div_u64(tcp_clock_ns(), NSEC_PER_MSEC);
|
||||
}
|
||||
|
||||
/* Convert a nsec timestamp into TCP TSval timestamp (ms based currently) */
|
||||
static inline u32 tcp_ns_to_ts(u64 ns)
|
||||
/* TCP Timestamp included in TS option (RFC 1323) can either use ms
|
||||
* or usec resolution. Each socket carries a flag to select one or other
|
||||
* resolution, as the route attribute could change anytime.
|
||||
* Each flow must stick to initial resolution.
|
||||
*/
|
||||
static inline u32 tcp_clock_ts(bool usec_ts)
|
||||
{
|
||||
return div_u64(ns, NSEC_PER_SEC / TCP_TS_HZ);
|
||||
return usec_ts ? tcp_clock_us() : tcp_clock_ms();
|
||||
}
|
||||
|
||||
/* Could use tcp_clock_us() / 1000, but this version uses a single divide */
|
||||
static inline u32 tcp_time_stamp_raw(void)
|
||||
static inline u32 tcp_time_stamp_ms(const struct tcp_sock *tp)
|
||||
{
|
||||
return tcp_ns_to_ts(tcp_clock_ns());
|
||||
return div_u64(tp->tcp_mstamp, USEC_PER_MSEC);
|
||||
}
|
||||
|
||||
static inline u32 tcp_time_stamp_ts(const struct tcp_sock *tp)
|
||||
{
|
||||
if (tp->tcp_usec_ts)
|
||||
return tp->tcp_mstamp;
|
||||
return tcp_time_stamp_ms(tp);
|
||||
}
|
||||
|
||||
void tcp_mstamp_refresh(struct tcp_sock *tp);
|
||||
@@ -823,17 +837,30 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
|
||||
return max_t(s64, t1 - t0, 0);
|
||||
}
|
||||
|
||||
static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
|
||||
{
|
||||
return tcp_ns_to_ts(skb->skb_mstamp_ns);
|
||||
}
|
||||
|
||||
/* provide the departure time in us unit */
|
||||
static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
|
||||
{
|
||||
return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC);
|
||||
}
|
||||
|
||||
/* Provide skb TSval in usec or ms unit */
|
||||
static inline u32 tcp_skb_timestamp_ts(bool usec_ts, const struct sk_buff *skb)
|
||||
{
|
||||
if (usec_ts)
|
||||
return tcp_skb_timestamp_us(skb);
|
||||
|
||||
return div_u64(skb->skb_mstamp_ns, NSEC_PER_MSEC);
|
||||
}
|
||||
|
||||
static inline u32 tcp_tw_tsval(const struct tcp_timewait_sock *tcptw)
|
||||
{
|
||||
return tcp_clock_ts(tcptw->tw_sk.tw_usec_ts) + tcptw->tw_ts_offset;
|
||||
}
|
||||
|
||||
static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq)
|
||||
{
|
||||
return tcp_clock_ts(treq->req_usec_ts) + treq->ts_off;
|
||||
}
|
||||
|
||||
#define tcp_flag_byte(th) (((u_int8_t *)th)[13])
|
||||
|
||||
@@ -1599,7 +1626,7 @@ static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt,
|
||||
if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
|
||||
return true;
|
||||
if (unlikely(!time_before32(ktime_get_seconds(),
|
||||
rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)))
|
||||
rx_opt->ts_recent_stamp + TCP_PAWS_WRAP)))
|
||||
return true;
|
||||
/*
|
||||
* Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0,
|
||||
|
||||
@@ -502,13 +502,17 @@ enum {
|
||||
|
||||
#define RTAX_MAX (__RTAX_MAX - 1)
|
||||
|
||||
#define RTAX_FEATURE_ECN (1 << 0)
|
||||
#define RTAX_FEATURE_SACK (1 << 1)
|
||||
#define RTAX_FEATURE_TIMESTAMP (1 << 2)
|
||||
#define RTAX_FEATURE_ALLFRAG (1 << 3)
|
||||
#define RTAX_FEATURE_ECN (1 << 0)
|
||||
#define RTAX_FEATURE_SACK (1 << 1) /* unused */
|
||||
#define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */
|
||||
#define RTAX_FEATURE_ALLFRAG (1 << 3)
|
||||
#define RTAX_FEATURE_TCP_USEC_TS (1 << 4)
|
||||
|
||||
#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | RTAX_FEATURE_SACK | \
|
||||
RTAX_FEATURE_TIMESTAMP | RTAX_FEATURE_ALLFRAG)
|
||||
#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \
|
||||
RTAX_FEATURE_SACK | \
|
||||
RTAX_FEATURE_TIMESTAMP | \
|
||||
RTAX_FEATURE_ALLFRAG | \
|
||||
RTAX_FEATURE_TCP_USEC_TS)
|
||||
|
||||
struct rta_session {
|
||||
__u8 proto;
|
||||
|
||||
@@ -170,6 +170,7 @@ enum tcp_fastopen_client_fail {
|
||||
#define TCPI_OPT_ECN 8 /* ECN was negociated at TCP session init */
|
||||
#define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */
|
||||
#define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */
|
||||
#define TCPI_OPT_USEC_TS 64 /* usec timestamps */
|
||||
|
||||
/*
|
||||
* Sender's congestion state indicating normal or abnormal situations
|
||||
|
||||
@@ -41,7 +41,6 @@ static siphash_aligned_key_t syncookie_secret[2];
|
||||
* requested/supported by the syn/synack exchange.
|
||||
*/
|
||||
#define TSBITS 6
|
||||
#define TSMASK (((__u32)1 << TSBITS) - 1)
|
||||
|
||||
static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
|
||||
u32 count, int c)
|
||||
@@ -52,6 +51,14 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
|
||||
count, &syncookie_secret[c]);
|
||||
}
|
||||
|
||||
/* Convert one nsec 64bit timestamp to ts (ms or usec resolution) */
|
||||
static u64 tcp_ns_to_ts(bool usec_ts, u64 val)
|
||||
{
|
||||
if (usec_ts)
|
||||
return div_u64(val, NSEC_PER_USEC);
|
||||
|
||||
return div_u64(val, NSEC_PER_MSEC);
|
||||
}
|
||||
|
||||
/*
|
||||
* when syncookies are in effect and tcp timestamps are enabled we encode
|
||||
@@ -62,27 +69,24 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
|
||||
*/
|
||||
u64 cookie_init_timestamp(struct request_sock *req, u64 now)
|
||||
{
|
||||
struct inet_request_sock *ireq;
|
||||
u32 ts, ts_now = tcp_ns_to_ts(now);
|
||||
const struct inet_request_sock *ireq = inet_rsk(req);
|
||||
u64 ts, ts_now = tcp_ns_to_ts(false, now);
|
||||
u32 options = 0;
|
||||
|
||||
ireq = inet_rsk(req);
|
||||
|
||||
options = ireq->wscale_ok ? ireq->snd_wscale : TS_OPT_WSCALE_MASK;
|
||||
if (ireq->sack_ok)
|
||||
options |= TS_OPT_SACK;
|
||||
if (ireq->ecn_ok)
|
||||
options |= TS_OPT_ECN;
|
||||
|
||||
ts = ts_now & ~TSMASK;
|
||||
ts = (ts_now >> TSBITS) << TSBITS;
|
||||
ts |= options;
|
||||
if (ts > ts_now) {
|
||||
ts >>= TSBITS;
|
||||
ts--;
|
||||
ts <<= TSBITS;
|
||||
ts |= options;
|
||||
}
|
||||
return (u64)ts * (NSEC_PER_SEC / TCP_TS_HZ);
|
||||
if (ts > ts_now)
|
||||
ts -= (1UL << TSBITS);
|
||||
|
||||
if (tcp_rsk(req)->req_usec_ts)
|
||||
return ts * NSEC_PER_USEC;
|
||||
return ts * NSEC_PER_MSEC;
|
||||
}
|
||||
|
||||
|
||||
@@ -302,6 +306,8 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
|
||||
treq->af_specific = af_ops;
|
||||
|
||||
treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
|
||||
treq->req_usec_ts = -1;
|
||||
|
||||
#if IS_ENABLED(CONFIG_MPTCP)
|
||||
treq->is_mptcp = sk_is_mptcp(sk);
|
||||
if (treq->is_mptcp) {
|
||||
|
||||
@@ -3629,10 +3629,16 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
|
||||
tp->fastopen_no_cookie = val;
|
||||
break;
|
||||
case TCP_TIMESTAMP:
|
||||
if (!tp->repair)
|
||||
if (!tp->repair) {
|
||||
err = -EPERM;
|
||||
else
|
||||
WRITE_ONCE(tp->tsoffset, val - tcp_time_stamp_raw());
|
||||
break;
|
||||
}
|
||||
/* val is an opaque field,
|
||||
* and low order bit contains usec_ts enable bit.
|
||||
* Its a best effort, and we do not care if user makes an error.
|
||||
*/
|
||||
tp->tcp_usec_ts = val & 1;
|
||||
WRITE_ONCE(tp->tsoffset, val - tcp_clock_ts(tp->tcp_usec_ts));
|
||||
break;
|
||||
case TCP_REPAIR_WINDOW:
|
||||
err = tcp_repair_set_window(tp, optval, optlen);
|
||||
@@ -3754,6 +3760,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
|
||||
info->tcpi_options |= TCPI_OPT_ECN_SEEN;
|
||||
if (tp->syn_data_acked)
|
||||
info->tcpi_options |= TCPI_OPT_SYN_DATA;
|
||||
if (tp->tcp_usec_ts)
|
||||
info->tcpi_options |= TCPI_OPT_USEC_TS;
|
||||
|
||||
info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
|
||||
info->tcpi_ato = jiffies_to_usecs(min_t(u32, icsk->icsk_ack.ato,
|
||||
@@ -3817,10 +3825,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
|
||||
info->tcpi_total_rto = tp->total_rto;
|
||||
info->tcpi_total_rto_recoveries = tp->total_rto_recoveries;
|
||||
info->tcpi_total_rto_time = tp->total_rto_time;
|
||||
if (tp->rto_stamp) {
|
||||
info->tcpi_total_rto_time += tcp_time_stamp_raw() -
|
||||
tp->rto_stamp;
|
||||
}
|
||||
if (tp->rto_stamp)
|
||||
info->tcpi_total_rto_time += tcp_clock_ms() - tp->rto_stamp;
|
||||
|
||||
unlock_sock_fast(sk, slow);
|
||||
}
|
||||
@@ -4145,7 +4151,11 @@ int do_tcp_getsockopt(struct sock *sk, int level,
|
||||
break;
|
||||
|
||||
case TCP_TIMESTAMP:
|
||||
val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset);
|
||||
val = tcp_clock_ts(tp->tcp_usec_ts) + READ_ONCE(tp->tsoffset);
|
||||
if (tp->tcp_usec_ts)
|
||||
val |= 1;
|
||||
else
|
||||
val &= ~1;
|
||||
break;
|
||||
case TCP_NOTSENT_LOWAT:
|
||||
val = READ_ONCE(tp->notsent_lowat);
|
||||
|
||||
@@ -693,6 +693,23 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
|
||||
tp->rcv_rtt_est.time = tp->tcp_mstamp;
|
||||
}
|
||||
|
||||
static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp)
|
||||
{
|
||||
u32 delta, delta_us;
|
||||
|
||||
delta = tcp_time_stamp_ts(tp) - tp->rx_opt.rcv_tsecr;
|
||||
if (tp->tcp_usec_ts)
|
||||
return delta;
|
||||
|
||||
if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
|
||||
if (!delta)
|
||||
delta = 1;
|
||||
delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
|
||||
return delta_us;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
|
||||
const struct sk_buff *skb)
|
||||
{
|
||||
@@ -704,15 +721,10 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
|
||||
|
||||
if (TCP_SKB_CB(skb)->end_seq -
|
||||
TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
|
||||
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
|
||||
u32 delta_us;
|
||||
s32 delta = tcp_rtt_tsopt_us(tp);
|
||||
|
||||
if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
|
||||
if (!delta)
|
||||
delta = 1;
|
||||
delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
|
||||
tcp_rcv_rtt_update(tp, delta_us, 0);
|
||||
}
|
||||
if (delta >= 0)
|
||||
tcp_rcv_rtt_update(tp, delta, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2442,7 +2454,7 @@ static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
|
||||
const struct sk_buff *skb)
|
||||
{
|
||||
return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) &&
|
||||
tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb));
|
||||
tcp_tsopt_ecr_before(tp, tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb));
|
||||
}
|
||||
|
||||
/* Nothing was retransmitted or returned timestamp is less
|
||||
@@ -2856,7 +2868,7 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack)
|
||||
static void tcp_update_rto_time(struct tcp_sock *tp)
|
||||
{
|
||||
if (tp->rto_stamp) {
|
||||
tp->total_rto_time += tcp_time_stamp(tp) - tp->rto_stamp;
|
||||
tp->total_rto_time += tcp_time_stamp_ms(tp) - tp->rto_stamp;
|
||||
tp->rto_stamp = 0;
|
||||
}
|
||||
}
|
||||
@@ -3146,17 +3158,10 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
|
||||
* left edge of the send window.
|
||||
* See draft-ietf-tcplw-high-performance-00, section 3.3.
|
||||
*/
|
||||
if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
|
||||
flag & FLAG_ACKED) {
|
||||
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
|
||||
if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp &&
|
||||
tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED)
|
||||
seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp);
|
||||
|
||||
if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
|
||||
if (!delta)
|
||||
delta = 1;
|
||||
seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
|
||||
ca_rtt_us = seq_rtt_us;
|
||||
}
|
||||
}
|
||||
rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
|
||||
if (seq_rtt_us < 0)
|
||||
return false;
|
||||
@@ -6293,7 +6298,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
|
||||
|
||||
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
|
||||
!between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
|
||||
tcp_time_stamp(tp))) {
|
||||
tcp_time_stamp_ts(tp))) {
|
||||
NET_INC_STATS(sock_net(sk),
|
||||
LINUX_MIB_PAWSACTIVEREJECTED);
|
||||
goto reset_and_undo;
|
||||
@@ -7042,6 +7047,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
|
||||
req->syncookie = want_cookie;
|
||||
tcp_rsk(req)->af_specific = af_ops;
|
||||
tcp_rsk(req)->ts_off = 0;
|
||||
tcp_rsk(req)->req_usec_ts = -1;
|
||||
#if IS_ENABLED(CONFIG_MPTCP)
|
||||
tcp_rsk(req)->is_mptcp = 0;
|
||||
#endif
|
||||
|
||||
@@ -296,6 +296,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
|
||||
rt = NULL;
|
||||
goto failure;
|
||||
}
|
||||
tp->tcp_usec_ts = dst_tcp_usec_ts(&rt->dst);
|
||||
/* OK, now commit destination to socket. */
|
||||
sk->sk_gso_type = SKB_GSO_TCPV4;
|
||||
sk_setup_caps(sk, &rt->dst);
|
||||
@@ -954,7 +955,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
|
||||
tcp_v4_send_ack(sk, skb,
|
||||
tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
|
||||
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
|
||||
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
|
||||
tcp_tw_tsval(tcptw),
|
||||
tcptw->tw_ts_recent,
|
||||
tw->tw_bound_dev_if,
|
||||
tcp_twsk_md5_key(tcptw),
|
||||
@@ -988,7 +989,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
|
||||
tcp_v4_send_ack(sk, skb, seq,
|
||||
tcp_rsk(req)->rcv_nxt,
|
||||
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
|
||||
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
|
||||
tcp_rsk_tsval(tcp_rsk(req)),
|
||||
READ_ONCE(req->ts_recent),
|
||||
0,
|
||||
tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
|
||||
|
||||
@@ -272,7 +272,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
struct lp *lp = inet_csk_ca(sk);
|
||||
u32 now = tcp_time_stamp(tp);
|
||||
u32 now = tcp_time_stamp_ts(tp);
|
||||
u32 delta;
|
||||
|
||||
if (sample->rtt_us > 0)
|
||||
|
||||
@@ -300,6 +300,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
|
||||
tcptw->tw_ts_recent = tp->rx_opt.ts_recent;
|
||||
tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
|
||||
tcptw->tw_ts_offset = tp->tsoffset;
|
||||
tw->tw_usec_ts = tp->tcp_usec_ts;
|
||||
tcptw->tw_last_oow_ack_time = 0;
|
||||
tcptw->tw_tx_delay = tp->tcp_tx_delay;
|
||||
tw->tw_txhash = sk->sk_txhash;
|
||||
@@ -554,21 +555,29 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
|
||||
newtp->max_window = newtp->snd_wnd;
|
||||
|
||||
if (newtp->rx_opt.tstamp_ok) {
|
||||
newtp->tcp_usec_ts = treq->req_usec_ts;
|
||||
newtp->rx_opt.ts_recent = READ_ONCE(req->ts_recent);
|
||||
newtp->rx_opt.ts_recent_stamp = ktime_get_seconds();
|
||||
newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
|
||||
} else {
|
||||
newtp->tcp_usec_ts = 0;
|
||||
newtp->rx_opt.ts_recent_stamp = 0;
|
||||
newtp->tcp_header_len = sizeof(struct tcphdr);
|
||||
}
|
||||
if (req->num_timeout) {
|
||||
newtp->undo_marker = treq->snt_isn;
|
||||
newtp->retrans_stamp = div_u64(treq->snt_synack,
|
||||
USEC_PER_SEC / TCP_TS_HZ);
|
||||
newtp->total_rto = req->num_timeout;
|
||||
newtp->total_rto_recoveries = 1;
|
||||
newtp->total_rto_time = tcp_time_stamp_raw() -
|
||||
newtp->undo_marker = treq->snt_isn;
|
||||
if (newtp->tcp_usec_ts) {
|
||||
newtp->retrans_stamp = treq->snt_synack;
|
||||
newtp->total_rto_time = (u32)(tcp_clock_us() -
|
||||
newtp->retrans_stamp) / USEC_PER_MSEC;
|
||||
} else {
|
||||
newtp->retrans_stamp = div_u64(treq->snt_synack,
|
||||
USEC_PER_SEC / TCP_TS_HZ);
|
||||
newtp->total_rto_time = tcp_clock_ms() -
|
||||
newtp->retrans_stamp;
|
||||
}
|
||||
newtp->total_rto_recoveries = 1;
|
||||
}
|
||||
newtp->tsoffset = treq->ts_off;
|
||||
#ifdef CONFIG_TCP_MD5SIG
|
||||
|
||||
@@ -799,7 +799,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
|
||||
|
||||
if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) {
|
||||
opts->options |= OPTION_TS;
|
||||
opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
|
||||
opts->tsval = tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb) + tp->tsoffset;
|
||||
opts->tsecr = tp->rx_opt.ts_recent;
|
||||
remaining -= TCPOLEN_TSTAMP_ALIGNED;
|
||||
}
|
||||
@@ -884,7 +884,8 @@ static unsigned int tcp_synack_options(const struct sock *sk,
|
||||
}
|
||||
if (likely(ireq->tstamp_ok)) {
|
||||
opts->options |= OPTION_TS;
|
||||
opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
|
||||
opts->tsval = tcp_skb_timestamp_ts(tcp_rsk(req)->req_usec_ts, skb) +
|
||||
tcp_rsk(req)->ts_off;
|
||||
opts->tsecr = READ_ONCE(req->ts_recent);
|
||||
remaining -= TCPOLEN_TSTAMP_ALIGNED;
|
||||
}
|
||||
@@ -943,7 +944,8 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
|
||||
|
||||
if (likely(tp->rx_opt.tstamp_ok)) {
|
||||
opts->options |= OPTION_TS;
|
||||
opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
|
||||
opts->tsval = skb ? tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb) +
|
||||
tp->tsoffset : 0;
|
||||
opts->tsecr = tp->rx_opt.ts_recent;
|
||||
size += TCPOLEN_TSTAMP_ALIGNED;
|
||||
}
|
||||
@@ -3379,7 +3381,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
|
||||
|
||||
/* Save stamp of the first (attempted) retransmit. */
|
||||
if (!tp->retrans_stamp)
|
||||
tp->retrans_stamp = tcp_skb_timestamp(skb);
|
||||
tp->retrans_stamp = tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb);
|
||||
|
||||
if (tp->undo_retrans < 0)
|
||||
tp->undo_retrans = 0;
|
||||
@@ -3665,6 +3667,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
|
||||
mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
|
||||
|
||||
memset(&opts, 0, sizeof(opts));
|
||||
if (tcp_rsk(req)->req_usec_ts < 0)
|
||||
tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst);
|
||||
now = tcp_clock_ns();
|
||||
#ifdef CONFIG_SYN_COOKIES
|
||||
if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok))
|
||||
@@ -3961,7 +3965,7 @@ int tcp_connect(struct sock *sk)
|
||||
|
||||
tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
|
||||
tcp_mstamp_refresh(tp);
|
||||
tp->retrans_stamp = tcp_time_stamp(tp);
|
||||
tp->retrans_stamp = tcp_time_stamp_ts(tp);
|
||||
tcp_connect_queue_skb(sk, buff);
|
||||
tcp_ecn_send_syn(sk, buff);
|
||||
tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
|
||||
|
||||
@@ -26,14 +26,18 @@
|
||||
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
|
||||
{
|
||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
u32 elapsed, start_ts, user_timeout;
|
||||
const struct tcp_sock *tp = tcp_sk(sk);
|
||||
u32 elapsed, user_timeout;
|
||||
s32 remaining;
|
||||
|
||||
start_ts = tcp_sk(sk)->retrans_stamp;
|
||||
user_timeout = READ_ONCE(icsk->icsk_user_timeout);
|
||||
if (!user_timeout)
|
||||
return icsk->icsk_rto;
|
||||
elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
|
||||
|
||||
elapsed = tcp_time_stamp_ts(tp) - tp->retrans_stamp;
|
||||
if (tp->tcp_usec_ts)
|
||||
elapsed /= USEC_PER_MSEC;
|
||||
|
||||
remaining = user_timeout - elapsed;
|
||||
if (remaining <= 0)
|
||||
return 1; /* user timeout has passed; fire ASAP */
|
||||
@@ -212,12 +216,13 @@ static bool retransmits_timed_out(struct sock *sk,
|
||||
unsigned int boundary,
|
||||
unsigned int timeout)
|
||||
{
|
||||
unsigned int start_ts;
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
unsigned int start_ts, delta;
|
||||
|
||||
if (!inet_csk(sk)->icsk_retransmits)
|
||||
return false;
|
||||
|
||||
start_ts = tcp_sk(sk)->retrans_stamp;
|
||||
start_ts = tp->retrans_stamp;
|
||||
if (likely(timeout == 0)) {
|
||||
unsigned int rto_base = TCP_RTO_MIN;
|
||||
|
||||
@@ -226,7 +231,12 @@ static bool retransmits_timed_out(struct sock *sk,
|
||||
timeout = tcp_model_timeout(sk, boundary, rto_base);
|
||||
}
|
||||
|
||||
return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
|
||||
if (tp->tcp_usec_ts) {
|
||||
/* delta maybe off up to a jiffy due to timer granularity. */
|
||||
delta = tp->tcp_mstamp - start_ts + jiffies_to_usecs(1);
|
||||
return (s32)(delta - timeout * USEC_PER_MSEC) >= 0;
|
||||
}
|
||||
return (s32)(tcp_time_stamp_ts(tp) - start_ts - timeout) >= 0;
|
||||
}
|
||||
|
||||
/* A write timeout has occurred. Process the after effects. */
|
||||
@@ -422,7 +432,7 @@ static void tcp_update_rto_stats(struct sock *sk)
|
||||
|
||||
if (!icsk->icsk_retransmits) {
|
||||
tp->total_rto_recoveries++;
|
||||
tp->rto_stamp = tcp_time_stamp(tp);
|
||||
tp->rto_stamp = tcp_time_stamp_ms(tp);
|
||||
}
|
||||
icsk->icsk_retransmits++;
|
||||
tp->total_rto++;
|
||||
@@ -462,26 +472,24 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
|
||||
req->num_timeout++;
|
||||
tcp_update_rto_stats(sk);
|
||||
if (!tp->retrans_stamp)
|
||||
tp->retrans_stamp = tcp_time_stamp(tp);
|
||||
tp->retrans_stamp = tcp_time_stamp_ts(tp);
|
||||
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
|
||||
req->timeout << req->num_timeout, TCP_RTO_MAX);
|
||||
}
|
||||
|
||||
static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
|
||||
const struct sk_buff *skb)
|
||||
const struct sk_buff *skb,
|
||||
u32 rtx_delta)
|
||||
{
|
||||
const struct tcp_sock *tp = tcp_sk(sk);
|
||||
const int timeout = TCP_RTO_MAX * 2;
|
||||
u32 rcv_delta, rtx_delta;
|
||||
u32 rcv_delta;
|
||||
|
||||
rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp;
|
||||
if (rcv_delta <= timeout)
|
||||
return false;
|
||||
|
||||
rtx_delta = (u32)msecs_to_jiffies(tcp_time_stamp(tp) -
|
||||
(tp->retrans_stamp ?: tcp_skb_timestamp(skb)));
|
||||
|
||||
return rtx_delta > timeout;
|
||||
return msecs_to_jiffies(rtx_delta) > timeout;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -534,7 +542,11 @@ void tcp_retransmit_timer(struct sock *sk)
|
||||
struct inet_sock *inet = inet_sk(sk);
|
||||
u32 rtx_delta;
|
||||
|
||||
rtx_delta = tcp_time_stamp(tp) - (tp->retrans_stamp ?: tcp_skb_timestamp(skb));
|
||||
rtx_delta = tcp_time_stamp_ts(tp) - (tp->retrans_stamp ?:
|
||||
tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb));
|
||||
if (tp->tcp_usec_ts)
|
||||
rtx_delta /= USEC_PER_MSEC;
|
||||
|
||||
if (sk->sk_family == AF_INET) {
|
||||
net_dbg_ratelimited("Probing zero-window on %pI4:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n",
|
||||
&inet->inet_daddr, ntohs(inet->inet_dport),
|
||||
@@ -551,7 +563,7 @@ void tcp_retransmit_timer(struct sock *sk)
|
||||
rtx_delta);
|
||||
}
|
||||
#endif
|
||||
if (tcp_rtx_probe0_timed_out(sk, skb)) {
|
||||
if (tcp_rtx_probe0_timed_out(sk, skb, rtx_delta)) {
|
||||
tcp_write_err(sk);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -286,6 +286,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
|
||||
goto failure;
|
||||
}
|
||||
|
||||
tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
|
||||
tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
|
||||
|
||||
if (!saddr) {
|
||||
@@ -1096,7 +1097,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
|
||||
|
||||
tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
|
||||
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
|
||||
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
|
||||
tcp_tw_tsval(tcptw),
|
||||
tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
|
||||
tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
|
||||
tw->tw_txhash);
|
||||
@@ -1123,7 +1124,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
|
||||
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
|
||||
tcp_rsk(req)->rcv_nxt,
|
||||
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
|
||||
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
|
||||
tcp_rsk_tsval(tcp_rsk(req)),
|
||||
READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
|
||||
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
|
||||
ipv6_get_dsfield(ipv6_hdr(skb)), 0,
|
||||
|
||||
@@ -153,7 +153,7 @@ void synproxy_init_timestamp_cookie(const struct nf_synproxy_info *info,
|
||||
struct synproxy_options *opts)
|
||||
{
|
||||
opts->tsecr = opts->tsval;
|
||||
opts->tsval = tcp_time_stamp_raw() & ~0x3f;
|
||||
opts->tsval = tcp_clock_ms() & ~0x3f;
|
||||
|
||||
if (opts->options & NF_SYNPROXY_OPT_WSCALE) {
|
||||
opts->tsval |= opts->wscale;
|
||||
|
||||
@@ -177,7 +177,7 @@ static __always_inline __u32 tcp_ns_to_ts(__u64 ns)
|
||||
return ns / (NSEC_PER_SEC / TCP_TS_HZ);
|
||||
}
|
||||
|
||||
static __always_inline __u32 tcp_time_stamp_raw(void)
|
||||
static __always_inline __u32 tcp_clock_ms(void)
|
||||
{
|
||||
return tcp_ns_to_ts(tcp_clock_ns());
|
||||
}
|
||||
@@ -274,7 +274,7 @@ static __always_inline bool tscookie_init(struct tcphdr *tcp_header,
|
||||
if (!loop_ctx.option_timestamp)
|
||||
return false;
|
||||
|
||||
cookie = tcp_time_stamp_raw() & ~TSMASK;
|
||||
cookie = tcp_clock_ms() & ~TSMASK;
|
||||
cookie |= loop_ctx.wscale & TS_OPT_WSCALE_MASK;
|
||||
if (loop_ctx.option_sack)
|
||||
cookie |= TS_OPT_SACK;
|
||||
|
||||
Reference in New Issue
Block a user