mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-12-27 11:06:41 -05:00
Merge branch 'inet_diag-make-dumps-faster-with-simple-filters'
Eric Dumazet says:

====================
inet_diag: make dumps faster with simple filters

inet_diag_bc_sk() pulls five cache lines per socket,
while most filters only need the first two.

We can change it to only pull needed cache lines,
to make things like "ss -temoi src :21456" much faster.

First patches (1-3) are annotating data-races as a first step.
====================

Link: https://patch.msgid.link/20250828102738.2065992-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
@@ -38,6 +38,11 @@ struct inet_diag_dump_data {
|
||||
#define inet_diag_nla_bpf_stgs req_nlas[INET_DIAG_REQ_SK_BPF_STORAGES]
|
||||
|
||||
struct bpf_sk_storage_diag *bpf_stg_diag;
|
||||
bool mark_needed; /* INET_DIAG_BC_MARK_COND present. */
|
||||
#ifdef CONFIG_SOCK_CGROUP_DATA
|
||||
bool cgroup_needed; /* INET_DIAG_BC_CGROUP_COND present. */
|
||||
#endif
|
||||
bool userlocks_needed; /* INET_DIAG_BC_AUTO present. */
|
||||
};
|
||||
|
||||
struct inet_connection_sock;
|
||||
@@ -46,7 +51,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
|
||||
const struct inet_diag_req_v2 *req,
|
||||
u16 nlmsg_flags, bool net_admin);
|
||||
|
||||
int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
|
||||
int inet_diag_bc_sk(const struct inet_diag_dump_data *cb_data, struct sock *sk);
|
||||
|
||||
void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk);
|
||||
|
||||
|
||||
@@ -71,25 +71,25 @@ static void inet_diag_unlock_handler(const struct inet_diag_handler *handler)
|
||||
|
||||
void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk)
|
||||
{
|
||||
r->idiag_family = sk->sk_family;
|
||||
r->idiag_family = READ_ONCE(sk->sk_family);
|
||||
|
||||
r->id.idiag_sport = htons(sk->sk_num);
|
||||
r->id.idiag_dport = sk->sk_dport;
|
||||
r->id.idiag_if = sk->sk_bound_dev_if;
|
||||
r->id.idiag_sport = htons(READ_ONCE(sk->sk_num));
|
||||
r->id.idiag_dport = READ_ONCE(sk->sk_dport);
|
||||
r->id.idiag_if = READ_ONCE(sk->sk_bound_dev_if);
|
||||
sock_diag_save_cookie(sk, r->id.idiag_cookie);
|
||||
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
if (sk->sk_family == AF_INET6) {
|
||||
*(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr;
|
||||
*(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr;
|
||||
if (r->idiag_family == AF_INET6) {
|
||||
data_race(*(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr);
|
||||
data_race(*(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
|
||||
memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
|
||||
|
||||
r->id.idiag_src[0] = sk->sk_rcv_saddr;
|
||||
r->id.idiag_dst[0] = sk->sk_daddr;
|
||||
r->id.idiag_src[0] = READ_ONCE(sk->sk_rcv_saddr);
|
||||
r->id.idiag_dst[0] = READ_ONCE(sk->sk_daddr);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill);
|
||||
@@ -580,7 +580,7 @@ static void entry_fill_addrs(struct inet_diag_entry *entry,
|
||||
const struct sock *sk)
|
||||
{
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
if (sk->sk_family == AF_INET6) {
|
||||
if (entry->family == AF_INET6) {
|
||||
entry->saddr = sk->sk_v6_rcv_saddr.s6_addr32;
|
||||
entry->daddr = sk->sk_v6_daddr.s6_addr32;
|
||||
} else
|
||||
@@ -591,31 +591,36 @@ static void entry_fill_addrs(struct inet_diag_entry *entry,
|
||||
}
|
||||
}
|
||||
|
||||
int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
|
||||
int inet_diag_bc_sk(const struct inet_diag_dump_data *cb_data, struct sock *sk)
|
||||
{
|
||||
struct inet_sock *inet = inet_sk(sk);
|
||||
const struct nlattr *bc = cb_data->inet_diag_nla_bc;
|
||||
const struct inet_sock *inet = inet_sk(sk);
|
||||
struct inet_diag_entry entry;
|
||||
|
||||
if (!bc)
|
||||
return 1;
|
||||
|
||||
entry.family = sk->sk_family;
|
||||
entry.family = READ_ONCE(sk->sk_family);
|
||||
entry_fill_addrs(&entry, sk);
|
||||
entry.sport = inet->inet_num;
|
||||
entry.dport = ntohs(inet->inet_dport);
|
||||
entry.ifindex = sk->sk_bound_dev_if;
|
||||
entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
|
||||
if (sk_fullsock(sk))
|
||||
entry.mark = READ_ONCE(sk->sk_mark);
|
||||
else if (sk->sk_state == TCP_NEW_SYN_RECV)
|
||||
entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
|
||||
else if (sk->sk_state == TCP_TIME_WAIT)
|
||||
entry.mark = inet_twsk(sk)->tw_mark;
|
||||
else
|
||||
entry.mark = 0;
|
||||
entry.sport = READ_ONCE(inet->inet_num);
|
||||
entry.dport = ntohs(READ_ONCE(inet->inet_dport));
|
||||
entry.ifindex = READ_ONCE(sk->sk_bound_dev_if);
|
||||
if (cb_data->userlocks_needed)
|
||||
entry.userlocks = sk_fullsock(sk) ? READ_ONCE(sk->sk_userlocks) : 0;
|
||||
if (cb_data->mark_needed) {
|
||||
if (sk_fullsock(sk))
|
||||
entry.mark = READ_ONCE(sk->sk_mark);
|
||||
else if (sk->sk_state == TCP_NEW_SYN_RECV)
|
||||
entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
|
||||
else if (sk->sk_state == TCP_TIME_WAIT)
|
||||
entry.mark = inet_twsk(sk)->tw_mark;
|
||||
else
|
||||
entry.mark = 0;
|
||||
}
|
||||
#ifdef CONFIG_SOCK_CGROUP_DATA
|
||||
entry.cgroup_id = sk_fullsock(sk) ?
|
||||
cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0;
|
||||
if (cb_data->cgroup_needed)
|
||||
entry.cgroup_id = sk_fullsock(sk) ?
|
||||
cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0;
|
||||
#endif
|
||||
|
||||
return inet_diag_bc_run(bc, &entry);
|
||||
@@ -715,16 +720,21 @@ static bool valid_cgroupcond(const struct inet_diag_bc_op *op, int len,
|
||||
}
|
||||
#endif
|
||||
|
||||
static int inet_diag_bc_audit(const struct nlattr *attr,
|
||||
static int inet_diag_bc_audit(struct inet_diag_dump_data *cb_data,
|
||||
const struct sk_buff *skb)
|
||||
{
|
||||
bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
|
||||
const struct nlattr *attr = cb_data->inet_diag_nla_bc;
|
||||
const void *bytecode, *bc;
|
||||
int bytecode_len, len;
|
||||
bool net_admin;
|
||||
|
||||
if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op))
|
||||
if (!attr)
|
||||
return 0;
|
||||
|
||||
if (nla_len(attr) < sizeof(struct inet_diag_bc_op))
|
||||
return -EINVAL;
|
||||
|
||||
net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
|
||||
bytecode = bc = nla_data(attr);
|
||||
len = bytecode_len = nla_len(attr);
|
||||
|
||||
@@ -756,14 +766,18 @@ static int inet_diag_bc_audit(const struct nlattr *attr,
|
||||
return -EPERM;
|
||||
if (!valid_markcond(bc, len, &min_len))
|
||||
return -EINVAL;
|
||||
cb_data->mark_needed = true;
|
||||
break;
|
||||
#ifdef CONFIG_SOCK_CGROUP_DATA
|
||||
case INET_DIAG_BC_CGROUP_COND:
|
||||
if (!valid_cgroupcond(bc, len, &min_len))
|
||||
return -EINVAL;
|
||||
cb_data->cgroup_needed = true;
|
||||
break;
|
||||
#endif
|
||||
case INET_DIAG_BC_AUTO:
|
||||
cb_data->userlocks_needed = true;
|
||||
fallthrough;
|
||||
case INET_DIAG_BC_JMP:
|
||||
case INET_DIAG_BC_NOP:
|
||||
break;
|
||||
@@ -840,13 +854,10 @@ static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen)
|
||||
kfree(cb_data);
|
||||
return err;
|
||||
}
|
||||
nla = cb_data->inet_diag_nla_bc;
|
||||
if (nla) {
|
||||
err = inet_diag_bc_audit(nla, skb);
|
||||
if (err) {
|
||||
kfree(cb_data);
|
||||
return err;
|
||||
}
|
||||
err = inet_diag_bc_audit(cb_data, skb);
|
||||
if (err) {
|
||||
kfree(cb_data);
|
||||
return err;
|
||||
}
|
||||
|
||||
nla = cb_data->inet_diag_nla_bpf_stgs;
|
||||
|
||||
@@ -126,9 +126,9 @@ static int raw_diag_dump_one(struct netlink_callback *cb,
|
||||
static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
|
||||
struct netlink_callback *cb,
|
||||
const struct inet_diag_req_v2 *r,
|
||||
struct nlattr *bc, bool net_admin)
|
||||
bool net_admin)
|
||||
{
|
||||
if (!inet_diag_bc_sk(bc, sk))
|
||||
if (!inet_diag_bc_sk(cb->data, sk))
|
||||
return 0;
|
||||
|
||||
return inet_sk_diag_fill(sk, NULL, skb, cb, r, NLM_F_MULTI, net_admin);
|
||||
@@ -140,17 +140,13 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
||||
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
|
||||
struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
|
||||
struct net *net = sock_net(skb->sk);
|
||||
struct inet_diag_dump_data *cb_data;
|
||||
int num, s_num, slot, s_slot;
|
||||
struct hlist_head *hlist;
|
||||
struct sock *sk = NULL;
|
||||
struct nlattr *bc;
|
||||
|
||||
if (IS_ERR(hashinfo))
|
||||
return;
|
||||
|
||||
cb_data = cb->data;
|
||||
bc = cb_data->inet_diag_nla_bc;
|
||||
s_slot = cb->args[0];
|
||||
num = s_num = cb->args[1];
|
||||
|
||||
@@ -174,7 +170,7 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
||||
if (r->id.idiag_dport != inet->inet_dport &&
|
||||
r->id.idiag_dport)
|
||||
goto next;
|
||||
if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0)
|
||||
if (sk_diag_dump(sk, skb, cb, r, net_admin) < 0)
|
||||
goto out_unlock;
|
||||
next:
|
||||
num++;
|
||||
|
||||
@@ -248,12 +248,12 @@ static int tcp_req_diag_fill(struct sock *sk, struct sk_buff *skb,
|
||||
inet_diag_msg_common_fill(r, sk);
|
||||
r->idiag_state = TCP_SYN_RECV;
|
||||
r->idiag_timer = 1;
|
||||
r->idiag_retrans = reqsk->num_retrans;
|
||||
r->idiag_retrans = READ_ONCE(reqsk->num_retrans);
|
||||
|
||||
BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
|
||||
offsetof(struct sock, sk_cookie));
|
||||
|
||||
tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies;
|
||||
tmo = READ_ONCE(inet_reqsk(sk)->rsk_timer.expires) - jiffies;
|
||||
r->idiag_expires = jiffies_delta_to_msecs(tmo);
|
||||
r->idiag_rqueue = 0;
|
||||
r->idiag_wqueue = 0;
|
||||
@@ -320,11 +320,9 @@ static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
||||
u32 idiag_states = r->idiag_states;
|
||||
struct inet_hashinfo *hashinfo;
|
||||
int i, num, s_i, s_num;
|
||||
struct nlattr *bc;
|
||||
struct sock *sk;
|
||||
|
||||
hashinfo = net->ipv4.tcp_death_row.hashinfo;
|
||||
bc = cb_data->inet_diag_nla_bc;
|
||||
if (idiag_states & TCPF_SYN_RECV)
|
||||
idiag_states |= TCPF_NEW_SYN_RECV;
|
||||
s_i = cb->args[1];
|
||||
@@ -365,7 +363,7 @@ static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
||||
r->id.idiag_sport)
|
||||
goto next_listen;
|
||||
|
||||
if (!inet_diag_bc_sk(bc, sk))
|
||||
if (!inet_diag_bc_sk(cb_data, sk))
|
||||
goto next_listen;
|
||||
|
||||
if (inet_sk_diag_fill(sk, inet_csk(sk), skb,
|
||||
@@ -432,7 +430,7 @@ static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
||||
r->sdiag_family != sk->sk_family)
|
||||
goto next_bind;
|
||||
|
||||
if (!inet_diag_bc_sk(bc, sk))
|
||||
if (!inet_diag_bc_sk(cb_data, sk))
|
||||
goto next_bind;
|
||||
|
||||
sock_hold(sk);
|
||||
@@ -519,7 +517,7 @@ static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
||||
goto next_normal;
|
||||
twsk_build_assert();
|
||||
|
||||
if (!inet_diag_bc_sk(bc, sk))
|
||||
if (!inet_diag_bc_sk(cb_data, sk))
|
||||
goto next_normal;
|
||||
|
||||
if (!refcount_inc_not_zero(&sk->sk_refcnt))
|
||||
|
||||
@@ -4438,7 +4438,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
|
||||
tcp_sk_rw(sk)->total_retrans++;
|
||||
}
|
||||
trace_tcp_retransmit_synack(sk, req);
|
||||
req->num_retrans++;
|
||||
WRITE_ONCE(req->num_retrans, req->num_retrans + 1);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -16,9 +16,9 @@
|
||||
static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
|
||||
struct netlink_callback *cb,
|
||||
const struct inet_diag_req_v2 *req,
|
||||
struct nlattr *bc, bool net_admin)
|
||||
bool net_admin)
|
||||
{
|
||||
if (!inet_diag_bc_sk(bc, sk))
|
||||
if (!inet_diag_bc_sk(cb->data, sk))
|
||||
return 0;
|
||||
|
||||
return inet_sk_diag_fill(sk, NULL, skb, cb, req, NLM_F_MULTI,
|
||||
@@ -92,12 +92,8 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb,
|
||||
{
|
||||
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
|
||||
struct net *net = sock_net(skb->sk);
|
||||
struct inet_diag_dump_data *cb_data;
|
||||
int num, s_num, slot, s_slot;
|
||||
struct nlattr *bc;
|
||||
|
||||
cb_data = cb->data;
|
||||
bc = cb_data->inet_diag_nla_bc;
|
||||
s_slot = cb->args[0];
|
||||
num = s_num = cb->args[1];
|
||||
|
||||
@@ -130,7 +126,7 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb,
|
||||
r->id.idiag_dport)
|
||||
goto next;
|
||||
|
||||
if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) {
|
||||
if (sk_diag_dump(sk, skb, cb, r, net_admin) < 0) {
|
||||
spin_unlock_bh(&hslot->lock);
|
||||
goto done;
|
||||
}
|
||||
|
||||
@@ -15,9 +15,9 @@
|
||||
static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
|
||||
struct netlink_callback *cb,
|
||||
const struct inet_diag_req_v2 *req,
|
||||
struct nlattr *bc, bool net_admin)
|
||||
bool net_admin)
|
||||
{
|
||||
if (!inet_diag_bc_sk(bc, sk))
|
||||
if (!inet_diag_bc_sk(cb->data, sk))
|
||||
return 0;
|
||||
|
||||
return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, req, NLM_F_MULTI,
|
||||
@@ -76,9 +76,7 @@ static void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callba
|
||||
const struct inet_diag_req_v2 *r,
|
||||
bool net_admin)
|
||||
{
|
||||
struct inet_diag_dump_data *cb_data = cb->data;
|
||||
struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
|
||||
struct nlattr *bc = cb_data->inet_diag_nla_bc;
|
||||
struct net *net = sock_net(skb->sk);
|
||||
struct inet_hashinfo *hinfo;
|
||||
int i;
|
||||
@@ -121,7 +119,7 @@ static void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callba
|
||||
if (!refcount_inc_not_zero(&sk->sk_refcnt))
|
||||
goto next_listen;
|
||||
|
||||
ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin);
|
||||
ret = sk_diag_dump(sk, skb, cb, r, net_admin);
|
||||
|
||||
sock_put(sk);
|
||||
|
||||
@@ -154,15 +152,10 @@ static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
||||
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
|
||||
struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
|
||||
struct net *net = sock_net(skb->sk);
|
||||
struct inet_diag_dump_data *cb_data;
|
||||
struct mptcp_sock *msk;
|
||||
struct nlattr *bc;
|
||||
|
||||
BUILD_BUG_ON(sizeof(cb->ctx) < sizeof(*diag_ctx));
|
||||
|
||||
cb_data = cb->data;
|
||||
bc = cb_data->inet_diag_nla_bc;
|
||||
|
||||
while ((msk = mptcp_token_iter_next(net, &diag_ctx->s_slot,
|
||||
&diag_ctx->s_num)) != NULL) {
|
||||
struct inet_sock *inet = (struct inet_sock *)msk;
|
||||
@@ -181,7 +174,7 @@ static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
||||
r->id.idiag_dport)
|
||||
goto next;
|
||||
|
||||
ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin);
|
||||
ret = sk_diag_dump(sk, skb, cb, r, net_admin);
|
||||
next:
|
||||
sock_put(sk);
|
||||
if (ret < 0) {
|
||||
|
||||
Reference in New Issue
Block a user