mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-03 15:51:40 -04:00
Merge branch 'add TCP_BPF_SOCK_OPS_CB_FLAGS to bpf_*sockopt()'
Alan Maguire says: ==================== As previously discussed here [1], long-lived sockets can miss a chance to set additional callbacks if a sock ops program was not attached early in their lifetime. Adding support to bpf_setsockopt() to set callback flags (and bpf_getsockopt() to retrieve them) provides other opportunities to enable callbacks, either directly via a cgroup/setsockopt intercepted setsockopt() or via a socket iterator. Patch 1 adds bpf_[get|set]sockopt() support; patch 2 adds testing for it via a sockops program, along with verification via a cgroup/getsockopt program. Changes since v1 [2]: - Removed unneeded READ_ONCE() (Martin, patch 1) - Reworked sockopt test to leave existing tests undisturbed while adding test_nonstandard_opt() test to cover the TCP_BPF_SOCK_OPS_CB_FLAGS case; test verifies that value set via bpf_setsockopt() is what we expect via a call to getsockopt() which is caught by a cgroup/getsockopt program to provide the flags value (Martin, patch 2) - Removed unneeded iterator test (Martin) [1] https://lore.kernel.org/bpf/f42f157b-6e52-dd4d-3d97-9b86c84c0b00@oracle.com/ [2] https://lore.kernel.org/bpf/20240802152929.2695863-1-alan.maguire@oracle.com/ ==================== Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
This commit is contained in:
@@ -2851,7 +2851,7 @@ union bpf_attr {
|
||||
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**,
|
||||
* **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**,
|
||||
* **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**,
|
||||
* **TCP_BPF_RTO_MIN**.
|
||||
* **TCP_BPF_RTO_MIN**, **TCP_BPF_SOCK_OPS_CB_FLAGS**.
|
||||
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
|
||||
* * **IPPROTO_IPV6**, which supports the following *optname*\ s:
|
||||
* **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**.
|
||||
@@ -7080,6 +7080,7 @@ enum {
|
||||
TCP_BPF_SYN = 1005, /* Copy the TCP header */
|
||||
TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */
|
||||
TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
|
||||
TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
|
||||
};
|
||||
|
||||
enum {
|
||||
|
||||
@@ -5278,6 +5278,11 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname,
|
||||
return -EINVAL;
|
||||
inet_csk(sk)->icsk_rto_min = timeout;
|
||||
break;
|
||||
case TCP_BPF_SOCK_OPS_CB_FLAGS:
|
||||
if (val & ~(BPF_SOCK_OPS_ALL_CB_FLAGS))
|
||||
return -EINVAL;
|
||||
tp->bpf_sock_ops_cb_flags = val;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -5366,6 +5371,17 @@ static int sol_tcp_sockopt(struct sock *sk, int optname,
|
||||
if (*optlen < 1)
|
||||
return -EINVAL;
|
||||
break;
|
||||
case TCP_BPF_SOCK_OPS_CB_FLAGS:
|
||||
if (*optlen != sizeof(int))
|
||||
return -EINVAL;
|
||||
if (getopt) {
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
int cb_flags = tp->bpf_sock_ops_cb_flags;
|
||||
|
||||
memcpy(optval, &cb_flags, *optlen);
|
||||
return 0;
|
||||
}
|
||||
return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen);
|
||||
default:
|
||||
if (getopt)
|
||||
return -EINVAL;
|
||||
|
||||
@@ -2851,7 +2851,7 @@ union bpf_attr {
|
||||
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**,
|
||||
* **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**,
|
||||
* **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**,
|
||||
* **TCP_BPF_RTO_MIN**.
|
||||
* **TCP_BPF_RTO_MIN**, **TCP_BPF_SOCK_OPS_CB_FLAGS**.
|
||||
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
|
||||
* * **IPPROTO_IPV6**, which supports the following *optname*\ s:
|
||||
* **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**.
|
||||
@@ -7080,6 +7080,7 @@ enum {
|
||||
TCP_BPF_SYN = 1005, /* Copy the TCP header */
|
||||
TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */
|
||||
TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
|
||||
TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
|
||||
};
|
||||
|
||||
enum {
|
||||
|
||||
@@ -154,6 +154,51 @@ static void test_ktls(int family)
|
||||
close(sfd);
|
||||
}
|
||||
|
||||
static void test_nonstandard_opt(int family)
|
||||
{
|
||||
struct setget_sockopt__bss *bss = skel->bss;
|
||||
struct bpf_link *getsockopt_link = NULL;
|
||||
int sfd = -1, fd = -1, cfd = -1, flags;
|
||||
socklen_t flagslen = sizeof(flags);
|
||||
|
||||
memset(bss, 0, sizeof(*bss));
|
||||
|
||||
sfd = start_server(family, SOCK_STREAM,
|
||||
family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
|
||||
if (!ASSERT_GE(sfd, 0, "start_server"))
|
||||
return;
|
||||
|
||||
fd = connect_to_fd(sfd, 0);
|
||||
if (!ASSERT_GE(fd, 0, "connect_to_fd_server"))
|
||||
goto err_out;
|
||||
|
||||
/* cgroup/getsockopt prog will intercept getsockopt() below and
|
||||
* retrieve the tcp socket bpf_sock_ops_cb_flags value for the
|
||||
* accept()ed socket; this was set earlier in the passive established
|
||||
* callback for the accept()ed socket via bpf_setsockopt().
|
||||
*/
|
||||
getsockopt_link = bpf_program__attach_cgroup(skel->progs._getsockopt, cg_fd);
|
||||
if (!ASSERT_OK_PTR(getsockopt_link, "getsockopt prog"))
|
||||
goto err_out;
|
||||
|
||||
cfd = accept(sfd, NULL, 0);
|
||||
if (!ASSERT_GE(cfd, 0, "accept"))
|
||||
goto err_out;
|
||||
|
||||
if (!ASSERT_OK(getsockopt(cfd, SOL_TCP, TCP_BPF_SOCK_OPS_CB_FLAGS, &flags, &flagslen),
|
||||
"getsockopt_flags"))
|
||||
goto err_out;
|
||||
ASSERT_EQ(flags & BPF_SOCK_OPS_STATE_CB_FLAG, BPF_SOCK_OPS_STATE_CB_FLAG,
|
||||
"cb_flags_set");
|
||||
err_out:
|
||||
close(sfd);
|
||||
if (fd != -1)
|
||||
close(fd);
|
||||
if (cfd != -1)
|
||||
close(cfd);
|
||||
bpf_link__destroy(getsockopt_link);
|
||||
}
|
||||
|
||||
void test_setget_sockopt(void)
|
||||
{
|
||||
cg_fd = test__join_cgroup(CG_NAME);
|
||||
@@ -191,6 +236,8 @@ void test_setget_sockopt(void)
|
||||
test_udp(AF_INET);
|
||||
test_ktls(AF_INET6);
|
||||
test_ktls(AF_INET);
|
||||
test_nonstandard_opt(AF_INET);
|
||||
test_nonstandard_opt(AF_INET6);
|
||||
|
||||
done:
|
||||
setget_sockopt__destroy(skel);
|
||||
|
||||
@@ -59,6 +59,8 @@ static const struct sockopt_test sol_tcp_tests[] = {
|
||||
{ .opt = TCP_THIN_LINEAR_TIMEOUTS, .flip = 1, },
|
||||
{ .opt = TCP_USER_TIMEOUT, .new = 123400, .expected = 123400, },
|
||||
{ .opt = TCP_NOTSENT_LOWAT, .new = 1314, .expected = 1314, },
|
||||
{ .opt = TCP_BPF_SOCK_OPS_CB_FLAGS, .new = BPF_SOCK_OPS_ALL_CB_FLAGS,
|
||||
.expected = BPF_SOCK_OPS_ALL_CB_FLAGS, },
|
||||
{ .opt = 0, },
|
||||
};
|
||||
|
||||
@@ -353,11 +355,30 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family,
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* cgroup/getsockopt program that serves TCP_BPF_SOCK_OPS_CB_FLAGS.
 *
 * For level SOL_TCP and optname TCP_BPF_SOCK_OPS_CB_FLAGS on a context
 * that carries a socket, the socket's bpf_sock_ops_cb_flags is copied
 * into the option buffer and ctx->retval is set to 0. Every other
 * request is left untouched. The program returns 1 on every path.
 */
SEC("cgroup/getsockopt")
|
||||
int _getsockopt(struct bpf_sockopt *ctx)
|
||||
{
|
||||
struct bpf_sock *sk = ctx->sk;
|
||||
int *optval = ctx->optval;
|
||||
struct tcp_sock *tp;
|
||||
|
||||
/* Only act on SOL_TCP/TCP_BPF_SOCK_OPS_CB_FLAGS with a socket present. */
if (!sk || ctx->level != SOL_TCP || ctx->optname != TCP_BPF_SOCK_OPS_CB_FLAGS)
|
||||
return 1;
|
||||
|
||||
/* View the bpf_sock as a tcp_sock to reach bpf_sock_ops_cb_flags. */
tp = bpf_core_cast(sk, struct tcp_sock);
|
||||
/* Write only when an int fits between optval and optval_end. */
if (ctx->optval + sizeof(int) <= ctx->optval_end) {
|
||||
*optval = tp->bpf_sock_ops_cb_flags;
|
||||
/* NOTE(review): presumably overrides the kernel's error for this
 * optname so the userspace getsockopt() succeeds - confirm.
 */
ctx->retval = 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
SEC("sockops")
|
||||
int skops_sockopt(struct bpf_sock_ops *skops)
|
||||
{
|
||||
struct bpf_sock *bpf_sk = skops->sk;
|
||||
struct sock *sk;
|
||||
int flags;
|
||||
|
||||
if (!bpf_sk)
|
||||
return 1;
|
||||
@@ -384,9 +405,8 @@ int skops_sockopt(struct bpf_sock_ops *skops)
|
||||
nr_passive += !(bpf_test_sockopt(skops, sk) ||
|
||||
test_tcp_maxseg(skops, sk) ||
|
||||
test_tcp_saved_syn(skops, sk));
|
||||
bpf_sock_ops_cb_flags_set(skops,
|
||||
skops->bpf_sock_ops_cb_flags |
|
||||
BPF_SOCK_OPS_STATE_CB_FLAG);
|
||||
flags = skops->bpf_sock_ops_cb_flags | BPF_SOCK_OPS_STATE_CB_FLAG;
|
||||
bpf_setsockopt(skops, SOL_TCP, TCP_BPF_SOCK_OPS_CB_FLAGS, &flags, sizeof(flags));
|
||||
break;
|
||||
case BPF_SOCK_OPS_STATE_CB:
|
||||
if (skops->args[1] == BPF_TCP_CLOSE_WAIT)
|
||||
|
||||
Reference in New Issue
Block a user