Merge branch 'selftests-bpf-test-sockmap-sockhash-redirection'

Michal Luczaj says:

====================
selftests/bpf: Test sockmap/sockhash redirection

The idea behind this series is to comprehensively test the BPF redirection:

BPF_MAP_TYPE_SOCKMAP,
BPF_MAP_TYPE_SOCKHASH
	x
	sk_msg-to-egress,
	sk_msg-to-ingress,
	sk_skb-to-egress,
	sk_skb-to-ingress
		x
		AF_INET, SOCK_STREAM,
		AF_INET6, SOCK_STREAM,
		AF_INET, SOCK_DGRAM,
		AF_INET6, SOCK_DGRAM,
		AF_UNIX, SOCK_STREAM,
		AF_UNIX, SOCK_DGRAM,
		AF_VSOCK, SOCK_STREAM,
		AF_VSOCK, SOCK_SEQPACKET

New module is introduced, sockmap_redir: all supported and unsupported
redirect combinations are tested for success and failure respectively. Code
is pretty much stolen/adapted from Jakub Sitnicki's sockmap_redir_matrix.c
[1].

Usage:
$ cd tools/testing/selftests/bpf
$ make
$ sudo ./test_progs -t sockmap_redir
...
Summary: 1/576 PASSED, 0 SKIPPED, 0 FAILED

[1]: https://github.com/jsitnicki/sockmap-redir-matrix/blob/main/sockmap_redir_matrix.c

Changes in v3:
- Drop unrelated changes; sockmap_listen, test_sockmap_listen, doc
- Collect tags [Jakub, John]
- Introduce BPF verdict programs especially for sockmap_redir [Jiayuan]
- Link to v2: https://lore.kernel.org/r/20250411-selftests-sockmap-redir-v2-0-5f9b018d6704@rbox.co

Changes in v2:
- Verify that the unsupported redirect combos do fail [Jakub]
- Dedup tests in sockmap_listen
- Cosmetic changes and code reordering
- Link to v1: https://lore.kernel.org/bpf/42939687-20f9-4a45-b7c2-342a0e11a014@rbox.co/
====================

Link: https://patch.msgid.link/20250515-selftests-sockmap-redir-v3-0-a1ea723f7e7e@rbox.co
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
This commit is contained in:
Martin KaFai Lau
2025-05-22 14:26:59 -07:00
5 changed files with 623 additions and 476 deletions

View File

@@ -3,6 +3,7 @@
#ifndef __SOCKET_HELPERS__
#define __SOCKET_HELPERS__
#include <sys/un.h>
#include <linux/vm_sockets.h>
/* include/linux/net.h */
@@ -169,6 +170,15 @@ static inline void init_addr_loopback6(struct sockaddr_storage *ss,
*len = sizeof(*addr6);
}
/* Fill @ss with a pathless AF_UNIX address and set @len to cover only the
 * family field. Per unix(7), binding with addrlen == sizeof(sa_family_t)
 * makes the kernel auto-bind the socket to an abstract address.
 */
static inline void init_addr_loopback_unix(struct sockaddr_storage *ss,
					   socklen_t *len)
{
	struct sockaddr_un *un;

	memset(ss, 0, sizeof(*ss));
	un = (struct sockaddr_un *)ss;
	un->sun_family = AF_UNIX;
	*len = sizeof(sa_family_t);
}
static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
socklen_t *len)
{
@@ -190,6 +200,9 @@ static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
case AF_INET6:
init_addr_loopback6(ss, len);
return;
case AF_UNIX:
init_addr_loopback_unix(ss, len);
return;
case AF_VSOCK:
init_addr_loopback_vsock(ss, len);
return;
@@ -315,21 +328,27 @@ static inline int create_pair(int family, int sotype, int *p0, int *p1)
{
__close_fd int s, c = -1, p = -1;
struct sockaddr_storage addr;
socklen_t len = sizeof(addr);
socklen_t len;
int err;
s = socket_loopback(family, sotype);
if (s < 0)
return s;
err = xgetsockname(s, sockaddr(&addr), &len);
if (err)
return err;
c = xsocket(family, sotype, 0);
if (c < 0)
return c;
init_addr_loopback(family, &addr, &len);
err = xbind(c, sockaddr(&addr), len);
if (err)
return err;
len = sizeof(addr);
err = xgetsockname(s, sockaddr(&addr), &len);
if (err)
return err;
err = connect(c, sockaddr(&addr), len);
if (err) {
if (errno != EINPROGRESS) {
@@ -391,4 +410,59 @@ static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1,
return err;
}
/* Map a socket's (domain, type), as reported by getsockopt(SO_DOMAIN) and
 * getsockopt(SO_TYPE), to a short label used in subtest names ("tcp4",
 * "u_str", ...). Unknown combinations yield "???".
 */
static inline const char *socket_kind_to_str(int sock_fd)
{
	int domain, type;
	socklen_t optlen;

	optlen = sizeof(domain);
	if (getsockopt(sock_fd, SOL_SOCKET, SO_DOMAIN, &domain, &optlen))
		FAIL_ERRNO("getsockopt(SO_DOMAIN)");

	optlen = sizeof(type);
	if (getsockopt(sock_fd, SOL_SOCKET, SO_TYPE, &type, &optlen))
		FAIL_ERRNO("getsockopt(SO_TYPE)");

	if (domain == AF_INET) {
		if (type == SOCK_STREAM)
			return "tcp4";
		if (type == SOCK_DGRAM)
			return "udp4";
	} else if (domain == AF_INET6) {
		if (type == SOCK_STREAM)
			return "tcp6";
		if (type == SOCK_DGRAM)
			return "udp6";
	} else if (domain == AF_UNIX) {
		if (type == SOCK_STREAM)
			return "u_str";
		if (type == SOCK_DGRAM)
			return "u_dgr";
		if (type == SOCK_SEQPACKET)
			return "u_seq";
	} else if (domain == AF_VSOCK) {
		if (type == SOCK_STREAM)
			return "v_str";
		if (type == SOCK_DGRAM)
			return "v_dgr";
		if (type == SOCK_SEQPACKET)
			return "v_seq";
	}

	return "???";
}
#endif // __SOCKET_HELPERS__

View File

@@ -5,12 +5,15 @@
#define MAX_TEST_NAME 80
#define u32(v) ((u32){(v)})
#define u64(v) ((u64){(v)})
#define __always_unused __attribute__((__unused__))
#define xbpf_map_delete_elem(fd, key) \
({ \
int __ret = bpf_map_delete_elem((fd), (key)); \
if (__ret < 0) \
if (__ret < 0) \
FAIL_ERRNO("map_delete"); \
__ret; \
})
@@ -18,7 +21,7 @@
#define xbpf_map_lookup_elem(fd, key, val) \
({ \
int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
if (__ret < 0) \
if (__ret < 0) \
FAIL_ERRNO("map_lookup"); \
__ret; \
})
@@ -26,7 +29,7 @@
#define xbpf_map_update_elem(fd, key, val, flags) \
({ \
int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
if (__ret < 0) \
if (__ret < 0) \
FAIL_ERRNO("map_update"); \
__ret; \
})
@@ -35,7 +38,7 @@
({ \
int __ret = \
bpf_prog_attach((prog), (target), (type), (flags)); \
if (__ret < 0) \
if (__ret < 0) \
FAIL_ERRNO("prog_attach(" #type ")"); \
__ret; \
})
@@ -43,7 +46,7 @@
#define xbpf_prog_detach2(prog, target, type) \
({ \
int __ret = bpf_prog_detach2((prog), (target), (type)); \
if (__ret < 0) \
if (__ret < 0) \
FAIL_ERRNO("prog_detach2(" #type ")"); \
__ret; \
})
@@ -66,21 +69,15 @@
__ret; \
})
static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
static inline int add_to_sockmap(int mapfd, int fd1, int fd2)
{
u64 value;
u32 key;
int err;
key = 0;
value = fd1;
err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
err = xbpf_map_update_elem(mapfd, &u32(0), &u64(fd1), BPF_NOEXIST);
if (err)
return err;
key = 1;
value = fd2;
return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
return xbpf_map_update_elem(mapfd, &u32(1), &u64(fd2), BPF_NOEXIST);
}
#endif // __SOCKMAP_HELPERS__

View File

@@ -1366,237 +1366,6 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
}
}
/* Core redirect check for two connected pairs (cli0,peer0) and (cli1,peer1):
 * install the peers in @sock_mapfd, send from cli1, then verify that the
 * verdict program passed exactly one packet and that it arrived at the
 * redirect destination.
 *
 * @nop_mapfd:  optional extra map holding the clients (pass -1 to skip);
 *              no program is attached to it.
 * @send_flags: flags for xsend(); MSG_OOB exercises out-of-band handling.
 */
static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
				     int sock_mapfd, int nop_mapfd,
				     int verd_mapfd, enum redir_mode mode,
				     int send_flags)
{
	const char *log_prefix = redir_mode_str(mode);
	unsigned int pass;
	int err, n;
	u32 key;
	char b;

	zero_verdict_count(verd_mapfd);

	err = add_to_sockmap(sock_mapfd, peer0, peer1);
	if (err)
		return;

	if (nop_mapfd >= 0) {
		err = add_to_sockmap(nop_mapfd, cli0, cli1);
		if (err)
			return;
	}

	/* Last byte is OOB data when send_flags has MSG_OOB bit set */
	n = xsend(cli1, "ab", 2, send_flags);
	if (n >= 0 && n < 2)
		FAIL("%s: incomplete send", log_prefix);
	if (n < 2)
		return;

	key = SK_PASS;
	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
	if (err)
		return;
	if (pass != 1)
		FAIL("%s: want pass count 1, have %d", log_prefix, pass);

	/* Ingress redirect lands in peer0's receive queue; egress is read
	 * from the other end of pair 0, i.e. cli0.
	 */
	n = recv_timeout(mode == REDIR_INGRESS ? peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC);
	if (n < 0)
		FAIL_ERRNO("%s: recv_timeout", log_prefix);
	if (n == 0)
		FAIL("%s: incomplete recv", log_prefix);

	if (send_flags & MSG_OOB) {
		/* Check that we can't read OOB while in sockmap */
		errno = 0;
		n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT);
		if (n != -1 || errno != EOPNOTSUPP)
			FAIL("%s: recv(MSG_OOB): expected EOPNOTSUPP: retval=%d errno=%d",
			     log_prefix, n, errno);

		/* Remove peer1 from sockmap */
		xbpf_map_delete_elem(sock_mapfd, &(int){ 1 });

		/* Check that OOB was dropped on redirect */
		errno = 0;
		n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT);
		if (n != -1 || errno != EINVAL)
			FAIL("%s: recv(MSG_OOB): expected EINVAL: retval=%d errno=%d",
			     log_prefix, n, errno);
	}
}
/* Build two non-blocking AF_UNIX socket pairs of @sotype and run the
 * redirect check between them. All four sockets are closed on return.
 */
static void unix_redir_to_connected(int sotype, int sock_mapfd,
				    int verd_mapfd, enum redir_mode mode)
{
	int pair0[2], pair1[2];

	if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, pair0))
		return;

	if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, pair1)) {
		xclose(pair0[0]);
		xclose(pair0[1]);
		return;
	}

	pairs_redir_to_connected(pair0[0], pair0[1], pair1[0], pair1[1],
				 sock_mapfd, -1, verd_mapfd, mode, NO_FLAGS);

	xclose(pair1[0]);
	xclose(pair1[1]);
	xclose(pair0[0]);
	xclose(pair0[1]);
}
/* Attach the skb verdict program to @inner_map and run the AF_UNIX
 * redirect check in egress and then ingress mode.
 */
static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
					struct bpf_map *inner_map, int sotype)
{
	int prog_fd = bpf_program__fd(skel->progs.prog_skb_verdict);
	int verd_fd = bpf_map__fd(skel->maps.verdict_map);
	int map_fd = bpf_map__fd(inner_map);

	if (xbpf_prog_attach(prog_fd, map_fd, BPF_SK_SKB_VERDICT, 0))
		return;

	skel->bss->test_ingress = false;
	unix_redir_to_connected(sotype, map_fd, verd_fd, REDIR_EGRESS);
	skel->bss->test_ingress = true;
	unix_redir_to_connected(sotype, map_fd, verd_fd, REDIR_INGRESS);

	xbpf_prog_detach2(prog_fd, map_fd, BPF_SK_SKB_VERDICT);
}
/* Subtest entry: AF_UNIX skb redirection for the given socket type. */
static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
			    int sotype)
{
	char name[MAX_TEST_NAME];

	snprintf(name, sizeof(name), "%s %s %s", map_type_str(map),
		 family_str(AF_UNIX), __func__);
	if (test__start_subtest(name))
		unix_skb_redir_to_connected(skel, map, sotype);
}
/* Returns two connected loopback vsock sockets, both non-blocking. */
static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
{
	int type = sotype | SOCK_NONBLOCK;

	return create_pair(AF_VSOCK, type, v0, v1);
}
/* Redirect one byte sent on a vsock socket (v1) to a unix socket pair.
 * u0 and v0 occupy sockmap slots 0 and 1; the redirected byte is read
 * from u0 in REDIR_INGRESS mode and from u1 in REDIR_EGRESS mode.
 * Verifies both the payload and the verdict pass counter.
 */
static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
					 enum redir_mode mode, int sotype)
{
	const char *log_prefix = redir_mode_str(mode);
	char a = 'a', b = 'b';
	int u0, u1, v0, v1;
	int sfd[2];
	unsigned int pass;
	int err, n;
	u32 key;

	zero_verdict_count(verd_mapfd);

	if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd))
		return;

	u0 = sfd[0];
	u1 = sfd[1];

	err = vsock_socketpair_connectible(sotype, &v0, &v1);
	if (err) {
		FAIL("vsock_socketpair_connectible() failed");
		goto close_uds;
	}

	err = add_to_sockmap(sock_mapfd, u0, v0);
	if (err) {
		FAIL("add_to_sockmap failed");
		goto close_vsock;
	}

	n = write(v1, &a, sizeof(a));
	if (n < 0)
		FAIL_ERRNO("%s: write", log_prefix);
	if (n == 0)
		FAIL("%s: incomplete write", log_prefix);
	if (n < 1)
		goto out;

	n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0);
	if (n < 0)
		FAIL("%s: recv() err, errno=%d", log_prefix, errno);
	if (n == 0)
		FAIL("%s: incomplete recv", log_prefix);
	if (b != a)
		FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b);

	key = SK_PASS;
	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
	if (err)
		goto out;
	if (pass != 1)
		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
out:
	/* Remove both map entries so the sockets can be closed below */
	key = 0;
	bpf_map_delete_elem(sock_mapfd, &key);
	key = 1;
	bpf_map_delete_elem(sock_mapfd, &key);

close_vsock:
	close(v0);
	close(v1);

close_uds:
	close(u0);
	close(u1);
}
/* Attach the skb verdict program to the map and run the vsock<->unix
 * redirect check in egress and then ingress mode.
 */
static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel,
					     struct bpf_map *inner_map,
					     int sotype)
{
	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
	int sock_map = bpf_map__fd(inner_map);
	int err;

	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
	if (err)
		return;

	skel->bss->test_ingress = false;
	vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype);
	skel->bss->test_ingress = true;
	vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype);

	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
}
/* Subtest entry: vsock skb redirection for STREAM and SEQPACKET. */
static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map)
{
	char name[MAX_TEST_NAME];

	snprintf(name, sizeof(name), "%s %s %s", map_type_str(map),
		 family_str(AF_VSOCK), __func__);
	if (!test__start_subtest(name))
		return;

	vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM);
	vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET);
}
static void test_reuseport(struct test_sockmap_listen *skel,
struct bpf_map *map, int family, int sotype)
{
@@ -1637,224 +1406,6 @@ static void test_reuseport(struct test_sockmap_listen *skel,
}
}
/* Connected loopback pair for AF_INET/AF_INET6, both ends non-blocking. */
static int inet_socketpair(int family, int type, int *s, int *c)
{
	int sotype = type | SOCK_NONBLOCK;

	return create_pair(family, sotype, s, c);
}
/* Redirect between two connected loopback UDP pairs of @family. */
static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
				   enum redir_mode mode)
{
	int c0, c1, p0, p1;
	int err;

	err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
	if (err)
		return;
	err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
	if (err)
		goto close_cli0;

	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
				 mode, NO_FLAGS);

	xclose(c1);
	xclose(p1);
close_cli0:
	xclose(c0);
	xclose(p0);
}
/* Attach the skb verdict program and run UDP redirection both ways. */
static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
				       struct bpf_map *inner_map, int family)
{
	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
	int sock_map = bpf_map__fd(inner_map);
	int err;

	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
	if (err)
		return;

	skel->bss->test_ingress = false;
	udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
	skel->bss->test_ingress = true;
	udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);

	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
}
/* Subtest entry: UDP-to-UDP skb redirection for the given family. */
static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
			   int family)
{
	const char *family_name, *map_name;
	char s[MAX_TEST_NAME];

	family_name = family_str(family);
	map_name = map_type_str(map);
	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
	if (!test__start_subtest(s))
		return;
	udp_skb_redir_to_connected(skel, map, family);
}
/* Redirect from an AF_UNIX pair into a connected inet pair of @type. */
static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
					 int verd_mapfd, enum redir_mode mode)
{
	int c0, c1, p0, p1;
	int sfd[2];
	int err;

	if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd))
		return;
	c0 = sfd[0], p0 = sfd[1];

	err = inet_socketpair(family, type, &p1, &c1);
	if (err)
		goto close;

	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
				 mode, NO_FLAGS);

	xclose(c1);
	xclose(p1);
close:
	xclose(c0);
	xclose(p0);
}
/* Attach the skb verdict program and run unix -> inet redirection for
 * DGRAM and STREAM, in egress and then ingress mode.
 */
static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
					     struct bpf_map *inner_map, int family)
{
	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
	int sock_map = bpf_map__fd(inner_map);
	int err;

	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
	if (err)
		return;

	skel->bss->test_ingress = false;
	inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
				     REDIR_EGRESS);
	inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
				     REDIR_EGRESS);
	skel->bss->test_ingress = true;
	inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
				     REDIR_INGRESS);
	inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
				     REDIR_INGRESS);

	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
}
/* Redirect from an inet pair into an AF_UNIX pair. @nop_mapfd and
 * @send_flags are forwarded to pairs_redir_to_connected() (nop map and
 * MSG_OOB handling).
 */
static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
					 int nop_mapfd, int verd_mapfd,
					 enum redir_mode mode, int send_flags)
{
	int c0, c1, p0, p1;
	int sfd[2];
	int err;

	err = inet_socketpair(family, type, &p0, &c0);
	if (err)
		return;

	if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd))
		goto close_cli0;
	c1 = sfd[0], p1 = sfd[1];

	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, nop_mapfd,
				 verd_mapfd, mode, send_flags);

	xclose(c1);
	xclose(p1);
close_cli0:
	xclose(c0);
	xclose(p0);
}
/* Attach the skb verdict program and run inet -> unix redirection in all
 * supported variants: with and without the nop map, DGRAM and STREAM,
 * egress and ingress, plus an MSG_OOB pass for STREAM.
 */
static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
					     struct bpf_map *inner_map, int family)
{
	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
	int nop_map = bpf_map__fd(skel->maps.nop_map);
	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
	int sock_map = bpf_map__fd(inner_map);
	int err;

	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
	if (err)
		return;

	skel->bss->test_ingress = false;
	unix_inet_redir_to_connected(family, SOCK_DGRAM,
				     sock_map, -1, verdict_map,
				     REDIR_EGRESS, NO_FLAGS);
	unix_inet_redir_to_connected(family, SOCK_STREAM,
				     sock_map, -1, verdict_map,
				     REDIR_EGRESS, NO_FLAGS);

	unix_inet_redir_to_connected(family, SOCK_DGRAM,
				     sock_map, nop_map, verdict_map,
				     REDIR_EGRESS, NO_FLAGS);
	unix_inet_redir_to_connected(family, SOCK_STREAM,
				     sock_map, nop_map, verdict_map,
				     REDIR_EGRESS, NO_FLAGS);

	/* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */
	unix_inet_redir_to_connected(family, SOCK_STREAM,
				     sock_map, nop_map, verdict_map,
				     REDIR_EGRESS, MSG_OOB);

	skel->bss->test_ingress = true;
	unix_inet_redir_to_connected(family, SOCK_DGRAM,
				     sock_map, -1, verdict_map,
				     REDIR_INGRESS, NO_FLAGS);
	unix_inet_redir_to_connected(family, SOCK_STREAM,
				     sock_map, -1, verdict_map,
				     REDIR_INGRESS, NO_FLAGS);

	unix_inet_redir_to_connected(family, SOCK_DGRAM,
				     sock_map, nop_map, verdict_map,
				     REDIR_INGRESS, NO_FLAGS);
	unix_inet_redir_to_connected(family, SOCK_STREAM,
				     sock_map, nop_map, verdict_map,
				     REDIR_INGRESS, NO_FLAGS);

	/* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */
	unix_inet_redir_to_connected(family, SOCK_STREAM,
				     sock_map, nop_map, verdict_map,
				     REDIR_INGRESS, MSG_OOB);

	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
}
/* Subtest entry: inet<->unix skb redirection, run inside a fresh netns. */
static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
				int family)
{
	const char *family_name, *map_name;
	struct netns_obj *netns;
	char s[MAX_TEST_NAME];

	family_name = family_str(family);
	map_name = map_type_str(map);
	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
	if (!test__start_subtest(s))
		return;

	netns = netns_new("sockmap_listen", true);
	if (!ASSERT_OK_PTR(netns, "netns_new"))
		return;

	inet_unix_skb_redir_to_connected(skel, map, family);
	unix_inet_skb_redir_to_connected(skel, map, family);

	netns_free(netns);
}
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
int family)
{
@@ -1863,8 +1414,6 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
test_redir(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_DGRAM);
test_udp_redir(skel, map, family);
test_udp_unix_redir(skel, map, family);
}
void serial_test_sockmap_listen(void)
@@ -1880,16 +1429,10 @@ void serial_test_sockmap_listen(void)
skel->bss->test_sockmap = true;
run_tests(skel, skel->maps.sock_map, AF_INET);
run_tests(skel, skel->maps.sock_map, AF_INET6);
test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
test_vsock_redir(skel, skel->maps.sock_map);
skel->bss->test_sockmap = false;
run_tests(skel, skel->maps.sock_hash, AF_INET);
run_tests(skel, skel->maps.sock_hash, AF_INET6);
test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
test_vsock_redir(skel, skel->maps.sock_hash);
test_sockmap_listen__destroy(skel);
}

View File

@@ -0,0 +1,465 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Test for sockmap/sockhash redirection.
*
* BPF_MAP_TYPE_SOCKMAP
* BPF_MAP_TYPE_SOCKHASH
* x
* sk_msg-to-egress
* sk_msg-to-ingress
* sk_skb-to-egress
* sk_skb-to-ingress
* x
* AF_INET, SOCK_STREAM
* AF_INET6, SOCK_STREAM
* AF_INET, SOCK_DGRAM
* AF_INET6, SOCK_DGRAM
* AF_UNIX, SOCK_STREAM
* AF_UNIX, SOCK_DGRAM
* AF_VSOCK, SOCK_STREAM
* AF_VSOCK, SOCK_SEQPACKET
*/
#include <errno.h>
#include <error.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/un.h>
#include <linux/string.h>
#include <linux/vm_sockets.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "linux/const.h"
#include "test_progs.h"
#include "sockmap_helpers.h"
#include "test_sockmap_redir.skel.h"
/* The meaning of SUPPORTED is "will redirect packet as expected".
*/
#define SUPPORTED _BITUL(0)
/* Note on sk_skb-to-ingress ->af_vsock:
*
* Peer socket may receive the packet some time after the return from sendmsg().
* In a typical usage scenario, recvmsg() will block until the redirected packet
* appears in the destination queue, or timeout if the packet was dropped. By
* that point, the verdict map has already been updated to reflect what has
* happened.
*
* But sk_skb-to-ingress/af_vsock is an unsupported combination, so no recvmsg()
* takes place. Which means we may race the execution of the verdict logic and
* read map_verd before it has been updated, i.e. we might observe
* map_verd[SK_DROP]=0 instead of map_verd[SK_DROP]=1.
*
* This confuses the selftest logic: if there was no packet dropped, where's the
* packet? So here's a heuristic: on map_verd[SK_DROP]=map_verd[SK_PASS]=0
* (which implies the verdict program has not been ran) just re-read the verdict
* map again.
*/
#define UNSUPPORTED_RACY_VERD _BITUL(1)
/* Which BPF hook performs the redirect, and in which direction. */
enum prog_type {
	SK_MSG_EGRESS,
	SK_MSG_INGRESS,
	SK_SKB_EGRESS,
	SK_SKB_INGRESS,
};

/* Index into socket_spec.in[] used as the sending end of a pair */
enum {
	SEND_INNER = 0,
	SEND_OUTER,
};

/* Index into socket_spec.out[] used as the receiving end of a pair */
enum {
	RECV_INNER = 0,
	RECV_OUTER,
};

/* File descriptors of the maps a test run operates on:
 * @in:   map the source socket is inserted into (and prog attached to),
 * @out:  map holding the redirect target,
 * @verd: array counting SK_PASS/SK_DROP verdicts.
 */
struct maps {
	int in;
	int out;
	int verd;
};

/* One supported (prog_type, in-kind, out-kind) combination; @in/@out are
 * prefixes of socket_kind_to_str() labels, or the wildcard "any".
 */
struct combo_spec {
	enum prog_type prog_type;
	const char *in, *out;
};

/* Description of one redirect variant under test */
struct redir_spec {
	const char *name;
	int idx_send;
	int idx_recv;
	enum prog_type prog_type;
};

/* A socket flavor and its two connected pairs (in[] and out[]) */
struct socket_spec {
	int family;
	int sotype;
	int send_flags;
	int in[2];
	int out[2];
};
/* Open both connected socket pairs described by @s; 0 on success. */
static int socket_spec_pairs(struct socket_spec *s)
{
	int *in = s->in, *out = s->out;

	return create_socket_pairs(s->family, s->sotype,
				   &in[0], &out[0], &in[1], &out[1]);
}
/* Close all four sockets opened by socket_spec_pairs(). */
static void socket_spec_close(struct socket_spec *s)
{
	int i;

	for (i = 0; i < 2; i++) {
		xclose(s->in[i]);
		xclose(s->out[i]);
	}
}
/* Derive, for a redirect spec, the program fd to attach, its attach type
 * (sk_msg vs sk_skb verdict), and the flags the BPF side must pass to
 * the bpf_*_redirect_* helper (BPF_F_INGRESS for *-to-ingress combos).
 */
static void get_redir_params(struct redir_spec *redir,
			     struct test_sockmap_redir *skel, int *prog_fd,
			     enum bpf_attach_type *attach_type,
			     int *redirect_flags)
{
	enum prog_type type = redir->prog_type;
	struct bpf_program *prog;
	bool sk_msg;

	sk_msg = type == SK_MSG_INGRESS || type == SK_MSG_EGRESS;
	prog = sk_msg ? skel->progs.prog_msg_verdict : skel->progs.prog_skb_verdict;
	*prog_fd = bpf_program__fd(prog);
	*attach_type = sk_msg ? BPF_SK_MSG_VERDICT : BPF_SK_SKB_VERDICT;

	if (type == SK_MSG_INGRESS || type == SK_SKB_INGRESS)
		*redirect_flags = BPF_F_INGRESS;
	else
		*redirect_flags = 0;
}
/* Read one byte from @fd with @flags and check the outcome: FAIL when a
 * read that was expected to succeed fails, or when a read that was
 * expected to fail returns data (or EOF).
 */
static void try_recv(const char *prefix, int fd, int flags, bool expect_success)
{
	ssize_t n;
	char buf;

	errno = 0;
	n = recv(fd, &buf, 1, flags);
	if (n < 0 && expect_success)
		FAIL_ERRNO("%s: unexpected failure: retval=%zd", prefix, n);
	/* Was `!n && !expect_success`, which only caught EOF (n == 0) and
	 * silently ignored stray queued data (n > 0) — defeating the
	 * "ensure queues are empty" checks. Flag any successful read.
	 */
	if (n >= 0 && !expect_success)
		FAIL("%s: expected failure: retval=%zd", prefix, n);
}
/* Check the outcome of an unsupported redirect combination: the verdict
 * program must not have passed the packet; either it was dropped, or it
 * reached the peer unredirected. Finally verify no socket has data left
 * queued. See the UNSUPPORTED_RACY_VERD comment for the re-read loop.
 */
static void handle_unsupported(int sd_send, int sd_peer, int sd_in, int sd_out,
			       int sd_recv, int map_verd, int status)
{
	unsigned int drop, pass;
	char recv_buf;
	ssize_t n;

get_verdict:
	if (xbpf_map_lookup_elem(map_verd, &u32(SK_DROP), &drop) ||
	    xbpf_map_lookup_elem(map_verd, &u32(SK_PASS), &pass))
		return;

	if (pass == 0 && drop == 0 && (status & UNSUPPORTED_RACY_VERD)) {
		/* Verdict may not have been recorded yet; yield and retry */
		sched_yield();
		goto get_verdict;
	}

	if (pass != 0) {
		FAIL("unsupported: wanted verdict pass 0, have %u", pass);
		return;
	}

	/* If nothing was dropped, packet should have reached the peer */
	if (drop == 0) {
		errno = 0;
		n = recv_timeout(sd_peer, &recv_buf, 1, 0, IO_TIMEOUT_SEC);
		if (n != 1)
			FAIL_ERRNO("unsupported: packet missing, retval=%zd", n);
	}

	/* Ensure queues are empty */
	try_recv("bpf.recv(sd_send)", sd_send, MSG_DONTWAIT, false);
	if (sd_in != sd_send)
		try_recv("bpf.recv(sd_in)", sd_in, MSG_DONTWAIT, false);

	try_recv("bpf.recv(sd_out)", sd_out, MSG_DONTWAIT, false);
	if (sd_recv != sd_out)
		try_recv("bpf.recv(sd_recv)", sd_recv, MSG_DONTWAIT, false);
}
/* Perform one send/redirect/recv round: insert @sd_in and @sd_out at key 0
 * of maps->in/maps->out, send a byte (plus an OOB byte when MSG_OOB is in
 * @send_flags) from @sd_send, then verify the packet, verdict counters,
 * and OOB semantics. @status carries the SUPPORTED/UNSUPPORTED_RACY_VERD
 * bits; unsupported combos are handed off to handle_unsupported().
 * Map entries are removed again on every exit path.
 */
static void test_send_redir_recv(int sd_send, int send_flags, int sd_peer,
				 int sd_in, int sd_out, int sd_recv,
				 struct maps *maps, int status)
{
	unsigned int drop, pass;
	char *send_buf = "ab";
	char recv_buf = '\0';
	ssize_t n, len = 1;

	/* Zero out the verdict map */
	if (xbpf_map_update_elem(maps->verd, &u32(SK_DROP), &u32(0), BPF_ANY) ||
	    xbpf_map_update_elem(maps->verd, &u32(SK_PASS), &u32(0), BPF_ANY))
		return;

	if (xbpf_map_update_elem(maps->in, &u32(0), &u64(sd_in), BPF_NOEXIST))
		return;

	if (xbpf_map_update_elem(maps->out, &u32(0), &u64(sd_out), BPF_NOEXIST))
		goto del_in;

	/* Last byte is OOB data when send_flags has MSG_OOB bit set */
	if (send_flags & MSG_OOB)
		len++;
	n = send(sd_send, send_buf, len, send_flags);
	if (n >= 0 && n < len)
		FAIL("incomplete send");
	if (n < 0) {
		/* sk_msg redirect combo not supported? */
		if (status & SUPPORTED || errno != EACCES)
			FAIL_ERRNO("send");
		goto out;
	}

	if (!(status & SUPPORTED)) {
		handle_unsupported(sd_send, sd_peer, sd_in, sd_out, sd_recv,
				   maps->verd, status);
		goto out;
	}

	errno = 0;
	n = recv_timeout(sd_recv, &recv_buf, 1, 0, IO_TIMEOUT_SEC);
	if (n != 1) {
		FAIL_ERRNO("recv_timeout()");
		goto out;
	}

	/* Check verdict _after_ recv(); af_vsock may need time to catch up */
	if (xbpf_map_lookup_elem(maps->verd, &u32(SK_DROP), &drop) ||
	    xbpf_map_lookup_elem(maps->verd, &u32(SK_PASS), &pass))
		goto out;

	if (drop != 0 || pass != 1)
		FAIL("unexpected verdict drop/pass: wanted 0/1, have %u/%u",
		     drop, pass);

	if (recv_buf != send_buf[0])
		FAIL("recv(): payload check, %02x != %02x", recv_buf, send_buf[0]);

	if (send_flags & MSG_OOB) {
		/* Fail reading OOB while in sockmap */
		try_recv("bpf.recv(sd_out, MSG_OOB)", sd_out,
			 MSG_OOB | MSG_DONTWAIT, false);

		/* Remove sd_out from sockmap */
		xbpf_map_delete_elem(maps->out, &u32(0));

		/* Check that OOB was dropped on redirect */
		try_recv("recv(sd_out, MSG_OOB)", sd_out,
			 MSG_OOB | MSG_DONTWAIT, false);

		goto del_in;
	}
out:
	xbpf_map_delete_elem(maps->out, &u32(0));
del_in:
	xbpf_map_delete_elem(maps->in, &u32(0));
}
static int is_redir_supported(enum prog_type type, const char *in,
const char *out)
{
/* Matching based on strings returned by socket_kind_to_str():
* tcp4, udp4, tcp6, udp6, u_str, u_dgr, v_str, v_seq
* Plus a wildcard: any
* Not in use: u_seq, v_dgr
*/
struct combo_spec *c, combos[] = {
/* Send to local: TCP -> any, but vsock */
{ SK_MSG_INGRESS, "tcp", "tcp" },
{ SK_MSG_INGRESS, "tcp", "udp" },
{ SK_MSG_INGRESS, "tcp", "u_str" },
{ SK_MSG_INGRESS, "tcp", "u_dgr" },
/* Send to egress: TCP -> TCP */
{ SK_MSG_EGRESS, "tcp", "tcp" },
/* Ingress to egress: any -> any */
{ SK_SKB_EGRESS, "any", "any" },
/* Ingress to local: any -> any, but vsock */
{ SK_SKB_INGRESS, "any", "tcp" },
{ SK_SKB_INGRESS, "any", "udp" },
{ SK_SKB_INGRESS, "any", "u_str" },
{ SK_SKB_INGRESS, "any", "u_dgr" },
};
for (c = combos; c < combos + ARRAY_SIZE(combos); c++) {
if (c->prog_type == type &&
(!strcmp(c->in, "any") || strstarts(in, c->in)) &&
(!strcmp(c->out, "any") || strstarts(out, c->out)))
return SUPPORTED;
}
return 0;
}
/* Combine support lookup with the racy-verdict marker for vsock targets
 * of sk_skb-to-ingress (see UNSUPPORTED_RACY_VERD).
 */
static int get_support_status(enum prog_type type, const char *in,
			      const char *out)
{
	bool racy_vsock = type == SK_SKB_INGRESS && strstarts(out, "v_");
	int status = is_redir_supported(type, in, out);

	if (racy_vsock)
		status |= UNSUPPORTED_RACY_VERD;

	return status;
}
/* Run one redirect subtest: sockets from @s_in feed the "in" map, sockets
 * from @s_out provide the redirect target. The subtest name encodes map
 * kind, redirect variant, both socket kinds, and the OOB flag.
 */
static void test_socket(enum bpf_map_type type, struct redir_spec *redir,
			struct maps *maps, struct socket_spec *s_in,
			struct socket_spec *s_out)
{
	int fd_in, fd_out, fd_send, fd_peer, fd_recv, flags, status;
	const char *in_str, *out_str;
	char s[MAX_TEST_NAME];

	fd_in = s_in->in[0];
	fd_out = s_out->out[0];
	fd_send = s_in->in[redir->idx_send];
	fd_peer = s_in->in[redir->idx_send ^ 1];	/* other end of the send pair */
	fd_recv = s_out->out[redir->idx_recv];
	flags = s_in->send_flags;

	in_str = socket_kind_to_str(fd_in);
	out_str = socket_kind_to_str(fd_out);
	status = get_support_status(redir->prog_type, in_str, out_str);

	snprintf(s, sizeof(s),
		 "%-4s %-17s %-5s %s %-5s%6s",
		 /* hash sk_skb-to-ingress u_str → v_str (OOB) */
		 type == BPF_MAP_TYPE_SOCKMAP ? "map" : "hash",
		 redir->name,
		 in_str,
		 /* NOTE(review): supported-marker literal appears empty in
		  * this capture (likely a garbled arrow glyph) — verify
		  * against the upstream source before relying on it.
		  */
		 status & SUPPORTED ? "" : " ",
		 out_str,
		 (flags & MSG_OOB) ? "(OOB)" : "");

	if (!test__start_subtest(s))
		return;

	test_send_redir_recv(fd_send, flags, fd_peer, fd_in, fd_out, fd_recv,
			     maps, status);
}
/* Open every socket flavor in the matrix and run @redir against all
 * intra- and cross-protocol (in, out) combinations. If opening a pair
 * fails, the run is aborted but pairs opened so far are still closed.
 */
static void test_redir(enum bpf_map_type type, struct redir_spec *redir,
		       struct maps *maps)
{
	struct socket_spec *s, sockets[] = {
		{ AF_INET, SOCK_STREAM },
		// { AF_INET, SOCK_STREAM, MSG_OOB }, /* Known to be broken */
		{ AF_INET6, SOCK_STREAM },
		{ AF_INET, SOCK_DGRAM },
		{ AF_INET6, SOCK_DGRAM },
		{ AF_UNIX, SOCK_STREAM },
		{ AF_UNIX, SOCK_STREAM, MSG_OOB },
		{ AF_UNIX, SOCK_DGRAM },
		// { AF_UNIX, SOCK_SEQPACKET}, /* Unsupported BPF_MAP_UPDATE_ELEM */
		{ AF_VSOCK, SOCK_STREAM },
		// { AF_VSOCK, SOCK_DGRAM }, /* Unsupported socket() */
		{ AF_VSOCK, SOCK_SEQPACKET },
	};

	for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++)
		if (socket_spec_pairs(s))
			goto out;

	/* Intra-proto */
	for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++)
		test_socket(type, redir, maps, s, s);

	/* Cross-proto */
	for (int i = 0; i < ARRAY_SIZE(sockets); i++) {
		for (int j = 0; j < ARRAY_SIZE(sockets); j++) {
			struct socket_spec *out = &sockets[j];
			struct socket_spec *in = &sockets[i];

			/* Skip intra-proto and between variants */
			if (out->send_flags ||
			    (in->family == out->family &&
			     in->sotype == out->sotype))
				continue;

			test_socket(type, redir, maps, in, out);
		}
	}
out:
	/* Close the pairs opened so far ([sockets, s)). Decrement inside
	 * the body so `s` never points before the array: the previous
	 * `while (--s >= sockets)` formed and compared a pointer one
	 * element before the first on its final iteration, which is
	 * undefined behavior (C11 6.5.6).
	 */
	while (s > sockets)
		socket_spec_close(--s);
}
/* For each redirect variant, load a fresh skeleton, select the in/out
 * maps matching @type, attach the verdict program, and run the full
 * socket matrix. The skeleton is destroyed on every path — the original
 * leaked it on the unsupported-type, attach-failure, and detach-failure
 * returns.
 */
static void test_map(enum bpf_map_type type)
{
	struct redir_spec *r, redirs[] = {
		{ "sk_msg-to-ingress", SEND_INNER, RECV_INNER, SK_MSG_INGRESS },
		{ "sk_msg-to-egress", SEND_INNER, RECV_OUTER, SK_MSG_EGRESS },
		{ "sk_skb-to-egress", SEND_OUTER, RECV_OUTER, SK_SKB_EGRESS },
		{ "sk_skb-to-ingress", SEND_OUTER, RECV_INNER, SK_SKB_INGRESS },
	};

	for (r = redirs; r < redirs + ARRAY_SIZE(redirs); r++) {
		enum bpf_attach_type attach_type;
		struct test_sockmap_redir *skel;
		struct maps maps;
		int prog_fd;

		skel = test_sockmap_redir__open_and_load();
		if (!skel) {
			FAIL("open_and_load");
			return;
		}

		switch (type) {
		case BPF_MAP_TYPE_SOCKMAP:
			maps.in = bpf_map__fd(skel->maps.nop_map);
			maps.out = bpf_map__fd(skel->maps.sock_map);
			break;
		case BPF_MAP_TYPE_SOCKHASH:
			maps.in = bpf_map__fd(skel->maps.nop_hash);
			maps.out = bpf_map__fd(skel->maps.sock_hash);
			break;
		default:
			FAIL("Unsupported bpf_map_type");
			goto out_destroy;
		}

		skel->bss->redirect_type = type;
		maps.verd = bpf_map__fd(skel->maps.verdict_map);
		get_redir_params(r, skel, &prog_fd, &attach_type,
				 &skel->bss->redirect_flags);

		if (xbpf_prog_attach(prog_fd, maps.in, attach_type, 0))
			goto out_destroy;

		test_redir(type, r, &maps);

		if (xbpf_prog_detach2(prog_fd, maps.in, attach_type))
			goto out_destroy;

		test_sockmap_redir__destroy(skel);
		continue;

out_destroy:
		test_sockmap_redir__destroy(skel);
		return;
	}
}
void serial_test_sockmap_redir(void)
{
test_map(BPF_MAP_TYPE_SOCKMAP);
test_map(BPF_MAP_TYPE_SOCKHASH);
}

View File

@@ -0,0 +1,68 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
/* Single-entry sockmaps: user space inserts the source socket into the
 * nop_* map and the redirect target into sock_map (see the redirect
 * macros below, which always target sock_map/sock_hash at key 0).
 */
SEC(".maps") struct {
	__uint(type, BPF_MAP_TYPE_SOCKMAP);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} nop_map, sock_map;

/* Sockhash counterparts of the maps above */
SEC(".maps") struct {
	__uint(type, BPF_MAP_TYPE_SOCKHASH);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} nop_hash, sock_hash;

/* Per-verdict counters, indexed by the verdict value (SK_DROP/SK_PASS) */
SEC(".maps") struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 2);
	__type(key, int);
	__type(value, unsigned int);
} verdict_map;

/* Set by user space */
int redirect_type;	/* BPF_MAP_TYPE_SOCKMAP or BPF_MAP_TYPE_SOCKHASH */
int redirect_flags;	/* 0 or BPF_F_INGRESS */

/* _Generic picks the sk_skb or sk_msg flavor of the redirect helper
 * based on the context type; both redirect to key 0 of sock_map.
 */
#define redirect_map(__data) \
	_Generic((__data), \
		 struct __sk_buff * : bpf_sk_redirect_map, \
		 struct sk_msg_md * : bpf_msg_redirect_map \
	)((__data), &sock_map, (__u32){0}, redirect_flags)

/* Same as above, but for sock_hash (hash helpers take a key pointer) */
#define redirect_hash(__data) \
	_Generic((__data), \
		 struct __sk_buff * : bpf_sk_redirect_hash, \
		 struct sk_msg_md * : bpf_msg_redirect_hash \
	)((__data), &sock_hash, &(__u32){0}, redirect_flags)

/* Emit a verdict program for the given hook: redirect via the map kind
 * selected through redirect_type, bump the matching verdict counter, and
 * return the verdict. An unexpected redirect_type produces an
 * out-of-range verdict so that the counter lookup fails.
 */
#define DEFINE_PROG(__type, __param) \
SEC("sk_" XSTR(__type)) \
int prog_ ## __type ## _verdict(__param data) \
{ \
	unsigned int *count; \
	int verdict; \
\
	if (redirect_type == BPF_MAP_TYPE_SOCKMAP) \
		verdict = redirect_map(data); \
	else if (redirect_type == BPF_MAP_TYPE_SOCKHASH) \
		verdict = redirect_hash(data); \
	else \
		verdict = redirect_type - __MAX_BPF_MAP_TYPE; \
\
	count = bpf_map_lookup_elem(&verdict_map, &verdict); \
	if (count) \
		(*count)++; \
\
	return verdict; \
}

/* Instantiate the verdict program for both hook flavors */
DEFINE_PROG(skb, struct __sk_buff *);
DEFINE_PROG(msg, struct sk_msg_md *);

char _license[] SEC("license") = "GPL";