Merge branch 'selftests-bpf-migrate-test_flow_dissector-sh-to-test_progs'

Alexis Lothoré says:

====================
This is the revision 3 of test_flow_dissector_migration.sh into
test_progs. This revision addresses comments from Stanislas, especially
about proper reuse of pseudo-header checksuming in new network helpers.

There are 2 "main" parts in test_flow_dissector.sh:
- a set of tests checking flow_dissector programs attachment to either
  root namespace or non-root namespace
- dissection test

The first set is integrated in flow_dissector.c, which already contains
some existing tests for flow_dissector programs. This series uses the
opportunity to update a bit this file (use new assert, re-split tests,
etc)
The second part is migrated into a new file under test_progs,
flow_dissector_classification.c. It uses the same eBPF programs as
flow_dissector.c, but the difference is rather about how those program
are executed:
- flow_dissector.c manually runs programs with BPF_PROG_RUN
- flow_dissector_classification.c sends real packets to be dissected, and
  so it also executes kernel code related to eBPF flow dissector (eg:
__skb_flow_bpf_to_target)

---
Changes in v3:
- Keep new helpers name in sync with kernel ones
- Document some existing network helpers
- Properly reuse pseudo-header csum helper in transport layer csum
  helper
- Drop duplicate assert
- Use const for test structure in the migrated test
- Simplify shutdown callchain for basic test
- collect Acked-by
- Link to v2: https://lore.kernel.org/r/20241114-flow_dissector-v2-0-ee4a3be3de65@bootlin.com

Changes in v2:
- allow tests to run in parallel
- move some generic helpers to network_helpers.h
- define proper function for ASSERT_MEMEQ
- fetch acked-by tags
- Link to v1: https://lore.kernel.org/r/20241113-flow_dissector-v1-0-27c4df0592dc@bootlin.com
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov
2024-11-25 14:00:05 -08:00
20 changed files with 1175 additions and 1067 deletions

View File

@@ -19,7 +19,6 @@ feature
urandom_read
test_sockmap
test_lirc_mode2_user
test_flow_dissector
flow_dissector_load
test_tcpnotify_user
test_libbpf

View File

@@ -133,7 +133,6 @@ TEST_PROGS := test_kmod.sh \
test_tunnel.sh \
test_lwt_seg6local.sh \
test_lirc_mode2.sh \
test_flow_dissector.sh \
test_xdp_vlan_mode_generic.sh \
test_xdp_vlan_mode_native.sh \
test_lwt_ip_encap.sh \
@@ -161,7 +160,6 @@ TEST_GEN_PROGS_EXTENDED = \
flow_dissector_load \
runqslower \
test_cpp \
test_flow_dissector \
test_lirc_mode2_user \
veristat \
xdp_features \

View File

@@ -58,6 +58,7 @@ CONFIG_MPLS=y
CONFIG_MPLS_IPTUNNEL=y
CONFIG_MPLS_ROUTING=y
CONFIG_MPTCP=y
CONFIG_NET_ACT_GACT=y
CONFIG_NET_ACT_SKBMOD=y
CONFIG_NET_CLS=y
CONFIG_NET_CLS_ACT=y

View File

@@ -21,7 +21,7 @@
#include <linux/limits.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <net/if.h>

View File

@@ -14,6 +14,7 @@ typedef __u16 __sum16;
#include <linux/sockios.h>
#include <linux/err.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <bpf/bpf_endian.h>
#include <net/if.h>
@@ -105,6 +106,45 @@ static __u16 csum_fold(__u32 csum)
return (__u16)~csum;
}
static __wsum csum_partial(const void *buf, int len, __wsum sum)
{
__u16 *p = (__u16 *)buf;
int num_u16 = len >> 1;
int i;
for (i = 0; i < num_u16; i++)
sum += p[i];
return sum;
}
static inline __sum16 build_ip_csum(struct iphdr *iph)
{
__u32 sum = 0;
__u16 *p;
iph->check = 0;
p = (void *)iph;
sum = csum_partial(p, iph->ihl << 2, 0);
return csum_fold(sum);
}
/**
* csum_tcpudp_magic - compute IP pseudo-header checksum
*
* Compute the IPv4 pseudo header checksum. The helper can take a
* accumulated sum from the transport layer to accumulate it and directly
* return the transport layer
*
* @saddr: IP source address
* @daddr: IP dest address
* @len: IP data size
* @proto: transport layer protocol
* @csum: The accumulated partial sum to add to the computation
*
* Returns the folded sum
*/
static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
__u32 len, __u8 proto,
__wsum csum)
@@ -120,6 +160,21 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
return csum_fold((__u32)s);
}
/**
* csum_ipv6_magic - compute IPv6 pseudo-header checksum
*
* Compute the ipv6 pseudo header checksum. The helper can take a
* accumulated sum from the transport layer to accumulate it and directly
* return the transport layer
*
* @saddr: IPv6 source address
* @daddr: IPv6 dest address
* @len: IPv6 data size
* @proto: transport layer protocol
* @csum: The accumulated partial sum to add to the computation
*
* Returns the folded sum
*/
static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
const struct in6_addr *daddr,
__u32 len, __u8 proto,
@@ -139,6 +194,47 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
return csum_fold((__u32)s);
}
/**
* build_udp_v4_csum - compute UDP checksum for UDP over IPv4
*
* Compute the checksum to embed in UDP header, composed of the sum of IP
* pseudo-header checksum, UDP header checksum and UDP data checksum
* @iph IP header
* @udph UDP header, which must be immediately followed by UDP data
*
* Returns the total checksum
*/
static inline __sum16 build_udp_v4_csum(const struct iphdr *iph,
const struct udphdr *udph)
{
unsigned long sum;
sum = csum_partial(udph, ntohs(udph->len), 0);
return csum_tcpudp_magic(iph->saddr, iph->daddr, ntohs(udph->len),
IPPROTO_UDP, sum);
}
/**
* build_udp_v6_csum - compute UDP checksum for UDP over IPv6
*
* Compute the checksum to embed in UDP header, composed of the sum of IPv6
* pseudo-header checksum, UDP header checksum and UDP data checksum
* @ip6h IPv6 header
* @udph UDP header, which must be immediately followed by UDP data
*
* Returns the total checksum
*/
static inline __sum16 build_udp_v6_csum(const struct ipv6hdr *ip6h,
const struct udphdr *udph)
{
unsigned long sum;
sum = csum_partial(udph, ntohs(udph->len), 0);
return csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ntohs(udph->len),
IPPROTO_UDP, sum);
}
struct tmonitor_ctx;
#ifdef TRAFFIC_MONITOR

View File

@@ -7,39 +7,14 @@
#include "bpf_flow.skel.h"
#define TEST_NS "flow_dissector_ns"
#define FLOW_CONTINUE_SADDR 0x7f00007f /* 127.0.0.127 */
#define TEST_NAME_MAX_LEN 64
#ifndef IP_MF
#define IP_MF 0x2000
#endif
#define CHECK_FLOW_KEYS(desc, got, expected) \
_CHECK(memcmp(&got, &expected, sizeof(got)) != 0, \
desc, \
topts.duration, \
"nhoff=%u/%u " \
"thoff=%u/%u " \
"addr_proto=0x%x/0x%x " \
"is_frag=%u/%u " \
"is_first_frag=%u/%u " \
"is_encap=%u/%u " \
"ip_proto=0x%x/0x%x " \
"n_proto=0x%x/0x%x " \
"flow_label=0x%x/0x%x " \
"sport=%u/%u " \
"dport=%u/%u\n", \
got.nhoff, expected.nhoff, \
got.thoff, expected.thoff, \
got.addr_proto, expected.addr_proto, \
got.is_frag, expected.is_frag, \
got.is_first_frag, expected.is_first_frag, \
got.is_encap, expected.is_encap, \
got.ip_proto, expected.ip_proto, \
got.n_proto, expected.n_proto, \
got.flow_label, expected.flow_label, \
got.sport, expected.sport, \
got.dport, expected.dport)
struct ipv4_pkt {
struct ethhdr eth;
struct iphdr iph;
@@ -89,6 +64,19 @@ struct dvlan_ipv6_pkt {
struct tcphdr tcp;
} __packed;
struct gre_base_hdr {
__be16 flags;
__be16 protocol;
} gre_base_hdr;
struct gre_minimal_pkt {
struct ethhdr eth;
struct iphdr iph;
struct gre_base_hdr gre_hdr;
struct iphdr iph_inner;
struct tcphdr tcp;
} __packed;
struct test {
const char *name;
union {
@@ -98,6 +86,7 @@ struct test {
struct ipv6_pkt ipv6;
struct ipv6_frag_pkt ipv6_frag;
struct dvlan_ipv6_pkt dvlan_ipv6;
struct gre_minimal_pkt gre_minimal;
} pkt;
struct bpf_flow_keys keys;
__u32 flags;
@@ -106,7 +95,6 @@ struct test {
#define VLAN_HLEN 4
static __u32 duration;
struct test tests[] = {
{
.name = "ipv4",
@@ -444,8 +432,131 @@ struct test tests[] = {
},
.retval = BPF_FLOW_DISSECTOR_CONTINUE,
},
{
.name = "ip-gre",
.pkt.gre_minimal = {
.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
.iph.ihl = 5,
.iph.protocol = IPPROTO_GRE,
.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
.gre_hdr = {
.flags = 0,
.protocol = __bpf_constant_htons(ETH_P_IP),
},
.iph_inner.ihl = 5,
.iph_inner.protocol = IPPROTO_TCP,
.iph_inner.tot_len =
__bpf_constant_htons(MAGIC_BYTES -
sizeof(struct iphdr)),
.tcp.doff = 5,
.tcp.source = 80,
.tcp.dest = 8080,
},
.keys = {
.nhoff = ETH_HLEN,
.thoff = ETH_HLEN + sizeof(struct iphdr) * 2 +
sizeof(struct gre_base_hdr),
.addr_proto = ETH_P_IP,
.ip_proto = IPPROTO_TCP,
.n_proto = __bpf_constant_htons(ETH_P_IP),
.is_encap = true,
.sport = 80,
.dport = 8080,
},
.retval = BPF_OK,
},
{
.name = "ip-gre-no-encap",
.pkt.ipip = {
.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
.iph.ihl = 5,
.iph.protocol = IPPROTO_GRE,
.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
.iph_inner.ihl = 5,
.iph_inner.protocol = IPPROTO_TCP,
.iph_inner.tot_len =
__bpf_constant_htons(MAGIC_BYTES -
sizeof(struct iphdr)),
.tcp.doff = 5,
.tcp.source = 80,
.tcp.dest = 8080,
},
.keys = {
.flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP,
.nhoff = ETH_HLEN,
.thoff = ETH_HLEN + sizeof(struct iphdr)
+ sizeof(struct gre_base_hdr),
.addr_proto = ETH_P_IP,
.ip_proto = IPPROTO_GRE,
.n_proto = __bpf_constant_htons(ETH_P_IP),
.is_encap = true,
},
.flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP,
.retval = BPF_OK,
},
};
void serial_test_flow_dissector_namespace(void)
{
struct bpf_flow *skel;
struct nstoken *ns;
int err, prog_fd;
skel = bpf_flow__open_and_load();
if (!ASSERT_OK_PTR(skel, "open/load skeleton"))
return;
prog_fd = bpf_program__fd(skel->progs._dissect);
if (!ASSERT_OK_FD(prog_fd, "get dissector fd"))
goto out_destroy_skel;
/* We must be able to attach a flow dissector to root namespace */
err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
if (!ASSERT_OK(err, "attach on root namespace ok"))
goto out_destroy_skel;
err = make_netns(TEST_NS);
if (!ASSERT_OK(err, "create non-root net namespace"))
goto out_destroy_skel;
/* We must not be able to additionally attach a flow dissector to a
* non-root net namespace
*/
ns = open_netns(TEST_NS);
if (!ASSERT_OK_PTR(ns, "enter non-root net namespace"))
goto out_clean_ns;
err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
close_netns(ns);
ASSERT_ERR(err, "refuse new flow dissector in non-root net namespace");
ASSERT_EQ(errno, EEXIST, "refused because of already attached prog");
/* If no flow dissector is attached to the root namespace, we must
* be able to attach one to a non-root net namespace
*/
bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
ns = open_netns(TEST_NS);
ASSERT_OK_PTR(ns, "enter non-root net namespace");
err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
close_netns(ns);
ASSERT_OK(err, "accept new flow dissector in non-root net namespace");
/* If a flow dissector is attached to non-root net namespace, attaching
* a flow dissector to root namespace must fail
*/
err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
ASSERT_ERR(err, "refuse new flow dissector on root namespace");
ASSERT_EQ(errno, EEXIST, "refused because of already attached prog");
ns = open_netns(TEST_NS);
bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
close_netns(ns);
out_clean_ns:
remove_netns(TEST_NS);
out_destroy_skel:
bpf_flow__destroy(skel);
}
static int create_tap(const char *ifname)
{
struct ifreq ifr = {
@@ -533,22 +644,27 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array)
return 0;
}
static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
static void run_tests_skb_less(int tap_fd, struct bpf_map *keys,
char *test_suffix)
{
char test_name[TEST_NAME_MAX_LEN];
int i, err, keys_fd;
keys_fd = bpf_map__fd(keys);
if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd))
if (!ASSERT_OK_FD(keys_fd, "bpf_map__fd"))
return;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
/* Keep in sync with 'flags' from eth_get_headlen. */
__u32 eth_get_headlen_flags =
BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
LIBBPF_OPTS(bpf_test_run_opts, topts);
struct bpf_flow_keys flow_keys = {};
__u32 key = (__u32)(tests[i].keys.sport) << 16 |
tests[i].keys.dport;
snprintf(test_name, TEST_NAME_MAX_LEN, "%s-%s", tests[i].name,
test_suffix);
if (!test__start_subtest(test_name))
continue;
/* For skb-less case we can't pass input flags; run
* only the tests that have a matching set of flags.
@@ -558,78 +674,139 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
continue;
err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);
if (!ASSERT_EQ(err, sizeof(tests[i].pkt), "tx_tap"))
continue;
/* check the stored flow_keys only if BPF_OK expected */
if (tests[i].retval != BPF_OK)
continue;
err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
ASSERT_OK(err, "bpf_map_lookup_elem");
if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
continue;
CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
ASSERT_MEMEQ(&flow_keys, &tests[i].keys,
sizeof(struct bpf_flow_keys),
"returned flow keys");
err = bpf_map_delete_elem(keys_fd, &key);
ASSERT_OK(err, "bpf_map_delete_elem");
}
}
static void test_skb_less_prog_attach(struct bpf_flow *skel, int tap_fd)
void test_flow_dissector_skb_less_direct_attach(void)
{
int err, prog_fd;
int err, prog_fd, tap_fd;
struct bpf_flow *skel;
struct netns_obj *ns;
ns = netns_new("flow_dissector_skb_less_indirect_attach_ns", true);
if (!ASSERT_OK_PTR(ns, "create and open netns"))
return;
skel = bpf_flow__open_and_load();
if (!ASSERT_OK_PTR(skel, "open/load skeleton"))
goto out_clean_ns;
err = init_prog_array(skel->obj, skel->maps.jmp_table);
if (!ASSERT_OK(err, "init_prog_array"))
goto out_destroy_skel;
prog_fd = bpf_program__fd(skel->progs._dissect);
if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd))
return;
if (!ASSERT_OK_FD(prog_fd, "bpf_program__fd"))
goto out_destroy_skel;
err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
if (CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno))
return;
if (!ASSERT_OK(err, "bpf_prog_attach"))
goto out_destroy_skel;
run_tests_skb_less(tap_fd, skel->maps.last_dissection);
tap_fd = create_tap("tap0");
if (!ASSERT_OK_FD(tap_fd, "create_tap"))
goto out_destroy_skel;
err = ifup("tap0");
if (!ASSERT_OK(err, "ifup"))
goto out_close_tap;
run_tests_skb_less(tap_fd, skel->maps.last_dissection,
"non-skb-direct-attach");
err = bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
CHECK(err, "bpf_prog_detach2", "err %d errno %d\n", err, errno);
ASSERT_OK(err, "bpf_prog_detach2");
out_close_tap:
close(tap_fd);
out_destroy_skel:
bpf_flow__destroy(skel);
out_clean_ns:
netns_free(ns);
}
static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd)
void test_flow_dissector_skb_less_indirect_attach(void)
{
int err, net_fd, tap_fd;
struct bpf_flow *skel;
struct bpf_link *link;
int err, net_fd;
struct netns_obj *ns;
ns = netns_new("flow_dissector_skb_less_indirect_attach_ns", true);
if (!ASSERT_OK_PTR(ns, "create and open netns"))
return;
skel = bpf_flow__open_and_load();
if (!ASSERT_OK_PTR(skel, "open/load skeleton"))
goto out_clean_ns;
net_fd = open("/proc/self/ns/net", O_RDONLY);
if (CHECK(net_fd < 0, "open(/proc/self/ns/net)", "err %d\n", errno))
return;
if (!ASSERT_OK_FD(net_fd, "open(/proc/self/ns/net"))
goto out_destroy_skel;
err = init_prog_array(skel->obj, skel->maps.jmp_table);
if (!ASSERT_OK(err, "init_prog_array"))
goto out_destroy_skel;
tap_fd = create_tap("tap0");
if (!ASSERT_OK_FD(tap_fd, "create_tap"))
goto out_close_ns;
err = ifup("tap0");
if (!ASSERT_OK(err, "ifup"))
goto out_close_tap;
link = bpf_program__attach_netns(skel->progs._dissect, net_fd);
if (!ASSERT_OK_PTR(link, "attach_netns"))
goto out_close;
goto out_close_tap;
run_tests_skb_less(tap_fd, skel->maps.last_dissection);
run_tests_skb_less(tap_fd, skel->maps.last_dissection,
"non-skb-indirect-attach");
err = bpf_link__destroy(link);
CHECK(err, "bpf_link__destroy", "err %d\n", err);
out_close:
ASSERT_OK(err, "bpf_link__destroy");
out_close_tap:
close(tap_fd);
out_close_ns:
close(net_fd);
out_destroy_skel:
bpf_flow__destroy(skel);
out_clean_ns:
netns_free(ns);
}
void test_flow_dissector(void)
void test_flow_dissector_skb(void)
{
int i, err, prog_fd, keys_fd = -1, tap_fd;
char test_name[TEST_NAME_MAX_LEN];
struct bpf_flow *skel;
int i, err, prog_fd;
skel = bpf_flow__open_and_load();
if (CHECK(!skel, "skel", "failed to open/load skeleton\n"))
if (!ASSERT_OK_PTR(skel, "open/load skeleton"))
return;
prog_fd = bpf_program__fd(skel->progs._dissect);
if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd))
goto out_destroy_skel;
keys_fd = bpf_map__fd(skel->maps.last_dissection);
if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd))
goto out_destroy_skel;
err = init_prog_array(skel->obj, skel->maps.jmp_table);
if (CHECK(err, "init_prog_array", "err %d\n", err))
if (!ASSERT_OK(err, "init_prog_array"))
goto out_destroy_skel;
prog_fd = bpf_program__fd(skel->progs._dissect);
if (!ASSERT_OK_FD(prog_fd, "bpf_program__fd"))
goto out_destroy_skel;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
@@ -641,6 +818,10 @@ void test_flow_dissector(void)
);
static struct bpf_flow_keys ctx = {};
snprintf(test_name, TEST_NAME_MAX_LEN, "%s-skb", tests[i].name);
if (!test__start_subtest(test_name))
continue;
if (tests[i].flags) {
topts.ctx_in = &ctx;
topts.ctx_size_in = sizeof(ctx);
@@ -656,26 +837,12 @@ void test_flow_dissector(void)
continue;
ASSERT_EQ(topts.data_size_out, sizeof(flow_keys),
"test_run data_size_out");
CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
ASSERT_MEMEQ(&flow_keys, &tests[i].keys,
sizeof(struct bpf_flow_keys),
"returned flow keys");
}
/* Do the same tests but for skb-less flow dissector.
* We use a known path in the net/tun driver that calls
* eth_get_headlen and we manually export bpf_flow_keys
* via BPF map in this case.
*/
tap_fd = create_tap("tap0");
CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d\n", tap_fd, errno);
err = ifup("tap0");
CHECK(err, "ifup", "err %d errno %d\n", err, errno);
/* Test direct prog attachment */
test_skb_less_prog_attach(skel, tap_fd);
/* Test indirect prog attachment via link */
test_skb_less_link_create(skel, tap_fd);
close(tap_fd);
out_destroy_skel:
bpf_flow__destroy(skel);
}

View File

@@ -0,0 +1,792 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <bpf/bpf.h>
#include <linux/bpf.h>
#include <bpf/libbpf.h>
#include <arpa/inet.h>
#include <asm/byteorder.h>
#include <netinet/udp.h>
#include <poll.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <unistd.h>
#include "test_progs.h"
#include "network_helpers.h"
#include "bpf_util.h"
#include "bpf_flow.skel.h"
#define CFG_PORT_INNER 8000
#define CFG_PORT_GUE 6080
#define SUBTEST_NAME_MAX_LEN 32
#define TEST_NAME_MAX_LEN (32 + SUBTEST_NAME_MAX_LEN)
#define MAX_SOURCE_PORTS 3
#define TEST_PACKETS_COUNT 10
#define TEST_PACKET_LEN 100
#define TEST_PACKET_PATTERN 'a'
#define TEST_IPV4 "192.168.0.1/32"
#define TEST_IPV6 "100::a/128"
#define TEST_TUNNEL_REMOTE "127.0.0.2"
#define TEST_TUNNEL_LOCAL "127.0.0.1"
#define INIT_ADDR4(addr4, port) \
{ \
.sin_family = AF_INET, \
.sin_port = __constant_htons(port), \
.sin_addr.s_addr = __constant_htonl(addr4), \
}
#define INIT_ADDR6(addr6, port) \
{ \
.sin6_family = AF_INET6, \
.sin6_port = __constant_htons(port), \
.sin6_addr = addr6, \
}
#define TEST_IN4_SRC_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK + 2, 0)
#define TEST_IN4_DST_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK, CFG_PORT_INNER)
#define TEST_OUT4_SRC_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK + 1, 0)
#define TEST_OUT4_DST_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK, 0)
#define TEST_IN6_SRC_ADDR_DEFAULT INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, 0)
#define TEST_IN6_DST_ADDR_DEFAULT \
INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER)
#define TEST_OUT6_SRC_ADDR_DEFAULT INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, 0)
#define TEST_OUT6_DST_ADDR_DEFAULT INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, 0)
#define TEST_IN4_SRC_ADDR_DISSECT_CONTINUE INIT_ADDR4(INADDR_LOOPBACK + 126, 0)
#define TEST_IN4_SRC_ADDR_IPIP INIT_ADDR4((in_addr_t)0x01010101, 0)
#define TEST_IN4_DST_ADDR_IPIP INIT_ADDR4((in_addr_t)0xC0A80001, CFG_PORT_INNER)
struct grehdr {
uint16_t unused;
uint16_t protocol;
} __packed;
struct guehdr {
union {
struct {
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u8 hlen : 5, control : 1, version : 2;
#elif defined(__BIG_ENDIAN_BITFIELD)
__u8 version : 2, control : 1, hlen : 5;
#else
#error "Please fix <asm/byteorder.h>"
#endif
__u8 proto_ctype;
__be16 flags;
};
__be32 word;
};
};
static char buf[ETH_DATA_LEN];
struct test_configuration {
char name[SUBTEST_NAME_MAX_LEN];
int (*test_setup)(void);
void (*test_teardown)(void);
int source_ports[MAX_SOURCE_PORTS];
int cfg_l3_inner;
struct sockaddr_in in_saddr4;
struct sockaddr_in in_daddr4;
struct sockaddr_in6 in_saddr6;
struct sockaddr_in6 in_daddr6;
int cfg_l3_outer;
struct sockaddr_in out_saddr4;
struct sockaddr_in out_daddr4;
struct sockaddr_in6 out_saddr6;
struct sockaddr_in6 out_daddr6;
int cfg_encap_proto;
uint8_t cfg_dsfield_inner;
uint8_t cfg_dsfield_outer;
int cfg_l3_extra;
struct sockaddr_in extra_saddr4;
struct sockaddr_in extra_daddr4;
struct sockaddr_in6 extra_saddr6;
struct sockaddr_in6 extra_daddr6;
};
static unsigned long util_gettime(void)
{
struct timeval tv;
gettimeofday(&tv, NULL);
return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
}
static void build_ipv4_header(void *header, uint8_t proto, uint32_t src,
uint32_t dst, int payload_len, uint8_t tos)
{
struct iphdr *iph = header;
iph->ihl = 5;
iph->version = 4;
iph->tos = tos;
iph->ttl = 8;
iph->tot_len = htons(sizeof(*iph) + payload_len);
iph->id = htons(1337);
iph->protocol = proto;
iph->saddr = src;
iph->daddr = dst;
iph->check = build_ip_csum((void *)iph);
}
static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield)
{
uint16_t val, *ptr = (uint16_t *)ip6h;
val = ntohs(*ptr);
val &= 0xF00F;
val |= ((uint16_t)dsfield) << 4;
*ptr = htons(val);
}
static void build_ipv6_header(void *header, uint8_t proto,
const struct sockaddr_in6 *src,
const struct sockaddr_in6 *dst, int payload_len,
uint8_t dsfield)
{
struct ipv6hdr *ip6h = header;
ip6h->version = 6;
ip6h->payload_len = htons(payload_len);
ip6h->nexthdr = proto;
ip6h->hop_limit = 8;
ipv6_set_dsfield(ip6h, dsfield);
memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr));
memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr));
}
static void build_udp_header(void *header, int payload_len, uint16_t sport,
uint16_t dport, int family)
{
struct udphdr *udph = header;
int len = sizeof(*udph) + payload_len;
udph->source = htons(sport);
udph->dest = htons(dport);
udph->len = htons(len);
udph->check = 0;
if (family == AF_INET)
udph->check = build_udp_v4_csum(header - sizeof(struct iphdr),
udph);
else
udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr),
udph);
}
static void build_gue_header(void *header, uint8_t proto)
{
struct guehdr *gueh = header;
gueh->proto_ctype = proto;
}
static void build_gre_header(void *header, uint16_t proto)
{
struct grehdr *greh = header;
greh->protocol = htons(proto);
}
static int l3_length(int family)
{
if (family == AF_INET)
return sizeof(struct iphdr);
else
return sizeof(struct ipv6hdr);
}
static int build_packet(const struct test_configuration *test, uint16_t sport)
{
int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0;
int el3_len = 0, packet_len;
memset(buf, 0, ETH_DATA_LEN);
if (test->cfg_l3_extra)
el3_len = l3_length(test->cfg_l3_extra);
/* calculate header offsets */
if (test->cfg_encap_proto) {
ol3_len = l3_length(test->cfg_l3_outer);
if (test->cfg_encap_proto == IPPROTO_GRE)
ol4_len = sizeof(struct grehdr);
else if (test->cfg_encap_proto == IPPROTO_UDP)
ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr);
}
il3_len = l3_length(test->cfg_l3_inner);
il4_len = sizeof(struct udphdr);
packet_len = el3_len + ol3_len + ol4_len + il3_len + il4_len +
TEST_PACKET_LEN;
if (!ASSERT_LE(packet_len, sizeof(buf), "check packet size"))
return -1;
/*
* Fill packet from inside out, to calculate correct checksums.
* But create ip before udp headers, as udp uses ip for pseudo-sum.
*/
memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len,
TEST_PACKET_PATTERN, TEST_PACKET_LEN);
/* add zero byte for udp csum padding */
buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + TEST_PACKET_LEN] =
0;
switch (test->cfg_l3_inner) {
case PF_INET:
build_ipv4_header(buf + el3_len + ol3_len + ol4_len,
IPPROTO_UDP, test->in_saddr4.sin_addr.s_addr,
test->in_daddr4.sin_addr.s_addr,
il4_len + TEST_PACKET_LEN,
test->cfg_dsfield_inner);
break;
case PF_INET6:
build_ipv6_header(buf + el3_len + ol3_len + ol4_len,
IPPROTO_UDP, &test->in_saddr6,
&test->in_daddr6, il4_len + TEST_PACKET_LEN,
test->cfg_dsfield_inner);
break;
}
build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len,
TEST_PACKET_LEN, sport, CFG_PORT_INNER,
test->cfg_l3_inner);
if (!test->cfg_encap_proto)
return il3_len + il4_len + TEST_PACKET_LEN;
switch (test->cfg_l3_outer) {
case PF_INET:
build_ipv4_header(buf + el3_len, test->cfg_encap_proto,
test->out_saddr4.sin_addr.s_addr,
test->out_daddr4.sin_addr.s_addr,
ol4_len + il3_len + il4_len + TEST_PACKET_LEN,
test->cfg_dsfield_outer);
break;
case PF_INET6:
build_ipv6_header(buf + el3_len, test->cfg_encap_proto,
&test->out_saddr6, &test->out_daddr6,
ol4_len + il3_len + il4_len + TEST_PACKET_LEN,
test->cfg_dsfield_outer);
break;
}
switch (test->cfg_encap_proto) {
case IPPROTO_UDP:
build_gue_header(buf + el3_len + ol3_len + ol4_len -
sizeof(struct guehdr),
test->cfg_l3_inner == PF_INET ? IPPROTO_IPIP :
IPPROTO_IPV6);
build_udp_header(buf + el3_len + ol3_len,
sizeof(struct guehdr) + il3_len + il4_len +
TEST_PACKET_LEN,
sport, CFG_PORT_GUE, test->cfg_l3_outer);
break;
case IPPROTO_GRE:
build_gre_header(buf + el3_len + ol3_len,
test->cfg_l3_inner == PF_INET ? ETH_P_IP :
ETH_P_IPV6);
break;
}
switch (test->cfg_l3_extra) {
case PF_INET:
build_ipv4_header(buf,
test->cfg_l3_outer == PF_INET ? IPPROTO_IPIP :
IPPROTO_IPV6,
test->extra_saddr4.sin_addr.s_addr,
test->extra_daddr4.sin_addr.s_addr,
ol3_len + ol4_len + il3_len + il4_len +
TEST_PACKET_LEN,
0);
break;
case PF_INET6:
build_ipv6_header(buf,
test->cfg_l3_outer == PF_INET ? IPPROTO_IPIP :
IPPROTO_IPV6,
&test->extra_saddr6, &test->extra_daddr6,
ol3_len + ol4_len + il3_len + il4_len +
TEST_PACKET_LEN,
0);
break;
}
return el3_len + ol3_len + ol4_len + il3_len + il4_len +
TEST_PACKET_LEN;
}
/* sender transmits encapsulated over RAW or unencap'd over UDP */
static int setup_tx(const struct test_configuration *test)
{
int family, fd, ret;
if (test->cfg_l3_extra)
family = test->cfg_l3_extra;
else if (test->cfg_l3_outer)
family = test->cfg_l3_outer;
else
family = test->cfg_l3_inner;
fd = socket(family, SOCK_RAW, IPPROTO_RAW);
if (!ASSERT_OK_FD(fd, "setup tx socket"))
return fd;
if (test->cfg_l3_extra) {
if (test->cfg_l3_extra == PF_INET)
ret = connect(fd, (void *)&test->extra_daddr4,
sizeof(test->extra_daddr4));
else
ret = connect(fd, (void *)&test->extra_daddr6,
sizeof(test->extra_daddr6));
if (!ASSERT_OK(ret, "connect")) {
close(fd);
return ret;
}
} else if (test->cfg_l3_outer) {
/* connect to destination if not encapsulated */
if (test->cfg_l3_outer == PF_INET)
ret = connect(fd, (void *)&test->out_daddr4,
sizeof(test->out_daddr4));
else
ret = connect(fd, (void *)&test->out_daddr6,
sizeof(test->out_daddr6));
if (!ASSERT_OK(ret, "connect")) {
close(fd);
return ret;
}
} else {
/* otherwise using loopback */
if (test->cfg_l3_inner == PF_INET)
ret = connect(fd, (void *)&test->in_daddr4,
sizeof(test->in_daddr4));
else
ret = connect(fd, (void *)&test->in_daddr6,
sizeof(test->in_daddr6));
if (!ASSERT_OK(ret, "connect")) {
close(fd);
return ret;
}
}
return fd;
}
/* receiver reads unencapsulated UDP */
static int setup_rx(const struct test_configuration *test)
{
int fd, ret;
fd = socket(test->cfg_l3_inner, SOCK_DGRAM, 0);
if (!ASSERT_OK_FD(fd, "socket rx"))
return fd;
if (test->cfg_l3_inner == PF_INET)
ret = bind(fd, (void *)&test->in_daddr4,
sizeof(test->in_daddr4));
else
ret = bind(fd, (void *)&test->in_daddr6,
sizeof(test->in_daddr6));
if (!ASSERT_OK(ret, "bind rx")) {
close(fd);
return ret;
}
return fd;
}
static int do_tx(int fd, const char *pkt, int len)
{
int ret;
ret = write(fd, pkt, len);
return ret != len;
}
static int do_poll(int fd, short events, int timeout)
{
struct pollfd pfd;
int ret;
pfd.fd = fd;
pfd.events = events;
ret = poll(&pfd, 1, timeout);
return ret;
}
static int do_rx(int fd)
{
char rbuf;
int ret, num = 0;
while (1) {
ret = recv(fd, &rbuf, 1, MSG_DONTWAIT);
if (ret == -1 && errno == EAGAIN)
break;
if (ret < 0)
return -1;
if (!ASSERT_EQ(rbuf, TEST_PACKET_PATTERN, "check pkt pattern"))
return -1;
num++;
}
return num;
}
static int run_test(const struct test_configuration *test,
int source_port_index)
{
int fdt = -1, fdr = -1, len, tx = 0, rx = 0, err;
unsigned long tstop, tcur;
fdr = setup_rx(test);
fdt = setup_tx(test);
if (!ASSERT_OK_FD(fdr, "setup rx") || !ASSERT_OK_FD(fdt, "setup tx")) {
err = -1;
goto out_close_sockets;
}
len = build_packet(test,
(uint16_t)test->source_ports[source_port_index]);
if (!ASSERT_GT(len, 0, "build test packet"))
return -1;
tcur = util_gettime();
tstop = tcur;
while (tx < TEST_PACKETS_COUNT) {
if (!ASSERT_OK(do_tx(fdt, buf, len), "do_tx"))
break;
tx++;
err = do_rx(fdr);
if (!ASSERT_GE(err, 0, "do_rx"))
break;
rx += err;
}
/* read straggler packets, if any */
if (rx < tx) {
tstop = util_gettime() + 100;
while (rx < tx) {
tcur = util_gettime();
if (tcur >= tstop)
break;
err = do_poll(fdr, POLLIN, tstop - tcur);
if (err < 0)
break;
err = do_rx(fdr);
if (err >= 0)
rx += err;
}
}
out_close_sockets:
close(fdt);
close(fdr);
return rx;
}
static int attach_and_configure_program(struct bpf_flow *skel)
{
struct bpf_map *prog_array = skel->maps.jmp_table;
int main_prog_fd, sub_prog_fd, map_fd, i, err;
struct bpf_program *prog;
char prog_name[32];
main_prog_fd = bpf_program__fd(skel->progs._dissect);
if (main_prog_fd < 0)
return main_prog_fd;
err = bpf_prog_attach(main_prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
if (err)
return err;
map_fd = bpf_map__fd(prog_array);
if (map_fd < 0)
return map_fd;
for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i);
prog = bpf_object__find_program_by_name(skel->obj, prog_name);
if (!prog)
return -1;
sub_prog_fd = bpf_program__fd(prog);
if (sub_prog_fd < 0)
return -1;
err = bpf_map_update_elem(map_fd, &i, &sub_prog_fd, BPF_ANY);
if (err)
return -1;
}
return main_prog_fd;
}
static void detach_program(struct bpf_flow *skel, int prog_fd)
{
bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
}
static int set_port_drop(int pf, bool multi_port)
{
SYS(fail, "tc qdisc add dev lo ingress");
SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s",
"dev lo",
"parent FFFF:",
"protocol", pf == PF_INET6 ? "ipv6" : "ip",
"pref 1337",
"flower",
"ip_proto udp",
"src_port", multi_port ? "8-10" : "9",
"action drop");
return 0;
fail_delete_qdisc:
SYS_NOFAIL("tc qdisc del dev lo ingress");
fail:
return 1;
}
static void remove_filter(void)
{
SYS_NOFAIL("tc filter del dev lo ingress");
SYS_NOFAIL("tc qdisc del dev lo ingress");
}
static int ipv4_setup(void)
{
return set_port_drop(PF_INET, false);
}
static int ipv6_setup(void)
{
return set_port_drop(PF_INET6, false);
}
static int port_range_setup(void)
{
return set_port_drop(PF_INET, true);
}
static int set_addresses(void)
{
SYS(out, "ip -4 addr add %s dev lo", TEST_IPV4);
SYS(out_remove_ipv4, "ip -6 addr add %s dev lo", TEST_IPV6);
return 0;
out_remove_ipv4:
SYS_NOFAIL("ip -4 addr del %s dev lo", TEST_IPV4);
out:
return -1;
}
static void unset_addresses(void)
{
SYS_NOFAIL("ip -4 addr del %s dev lo", TEST_IPV4);
SYS_NOFAIL("ip -6 addr del %s dev lo", TEST_IPV6);
}
static int ipip_setup(void)
{
if (!ASSERT_OK(set_addresses(), "configure addresses"))
return -1;
if (!ASSERT_OK(set_port_drop(PF_INET, false), "set filter"))
goto out_unset_addresses;
SYS(out_remove_filter,
"ip link add ipip_test type ipip remote %s local %s dev lo",
TEST_TUNNEL_REMOTE, TEST_TUNNEL_LOCAL);
SYS(out_clean_netif, "ip link set ipip_test up");
return 0;
out_clean_netif:
SYS_NOFAIL("ip link del ipip_test");
out_remove_filter:
remove_filter();
out_unset_addresses:
unset_addresses();
return -1;
}
static void ipip_shutdown(void)
{
SYS_NOFAIL("ip link del ipip_test");
remove_filter();
unset_addresses();
}
static int gre_setup(void)
{
if (!ASSERT_OK(set_addresses(), "configure addresses"))
return -1;
if (!ASSERT_OK(set_port_drop(PF_INET, false), "set filter"))
goto out_unset_addresses;
SYS(out_remove_filter,
"ip link add gre_test type gre remote %s local %s dev lo",
TEST_TUNNEL_REMOTE, TEST_TUNNEL_LOCAL);
SYS(out_clean_netif, "ip link set gre_test up");
return 0;
out_clean_netif:
SYS_NOFAIL("ip link del ipip_test");
out_remove_filter:
remove_filter();
out_unset_addresses:
unset_addresses();
return -1;
}
static void gre_shutdown(void)
{
SYS_NOFAIL("ip link del gre_test");
remove_filter();
unset_addresses();
}
static const struct test_configuration tests_input[] = {
{
.name = "ipv4",
.test_setup = ipv4_setup,
.test_teardown = remove_filter,
.source_ports = { 8, 9, 10 },
.cfg_l3_inner = PF_INET,
.in_saddr4 = TEST_IN4_SRC_ADDR_DEFAULT,
.in_daddr4 = TEST_IN4_DST_ADDR_DEFAULT
},
{
.name = "ipv4_continue_dissect",
.test_setup = ipv4_setup,
.test_teardown = remove_filter,
.source_ports = { 8, 9, 10 },
.cfg_l3_inner = PF_INET,
.in_saddr4 = TEST_IN4_SRC_ADDR_DISSECT_CONTINUE,
.in_daddr4 = TEST_IN4_DST_ADDR_DEFAULT },
{
.name = "ipip",
.test_setup = ipip_setup,
.test_teardown = ipip_shutdown,
.source_ports = { 8, 9, 10 },
.cfg_l3_inner = PF_INET,
.in_saddr4 = TEST_IN4_SRC_ADDR_IPIP,
.in_daddr4 = TEST_IN4_DST_ADDR_IPIP,
.out_saddr4 = TEST_OUT4_SRC_ADDR_DEFAULT,
.out_daddr4 = TEST_OUT4_DST_ADDR_DEFAULT,
.cfg_l3_outer = PF_INET,
.cfg_encap_proto = IPPROTO_IPIP,
},
{
.name = "gre",
.test_setup = gre_setup,
.test_teardown = gre_shutdown,
.source_ports = { 8, 9, 10 },
.cfg_l3_inner = PF_INET,
.in_saddr4 = TEST_IN4_SRC_ADDR_IPIP,
.in_daddr4 = TEST_IN4_DST_ADDR_IPIP,
.out_saddr4 = TEST_OUT4_SRC_ADDR_DEFAULT,
.out_daddr4 = TEST_OUT4_DST_ADDR_DEFAULT,
.cfg_l3_outer = PF_INET,
.cfg_encap_proto = IPPROTO_GRE,
},
{
.name = "port_range",
.test_setup = port_range_setup,
.test_teardown = remove_filter,
.source_ports = { 7, 9, 11 },
.cfg_l3_inner = PF_INET,
.in_saddr4 = TEST_IN4_SRC_ADDR_DEFAULT,
.in_daddr4 = TEST_IN4_DST_ADDR_DEFAULT },
{
.name = "ipv6",
.test_setup = ipv6_setup,
.test_teardown = remove_filter,
.source_ports = { 8, 9, 10 },
.cfg_l3_inner = PF_INET6,
.in_saddr6 = TEST_IN6_SRC_ADDR_DEFAULT,
.in_daddr6 = TEST_IN6_DST_ADDR_DEFAULT
},
};
struct test_ctx {
struct bpf_flow *skel;
struct netns_obj *ns;
int prog_fd;
};
static int test_global_init(struct test_ctx *ctx)
{
int err;
ctx->skel = bpf_flow__open_and_load();
if (!ASSERT_OK_PTR(ctx->skel, "open and load flow_dissector"))
return -1;
ctx->ns = netns_new("flow_dissector_classification", true);
if (!ASSERT_OK_PTR(ctx->ns, "switch ns"))
goto out_destroy_skel;
err = write_sysctl("/proc/sys/net/ipv4/conf/default/rp_filter", "0");
err |= write_sysctl("/proc/sys/net/ipv4/conf/all/rp_filter", "0");
err |= write_sysctl("/proc/sys/net/ipv4/conf/lo/rp_filter", "0");
if (!ASSERT_OK(err, "configure net tunables"))
goto out_clean_ns;
ctx->prog_fd = attach_and_configure_program(ctx->skel);
if (!ASSERT_OK_FD(ctx->prog_fd, "attach and configure program"))
goto out_clean_ns;
return 0;
out_clean_ns:
netns_free(ctx->ns);
out_destroy_skel:
bpf_flow__destroy(ctx->skel);
return -1;
}
static void test_global_shutdown(struct test_ctx *ctx)
{
detach_program(ctx->skel, ctx->prog_fd);
netns_free(ctx->ns);
bpf_flow__destroy(ctx->skel);
}
void test_flow_dissector_classification(void)
{
struct test_ctx ctx;
const struct test_configuration *test;
int i;
if (test_global_init(&ctx))
return;
for (i = 0; i < ARRAY_SIZE(tests_input); i++) {
if (!test__start_subtest(tests_input[i].name))
continue;
test = &tests_input[i];
/* All tests are expected to have one rx-ok port first,
* then a non-working rx port, and finally a rx-ok port
*/
if (test->test_setup &&
!ASSERT_OK(test->test_setup(), "init filter"))
continue;
ASSERT_EQ(run_test(test, 0), TEST_PACKETS_COUNT,
"test first port");
ASSERT_EQ(run_test(test, 1), 0, "test second port");
ASSERT_EQ(run_test(test, 2), TEST_PACKETS_COUNT,
"test third port");
if (test->test_teardown)
test->test_teardown();
}
test_global_shutdown(&ctx);
}

View File

@@ -2,7 +2,7 @@
#include <test_progs.h>
#include "cgroup_helpers.h"
#include <linux/tcp.h>
#include <netinet/tcp.h>
#include <linux/netlink.h>
#include "sockopt_sk.skel.h"

View File

@@ -17,7 +17,7 @@
#include "network_helpers.h"
#include <linux/if_bonding.h>
#include <linux/limits.h>
#include <linux/udp.h>
#include <netinet/udp.h>
#include <uapi/linux/netdev.h>
#include "xdp_dummy.skel.h"

View File

@@ -7,7 +7,7 @@
#include <linux/if_link.h>
#include <linux/ipv6.h>
#include <linux/in6.h>
#include <linux/udp.h>
#include <netinet/udp.h>
#include <bpf/bpf_endian.h>
#include <uapi/linux/netdev.h>
#include "test_xdp_do_redirect.skel.h"

View File

@@ -3,7 +3,7 @@
#include <network_helpers.h>
#include <bpf/btf.h>
#include <linux/if_link.h>
#include <linux/udp.h>
#include <netinet/udp.h>
#include <net/if.h>
#include <unistd.h>

View File

@@ -10,7 +10,7 @@
#include <linux/errqueue.h>
#include <linux/if_link.h>
#include <linux/net_tstamp.h>
#include <linux/udp.h>
#include <netinet/udp.h>
#include <sys/mman.h>
#include <net/if.h>
#include <poll.h>
@@ -133,23 +133,6 @@ static void close_xsk(struct xsk *xsk)
munmap(xsk->umem_area, UMEM_SIZE);
}
static void ip_csum(struct iphdr *iph)
{
__u32 sum = 0;
__u16 *p;
int i;
iph->check = 0;
p = (void *)iph;
for (i = 0; i < sizeof(*iph) / sizeof(*p); i++)
sum += p[i];
while (sum >> 16)
sum = (sum & 0xffff) + (sum >> 16);
iph->check = ~sum;
}
static int generate_packet(struct xsk *xsk, __u16 dst_port)
{
struct xsk_tx_metadata *meta;
@@ -192,7 +175,7 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port)
iph->protocol = IPPROTO_UDP;
ASSERT_EQ(inet_pton(FAMILY, TX_ADDR, &iph->saddr), 1, "inet_pton(TX_ADDR)");
ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)");
ip_csum(iph);
iph->check = build_ip_csum(iph);
udph->source = htons(UDP_SOURCE_PORT);
udph->dest = htons(dst_port);

View File

@@ -15,7 +15,7 @@
#include <linux/ipv6.h>
#include <linux/pkt_cls.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <netinet/udp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

View File

@@ -10,7 +10,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/udp.h>
#include <netinet/udp.h>
/* offsetof() is used in static asserts, and the libbpf-redefined CO-RE
* friendly version breaks compilation for older clang versions <= 15

View File

@@ -15,7 +15,7 @@
#include <linux/ipv6.h>
#include <linux/pkt_cls.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <netinet/udp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

View File

@@ -1,780 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Inject packets with all sorts of encapsulation into the kernel.
*
* IPv4/IPv6 outer layer 3
* GRE/GUE/BARE outer layer 4, where bare is IPIP/SIT/IPv4-in-IPv6/..
* IPv4/IPv6 inner layer 3
*/
#define _GNU_SOURCE
#include <stddef.h>
#include <arpa/inet.h>
#include <asm/byteorder.h>
#include <error.h>
#include <errno.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <linux/ipv6.h>
#include <netinet/ip.h>
#include <netinet/in.h>
#include <netinet/udp.h>
#include <poll.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#define CFG_PORT_INNER 8000
/* Add some protocol definitions that do not exist in userspace */
struct grehdr {
uint16_t unused;
uint16_t protocol;
} __attribute__((packed));
struct guehdr {
union {
struct {
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u8 hlen:5,
control:1,
version:2;
#elif defined (__BIG_ENDIAN_BITFIELD)
__u8 version:2,
control:1,
hlen:5;
#else
#error "Please fix <asm/byteorder.h>"
#endif
__u8 proto_ctype;
__be16 flags;
};
__be32 word;
};
};
static uint8_t cfg_dsfield_inner;
static uint8_t cfg_dsfield_outer;
static uint8_t cfg_encap_proto;
static bool cfg_expect_failure = false;
static int cfg_l3_extra = AF_UNSPEC; /* optional SIT prefix */
static int cfg_l3_inner = AF_UNSPEC;
static int cfg_l3_outer = AF_UNSPEC;
static int cfg_num_pkt = 10;
static int cfg_num_secs = 0;
static char cfg_payload_char = 'a';
static int cfg_payload_len = 100;
static int cfg_port_gue = 6080;
static bool cfg_only_rx;
static bool cfg_only_tx;
static int cfg_src_port = 9;
static char buf[ETH_DATA_LEN];
#define INIT_ADDR4(name, addr4, port) \
static struct sockaddr_in name = { \
.sin_family = AF_INET, \
.sin_port = __constant_htons(port), \
.sin_addr.s_addr = __constant_htonl(addr4), \
};
#define INIT_ADDR6(name, addr6, port) \
static struct sockaddr_in6 name = { \
.sin6_family = AF_INET6, \
.sin6_port = __constant_htons(port), \
.sin6_addr = addr6, \
};
INIT_ADDR4(in_daddr4, INADDR_LOOPBACK, CFG_PORT_INNER)
INIT_ADDR4(in_saddr4, INADDR_LOOPBACK + 2, 0)
INIT_ADDR4(out_daddr4, INADDR_LOOPBACK, 0)
INIT_ADDR4(out_saddr4, INADDR_LOOPBACK + 1, 0)
INIT_ADDR4(extra_daddr4, INADDR_LOOPBACK, 0)
INIT_ADDR4(extra_saddr4, INADDR_LOOPBACK + 1, 0)
INIT_ADDR6(in_daddr6, IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER)
INIT_ADDR6(in_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
INIT_ADDR6(out_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
INIT_ADDR6(out_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
INIT_ADDR6(extra_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
INIT_ADDR6(extra_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
static unsigned long util_gettime(void)
{
struct timeval tv;
gettimeofday(&tv, NULL);
return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
}
static void util_printaddr(const char *msg, struct sockaddr *addr)
{
unsigned long off = 0;
char nbuf[INET6_ADDRSTRLEN];
switch (addr->sa_family) {
case PF_INET:
off = __builtin_offsetof(struct sockaddr_in, sin_addr);
break;
case PF_INET6:
off = __builtin_offsetof(struct sockaddr_in6, sin6_addr);
break;
default:
error(1, 0, "printaddr: unsupported family %u\n",
addr->sa_family);
}
if (!inet_ntop(addr->sa_family, ((void *) addr) + off, nbuf,
sizeof(nbuf)))
error(1, errno, "inet_ntop");
fprintf(stderr, "%s: %s\n", msg, nbuf);
}
static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
{
unsigned long sum = 0;
int i;
for (i = 0; i < num_u16; i++)
sum += start[i];
return sum;
}
static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
unsigned long sum)
{
sum += add_csum_hword(start, num_u16);
while (sum >> 16)
sum = (sum & 0xffff) + (sum >> 16);
return ~sum;
}
static void build_ipv4_header(void *header, uint8_t proto,
uint32_t src, uint32_t dst,
int payload_len, uint8_t tos)
{
struct iphdr *iph = header;
iph->ihl = 5;
iph->version = 4;
iph->tos = tos;
iph->ttl = 8;
iph->tot_len = htons(sizeof(*iph) + payload_len);
iph->id = htons(1337);
iph->protocol = proto;
iph->saddr = src;
iph->daddr = dst;
iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0);
}
static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield)
{
uint16_t val, *ptr = (uint16_t *)ip6h;
val = ntohs(*ptr);
val &= 0xF00F;
val |= ((uint16_t) dsfield) << 4;
*ptr = htons(val);
}
static void build_ipv6_header(void *header, uint8_t proto,
struct sockaddr_in6 *src,
struct sockaddr_in6 *dst,
int payload_len, uint8_t dsfield)
{
struct ipv6hdr *ip6h = header;
ip6h->version = 6;
ip6h->payload_len = htons(payload_len);
ip6h->nexthdr = proto;
ip6h->hop_limit = 8;
ipv6_set_dsfield(ip6h, dsfield);
memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr));
memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr));
}
static uint16_t build_udp_v4_csum(const struct iphdr *iph,
const struct udphdr *udph,
int num_words)
{
unsigned long pseudo_sum;
int num_u16 = sizeof(iph->saddr); /* halfwords: twice byte len */
pseudo_sum = add_csum_hword((void *) &iph->saddr, num_u16);
pseudo_sum += htons(IPPROTO_UDP);
pseudo_sum += udph->len;
return build_ip_csum((void *) udph, num_words, pseudo_sum);
}
static uint16_t build_udp_v6_csum(const struct ipv6hdr *ip6h,
const struct udphdr *udph,
int num_words)
{
unsigned long pseudo_sum;
int num_u16 = sizeof(ip6h->saddr); /* halfwords: twice byte len */
pseudo_sum = add_csum_hword((void *) &ip6h->saddr, num_u16);
pseudo_sum += htons(ip6h->nexthdr);
pseudo_sum += ip6h->payload_len;
return build_ip_csum((void *) udph, num_words, pseudo_sum);
}
static void build_udp_header(void *header, int payload_len,
uint16_t dport, int family)
{
struct udphdr *udph = header;
int len = sizeof(*udph) + payload_len;
udph->source = htons(cfg_src_port);
udph->dest = htons(dport);
udph->len = htons(len);
udph->check = 0;
if (family == AF_INET)
udph->check = build_udp_v4_csum(header - sizeof(struct iphdr),
udph, len >> 1);
else
udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr),
udph, len >> 1);
}
static void build_gue_header(void *header, uint8_t proto)
{
struct guehdr *gueh = header;
gueh->proto_ctype = proto;
}
static void build_gre_header(void *header, uint16_t proto)
{
struct grehdr *greh = header;
greh->protocol = htons(proto);
}
static int l3_length(int family)
{
if (family == AF_INET)
return sizeof(struct iphdr);
else
return sizeof(struct ipv6hdr);
}
static int build_packet(void)
{
int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0;
int el3_len = 0;
if (cfg_l3_extra)
el3_len = l3_length(cfg_l3_extra);
/* calculate header offsets */
if (cfg_encap_proto) {
ol3_len = l3_length(cfg_l3_outer);
if (cfg_encap_proto == IPPROTO_GRE)
ol4_len = sizeof(struct grehdr);
else if (cfg_encap_proto == IPPROTO_UDP)
ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr);
}
il3_len = l3_length(cfg_l3_inner);
il4_len = sizeof(struct udphdr);
if (el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len >=
sizeof(buf))
error(1, 0, "packet too large\n");
/*
* Fill packet from inside out, to calculate correct checksums.
* But create ip before udp headers, as udp uses ip for pseudo-sum.
*/
memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len,
cfg_payload_char, cfg_payload_len);
/* add zero byte for udp csum padding */
buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len] = 0;
switch (cfg_l3_inner) {
case PF_INET:
build_ipv4_header(buf + el3_len + ol3_len + ol4_len,
IPPROTO_UDP,
in_saddr4.sin_addr.s_addr,
in_daddr4.sin_addr.s_addr,
il4_len + cfg_payload_len,
cfg_dsfield_inner);
break;
case PF_INET6:
build_ipv6_header(buf + el3_len + ol3_len + ol4_len,
IPPROTO_UDP,
&in_saddr6, &in_daddr6,
il4_len + cfg_payload_len,
cfg_dsfield_inner);
break;
}
build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len,
cfg_payload_len, CFG_PORT_INNER, cfg_l3_inner);
if (!cfg_encap_proto)
return il3_len + il4_len + cfg_payload_len;
switch (cfg_l3_outer) {
case PF_INET:
build_ipv4_header(buf + el3_len, cfg_encap_proto,
out_saddr4.sin_addr.s_addr,
out_daddr4.sin_addr.s_addr,
ol4_len + il3_len + il4_len + cfg_payload_len,
cfg_dsfield_outer);
break;
case PF_INET6:
build_ipv6_header(buf + el3_len, cfg_encap_proto,
&out_saddr6, &out_daddr6,
ol4_len + il3_len + il4_len + cfg_payload_len,
cfg_dsfield_outer);
break;
}
switch (cfg_encap_proto) {
case IPPROTO_UDP:
build_gue_header(buf + el3_len + ol3_len + ol4_len -
sizeof(struct guehdr),
cfg_l3_inner == PF_INET ? IPPROTO_IPIP
: IPPROTO_IPV6);
build_udp_header(buf + el3_len + ol3_len,
sizeof(struct guehdr) + il3_len + il4_len +
cfg_payload_len,
cfg_port_gue, cfg_l3_outer);
break;
case IPPROTO_GRE:
build_gre_header(buf + el3_len + ol3_len,
cfg_l3_inner == PF_INET ? ETH_P_IP
: ETH_P_IPV6);
break;
}
switch (cfg_l3_extra) {
case PF_INET:
build_ipv4_header(buf,
cfg_l3_outer == PF_INET ? IPPROTO_IPIP
: IPPROTO_IPV6,
extra_saddr4.sin_addr.s_addr,
extra_daddr4.sin_addr.s_addr,
ol3_len + ol4_len + il3_len + il4_len +
cfg_payload_len, 0);
break;
case PF_INET6:
build_ipv6_header(buf,
cfg_l3_outer == PF_INET ? IPPROTO_IPIP
: IPPROTO_IPV6,
&extra_saddr6, &extra_daddr6,
ol3_len + ol4_len + il3_len + il4_len +
cfg_payload_len, 0);
break;
}
return el3_len + ol3_len + ol4_len + il3_len + il4_len +
cfg_payload_len;
}
/* sender transmits encapsulated over RAW or unencap'd over UDP */
static int setup_tx(void)
{
int family, fd, ret;
if (cfg_l3_extra)
family = cfg_l3_extra;
else if (cfg_l3_outer)
family = cfg_l3_outer;
else
family = cfg_l3_inner;
fd = socket(family, SOCK_RAW, IPPROTO_RAW);
if (fd == -1)
error(1, errno, "socket tx");
if (cfg_l3_extra) {
if (cfg_l3_extra == PF_INET)
ret = connect(fd, (void *) &extra_daddr4,
sizeof(extra_daddr4));
else
ret = connect(fd, (void *) &extra_daddr6,
sizeof(extra_daddr6));
if (ret)
error(1, errno, "connect tx");
} else if (cfg_l3_outer) {
/* connect to destination if not encapsulated */
if (cfg_l3_outer == PF_INET)
ret = connect(fd, (void *) &out_daddr4,
sizeof(out_daddr4));
else
ret = connect(fd, (void *) &out_daddr6,
sizeof(out_daddr6));
if (ret)
error(1, errno, "connect tx");
} else {
/* otherwise using loopback */
if (cfg_l3_inner == PF_INET)
ret = connect(fd, (void *) &in_daddr4,
sizeof(in_daddr4));
else
ret = connect(fd, (void *) &in_daddr6,
sizeof(in_daddr6));
if (ret)
error(1, errno, "connect tx");
}
return fd;
}
/* receiver reads unencapsulated UDP */
static int setup_rx(void)
{
int fd, ret;
fd = socket(cfg_l3_inner, SOCK_DGRAM, 0);
if (fd == -1)
error(1, errno, "socket rx");
if (cfg_l3_inner == PF_INET)
ret = bind(fd, (void *) &in_daddr4, sizeof(in_daddr4));
else
ret = bind(fd, (void *) &in_daddr6, sizeof(in_daddr6));
if (ret)
error(1, errno, "bind rx");
return fd;
}
static int do_tx(int fd, const char *pkt, int len)
{
int ret;
ret = write(fd, pkt, len);
if (ret == -1)
error(1, errno, "send");
if (ret != len)
error(1, errno, "send: len (%d < %d)\n", ret, len);
return 1;
}
static int do_poll(int fd, short events, int timeout)
{
struct pollfd pfd;
int ret;
pfd.fd = fd;
pfd.events = events;
ret = poll(&pfd, 1, timeout);
if (ret == -1)
error(1, errno, "poll");
if (ret && !(pfd.revents & POLLIN))
error(1, errno, "poll: unexpected event 0x%x\n", pfd.revents);
return ret;
}
static int do_rx(int fd)
{
char rbuf;
int ret, num = 0;
while (1) {
ret = recv(fd, &rbuf, 1, MSG_DONTWAIT);
if (ret == -1 && errno == EAGAIN)
break;
if (ret == -1)
error(1, errno, "recv");
if (rbuf != cfg_payload_char)
error(1, 0, "recv: payload mismatch");
num++;
}
return num;
}
static int do_main(void)
{
unsigned long tstop, treport, tcur;
int fdt = -1, fdr = -1, len, tx = 0, rx = 0;
if (!cfg_only_tx)
fdr = setup_rx();
if (!cfg_only_rx)
fdt = setup_tx();
len = build_packet();
tcur = util_gettime();
treport = tcur + 1000;
tstop = tcur + (cfg_num_secs * 1000);
while (1) {
if (!cfg_only_rx)
tx += do_tx(fdt, buf, len);
if (!cfg_only_tx)
rx += do_rx(fdr);
if (cfg_num_secs) {
tcur = util_gettime();
if (tcur >= tstop)
break;
if (tcur >= treport) {
fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
tx = 0;
rx = 0;
treport = tcur + 1000;
}
} else {
if (tx == cfg_num_pkt)
break;
}
}
/* read straggler packets, if any */
if (rx < tx) {
tstop = util_gettime() + 100;
while (rx < tx) {
tcur = util_gettime();
if (tcur >= tstop)
break;
do_poll(fdr, POLLIN, tstop - tcur);
rx += do_rx(fdr);
}
}
fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
if (fdr != -1 && close(fdr))
error(1, errno, "close rx");
if (fdt != -1 && close(fdt))
error(1, errno, "close tx");
/*
* success (== 0) only if received all packets
* unless failure is expected, in which case none must arrive.
*/
if (cfg_expect_failure)
return rx != 0;
else
return rx != tx;
}
static void __attribute__((noreturn)) usage(const char *filepath)
{
fprintf(stderr, "Usage: %s [-e gre|gue|bare|none] [-i 4|6] [-l len] "
"[-O 4|6] [-o 4|6] [-n num] [-t secs] [-R] [-T] "
"[-s <osrc> [-d <odst>] [-S <isrc>] [-D <idst>] "
"[-x <otos>] [-X <itos>] [-f <isport>] [-F]\n",
filepath);
exit(1);
}
static void parse_addr(int family, void *addr, const char *optarg)
{
int ret;
ret = inet_pton(family, optarg, addr);
if (ret == -1)
error(1, errno, "inet_pton");
if (ret == 0)
error(1, 0, "inet_pton: bad string");
}
static void parse_addr4(struct sockaddr_in *addr, const char *optarg)
{
parse_addr(AF_INET, &addr->sin_addr, optarg);
}
static void parse_addr6(struct sockaddr_in6 *addr, const char *optarg)
{
parse_addr(AF_INET6, &addr->sin6_addr, optarg);
}
static int parse_protocol_family(const char *filepath, const char *optarg)
{
if (!strcmp(optarg, "4"))
return PF_INET;
if (!strcmp(optarg, "6"))
return PF_INET6;
usage(filepath);
}
static void parse_opts(int argc, char **argv)
{
int c;
while ((c = getopt(argc, argv, "d:D:e:f:Fhi:l:n:o:O:Rs:S:t:Tx:X:")) != -1) {
switch (c) {
case 'd':
if (cfg_l3_outer == AF_UNSPEC)
error(1, 0, "-d must be preceded by -o");
if (cfg_l3_outer == AF_INET)
parse_addr4(&out_daddr4, optarg);
else
parse_addr6(&out_daddr6, optarg);
break;
case 'D':
if (cfg_l3_inner == AF_UNSPEC)
error(1, 0, "-D must be preceded by -i");
if (cfg_l3_inner == AF_INET)
parse_addr4(&in_daddr4, optarg);
else
parse_addr6(&in_daddr6, optarg);
break;
case 'e':
if (!strcmp(optarg, "gre"))
cfg_encap_proto = IPPROTO_GRE;
else if (!strcmp(optarg, "gue"))
cfg_encap_proto = IPPROTO_UDP;
else if (!strcmp(optarg, "bare"))
cfg_encap_proto = IPPROTO_IPIP;
else if (!strcmp(optarg, "none"))
cfg_encap_proto = IPPROTO_IP; /* == 0 */
else
usage(argv[0]);
break;
case 'f':
cfg_src_port = strtol(optarg, NULL, 0);
break;
case 'F':
cfg_expect_failure = true;
break;
case 'h':
usage(argv[0]);
break;
case 'i':
if (!strcmp(optarg, "4"))
cfg_l3_inner = PF_INET;
else if (!strcmp(optarg, "6"))
cfg_l3_inner = PF_INET6;
else
usage(argv[0]);
break;
case 'l':
cfg_payload_len = strtol(optarg, NULL, 0);
break;
case 'n':
cfg_num_pkt = strtol(optarg, NULL, 0);
break;
case 'o':
cfg_l3_outer = parse_protocol_family(argv[0], optarg);
break;
case 'O':
cfg_l3_extra = parse_protocol_family(argv[0], optarg);
break;
case 'R':
cfg_only_rx = true;
break;
case 's':
if (cfg_l3_outer == AF_INET)
parse_addr4(&out_saddr4, optarg);
else
parse_addr6(&out_saddr6, optarg);
break;
case 'S':
if (cfg_l3_inner == AF_INET)
parse_addr4(&in_saddr4, optarg);
else
parse_addr6(&in_saddr6, optarg);
break;
case 't':
cfg_num_secs = strtol(optarg, NULL, 0);
break;
case 'T':
cfg_only_tx = true;
break;
case 'x':
cfg_dsfield_outer = strtol(optarg, NULL, 0);
break;
case 'X':
cfg_dsfield_inner = strtol(optarg, NULL, 0);
break;
}
}
if (cfg_only_rx && cfg_only_tx)
error(1, 0, "options: cannot combine rx-only and tx-only");
if (cfg_encap_proto && cfg_l3_outer == AF_UNSPEC)
error(1, 0, "options: must specify outer with encap");
else if ((!cfg_encap_proto) && cfg_l3_outer != AF_UNSPEC)
error(1, 0, "options: cannot combine no-encap and outer");
else if ((!cfg_encap_proto) && cfg_l3_extra != AF_UNSPEC)
error(1, 0, "options: cannot combine no-encap and extra");
if (cfg_l3_inner == AF_UNSPEC)
cfg_l3_inner = AF_INET6;
if (cfg_l3_inner == AF_INET6 && cfg_encap_proto == IPPROTO_IPIP)
cfg_encap_proto = IPPROTO_IPV6;
/* RFC 6040 4.2:
* on decap, if outer encountered congestion (CE == 0x3),
* but inner cannot encode ECN (NoECT == 0x0), then drop packet.
*/
if (((cfg_dsfield_outer & 0x3) == 0x3) &&
((cfg_dsfield_inner & 0x3) == 0x0))
cfg_expect_failure = true;
}
static void print_opts(void)
{
if (cfg_l3_inner == PF_INET6) {
util_printaddr("inner.dest6", (void *) &in_daddr6);
util_printaddr("inner.source6", (void *) &in_saddr6);
} else {
util_printaddr("inner.dest4", (void *) &in_daddr4);
util_printaddr("inner.source4", (void *) &in_saddr4);
}
if (!cfg_l3_outer)
return;
fprintf(stderr, "encap proto: %u\n", cfg_encap_proto);
if (cfg_l3_outer == PF_INET6) {
util_printaddr("outer.dest6", (void *) &out_daddr6);
util_printaddr("outer.source6", (void *) &out_saddr6);
} else {
util_printaddr("outer.dest4", (void *) &out_daddr4);
util_printaddr("outer.source4", (void *) &out_saddr4);
}
if (!cfg_l3_extra)
return;
if (cfg_l3_outer == PF_INET6) {
util_printaddr("extra.dest6", (void *) &extra_daddr6);
util_printaddr("extra.source6", (void *) &extra_saddr6);
} else {
util_printaddr("extra.dest4", (void *) &extra_daddr4);
util_printaddr("extra.source4", (void *) &extra_saddr4);
}
}
int main(int argc, char **argv)
{
parse_opts(argc, argv);
print_opts();
return do_main();
}

View File

@@ -1,178 +0,0 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Load BPF flow dissector and verify it correctly dissects traffic
BPF_FILE="bpf_flow.bpf.o"
export TESTNAME=test_flow_dissector
unmount=0
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
msg="skip all tests:"
if [ $UID != 0 ]; then
echo $msg please run this as root >&2
exit $ksft_skip
fi
# This test needs to be run in a network namespace with in_netns.sh. Check if
# this is the case and run it with in_netns.sh if it is being run in the root
# namespace.
if [[ -z $(ip netns identify $$) ]]; then
err=0
if bpftool="$(which bpftool)"; then
echo "Testing global flow dissector..."
$bpftool prog loadall $BPF_FILE /sys/fs/bpf/flow \
type flow_dissector
if ! unshare --net $bpftool prog attach pinned \
/sys/fs/bpf/flow/_dissect flow_dissector; then
echo "Unexpected unsuccessful attach in namespace" >&2
err=1
fi
$bpftool prog attach pinned /sys/fs/bpf/flow/_dissect \
flow_dissector
if unshare --net $bpftool prog attach pinned \
/sys/fs/bpf/flow/_dissect flow_dissector; then
echo "Unexpected successful attach in namespace" >&2
err=1
fi
if ! $bpftool prog detach pinned \
/sys/fs/bpf/flow/_dissect flow_dissector; then
echo "Failed to detach flow dissector" >&2
err=1
fi
rm -rf /sys/fs/bpf/flow
else
echo "Skipping root flow dissector test, bpftool not found" >&2
fi
# Run the rest of the tests in a net namespace.
../net/in_netns.sh "$0" "$@"
err=$(( $err + $? ))
if (( $err == 0 )); then
echo "selftests: $TESTNAME [PASS]";
else
echo "selftests: $TESTNAME [FAILED]";
fi
exit $err
fi
# Determine selftest success via shell exit code
exit_handler()
{
set +e
# Cleanup
tc filter del dev lo ingress pref 1337 2> /dev/null
tc qdisc del dev lo ingress 2> /dev/null
./flow_dissector_load -d 2> /dev/null
if [ $unmount -ne 0 ]; then
umount bpffs 2> /dev/null
fi
}
# Exit script immediately (well catched by trap handler) if any
# program/thing exits with a non-zero status.
set -e
# (Use 'trap -l' to list meaning of numbers)
trap exit_handler 0 2 3 6 9
# Mount BPF file system
if /bin/mount | grep /sys/fs/bpf > /dev/null; then
echo "bpffs already mounted"
else
echo "bpffs not mounted. Mounting..."
unmount=1
/bin/mount bpffs /sys/fs/bpf -t bpf
fi
# Attach BPF program
./flow_dissector_load -p $BPF_FILE -s _dissect
# Setup
tc qdisc add dev lo ingress
echo 0 > /proc/sys/net/ipv4/conf/default/rp_filter
echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter
echo 0 > /proc/sys/net/ipv4/conf/lo/rp_filter
echo "Testing IPv4..."
# Drops all IP/UDP packets coming from port 9
tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \
udp src_port 9 action drop
# Send 10 IPv4/UDP packets from port 8. Filter should not drop any.
./test_flow_dissector -i 4 -f 8
# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
./test_flow_dissector -i 4 -f 9 -F
# Send 10 IPv4/UDP packets from port 10. Filter should not drop any.
./test_flow_dissector -i 4 -f 10
echo "Testing IPv4 from 127.0.0.127 (fallback to generic dissector)..."
# Send 10 IPv4/UDP packets from port 8. Filter should not drop any.
./test_flow_dissector -i 4 -S 127.0.0.127 -f 8
# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
./test_flow_dissector -i 4 -S 127.0.0.127 -f 9 -F
# Send 10 IPv4/UDP packets from port 10. Filter should not drop any.
./test_flow_dissector -i 4 -S 127.0.0.127 -f 10
echo "Testing IPIP..."
# Send 10 IPv4/IPv4/UDP packets from port 8. Filter should not drop any.
./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
-D 192.168.0.1 -S 1.1.1.1 -f 8
# Send 10 IPv4/IPv4/UDP packets from port 9. Filter should drop all.
./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
-D 192.168.0.1 -S 1.1.1.1 -f 9 -F
# Send 10 IPv4/IPv4/UDP packets from port 10. Filter should not drop any.
./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
-D 192.168.0.1 -S 1.1.1.1 -f 10
echo "Testing IPv4 + GRE..."
# Send 10 IPv4/GRE/IPv4/UDP packets from port 8. Filter should not drop any.
./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
-D 192.168.0.1 -S 1.1.1.1 -f 8
# Send 10 IPv4/GRE/IPv4/UDP packets from port 9. Filter should drop all.
./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
-D 192.168.0.1 -S 1.1.1.1 -f 9 -F
# Send 10 IPv4/GRE/IPv4/UDP packets from port 10. Filter should not drop any.
./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
-D 192.168.0.1 -S 1.1.1.1 -f 10
tc filter del dev lo ingress pref 1337
echo "Testing port range..."
# Drops all IP/UDP packets coming from port 8-10
tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \
udp src_port 8-10 action drop
# Send 10 IPv4/UDP packets from port 7. Filter should not drop any.
./test_flow_dissector -i 4 -f 7
# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
./test_flow_dissector -i 4 -f 9 -F
# Send 10 IPv4/UDP packets from port 11. Filter should not drop any.
./test_flow_dissector -i 4 -f 11
tc filter del dev lo ingress pref 1337
echo "Testing IPv6..."
# Drops all IPv6/UDP packets coming from port 9
tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \
udp src_port 9 action drop
# Send 10 IPv6/UDP packets from port 8. Filter should not drop any.
./test_flow_dissector -i 6 -f 8
# Send 10 IPv6/UDP packets from port 9. Filter should drop all.
./test_flow_dissector -i 6 -f 9 -F
# Send 10 IPv6/UDP packets from port 10. Filter should not drop any.
./test_flow_dissector -i 6 -f 10
exit 0

View File

@@ -1282,6 +1282,21 @@ void crash_handler(int signum)
backtrace_symbols_fd(bt, sz, STDERR_FILENO);
}
void hexdump(const char *prefix, const void *buf, size_t len)
{
for (int i = 0; i < len; i++) {
if (!(i % 16)) {
if (i)
fprintf(stdout, "\n");
fprintf(stdout, "%s", prefix);
}
if (i && !(i % 8) && (i % 16))
fprintf(stdout, "\t");
fprintf(stdout, "%02X ", ((uint8_t *)(buf))[i]);
}
fprintf(stdout, "\n");
}
static void sigint_handler(int signum)
{
int i;

View File

@@ -185,6 +185,7 @@ void test__end_subtest(void);
void test__skip(void);
void test__fail(void);
int test__join_cgroup(const char *path);
void hexdump(const char *prefix, const void *buf, size_t len);
#define PRINT_FAIL(format...) \
({ \
@@ -344,6 +345,20 @@ int test__join_cgroup(const char *path);
___ok; \
})
#define ASSERT_MEMEQ(actual, expected, len, name) ({ \
static int duration = 0; \
const void *__act = actual; \
const void *__exp = expected; \
int __len = len; \
bool ___ok = memcmp(__act, __exp, __len) == 0; \
CHECK(!___ok, (name), "unexpected memory mismatch\n"); \
fprintf(stdout, "actual:\n"); \
hexdump("\t", __act, __len); \
fprintf(stdout, "expected:\n"); \
hexdump("\t", __exp, __len); \
___ok; \
})
#define ASSERT_OK(res, name) ({ \
static int duration = 0; \
long long ___res = (res); \

View File

@@ -27,7 +27,7 @@
#include <linux/errqueue.h>
#include <linux/if_link.h>
#include <linux/net_tstamp.h>
#include <linux/udp.h>
#include <netinet/udp.h>
#include <linux/sockios.h>
#include <linux/if_xdp.h>
#include <sys/mman.h>