mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-02-14 20:43:03 -05:00
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Martin KaFai Lau says: ==================== pull-request: bpf-next 2025-11-10 We've added 19 non-merge commits during the last 3 day(s) which contain a total of 22 files changed, 1345 insertions(+), 197 deletions(-). The main changes are: 1) Preserve skb metadata after a TC BPF program has changed the skb, from Jakub Sitnicki. This allows a TC program at the end of a TC filter chain to still see the skb metadata, even if another TC program at the front of the chain has changed the skb using BPF helpers. 2) Initial af_smc bpf_struct_ops support to control the smc specific syn/synack options, from D. Wythe. * tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: bpf/selftests: Add selftest for bpf_smc_hs_ctrl net/smc: bpf: Introduce generic hook for handshake flow bpf: Export necessary symbols for modules with struct_ops selftests/bpf: Cover skb metadata access after bpf_skb_change_proto selftests/bpf: Cover skb metadata access after change_head/tail helper selftests/bpf: Cover skb metadata access after bpf_skb_adjust_room selftests/bpf: Cover skb metadata access after vlan push/pop helper selftests/bpf: Expect unclone to preserve skb metadata selftests/bpf: Dump skb metadata on verification failure selftests/bpf: Verify skb metadata in BPF instead of userspace bpf: Make bpf_skb_change_head helper metadata-safe bpf: Make bpf_skb_change_proto helper metadata-safe bpf: Make bpf_skb_adjust_room metadata-safe bpf: Make bpf_skb_vlan_push helper metadata-safe bpf: Make bpf_skb_vlan_pop helper metadata-safe vlan: Make vlan_remove_tag return nothing bpf: Unclone skb head on bpf_dynptr_write to skb metadata net: Preserve metadata on pskb_expand_head net: Helper to move packet data and metadata after skb_push/pull ==================== Link: https://patch.msgid.link/20251110232427.3929291-1-martin.lau@linux.dev Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
@@ -1781,6 +1781,8 @@ int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len);
|
||||
void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len);
|
||||
void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
|
||||
void *buf, unsigned long len, bool flush);
|
||||
int __bpf_skb_meta_store_bytes(struct sk_buff *skb, u32 offset,
|
||||
const void *from, u32 len, u64 flags);
|
||||
void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset);
|
||||
#else /* CONFIG_NET */
|
||||
static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset,
|
||||
@@ -1817,6 +1819,13 @@ static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, voi
|
||||
{
|
||||
}
|
||||
|
||||
static inline int __bpf_skb_meta_store_bytes(struct sk_buff *skb, u32 offset,
|
||||
const void *from, u32 len,
|
||||
u64 flags)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static inline void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset)
|
||||
{
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
|
||||
@@ -355,16 +355,17 @@ static inline int __vlan_insert_inner_tag(struct sk_buff *skb,
|
||||
__be16 vlan_proto, u16 vlan_tci,
|
||||
unsigned int mac_len)
|
||||
{
|
||||
const u8 meta_len = mac_len > ETH_TLEN ? skb_metadata_len(skb) : 0;
|
||||
struct vlan_ethhdr *veth;
|
||||
|
||||
if (skb_cow_head(skb, VLAN_HLEN) < 0)
|
||||
if (skb_cow_head(skb, meta_len + VLAN_HLEN) < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
skb_push(skb, VLAN_HLEN);
|
||||
|
||||
/* Move the mac header sans proto to the beginning of the new header. */
|
||||
if (likely(mac_len > ETH_TLEN))
|
||||
memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN);
|
||||
skb_postpush_data_move(skb, VLAN_HLEN, mac_len - ETH_TLEN);
|
||||
if (skb_mac_header_was_set(skb))
|
||||
skb->mac_header -= VLAN_HLEN;
|
||||
|
||||
@@ -731,18 +732,16 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb,
|
||||
*
|
||||
* Expects the skb to contain a VLAN tag in the payload, and to have skb->data
|
||||
* pointing at the MAC header.
|
||||
*
|
||||
* Returns: a new pointer to skb->data, or NULL on failure to pull.
|
||||
*/
|
||||
static inline void *vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci)
|
||||
static inline void vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci)
|
||||
{
|
||||
struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
|
||||
|
||||
*vlan_tci = ntohs(vhdr->h_vlan_TCI);
|
||||
|
||||
memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
|
||||
vlan_set_encap_proto(skb, vhdr);
|
||||
return __skb_pull(skb, VLAN_HLEN);
|
||||
__skb_pull(skb, VLAN_HLEN);
|
||||
skb_postpull_data_move(skb, VLAN_HLEN, 2 * ETH_ALEN);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -4564,6 +4564,81 @@ static inline void skb_metadata_clear(struct sk_buff *skb)
|
||||
skb_metadata_set(skb, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* skb_data_move - Move packet data and metadata after skb_push() or skb_pull().
|
||||
* @skb: packet to operate on
|
||||
* @len: number of bytes pushed or pulled from &sk_buff->data
|
||||
* @n: number of bytes to memmove() from pre-push/pull &sk_buff->data
|
||||
*
|
||||
* Moves @n bytes of packet data, can be zero, and all bytes of skb metadata.
|
||||
*
|
||||
* Assumes metadata is located immediately before &sk_buff->data prior to the
|
||||
* push/pull, and that sufficient headroom exists to hold it after an
|
||||
* skb_push(). Otherwise, metadata is cleared and a one-time warning is issued.
|
||||
*
|
||||
* Prefer skb_postpull_data_move() or skb_postpush_data_move() to calling this
|
||||
* helper directly.
|
||||
*/
|
||||
static inline void skb_data_move(struct sk_buff *skb, const int len,
|
||||
const unsigned int n)
|
||||
{
|
||||
const u8 meta_len = skb_metadata_len(skb);
|
||||
u8 *meta, *meta_end;
|
||||
|
||||
if (!len || (!n && !meta_len))
|
||||
return;
|
||||
|
||||
if (!meta_len)
|
||||
goto no_metadata;
|
||||
|
||||
meta_end = skb_metadata_end(skb);
|
||||
meta = meta_end - meta_len;
|
||||
|
||||
if (WARN_ON_ONCE(meta_end + len != skb->data ||
|
||||
meta_len > skb_headroom(skb))) {
|
||||
skb_metadata_clear(skb);
|
||||
goto no_metadata;
|
||||
}
|
||||
|
||||
memmove(meta + len, meta, meta_len + n);
|
||||
return;
|
||||
|
||||
no_metadata:
|
||||
memmove(skb->data, skb->data - len, n);
|
||||
}
|
||||
|
||||
/**
|
||||
* skb_postpull_data_move - Move packet data and metadata after skb_pull().
|
||||
* @skb: packet to operate on
|
||||
* @len: number of bytes pulled from &sk_buff->data
|
||||
* @n: number of bytes to memmove() from pre-pull &sk_buff->data
|
||||
*
|
||||
* See skb_data_move() for details.
|
||||
*/
|
||||
static inline void skb_postpull_data_move(struct sk_buff *skb,
|
||||
const unsigned int len,
|
||||
const unsigned int n)
|
||||
{
|
||||
DEBUG_NET_WARN_ON_ONCE(len > INT_MAX);
|
||||
skb_data_move(skb, len, n);
|
||||
}
|
||||
|
||||
/**
|
||||
* skb_postpush_data_move - Move packet data and metadata after skb_push().
|
||||
* @skb: packet to operate on
|
||||
* @len: number of bytes pushed onto &sk_buff->data
|
||||
* @n: number of bytes to memmove() from pre-push &sk_buff->data
|
||||
*
|
||||
* See skb_data_move() for details.
|
||||
*/
|
||||
static inline void skb_postpush_data_move(struct sk_buff *skb,
|
||||
const unsigned int len,
|
||||
const unsigned int n)
|
||||
{
|
||||
DEBUG_NET_WARN_ON_ONCE(len > INT_MAX);
|
||||
skb_data_move(skb, -len, n);
|
||||
}
|
||||
|
||||
struct sk_buff *skb_clone_sk(struct sk_buff *skb);
|
||||
|
||||
#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
|
||||
|
||||
@@ -17,6 +17,9 @@ struct netns_smc {
|
||||
#ifdef CONFIG_SYSCTL
|
||||
struct ctl_table_header *smc_hdr;
|
||||
#endif
|
||||
#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
|
||||
struct smc_hs_ctrl __rcu *hs_ctrl;
|
||||
#endif /* CONFIG_SMC_HS_CTRL_BPF */
|
||||
unsigned int sysctl_autocorking_size;
|
||||
unsigned int sysctl_smcr_buf_type;
|
||||
int sysctl_smcr_testlink_time;
|
||||
|
||||
@@ -17,6 +17,8 @@
|
||||
#include <linux/wait.h>
|
||||
#include <linux/dibs.h>
|
||||
|
||||
struct tcp_sock;
|
||||
struct inet_request_sock;
|
||||
struct sock;
|
||||
|
||||
#define SMC_MAX_PNETID_LEN 16 /* Max. length of PNET id */
|
||||
@@ -50,4 +52,55 @@ struct smcd_dev {
|
||||
u8 going_away : 1;
|
||||
};
|
||||
|
||||
#define SMC_HS_CTRL_NAME_MAX 16
|
||||
|
||||
enum {
|
||||
/* ops can be inherit from init_net */
|
||||
SMC_HS_CTRL_FLAG_INHERITABLE = 0x1,
|
||||
|
||||
SMC_HS_CTRL_ALL_FLAGS = SMC_HS_CTRL_FLAG_INHERITABLE,
|
||||
};
|
||||
|
||||
struct smc_hs_ctrl {
|
||||
/* private */
|
||||
|
||||
struct list_head list;
|
||||
struct module *owner;
|
||||
|
||||
/* public */
|
||||
|
||||
/* unique name */
|
||||
char name[SMC_HS_CTRL_NAME_MAX];
|
||||
int flags;
|
||||
|
||||
/* Invoked before computing SMC option for SYN packets.
|
||||
* We can control whether to set SMC options by returning various value.
|
||||
* Return 0 to disable SMC, or return any other value to enable it.
|
||||
*/
|
||||
int (*syn_option)(struct tcp_sock *tp);
|
||||
|
||||
/* Invoked before Set up SMC options for SYN-ACK packets
|
||||
* We can control whether to respond SMC options by returning various
|
||||
* value. Return 0 to disable SMC, or return any other value to enable
|
||||
* it.
|
||||
*/
|
||||
int (*synack_option)(const struct tcp_sock *tp,
|
||||
struct inet_request_sock *ireq);
|
||||
};
|
||||
|
||||
#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
|
||||
#define smc_call_hsbpf(init_val, tp, func, ...) ({ \
|
||||
typeof(init_val) __ret = (init_val); \
|
||||
struct smc_hs_ctrl *ctrl; \
|
||||
rcu_read_lock(); \
|
||||
ctrl = rcu_dereference(sock_net((struct sock *)(tp))->smc.hs_ctrl); \
|
||||
if (ctrl && ctrl->func) \
|
||||
__ret = ctrl->func(tp, ##__VA_ARGS__); \
|
||||
rcu_read_unlock(); \
|
||||
__ret; \
|
||||
})
|
||||
#else
|
||||
#define smc_call_hsbpf(init_val, tp, ...) ({ (void)(tp); (init_val); })
|
||||
#endif /* CONFIG_SMC_HS_CTRL_BPF */
|
||||
|
||||
#endif /* _SMC_H */
|
||||
|
||||
@@ -1162,6 +1162,7 @@ bool bpf_struct_ops_get(const void *kdata)
|
||||
map = __bpf_map_inc_not_zero(&st_map->map, false);
|
||||
return !IS_ERR(map);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bpf_struct_ops_get);
|
||||
|
||||
void bpf_struct_ops_put(const void *kdata)
|
||||
{
|
||||
@@ -1173,6 +1174,7 @@ void bpf_struct_ops_put(const void *kdata)
|
||||
|
||||
bpf_map_put(&st_map->map);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bpf_struct_ops_put);
|
||||
|
||||
u32 bpf_struct_ops_id(const void *kdata)
|
||||
{
|
||||
|
||||
@@ -1842,10 +1842,8 @@ int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src,
|
||||
return -EINVAL;
|
||||
return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len);
|
||||
case BPF_DYNPTR_TYPE_SKB_META:
|
||||
if (flags)
|
||||
return -EINVAL;
|
||||
memmove(bpf_skb_meta_pointer(dst->data, dst->offset + offset), src, len);
|
||||
return 0;
|
||||
return __bpf_skb_meta_store_bytes(dst->data, dst->offset + offset, src,
|
||||
len, flags);
|
||||
default:
|
||||
WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);
|
||||
return -EFAULT;
|
||||
|
||||
@@ -1234,6 +1234,7 @@ int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
|
||||
|
||||
return src - orig_src;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bpf_obj_name_cpy);
|
||||
|
||||
int map_check_no_btf(const struct bpf_map *map,
|
||||
const struct btf *btf,
|
||||
|
||||
@@ -3253,11 +3253,11 @@ static void bpf_skb_change_protocol(struct sk_buff *skb, u16 proto)
|
||||
|
||||
static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
|
||||
{
|
||||
/* Caller already did skb_cow() with len as headroom,
|
||||
/* Caller already did skb_cow() with meta_len+len as headroom,
|
||||
* so no need to do it here.
|
||||
*/
|
||||
skb_push(skb, len);
|
||||
memmove(skb->data, skb->data + len, off);
|
||||
skb_postpush_data_move(skb, len, off);
|
||||
memset(skb->data + off, 0, len);
|
||||
|
||||
/* No skb_postpush_rcsum(skb, skb->data + off, len)
|
||||
@@ -3281,7 +3281,7 @@ static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
|
||||
old_data = skb->data;
|
||||
__skb_pull(skb, len);
|
||||
skb_postpull_rcsum(skb, old_data + off, len);
|
||||
memmove(skb->data, old_data, off);
|
||||
skb_postpull_data_move(skb, len, off);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -3326,10 +3326,11 @@ static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
|
||||
static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
|
||||
{
|
||||
const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
|
||||
const u8 meta_len = skb_metadata_len(skb);
|
||||
u32 off = skb_mac_header_len(skb);
|
||||
int ret;
|
||||
|
||||
ret = skb_cow(skb, len_diff);
|
||||
ret = skb_cow(skb, meta_len + len_diff);
|
||||
if (unlikely(ret < 0))
|
||||
return ret;
|
||||
|
||||
@@ -3489,6 +3490,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
|
||||
u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT;
|
||||
bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
|
||||
u16 mac_len = 0, inner_net = 0, inner_trans = 0;
|
||||
const u8 meta_len = skb_metadata_len(skb);
|
||||
unsigned int gso_type = SKB_GSO_DODGY;
|
||||
int ret;
|
||||
|
||||
@@ -3499,7 +3501,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
ret = skb_cow_head(skb, len_diff);
|
||||
ret = skb_cow_head(skb, meta_len + len_diff);
|
||||
if (unlikely(ret < 0))
|
||||
return ret;
|
||||
|
||||
@@ -3873,6 +3875,7 @@ static const struct bpf_func_proto sk_skb_change_tail_proto = {
|
||||
static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
|
||||
u64 flags)
|
||||
{
|
||||
const u8 meta_len = skb_metadata_len(skb);
|
||||
u32 max_len = BPF_SKB_MAX_LEN;
|
||||
u32 new_len = skb->len + head_room;
|
||||
int ret;
|
||||
@@ -3882,7 +3885,7 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
|
||||
new_len < skb->len))
|
||||
return -EINVAL;
|
||||
|
||||
ret = skb_cow(skb, head_room);
|
||||
ret = skb_cow(skb, meta_len + head_room);
|
||||
if (likely(!ret)) {
|
||||
/* Idea for this helper is that we currently only
|
||||
* allow to expand on mac header. This means that
|
||||
@@ -3894,6 +3897,7 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
|
||||
* for redirection into L2 device.
|
||||
*/
|
||||
__skb_push(skb, head_room);
|
||||
skb_postpush_data_move(skb, head_room, 0);
|
||||
memset(skb->data, 0, head_room);
|
||||
skb_reset_mac_header(skb);
|
||||
skb_reset_mac_len(skb);
|
||||
@@ -12102,6 +12106,18 @@ void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset)
|
||||
return skb_metadata_end(skb) - skb_metadata_len(skb) + offset;
|
||||
}
|
||||
|
||||
int __bpf_skb_meta_store_bytes(struct sk_buff *skb, u32 offset,
|
||||
const void *from, u32 len, u64 flags)
|
||||
{
|
||||
if (unlikely(flags))
|
||||
return -EINVAL;
|
||||
if (unlikely(bpf_try_make_writable(skb, 0)))
|
||||
return -EFAULT;
|
||||
|
||||
memmove(bpf_skb_meta_pointer(skb, offset), from, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
__bpf_kfunc_start_defs();
|
||||
__bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags,
|
||||
struct bpf_dynptr *ptr__uninit)
|
||||
@@ -12129,9 +12145,6 @@ __bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags,
|
||||
* XDP context with bpf_xdp_adjust_meta(). Serves as an alternative to
|
||||
* &__sk_buff->data_meta.
|
||||
*
|
||||
* If passed @skb_ is a clone which shares the data with the original, the
|
||||
* dynptr will be read-only. This limitation may be lifted in the future.
|
||||
*
|
||||
* Return:
|
||||
* * %0 - dynptr ready to use
|
||||
* * %-EINVAL - invalid flags, dynptr set to null
|
||||
@@ -12149,9 +12162,6 @@ __bpf_kfunc int bpf_dynptr_from_skb_meta(struct __sk_buff *skb_, u64 flags,
|
||||
|
||||
bpf_dynptr_init(ptr, skb, BPF_DYNPTR_TYPE_SKB_META, 0, skb_metadata_len(skb));
|
||||
|
||||
if (skb_cloned(skb))
|
||||
bpf_dynptr_set_rdonly(ptr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -2234,6 +2234,10 @@ EXPORT_SYMBOL(__pskb_copy_fclone);
|
||||
*
|
||||
* All the pointers pointing into skb header may change and must be
|
||||
* reloaded after call to this function.
|
||||
*
|
||||
* Note: If you skb_push() the start of the buffer after reallocating the
|
||||
* header, call skb_postpush_data_move() first to move the metadata out of
|
||||
* the way before writing to &sk_buff->data.
|
||||
*/
|
||||
|
||||
int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
|
||||
@@ -2305,8 +2309,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
|
||||
skb->nohdr = 0;
|
||||
atomic_set(&skb_shinfo(skb)->dataref, 1);
|
||||
|
||||
skb_metadata_clear(skb);
|
||||
|
||||
/* It is not generally safe to change skb->truesize.
|
||||
* For the moment, we really care of rx path, or
|
||||
* when skb is orphaned (not attached to a socket).
|
||||
|
||||
@@ -40,6 +40,7 @@
|
||||
#include <net/tcp.h>
|
||||
#include <net/tcp_ecn.h>
|
||||
#include <net/mptcp.h>
|
||||
#include <net/smc.h>
|
||||
#include <net/proto_memory.h>
|
||||
#include <net/psp.h>
|
||||
|
||||
@@ -802,34 +803,36 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp,
|
||||
mptcp_options_write(th, ptr, tp, opts);
|
||||
}
|
||||
|
||||
static void smc_set_option(const struct tcp_sock *tp,
|
||||
static void smc_set_option(struct tcp_sock *tp,
|
||||
struct tcp_out_options *opts,
|
||||
unsigned int *remaining)
|
||||
{
|
||||
#if IS_ENABLED(CONFIG_SMC)
|
||||
if (static_branch_unlikely(&tcp_have_smc)) {
|
||||
if (tp->syn_smc) {
|
||||
if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
|
||||
opts->options |= OPTION_SMC;
|
||||
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
|
||||
}
|
||||
if (static_branch_unlikely(&tcp_have_smc) && tp->syn_smc) {
|
||||
tp->syn_smc = !!smc_call_hsbpf(1, tp, syn_option);
|
||||
/* re-check syn_smc */
|
||||
if (tp->syn_smc &&
|
||||
*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
|
||||
opts->options |= OPTION_SMC;
|
||||
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void smc_set_option_cond(const struct tcp_sock *tp,
|
||||
const struct inet_request_sock *ireq,
|
||||
struct inet_request_sock *ireq,
|
||||
struct tcp_out_options *opts,
|
||||
unsigned int *remaining)
|
||||
{
|
||||
#if IS_ENABLED(CONFIG_SMC)
|
||||
if (static_branch_unlikely(&tcp_have_smc)) {
|
||||
if (tp->syn_smc && ireq->smc_ok) {
|
||||
if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
|
||||
opts->options |= OPTION_SMC;
|
||||
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
|
||||
}
|
||||
if (static_branch_unlikely(&tcp_have_smc) && tp->syn_smc && ireq->smc_ok) {
|
||||
ireq->smc_ok = !!smc_call_hsbpf(1, tp, synack_option, ireq);
|
||||
/* re-check smc_ok */
|
||||
if (ireq->smc_ok &&
|
||||
*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
|
||||
opts->options |= OPTION_SMC;
|
||||
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -19,3 +19,13 @@ config SMC_DIAG
|
||||
smcss.
|
||||
|
||||
if unsure, say Y.
|
||||
|
||||
config SMC_HS_CTRL_BPF
|
||||
bool "Generic eBPF hook for SMC handshake flow"
|
||||
depends on SMC && BPF_SYSCALL
|
||||
default y
|
||||
help
|
||||
SMC_HS_CTRL_BPF enables support to register generic eBPF hook for SMC
|
||||
handshake flow, which offer much greater flexibility in modifying the behavior
|
||||
of the SMC protocol stack compared to a complete kernel-based approach. Select
|
||||
this option if you want filtring the handshake process via eBPF programs.
|
||||
@@ -6,3 +6,4 @@ smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
|
||||
smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
|
||||
smc-y += smc_tracepoint.o smc_inet.o
|
||||
smc-$(CONFIG_SYSCTL) += smc_sysctl.o
|
||||
smc-$(CONFIG_SMC_HS_CTRL_BPF) += smc_hs_bpf.o
|
||||
|
||||
@@ -58,6 +58,7 @@
|
||||
#include "smc_tracepoint.h"
|
||||
#include "smc_sysctl.h"
|
||||
#include "smc_inet.h"
|
||||
#include "smc_hs_bpf.h"
|
||||
|
||||
static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
|
||||
* creation on server
|
||||
@@ -3600,8 +3601,16 @@ static int __init smc_init(void)
|
||||
pr_err("%s: smc_inet_init fails with %d\n", __func__, rc);
|
||||
goto out_ulp;
|
||||
}
|
||||
rc = bpf_smc_hs_ctrl_init();
|
||||
if (rc) {
|
||||
pr_err("%s: bpf_smc_hs_ctrl_init fails with %d\n", __func__,
|
||||
rc);
|
||||
goto out_inet;
|
||||
}
|
||||
static_branch_enable(&tcp_have_smc);
|
||||
return 0;
|
||||
out_inet:
|
||||
smc_inet_exit();
|
||||
out_ulp:
|
||||
tcp_unregister_ulp(&smc_ulp_ops);
|
||||
out_ib:
|
||||
|
||||
140
net/smc/smc_hs_bpf.c
Normal file
140
net/smc/smc_hs_bpf.c
Normal file
@@ -0,0 +1,140 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Shared Memory Communications over RDMA (SMC-R) and RoCE
|
||||
*
|
||||
* Generic hook for SMC handshake flow.
|
||||
*
|
||||
* Copyright IBM Corp. 2016
|
||||
* Copyright (c) 2025, Alibaba Inc.
|
||||
*
|
||||
* Author: D. Wythe <alibuda@linux.alibaba.com>
|
||||
*/
|
||||
|
||||
#include <linux/bpf_verifier.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/btf.h>
|
||||
#include <linux/rculist.h>
|
||||
|
||||
#include "smc_hs_bpf.h"
|
||||
|
||||
static DEFINE_SPINLOCK(smc_hs_ctrl_list_lock);
|
||||
static LIST_HEAD(smc_hs_ctrl_list);
|
||||
|
||||
static int smc_hs_ctrl_reg(struct smc_hs_ctrl *ctrl)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
spin_lock(&smc_hs_ctrl_list_lock);
|
||||
/* already exist or duplicate name */
|
||||
if (smc_hs_ctrl_find_by_name(ctrl->name))
|
||||
ret = -EEXIST;
|
||||
else
|
||||
list_add_tail_rcu(&ctrl->list, &smc_hs_ctrl_list);
|
||||
spin_unlock(&smc_hs_ctrl_list_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void smc_hs_ctrl_unreg(struct smc_hs_ctrl *ctrl)
|
||||
{
|
||||
spin_lock(&smc_hs_ctrl_list_lock);
|
||||
list_del_rcu(&ctrl->list);
|
||||
spin_unlock(&smc_hs_ctrl_list_lock);
|
||||
|
||||
/* Ensure that all readers to complete */
|
||||
synchronize_rcu();
|
||||
}
|
||||
|
||||
struct smc_hs_ctrl *smc_hs_ctrl_find_by_name(const char *name)
|
||||
{
|
||||
struct smc_hs_ctrl *ctrl;
|
||||
|
||||
list_for_each_entry_rcu(ctrl, &smc_hs_ctrl_list, list) {
|
||||
if (strcmp(ctrl->name, name) == 0)
|
||||
return ctrl;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int __smc_bpf_stub_set_tcp_option(struct tcp_sock *tp) { return 1; }
|
||||
static int __smc_bpf_stub_set_tcp_option_cond(const struct tcp_sock *tp,
|
||||
struct inet_request_sock *ireq)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
static struct smc_hs_ctrl __smc_bpf_hs_ctrl = {
|
||||
.syn_option = __smc_bpf_stub_set_tcp_option,
|
||||
.synack_option = __smc_bpf_stub_set_tcp_option_cond,
|
||||
};
|
||||
|
||||
static int smc_bpf_hs_ctrl_init(struct btf *btf) { return 0; }
|
||||
|
||||
static int smc_bpf_hs_ctrl_reg(void *kdata, struct bpf_link *link)
|
||||
{
|
||||
if (link)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
return smc_hs_ctrl_reg(kdata);
|
||||
}
|
||||
|
||||
static void smc_bpf_hs_ctrl_unreg(void *kdata, struct bpf_link *link)
|
||||
{
|
||||
smc_hs_ctrl_unreg(kdata);
|
||||
}
|
||||
|
||||
static int smc_bpf_hs_ctrl_init_member(const struct btf_type *t,
|
||||
const struct btf_member *member,
|
||||
void *kdata, const void *udata)
|
||||
{
|
||||
const struct smc_hs_ctrl *u_ctrl;
|
||||
struct smc_hs_ctrl *k_ctrl;
|
||||
u32 moff;
|
||||
|
||||
u_ctrl = (const struct smc_hs_ctrl *)udata;
|
||||
k_ctrl = (struct smc_hs_ctrl *)kdata;
|
||||
|
||||
moff = __btf_member_bit_offset(t, member) / 8;
|
||||
switch (moff) {
|
||||
case offsetof(struct smc_hs_ctrl, name):
|
||||
if (bpf_obj_name_cpy(k_ctrl->name, u_ctrl->name,
|
||||
sizeof(u_ctrl->name)) <= 0)
|
||||
return -EINVAL;
|
||||
return 1;
|
||||
case offsetof(struct smc_hs_ctrl, flags):
|
||||
if (u_ctrl->flags & ~SMC_HS_CTRL_ALL_FLAGS)
|
||||
return -EINVAL;
|
||||
k_ctrl->flags = u_ctrl->flags;
|
||||
return 1;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto *
|
||||
bpf_smc_hs_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
{
|
||||
return bpf_base_func_proto(func_id, prog);
|
||||
}
|
||||
|
||||
static const struct bpf_verifier_ops smc_bpf_verifier_ops = {
|
||||
.get_func_proto = bpf_smc_hs_func_proto,
|
||||
.is_valid_access = bpf_tracing_btf_ctx_access,
|
||||
};
|
||||
|
||||
static struct bpf_struct_ops bpf_smc_hs_ctrl_ops = {
|
||||
.name = "smc_hs_ctrl",
|
||||
.init = smc_bpf_hs_ctrl_init,
|
||||
.reg = smc_bpf_hs_ctrl_reg,
|
||||
.unreg = smc_bpf_hs_ctrl_unreg,
|
||||
.cfi_stubs = &__smc_bpf_hs_ctrl,
|
||||
.verifier_ops = &smc_bpf_verifier_ops,
|
||||
.init_member = smc_bpf_hs_ctrl_init_member,
|
||||
.owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
int bpf_smc_hs_ctrl_init(void)
|
||||
{
|
||||
return register_bpf_struct_ops(&bpf_smc_hs_ctrl_ops, smc_hs_ctrl);
|
||||
}
|
||||
31
net/smc/smc_hs_bpf.h
Normal file
31
net/smc/smc_hs_bpf.h
Normal file
@@ -0,0 +1,31 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Shared Memory Communications over RDMA (SMC-R) and RoCE
|
||||
*
|
||||
* Generic hook for SMC handshake flow.
|
||||
*
|
||||
* Copyright IBM Corp. 2016
|
||||
* Copyright (c) 2025, Alibaba Inc.
|
||||
*
|
||||
* Author: D. Wythe <alibuda@linux.alibaba.com>
|
||||
*/
|
||||
|
||||
#ifndef __SMC_HS_CTRL
|
||||
#define __SMC_HS_CTRL
|
||||
|
||||
#include <net/smc.h>
|
||||
|
||||
/* Find hs_ctrl by the target name, which required to be a c-string.
|
||||
* Return NULL if no such ctrl was found,otherwise, return a valid ctrl.
|
||||
*
|
||||
* Note: Caller MUST ensure it's was invoked under rcu_read_lock.
|
||||
*/
|
||||
struct smc_hs_ctrl *smc_hs_ctrl_find_by_name(const char *name);
|
||||
|
||||
#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
|
||||
int bpf_smc_hs_ctrl_init(void);
|
||||
#else
|
||||
static inline int bpf_smc_hs_ctrl_init(void) { return 0; }
|
||||
#endif /* CONFIG_SMC_HS_CTRL_BPF */
|
||||
|
||||
#endif /* __SMC_HS_CTRL */
|
||||
@@ -12,12 +12,14 @@
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <net/net_namespace.h>
|
||||
|
||||
#include "smc.h"
|
||||
#include "smc_core.h"
|
||||
#include "smc_llc.h"
|
||||
#include "smc_sysctl.h"
|
||||
#include "smc_hs_bpf.h"
|
||||
|
||||
static int min_sndbuf = SMC_BUF_MIN_SIZE;
|
||||
static int min_rcvbuf = SMC_BUF_MIN_SIZE;
|
||||
@@ -32,6 +34,69 @@ static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX;
|
||||
static unsigned int smcr_max_wr_min = 2;
|
||||
static unsigned int smcr_max_wr_max = 2048;
|
||||
|
||||
#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
|
||||
static int smc_net_replace_smc_hs_ctrl(struct net *net, const char *name)
|
||||
{
|
||||
struct smc_hs_ctrl *ctrl = NULL;
|
||||
|
||||
rcu_read_lock();
|
||||
/* null or empty name ask to clear current ctrl */
|
||||
if (name && name[0]) {
|
||||
ctrl = smc_hs_ctrl_find_by_name(name);
|
||||
if (!ctrl) {
|
||||
rcu_read_unlock();
|
||||
return -EINVAL;
|
||||
}
|
||||
/* no change, just return */
|
||||
if (ctrl == rcu_dereference(net->smc.hs_ctrl)) {
|
||||
rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
if (!bpf_try_module_get(ctrl, ctrl->owner)) {
|
||||
rcu_read_unlock();
|
||||
return -EBUSY;
|
||||
}
|
||||
}
|
||||
/* xhcg old ctrl with the new one atomically */
|
||||
ctrl = unrcu_pointer(xchg(&net->smc.hs_ctrl, RCU_INITIALIZER(ctrl)));
|
||||
/* release old ctrl */
|
||||
if (ctrl)
|
||||
bpf_module_put(ctrl, ctrl->owner);
|
||||
|
||||
rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int proc_smc_hs_ctrl(const struct ctl_table *ctl, int write,
|
||||
void *buffer, size_t *lenp, loff_t *ppos)
|
||||
{
|
||||
struct net *net = container_of(ctl->data, struct net, smc.hs_ctrl);
|
||||
char val[SMC_HS_CTRL_NAME_MAX];
|
||||
const struct ctl_table tbl = {
|
||||
.data = val,
|
||||
.maxlen = SMC_HS_CTRL_NAME_MAX,
|
||||
};
|
||||
struct smc_hs_ctrl *ctrl;
|
||||
int ret;
|
||||
|
||||
rcu_read_lock();
|
||||
ctrl = rcu_dereference(net->smc.hs_ctrl);
|
||||
if (ctrl)
|
||||
memcpy(val, ctrl->name, sizeof(ctrl->name));
|
||||
else
|
||||
val[0] = '\0';
|
||||
rcu_read_unlock();
|
||||
|
||||
ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (write)
|
||||
ret = smc_net_replace_smc_hs_ctrl(net, val);
|
||||
return ret;
|
||||
}
|
||||
#endif /* CONFIG_SMC_HS_CTRL_BPF */
|
||||
|
||||
static struct ctl_table smc_table[] = {
|
||||
{
|
||||
.procname = "autocorking_size",
|
||||
@@ -119,6 +184,15 @@ static struct ctl_table smc_table[] = {
|
||||
.extra1 = &smcr_max_wr_min,
|
||||
.extra2 = &smcr_max_wr_max,
|
||||
},
|
||||
#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
|
||||
{
|
||||
.procname = "hs_ctrl",
|
||||
.data = &init_net.smc.hs_ctrl,
|
||||
.mode = 0644,
|
||||
.maxlen = SMC_HS_CTRL_NAME_MAX,
|
||||
.proc_handler = proc_smc_hs_ctrl,
|
||||
},
|
||||
#endif /* CONFIG_SMC_HS_CTRL_BPF */
|
||||
};
|
||||
|
||||
int __net_init smc_sysctl_net_init(struct net *net)
|
||||
@@ -129,6 +203,16 @@ int __net_init smc_sysctl_net_init(struct net *net)
|
||||
table = smc_table;
|
||||
if (!net_eq(net, &init_net)) {
|
||||
int i;
|
||||
#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
|
||||
struct smc_hs_ctrl *ctrl;
|
||||
|
||||
rcu_read_lock();
|
||||
ctrl = rcu_dereference(init_net.smc.hs_ctrl);
|
||||
if (ctrl && ctrl->flags & SMC_HS_CTRL_FLAG_INHERITABLE &&
|
||||
bpf_try_module_get(ctrl, ctrl->owner))
|
||||
rcu_assign_pointer(net->smc.hs_ctrl, ctrl);
|
||||
rcu_read_unlock();
|
||||
#endif /* CONFIG_SMC_HS_CTRL_BPF */
|
||||
|
||||
table = kmemdup(table, sizeof(smc_table), GFP_KERNEL);
|
||||
if (!table)
|
||||
@@ -161,6 +245,9 @@ int __net_init smc_sysctl_net_init(struct net *net)
|
||||
if (!net_eq(net, &init_net))
|
||||
kfree(table);
|
||||
err_alloc:
|
||||
#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
|
||||
smc_net_replace_smc_hs_ctrl(net, NULL);
|
||||
#endif /* CONFIG_SMC_HS_CTRL_BPF */
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@@ -170,6 +257,10 @@ void __net_exit smc_sysctl_net_exit(struct net *net)
|
||||
|
||||
table = net->smc.smc_hdr->ctl_table_arg;
|
||||
unregister_net_sysctl_table(net->smc.smc_hdr);
|
||||
#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
|
||||
smc_net_replace_smc_hs_ctrl(net, NULL);
|
||||
#endif /* CONFIG_SMC_HS_CTRL_BPF */
|
||||
|
||||
if (!net_eq(net, &init_net))
|
||||
kfree(table);
|
||||
}
|
||||
|
||||
@@ -123,3 +123,8 @@ CONFIG_XDP_SOCKETS=y
|
||||
CONFIG_XFRM_INTERFACE=y
|
||||
CONFIG_TCP_CONG_DCTCP=y
|
||||
CONFIG_TCP_CONG_BBR=y
|
||||
CONFIG_INFINIBAND=y
|
||||
CONFIG_SMC=y
|
||||
CONFIG_SMC_HS_CTRL_BPF=y
|
||||
CONFIG_DIBS=y
|
||||
CONFIG_DIBS_LO=y
|
||||
390
tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c
Normal file
390
tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c
Normal file
@@ -0,0 +1,390 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <test_progs.h>
|
||||
#include <linux/genetlink.h>
|
||||
#include "network_helpers.h"
|
||||
#include "bpf_smc.skel.h"
|
||||
|
||||
#ifndef IPPROTO_SMC
|
||||
#define IPPROTO_SMC 256
|
||||
#endif
|
||||
|
||||
#define CLIENT_IP "127.0.0.1"
|
||||
#define SERVER_IP "127.0.1.0"
|
||||
#define SERVER_IP_VIA_RISK_PATH "127.0.2.0"
|
||||
|
||||
#define SERVICE_1 80
|
||||
#define SERVICE_2 443
|
||||
#define SERVICE_3 8443
|
||||
|
||||
#define TEST_NS "bpf_smc_netns"
|
||||
|
||||
static struct netns_obj *test_netns;
|
||||
|
||||
struct smc_policy_ip_key {
|
||||
__u32 sip;
|
||||
__u32 dip;
|
||||
};
|
||||
|
||||
struct smc_policy_ip_value {
|
||||
__u8 mode;
|
||||
};
|
||||
|
||||
#if defined(__s390x__)
|
||||
/* s390x has default seid */
|
||||
static bool setup_ueid(void) { return true; }
|
||||
static void cleanup_ueid(void) {}
|
||||
#else
|
||||
enum {
|
||||
SMC_NETLINK_ADD_UEID = 10,
|
||||
SMC_NETLINK_REMOVE_UEID
|
||||
};
|
||||
|
||||
enum {
|
||||
SMC_NLA_EID_TABLE_UNSPEC,
|
||||
SMC_NLA_EID_TABLE_ENTRY, /* string */
|
||||
};
|
||||
|
||||
struct msgtemplate {
|
||||
struct nlmsghdr n;
|
||||
struct genlmsghdr g;
|
||||
char buf[1024];
|
||||
};
|
||||
|
||||
#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))
|
||||
#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
|
||||
#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN))
|
||||
#define NLA_PAYLOAD(len) ((len) - NLA_HDRLEN)
|
||||
|
||||
#define SMC_GENL_FAMILY_NAME "SMC_GEN_NETLINK"
|
||||
#define SMC_BPFTEST_UEID "SMC-BPFTEST-UEID"
|
||||
|
||||
static uint16_t smc_nl_family_id = -1;
|
||||
|
||||
static int send_cmd(int fd, __u16 nlmsg_type, __u32 nlmsg_pid,
|
||||
__u16 nlmsg_flags, __u8 genl_cmd, __u16 nla_type,
|
||||
void *nla_data, int nla_len)
|
||||
{
|
||||
struct nlattr *na;
|
||||
struct sockaddr_nl nladdr;
|
||||
int r, buflen;
|
||||
char *buf;
|
||||
|
||||
struct msgtemplate msg = {0};
|
||||
|
||||
msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
|
||||
msg.n.nlmsg_type = nlmsg_type;
|
||||
msg.n.nlmsg_flags = nlmsg_flags;
|
||||
msg.n.nlmsg_seq = 0;
|
||||
msg.n.nlmsg_pid = nlmsg_pid;
|
||||
msg.g.cmd = genl_cmd;
|
||||
msg.g.version = 1;
|
||||
na = (struct nlattr *)GENLMSG_DATA(&msg);
|
||||
na->nla_type = nla_type;
|
||||
na->nla_len = nla_len + 1 + NLA_HDRLEN;
|
||||
memcpy(NLA_DATA(na), nla_data, nla_len);
|
||||
msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
|
||||
|
||||
buf = (char *)&msg;
|
||||
buflen = msg.n.nlmsg_len;
|
||||
memset(&nladdr, 0, sizeof(nladdr));
|
||||
nladdr.nl_family = AF_NETLINK;
|
||||
|
||||
while ((r = sendto(fd, buf, buflen, 0, (struct sockaddr *)&nladdr,
|
||||
sizeof(nladdr))) < buflen) {
|
||||
if (r > 0) {
|
||||
buf += r;
|
||||
buflen -= r;
|
||||
} else if (errno != EAGAIN) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool get_smc_nl_family_id(void)
|
||||
{
|
||||
struct sockaddr_nl nl_src;
|
||||
struct msgtemplate msg;
|
||||
struct nlattr *nl;
|
||||
int fd, ret;
|
||||
pid_t pid;
|
||||
|
||||
fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
|
||||
if (!ASSERT_OK_FD(fd, "nl_family socket"))
|
||||
return false;
|
||||
|
||||
pid = getpid();
|
||||
|
||||
memset(&nl_src, 0, sizeof(nl_src));
|
||||
nl_src.nl_family = AF_NETLINK;
|
||||
nl_src.nl_pid = pid;
|
||||
|
||||
ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src));
|
||||
if (!ASSERT_OK(ret, "nl_family bind"))
|
||||
goto fail;
|
||||
|
||||
ret = send_cmd(fd, GENL_ID_CTRL, pid,
|
||||
NLM_F_REQUEST, CTRL_CMD_GETFAMILY,
|
||||
CTRL_ATTR_FAMILY_NAME, (void *)SMC_GENL_FAMILY_NAME,
|
||||
strlen(SMC_GENL_FAMILY_NAME));
|
||||
if (!ASSERT_OK(ret, "nl_family query"))
|
||||
goto fail;
|
||||
|
||||
ret = recv(fd, &msg, sizeof(msg), 0);
|
||||
if (!ASSERT_FALSE(msg.n.nlmsg_type == NLMSG_ERROR || ret < 0 ||
|
||||
!NLMSG_OK(&msg.n, ret), "nl_family response"))
|
||||
goto fail;
|
||||
|
||||
nl = (struct nlattr *)GENLMSG_DATA(&msg);
|
||||
nl = (struct nlattr *)((char *)nl + NLA_ALIGN(nl->nla_len));
|
||||
if (!ASSERT_EQ(nl->nla_type, CTRL_ATTR_FAMILY_ID, "nl_family nla type"))
|
||||
goto fail;
|
||||
|
||||
smc_nl_family_id = *(uint16_t *)NLA_DATA(nl);
|
||||
close(fd);
|
||||
return true;
|
||||
fail:
|
||||
close(fd);
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool smc_ueid(int op)
|
||||
{
|
||||
struct sockaddr_nl nl_src;
|
||||
struct msgtemplate msg;
|
||||
struct nlmsgerr *err;
|
||||
char test_ueid[32];
|
||||
int fd, ret;
|
||||
pid_t pid;
|
||||
|
||||
/* UEID required */
|
||||
memset(test_ueid, '\x20', sizeof(test_ueid));
|
||||
memcpy(test_ueid, SMC_BPFTEST_UEID, strlen(SMC_BPFTEST_UEID));
|
||||
fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
|
||||
if (!ASSERT_OK_FD(fd, "ueid socket"))
|
||||
return false;
|
||||
|
||||
pid = getpid();
|
||||
memset(&nl_src, 0, sizeof(nl_src));
|
||||
nl_src.nl_family = AF_NETLINK;
|
||||
nl_src.nl_pid = pid;
|
||||
|
||||
ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src));
|
||||
if (!ASSERT_OK(ret, "ueid bind"))
|
||||
goto fail;
|
||||
|
||||
ret = send_cmd(fd, smc_nl_family_id, pid,
|
||||
NLM_F_REQUEST | NLM_F_ACK, op, SMC_NLA_EID_TABLE_ENTRY,
|
||||
(void *)test_ueid, sizeof(test_ueid));
|
||||
if (!ASSERT_OK(ret, "ueid cmd"))
|
||||
goto fail;
|
||||
|
||||
ret = recv(fd, &msg, sizeof(msg), 0);
|
||||
if (!ASSERT_FALSE(ret < 0 ||
|
||||
!NLMSG_OK(&msg.n, ret), "ueid response"))
|
||||
goto fail;
|
||||
|
||||
if (msg.n.nlmsg_type == NLMSG_ERROR) {
|
||||
err = NLMSG_DATA(&msg);
|
||||
switch (op) {
|
||||
case SMC_NETLINK_REMOVE_UEID:
|
||||
if (!ASSERT_FALSE((err->error && err->error != -ENOENT),
|
||||
"ueid remove"))
|
||||
goto fail;
|
||||
break;
|
||||
case SMC_NETLINK_ADD_UEID:
|
||||
if (!ASSERT_OK(err->error, "ueid add"))
|
||||
goto fail;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
close(fd);
|
||||
return true;
|
||||
fail:
|
||||
close(fd);
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool setup_ueid(void)
|
||||
{
|
||||
/* get smc nl id */
|
||||
if (!get_smc_nl_family_id())
|
||||
return false;
|
||||
/* clear old ueid for bpftest */
|
||||
smc_ueid(SMC_NETLINK_REMOVE_UEID);
|
||||
/* smc-loopback required ueid */
|
||||
return smc_ueid(SMC_NETLINK_ADD_UEID);
|
||||
}
|
||||
|
||||
static void cleanup_ueid(void)
|
||||
{
|
||||
smc_ueid(SMC_NETLINK_REMOVE_UEID);
|
||||
}
|
||||
#endif /* __s390x__ */
|
||||
|
||||
static bool setup_netns(void)
|
||||
{
|
||||
test_netns = netns_new(TEST_NS, true);
|
||||
if (!ASSERT_OK_PTR(test_netns, "open net namespace"))
|
||||
goto fail_netns;
|
||||
|
||||
SYS(fail_ip, "ip addr add 127.0.1.0/8 dev lo");
|
||||
SYS(fail_ip, "ip addr add 127.0.2.0/8 dev lo");
|
||||
|
||||
return true;
|
||||
fail_ip:
|
||||
netns_free(test_netns);
|
||||
fail_netns:
|
||||
return false;
|
||||
}
|
||||
|
||||
static void cleanup_netns(void)
|
||||
{
|
||||
netns_free(test_netns);
|
||||
}
|
||||
|
||||
static bool setup_smc(void)
|
||||
{
|
||||
if (!setup_ueid())
|
||||
return false;
|
||||
|
||||
if (!setup_netns())
|
||||
goto fail_netns;
|
||||
|
||||
return true;
|
||||
fail_netns:
|
||||
cleanup_ueid();
|
||||
return false;
|
||||
}
|
||||
|
||||
static int set_client_addr_cb(int fd, void *opts)
|
||||
{
|
||||
const char *src = (const char *)opts;
|
||||
struct sockaddr_in localaddr;
|
||||
|
||||
localaddr.sin_family = AF_INET;
|
||||
localaddr.sin_port = htons(0);
|
||||
localaddr.sin_addr.s_addr = inet_addr(src);
|
||||
return !ASSERT_OK(bind(fd, &localaddr, sizeof(localaddr)), "client bind");
|
||||
}
|
||||
|
||||
static void run_link(const char *src, const char *dst, int port)
|
||||
{
|
||||
struct network_helper_opts opts = {0};
|
||||
int server, client;
|
||||
|
||||
server = start_server_str(AF_INET, SOCK_STREAM, dst, port, NULL);
|
||||
if (!ASSERT_OK_FD(server, "start service_1"))
|
||||
return;
|
||||
|
||||
opts.proto = IPPROTO_TCP;
|
||||
opts.post_socket_cb = set_client_addr_cb;
|
||||
opts.cb_opts = (void *)src;
|
||||
|
||||
client = connect_to_fd_opts(server, &opts);
|
||||
if (!ASSERT_OK_FD(client, "start connect"))
|
||||
goto fail_client;
|
||||
|
||||
close(client);
|
||||
fail_client:
|
||||
close(server);
|
||||
}
|
||||
|
||||
static void block_link(int map_fd, const char *src, const char *dst)
|
||||
{
|
||||
struct smc_policy_ip_value val = { .mode = /* block */ 0 };
|
||||
struct smc_policy_ip_key key = {
|
||||
.sip = inet_addr(src),
|
||||
.dip = inet_addr(dst),
|
||||
};
|
||||
|
||||
bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
|
||||
}
|
||||
|
||||
/*
|
||||
* This test describes a real-life service topology as follows:
|
||||
*
|
||||
* +-------------> service_1
|
||||
* link 1 | |
|
||||
* +--------------------> server | link 2
|
||||
* | | V
|
||||
* | +-------------> service_2
|
||||
* | link 3
|
||||
* client -------------------> server_via_unsafe_path -> service_3
|
||||
*
|
||||
* Among them,
|
||||
* 1. link-1 is very suitable for using SMC.
|
||||
* 2. link-2 is not suitable for using SMC, because the mode of this link is
|
||||
* kind of short-link services.
|
||||
* 3. link-3 is also not suitable for using SMC, because the RDMA link is
|
||||
* unavailable and needs to go through a long timeout before it can fallback
|
||||
* to TCP.
|
||||
* To achieve this goal, we use a customized SMC ip strategy via smc_hs_ctrl.
|
||||
*/
|
||||
static void test_topo(void)
|
||||
{
|
||||
struct bpf_smc *skel;
|
||||
int rc, map_fd;
|
||||
|
||||
skel = bpf_smc__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "bpf_smc__open_and_load"))
|
||||
return;
|
||||
|
||||
rc = bpf_smc__attach(skel);
|
||||
if (!ASSERT_OK(rc, "bpf_smc__attach"))
|
||||
goto fail;
|
||||
|
||||
map_fd = bpf_map__fd(skel->maps.smc_policy_ip);
|
||||
if (!ASSERT_OK_FD(map_fd, "bpf_map__fd"))
|
||||
goto fail;
|
||||
|
||||
/* Mock the process of transparent replacement, since we will modify
|
||||
* protocol to ipproto_smc accropding to it via
|
||||
* fmod_ret/update_socket_protocol.
|
||||
*/
|
||||
write_sysctl("/proc/sys/net/smc/hs_ctrl", "linkcheck");
|
||||
|
||||
/* Configure ip strat */
|
||||
block_link(map_fd, CLIENT_IP, SERVER_IP_VIA_RISK_PATH);
|
||||
block_link(map_fd, SERVER_IP, SERVER_IP);
|
||||
|
||||
/* should go with smc */
|
||||
run_link(CLIENT_IP, SERVER_IP, SERVICE_1);
|
||||
/* should go with smc fallback */
|
||||
run_link(SERVER_IP, SERVER_IP, SERVICE_2);
|
||||
|
||||
ASSERT_EQ(skel->bss->smc_cnt, 2, "smc count");
|
||||
ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count");
|
||||
|
||||
/* should go with smc */
|
||||
run_link(CLIENT_IP, SERVER_IP, SERVICE_2);
|
||||
|
||||
ASSERT_EQ(skel->bss->smc_cnt, 3, "smc count");
|
||||
ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count");
|
||||
|
||||
/* should go with smc fallback */
|
||||
run_link(CLIENT_IP, SERVER_IP_VIA_RISK_PATH, SERVICE_3);
|
||||
|
||||
ASSERT_EQ(skel->bss->smc_cnt, 4, "smc count");
|
||||
ASSERT_EQ(skel->bss->fallback_cnt, 2, "fallback count");
|
||||
|
||||
fail:
|
||||
bpf_smc__destroy(skel);
|
||||
}
|
||||
|
||||
void test_bpf_smc(void)
|
||||
{
|
||||
if (!setup_smc()) {
|
||||
printf("setup for smc test failed, test SKIP:\n");
|
||||
test__skip();
|
||||
return;
|
||||
}
|
||||
|
||||
if (test__start_subtest("topo"))
|
||||
test_topo();
|
||||
|
||||
cleanup_ueid();
|
||||
cleanup_netns();
|
||||
}
|
||||
@@ -124,10 +124,10 @@ static int send_test_packet(int ifindex)
|
||||
int n, sock = -1;
|
||||
__u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
|
||||
|
||||
/* The ethernet header is not relevant for this test and doesn't need to
|
||||
* be meaningful.
|
||||
*/
|
||||
struct ethhdr eth = { 0 };
|
||||
/* We use the Ethernet header only to identify the test packet */
|
||||
struct ethhdr eth = {
|
||||
.h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF },
|
||||
};
|
||||
|
||||
memcpy(packet, ð, sizeof(eth));
|
||||
memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN);
|
||||
@@ -160,8 +160,16 @@ static int write_test_packet(int tap_fd)
|
||||
__u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
|
||||
int n;
|
||||
|
||||
/* The ethernet header doesn't need to be valid for this test */
|
||||
memset(packet, 0, sizeof(struct ethhdr));
|
||||
/* The Ethernet header is mostly not relevant. We use it to identify the
|
||||
* test packet and some BPF helpers we exercise expect to operate on
|
||||
* Ethernet frames carrying IP packets. Pretend that's the case.
|
||||
*/
|
||||
struct ethhdr eth = {
|
||||
.h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF },
|
||||
.h_proto = htons(ETH_P_IP),
|
||||
};
|
||||
|
||||
memcpy(packet, ð, sizeof(eth));
|
||||
memcpy(packet + sizeof(struct ethhdr), test_payload, TEST_PAYLOAD_LEN);
|
||||
|
||||
n = write(tap_fd, packet, sizeof(packet));
|
||||
@@ -171,31 +179,19 @@ static int write_test_packet(int tap_fd)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void assert_test_result(const struct bpf_map *result_map)
|
||||
static void dump_err_stream(const struct bpf_program *prog)
|
||||
{
|
||||
int err;
|
||||
__u32 map_key = 0;
|
||||
__u8 map_value[TEST_PAYLOAD_LEN];
|
||||
char buf[512];
|
||||
int ret;
|
||||
|
||||
err = bpf_map__lookup_elem(result_map, &map_key, sizeof(map_key),
|
||||
&map_value, TEST_PAYLOAD_LEN, BPF_ANY);
|
||||
if (!ASSERT_OK(err, "lookup test_result"))
|
||||
return;
|
||||
|
||||
ASSERT_MEMEQ(&map_value, &test_payload, TEST_PAYLOAD_LEN,
|
||||
"test_result map contains test payload");
|
||||
}
|
||||
|
||||
static bool clear_test_result(struct bpf_map *result_map)
|
||||
{
|
||||
const __u8 v[sizeof(test_payload)] = {};
|
||||
const __u32 k = 0;
|
||||
int err;
|
||||
|
||||
err = bpf_map__update_elem(result_map, &k, sizeof(k), v, sizeof(v), BPF_ANY);
|
||||
ASSERT_OK(err, "update test_result");
|
||||
|
||||
return err == 0;
|
||||
ret = 0;
|
||||
do {
|
||||
ret = bpf_prog_stream_read(bpf_program__fd(prog),
|
||||
BPF_STREAM_STDERR, buf, sizeof(buf),
|
||||
NULL);
|
||||
if (ret > 0)
|
||||
fwrite(buf, sizeof(buf[0]), ret, stderr);
|
||||
} while (ret > 0);
|
||||
}
|
||||
|
||||
void test_xdp_context_veth(void)
|
||||
@@ -270,11 +266,14 @@ void test_xdp_context_veth(void)
|
||||
if (!ASSERT_GE(tx_ifindex, 0, "if_nametoindex tx"))
|
||||
goto close;
|
||||
|
||||
skel->bss->test_pass = false;
|
||||
|
||||
ret = send_test_packet(tx_ifindex);
|
||||
if (!ASSERT_OK(ret, "send_test_packet"))
|
||||
goto close;
|
||||
|
||||
assert_test_result(skel->maps.test_result);
|
||||
if (!ASSERT_TRUE(skel->bss->test_pass, "test_pass"))
|
||||
dump_err_stream(tc_prog);
|
||||
|
||||
close:
|
||||
close_netns(nstoken);
|
||||
@@ -286,7 +285,7 @@ void test_xdp_context_veth(void)
|
||||
static void test_tuntap(struct bpf_program *xdp_prog,
|
||||
struct bpf_program *tc_prio_1_prog,
|
||||
struct bpf_program *tc_prio_2_prog,
|
||||
struct bpf_map *result_map)
|
||||
bool *test_pass)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
|
||||
LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
|
||||
@@ -295,8 +294,7 @@ static void test_tuntap(struct bpf_program *xdp_prog,
|
||||
int tap_ifindex;
|
||||
int ret;
|
||||
|
||||
if (!clear_test_result(result_map))
|
||||
return;
|
||||
*test_pass = false;
|
||||
|
||||
ns = netns_new(TAP_NETNS, true);
|
||||
if (!ASSERT_OK_PTR(ns, "create and open ns"))
|
||||
@@ -340,7 +338,8 @@ static void test_tuntap(struct bpf_program *xdp_prog,
|
||||
if (!ASSERT_OK(ret, "write_test_packet"))
|
||||
goto close;
|
||||
|
||||
assert_test_result(result_map);
|
||||
if (!ASSERT_TRUE(*test_pass, "test_pass"))
|
||||
dump_err_stream(tc_prio_2_prog ? : tc_prio_1_prog);
|
||||
|
||||
close:
|
||||
if (tap_fd >= 0)
|
||||
@@ -411,7 +410,8 @@ static void test_tuntap_mirred(struct bpf_program *xdp_prog,
|
||||
if (!ASSERT_OK(ret, "write_test_packet"))
|
||||
goto close;
|
||||
|
||||
ASSERT_TRUE(*test_pass, "test_pass");
|
||||
if (!ASSERT_TRUE(*test_pass, "test_pass"))
|
||||
dump_err_stream(tc_prog);
|
||||
|
||||
close:
|
||||
if (tap_fd >= 0)
|
||||
@@ -431,61 +431,82 @@ void test_xdp_context_tuntap(void)
|
||||
test_tuntap(skel->progs.ing_xdp,
|
||||
skel->progs.ing_cls,
|
||||
NULL, /* tc prio 2 */
|
||||
skel->maps.test_result);
|
||||
&skel->bss->test_pass);
|
||||
if (test__start_subtest("dynptr_read"))
|
||||
test_tuntap(skel->progs.ing_xdp,
|
||||
skel->progs.ing_cls_dynptr_read,
|
||||
NULL, /* tc prio 2 */
|
||||
skel->maps.test_result);
|
||||
&skel->bss->test_pass);
|
||||
if (test__start_subtest("dynptr_slice"))
|
||||
test_tuntap(skel->progs.ing_xdp,
|
||||
skel->progs.ing_cls_dynptr_slice,
|
||||
NULL, /* tc prio 2 */
|
||||
skel->maps.test_result);
|
||||
&skel->bss->test_pass);
|
||||
if (test__start_subtest("dynptr_write"))
|
||||
test_tuntap(skel->progs.ing_xdp_zalloc_meta,
|
||||
skel->progs.ing_cls_dynptr_write,
|
||||
skel->progs.ing_cls_dynptr_read,
|
||||
skel->maps.test_result);
|
||||
&skel->bss->test_pass);
|
||||
if (test__start_subtest("dynptr_slice_rdwr"))
|
||||
test_tuntap(skel->progs.ing_xdp_zalloc_meta,
|
||||
skel->progs.ing_cls_dynptr_slice_rdwr,
|
||||
skel->progs.ing_cls_dynptr_slice,
|
||||
skel->maps.test_result);
|
||||
&skel->bss->test_pass);
|
||||
if (test__start_subtest("dynptr_offset"))
|
||||
test_tuntap(skel->progs.ing_xdp_zalloc_meta,
|
||||
skel->progs.ing_cls_dynptr_offset_wr,
|
||||
skel->progs.ing_cls_dynptr_offset_rd,
|
||||
skel->maps.test_result);
|
||||
&skel->bss->test_pass);
|
||||
if (test__start_subtest("dynptr_offset_oob"))
|
||||
test_tuntap(skel->progs.ing_xdp,
|
||||
skel->progs.ing_cls_dynptr_offset_oob,
|
||||
skel->progs.ing_cls,
|
||||
skel->maps.test_result);
|
||||
if (test__start_subtest("clone_data_meta_empty_on_data_write"))
|
||||
&skel->bss->test_pass);
|
||||
if (test__start_subtest("clone_data_meta_survives_data_write"))
|
||||
test_tuntap_mirred(skel->progs.ing_xdp,
|
||||
skel->progs.clone_data_meta_empty_on_data_write,
|
||||
skel->progs.clone_data_meta_survives_data_write,
|
||||
&skel->bss->test_pass);
|
||||
if (test__start_subtest("clone_data_meta_empty_on_meta_write"))
|
||||
if (test__start_subtest("clone_data_meta_survives_meta_write"))
|
||||
test_tuntap_mirred(skel->progs.ing_xdp,
|
||||
skel->progs.clone_data_meta_empty_on_meta_write,
|
||||
skel->progs.clone_data_meta_survives_meta_write,
|
||||
&skel->bss->test_pass);
|
||||
if (test__start_subtest("clone_dynptr_empty_on_data_slice_write"))
|
||||
if (test__start_subtest("clone_meta_dynptr_survives_data_slice_write"))
|
||||
test_tuntap_mirred(skel->progs.ing_xdp,
|
||||
skel->progs.clone_dynptr_empty_on_data_slice_write,
|
||||
skel->progs.clone_meta_dynptr_survives_data_slice_write,
|
||||
&skel->bss->test_pass);
|
||||
if (test__start_subtest("clone_dynptr_empty_on_meta_slice_write"))
|
||||
if (test__start_subtest("clone_meta_dynptr_survives_meta_slice_write"))
|
||||
test_tuntap_mirred(skel->progs.ing_xdp,
|
||||
skel->progs.clone_dynptr_empty_on_meta_slice_write,
|
||||
skel->progs.clone_meta_dynptr_survives_meta_slice_write,
|
||||
&skel->bss->test_pass);
|
||||
if (test__start_subtest("clone_dynptr_rdonly_before_data_dynptr_write"))
|
||||
if (test__start_subtest("clone_meta_dynptr_rw_before_data_dynptr_write"))
|
||||
test_tuntap_mirred(skel->progs.ing_xdp,
|
||||
skel->progs.clone_dynptr_rdonly_before_data_dynptr_write,
|
||||
skel->progs.clone_meta_dynptr_rw_before_data_dynptr_write,
|
||||
&skel->bss->test_pass);
|
||||
if (test__start_subtest("clone_dynptr_rdonly_before_meta_dynptr_write"))
|
||||
if (test__start_subtest("clone_meta_dynptr_rw_before_meta_dynptr_write"))
|
||||
test_tuntap_mirred(skel->progs.ing_xdp,
|
||||
skel->progs.clone_dynptr_rdonly_before_meta_dynptr_write,
|
||||
skel->progs.clone_meta_dynptr_rw_before_meta_dynptr_write,
|
||||
&skel->bss->test_pass);
|
||||
/* Tests for BPF helpers which touch headroom */
|
||||
if (test__start_subtest("helper_skb_vlan_push_pop"))
|
||||
test_tuntap(skel->progs.ing_xdp,
|
||||
skel->progs.helper_skb_vlan_push_pop,
|
||||
NULL, /* tc prio 2 */
|
||||
&skel->bss->test_pass);
|
||||
if (test__start_subtest("helper_skb_adjust_room"))
|
||||
test_tuntap(skel->progs.ing_xdp,
|
||||
skel->progs.helper_skb_adjust_room,
|
||||
NULL, /* tc prio 2 */
|
||||
&skel->bss->test_pass);
|
||||
if (test__start_subtest("helper_skb_change_head_tail"))
|
||||
test_tuntap(skel->progs.ing_xdp,
|
||||
skel->progs.helper_skb_change_head_tail,
|
||||
NULL, /* tc prio 2 */
|
||||
&skel->bss->test_pass);
|
||||
if (test__start_subtest("helper_skb_change_proto"))
|
||||
test_tuntap(skel->progs.ing_xdp,
|
||||
skel->progs.helper_skb_change_proto,
|
||||
NULL, /* tc prio 2 */
|
||||
&skel->bss->test_pass);
|
||||
|
||||
test_xdp_meta__destroy(skel);
|
||||
}
|
||||
|
||||
117
tools/testing/selftests/bpf/progs/bpf_smc.c
Normal file
117
tools/testing/selftests/bpf/progs/bpf_smc.c
Normal file
@@ -0,0 +1,117 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "vmlinux.h"
|
||||
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include "bpf_tracing_net.h"
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
enum {
|
||||
BPF_SMC_LISTEN = 10,
|
||||
};
|
||||
|
||||
struct smc_sock___local {
|
||||
struct sock sk;
|
||||
struct smc_sock *listen_smc;
|
||||
bool use_fallback;
|
||||
} __attribute__((preserve_access_index));
|
||||
|
||||
int smc_cnt = 0;
|
||||
int fallback_cnt = 0;
|
||||
|
||||
SEC("fentry/smc_release")
|
||||
int BPF_PROG(bpf_smc_release, struct socket *sock)
|
||||
{
|
||||
/* only count from one side (client) */
|
||||
if (sock->sk->__sk_common.skc_state == BPF_SMC_LISTEN)
|
||||
return 0;
|
||||
smc_cnt++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("fentry/smc_switch_to_fallback")
|
||||
int BPF_PROG(bpf_smc_switch_to_fallback, struct smc_sock___local *smc)
|
||||
{
|
||||
/* only count from one side (client) */
|
||||
if (smc && !smc->listen_smc)
|
||||
fallback_cnt++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* go with default value if no strat was found */
|
||||
bool default_ip_strat_value = true;
|
||||
|
||||
struct smc_policy_ip_key {
|
||||
__u32 sip;
|
||||
__u32 dip;
|
||||
};
|
||||
|
||||
struct smc_policy_ip_value {
|
||||
__u8 mode;
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__uint(key_size, sizeof(struct smc_policy_ip_key));
|
||||
__uint(value_size, sizeof(struct smc_policy_ip_value));
|
||||
__uint(max_entries, 128);
|
||||
__uint(map_flags, BPF_F_NO_PREALLOC);
|
||||
} smc_policy_ip SEC(".maps");
|
||||
|
||||
static bool smc_check(__u32 src, __u32 dst)
|
||||
{
|
||||
struct smc_policy_ip_value *value;
|
||||
struct smc_policy_ip_key key = {
|
||||
.sip = src,
|
||||
.dip = dst,
|
||||
};
|
||||
|
||||
value = bpf_map_lookup_elem(&smc_policy_ip, &key);
|
||||
return value ? value->mode : default_ip_strat_value;
|
||||
}
|
||||
|
||||
SEC("fmod_ret/update_socket_protocol")
|
||||
int BPF_PROG(smc_run, int family, int type, int protocol)
|
||||
{
|
||||
struct task_struct *task;
|
||||
|
||||
if (family != AF_INET && family != AF_INET6)
|
||||
return protocol;
|
||||
|
||||
if ((type & 0xf) != SOCK_STREAM)
|
||||
return protocol;
|
||||
|
||||
if (protocol != 0 && protocol != IPPROTO_TCP)
|
||||
return protocol;
|
||||
|
||||
task = bpf_get_current_task_btf();
|
||||
/* Prevent from affecting other tests */
|
||||
if (!task || !task->nsproxy->net_ns->smc.hs_ctrl)
|
||||
return protocol;
|
||||
|
||||
return IPPROTO_SMC;
|
||||
}
|
||||
|
||||
SEC("struct_ops")
|
||||
int BPF_PROG(bpf_smc_set_tcp_option_cond, const struct tcp_sock *tp,
|
||||
struct inet_request_sock *ireq)
|
||||
{
|
||||
return smc_check(ireq->req.__req_common.skc_daddr,
|
||||
ireq->req.__req_common.skc_rcv_saddr);
|
||||
}
|
||||
|
||||
SEC("struct_ops")
|
||||
int BPF_PROG(bpf_smc_set_tcp_option, struct tcp_sock *tp)
|
||||
{
|
||||
return smc_check(tp->inet_conn.icsk_inet.sk.__sk_common.skc_rcv_saddr,
|
||||
tp->inet_conn.icsk_inet.sk.__sk_common.skc_daddr);
|
||||
}
|
||||
|
||||
SEC(".struct_ops")
|
||||
struct smc_hs_ctrl linkcheck = {
|
||||
.name = "linkcheck",
|
||||
.syn_option = (void *)bpf_smc_set_tcp_option,
|
||||
.synack_option = (void *)bpf_smc_set_tcp_option_cond,
|
||||
};
|
||||
@@ -4,6 +4,7 @@
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/pkt_cls.h>
|
||||
|
||||
#include <bpf/bpf_endian.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include "bpf_kfuncs.h"
|
||||
|
||||
@@ -11,37 +12,72 @@
|
||||
|
||||
#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem
|
||||
|
||||
/* Demonstrates how metadata can be passed from an XDP program to a TC program
|
||||
* using bpf_xdp_adjust_meta.
|
||||
* For the sake of testing the metadata support in drivers, the XDP program uses
|
||||
* a fixed-size payload after the Ethernet header as metadata. The TC program
|
||||
* copies the metadata it receives into a map so it can be checked from
|
||||
* userspace.
|
||||
/* Demonstrate passing metadata from XDP to TC using bpf_xdp_adjust_meta.
|
||||
*
|
||||
* The XDP program extracts a fixed-size payload following the Ethernet header
|
||||
* and stores it as packet metadata to test the driver's metadata support. The
|
||||
* TC program then verifies if the passed metadata is correct.
|
||||
*/
|
||||
|
||||
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__uint(value_size, META_SIZE);
} test_result SEC(".maps");

bool test_pass;

static const __u8 smac_want[ETH_ALEN] = {
	0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF,
};

static const __u8 meta_want[META_SIZE] = {
	0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
	0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
	0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
};

static bool check_smac(const struct ethhdr *eth)
{
	return !__builtin_memcmp(eth->h_source, smac_want, ETH_ALEN);
}

static bool check_metadata(const char *file, int line, __u8 *meta_have)
{
	if (!__builtin_memcmp(meta_have, meta_want, META_SIZE))
		return true;

	bpf_stream_printk(BPF_STREAM_STDERR,
			  "FAIL:%s:%d: metadata mismatch\n"
			  " have:\n %pI6\n %pI6\n"
			  " want:\n %pI6\n %pI6\n",
			  file, line,
			  &meta_have[0x00], &meta_have[0x10],
			  &meta_want[0x00], &meta_want[0x10]);
	return false;
}

#define check_metadata(meta_have) check_metadata(__FILE__, __LINE__, meta_have)

static bool check_skb_metadata(const char *file, int line, struct __sk_buff *skb)
{
	__u8 *data_meta = ctx_ptr(skb, data_meta);
	__u8 *data = ctx_ptr(skb, data);

	return data_meta + META_SIZE <= data && (check_metadata)(file, line, data_meta);
}

#define check_skb_metadata(skb) check_skb_metadata(__FILE__, __LINE__, skb)

SEC("tc")
|
||||
int ing_cls(struct __sk_buff *ctx)
|
||||
{
|
||||
__u8 *data, *data_meta;
|
||||
__u32 key = 0;
|
||||
__u8 *meta_have = ctx_ptr(ctx, data_meta);
|
||||
__u8 *data = ctx_ptr(ctx, data);
|
||||
|
||||
data_meta = ctx_ptr(ctx, data_meta);
|
||||
data = ctx_ptr(ctx, data);
|
||||
if (meta_have + META_SIZE > data)
|
||||
goto out;
|
||||
|
||||
if (data_meta + META_SIZE > data)
|
||||
return TC_ACT_SHOT;
|
||||
|
||||
bpf_map_update_elem(&test_result, &key, data_meta, BPF_ANY);
|
||||
if (!check_metadata(meta_have))
|
||||
goto out;
|
||||
|
||||
test_pass = true;
|
||||
out:
|
||||
return TC_ACT_SHOT;
|
||||
}
|
||||
|
||||
@@ -49,17 +85,17 @@ int ing_cls(struct __sk_buff *ctx)
SEC("tc")
int ing_cls_dynptr_read(struct __sk_buff *ctx)
{
	__u8 meta_have[META_SIZE];
	struct bpf_dynptr meta;
	const __u32 zero = 0;
	__u8 *dst;

	dst = bpf_map_lookup_elem(&test_result, &zero);
	if (!dst)
		return TC_ACT_SHOT;

	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	bpf_dynptr_read(dst, META_SIZE, &meta, 0, 0);
	bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);

	if (!check_metadata(meta_have))
		goto out;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

@@ -86,20 +122,18 @@ SEC("tc")
int ing_cls_dynptr_slice(struct __sk_buff *ctx)
{
	struct bpf_dynptr meta;
	const __u32 zero = 0;
	__u8 *dst, *src;

	dst = bpf_map_lookup_elem(&test_result, &zero);
	if (!dst)
		return TC_ACT_SHOT;
	__u8 *meta_have;

	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	src = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE);
	if (!src)
		return TC_ACT_SHOT;
	meta_have = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE);
	if (!meta_have)
		goto out;

	__builtin_memcpy(dst, src, META_SIZE);
	if (!check_metadata(meta_have))
		goto out;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

@@ -129,14 +163,12 @@ int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx)
SEC("tc")
int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx)
{
	struct bpf_dynptr meta;
	const __u32 chunk_len = META_SIZE / 4;
	const __u32 zero = 0;
	__u8 meta_have[META_SIZE];
	struct bpf_dynptr meta;
	__u8 *dst, *src;

	dst = bpf_map_lookup_elem(&test_result, &zero);
	if (!dst)
		return TC_ACT_SHOT;
	dst = meta_have;

	/* 1. Regular read */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
@@ -155,9 +187,14 @@ int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx)
	/* 4. Read from a slice starting at an offset */
	src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len);
	if (!src)
		return TC_ACT_SHOT;
		goto out;
	__builtin_memcpy(dst, src, chunk_len);

	if (!check_metadata(meta_have))
		goto out;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

@@ -254,7 +291,7 @@ int ing_xdp_zalloc_meta(struct xdp_md *ctx)
	/* Drop any non-test packets */
	if (eth + 1 > ctx_ptr(ctx, data_end))
		return XDP_DROP;
	if (eth->h_proto != 0)
	if (!check_smac(eth))
		return XDP_DROP;

	ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
@@ -294,9 +331,9 @@ int ing_xdp(struct xdp_md *ctx)

	/* The Linux networking stack may send other packets on the test
	 * interface that interfere with the test. Just drop them.
	 * The test packets can be recognized by their ethertype of zero.
	 * The test packets can be recognized by their source MAC address.
	 */
	if (eth->h_proto != 0)
	if (!check_smac(eth))
		return XDP_DROP;

	__builtin_memcpy(data_meta, payload, META_SIZE);
@@ -304,22 +341,25 @@ int ing_xdp(struct xdp_md *ctx)
}

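For context (not part of the diff): the pattern ing_xdp follows is to grow the metadata area in front of the packet with bpf_xdp_adjust_meta, re-derive the packet pointers, and copy the first META_SIZE payload bytes into the metadata area so a TC program can verify them later. A condensed sketch of that pattern, assuming the ctx_ptr macro and META_SIZE definition from this file, that META_SIZE is a multiple of four (as the helper requires), and using a hypothetical program name:

SEC("xdp")
int xdp_store_meta_sketch(struct xdp_md *ctx)	/* hypothetical name, illustration only */
{
	__u8 *data, *data_end, *data_meta, *payload;

	/* A negative delta grows the metadata area in front of the packet. */
	if (bpf_xdp_adjust_meta(ctx, -META_SIZE))
		return XDP_DROP;

	/* Pointers must be re-derived after any adjust helper. */
	data_meta = ctx_ptr(ctx, data_meta);
	data = ctx_ptr(ctx, data);
	data_end = ctx_ptr(ctx, data_end);
	payload = data + sizeof(struct ethhdr);

	/* Bounds checks keep the verifier happy. */
	if (data_meta + META_SIZE > data)
		return XDP_DROP;
	if (payload + META_SIZE > data_end)
		return XDP_DROP;

	__builtin_memcpy(data_meta, payload, META_SIZE);
	return XDP_PASS;
}
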
/*
 * Check that skb->data_meta..skb->data is empty if prog writes to packet
 * _payload_ using packet pointers. Applies only to cloned skbs.
 * Check that, when operating on a cloned packet, skb->data_meta..skb->data is
 * kept intact if prog writes to packet _payload_ using packet pointers.
 */
SEC("tc")
int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx)
int clone_data_meta_survives_data_write(struct __sk_buff *ctx)
{
	__u8 *meta_have = ctx_ptr(ctx, data_meta);
	struct ethhdr *eth = ctx_ptr(ctx, data);

	if (eth + 1 > ctx_ptr(ctx, data_end))
		goto out;
	/* Ignore non-test packets */
	if (eth->h_proto != 0)
	if (!check_smac(eth))
		goto out;

	/* Expect no metadata */
	if (ctx->data_meta != ctx->data)
	if (meta_have + META_SIZE > eth)
		goto out;

	if (!check_metadata(meta_have))
		goto out;

	/* Packet write to trigger unclone in prologue */
@@ -331,40 +371,44 @@ int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx)
}

/*
 * Check that skb->data_meta..skb->data is empty if prog writes to packet
 * _metadata_ using packet pointers. Applies only to cloned skbs.
 * Check that, when operating on a cloned packet, skb->data_meta..skb->data is
 * kept intact if prog writes to packet _metadata_ using packet pointers.
 */
SEC("tc")
int clone_data_meta_empty_on_meta_write(struct __sk_buff *ctx)
int clone_data_meta_survives_meta_write(struct __sk_buff *ctx)
{
	__u8 *meta_have = ctx_ptr(ctx, data_meta);
	struct ethhdr *eth = ctx_ptr(ctx, data);
	__u8 *md = ctx_ptr(ctx, data_meta);

	if (eth + 1 > ctx_ptr(ctx, data_end))
		goto out;
	/* Ignore non-test packets */
	if (eth->h_proto != 0)
	if (!check_smac(eth))
		goto out;

	if (md + 1 > ctx_ptr(ctx, data)) {
		/* Expect no metadata */
		test_pass = true;
	} else {
		/* Metadata write to trigger unclone in prologue */
		*md = 42;
	}
	if (meta_have + META_SIZE > eth)
		goto out;

	if (!check_metadata(meta_have))
		goto out;

	/* Metadata write to trigger unclone in prologue */
	*meta_have = 42;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

/*
 * Check that skb_meta dynptr is writable but empty if prog writes to packet
 * _payload_ using a dynptr slice. Applies only to cloned skbs.
 * Check that, when operating on a cloned packet, metadata remains intact if
 * prog creates a r/w slice to packet _payload_.
 */
SEC("tc")
int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx)
int clone_meta_dynptr_survives_data_slice_write(struct __sk_buff *ctx)
{
	struct bpf_dynptr data, meta;
	__u8 meta_have[META_SIZE];
	struct ethhdr *eth;

	bpf_dynptr_from_skb(ctx, 0, &data);
@@ -372,51 +416,45 @@ int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx)
	if (!eth)
		goto out;
	/* Ignore non-test packets */
	if (eth->h_proto != 0)
	if (!check_smac(eth))
		goto out;

	/* Expect no metadata */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0)
	bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
	if (!check_metadata(meta_have))
		goto out;

	/* Packet write to trigger unclone in prologue */
	eth->h_proto = 42;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

/*
 * Check that skb_meta dynptr is writable but empty if prog writes to packet
 * _metadata_ using a dynptr slice. Applies only to cloned skbs.
 * Check that, when operating on a cloned packet, metadata remains intact if
 * prog creates an r/w slice to packet _metadata_.
 */
SEC("tc")
int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx)
int clone_meta_dynptr_survives_meta_slice_write(struct __sk_buff *ctx)
{
	struct bpf_dynptr data, meta;
	const struct ethhdr *eth;
	__u8 *md;
	__u8 *meta_have;

	bpf_dynptr_from_skb(ctx, 0, &data);
	eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
	if (!eth)
		goto out;
	/* Ignore non-test packets */
	if (eth->h_proto != 0)
	if (!check_smac(eth))
		goto out;

	/* Expect no metadata */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0)
	meta_have = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE);
	if (!meta_have)
		goto out;

	/* Metadata write to trigger unclone in prologue */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
	if (md)
		*md = 42;
	if (!check_metadata(meta_have))
		goto out;

	test_pass = true;
out:
@@ -424,34 +462,40 @@ int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx)
}

/*
 * Check that skb_meta dynptr is read-only before prog writes to packet payload
 * using dynptr_write helper. Applies only to cloned skbs.
 * Check that, when operating on a cloned packet, skb_meta dynptr is read-write
 * before prog writes to packet _payload_ using dynptr_write helper and metadata
 * remains intact before and after the write.
 */
SEC("tc")
int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx)
int clone_meta_dynptr_rw_before_data_dynptr_write(struct __sk_buff *ctx)
{
	struct bpf_dynptr data, meta;
	__u8 meta_have[META_SIZE];
	const struct ethhdr *eth;
	int err;

	bpf_dynptr_from_skb(ctx, 0, &data);
	eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
	if (!eth)
		goto out;
	/* Ignore non-test packets */
	if (eth->h_proto != 0)
	if (!check_smac(eth))
		goto out;

	/* Expect read-only metadata before unclone */
	/* Expect read-write metadata before unclone */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE)
	if (bpf_dynptr_is_rdonly(&meta))
		goto out;

	err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
	if (err || !check_metadata(meta_have))
		goto out;

	/* Helper write to payload will unclone the packet */
	bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0);

	/* Expect no metadata after unclone */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != 0)
	err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
	if (err || !check_metadata(meta_have))
		goto out;

	test_pass = true;
@@ -460,31 +504,165 @@ int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx)
}

/*
 * Check that skb_meta dynptr is read-only if prog writes to packet
 * metadata using dynptr_write helper. Applies only to cloned skbs.
 * Check that, when operating on a cloned packet, skb_meta dynptr is read-write
 * before prog writes to packet _metadata_ using dynptr_write helper and
 * metadata remains intact before and after the write.
 */
SEC("tc")
int clone_dynptr_rdonly_before_meta_dynptr_write(struct __sk_buff *ctx)
int clone_meta_dynptr_rw_before_meta_dynptr_write(struct __sk_buff *ctx)
{
	struct bpf_dynptr data, meta;
	__u8 meta_have[META_SIZE];
	const struct ethhdr *eth;
	int err;

	bpf_dynptr_from_skb(ctx, 0, &data);
	eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
	if (!eth)
		goto out;
	/* Ignore non-test packets */
	if (eth->h_proto != 0)
	if (!check_smac(eth))
		goto out;

	/* Expect read-only metadata */
	/* Expect read-write metadata before unclone */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE)
	if (bpf_dynptr_is_rdonly(&meta))
		goto out;

	/* Metadata write. Expect failure. */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	if (bpf_dynptr_write(&meta, 0, "x", 1, 0) != -EINVAL)
	err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
	if (err || !check_metadata(meta_have))
		goto out;

	/* Helper write to metadata will unclone the packet */
	bpf_dynptr_write(&meta, 0, &meta_have[0], 1, 0);

	err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
	if (err || !check_metadata(meta_have))
		goto out;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

SEC("tc")
|
||||
int helper_skb_vlan_push_pop(struct __sk_buff *ctx)
|
||||
{
|
||||
int err;
|
||||
|
||||
/* bpf_skb_vlan_push assumes HW offload for primary VLAN tag. Only
|
||||
* secondary tag push triggers an actual MAC header modification.
|
||||
*/
|
||||
err = bpf_skb_vlan_push(ctx, 0, 42);
|
||||
if (err)
|
||||
goto out;
|
||||
err = bpf_skb_vlan_push(ctx, 0, 207);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (!check_skb_metadata(ctx))
|
||||
goto out;
|
||||
|
||||
err = bpf_skb_vlan_pop(ctx);
|
||||
if (err)
|
||||
goto out;
|
||||
err = bpf_skb_vlan_pop(ctx);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (!check_skb_metadata(ctx))
|
||||
goto out;
|
||||
|
||||
test_pass = true;
|
||||
out:
|
||||
return TC_ACT_SHOT;
|
||||
}
|
||||
|
||||
SEC("tc")
|
||||
int helper_skb_adjust_room(struct __sk_buff *ctx)
|
||||
{
|
||||
int err;
|
||||
|
||||
/* Grow a 1 byte hole after the MAC header */
|
||||
err = bpf_skb_adjust_room(ctx, 1, BPF_ADJ_ROOM_MAC, 0);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (!check_skb_metadata(ctx))
|
||||
goto out;
|
||||
|
||||
/* Shrink a 1 byte hole after the MAC header */
|
||||
err = bpf_skb_adjust_room(ctx, -1, BPF_ADJ_ROOM_MAC, 0);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (!check_skb_metadata(ctx))
|
||||
goto out;
|
||||
|
||||
/* Grow a 256 byte hole to trigger head reallocation */
|
||||
err = bpf_skb_adjust_room(ctx, 256, BPF_ADJ_ROOM_MAC, 0);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (!check_skb_metadata(ctx))
|
||||
goto out;
|
||||
|
||||
test_pass = true;
|
||||
out:
|
||||
return TC_ACT_SHOT;
|
||||
}
|
||||
|
||||
SEC("tc")
|
||||
int helper_skb_change_head_tail(struct __sk_buff *ctx)
|
||||
{
|
||||
int err;
|
||||
|
||||
/* Reserve 1 extra in the front for packet data */
|
||||
err = bpf_skb_change_head(ctx, 1, 0);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (!check_skb_metadata(ctx))
|
||||
goto out;
|
||||
|
||||
/* Reserve 256 extra bytes in the front to trigger head reallocation */
|
||||
err = bpf_skb_change_head(ctx, 256, 0);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (!check_skb_metadata(ctx))
|
||||
goto out;
|
||||
|
||||
/* Reserve 4k extra bytes in the back to trigger head reallocation */
|
||||
err = bpf_skb_change_tail(ctx, ctx->len + 4096, 0);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (!check_skb_metadata(ctx))
|
||||
goto out;
|
||||
|
||||
test_pass = true;
|
||||
out:
|
||||
return TC_ACT_SHOT;
|
||||
}
|
||||
|
||||
SEC("tc")
|
||||
int helper_skb_change_proto(struct __sk_buff *ctx)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IPV6), 0);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (!check_skb_metadata(ctx))
|
||||
goto out;
|
||||
|
||||
err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IP), 0);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (!check_skb_metadata(ctx))
|
||||
goto out;
|
||||
|
||||
test_pass = true;
|
||||
|
||||