Merge branch 'netkit: Add option for scrubbing skb meta data'

Daniel Borkmann says:

=====================
This series is to add a NETKIT_SCRUB_NONE mode such that
the netkit device will not scrub the skb->{mark, priority} before
running the netkit bpf prog. This will allow the netkit bpf prog to
implement different policies based on the skb->{mark, priority}.

The default mode NETKIT_SCRUB_DEFAULT will always scrub
the skb->{mark, priority} before calling the netkit bpf prog. This
is the existing behavior of the netkit device and this change
will not affect the existing netkit users.
=====================

Link: https://lore.kernel.org/r/20241004101335.117711-1-daniel@iogearbox.net
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
This commit is contained in:
Martin KaFai Lau
2024-10-07 18:42:40 -07:00
6 changed files with 736 additions and 44 deletions

View File

@@ -920,6 +920,13 @@ definitions:
- name: l2
- name: l3
-
name: netkit-scrub
type: enum
entries:
- name: none
- name: default
attribute-sets:
-
name: link-attrs
@@ -2147,6 +2154,14 @@ attribute-sets:
name: mode
type: u32
enum: netkit-mode
-
name: scrub
type: u32
enum: netkit-scrub
-
name: peer-scrub
type: u32
enum: netkit-scrub
sub-messages:
-

View File

@@ -20,6 +20,7 @@ struct netkit {
struct net_device __rcu *peer;
struct bpf_mprog_entry __rcu *active;
enum netkit_action policy;
enum netkit_scrub scrub;
struct bpf_mprog_bundle bundle;
/* Needed in slow-path */
@@ -50,12 +51,24 @@ netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
return ret;
}
static void netkit_prep_forward(struct sk_buff *skb, bool xnet)
static void netkit_xnet(struct sk_buff *skb)
{
skb_scrub_packet(skb, xnet);
skb->priority = 0;
skb->mark = 0;
}
static void netkit_prep_forward(struct sk_buff *skb,
bool xnet, bool xnet_scrub)
{
skb_scrub_packet(skb, false);
nf_skip_egress(skb, true);
skb_reset_mac_header(skb);
if (!xnet)
return;
ipvs_reset(skb);
skb_clear_tstamp(skb);
if (xnet_scrub)
netkit_xnet(skb);
}
static struct netkit *netkit_priv(const struct net_device *dev)
@@ -80,7 +93,8 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
!pskb_may_pull(skb, ETH_HLEN) ||
skb_orphan_frags(skb, GFP_ATOMIC)))
goto drop;
netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)));
netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)),
nk->scrub);
eth_skb_pkt_type(skb, peer);
skb->dev = peer;
entry = rcu_dereference(nk->active);
@@ -297,20 +311,6 @@ static int netkit_check_policy(int policy, struct nlattr *tb,
}
}
static int netkit_check_mode(int mode, struct nlattr *tb,
struct netlink_ext_ack *extack)
{
switch (mode) {
case NETKIT_L2:
case NETKIT_L3:
return 0;
default:
NL_SET_ERR_MSG_ATTR(extack, tb,
"Provided device mode can only be L2 or L3");
return -EINVAL;
}
}
static int netkit_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
@@ -332,8 +332,10 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
struct netlink_ext_ack *extack)
{
struct nlattr *peer_tb[IFLA_MAX + 1], **tbp = tb, *attr;
enum netkit_action default_prim = NETKIT_PASS;
enum netkit_action default_peer = NETKIT_PASS;
enum netkit_action policy_prim = NETKIT_PASS;
enum netkit_action policy_peer = NETKIT_PASS;
enum netkit_scrub scrub_prim = NETKIT_SCRUB_DEFAULT;
enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT;
enum netkit_mode mode = NETKIT_L3;
unsigned char ifname_assign_type;
struct ifinfomsg *ifmp = NULL;
@@ -344,13 +346,8 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
int err;
if (data) {
if (data[IFLA_NETKIT_MODE]) {
attr = data[IFLA_NETKIT_MODE];
mode = nla_get_u32(attr);
err = netkit_check_mode(mode, attr, extack);
if (err < 0)
return err;
}
if (data[IFLA_NETKIT_MODE])
mode = nla_get_u32(data[IFLA_NETKIT_MODE]);
if (data[IFLA_NETKIT_PEER_INFO]) {
attr = data[IFLA_NETKIT_PEER_INFO];
ifmp = nla_data(attr);
@@ -362,17 +359,21 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
return err;
tbp = peer_tb;
}
if (data[IFLA_NETKIT_SCRUB])
scrub_prim = nla_get_u32(data[IFLA_NETKIT_SCRUB]);
if (data[IFLA_NETKIT_PEER_SCRUB])
scrub_peer = nla_get_u32(data[IFLA_NETKIT_PEER_SCRUB]);
if (data[IFLA_NETKIT_POLICY]) {
attr = data[IFLA_NETKIT_POLICY];
default_prim = nla_get_u32(attr);
err = netkit_check_policy(default_prim, attr, extack);
policy_prim = nla_get_u32(attr);
err = netkit_check_policy(policy_prim, attr, extack);
if (err < 0)
return err;
}
if (data[IFLA_NETKIT_PEER_POLICY]) {
attr = data[IFLA_NETKIT_PEER_POLICY];
default_peer = nla_get_u32(attr);
err = netkit_check_policy(default_peer, attr, extack);
policy_peer = nla_get_u32(attr);
err = netkit_check_policy(policy_peer, attr, extack);
if (err < 0)
return err;
}
@@ -409,7 +410,8 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
nk = netkit_priv(peer);
nk->primary = false;
nk->policy = default_peer;
nk->policy = policy_peer;
nk->scrub = scrub_peer;
nk->mode = mode;
bpf_mprog_bundle_init(&nk->bundle);
@@ -434,7 +436,8 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
nk = netkit_priv(dev);
nk->primary = true;
nk->policy = default_prim;
nk->policy = policy_prim;
nk->scrub = scrub_prim;
nk->mode = mode;
bpf_mprog_bundle_init(&nk->bundle);
@@ -874,6 +877,18 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[],
return -EACCES;
}
if (data[IFLA_NETKIT_SCRUB]) {
NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_SCRUB],
"netkit scrubbing cannot be changed after device creation");
return -EACCES;
}
if (data[IFLA_NETKIT_PEER_SCRUB]) {
NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_SCRUB],
"netkit scrubbing cannot be changed after device creation");
return -EACCES;
}
if (data[IFLA_NETKIT_PEER_INFO]) {
NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_INFO],
"netkit peer info cannot be changed after device creation");
@@ -908,8 +923,10 @@ static size_t netkit_get_size(const struct net_device *dev)
{
return nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_POLICY */
nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_POLICY */
nla_total_size(sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */
nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_SCRUB */
nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_SCRUB */
nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_MODE */
nla_total_size(sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */
0;
}
@@ -924,11 +941,15 @@ static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev)
return -EMSGSIZE;
if (nla_put_u32(skb, IFLA_NETKIT_MODE, nk->mode))
return -EMSGSIZE;
if (nla_put_u32(skb, IFLA_NETKIT_SCRUB, nk->scrub))
return -EMSGSIZE;
if (peer) {
nk = netkit_priv(peer);
if (nla_put_u32(skb, IFLA_NETKIT_PEER_POLICY, nk->policy))
return -EMSGSIZE;
if (nla_put_u32(skb, IFLA_NETKIT_PEER_SCRUB, nk->scrub))
return -EMSGSIZE;
}
return 0;
@@ -936,9 +957,11 @@ static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev)
static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = {
[IFLA_NETKIT_PEER_INFO] = { .len = sizeof(struct ifinfomsg) },
[IFLA_NETKIT_MODE] = NLA_POLICY_MAX(NLA_U32, NETKIT_L3),
[IFLA_NETKIT_POLICY] = { .type = NLA_U32 },
[IFLA_NETKIT_MODE] = { .type = NLA_U32 },
[IFLA_NETKIT_PEER_POLICY] = { .type = NLA_U32 },
[IFLA_NETKIT_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
[IFLA_NETKIT_PEER_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
[IFLA_NETKIT_PRIMARY] = { .type = NLA_REJECT,
.reject_message = "Primary attribute is read-only" },
};

View File

@@ -1292,6 +1292,19 @@ enum netkit_mode {
NETKIT_L3,
};
/* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to
* the BPF program if attached. This also means the latter can
* consume the two fields if they were populated earlier.
*
* NETKIT_SCRUB_DEFAULT zeroes skb->{mark,priority} fields before
* invoking the attached BPF program when the peer device resides
* in a different network namespace. This is the default behavior.
*/
enum netkit_scrub {
NETKIT_SCRUB_NONE,
NETKIT_SCRUB_DEFAULT,
};
enum {
IFLA_NETKIT_UNSPEC,
IFLA_NETKIT_PEER_INFO,
@@ -1299,6 +1312,8 @@ enum {
IFLA_NETKIT_POLICY,
IFLA_NETKIT_PEER_POLICY,
IFLA_NETKIT_MODE,
IFLA_NETKIT_SCRUB,
IFLA_NETKIT_PEER_SCRUB,
__IFLA_NETKIT_MAX,
};
#define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1)

View File

@@ -461,6 +461,286 @@ enum in6_addr_gen_mode {
/* Bridge section */
/**
* DOC: Bridge enum definition
*
* Please *note* that the timer values in the following section are expected
* in clock_t format, which is seconds multiplied by USER_HZ (generally
* defined as 100).
*
* @IFLA_BR_FORWARD_DELAY
* The bridge forwarding delay is the time spent in LISTENING state
* (before moving to LEARNING) and in LEARNING state (before moving
* to FORWARDING). Only relevant if STP is enabled.
*
* The valid values are between (2 * USER_HZ) and (30 * USER_HZ).
* The default value is (15 * USER_HZ).
*
* @IFLA_BR_HELLO_TIME
* The time between hello packets sent by the bridge, when it is a root
* bridge or a designated bridge. Only relevant if STP is enabled.
*
* The valid values are between (1 * USER_HZ) and (10 * USER_HZ).
* The default value is (2 * USER_HZ).
*
* @IFLA_BR_MAX_AGE
* The hello packet timeout is the time until another bridge in the
* spanning tree is assumed to be dead, after reception of its last hello
* message. Only relevant if STP is enabled.
*
* The valid values are between (6 * USER_HZ) and (40 * USER_HZ).
* The default value is (20 * USER_HZ).
*
* @IFLA_BR_AGEING_TIME
* Configure the bridge's FDB entries aging time. It is the time a MAC
* address will be kept in the FDB after a packet has been received from
* that address. After this time has passed, entries are cleaned up.
* Allow values outside the 802.1 standard specification for special cases:
*
* * 0 - entry never ages (all permanent)
* * 1 - entry disappears (no persistence)
*
* The default value is (300 * USER_HZ).
*
* @IFLA_BR_STP_STATE
* Turn spanning tree protocol on (*IFLA_BR_STP_STATE* > 0) or off
* (*IFLA_BR_STP_STATE* == 0) for this bridge.
*
* The default value is 0 (disabled).
*
* @IFLA_BR_PRIORITY
* Set this bridge's spanning tree priority, used during STP root bridge
* election.
*
* The valid values are between 0 and 65535.
*
* @IFLA_BR_VLAN_FILTERING
* Turn VLAN filtering on (*IFLA_BR_VLAN_FILTERING* > 0) or off
* (*IFLA_BR_VLAN_FILTERING* == 0). When disabled, the bridge will not
* consider the VLAN tag when handling packets.
*
* The default value is 0 (disabled).
*
* @IFLA_BR_VLAN_PROTOCOL
* Set the protocol used for VLAN filtering.
*
* The valid values are 0x8100(802.1Q) or 0x88A8(802.1AD). The default value
* is 0x8100(802.1Q).
*
* @IFLA_BR_GROUP_FWD_MASK
* The group forwarding mask. This is the bitmask that is applied to
* decide whether to forward incoming frames destined to link-local
* addresses (of the form 01:80:C2:00:00:0X).
*
* The default value is 0, which means the bridge does not forward any
* link-local frames coming on this port.
*
* @IFLA_BR_ROOT_ID
* The bridge root id, read only.
*
* @IFLA_BR_BRIDGE_ID
* The bridge id, read only.
*
* @IFLA_BR_ROOT_PORT
* The bridge root port, read only.
*
* @IFLA_BR_ROOT_PATH_COST
* The bridge root path cost, read only.
*
* @IFLA_BR_TOPOLOGY_CHANGE
* The bridge topology change, read only.
*
* @IFLA_BR_TOPOLOGY_CHANGE_DETECTED
* The bridge topology change detected, read only.
*
* @IFLA_BR_HELLO_TIMER
* The bridge hello timer, read only.
*
* @IFLA_BR_TCN_TIMER
* The bridge tcn timer, read only.
*
* @IFLA_BR_TOPOLOGY_CHANGE_TIMER
* The bridge topology change timer, read only.
*
* @IFLA_BR_GC_TIMER
* The bridge gc timer, read only.
*
* @IFLA_BR_GROUP_ADDR
* Set the MAC address of the multicast group this bridge uses for STP.
* The address must be a link-local address in standard Ethernet MAC address
* format. It is an address of the form 01:80:C2:00:00:0X, with X in [0, 4..f].
*
* The default value is 0.
*
* @IFLA_BR_FDB_FLUSH
* Flush bridge's fdb dynamic entries.
*
* @IFLA_BR_MCAST_ROUTER
* Set bridge's multicast router if IGMP snooping is enabled.
* The valid values are:
*
* * 0 - disabled.
* * 1 - automatic (queried).
* * 2 - permanently enabled.
*
* The default value is 1.
*
* @IFLA_BR_MCAST_SNOOPING
* Turn multicast snooping on (*IFLA_BR_MCAST_SNOOPING* > 0) or off
* (*IFLA_BR_MCAST_SNOOPING* == 0).
*
* The default value is 1.
*
* @IFLA_BR_MCAST_QUERY_USE_IFADDR
* If enabled use the bridge's own IP address as source address for IGMP
* queries (*IFLA_BR_MCAST_QUERY_USE_IFADDR* > 0) or the default of 0.0.0.0
* (*IFLA_BR_MCAST_QUERY_USE_IFADDR* == 0).
*
* The default value is 0 (disabled).
*
* @IFLA_BR_MCAST_QUERIER
* Enable (*IFLA_BR_MULTICAST_QUERIER* > 0) or disable
* (*IFLA_BR_MULTICAST_QUERIER* == 0) IGMP querier, ie sending of multicast
* queries by the bridge.
*
* The default value is 0 (disabled).
*
* @IFLA_BR_MCAST_HASH_ELASTICITY
* Set multicast database hash elasticity, It is the maximum chain length in
* the multicast hash table. This attribute is *deprecated* and the value
* is always 16.
*
* @IFLA_BR_MCAST_HASH_MAX
* Set maximum size of the multicast hash table
*
* The default value is 4096, the value must be a power of 2.
*
* @IFLA_BR_MCAST_LAST_MEMBER_CNT
* The Last Member Query Count is the number of Group-Specific Queries
* sent before the router assumes there are no local members. The Last
* Member Query Count is also the number of Group-and-Source-Specific
* Queries sent before the router assumes there are no listeners for a
* particular source.
*
* The default value is 2.
*
* @IFLA_BR_MCAST_STARTUP_QUERY_CNT
* The Startup Query Count is the number of Queries sent out on startup,
* separated by the Startup Query Interval.
*
* The default value is 2.
*
* @IFLA_BR_MCAST_LAST_MEMBER_INTVL
* The Last Member Query Interval is the Max Response Time inserted into
* Group-Specific Queries sent in response to Leave Group messages, and
* is also the amount of time between Group-Specific Query messages.
*
* The default value is (1 * USER_HZ).
*
* @IFLA_BR_MCAST_MEMBERSHIP_INTVL
* The interval after which the bridge will leave a group, if no membership
* reports for this group are received.
*
* The default value is (260 * USER_HZ).
*
* @IFLA_BR_MCAST_QUERIER_INTVL
* The interval between queries sent by other routers. if no queries are
* seen after this delay has passed, the bridge will start to send its own
* queries (as if *IFLA_BR_MCAST_QUERIER_INTVL* was enabled).
*
* The default value is (255 * USER_HZ).
*
* @IFLA_BR_MCAST_QUERY_INTVL
* The Query Interval is the interval between General Queries sent by
* the Querier.
*
* The default value is (125 * USER_HZ). The minimum value is (1 * USER_HZ).
*
* @IFLA_BR_MCAST_QUERY_RESPONSE_INTVL
* The Max Response Time used to calculate the Max Resp Code inserted
* into the periodic General Queries.
*
* The default value is (10 * USER_HZ).
*
* @IFLA_BR_MCAST_STARTUP_QUERY_INTVL
* The interval between queries in the startup phase.
*
* The default value is (125 * USER_HZ) / 4. The minimum value is (1 * USER_HZ).
*
* @IFLA_BR_NF_CALL_IPTABLES
* Enable (*NF_CALL_IPTABLES* > 0) or disable (*NF_CALL_IPTABLES* == 0)
* iptables hooks on the bridge.
*
* The default value is 0 (disabled).
*
* @IFLA_BR_NF_CALL_IP6TABLES
* Enable (*NF_CALL_IP6TABLES* > 0) or disable (*NF_CALL_IP6TABLES* == 0)
* ip6tables hooks on the bridge.
*
* The default value is 0 (disabled).
*
* @IFLA_BR_NF_CALL_ARPTABLES
* Enable (*NF_CALL_ARPTABLES* > 0) or disable (*NF_CALL_ARPTABLES* == 0)
* arptables hooks on the bridge.
*
* The default value is 0 (disabled).
*
* @IFLA_BR_VLAN_DEFAULT_PVID
* VLAN ID applied to untagged and priority-tagged incoming packets.
*
* The default value is 1. Setting to the special value 0 makes all ports of
* this bridge not have a PVID by default, which means that they will
* not accept VLAN-untagged traffic.
*
* @IFLA_BR_PAD
* Bridge attribute padding type for netlink message.
*
* @IFLA_BR_VLAN_STATS_ENABLED
* Enable (*IFLA_BR_VLAN_STATS_ENABLED* == 1) or disable
* (*IFLA_BR_VLAN_STATS_ENABLED* == 0) per-VLAN stats accounting.
*
* The default value is 0 (disabled).
*
* @IFLA_BR_MCAST_STATS_ENABLED
* Enable (*IFLA_BR_MCAST_STATS_ENABLED* > 0) or disable
* (*IFLA_BR_MCAST_STATS_ENABLED* == 0) multicast (IGMP/MLD) stats
* accounting.
*
* The default value is 0 (disabled).
*
* @IFLA_BR_MCAST_IGMP_VERSION
* Set the IGMP version.
*
* The valid values are 2 and 3. The default value is 2.
*
* @IFLA_BR_MCAST_MLD_VERSION
* Set the MLD version.
*
* The valid values are 1 and 2. The default value is 1.
*
* @IFLA_BR_VLAN_STATS_PER_PORT
* Enable (*IFLA_BR_VLAN_STATS_PER_PORT* == 1) or disable
* (*IFLA_BR_VLAN_STATS_PER_PORT* == 0) per-VLAN per-port stats accounting.
* Can be changed only when there are no port VLANs configured.
*
* The default value is 0 (disabled).
*
* @IFLA_BR_MULTI_BOOLOPT
* The multi_boolopt is used to control new boolean options to avoid adding
* new netlink attributes. You can look at ``enum br_boolopt_id`` for those
* options.
*
* @IFLA_BR_MCAST_QUERIER_STATE
* Bridge mcast querier states, read only.
*
* @IFLA_BR_FDB_N_LEARNED
* The number of dynamically learned FDB entries for the current bridge,
* read only.
*
* @IFLA_BR_FDB_MAX_LEARNED
* Set the number of max dynamically learned FDB entries for the current
* bridge.
*/
enum {
IFLA_BR_UNSPEC,
IFLA_BR_FORWARD_DELAY,
@@ -510,6 +790,8 @@ enum {
IFLA_BR_VLAN_STATS_PER_PORT,
IFLA_BR_MULTI_BOOLOPT,
IFLA_BR_MCAST_QUERIER_STATE,
IFLA_BR_FDB_N_LEARNED,
IFLA_BR_FDB_MAX_LEARNED,
__IFLA_BR_MAX,
};
@@ -520,11 +802,252 @@ struct ifla_bridge_id {
__u8 addr[6]; /* ETH_ALEN */
};
/**
* DOC: Bridge mode enum definition
*
* @BRIDGE_MODE_HAIRPIN
* Controls whether traffic may be sent back out of the port on which it
* was received. This option is also called reflective relay mode, and is
* used to support basic VEPA (Virtual Ethernet Port Aggregator)
* capabilities. By default, this flag is turned off and the bridge will
* not forward traffic back out of the receiving port.
*/
enum {
BRIDGE_MODE_UNSPEC,
BRIDGE_MODE_HAIRPIN,
};
/**
* DOC: Bridge port enum definition
*
* @IFLA_BRPORT_STATE
* The operation state of the port. Here are the valid values.
*
* * 0 - port is in STP *DISABLED* state. Make this port completely
* inactive for STP. This is also called BPDU filter and could be used
* to disable STP on an untrusted port, like a leaf virtual device.
* The traffic forwarding is also stopped on this port.
* * 1 - port is in STP *LISTENING* state. Only valid if STP is enabled
* on the bridge. In this state the port listens for STP BPDUs and
* drops all other traffic frames.
* * 2 - port is in STP *LEARNING* state. Only valid if STP is enabled on
* the bridge. In this state the port will accept traffic only for the
* purpose of updating MAC address tables.
* * 3 - port is in STP *FORWARDING* state. Port is fully active.
* * 4 - port is in STP *BLOCKING* state. Only valid if STP is enabled on
* the bridge. This state is used during the STP election process.
* In this state, port will only process STP BPDUs.
*
* @IFLA_BRPORT_PRIORITY
* The STP port priority. The valid values are between 0 and 255.
*
* @IFLA_BRPORT_COST
* The STP path cost of the port. The valid values are between 1 and 65535.
*
* @IFLA_BRPORT_MODE
* Set the bridge port mode. See *BRIDGE_MODE_HAIRPIN* for more details.
*
* @IFLA_BRPORT_GUARD
* Controls whether STP BPDUs will be processed by the bridge port. By
* default, the flag is turned off to allow BPDU processing. Turning this
* flag on will disable the bridge port if a STP BPDU packet is received.
*
* If the bridge has Spanning Tree enabled, hostile devices on the network
* may send BPDU on a port and cause network failure. Setting *guard on*
* will detect and stop this by disabling the port. The port will be
* restarted if the link is brought down, or removed and reattached.
*
* @IFLA_BRPORT_PROTECT
* Controls whether a given port is allowed to become a root port or not.
* Only used when STP is enabled on the bridge. By default the flag is off.
*
* This feature is also called root port guard. If BPDU is received from a
* leaf (edge) port, it should not be elected as root port. This could
* be used if using STP on a bridge and the downstream bridges are not fully
* trusted; this prevents a hostile guest from rerouting traffic.
*
* @IFLA_BRPORT_FAST_LEAVE
* This flag allows the bridge to immediately stop multicast traffic
* forwarding on a port that receives an IGMP Leave message. It is only used
* when IGMP snooping is enabled on the bridge. By default the flag is off.
*
* @IFLA_BRPORT_LEARNING
* Controls whether a given port will learn *source* MAC addresses from
* received traffic or not. Also controls whether dynamic FDB entries
* (which can also be added by software) will be refreshed by incoming
* traffic. By default this flag is on.
*
* @IFLA_BRPORT_UNICAST_FLOOD
* Controls whether unicast traffic for which there is no FDB entry will
* be flooded towards this port. By default this flag is on.
*
* @IFLA_BRPORT_PROXYARP
* Enable proxy ARP on this port.
*
* @IFLA_BRPORT_LEARNING_SYNC
* Controls whether a given port will sync MAC addresses learned on device
* port to bridge FDB.
*
* @IFLA_BRPORT_PROXYARP_WIFI
* Enable proxy ARP on this port which meets extended requirements by
* IEEE 802.11 and Hotspot 2.0 specifications.
*
* @IFLA_BRPORT_ROOT_ID
*
* @IFLA_BRPORT_BRIDGE_ID
*
* @IFLA_BRPORT_DESIGNATED_PORT
*
* @IFLA_BRPORT_DESIGNATED_COST
*
* @IFLA_BRPORT_ID
*
* @IFLA_BRPORT_NO
*
* @IFLA_BRPORT_TOPOLOGY_CHANGE_ACK
*
* @IFLA_BRPORT_CONFIG_PENDING
*
* @IFLA_BRPORT_MESSAGE_AGE_TIMER
*
* @IFLA_BRPORT_FORWARD_DELAY_TIMER
*
* @IFLA_BRPORT_HOLD_TIMER
*
* @IFLA_BRPORT_FLUSH
* Flush bridge ports' fdb dynamic entries.
*
* @IFLA_BRPORT_MULTICAST_ROUTER
* Configure the port's multicast router presence. A port with
* a multicast router will receive all multicast traffic.
* The valid values are:
*
* * 0 disable multicast routers on this port
* * 1 let the system detect the presence of routers (default)
* * 2 permanently enable multicast traffic forwarding on this port
* * 3 enable multicast routers temporarily on this port, not depending
* on incoming queries.
*
* @IFLA_BRPORT_PAD
*
* @IFLA_BRPORT_MCAST_FLOOD
* Controls whether a given port will flood multicast traffic for which
* there is no MDB entry. By default this flag is on.
*
* @IFLA_BRPORT_MCAST_TO_UCAST
* Controls whether a given port will replicate packets using unicast
* instead of multicast. By default this flag is off.
*
* This is done by copying the packet per host and changing the multicast
* destination MAC to a unicast one accordingly.
*
* *mcast_to_unicast* works on top of the multicast snooping feature of the
* bridge. Which means unicast copies are only delivered to hosts which
* are interested in unicast and signaled this via IGMP/MLD reports previously.
*
* This feature is intended for interface types which have a more reliable
* and/or efficient way to deliver unicast packets than broadcast ones
* (e.g. WiFi).
*
* However, it should only be enabled on interfaces where no IGMPv2/MLDv1
* report suppression takes place. IGMP/MLD report suppression issue is
* usually overcome by the network daemon (supplicant) enabling AP isolation
* and by that separating all STAs.
*
* Delivery of STA-to-STA IP multicast is made possible again by enabling
* and utilizing the bridge hairpin mode, which considers the incoming port
* as a potential outgoing port, too (see *BRIDGE_MODE_HAIRPIN* option).
* Hairpin mode is performed after multicast snooping, therefore leading
* to only deliver reports to STAs running a multicast router.
*
* @IFLA_BRPORT_VLAN_TUNNEL
* Controls whether vlan to tunnel mapping is enabled on the port.
* By default this flag is off.
*
* @IFLA_BRPORT_BCAST_FLOOD
* Controls flooding of broadcast traffic on the given port. By default
* this flag is on.
*
* @IFLA_BRPORT_GROUP_FWD_MASK
* Set the group forward mask. This is a bitmask that is applied to
* decide whether to forward incoming frames destined to link-local
* addresses. The addresses of the form are 01:80:C2:00:00:0X (defaults
* to 0, which means the bridge does not forward any link-local frames
* coming on this port).
*
* @IFLA_BRPORT_NEIGH_SUPPRESS
* Controls whether neighbor discovery (arp and nd) proxy and suppression
* is enabled on the port. By default this flag is off.
*
* @IFLA_BRPORT_ISOLATED
* Controls whether a given port will be isolated, which means it will be
* able to communicate with non-isolated ports only. By default this
* flag is off.
*
* @IFLA_BRPORT_BACKUP_PORT
* Set a backup port. If the port loses carrier all traffic will be
* redirected to the configured backup port. Set the value to 0 to disable
* it.
*
* @IFLA_BRPORT_MRP_RING_OPEN
*
* @IFLA_BRPORT_MRP_IN_OPEN
*
* @IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT
* The number of per-port EHT hosts limit. The default value is 512.
* Setting to 0 is not allowed.
*
* @IFLA_BRPORT_MCAST_EHT_HOSTS_CNT
* The current number of tracked hosts, read only.
*
* @IFLA_BRPORT_LOCKED
* Controls whether a port will be locked, meaning that hosts behind the
* port will not be able to communicate through the port unless an FDB
* entry with the unit's MAC address is in the FDB. The common use case is
* that hosts are allowed access through authentication with the IEEE 802.1X
* protocol or based on whitelists. By default this flag is off.
*
* Please note that secure 802.1X deployments should always use the
* *BR_BOOLOPT_NO_LL_LEARN* flag, to not permit the bridge to populate its
* FDB based on link-local (EAPOL) traffic received on the port.
*
* @IFLA_BRPORT_MAB
* Controls whether a port will use MAC Authentication Bypass (MAB), a
* technique through which select MAC addresses may be allowed on a locked
* port, without using 802.1X authentication. Packets with an unknown source
* MAC address generates a "locked" FDB entry on the incoming bridge port.
* The common use case is for user space to react to these bridge FDB
* notifications and optionally replace the locked FDB entry with a normal
* one, allowing traffic to pass for whitelisted MAC addresses.
*
* Setting this flag also requires *IFLA_BRPORT_LOCKED* and
* *IFLA_BRPORT_LEARNING*. *IFLA_BRPORT_LOCKED* ensures that unauthorized
* data packets are dropped, and *IFLA_BRPORT_LEARNING* allows the dynamic
* FDB entries installed by user space (as replacements for the locked FDB
* entries) to be refreshed and/or aged out.
*
* @IFLA_BRPORT_MCAST_N_GROUPS
*
* @IFLA_BRPORT_MCAST_MAX_GROUPS
* Sets the maximum number of MDB entries that can be registered for a
* given port. Attempts to register more MDB entries at the port than this
* limit allows will be rejected, whether they are done through netlink
* (e.g. the bridge tool), or IGMP or MLD membership reports. Setting a
* limit of 0 disables the limit. The default value is 0.
*
* @IFLA_BRPORT_NEIGH_VLAN_SUPPRESS
* Controls whether neighbor discovery (arp and nd) proxy and suppression is
* enabled for a given port. By default this flag is off.
*
* Note that this option only takes effect when *IFLA_BRPORT_NEIGH_SUPPRESS*
* is enabled for a given port.
*
* @IFLA_BRPORT_BACKUP_NHID
* The FDB nexthop object ID to attach to packets being redirected to a
* backup port that has VLAN tunnel mapping enabled (via the
* *IFLA_BRPORT_VLAN_TUNNEL* option). Setting a value of 0 (default) has
* the effect of not attaching any ID.
*/
enum {
IFLA_BRPORT_UNSPEC,
IFLA_BRPORT_STATE, /* Spanning tree state */
@@ -769,6 +1292,19 @@ enum netkit_mode {
NETKIT_L3,
};
/* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to
* the BPF program if attached. This also means the latter can
* consume the two fields if they were populated earlier.
*
* NETKIT_SCRUB_DEFAULT zeroes skb->{mark,priority} fields before
* invoking the attached BPF program when the peer device resides
* in a different network namespace. This is the default behavior.
*/
enum netkit_scrub {
NETKIT_SCRUB_NONE,
NETKIT_SCRUB_DEFAULT,
};
enum {
IFLA_NETKIT_UNSPEC,
IFLA_NETKIT_PEER_INFO,
@@ -776,6 +1312,8 @@ enum {
IFLA_NETKIT_POLICY,
IFLA_NETKIT_PEER_POLICY,
IFLA_NETKIT_MODE,
IFLA_NETKIT_SCRUB,
IFLA_NETKIT_PEER_SCRUB,
__IFLA_NETKIT_MAX,
};
#define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1)
@@ -854,6 +1392,7 @@ enum {
IFLA_VXLAN_DF,
IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
IFLA_VXLAN_LOCALBYPASS,
IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
@@ -871,6 +1410,13 @@ enum ifla_vxlan_df {
VXLAN_DF_MAX = __VXLAN_DF_END - 1,
};
enum ifla_vxlan_label_policy {
VXLAN_LABEL_FIXED = 0,
VXLAN_LABEL_INHERIT = 1,
__VXLAN_LABEL_END,
VXLAN_LABEL_MAX = __VXLAN_LABEL_END - 1,
};
/* GENEVE section */
enum {
IFLA_GENEVE_UNSPEC,
@@ -935,6 +1481,8 @@ enum {
IFLA_GTP_ROLE,
IFLA_GTP_CREATE_SOCKETS,
IFLA_GTP_RESTART_COUNT,
IFLA_GTP_LOCAL,
IFLA_GTP_LOCAL6,
__IFLA_GTP_MAX,
};
#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
@@ -1240,6 +1788,7 @@ enum {
IFLA_HSR_PROTOCOL, /* Indicate different protocol than
* HSR. For example PRP.
*/
IFLA_HSR_INTERLINK, /* HSR interlink network device */
__IFLA_HSR_MAX,
};
@@ -1417,7 +1966,9 @@ enum {
enum {
IFLA_DSA_UNSPEC,
IFLA_DSA_MASTER,
IFLA_DSA_CONDUIT,
/* Deprecated, use IFLA_DSA_CONDUIT instead */
IFLA_DSA_MASTER = IFLA_DSA_CONDUIT,
__IFLA_DSA_MAX,
};

View File

@@ -14,7 +14,9 @@
#include "netlink_helpers.h"
#include "tc_helpers.h"
#define ICMP_ECHO 8
#define MARK 42
#define PRIO 0xeb9f
#define ICMP_ECHO 8
struct icmphdr {
__u8 type;
@@ -33,7 +35,7 @@ struct iplink_req {
};
static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
bool same_netns)
bool same_netns, int scrub, int peer_scrub)
{
struct rtnl_handle rth = { .fd = -1 };
struct iplink_req req = {};
@@ -58,6 +60,8 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
addattr32(&req.n, sizeof(req), IFLA_NETKIT_POLICY, policy);
addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_POLICY, peer_policy);
addattr32(&req.n, sizeof(req), IFLA_NETKIT_SCRUB, scrub);
addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_SCRUB, peer_scrub);
addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
addattr_nest_end(&req.n, data);
addattr_nest_end(&req.n, linkinfo);
@@ -118,9 +122,9 @@ static void destroy_netkit(void)
static int __send_icmp(__u32 dest)
{
int sock, ret, mark = MARK, prio = PRIO;
struct sockaddr_in addr;
struct icmphdr icmp;
int sock, ret;
ret = write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0");
if (!ASSERT_OK(ret, "write_sysctl(net.ipv4.ping_group_range)"))
@@ -135,6 +139,15 @@ static int __send_icmp(__u32 dest)
if (!ASSERT_OK(ret, "setsockopt(SO_BINDTODEVICE)"))
goto out;
ret = setsockopt(sock, SOL_SOCKET, SO_MARK, &mark, sizeof(mark));
if (!ASSERT_OK(ret, "setsockopt(SO_MARK)"))
goto out;
ret = setsockopt(sock, SOL_SOCKET, SO_PRIORITY,
&prio, sizeof(prio));
if (!ASSERT_OK(ret, "setsockopt(SO_PRIORITY)"))
goto out;
memset(&addr, 0, sizeof(addr));
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = htonl(dest);
@@ -171,7 +184,8 @@ void serial_test_tc_netkit_basic(void)
int err, ifindex;
err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS,
&ifindex, false);
&ifindex, false, NETKIT_SCRUB_DEFAULT,
NETKIT_SCRUB_DEFAULT);
if (err)
return;
@@ -285,7 +299,8 @@ static void serial_test_tc_netkit_multi_links_target(int mode, int target)
int err, ifindex;
err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
&ifindex, false);
&ifindex, false, NETKIT_SCRUB_DEFAULT,
NETKIT_SCRUB_DEFAULT);
if (err)
return;
@@ -413,7 +428,8 @@ static void serial_test_tc_netkit_multi_opts_target(int mode, int target)
int err, ifindex;
err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
&ifindex, false);
&ifindex, false, NETKIT_SCRUB_DEFAULT,
NETKIT_SCRUB_DEFAULT);
if (err)
return;
@@ -527,7 +543,8 @@ void serial_test_tc_netkit_device(void)
int err, ifindex, ifindex2;
err = create_netkit(NETKIT_L3, NETKIT_PASS, NETKIT_PASS,
&ifindex, true);
&ifindex, true, NETKIT_SCRUB_DEFAULT,
NETKIT_SCRUB_DEFAULT);
if (err)
return;
@@ -638,7 +655,8 @@ static void serial_test_tc_netkit_neigh_links_target(int mode, int target)
int err, ifindex;
err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
&ifindex, false);
&ifindex, false, NETKIT_SCRUB_DEFAULT,
NETKIT_SCRUB_DEFAULT);
if (err)
return;
@@ -715,7 +733,8 @@ static void serial_test_tc_netkit_pkt_type_mode(int mode)
struct bpf_link *link;
err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
&ifindex, true);
&ifindex, true, NETKIT_SCRUB_DEFAULT,
NETKIT_SCRUB_DEFAULT);
if (err)
return;
@@ -779,3 +798,60 @@ void serial_test_tc_netkit_pkt_type(void)
serial_test_tc_netkit_pkt_type_mode(NETKIT_L2);
serial_test_tc_netkit_pkt_type_mode(NETKIT_L3);
}
static void serial_test_tc_netkit_scrub_type(int scrub)
{
LIBBPF_OPTS(bpf_netkit_opts, optl);
struct test_tc_link *skel;
struct bpf_link *link;
int err, ifindex;
err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS,
&ifindex, false, scrub, scrub);
if (err)
return;
skel = test_tc_link__open();
if (!ASSERT_OK_PTR(skel, "skel_open"))
goto cleanup;
ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc8,
BPF_NETKIT_PRIMARY), 0, "tc8_attach_type");
err = test_tc_link__load(skel);
if (!ASSERT_OK(err, "skel_load"))
goto cleanup;
assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
ASSERT_EQ(skel->bss->seen_tc8, false, "seen_tc8");
link = bpf_program__attach_netkit(skel->progs.tc8, ifindex, &optl);
if (!ASSERT_OK_PTR(link, "link_attach"))
goto cleanup;
skel->links.tc8 = link;
assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
tc_skel_reset_all_seen(skel);
ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
ASSERT_EQ(skel->bss->seen_tc8, true, "seen_tc8");
ASSERT_EQ(skel->bss->mark, scrub == NETKIT_SCRUB_NONE ? MARK : 0, "mark");
ASSERT_EQ(skel->bss->prio, scrub == NETKIT_SCRUB_NONE ? PRIO : 0, "prio");
cleanup:
test_tc_link__destroy(skel);
assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
destroy_netkit();
}
void serial_test_tc_netkit_scrub(void)
{
serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT);
serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE);
}

View File

@@ -18,6 +18,7 @@ bool seen_tc4;
bool seen_tc5;
bool seen_tc6;
bool seen_tc7;
bool seen_tc8;
bool set_type;
@@ -25,6 +26,8 @@ bool seen_eth;
bool seen_host;
bool seen_mcast;
int mark, prio;
SEC("tc/ingress")
int tc1(struct __sk_buff *skb)
{
@@ -100,3 +103,12 @@ int tc7(struct __sk_buff *skb)
seen_tc7 = true;
return TCX_PASS;
}
SEC("tc/egress")
int tc8(struct __sk_buff *skb)
{
seen_tc8 = true;
mark = skb->mark;
prio = skb->priority;
return TCX_PASS;
}