Merge branch 'add-broadcast_neighbor-for-no-stacking-networking-arch'

Tonghao Zhang says:

====================
add broadcast_neighbor for no-stacking networking arch

In a no-stacking networking architecture with bond mode 4 (LACP) enabled
in the datacenter, the switches require ARP/ND packets for session
synchronization. For more details, please see the individual patches.

Cc: Jay Vosburgh <jv@jvosburgh.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Simon Horman <horms@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Andrew Lunn <andrew+netdev@lunn.ch>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Nikolay Aleksandrov <razor@blackwall.org>
Cc: Zengbing Tu <tuzengbing@didiglobal.com>
====================

Link: https://patch.msgid.link/cover.1751031306.git.tonghao@bamaicloud.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Paolo Abeni
2025-07-08 10:59:58 +02:00
8 changed files with 165 additions and 13 deletions

View File

@@ -562,6 +562,12 @@ lacp_rate
The default is slow.
broadcast_neighbor
Option specifying whether to broadcast ARP/ND packets to all
active slaves. This option has no effect in modes other than
802.3ad mode. The default is off (0).
max_bonds
Specifies the number of bonding devices to create for this
@@ -767,8 +773,9 @@ num_unsol_na
greater than 1.
The valid range is 0 - 255; the default value is 1. These options
affect only the active-backup mode. These options were added for
bonding versions 3.3.0 and 3.4.0 respectively.
affect the active-backup or 802.3ad (broadcast_neighbor enabled) mode.
These options were added for bonding versions 3.3.0 and 3.4.0
respectively.
From Linux 3.0 and bonding version 3.7.1, these notifications
are generated by the ipv4 and ipv6 code and the numbers of

View File

@@ -982,6 +982,17 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker)
return 0;
}
/**
 * ad_cond_set_peer_notif - arm peer notifications if broadcast_neighbor is on
 * @port: the port whose bond should notify its peers
 *
 * Reloads bond->send_peer_notif with num_peer_notif multiplied by
 * peer_notif_delay (a delay below 1 counts as 1). The counter is written
 * only while RTNL is held; RTNL is taken with rtnl_trylock(), so when the
 * lock cannot be obtained the counter is left untouched and this request
 * is dropped.
 */
static void ad_cond_set_peer_notif(struct port *port)
{
	struct bonding *bond = port->slave->bond;

	if (!bond->params.broadcast_neighbor)
		return;

	/* Never block here: give up if RTNL is currently held elsewhere. */
	if (!rtnl_trylock())
		return;

	bond->send_peer_notif = bond->params.num_peer_notif *
				max(1, bond->params.peer_notif_delay);
	rtnl_unlock();
}
/**
* ad_mux_machine - handle a port's mux state machine
* @port: the port we're looking at
@@ -2062,6 +2073,8 @@ static void ad_enable_collecting_distributing(struct port *port,
__enable_port(port);
/* Slave array needs update */
*update_slave_arr = true;
/* Should notify peers if possible */
ad_cond_set_peer_notif(port);
}
}

View File

@@ -212,6 +212,8 @@ atomic_t netpoll_block_tx = ATOMIC_INIT(0);
unsigned int bond_net_id __read_mostly;
DEFINE_STATIC_KEY_FALSE(bond_bcast_neigh_enabled);
static const struct flow_dissector_key flow_keys_bonding_keys[] = {
{
.key_id = FLOW_DISSECTOR_KEY_CONTROL,
@@ -1235,17 +1237,32 @@ static struct slave *bond_find_best_slave(struct bonding *bond)
/* must be called in RCU critical section or with RTNL held */
static bool bond_should_notify_peers(struct bonding *bond)
{
struct slave *slave = rcu_dereference_rtnl(bond->curr_active_slave);
struct bond_up_slave *usable;
struct slave *slave = NULL;
if (!slave || !bond->send_peer_notif ||
if (!bond->send_peer_notif ||
bond->send_peer_notif %
max(1, bond->params.peer_notif_delay) != 0 ||
!netif_carrier_ok(bond->dev) ||
test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
!netif_carrier_ok(bond->dev))
return false;
/* send_peer_notif is only set in active-backup or 802.3ad mode,
 * and it is cleared in bond_close() when the mode is changed,
 * so checking only the bond mode here is safe.
 */
if (BOND_MODE(bond) == BOND_MODE_8023AD) {
usable = rcu_dereference_rtnl(bond->usable_slaves);
if (!usable || !READ_ONCE(usable->count))
return false;
} else {
slave = rcu_dereference_rtnl(bond->curr_active_slave);
if (!slave || test_bit(__LINK_STATE_LINKWATCH_PENDING,
&slave->dev->state))
return false;
}
netdev_dbg(bond->dev, "bond_should_notify_peers: slave %s\n",
slave ? slave->dev->name : "NULL");
slave ? slave->dev->name : "all");
return true;
}
@@ -4456,6 +4473,9 @@ static int bond_open(struct net_device *bond_dev)
bond_for_each_slave(bond, slave, iter)
dev_mc_add(slave->dev, lacpdu_mcast_addr);
if (bond->params.broadcast_neighbor)
static_branch_inc(&bond_bcast_neigh_enabled);
}
if (bond_mode_can_use_xmit_hash(bond))
@@ -4475,6 +4495,10 @@ static int bond_close(struct net_device *bond_dev)
bond_alb_deinitialize(bond);
bond->recv_probe = NULL;
if (BOND_MODE(bond) == BOND_MODE_8023AD &&
bond->params.broadcast_neighbor)
static_branch_dec(&bond_bcast_neigh_enabled);
if (bond_uses_primary(bond)) {
rcu_read_lock();
slave = rcu_dereference(bond->curr_active_slave);
@@ -5310,6 +5334,37 @@ static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond,
return slaves->arr[hash % count];
}
/* Tell whether @skb is a neighbour-discovery frame that the bond behind
 * @dev wants broadcast.
 *
 * Returns true for ARP frames and for IPv6 packets whose next header is
 * ICMPv6 with a neighbour solicitation or neighbour advertisement type,
 * provided both the bond_bcast_neigh_enabled static key and the bond's
 * broadcast_neighbor parameter are set. Returns false otherwise, including
 * when the IPv6 + ICMPv6 headers cannot be read from the skb.
 */
static bool bond_should_broadcast_neighbor(struct sk_buff *skb,
					   struct net_device *dev)
{
	struct bonding *bond = netdev_priv(dev);
	struct {
		struct ipv6hdr ip6;
		struct icmp6hdr icmp6;
	} *hdrs, hdrs_buf;

	/* Static key is tested first, before the per-bond parameter. */
	if (!static_branch_unlikely(&bond_bcast_neigh_enabled))
		return false;

	if (!bond->params.broadcast_neighbor)
		return false;

	if (skb->protocol == htons(ETH_P_ARP))
		return true;

	if (skb->protocol != htons(ETH_P_IPV6))
		return false;

	/* Both headers are fetched in one go, starting right after the
	 * MAC header; hdrs_buf is the fallback copy buffer.
	 */
	hdrs = skb_header_pointer(skb, skb_mac_header_len(skb),
				  sizeof(hdrs_buf), &hdrs_buf);
	if (!hdrs || hdrs->ip6.nexthdr != NEXTHDR_ICMP)
		return false;

	switch (hdrs->icmp6.icmp6_type) {
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
		return true;
	default:
		return false;
	}
}
/* Use this Xmit function for 3AD as well as XOR modes. The current
* usable slave array is formed in the control path. The xmit function
* just calculates hash and sends the packet out.
@@ -5329,17 +5384,27 @@ static netdev_tx_t bond_3ad_xor_xmit(struct sk_buff *skb,
return bond_tx_drop(dev, skb);
}
/* in broadcast mode, we send everything to all usable interfaces. */
/* In broadcast mode, we send everything to all slave interfaces;
 * otherwise only to the usable slave interfaces.
 * This function is called under rcu_read_lock().
 */
static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb,
struct net_device *bond_dev)
struct net_device *bond_dev,
bool all_slaves)
{
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave = NULL;
struct list_head *iter;
struct bond_up_slave *slaves;
bool xmit_suc = false;
bool skb_used = false;
int slaves_count, i;
bond_for_each_slave_rcu(bond, slave, iter) {
if (all_slaves)
slaves = rcu_dereference(bond->all_slaves);
else
slaves = rcu_dereference(bond->usable_slaves);
slaves_count = slaves ? READ_ONCE(slaves->count) : 0;
for (i = 0; i < slaves_count; i++) {
struct slave *slave = slaves->arr[i];
struct sk_buff *skb2;
if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP))
@@ -5577,10 +5642,13 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev
case BOND_MODE_ACTIVEBACKUP:
return bond_xmit_activebackup(skb, dev);
case BOND_MODE_8023AD:
if (bond_should_broadcast_neighbor(skb, dev))
return bond_xmit_broadcast(skb, dev, false);
fallthrough;
case BOND_MODE_XOR:
return bond_3ad_xor_xmit(skb, dev);
case BOND_MODE_BROADCAST:
return bond_xmit_broadcast(skb, dev);
return bond_xmit_broadcast(skb, dev, true);
case BOND_MODE_ALB:
return bond_alb_xmit(skb, dev);
case BOND_MODE_TLB:
@@ -6456,6 +6524,7 @@ static int __init bond_check_params(struct bond_params *params)
eth_zero_addr(params->ad_actor_system);
params->ad_user_port_key = ad_user_port_key;
params->coupled_control = 1;
params->broadcast_neighbor = 0;
if (packets_per_slave > 0) {
params->reciprocal_packets_per_slave =
reciprocal_value(packets_per_slave);

View File

@@ -124,6 +124,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = {
[IFLA_BOND_MISSED_MAX] = { .type = NLA_U8 },
[IFLA_BOND_NS_IP6_TARGET] = { .type = NLA_NESTED },
[IFLA_BOND_COUPLED_CONTROL] = { .type = NLA_U8 },
[IFLA_BOND_BROADCAST_NEIGH] = { .type = NLA_U8 },
};
static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = {
@@ -561,6 +562,16 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[],
return err;
}
if (data[IFLA_BOND_BROADCAST_NEIGH]) {
int broadcast_neigh = nla_get_u8(data[IFLA_BOND_BROADCAST_NEIGH]);
bond_opt_initval(&newval, broadcast_neigh);
err = __bond_opt_set(bond, BOND_OPT_BROADCAST_NEIGH, &newval,
data[IFLA_BOND_BROADCAST_NEIGH], extack);
if (err)
return err;
}
return 0;
}
@@ -630,6 +641,7 @@ static size_t bond_get_size(const struct net_device *bond_dev)
nla_total_size(sizeof(struct nlattr)) +
nla_total_size(sizeof(struct in6_addr)) * BOND_MAX_NS_TARGETS +
nla_total_size(sizeof(u8)) + /* IFLA_BOND_COUPLED_CONTROL */
nla_total_size(sizeof(u8)) + /* IFLA_BOND_BROADCAST_NEIGH */
0;
}
@@ -793,6 +805,10 @@ static int bond_fill_info(struct sk_buff *skb,
bond->params.coupled_control))
goto nla_put_failure;
if (nla_put_u8(skb, IFLA_BOND_BROADCAST_NEIGH,
bond->params.broadcast_neighbor))
goto nla_put_failure;
if (BOND_MODE(bond) == BOND_MODE_8023AD) {
struct ad_info info;

View File

@@ -87,6 +87,8 @@ static int bond_option_missed_max_set(struct bonding *bond,
const struct bond_opt_value *newval);
static int bond_option_coupled_control_set(struct bonding *bond,
const struct bond_opt_value *newval);
static int bond_option_broadcast_neigh_set(struct bonding *bond,
const struct bond_opt_value *newval);
static const struct bond_opt_value bond_mode_tbl[] = {
{ "balance-rr", BOND_MODE_ROUNDROBIN, BOND_VALFLAG_DEFAULT},
@@ -240,6 +242,12 @@ static const struct bond_opt_value bond_coupled_control_tbl[] = {
{ NULL, -1, 0},
};
/* Values accepted by the broadcast_neighbor option: "off" (0, flagged as
 * the default) and "on" (1). The { NULL, -1, 0} entry closes the list, in
 * the same way as the other value tables in this file.
 */
static const struct bond_opt_value bond_broadcast_neigh_tbl[] = {
{ "off", 0, BOND_VALFLAG_DEFAULT},
{ "on", 1, 0},
{ NULL, -1, 0}
};
static const struct bond_option bond_opts[BOND_OPT_LAST] = {
[BOND_OPT_MODE] = {
.id = BOND_OPT_MODE,
@@ -513,6 +521,14 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
.flags = BOND_OPTFLAG_IFDOWN,
.values = bond_coupled_control_tbl,
.set = bond_option_coupled_control_set,
},
[BOND_OPT_BROADCAST_NEIGH] = {
.id = BOND_OPT_BROADCAST_NEIGH,
.name = "broadcast_neighbor",
.desc = "Broadcast neighbor packets to all active slaves",
.unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)),
.values = bond_broadcast_neigh_tbl,
.set = bond_option_broadcast_neigh_set,
}
};
@@ -894,6 +910,13 @@ static int bond_option_mode_set(struct bonding *bond,
bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
bond->params.mode = newval->value;
/* The bond device is down when the mode is changed, and bond_close()
 * already decrements bond_bcast_neigh_enabled if broadcast_neighbor
 * was enabled in 802.3ad mode. Therefore, only broadcast_neighbor
 * itself needs to be cleared to 0 here.
 */
bond->params.broadcast_neighbor = 0;
if (bond->dev->reg_state == NETREG_REGISTERED) {
bool update = false;
@@ -1840,3 +1863,22 @@ static int bond_option_coupled_control_set(struct bonding *bond,
bond->params.coupled_control = newval->value;
return 0;
}
/* Option handler for broadcast_neighbor.
 *
 * Stores @newval->value in bond->params.broadcast_neighbor. While the bond
 * device is up (IFF_UP) the bond_bcast_neigh_enabled static key is kept in
 * step: incremented when the option becomes non-zero, decremented when it
 * becomes zero. Writing the value that is already in effect changes
 * nothing. Always returns 0.
 */
static int bond_option_broadcast_neigh_set(struct bonding *bond,
					   const struct bond_opt_value *newval)
{
	/* Unchanged value: leave both the parameter and the key alone. */
	if (newval->value == bond->params.broadcast_neighbor)
		return 0;

	bond->params.broadcast_neighbor = newval->value;

	if (bond->dev->flags & IFF_UP) {
		if (!bond->params.broadcast_neighbor)
			static_branch_dec(&bond_bcast_neigh_enabled);
		else
			static_branch_inc(&bond_bcast_neigh_enabled);
	}

	netdev_dbg(bond->dev, "Setting broadcast_neighbor to %s (%llu)\n",
		   newval->string, newval->value);

	return 0;
}

View File

@@ -77,6 +77,7 @@ enum {
BOND_OPT_NS_TARGETS,
BOND_OPT_PRIO,
BOND_OPT_COUPLED_CONTROL,
BOND_OPT_BROADCAST_NEIGH,
BOND_OPT_LAST
};

View File

@@ -115,6 +115,8 @@ static inline int is_netpoll_tx_blocked(struct net_device *dev)
#define is_netpoll_tx_blocked(dev) (0)
#endif
DECLARE_STATIC_KEY_FALSE(bond_bcast_neigh_enabled);
struct bond_params {
int mode;
int xmit_policy;
@@ -149,6 +151,7 @@ struct bond_params {
struct in6_addr ns_targets[BOND_MAX_NS_TARGETS];
#endif
int coupled_control;
int broadcast_neighbor;
/* 2 bytes of padding : see ether_addr_equal_64bits() */
u8 ad_actor_system[ETH_ALEN + 2];

View File

@@ -1535,6 +1535,7 @@ enum {
IFLA_BOND_MISSED_MAX,
IFLA_BOND_NS_IP6_TARGET,
IFLA_BOND_COUPLED_CONTROL,
IFLA_BOND_BROADCAST_NEIGH,
__IFLA_BOND_MAX,
};