Merge branch 'bonding-support-aggregator-selection-based-on-port-priority'

Hangbin Liu says:

====================
bonding: support aggregator selection based on port priority

This patchset introduces a new per-port bonding option: `actor_port_prio`.

It allows users to configure the actor's port priority, which can then be used
by the bonding driver for aggregator selection based on port priority.

This provides finer control over LACP aggregator choice, especially in setups
with multiple eligible aggregators over 2 switches.
====================

Link: https://patch.msgid.link/20250902064501.360822-1-liuhangbin@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Paolo Abeni
2025-09-09 10:56:04 +02:00
11 changed files with 247 additions and 33 deletions

View File

@@ -193,6 +193,15 @@ ad_actor_sys_prio
This parameter has effect only in 802.3ad mode and is available through
SysFs interface.
actor_port_prio
In an AD system, this specifies the port priority. The allowed range
is 0 - 65535. If the value is not specified, it takes 255 as the
default value.
This parameter has effect only in 802.3ad mode and is available through
netlink interface.
ad_actor_system
In an AD system, this specifies the mac-address for the actor in
@@ -241,10 +250,18 @@ ad_select
ports (slaves). Reselection occurs as described under the
"bandwidth" setting, above.
The bandwidth and count selection policies permit failover of
802.3ad aggregations when partial failure of the active aggregator
occurs. This keeps the aggregator with the highest availability
(either in bandwidth or in number of ports) active at all times.
actor_port_prio or 3
The active aggregator is chosen by the highest total sum of
actor port priorities across its active ports. Note that this
priority is the LACP actor port priority (actor_port_prio), not
the per-port prio option, which is used for primary reselection.
The bandwidth, count and actor_port_prio selection policies permit
failover of 802.3ad aggregations when partial failure of the active
aggregator occurs. This keeps the aggregator with the highest
availability (either in bandwidth, number of ports, or total value
of port priorities) active at all times.
This option was added in bonding version 3.4.0.

View File

@@ -436,6 +436,7 @@ static void __ad_actor_update_port(struct port *port)
port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr;
port->actor_system_priority = BOND_AD_INFO(bond).system.sys_priority;
port->actor_port_priority = SLAVE_AD_INFO(port->slave)->port_priority;
}
/* Conversions */
@@ -746,6 +747,18 @@ static int __agg_active_ports(struct aggregator *agg)
return active;
}
/**
 * __agg_ports_priority - total actor port priority of an aggregator
 * @agg: the aggregator we're looking at
 *
 * Walks the aggregator's port list and adds up actor_port_priority for
 * every port that has is_enabled set; ports that are not enabled do not
 * contribute.
 *
 * Return: the accumulated priority, 0 if no port is enabled.
 */
static unsigned int __agg_ports_priority(const struct aggregator *agg)
{
	unsigned int total = 0;
	struct port *p;

	for (p = agg->lag_ports; p; p = p->next_port_in_aggregator) {
		if (!p->is_enabled)
			continue;

		total += p->actor_port_priority;
	}

	return total;
}
/**
* __get_agg_bandwidth - get the total bandwidth of an aggregator
* @aggregator: the aggregator we're looking at
@@ -1707,6 +1720,9 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best,
* 4. Therefore, current and best both have partner replies or
* both do not, so perform selection policy:
*
* BOND_AD_PRIO: Select by total priority of ports. If priority
* is equal, select by count.
*
* BOND_AD_COUNT: Select by count of ports. If count is equal,
* select by bandwidth.
*
@@ -1728,6 +1744,14 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best,
return best;
switch (__get_agg_selection_mode(curr->lag_ports)) {
case BOND_AD_PRIO:
if (__agg_ports_priority(curr) > __agg_ports_priority(best))
return curr;
if (__agg_ports_priority(curr) < __agg_ports_priority(best))
return best;
fallthrough;
case BOND_AD_COUNT:
if (__agg_active_ports(curr) > __agg_active_ports(best))
return curr;
@@ -1793,6 +1817,10 @@ static int agg_device_up(const struct aggregator *agg)
* (slaves), and reselect whenever a link state change takes place or the
* set of slaves in the bond changes.
*
* BOND_AD_PRIO: select the aggregator with highest total priority of ports
* (slaves), and reselect whenever a link state change takes place or the
* set of slaves in the bond changes.
*
* FIXME: this function MUST be called with the first agg in the bond, or
* __get_active_agg() won't work correctly. This function should be better
* called with the bond itself, and retrieve the first agg from it.
@@ -2209,6 +2237,9 @@ void bond_3ad_bind_slave(struct slave *slave)
ad_initialize_port(port, &bond->params);
/* Port priority is initialized. Update it to slave's ad info */
SLAVE_AD_INFO(slave)->port_priority = port->actor_port_priority;
port->slave = slave;
port->actor_port_number = SLAVE_AD_INFO(slave)->id;
/* key is determined according to the link speed, duplex and

View File

@@ -28,6 +28,7 @@ static size_t bond_get_slave_size(const struct net_device *bond_dev,
nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE */
nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE */
nla_total_size(sizeof(s32)) + /* IFLA_BOND_SLAVE_PRIO */
nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_ACTOR_PORT_PRIO */
0;
}
@@ -77,6 +78,10 @@ static int bond_fill_slave_info(struct sk_buff *skb,
ad_port->partner_oper.port_state))
goto nla_put_failure;
}
if (nla_put_u16(skb, IFLA_BOND_SLAVE_ACTOR_PORT_PRIO,
SLAVE_AD_INFO(slave)->port_priority))
goto nla_put_failure;
}
return 0;
@@ -130,6 +135,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = {
/* Netlink attribute policy for the per-slave IFLA_BOND_SLAVE_* attributes
 * listed below. Each entry declares the payload type expected for that
 * attribute: queue id and actor port priority are u16, prio is s32.
 */
static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = {
[IFLA_BOND_SLAVE_QUEUE_ID] = { .type = NLA_U16 },
[IFLA_BOND_SLAVE_PRIO] = { .type = NLA_S32 },
[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO] = { .type = NLA_U16 },
};
static int bond_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -180,6 +186,16 @@ static int bond_slave_changelink(struct net_device *bond_dev,
return err;
}
if (data[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO]) {
u16 ad_prio = nla_get_u16(data[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO]);
bond_opt_slave_initval(&newval, &slave_dev, ad_prio);
err = __bond_opt_set(bond, BOND_OPT_ACTOR_PORT_PRIO, &newval,
data[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO], extack);
if (err)
return err;
}
return 0;
}

View File

@@ -79,6 +79,8 @@ static int bond_option_tlb_dynamic_lb_set(struct bonding *bond,
const struct bond_opt_value *newval);
static int bond_option_ad_actor_sys_prio_set(struct bonding *bond,
const struct bond_opt_value *newval);
static int bond_option_actor_port_prio_set(struct bonding *bond,
const struct bond_opt_value *newval);
static int bond_option_ad_actor_system_set(struct bonding *bond,
const struct bond_opt_value *newval);
static int bond_option_ad_user_port_key_set(struct bonding *bond,
@@ -160,10 +162,11 @@ static const struct bond_opt_value bond_lacp_rate_tbl[] = {
};
static const struct bond_opt_value bond_ad_select_tbl[] = {
{ "stable", BOND_AD_STABLE, BOND_VALFLAG_DEFAULT},
{ "bandwidth", BOND_AD_BANDWIDTH, 0},
{ "count", BOND_AD_COUNT, 0},
{ NULL, -1, 0},
{ "stable", BOND_AD_STABLE, BOND_VALFLAG_DEFAULT},
{ "bandwidth", BOND_AD_BANDWIDTH, 0},
{ "count", BOND_AD_COUNT, 0},
{ "actor_port_prio", BOND_AD_PRIO, 0},
{ NULL, -1, 0},
};
static const struct bond_opt_value bond_num_peer_notif_tbl[] = {
@@ -222,6 +225,13 @@ static const struct bond_opt_value bond_ad_actor_sys_prio_tbl[] = {
{ NULL, -1, 0},
};
/* Value table for the actor_port_prio option: the entries flagged
 * MIN/MAX/DEFAULT carry 0, 65535 and 255 respectively. The last entry
 * (NULL name) ends the table.
 */
static const struct bond_opt_value bond_actor_port_prio_tbl[] = {
{ "minval", 0, BOND_VALFLAG_MIN},
{ "maxval", 65535, BOND_VALFLAG_MAX},
{ "default", 255, BOND_VALFLAG_DEFAULT},
{ NULL, -1, 0},
};
static const struct bond_opt_value bond_ad_user_port_key_tbl[] = {
{ "minval", 0, BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT},
{ "maxval", 1023, BOND_VALFLAG_MAX},
@@ -483,6 +493,13 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
.values = bond_ad_actor_sys_prio_tbl,
.set = bond_option_ad_actor_sys_prio_set,
},
[BOND_OPT_ACTOR_PORT_PRIO] = {
.id = BOND_OPT_ACTOR_PORT_PRIO,
.name = "actor_port_prio",
.unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)),
.values = bond_actor_port_prio_tbl,
.set = bond_option_actor_port_prio_set,
},
[BOND_OPT_AD_ACTOR_SYSTEM] = {
.id = BOND_OPT_AD_ACTOR_SYSTEM,
.name = "ad_actor_system",
@@ -1812,6 +1829,26 @@ static int bond_option_ad_actor_sys_prio_set(struct bonding *bond,
return 0;
}
/**
 * bond_option_actor_port_prio_set - set a slave's actor port priority
 * @bond: bond the option is being set on
 * @newval: option value; ->slave_dev selects the slave, ->value is the
 *	    new priority
 *
 * Stores the new priority in the slave's AD info and then calls
 * bond_3ad_update_ad_actor_settings() for @bond.
 *
 * Return: 0 on success, -ENODEV when no slave is found for
 * @newval->slave_dev.
 */
static int bond_option_actor_port_prio_set(struct bonding *bond,
					   const struct bond_opt_value *newval)
{
	struct slave *target = bond_slave_get_rtnl(newval->slave_dev);

	if (!target) {
		netdev_dbg(bond->dev, "%s called on NULL slave\n", __func__);
		return -ENODEV;
	}

	netdev_dbg(newval->slave_dev, "Setting actor_port_prio to %llu\n",
		   newval->value);

	SLAVE_AD_INFO(target)->port_priority = newval->value;
	bond_3ad_update_ad_actor_settings(bond);

	return 0;
}
static int bond_option_ad_actor_system_set(struct bonding *bond,
const struct bond_opt_value *newval)
{

View File

@@ -26,6 +26,7 @@ enum {
BOND_AD_STABLE = 0,
BOND_AD_BANDWIDTH = 1,
BOND_AD_COUNT = 2,
BOND_AD_PRIO = 3,
};
/* rx machine states(43.4.11 in the 802.3ad standard) */
@@ -274,6 +275,7 @@ struct ad_slave_info {
struct port port; /* 802.3ad port structure */
struct bond_3ad_stats stats;
u16 id;
u16 port_priority;
};
static inline const char *bond_3ad_churn_desc(churn_state_t state)

View File

@@ -78,6 +78,7 @@ enum {
BOND_OPT_PRIO,
BOND_OPT_COUPLED_CONTROL,
BOND_OPT_BROADCAST_NEIGH,
BOND_OPT_ACTOR_PORT_PRIO,
BOND_OPT_LAST
};

View File

@@ -1564,6 +1564,7 @@ enum {
IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE,
IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE,
IFLA_BOND_SLAVE_PRIO,
IFLA_BOND_SLAVE_ACTOR_PORT_PRIO,
__IFLA_BOND_SLAVE_MAX,
};

View File

@@ -11,7 +11,8 @@ TEST_PROGS := \
bond_options.sh \
bond-eth-type-change.sh \
bond_macvlan_ipvlan.sh \
bond_passive_lacp.sh
bond_passive_lacp.sh \
bond_lacp_prio.sh
TEST_FILES := \
lag_lib.sh \

View File

@@ -0,0 +1,108 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Testing if bond lacp per port priority works
#
# Switch (s_ns) Backup Switch (b_ns)
# +-------------------------+ +-------------------------+
# | bond0 | | bond0 |
# | + | | + |
# | eth0 | eth1 | | eth0 | eth1 |
# | +---+---+ | | +---+---+ |
# | | | | | | | |
# +-------------------------+ +-------------------------+
# | | | |
# +-----------------------------------------------------+
# | | | | | |
# | +-------+---------+---------+-------+ |
# | eth0 eth1 | eth2 eth3 |
# | + |
# | bond0 |
# +-----------------------------------------------------+
# Client (c_ns)
lib_dir=$(dirname "$0")
# shellcheck disable=SC1091
source "$lib_dir"/../../../net/lib.sh
# Build the test topology: four veth pairs from the client namespace (two
# to each switch namespace), one 802.3ad bond per namespace, all veth ends
# enslaved to their local bond0, and finally every bond0 brought up.
# Only the client bond uses "ad_select actor_port_prio".
setup_links()
{
	local ns port

	# Client eth0/eth1 <-> switch eth0/eth1
	# shellcheck disable=SC2154
	ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}"
	ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}"
	# Client eth2/eth3 <-> backup switch eth0/eth1
	# shellcheck disable=SC2154
	ip -n "${c_ns}" link add eth2 type veth peer name eth0 netns "${b_ns}"
	ip -n "${c_ns}" link add eth3 type veth peer name eth1 netns "${b_ns}"

	ip -n "${c_ns}" link add bond0 type bond mode 802.3ad miimon 100 \
		lacp_rate fast ad_select actor_port_prio
	for ns in "${s_ns}" "${b_ns}"; do
		ip -n "${ns}" link add bond0 type bond mode 802.3ad miimon 100 \
			lacp_rate fast
	done

	for port in eth0 eth1 eth2 eth3; do
		ip -n "${c_ns}" link set "${port}" master bond0
	done
	for ns in "${s_ns}" "${b_ns}"; do
		for port in eth0 eth1; do
			ip -n "${ns}" link set "${port}" master bond0
		done
	done

	for ns in "${c_ns}" "${s_ns}" "${b_ns}"; do
		ip -n "${ns}" link set bond0 up
	done
}
# Set actor_port_prio on two client slaves and read it back via "ip -j".
# Leaves RET=0 when both read-backs match, RET=1 otherwise.
test_port_prio_setting()
{
	RET=0

	ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 1000
	prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth0" \
		".[].linkinfo.info_slave_data.actor_port_prio")
	# Compare with -eq and fail on anything else: if the read-back is
	# empty or not a number (e.g. "null" when the attribute is missing),
	# "[" exits with an error status and that must count as a failure
	# rather than leave RET at 0.
	[ "$prio" -eq 1000 ] || RET=1

	ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 10
	prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth2" \
		".[].linkinfo.info_slave_data.actor_port_prio")
	[ "$prio" -eq 10 ] || RET=1
}
# Bounce eth1 in the client namespace to trigger aggregator reselection,
# then check that the bond's active aggregator id equals the aggregator id
# of the slave given as $1. Leaves RET=0 on match, RET=1 otherwise.
test_agg_reselect()
{
	local bond_agg_id slave_agg_id
	local expect_slave="$1"

	RET=0

	# Trigger link state change to reselect the aggregator
	ip -n "${c_ns}" link set eth1 down
	sleep 0.5
	ip -n "${c_ns}" link set eth1 up
	sleep 0.5

	bond_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show bond0" \
		".[].linkinfo.info_data.ad_info.aggregator")
	slave_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show $expect_slave" \
		".[].linkinfo.info_slave_data.ad_aggregator_id")

	# Require a successful numeric match. If either id is empty or not
	# a number (query failed, attribute missing), "[" exits with an
	# error status, which must be reported as a failure instead of
	# silently leaving RET at 0.
	# shellcheck disable=SC2034
	[ "${bond_agg_id}" -eq "${slave_agg_id}" ] || \
		RET=1
}
# Test driver. cleanup_all_ns, setup_ns and log_test are not defined in this
# script; presumably they come from the sourced lib.sh.
# Run cleanup_all_ns whenever the script exits.
trap cleanup_all_ns EXIT
# NOTE(review): setup_ns presumably creates the namespaces and sets the
# c_ns/s_ns/b_ns variables used by setup_links - confirm against lib.sh.
setup_ns c_ns s_ns b_ns
setup_links
# Sets eth0 to priority 1000 and eth2 to priority 10 and verifies read-back.
test_port_prio_setting
log_test "bond 802.3ad" "actor_port_prio setting"
# eth0 now has the higher priority, so its aggregator is expected to be active.
test_agg_reselect eth0
log_test "bond 802.3ad" "actor_port_prio select"
# Change the actor port prio and re-test
ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 10
ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 1000
# Priorities are swapped, so eth2's aggregator is expected to be active.
test_agg_reselect eth2
log_test "bond 802.3ad" "actor_port_prio switch"
# NOTE(review): EXIT_STATUS is not set in this script; presumably maintained
# by log_test in lib.sh - confirm.
exit "${EXIT_STATUS}"

View File

@@ -571,30 +571,6 @@ wait_for_dev()
fi
}
# Run a command and filter its output through jq.
#   $1 - command line to execute (expanded unquoted, so it is word-split)
#   $2 - jq filter expression
#   $3 - optional extra jq options (expanded unquoted)
# Prints the filtered output. Returns the command's or jq's exit status on
# failure; otherwise returns success only when the filtered output is
# non-empty.
cmd_jq()
{
local cmd=$1
local jq_exp=$2
local jq_opts=$3
local ret
local output
output="$($cmd)"
# if the command fails, return error right away
ret=$?
if [[ $ret -ne 0 ]]; then
return $ret
fi
# jq -r emits raw strings rather than JSON-quoted ones
output=$(echo $output | jq -r $jq_opts "$jq_exp")
ret=$?
if [[ $ret -ne 0 ]]; then
return $ret
fi
echo $output
# return success only in case of non-empty output
[ ! -z "$output" ]
}
pre_cleanup()
{
if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then

View File

@@ -645,3 +645,27 @@ wait_local_port_listen()
sleep 0.1
done
}
# Run a command and filter its output through jq.
#   $1 - command line to execute (expanded unquoted, so it is word-split)
#   $2 - jq filter expression
#   $3 - optional extra jq options (expanded unquoted)
# Prints the filtered output. Returns the command's or jq's exit status on
# failure; otherwise returns success only when the filtered output is
# non-empty.
cmd_jq()
{
	local cmd=$1
	local jq_exp=$2
	local jq_opts=$3
	local rc
	local raw
	local filtered

	# If the command fails, hand its exit status straight back.
	raw="$($cmd)"
	rc=$?
	[[ $rc -eq 0 ]] || return $rc

	# Same for jq: a filter error is returned to the caller as-is.
	filtered=$(echo $raw | jq -r $jq_opts "$jq_exp")
	rc=$?
	[[ $rc -eq 0 ]] || return $rc

	echo $filtered

	# Success only when jq actually produced something.
	[ -n "$filtered" ]
}