diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index a2b42ae719d2..e700bf1d095c 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -193,6 +193,15 @@ ad_actor_sys_prio This parameter has effect only in 802.3ad mode and is available through SysFs interface. +actor_port_prio + + In an AD system, this specifies the port priority. The allowed range + is 0 - 65535. If the value is not specified, it takes 255 as the + default value. + + This parameter has effect only in 802.3ad mode and is available through + the netlink interface. + ad_actor_system In an AD system, this specifies the mac-address for the actor in @@ -241,10 +250,18 @@ ad_select ports (slaves). Reselection occurs as described under the "bandwidth" setting, above. - The bandwidth and count selection policies permit failover of - 802.3ad aggregations when partial failure of the active aggregator - occurs. This keeps the aggregator with the highest availability - (either in bandwidth or in number of ports) active at all times. + actor_port_prio or 3 + + The active aggregator is chosen by the highest total sum of + actor port priorities across its active ports. Note that this + priority is actor_port_prio, not the per-port prio option, + which is used for primary reselection. + + The bandwidth, count and actor_port_prio selection policies permit + failover of 802.3ad aggregations when partial failure of the active + aggregator occurs. This keeps the aggregator with the highest + availability (either in bandwidth, number of ports, or total value + of port priorities) active at all times. This option was added in bonding version 3.4.0. 
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 4edc8e6b6b64..49717b7b82a2 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -436,6 +436,7 @@ static void __ad_actor_update_port(struct port *port) port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr; port->actor_system_priority = BOND_AD_INFO(bond).system.sys_priority; + port->actor_port_priority = SLAVE_AD_INFO(port->slave)->port_priority; } /* Conversions */ @@ -746,6 +747,18 @@ static int __agg_active_ports(struct aggregator *agg) return active; } +static unsigned int __agg_ports_priority(const struct aggregator *agg) +{ + struct port *port = agg->lag_ports; + unsigned int prio = 0; + + for (; port; port = port->next_port_in_aggregator) + if (port->is_enabled) + prio += port->actor_port_priority; + + return prio; +} + /** * __get_agg_bandwidth - get the total bandwidth of an aggregator * @aggregator: the aggregator we're looking at @@ -1707,6 +1720,9 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best, * 4. Therefore, current and best both have partner replies or * both do not, so perform selection policy: * + * BOND_AD_PRIO: Select by the summed actor port priority of enabled + * ports. If the sums are equal, select by count. + * * BOND_AD_COUNT: Select by count of ports. If count is equal, * select by bandwidth. * @@ -1728,6 +1744,14 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best, return best; switch (__get_agg_selection_mode(curr->lag_ports)) { + case BOND_AD_PRIO: + if (__agg_ports_priority(curr) > __agg_ports_priority(best)) + return curr; + + if (__agg_ports_priority(curr) < __agg_ports_priority(best)) + return best; + + fallthrough; case BOND_AD_COUNT: if (__agg_active_ports(curr) > __agg_active_ports(best)) return curr; @@ -1793,6 +1817,10 @@ static int agg_device_up(const struct aggregator *agg) * (slaves), and reselect whenever a link state change takes place or the * set of slaves in the bond changes. 
* + * BOND_AD_PRIO: select the aggregator with the highest summed actor port + * priority of its ports (slaves), and reselect whenever a link state change + * takes place or the set of slaves in the bond changes. + * * FIXME: this function MUST be called with the first agg in the bond, or * __get_active_agg() won't work correctly. This function should be better * called with the bond itself, and retrieve the first agg from it. @@ -2209,6 +2237,9 @@ void bond_3ad_bind_slave(struct slave *slave) ad_initialize_port(port, &bond->params); + /* Port priority is now initialized; mirror it in the slave's AD info */ + SLAVE_AD_INFO(slave)->port_priority = port->actor_port_priority; + port->slave = slave; port->actor_port_number = SLAVE_AD_INFO(slave)->id; /* key is determined according to the link speed, duplex and diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index e573b34a1bbc..ba71d95a82d2 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -28,6 +28,7 @@ static size_t bond_get_slave_size(const struct net_device *bond_dev, nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE */ nla_total_size(sizeof(s32)) + /* IFLA_BOND_SLAVE_PRIO */ + nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_ACTOR_PORT_PRIO */ 0; } @@ -77,6 +78,10 @@ static int bond_fill_slave_info(struct sk_buff *skb, ad_port->partner_oper.port_state)) goto nla_put_failure; } + + if (nla_put_u16(skb, IFLA_BOND_SLAVE_ACTOR_PORT_PRIO, + SLAVE_AD_INFO(slave)->port_priority)) + goto nla_put_failure; } return 0; @@ -130,6 +135,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = { [IFLA_BOND_SLAVE_QUEUE_ID] = { .type = NLA_U16 }, [IFLA_BOND_SLAVE_PRIO] = { .type = NLA_S32 }, + [IFLA_BOND_SLAVE_ACTOR_PORT_PRIO] = { .type = NLA_U16 }, }; static int bond_validate(struct nlattr 
*tb[], struct nlattr *data[], @@ -180,6 +186,16 @@ static int bond_slave_changelink(struct net_device *bond_dev, return err; } + if (data[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO]) { + u16 ad_prio = nla_get_u16(data[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO]); + + bond_opt_slave_initval(&newval, &slave_dev, ad_prio); + err = __bond_opt_set(bond, BOND_OPT_ACTOR_PORT_PRIO, &newval, + data[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO], extack); + if (err) + return err; + } + return 0; } diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index c0a5eb8766b5..5b275cb266bc 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -79,6 +79,8 @@ static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, const struct bond_opt_value *newval); +static int bond_option_actor_port_prio_set(struct bonding *bond, + const struct bond_opt_value *newval); static int bond_option_ad_actor_system_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_ad_user_port_key_set(struct bonding *bond, @@ -160,10 +162,11 @@ static const struct bond_opt_value bond_lacp_rate_tbl[] = { }; static const struct bond_opt_value bond_ad_select_tbl[] = { - { "stable", BOND_AD_STABLE, BOND_VALFLAG_DEFAULT}, - { "bandwidth", BOND_AD_BANDWIDTH, 0}, - { "count", BOND_AD_COUNT, 0}, - { NULL, -1, 0}, + { "stable", BOND_AD_STABLE, BOND_VALFLAG_DEFAULT}, + { "bandwidth", BOND_AD_BANDWIDTH, 0}, + { "count", BOND_AD_COUNT, 0}, + { "actor_port_prio", BOND_AD_PRIO, 0}, + { NULL, -1, 0}, }; static const struct bond_opt_value bond_num_peer_notif_tbl[] = { 
@@ -483,6 +486,13 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .values = bond_ad_actor_sys_prio_tbl, .set = bond_option_ad_actor_sys_prio_set, }, + [BOND_OPT_ACTOR_PORT_PRIO] = { + .id = BOND_OPT_ACTOR_PORT_PRIO, + .name = "actor_port_prio", + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), + .flags = BOND_OPTFLAG_RAWVAL, + .set = bond_option_actor_port_prio_set, + }, [BOND_OPT_AD_ACTOR_SYSTEM] = { .id = BOND_OPT_AD_ACTOR_SYSTEM, .name = "ad_actor_system", @@ -1812,6 +1822,32 @@ static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, return 0; } +/* Set the LACP actor port priority of a single slave. + * + * The option is registered with BOND_OPTFLAG_RAWVAL, so @newval is the + * caller's own value: it always carries slave_dev, and its range is bounded + * by the u16 netlink attribute it was read from. + */ +static int bond_option_actor_port_prio_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + struct slave *slave; + + slave = bond_slave_get_rtnl(newval->slave_dev); + if (!slave) { + netdev_dbg(bond->dev, "%s called on NULL slave\n", __func__); + return -ENODEV; + } + + netdev_dbg(newval->slave_dev, "Setting actor_port_prio to %llu\n", + newval->value); + + SLAVE_AD_INFO(slave)->port_priority = newval->value; + bond_3ad_update_ad_actor_settings(bond); + + return 0; +} + static int bond_option_ad_actor_system_set(struct bonding *bond, const struct bond_opt_value *newval) { diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index dba369a2cf27..c92d4a976246 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -26,6 +26,7 @@ enum { BOND_AD_STABLE = 0, BOND_AD_BANDWIDTH = 1, BOND_AD_COUNT = 2, + BOND_AD_PRIO = 3, }; /* rx machine states(43.4.11 in the 802.3ad standard) */ @@ -274,6 +275,7 @@ struct ad_slave_info { struct port port; /* 802.3ad port structure */ struct bond_3ad_stats stats; u16 id; + u16 port_priority; }; static inline const char *bond_3ad_churn_desc(churn_state_t state) diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 
022b122a9fb6..e6eedf23aea1 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -78,6 +78,7 @@ enum { BOND_OPT_PRIO, BOND_OPT_COUPLED_CONTROL, BOND_OPT_BROADCAST_NEIGH, + BOND_OPT_ACTOR_PORT_PRIO, BOND_OPT_LAST }; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 784ace3a519c..45f56c9f95d9 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1564,6 +1564,7 @@ enum { IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, IFLA_BOND_SLAVE_PRIO, + IFLA_BOND_SLAVE_ACTOR_PORT_PRIO, __IFLA_BOND_SLAVE_MAX, }; diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 44b98f17f8ff..3462783ed3ac 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -11,7 +11,8 @@ TEST_PROGS := \ bond_options.sh \ bond-eth-type-change.sh \ bond_macvlan_ipvlan.sh \ - bond_passive_lacp.sh + bond_passive_lacp.sh \ + bond_lacp_prio.sh TEST_FILES := \ lag_lib.sh \ diff --git a/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh new file mode 100755 index 000000000000..a483d505c6a8 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test that the bonding 802.3ad (LACP) per-port actor priority works +# +# Switch (s_ns) Backup Switch (b_ns) +# +-------------------------+ +-------------------------+ +# | bond0 | | bond0 | +# | + | | + | +# | eth0 | eth1 | | eth0 | eth1 | +# | +---+---+ | | +---+---+ | +# | | | | | | | | +# +-------------------------+ +-------------------------+ +# | | | | +# +-----------------------------------------------------+ +# | | | | | | +# | +-------+---------+---------+-------+ | +# | eth0 eth1 | eth2 eth3 | +# | + | +# | bond0 | +# 
+-----------------------------------------------------+ +# Client (c_ns) + +lib_dir=$(dirname "$0") +# shellcheck disable=SC1091 +source "$lib_dir"/../../../net/lib.sh + +setup_links() +{ + # shellcheck disable=SC2154 + ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}" + ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}" + # shellcheck disable=SC2154 + ip -n "${c_ns}" link add eth2 type veth peer name eth0 netns "${b_ns}" + ip -n "${c_ns}" link add eth3 type veth peer name eth1 netns "${b_ns}" + + ip -n "${c_ns}" link add bond0 type bond mode 802.3ad miimon 100 \ + lacp_rate fast ad_select actor_port_prio + ip -n "${s_ns}" link add bond0 type bond mode 802.3ad miimon 100 \ + lacp_rate fast + ip -n "${b_ns}" link add bond0 type bond mode 802.3ad miimon 100 \ + lacp_rate fast + + ip -n "${c_ns}" link set eth0 master bond0 + ip -n "${c_ns}" link set eth1 master bond0 + ip -n "${c_ns}" link set eth2 master bond0 + ip -n "${c_ns}" link set eth3 master bond0 + ip -n "${s_ns}" link set eth0 master bond0 + ip -n "${s_ns}" link set eth1 master bond0 + ip -n "${b_ns}" link set eth0 master bond0 + ip -n "${b_ns}" link set eth1 master bond0 + + ip -n "${c_ns}" link set bond0 up + ip -n "${s_ns}" link set bond0 up + ip -n "${b_ns}" link set bond0 up +} + +test_port_prio_setting() +{ + RET=0 + ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 1000 + prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth0" \ + ".[].linkinfo.info_slave_data.actor_port_prio") + [ "$prio" -eq 1000 ] || RET=1 # also fails on non-numeric output + ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 10 + prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth2" \ + ".[].linkinfo.info_slave_data.actor_port_prio") + [ "$prio" -eq 10 ] || RET=1 +} + +test_agg_reselect() +{ + local bond_agg_id slave_agg_id + local expect_slave="$1" + RET=0 + + # Trigger link state change to reselect the aggregator + ip -n "${c_ns}" link set eth1 down + sleep 0.5 + ip -n "${c_ns}" link set eth1 up + sleep 0.5 
+ + bond_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show bond0" \ + ".[].linkinfo.info_data.ad_info.aggregator") + slave_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show $expect_slave" \ + ".[].linkinfo.info_slave_data.ad_aggregator_id") + # shellcheck disable=SC2034 + [ "${bond_agg_id}" -eq "${slave_agg_id}" ] || \ + RET=1 +} + +trap cleanup_all_ns EXIT +setup_ns c_ns s_ns b_ns +setup_links + +test_port_prio_setting +log_test "bond 802.3ad" "actor_port_prio setting" + +test_agg_reselect eth0 +log_test "bond 802.3ad" "actor_port_prio select" + +# Change the actor port prio and re-test +ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 10 +ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 1000 +test_agg_reselect eth2 +log_test "bond 802.3ad" "actor_port_prio switch" + +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 890b3374dacd..08121cb9dc26 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -571,30 +571,6 @@ wait_for_dev() fi } -cmd_jq() -{ - local cmd=$1 - local jq_exp=$2 - local jq_opts=$3 - local ret - local output - - output="$($cmd)" - # it the command fails, return error right away - ret=$? - if [[ $ret -ne 0 ]]; then - return $ret - fi - output=$(echo $output | jq -r $jq_opts "$jq_exp") - ret=$? - if [[ $ret -ne 0 ]]; then - return $ret - fi - echo $output - # return success only in case of non-empty output - [ ! 
-z "$output" ] -} - pre_cleanup() { if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index c7add0dc4c60..4dca6893aa8a 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -645,3 +645,27 @@ wait_local_port_listen() sleep 0.1 done } + +cmd_jq() +{ + local cmd=$1 + local jq_exp=$2 + local jq_opts=$3 + local ret + local output + + output="$($cmd)" + # if the command fails, return error right away + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + output=$(echo $output | jq -r $jq_opts "$jq_exp") + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + echo $output + # return success only in case of non-empty output + [ ! -z "$output" ] +}