Merge branch 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux

Tariq Toukan says:

====================
mlx5-next updates 2026-01-13

* 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux:
  net/mlx5: Add IFC bits for extended ETS rate limit bandwidth value
  net/mlx5: Add support for querying bond speed
  net/mlx5: Handle port and vport speed change events in MPESW
  net/mlx5: Propagate LAG effective max_tx_speed to vports
  net/mlx5: Add max_tx_speed and its CAP bit to IFC
====================

Link: https://patch.msgid.link/1768299471-1603093-1-git-send-email-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2026-01-13 17:46:19 -08:00
10 changed files with 395 additions and 6 deletions

View File

@@ -233,14 +233,25 @@ static void mlx5_ldev_free(struct kref *ref)
{
struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
struct net *net;
int i;
if (ldev->nb.notifier_call) {
net = read_pnet(&ldev->net);
unregister_netdevice_notifier_net(net, &ldev->nb);
}
mlx5_ldev_for_each(i, 0, ldev) {
if (ldev->pf[i].dev &&
ldev->pf[i].port_change_nb.nb.notifier_call) {
struct mlx5_nb *nb = &ldev->pf[i].port_change_nb;
mlx5_eq_notifier_unregister(ldev->pf[i].dev, nb);
}
}
mlx5_lag_mp_cleanup(ldev);
cancel_delayed_work_sync(&ldev->bond_work);
cancel_work_sync(&ldev->speed_update_work);
destroy_workqueue(ldev->wq);
mutex_destroy(&ldev->lock);
kfree(ldev);
@@ -274,6 +285,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
kref_init(&ldev->ref);
mutex_init(&ldev->lock);
INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work);
ldev->nb.notifier_call = mlx5_lag_netdev_event;
write_pnet(&ldev->net, mlx5_core_net(dev));
@@ -996,6 +1008,137 @@ static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
ldev->mode != MLX5_LAG_MODE_MPESW;
}
#ifdef CONFIG_MLX5_ESWITCH
/*
 * mlx5_lag_sum_devices_speed() - Accumulate a per-device speed over all PFs
 * of a LAG.
 * @ldev:      LAG context whose member PF slots are iterated.
 * @sum_speed: Output; sum of the speeds of all present PF devices.
 * @get_speed: Callback that queries a single device's speed in Mbps
 *             (e.g. mlx5_port_max_linkspeed() or mlx5_port_oper_linkspeed()).
 *
 * Empty PF slots (no device bound) are skipped.  Returns 0 on success, or the
 * first error returned by @get_speed; on error *sum_speed holds only a
 * partial sum and must not be used by the caller.
 */
static int
mlx5_lag_sum_devices_speed(struct mlx5_lag *ldev, u32 *sum_speed,
			   int (*get_speed)(struct mlx5_core_dev *, u32 *))
{
	struct mlx5_core_dev *pf_mdev;
	int pf_idx;
	u32 speed;
	int ret;

	*sum_speed = 0;
	mlx5_ldev_for_each(pf_idx, 0, ldev) {
		pf_mdev = ldev->pf[pf_idx].dev;
		if (!pf_mdev)
			continue;

		ret = get_speed(pf_mdev, &speed);
		if (ret) {
			/* %ps prints the callback's symbol name for debugging */
			mlx5_core_dbg(pf_mdev,
				      "Failed to get device speed using %ps. Device %s speed is not available (err=%d)\n",
				      get_speed, dev_name(pf_mdev->device),
				      ret);
			return ret;
		}
		*sum_speed += speed;
	}
	return 0;
}
/* Sum the maximum supported link speed (Mbps) of every PF in the LAG. */
static int mlx5_lag_sum_devices_max_speed(struct mlx5_lag *ldev, u32 *max_speed)
{
	int (*query)(struct mlx5_core_dev *, u32 *) = mlx5_port_max_linkspeed;

	return mlx5_lag_sum_devices_speed(ldev, max_speed, query);
}
/* Sum the current operational link speed (Mbps) of every PF in the LAG. */
static int mlx5_lag_sum_devices_oper_speed(struct mlx5_lag *ldev,
					   u32 *oper_speed)
{
	int (*query)(struct mlx5_core_dev *, u32 *) = mlx5_port_oper_linkspeed;

	return mlx5_lag_sum_devices_speed(ldev, oper_speed, query);
}
/*
 * mlx5_lag_modify_device_vports_speed() - Program @speed as the max TX speed
 * of every eswitch vport of @mdev, excluding the uplink vport.
 * @mdev:  device whose eswitch vports are updated.
 * @speed: max TX speed value, already scaled down by MLX5_MAX_TX_SPEED_UNIT
 *         by the callers in this file.
 *
 * Silently returns if the device has no eswitch or lacks the
 * esw_vport_state_max_tx_speed capability.  A per-vport failure is only
 * logged at debug level; the loop continues with the remaining vports.
 */
static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev,
						u32 speed)
{
	u16 op_mod = MLX5_VPORT_STATE_OP_MOD_ESW_VPORT;
	struct mlx5_eswitch *esw = mdev->priv.eswitch;
	struct mlx5_vport *vport;
	unsigned long i;
	int ret;

	if (!esw)
		return;

	if (!MLX5_CAP_ESW(mdev, esw_vport_state_max_tx_speed))
		return;

	mlx5_esw_for_each_vport(esw, i, vport) {
		if (!vport)
			continue;

		/* The uplink vport represents the physical port itself */
		if (vport->vport == MLX5_VPORT_UPLINK)
			continue;

		/* other_vport=true: the command targets vport->vport */
		ret = mlx5_modify_vport_max_tx_speed(mdev, op_mod,
						     vport->vport, true, speed);
		if (ret)
			mlx5_core_dbg(mdev,
				      "Failed to set vport %d speed %d, err=%d\n",
				      vport->vport, speed, ret);
	}
}
/*
 * mlx5_lag_set_vports_agg_speed() - Propagate the LAG's aggregate TX speed to
 * the vports of every member PF.
 * @ldev: LAG context.
 *
 * Speed source depends on the LAG mode:
 *  - MPESW: sum of the operational link speeds of all member PFs.
 *  - otherwise: the bond speed cached in the tracker (bond_speed_mbps);
 *    SPEED_UNKNOWN means the tracker has no usable value, so bail out.
 * A speed of 0 (e.g. links down / ethtool reported SPEED_UNKNOWN, stored as 0
 * by mlx5_lag_update_tracker_speed()) falls back to the sum of the PFs'
 * maximum link speeds.  The result is scaled to MLX5_MAX_TX_SPEED_UNIT units
 * before being applied to each device's vports.
 */
void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *mdev;
	u32 speed;
	int pf_idx;

	if (ldev->mode == MLX5_LAG_MODE_MPESW) {
		if (mlx5_lag_sum_devices_oper_speed(ldev, &speed))
			return;
	} else {
		speed = ldev->tracker.bond_speed_mbps;
		if (speed == SPEED_UNKNOWN)
			return;
	}

	/* If speed is not set, use the sum of max speeds of all PFs */
	if (!speed && mlx5_lag_sum_devices_max_speed(ldev, &speed))
		return;

	speed = speed / MLX5_MAX_TX_SPEED_UNIT;
	mlx5_ldev_for_each(pf_idx, 0, ldev) {
		mdev = ldev->pf[pf_idx].dev;
		if (!mdev)
			continue;

		mlx5_lag_modify_device_vports_speed(mdev, speed);
	}
}
/*
 * mlx5_lag_reset_vports_speed() - Restore per-device vport speeds when the
 * LAG is being torn down.
 * @ldev: LAG context.
 *
 * Each PF's vports are reprogrammed with that PF's own operational link
 * speed (no longer the aggregate), scaled to MLX5_MAX_TX_SPEED_UNIT units.
 * A device whose operational speed cannot be queried is skipped with a
 * debug message.
 */
void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *mdev;
	u32 speed;
	int pf_idx;
	int ret;

	mlx5_ldev_for_each(pf_idx, 0, ldev) {
		mdev = ldev->pf[pf_idx].dev;
		if (!mdev)
			continue;

		ret = mlx5_port_oper_linkspeed(mdev, &speed);
		if (ret) {
			mlx5_core_dbg(mdev,
				      "Failed to reset vports speed for device %s. Oper speed is not available (err=%d)\n",
				      dev_name(mdev->device), ret);
			continue;
		}

		speed = speed / MLX5_MAX_TX_SPEED_UNIT;
		mlx5_lag_modify_device_vports_speed(mdev, speed);
	}
}
#endif
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
@@ -1083,9 +1226,12 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
ndev);
dev_put(ndev);
}
mlx5_lag_set_vports_agg_speed(ldev);
} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
mlx5_modify_lag(ldev, &tracker);
mlx5_lag_set_vports_agg_speed(ldev);
} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
mlx5_lag_reset_vports_speed(ldev);
mlx5_disable_lag(ldev);
}
}
@@ -1286,6 +1432,65 @@ static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
return 1;
}
/*
 * mlx5_lag_update_tracker_speed() - Cache the bond device's speed in the
 * LAG tracker.
 * @tracker: tracker whose bond_speed_mbps field is updated.
 * @ndev:    netdev that triggered the event; either the bond master itself
 *           or a slave whose master is looked up.
 *
 * Resulting bond_speed_mbps encoding:
 *  - SPEED_UNKNOWN: no bond master found, or the ethtool query failed.
 *  - 0:             ethtool succeeded but reported SPEED_UNKNOWN; callers
 *                   (mlx5_lag_set_vports_agg_speed()) treat 0 as "unset"
 *                   and fall back to summed max speeds.
 *  - otherwise:     bond speed in Mbps as reported by ethtool.
 */
static void mlx5_lag_update_tracker_speed(struct lag_tracker *tracker,
					  struct net_device *ndev)
{
	struct ethtool_link_ksettings lksettings;
	struct net_device *bond_dev;
	int err;

	if (netif_is_lag_master(ndev))
		bond_dev = ndev;
	else
		bond_dev = netdev_master_upper_dev_get(ndev);

	if (!bond_dev) {
		tracker->bond_speed_mbps = SPEED_UNKNOWN;
		return;
	}

	err = __ethtool_get_link_ksettings(bond_dev, &lksettings);
	if (err) {
		netdev_dbg(bond_dev,
			   "Failed to get speed for bond dev %s, err=%d\n",
			   bond_dev->name, err);
		tracker->bond_speed_mbps = SPEED_UNKNOWN;
		return;
	}

	if (lksettings.base.speed == SPEED_UNKNOWN)
		tracker->bond_speed_mbps = 0;
	else
		tracker->bond_speed_mbps = lksettings.base.speed;
}
/*
 * mlx5_lag_query_bond_speed() - Return the cached bond speed in Mbps.
 * @mdev:  member device used to look up the LAG context.
 * @speed: output, bond speed in Mbps (valid only on success).
 *
 * Reads tracker.bond_speed_mbps under lag_lock so the LAG context cannot
 * disappear mid-read.  Returns -ENODEV if @mdev is not part of a LAG and
 * -EINVAL if the cached speed is SPEED_UNKNOWN.
 */
int mlx5_lag_query_bond_speed(struct mlx5_core_dev *mdev, u32 *speed)
{
	struct mlx5_lag *ldev;
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(mdev);
	if (!ldev) {
		ret = -ENODEV;
		goto unlock;
	}

	*speed = ldev->tracker.bond_speed_mbps;
	if (*speed == SPEED_UNKNOWN) {
		mlx5_core_dbg(mdev, "Bond speed is unknown\n");
		ret = -EINVAL;
	}

unlock:
	spin_unlock_irqrestore(&lag_lock, flags);
	return ret;
}
EXPORT_SYMBOL_GPL(mlx5_lag_query_bond_speed);
/* this handler is always registered to netdev events */
static int mlx5_lag_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
@@ -1317,6 +1522,9 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
break;
}
if (changed)
mlx5_lag_update_tracker_speed(&tracker, ndev);
ldev->tracker = tracker;
if (changed)
@@ -1362,6 +1570,10 @@ static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
ldev->pf[fn].dev = dev;
dev->priv.lag = ldev;
MLX5_NB_INIT(&ldev->pf[fn].port_change_nb,
mlx5_lag_mpesw_port_change_event, PORT_CHANGE);
mlx5_eq_notifier_register(dev, &ldev->pf[fn].port_change_nb);
}
static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
@@ -1373,6 +1585,9 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
if (ldev->pf[fn].dev != dev)
return;
if (ldev->pf[fn].port_change_nb.nb.notifier_call)
mlx5_eq_notifier_unregister(dev, &ldev->pf[fn].port_change_nb);
ldev->pf[fn].dev = NULL;
dev->priv.lag = NULL;
}

View File

@@ -39,6 +39,7 @@ struct lag_func {
struct mlx5_core_dev *dev;
struct net_device *netdev;
bool has_drop;
struct mlx5_nb port_change_nb;
};
/* Used for collection of netdev event info. */
@@ -48,6 +49,7 @@ struct lag_tracker {
unsigned int is_bonded:1;
unsigned int has_inactive:1;
enum netdev_lag_hash hash_type;
u32 bond_speed_mbps;
};
/* LAG data of a ConnectX card.
@@ -66,6 +68,7 @@ struct mlx5_lag {
struct lag_tracker tracker;
struct workqueue_struct *wq;
struct delayed_work bond_work;
struct work_struct speed_update_work;
struct notifier_block nb;
possible_net_t net;
struct lag_mp lag_mp;
@@ -116,6 +119,14 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev);
void mlx5_lag_add_devices(struct mlx5_lag *ldev);
struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev);
#ifdef CONFIG_MLX5_ESWITCH
void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev);
void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev);
#else
static inline void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev) {}
static inline void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev) {}
#endif
static inline bool mlx5_lag_is_supported(struct mlx5_core_dev *dev)
{
if (!MLX5_CAP_GEN(dev, vport_group_manager) ||

View File

@@ -110,6 +110,8 @@ static int enable_mpesw(struct mlx5_lag *ldev)
goto err_rescan_drivers;
}
mlx5_lag_set_vports_agg_speed(ldev);
return 0;
err_rescan_drivers:
@@ -223,3 +225,40 @@ bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev)
return ldev && ldev->mode == MLX5_LAG_MODE_MPESW;
}
EXPORT_SYMBOL(mlx5_lag_is_mpesw);
/*
 * mlx5_mpesw_speed_update_work() - Deferred work that re-applies the
 * aggregate vport speed after a port change event in MPESW mode.
 * @work: embedded in struct mlx5_lag as speed_update_work.
 *
 * Runs under ldev->lock.  If a mode change is in flight, the work requeues
 * itself on ldev->wq and retries later instead of racing with it; work on
 * a LAG that is no longer in MPESW mode is dropped.
 */
void mlx5_mpesw_speed_update_work(struct work_struct *work)
{
	struct mlx5_lag *ldev = container_of(work, struct mlx5_lag,
					     speed_update_work);

	mutex_lock(&ldev->lock);
	if (ldev->mode == MLX5_LAG_MODE_MPESW) {
		if (ldev->mode_changes_in_progress)
			queue_work(ldev->wq, &ldev->speed_update_work);
		else
			mlx5_lag_set_vports_agg_speed(ldev);
	}
	mutex_unlock(&ldev->lock);
}
/*
 * mlx5_lag_mpesw_port_change_event() - EQ notifier for PORT_CHANGE events.
 * @nb:    embedded notifier; resolved back to the owning lag_func.
 * @event: unused; the EQE subtype is read from @data instead.
 * @data:  the firmware event queue entry (struct mlx5_eqe).
 *
 * On a port going down or becoming active, schedules speed_update_work so
 * the aggregate vport speed is recomputed outside atomic context.  Returns
 * NOTIFY_DONE when the device is not (or no longer) part of a LAG.
 */
int mlx5_lag_mpesw_port_change_event(struct notifier_block *nb,
				     unsigned long event, void *data)
{
	struct mlx5_nb *mlx5_nb = container_of(nb, struct mlx5_nb, nb);
	struct lag_func *lag_func = container_of(mlx5_nb,
						 struct lag_func,
						 port_change_nb);
	struct mlx5_core_dev *dev = lag_func->dev;
	struct mlx5_lag *ldev = dev->priv.lag;
	struct mlx5_eqe *eqe = data;

	if (!ldev)
		return NOTIFY_DONE;

	if (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_DOWN ||
	    eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE)
		queue_work(ldev->wq, &ldev->speed_update_work);

	return NOTIFY_OK;
}

View File

@@ -32,4 +32,18 @@ bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev);
void mlx5_lag_mpesw_disable(struct mlx5_core_dev *dev);
int mlx5_lag_mpesw_enable(struct mlx5_core_dev *dev);
#ifdef CONFIG_MLX5_ESWITCH
void mlx5_mpesw_speed_update_work(struct work_struct *work);
int mlx5_lag_mpesw_port_change_event(struct notifier_block *nb,
unsigned long event, void *data);
#else
static inline void mlx5_mpesw_speed_update_work(struct work_struct *work) {}
static inline int mlx5_lag_mpesw_port_change_event(struct notifier_block *nb,
unsigned long event,
void *data)
{
return NOTIFY_DONE;
}
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_LAG_MPESW_H__ */

View File

@@ -381,6 +381,7 @@ const struct mlx5_link_info *mlx5_port_ptys2info(struct mlx5_core_dev *mdev,
u32 mlx5_port_info2linkmodes(struct mlx5_core_dev *mdev,
struct mlx5_link_info *info,
bool force_legacy);
int mlx5_port_oper_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
#define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) && \

View File

@@ -1203,6 +1203,30 @@ u32 mlx5_port_info2linkmodes(struct mlx5_core_dev *mdev,
return link_modes;
}
/*
 * mlx5_port_oper_linkspeed() - Query the port's operational link speed.
 * @mdev:  device to query.
 * @speed: output, highest speed (Mbps) among the operational protocol bits.
 *
 * Reads the PTYS operational protocol mask (extended modes when supported)
 * for local port 1 and returns the maximum speed among all set mode bits.
 * *speed is 0 when no operational mode bit is set (link down).  Returns a
 * negative error if the PTYS query fails.
 */
int mlx5_port_oper_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
{
	const struct mlx5_link_info *table;
	struct mlx5_port_eth_proto eproto;
	u32 oper_speed = 0;
	u32 max_size;
	bool ext;
	int err;
	int i;

	ext = mlx5_ptys_ext_supported(mdev);
	err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
	if (err)
		return err;

	/* force_legacy=false: table matches the ext/legacy choice above */
	mlx5e_port_get_link_mode_info_arr(mdev, &table, &max_size, false);
	for (i = 0; i < max_size; ++i)
		if (eproto.oper & MLX5E_PROT_MASK(i))
			oper_speed = max(oper_speed, table[i].speed);

	*speed = oper_speed;
	return 0;
}
int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
{
const struct mlx5_link_info *table;

View File

@@ -62,6 +62,28 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
return MLX5_GET(query_vport_state_out, out, state);
}
/*
 * mlx5_query_vport_admin_state() - Read a vport's administrative state via
 * the QUERY_VPORT_STATE firmware command.
 * @mdev:        device issuing the command.
 * @opmod:       command op_mod selecting the vport domain.
 * @vport:       vport number to query.
 * @other_vport: nonzero when @vport refers to a vport other than the
 *               command's own.
 * @admin_state: output, the admin_state field of the command response.
 *
 * Returns 0 on success or the firmware command error.
 */
static int mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
					u16 vport, u8 other_vport,
					u8 *admin_state)
{
	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
	int err;

	MLX5_SET(query_vport_state_in, in, opcode,
		 MLX5_CMD_OP_QUERY_VPORT_STATE);
	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
	MLX5_SET(query_vport_state_in, in, vport_number, vport);
	MLX5_SET(query_vport_state_in, in, other_vport, other_vport);

	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
	if (err)
		return err;

	*admin_state = MLX5_GET(query_vport_state_out, out, admin_state);
	return 0;
}
int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
u16 vport, u8 other_vport, u8 state)
{
@@ -77,6 +99,58 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
return mlx5_cmd_exec_in(mdev, modify_vport_state, in);
}
/*
 * mlx5_modify_vport_max_tx_speed() - Set a vport's max TX speed via the
 * MODIFY_VPORT_STATE firmware command.
 * @mdev:         device issuing the command.
 * @opmod:        command op_mod selecting the vport domain.
 * @vport:        vport number to modify.
 * @other_vport:  nonzero when @vport refers to a vport other than the
 *                command's own.
 * @max_tx_speed: speed value in MLX5_MAX_TX_SPEED_UNIT units.
 *
 * The current admin state is queried first and written back unchanged,
 * since MODIFY_VPORT_STATE also carries the admin_state field and would
 * otherwise overwrite it.  Returns 0 on success or a firmware error.
 */
int mlx5_modify_vport_max_tx_speed(struct mlx5_core_dev *mdev, u8 opmod,
				   u16 vport, u8 other_vport, u16 max_tx_speed)
{
	u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {};
	u8 admin_state;
	int err;

	err = mlx5_query_vport_admin_state(mdev, opmod, vport, other_vport,
					   &admin_state);
	if (err)
		return err;

	MLX5_SET(modify_vport_state_in, in, opcode,
		 MLX5_CMD_OP_MODIFY_VPORT_STATE);
	MLX5_SET(modify_vport_state_in, in, op_mod, opmod);
	MLX5_SET(modify_vport_state_in, in, vport_number, vport);
	MLX5_SET(modify_vport_state_in, in, other_vport, other_vport);
	MLX5_SET(modify_vport_state_in, in, admin_state, admin_state);
	MLX5_SET(modify_vport_state_in, in, max_tx_speed, max_tx_speed);

	return mlx5_cmd_exec_in(mdev, modify_vport_state, in);
}
/*
 * mlx5_query_vport_max_tx_speed() - Read a vport's max TX speed via the
 * QUERY_VPORT_STATE firmware command.
 * @mdev:         device issuing the command.
 * @op_mod:       command op_mod selecting the vport domain.
 * @vport:        vport number to query.
 * @other_vport:  nonzero when @vport refers to a vport other than the
 *                command's own.
 * @max_tx_speed: output, in MLX5_MAX_TX_SPEED_UNIT units; forced to 0 when
 *                the vport state is down.
 *
 * Returns 0 on success or the firmware command error.
 */
int mlx5_query_vport_max_tx_speed(struct mlx5_core_dev *mdev, u8 op_mod,
				  u16 vport, u8 other_vport, u32 *max_tx_speed)
{
	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
	u32 state;
	int err;

	MLX5_SET(query_vport_state_in, in, opcode,
		 MLX5_CMD_OP_QUERY_VPORT_STATE);
	MLX5_SET(query_vport_state_in, in, op_mod, op_mod);
	MLX5_SET(query_vport_state_in, in, vport_number, vport);
	MLX5_SET(query_vport_state_in, in, other_vport, other_vport);

	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
	if (err)
		return err;

	state = MLX5_GET(query_vport_state_out, out, state);
	if (state == VPORT_STATE_DOWN) {
		/* A down vport has no meaningful max TX speed */
		*max_tx_speed = 0;
		return 0;
	}

	*max_tx_speed = MLX5_GET(query_vport_state_out, out, max_tx_speed);
	return 0;
}
EXPORT_SYMBOL_GPL(mlx5_query_vport_max_tx_speed);
static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
bool other_vport, u32 *out)
{

View File

@@ -1149,6 +1149,7 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
bool mlx5_lag_is_roce(struct mlx5_core_dev *dev);
bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
int mlx5_lag_query_bond_speed(struct mlx5_core_dev *dev, u32 *speed);
bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev);
bool mlx5_lag_is_master(struct mlx5_core_dev *dev);
bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev);

View File

@@ -1071,7 +1071,9 @@ struct mlx5_ifc_e_switch_cap_bits {
u8 esw_shared_ingress_acl[0x1];
u8 esw_uplink_ingress_acl[0x1];
u8 root_ft_on_other_esw[0x1];
u8 reserved_at_a[0xf];
u8 reserved_at_a[0x1];
u8 esw_vport_state_max_tx_speed[0x1];
u8 reserved_at_c[0xd];
u8 esw_functions_changed[0x1];
u8 reserved_at_1a[0x1];
u8 ecpf_vport_exists[0x1];
@@ -5445,7 +5447,8 @@ struct mlx5_ifc_query_vport_state_out_bits {
u8 reserved_at_40[0x20];
u8 reserved_at_60[0x18];
u8 max_tx_speed[0x10];
u8 reserved_at_70[0x8];
u8 admin_state[0x4];
u8 state[0x4];
};
@@ -7778,7 +7781,7 @@ struct mlx5_ifc_modify_vport_state_in_bits {
u8 reserved_at_41[0xf];
u8 vport_number[0x10];
u8 reserved_at_60[0x10];
u8 max_tx_speed[0x10];
u8 ingress_connect[0x1];
u8 egress_connect[0x1];
u8 ingress_connect_valid[0x1];
@@ -11006,7 +11009,9 @@ struct mlx5_ifc_qcam_access_reg_cap_mask {
};
struct mlx5_ifc_qcam_qos_feature_cap_mask {
u8 qcam_qos_feature_cap_mask_127_to_1[0x7F];
u8 qcam_qos_feature_cap_mask_127_to_5[0x7B];
u8 qetcr_qshr_max_bw_val_msb[0x1];
u8 qcam_qos_feature_cap_mask_3_to_1[0x3];
u8 qpts_trust_both[0x1];
};
@@ -11962,8 +11967,7 @@ struct mlx5_ifc_ets_tcn_config_reg_bits {
u8 reserved_at_20[0xc];
u8 max_bw_units[0x4];
u8 reserved_at_30[0x8];
u8 max_bw_value[0x8];
u8 max_bw_value[0x10];
};
struct mlx5_ifc_ets_global_config_reg_bits {

View File

@@ -41,6 +41,8 @@
(MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
mlx5_core_is_pf(mdev))
#define MLX5_MAX_TX_SPEED_UNIT 100
enum {
MLX5_CAP_INLINE_MODE_L2,
MLX5_CAP_INLINE_MODE_VPORT_CONTEXT,
@@ -58,6 +60,10 @@ enum {
u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport);
int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
u16 vport, u8 other_vport, u8 state);
int mlx5_query_vport_max_tx_speed(struct mlx5_core_dev *mdev, u8 op_mod,
u16 vport, u8 other_vport, u32 *max_tx_speed);
int mlx5_modify_vport_max_tx_speed(struct mlx5_core_dev *mdev, u8 opmod,
u16 vport, u8 other_vport, u16 max_tx_speed);
int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
u16 vport, bool other, u8 *addr);
int mlx5_query_mac_address(struct mlx5_core_dev *mdev, u8 *addr);