Merge branch 'mlx5-misc-fixes-2025-08-25'

Mark Bloch says:

====================
mlx5 misc fixes 2025-08-25

This patchset provides misc bug fixes from the team to the mlx5 core
and Eth drivers.

v1: https://lore.kernel.org/20250824083944.523858-1-mbloch@nvidia.com
====================

Link: https://patch.msgid.link/20250825143435.598584-1-mbloch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2025-08-26 18:00:29 -07:00
12 changed files with 139 additions and 70 deletions

View File

@@ -160,7 +160,7 @@ static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netli
if (err)
return err;
mlx5_unload_one_devl_locked(dev, true);
mlx5_sync_reset_unload_flow(dev, true);
err = mlx5_health_wait_pci_up(dev);
if (err)
NL_SET_ERR_MSG_MOD(extack, "FW activate aborted, PCI reads fail after reset");

View File

@@ -575,7 +575,6 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
if (err)
return err;
}
priv->dcbx.xoff = xoff;
/* Apply the settings */
if (update_buffer) {
@@ -584,6 +583,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
return err;
}
priv->dcbx.xoff = xoff;
if (update_prio2buffer)
err = mlx5e_port_set_priority2buffer(priv->mdev, prio2buffer);

View File

@@ -66,11 +66,23 @@ struct mlx5e_port_buffer {
struct mlx5e_bufferx_reg buffer[MLX5E_MAX_NETWORK_BUFFER];
};
#ifdef CONFIG_MLX5_CORE_EN_DCB
int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
u32 change, unsigned int mtu,
struct ieee_pfc *pfc,
u32 *buffer_size,
u8 *prio2buffer);
#else
static inline int
mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
u32 change, unsigned int mtu,
void *pfc,
u32 *buffer_size,
u8 *prio2buffer)
{
return 0;
}
#endif
int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
struct mlx5e_port_buffer *port_buffer);

View File

@@ -49,6 +49,7 @@
#include "en.h"
#include "en/dim.h"
#include "en/txrx.h"
#include "en/port_buffer.h"
#include "en_tc.h"
#include "en_rep.h"
#include "en_accel/ipsec.h"
@@ -138,6 +139,8 @@ void mlx5e_update_carrier(struct mlx5e_priv *priv)
if (up) {
netdev_info(priv->netdev, "Link up\n");
netif_carrier_on(priv->netdev);
mlx5e_port_manual_buffer_config(priv, 0, priv->netdev->mtu,
NULL, NULL, NULL);
} else {
netdev_info(priv->netdev, "Link down\n");
netif_carrier_off(priv->netdev);
@@ -3040,9 +3043,11 @@ int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
struct mlx5e_params *params = &priv->channels.params;
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
u16 mtu;
u16 mtu, prev_mtu;
int err;
mlx5e_query_mtu(mdev, params, &prev_mtu);
err = mlx5e_set_mtu(mdev, params, params->sw_mtu);
if (err)
return err;
@@ -3052,6 +3057,18 @@ int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n",
__func__, mtu, params->sw_mtu);
if (mtu != prev_mtu && MLX5_BUFFER_SUPPORTED(mdev)) {
err = mlx5e_port_manual_buffer_config(priv, 0, mtu,
NULL, NULL, NULL);
if (err) {
netdev_warn(netdev, "%s: Failed to set Xon/Xoff values with MTU %d (err %d), setting back to previous MTU %d\n",
__func__, mtu, err, prev_mtu);
mlx5e_set_mtu(mdev, params, prev_mtu);
return err;
}
}
params->sw_mtu = mtu;
return 0;
}

View File

@@ -3734,6 +3734,13 @@ static int mlx5_fs_mode_validate(struct devlink *devlink, u32 id,
char *value = val.vstr;
u8 eswitch_mode;
eswitch_mode = mlx5_eswitch_mode(dev);
if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) {
NL_SET_ERR_MSG_FMT_MOD(extack,
"Changing fs mode is not supported when eswitch offloads enabled.");
return -EOPNOTSUPP;
}
if (!strcmp(value, "dmfs"))
return 0;
@@ -3759,14 +3766,6 @@ static int mlx5_fs_mode_validate(struct devlink *devlink, u32 id,
return -EINVAL;
}
eswitch_mode = mlx5_eswitch_mode(dev);
if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) {
NL_SET_ERR_MSG_FMT_MOD(extack,
"Moving to %s is not supported when eswitch offloads enabled.",
value);
return -EOPNOTSUPP;
}
return 0;
}

View File

@@ -6,13 +6,15 @@
#include "fw_reset.h"
#include "diag/fw_tracer.h"
#include "lib/tout.h"
#include "sf/sf.h"
enum {
MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST,
MLX5_FW_RESET_FLAGS_PENDING_COMP,
MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS,
MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED
MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED,
MLX5_FW_RESET_FLAGS_UNLOAD_EVENT,
};
struct mlx5_fw_reset {
@@ -219,7 +221,7 @@ int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev)
return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false);
}
static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unloaded)
static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
struct devlink *devlink = priv_to_devlink(dev);
@@ -228,8 +230,7 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unload
if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) {
complete(&fw_reset->done);
} else {
if (!unloaded)
mlx5_unload_one(dev, false);
mlx5_sync_reset_unload_flow(dev, false);
if (mlx5_health_wait_pci_up(dev))
mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n");
else
@@ -272,7 +273,7 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work)
mlx5_sync_reset_clear_reset_requested(dev, false);
mlx5_enter_error_state(dev, true);
mlx5_fw_reset_complete_reload(dev, false);
mlx5_fw_reset_complete_reload(dev);
}
#define MLX5_RESET_POLL_INTERVAL (HZ / 10)
@@ -428,6 +429,11 @@ static bool mlx5_is_reset_now_capable(struct mlx5_core_dev *dev,
return false;
}
if (!mlx5_core_is_ecpf(dev) && !mlx5_sf_table_empty(dev)) {
mlx5_core_warn(dev, "SFs should be removed before reset\n");
return false;
}
#if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)
if (reset_method != MLX5_MFRL_REG_PCI_RESET_METHOD_HOT_RESET) {
err = mlx5_check_hotplug_interrupt(dev, bridge);
@@ -586,65 +592,23 @@ static int mlx5_sync_pci_reset(struct mlx5_core_dev *dev, u8 reset_method)
return err;
}
static void mlx5_sync_reset_now_event(struct work_struct *work)
void mlx5_sync_reset_unload_flow(struct mlx5_core_dev *dev, bool locked)
{
struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
reset_now_work);
struct mlx5_core_dev *dev = fw_reset->dev;
int err;
if (mlx5_sync_reset_clear_reset_requested(dev, false))
return;
mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n");
err = mlx5_cmd_fast_teardown_hca(dev);
if (err) {
mlx5_core_warn(dev, "Fast teardown failed, no reset done, err %d\n", err);
goto done;
}
err = mlx5_sync_pci_reset(dev, fw_reset->reset_method);
if (err) {
mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, no reset done, err %d\n", err);
set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags);
}
mlx5_enter_error_state(dev, true);
done:
fw_reset->ret = err;
mlx5_fw_reset_complete_reload(dev, false);
}
static void mlx5_sync_reset_unload_event(struct work_struct *work)
{
struct mlx5_fw_reset *fw_reset;
struct mlx5_core_dev *dev;
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
unsigned long timeout;
int poll_freq = 20;
bool reset_action;
u8 rst_state;
int err;
fw_reset = container_of(work, struct mlx5_fw_reset, reset_unload_work);
dev = fw_reset->dev;
if (mlx5_sync_reset_clear_reset_requested(dev, false))
return;
mlx5_core_warn(dev, "Sync Reset Unload. Function is forced down.\n");
err = mlx5_cmd_fast_teardown_hca(dev);
if (err)
mlx5_core_warn(dev, "Fast teardown failed, unloading, err %d\n", err);
else
mlx5_enter_error_state(dev, true);
if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags))
if (locked)
mlx5_unload_one_devl_locked(dev, false);
else
mlx5_unload_one(dev, false);
if (!test_bit(MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, &fw_reset->reset_flags))
return;
mlx5_set_fw_rst_ack(dev);
mlx5_core_warn(dev, "Sync Reset Unload done, device reset expected\n");
@@ -672,17 +636,73 @@ static void mlx5_sync_reset_unload_event(struct work_struct *work)
goto done;
}
mlx5_core_warn(dev, "Sync Reset, got reset action. rst_state = %u\n", rst_state);
mlx5_core_warn(dev, "Sync Reset, got reset action. rst_state = %u\n",
rst_state);
if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ) {
err = mlx5_sync_pci_reset(dev, fw_reset->reset_method);
if (err) {
mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, err %d\n", err);
mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, err %d\n",
err);
fw_reset->ret = err;
}
}
done:
mlx5_fw_reset_complete_reload(dev, true);
clear_bit(MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, &fw_reset->reset_flags);
}
static void mlx5_sync_reset_now_event(struct work_struct *work)
{
struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
reset_now_work);
struct mlx5_core_dev *dev = fw_reset->dev;
int err;
if (mlx5_sync_reset_clear_reset_requested(dev, false))
return;
mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n");
err = mlx5_cmd_fast_teardown_hca(dev);
if (err) {
mlx5_core_warn(dev, "Fast teardown failed, no reset done, err %d\n", err);
goto done;
}
err = mlx5_sync_pci_reset(dev, fw_reset->reset_method);
if (err) {
mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, no reset done, err %d\n", err);
set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags);
}
mlx5_enter_error_state(dev, true);
done:
fw_reset->ret = err;
mlx5_fw_reset_complete_reload(dev);
}
static void mlx5_sync_reset_unload_event(struct work_struct *work)
{
struct mlx5_fw_reset *fw_reset;
struct mlx5_core_dev *dev;
int err;
fw_reset = container_of(work, struct mlx5_fw_reset, reset_unload_work);
dev = fw_reset->dev;
if (mlx5_sync_reset_clear_reset_requested(dev, false))
return;
set_bit(MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, &fw_reset->reset_flags);
mlx5_core_warn(dev, "Sync Reset Unload. Function is forced down.\n");
err = mlx5_cmd_fast_teardown_hca(dev);
if (err)
mlx5_core_warn(dev, "Fast teardown failed, unloading, err %d\n", err);
else
mlx5_enter_error_state(dev, true);
mlx5_fw_reset_complete_reload(dev);
}
static void mlx5_sync_reset_abort_event(struct work_struct *work)

View File

@@ -12,6 +12,7 @@ int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev);
int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev);
void mlx5_sync_reset_unload_flow(struct mlx5_core_dev *dev, bool locked);
int mlx5_fw_reset_verify_fw_complete(struct mlx5_core_dev *dev,
struct netlink_ext_ack *extack);
void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev);

View File

@@ -518,3 +518,13 @@ void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev)
WARN_ON(!xa_empty(&table->function_ids));
kfree(table);
}
bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev)
{
struct mlx5_sf_table *table = dev->priv.sf_table;
if (!table)
return true;
return xa_empty(&table->function_ids);
}

View File

@@ -17,6 +17,7 @@ void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev);
int mlx5_sf_table_init(struct mlx5_core_dev *dev);
void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev);
bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev);
int mlx5_devlink_sf_port_new(struct devlink *devlink,
const struct devlink_port_new_attrs *add_attr,
@@ -61,6 +62,11 @@ static inline void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev)
{
}
static inline bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev)
{
return true;
}
#endif
#endif

View File

@@ -117,7 +117,7 @@ static int hws_action_get_shared_stc_nic(struct mlx5hws_context *ctx,
mlx5hws_err(ctx, "No such stc_type: %d\n", stc_type);
pr_warn("HWS: Invalid stc_type: %d\n", stc_type);
ret = -EINVAL;
goto unlock_and_out;
goto free_shared_stc;
}
ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, tbl_type,

View File

@@ -279,7 +279,7 @@ int mlx5hws_pat_get_pattern(struct mlx5hws_context *ctx,
return ret;
clean_pattern:
mlx5hws_cmd_header_modify_pattern_destroy(ctx->mdev, *pattern_id);
mlx5hws_cmd_header_modify_pattern_destroy(ctx->mdev, ptrn_id);
out_unlock:
mutex_unlock(&ctx->pattern_cache->lock);
return ret;
@@ -527,7 +527,6 @@ int mlx5hws_pat_calc_nop(__be64 *pattern, size_t num_actions,
u32 *nop_locations, __be64 *new_pat)
{
u16 prev_src_field = INVALID_FIELD, prev_dst_field = INVALID_FIELD;
u16 src_field, dst_field;
u8 action_type;
bool dependent;
size_t i, j;
@@ -539,6 +538,9 @@ int mlx5hws_pat_calc_nop(__be64 *pattern, size_t num_actions,
return 0;
for (i = 0, j = 0; i < num_actions; i++, j++) {
u16 src_field = INVALID_FIELD;
u16 dst_field = INVALID_FIELD;
if (j >= max_actions)
return -EINVAL;

View File

@@ -124,6 +124,7 @@ static int hws_pool_buddy_init(struct mlx5hws_pool *pool)
mlx5hws_err(pool->ctx, "Failed to create resource type: %d size %zu\n",
pool->type, pool->alloc_log_sz);
mlx5hws_buddy_cleanup(buddy);
kfree(buddy);
return -ENOMEM;
}