net/mlx5: Add support for device steering tag

Background, from PCIe specification 6.2.

TLP Processing Hints (TPH)
--------------------------
TLP Processing Hints is an optional feature that provides hints in
Request TLP headers to facilitate optimized processing of Requests that
target Memory Space. These Processing Hints enable the system hardware
(e.g., the Root Complex and/or Endpoints) to optimize platform
resources such as system and memory interconnect on a per TLP basis.
Steering Tags are system-specific values used to identify a processing
resource that a Requester explicitly targets. System software discovers
and identifies TPH capabilities to determine the Steering Tag allocation
for each Function that supports TPH.

This patch adds steering tag support for mlx5 based NICs by:

- Enabling the TPH functionality over PCI if both FW and OS support it.
- Managing steering tags and their matching steering indexes by
  writing a ST to an ST index over the PCI configuration space.
- Exposing APIs to upper layers (e.g.,mlx5_ib) to allow usage of
  the PCI TPH infrastructure.

Further details:
- Upon probing of a device, the feature will be enabled based
  on both capability detection and OS support.

- It will retrieve the appropriate ST for a given CPU ID and memory
  type using the pcie_tph_get_cpu_st() API.

- It will track available ST indices according to the configuration
  space table size (expected to be 63 entries), reserving index 0 to
  indicate non-TPH use.

- It will assign a free ST index with a ST using the
  pcie_tph_set_st_entry() API.

- It will reuse the same index for identical (CPU ID + memory type)
  combinations by maintaining a reference count per entry.

- It will expose APIs to upper layers (e.g., mlx5_ib) to allow usage of
  the PCI TPH infrastructure.

- SF will use its parent PF stuff.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://patch.msgid.link/de1ae7398e9e34eacd8c10845683df44fc9e32f8.1752752567.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
This commit is contained in:
Yishai Hadas
2025-07-17 15:17:27 +03:00
committed by Leon Romanovsky
parent 5f9ec7880e
commit 888a7776f4
5 changed files with 200 additions and 0 deletions

View File

@@ -167,5 +167,10 @@ mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_
#
mlx5_core-$(CONFIG_MLX5_SF_MANAGER) += sf/cmd.o sf/hw_table.o sf/devlink.o
#
# TPH support
#
mlx5_core-$(CONFIG_PCIE_TPH) += lib/st.o
obj-$(CONFIG_MLX5_DPLL) += mlx5_dpll.o
mlx5_dpll-y := dpll.o

View File

@@ -0,0 +1,164 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
*/
#include <linux/mlx5/driver.h>
#include <linux/mlx5/device.h>
#include "mlx5_core.h"
#include "lib/mlx5.h"
struct mlx5_st_idx_data {
refcount_t usecount;
u16 tag;
};
struct mlx5_st {
/* serialize access upon alloc/free flows */
struct mutex lock;
struct xa_limit index_limit;
struct xarray idx_xa; /* key == index, value == struct mlx5_st_idx_data */
};
struct mlx5_st *mlx5_st_create(struct mlx5_core_dev *dev)
{
struct pci_dev *pdev = dev->pdev;
struct mlx5_st *st;
u16 num_entries;
int ret;
if (!MLX5_CAP_GEN(dev, mkey_pcie_tph))
return NULL;
#ifdef CONFIG_MLX5_SF
if (mlx5_core_is_sf(dev))
return dev->priv.parent_mdev->st;
#endif
/* Checking whether the device is capable */
if (!pdev->tph_cap)
return NULL;
num_entries = pcie_tph_get_st_table_size(pdev);
/* We need a reserved entry for non TPH cases */
if (num_entries < 2)
return NULL;
/* The OS doesn't support ST */
ret = pcie_enable_tph(pdev, PCI_TPH_ST_DS_MODE);
if (ret)
return NULL;
st = kzalloc(sizeof(*st), GFP_KERNEL);
if (!st)
goto end;
mutex_init(&st->lock);
xa_init_flags(&st->idx_xa, XA_FLAGS_ALLOC);
/* entry 0 is reserved for non TPH cases */
st->index_limit.min = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX + 1;
st->index_limit.max = num_entries - 1;
return st;
end:
pcie_disable_tph(dev->pdev);
return NULL;
}
void mlx5_st_destroy(struct mlx5_core_dev *dev)
{
struct mlx5_st *st = dev->st;
if (mlx5_core_is_sf(dev) || !st)
return;
pcie_disable_tph(dev->pdev);
WARN_ON_ONCE(!xa_empty(&st->idx_xa));
kfree(st);
}
int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type,
unsigned int cpu_uid, u16 *st_index)
{
struct mlx5_st_idx_data *idx_data;
struct mlx5_st *st = dev->st;
unsigned long index;
u32 xa_id;
u16 tag;
int ret;
if (!st)
return -EOPNOTSUPP;
ret = pcie_tph_get_cpu_st(dev->pdev, mem_type, cpu_uid, &tag);
if (ret)
return ret;
mutex_lock(&st->lock);
xa_for_each(&st->idx_xa, index, idx_data) {
if (tag == idx_data->tag) {
refcount_inc(&idx_data->usecount);
*st_index = index;
goto end;
}
}
idx_data = kzalloc(sizeof(*idx_data), GFP_KERNEL);
if (!idx_data) {
ret = -ENOMEM;
goto end;
}
refcount_set(&idx_data->usecount, 1);
idx_data->tag = tag;
ret = xa_alloc(&st->idx_xa, &xa_id, idx_data, st->index_limit, GFP_KERNEL);
if (ret)
goto clean_idx_data;
ret = pcie_tph_set_st_entry(dev->pdev, xa_id, tag);
if (ret)
goto clean_idx_xa;
*st_index = xa_id;
goto end;
clean_idx_xa:
xa_erase(&st->idx_xa, xa_id);
clean_idx_data:
kfree(idx_data);
end:
mutex_unlock(&st->lock);
return ret;
}
EXPORT_SYMBOL_GPL(mlx5_st_alloc_index);
int mlx5_st_dealloc_index(struct mlx5_core_dev *dev, u16 st_index)
{
struct mlx5_st_idx_data *idx_data;
struct mlx5_st *st = dev->st;
int ret = 0;
if (!st)
return -EOPNOTSUPP;
mutex_lock(&st->lock);
idx_data = xa_load(&st->idx_xa, st_index);
if (WARN_ON_ONCE(!idx_data)) {
ret = -EINVAL;
goto end;
}
if (refcount_dec_and_test(&idx_data->usecount)) {
xa_erase(&st->idx_xa, st_index);
/* We leave PCI config space as was before, no mkey will refer to it */
}
end:
mutex_unlock(&st->lock);
return ret;
}
EXPORT_SYMBOL_GPL(mlx5_st_dealloc_index);

View File

@@ -1102,6 +1102,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
}
dev->dm = mlx5_dm_create(dev);
dev->st = mlx5_st_create(dev);
dev->tracer = mlx5_fw_tracer_create(dev);
dev->hv_vhca = mlx5_hv_vhca_create(dev);
dev->rsc_dump = mlx5_rsc_dump_create(dev);
@@ -1150,6 +1151,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_rsc_dump_destroy(dev);
mlx5_hv_vhca_destroy(dev->hv_vhca);
mlx5_fw_tracer_destroy(dev->tracer);
mlx5_st_destroy(dev);
mlx5_dm_cleanup(dev);
mlx5_fs_core_free(dev);
mlx5_sf_table_cleanup(dev);

View File

@@ -300,6 +300,15 @@ int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode);
struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev);
void mlx5_dm_cleanup(struct mlx5_core_dev *dev);
#ifdef CONFIG_PCIE_TPH
struct mlx5_st *mlx5_st_create(struct mlx5_core_dev *dev);
void mlx5_st_destroy(struct mlx5_core_dev *dev);
#else
static inline struct mlx5_st *
mlx5_st_create(struct mlx5_core_dev *dev) { return NULL; }
static inline void mlx5_st_destroy(struct mlx5_core_dev *dev) { return; }
#endif
void mlx5_toggle_port_link(struct mlx5_core_dev *dev);
int mlx5_set_port_admin_status(struct mlx5_core_dev *dev,
enum mlx5_port_status status);

View File

@@ -36,6 +36,7 @@
#include <linux/kernel.h>
#include <linux/completion.h>
#include <linux/pci.h>
#include <linux/pci-tph.h>
#include <linux/irq.h>
#include <linux/spinlock_types.h>
#include <linux/semaphore.h>
@@ -688,6 +689,7 @@ struct mlx5_fw_tracer;
struct mlx5_vxlan;
struct mlx5_geneve;
struct mlx5_hv_vhca;
struct mlx5_st;
#define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity))
#define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
@@ -757,6 +759,7 @@ struct mlx5_core_dev {
u32 issi;
struct mlx5e_resources mlx5e_res;
struct mlx5_dm *dm;
struct mlx5_st *st;
struct mlx5_vxlan *vxlan;
struct mlx5_geneve *geneve;
struct {
@@ -1160,6 +1163,23 @@ int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
u64 length, u16 uid, phys_addr_t addr, u32 obj_id);
#ifdef CONFIG_PCIE_TPH
int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type,
unsigned int cpu_uid, u16 *st_index);
int mlx5_st_dealloc_index(struct mlx5_core_dev *dev, u16 st_index);
#else
static inline int mlx5_st_alloc_index(struct mlx5_core_dev *dev,
enum tph_mem_type mem_type,
unsigned int cpu_uid, u16 *st_index)
{
return -EOPNOTSUPP;
}
static inline int mlx5_st_dealloc_index(struct mlx5_core_dev *dev, u16 st_index)
{
return -EOPNOTSUPP;
}
#endif
struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev);
void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev);