From 40b72108f9c609c5043903efb70c52d46b8aa871 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 27 Sep 2022 13:35:56 -0700 Subject: [PATCH 01/16] net/mlx5: Add the log_min_mkey_entity_size capability Add the capability that will allow the driver to determine the minimal MTT page size to be able to map the smallest possible pages in XSK. The older firmwares that don't have this capability default to 12 (i.e. 4096-byte pages). Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- include/linux/mlx5/mlx5_ifc.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3d963c0e47e4..1ad762e22d86 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1878,7 +1878,13 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 max_reformat_remove_size[0x8]; u8 max_reformat_remove_offset[0x8]; - u8 reserved_at_c0[0x160]; + u8 reserved_at_c0[0xe0]; + + u8 reserved_at_1a0[0xb]; + u8 log_min_mkey_entity_size[0x5]; + u8 reserved_at_1b0[0x10]; + + u8 reserved_at_1c0[0x60]; u8 reserved_at_220[0x1]; u8 sw_vhca_id_valid[0x1]; From f060ccc2afaa099a6bb86e5ec89efbaea5dd2ecf Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 27 Sep 2022 13:35:57 -0700 Subject: [PATCH 02/16] net/mlx5e: Convert mlx5e_get_max_sq_wqebbs to u8 The return value of mlx5e_get_max_sq_wqebbs is clamped down to MLX5_SEND_WQE_MAX_WQEBBS = 16, which fits into u8. This commit changes the return type of this function to u8 for stricter type safety. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 648a178e8db8..05126c6ae13d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -227,10 +227,12 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) * bytes units. Driver hardens the limitation to 1KB (16 * WQEBBs), unless firmware capability is stricter. */ -static inline u16 mlx5e_get_max_sq_wqebbs(struct mlx5_core_dev *mdev) +static inline u8 mlx5e_get_max_sq_wqebbs(struct mlx5_core_dev *mdev) { - return min_t(u16, MLX5_SEND_WQE_MAX_WQEBBS, - MLX5_CAP_GEN(mdev, max_wqe_sz_sq) / MLX5_SEND_WQE_BB); + BUILD_BUG_ON(MLX5_SEND_WQE_MAX_WQEBBS > U8_MAX); + + return (u8)min_t(u16, MLX5_SEND_WQE_MAX_WQEBBS, + MLX5_CAP_GEN(mdev, max_wqe_sz_sq) / MLX5_SEND_WQE_BB); } static inline u8 mlx5e_get_sw_max_sq_mpw_wqebbs(u8 max_sq_wqebbs) From 665f29de4ca31e65487fd63bac38e80a76142df1 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 27 Sep 2022 13:35:58 -0700 Subject: [PATCH 03/16] net/mlx5e: Remove unused fields from datapath structs No need to keep max_sq_wqebbs in mlx5e_txqsq and mlx5e_xdpsq, as it's only used when allocating the queues. Removing an extra field reduces the struct size. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 -- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 +++++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 05126c6ae13d..881e406d8757 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -478,7 +478,6 @@ struct mlx5e_txqsq { struct work_struct recover_work; struct mlx5e_ptpsq *ptpsq; cqe_ts_to_ns ptp_cyc2time; - u16 max_sq_wqebbs; } ____cacheline_aligned_in_smp; struct mlx5e_dma_info { @@ -582,7 +581,6 @@ struct mlx5e_xdpsq { /* control path */ struct mlx5_wq_ctrl wq_ctrl; struct mlx5e_channel *channel; - u16 max_sq_wqebbs; } ____cacheline_aligned_in_smp; struct mlx5e_ktls_resync_resp; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 099a69d0ee4a..04af77092a29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1155,9 +1155,9 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, is_redirect ? &c->priv->channel_stats[c->ix]->xdpsq : &c->priv->channel_stats[c->ix]->rq_xdpsq; - sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev); - sq->stop_room = MLX5E_STOP_ROOM(sq->max_sq_wqebbs); - sq->max_sq_mpw_wqebbs = mlx5e_get_sw_max_sq_mpw_wqebbs(sq->max_sq_wqebbs); + sq->stop_room = MLX5E_STOP_ROOM(mlx5e_get_max_sq_wqebbs(mdev)); + sq->max_sq_mpw_wqebbs = + mlx5e_get_sw_max_sq_mpw_wqebbs(mlx5e_get_max_sq_wqebbs(mdev)); param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1318,8 +1318,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev); - sq->max_sq_mpw_wqebbs = mlx5e_get_sw_max_sq_mpw_wqebbs(sq->max_sq_wqebbs); + sq->max_sq_mpw_wqebbs = + mlx5e_get_sw_max_sq_mpw_wqebbs(mlx5e_get_max_sq_wqebbs(mdev)); INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); From 7e49abb1e393c3fe741bd47462f5c1f5d714ec24 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 27 Sep 2022 13:35:59 -0700 Subject: [PATCH 04/16] net/mlx5e: Make mlx5e_verify_rx_mpwqe_strides static mlx5e_verify_rx_mpwqe_strides is only used in en/params.c, so it can be made static and removed from en/params.h. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en/params.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index e025040350ba..8b54fec04fef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -90,8 +90,8 @@ bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, linear_frag_sz <= PAGE_SIZE; } -bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev, - u8 log_stride_sz, u8 log_num_strides) +static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev, + u8 log_stride_sz, u8 log_num_strides) { if (log_stride_sz + log_num_strides != MLX5_MPWRQ_LOG_WQE_SZ) return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index f5c46e78eebc..3cc1c6b16444 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -97,8 +97,6 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *para void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); -bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev, - u8 log_stride_sz, u8 log_num_strides); u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params, From 44f4fd03b51705ce8af54c103c9af55926c5d950 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 27 Sep 2022 13:36:00 -0700 Subject: [PATCH 05/16] net/mlx5e: Validate striding RQ before enabling XDP Currently, the driver can silently fall back to legacy RQ after enabling XDP, even if striding RQ was active before. It happens when PAGE_SIZE is bigger than the maximum supported stride size. This commit changes this behavior to more straightforward: if an operation (enabling XDP) doesn't support the current parameters (striding RQ mode), it fails. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/en/params.c | 34 +++++++++++-------- .../ethernet/mellanox/mlx5/core/en/params.h | 4 ++- .../mellanox/mlx5/core/en/xsk/setup.c | 2 +- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 12 ++++--- .../net/ethernet/mellanox/mlx5/core/en_main.c | 16 +++++++-- 5 files changed, 45 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 8b54fec04fef..2be09cc3c437 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -320,22 +320,27 @@ bool slow_pci_heuristic(struct mlx5_core_dev *mdev) link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; } -bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) +int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) - return false; + return -EOPNOTSUPP; - if (params->xdp_prog) { - /* XSK params are not considered here. If striding RQ is in use, - * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will - * be called with the known XSK params. - */ - if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) - return false; - } + if (params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) + return -EINVAL; - return true; + return 0; +} + +int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) + return -EOPNOTSUPP; + + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) + return -EINVAL; + + return 0; } void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, @@ -356,8 +361,7 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { - params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) && - MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ? + params->rq_wq_type = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ? MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : MLX5_WQ_TYPE_CYCLIC; } @@ -374,7 +378,7 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, */ if ((!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) || MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) && - mlx5e_striding_rq_possible(mdev, params) && + !mlx5e_mpwrq_validate_regular(mdev, params) && (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) || !mlx5e_rx_is_linear_skb(params, NULL))) MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 3cc1c6b16444..6e86cbfc7b58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -92,7 +92,9 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); bool slow_pci_heuristic(struct mlx5_core_dev *mdev); -bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 98ed9ef3a6bd..0b3c9f10b597 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -30,7 +30,7 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params, */ switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk); + return !mlx5e_mpwrq_validate_xsk(mdev, params, xsk); default: /* MLX5_WQ_TYPE_CYCLIC */ return mlx5e_rx_is_linear_skb(params, xsk); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 29ed20abc3da..8ae5cff3361e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1997,10 +1997,14 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) struct mlx5e_params new_params; if (enable) { - if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) - return -EOPNOTSUPP; - if (!mlx5e_striding_rq_possible(mdev, &priv->channels.params)) - return -EINVAL; + /* Checking the regular RQ here; mlx5e_validate_xsk_param called + * from mlx5e_open_xsk will check for each XSK queue, and + * mlx5e_safe_switch_params will be reverted if any check fails. + */ + int err = mlx5e_mpwrq_validate_regular(mdev, &priv->channels.params); + + if (err) + return err; } else if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) { netdev_warn(netdev, "Can't set legacy RQ with HW-GRO/LRO, disable them first\n"); return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 04af77092a29..71df9891d7f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4582,8 +4582,20 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) new_params = priv->channels.params; new_params.xdp_prog = prog; - if (reset) - mlx5e_set_rq_type(priv->mdev, &new_params); + + /* XDP affects striding RQ parameters. Block XDP if striding RQ won't be + * supported with the new parameters: if PAGE_SIZE is bigger than + * MLX5_MPWQE_LOG_STRIDE_SZ_MAX, striding RQ can't be used, even though + * the MTU is small enough for the linear mode, because XDP uses strides + * of PAGE_SIZE on regular RQs. + */ + if (reset && MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ)) { + /* Checking for regular RQs here; XSK RQs were checked on XSK bind. */ + err = mlx5e_mpwrq_validate_regular(priv->mdev, &new_params); + if (err) + goto unlock; + } + old_prog = priv->channels.params.xdp_prog; err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); From ed5c92ff0f3e2e1a9b9b7bf1d14f96bf9ceebfb3 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 27 Sep 2022 13:36:01 -0700 Subject: [PATCH 06/16] net/mlx5e: Let mlx5e_get_sw_max_sq_mpw_wqebbs accept mdev To shorten and simplify code, let mlx5e_get_sw_max_sq_mpw_wqebbs accept mdev and derive max SQ WQEBBs from it. Also rename the function to a more generic name mlx5e_get_max_sq_aligned_wqebbs, because the following patches will use it in non-MPWQE contexts. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 8 ++++---- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 881e406d8757..fc595a8ef11f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -174,8 +174,7 @@ struct page_pool; ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT) #define MLX5E_MAX_KLM_PER_WQE(mdev) \ - MLX5E_KLM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * \ - mlx5e_get_sw_max_sq_mpw_wqebbs(mlx5e_get_max_sq_wqebbs(mdev))) + MLX5E_KLM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev)) #define MLX5E_MSG_LEVEL NETIF_MSG_LINK @@ -235,7 +234,7 @@ static inline u8 mlx5e_get_max_sq_wqebbs(struct mlx5_core_dev *mdev) MLX5_CAP_GEN(mdev, max_wqe_sz_sq) / MLX5_SEND_WQE_BB); } -static inline u8 mlx5e_get_sw_max_sq_mpw_wqebbs(u8 max_sq_wqebbs) +static inline u8 mlx5e_get_max_sq_aligned_wqebbs(struct mlx5_core_dev *mdev) { /* The return value will be multiplied by MLX5_SEND_WQEBB_NUM_DS. * Since max_sq_wqebbs may be up to MLX5_SEND_WQE_MAX_WQEBBS == 16, @@ -244,8 +243,9 @@ static inline u8 mlx5e_get_sw_max_sq_mpw_wqebbs(u8 max_sq_wqebbs) * than MLX5_SEND_WQE_MAX_WQEBBS to let a full-session WQE be * cache-aligned. */ - u8 wqebbs = min_t(u8, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 1); + u8 wqebbs = mlx5e_get_max_sq_wqebbs(mdev); + wqebbs = min_t(u8, wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 1); #if L1_CACHE_BYTES >= 128 wqebbs = ALIGN_DOWN(wqebbs, 2); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 71df9891d7f8..ec74f330297e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1156,8 +1156,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, &c->priv->channel_stats[c->ix]->xdpsq : &c->priv->channel_stats[c->ix]->rq_xdpsq; sq->stop_room = MLX5E_STOP_ROOM(mlx5e_get_max_sq_wqebbs(mdev)); - sq->max_sq_mpw_wqebbs = - mlx5e_get_sw_max_sq_mpw_wqebbs(mlx5e_get_max_sq_wqebbs(mdev)); + sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1318,8 +1317,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - sq->max_sq_mpw_wqebbs = - mlx5e_get_sw_max_sq_mpw_wqebbs(mlx5e_get_max_sq_wqebbs(mdev)); + sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); From 527918e9cc4d0578dbea7c67594050c2056bd2e4 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 27 Sep 2022 13:36:02 -0700 Subject: [PATCH 07/16] net/mlx5e: Use mlx5e_stop_room_for_max_wqe where appropriate mlx5e_alloc_xdpsq calculates sq->stop_room internally, but there is already a function for that: mlx5e_stop_room_for_max_wqe. This commit makes use of this function. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ec74f330297e..688a6589b3b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1155,7 +1155,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, is_redirect ? &c->priv->channel_stats[c->ix]->xdpsq : &c->priv->channel_stats[c->ix]->rq_xdpsq; - sq->stop_room = MLX5E_STOP_ROOM(mlx5e_get_max_sq_wqebbs(mdev)); + sq->stop_room = mlx5e_stop_room_for_max_wqe(mdev); sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); param->wq.db_numa_node = cpu_to_node(c->cpu); From e3c4c496dc9a44412bae6e1c5a9cf7fd0cdafba1 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 27 Sep 2022 13:36:03 -0700 Subject: [PATCH 08/16] net/mlx5e: Fix a typo in mlx5e_xdp_mpwqe_is_full Fix a typo in the function name: mpqwe -> mpwqe (stands for multi-packet work queue element). Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 8f321a6c0809..4685c652c97e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -333,7 +333,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats); - if (unlikely(mlx5e_xdp_mpqwe_is_full(session, sq->max_sq_mpw_wqebbs))) + if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs))) mlx5e_xdp_mpwqe_complete(sq); stats->xmit++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 287e17911251..bc2d9034af5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -122,7 +122,7 @@ static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur) return cur; } -static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs) +static inline bool mlx5e_xdp_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs) { if (session->inline_on) return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > From 21a0502d59109792b830b476f73287573981a0dd Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 27 Sep 2022 13:36:04 -0700 Subject: [PATCH 09/16] net/mlx5e: Use the aligned max TX MPWQE size TX MPWQE size is limited to the cacheline-aligned maximum. Use the same value for the stop room and the capability check. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 8 ++++---- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 7 +++++++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 +++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 2be09cc3c437..2c8fe2e60e17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -209,11 +209,11 @@ u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par stop_room = mlx5e_ktls_get_stop_room(mdev, params); stop_room += mlx5e_stop_room_for_max_wqe(mdev); if (is_mpwqe) - /* A MPWQE can take up to the maximum-sized WQE + all the normal - * stop room can be taken if a new packet breaks the active - * MPWQE session and allocates its WQEs right away. + /* A MPWQE can take up to the maximum cacheline-aligned WQE + + * all the normal stop room can be taken if a new packet breaks + * the active MPWQE session and allocates its WQEs right away. */ - stop_room += mlx5e_stop_room_for_max_wqe(mdev); + stop_room += mlx5e_stop_room_for_mpwqe(mdev); return stop_room; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index c208ea307bff..8751e48e283d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -439,6 +439,13 @@ static inline u16 mlx5e_stop_room_for_max_wqe(struct mlx5_core_dev *mdev) return MLX5E_STOP_ROOM(mlx5e_get_max_sq_wqebbs(mdev)); } +static inline u16 mlx5e_stop_room_for_mpwqe(struct mlx5_core_dev *mdev) +{ + u8 mpwqe_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); + + return mlx5e_stop_room_for_wqe(mdev, mpwqe_wqebbs); +} + static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size) { u16 room = sq->reserved_room; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 688a6589b3b8..9b48ae61f692 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -75,7 +75,7 @@ bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && MLX5_CAP_GEN(mdev, umr_ptr_rlky) && MLX5_CAP_ETH(mdev, reg_umr_sq); - max_wqe_sz_cap = mlx5e_get_max_sq_wqebbs(mdev) * MLX5_SEND_WQE_BB; + max_wqe_sz_cap = mlx5e_get_max_sq_aligned_wqebbs(mdev) * MLX5_SEND_WQE_BB; inline_umr = max_wqe_sz_cap >= MLX5E_UMR_WQE_INLINE_SZ; if (!striding_rq_umr) return false; @@ -1155,7 +1155,8 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, is_redirect ? &c->priv->channel_stats[c->ix]->xdpsq : &c->priv->channel_stats[c->ix]->rq_xdpsq; - sq->stop_room = mlx5e_stop_room_for_max_wqe(mdev); + sq->stop_room = param->is_mpw ? mlx5e_stop_room_for_mpwqe(mdev) : + mlx5e_stop_room_for_max_wqe(mdev); sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); param->wq.db_numa_node = cpu_to_node(c->cpu); From 4c78782e2e98f117b1bc69eef17034d09a6ac127 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 27 Sep 2022 13:36:05 -0700 Subject: [PATCH 10/16] net/mlx5e: kTLS, Check ICOSQ WQE size in advance Instead of WARNing in runtime when TLS offload WQEs posted to ICOSQ are over the hardware limit, check their size before enabling TLS RX offload, and block the offload if the condition fails. It also allows to drop a u16 field from struct mlx5e_icosq. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 - .../net/ethernet/mellanox/mlx5/core/en/txrx.h | 8 +------- .../mellanox/mlx5/core/en_accel/ktls.c | 18 ++++++++++++++++++ .../mellanox/mlx5/core/en_accel/ktls.h | 5 +---- .../net/ethernet/mellanox/mlx5/core/en_main.c | 1 - 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index fc595a8ef11f..4778298f4645 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -609,7 +609,6 @@ struct mlx5e_icosq { /* control path */ struct mlx5_wq_ctrl wq_ctrl; struct mlx5e_channel *channel; - u16 max_sq_wqebbs; struct work_struct recover_work; } ____cacheline_aligned_in_smp; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 8751e48e283d..f4f306bb8e6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -448,13 +448,7 @@ static inline u16 mlx5e_stop_room_for_mpwqe(struct mlx5_core_dev *mdev) static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size) { - u16 room = sq->reserved_room; - - WARN_ONCE(wqe_size > sq->max_sq_wqebbs, - "wqe_size %u is greater than max SQ WQEBBs %u", - wqe_size, sq->max_sq_wqebbs); - - room += MLX5E_STOP_ROOM(wqe_size); + u16 room = sq->reserved_room + MLX5E_STOP_ROOM(wqe_size); return mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c index c0b77963cc7c..da2184c94203 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -92,6 +92,24 @@ static const struct tlsdev_ops mlx5e_ktls_ops = { .tls_dev_resync = mlx5e_ktls_resync, }; +bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev) +{ + u8 max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev); + + if (is_kdump_kernel() || !MLX5_CAP_GEN(mdev, tls_rx)) + return false; + + /* Check the possibility to post the required ICOSQ WQEs. */ + if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS)) + return false; + if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS)) + return false; + if (WARN_ON_ONCE(max_sq_wqebbs < MLX5E_KTLS_GET_PROGRESS_WQEBBS)) + return false; + + return true; +} + void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index 299334b2f935..1c35045e41fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -61,10 +61,7 @@ static inline bool mlx5e_is_ktls_tx(struct mlx5_core_dev *mdev) return !is_kdump_kernel() && MLX5_CAP_GEN(mdev, tls_tx); } -static inline bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev) -{ - return !is_kdump_kernel() && MLX5_CAP_GEN(mdev, tls_rx); -} +bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev); struct mlx5e_tls_sw_stats { atomic64_t tx_tls_ctx; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9b48ae61f692..ab9bd250e7a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1232,7 +1232,6 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, sq->channel = c; sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->reserved_room = param->stop_room; - sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev); param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); From ddbef365607216ddea9c515f54aab97a3b48f1b1 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 27 Sep 2022 13:36:06 -0700 Subject: [PATCH 11/16] net/mlx5e: Simplify stride size calculation for linear RQ Linear RX buffers must be big enough to fit the MTU-sized packet along with the headroom. On the other hand, they must be small enough to fit into a page (or into an XSK frame). A straightforward way to check whether the linear mode is possible would be comparing the required buffer size to PAGE_SIZE or XSK frame size. Stride size in the linear mode is defined by the following constraints: 1. A stride is at least as big as the buffer size, and it's a power of two. 2. If non-XSK XDP is enabled, the stride size is PAGE_SIZE, because mlx5e requires each packet to be in its own page when XDP is in use. The previous constraint is automatically fulfilled, because buffer size can't be bigger than PAGE_SIZE. 3. XSK uses stride size equal to PAGE_SIZE, but the following commits will allow it to use roundup_pow_of_two(XSK frame size), by allowing the NIC's MMU to use page sizes not equal to the CPU page size. This commit puts the above requirements and constraints straight to the code in an attempt to simplify it and to prepare it for changes made in the next patches. For the reference, the old code uses an equivalent, but trickier calculation (high-level simplified pseudocode): if XDP or XSK: mlx5e_rx_get_linear_frag_sz := max(buffer size, PAGE_SIZE) else: mlx5e_rx_get_linear_frag_sz := buffer size mlx5e_rx_is_linear_skb := mlx5e_rx_get_linear_frag_sz <= PAGE_SIZE stride size := roundup_pow_of_two(mlx5e_rx_get_linear_frag_sz) The new code effectively removes mlx5e_rx_get_linear_frag_sz that used to return either buffer size or stride size, depending on the situation, making it hard to work with and to make changes: if XDP or XSK: mlx5e_rx_get_linear_stride_sz := PAGE_SIZE else mlx5e_rx_get_linear_stride_sz := roundup_pow_of_two(buffer size) mlx5e_rx_is_linear_skb := buffer size <= (PAGE_SIZE or XSK frame sz) stride size := mlx5e_rx_get_linear_stride_sz Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/en/params.c | 72 ++++++++++--------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 2c8fe2e60e17..bb039c3c4039 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -39,55 +39,58 @@ u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params, return linear_rq_headroom + hw_mtu; } -static u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) +static u32 mlx5e_rx_get_linear_sz_xsk(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u32 frag_sz = mlx5e_rx_get_min_frag_sz(params, xsk); + return mlx5e_rx_get_min_frag_sz(params, xsk); +} - /* AF_XDP doesn't build SKBs in place. */ - if (!xsk) - frag_sz = MLX5_SKB_FRAG_SZ(frag_sz); +static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params) +{ + return MLX5_SKB_FRAG_SZ(mlx5e_rx_get_min_frag_sz(params, NULL)); +} - /* XDP in mlx5e doesn't support multiple packets per page. AF_XDP is a - * special case. It can run with frames smaller than a page, as it - * doesn't allocate pages dynamically. However, here we pretend that - * fragments are page-sized: it allows to treat XSK frames like pages - * by redirecting alloc and free operations to XSK rings and by using - * the fact there are no multiple packets per "page" (which is a frame). - * The latter is important, because frames may come in a random order, - * and we will have trouble assemblying a real page of multiple frames. - */ - if (mlx5e_rx_is_xdp(params, xsk)) - frag_sz = max_t(u32, frag_sz, PAGE_SIZE); - - /* Even if we can go with a smaller fragment size, we must not put - * multiple packets into a single frame. +static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + /* XSK frames are mapped as individual pages, because frames may come in + * an arbitrary order from random locations in the UMEM. */ if (xsk) - frag_sz = max_t(u32, frag_sz, xsk->chunk_size); + return PAGE_SIZE; - return frag_sz; + /* XDP in mlx5e doesn't support multiple packets per page. */ + if (params->xdp_prog) + return PAGE_SIZE; + + return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params)); } u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { - u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk); + u32 linear_stride_sz = mlx5e_rx_get_linear_stride_sz(params, xsk); - return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz); + return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_stride_sz); } bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { - /* AF_XDP allocates SKBs on XDP_PASS - ensure they don't occupy more - * than one page. For this, check both with and without xsk. - */ - u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk), - mlx5e_rx_get_linear_frag_sz(params, NULL)); + if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) + return false; - return params->packet_merge.type == MLX5E_PACKET_MERGE_NONE && - linear_frag_sz <= PAGE_SIZE; + /* Both XSK and non-XSK cases allocate an SKB on XDP_PASS. Packet data + * must fit into a CPU page. + */ + if (mlx5e_rx_get_linear_sz_skb(params) > PAGE_SIZE) + return false; + + /* XSK frames must be big enough to hold the packet data. */ + if (xsk && mlx5e_rx_get_linear_sz_xsk(params, xsk) > xsk->chunk_size) + return false; + + return true; } static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev, @@ -119,7 +122,7 @@ bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, if (!mlx5e_rx_is_linear_skb(params, xsk)) return false; - log_stride_sz = order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk)); + log_stride_sz = order_base_2(mlx5e_rx_get_linear_stride_sz(params, xsk)); log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - log_stride_sz; return mlx5e_verify_rx_mpwqe_strides(mdev, log_stride_sz, log_num_strides); @@ -164,7 +167,7 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk) { if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) - return order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk)); + return order_base_2(mlx5e_rx_get_linear_stride_sz(params, xsk)); return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); } @@ -426,8 +429,7 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, if (mlx5e_rx_is_linear_skb(params, xsk)) { int frag_stride; - frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk); - frag_stride = roundup_pow_of_two(frag_stride); + frag_stride = mlx5e_rx_get_linear_stride_sz(params, xsk); info->arr[0].frag_size = byte_count; info->arr[0].frag_stride = frag_stride; From 8c654a1bb686bf847aa00e0ea7a545b3a30a5e66 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 27 Sep 2022 13:36:07 -0700 Subject: [PATCH 12/16] net/mlx5e: xsk: Remove dead code in validation One of the checks in mlx5e_rx_is_linear_skb verifies that the RX buffer fits into the XSK frame size. Remove the duplicating check from mlx5e_validate_xsk_param. It allows to make mlx5e_rx_get_min_frag_sz static. Remove mlx5e_rx_is_xdp altogether, as its only usage is located in a branch where xsk == NULL. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 12 +++--------- drivers/net/ethernet/mellanox/mlx5/core/en/params.h | 2 -- .../net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 4 ---- 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index bb039c3c4039..c9a4a507a168 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -7,12 +7,6 @@ #include "en_accel/en_accel.h" #include "en_accel/ipsec.h" -static bool mlx5e_rx_is_xdp(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) -{ - return params->xdp_prog || xsk; -} - u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { @@ -22,7 +16,7 @@ u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, return xsk->headroom; headroom = NET_IP_ALIGN; - if (mlx5e_rx_is_xdp(params, xsk)) + if (params->xdp_prog) headroom += XDP_PACKET_HEADROOM; else headroom += MLX5_RX_HEADROOM; @@ -30,8 +24,8 @@ u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, return headroom; } -u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) +static u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 6e86cbfc7b58..3e148a00fa73 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -101,8 +101,6 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params * u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); -u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk); u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 0b3c9f10b597..c7c25f20ad72 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -21,10 +21,6 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params, xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE) return false; - /* Current MTU and XSK headroom don't allow packets to fit the frames. */ - if (mlx5e_rx_get_min_frag_sz(params, xsk) > xsk->chunk_size) - return false; - /* frag_sz is different for regular and XSK RQs, so ensure that linear * SKB mode is possible. */ From 411295fbe6f4643b7fed30c27542af7d752f8e16 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 27 Sep 2022 13:36:08 -0700 Subject: [PATCH 13/16] net/mlx5e: xsk: Fix SKB headroom calculation in validation In a typical scenario, if an XSK socket is opened first, then an XDP program is attached, mlx5e_validate_xsk_param will be called twice: first on XSK bind, second on channel restart caused by enabling XDP. The validation includes a call to mlx5e_rx_is_linear_skb, which checks the presence of the XDP program. The above means that mlx5e_rx_is_linear_skb might return true the first time, but false the second time, as mlx5e_rx_get_linear_sz_skb's return value will increase, because of a different headroom used with XDP. As XSK RQs never exist without XDP, it would make sense to trick mlx5e_rx_get_linear_sz_skb into thinking XDP is enabled at the first check as well. This way, if MTU is too big, it would be detected on XSK bind, without giving false hope to the userspace application. However, it turns out that this check is too restrictive in the first place. SKBs created on XDP_PASS on XSK RQs don't have any headroom. That means that big MTUs filtered out on the first and the second checks might actually work. So, address this issue in the proper way, but taking into account the absence of the SKB headroom on XSK RQs, when calculating the buffer size. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/en/params.c | 25 ++++++++----------- .../net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index c9a4a507a168..5dd3567d02d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -24,24 +24,21 @@ u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, return headroom; } -static u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) -{ - u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk); - - return linear_rq_headroom + hw_mtu; -} - static u32 mlx5e_rx_get_linear_sz_xsk(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { - return mlx5e_rx_get_min_frag_sz(params, xsk); + u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + + return xsk->headroom + hw_mtu; } -static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params) +static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params, bool xsk) { - return MLX5_SKB_FRAG_SZ(mlx5e_rx_get_min_frag_sz(params, NULL)); + /* SKBs built on XDP_PASS on XSK RQs don't have headroom. */ + u16 headroom = xsk ? 0 : mlx5e_get_linear_rq_headroom(params, NULL); + u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + + return MLX5_SKB_FRAG_SZ(headroom + hw_mtu); } static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5e_params *params, @@ -57,7 +54,7 @@ static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5e_params *params, if (params->xdp_prog) return PAGE_SIZE; - return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params)); + return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false)); } u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, @@ -77,7 +74,7 @@ bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, /* Both XSK and non-XSK cases allocate an SKB on XDP_PASS. Packet data * must fit into a CPU page. */ - if (mlx5e_rx_get_linear_sz_skb(params) > PAGE_SIZE) + if (mlx5e_rx_get_linear_sz_skb(params, xsk) > PAGE_SIZE) return false; /* XSK frames must be big enough to hold the packet data. */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ab9bd250e7a9..61f752e88389 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4009,7 +4009,7 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE. */ max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr); - max_mtu_page = mlx5e_xdp_max_mtu(new_params, &xsk); + max_mtu_page = MLX5E_HW2SW_MTU(new_params, SKB_MAX_HEAD(0)); max_mtu = min(max_mtu_frame, max_mtu_page); netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u. Try MTU <= %d\n", From 3904d2afad4ccf9ec7dbe9b7096ee5f9acde3a00 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 27 Sep 2022 13:36:09 -0700 Subject: [PATCH 14/16] net/mlx5e: Improve the MTU change shortcut Normally, the MTU change requires reopening the channels, but it can be skipped if the new MTU doesn't change any of the queue parameters and if MTU is not used in the data path. The shortcut is applicable to the non-linear mode of striding RQ, because the only thing affected by MTU is the queue length. As ethtool sets the queue length in packets, but striding RQ length is defined in strides or bytes, we estimate the RQ length to be at least as big as the requested number of MTU-sized packets, that's why it depends on MTU. Improve the shortcut by actually checking whether the RQ length stayed the same, instead of an intermediate step in the calculation. As MTU also affects the SHAMPO parameters, skip the shortcut if SHAMPO is in use. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en/params.h | 2 -- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 ++++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 5dd3567d02d8..9a58f8f978b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -57,8 +57,8 @@ static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5e_params *params, return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false)); } -u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) +static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { u32 linear_stride_sz = mlx5e_rx_get_linear_stride_sz(params, xsk); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 3e148a00fa73..f2c1a23dca61 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -101,8 +101,6 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params * u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); -u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk); bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 61f752e88389..0d9a9012e34f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4080,19 +4080,21 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, if (params->packet_merge.type == MLX5E_PACKET_MERGE_LRO) reset = false; - if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + params->packet_merge.type != MLX5E_PACKET_MERGE_SHAMPO) { bool is_linear_old = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, params, NULL); bool is_linear_new = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, &new_params, NULL); - u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params, NULL); - u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_params, NULL); + u8 sz_old = mlx5e_mpwqe_get_log_rq_size(params, NULL); + u8 sz_new = mlx5e_mpwqe_get_log_rq_size(&new_params, NULL); /* Always reset in linear mode - hw_mtu is used in data path. * Check that the mode was non-linear and didn't change. * If XSK is active, XSK RQs are linear. + * Reset if the RQ size changed, even if it's non-linear. */ if (!is_linear_old && !is_linear_new && !priv->xsk.refcnt && - ppw_old == ppw_new) + sz_old == sz_new) reset = false; } From 258e655c00734d2e4b5fd8ee2827f76cd0fe39c4 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 27 Sep 2022 13:36:10 -0700 Subject: [PATCH 15/16] net/mlx5e: Make dma_info array dynamic in struct mlx5e_mpw_info This commit moves the dma_info array to the end of struct mlx5e_mpw_info to make it a flexible array. It also removes the intermediate struct mlx5e_umr_dma_info, which used to contain only this array. The flexibility of dma_info will allow to choose its size dynamically in a following commit. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 +---- .../ethernet/mellanox/mlx5/core/en/xsk/rx.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 8 +++--- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 27 ++++++++++++------- 4 files changed, 24 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 4778298f4645..0c716db88cf4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -619,14 +619,10 @@ struct mlx5e_wqe_frag_info { bool last_in_page; }; -struct mlx5e_umr_dma_info { - struct mlx5e_dma_info dma_info[MLX5_MPWRQ_PAGES_PER_WQE]; -}; - struct mlx5e_mpw_info { - struct mlx5e_umr_dma_info umr; u16 consumed_strides; DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE); + struct mlx5e_dma_info dma_info[]; }; #define MLX5E_MAX_RX_FRAGS 4 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index 9a1553598a7c..6245dfde6666 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -30,7 +30,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, u32 head_offset, u32 page_idx) { - struct xdp_buff *xdp = wi->umr.dma_info[page_idx].xsk; + struct xdp_buff *xdp = wi->dma_info[page_idx].xsk; struct bpf_prog *prog; /* Check packet size. Note LRO doesn't use linear SKB */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 0d9a9012e34f..b3466c721ad8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -260,10 +260,12 @@ static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) { int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); + size_t alloc_size; - rq->mpwqe.info = kvzalloc_node(array_size(wq_sz, - sizeof(*rq->mpwqe.info)), - GFP_KERNEL, node); + alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, dma_info, + MLX5_MPWRQ_PAGES_PER_WQE)); + + rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node); if (!rq->mpwqe.info) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 4d3e7897b51b..b910fc1dbc72 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -75,6 +75,13 @@ const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic = { .handle_rx_cqe_mpwqe_shampo = mlx5e_handle_rx_cqe_mpwrq_shampo, }; +static struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i) +{ + size_t isz = struct_size(rq->mpwqe.info, dma_info, MLX5_MPWRQ_PAGES_PER_WQE); + + return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz)); +} + static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) { return config->rx_filter == HWTSTAMP_FILTER_ALL; @@ -478,7 +485,7 @@ static void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle) { bool no_xdp_xmit; - struct mlx5e_dma_info *dma_info = wi->umr.dma_info; + struct mlx5e_dma_info *dma_info = wi->dma_info; int i; /* A common case for AF_XDP. */ @@ -660,8 +667,8 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { - struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; - struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0]; + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); + struct mlx5e_dma_info *dma_info = &wi->dma_info[0]; struct mlx5e_icosq *sq = rq->icosq; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *umr_wqe; @@ -768,7 +775,7 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { - struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); /* Don't recycle, this function is called on rq/netdev close */ mlx5e_free_rx_mpwqe(rq, wi, false); } @@ -1795,7 +1802,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 { u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); u16 wqe_id = be16_to_cpu(cqe->wqe_id); - struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id]; + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, wqe_id); u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; u32 head_offset = wqe_offset & (PAGE_SIZE - 1); @@ -1878,7 +1885,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w u16 cqe_bcnt, u32 head_offset, u32 page_idx) { u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt); - struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx]; + struct mlx5e_dma_info *di = &wi->dma_info[page_idx]; u32 frag_offset = head_offset + headlen; u32 byte_cnt = cqe_bcnt - headlen; struct mlx5e_dma_info *head_di = di; @@ -1912,7 +1919,7 @@ static struct sk_buff * mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, u16 cqe_bcnt, u32 head_offset, u32 page_idx) { - struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx]; + struct mlx5e_dma_info *di = &wi->dma_info[page_idx]; u16 rx_headroom = rq->buff.headroom; struct bpf_prog *prog; struct sk_buff *skb; @@ -2078,7 +2085,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq struct mlx5e_mpw_info *wi; struct mlx5_wq_ll *wq; - wi = &rq->mpwqe.info[wqe_id]; + wi = mlx5e_get_mpw_info(rq, wqe_id); wi->consumed_strides += cstrides; if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { @@ -2124,7 +2131,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq } if (likely(head_size)) { - di = &wi->umr.dma_info[page_idx]; + di = &wi->dma_info[page_idx]; mlx5e_fill_skb_data(*skb, rq, di, data_bcnt, data_offset); } @@ -2147,7 +2154,7 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq { u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); u16 wqe_id = be16_to_cpu(cqe->wqe_id); - struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id]; + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, wqe_id); u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; u32 head_offset = wqe_offset & (PAGE_SIZE - 1); From 997ce6affe264e80c2fd1ab6f09d22c00d7bfa38 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 27 Sep 2022 13:36:11 -0700 Subject: [PATCH 16/16] net/mlx5e: Use runtime values of striding RQ parameters in datapath Some of the parameters of striding RQ are compile-time constants, but they are going to become dynamically calculated at runtime in a following commit. This commit prepares the datapath to take cached runtime parameters, prefilled at queue creation. New fields added to struct mlx5e_rq fit into an existing 7-byte hole. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 12 ++---- .../ethernet/mellanox/mlx5/core/en/params.c | 14 ++++++- .../ethernet/mellanox/mlx5/core/en/params.h | 5 +++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 41 +++++++++++-------- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 35 ++++++++-------- 5 files changed, 65 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 0c716db88cf4..9ff746a09a17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -107,7 +107,6 @@ struct page_pool; * dropped by the driver at a later stage. */ #define MLX5E_REQUIRED_WQE_MTTS (MLX5_ALIGN_MTTS(MLX5_MPWRQ_PAGES_PER_WQE + 1)) -#define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS) #define MLX5E_MAX_RQ_NUM_MTTS \ (ALIGN_DOWN(U16_MAX, 4) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */ #define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024)) @@ -150,13 +149,6 @@ struct page_pool; #define MLX5E_TX_XSK_POLL_BUDGET 64 #define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */ -#define MLX5E_UMR_WQE_INLINE_SZ \ - (sizeof(struct mlx5e_umr_wqe) + \ - ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(struct mlx5_mtt), \ - MLX5_UMR_MTT_ALIGNMENT)) -#define MLX5E_UMR_WQEBBS \ - (DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB)) - #define MLX5E_KLM_UMR_WQE_SZ(sgl_len)\ (sizeof(struct mlx5e_umr_wqe) +\ (sizeof(struct mlx5_klm) * (sgl_len))) @@ -712,6 +704,10 @@ struct mlx5e_rq { u8 umr_last_bulk; u8 umr_completed; u8 min_wqe_bulk; + u8 page_shift; + u8 pages_per_wqe; + u8 umr_wqebbs; + u8 mtts_per_wqe; struct mlx5e_shampo_hd *shampo; } mpwqe; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 9a58f8f978b1..5f8912e8404d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -7,6 +7,17 @@ #include "en_accel/en_accel.h" #include "en_accel/ipsec.h" +u16 mlx5e_mpwrq_umr_wqe_sz(u8 pages_per_wqe) +{ + return sizeof(struct mlx5e_umr_wqe) + + ALIGN(pages_per_wqe * sizeof(struct mlx5_mtt), MLX5_UMR_MTT_ALIGNMENT); +} + +u8 mlx5e_mpwrq_umr_wqebbs(u8 pages_per_wqe) +{ + return DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(pages_per_wqe), MLX5_SEND_WQE_BB); +} + u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { @@ -786,7 +797,8 @@ static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev, if (params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; - wqebbs = MLX5E_UMR_WQEBBS * BIT(mlx5e_get_rq_log_wq_sz(rqp->rqc)); + wqebbs = mlx5e_mpwrq_umr_wqebbs(MLX5_MPWRQ_PAGES_PER_WQE) * + (1 << mlx5e_get_rq_log_wq_sz(rqp->rqc)); /* If XDP program is attached, XSK may be turned on at any time without * restarting the channel. ICOSQ must be big enough to fit UMR WQEs of diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index f2c1a23dca61..2bb9aba57ea0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -84,6 +84,11 @@ static inline bool mlx5e_qid_validate(const struct mlx5e_profile *profile, return qid < params->num_channels * profile->rq_groups; } +/* Striding RQ dynamic parameters */ + +u16 mlx5e_mpwrq_umr_wqe_sz(u8 pages_per_wqe); +u8 mlx5e_mpwrq_umr_wqebbs(u8 pages_per_wqe); + /* Parameter calculations */ void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b3466c721ad8..26f1557843a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -71,17 +71,20 @@ bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { bool striding_rq_umr, inline_umr; - u16 max_wqe_sz_cap; + u16 max_wqebbs; + u16 umr_wqebbs; striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && MLX5_CAP_GEN(mdev, umr_ptr_rlky) && MLX5_CAP_ETH(mdev, reg_umr_sq); - max_wqe_sz_cap = mlx5e_get_max_sq_aligned_wqebbs(mdev) * MLX5_SEND_WQE_BB; - inline_umr = max_wqe_sz_cap >= MLX5E_UMR_WQE_INLINE_SZ; + max_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); + umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(MLX5_MPWRQ_PAGES_PER_WQE); + inline_umr = umr_wqebbs <= max_wqebbs; if (!striding_rq_umr) return false; if (!inline_umr) { - mlx5_core_warn(mdev, "Cannot support Striding RQ: UMR WQE size (%d) exceeds maximum supported (%d).\n", - (int)MLX5E_UMR_WQE_INLINE_SZ, max_wqe_sz_cap); + mlx5_core_warn(mdev, "Cannot support Striding RQ: UMR WQE size (%u) exceeds maximum supported (%u).\n", + umr_wqebbs * MLX5_SEND_WQE_BB, + max_wqebbs * MLX5_SEND_WQE_BB); return false; } return true; @@ -206,7 +209,10 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, { struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; - u8 ds_cnt = DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_DS); + u8 ds_cnt; + + ds_cnt = DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(rq->mpwqe.pages_per_wqe), + MLX5_SEND_WQE_DS); cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | ds_cnt); @@ -214,7 +220,7 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE; ucseg->xlt_octowords = - cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE)); + cpu_to_be16(MLX5_MTT_OCTW(rq->mpwqe.pages_per_wqe)); ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); } @@ -263,7 +269,7 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) size_t alloc_size; alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, dma_info, - MLX5_MPWRQ_PAGES_PER_WQE)); + rq->mpwqe.pages_per_wqe)); rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node); if (!rq->mpwqe.info) @@ -359,9 +365,9 @@ static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev, static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq) { - u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->mpwqe.wq)); + u64 num_mtts = mlx5_wq_ll_get_size(&rq->mpwqe.wq) * rq->mpwqe.mtts_per_wqe; - return mlx5e_create_umr_mtt_mkey(mdev, num_mtts, PAGE_SHIFT, + return mlx5e_create_umr_mtt_mkey(mdev, num_mtts, rq->mpwqe.page_shift, &rq->umr_mkey, rq->wqe_overflow.addr); } @@ -379,11 +385,6 @@ static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev, &rq->mpwqe.shampo->mkey); } -static u64 mlx5e_get_mpwqe_offset(u16 wqe_ix) -{ - return MLX5E_REQUIRED_MTTS(wqe_ix) << PAGE_SHIFT; -} - static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) { struct mlx5e_wqe_frag_info next_frag = {}; @@ -590,7 +591,12 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); - pool_size = MLX5_MPWRQ_PAGES_PER_WQE << + rq->mpwqe.page_shift = PAGE_SHIFT; + rq->mpwqe.pages_per_wqe = MLX5_MPWRQ_PAGES_PER_WQE; + rq->mpwqe.umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(rq->mpwqe.pages_per_wqe); + rq->mpwqe.mtts_per_wqe = MLX5E_REQUIRED_WQE_MTTS; + + pool_size = rq->mpwqe.pages_per_wqe << mlx5e_mpwqe_get_log_rq_size(params, xsk); rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); @@ -680,7 +686,8 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i); u32 byte_count = rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; - u64 dma_offset = mlx5e_get_mpwqe_offset(i); + u64 dma_offset = mul_u32_u32(i, rq->mpwqe.mtts_per_wqe) << + rq->mpwqe.page_shift; u16 headroom = test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) ? 0 : rq->buff.headroom; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index b910fc1dbc72..e2f360da6437 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -77,7 +77,7 @@ const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic = { static struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i) { - size_t isz = struct_size(rq->mpwqe.info, dma_info, MLX5_MPWRQ_PAGES_PER_WQE); + size_t isz = struct_size(rq->mpwqe.info, dma_info, rq->mpwqe.pages_per_wqe); return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz)); } @@ -272,6 +272,7 @@ static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, stats->cache_reuse++; dma_sync_single_for_device(rq->pdev, dma_info->addr, + /* Non-XSK always uses PAGE_SIZE. */ PAGE_SIZE, DMA_FROM_DEVICE); return true; @@ -287,6 +288,7 @@ static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, if (unlikely(!dma_info->page)) return -ENOMEM; + /* Non-XSK always uses PAGE_SIZE. */ dma_info->addr = dma_map_page_attrs(rq->pdev, dma_info->page, 0, PAGE_SIZE, rq->buff.map_dir, DMA_ATTR_SKIP_CPU_SYNC); if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { @@ -489,13 +491,12 @@ mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle int i; /* A common case for AF_XDP. */ - if (bitmap_full(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE)) + if (bitmap_full(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe)) return; - no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, - MLX5_MPWRQ_PAGES_PER_WQE); + no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) + for (i = 0; i < rq->mpwqe.pages_per_wqe; i++) if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) mlx5e_page_release(rq, &dma_info[i], recycle); } @@ -680,7 +681,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) * one-by-one, failing and moving frames to the Reuse Ring. */ if (rq->xsk_pool && - unlikely(!xsk_buff_can_alloc(rq->xsk_pool, MLX5_MPWRQ_PAGES_PER_WQE))) { + unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe))) { err = -ENOMEM; goto err; } @@ -691,33 +692,33 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) goto err; } - pi = mlx5e_icosq_get_next_pi(sq, MLX5E_UMR_WQEBBS); + pi = mlx5e_icosq_get_next_pi(sq, rq->mpwqe.umr_wqebbs); umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts)); - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) { + for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, dma_info++) { err = mlx5e_page_alloc(rq, dma_info); if (unlikely(err)) goto err_unmap; umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR); } - bitmap_zero(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE); + bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); wi->consumed_strides = 0; umr_wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); umr_wqe->uctrl.xlt_offset = - cpu_to_be16(MLX5_ALIGNED_MTTS_OCTW(MLX5E_REQUIRED_MTTS(ix))); + cpu_to_be16(MLX5_ALIGNED_MTTS_OCTW(ix * rq->mpwqe.mtts_per_wqe)); sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX, - .num_wqebbs = MLX5E_UMR_WQEBBS, + .num_wqebbs = rq->mpwqe.umr_wqebbs, .umr.rq = rq, }; - sq->pc += MLX5E_UMR_WQEBBS; + sq->pc += rq->mpwqe.umr_wqebbs; sq->doorbell_cseg = &umr_wqe->ctrl; @@ -1805,8 +1806,8 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, wqe_id); u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; - u32 head_offset = wqe_offset & (PAGE_SIZE - 1); - u32 page_idx = wqe_offset >> PAGE_SHIFT; + u32 head_offset = wqe_offset & ((1 << rq->mpwqe.page_shift) - 1); + u32 page_idx = wqe_offset >> rq->mpwqe.page_shift; struct mlx5e_rx_wqe_ll *wqe; struct mlx5_wq_ll *wq; struct sk_buff *skb; @@ -1863,6 +1864,7 @@ mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, struct mlx5e_dma_i net_prefetchw(skb->data); while (data_bcnt) { + /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */ u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - data_offset, data_bcnt); unsigned int truesize; @@ -1900,6 +1902,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w net_prefetchw(skb->data); + /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */ if (unlikely(frag_offset >= PAGE_SIZE)) { di++; frag_offset -= PAGE_SIZE; @@ -2157,8 +2160,8 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, wqe_id); u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; - u32 head_offset = wqe_offset & (PAGE_SIZE - 1); - u32 page_idx = wqe_offset >> PAGE_SHIFT; + u32 head_offset = wqe_offset & ((1 << rq->mpwqe.page_shift) - 1); + u32 page_idx = wqe_offset >> rq->mpwqe.page_shift; struct mlx5e_rx_wqe_ll *wqe; struct mlx5_wq_ll *wq; struct sk_buff *skb;