diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 26911b15f8fe..81fd53569463 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -53,6 +52,7 @@ #include "wq.h" #include "mlx5_core.h" #include "en_stats.h" +#include "en/dcbnl.h" #include "en/fs.h" #include "lib/hv_vhca.h" @@ -69,8 +69,6 @@ struct page_pool; #define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu)) #define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu)) -#define MLX5E_MAX_PRIORITY 8 -#define MLX5E_MAX_DSCP 64 #define MLX5E_MAX_NUM_TC 8 #define MLX5_RX_HEADROOM NET_SKB_PAD @@ -243,10 +241,6 @@ enum mlx5e_priv_flag { #define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag)))) -#ifdef CONFIG_MLX5_CORE_EN_DCB -#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ -#endif - struct mlx5e_params { u8 log_sq_size; u8 rq_wq_type; @@ -271,42 +265,6 @@ struct mlx5e_params { int hard_mtu; }; -#ifdef CONFIG_MLX5_CORE_EN_DCB -struct mlx5e_cee_config { - /* bw pct for priority group */ - u8 pg_bw_pct[CEE_DCBX_MAX_PGS]; - u8 prio_to_pg_map[CEE_DCBX_MAX_PRIO]; - bool pfc_setting[CEE_DCBX_MAX_PRIO]; - bool pfc_enable; -}; - -enum { - MLX5_DCB_CHG_RESET, - MLX5_DCB_NO_CHG, - MLX5_DCB_CHG_NO_RESET, -}; - -struct mlx5e_dcbx { - enum mlx5_dcbx_oper_mode mode; - struct mlx5e_cee_config cee_cfg; /* pending configuration */ - u8 dscp_app_cnt; - - /* The only setting that cannot be read from FW */ - u8 tc_tsa[IEEE_8021QAZ_MAX_TCS]; - u8 cap; - - /* Buffer configuration */ - bool manual_buffer; - u32 cable_len; - u32 xoff; -}; - -struct mlx5e_dcbx_dp { - u8 dscp2prio[MLX5E_MAX_DSCP]; - u8 trust_state; -}; -#endif - enum { MLX5E_RQ_STATE_ENABLED, MLX5E_RQ_STATE_RECOVERING, @@ -1069,13 +1027,6 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) } 
extern const struct ethtool_ops mlx5e_ethtool_ops; -#ifdef CONFIG_MLX5_CORE_EN_DCB -extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops; -int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets); -void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv); -void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv); -void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv); -#endif int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 *in); @@ -1083,7 +1034,8 @@ void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir); int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); -int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb); +int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, + bool enable_mc_lb); /* common netdev helpers */ void mlx5e_create_q_counters(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h new file mode 100644 index 000000000000..7be6b2d36b60 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. 
*/ + +#ifndef __MLX5E_DCBNL_H__ +#define __MLX5E_DCBNL_H__ + +#ifdef CONFIG_MLX5_CORE_EN_DCB + +#define MLX5E_MAX_PRIORITY (8) + +struct mlx5e_cee_config { + /* bw pct for priority group */ + u8 pg_bw_pct[CEE_DCBX_MAX_PGS]; + u8 prio_to_pg_map[CEE_DCBX_MAX_PRIO]; + bool pfc_setting[CEE_DCBX_MAX_PRIO]; + bool pfc_enable; +}; + +struct mlx5e_dcbx { + enum mlx5_dcbx_oper_mode mode; + struct mlx5e_cee_config cee_cfg; /* pending configuration */ + u8 dscp_app_cnt; + + /* The only setting that cannot be read from FW */ + u8 tc_tsa[IEEE_8021QAZ_MAX_TCS]; + u8 cap; + + /* Buffer configuration */ + bool manual_buffer; + u32 cable_len; + u32 xoff; +}; + +#define MLX5E_MAX_DSCP (64) + +struct mlx5e_dcbx_dp { + u8 dscp2prio[MLX5E_MAX_DSCP]; + u8 trust_state; +}; + +void mlx5e_dcbnl_build_netdev(struct net_device *netdev); +void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev); +void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv); +void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv); +void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv); +#else +static inline void mlx5e_dcbnl_build_netdev(struct net_device *netdev) {} +static inline void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev) {} +static inline void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) {} +static inline void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv) {} +static inline void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) {} +#endif + +#endif /* __MLX5E_DCBNL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 5568ded97e0b..98263f00ee43 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -24,6 +24,7 @@ #define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0) #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1) #define MLX5_CT_STATE_TRK_BIT BIT(2) +#define MLX5_CT_STATE_NAT_BIT BIT(3) #define MLX5_FTE_ID_BITS 
(mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8) #define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0) @@ -61,6 +62,15 @@ struct mlx5_ct_zone_rule { bool nat; }; +struct mlx5_tc_ct_pre { + struct mlx5_flow_table *fdb; + struct mlx5_flow_group *flow_grp; + struct mlx5_flow_group *miss_grp; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_handle *miss_rule; + struct mlx5_modify_hdr *modify_hdr; +}; + struct mlx5_ct_ft { struct rhash_head node; u16 zone; @@ -68,6 +78,8 @@ struct mlx5_ct_ft { struct nf_flowtable *nf_ft; struct mlx5_tc_ct_priv *ct_priv; struct rhashtable ct_entries_ht; + struct mlx5_tc_ct_pre pre_ct; + struct mlx5_tc_ct_pre pre_ct_nat; }; struct mlx5_ct_entry { @@ -426,6 +438,7 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_eswitch *esw = ct_priv->esw; struct mlx5_modify_hdr *mod_hdr; struct flow_action_entry *meta; + u16 ct_state = 0; int err; meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule); @@ -444,11 +457,13 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, &mod_acts); if (err) goto err_mapping; + + ct_state |= MLX5_CT_STATE_NAT_BIT; } + ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT; err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts, - (MLX5_CT_STATE_ESTABLISHED_BIT | - MLX5_CT_STATE_TRK_BIT), + ct_state, meta->ct_metadata.mark, meta->ct_metadata.labels[0], tupleid); @@ -791,6 +806,238 @@ mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, return 0; } +static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct, + bool nat) +{ + struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; + struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; + struct mlx5_core_dev *dev = ct_priv->esw->dev; + struct mlx5_flow_table *fdb = pre_ct->fdb; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_modify_hdr *mod_hdr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + u32 ctstate; + u16 zone; + 
int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + zone = ct_ft->zone & MLX5_CT_ZONE_MASK; + err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ZONE_TO_REG, zone); + if (err) { + ct_dbg("Failed to set zone register mapping"); + goto err_mapping; + } + + mod_hdr = mlx5_modify_header_alloc(dev, + MLX5_FLOW_NAMESPACE_FDB, + pre_mod_acts.num_actions, + pre_mod_acts.actions); + + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + ct_dbg("Failed to create pre ct mod hdr"); + goto err_mapping; + } + pre_ct->modify_hdr = mod_hdr; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + flow_act.modify_hdr = mod_hdr; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + + /* add flow rule */ + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, + zone, MLX5_CT_ZONE_MASK); + ctstate = MLX5_CT_STATE_TRK_BIT; + if (nat) + ctstate |= MLX5_CT_STATE_NAT_BIT; + mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate); + + dest.ft = ct_priv->post_ct; + rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + ct_dbg("Failed to add pre ct flow rule zone %d", zone); + goto err_flow_rule; + } + pre_ct->flow_rule = rule; + + /* add miss rule */ + memset(spec, 0, sizeof(*spec)); + dest.ft = nat ? 
ct_priv->ct_nat : ct_priv->ct; + rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + ct_dbg("Failed to add pre ct miss rule zone %d", zone); + goto err_miss_rule; + } + pre_ct->miss_rule = rule; + + dealloc_mod_hdr_actions(&pre_mod_acts); + kvfree(spec); + return 0; + +err_miss_rule: + mlx5_del_flow_rules(pre_ct->flow_rule); +err_flow_rule: + mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); +err_mapping: + dealloc_mod_hdr_actions(&pre_mod_acts); + kvfree(spec); + return err; +} + +static void +tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct) +{ + struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; + struct mlx5_core_dev *dev = ct_priv->esw->dev; + + mlx5_del_flow_rules(pre_ct->flow_rule); + mlx5_del_flow_rules(pre_ct->miss_rule); + mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); +} + +static int +mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct, + bool nat) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; + struct mlx5_core_dev *dev = ct_priv->esw->dev; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *g; + u32 metadata_reg_c_2_mask; + u32 *flow_group_in; + void *misc; + int err; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!ns) { + err = -EOPNOTSUPP; + ct_dbg("Failed to get FDB flow namespace"); + return err; + } + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.prio = FDB_TC_OFFLOAD; + ft_attr.max_fte = 2; + ft_attr.level = 1; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + ct_dbg("Failed to create pre ct table"); + goto out_free; + } + pre_ct->fdb = ft; + + /* create flow group */ + MLX5_SET(create_flow_group_in, flow_group_in, 
start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + + misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria.misc_parameters_2); + + metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK; + metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16); + if (nat) + metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16); + + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2, + metadata_reg_c_2_mask); + + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + ct_dbg("Failed to create pre ct group"); + goto err_flow_grp; + } + pre_ct->flow_grp = g; + + /* create miss group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + ct_dbg("Failed to create pre ct miss group"); + goto err_miss_grp; + } + pre_ct->miss_grp = g; + + err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat); + if (err) + goto err_add_rules; + + kvfree(flow_group_in); + return 0; + +err_add_rules: + mlx5_destroy_flow_group(pre_ct->miss_grp); +err_miss_grp: + mlx5_destroy_flow_group(pre_ct->flow_grp); +err_flow_grp: + mlx5_destroy_flow_table(ft); +out_free: + kvfree(flow_group_in); + return err; +} + +static void +mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct) +{ + tc_ct_pre_ct_del_rules(ct_ft, pre_ct); + mlx5_destroy_flow_group(pre_ct->miss_grp); + mlx5_destroy_flow_group(pre_ct->flow_grp); + mlx5_destroy_flow_table(pre_ct->fdb); +} + +static int +mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft) +{ + int err; + + err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false); + if (err) + return err; + + err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true); + if (err) + goto 
err_pre_ct_nat; + + return 0; + +err_pre_ct_nat: + mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); + return err; +} + +static void +mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft) +{ + mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat); + mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); +} + static struct mlx5_ct_ft * mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, struct nf_flowtable *nf_ft) @@ -813,6 +1060,10 @@ mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, ft->ct_priv = ct_priv; refcount_set(&ft->refcount, 1); + err = mlx5_tc_ct_alloc_pre_ct_tables(ft); + if (err) + goto err_alloc_pre_ct; + err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params); if (err) goto err_init; @@ -834,6 +1085,8 @@ mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, err_insert: rhashtable_destroy(&ft->ct_entries_ht); err_init: + mlx5_tc_ct_free_pre_ct_tables(ft); +err_alloc_pre_ct: kfree(ft); return ERR_PTR(err); } @@ -859,21 +1112,40 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) rhashtable_free_and_destroy(&ft->ct_entries_ht, mlx5_tc_ct_flush_ft_entry, ct_priv); + mlx5_tc_ct_free_pre_ct_tables(ft); kfree(ft); } /* We translate the tc filter with CT action to the following HW model: * - * +-------------------+ +--------------------+ +--------------+ - * + pre_ct (tc chain) +----->+ CT (nat or no nat) +--->+ post_ct +-----> - * + original match + | + tuple + zone match + | + fte_id match + | - * +-------------------+ | +--------------------+ | +--------------+ | - * v v v - * set chain miss mapping set mark original - * set fte_id set label filter - * set zone set established actions - * set tunnel_id do nat (if needed) - * do decap + * +---------------------+ + * + fdb prio (tc chain) + + * + original match + + * +---------------------+ + * | set chain miss mapping + * | set fte_id + * | set tunnel_id + * | do decap + * v + * +---------------------+ + * + pre_ct/pre_ct_nat + if matches +---------------------+ + * + 
zone+nat match +---------------->+ post_ct (see below) + + * +---------------------+ set zone +---------------------+ + * | set zone + * v + * +--------------------+ + * + CT (nat or no nat) + + * + tuple + zone match + + * +--------------------+ + * | set mark + * | set label + * | set established + * | do nat (if needed) + * v + * +--------------+ + * + post_ct + original filter actions + * + fte_id match +------------------------> + * +--------------+ */ static int __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, @@ -888,7 +1160,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, struct mlx5_flow_spec *post_ct_spec = NULL; struct mlx5_eswitch *esw = ct_priv->esw; struct mlx5_esw_flow_attr *pre_ct_attr; - struct mlx5_modify_hdr *mod_hdr; + struct mlx5_modify_hdr *mod_hdr; struct mlx5_flow_handle *rule; struct mlx5_ct_flow *ct_flow; int chain_mapping = 0, err; @@ -951,14 +1223,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, goto err_mapping; } - err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, ZONE_TO_REG, - attr->ct_attr.zone & - MLX5_CT_ZONE_MASK); - if (err) { - ct_dbg("Failed to set zone register mapping"); - goto err_mapping; - } - err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, FTEID_TO_REG, fte_id); if (err) { @@ -1018,7 +1282,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, /* Change original rule point to ct table */ pre_ct_attr->dest_chain = 0; - pre_ct_attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct; + pre_ct_attr->dest_ft = nat ? 
ft->pre_ct_nat.fdb : ft->pre_ct.fdb; ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw, orig_spec, pre_ct_attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index dce2bbbf9109..bfd3e1161bc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -6,25 +6,6 @@ #include "en.h" -#define MLX5E_SQ_NOPS_ROOM (MLX5_SEND_WQE_MAX_WQEBBS - 1) -#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ - MLX5E_SQ_NOPS_ROOM) - -#ifndef CONFIG_MLX5_EN_TLS -#define MLX5E_SQ_TLS_ROOM (0) -#else -/* TLS offload requires additional stop_room for: - * - a resync SKB. - * kTLS offload requires fixed additional stop_room for: - * - a static params WQE, and a progress params WQE. - * The additional MTU-depending room for the resync DUMP WQEs - * will be calculated and added in runtime. - */ -#define MLX5E_SQ_TLS_ROOM \ - (MLX5_SEND_WQE_MAX_WQEBBS + \ - MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS) -#endif - #define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) enum mlx5e_icosq_wqe_type { @@ -331,4 +312,25 @@ mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, } } +static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size) +{ + BUILD_BUG_ON(PAGE_SIZE / MLX5_SEND_WQE_BB < MLX5_SEND_WQE_MAX_WQEBBS); + + /* A WQE must not cross the page boundary, hence two conditions: + * 1. Its size must not exceed the page size. + * 2. If the WQE size is X, and the space remaining in a page is less + * than X, this space needs to be padded with NOPs. So, one WQE of + * size X may require up to X-1 WQEBBs of padding, which makes the + * stop room of X-1 + X. + * WQE size is also limited by the hardware limit. 
+ */ + + if (__builtin_constant_p(wqe_size)) + BUILD_BUG_ON(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS); + else + WARN_ON_ONCE(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS); + + return wqe_size * 2 - 1; +} + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 761c8979bd41..42202d19245c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -257,8 +257,10 @@ enum { static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq) { if (unlikely(!sq->mpwqe.wqe)) { + const u16 stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, - MLX5E_XDPSQ_STOP_ROOM))) { + stop_room))) { /* SQ is full, ring doorbell */ mlx5e_xmit_xdp_doorbell(sq); sq->stats->full++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index e2e01f064c1e..be64eb68f4e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -40,8 +40,6 @@ (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */) -#define MLX5E_XDPSQ_STOP_ROOM (MLX5E_SQ_STOP_ROOM) - #define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg)) #define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT \ DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD, MLX5_SEND_WQE_DS) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c index 46725cd743a3..417a2d9dd248 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -4,6 +4,19 @@ #include "en.h" #include "en_accel/ktls.h" +u16 mlx5e_ktls_get_stop_room(struct mlx5e_txqsq *sq) +{ + u16 num_dumps, stop_room = 0; + + num_dumps = mlx5e_ktls_dumps_num_wqes(sq, 
MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE); + + stop_room += mlx5e_stop_room_for_wqe(MLX5E_KTLS_STATIC_WQEBBS); + stop_room += mlx5e_stop_room_for_wqe(MLX5E_KTLS_PROGRESS_WQEBBS); + stop_room += num_dumps * mlx5e_stop_room_for_wqe(MLX5E_KTLS_DUMP_WQEBBS); + + return stop_room; +} + static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn) { u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index dabbc5f226ce..c6180892cfcb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -102,15 +102,16 @@ bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *s void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, u32 *dma_fifo_cc); +u16 mlx5e_ktls_get_stop_room(struct mlx5e_txqsq *sq); + static inline u8 -mlx5e_ktls_dumps_num_wqebbs(struct mlx5e_txqsq *sq, unsigned int nfrags, - unsigned int sync_len) +mlx5e_ktls_dumps_num_wqes(struct mlx5e_txqsq *sq, unsigned int nfrags, + unsigned int sync_len) { /* Given the MTU and sync_len, calculates an upper bound for the - * number of WQEBBs needed for the TX resync DUMP WQEs of a record. + * number of DUMP WQEs needed for the TX resync of a record. 
*/ - return MLX5E_KTLS_DUMP_WQEBBS * - (nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu)); + return nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu); } #else @@ -122,7 +123,6 @@ static inline void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, u32 *dma_fifo_cc) {} - #endif #endif /* __MLX5E_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c index fba561ffe1d4..c27e9a609d51 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c @@ -240,3 +240,17 @@ void mlx5e_tls_cleanup(struct mlx5e_priv *priv) kfree(tls); priv->tls = NULL; } + +u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq) +{ + struct mlx5_core_dev *mdev = sq->channel->mdev; + + if (!mlx5_accel_is_tls_device(mdev)) + return 0; + + if (MLX5_CAP_GEN(mdev, tls_tx)) + return mlx5e_ktls_get_stop_room(sq); + + /* Resync SKB. */ + return mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h index 9015f3f7792d..9219bdb2786e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h @@ -94,6 +94,8 @@ int mlx5e_tls_get_count(struct mlx5e_priv *priv); int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data); int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data); +u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq); + #else static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) @@ -108,6 +110,11 @@ static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; } static inline int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { return 0; } static inline int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { return 0; } +static inline u16 mlx5e_tls_get_stop_room(struct 
mlx5e_txqsq *sq) +{ + return 0; +} + #endif #endif /* __MLX5E_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index af3228b3f303..1e42c7ae621b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -141,10 +141,12 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) memset(res, 0, sizeof(*res)); } -int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) +int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, + bool enable_mc_lb) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_tir *tir; + u8 lb_flags = 0; int err = 0; u32 tirn = 0; int inlen; @@ -158,8 +160,13 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) } if (enable_uc_lb) - MLX5_SET(modify_tir_in, in, ctx.self_lb_block, - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST); + lb_flags = MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; + + if (enable_mc_lb) + lb_flags |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST; + + if (lb_flags) + MLX5_SET(modify_tir_in, in, ctx.self_lb_block, lb_flags); MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 47874d34156b..ec7b332d74c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -35,6 +35,8 @@ #include "en/port.h" #include "en/port_buffer.h" +#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ + #define MLX5E_100MB (100000) #define MLX5E_1GB (1000000) @@ -49,6 +51,12 @@ enum { MLX5E_LOWEST_PRIO_GROUP = 0, }; +enum { + MLX5_DCB_CHG_RESET, + MLX5_DCB_NO_CHG, + MLX5_DCB_CHG_NO_RESET, +}; + #define MLX5_DSCP_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, qcam_reg) && \ MLX5_CAP_QCAM_REG(mdev, qpts) && \ MLX5_CAP_QCAM_REG(mdev, qpdpm)) @@ -238,7 +246,7 @@ static void 
mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw, * Report both group #0 and #1 as ETS type. * All the tcs in group #0 will be reported with 0% BW. */ -int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) +static int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) { struct mlx5_core_dev *mdev = priv->mdev; u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS]; @@ -1009,6 +1017,24 @@ const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = { .setpfcstate = mlx5e_dcbnl_setpfcstate, }; +void mlx5e_dcbnl_build_netdev(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos)) + netdev->dcbnl_ops = &mlx5e_dcbnl_ops; +} + +void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + if (MLX5_CAP_GEN(mdev, qos)) + netdev->dcbnl_ops = &mlx5e_dcbnl_ops; +} + static void mlx5e_dcbnl_query_dcbx_mode(struct mlx5e_priv *priv, enum mlx5_dcbx_oper_mode *mode) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 0e4ca08ddca9..07823abe5557 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -66,7 +66,6 @@ #include "en/devlink.h" #include "lib/mlx5.h" - bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { bool striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && @@ -1122,6 +1121,22 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) return 0; } +static int mlx5e_calc_sq_stop_room(struct mlx5e_txqsq *sq, u8 log_sq_size) +{ + int sq_size = 1 << log_sq_size; + + sq->stop_room = mlx5e_tls_get_stop_room(sq); + sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + + if (WARN_ON(sq->stop_room >= sq_size)) { + 
netdev_err(sq->channel->netdev, "Stop room %hu is bigger than the SQ size %d\n", + sq->stop_room, sq_size); + return -ENOSPC; + } + + return 0; +} + static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work); static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, int txq_ix, @@ -1146,20 +1161,16 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; - sq->stop_room = MLX5E_SQ_STOP_ROOM; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); -#ifdef CONFIG_MLX5_EN_TLS - if (mlx5_accel_is_tls_device(c->priv->mdev)) { + if (mlx5_accel_is_tls_device(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_TLS, &sq->state); - sq->stop_room += MLX5E_SQ_TLS_ROOM + - mlx5e_ktls_dumps_num_wqebbs(sq, MAX_SKB_FRAGS, - TLS_MAX_PAYLOAD_SIZE); - } -#endif + err = mlx5e_calc_sq_stop_room(sq, params->log_sq_size); + if (err) + return err; param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -4915,10 +4926,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->netdev_ops = &mlx5e_netdev_ops; -#ifdef CONFIG_MLX5_CORE_EN_DCB - if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos)) - netdev->dcbnl_ops = &mlx5e_dcbnl_ops; -#endif + mlx5e_dcbnl_build_netdev(netdev); netdev->watchdog_timeo = 15 * HZ; @@ -5206,9 +5214,7 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) return err; } -#ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_initialize(priv); -#endif return 0; } @@ -5235,9 +5241,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5e_hv_vhca_stats_create(priv); if (netdev->reg_state != NETREG_REGISTERED) return; -#ifdef CONFIG_MLX5_CORE_EN_DCB 
mlx5e_dcbnl_init_app(priv); -#endif queue_work(priv->wq, &priv->set_rx_mode_work); @@ -5252,10 +5256,8 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; -#ifdef CONFIG_MLX5_CORE_EN_DCB if (priv->netdev->reg_state == NETREG_REGISTERED) mlx5e_dcbnl_delete_app(priv); -#endif rtnl_lock(); if (netif_running(priv->netdev)) @@ -5275,7 +5277,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) int mlx5e_update_nic_rx(struct mlx5e_priv *priv) { - return mlx5e_refresh_tirs(priv, false); + return mlx5e_refresh_tirs(priv, false, false); } static const struct mlx5e_profile mlx5e_nic_profile = { @@ -5552,9 +5554,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) mlx5e_devlink_port_type_eth_set(priv); -#ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_init_app(priv); -#endif return priv; err_devlink_port_unregister: @@ -5577,9 +5577,7 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) } #endif priv = vpriv; -#ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_delete_app(priv); -#endif unregister_netdev(priv->netdev); mlx5e_devlink_port_unregister(priv); mlx5e_detach(mdev, vpriv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 1eac7a53d56f..52351c105627 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1544,10 +1544,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) /* we want a persistent mac for the uplink rep */ mlx5_query_mac_address(mdev, netdev->dev_addr); netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops; -#ifdef CONFIG_MLX5_CORE_EN_DCB - if (MLX5_CAP_GEN(mdev, qos)) - netdev->dcbnl_ops = &mlx5e_dcbnl_ops; -#endif + mlx5e_dcbnl_build_rep_netdev(netdev); } else { netdev->netdev_ops = &mlx5e_netdev_ops_rep; eth_hw_addr_random(netdev); @@ -1929,10 +1926,8 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) mlx5_lag_add(mdev, netdev); 
priv->events_nb.notifier_call = uplink_rep_async_event; mlx5_notifier_register(mdev, &priv->events_nb); -#ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_initialize(priv); mlx5e_dcbnl_init_app(priv); -#endif } static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) @@ -1940,9 +1935,7 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_rep_priv *rpriv = priv->ppriv; -#ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_delete_app(priv); -#endif mlx5_notifier_unregister(mdev, &priv->events_nb); cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work); mlx5_lag_remove(mdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 821f94beda7a..a514685fb560 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1489,6 +1489,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) #ifdef CONFIG_MLX5_CORE_IPOIB +#define MLX5_IB_GRH_SGID_OFFSET 8 #define MLX5_IB_GRH_DGID_OFFSET 24 #define MLX5_GID_SIZE 16 @@ -1502,6 +1503,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, struct net_device *netdev; struct mlx5e_priv *priv; char *pseudo_header; + u32 flags_rqpn; u32 qpn; u8 *dgid; u8 g; @@ -1523,7 +1525,8 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, tstamp = &priv->tstamp; stats = &priv->channel_stats[rq->ix].rq; - g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3; + flags_rqpn = be32_to_cpu(cqe->flags_rqpn); + g = (flags_rqpn >> 28) & 3; dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET; if ((!g) || dgid[0] != 0xff) skb->pkt_type = PACKET_HOST; @@ -1532,9 +1535,15 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, else skb->pkt_type = PACKET_MULTICAST; - /* TODO: IB/ipoib: Allow mcast packets from other VFs - * 68996a6e760e5c74654723eeb57bf65628ae87f4 + /* Drop packets that this interface sent, ie multicast packets + * that the HCA has replicated. 
*/ + if (g && (qpn == (flags_rqpn & 0xffffff)) && + (memcmp(netdev->dev_addr + 4, skb->data + MLX5_IB_GRH_SGID_OFFSET, + MLX5_GID_SIZE) == 0)) { + skb->dev = NULL; + return; + } skb_pull(skb, MLX5_IB_GRH_BYTES); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index bbff8d8ded76..46790216ce86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -234,7 +234,7 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv, return err; } - err = mlx5e_refresh_tirs(priv, true); + err = mlx5e_refresh_tirs(priv, true, false); if (err) goto out; @@ -263,7 +263,7 @@ static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv, mlx5_nic_vport_update_local_lb(priv->mdev, false); dev_remove_pack(&lbtp->pt); - mlx5e_refresh_tirs(priv, false); + mlx5e_refresh_tirs(priv, false, false); } #define MLX5E_LB_VERIFY_TIMEOUT (msecs_to_jiffies(200)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 2da45e9b9b6d..52af6023a4b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1755,11 +1755,13 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, list_for_each_entry(iter, match_head, list) { g = iter->g; - if (!g->node.active) - continue; - nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); + if (!g->node.active) { + up_write_ref_node(&g->node, false); + continue; + } + err = insert_fte(g, fte); if (err) { up_write_ref_node(&g->node, false); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 035bd21e5d4e..7db70b6ccc07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -262,6 +262,11 @@ void mlx5i_destroy_underlay_qp(struct mlx5_core_dev 
*mdev, u32 qpn) mlx5_cmd_exec_in(mdev, destroy_qp, in); } +int mlx5i_update_nic_rx(struct mlx5e_priv *priv) +{ + return mlx5e_refresh_tirs(priv, true, true); +} + int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn) { u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; @@ -456,7 +461,7 @@ static const struct mlx5e_profile mlx5i_nic_profile = { .cleanup_rx = mlx5i_cleanup_rx, .enable = NULL, /* mlx5i_enable */ .disable = NULL, /* mlx5i_disable */ - .update_rx = mlx5e_update_nic_rx, + .update_rx = mlx5i_update_nic_rx, .update_stats = NULL, /* mlx5i_update_stats */ .update_carrier = NULL, /* no HW update in IB link */ .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index c4aa47018c0e..79071a15c4ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -92,6 +92,8 @@ int mlx5i_init(struct mlx5_core_dev *mdev, void *ppriv); void mlx5i_cleanup(struct mlx5e_priv *priv); +int mlx5i_update_nic_rx(struct mlx5e_priv *priv); + /* Get child interface nic profile */ const struct mlx5e_profile *mlx5i_pkey_get_profile(void); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index b9af37ad40bf..f70367018862 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -347,7 +347,7 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = { .cleanup_rx = mlx5i_pkey_cleanup_rx, .enable = NULL, .disable = NULL, - .update_rx = mlx5e_update_nic_rx, + .update_rx = mlx5i_update_nic_rx, .update_stats = NULL, .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c 
b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 43f97601b500..ef0706d15a5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -32,6 +32,7 @@ #include #include +#include #include #include "lib/eq.h" #include "en.h" @@ -66,6 +67,26 @@ enum { MLX5_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7), }; +static u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, + struct ptp_system_timestamp *sts) +{ + u32 timer_h, timer_h1, timer_l; + + timer_h = ioread32be(&dev->iseg->internal_timer_h); + ptp_read_system_prets(sts); + timer_l = ioread32be(&dev->iseg->internal_timer_l); + ptp_read_system_postts(sts); + timer_h1 = ioread32be(&dev->iseg->internal_timer_h); + if (timer_h != timer_h1) { + /* wrap around */ + ptp_read_system_prets(sts); + timer_l = ioread32be(&dev->iseg->internal_timer_l); + ptp_read_system_postts(sts); + } + + return (u64)timer_l | (u64)timer_h1 << 32; +} + static u64 read_internal_timer(const struct cyclecounter *cc) { struct mlx5_clock *clock = container_of(cc, struct mlx5_clock, cycles); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 742ba012c234..4d2e1e982460 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -672,26 +672,6 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) return mlx5_cmd_exec_in(dev, disable_hca, in); } -u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, - struct ptp_system_timestamp *sts) -{ - u32 timer_h, timer_h1, timer_l; - - timer_h = ioread32be(&dev->iseg->internal_timer_h); - ptp_read_system_prets(sts); - timer_l = ioread32be(&dev->iseg->internal_timer_l); - ptp_read_system_postts(sts); - timer_h1 = ioread32be(&dev->iseg->internal_timer_h); - if (timer_h != timer_h1) { - /* wrap around */ - ptp_read_system_prets(sts); - timer_l = ioread32be(&dev->iseg->internal_timer_l); - 
ptp_read_system_postts(sts); - } - - return (u64)timer_l | (u64)timer_h1 << 32; -} - static int mlx5_core_set_issi(struct mlx5_core_dev *dev) { u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {}; @@ -1217,10 +1197,9 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) mlx5_register_device(dev); set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); -out: - mutex_unlock(&dev->intf_state_mutex); - return err; + mutex_unlock(&dev->intf_state_mutex); + return 0; err_devlink_reg: mlx5_unload(dev); @@ -1230,17 +1209,15 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) function_teardown: mlx5_function_teardown(dev, boot); dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; +out: mutex_unlock(&dev->intf_state_mutex); - return err; } void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) { - if (cleanup) { + if (cleanup) mlx5_unregister_device(dev); - mlx5_drain_health_wq(dev); - } mutex_lock(&dev->intf_state_mutex); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { @@ -1383,6 +1360,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_crdump_disable(dev); mlx5_devlink_unregister(devlink); + mlx5_drain_health_wq(dev); mlx5_unload_one(dev, true); mlx5_pci_close(dev); mlx5_mdev_uninit(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index a8fb43a85d1d..fc1649dac11b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include @@ -141,8 +140,6 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 element_id); int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages); -u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, - struct ptp_system_timestamp *sts); void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev); 
void mlx5_cmd_flush(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 8ce78f42dfc0..5ddd18639a1e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -156,15 +156,21 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, return err; } -static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr) +static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u16 func_id) { - struct fw_page *fp; + struct fw_page *fp = NULL; + struct fw_page *iter; unsigned n; - if (list_empty(&dev->priv.free_list)) + list_for_each_entry(iter, &dev->priv.free_list, list) { + if (iter->func_id != func_id) + continue; + fp = iter; + } + + if (list_empty(&dev->priv.free_list) || !fp) return -ENOMEM; - fp = list_entry(dev->priv.free_list.next, struct fw_page, list); n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask)); if (n >= MLX5_NUM_4K_IN_PAGE) { mlx5_core_warn(dev, "alloc 4k bug\n"); @@ -182,35 +188,35 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr) #define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT) -static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp) +static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp, + bool in_free_list) { - int n = (fwp->addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT; - - fwp->free_count++; - set_bit(n, &fwp->bitmask); - if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) { - rb_erase(&fwp->rb_node, &dev->priv.page_root); - if (fwp->free_count != 1) - list_del(&fwp->list); - dma_unmap_page(dev->device, fwp->addr & MLX5_U64_4K_PAGE_MASK, - PAGE_SIZE, DMA_BIDIRECTIONAL); - __free_page(fwp->page); - kfree(fwp); - } else if (fwp->free_count == 1) { - list_add(&fwp->list, &dev->priv.free_list); - } + rb_erase(&fwp->rb_node, &dev->priv.page_root); + if (in_free_list) + list_del(&fwp->list); + dma_unmap_page(dev->device, 
fwp->addr & MLX5_U64_4K_PAGE_MASK, + PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(fwp->page); + kfree(fwp); } -static void free_addr(struct mlx5_core_dev *dev, u64 addr) +static void free_4k(struct mlx5_core_dev *dev, u64 addr) { struct fw_page *fwp; + int n; fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK); if (!fwp) { mlx5_core_warn_rl(dev, "page not found\n"); return; } - free_fwp(dev, fwp); + n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT; + fwp->free_count++; + set_bit(n, &fwp->bitmask); + if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) + free_fwp(dev, fwp, fwp->free_count != 1); + else if (fwp->free_count == 1) + list_add(&fwp->list, &dev->priv.free_list); } static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id) @@ -295,7 +301,7 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, for (i = 0; i < npages; i++) { retry: - err = alloc_4k(dev, &addr); + err = alloc_4k(dev, &addr, func_id); if (err) { if (err == -ENOMEM) err = alloc_system_page(dev, func_id); @@ -334,7 +340,7 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, out_4k: for (i--; i >= 0; i--) - free_addr(dev, MLX5_GET64(manage_pages_in, in, pas[i])); + free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i])); out_free: kvfree(in); if (notify_fail) @@ -355,8 +361,8 @@ static void release_all_pages(struct mlx5_core_dev *dev, u32 func_id, p = rb_next(p); if (fwp->func_id != func_id) continue; - free_fwp(dev, fwp); - npages++; + npages += (MLX5_NUM_4K_IN_PAGE - fwp->free_count); + free_fwp(dev, fwp, fwp->free_count); } dev->priv.fw_pages -= npages; @@ -440,7 +446,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, } for (i = 0; i < num_claimed; i++) - free_addr(dev, MLX5_GET64(manage_pages_out, out, pas[i])); + free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i])); if (nclaimed) *nclaimed = num_claimed;