From 3be83ee9de0298f8321aa0b148d8f9995102e40f Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Mon, 9 Dec 2024 15:08:53 +0100 Subject: [PATCH 1/4] ice: do not configure destination override for switchdev After switchdev is enabled and disabled later, LLDP packets sending stops, despite working perfectly fine before and during switchdev state. To reproduce (creating/destroying VF is what triggers the reconfiguration): devlink dev eswitch set pci/
mode switchdev echo '2' > /sys/class/net//device/sriov_numvfs echo '0' > /sys/class/net//device/sriov_numvfs This happens because LLDP relies on the destination override functionality. It needs to 1) set a flag in the descriptor, 2) set the VSI permission to make it valid. The permissions are set when the PF VSI is first configured, but switchdev then enables it for the uplink VSI (which is always the PF) once more when configured and disables when deconfigured, which leads to software-generated LLDP packets being blocked. Do not modify the destination override permissions when configuring switchdev, as the enabled state is the default configuration that is never modified. Fixes: 1a1c40df2e80 ("ice: set and release switchdev environment") Reviewed-by: Michal Swiatkowski Signed-off-by: Larysa Zaremba Reviewed-by: Simon Horman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_eswitch.c | 6 ------ drivers/net/ethernet/intel/ice/ice_lib.c | 18 ------------------ drivers/net/ethernet/intel/ice/ice_lib.h | 4 ---- 3 files changed, 28 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index d649c197cf67..ed21d7f55ac1 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -49,9 +49,6 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) if (vlan_ops->dis_rx_filtering(uplink_vsi)) goto err_vlan_filtering; - if (ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_set_allow_override)) - goto err_override_uplink; - if (ice_vsi_update_local_lb(uplink_vsi, true)) goto err_override_local_lb; @@ -63,8 +60,6 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) err_up: ice_vsi_update_local_lb(uplink_vsi, false); err_override_local_lb: - ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); -err_override_uplink: vlan_ops->ena_rx_filtering(uplink_vsi); err_vlan_filtering: ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false, @@ -275,7 +270,6 @@ static void ice_eswitch_release_env(struct ice_pf *pf) vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi); ice_vsi_update_local_lb(uplink_vsi, false); - ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); vlan_ops->ena_rx_filtering(uplink_vsi); ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, false, ICE_FLTR_TX); diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 38a1c8372180..d0faa087793d 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3936,24 +3936,6 @@ void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx) ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); } -/** - * ice_vsi_ctx_set_allow_override - allow destination override on VSI - * @ctx: pointer to VSI ctx structure - */ -void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx) -{ - ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD; -} - -/** - * ice_vsi_ctx_clear_allow_override - turn off destination override on VSI - * @ctx: pointer to VSI ctx structure - */ -void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx) -{ - ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD; -} - /** * ice_vsi_update_local_lb - update sw block in VSI with local loopback bit * @vsi: pointer to VSI structure diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index eabb35834a24..b4c9cb28a016 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -105,10 +105,6 @@ ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *)) void ice_vsi_ctx_set_antispoof(struct ice_vsi_ctx *ctx); void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx); - -void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx); - -void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx); int ice_vsi_update_local_lb(struct ice_vsi *vsi, bool set); int ice_vsi_add_vlan_zero(struct ice_vsi *vsi); int ice_vsi_del_vlan_zero(struct ice_vsi *vsi); From 23d97f18901ef5e4e264e3b1777fe65c760186b5 Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Thu, 23 Jan 2025 09:15:39 +0100 Subject: [PATCH 2/4] ice: fix memory leak in aRFS after reset Fix aRFS (accelerated Receive Flow Steering) structures memory leak by adding a checker to verify if aRFS memory is already allocated while configuring VSI. aRFS objects are allocated in two cases: - as part of VSI initialization (at probe), and - as part of reset handling However, VSI reconfiguration executed during reset involves memory allocation one more time, without prior releasing already allocated resources. This led to the memory leak with the following signature: [root@os-delivery ~]# cat /sys/kernel/debug/kmemleak unreferenced object 0xff3c1ca7252e6000 (size 8192): comm "kworker/0:0", pid 8, jiffies 4296833052 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 0): [] __kmalloc_cache_noprof+0x275/0x340 [] ice_init_arfs+0x3a/0xe0 [ice] [] ice_vsi_cfg_def+0x607/0x850 [ice] [] ice_vsi_setup+0x5b/0x130 [ice] [] ice_init+0x1c1/0x460 [ice] [] ice_probe+0x2af/0x520 [ice] [] local_pci_probe+0x43/0xa0 [] work_for_cpu_fn+0x13/0x20 [] process_one_work+0x179/0x390 [] worker_thread+0x239/0x340 [] kthread+0xcc/0x100 [] ret_from_fork+0x2d/0x50 [] ret_from_fork_asm+0x1a/0x30 ... Fixes: 28bf26724fdb ("ice: Implement aRFS") Reviewed-by: Michal Swiatkowski Signed-off-by: Grzegorz Nitka Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_arfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index 7cee365cc7d1..405ddd17de1b 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -511,7 +511,7 @@ void ice_init_arfs(struct ice_vsi *vsi) struct hlist_head *arfs_fltr_list; unsigned int i; - if (!vsi || vsi->type != ICE_VSI_PF) + if (!vsi || vsi->type != ICE_VSI_PF || ice_is_arfs_active(vsi)) return; arfs_fltr_list = kcalloc(ICE_MAX_ARFS_LIST, sizeof(*arfs_fltr_list), From dce97cb0a3e34204c0b99345418a714eac85953f Mon Sep 17 00:00:00 2001 From: Marcin Szycik Date: Thu, 2 Jan 2025 20:07:52 +0100 Subject: [PATCH 3/4] ice: Fix switchdev slow-path in LAG Ever since removing switchdev control VSI and using PF for port representor Tx/Rx, switchdev slow-path has been working improperly after failover in SR-IOV LAG. LAG assumes that the first uplink to be added to the aggregate will own VFs and have switchdev configured. After failing-over to the other uplink, representors are still configured to Tx through the uplink they are set up on, which fails because that uplink is now down. On failover, update all PRs on primary uplink to use the currently active uplink for Tx. Call netif_keep_dst(), as the secondary uplink might not be in switchdev mode. Also make sure to call ice_eswitch_set_target_vsi() if uplink is in LAG. On the Rx path, representors are already working properly, because default Tx from VFs is set to PF owning the eswitch. After failover the same PF is receiving traffic from VFs, even though link is down. Fixes: defd52455aee ("ice: do Tx through PF netdev in slow-path") Reviewed-by: Michal Swiatkowski Signed-off-by: Marcin Szycik Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lag.c | 27 +++++++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_txrx.c | 4 +++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 1ccb572ce285..22371011c249 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -1000,6 +1000,28 @@ static void ice_lag_link(struct ice_lag *lag) netdev_info(lag->netdev, "Shared SR-IOV resources in bond are active\n"); } +/** + * ice_lag_config_eswitch - configure eswitch to work with LAG + * @lag: lag info struct + * @netdev: active network interface device struct + * + * Updates all port representors in eswitch to use @netdev for Tx. + * + * Configures the netdev to keep dst metadata (also used in representor Tx). + * This is required for an uplink without switchdev mode configured. + */ +static void ice_lag_config_eswitch(struct ice_lag *lag, + struct net_device *netdev) +{ + struct ice_repr *repr; + unsigned long id; + + xa_for_each(&lag->pf->eswitch.reprs, id, repr) + repr->dst->u.port_info.lower_dev = netdev; + + netif_keep_dst(netdev); +} + /** * ice_lag_unlink - handle unlink event * @lag: LAG info struct @@ -1021,6 +1043,9 @@ static void ice_lag_unlink(struct ice_lag *lag) ice_lag_move_vf_nodes(lag, act_port, pri_port); lag->primary = false; lag->active_port = ICE_LAG_INVALID_PORT; + + /* Config primary's eswitch back to normal operation. */ + ice_lag_config_eswitch(lag, lag->netdev); } else { struct ice_lag *primary_lag; @@ -1419,6 +1444,7 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr) ice_lag_move_vf_nodes(lag, prim_port, event_port); lag->active_port = event_port; + ice_lag_config_eswitch(lag, event_netdev); return; } @@ -1428,6 +1454,7 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr) /* new active port */ ice_lag_move_vf_nodes(lag, lag->active_port, event_port); lag->active_port = event_port; + ice_lag_config_eswitch(lag, event_netdev); } else { /* port not set as currently active (e.g. new active port * has already claimed the nodes and filters diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 9c9ea4c1b93b..380ba1e8b3b2 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -2424,7 +2424,9 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) ICE_TXD_CTX_QW1_CMD_S); ice_tstamp(tx_ring, skb, first, &offload); - if (ice_is_switchdev_running(vsi->back) && vsi->type != ICE_VSI_SF) + if ((ice_is_switchdev_running(vsi->back) || + ice_lag_is_switchdev_running(vsi->back)) && + vsi->type != ICE_VSI_SF) ice_eswitch_set_target_vsi(skb, &offload); if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) { From 2a3e89a14864090ee4804fcec655ffc15fabf45c Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Wed, 19 Feb 2025 14:30:39 +0100 Subject: [PATCH 4/4] ice: register devlink prior to creating health reporters ice_health_init() was introduced in the commit 2a82874a3b7b ("ice: add Tx hang devlink health reporter"). The call to it should have been put after ice_devlink_register(). It went unnoticed until next reporter by Konrad, which receives events from FW. FW is reporting all events, also from prior driver load, and thus it is not unlikely to have something at the very beginning. And that results in a splat: [ 24.455950] ? devlink_recover_notify.constprop.0+0x198/0x1b0 [ 24.455973] devlink_health_report+0x5d/0x2a0 [ 24.455976] ? __pfx_ice_health_status_lookup_compare+0x10/0x10 [ice] [ 24.456044] ice_process_health_status_event+0x1b7/0x200 [ice] Do the analogous thing for deinit patch. Fixes: 85d6164ec56d ("ice: add fw and port health reporters") Reviewed-by: Aleksandr Loktionov Reviewed-by: Michal Swiatkowski Reviewed-by: Konrad Knitter Signed-off-by: Przemek Kitszel Tested-by: Sunitha Mekala (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c3a0fb97c5ee..e13bd5a6cb6c 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5065,16 +5065,16 @@ static int ice_init_devlink(struct ice_pf *pf) return err; ice_devlink_init_regions(pf); - ice_health_init(pf); ice_devlink_register(pf); + ice_health_init(pf); return 0; } static void ice_deinit_devlink(struct ice_pf *pf) { - ice_devlink_unregister(pf); ice_health_deinit(pf); + ice_devlink_unregister(pf); ice_devlink_destroy_regions(pf); ice_devlink_unregister_params(pf); }