From f12b69d8f22824a07f17c1399c99757072de73e0 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 27 Sep 2025 19:39:08 +0200 Subject: [PATCH 01/75] batman-adv: Release references to inactive interfaces Trying to dump the originators or the neighbors via netlink for a meshif with an inactive primary interface is not allowed. The dump functions were checking this correctly but they didn't handle non-existing primary interfaces and existing _inactive_ interfaces differently. (Primary) batadv_hard_ifaces hold a references to a net_device. And accessing them is only allowed when either being in a RCU/spinlock protected section or when holding a valid reference to them. The netlink dump functions use the latter. But because the missing specific error handling for inactive primary interfaces, the reference was never dropped. This reference counting error was only detected when the interface should have been removed from the system: unregister_netdevice: waiting for batadv_slave_0 to become free. Usage count = 2 Cc: stable@vger.kernel.org Fixes: 6ecc4fd6c2f4 ("batman-adv: netlink: reduce duplicate code by returning interfaces") Reported-by: syzbot+881d65229ca4f9ae8c84@syzkaller.appspotmail.com Reported-by: Tetsuo Handa Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/originator.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index a464ff96b929..ed89d7fd1e7f 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -764,11 +764,16 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb) bat_priv = netdev_priv(mesh_iface); primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + if (!primary_if) { ret = -ENOENT; goto out_put_mesh_iface; } + if (primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out_put_primary_if; + } + hard_iface = batadv_netlink_get_hardif(bat_priv, cb); if (IS_ERR(hard_iface) && PTR_ERR(hard_iface) != -ENONET) { ret = PTR_ERR(hard_iface); @@ -1333,11 +1338,16 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) bat_priv = netdev_priv(mesh_iface); primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + if (!primary_if) { ret = -ENOENT; goto out_put_mesh_iface; } + if (primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out_put_primary_if; + } + hard_iface = batadv_netlink_get_hardif(bat_priv, cb); if (IS_ERR(hard_iface) && PTR_ERR(hard_iface) != -ENONET) { ret = PTR_ERR(hard_iface); From 2e9c1da4ee9d0acfca2e0a3d78f3d8cb5802da1b Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Fri, 26 Sep 2025 21:56:56 +0200 Subject: [PATCH 02/75] wifi: ath10k: Fix memory leak on unsupported WMI command ath10k_wmi_cmd_send takes ownership of the passed buffer (skb) and has the responsibility to release it in case of error. This patch fixes missing free in case of early error due to unhandled WMI command ID. Tested-on: WCN3990 hw1.0 WLAN.HL.3.3.7.c2-00931-QCAHLSWMTPLZ-1 Fixes: 553215592f14 ("ath10k: warn if give WMI command is not supported") Suggested-by: Jeff Johnson Signed-off-by: Loic Poulain Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20250926195656.187970-1-loic.poulain@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index cb8ae751eb31..b4aad6604d6d 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -1938,6 +1938,7 @@ int ath10k_wmi_cmd_send(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id) if (cmd_id == WMI_CMD_UNSUPPORTED) { ath10k_warn(ar, "wmi command %d is not supported by firmware\n", cmd_id); + dev_kfree_skb_any(skb); return ret; } From 0eb002c93c3b47f88244cecb1e356eaeab61a6bf Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Mon, 29 Sep 2025 15:21:35 -0400 Subject: [PATCH 03/75] wifi: ath11k: Add missing platform IDs for quirk table Lenovo platforms can come with one of two different IDs. The pm_quirk table was missing the second ID for each platform. Add missing ID and some extra platform identification comments. Reported on https://bugzilla.kernel.org/show_bug.cgi?id=219196 Tested-on: P14s G4 AMD. Fixes: ce8669a27016 ("wifi: ath11k: determine PM policy based on machine model") Signed-off-by: Mark Pearson Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219196 Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20250929192146.1789648-1-mpearson-lenovo@squebb.ca Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/core.c | 54 +++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index d49353b6b2e7..e9618432cd2f 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -912,42 +912,84 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { static const struct dmi_system_id ath11k_pm_quirk_table[] = { { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* X13 G4 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21J3"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* X13 G4 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21J4"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* T14 G4 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21K3"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* T14 G4 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21K4"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* P14s G4 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21K5"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* P14s G4 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21K6"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* T16 G2 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21K7"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* T16 G2 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21K8"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* P16s G2 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21K9"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* P16s G2 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21KA"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* T14s G4 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21F8"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* T14s G4 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21F9"), }, From 92282074e1d2e7b6da5c05fe38a7cc974187fe14 Mon Sep 17 00:00:00 2001 From: Karthik M Date: Tue, 23 Sep 2025 15:03:16 -0700 Subject: [PATCH 04/75] wifi: ath12k: free skb during idr cleanup callback ath12k just like ath11k [1] did not handle skb cleanup during idr cleanup callback. Both ath12k_mac_vif_txmgmt_idr_remove() and ath12k_mac_tx_mgmt_pending_free() performed idr cleanup and DMA unmapping for skb but only ath12k_mac_tx_mgmt_pending_free() freed skb. As a result, during vdev deletion a memory leak occurs. Refactor all clean up steps into a new function. New function ath12k_mac_tx_mgmt_free() creates a centralized area where idr cleanup, DMA unmapping for skb and freeing skb is performed. Utilize skb pointer given by idr_remove(), instead of passed as a function argument because IDR will be protected by locking. This will prevent concurrent modification of the same IDR. Now ath12k_mac_tx_mgmt_pending_free() and ath12k_mac_vif_txmgmt_idr_remove() call ath12k_mac_tx_mgmt_free(). Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Link: https://lore.kernel.org/r/1637832614-13831-1-git-send-email-quic_srirrama@quicinc.com > # [1] Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: Karthik M Signed-off-by: Muna Sinada Reviewed-by: Vasanthakumar Thiagarajan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20250923220316.1595758-1-muna.sinada@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 34 ++++++++++++++------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 3a3965b79942..2fad2df1d6ce 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -8304,23 +8304,32 @@ static void ath12k_mgmt_over_wmi_tx_drop(struct ath12k *ar, struct sk_buff *skb) wake_up(&ar->txmgmt_empty_waitq); } -int ath12k_mac_tx_mgmt_pending_free(int buf_id, void *skb, void *ctx) +static void ath12k_mac_tx_mgmt_free(struct ath12k *ar, int buf_id) { - struct sk_buff *msdu = skb; + struct sk_buff *msdu; struct ieee80211_tx_info *info; - struct ath12k *ar = ctx; - struct ath12k_base *ab = ar->ab; spin_lock_bh(&ar->txmgmt_idr_lock); - idr_remove(&ar->txmgmt_idr, buf_id); + msdu = idr_remove(&ar->txmgmt_idr, buf_id); spin_unlock_bh(&ar->txmgmt_idr_lock); - dma_unmap_single(ab->dev, ATH12K_SKB_CB(msdu)->paddr, msdu->len, + + if (!msdu) + return; + + dma_unmap_single(ar->ab->dev, ATH12K_SKB_CB(msdu)->paddr, msdu->len, DMA_TO_DEVICE); info = IEEE80211_SKB_CB(msdu); memset(&info->status, 0, sizeof(info->status)); - ath12k_mgmt_over_wmi_tx_drop(ar, skb); + ath12k_mgmt_over_wmi_tx_drop(ar, msdu); +} + +int ath12k_mac_tx_mgmt_pending_free(int buf_id, void *skb, void *ctx) +{ + struct ath12k *ar = ctx; + + ath12k_mac_tx_mgmt_free(ar, buf_id); return 0; } @@ -8329,17 +8338,10 @@ static int ath12k_mac_vif_txmgmt_idr_remove(int buf_id, void *skb, void *ctx) { struct ieee80211_vif *vif = ctx; struct ath12k_skb_cb *skb_cb = ATH12K_SKB_CB(skb); - struct sk_buff *msdu = skb; struct ath12k *ar = skb_cb->ar; - struct ath12k_base *ab = ar->ab; - if (skb_cb->vif == vif) { - spin_lock_bh(&ar->txmgmt_idr_lock); - idr_remove(&ar->txmgmt_idr, buf_id); - spin_unlock_bh(&ar->txmgmt_idr_lock); - dma_unmap_single(ab->dev, skb_cb->paddr, msdu->len, - DMA_TO_DEVICE); - } + if (skb_cb->vif == vif) + ath12k_mac_tx_mgmt_free(ar, buf_id); return 0; } From 9c78e747dd4fee6c36fcc926212e20032055cf9d Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Fri, 3 Oct 2025 14:51:58 +0530 Subject: [PATCH 05/75] wifi: ath11k: avoid bit operation on key flags Bitwise operations with WMI_KEY_PAIRWISE (defined as 0) are ineffective and misleading. This results in pairwise key validations added in commit 97acb0259cc9 ("wifi: ath11k: fix group data packet drops during rekey") to always evaluate false and clear key commands for pairwise keys are not honored. Since firmware supports overwriting the new key without explicitly clearing the previous one, there is no visible impact currently. However, to restore consistency with the previous behavior and improve clarity, replace bitwise operations with direct assignments and comparisons for key flags. Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.9.0.1-02146-QCAHKSWPL_SILICONZ-1 Tested-on: WCN6855 hw2.1 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.41 Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-wireless/aLlaetkalDvWcB7b@stanley.mountain Fixes: 97acb0259cc9 ("wifi: ath11k: fix group data packet drops during rekey") Signed-off-by: Rameshkumar Sundaram Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251003092158.1080637-1-rameshkumar.sundaram@oss.qualcomm.com [update copyright per current guidance] Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/mac.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 106e2530b64e..0e41b5a91d66 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include @@ -4417,9 +4417,9 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, } if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) - flags |= WMI_KEY_PAIRWISE; + flags = WMI_KEY_PAIRWISE; else - flags |= WMI_KEY_GROUP; + flags = WMI_KEY_GROUP; ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "%s for peer %pM on vdev %d flags 0x%X, type = %d, num_sta %d\n", @@ -4456,7 +4456,7 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, is_ap_with_no_sta = (vif->type == NL80211_IFTYPE_AP && !arvif->num_stations); - if ((flags & WMI_KEY_PAIRWISE) || cmd == SET_KEY || is_ap_with_no_sta) { + if (flags == WMI_KEY_PAIRWISE || cmd == SET_KEY || is_ap_with_no_sta) { ret = ath11k_install_key(arvif, key, cmd, peer_addr, flags); if (ret) { ath11k_warn(ab, "ath11k_install_key failed (%d)\n", ret); @@ -4470,7 +4470,7 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, goto exit; } - if ((flags & WMI_KEY_GROUP) && cmd == SET_KEY && is_ap_with_no_sta) + if (flags == WMI_KEY_GROUP && cmd == SET_KEY && is_ap_with_no_sta) arvif->reinstall_group_keys = true; } From 77e67d5daaf155f7d0f99f4e797c4842169ec19e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 23 Sep 2025 14:20:16 +0300 Subject: [PATCH 06/75] wifi: iwlwifi: fix potential use after free in iwl_mld_remove_link() This code frees "link" by calling kfree_rcu(link, rcu_head) and then it dereferences "link" to get the "link->fw_id". Save the "link->fw_id" first to avoid a potential use after free. Fixes: d1e879ec600f ("wifi: iwlwifi: add iwlmld sub-driver") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/aNKCcKlbSkkS4_gO@stanley.mountain Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/mld/link.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/link.c b/drivers/net/wireless/intel/iwlwifi/mld/link.c index 782fc41aa1c3..960dcd208f00 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/link.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/link.c @@ -501,6 +501,7 @@ void iwl_mld_remove_link(struct iwl_mld *mld, struct iwl_mld_vif *mld_vif = iwl_mld_vif_from_mac80211(bss_conf->vif); struct iwl_mld_link *link = iwl_mld_link_from_mac80211(bss_conf); bool is_deflink = link == &mld_vif->deflink; + u8 fw_id = link->fw_id; if (WARN_ON(!link || link->active)) return; @@ -513,10 +514,10 @@ void iwl_mld_remove_link(struct iwl_mld *mld, RCU_INIT_POINTER(mld_vif->link[bss_conf->link_id], NULL); - if (WARN_ON(link->fw_id >= mld->fw->ucode_capa.num_links)) + if (WARN_ON(fw_id >= mld->fw->ucode_capa.num_links)) return; - RCU_INIT_POINTER(mld->fw_id_to_bss_conf[link->fw_id], NULL); + RCU_INIT_POINTER(mld->fw_id_to_bss_conf[fw_id], NULL); } void iwl_mld_handle_missed_beacon_notif(struct iwl_mld *mld, From 607844761454e3c17e928002e126ccf21c83f6aa Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Wed, 24 Sep 2025 18:30:14 +0530 Subject: [PATCH 07/75] wifi: mac80211: reset FILS discovery and unsol probe resp intervals When ieee80211_stop_ap() deletes the FILS discovery and unsolicited broadcast probe response templates, the associated interval values are not reset. This can lead to drivers subsequently operating with the non-zero values, leading to unexpected behavior. Trigger repeated retrieval attempts of the FILS discovery template in ath12k, resulting in excessive log messages such as: mac vdev 0 failed to retrieve FILS discovery template mac vdev 4 failed to retrieve FILS discovery template Fix this by resetting the intervals in ieee80211_stop_ap() to ensure proper cleanup of FILS discovery and unsolicited broadcast probe response templates. Fixes: 295b02c4be74 ("mac80211: Add FILS discovery support") Fixes: 632189a0180f ("mac80211: Unsolicited broadcast probe response support") Signed-off-by: Aloka Dixit Signed-off-by: Aaradhana Sahu Link: https://patch.msgid.link/20250924130014.2575533-1-aaradhana.sahu@oss.qualcomm.com Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d9aca1c3c097..c52b0456039d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1876,6 +1876,9 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, link_conf->nontransmitted = false; link_conf->ema_ap = false; link_conf->bssid_indicator = 0; + link_conf->fils_discovery.min_interval = 0; + link_conf->fils_discovery.max_interval = 0; + link_conf->unsol_bcast_probe_resp_interval = 0; __sta_info_flush(sdata, true, link_id, NULL); From a2a69add80411dd295c9088c1bcf925b1f4e53d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 3 Oct 2025 14:51:26 +0200 Subject: [PATCH 08/75] bcma: don't register devices disabled in OF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some bus devices can be marked as disabled for specific SoCs or models. Those should not be registered to avoid probing them. Signed-off-by: Rafał Miłecki Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251003125126.27950-1-zajec5@gmail.com Signed-off-by: Johannes Berg --- drivers/bcma/main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 6ecfc821cf83..72f045e6ed51 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -294,6 +294,8 @@ static int bcma_register_devices(struct bcma_bus *bus) int err; list_for_each_entry(core, &bus->cores, list) { + struct device_node *np; + /* We support that core ourselves */ switch (core->id.id) { case BCMA_CORE_4706_CHIPCOMMON: @@ -311,6 +313,10 @@ static int bcma_register_devices(struct bcma_bus *bus) if (bcma_is_core_needed_early(core->id.id)) continue; + np = core->dev.of_node; + if (np && !of_device_is_available(np)) + continue; + /* Only first GMAC core on BCM4706 is connected and working */ if (core->id.id == BCMA_CORE_4706_MAC_GBIT && core->core_unit > 0) From 1e1801cab6c7f302baec2a0fe3afe25458d0be7e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 11 Oct 2025 00:57:35 +0100 Subject: [PATCH 09/75] MAINTAINERS: wcn36xx: Add linux-wireless list The wcn36xx is a wireless device but doesn't have the wireless list in its MAINTAINERS entry. Add it. Signed-off-by: Dr. David Alan Gilbert Acked-by: Jeff Johnson Link: https://patch.msgid.link/20251010235735.350638-1-linux@treblig.org Signed-off-by: Johannes Berg --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 545a4776795e..91bf59792060 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21317,6 +21317,7 @@ F: drivers/media/platform/qcom/venus/ QUALCOMM WCN36XX WIRELESS DRIVER M: Loic Poulain L: wcn36xx@lists.infradead.org +L: linux-wireless@vger.kernel.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/wcn36xx F: drivers/net/wireless/ath/wcn36xx/ From 3776c685ebe5f43e9060af06872661de55e80b9a Mon Sep 17 00:00:00 2001 From: Gokul Sivakumar Date: Mon, 13 Oct 2025 15:58:19 +0530 Subject: [PATCH 10/75] wifi: brcmfmac: fix crash while sending Action Frames in standalone AP Mode Currently, whenever there is a need to transmit an Action frame, the brcmfmac driver always uses the P2P vif to send the "actframe" IOVAR to firmware. The P2P interfaces were available when wpa_supplicant is managing the wlan interface. However, the P2P interfaces are not created/initialized when only hostapd is managing the wlan interface. And if hostapd receives an ANQP Query REQ Action frame even from an un-associated STA, the brcmfmac driver tries to use an uninitialized P2P vif pointer for sending the IOVAR to firmware. This NULL pointer dereferencing triggers a driver crash. [ 1417.074538] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [...] [ 1417.075188] Hardware name: Raspberry Pi 4 Model B Rev 1.5 (DT) [...] [ 1417.075653] Call trace: [ 1417.075662] brcmf_p2p_send_action_frame+0x23c/0xc58 [brcmfmac] [ 1417.075738] brcmf_cfg80211_mgmt_tx+0x304/0x5c0 [brcmfmac] [ 1417.075810] cfg80211_mlme_mgmt_tx+0x1b0/0x428 [cfg80211] [ 1417.076067] nl80211_tx_mgmt+0x238/0x388 [cfg80211] [ 1417.076281] genl_family_rcv_msg_doit+0xe0/0x158 [ 1417.076302] genl_rcv_msg+0x220/0x2a0 [ 1417.076317] netlink_rcv_skb+0x68/0x140 [ 1417.076330] genl_rcv+0x40/0x60 [ 1417.076343] netlink_unicast+0x330/0x3b8 [ 1417.076357] netlink_sendmsg+0x19c/0x3f8 [ 1417.076370] __sock_sendmsg+0x64/0xc0 [ 1417.076391] ____sys_sendmsg+0x268/0x2a0 [ 1417.076408] ___sys_sendmsg+0xb8/0x118 [ 1417.076427] __sys_sendmsg+0x90/0xf8 [ 1417.076445] __arm64_sys_sendmsg+0x2c/0x40 [ 1417.076465] invoke_syscall+0x50/0x120 [ 1417.076486] el0_svc_common.constprop.0+0x48/0xf0 [ 1417.076506] do_el0_svc+0x24/0x38 [ 1417.076525] el0_svc+0x30/0x100 [ 1417.076548] el0t_64_sync_handler+0x100/0x130 [ 1417.076569] el0t_64_sync+0x190/0x198 [ 1417.076589] Code: f9401e80 aa1603e2 f9403be1 5280e483 (f9400000) Fix this, by always using the vif corresponding to the wdev on which the Action frame Transmission request was initiated by the userspace. This way, even if P2P vif is not available, the IOVAR is sent to firmware on AP vif and the ANQP Query RESP Action frame is transmitted without crashing the driver. Move init_completion() for "send_af_done" from brcmf_p2p_create_p2pdev() to brcmf_p2p_attach(). Because the former function would not get executed when only hostapd is managing wlan interface, and it is not safe to do reinit_completion() later in brcmf_p2p_tx_action_frame(), without any prior init_completion(). And in the brcmf_p2p_tx_action_frame() function, the condition check for P2P Presence response frame is not needed, since the wpa_supplicant is properly sending the P2P Presense Response frame on the P2P-GO vif instead of the P2P-Device vif. Cc: stable@vger.kernel.org Fixes: 18e2f61db3b7 ("brcmfmac: P2P action frame tx") Signed-off-by: Gokul Sivakumar Acked-by: Arend van Spriel Link: https://patch.msgid.link/20251013102819.9727-1-gokulkumar.sivakumar@infineon.com [Cc stable] Signed-off-by: Johannes Berg --- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 3 +- .../broadcom/brcm80211/brcmfmac/p2p.c | 28 +++++++------------ .../broadcom/brcm80211/brcmfmac/p2p.h | 3 +- 3 files changed, 12 insertions(+), 22 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 8afaffe31031..bb96b87b2a6e 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -5627,8 +5627,7 @@ brcmf_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, *cookie, le16_to_cpu(action_frame->len), le32_to_cpu(af_params->channel)); - ack = brcmf_p2p_send_action_frame(cfg, cfg_to_ndev(cfg), - af_params); + ack = brcmf_p2p_send_action_frame(vif->ifp, af_params); cfg80211_mgmt_tx_status(wdev, *cookie, buf, len, ack, GFP_KERNEL); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index 0dc9d28cd77b..e1752a513c73 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -1529,6 +1529,7 @@ int brcmf_p2p_notify_action_tx_complete(struct brcmf_if *ifp, /** * brcmf_p2p_tx_action_frame() - send action frame over fil. * + * @ifp: interface to transmit on. * @p2p: p2p info struct for vif. * @af_params: action frame data/info. * @@ -1538,12 +1539,11 @@ int brcmf_p2p_notify_action_tx_complete(struct brcmf_if *ifp, * The WLC_E_ACTION_FRAME_COMPLETE event will be received when the action * frame is transmitted. */ -static s32 brcmf_p2p_tx_action_frame(struct brcmf_p2p_info *p2p, +static s32 brcmf_p2p_tx_action_frame(struct brcmf_if *ifp, + struct brcmf_p2p_info *p2p, struct brcmf_fil_af_params_le *af_params) { struct brcmf_pub *drvr = p2p->cfg->pub; - struct brcmf_cfg80211_vif *vif; - struct brcmf_p2p_action_frame *p2p_af; s32 err = 0; brcmf_dbg(TRACE, "Enter\n"); @@ -1552,14 +1552,7 @@ static s32 brcmf_p2p_tx_action_frame(struct brcmf_p2p_info *p2p, clear_bit(BRCMF_P2P_STATUS_ACTION_TX_COMPLETED, &p2p->status); clear_bit(BRCMF_P2P_STATUS_ACTION_TX_NOACK, &p2p->status); - /* check if it is a p2p_presence response */ - p2p_af = (struct brcmf_p2p_action_frame *)af_params->action_frame.data; - if (p2p_af->subtype == P2P_AF_PRESENCE_RSP) - vif = p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION].vif; - else - vif = p2p->bss_idx[P2PAPI_BSSCFG_DEVICE].vif; - - err = brcmf_fil_bsscfg_data_set(vif->ifp, "actframe", af_params, + err = brcmf_fil_bsscfg_data_set(ifp, "actframe", af_params, sizeof(*af_params)); if (err) { bphy_err(drvr, " sending action frame has failed\n"); @@ -1711,16 +1704,14 @@ static bool brcmf_p2p_check_dwell_overflow(u32 requested_dwell, /** * brcmf_p2p_send_action_frame() - send action frame . * - * @cfg: driver private data for cfg80211 interface. - * @ndev: net device to transmit on. + * @ifp: interface to transmit on. * @af_params: configuration data for action frame. */ -bool brcmf_p2p_send_action_frame(struct brcmf_cfg80211_info *cfg, - struct net_device *ndev, +bool brcmf_p2p_send_action_frame(struct brcmf_if *ifp, struct brcmf_fil_af_params_le *af_params) { + struct brcmf_cfg80211_info *cfg = ifp->drvr->config; struct brcmf_p2p_info *p2p = &cfg->p2p; - struct brcmf_if *ifp = netdev_priv(ndev); struct brcmf_fil_action_frame_le *action_frame; struct brcmf_config_af_params config_af_params; struct afx_hdl *afx_hdl = &p2p->afx_hdl; @@ -1857,7 +1848,7 @@ bool brcmf_p2p_send_action_frame(struct brcmf_cfg80211_info *cfg, if (af_params->channel) msleep(P2P_AF_RETRY_DELAY_TIME); - ack = !brcmf_p2p_tx_action_frame(p2p, af_params); + ack = !brcmf_p2p_tx_action_frame(ifp, p2p, af_params); tx_retry++; dwell_overflow = brcmf_p2p_check_dwell_overflow(requested_dwell, dwell_jiffies); @@ -2217,7 +2208,6 @@ static struct wireless_dev *brcmf_p2p_create_p2pdev(struct brcmf_p2p_info *p2p, WARN_ON(p2p_ifp->bsscfgidx != bsscfgidx); - init_completion(&p2p->send_af_done); INIT_WORK(&p2p->afx_hdl.afx_work, brcmf_p2p_afx_handler); init_completion(&p2p->afx_hdl.act_frm_scan); init_completion(&p2p->wait_next_af); @@ -2513,6 +2503,8 @@ s32 brcmf_p2p_attach(struct brcmf_cfg80211_info *cfg, bool p2pdev_forced) pri_ifp = brcmf_get_ifp(cfg->pub, 0); p2p->bss_idx[P2PAPI_BSSCFG_PRIMARY].vif = pri_ifp->vif; + init_completion(&p2p->send_af_done); + if (p2pdev_forced) { err_ptr = brcmf_p2p_create_p2pdev(p2p, NULL, NULL); if (IS_ERR(err_ptr)) { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h index d2ecee565bf2..d3137ebd7158 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h @@ -168,8 +168,7 @@ int brcmf_p2p_notify_action_frame_rx(struct brcmf_if *ifp, int brcmf_p2p_notify_action_tx_complete(struct brcmf_if *ifp, const struct brcmf_event_msg *e, void *data); -bool brcmf_p2p_send_action_frame(struct brcmf_cfg80211_info *cfg, - struct net_device *ndev, +bool brcmf_p2p_send_action_frame(struct brcmf_if *ifp, struct brcmf_fil_af_params_le *af_params); bool brcmf_p2p_scan_finding_common_channel(struct brcmf_cfg80211_info *cfg, struct brcmf_bss_info_le *bi); From ed6a47346ec69e7f1659e0a1a3558293f60d5dd7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 19 Oct 2025 11:54:27 +0300 Subject: [PATCH 11/75] wifi: mac80211: fix key tailroom accounting leak For keys added by ieee80211_gtk_rekey_add(), we assume that they're already present in the hardware and set the flag KEY_FLAG_UPLOADED_TO_HARDWARE. However, setting this flag needs to be paired with decrementing the tailroom needed, which was missed. Fixes: f52a0b408ed1 ("wifi: mac80211: mark keys as uploaded when added by the driver") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251019115358.c88eafb4083e.I69e9d4d78a756a133668c55b5570cf15a4b0e6a4@changeid Signed-off-by: Johannes Berg --- net/mac80211/key.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index b14e9cd9713f..d5da7ccea66e 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -508,11 +508,16 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, ret = ieee80211_key_enable_hw_accel(new); } } else { - if (!new->local->wowlan) + if (!new->local->wowlan) { ret = ieee80211_key_enable_hw_accel(new); - else if (link_id < 0 || !sdata->vif.active_links || - BIT(link_id) & sdata->vif.active_links) + } else if (link_id < 0 || !sdata->vif.active_links || + BIT(link_id) & sdata->vif.active_links) { new->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; + if (!(new->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | + IEEE80211_KEY_FLAG_PUT_MIC_SPACE | + IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) + decrease_tailroom_need_count(sdata, 1); + } } if (ret) From 249e1443e3d57e059925bdb698f53e4d008fc106 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 20 Oct 2025 10:57:45 +0300 Subject: [PATCH 12/75] wifi: nl80211: call kfree without a NULL check Coverity is unhappy because we may leak old_radio_rts_threshold. Since this pointer is only valid in the context of the function and kfree is NULL pointer safe, don't check and just call kfree. Note that somehow, we were checking old_rts_threshold to free old_radio_rts_threshold which is a bit odd. Fixes: 264637941cf4 ("wifi: cfg80211: Add Support to Set RTS Threshold for each Radio") Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach Link: https://patch.msgid.link/20251020075745.44168-1-emmanuel.grumbach@intel.com Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 346dfd2bd987..03d07b54359a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4136,8 +4136,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.txq_quantum = old_txq_quantum; } - if (old_rts_threshold) - kfree(old_radio_rts_threshold); + kfree(old_radio_rts_threshold); return result; } From 420c84c330d1688b8c764479e5738bbdbf0a33de Mon Sep 17 00:00:00 2001 From: Lizhi Xu Date: Wed, 22 Oct 2025 10:40:07 +0800 Subject: [PATCH 13/75] usbnet: Prevents free active kevent The root cause of this issue are: 1. When probing the usbnet device, executing usbnet_link_change(dev, 0, 0); put the kevent work in global workqueue. However, the kevent has not yet been scheduled when the usbnet device is unregistered. Therefore, executing free_netdev() results in the "free active object (kevent)" error reported here. 2. Another factor is that when calling usbnet_disconnect()->unregister_netdev(), if the usbnet device is up, ndo_stop() is executed to cancel the kevent. However, because the device is not up, ndo_stop() is not executed. The solution to this problem is to cancel the kevent before executing free_netdev(). Fixes: a69e617e533e ("usbnet: Fix linkwatch use-after-free on disconnect") Reported-by: Sam Sun Closes: https://syzkaller.appspot.com/bug?extid=8bfd7bcc98f7300afb84 Signed-off-by: Lizhi Xu Link: https://patch.msgid.link/20251022024007.1831898-1-lizhi.xu@windriver.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index bf01f2728531..697cd9d866d3 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1659,6 +1659,8 @@ void usbnet_disconnect (struct usb_interface *intf) net = dev->net; unregister_netdev (net); + cancel_work_sync(&dev->kevent); + while ((urb = usb_get_from_anchor(&dev->deferred))) { dev_kfree_skb(urb->context); kfree(urb->sg); From 1ab665817448c31f4758dce43c455bd4c5e460aa Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Wed, 22 Oct 2025 22:56:30 +0700 Subject: [PATCH 14/75] virtio-net: drop the multi-buffer XDP packet in zerocopy In virtio-net, we have not yet supported multi-buffer XDP packet in zerocopy mode when there is a binding XDP program. However, in that case, when receiving multi-buffer XDP packet, we skip the XDP program and return XDP_PASS. As a result, the packet is passed to normal network stack which is an incorrect behavior (e.g. a XDP program for packet count is installed, multi-buffer XDP packet arrives and does go through XDP program. As a result, the packet count does not increase but the packet is still received from network stack).This commit instead returns XDP_ABORTED in that case. Fixes: 99c861b44eb1 ("virtio_net: xsk: rx: support recv merge mode") Cc: stable@vger.kernel.org Acked-by: Jason Wang Reviewed-by: Xuan Zhuo Signed-off-by: Bui Quang Minh Link: https://patch.msgid.link/20251022155630.49272-1-minhquangbui99@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index a757cbcab87f..8e8a179aaa49 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1379,9 +1379,14 @@ static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct ret = XDP_PASS; rcu_read_lock(); prog = rcu_dereference(rq->xdp_prog); - /* TODO: support multi buffer. */ - if (prog && num_buf == 1) - ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); + if (prog) { + /* TODO: support multi buffer. */ + if (num_buf == 1) + ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, + stats); + else + ret = XDP_ABORTED; + } rcu_read_unlock(); switch (ret) { From 09b0cd1297b4dbfe736aeaa0ceeab2265f47f772 Mon Sep 17 00:00:00 2001 From: Cen Zhang Date: Mon, 29 Sep 2025 05:30:17 +0000 Subject: [PATCH 15/75] Bluetooth: hci_sync: fix race in hci_cmd_sync_dequeue_once hci_cmd_sync_dequeue_once() does lookup and then cancel the entry under two separate lock sections. Meanwhile, hci_cmd_sync_work() can also delete the same entry, leading to double list_del() and "UAF". Fix this by holding cmd_sync_work_lock across both lookup and cancel, so that the entry cannot be removed concurrently. Fixes: 505ea2b29592 ("Bluetooth: hci_sync: Add helper functions to manipulate cmd_sync queue") Reported-by: Cen Zhang Signed-off-by: Cen Zhang Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index eefdb6134ca5..d160e5e1fe8a 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -863,11 +863,17 @@ bool hci_cmd_sync_dequeue_once(struct hci_dev *hdev, { struct hci_cmd_sync_work_entry *entry; - entry = hci_cmd_sync_lookup_entry(hdev, func, data, destroy); - if (!entry) - return false; + mutex_lock(&hdev->cmd_sync_work_lock); - hci_cmd_sync_cancel_entry(hdev, entry); + entry = _hci_cmd_sync_lookup_entry(hdev, func, data, destroy); + if (!entry) { + mutex_unlock(&hdev->cmd_sync_work_lock); + return false; + } + + _hci_cmd_sync_cancel_entry(hdev, entry, -ECANCELED); + + mutex_unlock(&hdev->cmd_sync_work_lock); return true; } From f0c200a4a537f8f374584a974518b0ce69eda76c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 26 Sep 2025 11:48:50 -0400 Subject: [PATCH 16/75] Bluetooth: ISO: Fix BIS connection dst_type handling Socket dst_type cannot be directly assigned to hci_conn->type since there domain is different which may lead to the wrong address type being used. Fixes: 6a5ad251b7cd ("Bluetooth: ISO: Fix possible circular locking dependency") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 9b263d061e05..954e1916506b 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -2032,7 +2032,7 @@ static void iso_conn_ready(struct iso_conn *conn) */ if (!bacmp(&hcon->dst, BDADDR_ANY)) { bacpy(&hcon->dst, &iso_pi(parent)->dst); - hcon->dst_type = iso_pi(parent)->dst_type; + hcon->dst_type = le_addr_type(iso_pi(parent)->dst_type); } if (test_bit(HCI_CONN_PA_SYNC, &hcon->flags)) { From 77343b8b4f87560f8f03e77b98a81ff3a147b262 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Tue, 30 Sep 2025 13:39:33 +0800 Subject: [PATCH 17/75] Bluetooth: btmtksdio: Add pmctrl handling for BT closed state during reset This patch adds logic to handle power management control when the Bluetooth function is closed during the SDIO reset sequence. Specifically, if BT is closed before reset, the driver enables the SDIO function and sets driver pmctrl. After reset, if BT remains closed, the driver sets firmware pmctrl and disables the SDIO function. These changes ensure proper power management and device state consistency across the reset flow. Fixes: 8fafe702253d ("Bluetooth: mt7921s: support bluetooth reset mechanism") Signed-off-by: Chris Lu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtksdio.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index 50abefba6d04..62db31bd6592 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -1270,6 +1270,12 @@ static void btmtksdio_reset(struct hci_dev *hdev) sdio_claim_host(bdev->func); + /* set drv_pmctrl if BT is closed before doing reset */ + if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) { + sdio_enable_func(bdev->func); + btmtksdio_drv_pmctrl(bdev); + } + sdio_writel(bdev->func, C_INT_EN_CLR, MTK_REG_CHLPCR, NULL); skb_queue_purge(&bdev->txq); cancel_work_sync(&bdev->txrx_work); @@ -1285,6 +1291,12 @@ static void btmtksdio_reset(struct hci_dev *hdev) goto err; } + /* set fw_pmctrl back if BT is closed after doing reset */ + if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) { + btmtksdio_fw_pmctrl(bdev); + sdio_disable_func(bdev->func); + } + clear_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state); err: sdio_release_host(bdev->func); From 0d92808024b4e9868cef68d16f121d509843e80e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 1 Oct 2025 10:55:58 -0400 Subject: [PATCH 18/75] Bluetooth: HCI: Fix tracking of advertisement set/instance 0x00 This fixes the state tracking of advertisement set/instance 0x00 which is considered a legacy instance and is not tracked individually by adv_instances list, previously it was assumed that hci_dev itself would track it via HCI_LE_ADV but that is a global state not specifc to instance 0x00, so to fix it a new flag is introduced that only tracks the state of instance 0x00. Fixes: 1488af7b8b5f ("Bluetooth: hci_sync: Fix hci_resume_advertising_sync") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 + net/bluetooth/hci_event.c | 4 ++++ net/bluetooth/hci_sync.c | 5 ++--- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 9ecc70baaca9..8d0e703bc929 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -434,6 +434,7 @@ enum { HCI_USER_CHANNEL, HCI_EXT_CONFIGURED, HCI_LE_ADV, + HCI_LE_ADV_0, HCI_LE_PER_ADV, HCI_LE_SCAN, HCI_SSP_ENABLED, diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d790b0d4eb9a..1dabf5a7ae18 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1609,6 +1609,8 @@ static u8 hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, void *data, if (adv && !adv->periodic) adv->enabled = true; + else if (!set->handle) + hci_dev_set_flag(hdev, HCI_LE_ADV_0); conn = hci_lookup_le_connect(hdev); if (conn) @@ -1619,6 +1621,8 @@ static u8 hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, void *data, if (cp->num_of_sets) { if (adv) adv->enabled = false; + else if (!set->handle) + hci_dev_clear_flag(hdev, HCI_LE_ADV_0); /* If just one instance was disabled check if there are * any other instance enabled before clearing HCI_LE_ADV diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index d160e5e1fe8a..28ad08cd7d70 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2606,9 +2606,8 @@ static int hci_resume_advertising_sync(struct hci_dev *hdev) /* If current advertising instance is set to instance 0x00 * then we need to re-enable it. */ - if (!hdev->cur_adv_instance) - err = hci_enable_ext_advertising_sync(hdev, - hdev->cur_adv_instance); + if (hci_dev_test_and_clear_flag(hdev, HCI_LE_ADV_0)) + err = hci_enable_ext_advertising_sync(hdev, 0x00); } else { /* Schedule for most recent instance to be restarted and begin * the software rotation loop From e8785404de06a69d89dcdd1e9a0b6ea42dc6d327 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Fri, 3 Oct 2025 22:07:32 +0300 Subject: [PATCH 19/75] Bluetooth: MGMT: fix crash in set_mesh_sync and set_mesh_complete There is a BUG: KASAN: stack-out-of-bounds in set_mesh_sync due to memcpy from badly declared on-stack flexible array. Another crash is in set_mesh_complete() due to double list_del via mgmt_pending_valid + mgmt_pending_remove. Use DEFINE_FLEX to declare the flexible array right, and don't memcpy outside bounds. As mgmt_pending_valid removes the cmd from list, use mgmt_pending_free, and also report status on error. Fixes: 302a1f674c00d ("Bluetooth: MGMT: Fix possible UAFs") Signed-off-by: Pauli Virtanen Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/mgmt.h | 2 +- net/bluetooth/mgmt.c | 26 +++++++++++++++----------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 74edea06985b..bca0333f1e99 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -853,7 +853,7 @@ struct mgmt_cp_set_mesh { __le16 window; __le16 period; __u8 num_ad_types; - __u8 ad_types[]; + __u8 ad_types[] __counted_by(num_ad_types); } __packed; #define MGMT_SET_MESH_RECEIVER_SIZE 6 diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index a3d16eece0d2..24e335e3a727 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2175,19 +2175,24 @@ static void set_mesh_complete(struct hci_dev *hdev, void *data, int err) sk = cmd->sk; if (status) { + mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + status); mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, true, cmd_status_rsp, &status); - return; + goto done; } - mgmt_pending_remove(cmd); mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, 0, NULL, 0); + +done: + mgmt_pending_free(cmd); } static int set_mesh_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; - struct mgmt_cp_set_mesh cp; + DEFINE_FLEX(struct mgmt_cp_set_mesh, cp, ad_types, num_ad_types, + sizeof(hdev->mesh_ad_types)); size_t len; mutex_lock(&hdev->mgmt_pending_lock); @@ -2197,27 +2202,26 @@ static int set_mesh_sync(struct hci_dev *hdev, void *data) return -ECANCELED; } - memcpy(&cp, cmd->param, sizeof(cp)); + len = cmd->param_len; + memcpy(cp, cmd->param, min(__struct_size(cp), len)); mutex_unlock(&hdev->mgmt_pending_lock); - len = cmd->param_len; - memset(hdev->mesh_ad_types, 0, sizeof(hdev->mesh_ad_types)); - if (cp.enable) + if (cp->enable) hci_dev_set_flag(hdev, HCI_MESH); else hci_dev_clear_flag(hdev, HCI_MESH); - hdev->le_scan_interval = __le16_to_cpu(cp.period); - hdev->le_scan_window = __le16_to_cpu(cp.window); + hdev->le_scan_interval = __le16_to_cpu(cp->period); + hdev->le_scan_window = __le16_to_cpu(cp->window); - len -= sizeof(cp); + len -= sizeof(struct mgmt_cp_set_mesh); /* If filters don't fit, forward all adv pkts */ if (len <= sizeof(hdev->mesh_ad_types)) - memcpy(hdev->mesh_ad_types, cp.ad_types, len); + memcpy(hdev->mesh_ad_types, cp->ad_types, len); hci_update_passive_scan_sync(hdev); return 0; From 76e20da0bd00c556ed0a1e7250bdb6ac3e808ea8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Danis?= Date: Mon, 6 Oct 2025 10:35:44 +0200 Subject: [PATCH 20/75] Revert "Bluetooth: L2CAP: convert timeouts to secs_to_jiffies()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit c9d84da18d1e0d28a7e16ca6df8e6d47570501d4. It replaces in L2CAP calls to msecs_to_jiffies() to secs_to_jiffies() and updates the constants accordingly. But the constants are also used in LCAP Configure Request and L2CAP Configure Response which expect values in milliseconds. This may prevent correct usage of L2CAP channel. To fix it, keep those constants in milliseconds and so revert this change. Fixes: c9d84da18d1e ("Bluetooth: L2CAP: convert timeouts to secs_to_jiffies()") Signed-off-by: Frédéric Danis Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/l2cap.h | 4 ++-- net/bluetooth/l2cap_core.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 4bb0eaedda18..00e182a22720 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -38,8 +38,8 @@ #define L2CAP_DEFAULT_TX_WINDOW 63 #define L2CAP_DEFAULT_EXT_WINDOW 0x3FFF #define L2CAP_DEFAULT_MAX_TX 3 -#define L2CAP_DEFAULT_RETRANS_TO 2 /* seconds */ -#define L2CAP_DEFAULT_MONITOR_TO 12 /* seconds */ +#define L2CAP_DEFAULT_RETRANS_TO 2000 /* 2 seconds */ +#define L2CAP_DEFAULT_MONITOR_TO 12000 /* 12 seconds */ #define L2CAP_DEFAULT_MAX_PDU_SIZE 1492 /* Sized for AMP packet */ #define L2CAP_DEFAULT_ACK_TO 200 #define L2CAP_DEFAULT_MAX_SDU_SIZE 0xFFFF diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 805c752ac0a9..d08320380ad6 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -282,7 +282,7 @@ static void __set_retrans_timer(struct l2cap_chan *chan) if (!delayed_work_pending(&chan->monitor_timer) && chan->retrans_timeout) { l2cap_set_timer(chan, &chan->retrans_timer, - secs_to_jiffies(chan->retrans_timeout)); + msecs_to_jiffies(chan->retrans_timeout)); } } @@ -291,7 +291,7 @@ static void __set_monitor_timer(struct l2cap_chan *chan) __clear_retrans_timer(chan); if (chan->monitor_timeout) { l2cap_set_timer(chan, &chan->monitor_timer, - secs_to_jiffies(chan->monitor_timeout)); + msecs_to_jiffies(chan->monitor_timeout)); } } From c403da5e98b04a2aec9cfb25cbeeb28d7ce29975 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 7 Oct 2025 13:29:15 -0400 Subject: [PATCH 21/75] Bluetooth: ISO: Fix another instance of dst_type handling Socket dst_type cannot be directly assigned to hci_conn->type since there domain is different which may lead to the wrong address type being used. Fixes: 6a5ad251b7cd ("Bluetooth: ISO: Fix possible circular locking dependency") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 954e1916506b..3d98cb6291da 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -2046,7 +2046,13 @@ static void iso_conn_ready(struct iso_conn *conn) } bacpy(&iso_pi(sk)->dst, &hcon->dst); - iso_pi(sk)->dst_type = hcon->dst_type; + + /* Convert from HCI to three-value type */ + if (hcon->dst_type == ADDR_LE_DEV_PUBLIC) + iso_pi(sk)->dst_type = BDADDR_LE_PUBLIC; + else + iso_pi(sk)->dst_type = BDADDR_LE_RANDOM; + iso_pi(sk)->sync_handle = iso_pi(parent)->sync_handle; memcpy(iso_pi(sk)->base, iso_pi(parent)->base, iso_pi(parent)->base_len); iso_pi(sk)->base_len = iso_pi(parent)->base_len; From 057b6ca5961203f16a2a02fb0592661a7a959a84 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Thu, 16 Oct 2025 10:00:43 +0530 Subject: [PATCH 22/75] Bluetooth: btintel_pcie: Fix event packet loss issue In the current btintel_pcie driver implementation, when an interrupt is received, the driver checks for the alive cause before the TX/RX cause. Handling the alive cause involves resetting the TX/RX queue indices. This flow works correctly when the causes are mutually exclusive. However, if both cause bits are set simultaneously, the alive cause resets the queue indices, resulting in an event packet drop and a command timeout. To fix this issue, the driver is modified to handle all other causes before checking for the alive cause. Test case: Issue is seen with stress reboot scenario - 50x run [20.337589] Bluetooth: hci0: Device revision is 0 [20.346750] Bluetooth: hci0: Secure boot is enabled [20.346752] Bluetooth: hci0: OTP lock is disabled [20.346752] Bluetooth: hci0: API lock is enabled [20.346752] Bluetooth: hci0: Debug lock is disabled [20.346753] Bluetooth: hci0: Minimum firmware build 1 week 10 2014 [20.346754] Bluetooth: hci0: Bootloader timestamp 2023.43 buildtype 1 build 11631 [20.359070] Bluetooth: hci0: Found device firmware: intel/ibt-00a0-00a1-iml.sfi [20.371499] Bluetooth: hci0: Boot Address: 0xb02ff800 [20.385769] Bluetooth: hci0: Firmware Version: 166-34.25 [20.538257] Bluetooth: hci0: Waiting for firmware download to complete [20.554424] Bluetooth: hci0: Firmware loaded in 178651 usecs [21.081588] Bluetooth: hci0: Timeout (500 ms) on tx completion [21.096541] Bluetooth: hci0: Failed to send frame (-62) [21.110240] Bluetooth: hci0: sending frame failed (-62) [21.138551] Bluetooth: hci0: Failed to send Intel Reset command [21.170153] Bluetooth: hci0: Intel Soft Reset failed (-62) Signed-off-by: Kiran K Signed-off-by: Sai Teja Aluvala Reviewed-by: Paul Menzel Fixes: c2b636b3f788 ("Bluetooth: btintel_pcie: Add support for PCIe transport") Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 6d3963bd56a9..a075d8ec4677 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -1467,11 +1467,6 @@ static irqreturn_t btintel_pcie_irq_msix_handler(int irq, void *dev_id) if (intr_hw & BTINTEL_PCIE_MSIX_HW_INT_CAUSES_GP1) btintel_pcie_msix_gp1_handler(data); - /* This interrupt is triggered by the firmware after updating - * boot_stage register and image_response register - */ - if (intr_hw & BTINTEL_PCIE_MSIX_HW_INT_CAUSES_GP0) - btintel_pcie_msix_gp0_handler(data); /* For TX */ if (intr_fh & BTINTEL_PCIE_MSIX_FH_INT_CAUSES_0) { @@ -1487,6 +1482,12 @@ static irqreturn_t btintel_pcie_irq_msix_handler(int irq, void *dev_id) btintel_pcie_msix_tx_handle(data); } + /* This interrupt is triggered by the firmware after updating + * boot_stage register and image_response register + */ + if (intr_hw & BTINTEL_PCIE_MSIX_HW_INT_CAUSES_GP0) + btintel_pcie_msix_gp0_handler(data); + /* * Before sending the interrupt the HW disables it to prevent a nested * interrupt. This is done by writing 1 to the corresponding bit in From b489556a856d31f1eb73972150f371d2e4ce1de8 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Thu, 23 Oct 2025 11:47:19 -0700 Subject: [PATCH 23/75] Bluetooth: fix corruption in h4_recv_buf() after cleanup A different structure is stored in drvdata for the drivers which used that duplicate function, but h4_recv_buf() assumes drvdata is always an hci_uart structure. Consequently, alignment and padding are now randomly corrupted for btmtkuart, btnxpuart, and bpa10x in h4_recv_buf(), causing erratic breakage. Fix this by making the hci_uart structure the explicit argument to h4_recv_buf(). Every caller already has a reference to hci_uart, and already obtains the hci_hdev reference through it, so this actually eliminates a redundant pointer indirection for all existing callers. Fixes: 93f06f8f0daf ("Bluetooth: remove duplicate h4_recv_buf() in header") Reported-by: Francesco Valla Closes: https://lore.kernel.org/lkml/6837167.ZASKD2KPVS@fedora.fritz.box/ Signed-off-by: Calvin Owens Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/bpa10x.c | 4 +++- drivers/bluetooth/btmtkuart.c | 4 +++- drivers/bluetooth/btnxpuart.c | 4 +++- drivers/bluetooth/hci_ag6xx.c | 2 +- drivers/bluetooth/hci_aml.c | 2 +- drivers/bluetooth/hci_ath.c | 2 +- drivers/bluetooth/hci_bcm.c | 2 +- drivers/bluetooth/hci_h4.c | 6 +++--- drivers/bluetooth/hci_intel.c | 2 +- drivers/bluetooth/hci_ll.c | 2 +- drivers/bluetooth/hci_mrvl.c | 6 +++--- drivers/bluetooth/hci_nokia.c | 4 ++-- drivers/bluetooth/hci_qca.c | 2 +- drivers/bluetooth/hci_uart.h | 2 +- 14 files changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index b7ba667a3d09..e305d04aac9d 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -41,6 +41,7 @@ struct bpa10x_data { struct usb_anchor rx_anchor; struct sk_buff *rx_skb[2]; + struct hci_uart hu; }; static void bpa10x_tx_complete(struct urb *urb) @@ -96,7 +97,7 @@ static void bpa10x_rx_complete(struct urb *urb) if (urb->status == 0) { bool idx = usb_pipebulk(urb->pipe); - data->rx_skb[idx] = h4_recv_buf(hdev, data->rx_skb[idx], + data->rx_skb[idx] = h4_recv_buf(&data->hu, data->rx_skb[idx], urb->transfer_buffer, urb->actual_length, bpa10x_recv_pkts, @@ -388,6 +389,7 @@ static int bpa10x_probe(struct usb_interface *intf, hci_set_drvdata(hdev, data); data->hdev = hdev; + data->hu.hdev = hdev; SET_HCIDEV_DEV(hdev, &intf->dev); diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c index d9b90ea2ad38..27aa48ff3ac2 100644 --- a/drivers/bluetooth/btmtkuart.c +++ b/drivers/bluetooth/btmtkuart.c @@ -79,6 +79,7 @@ struct btmtkuart_dev { u16 stp_dlen; const struct btmtkuart_data *data; + struct hci_uart hu; }; #define btmtkuart_is_standalone(bdev) \ @@ -368,7 +369,7 @@ static void btmtkuart_recv(struct hci_dev *hdev, const u8 *data, size_t count) sz_left -= adv; p_left += adv; - bdev->rx_skb = h4_recv_buf(bdev->hdev, bdev->rx_skb, p_h4, + bdev->rx_skb = h4_recv_buf(&bdev->hu, bdev->rx_skb, p_h4, sz_h4, mtk_recv_pkts, ARRAY_SIZE(mtk_recv_pkts)); if (IS_ERR(bdev->rx_skb)) { @@ -858,6 +859,7 @@ static int btmtkuart_probe(struct serdev_device *serdev) } bdev->hdev = hdev; + bdev->hu.hdev = hdev; hdev->bus = HCI_UART; hci_set_drvdata(hdev, bdev); diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index d5153fed0518..3b1e9224e965 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -212,6 +212,7 @@ struct btnxpuart_dev { struct ps_data psdata; struct btnxpuart_data *nxp_data; struct reset_control *pdn; + struct hci_uart hu; }; #define NXP_V1_FW_REQ_PKT 0xa5 @@ -1756,7 +1757,7 @@ static size_t btnxpuart_receive_buf(struct serdev_device *serdev, ps_start_timer(nxpdev); - nxpdev->rx_skb = h4_recv_buf(nxpdev->hdev, nxpdev->rx_skb, data, count, + nxpdev->rx_skb = h4_recv_buf(&nxpdev->hu, nxpdev->rx_skb, data, count, nxp_recv_pkts, ARRAY_SIZE(nxp_recv_pkts)); if (IS_ERR(nxpdev->rx_skb)) { int err = PTR_ERR(nxpdev->rx_skb); @@ -1875,6 +1876,7 @@ static int nxp_serdev_probe(struct serdev_device *serdev) reset_control_deassert(nxpdev->pdn); nxpdev->hdev = hdev; + nxpdev->hu.hdev = hdev; hdev->bus = HCI_UART; hci_set_drvdata(hdev, nxpdev); diff --git a/drivers/bluetooth/hci_ag6xx.c b/drivers/bluetooth/hci_ag6xx.c index 2d40302409ff..94588676510f 100644 --- a/drivers/bluetooth/hci_ag6xx.c +++ b/drivers/bluetooth/hci_ag6xx.c @@ -105,7 +105,7 @@ static int ag6xx_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - ag6xx->rx_skb = h4_recv_buf(hu->hdev, ag6xx->rx_skb, data, count, + ag6xx->rx_skb = h4_recv_buf(hu, ag6xx->rx_skb, data, count, ag6xx_recv_pkts, ARRAY_SIZE(ag6xx_recv_pkts)); if (IS_ERR(ag6xx->rx_skb)) { diff --git a/drivers/bluetooth/hci_aml.c b/drivers/bluetooth/hci_aml.c index 707e90f80130..b1f32c5a8a3f 100644 --- a/drivers/bluetooth/hci_aml.c +++ b/drivers/bluetooth/hci_aml.c @@ -650,7 +650,7 @@ static int aml_recv(struct hci_uart *hu, const void *data, int count) struct aml_data *aml_data = hu->priv; int err; - aml_data->rx_skb = h4_recv_buf(hu->hdev, aml_data->rx_skb, data, count, + aml_data->rx_skb = h4_recv_buf(hu, aml_data->rx_skb, data, count, aml_recv_pkts, ARRAY_SIZE(aml_recv_pkts)); if (IS_ERR(aml_data->rx_skb)) { diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c index dbfe34664633..8d2b5e7f0d6a 100644 --- a/drivers/bluetooth/hci_ath.c +++ b/drivers/bluetooth/hci_ath.c @@ -191,7 +191,7 @@ static int ath_recv(struct hci_uart *hu, const void *data, int count) { struct ath_struct *ath = hu->priv; - ath->rx_skb = h4_recv_buf(hu->hdev, ath->rx_skb, data, count, + ath->rx_skb = h4_recv_buf(hu, ath->rx_skb, data, count, ath_recv_pkts, ARRAY_SIZE(ath_recv_pkts)); if (IS_ERR(ath->rx_skb)) { int err = PTR_ERR(ath->rx_skb); diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index f96617b85d87..fff845ed44e3 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -698,7 +698,7 @@ static int bcm_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - bcm->rx_skb = h4_recv_buf(hu->hdev, bcm->rx_skb, data, count, + bcm->rx_skb = h4_recv_buf(hu, bcm->rx_skb, data, count, bcm_recv_pkts, ARRAY_SIZE(bcm_recv_pkts)); if (IS_ERR(bcm->rx_skb)) { int err = PTR_ERR(bcm->rx_skb); diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c index 9070e31a68bf..ec017df8572c 100644 --- a/drivers/bluetooth/hci_h4.c +++ b/drivers/bluetooth/hci_h4.c @@ -112,7 +112,7 @@ static int h4_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - h4->rx_skb = h4_recv_buf(hu->hdev, h4->rx_skb, data, count, + h4->rx_skb = h4_recv_buf(hu, h4->rx_skb, data, count, h4_recv_pkts, ARRAY_SIZE(h4_recv_pkts)); if (IS_ERR(h4->rx_skb)) { int err = PTR_ERR(h4->rx_skb); @@ -151,12 +151,12 @@ int __exit h4_deinit(void) return hci_uart_unregister_proto(&h4p); } -struct sk_buff *h4_recv_buf(struct hci_dev *hdev, struct sk_buff *skb, +struct sk_buff *h4_recv_buf(struct hci_uart *hu, struct sk_buff *skb, const unsigned char *buffer, int count, const struct h4_recv_pkt *pkts, int pkts_count) { - struct hci_uart *hu = hci_get_drvdata(hdev); u8 alignment = hu->alignment ? hu->alignment : 1; + struct hci_dev *hdev = hu->hdev; /* Check for error from previous call */ if (IS_ERR(skb)) diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c index 9b353c3d6442..1d6e09508f1f 100644 --- a/drivers/bluetooth/hci_intel.c +++ b/drivers/bluetooth/hci_intel.c @@ -972,7 +972,7 @@ static int intel_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - intel->rx_skb = h4_recv_buf(hu->hdev, intel->rx_skb, data, count, + intel->rx_skb = h4_recv_buf(hu, intel->rx_skb, data, count, intel_recv_pkts, ARRAY_SIZE(intel_recv_pkts)); if (IS_ERR(intel->rx_skb)) { diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c index 7044c86325ce..6f4e25917b86 100644 --- a/drivers/bluetooth/hci_ll.c +++ b/drivers/bluetooth/hci_ll.c @@ -429,7 +429,7 @@ static int ll_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - ll->rx_skb = h4_recv_buf(hu->hdev, ll->rx_skb, data, count, + ll->rx_skb = h4_recv_buf(hu, ll->rx_skb, data, count, ll_recv_pkts, ARRAY_SIZE(ll_recv_pkts)); if (IS_ERR(ll->rx_skb)) { int err = PTR_ERR(ll->rx_skb); diff --git a/drivers/bluetooth/hci_mrvl.c b/drivers/bluetooth/hci_mrvl.c index e08222395772..8767522ec4c6 100644 --- a/drivers/bluetooth/hci_mrvl.c +++ b/drivers/bluetooth/hci_mrvl.c @@ -264,9 +264,9 @@ static int mrvl_recv(struct hci_uart *hu, const void *data, int count) !test_bit(STATE_FW_LOADED, &mrvl->flags)) return count; - mrvl->rx_skb = h4_recv_buf(hu->hdev, mrvl->rx_skb, data, count, - mrvl_recv_pkts, - ARRAY_SIZE(mrvl_recv_pkts)); + mrvl->rx_skb = h4_recv_buf(hu, mrvl->rx_skb, data, count, + mrvl_recv_pkts, + ARRAY_SIZE(mrvl_recv_pkts)); if (IS_ERR(mrvl->rx_skb)) { int err = PTR_ERR(mrvl->rx_skb); bt_dev_err(hu->hdev, "Frame reassembly failed (%d)", err); diff --git a/drivers/bluetooth/hci_nokia.c b/drivers/bluetooth/hci_nokia.c index cd7575c20f65..1e65b541f8ad 100644 --- a/drivers/bluetooth/hci_nokia.c +++ b/drivers/bluetooth/hci_nokia.c @@ -624,8 +624,8 @@ static int nokia_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - btdev->rx_skb = h4_recv_buf(hu->hdev, btdev->rx_skb, data, count, - nokia_recv_pkts, ARRAY_SIZE(nokia_recv_pkts)); + btdev->rx_skb = h4_recv_buf(hu, btdev->rx_skb, data, count, + nokia_recv_pkts, ARRAY_SIZE(nokia_recv_pkts)); if (IS_ERR(btdev->rx_skb)) { err = PTR_ERR(btdev->rx_skb); dev_err(dev, "Frame reassembly failed (%d)", err); diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 4cff4d9be313..888176b0faa9 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1277,7 +1277,7 @@ static int qca_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - qca->rx_skb = h4_recv_buf(hu->hdev, qca->rx_skb, data, count, + qca->rx_skb = h4_recv_buf(hu, qca->rx_skb, data, count, qca_recv_pkts, ARRAY_SIZE(qca_recv_pkts)); if (IS_ERR(qca->rx_skb)) { int err = PTR_ERR(qca->rx_skb); diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h index cbbe79b241ce..48ac7ca9334e 100644 --- a/drivers/bluetooth/hci_uart.h +++ b/drivers/bluetooth/hci_uart.h @@ -162,7 +162,7 @@ struct h4_recv_pkt { int h4_init(void); int h4_deinit(void); -struct sk_buff *h4_recv_buf(struct hci_dev *hdev, struct sk_buff *skb, +struct sk_buff *h4_recv_buf(struct hci_uart *hu, struct sk_buff *skb, const unsigned char *buffer, int count, const struct h4_recv_pkt *pkts, int pkts_count); #endif From 857eb0fabc389be5159e0e17d84bc122614b5b98 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 22 Oct 2025 16:29:41 -0400 Subject: [PATCH 24/75] Bluetooth: hci_conn: Fix connection cleanup with BIG with 2 or more BIS This fixes bis_cleanup not considering connections in BT_OPEN state before attempting to remove the BIG causing the following error: btproxy[20110]: < HCI Command: LE Terminate Broadcast Isochronous Group (0x08|0x006a) plen 2 BIG Handle: 0x01 Reason: Connection Terminated By Local Host (0x16) > HCI Event: Command Status (0x0f) plen 4 LE Terminate Broadcast Isochronous Group (0x08|0x006a) ncmd 1 Status: Unknown Advertising Identifier (0x42) Fixes: fa224d0c094a ("Bluetooth: ISO: Reassociate a socket with an active BIS") Signed-off-by: Luiz Augusto von Dentz Reviewed-by: Paul Menzel --- net/bluetooth/hci_conn.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 111f0e37b672..c5dedf39a129 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -843,6 +843,13 @@ static void bis_cleanup(struct hci_conn *conn) if (bis) return; + bis = hci_conn_hash_lookup_big_state(hdev, + conn->iso_qos.bcast.big, + BT_OPEN, + HCI_ROLE_MASTER); + if (bis) + return; + hci_le_terminate_big(hdev, conn); } else { hci_le_big_terminate(hdev, conn->iso_qos.bcast.big, From 751463ceefc3397566d03c8b64ef4a77f5fd88ac Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 22 Oct 2025 16:03:19 -0400 Subject: [PATCH 25/75] Bluetooth: hci_core: Fix tracking of periodic advertisement Periodic advertising enabled flag cannot be tracked by the enabled flag since advertising and periodic advertising each can be enabled/disabled separately from one another causing the states to be inconsistent when for example an advertising set is disabled its enabled flag is set to false which is then used for periodic which has not being disabled. Fixes: eca0ae4aea66 ("Bluetooth: Add initial implementation of BIS connections") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_event.c | 7 +++++-- net/bluetooth/hci_sync.c | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 2924c2bf2a98..b8100dbfe5d7 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -244,6 +244,7 @@ struct adv_info { bool enabled; bool pending; bool periodic; + bool periodic_enabled; __u8 mesh; __u8 instance; __u8 handle; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 1dabf5a7ae18..d37db364acf7 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1607,7 +1607,7 @@ static u8 hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, void *data, hci_dev_set_flag(hdev, HCI_LE_ADV); - if (adv && !adv->periodic) + if (adv) adv->enabled = true; else if (!set->handle) hci_dev_set_flag(hdev, HCI_LE_ADV_0); @@ -3963,8 +3963,11 @@ static u8 hci_cc_le_set_per_adv_enable(struct hci_dev *hdev, void *data, hci_dev_set_flag(hdev, HCI_LE_PER_ADV); if (adv) - adv->enabled = true; + adv->periodic_enabled = true; } else { + if (adv) + adv->periodic_enabled = false; + /* If just one instance was disabled check if there are * any other instance enabled before clearing HCI_LE_PER_ADV. * The current periodic adv instance will be marked as diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 28ad08cd7d70..73fc41b68b68 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1607,7 +1607,7 @@ int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance) /* If periodic advertising already disabled there is nothing to do. */ adv = hci_find_adv_instance(hdev, instance); - if (!adv || !adv->periodic || !adv->enabled) + if (!adv || !adv->periodic_enabled) return 0; memset(&cp, 0, sizeof(cp)); @@ -1672,7 +1672,7 @@ static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance) /* If periodic advertising already enabled there is nothing to do. */ adv = hci_find_adv_instance(hdev, instance); - if (adv && adv->periodic && adv->enabled) + if (adv && adv->periodic_enabled) return 0; memset(&cp, 0, sizeof(cp)); From 91d35ec9b3956d6b3cf789c1593467e58855b03a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 23 Oct 2025 14:05:30 +0200 Subject: [PATCH 26/75] Bluetooth: rfcomm: fix modem control handling The RFCOMM driver confuses the local and remote modem control signals, which specifically means that the reported DTR and RTS state will instead reflect the remote end (i.e. DSR and CTS). This issue dates back to the original driver (and a follow-on update) merged in 2002, which resulted in a non-standard implementation of TIOCMSET that allowed controlling also the TS07.10 IC and DV signals by mapping them to the RI and DCD input flags, while TIOCMGET failed to return the actual state of DTR and RTS. Note that the bogus control of input signals in tiocmset() is just dead code as those flags will have been masked out by the tty layer since 2003. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/rfcomm/tty.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 376ce6de84be..b783526ab588 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -643,8 +643,8 @@ static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig) tty_port_tty_hangup(&dev->port, true); dev->modem_status = - ((v24_sig & RFCOMM_V24_RTC) ? (TIOCM_DSR | TIOCM_DTR) : 0) | - ((v24_sig & RFCOMM_V24_RTR) ? (TIOCM_RTS | TIOCM_CTS) : 0) | + ((v24_sig & RFCOMM_V24_RTC) ? TIOCM_DSR : 0) | + ((v24_sig & RFCOMM_V24_RTR) ? TIOCM_CTS : 0) | ((v24_sig & RFCOMM_V24_IC) ? TIOCM_RI : 0) | ((v24_sig & RFCOMM_V24_DV) ? TIOCM_CD : 0); } @@ -1055,10 +1055,14 @@ static void rfcomm_tty_hangup(struct tty_struct *tty) static int rfcomm_tty_tiocmget(struct tty_struct *tty) { struct rfcomm_dev *dev = tty->driver_data; + struct rfcomm_dlc *dlc = dev->dlc; + u8 v24_sig; BT_DBG("tty %p dev %p", tty, dev); - return dev->modem_status; + rfcomm_dlc_get_modem_status(dlc, &v24_sig); + + return (v24_sig & (TIOCM_DTR | TIOCM_RTS)) | dev->modem_status; } static int rfcomm_tty_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) @@ -1071,23 +1075,15 @@ static int rfcomm_tty_tiocmset(struct tty_struct *tty, unsigned int set, unsigne rfcomm_dlc_get_modem_status(dlc, &v24_sig); - if (set & TIOCM_DSR || set & TIOCM_DTR) + if (set & TIOCM_DTR) v24_sig |= RFCOMM_V24_RTC; - if (set & TIOCM_RTS || set & TIOCM_CTS) + if (set & TIOCM_RTS) v24_sig |= RFCOMM_V24_RTR; - if (set & TIOCM_RI) - v24_sig |= RFCOMM_V24_IC; - if (set & TIOCM_CD) - v24_sig |= RFCOMM_V24_DV; - if (clear & TIOCM_DSR || clear & TIOCM_DTR) + if (clear & TIOCM_DTR) v24_sig &= ~RFCOMM_V24_RTC; - if (clear & TIOCM_RTS || clear & TIOCM_CTS) + if (clear & TIOCM_RTS) v24_sig &= ~RFCOMM_V24_RTR; - if (clear & TIOCM_RI) - v24_sig &= ~RFCOMM_V24_IC; - if (clear & TIOCM_CD) - v24_sig &= ~RFCOMM_V24_DV; rfcomm_dlc_set_modem_status(dlc, v24_sig); From 84a905290cb4c3d9a71a9e3b2f2e02e031e7512f Mon Sep 17 00:00:00 2001 From: Emanuele Ghidoli Date: Thu, 23 Oct 2025 16:48:53 +0200 Subject: [PATCH 27/75] net: phy: dp83867: Disable EEE support as not implemented While the DP83867 PHYs report EEE capability through their feature registers, the actual hardware does not support EEE (see Links). When the connected MAC enables EEE, it causes link instability and communication failures. The issue is reproducible with a iMX8MP and relevant stmmac ethernet port. Since the introduction of phylink-managed EEE support in the stmmac driver, EEE is now enabled by default, leading to issues on systems using the DP83867 PHY. Call phy_disable_eee during phy initialization to prevent EEE from being enabled on DP83867 PHYs. Link: https://e2e.ti.com/support/interface-group/interface/f/interface-forum/1445244/dp83867ir-dp83867-disable-eee-lpi Link: https://e2e.ti.com/support/interface-group/interface/f/interface-forum/658638/dp83867ir-eee-energy-efficient-ethernet Fixes: 2a10154abcb7 ("net: phy: dp83867: Add TI dp83867 phy") Cc: stable@vger.kernel.org Signed-off-by: Emanuele Ghidoli Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251023144857.529566-1-ghidoliemanuele@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83867.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index deeefb962566..36a0c1b7f59c 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -738,6 +738,12 @@ static int dp83867_config_init(struct phy_device *phydev) return ret; } + /* Although the DP83867 reports EEE capability through the + * MDIO_PCS_EEE_ABLE and MDIO_AN_EEE_ADV registers, the feature + * is not actually implemented in hardware. + */ + phy_disable_eee(phydev); + if (phy_interface_is_rgmii(phydev) || phydev->interface == PHY_INTERFACE_MODE_SGMII) { val = phy_read(phydev, MII_DP83867_PHYCTRL); From 65f9c4c5888913c2cf5d2fc9454c83f9930d537d Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Fri, 24 Oct 2025 15:24:38 +0200 Subject: [PATCH 28/75] tools: ynl: fix string attribute length to include null terminator The ynl_attr_put_str() function was not including the null terminator in the attribute length calculation. This caused kernel to reject CTRL_CMD_GETFAMILY requests with EINVAL: "Attribute failed policy validation". For a 4-character family name like "dpll": - Sent: nla_len=8 (4 byte header + 4 byte string without null) - Expected: nla_len=9 (4 byte header + 5 byte string with null) The bug was introduced in commit 15d2540e0d62 ("tools: ynl: check for overflow of constructed messages") when refactoring from stpcpy() to strlen(). The original code correctly included the null terminator: end = stpcpy(ynl_attr_data(attr), str); attr->nla_len = NLA_HDRLEN + NLA_ALIGN(end - (char *)ynl_attr_data(attr)); Since stpcpy() returns a pointer past the null terminator, the length included it. The refactored version using strlen() omitted the +1. The fix also removes NLA_ALIGN() from nla_len calculation, since nla_len should contain actual attribute length, not aligned length. Alignment is only for calculating next attribute position. This makes the code consistent with ynl_attr_put(). CTRL_ATTR_FAMILY_NAME uses NLA_NUL_STRING policy which requires null terminator. Kernel validates with memchr() and rejects if not found. Fixes: 15d2540e0d62 ("tools: ynl: check for overflow of constructed messages") Signed-off-by: Petr Oros Tested-by: Ivan Vecera Reviewed-by: Ivan Vecera Link: https://lore.kernel.org/20251018151737.365485-3-zahari.doychev@linux.com Link: https://patch.msgid.link/20251024132438.351290-1-poros@redhat.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index 29481989ea76..ced7dce44efb 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -313,7 +313,7 @@ ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str) struct nlattr *attr; size_t len; - len = strlen(str); + len = strlen(str) + 1; if (__ynl_attr_put_overflow(nlh, len)) return; @@ -321,7 +321,7 @@ ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str) attr->nla_type = attr_type; strcpy((char *)ynl_attr_data(attr), str); - attr->nla_len = NLA_HDRLEN + NLA_ALIGN(len); + attr->nla_len = NLA_HDRLEN + len; nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len); } From e3a0ca09acbe697245f944ee92b956db58a0ed09 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Thu, 23 Oct 2025 16:24:06 +0700 Subject: [PATCH 29/75] MAINTAINERS: mark ISDN subsystem as orphan We have not heard any activities from Karsten in years: - Last review tag was nine years ago in commit a921e9bd4e22a7 ("isdn: i4l: move active-isdn drivers to staging") - Last message on lore was in October 2020 [1]. Furthermore, messages to isdn mailing list bounce. Mark the subsystem as orphan to reflect these. [1]: https://lore.kernel.org/all/0ee243a9-9937-ad26-0684-44b18e772662@linux-pingi.de/ Signed-off-by: Bagas Sanjaya Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251023092406.56699-1-bagasdotme@gmail.com Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 8 ++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CREDITS b/CREDITS index 903ea238e64f..fa5397f4ebcd 100644 --- a/CREDITS +++ b/CREDITS @@ -2036,6 +2036,10 @@ S: Botanicka' 68a S: 602 00 Brno S: Czech Republic +N: Karsten Keil +E: isdn@linux-pingi.de +D: ISDN subsystem maintainer + N: Jakob Kemi E: jakob.kemi@telia.com D: V4L W9966 Webcam driver diff --git a/MAINTAINERS b/MAINTAINERS index bccada21ef41..cca911b2de4e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13247,10 +13247,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git mast F: drivers/infiniband/ulp/isert ISDN/CMTP OVER BLUETOOTH -M: Karsten Keil -L: isdn4linux@listserv.isdn4linux.de (subscribers-only) L: netdev@vger.kernel.org -S: Odd Fixes +S: Orphan W: http://www.isdn4linux.de F: Documentation/isdn/ F: drivers/isdn/capi/ @@ -13259,10 +13257,8 @@ F: include/uapi/linux/isdn/ F: net/bluetooth/cmtp/ ISDN/mISDN SUBSYSTEM -M: Karsten Keil -L: isdn4linux@listserv.isdn4linux.de (subscribers-only) L: netdev@vger.kernel.org -S: Maintained +S: Orphan W: http://www.isdn4linux.de F: drivers/isdn/Kconfig F: drivers/isdn/Makefile From 03ca7c8c42be913529eb9f188278114430c6abbd Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 23 Oct 2025 21:13:37 +0800 Subject: [PATCH 30/75] net: hns3: return error code when function fails Currently, in hclge_mii_ioctl(), the operation to read the PHY register (SIOCGMIIREG) always returns 0. This patch changes the return type of hclge_read_phy_reg(), returning an error code when the function fails. Fixes: 024712f51e57 ("net: hns3: add ioctl support for imp-controlled PHYs") Signed-off-by: Jijie Shao Reviewed-by: Alexander Lobakin Link: https://patch.msgid.link/20251023131338.2642520-2-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 +-- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c | 9 ++++++--- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 9d34d28ff168..782bb48c9f3d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -9429,8 +9429,7 @@ static int hclge_mii_ioctl(struct hclge_dev *hdev, struct ifreq *ifr, int cmd) /* this command reads phy id and register at the same time */ fallthrough; case SIOCGMIIREG: - data->val_out = hclge_read_phy_reg(hdev, data->reg_num); - return 0; + return hclge_read_phy_reg(hdev, data->reg_num, &data->val_out); case SIOCSMIIREG: return hclge_write_phy_reg(hdev, data->reg_num, data->val_in); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index 96553109f44c..cf881108fa57 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -274,7 +274,7 @@ void hclge_mac_stop_phy(struct hclge_dev *hdev) phy_stop(phydev); } -u16 hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr) +int hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 *val) { struct hclge_phy_reg_cmd *req; struct hclge_desc desc; @@ -286,11 +286,14 @@ u16 hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr) req->reg_addr = cpu_to_le16(reg_addr); ret = hclge_cmd_send(&hdev->hw, &desc, 1); - if (ret) + if (ret) { dev_err(&hdev->pdev->dev, "failed to read phy reg, ret = %d.\n", ret); + return ret; + } - return le16_to_cpu(req->reg_val); + *val = le16_to_cpu(req->reg_val); + return 0; } int hclge_write_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 val) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h index 4200d0b6d931..21d434c82475 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h @@ -13,7 +13,7 @@ int hclge_mac_connect_phy(struct hnae3_handle *handle); void hclge_mac_disconnect_phy(struct hnae3_handle *handle); void hclge_mac_start_phy(struct hclge_dev *hdev); void hclge_mac_stop_phy(struct hclge_dev *hdev); -u16 hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr); +int hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 *val); int hclge_write_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 val); #endif From 46a499aaf8c27476fd05e800f3e947bfd71aa724 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Thu, 23 Oct 2025 19:48:42 +0530 Subject: [PATCH 31/75] sfc: fix potential memory leak in efx_mae_process_mport() In efx_mae_enumerate_mports(), memory allocated for mae_mport_desc is passed as a argument to efx_mae_process_mport(), but when the error path in efx_mae_process_mport() gets executed, the memory allocated for desc gets leaked. Fix that by freeing the memory allocation before returning error. Fixes: a6a15aca4207 ("sfc: enumerate mports in ef100") Acked-by: Edward Cree Signed-off-by: Abdun Nihaal Link: https://patch.msgid.link/20251023141844.25847-1-nihaal@cse.iitm.ac.in Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/mae.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c index 6fd0c1e9a7d5..7cfd9000f79d 100644 --- a/drivers/net/ethernet/sfc/mae.c +++ b/drivers/net/ethernet/sfc/mae.c @@ -1090,6 +1090,9 @@ void efx_mae_remove_mport(void *desc, void *arg) kfree(mport); } +/* + * Takes ownership of @desc, even if it returns an error + */ static int efx_mae_process_mport(struct efx_nic *efx, struct mae_mport_desc *desc) { @@ -1100,6 +1103,7 @@ static int efx_mae_process_mport(struct efx_nic *efx, if (!IS_ERR_OR_NULL(mport)) { netif_err(efx, drv, efx->net_dev, "mport with id %u does exist!!!\n", desc->mport_id); + kfree(desc); return -EEXIST; } From e3966940559d52aa1800a008dcfeec218dd31f88 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 24 Oct 2025 12:58:53 +0000 Subject: [PATCH 32/75] tools: ynl: avoid print_field when there is no reply When request a none support device operation, there will be no reply. In this case, the len(desc) check will always be true, causing print_field to enter an infinite loop and crash the program. Example reproducer: # ethtool.py -c veth0 To fix this, return immediately if there is no reply. Fixes: f3d07b02b2b8 ("tools: ynl: ethtool testing tool") Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20251024125853.102916-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ethtool.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/net/ynl/pyynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py index 9b523cbb3568..fd0f6b8d54d1 100755 --- a/tools/net/ynl/pyynl/ethtool.py +++ b/tools/net/ynl/pyynl/ethtool.py @@ -44,6 +44,9 @@ def print_field(reply, *desc): Pretty-print a set of fields from the reply. desc specifies the fields and the optional type (bool/yn). """ + if not reply: + return + if len(desc) == 0: return print_field(reply, *zip(reply.keys(), reply.keys())) From 520ad9e96937e825a117e9f00dd35a3e199d67b5 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Fri, 24 Oct 2025 20:55:12 +0200 Subject: [PATCH 33/75] dpll: spec: add missing module-name and clock-id to pin-get reply The dpll.yaml spec incorrectly omitted module-name and clock-id from the pin-get operation reply specification, even though the kernel DPLL implementation has always included these attributes in pin-get responses since the initial implementation. This spec inconsistency caused issues with the C YNL code generator. The generated dpll_pin_get_rsp structure was missing these fields. Fix the spec by adding module-name and clock-id to the pin-attrs reply specification to match the actual kernel behavior. Fixes: 3badff3a25d8 ("dpll: spec: Add Netlink spec in YAML") Signed-off-by: Petr Oros Reviewed-by: Ivan Vecera Link: https://patch.msgid.link/20251024185512.363376-1-poros@redhat.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/dpll.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml index cafb4ec20447..80728f6f9bc8 100644 --- a/Documentation/netlink/specs/dpll.yaml +++ b/Documentation/netlink/specs/dpll.yaml @@ -605,6 +605,8 @@ operations: reply: &pin-attrs attributes: - id + - module-name + - clock-id - board-label - panel-label - package-label From 210b35d6a7ea415494ce75490c4b43b4e717d935 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 26 Oct 2025 11:17:42 +0100 Subject: [PATCH 34/75] dt-bindings: net: sparx5: Narrow properly LAN969x register space windows Commit 267bca002c50 ("dt-bindings: net: sparx5: correct LAN969x register space windows") said that LAN969x has exactly two address spaces ("reg" property) but implemented it as 2 or more. Narrow the constraint to properly express that only two items are allowed, which also matches Linux driver. Fixes: 267bca002c50 ("dt-bindings: net: sparx5: correct LAN969x register space windows") Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20251026101741.20507-2-krzysztof.kozlowski@linaro.org Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/microchip,sparx5-switch.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml index 5caa3779660d..5491d0775ede 100644 --- a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml +++ b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml @@ -180,9 +180,9 @@ allOf: then: properties: reg: - minItems: 2 + maxItems: 2 reg-names: - minItems: 2 + maxItems: 2 else: properties: reg: From e9840461317e1bf0628b164de54632754d5f6a44 Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Fri, 21 Feb 2025 10:39:49 +0100 Subject: [PATCH 35/75] ice: fix lane number calculation E82X adapters do not have sequential IDs, lane number is PF ID. Add check for ICE_MAC_GENERIC and skip checking port options. Also, adjust logical port number for specific E825 device with external PHY support (PCI device id 0x579F). For this particular device, with 2x25G (PHY0) and 2x10G (PHY1) port configuration, modification of pf_id -> lane_number mapping is required. PF IDs on the 2nd PHY start from 4 in such scenario. Otherwise, the lane number cannot be determined correctly, leading to PTP init errors during PF initialization. Fixes: 258f5f9058159 ("ice: Add correct PHY lane assignment") Co-developed-by: Karol Kolacinski Signed-off-by: Karol Kolacinski Signed-off-by: Grzegorz Nitka Reviewed-by: Przemek Kitszel Reviewed-by: Milena Olech Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 2250426ec91b..28d74bf56ffc 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -4382,6 +4382,15 @@ int ice_get_phy_lane_number(struct ice_hw *hw) unsigned int lane; int err; + /* E82X does not have sequential IDs, lane number is PF ID. + * For E825 device, the exception is the variant with external + * PHY (0x579F), in which there is also 1:1 pf_id -> lane_number + * mapping. + */ + if (hw->mac_type == ICE_MAC_GENERIC || + hw->device_id == ICE_DEV_ID_E825C_SGMII) + return hw->pf_id; + options = kcalloc(ICE_AQC_PORT_OPT_MAX, sizeof(*options), GFP_KERNEL); if (!options) return -ENOMEM; From 45076413063cf5e0e25fd3f7f89fc90338b161c8 Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Mon, 29 Sep 2025 17:29:05 +0200 Subject: [PATCH 36/75] ice: fix destination CGU for dual complex E825 On dual complex E825, only complex 0 has functional CGU (Clock Generation Unit), powering all the PHYs. SBQ (Side Band Queue) destination device 'cgu' in current implementation points to CGU on current complex and, in order to access primary CGU from the secondary complex, the driver should use 'cgu_peer' as a destination device in read/write CGU registers operations. Define new 'cgu_peer' (15) as RDA (Remote Device Access) client over SB-IOSF interface and use it as device target when accessing CGU from secondary complex. This problem has been identified when working on recovery clock enablement [1]. In existing implementation for E825 devices, only PF0, which is clock owner, is involved in CGU configuration, thus the problem was not exposed to the user. [1] https://lore.kernel.org/intel-wired-lan/20250905150947.871566-1-grzegorz.nitka@intel.com/ Fixes: e2193f9f9ec9 ("ice: enable timesync operation on 2xNAC E825 devices") Signed-off-by: Grzegorz Nitka Reviewed-by: Arkadiusz Kubalewski Reviewed-by: Aleksandr Loktionov Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 26 ++++++++++++++++++-- drivers/net/ethernet/intel/ice/ice_sbq_cmd.h | 1 + 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 28d74bf56ffc..2532b6f82e97 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -6505,6 +6505,28 @@ u32 ice_get_link_speed(u16 index) return ice_aq_to_link_speed[index]; } +/** + * ice_get_dest_cgu - get destination CGU dev for given HW + * @hw: pointer to the HW struct + * + * Get CGU client id for CGU register read/write operations. + * + * Return: CGU device id to use in SBQ transactions. + */ +static enum ice_sbq_dev_id ice_get_dest_cgu(struct ice_hw *hw) +{ + /* On dual complex E825 only complex 0 has functional CGU powering all + * the PHYs. + * SBQ destination device cgu points to CGU on a current complex and to + * access primary CGU from the secondary complex, the driver should use + * cgu_peer as a destination device. + */ + if (hw->mac_type == ICE_MAC_GENERIC_3K_E825 && ice_is_dual(hw) && + !ice_is_primary(hw)) + return ice_sbq_dev_cgu_peer; + return ice_sbq_dev_cgu; +} + /** * ice_read_cgu_reg - Read a CGU register * @hw: Pointer to the HW struct @@ -6519,8 +6541,8 @@ u32 ice_get_link_speed(u16 index) int ice_read_cgu_reg(struct ice_hw *hw, u32 addr, u32 *val) { struct ice_sbq_msg_input cgu_msg = { + .dest_dev = ice_get_dest_cgu(hw), .opcode = ice_sbq_msg_rd, - .dest_dev = ice_sbq_dev_cgu, .msg_addr_low = addr }; int err; @@ -6551,8 +6573,8 @@ int ice_read_cgu_reg(struct ice_hw *hw, u32 addr, u32 *val) int ice_write_cgu_reg(struct ice_hw *hw, u32 addr, u32 val) { struct ice_sbq_msg_input cgu_msg = { + .dest_dev = ice_get_dest_cgu(hw), .opcode = ice_sbq_msg_wr, - .dest_dev = ice_sbq_dev_cgu, .msg_addr_low = addr, .data = val }; diff --git a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h index 183dd5457d6a..21bb861febbf 100644 --- a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h @@ -50,6 +50,7 @@ enum ice_sbq_dev_id { ice_sbq_dev_phy_0 = 0x02, ice_sbq_dev_cgu = 0x06, ice_sbq_dev_phy_0_peer = 0x0D, + ice_sbq_dev_cgu_peer = 0x0F, }; enum ice_sbq_msg_opcode { From 9a0f81fc64b2ba80ce768cd6e680c0f440723464 Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Wed, 8 Oct 2025 12:28:53 +0200 Subject: [PATCH 37/75] ice: fix usage of logical PF id In some devices, the function numbers used are non-contiguous. For example, here is such configuration for E825 device: root@/home/root# lspci -v | grep Eth 0a:00.0 Ethernet controller: Intel Corporation Ethernet Connection E825-C for backplane (rev 04) 0a:00.1 Ethernet controller: Intel Corporation Ethernet Connection E825-C for backplane (rev 04) 0a:00.4 Ethernet controller: Intel Corporation Ethernet Connection E825-C 10GbE (rev 04) 0a:00.5 Ethernet controller: Intel Corporation Ethernet Connection E825-C 10GbE (rev 04) When distributing RSS and FDIR masks, which are global resources across the active devices, it is required to have a contiguous PF id, which can be described as a logical PF id. In the case above, function 0 would have a logical PF id of 0, function 1 would have a logical PF id of 1, and functions 4 and 5 would have a logical PF ids 2 and 3 respectively. Using logical PF id can properly describe which slice of resources can be used by a particular PF. The 'function id' to 'logical id' mapping has been introduced with the commit 015307754a19 ("ice: Support VF queue rate limit and quanta size configuration"). However, the usage of 'logical_pf_id' field was unintentionally skipped for profile mask configuration. Fix it by using 'logical_pf_id' instead of 'pf_id' value when configuring masks. Without that patch, wrong indexes, i.e. out of range for given PF, can be used while configuring resources masks, which might lead to memory corruption and undefined driver behavior. The call trace below is one of the examples of such error: [ +0.000008] WARNING: CPU: 39 PID: 3830 at drivers/base/devres.c:1095 devm_kfree+0x70/0xa0 [ +0.000002] RIP: 0010:devm_kfree+0x70/0xa0 [ +0.000001] Call Trace: [ +0.000002] [ +0.000002] ice_free_hw_tbls+0x183/0x710 [ice] [ +0.000106] ice_deinit_hw+0x67/0x90 [ice] [ +0.000091] ice_deinit+0x20d/0x2f0 [ice] [ +0.000076] ice_remove+0x1fa/0x6a0 [ice] [ +0.000075] pci_device_remove+0xa7/0x1d0 [ +0.000010] device_release_driver_internal+0x365/0x530 [ +0.000006] driver_detach+0xbb/0x170 [ +0.000003] bus_remove_driver+0x117/0x290 [ +0.000007] pci_unregister_driver+0x26/0x250 Fixes: 015307754a19 ("ice: Support VF queue rate limit and quanta size configuration") Suggested-by: Dan Nowlin Signed-off-by: Grzegorz Nitka Reviewed-by: Aleksandr Loktionov Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_flex_pipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 363ae79a3620..013c93b6605e 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -1479,7 +1479,7 @@ static void ice_init_prof_masks(struct ice_hw *hw, enum ice_block blk) per_pf = ICE_PROF_MASK_COUNT / hw->dev_caps.num_funcs; hw->blk[blk].masks.count = per_pf; - hw->blk[blk].masks.first = hw->pf_id * per_pf; + hw->blk[blk].masks.first = hw->logical_pf_id * per_pf; memset(hw->blk[blk].masks.masks, 0, sizeof(hw->blk[blk].masks.masks)); From 85308d999c4b4162a742c9ec5ef954226c3b48d9 Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Mon, 1 Sep 2025 05:33:11 +0900 Subject: [PATCH 38/75] ixgbe: fix memory leak and use-after-free in ixgbe_recovery_probe() The error path of ixgbe_recovery_probe() has two memory bugs. For non-E610 adapters, the function jumps to clean_up_probe without calling devlink_free(), leaking the devlink instance and its embedded adapter structure. For E610 adapters, devlink_free() is called at shutdown_aci, but clean_up_probe then accesses adapter->state, sometimes triggering use-after-free because adapter is embedded in devlink. This UAF is similar to the one recently reported in ixgbe_remove(). (Link) Fix both issues by moving devlink_free() after adapter->state access, aligning with the cleanup order in ixgbe_probe(). Link: https://lore.kernel.org/intel-wired-lan/20250828020558.1450422-1-den@valinux.co.jp/ Fixes: 29cb3b8d95c7 ("ixgbe: add E610 implementation of FW recovery mode") Signed-off-by: Kohei Enju Reviewed-by: Aleksandr Loktionov Reviewed-by: Jedrzej Jagielski Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index ca1ccc630001..3190ce7e44c7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -11507,10 +11507,10 @@ static int ixgbe_recovery_probe(struct ixgbe_adapter *adapter) shutdown_aci: mutex_destroy(&adapter->hw.aci.lock); ixgbe_release_hw_control(adapter); - devlink_free(adapter->devlink); clean_up_probe: disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state); free_netdev(netdev); + devlink_free(adapter->devlink); pci_release_mem_regions(pdev); if (disable_dev) pci_disable_device(pdev); From 81fb1fe75c672db905b54f4ab744552121099a24 Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Sat, 20 Sep 2025 15:39:18 +0900 Subject: [PATCH 39/75] igc: power up the PHY before the link test The current implementation of the igc driver doesn't power up the PHY before the link test in igc_ethtool_diag_test(), causing the link test to always report FAIL when admin state is down and the PHY is consequently powered down. To test the link state regardless of admin state, power up the PHY before the link test in the offline test path. After the link test, the original PHY state is restored by igc_reset(), so additional code which explicitly restores the original state is not necessary. Note that this change is applied only for the offline test path. This is because in the online path we shouldn't interrupt normal networking operation and powering up the PHY and restoring the original state would interrupt that. This implementation also uses igc_power_up_phy_copper() without checking the media type, since igc devices are currently only copper devices and the function is called in other places without checking the media type. Furthermore, the powering up is on a best-effort basis, that is, we don't handle failures of powering up (e.g. bus error) and just let the test report FAIL. Tested on Intel Corporation Ethernet Controller I226-V (rev 04) with cable connected and link available. Set device down and do ethtool test. # ip link set dev enp0s5 down Without patch: # ethtool --test enp0s5 The test result is FAIL The test extra info: Register test (offline) 0 Eeprom test (offline) 0 Interrupt test (offline) 0 Loopback test (offline) 0 Link test (on/offline) 1 With patch: # ethtool --test enp0s5 The test result is PASS The test extra info: Register test (offline) 0 Eeprom test (offline) 0 Interrupt test (offline) 0 Loopback test (offline) 0 Link test (on/offline) 0 Fixes: f026d8ca2904 ("igc: add support to eeprom, registers and link self-tests") Signed-off-by: Kohei Enju Reviewed-by: Vitaly Lifshits Tested-by: Avigail Dahan Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index f3e7218ba6f3..ca93629b1d3a 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -2094,6 +2094,9 @@ static void igc_ethtool_diag_test(struct net_device *netdev, netdev_info(adapter->netdev, "Offline testing starting"); set_bit(__IGC_TESTING, &adapter->state); + /* power up PHY for link test */ + igc_power_up_phy_copper(&adapter->hw); + /* Link test performed before hardware reset so autoneg doesn't * interfere with test result */ From bc73c5885c606f5e48dd4222eba0361fa0f146ca Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Mon, 6 Oct 2025 21:35:21 +0900 Subject: [PATCH 40/75] igb: use EOPNOTSUPP instead of ENOTSUPP in igb_get_sset_count() igb_get_sset_count() returns -ENOTSUPP when a given stringset is not supported, causing userland programs to get "Unknown error 524". Since EOPNOTSUPP should be used when error is propagated to userland, return -EOPNOTSUPP instead of -ENOTSUPP. Fixes: 9d5c824399de ("igb: PCI-Express 82575 Gigabit Ethernet driver") Signed-off-by: Kohei Enju Reviewed-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index f8a208c84f15..10e2445e0ded 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2281,7 +2281,7 @@ static int igb_get_sset_count(struct net_device *netdev, int sset) case ETH_SS_PRIV_FLAGS: return IGB_PRIV_FLAGS_STR_LEN; default: - return -ENOTSUPP; + return -EOPNOTSUPP; } } From 21d08d1c4c29f9795fbc678011c85f72931e22c1 Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Mon, 6 Oct 2025 21:35:22 +0900 Subject: [PATCH 41/75] igc: use EOPNOTSUPP instead of ENOTSUPP in igc_ethtool_get_sset_count() igc_ethtool_get_sset_count() returns -ENOTSUPP when a given stringset is not supported, causing userland programs to get "Unknown error 524". Since EOPNOTSUPP should be used when error is propagated to userland, return -EOPNOTSUPP instead of -ENOTSUPP. Fixes: 36b9fea60961 ("igc: Add support for statistics") Signed-off-by: Kohei Enju Reviewed-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index ca93629b1d3a..bb783042d1af 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -810,7 +810,7 @@ static int igc_ethtool_get_sset_count(struct net_device *netdev, int sset) case ETH_SS_PRIV_FLAGS: return IGC_PRIV_FLAGS_STR_LEN; default: - return -ENOTSUPP; + return -EOPNOTSUPP; } } From f82acf6fb42115c87d3809968a2e0ab2fedba15b Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Mon, 6 Oct 2025 21:35:23 +0900 Subject: [PATCH 42/75] ixgbe: use EOPNOTSUPP instead of ENOTSUPP in ixgbe_ptp_feature_enable() When the requested PTP feature is not supported, ixgbe_ptp_feature_enable() returns -ENOTSUPP, causing userland programs to get "Unknown error 524". Since EOPNOTSUPP should be used when error is propagated to userland, return -EOPNOTSUPP instead of -ENOTSUPP. Fixes: 3a6a4edaa592 ("ixgbe: Hardware Timestamping + PTP Hardware Clock (PHC)") Signed-off-by: Kohei Enju Reviewed-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 114dd88fc71c..6885d2343c48 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -641,7 +641,7 @@ static int ixgbe_ptp_feature_enable(struct ptp_clock_info *ptp, * disabled */ if (rq->type != PTP_CLK_REQ_PPS || !adapter->ptp_setup_sdp) - return -ENOTSUPP; + return -EOPNOTSUPP; if (on) adapter->flags2 |= IXGBE_FLAG2_PTP_PPS_ENABLED; From 36fedc44e37e811f25666c600bce4bc027290226 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Fri, 24 Oct 2025 21:07:33 +0200 Subject: [PATCH 43/75] dpll: fix device-id-get and pin-id-get to return errors properly The device-id-get and pin-id-get handlers were ignoring errors from the find functions and sending empty replies instead of returning error codes to userspace. When dpll_device_find_from_nlattr() or dpll_pin_find_from_nlattr() returned an error (e.g., -EINVAL for "multiple matches" or -ENODEV for "not found"), the handlers checked `if (!IS_ERR(ptr))` and skipped adding the device/pin handle to the message, but then still sent the empty message as a successful reply. This caused userspace tools to receive empty responses with id=0 instead of proper netlink errors with extack messages like "multiple matches". The bug is visible via strace, which shows the kernel sending TWO netlink messages in response to a single request: 1. Empty reply (20 bytes, just header, no attributes): recvfrom(3, [{nlmsg_len=20, nlmsg_type=dpll, nlmsg_flags=0, ...}, {cmd=0x7, version=1}], ...) 2. NLMSG_ERROR ACK with extack (because of NLM_F_ACK flag): recvfrom(3, [{nlmsg_len=60, nlmsg_type=NLMSG_ERROR, nlmsg_flags=NLM_F_CAPPED|NLM_F_ACK_TLVS, ...}, [{error=0, msg={...}}, [{nla_type=NLMSGERR_ATTR_MSG}, "multiple matches"]]], ...) The C YNL library parses the first message, sees an empty response, and creates a result object with calloc() which zero-initializes all fields, resulting in id=0. The Python YNL library parses both messages and displays the extack from the second NLMSG_ERROR message. Fix by checking `if (IS_ERR(ptr))` first and returning the error code immediately, so that netlink properly sends only NLMSG_ERROR with the extack message to userspace. After this fix, both C and Python YNL tools receive only the NLMSG_ERROR and behave consistently. This affects: - DPLL_CMD_DEVICE_ID_GET: now properly returns error when multiple devices match the criteria (e.g., same module-name + clock-id) - DPLL_CMD_PIN_ID_GET: now properly returns error when multiple pins match the criteria (e.g., same module-name) Before fix: $ dpll pin id-get module-name ice 0 (wrong - should be error, there are 17 pins with module-name "ice") After fix: $ dpll pin id-get module-name ice Error: multiple matches (correct - kernel reports the ambiguity via extack) Fixes: 9d71b54b65b1 ("dpll: netlink: Add DPLL framework base functions") Signed-off-by: Petr Oros Reviewed-by: Ivan Vecera Link: https://patch.msgid.link/20251024190733.364101-1-poros@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/dpll_netlink.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index 74c1f0ca95f2..a4153bcb6dcf 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -1559,16 +1559,18 @@ int dpll_nl_pin_id_get_doit(struct sk_buff *skb, struct genl_info *info) return -EMSGSIZE; } pin = dpll_pin_find_from_nlattr(info); - if (!IS_ERR(pin)) { - if (!dpll_pin_available(pin)) { - nlmsg_free(msg); - return -ENODEV; - } - ret = dpll_msg_add_pin_handle(msg, pin); - if (ret) { - nlmsg_free(msg); - return ret; - } + if (IS_ERR(pin)) { + nlmsg_free(msg); + return PTR_ERR(pin); + } + if (!dpll_pin_available(pin)) { + nlmsg_free(msg); + return -ENODEV; + } + ret = dpll_msg_add_pin_handle(msg, pin); + if (ret) { + nlmsg_free(msg); + return ret; } genlmsg_end(msg, hdr); @@ -1735,12 +1737,14 @@ int dpll_nl_device_id_get_doit(struct sk_buff *skb, struct genl_info *info) } dpll = dpll_device_find_from_nlattr(info); - if (!IS_ERR(dpll)) { - ret = dpll_msg_add_dev_handle(msg, dpll); - if (ret) { - nlmsg_free(msg); - return ret; - } + if (IS_ERR(dpll)) { + nlmsg_free(msg); + return PTR_ERR(dpll); + } + ret = dpll_msg_add_dev_handle(msg, dpll); + if (ret) { + nlmsg_free(msg); + return ret; } genlmsg_end(msg, hdr); From d8d2b1f81530988abe2e2bfaceec1c5d30b9a0b4 Mon Sep 17 00:00:00 2001 From: Pavel Zhigulin Date: Fri, 24 Oct 2025 19:13:02 +0300 Subject: [PATCH 44/75] net: cxgb4/ch_ipsec: fix potential use-after-free in ch_ipsec_xfrm_add_state() callback In ch_ipsec_xfrm_add_state() there is not check of try_module_get return value. It is very unlikely, but try_module_get() could return false value, which could cause use-after-free error. Conditions: The module count must be zero, and a module unload in progress. The thread doing the unload is blocked somewhere. Another thread makes a callback into the module for some request that (for instance) would need to create a kernel thread. It tries to get a reference for the thread. So try_module_get(THIS_MODULE) is the right call - and will fail here. This fix adds checking the result of try_module_get call Fixes: 6dad4e8ab3ec ("chcr: Add support for Inline IPSec") Signed-off-by: Pavel Zhigulin Link: https://patch.msgid.link/20251024161304.724436-1-Pavel.Zhigulin@kaspersky.com Signed-off-by: Jakub Kicinski --- .../ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c index ecd9a0bd5e18..49b57bb5fac1 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c @@ -290,9 +290,15 @@ static int ch_ipsec_xfrm_add_state(struct net_device *dev, return -EINVAL; } + if (unlikely(!try_module_get(THIS_MODULE))) { + NL_SET_ERR_MSG_MOD(extack, "Failed to acquire module reference"); + return -ENODEV; + } + sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL); if (!sa_entry) { res = -ENOMEM; + module_put(THIS_MODULE); goto out; } @@ -301,7 +307,6 @@ static int ch_ipsec_xfrm_add_state(struct net_device *dev, sa_entry->esn = 1; ch_ipsec_setkey(x, sa_entry); x->xso.offload_handle = (unsigned long)sa_entry; - try_module_get(THIS_MODULE); out: return res; } From 40c17a02de41f12dd713309c7d2546117c577d29 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Mon, 27 Oct 2025 15:09:12 +0100 Subject: [PATCH 45/75] dpll: zl3073x: Fix output pin registration Currently, the signal format of an associated output is not considered during output pin registration. As a result, the driver registers output pins that are disabled by the signal format configuration. Fix this by calling zl3073x_output_pin_is_enabled() to check whether a given output pin should be registered or not. Fixes: 75a71ecc2412 ("dpll: zl3073x: Register DPLL devices and pins") Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20251027140912.233152-1-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/dpll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index 93dc93eec79e..f93f9a458324 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -1904,7 +1904,7 @@ zl3073x_dpll_pin_is_registrable(struct zl3073x_dpll *zldpll, } is_diff = zl3073x_out_is_diff(zldev, out); - is_enabled = zl3073x_out_is_enabled(zldev, out); + is_enabled = zl3073x_output_pin_is_enabled(zldev, index); } /* Skip N-pin if the corresponding input/output is differential */ From 12d2303db892d397373e2af40758cbd97309ec37 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Sat, 25 Oct 2025 09:46:40 +0800 Subject: [PATCH 46/75] net: hibmcge: fix rx buf avl irq is not re-enabled in irq_handle issue irq initialized with the macro HBG_ERR_IRQ_I will automatically be re-enabled, whereas those initialized with the macro HBG_IRQ_I will not be re-enabled. Since the rx buf avl irq is initialized using the macro HBG_IRQ_I, it needs to be actively re-enabled; otherwise priv->stats.rx_fifo_less_empty_thrsld_cnt cannot be correctly incremented. Fixes: fd394a334b1c ("net: hibmcge: Add support for abnormal irq handling feature") Signed-off-by: Jijie Shao Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20251025014642.265259-2-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c index 8af0bc4cca21..ae4cb35186d8 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c @@ -32,6 +32,7 @@ static void hbg_irq_handle_rx_buf_val(struct hbg_priv *priv, const struct hbg_irq_info *irq_info) { priv->stats.rx_fifo_less_empty_thrsld_cnt++; + hbg_hw_irq_enable(priv, irq_info->mask, true); } #define HBG_IRQ_I(name, handle) \ From 71eb8d1e07562b43bebc0c2721699814b43fd83d Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Sat, 25 Oct 2025 09:46:41 +0800 Subject: [PATCH 47/75] net: hibmcge: remove unnecessary check for np_link_fail in scenarios without phy. hibmcge driver uses fixed_phy to configure scenarios without PHY, where the driver is always in a linked state. However, there might be no link in hardware, so the np_link error is detected in hbg_hw_adjust_link(), which can cause abnormal logs. Therefore, in scenarios without a PHY, the driver no longer checks the np_link status. Fixes: 1d7cd7a9c69c ("net: hibmcge: support scenario without PHY") Signed-off-by: Jijie Shao Link: https://patch.msgid.link/20251025014642.265259-3-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h | 1 + drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c | 3 +++ drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c | 1 - 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h index ea09a09c451b..2097e4c2b3d7 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h @@ -17,6 +17,7 @@ #define HBG_PCU_CACHE_LINE_SIZE 32 #define HBG_TX_TIMEOUT_BUF_LEN 1024 #define HBG_RX_DESCR 0x01 +#define HBG_NO_PHY 0xFF #define HBG_PACKET_HEAD_SIZE ((HBG_RX_SKIP1 + HBG_RX_SKIP2 + \ HBG_RX_DESCR) * HBG_PCU_CACHE_LINE_SIZE) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c index d0aa0661ecd4..d6e8ce8e351a 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c @@ -244,6 +244,9 @@ void hbg_hw_adjust_link(struct hbg_priv *priv, u32 speed, u32 duplex) hbg_hw_mac_enable(priv, HBG_STATUS_ENABLE); + if (priv->mac.phy_addr == HBG_NO_PHY) + return; + /* wait MAC link up */ ret = readl_poll_timeout(priv->io_base + HBG_REG_AN_NEG_STATE_ADDR, link_status, diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c index 37791de47f6f..b6f0a2780ea8 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c @@ -20,7 +20,6 @@ #define HBG_MDIO_OP_INTERVAL_US (5 * 1000) #define HBG_NP_LINK_FAIL_RETRY_TIMES 5 -#define HBG_NO_PHY 0xFF static void hbg_mdio_set_command(struct hbg_mac *mac, u32 cmd) { From 7e2958aee59ceb30ef153387741d12385b712250 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Sat, 25 Oct 2025 09:46:42 +0800 Subject: [PATCH 48/75] net: hibmcge: fix the inappropriate netif_device_detach() current, driver will call netif_device_detach() in pci_error_handlers.error_detected() and do reset in pci_error_handlers.slot_reset(). However, if pci_error_handlers.slot_reset() is not called after pci_error_handlers.error_detected(), driver will be detached and unable to recover. drivers/pci/pcie/err.c/report_error_detected() says: If any device in the subtree does not have an error_detected callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent error callbacks of any device in the subtree, and will exit in the disconnected error state. Therefore, when the hibmcge device and other devices that do not support the error_detected callback are under the same subtree, hibmcge will be unable to do slot_reset even for non-fatal errors. This path move netif_device_detach() from error_detected() to slot_reset(), ensuring that detach and reset are always executed together. Fixes: fd394a334b1c ("net: hibmcge: Add support for abnormal irq handling feature") Signed-off-by: Jijie Shao Link: https://patch.msgid.link/20251025014642.265259-4-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c index 83cf75bf7a17..e11495b7ee98 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c @@ -136,12 +136,11 @@ static pci_ers_result_t hbg_pci_err_detected(struct pci_dev *pdev, { struct net_device *netdev = pci_get_drvdata(pdev); - netif_device_detach(netdev); - - if (state == pci_channel_io_perm_failure) + if (state == pci_channel_io_perm_failure) { + netif_device_detach(netdev); return PCI_ERS_RESULT_DISCONNECT; + } - pci_disable_device(pdev); return PCI_ERS_RESULT_NEED_RESET; } @@ -150,6 +149,9 @@ static pci_ers_result_t hbg_pci_err_slot_reset(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct hbg_priv *priv = netdev_priv(netdev); + netif_device_detach(netdev); + pci_disable_device(pdev); + if (pci_enable_device(pdev)) { dev_err(&pdev->dev, "failed to re-enable PCI device after reset\n"); From 514f1dc8f2ca3101e04cdf452e53baca3a76e544 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 22 Oct 2025 17:18:10 +0200 Subject: [PATCH 49/75] netfilter: nft_ct: enable labels for get case too conntrack labels can only be set when the conntrack has been created with the "ctlabel" extension. For older iptables (connlabel match), adding an "-m connlabel" rule turns on the ctlabel extension allocation for all future conntrack entries. For nftables, its only enabled for 'ct label set foo', but not for 'ct label foo' (i.e. check). But users could have a ruleset that only checks for presence, and rely on userspace to set a label bit via ctnetlink infrastructure. This doesn't work without adding a dummy 'ct label set' rule. We could also enable extension infra for the first (failing) ctnetlink request, but unlike ruleset we would not be able to disable the extension again. Therefore turn on ctlabel extension allocation if an nftables ruleset checks for a connlabel too. Fixes: 1ad8f48df6f6 ("netfilter: nftables: add connlabel set support") Reported-by: Antonio Ojea Closes: https://lore.kernel.org/netfilter-devel/aPi_VdZpVjWujZ29@strlen.de/ Signed-off-by: Florian Westphal --- net/netfilter/nft_ct.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index d526e69a2a2b..a418eb3d612b 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -379,6 +379,14 @@ static bool nft_ct_tmpl_alloc_pcpu(void) } #endif +static void __nft_ct_get_destroy(const struct nft_ctx *ctx, struct nft_ct *priv) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + if (priv->key == NFT_CT_LABELS) + nf_connlabels_put(ctx->net); +#endif +} + static int nft_ct_get_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -413,6 +421,10 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, if (tb[NFTA_CT_DIRECTION] != NULL) return -EINVAL; len = NF_CT_LABELS_MAX_SIZE; + + err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1); + if (err) + return err; break; #endif case NFT_CT_HELPER: @@ -494,7 +506,8 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, case IP_CT_DIR_REPLY: break; default: - return -EINVAL; + err = -EINVAL; + goto err; } } @@ -502,11 +515,11 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, err = nft_parse_register_store(ctx, tb[NFTA_CT_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, len); if (err < 0) - return err; + goto err; err = nf_ct_netns_get(ctx->net, ctx->family); if (err < 0) - return err; + goto err; if (priv->key == NFT_CT_BYTES || priv->key == NFT_CT_PKTS || @@ -514,6 +527,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, nf_ct_set_acct(ctx->net, true); return 0; +err: + __nft_ct_get_destroy(ctx, priv); + return err; } static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv) @@ -626,6 +642,9 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, static void nft_ct_get_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { + struct nft_ct *priv = nft_expr_priv(expr); + + __nft_ct_get_destroy(ctx, priv); nf_ct_netns_put(ctx->net, ctx->family); } From 8d96dfdcabef00e28f0c851b1502adb679dfc6d9 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Fri, 24 Oct 2025 17:54:39 +0200 Subject: [PATCH 50/75] netfilter: nft_connlimit: fix possible data race on connection count nft_connlimit_eval() reads priv->list->count to check if the connection limit has been exceeded. This value is being read without a lock and can be modified by a different process. Use READ_ONCE() for correctness. Fixes: df4a90250976 ("netfilter: nf_conncount: merge lookup and add functions") Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Florian Westphal --- net/netfilter/nft_connlimit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 92b984fa8175..fc35a11cdca2 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -48,7 +48,7 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv, return; } - count = priv->list->count; + count = READ_ONCE(priv->list->count); if ((count > priv->limit) ^ priv->invert) { regs->verdict.code = NFT_BREAK; From 90918e3b6404c2a37837b8f11692471b4c512de2 Mon Sep 17 00:00:00 2001 From: Andrii Melnychenko Date: Fri, 24 Oct 2025 18:22:16 +0200 Subject: [PATCH 51/75] netfilter: nft_ct: add seqadj extension for natted connections Sequence adjustment may be required for FTP traffic with PASV/EPSV modes. due to need to re-write packet payload (IP, port) on the ftp control connection. This can require changes to the TCP length and expected seq / ack_seq. The easiest way to reproduce this issue is with PASV mode. Example ruleset: table inet ftp_nat { ct helper ftp_helper { type "ftp" protocol tcp l3proto inet } chain prerouting { type filter hook prerouting priority 0; policy accept; tcp dport 21 ct state new ct helper set "ftp_helper" } } table ip nat { chain prerouting { type nat hook prerouting priority -100; policy accept; tcp dport 21 dnat ip prefix to ip daddr map { 192.168.100.1 : 192.168.13.2/32 } } chain postrouting { type nat hook postrouting priority 100 ; policy accept; tcp sport 21 snat ip prefix to ip saddr map { 192.168.13.2 : 192.168.100.1/32 } } } Note that the ftp helper gets assigned *after* the dnat setup. The inverse (nat after helper assign) is handled by an existing check in nf_nat_setup_info() and will not show the problem. Topoloy: +-------------------+ +----------------------------------+ | FTP: 192.168.13.2 | <-> | NAT: 192.168.13.3, 192.168.100.1 | +-------------------+ +----------------------------------+ | +-----------------------+ | Client: 192.168.100.2 | +-----------------------+ ftp nat changes do not work as expected in this case: Connected to 192.168.100.1. [..] ftp> epsv EPSV/EPRT on IPv4 off. ftp> ls 227 Entering passive mode (192,168,100,1,209,129). 421 Service not available, remote server has closed connection. Kernel logs: Missing nfct_seqadj_ext_add() setup call WARNING: CPU: 1 PID: 0 at net/netfilter/nf_conntrack_seqadj.c:41 [..] __nf_nat_mangle_tcp_packet+0x100/0x160 [nf_nat] nf_nat_ftp+0x142/0x280 [nf_nat_ftp] help+0x4d1/0x880 [nf_conntrack_ftp] nf_confirm+0x122/0x2e0 [nf_conntrack] nf_hook_slow+0x3c/0xb0 .. Fix this by adding the required extension when a conntrack helper is assigned to a connection that has a nat binding. Fixes: 1a64edf54f55 ("netfilter: nft_ct: add helper set support") Signed-off-by: Andrii Melnychenko Signed-off-by: Florian Westphal --- net/netfilter/nft_ct.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index a418eb3d612b..6f2ae7cad731 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -22,6 +22,7 @@ #include #include #include +#include struct nft_ct_helper_obj { struct nf_conntrack_helper *helper4; @@ -1192,6 +1193,10 @@ static void nft_ct_helper_obj_eval(struct nft_object *obj, if (help) { rcu_assign_pointer(help->helper, to_assign); set_bit(IPS_HELPER_BIT, &ct->status); + + if ((ct->status & IPS_NAT_MASK) && !nfct_seqadj(ct)) + if (!nfct_seqadj_ext_add(ct)) + regs->verdict.code = NF_DROP; } } From a6f0459aadf1b41a9b9fae02006b1db024d60856 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 28 Oct 2025 12:57:59 +0100 Subject: [PATCH 52/75] mptcp: fix subflow rcvbuf adjust The mptcp PM can add subflow to the conn_list before tcp_init_transfer(). Calling tcp_rcvbuf_grow() on such subflow is not correct as later init will overwrite the update. Fix the issue calling tcp_rcvbuf_grow() only after init buffer initialization. Fixes: e118cdc34dd1 ("mptcp: rcvbuf auto-tuning improvement") Signed-off-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251028-net-tcp-recv-autotune-v3-1-74b43ba4c84c@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 0292162a14ee..a8a3bdf95543 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2051,6 +2051,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) msk->rcvq_space.space = msk->rcvq_space.copied; if (mptcp_rcvbuf_grow(sk)) { + int copied = msk->rcvq_space.copied; /* Make subflows follow along. If we do not do this, we * get drops at subflow level if skbs can't be moved to @@ -2063,8 +2064,11 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) ssk = mptcp_subflow_tcp_sock(subflow); slow = lock_sock_fast(ssk); - tcp_sk(ssk)->rcvq_space.space = msk->rcvq_space.copied; - tcp_rcvbuf_grow(ssk); + /* subflows can be added before tcp_init_transfer() */ + if (tcp_sk(ssk)->rcvq_space.space) { + tcp_sk(ssk)->rcvq_space.space = copied; + tcp_rcvbuf_grow(ssk); + } unlock_sock_fast(ssk, slow); } } From 24990d89c23de4dbef6b0b3d58383cafefdd6983 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Oct 2025 12:58:00 +0100 Subject: [PATCH 53/75] trace: tcp: add three metrics to trace_tcp_rcvbuf_grow() While chasing yet another receive autotuning bug, I found useful to add rcv_ssthresh, window_clamp and rcv_wnd. tcp_stream 40597 [068] 2172.978198: tcp:tcp_rcvbuf_grow: time=50307 rtt_us=50179 copied=77824 inq=0 space=40960 ooo=0 scaling_ratio=219 rcvbuf=131072 rcv_ssthresh=107474 window_clamp=112128 rcv_wnd=110592 tcp_stream 40597 [068] 2173.028528: tcp:tcp_rcvbuf_grow: time=50336 rtt_us=50206 copied=110592 inq=0 space=77824 ooo=0 scaling_ratio=219 rcvbuf=509444 rcv_ssthresh=328658 window_clamp=435813 rcv_wnd=331776 tcp_stream 40597 [068] 2173.078830: tcp:tcp_rcvbuf_grow: time=50305 rtt_us=50070 copied=270336 inq=0 space=110592 ooo=0 scaling_ratio=219 rcvbuf=509444 rcv_ssthresh=431159 window_clamp=435813 rcv_wnd=434176 tcp_stream 40597 [068] 2173.129137: tcp:tcp_rcvbuf_grow: time=50313 rtt_us=50118 copied=434176 inq=0 space=270336 ooo=0 scaling_ratio=219 rcvbuf=2457847 rcv_ssthresh=1299511 window_clamp=2102611 rcv_wnd=1302528 tcp_stream 40597 [068] 2173.179451: tcp:tcp_rcvbuf_grow: time=50318 rtt_us=50041 copied=1019904 inq=0 space=434176 ooo=0 scaling_ratio=219 rcvbuf=2457847 rcv_ssthresh=2087445 window_clamp=2102611 rcv_wnd=2088960 Signed-off-by: Eric Dumazet Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20251028-net-tcp-recv-autotune-v3-2-74b43ba4c84c@kernel.org Signed-off-by: Jakub Kicinski --- include/trace/events/tcp.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 9d2c36c6a0ed..6757233bd064 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -218,6 +218,9 @@ TRACE_EVENT(tcp_rcvbuf_grow, __field(__u32, space) __field(__u32, ooo_space) __field(__u32, rcvbuf) + __field(__u32, rcv_ssthresh) + __field(__u32, window_clamp) + __field(__u32, rcv_wnd) __field(__u8, scaling_ratio) __field(__u16, sport) __field(__u16, dport) @@ -245,6 +248,9 @@ TRACE_EVENT(tcp_rcvbuf_grow, tp->rcv_nxt; __entry->rcvbuf = sk->sk_rcvbuf; + __entry->rcv_ssthresh = tp->rcv_ssthresh; + __entry->window_clamp = tp->window_clamp; + __entry->rcv_wnd = tp->rcv_wnd; __entry->scaling_ratio = tp->scaling_ratio; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); @@ -264,11 +270,14 @@ TRACE_EVENT(tcp_rcvbuf_grow, ), TP_printk("time=%u rtt_us=%u copied=%u inq=%u space=%u ooo=%u scaling_ratio=%u rcvbuf=%u " + "rcv_ssthresh=%u window_clamp=%u rcv_wnd=%u " "family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 " "saddrv6=%pI6c daddrv6=%pI6c skaddr=%p sock_cookie=%llx", __entry->time, __entry->rtt_us, __entry->copied, __entry->inq, __entry->space, __entry->ooo_space, __entry->scaling_ratio, __entry->rcvbuf, + __entry->rcv_ssthresh, __entry->window_clamp, + __entry->rcv_wnd, show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, From b1e014a1f3275a6f3d0f2b30b8117447fc3915f5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Oct 2025 12:58:01 +0100 Subject: [PATCH 54/75] tcp: add newval parameter to tcp_rcvbuf_grow() This patch has no functional change, and prepares the following one. tcp_rcvbuf_grow() will need to have access to tp->rcvq_space.space old and new values. Change mptcp_rcvbuf_grow() in a similar way. Signed-off-by: Eric Dumazet [ Moved 'oldval' declaration to the next patch to avoid warnings at build time. ] Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20251028-net-tcp-recv-autotune-v3-3-74b43ba4c84c@kernel.org Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 2 +- net/ipv4/tcp_input.c | 14 +++++++------- net/mptcp/protocol.c | 20 ++++++++------------ 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 5ca230ed526a..ab20f549b8f9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -370,7 +370,7 @@ void tcp_delack_timer_handler(struct sock *sk); int tcp_ioctl(struct sock *sk, int cmd, int *karg); enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); void tcp_rcv_established(struct sock *sk, struct sk_buff *skb); -void tcp_rcvbuf_grow(struct sock *sk); +void tcp_rcvbuf_grow(struct sock *sk, u32 newval); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 31ea5af49f2d..cb4e07f84ae2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -891,18 +891,20 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, } } -void tcp_rcvbuf_grow(struct sock *sk) +void tcp_rcvbuf_grow(struct sock *sk, u32 newval) { const struct net *net = sock_net(sk); struct tcp_sock *tp = tcp_sk(sk); - int rcvwin, rcvbuf, cap; + u32 rcvwin, rcvbuf, cap; + + tp->rcvq_space.space = newval; if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) || (sk->sk_userlocks & SOCK_RCVBUF_LOCK)) return; /* slow start: allow the sender to double its rate. */ - rcvwin = tp->rcvq_space.space << 1; + rcvwin = newval << 1; if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) rcvwin += TCP_SKB_CB(tp->ooo_last_skb)->end_seq - tp->rcv_nxt; @@ -943,9 +945,7 @@ void tcp_rcv_space_adjust(struct sock *sk) trace_tcp_rcvbuf_grow(sk, time); - tp->rcvq_space.space = copied; - - tcp_rcvbuf_grow(sk); + tcp_rcvbuf_grow(sk, copied); new_measure: tp->rcvq_space.seq = tp->copied_seq; @@ -5270,7 +5270,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) } /* do not grow rcvbuf for not-yet-accepted or orphaned sockets. */ if (sk->sk_socket) - tcp_rcvbuf_grow(sk); + tcp_rcvbuf_grow(sk, tp->rcvq_space.space); } static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a8a3bdf95543..052a0c62023f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -194,17 +194,18 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to, * - mptcp does not maintain a msk-level window clamp * - returns true when the receive buffer is actually updated */ -static bool mptcp_rcvbuf_grow(struct sock *sk) +static bool mptcp_rcvbuf_grow(struct sock *sk, u32 newval) { struct mptcp_sock *msk = mptcp_sk(sk); const struct net *net = sock_net(sk); - int rcvwin, rcvbuf, cap; + u32 rcvwin, rcvbuf, cap; + msk->rcvq_space.space = newval; if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) || (sk->sk_userlocks & SOCK_RCVBUF_LOCK)) return false; - rcvwin = msk->rcvq_space.space << 1; + rcvwin = newval << 1; if (!RB_EMPTY_ROOT(&msk->out_of_order_queue)) rcvwin += MPTCP_SKB_CB(msk->ooo_last_skb)->end_seq - msk->ack_seq; @@ -334,7 +335,7 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb) skb_set_owner_r(skb, sk); /* do not grow rcvbuf for not-yet-accepted or orphaned sockets. */ if (sk->sk_socket) - mptcp_rcvbuf_grow(sk); + mptcp_rcvbuf_grow(sk, msk->rcvq_space.space); } static void mptcp_init_skb(struct sock *ssk, struct sk_buff *skb, int offset, @@ -2049,10 +2050,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) if (msk->rcvq_space.copied <= msk->rcvq_space.space) goto new_measure; - msk->rcvq_space.space = msk->rcvq_space.copied; - if (mptcp_rcvbuf_grow(sk)) { - int copied = msk->rcvq_space.copied; - + if (mptcp_rcvbuf_grow(sk, msk->rcvq_space.copied)) { /* Make subflows follow along. If we do not do this, we * get drops at subflow level if skbs can't be moved to * the mptcp rx queue fast enough (announced rcv_win can @@ -2065,10 +2063,8 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) ssk = mptcp_subflow_tcp_sock(subflow); slow = lock_sock_fast(ssk); /* subflows can be added before tcp_init_transfer() */ - if (tcp_sk(ssk)->rcvq_space.space) { - tcp_sk(ssk)->rcvq_space.space = copied; - tcp_rcvbuf_grow(ssk); - } + if (tcp_sk(ssk)->rcvq_space.space) + tcp_rcvbuf_grow(ssk, msk->rcvq_space.copied); unlock_sock_fast(ssk, slow); } } From aa251c84636c326471ca9d53723816ba8fffe2bf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Oct 2025 12:58:02 +0100 Subject: [PATCH 55/75] tcp: fix too slow tcp_rcvbuf_grow() action While the blamed commits apparently avoided an overshoot, they also limited how fast a sender can increase BDP at each RTT. This is not exactly a revert, we do not add the 16 * tp->advmss cushion we had, and we are keeping the out_of_order_queue contribution. Do the same in mptcp_rcvbuf_grow(). Tested: emulated 50ms rtt (tcp_stream --tcp-tx-delay 50000), cubic 20 second flow. net.ipv4.tcp_rmem set to "4096 131072 67000000" perf record -a -e tcp:tcp_rcvbuf_grow sleep 20 perf script Before: We can see we fail to roughly double RWIN at each RTT. Sender is RWIN limited while CWND is ramping up (before getting tcp_wmem limited). tcp_stream 33793 [010] 825.717525: tcp:tcp_rcvbuf_grow: time=100869 rtt_us=50428 copied=49152 inq=0 space=40960 ooo=0 scaling_ratio=219 rcvbuf=131072 rcv_ssthresh=103970 window_clamp=112128 rcv_wnd=106496 tcp_stream 33793 [010] 825.768966: tcp:tcp_rcvbuf_grow: time=51447 rtt_us=50362 copied=86016 inq=0 space=49152 ooo=0 scaling_ratio=219 rcvbuf=131072 rcv_ssthresh=107474 window_clamp=112128 rcv_wnd=106496 tcp_stream 33793 [010] 825.821539: tcp:tcp_rcvbuf_grow: time=52577 rtt_us=50243 copied=114688 inq=0 space=86016 ooo=0 scaling_ratio=219 rcvbuf=201096 rcv_ssthresh=167377 window_clamp=172031 rcv_wnd=167936 tcp_stream 33793 [010] 825.871781: tcp:tcp_rcvbuf_grow: time=50248 rtt_us=50237 copied=167936 inq=0 space=114688 ooo=0 scaling_ratio=219 rcvbuf=268129 rcv_ssthresh=224722 window_clamp=229375 rcv_wnd=225280 tcp_stream 33793 [010] 825.922475: tcp:tcp_rcvbuf_grow: time=50698 rtt_us=50183 copied=241664 inq=0 space=167936 ooo=0 scaling_ratio=219 rcvbuf=392617 rcv_ssthresh=331217 window_clamp=335871 rcv_wnd=323584 tcp_stream 33793 [010] 825.973326: tcp:tcp_rcvbuf_grow: time=50855 rtt_us=50213 copied=339968 inq=0 space=241664 ooo=0 scaling_ratio=219 rcvbuf=564986 rcv_ssthresh=478674 window_clamp=483327 rcv_wnd=462848 tcp_stream 33793 [010] 826.023970: tcp:tcp_rcvbuf_grow: time=50647 rtt_us=50248 copied=491520 inq=0 space=339968 ooo=0 scaling_ratio=219 rcvbuf=794811 rcv_ssthresh=671778 window_clamp=679935 rcv_wnd=651264 tcp_stream 33793 [010] 826.074612: tcp:tcp_rcvbuf_grow: time=50648 rtt_us=50227 copied=700416 inq=0 space=491520 ooo=0 scaling_ratio=219 rcvbuf=1149124 rcv_ssthresh=974881 window_clamp=983039 rcv_wnd=942080 tcp_stream 33793 [010] 826.125452: tcp:tcp_rcvbuf_grow: time=50845 rtt_us=50225 copied=987136 inq=8192 space=700416 ooo=0 scaling_ratio=219 rcvbuf=1637502 rcv_ssthresh=1392674 window_clamp=1400831 rcv_wnd=1339392 tcp_stream 33793 [010] 826.175698: tcp:tcp_rcvbuf_grow: time=50250 rtt_us=50198 copied=1347584 inq=0 space=978944 ooo=0 scaling_ratio=219 rcvbuf=2288672 rcv_ssthresh=1949729 window_clamp=1957887 rcv_wnd=1945600 tcp_stream 33793 [010] 826.225947: tcp:tcp_rcvbuf_grow: time=50252 rtt_us=50240 copied=1945600 inq=0 space=1347584 ooo=0 scaling_ratio=219 rcvbuf=3150516 rcv_ssthresh=2687010 window_clamp=2695167 rcv_wnd=2691072 tcp_stream 33793 [010] 826.276175: tcp:tcp_rcvbuf_grow: time=50233 rtt_us=50224 copied=2691072 inq=0 space=1945600 ooo=0 scaling_ratio=219 rcvbuf=4548617 rcv_ssthresh=3883041 window_clamp=3891199 rcv_wnd=3887104 tcp_stream 33793 [010] 826.326403: tcp:tcp_rcvbuf_grow: time=50233 rtt_us=50229 copied=3887104 inq=0 space=2691072 ooo=0 scaling_ratio=219 rcvbuf=6291456 rcv_ssthresh=5370482 window_clamp=5382144 rcv_wnd=5373952 tcp_stream 33793 [010] 826.376723: tcp:tcp_rcvbuf_grow: time=50323 rtt_us=50218 copied=5373952 inq=0 space=3887104 ooo=0 scaling_ratio=219 rcvbuf=9087658 rcv_ssthresh=7755537 window_clamp=7774207 rcv_wnd=7757824 tcp_stream 33793 [010] 826.426991: tcp:tcp_rcvbuf_grow: time=50274 rtt_us=50196 copied=7757824 inq=180224 space=5373952 ooo=0 scaling_ratio=219 rcvbuf=12563759 rcv_ssthresh=10729233 window_clamp=10747903 rcv_wnd=10575872 tcp_stream 33793 [010] 826.477229: tcp:tcp_rcvbuf_grow: time=50241 rtt_us=50078 copied=10731520 inq=180224 space=7577600 ooo=0 scaling_ratio=219 rcvbuf=17715667 rcv_ssthresh=15136529 window_clamp=15155199 rcv_wnd=14983168 tcp_stream 33793 [010] 826.527482: tcp:tcp_rcvbuf_grow: time=50258 rtt_us=50153 copied=15138816 inq=360448 space=10551296 ooo=0 scaling_ratio=219 rcvbuf=24667870 rcv_ssthresh=21073410 window_clamp=21102591 rcv_wnd=20766720 tcp_stream 33793 [010] 826.577712: tcp:tcp_rcvbuf_grow: time=50234 rtt_us=50228 copied=21073920 inq=0 space=14778368 ooo=0 scaling_ratio=219 rcvbuf=34550339 rcv_ssthresh=29517041 window_clamp=29556735 rcv_wnd=29519872 tcp_stream 33793 [010] 826.627982: tcp:tcp_rcvbuf_grow: time=50275 rtt_us=50220 copied=29519872 inq=540672 space=21073920 ooo=0 scaling_ratio=219 rcvbuf=49268707 rcv_ssthresh=42090625 window_clamp=42147839 rcv_wnd=41627648 tcp_stream 33793 [010] 826.678274: tcp:tcp_rcvbuf_grow: time=50296 rtt_us=50185 copied=42053632 inq=761856 space=28979200 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57238168 window_clamp=57316406 rcv_wnd=56606720 tcp_stream 33793 [010] 826.728627: tcp:tcp_rcvbuf_grow: time=50357 rtt_us=50128 copied=43913216 inq=851968 space=41291776 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57290728 window_clamp=57316406 rcv_wnd=56524800 tcp_stream 33793 [010] 827.131364: tcp:tcp_rcvbuf_grow: time=50239 rtt_us=50127 copied=43843584 inq=655360 space=43061248 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57290728 window_clamp=57316406 rcv_wnd=56696832 tcp_stream 33793 [010] 827.181613: tcp:tcp_rcvbuf_grow: time=50254 rtt_us=50115 copied=43843584 inq=524288 space=43188224 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57290728 window_clamp=57316406 rcv_wnd=56807424 tcp_stream 33793 [010] 828.339635: tcp:tcp_rcvbuf_grow: time=50283 rtt_us=50110 copied=43843584 inq=458752 space=43319296 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57290728 window_clamp=57316406 rcv_wnd=56864768 tcp_stream 33793 [010] 828.440350: tcp:tcp_rcvbuf_grow: time=50404 rtt_us=50099 copied=43843584 inq=393216 space=43384832 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57290728 window_clamp=57316406 rcv_wnd=56922112 tcp_stream 33793 [010] 829.195106: tcp:tcp_rcvbuf_grow: time=50154 rtt_us=50077 copied=43843584 inq=196608 space=43450368 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57290728 window_clamp=57316406 rcv_wnd=57090048 After: It takes few steps to increase RWIN. Sender is no longer RWIN limited. tcp_stream 50826 [010] 935.634212: tcp:tcp_rcvbuf_grow: time=100788 rtt_us=50315 copied=49152 inq=0 space=40960 ooo=0 scaling_ratio=219 rcvbuf=131072 rcv_ssthresh=103970 window_clamp=112128 rcv_wnd=106496 tcp_stream 50826 [010] 935.685642: tcp:tcp_rcvbuf_grow: time=51437 rtt_us=50361 copied=86016 inq=0 space=49152 ooo=0 scaling_ratio=219 rcvbuf=160875 rcv_ssthresh=132969 window_clamp=137623 rcv_wnd=131072 tcp_stream 50826 [010] 935.738299: tcp:tcp_rcvbuf_grow: time=52660 rtt_us=50256 copied=139264 inq=0 space=86016 ooo=0 scaling_ratio=219 rcvbuf=502741 rcv_ssthresh=411497 window_clamp=430079 rcv_wnd=413696 tcp_stream 50826 [010] 935.788544: tcp:tcp_rcvbuf_grow: time=50249 rtt_us=50233 copied=307200 inq=0 space=139264 ooo=0 scaling_ratio=219 rcvbuf=728690 rcv_ssthresh=618717 window_clamp=623371 rcv_wnd=618496 tcp_stream 50826 [010] 935.838796: tcp:tcp_rcvbuf_grow: time=50258 rtt_us=50202 copied=618496 inq=0 space=307200 ooo=0 scaling_ratio=219 rcvbuf=2450338 rcv_ssthresh=1855709 window_clamp=2096187 rcv_wnd=1859584 tcp_stream 50826 [010] 935.889140: tcp:tcp_rcvbuf_grow: time=50347 rtt_us=50166 copied=1261568 inq=0 space=618496 ooo=0 scaling_ratio=219 rcvbuf=4376503 rcv_ssthresh=3725291 window_clamp=3743961 rcv_wnd=3706880 tcp_stream 50826 [010] 935.939435: tcp:tcp_rcvbuf_grow: time=50300 rtt_us=50185 copied=2478080 inq=24576 space=1261568 ooo=0 scaling_ratio=219 rcvbuf=9082648 rcv_ssthresh=7733731 window_clamp=7769921 rcv_wnd=7692288 tcp_stream 50826 [010] 935.989681: tcp:tcp_rcvbuf_grow: time=50251 rtt_us=50221 copied=4915200 inq=114688 space=2453504 ooo=0 scaling_ratio=219 rcvbuf=16574936 rcv_ssthresh=14108110 window_clamp=14179339 rcv_wnd=14024704 tcp_stream 50826 [010] 936.039967: tcp:tcp_rcvbuf_grow: time=50289 rtt_us=50279 copied=9830400 inq=114688 space=4800512 ooo=0 scaling_ratio=219 rcvbuf=32695050 rcv_ssthresh=27896187 window_clamp=27969593 rcv_wnd=27815936 tcp_stream 50826 [010] 936.090172: tcp:tcp_rcvbuf_grow: time=50211 rtt_us=50200 copied=19841024 inq=114688 space=9715712 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57245176 window_clamp=57316406 rcv_wnd=57163776 tcp_stream 50826 [010] 936.140430: tcp:tcp_rcvbuf_grow: time=50262 rtt_us=50197 copied=39501824 inq=114688 space=19726336 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57245176 window_clamp=57316406 rcv_wnd=57163776 tcp_stream 50826 [010] 936.190527: tcp:tcp_rcvbuf_grow: time=50101 rtt_us=50071 copied=43655168 inq=262144 space=39387136 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57259192 window_clamp=57316406 rcv_wnd=57032704 tcp_stream 50826 [010] 936.240719: tcp:tcp_rcvbuf_grow: time=50197 rtt_us=50057 copied=43843584 inq=262144 space=43393024 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57259192 window_clamp=57316406 rcv_wnd=57032704 tcp_stream 50826 [010] 936.341271: tcp:tcp_rcvbuf_grow: time=50297 rtt_us=50123 copied=43843584 inq=131072 space=43581440 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57259192 window_clamp=57316406 rcv_wnd=57147392 tcp_stream 50826 [010] 936.642503: tcp:tcp_rcvbuf_grow: time=50131 rtt_us=50084 copied=43843584 inq=0 space=43712512 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57259192 window_clamp=57316406 rcv_wnd=57262080 Fixes: 65c5287892e9 ("tcp: fix sk_rcvbuf overshoot") Fixes: e118cdc34dd1 ("mptcp: rcvbuf auto-tuning improvement") Reported-by: Neal Cardwell Signed-off-by: Eric Dumazet Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/589 Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20251028-net-tcp-recv-autotune-v3-4-74b43ba4c84c@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 11 +++++++++-- net/mptcp/protocol.c | 10 +++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cb4e07f84ae2..e4a979b75cc6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -895,17 +895,24 @@ void tcp_rcvbuf_grow(struct sock *sk, u32 newval) { const struct net *net = sock_net(sk); struct tcp_sock *tp = tcp_sk(sk); - u32 rcvwin, rcvbuf, cap; + u32 rcvwin, rcvbuf, cap, oldval; + u64 grow; + oldval = tp->rcvq_space.space; tp->rcvq_space.space = newval; if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) || (sk->sk_userlocks & SOCK_RCVBUF_LOCK)) return; - /* slow start: allow the sender to double its rate. */ + /* DRS is always one RTT late. */ rcvwin = newval << 1; + /* slow start: allow the sender to double its rate. */ + grow = (u64)rcvwin * (newval - oldval); + do_div(grow, oldval); + rcvwin += grow << 1; + if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) rcvwin += TCP_SKB_CB(tp->ooo_last_skb)->end_seq - tp->rcv_nxt; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 052a0c62023f..875027b9319c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -198,15 +198,23 @@ static bool mptcp_rcvbuf_grow(struct sock *sk, u32 newval) { struct mptcp_sock *msk = mptcp_sk(sk); const struct net *net = sock_net(sk); - u32 rcvwin, rcvbuf, cap; + u32 rcvwin, rcvbuf, cap, oldval; + u64 grow; + oldval = msk->rcvq_space.space; msk->rcvq_space.space = newval; if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) || (sk->sk_userlocks & SOCK_RCVBUF_LOCK)) return false; + /* DRS is always one RTT late. */ rcvwin = newval << 1; + /* slow start: allow the sender to double its rate. */ + grow = (u64)rcvwin * (newval - oldval); + do_div(grow, oldval); + rcvwin += grow << 1; + if (!RB_EMPTY_ROOT(&msk->out_of_order_queue)) rcvwin += MPTCP_SKB_CB(msk->ooo_last_skb)->end_seq - msk->ack_seq; From a4384d786e38d5ff172f0908aae01c2c30719245 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Tue, 28 Oct 2025 21:38:41 +0530 Subject: [PATCH 56/75] nfp: xsk: fix memory leak in nfp_net_alloc() In nfp_net_alloc(), the memory allocated for xsk_pools is not freed in the subsequent error paths, leading to a memory leak. Fix that by freeing it in the error path. Fixes: 6402528b7a0b ("nfp: xsk: add AF_XDP zero-copy Rx and Tx support") Signed-off-by: Abdun Nihaal Link: https://patch.msgid.link/20251028160845.126919-1-nihaal@cse.iitm.ac.in Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 132626a3f9f7..9ef72f294117 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2557,14 +2557,16 @@ nfp_net_alloc(struct pci_dev *pdev, const struct nfp_dev_info *dev_info, err = nfp_net_tlv_caps_parse(&nn->pdev->dev, nn->dp.ctrl_bar, &nn->tlv_caps); if (err) - goto err_free_nn; + goto err_free_xsk_pools; err = nfp_ccm_mbox_alloc(nn); if (err) - goto err_free_nn; + goto err_free_xsk_pools; return nn; +err_free_xsk_pools: + kfree(nn->dp.xsk_pools); err_free_nn: if (nn->dp.netdev) free_netdev(nn->dp.netdev); From a43303809868b22bd1303739ba334e982b234d45 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Tue, 28 Oct 2025 20:20:27 +0700 Subject: [PATCH 57/75] Documentation: netconsole: Remove obsolete contact people Breno Leitao has been listed in MAINTAINERS as netconsole maintainer since 7c938e438c56db ("MAINTAINERS: make Breno the netconsole maintainer"), but the documentation says otherwise that bug reports should be sent to original netconsole authors. Remove obsolate contact info. Signed-off-by: Bagas Sanjaya Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://patch.msgid.link/20251028132027.48102-1-bagasdotme@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/networking/netconsole.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/networking/netconsole.rst b/Documentation/networking/netconsole.rst index 59cb9982afe6..2555e75e5cc1 100644 --- a/Documentation/networking/netconsole.rst +++ b/Documentation/networking/netconsole.rst @@ -19,9 +19,6 @@ Userdata append support by Matthew Wood , Jan 22 2024 Sysdata append support by Breno Leitao , Jan 15 2025 -Please send bug reports to Matt Mackall -Satyam Sharma , and Cong Wang - Introduction: ============= From 00764aa5c9bbb2044eb04d6d78584a436666b231 Mon Sep 17 00:00:00 2001 From: Gustavo Luiz Duarte Date: Tue, 28 Oct 2025 15:06:32 -0700 Subject: [PATCH 58/75] netconsole: Fix race condition in between reader and writer of userdata The update_userdata() function constructs the complete userdata string in nt->extradata_complete and updates nt->userdata_length. This data is then read by write_msg() and write_ext_msg() when sending netconsole messages. However, update_userdata() was not holding target_list_lock during this process, allowing concurrent message transmission to read partially updated userdata. This race condition could result in netconsole messages containing incomplete or inconsistent userdata - for example, reading the old userdata_length with new extradata_complete content, or vice versa, leading to truncated or corrupted output. Fix this by acquiring target_list_lock with spin_lock_irqsave() before updating extradata_complete and userdata_length, and releasing it after both fields are fully updated. This ensures that readers see a consistent view of the userdata, preventing corruption during concurrent access. The fix aligns with the existing locking pattern used throughout the netconsole code, where target_list_lock protects access to target fields including buf[] and msgcounter that are accessed during message transmission. Also get rid of the unnecessary variable complete_idx, which makes it easier to bail out of update_userdata(). Fixes: df03f830d099 ("net: netconsole: cache userdata formatted string in netconsole_target") Signed-off-by: Gustavo Luiz Duarte Link: https://patch.msgid.link/20251028-netconsole-fix-race-v4-1-63560b0ae1a0@meta.com Signed-off-by: Jakub Kicinski --- drivers/net/netconsole.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 194570443493..5d8d0214786c 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -886,8 +886,11 @@ static ssize_t userdatum_value_show(struct config_item *item, char *buf) static void update_userdata(struct netconsole_target *nt) { - int complete_idx = 0, child_count = 0; struct list_head *entry; + int child_count = 0; + unsigned long flags; + + spin_lock_irqsave(&target_list_lock, flags); /* Clear the current string in case the last userdatum was deleted */ nt->userdata_length = 0; @@ -897,8 +900,11 @@ static void update_userdata(struct netconsole_target *nt) struct userdatum *udm_item; struct config_item *item; - if (WARN_ON_ONCE(child_count >= MAX_EXTRADATA_ITEMS)) - break; + if (child_count >= MAX_EXTRADATA_ITEMS) { + spin_unlock_irqrestore(&target_list_lock, flags); + WARN_ON_ONCE(1); + return; + } child_count++; item = container_of(entry, struct config_item, ci_entry); @@ -912,12 +918,11 @@ static void update_userdata(struct netconsole_target *nt) * one entry length (1/MAX_EXTRADATA_ITEMS long), entry count is * checked to not exceed MAX items with child_count above */ - complete_idx += scnprintf(&nt->extradata_complete[complete_idx], - MAX_EXTRADATA_ENTRY_LEN, " %s=%s\n", - item->ci_name, udm_item->value); + nt->userdata_length += scnprintf(&nt->extradata_complete[nt->userdata_length], + MAX_EXTRADATA_ENTRY_LEN, " %s=%s\n", + item->ci_name, udm_item->value); } - nt->userdata_length = strnlen(nt->extradata_complete, - sizeof(nt->extradata_complete)); + spin_unlock_irqrestore(&target_list_lock, flags); } static ssize_t userdatum_value_store(struct config_item *item, const char *buf, From 27b0e701d3872ba59c5b579a9e8a02ea49ad3d3b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 28 Oct 2025 09:16:52 +0100 Subject: [PATCH 59/75] mptcp: drop bogus optimization in __mptcp_check_push() Accessing the transmit queue without owning the msk socket lock is inherently racy, hence __mptcp_check_push() could actually quit early even when there is pending data. That in turn could cause unexpected tx lock and timeout. Dropping the early check avoids the race, implicitly relaying on later tests under the relevant lock. With such change, all the other mptcp_send_head() call sites are now under the msk socket lock and we can additionally drop the now unneeded annotation on the transmit head pointer accesses. Fixes: 6e628cd3a8f7 ("mptcp: use mptcp release_cb for delayed tasks") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Geliang Tang Tested-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251028-net-mptcp-send-timeout-v1-1-38ffff5a9ec8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 11 ++++------- net/mptcp/protocol.h | 2 +- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 875027b9319c..655a2a45224f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1007,7 +1007,7 @@ static void __mptcp_clean_una(struct sock *sk) if (WARN_ON_ONCE(!msk->recovery)) break; - WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); + msk->first_pending = mptcp_send_next(sk); } dfrag_clear(sk, dfrag); @@ -1552,7 +1552,7 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk, mptcp_update_post_push(msk, dfrag, ret); } - WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); + msk->first_pending = mptcp_send_next(sk); if (msk->snd_burst <= 0 || !sk_stream_memory_free(ssk) || @@ -1912,7 +1912,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) get_page(dfrag->page); list_add_tail(&dfrag->list, &msk->rtx_queue); if (!msk->first_pending) - WRITE_ONCE(msk->first_pending, dfrag); + msk->first_pending = dfrag; } pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d\n", msk, dfrag->data_seq, dfrag->data_len, dfrag->already_sent, @@ -2882,7 +2882,7 @@ static void __mptcp_clear_xmit(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_data_frag *dtmp, *dfrag; - WRITE_ONCE(msk->first_pending, NULL); + msk->first_pending = NULL; list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) dfrag_clear(sk, dfrag); } @@ -3422,9 +3422,6 @@ void __mptcp_data_acked(struct sock *sk) void __mptcp_check_push(struct sock *sk, struct sock *ssk) { - if (!mptcp_send_head(sk)) - return; - if (!sock_owned_by_user(sk)) __mptcp_subflow_push_pending(sk, ssk, false); else diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 52f9cfa4ce95..379a88e14e8d 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -414,7 +414,7 @@ static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) { const struct mptcp_sock *msk = mptcp_sk(sk); - return READ_ONCE(msk->first_pending); + return msk->first_pending; } static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk) From 8e04ce45a8db7a080220e86e249198fa676b83dc Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 28 Oct 2025 09:16:53 +0100 Subject: [PATCH 60/75] mptcp: fix MSG_PEEK stream corruption If a MSG_PEEK | MSG_WAITALL read operation consumes all the bytes in the receive queue and recvmsg() need to waits for more data - i.e. it's a blocking one - upon arrival of the next packet the MPTCP protocol will start again copying the oldest data present in the receive queue, corrupting the data stream. Address the issue explicitly tracking the peeked sequence number, restarting from the last peeked byte. Fixes: ca4fb892579f ("mptcp: add MSG_PEEK support") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Geliang Tang Tested-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251028-net-mptcp-send-timeout-v1-2-38ffff5a9ec8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 655a2a45224f..2535788569ab 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1945,22 +1945,36 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied); -static int __mptcp_recvmsg_mskq(struct sock *sk, - struct msghdr *msg, - size_t len, int flags, +static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg, + size_t len, int flags, int copied_total, struct scm_timestamping_internal *tss, int *cmsg_flags) { struct mptcp_sock *msk = mptcp_sk(sk); struct sk_buff *skb, *tmp; + int total_data_len = 0; int copied = 0; skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) { - u32 offset = MPTCP_SKB_CB(skb)->offset; + u32 delta, offset = MPTCP_SKB_CB(skb)->offset; u32 data_len = skb->len - offset; - u32 count = min_t(size_t, len - copied, data_len); + u32 count; int err; + if (flags & MSG_PEEK) { + /* skip already peeked skbs */ + if (total_data_len + data_len <= copied_total) { + total_data_len += data_len; + continue; + } + + /* skip the already peeked data in the current skb */ + delta = copied_total - total_data_len; + offset += delta; + data_len -= delta; + } + + count = min_t(size_t, len - copied, data_len); if (!(flags & MSG_TRUNC)) { err = skb_copy_datagram_msg(skb, offset, msg, count); if (unlikely(err < 0)) { @@ -1977,16 +1991,14 @@ static int __mptcp_recvmsg_mskq(struct sock *sk, copied += count; - if (count < data_len) { - if (!(flags & MSG_PEEK)) { + if (!(flags & MSG_PEEK)) { + msk->bytes_consumed += count; + if (count < data_len) { MPTCP_SKB_CB(skb)->offset += count; MPTCP_SKB_CB(skb)->map_seq += count; - msk->bytes_consumed += count; + break; } - break; - } - if (!(flags & MSG_PEEK)) { /* avoid the indirect call, we know the destructor is sock_rfree */ skb->destructor = NULL; skb->sk = NULL; @@ -1994,7 +2006,6 @@ static int __mptcp_recvmsg_mskq(struct sock *sk, sk_mem_uncharge(sk, skb->truesize); __skb_unlink(skb, &sk->sk_receive_queue); skb_attempt_defer_free(skb); - msk->bytes_consumed += count; } if (copied >= len) @@ -2191,7 +2202,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, while (copied < len) { int err, bytes_read; - bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags); + bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, + copied, &tss, &cmsg_flags); if (unlikely(bytes_read < 0)) { if (!copied) copied = bytes_read; From a824084b98d8a1dbd6e85d0842a8eb5e73467f59 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 28 Oct 2025 09:16:54 +0100 Subject: [PATCH 61/75] mptcp: restore window probe Since commit 72377ab2d671 ("mptcp: more conservative check for zero probes") the MPTCP-level zero window probe check is always disabled, as the TCP-level write queue always contains at least the newly allocated skb. Refine the relevant check tacking in account that the above condition and that such skb can have zero length. Fixes: 72377ab2d671 ("mptcp: more conservative check for zero probes") Cc: stable@vger.kernel.org Reported-by: Geliang Tang Closes: https://lore.kernel.org/d0a814c364e744ca6b836ccd5b6e9146882e8d42.camel@kernel.org Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Tested-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251028-net-mptcp-send-timeout-v1-3-38ffff5a9ec8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2535788569ab..5d8714adae6c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1299,7 +1299,12 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, if (copy == 0) { u64 snd_una = READ_ONCE(msk->snd_una); - if (snd_una != msk->snd_nxt || tcp_write_queue_tail(ssk)) { + /* No need for zero probe if there are any data pending + * either at the msk or ssk level; skb is the current write + * queue tail and can be empty at this point. + */ + if (snd_una != msk->snd_nxt || skb->len || + skb != tcp_send_head(ssk)) { tcp_remove_empty_skb(ssk); return 0; } From fe11dfa10919ce594682c76f5f648a0840d80a2b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 28 Oct 2025 09:16:55 +0100 Subject: [PATCH 62/75] mptcp: zero window probe mib Explicitly account for MPTCP-level zero windows probe, to catch hopefully earlier issues alike the one addressed by the previous patch. Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Tested-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251028-net-mptcp-send-timeout-v1-4-38ffff5a9ec8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/mib.c | 1 + net/mptcp/mib.h | 1 + net/mptcp/protocol.c | 1 + 3 files changed, 3 insertions(+) diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 6003e47c770a..171643815076 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -85,6 +85,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("DssFallback", MPTCP_MIB_DSSFALLBACK), SNMP_MIB_ITEM("SimultConnectFallback", MPTCP_MIB_SIMULTCONNFALLBACK), SNMP_MIB_ITEM("FallbackFailed", MPTCP_MIB_FALLBACKFAILED), + SNMP_MIB_ITEM("WinProbe", MPTCP_MIB_WINPROBE), }; /* mptcp_mib_alloc - allocate percpu mib counters diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 309bac6fea32..a1d3e9369fbb 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -88,6 +88,7 @@ enum linux_mptcp_mib_field { MPTCP_MIB_DSSFALLBACK, /* Bad or missing DSS */ MPTCP_MIB_SIMULTCONNFALLBACK, /* Simultaneous connect */ MPTCP_MIB_FALLBACKFAILED, /* Can't fallback due to msk status */ + MPTCP_MIB_WINPROBE, /* MPTCP-level zero window probe */ __MPTCP_MIB_MAX }; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 5d8714adae6c..2d6b8de35c44 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1355,6 +1355,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, mpext->dsn64); if (zero_window_probe) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_WINPROBE); mptcp_subflow_ctx(ssk)->rel_write_seq += copy; mpext->frozen = 1; if (READ_ONCE(msk->csum_enabled)) From dc89548c6926d68dfdda11bebc1a5258bc41d887 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 27 Oct 2025 00:43:16 +0800 Subject: [PATCH 63/75] net: usb: asix_devices: Check return value of usbnet_get_endpoints The code did not check the return value of usbnet_get_endpoints. Add checks and return the error if it fails to transfer the error. Found via static anlaysis and this is similar to commit 07161b2416f7 ("sr9800: Add check for usbnet_get_endpoints"). Fixes: 933a27d39e0e ("USB: asix - Add AX88178 support and many other changes") Fixes: 2e55cc7210fe ("[PATCH] USB: usbnet (3/9) module for ASIX Ethernet adapters") Cc: stable@vger.kernel.org Signed-off-by: Miaoqian Lin Link: https://patch.msgid.link/20251026164318.57624-1-linmq006@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/asix_devices.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 85bd5d845409..232bbd79a4de 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -230,7 +230,9 @@ static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf) int i; unsigned long gpio_bits = dev->driver_info->data; - usbnet_get_endpoints(dev,intf); + ret = usbnet_get_endpoints(dev, intf); + if (ret) + goto out; /* Toggle the GPIOs in a manufacturer/model specific way */ for (i = 2; i >= 0; i--) { @@ -848,7 +850,9 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) dev->driver_priv = priv; - usbnet_get_endpoints(dev, intf); + ret = usbnet_get_endpoints(dev, intf); + if (ret) + return ret; /* Maybe the boot loader passed the MAC address via device tree */ if (!eth_platform_get_mac_address(&dev->udev->dev, buf)) { @@ -1281,7 +1285,9 @@ static int ax88178_bind(struct usbnet *dev, struct usb_interface *intf) int ret; u8 buf[ETH_ALEN] = {0}; - usbnet_get_endpoints(dev,intf); + ret = usbnet_get_endpoints(dev, intf); + if (ret) + return ret; /* Get the MAC address */ ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0); From 53110232c95ff56067fd96c75a1a1c53d10dcd98 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Sun, 26 Oct 2025 22:20:19 +0200 Subject: [PATCH 64/75] net/mlx5: Don't zero user_count when destroying FDB tables esw->user_count tracks how many TC rules are added on an esw via mlx5e_configure_flower -> mlx5_esw_get -> atomic64_inc(&esw->user_count) esw.user_count was unconditionally set to 0 in esw_destroy_legacy_fdb_table and esw_destroy_offloads_fdb_tables. These two together can lead to the following sequence of events: 1. echo 1 > /sys/class/net/eth2/device/sriov_numvfs - mlx5_core_sriov_configure -...-> esw_create_legacy_table -> atomic64_set(&esw->user_count, 0) 2. tc qdisc add dev eth2 ingress && \ tc filter replace dev eth2 pref 1 protocol ip chain 0 ingress \ handle 1 flower action ct nat zone 64000 pipe - mlx5e_configure_flower -> mlx5_esw_get -> atomic64_inc(&esw->user_count) 3. echo 0 > /sys/class/net/eth2/device/sriov_numvfs - mlx5_core_sriov_configure -..-> esw_destroy_legacy_fdb_table -> atomic64_set(&esw->user_count, 0) 4. devlink dev eswitch set pci/0000:08:00.0 mode switchdev - mlx5_devlink_eswitch_mode_set -> mlx5_esw_try_lock -> atomic64_read(&esw->user_count) == 0 - then proceed to a WARN_ON in: esw_offloads_start -> mlx5_eswitch_enable_locke -> esw_offloads_enable -> mlx5_esw_offloads_rep_load -> mlx5e_vport_rep_load -> mlx5e_netdev_change_profile -> mlx5e_detach_netdev -> mlx5e_cleanup_nic_rx -> mlx5e_tc_nic_cleanup -> mlx5e_mod_hdr_tbl_destroy Fix this by not clearing out the user_count when destroying FDB tables, so that the check in mlx5_esw_try_lock can prevent the mode change when there are TC rules configured, as originally intended. Fixes: 2318b8bb94a3 ("net/mlx5: E-switch, Destroy legacy fdb table when needed") Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/1761510019-938772-1-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index 76382626ad41..929adeb50a98 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -66,7 +66,6 @@ static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw) esw->fdb_table.legacy.addr_grp = NULL; esw->fdb_table.legacy.allmulti_grp = NULL; esw->fdb_table.legacy.promisc_grp = NULL; - atomic64_set(&esw->user_count, 0); } static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 34749814f19b..44a142a041b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1978,7 +1978,6 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) /* Holds true only as long as DMFS is the default */ mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns, MLX5_FLOW_STEERING_MODE_DMFS); - atomic64_set(&esw->user_count, 0); } static int esw_get_nr_ft_offloads_steering_src_ports(struct mlx5_eswitch *esw) From da2522df3fcc6f57068470cbdcd6516d9eb76b37 Mon Sep 17 00:00:00 2001 From: Jinliang Wang Date: Sun, 26 Oct 2025 23:55:30 -0700 Subject: [PATCH 65/75] net: mctp: Fix tx queue stall The tx queue can become permanently stuck in a stopped state due to a race condition between the URB submission path and its completion callback. The URB completion callback can run immediately after usb_submit_urb() returns, before the submitting function calls netif_stop_queue(). If this occurs, the queue state management becomes desynchronized, leading to a stall where the queue is never woken. Fix this by moving the netif_stop_queue() call to before submitting the URB. This closes the race window by ensuring the network stack is aware the queue is stopped before the URB completion can possibly run. Fixes: 0791c0327a6e ("net: mctp: Add MCTP USB transport driver") Signed-off-by: Jinliang Wang Acked-by: Jeremy Kerr Link: https://patch.msgid.link/20251027065530.2045724-1-jinliangw@google.com Signed-off-by: Jakub Kicinski --- drivers/net/mctp/mctp-usb.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/mctp/mctp-usb.c b/drivers/net/mctp/mctp-usb.c index 36ccc53b1797..ef860cfc629f 100644 --- a/drivers/net/mctp/mctp-usb.c +++ b/drivers/net/mctp/mctp-usb.c @@ -96,11 +96,13 @@ static netdev_tx_t mctp_usb_start_xmit(struct sk_buff *skb, skb->data, skb->len, mctp_usb_out_complete, skb); + /* Stops TX queue first to prevent race condition with URB complete */ + netif_stop_queue(dev); rc = usb_submit_urb(urb, GFP_ATOMIC); - if (rc) + if (rc) { + netif_wake_queue(dev); goto err_drop; - else - netif_stop_queue(dev); + } return NETDEV_TX_OK; From 9311e9540a8b406d9f028aa87fb072a3819d4c82 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Mon, 27 Oct 2025 17:57:10 +0800 Subject: [PATCH 66/75] selftests: net: use BASH for bareudp testing In bareudp.sh, this script uses /bin/sh and it will load another lib.sh BASH script at the very beginning. But on some operating systems like Ubuntu, /bin/sh is actually pointed to DASH, thus it will try to run BASH commands with DASH and consequently leads to syntax issues: # ./bareudp.sh: 4: ./lib.sh: Bad substitution # ./bareudp.sh: 5: ./lib.sh: source: not found # ./bareudp.sh: 24: ./lib.sh: Syntax error: "(" unexpected Fix this by explicitly using BASH for bareudp.sh. This fixes test execution failures on systems where /bin/sh is not BASH. Reported-by: Edoardo Canepa Link: https://bugs.launchpad.net/bugs/2129812 Signed-off-by: Po-Hsu Lin Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/20251027095710.2036108-2-po-hsu.lin@canonical.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/bareudp.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh index 4046131e7888..d9e5b967f815 100755 --- a/tools/testing/selftests/net/bareudp.sh +++ b/tools/testing/selftests/net/bareudp.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Test various bareudp tunnel configurations. From 298574936a6c4ebbe655e15d971ddb1a96c7dc0b Mon Sep 17 00:00:00 2001 From: Thanh Quan Date: Mon, 27 Oct 2025 15:02:43 +0100 Subject: [PATCH 67/75] net: phy: dp83869: fix STRAP_OPMODE bitmask According to the TI DP83869HM datasheet Revision D (June 2025), section 7.6.1.41 STRAP_STS Register, the STRAP_OPMODE bitmask is bit [11:9]. Fix this. In case the PHY is auto-detected via PHY ID registers, or not described in DT, or, in case the PHY is described in DT but the optional DT property "ti,op-mode" is not present, then the driver reads out the PHY functional mode (RGMII, SGMII, ...) from hardware straps. Currently, all upstream users of this PHY specify both DT compatible string "ethernet-phy-id2000.a0f1" and ti,op-mode = property, therefore it seems no upstream users are affected by this bug. The driver currently interprets bits [2:0] of STRAP_STS register as PHY functional mode. Those bits are controlled by ANEG_DIS, ANEGSEL_0 straps and an always-zero reserved bit. Systems that use RGMII-to-Copper functional mode are unlikely to disable auto-negotiation via ANEG_DIS strap, or change auto-negotiation behavior via ANEGSEL_0 strap. Therefore, even with this bug in place, the STRAP_STS register content is likely going to be interpreted by the driver as RGMII-to-Copper mode. However, for a system with PHY functional mode strapping set to other mode than RGMII-to-Copper, the driver is likely to misinterpret the strapping as RGMII-to-Copper and misconfigure the PHY. For example, on a system with SGMII-to-Copper strapping, the STRAP_STS register reads as 0x0c20, but the PHY ends up being configured for incompatible RGMII-to-Copper mode. Fixes: 0eaf8ccf2047 ("net: phy: dp83869: Set opmode from straps") Reviewed-by: Andrew Lunn Signed-off-by: Thanh Quan Signed-off-by: Hai Pham Signed-off-by: Marek Vasut # Port from U-Boot to Linux Link: https://patch.msgid.link/20251027140320.8996-1-marek.vasut+renesas@mailbox.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83869.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c index a2cd1cc35cde..1f381d7b13ff 100644 --- a/drivers/net/phy/dp83869.c +++ b/drivers/net/phy/dp83869.c @@ -84,7 +84,7 @@ #define DP83869_CLK_DELAY_DEF 7 /* STRAP_STS1 bits */ -#define DP83869_STRAP_OP_MODE_MASK GENMASK(2, 0) +#define DP83869_STRAP_OP_MODE_MASK GENMASK(11, 9) #define DP83869_STRAP_STS1_RESERVED BIT(11) #define DP83869_STRAP_MIRROR_ENABLED BIT(12) @@ -528,7 +528,7 @@ static int dp83869_set_strapped_mode(struct phy_device *phydev) if (val < 0) return val; - dp83869->mode = val & DP83869_STRAP_OP_MODE_MASK; + dp83869->mode = FIELD_GET(DP83869_STRAP_OP_MODE_MASK, val); return 0; } From 34892cfec0c2d96787c4be7bda0d5f18d7dacf85 Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Sun, 26 Oct 2025 22:03:01 +0200 Subject: [PATCH 68/75] net: tls: Change async resync helpers argument Update tls_offload_rx_resync_async_request_start() and tls_offload_rx_resync_async_request_end() to get a struct tls_offload_resync_async parameter directly, rather than extracting it from struct sock. This change aligns the function signatures with the upcoming tls_offload_rx_resync_async_request_cancel() helper, which will be introduced in a subsequent patch. Signed-off-by: Shahar Shitrit Reviewed-by: Sabrina Dubroca Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761508983-937977-2-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/ktls_rx.c | 9 ++++++-- include/net/tls.h | 21 +++++++------------ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index d7a11ff9bbdb..5fbc92269585 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -425,12 +425,14 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, { struct mlx5e_ktls_rx_resync_buf *buf = wi->tls_get_params.buf; struct mlx5e_ktls_offload_context_rx *priv_rx; + struct tls_offload_context_rx *rx_ctx; u8 tracker_state, auth_state, *ctx; struct device *dev; u32 hw_seq; priv_rx = buf->priv_rx; dev = mlx5_core_dma_dev(sq->channel->mdev); + rx_ctx = tls_offload_ctx_rx(tls_get_ctx(priv_rx->sk)); if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) goto out; @@ -447,7 +449,8 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, } hw_seq = MLX5_GET(tls_progress_params, ctx, hw_resync_tcp_sn); - tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq)); + tls_offload_rx_resync_async_request_end(rx_ctx->resync_async, + cpu_to_be32(hw_seq)); priv_rx->rq_stats->tls_resync_req_end++; out: mlx5e_ktls_priv_rx_put(priv_rx); @@ -482,6 +485,7 @@ static bool resync_queue_get_psv(struct sock *sk) static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb) { struct ethhdr *eth = (struct ethhdr *)(skb->data); + struct tls_offload_resync_async *resync_async; struct net_device *netdev = rq->netdev; struct net *net = dev_net(netdev); struct sock *sk = NULL; @@ -527,7 +531,8 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb) seq = th->seq; datalen = skb->len - depth; - tls_offload_rx_resync_async_request_start(sk, seq, datalen); + resync_async = tls_offload_ctx_rx(tls_get_ctx(sk))->resync_async; + tls_offload_rx_resync_async_request_start(resync_async, seq, datalen); rq->stats->tls_resync_req_start++; unref: diff --git a/include/net/tls.h b/include/net/tls.h index 857340338b69..b90f3b675c3c 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -451,25 +451,20 @@ static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq) /* Log all TLS record header TCP sequences in [seq, seq+len] */ static inline void -tls_offload_rx_resync_async_request_start(struct sock *sk, __be32 seq, u16 len) +tls_offload_rx_resync_async_request_start(struct tls_offload_resync_async *resync_async, + __be32 seq, u16 len) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); - - atomic64_set(&rx_ctx->resync_async->req, ((u64)ntohl(seq) << 32) | + atomic64_set(&resync_async->req, ((u64)ntohl(seq) << 32) | ((u64)len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC); - rx_ctx->resync_async->loglen = 0; - rx_ctx->resync_async->rcd_delta = 0; + resync_async->loglen = 0; + resync_async->rcd_delta = 0; } static inline void -tls_offload_rx_resync_async_request_end(struct sock *sk, __be32 seq) +tls_offload_rx_resync_async_request_end(struct tls_offload_resync_async *resync_async, + __be32 seq) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); - - atomic64_set(&rx_ctx->resync_async->req, - ((u64)ntohl(seq) << 32) | RESYNC_REQ); + atomic64_set(&resync_async->req, ((u64)ntohl(seq) << 32) | RESYNC_REQ); } static inline void From c15d5c62ab313c19121f10e25d4fec852bd1c40c Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Sun, 26 Oct 2025 22:03:02 +0200 Subject: [PATCH 69/75] net: tls: Cancel RX async resync request on rcd_delta overflow When a netdev issues a RX async resync request for a TLS connection, the TLS module handles it by logging record headers and attempting to match them to the tcp_sn provided by the device. If a match is found, the TLS module approves the tcp_sn for resynchronization. While waiting for a device response, the TLS module also increments rcd_delta each time a new TLS record is received, tracking the distance from the original resync request. However, if the device response is delayed or fails (e.g due to unstable connection and device getting out of tracking, hardware errors, resource exhaustion etc.), the TLS module keeps logging and incrementing, which can lead to a WARN() when rcd_delta exceeds the threshold. To address this, introduce tls_offload_rx_resync_async_request_cancel() to explicitly cancel resync requests when a device response failure is detected. Call this helper also as a final safeguard when rcd_delta crosses its threshold, as reaching this point implies that earlier cancellation did not occur. Signed-off-by: Shahar Shitrit Reviewed-by: Sabrina Dubroca Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761508983-937977-3-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/tls.h | 6 ++++++ net/tls/tls_device.c | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/net/tls.h b/include/net/tls.h index b90f3b675c3c..c7bcdb3afad7 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -467,6 +467,12 @@ tls_offload_rx_resync_async_request_end(struct tls_offload_resync_async *resync_ atomic64_set(&resync_async->req, ((u64)ntohl(seq) << 32) | RESYNC_REQ); } +static inline void +tls_offload_rx_resync_async_request_cancel(struct tls_offload_resync_async *resync_async) +{ + atomic64_set(&resync_async->req, 0); +} + static inline void tls_offload_rx_resync_set_type(struct sock *sk, enum tls_offload_sync_type type) { diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index a64ae15b1a60..71734411ff4c 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -723,8 +723,10 @@ tls_device_rx_resync_async(struct tls_offload_resync_async *resync_async, /* shouldn't get to wraparound: * too long in async stage, something bad happened */ - if (WARN_ON_ONCE(resync_async->rcd_delta == USHRT_MAX)) + if (WARN_ON_ONCE(resync_async->rcd_delta == USHRT_MAX)) { + tls_offload_rx_resync_async_request_cancel(resync_async); return false; + } /* asynchronous stage: log all headers seq such that * req_seq <= seq <= end_seq, and wait for real resync request From 426e9da3b28404b1edcbae401231fb378150d99d Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Sun, 26 Oct 2025 22:03:03 +0200 Subject: [PATCH 70/75] net/mlx5e: kTLS, Cancel RX async resync request in error flows When device loses track of TLS records, it attempts to resync by monitoring records and requests an asynchronous resynchronization from software for this TLS connection. The TLS module handles such device RX resync requests by logging record headers and comparing them with the record tcp_sn when provided by the device. It also increments rcd_delta to track how far the current record tcp_sn is from the tcp_sn of the original resync request. If the device later responds with a matching tcp_sn, the TLS module approves the tcp_sn for resync. However, the device response may be delayed or never arrive, particularly due to traffic-related issues such as packet drops or reordering. In such cases, the TLS module remains unaware that resync will not complete, and continues performing unnecessary work by logging headers and incrementing rcd_delta, which can eventually exceed the threshold and trigger a WARN(). For example, this was observed when the device got out of tracking, causing mlx5e_ktls_handle_get_psv_completion() to fail and ultimately leading to the rcd_delta warning. To address this, call tls_offload_rx_resync_async_request_cancel() to cancel the resync request and stop resync tracking in such error cases. Also, increment the tls_resync_req_skip counter to track these cancellations. Fixes: 0419d8c9d8f8 ("net/mlx5e: kTLS, Add kTLS RX resync support") Signed-off-by: Shahar Shitrit Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761508983-937977-4-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/ktls_rx.c | 34 ++++++++++++++++--- .../mellanox/mlx5/core/en_accel/ktls_txrx.h | 4 +++ .../net/ethernet/mellanox/mlx5/core/en_rx.c | 4 +++ 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 5fbc92269585..da2d1eb52c13 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -320,7 +320,6 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, err_free: kfree(buf); err_out: - priv_rx->rq_stats->tls_resync_req_skip++; return err; } @@ -339,14 +338,19 @@ static void resync_handle_work(struct work_struct *work) if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) { mlx5e_ktls_priv_rx_put(priv_rx); + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(&resync->core); return; } c = resync->priv->channels.c[priv_rx->rxq]; sq = &c->async_icosq; - if (resync_post_get_progress_params(sq, priv_rx)) + if (resync_post_get_progress_params(sq, priv_rx)) { + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(&resync->core); mlx5e_ktls_priv_rx_put(priv_rx); + } } static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync, @@ -425,6 +429,7 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, { struct mlx5e_ktls_rx_resync_buf *buf = wi->tls_get_params.buf; struct mlx5e_ktls_offload_context_rx *priv_rx; + struct tls_offload_resync_async *async_resync; struct tls_offload_context_rx *rx_ctx; u8 tracker_state, auth_state, *ctx; struct device *dev; @@ -433,8 +438,12 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, priv_rx = buf->priv_rx; dev = mlx5_core_dma_dev(sq->channel->mdev); rx_ctx = tls_offload_ctx_rx(tls_get_ctx(priv_rx->sk)); - if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) + async_resync = rx_ctx->resync_async; + if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) { + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(async_resync); goto out; + } dma_sync_single_for_cpu(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); @@ -445,11 +454,12 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, if (tracker_state != MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING || auth_state != MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD) { priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(async_resync); goto out; } hw_seq = MLX5_GET(tls_progress_params, ctx, hw_resync_tcp_sn); - tls_offload_rx_resync_async_request_end(rx_ctx->resync_async, + tls_offload_rx_resync_async_request_end(async_resync, cpu_to_be32(hw_seq)); priv_rx->rq_stats->tls_resync_req_end++; out: @@ -475,8 +485,10 @@ static bool resync_queue_get_psv(struct sock *sk) resync = &priv_rx->resync; mlx5e_ktls_priv_rx_get(priv_rx); - if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work))) + if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work))) { mlx5e_ktls_priv_rx_put(priv_rx); + return false; + } return true; } @@ -561,6 +573,18 @@ void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk, resync_handle_seq_match(priv_rx, c); } +void +mlx5e_ktls_rx_resync_async_request_cancel(struct mlx5e_icosq_wqe_info *wi) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx; + struct mlx5e_ktls_rx_resync_buf *buf; + + buf = wi->tls_get_params.buf; + priv_rx = buf->priv_rx; + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(&priv_rx->resync.core); +} + /* End of resync section */ void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h index f87b65c560ea..cb08799769ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h @@ -29,6 +29,10 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, u32 *dma_fifo_cc); + +void +mlx5e_ktls_rx_resync_async_request_cancel(struct mlx5e_icosq_wqe_info *wi); + static inline bool mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 1c79adc51a04..26621a2972ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1036,6 +1036,10 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq) netdev_WARN_ONCE(cq->netdev, "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe)); +#ifdef CONFIG_MLX5_EN_TLS + if (wi->wqe_type == MLX5E_ICOSQ_WQE_GET_PSV_TLS) + mlx5e_ktls_rx_resync_async_request_cancel(wi); +#endif mlx5e_dump_error_cqe(&sq->cq, sq->sqn, (struct mlx5_err_cqe *)cqe); mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs); From c657f86106c8729240e1f50a62c6606b578ecf20 Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Tue, 28 Oct 2025 11:18:43 +0800 Subject: [PATCH 71/75] net: stmmac: vlan: Disable 802.1AD tag insertion offload The DWMAC IP's VLAN tag insertion offload does not support inserting STAG (802.1AD) and CTAG (802.1Q) types in bytes 13 and 14 using the same MAC_VLAN_Incl and MAC_VLAN_Inner_Incl register configurations. Currently, MAC_VLAN_Incl is configured to offload only STAG type insertion. However, the DWMAC IP inserts a CTAG type when the inner VLAN ID field of the descriptor is not configured, and a STAG type when it is configured. This behavior is not documented and leads to inconsistent double VLAN tagging. Additionally, an unexpected CTAG with VLAN ID 0 is inserted, resulting in frames like: Frame 1: 110 bytes on wire (880 bits), 110 bytes captured (880 bits) Ethernet II, Src: (), Dst: () IEEE 802.1ad, ID: 100 802.1Q Virtual LAN, PRI: 0, DEI: 0, ID: 0 (unexpected) 802.1Q Virtual LAN, PRI: 0, DEI: 0, ID: 200 Internet Protocol Version 4, Src: 192.168.4.10, Dst: 192.168.4.11 Internet Control Message Protocol To avoid this undocumented and incorrect behavior, disable 802.1AD tag insertion offload. Also, don't set CSVL bit. As per the data book, when this bit is set, S-VLAN type (0x88A8) is inserted in the 13th and 14th bytes of transmitted packets and when this bit is reset, C-VLAN type (0x8100) is inserted in the 13th and 14th bytes of transmitted packets. Fixes: 30d932279dc2 ("net: stmmac: Add support for VLAN Insertion Offload") Fixes: e94e3f3b51ce ("net: stmmac: Add support for VLAN Insertion Offload in GMAC4+") Fixes: 1d2c7a5fee31 ("net: stmmac: Refactor VLAN implementation") Signed-off-by: Rohan G Thomas Reviewed-by: Boon Khai Ng Link: https://patch.msgid.link/20251028-qbv-fixes-v4-1-26481c7634e3@altera.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 18 ++++-------------- .../net/ethernet/stmicro/stmmac/stmmac_vlan.c | 2 +- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 650d75b73e0b..5b452469ad2c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4089,18 +4089,11 @@ static int stmmac_release(struct net_device *dev) static bool stmmac_vlan_insert(struct stmmac_priv *priv, struct sk_buff *skb, struct stmmac_tx_queue *tx_q) { - u16 tag = 0x0, inner_tag = 0x0; - u32 inner_type = 0x0; struct dma_desc *p; + u16 tag = 0x0; - if (!priv->dma_cap.vlins) + if (!priv->dma_cap.vlins || !skb_vlan_tag_present(skb)) return false; - if (!skb_vlan_tag_present(skb)) - return false; - if (skb->vlan_proto == htons(ETH_P_8021AD)) { - inner_tag = skb_vlan_tag_get(skb); - inner_type = STMMAC_VLAN_INSERT; - } tag = skb_vlan_tag_get(skb); @@ -4109,7 +4102,7 @@ static bool stmmac_vlan_insert(struct stmmac_priv *priv, struct sk_buff *skb, else p = &tx_q->dma_tx[tx_q->cur_tx]; - if (stmmac_set_desc_vlan_tag(priv, p, tag, inner_tag, inner_type)) + if (stmmac_set_desc_vlan_tag(priv, p, tag, 0x0, 0x0)) return false; stmmac_set_tx_owner(priv, p); @@ -7573,11 +7566,8 @@ int stmmac_dvr_probe(struct device *device, ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->features |= NETIF_F_HW_VLAN_STAG_FILTER; } - if (priv->dma_cap.vlins) { + if (priv->dma_cap.vlins) ndev->features |= NETIF_F_HW_VLAN_CTAG_TX; - if (priv->dma_cap.dvlan) - ndev->features |= NETIF_F_HW_VLAN_STAG_TX; - } #endif priv->msg_enable = netif_msg_init(debug, default_msg_level); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c index 0b6f6228ae35..ff02a79c00d4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c @@ -212,7 +212,7 @@ static void vlan_enable(struct mac_device_info *hw, u32 type) value = readl(ioaddr + VLAN_INCL); value |= VLAN_VLTI; - value |= VLAN_CSVL; /* Only use SVLAN */ + value &= ~VLAN_CSVL; /* Only use CVLAN */ value &= ~VLAN_VLC; value |= (type << VLAN_VLC_SHIFT) & VLAN_VLC; writel(value, ioaddr + VLAN_INCL); From ded9813d17d3dd50a08e7a2ca1495769ef9c6673 Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Tue, 28 Oct 2025 11:18:44 +0800 Subject: [PATCH 72/75] net: stmmac: Consider Tx VLAN offload tag length for maxSDU Queue maxSDU requirement of 802.1 Qbv standard requires mac to drop packets that exceeds maxSDU length and maxSDU doesn't include preamble, destination and source address, or FCS but includes ethernet type and VLAN header. On hardware with Tx VLAN offload enabled, VLAN header length is not included in the skb->len, when Tx VLAN offload is requested. This leads to incorrect length checks and allows transmission of oversized packets. Add the VLAN_HLEN to the skb->len before checking the Qbv maxSDU if Tx VLAN offload is requested for the packet. Fixes: c5c3e1bfc9e0 ("net: stmmac: Offload queueMaxSDU from tc-taprio") Signed-off-by: Rohan G Thomas Reviewed-by: Matthew Gerlach Link: https://patch.msgid.link/20251028-qbv-fixes-v4-2-26481c7634e3@altera.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5b452469ad2c..7b90ecd3a55e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4500,6 +4500,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) bool has_vlan, set_ic; int entry, first_tx; dma_addr_t des; + u32 sdu_len; tx_q = &priv->dma_conf.tx_queue[queue]; txq_stats = &priv->xstats.txq_stats[queue]; @@ -4517,10 +4518,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) } if (priv->est && priv->est->enable && - priv->est->max_sdu[queue] && - skb->len > priv->est->max_sdu[queue]){ - priv->xstats.max_sdu_txq_drop[queue]++; - goto max_sdu_err; + priv->est->max_sdu[queue]) { + sdu_len = skb->len; + /* Add VLAN tag length if VLAN tag insertion offload is requested */ + if (priv->dma_cap.vlins && skb_vlan_tag_present(skb)) + sdu_len += VLAN_HLEN; + if (sdu_len > priv->est->max_sdu[queue]) { + priv->xstats.max_sdu_txq_drop[queue]++; + goto max_sdu_err; + } } if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) { From 48b2e323c018c4c908ae5acabff326647bab5240 Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Tue, 28 Oct 2025 11:18:45 +0800 Subject: [PATCH 73/75] net: stmmac: est: Fix GCL bounds checks Fix the bounds checks for the hw supported maximum GCL entry count and gate interval time. Fixes: b60189e0392f ("net: stmmac: Integrate EST with TAPRIO scheduler API") Signed-off-by: Rohan G Thomas Reviewed-by: Matthew Gerlach Link: https://patch.msgid.link/20251028-qbv-fixes-v4-3-26481c7634e3@altera.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 97e89a604abd..3b4d4696afe9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -981,7 +981,7 @@ static int tc_taprio_configure(struct stmmac_priv *priv, if (qopt->cmd == TAPRIO_CMD_DESTROY) goto disable; - if (qopt->num_entries >= dep) + if (qopt->num_entries > dep) return -EINVAL; if (!qopt->cycle_time) return -ERANGE; @@ -1012,7 +1012,7 @@ static int tc_taprio_configure(struct stmmac_priv *priv, s64 delta_ns = qopt->entries[i].interval; u32 gates = qopt->entries[i].gate_mask; - if (delta_ns > GENMASK(wid, 0)) + if (delta_ns > GENMASK(wid - 1, 0)) return -ERANGE; if (gates > GENMASK(31 - wid, 0)) return -ERANGE; From 6a2108c78069fda000729b88c97b1eba0405e6d7 Mon Sep 17 00:00:00 2001 From: Shivaji Kant Date: Wed, 29 Oct 2025 06:54:19 +0000 Subject: [PATCH 74/75] net: devmem: refresh devmem TX dst in case of route invalidation The zero-copy Device Memory (Devmem) transmit path relies on the socket's route cache (`dst_entry`) to validate that the packet is being sent via the network device to which the DMA buffer was bound. However, this check incorrectly fails and returns `-ENODEV` if the socket's route cache entry (`dst`) is merely missing or expired (`dst == NULL`). This scenario is observed during network events, such as when flow steering rules are deleted, leading to a temporary route cache invalidation. This patch fixes -ENODEV error for `net_devmem_get_binding()` by doing the following: 1. It attempts to rebuild the route via `rebuild_header()` if the route is initially missing (`dst == NULL`). This allows the TCP/IP stack to recover from transient route cache misses. 2. It uses `rcu_read_lock()` and `dst_dev_rcu()` to safely access the network device pointer (`dst_dev`) from the route, preventing use-after-free conditions if the device is concurrently removed. 3. It maintains the critical safety check by validating that the retrieved destination device (`dst_dev`) is exactly the device registered in the Devmem binding (`binding->dev`). These changes prevent unnecessary ENODEV failures while maintaining the critical safety requirement that the Devmem resources are only used on the bound network device. Reviewed-by: Bobby Eshleman Reported-by: Eric Dumazet Reported-by: Vedant Mathur Suggested-by: Eric Dumazet Fixes: bd61848900bf ("net: devmem: Implement TX path") Signed-off-by: Shivaji Kant Link: https://patch.msgid.link/20251029065420.3489943-1-shivajikant@google.com Signed-off-by: Jakub Kicinski --- net/core/devmem.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/net/core/devmem.c b/net/core/devmem.c index d9de31a6cc7f..1d04754bc756 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "devmem.h" @@ -357,7 +358,8 @@ struct net_devmem_dmabuf_binding *net_devmem_get_binding(struct sock *sk, unsigned int dmabuf_id) { struct net_devmem_dmabuf_binding *binding; - struct dst_entry *dst = __sk_dst_get(sk); + struct net_device *dst_dev; + struct dst_entry *dst; int err = 0; binding = net_devmem_lookup_dmabuf(dmabuf_id); @@ -366,16 +368,35 @@ struct net_devmem_dmabuf_binding *net_devmem_get_binding(struct sock *sk, goto out_err; } + rcu_read_lock(); + dst = __sk_dst_get(sk); + /* If dst is NULL (route expired), attempt to rebuild it. */ + if (unlikely(!dst)) { + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) { + err = -EHOSTUNREACH; + goto out_unlock; + } + dst = __sk_dst_get(sk); + if (unlikely(!dst)) { + err = -ENODEV; + goto out_unlock; + } + } + /* The dma-addrs in this binding are only reachable to the corresponding * net_device. */ - if (!dst || !dst->dev || dst->dev->ifindex != binding->dev->ifindex) { + dst_dev = dst_dev_rcu(dst); + if (unlikely(!dst_dev) || unlikely(dst_dev != binding->dev)) { err = -ENODEV; - goto out_err; + goto out_unlock; } + rcu_read_unlock(); return binding; +out_unlock: + rcu_read_unlock(); out_err: if (binding) net_devmem_dmabuf_binding_put(binding); From 51e5ad549c43b557c7da1e4d1a1dcf061b4a5f6c Mon Sep 17 00:00:00 2001 From: Ranganath V N Date: Sun, 26 Oct 2025 22:03:12 +0530 Subject: [PATCH 75/75] net: sctp: fix KMSAN uninit-value in sctp_inq_pop Fix an issue detected by syzbot: KMSAN reported an uninitialized-value access in sctp_inq_pop BUG: KMSAN: uninit-value in sctp_inq_pop The issue is actually caused by skb trimming via sk_filter() in sctp_rcv(). In the reproducer, skb->len becomes 1 after sk_filter(), which bypassed the original check: if (skb->len < sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr) + skb_transport_offset(skb)) To handle this safely, a new check should be performed after sk_filter(). Reported-by: syzbot+d101e12bccd4095460e7@syzkaller.appspotmail.com Tested-by: syzbot+d101e12bccd4095460e7@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d101e12bccd4095460e7 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Suggested-by: Xin Long Signed-off-by: Ranganath V N Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251026-kmsan_fix-v3-1-2634a409fa5f@gmail.com Signed-off-by: Paolo Abeni --- net/sctp/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 7e99894778d4..e119e460ccde 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -190,7 +190,7 @@ int sctp_rcv(struct sk_buff *skb) goto discard_release; nf_reset_ct(skb); - if (sk_filter(sk, skb)) + if (sk_filter(sk, skb) || skb->len < sizeof(struct sctp_chunkhdr)) goto discard_release; /* Create an SCTP packet structure. */