From 2469bb6a6af944755a7d7daf66be90f3b8decbf9 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Mon, 27 Oct 2025 09:49:12 +0800 Subject: [PATCH 01/63] Revert "wifi: ath10k: avoid unnecessary wait for service ready message" This reverts commit 51a73f1b2e56b0324b4a3bb8cebc4221b5be4c7a. Although this commit benefits QCA6174, it breaks QCA988x and QCA9984 [1][2]. Since it is not likely to root cause/fix this issue in a short time, revert it to get those chips back. Compile tested only. Fixes: 51a73f1b2e56 ("wifi: ath10k: avoid unnecessary wait for service ready message") Link: https://lore.kernel.org/ath10k/6d41bc00602c33ffbf68781f563ff2e6c6915a3e.camel@gmail.com # [1] Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220671 # [2] Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20251027-ath10k-revert-polling-first-change-v1-1-89aaf3bcbfa1@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/wmi.c | 39 ++++++++++++++------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index b3b00d324075..b4aad6604d6d 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -1764,32 +1764,33 @@ void ath10k_wmi_put_wmi_channel(struct ath10k *ar, struct wmi_channel *ch, int ath10k_wmi_wait_for_service_ready(struct ath10k *ar) { - unsigned long timeout = jiffies + WMI_SERVICE_READY_TIMEOUT_HZ; unsigned long time_left, i; - /* Sometimes the PCI HIF doesn't receive interrupt - * for the service ready message even if the buffer - * was completed. PCIe sniffer shows that it's - * because the corresponding CE ring doesn't fires - * it. Workaround here by polling CE rings. Since - * the message could arrive at any time, continue - * polling until timeout. - */ - do { + time_left = wait_for_completion_timeout(&ar->wmi.service_ready, + WMI_SERVICE_READY_TIMEOUT_HZ); + if (!time_left) { + /* Sometimes the PCI HIF doesn't receive interrupt + * for the service ready message even if the buffer + * was completed. PCIe sniffer shows that it's + * because the corresponding CE ring doesn't fires + * it. Workaround here by polling CE rings once. + */ + ath10k_warn(ar, "failed to receive service ready completion, polling..\n"); + for (i = 0; i < CE_COUNT; i++) ath10k_hif_send_complete_check(ar, i, 1); - /* The 100 ms granularity is a tradeoff considering scheduler - * overhead and response latency - */ time_left = wait_for_completion_timeout(&ar->wmi.service_ready, - msecs_to_jiffies(100)); - if (time_left) - return 0; - } while (time_before(jiffies, timeout)); + WMI_SERVICE_READY_TIMEOUT_HZ); + if (!time_left) { + ath10k_warn(ar, "polling timed out\n"); + return -ETIMEDOUT; + } - ath10k_warn(ar, "failed to receive service ready completion\n"); - return -ETIMEDOUT; + ath10k_warn(ar, "service ready completion received, continuing normally\n"); + } + + return 0; } int ath10k_wmi_wait_for_unified_ready(struct ath10k *ar) From 7ceba45a6658ce637da334cd0ebf27f4ede6c0fe Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 28 Oct 2025 12:58:37 +0200 Subject: [PATCH 02/63] wifi: cfg80211: add an hrtimer based delayed work item The normal timer mechanism assume that timeout further in the future need a lower accuracy. As an example, the granularity for a timer scheduled 4096 ms in the future on a 1000 Hz system is already 512 ms. This granularity is perfectly sufficient for e.g. timeouts, but there are other types of events that will happen at a future point in time and require a higher accuracy. Add a new wiphy_hrtimer_work type that uses an hrtimer internally. The API is almost identical to the existing wiphy_delayed_work and it can be used as a drop-in replacement after minor adjustments. The work will be scheduled relative to the current time with a slack of 1 millisecond. CC: stable@vger.kernel.org # 6.4+ Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251028125710.7f13a2adc5eb.I01b5af0363869864b0580d9c2a1770bafab69566@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 78 ++++++++++++++++++++++++++++++++++++++++++ net/wireless/core.c | 56 ++++++++++++++++++++++++++++++ net/wireless/trace.h | 21 ++++++++++++ 3 files changed, 155 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 781624f5913a..820e299f06b5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6435,6 +6435,11 @@ static inline void wiphy_delayed_work_init(struct wiphy_delayed_work *dwork, * after wiphy_lock() was called. Therefore, wiphy_cancel_work() can * use just cancel_work() instead of cancel_work_sync(), it requires * being in a section protected by wiphy_lock(). + * + * Note that these are scheduled with a timer where the accuracy + * becomes less the longer in the future the scheduled timer is. Use + * wiphy_hrtimer_work_queue() if the timer must be not be late by more + * than approximately 10 percent. */ void wiphy_delayed_work_queue(struct wiphy *wiphy, struct wiphy_delayed_work *dwork, @@ -6506,6 +6511,79 @@ void wiphy_delayed_work_flush(struct wiphy *wiphy, bool wiphy_delayed_work_pending(struct wiphy *wiphy, struct wiphy_delayed_work *dwork); +struct wiphy_hrtimer_work { + struct wiphy_work work; + struct wiphy *wiphy; + struct hrtimer timer; +}; + +enum hrtimer_restart wiphy_hrtimer_work_timer(struct hrtimer *t); + +static inline void wiphy_hrtimer_work_init(struct wiphy_hrtimer_work *hrwork, + wiphy_work_func_t func) +{ + hrtimer_setup(&hrwork->timer, wiphy_hrtimer_work_timer, + CLOCK_BOOTTIME, HRTIMER_MODE_REL); + wiphy_work_init(&hrwork->work, func); +} + +/** + * wiphy_hrtimer_work_queue - queue hrtimer work for the wiphy + * @wiphy: the wiphy to queue for + * @hrwork: the high resolution timer worker + * @delay: the delay given as a ktime_t + * + * Please refer to wiphy_delayed_work_queue(). The difference is that + * the hrtimer work uses a high resolution timer for scheduling. This + * may be needed if timeouts might be scheduled further in the future + * and the accuracy of the normal timer is not sufficient. + * + * Expect a delay of a few milliseconds as the timer is scheduled + * with some slack and some more time may pass between queueing the + * work and its start. + */ +void wiphy_hrtimer_work_queue(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork, + ktime_t delay); + +/** + * wiphy_hrtimer_work_cancel - cancel previously queued hrtimer work + * @wiphy: the wiphy, for debug purposes + * @hrtimer: the hrtimer work to cancel + * + * Cancel the work *without* waiting for it, this assumes being + * called under the wiphy mutex acquired by wiphy_lock(). + */ +void wiphy_hrtimer_work_cancel(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrtimer); + +/** + * wiphy_hrtimer_work_flush - flush previously queued hrtimer work + * @wiphy: the wiphy, for debug purposes + * @hrwork: the hrtimer work to flush + * + * Flush the work (i.e. run it if pending). This must be called + * under the wiphy mutex acquired by wiphy_lock(). + */ +void wiphy_hrtimer_work_flush(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork); + +/** + * wiphy_hrtimer_work_pending - Find out whether a wiphy hrtimer + * work item is currently pending. + * + * @wiphy: the wiphy, for debug purposes + * @hrwork: the hrtimer work in question + * + * Return: true if timer is pending, false otherwise + * + * Please refer to the wiphy_delayed_work_pending() documentation as + * this is the equivalent function for hrtimer based delayed work + * items. + */ +bool wiphy_hrtimer_work_pending(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork); + /** * enum ieee80211_ap_reg_power - regulatory power for an Access Point * diff --git a/net/wireless/core.c b/net/wireless/core.c index 797f9f2004a6..54a34d8d356e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1787,6 +1787,62 @@ bool wiphy_delayed_work_pending(struct wiphy *wiphy, } EXPORT_SYMBOL_GPL(wiphy_delayed_work_pending); +enum hrtimer_restart wiphy_hrtimer_work_timer(struct hrtimer *t) +{ + struct wiphy_hrtimer_work *hrwork = + container_of(t, struct wiphy_hrtimer_work, timer); + + wiphy_work_queue(hrwork->wiphy, &hrwork->work); + + return HRTIMER_NORESTART; +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_timer); + +void wiphy_hrtimer_work_queue(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork, + ktime_t delay) +{ + trace_wiphy_hrtimer_work_queue(wiphy, &hrwork->work, delay); + + if (!delay) { + hrtimer_cancel(&hrwork->timer); + wiphy_work_queue(wiphy, &hrwork->work); + return; + } + + hrwork->wiphy = wiphy; + hrtimer_start_range_ns(&hrwork->timer, delay, + 1000 * NSEC_PER_USEC, HRTIMER_MODE_REL); +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_queue); + +void wiphy_hrtimer_work_cancel(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork) +{ + lockdep_assert_held(&wiphy->mtx); + + hrtimer_cancel(&hrwork->timer); + wiphy_work_cancel(wiphy, &hrwork->work); +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_cancel); + +void wiphy_hrtimer_work_flush(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork) +{ + lockdep_assert_held(&wiphy->mtx); + + hrtimer_cancel(&hrwork->timer); + wiphy_work_flush(wiphy, &hrwork->work); +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_flush); + +bool wiphy_hrtimer_work_pending(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork) +{ + return hrtimer_is_queued(&hrwork->timer); +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_pending); + static int __init cfg80211_init(void) { int err; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 8a4c34112eb5..2b71f1d867a0 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -304,6 +304,27 @@ TRACE_EVENT(wiphy_delayed_work_queue, __entry->delay) ); +TRACE_EVENT(wiphy_hrtimer_work_queue, + TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work, + ktime_t delay), + TP_ARGS(wiphy, work, delay), + TP_STRUCT__entry( + WIPHY_ENTRY + __field(void *, instance) + __field(void *, func) + __field(ktime_t, delay) + ), + TP_fast_assign( + WIPHY_ASSIGN; + __entry->instance = work; + __entry->func = work->func; + __entry->delay = delay; + ), + TP_printk(WIPHY_PR_FMT " instance=%p func=%pS delay=%llu", + WIPHY_PR_ARG, __entry->instance, __entry->func, + __entry->delay) +); + TRACE_EVENT(wiphy_work_worker_start, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy), From dfa865d490b1bd252045463588a91a4d3c82f3c8 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 28 Oct 2025 12:58:38 +0200 Subject: [PATCH 03/63] wifi: mac80211: use wiphy_hrtimer_work for ttlm_work The work item may be scheduled relatively far in the future. As the event happens at a specific point in time, the normal timer accuracy is not sufficient in that case. Switch to use wiphy_hrtimer_work so that the accuracy is sufficient. CC: stable@vger.kernel.org Fixes: 702e80470a33 ("wifi: mac80211: support handling of advertised TID-to-link mapping") Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251028125710.83c2c611545e.I35498a6d883ea24b0dc4910cf521aa768d2a0e90@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mlme.c | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 73fd86ec1bce..eb22279c6e01 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -616,7 +616,7 @@ struct ieee80211_if_managed { u16 removed_links; /* TID-to-link mapping support */ - struct wiphy_delayed_work ttlm_work; + struct wiphy_hrtimer_work ttlm_work; struct ieee80211_adv_ttlm_info ttlm_info; struct wiphy_work teardown_ttlm_work; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3b5827ea438e..623a46b3214e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -45,7 +45,7 @@ #define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10) #define IEEE80211_ASSOC_MAX_TRIES 3 -#define IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS msecs_to_jiffies(100) +#define IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS (100 * USEC_PER_MSEC) #define IEEE80211_ADV_TTLM_ST_UNDERFLOW 0xff00 #define IEEE80211_NEG_TTLM_REQ_TIMEOUT (HZ / 5) @@ -4242,7 +4242,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(&sdata->u.mgd.ttlm_info, 0, sizeof(sdata->u.mgd.ttlm_info)); - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); memset(&sdata->vif.neg_ttlm, 0, sizeof(sdata->vif.neg_ttlm)); wiphy_delayed_work_cancel(sdata->local->hw.wiphy, @@ -7095,7 +7095,7 @@ static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata, /* if a planned TID-to-link mapping was cancelled - * abort it */ - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ttlm_work); } else if (sdata->u.mgd.ttlm_info.active) { /* if no TID-to-link element, set to default mapping in @@ -7130,7 +7130,7 @@ static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata, if (ttlm_info.switch_time) { u16 beacon_ts_tu, st_tu, delay; - u32 delay_jiffies; + u64 delay_usec; u64 mask; /* The t2l map switch time is indicated with a partial @@ -7152,23 +7152,23 @@ static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata, if (delay > IEEE80211_ADV_TTLM_ST_UNDERFLOW) return; - delay_jiffies = TU_TO_JIFFIES(delay); + delay_usec = ieee80211_tu_to_usec(delay); /* Link switching can take time, so schedule it * 100ms before to be ready on time */ - if (delay_jiffies > IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS) - delay_jiffies -= + if (delay_usec > IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS) + delay_usec -= IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS; else - delay_jiffies = 0; + delay_usec = 0; sdata->u.mgd.ttlm_info = ttlm_info; - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ttlm_work); - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &sdata->u.mgd.ttlm_work, - delay_jiffies); + us_to_ktime(delay_usec)); return; } } @@ -8802,7 +8802,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) timer_setup(&ifmgd->conn_mon_timer, ieee80211_sta_conn_mon_timer, 0); wiphy_delayed_work_init(&ifmgd->tx_tspec_wk, ieee80211_sta_handle_tspec_ac_params_wk); - wiphy_delayed_work_init(&ifmgd->ttlm_work, + wiphy_hrtimer_work_init(&ifmgd->ttlm_work, ieee80211_tid_to_link_map_work); wiphy_delayed_work_init(&ifmgd->neg_ttlm_timeout_work, ieee80211_neg_ttlm_timeout_work); From 3f654d53dff565095d83a84e3b6187526dadf4c8 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 28 Oct 2025 12:58:39 +0200 Subject: [PATCH 04/63] wifi: mac80211: use wiphy_hrtimer_work for ml_reconf_work The work item may be scheduled relatively far in the future. As the event happens at a specific point in time, the normal timer accuracy is not sufficient in that case. Switch to use wiphy_hrtimer_work so that the accuracy is sufficient. CC: stable@vger.kernel.org Fixes: 8eb8dd2ffbbb ("wifi: mac80211: Support link removal using Reconfiguration ML element") Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251028125710.24a7b54e9e37.I063c5c15bf7672f94cea75f83e486a3ca52d098f@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mlme.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index eb22279c6e01..eb38049b2252 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -612,7 +612,7 @@ struct ieee80211_if_managed { u8 *assoc_req_ies; size_t assoc_req_ies_len; - struct wiphy_delayed_work ml_reconf_work; + struct wiphy_hrtimer_work ml_reconf_work; u16 removed_links; /* TID-to-link mapping support */ diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 623a46b3214e..f95bcf84ecc2 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4249,7 +4249,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, &ifmgd->neg_ttlm_timeout_work); sdata->u.mgd.removed_links = 0; - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ml_reconf_work); wiphy_work_cancel(sdata->local->hw.wiphy, @@ -6876,7 +6876,7 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, /* In case the removal was cancelled, abort it */ if (sdata->u.mgd.removed_links) { sdata->u.mgd.removed_links = 0; - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ml_reconf_work); } return; @@ -6906,9 +6906,9 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, } sdata->u.mgd.removed_links = removed_links; - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &sdata->u.mgd.ml_reconf_work, - TU_TO_JIFFIES(delay)); + us_to_ktime(ieee80211_tu_to_usec(delay))); } static int ieee80211_ttlm_set_links(struct ieee80211_sub_if_data *sdata, @@ -8793,7 +8793,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ieee80211_csa_connection_drop_work); wiphy_delayed_work_init(&ifmgd->tdls_peer_del_work, ieee80211_tdls_peer_del_work); - wiphy_delayed_work_init(&ifmgd->ml_reconf_work, + wiphy_hrtimer_work_init(&ifmgd->ml_reconf_work, ieee80211_ml_reconf_work); wiphy_delayed_work_init(&ifmgd->reconf.wk, ieee80211_ml_sta_reconf_timeout); From fbc1cc6973099f45e4c30b86f12b4435c7cb7d24 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 28 Oct 2025 12:58:40 +0200 Subject: [PATCH 05/63] wifi: mac80211: use wiphy_hrtimer_work for csa.switch_work The work item may be scheduled relatively far in the future. As the event happens at a specific point in time, the normal timer accuracy is not sufficient in that case. Switch to use wiphy_hrtimer_work so that the accuracy is sufficient. To make this work, use the same clock to store the timestamp. CC: stable@vger.kernel.org Fixes: ec3252bff7b6 ("wifi: mac80211: use wiphy work for channel switch") Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251028125710.68258c7e4ac4.I4ff2b2cdffbbf858bf5f08baccc7a88c4f9efe6f@changeid Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 2 +- net/mac80211/ieee80211_i.h | 4 ++-- net/mac80211/link.c | 4 ++-- net/mac80211/mlme.c | 18 +++++++++--------- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 57065714cf8c..7f8799fd673e 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1290,7 +1290,7 @@ ieee80211_link_chanctx_reservation_complete(struct ieee80211_link_data *link) &link->csa.finalize_work); break; case NL80211_IFTYPE_STATION: - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &link->u.mgd.csa.switch_work, 0); break; case NL80211_IFTYPE_UNSPECIFIED: diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index eb38049b2252..878c3b14aeb8 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1017,10 +1017,10 @@ struct ieee80211_link_data_managed { bool operating_11g_mode; struct { - struct wiphy_delayed_work switch_work; + struct wiphy_hrtimer_work switch_work; struct cfg80211_chan_def ap_chandef; struct ieee80211_parsed_tpe tpe; - unsigned long time; + ktime_t time; bool waiting_bcn; bool ignored_same_chan; bool blocked_tx; diff --git a/net/mac80211/link.c b/net/mac80211/link.c index d71eabe5abf8..4a19b765ccb6 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -472,10 +472,10 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, * from there. */ if (link->conf->csa_active) - wiphy_delayed_work_queue(local->hw.wiphy, + wiphy_hrtimer_work_queue(local->hw.wiphy, &link->u.mgd.csa.switch_work, link->u.mgd.csa.time - - jiffies); + ktime_get_boottime()); } for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f95bcf84ecc2..f3138d158535 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2594,7 +2594,7 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success, return; } - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &link->u.mgd.csa.switch_work, 0); } @@ -2753,7 +2753,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, .timestamp = timestamp, .device_timestamp = device_timestamp, }; - unsigned long now; + u32 csa_time_tu; + ktime_t now; int res; lockdep_assert_wiphy(local->hw.wiphy); @@ -2983,10 +2984,9 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, csa_ie.mode); /* we may have to handle timeout for deactivated link in software */ - now = jiffies; - link->u.mgd.csa.time = now + - TU_TO_JIFFIES((max_t(int, csa_ie.count, 1) - 1) * - link->conf->beacon_int); + now = ktime_get_boottime(); + csa_time_tu = (max_t(int, csa_ie.count, 1) - 1) * link->conf->beacon_int; + link->u.mgd.csa.time = now + us_to_ktime(ieee80211_tu_to_usec(csa_time_tu)); if (ieee80211_vif_link_active(&sdata->vif, link->link_id) && local->ops->channel_switch) { @@ -3001,7 +3001,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, } /* channel switch handled in software */ - wiphy_delayed_work_queue(local->hw.wiphy, + wiphy_hrtimer_work_queue(local->hw.wiphy, &link->u.mgd.csa.switch_work, link->u.mgd.csa.time - now); return; @@ -8849,7 +8849,7 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link) else link->u.mgd.req_smps = IEEE80211_SMPS_OFF; - wiphy_delayed_work_init(&link->u.mgd.csa.switch_work, + wiphy_hrtimer_work_init(&link->u.mgd.csa.switch_work, ieee80211_csa_switch_work); ieee80211_clear_tpe(&link->conf->tpe); @@ -10064,7 +10064,7 @@ void ieee80211_mgd_stop_link(struct ieee80211_link_data *link) &link->u.mgd.request_smps_work); wiphy_work_cancel(link->sdata->local->hw.wiphy, &link->u.mgd.recalc_smps); - wiphy_delayed_work_cancel(link->sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(link->sdata->local->hw.wiphy, &link->u.mgd.csa.switch_work); } From 9222582ec524707fbb9d076febead5b6a07611ed Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Tue, 28 Oct 2025 14:07:44 +0800 Subject: [PATCH 06/63] Revert "wifi: ath12k: Fix missing station power save configuration" This reverts commit 4b66d18918f8e4d85e51974a9e3ce9abad5c7c3d. In [1], Ross Brown reports poor performance of WCN7850 after enabling power save. Temporarily revert the fix; it will be re-enabled once the issue is resolved. Tested-on: WCN7850 hw2.0 PCI WLAN.IOE_HMT.1.1-00011-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1 Fixes: 4b66d18918f8 ("wifi: ath12k: Fix missing station power save configuration") Reported-by: Ross Brown Closes: https://lore.kernel.org/all/CAMn66qZENLhDOcVJuwUZ3ir89PVtVnQRq9DkV5xjJn1p6BKB9w@mail.gmail.com/ # [1] Signed-off-by: Miaoqing Pan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20251028060744.897198-1-miaoqing.pan@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 122 ++++++++++++-------------- 1 file changed, 55 insertions(+), 67 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index eacab798630a..db351c922018 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4064,68 +4064,12 @@ static int ath12k_mac_fils_discovery(struct ath12k_link_vif *arvif, return ret; } -static void ath12k_mac_vif_setup_ps(struct ath12k_link_vif *arvif) -{ - struct ath12k *ar = arvif->ar; - struct ieee80211_vif *vif = arvif->ahvif->vif; - struct ieee80211_conf *conf = &ath12k_ar_to_hw(ar)->conf; - enum wmi_sta_powersave_param param; - struct ieee80211_bss_conf *info; - enum wmi_sta_ps_mode psmode; - int ret; - int timeout; - bool enable_ps; - - lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); - - if (vif->type != NL80211_IFTYPE_STATION) - return; - - enable_ps = arvif->ahvif->ps; - if (enable_ps) { - psmode = WMI_STA_PS_MODE_ENABLED; - param = WMI_STA_PS_PARAM_INACTIVITY_TIME; - - timeout = conf->dynamic_ps_timeout; - if (timeout == 0) { - info = ath12k_mac_get_link_bss_conf(arvif); - if (!info) { - ath12k_warn(ar->ab, "unable to access bss link conf in setup ps for vif %pM link %u\n", - vif->addr, arvif->link_id); - return; - } - - /* firmware doesn't like 0 */ - timeout = ieee80211_tu_to_usec(info->beacon_int) / 1000; - } - - ret = ath12k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param, - timeout); - if (ret) { - ath12k_warn(ar->ab, "failed to set inactivity time for vdev %d: %i\n", - arvif->vdev_id, ret); - return; - } - } else { - psmode = WMI_STA_PS_MODE_DISABLED; - } - - ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac vdev %d psmode %s\n", - arvif->vdev_id, psmode ? "enable" : "disable"); - - ret = ath12k_wmi_pdev_set_ps_mode(ar, arvif->vdev_id, psmode); - if (ret) - ath12k_warn(ar->ab, "failed to set sta power save mode %d for vdev %d: %d\n", - psmode, arvif->vdev_id, ret); -} - static void ath12k_mac_op_vif_cfg_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u64 changed) { struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); unsigned long links = ahvif->links_map; - struct ieee80211_vif_cfg *vif_cfg; struct ieee80211_bss_conf *info; struct ath12k_link_vif *arvif; struct ieee80211_sta *sta; @@ -4189,24 +4133,61 @@ static void ath12k_mac_op_vif_cfg_changed(struct ieee80211_hw *hw, } } } +} - if (changed & BSS_CHANGED_PS) { - links = ahvif->links_map; - vif_cfg = &vif->cfg; +static void ath12k_mac_vif_setup_ps(struct ath12k_link_vif *arvif) +{ + struct ath12k *ar = arvif->ar; + struct ieee80211_vif *vif = arvif->ahvif->vif; + struct ieee80211_conf *conf = &ath12k_ar_to_hw(ar)->conf; + enum wmi_sta_powersave_param param; + struct ieee80211_bss_conf *info; + enum wmi_sta_ps_mode psmode; + int ret; + int timeout; + bool enable_ps; - for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { - arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); - if (!arvif || !arvif->ar) - continue; + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); - ar = arvif->ar; + if (vif->type != NL80211_IFTYPE_STATION) + return; - if (ar->ab->hw_params->supports_sta_ps) { - ahvif->ps = vif_cfg->ps; - ath12k_mac_vif_setup_ps(arvif); + enable_ps = arvif->ahvif->ps; + if (enable_ps) { + psmode = WMI_STA_PS_MODE_ENABLED; + param = WMI_STA_PS_PARAM_INACTIVITY_TIME; + + timeout = conf->dynamic_ps_timeout; + if (timeout == 0) { + info = ath12k_mac_get_link_bss_conf(arvif); + if (!info) { + ath12k_warn(ar->ab, "unable to access bss link conf in setup ps for vif %pM link %u\n", + vif->addr, arvif->link_id); + return; } + + /* firmware doesn't like 0 */ + timeout = ieee80211_tu_to_usec(info->beacon_int) / 1000; } + + ret = ath12k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param, + timeout); + if (ret) { + ath12k_warn(ar->ab, "failed to set inactivity time for vdev %d: %i\n", + arvif->vdev_id, ret); + return; + } + } else { + psmode = WMI_STA_PS_MODE_DISABLED; } + + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac vdev %d psmode %s\n", + arvif->vdev_id, psmode ? "enable" : "disable"); + + ret = ath12k_wmi_pdev_set_ps_mode(ar, arvif->vdev_id, psmode); + if (ret) + ath12k_warn(ar->ab, "failed to set sta power save mode %d for vdev %d: %d\n", + psmode, arvif->vdev_id, ret); } static bool ath12k_mac_supports_tpc(struct ath12k *ar, struct ath12k_vif *ahvif, @@ -4228,6 +4209,7 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, { struct ath12k_vif *ahvif = arvif->ahvif; struct ieee80211_vif *vif = ath12k_ahvif_to_vif(ahvif); + struct ieee80211_vif_cfg *vif_cfg = &vif->cfg; struct cfg80211_chan_def def; u32 param_id, param_value; enum nl80211_band band; @@ -4514,6 +4496,12 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, } ath12k_mac_fils_discovery(arvif, info); + + if (changed & BSS_CHANGED_PS && + ar->ab->hw_params->supports_sta_ps) { + ahvif->ps = vif_cfg->ps; + ath12k_mac_vif_setup_ps(arvif); + } } static struct ath12k_vif_cache *ath12k_ahvif_get_link_cache(struct ath12k_vif *ahvif, From 70e8335485966d7d4ed85976dceab52803b151a2 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Tue, 28 Oct 2025 23:13:39 +0530 Subject: [PATCH 07/63] wifi: zd1211rw: fix potential memory leak in __zd_usb_enable_rx() The memory allocated for urbs with kcalloc() is not freed on any error path. Fix that by freeing it in the error path. Fixes: e85d0918b54f ("[PATCH] ZyDAS ZD1211 USB-WLAN driver") Signed-off-by: Abdun Nihaal Link: https://patch.msgid.link/20251028174341.139134-1-nihaal@cse.iitm.ac.in Signed-off-by: Johannes Berg --- drivers/net/wireless/zydas/zd1211rw/zd_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c index 2faa0de2a36e..8ee15a15f4ca 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c @@ -791,6 +791,7 @@ static int __zd_usb_enable_rx(struct zd_usb *usb) if (urbs) { for (i = 0; i < RX_URBS_COUNT; i++) free_rx_urb(urbs[i]); + kfree(urbs); } return r; } From 5c5f1f64681cc889d9b13e4a61285e9e029d6ab5 Mon Sep 17 00:00:00 2001 From: Raphael Pinsonneault-Thibeault Date: Fri, 24 Oct 2025 12:29:10 -0400 Subject: [PATCH 08/63] Bluetooth: hci_event: validate skb length for unknown CC opcode In hci_cmd_complete_evt(), if the command complete event has an unknown opcode, we assume the first byte of the remaining skb->data contains the return status. However, parameter data has previously been pulled in hci_event_func(), which may leave the skb empty. If so, using skb->data[0] for the return status uses un-init memory. The fix is to check skb->len before using skb->data. Reported-by: syzbot+a9a4bedfca6aa9d7fa24@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=a9a4bedfca6aa9d7fa24 Tested-by: syzbot+a9a4bedfca6aa9d7fa24@syzkaller.appspotmail.com Fixes: afcb3369f46ed ("Bluetooth: hci_event: Fix vendor (unknown) opcode status handling") Signed-off-by: Raphael Pinsonneault-Thibeault Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d37db364acf7..f20c826509b6 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4218,6 +4218,13 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, void *data, } if (i == ARRAY_SIZE(hci_cc_table)) { + if (!skb->len) { + bt_dev_err(hdev, "Unexpected cc 0x%4.4x with no status", + *opcode); + *status = HCI_ERROR_UNSPECIFIED; + return; + } + /* Unknown opcode, assume byte 0 contains the status, so * that e.g. __hci_cmd_sync() properly returns errors * for vendor specific commands send by HCI drivers. From 1c21cf89a66413eb04b2d22c955b7a50edc14dfa Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Tue, 28 Oct 2025 23:26:30 +0530 Subject: [PATCH 09/63] Bluetooth: btrtl: Fix memory leak in rtlbt_parse_firmware_v2() The memory allocated for ptr using kvmalloc() is not freed on the last error path. Fix that by freeing it on that error path. Fixes: 9a24ce5e29b1 ("Bluetooth: btrtl: Firmware format v2 support") Signed-off-by: Abdun Nihaal Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btrtl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 6abd962502e3..1d4a7887abcc 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -625,8 +625,10 @@ static int rtlbt_parse_firmware_v2(struct hci_dev *hdev, len += entry->len; } - if (!len) + if (!len) { + kvfree(ptr); return -EPERM; + } *_buf = ptr; return len; From 8d59fba49362c65332395789fd82771f1028d87e Mon Sep 17 00:00:00 2001 From: Ilia Gavrilov Date: Mon, 20 Oct 2025 15:12:55 +0000 Subject: [PATCH 10/63] Bluetooth: MGMT: Fix OOB access in parse_adv_monitor_pattern() In the parse_adv_monitor_pattern() function, the value of the 'length' variable is currently limited to HCI_MAX_EXT_AD_LENGTH(251). The size of the 'value' array in the mgmt_adv_pattern structure is 31. If the value of 'pattern[i].length' is set in the user space and exceeds 31, the 'patterns[i].value' array can be accessed out of bound when copied. Increasing the size of the 'value' array in the 'mgmt_adv_pattern' structure will break the userspace. Considering this, and to avoid OOB access revert the limits for 'offset' and 'length' back to the value of HCI_MAX_AD_LENGTH. Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) with SVACE. Fixes: db08722fc7d4 ("Bluetooth: hci_core: Fix missing instances using HCI_MAX_AD_LENGTH") Cc: stable@vger.kernel.org Signed-off-by: Ilia Gavrilov Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/mgmt.h | 2 +- net/bluetooth/mgmt.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index bca0333f1e99..f5be96f08b9d 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -780,7 +780,7 @@ struct mgmt_adv_pattern { __u8 ad_type; __u8 offset; __u8 length; - __u8 value[31]; + __u8 value[HCI_MAX_AD_LENGTH]; } __packed; #define MGMT_OP_ADD_ADV_PATTERNS_MONITOR 0x0052 diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 24e335e3a727..79762bfaea5f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5395,9 +5395,9 @@ static u8 parse_adv_monitor_pattern(struct adv_monitor *m, u8 pattern_count, for (i = 0; i < pattern_count; i++) { offset = patterns[i].offset; length = patterns[i].length; - if (offset >= HCI_MAX_EXT_AD_LENGTH || - length > HCI_MAX_EXT_AD_LENGTH || - (offset + length) > HCI_MAX_EXT_AD_LENGTH) + if (offset >= HCI_MAX_AD_LENGTH || + length > HCI_MAX_AD_LENGTH || + (offset + length) > HCI_MAX_AD_LENGTH) return MGMT_STATUS_INVALID_PARAMS; p = kmalloc(sizeof(*p), GFP_KERNEL); From 6ab753b5d8e521616cd9bd10b09891cbeb7e0235 Mon Sep 17 00:00:00 2001 From: Tim Hostetler Date: Wed, 29 Oct 2025 11:45:39 -0700 Subject: [PATCH 11/63] gve: Implement gettimex64 with -EOPNOTSUPP gve implemented a ptp_clock for sole use of do_aux_work at this time. ptp_clock_gettime() and ptp_sys_offset() assume every ptp_clock has implemented either gettimex64 or gettime64. Stub gettimex64 and return -EOPNOTSUPP to prevent NULL dereferencing. Fixes: acd16380523b ("gve: Add initial PTP device support") Reported-by: syzbot+c8c0e7ccabd456541612@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c8c0e7ccabd456541612 Signed-off-by: Tim Hostetler Reviewed-by: Harshitha Ramamurthy Reviewed-by: Kuniyuki Iwashima Signed-off-by: Joshua Washington Link: https://patch.msgid.link/20251029184555.3852952-2-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_ptp.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_ptp.c b/drivers/net/ethernet/google/gve/gve_ptp.c index e96247c9d68d..19ae699d4b18 100644 --- a/drivers/net/ethernet/google/gve/gve_ptp.c +++ b/drivers/net/ethernet/google/gve/gve_ptp.c @@ -26,6 +26,13 @@ int gve_clock_nic_ts_read(struct gve_priv *priv) return 0; } +static int gve_ptp_gettimex64(struct ptp_clock_info *info, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + return -EOPNOTSUPP; +} + static long gve_ptp_do_aux_work(struct ptp_clock_info *info) { const struct gve_ptp *ptp = container_of(info, struct gve_ptp, info); @@ -47,6 +54,7 @@ static long gve_ptp_do_aux_work(struct ptp_clock_info *info) static const struct ptp_clock_info gve_ptp_caps = { .owner = THIS_MODULE, .name = "gve clock", + .gettimex64 = gve_ptp_gettimex64, .do_aux_work = gve_ptp_do_aux_work, }; From 329d050bbe63c2999f657cf2d3855be11a473745 Mon Sep 17 00:00:00 2001 From: Tim Hostetler Date: Wed, 29 Oct 2025 11:45:40 -0700 Subject: [PATCH 12/63] gve: Implement settime64 with -EOPNOTSUPP ptp_clock_settime() assumes every ptp_clock has implemented settime64(). Stub it with -EOPNOTSUPP to prevent a NULL dereference. Fixes: acd16380523b ("gve: Add initial PTP device support") Reported-by: syzbot+a546141ca6d53b90aba3@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=a546141ca6d53b90aba3 Signed-off-by: Tim Hostetler Reviewed-by: Kuniyuki Iwashima Signed-off-by: Joshua Washington Link: https://patch.msgid.link/20251029184555.3852952-3-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_ptp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_ptp.c b/drivers/net/ethernet/google/gve/gve_ptp.c index 19ae699d4b18..a384a9ed4914 100644 --- a/drivers/net/ethernet/google/gve/gve_ptp.c +++ b/drivers/net/ethernet/google/gve/gve_ptp.c @@ -33,6 +33,12 @@ static int gve_ptp_gettimex64(struct ptp_clock_info *info, return -EOPNOTSUPP; } +static int gve_ptp_settime64(struct ptp_clock_info *info, + const struct timespec64 *ts) +{ + return -EOPNOTSUPP; +} + static long gve_ptp_do_aux_work(struct ptp_clock_info *info) { const struct gve_ptp *ptp = container_of(info, struct gve_ptp, info); @@ -55,6 +61,7 @@ static const struct ptp_clock_info gve_ptp_caps = { .owner = THIS_MODULE, .name = "gve clock", .gettimex64 = gve_ptp_gettimex64, + .settime64 = gve_ptp_settime64, .do_aux_work = gve_ptp_do_aux_work, }; From 5a89b27afd3d010680f9355f7ff5b048cfe89333 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Wed, 29 Oct 2025 10:38:13 +0200 Subject: [PATCH 13/63] ptp: Allow exposing cycles only for clocks with free-running counter The PTP core falls back to gettimex64 and getcrosststamp when getcycles64 or getcyclesx64 are not implemented. This causes the CYCLES ioctls to retrieve PHC real time instead of free-running cycles. Reject PTP_SYS_OFFSET_{PRECISE,EXTENDED}_CYCLES for clocks without free-running counter support since the result would represent PHC real time and system time rather than cycles and system time. Fixes: faf23f54d366 ("ptp: Add ioctl commands to expose raw cycle counter values") Signed-off-by: Carolina Jubran Reviewed-by: Dragos Tatulea Reviewed-by: Tariq Toukan Reviewed-by: Pavan Chebbi Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20251029083813.2276997-1-cjubran@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_chardev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 8106eb617c8c..c61cf9edac48 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -561,10 +561,14 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, return ptp_mask_en_single(pccontext->private_clkdata, argptr); case PTP_SYS_OFFSET_PRECISE_CYCLES: + if (!ptp->has_cycles) + return -EOPNOTSUPP; return ptp_sys_offset_precise(ptp, argptr, ptp->info->getcrosscycles); case PTP_SYS_OFFSET_EXTENDED_CYCLES: + if (!ptp->has_cycles) + return -EOPNOTSUPP; return ptp_sys_offset_extended(ptp, argptr, ptp->info->getcyclesx64); default: From 3d18a84eddde169d6dbf3c72cc5358b988c347d0 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 27 Oct 2025 20:46:21 +0100 Subject: [PATCH 14/63] net: dsa: tag_brcm: legacy: fix untagged rx on unbridged ports for bcm63xx The internal switch on BCM63XX SoCs will unconditionally add 802.1Q VLAN tags on egress to CPU when 802.1Q mode is enabled. We do this unconditionally since commit ed409f3bbaa5 ("net: dsa: b53: Configure VLANs while not filtering"). This is fine for VLAN aware bridges, but for standalone ports and vlan unaware bridges this means all packets are tagged with the default VID, which is 0. While the kernel will treat that like untagged, this can break userspace applications processing raw packets, expecting untagged traffic, like STP daemons. This also breaks several bridge tests, where the tcpdump output then does not match the expected output anymore. Since 0 isn't a valid VID, just strip out the VLAN tag if we encounter it, unless the priority field is set, since that would be a valid tag again. Fixes: 964dbf186eaa ("net: dsa: tag_brcm: add support for legacy tags") Signed-off-by: Jonas Gorski Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20251027194621.133301-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/tag_brcm.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 26bb657ceac3..d9c77fa553b5 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -224,12 +224,14 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, { int len = BRCM_LEG_TAG_LEN; int source_port; + __be16 *proto; u8 *brcm_tag; if (unlikely(!pskb_may_pull(skb, BRCM_LEG_TAG_LEN + VLAN_HLEN))) return NULL; brcm_tag = dsa_etype_header_pos_rx(skb); + proto = (__be16 *)(brcm_tag + BRCM_LEG_TAG_LEN); source_port = brcm_tag[5] & BRCM_LEG_PORT_ID; @@ -237,8 +239,12 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, if (!skb->dev) return NULL; - /* VLAN tag is added by BCM63xx internal switch */ - if (netdev_uses_dsa(skb->dev)) + /* The internal switch in BCM63XX SoCs always tags on egress on the CPU + * port. We use VID 0 internally for untagged traffic, so strip the tag + * if the TCI field is all 0, and keep it otherwise to also retain + * e.g. 802.1p tagged packets. + */ + if (proto[0] == htons(ETH_P_8021Q) && proto[1] == 0) len += VLAN_HLEN; /* Remove Broadcom tag and update checksum */ From 02d064de05b1fcca769391fa82d205bed8bb9bf0 Mon Sep 17 00:00:00 2001 From: Anubhav Singh Date: Thu, 30 Oct 2025 06:28:18 +0000 Subject: [PATCH 15/63] selftests/net: fix out-of-order delivery of FIN in gro:tcp test Due to the gro_sender sending data packets and FIN packets in very quick succession, these are received almost simultaneously by the gro_receiver. FIN packets are sometimes processed before the data packets leading to intermittent (~1/100) test failures. This change adds a delay of 100ms before sending FIN packets in gro:tcp test to avoid the out-of-order delivery. The same mitigation already exists for the gro:ip test. Fixes: 7d1575014a63 ("selftests/net: GRO coalesce test") Reviewed-by: Willem de Bruijn Signed-off-by: Anubhav Singh Link: https://patch.msgid.link/20251030062818.1562228-1-anubhavsinggh@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/gro.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index 2b1d9f2b3e9e..3fa63bd85dea 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -989,6 +989,7 @@ static void check_recv_pkts(int fd, int *correct_payload, static void gro_sender(void) { + const int fin_delay_us = 100 * 1000; static char fin_pkt[MAX_HDR_LEN]; struct sockaddr_ll daddr = {}; int txfd = -1; @@ -1032,15 +1033,22 @@ static void gro_sender(void) write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else if (strcmp(testname, "tcp") == 0) { send_changed_checksum(txfd, &daddr); + /* Adding sleep before sending FIN so that it is not + * received prior to other packets. + */ + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); send_changed_seq(txfd, &daddr); + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); send_changed_ts(txfd, &daddr); + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); send_diff_opt(txfd, &daddr); + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else if (strcmp(testname, "ip") == 0) { send_changed_ECN(txfd, &daddr); From f8e8486702abb05b8c734093aab1606af0eac068 Mon Sep 17 00:00:00 2001 From: Anubhav Singh Date: Thu, 30 Oct 2025 06:04:36 +0000 Subject: [PATCH 16/63] selftests/net: use destination options instead of hop-by-hop The GRO self-test, gro.c, currently constructs IPv6 packets containing a Hop-by-Hop Options header (IPPROTO_HOPOPTS) to ensure the GRO path correctly handles IPv6 extension headers. However, network elements may be configured to drop packets with the Hop-by-Hop Options header (HBH). This causes the self-test to fail in environments where such network elements are present. To improve the robustness and reliability of this test in diverse network environments, switch from using IPPROTO_HOPOPTS to IPPROTO_DSTOPTS (Destination Options). The Destination Options header is less likely to be dropped by intermediate routers and still serves the core purpose of the test: validating GRO's handling of an IPv6 extension header. This change ensures the test can execute successfully without being incorrectly failed by network policies outside the kernel's control. Fixes: 7d1575014a63 ("selftests/net: GRO coalesce test") Reviewed-by: Willem de Bruijn Signed-off-by: Anubhav Singh Link: https://patch.msgid.link/20251030060436.1556664-1-anubhavsinggh@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/gro.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index 3fa63bd85dea..cfc39f70635d 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -754,11 +754,11 @@ static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE]; create_packet(buf, 0, 0, PAYLOAD_LEN, 0); - add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data1); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data1); write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0); - add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data2); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data2); write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); } From 3f978e3f1570155a1327ffa25f60968bc7b9398f Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Thu, 30 Oct 2025 09:55:22 +0530 Subject: [PATCH 17/63] isdn: mISDN: hfcsusb: fix memory leak in hfcsusb_probe() In hfcsusb_probe(), the memory allocated for ctrl_urb gets leaked when setup_instance() fails with an error code. Fix that by freeing the urb before freeing the hw structure. Also change the error paths to use the goto ladder style. Compile tested only. Issue found using a prototype static analysis tool. Fixes: 69f52adb2d53 ("mISDN: Add HFC USB driver") Signed-off-by: Abdun Nihaal Link: https://patch.msgid.link/20251030042524.194812-1-nihaal@cse.iitm.ac.in Signed-off-by: Jakub Kicinski --- drivers/isdn/hardware/mISDN/hfcsusb.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c index e54419a4e731..541a20cb58f1 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -1904,13 +1904,13 @@ setup_instance(struct hfcsusb *hw, struct device *parent) mISDN_freebchannel(&hw->bch[1]); mISDN_freebchannel(&hw->bch[0]); mISDN_freedchannel(&hw->dch); - kfree(hw); return err; } static int hfcsusb_probe(struct usb_interface *intf, const struct usb_device_id *id) { + int err; struct hfcsusb *hw; struct usb_device *dev = interface_to_usbdev(intf); struct usb_host_interface *iface = intf->cur_altsetting; @@ -2101,20 +2101,28 @@ hfcsusb_probe(struct usb_interface *intf, const struct usb_device_id *id) if (!hw->ctrl_urb) { pr_warn("%s: No memory for control urb\n", driver_info->vend_name); - kfree(hw); - return -ENOMEM; + err = -ENOMEM; + goto err_free_hw; } pr_info("%s: %s: detected \"%s\" (%s, if=%d alt=%d)\n", hw->name, __func__, driver_info->vend_name, conf_str[small_match], ifnum, alt_used); - if (setup_instance(hw, dev->dev.parent)) - return -EIO; + if (setup_instance(hw, dev->dev.parent)) { + err = -EIO; + goto err_free_urb; + } hw->intf = intf; usb_set_intfdata(hw->intf, hw); return 0; + +err_free_urb: + usb_free_urb(hw->ctrl_urb); +err_free_hw: + kfree(hw); + return err; } /* function called when an active device is removed */ From d01f8136d46b925798abcf86b35a4021e4cfb8bb Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Thu, 30 Oct 2025 12:03:40 +0800 Subject: [PATCH 18/63] selftests: netdevsim: Fix ethtool-coalesce.sh fail by installing ethtool-common.sh The script "ethtool-common.sh" is not installed in INSTALL_PATH, and triggers some errors when I try to run the test 'drivers/net/netdevsim/ethtool-coalesce.sh': TAP version 13 1..1 # timeout set to 600 # selftests: drivers/net/netdevsim: ethtool-coalesce.sh # ./ethtool-coalesce.sh: line 4: ethtool-common.sh: No such file or directory # ./ethtool-coalesce.sh: line 25: make_netdev: command not found # ethtool: bad command line argument(s) # ./ethtool-coalesce.sh: line 124: check: command not found # ./ethtool-coalesce.sh: line 126: [: -eq: unary operator expected # FAILED /0 checks not ok 1 selftests: drivers/net/netdevsim: ethtool-coalesce.sh # exit=1 Install this file to avoid this error. After this patch: TAP version 13 1..1 # timeout set to 600 # selftests: drivers/net/netdevsim: ethtool-coalesce.sh # PASSED all 22 checks ok 1 selftests: drivers/net/netdevsim: ethtool-coalesce.sh Fixes: fbb8531e58bd ("selftests: extract common functions in ethtool-common.sh") Signed-off-by: Wang Liang Link: https://patch.msgid.link/20251030040340.3258110-1-wangliang74@huawei.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/netdevsim/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile index daf51113c827..df10c7243511 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/Makefile +++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile @@ -20,4 +20,8 @@ TEST_PROGS := \ udp_tunnel_nic.sh \ # end of TEST_PROGS +TEST_FILES := \ + ethtool-common.sh +# end of TEST_FILES + include ../../../lib.mk From c211f5d7cbd5cb34489d526648bb9c8ecc907dee Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 30 Oct 2025 07:35:39 +0000 Subject: [PATCH 19/63] net: vlan: sync VLAN features with lower device After registering a VLAN device and setting its feature flags, we need to synchronize the VLAN features with the lower device. For example, the VLAN device does not have the NETIF_F_LRO flag, it should be synchronized with the lower device based on the NETIF_F_UPPER_DISABLES definition. As the dev->vlan_features has changed, we need to call netdev_update_features(). The caller must run after netdev_upper_dev_link() links the lower devices, so this patch adds the netdev_update_features() call in register_vlan_dev(). Fixes: fd867d51f889 ("net/core: generic support for disabling netdev features down stack") Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20251030073539.133779-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- net/8021q/vlan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index fda3a80e9340..2b74ed56eb16 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -193,6 +193,8 @@ int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack) vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, dev); grp->nr_vlan_devs++; + netdev_update_features(dev); + return 0; out_unregister_netdev: From d7d2fcf7ae31471b4e08b7e448b8fd0ec2e06a1b Mon Sep 17 00:00:00 2001 From: Gustavo Luiz Duarte Date: Wed, 29 Oct 2025 13:50:24 -0700 Subject: [PATCH 20/63] netconsole: Acquire su_mutex before navigating configs hierarchy There is a race between operations that iterate over the userdata cg_children list and concurrent add/remove of userdata items through configfs. The update_userdata() function iterates over the nt->userdata_group.cg_children list, and count_extradata_entries() also iterates over this same list to count nodes. Quoting from Documentation/filesystems/configfs.rst: > A subsystem can navigate the cg_children list and the ci_parent pointer > to see the tree created by the subsystem. This can race with configfs' > management of the hierarchy, so configfs uses the subsystem mutex to > protect modifications. Whenever a subsystem wants to navigate the > hierarchy, it must do so under the protection of the subsystem > mutex. Without proper locking, if a userdata item is added or removed concurrently while these functions are iterating, the list can be accessed in an inconsistent state. For example, the list_for_each() loop can reach a node that is being removed from the list by list_del_init() which sets the nodes' .next pointer to point to itself, so the loop will never end (or reach the WARN_ON_ONCE in update_userdata() ). Fix this by holding the configfs subsystem mutex (su_mutex) during all operations that iterate over cg_children. This includes: - userdatum_value_store() which calls update_userdata() to iterate over cg_children - All sysdata_*_enabled_store() functions which call count_extradata_entries() to iterate over cg_children The su_mutex must be acquired before dynamic_netconsole_mutex to avoid potential lock ordering issues, as configfs operations may already hold su_mutex when calling into our code. Fixes: df03f830d099 ("net: netconsole: cache userdata formatted string in netconsole_target") Signed-off-by: Gustavo Luiz Duarte Link: https://patch.msgid.link/20251029-netconsole-fix-warn-v1-1-0d0dd4622f48@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/netconsole.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 5d8d0214786c..bb6e03a92956 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -936,6 +936,7 @@ static ssize_t userdatum_value_store(struct config_item *item, const char *buf, if (count > MAX_EXTRADATA_VALUE_LEN) return -EMSGSIZE; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); ret = strscpy(udm->value, buf, sizeof(udm->value)); @@ -949,6 +950,7 @@ static ssize_t userdatum_value_store(struct config_item *item, const char *buf, ret = count; out_unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } @@ -974,6 +976,7 @@ static ssize_t sysdata_msgid_enabled_store(struct config_item *item, if (ret) return ret; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_MSGID); if (msgid_enabled == curr) @@ -994,6 +997,7 @@ static ssize_t sysdata_msgid_enabled_store(struct config_item *item, ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } @@ -1008,6 +1012,7 @@ static ssize_t sysdata_release_enabled_store(struct config_item *item, if (ret) return ret; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_RELEASE); if (release_enabled == curr) @@ -1028,6 +1033,7 @@ static ssize_t sysdata_release_enabled_store(struct config_item *item, ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } @@ -1042,6 +1048,7 @@ static ssize_t sysdata_taskname_enabled_store(struct config_item *item, if (ret) return ret; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_TASKNAME); if (taskname_enabled == curr) @@ -1062,6 +1069,7 @@ static ssize_t sysdata_taskname_enabled_store(struct config_item *item, ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } @@ -1077,6 +1085,7 @@ static ssize_t sysdata_cpu_nr_enabled_store(struct config_item *item, if (ret) return ret; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_CPU_NR); if (cpu_nr_enabled == curr) @@ -1105,6 +1114,7 @@ static ssize_t sysdata_cpu_nr_enabled_store(struct config_item *item, ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } From b2b526c2cf57d14ee269e012ed179081871f45a1 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Fri, 31 Oct 2025 09:15:53 -0700 Subject: [PATCH 21/63] net: mdio: Check regmap pointer returned by device_node_to_regmap() The call to device_node_to_regmap() in airoha_mdio_probe() can return an ERR_PTR() if regmap initialization fails. Currently, the driver stores the pointer without validation, which could lead to a crash if it is later dereferenced. Add an IS_ERR() check and return the corresponding error code to make the probe path more robust. Fixes: 67e3ba978361 ("net: mdio: Add MDIO bus controller for Airoha AN7583") Signed-off-by: Alok Tiwari Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251031161607.58581-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/mdio/mdio-airoha.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/mdio/mdio-airoha.c b/drivers/net/mdio/mdio-airoha.c index 1dc9939c8d7d..52e7475121ea 100644 --- a/drivers/net/mdio/mdio-airoha.c +++ b/drivers/net/mdio/mdio-airoha.c @@ -219,6 +219,8 @@ static int airoha_mdio_probe(struct platform_device *pdev) priv = bus->priv; priv->base_addr = addr; priv->regmap = device_node_to_regmap(dev->parent->of_node); + if (IS_ERR(priv->regmap)) + return PTR_ERR(priv->regmap); priv->clk = devm_clk_get_enabled(dev, NULL); if (IS_ERR(priv->clk)) From 7ed8b63ddc9a9578eae81f4da32761568a25efad Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sat, 1 Nov 2025 11:39:54 +0100 Subject: [PATCH 22/63] MAINTAINERS: add brcm tag driver to b53 The b53 entry was missing the brcm tag driver, so add it. Reported-by: Jakub Kicinski Link: https://lore.kernel.org/netdev/20251029181216.3f35f8ba@kernel.org/ Signed-off-by: Jonas Gorski Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251101103954.29816-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0518f1f4f3b5..92e9cd1a363b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4818,6 +4818,7 @@ F: drivers/net/dsa/b53/* F: drivers/net/dsa/bcm_sf2* F: include/linux/dsa/brcm.h F: include/linux/platform_data/b53.h +F: net/dsa/tag_brcm.c BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE M: Florian Fainelli From b6a8a5477fe9bd6be2b594a88f82f8bba41e6d54 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sat, 1 Nov 2025 14:28:06 +0100 Subject: [PATCH 23/63] net: dsa: b53: fix resetting speed and pause on forced link There is no guarantee that the port state override registers have their default values, as not all switches support being reset via register or have a reset GPIO. So when forcing port config, we need to make sure to clear all fields, which we currently do not do for the speed and flow control configuration. This can cause flow control stay enabled, or in the case of speed becoming an illegal value, e.g. configured for 1G (0x2), then setting 100M (0x1), results in 0x3 which is invalid. For PORT_OVERRIDE_SPEED_2000M we need to make sure to only clear it on supported chips, as the bit can have different meanings on other chips, e.g. for BCM5389 this controls scanning PHYs for link/speed configuration. Fixes: 5e004460f874 ("net: dsa: b53: Add helper to set link parameters") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251101132807.50419-2-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 2f846381d5a7..cb28256ef3cc 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1372,6 +1372,10 @@ static void b53_force_port_config(struct b53_device *dev, int port, else reg &= ~PORT_OVERRIDE_FULL_DUPLEX; + reg &= ~(0x3 << GMII_PO_SPEED_S); + if (is5301x(dev) || is58xx(dev)) + reg &= ~PORT_OVERRIDE_SPEED_2000M; + switch (speed) { case 2000: reg |= PORT_OVERRIDE_SPEED_2000M; @@ -1390,6 +1394,11 @@ static void b53_force_port_config(struct b53_device *dev, int port, return; } + if (is5325(dev)) + reg &= ~PORT_OVERRIDE_LP_FLOW_25; + else + reg &= ~(PORT_OVERRIDE_RX_FLOW | PORT_OVERRIDE_TX_FLOW); + if (rx_pause) { if (is5325(dev)) reg |= PORT_OVERRIDE_LP_FLOW_25; From 3e4ebdc1606adf77744cf8ed7a433d279fdc57ba Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sat, 1 Nov 2025 14:28:07 +0100 Subject: [PATCH 24/63] net: dsa: b53: fix bcm63xx RGMII port link adjustment BCM63XX's switch does not support MDIO scanning of external phys, so its MACs needs to be manually configured for autonegotiated link speeds. So b53_force_port_config() and b53_force_link() accordingly also when mode is MLO_AN_PHY for those ports. Fixes lower speeds than 1000/full on rgmii ports 4 - 7. This aligns the behaviour with the old bcm63xx_enetsw driver for those ports. Fixes: 967dd82ffc52 ("net: dsa: b53: Add support for Broadcom RoboSwitch") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251101132807.50419-3-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index cb28256ef3cc..bb2c6dfa7835 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1602,8 +1602,11 @@ static void b53_phylink_mac_link_down(struct phylink_config *config, struct b53_device *dev = dp->ds->priv; int port = dp->index; - if (mode == MLO_AN_PHY) + if (mode == MLO_AN_PHY) { + if (is63xx(dev) && in_range(port, B53_63XX_RGMII0, 4)) + b53_force_link(dev, port, false); return; + } if (mode == MLO_AN_FIXED) { b53_force_link(dev, port, false); @@ -1631,6 +1634,13 @@ static void b53_phylink_mac_link_up(struct phylink_config *config, if (mode == MLO_AN_PHY) { /* Re-negotiate EEE if it was enabled already */ p->eee_enabled = b53_eee_init(ds, port, phydev); + + if (is63xx(dev) && in_range(port, B53_63XX_RGMII0, 4)) { + b53_force_port_config(dev, port, speed, duplex, + tx_pause, rx_pause); + b53_force_link(dev, port, true); + } + return; } From c264294624e956a967a9e2e5fa41e3273340b089 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 2 Nov 2025 11:07:56 +0100 Subject: [PATCH 25/63] net: dsa: b53: fix enabling ip multicast In the New Control register bit 1 is either reserved, or has a different function: Out of Range Error Discard When enabled, the ingress port discards any frames if the Length field is between 1500 and 1536 (excluding 1500 and 1536) and with good CRC. The actual bit for enabling IP multicast is bit 0, which was only explicitly enabled for BCM5325 so far. For older switch chips, this bit defaults to 0, so we want to enable it as well, while newer switch chips default to 1, and their documentation says "It is illegal to set this bit to zero." So drop the wrong B53_IPMC_FWD_EN define, enable the IP multicast bit also for other switch chips. While at it, rename it to (B53_)IP_MC as that is how it is called in Broadcom code. Fixes: 63cc54a6f073 ("net: dsa: b53: Fix egress flooding settings") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251102100758.28352-2-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 4 ++-- drivers/net/dsa/b53/b53_regs.h | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index bb2c6dfa7835..58c31049c0e7 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -371,11 +371,11 @@ static void b53_set_forwarding(struct b53_device *dev, int enable) * frames should be flooded or not. */ b53_read8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, &mgmt); - mgmt |= B53_UC_FWD_EN | B53_MC_FWD_EN | B53_IPMC_FWD_EN; + mgmt |= B53_UC_FWD_EN | B53_MC_FWD_EN | B53_IP_MC; b53_write8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, mgmt); } else { b53_read8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, &mgmt); - mgmt |= B53_IP_MCAST_25; + mgmt |= B53_IP_MC; b53_write8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, mgmt); } } diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index 309fe0e46dad..8ce1ce72e938 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -111,8 +111,7 @@ /* IP Multicast control (8 bit) */ #define B53_IP_MULTICAST_CTRL 0x21 -#define B53_IP_MCAST_25 BIT(0) -#define B53_IPMC_FWD_EN BIT(1) +#define B53_IP_MC BIT(0) #define B53_UC_FWD_EN BIT(6) #define B53_MC_FWD_EN BIT(7) From 0be04b5fa62a82a9929ca261f6c9f64a3d0a28da Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 2 Nov 2025 11:07:57 +0100 Subject: [PATCH 26/63] net: dsa: b53: stop reading ARL entries if search is done The switch clears the ARL_SRCH_STDN bit when the search is done, i.e. it finished traversing the ARL table. This means that there will be no valid result, so we should not attempt to read and process any further entries. We only ever check the validity of the entries for 4 ARL bin chips, and only after having passed the first entry to the b53_fdb_copy(). This means that we always pass an invalid entry at the end to the b53_fdb_copy(). b53_fdb_copy() does check the validity though before passing on the entry, so it never gets passed on. On < 4 ARL bin chips, we will even continue reading invalid entries until we reach the result limit. Fixes: 1da6df85c6fb ("net: dsa: b53: Implement ARL add/del/dump operations") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251102100758.28352-3-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 58c31049c0e7..b467500699c7 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2037,7 +2037,7 @@ static int b53_arl_search_wait(struct b53_device *dev) do { b53_read8(dev, B53_ARLIO_PAGE, offset, ®); if (!(reg & ARL_SRCH_STDN)) - return 0; + return -ENOENT; if (reg & ARL_SRCH_VLID) return 0; From e57723fe536f040cc2635ec1545dd0a7919a321e Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 2 Nov 2025 11:07:58 +0100 Subject: [PATCH 27/63] net: dsa: b53: properly bound ARL searches for < 4 ARL bin chips When iterating over the ARL table we stop at max ARL entries / 2, but this is only valid if the chip actually returns 2 results at once. For chips with only one result register we will stop before reaching the end of the table if it is more than half full. Fix this by only dividing the maximum results by two if we have a chip with more than one result register (i.e. those with 4 ARL bins). Fixes: cd169d799bee ("net: dsa: b53: Bound check ARL searches") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251102100758.28352-4-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index b467500699c7..eb767edc4c13 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2087,13 +2087,16 @@ static int b53_fdb_copy(int port, const struct b53_arl_entry *ent, int b53_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data) { + unsigned int count = 0, results_per_hit = 1; struct b53_device *priv = ds->priv; struct b53_arl_entry results[2]; - unsigned int count = 0; u8 offset; int ret; u8 reg; + if (priv->num_arl_bins > 2) + results_per_hit = 2; + mutex_lock(&priv->arl_mutex); if (is5325(priv) || is5365(priv)) @@ -2115,7 +2118,7 @@ int b53_fdb_dump(struct dsa_switch *ds, int port, if (ret) break; - if (priv->num_arl_bins > 2) { + if (results_per_hit == 2) { b53_arl_search_rd(priv, 1, &results[1]); ret = b53_fdb_copy(port, &results[1], cb, data); if (ret) @@ -2125,7 +2128,7 @@ int b53_fdb_dump(struct dsa_switch *ds, int port, break; } - } while (count++ < b53_max_arl_entries(priv) / 2); + } while (count++ < b53_max_arl_entries(priv) / results_per_hit); mutex_unlock(&priv->arl_mutex); From c8732e933925e188cbb1d9bc3a5e1ae9affe6869 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 31 Oct 2025 13:16:28 +0100 Subject: [PATCH 28/63] net: phy: micrel: lan8842 errata Add errata for lan8842. The errata document can be found here [1]. This is fixing the module 2 ("Analog front-end not optimized for PHY-side shorted center taps"). [1] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/LAN8842-Errata-DS80001172.pdf Fixes: 5a774b64cd6a ("net: phy: micrel: Add support for lan8842") Reviewed-by: Andrew Lunn Signed-off-by: Horatiu Vultur Reviewed-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 147 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 604b5de0c158..1fa56d4c1793 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -466,6 +466,12 @@ struct lan8842_priv { u16 rev; }; +struct lanphy_reg_data { + int page; + u16 addr; + u16 val; +}; + static const struct kszphy_type lan8814_type = { .led_mode_reg = ~LAN8814_LED_CTRL_1, .cable_diag_reg = LAN8814_CABLE_DIAG, @@ -2835,6 +2841,13 @@ static int ksz886x_cable_test_get_status(struct phy_device *phydev, */ #define LAN8814_PAGE_PCS_DIGITAL 2 +/** + * LAN8814_PAGE_EEE - Selects Extended Page 3. + * + * This page contains EEE registers + */ +#define LAN8814_PAGE_EEE 3 + /** * LAN8814_PAGE_COMMON_REGS - Selects Extended Page 4. * @@ -2853,6 +2866,13 @@ static int ksz886x_cable_test_get_status(struct phy_device *phydev, */ #define LAN8814_PAGE_PORT_REGS 5 +/** + * LAN8814_PAGE_POWER_REGS - Selects Extended Page 28. + * + * This page contains analog control registers and power mode registers. + */ +#define LAN8814_PAGE_POWER_REGS 28 + /** * LAN8814_PAGE_SYSTEM_CTRL - Selects Extended Page 31. * @@ -5884,6 +5904,128 @@ static int lan8842_probe(struct phy_device *phydev) return 0; } +#define LAN8814_POWER_MGMT_MODE_3_ANEG_MDI 0x13 +#define LAN8814_POWER_MGMT_MODE_4_ANEG_MDIX 0x14 +#define LAN8814_POWER_MGMT_MODE_5_10BT_MDI 0x15 +#define LAN8814_POWER_MGMT_MODE_6_10BT_MDIX 0x16 +#define LAN8814_POWER_MGMT_MODE_7_100BT_TRAIN 0x17 +#define LAN8814_POWER_MGMT_MODE_8_100BT_MDI 0x18 +#define LAN8814_POWER_MGMT_MODE_9_100BT_EEE_MDI_TX 0x19 +#define LAN8814_POWER_MGMT_MODE_10_100BT_EEE_MDI_RX 0x1a +#define LAN8814_POWER_MGMT_MODE_11_100BT_MDIX 0x1b +#define LAN8814_POWER_MGMT_MODE_12_100BT_EEE_MDIX_TX 0x1c +#define LAN8814_POWER_MGMT_MODE_13_100BT_EEE_MDIX_RX 0x1d +#define LAN8814_POWER_MGMT_MODE_14_100BTX_EEE_TX_RX 0x1e + +#define LAN8814_POWER_MGMT_DLLPD_D BIT(0) +#define LAN8814_POWER_MGMT_ADCPD_D BIT(1) +#define LAN8814_POWER_MGMT_PGAPD_D BIT(2) +#define LAN8814_POWER_MGMT_TXPD_D BIT(3) +#define LAN8814_POWER_MGMT_DLLPD_C BIT(4) +#define LAN8814_POWER_MGMT_ADCPD_C BIT(5) +#define LAN8814_POWER_MGMT_PGAPD_C BIT(6) +#define LAN8814_POWER_MGMT_TXPD_C BIT(7) +#define LAN8814_POWER_MGMT_DLLPD_B BIT(8) +#define LAN8814_POWER_MGMT_ADCPD_B BIT(9) +#define LAN8814_POWER_MGMT_PGAPD_B BIT(10) +#define LAN8814_POWER_MGMT_TXPD_B BIT(11) +#define LAN8814_POWER_MGMT_DLLPD_A BIT(12) +#define LAN8814_POWER_MGMT_ADCPD_A BIT(13) +#define LAN8814_POWER_MGMT_PGAPD_A BIT(14) +#define LAN8814_POWER_MGMT_TXPD_A BIT(15) + +#define LAN8814_POWER_MGMT_C_D (LAN8814_POWER_MGMT_DLLPD_D | \ + LAN8814_POWER_MGMT_ADCPD_D | \ + LAN8814_POWER_MGMT_PGAPD_D | \ + LAN8814_POWER_MGMT_DLLPD_C | \ + LAN8814_POWER_MGMT_ADCPD_C | \ + LAN8814_POWER_MGMT_PGAPD_C) + +#define LAN8814_POWER_MGMT_B_C_D (LAN8814_POWER_MGMT_C_D | \ + LAN8814_POWER_MGMT_DLLPD_B | \ + LAN8814_POWER_MGMT_ADCPD_B | \ + LAN8814_POWER_MGMT_PGAPD_B) + +#define LAN8814_POWER_MGMT_VAL1 (LAN8814_POWER_MGMT_C_D | \ + LAN8814_POWER_MGMT_ADCPD_B | \ + LAN8814_POWER_MGMT_PGAPD_B | \ + LAN8814_POWER_MGMT_ADCPD_A | \ + LAN8814_POWER_MGMT_PGAPD_A) + +#define LAN8814_POWER_MGMT_VAL2 LAN8814_POWER_MGMT_C_D + +#define LAN8814_POWER_MGMT_VAL3 (LAN8814_POWER_MGMT_C_D | \ + LAN8814_POWER_MGMT_DLLPD_B | \ + LAN8814_POWER_MGMT_ADCPD_B | \ + LAN8814_POWER_MGMT_PGAPD_A) + +#define LAN8814_POWER_MGMT_VAL4 (LAN8814_POWER_MGMT_B_C_D | \ + LAN8814_POWER_MGMT_ADCPD_A | \ + LAN8814_POWER_MGMT_PGAPD_A) + +#define LAN8814_POWER_MGMT_VAL5 LAN8814_POWER_MGMT_B_C_D + +static const struct lanphy_reg_data short_center_tap_errata[] = { + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_3_ANEG_MDI, + LAN8814_POWER_MGMT_VAL1 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_4_ANEG_MDIX, + LAN8814_POWER_MGMT_VAL1 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_5_10BT_MDI, + LAN8814_POWER_MGMT_VAL1 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_6_10BT_MDIX, + LAN8814_POWER_MGMT_VAL1 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_7_100BT_TRAIN, + LAN8814_POWER_MGMT_VAL2 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_8_100BT_MDI, + LAN8814_POWER_MGMT_VAL3 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_9_100BT_EEE_MDI_TX, + LAN8814_POWER_MGMT_VAL3 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_10_100BT_EEE_MDI_RX, + LAN8814_POWER_MGMT_VAL4 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_11_100BT_MDIX, + LAN8814_POWER_MGMT_VAL5 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_12_100BT_EEE_MDIX_TX, + LAN8814_POWER_MGMT_VAL5 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_13_100BT_EEE_MDIX_RX, + LAN8814_POWER_MGMT_VAL4 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_14_100BTX_EEE_TX_RX, + LAN8814_POWER_MGMT_VAL4 }, +}; + +static int lanphy_write_reg_data(struct phy_device *phydev, + const struct lanphy_reg_data *data, + size_t num) +{ + int ret = 0; + + while (num--) { + ret = lanphy_write_page_reg(phydev, data->page, data->addr, + data->val); + if (ret) + break; + } + + return ret; +} + +static int lan8842_erratas(struct phy_device *phydev) +{ + return lanphy_write_reg_data(phydev, short_center_tap_errata, + ARRAY_SIZE(short_center_tap_errata)); +} + static int lan8842_config_init(struct phy_device *phydev) { int ret; @@ -5896,6 +6038,11 @@ static int lan8842_config_init(struct phy_device *phydev) if (ret < 0) return ret; + /* Apply the erratas for this device */ + ret = lan8842_erratas(phydev); + if (ret < 0) + return ret; + /* Even if the GPIOs are set to control the LEDs the behaviour of the * LEDs is wrong, they are not blinking when there is traffic. * To fix this it is required to set extended LED mode From 65bd9a262644b67490dddd93a2b842ac94ccbe36 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 31 Oct 2025 13:16:29 +0100 Subject: [PATCH 29/63] net: phy: micrel: lan8842 errata Add errata for lan8842. The errata document can be found here [1]. This is fixing the module 7 ("1000BASE-T PMA EEE TX wake timer is non-compliant") [1] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/LAN8842-Errata-DS80001172.pdf Fixes: 5a774b64cd6a ("net: phy: micrel: Add support for lan8842") Reviewed-by: Andrew Lunn Signed-off-by: Horatiu Vultur Reviewed-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 1fa56d4c1793..6a1a424e3b30 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -5965,6 +5965,9 @@ static int lan8842_probe(struct phy_device *phydev) #define LAN8814_POWER_MGMT_VAL5 LAN8814_POWER_MGMT_B_C_D +#define LAN8814_EEE_WAKE_TX_TIMER 0x0e +#define LAN8814_EEE_WAKE_TX_TIMER_MAX_VAL 0x1f + static const struct lanphy_reg_data short_center_tap_errata[] = { { LAN8814_PAGE_POWER_REGS, LAN8814_POWER_MGMT_MODE_3_ANEG_MDI, @@ -6004,6 +6007,12 @@ static const struct lanphy_reg_data short_center_tap_errata[] = { LAN8814_POWER_MGMT_VAL4 }, }; +static const struct lanphy_reg_data waketx_timer_errata[] = { + { LAN8814_PAGE_EEE, + LAN8814_EEE_WAKE_TX_TIMER, + LAN8814_EEE_WAKE_TX_TIMER_MAX_VAL }, +}; + static int lanphy_write_reg_data(struct phy_device *phydev, const struct lanphy_reg_data *data, size_t num) @@ -6022,8 +6031,15 @@ static int lanphy_write_reg_data(struct phy_device *phydev, static int lan8842_erratas(struct phy_device *phydev) { - return lanphy_write_reg_data(phydev, short_center_tap_errata, + int ret; + + ret = lanphy_write_reg_data(phydev, short_center_tap_errata, ARRAY_SIZE(short_center_tap_errata)); + if (ret) + return ret; + + return lanphy_write_reg_data(phydev, waketx_timer_errata, + ARRAY_SIZE(waketx_timer_errata)); } static int lan8842_config_init(struct phy_device *phydev) From 38f50242bf0f237cdc262308d624d333286ec3c5 Mon Sep 17 00:00:00 2001 From: Stefan Wiehler Date: Tue, 28 Oct 2025 17:12:26 +0100 Subject: [PATCH 30/63] sctp: Hold RCU read lock while iterating over address list With CONFIG_PROVE_RCU_LIST=y and by executing $ netcat -l --sctp & $ netcat --sctp localhost & $ ss --sctp one can trigger the following Lockdep-RCU splat(s): WARNING: suspicious RCU usage 6.18.0-rc1-00093-g7f864458e9a6 #5 Not tainted ----------------------------- net/sctp/diag.c:76 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by ss/215: #0: ffff9c740828bec0 (nlk_cb_mutex-SOCK_DIAG){+.+.}-{4:4}, at: __netlink_dump_start+0x84/0x2b0 #1: ffff9c7401d72cd0 (sk_lock-AF_INET6){+.+.}-{0:0}, at: sctp_sock_dump+0x38/0x200 stack backtrace: CPU: 0 UID: 0 PID: 215 Comm: ss Not tainted 6.18.0-rc1-00093-g7f864458e9a6 #5 PREEMPT(voluntary) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x5d/0x90 lockdep_rcu_suspicious.cold+0x4e/0xa3 inet_sctp_diag_fill.isra.0+0x4b1/0x5d0 sctp_sock_dump+0x131/0x200 sctp_transport_traverse_process+0x170/0x1b0 ? __pfx_sctp_sock_filter+0x10/0x10 ? __pfx_sctp_sock_dump+0x10/0x10 sctp_diag_dump+0x103/0x140 __inet_diag_dump+0x70/0xb0 netlink_dump+0x148/0x490 __netlink_dump_start+0x1f3/0x2b0 inet_diag_handler_cmd+0xcd/0x100 ? __pfx_inet_diag_dump_start+0x10/0x10 ? __pfx_inet_diag_dump+0x10/0x10 ? __pfx_inet_diag_dump_done+0x10/0x10 sock_diag_rcv_msg+0x18e/0x320 ? __pfx_sock_diag_rcv_msg+0x10/0x10 netlink_rcv_skb+0x4d/0x100 netlink_unicast+0x1d7/0x2b0 netlink_sendmsg+0x203/0x450 ____sys_sendmsg+0x30c/0x340 ___sys_sendmsg+0x94/0xf0 __sys_sendmsg+0x83/0xf0 do_syscall_64+0xbb/0x390 entry_SYSCALL_64_after_hwframe+0x77/0x7f ... Fixes: 8f840e47f190 ("sctp: add the sctp_diag.c file") Signed-off-by: Stefan Wiehler Reviewed-by: Kuniyuki Iwashima Acked-by: Xin Long Link: https://patch.msgid.link/20251028161506.3294376-2-stefan.wiehler@nokia.com Signed-off-by: Jakub Kicinski --- net/sctp/diag.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 996c2018f0e6..1a8761f87bf1 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -73,19 +73,23 @@ static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb, struct nlattr *attr; void *info = NULL; + rcu_read_lock(); list_for_each_entry_rcu(laddr, address_list, list) addrcnt++; + rcu_read_unlock(); attr = nla_reserve(skb, INET_DIAG_LOCALS, addrlen * addrcnt); if (!attr) return -EMSGSIZE; info = nla_data(attr); + rcu_read_lock(); list_for_each_entry_rcu(laddr, address_list, list) { memcpy(info, &laddr->a, sizeof(laddr->a)); memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a)); info += addrlen; } + rcu_read_unlock(); return 0; } From 95aef86ab231f047bb8085c70666059b58f53c09 Mon Sep 17 00:00:00 2001 From: Stefan Wiehler Date: Tue, 28 Oct 2025 17:12:27 +0100 Subject: [PATCH 31/63] sctp: Prevent TOCTOU out-of-bounds write For the following path not holding the sock lock, sctp_diag_dump() -> sctp_for_each_endpoint() -> sctp_ep_dump() make sure not to exceed bounds in case the address list has grown between buffer allocation (time-of-check) and write (time-of-use). Suggested-by: Kuniyuki Iwashima Fixes: 8f840e47f190 ("sctp: add the sctp_diag.c file") Signed-off-by: Stefan Wiehler Reviewed-by: Kuniyuki Iwashima Acked-by: Xin Long Link: https://patch.msgid.link/20251028161506.3294376-3-stefan.wiehler@nokia.com Signed-off-by: Jakub Kicinski --- net/sctp/diag.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 1a8761f87bf1..5d64dd99ca9a 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -88,6 +88,9 @@ static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb, memcpy(info, &laddr->a, sizeof(laddr->a)); memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a)); info += addrlen; + + if (!--addrcnt) + break; } rcu_read_unlock(); From f1fc201148c7e684c10a72b6a3375597f28d1ef6 Mon Sep 17 00:00:00 2001 From: Stefan Wiehler Date: Tue, 28 Oct 2025 17:12:28 +0100 Subject: [PATCH 32/63] sctp: Hold sock lock while iterating over address list Move address list traversal in inet_assoc_attr_size() under the sock lock to avoid holding the RCU read lock. Suggested-by: Xin Long Fixes: 8f840e47f190 ("sctp: add the sctp_diag.c file") Signed-off-by: Stefan Wiehler Acked-by: Xin Long Link: https://patch.msgid.link/20251028161506.3294376-4-stefan.wiehler@nokia.com Signed-off-by: Jakub Kicinski --- net/sctp/diag.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 5d64dd99ca9a..2afb376299fe 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -230,14 +230,15 @@ struct sctp_comm_param { bool net_admin; }; -static size_t inet_assoc_attr_size(struct sctp_association *asoc) +static size_t inet_assoc_attr_size(struct sock *sk, + struct sctp_association *asoc) { int addrlen = sizeof(struct sockaddr_storage); int addrcnt = 0; struct sctp_sockaddr_entry *laddr; list_for_each_entry_rcu(laddr, &asoc->base.bind_addr.address_list, - list) + list, lockdep_sock_is_held(sk)) addrcnt++; return nla_total_size(sizeof(struct sctp_info)) @@ -263,11 +264,14 @@ static int sctp_sock_dump_one(struct sctp_endpoint *ep, struct sctp_transport *t if (err) return err; - rep = nlmsg_new(inet_assoc_attr_size(assoc), GFP_KERNEL); - if (!rep) - return -ENOMEM; - lock_sock(sk); + + rep = nlmsg_new(inet_assoc_attr_size(sk, assoc), GFP_KERNEL); + if (!rep) { + release_sock(sk); + return -ENOMEM; + } + if (ep != assoc->ep) { err = -EAGAIN; goto out; From d261f5b09c28850dc63ca1d3018596f829f402d5 Mon Sep 17 00:00:00 2001 From: Mohammad Heib Date: Fri, 31 Oct 2025 17:52:02 +0200 Subject: [PATCH 33/63] net: ionic: add dma_wmb() before ringing TX doorbell The TX path currently writes descriptors and then immediately writes to the MMIO doorbell register to notify the NIC. On weakly ordered architectures, descriptor writes may still be pending in CPU or DMA write buffers when the doorbell is issued, leading to the device fetching stale or incomplete descriptors. Add a dma_wmb() in ionic_txq_post() to ensure all descriptor writes are visible to the device before the doorbell MMIO write. Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") Signed-off-by: Mohammad Heib Link: https://patch.msgid.link/20251031155203.203031-1-mheib@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index d10b58ebf603..2e571d0a0d8a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -29,6 +29,10 @@ static void ionic_tx_clean(struct ionic_queue *q, static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell) { + /* Ensure TX descriptor writes reach memory before NIC reads them. + * Prevents device from fetching stale descriptors. + */ + dma_wmb(); ionic_q_post(q, ring_dbell); } From de0337d641bfa5b6d6b489e479792f1039274e84 Mon Sep 17 00:00:00 2001 From: Mohammad Heib Date: Fri, 31 Oct 2025 17:52:03 +0200 Subject: [PATCH 34/63] net: ionic: map SKB after pseudo-header checksum prep The TSO path called ionic_tx_map_skb() before preparing the TCP pseudo checksum (ionic_tx_tcp_[inner_]pseudo_csum()), which may perform skb_cow_head() and might modifies bytes in the linear header area. Mapping first and then mutating the header risks: - Using a stale DMA address if skb_cow_head() relocates the head, and/or - Device reading stale header bytes on weakly-ordered systems (CPU writes after mapping are not guaranteed visible without an explicit dma_sync_single_for_device()). Reorder the TX path to perform all header mutations (including skb_cow_head()) *before* DMA mapping. Mapping is now done only after the skb layout and header contents are final. This removes the need for any post-mapping dma_sync and prevents on-wire corruption observed under VLAN+TSO load after repeated runs. This change is purely an ordering fix; no functional behavior change otherwise. Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") Signed-off-by: Mohammad Heib Reviewed-by: Brett Creeley Link: https://patch.msgid.link/20251031155203.203031-2-mheib@redhat.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/pensando/ionic/ionic_txrx.c | 30 ++++++++----------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 2e571d0a0d8a..301ebee2fdc5 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -1448,19 +1448,6 @@ static int ionic_tx_tso(struct net_device *netdev, struct ionic_queue *q, bool encap; int err; - desc_info = &q->tx_info[q->head_idx]; - - if (unlikely(ionic_tx_map_skb(q, skb, desc_info))) - return -EIO; - - len = skb->len; - mss = skb_shinfo(skb)->gso_size; - outer_csum = (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPXIP4 | - SKB_GSO_IPXIP6 | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM)); has_vlan = !!skb_vlan_tag_present(skb); vlan_tci = skb_vlan_tag_get(skb); encap = skb->encapsulation; @@ -1474,12 +1461,21 @@ static int ionic_tx_tso(struct net_device *netdev, struct ionic_queue *q, err = ionic_tx_tcp_inner_pseudo_csum(skb); else err = ionic_tx_tcp_pseudo_csum(skb); - if (unlikely(err)) { - /* clean up mapping from ionic_tx_map_skb */ - ionic_tx_desc_unmap_bufs(q, desc_info); + if (unlikely(err)) return err; - } + desc_info = &q->tx_info[q->head_idx]; + if (unlikely(ionic_tx_map_skb(q, skb, desc_info))) + return -EIO; + + len = skb->len; + mss = skb_shinfo(skb)->gso_size; + outer_csum = (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | + SKB_GSO_IPXIP4 | + SKB_GSO_IPXIP6 | + SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM)); if (encap) hdrlen = skb_inner_tcp_all_headers(skb); else From 2e25935ed24daee37c4c2e8e29e478ce6e1f72c7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 1 Nov 2025 16:26:42 +0300 Subject: [PATCH 35/63] octeontx2-pf: Fix devm_kcalloc() error checking The devm_kcalloc() function never return error pointers, it returns NULL on failure. Also delete the netdev_err() printk. These allocation functions already have debug output built-in some the extra error message is not required. Fixes: efabce290151 ("octeontx2-pf: AF_XDP zero copy receive support") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/aQYKkrGA12REb2sj@stanley.mountain Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index aff17c37ddde..902d6abaa3ec 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1516,10 +1516,8 @@ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, pool->xdp_cnt = numptrs; pool->xdp = devm_kcalloc(pfvf->dev, numptrs, sizeof(struct xdp_buff *), GFP_KERNEL); - if (IS_ERR(pool->xdp)) { - netdev_err(pfvf->netdev, "Creation of xsk pool failed\n"); - return PTR_ERR(pool->xdp); - } + if (!pool->xdp) + return -ENOMEM; } return 0; From 5556f23478e6eb5d6a0321d4135e2c37a3c78a1e Mon Sep 17 00:00:00 2001 From: Vivian Wang Date: Mon, 3 Nov 2025 10:02:49 +0800 Subject: [PATCH 36/63] net: spacemit: Check netif_running() in emac_set_pauseparam() Currently, emac_set_pauseparam() will oops if userspace calls it while the interface is not up, because phydev is NULL, but it is still accessed in emac_set_fc() and emac_set_fc_autoneg(). Check for netif_running(dev) in emac_set_pauseparam() before proceeding. Fixes: bfec6d7f2001 ("net: spacemit: Add K1 Ethernet MAC") Signed-off-by: Vivian Wang Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251103-k1-ethernet-remove-fc-v3-1-2083770cd282@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/spacemit/k1_emac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/spacemit/k1_emac.c b/drivers/net/ethernet/spacemit/k1_emac.c index e1c5faff3b71..220eb5ce7583 100644 --- a/drivers/net/ethernet/spacemit/k1_emac.c +++ b/drivers/net/ethernet/spacemit/k1_emac.c @@ -1441,6 +1441,9 @@ static int emac_set_pauseparam(struct net_device *dev, struct emac_priv *priv = netdev_priv(dev); u8 fc = 0; + if (!netif_running(dev)) + return -ENETDOWN; + priv->flow_control_autoneg = pause->autoneg; if (pause->autoneg) { From 59b20b15c112867f28a12a24aa25f14549db02e4 Mon Sep 17 00:00:00 2001 From: Huiwen He Date: Mon, 3 Nov 2025 10:36:19 +0800 Subject: [PATCH 37/63] sctp: make sctp_transport_init() void sctp_transport_init() is static and never returns NULL. It is only called by sctp_transport_new(), so change it to void and remove the redundant return value check. Signed-off-by: Huiwen He Acked-by: Xin Long Link: https://patch.msgid.link/20251103023619.1025622-1-hehuiwen@kylinos.cn Signed-off-by: Jakub Kicinski --- net/sctp/transport.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 4d258a6e8033..0d48c61fe6ad 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -37,10 +37,10 @@ /* 1st Level Abstractions. */ /* Initialize a new transport from provided memory. */ -static struct sctp_transport *sctp_transport_init(struct net *net, - struct sctp_transport *peer, - const union sctp_addr *addr, - gfp_t gfp) +static void sctp_transport_init(struct net *net, + struct sctp_transport *peer, + const union sctp_addr *addr, + gfp_t gfp) { /* Copy in the address. */ peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); @@ -83,8 +83,6 @@ static struct sctp_transport *sctp_transport_init(struct net *net, get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); refcount_set(&peer->refcnt, 1); - - return peer; } /* Allocate and initialize a new transport. */ @@ -96,20 +94,13 @@ struct sctp_transport *sctp_transport_new(struct net *net, transport = kzalloc(sizeof(*transport), gfp); if (!transport) - goto fail; + return NULL; - if (!sctp_transport_init(net, transport, addr, gfp)) - goto fail_init; + sctp_transport_init(net, transport, addr, gfp); SCTP_DBG_OBJCNT_INC(transport); return transport; - -fail_init: - kfree(transport); - -fail: - return NULL; } /* This transport is no longer needed. Free up if possible, or From e120f46768d98151ece8756ebd688b0e43dc8b29 Mon Sep 17 00:00:00 2001 From: Qendrim Maxhuni Date: Wed, 29 Oct 2025 08:57:44 +0100 Subject: [PATCH 38/63] net: usb: qmi_wwan: initialize MAC header offset in qmimux_rx_fixup Raw IP packets have no MAC header, leaving skb->mac_header uninitialized. This can trigger kernel panics on ARM64 when xfrm or other subsystems access the offset due to strict alignment checks. Initialize the MAC header to prevent such crashes. This can trigger kernel panics on ARM when running IPsec over the qmimux0 interface. Example trace: Internal error: Oops: 000000009600004f [#1] SMP CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.12.34-gbe78e49cb433 #1 Hardware name: LS1028A RDB Board (DT) pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : xfrm_input+0xde8/0x1318 lr : xfrm_input+0x61c/0x1318 sp : ffff800080003b20 Call trace: xfrm_input+0xde8/0x1318 xfrm6_rcv+0x38/0x44 xfrm6_esp_rcv+0x48/0xa8 ip6_protocol_deliver_rcu+0x94/0x4b0 ip6_input_finish+0x44/0x70 ip6_input+0x44/0xc0 ipv6_rcv+0x6c/0x114 __netif_receive_skb_one_core+0x5c/0x8c __netif_receive_skb+0x18/0x60 process_backlog+0x78/0x17c __napi_poll+0x38/0x180 net_rx_action+0x168/0x2f0 Fixes: c6adf77953bc ("net: usb: qmi_wwan: add qmap mux protocol support") Signed-off-by: Qendrim Maxhuni Link: https://patch.msgid.link/20251029075744.105113-1-qendrim.maxhuni@garderos.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 11352d85475a..3a4985b582cb 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -192,6 +192,12 @@ static int qmimux_rx_fixup(struct usbnet *dev, struct sk_buff *skb) if (!skbn) return 0; + /* Raw IP packets don't have a MAC header, but other subsystems + * (like xfrm) may still access MAC header offsets, so they must + * be initialized. + */ + skb_reset_mac_header(skbn); + switch (skb->data[offset + qmimux_hdr_sz] & 0xf0) { case 0x40: skbn->protocol = htons(ETH_P_IP); From c3838262b824c71c145cd3668722e99a69bc9cd9 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 31 Oct 2025 14:05:51 +0800 Subject: [PATCH 39/63] virtio_net: fix alignment for virtio_net_hdr_v1_hash Changing alignment of header would mean it's no longer safe to cast a 2 byte aligned pointer between formats. Use two 16 bit fields to make it 2 byte aligned as previously. This fixes the performance regression since commit ("virtio_net: enable gso over UDP tunnel support.") as it uses virtio_net_hdr_v1_hash_tunnel which embeds virtio_net_hdr_v1_hash. Pktgen in guest + XDP_DROP on TAP + vhost_net shows the TX PPS is recovered from 2.4Mpps to 4.45Mpps. Fixes: 56a06bd40fab ("virtio_net: enable gso over UDP tunnel support.") Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin Signed-off-by: Jason Wang Tested-by: Lei Yang Link: https://patch.msgid.link/20251031060551.126-1-jasowang@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 15 +++++++++++++-- include/linux/virtio_net.h | 3 ++- include/uapi/linux/virtio_net.h | 3 ++- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 8e8a179aaa49..e6e650bc3bc3 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2539,6 +2539,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, return NULL; } +static inline u32 +virtio_net_hash_value(const struct virtio_net_hdr_v1_hash *hdr_hash) +{ + return __le16_to_cpu(hdr_hash->hash_value_lo) | + (__le16_to_cpu(hdr_hash->hash_value_hi) << 16); +} + static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, struct sk_buff *skb) { @@ -2565,7 +2572,7 @@ static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, default: rss_hash_type = PKT_HASH_TYPE_NONE; } - skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); + skb_set_hash(skb, virtio_net_hash_value(hdr_hash), rss_hash_type); } static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, @@ -3311,6 +3318,10 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); + /* Make sure it's safe to cast between formats */ + BUILD_BUG_ON(__alignof__(*hdr) != __alignof__(hdr->hash_hdr)); + BUILD_BUG_ON(__alignof__(*hdr) != __alignof__(hdr->hash_hdr.hdr)); + can_push = vi->any_header_sg && !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; @@ -6750,7 +6761,7 @@ static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, hash_report = VIRTIO_NET_HASH_REPORT_NONE; *rss_type = virtnet_xdp_rss_type[hash_report]; - *hash = __le32_to_cpu(hdr_hash->hash_value); + *hash = virtio_net_hash_value(hdr_hash); return 0; } diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 4d1780848d0e..b673c31569f3 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -401,7 +401,8 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, if (!tnl_hdr_negotiated) return -EINVAL; - vhdr->hash_hdr.hash_value = 0; + vhdr->hash_hdr.hash_value_lo = 0; + vhdr->hash_hdr.hash_value_hi = 0; vhdr->hash_hdr.hash_report = 0; vhdr->hash_hdr.padding = 0; diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 8bf27ab8bcb4..1db45b01532b 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -193,7 +193,8 @@ struct virtio_net_hdr_v1 { struct virtio_net_hdr_v1_hash { struct virtio_net_hdr_v1 hdr; - __le32 hash_value; + __le16 hash_value_lo; + __le16 hash_value_hi; #define VIRTIO_NET_HASH_REPORT_NONE 0 #define VIRTIO_NET_HASH_REPORT_IPv4 1 #define VIRTIO_NET_HASH_REPORT_TCPv4 2 From bc7208ca805ae6062f353a4753467d913d963bc6 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 3 Nov 2025 16:56:55 -0800 Subject: [PATCH 40/63] bnxt_en: Shutdown FW DMA in bnxt_shutdown() The netif_close() call in bnxt_shutdown() only stops packet DMA. There may be FW DMA for trace logging (recently added) that will continue. If we kexec to a new kernel, the DMA will corrupt memory in the new kernel. Add bnxt_hwrm_func_drv_unrgtr() to unregister the driver from the FW. This will stop the FW DMA. In case the call fails, call pcie_flr() to reset the function and stop the DMA. Fixes: 24d694aec139 ("bnxt_en: Allocate backing store memory for FW trace logs") Reported-by: Jakub Kicinski Reviewed-by: Damodharam Ammepalli Reviewed-by: Kalesh AP Reviewed-by: Somnath Kotur Signed-off-by: Michael Chan Link: https://patch.msgid.link/20251104005700.542174-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3fc33b1b4dfb..c0e9caa1df73 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -16892,6 +16892,10 @@ static void bnxt_shutdown(struct pci_dev *pdev) if (netif_running(dev)) netif_close(dev); + if (bnxt_hwrm_func_drv_unrgtr(bp)) { + pcie_flr(pdev); + goto shutdown_exit; + } bnxt_ptp_clear(bp); bnxt_clear_int_mode(bp); pci_disable_device(pdev); From deb8eb39164382f1f67ef8e8af9176baf5e10f2d Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 3 Nov 2025 16:56:56 -0800 Subject: [PATCH 41/63] bnxt_en: Fix a possible memory leak in bnxt_ptp_init In bnxt_ptp_init(), when ptp_clock_register() fails, the driver is not freeing the memory allocated for ptp_info->pin_config. Fix it to unconditionally free ptp_info->pin_config in bnxt_ptp_free(). Fixes: caf3eedbcd8d ("bnxt_en: 1PPS support for 5750X family chips") Reviewed-by: Pavan Chebbi Reviewed-by: Somnath Kotur Signed-off-by: Kalesh AP Signed-off-by: Michael Chan Link: https://patch.msgid.link/20251104005700.542174-3-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index db81cf6d5289..0abaa2bbe357 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -1051,9 +1051,9 @@ static void bnxt_ptp_free(struct bnxt *bp) if (ptp->ptp_clock) { ptp_clock_unregister(ptp->ptp_clock); ptp->ptp_clock = NULL; - kfree(ptp->ptp_info.pin_config); - ptp->ptp_info.pin_config = NULL; } + kfree(ptp->ptp_info.pin_config); + ptp->ptp_info.pin_config = NULL; } int bnxt_ptp_init(struct bnxt *bp) From ff02be05f78399c766be68ab0b2285ff90b2aaa8 Mon Sep 17 00:00:00 2001 From: Gautam R A Date: Mon, 3 Nov 2025 16:56:57 -0800 Subject: [PATCH 42/63] bnxt_en: Fix null pointer dereference in bnxt_bs_trace_check_wrap() With older FW, we may get the ASYNC_EVENT_CMPL_EVENT_ID_DBG_BUF_PRODUCER for FW trace data type that has not been initialized. This will result in a crash in bnxt_bs_trace_type_wrap(). Add a guard to check for a valid magic_byte pointer before proceeding. Fixes: 84fcd9449fd7 ("bnxt_en: Manage the FW trace context memory") Reviewed-by: Somnath Kotur Reviewed-by: Shruti Parab Signed-off-by: Gautam R A Signed-off-by: Michael Chan Link: https://patch.msgid.link/20251104005700.542174-4-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 741b2d854789..7df46a21dd18 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2149,7 +2149,7 @@ struct bnxt_bs_trace_info { static inline void bnxt_bs_trace_check_wrap(struct bnxt_bs_trace_info *bs_trace, u32 offset) { - if (!bs_trace->wrapped && + if (!bs_trace->wrapped && bs_trace->magic_byte && *bs_trace->magic_byte != BNXT_TRACE_BUF_MAGIC_BYTE) bs_trace->wrapped = 1; bs_trace->last_offset = offset; From 28d9a84ef0ce56cc623da2a1ebf7583c00d52b31 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Mon, 3 Nov 2025 16:56:58 -0800 Subject: [PATCH 43/63] bnxt_en: Always provide max entry and entry size in coredump segments While populating firmware host logging segments for the coredump, it is possible for the FW command that flushes the segment to fail. When that happens, the existing code will not update the max entry and entry size in the segment header and this causes software that decodes the coredump to skip the segment. The segment most likely has already collected some DMA data, so always update these 2 segment fields in the header to allow the decoder to decode any data in the segment. Fixes: 3c2179e66355 ("bnxt_en: Add FW trace coredump segments to the coredump") Reviewed-by: Shruti Parab Signed-off-by: Kashyap Desai Signed-off-by: Michael Chan Link: https://patch.msgid.link/20251104005700.542174-5-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index 0181ab1f2dfd..ccb8b509662d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -333,13 +333,14 @@ static void bnxt_fill_drv_seg_record(struct bnxt *bp, u32 offset = 0; int rc = 0; + record->max_entries = cpu_to_le32(ctxm->max_entries); + record->entry_size = cpu_to_le32(ctxm->entry_size); + rc = bnxt_dbg_hwrm_log_buffer_flush(bp, type, 0, &offset); if (rc) return; bnxt_bs_trace_check_wrap(bs_trace, offset); - record->max_entries = cpu_to_le32(ctxm->max_entries); - record->entry_size = cpu_to_le32(ctxm->entry_size); record->offset = cpu_to_le32(bs_trace->last_offset); record->wrapped = bs_trace->wrapped; } From 5204943a4c6efc832993c0fa17dec275071eeccc Mon Sep 17 00:00:00 2001 From: Shantiprasad Shettar Date: Mon, 3 Nov 2025 16:56:59 -0800 Subject: [PATCH 44/63] bnxt_en: Fix warning in bnxt_dl_reload_down() The existing code calls bnxt_cancel_reservations() after bnxt_hwrm_func_drv_unrgtr() in bnxt_dl_reload_down(). bnxt_cancel_reservations() calls the FW and it will always fail since the driver has already unregistered, triggering this warning: bnxt_en 0000:0a:00.0 ens2np0: resc_qcaps failed Fix it by calling bnxt_clear_reservations() which will skip the unnecessary FW call since we have unregistered. Fixes: 228ea8c187d8 ("bnxt_en: implement devlink dev reload driver_reinit") Reviewed-by: Mohammad Shuab Siddique Reviewed-by: Somnath Kotur Reviewed-by: Kalesh AP Signed-off-by: Shantiprasad Shettar Signed-off-by: Michael Chan Link: https://patch.msgid.link/20251104005700.542174-6-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c0e9caa1df73..a625e7c311dd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12439,7 +12439,7 @@ static int bnxt_try_recover_fw(struct bnxt *bp) return -ENODEV; } -static void bnxt_clear_reservations(struct bnxt *bp, bool fw_reset) +void bnxt_clear_reservations(struct bnxt *bp, bool fw_reset) { struct bnxt_hw_resc *hw_resc = &bp->hw_resc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 7df46a21dd18..3613a172483a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2941,6 +2941,7 @@ void bnxt_report_link(struct bnxt *bp); int bnxt_update_link(struct bnxt *bp, bool chng_link_state); int bnxt_hwrm_set_pause(struct bnxt *); int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool); +void bnxt_clear_reservations(struct bnxt *bp, bool fw_reset); int bnxt_cancel_reservations(struct bnxt *bp, bool fw_reset); int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp); int bnxt_hwrm_free_wol_fltr(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 02961d93ed35..67ca02d84c97 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -461,7 +461,7 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, rtnl_unlock(); break; } - bnxt_cancel_reservations(bp, false); + bnxt_clear_reservations(bp, false); bnxt_free_ctx_mem(bp, false); break; } From 0c716703965ffc5ef4311b65cb5d84a703784717 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Thu, 30 Oct 2025 21:44:38 +0700 Subject: [PATCH 45/63] virtio-net: fix received length check in big packets Since commit 4959aebba8c0 ("virtio-net: use mtu size as buffer length for big packets"), when guest gso is off, the allocated size for big packets is not MAX_SKB_FRAGS * PAGE_SIZE anymore but depends on negotiated MTU. The number of allocated frags for big packets is stored in vi->big_packets_num_skbfrags. Because the host announced buffer length can be malicious (e.g. the host vhost_net driver's get_rx_bufs is modified to announce incorrect length), we need a check in virtio_net receive path. Currently, the check is not adapted to the new change which can lead to NULL page pointer dereference in the below while loop when receiving length that is larger than the allocated one. This commit fixes the received length check corresponding to the new change. Fixes: 4959aebba8c0 ("virtio-net: use mtu size as buffer length for big packets") Cc: stable@vger.kernel.org Signed-off-by: Bui Quang Minh Reviewed-by: Xuan Zhuo Tested-by: Lei Yang Link: https://patch.msgid.link/20251030144438.7582-1-minhquangbui99@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index e6e650bc3bc3..8855a994e12b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -910,17 +910,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, goto ok; } - /* - * Verify that we can indeed put this data into a skb. - * This is here to handle cases when the device erroneously - * tries to receive more than is possible. This is usually - * the case of a broken device. - */ - if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) { - net_dbg_ratelimited("%s: too much data\n", skb->dev->name); - dev_kfree_skb(skb); - return NULL; - } BUG_ON(offset >= PAGE_SIZE); while (len) { unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); @@ -2112,9 +2101,19 @@ static struct sk_buff *receive_big(struct net_device *dev, struct virtnet_rq_stats *stats) { struct page *page = buf; - struct sk_buff *skb = - page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); + struct sk_buff *skb; + /* Make sure that len does not exceed the size allocated in + * add_recvbuf_big. + */ + if (unlikely(len > (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE)) { + pr_debug("%s: rx error: len %u exceeds allocated size %lu\n", + dev->name, len, + (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE); + goto err; + } + + skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); u64_stats_add(&stats->bytes, len - vi->hdr_len); if (unlikely(!skb)) goto err; From 90a88306eb874fe4bbdd860e6c9787f5bbc588b5 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Mon, 3 Nov 2025 10:28:11 -0600 Subject: [PATCH 46/63] net: ethernet: ti: netcp: Standardize knav_dma_open_channel to return NULL on error Make knav_dma_open_channel consistently return NULL on error instead of ERR_PTR. Currently the header include/linux/soc/ti/knav_dma.h returns NULL when the driver is disabled, but the driver implementation does not even return NULL or ERR_PTR on failure, causing inconsistency in the users. This results in a crash in netcp_free_navigator_resources as followed (trimmed): Unhandled fault: alignment exception (0x221) at 0xfffffff2 [fffffff2] *pgd=80000800207003, *pmd=82ffda003, *pte=00000000 Internal error: : 221 [#1] SMP ARM Modules linked in: CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.17.0-rc7 #1 NONE Hardware name: Keystone PC is at knav_dma_close_channel+0x30/0x19c LR is at netcp_free_navigator_resources+0x2c/0x28c [... TRIM...] Call trace: knav_dma_close_channel from netcp_free_navigator_resources+0x2c/0x28c netcp_free_navigator_resources from netcp_ndo_open+0x430/0x46c netcp_ndo_open from __dev_open+0x114/0x29c __dev_open from __dev_change_flags+0x190/0x208 __dev_change_flags from netif_change_flags+0x1c/0x58 netif_change_flags from dev_change_flags+0x38/0xa0 dev_change_flags from ip_auto_config+0x2c4/0x11f0 ip_auto_config from do_one_initcall+0x58/0x200 do_one_initcall from kernel_init_freeable+0x1cc/0x238 kernel_init_freeable from kernel_init+0x1c/0x12c kernel_init from ret_from_fork+0x14/0x38 [... TRIM...] Standardize the error handling by making the function return NULL on all error conditions. The API is used in just the netcp_core.c so the impact is limited. Note, this change, in effect reverts commit 5b6cb43b4d62 ("net: ethernet: ti: netcp_core: return error while dma channel open issue"), but provides a less error prone implementation. Suggested-by: Simon Horman Suggested-by: Jacob Keller Signed-off-by: Nishanth Menon Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20251103162811.3730055-1-nm@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/netcp_core.c | 10 +++++----- drivers/soc/ti/knav_dma.c | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 857820657bac..5ee13db568f0 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1338,10 +1338,10 @@ int netcp_txpipe_open(struct netcp_tx_pipe *tx_pipe) tx_pipe->dma_channel = knav_dma_open_channel(dev, tx_pipe->dma_chan_name, &config); - if (IS_ERR(tx_pipe->dma_channel)) { + if (!tx_pipe->dma_channel) { dev_err(dev, "failed opening tx chan(%s)\n", tx_pipe->dma_chan_name); - ret = PTR_ERR(tx_pipe->dma_channel); + ret = -EINVAL; goto err; } @@ -1359,7 +1359,7 @@ int netcp_txpipe_open(struct netcp_tx_pipe *tx_pipe) return 0; err: - if (!IS_ERR_OR_NULL(tx_pipe->dma_channel)) + if (tx_pipe->dma_channel) knav_dma_close_channel(tx_pipe->dma_channel); tx_pipe->dma_channel = NULL; return ret; @@ -1678,10 +1678,10 @@ static int netcp_setup_navigator_resources(struct net_device *ndev) netcp->rx_channel = knav_dma_open_channel(netcp->netcp_device->device, netcp->dma_chan_name, &config); - if (IS_ERR(netcp->rx_channel)) { + if (!netcp->rx_channel) { dev_err(netcp->ndev_dev, "failed opening rx chan(%s\n", netcp->dma_chan_name); - ret = PTR_ERR(netcp->rx_channel); + ret = -EINVAL; goto fail; } diff --git a/drivers/soc/ti/knav_dma.c b/drivers/soc/ti/knav_dma.c index a25ebe6cd503..553ae7ee20f1 100644 --- a/drivers/soc/ti/knav_dma.c +++ b/drivers/soc/ti/knav_dma.c @@ -402,7 +402,7 @@ static int of_channel_match_helper(struct device_node *np, const char *name, * @name: slave channel name * @config: dma configuration parameters * - * Returns pointer to appropriate DMA channel on success or error. + * Return: Pointer to appropriate DMA channel on success or NULL on error. */ void *knav_dma_open_channel(struct device *dev, const char *name, struct knav_dma_cfg *config) @@ -414,13 +414,13 @@ void *knav_dma_open_channel(struct device *dev, const char *name, if (!kdev) { pr_err("keystone-navigator-dma driver not registered\n"); - return (void *)-EINVAL; + return NULL; } chan_num = of_channel_match_helper(dev->of_node, name, &instance); if (chan_num < 0) { dev_err(kdev->dev, "No DMA instance with name %s\n", name); - return (void *)-EINVAL; + return NULL; } dev_dbg(kdev->dev, "initializing %s channel %d from DMA %s\n", @@ -431,7 +431,7 @@ void *knav_dma_open_channel(struct device *dev, const char *name, if (config->direction != DMA_MEM_TO_DEV && config->direction != DMA_DEV_TO_MEM) { dev_err(kdev->dev, "bad direction\n"); - return (void *)-EINVAL; + return NULL; } /* Look for correct dma instance */ @@ -443,7 +443,7 @@ void *knav_dma_open_channel(struct device *dev, const char *name, } if (!dma) { dev_err(kdev->dev, "No DMA instance with name %s\n", instance); - return (void *)-EINVAL; + return NULL; } /* Look for correct dma channel from dma instance */ @@ -463,14 +463,14 @@ void *knav_dma_open_channel(struct device *dev, const char *name, if (!chan) { dev_err(kdev->dev, "channel %d is not in DMA %s\n", chan_num, instance); - return (void *)-EINVAL; + return NULL; } if (atomic_read(&chan->ref_count) >= 1) { if (!check_config(chan, config)) { dev_err(kdev->dev, "channel %d config miss-match\n", chan_num); - return (void *)-EINVAL; + return NULL; } } From 327c20c21d80e0d87834b392d83ae73c955ad8ff Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 3 Nov 2025 08:38:17 -0800 Subject: [PATCH 47/63] netpoll: Fix deadlock in memory allocation under spinlock Fix a AA deadlock in refill_skbs() where memory allocation while holding skb_pool->lock can trigger a recursive lock acquisition attempt. The deadlock scenario occurs when the system is under severe memory pressure: 1. refill_skbs() acquires skb_pool->lock (spinlock) 2. alloc_skb() is called while holding the lock 3. Memory allocator fails and calls slab_out_of_memory() 4. This triggers printk() for the OOM warning 5. The console output path calls netpoll_send_udp() 6. netpoll_send_udp() attempts to acquire the same skb_pool->lock 7. Deadlock: the lock is already held by the same CPU Call stack: refill_skbs() spin_lock_irqsave(&skb_pool->lock) <- lock acquired __alloc_skb() kmem_cache_alloc_node_noprof() slab_out_of_memory() printk() console_flush_all() netpoll_send_udp() skb_dequeue() spin_lock_irqsave(&skb_pool->lock) <- deadlock attempt This bug was exposed by commit 248f6571fd4c51 ("netpoll: Optimize skb refilling on critical path") which removed refill_skbs() from the critical path (where nested printk was being deferred), letting nested printk being called from inside refill_skbs() Refactor refill_skbs() to never allocate memory while holding the spinlock. Another possible solution to fix this problem is protecting the refill_skbs() from nested printks, basically calling printk_deferred_{enter,exit}() in refill_skbs(), then, any nested pr_warn() would be deferred. I prefer this approach, given I _think_ it might be a good idea to move the alloc_skb() from GFP_ATOMIC to GFP_KERNEL in the future, so, having the alloc_skb() outside of the lock will be necessary step. There is a possible TOCTOU issue when checking for the pool length, and queueing the new allocated skb, but, this is not an issue, given that an extra SKB in the pool is harmless and it will be eventually used. Signed-off-by: Breno Leitao Fixes: 248f6571fd4c51 ("netpoll: Optimize skb refilling on critical path") Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251103-fix_netpoll_aa-v4-1-4cfecdf6da7c@debian.org Signed-off-by: Jakub Kicinski --- net/core/netpoll.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 60a05d3b7c24..c85f740065fc 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -228,19 +228,16 @@ static void refill_skbs(struct netpoll *np) { struct sk_buff_head *skb_pool; struct sk_buff *skb; - unsigned long flags; skb_pool = &np->skb_pool; - spin_lock_irqsave(&skb_pool->lock, flags); - while (skb_pool->qlen < MAX_SKBS) { + while (READ_ONCE(skb_pool->qlen) < MAX_SKBS) { skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC); if (!skb) break; - __skb_queue_tail(skb_pool, skb); + skb_queue_tail(skb_pool, skb); } - spin_unlock_irqrestore(&skb_pool->lock, flags); } static void zap_completion_queue(void) From c74619e7602e88a0239cd4999571dd31081e9adf Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Mon, 3 Nov 2025 09:24:36 +0100 Subject: [PATCH 48/63] wifi: mac80211_hwsim: Limit destroy_on_close radio removal to netgroup hwsim radios marked destroy_on_close are removed when the Netlink socket that created them is closed. As the portid is not unique across network namespaces, closing a socket in one namespace may remove radios in another if it has the destroy_on_close flag set. Instead of matching the network namespace, match the netgroup of the radio to limit radio removal to those that have been created by the closing Netlink socket. The netgroup of a radio identifies the network namespace it was created in, and matching on it removes a destroy_on_close radio even if it has been moved to another namespace. Fixes: 100cb9ff40e0 ("mac80211_hwsim: Allow managing radios from non-initial namespaces") Signed-off-by: Martin Willi Link: https://patch.msgid.link/20251103082436.30483-1-martin@strongswan.org Signed-off-by: Johannes Berg --- drivers/net/wireless/virtual/mac80211_hwsim.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index 9f856042a67a..d28bf18d57ec 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -6698,14 +6698,15 @@ static struct genl_family hwsim_genl_family __ro_after_init = { .n_mcgrps = ARRAY_SIZE(hwsim_mcgrps), }; -static void remove_user_radios(u32 portid) +static void remove_user_radios(u32 portid, int netgroup) { struct mac80211_hwsim_data *entry, *tmp; LIST_HEAD(list); spin_lock_bh(&hwsim_radio_lock); list_for_each_entry_safe(entry, tmp, &hwsim_radios, list) { - if (entry->destroy_on_close && entry->portid == portid) { + if (entry->destroy_on_close && entry->portid == portid && + entry->netgroup == netgroup) { list_move(&entry->list, &list); rhashtable_remove_fast(&hwsim_radios_rht, &entry->rht, hwsim_rht_params); @@ -6730,7 +6731,7 @@ static int mac80211_hwsim_netlink_notify(struct notifier_block *nb, if (state != NETLINK_URELEASE) return NOTIFY_DONE; - remove_user_radios(notify->portid); + remove_user_radios(notify->portid, hwsim_net_get_netgroup(notify->net)); if (notify->portid == hwsim_net_get_wmediumd(notify->net)) { printk(KERN_INFO "mac80211_hwsim: wmediumd released netlink" From b1d16f7c0063b7209fd3251ce40c77d37b477b83 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 4 Nov 2025 09:23:31 -0800 Subject: [PATCH 49/63] libie: depend on DEBUG_FS when building LIBIE_FWLOG LIBIE_FWLOG is unusable without DEBUG_FS. Mark it in Kconfig. Fix build error on ixgbe when DEBUG_FS is not set. To not add another layer of #if IS_ENABLED(LIBIE_FWLOG) in ixgbe fwlog code define debugfs dentry even when DEBUG_FS isn't enabled. In this case the dummy functions of LIBIE_FWLOG will be used, so not initialized dentry isn't a problem. Fixes: 641585bc978e ("ixgbe: fwlog support for e610") Reported-by: Guenter Roeck Closes: https://lore.kernel.org/lkml/f594c621-f9e1-49f2-af31-23fbcb176058@roeck-us.net/ Signed-off-by: Michal Swiatkowski Reviewed-by: Simon Horman Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20251104172333.752445-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/Kconfig | 4 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 2 -- include/linux/net/intel/libie/fwlog.h | 12 ++++++++++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index a563a94e2780..122ee23497e6 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -146,7 +146,7 @@ config IXGBE tristate "Intel(R) 10GbE PCI Express adapters support" depends on PCI depends on PTP_1588_CLOCK_OPTIONAL - select LIBIE_FWLOG + select LIBIE_FWLOG if DEBUG_FS select MDIO select NET_DEVLINK select PLDMFW @@ -298,7 +298,7 @@ config ICE select DIMLIB select LIBIE select LIBIE_ADMINQ - select LIBIE_FWLOG + select LIBIE_FWLOG if DEBUG_FS select NET_DEVLINK select PACKING select PLDMFW diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 14d275270123..dce4936708eb 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -821,9 +821,7 @@ struct ixgbe_adapter { #ifdef CONFIG_IXGBE_HWMON struct hwmon_buff *ixgbe_hwmon_buff; #endif /* CONFIG_IXGBE_HWMON */ -#ifdef CONFIG_DEBUG_FS struct dentry *ixgbe_dbg_adapter; -#endif /*CONFIG_DEBUG_FS*/ u8 default_up; /* Bitmask indicating in use pools */ diff --git a/include/linux/net/intel/libie/fwlog.h b/include/linux/net/intel/libie/fwlog.h index 36b13fabca9e..7273c78c826b 100644 --- a/include/linux/net/intel/libie/fwlog.h +++ b/include/linux/net/intel/libie/fwlog.h @@ -78,8 +78,20 @@ struct libie_fwlog { ); }; +#if IS_ENABLED(CONFIG_LIBIE_FWLOG) int libie_fwlog_init(struct libie_fwlog *fwlog, struct libie_fwlog_api *api); void libie_fwlog_deinit(struct libie_fwlog *fwlog); void libie_fwlog_reregister(struct libie_fwlog *fwlog); void libie_get_fwlog_data(struct libie_fwlog *fwlog, u8 *buf, u16 len); +#else +static inline int libie_fwlog_init(struct libie_fwlog *fwlog, + struct libie_fwlog_api *api) +{ + return -EOPNOTSUPP; +} +static inline void libie_fwlog_deinit(struct libie_fwlog *fwlog) { } +static inline void libie_fwlog_reregister(struct libie_fwlog *fwlog) { } +static inline void libie_get_fwlog_data(struct libie_fwlog *fwlog, u8 *buf, + u16 len) { } +#endif /* CONFIG_LIBIE_FWLOG */ #endif /* _LIBIE_FWLOG_H_ */ From d917c217b612971ea05ae1582e8740b747e0e7e8 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 4 Nov 2025 16:34:35 +0100 Subject: [PATCH 50/63] net: gro_cells: Reduce lock scope in gro_cell_poll One GRO-cell device's NAPI callback can nest into the GRO-cell of another device if the underlying device is also using GRO-cell. This is the case for IPsec over vxlan. These two GRO-cells are separate devices. From lockdep's point of view it is the same because each device is sharing the same lock class and so it reports a possible deadlock assuming one device is nesting into itself. Hold the bh_lock only while accessing gro_cell::napi_skbs in gro_cell_poll(). This reduces the locking scope and avoids acquiring the same lock class multiple times. Fixes: 25718fdcbdd2 ("net: gro_cells: Use nested-BH locking for gro_cell") Reported-by: Gal Pressman Closes: https://lore.kernel.org/all/66664116-edb8-48dc-ad72-d5223696dd19@nvidia.com/ Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20251104153435.ty88xDQt@linutronix.de Signed-off-by: Jakub Kicinski --- net/core/gro_cells.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index fd57b845de33..a725d21159a6 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -60,9 +60,10 @@ static int gro_cell_poll(struct napi_struct *napi, int budget) struct sk_buff *skb; int work_done = 0; - __local_lock_nested_bh(&cell->bh_lock); while (work_done < budget) { + __local_lock_nested_bh(&cell->bh_lock); skb = __skb_dequeue(&cell->napi_skbs); + __local_unlock_nested_bh(&cell->bh_lock); if (!skb) break; napi_gro_receive(napi, skb); @@ -71,7 +72,6 @@ static int gro_cell_poll(struct napi_struct *napi, int budget) if (work_done < budget) napi_complete_done(napi, work_done); - __local_unlock_nested_bh(&cell->bh_lock); return work_done; } From d1c94bc5b90c21b65469d30d4a6bc8ed715c1bfe Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 4 Nov 2025 16:15:36 +0200 Subject: [PATCH 51/63] net/mlx5e: Fix return value in case of module EEPROM read error mlx5e_get_module_eeprom_by_page() has weird error handling. First, it is treating -EINVAL as a special case, but it is unclear why. Second, it tries to fail "gracefully" by returning the number of bytes read even in case of an error. This results in wrongly returning success (0 return value) if the error occurs before any bytes were read. Simplify the error handling by returning an error when such occurs. This also aligns with the error handling we have in mlx5e_get_module_eeprom() for the old API. This fixes the following case where the query fails, but userspace ethtool wrongly treats it as success and dumps an output: # ethtool -m eth2 netlink warning: mlx5_core: Query module eeprom by page failed, read 0 bytes, err -5 netlink warning: mlx5_core: Query module eeprom by page failed, read 0 bytes, err -5 Offset Values ------ ------ 0x0000: 00 00 00 00 05 00 04 00 00 00 00 00 05 00 05 00 0x0010: 00 00 00 00 05 00 06 00 50 00 00 00 67 65 20 66 0x0020: 61 69 6c 65 64 2c 20 72 65 61 64 20 30 20 62 79 0x0030: 74 65 73 2c 20 65 72 72 20 2d 35 00 14 00 03 00 0x0040: 08 00 01 00 03 00 00 00 08 00 02 00 1a 00 00 00 0x0050: 14 00 04 00 08 00 01 00 04 00 00 00 08 00 02 00 0x0060: 0e 00 00 00 14 00 05 00 08 00 01 00 05 00 00 00 0x0070: 08 00 02 00 1a 00 00 00 14 00 06 00 08 00 01 00 Fixes: e109d2b204da ("net/mlx5: Implement get_module_eeprom_by_page()") Signed-off-by: Gal Pressman Reviewed-by: Alex Lazar Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/1762265736-1028868-1-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 53e5ae252eac..893e1380a7c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -2125,14 +2125,12 @@ static int mlx5e_get_module_eeprom_by_page(struct net_device *netdev, if (!size_read) return i; - if (size_read == -EINVAL) - return -EINVAL; if (size_read < 0) { NL_SET_ERR_MSG_FMT_MOD( extack, "Query module eeprom by page failed, read %u bytes, err %d", i, size_read); - return i; + return size_read; } i += size_read; From ae4789affd1e181ae46e72e2b5fbe2d6d7b6616a Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 4 Nov 2025 16:14:15 +0530 Subject: [PATCH 52/63] net: ti: icssg-prueth: Fix fdb hash size configuration The ICSSG driver does the initial FDB configuration which includes setting the control registers. Other run time management like learning is managed by the PRU's. The default FDB hash size used by the firmware is 512 slots, which is currently missing in the current driver. Update the driver FDB config to include FDB hash size as well. Please refer trm [1] 6.4.14.12.17 section on how the FDB config register gets configured. From the table 6-1404, there is a reset field for FDB_HAS_SIZE which is 4, meaning 1024 slots. Currently the driver is not updating this reset value from 4(1024 slots) to 3(512 slots). This patch fixes this by updating the reset value to 512 slots. [1]: https://www.ti.com/lit/pdf/spruim2 Fixes: abd5576b9c57f ("net: ti: icssg-prueth: Add support for ICSSG switch firmware") Signed-off-by: Meghana Malladi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251104104415.3110537-1-m-malladi@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_config.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.c b/drivers/net/ethernet/ti/icssg/icssg_config.c index da53eb04b0a4..3f8237c17d09 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_config.c +++ b/drivers/net/ethernet/ti/icssg/icssg_config.c @@ -66,6 +66,9 @@ #define FDB_GEN_CFG1 0x60 #define SMEM_VLAN_OFFSET 8 #define SMEM_VLAN_OFFSET_MASK GENMASK(25, 8) +#define FDB_HASH_SIZE_MASK GENMASK(6, 3) +#define FDB_HASH_SIZE_SHIFT 3 +#define FDB_HASH_SIZE 3 #define FDB_GEN_CFG2 0x64 #define FDB_VLAN_EN BIT(6) @@ -463,6 +466,8 @@ void icssg_init_emac_mode(struct prueth *prueth) /* Set VLAN TABLE address base */ regmap_update_bits(prueth->miig_rt, FDB_GEN_CFG1, SMEM_VLAN_OFFSET_MASK, addr << SMEM_VLAN_OFFSET); + regmap_update_bits(prueth->miig_rt, FDB_GEN_CFG1, FDB_HASH_SIZE_MASK, + FDB_HASH_SIZE << FDB_HASH_SIZE_SHIFT); /* Set enable VLAN aware mode, and FDBs for all PRUs */ regmap_write(prueth->miig_rt, FDB_GEN_CFG2, (FDB_PRU0_EN | FDB_PRU1_EN | FDB_HOST_EN)); prueth->vlan_tbl = (struct prueth_vlan_tbl __force *)(prueth->shram.va + @@ -484,6 +489,8 @@ void icssg_init_fw_offload_mode(struct prueth *prueth) /* Set VLAN TABLE address base */ regmap_update_bits(prueth->miig_rt, FDB_GEN_CFG1, SMEM_VLAN_OFFSET_MASK, addr << SMEM_VLAN_OFFSET); + regmap_update_bits(prueth->miig_rt, FDB_GEN_CFG1, FDB_HASH_SIZE_MASK, + FDB_HASH_SIZE << FDB_HASH_SIZE_SHIFT); /* Set enable VLAN aware mode, and FDBs for all PRUs */ regmap_write(prueth->miig_rt, FDB_GEN_CFG2, FDB_EN_ALL); prueth->vlan_tbl = (struct prueth_vlan_tbl __force *)(prueth->shram.va + From 665a7e13c220bbde55531a24bd5524320648df10 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 4 Nov 2025 08:48:33 +0200 Subject: [PATCH 53/63] net/mlx5e: SHAMPO, Fix header mapping for 64K pages HW-GRO is broken on mlx5 for 64K page sizes. The patch in the fixes tag didn't take into account larger page sizes when doing an align down of max_ksm_entries. For 64K page size, max_ksm_entries is 0 which will skip mapping header pages via WQE UMR. This breaks header-data split and will result in the following syndrome: mlx5_core 0000:00:08.0 eth2: Error cqe on cqn 0x4c9, ci 0x0, qn 0x1133, opcode 0xe, syndrome 0x4, vendor syndrome 0x32 00000000: 00 00 00 00 04 4a 00 00 00 00 00 00 20 00 93 32 00000010: 55 00 00 00 fb cc 00 00 00 00 00 00 07 18 00 00 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 4a 00000030: 00 00 3b c7 93 01 32 04 00 00 00 00 00 00 bf e0 mlx5_core 0000:00:08.0 eth2: ERR CQE on RQ: 0x1133 Furthermore, the function that fills in WQE UMRs for the headers (mlx5e_build_shampo_hd_umr()) only supports mapping page sizes that fit in a single UMR WQE. This patch goes back to the old non-aligned max_ksm_entries value and it changes mlx5e_build_shampo_hd_umr() to support mapping a large page over multiple UMR WQEs. This means that mlx5e_build_shampo_hd_umr() can now leave a page only partially mapped. The caller, mlx5e_alloc_rx_hd_mpwqe(), ensures that there are enough UMR WQEs to cover complete pages by working on ksm_entries that are multiples of MLX5E_SHAMPO_WQ_HEADER_PER_PAGE. Fixes: 8a0ee54027b1 ("net/mlx5e: SHAMPO, Simplify UMR allocation for headers") Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/1762238915-1027590-2-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 36 +++++++++---------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 26621a2972ec..0c031954ca30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -671,7 +671,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, u16 pi, header_offset, err, wqe_bbs; u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey; struct mlx5e_umr_wqe *umr_wqe; - int headroom, i = 0; + int headroom, i; headroom = rq->buff.headroom; wqe_bbs = MLX5E_KSM_UMR_WQEBBS(ksm_entries); @@ -679,25 +679,24 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); build_ksm_umr(sq, umr_wqe, shampo->mkey_be, index, ksm_entries); - WARN_ON_ONCE(ksm_entries & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)); - while (i < ksm_entries) { - struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); + for (i = 0; i < ksm_entries; i++, index++) { + struct mlx5e_frag_page *frag_page; u64 addr; - err = mlx5e_page_alloc_fragmented(rq->hd_page_pool, frag_page); - if (unlikely(err)) - goto err_unmap; + frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); + header_offset = mlx5e_shampo_hd_offset(index); + if (!header_offset) { + err = mlx5e_page_alloc_fragmented(rq->hd_page_pool, + frag_page); + if (err) + goto err_unmap; + } addr = page_pool_get_dma_addr_netmem(frag_page->netmem); - - for (int j = 0; j < MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; j++) { - header_offset = mlx5e_shampo_hd_offset(index++); - - umr_wqe->inline_ksms[i++] = (struct mlx5_ksm) { - .key = cpu_to_be32(lkey), - .va = cpu_to_be64(addr + header_offset + headroom), - }; - } + umr_wqe->inline_ksms[i] = (struct mlx5_ksm) { + .key = cpu_to_be32(lkey), + .va = cpu_to_be64(addr + header_offset + headroom), + }; } sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { @@ -713,7 +712,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, return 0; err_unmap: - while (--i) { + while (--i >= 0) { --index; header_offset = mlx5e_shampo_hd_offset(index); if (!header_offset) { @@ -735,8 +734,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) struct mlx5e_icosq *sq = rq->icosq; int i, err, max_ksm_entries, len; - max_ksm_entries = ALIGN_DOWN(MLX5E_MAX_KSM_PER_WQE(rq->mdev), - MLX5E_SHAMPO_WQ_HEADER_PER_PAGE); + max_ksm_entries = MLX5E_MAX_KSM_PER_WQE(rq->mdev); ksm_entries = bitmap_find_window(shampo->bitmap, shampo->hd_per_wqe, shampo->hd_per_wq, shampo->pi); From bacd8d80181ebe34b599a39aa26bf73a44c91e55 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 4 Nov 2025 08:48:34 +0200 Subject: [PATCH 54/63] net/mlx5e: SHAMPO, Fix skb size check for 64K pages mlx5e_hw_gro_skb_has_enough_space() uses a formula to check if there is enough space in the skb frags to store more data. This formula is incorrect for 64K page sizes and it triggers early GRO session termination because the first fragment will blow up beyond GRO_LEGACY_MAX_SIZE. This patch adds a special case for page sizes >= GRO_LEGACY_MAX_SIZE (64K) which uses the skb->len instead. Within this context, the check is safe from fragment overflow because the hardware will continuously fill the data up to the reservation size of 64K and the driver will coalesce all data from the same page to the same fragment. This means that the data will span one fragment or at most two for such a large page size. It is expected that the if statement will be optimized out as the check is done with constants. Fixes: 92552d3abd32 ("net/mlx5e: HW_GRO cqe handler implementation") Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/1762238915-1027590-3-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0c031954ca30..f2a06752ce37 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -2354,7 +2354,10 @@ mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, u16 data_bcnt) { int nr_frags = skb_shinfo(skb)->nr_frags; - return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE; + if (PAGE_SIZE >= GRO_LEGACY_MAX_SIZE) + return skb->len + data_bcnt <= GRO_LEGACY_MAX_SIZE; + else + return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE; } static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) From d8a7ed9586c7579a99e9e2d90988c9eceeee61ff Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 4 Nov 2025 08:48:35 +0200 Subject: [PATCH 55/63] net/mlx5e: SHAMPO, Fix header formulas for higher MTUs and 64K pages The MLX5E_SHAMPO_WQ_HEADER_PER_PAGE and MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE macros are used directly in several places under the assumption that there will always be more headers per WQE than headers per page. However, this assumption doesn't hold for 64K page sizes and higher MTUs (> 4K). This can be first observed during header page allocation: ksm_entries will become 0 during alignment to MLX5E_SHAMPO_WQ_HEADER_PER_PAGE. This patch introduces 2 additional members to the mlx5e_shampo_hd struct which are meant to be used instead of the macrose mentioned above. When the number of headers per WQE goes below MLX5E_SHAMPO_WQ_HEADER_PER_PAGE, clamp the number of headers per page and expand the header size accordingly so that the headers for one WQE cover a full page. All the formulas are adapted to use these two new members. Fixes: 945ca432bfd0 ("net/mlx5e: SHAMPO, Drop info array") Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/1762238915-1027590-4-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 ++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 24 +++++++++++--- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 33 +++++++++++-------- 3 files changed, 41 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 14e3207b14e7..a163f81f07c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -634,7 +634,10 @@ struct mlx5e_dma_info { struct mlx5e_shampo_hd { struct mlx5e_frag_page *pages; u32 hd_per_wq; + u32 hd_per_page; u16 hd_per_wqe; + u8 log_hd_per_page; + u8 log_hd_entry_size; unsigned long *bitmap; u16 pi; u16 ci; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9c46511e7b43..6023bbbf3f39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -791,8 +791,9 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, int node) { void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq); + u8 log_hd_per_page, log_hd_entry_size; + u16 hd_per_wq, hd_per_wqe; u32 hd_pool_size; - u16 hd_per_wq; int wq_size; int err; @@ -815,11 +816,24 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, if (err) goto err_umr_mkey; - rq->mpwqe.shampo->hd_per_wqe = - mlx5e_shampo_hd_per_wqe(mdev, params, rqp); + hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rqp); wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); - hd_pool_size = (rq->mpwqe.shampo->hd_per_wqe * wq_size) / - MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; + + BUILD_BUG_ON(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE > PAGE_SHIFT); + if (hd_per_wqe >= MLX5E_SHAMPO_WQ_HEADER_PER_PAGE) { + log_hd_per_page = MLX5E_SHAMPO_LOG_WQ_HEADER_PER_PAGE; + log_hd_entry_size = MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; + } else { + log_hd_per_page = order_base_2(hd_per_wqe); + log_hd_entry_size = order_base_2(PAGE_SIZE / hd_per_wqe); + } + + rq->mpwqe.shampo->hd_per_wqe = hd_per_wqe; + rq->mpwqe.shampo->hd_per_page = BIT(log_hd_per_page); + rq->mpwqe.shampo->log_hd_per_page = log_hd_per_page; + rq->mpwqe.shampo->log_hd_entry_size = log_hd_entry_size; + + hd_pool_size = (hd_per_wqe * wq_size) >> log_hd_per_page; if (netif_rxq_has_unreadable_mp(rq->netdev, rq->ix)) { /* Separate page pool for shampo headers */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index f2a06752ce37..687cf123211d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -648,17 +648,20 @@ static void build_ksm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe, umr_wqe->hdr.uctrl.mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); } -static struct mlx5e_frag_page *mlx5e_shampo_hd_to_frag_page(struct mlx5e_rq *rq, int header_index) +static struct mlx5e_frag_page *mlx5e_shampo_hd_to_frag_page(struct mlx5e_rq *rq, + int header_index) { - BUILD_BUG_ON(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE > PAGE_SHIFT); + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; - return &rq->mpwqe.shampo->pages[header_index >> MLX5E_SHAMPO_LOG_WQ_HEADER_PER_PAGE]; + return &shampo->pages[header_index >> shampo->log_hd_per_page]; } -static u64 mlx5e_shampo_hd_offset(int header_index) +static u64 mlx5e_shampo_hd_offset(struct mlx5e_rq *rq, int header_index) { - return (header_index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) << - MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + u32 hd_per_page = shampo->hd_per_page; + + return (header_index & (hd_per_page - 1)) << shampo->log_hd_entry_size; } static void mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index); @@ -684,7 +687,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, u64 addr; frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); - header_offset = mlx5e_shampo_hd_offset(index); + header_offset = mlx5e_shampo_hd_offset(rq, index); if (!header_offset) { err = mlx5e_page_alloc_fragmented(rq->hd_page_pool, frag_page); @@ -714,7 +717,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, err_unmap: while (--i >= 0) { --index; - header_offset = mlx5e_shampo_hd_offset(index); + header_offset = mlx5e_shampo_hd_offset(rq, index); if (!header_offset) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); @@ -738,7 +741,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) ksm_entries = bitmap_find_window(shampo->bitmap, shampo->hd_per_wqe, shampo->hd_per_wq, shampo->pi); - ksm_entries = ALIGN_DOWN(ksm_entries, MLX5E_SHAMPO_WQ_HEADER_PER_PAGE); + ksm_entries = ALIGN_DOWN(ksm_entries, shampo->hd_per_page); if (!ksm_entries) return 0; @@ -856,7 +859,7 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index) { struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; - if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) { + if (((header_index + 1) & (shampo->hd_per_page - 1)) == 0) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); mlx5e_page_release_fragmented(rq->hd_page_pool, frag_page); @@ -1223,9 +1226,10 @@ static unsigned int mlx5e_lro_update_hdr(struct sk_buff *skb, static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); - u16 head_offset = mlx5e_shampo_hd_offset(header_index) + rq->buff.headroom; + u16 head_offset = mlx5e_shampo_hd_offset(rq, header_index); + void *addr = netmem_address(frag_page->netmem); - return netmem_address(frag_page->netmem) + head_offset; + return addr + head_offset + rq->buff.headroom; } static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4) @@ -2265,7 +2269,8 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, struct mlx5_cqe64 *cqe, u16 header_index) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); - u16 head_offset = mlx5e_shampo_hd_offset(header_index); + u16 head_offset = mlx5e_shampo_hd_offset(rq, header_index); + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; u16 head_size = cqe->shampo.header_size; u16 rx_headroom = rq->buff.headroom; struct sk_buff *skb = NULL; @@ -2281,7 +2286,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, data = hdr + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size); - if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) { + if (likely(frag_size <= BIT(shampo->log_hd_entry_size))) { /* build SKB around header */ dma_sync_single_range_for_cpu(rq->pdev, dma_addr, 0, frag_size, rq->buff.map_dir); net_prefetchw(hdr); From a04ea57aae375bdda1cb57034d8bcbb351e1f973 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 4 Nov 2025 14:23:21 +0800 Subject: [PATCH 56/63] net: libwx: fix device bus LAN ID The device bus LAN ID was obtained from PCI_FUNC(), but when a PF port is passthrough to a virtual machine, the function number may not match the actual port index on the device. This could cause the driver to perform operations such as LAN reset on the wrong port. Fix this by reading the LAN ID from port status register. Fixes: a34b3e6ed8fb ("net: txgbe: Store PCI info") Cc: stable@vger.kernel.org Signed-off-by: Jiawen Wu Reviewed-by: Simon Horman Link: https://patch.msgid.link/B60A670C1F52CB8E+20251104062321.40059-1-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 3 ++- drivers/net/ethernet/wangxun/libwx/wx_type.h | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 1e2713f0c921..b37d6cfbfbe9 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -2427,7 +2427,8 @@ int wx_sw_init(struct wx *wx) wx->oem_svid = pdev->subsystem_vendor; wx->oem_ssid = pdev->subsystem_device; wx->bus.device = PCI_SLOT(pdev->devfn); - wx->bus.func = PCI_FUNC(pdev->devfn); + wx->bus.func = FIELD_GET(WX_CFG_PORT_ST_LANID, + rd32(wx, WX_CFG_PORT_ST)); if (wx->oem_svid == PCI_VENDOR_ID_WANGXUN || pdev->is_virtfn) { diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index d89b9b8a0a2c..2f8319e03182 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -97,6 +97,8 @@ #define WX_CFG_PORT_CTL_DRV_LOAD BIT(3) #define WX_CFG_PORT_CTL_QINQ BIT(2) #define WX_CFG_PORT_CTL_D_VLAN BIT(0) /* double vlan*/ +#define WX_CFG_PORT_ST 0x14404 +#define WX_CFG_PORT_ST_LANID GENMASK(9, 8) #define WX_CFG_TAG_TPID(_i) (0x14430 + ((_i) * 4)) #define WX_CFG_PORT_CTL_NUM_VT_MASK GENMASK(13, 12) /* number of TVs */ @@ -557,8 +559,6 @@ enum WX_MSCA_CMD_value { #define TXD_USE_COUNT(S) DIV_ROUND_UP((S), WX_MAX_DATA_PER_TXD) #define DESC_NEEDED (MAX_SKB_FRAGS + 4) -#define WX_CFG_PORT_ST 0x14404 - /******************* Receive Descriptor bit definitions **********************/ #define WX_RXD_STAT_DD BIT(0) /* Done */ #define WX_RXD_STAT_EOP BIT(1) /* End of Packet */ From 4d6ec3a7932ca5b168426f7b5b40abab2b41d2da Mon Sep 17 00:00:00 2001 From: Haotian Zhang Date: Wed, 5 Nov 2025 11:47:16 +0800 Subject: [PATCH 57/63] net: wan: framer: pef2256: Switch to devm_mfd_add_devices() The driver calls mfd_add_devices() but fails to call mfd_remove_devices() in error paths after successful MFD device registration and in the remove function. This leads to resource leaks where MFD child devices are not properly unregistered. Replace mfd_add_devices with devm_mfd_add_devices to automatically manage the device resources. Fixes: c96e976d9a05 ("net: wan: framer: Add support for the Lantiq PEF2256 framer") Suggested-by: Herve Codina Signed-off-by: Haotian Zhang Acked-by: Herve Codina Link: https://patch.msgid.link/20251105034716.662-1-vulab@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/wan/framer/pef2256/pef2256.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wan/framer/pef2256/pef2256.c b/drivers/net/wan/framer/pef2256/pef2256.c index c5501826db1e..c058cc79137d 100644 --- a/drivers/net/wan/framer/pef2256/pef2256.c +++ b/drivers/net/wan/framer/pef2256/pef2256.c @@ -648,7 +648,8 @@ static int pef2256_add_audio_devices(struct pef2256 *pef2256) audio_devs[i].id = i; } - ret = mfd_add_devices(pef2256->dev, 0, audio_devs, count, NULL, 0, NULL); + ret = devm_mfd_add_devices(pef2256->dev, 0, audio_devs, count, + NULL, 0, NULL); kfree(audio_devs); return ret; } @@ -822,8 +823,8 @@ static int pef2256_probe(struct platform_device *pdev) platform_set_drvdata(pdev, pef2256); - ret = mfd_add_devices(pef2256->dev, 0, pef2256_devs, - ARRAY_SIZE(pef2256_devs), NULL, 0, NULL); + ret = devm_mfd_add_devices(pef2256->dev, 0, pef2256_devs, + ARRAY_SIZE(pef2256_devs), NULL, 0, NULL); if (ret) { dev_err(pef2256->dev, "add devices failed (%d)\n", ret); return ret; From 96baf482ca1f69f0da9d10a5bd8422c87ea9039e Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Tue, 4 Nov 2025 19:37:41 -0800 Subject: [PATCH 58/63] net: dsa: microchip: Fix reserved multicast address table programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KSZ9477/KSZ9897 and LAN937X families of switches use a reserved multicast address table for some specific forwarding with some multicast addresses, like the one used in STP. The hardware assumes the host port is the last port in KSZ9897 family and port 5 in LAN937X family. Most of the time this assumption is correct but not in other cases like KSZ9477. Originally the function just setups the first entry, but the others still need update, especially for one common multicast address that is used by PTP operation. LAN937x also uses different register bits when accessing the reserved table. Fixes: 457c182af597 ("net: dsa: microchip: generic access to ksz9477 static and reserved table") Signed-off-by: Tristram Ha Tested-by: Łukasz Majewski Link: https://patch.msgid.link/20251105033741.6455-1-Tristram.Ha@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz9477.c | 98 +++++++++++++++++++++---- drivers/net/dsa/microchip/ksz9477_reg.h | 3 +- drivers/net/dsa/microchip/ksz_common.c | 4 + drivers/net/dsa/microchip/ksz_common.h | 2 + 4 files changed, 91 insertions(+), 16 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index d747ea1c41a7..5df8f153d511 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -1355,9 +1355,15 @@ void ksz9477_config_cpu_port(struct dsa_switch *ds) } } +#define RESV_MCAST_CNT 8 + +static u8 reserved_mcast_map[RESV_MCAST_CNT] = { 0, 1, 3, 16, 32, 33, 2, 17 }; + int ksz9477_enable_stp_addr(struct ksz_device *dev) { + u8 i, ports, update; const u32 *masks; + bool override; u32 data; int ret; @@ -1366,23 +1372,87 @@ int ksz9477_enable_stp_addr(struct ksz_device *dev) /* Enable Reserved multicast table */ ksz_cfg(dev, REG_SW_LUE_CTRL_0, SW_RESV_MCAST_ENABLE, true); - /* Set the Override bit for forwarding BPDU packet to CPU */ - ret = ksz_write32(dev, REG_SW_ALU_VAL_B, - ALU_V_OVERRIDE | BIT(dev->cpu_port)); - if (ret < 0) - return ret; + /* The reserved multicast address table has 8 entries. Each entry has + * a default value of which port to forward. It is assumed the host + * port is the last port in most of the switches, but that is not the + * case for KSZ9477 or maybe KSZ9897. For LAN937X family the default + * port is port 5, the first RGMII port. It is okay for LAN9370, a + * 5-port switch, but may not be correct for the other 8-port + * versions. It is necessary to update the whole table to forward to + * the right ports. + * Furthermore PTP messages can use a reserved multicast address and + * the host will not receive them if this table is not correct. + */ + for (i = 0; i < RESV_MCAST_CNT; i++) { + data = reserved_mcast_map[i] << + dev->info->shifts[ALU_STAT_INDEX]; + data |= ALU_STAT_START | + masks[ALU_STAT_DIRECT] | + masks[ALU_RESV_MCAST_ADDR] | + masks[ALU_STAT_READ]; + ret = ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data); + if (ret < 0) + return ret; - data = ALU_STAT_START | ALU_RESV_MCAST_ADDR | masks[ALU_STAT_WRITE]; + /* wait to be finished */ + ret = ksz9477_wait_alu_sta_ready(dev); + if (ret < 0) + return ret; - ret = ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data); - if (ret < 0) - return ret; + ret = ksz_read32(dev, REG_SW_ALU_VAL_B, &data); + if (ret < 0) + return ret; - /* wait to be finished */ - ret = ksz9477_wait_alu_sta_ready(dev); - if (ret < 0) { - dev_err(dev->dev, "Failed to update Reserved Multicast table\n"); - return ret; + override = false; + ports = data & dev->port_mask; + switch (i) { + case 0: + case 6: + /* Change the host port. */ + update = BIT(dev->cpu_port); + override = true; + break; + case 2: + /* Change the host port. */ + update = BIT(dev->cpu_port); + break; + case 4: + case 5: + case 7: + /* Skip the host port. */ + update = dev->port_mask & ~BIT(dev->cpu_port); + break; + default: + update = ports; + break; + } + if (update != ports || override) { + data &= ~dev->port_mask; + data |= update; + /* Set Override bit to receive frame even when port is + * closed. + */ + if (override) + data |= ALU_V_OVERRIDE; + ret = ksz_write32(dev, REG_SW_ALU_VAL_B, data); + if (ret < 0) + return ret; + + data = reserved_mcast_map[i] << + dev->info->shifts[ALU_STAT_INDEX]; + data |= ALU_STAT_START | + masks[ALU_STAT_DIRECT] | + masks[ALU_RESV_MCAST_ADDR] | + masks[ALU_STAT_WRITE]; + ret = ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data); + if (ret < 0) + return ret; + + /* wait to be finished */ + ret = ksz9477_wait_alu_sta_ready(dev); + if (ret < 0) + return ret; + } } return 0; diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h index ff579920078e..61ea11e3338e 100644 --- a/drivers/net/dsa/microchip/ksz9477_reg.h +++ b/drivers/net/dsa/microchip/ksz9477_reg.h @@ -2,7 +2,7 @@ /* * Microchip KSZ9477 register definitions * - * Copyright (C) 2017-2024 Microchip Technology Inc. + * Copyright (C) 2017-2025 Microchip Technology Inc. */ #ifndef __KSZ9477_REGS_H @@ -397,7 +397,6 @@ #define ALU_RESV_MCAST_INDEX_M (BIT(6) - 1) #define ALU_STAT_START BIT(7) -#define ALU_RESV_MCAST_ADDR BIT(1) #define REG_SW_ALU_VAL_A 0x0420 diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index a962055bfdbd..933ae8dc6337 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -808,6 +808,8 @@ static const u16 ksz9477_regs[] = { static const u32 ksz9477_masks[] = { [ALU_STAT_WRITE] = 0, [ALU_STAT_READ] = 1, + [ALU_STAT_DIRECT] = 0, + [ALU_RESV_MCAST_ADDR] = BIT(1), [P_MII_TX_FLOW_CTRL] = BIT(5), [P_MII_RX_FLOW_CTRL] = BIT(3), }; @@ -835,6 +837,8 @@ static const u8 ksz9477_xmii_ctrl1[] = { static const u32 lan937x_masks[] = { [ALU_STAT_WRITE] = 1, [ALU_STAT_READ] = 2, + [ALU_STAT_DIRECT] = BIT(3), + [ALU_RESV_MCAST_ADDR] = BIT(2), [P_MII_TX_FLOW_CTRL] = BIT(5), [P_MII_RX_FLOW_CTRL] = BIT(3), }; diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index a1eb39771bb9..c65188cd3c0a 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -294,6 +294,8 @@ enum ksz_masks { DYNAMIC_MAC_TABLE_TIMESTAMP, ALU_STAT_WRITE, ALU_STAT_READ, + ALU_STAT_DIRECT, + ALU_RESV_MCAST_ADDR, P_MII_TX_FLOW_CTRL, P_MII_RX_FLOW_CTRL, }; From 067bf016e99ad72aa4ff869d6dec1fd62a9c6202 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 5 Nov 2025 07:26:20 +0000 Subject: [PATCH 59/63] bonding: fix NULL pointer dereference in actor_port_prio setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Liang reported an issue where setting a slave’s actor_port_prio to predefined values such as 0, 255, or 65535 would cause a system crash. The problem occurs because in bond_opt_parse(), when the provided value matches a predefined table entry, the function returns that table entry, which does not contain slave information. Later, in bond_option_actor_port_prio_set(), calling bond_slave_get_rtnl() leads to a NULL pointer dereference. Since actor_port_prio is defined as a u16 and initialized to the default value of 255 in ad_initialize_port(), there is no need for the bond_actor_port_prio_tbl. Using the BOND_OPTFLAG_RAWVAL flag is sufficient. Fixes: 6b6dc81ee7e8 ("bonding: add support for per-port LACP actor priority") Reported-by: Liang Li Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20251105072620.164841-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_options.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 495a87f2ea7c..384499c869b8 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -225,13 +225,6 @@ static const struct bond_opt_value bond_ad_actor_sys_prio_tbl[] = { { NULL, -1, 0}, }; -static const struct bond_opt_value bond_actor_port_prio_tbl[] = { - { "minval", 0, BOND_VALFLAG_MIN}, - { "maxval", 65535, BOND_VALFLAG_MAX}, - { "default", 255, BOND_VALFLAG_DEFAULT}, - { NULL, -1, 0}, -}; - static const struct bond_opt_value bond_ad_user_port_key_tbl[] = { { "minval", 0, BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT}, { "maxval", 1023, BOND_VALFLAG_MAX}, @@ -497,7 +490,7 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .id = BOND_OPT_ACTOR_PORT_PRIO, .name = "actor_port_prio", .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), - .values = bond_actor_port_prio_tbl, + .flags = BOND_OPTFLAG_RAWVAL, .set = bond_option_actor_port_prio_set, }, [BOND_OPT_AD_ACTOR_SYSTEM] = { From 0216721ce71252f60d89af49c8dff613358058d3 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Wed, 5 Nov 2025 08:49:55 +0100 Subject: [PATCH 60/63] lan966x: Fix sleeping in atomic context The following warning was seen when we try to connect using ssh to the device. BUG: sleeping function called from invalid context at kernel/locking/mutex.c:575 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 104, name: dropbear preempt_count: 1, expected: 0 INFO: lockdep is turned off. CPU: 0 UID: 0 PID: 104 Comm: dropbear Tainted: G W 6.18.0-rc2-00399-g6f1ab1b109b9-dirty #530 NONE Tainted: [W]=WARN Hardware name: Generic DT based system Call trace: unwind_backtrace from show_stack+0x10/0x14 show_stack from dump_stack_lvl+0x7c/0xac dump_stack_lvl from __might_resched+0x16c/0x2b0 __might_resched from __mutex_lock+0x64/0xd34 __mutex_lock from mutex_lock_nested+0x1c/0x24 mutex_lock_nested from lan966x_stats_get+0x5c/0x558 lan966x_stats_get from dev_get_stats+0x40/0x43c dev_get_stats from dev_seq_printf_stats+0x3c/0x184 dev_seq_printf_stats from dev_seq_show+0x10/0x30 dev_seq_show from seq_read_iter+0x350/0x4ec seq_read_iter from seq_read+0xfc/0x194 seq_read from proc_reg_read+0xac/0x100 proc_reg_read from vfs_read+0xb0/0x2b0 vfs_read from ksys_read+0x6c/0xec ksys_read from ret_fast_syscall+0x0/0x1c Exception stack(0xf0b11fa8 to 0xf0b11ff0) 1fa0: 00000001 00001000 00000008 be9048d8 00001000 00000001 1fc0: 00000001 00001000 00000008 00000003 be905920 0000001e 00000000 00000001 1fe0: 0005404c be9048c0 00018684 b6ec2cd8 It seems that we are using a mutex in a atomic context which is wrong. Change the mutex with a spinlock. Fixes: 12c2d0a5b8e2 ("net: lan966x: add ethtool configuration and statistics") Signed-off-by: Horatiu Vultur Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20251105074955.1766792-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- .../microchip/lan966x/lan966x_ethtool.c | 18 +++++++++--------- .../ethernet/microchip/lan966x/lan966x_main.c | 2 -- .../ethernet/microchip/lan966x/lan966x_main.h | 4 ++-- .../microchip/lan966x/lan966x_vcap_impl.c | 8 ++++---- 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c index 2474dfd330f4..fe4e61405284 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c @@ -294,7 +294,7 @@ static void lan966x_stats_update(struct lan966x *lan966x) { int i, j; - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); for (i = 0; i < lan966x->num_phys_ports; i++) { uint idx = i * lan966x->num_stats; @@ -310,7 +310,7 @@ static void lan966x_stats_update(struct lan966x *lan966x) } } - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); } static int lan966x_get_sset_count(struct net_device *dev, int sset) @@ -365,7 +365,7 @@ static void lan966x_get_eth_mac_stats(struct net_device *dev, idx = port->chip_port * lan966x->num_stats; - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); mac_stats->FramesTransmittedOK = lan966x->stats[idx + SYS_COUNT_TX_UC] + @@ -416,7 +416,7 @@ static void lan966x_get_eth_mac_stats(struct net_device *dev, lan966x->stats[idx + SYS_COUNT_RX_LONG] + lan966x->stats[idx + SYS_COUNT_RX_PMAC_LONG]; - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); } static const struct ethtool_rmon_hist_range lan966x_rmon_ranges[] = { @@ -442,7 +442,7 @@ static void lan966x_get_eth_rmon_stats(struct net_device *dev, idx = port->chip_port * lan966x->num_stats; - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); rmon_stats->undersize_pkts = lan966x->stats[idx + SYS_COUNT_RX_SHORT] + @@ -500,7 +500,7 @@ static void lan966x_get_eth_rmon_stats(struct net_device *dev, lan966x->stats[idx + SYS_COUNT_TX_SZ_1024_1526] + lan966x->stats[idx + SYS_COUNT_TX_PMAC_SZ_1024_1526]; - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); *ranges = lan966x_rmon_ranges; } @@ -603,7 +603,7 @@ void lan966x_stats_get(struct net_device *dev, idx = port->chip_port * lan966x->num_stats; - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); stats->rx_bytes = lan966x->stats[idx + SYS_COUNT_RX_OCT] + lan966x->stats[idx + SYS_COUNT_RX_PMAC_OCT]; @@ -685,7 +685,7 @@ void lan966x_stats_get(struct net_device *dev, stats->collisions = lan966x->stats[idx + SYS_COUNT_TX_COL]; - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); } int lan966x_stats_init(struct lan966x *lan966x) @@ -701,7 +701,7 @@ int lan966x_stats_init(struct lan966x *lan966x) return -ENOMEM; /* Init stats worker */ - mutex_init(&lan966x->stats_lock); + spin_lock_init(&lan966x->stats_lock); snprintf(queue_name, sizeof(queue_name), "%s-stats", dev_name(lan966x->dev)); lan966x->stats_queue = create_singlethread_workqueue(queue_name); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 7001584f1b7a..47752d3fde0b 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -1261,7 +1261,6 @@ static int lan966x_probe(struct platform_device *pdev) cancel_delayed_work_sync(&lan966x->stats_work); destroy_workqueue(lan966x->stats_queue); - mutex_destroy(&lan966x->stats_lock); debugfs_remove_recursive(lan966x->debugfs_root); @@ -1279,7 +1278,6 @@ static void lan966x_remove(struct platform_device *pdev) cancel_delayed_work_sync(&lan966x->stats_work); destroy_workqueue(lan966x->stats_queue); - mutex_destroy(&lan966x->stats_lock); lan966x_mac_purge_entries(lan966x); lan966x_mdb_deinit(lan966x); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index 4f75f0688369..eea286c29474 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -295,8 +295,8 @@ struct lan966x { const struct lan966x_stat_layout *stats_layout; u32 num_stats; - /* workqueue for reading stats */ - struct mutex stats_lock; + /* lock for reading stats */ + spinlock_t stats_lock; u64 *stats; struct delayed_work stats_work; struct workqueue_struct *stats_queue; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c index a1471e38d118..2a37fc1ba4bc 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c @@ -403,11 +403,11 @@ static void lan966x_es0_read_esdx_counter(struct lan966x *lan966x, u32 counter; id = id & 0xff; /* counter limit */ - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); lan_wr(SYS_STAT_CFG_STAT_VIEW_SET(id), lan966x, SYS_STAT_CFG); counter = lan_rd(lan966x, SYS_CNT(LAN966X_STAT_ESDX_GRN_PKTS)) + lan_rd(lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_PKTS)); - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); if (counter) admin->cache.counter = counter; } @@ -417,14 +417,14 @@ static void lan966x_es0_write_esdx_counter(struct lan966x *lan966x, { id = id & 0xff; /* counter limit */ - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); lan_wr(SYS_STAT_CFG_STAT_VIEW_SET(id), lan966x, SYS_STAT_CFG); lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_GRN_BYTES)); lan_wr(admin->cache.counter, lan966x, SYS_CNT(LAN966X_STAT_ESDX_GRN_PKTS)); lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_BYTES)); lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_PKTS)); - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); } static void lan966x_vcap_cache_write(struct net_device *dev, From 8dca36978aa80bab9d4da130c211db75c9e00048 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 5 Nov 2025 13:19:18 +0200 Subject: [PATCH 61/63] net: bridge: fix use-after-free due to MST port state bypass syzbot reported[1] a use-after-free when deleting an expired fdb. It is due to a race condition between learning still happening and a port being deleted, after all its fdbs have been flushed. The port's state has been toggled to disabled so no learning should happen at that time, but if we have MST enabled, it will bypass the port's state, that together with VLAN filtering disabled can lead to fdb learning at a time when it shouldn't happen while the port is being deleted. VLAN filtering must be disabled because we flush the port VLANs when it's being deleted which will stop learning. This fix adds a check for the port's vlan group which is initialized to NULL when the port is getting deleted, that avoids the port state bypass. When MST is enabled there would be a minimal new overhead in the fast-path because the port's vlan group pointer is cache-hot. [1] https://syzkaller.appspot.com/bug?extid=dd280197f0f7ab3917be Fixes: ec7328b59176 ("net: bridge: mst: Multiple Spanning Tree (MST) mode") Reported-by: syzbot+dd280197f0f7ab3917be@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/69088ffa.050a0220.29fc44.003d.GAE@google.com/ Signed-off-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20251105111919.1499702-2-razor@blackwall.org Signed-off-by: Jakub Kicinski --- net/bridge/br_forward.c | 2 +- net/bridge/br_input.c | 4 ++-- net/bridge/br_private.h | 8 +++++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 870bdf2e082c..dea09096ad0f 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -25,7 +25,7 @@ static inline int should_deliver(const struct net_bridge_port *p, vg = nbp_vlan_group_rcu(p); return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && - (br_mst_is_enabled(p->br) || p->state == BR_STATE_FORWARDING) && + (br_mst_is_enabled(p) || p->state == BR_STATE_FORWARDING) && br_allowed_egress(vg, skb) && nbp_switchdev_allowed_egress(p, skb) && !br_skb_isolated(p, skb); } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 67b4c905e49a..777fa869c1a1 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -94,7 +94,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb br = p->br; - if (br_mst_is_enabled(br)) { + if (br_mst_is_enabled(p)) { state = BR_STATE_FORWARDING; } else { if (p->state == BR_STATE_DISABLED) { @@ -429,7 +429,7 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) return RX_HANDLER_PASS; forward: - if (br_mst_is_enabled(p->br)) + if (br_mst_is_enabled(p)) goto defer_stp_filtering; switch (p->state) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 16be5d250402..b571d6f61389 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1935,10 +1935,12 @@ static inline bool br_vlan_state_allowed(u8 state, bool learn_allow) /* br_mst.c */ #ifdef CONFIG_BRIDGE_VLAN_FILTERING DECLARE_STATIC_KEY_FALSE(br_mst_used); -static inline bool br_mst_is_enabled(struct net_bridge *br) +static inline bool br_mst_is_enabled(const struct net_bridge_port *p) { + /* check the port's vlan group to avoid racing with port deletion */ return static_branch_unlikely(&br_mst_used) && - br_opt_get(br, BROPT_MST_ENABLED); + br_opt_get(p->br, BROPT_MST_ENABLED) && + rcu_access_pointer(p->vlgrp); } int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, @@ -1953,7 +1955,7 @@ int br_mst_fill_info(struct sk_buff *skb, int br_mst_process(struct net_bridge_port *p, const struct nlattr *mst_attr, struct netlink_ext_ack *extack); #else -static inline bool br_mst_is_enabled(struct net_bridge *br) +static inline bool br_mst_is_enabled(const struct net_bridge_port *p) { return false; } From ee87c63f9b2a418f698d79c2991347e31a7d2c27 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 5 Nov 2025 13:19:19 +0200 Subject: [PATCH 62/63] net: bridge: fix MST static key usage As Ido pointed out, the static key usage in MST is buggy and should use inc/dec instead of enable/disable because we can have multiple bridges with MST enabled which means a single bridge can disable MST for all. Use static_branch_inc/dec to avoid that. When destroying a bridge decrement the key if MST was enabled. Fixes: ec7328b59176 ("net: bridge: mst: Multiple Spanning Tree (MST) mode") Reported-by: Ido Schimmel Closes: https://lore.kernel.org/netdev/20251104120313.1306566-1-razor@blackwall.org/T/#m6888d87658f94ed1725433940f4f4ebb00b5a68b Signed-off-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20251105111919.1499702-3-razor@blackwall.org Signed-off-by: Jakub Kicinski --- net/bridge/br_if.c | 1 + net/bridge/br_mst.c | 10 ++++++++-- net/bridge/br_private.h | 5 +++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 98c5b9c3145f..ca3a637d7cca 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -386,6 +386,7 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) del_nbp(p); } + br_mst_uninit(br); br_recalculate_neigh_suppress_enabled(br); br_fdb_delete_by_port(br, NULL, 0, 1); diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c index 3f24b4ee49c2..43a300ae6bfa 100644 --- a/net/bridge/br_mst.c +++ b/net/bridge/br_mst.c @@ -22,6 +22,12 @@ bool br_mst_enabled(const struct net_device *dev) } EXPORT_SYMBOL_GPL(br_mst_enabled); +void br_mst_uninit(struct net_bridge *br) +{ + if (br_opt_get(br, BROPT_MST_ENABLED)) + static_branch_dec(&br_mst_used); +} + int br_mst_get_info(const struct net_device *dev, u16 msti, unsigned long *vids) { const struct net_bridge_vlan_group *vg; @@ -225,9 +231,9 @@ int br_mst_set_enabled(struct net_bridge *br, bool on, return err; if (on) - static_branch_enable(&br_mst_used); + static_branch_inc(&br_mst_used); else - static_branch_disable(&br_mst_used); + static_branch_dec(&br_mst_used); br_opt_toggle(br, BROPT_MST_ENABLED, on); return 0; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index b571d6f61389..7280c4e9305f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1954,6 +1954,7 @@ int br_mst_fill_info(struct sk_buff *skb, const struct net_bridge_vlan_group *vg); int br_mst_process(struct net_bridge_port *p, const struct nlattr *mst_attr, struct netlink_ext_ack *extack); +void br_mst_uninit(struct net_bridge *br); #else static inline bool br_mst_is_enabled(const struct net_bridge_port *p) { @@ -1989,6 +1990,10 @@ static inline int br_mst_process(struct net_bridge_port *p, { return -EOPNOTSUPP; } + +static inline void br_mst_uninit(struct net_bridge *br) +{ +} #endif struct nf_br_ops { From 3534e03e0ec2e00908765549828a69df5ebefb91 Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Wed, 5 Nov 2025 07:59:19 -0800 Subject: [PATCH 63/63] selftests/vsock: avoid false-positives when checking dmesg Sometimes VMs will have some intermittent dmesg warnings that are unrelated to vsock. Change the dmesg parsing to filter on strings containing 'vsock' to avoid false positive failures that are unrelated to vsock. The downside is that it is possible for some vsock related warnings to not contain the substring 'vsock', so those will be missed. Fixes: a4a65c6fe08b ("selftests/vsock: add initial vmtest.sh for vsock") Reviewed-by: Simon Horman Signed-off-by: Bobby Eshleman Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20251105-vsock-vmtest-dmesg-fix-v2-1-1a042a14892c@meta.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/vsock/vmtest.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh index edacebfc1632..8ceeb8a7894f 100755 --- a/tools/testing/selftests/vsock/vmtest.sh +++ b/tools/testing/selftests/vsock/vmtest.sh @@ -389,9 +389,9 @@ run_test() { local rc host_oops_cnt_before=$(dmesg | grep -c -i 'Oops') - host_warn_cnt_before=$(dmesg --level=warn | wc -l) + host_warn_cnt_before=$(dmesg --level=warn | grep -c -i 'vsock') vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops') - vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | wc -l) + vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock') name=$(echo "${1}" | awk '{ print $1 }') eval test_"${name}" @@ -403,7 +403,7 @@ run_test() { rc=$KSFT_FAIL fi - host_warn_cnt_after=$(dmesg --level=warn | wc -l) + host_warn_cnt_after=$(dmesg --level=warn | grep -c -i 'vsock') if [[ ${host_warn_cnt_after} -gt ${host_warn_cnt_before} ]]; then echo "FAIL: kernel warning detected on host" | log_host "${name}" rc=$KSFT_FAIL @@ -415,7 +415,7 @@ run_test() { rc=$KSFT_FAIL fi - vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | wc -l) + vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock') if [[ ${vm_warn_cnt_after} -gt ${vm_warn_cnt_before} ]]; then echo "FAIL: kernel warning detected on vm" | log_host "${name}" rc=$KSFT_FAIL