From 1e43ebcd5152b3e681a334cc6542fb21770c3a2e Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Wed, 26 Nov 2025 10:48:49 +0100 Subject: [PATCH 1/7] iavf: Implement settime64 with -EOPNOTSUPP ptp_clock_settime() assumes every ptp_clock has implemented settime64(). Stub it with -EOPNOTSUPP to prevent a NULL dereference. The fix is similar to commit 329d050bbe63 ("gve: Implement settime64 with -EOPNOTSUPP"). Fixes: d734223b2f0d ("iavf: add initial framework for registering PTP clock") Signed-off-by: Michal Schmidt Reviewed-by: Aleksandr Loktionov Reviewed-by: Tim Hostetler Link: https://patch.msgid.link/20251126094850.2842557-1-mschmidt@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/iavf/iavf_ptp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/intel/iavf/iavf_ptp.c b/drivers/net/ethernet/intel/iavf/iavf_ptp.c index b4d5eda2e84f..9cbd8c154031 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ptp.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ptp.c @@ -252,6 +252,12 @@ static int iavf_ptp_gettimex64(struct ptp_clock_info *info, return iavf_read_phc_indirect(adapter, ts, sts); } +static int iavf_ptp_settime64(struct ptp_clock_info *info, + const struct timespec64 *ts) +{ + return -EOPNOTSUPP; +} + /** * iavf_ptp_cache_phc_time - Cache PHC time for performing timestamp extension * @adapter: private adapter structure @@ -320,6 +326,7 @@ static int iavf_ptp_register_clock(struct iavf_adapter *adapter) KBUILD_MODNAME, dev_name(dev)); ptp_info->owner = THIS_MODULE; ptp_info->gettimex64 = iavf_ptp_gettimex64; + ptp_info->settime64 = iavf_ptp_settime64; ptp_info->do_aux_work = iavf_ptp_do_aux_work; clock = ptp_clock_register(ptp_info, dev); From 1f73a56f986005f0bc64ed23873930e2ee4f5911 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 26 Nov 2025 11:26:25 +0100 Subject: [PATCH 2/7] net: vxlan: prevent NULL deref in vxlan_xmit_one Neither sock4 nor sock6 pointers are guaranteed to be non-NULL in vxlan_xmit_one, e.g. if the iface is brought down. This can lead to the following NULL dereference: BUG: kernel NULL pointer dereference, address: 0000000000000010 Oops: Oops: 0000 [#1] SMP NOPTI RIP: 0010:vxlan_xmit_one+0xbb3/0x1580 Call Trace: vxlan_xmit+0x429/0x610 dev_hard_start_xmit+0x55/0xa0 __dev_queue_xmit+0x6d0/0x7f0 ip_finish_output2+0x24b/0x590 ip_output+0x63/0x110 Mentioned commits changed the code path in vxlan_xmit_one and as a side effect the sock4/6 pointer validity checks in vxlan(6)_get_route were lost. Fix this by adding back checks. Since both commits being fixed were released in the same version (v6.7) and are strongly related, bundle the fixes in a single commit. Reported-by: Liang Li Fixes: 6f19b2c136d9 ("vxlan: use generic function for tunnel IPv4 route lookup") Fixes: 2aceb896ee18 ("vxlan: use generic function for tunnel IPv6 route lookup") Cc: Beniamino Galvani Signed-off-by: Antoine Tenart Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://patch.msgid.link/20251126102627.74223-1-atenart@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index a5c55e7e4d79..e957aa12a8a4 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2349,7 +2349,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, int addr_family; __u8 tos, ttl; int ifindex; - int err; + int err = 0; u32 flags = vxlan->cfg.flags; bool use_cache; bool udp_sum = false; @@ -2454,12 +2454,18 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, rcu_read_lock(); if (addr_family == AF_INET) { - struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock); + struct vxlan_sock *sock4; u16 ipcb_flags = 0; struct rtable *rt; __be16 df = 0; __be32 saddr; + sock4 = rcu_dereference(vxlan->vn4_sock); + if (unlikely(!sock4)) { + reason = SKB_DROP_REASON_DEV_READY; + goto tx_error; + } + if (!ifindex) ifindex = sock4->sock->sk->sk_bound_dev_if; @@ -2534,10 +2540,16 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ipcb_flags); #if IS_ENABLED(CONFIG_IPV6) } else { - struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock); + struct vxlan_sock *sock6; struct in6_addr saddr; u16 ip6cb_flags = 0; + sock6 = rcu_dereference(vxlan->vn6_sock); + if (unlikely(!sock6)) { + reason = SKB_DROP_REASON_DEV_READY; + goto tx_error; + } + if (!ifindex) ifindex = sock6->sock->sk->sk_bound_dev_if; From a6c121a2432eee2c4ebceb1483ccd4a50a52983d Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Thu, 27 Nov 2025 12:44:35 +0100 Subject: [PATCH 3/7] net: phy: aquantia: check for NVMEM deferral Currently, if NVMEM provider is probed later than Aquantia, loading the firmware will fail with -EINVAL. To fix this, simply check for -EPROBE_DEFER when NVMEM is attempted and return it. Fixes: e93984ebc1c8 ("net: phy: aquantia: add firmware load support") Signed-off-by: Robert Marko Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20251127114514.460924-1-robimarko@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/aquantia/aquantia_firmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/aquantia/aquantia_firmware.c b/drivers/net/phy/aquantia/aquantia_firmware.c index bbbcc9736b00..569256152689 100644 --- a/drivers/net/phy/aquantia/aquantia_firmware.c +++ b/drivers/net/phy/aquantia/aquantia_firmware.c @@ -369,7 +369,7 @@ int aqr_firmware_load(struct phy_device *phydev) * assume that, and load a new image. */ ret = aqr_firmware_load_nvmem(phydev); - if (!ret) + if (ret == -EPROBE_DEFER || !ret) return ret; ret = aqr_firmware_load_fs(phydev); From 2c28ee720ad14f58eb88a97ec3efe7c5c315ea5d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 27 Nov 2025 14:33:10 +0000 Subject: [PATCH 4/7] selftests: bonding: add delay before each xvlan_over_bond connectivity check Jakub reported increased flakiness in bond_macvlan_ipvlan.sh on regular kernel, while the tests consistently pass on a debug kernel. This suggests a timing-sensitive issue. To mitigate this, introduce a short sleep before each xvlan_over_bond connectivity check. The delay helps ensure neighbor and route cache have fully converged before verifying connectivity. The sleep interval is kept minimal since check_connection() is invoked nearly 100 times during the test. Fixes: 246af950b940 ("selftests: bonding: add macvlan over bond testing") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20251114082014.750edfad@kernel.org Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20251127143310.47740-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh index c4711272fe45..559f300f965a 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh @@ -30,6 +30,7 @@ check_connection() local message=${3} RET=0 + sleep 0.25 ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null check_err $? "ping failed" log_test "${bond_mode}/${xvlan_type}_${xvlan_mode}: ${message}" From e5235eb6cfe02a51256013a78f7b28779a7740d5 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 27 Nov 2025 07:30:15 -0800 Subject: [PATCH 5/7] net: netpoll: initialize work queue before error checks Prevent a kernel warning when netconsole setup fails on devices with IFF_DISABLE_NETPOLL flag. The warning (at kernel/workqueue.c:4242 in __flush_work) occurs because the cleanup path tries to cancel an uninitialized work queue. When __netpoll_setup() encounters a device with IFF_DISABLE_NETPOLL, it fails early and calls skb_pool_flush() for cleanup. This function calls cancel_work_sync(&np->refill_wq), but refill_wq hasn't been initialized yet, triggering the warning. Move INIT_WORK() to the beginning of __netpoll_setup(), ensuring the work queue is properly initialized before any potential failure points. This allows the cleanup path to safely cancel the work queue regardless of where the setup fails. Fixes: 248f6571fd4c5 ("netpoll: Optimize skb refilling on critical path") Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20251127-netpoll_fix_init_work-v1-1-65c07806d736@debian.org Signed-off-by: Jakub Kicinski --- net/core/netpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 331764845e8f..09f72f10813c 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -554,6 +554,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) int err; skb_queue_head_init(&np->skb_pool); + INIT_WORK(&np->refill_wq, refill_skbs_work_handler); if (ndev->priv_flags & IFF_DISABLE_NETPOLL) { np_err(np, "%s doesn't support polling, aborting\n", @@ -591,7 +592,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) /* fill up the skb queue */ refill_skbs(np); - INIT_WORK(&np->refill_wq, refill_skbs_work_handler); /* last thing to do is link it to the net device structure */ rcu_assign_pointer(ndev->npinfo, npinfo); From 9fefc78f7f02d71810776fdeb119a05a946a27cc Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Thu, 27 Nov 2025 17:14:14 -0700 Subject: [PATCH 6/7] net/sched: sch_cake: Fix incorrect qlen reduction in cake_drop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In cake_drop(), qdisc_tree_reduce_backlog() is used to update the qlen and backlog of the qdisc hierarchy. Its caller, cake_enqueue(), assumes that the parent qdisc will enqueue the current packet. However, this assumption breaks when cake_enqueue() returns NET_XMIT_CN: the parent qdisc stops enqueuing current packet, leaving the tree qlen/backlog accounting inconsistent. This mismatch can lead to a NULL dereference (e.g., when the parent Qdisc is qfq_qdisc). This patch computes the qlen/backlog delta in a more robust way by observing the difference before and after the series of cake_drop() calls, and then compensates the qdisc tree accounting if cake_enqueue() returns NET_XMIT_CN. To ensure correct compensation when ACK thinning is enabled, a new variable is introduced to keep qlen unchanged. Fixes: 15de71d06a40 ("net/sched: Make cake_enqueue return NET_XMIT_CN when past buffer_limit") Signed-off-by: Xiang Mei Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20251128001415.377823-1-xmei5@asu.edu Signed-off-by: Paolo Abeni --- net/sched/sch_cake.c | 60 ++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 32bacfc314c2..d325a90cde9e 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1597,7 +1597,6 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_OVERLIMIT); sch->q.qlen--; - qdisc_tree_reduce_backlog(sch, 1, len); cake_heapify(q, 0); @@ -1743,14 +1742,14 @@ static void cake_reconfigure(struct Qdisc *sch); static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + u32 idx, tin, prev_qlen, prev_backlog, drop_id; struct cake_sched_data *q = qdisc_priv(sch); - int len = qdisc_pkt_len(skb); - int ret; + int len = qdisc_pkt_len(skb), ret; struct sk_buff *ack = NULL; ktime_t now = ktime_get(); struct cake_tin_data *b; struct cake_flow *flow; - u32 idx, tin; + bool same_flow = false; /* choose flow to insert into */ idx = cake_classify(sch, &b, skb, q->flow_mode, &ret); @@ -1823,6 +1822,8 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, consume_skb(skb); } else { /* not splitting */ + int ack_pkt_len = 0; + cobalt_set_enqueue_time(skb, now); get_cobalt_cb(skb)->adjusted_len = cake_overhead(q, skb); flow_queue_add(flow, skb); @@ -1833,13 +1834,13 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (ack) { b->ack_drops++; sch->qstats.drops++; - b->bytes += qdisc_pkt_len(ack); - len -= qdisc_pkt_len(ack); + ack_pkt_len = qdisc_pkt_len(ack); + b->bytes += ack_pkt_len; q->buffer_used += skb->truesize - ack->truesize; if (q->rate_flags & CAKE_FLAG_INGRESS) cake_advance_shaper(q, b, ack, now, true); - qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(ack)); + qdisc_tree_reduce_backlog(sch, 1, ack_pkt_len); consume_skb(ack); } else { sch->q.qlen++; @@ -1848,11 +1849,11 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* stats */ b->packets++; - b->bytes += len; - b->backlogs[idx] += len; - b->tin_backlog += len; - sch->qstats.backlog += len; - q->avg_window_bytes += len; + b->bytes += len - ack_pkt_len; + b->backlogs[idx] += len - ack_pkt_len; + b->tin_backlog += len - ack_pkt_len; + sch->qstats.backlog += len - ack_pkt_len; + q->avg_window_bytes += len - ack_pkt_len; } if (q->overflow_timeout) @@ -1927,24 +1928,29 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (q->buffer_used > q->buffer_max_used) q->buffer_max_used = q->buffer_used; - if (q->buffer_used > q->buffer_limit) { - bool same_flow = false; - u32 dropped = 0; - u32 drop_id; + if (q->buffer_used <= q->buffer_limit) + return NET_XMIT_SUCCESS; - while (q->buffer_used > q->buffer_limit) { - dropped++; - drop_id = cake_drop(sch, to_free); + prev_qlen = sch->q.qlen; + prev_backlog = sch->qstats.backlog; - if ((drop_id >> 16) == tin && - (drop_id & 0xFFFF) == idx) - same_flow = true; - } - b->drop_overlimit += dropped; - - if (same_flow) - return NET_XMIT_CN; + while (q->buffer_used > q->buffer_limit) { + drop_id = cake_drop(sch, to_free); + if ((drop_id >> 16) == tin && + (drop_id & 0xFFFF) == idx) + same_flow = true; } + + prev_qlen -= sch->q.qlen; + prev_backlog -= sch->qstats.backlog; + b->drop_overlimit += prev_qlen; + + if (same_flow) { + qdisc_tree_reduce_backlog(sch, prev_qlen - 1, + prev_backlog - len); + return NET_XMIT_CN; + } + qdisc_tree_reduce_backlog(sch, prev_qlen, prev_backlog); return NET_XMIT_SUCCESS; } From 108f9405ce81085284c7ab09b84784b94b611435 Mon Sep 17 00:00:00 2001 From: Xiang Mei Date: Thu, 27 Nov 2025 17:14:16 -0700 Subject: [PATCH 7/7] selftests/tc-testing: Test CAKE scheduler when enqueue drops packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tests that trigger packet drops in cake_enqueue(): "CAKE with QFQ Parent - CAKE enqueue with packets dropping". It forces CAKE_enqueue to return NET_XMIT_CN after dropping the packets when it has a QFQ parent. Signed-off-by: Xiang Mei Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20251128001415.377823-3-xmei5@asu.edu Signed-off-by: Paolo Abeni --- .../tc-testing/tc-tests/infra/qdiscs.json | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 0091bcd91c2c..47de27fd4f90 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -1005,5 +1005,33 @@ "teardown": [ "$TC qdisc del dev $DUMMY clsact" ] + }, + { + "id": "4366", + "name": "CAKE with QFQ Parent - CAKE enqueue with packets dropping", + "category": [ + "qdisc", + "cake", + "netem" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup":[ + "$TC qdisc add dev $DUMMY handle 1: root qfq", + "$TC class add dev $DUMMY parent 1: classid 1:1 qfq maxpkt 1024", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: cake memlimit 9", + "$TC filter add dev $DUMMY protocol ip parent 1: prio 1 u32 match ip protocol 1 0xff flowid 1:1", + "ping -I$DUMMY -f -c1 -s64 -W1 10.10.10.1 || true", + "$TC qdisc replace dev $DUMMY parent 1:1 handle 3: netem delay 0ms" + ], + "cmdUnderTest": "ping -I$DUMMY -f -c1 -s64 -W1 10.10.10.1 || true", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "qdisc qfq 1:", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] } ]