From 63c1f19a3be3169e51a5812d22a6d0c879414076 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 9 Apr 2025 15:59:56 +0200 Subject: [PATCH 01/40] espintcp: fix skb leaks A few error paths are missing a kfree_skb. Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Signed-off-by: Steffen Klassert --- net/ipv4/esp4.c | 4 +++- net/ipv6/esp6.c | 4 +++- net/xfrm/espintcp.c | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 0e4076866c0a..876df672c0bf 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -199,8 +199,10 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) sk = esp_find_tcp_sk(x); err = PTR_ERR_OR_ZERO(sk); - if (err) + if (err) { + kfree_skb(skb); goto out; + } bh_lock_sock(sk); if (sock_owned_by_user(sk)) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9e73944e3b53..574989b82179 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -216,8 +216,10 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) sk = esp6_find_tcp_sk(x); err = PTR_ERR_OR_ZERO(sk); - if (err) + if (err) { + kfree_skb(skb); goto out; + } bh_lock_sock(sk); if (sock_owned_by_user(sk)) diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index fe82e2d07300..fc7a603b04f1 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -171,8 +171,10 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb) struct espintcp_ctx *ctx = espintcp_getctx(sk); if (skb_queue_len(&ctx->out_queue) >= - READ_ONCE(net_hotdata.max_backlog)) + READ_ONCE(net_hotdata.max_backlog)) { + kfree_skb(skb); return -ENOBUFS; + } __skb_queue_tail(&ctx->out_queue, skb); From 028363685bd0b7a19b4a820f82dd905b1dc83999 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 9 Apr 2025 15:59:57 +0200 Subject: [PATCH 02/40] espintcp: remove encap socket caching to avoid reference leak The current scheme for caching the encap socket can lead to reference leaks when we try to delete the netns. The reference chain is: xfrm_state -> enacp_sk -> netns Since the encap socket is a userspace socket, it holds a reference on the netns. If we delete the espintcp state (through flush or individual delete) before removing the netns, the reference on the socket is dropped and the netns is correctly deleted. Otherwise, the netns may not be reachable anymore (if all processes within the ns have terminated), so we cannot delete the xfrm state to drop its reference on the socket. This patch results in a small (~2% in my tests) performance regression. A GC-type mechanism could be added for the socket cache, to clear references if the state hasn't been used "recently", but it's a lot more complex than just not caching the socket. Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 - net/ipv4/esp4.c | 49 ++++--------------------------------------- net/ipv6/esp6.c | 49 ++++--------------------------------------- net/xfrm/xfrm_state.c | 3 --- 4 files changed, 8 insertions(+), 94 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 39365fd2ea17..06ab2a3d2ebd 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -236,7 +236,6 @@ struct xfrm_state { /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; - struct sock __rcu *encap_sk; /* NAT keepalive */ u32 nat_keepalive_interval; /* seconds */ diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 876df672c0bf..f14a41ee4aa1 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -120,47 +120,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) } #ifdef CONFIG_INET_ESPINTCP -struct esp_tcp_sk { - struct sock *sk; - struct rcu_head rcu; -}; - -static void esp_free_tcp_sk(struct rcu_head *head) -{ - struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu); - - sock_put(esk->sk); - kfree(esk); -} - static struct sock *esp_find_tcp_sk(struct xfrm_state *x) { struct xfrm_encap_tmpl *encap = x->encap; struct net *net = xs_net(x); - struct esp_tcp_sk *esk; __be16 sport, dport; - struct sock *nsk; struct sock *sk; - sk = rcu_dereference(x->encap_sk); - if (sk && sk->sk_state == TCP_ESTABLISHED) - return sk; - spin_lock_bh(&x->lock); sport = encap->encap_sport; dport = encap->encap_dport; - nsk = rcu_dereference_protected(x->encap_sk, - lockdep_is_held(&x->lock)); - if (sk && sk == nsk) { - esk = kmalloc(sizeof(*esk), GFP_ATOMIC); - if (!esk) { - spin_unlock_bh(&x->lock); - return ERR_PTR(-ENOMEM); - } - RCU_INIT_POINTER(x->encap_sk, NULL); - esk->sk = sk; - call_rcu(&esk->rcu, esp_free_tcp_sk); - } spin_unlock_bh(&x->lock); sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, x->id.daddr.a4, @@ -173,20 +142,6 @@ static struct sock *esp_find_tcp_sk(struct xfrm_state *x) return ERR_PTR(-EINVAL); } - spin_lock_bh(&x->lock); - nsk = rcu_dereference_protected(x->encap_sk, - lockdep_is_held(&x->lock)); - if (encap->encap_sport != sport || - encap->encap_dport != dport) { - sock_put(sk); - sk = nsk ?: ERR_PTR(-EREMCHG); - } else if (sk == nsk) { - sock_put(sk); - } else { - rcu_assign_pointer(x->encap_sk, sk); - } - spin_unlock_bh(&x->lock); - return sk; } @@ -211,6 +166,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) err = espintcp_push_skb(sk, skb); bh_unlock_sock(sk); + sock_put(sk); + out: rcu_read_unlock(); return err; @@ -394,6 +351,8 @@ static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x, if (IS_ERR(sk)) return ERR_CAST(sk); + sock_put(sk); + *lenp = htons(len); esph = (struct ip_esp_hdr *)(lenp + 1); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 574989b82179..72adfc107b55 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -137,47 +137,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) } #ifdef CONFIG_INET6_ESPINTCP -struct esp_tcp_sk { - struct sock *sk; - struct rcu_head rcu; -}; - -static void esp_free_tcp_sk(struct rcu_head *head) -{ - struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu); - - sock_put(esk->sk); - kfree(esk); -} - static struct sock *esp6_find_tcp_sk(struct xfrm_state *x) { struct xfrm_encap_tmpl *encap = x->encap; struct net *net = xs_net(x); - struct esp_tcp_sk *esk; __be16 sport, dport; - struct sock *nsk; struct sock *sk; - sk = rcu_dereference(x->encap_sk); - if (sk && sk->sk_state == TCP_ESTABLISHED) - return sk; - spin_lock_bh(&x->lock); sport = encap->encap_sport; dport = encap->encap_dport; - nsk = rcu_dereference_protected(x->encap_sk, - lockdep_is_held(&x->lock)); - if (sk && sk == nsk) { - esk = kmalloc(sizeof(*esk), GFP_ATOMIC); - if (!esk) { - spin_unlock_bh(&x->lock); - return ERR_PTR(-ENOMEM); - } - RCU_INIT_POINTER(x->encap_sk, NULL); - esk->sk = sk; - call_rcu(&esk->rcu, esp_free_tcp_sk); - } spin_unlock_bh(&x->lock); sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &x->id.daddr.in6, @@ -190,20 +159,6 @@ static struct sock *esp6_find_tcp_sk(struct xfrm_state *x) return ERR_PTR(-EINVAL); } - spin_lock_bh(&x->lock); - nsk = rcu_dereference_protected(x->encap_sk, - lockdep_is_held(&x->lock)); - if (encap->encap_sport != sport || - encap->encap_dport != dport) { - sock_put(sk); - sk = nsk ?: ERR_PTR(-EREMCHG); - } else if (sk == nsk) { - sock_put(sk); - } else { - rcu_assign_pointer(x->encap_sk, sk); - } - spin_unlock_bh(&x->lock); - return sk; } @@ -228,6 +183,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) err = espintcp_push_skb(sk, skb); bh_unlock_sock(sk); + sock_put(sk); + out: rcu_read_unlock(); return err; @@ -424,6 +381,8 @@ static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x, if (IS_ERR(sk)) return ERR_CAST(sk); + sock_put(sk); + *lenp = htons(len); esph = (struct ip_esp_hdr *)(lenp + 1); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 341d79ecb5c2..2f57dabcc70b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -838,9 +838,6 @@ int __xfrm_state_delete(struct xfrm_state *x) xfrm_nat_keepalive_state_updated(x); spin_unlock(&net->xfrm.xfrm_state_lock); - if (x->encap_sk) - sock_put(rcu_dereference_raw(x->encap_sk)); - xfrm_dev_state_delete(x); /* All xfrm_state objects are created by xfrm_state_alloc. From e3fd0577768584ece824c8b661c40fb3d912812a Mon Sep 17 00:00:00 2001 From: Tobias Brunner Date: Tue, 15 Apr 2025 13:13:18 +0200 Subject: [PATCH 03/40] xfrm: Fix UDP GRO handling for some corner cases This fixes an issue that's caused if there is a mismatch between the data offset in the GRO header and the length fields in the regular sk_buff due to the pskb_pull()/skb_push() calls. That's because the UDP GRO layer stripped off the UDP header via skb_gro_pull() already while the UDP header was explicitly not pulled/pushed in this function. For example, an IKE packet that triggered this had len=data_len=1268 and the data_offset in the GRO header was 28 (IPv4 + UDP). So pskb_pull() was called with an offset of 28-8=20, which reduced len to 1248 and via pskb_may_pull() and __pskb_pull_tail() it also set data_len to 1248. As the ESP offload module was not loaded, the function bailed out and called skb_push(), which restored len to 1268, however, data_len remained at 1248. So while skb_headlen() was 0 before, it was now 20. The latter caused a difference of 8 instead of 28 (or 0 if pskb_pull()/skb_push() was called with the complete GRO data_offset) in gro_try_pull_from_frag0() that triggered a call to gro_pull_from_frag0() that corrupted the packet. This change uses a more GRO-like approach seen in other GRO receivers via skb_gro_header() to just read the actual data we are interested in and does not try to "restore" the UDP header at this point to call the existing function. If the offload module is not loaded, it immediately bails out, otherwise, it only does a quick check to see if the packet is an IKE or keepalive packet instead of calling the existing function. Fixes: 172bf009c18d ("xfrm: Support GRO for IPv4 ESP in UDP encapsulation") Fixes: 221ddb723d90 ("xfrm: Support GRO for IPv6 ESP in UDP encapsulation") Signed-off-by: Tobias Brunner Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_input.c | 18 ++++++++++-------- net/ipv6/xfrm6_input.c | 18 ++++++++++-------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index b5b06323cfd9..0d31a8c108d4 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -182,11 +182,15 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, int offset = skb_gro_offset(skb); const struct net_offload *ops; struct sk_buff *pp = NULL; - int ret; + int len, dlen; + __u8 *udpdata; + __be32 *udpdata32; - offset = offset - sizeof(struct udphdr); - - if (!pskb_pull(skb, offset)) + len = skb->len - offset; + dlen = offset + min(len, 8); + udpdata = skb_gro_header(skb, dlen, offset); + udpdata32 = (__be32 *)udpdata; + if (unlikely(!udpdata)) return NULL; rcu_read_lock(); @@ -194,11 +198,10 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, if (!ops || !ops->callbacks.gro_receive) goto out; - ret = __xfrm4_udp_encap_rcv(sk, skb, false); - if (ret) + /* check if it is a keepalive or IKE packet */ + if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0) goto out; - skb_push(skb, offset); NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); @@ -208,7 +211,6 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, out: rcu_read_unlock(); - skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 1; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 4abc5e9d6322..841c81abaaf4 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -179,14 +179,18 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, int offset = skb_gro_offset(skb); const struct net_offload *ops; struct sk_buff *pp = NULL; - int ret; + int len, dlen; + __u8 *udpdata; + __be32 *udpdata32; if (skb->protocol == htons(ETH_P_IP)) return xfrm4_gro_udp_encap_rcv(sk, head, skb); - offset = offset - sizeof(struct udphdr); - - if (!pskb_pull(skb, offset)) + len = skb->len - offset; + dlen = offset + min(len, 8); + udpdata = skb_gro_header(skb, dlen, offset); + udpdata32 = (__be32 *)udpdata; + if (unlikely(!udpdata)) return NULL; rcu_read_lock(); @@ -194,11 +198,10 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, if (!ops || !ops->callbacks.gro_receive) goto out; - ret = __xfrm6_udp_encap_rcv(sk, skb, false); - if (ret) + /* check if it is a keepalive or IKE packet */ + if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0) goto out; - skb_push(skb, offset); NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); @@ -208,7 +211,6 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, out: rcu_read_unlock(); - skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 1; From 417fae2c40896f0a67ce7fa7d9b8c6056ec36dd9 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 25 Apr 2025 16:32:55 +0200 Subject: [PATCH 04/40] xfrm: ipcomp: fix truesize computation on receive ipcomp_post_acomp currently drops all frags (via pskb_trim_unique(skb, 0)), and then subtracts the old skb->data_len from truesize. This adjustment has already be done during trimming (in skb_condense), so we don't need to do it again. This shows up for example when running fragmented traffic over ipcomp, we end up hitting the WARN_ON_ONCE in skb_try_coalesce. Fixes: eb2953d26971 ("xfrm: ipcomp: Use crypto_acomp interface") Signed-off-by: Sabrina Dubroca Acked-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_ipcomp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 0c1420534394..907c3ccb440d 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -48,7 +48,6 @@ static int ipcomp_post_acomp(struct sk_buff *skb, int err, int hlen) { struct acomp_req *req = ipcomp_cb(skb)->req; struct ipcomp_req_extra *extra; - const int plen = skb->data_len; struct scatterlist *dsg; int len, dlen; @@ -64,7 +63,7 @@ static int ipcomp_post_acomp(struct sk_buff *skb, int err, int hlen) /* Only update truesize on input. */ if (!hlen) - skb->truesize += dlen - plen; + skb->truesize += dlen; skb->data_len = dlen; skb->len += dlen; From 0b91fda3a1f044141e1e615456ff62508c32b202 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Wed, 7 May 2025 13:31:58 +0200 Subject: [PATCH 05/40] xfrm: Sanitize marks before insert Prior to this patch, the mark is sanitized (applying the state's mask to the state's value) only on inserts when checking if a conflicting XFRM state or policy exists. We discovered in Cilium that this same sanitization does not occur in the hot-path __xfrm_state_lookup. In the hot-path, the sk_buff's mark is simply compared to the state's value: if ((mark & x->mark.m) != x->mark.v) continue; Therefore, users can define unsanitized marks (ex. 0xf42/0xf00) which will never match any packet. This commit updates __xfrm_state_insert and xfrm_policy_insert to store the sanitized marks, thus removing this footgun. This has the side effect of changing the ip output, as the returned mark will have the mask applied to it when printed. Fixes: 3d6acfa7641f ("xfrm: SA lookups with mark") Signed-off-by: Paul Chaignon Signed-off-by: Louis DeLosSantos Co-developed-by: Louis DeLosSantos Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 3 +++ net/xfrm/xfrm_state.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 143ac3aa7537..f4bad8c895d6 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1581,6 +1581,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct xfrm_policy *delpol; struct hlist_head *chain; + /* Sanitize mark before store */ + policy->mark.v &= policy->mark.m; + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); if (chain) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 2f57dabcc70b..07fe8e5daa32 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1718,6 +1718,9 @@ static void __xfrm_state_insert(struct xfrm_state *x) list_add(&x->km.all, &net->xfrm.state_all); + /* Sanitize mark before store */ + x->mark.v &= x->mark.m; + h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h, From 7af8479d9eb4319b4ba7b47a8c4d2c55af1c31e1 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 7 May 2025 15:00:30 -0400 Subject: [PATCH 06/40] Bluetooth: L2CAP: Fix not checking l2cap_chan security level l2cap_check_enc_key_size shall check the security level of the l2cap_chan rather than the hci_conn since for incoming connection request that may be different as hci_conn may already been encrypted using a different security level. Fixes: 522e9ed157e3 ("Bluetooth: l2cap: Check encryption key size on incoming connection") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 73472756618a..042d3ac3b4a3 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1411,7 +1411,8 @@ static void l2cap_request_info(struct l2cap_conn *conn) sizeof(req), &req); } -static bool l2cap_check_enc_key_size(struct hci_conn *hcon) +static bool l2cap_check_enc_key_size(struct hci_conn *hcon, + struct l2cap_chan *chan) { /* The minimum encryption key size needs to be enforced by the * host stack before establishing any L2CAP connections. The @@ -1425,7 +1426,7 @@ static bool l2cap_check_enc_key_size(struct hci_conn *hcon) int min_key_size = hcon->hdev->min_enc_key_size; /* On FIPS security level, key size must be 16 bytes */ - if (hcon->sec_level == BT_SECURITY_FIPS) + if (chan->sec_level == BT_SECURITY_FIPS) min_key_size = 16; return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) || @@ -1453,7 +1454,7 @@ static void l2cap_do_start(struct l2cap_chan *chan) !__l2cap_no_conn_pending(chan)) return; - if (l2cap_check_enc_key_size(conn->hcon)) + if (l2cap_check_enc_key_size(conn->hcon, chan)) l2cap_start_connection(chan); else __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); @@ -1528,7 +1529,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn) continue; } - if (l2cap_check_enc_key_size(conn->hcon)) + if (l2cap_check_enc_key_size(conn->hcon, chan)) l2cap_start_connection(chan); else l2cap_chan_close(chan, ECONNREFUSED); @@ -3992,7 +3993,7 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, /* Check if the ACL is secure enough (if not SDP) */ if (psm != cpu_to_le16(L2CAP_PSM_SDP) && (!hci_conn_check_link_mode(conn->hcon) || - !l2cap_check_enc_key_size(conn->hcon))) { + !l2cap_check_enc_key_size(conn->hcon, pchan))) { conn->disc_reason = HCI_ERROR_AUTH_FAILURE; result = L2CAP_CR_SEC_BLOCK; goto response; @@ -7352,7 +7353,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) } if (chan->state == BT_CONNECT) { - if (!status && l2cap_check_enc_key_size(hcon)) + if (!status && l2cap_check_enc_key_size(hcon, chan)) l2cap_start_connection(chan); else __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); @@ -7362,7 +7363,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) struct l2cap_conn_rsp rsp; __u16 res, stat; - if (!status && l2cap_check_enc_key_size(hcon)) { + if (!status && l2cap_check_enc_key_size(hcon, chan)) { if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { res = L2CAP_CR_PEND; stat = L2CAP_CS_AUTHOR_PEND; From 4bcb0c7dc25446b99fc7a8fa2a143d69f3314162 Mon Sep 17 00:00:00 2001 From: En-Wei Wu Date: Thu, 8 May 2025 22:15:20 +0800 Subject: [PATCH 07/40] Bluetooth: btusb: use skb_pull to avoid unsafe access in QCA dump handling Use skb_pull() and skb_pull_data() to safely parse QCA dump packets. This avoids direct pointer math on skb->data, which could lead to invalid access if the packet is shorter than expected. Fixes: 20981ce2d5a5 ("Bluetooth: btusb: Add WCN6855 devcoredump support") Signed-off-by: En-Wei Wu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 98 ++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 58 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index a42dedb78e0a..256b451bbe06 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3014,9 +3014,8 @@ static void btusb_coredump_qca(struct hci_dev *hdev) static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) { int ret = 0; + unsigned int skip = 0; u8 pkt_type; - u8 *sk_ptr; - unsigned int sk_len; u16 seqno; u32 dump_size; @@ -3025,18 +3024,13 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) struct usb_device *udev = btdata->udev; pkt_type = hci_skb_pkt_type(skb); - sk_ptr = skb->data; - sk_len = skb->len; + skip = sizeof(struct hci_event_hdr); + if (pkt_type == HCI_ACLDATA_PKT) + skip += sizeof(struct hci_acl_hdr); - if (pkt_type == HCI_ACLDATA_PKT) { - sk_ptr += HCI_ACL_HDR_SIZE; - sk_len -= HCI_ACL_HDR_SIZE; - } + skb_pull(skb, skip); + dump_hdr = (struct qca_dump_hdr *)skb->data; - sk_ptr += HCI_EVENT_HDR_SIZE; - sk_len -= HCI_EVENT_HDR_SIZE; - - dump_hdr = (struct qca_dump_hdr *)sk_ptr; seqno = le16_to_cpu(dump_hdr->seqno); if (seqno == 0) { set_bit(BTUSB_HW_SSR_ACTIVE, &btdata->flags); @@ -3056,16 +3050,15 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) btdata->qca_dump.ram_dump_size = dump_size; btdata->qca_dump.ram_dump_seqno = 0; - sk_ptr += offsetof(struct qca_dump_hdr, data0); - sk_len -= offsetof(struct qca_dump_hdr, data0); + + skb_pull(skb, offsetof(struct qca_dump_hdr, data0)); usb_disable_autosuspend(udev); bt_dev_info(hdev, "%s memdump size(%u)\n", (pkt_type == HCI_ACLDATA_PKT) ? "ACL" : "event", dump_size); } else { - sk_ptr += offsetof(struct qca_dump_hdr, data); - sk_len -= offsetof(struct qca_dump_hdr, data); + skb_pull(skb, offsetof(struct qca_dump_hdr, data)); } if (!btdata->qca_dump.ram_dump_size) { @@ -3085,7 +3078,6 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) return ret; } - skb_pull(skb, skb->len - sk_len); hci_devcd_append(hdev, skb); btdata->qca_dump.ram_dump_seqno++; if (seqno == QCA_LAST_SEQUENCE_NUM) { @@ -3113,68 +3105,58 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) /* Return: true if the ACL packet is a dump packet, false otherwise. */ static bool acl_pkt_is_dump_qca(struct hci_dev *hdev, struct sk_buff *skb) { - u8 *sk_ptr; - unsigned int sk_len; - struct hci_event_hdr *event_hdr; struct hci_acl_hdr *acl_hdr; struct qca_dump_hdr *dump_hdr; + struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); + bool is_dump = false; - sk_ptr = skb->data; - sk_len = skb->len; - - acl_hdr = hci_acl_hdr(skb); - if (le16_to_cpu(acl_hdr->handle) != QCA_MEMDUMP_ACL_HANDLE) + if (!clone) return false; - sk_ptr += HCI_ACL_HDR_SIZE; - sk_len -= HCI_ACL_HDR_SIZE; - event_hdr = (struct hci_event_hdr *)sk_ptr; + acl_hdr = skb_pull_data(clone, sizeof(*acl_hdr)); + if (!acl_hdr || (le16_to_cpu(acl_hdr->handle) != QCA_MEMDUMP_ACL_HANDLE)) + goto out; - if ((event_hdr->evt != HCI_VENDOR_PKT) || - (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE))) - return false; + event_hdr = skb_pull_data(clone, sizeof(*event_hdr)); + if (!event_hdr || (event_hdr->evt != HCI_VENDOR_PKT)) + goto out; - sk_ptr += HCI_EVENT_HDR_SIZE; - sk_len -= HCI_EVENT_HDR_SIZE; + dump_hdr = skb_pull_data(clone, sizeof(*dump_hdr)); + if (!dump_hdr || (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || + (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) + goto out; - dump_hdr = (struct qca_dump_hdr *)sk_ptr; - if ((sk_len < offsetof(struct qca_dump_hdr, data)) || - (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || - (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) - return false; - - return true; + is_dump = true; +out: + consume_skb(clone); + return is_dump; } /* Return: true if the event packet is a dump packet, false otherwise. */ static bool evt_pkt_is_dump_qca(struct hci_dev *hdev, struct sk_buff *skb) { - u8 *sk_ptr; - unsigned int sk_len; - struct hci_event_hdr *event_hdr; struct qca_dump_hdr *dump_hdr; + struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); + bool is_dump = false; - sk_ptr = skb->data; - sk_len = skb->len; - - event_hdr = hci_event_hdr(skb); - - if ((event_hdr->evt != HCI_VENDOR_PKT) - || (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE))) + if (!clone) return false; - sk_ptr += HCI_EVENT_HDR_SIZE; - sk_len -= HCI_EVENT_HDR_SIZE; + event_hdr = skb_pull_data(clone, sizeof(*event_hdr)); + if (!event_hdr || (event_hdr->evt != HCI_VENDOR_PKT)) + goto out; - dump_hdr = (struct qca_dump_hdr *)sk_ptr; - if ((sk_len < offsetof(struct qca_dump_hdr, data)) || - (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || - (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) - return false; + dump_hdr = skb_pull_data(clone, sizeof(*dump_hdr)); + if (!dump_hdr || (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || + (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) + goto out; - return true; + is_dump = true; +out: + consume_skb(clone); + return is_dump; } static int btusb_recv_acl_qca(struct hci_dev *hdev, struct sk_buff *skb) From c9e455581e2ba87ee38c126e8dc49a424b9df0cf Mon Sep 17 00:00:00 2001 From: Sagi Maimon Date: Wed, 14 May 2025 10:35:41 +0300 Subject: [PATCH 08/40] ptp: ocp: Limit signal/freq counts in summary output functions The debugfs summary output could access uninitialized elements in the freq_in[] and signal_out[] arrays, causing NULL pointer dereferences and triggering a kernel Oops (page_fault_oops). This patch adds u8 fields (nr_freq_in, nr_signal_out) to track the number of initialized elements, with a maximum of 4 per array. The summary output functions are updated to respect these limits, preventing out-of-bounds access and ensuring safe array handling. Widen the label variables because the change confuses GCC about max length of the strings. Fixes: ef61f5528fca ("ptp: ocp: add Adva timecard support") Signed-off-by: Sagi Maimon Reviewed-by: Simon Horman Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250514073541.35817-1-maimon.sagi@gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ocp.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 2ccdca4f6960..e63481f24238 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -315,6 +315,8 @@ struct ptp_ocp_serial_port { #define OCP_BOARD_ID_LEN 13 #define OCP_SERIAL_LEN 6 #define OCP_SMA_NUM 4 +#define OCP_SIGNAL_NUM 4 +#define OCP_FREQ_NUM 4 enum { PORT_GNSS, @@ -342,8 +344,8 @@ struct ptp_ocp { struct dcf_master_reg __iomem *dcf_out; struct dcf_slave_reg __iomem *dcf_in; struct tod_reg __iomem *nmea_out; - struct frequency_reg __iomem *freq_in[4]; - struct ptp_ocp_ext_src *signal_out[4]; + struct frequency_reg __iomem *freq_in[OCP_FREQ_NUM]; + struct ptp_ocp_ext_src *signal_out[OCP_SIGNAL_NUM]; struct ptp_ocp_ext_src *pps; struct ptp_ocp_ext_src *ts0; struct ptp_ocp_ext_src *ts1; @@ -378,10 +380,12 @@ struct ptp_ocp { u32 utc_tai_offset; u32 ts_window_adjust; u64 fw_cap; - struct ptp_ocp_signal signal[4]; + struct ptp_ocp_signal signal[OCP_SIGNAL_NUM]; struct ptp_ocp_sma_connector sma[OCP_SMA_NUM]; const struct ocp_sma_op *sma_op; struct dpll_device *dpll; + int signals_nr; + int freq_in_nr; }; #define OCP_REQ_TIMESTAMP BIT(0) @@ -2697,6 +2701,8 @@ ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r) bp->eeprom_map = fb_eeprom_map; bp->fw_version = ioread32(&bp->image->version); bp->sma_op = &ocp_fb_sma_op; + bp->signals_nr = 4; + bp->freq_in_nr = 4; ptp_ocp_fb_set_version(bp); @@ -2862,6 +2868,8 @@ ptp_ocp_art_board_init(struct ptp_ocp *bp, struct ocp_resource *r) bp->fw_version = ioread32(&bp->reg->version); bp->fw_tag = 2; bp->sma_op = &ocp_art_sma_op; + bp->signals_nr = 4; + bp->freq_in_nr = 4; /* Enable MAC serial port during initialisation */ iowrite32(1, &bp->board_config->mro50_serial_activate); @@ -2888,6 +2896,8 @@ ptp_ocp_adva_board_init(struct ptp_ocp *bp, struct ocp_resource *r) bp->flash_start = 0xA00000; bp->eeprom_map = fb_eeprom_map; bp->sma_op = &ocp_adva_sma_op; + bp->signals_nr = 2; + bp->freq_in_nr = 2; version = ioread32(&bp->image->version); /* if lower 16 bits are empty, this is the fw loader. */ @@ -4008,7 +4018,7 @@ _signal_summary_show(struct seq_file *s, struct ptp_ocp *bp, int nr) { struct signal_reg __iomem *reg = bp->signal_out[nr]->mem; struct ptp_ocp_signal *signal = &bp->signal[nr]; - char label[8]; + char label[16]; bool on; u32 val; @@ -4031,7 +4041,7 @@ static void _frequency_summary_show(struct seq_file *s, int nr, struct frequency_reg __iomem *reg) { - char label[8]; + char label[16]; bool on; u32 val; @@ -4175,11 +4185,11 @@ ptp_ocp_summary_show(struct seq_file *s, void *data) } if (bp->fw_cap & OCP_CAP_SIGNAL) - for (i = 0; i < 4; i++) + for (i = 0; i < bp->signals_nr; i++) _signal_summary_show(s, bp, i); if (bp->fw_cap & OCP_CAP_FREQ) - for (i = 0; i < 4; i++) + for (i = 0; i < bp->freq_in_nr; i++) _frequency_summary_show(s, i, bp->freq_in[i]); if (bp->irig_out) { From 6b1d3c5f675cc794a015138b115afff172fb4c58 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 14 May 2025 15:03:19 -0700 Subject: [PATCH 09/40] team: grab team lock during team_change_rx_flags Syzkaller reports the following issue: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:578 netdev_lock include/linux/netdevice.h:2751 [inline] netdev_lock_ops include/net/netdev_lock.h:42 [inline] dev_set_promiscuity+0x10e/0x260 net/core/dev_api.c:285 bond_set_promiscuity drivers/net/bonding/bond_main.c:922 [inline] bond_change_rx_flags+0x219/0x690 drivers/net/bonding/bond_main.c:4732 dev_change_rx_flags net/core/dev.c:9145 [inline] __dev_set_promiscuity+0x3f5/0x590 net/core/dev.c:9189 netif_set_promiscuity+0x50/0xe0 net/core/dev.c:9201 dev_set_promiscuity+0x126/0x260 net/core/dev_api.c:286 ^^ all of the above is under rcu lock team_change_rx_flags+0x1b3/0x330 drivers/net/team/team_core.c:1785 dev_change_rx_flags net/core/dev.c:9145 [inline] __dev_set_promiscuity+0x3f5/0x590 net/core/dev.c:9189 netif_set_promiscuity+0x50/0xe0 net/core/dev.c:9201 dev_set_promiscuity+0x126/0x260 net/core/dev_api.c:286 hsr_del_port+0x25e/0x2d0 net/hsr/hsr_slave.c:233 hsr_netdev_notify+0x827/0xb60 net/hsr/hsr_main.c:104 notifier_call_chain+0x1b3/0x3e0 kernel/notifier.c:85 call_netdevice_notifiers_extack net/core/dev.c:2214 [inline] call_netdevice_notifiers net/core/dev.c:2228 [inline] unregister_netdevice_many_notify+0x15d8/0x2330 net/core/dev.c:11970 rtnl_delete_link net/core/rtnetlink.c:3522 [inline] rtnl_dellink+0x488/0x710 net/core/rtnetlink.c:3564 rtnetlink_rcv_msg+0x7cc/0xb70 net/core/rtnetlink.c:6955 netlink_rcv_skb+0x219/0x490 net/netlink/af_netlink.c:2534 netlink_unicast_kernel net/netlink/af_netlink.c:1313 [inline] netlink_unicast+0x758/0x8d0 net/netlink/af_netlink.c:1339 netlink_sendmsg+0x805/0xb30 net/netlink/af_netlink.c:1883 team_change_rx_flags runs under rcu lock which means we can't grab instance lock for the lower devices. Switch to team->lock, similar to what we already do for team_set_mac_address and team_change_mtu. Fixes: 78cd408356fe ("net: add missing instance lock to dev_set_promiscuity") Reported-by: syzbot+53485086a41dbb43270a@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=53485086a41dbb43270a Link: https://lore.kernel.org/netdev/6822cc81.050a0220.f2294.00e8.GAE@google.com Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250514220319.3505158-1-stfomichev@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/team/team_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index d8fc0c79745d..b75ceb90359f 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -1778,8 +1778,8 @@ static void team_change_rx_flags(struct net_device *dev, int change) struct team_port *port; int inc; - rcu_read_lock(); - list_for_each_entry_rcu(port, &team->port_list, list) { + mutex_lock(&team->lock); + list_for_each_entry(port, &team->port_list, list) { if (change & IFF_PROMISC) { inc = dev->flags & IFF_PROMISC ? 1 : -1; dev_set_promiscuity(port->dev, inc); @@ -1789,7 +1789,7 @@ static void team_change_rx_flags(struct net_device *dev, int change) dev_set_allmulti(port->dev, inc); } } - rcu_read_unlock(); + mutex_unlock(&team->lock); } static void team_set_rx_mode(struct net_device *dev) From ba54bce747fa9e07896c1abd9b48545f7b4b31d2 Mon Sep 17 00:00:00 2001 From: Jakob Unterwurzacher Date: Thu, 15 May 2025 09:29:19 +0200 Subject: [PATCH 10/40] net: dsa: microchip: linearize skb for tail-tagging switches The pointer arithmentic for accessing the tail tag only works for linear skbs. For nonlinear skbs, it reads uninitialized memory inside the skb headroom, essentially randomizing the tag. I have observed it gets set to 6 most of the time. Example where ksz9477_rcv thinks that the packet from port 1 comes from port 6 (which does not exist for the ksz9896 that's in use), dropping the packet. Debug prints added by me (not included in this patch): [ 256.645337] ksz9477_rcv:323 tag0=6 [ 256.645349] skb len=47 headroom=78 headlen=0 tailroom=0 mac=(64,14) mac_len=14 net=(78,0) trans=78 shinfo(txflags=0 nr_frags=1 gso(size=0 type=0 segs=0)) csum(0x0 start=0 offset=0 ip_summed=0 complete_sw=0 valid=0 level=0) hash(0x0 sw=0 l4=0) proto=0x00f8 pkttype=1 iif=3 priority=0x0 mark=0x0 alloc_cpu=0 vlan_all=0x0 encapsulation=0 inner(proto=0x0000, mac=0, net=0, trans=0) [ 256.645377] dev name=end1 feat=0x0002e10200114bb3 [ 256.645386] skb headroom: 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 256.645395] skb headroom: 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 256.645403] skb headroom: 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 256.645411] skb headroom: 00000030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 256.645420] skb headroom: 00000040: ff ff ff ff ff ff 00 1c 19 f2 e2 db 08 06 [ 256.645428] skb frag: 00000000: 00 01 08 00 06 04 00 01 00 1c 19 f2 e2 db 0a 02 [ 256.645436] skb frag: 00000010: 00 83 00 00 00 00 00 00 0a 02 a0 2f 00 00 00 00 [ 256.645444] skb frag: 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 [ 256.645452] ksz_common_rcv:92 dsa_conduit_find_user returned NULL Call skb_linearize before trying to access the tag. This patch fixes ksz9477_rcv which is used by the ksz9896 I have at hand, and also applies the same fix to ksz8795_rcv which seems to have the same problem. Signed-off-by: Jakob Unterwurzacher CC: stable@vger.kernel.org Fixes: 016e43a26bab ("net: dsa: ksz: Add KSZ8795 tag code") Fixes: 8b8010fb7876 ("dsa: add support for Microchip KSZ tail tagging") Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250515072920.2313014-1-jakob.unterwurzacher@cherry.de Signed-off-by: Jakub Kicinski --- net/dsa/tag_ksz.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index c33d4bf17929..0b7564b53790 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -140,7 +140,12 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev) static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev) { - u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; + u8 *tag; + + if (skb_linearize(skb)) + return NULL; + + tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; return ksz_common_rcv(skb, dev, tag[0] & KSZ8795_TAIL_TAG_EG_PORT_M, KSZ_EGRESS_TAG_LEN); @@ -311,10 +316,16 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev) { - /* Tag decoding */ - u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; - unsigned int port = tag[0] & KSZ9477_TAIL_TAG_EG_PORT_M; unsigned int len = KSZ_EGRESS_TAG_LEN; + unsigned int port; + u8 *tag; + + if (skb_linearize(skb)) + return NULL; + + /* Tag decoding */ + tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; + port = tag[0] & KSZ9477_TAIL_TAG_EG_PORT_M; /* Extra 4-bytes PTP timestamp */ if (tag[0] & KSZ9477_PTP_TAG_INDICATION) { From 91b6dbced0ef1d680afdd69b14fc83d50ebafaf3 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 15 May 2025 11:48:48 +0300 Subject: [PATCH 11/40] bridge: netfilter: Fix forwarding of fragmented packets When netfilter defrag hooks are loaded (due to the presence of conntrack rules, for example), fragmented packets entering the bridge will be defragged by the bridge's pre-routing hook (br_nf_pre_routing() -> ipv4_conntrack_defrag()). Later on, in the bridge's post-routing hook, the defragged packet will be fragmented again. If the size of the largest fragment is larger than what the kernel has determined as the destination MTU (using ip_skb_dst_mtu()), the defragged packet will be dropped. Before commit ac6627a28dbf ("net: ipv4: Consolidate ipv4_mtu and ip_dst_mtu_maybe_forward"), ip_skb_dst_mtu() would return dst_mtu() as the destination MTU. Assuming the dst entry attached to the packet is the bridge's fake rtable one, this would simply be the bridge's MTU (see fake_mtu()). However, after above mentioned commit, ip_skb_dst_mtu() ends up returning the route's MTU stored in the dst entry's metrics. Ideally, in case the dst entry is the bridge's fake rtable one, this should be the bridge's MTU as the bridge takes care of updating this metric when its MTU changes (see br_change_mtu()). Unfortunately, the last operation is a no-op given the metrics attached to the fake rtable entry are marked as read-only. Therefore, ip_skb_dst_mtu() ends up returning 1500 (the initial MTU value) and defragged packets are dropped during fragmentation when dealing with large fragments and high MTU (e.g., 9k). Fix by moving the fake rtable entry's metrics to be per-bridge (in a similar fashion to the fake rtable entry itself) and marking them as writable, thereby allowing MTU changes to be reflected. Fixes: 62fa8a846d7d ("net: Implement read-only protection and COW'ing of metrics.") Fixes: 33eb9873a283 ("bridge: initialize fake_rtable metrics") Reported-by: Venkat Venkatsubra Closes: https://lore.kernel.org/netdev/PH0PR10MB4504888284FF4CBA648197D0ACB82@PH0PR10MB4504.namprd10.prod.outlook.com/ Tested-by: Venkat Venkatsubra Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250515084848.727706-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/bridge/br_nf_core.c | 7 ++----- net/bridge/br_private.h | 1 + 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c index 98aea5485aae..a8c67035e23c 100644 --- a/net/bridge/br_nf_core.c +++ b/net/bridge/br_nf_core.c @@ -65,17 +65,14 @@ static struct dst_ops fake_dst_ops = { * ipt_REJECT needs it. Future netfilter modules might * require us to fill additional fields. */ -static const u32 br_dst_default_metrics[RTAX_MAX] = { - [RTAX_MTU - 1] = 1500, -}; - void br_netfilter_rtable_init(struct net_bridge *br) { struct rtable *rt = &br->fake_rtable; rcuref_init(&rt->dst.__rcuref, 1); rt->dst.dev = br->dev; - dst_init_metrics(&rt->dst, br_dst_default_metrics, true); + dst_init_metrics(&rt->dst, br->metrics, false); + dst_metric_set(&rt->dst, RTAX_MTU, br->dev->mtu); rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE; rt->dst.ops = &fake_dst_ops; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index d5b3c5936a79..4715a8d6dc32 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -505,6 +505,7 @@ struct net_bridge { struct rtable fake_rtable; struct rt6_info fake_rt6_info; }; + u32 metrics[RTAX_MAX]; #endif u16 group_fwd_mask; u16 group_fwd_mask_required; From 43f0999af011fba646e015f0bb08b6c3002a0170 Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Thu, 15 May 2025 19:04:56 +0000 Subject: [PATCH 12/40] vmxnet3: update MTU after device quiesce Currently, when device mtu is updated, vmxnet3 updates netdev mtu, quiesces the device and then reactivates it for the ESXi to know about the new mtu. So, technically the OS stack can start using the new mtu before ESXi knows about the new mtu. This can lead to issues for TSO packets which use mss as per the new mtu configured. This patch fixes this issue by moving the mtu write after device quiesce. Cc: stable@vger.kernel.org Fixes: d1a890fa37f2 ("net: VMware virtual Ethernet NIC driver: vmxnet3") Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Changes v1-> v2: Moved MTU write after destroy of rx rings Link: https://patch.msgid.link/20250515190457.8597-1-ronak.doshi@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/vmxnet3/vmxnet3_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 3df6aabc7e33..c676979c7ab9 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -3607,8 +3607,6 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) struct vmxnet3_adapter *adapter = netdev_priv(netdev); int err = 0; - WRITE_ONCE(netdev->mtu, new_mtu); - /* * Reset_work may be in the middle of resetting the device, wait for its * completion. @@ -3622,6 +3620,7 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) /* we need to re-create the rx queue based on the new mtu */ vmxnet3_rq_destroy_all(adapter); + WRITE_ONCE(netdev->mtu, new_mtu); vmxnet3_adjust_rx_ring_size(adapter); err = vmxnet3_rq_create_all(adapter); if (err) { @@ -3638,6 +3637,8 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) "Closing it\n", err); goto out; } + } else { + WRITE_ONCE(netdev->mtu, new_mtu); } out: From d6d2b0e1538d5c381ec0ca95afaf772c096ea5dc Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 15 May 2025 08:33:06 +0200 Subject: [PATCH 13/40] net: airoha: Fix page recycling in airoha_qdma_rx_process() Do not recycle the page twice in airoha_qdma_rx_process routine in case of error. Just run dev_kfree_skb() if the skb has been allocated and marked for recycling. Run page_pool_put_full_page() directly if the skb has not been allocated yet. Moreover, rely on DMA address from queue entry element instead of reading it from the DMA descriptor for DMA syncing in airoha_qdma_rx_process(). Fixes: e12182ddb6e71 ("net: airoha: Enable Rx Scatter-Gather") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250515-airoha-fix-rx-process-error-condition-v2-1-657e92c894b9@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index d748dc6de923..1e9ab65218ff 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -614,7 +614,6 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) struct airoha_queue_entry *e = &q->entry[q->tail]; struct airoha_qdma_desc *desc = &q->desc[q->tail]; u32 hash, reason, msg1 = le32_to_cpu(desc->msg1); - dma_addr_t dma_addr = le32_to_cpu(desc->addr); struct page *page = virt_to_head_page(e->buf); u32 desc_ctrl = le32_to_cpu(desc->ctrl); struct airoha_gdm_port *port; @@ -623,22 +622,16 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) if (!(desc_ctrl & QDMA_DESC_DONE_MASK)) break; - if (!dma_addr) - break; - - len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl); - if (!len) - break; - q->tail = (q->tail + 1) % q->ndesc; q->queued--; - dma_sync_single_for_cpu(eth->dev, dma_addr, + dma_sync_single_for_cpu(eth->dev, e->dma_addr, SKB_WITH_OVERHEAD(q->buf_size), dir); + len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl); data_len = q->skb ? q->buf_size : SKB_WITH_OVERHEAD(q->buf_size); - if (data_len < len) + if (!len || data_len < len) goto free_frag; p = airoha_qdma_get_gdm_port(eth, desc); @@ -701,9 +694,12 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) q->skb = NULL; continue; free_frag: - page_pool_put_full_page(q->page_pool, page, true); - dev_kfree_skb(q->skb); - q->skb = NULL; + if (q->skb) { + dev_kfree_skb(q->skb); + q->skb = NULL; + } else { + page_pool_put_full_page(q->page_pool, page, true); + } } airoha_qdma_fill_rx_queue(q); From c46286fdd6aa1d0e33c245bcffe9ff2428a777bd Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 May 2025 18:49:26 +0200 Subject: [PATCH 14/40] mr: consolidate the ipmr_can_free_table() checks. Guoyu Yin reported a splat in the ipmr netns cleanup path: WARNING: CPU: 2 PID: 14564 at net/ipv4/ipmr.c:440 ipmr_free_table net/ipv4/ipmr.c:440 [inline] WARNING: CPU: 2 PID: 14564 at net/ipv4/ipmr.c:440 ipmr_rules_exit+0x135/0x1c0 net/ipv4/ipmr.c:361 Modules linked in: CPU: 2 UID: 0 PID: 14564 Comm: syz.4.838 Not tainted 6.14.0 #1 Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 RIP: 0010:ipmr_free_table net/ipv4/ipmr.c:440 [inline] RIP: 0010:ipmr_rules_exit+0x135/0x1c0 net/ipv4/ipmr.c:361 Code: ff df 48 c1 ea 03 80 3c 02 00 75 7d 48 c7 83 60 05 00 00 00 00 00 00 5b 5d 41 5c 41 5d 41 5e e9 71 67 7f 00 e8 4c 2d 8a fd 90 <0f> 0b 90 eb 93 e8 41 2d 8a fd 0f b6 2d 80 54 ea 01 31 ff 89 ee e8 RSP: 0018:ffff888109547c58 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff888108c12dc0 RCX: ffffffff83e09868 RDX: ffff8881022b3300 RSI: ffffffff83e098d4 RDI: 0000000000000005 RBP: ffff888104288000 R08: 0000000000000000 R09: ffffed10211825c9 R10: 0000000000000001 R11: ffff88801816c4a0 R12: 0000000000000001 R13: ffff888108c13320 R14: ffff888108c12dc0 R15: fffffbfff0b74058 FS: 00007f84f39316c0(0000) GS:ffff88811b100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f84f3930f98 CR3: 0000000113b56000 CR4: 0000000000350ef0 Call Trace: ipmr_net_exit_batch+0x50/0x90 net/ipv4/ipmr.c:3160 ops_exit_list+0x10c/0x160 net/core/net_namespace.c:177 setup_net+0x47d/0x8e0 net/core/net_namespace.c:394 copy_net_ns+0x25d/0x410 net/core/net_namespace.c:516 create_new_namespaces+0x3f6/0xaf0 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0xc3/0x180 kernel/nsproxy.c:228 ksys_unshare+0x78d/0x9a0 kernel/fork.c:3342 __do_sys_unshare kernel/fork.c:3413 [inline] __se_sys_unshare kernel/fork.c:3411 [inline] __x64_sys_unshare+0x31/0x40 kernel/fork.c:3411 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xa6/0x1a0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f84f532cc29 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f84f3931038 EFLAGS: 00000246 ORIG_RAX: 0000000000000110 RAX: ffffffffffffffda RBX: 00007f84f5615fa0 RCX: 00007f84f532cc29 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000040000400 RBP: 00007f84f53fba18 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 00007f84f5615fa0 R15: 00007fff51c5f328 The running kernel has CONFIG_IP_MROUTE_MULTIPLE_TABLES disabled, and the sanity check for such build is still too loose. Address the issue consolidating the relevant sanity check in a single helper regardless of the kernel configuration. Also share it between the ipv4 and ipv6 code. Reported-by: Guoyu Yin Fixes: 50b94204446e ("ipmr: tune the ipmr_can_free_table() checks.") Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/372dc261e1bf12742276e1b984fc5a071b7fc5a8.1747321903.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/mroute_base.h | 5 +++++ net/ipv4/ipmr.c | 12 +----------- net/ipv6/ip6mr.c | 12 +----------- 3 files changed, 7 insertions(+), 22 deletions(-) diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 58a2401e4b55..0075f6e5c3da 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -262,6 +262,11 @@ struct mr_table { int mroute_reg_vif_num; }; +static inline bool mr_can_free_table(struct net *net) +{ + return !check_net(net) || !net_initialized(net); +} + #ifdef CONFIG_IP_MROUTE_COMMON void vif_device_init(struct vif_device *v, struct net_device *dev, diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a8b04d4abcaa..85dc208f32e9 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -120,11 +120,6 @@ static void ipmr_expire_process(struct timer_list *t); lockdep_rtnl_is_held() || \ list_empty(&net->ipv4.mr_tables)) -static bool ipmr_can_free_table(struct net *net) -{ - return !check_net(net) || !net_initialized(net); -} - static struct mr_table *ipmr_mr_table_iter(struct net *net, struct mr_table *mrt) { @@ -317,11 +312,6 @@ EXPORT_SYMBOL(ipmr_rule_default); #define ipmr_for_each_table(mrt, net) \ for (mrt = net->ipv4.mrt; mrt; mrt = NULL) -static bool ipmr_can_free_table(struct net *net) -{ - return !check_net(net); -} - static struct mr_table *ipmr_mr_table_iter(struct net *net, struct mr_table *mrt) { @@ -437,7 +427,7 @@ static void ipmr_free_table(struct mr_table *mrt) { struct net *net = read_pnet(&mrt->net); - WARN_ON_ONCE(!ipmr_can_free_table(net)); + WARN_ON_ONCE(!mr_can_free_table(net)); timer_shutdown_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index b413c9c8a21c..3276cde5ebd7 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -108,11 +108,6 @@ static void ipmr_expire_process(struct timer_list *t); lockdep_rtnl_is_held() || \ list_empty(&net->ipv6.mr6_tables)) -static bool ip6mr_can_free_table(struct net *net) -{ - return !check_net(net) || !net_initialized(net); -} - static struct mr_table *ip6mr_mr_table_iter(struct net *net, struct mr_table *mrt) { @@ -306,11 +301,6 @@ EXPORT_SYMBOL(ip6mr_rule_default); #define ip6mr_for_each_table(mrt, net) \ for (mrt = net->ipv6.mrt6; mrt; mrt = NULL) -static bool ip6mr_can_free_table(struct net *net) -{ - return !check_net(net); -} - static struct mr_table *ip6mr_mr_table_iter(struct net *net, struct mr_table *mrt) { @@ -416,7 +406,7 @@ static void ip6mr_free_table(struct mr_table *mrt) { struct net *net = read_pnet(&mrt->net); - WARN_ON_ONCE(!ip6mr_can_free_table(net)); + WARN_ON_ONCE(!mr_can_free_table(net)); timer_shutdown_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC | From 239af1970bcb039a1551d2c438d113df0010c149 Mon Sep 17 00:00:00 2001 From: Ilia Gavrilov Date: Thu, 15 May 2025 12:20:15 +0000 Subject: [PATCH 15/40] llc: fix data loss when reading from a socket in llc_ui_recvmsg() For SOCK_STREAM sockets, if user buffer size (len) is less than skb size (skb->len), the remaining data from skb will be lost after calling kfree_skb(). To fix this, move the statement for partial reading above skb deletion. Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) Fixes: 30a584d944fb ("[LLX]: SOCK_DGRAM interface fixes") Cc: stable@vger.kernel.org Signed-off-by: Ilia Gavrilov Signed-off-by: David S. Miller --- net/llc/af_llc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 0259cde394ba..cc77ec5769d8 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -887,15 +887,15 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (sk->sk_type != SOCK_STREAM) goto copy_uaddr; + /* Partial read */ + if (used + offset < skb_len) + continue; + if (!(flags & MSG_PEEK)) { skb_unlink(skb, &sk->sk_receive_queue); kfree_skb(skb); *seq = 0; } - - /* Partial read */ - if (used + offset < skb_len) - continue; } while (len > 0); out: From 69c6d83d717317eab42835bc9fd54173c8de31ac Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 7 May 2025 10:42:00 -0500 Subject: [PATCH 16/40] dt-bindings: can: microchip,mcp2510: Fix $id path The "$id" value must match the relative path under bindings/ and is missing the "net" sub-directory. Fixes: 09328600c2f9 ("dt-bindings: can: convert microchip,mcp251x.txt to yaml") Signed-off-by: "Rob Herring (Arm)" Acked-by: Conor Dooley Link: https://patch.msgid.link/20250507154201.1589542-1-robh@kernel.org Signed-off-by: Marc Kleine-Budde --- .../devicetree/bindings/net/can/microchip,mcp2510.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml b/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml index e0ec53bc10c6..1525a50ded47 100644 --- a/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml +++ b/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) %YAML 1.2 --- -$id: http://devicetree.org/schemas/can/microchip,mcp2510.yaml# +$id: http://devicetree.org/schemas/net/can/microchip,mcp2510.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# title: Microchip MCP251X stand-alone CAN controller From c2aba69d0c36a496ab4f2e81e9c2b271f2693fd7 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 19 May 2025 14:50:26 +0200 Subject: [PATCH 17/40] can: bcm: add locking for bcm_op runtime updates The CAN broadcast manager (CAN BCM) can send a sequence of CAN frames via hrtimer. The content and also the length of the sequence can be changed resp reduced at runtime where the 'currframe' counter is then set to zero. Although this appeared to be a safe operation the updates of 'currframe' can be triggered from user space and hrtimer context in bcm_can_tx(). Anderson Nascimento created a proof of concept that triggered a KASAN slab-out-of-bounds read access which can be prevented with a spin_lock_bh. At the rework of bcm_can_tx() the 'count' variable has been moved into the protected section as this variable can be modified from both contexts too. Fixes: ffd980f976e7 ("[CAN]: Add broadcast manager (bcm) protocol") Reported-by: Anderson Nascimento Tested-by: Anderson Nascimento Reviewed-by: Marc Kleine-Budde Signed-off-by: Oliver Hartkopp Link: https://patch.msgid.link/20250519125027.11900-1-socketcan@hartkopp.net Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- net/can/bcm.c | 66 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 21 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index 0bca1b9b3f70..871707dab7db 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -122,6 +123,7 @@ struct bcm_op { struct canfd_frame last_sframe; struct sock *sk; struct net_device *rx_reg_dev; + spinlock_t bcm_tx_lock; /* protect currframe/count in runtime updates */ }; struct bcm_sock { @@ -285,13 +287,18 @@ static void bcm_can_tx(struct bcm_op *op) { struct sk_buff *skb; struct net_device *dev; - struct canfd_frame *cf = op->frames + op->cfsiz * op->currframe; + struct canfd_frame *cf; int err; /* no target device? => exit */ if (!op->ifindex) return; + /* read currframe under lock protection */ + spin_lock_bh(&op->bcm_tx_lock); + cf = op->frames + op->cfsiz * op->currframe; + spin_unlock_bh(&op->bcm_tx_lock); + dev = dev_get_by_index(sock_net(op->sk), op->ifindex); if (!dev) { /* RFC: should this bcm_op remove itself here? */ @@ -312,6 +319,10 @@ static void bcm_can_tx(struct bcm_op *op) skb->dev = dev; can_skb_set_owner(skb, op->sk); err = can_send(skb, 1); + + /* update currframe and count under lock protection */ + spin_lock_bh(&op->bcm_tx_lock); + if (!err) op->frames_abs++; @@ -320,6 +331,11 @@ static void bcm_can_tx(struct bcm_op *op) /* reached last frame? */ if (op->currframe >= op->nframes) op->currframe = 0; + + if (op->count > 0) + op->count--; + + spin_unlock_bh(&op->bcm_tx_lock); out: dev_put(dev); } @@ -430,7 +446,7 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) struct bcm_msg_head msg_head; if (op->kt_ival1 && (op->count > 0)) { - op->count--; + bcm_can_tx(op); if (!op->count && (op->flags & TX_COUNTEVT)) { /* create notification to user */ @@ -445,7 +461,6 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) bcm_send_to_user(op, &msg_head, NULL, 0); } - bcm_can_tx(op); } else if (op->kt_ival2) { bcm_can_tx(op); @@ -956,6 +971,27 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, } op->flags = msg_head->flags; + /* only lock for unlikely count/nframes/currframe changes */ + if (op->nframes != msg_head->nframes || + op->flags & TX_RESET_MULTI_IDX || + op->flags & SETTIMER) { + + spin_lock_bh(&op->bcm_tx_lock); + + if (op->nframes != msg_head->nframes || + op->flags & TX_RESET_MULTI_IDX) { + /* potentially update changed nframes */ + op->nframes = msg_head->nframes; + /* restart multiple frame transmission */ + op->currframe = 0; + } + + if (op->flags & SETTIMER) + op->count = msg_head->count; + + spin_unlock_bh(&op->bcm_tx_lock); + } + } else { /* insert new BCM operation for the given can_id */ @@ -963,9 +999,14 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, if (!op) return -ENOMEM; + spin_lock_init(&op->bcm_tx_lock); op->can_id = msg_head->can_id; op->cfsiz = CFSIZ(msg_head->flags); op->flags = msg_head->flags; + op->nframes = msg_head->nframes; + + if (op->flags & SETTIMER) + op->count = msg_head->count; /* create array for CAN frames and copy the data */ if (msg_head->nframes > 1) { @@ -1023,22 +1064,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, } /* if ((op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex))) */ - if (op->nframes != msg_head->nframes) { - op->nframes = msg_head->nframes; - /* start multiple frame transmission with index 0 */ - op->currframe = 0; - } - - /* check flags */ - - if (op->flags & TX_RESET_MULTI_IDX) { - /* start multiple frame transmission with index 0 */ - op->currframe = 0; - } - if (op->flags & SETTIMER) { /* set timer values */ - op->count = msg_head->count; op->ival1 = msg_head->ival1; op->ival2 = msg_head->ival2; op->kt_ival1 = bcm_timeval_to_ktime(msg_head->ival1); @@ -1055,11 +1082,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, op->flags |= TX_ANNOUNCE; } - if (op->flags & TX_ANNOUNCE) { + if (op->flags & TX_ANNOUNCE) bcm_can_tx(op); - if (op->count) - op->count--; - } if (op->flags & STARTTIMER) bcm_tx_start_timer(op); From dac5e6249159ac255dad9781793dbe5908ac9ddb Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 19 May 2025 14:50:27 +0200 Subject: [PATCH 18/40] can: bcm: add missing rcu read protection for procfs content When the procfs content is generated for a bcm_op which is in the process to be removed the procfs output might show unreliable data (UAF). As the removal of bcm_op's is already implemented with rcu handling this patch adds the missing rcu_read_lock() and makes sure the list entries are properly removed under rcu protection. Fixes: f1b4e32aca08 ("can: bcm: use call_rcu() instead of costly synchronize_rcu()") Reported-by: Anderson Nascimento Suggested-by: Anderson Nascimento Tested-by: Anderson Nascimento Signed-off-by: Oliver Hartkopp Link: https://patch.msgid.link/20250519125027.11900-2-socketcan@hartkopp.net Cc: stable@vger.kernel.org # >= 5.4 Signed-off-by: Marc Kleine-Budde --- net/can/bcm.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index 871707dab7db..6bc1cc4c94c5 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -219,7 +219,9 @@ static int bcm_proc_show(struct seq_file *m, void *v) seq_printf(m, " / bound %s", bcm_proc_getifname(net, ifname, bo->ifindex)); seq_printf(m, " <<<\n"); - list_for_each_entry(op, &bo->rx_ops, list) { + rcu_read_lock(); + + list_for_each_entry_rcu(op, &bo->rx_ops, list) { unsigned long reduction; @@ -275,6 +277,9 @@ static int bcm_proc_show(struct seq_file *m, void *v) seq_printf(m, "# sent %ld\n", op->frames_abs); } seq_putc(m, '\n'); + + rcu_read_unlock(); + return 0; } #endif /* CONFIG_PROC_FS */ @@ -858,7 +863,7 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh, REGMASK(op->can_id), bcm_rx_handler, op); - list_del(&op->list); + list_del_rcu(&op->list); bcm_remove_op(op); return 1; /* done */ } @@ -878,7 +883,7 @@ static int bcm_delete_tx_op(struct list_head *ops, struct bcm_msg_head *mh, list_for_each_entry_safe(op, n, ops, list) { if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) && (op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) { - list_del(&op->list); + list_del_rcu(&op->list); bcm_remove_op(op); return 1; /* done */ } @@ -1296,7 +1301,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, bcm_rx_handler, op, "bcm", sk); if (err) { /* this bcm rx op is broken -> remove it */ - list_del(&op->list); + list_del_rcu(&op->list); bcm_remove_op(op); return err; } From bbd95160a03dbfcd01a541f25c27ddb730dfbbd5 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 10 Apr 2025 11:13:52 -0700 Subject: [PATCH 19/40] ice: fix vf->num_mac count with port representors The ice_vc_repr_add_mac() function indicates that it does not store the MAC address filters in the firmware. However, it still increments vf->num_mac. This is incorrect, as vf->num_mac should represent the number of MAC filters currently programmed to firmware. Indeed, we only perform this increment if the requested filter is a unicast address that doesn't match the existing vf->hw_lan_addr. In addition, ice_vc_repr_del_mac() does not decrement the vf->num_mac counter. This results in the counter becoming out of sync with the actual count. As it turns out, vf->num_mac is currently only used in legacy made without port representors. The single place where the value is checked is for enforcing a filter limit on untrusted VFs. Upcoming patches to support VF Live Migration will use this value when determining the size of the TLV for MAC address filters. Fix the representor mode function to stop incrementing the counter incorrectly. Fixes: ac19e03ef780 ("ice: allow process VF opcodes in different ways") Signed-off-by: Jacob Keller Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 7c3006eb68dd..6446d0fcc052 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -4275,7 +4275,6 @@ static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg) } ice_vfhw_mac_add(vf, &al->list[i]); - vf->num_mac++; break; } From 6c778f1b839b63525b30046c9d1899424a62be0a Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Mon, 28 Apr 2025 15:33:39 -0400 Subject: [PATCH 20/40] ice: Fix LACP bonds without SRIOV environment If an aggregate has the following conditions: - The SRIOV LAG DDP package has been enabled - The bond is in 802.3ad LACP mode - The bond is disqualified from supporting SRIOV VF LAG - Both interfaces were added simultaneously to the bond (same command) Then there is a chance that the two interfaces will be assigned different LACP Aggregator ID's. This will cause a failure of the LACP control over the bond. To fix this, we can detect if the primary interface for the bond (as defined by the driver) is not in switchdev mode, and exit the setup flow if so. Reproduction steps: %> ip link add bond0 type bond mode 802.3ad miimon 100 %> ip link set bond0 up %> ifenslave bond0 eth0 eth1 %> cat /proc/net/bonding/bond0 | grep Agg Check for Aggregator IDs that differ. Fixes: ec5a6c5f79ed ("ice: process events created by lag netdev event handler") Reviewed-by: Aleksandr Loktionov Signed-off-by: Dave Ertman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lag.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 22371011c249..2410aee59fb2 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -1321,12 +1321,18 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) */ if (!primary_lag) { lag->primary = true; + if (!ice_is_switchdev_running(lag->pf)) + return; + /* Configure primary's SWID to be shared */ ice_lag_primary_swid(lag, true); primary_lag = lag; } else { u16 swid; + if (!ice_is_switchdev_running(primary_lag->pf)) + return; + swid = primary_lag->pf->hw.port_info->sw_id; ice_lag_set_swid(swid, lag, true); ice_lag_add_prune_list(primary_lag, lag->pf); From 2dabe349f7882ff1407a784d54d8541909329088 Mon Sep 17 00:00:00 2001 From: Pavan Kumar Linga Date: Fri, 11 Apr 2025 09:00:35 -0700 Subject: [PATCH 21/40] idpf: fix null-ptr-deref in idpf_features_check idpf_features_check is used to validate the TX packet. skb header length is compared with the hardware supported value received from the device control plane. The value is stored in the adapter structure and to access it, vport pointer is used. During reset all the vports are released and the vport pointer that the netdev private structure points to is NULL. To avoid null-ptr-deref, store the max header length value in netdev private structure. This also helps to cache the value and avoid accessing adapter pointer in hot path. BUG: kernel NULL pointer dereference, address: 0000000000000068 ... RIP: 0010:idpf_features_check+0x6d/0xe0 [idpf] Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x154/0x520 ? exc_page_fault+0x76/0x190 ? asm_exc_page_fault+0x26/0x30 ? idpf_features_check+0x6d/0xe0 [idpf] netif_skb_features+0x88/0x310 validate_xmit_skb+0x2a/0x2b0 validate_xmit_skb_list+0x4c/0x70 sch_direct_xmit+0x19d/0x3a0 __dev_queue_xmit+0xb74/0xe70 ... Fixes: a251eee62133 ("idpf: add SRIOV support and other ndo_ops") Reviewed-by: Madhu Chititm Signed-off-by: Pavan Kumar Linga Reviewed-by: Simon Horman Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf.h | 2 ++ drivers/net/ethernet/intel/idpf/idpf_lib.c | 10 ++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index aef0e9775a33..70dbf80f3bb7 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -143,6 +143,7 @@ enum idpf_vport_state { * @vport_id: Vport identifier * @link_speed_mbps: Link speed in mbps * @vport_idx: Relative vport index + * @max_tx_hdr_size: Max header length hardware can support * @state: See enum idpf_vport_state * @netstats: Packet and byte stats * @stats_lock: Lock to protect stats update @@ -153,6 +154,7 @@ struct idpf_netdev_priv { u32 vport_id; u32 link_speed_mbps; u16 vport_idx; + u16 max_tx_hdr_size; enum idpf_vport_state state; struct rtnl_link_stats64 netstats; spinlock_t stats_lock; diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 82f09b4030bc..3a033ce19cda 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -723,6 +723,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) np->vport = vport; np->vport_idx = vport->idx; np->vport_id = vport->vport_id; + np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter); vport->netdev = netdev; return idpf_init_mac_addr(vport, netdev); @@ -740,6 +741,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) np->adapter = adapter; np->vport_idx = vport->idx; np->vport_id = vport->vport_id; + np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter); spin_lock_init(&np->stats_lock); @@ -2203,8 +2205,8 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, struct net_device *netdev, netdev_features_t features) { - struct idpf_vport *vport = idpf_netdev_to_vport(netdev); - struct idpf_adapter *adapter = vport->adapter; + struct idpf_netdev_priv *np = netdev_priv(netdev); + u16 max_tx_hdr_size = np->max_tx_hdr_size; size_t len; /* No point in doing any of this if neither checksum nor GSO are @@ -2227,7 +2229,7 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, goto unsupported; len = skb_network_header_len(skb); - if (unlikely(len > idpf_get_max_tx_hdr_size(adapter))) + if (unlikely(len > max_tx_hdr_size)) goto unsupported; if (!skb->encapsulation) @@ -2240,7 +2242,7 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, /* IPLEN can support at most 127 dwords */ len = skb_inner_network_header_len(skb); - if (unlikely(len > idpf_get_max_tx_hdr_size(adapter))) + if (unlikely(len > max_tx_hdr_size)) goto unsupported; /* No need to validate L4LEN as TCP is the only protocol with a From 9176bd205ee0b2cd35073a9973c2a0936bcb579e Mon Sep 17 00:00:00 2001 From: Axel Forsman Date: Tue, 20 May 2025 13:43:30 +0200 Subject: [PATCH 22/40] can: kvaser_pciefd: Force IRQ edge in case of nested IRQ Avoid the driver missing IRQs by temporarily masking IRQs in the ISR to enforce an edge even if a different IRQ is signalled before handled IRQs are cleared. Fixes: 48f827d4f48f ("can: kvaser_pciefd: Move reset of DMA RX buffers to the end of the ISR") Cc: stable@vger.kernel.org Signed-off-by: Axel Forsman Tested-by: Jimmy Assarsson Reviewed-by: Jimmy Assarsson Link: https://patch.msgid.link/20250520114332.8961-2-axfo@kvaser.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/kvaser_pciefd.c | 83 ++++++++++++++++----------------- 1 file changed, 39 insertions(+), 44 deletions(-) diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index cf0d51805272..9cc9176c2058 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -1646,24 +1646,28 @@ static int kvaser_pciefd_read_buffer(struct kvaser_pciefd *pcie, int dma_buf) return res; } -static u32 kvaser_pciefd_receive_irq(struct kvaser_pciefd *pcie) +static void kvaser_pciefd_receive_irq(struct kvaser_pciefd *pcie) { + void __iomem *srb_cmd_reg = KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG; u32 irq = ioread32(KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG); - if (irq & KVASER_PCIEFD_SRB_IRQ_DPD0) - kvaser_pciefd_read_buffer(pcie, 0); + iowrite32(irq, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG); - if (irq & KVASER_PCIEFD_SRB_IRQ_DPD1) + if (irq & KVASER_PCIEFD_SRB_IRQ_DPD0) { + kvaser_pciefd_read_buffer(pcie, 0); + iowrite32(KVASER_PCIEFD_SRB_CMD_RDB0, srb_cmd_reg); /* Rearm buffer */ + } + + if (irq & KVASER_PCIEFD_SRB_IRQ_DPD1) { kvaser_pciefd_read_buffer(pcie, 1); + iowrite32(KVASER_PCIEFD_SRB_CMD_RDB1, srb_cmd_reg); /* Rearm buffer */ + } if (unlikely(irq & KVASER_PCIEFD_SRB_IRQ_DOF0 || irq & KVASER_PCIEFD_SRB_IRQ_DOF1 || irq & KVASER_PCIEFD_SRB_IRQ_DUF0 || irq & KVASER_PCIEFD_SRB_IRQ_DUF1)) dev_err(&pcie->pci->dev, "DMA IRQ error 0x%08X\n", irq); - - iowrite32(irq, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG); - return irq; } static void kvaser_pciefd_transmit_irq(struct kvaser_pciefd_can *can) @@ -1691,29 +1695,22 @@ static irqreturn_t kvaser_pciefd_irq_handler(int irq, void *dev) struct kvaser_pciefd *pcie = (struct kvaser_pciefd *)dev; const struct kvaser_pciefd_irq_mask *irq_mask = pcie->driver_data->irq_mask; u32 pci_irq = ioread32(KVASER_PCIEFD_PCI_IRQ_ADDR(pcie)); - u32 srb_irq = 0; - u32 srb_release = 0; int i; if (!(pci_irq & irq_mask->all)) return IRQ_NONE; + iowrite32(0, KVASER_PCIEFD_PCI_IEN_ADDR(pcie)); + if (pci_irq & irq_mask->kcan_rx0) - srb_irq = kvaser_pciefd_receive_irq(pcie); + kvaser_pciefd_receive_irq(pcie); for (i = 0; i < pcie->nr_channels; i++) { if (pci_irq & irq_mask->kcan_tx[i]) kvaser_pciefd_transmit_irq(pcie->can[i]); } - if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD0) - srb_release |= KVASER_PCIEFD_SRB_CMD_RDB0; - - if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD1) - srb_release |= KVASER_PCIEFD_SRB_CMD_RDB1; - - if (srb_release) - iowrite32(srb_release, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); + iowrite32(irq_mask->all, KVASER_PCIEFD_PCI_IEN_ADDR(pcie)); return IRQ_HANDLED; } @@ -1733,13 +1730,22 @@ static void kvaser_pciefd_teardown_can_ctrls(struct kvaser_pciefd *pcie) } } +static void kvaser_pciefd_disable_irq_srcs(struct kvaser_pciefd *pcie) +{ + unsigned int i; + + /* Masking PCI_IRQ is insufficient as running ISR will unmask it */ + iowrite32(0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IEN_REG); + for (i = 0; i < pcie->nr_channels; ++i) + iowrite32(0, pcie->can[i]->reg_base + KVASER_PCIEFD_KCAN_IEN_REG); +} + static int kvaser_pciefd_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int ret; struct kvaser_pciefd *pcie; const struct kvaser_pciefd_irq_mask *irq_mask; - void __iomem *irq_en_base; pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL); if (!pcie) @@ -1805,8 +1811,7 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IEN_REG); /* Enable PCI interrupts */ - irq_en_base = KVASER_PCIEFD_PCI_IEN_ADDR(pcie); - iowrite32(irq_mask->all, irq_en_base); + iowrite32(irq_mask->all, KVASER_PCIEFD_PCI_IEN_ADDR(pcie)); /* Ready the DMA buffers */ iowrite32(KVASER_PCIEFD_SRB_CMD_RDB0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); @@ -1820,8 +1825,7 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev, return 0; err_free_irq: - /* Disable PCI interrupts */ - iowrite32(0, irq_en_base); + kvaser_pciefd_disable_irq_srcs(pcie); free_irq(pcie->pci->irq, pcie); err_pci_free_irq_vectors: @@ -1844,35 +1848,26 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev, return ret; } -static void kvaser_pciefd_remove_all_ctrls(struct kvaser_pciefd *pcie) -{ - int i; - - for (i = 0; i < pcie->nr_channels; i++) { - struct kvaser_pciefd_can *can = pcie->can[i]; - - if (can) { - iowrite32(0, can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG); - unregister_candev(can->can.dev); - timer_delete(&can->bec_poll_timer); - kvaser_pciefd_pwm_stop(can); - free_candev(can->can.dev); - } - } -} - static void kvaser_pciefd_remove(struct pci_dev *pdev) { struct kvaser_pciefd *pcie = pci_get_drvdata(pdev); + unsigned int i; - kvaser_pciefd_remove_all_ctrls(pcie); + for (i = 0; i < pcie->nr_channels; ++i) { + struct kvaser_pciefd_can *can = pcie->can[i]; - /* Disable interrupts */ - iowrite32(0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CTRL_REG); - iowrite32(0, KVASER_PCIEFD_PCI_IEN_ADDR(pcie)); + unregister_candev(can->can.dev); + timer_delete(&can->bec_poll_timer); + kvaser_pciefd_pwm_stop(can); + } + kvaser_pciefd_disable_irq_srcs(pcie); free_irq(pcie->pci->irq, pcie); pci_free_irq_vectors(pcie->pci); + + for (i = 0; i < pcie->nr_channels; ++i) + free_candev(pcie->can[i]->can.dev); + pci_iounmap(pdev, pcie->reg_base); pci_release_regions(pdev); pci_disable_device(pdev); From 8256e0ca601051933e9395746817f3801fa9a6bf Mon Sep 17 00:00:00 2001 From: Axel Forsman Date: Tue, 20 May 2025 13:43:31 +0200 Subject: [PATCH 23/40] can: kvaser_pciefd: Fix echo_skb race The functions kvaser_pciefd_start_xmit() and kvaser_pciefd_handle_ack_packet() raced to stop/wake TX queues and get/put echo skbs, as kvaser_pciefd_can->echo_lock was only ever taken when transmitting and KCAN_TX_NR_PACKETS_CURRENT gets decremented prior to handling of ACKs. E.g., this caused the following error: can_put_echo_skb: BUG! echo_skb 5 is occupied! Instead, use the synchronization helpers in netdev_queues.h. As those piggyback on BQL barriers, start updating in-flight packets and bytes counts as well. Cc: stable@vger.kernel.org Signed-off-by: Axel Forsman Tested-by: Jimmy Assarsson Reviewed-by: Jimmy Assarsson Link: https://patch.msgid.link/20250520114332.8961-3-axfo@kvaser.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/kvaser_pciefd.c | 93 +++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 34 deletions(-) diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index 9cc9176c2058..a61cbade96d9 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -16,6 +16,7 @@ #include #include #include +#include MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Kvaser AB "); @@ -410,10 +411,13 @@ struct kvaser_pciefd_can { void __iomem *reg_base; struct can_berr_counter bec; u8 cmd_seq; + u8 tx_max_count; + u8 tx_idx; + u8 ack_idx; int err_rep_cnt; - int echo_idx; + unsigned int completed_tx_pkts; + unsigned int completed_tx_bytes; spinlock_t lock; /* Locks sensitive registers (e.g. MODE) */ - spinlock_t echo_lock; /* Locks the message echo buffer */ struct timer_list bec_poll_timer; struct completion start_comp, flush_comp; }; @@ -714,6 +718,9 @@ static int kvaser_pciefd_open(struct net_device *netdev) int ret; struct kvaser_pciefd_can *can = netdev_priv(netdev); + can->tx_idx = 0; + can->ack_idx = 0; + ret = open_candev(netdev); if (ret) return ret; @@ -745,21 +752,26 @@ static int kvaser_pciefd_stop(struct net_device *netdev) timer_delete(&can->bec_poll_timer); } can->can.state = CAN_STATE_STOPPED; + netdev_reset_queue(netdev); close_candev(netdev); return ret; } +static unsigned int kvaser_pciefd_tx_avail(const struct kvaser_pciefd_can *can) +{ + return can->tx_max_count - (READ_ONCE(can->tx_idx) - READ_ONCE(can->ack_idx)); +} + static int kvaser_pciefd_prepare_tx_packet(struct kvaser_pciefd_tx_packet *p, - struct kvaser_pciefd_can *can, + struct can_priv *can, u8 seq, struct sk_buff *skb) { struct canfd_frame *cf = (struct canfd_frame *)skb->data; int packet_size; - int seq = can->echo_idx; memset(p, 0, sizeof(*p)); - if (can->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT) + if (can->ctrlmode & CAN_CTRLMODE_ONE_SHOT) p->header[1] |= KVASER_PCIEFD_TPACKET_SMS; if (cf->can_id & CAN_RTR_FLAG) @@ -782,7 +794,7 @@ static int kvaser_pciefd_prepare_tx_packet(struct kvaser_pciefd_tx_packet *p, } else { p->header[1] |= FIELD_PREP(KVASER_PCIEFD_RPACKET_DLC_MASK, - can_get_cc_dlc((struct can_frame *)cf, can->can.ctrlmode)); + can_get_cc_dlc((struct can_frame *)cf, can->ctrlmode)); } p->header[1] |= FIELD_PREP(KVASER_PCIEFD_PACKET_SEQ_MASK, seq); @@ -797,22 +809,24 @@ static netdev_tx_t kvaser_pciefd_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct kvaser_pciefd_can *can = netdev_priv(netdev); - unsigned long irq_flags; struct kvaser_pciefd_tx_packet packet; + unsigned int seq = can->tx_idx & (can->can.echo_skb_max - 1); + unsigned int frame_len; int nr_words; - u8 count; if (can_dev_dropped_skb(netdev, skb)) return NETDEV_TX_OK; + if (!netif_subqueue_maybe_stop(netdev, 0, kvaser_pciefd_tx_avail(can), 1, 1)) + return NETDEV_TX_BUSY; - nr_words = kvaser_pciefd_prepare_tx_packet(&packet, can, skb); + nr_words = kvaser_pciefd_prepare_tx_packet(&packet, &can->can, seq, skb); - spin_lock_irqsave(&can->echo_lock, irq_flags); /* Prepare and save echo skb in internal slot */ - can_put_echo_skb(skb, netdev, can->echo_idx, 0); - - /* Move echo index to the next slot */ - can->echo_idx = (can->echo_idx + 1) % can->can.echo_skb_max; + WRITE_ONCE(can->can.echo_skb[seq], NULL); + frame_len = can_skb_get_frame_len(skb); + can_put_echo_skb(skb, netdev, seq, frame_len); + netdev_sent_queue(netdev, frame_len); + WRITE_ONCE(can->tx_idx, can->tx_idx + 1); /* Write header to fifo */ iowrite32(packet.header[0], @@ -836,14 +850,7 @@ static netdev_tx_t kvaser_pciefd_start_xmit(struct sk_buff *skb, KVASER_PCIEFD_KCAN_FIFO_LAST_REG); } - count = FIELD_GET(KVASER_PCIEFD_KCAN_TX_NR_PACKETS_CURRENT_MASK, - ioread32(can->reg_base + KVASER_PCIEFD_KCAN_TX_NR_PACKETS_REG)); - /* No room for a new message, stop the queue until at least one - * successful transmit - */ - if (count >= can->can.echo_skb_max || can->can.echo_skb[can->echo_idx]) - netif_stop_queue(netdev); - spin_unlock_irqrestore(&can->echo_lock, irq_flags); + netif_subqueue_maybe_stop(netdev, 0, kvaser_pciefd_tx_avail(can), 1, 1); return NETDEV_TX_OK; } @@ -970,6 +977,8 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie) can->kv_pcie = pcie; can->cmd_seq = 0; can->err_rep_cnt = 0; + can->completed_tx_pkts = 0; + can->completed_tx_bytes = 0; can->bec.txerr = 0; can->bec.rxerr = 0; @@ -983,11 +992,10 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie) tx_nr_packets_max = FIELD_GET(KVASER_PCIEFD_KCAN_TX_NR_PACKETS_MAX_MASK, ioread32(can->reg_base + KVASER_PCIEFD_KCAN_TX_NR_PACKETS_REG)); + can->tx_max_count = min(KVASER_PCIEFD_CAN_TX_MAX_COUNT, tx_nr_packets_max - 1); can->can.clock.freq = pcie->freq; - can->can.echo_skb_max = min(KVASER_PCIEFD_CAN_TX_MAX_COUNT, tx_nr_packets_max - 1); - can->echo_idx = 0; - spin_lock_init(&can->echo_lock); + can->can.echo_skb_max = roundup_pow_of_two(can->tx_max_count); spin_lock_init(&can->lock); can->can.bittiming_const = &kvaser_pciefd_bittiming_const; @@ -1510,19 +1518,21 @@ static int kvaser_pciefd_handle_ack_packet(struct kvaser_pciefd *pcie, netdev_dbg(can->can.dev, "Packet was flushed\n"); } else { int echo_idx = FIELD_GET(KVASER_PCIEFD_PACKET_SEQ_MASK, p->header[0]); - int len; - u8 count; + unsigned int len, frame_len = 0; struct sk_buff *skb; + if (echo_idx != (can->ack_idx & (can->can.echo_skb_max - 1))) + return 0; skb = can->can.echo_skb[echo_idx]; - if (skb) - kvaser_pciefd_set_skb_timestamp(pcie, skb, p->timestamp); - len = can_get_echo_skb(can->can.dev, echo_idx, NULL); - count = FIELD_GET(KVASER_PCIEFD_KCAN_TX_NR_PACKETS_CURRENT_MASK, - ioread32(can->reg_base + KVASER_PCIEFD_KCAN_TX_NR_PACKETS_REG)); + if (!skb) + return 0; + kvaser_pciefd_set_skb_timestamp(pcie, skb, p->timestamp); + len = can_get_echo_skb(can->can.dev, echo_idx, &frame_len); - if (count < can->can.echo_skb_max && netif_queue_stopped(can->can.dev)) - netif_wake_queue(can->can.dev); + /* Pairs with barrier in kvaser_pciefd_start_xmit() */ + smp_store_release(&can->ack_idx, can->ack_idx + 1); + can->completed_tx_pkts++; + can->completed_tx_bytes += frame_len; if (!one_shot_fail) { can->can.dev->stats.tx_bytes += len; @@ -1638,11 +1648,26 @@ static int kvaser_pciefd_read_buffer(struct kvaser_pciefd *pcie, int dma_buf) { int pos = 0; int res = 0; + unsigned int i; do { res = kvaser_pciefd_read_packet(pcie, &pos, dma_buf); } while (!res && pos > 0 && pos < KVASER_PCIEFD_DMA_SIZE); + /* Report ACKs in this buffer to BQL en masse for correct periods */ + for (i = 0; i < pcie->nr_channels; ++i) { + struct kvaser_pciefd_can *can = pcie->can[i]; + + if (!can->completed_tx_pkts) + continue; + netif_subqueue_completed_wake(can->can.dev, 0, + can->completed_tx_pkts, + can->completed_tx_bytes, + kvaser_pciefd_tx_avail(can), 1); + can->completed_tx_pkts = 0; + can->completed_tx_bytes = 0; + } + return res; } From 6d820b81c4dc4a4023e45c3cd6707a07dd838649 Mon Sep 17 00:00:00 2001 From: Axel Forsman Date: Tue, 20 May 2025 13:43:32 +0200 Subject: [PATCH 24/40] can: kvaser_pciefd: Continue parsing DMA buf after dropped RX Going bus-off on a channel doing RX could result in dropped packets. As netif_running() gets cleared before the channel abort procedure, the handling of any last RDATA packets would see netif_rx() return non-zero to signal a dropped packet. kvaser_pciefd_read_buffer() dealt with this "error" by breaking out of processing the remaining DMA RX buffer. Only return an error from kvaser_pciefd_read_buffer() due to packet corruption, otherwise handle it internally. Cc: stable@vger.kernel.org Signed-off-by: Axel Forsman Tested-by: Jimmy Assarsson Reviewed-by: Jimmy Assarsson Link: https://patch.msgid.link/20250520114332.8961-4-axfo@kvaser.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/kvaser_pciefd.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index a61cbade96d9..f6921368cd14 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -1209,7 +1209,7 @@ static int kvaser_pciefd_handle_data_packet(struct kvaser_pciefd *pcie, skb = alloc_canfd_skb(priv->dev, &cf); if (!skb) { priv->dev->stats.rx_dropped++; - return -ENOMEM; + return 0; } cf->len = can_fd_dlc2len(dlc); @@ -1221,7 +1221,7 @@ static int kvaser_pciefd_handle_data_packet(struct kvaser_pciefd *pcie, skb = alloc_can_skb(priv->dev, (struct can_frame **)&cf); if (!skb) { priv->dev->stats.rx_dropped++; - return -ENOMEM; + return 0; } can_frame_set_cc_len((struct can_frame *)cf, dlc, priv->ctrlmode); } @@ -1239,7 +1239,9 @@ static int kvaser_pciefd_handle_data_packet(struct kvaser_pciefd *pcie, priv->dev->stats.rx_packets++; kvaser_pciefd_set_skb_timestamp(pcie, skb, p->timestamp); - return netif_rx(skb); + netif_rx(skb); + + return 0; } static void kvaser_pciefd_change_state(struct kvaser_pciefd_can *can, From 50980d8da71a0c2e045e85bba93c0099ab73a209 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 16 May 2025 07:26:55 -0500 Subject: [PATCH 25/40] net: ethernet: ti: am65-cpsw: Lower random mac address error print to info Using random mac address is not an error since the driver continues to function, it should be informative that the system has not assigned a MAC address. This is inline with other drivers such as ax88796c, dm9051 etc. Drop the error level to info level. Signed-off-by: Nishanth Menon Reviewed-by: Simon Horman Reviewed-by: Roger Quadros Link: https://patch.msgid.link/20250516122655.442808-1-nm@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 1e6d2335293d..30665ffe78cf 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2685,7 +2685,7 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) port->slave.mac_addr); if (!is_valid_ether_addr(port->slave.mac_addr)) { eth_random_addr(port->slave.mac_addr); - dev_err(dev, "Use random MAC address\n"); + dev_info(dev, "Use random MAC address\n"); } } From 47653e4243f2b0a26372e481ca098936b51ec3a8 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Mon, 19 May 2025 18:49:36 +0200 Subject: [PATCH 26/40] net: dwmac-sun8i: Use parsed internal PHY address instead of 1 While the MDIO address of the internal PHY on Allwinner sun8i chips is generally 1, of_mdio_parse_addr is used to cleanly parse the address from the device-tree instead of hardcoding it. A commit reworking the code ditched the parsed value and hardcoded the value 1 instead, which didn't really break anything but is more fragile and not future-proof. Restore the initial behavior using the parsed address returned from the helper. Fixes: 634db83b8265 ("net: stmmac: dwmac-sun8i: Handle integrated/external MDIOs") Signed-off-by: Paul Kocialkowski Reviewed-by: Andrew Lunn Acked-by: Corentin LABBE Tested-by: Corentin LABBE Link: https://patch.msgid.link/20250519164936.4172658-1-paulk@sys-base.io Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 85723a78793a..6c7e8655a7eb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -964,7 +964,7 @@ static int sun8i_dwmac_set_syscon(struct device *dev, /* of_mdio_parse_addr returns a valid (0 ~ 31) PHY * address. No need to mask it again. */ - reg |= 1 << H3_EPHY_ADDR_SHIFT; + reg |= ret << H3_EPHY_ADDR_SHIFT; } else { /* For SoCs without internal PHY the PHY selection bit should be * set to 0 (external PHY). From 48a62855073bfbdf8f8a1e06e7f97fd68e39422c Mon Sep 17 00:00:00 2001 From: Paul Barker Date: Mon, 19 May 2025 13:33:51 +0000 Subject: [PATCH 27/40] MAINTAINERS: Drop myself to reviewer for ravb driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Maintenance of the ravb driver will be handled by Niklas for now. I still intend to review patches, and will be using my own email address going forward. Signed-off-by: Paul Barker Acked-by: Niklas Söderlund Link: https://patch.msgid.link/20250519133354.6564-1-paul.barker.ct@bp.renesas.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3563492e4eba..43d5f8709779 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20594,8 +20594,8 @@ F: Documentation/devicetree/bindings/i2c/renesas,iic-emev2.yaml F: drivers/i2c/busses/i2c-emev2.c RENESAS ETHERNET AVB DRIVER -M: Paul Barker M: Niklas Söderlund +R: Paul Barker L: netdev@vger.kernel.org L: linux-renesas-soc@vger.kernel.org S: Maintained From aed031da7e8cf6e31816a34afde961fbe0305fea Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 May 2025 13:41:28 -0700 Subject: [PATCH 28/40] bnxt_en: Fix netdev locking in ULP IRQ functions netdev_lock is already held when calling bnxt_ulp_irq_stop() and bnxt_ulp_irq_restart(). When converting rtnl_lock to netdev_lock, the original code was rtnl_dereference() to indicate that rtnl_lock was already held. rcu_dereference_protected() is the correct conversion after replacing rtnl_lock with netdev_lock. Add a new helper netdev_lock_dereference() similar to rtnl_dereference(). Fixes: 004b5008016a ("eth: bnxt: remove most dependencies on RTNL") Reviewed-by: Andy Gospodarek Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250519204130.3097027-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 9 +++------ include/net/netdev_lock.h | 3 +++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index a8e930d5dbb0..7564705d6478 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "bnxt_hsi.h" #include "bnxt.h" @@ -309,14 +310,12 @@ void bnxt_ulp_irq_stop(struct bnxt *bp) if (!ulp->msix_requested) return; - netdev_lock(bp->dev); - ops = rcu_dereference(ulp->ulp_ops); + ops = netdev_lock_dereference(ulp->ulp_ops, bp->dev); if (!ops || !ops->ulp_irq_stop) return; if (test_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) reset = true; ops->ulp_irq_stop(ulp->handle, reset); - netdev_unlock(bp->dev); } } @@ -335,8 +334,7 @@ void bnxt_ulp_irq_restart(struct bnxt *bp, int err) if (!ulp->msix_requested) return; - netdev_lock(bp->dev); - ops = rcu_dereference(ulp->ulp_ops); + ops = netdev_lock_dereference(ulp->ulp_ops, bp->dev); if (!ops || !ops->ulp_irq_restart) return; @@ -348,7 +346,6 @@ void bnxt_ulp_irq_restart(struct bnxt *bp, int err) bnxt_fill_msix_vecs(bp, ent); } ops->ulp_irq_restart(ulp->handle, ent); - netdev_unlock(bp->dev); kfree(ent); } } diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h index c316b551df8d..0ee5bc766810 100644 --- a/include/net/netdev_lock.h +++ b/include/net/netdev_lock.h @@ -98,6 +98,9 @@ static inline int netdev_lock_cmp_fn(const struct lockdep_map *a, &qdisc_xmit_lock_key); \ } +#define netdev_lock_dereference(p, dev) \ + rcu_dereference_protected(p, lockdep_is_held(&(dev)->lock)) + int netdev_debug_event(struct notifier_block *nb, unsigned long event, void *ptr); From 293e38ff4e4c2ba53f3fd47d8a4a9f0f0414a7a6 Mon Sep 17 00:00:00 2001 From: Thangaraj Samynathan Date: Fri, 16 May 2025 09:27:19 +0530 Subject: [PATCH 29/40] net: lan743x: Restore SGMII CTRL register on resume SGMII_CTRL register, which specifies the active interface, was not properly restored when resuming from suspend. This led to incorrect interface selection after resume particularly in scenarios involving the FPGA. To fix this: - Move the SGMII_CTRL setup out of the probe function. - Initialize the register in the hardware initialization helper function, which is called during both device initialization and resume. This ensures the interface configuration is consistently restored after suspend/resume cycles. Fixes: a46d9d37c4f4f ("net: lan743x: Add support for SGMII interface") Signed-off-by: Thangaraj Samynathan Link: https://patch.msgid.link/20250516035719.117960-1-thangaraj.s@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan743x_main.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index e2d6bfb5d693..a70b88037a20 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -3495,6 +3495,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, struct pci_dev *pdev) { struct lan743x_tx *tx; + u32 sgmii_ctl; int index; int ret; @@ -3507,6 +3508,15 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, spin_lock_init(&adapter->eth_syslock_spinlock); mutex_init(&adapter->sgmii_rw_lock); pci11x1x_set_rfe_rd_fifo_threshold(adapter); + sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); + if (adapter->is_sgmii_en) { + sgmii_ctl |= SGMII_CTL_SGMII_ENABLE_; + sgmii_ctl &= ~SGMII_CTL_SGMII_POWER_DN_; + } else { + sgmii_ctl &= ~SGMII_CTL_SGMII_ENABLE_; + sgmii_ctl |= SGMII_CTL_SGMII_POWER_DN_; + } + lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); } else { adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS; adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS; @@ -3558,7 +3568,6 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) { - u32 sgmii_ctl; int ret; adapter->mdiobus = devm_mdiobus_alloc(&adapter->pdev->dev); @@ -3570,10 +3579,6 @@ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) adapter->mdiobus->priv = (void *)adapter; if (adapter->is_pci11x1x) { if (adapter->is_sgmii_en) { - sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); - sgmii_ctl |= SGMII_CTL_SGMII_ENABLE_; - sgmii_ctl &= ~SGMII_CTL_SGMII_POWER_DN_; - lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); netif_dbg(adapter, drv, adapter->netdev, "SGMII operation\n"); adapter->mdiobus->read = lan743x_mdiobus_read_c22; @@ -3584,10 +3589,6 @@ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) netif_dbg(adapter, drv, adapter->netdev, "lan743x-mdiobus-c45\n"); } else { - sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); - sgmii_ctl &= ~SGMII_CTL_SGMII_ENABLE_; - sgmii_ctl |= SGMII_CTL_SGMII_POWER_DN_; - lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); netif_dbg(adapter, drv, adapter->netdev, "RGMII operation\n"); // Only C22 support when RGMII I/F From ef0841e4cb08754be6cb42bf97739fce5d086e5f Mon Sep 17 00:00:00 2001 From: Carlos Sanchez Date: Tue, 20 May 2025 12:23:05 +0200 Subject: [PATCH 30/40] can: slcan: allow reception of short error messages Allows slcan to receive short messages (typically errors) from the serial interface. When error support was added to slcan protocol in b32ff4668544e1333b694fcc7812b2d7397b4d6a ("can: slcan: extend the protocol with error info") the minimum valid message size changed from 5 (minimum standard can frame tIII0) to 3 ("e1a" is a valid protocol message, it is one of the examples given in the comments for slcan_bump_err() ), but the check for minimum message length prodicating all decoding was not adjusted. This makes short error messages discarded and error frames not being generated. This patch changes the minimum length to the new minimum (3 characters, excluding terminator, is now a valid message). Signed-off-by: Carlos Sanchez Fixes: b32ff4668544 ("can: slcan: extend the protocol with error info") Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20250520102305.1097494-1-carlossanchez@geotab.com Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/slcan/slcan-core.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/slcan/slcan-core.c b/drivers/net/can/slcan/slcan-core.c index 24c6622d36bd..58ff2ec1d975 100644 --- a/drivers/net/can/slcan/slcan-core.c +++ b/drivers/net/can/slcan/slcan-core.c @@ -71,12 +71,21 @@ MODULE_AUTHOR("Dario Binacchi "); #define SLCAN_CMD_LEN 1 #define SLCAN_SFF_ID_LEN 3 #define SLCAN_EFF_ID_LEN 8 +#define SLCAN_DATA_LENGTH_LEN 1 +#define SLCAN_ERROR_LEN 1 #define SLCAN_STATE_LEN 1 #define SLCAN_STATE_BE_RXCNT_LEN 3 #define SLCAN_STATE_BE_TXCNT_LEN 3 -#define SLCAN_STATE_FRAME_LEN (1 + SLCAN_CMD_LEN + \ - SLCAN_STATE_BE_RXCNT_LEN + \ - SLCAN_STATE_BE_TXCNT_LEN) +#define SLCAN_STATE_MSG_LEN (SLCAN_CMD_LEN + \ + SLCAN_STATE_LEN + \ + SLCAN_STATE_BE_RXCNT_LEN + \ + SLCAN_STATE_BE_TXCNT_LEN) +#define SLCAN_ERROR_MSG_LEN_MIN (SLCAN_CMD_LEN + \ + SLCAN_ERROR_LEN + \ + SLCAN_DATA_LENGTH_LEN) +#define SLCAN_FRAME_MSG_LEN_MIN (SLCAN_CMD_LEN + \ + SLCAN_SFF_ID_LEN + \ + SLCAN_DATA_LENGTH_LEN) struct slcan { struct can_priv can; @@ -176,6 +185,9 @@ static void slcan_bump_frame(struct slcan *sl) u32 tmpid; char *cmd = sl->rbuff; + if (sl->rcount < SLCAN_FRAME_MSG_LEN_MIN) + return; + skb = alloc_can_skb(sl->dev, &cf); if (unlikely(!skb)) { sl->dev->stats.rx_dropped++; @@ -281,7 +293,7 @@ static void slcan_bump_state(struct slcan *sl) return; } - if (state == sl->can.state || sl->rcount < SLCAN_STATE_FRAME_LEN) + if (state == sl->can.state || sl->rcount != SLCAN_STATE_MSG_LEN) return; cmd += SLCAN_STATE_BE_RXCNT_LEN + SLCAN_CMD_LEN + 1; @@ -328,6 +340,9 @@ static void slcan_bump_err(struct slcan *sl) bool rx_errors = false, tx_errors = false, rx_over_errors = false; int i, len; + if (sl->rcount < SLCAN_ERROR_MSG_LEN_MIN) + return; + /* get len from sanitized ASCII value */ len = cmd[1]; if (len >= '0' && len < '9') @@ -456,8 +471,7 @@ static void slcan_bump(struct slcan *sl) static void slcan_unesc(struct slcan *sl, unsigned char s) { if ((s == '\r') || (s == '\a')) { /* CR or BEL ends the pdu */ - if (!test_and_clear_bit(SLF_ERROR, &sl->flags) && - sl->rcount > 4) + if (!test_and_clear_bit(SLF_ERROR, &sl->flags)) slcan_bump(sl); sl->rcount = 0; From b95ed5517354a5f451fb9b3771776ca4c0b65ac3 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Fri, 16 May 2025 21:36:38 +0000 Subject: [PATCH 31/40] xsk: Bring back busy polling support in XDP_COPY Commit 5ef44b3cb43b ("xsk: Bring back busy polling support") fixed the busy polling support in xsk for XDP_ZEROCOPY after it was broken in commit 86e25f40aa1e ("net: napi: Add napi_config"). The busy polling support with XDP_COPY remained broken since the napi_id setup in xsk_rcv_check was removed. Bring back the setup of napi_id for XDP_COPY so socket level SO_BUSYPOLL can be used to poll the underlying napi. Do the setup of napi_id for XDP_COPY in xsk_bind, as it is done currently for XDP_ZEROCOPY. The setup of napi_id for XDP_COPY in xsk_bind is safe because xsk_rcv_check checks that the rx queue at which the packet arrives is equal to the queue_id that was supplied in bind. This is done for both XDP_COPY and XDP_ZEROCOPY mode. Tested using AF_XDP support in virtio-net by running the xsk_rr AF_XDP benchmarking tool shared here: https://lore.kernel.org/all/20250320163523.3501305-1-skhawaja@google.com/T/ Enabled socket busy polling using following commands in qemu, ``` sudo ethtool -L eth0 combined 1 echo 400 | sudo tee /proc/sys/net/core/busy_read echo 100 | sudo tee /sys/class/net/eth0/napi_defer_hard_irqs echo 15000 | sudo tee /sys/class/net/eth0/gro_flush_timeout ``` Fixes: 5ef44b3cb43b ("xsk: Bring back busy polling support") Signed-off-by: Samiullah Khawaja Reviewed-by: Willem de Bruijn Acked-by: Stanislav Fomichev Signed-off-by: David S. Miller --- net/xdp/xsk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 4abc81f33d3e..72c000c0ae5f 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1304,7 +1304,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) xs->queue_id = qid; xp_add_xsk(xs->pool, xs); - if (xs->zc && qid < dev->real_num_rx_queues) { + if (qid < dev->real_num_rx_queues) { struct netdev_rx_queue *rxq; rxq = __netif_get_rx_queue(dev, qid); From 009970506c92816c857e1705cf6db68c9147d39f Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Sat, 17 May 2025 17:58:27 +0800 Subject: [PATCH 32/40] net: hibmcge: fix incorrect statistics update issue When the user dumps statistics, the hibmcge driver automatically updates all statistics. If the driver is performing the reset operation, the error data of 0xFFFFFFFF is updated. Therefore, if the driver is resetting, the hbg_update_stats_by_info() needs to return directly. Fixes: c0bf9bf31e79 ("net: hibmcge: Add support for dump statistics") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250517095828.1763126-2-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c index 8f1107b85fbb..55520053270a 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c @@ -317,6 +317,9 @@ static void hbg_update_stats_by_info(struct hbg_priv *priv, const struct hbg_ethtool_stats *stats; u32 i; + if (test_bit(HBG_NIC_STATE_RESETTING, &priv->state)) + return; + for (i = 0; i < info_len; i++) { stats = &info[i]; if (!stats->reg) From 1b45443b84872c010b36dfc2681413bb1a10011a Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Sat, 17 May 2025 17:58:28 +0800 Subject: [PATCH 33/40] net: hibmcge: fix wrong ndo.open() after reset fail issue. If the driver reset fails, it may not work properly. Therefore, the ndo.open() operation should be rejected. In this patch, the driver calls netif_device_detach() before the reset and calls netif_device_attach() after the reset succeeds. If the reset fails, netif_device_attach() is not called. Therefore, netdev does not present and cannot be opened. If reset fails, only the PCI reset (via sysfs) can be used to attempt recovery. Fixes: 3f5a61f6d504 ("net: hibmcge: Add reset supported in this module") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250517095828.1763126-3-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c index a0bcfb5a713d..ff3295b60a69 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c @@ -61,6 +61,8 @@ static int hbg_reset_prepare(struct hbg_priv *priv, enum hbg_reset_type type) return -EBUSY; } + netif_device_detach(priv->netdev); + priv->reset_type = type; set_bit(HBG_NIC_STATE_RESETTING, &priv->state); clear_bit(HBG_NIC_STATE_RESET_FAIL, &priv->state); @@ -91,6 +93,8 @@ static int hbg_reset_done(struct hbg_priv *priv, enum hbg_reset_type type) return ret; } + netif_device_attach(priv->netdev); + dev_info(&priv->pdev->dev, "reset done\n"); return ret; } @@ -117,16 +121,13 @@ void hbg_err_reset(struct hbg_priv *priv) if (running) dev_close(priv->netdev); - hbg_reset(priv); - - /* in hbg_pci_err_detected(), we will detach first, - * so we need to attach before open - */ - if (!netif_device_present(priv->netdev)) - netif_device_attach(priv->netdev); + if (hbg_reset(priv)) + goto err_unlock; if (running) dev_open(priv->netdev, NULL); + +err_unlock: rtnl_unlock(); } @@ -160,7 +161,6 @@ static pci_ers_result_t hbg_pci_err_slot_reset(struct pci_dev *pdev) pci_save_state(pdev); hbg_err_reset(priv); - netif_device_attach(netdev); return PCI_ERS_RESULT_RECOVERED; } From 407e0efdf8baf1672876d5948b75049860a93e59 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 May 2025 12:40:30 +0000 Subject: [PATCH 34/40] idpf: fix idpf_vport_splitq_napi_poll() idpf_vport_splitq_napi_poll() can incorrectly return @budget after napi_complete_done() has been called. This violates NAPI rules, because after napi_complete_done(), current thread lost napi ownership. Move the test against POLL_MODE before the napi_complete_done(). Fixes: c2d548cad150 ("idpf: add TX splitq napi poll support") Reported-by: Peter Newman Closes: https://lore.kernel.org/netdev/20250520121908.1805732-1-edumazet@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Joshua Hay Cc: Alan Brady Cc: Madhu Chittim Cc: Phani Burra Cc: Pavan Kumar Linga Link: https://patch.msgid.link/20250520124030.1983936-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index bdf52cef3891..2d5f5c9f91ce 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -4025,6 +4025,14 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) return budget; } + /* Switch to poll mode in the tear-down path after sending disable + * queues virtchnl message, as the interrupts will be disabled after + * that. + */ + if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE, + q_vector->tx[0]))) + return budget; + work_done = min_t(int, work_done, budget - 1); /* Exit the polling mode, but don't re-enable interrupts if stack might @@ -4035,15 +4043,7 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) else idpf_vport_intr_set_wb_on_itr(q_vector); - /* Switch to poll mode in the tear-down path after sending disable - * queues virtchnl message, as the interrupts will be disabled after - * that - */ - if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE, - q_vector->tx[0]))) - return budget; - else - return work_done; + return work_done; } /** From 3f981138109f63232a5fb7165938d4c945cc1b9d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 18 May 2025 15:20:37 -0700 Subject: [PATCH 35/40] sch_hfsc: Fix qlen accounting bug when using peek in hfsc_enqueue() When enqueuing the first packet to an HFSC class, hfsc_enqueue() calls the child qdisc's peek() operation before incrementing sch->q.qlen and sch->qstats.backlog. If the child qdisc uses qdisc_peek_dequeued(), this may trigger an immediate dequeue and potential packet drop. In such cases, qdisc_tree_reduce_backlog() is called, but the HFSC qdisc's qlen and backlog have not yet been updated, leading to inconsistent queue accounting. This can leave an empty HFSC class in the active list, causing further consequences like use-after-free. This patch fixes the bug by moving the increment of sch->q.qlen and sch->qstats.backlog before the call to the child qdisc's peek() operation. This ensures that queue length and backlog are always accurate when packet drops or dequeues are triggered during the peek. Fixes: 12d0ad3be9c3 ("net/sched/sch_hfsc.c: handle corner cases where head may change invalidating calculated deadline") Reported-by: Mingi Cho Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250518222038.58538-2-xiyou.wangcong@gmail.com Reviewed-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- net/sched/sch_hfsc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index cb8c525ea20e..7986145a527c 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1569,6 +1569,9 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) return err; } + sch->qstats.backlog += len; + sch->q.qlen++; + if (first && !cl->cl_nactive) { if (cl->cl_flags & HFSC_RSC) init_ed(cl, len); @@ -1584,9 +1587,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) } - sch->qstats.backlog += len; - sch->q.qlen++; - return NET_XMIT_SUCCESS; } From c3572acffb751af65505511b847f628c872ffae0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 18 May 2025 15:20:38 -0700 Subject: [PATCH 36/40] selftests/tc-testing: Add an HFSC qlen accounting test This test reproduces a scenario where HFSC queue length and backlog accounting can become inconsistent when a peek operation triggers a dequeue and possible drop before the parent qdisc updates its counters. The test sets up a DRR root qdisc with an HFSC class, netem, and blackhole children, and uses Scapy to inject a packet. It helps to verify that HFSC correctly tracks qlen and backlog even when packets are dropped during peek-induced dequeue. Cc: Mingi Cho Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250518222038.58538-3-xiyou.wangcong@gmail.com Reviewed-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- .../tc-testing/tc-tests/infra/qdiscs.json | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index a951c0d33cd2..ddc97ecd8b39 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -573,5 +573,32 @@ "teardown": [ "$TC qdisc del dev $DEV1 handle 1: root" ] + }, + { + "id": "831d", + "name": "Test HFSC qlen accounting with DRR/NETEM/BLACKHOLE chain", + "category": ["qdisc", "hfsc", "drr", "netem", "blackhole"], + "plugins": { "requires": ["nsPlugin", "scapyPlugin"] }, + "setup": [ + "$IP link set dev $DEV1 up || true", + "$TC qdisc add dev $DEV1 root handle 1: drr", + "$TC filter add dev $DEV1 parent 1: basic classid 1:1", + "$TC class add dev $DEV1 parent 1: classid 1:1 drr", + "$TC qdisc add dev $DEV1 parent 1:1 handle 2: hfsc def 1", + "$TC class add dev $DEV1 parent 2: classid 2:1 hfsc rt m1 8 d 1 m2 0", + "$TC qdisc add dev $DEV1 parent 2:1 handle 3: netem", + "$TC qdisc add dev $DEV1 parent 3:1 handle 4: blackhole" + ], + "scapy": { + "iface": "$DEV0", + "count": 5, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + }, + "cmdUnderTest": "$TC -s qdisc show dev $DEV1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DEV1", + "matchPattern": "qdisc hfsc", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 root handle 1: drr"] } ] From 184fb40f731bd3353b0887731f7caba66609e9cd Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Mon, 19 May 2025 12:56:58 +0530 Subject: [PATCH 37/40] octeontx2-pf: Avoid adding dcbnl_ops for LBK and SDP vf Priority flow control is not supported for LBK and SDP vf. This patch adds support to not add dcbnl_ops for LBK and SDP vf. Fixes: 8e67558177f8 ("octeontx2-pf: PFC config support with DCBx") Signed-off-by: Suman Ghosh Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250519072658.2960851-1-sumang@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 7ef3ba477d49..9b28be4c4a5d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -729,9 +729,12 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) } #ifdef CONFIG_DCB - err = otx2_dcbnl_set_ops(netdev); - if (err) - goto err_free_zc_bmap; + /* Priority flow control is not supported for LBK and SDP vf(s) */ + if (!(is_otx2_lbkvf(vf->pdev) || is_otx2_sdp_rep(vf->pdev))) { + err = otx2_dcbnl_set_ops(netdev); + if (err) + goto err_free_zc_bmap; + } #endif otx2_qos_init(vf, qos_txqs); From e279024617134c94fd3e37470156534d5f2b3472 Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Tue, 20 May 2025 18:14:04 +0800 Subject: [PATCH 38/40] net/tipc: fix slab-use-after-free Read in tipc_aead_encrypt_done Syzbot reported a slab-use-after-free with the following call trace: ================================================================== BUG: KASAN: slab-use-after-free in tipc_aead_encrypt_done+0x4bd/0x510 net/tipc/crypto.c:840 Read of size 8 at addr ffff88807a733000 by task kworker/1:0/25 Call Trace: kasan_report+0xd9/0x110 mm/kasan/report.c:601 tipc_aead_encrypt_done+0x4bd/0x510 net/tipc/crypto.c:840 crypto_request_complete include/crypto/algapi.h:266 aead_request_complete include/crypto/internal/aead.h:85 cryptd_aead_crypt+0x3b8/0x750 crypto/cryptd.c:772 crypto_request_complete include/crypto/algapi.h:266 cryptd_queue_worker+0x131/0x200 crypto/cryptd.c:181 process_one_work+0x9fb/0x1b60 kernel/workqueue.c:3231 Allocated by task 8355: kzalloc_noprof include/linux/slab.h:778 tipc_crypto_start+0xcc/0x9e0 net/tipc/crypto.c:1466 tipc_init_net+0x2dd/0x430 net/tipc/core.c:72 ops_init+0xb9/0x650 net/core/net_namespace.c:139 setup_net+0x435/0xb40 net/core/net_namespace.c:343 copy_net_ns+0x2f0/0x670 net/core/net_namespace.c:508 create_new_namespaces+0x3ea/0xb10 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0xc0/0x1f0 kernel/nsproxy.c:228 ksys_unshare+0x419/0x970 kernel/fork.c:3323 __do_sys_unshare kernel/fork.c:3394 Freed by task 63: kfree+0x12a/0x3b0 mm/slub.c:4557 tipc_crypto_stop+0x23c/0x500 net/tipc/crypto.c:1539 tipc_exit_net+0x8c/0x110 net/tipc/core.c:119 ops_exit_list+0xb0/0x180 net/core/net_namespace.c:173 cleanup_net+0x5b7/0xbf0 net/core/net_namespace.c:640 process_one_work+0x9fb/0x1b60 kernel/workqueue.c:3231 After freed the tipc_crypto tx by delete namespace, tipc_aead_encrypt_done may still visit it in cryptd_queue_worker workqueue. I reproduce this issue by: ip netns add ns1 ip link add veth1 type veth peer name veth2 ip link set veth1 netns ns1 ip netns exec ns1 tipc bearer enable media eth dev veth1 ip netns exec ns1 tipc node set key this_is_a_master_key master ip netns exec ns1 tipc bearer disable media eth dev veth1 ip netns del ns1 The key of reproduction is that, simd_aead_encrypt is interrupted, leading to crypto_simd_usable() return false. Thus, the cryptd_queue_worker is triggered, and the tipc_crypto tx will be visited. tipc_disc_timeout tipc_bearer_xmit_skb tipc_crypto_xmit tipc_aead_encrypt crypto_aead_encrypt // encrypt() simd_aead_encrypt // crypto_simd_usable() is false child = &ctx->cryptd_tfm->base; simd_aead_encrypt crypto_aead_encrypt // encrypt() cryptd_aead_encrypt_enqueue cryptd_aead_enqueue cryptd_enqueue_request // trigger cryptd_queue_worker queue_work_on(smp_processor_id(), cryptd_wq, &cpu_queue->work) Fix this by holding net reference count before encrypt. Reported-by: syzbot+55c12726619ff85ce1f6@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=55c12726619ff85ce1f6 Fixes: fc1b6d6de220 ("tipc: introduce TIPC encryption & authentication") Signed-off-by: Wang Liang Link: https://patch.msgid.link/20250520101404.1341730-1-wangliang74@huawei.com Signed-off-by: Paolo Abeni --- net/tipc/crypto.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index c524421ec652..8584893b4785 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -817,12 +817,16 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb, goto exit; } + /* Get net to avoid freed tipc_crypto when delete namespace */ + get_net(aead->crypto->net); + /* Now, do encrypt */ rc = crypto_aead_encrypt(req); if (rc == -EINPROGRESS || rc == -EBUSY) return rc; tipc_bearer_put(b); + put_net(aead->crypto->net); exit: kfree(ctx); @@ -860,6 +864,7 @@ static void tipc_aead_encrypt_done(void *data, int err) kfree(tx_ctx); tipc_bearer_put(b); tipc_aead_put(aead); + put_net(net); } /** From 0eefa27b493306928d88af6368193b134c98fd64 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 21 May 2025 11:38:33 +0530 Subject: [PATCH 39/40] octeontx2-af: Set LMT_ENA bit for APR table entries This patch enables the LMT line for a PF/VF by setting the LMT_ENA bit in the APR_LMT_MAP_ENTRY_S structure. Additionally, it simplifies the logic for calculating the LMTST table index by consistently using the maximum number of hw supported VFs (i.e., 256). Fixes: 873a1e3d207a ("octeontx2-af: cn10k: Setting up lmtst map table"). Signed-off-by: Subbaraya Sundeep Signed-off-by: Geetha sowjanya Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250521060834.19780-2-gakula@marvell.com Signed-off-by: Paolo Abeni --- .../net/ethernet/marvell/octeontx2/af/rvu_cn10k.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c index 7fa98aeb3663..3838c04b78c2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c @@ -15,13 +15,17 @@ #define LMT_TBL_OP_WRITE 1 #define LMT_MAP_TABLE_SIZE (128 * 1024) #define LMT_MAPTBL_ENTRY_SIZE 16 +#define LMT_MAX_VFS 256 + +#define LMT_MAP_ENTRY_ENA BIT_ULL(20) +#define LMT_MAP_ENTRY_LINES GENMASK_ULL(18, 16) /* Function to perform operations (read/write) on lmtst map table */ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, int lmt_tbl_op) { void __iomem *lmt_map_base; - u64 tbl_base; + u64 tbl_base, cfg; tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE); @@ -35,6 +39,13 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, *val = readq(lmt_map_base + index); } else { writeq((*val), (lmt_map_base + index)); + + cfg = FIELD_PREP(LMT_MAP_ENTRY_ENA, 0x1); + /* 2048 LMTLINES */ + cfg |= FIELD_PREP(LMT_MAP_ENTRY_LINES, 0x6); + + writeq(cfg, (lmt_map_base + (index + 8))); + /* Flushing the AP interceptor cache to make APR_LMT_MAP_ENTRY_S * changes effective. Write 1 for flush and read is being used as a * barrier and sets up a data dependency. Write to 0 after a write @@ -52,7 +63,7 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, #define LMT_MAP_TBL_W1_OFF 8 static u32 rvu_get_lmtst_tbl_index(struct rvu *rvu, u16 pcifunc) { - return ((rvu_get_pf(pcifunc) * rvu->hw->total_vfs) + + return ((rvu_get_pf(pcifunc) * LMT_MAX_VFS) + (pcifunc & RVU_PFVF_FUNC_MASK)) * LMT_MAPTBL_ENTRY_SIZE; } From a6ae7129819ad20788e610261246e71736543b8b Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Wed, 21 May 2025 11:38:34 +0530 Subject: [PATCH 40/40] octeontx2-af: Fix APR entry mapping based on APR_LMT_CFG The current implementation maps the APR table using a fixed size, which can lead to incorrect mapping when the number of PFs and VFs varies. This patch corrects the mapping by calculating the APR table size dynamically based on the values configured in the APR_LMT_CFG register, ensuring accurate representation of APR entries in debugfs. Fixes: 0daa55d033b0 ("octeontx2-af: cn10k: debugfs for dumping LMTST map table"). Signed-off-by: Geetha sowjanya Link: https://patch.msgid.link/20250521060834.19780-3-gakula@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c | 9 ++++++--- .../net/ethernet/marvell/octeontx2/af/rvu_debugfs.c | 11 ++++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c index 3838c04b78c2..4a3370a40dd8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c @@ -13,7 +13,6 @@ /* RVU LMTST */ #define LMT_TBL_OP_READ 0 #define LMT_TBL_OP_WRITE 1 -#define LMT_MAP_TABLE_SIZE (128 * 1024) #define LMT_MAPTBL_ENTRY_SIZE 16 #define LMT_MAX_VFS 256 @@ -26,10 +25,14 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, { void __iomem *lmt_map_base; u64 tbl_base, cfg; + int pfs, vfs; tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE); + cfg = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_CFG); + vfs = 1 << (cfg & 0xF); + pfs = 1 << ((cfg >> 4) & 0x7); - lmt_map_base = ioremap_wc(tbl_base, LMT_MAP_TABLE_SIZE); + lmt_map_base = ioremap_wc(tbl_base, pfs * vfs * LMT_MAPTBL_ENTRY_SIZE); if (!lmt_map_base) { dev_err(rvu->dev, "Failed to setup lmt map table mapping!!\n"); return -ENOMEM; @@ -80,7 +83,7 @@ static int rvu_get_lmtaddr(struct rvu *rvu, u16 pcifunc, mutex_lock(&rvu->rsrc_lock); rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_ADDR_REQ, iova); - pf = rvu_get_pf(pcifunc) & 0x1F; + pf = rvu_get_pf(pcifunc) & RVU_PFVF_PF_MASK; val = BIT_ULL(63) | BIT_ULL(14) | BIT_ULL(13) | pf << 8 | ((pcifunc & RVU_PFVF_FUNC_MASK) & 0xFF); rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_TXN_REQ, val); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index a1f9ec03c2ce..c827da626471 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -553,6 +553,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, u64 lmt_addr, val, tbl_base; int pf, vf, num_vfs, hw_vfs; void __iomem *lmt_map_base; + int apr_pfs, apr_vfs; int buf_size = 10240; size_t off = 0; int index = 0; @@ -568,8 +569,12 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, return -ENOMEM; tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE); + val = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_CFG); + apr_vfs = 1 << (val & 0xF); + apr_pfs = 1 << ((val >> 4) & 0x7); - lmt_map_base = ioremap_wc(tbl_base, 128 * 1024); + lmt_map_base = ioremap_wc(tbl_base, apr_pfs * apr_vfs * + LMT_MAPTBL_ENTRY_SIZE); if (!lmt_map_base) { dev_err(rvu->dev, "Failed to setup lmt map table mapping!!\n"); kfree(buf); @@ -591,7 +596,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, off += scnprintf(&buf[off], buf_size - 1 - off, "PF%d \t\t\t", pf); - index = pf * rvu->hw->total_vfs * LMT_MAPTBL_ENTRY_SIZE; + index = pf * apr_vfs * LMT_MAPTBL_ENTRY_SIZE; off += scnprintf(&buf[off], buf_size - 1 - off, " 0x%llx\t\t", (tbl_base + index)); lmt_addr = readq(lmt_map_base + index); @@ -604,7 +609,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, /* Reading num of VFs per PF */ rvu_get_pf_numvfs(rvu, pf, &num_vfs, &hw_vfs); for (vf = 0; vf < num_vfs; vf++) { - index = (pf * rvu->hw->total_vfs * 16) + + index = (pf * apr_vfs * LMT_MAPTBL_ENTRY_SIZE) + ((vf + 1) * LMT_MAPTBL_ENTRY_SIZE); off += scnprintf(&buf[off], buf_size - 1 - off, "PF%d:VF%d \t\t", pf, vf);