From d2f011a0bf28c090ad75c9b1d306f2e1dda1c9bc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Dec 2023 10:12:43 +0000 Subject: [PATCH 1/2] ipv6: annotate data-races around np->mcast_oif np->mcast_oif is read locklessly in some contexts. Make all accesses to this field lockless, adding appropriate annotations. This also makes setsockopt( IPV6_MULTICAST_IF ) lockless. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 2 +- net/ipv6/datagram.c | 4 +- net/ipv6/icmp.c | 4 +- net/ipv6/ipv6_sockglue.c | 74 +++++++++++++++++---------------- net/ipv6/ping.c | 4 +- net/ipv6/raw.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 2 +- net/l2tp/l2tp_ip6.c | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 2 +- net/rds/tcp_listen.c | 2 +- 11 files changed, 51 insertions(+), 49 deletions(-) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4550b680665a..06d7324276ec 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -669,7 +669,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) ipv6_pktoptions: if (!((1 << sk->sk_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) { if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) - np->mcast_oif = inet6_iif(opt_skb); + WRITE_ONCE(np->mcast_oif, inet6_iif(opt_skb)); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) WRITE_ONCE(np->mcast_hops, ipv6_hdr(opt_skb)->hop_limit); if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index cc6a502db39d..1804bd6f4684 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -60,7 +60,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, if (!oif) { if (ipv6_addr_is_multicast(&fl6->daddr)) - oif = np->mcast_oif; + oif = READ_ONCE(np->mcast_oif); else oif = np->ucast_oif; } @@ -229,7 +229,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, } if (!sk->sk_bound_dev_if && (addr_type & IPV6_ADDR_MULTICAST)) - WRITE_ONCE(sk->sk_bound_dev_if, np->mcast_oif); + WRITE_ONCE(sk->sk_bound_dev_if, READ_ONCE(np->mcast_oif)); /* Connect to link-local address requires an interface */ if (!sk->sk_bound_dev_if) { diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index f62427097126..f84a465c9759 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -584,7 +584,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, tmp_hdr.icmp6_pointer = htonl(info); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) - fl6.flowi6_oif = np->mcast_oif; + fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; @@ -770,7 +770,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) np = inet6_sk(sk); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) - fl6.flowi6_oif = np->mcast_oif; + fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 7d661735cb9d..fe7e96e69960 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -509,6 +509,34 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optlen < sizeof(int)) return -EINVAL; return ip6_sock_set_addr_preferences(sk, val); + case IPV6_MULTICAST_IF: + if (sk->sk_type == SOCK_STREAM) + return -ENOPROTOOPT; + if (optlen < sizeof(int)) + return -EINVAL; + if (val) { + struct net_device *dev; + int bound_dev_if, midx; + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, val); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } + midx = l3mdev_master_ifindex_rcu(dev); + + rcu_read_unlock(); + + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + if (bound_dev_if && + bound_dev_if != val && + (!midx || midx != bound_dev_if)) + return -EINVAL; + } + WRITE_ONCE(np->mcast_oif, val); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -860,36 +888,6 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; } - case IPV6_MULTICAST_IF: - if (sk->sk_type == SOCK_STREAM) - break; - if (optlen < sizeof(int)) - goto e_inval; - - if (val) { - struct net_device *dev; - int midx; - - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, val); - if (!dev) { - rcu_read_unlock(); - retv = -ENODEV; - break; - } - midx = l3mdev_master_ifindex_rcu(dev); - - rcu_read_unlock(); - - if (sk->sk_bound_dev_if && - sk->sk_bound_dev_if != val && - (!midx || midx != sk->sk_bound_dev_if)) - goto e_inval; - } - np->mcast_oif = val; - retv = 0; - break; case IPV6_ADD_MEMBERSHIP: case IPV6_DROP_MEMBERSHIP: { @@ -1161,10 +1159,12 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, sockopt_release_sock(sk); if (!skb) { if (np->rxopt.bits.rxinfo) { + int mcast_oif = READ_ONCE(np->mcast_oif); struct in6_pktinfo src_info; - src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : + + src_info.ipi6_ifindex = mcast_oif ? : np->sticky_pktinfo.ipi6_ifindex; - src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr; + src_info.ipi6_addr = mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr; put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxhlim) { @@ -1178,11 +1178,13 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, put_cmsg(&msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); } if (np->rxopt.bits.rxoinfo) { + int mcast_oif = READ_ONCE(np->mcast_oif); struct in6_pktinfo src_info; - src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : + + src_info.ipi6_ifindex = mcast_oif ? : np->sticky_pktinfo.ipi6_ifindex; - src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : - np->sticky_pktinfo.ipi6_addr; + src_info.ipi6_addr = mcast_oif ? sk->sk_v6_daddr : + np->sticky_pktinfo.ipi6_addr; put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxohlim) { @@ -1359,7 +1361,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_MULTICAST_IF: - val = np->mcast_oif; + val = READ_ONCE(np->mcast_oif); break; case IPV6_MULTICAST_ALL: diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index d2098dd4ceae..465e8d004067 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -107,7 +107,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) oif = np->sticky_pktinfo.ipi6_ifindex; if (!oif && ipv6_addr_is_multicast(daddr)) - oif = np->mcast_oif; + oif = READ_ONCE(np->mcast_oif); else if (!oif) oif = np->ucast_oif; @@ -157,7 +157,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) rt = (struct rt6_info *) dst; if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) - fl6.flowi6_oif = np->mcast_oif; + fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index dd0a4e73e602..59a1e269a82c 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -876,7 +876,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) final_p = fl6_update_dst(&fl6, opt, &final); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) - fl6.flowi6_oif = np->mcast_oif; + fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 06a19fe2afd1..d1307d77a6f0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1702,7 +1702,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) - np->mcast_oif = tcp_v6_iif(opt_skb); + WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb)); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) WRITE_ONCE(np->mcast_hops, ipv6_hdr(opt_skb)->hop_limit); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 622b10a549f7..0b7c755faa77 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1541,7 +1541,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) connected = false; if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) { - fl6->flowi6_oif = np->mcast_oif; + fl6->flowi6_oif = READ_ONCE(np->mcast_oif); connected = false; } else if (!fl6->flowi6_oif) fl6->flowi6_oif = np->ucast_oif; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index bb373e249237..17301f9dd228 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -599,7 +599,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) final_p = fl6_update_dst(&fl6, opt, &final); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) - fl6.flowi6_oif = np->mcast_oif; + fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index eaf9f2ed0067..be74c0906dda 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1365,7 +1365,7 @@ static int set_mcast_if(struct sock *sk, struct net_device *dev) struct ipv6_pinfo *np = inet6_sk(sk); /* IPV6_MULTICAST_IF */ - np->mcast_oif = dev->ifindex; + WRITE_ONCE(np->mcast_oif, dev->ifindex); } #endif release_sock(sk); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 53b3535a1e4a..05008ce5c421 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -165,7 +165,7 @@ int rds_tcp_accept_one(struct socket *sock) struct ipv6_pinfo *inet6; inet6 = inet6_sk(new_sock->sk); - dev_if = inet6->mcast_oif; + dev_if = READ_ONCE(inet6->mcast_oif); } else { dev_if = new_sock->sk->sk_bound_dev_if; } From 1ac13efd614c752d3b47bbfb58e7c36eeb92cb5a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Dec 2023 10:12:44 +0000 Subject: [PATCH 2/2] ipv6: annotate data-races around np->ucast_oif np->ucast_oif is read locklessly in some contexts. Make all accesses to this field lockless, adding appropriate annotations. This also makes setsockopt( IPV6_UNICAST_IF ) lockless. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 2 +- net/ipv6/icmp.c | 4 +-- net/ipv6/ipv6_sockglue.c | 58 ++++++++++++++++++---------------------- net/ipv6/ping.c | 4 +-- net/ipv6/raw.c | 2 +- net/ipv6/udp.c | 2 +- net/l2tp/l2tp_ip6.c | 2 +- 7 files changed, 34 insertions(+), 40 deletions(-) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 1804bd6f4684..fff78496803d 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -62,7 +62,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, if (ipv6_addr_is_multicast(&fl6->daddr)) oif = READ_ONCE(np->mcast_oif); else - oif = np->ucast_oif; + oif = READ_ONCE(np->ucast_oif); } fl6->flowi6_oif = oif; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index f84a465c9759..1635da07285f 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -586,7 +586,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->ucast_oif; + fl6.flowi6_oif = READ_ONCE(np->ucast_oif); ipcm6_init_sk(&ipc6, sk); ipc6.sockc.mark = mark; @@ -772,7 +772,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->ucast_oif; + fl6.flowi6_oif = READ_ONCE(np->ucast_oif); if (ip6_dst_lookup(net, sk, &dst, &fl6)) goto out; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index fe7e96e69960..9e8ebda170f1 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -537,6 +537,31 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, } WRITE_ONCE(np->mcast_oif, val); return 0; + case IPV6_UNICAST_IF: + { + struct net_device *dev; + int ifindex; + + if (optlen != sizeof(int)) + return -EINVAL; + + ifindex = (__force int)ntohl((__force __be32)val); + if (!ifindex) { + WRITE_ONCE(np->ucast_oif, 0); + return 0; + } + + dev = dev_get_by_index(net, ifindex); + if (!dev) + return -EADDRNOTAVAIL; + dev_put(dev); + + if (READ_ONCE(sk->sk_bound_dev_if)) + return -EINVAL; + + WRITE_ONCE(np->ucast_oif, ifindex); + return 0; + } } if (needs_rtnl) rtnl_lock(); @@ -857,37 +882,6 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; } - - case IPV6_UNICAST_IF: - { - struct net_device *dev = NULL; - int ifindex; - - if (optlen != sizeof(int)) - goto e_inval; - - ifindex = (__force int)ntohl((__force __be32)val); - if (ifindex == 0) { - np->ucast_oif = 0; - retv = 0; - break; - } - - dev = dev_get_by_index(net, ifindex); - retv = -EADDRNOTAVAIL; - if (!dev) - break; - dev_put(dev); - - retv = -EINVAL; - if (sk->sk_bound_dev_if) - break; - - np->ucast_oif = ifindex; - retv = 0; - break; - } - case IPV6_ADD_MEMBERSHIP: case IPV6_DROP_MEMBERSHIP: { @@ -1369,7 +1363,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_UNICAST_IF: - val = (__force int)htonl((__u32) np->ucast_oif); + val = (__force int)htonl((__u32) READ_ONCE(np->ucast_oif)); break; case IPV6_MTU_DISCOVER: diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 465e8d004067..ef2059c88955 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -109,7 +109,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!oif && ipv6_addr_is_multicast(daddr)) oif = READ_ONCE(np->mcast_oif); else if (!oif) - oif = np->ucast_oif; + oif = READ_ONCE(np->ucast_oif); addr_type = ipv6_addr_type(daddr); if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) || @@ -159,7 +159,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->ucast_oif; + fl6.flowi6_oif = READ_ONCE(np->ucast_oif); pfh.icmph.type = user_icmph.icmp6_type; pfh.icmph.code = user_icmph.icmp6_code; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 59a1e269a82c..03dbb874c363 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -878,7 +878,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->ucast_oif; + fl6.flowi6_oif = READ_ONCE(np->ucast_oif); security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); if (hdrincl) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 0b7c755faa77..594e3f23c129 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1544,7 +1544,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6->flowi6_oif = READ_ONCE(np->mcast_oif); connected = false; } else if (!fl6->flowi6_oif) - fl6->flowi6_oif = np->ucast_oif; + fl6->flowi6_oif = READ_ONCE(np->ucast_oif); security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 17301f9dd228..dd3153966173 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -601,7 +601,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->ucast_oif; + fl6.flowi6_oif = READ_ONCE(np->ucast_oif); security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));