From 9cb7e40d388d6c0e4677809c6b2950bc67fd8830 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:28 -0700 Subject: [PATCH 01/12] rtnetlink: Make per-netns RTNL dereference helpers to macro. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_DEBUG_NET_SMALL_RTNL is off, rtnl_net_dereference() is the static inline wrapper of rtnl_dereference() returning a plain (void *) pointer to make sure net is always evaluated as requested in [0]. But, it makes sparse complain [1] when the pointer has __rcu annotation: net/ipv4/devinet.c:674:47: sparse: warning: incorrect type in argument 2 (different address spaces) net/ipv4/devinet.c:674:47: sparse: expected void *p net/ipv4/devinet.c:674:47: sparse: got struct in_ifaddr [noderef] __rcu * Also, if we evaluate net as (void *) in a macro, then the compiler in turn fails to build due to -Werror=unused-value. #define rtnl_net_dereference(net, p) \ ({ \ (void *)net; \ rtnl_dereference(p); \ }) net/ipv4/devinet.c: In function ‘inet_rtm_deladdr’: ./include/linux/rtnetlink.h:154:17: error: statement with no effect [-Werror=unused-value] 154 | (void *)net; \ net/ipv4/devinet.c:674:21: note: in expansion of macro ‘rtnl_net_dereference’ 674 | (ifa = rtnl_net_dereference(net, *ifap)) != NULL; | ^~~~~~~~~~~~~~~~~~~~ Let's go back to the original simplest macro. Note that checkpatch complains about this approach, but it's one-shot and less noisy than the other two. 
WARNING: Argument 'net' is not used in function-like macro #76: FILE: include/linux/rtnetlink.h:142: +#define rtnl_net_dereference(net, p) \ + rtnl_dereference(p) Fixes: 844e5e7e656d ("rtnetlink: Add assertion helpers for per-netns RTNL.") Link: https://lore.kernel.org/netdev/20241004132145.7fd208e9@kernel.org/ [0] Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202410200325.SaEJmyZS-lkp@intel.com/ [1] Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- include/linux/rtnetlink.h | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 8468a4ce8510..0e62918de63b 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -137,21 +137,12 @@ static inline void ASSERT_RTNL_NET(struct net *net) ASSERT_RTNL(); } -static inline void *rcu_dereference_rtnl_net(struct net *net, void *p) -{ - return rcu_dereference_rtnl(p); -} - -static inline void *rtnl_net_dereference(struct net *net, void *p) -{ - return rtnl_dereference(p); -} - -static inline void *rcu_replace_pointer_rtnl_net(struct net *net, - void *rp, void *p) -{ - return rcu_replace_pointer_rtnl(rp, p); -} +#define rcu_dereference_rtnl_net(net, p) \ + rcu_dereference_rtnl(p) +#define rtnl_net_dereference(net, p) \ + rtnl_dereference(p) +#define rcu_replace_pointer_rtnl_net(net, rp, p) \ + rcu_replace_pointer_rtnl(rp, p) #endif static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) From 26d8db55eeacb7dc78672523f57825916d203de4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:29 -0700 Subject: [PATCH 02/12] rtnetlink: Define RTNL_FLAG_DOIT_PERNET for per-netns RTNL doit(). We will push RTNL down to each doit() as rtnl_net_lock(). We can use RTNL_FLAG_DOIT_UNLOCKED to call doit() without RTNL, but doit() will still hold RTNL. Let's define RTNL_FLAG_DOIT_PERNET as an alias of RTNL_FLAG_DOIT_UNLOCKED. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index e0d9a8eae6b6..b260c0cc9671 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -12,6 +12,7 @@ typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); enum rtnl_link_flags { RTNL_FLAG_DOIT_UNLOCKED = BIT(0), +#define RTNL_FLAG_DOIT_PERNET RTNL_FLAG_DOIT_UNLOCKED RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1), RTNL_FLAG_DUMP_UNLOCKED = BIT(2), RTNL_FLAG_DUMP_SPLIT_NLM_DONE = BIT(3), /* legacy behavior */ From 2d34429d14f9d09b38a8bee6a972a07228378df6 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:30 -0700 Subject: [PATCH 03/12] ipv4: Factorise RTM_NEWADDR validation to inet_validate_rtm(). rtm_to_ifaddr() validates some attributes, looks up a netdev, allocates struct in_ifaddr, and validates IFA_CACHEINFO. There is no reason to delay IFA_CACHEINFO validation. We will push RTNL down to inet_rtm_newaddr(), and then we want to complete rtnetlink validation before rtnl_net_lock(). Let's factorise the validation parts. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/ipv4/devinet.c | 81 +++++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5f859d01cbbe..da5412fb34e7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -846,35 +846,54 @@ static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp); } -static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, - __u32 *pvalid_lft, __u32 *pprefered_lft, - struct netlink_ext_ack *extack) +static int inet_validate_rtm(struct nlmsghdr *nlh, struct nlattr **tb, + struct netlink_ext_ack *extack, + __u32 *valid_lft, __u32 *prefered_lft) { - struct nlattr *tb[IFA_MAX+1]; - struct in_ifaddr *ifa; - struct ifaddrmsg *ifm; - struct net_device *dev; - struct in_device *in_dev; + struct ifaddrmsg *ifm = nlmsg_data(nlh); int err; err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, extack); if (err < 0) - goto errout; - - ifm = nlmsg_data(nlh); - err = -EINVAL; + return err; if (ifm->ifa_prefixlen > 32) { NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length"); - goto errout; + return -EINVAL; } if (!tb[IFA_LOCAL]) { NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied"); - goto errout; + return -EINVAL; } + if (tb[IFA_CACHEINFO]) { + struct ifa_cacheinfo *ci; + + ci = nla_data(tb[IFA_CACHEINFO]); + if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { + NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid"); + return -EINVAL; + } + + *valid_lft = ci->ifa_valid; + *prefered_lft = ci->ifa_prefered; + } + + return 0; +} + +static struct in_ifaddr *inet_rtm_to_ifa(struct net *net, struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct ifaddrmsg *ifm = nlmsg_data(nlh); + struct in_device *in_dev; + struct net_device *dev; + struct in_ifaddr *ifa; + int err; 
+ dev = __dev_get_by_index(net, ifm->ifa_index); err = -ENODEV; if (!dev) { @@ -923,23 +942,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, if (tb[IFA_PROTO]) ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]); - if (tb[IFA_CACHEINFO]) { - struct ifa_cacheinfo *ci; - - ci = nla_data(tb[IFA_CACHEINFO]); - if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { - NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid"); - err = -EINVAL; - goto errout_free; - } - *pvalid_lft = ci->ifa_valid; - *pprefered_lft = ci->ifa_prefered; - } - return ifa; -errout_free: - inet_free_ifa(ifa); errout: return ERR_PTR(err); } @@ -964,15 +968,21 @@ static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { - struct net *net = sock_net(skb->sk); - struct in_ifaddr *ifa; - struct in_ifaddr *ifa_existing; - __u32 valid_lft = INFINITY_LIFE_TIME; __u32 prefered_lft = INFINITY_LIFE_TIME; + __u32 valid_lft = INFINITY_LIFE_TIME; + struct net *net = sock_net(skb->sk); + struct in_ifaddr *ifa_existing; + struct nlattr *tb[IFA_MAX + 1]; + struct in_ifaddr *ifa; + int ret; ASSERT_RTNL(); - ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack); + ret = inet_validate_rtm(nlh, tb, extack, &valid_lft, &prefered_lft); + if (ret < 0) + return ret; + + ifa = inet_rtm_to_ifa(net, nlh, tb, extack); if (IS_ERR(ifa)) return PTR_ERR(ifa); @@ -983,8 +993,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, */ set_ifa_lifetime(ifa, valid_lft, prefered_lft); if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) { - int ret = ip_mc_autojoin_config(net, true, ifa); - + ret = ip_mc_autojoin_config(net, true, ifa); if (ret < 0) { NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed"); inet_free_ifa(ifa); From abd0deff03d854cb34818e1e01490296d0314ea1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:31 -0700 Subject: [PATCH 
04/12] ipv4: Don't allocate ifa for 0.0.0.0 in inet_rtm_newaddr(). When we pass 0.0.0.0 to __inet_insert_ifa(), it frees ifa and returns 0. We can do this check much earlier for RTM_NEWADDR even before allocating struct in_ifaddr. Let's move the validation to 1. inet_insert_ifa() for ioctl() 2. inet_rtm_newaddr() for RTM_NEWADDR Now, we can remove the same check in find_matching_ifa(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/ipv4/devinet.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index da5412fb34e7..8db84c70ebed 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -508,11 +508,6 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, ASSERT_RTNL(); - if (!ifa->ifa_local) { - inet_free_ifa(ifa); - return 0; - } - ifa->ifa_flags &= ~IFA_F_SECONDARY; last_primary = &in_dev->ifa_list; @@ -584,6 +579,11 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, static int inet_insert_ifa(struct in_ifaddr *ifa) { + if (!ifa->ifa_local) { + inet_free_ifa(ifa); + return 0; + } + return __inet_insert_ifa(ifa, NULL, 0, NULL); } @@ -953,15 +953,13 @@ static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1; - if (!ifa->ifa_local) - return NULL; - in_dev_for_each_ifa_rtnl(ifa1, in_dev) { if (ifa1->ifa_mask == ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa) && ifa1->ifa_local == ifa->ifa_local) return ifa1; } + return NULL; } @@ -982,6 +980,9 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, if (ret < 0) return ret; + if (!nla_get_in_addr(tb[IFA_LOCAL])) + return 0; + ifa = inet_rtm_to_ifa(net, nlh, tb, extack); if (IS_ERR(ifa)) return PTR_ERR(ifa); From 487257786b71172648664164ba567e807e1e11fc Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:32 -0700 Subject: [PATCH 
05/12] ipv4: Convert RTM_NEWADDR to per-netns RTNL. The address hash table and GC are already namespacified. Let's push down RTNL into inet_rtm_newaddr() as rtnl_net_lock(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/ipv4/devinet.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 8db84c70ebed..7f24bc38981b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -974,8 +974,6 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, struct in_ifaddr *ifa; int ret; - ASSERT_RTNL(); - ret = inet_validate_rtm(nlh, tb, extack, &valid_lft, &prefered_lft); if (ret < 0) return ret; @@ -983,9 +981,13 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, if (!nla_get_in_addr(tb[IFA_LOCAL])) return 0; + rtnl_net_lock(net); + ifa = inet_rtm_to_ifa(net, nlh, tb, extack); - if (IS_ERR(ifa)) - return PTR_ERR(ifa); + if (IS_ERR(ifa)) { + ret = PTR_ERR(ifa); + goto unlock; + } ifa_existing = find_matching_ifa(ifa); if (!ifa_existing) { @@ -998,11 +1000,11 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, if (ret < 0) { NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed"); inet_free_ifa(ifa); - return ret; + goto unlock; } } - return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid, - extack); + + ret = __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid, extack); } else { u32 new_metric = ifa->ifa_rt_priority; u8 new_proto = ifa->ifa_proto; @@ -1012,7 +1014,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, if (nlh->nlmsg_flags & NLM_F_EXCL || !(nlh->nlmsg_flags & NLM_F_REPLACE)) { NL_SET_ERR_MSG(extack, "ipv4: Address already assigned"); - return -EEXIST; + ret = -EEXIST; + goto unlock; } ifa = ifa_existing; @@ -1029,7 +1032,11 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, &net->ipv4.addr_chk_work, 0); 
rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); } - return 0; + +unlock: + rtnl_net_unlock(net); + + return ret; } /* @@ -2823,7 +2830,8 @@ static struct rtnl_af_ops inet_af_ops __read_mostly = { }; static const struct rtnl_msg_handler devinet_rtnl_msg_handlers[] __initconst = { - {.protocol = PF_INET, .msgtype = RTM_NEWADDR, .doit = inet_rtm_newaddr}, + {.protocol = PF_INET, .msgtype = RTM_NEWADDR, .doit = inet_rtm_newaddr, + .flags = RTNL_FLAG_DOIT_PERNET}, {.protocol = PF_INET, .msgtype = RTM_DELADDR, .doit = inet_rtm_deladdr}, {.protocol = PF_INET, .msgtype = RTM_GETADDR, .dumpit = inet_dump_ifaddr, .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE}, From d4b483208b2606add41a22bdd3c8cd6d36009319 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:33 -0700 Subject: [PATCH 06/12] ipv4: Use per-netns RTNL helpers in inet_rtm_newaddr(). inet_rtm_to_ifa() and find_matching_ifa() are called under rtnl_net_lock(). __in_dev_get_rtnl() and in_dev_for_each_ifa_rtnl() there can use per-netns RTNL helpers. Let's define and use __in_dev_get_rtnl_net() and in_dev_for_each_ifa_rtnl_net(). 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/linux/inetdevice.h | 9 +++++++++ net/ipv4/devinet.c | 8 ++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index d9c690c8c80b..5730ba6b1cfa 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -226,6 +226,10 @@ static __inline__ bool bad_mask(__be32 mask, __be32 addr) for (ifa = rtnl_dereference((in_dev)->ifa_list); ifa; \ ifa = rtnl_dereference(ifa->ifa_next)) +#define in_dev_for_each_ifa_rtnl_net(net, ifa, in_dev) \ + for (ifa = rtnl_net_dereference(net, (in_dev)->ifa_list); ifa; \ + ifa = rtnl_net_dereference(net, ifa->ifa_next)) + #define in_dev_for_each_ifa_rcu(ifa, in_dev) \ for (ifa = rcu_dereference((in_dev)->ifa_list); ifa; \ ifa = rcu_dereference(ifa->ifa_next)) @@ -252,6 +256,11 @@ static inline struct in_device *__in_dev_get_rtnl(const struct net_device *dev) return rtnl_dereference(dev->ip_ptr); } +static inline struct in_device *__in_dev_get_rtnl_net(const struct net_device *dev) +{ + return rtnl_net_dereference(dev_net(dev), dev->ip_ptr); +} + /* called with rcu_read_lock or rtnl held */ static inline bool ip_ignore_linkdown(const struct net_device *dev) { diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7f24bc38981b..e14e35c22054 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -901,7 +901,7 @@ static struct in_ifaddr *inet_rtm_to_ifa(struct net *net, struct nlmsghdr *nlh, goto errout; } - in_dev = __in_dev_get_rtnl(dev); + in_dev = __in_dev_get_rtnl_net(dev); err = -ENOBUFS; if (!in_dev) goto errout; @@ -948,12 +948,12 @@ static struct in_ifaddr *inet_rtm_to_ifa(struct net *net, struct nlmsghdr *nlh, return ERR_PTR(err); } -static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) +static struct in_ifaddr *find_matching_ifa(struct net *net, struct in_ifaddr *ifa) { struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr 
*ifa1; - in_dev_for_each_ifa_rtnl(ifa1, in_dev) { + in_dev_for_each_ifa_rtnl_net(net, ifa1, in_dev) { if (ifa1->ifa_mask == ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa) && ifa1->ifa_local == ifa->ifa_local) @@ -989,7 +989,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, goto unlock; } - ifa_existing = find_matching_ifa(ifa); + ifa_existing = find_matching_ifa(net, ifa); if (!ifa_existing) { /* It would be best to check for !NLM_F_CREATE here but * userspace already relies on not having to provide this. From 4df5066f079cfbc563c2da031b02b4ba2d9e1ba0 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:34 -0700 Subject: [PATCH 07/12] ipv4: Convert RTM_DELADDR to per-netns RTNL. Let's push down RTNL into inet_rtm_deladdr() as rtnl_net_lock(). Now, ip_mc_autojoin_config() is always called under per-netns RTNL, so ASSERT_RTNL() can be replaced with ASSERT_RTNL_NET(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/ipv4/devinet.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e14e35c22054..6b7780e12f34 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -645,7 +645,7 @@ static int ip_mc_autojoin_config(struct net *net, bool join, struct sock *sk = net->ipv4.mc_autojoin_sk; int ret; - ASSERT_RTNL(); + ASSERT_RTNL_NET(net); lock_sock(sk); if (join) @@ -671,22 +671,24 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, struct in_ifaddr *ifa; int err; - ASSERT_RTNL(); - err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, extack); if (err < 0) - goto errout; + goto out; ifm = nlmsg_data(nlh); + + rtnl_net_lock(net); + in_dev = inetdev_by_index(net, ifm->ifa_index); if (!in_dev) { NL_SET_ERR_MSG(extack, "ipv4: Device not found"); err = -ENODEV; - goto errout; + goto unlock; } - for (ifap = &in_dev->ifa_list; (ifa = 
rtnl_dereference(*ifap)) != NULL; + for (ifap = &in_dev->ifa_list; + (ifa = rtnl_net_dereference(net, *ifap)) != NULL; ifap = &ifa->ifa_next) { if (tb[IFA_LOCAL] && ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL])) @@ -702,13 +704,16 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, if (ipv4_is_multicast(ifa->ifa_address)) ip_mc_autojoin_config(net, false, ifa); + __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); - return 0; + goto unlock; } NL_SET_ERR_MSG(extack, "ipv4: Address not found"); err = -EADDRNOTAVAIL; -errout: +unlock: + rtnl_net_unlock(net); +out: return err; } @@ -2832,7 +2837,8 @@ static struct rtnl_af_ops inet_af_ops __read_mostly = { static const struct rtnl_msg_handler devinet_rtnl_msg_handlers[] __initconst = { {.protocol = PF_INET, .msgtype = RTM_NEWADDR, .doit = inet_rtm_newaddr, .flags = RTNL_FLAG_DOIT_PERNET}, - {.protocol = PF_INET, .msgtype = RTM_DELADDR, .doit = inet_rtm_deladdr}, + {.protocol = PF_INET, .msgtype = RTM_DELADDR, .doit = inet_rtm_deladdr, + .flags = RTNL_FLAG_DOIT_PERNET}, {.protocol = PF_INET, .msgtype = RTM_GETADDR, .dumpit = inet_dump_ifaddr, .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE}, {.protocol = PF_INET, .msgtype = RTM_GETNETCONF, From c350c4761e7f4767dea59aef036ce13276466fd0 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:35 -0700 Subject: [PATCH 08/12] ipv4: Convert check_lifetime() to per-netns RTNL. Since commit 1675f385213e ("ipv4: Namespacify IPv4 address GC."), check_lifetime() works on a per-netns basis. Let's use rtnl_net_lock() and rtnl_net_dereference(). 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/ipv4/devinet.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 6b7780e12f34..5eaef3bbb987 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -771,7 +771,8 @@ static void check_lifetime(struct work_struct *work) rcu_read_unlock(); if (!change_needed) continue; - rtnl_lock(); + + rtnl_net_lock(net); hlist_for_each_entry_safe(ifa, n, head, addr_lst) { unsigned long age; @@ -788,7 +789,7 @@ static void check_lifetime(struct work_struct *work) struct in_ifaddr *tmp; ifap = &ifa->ifa_dev->ifa_list; - tmp = rtnl_dereference(*ifap); + tmp = rtnl_net_dereference(net, *ifap); while (tmp) { if (tmp == ifa) { inet_del_ifa(ifa->ifa_dev, @@ -796,7 +797,7 @@ static void check_lifetime(struct work_struct *work) break; } ifap = &tmp->ifa_next; - tmp = rtnl_dereference(*ifap); + tmp = rtnl_net_dereference(net, *ifap); } } else if (ifa->ifa_preferred_lft != INFINITY_LIFE_TIME && @@ -806,7 +807,7 @@ static void check_lifetime(struct work_struct *work) rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); } } - rtnl_unlock(); + rtnl_net_unlock(net); } next_sec = round_jiffies_up(next); From d1c81818aa227b37d65b40f9883109c5256b9bfb Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:36 -0700 Subject: [PATCH 09/12] rtnetlink: Define rtnl_net_trylock(). We will need the per-netns version of rtnl_trylock(). rtnl_net_trylock() calls __rtnl_net_lock() only when rtnl_trylock() successfully holds RTNL. When RTNL is removed, we will use mutex_trylock() for per-netns RTNL. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/linux/rtnetlink.h | 6 ++++++ net/core/rtnetlink.c | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 0e62918de63b..14b88f551920 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -101,6 +101,7 @@ void __rtnl_net_lock(struct net *net); void __rtnl_net_unlock(struct net *net); void rtnl_net_lock(struct net *net); void rtnl_net_unlock(struct net *net); +int rtnl_net_trylock(struct net *net); int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b); bool rtnl_net_is_locked(struct net *net); @@ -132,6 +133,11 @@ static inline void rtnl_net_unlock(struct net *net) rtnl_unlock(); } +static inline int rtnl_net_trylock(struct net *net) +{ + return rtnl_trylock(); +} + static inline void ASSERT_RTNL_NET(struct net *net) { ASSERT_RTNL(); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 194a81e5f608..dda8230fdfd4 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -210,6 +210,17 @@ void rtnl_net_unlock(struct net *net) } EXPORT_SYMBOL(rtnl_net_unlock); +int rtnl_net_trylock(struct net *net) +{ + int ret = rtnl_trylock(); + + if (ret) + __rtnl_net_lock(net); + + return ret; +} +EXPORT_SYMBOL(rtnl_net_trylock); + static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b) { if (net_eq(net_a, net_b)) From 77453d428d4c9c613341de7f9b943f0c83f37a27 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:37 -0700 Subject: [PATCH 10/12] ipv4: Convert devinet_sysctl_forward() to per-netns RTNL. devinet_sysctl_forward() touches only a single netns. Let's use rtnl_net_trylock() and __in_dev_get_rtnl_net(). 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/ipv4/devinet.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5eaef3bbb987..bd65e0ef774e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2395,7 +2395,7 @@ static void inet_forward_change(struct net *net) if (on) dev_disable_lro(dev); - in_dev = __in_dev_get_rtnl(dev); + in_dev = __in_dev_get_rtnl_net(dev); if (in_dev) { IN_DEV_CONF_SET(in_dev, FORWARDING, on); inet_netconf_notify_devconf(net, RTM_NEWNETCONF, @@ -2486,7 +2486,7 @@ static int devinet_sysctl_forward(const struct ctl_table *ctl, int write, if (write && *valp != val) { if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { - if (!rtnl_trylock()) { + if (!rtnl_net_trylock(net)) { /* Restore the original values before restarting */ *valp = val; *ppos = pos; @@ -2505,7 +2505,7 @@ static int devinet_sysctl_forward(const struct ctl_table *ctl, int write, idev->dev->ifindex, cnf); } - rtnl_unlock(); + rtnl_net_unlock(net); rt_cache_flush(net); } else inet_netconf_notify_devconf(net, RTM_NEWNETCONF, From 88d1f8770690791cbe5d8f60b17137df05476299 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:38 -0700 Subject: [PATCH 11/12] ipv4: Convert devinet_ioctl() to per-netns RTNL except for SIOCSIFFLAGS. Basically, devinet_ioctl() operates on a single netns. However, ioctl(SIOCSIFFLAGS) will trigger the netdev notifier that could touch another netdev in different netns. Let's use per-netns RTNL helper in devinet_ioctl() and place ASSERT_RTNL() for SIOCSIFFLAGS. We will remove ASSERT_RTNL() once RTM_SETLINK and RTM_DELLINK are converted. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/ipv4/devinet.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index bd65e0ef774e..fb4bc63b8fa2 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -589,9 +589,7 @@ static int inet_insert_ifa(struct in_ifaddr *ifa) static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) { - struct in_device *in_dev = __in_dev_get_rtnl(dev); - - ASSERT_RTNL(); + struct in_device *in_dev = __in_dev_get_rtnl_net(dev); ipv4_devconf_setall(in_dev); neigh_parms_data_state_setall(in_dev->arp_parms); @@ -1129,7 +1127,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) goto out; } - rtnl_lock(); + rtnl_net_lock(net); ret = -ENODEV; dev = __dev_get_by_name(net, ifr->ifr_name); @@ -1139,7 +1137,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) if (colon) *colon = ':'; - in_dev = __in_dev_get_rtnl(dev); + in_dev = __in_dev_get_rtnl_net(dev); if (in_dev) { if (tryaddrmatch) { /* Matthias Andree */ @@ -1149,7 +1147,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) This is checked above. 
*/ for (ifap = &in_dev->ifa_list; - (ifa = rtnl_dereference(*ifap)) != NULL; + (ifa = rtnl_net_dereference(net, *ifap)) != NULL; ifap = &ifa->ifa_next) { if (!strcmp(ifr->ifr_name, ifa->ifa_label) && sin_orig.sin_addr.s_addr == @@ -1163,7 +1161,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) comparing just the label */ if (!ifa) { for (ifap = &in_dev->ifa_list; - (ifa = rtnl_dereference(*ifap)) != NULL; + (ifa = rtnl_net_dereference(net, *ifap)) != NULL; ifap = &ifa->ifa_next) if (!strcmp(ifr->ifr_name, ifa->ifa_label)) break; @@ -1205,6 +1203,9 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) inet_del_ifa(in_dev, ifap, 1); break; } + + /* NETDEV_UP/DOWN/CHANGE could touch a peer dev */ + ASSERT_RTNL(); ret = dev_change_flags(dev, ifr->ifr_flags, NULL); break; @@ -1306,7 +1307,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) break; } done: - rtnl_unlock(); + rtnl_net_unlock(net); out: return ret; } From 7ed8da17bfb2b033e42afa842ca22641821e231c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Oct 2024 11:32:39 -0700 Subject: [PATCH 12/12] ipv4: Convert devinet_ioctl to per-netns RTNL. ioctl(SIOCGIFCONF) calls dev_ifconf() that operates on the current netns. Let's use per-netns RTNL helpers in dev_ifconf() and inet_gifconf(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/core/dev_ioctl.c | 6 +++--- net/ipv4/devinet.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 473c437b6b53..46d43b950471 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -64,7 +64,7 @@ int dev_ifconf(struct net *net, struct ifconf __user *uifc) } /* Loop over the interfaces, and write an info block for each. 
*/ - rtnl_lock(); + rtnl_net_lock(net); for_each_netdev(net, dev) { if (!pos) done = inet_gifconf(dev, NULL, 0, size); @@ -72,12 +72,12 @@ int dev_ifconf(struct net *net, struct ifconf __user *uifc) done = inet_gifconf(dev, pos + total, len - total, size); if (done < 0) { - rtnl_unlock(); + rtnl_net_unlock(net); return -EFAULT; } total += done; } - rtnl_unlock(); + rtnl_net_unlock(net); return put_user(total, &uifc->ifc_len); } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index fb4bc63b8fa2..f58f39a9ee87 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1314,7 +1314,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size) { - struct in_device *in_dev = __in_dev_get_rtnl(dev); + struct in_device *in_dev = __in_dev_get_rtnl_net(dev); const struct in_ifaddr *ifa; struct ifreq ifr; int done = 0; @@ -1325,7 +1325,7 @@ int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size) if (!in_dev) goto out; - in_dev_for_each_ifa_rtnl(ifa, in_dev) { + in_dev_for_each_ifa_rtnl_net(dev_net(dev), ifa, in_dev) { if (!buf) { done += size; continue;