Merge branch 'ipv4-convert-rtm_-new-del-addr-and-more-to-per-netns-rtnl'

Kuniyuki Iwashima says:

====================
ipv4: Convert RTM_{NEW,DEL}ADDR and more to per-netns RTNL.

The IPv4 address hash table and GC are already namespacified.

This series converts RTM_NEWADDR/RTM_DELADDR and some more
RTNL users to per-netns RTNL.

Changes:
  v2:
    * Add patch 1 to address sparse warning for CONFIG_DEBUG_NET_SMALL_RTNL=n
    * Add Eric's tags to patch 2-12

  v1: https://lore.kernel.org/netdev/20241018012225.90409-1-kuniyu@amazon.com/
====================

Link: https://patch.msgid.link/20241021183239.79741-1-kuniyu@amazon.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Paolo Abeni
2024-10-29 11:55:28 +01:00
6 changed files with 145 additions and 101 deletions

View File

@@ -226,6 +226,10 @@ static __inline__ bool bad_mask(__be32 mask, __be32 addr)
for (ifa = rtnl_dereference((in_dev)->ifa_list); ifa; \
ifa = rtnl_dereference(ifa->ifa_next))
/* Walk every in_ifaddr on in_dev->ifa_list, following ifa->ifa_next.
 * Same loop shape as in_dev_for_each_ifa_rtnl(), but each pointer is
 * loaded through rtnl_net_dereference(net, ...) instead of
 * rtnl_dereference(), so the access is tied to @net.
 */
#define in_dev_for_each_ifa_rtnl_net(net, ifa, in_dev) \
for (ifa = rtnl_net_dereference(net, (in_dev)->ifa_list); ifa; \
ifa = rtnl_net_dereference(net, ifa->ifa_next))
#define in_dev_for_each_ifa_rcu(ifa, in_dev) \
for (ifa = rcu_dereference((in_dev)->ifa_list); ifa; \
ifa = rcu_dereference(ifa->ifa_next))
@@ -252,6 +256,11 @@ static inline struct in_device *__in_dev_get_rtnl(const struct net_device *dev)
return rtnl_dereference(dev->ip_ptr);
}
/* Counterpart of __in_dev_get_rtnl(): returns dev->ip_ptr, loaded via
 * rtnl_net_dereference() against the device's own netns (dev_net(dev)).
 * NOTE(review): presumably the caller must hold that netns's RTNL -- confirm.
 */
static inline struct in_device *__in_dev_get_rtnl_net(const struct net_device *dev)
{
return rtnl_net_dereference(dev_net(dev), dev->ip_ptr);
}
/* called with rcu_read_lock or rtnl held */
static inline bool ip_ignore_linkdown(const struct net_device *dev)
{

View File

@@ -101,6 +101,7 @@ void __rtnl_net_lock(struct net *net);
void __rtnl_net_unlock(struct net *net);
void rtnl_net_lock(struct net *net);
void rtnl_net_unlock(struct net *net);
int rtnl_net_trylock(struct net *net);
int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b);
bool rtnl_net_is_locked(struct net *net);
@@ -132,26 +133,22 @@ static inline void rtnl_net_unlock(struct net *net)
rtnl_unlock();
}
/* Inline variant: @net is not used; this simply returns rtnl_trylock().
 * NOTE(review): appears to be the CONFIG_DEBUG_NET_SMALL_RTNL=n fallback,
 * judging by the neighbouring stubs -- confirm against the full header.
 */
static inline int rtnl_net_trylock(struct net *net)
{
return rtnl_trylock();
}
static inline void ASSERT_RTNL_NET(struct net *net)
{
ASSERT_RTNL();
}
static inline void *rcu_dereference_rtnl_net(struct net *net, void *p)
{
return rcu_dereference_rtnl(p);
}
static inline void *rtnl_net_dereference(struct net *net, void *p)
{
return rtnl_dereference(p);
}
static inline void *rcu_replace_pointer_rtnl_net(struct net *net,
void *rp, void *p)
{
return rcu_replace_pointer_rtnl(rp, p);
}
/* Macro forms of the *_rtnl_net() pointer helpers: the @net argument is
 * dropped and each one expands directly to its plain RTNL equivalent.
 * Being macros, they pass @p / @rp through without converting them to
 * void *.
 */
#define rcu_dereference_rtnl_net(net, p) \
rcu_dereference_rtnl(p)
#define rtnl_net_dereference(net, p) \
rtnl_dereference(p)
#define rcu_replace_pointer_rtnl_net(net, rp, p) \
rcu_replace_pointer_rtnl(rp, p)
#endif
static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)

View File

@@ -12,6 +12,7 @@ typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
enum rtnl_link_flags {
RTNL_FLAG_DOIT_UNLOCKED = BIT(0),
#define RTNL_FLAG_DOIT_PERNET RTNL_FLAG_DOIT_UNLOCKED
RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1),
RTNL_FLAG_DUMP_UNLOCKED = BIT(2),
RTNL_FLAG_DUMP_SPLIT_NLM_DONE = BIT(3), /* legacy behavior */

View File

@@ -64,7 +64,7 @@ int dev_ifconf(struct net *net, struct ifconf __user *uifc)
}
/* Loop over the interfaces, and write an info block for each. */
rtnl_lock();
rtnl_net_lock(net);
for_each_netdev(net, dev) {
if (!pos)
done = inet_gifconf(dev, NULL, 0, size);
@@ -72,12 +72,12 @@ int dev_ifconf(struct net *net, struct ifconf __user *uifc)
done = inet_gifconf(dev, pos + total,
len - total, size);
if (done < 0) {
rtnl_unlock();
rtnl_net_unlock(net);
return -EFAULT;
}
total += done;
}
rtnl_unlock();
rtnl_net_unlock(net);
return put_user(total, &uifc->ifc_len);
}

View File

@@ -210,6 +210,17 @@ void rtnl_net_unlock(struct net *net)
}
EXPORT_SYMBOL(rtnl_net_unlock);
/* Attempt rtnl_trylock(); only when it reports success (non-zero) is
 * __rtnl_net_lock(net) also called. The rtnl_trylock() result is returned
 * unchanged, so callers test it exactly as they would rtnl_trylock().
 */
int rtnl_net_trylock(struct net *net)
{
int ret = rtnl_trylock();
if (ret)
__rtnl_net_lock(net);
return ret;
}
EXPORT_SYMBOL(rtnl_net_trylock);
static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b)
{
if (net_eq(net_a, net_b))

View File

@@ -508,11 +508,6 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
ASSERT_RTNL();
if (!ifa->ifa_local) {
inet_free_ifa(ifa);
return 0;
}
ifa->ifa_flags &= ~IFA_F_SECONDARY;
last_primary = &in_dev->ifa_list;
@@ -584,14 +579,17 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
if (!ifa->ifa_local) {
inet_free_ifa(ifa);
return 0;
}
return __inet_insert_ifa(ifa, NULL, 0, NULL);
}
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
{
struct in_device *in_dev = __in_dev_get_rtnl(dev);
ASSERT_RTNL();
struct in_device *in_dev = __in_dev_get_rtnl_net(dev);
ipv4_devconf_setall(in_dev);
neigh_parms_data_state_setall(in_dev->arp_parms);
@@ -645,7 +643,7 @@ static int ip_mc_autojoin_config(struct net *net, bool join,
struct sock *sk = net->ipv4.mc_autojoin_sk;
int ret;
ASSERT_RTNL();
ASSERT_RTNL_NET(net);
lock_sock(sk);
if (join)
@@ -671,22 +669,24 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct in_ifaddr *ifa;
int err;
ASSERT_RTNL();
err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
ifa_ipv4_policy, extack);
if (err < 0)
goto errout;
goto out;
ifm = nlmsg_data(nlh);
rtnl_net_lock(net);
in_dev = inetdev_by_index(net, ifm->ifa_index);
if (!in_dev) {
NL_SET_ERR_MSG(extack, "ipv4: Device not found");
err = -ENODEV;
goto errout;
goto unlock;
}
for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
for (ifap = &in_dev->ifa_list;
(ifa = rtnl_net_dereference(net, *ifap)) != NULL;
ifap = &ifa->ifa_next) {
if (tb[IFA_LOCAL] &&
ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
@@ -702,13 +702,16 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (ipv4_is_multicast(ifa->ifa_address))
ip_mc_autojoin_config(net, false, ifa);
__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
return 0;
goto unlock;
}
NL_SET_ERR_MSG(extack, "ipv4: Address not found");
err = -EADDRNOTAVAIL;
errout:
unlock:
rtnl_net_unlock(net);
out:
return err;
}
@@ -766,7 +769,8 @@ static void check_lifetime(struct work_struct *work)
rcu_read_unlock();
if (!change_needed)
continue;
rtnl_lock();
rtnl_net_lock(net);
hlist_for_each_entry_safe(ifa, n, head, addr_lst) {
unsigned long age;
@@ -783,7 +787,7 @@ static void check_lifetime(struct work_struct *work)
struct in_ifaddr *tmp;
ifap = &ifa->ifa_dev->ifa_list;
tmp = rtnl_dereference(*ifap);
tmp = rtnl_net_dereference(net, *ifap);
while (tmp) {
if (tmp == ifa) {
inet_del_ifa(ifa->ifa_dev,
@@ -791,7 +795,7 @@ static void check_lifetime(struct work_struct *work)
break;
}
ifap = &tmp->ifa_next;
tmp = rtnl_dereference(*ifap);
tmp = rtnl_net_dereference(net, *ifap);
}
} else if (ifa->ifa_preferred_lft !=
INFINITY_LIFE_TIME &&
@@ -801,7 +805,7 @@ static void check_lifetime(struct work_struct *work)
rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
}
}
rtnl_unlock();
rtnl_net_unlock(net);
}
next_sec = round_jiffies_up(next);
@@ -846,35 +850,54 @@ static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
}
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
__u32 *pvalid_lft, __u32 *pprefered_lft,
struct netlink_ext_ack *extack)
static int inet_validate_rtm(struct nlmsghdr *nlh, struct nlattr **tb,
struct netlink_ext_ack *extack,
__u32 *valid_lft, __u32 *prefered_lft)
{
struct nlattr *tb[IFA_MAX+1];
struct in_ifaddr *ifa;
struct ifaddrmsg *ifm;
struct net_device *dev;
struct in_device *in_dev;
struct ifaddrmsg *ifm = nlmsg_data(nlh);
int err;
err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
ifa_ipv4_policy, extack);
if (err < 0)
goto errout;
ifm = nlmsg_data(nlh);
err = -EINVAL;
return err;
if (ifm->ifa_prefixlen > 32) {
NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
goto errout;
return -EINVAL;
}
if (!tb[IFA_LOCAL]) {
NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
goto errout;
return -EINVAL;
}
if (tb[IFA_CACHEINFO]) {
struct ifa_cacheinfo *ci;
ci = nla_data(tb[IFA_CACHEINFO]);
if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
return -EINVAL;
}
*valid_lft = ci->ifa_valid;
*prefered_lft = ci->ifa_prefered;
}
return 0;
}
static struct in_ifaddr *inet_rtm_to_ifa(struct net *net, struct nlmsghdr *nlh,
struct nlattr **tb,
struct netlink_ext_ack *extack)
{
struct ifaddrmsg *ifm = nlmsg_data(nlh);
struct in_device *in_dev;
struct net_device *dev;
struct in_ifaddr *ifa;
int err;
dev = __dev_get_by_index(net, ifm->ifa_index);
err = -ENODEV;
if (!dev) {
@@ -882,7 +905,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
goto errout;
}
in_dev = __in_dev_get_rtnl(dev);
in_dev = __in_dev_get_rtnl_net(dev);
err = -ENOBUFS;
if (!in_dev)
goto errout;
@@ -923,76 +946,69 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
if (tb[IFA_PROTO])
ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
if (tb[IFA_CACHEINFO]) {
struct ifa_cacheinfo *ci;
ci = nla_data(tb[IFA_CACHEINFO]);
if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
err = -EINVAL;
goto errout_free;
}
*pvalid_lft = ci->ifa_valid;
*pprefered_lft = ci->ifa_prefered;
}
return ifa;
errout_free:
inet_free_ifa(ifa);
errout:
return ERR_PTR(err);
}
static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
static struct in_ifaddr *find_matching_ifa(struct net *net, struct in_ifaddr *ifa)
{
struct in_device *in_dev = ifa->ifa_dev;
struct in_ifaddr *ifa1;
if (!ifa->ifa_local)
return NULL;
in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
in_dev_for_each_ifa_rtnl_net(net, ifa1, in_dev) {
if (ifa1->ifa_mask == ifa->ifa_mask &&
inet_ifa_match(ifa1->ifa_address, ifa) &&
ifa1->ifa_local == ifa->ifa_local)
return ifa1;
}
return NULL;
}
static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct in_ifaddr *ifa;
struct in_ifaddr *ifa_existing;
__u32 valid_lft = INFINITY_LIFE_TIME;
__u32 prefered_lft = INFINITY_LIFE_TIME;
__u32 valid_lft = INFINITY_LIFE_TIME;
struct net *net = sock_net(skb->sk);
struct in_ifaddr *ifa_existing;
struct nlattr *tb[IFA_MAX + 1];
struct in_ifaddr *ifa;
int ret;
ASSERT_RTNL();
ret = inet_validate_rtm(nlh, tb, extack, &valid_lft, &prefered_lft);
if (ret < 0)
return ret;
ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
if (IS_ERR(ifa))
return PTR_ERR(ifa);
if (!nla_get_in_addr(tb[IFA_LOCAL]))
return 0;
ifa_existing = find_matching_ifa(ifa);
rtnl_net_lock(net);
ifa = inet_rtm_to_ifa(net, nlh, tb, extack);
if (IS_ERR(ifa)) {
ret = PTR_ERR(ifa);
goto unlock;
}
ifa_existing = find_matching_ifa(net, ifa);
if (!ifa_existing) {
/* It would be best to check for !NLM_F_CREATE here but
* userspace already relies on not having to provide this.
*/
set_ifa_lifetime(ifa, valid_lft, prefered_lft);
if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
int ret = ip_mc_autojoin_config(net, true, ifa);
ret = ip_mc_autojoin_config(net, true, ifa);
if (ret < 0) {
NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
inet_free_ifa(ifa);
return ret;
goto unlock;
}
}
return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
extack);
ret = __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid, extack);
} else {
u32 new_metric = ifa->ifa_rt_priority;
u8 new_proto = ifa->ifa_proto;
@@ -1002,7 +1018,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (nlh->nlmsg_flags & NLM_F_EXCL ||
!(nlh->nlmsg_flags & NLM_F_REPLACE)) {
NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
return -EEXIST;
ret = -EEXIST;
goto unlock;
}
ifa = ifa_existing;
@@ -1019,7 +1036,11 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
&net->ipv4.addr_chk_work, 0);
rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
}
return 0;
unlock:
rtnl_net_unlock(net);
return ret;
}
/*
@@ -1106,7 +1127,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
goto out;
}
rtnl_lock();
rtnl_net_lock(net);
ret = -ENODEV;
dev = __dev_get_by_name(net, ifr->ifr_name);
@@ -1116,7 +1137,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
if (colon)
*colon = ':';
in_dev = __in_dev_get_rtnl(dev);
in_dev = __in_dev_get_rtnl_net(dev);
if (in_dev) {
if (tryaddrmatch) {
/* Matthias Andree */
@@ -1126,7 +1147,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
This is checked above. */
for (ifap = &in_dev->ifa_list;
(ifa = rtnl_dereference(*ifap)) != NULL;
(ifa = rtnl_net_dereference(net, *ifap)) != NULL;
ifap = &ifa->ifa_next) {
if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
sin_orig.sin_addr.s_addr ==
@@ -1140,7 +1161,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
comparing just the label */
if (!ifa) {
for (ifap = &in_dev->ifa_list;
(ifa = rtnl_dereference(*ifap)) != NULL;
(ifa = rtnl_net_dereference(net, *ifap)) != NULL;
ifap = &ifa->ifa_next)
if (!strcmp(ifr->ifr_name, ifa->ifa_label))
break;
@@ -1182,6 +1203,9 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
inet_del_ifa(in_dev, ifap, 1);
break;
}
/* NETDEV_UP/DOWN/CHANGE could touch a peer dev */
ASSERT_RTNL();
ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
break;
@@ -1283,14 +1307,14 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
break;
}
done:
rtnl_unlock();
rtnl_net_unlock(net);
out:
return ret;
}
int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
{
struct in_device *in_dev = __in_dev_get_rtnl(dev);
struct in_device *in_dev = __in_dev_get_rtnl_net(dev);
const struct in_ifaddr *ifa;
struct ifreq ifr;
int done = 0;
@@ -1301,7 +1325,7 @@ int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
if (!in_dev)
goto out;
in_dev_for_each_ifa_rtnl(ifa, in_dev) {
in_dev_for_each_ifa_rtnl_net(dev_net(dev), ifa, in_dev) {
if (!buf) {
done += size;
continue;
@@ -2372,7 +2396,7 @@ static void inet_forward_change(struct net *net)
if (on)
dev_disable_lro(dev);
in_dev = __in_dev_get_rtnl(dev);
in_dev = __in_dev_get_rtnl_net(dev);
if (in_dev) {
IN_DEV_CONF_SET(in_dev, FORWARDING, on);
inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -2463,7 +2487,7 @@ static int devinet_sysctl_forward(const struct ctl_table *ctl, int write,
if (write && *valp != val) {
if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
if (!rtnl_trylock()) {
if (!rtnl_net_trylock(net)) {
/* Restore the original values before restarting */
*valp = val;
*ppos = pos;
@@ -2482,7 +2506,7 @@ static int devinet_sysctl_forward(const struct ctl_table *ctl, int write,
idev->dev->ifindex,
cnf);
}
rtnl_unlock();
rtnl_net_unlock(net);
rt_cache_flush(net);
} else
inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -2813,8 +2837,10 @@ static struct rtnl_af_ops inet_af_ops __read_mostly = {
};
static const struct rtnl_msg_handler devinet_rtnl_msg_handlers[] __initconst = {
{.protocol = PF_INET, .msgtype = RTM_NEWADDR, .doit = inet_rtm_newaddr},
{.protocol = PF_INET, .msgtype = RTM_DELADDR, .doit = inet_rtm_deladdr},
{.protocol = PF_INET, .msgtype = RTM_NEWADDR, .doit = inet_rtm_newaddr,
.flags = RTNL_FLAG_DOIT_PERNET},
{.protocol = PF_INET, .msgtype = RTM_DELADDR, .doit = inet_rtm_deladdr,
.flags = RTNL_FLAG_DOIT_PERNET},
{.protocol = PF_INET, .msgtype = RTM_GETADDR, .dumpit = inet_dump_ifaddr,
.flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
{.protocol = PF_INET, .msgtype = RTM_GETNETCONF,