From fa8ef258da2b05a673eb8dc0160a514c80b6ab8c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 11:53:44 -0700 Subject: [PATCH 01/14] rtnetlink: Allocate linkinfo[] as struct rtnl_newlink_tbs. We will move linkinfo to rtnl_newlink() and pass it down to other functions. Let's pack it into rtnl_newlink_tbs. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/core/rtnetlink.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a9c92392fb1d..37193402a42c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3622,6 +3622,7 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, struct rtnl_newlink_tbs { struct nlattr *tb[IFLA_MAX + 1]; + struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; struct nlattr *attr[RTNL_MAX_TYPE + 1]; struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; }; @@ -3630,7 +3631,7 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct rtnl_newlink_tbs *tbs, struct netlink_ext_ack *extack) { - struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; + struct nlattr ** const linkinfo = tbs->linkinfo; struct nlattr ** const tb = tbs->tb; const struct rtnl_link_ops *m_ops; struct net_device *master_dev; @@ -3685,8 +3686,9 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, ifla_info_policy, NULL); if (err < 0) return err; - } else - memset(linkinfo, 0, sizeof(linkinfo)); + } else { + memset(linkinfo, 0, sizeof(tbs->linkinfo)); + } if (linkinfo[IFLA_INFO_KIND]) { nla_strscpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind)); From a5838cf9b2ee59f2a55b1e486f2250a18a43ee14 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 11:53:45 -0700 Subject: [PATCH 02/14] rtnetlink: Call validate_linkmsg() in do_setlink(). There are 3 paths that finally call do_setlink(), and validate_linkmsg() is called in each path. 1. RTM_NEWLINK 1-1. dev is found in __rtnl_newlink() 1-2. dev isn't found, but IFLA_GROUP is specified in rtnl_group_changelink() 2. RTM_SETLINK The next patch factorises 1-1 to a separate function. As a preparation, let's move validate_linkmsg() calls to do_setlink(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/core/rtnetlink.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 37193402a42c..76593de7014c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2855,6 +2855,10 @@ static int do_setlink(const struct sk_buff *skb, char ifname[IFNAMSIZ]; int err; + err = validate_linkmsg(dev, tb, extack); + if (err < 0) + goto errout; + if (tb[IFLA_IFNAME]) nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); else @@ -3269,10 +3273,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } - err = validate_linkmsg(dev, tb, extack); - if (err < 0) - goto errout; - err = do_setlink(skb, dev, ifm, extack, tb, 0); errout: return err; @@ -3516,9 +3516,6 @@ static int rtnl_group_changelink(const struct sk_buff *skb, for_each_netdev_safe(net, dev, aux) { if (dev->group == group) { - err = validate_linkmsg(dev, tb, extack); - if (err < 0) - return err; err = do_setlink(skb, dev, ifm, extack, tb, 0); if (err < 0) return err; @@ -3744,10 +3741,6 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - err = validate_linkmsg(dev, tb, extack); - if (err < 0) - return err; - if (linkinfo[IFLA_INFO_DATA]) { if (!ops || ops != dev->rtnl_link_ops || !ops->changelink) From cc47bcdf0d2ea6a2f7b10566d9b6776bf61b2d4b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 11:53:46 -0700 Subject: [PATCH 03/14] rtnetlink: Factorise do_setlink() path from __rtnl_newlink(). __rtnl_newlink() got too long to maintain. For example, netdev_master_upper_dev_get()->rtnl_link_ops is fetched even when IFLA_INFO_SLAVE_DATA is not specified. Let's factorise the single dev do_setlink() path to a separate function. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/core/rtnetlink.c | 142 ++++++++++++++++++++++--------------------- 1 file changed, 74 insertions(+), 68 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 76593de7014c..21165cc2b697 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3505,6 +3505,78 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, } EXPORT_SYMBOL(rtnl_create_link); +struct rtnl_newlink_tbs { + struct nlattr *tb[IFLA_MAX + 1]; + struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; + struct nlattr *attr[RTNL_MAX_TYPE + 1]; + struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; +}; + +static int rtnl_changelink(const struct sk_buff *skb, struct nlmsghdr *nlh, + const struct rtnl_link_ops *ops, + struct net_device *dev, + struct rtnl_newlink_tbs *tbs, + struct nlattr **data, + struct netlink_ext_ack *extack) +{ + struct nlattr ** const linkinfo = tbs->linkinfo; + struct nlattr ** const tb = tbs->tb; + int status = 0; + int err; + + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + + if (linkinfo[IFLA_INFO_DATA]) { + if (!ops || ops != dev->rtnl_link_ops || !ops->changelink) + return -EOPNOTSUPP; + + err = ops->changelink(dev, tb, data, extack); + if (err < 0) + return err; + + status |= DO_SETLINK_NOTIFY; + } + + if (linkinfo[IFLA_INFO_SLAVE_DATA]) { + const struct rtnl_link_ops *m_ops = NULL; + struct nlattr **slave_data = NULL; + struct net_device *master_dev; + + master_dev = netdev_master_upper_dev_get(dev); + if (master_dev) + m_ops = master_dev->rtnl_link_ops; + + if (!m_ops || !m_ops->slave_changelink) + return -EOPNOTSUPP; + + if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE) + return -EINVAL; + + if (m_ops->slave_maxtype) { + err = nla_parse_nested_deprecated(tbs->slave_attr, + m_ops->slave_maxtype, + linkinfo[IFLA_INFO_SLAVE_DATA], + m_ops->slave_policy, extack); + if (err < 0) + return err; + + slave_data = tbs->slave_attr; + } + + err = m_ops->slave_changelink(master_dev, dev, tb, slave_data, extack); + if (err < 0) + return err; + + status |= DO_SETLINK_NOTIFY; + } + + return do_setlink(skb, dev, nlmsg_data(nlh), extack, tb, status); +} + static int rtnl_group_changelink(const struct sk_buff *skb, struct net *net, int group, struct ifinfomsg *ifm, @@ -3617,24 +3689,14 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, goto out; } -struct rtnl_newlink_tbs { - struct nlattr *tb[IFLA_MAX + 1]; - struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; - struct nlattr *attr[RTNL_MAX_TYPE + 1]; - struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; -}; - static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct rtnl_newlink_tbs *tbs, struct netlink_ext_ack *extack) { struct nlattr ** const linkinfo = tbs->linkinfo; struct nlattr ** const tb = tbs->tb; - const struct rtnl_link_ops *m_ops; - struct net_device *master_dev; struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; - struct nlattr **slave_data; char kind[MODULE_NAME_LEN]; struct net_device *dev; struct ifinfomsg *ifm; @@ -3669,14 +3731,6 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, dev = NULL; } - master_dev = NULL; - m_ops = NULL; - if (dev) { - master_dev = netdev_master_upper_dev_get(dev); - if (master_dev) - m_ops = master_dev->rtnl_link_ops; - } - if (tb[IFLA_LINKINFO]) { err = nla_parse_nested_deprecated(linkinfo, IFLA_INFO_MAX, tb[IFLA_LINKINFO], @@ -3715,56 +3769,8 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, } } - slave_data = NULL; - if (m_ops) { - if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE) - return -EINVAL; - - if (m_ops->slave_maxtype && - linkinfo[IFLA_INFO_SLAVE_DATA]) { - err = nla_parse_nested_deprecated(tbs->slave_attr, - m_ops->slave_maxtype, - linkinfo[IFLA_INFO_SLAVE_DATA], - m_ops->slave_policy, - extack); - if (err < 0) - return err; - slave_data = tbs->slave_attr; - } - } - - if (dev) { - int status = 0; - - if (nlh->nlmsg_flags & NLM_F_EXCL) - return -EEXIST; - if (nlh->nlmsg_flags & NLM_F_REPLACE) - return -EOPNOTSUPP; - - if (linkinfo[IFLA_INFO_DATA]) { - if (!ops || ops != dev->rtnl_link_ops || - !ops->changelink) - return -EOPNOTSUPP; - - err = ops->changelink(dev, tb, data, extack); - if (err < 0) - return err; - status |= DO_SETLINK_NOTIFY; - } - - if (linkinfo[IFLA_INFO_SLAVE_DATA]) { - if (!m_ops || !m_ops->slave_changelink) - return -EOPNOTSUPP; - - err = m_ops->slave_changelink(master_dev, dev, tb, - slave_data, extack); - if (err < 0) - return err; - status |= DO_SETLINK_NOTIFY; - } - - return do_setlink(skb, dev, ifm, extack, tb, status); - } + if (dev) + return rtnl_changelink(skb, nlh, ops, dev, tbs, data, extack); if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { /* No dev found and NLM_F_CREATE not set. Requested dev does not exist, From 7fea1a8cb4dfab059547f801ebbe7e79c60bd75a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 11:53:47 -0700 Subject: [PATCH 04/14] rtnetlink: Move simple validation from __rtnl_newlink() to rtnl_newlink(). We will push RTNL down to rtnl_newlink(). Let's move RTNL-independent validation to rtnl_newlink(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/core/rtnetlink.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 21165cc2b697..97d6ad65647c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3707,15 +3707,6 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, #ifdef CONFIG_MODULES replay: #endif - err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, - ifla_policy, extack); - if (err < 0) - return err; - - err = rtnl_ensure_unique_netns(tb, extack, false); - if (err < 0) - return err; - ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) { link_specified = true; @@ -3731,16 +3722,6 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, dev = NULL; } - if (tb[IFLA_LINKINFO]) { - err = nla_parse_nested_deprecated(linkinfo, IFLA_INFO_MAX, - tb[IFLA_LINKINFO], - ifla_info_policy, NULL); - if (err < 0) - return err; - } else { - memset(linkinfo, 0, sizeof(tbs->linkinfo)); - } - if (linkinfo[IFLA_INFO_KIND]) { nla_strscpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind)); ops = rtnl_link_ops_get(kind); @@ -3809,6 +3790,7 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct nlattr **tb, **linkinfo; struct rtnl_newlink_tbs *tbs; int ret; @@ -3816,7 +3798,30 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (!tbs) return -ENOMEM; + tb = tbs->tb; + ret = nlmsg_parse_deprecated(nlh, sizeof(struct ifinfomsg), tb, + IFLA_MAX, ifla_policy, extack); + if (ret < 0) + goto free; + + ret = rtnl_ensure_unique_netns(tb, extack, false); + if (ret < 0) + goto free; + + linkinfo = tbs->linkinfo; + if (tb[IFLA_LINKINFO]) { + ret = nla_parse_nested_deprecated(linkinfo, IFLA_INFO_MAX, + tb[IFLA_LINKINFO], + ifla_info_policy, NULL); + if (ret < 0) + goto free; + } else { + memset(linkinfo, 0, sizeof(tbs->linkinfo)); + } + ret = __rtnl_newlink(skb, nlh, tbs, extack); + +free: kfree(tbs); return ret; } From 331fe31c50ef5ec1d9161986fd06b934f94176a3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 11:53:48 -0700 Subject: [PATCH 05/14] rtnetlink: Move rtnl_link_ops_get() and retry to rtnl_newlink(). Currently, if neither dev nor rtnl_link_ops is found in __rtnl_newlink(), we release RTNL and redo the whole process after request_module(), which complicates the logic. The ops will be RTNL-independent later. Let's move the ops lookup to rtnl_newlink() and do the retry earlier. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/core/rtnetlink.c | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 97d6ad65647c..e708f0852602 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3690,23 +3690,19 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, } static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, + const struct rtnl_link_ops *ops, struct rtnl_newlink_tbs *tbs, struct netlink_ext_ack *extack) { struct nlattr ** const linkinfo = tbs->linkinfo; struct nlattr ** const tb = tbs->tb; struct net *net = sock_net(skb->sk); - const struct rtnl_link_ops *ops; - char kind[MODULE_NAME_LEN]; struct net_device *dev; struct ifinfomsg *ifm; struct nlattr **data; bool link_specified; int err; -#ifdef CONFIG_MODULES -replay: -#endif ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) { link_specified = true; @@ -3722,14 +3718,6 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, dev = NULL; } - if (linkinfo[IFLA_INFO_KIND]) { - nla_strscpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind)); - ops = rtnl_link_ops_get(kind); - } else { - kind[0] = '\0'; - ops = NULL; - } - data = NULL; if (ops) { if (ops->maxtype > RTNL_MAX_TYPE) @@ -3770,16 +3758,6 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, return -EOPNOTSUPP; if (!ops) { -#ifdef CONFIG_MODULES - if (kind[0]) { - __rtnl_unlock(); - request_module("rtnl-link-%s", kind); - rtnl_lock(); - ops = rtnl_link_ops_get(kind); - if (ops) - goto replay; - } -#endif NL_SET_ERR_MSG(extack, "Unknown device type"); return -EOPNOTSUPP; } @@ -3790,6 +3768,7 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + const struct rtnl_link_ops *ops = NULL; struct nlattr **tb, **linkinfo; struct rtnl_newlink_tbs *tbs; int ret; @@ -3819,7 +3798,22 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, memset(linkinfo, 0, sizeof(tbs->linkinfo)); } - ret = __rtnl_newlink(skb, nlh, tbs, extack); + if (linkinfo[IFLA_INFO_KIND]) { + char kind[MODULE_NAME_LEN]; + + nla_strscpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind)); + ops = rtnl_link_ops_get(kind); +#ifdef CONFIG_MODULES + if (!ops) { + __rtnl_unlock(); + request_module("rtnl-link-%s", kind); + rtnl_lock(); + ops = rtnl_link_ops_get(kind); + } +#endif + } + + ret = __rtnl_newlink(skb, nlh, ops, tbs, extack); free: kfree(tbs); From 0d3008d1a9aefb89e09e8dd39134512d678e3461 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 11:53:49 -0700 Subject: [PATCH 06/14] rtnetlink: Move ops->validate to rtnl_newlink(). ops->validate() does not require RTNL. Let's move it to rtnl_newlink(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/core/rtnetlink.c | 49 ++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e708f0852602..9c9290a6c271 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3692,16 +3692,14 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, const struct rtnl_link_ops *ops, struct rtnl_newlink_tbs *tbs, + struct nlattr **data, struct netlink_ext_ack *extack) { - struct nlattr ** const linkinfo = tbs->linkinfo; struct nlattr ** const tb = tbs->tb; struct net *net = sock_net(skb->sk); struct net_device *dev; struct ifinfomsg *ifm; - struct nlattr **data; bool link_specified; - int err; ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) { @@ -3718,26 +3716,6 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, dev = NULL; } - data = NULL; - if (ops) { - if (ops->maxtype > RTNL_MAX_TYPE) - return -EINVAL; - - if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { - err = nla_parse_nested_deprecated(tbs->attr, ops->maxtype, - linkinfo[IFLA_INFO_DATA], - ops->policy, extack); - if (err < 0) - return err; - data = tbs->attr; - } - if (ops->validate) { - err = ops->validate(tb, data, extack); - if (err < 0) - return err; - } - } - if (dev) return rtnl_changelink(skb, nlh, ops, dev, tbs, data, extack); @@ -3768,8 +3746,8 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct nlattr **tb, **linkinfo, **data = NULL; const struct rtnl_link_ops *ops = NULL; - struct nlattr **tb, **linkinfo; struct rtnl_newlink_tbs *tbs; int ret; @@ -3813,7 +3791,28 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, #endif } - ret = __rtnl_newlink(skb, nlh, ops, tbs, extack); + if (ops) { + if (ops->maxtype > RTNL_MAX_TYPE) + return -EINVAL; + + if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { + ret = nla_parse_nested_deprecated(tbs->attr, ops->maxtype, + linkinfo[IFLA_INFO_DATA], + ops->policy, extack); + if (ret < 0) + goto free; + + data = tbs->attr; + } + + if (ops->validate) { + ret = ops->validate(tb, data, extack); + if (ret < 0) + goto free; + } + } + + ret = __rtnl_newlink(skb, nlh, ops, tbs, data, extack); free: kfree(tbs); From 43c7ce69d28e185f62fe2b8be2c681c5cac0bc6b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 11:53:50 -0700 Subject: [PATCH 07/14] rtnetlink: Protect struct rtnl_link_ops with SRCU. Once RTNL is replaced with rtnl_net_lock(), we need a mechanism to guarantee that rtnl_link_ops is alive during inflight RTM_NEWLINK even when its module is being unloaded. Let's use SRCU to protect ops. rtnl_link_ops_get() now iterates link_ops under RCU and returns SRCU-protected ops pointer. The caller must call rtnl_link_ops_put() to release the pointer after the use. Also, __rtnl_link_unregister() unlinks the ops first and calls synchronize_srcu() to wait for inflight RTM_NEWLINK requests to complete. Note that link_ops needs to be protected by its dedicated lock when RTNL is removed. Suggested-by: Eric Dumazet Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/rtnetlink.h | 5 ++- net/core/rtnetlink.c | 83 ++++++++++++++++++++++++++++++----------- 2 files changed, 65 insertions(+), 23 deletions(-) diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index bb49c5708ce7..1a6aa5ca74f3 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -3,6 +3,7 @@ #define __NET_RTNETLINK_H #include +#include #include typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, @@ -69,7 +70,8 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh) /** * struct rtnl_link_ops - rtnetlink link operations * - * @list: Used internally + * @list: Used internally, protected by RTNL and SRCU + * @srcu: Used internally * @kind: Identifier * @netns_refund: Physical device, move to init_net on netns exit * @maxtype: Highest device specific netlink attribute number @@ -100,6 +102,7 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh) */ struct rtnl_link_ops { struct list_head list; + struct srcu_struct srcu; const char *kind; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9c9290a6c271..31b105b3a834 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -457,15 +457,29 @@ EXPORT_SYMBOL_GPL(__rtnl_unregister_many); static LIST_HEAD(link_ops); -static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) +static struct rtnl_link_ops *rtnl_link_ops_get(const char *kind, int *srcu_index) { - const struct rtnl_link_ops *ops; + struct rtnl_link_ops *ops; - list_for_each_entry(ops, &link_ops, list) { - if (!strcmp(ops->kind, kind)) - return ops; + rcu_read_lock(); + + list_for_each_entry_rcu(ops, &link_ops, list) { + if (!strcmp(ops->kind, kind)) { + *srcu_index = srcu_read_lock(&ops->srcu); + goto unlock; + } } - return NULL; + + ops = NULL; +unlock: + rcu_read_unlock(); + + return ops; +} + +static void rtnl_link_ops_put(struct rtnl_link_ops *ops, int srcu_index) +{ + srcu_read_unlock(&ops->srcu, srcu_index); } /** @@ -480,8 +494,16 @@ static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) */ int __rtnl_link_register(struct rtnl_link_ops *ops) { - if (rtnl_link_ops_get(ops->kind)) - return -EEXIST; + struct rtnl_link_ops *tmp; + int err; + + /* When RTNL is removed, add lock for link_ops. */ + ASSERT_RTNL(); + + list_for_each_entry(tmp, &link_ops, list) { + if (!strcmp(ops->kind, tmp->kind)) + return -EEXIST; + } /* The check for alloc/setup is here because if ops * does not have that filled up, it is not possible @@ -491,7 +513,12 @@ int __rtnl_link_register(struct rtnl_link_ops *ops) if ((ops->alloc || ops->setup) && !ops->dellink) ops->dellink = unregister_netdevice_queue; - list_add_tail(&ops->list, &link_ops); + err = init_srcu_struct(&ops->srcu); + if (err) + return err; + + list_add_tail_rcu(&ops->list, &link_ops); + return 0; } EXPORT_SYMBOL_GPL(__rtnl_link_register); @@ -542,10 +569,12 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops) { struct net *net; - for_each_net(net) { + list_del_rcu(&ops->list); + synchronize_srcu(&ops->srcu); + cleanup_srcu_struct(&ops->srcu); + + for_each_net(net) __rtnl_kill_links(net, ops); - } - list_del(&ops->list); } EXPORT_SYMBOL_GPL(__rtnl_link_unregister); @@ -2158,10 +2187,11 @@ static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = { [IFLA_XDP_PROG_ID] = { .type = NLA_U32 }, }; -static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla) +static struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla, + int *ops_srcu_index) { - const struct rtnl_link_ops *ops = NULL; struct nlattr *linfo[IFLA_INFO_MAX + 1]; + struct rtnl_link_ops *ops = NULL; if (nla_parse_nested_deprecated(linfo, IFLA_INFO_MAX, nla, ifla_info_policy, NULL) < 0) return NULL; @@ -2170,7 +2200,7 @@ static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla char kind[MODULE_NAME_LEN]; nla_strscpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind)); - ops = rtnl_link_ops_get(kind); + ops = rtnl_link_ops_get(kind, ops_srcu_index); } return ops; @@ -2290,8 +2320,8 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh, static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { - const struct rtnl_link_ops *kind_ops = NULL; struct netlink_ext_ack *extack = cb->extack; + struct rtnl_link_ops *kind_ops = NULL; const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); unsigned int flags = NLM_F_MULTI; @@ -2302,6 +2332,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net *tgt_net = net; u32 ext_filter_mask = 0; struct net_device *dev; + int ops_srcu_index; int master_idx = 0; int netnsid = -1; int err, i; @@ -2335,7 +2366,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) master_idx = nla_get_u32(tb[i]); break; case IFLA_LINKINFO: - kind_ops = linkinfo_to_kind_ops(tb[i]); + kind_ops = linkinfo_to_kind_ops(tb[i], &ops_srcu_index); break; default: if (cb->strict_check) { @@ -2361,6 +2392,10 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (err < 0) break; } + + if (kind_ops) + rtnl_link_ops_put(kind_ops, ops_srcu_index); + cb->seq = tgt_net->dev_base_seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); if (netnsid >= 0) @@ -3747,8 +3782,9 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr **tb, **linkinfo, **data = NULL; - const struct rtnl_link_ops *ops = NULL; + struct rtnl_link_ops *ops = NULL; struct rtnl_newlink_tbs *tbs; + int ops_srcu_index; int ret; tbs = kmalloc(sizeof(*tbs), GFP_KERNEL); @@ -3780,13 +3816,13 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, char kind[MODULE_NAME_LEN]; nla_strscpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind)); - ops = rtnl_link_ops_get(kind); + ops = rtnl_link_ops_get(kind, &ops_srcu_index); #ifdef CONFIG_MODULES if (!ops) { __rtnl_unlock(); request_module("rtnl-link-%s", kind); rtnl_lock(); - ops = rtnl_link_ops_get(kind); + ops = rtnl_link_ops_get(kind, &ops_srcu_index); } #endif } @@ -3800,7 +3836,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, linkinfo[IFLA_INFO_DATA], ops->policy, extack); if (ret < 0) - goto free; + goto put_ops; data = tbs->attr; } @@ -3808,12 +3844,15 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (ops->validate) { ret = ops->validate(tb, data, extack); if (ret < 0) - goto free; + goto put_ops; } } ret = __rtnl_newlink(skb, nlh, ops, tbs, data, extack); +put_ops: + if (ops) + rtnl_link_ops_put(ops, ops_srcu_index); free: kfree(tbs); return ret; From 0fef2a1212f1ff68fc3834abd41928b4353f8af6 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 11:53:51 -0700 Subject: [PATCH 08/14] rtnetlink: Call rtnl_link_get_net_capable() in rtnl_newlink(). As a prerequisite of per-netns RTNL, we must fetch netns before looking up dev or moving it to another netns. rtnl_link_get_net_capable() is called in rtnl_newlink_create() and do_setlink(), but both of them need to be moved to the RTNL-independent region, which will be rtnl_newlink(). Let's call rtnl_link_get_net_capable() in rtnl_newlink() and pass the netns down to where needed. Note that the latter two have not passed the nets to do_setlink() yet but will do so after the remaining rtnl_link_get_net_capable() is moved to rtnl_setlink() later. While at it, dest_net is renamed to tgt_net in rtnl_newlink_create() to align with rtnl_{del,set}link(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/core/rtnetlink.c | 51 ++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 31b105b3a834..f6823c8d21ad 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3549,7 +3549,7 @@ struct rtnl_newlink_tbs { static int rtnl_changelink(const struct sk_buff *skb, struct nlmsghdr *nlh, const struct rtnl_link_ops *ops, - struct net_device *dev, + struct net_device *dev, struct net *tgt_net, struct rtnl_newlink_tbs *tbs, struct nlattr **data, struct netlink_ext_ack *extack) @@ -3613,10 +3613,10 @@ static int rtnl_changelink(const struct sk_buff *skb, struct nlmsghdr *nlh, } static int rtnl_group_changelink(const struct sk_buff *skb, - struct net *net, int group, - struct ifinfomsg *ifm, - struct netlink_ext_ack *extack, - struct nlattr **tb) + struct net *net, struct net *tgt_net, + int group, struct ifinfomsg *ifm, + struct netlink_ext_ack *extack, + struct nlattr **tb) { struct net_device *dev, *aux; int err; @@ -3634,6 +3634,7 @@ static int rtnl_group_changelink(const struct sk_buff *skb, static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, const struct rtnl_link_ops *ops, + struct net *tgt_net, const struct nlmsghdr *nlh, struct nlattr **tb, struct nlattr **data, struct netlink_ext_ack *extack) @@ -3641,9 +3642,9 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, unsigned char name_assign_type = NET_NAME_USER; struct net *net = sock_net(skb->sk); u32 portid = NETLINK_CB(skb).portid; - struct net *dest_net, *link_net; struct net_device *dev; char ifname[IFNAMSIZ]; + struct net *link_net; int err; if (!ops->alloc && !ops->setup) @@ -3656,14 +3657,10 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, name_assign_type = NET_NAME_ENUM; } - dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN); - if (IS_ERR(dest_net)) - return PTR_ERR(dest_net); - if (tb[IFLA_LINK_NETNSID]) { int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); - link_net = get_net_ns_by_id(dest_net, id); + link_net = get_net_ns_by_id(tgt_net, id); if (!link_net) { NL_SET_ERR_MSG(extack, "Unknown network namespace id"); err = -EINVAL; @@ -3676,7 +3673,7 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, link_net = NULL; } - dev = rtnl_create_link(link_net ? : dest_net, ifname, + dev = rtnl_create_link(link_net ? : tgt_net, ifname, name_assign_type, ops, tb, extack); if (IS_ERR(dev)) { err = PTR_ERR(dev); @@ -3698,7 +3695,7 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, if (err < 0) goto out_unregister; if (link_net) { - err = dev_change_net_namespace(dev, dest_net, ifname); + err = dev_change_net_namespace(dev, tgt_net, ifname); if (err < 0) goto out_unregister; } @@ -3710,7 +3707,7 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, out: if (link_net) put_net(link_net); - put_net(dest_net); + return err; out_unregister: if (ops->newlink) { @@ -3726,6 +3723,7 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, const struct rtnl_link_ops *ops, + struct net *tgt_net, struct rtnl_newlink_tbs *tbs, struct nlattr **data, struct netlink_ext_ack *extack) @@ -3752,19 +3750,18 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, } if (dev) - return rtnl_changelink(skb, nlh, ops, dev, tbs, data, extack); + return rtnl_changelink(skb, nlh, ops, dev, tgt_net, tbs, data, extack); if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { /* No dev found and NLM_F_CREATE not set. Requested dev does not exist, * or it's for a group */ - if (link_specified) + if (link_specified || !tb[IFLA_GROUP]) return -ENODEV; - if (tb[IFLA_GROUP]) - return rtnl_group_changelink(skb, net, - nla_get_u32(tb[IFLA_GROUP]), - ifm, extack, tb); - return -ENODEV; + + return rtnl_group_changelink(skb, net, tgt_net, + nla_get_u32(tb[IFLA_GROUP]), + ifm, extack, tb); } if (tb[IFLA_MAP] || tb[IFLA_PROTINFO]) @@ -3775,7 +3772,7 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, return -EOPNOTSUPP; } - return rtnl_newlink_create(skb, ifm, ops, nlh, tb, data, extack); + return rtnl_newlink_create(skb, ifm, ops, tgt_net, nlh, tb, data, extack); } static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -3784,6 +3781,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **tb, **linkinfo, **data = NULL; struct rtnl_link_ops *ops = NULL; struct rtnl_newlink_tbs *tbs; + struct net *tgt_net; int ops_srcu_index; int ret; @@ -3848,8 +3846,15 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, } } - ret = __rtnl_newlink(skb, nlh, ops, tbs, data, extack); + tgt_net = rtnl_link_get_net_capable(skb, sock_net(skb->sk), tb, CAP_NET_ADMIN); + if (IS_ERR(tgt_net)) { + ret = PTR_ERR(tgt_net); + goto put_ops; + } + ret = __rtnl_newlink(skb, nlh, ops, tgt_net, tbs, data, extack); + + put_net(tgt_net); put_ops: if (ops) rtnl_link_ops_put(ops, ops_srcu_index); From f7774eec20b41fae36a58e8ab04ff4dd48bb1845 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 11:53:52 -0700 Subject: [PATCH 09/14] rtnetlink: Fetch IFLA_LINK_NETNSID in rtnl_newlink(). Another netns option for RTM_NEWLINK is IFLA_LINK_NETNSID and is fetched in rtnl_newlink_create(). This must be done before holding rtnl_net_lock(). Let's move IFLA_LINK_NETNSID processing to rtnl_newlink(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/core/rtnetlink.c | 49 ++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f6823c8d21ad..eee0f820ddf6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3634,7 +3634,7 @@ static int rtnl_group_changelink(const struct sk_buff *skb, static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, const struct rtnl_link_ops *ops, - struct net *tgt_net, + struct net *tgt_net, struct net *link_net, const struct nlmsghdr *nlh, struct nlattr **tb, struct nlattr **data, struct netlink_ext_ack *extack) @@ -3644,7 +3644,6 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, u32 portid = NETLINK_CB(skb).portid; struct net_device *dev; char ifname[IFNAMSIZ]; - struct net *link_net; int err; if (!ops->alloc && !ops->setup) @@ -3657,22 +3656,6 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, name_assign_type = NET_NAME_ENUM; } - if (tb[IFLA_LINK_NETNSID]) { - int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); - - link_net = get_net_ns_by_id(tgt_net, id); - if (!link_net) { - NL_SET_ERR_MSG(extack, "Unknown network namespace id"); - err = -EINVAL; - goto out; - } - err = -EPERM; - if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) - goto out; - } else { - link_net = NULL; - } - dev = rtnl_create_link(link_net ? : tgt_net, ifname, name_assign_type, ops, tb, extack); if (IS_ERR(dev)) { @@ -3705,9 +3688,6 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, goto out_unregister; } out: - if (link_net) - put_net(link_net); - return err; out_unregister: if (ops->newlink) { @@ -3723,7 +3703,7 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, const struct rtnl_link_ops *ops, - struct net *tgt_net, + struct net *tgt_net, struct net *link_net, struct rtnl_newlink_tbs *tbs, struct nlattr **data, struct netlink_ext_ack *extack) @@ -3772,16 +3752,16 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, return -EOPNOTSUPP; } - return rtnl_newlink_create(skb, ifm, ops, tgt_net, nlh, tb, data, extack); + return rtnl_newlink_create(skb, ifm, ops, tgt_net, link_net, nlh, tb, data, extack); } static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr **tb, **linkinfo, **data = NULL; + struct net *tgt_net, *link_net = NULL; struct rtnl_link_ops *ops = NULL; struct rtnl_newlink_tbs *tbs; - struct net *tgt_net; int ops_srcu_index; int ret; @@ -3852,8 +3832,27 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, goto put_ops; } - ret = __rtnl_newlink(skb, nlh, ops, tgt_net, tbs, data, extack); + if (tb[IFLA_LINK_NETNSID]) { + int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); + link_net = get_net_ns_by_id(tgt_net, id); + if (!link_net) { + NL_SET_ERR_MSG(extack, "Unknown network namespace id"); + ret = -EINVAL; + goto put_net; + } + + if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) { + ret = -EPERM; + goto put_net; + } + } + + ret = __rtnl_newlink(skb, nlh, ops, tgt_net, link_net, tbs, data, extack); + +put_net: + if (link_net) + put_net(link_net); put_net(tgt_net); put_ops: if (ops) From 175cfc5cd373b6665ec145cafe742252453a7c0e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 11:53:53 -0700 Subject: [PATCH 10/14] rtnetlink: Clean up rtnl_dellink(). We will push RTNL down to rtnl_delink(). Let's unify the error path to make it easy to place rtnl_net_lock(). While at it, keep the variables in reverse xmas order. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/core/rtnetlink.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index eee0f820ddf6..a19b2eb2727e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3368,14 +3368,14 @@ EXPORT_SYMBOL_GPL(rtnl_delete_link); static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct ifinfomsg *ifm = nlmsg_data(nlh); struct net *net = sock_net(skb->sk); u32 portid = NETLINK_CB(skb).portid; - struct net *tgt_net = net; - struct net_device *dev = NULL; - struct ifinfomsg *ifm; struct nlattr *tb[IFLA_MAX+1]; - int err; + struct net_device *dev = NULL; + struct net *tgt_net = net; int netnsid = -1; + int err; err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); @@ -3393,27 +3393,20 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, return PTR_ERR(tgt_net); } - err = -EINVAL; - ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(tgt_net, ifm->ifi_index); else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) dev = rtnl_dev_get(tgt_net, tb); + + if (dev) + err = rtnl_delete_link(dev, portid, nlh); + else if (ifm->ifi_index > 0 || tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) + err = -ENODEV; else if (tb[IFLA_GROUP]) err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP])); else - goto out; + err = -EINVAL; - if (!dev) { - if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME] || ifm->ifi_index > 0) - err = -ENODEV; - - goto out; - } - - err = rtnl_delete_link(dev, portid, nlh); - -out: if (netnsid >= 0) put_net(tgt_net); From 6e495fad88ef7bdea70b0e4a1a714f6eccab2a5a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 11:53:54 -0700 Subject: [PATCH 11/14] rtnetlink: Clean up rtnl_setlink(). We will push RTNL down to rtnl_setlink(). Let's unify the error path to make it easy to place rtnl_net_lock(). While at it, keep the variables in reverse xmas order. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/core/rtnetlink.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a19b2eb2727e..f89a902458d6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3279,11 +3279,11 @@ static struct net_device *rtnl_dev_get(struct net *net, static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + struct ifinfomsg *ifm = nlmsg_data(nlh); struct net *net = sock_net(skb->sk); - struct ifinfomsg *ifm; - struct net_device *dev; - int err; struct nlattr *tb[IFLA_MAX+1]; + struct net_device *dev = NULL; + int err; err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); @@ -3294,21 +3294,18 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout; - err = -EINVAL; - ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) dev = rtnl_dev_get(net, tb); else - goto errout; + err = -EINVAL; - if (dev == NULL) { + if (dev) + err = do_setlink(skb, dev, ifm, extack, tb, 0); + else if (!err) err = -ENODEV; - goto errout; - } - err = do_setlink(skb, dev, ifm, extack, tb, 0); errout: return err; } From a0b63c6457e100b84b1ff9179bc328c0924de75c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 11:53:55 -0700 Subject: [PATCH 12/14] rtnetlink: Call rtnl_link_get_net_capable() in do_setlink(). We will push RTNL down to rtnl_setlink(). RTM_SETLINK could call rtnl_link_get_net_capable() in do_setlink() to move a dev to a new netns, but the netns needs to be fetched before holding rtnl_net_lock(). Let's move it to rtnl_setlink() and pass the netns to do_setlink(). Now, RTM_NEWLINK paths (rtnl_changelink() and rtnl_group_changelink()) can pass the prefetched netns to do_setlink(). Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/core/rtnetlink.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f89a902458d6..445e6ffed75e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2881,8 +2881,8 @@ static int do_set_proto_down(struct net_device *dev, #define DO_SETLINK_MODIFIED 0x01 /* notify flag means notify + modified. */ #define DO_SETLINK_NOTIFY 0x03 -static int do_setlink(const struct sk_buff *skb, - struct net_device *dev, struct ifinfomsg *ifm, +static int do_setlink(const struct sk_buff *skb, struct net_device *dev, + struct net *tgt_net, struct ifinfomsg *ifm, struct netlink_ext_ack *extack, struct nlattr **tb, int status) { @@ -2899,27 +2899,19 @@ static int do_setlink(const struct sk_buff *skb, else ifname[0] = '\0'; - if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) { + if (!net_eq(tgt_net, dev_net(dev))) { const char *pat = ifname[0] ? ifname : NULL; - struct net *net; int new_ifindex; - net = rtnl_link_get_net_capable(skb, dev_net(dev), - tb, CAP_NET_ADMIN); - if (IS_ERR(net)) { - err = PTR_ERR(net); - goto errout; - } - if (tb[IFLA_NEW_IFINDEX]) new_ifindex = nla_get_s32(tb[IFLA_NEW_IFINDEX]); else new_ifindex = 0; - err = __dev_change_net_namespace(dev, net, pat, new_ifindex); - put_net(net); + err = __dev_change_net_namespace(dev, tgt_net, pat, new_ifindex); if (err) goto errout; + status |= DO_SETLINK_MODIFIED; } @@ -3283,6 +3275,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct nlattr *tb[IFLA_MAX+1]; struct net_device *dev = NULL; + struct net *tgt_net; int err; err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, @@ -3294,6 +3287,12 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout; + tgt_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN); + if (IS_ERR(tgt_net)) { + err = PTR_ERR(tgt_net); + goto errout; + } + if (ifm->ifi_index > 0) dev = __dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) @@ -3302,10 +3301,11 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, err = -EINVAL; if (dev) - err = do_setlink(skb, dev, ifm, extack, tb, 0); + err = do_setlink(skb, dev, tgt_net, ifm, extack, tb, 0); else if (!err) err = -ENODEV; + put_net(tgt_net); errout: return err; } @@ -3599,7 +3599,7 @@ static int rtnl_changelink(const struct sk_buff *skb, struct nlmsghdr *nlh, status |= DO_SETLINK_NOTIFY; } - return do_setlink(skb, dev, nlmsg_data(nlh), extack, tb, status); + return do_setlink(skb, dev, tgt_net, nlmsg_data(nlh), extack, tb, status); } static int rtnl_group_changelink(const struct sk_buff *skb, @@ -3613,7 +3613,7 @@ static int rtnl_group_changelink(const struct sk_buff *skb, for_each_netdev_safe(net, dev, aux) { if (dev->group == group) { - err = do_setlink(skb, dev, ifm, extack, tb, 0); + err = do_setlink(skb, dev, tgt_net, ifm, extack, tb, 0); if (err < 0) return err; } From 26eebdc4b005ccd4cf63f4fef4c9c0adf9bfa380 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 11:53:56 -0700 Subject: [PATCH 13/14] rtnetlink: Return int from rtnl_af_register(). The next patch will add init_srcu_struct() in rtnl_af_register(), then we need to handle its error. Let's add the error handling in advance to make the following patch cleaner. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Matt Johnston Signed-off-by: Paolo Abeni --- include/net/rtnetlink.h | 2 +- net/bridge/br_netlink.c | 6 +++++- net/core/rtnetlink.c | 4 +++- net/ipv4/devinet.c | 3 ++- net/ipv6/addrconf.c | 5 ++++- net/mctp/device.c | 16 +++++++++++----- net/mpls/af_mpls.c | 5 ++++- 7 files changed, 30 insertions(+), 11 deletions(-) diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 1a6aa5ca74f3..969138ae2f4b 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -204,7 +204,7 @@ struct rtnl_af_ops { size_t (*get_stats_af_size)(const struct net_device *dev); }; -void rtnl_af_register(struct rtnl_af_ops *ops); +int rtnl_af_register(struct rtnl_af_ops *ops); void rtnl_af_unregister(struct rtnl_af_ops *ops); struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 6b97ae47f855..3e0f47203f2a 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1924,7 +1924,9 @@ int __init br_netlink_init(void) if (err) goto out; - rtnl_af_register(&br_af_ops); + err = rtnl_af_register(&br_af_ops); + if (err) + goto out_vlan; err = rtnl_link_register(&br_link_ops); if (err) @@ -1934,6 +1936,8 @@ int __init br_netlink_init(void) out_af: rtnl_af_unregister(&br_af_ops); +out_vlan: + br_vlan_rtnl_uninit(); out: return err; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 445e6ffed75e..70b663aca209 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -686,11 +686,13 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family) * * Returns 0 on success or a negative error code. */ -void rtnl_af_register(struct rtnl_af_ops *ops) +int rtnl_af_register(struct rtnl_af_ops *ops) { rtnl_lock(); list_add_tail_rcu(&ops->list, &rtnl_af_ops); rtnl_unlock(); + + return 0; } EXPORT_SYMBOL_GPL(rtnl_af_register); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ec29ead83e74..0ff9c0abfaa0 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2827,7 +2827,8 @@ void __init devinet_init(void) register_pernet_subsys(&devinet_ops); register_netdevice_notifier(&ip_netdev_notifier); - rtnl_af_register(&inet_af_ops); + if (rtnl_af_register(&inet_af_ops)) + panic("Unable to register inet_af_ops\n"); rtnl_register_many(devinet_rtnl_msg_handlers); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ac8645ad2537..d0a99710d65d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -7468,7 +7468,9 @@ int __init addrconf_init(void) addrconf_verify(&init_net); - rtnl_af_register(&inet6_ops); + err = rtnl_af_register(&inet6_ops); + if (err) + goto erraf; err = rtnl_register_many(addrconf_rtnl_msg_handlers); if (err) @@ -7482,6 +7484,7 @@ int __init addrconf_init(void) errout: rtnl_unregister_all(PF_INET6); rtnl_af_unregister(&inet6_ops); +erraf: unregister_netdevice_notifier(&ipv6_dev_notf); errlo: destroy_workqueue(addrconf_wq); diff --git a/net/mctp/device.c b/net/mctp/device.c index 85cc5f31f1e7..3d75b919995d 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -535,14 +535,20 @@ int __init mctp_device_init(void) int err; register_netdevice_notifier(&mctp_dev_nb); - rtnl_af_register(&mctp_af_ops); + + err = rtnl_af_register(&mctp_af_ops); + if (err) + goto err_notifier; err = rtnl_register_many(mctp_device_rtnl_msg_handlers); - if (err) { - rtnl_af_unregister(&mctp_af_ops); - unregister_netdevice_notifier(&mctp_dev_nb); - } + if (err) + goto err_af; + return 0; +err_af: + rtnl_af_unregister(&mctp_af_ops); +err_notifier: + unregister_netdevice_notifier(&mctp_dev_nb); return err; } diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index a0573847bc55..1f63b32d76d6 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2753,7 +2753,9 @@ static int __init mpls_init(void) dev_add_pack(&mpls_packet_type); - rtnl_af_register(&mpls_af_ops); + err = rtnl_af_register(&mpls_af_ops); + if (err) + goto out_unregister_dev_type; err = rtnl_register_many(mpls_rtnl_msg_handlers); if (err) @@ -2773,6 +2775,7 @@ static int __init mpls_init(void) rtnl_unregister_many(mpls_rtnl_msg_handlers); out_unregister_rtnl_af: rtnl_af_unregister(&mpls_af_ops); +out_unregister_dev_type: dev_remove_pack(&mpls_packet_type); out_unregister_pernet: unregister_pernet_subsys(&mpls_net_ops); From 6ab0f866948323724e95cf14d9e47fd77703c192 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 16 Oct 2024 11:53:57 -0700 Subject: [PATCH 14/14] rtnetlink: Protect struct rtnl_af_ops with SRCU. Once RTNL is replaced with rtnl_net_lock(), we need a mechanism to guarantee that rtnl_af_ops is alive during inflight RTM_SETLINK even when its module is being unloaded. Let's use SRCU to protect ops. rtnl_af_lookup() now iterates rtnl_af_ops under RCU and returns SRCU-protected ops pointer. The caller must call rtnl_af_put() to release the pointer after the use. Also, rtnl_af_unregister() unlinks the ops first and calls synchronize_srcu() to wait for inflight RTM_SETLINK requests to complete. Note that rtnl_af_ops needs to be protected by its dedicated lock when RTNL is removed. Note also that BUG_ON() in do_setlink() is changed to the normal error handling as a different af_ops might be found after validate_linkmsg(). Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/rtnetlink.h | 5 +++- net/core/rtnetlink.c | 63 ++++++++++++++++++++++++++++++----------- 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 969138ae2f4b..e0d9a8eae6b6 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -172,7 +172,8 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops); /** * struct rtnl_af_ops - rtnetlink address family operations * - * @list: Used internally + * @list: Used internally, protected by RTNL and SRCU + * @srcu: Used internally * @family: Address family * @fill_link_af: Function to fill IFLA_AF_SPEC with address family * specific netlink attributes. @@ -185,6 +186,8 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops); */ struct rtnl_af_ops { struct list_head list; + struct srcu_struct srcu; + int family; int (*fill_link_af)(struct sk_buff *skb, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 70b663aca209..194a81e5f608 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -666,18 +666,31 @@ static size_t rtnl_link_get_size(const struct net_device *dev) static LIST_HEAD(rtnl_af_ops); -static const struct rtnl_af_ops *rtnl_af_lookup(const int family) +static struct rtnl_af_ops *rtnl_af_lookup(const int family, int *srcu_index) { - const struct rtnl_af_ops *ops; + struct rtnl_af_ops *ops; ASSERT_RTNL(); - list_for_each_entry(ops, &rtnl_af_ops, list) { - if (ops->family == family) - return ops; + rcu_read_lock(); + + list_for_each_entry_rcu(ops, &rtnl_af_ops, list) { + if (ops->family == family) { + *srcu_index = srcu_read_lock(&ops->srcu); + goto unlock; + } } - return NULL; + ops = NULL; +unlock: + rcu_read_unlock(); + + return ops; +} + +static void rtnl_af_put(struct rtnl_af_ops *ops, int srcu_index) +{ + srcu_read_unlock(&ops->srcu, srcu_index); } /** @@ -688,6 +701,11 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family) */ int rtnl_af_register(struct rtnl_af_ops *ops) { + int err = init_srcu_struct(&ops->srcu); + + if (err) + return err; + rtnl_lock(); list_add_tail_rcu(&ops->list, &rtnl_af_ops); rtnl_unlock(); @@ -707,6 +725,8 @@ void rtnl_af_unregister(struct rtnl_af_ops *ops) rtnl_unlock(); synchronize_rcu(); + synchronize_srcu(&ops->srcu); + cleanup_srcu_struct(&ops->srcu); } EXPORT_SYMBOL_GPL(rtnl_af_unregister); @@ -2579,20 +2599,24 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[], int rem, err; nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { - const struct rtnl_af_ops *af_ops; + struct rtnl_af_ops *af_ops; + int af_ops_srcu_index; - af_ops = rtnl_af_lookup(nla_type(af)); + af_ops = rtnl_af_lookup(nla_type(af), &af_ops_srcu_index); if (!af_ops) return -EAFNOSUPPORT; if (!af_ops->set_link_af) - return -EOPNOTSUPP; - - if (af_ops->validate_link_af) { + err = -EOPNOTSUPP; + else if (af_ops->validate_link_af) err = af_ops->validate_link_af(dev, af, extack); - if (err < 0) - return err; - } + else + err = 0; + + rtnl_af_put(af_ops, af_ops_srcu_index); + + if (err < 0) + return err; } } @@ -3172,11 +3196,18 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, int rem; nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { - const struct rtnl_af_ops *af_ops; + struct rtnl_af_ops *af_ops; + int af_ops_srcu_index; - BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af)))); + af_ops = rtnl_af_lookup(nla_type(af), &af_ops_srcu_index); + if (!af_ops) { + err = -EAFNOSUPPORT; + goto errout; + } err = af_ops->set_link_af(dev, af, extack); + rtnl_af_put(af_ops, af_ops_srcu_index); + if (err < 0) goto errout;