Merge patch series "ns: rework reference counting"

Christian Brauner <brauner@kernel.org> says:

Stop open-coded accesses to the reference counts and stop cargo-culting the
same code in all namespaces. Use a set of dedicated helpers and make the
actual count private.

* patches from https://lore.kernel.org/20250918-work-namespace-ns_ref-v1-0-1b0a98ee041e@kernel.org:
  ns: rename to __ns_ref
  nsfs: port to ns_ref_*() helpers
  net: port to ns_ref_*() helpers
  uts: port to ns_ref_*() helpers
  ipv4: use check_net()
  net: use check_net()
  net-sysfs: use check_net()
  user: port to ns_ref_*() helpers
  time: port to ns_ref_*() helpers
  pid: port to ns_ref_*() helpers
  ipc: port to ns_ref_*() helpers
  cgroup: port to ns_ref_*() helpers
  mnt: port to ns_ref_*() helpers
  ns: add reference count helpers

Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Christian Brauner
2025-09-19 10:16:49 +02:00
25 changed files with 74 additions and 49 deletions

View File

@@ -143,7 +143,7 @@ static inline void detach_mounts(struct dentry *dentry)
static inline void get_mnt_ns(struct mnt_namespace *ns)
{
refcount_inc(&ns->ns.count);
ns_ref_inc(ns);
}
extern seqlock_t mount_lock;

View File

@@ -2110,7 +2110,7 @@ struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool pr
* the mount namespace and it might already be on its
* deathbed.
*/
if (!refcount_inc_not_zero(&mntns->ns.count))
if (!ns_ref_get(mntns))
continue;
return mntns;
@@ -6015,7 +6015,7 @@ struct mnt_namespace init_mnt_ns = {
.ns.inum = PROC_MNT_INIT_INO,
.ns.ops = &mntns_operations,
.user_ns = &init_user_ns,
.ns.count = REFCOUNT_INIT(1),
.ns.__ns_ref = REFCOUNT_INIT(1),
.passive = REFCOUNT_INIT(1),
.mounts = RB_ROOT,
.poll = __WAIT_QUEUE_HEAD_INITIALIZER(init_mnt_ns.poll),
@@ -6084,7 +6084,7 @@ void __init mnt_init(void)
void put_mnt_ns(struct mnt_namespace *ns)
{
if (!refcount_dec_and_test(&ns->ns.count))
if (!ns_ref_put(ns))
return;
namespace_lock();
emptied_ns = ns;

View File

@@ -492,7 +492,7 @@ static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
VFS_WARN_ON_ONCE(ns->ops->type != fid->ns_type);
VFS_WARN_ON_ONCE(ns->inum != fid->ns_inum);
if (!refcount_inc_not_zero(&ns->count))
if (!__ns_ref_get(ns))
return NULL;
}

View File

@@ -29,12 +29,12 @@ int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
static inline void get_cgroup_ns(struct cgroup_namespace *ns)
{
refcount_inc(&ns->ns.count);
ns_ref_inc(ns);
}
static inline void put_cgroup_ns(struct cgroup_namespace *ns)
{
if (refcount_dec_and_test(&ns->ns.count))
if (ns_ref_put(ns))
free_cgroup_ns(ns);
}

View File

@@ -140,14 +140,14 @@ extern struct ipc_namespace *copy_ipcs(unsigned long flags,
static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
{
if (ns)
refcount_inc(&ns->ns.count);
ns_ref_inc(ns);
return ns;
}
static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
{
if (ns) {
if (refcount_inc_not_zero(&ns->ns.count))
if (ns_ref_get(ns))
return ns;
}

View File

@@ -29,7 +29,7 @@ struct ns_common {
struct dentry *stashed;
const struct proc_ns_operations *ops;
unsigned int inum;
refcount_t count;
refcount_t __ns_ref; /* do not use directly */
union {
struct {
u64 ns_id;
@@ -43,16 +43,24 @@ struct ns_common {
int __ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, int inum);
void __ns_common_free(struct ns_common *ns);
#define to_ns_common(__ns) \
_Generic((__ns), \
struct cgroup_namespace *: &(__ns)->ns, \
struct ipc_namespace *: &(__ns)->ns, \
struct mnt_namespace *: &(__ns)->ns, \
struct net *: &(__ns)->ns, \
struct pid_namespace *: &(__ns)->ns, \
struct time_namespace *: &(__ns)->ns, \
struct user_namespace *: &(__ns)->ns, \
struct uts_namespace *: &(__ns)->ns)
#define to_ns_common(__ns) \
_Generic((__ns), \
struct cgroup_namespace *: &(__ns)->ns, \
const struct cgroup_namespace *: &(__ns)->ns, \
struct ipc_namespace *: &(__ns)->ns, \
const struct ipc_namespace *: &(__ns)->ns, \
struct mnt_namespace *: &(__ns)->ns, \
const struct mnt_namespace *: &(__ns)->ns, \
struct net *: &(__ns)->ns, \
const struct net *: &(__ns)->ns, \
struct pid_namespace *: &(__ns)->ns, \
const struct pid_namespace *: &(__ns)->ns, \
struct time_namespace *: &(__ns)->ns, \
const struct time_namespace *: &(__ns)->ns, \
struct user_namespace *: &(__ns)->ns, \
const struct user_namespace *: &(__ns)->ns, \
struct uts_namespace *: &(__ns)->ns, \
const struct uts_namespace *: &(__ns)->ns)
#define ns_init_inum(__ns) \
_Generic((__ns), \
@@ -83,4 +91,21 @@ void __ns_common_free(struct ns_common *ns);
#define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns)))
/*
 * Drop one reference on @ns.
 *
 * Returns the result of refcount_dec_and_test() on the private __ns_ref
 * counter. Callers in this series treat a true return as "last reference
 * gone" and go on to free the namespace. The result must not be ignored
 * (__must_check).
 */
static __always_inline __must_check bool __ns_ref_put(struct ns_common *ns)
{
return refcount_dec_and_test(&ns->__ns_ref);
}
/*
 * Try to take a reference on @ns.
 *
 * Returns the result of refcount_inc_not_zero() on the private __ns_ref
 * counter, i.e. false means no reference was taken because the count was
 * already zero. The result must not be ignored (__must_check).
 */
static __always_inline __must_check bool __ns_ref_get(struct ns_common *ns)
{
return refcount_inc_not_zero(&ns->__ns_ref);
}
/*
 * Type-generic reference count helpers. Each one resolves its namespace
 * argument to the embedded struct ns_common through to_ns_common() and
 * then operates on the private __ns_ref counter:
 *
 *   ns_ref_read()         - refcount_read()
 *   ns_ref_inc()          - refcount_inc()
 *   ns_ref_get()          - __ns_ref_get()
 *   ns_ref_put()          - __ns_ref_put()
 *   ns_ref_put_and_lock() - refcount_dec_and_lock() with @__lock
 */
#define ns_ref_read(__ns) refcount_read(&to_ns_common((__ns))->__ns_ref)
#define ns_ref_inc(__ns) refcount_inc(&to_ns_common((__ns))->__ns_ref)
#define ns_ref_get(__ns) __ns_ref_get(to_ns_common((__ns)))
#define ns_ref_put(__ns) __ns_ref_put(to_ns_common((__ns)))
#define ns_ref_put_and_lock(__ns, __lock) \
refcount_dec_and_lock(&to_ns_common((__ns))->__ns_ref, (__lock))
#endif

View File

@@ -62,7 +62,7 @@ static inline struct pid_namespace *to_pid_ns(struct ns_common *ns)
static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
{
if (ns != &init_pid_ns)
refcount_inc(&ns->ns.count);
ns_ref_inc(ns);
return ns;
}

View File

@@ -44,7 +44,7 @@ extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns);
static inline struct time_namespace *get_time_ns(struct time_namespace *ns)
{
refcount_inc(&ns->ns.count);
ns_ref_inc(ns);
return ns;
}
@@ -57,7 +57,7 @@ struct page *find_timens_vvar_page(struct vm_area_struct *vma);
static inline void put_time_ns(struct time_namespace *ns)
{
if (refcount_dec_and_test(&ns->ns.count))
if (ns_ref_put(ns))
free_time_ns(ns);
}

View File

@@ -176,7 +176,7 @@ static inline struct user_namespace *to_user_ns(struct ns_common *ns)
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
{
if (ns)
refcount_inc(&ns->ns.count);
ns_ref_inc(ns);
return ns;
}
@@ -186,7 +186,7 @@ extern void __put_user_ns(struct user_namespace *ns);
static inline void put_user_ns(struct user_namespace *ns)
{
if (ns && refcount_dec_and_test(&ns->ns.count))
if (ns && ns_ref_put(ns))
__put_user_ns(ns);
}

View File

@@ -25,7 +25,7 @@ static inline struct uts_namespace *to_uts_ns(struct ns_common *ns)
static inline void get_uts_ns(struct uts_namespace *ns)
{
refcount_inc(&ns->ns.count);
ns_ref_inc(ns);
}
extern struct uts_namespace *copy_utsname(unsigned long flags,
@@ -34,7 +34,7 @@ extern void free_uts_ns(struct uts_namespace *ns);
static inline void put_uts_ns(struct uts_namespace *ns)
{
if (refcount_dec_and_test(&ns->ns.count))
if (ns_ref_put(ns))
free_uts_ns(ns);
}

View File

@@ -270,7 +270,7 @@ static inline struct net *to_net_ns(struct ns_common *ns)
/* Try using get_net_track() instead */
static inline struct net *get_net(struct net *net)
{
refcount_inc(&net->ns.count);
ns_ref_inc(net);
return net;
}
@@ -281,7 +281,7 @@ static inline struct net *maybe_get_net(struct net *net)
* exists. If the reference count is zero this
* function fails and returns NULL.
*/
if (!refcount_inc_not_zero(&net->ns.count))
if (!ns_ref_get(net))
net = NULL;
return net;
}
@@ -289,7 +289,7 @@ static inline struct net *maybe_get_net(struct net *net)
/* Try using put_net_track() instead */
static inline void put_net(struct net *net)
{
if (refcount_dec_and_test(&net->ns.count))
if (ns_ref_put(net))
__put_net(net);
}
@@ -301,7 +301,7 @@ int net_eq(const struct net *net1, const struct net *net2)
static inline int check_net(const struct net *net)
{
return refcount_read(&net->ns.count) != 0;
return ns_ref_read(net) != 0;
}
void net_drop_ns(void *);

View File

@@ -8,7 +8,7 @@
#include <linux/utsname.h>
struct uts_namespace init_uts_ns = {
.ns.count = REFCOUNT_INIT(2),
.ns.__ns_ref = REFCOUNT_INIT(2),
.name = {
.sysname = UTS_SYSNAME,
.nodename = UTS_NODENAME,

View File

@@ -27,7 +27,7 @@ DEFINE_SPINLOCK(mq_lock);
* and not CONFIG_IPC_NS.
*/
struct ipc_namespace init_ipc_ns = {
.ns.count = REFCOUNT_INIT(1),
.ns.__ns_ref = REFCOUNT_INIT(1),
.user_ns = &init_user_ns,
.ns.inum = PROC_IPC_INIT_INO,
#ifdef CONFIG_IPC_NS

View File

@@ -199,7 +199,7 @@ static void free_ipc(struct work_struct *unused)
*/
void put_ipc_ns(struct ipc_namespace *ns)
{
if (refcount_dec_and_lock(&ns->ns.count, &mq_lock)) {
if (ns_ref_put_and_lock(ns, &mq_lock)) {
mq_clear_sbinfo(ns);
spin_unlock(&mq_lock);

View File

@@ -219,7 +219,7 @@ static bool have_favordynmods __ro_after_init = IS_ENABLED(CONFIG_CGROUP_FAVOR_D
/* cgroup namespace for init task */
struct cgroup_namespace init_cgroup_ns = {
.ns.count = REFCOUNT_INIT(2),
.ns.__ns_ref = REFCOUNT_INIT(2),
.user_ns = &init_user_ns,
.ns.ops = &cgroupns_operations,
.ns.inum = PROC_CGROUP_INIT_INO,

View File

@@ -5,7 +5,7 @@
int __ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, int inum)
{
refcount_set(&ns->count, 1);
refcount_set(&ns->__ns_ref, 1);
ns->stashed = NULL;
ns->ops = ops;
ns->ns_id = 0;

View File

@@ -71,7 +71,7 @@ static int pid_max_max = PID_MAX_LIMIT;
* the scheme scales to up to 4 million PIDs, runtime.
*/
struct pid_namespace init_pid_ns = {
.ns.count = REFCOUNT_INIT(2),
.ns.__ns_ref = REFCOUNT_INIT(2),
.idr = IDR_INIT(init_pid_ns.idr),
.pid_allocated = PIDNS_ADDING,
.level = 0,

View File

@@ -169,7 +169,7 @@ static void destroy_pid_namespace_work(struct work_struct *work)
parent = ns->parent;
destroy_pid_namespace(ns);
ns = parent;
} while (ns != &init_pid_ns && refcount_dec_and_test(&ns->ns.count));
} while (ns != &init_pid_ns && ns_ref_put(ns));
}
struct pid_namespace *copy_pid_ns(unsigned long flags,
@@ -184,7 +184,7 @@ struct pid_namespace *copy_pid_ns(unsigned long flags,
void put_pid_ns(struct pid_namespace *ns)
{
if (ns && ns != &init_pid_ns && refcount_dec_and_test(&ns->ns.count))
if (ns && ns != &init_pid_ns && ns_ref_put(ns))
schedule_work(&ns->work);
}
EXPORT_SYMBOL_GPL(put_pid_ns);

View File

@@ -480,7 +480,7 @@ const struct proc_ns_operations timens_for_children_operations = {
};
struct time_namespace init_time_ns = {
.ns.count = REFCOUNT_INIT(3),
.ns.__ns_ref = REFCOUNT_INIT(3),
.user_ns = &init_user_ns,
.ns.inum = PROC_TIME_INIT_INO,
.ns.ops = &timens_operations,

View File

@@ -65,7 +65,7 @@ struct user_namespace init_user_ns = {
.nr_extents = 1,
},
},
.ns.count = REFCOUNT_INIT(3),
.ns.__ns_ref = REFCOUNT_INIT(3),
.owner = GLOBAL_ROOT_UID,
.group = GLOBAL_ROOT_GID,
.ns.inum = PROC_USER_INIT_INO,

View File

@@ -225,7 +225,7 @@ static void free_user_ns(struct work_struct *work)
kfree_rcu(ns, ns.ns_rcu);
dec_user_namespaces(ucounts);
ns = parent;
} while (refcount_dec_and_test(&parent->ns.count));
} while (ns_ref_put(parent));
}
void __put_user_ns(struct user_namespace *ns)

View File

@@ -1328,7 +1328,7 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
struct netdev_rx_queue *queue = &dev->_rx[i];
struct kobject *kobj = &queue->kobj;
if (!refcount_read(&dev_net(dev)->ns.count))
if (!check_net(dev_net(dev)))
kobj->uevent_suppress = 1;
if (dev->sysfs_rx_queue_group)
sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
@@ -2061,7 +2061,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
while (--i >= new_num) {
struct netdev_queue *queue = dev->_tx + i;
if (!refcount_read(&dev_net(dev)->ns.count))
if (!check_net(dev_net(dev)))
queue->kobj.uevent_suppress = 1;
if (netdev_uses_bql(dev))
@@ -2315,7 +2315,7 @@ void netdev_unregister_kobject(struct net_device *ndev)
{
struct device *dev = &ndev->dev;
if (!refcount_read(&dev_net(ndev)->ns.count))
if (!check_net(dev_net(ndev)))
dev_set_uevent_suppress(dev, 1);
kobject_get(&dev->kobj);

View File

@@ -315,7 +315,7 @@ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
{
int id;
if (refcount_read(&net->ns.count) == 0)
if (!check_net(net))
return NETNSA_NSID_NOT_ASSIGNED;
spin_lock(&net->nsid_lock);

View File

@@ -329,13 +329,13 @@ void inet_twsk_purge(struct inet_hashinfo *hashinfo)
TCPF_NEW_SYN_RECV))
continue;
if (refcount_read(&sock_net(sk)->ns.count))
if (check_net(sock_net(sk)))
continue;
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
continue;
if (refcount_read(&sock_net(sk)->ns.count)) {
if (check_net(sock_net(sk))) {
sock_gen_put(sk);
goto restart;
}

View File

@@ -912,7 +912,7 @@ static void tcp_metrics_flush_all(struct net *net)
spin_lock_bh(&tcp_metrics_lock);
for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) {
match = net ? net_eq(tm_net(tm), net) :
!refcount_read(&tm_net(tm)->ns.count);
!check_net(tm_net(tm));
if (match) {
rcu_assign_pointer(*pp, tm->tcpm_next);
kfree_rcu(tm, rcu_head);