Merge patch series "mount notification"

Miklos Szeredi <mszeredi@redhat.com> says:

This should be ready for adding to the v6.15 queue.  I don't see the
SELinux discussion converging, so I took the simpler version out of the
two that were suggested.

* patches from https://lore.kernel.org/r/20250129165803.72138-1-mszeredi@redhat.com:
  vfs: add notifications for mount attach and detach
  fanotify: notify on mount attach and detach
  fsnotify: add mount notification infrastructure

Link: https://lore.kernel.org/r/20250129165803.72138-1-mszeredi@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Christian Brauner
2025-01-30 17:06:31 +01:00
14 changed files with 393 additions and 36 deletions

View File

@@ -5,6 +5,8 @@
#include <linux/ns_common.h>
#include <linux/fs_pin.h>
extern struct list_head notify_list;
struct mnt_namespace {
struct ns_common ns;
struct mount * root;
@@ -21,6 +23,10 @@ struct mnt_namespace {
struct rcu_head mnt_ns_rcu;
};
u64 event;
#ifdef CONFIG_FSNOTIFY
__u32 n_fsnotify_mask;
struct fsnotify_mark_connector __rcu *n_fsnotify_marks;
#endif
unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts;
struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
@@ -76,6 +82,8 @@ struct mount {
#ifdef CONFIG_FSNOTIFY
struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
__u32 mnt_fsnotify_mask;
struct list_head to_notify; /* need to queue notification */
struct mnt_namespace *prev_ns; /* previous namespace (NULL if none) */
#endif
int mnt_id; /* mount identifier, reused */
u64 mnt_id_unique; /* mount ID unique until reboot */
@@ -177,3 +185,21 @@ static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
return container_of(ns, struct mnt_namespace, ns);
}
#ifdef CONFIG_FSNOTIFY
static inline void mnt_notify_add(struct mount *m)
{
/* Optimize the case where there are no watches */
if ((m->mnt_ns && m->mnt_ns->n_fsnotify_marks) ||
(m->prev_ns && m->prev_ns->n_fsnotify_marks))
list_add_tail(&m->to_notify, &notify_list);
else
m->prev_ns = m->mnt_ns;
}
#else
/* No-op variant used when CONFIG_FSNOTIFY is not set: nothing to queue. */
static inline void mnt_notify_add(struct mount *m)
{
}
#endif
struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry);

View File

@@ -81,6 +81,9 @@ static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
static DEFINE_SEQLOCK(mnt_ns_tree_lock);
#ifdef CONFIG_FSNOTIFY
LIST_HEAD(notify_list); /* protected by namespace_sem */
#endif
static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */
@@ -163,6 +166,7 @@ static void mnt_ns_release(struct mnt_namespace *ns)
{
/* keep alive for {list,stat}mount() */
if (refcount_dec_and_test(&ns->passive)) {
fsnotify_mntns_delete(ns);
put_user_ns(ns->user_ns);
kfree(ns);
}
@@ -1176,6 +1180,8 @@ static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt)
ns->mnt_first_node = &mnt->mnt_node;
rb_link_node(&mnt->mnt_node, parent, link);
rb_insert_color(&mnt->mnt_node, &ns->mounts);
mnt_notify_add(mnt);
}
/*
@@ -1723,6 +1729,50 @@ int may_umount(struct vfsmount *mnt)
EXPORT_SYMBOL(may_umount);
#ifdef CONFIG_FSNOTIFY
/*
 * Emit the fsnotify event(s) describing how @p changed namespace since
 * the last notification, then record the current namespace in prev_ns.
 *
 *   prev_ns == mnt_ns            -> move event on mnt_ns
 *   no prev_ns, has mnt_ns       -> attach event on mnt_ns
 *   has prev_ns, no mnt_ns       -> detach event on prev_ns
 *   two different namespaces     -> detach from prev_ns, attach to mnt_ns
 */
static void mnt_notify(struct mount *p)
{
	struct mnt_namespace *old_ns = p->prev_ns;
	struct mnt_namespace *new_ns = p->mnt_ns;

	if (old_ns == new_ns) {
		fsnotify_mnt_move(new_ns, &p->mnt);
	} else if (!old_ns) {
		fsnotify_mnt_attach(new_ns, &p->mnt);
	} else if (!new_ns) {
		fsnotify_mnt_detach(old_ns, &p->mnt);
	} else {
		fsnotify_mnt_detach(old_ns, &p->mnt);
		fsnotify_mnt_attach(new_ns, &p->mnt);
	}

	p->prev_ns = new_ns;
}
/*
 * Notify about mounts that were added/reparented/detached/remain
 * connected after unmount.
 *
 * Walks notify_list, sends the notification for each queued mount and
 * takes the mount off the list again.
 */
static void notify_mnt_list(void)
{
	struct mount *mnt, *next;

	list_for_each_entry_safe(mnt, next, &notify_list, to_notify) {
		mnt_notify(mnt);
		list_del_init(&mnt->to_notify);
	}
}
/* Report whether any mount is currently queued on notify_list. */
static bool need_notify_mnt_list(void)
{
	if (list_empty(&notify_list))
		return false;

	return true;
}
#else
/* No-op variant used when CONFIG_FSNOTIFY is not set. */
static void notify_mnt_list(void)
{
}
/* Without CONFIG_FSNOTIFY there is never anything to notify. */
static bool need_notify_mnt_list(void)
{
return false;
}
#endif
static void namespace_unlock(void)
{
struct hlist_head head;
@@ -1733,7 +1783,18 @@ static void namespace_unlock(void)
hlist_move_list(&unmounted, &head);
list_splice_init(&ex_mountpoints, &list);
up_write(&namespace_sem);
if (need_notify_mnt_list()) {
/*
* No point blocking out concurrent readers while notifications
* are sent. This will also allow statmount()/listmount() to run
* concurrently.
*/
downgrade_write(&namespace_sem);
notify_mnt_list();
up_read(&namespace_sem);
} else {
up_write(&namespace_sem);
}
shrink_dentry_list(&list);
@@ -1846,6 +1907,19 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
change_mnt_propagation(p, MS_PRIVATE);
if (disconnect)
hlist_add_head(&p->mnt_umount, &unmounted);
/*
* At this point p->mnt_ns is NULL, notification will be queued
* only if
*
* - p->prev_ns is non-NULL *and*
* - p->prev_ns->n_fsnotify_marks is non-NULL
*
* This will preclude queuing the mount if this is a cleanup
* after a failed copy_tree() or destruction of an anonymous
* namespace, etc.
*/
mnt_notify_add(p);
}
}
@@ -2145,16 +2219,24 @@ struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool pr
}
}
/*
 * Map @dentry to the mount namespace it refers to.
 *
 * Returns NULL when is_mnt_ns_file() says @dentry is not a mount
 * namespace file; otherwise returns the mnt_namespace obtained from the
 * dentry's inode via get_proc_ns().
 */
struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry)
{
	return is_mnt_ns_file(dentry) ?
		to_mnt_ns(get_proc_ns(dentry->d_inode)) : NULL;
}
static bool mnt_ns_loop(struct dentry *dentry)
{
/* Could bind mounting the mount namespace inode cause a
* mount namespace loop?
*/
struct mnt_namespace *mnt_ns;
if (!is_mnt_ns_file(dentry))
struct mnt_namespace *mnt_ns = mnt_ns_from_dentry(dentry);
if (!mnt_ns)
return false;
mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}
@@ -2547,6 +2629,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
dest_mp = smp;
unhash_mnt(source_mnt);
attach_mnt(source_mnt, top_mnt, dest_mp, beneath);
mnt_notify_add(source_mnt);
touch_mnt_namespace(source_mnt->mnt_ns);
} else {
if (source_mnt->mnt_ns) {
@@ -4468,6 +4551,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
list_del_init(&new_mnt->mnt_expire);
put_mountpoint(root_mp);
unlock_mount_hash();
mnt_notify_add(root_mnt);
mnt_notify_add(new_mnt);
chroot_fs_refs(&root, &new);
error = 0;
out4:

View File

@@ -166,6 +166,8 @@ static bool fanotify_should_merge(struct fanotify_event *old,
case FANOTIFY_EVENT_TYPE_FS_ERROR:
return fanotify_error_event_equal(FANOTIFY_EE(old),
FANOTIFY_EE(new));
case FANOTIFY_EVENT_TYPE_MNT:
return false;
default:
WARN_ON_ONCE(1);
}
@@ -312,7 +314,10 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
__func__, iter_info->report_mask, event_mask, data, data_type);
if (!fid_mode) {
if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) {
if (data_type != FSNOTIFY_EVENT_MNT)
return 0;
} else if (!fid_mode) {
/* Do we have path to open a file descriptor? */
if (!path)
return 0;
@@ -557,6 +562,20 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
return &pevent->fae;
}
/*
 * Allocate a mount event from fanotify_mnt_event_cachep and fill in its
 * type and the mount ID it reports.
 *
 * Returns the embedded fanotify_event, or NULL if the allocation failed.
 */
static struct fanotify_event *fanotify_alloc_mnt_event(u64 mnt_id, gfp_t gfp)
{
	struct fanotify_mnt_event *mnt_event =
		kmem_cache_alloc(fanotify_mnt_event_cachep, gfp);

	if (mnt_event) {
		mnt_event->fae.type = FANOTIFY_EVENT_TYPE_MNT;
		mnt_event->mnt_id = mnt_id;
		return &mnt_event->fae;
	}

	return NULL;
}
static struct fanotify_event *fanotify_alloc_perm_event(const void *data,
int data_type,
gfp_t gfp)
@@ -731,6 +750,7 @@ static struct fanotify_event *fanotify_alloc_event(
fid_mode);
struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
const struct path *path = fsnotify_data_path(data, data_type);
u64 mnt_id = fsnotify_data_mnt_id(data, data_type);
struct mem_cgroup *old_memcg;
struct dentry *moved = NULL;
struct inode *child = NULL;
@@ -826,8 +846,12 @@ static struct fanotify_event *fanotify_alloc_event(
moved, &hash, gfp);
} else if (fid_mode) {
event = fanotify_alloc_fid_event(id, fsid, &hash, gfp);
} else {
} else if (path) {
event = fanotify_alloc_path_event(path, &hash, gfp);
} else if (mnt_id) {
event = fanotify_alloc_mnt_event(mnt_id, gfp);
} else {
WARN_ON_ONCE(1);
}
if (!event)
@@ -927,7 +951,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS);
BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 22);
BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 24);
mask = fanotify_group_event_mask(group, iter_info, &match_mask,
mask, data, data_type, dir);
@@ -1028,6 +1052,11 @@ static void fanotify_free_error_event(struct fsnotify_group *group,
mempool_free(fee, &group->fanotify_data.error_events_pool);
}
static void fanotify_free_mnt_event(struct fanotify_event *event)
{
kmem_cache_free(fanotify_mnt_event_cachep, FANOTIFY_ME(event));
}
static void fanotify_free_event(struct fsnotify_group *group,
struct fsnotify_event *fsn_event)
{
@@ -1054,6 +1083,9 @@ static void fanotify_free_event(struct fsnotify_group *group,
case FANOTIFY_EVENT_TYPE_FS_ERROR:
fanotify_free_error_event(group, event);
break;
case FANOTIFY_EVENT_TYPE_MNT:
fanotify_free_mnt_event(event);
break;
default:
WARN_ON_ONCE(1);
}

View File

@@ -9,6 +9,7 @@ extern struct kmem_cache *fanotify_mark_cache;
extern struct kmem_cache *fanotify_fid_event_cachep;
extern struct kmem_cache *fanotify_path_event_cachep;
extern struct kmem_cache *fanotify_perm_event_cachep;
extern struct kmem_cache *fanotify_mnt_event_cachep;
/* Possible states of the permission event */
enum {
@@ -244,6 +245,7 @@ enum fanotify_event_type {
FANOTIFY_EVENT_TYPE_PATH_PERM,
FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */
FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */
FANOTIFY_EVENT_TYPE_MNT,
__FANOTIFY_EVENT_TYPE_NUM
};
@@ -409,12 +411,23 @@ struct fanotify_path_event {
struct path path;
};
/* Event of type FANOTIFY_EVENT_TYPE_MNT: carries only the ID of a mount. */
struct fanotify_mnt_event {
/* Embedded generic event; FANOTIFY_ME() converts back from it */
struct fanotify_event fae;
/* ID of the mount the event is about */
u64 mnt_id;
};
static inline struct fanotify_path_event *
FANOTIFY_PE(struct fanotify_event *event)
{
return container_of(event, struct fanotify_path_event, fae);
}
/*
 * Get the enclosing fanotify_mnt_event from its embedded fanotify_event.
 * Only meaningful when @event really is embedded in a fanotify_mnt_event.
 */
static inline struct fanotify_mnt_event *
FANOTIFY_ME(struct fanotify_event *event)
{
return container_of(event, struct fanotify_mnt_event, fae);
}
/*
* Structure for permission fanotify events. It gets allocated and freed in
* fanotify_handle_event() since we wait there for user response. When the
@@ -466,6 +479,11 @@ static inline bool fanotify_is_error_event(u32 mask)
return mask & FAN_FS_ERROR;
}
/* True if @mask contains FAN_MNT_ATTACH and/or FAN_MNT_DETACH. */
static inline bool fanotify_is_mnt_event(u32 mask)
{
	const u32 mnt_bits = FAN_MNT_ATTACH | FAN_MNT_DETACH;

	return (mask & mnt_bits) != 0;
}
static inline const struct path *fanotify_event_path(struct fanotify_event *event)
{
if (event->type == FANOTIFY_EVENT_TYPE_PATH)

View File

@@ -113,6 +113,7 @@ struct kmem_cache *fanotify_mark_cache __ro_after_init;
struct kmem_cache *fanotify_fid_event_cachep __ro_after_init;
struct kmem_cache *fanotify_path_event_cachep __ro_after_init;
struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
struct kmem_cache *fanotify_mnt_event_cachep __ro_after_init;
#define FANOTIFY_EVENT_ALIGN 4
#define FANOTIFY_FID_INFO_HDR_LEN \
@@ -123,6 +124,8 @@ struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
(sizeof(struct fanotify_event_info_error))
#define FANOTIFY_RANGE_INFO_LEN \
(sizeof(struct fanotify_event_info_range))
#define FANOTIFY_MNT_INFO_LEN \
(sizeof(struct fanotify_event_info_mnt))
static int fanotify_fid_info_len(int fh_len, int name_len)
{
@@ -178,6 +181,8 @@ static size_t fanotify_event_len(unsigned int info_mode,
fh_len = fanotify_event_object_fh_len(event);
event_len += fanotify_fid_info_len(fh_len, dot_len);
}
if (fanotify_is_mnt_event(event->mask))
event_len += FANOTIFY_MNT_INFO_LEN;
if (info_mode & FAN_REPORT_PIDFD)
event_len += FANOTIFY_PIDFD_INFO_LEN;
@@ -405,6 +410,25 @@ static int process_access_response(struct fsnotify_group *group,
return -ENOENT;
}
/*
 * Copy a FAN_EVENT_INFO_TYPE_MNT info record for @event to user space.
 *
 * @event: mount event; must be embedded in a struct fanotify_mnt_event
 * @buf:   user buffer to write the record to
 * @count: bytes still available in @buf
 *
 * Returns the number of bytes written (FANOTIFY_MNT_INFO_LEN) on success
 * or -EFAULT if the buffer is too small or the copy faults.
 *
 * The return type is a signed int because the function hands back
 * negative errno values and the caller tests the result with "ret < 0";
 * an unsigned return type would only work by accident of conversion.
 */
static int copy_mnt_info_to_user(struct fanotify_event *event,
				 char __user *buf, int count)
{
	/* Start from a zero-initialised record before filling in fields */
	struct fanotify_event_info_mnt info = { };

	info.hdr.info_type = FAN_EVENT_INFO_TYPE_MNT;
	info.hdr.len = FANOTIFY_MNT_INFO_LEN;

	/* The caller is expected to have sized the buffer for this record */
	if (WARN_ON(count < info.hdr.len))
		return -EFAULT;

	info.mnt_id = FANOTIFY_ME(event)->mnt_id;

	if (copy_to_user(buf, &info, sizeof(info)))
		return -EFAULT;

	return info.hdr.len;
}
static size_t copy_error_info_to_user(struct fanotify_event *event,
char __user *buf, int count)
{
@@ -700,6 +724,15 @@ static int copy_info_records_to_user(struct fanotify_event *event,
total_bytes += ret;
}
if (fanotify_is_mnt_event(event->mask)) {
ret = copy_mnt_info_to_user(event, buf, count);
if (ret < 0)
return ret;
buf += ret;
count -= ret;
total_bytes += ret;
}
return total_bytes;
}
@@ -1508,6 +1541,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID))
return -EINVAL;
/* Don't allow mixing mnt events with inode events for now */
if (flags & FAN_REPORT_MNT) {
if (class != FAN_CLASS_NOTIF)
return -EINVAL;
if (flags & (FANOTIFY_FID_BITS | FAN_REPORT_FD_ERROR))
return -EINVAL;
}
if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS)
return -EINVAL;
@@ -1767,7 +1808,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
int dfd, const char __user *pathname)
{
struct inode *inode = NULL;
struct vfsmount *mnt = NULL;
struct fsnotify_group *group;
struct path path;
struct fan_fsid __fsid, *fsid = NULL;
@@ -1776,7 +1816,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS;
unsigned int obj_type, fid_mode;
void *obj;
void *obj = NULL;
u32 umask = 0;
int ret;
@@ -1800,6 +1840,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
case FAN_MARK_FILESYSTEM:
obj_type = FSNOTIFY_OBJ_TYPE_SB;
break;
case FAN_MARK_MNTNS:
obj_type = FSNOTIFY_OBJ_TYPE_MNTNS;
break;
default:
return -EINVAL;
}
@@ -1847,6 +1890,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
return -EINVAL;
group = fd_file(f)->private_data;
/* Only report mount events on mnt namespace */
if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) {
if (mask & ~FANOTIFY_MOUNT_EVENTS)
return -EINVAL;
if (mark_type != FAN_MARK_MNTNS)
return -EINVAL;
} else {
if (mask & FANOTIFY_MOUNT_EVENTS)
return -EINVAL;
if (mark_type == FAN_MARK_MNTNS)
return -EINVAL;
}
/*
* An unprivileged user is not allowed to setup mount nor filesystem
* marks. This also includes setting up such marks by a group that
@@ -1888,7 +1944,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
* point.
*/
fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) &&
if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_MOUNT_EVENTS|FANOTIFY_EVENT_FLAGS) &&
(!fid_mode || mark_type == FAN_MARK_MOUNT))
return -EINVAL;
@@ -1938,17 +1994,21 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
}
/* inode held in place by reference to path; group by fget on fd */
if (mark_type == FAN_MARK_INODE) {
if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) {
inode = path.dentry->d_inode;
obj = inode;
} else {
mnt = path.mnt;
if (mark_type == FAN_MARK_MOUNT)
obj = mnt;
else
obj = mnt->mnt_sb;
} else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
obj = path.mnt;
} else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) {
obj = path.mnt->mnt_sb;
} else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) {
obj = mnt_ns_from_dentry(path.dentry);
}
ret = -EINVAL;
if (!obj)
goto path_put_and_out;
/*
* If some other task has this inode open for write we should not add
* an ignore mask, unless that ignore mask is supposed to survive
@@ -1956,10 +2016,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
*/
if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) &&
!(flags & FAN_MARK_IGNORED_SURV_MODIFY)) {
ret = mnt ? -EINVAL : -EISDIR;
ret = !inode ? -EINVAL : -EISDIR;
/* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */
if (ignore == FAN_MARK_IGNORE &&
(mnt || S_ISDIR(inode->i_mode)))
(!inode || S_ISDIR(inode->i_mode)))
goto path_put_and_out;
ret = 0;
@@ -1968,7 +2028,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
}
/* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
if (mnt || !S_ISDIR(inode->i_mode)) {
if (!inode || !S_ISDIR(inode->i_mode)) {
mask &= ~FAN_EVENT_ON_CHILD;
umask = FAN_EVENT_ON_CHILD;
/*
@@ -2042,7 +2102,7 @@ static int __init fanotify_user_setup(void)
FANOTIFY_DEFAULT_MAX_USER_MARKS);
BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 13);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 14);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11);
fanotify_mark_cache = KMEM_CACHE(fanotify_mark,
@@ -2055,6 +2115,7 @@ static int __init fanotify_user_setup(void)
fanotify_perm_event_cachep =
KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
}
fanotify_mnt_event_cachep = KMEM_CACHE(fanotify_mnt_event, SLAB_PANIC);
fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS;
init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] =

View File

@@ -121,6 +121,11 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n",
sb->s_dev, mflags, mark->mask, mark->ignore_mask);
} else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_MNTNS) {
struct mnt_namespace *mnt_ns = fsnotify_conn_mntns(mark->connector);
seq_printf(m, "fanotify mnt_ns:%u mflags:%x mask:%x ignored_mask:%x\n",
mnt_ns->ns.inum, mflags, mark->mask, mark->ignore_mask);
}
}

View File

@@ -28,6 +28,11 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
fsnotify_clear_marks_by_mount(mnt);
}
/* Drop all fsnotify marks attached to the mount namespace @mntns. */
void __fsnotify_mntns_delete(struct mnt_namespace *mntns)
{
fsnotify_clear_marks_by_mntns(mntns);
}
/**
* fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
* @sb: superblock being unmounted.
@@ -420,7 +425,7 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
file_name, cookie, iter_info);
}
static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp)
static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector *const *connp)
{
struct fsnotify_mark_connector *conn;
struct hlist_node *node = NULL;
@@ -538,14 +543,15 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
{
const struct path *path = fsnotify_data_path(data, data_type);
struct super_block *sb = fsnotify_data_sb(data, data_type);
struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type);
struct fsnotify_sb_info *sbinfo = sb ? fsnotify_sb_info(sb) : NULL;
struct fsnotify_iter_info iter_info = {};
struct mount *mnt = NULL;
struct inode *inode2 = NULL;
struct dentry *moved;
int inode2_type;
int ret = 0;
__u32 test_mask, marks_mask;
__u32 test_mask, marks_mask = 0;
if (path)
mnt = real_mount(path->mnt);
@@ -578,17 +584,20 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
if ((!sbinfo || !sbinfo->sb_marks) &&
(!mnt || !mnt->mnt_fsnotify_marks) &&
(!inode || !inode->i_fsnotify_marks) &&
(!inode2 || !inode2->i_fsnotify_marks))
(!inode2 || !inode2->i_fsnotify_marks) &&
(!mnt_data || !mnt_data->ns->n_fsnotify_marks))
return 0;
marks_mask = READ_ONCE(sb->s_fsnotify_mask);
if (sb)
marks_mask |= READ_ONCE(sb->s_fsnotify_mask);
if (mnt)
marks_mask |= READ_ONCE(mnt->mnt_fsnotify_mask);
if (inode)
marks_mask |= READ_ONCE(inode->i_fsnotify_mask);
if (inode2)
marks_mask |= READ_ONCE(inode2->i_fsnotify_mask);
if (mnt_data)
marks_mask |= READ_ONCE(mnt_data->ns->n_fsnotify_mask);
/*
* If this is a modify event we may need to clear some ignore masks.
@@ -618,6 +627,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
iter_info.marks[inode2_type] =
fsnotify_first_mark(&inode2->i_fsnotify_marks);
}
if (mnt_data) {
iter_info.marks[FSNOTIFY_ITER_TYPE_MNTNS] =
fsnotify_first_mark(&mnt_data->ns->n_fsnotify_marks);
}
/*
* We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark
@@ -702,11 +715,31 @@ void file_set_fsnotify_mode(struct file *file)
}
#endif
/*
 * Report a mount event @mask for @mnt to the watchers of namespace @ns.
 *
 * The event payload is the namespace plus the mount's unique ID
 * (mnt_id_unique).  Nothing is sent when @ns is NULL (which triggers a
 * one-time warning) or when @ns has no marks attached.
 */
void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt)
{
	struct fsnotify_mnt data = {
		.ns = ns,
		.mnt_id = real_mount(mnt)->mnt_id_unique,
	};

	/*
	 * The marks check is an optimization as well as making sure
	 * fsnotify_init() has been called.
	 */
	if (WARN_ON_ONCE(!ns) || !ns->n_fsnotify_marks)
		return;

	fsnotify(mask, &data, FSNOTIFY_EVENT_MNT, NULL, NULL, NULL, 0);
}
static __init int fsnotify_init(void)
{
int ret;
BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 24);
BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 26);
ret = init_srcu_struct(&fsnotify_mark_srcu);
if (ret)

View File

@@ -33,6 +33,12 @@ static inline struct super_block *fsnotify_conn_sb(
return conn->obj;
}
/*
 * Return the object of @conn as a mount namespace.  No type check is done
 * here; callers in this file test conn->type for FSNOTIFY_OBJ_TYPE_MNTNS
 * before using it.
 */
static inline struct mnt_namespace *fsnotify_conn_mntns(
struct fsnotify_mark_connector *conn)
{
return conn->obj;
}
static inline struct super_block *fsnotify_object_sb(void *obj,
enum fsnotify_obj_type obj_type)
{
@@ -89,6 +95,11 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
fsnotify_destroy_marks(fsnotify_sb_marks(sb));
}
/* Destroy every mark hanging off @mntns's n_fsnotify_marks connector. */
static inline void fsnotify_clear_marks_by_mntns(struct mnt_namespace *mntns)
{
fsnotify_destroy_marks(&mntns->n_fsnotify_marks);
}
/*
* update the dentry->d_flags of all of inode's children to indicate if inode cares
* about events that happen to its children.

View File

@@ -107,6 +107,8 @@ static fsnotify_connp_t *fsnotify_object_connp(void *obj,
return &real_mount(obj)->mnt_fsnotify_marks;
case FSNOTIFY_OBJ_TYPE_SB:
return fsnotify_sb_marks(obj);
case FSNOTIFY_OBJ_TYPE_MNTNS:
return &((struct mnt_namespace *)obj)->n_fsnotify_marks;
default:
return NULL;
}
@@ -120,6 +122,8 @@ static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn)
return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask;
else if (conn->type == FSNOTIFY_OBJ_TYPE_SB)
return &fsnotify_conn_sb(conn)->s_fsnotify_mask;
else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS)
return &fsnotify_conn_mntns(conn)->n_fsnotify_mask;
return NULL;
}
@@ -346,12 +350,15 @@ static void *fsnotify_detach_connector_from_object(
fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
} else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
fsnotify_conn_sb(conn)->s_fsnotify_mask = 0;
} else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS) {
fsnotify_conn_mntns(conn)->n_fsnotify_mask = 0;
}
rcu_assign_pointer(*connp, NULL);
conn->obj = NULL;
conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
fsnotify_update_sb_watchers(sb, conn);
if (sb)
fsnotify_update_sb_watchers(sb, conn);
return inode;
}
@@ -724,7 +731,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj,
* Attach the sb info before attaching a connector to any object on sb.
* The sb info will remain attached as long as sb lives.
*/
if (!fsnotify_sb_info(sb)) {
if (sb && !fsnotify_sb_info(sb)) {
err = fsnotify_attach_info_to_sb(sb);
if (err)
return err;
@@ -770,7 +777,8 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj,
/* mark should be the last entry. last is the current last entry */
hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
added:
fsnotify_update_sb_watchers(sb, conn);
if (sb)
fsnotify_update_sb_watchers(sb, conn);
/*
* Since connector is attached to object using cmpxchg() we are
* guaranteed that connector initialization is fully visible by anyone

View File

@@ -549,8 +549,10 @@ static void restore_mounts(struct list_head *to_restore)
mp = parent->mnt_mp;
parent = parent->mnt_parent;
}
if (parent != mnt->mnt_parent)
if (parent != mnt->mnt_parent) {
mnt_change_mountpoint(parent, mp, mnt);
mnt_notify_add(mnt);
}
}
}

View File

@@ -25,7 +25,7 @@
#define FANOTIFY_FID_BITS (FAN_REPORT_DFID_NAME_TARGET)
#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD)
#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD | FAN_REPORT_MNT)
/*
* fanotify_init() flags that require CAP_SYS_ADMIN.
@@ -38,7 +38,8 @@
FAN_REPORT_PIDFD | \
FAN_REPORT_FD_ERROR | \
FAN_UNLIMITED_QUEUE | \
FAN_UNLIMITED_MARKS)
FAN_UNLIMITED_MARKS | \
FAN_REPORT_MNT)
/*
* fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN.
@@ -58,7 +59,7 @@
#define FANOTIFY_INTERNAL_GROUP_FLAGS (FANOTIFY_UNPRIV)
#define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \
FAN_MARK_FILESYSTEM)
FAN_MARK_FILESYSTEM | FAN_MARK_MNTNS)
#define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \
FAN_MARK_FLUSH)
@@ -109,10 +110,13 @@
/* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */
#define FANOTIFY_ERROR_EVENTS (FAN_FS_ERROR)
#define FANOTIFY_MOUNT_EVENTS (FAN_MNT_ATTACH | FAN_MNT_DETACH)
/* Events that user can request to be notified on */
#define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \
FANOTIFY_INODE_EVENTS | \
FANOTIFY_ERROR_EVENTS)
FANOTIFY_ERROR_EVENTS | \
FANOTIFY_MOUNT_EVENTS)
/* Extra flags that may be reported with event or control handling of events */
#define FANOTIFY_EVENT_FLAGS (FAN_EVENT_ON_CHILD | FAN_ONDIR)

View File

@@ -299,6 +299,11 @@ static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt)
__fsnotify_vfsmount_delete(mnt);
}
/*
 * fsnotify_mntns_delete - a mount namespace is going away; hand it to
 * __fsnotify_mntns_delete().
 */
static inline void fsnotify_mntns_delete(struct mnt_namespace *mntns)
{
__fsnotify_mntns_delete(mntns);
}
/*
* fsnotify_inoderemove - an inode is going away
*/
@@ -507,4 +512,19 @@ static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode,
NULL, NULL, NULL, 0);
}
/* Report that @mnt was attached in namespace @ns (FS_MNT_ATTACH). */
static inline void fsnotify_mnt_attach(struct mnt_namespace *ns, struct vfsmount *mnt)
{
fsnotify_mnt(FS_MNT_ATTACH, ns, mnt);
}
/* Report that @mnt was detached from namespace @ns (FS_MNT_DETACH). */
static inline void fsnotify_mnt_detach(struct mnt_namespace *ns, struct vfsmount *mnt)
{
fsnotify_mnt(FS_MNT_DETACH, ns, mnt);
}
/* Report that @mnt moved within namespace @ns (sends FS_MNT_MOVE). */
static inline void fsnotify_mnt_move(struct mnt_namespace *ns, struct vfsmount *mnt)
{
fsnotify_mnt(FS_MNT_MOVE, ns, mnt);
}
#endif /* _LINUX_FS_NOTIFY_H */

View File

@@ -59,6 +59,10 @@
#define FS_PRE_ACCESS 0x00100000 /* Pre-content access hook */
#define FS_MNT_ATTACH 0x01000000 /* Mount was attached */
#define FS_MNT_DETACH 0x02000000 /* Mount was detached */
#define FS_MNT_MOVE (FS_MNT_ATTACH | FS_MNT_DETACH)
/*
* Set on inode mark that cares about things that happen to its children.
* Always set for dnotify and inotify.
@@ -80,6 +84,9 @@
*/
#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME)
/* Mount namespace events */
#define FSNOTIFY_MNT_EVENTS (FS_MNT_ATTACH | FS_MNT_DETACH)
/* Content events can be used to inspect file content */
#define FSNOTIFY_CONTENT_PERM_EVENTS (FS_OPEN_PERM | FS_OPEN_EXEC_PERM | \
FS_ACCESS_PERM)
@@ -108,6 +115,7 @@
/* Events that can be reported to backends */
#define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \
FSNOTIFY_MNT_EVENTS | \
FS_EVENTS_POSS_ON_CHILD | \
FS_DELETE_SELF | FS_MOVE_SELF | \
FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \
@@ -298,6 +306,7 @@ enum fsnotify_data_type {
FSNOTIFY_EVENT_PATH,
FSNOTIFY_EVENT_INODE,
FSNOTIFY_EVENT_DENTRY,
FSNOTIFY_EVENT_MNT,
FSNOTIFY_EVENT_ERROR,
};
@@ -318,6 +327,11 @@ static inline const struct path *file_range_path(const struct file_range *range)
return range->path;
}
/* Event payload passed to fsnotify() with data type FSNOTIFY_EVENT_MNT. */
struct fsnotify_mnt {
/* Mount namespace whose marks receive the event */
const struct mnt_namespace *ns;
/* ID of the mount the event is about */
u64 mnt_id;
};
static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
{
switch (data_type) {
@@ -383,6 +397,24 @@ static inline struct super_block *fsnotify_data_sb(const void *data,
}
}
/*
 * Interpret event @data as a struct fsnotify_mnt.
 *
 * Returns @data when @data_type is FSNOTIFY_EVENT_MNT and NULL for every
 * other data type.
 */
static inline const struct fsnotify_mnt *fsnotify_data_mnt(const void *data,
							   int data_type)
{
	if (data_type != FSNOTIFY_EVENT_MNT)
		return NULL;

	return data;
}
/*
 * Extract the mount ID from event @data.
 *
 * Returns 0 when the event does not carry mount data, i.e. when
 * fsnotify_data_mnt() yields NULL for this @data_type.
 */
static inline u64 fsnotify_data_mnt_id(const void *data, int data_type)
{
	const struct fsnotify_mnt *mnt_info = fsnotify_data_mnt(data, data_type);

	if (!mnt_info)
		return 0;

	return mnt_info->mnt_id;
}
static inline struct fs_error_report *fsnotify_data_error_report(
const void *data,
int data_type)
@@ -420,6 +452,7 @@ enum fsnotify_iter_type {
FSNOTIFY_ITER_TYPE_SB,
FSNOTIFY_ITER_TYPE_PARENT,
FSNOTIFY_ITER_TYPE_INODE2,
FSNOTIFY_ITER_TYPE_MNTNS,
FSNOTIFY_ITER_TYPE_COUNT
};
@@ -429,6 +462,7 @@ enum fsnotify_obj_type {
FSNOTIFY_OBJ_TYPE_INODE,
FSNOTIFY_OBJ_TYPE_VFSMOUNT,
FSNOTIFY_OBJ_TYPE_SB,
FSNOTIFY_OBJ_TYPE_MNTNS,
FSNOTIFY_OBJ_TYPE_COUNT,
FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT
};
@@ -613,8 +647,10 @@ extern int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data
extern void __fsnotify_inode_delete(struct inode *inode);
extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt);
extern void fsnotify_sb_delete(struct super_block *sb);
extern void __fsnotify_mntns_delete(struct mnt_namespace *mntns);
extern void fsnotify_sb_free(struct super_block *sb);
extern u32 fsnotify_get_cookie(void);
extern void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt);
static inline __u32 fsnotify_parent_needed_mask(__u32 mask)
{
@@ -928,6 +964,9 @@ static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
static inline void fsnotify_sb_delete(struct super_block *sb)
{}
/* Empty stub; presumably the variant built when CONFIG_FSNOTIFY is off. */
static inline void __fsnotify_mntns_delete(struct mnt_namespace *mntns)
{}
static inline void fsnotify_sb_free(struct super_block *sb)
{}
@@ -942,6 +981,9 @@ static inline u32 fsnotify_get_cookie(void)
static inline void fsnotify_unmount_inodes(struct super_block *sb)
{}
/* Empty stub for builds without CONFIG_FSNOTIFY: mount events are dropped. */
static inline void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt)
{}
#endif /* CONFIG_FSNOTIFY */
#endif /* __KERNEL __ */

View File

@@ -28,6 +28,8 @@
/* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */
#define FAN_PRE_ACCESS 0x00100000 /* Pre-content access hook */
#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */
#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */
#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */
@@ -64,6 +66,7 @@
#define FAN_REPORT_NAME 0x00000800 /* Report events with name */
#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */
#define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */
#define FAN_REPORT_MNT 0x00004000 /* Report mount events */
/* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */
#define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME)
@@ -94,6 +97,7 @@
#define FAN_MARK_INODE 0x00000000
#define FAN_MARK_MOUNT 0x00000010
#define FAN_MARK_FILESYSTEM 0x00000100
#define FAN_MARK_MNTNS 0x00000110
/*
* Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY
@@ -147,6 +151,7 @@ struct fanotify_event_metadata {
#define FAN_EVENT_INFO_TYPE_PIDFD 4
#define FAN_EVENT_INFO_TYPE_ERROR 5
#define FAN_EVENT_INFO_TYPE_RANGE 6
#define FAN_EVENT_INFO_TYPE_MNT 7
/* Special info types for FAN_RENAME */
#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10
@@ -200,6 +205,11 @@ struct fanotify_event_info_range {
__u64 count;
};
/* Info record of type FAN_EVENT_INFO_TYPE_MNT, reported with mount events. */
struct fanotify_event_info_mnt {
/* Common info header (info_type and len of this record) */
struct fanotify_event_info_header hdr;
/* ID of the mount that was attached or detached */
__u64 mnt_id;
};
/*
* User space may need to record additional information about its decision.
* The extra information type records what kind of information is included.