From b9b410cc1875120a0a2f84fd61c026cc994049e2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 13 May 2025 15:17:46 +0200 Subject: [PATCH 1/3] fanotify: Drop use of flex array in fanotify_fh We use flex array 'buf' in fanotify_fh to contain the file handle content. However the buffer is not a proper flex array. It has a preconfigured fixed size. Furthermore if fixed size is not big enough, we use external separately allocated buffer. Hence don't pretend buf is a flex array since we have to use special accessors anyway and instead just modify the accessors to do the pointer math without flex array. This fixes warnings that flex array is not the last struct member in fanotify_fid_event or fanotify_error_event. Signed-off-by: Jan Kara Reviewed-by: Amir Goldstein Link: https://patch.msgid.link/20250513131745.2808-2-jack@suse.cz --- fs/notify/fanotify/fanotify.c | 2 +- fs/notify/fanotify/fanotify.h | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 6d386080faf2..7bc5580a91dc 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -415,7 +415,7 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, { int dwords, type = 0; char *ext_buf = NULL; - void *buf = fh->buf; + void *buf = fh + 1; int err; fh->type = FILEID_ROOT; diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index b44e70e44be6..b78308975082 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -25,7 +25,7 @@ enum { * stored in either the first or last 2 dwords. */ #define FANOTIFY_INLINE_FH_LEN (3 << 2) -#define FANOTIFY_FH_HDR_LEN offsetof(struct fanotify_fh, buf) +#define FANOTIFY_FH_HDR_LEN sizeof(struct fanotify_fh) /* Fixed size struct for file handle */ struct fanotify_fh { @@ -34,7 +34,6 @@ struct fanotify_fh { #define FANOTIFY_FH_FLAG_EXT_BUF 1 u8 flags; u8 pad; - unsigned char buf[]; } __aligned(4); /* Variable size struct for dir file handle + child file handle + name */ @@ -92,7 +91,7 @@ static inline char **fanotify_fh_ext_buf_ptr(struct fanotify_fh *fh) BUILD_BUG_ON(FANOTIFY_FH_HDR_LEN % 4); BUILD_BUG_ON(__alignof__(char *) - 4 + sizeof(char *) > FANOTIFY_INLINE_FH_LEN); - return (char **)ALIGN((unsigned long)(fh->buf), __alignof__(char *)); + return (char **)ALIGN((unsigned long)(fh + 1), __alignof__(char *)); } static inline void *fanotify_fh_ext_buf(struct fanotify_fh *fh) @@ -102,7 +101,7 @@ static inline void *fanotify_fh_ext_buf(struct fanotify_fh *fh) static inline void *fanotify_fh_buf(struct fanotify_fh *fh) { - return fanotify_fh_has_ext_buf(fh) ? fanotify_fh_ext_buf(fh) : fh->buf; + return fanotify_fh_has_ext_buf(fh) ? fanotify_fh_ext_buf(fh) : fh + 1; } static inline int fanotify_info_dir_fh_len(struct fanotify_info *info) @@ -278,7 +277,7 @@ static inline void fanotify_init_event(struct fanotify_event *event, #define FANOTIFY_INLINE_FH(name, size) \ struct { \ struct fanotify_fh name; \ - /* Space for object_fh.buf[] - access with fanotify_fh_buf() */ \ + /* Space for filehandle - access with fanotify_fh_buf() */ \ unsigned char _inline_fh_buf[size]; \ } From 90d1238047a6479674db4b35264e9519186af9e8 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 16 May 2025 21:28:02 +0200 Subject: [PATCH 2/3] fanotify: remove redundant permission checks FAN_UNLIMITED_QUEUE and FAN_UNLIMITED_MARK flags are already checked as part of the CAP_SYS_ADMIN check for any FANOTIFY_ADMIN_INIT_FLAGS. Remove the individual CAP_SYS_ADMIN checks for these flags. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250516192803.838659-2-amir73il@gmail.com --- fs/notify/fanotify/fanotify_user.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index f2d840ae4ded..24112b0992be 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1334,6 +1334,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, * A group with FAN_UNLIMITED_MARKS does not contribute to mark count * in the limited groups account. */ + BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_MARKS)); if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS) && !inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_FANOTIFY_MARKS)) return ERR_PTR(-ENOSPC); @@ -1637,21 +1638,13 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) goto out_destroy_group; } + BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_QUEUE)); if (flags & FAN_UNLIMITED_QUEUE) { - fd = -EPERM; - if (!capable(CAP_SYS_ADMIN)) - goto out_destroy_group; group->max_events = UINT_MAX; } else { group->max_events = fanotify_max_queued_events; } - if (flags & FAN_UNLIMITED_MARKS) { - fd = -EPERM; - if (!capable(CAP_SYS_ADMIN)) - goto out_destroy_group; - } - if (flags & FAN_ENABLE_AUDIT) { fd = -EPERM; if (!capable(CAP_AUDIT_WRITE)) From 58f5fbeb367ff6f30a2448b2cad70f70b2de4b06 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 16 May 2025 21:28:03 +0200 Subject: [PATCH 3/3] fanotify: support watching filesystems and mounts inside userns An unprivileged user is allowed to create an fanotify group and add inode marks, but not filesystem, mntns and mount marks. Add limited support for setting up filesystem, mntns and mount marks by an unprivileged user under the following conditions: 1. User has CAP_SYS_ADMIN in the user ns where the group was created 2.a. User has CAP_SYS_ADMIN in the user ns where the sb was created OR (in case setting up a mntns mark) 2.b. User has CAP_SYS_ADMIN in the user ns associated with the mntns Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250516192803.838659-3-amir73il@gmail.com --- fs/notify/fanotify/fanotify.c | 1 + fs/notify/fanotify/fanotify_user.c | 39 +++++++++++++++++++++--------- include/linux/fanotify.h | 5 ++-- include/linux/fsnotify_backend.h | 1 + 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 7bc5580a91dc..3083643b864b 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -1009,6 +1009,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, static void fanotify_free_group_priv(struct fsnotify_group *group) { + put_user_ns(group->user_ns); kfree(group->fanotify_data.merge_hash); if (group->fanotify_data.ucounts) dec_ucount(group->fanotify_data.ucounts, diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 24112b0992be..b4a1626f6b8e 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1499,6 +1499,7 @@ static struct hlist_head *fanotify_alloc_merge_hash(void) /* fanotify syscalls */ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) { + struct user_namespace *user_ns = current_user_ns(); struct fsnotify_group *group; int f_flags, fd; unsigned int fid_mode = flags & FANOTIFY_FID_BITS; @@ -1513,10 +1514,11 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) /* * An unprivileged user can setup an fanotify group with * limited functionality - an unprivileged group is limited to - * notification events with file handles and it cannot use - * unlimited queue/marks. + * notification events with file handles or mount ids and it + * cannot use unlimited queue/marks. */ - if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || !fid_mode) + if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || + !(flags & (FANOTIFY_FID_BITS | FAN_REPORT_MNT))) return -EPERM; /* @@ -1595,8 +1597,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) } /* Enforce groups limits per user in all containing user ns */ - group->fanotify_data.ucounts = inc_ucount(current_user_ns(), - current_euid(), + group->fanotify_data.ucounts = inc_ucount(user_ns, current_euid(), UCOUNT_FANOTIFY_GROUPS); if (!group->fanotify_data.ucounts) { fd = -EMFILE; @@ -1605,6 +1606,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->fanotify_data.flags = flags | internal_flags; group->memcg = get_mem_cgroup_from_mm(current->mm); + group->user_ns = get_user_ns(user_ns); group->fanotify_data.merge_hash = fanotify_alloc_merge_hash(); if (!group->fanotify_data.merge_hash) { @@ -1804,6 +1806,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, struct fsnotify_group *group; struct path path; struct fan_fsid __fsid, *fsid = NULL; + struct user_namespace *user_ns = NULL; + struct mnt_namespace *mntns; u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS; @@ -1897,12 +1901,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, } /* - * An unprivileged user is not allowed to setup mount nor filesystem - * marks. This also includes setting up such marks by a group that - * was initialized by an unprivileged user. + * A user is allowed to setup sb/mount/mntns marks only if it is + * capable in the user ns where the group was created. */ - if ((!capable(CAP_SYS_ADMIN) || - FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV)) && + if (!ns_capable(group->user_ns, CAP_SYS_ADMIN) && mark_type != FAN_MARK_INODE) return -EPERM; @@ -1986,18 +1988,31 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, fsid = &__fsid; } - /* inode held in place by reference to path; group by fget on fd */ + /* + * In addition to being capable in the user ns where group was created, + * the user also needs to be capable in the user ns associated with + * the filesystem or in the user ns associated with the mntns + * (when marking mntns). + */ if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) { inode = path.dentry->d_inode; obj = inode; } else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + user_ns = path.mnt->mnt_sb->s_user_ns; obj = path.mnt; } else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) { + user_ns = path.mnt->mnt_sb->s_user_ns; obj = path.mnt->mnt_sb; } else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) { - obj = mnt_ns_from_dentry(path.dentry); + mntns = mnt_ns_from_dentry(path.dentry); + user_ns = mntns->user_ns; + obj = mntns; } + ret = -EPERM; + if (user_ns && !ns_capable(user_ns, CAP_SYS_ADMIN)) + goto path_put_and_out; + ret = -EINVAL; if (!obj) goto path_put_and_out; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 3c817dc6292e..879cff5eccd4 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -38,8 +38,7 @@ FAN_REPORT_PIDFD | \ FAN_REPORT_FD_ERROR | \ FAN_UNLIMITED_QUEUE | \ - FAN_UNLIMITED_MARKS | \ - FAN_REPORT_MNT) + FAN_UNLIMITED_MARKS) /* * fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN. @@ -48,7 +47,7 @@ * so one of the flags for reporting file handles is required. */ #define FANOTIFY_USER_INIT_FLAGS (FAN_CLASS_NOTIF | \ - FANOTIFY_FID_BITS | \ + FANOTIFY_FID_BITS | FAN_REPORT_MNT | \ FAN_CLOEXEC | FAN_NONBLOCK) #define FANOTIFY_INIT_FLAGS (FANOTIFY_ADMIN_INIT_FLAGS | \ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 6cd8d1d28b8b..7dd22db06317 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -250,6 +250,7 @@ struct fsnotify_group { * full */ struct mem_cgroup *memcg; /* memcg to charge allocations */ + struct user_namespace *user_ns; /* user ns where group was created */ /* groups can define private fields here or use the void *private */ union {