Merge tag 'vfs-6.17-rc3.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:

 - Fix two memory leaks in pidfs

 - Prevent changing the idmapping of an already idmapped mount without
   OPEN_TREE_CLONE through open_tree_attr()

 - Don't fail listing extended attributes in kernfs when no extended
   attributes are set

 - Fix the return value in coredump_parse()

 - Fix the error handling for unbuffered writes in netfs

 - Fix broken data integrity guarantees for O_SYNC writes via iomap

 - Fix UAF in __mark_inode_dirty()

 - Keep inode->i_blkbits constant in fuse

 - Fix coredump selftests

 - Fix get_unused_fd_flags() usage in do_handle_open()

 - Rename EXPORT_SYMBOL_GPL_FOR_MODULES to EXPORT_SYMBOL_FOR_MODULES

 - Fix use-after-free in bh_read()

 - Fix incorrect lflags value in the move_mount() syscall

* tag 'vfs-6.17-rc3.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  signal: Fix memory leak for PIDFD_SELF* sentinels
  kernfs: don't fail listing extended attributes
  coredump: Fix return value in coredump_parse()
  fs/buffer: fix use-after-free when call bh_read() helper
  pidfs: Fix memory leak in pidfd_info()
  netfs: Fix unbuffered write error handling
  fhandle: do_handle_open() should get FD with user flags
  module: Rename EXPORT_SYMBOL_GPL_FOR_MODULES to EXPORT_SYMBOL_FOR_MODULES
  fs: fix incorrect lflags value in the move_mount syscall
  selftests/coredump: Remove the read() that fails the test
  fuse: keep inode->i_blkbits constant
  iomap: Fix broken data integrity guarantees for O_SYNC writes
  selftests/mount_setattr: add smoke tests for open_tree_attr(2) bug
  open_tree_attr: do not allow id-mapping changes without OPEN_TREE_CLONE
  fs: writeback: fix use-after-free in __mark_inode_dirty()
This commit is contained in:
Linus Torvalds
2025-08-19 09:54:47 -07:00
21 changed files with 138 additions and 68 deletions

View File

@@ -76,20 +76,21 @@ unit as preprocessor statement. The above example would then read::
within the corresponding compilation unit before the #include for
<linux/export.h>. Typically it's placed before the first #include statement.
Using the EXPORT_SYMBOL_GPL_FOR_MODULES() macro
-----------------------------------------------
Using the EXPORT_SYMBOL_FOR_MODULES() macro
-------------------------------------------
Symbols exported using this macro are put into a module namespace. This
namespace cannot be imported.
namespace cannot be imported. These exports are GPL-only as they are only
intended for in-tree modules.
The macro takes a comma separated list of module names, allowing only those
modules to access this symbol. Simple tail-globs are supported.
For example::
EXPORT_SYMBOL_GPL_FOR_MODULES(preempt_notifier_inc, "kvm,kvm-*")
EXPORT_SYMBOL_FOR_MODULES(preempt_notifier_inc, "kvm,kvm-*")
will limit usage of this symbol to modules whoes name matches the given
will limit usage of this symbol to modules whose name matches the given
patterns.
How to use Symbols exported in Namespaces

View File

@@ -147,7 +147,7 @@ void rsa_enable(struct uart_8250_port *up)
if (up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16)
serial_out(up, UART_RSA_FRR, 0);
}
EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_enable, "8250_base");
EXPORT_SYMBOL_FOR_MODULES(rsa_enable, "8250_base");
/*
* Attempts to turn off the RSA FIFO and resets the RSA board back to 115kbps compat mode. It is
@@ -179,7 +179,7 @@ void rsa_disable(struct uart_8250_port *up)
up->port.uartclk = SERIAL_RSA_BAUD_BASE_LO * 16;
uart_port_unlock_irq(&up->port);
}
EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_disable, "8250_base");
EXPORT_SYMBOL_FOR_MODULES(rsa_disable, "8250_base");
void rsa_autoconfig(struct uart_8250_port *up)
{
@@ -192,7 +192,7 @@ void rsa_autoconfig(struct uart_8250_port *up)
if (__rsa_enable(up))
up->port.type = PORT_RSA;
}
EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_autoconfig, "8250_base");
EXPORT_SYMBOL_FOR_MODULES(rsa_autoconfig, "8250_base");
void rsa_reset(struct uart_8250_port *up)
{
@@ -201,7 +201,7 @@ void rsa_reset(struct uart_8250_port *up)
serial_out(up, UART_RSA_FRR, 0);
}
EXPORT_SYMBOL_GPL_FOR_MODULES(rsa_reset, "8250_base");
EXPORT_SYMBOL_FOR_MODULES(rsa_reset, "8250_base");
#ifdef CONFIG_SERIAL_8250_DEPRECATED_OPTIONS
#ifndef MODULE

View File

@@ -129,7 +129,7 @@ struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *n
}
return inode;
}
EXPORT_SYMBOL_GPL_FOR_MODULES(anon_inode_make_secure_inode, "kvm");
EXPORT_SYMBOL_FOR_MODULES(anon_inode_make_secure_inode, "kvm");
static struct file *__anon_inode_getfile(const char *name,
const struct file_operations *fops,

View File

@@ -157,8 +157,8 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
*/
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
__end_buffer_read_notouch(bh, uptodate);
put_bh(bh);
__end_buffer_read_notouch(bh, uptodate);
}
EXPORT_SYMBOL(end_buffer_read_sync);

View File

@@ -345,7 +345,7 @@ static bool coredump_parse(struct core_name *cn, struct coredump_params *cprm,
was_space = false;
err = cn_printf(cn, "%c", '\0');
if (err)
return err;
return false;
(*argv)[(*argc)++] = cn->used;
}
}

View File

@@ -402,7 +402,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
if (retval)
return retval;
CLASS(get_unused_fd, fd)(O_CLOEXEC);
CLASS(get_unused_fd, fd)(open_flag);
if (fd < 0)
return fd;

View File

@@ -2608,10 +2608,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
wakeup_bdi = inode_io_list_move_locked(inode, wb,
dirty_list);
spin_unlock(&wb->list_lock);
spin_unlock(&inode->i_lock);
trace_writeback_dirty_inode_enqueue(inode);
/*
* If this is the first dirty inode for this bdi,
* we have to wake-up the corresponding bdi thread
@@ -2621,6 +2617,11 @@ void __mark_inode_dirty(struct inode *inode, int flags)
if (wakeup_bdi &&
(wb->bdi->capabilities & BDI_CAP_WRITEBACK))
wb_wakeup_delayed(wb);
spin_unlock(&wb->list_lock);
spin_unlock(&inode->i_lock);
trace_writeback_dirty_inode_enqueue(inode);
return;
}
}

View File

@@ -289,11 +289,6 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
}
}
if (attr->blksize != 0)
inode->i_blkbits = ilog2(attr->blksize);
else
inode->i_blkbits = inode->i_sb->s_blocksize_bits;
/*
* Don't set the sticky bit in i_mode, unless we want the VFS
* to check permissions. This prevents failures due to the

View File

@@ -363,14 +363,14 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
if (iomap->flags & IOMAP_F_SHARED)
dio->flags |= IOMAP_DIO_COW;
if (iomap->flags & IOMAP_F_NEW) {
if (iomap->flags & IOMAP_F_NEW)
need_zeroout = true;
} else if (iomap->type == IOMAP_MAPPED) {
if (iomap_dio_can_use_fua(iomap, dio))
bio_opf |= REQ_FUA;
else
dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
}
else if (iomap->type == IOMAP_MAPPED &&
iomap_dio_can_use_fua(iomap, dio))
bio_opf |= REQ_FUA;
if (!(bio_opf & REQ_FUA))
dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
/*
* We can only do deferred completion for pure overwrites that

View File

@@ -142,9 +142,9 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size)
struct kernfs_node *kn = kernfs_dentry_node(dentry);
struct kernfs_iattrs *attrs;
attrs = kernfs_iattrs_noalloc(kn);
attrs = kernfs_iattrs(kn);
if (!attrs)
return -ENODATA;
return -ENOMEM;
return simple_xattr_list(d_inode(dentry), &attrs->xattrs, buf, size);
}

View File

@@ -4551,20 +4551,10 @@ SYSCALL_DEFINE5(move_mount,
if (flags & MOVE_MOUNT_SET_GROUP) mflags |= MNT_TREE_PROPAGATION;
if (flags & MOVE_MOUNT_BENEATH) mflags |= MNT_TREE_BENEATH;
lflags = 0;
if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW;
if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
uflags = 0;
if (flags & MOVE_MOUNT_F_EMPTY_PATH) uflags = AT_EMPTY_PATH;
from_name = getname_maybe_null(from_pathname, uflags);
if (IS_ERR(from_name))
return PTR_ERR(from_name);
if (flags & MOVE_MOUNT_T_EMPTY_PATH)
uflags = AT_EMPTY_PATH;
lflags = 0;
if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW;
if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
uflags = 0;
if (flags & MOVE_MOUNT_T_EMPTY_PATH) uflags = AT_EMPTY_PATH;
to_name = getname_maybe_null(to_pathname, uflags);
if (IS_ERR(to_name))
return PTR_ERR(to_name);
@@ -4577,11 +4567,24 @@ SYSCALL_DEFINE5(move_mount,
to_path = fd_file(f_to)->f_path;
path_get(&to_path);
} else {
lflags = 0;
if (flags & MOVE_MOUNT_T_SYMLINKS)
lflags |= LOOKUP_FOLLOW;
if (flags & MOVE_MOUNT_T_AUTOMOUNTS)
lflags |= LOOKUP_AUTOMOUNT;
ret = filename_lookup(to_dfd, to_name, lflags, &to_path, NULL);
if (ret)
return ret;
}
uflags = 0;
if (flags & MOVE_MOUNT_F_EMPTY_PATH)
uflags = AT_EMPTY_PATH;
from_name = getname_maybe_null(from_pathname, uflags);
if (IS_ERR(from_name))
return PTR_ERR(from_name);
if (!from_name && from_dfd >= 0) {
CLASS(fd_raw, f_from)(from_dfd);
if (fd_empty(f_from))
@@ -4590,6 +4593,11 @@ SYSCALL_DEFINE5(move_mount,
return vfs_move_mount(&fd_file(f_from)->f_path, &to_path, mflags);
}
lflags = 0;
if (flags & MOVE_MOUNT_F_SYMLINKS)
lflags |= LOOKUP_FOLLOW;
if (flags & MOVE_MOUNT_F_AUTOMOUNTS)
lflags |= LOOKUP_AUTOMOUNT;
ret = filename_lookup(from_dfd, from_name, lflags, &from_path, NULL);
if (ret)
return ret;
@@ -5176,7 +5184,8 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename,
int ret;
struct mount_kattr kattr = {};
kattr.kflags = MOUNT_KATTR_IDMAP_REPLACE;
if (flags & OPEN_TREE_CLONE)
kattr.kflags = MOUNT_KATTR_IDMAP_REPLACE;
if (flags & AT_RECURSIVE)
kattr.kflags |= MOUNT_KATTR_RECURSE;

View File

@@ -281,8 +281,10 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq)
} else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) {
notes |= MADE_PROGRESS;
} else {
if (!stream->failed)
if (!stream->failed) {
stream->transferred += transferred;
stream->transferred_valid = true;
}
if (front->transferred < front->len)
set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags);
notes |= MADE_PROGRESS;

View File

@@ -254,6 +254,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)
if (front->start + front->transferred > stream->collected_to) {
stream->collected_to = front->start + front->transferred;
stream->transferred = stream->collected_to - wreq->start;
stream->transferred_valid = true;
notes |= MADE_PROGRESS;
}
if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
@@ -356,6 +357,7 @@ bool netfs_write_collection(struct netfs_io_request *wreq)
{
struct netfs_inode *ictx = netfs_inode(wreq->inode);
size_t transferred;
bool transferred_valid = false;
int s;
_enter("R=%x", wreq->debug_id);
@@ -376,12 +378,16 @@ bool netfs_write_collection(struct netfs_io_request *wreq)
continue;
if (!list_empty(&stream->subrequests))
return false;
if (stream->transferred < transferred)
if (stream->transferred_valid &&
stream->transferred < transferred) {
transferred = stream->transferred;
transferred_valid = true;
}
}
/* Okay, declare that all I/O is complete. */
wreq->transferred = transferred;
if (transferred_valid)
wreq->transferred = transferred;
trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
if (wreq->io_streams[1].active &&

View File

@@ -118,12 +118,12 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
wreq->io_streams[0].prepare_write = ictx->ops->prepare_write;
wreq->io_streams[0].issue_write = ictx->ops->issue_write;
wreq->io_streams[0].collected_to = start;
wreq->io_streams[0].transferred = LONG_MAX;
wreq->io_streams[0].transferred = 0;
wreq->io_streams[1].stream_nr = 1;
wreq->io_streams[1].source = NETFS_WRITE_TO_CACHE;
wreq->io_streams[1].collected_to = start;
wreq->io_streams[1].transferred = LONG_MAX;
wreq->io_streams[1].transferred = 0;
if (fscache_resources_valid(&wreq->cache_resources)) {
wreq->io_streams[1].avail = true;
wreq->io_streams[1].active = true;

View File

@@ -296,12 +296,12 @@ static __u32 pidfs_coredump_mask(unsigned long mm_flags)
static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
{
struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg;
struct task_struct *task __free(put_task) = NULL;
struct pid *pid = pidfd_pid(file);
size_t usize = _IOC_SIZE(cmd);
struct pidfd_info kinfo = {};
struct pidfs_exit_info *exit_info;
struct user_namespace *user_ns;
struct task_struct *task;
struct pidfs_attr *attr;
const struct cred *c;
__u64 mask;

View File

@@ -739,6 +739,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
sd.pos = kiocb.ki_pos;
if (ret <= 0)
break;
WARN_ONCE(ret > sd.total_len - left,
"Splice Exceeded! ret=%zd tot=%zu left=%zu\n",
ret, sd.total_len, left);
sd.num_spliced += ret;
sd.total_len -= ret;

View File

@@ -91,6 +91,6 @@
#define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", ns)
#define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "GPL", ns)
#define EXPORT_SYMBOL_GPL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods)
#define EXPORT_SYMBOL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods)
#endif /* _LINUX_EXPORT_H */

View File

@@ -150,6 +150,7 @@ struct netfs_io_stream {
bool active; /* T if stream is active */
bool need_retry; /* T if this stream needs retrying */
bool failed; /* T if this stream failed */
bool transferred_valid; /* T if ->transferred is valid */
};
/*

View File

@@ -4067,6 +4067,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
{
struct pid *pid;
enum pid_type type;
int ret;
/* Enforce flags be set to 0 until we add an extension. */
if (flags & ~PIDFD_SEND_SIGNAL_FLAGS)
@@ -4108,7 +4109,10 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
}
}
return do_pidfd_send_signal(pid, sig, type, info, flags);
ret = do_pidfd_send_signal(pid, sig, type, info, flags);
put_pid(pid);
return ret;
}
static int

View File

@@ -446,9 +446,6 @@ TEST_F(coredump, socket_detect_userspace_client)
if (info.coredump_mask & PIDFD_COREDUMPED)
goto out;
if (read(fd_coredump, &c, 1) < 1)
goto out;
exit_code = EXIT_SUCCESS;
out:
if (fd_peer_pidfd >= 0)

View File

@@ -107,6 +107,26 @@
#endif
#endif
/*
 * Fallback syscall number for open_tree_attr, used only when the included
 * headers have not already defined __NR_open_tree_attr.
 *
 * The default number is 467. Alpha uses its own number (577), MIPS adds
 * the per-ABI base (4000/6000/5000 for o32/n32/n64) and ia64 adds 1024.
 * Note that on MIPS the macro stays undefined if _MIPS_SIM matches none of
 * the three ABIs checked below.
 */
#ifndef __NR_open_tree_attr
#if defined __alpha__
#define __NR_open_tree_attr 577
#elif defined _MIPS_SIM
#if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
#define __NR_open_tree_attr (467 + 4000)
#endif
#if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
#define __NR_open_tree_attr (467 + 6000)
#endif
#if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
#define __NR_open_tree_attr (467 + 5000)
#endif
#elif defined __ia64__
#define __NR_open_tree_attr (467 + 1024)
#else
#define __NR_open_tree_attr 467
#endif
#endif
#ifndef MOUNT_ATTR_IDMAP
#define MOUNT_ATTR_IDMAP 0x00100000
#endif
@@ -121,6 +141,12 @@ static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flag
return syscall(__NR_mount_setattr, dfd, path, flags, attr, size);
}
/*
 * Raw wrapper for the open_tree_attr syscall: forwards all five arguments
 * unchanged to syscall() with __NR_open_tree_attr and returns its result.
 */
static inline int sys_open_tree_attr(int dfd, const char *path, unsigned int flags,
struct mount_attr *attr, size_t size)
{
return syscall(__NR_open_tree_attr, dfd, path, flags, attr, size);
}
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
ssize_t ret;
@@ -1222,6 +1248,12 @@ TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
attr.userns_fd = get_userns_fd(0, 10000, 10000);
ASSERT_GE(attr.userns_fd, 0);
ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
/*
* Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way
* to bypass this mount_setattr() restriction.
*/
ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
ASSERT_EQ(close(attr.userns_fd), 0);
ASSERT_EQ(close(open_tree_fd), 0);
}
@@ -1255,6 +1287,12 @@ TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
ASSERT_GE(attr.userns_fd, 0);
ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
sizeof(attr)), 0);
/*
* Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way
* to bypass this mount_setattr() restriction.
*/
ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
ASSERT_EQ(close(attr.userns_fd), 0);
ASSERT_EQ(close(open_tree_fd), 0);
}
@@ -1321,6 +1359,19 @@ TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
ASSERT_EQ(close(open_tree_fd), 0);
}
/*
 * Check the ownership of @path as seen through fstatat(@dfd, @path, ..., @flags).
 *
 * Returns true only if the lookup succeeds and both the owner uid and the
 * owner gid equal @expected_uid and @expected_gid respectively. A failing
 * fstatat() is reported as false.
 */
static bool expected_uid_gid(int dfd, const char *path, int flags,
			     uid_t expected_uid, gid_t expected_gid)
{
	struct stat sb;

	if (fstatat(dfd, path, &sb, flags) < 0)
		return false;

	if (sb.st_uid != expected_uid)
		return false;

	return sb.st_gid == expected_gid;
}
/**
* Validate that currently changing the idmapping of an idmapped mount fails.
*/
@@ -1331,6 +1382,8 @@ TEST_F(mount_setattr_idmapped, change_idmapping)
.attr_set = MOUNT_ATTR_IDMAP,
};
ASSERT_TRUE(expected_uid_gid(-EBADF, "/mnt/D", 0, 0, 0));
if (!mount_setattr_supported())
SKIP(return, "mount_setattr syscall not supported");
@@ -1348,27 +1401,25 @@ TEST_F(mount_setattr_idmapped, change_idmapping)
AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
ASSERT_EQ(close(attr.userns_fd), 0);
EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 0, 0));
EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000));
/* Change idmapping on a detached mount that is already idmapped. */
attr.userns_fd = get_userns_fd(0, 20000, 10000);
ASSERT_GE(attr.userns_fd, 0);
ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
/*
* Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way
* to bypass this mount_setattr() restriction.
*/
EXPECT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 20000, 20000));
EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000));
ASSERT_EQ(close(attr.userns_fd), 0);
ASSERT_EQ(close(open_tree_fd), 0);
}
/*
 * Check the ownership of @path as seen through fstatat(@dfd, @path, ..., @flags).
 *
 * Returns true only if the lookup succeeds and both the owner uid and the
 * owner gid equal @expected_uid and @expected_gid respectively. A failing
 * fstatat() is reported as false.
 */
static bool expected_uid_gid(int dfd, const char *path, int flags,
			     uid_t expected_uid, gid_t expected_gid)
{
	struct stat sb;

	if (fstatat(dfd, path, &sb, flags) < 0)
		return false;

	if (sb.st_uid != expected_uid)
		return false;

	return sb.st_gid == expected_gid;
}
TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
{
int open_tree_fd = -EBADF;