Merge tag 'vfs-7.1-rc5.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:
 "This contains fixes for the current development cycle. Note that AI
  related review sometimes delays fixes a bit because we find more fixes
  for the fixes. I might try and send smaller but more fixes PRs if this
  trend keeps up.

   - Fix various netfslib bugs

   - Fix an out-of-bounds write when listing idmappings

   - Fix the return values in jfs_mkdir() and orangefs_mkdir()

   - Fix a writeback array overflow in fuse

   - Fix a forced iversion increment on lazytime timestamp updates

   - Reject a negative timeval component in kern_select()

   - Fix error return when vfs_mkdir() fails in the cachefiles code

   - Fix wrong error code returned for pidns ioctls"

* tag 'vfs-7.1-rc5.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (31 commits)
  cachefiles: Fix error return when vfs_mkdir() fails
  afs: Fix the locking used by afs_get_link()
  netfs, afs: Fix write skipping in dir/link writepages
  netfs: Fix netfs_read_folio() to wait on writeback
  netfs: Fix folio->private handling in netfs_perform_write()
  netfs: Fix partial invalidation of streaming-write folio
  netfs: Fix potential UAF in netfs_unlock_abandoned_read_pages()
  netfs: Fix leak of request in netfs_write_begin() error handling
  netfs: Fix early put of sink folio in netfs_read_gaps()
  netfs: Fix write streaming disablement if fd open O_RDWR
  netfs: Fix read-gaps to remove netfs_folio from filled folio
  netfs: Fix potential deadlock in write-through mode
  netfs: Fix streaming write being overwritten
  netfs: Defer the emission of trace_netfs_folio()
  netfs: Fix netfs_invalidate_folio() to clear dirty bit if all changes gone
  netfs: Fix overrun check in netfs_extract_user_iter()
  netfs: fix error handling in netfs_extract_user_iter()
  netfs: Fix potential uninitialised var in netfs_extract_user_iter()
  netfs: fix VM_BUG_ON_FOLIO() issue in netfs_write_begin() call
  netfs: Fix zeropoint update where i_size > remote_i_size
  ...
This commit is contained in:
Linus Torvalds
2026-05-18 07:30:31 -07:00
44 changed files with 1172 additions and 438 deletions

View File

@@ -75,17 +75,4 @@ static inline void v9fs_invalidate_inode_attr(struct inode *inode)
int v9fs_open_to_dotl_flags(int flags);
static inline void v9fs_i_size_write(struct inode *inode, loff_t i_size)
{
/*
* 32-bit need the lock, concurrent updates could break the
* sequences and make i_size_read() loop forever.
* 64-bit updates are atomic and can skip the locking.
*/
if (sizeof(i_size) > sizeof(long))
spin_lock(&inode->i_lock);
i_size_write(inode, i_size);
if (sizeof(i_size) > sizeof(long))
spin_unlock(&inode->i_lock);
}
#endif

View File

@@ -1141,11 +1141,13 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
mode |= inode->i_mode & ~S_IALLUGO;
inode->i_mode = mode;
v9inode->netfs.remote_i_size = stat->length;
spin_lock(&inode->i_lock);
netfs_write_remote_i_size(inode, stat->length);
if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE))
v9fs_i_size_write(inode, stat->length);
i_size_write(inode, stat->length);
/* not real number of blocks, but 512 byte ones ... */
inode->i_blocks = (stat->length + 512 - 1) >> 9;
spin_unlock(&inode->i_lock);
v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
}

View File

@@ -634,10 +634,12 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
mode |= inode->i_mode & ~S_IALLUGO;
inode->i_mode = mode;
v9inode->netfs.remote_i_size = stat->st_size;
spin_lock(&inode->i_lock);
netfs_write_remote_i_size(inode, stat->st_size);
if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE))
v9fs_i_size_write(inode, stat->st_size);
i_size_write(inode, stat->st_size);
inode->i_blocks = stat->st_blocks;
spin_unlock(&inode->i_lock);
} else {
if (stat->st_result_mask & P9_STATS_ATIME) {
inode_set_atime(inode, stat->st_atime_sec,
@@ -662,13 +664,15 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
mode |= inode->i_mode & ~S_IALLUGO;
inode->i_mode = mode;
}
spin_lock(&inode->i_lock);
if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) &&
stat->st_result_mask & P9_STATS_SIZE) {
v9inode->netfs.remote_i_size = stat->st_size;
v9fs_i_size_write(inode, stat->st_size);
netfs_write_remote_i_size(inode, stat->st_size);
i_size_write(inode, stat->st_size);
}
if (stat->st_result_mask & P9_STATS_BLOCKS)
inode->i_blocks = stat->st_blocks;
spin_unlock(&inode->i_lock);
}
if (stat->st_result_mask & P9_STATS_GEN)
inode->i_generation = stat->st_gen;

View File

@@ -30,6 +30,7 @@ kafs-y := \
server.o \
server_list.o \
super.o \
symlink.o \
validation.o \
vlclient.o \
vl_alias.o \

View File

@@ -44,6 +44,8 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags);
static int afs_dir_writepages(struct address_space *mapping,
struct writeback_control *wbc);
const struct file_operations afs_dir_file_operations = {
.open = afs_dir_open,
@@ -68,7 +70,7 @@ const struct inode_operations afs_dir_inode_operations = {
};
const struct address_space_operations afs_dir_aops = {
.writepages = afs_single_writepages,
.writepages = afs_dir_writepages,
};
const struct dentry_operations afs_fs_dentry_operations = {
@@ -233,22 +235,13 @@ static ssize_t afs_do_read_single(struct afs_vnode *dvnode, struct file *file)
struct iov_iter iter;
ssize_t ret;
loff_t i_size;
bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) &&
!test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags));
i_size = i_size_read(&dvnode->netfs.inode);
if (is_dir) {
if (i_size < AFS_DIR_BLOCK_SIZE)
return afs_bad(dvnode, afs_file_error_dir_small);
if (i_size > AFS_DIR_BLOCK_SIZE * 1024) {
trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
return -EFBIG;
}
} else {
if (i_size > AFSPATHMAX) {
trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
return -EFBIG;
}
if (i_size < AFS_DIR_BLOCK_SIZE)
return afs_bad(dvnode, afs_file_error_dir_small);
if (i_size > AFS_DIR_BLOCK_SIZE * 1024) {
trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
return -EFBIG;
}
/* Expand the storage. TODO: Shrink the storage too. */
@@ -277,24 +270,18 @@ static ssize_t afs_do_read_single(struct afs_vnode *dvnode, struct file *file)
* buffer.
*/
ret = -ESTALE;
} else if (is_dir) {
} else {
int ret2 = afs_dir_check(dvnode);
if (ret2 < 0)
ret = ret2;
} else if (i_size < folioq_folio_size(dvnode->directory, 0)) {
/* NUL-terminate a symlink. */
char *symlink = kmap_local_folio(folioq_folio(dvnode->directory, 0), 0);
symlink[i_size] = 0;
kunmap_local(symlink);
}
}
return ret;
}
ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file)
static ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file)
{
ssize_t ret;
@@ -1763,13 +1750,20 @@ static int afs_link(struct dentry *from, struct inode *dir,
return ret;
}
static void afs_symlink_put(struct afs_operation *op)
{
kfree(op->create.symlink);
op->create.symlink = NULL;
afs_create_put(op);
}
static const struct afs_operation_ops afs_symlink_operation = {
.issue_afs_rpc = afs_fs_symlink,
.issue_yfs_rpc = yfs_fs_symlink,
.success = afs_create_success,
.aborted = afs_check_for_remote_deletion,
.edit_dir = afs_create_edit_dir,
.put = afs_create_put,
.put = afs_symlink_put,
};
/*
@@ -1779,7 +1773,9 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *content)
{
struct afs_operation *op;
struct afs_symlink *symlink;
struct afs_vnode *dvnode = AFS_FS_I(dir);
size_t clen = strlen(content);
int ret;
_enter("{%llx:%llu},{%pd},%s",
@@ -1791,12 +1787,20 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
goto error;
ret = -EINVAL;
if (strlen(content) >= AFSPATHMAX)
if (clen >= AFSPATHMAX)
goto error;
ret = -ENOMEM;
symlink = kmalloc_flex(struct afs_symlink, content, clen + 1, GFP_KERNEL);
if (!symlink)
goto error;
refcount_set(&symlink->ref, 1);
memcpy(symlink->content, content, clen + 1);
op = afs_alloc_operation(NULL, dvnode->volume);
if (IS_ERR(op)) {
ret = PTR_ERR(op);
kfree(symlink);
goto error;
}
@@ -1808,7 +1812,7 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
op->dentry = dentry;
op->ops = &afs_symlink_operation;
op->create.reason = afs_edit_dir_for_symlink;
op->create.symlink = content;
op->create.symlink = symlink;
op->mtime = current_time(dir);
ret = afs_do_sync_operation(op);
afs_dir_unuse_cookie(dvnode, ret);
@@ -2192,28 +2196,33 @@ static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
}
/*
* Write the file contents to the cache as a single blob.
* Write the directory contents to the cache as a single blob.
*/
int afs_single_writepages(struct address_space *mapping,
struct writeback_control *wbc)
static int afs_dir_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct afs_vnode *dvnode = AFS_FS_I(mapping->host);
struct iov_iter iter;
bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) &&
!test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags));
int ret = 0;
/* Need to lock to prevent the folio queue and folios from being thrown
* away.
*/
down_read(&dvnode->validate_lock);
if (!down_read_trylock(&dvnode->validate_lock)) {
if (wbc->sync_mode == WB_SYNC_NONE) {
/* The VFS will have undirtied the inode. */
netfs_single_mark_inode_dirty(&dvnode->netfs.inode);
return 0;
}
down_read(&dvnode->validate_lock);
}
if (is_dir ?
test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) :
atomic64_read(&dvnode->cb_expires_at) != AFS_NO_CB_PROMISE) {
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0,
i_size_read(&dvnode->netfs.inode));
ret = netfs_writeback_single(mapping, wbc, &iter);
if (ret == 1)
ret = 0; /* Skipped write due to lock conflict. */
}
up_read(&dvnode->validate_lock);

View File

@@ -427,21 +427,35 @@ static void afs_free_request(struct netfs_io_request *rreq)
afs_put_wb_key(rreq->netfs_priv2);
}
static void afs_update_i_size(struct inode *inode, loff_t new_i_size)
/*
* Set the file size and block count, taking ->cb_lock and ->i_lock to maintain
* coherency and prevent 64-bit tearing on 32-bit arches.
*
* Also, estimate the number of 512 bytes blocks used, rounded up to nearest 1K
* for consistency with other AFS clients.
*/
void afs_set_i_size(struct afs_vnode *vnode, loff_t new_i_size)
{
struct afs_vnode *vnode = AFS_FS_I(inode);
struct inode *inode = &vnode->netfs.inode;
loff_t i_size;
write_seqlock(&vnode->cb_lock);
i_size = i_size_read(&vnode->netfs.inode);
spin_lock(&inode->i_lock);
i_size = i_size_read(inode);
if (new_i_size > i_size) {
i_size_write(&vnode->netfs.inode, new_i_size);
inode_set_bytes(&vnode->netfs.inode, new_i_size);
i_size_write(inode, new_i_size);
inode_set_bytes(inode, round_up(new_i_size, 1024));
}
spin_unlock(&inode->i_lock);
write_sequnlock(&vnode->cb_lock);
fscache_update_cookie(afs_vnode_cache(vnode), NULL, &new_i_size);
}
static void afs_update_i_size(struct inode *inode, loff_t new_i_size)
{
afs_set_i_size(AFS_FS_I(inode), new_i_size);
}
static void afs_netfs_invalidate_cache(struct netfs_io_request *wreq)
{
struct afs_vnode *vnode = AFS_FS_I(wreq->inode);

View File

@@ -886,7 +886,7 @@ void afs_fs_symlink(struct afs_operation *op)
namesz = name->len;
padsz = (4 - (namesz & 3)) & 3;
c_namesz = strlen(op->create.symlink);
c_namesz = strlen(op->create.symlink->content);
c_padsz = (4 - (c_namesz & 3)) & 3;
reqsz = (6 * 4) + namesz + padsz + c_namesz + c_padsz + (6 * 4);
@@ -910,7 +910,7 @@ void afs_fs_symlink(struct afs_operation *op)
bp = (void *) bp + padsz;
}
*bp++ = htonl(c_namesz);
memcpy(bp, op->create.symlink, c_namesz);
memcpy(bp, op->create.symlink->content, c_namesz);
bp = (void *) bp + c_namesz;
if (c_padsz > 0) {
memset(bp, 0, c_padsz);

View File

@@ -25,96 +25,6 @@
#include "internal.h"
#include "afs_fs.h"
void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op)
{
size_t size = strlen(op->create.symlink) + 1;
size_t dsize = 0;
char *p;
if (netfs_alloc_folioq_buffer(NULL, &vnode->directory, &dsize, size,
mapping_gfp_mask(vnode->netfs.inode.i_mapping)) < 0)
return;
vnode->directory_size = dsize;
p = kmap_local_folio(folioq_folio(vnode->directory, 0), 0);
memcpy(p, op->create.symlink, size);
kunmap_local(p);
set_bit(AFS_VNODE_DIR_READ, &vnode->flags);
netfs_single_mark_inode_dirty(&vnode->netfs.inode);
}
static void afs_put_link(void *arg)
{
struct folio *folio = virt_to_folio(arg);
kunmap_local(arg);
folio_put(folio);
}
const char *afs_get_link(struct dentry *dentry, struct inode *inode,
struct delayed_call *callback)
{
struct afs_vnode *vnode = AFS_FS_I(inode);
struct folio *folio;
char *content;
ssize_t ret;
if (!dentry) {
/* RCU pathwalk. */
if (!test_bit(AFS_VNODE_DIR_READ, &vnode->flags) || !afs_check_validity(vnode))
return ERR_PTR(-ECHILD);
goto good;
}
if (test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
goto fetch;
ret = afs_validate(vnode, NULL);
if (ret < 0)
return ERR_PTR(ret);
if (!test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
goto good;
fetch:
ret = afs_read_single(vnode, NULL);
if (ret < 0)
return ERR_PTR(ret);
set_bit(AFS_VNODE_DIR_READ, &vnode->flags);
good:
folio = folioq_folio(vnode->directory, 0);
folio_get(folio);
content = kmap_local_folio(folio, 0);
set_delayed_call(callback, afs_put_link, content);
return content;
}
int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
DEFINE_DELAYED_CALL(done);
const char *content;
int len;
content = afs_get_link(dentry, d_inode(dentry), &done);
if (IS_ERR(content)) {
do_delayed_call(&done);
return PTR_ERR(content);
}
len = umin(strlen(content), buflen);
if (copy_to_user(buffer, content, len))
len = -EFAULT;
do_delayed_call(&done);
return len;
}
static const struct inode_operations afs_symlink_inode_operations = {
.get_link = afs_get_link,
.readlink = afs_readlink,
};
static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode)
{
static unsigned long once_only;
@@ -214,7 +124,7 @@ static int afs_inode_init_from_status(struct afs_operation *op,
inode->i_mode = S_IFLNK | status->mode;
inode->i_op = &afs_symlink_inode_operations;
}
inode->i_mapping->a_ops = &afs_dir_aops;
inode->i_mapping->a_ops = &afs_symlink_aops;
inode_nohighmem(inode);
mapping_set_release_always(inode->i_mapping);
break;
@@ -224,7 +134,8 @@ static int afs_inode_init_from_status(struct afs_operation *op,
return afs_protocol_error(NULL, afs_eproto_file_type);
}
afs_set_i_size(vnode, status->size);
i_size_write(inode, status->size);
inode_set_bytes(inode, status->size);
afs_set_netfs_context(vnode);
vnode->invalid_before = status->data_version;
@@ -253,7 +164,8 @@ static void afs_apply_status(struct afs_operation *op,
{
struct afs_file_status *status = &vp->scb.status;
struct afs_vnode *vnode = vp->vnode;
struct inode *inode = &vnode->netfs.inode;
struct netfs_inode *ictx = &vnode->netfs;
struct inode *inode = &ictx->inode;
struct timespec64 t;
umode_t mode;
bool unexpected_jump = false;
@@ -336,6 +248,8 @@ static void afs_apply_status(struct afs_operation *op,
}
if (data_changed) {
unsigned long long zero_point, size = status->size;
inode_set_iversion_raw(inode, status->data_version);
/* Only update the size if the data version jumped. If the
@@ -343,16 +257,25 @@ static void afs_apply_status(struct afs_operation *op,
* idea of what the size should be that's not the same as
* what's on the server.
*/
vnode->netfs.remote_i_size = status->size;
if (change_size || status->size > i_size_read(inode)) {
afs_set_i_size(vnode, status->size);
spin_lock(&inode->i_lock);
if (change_size || size > i_size_read(inode)) {
/* We can read the sizes directly as we hold i_lock. */
zero_point = ictx->_zero_point;
if (unexpected_jump)
vnode->netfs.zero_point = status->size;
zero_point = size;
netfs_write_sizes(inode, size, size, zero_point);
inode_set_bytes(inode, size);
inode_set_ctime_to_ts(inode, t);
inode_set_atime_to_ts(inode, t);
} else {
netfs_write_remote_i_size(inode, size);
}
spin_unlock(&inode->i_lock);
if (op->ops == &afs_fetch_data_operation)
op->fetch.subreq->rreq->i_size = status->size;
op->fetch.subreq->rreq->i_size = size;
}
}
@@ -709,7 +632,7 @@ int afs_getattr(struct mnt_idmap *idmap, const struct path *path,
* it, but we need to give userspace the server's size.
*/
if (S_ISDIR(inode->i_mode))
stat->size = vnode->netfs.remote_i_size;
stat->size = netfs_read_remote_i_size(inode);
} while (read_seqretry(&vnode->cb_lock, seq));
return 0;
@@ -756,12 +679,14 @@ void afs_evict_inode(struct inode *inode)
.range_end = LLONG_MAX,
};
afs_single_writepages(inode->i_mapping, &wbc);
inode->i_mapping->a_ops->writepages(inode->i_mapping, &wbc);
}
netfs_wait_for_outstanding_io(inode);
truncate_inode_pages_final(&inode->i_data);
netfs_free_folioq_buffer(vnode->directory);
if (vnode->symlink)
afs_evict_symlink(vnode);
afs_set_cache_aux(vnode, &aux);
netfs_clear_inode_writeback(inode, &aux);
@@ -889,7 +814,7 @@ int afs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
*/
if (!(attr->ia_valid & (supported & ~ATTR_SIZE & ~ATTR_MTIME)) &&
attr->ia_size < i_size &&
attr->ia_size > vnode->netfs.remote_i_size) {
attr->ia_size > netfs_read_remote_i_size(inode)) {
truncate_setsize(inode, attr->ia_size);
netfs_resize_file(&vnode->netfs, size, false);
fscache_resize_cookie(afs_vnode_cache(vnode),

View File

@@ -710,6 +710,7 @@ struct afs_vnode {
#define AFS_VNODE_DIR_READ 11 /* Set if we've read a dir's contents */
struct folio_queue *directory; /* Directory contents */
struct afs_symlink __rcu *symlink; /* Symlink content */
struct list_head wb_keys; /* List of keys available for writeback */
struct list_head pending_locks; /* locks waiting to be granted */
struct list_head granted_locks; /* locks granted on this file */
@@ -776,6 +777,15 @@ struct afs_permits {
struct afs_permit permits[] __counted_by(nr_permits); /* List of permits sorted by key pointer */
};
/*
* Copy of symlink content for normal use.
*/
struct afs_symlink {
struct rcu_head rcu;
refcount_t ref;
char content[];
};
/*
* Error prioritisation and accumulation.
*/
@@ -887,7 +897,7 @@ struct afs_operation {
struct {
int reason; /* enum afs_edit_dir_reason */
mode_t mode;
const char *symlink;
struct afs_symlink *symlink;
} create;
struct {
bool need_rehash;
@@ -1098,13 +1108,10 @@ extern const struct inode_operations afs_dir_inode_operations;
extern const struct address_space_operations afs_dir_aops;
extern const struct dentry_operations afs_fs_dentry_operations;
ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file);
ssize_t afs_read_dir(struct afs_vnode *dvnode, struct file *file)
__acquires(&dvnode->validate_lock);
extern void afs_d_release(struct dentry *);
extern void afs_check_for_remote_deletion(struct afs_operation *);
int afs_single_writepages(struct address_space *mapping,
struct writeback_control *wbc);
/*
* dir_edit.c
@@ -1157,6 +1164,7 @@ extern int afs_open(struct inode *, struct file *);
extern int afs_release(struct inode *, struct file *);
void afs_fetch_data_async_rx(struct work_struct *work);
void afs_fetch_data_immediate_cancel(struct afs_call *call);
void afs_set_i_size(struct afs_vnode *vnode, loff_t new_i_size);
/*
* flock.c
@@ -1246,10 +1254,6 @@ extern void afs_fs_probe_cleanup(struct afs_net *);
*/
extern const struct afs_operation_ops afs_fetch_status_operation;
void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op);
const char *afs_get_link(struct dentry *dentry, struct inode *inode,
struct delayed_call *callback);
int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen);
extern void afs_vnode_commit_status(struct afs_operation *, struct afs_vnode_param *);
extern int afs_fetch_status(struct afs_vnode *, struct key *, bool, afs_access_t *);
extern int afs_ilookup5_test_by_fid(struct inode *, void *);
@@ -1599,6 +1603,21 @@ void afs_detach_volume_from_servers(struct afs_volume *volume, struct afs_server
extern int __init afs_fs_init(void);
extern void afs_fs_exit(void);
/*
* symlink.c
*/
extern const struct inode_operations afs_symlink_inode_operations;
extern const struct address_space_operations afs_symlink_aops;
void afs_invalidate_symlink(struct afs_vnode *vnode);
void afs_evict_symlink(struct afs_vnode *vnode);
void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op);
const char *afs_get_link(struct dentry *dentry, struct inode *inode,
struct delayed_call *callback);
int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen);
int afs_symlink_writepages(struct address_space *mapping,
struct writeback_control *wbc);
/*
* validation.c
*/
@@ -1758,16 +1777,6 @@ static inline void afs_update_dentry_version(struct afs_operation *op,
(void *)(unsigned long)dir_vp->scb.status.data_version;
}
/*
* Set the file size and block count. Estimate the number of 512 bytes blocks
* used, rounded up to nearest 1K for consistency with other AFS clients.
*/
static inline void afs_set_i_size(struct afs_vnode *vnode, u64 size)
{
i_size_write(&vnode->netfs.inode, size);
vnode->netfs.inode.i_blocks = ((size + 1023) >> 10) << 1;
}
/*
* Check for a conflicting operation on a directory that we just unlinked from.
* If someone managed to sneak a link or an unlink in on the file we just

278
fs/afs/symlink.c Normal file
View File

@@ -0,0 +1,278 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/* AFS filesystem symbolic link handling
*
* Copyright (C) 2026 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/iov_iter.h>
#include "internal.h"
/*
 * Drop one reference on a symlink content copy.  Once the count reaches zero
 * the copy is passed to kfree_rcu() rather than being freed on the spot; the
 * RCU-walk branch of afs_get_link() hands out ->content without taking a
 * reference.
 */
static void afs_put_symlink(struct afs_symlink *symlink)
{
	if (!refcount_dec_and_test(&symlink->ref))
		return;

	kfree_rcu(symlink, rcu);
}
/*
 * Install @symlink (which may be NULL) as the vnode's symlink content copy and
 * drop a reference on whichever copy was installed before, if any.  The
 * lockdep condition names ->validate_lock as the lock covering the pointer.
 */
static void afs_replace_symlink(struct afs_vnode *vnode, struct afs_symlink *symlink)
{
	struct afs_symlink *prev;

	prev = rcu_replace_pointer(vnode->symlink, symlink,
				   lockdep_is_held(&vnode->validate_lock));
	if (!prev)
		return;

	afs_put_symlink(prev);
}
/*
 * Discard the cached copy of a symlink's content after a third-party change to
 * the symlink has been detected, by replacing the vnode's pointer with NULL.
 *
 * Must be called with ->validate_lock held.
 */
void afs_invalidate_symlink(struct afs_vnode *vnode)
{
	afs_replace_symlink(vnode, NULL);
}
/*
 * Detach the symlink content copy from a vnode that is being evicted and drop
 * a reference on it.  No lock is asserted here: the lockdep condition handed
 * to rcu_replace_pointer() is simply "true".
 */
void afs_evict_symlink(struct afs_vnode *vnode)
{
	struct afs_symlink *doomed = rcu_replace_pointer(vnode->symlink, NULL, true);

	if (doomed)
		afs_put_symlink(doomed);
}
/*
 * Set up the inode of a symlink we have just created locally.
 *
 * The content copy carried by the operation is moved onto the vnode (the
 * operation's pointer is cleared so that it no longer refers to it).  If the
 * cache cookie is enabled, the content plus its NUL terminator is additionally
 * copied into a folio queue buffer and the inode is marked dirty so that it
 * gets written to the cache.  Allocation failure of that buffer is not
 * reported; the function just returns.
 */
void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op)
{
	struct afs_symlink *link = op->create.symlink;
	size_t nbytes = strlen(link->content) + 1;	/* Content + NUL */
	size_t allocated = 0;
	char *dst;

	/* Hand the content copy over from the operation to the vnode. */
	rcu_assign_pointer(vnode->symlink, link);
	op->create.symlink = NULL;

	/* Nothing more to do unless there's a cache to write to. */
	if (!fscache_cookie_enabled(netfs_i_cookie(&vnode->netfs)))
		return;

	if (netfs_alloc_folioq_buffer(NULL, &vnode->directory, &allocated, nbytes,
				      mapping_gfp_mask(vnode->netfs.inode.i_mapping)) < 0)
		return;
	vnode->directory_size = allocated;

	dst = kmap_local_folio(folioq_folio(vnode->directory, 0), 0);
	memcpy(dst, link->content, nbytes);
	kunmap_local(dst);

	netfs_single_mark_inode_dirty(&vnode->netfs.inode);
}
/*
 * Read a symlink in a single download.
 *
 * The content is read into the vnode->directory folio queue buffer (allocated
 * here if the vnode doesn't have one), then copied into a newly allocated,
 * NUL-terminated struct afs_symlink which is installed on the vnode with
 * afs_replace_symlink().  If the cache cookie is not enabled, the folio queue
 * buffer is released again before returning.
 *
 * Returns the value from netfs_read_single() (used as the content length when
 * it is >= 0), -EFBIG if the content is longer than PAGE_SIZE - 1 bytes,
 * -ENOMEM if the copy cannot be allocated, or the error from
 * netfs_alloc_folioq_buffer().
 *
 * NOTE(review): afs_replace_symlink() states ->validate_lock in its lockdep
 * condition; the caller chain visible in this file (afs_get_link()) holds it
 * for writing.
 */
static ssize_t afs_do_read_symlink(struct afs_vnode *vnode)
{
struct afs_symlink *symlink;
struct iov_iter iter;
ssize_t ret;
loff_t i_size;
/* Reject up front anything longer than PAGE_SIZE - 1 bytes. */
i_size = i_size_read(&vnode->netfs.inode);
if (i_size > PAGE_SIZE - 1) {
trace_afs_file_error(vnode, -EFBIG, afs_file_error_dir_big);
return -EFBIG;
}
/* Allocate a one-page receive buffer if the vnode doesn't have one. */
if (!vnode->directory) {
size_t cur_size = 0;
ret = netfs_alloc_folioq_buffer(NULL,
&vnode->directory, &cur_size, PAGE_SIZE,
mapping_gfp_mask(vnode->netfs.inode.i_mapping));
/* NOTE(review): assigned before ret is checked, so this is also
 * written when the allocation failed.
 */
vnode->directory_size = PAGE_SIZE - 1;
if (ret < 0)
return ret;
}
iov_iter_folio_queue(&iter, ITER_DEST, vnode->directory, 0, 0, PAGE_SIZE);
/* AFS requires us to perform the read of a symlink as a single unit to
 * avoid issues with the content being changed between reads.
 */
ret = netfs_read_single(&vnode->netfs.inode, NULL, &iter);
if (ret >= 0) {
/* From here on, the amount actually read is the size used. */
i_size = ret;
/* NOTE(review): this return and the -ENOMEM return below bypass the
 * cookie-disabled buffer release at the end of the function, leaving
 * the buffer attached to vnode->directory.
 */
if (i_size > PAGE_SIZE - 1) {
trace_afs_file_error(vnode, -EFBIG, afs_file_error_dir_big);
return -EFBIG;
}
vnode->directory_size = i_size;
/* Copy the symlink, allowing one extra byte for the NUL terminator. */
symlink = kmalloc_flex(struct afs_symlink, content, i_size + 1,
GFP_KERNEL);
if (!symlink)
return -ENOMEM;
refcount_set(&symlink->ref, 1);
symlink->content[i_size] = 0;
const char *s = kmap_local_folio(folioq_folio(vnode->directory, 0), 0);
memcpy(symlink->content, s, i_size);
kunmap_local(s);
/* Publish the new copy; any previous copy has a reference dropped. */
afs_replace_symlink(vnode, symlink);
}
/* Without an enabled cache cookie the receive buffer is dropped here. */
if (!fscache_cookie_enabled(netfs_i_cookie(&vnode->netfs))) {
netfs_free_folioq_buffer(vnode->directory);
vnode->directory = NULL;
vnode->directory_size = 0;
}
return ret;
}
/*
 * Read the symlink content with the vnode's cache cookie marked in use for the
 * duration of the read.  Returns whatever afs_do_read_symlink() returns.
 */
static ssize_t afs_read_symlink(struct afs_vnode *vnode)
{
	ssize_t nread;

	fscache_use_cookie(afs_vnode_cache(vnode), false);
	nread = afs_do_read_symlink(vnode);
	fscache_unuse_cookie(afs_vnode_cache(vnode), NULL, NULL);

	return nread;
}
/*
 * Delayed-call handler set by afs_get_link(): @arg is the struct afs_symlink
 * on which a reference was taken for the caller; drop it.
 */
static void afs_put_link(void *arg)
{
	struct afs_symlink *symlink = arg;

	afs_put_symlink(symlink);
}
/*
 * Get the content of a symlink (->get_link).
 *
 * @dentry:   The symlink's dentry, or NULL when called in RCU-walk mode.
 * @inode:    The symlink's inode.
 * @callback: Delayed call through which the caller later releases the content.
 *
 * In RCU-walk mode no locks or references are taken: if there is no content
 * copy attached to the vnode, or afs_check_validity() fails, -ECHILD is
 * returned; otherwise the content is returned with no delayed call set.
 *
 * Otherwise, if a copy is already attached, the vnode is validated and the
 * copy is used provided it is still there once ->validate_lock has been taken
 * for reading.  If there is no copy, ->validate_lock is taken for writing, the
 * content is read with afs_read_symlink() (unless someone else attached a copy
 * in the meantime) and the lock is downgraded.  A reference is taken on the
 * copy and @callback is set to drop it.
 *
 * Returns the NUL-terminated content, or an ERR_PTR(): -ECHILD (RCU-walk
 * only), the error from afs_validate() or afs_read_symlink(), or -ERESTARTSYS
 * if waiting for the lock was interrupted by a fatal signal.
 *
 * vnode->symlink is an __rcu pointer, so the places that only test it against
 * NULL use rcu_access_pointer() rather than a plain load.
 */
const char *afs_get_link(struct dentry *dentry, struct inode *inode,
			 struct delayed_call *callback)
{
	struct afs_symlink *symlink;
	struct afs_vnode *vnode = AFS_FS_I(inode);
	ssize_t ret;

	if (!dentry) {
		/* RCU pathwalk. */
		symlink = rcu_dereference(vnode->symlink);
		if (!symlink || !afs_check_validity(vnode))
			return ERR_PTR(-ECHILD);
		set_delayed_call(callback, NULL, NULL);
		return symlink->content;
	}

	/* Lockless peek: only a hint, rechecked under the lock below. */
	if (rcu_access_pointer(vnode->symlink)) {
		ret = afs_validate(vnode, NULL);
		if (ret < 0)
			return ERR_PTR(ret);
		down_read(&vnode->validate_lock);
		if (rcu_access_pointer(vnode->symlink))
			goto good;
		/* The copy went away (e.g. invalidated); go and read it. */
		up_read(&vnode->validate_lock);
	}

	if (down_write_killable(&vnode->validate_lock) < 0)
		return ERR_PTR(-ERESTARTSYS);
	/* Someone else may have read it whilst we waited for the lock. */
	if (!rcu_access_pointer(vnode->symlink)) {
		ret = afs_read_symlink(vnode);
		if (ret < 0) {
			up_write(&vnode->validate_lock);
			return ERR_PTR(ret);
		}
	}

	/* Both paths arrive at "good" holding the lock for reading. */
	downgrade_write(&vnode->validate_lock);

good:
	symlink = rcu_dereference_protected(vnode->symlink,
					    lockdep_is_held(&vnode->validate_lock));
	/* Pin the copy for the caller; afs_put_link() drops this. */
	refcount_inc(&symlink->ref);
	up_read(&vnode->validate_lock);

	set_delayed_call(callback, afs_put_link, symlink);
	return symlink->content;
}
/*
 * Copy the content of a symlink to a userspace buffer (->readlink).
 *
 * At most @buflen bytes are copied and no NUL terminator is appended.  Returns
 * the number of bytes copied, the error from afs_get_link(), or -EFAULT if the
 * copy to userspace fails.
 */
int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
	DEFINE_DELAYED_CALL(done);
	const char *target;
	int ret;

	target = afs_get_link(dentry, d_inode(dentry), &done);
	if (IS_ERR(target)) {
		ret = PTR_ERR(target);
	} else {
		ret = umin(strlen(target), buflen);
		if (copy_to_user(buffer, target, ret))
			ret = -EFAULT;
	}

	/* Release whatever afs_get_link() asked us to release, if anything. */
	do_delayed_call(&done);
	return ret;
}
/*
 * Write the symlink contents to the cache as a single blob.  We then throw
 * away the page we used to receive it.
 *
 * ->validate_lock is taken for reading.  If it cannot be obtained immediately
 * and this is WB_SYNC_NONE writeback, the inode is marked dirty again and 0 is
 * returned without writing anything; otherwise we wait for the lock.
 *
 * Returns 0 if the write succeeded, was not attempted or was skipped, or
 * otherwise the value returned by netfs_writeback_single().
 */
int afs_symlink_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct afs_vnode *vnode = AFS_FS_I(mapping->host);
struct iov_iter iter;
int ret = 0;
if (!down_read_trylock(&vnode->validate_lock)) {
if (wbc->sync_mode == WB_SYNC_NONE) {
/* The VFS will have undirtied the inode. */
netfs_single_mark_inode_dirty(&vnode->netfs.inode);
return 0;
}
down_read(&vnode->validate_lock);
}
/* Only write if there is a buffer to write from and cb_expires_at is not
 * AFS_NO_CB_PROMISE.
 */
if (vnode->directory &&
atomic64_read(&vnode->cb_expires_at) != AFS_NO_CB_PROMISE) {
iov_iter_folio_queue(&iter, ITER_SOURCE, vnode->directory, 0, 0,
i_size_read(&vnode->netfs.inode));
ret = netfs_writeback_single(mapping, wbc, &iter);
}
/* ret is also 0 here if no write was attempted, in which case any buffer
 * is discarded too.  The buffer is released under ->netfs.wb_lock.
 */
if (ret == 0) {
mutex_lock(&vnode->netfs.wb_lock);
netfs_free_folioq_buffer(vnode->directory);
vnode->directory = NULL;
vnode->directory_size = 0;
mutex_unlock(&vnode->netfs.wb_lock);
} else if (ret == 1) {
ret = 0; /* Skipped write due to lock conflict. */
}
up_read(&vnode->validate_lock);
return ret;
}
/*
 * Inode operations for AFS symlinks.  Both entry points are served by the
 * functions in this file; afs_readlink() is built on top of afs_get_link().
 */
const struct inode_operations afs_symlink_inode_operations = {
.get_link = afs_get_link,
.readlink = afs_readlink,
};
/*
 * Address space operations for AFS symlinks.  Only ->writepages is provided,
 * which writes the symlink content to the cache as a single blob.
 */
const struct address_space_operations afs_symlink_aops = {
.writepages = afs_symlink_writepages,
};

View File

@@ -465,11 +465,17 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
vnode->cb_ro_snapshot = cb_ro_snapshot;
vnode->cb_scrub = cb_scrub;
/* if the vnode's data version number changed then its contents are
* different */
/* If the vnode's data version number changed then its contents are
* different. Note that afs_apply_status() doesn't set ZAP_DATA on
* directories.
*/
zap |= test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
if (zap)
afs_zap_data(vnode);
if (zap) {
if (S_ISREG(vnode->netfs.inode.i_mode))
afs_zap_data(vnode);
else if (S_ISLNK(vnode->netfs.inode.i_mode))
afs_invalidate_symlink(vnode);
}
up_write(&vnode->validate_lock);
_leave(" = 0");
return 0;

View File

@@ -142,7 +142,7 @@ static void afs_issue_write_worker(struct work_struct *work)
afs_begin_vnode_operation(op);
op->store.write_iter = &subreq->io_iter;
op->store.i_size = umax(pos + len, vnode->netfs.remote_i_size);
op->store.i_size = umax(pos + len, netfs_read_remote_i_size(&vnode->netfs.inode));
op->mtime = inode_get_mtime(&vnode->netfs.inode);
afs_wait_for_operation(op);

View File

@@ -960,7 +960,7 @@ void yfs_fs_symlink(struct afs_operation *op)
_enter("");
contents_sz = strlen(op->create.symlink);
contents_sz = strlen(op->create.symlink->content);
call = afs_alloc_flat_call(op->net, &yfs_RXYFSSymlink,
sizeof(__be32) +
sizeof(struct yfs_xdr_RPCFlags) +
@@ -981,7 +981,7 @@ void yfs_fs_symlink(struct afs_operation *op)
bp = xdr_encode_u32(bp, 0); /* RPC flags */
bp = xdr_encode_YFSFid(bp, &dvp->fid);
bp = xdr_encode_name(bp, name);
bp = xdr_encode_string(bp, op->create.symlink, contents_sz);
bp = xdr_encode_string(bp, op->create.symlink->content, contents_sz);
bp = xdr_encode_YFSStoreStatus(bp, &mode, &op->mtime);
yfs_check_req(call, bp);

View File

@@ -130,6 +130,8 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
ret = cachefiles_inject_write_error();
if (ret == 0) {
subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700, NULL);
if (IS_ERR(subdir))
ret = PTR_ERR(subdir);
} else {
end_creating(subdir);
subdir = ERR_PTR(ret);

View File

@@ -2176,7 +2176,10 @@ static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos,
WARN_ON(!ap->num_folios);
/* Reached max pages */
/* Reached max pages or max folio slots */
if (ap->num_folios >= fc->max_pages)
return true;
if (DIV_ROUND_UP(bytes, PAGE_SIZE) > fc->max_pages)
return true;

View File

@@ -2124,7 +2124,13 @@ static int inode_update_cmtime(struct inode *inode, unsigned int flags)
inode_iversion_need_inc(inode))
return -EAGAIN;
} else {
if (inode_maybe_inc_iversion(inode, !!dirty))
/*
* Don't force iversion increment for pure lazytime
* updates (I_DIRTY_TIME only), let I_VERSION_QUERIED
* dictate whether the increment is needed.
*/
if (inode_maybe_inc_iversion(inode,
dirty != I_DIRTY_TIME))
dirty |= I_DIRTY_SYNC;
}
}

View File

@@ -309,7 +309,7 @@ static struct dentry *jfs_mkdir(struct mnt_idmap *idmap, struct inode *dip,
out1:
jfs_info("jfs_mkdir: rc:%d", rc);
return ERR_PTR(rc);
return rc ? ERR_PTR(rc) : NULL;
}
/*

View File

@@ -375,6 +375,8 @@ int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_
continue;
seq_printf(seq, "%u %u %u", extent->first, lower, extent->count);
if (seq_has_overflowed(seq))
return -EAGAIN;
seq->count++; /* mappings are separated by \0 */
if (seq_has_overflowed(seq))

View File

@@ -156,9 +156,8 @@ static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
netfs_cache_read_terminated, subreq);
}
static void netfs_queue_read(struct netfs_io_request *rreq,
struct netfs_io_subrequest *subreq,
bool last_subreq)
void netfs_queue_read(struct netfs_io_request *rreq,
struct netfs_io_subrequest *subreq)
{
struct netfs_io_stream *stream = &rreq->io_streams[0];
@@ -169,7 +168,8 @@ static void netfs_queue_read(struct netfs_io_request *rreq,
* remove entries off of the front.
*/
spin_lock(&rreq->lock);
list_add_tail(&subreq->rreq_link, &stream->subrequests);
/* Write IN_PROGRESS before pointer to new subreq */
list_add_tail_release(&subreq->rreq_link, &stream->subrequests);
if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
if (!stream->active) {
stream->collected_to = subreq->start;
@@ -178,11 +178,6 @@ static void netfs_queue_read(struct netfs_io_request *rreq,
}
}
if (last_subreq) {
smp_wmb(); /* Write lists before ALL_QUEUED. */
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
}
spin_unlock(&rreq->lock);
}
@@ -214,7 +209,6 @@ static void netfs_issue_read(struct netfs_io_request *rreq,
static void netfs_read_to_pagecache(struct netfs_io_request *rreq,
struct readahead_control *ractl)
{
struct netfs_inode *ictx = netfs_inode(rreq->inode);
unsigned long long start = rreq->start;
ssize_t size = rreq->len;
int ret = 0;
@@ -233,10 +227,13 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq,
subreq->start = start;
subreq->len = size;
netfs_queue_read(rreq, subreq);
source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);
subreq->source = source;
if (source == NETFS_DOWNLOAD_FROM_SERVER) {
unsigned long long zp = umin(ictx->zero_point, rreq->i_size);
unsigned long long zero_point = netfs_read_zero_point(rreq->inode);
unsigned long long zp = umin(zero_point, rreq->i_size);
size_t len = subreq->len;
if (unlikely(rreq->origin == NETFS_READ_SINGLE))
@@ -252,7 +249,8 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq,
pr_err("ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx",
rreq->debug_id, subreq->debug_index,
subreq->len, size,
subreq->start, ictx->zero_point, rreq->i_size);
subreq->start, zero_point, rreq->i_size);
netfs_cancel_read(subreq, ret);
break;
}
subreq->len = len;
@@ -261,12 +259,7 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq,
if (rreq->netfs_ops->prepare_read) {
ret = rreq->netfs_ops->prepare_read(subreq);
if (ret < 0) {
subreq->error = ret;
/* Not queued - release both refs. */
netfs_put_subrequest(subreq,
netfs_sreq_trace_put_cancel);
netfs_put_subrequest(subreq,
netfs_sreq_trace_put_cancel);
netfs_cancel_read(subreq, ret);
break;
}
trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
@@ -289,24 +282,29 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq,
pr_err("Unexpected read source %u\n", source);
WARN_ON_ONCE(1);
netfs_cancel_read(subreq, ret);
break;
issue:
slice = netfs_prepare_read_iterator(subreq, ractl);
if (slice < 0) {
ret = slice;
subreq->error = ret;
trace_netfs_sreq(subreq, netfs_sreq_trace_cancel);
/* Not queued - release both refs. */
netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
netfs_cancel_read(subreq, ret);
break;
}
size -= slice;
start += slice;
size -= slice;
if (size <= 0) {
smp_wmb(); /* Write lists before ALL_QUEUED. */
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
}
netfs_queue_read(rreq, subreq, size <= 0);
netfs_issue_read(rreq, subreq);
if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
netfs_wait_for_paused_read(rreq);
if (test_bit(NETFS_RREQ_FAILED, &rreq->flags))
break;
cond_resched();
} while (size > 0);
@@ -397,6 +395,7 @@ static int netfs_read_gaps(struct file *file, struct folio *folio)
{
struct netfs_io_request *rreq;
struct address_space *mapping = folio->mapping;
struct netfs_group *group = netfs_folio_group(folio);
struct netfs_folio *finfo = netfs_folio_info(folio);
struct netfs_inode *ctx = netfs_inode(mapping->host);
struct folio *sink = NULL;
@@ -458,14 +457,20 @@ static int netfs_read_gaps(struct file *file, struct folio *folio)
netfs_read_to_pagecache(rreq, NULL);
if (sink)
folio_put(sink);
ret = netfs_wait_for_read(rreq);
if (ret >= 0) {
if (group)
folio_change_private(folio, group);
else
folio_detach_private(folio);
kfree(finfo);
trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
flush_dcache_folio(folio);
folio_mark_uptodate(folio);
}
if (sink)
folio_put(sink);
folio_unlock(folio);
netfs_put_request(rreq, netfs_rreq_trace_put_return);
return ret < 0 ? ret : 0;
@@ -498,10 +503,10 @@ int netfs_read_folio(struct file *file, struct folio *folio)
struct netfs_inode *ctx = netfs_inode(mapping->host);
int ret;
if (folio_test_dirty(folio)) {
trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
folio_wait_writeback(folio);
if (folio_test_dirty(folio))
return netfs_read_gaps(file, folio);
}
_enter("%lx", folio->index);
@@ -667,7 +672,7 @@ int netfs_write_begin(struct netfs_inode *ctx,
ret = PTR_ERR(rreq);
goto error;
}
rreq->no_unlock_folio = folio->index;
rreq->no_unlock_folio = folio;
__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
ret = netfs_begin_cache_read(rreq, ctx);
@@ -684,9 +689,9 @@ int netfs_write_begin(struct netfs_inode *ctx,
netfs_read_to_pagecache(rreq, NULL);
ret = netfs_wait_for_read(rreq);
netfs_put_request(rreq, netfs_rreq_trace_put_return);
if (ret < 0)
goto error;
netfs_put_request(rreq, netfs_rreq_trace_put_return);
have_folio:
ret = folio_wait_private_2_killable(folio);
@@ -733,7 +738,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
goto error;
}
rreq->no_unlock_folio = folio->index;
rreq->no_unlock_folio = folio;
__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
ret = netfs_begin_cache_read(rreq, ctx);
if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)

View File

@@ -12,24 +12,6 @@
#include <linux/slab.h>
#include "internal.h"
static void __netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
{
if (netfs_group)
folio_attach_private(folio, netfs_get_group(netfs_group));
}
static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
{
void *priv = folio_get_private(folio);
if (unlikely(priv != netfs_group)) {
if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE))
folio_attach_private(folio, netfs_get_group(netfs_group));
else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
folio_detach_private(folio);
}
}
/*
* Grab a folio for writing and lock it. Attempt to allocate as large a folio
* as possible to hold as much of the remaining length as possible in one go.
@@ -149,6 +131,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
}
do {
enum netfs_folio_trace trace;
struct netfs_folio *finfo;
struct netfs_group *group;
unsigned long long fpos;
@@ -156,6 +139,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
size_t offset; /* Offset into pagecache folio */
size_t part; /* Bytes to write to folio */
size_t copied; /* Bytes copied from user */
void *priv;
offset = pos & (max_chunk - 1);
part = min(max_chunk - offset, iov_iter_count(iter));
@@ -201,73 +185,99 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
goto error_folio_unlock;
}
/* Decide how we should modify a folio. We might be attempting
* to do write-streaming, in which case we don't want to a
* local RMW cycle if we can avoid it. If we're doing local
* caching or content crypto, we award that priority over
* avoiding RMW. If the file is open readably, then we also
* assume that we may want to read what we wrote.
*/
finfo = netfs_folio_info(folio);
group = netfs_folio_group(folio);
/* If the requested group differs from the group set on the
* page, then we need to flush out the folio if it has a group
* set (ie. is non-NULL). Note that COPY_TO_CACHE is a special
* case, being a netfs annotation rather than an actual group.
*
* The filesystem isn't permitted to mix writes with groups and
* writes without groups as the NULL group is used to indicate
* that no group is set.
*/
if (unlikely(group != netfs_group) &&
group != NETFS_FOLIO_COPY_TO_CACHE)
group != NETFS_FOLIO_COPY_TO_CACHE &&
group) {
WARN_ON_ONCE(!netfs_group);
goto flush_content;
}
/* Decide how we should modify a folio. We might be attempting
* to do write-streaming, as we don't want to do a local RMW cycle
* if we can avoid it. If we're doing local caching or content
* crypto, we award that priority over avoiding RMW. If the
* file is open readably, then we let ->read_folio() fill in
* the gaps.
*/
if (folio_test_uptodate(folio)) {
if (mapping_writably_mapped(mapping))
flush_dcache_folio(folio);
copied = copy_folio_from_iter_atomic(folio, offset, part, iter);
if (unlikely(copied == 0))
goto copy_failed;
netfs_set_group(folio, netfs_group);
trace_netfs_folio(folio, netfs_folio_is_uptodate);
goto copied;
trace = netfs_folio_is_uptodate;
goto copied_uptodate;
}
/* If the page is above the zero-point then we assume that the
* server would just return a block of zeros or a short read if
* we try to read it.
*/
if (fpos >= ctx->zero_point) {
if (fpos >= netfs_read_zero_point(inode)) {
folio_zero_segment(folio, 0, offset);
copied = copy_folio_from_iter_atomic(folio, offset, part, iter);
if (unlikely(copied == 0))
goto copy_failed;
folio_zero_segment(folio, offset + copied, flen);
__netfs_set_group(folio, netfs_group);
folio_mark_uptodate(folio);
trace_netfs_folio(folio, netfs_modify_and_clear);
goto copied;
if (finfo)
trace = netfs_modify_and_clear_rm_finfo;
else
trace = netfs_modify_and_clear;
goto mark_uptodate;
}
/* See if we can write a whole folio in one go. */
if (!maybe_trouble && offset == 0 && part >= flen) {
copied = copy_folio_from_iter_atomic(folio, offset, part, iter);
if (unlikely(copied == 0))
if (likely(copied == part)) {
if (finfo)
trace = netfs_whole_folio_modify_filled;
else
trace = netfs_whole_folio_modify;
goto mark_uptodate;
}
if (copied == 0)
goto copy_failed;
if (unlikely(copied < part)) {
if (!finfo || copied <= finfo->dirty_offset) {
maybe_trouble = true;
iov_iter_revert(iter, copied);
copied = 0;
folio_unlock(folio);
goto retry;
}
__netfs_set_group(folio, netfs_group);
folio_mark_uptodate(folio);
trace_netfs_folio(folio, netfs_whole_folio_modify);
/* We overwrote some existing dirty data, so we have to
* accept the partial write.
*/
finfo->dirty_len += finfo->dirty_offset;
if (finfo->dirty_len == flen) {
trace = netfs_whole_folio_modify_filled_efault;
goto mark_uptodate;
}
if (copied > finfo->dirty_len)
finfo->dirty_len = copied;
finfo->dirty_offset = 0;
trace = netfs_whole_folio_modify_efault;
goto copied;
}
/* We don't want to do a streaming write on a file that loses
* caching service temporarily because the backing store got
* culled and we don't really want to get a streaming write on
* a file that's open for reading as ->read_folio() then has to
* be able to flush it.
* culled.
*/
if ((file->f_mode & FMODE_READ) ||
netfs_is_cache_enabled(ctx)) {
if (netfs_is_cache_enabled(ctx)) {
if (finfo) {
netfs_stat(&netfs_n_wh_wstream_conflict);
goto flush_content;
@@ -282,11 +292,11 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
copied = copy_folio_from_iter_atomic(folio, offset, part, iter);
if (unlikely(copied == 0))
goto copy_failed;
netfs_set_group(folio, netfs_group);
trace_netfs_folio(folio, netfs_just_prefetch);
goto copied;
trace = netfs_just_prefetch;
goto copied_uptodate;
}
/* Do a streaming write on a folio that has nothing in it yet. */
if (!finfo) {
ret = -EIO;
if (WARN_ON(folio_get_private(folio)))
@@ -295,10 +305,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
if (unlikely(copied == 0))
goto copy_failed;
if (offset == 0 && copied == flen) {
__netfs_set_group(folio, netfs_group);
folio_mark_uptodate(folio);
trace_netfs_folio(folio, netfs_streaming_filled_page);
goto copied;
trace = netfs_streaming_filled_page;
goto mark_uptodate;
}
finfo = kzalloc_obj(*finfo);
@@ -312,7 +320,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
finfo->dirty_len = copied;
folio_attach_private(folio, (void *)((unsigned long)finfo |
NETFS_FOLIO_INFO));
trace_netfs_folio(folio, netfs_streaming_write);
trace = netfs_streaming_write;
goto copied;
}
@@ -326,16 +334,10 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
goto copy_failed;
finfo->dirty_len += copied;
if (finfo->dirty_offset == 0 && finfo->dirty_len == flen) {
if (finfo->netfs_group)
folio_change_private(folio, finfo->netfs_group);
else
folio_detach_private(folio);
folio_mark_uptodate(folio);
kfree(finfo);
trace_netfs_folio(folio, netfs_streaming_cont_filled_page);
} else {
trace_netfs_folio(folio, netfs_streaming_write_cont);
trace = netfs_streaming_cont_filled_page;
goto mark_uptodate;
}
trace = netfs_streaming_write_cont;
goto copied;
}
@@ -349,7 +351,38 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
goto out;
continue;
/* Mark a folio as being up to date when we've filled it
* completely. If the folio has a group attached, then it must
* be the same group, otherwise we should have flushed it out
* above. We have to get rid of the netfs_folio struct if
* there was one.
*/
mark_uptodate:
folio_mark_uptodate(folio);
copied_uptodate:
priv = folio_get_private(folio);
if (likely(priv == netfs_group)) {
/* Already set correctly; no change required. */
} else if (priv == NETFS_FOLIO_COPY_TO_CACHE) {
if (!netfs_group)
folio_detach_private(folio);
else
folio_change_private(folio, netfs_get_group(netfs_group));
} else if (!priv) {
folio_attach_private(folio, netfs_get_group(netfs_group));
} else {
WARN_ON_ONCE(!finfo);
if (netfs_group)
/* finfo->netfs_group has a ref */
folio_change_private(folio, netfs_group);
else
folio_detach_private(folio);
kfree(finfo);
}
copied:
trace_netfs_folio(folio, trace);
flush_dcache_folio(folio);
/* Update the inode size if we moved the EOF marker */
@@ -510,6 +543,7 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
struct inode *inode = file_inode(file);
struct netfs_inode *ictx = netfs_inode(inode);
vm_fault_t ret = VM_FAULT_NOPAGE;
void *priv;
int err;
_enter("%lx", folio->index);
@@ -530,7 +564,9 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
}
group = netfs_folio_group(folio);
if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) {
if (group &&
group != netfs_group &&
group != NETFS_FOLIO_COPY_TO_CACHE) {
folio_unlock(folio);
err = filemap_fdatawrite_range(mapping,
folio_pos(folio),
@@ -552,7 +588,19 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
trace_netfs_folio(folio, netfs_folio_trace_mkwrite_plus);
else
trace_netfs_folio(folio, netfs_folio_trace_mkwrite);
netfs_set_group(folio, netfs_group);
priv = folio_get_private(folio);
if (priv != netfs_group) {
if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
folio_detach_private(folio);
else if (netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
folio_change_private(folio, netfs_get_group(netfs_group));
else if (netfs_group && !priv)
folio_attach_private(folio, netfs_get_group(netfs_group));
else
WARN_ON_ONCE(1);
}
file_update_time(file);
set_bit(NETFS_ICTX_MODIFIED_ATTR, &ictx->flags);
if (ictx->ops->post_modify)

View File

@@ -45,12 +45,11 @@ static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq)
* Perform a read to a buffer from the server, slicing up the region to be read
* according to the network rsize.
*/
static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
static void netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
{
struct netfs_io_stream *stream = &rreq->io_streams[0];
unsigned long long start = rreq->start;
ssize_t size = rreq->len;
int ret = 0;
int ret;
do {
struct netfs_io_subrequest *subreq;
@@ -58,7 +57,10 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
subreq = netfs_alloc_subrequest(rreq);
if (!subreq) {
ret = -ENOMEM;
/* Stash the error in the request if there's not
* already an error set.
*/
cmpxchg(&rreq->error, 0, -ENOMEM);
break;
}
@@ -66,25 +68,13 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
subreq->start = start;
subreq->len = size;
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
spin_lock(&rreq->lock);
list_add_tail(&subreq->rreq_link, &stream->subrequests);
if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
if (!stream->active) {
stream->collected_to = subreq->start;
/* Store list pointers before active flag */
smp_store_release(&stream->active, true);
}
}
trace_netfs_sreq(subreq, netfs_sreq_trace_added);
spin_unlock(&rreq->lock);
netfs_queue_read(rreq, subreq);
netfs_stat(&netfs_n_rh_download);
if (rreq->netfs_ops->prepare_read) {
ret = rreq->netfs_ops->prepare_read(subreq);
if (ret < 0) {
netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
netfs_cancel_read(subreq, ret);
break;
}
}
@@ -113,8 +103,6 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
netfs_wake_collector(rreq);
}
return ret;
}
/*
@@ -137,21 +125,17 @@ static ssize_t netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync)
// TODO: Use bounce buffer if requested
inode_dio_begin(rreq->inode);
netfs_dispatch_unbuffered_reads(rreq);
ret = netfs_dispatch_unbuffered_reads(rreq);
if (!rreq->submitted) {
netfs_put_request(rreq, netfs_rreq_trace_put_no_submit);
inode_dio_end(rreq->inode);
ret = 0;
goto out;
}
/* The collector will get run, even if we don't manage to submit any
* subreqs, so we shouldn't call inode_dio_end() here.
*/
if (sync)
ret = netfs_wait_for_read(rreq);
else
ret = -EIOCBQUEUED;
out:
_leave(" = %zd", ret);
return ret;
}

View File

@@ -376,8 +376,10 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ret < 0)
goto out;
end = iocb->ki_pos + iov_iter_count(from);
if (end > ictx->zero_point)
ictx->zero_point = end;
spin_lock(&inode->i_lock);
if (end > ictx->_zero_point)
netfs_write_zero_point(inode, end);
spin_unlock(&inode->i_lock);
fscache_invalidate(netfs_i_cookie(ictx), NULL, i_size_read(inode),
FSCACHE_INVAL_DIO_WRITE);

View File

@@ -23,6 +23,8 @@
/*
* buffered_read.c
*/
void netfs_queue_read(struct netfs_io_request *rreq,
struct netfs_io_subrequest *subreq);
void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error);
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t offset, size_t len);
@@ -108,6 +110,7 @@ static inline void netfs_see_subrequest(struct netfs_io_subrequest *subreq,
*/
bool netfs_read_collection(struct netfs_io_request *rreq);
void netfs_read_collection_worker(struct work_struct *work);
void netfs_cancel_read(struct netfs_io_subrequest *subreq, int error);
void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error);
/*

View File

@@ -22,7 +22,7 @@
*
* Extract the page fragments from the given amount of the source iterator and
* build up a second iterator that refers to all of those bits. This allows
* the original iterator to disposed of.
* the original iterator to be disposed of.
*
* @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA be
* allowed on the pages extracted.
@@ -43,7 +43,7 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
unsigned int max_pages;
unsigned int npages = 0;
unsigned int i;
ssize_t ret;
ssize_t ret = 0;
size_t count = orig_len, offset, len;
size_t bv_size, pg_size;
@@ -67,25 +67,28 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
ret = iov_iter_extract_pages(orig, &pages, count,
max_pages - npages, extraction_flags,
&offset);
if (ret < 0) {
pr_err("Couldn't get user pages (rc=%zd)\n", ret);
if (unlikely(ret <= 0)) {
ret = ret ?: -EIO;
break;
}
if (ret > count) {
pr_err("get_pages rc=%zd more than %zu\n", ret, count);
if (WARN(ret > count,
"%s: extract_pages overrun %zd > %zu bytes\n",
__func__, ret, count)) {
ret = -EIO;
break;
}
cur_npages = DIV_ROUND_UP(offset + ret, PAGE_SIZE);
if (WARN(cur_npages > max_pages - npages,
"%s: extract_pages overrun %u > %u pages\n",
__func__, npages + cur_npages, max_pages)) {
ret = -EIO;
break;
}
count -= ret;
ret += offset;
cur_npages = DIV_ROUND_UP(ret, PAGE_SIZE);
if (npages + cur_npages > max_pages) {
pr_err("Out of bvec array capacity (%u vs %u)\n",
npages + cur_npages, max_pages);
break;
}
for (i = 0; i < cur_npages; i++) {
len = ret > PAGE_SIZE ? PAGE_SIZE : ret;
@@ -97,6 +100,18 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
npages += cur_npages;
}
/* Note: Don't try to clean up after EIO. Either we got no pages, so
* nothing to clean up, or we got a buffer overrun, memory corruption
* and can't trust the stuff in the buffer (a WARN was emitted).
*/
if (ret < 0 && (ret == -ENOMEM || npages == 0)) {
for (i = 0; i < npages; i++)
unpin_user_page(bv[i].bv_page);
kvfree(bv);
return ret;
}
iov_iter_bvec(new, orig->data_source, bv, npages, orig_len - count);
return npages;
}

View File

@@ -211,18 +211,25 @@ EXPORT_SYMBOL(netfs_clear_inode_writeback);
void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
struct netfs_folio *finfo;
struct netfs_inode *ctx = netfs_inode(folio_inode(folio));
struct inode *inode = folio_inode(folio);
struct netfs_inode *ctx = netfs_inode(inode);
size_t flen = folio_size(folio);
_enter("{%lx},%zx,%zx", folio->index, offset, length);
if (offset == 0 && length == flen) {
unsigned long long i_size = i_size_read(&ctx->inode);
unsigned long long i_size, remote_i_size, zero_point;
unsigned long long fpos = folio_pos(folio), end;
netfs_read_sizes(inode, &i_size, &remote_i_size, &zero_point);
end = umin(fpos + flen, i_size);
if (fpos < i_size && end > ctx->zero_point)
ctx->zero_point = end;
if (fpos < i_size && end > zero_point) {
spin_lock(&inode->i_lock);
end = umin(fpos + flen, inode->i_size);
if (fpos < i_size && end > ctx->_zero_point)
netfs_write_zero_point(inode, end);
spin_unlock(&inode->i_lock);
}
}
folio_wait_private_2(folio); /* [DEPRECATED] */
@@ -255,7 +262,8 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
goto erase_completely;
/* Move the start of the data. */
finfo->dirty_len = fend - iend;
finfo->dirty_offset = offset;
finfo->dirty_offset = iend;
trace_netfs_folio(folio, netfs_folio_trace_invalidate_front);
return;
}
@@ -264,12 +272,14 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
*/
if (iend >= fend) {
finfo->dirty_len = offset - fstart;
trace_netfs_folio(folio, netfs_folio_trace_invalidate_tail);
return;
}
/* A partial write was split. The caller has already zeroed
* it, so just absorb the hole.
*/
trace_netfs_folio(folio, netfs_folio_trace_invalidate_middle);
}
return;
@@ -277,8 +287,9 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
netfs_put_group(netfs_folio_group(folio));
folio_detach_private(folio);
folio_clear_uptodate(folio);
folio_cancel_dirty(folio);
kfree(finfo);
return;
trace_netfs_folio(folio, netfs_folio_trace_invalidate_all);
}
EXPORT_SYMBOL(netfs_invalidate_folio);
@@ -292,15 +303,22 @@ EXPORT_SYMBOL(netfs_invalidate_folio);
*/
bool netfs_release_folio(struct folio *folio, gfp_t gfp)
{
struct netfs_inode *ctx = netfs_inode(folio_inode(folio));
unsigned long long end;
struct inode *inode = folio_inode(folio);
struct netfs_inode *ctx = netfs_inode(inode);
unsigned long long i_size, remote_i_size, zero_point, end;
if (folio_test_dirty(folio))
return false;
end = umin(folio_next_pos(folio), i_size_read(&ctx->inode));
if (end > ctx->zero_point)
ctx->zero_point = end;
netfs_read_sizes(inode, &i_size, &remote_i_size, &zero_point);
end = folio_next_pos(folio);
if (end > zero_point) {
spin_lock(&inode->i_lock);
end = umin(end, ctx->_remote_i_size);
if (end > ctx->_zero_point)
netfs_write_zero_point(inode, end);
spin_unlock(&inode->i_lock);
}
if (folio_test_private(folio))
return false;
@@ -356,6 +374,7 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq,
DEFINE_WAIT(myself);
list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
smp_rmb(); /* Read ->next before IN_PROGRESS. */
if (!netfs_check_subreq_in_progress(subreq))
continue;

View File

@@ -83,7 +83,7 @@ static void netfs_unlock_read_folio(struct netfs_io_request *rreq,
}
just_unlock:
if (folio->index == rreq->no_unlock_folio &&
if (folio == rreq->no_unlock_folio &&
test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
_debug("no unlock");
} else {
@@ -205,8 +205,10 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq)
* in progress. The issuer thread may be adding stuff to the tail
* whilst we're doing this.
*/
front = list_first_entry_or_null(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
front = list_first_entry_or_null_acquire(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
/* Read first subreq pointer before IN_PROGRESS flag. */
while (front) {
size_t transferred;
@@ -575,6 +577,17 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
}
EXPORT_SYMBOL(netfs_read_subreq_terminated);
/*
* Cancel a read subrequest due to preparation failure.
*
* Emits the netfs_sreq_trace_cancel tracepoint, stores @error in
* subreq->error, marks the subrequest NETFS_SREQ_FAILED and then passes it
* to netfs_read_subreq_terminated().  Both the error code and the FAILED
* flag are written before that final call is made.
*/
void netfs_cancel_read(struct netfs_io_subrequest *subreq, int error)
{
trace_netfs_sreq(subreq, netfs_sreq_trace_cancel);
subreq->error = error;
/* NOTE(review): __set_bit() is the non-atomic variant; presumably nothing
* else modifies subreq->flags concurrently at this point - confirm.
*/
__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
netfs_read_subreq_terminated(subreq);
}
/*
* Handle termination of a read from the cache.
*/

View File

@@ -175,7 +175,9 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
list_for_each_entry_safe_from(subreq, tmp,
&stream->subrequests, rreq_link) {
trace_netfs_sreq(subreq, netfs_sreq_trace_superfluous);
spin_lock(&rreq->lock);
list_del(&subreq->rreq_link);
spin_unlock(&rreq->lock);
netfs_put_subrequest(subreq, netfs_sreq_trace_put_done);
if (subreq == to)
break;
@@ -203,8 +205,10 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
refcount_read(&subreq->ref),
netfs_sreq_trace_new);
spin_lock(&rreq->lock);
list_add(&subreq->rreq_link, &to->rreq_link);
to = list_next_entry(to, rreq_link);
spin_unlock(&rreq->lock);
to = subreq;
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
stream->sreq_max_len = umin(len, rreq->rsize);
@@ -288,8 +292,15 @@ void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq)
struct folio *folio = folioq_folio(p, slot);
if (folio && !folioq_is_marked2(p, slot)) {
trace_netfs_folio(folio, netfs_folio_trace_abandon);
folio_unlock(folio);
if (folio == rreq->no_unlock_folio &&
test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO,
&rreq->flags)) {
_debug("no unlock");
} else {
trace_netfs_folio(folio,
netfs_folio_trace_abandon);
folio_unlock(folio);
}
}
}
}

View File

@@ -89,7 +89,6 @@ static void netfs_single_read_cache(struct netfs_io_request *rreq,
*/
static int netfs_single_dispatch_read(struct netfs_io_request *rreq)
{
struct netfs_io_stream *stream = &rreq->io_streams[0];
struct netfs_io_subrequest *subreq;
int ret = 0;
@@ -102,14 +101,7 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq)
subreq->len = rreq->len;
subreq->io_iter = rreq->buffer.iter;
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
spin_lock(&rreq->lock);
list_add_tail(&subreq->rreq_link, &stream->subrequests);
trace_netfs_sreq(subreq, netfs_sreq_trace_added);
/* Store list pointers before active flag */
smp_store_release(&stream->active, true);
spin_unlock(&rreq->lock);
netfs_queue_read(rreq, subreq);
netfs_single_cache_prepare_read(rreq, subreq);
switch (subreq->source) {
@@ -121,10 +113,14 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq)
goto cancel;
}
smp_wmb(); /* Write lists before ALL_QUEUED. */
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
rreq->netfs_ops->issue_read(subreq);
rreq->submitted += subreq->len;
break;
case NETFS_READ_FROM_CACHE:
smp_wmb(); /* Write lists before ALL_QUEUED. */
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
netfs_single_read_cache(rreq, subreq);
rreq->submitted += subreq->len;
@@ -134,14 +130,15 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq)
pr_warn("Unexpected single-read source %u\n", subreq->source);
WARN_ON_ONCE(true);
ret = -EIO;
break;
goto cancel;
}
smp_wmb(); /* Write lists before ALL_QUEUED. */
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
return ret;
cancel:
netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
netfs_cancel_read(subreq, ret);
smp_wmb(); /* Write lists before ALL_QUEUED. */
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
netfs_wake_collector(rreq);
return ret;
}

View File

@@ -57,7 +57,8 @@ static void netfs_dump_request(const struct netfs_io_request *rreq)
int netfs_folio_written_back(struct folio *folio)
{
enum netfs_folio_trace why = netfs_folio_trace_clear;
struct netfs_inode *ictx = netfs_inode(folio->mapping->host);
struct inode *inode = folio_inode(folio);
struct netfs_inode *ictx = netfs_inode(inode);
struct netfs_folio *finfo;
struct netfs_group *group = NULL;
int gcount = 0;
@@ -69,8 +70,10 @@ int netfs_folio_written_back(struct folio *folio)
unsigned long long fend;
fend = folio_pos(folio) + finfo->dirty_offset + finfo->dirty_len;
if (fend > ictx->zero_point)
ictx->zero_point = fend;
spin_lock(&ictx->inode.i_lock);
if (fend > ictx->_zero_point)
netfs_write_zero_point(inode, fend);
spin_unlock(&ictx->inode.i_lock);
folio_detach_private(folio);
group = finfo->netfs_group;
@@ -228,8 +231,10 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)
if (!smp_load_acquire(&stream->active))
continue;
front = list_first_entry_or_null(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
front = list_first_entry_or_null_acquire(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
/* Read first subreq pointer before IN_PROGRESS flag. */
while (front) {
trace_netfs_collect_sreq(wreq, front);
//_debug("sreq [%x] %llx %zx/%zx",

View File

@@ -204,7 +204,8 @@ void netfs_prepare_write(struct netfs_io_request *wreq,
* remove entries off of the front.
*/
spin_lock(&wreq->lock);
list_add_tail(&subreq->rreq_link, &stream->subrequests);
/* Write IN_PROGRESS before pointer to new subreq */
list_add_tail_release(&subreq->rreq_link, &stream->subrequests);
if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
if (!stream->active) {
stream->collected_to = subreq->start;
@@ -413,12 +414,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
if (streamw)
netfs_issue_write(wreq, cache);
/* Flip the page to the writeback state and unlock. If we're called
* from write-through, then the page has already been put into the wb
* state.
*/
if (wreq->origin == NETFS_WRITEBACK)
folio_start_writeback(folio);
folio_start_writeback(folio);
folio_unlock(folio);
if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) {
@@ -646,29 +642,41 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c
struct folio *folio, size_t copied, bool to_page_end,
struct folio **writethrough_cache)
{
int ret;
_enter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
wreq->debug_id, wreq->buffer.iter.count, wreq->wsize, copied, to_page_end);
if (!*writethrough_cache) {
if (folio_test_dirty(folio))
/* Sigh. mmap. */
folio_clear_dirty_for_io(folio);
/* The folio is locked. */
if (*writethrough_cache != folio) {
if (*writethrough_cache) {
/* Did the folio get moved? */
folio_put(*writethrough_cache);
*writethrough_cache = NULL;
}
/* We can make multiple writes to the folio... */
folio_start_writeback(folio);
if (wreq->len == 0)
trace_netfs_folio(folio, netfs_folio_trace_wthru);
else
trace_netfs_folio(folio, netfs_folio_trace_wthru_plus);
*writethrough_cache = folio;
folio_get(folio);
}
wreq->len += copied;
if (!to_page_end)
return 0;
if (!to_page_end) {
folio_mark_dirty(folio);
folio_unlock(folio);
return 0;
}
ret = netfs_write_folio(wreq, wbc, folio);
folio_put(*writethrough_cache);
*writethrough_cache = NULL;
return netfs_write_folio(wreq, wbc, folio);
wreq->submitted = wreq->len;
return ret;
}
/*
@@ -682,8 +690,12 @@ ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_c
_enter("R=%x", wreq->debug_id);
if (writethrough_cache)
if (writethrough_cache) {
folio_lock(writethrough_cache);
netfs_write_folio(wreq, wbc, writethrough_cache);
folio_put(writethrough_cache);
wreq->submitted = wreq->len;
}
netfs_end_issue_write(wreq);
@@ -818,6 +830,9 @@ static int netfs_write_folio_single(struct netfs_io_request *wreq,
*
* Write a monolithic, non-pagecache object back to the server and/or
* the cache.
*
* Return: 0 if successful; 1 if skipped due to lock conflict and WB_SYNC_NONE;
* or a negative error code.
*/
int netfs_writeback_single(struct address_space *mapping,
struct writeback_control *wbc,
@@ -834,8 +849,10 @@ int netfs_writeback_single(struct address_space *mapping,
if (!mutex_trylock(&ictx->wb_lock)) {
if (wbc->sync_mode == WB_SYNC_NONE) {
/* The VFS will have undirtied the inode. */
netfs_single_mark_inode_dirty(&ictx->inode);
netfs_stat(&netfs_n_wb_lock_skip);
return 0;
return 1;
}
netfs_stat(&netfs_n_wb_lock_wait);
mutex_lock(&ictx->wb_lock);

View File

@@ -130,7 +130,9 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
list_for_each_entry_safe_from(subreq, tmp,
&stream->subrequests, rreq_link) {
trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
spin_lock(&wreq->lock);
list_del(&subreq->rreq_link);
spin_unlock(&wreq->lock);
netfs_put_subrequest(subreq, netfs_sreq_trace_put_done);
if (subreq == to)
break;
@@ -153,8 +155,10 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
netfs_sreq_trace_new);
trace_netfs_sreq(subreq, netfs_sreq_trace_split);
spin_lock(&wreq->lock);
list_add(&subreq->rreq_link, &to->rreq_link);
to = list_next_entry(to, rreq_link);
spin_unlock(&wreq->lock);
to = subreq;
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
stream->sreq_max_len = len;

View File

@@ -266,7 +266,7 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
else
tsk = find_task_by_pid_ns(arg, pid_ns);
if (!tsk)
break;
return ret;
switch (ioctl) {
case NS_GET_PID_FROM_PIDNS:

View File

@@ -362,7 +362,7 @@ static struct dentry *orangefs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
__orangefs_setattr(dir, &iattr);
out:
op_release(new_op);
return ERR_PTR(ret);
return ret ? ERR_PTR(ret) : NULL;
}
static int orangefs_rename(struct mnt_idmap *idmap,

View File

@@ -708,6 +708,17 @@ static int kern_select(int n, fd_set __user *inp, fd_set __user *outp,
if (copy_from_user(&tv, tvp, sizeof(tv)))
return -EFAULT;
/*
* Reject negative components before normalisation. The seconds
* sum below is performed in signed long and a crafted negative
* timeval can wrap to a positive value that passes
* timespec64_valid() and turns into an effectively-infinite
* deadline via timespec64_add_safe()'s saturation, instead of
* the -EINVAL POSIX requires for negative timeouts.
*/
if (tv.tv_sec < 0 || tv.tv_usec < 0)
return -EINVAL;
to = &end_time;
if (poll_select_set_timeout(to,
tv.tv_sec + (tv.tv_usec / USEC_PER_SEC),

View File

@@ -434,7 +434,8 @@ cifs_alloc_inode(struct super_block *sb)
spin_lock_init(&cifs_inode->writers_lock);
cifs_inode->writers = 0;
cifs_inode->netfs.inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
cifs_inode->netfs.remote_i_size = 0;
cifs_inode->netfs._remote_i_size = 0;
cifs_inode->netfs._zero_point = 0;
cifs_inode->uniqueid = 0;
cifs_inode->createtime = 0;
cifs_inode->epoch = 0;
@@ -1303,7 +1304,8 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
struct cifsFileInfo *smb_file_src = src_file->private_data;
struct cifsFileInfo *smb_file_target = dst_file->private_data;
struct cifs_tcon *target_tcon, *src_tcon;
unsigned long long destend, fstart, fend, old_size, new_size;
unsigned long long i_size, old_size, new_size, zero_point;
unsigned long long destend, fstart, fend;
unsigned int xid;
int rc;
@@ -1347,7 +1349,7 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
* Advance the EOF marker after the flush above to the end of the range
* if it's short of that.
*/
if (src_cifsi->netfs.remote_i_size < off + len) {
if (netfs_read_remote_i_size(src_inode) < off + len) {
rc = cifs_precopy_set_eof(src_inode, src_cifsi, src_tcon, xid, off + len);
if (rc < 0)
goto unlock;
@@ -1368,16 +1370,18 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
rc = cifs_flush_folio(target_inode, destend, &fstart, &fend, false);
if (rc)
goto unlock;
if (fend > target_cifsi->netfs.zero_point)
target_cifsi->netfs.zero_point = fend + 1;
old_size = target_cifsi->netfs.remote_i_size;
spin_lock(&target_inode->i_lock);
if (fend > zero_point)
netfs_write_zero_point(target_inode, fend + 1);
i_size = target_inode->i_size;
spin_unlock(&target_inode->i_lock);
/* Discard all the folios that overlap the destination region. */
cifs_dbg(FYI, "about to discard pages %llx-%llx\n", fstart, fend);
truncate_inode_pages_range(&target_inode->i_data, fstart, fend);
fscache_invalidate(cifs_inode_cookie(target_inode), NULL,
i_size_read(target_inode), 0);
fscache_invalidate(cifs_inode_cookie(target_inode), NULL, i_size, 0);
rc = -EOPNOTSUPP;
if (target_tcon->ses->server->ops->duplicate_extents) {
@@ -1402,8 +1406,12 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
rc = -EINVAL;
}
}
if (rc == 0 && new_size > target_cifsi->netfs.zero_point)
target_cifsi->netfs.zero_point = new_size;
if (rc == 0) {
spin_lock(&target_inode->i_lock);
if (new_size > target_cifsi->netfs._zero_point)
netfs_write_zero_point(target_inode, new_size);
spin_unlock(&target_inode->i_lock);
}
}
/* force revalidate of size and timestamps of target file now
@@ -1474,7 +1482,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
* Advance the EOF marker after the flush above to the end of the range
* if it's short of that.
*/
if (src_cifsi->netfs.remote_i_size < off + len) {
if (netfs_read_remote_i_size(src_inode) < off + len) {
rc = cifs_precopy_set_eof(src_inode, src_cifsi, src_tcon, xid, off + len);
if (rc < 0)
goto unlock;
@@ -1502,8 +1510,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
fscache_resize_cookie(cifs_inode_cookie(target_inode),
i_size_read(target_inode));
}
if (rc > 0 && destoff + rc > target_cifsi->netfs.zero_point)
target_cifsi->netfs.zero_point = destoff + rc;
if (rc > 0) {
spin_lock(&target_inode->i_lock);
if (destoff + rc > target_cifsi->netfs._zero_point)
netfs_write_zero_point(target_inode, destoff + rc);
spin_unlock(&target_inode->i_lock);
}
}
file_accessed(src_file);

View File

@@ -1465,6 +1465,7 @@ cifs_readv_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
struct cifs_io_subrequest *rdata = mid->callback_data;
struct netfs_inode *ictx = netfs_inode(rdata->rreq->inode);
struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink);
struct inode *inode = &ictx->inode;
struct smb_rqst rqst = { .rq_iov = rdata->iov,
.rq_nvec = 1,
.rq_iter = rdata->subreq.io_iter };
@@ -1538,7 +1539,7 @@ cifs_readv_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
} else {
size_t trans = rdata->subreq.transferred + rdata->got_bytes;
if (trans < rdata->subreq.len &&
rdata->subreq.start + trans >= ictx->remote_i_size) {
rdata->subreq.start + trans >= netfs_read_remote_i_size(inode)) {
rdata->result = 0;
__set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
} else if (rdata->got_bytes > 0) {

View File

@@ -2517,18 +2517,23 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result)
{
struct netfs_io_request *wreq = wdata->rreq;
struct netfs_inode *ictx = netfs_inode(wreq->inode);
struct inode *inode = wreq->inode;
struct netfs_inode *ictx = netfs_inode(inode);
loff_t wrend;
if (result > 0) {
spin_lock(&inode->i_lock);
wrend = wdata->subreq.start + wdata->subreq.transferred + result;
if (wrend > ictx->zero_point &&
if (wrend > ictx->_zero_point &&
(wdata->rreq->origin == NETFS_UNBUFFERED_WRITE ||
wdata->rreq->origin == NETFS_DIO_WRITE))
ictx->zero_point = wrend;
if (wrend > ictx->remote_i_size)
netfs_write_zero_point(inode, wrend);
if (wrend > ictx->_remote_i_size)
netfs_resize_file(ictx, wrend, true);
spin_unlock(&inode->i_lock);
}
netfs_write_subrequest_terminated(&wdata->subreq, result);

View File

@@ -119,7 +119,7 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr)
fattr->cf_mtime = timestamp_truncate(fattr->cf_mtime, inode);
mtime = inode_get_mtime(inode);
if (timespec64_equal(&mtime, &fattr->cf_mtime) &&
cifs_i->netfs.remote_i_size == fattr->cf_eof) {
netfs_read_remote_i_size(inode) == fattr->cf_eof) {
cifs_dbg(FYI, "%s: inode %llu is unchanged\n",
__func__, cifs_i->uniqueid);
return;
@@ -173,12 +173,12 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr,
CIFS_I(inode)->time = 0; /* force reval */
return -ESTALE;
}
if (inode_state_read_once(inode) & I_NEW)
CIFS_I(inode)->netfs.zero_point = fattr->cf_eof;
cifs_revalidate_cache(inode, fattr);
spin_lock(&inode->i_lock);
if (inode_state_read_once(inode) & I_NEW)
netfs_write_zero_point(inode, fattr->cf_eof);
fattr->cf_mtime = timestamp_truncate(fattr->cf_mtime, inode);
fattr->cf_atime = timestamp_truncate(fattr->cf_atime, inode);
fattr->cf_ctime = timestamp_truncate(fattr->cf_ctime, inode);
@@ -212,7 +212,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr,
else
clear_bit(CIFS_INO_DELETE_PENDING, &cifs_i->flags);
cifs_i->netfs.remote_i_size = fattr->cf_eof;
netfs_write_remote_i_size(inode, fattr->cf_eof);
/*
* Can't safely change the file size here if the client is writing to
* it due to potential races.
@@ -2772,7 +2772,9 @@ cifs_revalidate_mapping(struct inode *inode)
if (cifs_sb_flags(cifs_sb) & CIFS_MOUNT_RW_CACHE)
goto skip_invalidate;
cifs_inode->netfs.zero_point = cifs_inode->netfs.remote_i_size;
spin_lock(&inode->i_lock);
netfs_write_zero_point(inode, netfs_inode(inode)->_remote_i_size);
spin_unlock(&inode->i_lock);
rc = filemap_invalidate_inode(inode, true, 0, LLONG_MAX);
if (rc) {
cifs_dbg(VFS, "%s: invalidate inode %p failed with rc %d\n",

View File

@@ -143,7 +143,8 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
fattr->cf_rdev = inode->i_rdev;
fattr->cf_uid = inode->i_uid;
fattr->cf_gid = inode->i_gid;
fattr->cf_eof = CIFS_I(inode)->netfs.remote_i_size;
fattr->cf_eof =
netfs_read_remote_i_size(inode);
fattr->cf_symlink_target = NULL;
} else {
CIFS_I(inode)->time = 0;

View File

@@ -3402,8 +3402,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
struct inode *inode = file_inode(file);
struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct cifsFileInfo *cfile = file->private_data;
struct netfs_inode *ictx = netfs_inode(inode);
unsigned long long i_size, new_size, remote_size;
unsigned long long i_size, new_size, remote_i_size, zero_point;
long rc;
unsigned int xid;
@@ -3414,9 +3413,8 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
filemap_invalidate_lock(inode->i_mapping);
i_size = i_size_read(inode);
remote_size = ictx->remote_i_size;
if (offset + len >= remote_size && offset < i_size) {
netfs_read_sizes(inode, &i_size, &remote_i_size, &zero_point);
if (offset + len >= remote_i_size && offset < i_size) {
unsigned long long top = umin(offset + len, i_size);
rc = filemap_write_and_wait_range(inode->i_mapping, offset, top - 1);
@@ -3449,9 +3447,11 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
cfile->fid.volatile_fid, cfile->pid, new_size);
if (rc >= 0) {
truncate_setsize(inode, new_size);
spin_lock(&inode->i_lock);
netfs_resize_file(&cifsi->netfs, new_size, true);
if (offset < cifsi->netfs.zero_point)
cifsi->netfs.zero_point = offset;
if (offset < cifsi->netfs._zero_point)
netfs_write_zero_point(inode, offset);
spin_unlock(&inode->i_lock);
fscache_resize_cookie(cifs_inode_cookie(inode), new_size);
}
}
@@ -3474,7 +3474,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
struct inode *inode = file_inode(file);
struct cifsFileInfo *cfile = file->private_data;
struct file_zero_data_information fsctl_buf;
unsigned long long end = offset + len, i_size, remote_i_size;
unsigned long long end = offset + len, i_size, remote_i_size, zero_point;
long rc;
unsigned int xid;
__u8 set_sparse = 1;
@@ -3516,14 +3516,17 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
* that we locally hole-punch the tail of the dirty data, the proposed
* EOF update will end up in the wrong place.
*/
i_size = i_size_read(inode);
remote_i_size = netfs_inode(inode)->remote_i_size;
netfs_read_sizes(inode, &i_size, &remote_i_size, &zero_point);
if (end > remote_i_size && i_size > remote_i_size) {
unsigned long long extend_to = umin(end, i_size);
rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, cfile->pid, extend_to);
if (rc >= 0)
netfs_inode(inode)->remote_i_size = extend_to;
if (rc >= 0) {
spin_lock(&inode->i_lock);
netfs_write_remote_i_size(inode, extend_to);
spin_unlock(&inode->i_lock);
}
}
unlock:
@@ -3787,7 +3790,6 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon,
struct inode *inode = file_inode(file);
struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct cifsFileInfo *cfile = file->private_data;
struct netfs_inode *ictx = &cifsi->netfs;
loff_t old_eof, new_eof;
xid = get_xid();
@@ -3805,7 +3807,9 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon,
goto out_2;
truncate_pagecache_range(inode, off, old_eof);
ictx->zero_point = old_eof;
spin_lock(&inode->i_lock);
netfs_write_zero_point(inode, old_eof);
spin_unlock(&inode->i_lock);
netfs_wait_for_outstanding_io(inode);
rc = smb2_copychunk_range(xid, cfile, cfile, off + len,
@@ -3822,8 +3826,10 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon,
rc = 0;
truncate_setsize(inode, new_eof);
spin_lock(&inode->i_lock);
netfs_resize_file(&cifsi->netfs, new_eof, true);
ictx->zero_point = new_eof;
netfs_write_zero_point(inode, new_eof);
spin_unlock(&inode->i_lock);
fscache_resize_cookie(cifs_inode_cookie(inode), new_eof);
out_2:
filemap_invalidate_unlock(inode->i_mapping);
@@ -3866,13 +3872,17 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon,
goto out_2;
truncate_setsize(inode, new_eof);
spin_lock(&inode->i_lock);
netfs_resize_file(&cifsi->netfs, i_size_read(inode), true);
spin_unlock(&inode->i_lock);
fscache_resize_cookie(cifs_inode_cookie(inode), i_size_read(inode));
rc = smb2_copychunk_range(xid, cfile, cfile, off, count, off + len);
if (rc < 0)
goto out_2;
cifsi->netfs.zero_point = new_eof;
spin_lock(&inode->i_lock);
netfs_write_zero_point(inode, new_eof);
spin_unlock(&inode->i_lock);
rc = smb3_zero_data(file, tcon, off, len, xid);
if (rc < 0)

View File

@@ -4608,6 +4608,7 @@ smb2_readv_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
struct netfs_inode *ictx = netfs_inode(rdata->rreq->inode);
struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink);
struct smb2_hdr *shdr = (struct smb2_hdr *)rdata->iov[0].iov_base;
struct inode *inode = &ictx->inode;
struct cifs_credits credits = {
.value = 0,
.instance = 0,
@@ -4721,7 +4722,7 @@ smb2_readv_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid)
} else {
size_t trans = rdata->subreq.transferred + rdata->got_bytes;
if (trans < rdata->subreq.len &&
rdata->subreq.start + trans >= ictx->remote_i_size) {
rdata->subreq.start + trans >= netfs_read_remote_i_size(inode)) {
__set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
rdata->result = 0;
}

View File

@@ -191,6 +191,29 @@ static inline void list_add_tail(struct list_head *new, struct list_head *head)
__list_add(new, head->prev, head);
}
/**
* list_add_tail_release - add a new entry with release barrier
* @new: new entry to be added
* @head: list head to add it before
*
* Insert a new entry before the specified head, using a release barrier to set
* the ->next pointer that points to it. This is useful for implementing
* queues, in particular ones whose elements are walked forwards locklessly.
*
* The entry is only linked in if __list_add_valid() accepts the insertion.
*/
static inline void list_add_tail_release(struct list_head *new,
struct list_head *head)
{
struct list_head *prev = head->prev;
if (__list_add_valid(new, prev, head)) {
/* Fill in the new entry and the backward link from the head first... */
new->next = head;
new->prev = prev;
head->prev = new;
/* ...then make it reachable going forwards with a release store. */
smp_store_release(&prev->next, new);
}
}
/*
* Delete a list entry by making the prev/next entries
* point to each other.
@@ -644,6 +667,20 @@ static inline void list_splice_tail_init(struct list_head *list,
pos__ != head__ ? list_entry(pos__, type, member) : NULL; \
})
/**
* list_first_entry_or_null_acquire - get the first element from a list with barrier
* @ptr: the list head to take the element from.
* @type: the type of the struct this is embedded in.
* @member: the name of the list_head within the struct.
*
* The list head's ->next pointer is read with smp_load_acquire().
*
* Note that if the list is empty, it returns NULL.
*/
#define list_first_entry_or_null_acquire(ptr, type, member) ({ \
struct list_head *head__ = (ptr); \
struct list_head *pos__ = smp_load_acquire(&head__->next); \
pos__ != head__ ? list_entry(pos__, type, member) : NULL; \
})
/**
* list_last_entry_or_null - get the last element from a list
* @ptr: the list head to take the element from.

View File

@@ -62,8 +62,8 @@ struct netfs_inode {
struct fscache_cookie *cache;
#endif
struct mutex wb_lock; /* Writeback serialisation */
loff_t remote_i_size; /* Size of the remote file */
loff_t zero_point; /* Size after which we assume there's no data
loff_t _remote_i_size; /* Size of the remote file */
loff_t _zero_point; /* Size after which we assume there's no data
* on the server */
atomic_t io_count; /* Number of outstanding reqs */
unsigned long flags;
@@ -252,7 +252,7 @@ struct netfs_io_request {
unsigned long long collected_to; /* Point we've collected to */
unsigned long long cleaned_to; /* Position we've cleaned folios to */
unsigned long long abandon_to; /* Position to abandon folios to */
pgoff_t no_unlock_folio; /* Don't unlock this folio after read */
const struct folio *no_unlock_folio; /* Don't unlock this folio after read */
unsigned int direct_bv_count; /* Number of elements in direct_bv[] */
unsigned int debug_id;
unsigned int rsize; /* Maximum read size (0 for none) */
@@ -474,6 +474,254 @@ static inline struct netfs_inode *netfs_inode(struct inode *inode)
return container_of(inode, struct netfs_inode, inode);
}
/**
* netfs_read_remote_i_size - Read remote_i_size safely
* @inode: The inode to access
*
* Read remote_i_size safely without the potential for tearing on 32-bit
* arches.
*
* NOTE: on a 32bit arch with a preemptible kernel and a UP compile, the read
* and the write must be atomic with respect to the local cpu (unlike with
* preempt disabled), but they don't need to be atomic with respect to other
* cpus like in true SMP (so they need to either locally disable irq around
* the read or, for example on x86, they can still be implemented as a
* cmpxchg8b without the need of the lock prefix). For SMP compiles and 64bit
* archs it makes no difference if preempt is enabled or not.
*
* Return: The current value of remote_i_size.
*/
static inline unsigned long long netfs_read_remote_i_size(const struct inode *inode)
{
const struct netfs_inode *ictx = container_of(inode, struct netfs_inode, inode);
unsigned long long remote_i_size;
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
unsigned int seq;
/* Retry until the load did not overlap an i_size_seqcount write section. */
do {
seq = read_seqcount_begin(&inode->i_size_seqcount);
remote_i_size = ictx->_remote_i_size;
} while (read_seqcount_retry(&inode->i_size_seqcount, seq));
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
/* 32-bit UP with preemption: do the load with preemption disabled. */
preempt_disable();
remote_i_size = ictx->_remote_i_size;
preempt_enable();
#else
/* Pairs with smp_store_release() in netfs_write_remote_i_size() */
remote_i_size = smp_load_acquire(&ictx->_remote_i_size);
#endif
return remote_i_size;
}
/**
 * netfs_write_remote_i_size - Set remote_i_size safely
 * @inode: The inode to access
 * @remote_i_size: The new value for the size of the file on the server
 *
 * Set remote_i_size safely without the potential for tearing on 32-bit arches.
 *
 * Context: The caller must hold inode->i_lock.
 *
 * NOTE: unlike netfs_read_remote_i_size(), netfs_write_remote_i_size() does
 * need locking around it (inode->i_lock, see above), otherwise on 32bit/SMP an
 * update of i_size_seqcount can be lost, resulting in subsequent readers of
 * that seqcount spinning forever.
 */
static inline void netfs_write_remote_i_size(struct inode *inode,
					     unsigned long long remote_i_size)
{
	struct netfs_inode *ictx = netfs_inode(inode);

#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
	/*
	 * Keep the seqcount write section non-preemptible, the same way
	 * netfs_resize_file() does, so that a reader on this CPU cannot
	 * preempt us mid-update and then spin on the odd sequence count.
	 */
	preempt_disable();
	write_seqcount_begin(&inode->i_size_seqcount);
	ictx->_remote_i_size = remote_i_size;
	write_seqcount_end(&inode->i_size_seqcount);
	preempt_enable();
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
	/* 32-bit UP with preemption: do the store with preemption disabled. */
	preempt_disable();
	ictx->_remote_i_size = remote_i_size;
	preempt_enable();
#else
	/*
	 * Pairs with smp_load_acquire() in netfs_read_remote_i_size() to
	 * ensure changes related to inode size (such as page contents) are
	 * visible before we see the changed inode size.
	 */
	smp_store_release(&ictx->_remote_i_size, remote_i_size);
#endif
}
/**
* netfs_read_zero_point - Read zero_point safely
* @inode: The inode to access
*
* Read zero_point safely without the potential for tearing on 32-bit
* arches.
*
* NOTE: in a 32bit arch with a preemptable kernel and an UP compile the
* i_size_read/write must be atomic with respect to the local cpu (unlike with
* preempt disabled), but they don't need to be atomic with respect to other
* cpus like in true SMP (so they need either to either locally disable irq
* around the read or for example on x86 they can be still implemented as a
* cmpxchg8b without the need of the lock prefix). For SMP compiles and 64bit
* archs it makes no difference if preempt is enabled or not.
*/
static inline unsigned long long netfs_read_zero_point(const struct inode *inode)
{
struct netfs_inode *ictx = container_of(inode, struct netfs_inode, inode);
unsigned long long zero_point;
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
unsigned int seq;
do {
seq = read_seqcount_begin(&inode->i_size_seqcount);
zero_point = ictx->_zero_point;
} while (read_seqcount_retry(&inode->i_size_seqcount, seq));
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
preempt_disable();
zero_point = ictx->_zero_point;
preempt_enable();
#else
/* Pairs with smp_store_release() in netfs_write_zero_point() */
zero_point = smp_load_acquire(&ictx->_zero_point);
#endif
return zero_point;
}
/**
 * netfs_write_zero_point - Set zero_point safely
 * @inode: The inode to access
 * @zero_point: The new value for the point beyond which the server has no data
 *
 * Set zero_point safely without the potential for tearing on 32-bit arches.
 *
 * Context: The caller must hold inode->i_lock.
 *
 * NOTE: unlike netfs_read_zero_point(), netfs_write_zero_point() does need
 * locking around it (inode->i_lock, see above), otherwise on 32bit/SMP an
 * update of i_size_seqcount can be lost, resulting in subsequent readers of
 * that seqcount spinning forever.
 */
static inline void netfs_write_zero_point(struct inode *inode,
					  unsigned long long zero_point)
{
	struct netfs_inode *ictx = netfs_inode(inode);

#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
	/*
	 * Keep the seqcount write section non-preemptible, the same way
	 * netfs_resize_file() does, so that a reader on this CPU cannot
	 * preempt us mid-update and then spin on the odd sequence count.
	 */
	preempt_disable();
	write_seqcount_begin(&inode->i_size_seqcount);
	ictx->_zero_point = zero_point;
	write_seqcount_end(&inode->i_size_seqcount);
	preempt_enable();
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
	/* 32-bit UP with preemption: do the store with preemption disabled. */
	preempt_disable();
	ictx->_zero_point = zero_point;
	preempt_enable();
#else
	/*
	 * Pairs with smp_load_acquire() in netfs_read_zero_point() to
	 * ensure changes related to inode size (such as page contents) are
	 * visible before we see the changed inode size.
	 */
	smp_store_release(&ictx->_zero_point, zero_point);
#endif
}
/**
* netfs_read_sizes - Read i_size, remote_i_size and zero_point safely
* @inode: The inode to access
* @i_size: Where to return the local file size.
* @remote_i_size: Where to return the size of the file on the server
* @zero_point: Where to return the point beyond which the server has no data
*
* Read i_size, remote_i_size and zero_point safely without the potential for
* tearing on 32-bit arches.
*
* NOTE: on a 32bit arch with a preemptible kernel and a UP compile, the read
* and the write must be atomic with respect to the local cpu (unlike with
* preempt disabled), but they don't need to be atomic with respect to other
* cpus like in true SMP (so they need to either locally disable irq around
* the read or, for example on x86, they can still be implemented as a
* cmpxchg8b without the need of the lock prefix). For SMP compiles and 64bit
* archs it makes no difference if preempt is enabled or not.
*/
static inline void netfs_read_sizes(const struct inode *inode,
unsigned long long *i_size,
unsigned long long *remote_i_size,
unsigned long long *zero_point)
{
const struct netfs_inode *ictx = container_of(inode, struct netfs_inode, inode);
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
unsigned int seq;
/* All three values are read inside one i_size_seqcount read section. */
do {
seq = read_seqcount_begin(&inode->i_size_seqcount);
*i_size = inode->i_size;
*remote_i_size = ictx->_remote_i_size;
*zero_point = ictx->_zero_point;
} while (read_seqcount_retry(&inode->i_size_seqcount, seq));
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
/* 32-bit UP with preemption: do the loads with preemption disabled. */
preempt_disable();
*i_size = inode->i_size;
*remote_i_size = ictx->_remote_i_size;
*zero_point = ictx->_zero_point;
preempt_enable();
#else
/* Here the three values are fetched with three separate acquire loads. */
/* Pairs with smp_store_release() in i_size_write() */
*i_size = smp_load_acquire(&inode->i_size);
/* Pairs with smp_store_release() in netfs_write_remote_i_size() */
*remote_i_size = smp_load_acquire(&ictx->_remote_i_size);
/* Pairs with smp_store_release() in netfs_write_zero_point() */
*zero_point = smp_load_acquire(&ictx->_zero_point);
#endif
}
/**
 * netfs_write_sizes - Set i_size, remote_i_size and zero_point safely
 * @inode: The inode to access
 * @i_size: The new value for the local size of the file
 * @remote_i_size: The new value for the size of the file on the server
 * @zero_point: The new value for the point beyond which the server has no data
 *
 * Set i_size, remote_i_size and zero_point safely without the potential for
 * tearing on 32-bit arches.
 *
 * Context: The caller must hold inode->i_lock.
 *
 * NOTE: unlike netfs_read_sizes(), netfs_write_sizes() does need locking
 * around it (inode->i_lock, see above), otherwise on 32bit/SMP an update of
 * i_size_seqcount can be lost, resulting in subsequent readers of that
 * seqcount spinning forever.
 */
static inline void netfs_write_sizes(struct inode *inode,
				     unsigned long long i_size,
				     unsigned long long remote_i_size,
				     unsigned long long zero_point)
{
	struct netfs_inode *ictx = netfs_inode(inode);

#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
	/*
	 * Keep the seqcount write section non-preemptible, the same way
	 * netfs_resize_file() does, so that a reader on this CPU cannot
	 * preempt us mid-update and then spin on the odd sequence count.
	 */
	preempt_disable();
	write_seqcount_begin(&inode->i_size_seqcount);
	inode->i_size = i_size;
	ictx->_remote_i_size = remote_i_size;
	ictx->_zero_point = zero_point;
	write_seqcount_end(&inode->i_size_seqcount);
	preempt_enable();
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
	/* 32-bit UP with preemption: do the stores with preemption disabled. */
	preempt_disable();
	inode->i_size = i_size;
	ictx->_remote_i_size = remote_i_size;
	ictx->_zero_point = zero_point;
	preempt_enable();
#else
	/*
	 * Pairs with smp_load_acquire() in i_size_read(),
	 * netfs_read_remote_i_size() and netfs_read_zero_point() to ensure
	 * changes related to inode size (such as page contents) are visible
	 * before we see the changed inode size.
	 */
	smp_store_release(&inode->i_size, i_size);
	smp_store_release(&ictx->_remote_i_size, remote_i_size);
	smp_store_release(&ictx->_zero_point, zero_point);
#endif
}
/**
* netfs_inode_init - Initialise a netfslib inode context
* @ctx: The netfs inode to initialise
@@ -488,8 +736,8 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
bool use_zero_point)
{
ctx->ops = ops;
ctx->remote_i_size = i_size_read(&ctx->inode);
ctx->zero_point = LLONG_MAX;
ctx->_remote_i_size = i_size_read(&ctx->inode);
ctx->_zero_point = LLONG_MAX;
ctx->flags = 0;
atomic_set(&ctx->io_count, 0);
#if IS_ENABLED(CONFIG_FSCACHE)
@@ -498,7 +746,7 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
mutex_init(&ctx->wb_lock);
/* ->releasepage() drives zero_point */
if (use_zero_point) {
ctx->zero_point = ctx->remote_i_size;
ctx->_zero_point = ctx->_remote_i_size;
mapping_set_release_always(ctx->inode.i_mapping);
}
}
@@ -511,13 +759,40 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
*
* Inform the netfs lib that a file got resized so that it can adjust its state.
*/
static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size,
static inline void netfs_resize_file(struct netfs_inode *ictx,
unsigned long long new_i_size,
bool changed_on_server)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
struct inode *inode = &ictx->inode;
preempt_disable();
write_seqcount_begin(&inode->i_size_seqcount);
if (changed_on_server)
ctx->remote_i_size = new_i_size;
if (new_i_size < ctx->zero_point)
ctx->zero_point = new_i_size;
ictx->_remote_i_size = new_i_size;
if (new_i_size < ictx->_zero_point)
ictx->_zero_point = new_i_size;
write_seqcount_end(&inode->i_size_seqcount);
preempt_enable();
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
preempt_disable();
if (changed_on_server)
ictx->_remote_i_size = new_i_size;
if (new_i_size < ictx->_zero_point)
ictx->_zero_point = new_i_size;
preempt_enable();
#else
/*
* Pairs with smp_load_acquire() in netfs_read_remote_i_size and
* netfs_read_zero_point() to ensure changes related to inode size
* (such as page contents) are visible before we see the changed inode
* size.
*/
if (changed_on_server)
smp_store_release(&ictx->_remote_i_size, new_i_size);
if (new_i_size < ictx->_zero_point)
smp_store_release(&ictx->_zero_point, new_i_size);
#endif
}
/**

View File

@@ -177,7 +177,11 @@
EM(netfs_folio_is_uptodate, "mod-uptodate") \
EM(netfs_just_prefetch, "mod-prefetch") \
EM(netfs_whole_folio_modify, "mod-whole-f") \
EM(netfs_whole_folio_modify_efault, "mod-whole-f!") \
EM(netfs_whole_folio_modify_filled, "mod-whole-f+") \
EM(netfs_whole_folio_modify_filled_efault, "mod-whole-f+!") \
EM(netfs_modify_and_clear, "mod-n-clear") \
EM(netfs_modify_and_clear_rm_finfo, "mod-n-clear+") \
EM(netfs_streaming_write, "mod-streamw") \
EM(netfs_streaming_write_cont, "mod-streamw+") \
EM(netfs_flush_content, "flush") \
@@ -194,6 +198,10 @@
EM(netfs_folio_trace_copy_to_cache, "mark-copy") \
EM(netfs_folio_trace_end_copy, "end-copy") \
EM(netfs_folio_trace_filled_gaps, "filled-gaps") \
EM(netfs_folio_trace_invalidate_all, "inval-all") \
EM(netfs_folio_trace_invalidate_front, "inval-front") \
EM(netfs_folio_trace_invalidate_middle, "inval-mid") \
EM(netfs_folio_trace_invalidate_tail, "inval-tail") \
EM(netfs_folio_trace_kill, "kill") \
EM(netfs_folio_trace_kill_cc, "kill-cc") \
EM(netfs_folio_trace_kill_g, "kill-g") \