From 66d938e89e940e512f4c3deac938ecef399c13f9 Mon Sep 17 00:00:00 2001 From: Lizhi Xu Date: Fri, 5 Sep 2025 09:59:25 +0800 Subject: [PATCH 1/3] netfs: Prevent duplicate unlocking The filio lock has been released here, so there is no need to jump to error_folio_unlock to release it again. Reported-by: syzbot+b73c7d94a151e2ee1e9b@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=b73c7d94a151e2ee1e9b Signed-off-by: Lizhi Xu Acked-by: David Howells Reviewed-by: Paulo Alcantara (Red Hat) Signed-off-by: Christian Brauner --- fs/netfs/buffered_write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index f27ea5099a68..09394ac2c180 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -347,7 +347,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, folio_put(folio); ret = filemap_write_and_wait_range(mapping, fpos, fpos + flen - 1); if (ret < 0) - goto error_folio_unlock; + goto out; continue; copied: From 9158c6bb245113d4966df9b2ba602197a379412e Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Tue, 23 Sep 2025 15:51:04 +0800 Subject: [PATCH 2/3] afs: Fix potential null pointer dereference in afs_put_server afs_put_server() accessed server->debug_id before the NULL check, which could lead to a null pointer dereference. Move the debug_id assignment, ensuring we never dereference a NULL server pointer. Fixes: 2757a4dc1849 ("afs: Fix access after dec in put functions") Cc: stable@vger.kernel.org Signed-off-by: Zhen Ni Acked-by: David Howells Reviewed-by: Jeffrey Altman Signed-off-by: Christian Brauner --- fs/afs/server.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/afs/server.c b/fs/afs/server.c index a97562f831eb..c4428ebddb1d 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -331,13 +331,14 @@ struct afs_server *afs_use_server(struct afs_server *server, bool activate, void afs_put_server(struct afs_net *net, struct afs_server *server, enum afs_server_trace reason) { - unsigned int a, debug_id = server->debug_id; + unsigned int a, debug_id; bool zero; int r; if (!server) return; + debug_id = server->debug_id; a = atomic_read(&server->active); zero = __refcount_dec_and_test(&server->ref, &r); trace_afs_server(debug_id, r - 1, a, reason); From 4d428dca252c858bfac691c31fa95d26cd008706 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Thu, 25 Sep 2025 14:08:20 +0100 Subject: [PATCH 3/3] netfs: fix reference leak Commit 20d72b00ca81 ("netfs: Fix the request's work item to not require a ref") modified netfs_alloc_request() to initialize the reference counter to 2 instead of 1. The rationale was that the requet's "work" would release the second reference after completion (via netfs_{read,write}_collection_worker()). That works most of the time if all goes well. However, it leaks this additional reference if the request is released before the I/O operation has been submitted: the error code path only decrements the reference counter once and the work item will never be queued because there will never be a completion. This has caused outages of our whole server cluster today because tasks were blocked in netfs_wait_for_outstanding_io(), leading to deadlocks in Ceph (another bug that I will address soon in another patch). This was caused by a netfs_pgpriv2_begin_copy_to_cache() call which failed in fscache_begin_write_operation(). The leaked netfs_io_request was never completed, leaving `netfs_inode.io_count` with a positive value forever. All of this is super-fragile code. Finding out which code paths will lead to an eventual completion and which do not is hard to see: - Some functions like netfs_create_write_req() allocate a request, but will never submit any I/O. - netfs_unbuffered_read_iter_locked() calls netfs_unbuffered_read() and then netfs_put_request(); however, netfs_unbuffered_read() can also fail early before submitting the I/O request, therefore another netfs_put_request() call must be added there. A rule of thumb is that functions that return a `netfs_io_request` do not submit I/O, and all of their callers must be checked. For my taste, the whole netfs code needs an overhaul to make reference counting easier to understand and less fragile & obscure. But to fix this bug here and now and produce a patch that is adequate for a stable backport, I tried a minimal approach that quickly frees the request object upon early failure. I decided against adding a second netfs_put_request() each time because that would cause code duplication which obscures the code further. Instead, I added the function netfs_put_failed_request() which frees such a failed request synchronously under the assumption that the reference count is exactly 2 (as initially set by netfs_alloc_request() and never touched), verified by a WARN_ON_ONCE(). It then deinitializes the request object (without going through the "cleanup_work" indirection) and frees the allocation (with RCU protection to protect against concurrent access by netfs_requests_seq_start()). All code paths that fail early have been changed to call netfs_put_failed_request() instead of netfs_put_request(). Additionally, I have added a netfs_put_request() call to netfs_unbuffered_read() as explained above because the netfs_put_failed_request() approach does not work there. Fixes: 20d72b00ca81 ("netfs: Fix the request's work item to not require a ref") Signed-off-by: Max Kellermann Signed-off-by: David Howells cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: stable@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_read.c | 10 +++++----- fs/netfs/direct_read.c | 7 ++++++- fs/netfs/direct_write.c | 6 +++++- fs/netfs/internal.h | 1 + fs/netfs/objects.c | 30 +++++++++++++++++++++++++++--- fs/netfs/read_pgpriv2.c | 2 +- fs/netfs/read_single.c | 2 +- fs/netfs/write_issue.c | 3 +-- 8 files changed, 47 insertions(+), 14 deletions(-) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 18b3dc74c70e..37ab6f28b5ad 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -369,7 +369,7 @@ void netfs_readahead(struct readahead_control *ractl) return netfs_put_request(rreq, netfs_rreq_trace_put_return); cleanup_free: - return netfs_put_request(rreq, netfs_rreq_trace_put_failed); + return netfs_put_failed_request(rreq); } EXPORT_SYMBOL(netfs_readahead); @@ -472,7 +472,7 @@ static int netfs_read_gaps(struct file *file, struct folio *folio) return ret < 0 ? ret : 0; discard: - netfs_put_request(rreq, netfs_rreq_trace_put_discard); + netfs_put_failed_request(rreq); alloc_error: folio_unlock(folio); return ret; @@ -532,7 +532,7 @@ int netfs_read_folio(struct file *file, struct folio *folio) return ret < 0 ? ret : 0; discard: - netfs_put_request(rreq, netfs_rreq_trace_put_discard); + netfs_put_failed_request(rreq); alloc_error: folio_unlock(folio); return ret; @@ -699,7 +699,7 @@ int netfs_write_begin(struct netfs_inode *ctx, return 0; error_put: - netfs_put_request(rreq, netfs_rreq_trace_put_failed); + netfs_put_failed_request(rreq); error: if (folio) { folio_unlock(folio); @@ -754,7 +754,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, return ret < 0 ? ret : 0; error_put: - netfs_put_request(rreq, netfs_rreq_trace_put_discard); + netfs_put_failed_request(rreq); error: _leave(" = %d", ret); return ret; diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c index a05e13472baf..a498ee8d6674 100644 --- a/fs/netfs/direct_read.c +++ b/fs/netfs/direct_read.c @@ -131,6 +131,7 @@ static ssize_t netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync) if (rreq->len == 0) { pr_err("Zero-sized read [R=%x]\n", rreq->debug_id); + netfs_put_request(rreq, netfs_rreq_trace_put_discard); return -EIO; } @@ -205,7 +206,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i if (user_backed_iter(iter)) { ret = netfs_extract_user_iter(iter, rreq->len, &rreq->buffer.iter, 0); if (ret < 0) - goto out; + goto error_put; rreq->direct_bv = (struct bio_vec *)rreq->buffer.iter.bvec; rreq->direct_bv_count = ret; rreq->direct_bv_unpin = iov_iter_extract_will_pin(iter); @@ -238,6 +239,10 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i if (ret > 0) orig_count -= ret; return ret; + +error_put: + netfs_put_failed_request(rreq); + return ret; } EXPORT_SYMBOL(netfs_unbuffered_read_iter_locked); diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index a16660ab7f83..a9d1c3b2c084 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -57,7 +57,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * n = netfs_extract_user_iter(iter, len, &wreq->buffer.iter, 0); if (n < 0) { ret = n; - goto out; + goto error_put; } wreq->direct_bv = (struct bio_vec *)wreq->buffer.iter.bvec; wreq->direct_bv_count = n; @@ -101,6 +101,10 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * out: netfs_put_request(wreq, netfs_rreq_trace_put_return); return ret; + +error_put: + netfs_put_failed_request(wreq); + return ret; } EXPORT_SYMBOL(netfs_unbuffered_write_iter_locked); diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index d4f16fefd965..4319611f5354 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -87,6 +87,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what); void netfs_clear_subrequests(struct netfs_io_request *rreq); void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what); +void netfs_put_failed_request(struct netfs_io_request *rreq); struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq); static inline void netfs_see_request(struct netfs_io_request *rreq, diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c index e8c99738b5bb..40a1c7d6f6e0 100644 --- a/fs/netfs/objects.c +++ b/fs/netfs/objects.c @@ -116,10 +116,8 @@ static void netfs_free_request_rcu(struct rcu_head *rcu) netfs_stat_d(&netfs_n_rh_rreq); } -static void netfs_free_request(struct work_struct *work) +static void netfs_deinit_request(struct netfs_io_request *rreq) { - struct netfs_io_request *rreq = - container_of(work, struct netfs_io_request, cleanup_work); struct netfs_inode *ictx = netfs_inode(rreq->inode); unsigned int i; @@ -149,6 +147,14 @@ static void netfs_free_request(struct work_struct *work) if (atomic_dec_and_test(&ictx->io_count)) wake_up_var(&ictx->io_count); +} + +static void netfs_free_request(struct work_struct *work) +{ + struct netfs_io_request *rreq = + container_of(work, struct netfs_io_request, cleanup_work); + + netfs_deinit_request(rreq); call_rcu(&rreq->rcu, netfs_free_request_rcu); } @@ -167,6 +173,24 @@ void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace } } +/* + * Free a request (synchronously) that was just allocated but has + * failed before it could be submitted. + */ +void netfs_put_failed_request(struct netfs_io_request *rreq) +{ + int r = refcount_read(&rreq->ref); + + /* new requests have two references (see + * netfs_alloc_request(), and this function is only allowed on + * new request objects + */ + WARN_ON_ONCE(r != 2); + + trace_netfs_rreq_ref(rreq->debug_id, r, netfs_rreq_trace_put_failed); + netfs_free_request(&rreq->cleanup_work); +} + /* * Allocate and partially initialise an I/O request structure. */ diff --git a/fs/netfs/read_pgpriv2.c b/fs/netfs/read_pgpriv2.c index 8097bc069c1d..a1489aa29f78 100644 --- a/fs/netfs/read_pgpriv2.c +++ b/fs/netfs/read_pgpriv2.c @@ -118,7 +118,7 @@ static struct netfs_io_request *netfs_pgpriv2_begin_copy_to_cache( return creq; cancel_put: - netfs_put_request(creq, netfs_rreq_trace_put_return); + netfs_put_failed_request(creq); cancel: rreq->copy_to_cache = ERR_PTR(-ENOBUFS); clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags); diff --git a/fs/netfs/read_single.c b/fs/netfs/read_single.c index fa622a6cd56d..5c0dc4efc792 100644 --- a/fs/netfs/read_single.c +++ b/fs/netfs/read_single.c @@ -189,7 +189,7 @@ ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_ite return ret; cleanup_free: - netfs_put_request(rreq, netfs_rreq_trace_put_failed); + netfs_put_failed_request(rreq); return ret; } EXPORT_SYMBOL(netfs_read_single); diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 0584cba1a043..dd8743bc8d7f 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -133,8 +133,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, return wreq; nomem: - wreq->error = -ENOMEM; - netfs_put_request(wreq, netfs_rreq_trace_put_failed); + netfs_put_failed_request(wreq); return ERR_PTR(-ENOMEM); }