From 19a288110435408fcf2cb274823f2a17d03caf9b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 17 Feb 2025 18:51:09 +0000 Subject: [PATCH 01/10] ceph: Remove ceph_writepage() Ceph already has a writepages operation which is preferred over writepage in all situations except for page migration. By adding a migrate_folio operation, there will be no situations in which ->writepage should be called. filemap_migrate_folio() is an appropriate operation to use because the ceph data stored in folio->private does not contain any reference to the memory address of the folio. Signed-off-by: "Matthew Wilcox (Oracle)" Link: https://lore.kernel.org/r/20250217185119.430193-2-willy@infradead.org Tested-by: Viacheslav Dubeyko Reviewed-by: Viacheslav Dubeyko Signed-off-by: Christian Brauner --- fs/ceph/addr.c | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index d82ce4867fca..9503cae07929 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -852,32 +852,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) return err; } -static int ceph_writepage(struct page *page, struct writeback_control *wbc) -{ - int err; - struct inode *inode = page->mapping->host; - BUG_ON(!inode); - ihold(inode); - - if (wbc->sync_mode == WB_SYNC_NONE && - ceph_inode_to_fs_client(inode)->write_congested) { - redirty_page_for_writepage(wbc, page); - return AOP_WRITEPAGE_ACTIVATE; - } - - folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */ - - err = writepage_nounlock(page, wbc); - if (err == -ERESTARTSYS) { - /* direct memory reclaimer was killed by SIGKILL. return 0 - * to prevent caller from setting mapping/page error */ - err = 0; - } - unlock_page(page); - iput(inode); - return err; -} - /* * async writeback completion handler. * @@ -1944,7 +1918,6 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, const struct address_space_operations ceph_aops = { .read_folio = netfs_read_folio, .readahead = netfs_readahead, - .writepage = ceph_writepage, .writepages = ceph_writepages_start, .write_begin = ceph_write_begin, .write_end = ceph_write_end, @@ -1952,6 +1925,7 @@ const struct address_space_operations ceph_aops = { .invalidate_folio = ceph_invalidate_folio, .release_folio = netfs_release_folio, .direct_IO = noop_direct_IO, + .migrate_folio = filemap_migrate_folio, }; static void ceph_block_sigs(sigset_t *oldset) From 88a59bda3f3786107694a3f5fd7f9df421752c21 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 17 Feb 2025 18:51:10 +0000 Subject: [PATCH 02/10] ceph: Use a folio in ceph_page_mkwrite() Convert the passed page to a folio and use it throughout ceph_page_mkwrite(). Removes the last call to page_mkwrite_check_truncate(), the last call to offset_in_thp() and one of the last calls to thp_size(). Saves a few calls to compound_head(). Signed-off-by: "Matthew Wilcox (Oracle)" Link: https://lore.kernel.org/r/20250217185119.430193-3-willy@infradead.org Tested-by: Viacheslav Dubeyko Reviewed-by: Viacheslav Dubeyko Signed-off-by: Christian Brauner --- fs/ceph/addr.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 9503cae07929..98902989523f 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -2042,8 +2042,8 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; struct ceph_cap_flush *prealloc_cf; - struct page *page = vmf->page; - loff_t off = page_offset(page); + struct folio *folio = page_folio(vmf->page); + loff_t off = folio_pos(folio); loff_t size = i_size_read(inode); size_t len; int want, got, err; @@ -2060,10 +2060,10 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) sb_start_pagefault(inode->i_sb); ceph_block_sigs(&oldset); - if (off + thp_size(page) <= size) - len = thp_size(page); + if (off + folio_size(folio) <= size) + len = folio_size(folio); else - len = offset_in_thp(page, size); + len = offset_in_folio(folio, size); doutc(cl, "%llx.%llx %llu~%zd getting caps i_size %llu\n", ceph_vinop(inode), off, len, size); @@ -2080,30 +2080,30 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) doutc(cl, "%llx.%llx %llu~%zd got cap refs on %s\n", ceph_vinop(inode), off, len, ceph_cap_string(got)); - /* Update time before taking page lock */ + /* Update time before taking folio lock */ file_update_time(vma->vm_file); inode_inc_iversion_raw(inode); do { struct ceph_snap_context *snapc; - lock_page(page); + folio_lock(folio); - if (page_mkwrite_check_truncate(page, inode) < 0) { - unlock_page(page); + if (folio_mkwrite_check_truncate(folio, inode) < 0) { + folio_unlock(folio); ret = VM_FAULT_NOPAGE; break; } - snapc = ceph_find_incompatible(page); + snapc = ceph_find_incompatible(&folio->page); if (!snapc) { - /* success. we'll keep the page locked. */ - set_page_dirty(page); + /* success. we'll keep the folio locked. */ + folio_mark_dirty(folio); ret = VM_FAULT_LOCKED; break; } - unlock_page(page); + folio_unlock(folio); if (IS_ERR(snapc)) { ret = VM_FAULT_SIGBUS; From f9707a8b5b9d0a631e0a64eab5c3d2bb6d43758c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 17 Feb 2025 18:51:11 +0000 Subject: [PATCH 03/10] ceph: Convert ceph_find_incompatible() to take a folio Both callers already have the folio. Pass it in and use it throughout. Removes some hidden calls to compound_head() and a reference to page->mapping. Signed-off-by: "Matthew Wilcox (Oracle)" Link: https://lore.kernel.org/r/20250217185119.430193-4-willy@infradead.org Tested-by: Viacheslav Dubeyko Reviewed-by: Viacheslav Dubeyko Signed-off-by: Christian Brauner --- fs/ceph/addr.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 98902989523f..30355a28c4ba 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1771,56 +1771,56 @@ static int context_is_writeable_or_written(struct inode *inode, /** * ceph_find_incompatible - find an incompatible context and return it - * @page: page being dirtied + * @folio: folio being dirtied * - * We are only allowed to write into/dirty a page if the page is + * We are only allowed to write into/dirty a folio if the folio is * clean, or already dirty within the same snap context. Returns a * conflicting context if there is one, NULL if there isn't, or a * negative error code on other errors. * - * Must be called with page lock held. + * Must be called with folio lock held. */ static struct ceph_snap_context * -ceph_find_incompatible(struct page *page) +ceph_find_incompatible(struct folio *folio) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct ceph_client *cl = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); if (ceph_inode_is_shutdown(inode)) { - doutc(cl, " %llx.%llx page %p is shutdown\n", - ceph_vinop(inode), page); + doutc(cl, " %llx.%llx folio %p is shutdown\n", + ceph_vinop(inode), folio); return ERR_PTR(-ESTALE); } for (;;) { struct ceph_snap_context *snapc, *oldest; - wait_on_page_writeback(page); + folio_wait_writeback(folio); - snapc = page_snap_context(page); + snapc = page_snap_context(&folio->page); if (!snapc || snapc == ci->i_head_snapc) break; /* - * this page is already dirty in another (older) snap + * this folio is already dirty in another (older) snap * context! is it writeable now? */ oldest = get_oldest_context(inode, NULL, NULL); if (snapc->seq > oldest->seq) { /* not writeable -- return it for the caller to deal with */ ceph_put_snap_context(oldest); - doutc(cl, " %llx.%llx page %p snapc %p not current or oldest\n", - ceph_vinop(inode), page, snapc); + doutc(cl, " %llx.%llx folio %p snapc %p not current or oldest\n", + ceph_vinop(inode), folio, snapc); return ceph_get_snap_context(snapc); } ceph_put_snap_context(oldest); - /* yay, writeable, do it now (without dropping page lock) */ - doutc(cl, " %llx.%llx page %p snapc %p not current, but oldest\n", - ceph_vinop(inode), page, snapc); - if (clear_page_dirty_for_io(page)) { - int r = writepage_nounlock(page, NULL); + /* yay, writeable, do it now (without dropping folio lock) */ + doutc(cl, " %llx.%llx folio %p snapc %p not current, but oldest\n", + ceph_vinop(inode), folio, snapc); + if (folio_clear_dirty_for_io(folio)) { + int r = writepage_nounlock(&folio->page, NULL); if (r < 0) return ERR_PTR(r); } @@ -1835,7 +1835,7 @@ static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_snap_context *snapc; - snapc = ceph_find_incompatible(folio_page(*foliop, 0)); + snapc = ceph_find_incompatible(*foliop); if (snapc) { int r; @@ -2095,7 +2095,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) break; } - snapc = ceph_find_incompatible(&folio->page); + snapc = ceph_find_incompatible(folio); if (!snapc) { /* success. we'll keep the folio locked. */ folio_mark_dirty(folio); From baff9740bc8f1a48aba694ac17d5a026b34982de Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 17 Feb 2025 18:51:12 +0000 Subject: [PATCH 04/10] ceph: Convert ceph_readdir_cache_control to store a folio Pass a folio around instead of a page. This removes an access to page->index and a few hidden calls to compound_head(). Signed-off-by: "Matthew Wilcox (Oracle)" Link: https://lore.kernel.org/r/20250217185119.430193-5-willy@infradead.org Tested-by: Viacheslav Dubeyko Reviewed-by: Viacheslav Dubeyko Signed-off-by: Christian Brauner --- fs/ceph/dir.c | 15 ++++++++------- fs/ceph/inode.c | 26 ++++++++++++++------------ fs/ceph/super.h | 2 +- 3 files changed, 23 insertions(+), 20 deletions(-) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 62e99e65250d..d22be9314de3 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -141,17 +141,18 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx, if (ptr_pos >= i_size_read(dir)) return NULL; - if (!cache_ctl->page || ptr_pgoff != cache_ctl->page->index) { + if (!cache_ctl->folio || ptr_pgoff != cache_ctl->folio->index) { ceph_readdir_cache_release(cache_ctl); - cache_ctl->page = find_lock_page(&dir->i_data, ptr_pgoff); - if (!cache_ctl->page) { - doutc(cl, " page %lu not found\n", ptr_pgoff); + cache_ctl->folio = filemap_lock_folio(&dir->i_data, ptr_pgoff); + if (IS_ERR(cache_ctl->folio)) { + cache_ctl->folio = NULL; + doutc(cl, " folio %lu not found\n", ptr_pgoff); return ERR_PTR(-EAGAIN); } /* reading/filling the cache are serialized by - i_rwsem, no need to use page lock */ - unlock_page(cache_ctl->page); - cache_ctl->dentries = kmap(cache_ctl->page); + i_rwsem, no need to use folio lock */ + folio_unlock(cache_ctl->folio); + cache_ctl->dentries = kmap_local_folio(cache_ctl->folio, 0); } cache_ctl->index = idx & idx_mask; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 7dd6c2275085..c15970fa240f 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1845,10 +1845,9 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, void ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl) { - if (ctl->page) { - kunmap(ctl->page); - put_page(ctl->page); - ctl->page = NULL; + if (ctl->folio) { + folio_release_kmap(ctl->folio, ctl->dentries); + ctl->folio = NULL; } } @@ -1862,20 +1861,23 @@ static int fill_readdir_cache(struct inode *dir, struct dentry *dn, unsigned idx = ctl->index % nsize; pgoff_t pgoff = ctl->index / nsize; - if (!ctl->page || pgoff != ctl->page->index) { + if (!ctl->folio || pgoff != ctl->folio->index) { ceph_readdir_cache_release(ctl); + fgf_t fgf = FGP_LOCK; + if (idx == 0) - ctl->page = grab_cache_page(&dir->i_data, pgoff); - else - ctl->page = find_lock_page(&dir->i_data, pgoff); - if (!ctl->page) { + fgf |= FGP_ACCESSED | FGP_CREAT; + + ctl->folio = __filemap_get_folio(&dir->i_data, pgoff, + fgf, mapping_gfp_mask(&dir->i_data)); + if (!ctl->folio) { ctl->index = -1; return idx == 0 ? -ENOMEM : 0; } /* reading/filling the cache are serialized by - * i_rwsem, no need to use page lock */ - unlock_page(ctl->page); - ctl->dentries = kmap(ctl->page); + * i_rwsem, no need to use folio lock */ + folio_unlock(ctl->folio); + ctl->dentries = kmap_local_folio(ctl->folio, 0); if (idx == 0) memset(ctl->dentries, 0, PAGE_SIZE); } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 7fa1e7be50e4..bb0db0cc8003 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -903,7 +903,7 @@ ceph_find_rw_context(struct ceph_file_info *cf) } struct ceph_readdir_cache_control { - struct page *page; + struct folio *folio; struct dentry **dentries; int index; }; From 62171c16da6000811e172a76a1f73d132c4697e8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 17 Feb 2025 18:51:13 +0000 Subject: [PATCH 05/10] ceph: Convert writepage_nounlock() to write_folio_nounlock() Remove references to page->index, page->mapping, thp_size(), page_offset() and other page APIs in favour of their more efficient folio replacements. Signed-off-by: "Matthew Wilcox (Oracle)" Link: https://lore.kernel.org/r/20250217185119.430193-6-willy@infradead.org Tested-by: Viacheslav Dubeyko Reviewed-by: Viacheslav Dubeyko Signed-off-by: Christian Brauner --- fs/ceph/addr.c | 67 +++++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 30355a28c4ba..d5c43492e92c 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -698,22 +698,23 @@ static u64 get_writepages_data_length(struct inode *inode, } /* - * Write a single page, but leave the page locked. + * Write a folio, but leave it locked. * * If we get a write error, mark the mapping for error, but still adjust the - * dirty page accounting (i.e., page is no longer dirty). + * dirty page accounting (i.e., folio is no longer dirty). */ -static int writepage_nounlock(struct page *page, struct writeback_control *wbc) +static int write_folio_nounlock(struct folio *folio, + struct writeback_control *wbc) { - struct folio *folio = page_folio(page); - struct inode *inode = page->mapping->host; + struct page *page = &folio->page; + struct inode *inode = folio->mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_client *cl = fsc->client; struct ceph_snap_context *snapc, *oldest; - loff_t page_off = page_offset(page); + loff_t page_off = folio_pos(folio); int err; - loff_t len = thp_size(page); + loff_t len = folio_size(folio); loff_t wlen; struct ceph_writeback_ctl ceph_wbc; struct ceph_osd_client *osdc = &fsc->client->osdc; @@ -721,27 +722,27 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) bool caching = ceph_is_cache_enabled(inode); struct page *bounce_page = NULL; - doutc(cl, "%llx.%llx page %p idx %lu\n", ceph_vinop(inode), page, - page->index); + doutc(cl, "%llx.%llx folio %p idx %lu\n", ceph_vinop(inode), folio, + folio->index); if (ceph_inode_is_shutdown(inode)) return -EIO; /* verify this is a writeable snap context */ - snapc = page_snap_context(page); + snapc = page_snap_context(&folio->page); if (!snapc) { - doutc(cl, "%llx.%llx page %p not dirty?\n", ceph_vinop(inode), - page); + doutc(cl, "%llx.%llx folio %p not dirty?\n", ceph_vinop(inode), + folio); return 0; } oldest = get_oldest_context(inode, &ceph_wbc, snapc); if (snapc->seq > oldest->seq) { - doutc(cl, "%llx.%llx page %p snapc %p not writeable - noop\n", - ceph_vinop(inode), page, snapc); + doutc(cl, "%llx.%llx folio %p snapc %p not writeable - noop\n", + ceph_vinop(inode), folio, snapc); /* we should only noop if called by kswapd */ WARN_ON(!(current->flags & PF_MEMALLOC)); ceph_put_snap_context(oldest); - redirty_page_for_writepage(wbc, page); + folio_redirty_for_writepage(wbc, folio); return 0; } ceph_put_snap_context(oldest); @@ -758,8 +759,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) len = ceph_wbc.i_size - page_off; wlen = IS_ENCRYPTED(inode) ? round_up(len, CEPH_FSCRYPT_BLOCK_SIZE) : len; - doutc(cl, "%llx.%llx page %p index %lu on %llu~%llu snapc %p seq %lld\n", - ceph_vinop(inode), page, page->index, page_off, wlen, snapc, + doutc(cl, "%llx.%llx folio %p index %lu on %llu~%llu snapc %p seq %lld\n", + ceph_vinop(inode), folio, folio->index, page_off, wlen, snapc, snapc->seq); if (atomic_long_inc_return(&fsc->writeback_count) > @@ -772,32 +773,32 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) ceph_wbc.truncate_seq, ceph_wbc.truncate_size, true); if (IS_ERR(req)) { - redirty_page_for_writepage(wbc, page); + folio_redirty_for_writepage(wbc, folio); return PTR_ERR(req); } if (wlen < len) len = wlen; - set_page_writeback(page); + folio_start_writeback(folio); if (caching) - ceph_set_page_fscache(page); + ceph_set_page_fscache(&folio->page); ceph_fscache_write_to_cache(inode, page_off, len, caching); if (IS_ENCRYPTED(inode)) { - bounce_page = fscrypt_encrypt_pagecache_blocks(page, + bounce_page = fscrypt_encrypt_pagecache_blocks(&folio->page, CEPH_FSCRYPT_BLOCK_SIZE, 0, GFP_NOFS); if (IS_ERR(bounce_page)) { - redirty_page_for_writepage(wbc, page); - end_page_writeback(page); + folio_redirty_for_writepage(wbc, folio); + folio_end_writeback(folio); ceph_osdc_put_request(req); return PTR_ERR(bounce_page); } } /* it may be a short write due to an object boundary */ - WARN_ON_ONCE(len > thp_size(page)); + WARN_ON_ONCE(len > folio_size(folio)); osd_req_op_extent_osd_data_pages(req, 0, bounce_page ? &bounce_page : &page, wlen, 0, false, false); @@ -823,25 +824,25 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) if (err == -ERESTARTSYS) { /* killed by SIGKILL */ doutc(cl, "%llx.%llx interrupted page %p\n", - ceph_vinop(inode), page); - redirty_page_for_writepage(wbc, page); - end_page_writeback(page); + ceph_vinop(inode), folio); + folio_redirty_for_writepage(wbc, folio); + folio_end_writeback(folio); return err; } if (err == -EBLOCKLISTED) fsc->blocklisted = true; - doutc(cl, "%llx.%llx setting page/mapping error %d %p\n", - ceph_vinop(inode), err, page); + doutc(cl, "%llx.%llx setting mapping error %d %p\n", + ceph_vinop(inode), err, folio); mapping_set_error(&inode->i_data, err); wbc->pages_skipped++; } else { doutc(cl, "%llx.%llx cleaned page %p\n", - ceph_vinop(inode), page); + ceph_vinop(inode), folio); err = 0; /* vfs expects us to return 0 */ } - oldest = detach_page_private(page); + oldest = folio_detach_private(folio); WARN_ON_ONCE(oldest != snapc); - end_page_writeback(page); + folio_end_writeback(folio); ceph_put_wrbuffer_cap_refs(ci, 1, snapc); ceph_put_snap_context(snapc); /* page's reference */ @@ -1820,7 +1821,7 @@ ceph_find_incompatible(struct folio *folio) doutc(cl, " %llx.%llx folio %p snapc %p not current, but oldest\n", ceph_vinop(inode), folio, snapc); if (folio_clear_dirty_for_io(folio)) { - int r = writepage_nounlock(&folio->page, NULL); + int r = write_folio_nounlock(folio, NULL); if (r < 0) return ERR_PTR(r); } From 15fdaf2fd60d76ba72d8e729191e5b6a54a87ad4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 17 Feb 2025 18:51:14 +0000 Subject: [PATCH 06/10] ceph: Convert ceph_check_page_before_write() to use a folio Remove the conversion back to a struct page and just use the folio passed in. Signed-off-by: "Matthew Wilcox (Oracle)" Link: https://lore.kernel.org/r/20250217185119.430193-7-willy@infradead.org Tested-by: Viacheslav Dubeyko Signed-off-by: Christian Brauner --- fs/ceph/addr.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index d5c43492e92c..8cc8a32d34ad 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1132,18 +1132,17 @@ int ceph_check_page_before_write(struct address_space *mapping, struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_client *cl = fsc->client; struct ceph_snap_context *pgsnapc; - struct page *page = &folio->page; - /* only dirty pages, or our accounting breaks */ - if (unlikely(!PageDirty(page)) || unlikely(page->mapping != mapping)) { - doutc(cl, "!dirty or !mapping %p\n", page); + /* only dirty folios, or our accounting breaks */ + if (unlikely(!folio_test_dirty(folio) || folio->mapping != mapping)) { + doutc(cl, "!dirty or !mapping %p\n", folio); return -ENODATA; } /* only if matching snap context */ - pgsnapc = page_snap_context(page); + pgsnapc = page_snap_context(&folio->page); if (pgsnapc != ceph_wbc->snapc) { - doutc(cl, "page snapc %p %lld != oldest %p %lld\n", + doutc(cl, "folio snapc %p %lld != oldest %p %lld\n", pgsnapc, pgsnapc->seq, ceph_wbc->snapc, ceph_wbc->snapc->seq); @@ -1154,7 +1153,7 @@ int ceph_check_page_before_write(struct address_space *mapping, return -ENODATA; } - if (page_offset(page) >= ceph_wbc->i_size) { + if (folio_pos(folio) >= ceph_wbc->i_size) { doutc(cl, "folio at %lu beyond eof %llu\n", folio->index, ceph_wbc->i_size); @@ -1167,8 +1166,8 @@ int ceph_check_page_before_write(struct address_space *mapping, } if (ceph_wbc->strip_unit_end && - (page->index > ceph_wbc->strip_unit_end)) { - doutc(cl, "end of strip unit %p\n", page); + (folio->index > ceph_wbc->strip_unit_end)) { + doutc(cl, "end of strip unit %p\n", folio); return -E2BIG; } From a55cf4fd8faea83dbb1ac7a3cb708193e33aa7a1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 17 Feb 2025 18:51:15 +0000 Subject: [PATCH 07/10] ceph: Remove uses of page from ceph_process_folio_batch() Remove uses of page->index and deprecated page APIs. Saves a lot of hidden calls to compound_head(). Also convert is_page_index_contiguous() to is_folio_index_contiguous() and make its arguments const. Signed-off-by: "Matthew Wilcox (Oracle)" Link: https://lore.kernel.org/r/20250217185119.430193-8-willy@infradead.org Tested-by: Viacheslav Dubeyko Signed-off-by: Christian Brauner --- fs/ceph/addr.c | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 8cc8a32d34ad..c1755799e335 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1216,10 +1216,10 @@ void ceph_allocate_page_array(struct address_space *mapping, } static inline -bool is_page_index_contiguous(struct ceph_writeback_ctl *ceph_wbc, - struct page *page) +bool is_folio_index_contiguous(const struct ceph_writeback_ctl *ceph_wbc, + const struct folio *folio) { - return page->index == (ceph_wbc->offset + ceph_wbc->len) >> PAGE_SHIFT; + return folio->index == (ceph_wbc->offset + ceph_wbc->len) >> PAGE_SHIFT; } static inline @@ -1284,7 +1284,6 @@ int ceph_process_folio_batch(struct address_space *mapping, struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); struct ceph_client *cl = fsc->client; struct folio *folio = NULL; - struct page *page = NULL; unsigned i; int rc = 0; @@ -1294,11 +1293,9 @@ int ceph_process_folio_batch(struct address_space *mapping, if (!folio) continue; - page = &folio->page; - doutc(cl, "? %p idx %lu, folio_test_writeback %#x, " "folio_test_dirty %#x, folio_test_locked %#x\n", - page, page->index, folio_test_writeback(folio), + folio, folio->index, folio_test_writeback(folio), folio_test_dirty(folio), folio_test_locked(folio)); @@ -1311,27 +1308,27 @@ int ceph_process_folio_batch(struct address_space *mapping, } if (ceph_wbc->locked_pages == 0) - lock_page(page); /* first page */ - else if (!trylock_page(page)) + folio_lock(folio); + else if (!folio_trylock(folio)) break; rc = ceph_check_page_before_write(mapping, wbc, ceph_wbc, folio); if (rc == -ENODATA) { rc = 0; - unlock_page(page); + folio_unlock(folio); ceph_wbc->fbatch.folios[i] = NULL; continue; } else if (rc == -E2BIG) { rc = 0; - unlock_page(page); + folio_unlock(folio); ceph_wbc->fbatch.folios[i] = NULL; break; } - if (!clear_page_dirty_for_io(page)) { - doutc(cl, "%p !clear_page_dirty_for_io\n", page); - unlock_page(page); + if (!folio_clear_dirty_for_io(folio)) { + doutc(cl, "%p !folio_clear_dirty_for_io\n", folio); + folio_unlock(folio); ceph_wbc->fbatch.folios[i] = NULL; continue; } @@ -1343,35 +1340,35 @@ int ceph_process_folio_batch(struct address_space *mapping, * allocate a page array */ if (ceph_wbc->locked_pages == 0) { - ceph_allocate_page_array(mapping, ceph_wbc, page); - } else if (!is_page_index_contiguous(ceph_wbc, page)) { + ceph_allocate_page_array(mapping, ceph_wbc, &folio->page); + } else if (!is_folio_index_contiguous(ceph_wbc, folio)) { if (is_num_ops_too_big(ceph_wbc)) { - redirty_page_for_writepage(wbc, page); - unlock_page(page); + folio_redirty_for_writepage(wbc, folio); + folio_unlock(folio); break; } ceph_wbc->num_ops++; - ceph_wbc->offset = (u64)page_offset(page); + ceph_wbc->offset = (u64)folio_pos(folio); ceph_wbc->len = 0; } /* note position of first page in fbatch */ - doutc(cl, "%llx.%llx will write page %p idx %lu\n", - ceph_vinop(inode), page, page->index); + doutc(cl, "%llx.%llx will write folio %p idx %lu\n", + ceph_vinop(inode), folio, folio->index); fsc->write_congested = is_write_congestion_happened(fsc); rc = ceph_move_dirty_page_in_page_array(mapping, wbc, - ceph_wbc, page); + ceph_wbc, &folio->page); if (rc) { - redirty_page_for_writepage(wbc, page); - unlock_page(page); + folio_redirty_for_writepage(wbc, folio); + folio_unlock(folio); break; } ceph_wbc->fbatch.folios[i] = NULL; - ceph_wbc->len += thp_size(page); + ceph_wbc->len += folio_size(folio); } ceph_wbc->processed_in_fbatch = i; From ad49fe2b3d54fe657756e1dbf0acb5ed9cf5c576 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 17 Feb 2025 18:51:16 +0000 Subject: [PATCH 08/10] ceph: Convert ceph_move_dirty_page_in_page_array() to move_dirty_folio_in_page_array() Shorten the name of this internal function by dropping the 'ceph_' prefix and pass in a folio instead of a page. Signed-off-by: "Matthew Wilcox (Oracle)" Link: https://lore.kernel.org/r/20250217185119.430193-9-willy@infradead.org Tested-by: Viacheslav Dubeyko Signed-off-by: Christian Brauner --- fs/ceph/addr.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index c1755799e335..13f4fdd9a8ba 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1236,11 +1236,9 @@ bool is_write_congestion_happened(struct ceph_fs_client *fsc) CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb); } -static inline -int ceph_move_dirty_page_in_page_array(struct address_space *mapping, - struct writeback_control *wbc, - struct ceph_writeback_ctl *ceph_wbc, - struct page *page) +static inline int move_dirty_folio_in_page_array(struct address_space *mapping, + struct writeback_control *wbc, + struct ceph_writeback_ctl *ceph_wbc, struct folio *folio) { struct inode *inode = mapping->host; struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode); @@ -1250,7 +1248,7 @@ int ceph_move_dirty_page_in_page_array(struct address_space *mapping, gfp_t gfp_flags = ceph_wbc->locked_pages ? GFP_NOWAIT : GFP_NOFS; if (IS_ENCRYPTED(inode)) { - pages[index] = fscrypt_encrypt_pagecache_blocks(page, + pages[index] = fscrypt_encrypt_pagecache_blocks(&folio->page, PAGE_SIZE, 0, gfp_flags); @@ -1267,7 +1265,7 @@ int ceph_move_dirty_page_in_page_array(struct address_space *mapping, return PTR_ERR(pages[index]); } } else { - pages[index] = page; + pages[index] = &folio->page; } ceph_wbc->locked_pages++; @@ -1359,8 +1357,8 @@ int ceph_process_folio_batch(struct address_space *mapping, fsc->write_congested = is_write_congestion_happened(fsc); - rc = ceph_move_dirty_page_in_page_array(mapping, wbc, - ceph_wbc, &folio->page); + rc = move_dirty_folio_in_page_array(mapping, wbc, ceph_wbc, + folio); if (rc) { folio_redirty_for_writepage(wbc, folio); folio_unlock(folio); From d1b452673af4e5b3fc211d1344288abcbefb4fac Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 17 Feb 2025 18:51:17 +0000 Subject: [PATCH 09/10] ceph: Pass a folio to ceph_allocate_page_array() Remove two accesses to page->index. Signed-off-by: "Matthew Wilcox (Oracle)" Link: https://lore.kernel.org/r/20250217185119.430193-10-willy@infradead.org Tested-by: Viacheslav Dubeyko Signed-off-by: Christian Brauner --- fs/ceph/addr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 13f4fdd9a8ba..19efba28e461 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1191,7 +1191,7 @@ void __ceph_allocate_page_array(struct ceph_writeback_ctl *ceph_wbc, static inline void ceph_allocate_page_array(struct address_space *mapping, struct ceph_writeback_ctl *ceph_wbc, - struct page *page) + struct folio *folio) { struct inode *inode = mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); @@ -1200,13 +1200,13 @@ void ceph_allocate_page_array(struct address_space *mapping, u32 xlen; /* prepare async write request */ - ceph_wbc->offset = (u64)page_offset(page); + ceph_wbc->offset = (u64)folio_pos(folio); ceph_calc_file_object_mapping(&ci->i_layout, ceph_wbc->offset, ceph_wbc->wsize, &objnum, &objoff, &xlen); ceph_wbc->num_ops = 1; - ceph_wbc->strip_unit_end = page->index + ((xlen - 1) >> PAGE_SHIFT); + ceph_wbc->strip_unit_end = folio->index + ((xlen - 1) >> PAGE_SHIFT); BUG_ON(ceph_wbc->pages); ceph_wbc->max_pages = calc_pages_for(0, (u64)xlen); @@ -1338,7 +1338,7 @@ int ceph_process_folio_batch(struct address_space *mapping, * allocate a page array */ if (ceph_wbc->locked_pages == 0) { - ceph_allocate_page_array(mapping, ceph_wbc, &folio->page); + ceph_allocate_page_array(mapping, ceph_wbc, folio); } else if (!is_folio_index_contiguous(ceph_wbc, folio)) { if (is_num_ops_too_big(ceph_wbc)) { folio_redirty_for_writepage(wbc, folio); From 9dcef93363e7f7b925b3adc4a3171bd00250c8dd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 21 Feb 2025 20:44:19 +0000 Subject: [PATCH 10/10] fs: Remove page_mkwrite_check_truncate() All callers of this function have now been converted to use folio_mkwrite_check_truncate(). Signed-off-by: "Matthew Wilcox (Oracle)" Link: https://lore.kernel.org/r/20250221204421.3590340-1-willy@infradead.org Tested-by: Viacheslav Dubeyko Reviewed-by: Viacheslav Dubeyko Signed-off-by: Christian Brauner --- include/linux/pagemap.h | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 47bfc6b1b632..7fe82d43cf39 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1602,34 +1602,6 @@ static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio, return offset; } -/** - * page_mkwrite_check_truncate - check if page was truncated - * @page: the page to check - * @inode: the inode to check the page against - * - * Returns the number of bytes in the page up to EOF, - * or -EFAULT if the page was truncated. - */ -static inline int page_mkwrite_check_truncate(struct page *page, - struct inode *inode) -{ - loff_t size = i_size_read(inode); - pgoff_t index = size >> PAGE_SHIFT; - int offset = offset_in_page(size); - - if (page->mapping != inode->i_mapping) - return -EFAULT; - - /* page is wholly inside EOF */ - if (page->index < index) - return PAGE_SIZE; - /* page is wholly past EOF */ - if (page->index > index || !offset) - return -EFAULT; - /* page is partially inside EOF */ - return offset; -} - /** * i_blocks_per_folio - How many blocks fit in this folio. * @inode: The inode which contains the blocks.