From 095f627add86a6ddda2c2cfd563b0ee05d0172b2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 May 2025 07:28:52 -0600 Subject: [PATCH 1/5] mm/filemap: gate dropbehind invalidate on folio !dirty && !writeback It's possible for the folio to either get marked for writeback or redirtied. Add a helper, filemap_end_dropbehind(), which guards the folio_unmap_invalidate() call behind a check for the folio being both non-dirty and not under writeback AFTER the folio lock has been acquired. Use this helper in folio_end_dropbehind_write(). Cc: stable@vger.kernel.org Reported-by: Al Viro Fixes: fb7d3bc41493 ("mm/filemap: drop streaming/uncached pages when writeback completes") Link: https://lore.kernel.org/linux-fsdevel/20250525083209.GS2023217@ZenIV/ Signed-off-by: Jens Axboe Link: https://lore.kernel.org/20250527133255.452431-2-axboe@kernel.dk Signed-off-by: Christian Brauner --- mm/filemap.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 7b90cbeb4a1a..008a55290f34 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1589,6 +1589,16 @@ int folio_wait_private_2_killable(struct folio *folio) } EXPORT_SYMBOL(folio_wait_private_2_killable); +static void filemap_end_dropbehind(struct folio *folio) +{ + struct address_space *mapping = folio->mapping; + + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + + if (mapping && !folio_test_writeback(folio) && !folio_test_dirty(folio)) + folio_unmap_invalidate(mapping, folio, 0); +} + /* * If folio was marked as dropbehind, then pages should be dropped when writeback * completes. Do that now. If we fail, it's likely because of a big folio - @@ -1604,8 +1614,7 @@ static void folio_end_dropbehind_write(struct folio *folio) * invalidation in that case. 
*/ if (in_task() && folio_trylock(folio)) { - if (folio->mapping) - folio_unmap_invalidate(folio->mapping, folio, 0); + filemap_end_dropbehind(folio); folio_unlock(folio); } } From 25b065a744ff0c1099bb357be1c40030b5a14c07 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 May 2025 07:28:53 -0600 Subject: [PATCH 2/5] mm/filemap: use filemap_end_dropbehind() for read invalidation Use the filemap_end_dropbehind() helper rather than calling folio_unmap_invalidate() directly, as we need to check if the folio has been redirtied or marked for writeback once the folio lock has been re-acquired. Cc: stable@vger.kernel.org Reported-by: Trond Myklebust Fixes: 8026e49bff9b ("mm/filemap: add read support for RWF_DONTCACHE") Link: https://lore.kernel.org/linux-fsdevel/ba8a9805331ce258a622feaca266b163db681a10.camel@hammerspace.com/ Signed-off-by: Jens Axboe Link: https://lore.kernel.org/20250527133255.452431-3-axboe@kernel.dk Signed-off-by: Christian Brauner --- mm/filemap.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 008a55290f34..6af6d8f2929c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2644,8 +2644,7 @@ static inline bool pos_same_folio(loff_t pos1, loff_t pos2, struct folio *folio) return (pos1 >> shift == pos2 >> shift); } -static void filemap_end_dropbehind_read(struct address_space *mapping, - struct folio *folio) +static void filemap_end_dropbehind_read(struct folio *folio) { if (!folio_test_dropbehind(folio)) return; @@ -2653,7 +2652,7 @@ static void filemap_end_dropbehind_read(struct address_space *mapping, return; if (folio_trylock(folio)) { if (folio_test_clear_dropbehind(folio)) - folio_unmap_invalidate(mapping, folio, 0); + filemap_end_dropbehind(folio); folio_unlock(folio); } } @@ -2774,7 +2773,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter, for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; - filemap_end_dropbehind_read(mapping, 
folio); + filemap_end_dropbehind_read(folio); folio_put(folio); } folio_batch_init(&fbatch); From 7b2b67dbd449afd00fc7279b1ab7ffa3d26fe0c9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 May 2025 07:28:54 -0600 Subject: [PATCH 3/5] Revert "Disable FOP_DONTCACHE for now due to bugs" This reverts commit 478ad02d6844217cc7568619aeb0809d93ade43d. Both the read and write side dirty && writeback races should be resolved now, revert the commit that disabled FOP_DONTCACHE for filesystems. Link: https://lore.kernel.org/linux-fsdevel/20250525083209.GS2023217@ZenIV/ Signed-off-by: Jens Axboe Link: https://lore.kernel.org/20250527133255.452431-4-axboe@kernel.dk Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 0db87f8e676c..57c3db3ef6ad 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2207,7 +2207,7 @@ struct file_operations { /* Supports asynchronous lock callbacks */ #define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6)) /* File system supports uncached read/write buffered IO */ -#define FOP_DONTCACHE 0 /* ((__force fop_flags_t)(1 << 7)) */ +#define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7)) /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, From 1da7a06d9ce4edea3370945af8bfcc71b7744788 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 May 2025 07:28:55 -0600 Subject: [PATCH 4/5] mm/filemap: unify read/write dropbehind naming The read side is filemap_end_dropbehind_read(), while the write side used folio_ as the prefix rather than filemap_. The read side makes more sense, unify the naming such that the write side follows that. 
Signed-off-by: Jens Axboe Link: https://lore.kernel.org/20250527133255.452431-5-axboe@kernel.dk Signed-off-by: Christian Brauner --- mm/filemap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 6af6d8f2929c..2ba1ed116103 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1604,7 +1604,7 @@ static void filemap_end_dropbehind(struct folio *folio) * completes. Do that now. If we fail, it's likely because of a big folio - * just reset dropbehind for that case and latter completions should invalidate. */ -static void folio_end_dropbehind_write(struct folio *folio) +static void filemap_end_dropbehind_write(struct folio *folio) { /* * Hitting !in_task() should not happen off RWF_DONTCACHE writeback, @@ -1659,7 +1659,7 @@ void folio_end_writeback(struct folio *folio) acct_reclaim_writeback(folio); if (folio_dropbehind) - folio_end_dropbehind_write(folio); + filemap_end_dropbehind_write(folio); folio_put(folio); } EXPORT_SYMBOL(folio_end_writeback); From a1d98e4ffb972ab007f5de850ef53c2a46cacf15 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 May 2025 07:28:56 -0600 Subject: [PATCH 5/5] mm/filemap: unify dropbehind flag testing and clearing The read and write sides do this a bit differently. Unify it such that the _{read,write} helpers check the bit before locking, and the generic handler is in charge of clearing the bit and invalidating, once under the folio lock. 
Signed-off-by: Jens Axboe Link: https://lore.kernel.org/20250527133255.452431-6-axboe@kernel.dk Signed-off-by: Christian Brauner --- mm/filemap.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 2ba1ed116103..eef44d7ea12e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1595,7 +1595,11 @@ static void filemap_end_dropbehind(struct folio *folio) VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); - if (mapping && !folio_test_writeback(folio) && !folio_test_dirty(folio)) + if (folio_test_writeback(folio) || folio_test_dirty(folio)) + return; + if (!folio_test_clear_dropbehind(folio)) + return; + if (mapping) folio_unmap_invalidate(mapping, folio, 0); } @@ -1606,6 +1610,9 @@ static void filemap_end_dropbehind(struct folio *folio) */ static void filemap_end_dropbehind_write(struct folio *folio) { + if (!folio_test_dropbehind(folio)) + return; + /* * Hitting !in_task() should not happen off RWF_DONTCACHE writeback, * but can happen if normal writeback just happens to find dirty folios @@ -1629,8 +1636,6 @@ static void filemap_end_dropbehind_write(struct folio *folio) */ void folio_end_writeback(struct folio *folio) { - bool folio_dropbehind = false; - VM_BUG_ON_FOLIO(!folio_test_writeback(folio), folio); /* @@ -1652,14 +1657,11 @@ void folio_end_writeback(struct folio *folio) * reused before the folio_wake_bit(). 
*/ folio_get(folio); - if (!folio_test_dirty(folio)) - folio_dropbehind = folio_test_clear_dropbehind(folio); if (__folio_end_writeback(folio)) folio_wake_bit(folio, PG_writeback); - acct_reclaim_writeback(folio); - if (folio_dropbehind) - filemap_end_dropbehind_write(folio); + filemap_end_dropbehind_write(folio); + acct_reclaim_writeback(folio); folio_put(folio); } EXPORT_SYMBOL(folio_end_writeback); @@ -2651,8 +2653,7 @@ static void filemap_end_dropbehind_read(struct folio *folio) if (folio_test_writeback(folio) || folio_test_dirty(folio)) return; if (folio_trylock(folio)) { - if (folio_test_clear_dropbehind(folio)) - filemap_end_dropbehind(folio); + filemap_end_dropbehind(folio); folio_unlock(folio); } }