From d69ee59d38a28ba94347aa8c5cf829825f02f243 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 21 Feb 2026 12:13:16 -0800 Subject: [PATCH 01/35] f2fs: remove unreachable code in f2fs_encrypt_one_page() Since commit 52e7e0d88933 ("fscrypt: Switch to sync_skcipher and on-stack requests") eliminated the dynamic allocation of crypto requests, the only remaining dynamic memory allocation done by fscrypt_encrypt_pagecache_blocks() is the bounce page allocation. The bounce page is allocated from a mempool. Mempool allocations with GFP_NOFS never fail. Therefore, fscrypt_encrypt_pagecache_blocks() can no longer return -ENOMEM when passed GFP_NOFS. Remove the now-unreachable code from f2fs_encrypt_one_page(). Suggested-by: Vlastimil Babka Link: https://lore.kernel.org/all/d9dc2ee1-283d-4467-ad36-a6a4aa557589@suse.cz/ Signed-off-by: Eric Biggers Acked-by: Vlastimil Babka (SUSE) Reviewed-by: Christoph Hellwig Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 338df7a2aea6..400f0400e13d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2787,7 +2787,6 @@ int f2fs_encrypt_one_page(struct f2fs_io_info *fio) struct inode *inode = fio_inode(fio); struct folio *mfolio; struct page *page; - gfp_t gfp_flags = GFP_NOFS; if (!f2fs_encrypted_file(inode)) return 0; @@ -2797,19 +2796,10 @@ int f2fs_encrypt_one_page(struct f2fs_io_info *fio) if (fscrypt_inode_uses_inline_crypto(inode)) return 0; -retry_encrypt: fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page_folio(page), - PAGE_SIZE, 0, gfp_flags); - if (IS_ERR(fio->encrypted_page)) { - /* flush pending IOs and wait for a while in the ENOMEM case */ - if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { - f2fs_flush_merged_writes(fio->sbi); - memalloc_retry_wait(GFP_NOFS); - gfp_flags |= __GFP_NOFAIL; - goto retry_encrypt; - } + PAGE_SIZE, 0, GFP_NOFS); + if (IS_ERR(fio->encrypted_page)) return PTR_ERR(fio->encrypted_page); - } mfolio = filemap_lock_folio(META_MAPPING(fio->sbi), fio->old_blkaddr); if (!IS_ERR(mfolio)) { From 3cf11e6f36c170050c12171dd6fd3142711478fc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 4 Mar 2026 16:22:31 +0800 Subject: [PATCH 02/35] f2fs: fix to avoid memory leak in f2fs_rename() syzbot reported a f2fs bug as below: BUG: memory leak unreferenced object 0xffff888127f70830 (size 16): comm "syz.0.23", pid 6144, jiffies 4294943712 hex dump (first 16 bytes): 3c af 57 72 5b e6 8f ad 6e 8e fd 33 42 39 03 ff <.Wr[...n..3B9.. backtrace (crc 925f8a80): kmemleak_alloc_recursive include/linux/kmemleak.h:44 [inline] slab_post_alloc_hook mm/slub.c:4520 [inline] slab_alloc_node mm/slub.c:4844 [inline] __do_kmalloc_node mm/slub.c:5237 [inline] __kmalloc_noprof+0x3bd/0x560 mm/slub.c:5250 kmalloc_noprof include/linux/slab.h:954 [inline] fscrypt_setup_filename+0x15e/0x3b0 fs/crypto/fname.c:364 f2fs_setup_filename+0x52/0xb0 fs/f2fs/dir.c:143 f2fs_rename+0x159/0xca0 fs/f2fs/namei.c:961 f2fs_rename2+0xd5/0xf20 fs/f2fs/namei.c:1308 vfs_rename+0x7ff/0x1250 fs/namei.c:6026 filename_renameat2+0x4f4/0x660 fs/namei.c:6144 __do_sys_renameat2 fs/namei.c:6173 [inline] __se_sys_renameat2 fs/namei.c:6168 [inline] __x64_sys_renameat2+0x59/0x80 fs/namei.c:6168 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xe2/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f The root cause is in commit 40b2d55e0452 ("f2fs: fix to create selinux label during whiteout initialization"), we added a call to f2fs_setup_filename() without a matching call to f2fs_free_filename(), fix it. Fixes: 40b2d55e0452 ("f2fs: fix to create selinux label during whiteout initialization") Cc: stable@kernel.org Reported-by: syzbot+cf7946ab25b21abc4b66@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/69a75fe1.a70a0220.b118c.0014.GAE@google.com Suggested-by: Eric Biggers Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index e360f08a9586..6ef21deeef1c 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -964,6 +964,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, return err; err = f2fs_create_whiteout(idmap, old_dir, &whiteout, &fname); + f2fs_free_filename(&fname); if (err) return err; } From 5604129b6504c2d6dfbc02515c43e6186a1285e7 Mon Sep 17 00:00:00 2001 From: liujinbao1 Date: Fri, 13 Feb 2026 20:26:30 +0800 Subject: [PATCH 03/35] f2fs:Fix incomplete search range in f2fs_get_victim when f2fs_need_rand_seg is enabled During the f2fs_get_victim process, when the f2fs_need_rand_seg is enabled in select_policy, p->offset is a random value, and the search range is from p->offset to MAIN_SECS. When segno >= last_segment, the loop breaks and exits directly without searching the range from 0 to p->offset.This results in an incomplete search when the random offset is not zero. Signed-off-by: liujinbao1 Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index f46b2673d31f..d15e122b470c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -316,10 +316,11 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->max_search = sbi->max_victim_search; /* let's select beginning hot/small space first. */ - if (f2fs_need_rand_seg(sbi)) + if (f2fs_need_rand_seg(sbi)) { p->offset = get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi)); - else if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) + SIT_I(sbi)->last_victim[p->gc_mode] = p->offset; + } else if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) p->offset = 0; else p->offset = SIT_I(sbi)->last_victim[p->gc_mode]; From 68a0178981a0f493295afa29f8880246e561494c Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Tue, 3 Feb 2026 21:36:35 +0800 Subject: [PATCH 04/35] f2fs: fix incorrect file address mapping when inline inode is unwritten When `fileinfo->fi_flags` does not have the `FIEMAP_FLAG_SYNC` bit set and inline data has not been persisted yet, the physical address of the extent is calculated incorrectly for unwritten inline inodes. root@vm:/mnt/f2fs# dd if=/dev/zero of=data.3k bs=3k count=1 root@vm:/mnt/f2fs# f2fs_io fiemap 0 100 data.3k Fiemap: offset = 0 len = 100 logical addr. physical addr. length flags 0 0000000000000000 00000ffffffff16c 0000000000000c00 00000301 This patch fixes the issue by checking if the inode's address is valid. If the inline inode is unwritten, set the physical address to 0 and mark the extent with `FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_DELALLOC` flags. Cc: stable@kernel.org Fixes: 67f8cf3cee6f ("f2fs: support fiemap for inline_data") Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 0a1052d5ee62..86d2abbb40ff 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -792,7 +792,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, int f2fs_inline_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len) { - __u64 byteaddr, ilen; + __u64 byteaddr = 0, ilen; __u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED | FIEMAP_EXTENT_LAST; struct node_info ni; @@ -825,9 +825,14 @@ int f2fs_inline_data_fiemap(struct inode *inode, if (err) goto out; - byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits; - byteaddr += (char *)inline_data_addr(inode, ifolio) - - (char *)F2FS_INODE(ifolio); + if (__is_valid_data_blkaddr(ni.blk_addr)) { + byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits; + byteaddr += (char *)inline_data_addr(inode, ifolio) - + (char *)F2FS_INODE(ifolio); + } else { + f2fs_bug_on(F2FS_I_SB(inode), ni.blk_addr != NEW_ADDR); + flags |= FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_UNKNOWN; + } err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags); trace_f2fs_fiemap(inode, start, byteaddr, ilen, flags, err); out: From 265dccda706667b9c2b6d690636db1df1f751948 Mon Sep 17 00:00:00 2001 From: liujinbao1 Date: Fri, 27 Feb 2026 11:02:54 +0800 Subject: [PATCH 05/35] f2fs: Add defrag_blocks sysfs node Add the defrag_blocks sysfs node to track the amount of data blocks moved during filesystem defragmentation. Signed-off-by: Sheng Yong Signed-off-by: liujinbao1 Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ fs/f2fs/debug.c | 1 + fs/f2fs/f2fs.h | 5 +++++ fs/f2fs/file.c | 4 +++- fs/f2fs/sysfs.c | 10 ++++++++++ 5 files changed, 25 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index c1d2b3fd9c65..423ec40e2e4e 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -407,6 +407,12 @@ Contact: "Hridya Valsaraju" Description: Average number of valid blocks. Available when CONFIG_F2FS_STAT_FS=y. +What: /sys/fs/f2fs//defrag_blocks +Date: February 2026 +Contact: "Jinbao Liu" +Description: Number of blocks moved by defragment. + Available when CONFIG_F2FS_STAT_FS=y. + What: /sys/fs/f2fs//mounted_time_sec Date: February 2020 Contact: "Jaegeuk Kim" diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8e1040e375a7..af88db8fdb71 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -659,6 +659,7 @@ static int stat_show(struct seq_file *s, void *v) si->bg_node_blks); seq_printf(s, "BG skip : IO: %u, Other: %u\n", si->io_skip_bggc, si->other_skip_bggc); + seq_printf(s, "defrag blocks : %u\n", si->defrag_blks); seq_puts(s, "\nExtent Cache (Read):\n"); seq_printf(s, " - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n", si->hit_largest, si->hit_cached[EX_READ], diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index bb34e864d0ef..dbf23cb2c501 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4288,6 +4288,7 @@ struct f2fs_stat_info { int gc_secs[2][2]; int tot_blks, data_blks, node_blks; int bg_data_blks, bg_node_blks; + unsigned int defrag_blks; int blkoff[NR_CURSEG_TYPE]; int curseg[NR_CURSEG_TYPE]; int cursec[NR_CURSEG_TYPE]; @@ -4422,6 +4423,9 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) si->bg_node_blks += ((gc_type) == BG_GC) ? (blks) : 0; \ } while (0) +#define stat_inc_defrag_blk_count(sbi, blks) \ + (F2FS_STAT(sbi)->defrag_blks += (blks)) + int f2fs_build_stats(struct f2fs_sb_info *sbi); void f2fs_destroy_stats(struct f2fs_sb_info *sbi); void __init f2fs_create_root_stats(void); @@ -4463,6 +4467,7 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi); #define stat_inc_tot_blk_count(si, blks) do { } while (0) #define stat_inc_data_blk_count(sbi, blks, gc_type) do { } while (0) #define stat_inc_node_blk_count(sbi, blks, gc_type) do { } while (0) +#define stat_inc_defrag_blk_count(sbi, blks) do { } while (0) static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c8a2f17a8f11..2c4880f24b54 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3043,8 +3043,10 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, clear_inode_flag(inode, FI_OPU_WRITE); unlock_out: inode_unlock(inode); - if (!err) + if (!err) { range->len = (u64)total << PAGE_SHIFT; + stat_inc_defrag_blk_count(sbi, total); + } return err; } diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 5fbfdc96e502..969e06b65b04 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -338,6 +338,14 @@ static ssize_t avg_vblocks_show(struct f2fs_attr *a, f2fs_update_sit_info(sbi); return sysfs_emit(buf, "%llu\n", (unsigned long long)(si->avg_vblocks)); } + +static ssize_t defrag_blocks_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + + return sysfs_emit(buf, "%llu\n", (unsigned long long)(si->defrag_blks)); +} #endif static ssize_t main_blkaddr_show(struct f2fs_attr *a, @@ -1351,6 +1359,7 @@ F2FS_GENERAL_RO_ATTR(gc_mode); F2FS_GENERAL_RO_ATTR(moved_blocks_background); F2FS_GENERAL_RO_ATTR(moved_blocks_foreground); F2FS_GENERAL_RO_ATTR(avg_vblocks); +F2FS_GENERAL_RO_ATTR(defrag_blocks); #endif #ifdef CONFIG_FS_ENCRYPTION @@ -1473,6 +1482,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(moved_blocks_foreground), ATTR_LIST(moved_blocks_background), ATTR_LIST(avg_vblocks), + ATTR_LIST(defrag_blocks), #endif #ifdef CONFIG_BLK_DEV_ZONED ATTR_LIST(unusable_blocks_per_sec), From 570e2ccc7cb35fe720106964e65060602d3d2ac4 Mon Sep 17 00:00:00 2001 From: Jianan Huang Date: Thu, 5 Mar 2026 09:18:10 +0800 Subject: [PATCH 06/35] f2fs: avoid reading already updated pages during GC We found the following issue during fuzz testing: page: refcount:3 mapcount:0 mapping:00000000b6e89c65 index:0x18b2dc pfn:0x161ba9 memcg:f8ffff800e269c00 aops:f2fs_meta_aops ino:2 flags: 0x52880000000080a9(locked|waiters|uptodate|lru|private|zone=1|kasantag=0x4a) raw: 52880000000080a9 fffffffec6e17588 fffffffec0ccc088 a7ffff8067063618 raw: 000000000018b2dc 0000000000000009 00000003ffffffff f8ffff800e269c00 page dumped because: VM_BUG_ON_FOLIO(folio_test_uptodate(folio)) page_owner tracks the page as allocated post_alloc_hook+0x58c/0x5ec prep_new_page+0x34/0x284 get_page_from_freelist+0x2dcc/0x2e8c __alloc_pages_noprof+0x280/0x76c __folio_alloc_noprof+0x18/0xac __filemap_get_folio+0x6bc/0xdc4 pagecache_get_page+0x3c/0x104 do_garbage_collect+0x5c78/0x77a4 f2fs_gc+0xd74/0x25f0 gc_thread_func+0xb28/0x2930 kthread+0x464/0x5d8 ret_from_fork+0x10/0x20 ------------[ cut here ]------------ kernel BUG at mm/filemap.c:1563! folio_end_read+0x140/0x168 f2fs_finish_read_bio+0x5c4/0xb80 f2fs_read_end_io+0x64c/0x708 bio_endio+0x85c/0x8c0 blk_update_request+0x690/0x127c scsi_end_request+0x9c/0xb8c scsi_io_completion+0xf0/0x250 scsi_finish_command+0x430/0x45c scsi_complete+0x178/0x6d4 blk_mq_complete_request+0xcc/0x104 scsi_done_internal+0x214/0x454 scsi_done+0x24/0x34 which is similar to the problem reported by syzbot: https://syzkaller.appspot.com/bug?extid=3686758660f980b402dc This case is consistent with the description in commit 9bf1a3f ("f2fs: avoid GC causing encrypted file corrupted"): Page 1 is moved from blkaddr A to blkaddr B by move_data_block, and after being written it is marked as uptodate. Then, Page 1 is moved from blkaddr B to blkaddr C, VM_BUG_ON_FOLIO was triggered in the endio initiated by ra_data_block. There is no need to read Page 1 again from blkaddr B, since it has already been updated. Therefore, avoid initiating I/O in this case. Fixes: 6aa58d8ad20a ("f2fs: readahead encrypted block during GC") Signed-off-by: Jianan Huang Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d15e122b470c..80b8500fa987 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1231,7 +1231,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) .encrypted_page = NULL, .in_list = 0, }; - int err; + int err = 0; folio = f2fs_grab_cache_folio(mapping, index, true); if (IS_ERR(folio)) @@ -1284,6 +1284,9 @@ static int ra_data_block(struct inode *inode, pgoff_t index) fio.encrypted_page = &efolio->page; + if (folio_test_uptodate(efolio)) + goto put_encrypted_page; + err = f2fs_submit_page_bio(&fio); if (err) goto put_encrypted_page; From 2d9c4a4ed4eef1f82c5b16b037aee8bad819fd53 Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Fri, 27 Feb 2026 15:30:52 +0800 Subject: [PATCH 07/35] f2fs: fix UAF caused by decrementing sbi->nr_pages[] in f2fs_write_end_io() The xfstests case "generic/107" and syzbot have both reported a NULL pointer dereference. The concurrent scenario that triggers the panic is as follows: F2FS_WB_CP_DATA write callback umount - f2fs_write_checkpoint - f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA) - blk_mq_end_request - bio_endio - f2fs_write_end_io : dec_page_count(sbi, F2FS_WB_CP_DATA) : wake_up(&sbi->cp_wait) - kill_f2fs_super - kill_block_super - f2fs_put_super : iput(sbi->node_inode) : sbi->node_inode = NULL : f2fs_in_warm_node_list - is_node_folio // sbi->node_inode is NULL and panic The root cause is that f2fs_put_super() calls iput(sbi->node_inode) and sets sbi->node_inode to NULL after sbi->nr_pages[F2FS_WB_CP_DATA] is decremented to zero. As a result, f2fs_in_warm_node_list() may dereference a NULL node_inode when checking whether a folio belongs to the node inode, leading to a panic. This patch fixes the issue by calling f2fs_in_warm_node_list() before decrementing sbi->nr_pages[F2FS_WB_CP_DATA], thus preventing the use-after-free condition. Cc: stable@kernel.org Fixes: 50fa53eccf9f ("f2fs: fix to avoid broken of dnode block list") Reported-by: syzbot+6e4cb1cac5efc96ea0ca@syzkaller.appspotmail.com Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 400f0400e13d..57fc9bad31bf 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -386,6 +386,8 @@ static void f2fs_write_end_io(struct bio *bio) folio->index, NODE_TYPE_REGULAR, true); f2fs_bug_on(sbi, folio->index != nid_of_node(folio)); } + if (f2fs_in_warm_node_list(sbi, folio)) + f2fs_del_fsync_node_entry(sbi, folio); dec_page_count(sbi, type); @@ -397,8 +399,6 @@ static void f2fs_write_end_io(struct bio *bio) wq_has_sleeper(&sbi->cp_wait)) wake_up(&sbi->cp_wait); - if (f2fs_in_warm_node_list(sbi, folio)) - f2fs_del_fsync_node_entry(sbi, folio); folio_clear_f2fs_gcing(folio); folio_end_writeback(folio); } From 1eaf7ee2e682cfd9f9fd48272d50ff5d3a88e9bc Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Fri, 27 Feb 2026 15:30:54 +0800 Subject: [PATCH 08/35] f2fs: drop unused sbi parameter from f2fs_in_warm_node_list() The sbi parameter in f2fs_in_warm_node_list() is not used. Remove it to simplify the function. Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 4 ++-- fs/f2fs/segment.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 57fc9bad31bf..e3c94c0ad05d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -386,7 +386,7 @@ static void f2fs_write_end_io(struct bio *bio) folio->index, NODE_TYPE_REGULAR, true); f2fs_bug_on(sbi, folio->index != nid_of_node(folio)); } - if (f2fs_in_warm_node_list(sbi, folio)) + if (f2fs_in_warm_node_list(folio)) f2fs_del_fsync_node_entry(sbi, folio); dec_page_count(sbi, type); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dbf23cb2c501..8942b2a63cfd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3921,7 +3921,7 @@ enum node_type; int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type); -bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio); +bool f2fs_in_warm_node_list(struct folio *folio); void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi); void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct folio *folio); void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 74992fd9c9b6..bbfa677ef46f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -325,7 +325,7 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, start, nr); } -bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio) +bool f2fs_in_warm_node_list(struct folio *folio) { return is_node_folio(folio) && IS_DNODE(folio) && is_cold_node(folio); } @@ -1810,7 +1810,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted } /* should add to global list before clearing PAGECACHE status */ - if (f2fs_in_warm_node_list(sbi, folio)) { + if (f2fs_in_warm_node_list(folio)) { seq = f2fs_add_fsync_node_entry(sbi, folio); if (seq_id) *seq_id = seq; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6a97fe76712b..23faf6725632 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3980,7 +3980,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) if (fscrypt_inode_uses_fs_layer_crypto(folio->mapping->host)) fscrypt_finalize_bounce_page(&fio->encrypted_page); folio_end_writeback(folio); - if (f2fs_in_warm_node_list(fio->sbi, folio)) + if (f2fs_in_warm_node_list(folio)) f2fs_del_fsync_node_entry(fio->sbi, folio); f2fs_bug_on(fio->sbi, !is_set_ckpt_flags(fio->sbi, CP_ERROR_FLAG)); From 39d4ee19c1e7d753dd655aebee632271b171f43a Mon Sep 17 00:00:00 2001 From: George Saad Date: Mon, 23 Mar 2026 11:21:23 +0000 Subject: [PATCH 09/35] f2fs: fix use-after-free of sbi in f2fs_compress_write_end_io() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In f2fs_compress_write_end_io(), dec_page_count(sbi, type) can bring the F2FS_WB_CP_DATA counter to zero, unblocking f2fs_wait_on_all_pages() in f2fs_put_super() on a concurrent unmount CPU. The unmount path then proceeds to call f2fs_destroy_page_array_cache(sbi), which destroys sbi->page_array_slab via kmem_cache_destroy(), and eventually kfree(sbi). Meanwhile, the bio completion callback is still executing: when it reaches page_array_free(sbi, ...), it dereferences sbi->page_array_slab — a destroyed slab cache — to call kmem_cache_free(), causing a use-after-free. This is the same class of bug as CVE-2026-23234 (which fixed the equivalent race in f2fs_write_end_io() in data.c), but in the compressed writeback completion path that was not covered by that fix. Fix this by moving dec_page_count() to after page_array_free(), so that all sbi accesses complete before the counter decrement that can unblock unmount. For non-last folios (where atomic_dec_return on cic->pending_pages is nonzero), dec_page_count is called immediately before returning — page_array_free is not reached on this path, so there is no post-decrement sbi access. For the last folio, page_array_free runs while the F2FS_WB_CP_DATA counter is still nonzero (this folio has not yet decremented it), keeping sbi alive, and dec_page_count runs as the final operation. Fixes: 4c8ff7095bef ("f2fs: support data compression") Cc: stable@vger.kernel.org Signed-off-by: George Saad Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 8c76400ba631..aa8ba4cdfe34 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1491,10 +1491,10 @@ void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio) f2fs_compress_free_page(page); - dec_page_count(sbi, type); - - if (atomic_dec_return(&cic->pending_pages)) + if (atomic_dec_return(&cic->pending_pages)) { + dec_page_count(sbi, type); return; + } for (i = 0; i < cic->nr_rpages; i++) { WARN_ON(!cic->rpages[i]); @@ -1504,6 +1504,14 @@ void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio) page_array_free(sbi, cic->rpages, cic->nr_rpages); kmem_cache_free(cic_entry_slab, cic); + + /* + * Make sure dec_page_count() is the last access to sbi. + * Once it drops the F2FS_WB_CP_DATA counter to zero, the + * unmount thread can proceed to destroy sbi and + * sbi->page_array_slab. + */ + dec_page_count(sbi, type); } static int f2fs_write_raw_pages(struct compress_ctx *cc, From eb2ca3ca983551a80e16a4a25df5a4ce59df8484 Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Mon, 23 Mar 2026 20:06:22 +0800 Subject: [PATCH 10/35] f2fs: fix incorrect multidevice info in trace_f2fs_map_blocks() When f2fs_map_blocks()->f2fs_map_blocks_cached() hits the read extent cache, map->m_multidev_dio is not updated, which leads to incorrect multidevice information being reported by trace_f2fs_map_blocks(). This patch updates map->m_multidev_dio in f2fs_map_blocks_cached() when the read extent cache is hit. Cc: stable@kernel.org Fixes: 0094e98bd147 ("f2fs: factor a f2fs_map_blocks_cached helper") Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e3c94c0ad05d..a690442b7440 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1575,7 +1575,8 @@ static bool f2fs_map_blocks_cached(struct inode *inode, f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); - if (f2fs_allow_multi_device_dio(sbi, flag)) { + map->m_multidev_dio = f2fs_allow_multi_device_dio(sbi, flag); + if (map->m_multidev_dio) { int bidx = f2fs_target_device_index(sbi, map->m_pblk); struct f2fs_dev_info *dev = &sbi->devs[bidx]; From 95e159ad3e52f7478cfd22e44ec37c9f334f8993 Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Mon, 23 Mar 2026 20:06:24 +0800 Subject: [PATCH 11/35] f2fs: fix fiemap boundary handling when read extent cache is incomplete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit f2fs_fiemap() calls f2fs_map_blocks() to obtain the block mapping a file, and then merges contiguous mappings into extents. If the mapping is found in the read extent cache, node blocks do not need to be read. However, in the following scenario, a contiguous extent can be split into two extents: $ dd if=/dev/zero of=data.128M bs=1M count=128 $ losetup -f data.128M $ mkfs.f2fs /dev/loop0 -f $ mount -o mode=lfs /dev/loop0 /mnt/f2fs/ $ cd /mnt/f2fs/ $ dd if=/dev/zero of=data.72M bs=1M count=72 && sync $ dd if=/dev/zero of=data.4M bs=1M count=4 && sync $ dd if=/dev/zero of=data.4M bs=1M count=2 seek=2 conv=notrunc && sync $ echo 3 > /proc/sys/vm/drop_caches $ dd if=/dev/zero of=data.4M bs=1M count=2 seek=0 conv=notrunc && sync $ dd if=/dev/zero of=data.4M bs=1M count=2 seek=0 conv=notrunc && sync $ f2fs_io fiemap 0 1024 data.4M Fiemap: offset = 0 len = 1024 logical addr. physical addr. length flags 0 0000000000000000 0000000006400000 0000000000200000 00001000 1 0000000000200000 0000000006600000 0000000000200000 00001001 Although the physical addresses of the ranges 0~2MB and 2M~4MB are contiguous, the mapping for the 2M~4MB range is not present in memory. When the physical addresses for the 0~2MB range are updated, no merge happens because the adjacent mapping is missing from the in-memory cache. As a result, fiemap reports two separate extents instead of a single contiguous one. The root cause is that the read extent cache does not guarantee that all blocks of an extent are present in memory. Therefore, when the extent length returned by f2fs_map_blocks_cached() is smaller than maxblocks, the remaining mappings are retrieved via f2fs_get_dnode_of_data() to ensure correct fiemap extent boundary handling. Cc: stable@kernel.org Fixes: cd8fc5226bef ("f2fs: remove the create argument to f2fs_map_blocks") Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a690442b7440..0e108c701aa3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1636,8 +1636,26 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) lfs_dio_write = (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && map->m_may_create); - if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) - goto out; + if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) { + struct extent_info ei; + + /* + * 1. If map->m_multidev_dio is true, map->m_pblk cannot be + * waitted by f2fs_wait_on_block_writeback_range() and are not + * mergeable. + * 2. If pgofs hits the read extent cache, it means the mapping + * is already cached in the extent cache, but it is not + * mergeable, and there is no need to query the mapping again + * via f2fs_get_dnode_of_data(). + */ + pgofs = (pgoff_t)map->m_lblk + map->m_len; + if (map->m_len == maxblocks || + map->m_multidev_dio || + f2fs_lookup_read_extent_cache(inode, pgofs, &ei)) + goto out; + ofs = map->m_len; + goto map_more; + } map->m_bdev = inode->i_sb->s_bdev; map->m_multidev_dio = @@ -1648,7 +1666,8 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) /* it only supports block size == page size */ pgofs = (pgoff_t)map->m_lblk; - end = pgofs + maxblocks; +map_more: + end = (pgoff_t)map->m_lblk + maxblocks; if (flag == F2FS_GET_BLOCK_PRECACHE) mode = LOOKUP_NODE_RA; From 1e134c33b931a1b082605b15116403571dab6bbb Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Thu, 19 Mar 2026 16:35:27 +0800 Subject: [PATCH 12/35] f2fs: drop unused ri parameter from truncate_partial_nodes() The ri parameter in truncate_partial_nodes() is unused. Remove it along with the related code. No logical changes. Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index bbfa677ef46f..f04d3ac189cd 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1113,7 +1113,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, } static int truncate_partial_nodes(struct dnode_of_data *dn, - struct f2fs_inode *ri, int *offset, int depth) + int *offset, int depth) { struct folio *folios[2]; nid_t nid[3]; @@ -1184,7 +1184,6 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) int err = 0, cont = 1; int level, offset[4], noffset[4]; unsigned int nofs = 0; - struct f2fs_inode *ri; struct dnode_of_data dn; struct folio *folio; @@ -1212,7 +1211,6 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) set_new_dnode(&dn, inode, folio, NULL, 0); folio_unlock(folio); - ri = F2FS_INODE(folio); switch (level) { case 0: case 1: @@ -1222,7 +1220,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) nofs = noffset[1]; if (!offset[level - 1]) goto skip_partial; - err = truncate_partial_nodes(&dn, ri, offset, level); + err = truncate_partial_nodes(&dn, offset, level); if (err < 0 && err != -ENOENT) goto fail; nofs += 1 + NIDS_PER_BLOCK; @@ -1231,7 +1229,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) nofs = 5 + 2 * NIDS_PER_BLOCK; if (!offset[level - 1]) goto skip_partial; - err = truncate_partial_nodes(&dn, ri, offset, level); + err = truncate_partial_nodes(&dn, offset, level); if (err < 0 && err != -ENOENT) goto fail; break; From 5471834a96fb697874be2ca0b052e74bcf3c23d1 Mon Sep 17 00:00:00 2001 From: Cen Zhang Date: Wed, 18 Mar 2026 15:32:53 +0800 Subject: [PATCH 13/35] f2fs: add READ_ONCE() for i_blocks in f2fs_update_inode() f2fs_update_inode() reads inode->i_blocks without holding i_lock to serialize it to the on-disk inode, while concurrent truncate or allocation paths may modify i_blocks under i_lock. Since blkcnt_t is u64, this risks torn reads on 32-bit architectures. Following the approach in ext4_inode_blocks_set(), add READ_ONCE() to prevent potential compiler-induced tearing. Fixes: 19f99cee206c ("f2fs: add core inode operations") Cc: stable@vger.kernel.org Signed-off-by: Cen Zhang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index e0f850b3f0c3..89240be8cc59 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -687,7 +687,7 @@ void f2fs_update_inode(struct inode *inode, struct folio *node_folio) ri->i_uid = cpu_to_le32(i_uid_read(inode)); ri->i_gid = cpu_to_le32(i_gid_read(inode)); ri->i_links = cpu_to_le32(inode->i_nlink); - ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks) + 1); + ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(READ_ONCE(inode->i_blocks)) + 1); if (!f2fs_is_atomic_file(inode) || is_inode_flag_set(inode, FI_ATOMIC_COMMITTED)) From bd882ffdd48a200ca2faa7c3e690ecf765784b16 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 23 Mar 2026 16:38:32 +0800 Subject: [PATCH 14/35] f2fs: call f2fs_handle_critical_error() to set cp_error flag f2fs_handle_page_eio() is the only left place we set CP_ERROR_FLAG directly, it missed to update superblock.s_stop_reason, let's call f2fs_handle_critical_error() instead to fix that. Introduce STOP_CP_REASON_READ_{META,NODE,DATA} stop_cp_reason enum variable to indicate which kind of data we failed to read. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 21 +++++++++++++++++++-- include/linux/f2fs_fs.h | 3 +++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8942b2a63cfd..931f8394bb18 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -5070,8 +5070,25 @@ static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, return; if (ofs == sbi->page_eio_ofs[type]) { - if (sbi->page_eio_cnt[type]++ == MAX_RETRY_PAGE_EIO) - set_ckpt_flags(sbi, CP_ERROR_FLAG); + if (sbi->page_eio_cnt[type]++ == MAX_RETRY_PAGE_EIO) { + enum stop_cp_reason stop_reason; + + switch (type) { + case META: + stop_reason = STOP_CP_REASON_READ_META; + break; + case NODE: + stop_reason = STOP_CP_REASON_READ_NODE; + break; + case DATA: + stop_reason = STOP_CP_REASON_READ_DATA; + break; + default: + f2fs_bug_on(sbi, 1); + return; + } + f2fs_handle_critical_error(sbi, stop_reason); + } } else { sbi->page_eio_ofs[type] = ofs; sbi->page_eio_cnt[type] = 0; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index dc41722fcc9d..829a59399dac 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -80,6 +80,9 @@ enum stop_cp_reason { STOP_CP_REASON_NO_SEGMENT, STOP_CP_REASON_CORRUPTED_FREE_BITMAP, STOP_CP_REASON_CORRUPTED_NID, + STOP_CP_REASON_READ_META, + STOP_CP_REASON_READ_NODE, + STOP_CP_REASON_READ_DATA, STOP_CP_REASON_MAX, }; From be09d78b6d540032fd3841c2708061e13043d7e8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 23 Mar 2026 16:38:33 +0800 Subject: [PATCH 15/35] f2fs: use more generic f2fs_stop_checkpoint() Let's use more generic f2fs_stop_checkpoint() instead of f2fs_handle_critical_error() to handle critical error in f2fs. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 9 --------- fs/f2fs/f2fs.h | 3 +-- fs/f2fs/node.c | 2 +- fs/f2fs/super.c | 13 ++++++++++++- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6dd39b7de11a..01e1ba77263e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -232,15 +232,6 @@ static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) static struct kmem_cache *ino_entry_slab; struct kmem_cache *f2fs_inode_entry_slab; -void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io, - unsigned char reason) -{ - f2fs_build_fault_attr(sbi, 0, 0, FAULT_ALL); - if (!end_io) - f2fs_flush_merged_writes(sbi); - f2fs_handle_critical_error(sbi, reason); -} - /* * We guarantee no failure on the returned page. */ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 931f8394bb18..f2580faa0763 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3902,7 +3902,6 @@ int f2fs_do_quota_sync(struct super_block *sb, int type); loff_t max_file_blocks(struct inode *inode); void f2fs_quota_off_umount(struct super_block *sb); void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag); -void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason); void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); @@ -5087,7 +5086,7 @@ static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, 1); return; } - f2fs_handle_critical_error(sbi, stop_reason); + f2fs_stop_checkpoint(sbi, false, stop_reason); } } else { sbi->page_eio_ofs[type] = ofs; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f04d3ac189cd..31085d659ccc 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1774,7 +1774,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted if (f2fs_sanity_check_node_footer(sbi, folio, nid, NODE_TYPE_REGULAR, false)) { - f2fs_handle_critical_error(sbi, STOP_CP_REASON_CORRUPTED_NID); + f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_CORRUPTED_NID); goto redirty_out; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8774c60b4be4..a9adb6198184 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4650,7 +4650,8 @@ static bool system_going_down(void) || system_state == SYSTEM_RESTART; } -void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason) +static void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, + unsigned char reason) { struct super_block *sb = sbi->sb; bool shutdown = reason == STOP_CP_REASON_SHUTDOWN; @@ -4707,6 +4708,16 @@ void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason) */ } +void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io, + unsigned char reason) +{ + f2fs_build_fault_attr(sbi, 0, 0, FAULT_ALL); + if (!end_io) + f2fs_flush_merged_writes(sbi); + f2fs_handle_critical_error(sbi, reason); +} + + static void f2fs_record_error_work(struct work_struct *work) { struct f2fs_sb_info *sbi = container_of(work, From 92c20989366e023b74fa0c1028af9436c1917dbf Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Wed, 18 Mar 2026 16:45:32 +0800 Subject: [PATCH 16/35] f2fs: refactor f2fs_move_node_folio function This patch refactor the f2fs_move_node_folio() function. No logical changes. Cc: stable@kernel.org Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 56 +++++++++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 31085d659ccc..b8e5cadbab3a 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1841,41 +1841,51 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted return false; } -int f2fs_move_node_folio(struct folio *node_folio, int gc_type) +static int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode, + bool mark_dirty, enum iostat_type io_type) { int err = 0; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 1, + }; - if (gc_type == FG_GC) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = 1, - }; - - f2fs_folio_wait_writeback(node_folio, NODE, true, true); - - folio_mark_dirty(node_folio); - - if (!folio_clear_dirty_for_io(node_folio)) { - err = -EAGAIN; - goto out_page; - } - - if (!__write_node_folio(node_folio, false, NULL, - &wbc, false, FS_GC_NODE_IO, NULL)) - err = -EAGAIN; - goto release_page; - } else { + if (!sync_mode) { /* set page dirty and write it */ if (!folio_test_writeback(node_folio)) folio_mark_dirty(node_folio); + goto out_folio; } -out_page: + + f2fs_folio_wait_writeback(node_folio, NODE, true, true); + + if (mark_dirty) + folio_mark_dirty(node_folio); + else if (!folio_test_dirty(node_folio)) + goto out_folio; + + if (!folio_clear_dirty_for_io(node_folio)) { + err = -EAGAIN; + goto out_folio; + } + + if (!__write_node_folio(node_folio, false, NULL, + &wbc, false, FS_GC_NODE_IO, NULL)) + err = -EAGAIN; + goto release_folio; +out_folio: folio_unlock(node_folio); -release_page: +release_folio: f2fs_folio_put(node_folio, false); return err; } +int f2fs_move_node_folio(struct folio *node_folio, int gc_type) +{ + return f2fs_write_single_node_folio(node_folio, gc_type == FG_GC, + true, FS_GC_NODE_IO); +} + int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic, unsigned int *seq_id) From 68cb1a6bf3895dedc8540caf2d459b7d9249b3b0 Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Wed, 18 Mar 2026 16:45:33 +0800 Subject: [PATCH 17/35] f2fs: refactor node footer flag setting related code This patch refactors the node footer flag setting code to simplify redundant logic and adjust function parameters and return types. No logical changes. Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 2 +- fs/f2fs/node.h | 23 +++++++++++------------ 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f2580faa0763..04891aaf476f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3924,7 +3924,7 @@ bool f2fs_in_warm_node_list(struct folio *folio); void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi); void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct folio *folio); void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi); -int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); +bool f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino); int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b8e5cadbab3a..e027c388207f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -391,7 +391,7 @@ void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi) spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); } -int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) +bool f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 824ac9f0e6e4..bcf2034e4263 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -400,27 +400,26 @@ static inline int is_node(const struct folio *folio, int type) #define is_fsync_dnode(folio) is_node(folio, FSYNC_BIT_SHIFT) #define is_dent_dnode(folio) is_node(folio, DENT_BIT_SHIFT) -static inline void set_cold_node(const struct folio *folio, bool is_dir) +static inline void __set_mark(const struct folio *folio, bool mark, int type) { struct f2fs_node *rn = F2FS_NODE(folio); unsigned int flag = le32_to_cpu(rn->footer.flag); - if (is_dir) - flag &= ~BIT(COLD_BIT_SHIFT); - else - flag |= BIT(COLD_BIT_SHIFT); - rn->footer.flag = cpu_to_le32(flag); -} - -static inline void set_mark(struct folio *folio, int mark, int type) -{ - struct f2fs_node *rn = F2FS_NODE(folio); - unsigned int flag = le32_to_cpu(rn->footer.flag); if (mark) flag |= BIT(type); else flag &= ~BIT(type); rn->footer.flag = cpu_to_le32(flag); +} + +static inline void set_cold_node(const struct folio *folio, bool is_dir) +{ + __set_mark(folio, !is_dir, COLD_BIT_SHIFT); +} + +static inline void set_mark(struct folio *folio, bool mark, int type) +{ + __set_mark(folio, mark, type); #ifdef CONFIG_F2FS_CHECK_FS f2fs_inode_chksum_set(F2FS_F_SB(folio), folio); From 6af249c996f7d73a3435f9e577956fa259347d18 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 11 Mar 2026 21:35:42 +0800 Subject: [PATCH 18/35] f2fs: fix to do sanity check on dcc->discard_cmd_cnt conditionally Syzbot reported a f2fs bug as below: ------------[ cut here ]------------ kernel BUG at fs/f2fs/segment.c:1900! Oops: invalid opcode: 0000 [#1] SMP KASAN PTI CPU: 1 UID: 0 PID: 6527 Comm: syz.5.110 Not tainted syzkaller #0 PREEMPT_{RT,(full)} Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2026 RIP: 0010:f2fs_issue_discard_timeout+0x59b/0x5a0 fs/f2fs/segment.c:1900 Code: d9 80 e1 07 80 c1 03 38 c1 0f 8c d6 fe ff ff 48 89 df e8 a8 5e fa fd e9 c9 fe ff ff e8 4e 46 94 fd 90 0f 0b e8 46 46 94 fd 90 <0f> 0b 0f 1f 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 RSP: 0018:ffffc9000494f940 EFLAGS: 00010283 RAX: ffffffff843009ca RBX: 0000000000000001 RCX: 0000000000080000 RDX: ffffc9001ca78000 RSI: 00000000000029f3 RDI: 00000000000029f4 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: dffffc0000000000 R11: ffffed100893a431 R12: 1ffff1100893a430 R13: 1ffff1100c2b702c R14: dffffc0000000000 R15: ffff8880449d2160 FS: 00007ffa35fed6c0(0000) GS:ffff88812643d000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f2b68634000 CR3: 0000000039f62000 CR4: 00000000003526f0 Call Trace: __f2fs_remount fs/f2fs/super.c:2960 [inline] f2fs_reconfigure+0x108a/0x1710 fs/f2fs/super.c:5443 reconfigure_super+0x227/0x8a0 fs/super.c:1080 do_remount fs/namespace.c:3391 [inline] path_mount+0xdc5/0x10e0 fs/namespace.c:4151 do_mount fs/namespace.c:4172 [inline] __do_sys_mount fs/namespace.c:4361 [inline] __se_sys_mount+0x31d/0x420 fs/namespace.c:4338 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0x14d/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7ffa37dbda0a The root cause is there will be race condition in between f2fs_ioc_fitrim() and f2fs_remount(): - f2fs_remount - f2fs_ioc_fitrim - f2fs_issue_discard_timeout - __issue_discard_cmd - __drop_discard_cmd - __wait_all_discard_cmd - f2fs_trim_fs - f2fs_write_checkpoint - f2fs_clear_prefree_segments - f2fs_issue_discard - __issue_discard_async - __queue_discard_cmd - __update_discard_tree_range - __insert_discard_cmd - __create_discard_cmd : atomic_inc(&dcc->discard_cmd_cnt); - sanity check on dcc->discard_cmd_cnt (expect discard_cmd_cnt to be zero) This will only happen when fitrim races w/ remount rw, if we remount to readonly filesystem, remount will wait until mnt_pcp.mnt_writers to zero, that means fitrim is not in process at that time. Cc: stable@kernel.org Fixes: 2482c4325dfe ("f2fs: detect bug_on in f2fs_wait_discard_bios") Reported-by: syzbot+62538b67389ee582837a@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/69b07d7c.050a0220.8df7.09a1.GAE@google.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/segment.c | 6 +++--- fs/f2fs/super.c | 11 ++++++++--- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 04891aaf476f..df4cdf804376 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3988,7 +3988,7 @@ bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); int f2fs_start_discard_thread(struct f2fs_sb_info *sbi); void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi); void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi); -bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi); +bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi, bool need_check); void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 23faf6725632..0bf25786667f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1880,7 +1880,7 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi) * * Return true if issued all discard cmd or no discard cmd need issue, otherwise return false. */ -bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi) +bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi, bool need_check) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_policy dpolicy; @@ -1897,7 +1897,7 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi) /* just to make sure there is no pending discard commands */ __wait_all_discard_cmd(sbi, NULL); - f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt)); + f2fs_bug_on(sbi, need_check && atomic_read(&dcc->discard_cmd_cnt)); return !dropped; } @@ -2367,7 +2367,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) * Recovery can cache discard commands, so in error path of * fill_super(), it needs to give a chance to handle them. */ - f2fs_issue_discard_timeout(sbi); + f2fs_issue_discard_timeout(sbi, true); kfree(dcc); SM_I(sbi)->dcc_info = NULL; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a9adb6198184..f626e5ca089d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2009,7 +2009,7 @@ static void f2fs_put_super(struct super_block *sb) } /* be sure to wait for any on-going discard commands */ - done = f2fs_issue_discard_timeout(sbi); + done = f2fs_issue_discard_timeout(sbi, true); if (f2fs_realtime_discard_enable(sbi) && !sbi->discard_blks && done) { struct cp_control cpc = { .reason = CP_UMOUNT | CP_TRIMMED, @@ -2152,7 +2152,7 @@ static int f2fs_unfreeze(struct super_block *sb) * will recover after removal of snapshot. */ if (test_opt(sbi, DISCARD) && !f2fs_hw_support_discard(sbi)) - f2fs_issue_discard_timeout(sbi); + f2fs_issue_discard_timeout(sbi, true); clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); return 0; @@ -2957,7 +2957,12 @@ static int __f2fs_remount(struct fs_context *fc, struct super_block *sb) need_stop_discard = true; } else { f2fs_stop_discard_thread(sbi); - f2fs_issue_discard_timeout(sbi); + /* + * f2fs_ioc_fitrim() won't race w/ "remount ro" + * so it's safe to check discard_cmd_cnt in + * f2fs_issue_discard_timeout(). + */ + f2fs_issue_discard_timeout(sbi, flags & SB_RDONLY); need_restart_discard = true; } } From 019f9dda7f66e55eb94cd32e1d3fff5835f73fbc Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Tue, 10 Mar 2026 17:36:12 +0800 Subject: [PATCH 19/35] f2fs: fix fsck inconsistency caused by incorrect nat_entry flag usage f2fs_need_dentry_mark() reads nat_entry flags without mutual exclusion with the checkpoint path, which can result in an incorrect inode block marking state. The scenario is as follows: create & write & fsync 'file A' write checkpoint - f2fs_do_sync_file // inline inode - f2fs_write_inode // inode folio is dirty - f2fs_write_checkpoint - f2fs_flush_merged_writes - f2fs_sync_node_pages - f2fs_fsync_node_pages // no dirty node - f2fs_need_inode_block_update // return true - f2fs_fsync_node_pages // inode dirtied - f2fs_need_dentry_mark //return true - f2fs_flush_nat_entries - f2fs_write_checkpoint end - __write_node_folio // inode with DENT_BIT_SHIFT set SPO, "fsck --dry-run" find inode has already checkpointed but still with DENT_BIT_SHIFT set The state observed by f2fs_need_dentry_mark() can differ from the state observed in __write_node_folio() after acquiring sbi->node_write. The root cause is that the semantics of IS_CHECKPOINTED and HAS_FSYNCED_INODE are only guaranteed after the checkpoint write has fully completed. This patch moves set_dentry_mark() into __write_node_folio() and protects it with the sbi->node_write lock. Cc: stable@kernel.org Fixes: 88bd02c9472a ("f2fs: fix conditions to remain recovery information in f2fs_sync_file") Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e027c388207f..630fd3b43a08 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1799,13 +1799,12 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted goto redirty_out; } - if (atomic) { - if (!test_opt(sbi, NOBARRIER)) - fio.op_flags |= REQ_PREFLUSH | REQ_FUA; - if (IS_INODE(folio)) - set_dentry_mark(folio, + if (atomic && !test_opt(sbi, NOBARRIER)) + fio.op_flags |= REQ_PREFLUSH | REQ_FUA; + + if (IS_INODE(folio) && (atomic || is_fsync_dnode(folio))) + set_dentry_mark(folio, f2fs_need_dentry_mark(sbi, ino_of_node(folio))); - } /* should add to global list before clearing PAGECACHE status */ if (f2fs_in_warm_node_list(folio)) { @@ -1956,9 +1955,6 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, if (is_inode_flag_set(inode, FI_DIRTY_INODE)) f2fs_update_inode(inode, folio); - if (!atomic) - set_dentry_mark(folio, - f2fs_need_dentry_mark(sbi, ino)); } /* may be written by other thread */ if (!folio_test_dirty(folio)) From c3e238bd1f56993f205ef83889d406dfeaf717a8 Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Wed, 18 Mar 2026 16:45:34 +0800 Subject: [PATCH 20/35] f2fs: fix fsck inconsistency caused by FGGC of node block During FGGC node block migration, fsck may incorrectly treat the migrated node block as fsync-written data. The reproduction scenario: root@vm:/mnt/f2fs# seq 1 2048 | xargs -n 1 ./test_sync // write inline inode and sync root@vm:/mnt/f2fs# rm -f 1 root@vm:/mnt/f2fs# sync root@vm:/mnt/f2fs# f2fs_io gc_range // move data block in sync mode and not write CP SPO, "fsck --dry-run" find inode has already checkpointed but still with DENT_BIT_SHIFT set The root cause is that GC does not clear the dentry mark and fsync mark during node block migration, leading fsck to misinterpret them as user-issued fsync writes. In BGGC mode, node block migration is handled by f2fs_sync_node_pages(), which guarantees the dentry and fsync marks are cleared before writing. This patch move the set/clear of the fsync|dentry marks into __write_node_folio to make the logic clearer, and ensures the fsync|dentry mark is cleared in FGGC. Cc: stable@kernel.org Fixes: da011cc0da8c ("f2fs: move node pages only in victim section during GC") Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 630fd3b43a08..c7499cb52745 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1727,9 +1727,10 @@ static struct folio *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) return last_folio; } -static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted, - struct writeback_control *wbc, bool do_balance, - enum iostat_type io_type, unsigned int *seq_id) +static bool __write_node_folio(struct folio *folio, bool atomic, bool do_fsync, + bool *submitted, struct writeback_control *wbc, + bool do_balance, enum iostat_type io_type, + unsigned int *seq_id) { struct f2fs_sb_info *sbi = F2FS_F_SB(folio); nid_t nid; @@ -1802,6 +1803,8 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted if (atomic && !test_opt(sbi, NOBARRIER)) fio.op_flags |= REQ_PREFLUSH | REQ_FUA; + set_dentry_mark(folio, false); + set_fsync_mark(folio, do_fsync); if (IS_INODE(folio) && (atomic || is_fsync_dnode(folio))) set_dentry_mark(folio, f2fs_need_dentry_mark(sbi, ino_of_node(folio))); @@ -1868,7 +1871,7 @@ static int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode, goto out_folio; } - if (!__write_node_folio(node_folio, false, NULL, + if (!__write_node_folio(node_folio, false, false, NULL, &wbc, false, FS_GC_NODE_IO, NULL)) err = -EAGAIN; goto release_folio; @@ -1915,6 +1918,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, for (i = 0; i < nr_folios; i++) { struct folio *folio = fbatch.folios[i]; bool submitted = false; + bool do_fsync = false; if (unlikely(f2fs_cp_error(sbi))) { f2fs_folio_put(last_folio, false); @@ -1945,11 +1949,8 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, f2fs_folio_wait_writeback(folio, NODE, true, true); - set_fsync_mark(folio, 0); - set_dentry_mark(folio, 0); - if (!atomic || folio == last_folio) { - set_fsync_mark(folio, 1); + do_fsync = true; percpu_counter_inc(&sbi->rf_node_block_count); if (IS_INODE(folio)) { if (is_inode_flag_set(inode, @@ -1966,8 +1967,9 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, if (!__write_node_folio(folio, atomic && folio == last_folio, - &submitted, wbc, true, - FS_NODE_IO, seq_id)) { + do_fsync, &submitted, + wbc, true, FS_NODE_IO, + seq_id)) { f2fs_folio_put(last_folio, false); folio_batch_release(&fbatch); ret = -EIO; @@ -2167,10 +2169,7 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, if (!folio_clear_dirty_for_io(folio)) goto continue_unlock; - set_fsync_mark(folio, 0); - set_dentry_mark(folio, 0); - - if (!__write_node_folio(folio, false, &submitted, + if (!__write_node_folio(folio, false, false, &submitted, wbc, do_balance, io_type, NULL)) { folio_batch_release(&fbatch); ret = -EIO; From fe9b8b30b97102859a9102be7bd2a09803bd90bd Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Wed, 18 Mar 2026 16:46:35 +0800 Subject: [PATCH 21/35] f2fs: fix inline data not being written to disk in writeback path When f2fs_fiemap() is called with `fileinfo->fi_flags` containing the FIEMAP_FLAG_SYNC flag, it attempts to write data to disk before retrieving file mappings via filemap_write_and_wait(). However, there is an issue where the file does not get mapped as expected. The following scenario can occur: root@vm:/mnt/f2fs# dd if=/dev/zero of=data.3k bs=3k count=1 root@vm:/mnt/f2fs# xfs_io data.3k -c "fiemap -v 0 4096" data.3k: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..5]: 0..5 6 0x307 The root cause of this issue is that f2fs_write_single_data_page() only calls f2fs_write_inline_data() to copy data from the data folio to the inode folio, and it clears the dirty flag on the data folio. However, it does not mark the data folio as writeback. When __filemap_fdatawait_range() checks for folios with the writeback flag, it returns early, causing f2fs_fiemap() to report that the file has no mapping. To fix this issue, the solution is to call f2fs_write_single_node_folio() in f2fs_inline_data_fiemap() when getting fiemap with FIEMAP_FLAG_SYNC flags. This patch ensures that the inode folio is written back and the writeback process completes before proceeding. Cc: stable@kernel.org Fixes: 9ffe0fb5f3bb ("f2fs: handle inline data operations") Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/inline.c | 9 +++++++++ fs/f2fs/node.c | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index df4cdf804376..39b97621456a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3946,6 +3946,8 @@ int f2fs_sanity_check_node_footer(struct f2fs_sb_info *sbi, enum node_type ntype, bool in_irq); struct folio *f2fs_get_inode_folio(struct f2fs_sb_info *sbi, pgoff_t ino); struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid); +int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode, + bool mark_dirty, enum iostat_type io_type); int f2fs_move_node_folio(struct folio *node_folio, int gc_type); void f2fs_flush_inline_data(struct f2fs_sb_info *sbi); int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 86d2abbb40ff..62a8a1192a41 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -814,6 +814,15 @@ int f2fs_inline_data_fiemap(struct inode *inode, goto out; } + if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) { + err = f2fs_write_single_node_folio(ifolio, true, false, FS_NODE_IO); + if (err) + return err; + ifolio = f2fs_get_inode_folio(F2FS_I_SB(inode), inode->i_ino); + if (IS_ERR(ifolio)) + return PTR_ERR(ifolio); + f2fs_folio_wait_writeback(ifolio, NODE, true, true); + } ilen = min_t(size_t, MAX_INLINE_DATA(inode), i_size_read(inode)); if (start >= ilen) goto out; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c7499cb52745..a9cd2803e681 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1843,7 +1843,7 @@ static bool __write_node_folio(struct folio *folio, bool atomic, bool do_fsync, return false; } -static int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode, +int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode, bool mark_dirty, enum iostat_type io_type) { int err = 0; From dccd324fa9bd1a2907a63fa4cc2651f687b2b5d0 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 16 Mar 2026 11:59:21 -0700 Subject: [PATCH 22/35] f2fs: fix to skip empty sections in f2fs_get_victim In age-based victim selection (ATGC, AT_SSR, or GC_CB), f2fs_get_victim can encounter sections with zero valid blocks. This situation often arises when checkpoint is disabled or due to race conditions between SIT updates and dirty list management. In such cases, f2fs_get_section_mtime() returns INVALID_MTIME, which subsequently triggers a fatal f2fs_bug_on(sbi, mtime == INVALID_MTIME) in add_victim_entry() or get_cb_cost(). This patch adds a check in f2fs_get_victim's selection loop to skip sections with no valid blocks. This prevents unnecessary age calculations for empty sections and avoids the associated kernel panic. This change also allows removing redundant checks in add_victim_entry(). Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 80b8500fa987..4bfc4452f299 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -910,6 +910,9 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, if (!f2fs_segment_has_free_slot(sbi, segno)) goto next; } + + if (!get_valid_blocks(sbi, segno, true)) + goto next; } if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) From 238e14eb7226f883b72caccd2d37bf5707df066b Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Tue, 10 Mar 2026 17:36:14 +0800 Subject: [PATCH 23/35] f2fs: fix data loss caused by incorrect use of nat_entry flag Data loss can occur when fsync is performed on a newly created file (before any checkpoint has been written) concurrently with a checkpoint operation. The scenario is as follows: create & write & fsync 'file A' write checkpoint - f2fs_do_sync_file // inline inode - f2fs_write_inode // inode folio is dirty - f2fs_write_checkpoint - f2fs_flush_merged_writes - f2fs_sync_node_pages - f2fs_flush_nat_entries - f2fs_fsync_node_pages // no dirty node - f2fs_need_inode_block_update // return false SPO and lost 'file A' f2fs_flush_nat_entries() sets the IS_CHECKPOINTED and HAS_LAST_FSYNC flags for the nat_entry, but this does not mean that the checkpoint has actually completed successfully. However, f2fs_need_inode_block_update() checks these flags and incorrectly assumes that the checkpoint has finished. The root cause is that the semantics of IS_CHECKPOINTED and HAS_LAST_FSYNC are only guaranteed after the checkpoint write fully completes. This patch modifies f2fs_need_inode_block_update() to acquire the sbi->node_write lock before reading the nat_entry flags, ensuring that once IS_CHECKPOINTED and HAS_LAST_FSYNC are observed to be set, the checkpoint operation has already completed. Fixes: e05df3b115e7 ("f2fs: add node operations") Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index a9cd2803e681..662a61306ec6 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -427,7 +427,9 @@ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; bool need_update = true; + struct f2fs_lock_context lc; + f2fs_down_read_trace(&sbi->node_write, &lc); f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino, false); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && @@ -435,6 +437,7 @@ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) get_nat_flag(e, HAS_FSYNCED_INODE))) need_update = false; f2fs_up_read(&nm_i->nat_tree_lock); + f2fs_up_read_trace(&sbi->node_write, &lc); return need_update; } From 6a5e3de9c2bb0b691d16789a5d19e9276a09b308 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 6 Mar 2026 12:24:20 +0000 Subject: [PATCH 24/35] f2fs: fix false alarm of lockdep on cp_global_sem lock lockdep reported a potential deadlock: a) TCMU device removal context: - call del_gendisk() to get q->q_usage_counter - call start_flush_work() to get work_completion of wb->dwork b) f2fs writeback context: - in wb_workfn(), which holds work_completion of wb->dwork - call f2fs_balance_fs() to get sbi->gc_lock c) f2fs vfs_write context: - call f2fs_gc() to get sbi->gc_lock - call f2fs_write_checkpoint() to get sbi->cp_global_sem d) f2fs mount context: - call recover_fsync_data() to get sbi->cp_global_sem - call f2fs_check_and_fix_write_pointer() to call blkdev_report_zones() that goes down to blk_mq_alloc_request and get q->q_usage_counter Original callstack is in Closes tag. However, I think this is a false alarm due to before mount returns successfully (context d), we can not access file therein via vfs_write (context c). Let's introduce per-sb cp_global_sem_key, and assign the key for cp_global_sem, so that lockdep can recognize cp_global_sem from different super block correctly. A lot of work are done by Shin'ichiro Kawasaki, thanks a lot for the work. Fixes: c426d99127b1 ("f2fs: Check write pointer consistency of open zones") Cc: stable@kernel.org Reported-and-tested-by: Shin'ichiro Kawasaki Closes: https://lore.kernel.org/linux-f2fs-devel/20260218125237.3340441-1-shinichiro.kawasaki@wdc.com Signed-off-by: Shin'ichiro Kawasaki Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 +++ fs/f2fs/super.c | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 39b97621456a..56c4af4b1737 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2042,6 +2042,9 @@ struct f2fs_sb_info { spinlock_t iostat_lat_lock; struct iostat_lat_info *iostat_io_lat; #endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lock_class_key cp_global_sem_key; +#endif }; /* Definitions to access f2fs_sb_info */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f626e5ca089d..ae81af5a8d29 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4964,6 +4964,11 @@ static int f2fs_fill_super(struct super_block *sb, struct fs_context *fc) init_f2fs_rwsem_trace(&sbi->gc_lock, sbi, LOCK_NAME_GC_LOCK); mutex_init(&sbi->writepages); init_f2fs_rwsem_trace(&sbi->cp_global_sem, sbi, LOCK_NAME_CP_GLOBAL); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + lockdep_register_key(&sbi->cp_global_sem_key); + lockdep_set_class(&sbi->cp_global_sem.internal_rwsem, + &sbi->cp_global_sem_key); +#endif init_f2fs_rwsem_trace(&sbi->node_write, sbi, LOCK_NAME_NODE_WRITE); init_f2fs_rwsem_trace(&sbi->node_change, sbi, LOCK_NAME_NODE_CHANGE); spin_lock_init(&sbi->stat_lock); @@ -5435,6 +5440,9 @@ static int f2fs_fill_super(struct super_block *sb, struct fs_context *fc) free_sb_buf: kfree(raw_super); free_sbi: +#ifdef CONFIG_DEBUG_LOCK_ALLOC + lockdep_unregister_key(&sbi->cp_global_sem_key); +#endif kfree(sbi); sb->s_fs_info = NULL; @@ -5516,6 +5524,9 @@ static void kill_f2fs_super(struct super_block *sb) /* Release block devices last, after fscrypt_destroy_keyring(). */ if (sbi) { destroy_device_list(sbi); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + lockdep_unregister_key(&sbi->cp_global_sem_key); +#endif kfree(sbi); sb->s_fs_info = NULL; } From 7b9161a605e91d0987e2596a245dc1f21621b23f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 9 Mar 2026 02:22:37 +0000 Subject: [PATCH 25/35] f2fs: fix to avoid uninit-value access in f2fs_sanity_check_node_footer syzbot reported a f2fs bug as below: BUG: KMSAN: uninit-value in f2fs_sanity_check_node_footer+0x374/0xa20 fs/f2fs/node.c:1520 f2fs_sanity_check_node_footer+0x374/0xa20 fs/f2fs/node.c:1520 f2fs_finish_read_bio+0xe1e/0x1d60 fs/f2fs/data.c:177 f2fs_read_end_io+0x6ab/0x2220 fs/f2fs/data.c:-1 bio_endio+0x1006/0x1160 block/bio.c:1792 submit_bio_noacct+0x533/0x2960 block/blk-core.c:891 submit_bio+0x57a/0x620 block/blk-core.c:926 blk_crypto_submit_bio include/linux/blk-crypto.h:203 [inline] f2fs_submit_read_bio+0x12c/0x360 fs/f2fs/data.c:557 f2fs_submit_page_bio+0xee2/0x1450 fs/f2fs/data.c:775 read_node_folio+0x384/0x4b0 fs/f2fs/node.c:1481 __get_node_folio+0x5db/0x15d0 fs/f2fs/node.c:1576 f2fs_get_inode_folio+0x40/0x50 fs/f2fs/node.c:1623 do_read_inode fs/f2fs/inode.c:425 [inline] f2fs_iget+0x1209/0x9380 fs/f2fs/inode.c:596 f2fs_fill_super+0x8f5a/0xb2e0 fs/f2fs/super.c:5184 get_tree_bdev_flags+0x6e6/0x920 fs/super.c:1694 get_tree_bdev+0x38/0x50 fs/super.c:1717 f2fs_get_tree+0x35/0x40 fs/f2fs/super.c:5436 vfs_get_tree+0xb3/0x5d0 fs/super.c:1754 fc_mount fs/namespace.c:1193 [inline] do_new_mount_fc fs/namespace.c:3763 [inline] do_new_mount+0x885/0x1dd0 fs/namespace.c:3839 path_mount+0x7a2/0x20b0 fs/namespace.c:4159 do_mount fs/namespace.c:4172 [inline] __do_sys_mount fs/namespace.c:4361 [inline] __se_sys_mount+0x704/0x7f0 fs/namespace.c:4338 __x64_sys_mount+0xe4/0x150 fs/namespace.c:4338 x64_sys_call+0x39f0/0x3ea0 arch/x86/include/generated/asm/syscalls_64.h:166 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0x134/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f The root cause is: in f2fs_finish_read_bio(), we may access uninit data in folio if we failed to read the data from device into folio, let's add a check condition to avoid such issue. Cc: stable@kernel.org Fixes: 50ac3ecd8e05 ("f2fs: fix to do sanity check on node footer in {read,write}_end_io") Reported-by: syzbot+9aac813cdc456cdd49f8@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/69a9ca26.a70a0220.305d9a.0000.GAE@google.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0e108c701aa3..a210a7a627c6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -173,7 +173,8 @@ static void f2fs_finish_read_bio(struct bio *bio, bool in_task) while (nr_pages--) dec_page_count(F2FS_F_SB(folio), __read_io_type(folio)); - if (F2FS_F_SB(folio)->node_inode && is_node_folio(folio) && + if (bio->bi_status == BLK_STS_OK && + F2FS_F_SB(folio)->node_inode && is_node_folio(folio) && f2fs_sanity_check_node_footer(F2FS_F_SB(folio), folio, folio->index, NODE_TYPE_REGULAR, true)) bio->bi_status = BLK_STS_IOERR; From 02d91398a602c394d72cd61a67c84e2730c5f79b Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 16 Mar 2026 11:59:54 -0700 Subject: [PATCH 26/35] f2fs: fix to freeze GC and discard threads quickly Suspend can fail if kernel threads do not freeze for a while. f2fs_gc and f2fs_discard threads can perform long-running operations that prevent them from reaching a freeze point in a timely manner. This patch adds explicit freezing checks in the following locations: 1. f2fs_gc: Added a check at the 'retry' label to exit the loop quickly if freezing is requested, especially during heavy GC rounds. 2. __issue_discard_cmd: Added a 'suspended' flag to break both inner and outer loops during discard command issuance if freezing is detected after at least one command has been issued. 3. __issue_discard_cmd_orderly: Added a similar check for orderly discard to ensure responsiveness. These checks ensure that the threads release locks safely and enter the frozen state. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 10 ++++++++++ fs/f2fs/segment.c | 12 +++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 4bfc4452f299..e60c1106f70b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1895,12 +1895,18 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, sbi->next_victim_seg[gc_type] = (cur_segno + 1 < sec_end_segno) ? cur_segno + 1 : NULL_SEGNO; + + if (unlikely(freezing(current))) { + folio_put_refs(sum_folio, 2); + goto stop; + } } next_block: folio_put_refs(sum_folio, 2); segno = block_end_segno; } +stop: if (submitted) f2fs_submit_merged_write(sbi, data_type); @@ -1974,6 +1980,10 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control) goto stop; } retry: + if (unlikely(freezing(current))) { + ret = 0; + goto stop; + } ret = __get_victim(sbi, &segno, gc_type, gc_control->one_time); if (ret) { /* allow to search victim from sections has pinned data */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0bf25786667f..788f8b050249 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1606,6 +1606,9 @@ static void __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, if (dc->state != D_PREP) goto next; + if (*issued > 0 && unlikely(freezing(current))) + break; + if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) { io_interrupted = true; break; @@ -1645,6 +1648,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, struct blk_plug plug; int i, issued; bool io_interrupted = false; + bool suspended = false; if (dpolicy->timeout) f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT); @@ -1675,6 +1679,11 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); + if (issued > 0 && unlikely(freezing(current))) { + suspended = true; + break; + } + if (dpolicy->timeout && f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT)) break; @@ -1694,7 +1703,8 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, next: mutex_unlock(&dcc->cmd_lock); - if (issued >= dpolicy->max_requests || io_interrupted) + if (issued >= dpolicy->max_requests || io_interrupted || + suspended) break; } From 8979bc3d2a252940a277392b5eb6e52be7a3e1a5 Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Tue, 24 Mar 2026 17:47:08 +0800 Subject: [PATCH 27/35] f2fs: invalidate block device page cache on umount Neither F2FS nor VFS invalidates the block device page cache, which results in reading stale metadata. An example scenario is shown below: Terminal A Terminal B mount /dev/vdb /mnt/f2fs touch mx // ino = 4 sync dump.f2fs -i 4 /dev/vdb// block on "[Y/N]" touch mx2 // ino = 5 sync umount /mnt/f2fs dump.f2fs -i 5 /dev/vdb // block addr is 0 After umount, the block device page cache is not purged, causing `dump.f2fs -i 5 /dev/vdb` to read stale metadata and see inode 5 with block address 0. Btrfs has encountered a similar issue before, the solution there was to call sync_blockdev() and invalidate_bdev() when the device is closed: mail-archive.com/linux-btrfs@vger.kernel.org/msg54188.html For the root user, the f2fs kernel calls sync_blockdev() on umount to flush all cached data to disk, and f2fs-tools can release the page cache by issuing ioctl(fd, BLKFLSBUF) when accessing the device. However, non-root users are not permitted to drop the page cache, and may still observe stale data. This patch calls sync_blockdev() and invalidate_bdev() during umount to invalidate the block device page cache, thereby preventing stale metadata from being read. Note that this may result in an extra sync_blockdev() call on the first device, in both f2fs_put_super() and kill_block_super(). The second call do nothing, as there are no dirty pages left to flush. It ensures that non-root users do not observe stale data. Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ae81af5a8d29..e20e95696af4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2088,6 +2088,12 @@ static void f2fs_put_super(struct super_block *sb) #if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif + sync_blockdev(sb->s_bdev); + invalidate_bdev(sb->s_bdev); + for (i = 1; i < sbi->s_ndevs; i++) { + sync_blockdev(FDEV(i).bdev); + invalidate_bdev(FDEV(i).bdev); + } } int f2fs_sync_fs(struct super_block *sb, int sync) From 01968164d94762db2f703647c5acfa28613844f1 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Thu, 5 Mar 2026 11:22:46 +0800 Subject: [PATCH 28/35] f2fs: fix to preserve previous reserve_{blocks,node} value when remount The following steps will change previous value of reserve_{blocks,node}, this dones not match the original intention. 1.mount -t f2fs -o reserve_root=8192 imgfile test_mount/ F2FS-fs (loop56): Mounted with checkpoint version = 1b69f8c7 mount info: /dev/block/loop56 on /data/test_mount type f2fs (xxx,reserve_root=8192,reserve_node=0,resuid=0,resgid=0,xxx) 2.mount -t f2fs -o remount,reserve_root=4096 /data/test_mount F2FS-fs (loop56): Preserve previous reserve_root=8192 check mount info: reserve_root change to 4096 /dev/block/loop56 on /data/test_mount type f2fs (xxx,reserve_root=4096,reserve_node=0,resuid=0,resgid=0,xxx) Prior to commit d18535132523 ("f2fs: separate the options parsing and options checking"), the value of reserve_{blocks,node} was only set during the first mount, along with the corresponding mount option F2FS_MOUNT_RESERVE_{ROOT,NODE} . If the mount option F2FS_MOUNT_RESERVE_{ROOT,NODE} was found to have been set during the mount/remount, the previously value of reserve_{blocks,node} would also be preserved, as shown in the code below. if (test_opt(sbi, RESERVE_ROOT)) { f2fs_info(sbi, "Preserve previous reserve_root=%u", F2FS_OPTION(sbi).root_reserved_blocks); } else { F2FS_OPTION(sbi).root_reserved_blocks = arg; set_opt(sbi, RESERVE_ROOT); } But commit d18535132523 ("f2fs: separate the options parsing and options checking") only preserved the previous mount option; it did not preserve the previous value of reserve_{blocks,node}. Since value of reserve_{blocks,node} value is assigned or not depends on ctx->spec_mask, ctx->spec_mask should be alos handled in f2fs_check_opt_consistency. This patch will clear the corresponding ctx->spec_mask bits in f2fs_check_opt_consistency to preserve the previously values of reserve_{blocks,node} if it already have a value. Fixes: d18535132523 ("f2fs: separate the options parsing and options checking") Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e20e95696af4..5b552f08fe7b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1515,6 +1515,7 @@ static int f2fs_check_opt_consistency(struct fs_context *fc, F2FS_OPTION(sbi).root_reserved_blocks); ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_ROOT); ctx->opt_mask &= ~BIT(F2FS_MOUNT_RESERVE_ROOT); + ctx->spec_mask &= ~F2FS_SPEC_reserve_root; } if (test_opt(sbi, RESERVE_NODE) && (ctx->opt_mask & BIT(F2FS_MOUNT_RESERVE_NODE)) && @@ -1523,6 +1524,7 @@ static int f2fs_check_opt_consistency(struct fs_context *fc, F2FS_OPTION(sbi).root_reserved_nodes); ctx_clear_opt(ctx, F2FS_MOUNT_RESERVE_NODE); ctx->opt_mask &= ~BIT(F2FS_MOUNT_RESERVE_NODE); + ctx->spec_mask &= ~F2FS_SPEC_reserve_node; } err = f2fs_check_test_dummy_encryption(fc, sb); From 2a3db1e02ce08c14af04da70bb99e8a0a31eb9e8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 30 Mar 2026 23:40:59 +0000 Subject: [PATCH 29/35] f2fs: allow empty mount string for Opt_usr|grp|projjquota The fsparam_string_empty() gives an error when mounting without string, since its type is set to fsparam_flag in VFS. So, let's allow the flag as well. This addresses xfstests/f2fs/015 and f2fs/021. Fixes: d18535132523 ("f2fs: separate the options parsing and options checking") Reviewed-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5b552f08fe7b..ccf806b676f5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -336,9 +336,12 @@ static const struct fs_parameter_spec f2fs_param_specs[] = { fsparam_flag("usrquota", Opt_usrquota), fsparam_flag("grpquota", Opt_grpquota), fsparam_flag("prjquota", Opt_prjquota), - fsparam_string_empty("usrjquota", Opt_usrjquota), - fsparam_string_empty("grpjquota", Opt_grpjquota), - fsparam_string_empty("prjjquota", Opt_prjjquota), + fsparam_string("usrjquota", Opt_usrjquota), + fsparam_flag("usrjquota", Opt_usrjquota), + fsparam_string("grpjquota", Opt_grpjquota), + fsparam_flag("grpjquota", Opt_grpjquota), + fsparam_string("prjjquota", Opt_prjjquota), + fsparam_flag("prjjquota", Opt_prjjquota), fsparam_flag("nat_bits", Opt_nat_bits), fsparam_enum("jqfmt", Opt_jqfmt, f2fs_param_jqfmt), fsparam_enum("alloc_mode", Opt_alloc, f2fs_param_alloc_mode), @@ -979,26 +982,26 @@ static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx_set_opt(ctx, F2FS_MOUNT_PRJQUOTA); break; case Opt_usrjquota: - if (!*param->string) - ret = f2fs_unnote_qf_name(fc, USRQUOTA); - else + if (param->type == fs_value_is_string && *param->string) ret = f2fs_note_qf_name(fc, USRQUOTA, param); + else + ret = f2fs_unnote_qf_name(fc, USRQUOTA); if (ret) return ret; break; case Opt_grpjquota: - if (!*param->string) - ret = f2fs_unnote_qf_name(fc, GRPQUOTA); - else + if (param->type == fs_value_is_string && *param->string) ret = f2fs_note_qf_name(fc, GRPQUOTA, param); + else + ret = f2fs_unnote_qf_name(fc, GRPQUOTA); if (ret) return ret; break; case Opt_prjjquota: - if (!*param->string) - ret = f2fs_unnote_qf_name(fc, PRJQUOTA); - else + if (param->type == fs_value_is_string && *param->string) ret = f2fs_note_qf_name(fc, PRJQUOTA, param); + else + ret = f2fs_unnote_qf_name(fc, PRJQUOTA); if (ret) return ret; break; From ed78aeebef05212ef7dca93bd931e4eff67c113f Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Fri, 3 Apr 2026 22:40:17 +0800 Subject: [PATCH 30/35] f2fs: fix node_cnt race between extent node destroy and writeback f2fs_destroy_extent_node() does not set FI_NO_EXTENT before clearing extent nodes. When called from f2fs_drop_inode() with I_SYNC set, concurrent kworker writeback can insert new extent nodes into the same extent tree, racing with the destroy and triggering f2fs_bug_on() in __destroy_extent_node(). The scenario is as follows: drop inode writeback - iput - f2fs_drop_inode // I_SYNC set - f2fs_destroy_extent_node - __destroy_extent_node - while (node_cnt) { write_lock(&et->lock) __free_extent_tree write_unlock(&et->lock) - __writeback_single_inode - f2fs_outplace_write_data - f2fs_update_read_extent_cache - __update_extent_tree_range // FI_NO_EXTENT not set, // insert new extent node } // node_cnt == 0, exit while - f2fs_bug_on(node_cnt) // node_cnt > 0 Additionally, __update_extent_tree_range() only checks FI_NO_EXTENT for EX_READ type, leaving EX_BLOCK_AGE updates completely unprotected. This patch set FI_NO_EXTENT under et->lock in __destroy_extent_node(), consistent with other callers (__update_extent_tree_range and __drop_extent_tree) and check FI_NO_EXTENT for both EX_READ and EX_BLOCK_AGE tree. Fixes: 3fc5d5a182f6 ("f2fs: fix to shrink read extent node in batches") Cc: stable@vger.kernel.org Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 0ed84cc065a7..87169fd29d89 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -119,9 +119,10 @@ static bool __may_extent_tree(struct inode *inode, enum extent_type type) if (!__init_may_extent_tree(inode, type)) return false; + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + return false; + if (type == EX_READ) { - if (is_inode_flag_set(inode, FI_NO_EXTENT)) - return false; if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && !f2fs_sb_has_readonly(F2FS_I_SB(inode))) return false; @@ -644,6 +645,8 @@ static unsigned int __destroy_extent_node(struct inode *inode, while (atomic_read(&et->node_cnt)) { write_lock(&et->lock); + if (!is_inode_flag_set(inode, FI_NO_EXTENT)) + set_inode_flag(inode, FI_NO_EXTENT); node_cnt += __free_extent_tree(sbi, et, nr_shrink); write_unlock(&et->lock); } @@ -688,12 +691,12 @@ static void __update_extent_tree_range(struct inode *inode, write_lock(&et->lock); - if (type == EX_READ) { - if (is_inode_flag_set(inode, FI_NO_EXTENT)) { - write_unlock(&et->lock); - return; - } + if (is_inode_flag_set(inode, FI_NO_EXTENT)) { + write_unlock(&et->lock); + return; + } + if (type == EX_READ) { prev = et->largest; dei.len = 0; From b8b902fd57fbaec70eb5ae2f0ec12a650ae62d96 Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Fri, 10 Apr 2026 23:05:37 +0800 Subject: [PATCH 31/35] f2fs: disallow setting an extension to both cold and hot An extension should not exist in both the cold and hot extension lists simultaneously. When adding a hot extension, check whether it already exists in the cold list, and vice versa. Reject the operation with -EINVAL if a conflict is found. Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 6ef21deeef1c..2e9c6be56518 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -83,6 +83,21 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, if (set) { if (total_count == F2FS_MAX_EXTENSION) return -EINVAL; + + if (hot) { + start = 0; + count = cold_count; + } else { + start = cold_count; + count = total_count; + } + for (i = start; i < count; i++) { + if (!strcmp(name, extlist[i])) { + f2fs_warn(sbi, "extension '%s' already exists in %s list", + name, hot ? "cold" : "hot"); + return -EINVAL; + } + } } else { if (!hot && !cold_count) return -EINVAL; From 5909bedbed38c558bee7cb6758ceedf9bc3a9194 Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Fri, 10 Apr 2026 23:05:39 +0800 Subject: [PATCH 32/35] f2fs: protect extension_list reading with sb_lock in f2fs_sbi_show() In f2fs_sbi_show(), the extension_list, extension_count and hot_ext_count are read without holding sbi->sb_lock. If a concurrent sysfs store modifies the extension list via f2fs_update_extension_list(), the show path may read inconsistent count and array contents, potentially leading to out-of-bounds access or displaying stale data. Fix this by holding sb_lock around the entire extension list read and format operation. Fixes: b6a06cbbb5f7 ("f2fs: support hot file extension") Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 969e06b65b04..12993ae1713b 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -387,10 +387,12 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, if (!strcmp(a->attr.name, "extension_list")) { __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; - int cold_count = le32_to_cpu(sbi->raw_super->extension_count); - int hot_count = sbi->raw_super->hot_ext_count; + int cold_count, hot_count; int len = 0, i; + f2fs_down_read(&sbi->sb_lock); + cold_count = le32_to_cpu(sbi->raw_super->extension_count); + hot_count = sbi->raw_super->hot_ext_count; len += sysfs_emit_at(buf, len, "cold file extension:\n"); for (i = 0; i < cold_count; i++) len += sysfs_emit_at(buf, len, "%s\n", extlist[i]); @@ -398,6 +400,7 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, len += sysfs_emit_at(buf, len, "hot file extension:\n"); for (i = cold_count; i < cold_count + hot_count; i++) len += sysfs_emit_at(buf, len, "%s\n", extlist[i]); + f2fs_up_read(&sbi->sb_lock); return len; } From b635f2ecdb5ad34f9c967cabb704d6bed9382fd0 Mon Sep 17 00:00:00 2001 From: Guangshuo Li Date: Fri, 10 Apr 2026 20:47:26 +0800 Subject: [PATCH 33/35] f2fs: fix uninitialized kobject put in f2fs_init_sysfs() In f2fs_init_sysfs(), all failure paths after kset_register() jump to put_kobject, which unconditionally releases both f2fs_tune and f2fs_feat. If kobject_init_and_add(&f2fs_feat, ...) fails, f2fs_tune has not been initialized yet, so calling kobject_put(&f2fs_tune) is invalid. Fix this by splitting the unwind path so each error path only releases objects that were successfully initialized. Fixes: a907f3a68ee26ba4 ("f2fs: add a sysfs entry to reclaim POSIX_FADV_NOREUSE pages") Cc: stable@vger.kernel.org Signed-off-by: Guangshuo Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 12993ae1713b..352e96ad5c3a 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -1997,24 +1997,26 @@ int __init f2fs_init_sysfs(void) ret = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype, NULL, "features"); if (ret) - goto put_kobject; + goto unregister_kset; ret = kobject_init_and_add(&f2fs_tune, &f2fs_tune_ktype, NULL, "tuning"); if (ret) - goto put_kobject; + goto put_feat; f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); if (!f2fs_proc_root) { ret = -ENOMEM; - goto put_kobject; + goto put_tune; } return 0; -put_kobject: +put_tune: kobject_put(&f2fs_tune); +put_feat: kobject_put(&f2fs_feat); +unregister_kset: kset_unregister(&f2fs_kset); return ret; } From 1583a7ded0d3d67fd6e7e4336600bc191d068a20 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 1 Apr 2026 04:05:56 +0000 Subject: [PATCH 34/35] f2fs: do not support mmap write for large folio Let's check mmap writes onto the large folio, since we don't support writing large folios. Reviewed-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2c4880f24b54..e917342cb828 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -82,8 +82,17 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) int err = 0; vm_fault_t ret; - if (unlikely(IS_IMMUTABLE(inode))) + /* + * We only support large folio on the read case. + * Don't make any dirty pages. + */ + if (unlikely(IS_IMMUTABLE(inode)) || + mapping_large_folio_support(inode->i_mapping)) { + f2fs_err(sbi, "Not expected: immutable: %d large_folio: %d", + IS_IMMUTABLE(inode), + mapping_large_folio_support(inode->i_mapping)); return VM_FAULT_SIGBUS; + } if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { err = -EIO; From cb8ff3ead9a3fc43727980be58c7099506f65261 Mon Sep 17 00:00:00 2001 From: Daniel Lee Date: Fri, 17 Apr 2026 10:50:40 -0700 Subject: [PATCH 35/35] f2fs: add page-order information for large folio reads in iostat Track read folio counts by order in F2FS iostat sysfs and tracepoints. Signed-off-by: Daniel Lee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++++ fs/f2fs/f2fs.h | 3 +++ fs/f2fs/iostat.c | 38 ++++++++++++++++++++++++++++++++++++- fs/f2fs/iostat.h | 4 ++++ include/trace/events/f2fs.h | 21 ++++++++++++++++---- 5 files changed, 65 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a210a7a627c6..965d4e6443c6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2508,6 +2508,8 @@ static int f2fs_read_data_large_folio(struct inode *inode, if (!folio) goto out; + f2fs_update_read_folio_count(F2FS_I_SB(inode), folio); + folio_in_bio = false; index = folio->index; offset = 0; @@ -2682,6 +2684,8 @@ static int f2fs_mpage_readpages(struct inode *inode, struct fsverity_info *vi, prefetchw(&folio->flags); } + f2fs_update_read_folio_count(F2FS_I_SB(inode), folio); + #ifdef CONFIG_F2FS_FS_COMPRESSION index = folio->index; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 56c4af4b1737..e40b6b2784ee 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -2034,6 +2035,8 @@ struct f2fs_sb_info { unsigned long long iostat_count[NR_IO_TYPE]; unsigned long long iostat_bytes[NR_IO_TYPE]; unsigned long long prev_iostat_bytes[NR_IO_TYPE]; + unsigned long long iostat_read_folio_count[NR_PAGE_ORDERS]; + unsigned long long prev_iostat_read_folio_count[NR_PAGE_ORDERS]; bool iostat_enable; unsigned long iostat_next_period; unsigned int iostat_period_ms; diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c index f8703038e1d8..ae265e3e9b2c 100644 --- a/fs/f2fs/iostat.c +++ b/fs/f2fs/iostat.c @@ -34,6 +34,7 @@ int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset) { struct super_block *sb = seq->private; struct f2fs_sb_info *sbi = F2FS_SB(sb); + int i; if (!sbi->iostat_enable) return 0; @@ -76,6 +77,12 @@ int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset) IOSTAT_INFO_SHOW("fs node", FS_NODE_READ_IO); IOSTAT_INFO_SHOW("fs meta", FS_META_READ_IO); + /* print read folio order stats */ + seq_printf(seq, "%-23s", "fs read folio order:"); + for (i = 0; i < NR_PAGE_ORDERS; i++) + seq_printf(seq, " %llu", sbi->iostat_read_folio_count[i]); + seq_putc(seq, '\n'); + /* print other IOs */ seq_puts(seq, "[OTHER]\n"); IOSTAT_INFO_SHOW("fs discard", FS_DISCARD_IO); @@ -113,6 +120,7 @@ static inline void __record_iostat_latency(struct f2fs_sb_info *sbi) static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi) { unsigned long long iostat_diff[NR_IO_TYPE]; + unsigned long long read_folio_count_diff[NR_PAGE_ORDERS]; int i; unsigned long flags; @@ -133,9 +141,15 @@ static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi) sbi->prev_iostat_bytes[i]; sbi->prev_iostat_bytes[i] = sbi->iostat_bytes[i]; } + + for (i = 0; i < NR_PAGE_ORDERS; i++) { + read_folio_count_diff[i] = sbi->iostat_read_folio_count[i] - + sbi->prev_iostat_read_folio_count[i]; + sbi->prev_iostat_read_folio_count[i] = sbi->iostat_read_folio_count[i]; + } spin_unlock_irqrestore(&sbi->iostat_lock, flags); - trace_f2fs_iostat(sbi, iostat_diff); + trace_f2fs_iostat(sbi, iostat_diff, read_folio_count_diff); __record_iostat_latency(sbi); } @@ -151,6 +165,10 @@ void f2fs_reset_iostat(struct f2fs_sb_info *sbi) sbi->iostat_bytes[i] = 0; sbi->prev_iostat_bytes[i] = 0; } + for (i = 0; i < NR_PAGE_ORDERS; i++) { + sbi->iostat_read_folio_count[i] = 0; + sbi->prev_iostat_read_folio_count[i] = 0; + } spin_unlock_irq(&sbi->iostat_lock); spin_lock_irq(&sbi->iostat_lat_lock); @@ -165,6 +183,24 @@ static inline void __f2fs_update_iostat(struct f2fs_sb_info *sbi, sbi->iostat_count[type]++; } +void f2fs_update_read_folio_count(struct f2fs_sb_info *sbi, struct folio *folio) +{ + unsigned int order = folio_order(folio); + unsigned long flags; + + if (!sbi->iostat_enable) + return; + + if (order >= NR_PAGE_ORDERS) + order = NR_PAGE_ORDERS - 1; + + spin_lock_irqsave(&sbi->iostat_lock, flags); + sbi->iostat_read_folio_count[order]++; + spin_unlock_irqrestore(&sbi->iostat_lock, flags); + + f2fs_record_iostat(sbi); +} + void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode, enum iostat_type type, unsigned long long io_bytes) { diff --git a/fs/f2fs/iostat.h b/fs/f2fs/iostat.h index eb99d05cf272..2025225b5bed 100644 --- a/fs/f2fs/iostat.h +++ b/fs/f2fs/iostat.h @@ -34,6 +34,8 @@ extern int __maybe_unused iostat_info_seq_show(struct seq_file *seq, extern void f2fs_reset_iostat(struct f2fs_sb_info *sbi); extern void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode, enum iostat_type type, unsigned long long io_bytes); +extern void f2fs_update_read_folio_count(struct f2fs_sb_info *sbi, + struct folio *folio); struct bio_iostat_ctx { struct f2fs_sb_info *sbi; @@ -68,6 +70,8 @@ extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi); #else static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, struct inode *inode, enum iostat_type type, unsigned long long io_bytes) {} +static inline void f2fs_update_read_folio_count(struct f2fs_sb_info *sbi, + struct folio *folio) {} static inline void iostat_update_and_unbind_ctx(struct bio *bio) {} static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi, struct bio *bio, struct bio_post_read_ctx *ctx) {} diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 9364e6775562..ff4a58c2cbbb 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -2116,9 +2116,10 @@ DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end, #ifdef CONFIG_F2FS_IOSTAT TRACE_EVENT(f2fs_iostat, - TP_PROTO(struct f2fs_sb_info *sbi, unsigned long long *iostat), + TP_PROTO(struct f2fs_sb_info *sbi, unsigned long long *iostat, + unsigned long long *read_folio_count), - TP_ARGS(sbi, iostat), + TP_ARGS(sbi, iostat, read_folio_count), TP_STRUCT__entry( __field(dev_t, dev) @@ -2150,6 +2151,7 @@ TRACE_EVENT(f2fs_iostat, __field(unsigned long long, fs_mrio) __field(unsigned long long, fs_discard) __field(unsigned long long, fs_reset_zone) + __array(unsigned long long, read_folio_count, 11) ), TP_fast_assign( @@ -2182,6 +2184,9 @@ TRACE_EVENT(f2fs_iostat, __entry->fs_mrio = iostat[FS_META_READ_IO]; __entry->fs_discard = iostat[FS_DISCARD_IO]; __entry->fs_reset_zone = iostat[FS_ZONE_RESET_IO]; + memset(__entry->read_folio_count, 0, sizeof(__entry->read_folio_count)); + memcpy(__entry->read_folio_count, read_folio_count, + sizeof(unsigned long long) * min_t(int, NR_PAGE_ORDERS, 11)); ), TP_printk("dev = (%d,%d), " @@ -2194,7 +2199,9 @@ TRACE_EVENT(f2fs_iostat, "app [read=%llu (direct=%llu, buffered=%llu), mapped=%llu], " "compr(buffered=%llu, mapped=%llu)], " "fs [data=%llu, (gc_data=%llu, cdata=%llu), " - "node=%llu, meta=%llu]", + "node=%llu, meta=%llu], " + "read_folio_count [0=%llu, 1=%llu, 2=%llu, 3=%llu, 4=%llu, " + "5=%llu, 6=%llu, 7=%llu, 8=%llu, 9=%llu, 10=%llu]", show_dev(__entry->dev), __entry->app_wio, __entry->app_dio, __entry->app_bio, __entry->app_mio, __entry->app_bcdio, __entry->app_mcdio, __entry->fs_dio, __entry->fs_cdio, @@ -2205,7 +2212,13 @@ TRACE_EVENT(f2fs_iostat, __entry->app_rio, __entry->app_drio, __entry->app_brio, __entry->app_mrio, __entry->app_bcrio, __entry->app_mcrio, __entry->fs_drio, __entry->fs_gdrio, - __entry->fs_cdrio, __entry->fs_nrio, __entry->fs_mrio) + __entry->fs_cdrio, __entry->fs_nrio, __entry->fs_mrio, + __entry->read_folio_count[0], __entry->read_folio_count[1], + __entry->read_folio_count[2], __entry->read_folio_count[3], + __entry->read_folio_count[4], __entry->read_folio_count[5], + __entry->read_folio_count[6], __entry->read_folio_count[7], + __entry->read_folio_count[8], __entry->read_folio_count[9], + __entry->read_folio_count[10]) ); #ifndef __F2FS_IOSTAT_LATENCY_TYPE