From e0a85137a882db789b1bccc1e7db06356ac8c69f Mon Sep 17 00:00:00 2001
From: Leo Martins
Date: Thu, 19 Mar 2026 16:49:08 -0700
Subject: [PATCH] btrfs: avoid GFP_ATOMIC allocations in qgroup free paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When qgroups are enabled, __btrfs_qgroup_release_data() and
qgroup_free_reserved_data() pass an extent_changeset to
btrfs_clear_record_extent_bits() to track how many bytes had their
EXTENT_QGROUP_RESERVED bits cleared. Inside the extent IO tree spinlock,
add_extent_changeset() calls ulist_add() with GFP_ATOMIC to record each
changed range. If this allocation fails, it hits a BUG_ON and panics the
kernel.

However, both of these callers only read changeset.bytes_changed
afterwards — the range_changed ulist is populated and immediately freed
without ever being iterated. The GFP_ATOMIC allocation is entirely
unnecessary for these paths.

Introduce extent_changeset_init_bytes_only() which uses a sentinel value
(EXTENT_CHANGESET_BYTES_ONLY) on the ulist's prealloc field to signal that
only bytes_changed should be tracked. add_extent_changeset() checks for
this sentinel and returns early after updating bytes_changed, skipping the
ulist_add() call entirely. This eliminates the GFP_ATOMIC allocation and
makes the BUG_ON unreachable for these paths.

Callers that need range tracking (qgroup_reserve_data,
qgroup_unreserve_range, btrfs_qgroup_check_reserved_leak) continue to use
extent_changeset_init() and are unaffected.

Reviewed-by: Qu Wenruo Signed-off-by: Leo Martins Signed-off-by: David Sterba --- fs/btrfs/extent-io-tree.c | 3 +++ fs/btrfs/extent_io.h | 23 ++++++++++++++++++++++- fs/btrfs/qgroup.c | 5 +++-- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c index 6ae7709cba23..626702244809 100644 --- a/fs/btrfs/extent-io-tree.c +++ b/fs/btrfs/extent-io-tree.c @@ -195,7 +195,10 @@ static int add_extent_changeset(struct extent_state *state, u32 bits, return 0; if (!set && (state->state & bits) == 0) return 0; + changeset->bytes_changed += state->end - state->start + 1; + if (!extent_changeset_tracks_ranges(changeset)) + return 0; ret = ulist_add(&changeset->range_changed, state->start, state->end, GFP_ATOMIC); if (ret < 0) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index a3b0ab501361..fd209233317f 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -198,6 +198,25 @@ static inline void extent_changeset_init(struct extent_changeset *changeset) ulist_init(&changeset->range_changed); } +/* + * Sentinel value for range_changed.prealloc indicating that the changeset + * only tracks bytes_changed and does not record individual ranges. This + * avoids GFP_ATOMIC allocations inside add_extent_changeset() when the + * caller doesn't need to iterate the changed ranges afterwards. 
+ */ +#define EXTENT_CHANGESET_BYTES_ONLY ((struct ulist_node *)1) + +static inline void extent_changeset_init_bytes_only(struct extent_changeset *changeset) +{ + changeset->bytes_changed = 0; + changeset->range_changed.prealloc = EXTENT_CHANGESET_BYTES_ONLY; +} + +static inline bool extent_changeset_tracks_ranges(const struct extent_changeset *changeset) +{ + return changeset->range_changed.prealloc != EXTENT_CHANGESET_BYTES_ONLY; +} + static inline struct extent_changeset *extent_changeset_alloc(void) { struct extent_changeset *ret; @@ -212,6 +231,7 @@ static inline struct extent_changeset *extent_changeset_alloc(void) static inline void extent_changeset_prealloc(struct extent_changeset *changeset, gfp_t gfp_mask) { + ASSERT(extent_changeset_tracks_ranges(changeset)); ulist_prealloc(&changeset->range_changed, gfp_mask); } @@ -220,7 +240,8 @@ static inline void extent_changeset_release(struct extent_changeset *changeset) if (!changeset) return; changeset->bytes_changed = 0; - ulist_release(&changeset->range_changed); + if (extent_changeset_tracks_ranges(changeset)) + ulist_release(&changeset->range_changed); } static inline void extent_changeset_free(struct extent_changeset *changeset) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 0bf3ebc1ffb7..cdf736d3a4e5 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4324,7 +4324,7 @@ static int qgroup_free_reserved_data(struct btrfs_inode *inode, u64 freed = 0; int ret; - extent_changeset_init(&changeset); + extent_changeset_init_bytes_only(&changeset); len = round_up(start + len, root->fs_info->sectorsize); start = round_down(start, root->fs_info->sectorsize); @@ -4389,7 +4389,7 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode, WARN_ON(!free && reserved); if (free && reserved) return qgroup_free_reserved_data(inode, reserved, start, len, released); - extent_changeset_init(&changeset); + extent_changeset_init_bytes_only(&changeset); ret = btrfs_clear_record_extent_bits(&inode->io_tree, 
start, start + len - 1, EXTENT_QGROUP_RESERVED, &changeset); if (ret < 0) @@ -4647,6 +4647,7 @@ void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode) WARN_ON(ret < 0); if (WARN_ON(changeset.bytes_changed)) { + ASSERT(extent_changeset_tracks_ranges(&changeset)); ULIST_ITER_INIT(&iter); while ((unode = ulist_next(&changeset.range_changed, &iter))) { btrfs_warn(inode->root->fs_info,