btrfs: avoid GFP_ATOMIC allocations in qgroup free paths

When qgroups are enabled, __btrfs_qgroup_release_data() and
qgroup_free_reserved_data() pass an extent_changeset to
btrfs_clear_record_extent_bits() to track how many bytes had their
EXTENT_QGROUP_RESERVED bits cleared. Inside the extent IO tree spinlock,
add_extent_changeset() calls ulist_add() with GFP_ATOMIC to record each
changed range. If this allocation fails, it hits a BUG_ON and panics the
kernel.

However, both of these callers only read changeset.bytes_changed
afterwards — the range_changed ulist is populated and immediately freed
without ever being iterated. The GFP_ATOMIC allocation is entirely
unnecessary for these paths.

Introduce extent_changeset_init_bytes_only() which uses a sentinel value
(EXTENT_CHANGESET_BYTES_ONLY) on the ulist's prealloc field to signal
that only bytes_changed should be tracked. add_extent_changeset() checks
for this sentinel and returns early after updating bytes_changed,
skipping the ulist_add() call entirely. This eliminates the GFP_ATOMIC
allocation and makes the BUG_ON unreachable for these paths.

Callers that need range tracking (qgroup_reserve_data,
qgroup_unreserve_range, btrfs_qgroup_check_reserved_leak) continue to
use extent_changeset_init() and are unaffected.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Leo Martins <loemra.dev@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Leo Martins
2026-03-19 16:49:08 -07:00
committed by David Sterba
parent 390aa432f3
commit e0a85137a8
3 changed files with 28 additions and 3 deletions

View File

@@ -195,7 +195,10 @@ static int add_extent_changeset(struct extent_state *state, u32 bits,
return 0;
if (!set && (state->state & bits) == 0)
return 0;
changeset->bytes_changed += state->end - state->start + 1;
if (!extent_changeset_tracks_ranges(changeset))
return 0;
ret = ulist_add(&changeset->range_changed, state->start, state->end, GFP_ATOMIC);
if (ret < 0)

View File

@@ -198,6 +198,25 @@ static inline void extent_changeset_init(struct extent_changeset *changeset)
ulist_init(&changeset->range_changed);
}
/*
* Sentinel value for range_changed.prealloc indicating that the changeset
* only tracks bytes_changed and does not record individual ranges. This
* avoids GFP_ATOMIC allocations inside add_extent_changeset() when the
* caller doesn't need to iterate the changed ranges afterwards.
*/
#define EXTENT_CHANGESET_BYTES_ONLY ((struct ulist_node *)1)
/*
 * Initialize @changeset in bytes-only mode: bytes_changed is zeroed and the
 * range_changed.prealloc field is set to the EXTENT_CHANGESET_BYTES_ONLY
 * sentinel so that add_extent_changeset() returns early after updating
 * bytes_changed, skipping ulist_add() and its GFP_ATOMIC allocation.
 *
 * Note the range_changed ulist itself is not initialized here (no
 * ulist_init() call); a bytes-only changeset must never have its ranges
 * iterated.
 */
static inline void extent_changeset_init_bytes_only(struct extent_changeset *changeset)
{
	changeset->bytes_changed = 0;
	changeset->range_changed.prealloc = EXTENT_CHANGESET_BYTES_ONLY;
}
/*
 * Return true if @changeset records individual changed ranges in its
 * range_changed ulist, false if it was set up with
 * extent_changeset_init_bytes_only() and only tracks bytes_changed.
 */
static inline bool extent_changeset_tracks_ranges(const struct extent_changeset *changeset)
{
	return changeset->range_changed.prealloc != EXTENT_CHANGESET_BYTES_ONLY;
}
static inline struct extent_changeset *extent_changeset_alloc(void)
{
struct extent_changeset *ret;
@@ -212,6 +231,7 @@ static inline struct extent_changeset *extent_changeset_alloc(void)
/*
 * Preallocate a ulist node for @changeset with @gfp_mask so a later
 * add_extent_changeset() call does not need to allocate.
 *
 * Only valid for changesets that track ranges: ulist_prealloc() writes the
 * prealloc field, which would clobber the EXTENT_CHANGESET_BYTES_ONLY
 * sentinel of a bytes-only changeset — hence the ASSERT.
 */
static inline void extent_changeset_prealloc(struct extent_changeset *changeset, gfp_t gfp_mask)
{
	ASSERT(extent_changeset_tracks_ranges(changeset));
	ulist_prealloc(&changeset->range_changed, gfp_mask);
}
@@ -220,7 +240,8 @@ static inline void extent_changeset_release(struct extent_changeset *changeset)
if (!changeset)
return;
changeset->bytes_changed = 0;
ulist_release(&changeset->range_changed);
if (extent_changeset_tracks_ranges(changeset))
ulist_release(&changeset->range_changed);
}
static inline void extent_changeset_free(struct extent_changeset *changeset)

View File

@@ -4324,7 +4324,7 @@ static int qgroup_free_reserved_data(struct btrfs_inode *inode,
u64 freed = 0;
int ret;
extent_changeset_init(&changeset);
extent_changeset_init_bytes_only(&changeset);
len = round_up(start + len, root->fs_info->sectorsize);
start = round_down(start, root->fs_info->sectorsize);
@@ -4389,7 +4389,7 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
WARN_ON(!free && reserved);
if (free && reserved)
return qgroup_free_reserved_data(inode, reserved, start, len, released);
extent_changeset_init(&changeset);
extent_changeset_init_bytes_only(&changeset);
ret = btrfs_clear_record_extent_bits(&inode->io_tree, start, start + len - 1,
EXTENT_QGROUP_RESERVED, &changeset);
if (ret < 0)
@@ -4647,6 +4647,7 @@ void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode)
WARN_ON(ret < 0);
if (WARN_ON(changeset.bytes_changed)) {
ASSERT(extent_changeset_tracks_ranges(&changeset));
ULIST_ITER_INIT(&iter);
while ((unode = ulist_next(&changeset.range_changed, &iter))) {
btrfs_warn(inode->root->fs_info,