btrfs: exit early when replaying hole file extent item from a log tree

At replay_one_extent(), we can jump to the code that updates the file
extent range and updates the inode when processing a file extent item that
represents a hole and we don't have the NO_HOLES feature enabled. This
helps reduce the high indentation level by one in replay_one_extent() and
avoid splitting some lines to make the code easier to read.

Reviewed-by: Boris Burkov <boris@bur.io>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Filipe Manana
2025-07-21 12:54:48 +01:00
committed by David Sterba
parent 912c257c88
commit 88666b6df9

View File

@@ -725,6 +725,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
if (found_type == BTRFS_FILE_EXTENT_REG ||
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
u64 csum_start;
u64 csum_end;
LIST_HEAD(ordered_sums);
u64 offset;
unsigned long dest_offset;
struct btrfs_key ins;
@@ -744,6 +747,17 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
copy_extent_buffer(path->nodes[0], eb, dest_offset,
(unsigned long)item, sizeof(*item));
/*
* We have an explicit hole and NO_HOLES is not enabled. We have
* added the hole file extent item to the subvolume tree, so we
* don't have anything else to do other than update the file
* extent item range and update the inode item.
*/
if (btrfs_file_extent_disk_bytenr(eb, item) == 0) {
btrfs_release_path(path);
goto update_inode;
}
ins.objectid = btrfs_file_extent_disk_bytenr(eb, item);
ins.type = BTRFS_EXTENT_ITEM_KEY;
ins.offset = btrfs_file_extent_disk_num_bytes(eb, item);
@@ -765,148 +779,125 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
goto out;
}
if (ins.objectid > 0) {
u64 csum_start;
u64 csum_end;
LIST_HEAD(ordered_sums);
/*
* Is this extent already allocated in the extent tree?
* If so, just add a reference.
*/
ret = btrfs_lookup_data_extent(fs_info, ins.objectid, ins.offset);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out;
} else if (ret == 0) {
struct btrfs_ref ref = {
.action = BTRFS_ADD_DELAYED_REF,
.bytenr = ins.objectid,
.num_bytes = ins.offset,
.owning_root = btrfs_root_id(root),
.ref_root = btrfs_root_id(root),
};
/*
* is this extent already allocated in the extent
* allocation tree? If so, just add a reference
*/
ret = btrfs_lookup_data_extent(fs_info, ins.objectid,
ins.offset);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out;
} else if (ret == 0) {
struct btrfs_ref ref = {
.action = BTRFS_ADD_DELAYED_REF,
.bytenr = ins.objectid,
.num_bytes = ins.offset,
.owning_root = btrfs_root_id(root),
.ref_root = btrfs_root_id(root),
};
btrfs_init_data_ref(&ref, key->objectid, offset,
0, false);
ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
} else {
/*
* insert the extent pointer in the extent
* allocation tree
*/
ret = btrfs_alloc_logged_file_extent(trans,
btrfs_root_id(root),
key->objectid, offset, &ins);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
}
btrfs_release_path(path);
if (btrfs_file_extent_compression(eb, item)) {
csum_start = ins.objectid;
csum_end = csum_start + ins.offset;
} else {
csum_start = ins.objectid +
btrfs_file_extent_offset(eb, item);
csum_end = csum_start +
btrfs_file_extent_num_bytes(eb, item);
}
ret = btrfs_lookup_csums_list(root->log_root,
csum_start, csum_end - 1,
&ordered_sums, false);
if (ret < 0) {
btrfs_init_data_ref(&ref, key->objectid, offset, 0, false);
ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = 0;
/*
* Now delete all existing cums in the csum root that
* cover our range. We do this because we can have an
* extent that is completely referenced by one file
* extent item and partially referenced by another
* file extent item (like after using the clone or
* extent_same ioctls). In this case if we end up doing
* the replay of the one that partially references the
* extent first, and we do not do the csum deletion
* below, we can get 2 csum items in the csum tree that
* overlap each other. For example, imagine our log has
* the two following file extent items:
*
* key (257 EXTENT_DATA 409600)
* extent data disk byte 12845056 nr 102400
* extent data offset 20480 nr 20480 ram 102400
*
* key (257 EXTENT_DATA 819200)
* extent data disk byte 12845056 nr 102400
* extent data offset 0 nr 102400 ram 102400
*
* Where the second one fully references the 100K extent
* that starts at disk byte 12845056, and the log tree
* has a single csum item that covers the entire range
* of the extent:
*
* key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
*
* After the first file extent item is replayed, the
* csum tree gets the following csum item:
*
* key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
*
* Which covers the 20K sub-range starting at offset 20K
* of our extent. Now when we replay the second file
* extent item, if we do not delete existing csum items
* that cover any of its blocks, we end up getting two
* csum items in our csum tree that overlap each other:
*
* key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
* key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
*
* Which is a problem, because after this anyone trying
* to lookup up for the checksum of any block of our
* extent starting at an offset of 40K or higher, will
* end up looking at the second csum item only, which
* does not contain the checksum for any block starting
* at offset 40K or higher of our extent.
*/
while (!list_empty(&ordered_sums)) {
struct btrfs_ordered_sum *sums;
struct btrfs_root *csum_root;
sums = list_first_entry(&ordered_sums,
struct btrfs_ordered_sum,
list);
csum_root = btrfs_csum_root(fs_info,
sums->logical);
if (!ret) {
ret = btrfs_del_csums(trans, csum_root,
sums->logical,
sums->len);
if (ret)
btrfs_abort_transaction(trans, ret);
}
if (!ret) {
ret = btrfs_csum_file_blocks(trans,
csum_root,
sums);
if (ret)
btrfs_abort_transaction(trans, ret);
}
list_del(&sums->list);
kfree(sums);
}
if (ret)
goto out;
} else {
btrfs_release_path(path);
/* Insert the extent pointer in the extent tree. */
ret = btrfs_alloc_logged_file_extent(trans, btrfs_root_id(root),
key->objectid, offset, &ins);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
}
btrfs_release_path(path);
if (btrfs_file_extent_compression(eb, item)) {
csum_start = ins.objectid;
csum_end = csum_start + ins.offset;
} else {
csum_start = ins.objectid + btrfs_file_extent_offset(eb, item);
csum_end = csum_start + btrfs_file_extent_num_bytes(eb, item);
}
ret = btrfs_lookup_csums_list(root->log_root, csum_start, csum_end - 1,
&ordered_sums, false);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = 0;
/*
* Now delete all existing cums in the csum root that cover our
* range. We do this because we can have an extent that is
* completely referenced by one file extent item and partially
* referenced by another file extent item (like after using the
* clone or extent_same ioctls). In this case if we end up doing
* the replay of the one that partially references the extent
* first, and we do not do the csum deletion below, we can get 2
* csum items in the csum tree that overlap each other. For
* example, imagine our log has the two following file extent items:
*
* key (257 EXTENT_DATA 409600)
* extent data disk byte 12845056 nr 102400
* extent data offset 20480 nr 20480 ram 102400
*
* key (257 EXTENT_DATA 819200)
* extent data disk byte 12845056 nr 102400
* extent data offset 0 nr 102400 ram 102400
*
* Where the second one fully references the 100K extent that
* starts at disk byte 12845056, and the log tree has a single
* csum item that covers the entire range of the extent:
*
* key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
*
* After the first file extent item is replayed, the csum tree
* gets the following csum item:
*
* key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
*
* Which covers the 20K sub-range starting at offset 20K of our
* extent. Now when we replay the second file extent item, if we
* do not delete existing csum items that cover any of its
* blocks, we end up getting two csum items in our csum tree
* that overlap each other:
*
* key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
* key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
*
* Which is a problem, because after this anyone trying to
* lookup up for the checksum of any block of our extent
* starting at an offset of 40K or higher, will end up looking
* at the second csum item only, which does not contain the
* checksum for any block starting at offset 40K or higher of
* our extent.
*/
while (!list_empty(&ordered_sums)) {
struct btrfs_ordered_sum *sums;
struct btrfs_root *csum_root;
sums = list_first_entry(&ordered_sums,
struct btrfs_ordered_sum, list);
csum_root = btrfs_csum_root(fs_info, sums->logical);
if (!ret) {
ret = btrfs_del_csums(trans, csum_root, sums->logical,
sums->len);
if (ret)
btrfs_abort_transaction(trans, ret);
}
if (!ret) {
ret = btrfs_csum_file_blocks(trans, csum_root, sums);
if (ret)
btrfs_abort_transaction(trans, ret);
}
list_del(&sums->list);
kfree(sums);
}
if (ret)
goto out;
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
/* inline extents are easy, we just overwrite them */
ret = overwrite_item(trans, root, path, eb, slot, key);
@@ -914,13 +905,13 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
goto out;
}
update_inode:
ret = btrfs_inode_set_file_extent_range(inode, start, extent_end - start);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
update_inode:
btrfs_update_inode_bytes(inode, nbytes, drop_args.bytes_found);
ret = btrfs_update_inode(trans, inode);
if (ret)