From cc970d21c4f37b7cbedd73e043b69faf2c66a6fe Mon Sep 17 00:00:00 2001
From: Leo Martins
Date: Thu, 26 Feb 2026 01:51:08 -0800
Subject: [PATCH] btrfs: add tracepoint for search slot restart tracking

Add a btrfs_search_slot_restart tracepoint that fires at each restart
site in btrfs_search_slot(), recording the root, tree level, and reason
for the restart. This enables tracking search slot restarts which
contribute to COW amplification under memory pressure.

The four restart reasons are:
 - write_lock: insufficient write lock level, need to restart with
   higher lock
 - setup_nodes: node setup returned -EAGAIN
 - slot_zero: insertion at slot 0 requires higher write lock level
 - read_block: read_block_for_search returned -EAGAIN (block not cached
   or lock contention)

COW counts are already tracked by the existing trace_btrfs_cow_block()
tracepoint. The per-restart-site tracepoint avoids counter overhead in
the critical path when tracepoints are disabled, and provides richer
per-event information that bpftrace scripts can aggregate into counts,
histograms, and per-root breakdowns.

Reviewed-by: Filipe Manana Reviewed-by: Boris Burkov Signed-off-by: Leo Martins Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 10 ++++++++-- include/trace/events/btrfs.h | 24 ++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e8d260ecdcf6..71e7ada95477 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2102,6 +2102,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, p->nodes[level + 1])) { write_lock_level = level + 1; btrfs_release_path(p); + trace_btrfs_search_slot_restart(root, level, "write_lock"); goto again; } @@ -2164,8 +2165,10 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, p->slots[level] = slot; ret2 = setup_nodes_for_search(trans, root, p, b, level, ins_len, &write_lock_level); - if (ret2 == -EAGAIN) + if (ret2 == -EAGAIN) { + trace_btrfs_search_slot_restart(root, level, "setup_nodes"); goto again; + } if (ret2) { ret = ret2; goto done; @@ -2181,6 +2184,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (slot == 0 && ins_len && write_lock_level < level + 1) { write_lock_level = level + 1; btrfs_release_path(p); + trace_btrfs_search_slot_restart(root, level, "slot_zero"); goto again; } @@ -2194,8 +2198,10 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, } ret2 = read_block_for_search(root, p, &b, slot, key); - if (ret2 == -EAGAIN && !p->nowait) + if (ret2 == -EAGAIN && !p->nowait) { + trace_btrfs_search_slot_restart(root, level, "read_block"); goto again; + } if (ret2) { ret = ret2; goto done; diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 0864700f76e0..8ad7a2d76c1d 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1113,6 +1113,30 @@ TRACE_EVENT(btrfs_cow_block, __entry->cow_level) ); +TRACE_EVENT(btrfs_search_slot_restart, + + TP_PROTO(const struct btrfs_root 
*root, int level, + const char *reason), + + TP_ARGS(root, level, reason), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( int, level ) + __string( reason, reason ) + ), + + TP_fast_assign_btrfs(root->fs_info, + __entry->root_objectid = btrfs_root_id(root); + __entry->level = level; + __assign_str(reason); + ), + + TP_printk_btrfs("root=%llu(%s) level=%d reason=%s", + show_root_type(__entry->root_objectid), + __entry->level, __get_str(reason)) +); + TRACE_EVENT(btrfs_space_reservation, TP_PROTO(const struct btrfs_fs_info *fs_info, const char *type, u64 val,