xfs: use a lockref for the buffer reference count

The lockref structure allows incrementing/decrementing counters like
an atomic_t for the fast path, while still allowing complex slow path
operations as if the counter were protected by a lock.  The only slow
path operations that actually need to take the lock are the final
put, LRU evictions, and marking a buffer stale.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
This commit is contained in:
Christoph Hellwig
2026-03-23 08:50:52 +01:00
committed by Carlos Maiolino
parent 67fe430397
commit d02ee47bbe
3 changed files with 39 additions and 55 deletions

View File

@@ -31,20 +31,20 @@ struct kmem_cache *xfs_buf_cache;
*
* xfs_buf_stale:
* b_sema (caller holds)
* b_lock
* b_lockref.lock
* lru_lock
*
* xfs_buf_rele:
* b_lock
* b_lockref.lock
* lru_lock
*
* xfs_buftarg_drain_rele
* lru_lock
* b_lock (trylock due to inversion)
* b_lockref.lock (trylock due to inversion)
*
* xfs_buftarg_isolate
* lru_lock
* b_lock (trylock due to inversion)
* b_lockref.lock (trylock due to inversion)
*/
static void xfs_buf_submit(struct xfs_buf *bp);
@@ -78,11 +78,11 @@ xfs_buf_stale(
*/
bp->b_flags &= ~_XBF_DELWRI_Q;
spin_lock(&bp->b_lock);
spin_lock(&bp->b_lockref.lock);
atomic_set(&bp->b_lru_ref, 0);
if (bp->b_hold >= 0)
if (!__lockref_is_dead(&bp->b_lockref))
list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
spin_unlock(&bp->b_lock);
spin_unlock(&bp->b_lockref.lock);
}
static void
@@ -274,10 +274,8 @@ xfs_buf_alloc(
* inserting into the hash table are safe (and will have to wait for
* the unlock to do anything non-trivial).
*/
bp->b_hold = 1;
lockref_init(&bp->b_lockref);
sema_init(&bp->b_sema, 0); /* held, no waiters */
spin_lock_init(&bp->b_lock);
atomic_set(&bp->b_lru_ref, 1);
init_completion(&bp->b_iowait);
INIT_LIST_HEAD(&bp->b_lru);
@@ -434,20 +432,6 @@ xfs_buf_find_lock(
return 0;
}
static bool
xfs_buf_try_hold(
struct xfs_buf *bp)
{
spin_lock(&bp->b_lock);
if (bp->b_hold == -1) {
spin_unlock(&bp->b_lock);
return false;
}
bp->b_hold++;
spin_unlock(&bp->b_lock);
return true;
}
static inline int
xfs_buf_lookup(
struct xfs_buf_cache *bch,
@@ -460,7 +444,7 @@ xfs_buf_lookup(
rcu_read_lock();
bp = rhashtable_lookup(&bch->bc_hash, map, xfs_buf_hash_params);
if (!bp || !xfs_buf_try_hold(bp)) {
if (!bp || !lockref_get_not_dead(&bp->b_lockref)) {
rcu_read_unlock();
return -ENOENT;
}
@@ -511,7 +495,7 @@ xfs_buf_find_insert(
error = PTR_ERR(bp);
goto out_free_buf;
}
if (bp && xfs_buf_try_hold(bp)) {
if (bp && lockref_get_not_dead(&bp->b_lockref)) {
/* found an existing buffer */
rcu_read_unlock();
error = xfs_buf_find_lock(bp, flags);
@@ -853,16 +837,14 @@ xfs_buf_hold(
{
trace_xfs_buf_hold(bp, _RET_IP_);
spin_lock(&bp->b_lock);
bp->b_hold++;
spin_unlock(&bp->b_lock);
lockref_get(&bp->b_lockref);
}
static void
xfs_buf_destroy(
struct xfs_buf *bp)
{
ASSERT(bp->b_hold < 0);
ASSERT(__lockref_is_dead(&bp->b_lockref));
ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
if (!xfs_buf_is_uncached(bp)) {
@@ -888,19 +870,20 @@ xfs_buf_rele(
{
trace_xfs_buf_rele(bp, _RET_IP_);
spin_lock(&bp->b_lock);
if (!--bp->b_hold) {
if (lockref_put_or_lock(&bp->b_lockref))
return;
if (!--bp->b_lockref.count) {
if (xfs_buf_is_uncached(bp) || !atomic_read(&bp->b_lru_ref))
goto kill;
list_lru_add_obj(&bp->b_target->bt_lru, &bp->b_lru);
}
spin_unlock(&bp->b_lock);
spin_unlock(&bp->b_lockref.lock);
return;
kill:
bp->b_hold = -1;
lockref_mark_dead(&bp->b_lockref);
list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
spin_unlock(&bp->b_lock);
spin_unlock(&bp->b_lockref.lock);
xfs_buf_destroy(bp);
}
@@ -1471,18 +1454,18 @@ xfs_buftarg_drain_rele(
struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru);
struct list_head *dispose = arg;
if (!spin_trylock(&bp->b_lock))
if (!spin_trylock(&bp->b_lockref.lock))
return LRU_SKIP;
if (bp->b_hold > 0) {
if (bp->b_lockref.count > 0) {
/* need to wait, so skip it this pass */
spin_unlock(&bp->b_lock);
spin_unlock(&bp->b_lockref.lock);
trace_xfs_buf_drain_buftarg(bp, _RET_IP_);
return LRU_SKIP;
}
bp->b_hold = -1;
lockref_mark_dead(&bp->b_lockref);
list_lru_isolate_move(lru, item, dispose);
spin_unlock(&bp->b_lock);
spin_unlock(&bp->b_lockref.lock);
return LRU_REMOVED;
}
@@ -1564,18 +1547,19 @@ xfs_buftarg_isolate(
struct list_head *dispose = arg;
/*
* we are inverting the lru lock/bp->b_lock here, so use a trylock.
* If we fail to get the lock, just skip it.
* We are inverting the lru lock vs bp->b_lockref.lock order here, so
* use a trylock. If we fail to get the lock, just skip the buffer.
*/
if (!spin_trylock(&bp->b_lock))
if (!spin_trylock(&bp->b_lockref.lock))
return LRU_SKIP;
/*
* Decrement the b_lru_ref count unless the value is already
* zero. If the value is already zero, we need to reclaim the
* buffer, otherwise it gets another trip through the LRU.
*/
if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
spin_unlock(&bp->b_lock);
spin_unlock(&bp->b_lockref.lock);
return LRU_ROTATE;
}
@@ -1583,15 +1567,15 @@ xfs_buftarg_isolate(
* If the buffer is in use, remove it from the LRU for now as we can't
* free it. It will be freed when the last reference drops.
*/
if (bp->b_hold > 0) {
if (bp->b_lockref.count > 0) {
list_lru_isolate(lru, &bp->b_lru);
spin_unlock(&bp->b_lock);
spin_unlock(&bp->b_lockref.lock);
return LRU_REMOVED;
}
bp->b_hold = -1;
lockref_mark_dead(&bp->b_lockref);
list_lru_isolate_move(lru, item, dispose);
spin_unlock(&bp->b_lock);
spin_unlock(&bp->b_lockref.lock);
return LRU_REMOVED;
}

View File

@@ -14,6 +14,7 @@
#include <linux/dax.h>
#include <linux/uio.h>
#include <linux/list_lru.h>
#include <linux/lockref.h>
extern struct kmem_cache *xfs_buf_cache;
@@ -154,7 +155,7 @@ struct xfs_buf {
xfs_daddr_t b_rhash_key; /* buffer cache index */
int b_length; /* size of buffer in BBs */
int b_hold; /* reference count */
struct lockref b_lockref; /* refcount + lock */
atomic_t b_lru_ref; /* lru reclaim ref count */
xfs_buf_flags_t b_flags; /* status flags */
struct semaphore b_sema; /* semaphore for lockables */
@@ -164,7 +165,6 @@ struct xfs_buf {
* bt_lru_lock and not by b_sema
*/
struct list_head b_lru; /* lru list */
spinlock_t b_lock; /* internal state lock */
wait_queue_head_t b_waiters; /* unpin waiters */
struct list_head b_list;
struct xfs_perag *b_pag;

View File

@@ -740,7 +740,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
__entry->dev = bp->b_target->bt_dev;
__entry->bno = xfs_buf_daddr(bp);
__entry->nblks = bp->b_length;
__entry->hold = bp->b_hold;
__entry->hold = bp->b_lockref.count;
__entry->pincount = atomic_read(&bp->b_pin_count);
__entry->lockval = bp->b_sema.count;
__entry->flags = bp->b_flags;
@@ -814,7 +814,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
__entry->bno = xfs_buf_daddr(bp);
__entry->length = bp->b_length;
__entry->flags = flags;
__entry->hold = bp->b_hold;
__entry->hold = bp->b_lockref.count;
__entry->pincount = atomic_read(&bp->b_pin_count);
__entry->lockval = bp->b_sema.count;
__entry->caller_ip = caller_ip;
@@ -858,7 +858,7 @@ TRACE_EVENT(xfs_buf_ioerror,
__entry->dev = bp->b_target->bt_dev;
__entry->bno = xfs_buf_daddr(bp);
__entry->length = bp->b_length;
__entry->hold = bp->b_hold;
__entry->hold = bp->b_lockref.count;
__entry->pincount = atomic_read(&bp->b_pin_count);
__entry->lockval = bp->b_sema.count;
__entry->error = error;
@@ -902,7 +902,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
__entry->buf_bno = xfs_buf_daddr(bip->bli_buf);
__entry->buf_len = bip->bli_buf->b_length;
__entry->buf_flags = bip->bli_buf->b_flags;
__entry->buf_hold = bip->bli_buf->b_hold;
__entry->buf_hold = bip->bli_buf->b_lockref.count;
__entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
__entry->buf_lockval = bip->bli_buf->b_sema.count;
__entry->li_flags = bip->bli_item.li_flags;
@@ -5206,7 +5206,7 @@ DECLARE_EVENT_CLASS(xfbtree_buf_class,
__entry->xfino = file_inode(xfbt->target->bt_file)->i_ino;
__entry->bno = xfs_buf_daddr(bp);
__entry->nblks = bp->b_length;
__entry->hold = bp->b_hold;
__entry->hold = bp->b_lockref.count;
__entry->pincount = atomic_read(&bp->b_pin_count);
__entry->lockval = bp->b_sema.count;
__entry->flags = bp->b_flags;