xfs: use a lockref for the buffer reference count
The lockref structure allows incrementing/decrementing counters like an
atomic_t for the fast path, while still allowing complex slow path
operations as if the counter was protected by a lock. The only slow path
operations that actually need to take the lock are the final put, LRU
eviction, and marking a buffer stale.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
commit d02ee47bbe
parent 67fe430397
committed by Carlos Maiolino
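The fast/slow split the commit message describes can be sketched on its own
before reading the diff below. This is a minimal, hypothetical example, not
XFS code: struct obj and the obj_*() helpers are invented for illustration,
and it assumes lockref_init() starts the count at 1 and lockref_put_or_lock()
falls back to taking the embedded spinlock once the count would drop to zero.
Only the lockref_*(), spin_unlock() and kfree() calls are the real kernel API
from <linux/lockref.h>.

#include <linux/lockref.h>
#include <linux/slab.h>

struct obj {
        struct lockref  ref;    /* replaces a spinlock_t + int count pair */
};

static void obj_init(struct obj *o)
{
        lockref_init(&o->ref);          /* count = 1, spinlock unlocked */
}

/* fast path get: a cmpxchg on the combined lock+count word, no spinning */
static void obj_get(struct obj *o)
{
        lockref_get(&o->ref);
}

/* lookup-side get, e.g. under rcu_read_lock(): fails once the object is dead */
static bool obj_get_unless_dead(struct obj *o)
{
        return lockref_get_not_dead(&o->ref);
}

static void obj_put(struct obj *o)
{
        /* fast path: cmpxchg decrement as long as other references remain */
        if (lockref_put_or_lock(&o->ref))
                return;

        /* slow path: ref.lock is now held and this is the last reference */
        if (!--o->ref.count) {
                lockref_mark_dead(&o->ref);     /* concurrent lookups now fail */
                spin_unlock(&o->ref.lock);
                kfree(o);
                return;
        }
        spin_unlock(&o->ref.lock);
}

xfs_buf_rele() in the diff follows the same shape, except that its slow path
parks still-cacheable buffers on the buftarg LRU instead of freeing them
outright.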
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
@@ -31,20 +31,20 @@ struct kmem_cache *xfs_buf_cache;
  *
  * xfs_buf_stale:
  *      b_sema (caller holds)
- *        b_lock
+ *        b_lockref.lock
  *          lru_lock
  *
  * xfs_buf_rele:
- *      b_lock
+ *      b_lockref.lock
  *        lru_lock
  *
  * xfs_buftarg_drain_rele
  *      lru_lock
- *        b_lock (trylock due to inversion)
+ *        b_lockref.lock (trylock due to inversion)
  *
  * xfs_buftarg_isolate
  *      lru_lock
- *        b_lock (trylock due to inversion)
+ *        b_lockref.lock (trylock due to inversion)
  */
 
 static void xfs_buf_submit(struct xfs_buf *bp);
@@ -78,11 +78,11 @@ xfs_buf_stale(
          */
         bp->b_flags &= ~_XBF_DELWRI_Q;
 
-        spin_lock(&bp->b_lock);
+        spin_lock(&bp->b_lockref.lock);
         atomic_set(&bp->b_lru_ref, 0);
-        if (bp->b_hold >= 0)
+        if (!__lockref_is_dead(&bp->b_lockref))
                 list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
-        spin_unlock(&bp->b_lock);
+        spin_unlock(&bp->b_lockref.lock);
 }
 
 static void
@@ -274,10 +274,8 @@ xfs_buf_alloc(
          * inserting into the hash table are safe (and will have to wait for
          * the unlock to do anything non-trivial).
          */
-        bp->b_hold = 1;
+        lockref_init(&bp->b_lockref);
         sema_init(&bp->b_sema, 0); /* held, no waiters */
-
-        spin_lock_init(&bp->b_lock);
         atomic_set(&bp->b_lru_ref, 1);
         init_completion(&bp->b_iowait);
         INIT_LIST_HEAD(&bp->b_lru);
@@ -434,20 +432,6 @@ xfs_buf_find_lock(
         return 0;
 }
 
-static bool
-xfs_buf_try_hold(
-        struct xfs_buf          *bp)
-{
-        spin_lock(&bp->b_lock);
-        if (bp->b_hold == -1) {
-                spin_unlock(&bp->b_lock);
-                return false;
-        }
-        bp->b_hold++;
-        spin_unlock(&bp->b_lock);
-        return true;
-}
-
 static inline int
 xfs_buf_lookup(
         struct xfs_buf_cache    *bch,
@@ -460,7 +444,7 @@ xfs_buf_lookup(
 
         rcu_read_lock();
         bp = rhashtable_lookup(&bch->bc_hash, map, xfs_buf_hash_params);
-        if (!bp || !xfs_buf_try_hold(bp)) {
+        if (!bp || !lockref_get_not_dead(&bp->b_lockref)) {
                 rcu_read_unlock();
                 return -ENOENT;
         }
@@ -511,7 +495,7 @@ xfs_buf_find_insert(
                 error = PTR_ERR(bp);
                 goto out_free_buf;
         }
-        if (bp && xfs_buf_try_hold(bp)) {
+        if (bp && lockref_get_not_dead(&bp->b_lockref)) {
                 /* found an existing buffer */
                 rcu_read_unlock();
                 error = xfs_buf_find_lock(bp, flags);
@@ -853,16 +837,14 @@ xfs_buf_hold(
 {
         trace_xfs_buf_hold(bp, _RET_IP_);
 
-        spin_lock(&bp->b_lock);
-        bp->b_hold++;
-        spin_unlock(&bp->b_lock);
+        lockref_get(&bp->b_lockref);
 }
 
 static void
 xfs_buf_destroy(
         struct xfs_buf          *bp)
 {
-        ASSERT(bp->b_hold < 0);
+        ASSERT(__lockref_is_dead(&bp->b_lockref));
         ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
 
         if (!xfs_buf_is_uncached(bp)) {
@@ -888,19 +870,20 @@ xfs_buf_rele(
 {
         trace_xfs_buf_rele(bp, _RET_IP_);
 
-        spin_lock(&bp->b_lock);
-        if (!--bp->b_hold) {
+        if (lockref_put_or_lock(&bp->b_lockref))
+                return;
+        if (!--bp->b_lockref.count) {
                 if (xfs_buf_is_uncached(bp) || !atomic_read(&bp->b_lru_ref))
                         goto kill;
                 list_lru_add_obj(&bp->b_target->bt_lru, &bp->b_lru);
         }
-        spin_unlock(&bp->b_lock);
+        spin_unlock(&bp->b_lockref.lock);
         return;
 
 kill:
-        bp->b_hold = -1;
+        lockref_mark_dead(&bp->b_lockref);
         list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
-        spin_unlock(&bp->b_lock);
+        spin_unlock(&bp->b_lockref.lock);
 
         xfs_buf_destroy(bp);
 }
@@ -1471,18 +1454,18 @@ xfs_buftarg_drain_rele(
         struct xfs_buf          *bp = container_of(item, struct xfs_buf, b_lru);
         struct list_head        *dispose = arg;
 
-        if (!spin_trylock(&bp->b_lock))
+        if (!spin_trylock(&bp->b_lockref.lock))
                 return LRU_SKIP;
-        if (bp->b_hold > 0) {
+        if (bp->b_lockref.count > 0) {
                 /* need to wait, so skip it this pass */
-                spin_unlock(&bp->b_lock);
+                spin_unlock(&bp->b_lockref.lock);
                 trace_xfs_buf_drain_buftarg(bp, _RET_IP_);
                 return LRU_SKIP;
         }
 
-        bp->b_hold = -1;
+        lockref_mark_dead(&bp->b_lockref);
         list_lru_isolate_move(lru, item, dispose);
-        spin_unlock(&bp->b_lock);
+        spin_unlock(&bp->b_lockref.lock);
         return LRU_REMOVED;
 }
 
@@ -1564,18 +1547,19 @@ xfs_buftarg_isolate(
         struct list_head        *dispose = arg;
 
         /*
-         * we are inverting the lru lock/bp->b_lock here, so use a trylock.
-         * If we fail to get the lock, just skip it.
+         * We are inverting the lru lock vs bp->b_lockref.lock order here, so
+         * use a trylock. If we fail to get the lock, just skip the buffer.
          */
-        if (!spin_trylock(&bp->b_lock))
+        if (!spin_trylock(&bp->b_lockref.lock))
                 return LRU_SKIP;
 
         /*
          * Decrement the b_lru_ref count unless the value is already
          * zero. If the value is already zero, we need to reclaim the
          * buffer, otherwise it gets another trip through the LRU.
          */
         if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
-                spin_unlock(&bp->b_lock);
+                spin_unlock(&bp->b_lockref.lock);
                 return LRU_ROTATE;
         }
@@ -1583,15 +1567,15 @@ xfs_buftarg_isolate(
          * If the buffer is in use, remove it from the LRU for now as we can't
          * free it. It will be freed when the last reference drops.
          */
-        if (bp->b_hold > 0) {
+        if (bp->b_lockref.count > 0) {
                 list_lru_isolate(lru, &bp->b_lru);
-                spin_unlock(&bp->b_lock);
+                spin_unlock(&bp->b_lockref.lock);
                 return LRU_REMOVED;
         }
 
-        bp->b_hold = -1;
+        lockref_mark_dead(&bp->b_lockref);
         list_lru_isolate_move(lru, item, dispose);
-        spin_unlock(&bp->b_lock);
+        spin_unlock(&bp->b_lockref.lock);
         return LRU_REMOVED;
 }
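The two LRU walkers above take lru_lock first and b_lockref.lock second,
while xfs_buf_rele() nests the LRU list operations inside b_lockref.lock;
that inversion is why both walkers use spin_trylock() and return LRU_SKIP on
contention. A small self-contained model of that design choice, with every
name invented and plain pthreads standing in for the kernel primitives:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t buf_lock = PTHREAD_MUTEX_INITIALIZER; /* ~ b_lockref.lock */

/* release path: buffer lock outside, LRU lock inside (like xfs_buf_rele) */
static void *release_path(void *arg)
{
        pthread_mutex_lock(&buf_lock);
        pthread_mutex_lock(&lru_lock);          /* list add/remove happens here */
        pthread_mutex_unlock(&lru_lock);
        pthread_mutex_unlock(&buf_lock);
        return NULL;
}

/* walker path: LRU lock outside, buffer lock inside -- the inverted order */
static void walker_path(void)
{
        pthread_mutex_lock(&lru_lock);
        if (pthread_mutex_trylock(&buf_lock) == 0) {
                /* isolate or rotate the buffer */
                pthread_mutex_unlock(&buf_lock);
        } else {
                /* equivalent of LRU_SKIP: try again on a later pass */
                puts("buffer lock contended, skipping");
        }
        pthread_mutex_unlock(&lru_lock);
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, release_path, NULL);
        walker_path();          /* never deadlocks: the inner lock is only tried */
        pthread_join(t, NULL);
        return 0;
}

If walker_path() blocked on buf_lock instead, a concurrent release could hold
buf_lock while waiting for lru_lock, and the two threads would deadlock.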
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
@@ -14,6 +14,7 @@
 #include <linux/dax.h>
 #include <linux/uio.h>
 #include <linux/list_lru.h>
+#include <linux/lockref.h>
 
 extern struct kmem_cache *xfs_buf_cache;
 
@@ -154,7 +155,7 @@ struct xfs_buf {
 
         xfs_daddr_t             b_rhash_key;    /* buffer cache index */
         int                     b_length;       /* size of buffer in BBs */
-        int                     b_hold;         /* reference count */
+        struct lockref          b_lockref;      /* refcount + lock */
         atomic_t                b_lru_ref;      /* lru reclaim ref count */
         xfs_buf_flags_t         b_flags;        /* status flags */
         struct semaphore        b_sema;         /* semaphore for lockables */
@@ -164,7 +165,6 @@ struct xfs_buf {
          * bt_lru_lock and not by b_sema
          */
         struct list_head        b_lru;          /* lru list */
-        spinlock_t              b_lock;         /* internal state lock */
         wait_queue_head_t       b_waiters;      /* unpin waiters */
         struct list_head        b_list;
         struct xfs_perag        *b_pag;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
@@ -740,7 +740,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
                 __entry->dev = bp->b_target->bt_dev;
                 __entry->bno = xfs_buf_daddr(bp);
                 __entry->nblks = bp->b_length;
-                __entry->hold = bp->b_hold;
+                __entry->hold = bp->b_lockref.count;
                 __entry->pincount = atomic_read(&bp->b_pin_count);
                 __entry->lockval = bp->b_sema.count;
                 __entry->flags = bp->b_flags;
@@ -814,7 +814,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
                 __entry->bno = xfs_buf_daddr(bp);
                 __entry->length = bp->b_length;
                 __entry->flags = flags;
-                __entry->hold = bp->b_hold;
+                __entry->hold = bp->b_lockref.count;
                 __entry->pincount = atomic_read(&bp->b_pin_count);
                 __entry->lockval = bp->b_sema.count;
                 __entry->caller_ip = caller_ip;
@@ -858,7 +858,7 @@ TRACE_EVENT(xfs_buf_ioerror,
                 __entry->dev = bp->b_target->bt_dev;
                 __entry->bno = xfs_buf_daddr(bp);
                 __entry->length = bp->b_length;
-                __entry->hold = bp->b_hold;
+                __entry->hold = bp->b_lockref.count;
                 __entry->pincount = atomic_read(&bp->b_pin_count);
                 __entry->lockval = bp->b_sema.count;
                 __entry->error = error;
@@ -902,7 +902,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
                 __entry->buf_bno = xfs_buf_daddr(bip->bli_buf);
                 __entry->buf_len = bip->bli_buf->b_length;
                 __entry->buf_flags = bip->bli_buf->b_flags;
-                __entry->buf_hold = bip->bli_buf->b_hold;
+                __entry->buf_hold = bip->bli_buf->b_lockref.count;
                 __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
                 __entry->buf_lockval = bip->bli_buf->b_sema.count;
                 __entry->li_flags = bip->bli_item.li_flags;
@@ -5206,7 +5206,7 @@ DECLARE_EVENT_CLASS(xfbtree_buf_class,
                 __entry->xfino = file_inode(xfbt->target->bt_file)->i_ino;
                 __entry->bno = xfs_buf_daddr(bp);
                 __entry->nblks = bp->b_length;
-                __entry->hold = bp->b_hold;
+                __entry->hold = bp->b_lockref.count;
                 __entry->pincount = atomic_read(&bp->b_pin_count);
                 __entry->lockval = bp->b_sema.count;
                 __entry->flags = bp->b_flags;