mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-15 17:51:41 -04:00
Merge patch series "fs: add i_state helpers"
Christian Brauner <brauner@kernel.org> says: I've recently looked for some free space in struct inode again because of some exec kerfuffle we had and while my idea didn't turn into anything I noticed that we often waste bytes when using wait bit operations. So I set out to switch that to another mechanism that would allow us to free up bytes. So this is an attempt to turn i_state from an unsigned long into an u32 using the individual bytes of i_state as addresses for the wait var event mechanism (Thanks to Linus for that idea.). This survives LTP, xfstests on various filesystems, and will-it-scale. * patches from https://lore.kernel.org/r/20240823-work-i_state-v3-1-5cd5fd207a57@kernel.org: inode: make i_state a u32 inode: port __I_LRU_ISOLATING to var event inode: port __I_NEW to var event inode: port __I_SYNC to var event fs: reorder i_state bits fs: add i_state helpers Link: https://lore.kernel.org/r/20240823-work-i_state-v3-1-5cd5fd207a57@kernel.org Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
@@ -1644,14 +1644,16 @@ void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
|
||||
break;
|
||||
}
|
||||
} else if (clean_pass && this_pass_clean) {
|
||||
wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
|
||||
DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
|
||||
struct wait_bit_queue_entry wqe;
|
||||
struct wait_queue_head *wq_head;
|
||||
|
||||
prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
|
||||
wq_head = inode_bit_waitqueue(&wqe, &inode->v, __I_NEW);
|
||||
prepare_to_wait_event(wq_head, &wqe.wq_entry,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
mutex_unlock(&c->vfs_inodes_lock);
|
||||
|
||||
schedule();
|
||||
finish_wait(wq, &wait.wq_entry);
|
||||
finish_wait(wq_head, &wqe.wq_entry);
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1908,8 +1908,13 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
|
||||
__d_instantiate(entry, inode);
|
||||
WARN_ON(!(inode->i_state & I_NEW));
|
||||
inode->i_state &= ~I_NEW & ~I_CREATING;
|
||||
/*
|
||||
* Pairs with the barrier in prepare_to_wait_event() to make sure
|
||||
* ___wait_var_event() either sees the bit cleared or
|
||||
* waitqueue_active() check in wake_up_var() sees the waiter.
|
||||
*/
|
||||
smp_mb();
|
||||
wake_up_bit(&inode->i_state, __I_NEW);
|
||||
inode_wake_up_bit(inode, __I_NEW);
|
||||
spin_unlock(&inode->i_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(d_instantiate_new);
|
||||
|
||||
@@ -1386,12 +1386,13 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
|
||||
|
||||
static void inode_sync_complete(struct inode *inode)
|
||||
{
|
||||
assert_spin_locked(&inode->i_lock);
|
||||
|
||||
inode->i_state &= ~I_SYNC;
|
||||
/* If inode is clean an unused, put it into LRU now... */
|
||||
inode_add_lru(inode);
|
||||
/* Waiters must see I_SYNC cleared before being woken up */
|
||||
smp_mb();
|
||||
wake_up_bit(&inode->i_state, __I_SYNC);
|
||||
/* Called with inode->i_lock which ensures memory ordering. */
|
||||
inode_wake_up_bit(inode, __I_SYNC);
|
||||
}
|
||||
|
||||
static bool inode_dirtied_after(struct inode *inode, unsigned long t)
|
||||
@@ -1512,17 +1513,25 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc)
|
||||
*/
|
||||
void inode_wait_for_writeback(struct inode *inode)
|
||||
{
|
||||
DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
|
||||
wait_queue_head_t *wqh;
|
||||
struct wait_bit_queue_entry wqe;
|
||||
struct wait_queue_head *wq_head;
|
||||
|
||||
lockdep_assert_held(&inode->i_lock);
|
||||
wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
|
||||
while (inode->i_state & I_SYNC) {
|
||||
assert_spin_locked(&inode->i_lock);
|
||||
|
||||
if (!(inode->i_state & I_SYNC))
|
||||
return;
|
||||
|
||||
wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC);
|
||||
for (;;) {
|
||||
prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
|
||||
/* Checking I_SYNC with inode->i_lock guarantees memory ordering. */
|
||||
if (!(inode->i_state & I_SYNC))
|
||||
break;
|
||||
spin_unlock(&inode->i_lock);
|
||||
__wait_on_bit(wqh, &wq, bit_wait,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
schedule();
|
||||
spin_lock(&inode->i_lock);
|
||||
}
|
||||
finish_wait(wq_head, &wqe.wq_entry);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1533,16 +1542,20 @@ void inode_wait_for_writeback(struct inode *inode)
|
||||
static void inode_sleep_on_writeback(struct inode *inode)
|
||||
__releases(inode->i_lock)
|
||||
{
|
||||
DEFINE_WAIT(wait);
|
||||
wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
|
||||
int sleep;
|
||||
struct wait_bit_queue_entry wqe;
|
||||
struct wait_queue_head *wq_head;
|
||||
bool sleep;
|
||||
|
||||
prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
|
||||
sleep = inode->i_state & I_SYNC;
|
||||
assert_spin_locked(&inode->i_lock);
|
||||
|
||||
wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC);
|
||||
prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
|
||||
/* Checking I_SYNC with inode->i_lock guarantees memory ordering. */
|
||||
sleep = !!(inode->i_state & I_SYNC);
|
||||
spin_unlock(&inode->i_lock);
|
||||
if (sleep)
|
||||
schedule();
|
||||
finish_wait(wqh, &wait);
|
||||
finish_wait(wq_head, &wqe.wq_entry);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
72
fs/inode.c
72
fs/inode.c
@@ -472,6 +472,17 @@ static void __inode_add_lru(struct inode *inode, bool rotate)
|
||||
inode->i_state |= I_REFERENCED;
|
||||
}
|
||||
|
||||
struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
|
||||
struct inode *inode, u32 bit)
|
||||
{
|
||||
void *bit_address;
|
||||
|
||||
bit_address = inode_state_wait_address(inode, bit);
|
||||
init_wait_var_entry(wqe, bit_address, 0);
|
||||
return __var_waitqueue(bit_address);
|
||||
}
|
||||
EXPORT_SYMBOL(inode_bit_waitqueue);
|
||||
|
||||
/*
|
||||
* Add inode to LRU if needed (inode is unused and clean).
|
||||
*
|
||||
@@ -500,24 +511,35 @@ static void inode_unpin_lru_isolating(struct inode *inode)
|
||||
spin_lock(&inode->i_lock);
|
||||
WARN_ON(!(inode->i_state & I_LRU_ISOLATING));
|
||||
inode->i_state &= ~I_LRU_ISOLATING;
|
||||
smp_mb();
|
||||
wake_up_bit(&inode->i_state, __I_LRU_ISOLATING);
|
||||
/* Called with inode->i_lock which ensures memory ordering. */
|
||||
inode_wake_up_bit(inode, __I_LRU_ISOLATING);
|
||||
spin_unlock(&inode->i_lock);
|
||||
}
|
||||
|
||||
static void inode_wait_for_lru_isolating(struct inode *inode)
|
||||
{
|
||||
lockdep_assert_held(&inode->i_lock);
|
||||
if (inode->i_state & I_LRU_ISOLATING) {
|
||||
DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING);
|
||||
wait_queue_head_t *wqh;
|
||||
struct wait_bit_queue_entry wqe;
|
||||
struct wait_queue_head *wq_head;
|
||||
|
||||
wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING);
|
||||
lockdep_assert_held(&inode->i_lock);
|
||||
if (!(inode->i_state & I_LRU_ISOLATING))
|
||||
return;
|
||||
|
||||
wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING);
|
||||
for (;;) {
|
||||
prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
|
||||
/*
|
||||
* Checking I_LRU_ISOLATING with inode->i_lock guarantees
|
||||
* memory ordering.
|
||||
*/
|
||||
if (!(inode->i_state & I_LRU_ISOLATING))
|
||||
break;
|
||||
spin_unlock(&inode->i_lock);
|
||||
__wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE);
|
||||
schedule();
|
||||
spin_lock(&inode->i_lock);
|
||||
WARN_ON(inode->i_state & I_LRU_ISOLATING);
|
||||
}
|
||||
finish_wait(wq_head, &wqe.wq_entry);
|
||||
WARN_ON(inode->i_state & I_LRU_ISOLATING);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -723,7 +745,13 @@ static void evict(struct inode *inode)
|
||||
* used as an indicator whether blocking on it is safe.
|
||||
*/
|
||||
spin_lock(&inode->i_lock);
|
||||
wake_up_bit(&inode->i_state, __I_NEW);
|
||||
/*
|
||||
* Pairs with the barrier in prepare_to_wait_event() to make sure
|
||||
* ___wait_var_event() either sees the bit cleared or
|
||||
* waitqueue_active() check in wake_up_var() sees the waiter.
|
||||
*/
|
||||
smp_mb();
|
||||
inode_wake_up_bit(inode, __I_NEW);
|
||||
BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
|
||||
spin_unlock(&inode->i_lock);
|
||||
|
||||
@@ -1135,8 +1163,13 @@ void unlock_new_inode(struct inode *inode)
|
||||
spin_lock(&inode->i_lock);
|
||||
WARN_ON(!(inode->i_state & I_NEW));
|
||||
inode->i_state &= ~I_NEW & ~I_CREATING;
|
||||
/*
|
||||
* Pairs with the barrier in prepare_to_wait_event() to make sure
|
||||
* ___wait_var_event() either sees the bit cleared or
|
||||
* waitqueue_active() check in wake_up_var() sees the waiter.
|
||||
*/
|
||||
smp_mb();
|
||||
wake_up_bit(&inode->i_state, __I_NEW);
|
||||
inode_wake_up_bit(inode, __I_NEW);
|
||||
spin_unlock(&inode->i_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(unlock_new_inode);
|
||||
@@ -1147,8 +1180,13 @@ void discard_new_inode(struct inode *inode)
|
||||
spin_lock(&inode->i_lock);
|
||||
WARN_ON(!(inode->i_state & I_NEW));
|
||||
inode->i_state &= ~I_NEW;
|
||||
/*
|
||||
* Pairs with the barrier in prepare_to_wait_event() to make sure
|
||||
* ___wait_var_event() either sees the bit cleared or
|
||||
* waitqueue_active() check in wake_up_var() sees the waiter.
|
||||
*/
|
||||
smp_mb();
|
||||
wake_up_bit(&inode->i_state, __I_NEW);
|
||||
inode_wake_up_bit(inode, __I_NEW);
|
||||
spin_unlock(&inode->i_lock);
|
||||
iput(inode);
|
||||
}
|
||||
@@ -2337,8 +2375,8 @@ EXPORT_SYMBOL(inode_needs_sync);
|
||||
*/
|
||||
static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked)
|
||||
{
|
||||
wait_queue_head_t *wq;
|
||||
DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
|
||||
struct wait_bit_queue_entry wqe;
|
||||
struct wait_queue_head *wq_head;
|
||||
|
||||
/*
|
||||
* Handle racing against evict(), see that routine for more details.
|
||||
@@ -2349,14 +2387,14 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
|
||||
return;
|
||||
}
|
||||
|
||||
wq = bit_waitqueue(&inode->i_state, __I_NEW);
|
||||
prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
|
||||
wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
|
||||
prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
|
||||
spin_unlock(&inode->i_lock);
|
||||
rcu_read_unlock();
|
||||
if (is_inode_hash_locked)
|
||||
spin_unlock(&inode_hash_lock);
|
||||
schedule();
|
||||
finish_wait(wq, &wait.wq_entry);
|
||||
finish_wait(wq_head, &wqe.wq_entry);
|
||||
if (is_inode_hash_locked)
|
||||
spin_lock(&inode_hash_lock);
|
||||
rcu_read_lock();
|
||||
|
||||
@@ -681,7 +681,8 @@ struct inode {
|
||||
#endif
|
||||
|
||||
/* Misc */
|
||||
unsigned long i_state;
|
||||
u32 i_state;
|
||||
/* 32-bit hole */
|
||||
struct rw_semaphore i_rwsem;
|
||||
|
||||
unsigned long dirtied_when; /* jiffies of first dirtying */
|
||||
@@ -744,6 +745,21 @@ struct inode {
|
||||
void *i_private; /* fs or device private pointer */
|
||||
} __randomize_layout;
|
||||
|
||||
/*
|
||||
* Get bit address from inode->i_state to use with wait_var_event()
|
||||
* infrastructre.
|
||||
*/
|
||||
#define inode_state_wait_address(inode, bit) ((char *)&(inode)->i_state + (bit))
|
||||
|
||||
struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
|
||||
struct inode *inode, u32 bit);
|
||||
|
||||
static inline void inode_wake_up_bit(struct inode *inode, u32 bit)
|
||||
{
|
||||
/* Caller is responsible for correct memory barriers. */
|
||||
wake_up_var(inode_state_wait_address(inode, bit));
|
||||
}
|
||||
|
||||
struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode);
|
||||
|
||||
static inline unsigned int i_blocksize(const struct inode *node)
|
||||
@@ -2395,28 +2411,32 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
|
||||
* i_count.
|
||||
*
|
||||
* Q: What is the difference between I_WILL_FREE and I_FREEING?
|
||||
*
|
||||
* __I_{SYNC,NEW,LRU_ISOLATING} are used to derive unique addresses to wait
|
||||
* upon. There's one free address left.
|
||||
*/
|
||||
#define I_DIRTY_SYNC (1 << 0)
|
||||
#define I_DIRTY_DATASYNC (1 << 1)
|
||||
#define I_DIRTY_PAGES (1 << 2)
|
||||
#define __I_NEW 3
|
||||
#define __I_NEW 0
|
||||
#define I_NEW (1 << __I_NEW)
|
||||
#define I_WILL_FREE (1 << 4)
|
||||
#define I_FREEING (1 << 5)
|
||||
#define I_CLEAR (1 << 6)
|
||||
#define __I_SYNC 7
|
||||
#define __I_SYNC 1
|
||||
#define I_SYNC (1 << __I_SYNC)
|
||||
#define I_REFERENCED (1 << 8)
|
||||
#define __I_LRU_ISOLATING 2
|
||||
#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING)
|
||||
|
||||
#define I_DIRTY_SYNC (1 << 3)
|
||||
#define I_DIRTY_DATASYNC (1 << 4)
|
||||
#define I_DIRTY_PAGES (1 << 5)
|
||||
#define I_WILL_FREE (1 << 6)
|
||||
#define I_FREEING (1 << 7)
|
||||
#define I_CLEAR (1 << 8)
|
||||
#define I_REFERENCED (1 << 9)
|
||||
#define I_LINKABLE (1 << 10)
|
||||
#define I_DIRTY_TIME (1 << 11)
|
||||
#define I_WB_SWITCH (1 << 13)
|
||||
#define I_OVL_INUSE (1 << 14)
|
||||
#define I_CREATING (1 << 15)
|
||||
#define I_DONTCACHE (1 << 16)
|
||||
#define I_SYNC_QUEUED (1 << 17)
|
||||
#define I_PINNING_NETFS_WB (1 << 18)
|
||||
#define __I_LRU_ISOLATING 19
|
||||
#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING)
|
||||
#define I_WB_SWITCH (1 << 12)
|
||||
#define I_OVL_INUSE (1 << 13)
|
||||
#define I_CREATING (1 << 14)
|
||||
#define I_DONTCACHE (1 << 15)
|
||||
#define I_SYNC_QUEUED (1 << 16)
|
||||
#define I_PINNING_NETFS_WB (1 << 17)
|
||||
|
||||
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
|
||||
#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
|
||||
|
||||
@@ -200,7 +200,8 @@ void inode_io_list_del(struct inode *inode);
|
||||
/* writeback.h requires fs.h; it, too, is not included from here. */
|
||||
static inline void wait_on_inode(struct inode *inode)
|
||||
{
|
||||
wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE);
|
||||
wait_var_event(inode_state_wait_address(inode, __I_NEW),
|
||||
!(READ_ONCE(inode->i_state) & I_NEW));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||
|
||||
Reference in New Issue
Block a user