mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-05 20:33:49 -04:00
bcachefs: Convert bucket invalidation to key marking path
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
committed by
Kent Overstreet
parent
73c27c6095
commit
8fe826f90a
@@ -129,6 +129,34 @@ static inline void put_alloc_field(struct bkey_i_alloc *a, void **p,
|
||||
*p += bytes;
|
||||
}
|
||||
|
||||
struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *a)
|
||||
{
|
||||
struct bkey_alloc_unpacked ret = { .gen = a->gen };
|
||||
const void *d = a->data;
|
||||
unsigned idx = 0;
|
||||
|
||||
#define x(_name, _bits) ret._name = get_alloc_field(a, &d, idx++);
|
||||
BCH_ALLOC_FIELDS()
|
||||
#undef x
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void bch2_alloc_pack(struct bkey_i_alloc *dst,
|
||||
const struct bkey_alloc_unpacked src)
|
||||
{
|
||||
unsigned idx = 0;
|
||||
void *d = dst->v.data;
|
||||
|
||||
dst->v.fields = 0;
|
||||
dst->v.gen = src.gen;
|
||||
|
||||
#define x(_name, _bits) put_alloc_field(dst, &d, idx++, src._name);
|
||||
BCH_ALLOC_FIELDS()
|
||||
#undef x
|
||||
|
||||
set_bkey_val_bytes(&dst->k, (void *) d - (void *) &dst->v);
|
||||
}
|
||||
|
||||
static unsigned bch_alloc_val_u64s(const struct bch_alloc *a)
|
||||
{
|
||||
unsigned i, bytes = offsetof(struct bch_alloc, data);
|
||||
@@ -174,16 +202,24 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
static void __alloc_read_key(struct bucket *g, const struct bch_alloc *a)
|
||||
{
|
||||
const void *d = a->data;
|
||||
unsigned idx = 0;
|
||||
unsigned idx = 0, data_type, dirty_sectors, cached_sectors;
|
||||
struct bucket_mark m;
|
||||
|
||||
g->_mark.gen = a->gen;
|
||||
g->gen_valid = 1;
|
||||
g->io_time[READ] = get_alloc_field(a, &d, idx++);
|
||||
g->io_time[WRITE] = get_alloc_field(a, &d, idx++);
|
||||
g->_mark.data_type = get_alloc_field(a, &d, idx++);
|
||||
g->_mark.dirty_sectors = get_alloc_field(a, &d, idx++);
|
||||
g->_mark.cached_sectors = get_alloc_field(a, &d, idx++);
|
||||
data_type = get_alloc_field(a, &d, idx++);
|
||||
dirty_sectors = get_alloc_field(a, &d, idx++);
|
||||
cached_sectors = get_alloc_field(a, &d, idx++);
|
||||
g->oldest_gen = get_alloc_field(a, &d, idx++);
|
||||
|
||||
bucket_cmpxchg(g, m, ({
|
||||
m.gen = a->gen;
|
||||
m.data_type = data_type;
|
||||
m.dirty_sectors = dirty_sectors;
|
||||
m.cached_sectors = cached_sectors;
|
||||
}));
|
||||
|
||||
g->gen_valid = 1;
|
||||
}
|
||||
|
||||
static void __alloc_write_key(struct bkey_i_alloc *a, struct bucket *g,
|
||||
@@ -318,6 +354,7 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_USE_RESERVE|
|
||||
BTREE_INSERT_USE_ALLOC_RESERVE|
|
||||
BTREE_INSERT_NOMARK|
|
||||
flags,
|
||||
BTREE_INSERT_ENTRY(iter, &a->k_i));
|
||||
if (ret)
|
||||
@@ -361,7 +398,8 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
|
||||
? 0
|
||||
: bch2_btree_insert_at(c, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_JOURNAL_REPLAY,
|
||||
BTREE_INSERT_JOURNAL_REPLAY|
|
||||
BTREE_INSERT_NOMARK,
|
||||
BTREE_INSERT_ENTRY(&iter, k));
|
||||
err:
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
@@ -827,6 +865,142 @@ static inline long next_alloc_bucket(struct bch_dev *ca)
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* returns sequence number of most recent journal entry that updated this
|
||||
* bucket:
|
||||
*/
|
||||
static u64 bucket_journal_seq(struct bch_fs *c, struct bucket_mark m)
|
||||
{
|
||||
if (m.journal_seq_valid) {
|
||||
u64 journal_seq = atomic64_read(&c->journal.seq);
|
||||
u64 bucket_seq = journal_seq;
|
||||
|
||||
bucket_seq &= ~((u64) U16_MAX);
|
||||
bucket_seq |= m.journal_seq;
|
||||
|
||||
if (bucket_seq > journal_seq)
|
||||
bucket_seq -= 1 << 16;
|
||||
|
||||
return bucket_seq;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int bch2_invalidate_one_bucket2(struct bch_fs *c, struct bch_dev *ca,
|
||||
struct btree_iter *iter,
|
||||
u64 *journal_seq, unsigned flags)
|
||||
{
|
||||
#if 0
|
||||
__BKEY_PADDED(k, BKEY_ALLOC_VAL_U64s_MAX) alloc_key;
|
||||
#else
|
||||
/* hack: */
|
||||
__BKEY_PADDED(k, 8) alloc_key;
|
||||
#endif
|
||||
struct bkey_i_alloc *a;
|
||||
struct bkey_alloc_unpacked u;
|
||||
struct bucket_mark m;
|
||||
struct bkey_s_c k;
|
||||
bool invalidating_cached_data;
|
||||
size_t b;
|
||||
int ret;
|
||||
|
||||
BUG_ON(!ca->alloc_heap.used ||
|
||||
!ca->alloc_heap.data[0].nr);
|
||||
b = ca->alloc_heap.data[0].bucket;
|
||||
|
||||
/* first, put on free_inc and mark as owned by allocator: */
|
||||
percpu_down_read(&c->mark_lock);
|
||||
spin_lock(&c->freelist_lock);
|
||||
|
||||
verify_not_on_freelist(c, ca, b);
|
||||
|
||||
BUG_ON(!fifo_push(&ca->free_inc, b));
|
||||
|
||||
bch2_mark_alloc_bucket(c, ca, b, true, gc_pos_alloc(c, NULL), 0);
|
||||
m = bucket(ca, b)->mark;
|
||||
|
||||
spin_unlock(&c->freelist_lock);
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
bch2_btree_iter_cond_resched(iter);
|
||||
|
||||
BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
|
||||
|
||||
bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b));
|
||||
retry:
|
||||
k = bch2_btree_iter_peek_slot(iter);
|
||||
ret = btree_iter_err(k);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (k.k && k.k->type == KEY_TYPE_alloc)
|
||||
u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
|
||||
else
|
||||
memset(&u, 0, sizeof(u));
|
||||
|
||||
invalidating_cached_data = u.cached_sectors != 0;
|
||||
|
||||
//BUG_ON(u.dirty_sectors);
|
||||
u.data_type = 0;
|
||||
u.dirty_sectors = 0;
|
||||
u.cached_sectors = 0;
|
||||
u.read_time = c->bucket_clock[READ].hand;
|
||||
u.write_time = c->bucket_clock[WRITE].hand;
|
||||
u.gen++;
|
||||
|
||||
a = bkey_alloc_init(&alloc_key.k);
|
||||
a->k.p = iter->pos;
|
||||
bch2_alloc_pack(a, u);
|
||||
|
||||
ret = bch2_btree_insert_at(c, NULL,
|
||||
invalidating_cached_data ? journal_seq : NULL,
|
||||
BTREE_INSERT_ATOMIC|
|
||||
BTREE_INSERT_NOCHECK_RW|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_USE_RESERVE|
|
||||
BTREE_INSERT_USE_ALLOC_RESERVE|
|
||||
flags,
|
||||
BTREE_INSERT_ENTRY(iter, &a->k_i));
|
||||
if (ret == -EINTR)
|
||||
goto retry;
|
||||
|
||||
if (!ret) {
|
||||
/* remove from alloc_heap: */
|
||||
struct alloc_heap_entry e, *top = ca->alloc_heap.data;
|
||||
|
||||
top->bucket++;
|
||||
top->nr--;
|
||||
|
||||
if (!top->nr)
|
||||
heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
|
||||
|
||||
/*
|
||||
* Make sure we flush the last journal entry that updated this
|
||||
* bucket (i.e. deleting the last reference) before writing to
|
||||
* this bucket again:
|
||||
*/
|
||||
*journal_seq = max(*journal_seq, bucket_journal_seq(c, m));
|
||||
} else {
|
||||
size_t b2;
|
||||
|
||||
/* remove from free_inc: */
|
||||
percpu_down_read(&c->mark_lock);
|
||||
spin_lock(&c->freelist_lock);
|
||||
|
||||
bch2_mark_alloc_bucket(c, ca, b, false,
|
||||
gc_pos_alloc(c, NULL), 0);
|
||||
|
||||
BUG_ON(!fifo_pop_back(&ca->free_inc, b2));
|
||||
BUG_ON(b != b2);
|
||||
|
||||
spin_unlock(&c->freelist_lock);
|
||||
percpu_up_read(&c->mark_lock);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t bucket, u64 *flush_seq)
|
||||
{
|
||||
@@ -847,18 +1021,7 @@ static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
if (m.journal_seq_valid) {
|
||||
u64 journal_seq = atomic64_read(&c->journal.seq);
|
||||
u64 bucket_seq = journal_seq;
|
||||
|
||||
bucket_seq &= ~((u64) U16_MAX);
|
||||
bucket_seq |= m.journal_seq;
|
||||
|
||||
if (bucket_seq > journal_seq)
|
||||
bucket_seq -= 1 << 16;
|
||||
|
||||
*flush_seq = max(*flush_seq, bucket_seq);
|
||||
}
|
||||
*flush_seq = max(*flush_seq, bucket_journal_seq(c, m));
|
||||
|
||||
return m.cached_sectors != 0;
|
||||
}
|
||||
@@ -871,7 +1034,6 @@ static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca)
|
||||
struct btree_iter iter;
|
||||
u64 journal_seq = 0;
|
||||
int ret = 0;
|
||||
long b;
|
||||
|
||||
bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0),
|
||||
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
|
||||
@@ -879,16 +1041,11 @@ static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca)
|
||||
/* Only use nowait if we've already invalidated at least one bucket: */
|
||||
while (!ret &&
|
||||
!fifo_full(&ca->free_inc) &&
|
||||
(b = next_alloc_bucket(ca)) >= 0) {
|
||||
bool must_flush =
|
||||
bch2_invalidate_one_bucket(c, ca, b, &journal_seq);
|
||||
|
||||
ret = __bch2_alloc_write_key(c, ca, b, &iter,
|
||||
must_flush ? &journal_seq : NULL,
|
||||
ca->alloc_heap.used)
|
||||
ret = bch2_invalidate_one_bucket2(c, ca, &iter, &journal_seq,
|
||||
BTREE_INSERT_GC_LOCK_HELD|
|
||||
(!fifo_empty(&ca->free_inc)
|
||||
? BTREE_INSERT_NOWAIT : 0));
|
||||
}
|
||||
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
|
||||
@@ -6,6 +6,15 @@
|
||||
#include "alloc_types.h"
|
||||
#include "debug.h"
|
||||
|
||||
struct bkey_alloc_unpacked {
|
||||
u8 gen;
|
||||
#define x(_name, _bits) u##_bits _name;
|
||||
BCH_ALLOC_FIELDS()
|
||||
#undef x
|
||||
};
|
||||
|
||||
struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *);
|
||||
|
||||
#define ALLOC_SCAN_BATCH(ca) max_t(size_t, 1, (ca)->mi.nbuckets >> 9)
|
||||
|
||||
const char *bch2_alloc_invalid(const struct bch_fs *, struct bkey_s_c);
|
||||
|
||||
@@ -826,12 +826,12 @@ struct bch_alloc {
|
||||
} __attribute__((packed, aligned(8)));
|
||||
|
||||
/*
 * x(name, bits): every field that may be stored in an alloc key after the
 * generation number, in on-disk order, together with its width in bits.
 * Users that need a size in bytes divide the width by 8.
 */
#define BCH_ALLOC_FIELDS()			\
	x(read_time, 16)			\
	x(write_time, 16)			\
	x(data_type, 8)				\
	x(dirty_sectors, 16)			\
	x(cached_sectors, 16)			\
	x(oldest_gen, 8)
|
||||
|
||||
enum {
|
||||
#define x(name, bytes) BCH_ALLOC_FIELD_##name,
|
||||
@@ -841,12 +841,12 @@ enum {
|
||||
};
|
||||
|
||||
static const unsigned BCH_ALLOC_FIELD_BYTES[] = {
|
||||
#define x(name, bytes) [BCH_ALLOC_FIELD_##name] = bytes,
|
||||
#define x(name, bits) [BCH_ALLOC_FIELD_##name] = bits / 8,
|
||||
BCH_ALLOC_FIELDS()
|
||||
#undef x
|
||||
};
|
||||
|
||||
#define x(name, bytes) + bytes
|
||||
#define x(name, bits) + (bits / 8)
|
||||
static const unsigned BKEY_ALLOC_VAL_U64s_MAX =
|
||||
DIV_ROUND_UP(offsetof(struct bch_alloc, data)
|
||||
BCH_ALLOC_FIELDS(), sizeof(u64));
|
||||
|
||||
@@ -456,6 +456,7 @@ static inline bool btree_node_is_extents(struct btree *b)
|
||||
static inline bool btree_node_type_needs_gc(enum btree_node_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case BKEY_TYPE_ALLOC:
|
||||
case BKEY_TYPE_BTREE:
|
||||
case BKEY_TYPE_EXTENTS:
|
||||
case BKEY_TYPE_INODES:
|
||||
|
||||
@@ -82,6 +82,7 @@ enum {
|
||||
__BTREE_INSERT_USE_RESERVE,
|
||||
__BTREE_INSERT_USE_ALLOC_RESERVE,
|
||||
__BTREE_INSERT_JOURNAL_REPLAY,
|
||||
__BTREE_INSERT_NOMARK,
|
||||
__BTREE_INSERT_NOWAIT,
|
||||
__BTREE_INSERT_GC_LOCK_HELD,
|
||||
__BCH_HASH_SET_MUST_CREATE,
|
||||
@@ -108,12 +109,12 @@ enum {
|
||||
#define BTREE_INSERT_USE_RESERVE (1 << __BTREE_INSERT_USE_RESERVE)
|
||||
#define BTREE_INSERT_USE_ALLOC_RESERVE (1 << __BTREE_INSERT_USE_ALLOC_RESERVE)
|
||||
|
||||
/* Insert is for journal replay - don't get journal reservations: */
|
||||
#define BTREE_INSERT_JOURNAL_REPLAY (1 << __BTREE_INSERT_JOURNAL_REPLAY)
|
||||
|
||||
/* Don't call bch2_mark_key: */
|
||||
#define BTREE_INSERT_NOMARK (1 << __BTREE_INSERT_NOMARK)
|
||||
|
||||
/* Don't block on allocation failure (for new btree nodes): */
|
||||
#define BTREE_INSERT_NOWAIT (1 << __BTREE_INSERT_NOWAIT)
|
||||
#define BTREE_INSERT_GC_LOCK_HELD (1 << __BTREE_INSERT_GC_LOCK_HELD)
|
||||
|
||||
@@ -476,6 +476,60 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
ca, b, owned_by_allocator);
|
||||
}
|
||||
|
||||
static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
|
||||
bool inserting,
|
||||
struct bch_fs_usage *fs_usage,
|
||||
unsigned journal_seq, unsigned flags,
|
||||
bool gc)
|
||||
{
|
||||
struct bkey_alloc_unpacked u;
|
||||
struct bch_dev *ca;
|
||||
struct bucket *g;
|
||||
struct bucket_mark old, m;
|
||||
|
||||
if (!inserting)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* alloc btree is read in by bch2_alloc_read, not gc:
|
||||
*/
|
||||
if (flags & BCH_BUCKET_MARK_GC)
|
||||
return 0;
|
||||
|
||||
u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
|
||||
ca = bch_dev_bkey_exists(c, k.k->p.inode);
|
||||
g = __bucket(ca, k.k->p.offset, gc);
|
||||
|
||||
/*
|
||||
* this should currently only be getting called from the bucket
|
||||
* invalidate path:
|
||||
*/
|
||||
BUG_ON(u.dirty_sectors);
|
||||
BUG_ON(u.cached_sectors);
|
||||
BUG_ON(!g->mark.owned_by_allocator);
|
||||
|
||||
old = bucket_data_cmpxchg(c, ca, fs_usage, g, m, ({
|
||||
m.gen = u.gen;
|
||||
m.data_type = u.data_type;
|
||||
m.dirty_sectors = u.dirty_sectors;
|
||||
m.cached_sectors = u.cached_sectors;
|
||||
}));
|
||||
|
||||
g->io_time[READ] = u.read_time;
|
||||
g->io_time[WRITE] = u.write_time;
|
||||
g->oldest_gen = u.oldest_gen;
|
||||
g->gen_valid = 1;
|
||||
|
||||
if (old.cached_sectors) {
|
||||
update_cached_sectors(c, fs_usage, ca->dev_idx,
|
||||
-old.cached_sectors);
|
||||
trace_invalidate(ca, bucket_to_sector(ca, k.k->p.offset),
|
||||
old.cached_sectors);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define checked_add(a, b) \
|
||||
do { \
|
||||
unsigned _res = (unsigned) (a) + (b); \
|
||||
@@ -840,18 +894,21 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
|
||||
fs_usage = this_cpu_ptr(c->usage[gc]);
|
||||
|
||||
switch (k.k->type) {
|
||||
case KEY_TYPE_alloc:
|
||||
return bch2_mark_alloc(c, k, inserting,
|
||||
fs_usage, journal_seq, flags, gc);
|
||||
case KEY_TYPE_btree_ptr:
|
||||
return bch2_mark_extent(c, k, inserting
|
||||
? c->opts.btree_node_size
|
||||
: -c->opts.btree_node_size,
|
||||
BCH_DATA_BTREE,
|
||||
fs_usage, journal_seq, flags, gc);
|
||||
? c->opts.btree_node_size
|
||||
: -c->opts.btree_node_size,
|
||||
BCH_DATA_BTREE,
|
||||
fs_usage, journal_seq, flags, gc);
|
||||
case KEY_TYPE_extent:
|
||||
return bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
|
||||
fs_usage, journal_seq, flags, gc);
|
||||
fs_usage, journal_seq, flags, gc);
|
||||
case KEY_TYPE_stripe:
|
||||
return bch2_mark_stripe(c, k, inserting,
|
||||
fs_usage, journal_seq, flags, gc);
|
||||
fs_usage, journal_seq, flags, gc);
|
||||
case KEY_TYPE_inode:
|
||||
if (inserting)
|
||||
fs_usage->s.nr_inodes++;
|
||||
@@ -922,7 +979,7 @@ void bch2_mark_update(struct btree_insert *trans,
|
||||
preempt_disable();
|
||||
fs_usage = bch2_fs_usage_get_scratch(c);
|
||||
|
||||
if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
|
||||
if (!(trans->flags & BTREE_INSERT_NOMARK))
|
||||
bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
|
||||
bpos_min(insert->k->k.p, b->key.k.p).offset -
|
||||
bkey_start_offset(&insert->k->k),
|
||||
|
||||
@@ -17,13 +17,14 @@
|
||||
|
||||
#define bucket_cmpxchg(g, new, expr) \
|
||||
({ \
|
||||
struct bucket *_g = g; \
|
||||
u64 _v = atomic64_read(&(g)->_mark.v); \
|
||||
struct bucket_mark _old; \
|
||||
\
|
||||
do { \
|
||||
(new).v.counter = _old.v.counter = _v; \
|
||||
expr; \
|
||||
} while ((_v = atomic64_cmpxchg(&(g)->_mark.v, \
|
||||
} while ((_v = atomic64_cmpxchg(&(_g)->_mark.v, \
|
||||
_old.v.counter, \
|
||||
(new).v.counter)) != _old.v.counter);\
|
||||
_old; \
|
||||
|
||||
@@ -101,7 +101,7 @@ do { \
|
||||
({ \
|
||||
bool _r = !fifo_empty((fifo)); \
|
||||
if (_r) \
|
||||
(i) = (fifo)->data[--(fifo)->back & (fifo)->mask] \
|
||||
(i) = (fifo)->data[--(fifo)->back & (fifo)->mask]; \
|
||||
_r; \
|
||||
})
|
||||
|
||||
|
||||
@@ -854,7 +854,8 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
|
||||
ret = bch2_btree_insert(c, entry->btree_id, k,
|
||||
&disk_res, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_JOURNAL_REPLAY);
|
||||
BTREE_INSERT_JOURNAL_REPLAY|
|
||||
BTREE_INSERT_NOMARK);
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
|
||||
Reference in New Issue
Block a user