mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-10 10:20:17 -04:00
bcachefs: bch2_check_bucket_backpointer_mismatch()
Detect buckets with missing backpointers, and run repair on demand.

__bch2_move_data_phys() now calls bch2_check_bucket_backpointer_mismatch() as it walks buckets, which checks for missing backpointers by comparing backpointers against bucket sector counts.

When missing backpointers are detected, we kick off bch2_check_extents_to_backpointers() asynchronously - right away if we're trying to evacuate, or with a threshold if we're just running copygc.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
@@ -2175,8 +2175,11 @@ static int invalidate_one_bucket(struct btree_trans *trans,
|
||||
BUG_ON(a->data_type != BCH_DATA_cached);
|
||||
BUG_ON(a->dirty_sectors);
|
||||
|
||||
if (!a->cached_sectors)
|
||||
bch_err(c, "invalidating empty bucket, confused");
|
||||
if (!a->cached_sectors) {
|
||||
bch2_check_bucket_backpointer_mismatch(trans, ca, bucket.offset,
|
||||
true, last_flushed);
|
||||
goto out;
|
||||
}
|
||||
|
||||
unsigned cached_sectors = a->cached_sectors;
|
||||
u8 gen = a->gen;
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "disk_accounting.h"
|
||||
#include "error.h"
|
||||
#include "progress.h"
|
||||
#include "recovery_passes.h"
|
||||
|
||||
#include <linux/mm.h>
|
||||
|
||||
@@ -804,6 +805,13 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int bch2_fs_going_ro(struct bch_fs *c)
|
||||
{
|
||||
return test_bit(BCH_FS_going_ro, &c->flags)
|
||||
? -EROFS
|
||||
: 0;
|
||||
}
|
||||
|
||||
static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
|
||||
struct extents_to_bp_state *s)
|
||||
{
|
||||
@@ -831,6 +839,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
|
||||
|
||||
ret = for_each_btree_key_continue(trans, iter, 0, k, ({
|
||||
bch2_progress_update_iter(trans, &progress, &iter, "extents_to_backpointers");
|
||||
bch2_fs_going_ro(c) ?:
|
||||
check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
|
||||
}));
|
||||
@@ -870,6 +879,7 @@ static int data_type_to_alloc_counter(enum bch_data_type t)
|
||||
static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos);
|
||||
|
||||
static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k,
|
||||
bool *had_mismatch,
|
||||
struct bkey_buf *last_flushed)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
@@ -877,6 +887,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
|
||||
const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);
|
||||
bool need_commit = false;
|
||||
|
||||
*had_mismatch = false;
|
||||
|
||||
if (a->data_type == BCH_DATA_sb ||
|
||||
a->data_type == BCH_DATA_journal ||
|
||||
a->data_type == BCH_DATA_parity)
|
||||
@@ -957,6 +969,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
|
||||
? bch2_bucket_bitmap_set(ca, &ca->bucket_backpointer_empty,
|
||||
alloc_k.k->p.offset)
|
||||
: 0);
|
||||
|
||||
*had_mismatch = true;
|
||||
}
|
||||
err:
|
||||
bch2_dev_put(ca);
|
||||
@@ -1104,7 +1118,9 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
|
||||
|
||||
ret = for_each_btree_key(trans, iter, BTREE_ID_alloc,
|
||||
POS_MIN, BTREE_ITER_prefetch, k, ({
|
||||
check_bucket_backpointer_mismatch(trans, k, &s.last_flushed);
|
||||
bool had_mismatch;
|
||||
bch2_fs_going_ro(c) ?:
|
||||
check_bucket_backpointer_mismatch(trans, k, &had_mismatch, &s.last_flushed);
|
||||
}));
|
||||
if (ret)
|
||||
goto err;
|
||||
@@ -1150,20 +1166,69 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
|
||||
|
||||
s.bp_start = bpos_successor(s.bp_end);
|
||||
}
|
||||
err:
|
||||
bch2_trans_put(trans);
|
||||
bch2_bkey_buf_exit(&s.last_flushed, c);
|
||||
bch2_btree_cache_unpin(c);
|
||||
|
||||
for_each_member_device(c, ca) {
|
||||
bch2_bucket_bitmap_free(&ca->bucket_backpointer_mismatch);
|
||||
bch2_bucket_bitmap_free(&ca->bucket_backpointer_empty);
|
||||
}
|
||||
err:
|
||||
bch2_trans_put(trans);
|
||||
bch2_bkey_buf_exit(&s.last_flushed, c);
|
||||
bch2_btree_cache_unpin(c);
|
||||
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int check_bucket_backpointer_pos_mismatch(struct btree_trans *trans,
|
||||
struct bpos bucket,
|
||||
bool *had_mismatch,
|
||||
struct bkey_buf *last_flushed)
|
||||
{
|
||||
struct btree_iter alloc_iter;
|
||||
struct bkey_s_c k = bch2_bkey_get_iter(trans, &alloc_iter,
|
||||
BTREE_ID_alloc, bucket,
|
||||
BTREE_ITER_cached);
|
||||
int ret = bkey_err(k);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = check_bucket_backpointer_mismatch(trans, k, had_mismatch, last_flushed);
|
||||
bch2_trans_iter_exit(trans, &alloc_iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_check_bucket_backpointer_mismatch(struct btree_trans *trans,
|
||||
struct bch_dev *ca, u64 bucket,
|
||||
bool copygc,
|
||||
struct bkey_buf *last_flushed)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
bool had_mismatch;
|
||||
int ret = lockrestart_do(trans,
|
||||
check_bucket_backpointer_pos_mismatch(trans, POS(ca->dev_idx, bucket),
|
||||
&had_mismatch, last_flushed));
|
||||
if (ret || !had_mismatch)
|
||||
return ret;
|
||||
|
||||
u64 nr = ca->bucket_backpointer_mismatch.nr;
|
||||
u64 allowed = copygc ? ca->mi.nbuckets >> 7 : 0;
|
||||
|
||||
struct printbuf buf = PRINTBUF;
|
||||
__bch2_log_msg_start(ca->name, &buf);
|
||||
|
||||
prt_printf(&buf, "Detected missing backpointers in bucket %llu, now have %llu/%llu with missing\n",
|
||||
bucket, nr, ca->mi.nbuckets);
|
||||
|
||||
bch2_run_explicit_recovery_pass(c, &buf,
|
||||
BCH_RECOVERY_PASS_check_extents_to_backpointers,
|
||||
nr < allowed ? RUN_RECOVERY_PASS_ratelimit : 0);
|
||||
|
||||
bch2_print_str(c, KERN_ERR, buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* backpointers -> extents */
|
||||
|
||||
static int check_one_backpointer(struct btree_trans *trans,
|
||||
|
||||
@@ -182,7 +182,8 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct bkey_s_c_b
|
||||
struct btree *bch2_backpointer_get_node(struct btree_trans *, struct bkey_s_c_backpointer,
|
||||
struct btree_iter *, struct bkey_buf *);
|
||||
|
||||
int bch2_check_bucket_backpointer_mismatch(struct btree_trans *, struct bpos, struct bkey_buf *);
|
||||
int bch2_check_bucket_backpointer_mismatch(struct btree_trans *, struct bch_dev *, u64,
|
||||
bool, struct bkey_buf *);
|
||||
|
||||
int bch2_check_btree_backpointers(struct bch_fs *);
|
||||
int bch2_check_extents_to_backpointers(struct bch_fs *);
|
||||
|
||||
@@ -815,6 +815,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
|
||||
u64 bucket_start,
|
||||
u64 bucket_end,
|
||||
unsigned data_types,
|
||||
bool copygc,
|
||||
move_pred_fn pred, void *arg)
|
||||
{
|
||||
struct btree_trans *trans = ctxt->trans;
|
||||
@@ -825,6 +826,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
|
||||
struct bkey_buf sk;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_buf last_flushed;
|
||||
u64 check_mismatch_done = bucket_start;
|
||||
int ret = 0;
|
||||
|
||||
struct bch_dev *ca = bch2_dev_tryget(c, dev);
|
||||
@@ -835,8 +837,6 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
|
||||
|
||||
struct bpos bp_start = bucket_pos_to_bp_start(ca, POS(dev, bucket_start));
|
||||
struct bpos bp_end = bucket_pos_to_bp_end(ca, POS(dev, bucket_end));
|
||||
bch2_dev_put(ca);
|
||||
ca = NULL;
|
||||
|
||||
bch2_bkey_buf_init(&last_flushed);
|
||||
bkey_init(&last_flushed.k->k);
|
||||
@@ -871,6 +871,14 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
|
||||
if (!k.k || bkey_gt(k.k->p, bp_end))
|
||||
break;
|
||||
|
||||
if (check_mismatch_done < bp_pos_to_bucket(ca, k.k->p).offset) {
|
||||
while (check_mismatch_done < bp_pos_to_bucket(ca, k.k->p).offset) {
|
||||
bch2_check_bucket_backpointer_mismatch(trans, ca, check_mismatch_done++,
|
||||
copygc, &last_flushed);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (k.k->type != KEY_TYPE_backpointer)
|
||||
goto next;
|
||||
|
||||
@@ -946,10 +954,15 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
|
||||
next:
|
||||
bch2_btree_iter_advance(trans, &bp_iter);
|
||||
}
|
||||
|
||||
while (check_mismatch_done < bucket_end)
|
||||
bch2_check_bucket_backpointer_mismatch(trans, ca, check_mismatch_done++,
|
||||
copygc, &last_flushed);
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &bp_iter);
|
||||
bch2_bkey_buf_exit(&sk, c);
|
||||
bch2_bkey_buf_exit(&last_flushed, c);
|
||||
bch2_dev_put(ca);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -974,7 +987,8 @@ int bch2_move_data_phys(struct bch_fs *c,
|
||||
ctxt.stats->data_type = (int) DATA_PROGRESS_DATA_TYPE_phys;
|
||||
}
|
||||
|
||||
int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end, data_types, pred, arg);
|
||||
int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end,
|
||||
data_types, false, pred, arg);
|
||||
bch2_moving_ctxt_exit(&ctxt);
|
||||
|
||||
return ret;
|
||||
@@ -1019,6 +1033,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
|
||||
bucket.offset,
|
||||
bucket.offset + 1,
|
||||
~0,
|
||||
true,
|
||||
evacuate_bucket_pred, &arg);
|
||||
}
|
||||
|
||||
|
||||
@@ -75,6 +75,9 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
|
||||
if (!ca)
|
||||
goto out;
|
||||
|
||||
if (bch2_bucket_bitmap_test(&ca->bucket_backpointer_mismatch, b->k.bucket.offset))
|
||||
goto out;
|
||||
|
||||
if (ca->mi.state != BCH_MEMBER_STATE_rw ||
|
||||
!bch2_dev_is_online(ca))
|
||||
goto out;
|
||||
|
||||
Reference in New Issue
Block a user