mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-02-24 04:15:07 -05:00
bcachefs: bch2_write_op_error() now prints info about data update
A user has been seeing the "error verifying existing checksum while rewriting existing data (memory corruption?)" error. This generally indicates a hardware issue (and that may be the case here), but it might also indicate a bug, in which case we need more information to look for patterns.

Reported-by: Roland Vet <vet.roland@protonmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
@@ -271,8 +271,8 @@ int bch2_bio_uncompress_inplace(struct bch_write_op *op,
|
||||
if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max ||
|
||||
crc->compressed_size << 9 > c->opts.encoded_extent_max) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op);
|
||||
prt_printf(&buf, "error rewriting existing data: extent too big");
|
||||
bch2_write_op_error(&buf, op, op->pos.offset,
|
||||
"extent too big to decompress");
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
return -EIO;
|
||||
@@ -283,8 +283,8 @@ int bch2_bio_uncompress_inplace(struct bch_write_op *op,
|
||||
if (__bio_uncompress(c, bio, data.b, *crc)) {
|
||||
if (!c->opts.no_data_io) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op);
|
||||
prt_printf(&buf, "error rewriting existing data: decompression error");
|
||||
bch2_write_op_error(&buf, op, op->pos.offset,
|
||||
"decompression error");
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
|
||||
@@ -580,3 +580,9 @@ int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *trans, struct printb
|
||||
prt_printf(out, " offset %llu: ", pos.offset << 8);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_inum_snap_offset_err_msg(struct bch_fs *c, struct printbuf *out,
|
||||
struct bpos pos)
|
||||
{
|
||||
bch2_trans_do(c, bch2_inum_snap_offset_err_msg_trans(trans, out, pos));
|
||||
}
|
||||
|
||||
@@ -243,5 +243,6 @@ int bch2_inum_offset_err_msg_trans(struct btree_trans *, struct printbuf *, subv
|
||||
void bch2_inum_offset_err_msg(struct bch_fs *, struct printbuf *, subvol_inum, u64);
|
||||
|
||||
int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *, struct printbuf *, struct bpos);
|
||||
void bch2_inum_snap_offset_err_msg(struct bch_fs *, struct printbuf *, struct bpos);
|
||||
|
||||
#endif /* _BCACHEFS_ERROR_H */
|
||||
|
||||
@@ -396,29 +396,61 @@ static int bch2_write_index_default(struct bch_write_op *op)
|
||||
|
||||
/* Writes */
|
||||
|
||||
static void __bch2_write_op_error(struct printbuf *out, struct bch_write_op *op,
|
||||
u64 offset)
|
||||
void bch2_write_op_error_trans(struct btree_trans *trans, struct printbuf *out,
|
||||
struct bch_write_op *op, u64 offset, const char *fmt, ...)
|
||||
{
|
||||
bch2_inum_offset_err_msg(op->c, out,
|
||||
(subvol_inum) { op->subvol, op->pos.inode, },
|
||||
offset << 9);
|
||||
prt_printf(out, "write error%s: ",
|
||||
op->flags & BCH_WRITE_move ? "(internal move)" : "");
|
||||
if (op->subvol)
|
||||
lockrestart_do(trans,
|
||||
bch2_inum_offset_err_msg_trans(trans, out,
|
||||
(subvol_inum) { op->subvol, op->pos.inode, },
|
||||
offset << 9));
|
||||
else {
|
||||
struct bpos pos = op->pos;
|
||||
pos.offset = offset;
|
||||
lockrestart_do(trans, bch2_inum_snap_offset_err_msg_trans(trans, out, pos));
|
||||
}
|
||||
|
||||
prt_str(out, "write error: ");
|
||||
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
prt_vprintf(out, fmt, args);
|
||||
va_end(args);
|
||||
|
||||
if (op->flags & BCH_WRITE_move) {
|
||||
struct data_update *u = container_of(op, struct data_update, op);
|
||||
|
||||
prt_printf(out, "\n from internal move ");
|
||||
bch2_bkey_val_to_text(out, op->c, bkey_i_to_s_c(u->k.k));
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op)
|
||||
void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op, u64 offset,
|
||||
const char *fmt, ...)
|
||||
{
|
||||
__bch2_write_op_error(out, op, op->pos.offset);
|
||||
}
|
||||
if (op->subvol)
|
||||
bch2_inum_offset_err_msg(op->c, out,
|
||||
(subvol_inum) { op->subvol, op->pos.inode, },
|
||||
offset << 9);
|
||||
else {
|
||||
struct bpos pos = op->pos;
|
||||
pos.offset = offset;
|
||||
bch2_inum_snap_offset_err_msg(op->c, out, pos);
|
||||
}
|
||||
|
||||
static void bch2_write_op_error_trans(struct btree_trans *trans, struct printbuf *out,
|
||||
struct bch_write_op *op, u64 offset)
|
||||
{
|
||||
bch2_inum_offset_err_msg_trans(trans, out,
|
||||
(subvol_inum) { op->subvol, op->pos.inode, },
|
||||
offset << 9);
|
||||
prt_printf(out, "write error%s: ",
|
||||
op->flags & BCH_WRITE_move ? "(internal move)" : "");
|
||||
prt_str(out, "write error: ");
|
||||
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
prt_vprintf(out, fmt, args);
|
||||
va_end(args);
|
||||
|
||||
if (op->flags & BCH_WRITE_move) {
|
||||
struct data_update *u = container_of(op, struct data_update, op);
|
||||
|
||||
prt_printf(out, "\n from internal move ");
|
||||
bch2_bkey_val_to_text(out, op->c, bkey_i_to_s_c(u->k.k));
|
||||
}
|
||||
}
|
||||
|
||||
void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
|
||||
@@ -561,8 +593,8 @@ static void __bch2_write_index(struct bch_write_op *op)
|
||||
struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);
|
||||
|
||||
struct printbuf buf = PRINTBUF;
|
||||
__bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k));
|
||||
prt_printf(&buf, "btree update error: %s", bch2_err_str(ret));
|
||||
bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k),
|
||||
"btree update error: %s", bch2_err_str(ret));
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
@@ -1114,8 +1146,8 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
|
||||
csum_err:
|
||||
{
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op);
|
||||
prt_printf(&buf, "error verifying existing checksum while rewriting existing data (memory corruption?)");
|
||||
bch2_write_op_error(&buf, op, op->pos.offset,
|
||||
"error verifying existing checksum while rewriting existing data (memory corruption?)");
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
@@ -1211,8 +1243,8 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
|
||||
struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);
|
||||
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error_trans(trans, &buf, op, bkey_start_offset(&insert->k));
|
||||
prt_printf(&buf, "btree update error: %s", bch2_err_str(ret));
|
||||
bch2_write_op_error_trans(trans, &buf, op, bkey_start_offset(&insert->k),
|
||||
"btree update error: %s", bch2_err_str(ret));
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
@@ -1379,8 +1411,8 @@ static void bch2_nocow_write(struct bch_write_op *op)
|
||||
|
||||
if (ret) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op);
|
||||
prt_printf(&buf, "%s(): btree lookup error: %s", __func__, bch2_err_str(ret));
|
||||
bch2_write_op_error(&buf, op, op->pos.offset,
|
||||
"%s(): btree lookup error: %s", __func__, bch2_err_str(ret));
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
op->error = ret;
|
||||
@@ -1502,8 +1534,8 @@ static void __bch2_write(struct bch_write_op *op)
|
||||
if (unlikely(ret < 0)) {
|
||||
if (!(op->flags & BCH_WRITE_alloc_nowait)) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op);
|
||||
prt_printf(&buf, "%s(): %s", __func__, bch2_err_str(ret));
|
||||
bch2_write_op_error(&buf, op, op->pos.offset,
|
||||
"%s(): %s", __func__, bch2_err_str(ret));
|
||||
bch_err_ratelimited(c, "%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
}
|
||||
@@ -1634,8 +1666,8 @@ CLOSURE_CALLBACK(bch2_write)
|
||||
|
||||
if (unlikely(bio->bi_iter.bi_size & (c->opts.block_size - 1))) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bch2_write_op_error(&buf, op);
|
||||
prt_printf(&buf, "misaligned write");
|
||||
bch2_write_op_error(&buf, op, op->pos.offset,
|
||||
"misaligned write");
|
||||
printbuf_exit(&buf);
|
||||
op->error = -EIO;
|
||||
goto err;
|
||||
|
||||
@@ -20,7 +20,13 @@ static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw
|
||||
void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
|
||||
enum bch_data_type, const struct bkey_i *, bool);
|
||||
|
||||
void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op);
|
||||
__printf(5, 6)
|
||||
void bch2_write_op_error_trans(struct btree_trans *trans, struct printbuf *out,
|
||||
struct bch_write_op *op, u64, const char *, ...);
|
||||
|
||||
__printf(4, 5)
|
||||
void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op, u64,
|
||||
const char *, ...);
|
||||
|
||||
#define BCH_WRITE_FLAGS() \
|
||||
x(alloc_nowait) \
|
||||
|
||||
Reference in New Issue
Block a user