From 3a04334d6282d08fbdd6201e374db17d31927ba3 Mon Sep 17 00:00:00 2001 From: Alan Huang Date: Sat, 8 Mar 2025 00:58:27 +0800 Subject: [PATCH 1/6] bcachefs: Fix b->written overflow When bset past end of btree node, we should not add sectors to b->written, which will overflow b->written. Reported-by: syzbot+3cb3d9e8c3f197754825@syzkaller.appspotmail.com Tested-by: syzbot+3cb3d9e8c3f197754825@syzkaller.appspotmail.com Signed-off-by: Alan Huang Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index dece27d9db04..756736f9243d 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1186,7 +1186,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, le64_to_cpu(i->journal_seq), b->written, b->written + sectors, ptr_written); - b->written += sectors; + b->written = min(b->written + sectors, btree_sectors(c)); if (blacklisted && !first) continue; From 58517f4df8424ec28dfe7290ccc61908eda57aae Mon Sep 17 00:00:00 2001 From: Roxana Nicolescu Date: Tue, 11 Mar 2025 15:06:10 +0000 Subject: [PATCH 2/6] bcachefs: Initialize from_inode members for bch_io_opts When there is no inode source, all "from_inode" members in the structure bhc_io_opts should be set false. Fixes: 7a7c43a0c1ecf ("bcachefs: Add bch_io_opts fields for indicating whether the opts came from the inode") Reported-by: syzbot+c17ad4b4367b72a853cb@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c17ad4b4367b72a853cb Signed-off-by: Roxana Nicolescu Signed-off-by: Kent Overstreet --- fs/bcachefs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 04ec05206f8c..339b80770f1d 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -1198,6 +1198,7 @@ void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c, opts->_name##_from_inode = true; \ } else { \ opts->_name = c->opts._name; \ + opts->_name##_from_inode = false; \ } BCH_INODE_OPTS() #undef x From dbac8feb23382af1efa2e1a86049e079b6e42e12 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 11 Mar 2025 10:39:36 -0400 Subject: [PATCH 3/6] bcachefs: Make sure trans is unlocked when submitting read IO We were still using the trans after the unlock, leading to this bug in the retry path: 00255 ------------[ cut here ]------------ 00255 kernel BUG at fs/bcachefs/btree_iter.c:3348! 00255 Internal error: Oops - BUG: 00000000f2000800 [#1] SMP 00255 bcachefs (0ca38fe8-0a26-41f9-9b5d-6a27796c7803): /fiotest offset 86048768: no device to read from: 00255 u64s 8 type extent 4098:168192:U32_MAX len 128 ver 0: durability: 0 crc: c_size 128 size 128 offset 0 nonce 0 csum crc32c 0:8040a368 compress none ec: idx 83 block 1 ptr: 0:302:128 gen 0 00255 bcachefs (0ca38fe8-0a26-41f9-9b5d-6a27796c7803): /fiotest offset 85983232: no device to read from: 00255 u64s 8 type extent 4098:168064:U32_MAX len 128 ver 0: durability: 0 crc: c_size 128 size 128 offset 0 nonce 0 csum crc32c 0:43311336 compress none ec: idx 83 block 1 ptr: 0:302:0 gen 0 00255 Modules linked in: 00255 CPU: 5 UID: 0 PID: 304 Comm: kworker/u70:2 Not tainted 6.14.0-rc6-ktest-g526aae23d67d #16040 00255 Hardware name: linux,dummy-virt (DT) 00255 Workqueue: events_unbound bch2_rbio_retry 00255 pstate: 60001005 (nZCv daif -PAN -UAO -TCO -DIT +SSBS BTYPE=--) 00255 pc : __bch2_trans_get+0x100/0x378 00255 lr : __bch2_trans_get+0xa0/0x378 00255 sp : ffffff80c865b760 00255 x29: ffffff80c865b760 x28: 0000000000000000 x27: ffffff80d76ed880 00255 x26: 0000000000000018 x25: 0000000000000000 x24: ffffff80f4ec3760 00255 x23: ffffff80f4010140 x22: 0000000000000056 x21: ffffff80f4ec0000 00255 x20: ffffff80f4ec3788 x19: ffffff80d75f8000 x18: 00000000ffffffff 00255 x17: 2065707974203820 x16: 7334367520200a3a x15: 0000000000000008 00255 x14: 0000000000000001 x13: 0000000000000100 x12: 0000000000000006 00255 x11: ffffffc080b47a40 x10: 0000000000000000 x9 : ffffffc08038dea8 00255 x8 : ffffff80d75fc018 x7 : 0000000000000000 x6 : 0000000000003788 00255 x5 : 0000000000003760 x4 : ffffff80c922de80 x3 : ffffff80f18f0000 00255 x2 : ffffff80c922de80 x1 : 0000000000000130 x0 : 0000000000000006 00255 Call trace: 00255 __bch2_trans_get+0x100/0x378 (P) 00255 bch2_read_io_err+0x98/0x260 00255 bch2_read_endio+0xb8/0x2d0 00255 __bch2_read_extent+0xce8/0xfe0 00255 __bch2_read+0x2a8/0x978 00255 bch2_rbio_retry+0x188/0x318 00255 process_one_work+0x154/0x390 00255 worker_thread+0x20c/0x3b8 00255 kthread+0xf0/0x1b0 00255 ret_from_fork+0x10/0x20 00255 Code: 6b01001f 54ffff01 79408460 3617fec0 (d4210000) 00255 ---[ end trace 0000000000000000 ]--- 00255 Kernel panic - not syncing: Oops - BUG: Fatal exception 00255 SMP: stopping secondary CPUs 00255 Kernel Offset: disabled 00255 CPU features: 0x000,00000070,00000010,8240500b 00255 Memory Limit: none 00255 ---[ end Kernel panic - not syncing: Oops - BUG: Fatal exception ]--- Signed-off-by: Kent Overstreet --- fs/bcachefs/io_read.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 8c7b2d3d779d..726da68073e2 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -951,12 +951,6 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, goto retry_pick; } - /* - * Unlock the iterator while the btree node's lock is still in - * cache, before doing the IO: - */ - bch2_trans_unlock(trans); - if (flags & BCH_READ_NODECODE) { /* * can happen if we retry, and the extent we were going to read @@ -1113,6 +1107,15 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, trace_and_count(c, read_split, &orig->bio); } + /* + * Unlock the iterator while the btree node's lock is still in + * cache, before doing the IO: + */ + if (!(flags & BCH_READ_IN_RETRY)) + bch2_trans_unlock(trans); + else + bch2_trans_unlock_long(trans); + if (!rbio->pick.idx) { if (unlikely(!rbio->have_ioref)) { struct printbuf buf = PRINTBUF; @@ -1160,6 +1163,8 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, if (likely(!(flags & BCH_READ_IN_RETRY))) { return 0; } else { + bch2_trans_unlock(trans); + int ret; rbio->context = RBIO_CONTEXT_UNBOUND; From 3bcde88d381a336ff252d67867c186ee602e6656 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 12 Mar 2025 17:21:31 -0400 Subject: [PATCH 4/6] bcachefs: fix tiny leak in bch2_dev_add() Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 6d97d412fed9..0459c875e189 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1811,7 +1811,11 @@ int bch2_dev_add(struct bch_fs *c, const char *path) goto err_late; up_write(&c->state_lock); - return 0; +out: + printbuf_exit(&label); + printbuf_exit(&errbuf); + bch_err_fn(c, ret); + return ret; err_unlock: mutex_unlock(&c->sb_lock); @@ -1820,10 +1824,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) if (ca) bch2_dev_free(ca); bch2_free_super(&sb); - printbuf_exit(&label); - printbuf_exit(&errbuf); - bch_err_fn(c, ret); - return ret; + goto out; err_late: up_write(&c->state_lock); ca = NULL; From 69a5a13a22b1def29dce62b5b7c86e6098c20c68 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 13 Mar 2025 09:56:07 -0400 Subject: [PATCH 5/6] bcachefs: target_congested -> get_random_u32_below() get_random_u32_below() has a better algorithm than bch2_rand_range(), it just didn't exist at the time. Signed-off-by: Kent Overstreet --- fs/bcachefs/io_read.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 726da68073e2..aa91fcf51eec 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -59,7 +59,7 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target) } rcu_read_unlock(); - return bch2_rand_range(nr * CONGESTED_MAX) < total; + return get_random_u32_below(nr * CONGESTED_MAX) < total; } #else From 9c18ea7ffee090b47afaa7dc41903fb1b436d7bd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 13 Mar 2025 11:16:28 -0400 Subject: [PATCH 6/6] bcachefs: bch2_get_random_u64_below() steal the (clever) algorithm from get_random_u32_below() this fixes a bug where we were passing roundup_pow_of_two() a 64 bit number - we're squaring device latencies now: [ +1.681698] ------------[ cut here ]------------ [ +0.000010] UBSAN: shift-out-of-bounds in ./include/linux/log2.h:57:13 [ +0.000011] shift exponent 64 is too large for 64-bit type 'long unsigned int' [ +0.000011] CPU: 1 UID: 0 PID: 196 Comm: kworker/u32:13 Not tainted 6.14.0-rc6-dave+ #10 [ +0.000012] Hardware name: ASUS System Product Name/PRIME B460I-PLUS, BIOS 1301 07/13/2021 [ +0.000005] Workqueue: events_unbound __bch2_read_endio [bcachefs] [ +0.000354] Call Trace: [ +0.000005] [ +0.000007] dump_stack_lvl+0x5d/0x80 [ +0.000018] ubsan_epilogue+0x5/0x30 [ +0.000008] __ubsan_handle_shift_out_of_bounds.cold+0x61/0xe6 [ +0.000011] bch2_rand_range.cold+0x17/0x20 [bcachefs] [ +0.000231] bch2_bkey_pick_read_device+0x547/0x920 [bcachefs] [ +0.000229] __bch2_read_extent+0x1e4/0x18e0 [bcachefs] [ +0.000241] ? bch2_btree_iter_peek_slot+0x3df/0x800 [bcachefs] [ +0.000180] ? bch2_read_retry_nodecode+0x270/0x330 [bcachefs] [ +0.000230] bch2_read_retry_nodecode+0x270/0x330 [bcachefs] [ +0.000230] bch2_rbio_retry+0x1fa/0x600 [bcachefs] [ +0.000224] ? bch2_printbuf_make_room+0x71/0xb0 [bcachefs] [ +0.000243] ? bch2_read_csum_err+0x4a4/0x610 [bcachefs] [ +0.000278] bch2_read_csum_err+0x4a4/0x610 [bcachefs] [ +0.000227] ? __bch2_read_endio+0x58b/0x870 [bcachefs] [ +0.000220] __bch2_read_endio+0x58b/0x870 [bcachefs] [ +0.000268] ? try_to_wake_up+0x31c/0x7f0 [ +0.000011] ? process_one_work+0x176/0x330 [ +0.000008] process_one_work+0x176/0x330 [ +0.000008] worker_thread+0x252/0x390 [ +0.000008] ? __pfx_worker_thread+0x10/0x10 [ +0.000006] kthread+0xec/0x230 [ +0.000011] ? __pfx_kthread+0x10/0x10 [ +0.000009] ret_from_fork+0x31/0x50 [ +0.000009] ? __pfx_kthread+0x10/0x10 [ +0.000008] ret_from_fork_asm+0x1a/0x30 [ +0.000012] [ +0.000046] ---[ end trace ]--- Reported-by: Roland Vet Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.c | 2 +- fs/bcachefs/util.c | 23 ++++++++++++++--------- fs/bcachefs/util.h | 2 +- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 05d5f71a7ca9..2d8042f853dc 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -99,7 +99,7 @@ static inline bool ptr_better(struct bch_fs *c, /* Pick at random, biased in favor of the faster device: */ - return bch2_rand_range(l1 + l2) > l1; + return bch2_get_random_u64_below(l1 + l2) > l1; } if (bch2_force_reconstruct_read) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index e0a876cbaa6b..8e3ab4bf79a9 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -653,19 +653,24 @@ int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask) return 0; } -size_t bch2_rand_range(size_t max) +u64 bch2_get_random_u64_below(u64 ceil) { - size_t rand; + if (ceil <= U32_MAX) + return __get_random_u32_below(ceil); - if (!max) - return 0; + /* this is the same (clever) algorithm as in __get_random_u32_below() */ + u64 rand = get_random_u64(); + u64 mult = ceil * rand; - do { - rand = get_random_long(); - rand &= roundup_pow_of_two(max) - 1; - } while (rand >= max); + if (unlikely(mult < ceil)) { + u64 bound = -ceil % ceil; + while (unlikely(mult < bound)) { + rand = get_random_u64(); + mult = ceil * rand; + } + } - return rand; + return mul_u64_u64_shr(ceil, rand, 64); } void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src) diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index e7c3541b38f3..f4a4783219d9 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -401,7 +401,7 @@ do { \ _ret; \ }) -size_t bch2_rand_range(size_t); +u64 bch2_get_random_u64_below(u64); void memcpy_to_bio(struct bio *, struct bvec_iter, const void *); void memcpy_from_bio(void *, struct bio *, struct bvec_iter);