From 4c79fc2d598694bda845b46229c9d48b65042970 Mon Sep 17 00:00:00 2001
From: Raphael Zimmer <raphael.zimmer@tu-ilmenau.de>
Date: Wed, 22 Apr 2026 10:47:13 +0200
Subject: [PATCH 01/10] libceph: Fix potential out-of-bounds access in
 crush_decode()

A message of type CEPH_MSG_OSD_MAP containing a crush map with at least
one bucket has two fields holding the bucket algorithm. If the values
in these two fields differ, an out-of-bounds access can occur. This is
the case because the first algorithm field (alg) is used to allocate
the correct amount of memory for a bucket of this type, while the second
algorithm field inside the bucket (b->alg) is used in the subsequent
processing.

This patch fixes the issue by adding a check that compares alg and
b->alg and aborts the processing in case they differ. Furthermore,
b->alg is set to 0 in this case, because the destruction of the crush
map also uses this field to determine the bucket type, which can again
result in an out-of-bounds access when trying to free the memory pointed
to by the fields of the bucket. To correctly free the memory allocated
for the bucket in such a case, the corresponding call to kfree is moved
from the algorithm-specific crush_destroy_bucket functions to the
generic crush_destroy_bucket().

Cc: stable@vger.kernel.org
Signed-off-by: Raphael Zimmer <raphael.zimmer@tu-ilmenau.de>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/crush/crush.c | 6 +-----
 net/ceph/osdmap.c      | 4 ++++
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 254ded0b05f6..521aec1d5fc0 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -47,7 +47,6 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
 void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b)
 {
 	kfree(b->h.items);
-	kfree(b);
 }
 
 void crush_destroy_bucket_list(struct crush_bucket_list *b)
@@ -55,14 +54,12 @@ void crush_destroy_bucket_list(struct crush_bucket_list *b)
 	kfree(b->item_weights);
 	kfree(b->sum_weights);
 	kfree(b->h.items);
-	kfree(b);
 }
 
 void crush_destroy_bucket_tree(struct crush_bucket_tree *b)
 {
 	kfree(b->h.items);
 	kfree(b->node_weights);
-	kfree(b);
 }
 
 void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
@@ -70,14 +67,12 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
 	kfree(b->straws);
 	kfree(b->item_weights);
 	kfree(b->h.items);
-	kfree(b);
 }
 
 void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b)
 {
 	kfree(b->item_weights);
 	kfree(b->h.items);
-	kfree(b);
 }
 
 void crush_destroy_bucket(struct crush_bucket *b)
@@ -99,6 +94,7 @@ void crush_destroy_bucket(struct crush_bucket *b)
 		crush_destroy_bucket_straw2((struct crush_bucket_straw2 *)b);
 		break;
 	}
+	kfree(b);
 }
 
 /**
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index c89e66d4fcb7..a87268058e61 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -516,6 +516,10 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 		b->id = ceph_decode_32(p);
 		b->type = ceph_decode_16(p);
 		b->alg = ceph_decode_8(p);
+		if (b->alg != alg) {
+			b->alg = 0;
+			goto bad;
+		}
 		b->hash = ceph_decode_8(p);
 		b->weight = ceph_decode_32(p);
 		b->size = ceph_decode_32(p);

From 596f91294b351866956808b1ecb8dfae15382a6d Mon Sep 17 00:00:00 2001
From: Raphael Zimmer <raphael.zimmer@tu-ilmenau.de>
Date: Fri, 24 Apr 2026 15:37:37 +0200
Subject: [PATCH 02/10] libceph: Fix unnecessarily high ceph_decode_need() for
 uniform bucket

In crush_decode_uniform_bucket(), the item_weight field of the bucket
is set. This is a single field of type u32 since the uniform bucket uses
the same weight for all items. The value in ceph_decode_need() is set to
(1+b->h.size) * sizeof(u32), which is higher than actually needed.

This patch removes the call to ceph_decode_need() with the unnecessarily
high value and switches the subsequent operation from ceph_decode_32()
to ceph_decode_32_safe(), which already includes the correct bounds
check.

Signed-off-by: Raphael Zimmer <raphael.zimmer@tu-ilmenau.de>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osdmap.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index a87268058e61..669348d883f0 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -72,8 +72,7 @@ static int crush_decode_uniform_bucket(void **p, void *end,
 				       struct crush_bucket_uniform *b)
 {
 	dout("crush_decode_uniform_bucket %p to %p\n", *p, end);
-	ceph_decode_need(p, end, (1+b->h.size) * sizeof(u32), bad);
-	b->item_weight = ceph_decode_32(p);
+	ceph_decode_32_safe(p, end, b->item_weight, bad);
 	return 0;
 bad:
 	return -EINVAL;

From 5d3cc36b4e77a27ce7b686b7c59c7072bcb3fa8e Mon Sep 17 00:00:00 2001
From: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Date: Thu, 9 Apr 2026 12:26:02 -0700
Subject: [PATCH 03/10] ceph: fix a buffer leak in __ceph_setxattr()

The old_blob in __ceph_setxattr() can store
ci->i_xattrs.prealloc_blob value during the retry.
However, it is never called the ceph_buffer_put()
for the old_blob object. This patch fixes the issue of
the buffer leak.

Cc: stable@vger.kernel.org
Signed-off-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Reviewed-by: Alex Markuze <amarkuze@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/xattr.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 5f87f62091a1..c6fcbf428317 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -1294,6 +1294,7 @@ int __ceph_setxattr(struct inode *inode, const char *name,
 
 do_sync:
 	spin_unlock(&ci->i_ceph_lock);
+	ceph_buffer_put(old_blob);
 do_sync_unlocked:
 	if (lock_snap_rwsem)
 		up_read(&mdsc->snap_rwsem);

From 0c22d9511cbde746622f8e4c11aaa63fe76d45f9 Mon Sep 17 00:00:00 2001
From: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Date: Thu, 9 Apr 2026 12:43:40 -0700
Subject: [PATCH 04/10] ceph: fix BUG_ON in __ceph_build_xattrs_blob() due to
 stale blob size

The generic/642 test-case can reproduce the kernel crash:

[40243.605254] ------------[ cut here ]------------
[40243.605956] kernel BUG at fs/ceph/xattr.c:918!
[40243.607142] Oops: invalid opcode: 0000 [#1] SMP PTI
[40243.608067] CPU: 7 UID: 0 PID: 498762 Comm: kworker/7:1 Not tainted 7.0.0-rc7+ #3 PREEMPT(full)
[40243.609700] Hardware name: QEMU Ubuntu 25.10 PC v2 (i440FX + PIIX, + 10.1 machine, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[40243.611820] Workqueue: ceph-msgr ceph_con_workfn
[40243.612715] RIP: 0010:__ceph_build_xattrs_blob+0x1b8/0x1e0
[40243.613731] Code: 0f 84 82 fe ff ff e9 cf 8e 56 ff 48 8d 65 e8 31 c0 5b 41 5c 41 5d 5d 31 d2 31 c9 31 f6 31 ff 45 31 c0 45 31 c9 c3 cc cc cc cc <0f> 0b 4c 8b 62 08 41 8b 85 24 07 00 00 49 83 c4 04 41 89 44 24 fc
[40243.616888] RSP: 0018:ffffcc80c4d4b688 EFLAGS: 00010287
[40243.617773] RAX: 0000000000010026 RBX: 0000000000000001 RCX: 0000000000000000
[40243.618928] RDX: ffff8a773798dee0 RSI: 0000000000000000 RDI: 0000000000000000
[40243.620158] RBP: ffffcc80c4d4b6a0 R08: 0000000000000000 R09: 0000000000000000
[40243.621573] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8a75f3b58000
[40243.622907] R13: ffff8a75f3b58000 R14: 0000000000000080 R15: 000000000000bffd
[40243.624054] FS:  0000000000000000(0000) GS:ffff8a787d1b4000(0000) knlGS:0000000000000000
[40243.625331] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[40243.626269] CR2: 000072f390b623c0 CR3: 000000011c02a003 CR4: 0000000000372ef0
[40243.627408] Call Trace:
[40243.627839]  <TASK>
[40243.628188]  __prep_cap+0x3fd/0x4a0
[40243.628789]  ? do_raw_spin_unlock+0x4e/0xe0
[40243.629474]  ceph_check_caps+0x46a/0xc80
[40243.630094]  ? __lock_acquire+0x4a2/0x2650
[40243.630773]  ? find_held_lock+0x31/0x90
[40243.631347]  ? handle_cap_grant+0x79f/0x1060
[40243.632068]  ? lock_release+0xd9/0x300
[40243.632696]  ? __mutex_unlock_slowpath+0x3e/0x340
[40243.633429]  ? lock_release+0xd9/0x300
[40243.634052]  handle_cap_grant+0xcf6/0x1060
[40243.634745]  ceph_handle_caps+0x122b/0x2110
[40243.635415]  mds_dispatch+0x5bd/0x2160
[40243.636034]  ? ceph_con_process_message+0x65/0x190
[40243.636828]  ? lock_release+0xd9/0x300
[40243.637431]  ceph_con_process_message+0x7a/0x190
[40243.638184]  ? kfree+0x311/0x4f0
[40243.638749]  ? kfree+0x311/0x4f0
[40243.639268]  process_message+0x16/0x1a0
[40243.639915]  ? sg_free_table+0x39/0x90
[40243.640572]  ceph_con_v2_try_read+0xf58/0x2120
[40243.641255]  ? lock_acquire+0xc8/0x300
[40243.641863]  ceph_con_workfn+0x151/0x820
[40243.642493]  process_one_work+0x22f/0x630
[40243.643093]  ? process_one_work+0x254/0x630
[40243.643770]  worker_thread+0x1e2/0x400
[40243.644332]  ? __pfx_worker_thread+0x10/0x10
[40243.645020]  kthread+0x109/0x140
[40243.645560]  ? __pfx_kthread+0x10/0x10
[40243.646125]  ret_from_fork+0x3f8/0x480
[40243.646752]  ? __pfx_kthread+0x10/0x10
[40243.647316]  ? __pfx_kthread+0x10/0x10
[40243.647919]  ret_from_fork_asm+0x1a/0x30
[40243.648556]  </TASK>
[40243.648902] Modules linked in: overlay hctr2 libpolyval chacha libchacha adiantum libnh libpoly1305 essiv intel_rapl_msr intel_rapl_common intel_uncore_frequency_common skx_edac_common nfit kvm_intel kvm irqbypass joydev ghash_clmulni_intel aesni_intel rapl input_leds mac_hid psmouse vga16fb serio_raw vgastate floppy i2c_piix4 pata_acpi bochs qemu_fw_cfg i2c_smbus sch_fq_codel rbd dm_crypt msr parport_pc ppdev lp parport efi_pstore
[40243.654766] ---[ end trace 0000000000000000 ]---

Commit d93231a6bc8a ("ceph: prevent a client from exceeding the MDS
maximum xattr size") moved the required_blob_size computation to before
the __build_xattrs() call, introducing a race.

__build_xattrs() releases and reacquires i_ceph_lock during execution.
In that window, handle_cap_grant() may update i_xattrs.blob with a
newer MDS-provided blob and bump i_xattrs.version.  When
__build_xattrs() detects that index_version < version, it destroys and
rebuilds the entire xattr rb-tree from the new blob, potentially
increasing count, names_size, and vals_size.

The prealloc_blob size check that follows still uses the stale
required_blob_size computed before the rebuild, so it passes even when
prealloc_blob is too small for the now-larger tree. After __set_xattr()
adds one more xattr on top, __ceph_build_xattrs_blob() is called from
the cap flush path and hits:

    BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);

Fix this by recomputing required_blob_size after __build_xattrs()
returns, using the current tree state. Also re-validate against
m_max_xattr_size to fall back to the sync path if the rebuilt tree now
exceeds the MDS limit.

Cc: stable@vger.kernel.org
Fixes: d93231a6bc8a ("ceph: prevent a client from exceeding the MDS maximum xattr size")
Signed-off-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Reviewed-by: Alex Markuze <amarkuze@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/xattr.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index c6fcbf428317..e773be07f767 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -1254,6 +1254,22 @@ int __ceph_setxattr(struct inode *inode, const char *name,
 	      ceph_vinop(inode), name, ceph_cap_string(issued));
 	__build_xattrs(inode);
 
+	/*
+	 * __build_xattrs() may have released and reacquired i_ceph_lock,
+	 * during which handle_cap_grant() could have replaced i_xattrs.blob
+	 * with a newer MDS-provided blob and bumped i_xattrs.version. If that
+	 * caused __build_xattrs() to rebuild the rb-tree from the new blob,
+	 * count/names_size/vals_size may now be larger than when
+	 * required_blob_size was computed above. Recompute it here so the
+	 * prealloc_blob size check below reflects the current tree state.
+	 */
+	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
+	if (required_blob_size > mdsc->mdsmap->m_max_xattr_size) {
+		doutc(cl, "sync (size too large): %d > %llu\n",
+		      required_blob_size, mdsc->mdsmap->m_max_xattr_size);
+		goto do_sync;
+	}
+
 	if (!ci->i_xattrs.prealloc_blob ||
 	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
 		struct ceph_buffer *blob;

From 821365487aa58d06bda65c676ba215d506ba9768 Mon Sep 17 00:00:00 2001
From: Raphael Zimmer <raphael.zimmer@tu-ilmenau.de>
Date: Tue, 28 Apr 2026 14:15:46 +0200
Subject: [PATCH 05/10] libceph: Fix potential out-of-bounds access in
 __ceph_x_decrypt()

In __ceph_x_decrypt(), a part of the buffer p is interpreted as a
ceph_x_encrypt_header, and the magic field of this struct is accessed.
This happens without any guarantee that the buffer is large enough to
hold this struct. The function parameter ciphertext_len represents the
length of the ciphertext to decrypt and is guaranteed to be at most the
remaining size of the allocated buffer p. However, this value is not
necessarily greater than sizeof(ceph_x_encrypt_header). E.g., a message
frame of type FRAME_TAG_AUTH_REPLY_MORE, that is just as long to hold
the ciphertext at its end with a ciphertext_len of 8 or less, can
trigger an out-of-bounds memory access when accessing hdr->magic.

This patch fixes the issue by adding a check to ensure that the
decrypted plaintext in the buffer is large enough to represent at least
the ceph_x_encrypt_header.

Cc: stable@vger.kernel.org
Signed-off-by: Raphael Zimmer <raphael.zimmer@tu-ilmenau.de>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/auth_x.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 692e0b868822..9e64e82d0b63 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -115,6 +115,11 @@ static int __ceph_x_decrypt(const struct ceph_crypto_key *key, int usage_slot,
 	if (ret)
 		return ret;
 
+	if (plaintext_len < sizeof(*hdr)) {
+		pr_err("%s plaintext too small %d\n", __func__, plaintext_len);
+		return -EINVAL;
+	}
+
 	hdr = p + ceph_crypt_data_offset(key);
 	if (le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) {
 		pr_err("%s bad magic\n", __func__);

From 10d9be401108a0fc8b3bc99ba07bdee8fff875ac Mon Sep 17 00:00:00 2001
From: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Date: Thu, 9 Apr 2026 11:33:23 -0700
Subject: [PATCH 06/10] ceph: add ceph_has_realms_with_quotas() check to
 ceph_quota_update_statfs()

When MDS rejects a session, remove_session_caps() ->
__ceph_remove_cap() -> ceph_change_snap_realm() clears
i_snap_realm for every inode that loses its last cap.
The realm is restored once caps are re-granted after
reconnect. It is not a real error and this patch changes
pr_err_ratelimited_client() on doutc().

Every quota methods ceph_quota_is_max_files_exceeded(),
ceph_quota_is_max_bytes_exceeded(),
ceph_quota_is_max_bytes_approaching() calls
ceph_has_realms_with_quotas() check. This patch adds
the missing ceph_has_realms_with_quotas() call into
ceph_quota_update_statfs().

[ idryomov: add braces around both arms of multiline ifs ]

Signed-off-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Reviewed-by: Alex Markuze <amarkuze@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/quota.c | 37 +++++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 4dc9426643e8..053d5bf0c9f0 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -228,12 +228,19 @@ static int get_quota_realm(struct ceph_mds_client *mdsc, struct inode *inode,
 
 restart:
 	realm = ceph_inode(inode)->i_snap_realm;
-	if (realm)
+	if (realm) {
 		ceph_get_snap_realm(mdsc, realm);
-	else
-		pr_err_ratelimited_client(cl,
-				"%p %llx.%llx null i_snap_realm\n",
-				inode, ceph_vinop(inode));
+	} else {
+		/*
+		 * i_snap_realm is NULL when all caps have been released, e.g.
+		 * after an MDS session rejection. This is a transient state;
+		 * the realm will be restored once caps are re-granted.
+		 * Treat it as "no quota realm found".
+		 */
+		doutc(cl, "%p %llx.%llx null i_snap_realm\n",
+		      inode, ceph_vinop(inode));
+	}
+
 	while (realm) {
 		bool has_inode;
 
@@ -340,12 +347,19 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
 	down_read(&mdsc->snap_rwsem);
 restart:
 	realm = ceph_inode(inode)->i_snap_realm;
-	if (realm)
+	if (realm) {
 		ceph_get_snap_realm(mdsc, realm);
-	else
-		pr_err_ratelimited_client(cl,
-				"%p %llx.%llx null i_snap_realm\n",
-				inode, ceph_vinop(inode));
+	} else {
+		/*
+		 * i_snap_realm is NULL when all caps have been released, e.g.
+		 * after an MDS session rejection. This is a transient state;
+		 * the realm will be restored once caps are re-granted.
+		 * Treat it as "quota not exceeded".
+		 */
+		doutc(cl, "%p %llx.%llx null i_snap_realm\n",
+		      inode, ceph_vinop(inode));
+	}
+
 	while (realm) {
 		bool has_inode;
 
@@ -496,6 +510,9 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
 	u64 total = 0, used, free;
 	bool is_updated = false;
 
+	if (!ceph_has_realms_with_quotas(d_inode(fsc->sb->s_root)))
+		return false;
+
 	down_read(&mdsc->snap_rwsem);
 	get_quota_realm(mdsc, d_inode(fsc->sb->s_root), QUOTA_GET_MAX_BYTES,
 			&realm, true);

From 544576f0f05c4a759806acddfaaeb686f14fb4b0 Mon Sep 17 00:00:00 2001
From: Hristo Venev <hristo@venev.name>
Date: Mon, 4 May 2026 18:54:45 +0300
Subject: [PATCH 07/10] ceph: put folios not suitable for writeback

The batch holds references to the folios (see `filemap_get_folios`,
`folio_batch_release`), so we need to `folio_put` the folios we remove.

Tested on v6.18.

Cc: stable@vger.kernel.org
Link: https://tracker.ceph.com/issues/74156
Signed-off-by: Hristo Venev <hristo@venev.name>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/addr.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 1454760332ff..0a86f672cc09 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1336,6 +1336,7 @@ void ceph_process_folio_batch(struct address_space *mapping,
 						  ceph_wbc, folio);
 		if (rc == -ENODATA) {
 			folio_unlock(folio);
+			folio_put(folio);
 			ceph_wbc->fbatch.folios[i] = NULL;
 			continue;
 		} else if (rc == -E2BIG) {
@@ -1346,6 +1347,7 @@ void ceph_process_folio_batch(struct address_space *mapping,
 		if (!folio_clear_dirty_for_io(folio)) {
 			doutc(cl, "%p !folio_clear_dirty_for_io\n", folio);
 			folio_unlock(folio);
+			folio_put(folio);
 			ceph_wbc->fbatch.folios[i] = NULL;
 			continue;
 		}

From 35d0ed82d03e5ee77ea4f31f20e29562a7721649 Mon Sep 17 00:00:00 2001
From: Raphael Zimmer <raphael.zimmer@tu-ilmenau.de>
Date: Tue, 5 May 2026 11:08:12 +0200
Subject: [PATCH 08/10] libceph: Fix potential out-of-bounds access in
 osdmap_decode()

When decoding osd_state and osd_weight from an incoming osdmap in
osdmap_decode(), both are decoded for each osd, i.e., map->max_osd
times. The ceph_decode_need() check only accounts for
sizeof(*map->osd_weight) once. This can potentially result in an
out-of-bounds memory access if the incoming message is corrupted such
that the max_osd value exceeds the actual content of the osdmap message.

This patch fixes the issue by changing the corresponding part in the
ceph_decode_need() check to account for
map->max_osd*sizeof(*map->osd_weight).

Cc: stable@vger.kernel.org
Fixes: dcbc919a5dc8 ("libceph: switch osdmap decoding to use ceph_decode_entity_addr")
Signed-off-by: Raphael Zimmer <raphael.zimmer@tu-ilmenau.de>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osdmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 669348d883f0..2095e73ccf6c 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1705,7 +1705,7 @@ static int osdmap_decode(void **p, void *end, bool msgr2,
 	ceph_decode_need(p, end, 3*sizeof(u32) +
 			 map->max_osd*(struct_v >= 5 ? sizeof(u32) :
 						       sizeof(u8)) +
-				       sizeof(*map->osd_weight), e_inval);
+			 map->max_osd*sizeof(*map->osd_weight), e_inval);
 	if (ceph_decode_32(p) != map->max_osd)
 		goto e_inval;
 

From d289478cfc0bcf81c7914200d6abdcb78bd04ded Mon Sep 17 00:00:00 2001
From: Raphael Zimmer <raphael.zimmer@tu-ilmenau.de>
Date: Tue, 12 May 2026 09:29:30 +0200
Subject: [PATCH 09/10] libceph: handle rbtree insertion error in
 decode_choose_args()

A message of type CEPH_MSG_OSD_MAP contains an OSD map that itself
contains a CRUSH map. The received CRUSH map may optionally contain
choose_args that get decoded in decode_choose_args(). In this function,
num_choose_arg_maps is read from the message, and a corresponding number
of crush_choose_arg_maps gets decoded afterwards. Each
crush_choose_arg_map has a choose_args_index, which serves as the key
when inserting it into the choose_args rbtree of the decoded crush_map.
If a (potentially corrupted) message contains two crush_choose_arg_maps
with the same index, the assertion in insert_choose_arg_map() triggers a
kernel BUG when trying to insert the second crush_choose_arg_map.

This patch fixes the issue by switching to the non-asserting rbtree
insertion function and rejecting the message if the insertion fails.

[ idryomov: changelog ]

Cc: stable@vger.kernel.org
Signed-off-by: Raphael Zimmer <raphael.zimmer@tu-ilmenau.de>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osdmap.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 2095e73ccf6c..0752a3808b91 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -392,7 +392,10 @@ static int decode_choose_args(void **p, void *end, struct crush_map *c)
 				goto e_inval;
 		}
 
-		insert_choose_arg_map(&c->choose_args, arg_map);
+		if (!__insert_choose_arg_map(&c->choose_args, arg_map)) {
+			ret = -EEXIST;
+			goto fail;
+		}
 	}
 
 	return 0;

From 28b0a2ab8c82d0bbdeb8013029c67c978ce6e4bf Mon Sep 17 00:00:00 2001
From: Raphael Zimmer <raphael.zimmer@tu-ilmenau.de>
Date: Tue, 12 May 2026 18:16:40 +0200
Subject: [PATCH 10/10] libceph: Fix potential null-ptr-deref in
 decode_choose_args()

A message of type CEPH_MSG_OSD_MAP contains an OSD map that itself
contains a CRUSH map. When decoding this CRUSH map in crush_decode(), an
array of max_buckets CRUSH buckets is decoded, where some indices may
not refer to actual buckets and are therefore set to NULL. The received
CRUSH map may optionally contain choose_args that get decoded in
decode_choose_args(). When decoding a crush_choose_arg_map, a series of
choose_args for different buckets is decoded, with the bucket_index
being read from the incoming message. It is only checked that the bucket
index does not exceed max_buckets, but not that it doesn't point to an
index with a NULL bucket. If a (potentially corrupted) message contains
a crush_choose_arg_map including such a bucket_index, a null pointer
dereference may occur in the subsequent processing when attempting to
access the bucket with the given index.

This patch fixes the issue by extending the affected check. Now, it is
only attempted to access the bucket if it is not NULL.

Cc: stable@vger.kernel.org
Signed-off-by: Raphael Zimmer <raphael.zimmer@tu-ilmenau.de>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osdmap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 0752a3808b91..8b5b0587a0cf 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -388,7 +388,8 @@ static int decode_choose_args(void **p, void *end, struct crush_map *c)
 				goto fail;
 
 			if (arg->ids_size &&
-			    arg->ids_size != c->buckets[bucket_index]->size)
+			    (!c->buckets[bucket_index] ||
+			     arg->ids_size != c->buckets[bucket_index]->size))
 				goto e_inval;
 		}