From 27c0a7b05d13a0dc54ed0b95fc12218210fdea1a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 31 Jul 2025 12:02:27 -0700 Subject: [PATCH 01/15] libceph: Use HMAC-SHA256 library instead of crypto_shash Use the HMAC-SHA256 library functions instead of crypto_shash. This is simpler and faster. Signed-off-by: Eric Biggers Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 4 +- net/ceph/Kconfig | 3 +- net/ceph/messenger_v2.c | 77 ++++++++++------------------------ 3 files changed, 26 insertions(+), 58 deletions(-) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 1717cc57cdac..4b49592a738f 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -2,6 +2,7 @@ #ifndef __FS_CEPH_MESSENGER_H #define __FS_CEPH_MESSENGER_H +#include #include #include #include @@ -412,7 +413,8 @@ struct ceph_connection_v2_info { struct ceph_msg_data_cursor in_cursor; struct ceph_msg_data_cursor out_cursor; - struct crypto_shash *hmac_tfm; /* post-auth signature */ + struct hmac_sha256_key hmac_key; /* post-auth signature */ + bool hmac_key_set; struct crypto_aead *gcm_tfm; /* on-wire encryption */ struct aead_request *gcm_req; struct crypto_wait gcm_wait; diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig index 0aa21fcbf6ec..ea60e3ef0834 100644 --- a/net/ceph/Kconfig +++ b/net/ceph/Kconfig @@ -6,8 +6,7 @@ config CEPH_LIB select CRYPTO_AES select CRYPTO_CBC select CRYPTO_GCM - select CRYPTO_HMAC - select CRYPTO_SHA256 + select CRYPTO_LIB_SHA256 select CRYPTO select KEYS default n diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index 5483b4eed94e..c54c8b5a6526 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -709,7 +709,7 @@ static int setup_crypto(struct ceph_connection *con, dout("%s con %p con_mode %d session_key_len %d con_secret_len %d\n", __func__, con, con->v2.con_mode, session_key_len, con_secret_len); - WARN_ON(con->v2.hmac_tfm || 
con->v2.gcm_tfm || con->v2.gcm_req); + WARN_ON(con->v2.hmac_key_set || con->v2.gcm_tfm || con->v2.gcm_req); if (con->v2.con_mode != CEPH_CON_MODE_CRC && con->v2.con_mode != CEPH_CON_MODE_SECURE) { @@ -723,22 +723,8 @@ static int setup_crypto(struct ceph_connection *con, return 0; /* auth_none */ } - noio_flag = memalloc_noio_save(); - con->v2.hmac_tfm = crypto_alloc_shash("hmac(sha256)", 0, 0); - memalloc_noio_restore(noio_flag); - if (IS_ERR(con->v2.hmac_tfm)) { - ret = PTR_ERR(con->v2.hmac_tfm); - con->v2.hmac_tfm = NULL; - pr_err("failed to allocate hmac tfm context: %d\n", ret); - return ret; - } - - ret = crypto_shash_setkey(con->v2.hmac_tfm, session_key, - session_key_len); - if (ret) { - pr_err("failed to set hmac key: %d\n", ret); - return ret; - } + hmac_sha256_preparekey(&con->v2.hmac_key, session_key, session_key_len); + con->v2.hmac_key_set = true; if (con->v2.con_mode == CEPH_CON_MODE_CRC) { WARN_ON(con_secret_len); @@ -793,38 +779,26 @@ static int setup_crypto(struct ceph_connection *con, return 0; /* auth_x, secure mode */ } -static int ceph_hmac_sha256(struct ceph_connection *con, - const struct kvec *kvecs, int kvec_cnt, u8 *hmac) +static void ceph_hmac_sha256(struct ceph_connection *con, + const struct kvec *kvecs, int kvec_cnt, + u8 hmac[SHA256_DIGEST_SIZE]) { - SHASH_DESC_ON_STACK(desc, con->v2.hmac_tfm); /* tfm arg is ignored */ - int ret; + struct hmac_sha256_ctx ctx; int i; - dout("%s con %p hmac_tfm %p kvec_cnt %d\n", __func__, con, - con->v2.hmac_tfm, kvec_cnt); + dout("%s con %p hmac_key_set %d kvec_cnt %d\n", __func__, con, + con->v2.hmac_key_set, kvec_cnt); - if (!con->v2.hmac_tfm) { + if (!con->v2.hmac_key_set) { memset(hmac, 0, SHA256_DIGEST_SIZE); - return 0; /* auth_none */ + return; /* auth_none */ } - desc->tfm = con->v2.hmac_tfm; - ret = crypto_shash_init(desc); - if (ret) - goto out; - - for (i = 0; i < kvec_cnt; i++) { - ret = crypto_shash_update(desc, kvecs[i].iov_base, - kvecs[i].iov_len); - if (ret) - goto out; - } - - ret = 
crypto_shash_final(desc, hmac); - -out: - shash_desc_zero(desc); - return ret; /* auth_x, both plain and secure modes */ + /* auth_x, both plain and secure modes */ + hmac_sha256_init(&ctx, &con->v2.hmac_key); + for (i = 0; i < kvec_cnt; i++) + hmac_sha256_update(&ctx, kvecs[i].iov_base, kvecs[i].iov_len); + hmac_sha256_final(&ctx, hmac); } static void gcm_inc_nonce(struct ceph_gcm_nonce *nonce) @@ -1455,17 +1429,14 @@ static int prepare_auth_request_more(struct ceph_connection *con, static int prepare_auth_signature(struct ceph_connection *con) { void *buf; - int ret; buf = alloc_conn_buf(con, head_onwire_len(SHA256_DIGEST_SIZE, con_secure(con))); if (!buf) return -ENOMEM; - ret = ceph_hmac_sha256(con, con->v2.in_sign_kvecs, - con->v2.in_sign_kvec_cnt, CTRL_BODY(buf)); - if (ret) - return ret; + ceph_hmac_sha256(con, con->v2.in_sign_kvecs, con->v2.in_sign_kvec_cnt, + CTRL_BODY(buf)); return prepare_control(con, FRAME_TAG_AUTH_SIGNATURE, buf, SHA256_DIGEST_SIZE); @@ -2460,10 +2431,8 @@ static int process_auth_signature(struct ceph_connection *con, return -EINVAL; } - ret = ceph_hmac_sha256(con, con->v2.out_sign_kvecs, - con->v2.out_sign_kvec_cnt, hmac); - if (ret) - return ret; + ceph_hmac_sha256(con, con->v2.out_sign_kvecs, con->v2.out_sign_kvec_cnt, + hmac); ceph_decode_need(&p, end, SHA256_DIGEST_SIZE, bad); if (crypto_memneq(p, hmac, SHA256_DIGEST_SIZE)) { @@ -3814,10 +3783,8 @@ void ceph_con_v2_reset_protocol(struct ceph_connection *con) memzero_explicit(&con->v2.in_gcm_nonce, CEPH_GCM_IV_LEN); memzero_explicit(&con->v2.out_gcm_nonce, CEPH_GCM_IV_LEN); - if (con->v2.hmac_tfm) { - crypto_free_shash(con->v2.hmac_tfm); - con->v2.hmac_tfm = NULL; - } + memzero_explicit(&con->v2.hmac_key, sizeof(con->v2.hmac_key)); + con->v2.hmac_key_set = false; if (con->v2.gcm_req) { aead_request_free(con->v2.gcm_req); con->v2.gcm_req = NULL; From fa073039466f16141807a0f32840ecdceb00e22a Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Fri, 6 Dec 2024 17:50:14 +0100 Subject: 
[PATCH 02/15] ceph: make ceph_start_io_*() killable This allows killing processes that wait for a lock when one process is stuck waiting for the Ceph server. This is similar to the NFS commit 38a125b31504 ("fs/nfs/io: make nfs_start_io_*() killable"). [ idryomov: drop comment on include, formatting ] Signed-off-by: Max Kellermann Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 20 +++++++++++--------- fs/ceph/io.c | 47 +++++++++++++++++++++++++++++++++-------------- fs/ceph/io.h | 8 +++++--- 3 files changed, 49 insertions(+), 26 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 978acd3d4b32..08681cf66137 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -2121,10 +2121,10 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) if (ceph_inode_is_shutdown(inode)) return -ESTALE; - if (direct_lock) - ceph_start_io_direct(inode); - else - ceph_start_io_read(inode); + ret = direct_lock ? ceph_start_io_direct(inode) : + ceph_start_io_read(inode); + if (ret) + return ret; if (!(fi->flags & CEPH_F_SYNC) && !direct_lock) want |= CEPH_CAP_FILE_CACHE; @@ -2277,7 +2277,9 @@ static ssize_t ceph_splice_read(struct file *in, loff_t *ppos, (fi->flags & CEPH_F_SYNC)) return copy_splice_read(in, ppos, pipe, len, flags); - ceph_start_io_read(inode); + ret = ceph_start_io_read(inode); + if (ret) + return ret; want = CEPH_CAP_FILE_CACHE; if (fi->fmode & CEPH_FILE_MODE_LAZY) @@ -2356,10 +2358,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) direct_lock = true; retry_snap: - if (direct_lock) - ceph_start_io_direct(inode); - else - ceph_start_io_write(inode); + err = direct_lock ? 
ceph_start_io_direct(inode) : + ceph_start_io_write(inode); + if (err) + goto out_unlocked; if (iocb->ki_flags & IOCB_APPEND) { err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false); diff --git a/fs/ceph/io.c b/fs/ceph/io.c index c456509b31c3..e10f44182a4c 100644 --- a/fs/ceph/io.c +++ b/fs/ceph/io.c @@ -47,20 +47,29 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode) * Note that buffered writes and truncates both take a write lock on * inode->i_rwsem, meaning that those are serialised w.r.t. the reads. */ -void -ceph_start_io_read(struct inode *inode) +int ceph_start_io_read(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); + int err; /* Be an optimist! */ - down_read(&inode->i_rwsem); + err = down_read_killable(&inode->i_rwsem); + if (err) + return err; + if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) - return; + return 0; up_read(&inode->i_rwsem); + /* Slow path.... */ - down_write(&inode->i_rwsem); + err = down_write_killable(&inode->i_rwsem); + if (err) + return err; + ceph_block_o_direct(ci, inode); downgrade_write(&inode->i_rwsem); + + return 0; } /** @@ -83,11 +92,12 @@ ceph_end_io_read(struct inode *inode) * Declare that a buffered write operation is about to start, and ensure * that we block all direct I/O. */ -void -ceph_start_io_write(struct inode *inode) +int ceph_start_io_write(struct inode *inode) { - down_write(&inode->i_rwsem); - ceph_block_o_direct(ceph_inode(inode), inode); + int err = down_write_killable(&inode->i_rwsem); + if (!err) + ceph_block_o_direct(ceph_inode(inode), inode); + return err; } /** @@ -133,20 +143,29 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode) * Note that buffered writes and truncates both take a write lock on * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT. 
*/ -void -ceph_start_io_direct(struct inode *inode) +int ceph_start_io_direct(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); + int err; /* Be an optimist! */ - down_read(&inode->i_rwsem); + err = down_read_killable(&inode->i_rwsem); + if (err) + return err; + if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) - return; + return 0; up_read(&inode->i_rwsem); + /* Slow path.... */ - down_write(&inode->i_rwsem); + err = down_write_killable(&inode->i_rwsem); + if (err) + return err; + ceph_block_buffered(ci, inode); downgrade_write(&inode->i_rwsem); + + return 0; } /** diff --git a/fs/ceph/io.h b/fs/ceph/io.h index fa594cd77348..79029825e8b8 100644 --- a/fs/ceph/io.h +++ b/fs/ceph/io.h @@ -2,11 +2,13 @@ #ifndef _FS_CEPH_IO_H #define _FS_CEPH_IO_H -void ceph_start_io_read(struct inode *inode); +#include + +int __must_check ceph_start_io_read(struct inode *inode); void ceph_end_io_read(struct inode *inode); -void ceph_start_io_write(struct inode *inode); +int __must_check ceph_start_io_write(struct inode *inode); void ceph_end_io_write(struct inode *inode); -void ceph_start_io_direct(struct inode *inode); +int __must_check ceph_start_io_direct(struct inode *inode); void ceph_end_io_direct(struct inode *inode); #endif /* FS_CEPH_IO_H */ From b7ed1e29cfe773d648ca09895b92856bd3a2092d Mon Sep 17 00:00:00 2001 From: Viacheslav Dubeyko Date: Fri, 6 Jun 2025 12:04:32 -0700 Subject: [PATCH 03/15] ceph: add checking of wait_for_completion_killable() return value The Coverity Scan service has detected the calling of wait_for_completion_killable() without checking the return value in ceph_lock_wait_for_completion() [1]. The CID 1636232 defect contains explanation: "If the function returns an error value, the error value may be mistaken for a normal value. In ceph_lock_wait_for_completion(): Value returned from a function is not checked for errors before being used. (CWE-252)". 
The patch adds a check of the wait_for_completion_killable() return value and returns the error code from ceph_lock_wait_for_completion(). [1] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1636232 Signed-off-by: Viacheslav Dubeyko Reviewed-by: Alex Markuze Signed-off-by: Ilya Dryomov --- fs/ceph/locks.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index ebf4ac0055dd..dd764f9c64b9 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -221,7 +221,10 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, if (err && err != -ERESTARTSYS) return err; - wait_for_completion_killable(&req->r_safe_completion); + err = wait_for_completion_killable(&req->r_safe_completion); + if (err) + return err; + return 0; } From 1ed4471a4ee6cfa902467332042158ca5ef8ad24 Mon Sep 17 00:00:00 2001 From: Viacheslav Dubeyko Date: Fri, 6 Jun 2025 12:05:21 -0700 Subject: [PATCH 04/15] ceph: fix wrong sizeof argument issue in register_session() The Coverity Scan service has detected the wrong sizeof argument in register_session() [1]. The CID 1598909 defect contains the explanation: "The wrong sizeof value is used in an expression or as argument to a function. The result is an incorrect value that may cause unexpected program behaviors. In register_session: The sizeof operator is invoked on the wrong argument (CWE-569)". The patch introduces a ptr_size variable that is initialized to sizeof(struct ceph_mds_session *); this variable is then used instead of sizeof(void *) in the code.
[1] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1598909 Signed-off-by: Viacheslav Dubeyko Reviewed-by: Alex Markuze Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 3bc72b47fe4d..aa2f74142cf4 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -979,14 +979,15 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, if (mds >= mdsc->max_sessions) { int newmax = 1 << get_count_order(mds + 1); struct ceph_mds_session **sa; + size_t ptr_size = sizeof(struct ceph_mds_session *); doutc(cl, "realloc to %d\n", newmax); - sa = kcalloc(newmax, sizeof(void *), GFP_NOFS); + sa = kcalloc(newmax, ptr_size, GFP_NOFS); if (!sa) goto fail_realloc; if (mdsc->sessions) { memcpy(sa, mdsc->sessions, - mdsc->max_sessions * sizeof(void *)); + mdsc->max_sessions * ptr_size); kfree(mdsc->sessions); } mdsc->sessions = sa; From 5b2d1377d6cc4147492780b0bd95fb9c4cb28d1b Mon Sep 17 00:00:00 2001 From: Viacheslav Dubeyko Date: Fri, 6 Jun 2025 12:05:45 -0700 Subject: [PATCH 05/15] ceph: fix overflowed constant issue in ceph_do_objects_copy() The Coverity Scan service has detected an overflowed constant issue in ceph_do_objects_copy() [1]. The CID 1624308 defect contains the explanation: "The overflowed value due to arithmetic on constants is too small or unexpectedly negative, causing incorrect computations. Expression bytes, which is equal to -95, where ret is known to be equal to -95, underflows the type that receives it, an unsigned integer 64 bits wide. In ceph_do_objects_copy: Integer overflow occurs in arithmetic on constant operands (CWE-190)". The patch changes the type of the bytes variable from size_t to ssize_t so that it is capable of holding negative values.
[1] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1624308 Signed-off-by: Viacheslav Dubeyko Reviewed-by: Alex Markuze Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 08681cf66137..07052f331611 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -2880,7 +2880,7 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off struct ceph_object_id src_oid, dst_oid; struct ceph_osd_client *osdc; struct ceph_osd_request *req; - size_t bytes = 0; + ssize_t bytes = 0; u64 src_objnum, src_objoff, dst_objnum, dst_objoff; u32 src_objlen, dst_objlen; u32 object_size = src_ci->i_layout.object_size; @@ -2930,7 +2930,7 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off "OSDs don't support copy-from2; disabling copy offload\n"); } doutc(cl, "returned %d\n", ret); - if (!bytes) + if (bytes <= 0) bytes = ret; goto out; } From 5824ccba9a39a3ad914fc9b2972a2c1119abaac9 Mon Sep 17 00:00:00 2001 From: Viacheslav Dubeyko Date: Fri, 13 Jun 2025 11:31:08 -0700 Subject: [PATCH 06/15] ceph: fix potential race condition in ceph_ioctl_lazyio() The Coverity Scan service has detected potential race condition in ceph_ioctl_lazyio() [1]. The CID 1591046 contains explanation: "Check of thread-shared field evades lock acquisition (LOCK_EVASION). Thread1 sets fmode to a new value. Now the two threads have an inconsistent view of fmode and updates to fields correlated with fmode may be lost. The data guarded by this critical section may be read while in an inconsistent state or modified by multiple racing threads. In ceph_ioctl_lazyio: Checking the value of a thread-shared field outside of a locked region to determine if a locked operation involving that thread shared field has completed. (CWE-543)". The patch places fi->fmode field access under ci->i_ceph_lock protection. 
Also, it introduces the is_file_already_lazy variable that is set under the lock and it is checked later out of scope of critical section. [1] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1591046 Signed-off-by: Viacheslav Dubeyko Reviewed-by: Alex Markuze Signed-off-by: Ilya Dryomov --- fs/ceph/ioctl.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index e861de3c79b9..15cde055f3da 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -246,21 +246,28 @@ static long ceph_ioctl_lazyio(struct file *file) struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; struct ceph_client *cl = mdsc->fsc->client; + bool is_file_already_lazy = false; + spin_lock(&ci->i_ceph_lock); if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) { - spin_lock(&ci->i_ceph_lock); fi->fmode |= CEPH_FILE_MODE_LAZY; ci->i_nr_by_mode[ffs(CEPH_FILE_MODE_LAZY)]++; __ceph_touch_fmode(ci, mdsc, fi->fmode); - spin_unlock(&ci->i_ceph_lock); + } else { + is_file_already_lazy = true; + } + spin_unlock(&ci->i_ceph_lock); + + if (is_file_already_lazy) { + doutc(cl, "file %p %p %llx.%llx already lazy\n", file, inode, + ceph_vinop(inode)); + } else { doutc(cl, "file %p %p %llx.%llx marked lazy\n", file, inode, ceph_vinop(inode)); ceph_check_caps(ci, 0); - } else { - doutc(cl, "file %p %p %llx.%llx already lazy\n", file, inode, - ceph_vinop(inode)); } + return 0; } From 53db6f25ee47cb1265141d31562604e56146919a Mon Sep 17 00:00:00 2001 From: Viacheslav Dubeyko Date: Tue, 8 Jul 2025 12:20:57 -0700 Subject: [PATCH 07/15] ceph: refactor wake_up_bit() pattern of calling The wake_up_bit() is called in ceph_async_unlink_cb(), wake_async_create_waiters(), and ceph_finish_async_create(). It makes sense to switch on clear_bit() function, because it makes the code much cleaner and easier to understand. 
More important rework is the adding of smp_mb__after_atomic() memory barrier after the bit modification and before wake_up_bit() call. It can prevent potential race condition of accessing the modified bit in other threads. Luckily, clear_and_wake_up_bit() already implements the required functionality pattern: static inline void clear_and_wake_up_bit(int bit, unsigned long *word) { clear_bit_unlock(bit, word); /* See wake_up_bit() for which memory barrier you need to use. */ smp_mb__after_atomic(); wake_up_bit(word, bit); } Signed-off-by: Viacheslav Dubeyko Reviewed-by: Alex Markuze Signed-off-by: Ilya Dryomov --- fs/ceph/dir.c | 3 +-- fs/ceph/file.c | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 32973c62c1a2..d18c0eaef9b7 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1260,8 +1260,7 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, spin_unlock(&fsc->async_unlink_conflict_lock); spin_lock(&dentry->d_lock); - di->flags &= ~CEPH_DENTRY_ASYNC_UNLINK; - wake_up_bit(&di->flags, CEPH_DENTRY_ASYNC_UNLINK_BIT); + clear_and_wake_up_bit(CEPH_DENTRY_ASYNC_UNLINK_BIT, &di->flags); spin_unlock(&dentry->d_lock); synchronize_rcu(); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 07052f331611..99b30f784ee2 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -579,8 +579,7 @@ static void wake_async_create_waiters(struct inode *inode, spin_lock(&ci->i_ceph_lock); if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) { - ci->i_ceph_flags &= ~CEPH_I_ASYNC_CREATE; - wake_up_bit(&ci->i_ceph_flags, CEPH_ASYNC_CREATE_BIT); + clear_and_wake_up_bit(CEPH_ASYNC_CREATE_BIT, &ci->i_ceph_flags); if (ci->i_ceph_flags & CEPH_I_ASYNC_CHECK_CAPS) { ci->i_ceph_flags &= ~CEPH_I_ASYNC_CHECK_CAPS; @@ -762,8 +761,7 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode, } spin_lock(&dentry->d_lock); - di->flags &= ~CEPH_DENTRY_ASYNC_CREATE; - wake_up_bit(&di->flags, CEPH_DENTRY_ASYNC_CREATE_BIT); + 
clear_and_wake_up_bit(CEPH_DENTRY_ASYNC_CREATE_BIT, &di->flags); spin_unlock(&dentry->d_lock); return ret; From fbeafe782bd986bf75544526fb9c0284e045e0a4 Mon Sep 17 00:00:00 2001 From: Viacheslav Dubeyko Date: Wed, 16 Jul 2025 11:40:49 -0700 Subject: [PATCH 08/15] ceph: fix potential race condition on operations with CEPH_I_ODIRECT flag The Coverity Scan service has detected potential race conditions in ceph_block_o_direct(), ceph_start_io_read(), ceph_block_buffered(), and ceph_start_io_direct() [1 - 4]. The CID 1590942, 1590665, 1589664, 1590377 contain the explanation: "The value of the shared data will be determined by the interleaving of thread execution. Thread shared data is accessed without holding an appropriate lock, possibly causing a race condition (CWE-366)". This patch reworks the pattern of accessing/modifying the CEPH_I_ODIRECT flag by means of adding smp_mb__before_atomic() before reading the status of the CEPH_I_ODIRECT flag and smp_mb__after_atomic() after setting/clearing this flag. Also, the pattern of using ci->i_ceph_lock in the ceph_block_o_direct(), ceph_start_io_read(), ceph_block_buffered(), and ceph_start_io_direct() methods was reworked.
[1] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1590942 [2] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1590665 [3] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1589664 [4] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1590377 Signed-off-by: Viacheslav Dubeyko Reviewed-by: Alex Markuze Signed-off-by: Ilya Dryomov --- fs/ceph/io.c | 53 +++++++++++++++++++++++++++++++++++++++---------- fs/ceph/super.h | 3 ++- 2 files changed, 44 insertions(+), 12 deletions(-) diff --git a/fs/ceph/io.c b/fs/ceph/io.c index e10f44182a4c..2d10f49c93a9 100644 --- a/fs/ceph/io.c +++ b/fs/ceph/io.c @@ -21,14 +21,23 @@ /* Call with exclusively locked inode->i_rwsem */ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode) { + bool is_odirect; + lockdep_assert_held_write(&inode->i_rwsem); - if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) { - spin_lock(&ci->i_ceph_lock); - ci->i_ceph_flags &= ~CEPH_I_ODIRECT; - spin_unlock(&ci->i_ceph_lock); - inode_dio_wait(inode); + spin_lock(&ci->i_ceph_lock); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT; + if (is_odirect) { + clear_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); } + spin_unlock(&ci->i_ceph_lock); + + if (is_odirect) + inode_dio_wait(inode); } /** @@ -50,6 +59,7 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode) int ceph_start_io_read(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); + bool is_odirect; int err; /* Be an optimist! 
*/ @@ -57,7 +67,12 @@ int ceph_start_io_read(struct inode *inode) if (err) return err; - if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) + spin_lock(&ci->i_ceph_lock); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT; + spin_unlock(&ci->i_ceph_lock); + if (!is_odirect) return 0; up_read(&inode->i_rwsem); @@ -116,12 +131,22 @@ ceph_end_io_write(struct inode *inode) /* Call with exclusively locked inode->i_rwsem */ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode) { + bool is_odirect; + lockdep_assert_held_write(&inode->i_rwsem); - if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) { - spin_lock(&ci->i_ceph_lock); - ci->i_ceph_flags |= CEPH_I_ODIRECT; - spin_unlock(&ci->i_ceph_lock); + spin_lock(&ci->i_ceph_lock); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT; + if (!is_odirect) { + set_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags); + /* ensure modified bit is visible */ + smp_mb__after_atomic(); + } + spin_unlock(&ci->i_ceph_lock); + + if (!is_odirect) { /* FIXME: unmap_mapping_range? */ filemap_write_and_wait(inode->i_mapping); } @@ -146,6 +171,7 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode) int ceph_start_io_direct(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); + bool is_odirect; int err; /* Be an optimist! 
*/ @@ -153,7 +179,12 @@ int ceph_start_io_direct(struct inode *inode) if (err) return err; - if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) + spin_lock(&ci->i_ceph_lock); + /* ensure that bit state is consistent */ + smp_mb__before_atomic(); + is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT; + spin_unlock(&ci->i_ceph_lock); + if (is_odirect) return 0; up_read(&inode->i_rwsem); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index cf176aab0f82..d1e81e11661b 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -638,7 +638,8 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, #define CEPH_I_FLUSH_SNAPS (1 << 8) /* need flush snapss */ #define CEPH_I_ERROR_WRITE (1 << 9) /* have seen write errors */ #define CEPH_I_ERROR_FILELOCK (1 << 10) /* have seen file lock errors */ -#define CEPH_I_ODIRECT (1 << 11) /* inode in direct I/O mode */ +#define CEPH_I_ODIRECT_BIT (11) /* inode in direct I/O mode */ +#define CEPH_I_ODIRECT (1 << CEPH_I_ODIRECT_BIT) #define CEPH_ASYNC_CREATE_BIT (12) /* async create in flight for this */ #define CEPH_I_ASYNC_CREATE (1 << CEPH_ASYNC_CREATE_BIT) #define CEPH_I_SHUTDOWN (1 << 13) /* inode is no longer usable */ From 59699a5a7114f09f890e86c09a6b32afb5eaa64c Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Wed, 6 Aug 2025 11:48:53 +0200 Subject: [PATCH 09/15] libceph: make ceph_con_get_out_msg() return the message pointer The caller in messenger_v1.c loads it anyway, so let's keep the pointer in the register instead of reloading it from memory. This eliminates a tiny bit of unnecessary overhead. 
Signed-off-by: Max Kellermann Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 2 +- net/ceph/messenger.c | 4 ++-- net/ceph/messenger_v1.c | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 4b49592a738f..9ebcac2981fd 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -550,7 +550,7 @@ void ceph_addr_set_port(struct ceph_entity_addr *addr, int p); void ceph_con_process_message(struct ceph_connection *con); int ceph_con_in_msg_alloc(struct ceph_connection *con, struct ceph_msg_header *hdr, int *skip); -void ceph_con_get_out_msg(struct ceph_connection *con); +struct ceph_msg *ceph_con_get_out_msg(struct ceph_connection *con); /* messenger_v1.c */ int ceph_con_v1_try_read(struct ceph_connection *con); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 9f6d860411cb..b6c7bfc03503 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2110,7 +2110,7 @@ int ceph_con_in_msg_alloc(struct ceph_connection *con, return ret; } -void ceph_con_get_out_msg(struct ceph_connection *con) +struct ceph_msg *ceph_con_get_out_msg(struct ceph_connection *con) { struct ceph_msg *msg; @@ -2141,7 +2141,7 @@ void ceph_con_get_out_msg(struct ceph_connection *con) * message or in case of a fault. 
*/ WARN_ON(con->out_msg); - con->out_msg = ceph_msg_get(msg); + return con->out_msg = ceph_msg_get(msg); } /* diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c index 0cb61c76b9b8..eebe4e19d75a 100644 --- a/net/ceph/messenger_v1.c +++ b/net/ceph/messenger_v1.c @@ -210,8 +210,7 @@ static void prepare_write_message(struct ceph_connection *con) &con->v1.out_temp_ack); } - ceph_con_get_out_msg(con); - m = con->out_msg; + m = ceph_con_get_out_msg(con); dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n", m, con->out_seq, le16_to_cpu(m->hdr.type), From 7399212dcf64d90a6ab239bdd98bd325d922fc7e Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Wed, 6 Aug 2025 11:48:54 +0200 Subject: [PATCH 10/15] libceph: pass the message pointer instead of loading con->out_msg This pointer is in a register anyway, so let's use that instead of reloading from memory everywhere. [ idryomov: formatting ] Signed-off-by: Max Kellermann Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 4 +- net/ceph/messenger.c | 4 +- net/ceph/messenger_v1.c | 45 +++++---- net/ceph/messenger_v2.c | 168 +++++++++++++++++---------------- 4 files changed, 114 insertions(+), 107 deletions(-) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 9ebcac2981fd..6aa4c6478c9f 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -555,7 +555,7 @@ struct ceph_msg *ceph_con_get_out_msg(struct ceph_connection *con); /* messenger_v1.c */ int ceph_con_v1_try_read(struct ceph_connection *con); int ceph_con_v1_try_write(struct ceph_connection *con); -void ceph_con_v1_revoke(struct ceph_connection *con); +void ceph_con_v1_revoke(struct ceph_connection *con, struct ceph_msg *msg); void ceph_con_v1_revoke_incoming(struct ceph_connection *con); bool ceph_con_v1_opened(struct ceph_connection *con); void ceph_con_v1_reset_session(struct ceph_connection *con); @@ -564,7 +564,7 @@ void 
ceph_con_v1_reset_protocol(struct ceph_connection *con); /* messenger_v2.c */ int ceph_con_v2_try_read(struct ceph_connection *con); int ceph_con_v2_try_write(struct ceph_connection *con); -void ceph_con_v2_revoke(struct ceph_connection *con); +void ceph_con_v2_revoke(struct ceph_connection *con, struct ceph_msg *msg); void ceph_con_v2_revoke_incoming(struct ceph_connection *con); bool ceph_con_v2_opened(struct ceph_connection *con); void ceph_con_v2_reset_session(struct ceph_connection *con); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index b6c7bfc03503..08a6a083609f 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1793,9 +1793,9 @@ void ceph_msg_revoke(struct ceph_msg *msg) WARN_ON(con->state != CEPH_CON_S_OPEN); dout("%s con %p msg %p was sending\n", __func__, con, msg); if (ceph_msgr2(from_msgr(con->msgr))) - ceph_con_v2_revoke(con); + ceph_con_v2_revoke(con, msg); else - ceph_con_v1_revoke(con); + ceph_con_v1_revoke(con, msg); ceph_msg_put(con->out_msg); con->out_msg = NULL; } else { diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c index eebe4e19d75a..cc4a36ef8462 100644 --- a/net/ceph/messenger_v1.c +++ b/net/ceph/messenger_v1.c @@ -169,10 +169,9 @@ static void prepare_message_data(struct ceph_msg *msg, u32 data_len) * Prepare footer for currently outgoing message, and finish things * off. Assumes out_kvec* are already valid.. we just add on to the end. 
*/ -static void prepare_write_message_footer(struct ceph_connection *con) +static void prepare_write_message_footer(struct ceph_connection *con, + struct ceph_msg *m) { - struct ceph_msg *m = con->out_msg; - m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; dout("prepare_write_message_footer %p\n", con); @@ -230,31 +229,31 @@ static void prepare_write_message(struct ceph_connection *con) /* fill in hdr crc and finalize hdr */ crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); - con->out_msg->hdr.crc = cpu_to_le32(crc); - memcpy(&con->v1.out_hdr, &con->out_msg->hdr, sizeof(con->v1.out_hdr)); + m->hdr.crc = cpu_to_le32(crc); + memcpy(&con->v1.out_hdr, &m->hdr, sizeof(con->v1.out_hdr)); /* fill in front and middle crc, footer */ crc = crc32c(0, m->front.iov_base, m->front.iov_len); - con->out_msg->footer.front_crc = cpu_to_le32(crc); + m->footer.front_crc = cpu_to_le32(crc); if (m->middle) { crc = crc32c(0, m->middle->vec.iov_base, m->middle->vec.iov_len); - con->out_msg->footer.middle_crc = cpu_to_le32(crc); + m->footer.middle_crc = cpu_to_le32(crc); } else - con->out_msg->footer.middle_crc = 0; + m->footer.middle_crc = 0; dout("%s front_crc %u middle_crc %u\n", __func__, - le32_to_cpu(con->out_msg->footer.front_crc), - le32_to_cpu(con->out_msg->footer.middle_crc)); - con->out_msg->footer.flags = 0; + le32_to_cpu(m->footer.front_crc), + le32_to_cpu(m->footer.middle_crc)); + m->footer.flags = 0; /* is there a data payload? 
*/ - con->out_msg->footer.data_crc = 0; + m->footer.data_crc = 0; if (m->data_length) { - prepare_message_data(con->out_msg, m->data_length); + prepare_message_data(m, m->data_length); con->v1.out_more = 1; /* data + footer will follow */ } else { /* no, queue up footer too and be done */ - prepare_write_message_footer(con); + prepare_write_message_footer(con, m); } ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); @@ -461,9 +460,9 @@ static int write_partial_kvec(struct ceph_connection *con) * 0 -> socket full, but more to do * <0 -> error */ -static int write_partial_message_data(struct ceph_connection *con) +static int write_partial_message_data(struct ceph_connection *con, + struct ceph_msg *msg) { - struct ceph_msg *msg = con->out_msg; struct ceph_msg_data_cursor *cursor = &msg->cursor; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); u32 crc; @@ -515,7 +514,7 @@ static int write_partial_message_data(struct ceph_connection *con) else msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; con_out_kvec_reset(con); - prepare_write_message_footer(con); + prepare_write_message_footer(con, msg); return 1; /* must return > 0 to indicate success */ } @@ -1471,6 +1470,7 @@ int ceph_con_v1_try_read(struct ceph_connection *con) */ int ceph_con_v1_try_write(struct ceph_connection *con) { + struct ceph_msg *msg; int ret = 1; dout("try_write start %p state %d\n", con, con->state); @@ -1517,14 +1517,15 @@ int ceph_con_v1_try_write(struct ceph_connection *con) } /* msg pages? */ - if (con->out_msg) { + msg = con->out_msg; + if (msg) { if (con->v1.out_msg_done) { - ceph_msg_put(con->out_msg); + ceph_msg_put(msg); con->out_msg = NULL; /* we're done with this one */ goto do_next; } - ret = write_partial_message_data(con); + ret = write_partial_message_data(con, msg); if (ret == 1) goto more; /* we need to send the footer, too! 
*/ if (ret == 0) @@ -1563,10 +1564,8 @@ int ceph_con_v1_try_write(struct ceph_connection *con) return ret; } -void ceph_con_v1_revoke(struct ceph_connection *con) +void ceph_con_v1_revoke(struct ceph_connection *con, struct ceph_msg *msg) { - struct ceph_msg *msg = con->out_msg; - WARN_ON(con->v1.out_skip); /* footer */ if (con->v1.out_msg_done) { diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index c54c8b5a6526..b44e936f3865 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -1560,10 +1560,11 @@ static int prepare_ack(struct ceph_connection *con) return prepare_control(con, FRAME_TAG_ACK, con->v2.out_buf, 8); } -static void prepare_epilogue_plain(struct ceph_connection *con, bool aborted) +static void prepare_epilogue_plain(struct ceph_connection *con, + struct ceph_msg *msg, bool aborted) { dout("%s con %p msg %p aborted %d crcs %u %u %u\n", __func__, con, - con->out_msg, aborted, con->v2.out_epil.front_crc, + msg, aborted, con->v2.out_epil.front_crc, con->v2.out_epil.middle_crc, con->v2.out_epil.data_crc); encode_epilogue_plain(con, aborted); @@ -1574,10 +1575,9 @@ static void prepare_epilogue_plain(struct ceph_connection *con, bool aborted) * For "used" empty segments, crc is -1. For unused (trailing) * segments, crc is 0. 
*/ -static void prepare_message_plain(struct ceph_connection *con) +static void prepare_message_plain(struct ceph_connection *con, + struct ceph_msg *msg) { - struct ceph_msg *msg = con->out_msg; - prepare_head_plain(con, con->v2.out_buf, sizeof(struct ceph_msg_header2), NULL, 0, false); @@ -1618,7 +1618,7 @@ static void prepare_message_plain(struct ceph_connection *con) con->v2.out_state = OUT_S_QUEUE_DATA; } else { con->v2.out_epil.data_crc = 0; - prepare_epilogue_plain(con, false); + prepare_epilogue_plain(con, msg, false); con->v2.out_state = OUT_S_FINISH_MESSAGE; } } @@ -1630,7 +1630,8 @@ static void prepare_message_plain(struct ceph_connection *con) * allocate pages for the entire tail of the message (currently up * to ~32M) and two sgs arrays (up to ~256K each)... */ -static int prepare_message_secure(struct ceph_connection *con) +static int prepare_message_secure(struct ceph_connection *con, + struct ceph_msg *msg) { void *zerop = page_address(ceph_zero_page); struct sg_table enc_sgt = {}; @@ -1645,7 +1646,7 @@ static int prepare_message_secure(struct ceph_connection *con) if (ret) return ret; - tail_len = tail_onwire_len(con->out_msg, true); + tail_len = tail_onwire_len(msg, true); if (!tail_len) { /* * Empty message: once the head is written, @@ -1656,7 +1657,7 @@ static int prepare_message_secure(struct ceph_connection *con) } encode_epilogue_secure(con, false); - ret = setup_message_sgs(&sgt, con->out_msg, zerop, zerop, zerop, + ret = setup_message_sgs(&sgt, msg, zerop, zerop, zerop, &con->v2.out_epil, NULL, 0, false); if (ret) goto out; @@ -1685,7 +1686,7 @@ static int prepare_message_secure(struct ceph_connection *con) goto out; dout("%s con %p msg %p sg_cnt %d enc_page_cnt %d\n", __func__, con, - con->out_msg, sgt.orig_nents, enc_page_cnt); + msg, sgt.orig_nents, enc_page_cnt); con->v2.out_state = OUT_S_QUEUE_ENC_PAGE; out: @@ -1694,19 +1695,19 @@ static int prepare_message_secure(struct ceph_connection *con) return ret; } -static int 
prepare_message(struct ceph_connection *con) +static int prepare_message(struct ceph_connection *con, struct ceph_msg *msg) { int lens[] = { sizeof(struct ceph_msg_header2), - front_len(con->out_msg), - middle_len(con->out_msg), - data_len(con->out_msg) + front_len(msg), + middle_len(msg), + data_len(msg) }; struct ceph_frame_desc desc; int ret; dout("%s con %p msg %p logical %d+%d+%d+%d\n", __func__, con, - con->out_msg, lens[0], lens[1], lens[2], lens[3]); + msg, lens[0], lens[1], lens[2], lens[3]); if (con->in_seq > con->in_seq_acked) { dout("%s con %p in_seq_acked %llu -> %llu\n", __func__, con, @@ -1717,15 +1718,15 @@ static int prepare_message(struct ceph_connection *con) reset_out_kvecs(con); init_frame_desc(&desc, FRAME_TAG_MESSAGE, lens, 4); encode_preamble(&desc, con->v2.out_buf); - fill_header2(CTRL_BODY(con->v2.out_buf), &con->out_msg->hdr, + fill_header2(CTRL_BODY(con->v2.out_buf), &msg->hdr, con->in_seq_acked); if (con_secure(con)) { - ret = prepare_message_secure(con); + ret = prepare_message_secure(con, msg); if (ret) return ret; } else { - prepare_message_plain(con); + prepare_message_plain(con, msg); } ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); @@ -3153,20 +3154,20 @@ int ceph_con_v2_try_read(struct ceph_connection *con) } } -static void queue_data(struct ceph_connection *con) +static void queue_data(struct ceph_connection *con, struct ceph_msg *msg) { struct bio_vec bv; con->v2.out_epil.data_crc = -1; - ceph_msg_data_cursor_init(&con->v2.out_cursor, con->out_msg, - data_len(con->out_msg)); + ceph_msg_data_cursor_init(&con->v2.out_cursor, msg, + data_len(msg)); get_bvec_at(&con->v2.out_cursor, &bv); set_out_bvec(con, &bv, true); con->v2.out_state = OUT_S_QUEUE_DATA_CONT; } -static void queue_data_cont(struct ceph_connection *con) +static void queue_data_cont(struct ceph_connection *con, struct ceph_msg *msg) { struct bio_vec bv; @@ -3187,7 +3188,7 @@ static void queue_data_cont(struct ceph_connection *con) * we are done. 
*/ reset_out_kvecs(con); - prepare_epilogue_plain(con, false); + prepare_epilogue_plain(con, msg, false); con->v2.out_state = OUT_S_FINISH_MESSAGE; } @@ -3219,7 +3220,7 @@ static void queue_enc_page(struct ceph_connection *con) con->v2.out_state = OUT_S_FINISH_MESSAGE; } -static void queue_zeros(struct ceph_connection *con) +static void queue_zeros(struct ceph_connection *con, struct ceph_msg *msg) { dout("%s con %p out_zero %d\n", __func__, con, con->v2.out_zero); @@ -3236,7 +3237,7 @@ static void queue_zeros(struct ceph_connection *con) * Once it's written, we are done patching up for the revoke. */ reset_out_kvecs(con); - prepare_epilogue_plain(con, true); + prepare_epilogue_plain(con, msg, true); con->v2.out_state = OUT_S_FINISH_MESSAGE; } @@ -3263,6 +3264,7 @@ static void finish_message(struct ceph_connection *con) static int populate_out_iter(struct ceph_connection *con) { + struct ceph_msg *msg; int ret; dout("%s con %p state %d out_state %d\n", __func__, con, con->state, @@ -3278,18 +3280,18 @@ static int populate_out_iter(struct ceph_connection *con) switch (con->v2.out_state) { case OUT_S_QUEUE_DATA: WARN_ON(!con->out_msg); - queue_data(con); + queue_data(con, con->out_msg); goto populated; case OUT_S_QUEUE_DATA_CONT: WARN_ON(!con->out_msg); - queue_data_cont(con); + queue_data_cont(con, con->out_msg); goto populated; case OUT_S_QUEUE_ENC_PAGE: queue_enc_page(con); goto populated; case OUT_S_QUEUE_ZEROS: WARN_ON(con->out_msg); /* revoked */ - queue_zeros(con); + queue_zeros(con, con->out_msg); goto populated; case OUT_S_FINISH_MESSAGE: finish_message(con); @@ -3309,8 +3311,8 @@ static int populate_out_iter(struct ceph_connection *con) return ret; } } else if (!list_empty(&con->out_queue)) { - ceph_con_get_out_msg(con); - ret = prepare_message(con); + msg = ceph_con_get_out_msg(con); + ret = prepare_message(con, msg); if (ret) { pr_err("prepare_message failed: %d\n", ret); return ret; @@ -3422,17 +3424,18 @@ static u32 crc32c_zeros(u32 crc, int zero_len) 
return crc; } -static void prepare_zero_front(struct ceph_connection *con, int resid) +static void prepare_zero_front(struct ceph_connection *con, + struct ceph_msg *msg, int resid) { int sent; - WARN_ON(!resid || resid > front_len(con->out_msg)); - sent = front_len(con->out_msg) - resid; + WARN_ON(!resid || resid > front_len(msg)); + sent = front_len(msg) - resid; dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid); if (sent) { con->v2.out_epil.front_crc = - crc32c(-1, con->out_msg->front.iov_base, sent); + crc32c(-1, msg->front.iov_base, sent); con->v2.out_epil.front_crc = crc32c_zeros(con->v2.out_epil.front_crc, resid); } else { @@ -3443,17 +3446,18 @@ static void prepare_zero_front(struct ceph_connection *con, int resid) out_zero_add(con, resid); } -static void prepare_zero_middle(struct ceph_connection *con, int resid) +static void prepare_zero_middle(struct ceph_connection *con, + struct ceph_msg *msg, int resid) { int sent; - WARN_ON(!resid || resid > middle_len(con->out_msg)); - sent = middle_len(con->out_msg) - resid; + WARN_ON(!resid || resid > middle_len(msg)); + sent = middle_len(msg) - resid; dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid); if (sent) { con->v2.out_epil.middle_crc = - crc32c(-1, con->out_msg->middle->vec.iov_base, sent); + crc32c(-1, msg->middle->vec.iov_base, sent); con->v2.out_epil.middle_crc = crc32c_zeros(con->v2.out_epil.middle_crc, resid); } else { @@ -3464,61 +3468,64 @@ static void prepare_zero_middle(struct ceph_connection *con, int resid) out_zero_add(con, resid); } -static void prepare_zero_data(struct ceph_connection *con) +static void prepare_zero_data(struct ceph_connection *con, + struct ceph_msg *msg) { dout("%s con %p\n", __func__, con); - con->v2.out_epil.data_crc = crc32c_zeros(-1, data_len(con->out_msg)); - out_zero_add(con, data_len(con->out_msg)); + con->v2.out_epil.data_crc = crc32c_zeros(-1, data_len(msg)); + out_zero_add(con, data_len(msg)); } -static void 
revoke_at_queue_data(struct ceph_connection *con) +static void revoke_at_queue_data(struct ceph_connection *con, + struct ceph_msg *msg) { int boundary; int resid; - WARN_ON(!data_len(con->out_msg)); + WARN_ON(!data_len(msg)); WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter)); resid = iov_iter_count(&con->v2.out_iter); - boundary = front_len(con->out_msg) + middle_len(con->out_msg); + boundary = front_len(msg) + middle_len(msg); if (resid > boundary) { resid -= boundary; WARN_ON(resid > MESSAGE_HEAD_PLAIN_LEN); dout("%s con %p was sending head\n", __func__, con); - if (front_len(con->out_msg)) - prepare_zero_front(con, front_len(con->out_msg)); - if (middle_len(con->out_msg)) - prepare_zero_middle(con, middle_len(con->out_msg)); - prepare_zero_data(con); + if (front_len(msg)) + prepare_zero_front(con, msg, front_len(msg)); + if (middle_len(msg)) + prepare_zero_middle(con, msg, middle_len(msg)); + prepare_zero_data(con, msg); WARN_ON(iov_iter_count(&con->v2.out_iter) != resid); con->v2.out_state = OUT_S_QUEUE_ZEROS; return; } - boundary = middle_len(con->out_msg); + boundary = middle_len(msg); if (resid > boundary) { resid -= boundary; dout("%s con %p was sending front\n", __func__, con); - prepare_zero_front(con, resid); - if (middle_len(con->out_msg)) - prepare_zero_middle(con, middle_len(con->out_msg)); - prepare_zero_data(con); - queue_zeros(con); + prepare_zero_front(con, msg, resid); + if (middle_len(msg)) + prepare_zero_middle(con, msg, middle_len(msg)); + prepare_zero_data(con, msg); + queue_zeros(con, msg); return; } WARN_ON(!resid); dout("%s con %p was sending middle\n", __func__, con); - prepare_zero_middle(con, resid); - prepare_zero_data(con); - queue_zeros(con); + prepare_zero_middle(con, msg, resid); + prepare_zero_data(con, msg); + queue_zeros(con, msg); } -static void revoke_at_queue_data_cont(struct ceph_connection *con) +static void revoke_at_queue_data_cont(struct ceph_connection *con, + struct ceph_msg *msg) { int sent, resid; /* current piece of 
data */ - WARN_ON(!data_len(con->out_msg)); + WARN_ON(!data_len(msg)); WARN_ON(!iov_iter_is_bvec(&con->v2.out_iter)); resid = iov_iter_count(&con->v2.out_iter); WARN_ON(!resid || resid > con->v2.out_bvec.bv_len); @@ -3537,10 +3544,11 @@ static void revoke_at_queue_data_cont(struct ceph_connection *con) con->v2.out_iter.count -= resid; out_zero_add(con, con->v2.out_cursor.total_resid); - queue_zeros(con); + queue_zeros(con, msg); } -static void revoke_at_finish_message(struct ceph_connection *con) +static void revoke_at_finish_message(struct ceph_connection *con, + struct ceph_msg *msg) { int boundary; int resid; @@ -3548,39 +3556,39 @@ static void revoke_at_finish_message(struct ceph_connection *con) WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter)); resid = iov_iter_count(&con->v2.out_iter); - if (!front_len(con->out_msg) && !middle_len(con->out_msg) && - !data_len(con->out_msg)) { + if (!front_len(msg) && !middle_len(msg) && + !data_len(msg)) { WARN_ON(!resid || resid > MESSAGE_HEAD_PLAIN_LEN); dout("%s con %p was sending head (empty message) - noop\n", __func__, con); return; } - boundary = front_len(con->out_msg) + middle_len(con->out_msg) + + boundary = front_len(msg) + middle_len(msg) + CEPH_EPILOGUE_PLAIN_LEN; if (resid > boundary) { resid -= boundary; WARN_ON(resid > MESSAGE_HEAD_PLAIN_LEN); dout("%s con %p was sending head\n", __func__, con); - if (front_len(con->out_msg)) - prepare_zero_front(con, front_len(con->out_msg)); - if (middle_len(con->out_msg)) - prepare_zero_middle(con, middle_len(con->out_msg)); + if (front_len(msg)) + prepare_zero_front(con, msg, front_len(msg)); + if (middle_len(msg)) + prepare_zero_middle(con, msg, middle_len(msg)); con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN; WARN_ON(iov_iter_count(&con->v2.out_iter) != resid); con->v2.out_state = OUT_S_QUEUE_ZEROS; return; } - boundary = middle_len(con->out_msg) + CEPH_EPILOGUE_PLAIN_LEN; + boundary = middle_len(msg) + CEPH_EPILOGUE_PLAIN_LEN; if (resid > boundary) { resid -= boundary; 
dout("%s con %p was sending front\n", __func__, con); - prepare_zero_front(con, resid); - if (middle_len(con->out_msg)) - prepare_zero_middle(con, middle_len(con->out_msg)); + prepare_zero_front(con, msg, resid); + if (middle_len(msg)) + prepare_zero_middle(con, msg, middle_len(msg)); con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN; - queue_zeros(con); + queue_zeros(con, msg); return; } @@ -3588,9 +3596,9 @@ static void revoke_at_finish_message(struct ceph_connection *con) if (resid > boundary) { resid -= boundary; dout("%s con %p was sending middle\n", __func__, con); - prepare_zero_middle(con, resid); + prepare_zero_middle(con, msg, resid); con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN; - queue_zeros(con); + queue_zeros(con, msg); return; } @@ -3598,7 +3606,7 @@ static void revoke_at_finish_message(struct ceph_connection *con) dout("%s con %p was sending epilogue - noop\n", __func__, con); } -void ceph_con_v2_revoke(struct ceph_connection *con) +void ceph_con_v2_revoke(struct ceph_connection *con, struct ceph_msg *msg) { WARN_ON(con->v2.out_zero); @@ -3611,13 +3619,13 @@ void ceph_con_v2_revoke(struct ceph_connection *con) switch (con->v2.out_state) { case OUT_S_QUEUE_DATA: - revoke_at_queue_data(con); + revoke_at_queue_data(con, msg); break; case OUT_S_QUEUE_DATA_CONT: - revoke_at_queue_data_cont(con); + revoke_at_queue_data_cont(con, msg); break; case OUT_S_FINISH_MESSAGE: - revoke_at_finish_message(con); + revoke_at_finish_message(con, msg); break; default: WARN(1, "bad out_state %d", con->v2.out_state); From 6140f1d43ba9425dc55b12bdfd8877b0c5118d9a Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Wed, 6 Aug 2025 11:48:55 +0200 Subject: [PATCH 11/15] libceph: add empty check to ceph_con_get_out_msg() This moves the list_empty() checks from the two callers (v1 and v2) into the base messenger.c library. 
Now the v1/v2 specializations do not need to know about con->out_queue; that implementation detail is now hidden behind the ceph_con_get_out_msg() function. [ idryomov: instead of changing prepare_write_message() to return a bool, move ceph_con_get_out_msg() call out to arrive to the same pattern as in messenger_v2.c ] Signed-off-by: Max Kellermann Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- net/ceph/messenger.c | 4 +++- net/ceph/messenger_v1.c | 10 ++++------ net/ceph/messenger_v2.c | 3 +-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 08a6a083609f..878bbfe770b1 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2114,7 +2114,9 @@ struct ceph_msg *ceph_con_get_out_msg(struct ceph_connection *con) { struct ceph_msg *msg; - BUG_ON(list_empty(&con->out_queue)); + if (list_empty(&con->out_queue)) + return NULL; + msg = list_first_entry(&con->out_queue, struct ceph_msg, list_head); WARN_ON(msg->con != con); diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c index cc4a36ef8462..c9e002d96319 100644 --- a/net/ceph/messenger_v1.c +++ b/net/ceph/messenger_v1.c @@ -191,9 +191,9 @@ static void prepare_write_message_footer(struct ceph_connection *con, /* * Prepare headers for the next outgoing message. */ -static void prepare_write_message(struct ceph_connection *con) +static void prepare_write_message(struct ceph_connection *con, + struct ceph_msg *m) { - struct ceph_msg *m; u32 crc; con_out_kvec_reset(con); @@ -209,8 +209,6 @@ static void prepare_write_message(struct ceph_connection *con) &con->v1.out_temp_ack); } - m = ceph_con_get_out_msg(con); - dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n", m, con->out_seq, le16_to_cpu(m->hdr.type), le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len), @@ -1545,8 +1543,8 @@ int ceph_con_v1_try_write(struct ceph_connection *con) goto more; } /* is anything else pending? 
*/ - if (!list_empty(&con->out_queue)) { - prepare_write_message(con); + if ((msg = ceph_con_get_out_msg(con)) != NULL) { + prepare_write_message(con, msg); goto more; } if (con->in_seq > con->in_seq_acked) { diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index b44e936f3865..9e39378eda00 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -3310,8 +3310,7 @@ static int populate_out_iter(struct ceph_connection *con) pr_err("prepare_keepalive2 failed: %d\n", ret); return ret; } - } else if (!list_empty(&con->out_queue)) { - msg = ceph_con_get_out_msg(con); + } else if ((msg = ceph_con_get_out_msg(con)) != NULL) { ret = prepare_message(con, msg); if (ret) { pr_err("prepare_message failed: %d\n", ret); return ret; From 98a2850de49c10a1a09642e17978b925f95e6029 Mon Sep 17 00:00:00 2001 From: Viacheslav Dubeyko Date: Thu, 28 Aug 2025 11:44:42 -0700 Subject: [PATCH 12/15] ceph: fix potential NULL dereference issue in ceph_fill_trace() The Coverity Scan service has detected a potential dereference of an explicit NULL value in ceph_fill_trace() [1]. The variable in is declared in the beginning of ceph_fill_trace() [2]: struct inode *in = NULL; However, the initialization of the variable is happening under condition [3]: if (rinfo->head->is_target) { in = req->r_target_inode; } Potentially, if rinfo->head->is_target == FALSE, then in variable continues to be NULL and later the dereference of NULL value could happen in ceph_fill_trace() logic [4,5]: else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || req->r_op == CEPH_MDS_OP_MKSNAP) && test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) && !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { ihold(in); err = splice_dentry(&req->r_dentry, in); if (err < 0) goto done; } This patch adds the checking of in variable for NULL value and it returns -EINVAL error code if it has NULL value. v2 Alex Markuze suggested to add unlikely macro in the checking condition. 
[1] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1141197 [2] https://elixir.bootlin.com/linux/v6.17-rc3/source/fs/ceph/inode.c#L1522 [3] https://elixir.bootlin.com/linux/v6.17-rc3/source/fs/ceph/inode.c#L1629 [4] https://elixir.bootlin.com/linux/v6.17-rc3/source/fs/ceph/inode.c#L1745 [5] https://elixir.bootlin.com/linux/v6.17-rc3/source/fs/ceph/inode.c#L1777 Signed-off-by: Viacheslav Dubeyko Reviewed-by: Alex Markuze Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index f67025465de0..03a8f2e3341e 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1793,6 +1793,11 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) goto done; } + if (unlikely(!in)) { + err = -EINVAL; + goto done; + } + /* attach proper inode */ if (d_really_is_negative(dn)) { ceph_dir_clear_ordered(dir); @@ -1828,6 +1833,12 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) doutc(cl, " linking snapped dir %p to dn %p\n", in, req->r_dentry); ceph_dir_clear_ordered(dir); + + if (unlikely(!in)) { + err = -EINVAL; + goto done; + } + ihold(in); err = splice_dentry(&req->r_dentry, in); if (err < 0) From c66120c84295a0495eb46dcfba829457acd6ef7d Mon Sep 17 00:00:00 2001 From: Viacheslav Dubeyko Date: Tue, 2 Sep 2025 12:08:45 -0700 Subject: [PATCH 13/15] ceph: cleanup in ceph_alloc_readdir_reply_buffer() The Coverity Scan service has reported potential issue in ceph_alloc_readdir_reply_buffer() [1]. If order could be negative one, then it expects the issue in the logic: num_entries = (PAGE_SIZE << order) / size; Technically speaking, this logic [2] should prevent from making the order variable negative: if (!rinfo->dir_entries) return -ENOMEM; However, the allocation logic requires some cleanup. This patch makes sure that calculated bytes count will never exceed ULONG_MAX before get_order() calculation. 
And it adds the checking of order variable on negative value to guarantee that second half of the function's code will never operate by negative value of order variable even if something will be wrong or to be changed in the first half of the function's logic. v2 Alex Markuze suggested to add unlikely() macro for introduced condition checks. [1] https://scan5.scan.coverity.com/#/project-view/64304/10063?selectedIssue=1198252 [2] https://elixir.bootlin.com/linux/v6.17-rc3/source/fs/ceph/mds_client.c#L2553 Signed-off-by: Viacheslav Dubeyko Reviewed-by: Alex Markuze Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index aa2f74142cf4..8104350b1553 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2533,6 +2533,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options; size_t size = sizeof(struct ceph_mds_reply_dir_entry); unsigned int num_entries; + u64 bytes_count; int order; spin_lock(&ci->i_ceph_lock); @@ -2541,7 +2542,11 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, num_entries = max(num_entries, 1U); num_entries = min(num_entries, opt->max_readdir); - order = get_order(size * num_entries); + bytes_count = (u64)size * num_entries; + if (unlikely(bytes_count > ULONG_MAX)) + bytes_count = ULONG_MAX; + + order = get_order((unsigned long)bytes_count); while (order >= 0) { rinfo->dir_entries = (void*)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | @@ -2551,7 +2556,7 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, break; order--; } - if (!rinfo->dir_entries) + if (!rinfo->dir_entries || unlikely(order < 0)) return -ENOMEM; num_entries = (PAGE_SIZE << order) / size; From 22c73d52a6d05c5a2053385c0d6cd9984732799d Mon Sep 17 00:00:00 2001 From: Kotresh HR Date: Thu, 11 Sep 2025 15:02:35 +0530 Subject: [PATCH 14/15] ceph: 
fix multifs mds auth caps issue The mds auth caps check should also validate the fsname along with the associated caps. Not doing so would result in applying the mds auth caps of one fs on to the other fs in a multifs ceph cluster. The bug causes multiple issues w.r.t user authentication, following is one such example. Steps to Reproduce (on vstart cluster): 1. Create two file systems in a cluster, say 'fsname1' and 'fsname2' 2. Authorize read only permission to the user 'client.usr' on fs 'fsname1' $ceph fs authorize fsname1 client.usr / r 3. Authorize read and write permission to the same user 'client.usr' on fs 'fsname2' $ceph fs authorize fsname2 client.usr / rw 4. Update the keyring $ceph auth get client.usr >> ./keyring With above permissions for the user 'client.usr', following is the expectation. a. The 'client.usr' should be able to only read the contents and not allowed to create or delete files on file system 'fsname1'. b. The 'client.usr' should be able to read/write on file system 'fsname2'. But, with this bug, the 'client.usr' is allowed to read/write on file system 'fsname1'. See below. 5. Mount the file system 'fsname1' with the user 'client.usr' $sudo bin/mount.ceph usr@.fsname1=/ /kmnt_fsname1_usr/ 6. Try creating a file on file system 'fsname1' with user 'client.usr'. This should fail but passes with this bug. $touch /kmnt_fsname1_usr/file1 7. Mount the file system 'fsname1' with the user 'client.admin' and create a file. $sudo bin/mount.ceph admin@.fsname1=/ /kmnt_fsname1_admin $echo "data" > /kmnt_fsname1_admin/admin_file1 8. Try removing an existing file on file system 'fsname1' with the user 'client.usr'. This shouldn't succeed but succeeds with the bug. $rm -f /kmnt_fsname1_usr/admin_file1 For more information, please take a look at the corresponding mds/fuse patch and tests added by looking into the tracker mentioned below. 
v2: Fix a possible null dereference in doutc v3: Don't store fsname from mdsmap, validate against ceph_mount_options's fsname and use it v4: Code refactor, better warning message and fix possible compiler warning [ Slava.Dubeyko: "fsname check failed" -> "fsname mismatch" ] Link: https://tracker.ceph.com/issues/72167 Signed-off-by: Kotresh HR Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 8 ++++++++ fs/ceph/mdsmap.c | 14 +++++++++++++- fs/ceph/super.c | 14 -------------- fs/ceph/super.h | 14 ++++++++++++++ 4 files changed, 35 insertions(+), 15 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 8104350b1553..93650508d41a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -5655,11 +5655,19 @@ static int ceph_mds_auth_match(struct ceph_mds_client *mdsc, u32 caller_uid = from_kuid(&init_user_ns, cred->fsuid); u32 caller_gid = from_kgid(&init_user_ns, cred->fsgid); struct ceph_client *cl = mdsc->fsc->client; + const char *fs_name = mdsc->fsc->mount_options->mds_namespace; const char *spath = mdsc->fsc->mount_options->server_path; bool gid_matched = false; u32 gid, tlen, len; int i, j; + doutc(cl, "fsname check fs_name=%s match.fs_name=%s\n", + fs_name, auth->match.fs_name ? 
auth->match.fs_name : ""); + if (auth->match.fs_name && strcmp(auth->match.fs_name, fs_name)) { + /* fsname mismatch, try next one */ + return 0; + } + doutc(cl, "match.uid %lld\n", auth->match.uid); if (auth->match.uid != MDS_AUTH_UID_ANY) { if (auth->match.uid != caller_uid) diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 8109aba66e02..2c7b151a7c95 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -353,10 +353,22 @@ struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p, __decode_and_drop_type(p, end, u8, bad_ext); } if (mdsmap_ev >= 8) { + u32 fsname_len; /* enabled */ ceph_decode_8_safe(p, end, m->m_enabled, bad_ext); /* fs_name */ - ceph_decode_skip_string(p, end, bad_ext); + ceph_decode_32_safe(p, end, fsname_len, bad_ext); + + /* validate fsname against mds_namespace */ + if (!namespace_equals(mdsc->fsc->mount_options, *p, + fsname_len)) { + pr_warn_client(cl, "fsname %*pE doesn't match mds_namespace %s\n", + (int)fsname_len, (char *)*p, + mdsc->fsc->mount_options->mds_namespace); + goto bad; + } + /* skip fsname after validation */ + ceph_decode_skip_n(p, end, fsname_len, bad); } /* damaged */ if (mdsmap_ev >= 9) { diff --git a/fs/ceph/super.c b/fs/ceph/super.c index c3eb651862c5..ebef5244ae25 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -246,20 +246,6 @@ static void canonicalize_path(char *path) path[j] = '\0'; } -/* - * Check if the mds namespace in ceph_mount_options matches - * the passed in namespace string. First time match (when - * ->mds_namespace is NULL) is treated specially, since - * ->mds_namespace needs to be initialized by the caller. 
- */ -static int namespace_equals(struct ceph_mount_options *fsopt, - const char *namespace, size_t len) -{ - return !(fsopt->mds_namespace && - (strlen(fsopt->mds_namespace) != len || - strncmp(fsopt->mds_namespace, namespace, len))); -} - static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end, struct fs_context *fc) { diff --git a/fs/ceph/super.h b/fs/ceph/super.h index d1e81e11661b..de6dce077da1 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -104,6 +104,20 @@ struct ceph_mount_options { struct fscrypt_dummy_policy dummy_enc_policy; }; +/* + * Check if the mds namespace in ceph_mount_options matches + * the passed in namespace string. First time match (when + * ->mds_namespace is NULL) is treated specially, since + * ->mds_namespace needs to be initialized by the caller. + */ +static inline int namespace_equals(struct ceph_mount_options *fsopt, + const char *namespace, size_t len) +{ + return !(fsopt->mds_namespace && + (strlen(fsopt->mds_namespace) != len || + strncmp(fsopt->mds_namespace, namespace, len))); +} + /* mount state */ enum { CEPH_MOUNT_MOUNTING, From d74d6c0e98958aa0bdb6f0a93258a856bda58b97 Mon Sep 17 00:00:00 2001 From: Viacheslav Dubeyko Date: Tue, 2 Sep 2025 13:09:58 -0700 Subject: [PATCH 15/15] ceph: add bug tracking system info to MAINTAINERS This patch adds information about Ceph bug tracking system. 
[ idryomov: add the same for RBD, don't mention include/linux/ceph/ again ] Signed-off-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 97d958c945e4..16e23b76b0bf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5622,6 +5622,7 @@ M: Xiubo Li L: ceph-devel@vger.kernel.org S: Supported W: http://ceph.com/ +B: https://tracker.ceph.com/ T: git https://github.com/ceph/ceph-client.git F: include/linux/ceph/ F: include/linux/crush/ @@ -5633,6 +5634,7 @@ M: Ilya Dryomov L: ceph-devel@vger.kernel.org S: Supported W: http://ceph.com/ +B: https://tracker.ceph.com/ T: git https://github.com/ceph/ceph-client.git F: Documentation/filesystems/ceph.rst F: fs/ceph/ @@ -20980,6 +20982,7 @@ R: Dongsheng Yang L: ceph-devel@vger.kernel.org S: Supported W: http://ceph.com/ +B: https://tracker.ceph.com/ T: git https://github.com/ceph/ceph-client.git F: Documentation/ABI/testing/sysfs-bus-rbd F: drivers/block/rbd.c