Mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
(synced 2026-05-16 05:31:37 -04:00)
ksmbd: harden file lifetime during session teardown
__close_file_table_ids() is the per-session teardown that closes every
fp belonging to a session (or to one tree connect on that session) by
walking the session's volatile-id idr. The current loop has three
related problems on busy or racing workloads:
* Sleeping under ft->lock. The session-teardown skip callback,
session_fd_check(), already sleeps in ksmbd_vfs_copy_durable_owner()
-> kstrdup(GFP_KERNEL) and down_write(&fp->f_ci->m_lock) (a
rw_semaphore). Running the callback inside write_lock(&ft->lock)
trips CONFIG_DEBUG_ATOMIC_SLEEP / CONFIG_PROVE_LOCKING on a
durable-fd workload.
* Refcount accounting blind to f_state. The unconditional
atomic_dec_and_test(&fp->refcount) does not distinguish
FP_INITED (idr-owned reference still intact) from FP_CLOSED (an
earlier ksmbd_close_fd() already consumed the idr-owned reference
while leaving fp in the idr because a holder kept refcount
non-zero). When the latter races with teardown the same path
over-decrements into a holder reference and ksmbd_fd_put() later
UAFs that holder.
* FP_NEW window. Between __open_id() publishing fp into the
session idr and ksmbd_update_fstate(..., FP_INITED) committing the
transition at the end of smb2_open(), an fp is in FP_NEW and an
intervening teardown that takes a transient reference and
unpublishes the volatile id leaves the original idr-owned
reference orphaned -- the opener is unaware that fp has been
unpublished, returns success to the client, and the fp leaks at
refcount = 1.
Refactor __close_file_table_ids() to take a transient reference on fp
and unpublish fp from the session idr *under ft->lock* before calling
skip() outside the lock. A transient ref protects lifetime but not
concurrent field mutation, so the idr_remove() is what keeps
__ksmbd_lookup_fd() through this session's idr from granting a new
ksmbd_fp_get() reference to an fp whose fp->conn / fp->tcon /
fp->volatile_id / op->conn / lock_list links are about to be rewritten
by session_fd_check(). Durable reconnect is unaffected because it
reaches fp through the global durable table (ksmbd_lookup_durable_fd
-> global_ft).
Decide n_to_drop together with any FP_INITED -> FP_CLOSED transition
under ft->lock so teardown and ksmbd_close_fd() never both consume the
idr-owned reference. See ksmbd_mark_fp_closed() for the per-state
accounting. For the FP_NEW path to be safe, the opener has to learn
that fp was unpublished: ksmbd_update_fstate() now returns -ENOENT
when an FP_NEW -> FP_INITED transition finds f_state already advanced
or the volatile id cleared (both committed by teardown under
ft->lock); smb2_open() propagates that as STATUS_OBJECT_NAME_INVALID
and drops the original reference via ksmbd_fd_put().
The list removal cannot be left for a deferred final putter because
fp->volatile_id has already been cleared and __ksmbd_remove_fd() will
intentionally skip both idr_remove() and list_del_init(). Move the
m_fp_list unlink in __ksmbd_remove_fd() above the volatile-id check so
that an FP_NEW fp that happened to be added to m_fp_list (smb2_open()
adds fp->node before ksmbd_update_fstate() runs) is still cleaned up
on the deferred putter path; list_del_init() on an empty node is a
no-op and remains safe for fps that were never added.
Add a defensive guard in session_fd_check() that refuses non-FP_INITED
fps so that even if a teardown reaches an FP_NEW fp it falls into the
close branch (where the n_to_drop = 1 accounting keeps the opener's
reference alive) instead of the durable-preserve branch (which mutates
fp->conn / fp->tcon).
Validation on a debug kernel additionally built with CONFIG_DEBUG_LIST
and CONFIG_DEBUG_OBJECTS_WORK used a same-session two-tcon workload
(open/write storm on one tcon, 50 tree disconnects on the other) and
reported no list-corruption, work_struct ODEBUG, sleep-in-atomic,
lockdep or kmemleak reports. Reverting only the
__close_file_table_ids() hunk while keeping a forced-is_reconnectable()
harness reproduced the expected sleep-in-atomic splat at
vfs_cache.c:1095, confirming the discipline that the sleepable skip()
callback must run outside ft->lock.
KASAN-enabled direct SMB2 coverage with durable handles enabled
exercised ksmbd_close_tree_conn_fds(), ksmbd_close_session_fds(),
the FP_NEW failure path, tree_conn_fd_check(), and a non-zero
session_fd_check() durable-preserve return. This produced no KASAN,
DEBUG_LIST, ODEBUG, or WARNING reports.
Fixes: f441584858 ("cifsd: add file operations")
Signed-off-by: DaeMyung Kang <charsyam@gmail.com>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
This commit is contained in:
Committed by: Steve French
Parent: b1f1e80620
Commit: a42896bebf
@@ -3767,8 +3767,10 @@ int smb2_open(struct ksmbd_work *work)
|
||||
|
||||
err_out2:
|
||||
if (!rc) {
|
||||
ksmbd_update_fstate(&work->sess->file_table, fp, FP_INITED);
|
||||
rc = ksmbd_iov_pin_rsp(work, (void *)rsp, iov_len);
|
||||
rc = ksmbd_update_fstate(&work->sess->file_table, fp,
|
||||
FP_INITED);
|
||||
if (!rc)
|
||||
rc = ksmbd_iov_pin_rsp(work, (void *)rsp, iov_len);
|
||||
}
|
||||
if (rc) {
|
||||
if (rc == -EINVAL)
|
||||
|
||||
@@ -431,13 +431,13 @@ static void ksmbd_remove_durable_fd(struct ksmbd_file *fp)
|
||||
|
||||
static void __ksmbd_remove_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp)
|
||||
{
|
||||
if (!has_file_id(fp->volatile_id))
|
||||
return;
|
||||
|
||||
down_write(&fp->f_ci->m_lock);
|
||||
list_del_init(&fp->node);
|
||||
up_write(&fp->f_ci->m_lock);
|
||||
|
||||
if (!has_file_id(fp->volatile_id))
|
||||
return;
|
||||
|
||||
write_lock(&ft->lock);
|
||||
idr_remove(ft->idr, fp->volatile_id);
|
||||
write_unlock(&ft->lock);
|
||||
@@ -798,15 +798,58 @@ struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp)
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
void ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp,
|
||||
unsigned int state)
|
||||
/**
|
||||
* ksmbd_update_fstate() - update an fp state under the file-table lock
|
||||
* @ft: file table that publishes @fp's volatile id
|
||||
* @fp: file pointer to update
|
||||
* @state: new state
|
||||
*
|
||||
* Return: 0 on success. The FP_NEW -> FP_INITED transition is special:
|
||||
* -ENOENT if teardown already unpublished @fp by advancing the state or
|
||||
* clearing the volatile id. Other state updates preserve the historical
|
||||
* fire-and-forget behavior.
|
||||
*/
|
||||
int ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp,
|
||||
unsigned int state)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!fp)
|
||||
return;
|
||||
return -ENOENT;
|
||||
|
||||
write_lock(&ft->lock);
|
||||
fp->f_state = state;
|
||||
if (state == FP_INITED &&
|
||||
(fp->f_state != FP_NEW || !has_file_id(fp->volatile_id))) {
|
||||
ret = -ENOENT;
|
||||
} else {
|
||||
fp->f_state = state;
|
||||
ret = 0;
|
||||
}
|
||||
write_unlock(&ft->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* ksmbd_mark_fp_closed() - mark fp closed under ft->lock and return how many
|
||||
* refs the teardown path owns.
|
||||
*
|
||||
* FP_INITED has a normal idr-owned reference, so teardown owns both that
|
||||
* reference and the transient lookup reference. FP_NEW is still owned by the
|
||||
* in-flight opener/reopener, which will drop the original reference after
|
||||
* ksmbd_update_fstate(..., FP_INITED) observes the cleared volatile id.
|
||||
* FP_CLOSED on entry means an earlier ksmbd_close_fd() already consumed the
|
||||
* idr-owned ref.
|
||||
*/
|
||||
static int ksmbd_mark_fp_closed(struct ksmbd_file *fp)
|
||||
{
|
||||
if (fp->f_state == FP_INITED) {
|
||||
set_close_state_blocked_works(fp);
|
||||
fp->f_state = FP_CLOSED;
|
||||
return 2;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -814,7 +857,8 @@ __close_file_table_ids(struct ksmbd_session *sess,
|
||||
struct ksmbd_tree_connect *tcon,
|
||||
bool (*skip)(struct ksmbd_tree_connect *tcon,
|
||||
struct ksmbd_file *fp,
|
||||
struct ksmbd_user *user))
|
||||
struct ksmbd_user *user),
|
||||
bool skip_preserves_fp)
|
||||
{
|
||||
struct ksmbd_file_table *ft = &sess->file_table;
|
||||
struct ksmbd_file *fp;
|
||||
@@ -822,32 +866,120 @@ __close_file_table_ids(struct ksmbd_session *sess,
|
||||
int num = 0;
|
||||
|
||||
while (1) {
|
||||
int n_to_drop;
|
||||
|
||||
write_lock(&ft->lock);
|
||||
fp = idr_get_next(ft->idr, &id);
|
||||
if (!fp) {
|
||||
write_unlock(&ft->lock);
|
||||
break;
|
||||
}
|
||||
|
||||
if (skip(tcon, fp, sess->user) ||
|
||||
!atomic_dec_and_test(&fp->refcount)) {
|
||||
if (!atomic_inc_not_zero(&fp->refcount)) {
|
||||
id++;
|
||||
write_unlock(&ft->lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
set_close_state_blocked_works(fp);
|
||||
idr_remove(ft->idr, fp->volatile_id);
|
||||
fp->volatile_id = KSMBD_NO_FID;
|
||||
write_unlock(&ft->lock);
|
||||
if (skip_preserves_fp) {
|
||||
/*
|
||||
* Session teardown: skip() is session_fd_check(),
|
||||
* which may sleep and mutates fp->conn / fp->tcon /
|
||||
* fp->volatile_id when it chooses to preserve fp
|
||||
* for durable reconnect. Unpublish fp from the
|
||||
* session idr here, under ft->lock, so that
|
||||
* __ksmbd_lookup_fd() through this session cannot
|
||||
* grant a new ksmbd_fp_get() reference to an fp
|
||||
* whose fields are about to be rewritten outside
|
||||
* the lock. Durable reconnect still reaches fp via
|
||||
* global_ft.
|
||||
*/
|
||||
idr_remove(ft->idr, id);
|
||||
fp->volatile_id = KSMBD_NO_FID;
|
||||
write_unlock(&ft->lock);
|
||||
|
||||
if (skip(tcon, fp, sess->user)) {
|
||||
/*
|
||||
* session_fd_check() has converted fp to
|
||||
* durable-preserve state and cleared its
|
||||
* per-conn fields. fp is already unpublished
|
||||
* above; the original idr-owned ref keeps it
|
||||
* alive for the durable scavenger. Drop only
|
||||
* the transient ref. atomic_dec() is safe --
|
||||
* atomic_inc_not_zero() succeeded on a
|
||||
* positive value and we added one more, so
|
||||
* refcount cannot be zero here.
|
||||
*/
|
||||
atomic_dec(&fp->refcount);
|
||||
id++;
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Keep the close-state decision under the same lock
|
||||
* observed by ksmbd_update_fstate(), which is how an
|
||||
* in-flight FP_NEW opener learns that teardown has
|
||||
* cleared its volatile id.
|
||||
*/
|
||||
write_lock(&ft->lock);
|
||||
n_to_drop = ksmbd_mark_fp_closed(fp);
|
||||
write_unlock(&ft->lock);
|
||||
} else {
|
||||
/*
|
||||
* Tree teardown: skip() is tree_conn_fd_check(), a
|
||||
* cheap pointer compare that doesn't sleep and has
|
||||
* no side effects, so keep the skip decision plus
|
||||
* the unpublish-and-mark-closed sequence atomic
|
||||
* under ft->lock. fps belonging to other tree
|
||||
* connects (skip() == true) stay fully published in
|
||||
* the session idr with no lock window.
|
||||
*/
|
||||
if (skip(tcon, fp, sess->user)) {
|
||||
atomic_dec(&fp->refcount);
|
||||
write_unlock(&ft->lock);
|
||||
id++;
|
||||
continue;
|
||||
}
|
||||
idr_remove(ft->idr, id);
|
||||
fp->volatile_id = KSMBD_NO_FID;
|
||||
n_to_drop = ksmbd_mark_fp_closed(fp);
|
||||
write_unlock(&ft->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* fp->volatile_id is already cleared to prevent stale idr
|
||||
* removal from a deferred final close. Remove fp from
|
||||
* m_fp_list here because __ksmbd_remove_fd() will skip the
|
||||
* list unlink when volatile_id is KSMBD_NO_FID.
|
||||
*/
|
||||
down_write(&fp->f_ci->m_lock);
|
||||
list_del_init(&fp->node);
|
||||
up_write(&fp->f_ci->m_lock);
|
||||
|
||||
__ksmbd_close_fd(ft, fp);
|
||||
|
||||
num++;
|
||||
/*
|
||||
* Drop the references this iteration owns:
|
||||
*
|
||||
* n_to_drop == 2: we observed FP_INITED and committed
|
||||
* the FP_CLOSED transition ourselves, so we own the
|
||||
* transient (+1) and the still-intact idr-owned ref.
|
||||
*
|
||||
* n_to_drop == 1: either a prior ksmbd_close_fd()
|
||||
* already consumed the idr-owned ref, or fp was still
|
||||
* FP_NEW and the in-flight opener/reopener must keep
|
||||
* the original reference until ksmbd_update_fstate()
|
||||
* observes the cleared volatile id.
|
||||
*
|
||||
* If we end up as the final putter, finalize fp and
|
||||
* account the open_files_count decrement via the caller's
|
||||
* atomic_sub(num, ...). Otherwise the remaining user's
|
||||
* ksmbd_fd_put() reaches __put_fd_final(), which does its
|
||||
* own atomic_dec(&open_files_count), so we must not count
|
||||
* this fp here -- doing so would double-decrement the
|
||||
* connection-wide counter.
|
||||
*/
|
||||
if (atomic_sub_and_test(n_to_drop, &fp->refcount)) {
|
||||
__ksmbd_close_fd(NULL, fp);
|
||||
num++;
|
||||
}
|
||||
id++;
|
||||
}
|
||||
|
||||
@@ -1082,6 +1214,9 @@ static bool session_fd_check(struct ksmbd_tree_connect *tcon,
|
||||
if (!is_reconnectable(fp))
|
||||
return false;
|
||||
|
||||
if (fp->f_state != FP_INITED)
|
||||
return false;
|
||||
|
||||
if (WARN_ON_ONCE(!fp->conn))
|
||||
return false;
|
||||
|
||||
@@ -1127,7 +1262,8 @@ void ksmbd_close_tree_conn_fds(struct ksmbd_work *work)
|
||||
{
|
||||
int num = __close_file_table_ids(work->sess,
|
||||
work->tcon,
|
||||
tree_conn_fd_check);
|
||||
tree_conn_fd_check,
|
||||
false);
|
||||
|
||||
atomic_sub(num, &work->conn->stats.open_files_count);
|
||||
}
|
||||
@@ -1136,7 +1272,8 @@ void ksmbd_close_session_fds(struct ksmbd_work *work)
|
||||
{
|
||||
int num = __close_file_table_ids(work->sess,
|
||||
work->tcon,
|
||||
session_fd_check);
|
||||
session_fd_check,
|
||||
true);
|
||||
|
||||
atomic_sub(num, &work->conn->stats.open_files_count);
|
||||
}
|
||||
@@ -1268,7 +1405,7 @@ void ksmbd_destroy_file_table(struct ksmbd_session *sess)
|
||||
if (!ft->idr)
|
||||
return;
|
||||
|
||||
__close_file_table_ids(sess, NULL, session_fd_check);
|
||||
__close_file_table_ids(sess, NULL, session_fd_check, true);
|
||||
idr_destroy(ft->idr);
|
||||
kfree(ft->idr);
|
||||
ft->idr = NULL;
|
||||
|
||||
@@ -172,8 +172,8 @@ int ksmbd_close_inode_fds(struct ksmbd_work *work, struct inode *inode);
|
||||
int ksmbd_init_global_file_table(void);
|
||||
void ksmbd_free_global_file_table(void);
|
||||
void ksmbd_set_fd_limit(unsigned long limit);
|
||||
void ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp,
|
||||
unsigned int state);
|
||||
int ksmbd_update_fstate(struct ksmbd_file_table *ft, struct ksmbd_file *fp,
|
||||
unsigned int state);
|
||||
bool ksmbd_vfs_compare_durable_owner(struct ksmbd_file *fp,
|
||||
struct ksmbd_user *user);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user