From 0a6428978df1c16971db0b0daae60b8fc0c4eb9b Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 24 Oct 2025 15:48:15 +0200 Subject: [PATCH 001/145] smb: smbdirect: introduce smbdirect_all_c_files.c This is a very basic start in order to introduce common functions, which will be shared by client and server. As a start smbdirect_all_c_files.c will be included in fs/smb/client/smbdirect.c and fs/smb/server/transport_rdma.c in order to allow tiny steps in the direction of moving to a few exported functions from an smbdirect.ko. Step by step this will include individual c files with the real functions. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_all_c_files.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 fs/smb/common/smbdirect/smbdirect_all_c_files.c diff --git a/fs/smb/common/smbdirect/smbdirect_all_c_files.c b/fs/smb/common/smbdirect/smbdirect_all_c_files.c new file mode 100644 index 000000000000..610556fb7931 --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect_all_c_files.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2025, Stefan Metzmacher + */ + +/* + * This is a temporary solution in order + * to include the common smbdirect functions + * into .c files in order to make a transformation + * in tiny bisectable steps possible. + * + * It will be replaced by a smbdirect.ko with + * exported public functions at the end. + */ +#ifndef SMBDIRECT_USE_INLINE_C_FILES +#error SMBDIRECT_USE_INLINE_C_FILES define needed +#endif From 86bca3df0fa0e6f9efd81165900de0af098f6bc2 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 28 Oct 2025 11:11:16 +0100 Subject: [PATCH 002/145] smb: smbdirect: introduce smbdirect_internal.h This will be included by individual .c files as first header. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_internal.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 fs/smb/common/smbdirect/smbdirect_internal.h diff --git a/fs/smb/common/smbdirect/smbdirect_internal.h b/fs/smb/common/smbdirect/smbdirect_internal.h new file mode 100644 index 000000000000..e593eee06481 --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect_internal.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2025, Stefan Metzmacher + */ + +#ifndef __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ +#define __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ + +#include "smbdirect.h" +#include "smbdirect_pdu.h" +#include "smbdirect_socket.h" + +#endif /* __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ */ From bd33b696eb1638a169975552eb4bda3e5ab2cddc Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 26 Aug 2025 15:24:33 +0200 Subject: [PATCH 003/145] smb: client: include smbdirect_all_c_files.c This is the first tiny step in order to use common functions in future. Once we have all functions in common we'll move to an smbdirect.ko that exports public functions instead of including the .c file. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: Arnd Bergmann Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/Kconfig | 1 + fs/smb/client/smbdirect.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/fs/smb/client/Kconfig b/fs/smb/client/Kconfig index d112da38c881..ec2abfe0a62f 100644 --- a/fs/smb/client/Kconfig +++ b/fs/smb/client/Kconfig @@ -181,6 +181,7 @@ if CIFS config CIFS_SMB_DIRECT bool "SMB Direct support" depends on CIFS=m && INFINIBAND && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y + select SG_POOL help Enables SMB Direct support for SMB 3.0, 3.02 and 3.1.1. SMB Direct allows transferring SMB packets over RDMA. If unsure, diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 461658105013..443ff427e28f 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -4,6 +4,9 @@ * * Author(s): Long Li */ + +#define SMBDIRECT_USE_INLINE_C_FILES 1 + #include #include #include @@ -162,6 +165,16 @@ module_param(smbd_logging_level, uint, 0644); MODULE_PARM_DESC(smbd_logging_level, "Logging level for SMBD transport, 0 (default): error, 1: info"); +static void smbd_disconnect_rdma_connection(struct smbdirect_socket *sc); + +/* + * This is a temporary solution until all code + * is moved to smbdirect_all_c_files.c and we + * have an smbdirect.ko that exports the required + * functions. + */ +#include "../common/smbdirect/smbdirect_all_c_files.c" + #define log_rdma(level, class, fmt, args...) \ do { \ if (level <= smbd_logging_level || class & smbd_logging_class) \ From cdb9545c238ff175e72b38269dc6d89c9ccd30b2 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 26 Aug 2025 15:24:33 +0200 Subject: [PATCH 004/145] smb: server: include smbdirect_all_c_files.c This is the first tiny step in order to use common functions in future. 
Once we have all functions in common we'll move to an smbdirect.ko that exports public functions instead of including the .c file. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index dbc8dedb85dc..12a8def52ff8 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -9,6 +9,8 @@ #define SUBMOD_NAME "smb_direct" +#define SMBDIRECT_USE_INLINE_C_FILES 1 + #include #include #include @@ -30,6 +32,16 @@ #include "../common/smbdirect/smbdirect_socket.h" #include "transport_rdma.h" +static void smb_direct_disconnect_rdma_connection(struct smbdirect_socket *sc); + +/* + * This is a temporary solution until all code + * is moved to smbdirect_all_c_files.c and we + * have an smbdirect.ko that exports the required + * functions. + */ +#include "../common/smbdirect/smbdirect_all_c_files.c" + #define SMB_DIRECT_PORT_IWARP 5445 #define SMB_DIRECT_PORT_INFINIBAND 445 From 83c769a9f45cc4a111e60690fa5e64929dba948a Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 26 Aug 2025 15:01:28 +0200 Subject: [PATCH 005/145] smb: smbdirect: introduce smbdirect_socket.c to be filled Over time smbdirect_socket.c will get more and more functions which will be included in fs/smb/client/smbdirect.c and fs/smb/server/transport_rdma.c via fs/smb/common/smbdirect/smbdirect_all_c_files.c in order to allow tiny steps in the direction of moving to a few exported functions from an smbdirect.ko. That's why __maybe_unused is added for now it will be removed at the end of the road to common code. 
Note the Copyright (C) 2017, Microsoft Corporation is added as a lot of functions from fs/smb/client/smbdirect.c will be moved into this file soon and I don't want to forget about adding it. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_all_c_files.c | 1 + fs/smb/common/smbdirect/smbdirect_socket.c | 7 +++++++ 2 files changed, 8 insertions(+) create mode 100644 fs/smb/common/smbdirect/smbdirect_socket.c diff --git a/fs/smb/common/smbdirect/smbdirect_all_c_files.c b/fs/smb/common/smbdirect/smbdirect_all_c_files.c index 610556fb7931..269d8c28c92e 100644 --- a/fs/smb/common/smbdirect/smbdirect_all_c_files.c +++ b/fs/smb/common/smbdirect/smbdirect_all_c_files.c @@ -15,3 +15,4 @@ #ifndef SMBDIRECT_USE_INLINE_C_FILES #error SMBDIRECT_USE_INLINE_C_FILES define needed #endif +#include "smbdirect_socket.c" diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c new file mode 100644 index 000000000000..0a96f5db6ff3 --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2017, Microsoft Corporation. + * Copyright (c) 2025, Stefan Metzmacher + */ + +#include "smbdirect_internal.h" From df76b456280ba2c467907b9f25e1ddc8aea773f1 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 26 Aug 2025 15:01:28 +0200 Subject: [PATCH 006/145] smb: smbdirect: introduce smbdirect_socket_prepare_create() This will be used by client and server until we reach the point where we have only public functions from an smbdirect.ko. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index 0a96f5db6ff3..421a5c2c705e 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -5,3 +5,22 @@ */ #include "smbdirect_internal.h" + +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_socket_prepare_create(struct smbdirect_socket *sc, + const struct smbdirect_socket_parameters *sp, + struct workqueue_struct *workqueue) +{ + smbdirect_socket_init(sc); + + /* + * Make a copy of the callers parameters + * from here we only work on the copy + */ + sc->parameters = *sp; + + /* + * Remember the callers workqueue + */ + sc->workqueue = workqueue; +} From 036614cb738a9d092814eba48286da6e1c63f704 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 23 Oct 2025 13:40:43 +0200 Subject: [PATCH 007/145] smb: smbdirect: introduce smbdirect_socket_set_logging() This will be used by client and server in order to setup their own logging functions. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index 421a5c2c705e..6c2732496cf7 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -24,3 +24,23 @@ static void smbdirect_socket_prepare_create(struct smbdirect_socket *sc, */ sc->workqueue = workqueue; } + +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_socket_set_logging(struct smbdirect_socket *sc, + void *private_ptr, + bool (*needed)(struct smbdirect_socket *sc, + void *private_ptr, + unsigned int lvl, + unsigned int cls), + void (*vaprintf)(struct smbdirect_socket *sc, + const char *func, + unsigned int line, + void *private_ptr, + unsigned int lvl, + unsigned int cls, + struct va_format *vaf)) +{ + sc->logging.private_ptr = private_ptr; + sc->logging.needed = needed; + sc->logging.vaprintf = vaprintf; +} From 67ac123e026b8b98e6d2f3f7ba3706c32ecde019 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 2 Sep 2025 12:42:51 +0200 Subject: [PATCH 008/145] smb: smbdirect: introduce smbdirect_socket_wake_up_all() This is a superset of smbd_disconnect_wake_up_all() in the client and smb_direct_disconnect_wake_up_all() in the server and will replace them. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index 6c2732496cf7..30a4e973ce77 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -44,3 +44,22 @@ static void smbdirect_socket_set_logging(struct smbdirect_socket *sc, sc->logging.needed = needed; sc->logging.vaprintf = vaprintf; } + +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_socket_wake_up_all(struct smbdirect_socket *sc) +{ + /* + * Wake up all waiters in all wait queues + * in order to notice the broken connection. + */ + wake_up_all(&sc->status_wait); + wake_up_all(&sc->send_io.bcredits.wait_queue); + wake_up_all(&sc->send_io.lcredits.wait_queue); + wake_up_all(&sc->send_io.credits.wait_queue); + wake_up_all(&sc->send_io.pending.dec_wait_queue); + wake_up_all(&sc->send_io.pending.zero_wait_queue); + wake_up_all(&sc->recv_io.reassembly.wait_queue); + wake_up_all(&sc->rw_io.credits.wait_queue); + wake_up_all(&sc->mr_io.ready.wait_queue); + wake_up_all(&sc->mr_io.cleanup.wait_queue); +} From 1be83fad0c74b288d3664ff0677da19a997bcbf3 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 26 Aug 2025 16:01:15 +0200 Subject: [PATCH 009/145] smb: smbdirect: introduce smbdirect_socket_cleanup_work() This is basically a copy of smbd_disconnect_rdma_work() and smb_direct_disconnect_rdma_work() and will replace them in the next steps. The difference is that a message is logged if first_error is still 0, which makes it easier to analyze problems. 
And also disable any complex work from recv_io objects, currently these are not used and the work is always disabled anyway, but this prepares future changes. It also makes sure it's never used in an interrupt, which is not expected anyway... Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.c | 75 +++++++++++++++++++++- 1 file changed, 74 insertions(+), 1 deletion(-) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index 30a4e973ce77..a851b874b819 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -6,6 +6,8 @@ #include "smbdirect_internal.h" +static void smbdirect_socket_cleanup_work(struct work_struct *work); + __maybe_unused /* this is temporary while this file is included in others */ static void smbdirect_socket_prepare_create(struct smbdirect_socket *sc, const struct smbdirect_socket_parameters *sp, @@ -23,6 +25,8 @@ static void smbdirect_socket_prepare_create(struct smbdirect_socket *sc, * Remember the callers workqueue */ sc->workqueue = workqueue; + + INIT_WORK(&sc->disconnect_work, smbdirect_socket_cleanup_work); } __maybe_unused /* this is temporary while this file is included in others */ @@ -45,7 +49,6 @@ static void smbdirect_socket_set_logging(struct smbdirect_socket *sc, sc->logging.vaprintf = vaprintf; } -__maybe_unused /* this is temporary while this file is included in others */ static void smbdirect_socket_wake_up_all(struct smbdirect_socket *sc) { /* @@ -63,3 +66,73 @@ static void smbdirect_socket_wake_up_all(struct smbdirect_socket *sc) wake_up_all(&sc->mr_io.ready.wait_queue); wake_up_all(&sc->mr_io.cleanup.wait_queue); } + +static void smbdirect_socket_cleanup_work(struct work_struct *work) +{ + struct smbdirect_socket *sc = + 
container_of(work, struct smbdirect_socket, disconnect_work); + + /* + * This should not never be called in an interrupt! + */ + WARN_ON_ONCE(in_interrupt()); + + if (!sc->first_error) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "%s called with first_error==0\n", + smbdirect_socket_status_string(sc->status)); + + sc->first_error = -ECONNABORTED; + } + + /* + * make sure this and other work is not queued again + * but here we don't block and avoid + * disable[_delayed]_work_sync() + */ + disable_work(&sc->disconnect_work); + disable_work(&sc->connect.work); + disable_work(&sc->recv_io.posted.refill_work); + disable_work(&sc->mr_io.recovery_work); + disable_work(&sc->idle.immediate_work); + disable_delayed_work(&sc->idle.timer_work); + + switch (sc->status) { + case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: + case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: + case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: + case SMBDIRECT_SOCKET_CONNECTED: + case SMBDIRECT_SOCKET_ERROR: + sc->status = SMBDIRECT_SOCKET_DISCONNECTING; + rdma_disconnect(sc->rdma.cm_id); + break; + + case SMBDIRECT_SOCKET_CREATED: + case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: + case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: + case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: + case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: + case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: + case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: + /* + * rdma_{accept,connect}() never reached + * RDMA_CM_EVENT_ESTABLISHED + */ + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; + break; + + case SMBDIRECT_SOCKET_DISCONNECTING: + case SMBDIRECT_SOCKET_DISCONNECTED: + case SMBDIRECT_SOCKET_DESTROYED: + break; + } + + /* + * Wake up all waiters in all wait queues + * in order to notice the broken connection. 
+ */ + smbdirect_socket_wake_up_all(sc); +} From d85614860184f31153ff243ff06e34d76c22be7b Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 26 Aug 2025 17:25:24 +0200 Subject: [PATCH 010/145] smb: smbdirect: introduce smbdirect_socket_schedule_cleanup[{_lvl,_status}]() smbdirect_socket_schedule_cleanup() is more or less a copy of smbd_disconnect_rdma_connection() and smb_direct_disconnect_rdma_connection(). It will replace them in the next steps. A difference is that the location of the first error is logged, which makes it easier to analyze problems. And also disable any complex work from recv_io objects, currently these are not used and the work is always disabled anyway, but this prepares future changes. It also gets an explicit error passed in instead of hardcoding -ECONNABORTED. Besides the main smbdirect_socket_schedule_cleanup() there are some special additions: - smbdirect_socket_schedule_cleanup_lvl(), will be used for cases where we don't want a log message with SMBDIRECT_LOG_ERR. - smbdirect_socket_schedule_cleanup_status(), will be used to specify the log level together with a direct final status, for the RDMA_CM_EVENT_DEVICE_REMOVAL and RDMA_CM_EVENT_DISCONNECTED cases where we need to avoid SMBDIRECT_SOCKET_DISCONNECTING and rdma_disconnect() in smbdirect_socket_cleanup_work(). 
With this we're also able to define a default for __SMBDIRECT_SOCKET_DISCONNECT() just using: smbdirect_socket_schedule_cleanup(__sc, -ECONNABORTED) Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_internal.h | 22 +++++ fs/smb/common/smbdirect/smbdirect_socket.c | 95 ++++++++++++++++++++ fs/smb/common/smbdirect/smbdirect_socket.h | 5 ++ 3 files changed, 122 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_internal.h b/fs/smb/common/smbdirect/smbdirect_internal.h index e593eee06481..c946e53f94cd 100644 --- a/fs/smb/common/smbdirect/smbdirect_internal.h +++ b/fs/smb/common/smbdirect/smbdirect_internal.h @@ -10,4 +10,26 @@ #include "smbdirect_pdu.h" #include "smbdirect_socket.h" +static void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, + const char *macro_name, + unsigned int lvl, + const char *func, + unsigned int line, + int error, + enum smbdirect_socket_status *force_status); +#define smbdirect_socket_schedule_cleanup(__sc, __error) \ + __smbdirect_socket_schedule_cleanup(__sc, \ + "smbdirect_socket_schedule_cleanup", SMBDIRECT_LOG_ERR, \ + __func__, __LINE__, __error, NULL) +#define smbdirect_socket_schedule_cleanup_lvl(__sc, __lvl, __error) \ + __smbdirect_socket_schedule_cleanup(__sc, \ + "smbdirect_socket_schedule_cleanup_lvl", __lvl, \ + __func__, __LINE__, __error, NULL) +#define smbdirect_socket_schedule_cleanup_status(__sc, __lvl, __error, __status) do { \ + enum smbdirect_socket_status __force_status = __status; \ + __smbdirect_socket_schedule_cleanup(__sc, \ + "smbdirect_socket_schedule_cleanup_status", __lvl, \ + __func__, __LINE__, __error, &__force_status); \ +} while (0) + #endif /* __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ */ diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c 
index a851b874b819..ba7e3ac32d92 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -67,6 +67,101 @@ static void smbdirect_socket_wake_up_all(struct smbdirect_socket *sc) wake_up_all(&sc->mr_io.cleanup.wait_queue); } +__maybe_unused /* this is temporary while this file is included in others */ +static void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, + const char *macro_name, + unsigned int lvl, + const char *func, + unsigned int line, + int error, + enum smbdirect_socket_status *force_status) +{ + bool was_first = false; + + if (!sc->first_error) { + ___smbdirect_log_generic(sc, func, line, + lvl, + SMBDIRECT_LOG_RDMA_EVENT, + "%s(%1pe%s%s) called from %s in line=%u status=%s\n", + macro_name, + SMBDIRECT_DEBUG_ERR_PTR(error), + force_status ? ", " : "", + force_status ? smbdirect_socket_status_string(*force_status) : "", + func, line, + smbdirect_socket_status_string(sc->status)); + if (error) + sc->first_error = error; + else + sc->first_error = -ECONNABORTED; + was_first = true; + } + + /* + * make sure other work (than disconnect_work) + * is not queued again but here we don't block and avoid + * disable[_delayed]_work_sync() + */ + disable_work(&sc->connect.work); + disable_work(&sc->recv_io.posted.refill_work); + disable_work(&sc->mr_io.recovery_work); + disable_work(&sc->idle.immediate_work); + disable_delayed_work(&sc->idle.timer_work); + + switch (sc->status) { + case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: + case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: + case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: + case SMBDIRECT_SOCKET_ERROR: + case SMBDIRECT_SOCKET_DISCONNECTING: + case SMBDIRECT_SOCKET_DISCONNECTED: + case SMBDIRECT_SOCKET_DESTROYED: + /* + * Keep the current error status + */ + break; + + case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: + case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: + sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED; + break; 
+ + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: + sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED; + break; + + case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: + case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED; + break; + + case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: + case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; + break; + + case SMBDIRECT_SOCKET_CREATED: + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; + break; + + case SMBDIRECT_SOCKET_CONNECTED: + sc->status = SMBDIRECT_SOCKET_ERROR; + break; + } + + if (force_status && (was_first || *force_status > sc->status)) + sc->status = *force_status; + + /* + * Wake up all waiters in all wait queues + * in order to notice the broken connection. + */ + smbdirect_socket_wake_up_all(sc); + + queue_work(sc->workqueue, &sc->disconnect_work); +} + static void smbdirect_socket_cleanup_work(struct work_struct *work) { struct smbdirect_socket *sc = diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 22184e53d445..44506fc5cb92 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -602,6 +602,11 @@ static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc) #define SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status) \ __SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, /* nothing */) +#ifndef __SMBDIRECT_SOCKET_DISCONNECT +#define __SMBDIRECT_SOCKET_DISCONNECT(__sc) \ + smbdirect_socket_schedule_cleanup(__sc, -ECONNABORTED) +#endif /* ! 
__SMBDIRECT_SOCKET_DISCONNECT */ + #define SMBDIRECT_CHECK_STATUS_DISCONNECT(__sc, __expected_status) \ __SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected_status, \ __SMBDIRECT_SOCKET_DISCONNECT(__sc);) From 3e319f26a79afa65fc98b9ce15979c6219e1be04 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 26 Aug 2025 15:01:28 +0200 Subject: [PATCH 011/145] smb: smbdirect: introduce smbdirect_connection.c to be filled Over time smbdirect_connection.c will get more and more functions which will be included in fs/smb/client/smbdirect.c and fs/smb/server/transport_rdma.c via fs/smb/common/smbdirect/smbdirect_all_c_files.c in order to allow tiny steps in the direction of moving to a few exported functions from an smbdirect.ko. That's why __maybe_unused is added for now it will be removed at the end of the road to common code. Note the Copyright (C) 2017, Microsoft Corporation is added as a lot of functions from fs/smb/client/smbdirect.c will be moved into this file soon and I don't want to forget about adding it. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_all_c_files.c | 1 + fs/smb/common/smbdirect/smbdirect_connection.c | 7 +++++++ 2 files changed, 8 insertions(+) create mode 100644 fs/smb/common/smbdirect/smbdirect_connection.c diff --git a/fs/smb/common/smbdirect/smbdirect_all_c_files.c b/fs/smb/common/smbdirect/smbdirect_all_c_files.c index 269d8c28c92e..93098598fbdc 100644 --- a/fs/smb/common/smbdirect/smbdirect_all_c_files.c +++ b/fs/smb/common/smbdirect/smbdirect_all_c_files.c @@ -16,3 +16,4 @@ #error SMBDIRECT_USE_INLINE_C_FILES define needed #endif #include "smbdirect_socket.c" +#include "smbdirect_connection.c" diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c new file mode 100644 index 000000000000..0a96f5db6ff3 --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2017, Microsoft Corporation. + * Copyright (c) 2025, Stefan Metzmacher + */ + +#include "smbdirect_internal.h" From bb0a49edfe1ac5d831c897e4869a167cddea835f Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 26 Aug 2025 19:05:01 +0200 Subject: [PATCH 012/145] smb: smbdirect: introduce smbdirect_connection_{get,put}_recv_io() These are basically copies of {get,put}_receive_buffer() in the client and they are very similar to {get_free,put}_recvmsg() in the server. The only difference to {get_free,put}_recvmsg() is the updating of the sc->statistics.* counters. In addition smbdirect_connection_get_recv_io() uses list_first_entry_or_null() in order to simplify the code. We also only use it on a healthy connection. smbdirect_connection_put_recv_io() uses msg->socket instead of an explicit argument. And it disables any complex_work. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 0a96f5db6ff3..96f00c342a89 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -5,3 +5,45 @@ */ #include "smbdirect_internal.h" + +__maybe_unused /* this is temporary while this file is included in others */ +static struct smbdirect_recv_io *smbdirect_connection_get_recv_io(struct smbdirect_socket *sc) +{ + struct smbdirect_recv_io *msg = NULL; + unsigned long flags; + + spin_lock_irqsave(&sc->recv_io.free.lock, flags); + if (likely(!sc->first_error)) + msg = list_first_entry_or_null(&sc->recv_io.free.list, + struct smbdirect_recv_io, + list); + if (likely(msg)) { + list_del(&msg->list); + sc->statistics.get_receive_buffer++; + } + spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); + + return msg; +} + +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_connection_put_recv_io(struct smbdirect_recv_io *msg) +{ + struct smbdirect_socket *sc = msg->socket; + unsigned long flags; + + if (likely(msg->sge.length != 0)) { + ib_dma_unmap_single(sc->ib.dev, + msg->sge.addr, + msg->sge.length, + DMA_FROM_DEVICE); + msg->sge.length = 0; + } + + spin_lock_irqsave(&sc->recv_io.free.lock, flags); + list_add_tail(&msg->list, &sc->recv_io.free.list); + sc->statistics.put_receive_buffer++; + spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); + + queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); +} From b90169bcb2a6f46b1b5d7a17d5fd15a64ab552ff Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 27 Aug 2025 16:39:22 +0200 
Subject: [PATCH 013/145] smb: smbdirect: introduce smbdirect_connection_reassembly_{append,first}_recv_io() These are basically copies of enqueue_reassembly() and [_]get_first_reassembly() of both client and server. The only difference is that enqueue_reassembly() of the server does not have: sc->statistics.enqueue_reassembly_queue++ Also smbdirect_connection_reassembly_first_recv_io() makes use of list_first_entry_or_null() in order to simplify the code. In the next commits they will replace the existing functions. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 96f00c342a89..ce10aff54349 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -47,3 +47,38 @@ static void smbdirect_connection_put_recv_io(struct smbdirect_recv_io *msg) queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); } + +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_connection_reassembly_append_recv_io(struct smbdirect_socket *sc, + struct smbdirect_recv_io *msg, + u32 data_length) +{ + unsigned long flags; + + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); + list_add_tail(&msg->list, &sc->recv_io.reassembly.list); + sc->recv_io.reassembly.queue_length++; + /* + * Make sure reassembly_data_length is updated after list and + * reassembly_queue_length are updated. 
On the dequeue side + * reassembly_data_length is checked without a lock to determine + * if reassembly_queue_length and list is up to date + */ + virt_wmb(); + sc->recv_io.reassembly.data_length += data_length; + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); + sc->statistics.enqueue_reassembly_queue++; +} + +__maybe_unused /* this is temporary while this file is included in others */ +static struct smbdirect_recv_io * +smbdirect_connection_reassembly_first_recv_io(struct smbdirect_socket *sc) +{ + struct smbdirect_recv_io *msg; + + msg = list_first_entry_or_null(&sc->recv_io.reassembly.list, + struct smbdirect_recv_io, + list); + + return msg; +} From 1593f5d004f5f3812ba175f4253fba07853db24e Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 27 Aug 2025 17:15:55 +0200 Subject: [PATCH 014/145] smb: smbdirect: introduce smbdirect_connection_idle_timer_work() This is basically a copy of idle_connection_timer() in the client and smb_direct_idle_connection_timer() in the server. The only difference is that the server does not have logging. Currently the callers set their own timer function after smbdirect_socket_prepare_create(), but that will change in the next steps... 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 31 +++++++++++++++++++ fs/smb/common/smbdirect/smbdirect_internal.h | 2 ++ fs/smb/common/smbdirect/smbdirect_socket.c | 3 +- 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index ce10aff54349..6e4b7aa2440d 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -82,3 +82,34 @@ smbdirect_connection_reassembly_first_recv_io(struct smbdirect_socket *sc) return msg; } + +static void smbdirect_connection_idle_timer_work(struct work_struct *work) +{ + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, idle.timer_work.work); + const struct smbdirect_socket_parameters *sp = &sc->parameters; + + if (sc->idle.keepalive != SMBDIRECT_KEEPALIVE_NONE) { + smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_ERR, + "%s => timeout sc->idle.keepalive=%s\n", + smbdirect_socket_status_string(sc->status), + sc->idle.keepalive == SMBDIRECT_KEEPALIVE_SENT ? 
+ "SENT" : "PENDING"); + smbdirect_socket_schedule_cleanup(sc, -ETIMEDOUT); + return; + } + + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) + return; + + /* + * Now use the keepalive timeout (instead of keepalive interval) + * in order to wait for a response + */ + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->keepalive_timeout_msec)); + smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_INFO, + "schedule send of empty idle message\n"); + queue_work(sc->workqueue, &sc->idle.immediate_work); +} diff --git a/fs/smb/common/smbdirect/smbdirect_internal.h b/fs/smb/common/smbdirect/smbdirect_internal.h index c946e53f94cd..2d7c69f71ee0 100644 --- a/fs/smb/common/smbdirect/smbdirect_internal.h +++ b/fs/smb/common/smbdirect/smbdirect_internal.h @@ -32,4 +32,6 @@ static void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, __func__, __LINE__, __error, &__force_status); \ } while (0) +static void smbdirect_connection_idle_timer_work(struct work_struct *work); + #endif /* __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ */ diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index ba7e3ac32d92..34971c2700ee 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -27,6 +27,8 @@ static void smbdirect_socket_prepare_create(struct smbdirect_socket *sc, sc->workqueue = workqueue; INIT_WORK(&sc->disconnect_work, smbdirect_socket_cleanup_work); + + INIT_DELAYED_WORK(&sc->idle.timer_work, smbdirect_connection_idle_timer_work); } __maybe_unused /* this is temporary while this file is included in others */ @@ -67,7 +69,6 @@ static void smbdirect_socket_wake_up_all(struct smbdirect_socket *sc) wake_up_all(&sc->mr_io.cleanup.wait_queue); } -__maybe_unused /* this is temporary while this file is included in others */ static void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, const char 
*macro_name, unsigned int lvl, From 8e342e68992e93db0c999f892e7aa1eb35c67709 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 28 Aug 2025 13:38:29 +0200 Subject: [PATCH 015/145] smb: smbdirect: set SMBDIRECT_KEEPALIVE_NONE before disable_delayed_work(&sc->idle.timer_work); This avoids a potentially confusing log message from smbdirect_connection_idle_timer_work() if it's already running. This is a very small race window and not really needed, but it feels better when reading the code. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index 34971c2700ee..b5ce5f6443f6 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -106,6 +106,7 @@ static void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, disable_work(&sc->recv_io.posted.refill_work); disable_work(&sc->mr_io.recovery_work); disable_work(&sc->idle.immediate_work); + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; disable_delayed_work(&sc->idle.timer_work); switch (sc->status) { @@ -191,6 +192,7 @@ static void smbdirect_socket_cleanup_work(struct work_struct *work) disable_work(&sc->recv_io.posted.refill_work); disable_work(&sc->mr_io.recovery_work); disable_work(&sc->idle.immediate_work); + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; disable_delayed_work(&sc->idle.timer_work); switch (sc->status) { From 66a840b3ba538142fec5895cb197b1ec9f3a717c Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 28 Aug 2025 16:45:23 +0200 Subject: [PATCH 016/145] smb: smbdirect: introduce smbdirect_frwr_is_supported() This will replace frwr_is_supported() on the client and rdma_frwr_is_supported() 
on the server. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index b5ce5f6443f6..05a284526aa2 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -6,6 +6,22 @@ #include "smbdirect_internal.h" +__maybe_unused /* this is temporary while this file is included in others */ +static bool smbdirect_frwr_is_supported(const struct ib_device_attr *attrs) +{ + /* + * Test if FRWR (Fast Registration Work Requests) is supported on the + * device This implementation requires FRWR on RDMA read/write return + * value: true if it is supported + */ + + if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) + return false; + if (attrs->max_fast_reg_page_list_len == 0) + return false; + return true; +} + static void smbdirect_socket_cleanup_work(struct work_struct *work); __maybe_unused /* this is temporary while this file is included in others */ From 64d6bd25339bb0820556af6a46e41a23a34a2ed3 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 11 Sep 2025 19:41:53 +0200 Subject: [PATCH 017/145] smb: smbdirect: introduce smbdirect_socket.{send,recv}_io.mem.gfp_mask This will allow common code to be split out while still using the gfp_mask currently used. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 44506fc5cb92..ef0c48814311 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -158,8 +158,9 @@ struct smbdirect_socket { * smbdirect_send_io buffers */ struct { - struct kmem_cache *cache; - mempool_t *pool; + struct kmem_cache *cache; + mempool_t *pool; + gfp_t gfp_mask; } mem; /* @@ -223,8 +224,9 @@ struct smbdirect_socket { * smbdirect_recv_io buffers */ struct { - struct kmem_cache *cache; - mempool_t *pool; + struct kmem_cache *cache; + mempool_t *pool; + gfp_t gfp_mask; } mem; /* @@ -505,6 +507,8 @@ static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc) INIT_DELAYED_WORK(&sc->idle.timer_work, __smbdirect_socket_disabled_work); disable_delayed_work_sync(&sc->idle.timer_work); + sc->send_io.mem.gfp_mask = GFP_KERNEL; + atomic_set(&sc->send_io.bcredits.count, 0); init_waitqueue_head(&sc->send_io.bcredits.wait_queue); @@ -518,6 +522,8 @@ static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc) init_waitqueue_head(&sc->send_io.pending.dec_wait_queue); init_waitqueue_head(&sc->send_io.pending.zero_wait_queue); + sc->recv_io.mem.gfp_mask = GFP_KERNEL; + INIT_LIST_HEAD(&sc->recv_io.free.list); spin_lock_init(&sc->recv_io.free.lock); From 8fde1963386a2ba1b7e57a347a00fd8b98cd07d3 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 11 Sep 2025 19:49:18 +0200 Subject: [PATCH 018/145] smb: smbdirect: introduce smbdirect_connection_{alloc,free}_send_io() These are more or less copies of smb_direct_{alloc,free}_sendmsg() in the server. 
The only difference is that we use ib_dma_unmap_page() for all sges, this simplifies the logic and doesn't matter as ib_dma_unmap_single() and ib_dma_unmap_page() both operate on dma_addr_t and dma_unmap_single_attrs() is just an alias for dma_unmap_page_attrs(). We already have in inconsistency like that in the client code where we use ib_dma_unmap_single(), while we mapped using ib_dma_map_page(). The new functions will replace the existing once in the next commits and will also be used in the client. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 6e4b7aa2440d..0b35840ad4f2 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -6,6 +6,53 @@ #include "smbdirect_internal.h" +__maybe_unused /* this is temporary while this file is included in others */ +static struct smbdirect_send_io *smbdirect_connection_alloc_send_io(struct smbdirect_socket *sc) +{ + struct smbdirect_send_io *msg; + + msg = mempool_alloc(sc->send_io.mem.pool, sc->send_io.mem.gfp_mask); + if (!msg) + return ERR_PTR(-ENOMEM); + msg->socket = sc; + INIT_LIST_HEAD(&msg->sibling_list); + msg->num_sge = 0; + + return msg; +} + +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_connection_free_send_io(struct smbdirect_send_io *msg) +{ + struct smbdirect_socket *sc = msg->socket; + size_t i; + + /* + * The list needs to be empty! + * The caller should take care of it. 
+ */ + WARN_ON_ONCE(!list_empty(&msg->sibling_list)); + + /* + * Note we call ib_dma_unmap_page(), even if some sges are mapped using + * ib_dma_map_single(). + * + * The difference between _single() and _page() only matters for the + * ib_dma_map_*() case. + * + * For the ib_dma_unmap_*() case it does not matter as both take the + * dma_addr_t and dma_unmap_single_attrs() is just an alias to + * dma_unmap_page_attrs(). + */ + for (i = 0; i < msg->num_sge; i++) + ib_dma_unmap_page(sc->ib.dev, + msg->sge[i].addr, + msg->sge[i].length, + DMA_TO_DEVICE); + + mempool_free(msg, sc->send_io.mem.pool); +} + __maybe_unused /* this is temporary while this file is included in others */ static struct smbdirect_recv_io *smbdirect_connection_get_recv_io(struct smbdirect_socket *sc) { From c81459bddbf758e1b9915f0c6d00d9f18ce21f49 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 11 Sep 2025 20:45:09 +0200 Subject: [PATCH 019/145] smb: smbdirect: introduce smbdirect_connection_send_io_done() This is a combination of send_done() of client and server. It will replace both... 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 0b35840ad4f2..573dc278ca71 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -160,3 +160,73 @@ static void smbdirect_connection_idle_timer_work(struct work_struct *work) "schedule send of empty idle message\n"); queue_work(sc->workqueue, &sc->idle.immediate_work); } + +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_connection_send_io_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct smbdirect_send_io *msg = + container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); + struct smbdirect_socket *sc = msg->socket; + struct smbdirect_send_io *sibling, *next; + int lcredits = 0; + + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_INFO, + "smbdirect_send_io completed. status='%s (%d)', opcode=%d\n", + ib_wc_status_msg(wc->status), wc->status, wc->opcode); + + if (unlikely(!(msg->wr.send_flags & IB_SEND_SIGNALED))) { + /* + * This happens when smbdirect_send_io is a sibling + * before the final message, it is signaled on + * error anyway, so we need to skip + * smbdirect_connection_free_send_io here, + * otherwise is will destroy the memory + * of the siblings too, which will cause + * use after free problems for the others + * triggered from ib_drain_qp(). + */ + if (wc->status != IB_WC_SUCCESS) + goto skip_free; + + /* + * This should not happen! + * But we better just close the + * connection... 
+ */ + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_ERR, + "unexpected send completion wc->status=%s (%d) wc->opcode=%d\n", + ib_wc_status_msg(wc->status), wc->status, wc->opcode); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return; + } + + /* + * Free possible siblings and then the main send_io + */ + list_for_each_entry_safe(sibling, next, &msg->sibling_list, sibling_list) { + list_del_init(&sibling->sibling_list); + smbdirect_connection_free_send_io(sibling); + lcredits += 1; + } + /* Note this frees wc->wr_cqe, but not wc */ + smbdirect_connection_free_send_io(msg); + lcredits += 1; + + if (unlikely(wc->status != IB_WC_SUCCESS || WARN_ON_ONCE(wc->opcode != IB_WC_SEND))) { +skip_free: + if (wc->status != IB_WC_WR_FLUSH_ERR) + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_ERR, + "wc->status=%s (%d) wc->opcode=%d\n", + ib_wc_status_msg(wc->status), wc->status, wc->opcode); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return; + } + + atomic_add(lcredits, &sc->send_io.lcredits.count); + wake_up(&sc->send_io.lcredits.wait_queue); + + if (atomic_dec_and_test(&sc->send_io.pending.count)) + wake_up(&sc->send_io.pending.zero_wait_queue); + + wake_up(&sc->send_io.pending.dec_wait_queue); +} From d674665d514e4cb58455a7380c5a927ea2859585 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 12 Sep 2025 21:35:11 +0200 Subject: [PATCH 020/145] smb: smbdirect: introduce smbdirect_connection_{create,destroy}_mem_pools() This is based on smb_direct_{create,destroy}_pools() in the server. But it doesn't use smbdirect_connection_get_recv_io() on cleanup, instead it uses list_for_each_entry_safe()... It also keep some logic to allow userspace access to smbdirect_recv_io payload, which is needed for the client code. But it exposes the whole payload including the smbdirect_data_transfer header as documentation says data_offset = 0 and data_length != 0 would be valid, while the existing client code requires data_offset >= 24. 
This should replace the related server functions and also be used on the client. It also abstracts recv_io.mem.gfp_mask in order to allow server to keep using __GFP_RETRY_MAYFAIL. It also uses struct kmem_cache_args consistently as that's the currently preferred version of kmem_cache_create(). And it makes use of the mempool_create_slab_pool() helper. And it uses list_add_tail() just to let me feel better when looking at the code... Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 573dc278ca71..8290e45464e3 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -6,6 +6,113 @@ #include "smbdirect_internal.h" +static void smbdirect_connection_destroy_mem_pools(struct smbdirect_socket *sc); + +__maybe_unused /* this is temporary while this file is included in others */ +static int smbdirect_connection_create_mem_pools(struct smbdirect_socket *sc) +{ + const struct smbdirect_socket_parameters *sp = &sc->parameters; + char name[80]; + size_t i; + + /* + * We use sizeof(struct smbdirect_negotiate_resp) for the + * payload size as it is larger as + * sizeof(struct smbdirect_data_transfer). + * + * This will fit client and server usage for now. 
+ */ + snprintf(name, sizeof(name), "smbdirect_send_io_cache_%p", sc); + struct kmem_cache_args send_io_args = { + .align = __alignof__(struct smbdirect_send_io), + }; + sc->send_io.mem.cache = kmem_cache_create(name, + sizeof(struct smbdirect_send_io) + + sizeof(struct smbdirect_negotiate_resp), + &send_io_args, + SLAB_HWCACHE_ALIGN); + if (!sc->send_io.mem.cache) + goto err; + + sc->send_io.mem.pool = mempool_create_slab_pool(sp->send_credit_target, + sc->send_io.mem.cache); + if (!sc->send_io.mem.pool) + goto err; + + /* + * A payload size of sp->max_recv_size should fit + * any message. + * + * For smbdirect_data_transfer messages the whole + * buffer might be exposed to userspace + * (currently on the client side...) + * The documentation says data_offset = 0 would be + * strange but valid. + */ + snprintf(name, sizeof(name), "smbdirect_recv_io_cache_%p", sc); + struct kmem_cache_args recv_io_args = { + .align = __alignof__(struct smbdirect_recv_io), + .useroffset = sizeof(struct smbdirect_recv_io), + .usersize = sp->max_recv_size, + }; + sc->recv_io.mem.cache = kmem_cache_create(name, + sizeof(struct smbdirect_recv_io) + + sp->max_recv_size, + &recv_io_args, + SLAB_HWCACHE_ALIGN); + if (!sc->recv_io.mem.cache) + goto err; + + sc->recv_io.mem.pool = mempool_create_slab_pool(sp->recv_credit_max, + sc->recv_io.mem.cache); + if (!sc->recv_io.mem.pool) + goto err; + + for (i = 0; i < sp->recv_credit_max; i++) { + struct smbdirect_recv_io *recv_io; + + recv_io = mempool_alloc(sc->recv_io.mem.pool, + sc->recv_io.mem.gfp_mask); + if (!recv_io) + goto err; + recv_io->socket = sc; + recv_io->sge.length = 0; + list_add_tail(&recv_io->list, &sc->recv_io.free.list); + } + + return 0; +err: + smbdirect_connection_destroy_mem_pools(sc); + return -ENOMEM; +} + +static void smbdirect_connection_destroy_mem_pools(struct smbdirect_socket *sc) +{ + struct smbdirect_recv_io *recv_io, *next_io; + + list_for_each_entry_safe(recv_io, next_io, &sc->recv_io.free.list, list) { + 
list_del(&recv_io->list); + mempool_free(recv_io, sc->recv_io.mem.pool); + } + + /* + * Note mempool_destroy() and kmem_cache_destroy() + * work fine with a NULL pointer + */ + + mempool_destroy(sc->recv_io.mem.pool); + sc->recv_io.mem.pool = NULL; + + kmem_cache_destroy(sc->recv_io.mem.cache); + sc->recv_io.mem.cache = NULL; + + mempool_destroy(sc->send_io.mem.pool); + sc->send_io.mem.pool = NULL; + + kmem_cache_destroy(sc->send_io.mem.cache); + sc->send_io.mem.cache = NULL; +} + __maybe_unused /* this is temporary while this file is included in others */ static struct smbdirect_send_io *smbdirect_connection_alloc_send_io(struct smbdirect_socket *sc) { From 8a4d6c0d4fc4a138c7569e081389f163d2cee389 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 15 Sep 2025 01:41:44 +0200 Subject: [PATCH 021/145] smb: smbdirect: introduce smbdirect_map_sges_from_iter() and helper functions These are basically copies of smb_extract_iter_to_rdma() and its helpers in the client, which will be replaced in the next steps. The goal is to use them also in the server, which will simplify a lot. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: David Howells Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 255 ++++++++++++++++++ 1 file changed, 255 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 8290e45464e3..3483fab39eb8 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -5,6 +5,19 @@ */ #include "smbdirect_internal.h" +#include + +struct smbdirect_map_sges { + struct ib_sge *sge; + size_t num_sge; + size_t max_sge; + struct ib_device *device; + u32 local_dma_lkey; + enum dma_data_direction direction; +}; + +static ssize_t smbdirect_map_sges_from_iter(struct iov_iter *iter, size_t len, + struct smbdirect_map_sges *state); static void smbdirect_connection_destroy_mem_pools(struct smbdirect_socket *sc); @@ -337,3 +350,245 @@ static void smbdirect_connection_send_io_done(struct ib_cq *cq, struct ib_wc *wc wake_up(&sc->send_io.pending.dec_wait_queue); } + +static bool smbdirect_map_sges_single_page(struct smbdirect_map_sges *state, + struct page *page, size_t off, size_t len) +{ + struct ib_sge *sge; + u64 addr; + + if (state->num_sge >= state->max_sge) + return false; + + addr = ib_dma_map_page(state->device, page, + off, len, state->direction); + if (ib_dma_mapping_error(state->device, addr)) + return false; + + sge = &state->sge[state->num_sge++]; + sge->addr = addr; + sge->length = len; + sge->lkey = state->local_dma_lkey; + + return true; +} + +/* + * Extract page fragments from a BVEC-class iterator and add them to an ib_sge + * list. The pages are not pinned. 
+ */ +static ssize_t smbdirect_map_sges_from_bvec(struct iov_iter *iter, + struct smbdirect_map_sges *state, + ssize_t maxsize) +{ + const struct bio_vec *bv = iter->bvec; + unsigned long start = iter->iov_offset; + unsigned int i; + ssize_t ret = 0; + + for (i = 0; i < iter->nr_segs; i++) { + size_t off, len; + bool ok; + + len = bv[i].bv_len; + if (start >= len) { + start -= len; + continue; + } + + len = min_t(size_t, maxsize, len - start); + off = bv[i].bv_offset + start; + + ok = smbdirect_map_sges_single_page(state, + bv[i].bv_page, + off, + len); + if (!ok) + return -EIO; + + ret += len; + maxsize -= len; + if (state->num_sge >= state->max_sge || maxsize <= 0) + break; + start = 0; + } + + if (ret > 0) + iov_iter_advance(iter, ret); + return ret; +} + +/* + * Extract fragments from a KVEC-class iterator and add them to an ib_sge list. + * This can deal with vmalloc'd buffers as well as kmalloc'd or static buffers. + * The pages are not pinned. + */ +static ssize_t smbdirect_map_sges_from_kvec(struct iov_iter *iter, + struct smbdirect_map_sges *state, + ssize_t maxsize) +{ + const struct kvec *kv = iter->kvec; + unsigned long start = iter->iov_offset; + unsigned int i; + ssize_t ret = 0; + + for (i = 0; i < iter->nr_segs; i++) { + struct page *page; + unsigned long kaddr; + size_t off, len, seg; + + len = kv[i].iov_len; + if (start >= len) { + start -= len; + continue; + } + + kaddr = (unsigned long)kv[i].iov_base + start; + off = kaddr & ~PAGE_MASK; + len = min_t(size_t, maxsize, len - start); + kaddr &= PAGE_MASK; + + maxsize -= len; + do { + bool ok; + + seg = min_t(size_t, len, PAGE_SIZE - off); + + if (is_vmalloc_or_module_addr((void *)kaddr)) + page = vmalloc_to_page((void *)kaddr); + else + page = virt_to_page((void *)kaddr); + + ok = smbdirect_map_sges_single_page(state, page, off, seg); + if (!ok) + return -EIO; + + ret += seg; + len -= seg; + kaddr += PAGE_SIZE; + off = 0; + } while (len > 0 && state->num_sge < state->max_sge); + + if 
(state->num_sge >= state->max_sge || maxsize <= 0) + break; + start = 0; + } + + if (ret > 0) + iov_iter_advance(iter, ret); + return ret; +} + +/* + * Extract folio fragments from a FOLIOQ-class iterator and add them to an + * ib_sge list. The folios are not pinned. + */ +static ssize_t smbdirect_map_sges_from_folioq(struct iov_iter *iter, + struct smbdirect_map_sges *state, + ssize_t maxsize) +{ + const struct folio_queue *folioq = iter->folioq; + unsigned int slot = iter->folioq_slot; + ssize_t ret = 0; + size_t offset = iter->iov_offset; + + if (WARN_ON_ONCE(!folioq)) + return -EIO; + + if (slot >= folioq_nr_slots(folioq)) { + folioq = folioq->next; + if (WARN_ON_ONCE(!folioq)) + return -EIO; + slot = 0; + } + + do { + struct folio *folio = folioq_folio(folioq, slot); + size_t fsize = folioq_folio_size(folioq, slot); + + if (offset < fsize) { + size_t part = umin(maxsize, fsize - offset); + bool ok; + + ok = smbdirect_map_sges_single_page(state, + folio_page(folio, 0), + offset, + part); + if (!ok) + return -EIO; + + offset += part; + ret += part; + maxsize -= part; + } + + if (offset >= fsize) { + offset = 0; + slot++; + if (slot >= folioq_nr_slots(folioq)) { + if (!folioq->next) { + WARN_ON_ONCE(ret < iter->count); + break; + } + folioq = folioq->next; + slot = 0; + } + } + } while (state->num_sge < state->max_sge && maxsize > 0); + + iter->folioq = folioq; + iter->folioq_slot = slot; + iter->iov_offset = offset; + iter->count -= ret; + return ret; +} + +/* + * Extract page fragments from up to the given amount of the source iterator + * and build up an ib_sge list that refers to all of those bits. The ib_sge list + * is appended to, up to the maximum number of elements set in the parameter + * block. + * + * The extracted page fragments are not pinned or ref'd in any way; if an + * IOVEC/UBUF-type iterator is to be used, it should be converted to a + * BVEC-type iterator and the pages pinned, ref'd or otherwise held in some + * way. 
+ */ +__maybe_unused /* this is temporary while this file is included in others */ +static ssize_t smbdirect_map_sges_from_iter(struct iov_iter *iter, size_t len, + struct smbdirect_map_sges *state) +{ + ssize_t ret; + size_t before = state->num_sge; + + if (WARN_ON_ONCE(iov_iter_rw(iter) != ITER_SOURCE)) + return -EIO; + + switch (iov_iter_type(iter)) { + case ITER_BVEC: + ret = smbdirect_map_sges_from_bvec(iter, state, len); + break; + case ITER_KVEC: + ret = smbdirect_map_sges_from_kvec(iter, state, len); + break; + case ITER_FOLIOQ: + ret = smbdirect_map_sges_from_folioq(iter, state, len); + break; + default: + WARN_ONCE(1, "iov_iter_type[%u]\n", iov_iter_type(iter)); + return -EIO; + } + + if (ret < 0) { + while (state->num_sge > before) { + struct ib_sge *sge = &state->sge[state->num_sge--]; + + ib_dma_unmap_page(state->device, + sge->addr, + sge->length, + state->direction); + } + } + + return ret; +} From 0bac604158750d76d30f26d203242c11dd9efcfb Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 15 Sep 2025 07:50:01 +0200 Subject: [PATCH 022/145] smb: smbdirect: introduce smbdirect_connection_qp_event_handler() This is basically a copy of smbd_qp_async_error_upcall() in the client and smb_direct_qpair_handler() in the server. They will be replaced by the new common function soon, which will allow more code to be moved as well. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 3483fab39eb8..ac3f2341320b 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -19,6 +19,31 @@ struct smbdirect_map_sges { static ssize_t smbdirect_map_sges_from_iter(struct iov_iter *iter, size_t len, struct smbdirect_map_sges *state); +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_connection_qp_event_handler(struct ib_event *event, void *context) +{ + struct smbdirect_socket *sc = context; + + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "%s on device %.*s socket %p (cm_id=%p) status %s first_error %1pe\n", + ib_event_msg(event->event), + IB_DEVICE_NAME_MAX, + event->device->name, + sc, sc->rdma.cm_id, + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); + + switch (event->event) { + case IB_EVENT_CQ_ERR: + case IB_EVENT_QP_FATAL: + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + break; + + default: + break; + } +} + static void smbdirect_connection_destroy_mem_pools(struct smbdirect_socket *sc); __maybe_unused /* this is temporary while this file is included in others */ From 71c4b615daffe85dba6f181d4200da57d4550480 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 17 Sep 2025 06:19:46 +0200 Subject: [PATCH 023/145] smb: smbdirect: introduce smbdirect_connection_negotiate_rdma_resources() This is a copy of the same logic used in client and server, it's inlined there, but they will use the new helper function soon. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index ac3f2341320b..5308bdd4797e 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -275,6 +275,68 @@ smbdirect_connection_reassembly_first_recv_io(struct smbdirect_socket *sc) return msg; } +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_connection_negotiate_rdma_resources(struct smbdirect_socket *sc, + u8 peer_initiator_depth, + u8 peer_responder_resources, + const struct rdma_conn_param *param) +{ + struct smbdirect_socket_parameters *sp = &sc->parameters; + + if (rdma_protocol_iwarp(sc->ib.dev, sc->rdma.cm_id->port_num) && + param->private_data_len == 8) { + /* + * Legacy clients with only iWarp MPA v1 support + * need a private blob in order to negotiate + * the IRD/ORD values. + */ + const __be32 *ird_ord_hdr = param->private_data; + u32 ird32 = be32_to_cpu(ird_ord_hdr[0]); + u32 ord32 = be32_to_cpu(ird_ord_hdr[1]); + + /* + * cifs.ko sends the legacy IRD/ORD negotiation + * event if iWarp MPA v2 was used. + * + * Here we check that the values match and only + * mark the client as legacy if they don't match. + */ + if ((u32)param->initiator_depth != ird32 || + (u32)param->responder_resources != ord32) { + /* + * There are broken clients (old cifs.ko) + * using little endian and also + * struct rdma_conn_param only uses u8 + * for initiator_depth and responder_resources, + * so we truncate the value to U8_MAX. 
+ * + * smb_direct_accept_client() will then + * do the real negotiation in order to + * select the minimum between client and + * server. + */ + ird32 = min_t(u32, ird32, U8_MAX); + ord32 = min_t(u32, ord32, U8_MAX); + + sc->rdma.legacy_iwarp = true; + peer_initiator_depth = (u8)ird32; + peer_responder_resources = (u8)ord32; + } + } + + /* + * negotiate the value by using the minimum + * between client and server if the client provided + * non 0 values. + */ + if (peer_initiator_depth != 0) + sp->initiator_depth = min_t(u8, sp->initiator_depth, + peer_initiator_depth); + if (peer_responder_resources != 0) + sp->responder_resources = min_t(u8, sp->responder_resources, + peer_responder_resources); +} + static void smbdirect_connection_idle_timer_work(struct work_struct *work) { struct smbdirect_socket *sc = From 6073eb3e31756d569c4853fb22724525739d0e0c Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 16 Sep 2025 01:10:00 +0200 Subject: [PATCH 024/145] smb: smbdirect: introduce smbdirect_connection_{create,destroy}_qp() smbdirect_connection_create_qp() is basically a copy of smb_direct_create_qpair() in the server, it just adds extra send_wr space for MR requests. smbdirect_connection_destroy_qp() is the cleanup code smb_direct_create_qpair() has, plus calling ib_drain_qp(), it be a no-op if no requests are posted. These additions allow the functions to be used by client and server. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 214 ++++++++++++++++++ fs/smb/common/smbdirect/smbdirect_socket.h | 3 + 2 files changed, 217 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 5308bdd4797e..3b0cbbece44c 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -44,6 +44,220 @@ static void smbdirect_connection_qp_event_handler(struct ib_event *event, void * } } +static u32 smbdirect_rdma_rw_send_wrs(struct ib_device *dev, + const struct ib_qp_init_attr *attr) +{ + /* + * This could be split out of rdma_rw_init_qp() + * and be a helper function next to rdma_rw_mr_factor() + * + * We can't check unlikely(rdma_rw_force_mr) here, + * but that is most likely 0 anyway. + */ + u32 factor; + + WARN_ON_ONCE(attr->port_num == 0); + + /* + * Each context needs at least one RDMA READ or WRITE WR. + * + * For some hardware we might need more, eventually we should ask the + * HCA driver for a multiplier here. + */ + factor = 1; + + /* + * If the device needs MRs to perform RDMA READ or WRITE operations, + * we'll need two additional MRs for the registrations and the + * invalidation. 
+ */ + if (rdma_protocol_iwarp(dev, attr->port_num) || dev->attrs.max_sgl_rd) + factor += 2; /* inv + reg */ + + return factor * attr->cap.max_rdma_ctxs; +} + +static void smbdirect_connection_destroy_qp(struct smbdirect_socket *sc); + +__maybe_unused /* this is temporary while this file is included in others */ +static int smbdirect_connection_create_qp(struct smbdirect_socket *sc) +{ + const struct smbdirect_socket_parameters *sp = &sc->parameters; + struct ib_qp_init_attr qp_attr; + struct ib_qp_cap qp_cap; + u32 rdma_send_wr; + u32 max_send_wr; + int ret; + + /* + * Note that {rdma,ib}_create_qp() will call + * rdma_rw_init_qp() if max_rdma_ctxs is not 0. + * It will adjust max_send_wr to the required + * number of additional WRs for the RDMA RW operations. + * It will cap max_send_wr to the device limit. + * + * We use allocate sp->responder_resources * 2 MRs + * and each MR needs WRs for REG and INV, so + * we use '* 4'. + * + * +1 for ib_drain_qp() + */ + memset(&qp_cap, 0, sizeof(qp_cap)); + qp_cap.max_send_wr = sp->send_credit_target + sp->responder_resources * 4 + 1; + qp_cap.max_recv_wr = sp->recv_credit_max + 1; + qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; + qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; + qp_cap.max_inline_data = 0; + qp_cap.max_rdma_ctxs = sc->rw_io.credits.max; + + /* + * Find out the number of max_send_wr + * after rdma_rw_init_qp() adjusted it. + * + * We only do it on a temporary variable, + * as rdma_create_qp() will trigger + * rdma_rw_init_qp() again. 
+ */ + memset(&qp_attr, 0, sizeof(qp_attr)); + qp_attr.cap = qp_cap; + qp_attr.port_num = sc->rdma.cm_id->port_num; + rdma_send_wr = smbdirect_rdma_rw_send_wrs(sc->ib.dev, &qp_attr); + max_send_wr = qp_cap.max_send_wr + rdma_send_wr; + + if (qp_cap.max_send_wr > sc->ib.dev->attrs.max_cqe || + qp_cap.max_send_wr > sc->ib.dev->attrs.max_qp_wr) { + pr_err("Possible CQE overrun: max_send_wr %d\n", + qp_cap.max_send_wr); + pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); + pr_err("consider lowering send_credit_target = %d\n", + sp->send_credit_target); + return -EINVAL; + } + + if (qp_cap.max_rdma_ctxs && + (max_send_wr >= sc->ib.dev->attrs.max_cqe || + max_send_wr >= sc->ib.dev->attrs.max_qp_wr)) { + pr_err("Possible CQE overrun: rdma_send_wr %d + max_send_wr %d = %d\n", + rdma_send_wr, qp_cap.max_send_wr, max_send_wr); + pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); + pr_err("consider lowering send_credit_target = %d, max_rdma_ctxs = %d\n", + sp->send_credit_target, qp_cap.max_rdma_ctxs); + return -EINVAL; + } + + if (qp_cap.max_recv_wr > sc->ib.dev->attrs.max_cqe || + qp_cap.max_recv_wr > sc->ib.dev->attrs.max_qp_wr) { + pr_err("Possible CQE overrun: max_recv_wr %d\n", + qp_cap.max_recv_wr); + pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); + pr_err("consider lowering receive_credit_max = %d\n", + sp->recv_credit_max); + return -EINVAL; + } + + if (qp_cap.max_send_sge > sc->ib.dev->attrs.max_send_sge || + qp_cap.max_recv_sge > sc->ib.dev->attrs.max_recv_sge) { + pr_err("device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_send_sge, + 
sc->ib.dev->attrs.max_recv_sge); + return -EINVAL; + } + + sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); + if (IS_ERR(sc->ib.pd)) { + pr_err("Can't create RDMA PD: %1pe\n", sc->ib.pd); + ret = PTR_ERR(sc->ib.pd); + sc->ib.pd = NULL; + return ret; + } + + sc->ib.send_cq = ib_alloc_cq_any(sc->ib.dev, sc, + max_send_wr, + sc->ib.poll_ctx); + if (IS_ERR(sc->ib.send_cq)) { + pr_err("Can't create RDMA send CQ: %1pe\n", sc->ib.send_cq); + ret = PTR_ERR(sc->ib.send_cq); + sc->ib.send_cq = NULL; + goto err; + } + + sc->ib.recv_cq = ib_alloc_cq_any(sc->ib.dev, sc, + qp_cap.max_recv_wr, + sc->ib.poll_ctx); + if (IS_ERR(sc->ib.recv_cq)) { + pr_err("Can't create RDMA recv CQ: %1pe\n", sc->ib.recv_cq); + ret = PTR_ERR(sc->ib.recv_cq); + sc->ib.recv_cq = NULL; + goto err; + } + + /* + * We reset completely here! + * As the above use was just temporary + * to calc max_send_wr and rdma_send_wr. + * + * rdma_create_qp() will trigger rdma_rw_init_qp() + * again if max_rdma_ctxs is not 0. + */ + memset(&qp_attr, 0, sizeof(qp_attr)); + qp_attr.event_handler = smbdirect_connection_qp_event_handler; + qp_attr.qp_context = sc; + qp_attr.cap = qp_cap; + qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; + qp_attr.qp_type = IB_QPT_RC; + qp_attr.send_cq = sc->ib.send_cq; + qp_attr.recv_cq = sc->ib.recv_cq; + qp_attr.port_num = ~0; + + ret = rdma_create_qp(sc->rdma.cm_id, sc->ib.pd, &qp_attr); + if (ret) { + pr_err("Can't create RDMA QP: %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + goto err; + } + sc->ib.qp = sc->rdma.cm_id->qp; + + return 0; +err: + smbdirect_connection_destroy_qp(sc); + return ret; +} + +static void smbdirect_connection_destroy_qp(struct smbdirect_socket *sc) +{ + if (sc->ib.qp) { + ib_drain_qp(sc->ib.qp); + sc->ib.qp = NULL; + rdma_destroy_qp(sc->rdma.cm_id); + } + if (sc->ib.recv_cq) { + ib_destroy_cq(sc->ib.recv_cq); + sc->ib.recv_cq = NULL; + } + if (sc->ib.send_cq) { + ib_destroy_cq(sc->ib.send_cq); + sc->ib.send_cq = NULL; + } + if (sc->ib.pd) { + ib_dealloc_pd(sc->ib.pd); + sc->ib.pd 
= NULL; + } +} + static void smbdirect_connection_destroy_mem_pools(struct smbdirect_socket *sc); __maybe_unused /* this is temporary while this file is included in others */ diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index ef0c48814311..874d4d1a56a0 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -120,6 +120,7 @@ struct smbdirect_socket { /* IB verbs related */ struct { struct ib_pd *pd; + enum ib_poll_context poll_ctx; struct ib_cq *send_cq; struct ib_cq *recv_cq; @@ -498,6 +499,8 @@ static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc) INIT_WORK(&sc->disconnect_work, __smbdirect_socket_disabled_work); disable_work_sync(&sc->disconnect_work); + sc->ib.poll_ctx = IB_POLL_UNBOUND_WORKQUEUE; + spin_lock_init(&sc->connect.lock); INIT_WORK(&sc->connect.work, __smbdirect_socket_disabled_work); disable_work_sync(&sc->connect.work); From dd1960ab384e9188a3d1f7db4ac8276f3edec13e Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 17 Sep 2025 09:13:25 +0200 Subject: [PATCH 025/145] smb: smbdirect: introduce smbdirect_connection_post_recv_io() This is basically a copy of smbd_post_recv() in the client and smb_direct_post_recv() in the server. The only difference is that this returns early if the connection is already broken. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 3b0cbbece44c..81b42eaec8b1 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -652,6 +652,48 @@ static void smbdirect_connection_send_io_done(struct ib_cq *cq, struct ib_wc *wc wake_up(&sc->send_io.pending.dec_wait_queue); } +__maybe_unused /* this is temporary while this file is included in others */ +static int smbdirect_connection_post_recv_io(struct smbdirect_recv_io *msg) +{ + struct smbdirect_socket *sc = msg->socket; + const struct smbdirect_socket_parameters *sp = &sc->parameters; + struct ib_recv_wr recv_wr = { + .wr_cqe = &msg->cqe, + .sg_list = &msg->sge, + .num_sge = 1, + }; + int ret; + + if (unlikely(sc->first_error)) + return sc->first_error; + + msg->sge.addr = ib_dma_map_single(sc->ib.dev, + msg->packet, + sp->max_recv_size, + DMA_FROM_DEVICE); + ret = ib_dma_mapping_error(sc->ib.dev, msg->sge.addr); + if (ret) + return ret; + + msg->sge.length = sp->max_recv_size; + msg->sge.lkey = sc->ib.pd->local_dma_lkey; + + ret = ib_post_recv(sc->ib.qp, &recv_wr, NULL); + if (ret) { + smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_ERR, + "ib_post_recv failed ret=%d (%1pe)\n", + ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); + ib_dma_unmap_single(sc->ib.dev, + msg->sge.addr, + msg->sge.length, + DMA_FROM_DEVICE); + msg->sge.length = 0; + smbdirect_socket_schedule_cleanup(sc, ret); + } + + return ret; +} + static bool smbdirect_map_sges_single_page(struct smbdirect_map_sges *state, struct page *page, size_t off, size_t len) { From 2b41feecdfdf8364242fb98d9dc4e52147b72f1e Mon Sep 17 00:00:00 
2001 From: Stefan Metzmacher Date: Thu, 18 Sep 2025 08:23:03 +0200 Subject: [PATCH 026/145] smb: smbdirect: introduce smbdirect_connection_recv_io_refill_work() This is basically a copy of smbd_post_send_credits() in the client and smb_direct_post_recv_credits() in the server. There are several improvements compared to the existing functions: 1. We calculate the number of missing posted buffers by getting the difference between recv_io.credits.target and recv_io.posted.count, instead of the difference between recv_io.credits.target and recv_io.credits.count, because recv_io.credits.count is only updated once a message is sent to the peer. It was not really a problem before, because we have a fixed number of smbdirect_recv_io buffers, so the loop terminated when smbdirect_connection_get_recv_io() returned NULL. But using recv_io.posted.count makes it easier to understand. 2. In order to tell the peer about the newly posted buffer and grant the credits, we only trigger the send immediate when we're not granting only the last possible credit (only one credit is missing to reach the desired target). This is mostly a difference relative to the server's smb_direct_post_recv_credits() implementation, which should avoid useless ping pong messages.
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 81b42eaec8b1..2e90f6a1fcf4 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -694,6 +694,100 @@ static int smbdirect_connection_post_recv_io(struct smbdirect_recv_io *msg) return ret; } +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_connection_recv_io_refill_work(struct work_struct *work) +{ + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, recv_io.posted.refill_work); + int missing; + int posted = 0; + + if (unlikely(sc->first_error)) + return; + + /* + * Find out how much smbdirect_recv_io buffers we should post. + * + * Note that sc->recv_io.credits.target is the value + * from the peer and it can in theory change over time, + * but it is forced to be at least 1 and at max + * sp->recv_credit_max. + * + * So it can happen that missing will be lower than 0, + * which means the peer has recently lowered its desired + * target, while be already granted a higher number of credits. + * + * Note 'posted' is the number of smbdirect_recv_io buffers + * posted within this function, while sc->recv_io.posted.count + * is the overall value of posted smbdirect_recv_io buffers. + * + * We try to post as much buffers as missing, but + * this is limited if a lot of smbdirect_recv_io buffers + * are still in the sc->recv_io.reassembly.list instead of + * the sc->recv_io.free.list. 
+ * + */ + missing = (int)sc->recv_io.credits.target - atomic_read(&sc->recv_io.posted.count); + while (posted < missing) { + struct smbdirect_recv_io *recv_io; + int ret; + + /* + * It's ok if smbdirect_connection_get_recv_io() + * returns NULL, it means smbdirect_recv_io structures + * are still be in the reassembly.list. + */ + recv_io = smbdirect_connection_get_recv_io(sc); + if (!recv_io) + break; + + recv_io->first_segment = false; + + ret = smbdirect_connection_post_recv_io(recv_io); + if (ret) { + smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_ERR, + "smbdirect_connection_post_recv_io failed rc=%d (%1pe)\n", + ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); + smbdirect_connection_put_recv_io(recv_io); + return; + } + + atomic_inc(&sc->recv_io.posted.count); + posted += 1; + } + + /* If nothing was posted we're done */ + if (posted == 0) + return; + + atomic_add(posted, &sc->recv_io.credits.available); + + /* + * If we posted at least one smbdirect_recv_io buffer, + * we need to inform the peer about it and grant + * additional credits. + * + * However there is one case where we don't want to + * do that. + * + * If only a single credit was missing before + * reaching the requested target, we should not + * post an immediate send, as that would cause + * endless ping pong once a keep alive exchange + * is started. + * + * However if sc->recv_io.credits.target is only 1, + * the peer has no credit left and we need to + * grant the credit anyway. 
+ */ + if (missing == 1 && sc->recv_io.credits.target != 1) + return; + + smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_INFO, + "schedule send of an empty message\n"); + queue_work(sc->workqueue, &sc->idle.immediate_work); +} + static bool smbdirect_map_sges_single_page(struct smbdirect_map_sges *state, struct page *page, size_t off, size_t len) { From a5159795248fec94d4b0995584e038d50cceb1b6 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sat, 20 Sep 2025 05:42:56 +0200 Subject: [PATCH 027/145] smb: smbdirect: split out smbdirect_connection_recv_io_refill() This will allow us to refill the recv queue in a sync way after negotiation. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 36 +++++++++++++------ 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 2e90f6a1fcf4..b3e11c4c437d 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -694,16 +694,13 @@ static int smbdirect_connection_post_recv_io(struct smbdirect_recv_io *msg) return ret; } -__maybe_unused /* this is temporary while this file is included in others */ -static void smbdirect_connection_recv_io_refill_work(struct work_struct *work) +static int smbdirect_connection_recv_io_refill(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = - container_of(work, struct smbdirect_socket, recv_io.posted.refill_work); int missing; int posted = 0; if (unlikely(sc->first_error)) - return; + return sc->first_error; /* * Find out how much smbdirect_recv_io buffers we should post. 
@@ -749,7 +746,7 @@ static void smbdirect_connection_recv_io_refill_work(struct work_struct *work) "smbdirect_connection_post_recv_io failed rc=%d (%1pe)\n", ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); smbdirect_connection_put_recv_io(recv_io); - return; + return ret; } atomic_inc(&sc->recv_io.posted.count); @@ -758,7 +755,7 @@ static void smbdirect_connection_recv_io_refill_work(struct work_struct *work) /* If nothing was posted we're done */ if (posted == 0) - return; + return 0; atomic_add(posted, &sc->recv_io.credits.available); @@ -781,11 +778,28 @@ static void smbdirect_connection_recv_io_refill_work(struct work_struct *work) * grant the credit anyway. */ if (missing == 1 && sc->recv_io.credits.target != 1) - return; + return 0; - smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_INFO, - "schedule send of an empty message\n"); - queue_work(sc->workqueue, &sc->idle.immediate_work); + return posted; +} + +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_connection_recv_io_refill_work(struct work_struct *work) +{ + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, recv_io.posted.refill_work); + int posted; + + posted = smbdirect_connection_recv_io_refill(sc); + if (unlikely(posted < 0)) { + smbdirect_socket_schedule_cleanup(sc, posted); + return; + } + if (posted > 0) { + smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_INFO, + "schedule send of an empty message\n"); + queue_work(sc->workqueue, &sc->idle.immediate_work); + } } static bool smbdirect_map_sges_single_page(struct smbdirect_map_sges *state, From dc01504c90d9613a83d6ecf8323800213495c966 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 19 Sep 2025 09:36:14 +0200 Subject: [PATCH 028/145] smb: smbdirect: introduce smbdirect_get_buf_page_count() This is a copy of get_buf_page_count() in the server and will replace it soon. The only difference is that we now use size_t instead of int. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 874d4d1a56a0..b2882935a5d8 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -734,4 +734,10 @@ struct smbdirect_rw_io { struct scatterlist sg_list[]; }; +static inline size_t smbdirect_get_buf_page_count(const void *buf, size_t size) +{ + return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) - + (uintptr_t)buf / PAGE_SIZE; +} + #endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */ From 0ad03ed97da1761a8c42bf4fad559409dfbe0db7 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 19 Sep 2025 09:48:58 +0200 Subject: [PATCH 029/145] smb: smbdirect: introduce smbdirect_socket_wait_for_credits() This is a copy of wait_for_credits() in the server, which will be replaced by this soon. This will allow us to share more common code between client and server soon. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_internal.h | 7 +++++ fs/smb/common/smbdirect/smbdirect_socket.c | 29 ++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_internal.h b/fs/smb/common/smbdirect/smbdirect_internal.h index 2d7c69f71ee0..ff4db1c3f128 100644 --- a/fs/smb/common/smbdirect/smbdirect_internal.h +++ b/fs/smb/common/smbdirect/smbdirect_internal.h @@ -32,6 +32,13 @@ static void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, __func__, __LINE__, __error, &__force_status); \ } while (0) +static int smbdirect_socket_wait_for_credits(struct smbdirect_socket *sc, + enum smbdirect_socket_status expected_status, + int unexpected_errno, + wait_queue_head_t *waitq, + atomic_t *total_credits, + int needed); + static void smbdirect_connection_idle_timer_work(struct work_struct *work); #endif /* __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ */ diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index 05a284526aa2..bbd794fddc1e 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -250,3 +250,32 @@ static void smbdirect_socket_cleanup_work(struct work_struct *work) */ smbdirect_socket_wake_up_all(sc); } + +__maybe_unused /* this is temporary while this file is included in others */ +static int smbdirect_socket_wait_for_credits(struct smbdirect_socket *sc, + enum smbdirect_socket_status expected_status, + int unexpected_errno, + wait_queue_head_t *waitq, + atomic_t *total_credits, + int needed) +{ + int ret; + + if (WARN_ON_ONCE(needed < 0)) + return -EINVAL; + + do { + if (atomic_sub_return(needed, total_credits) >= 0) + return 0; + + atomic_add(needed, total_credits); + ret = 
wait_event_interruptible(*waitq, + atomic_read(total_credits) >= needed || + sc->status != expected_status); + + if (sc->status != expected_status) + return unexpected_errno; + else if (ret < 0) + return ret; + } while (true); +} From de5ef8ec3c4694b9ad665eeea7321202a85474b6 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 19 Sep 2025 07:19:26 +0200 Subject: [PATCH 030/145] smb: smbdirect: introduce smbdirect_mr.c with client mr code This basically contains the following functions copied from the client: destroy_mr_list, allocate_mr_list, register_mr_done, smbd_mr_recovery_work, get_mr, smbd_iter_to_mr, smbd_register_mr and smbd_deregister_mr. They got new names, some indentation/formatting changes, some variable names are changed too. They also only use struct smbdirect_socket instead of struct smbd_connection. But the logic is still the same. They will be used by the client soon. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_all_c_files.c | 1 + fs/smb/common/smbdirect/smbdirect_mr.c | 544 ++++++++++++++++++ 2 files changed, 545 insertions(+) create mode 100644 fs/smb/common/smbdirect/smbdirect_mr.c diff --git a/fs/smb/common/smbdirect/smbdirect_all_c_files.c b/fs/smb/common/smbdirect/smbdirect_all_c_files.c index 93098598fbdc..f1afc1120753 100644 --- a/fs/smb/common/smbdirect/smbdirect_all_c_files.c +++ b/fs/smb/common/smbdirect/smbdirect_all_c_files.c @@ -17,3 +17,4 @@ #endif #include "smbdirect_socket.c" #include "smbdirect_connection.c" +#include "smbdirect_mr.c" diff --git a/fs/smb/common/smbdirect/smbdirect_mr.c b/fs/smb/common/smbdirect/smbdirect_mr.c new file mode 100644 index 000000000000..aed7fd5a3269 --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect_mr.c @@ -0,0 +1,544 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C)
2017, Microsoft Corporation. + * Copyright (c) 2025, Stefan Metzmacher + */ + +#include "smbdirect_internal.h" + +static void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc); +static void smbdirect_connection_mr_io_recovery_work(struct work_struct *work); + +/* + * Allocate MRs used for RDMA read/write + * The number of MRs will not exceed hardware capability in responder_resources + * All MRs are kept in mr_list. The MR can be recovered after it's used + * Recovery is done in smbd_mr_recovery_work. The content of list entry changes + * as MRs are used and recovered for I/O, but the list links will not change + */ +__maybe_unused /* this is temporary while this file is included in others */ +static int smbdirect_connection_create_mr_list(struct smbdirect_socket *sc) +{ + const struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_mr_io *mr; + int ret; + u32 i; + + if (sp->responder_resources == 0) { + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, + "responder_resources negotiated as 0\n"); + return -EINVAL; + } + + /* Allocate more MRs (2x) than hardware responder_resources */ + for (i = 0; i < sp->responder_resources * 2; i++) { + mr = kzalloc_obj(*mr); + if (!mr) { + ret = -ENOMEM; + goto kzalloc_mr_failed; + } + + kref_init(&mr->kref); + mutex_init(&mr->mutex); + + mr->mr = ib_alloc_mr(sc->ib.pd, + sc->mr_io.type, + sp->max_frmr_depth); + if (IS_ERR(mr->mr)) { + ret = PTR_ERR(mr->mr); + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, + "ib_alloc_mr failed ret=%d (%1pe) type=0x%x max_frmr_depth=%u\n", + ret, SMBDIRECT_DEBUG_ERR_PTR(ret), + sc->mr_io.type, sp->max_frmr_depth); + goto ib_alloc_mr_failed; + } + mr->sgt.sgl = kzalloc_objs(struct scatterlist, sp->max_frmr_depth); + if (!mr->sgt.sgl) { + ret = -ENOMEM; + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, + "failed to allocate sgl, max_frmr_depth=%u\n", + sp->max_frmr_depth); + goto kcalloc_sgl_failed; + } + mr->state = SMBDIRECT_MR_READY; + mr->socket = sc; + + 
list_add_tail(&mr->list, &sc->mr_io.all.list); + atomic_inc(&sc->mr_io.ready.count); + } + + INIT_WORK(&sc->mr_io.recovery_work, smbdirect_connection_mr_io_recovery_work); + + return 0; + +kcalloc_sgl_failed: + ib_dereg_mr(mr->mr); +ib_alloc_mr_failed: + mutex_destroy(&mr->mutex); + kfree(mr); +kzalloc_mr_failed: + smbdirect_connection_destroy_mr_list(sc); + return ret; +} + +static void smbdirect_mr_io_disable_locked(struct smbdirect_mr_io *mr) +{ + struct smbdirect_socket *sc = mr->socket; + + lockdep_assert_held(&mr->mutex); + + if (mr->state == SMBDIRECT_MR_DISABLED) + return; + + if (mr->mr) + ib_dereg_mr(mr->mr); + if (mr->sgt.nents) + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); + kfree(mr->sgt.sgl); + + mr->mr = NULL; + mr->sgt.sgl = NULL; + mr->sgt.nents = 0; + + mr->state = SMBDIRECT_MR_DISABLED; +} + +static void smbdirect_mr_io_free_locked(struct kref *kref) +{ + struct smbdirect_mr_io *mr = + container_of(kref, struct smbdirect_mr_io, kref); + + lockdep_assert_held(&mr->mutex); + + /* + * smbdirect_mr_io_disable_locked() should already be called! + */ + if (WARN_ON_ONCE(mr->state != SMBDIRECT_MR_DISABLED)) + smbdirect_mr_io_disable_locked(mr); + + mutex_unlock(&mr->mutex); + mutex_destroy(&mr->mutex); + kfree(mr); +} + +static void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc) +{ + struct smbdirect_mr_io *mr, *tmp; + LIST_HEAD(all_list); + unsigned long flags; + + disable_work_sync(&sc->mr_io.recovery_work); + + spin_lock_irqsave(&sc->mr_io.all.lock, flags); + list_splice_tail_init(&sc->mr_io.all.list, &all_list); + spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); + + list_for_each_entry_safe(mr, tmp, &all_list, list) { + mutex_lock(&mr->mutex); + + smbdirect_mr_io_disable_locked(mr); + list_del(&mr->list); + mr->socket = NULL; + + /* + * No kref_put_mutex() as it's already locked. 
+ * + * If smbdirect_mr_io_free_locked() is called + * and the mutex is unlocked and mr is gone, + * in that case kref_put() returned 1. + * + * If kref_put() returned 0 we know that + * smbdirect_mr_io_free_locked() didn't + * run. Not by us nor by anyone else, as we + * still hold the mutex, so we need to unlock. + * + * If the mr is still registered it will + * be dangling (detached from the connection + * waiting for smbd_deregister_mr() to be + * called in order to free the memory. + */ + if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked)) + mutex_unlock(&mr->mutex); + } +} + +/* + * Get a MR from mr_list. This function waits until there is at least one MR + * available in the list. It may access the list while the + * smbdirect_connection_mr_io_recovery_work is recovering the MR list. This + * doesn't need a lock as they never modify the same places. However, there may + * be several CPUs issuing I/O trying to get MR at the same time, mr_list_lock + * is used to protect this situation. 
+ */ +static struct smbdirect_mr_io * +smbdirect_connection_get_mr_io(struct smbdirect_socket *sc) +{ + struct smbdirect_mr_io *mr; + unsigned long flags; + int ret; + +again: + ret = wait_event_interruptible(sc->mr_io.ready.wait_queue, + atomic_read(&sc->mr_io.ready.count) || + sc->status != SMBDIRECT_SOCKET_CONNECTED); + if (ret) { + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, + "wait_event_interruptible ret=%d (%1pe)\n", + ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); + return NULL; + } + + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, + "sc->status=%s sc->first_error=%1pe\n", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); + return NULL; + } + + spin_lock_irqsave(&sc->mr_io.all.lock, flags); + list_for_each_entry(mr, &sc->mr_io.all.list, list) { + if (mr->state == SMBDIRECT_MR_READY) { + mr->state = SMBDIRECT_MR_REGISTERED; + kref_get(&mr->kref); + spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); + atomic_dec(&sc->mr_io.ready.count); + atomic_inc(&sc->mr_io.used.count); + return mr; + } + } + + spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); + /* + * It is possible that we could fail to get MR because other processes may + * try to acquire a MR at the same time. If this is the case, retry it. 
+ */ + goto again; +} + +static void smbdirect_connection_mr_io_register_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct smbdirect_mr_io *mr = + container_of(wc->wr_cqe, struct smbdirect_mr_io, cqe); + struct smbdirect_socket *sc = mr->socket; + + if (wc->status != IB_WC_SUCCESS) { + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, + "wc->status=%s opcode=%d\n", + ib_wc_status_msg(wc->status), wc->opcode); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + } +} + +static void smbdirect_connection_mr_io_local_inv_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct smbdirect_mr_io *mr = + container_of(wc->wr_cqe, struct smbdirect_mr_io, cqe); + struct smbdirect_socket *sc = mr->socket; + + mr->state = SMBDIRECT_MR_INVALIDATED; + if (wc->status != IB_WC_SUCCESS) { + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, + "invalidate failed status=%s\n", + ib_wc_status_msg(wc->status)); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + } + complete(&mr->invalidate_done); +} + +/* + * The work queue function that recovers MRs + * We need to call ib_dereg_mr() and ib_alloc_mr() before this MR can be used + * again. Both calls are slow, so finish them in a workqueue. This will not + * block I/O path. + * There is one workqueue that recovers MRs, there is no need to lock as the + * I/O requests calling smbd_register_mr will never update the links in the + * mr_list. 
+ */ +static void smbdirect_connection_mr_io_recovery_work(struct work_struct *work) +{ + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, mr_io.recovery_work); + struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_mr_io *mr; + int ret; + + list_for_each_entry(mr, &sc->mr_io.all.list, list) { + if (mr->state != SMBDIRECT_MR_ERROR) + /* This MR is being used, don't recover it */ + continue; + + /* recover this MR entry */ + ret = ib_dereg_mr(mr->mr); + if (ret) { + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, + "ib_dereg_mr failed ret=%u (%1pe)\n", + ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); + smbdirect_socket_schedule_cleanup(sc, ret); + continue; + } + + mr->mr = ib_alloc_mr(sc->ib.pd, + sc->mr_io.type, + sp->max_frmr_depth); + if (IS_ERR(mr->mr)) { + ret = PTR_ERR(mr->mr); + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, + "ib_alloc_mr failed ret=%d (%1pe) type=0x%x depth=%u\n", + ret, SMBDIRECT_DEBUG_ERR_PTR(ret), + sc->mr_io.type, sp->max_frmr_depth); + smbdirect_socket_schedule_cleanup(sc, ret); + continue; + } + + mr->state = SMBDIRECT_MR_READY; + + /* smbdirect_mr->state is updated by this function + * and is read and updated by I/O issuing CPUs trying + * to get a MR, the call to atomic_inc_return + * implicates a memory barrier and guarantees this + * value is updated before waking up any calls to + * get_mr() from the I/O issuing CPUs + */ + if (atomic_inc_return(&sc->mr_io.ready.count) == 1) + wake_up(&sc->mr_io.ready.wait_queue); + } +} + +/* + * Transcribe the pages from an iterator into an MR scatterlist. 
+ */ +static int smbdirect_iter_to_sgt(struct iov_iter *iter, + struct sg_table *sgt, + unsigned int max_sg) +{ + int ret; + + memset(sgt->sgl, 0, max_sg * sizeof(struct scatterlist)); + + ret = extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0); + WARN_ON(ret < 0); + if (sgt->nents > 0) + sg_mark_end(&sgt->sgl[sgt->nents - 1]); + + return ret; +} + +/* + * Register memory for RDMA read/write + * iter: the buffer to register memory with + * writing: true if this is a RDMA write (SMB read), false for RDMA read + * need_invalidate: true if this MR needs to be locally invalidated after I/O + * return value: the MR registered, NULL if failed. + */ +__maybe_unused /* this is temporary while this file is included in others */ +static struct smbdirect_mr_io * +smbdirect_connection_register_mr_io(struct smbdirect_socket *sc, + struct iov_iter *iter, + bool writing, + bool need_invalidate) +{ + const struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_mr_io *mr; + int ret, num_pages; + struct ib_reg_wr *reg_wr; + + num_pages = iov_iter_npages(iter, sp->max_frmr_depth + 1); + if (num_pages > sp->max_frmr_depth) { + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, + "num_pages=%d max_frmr_depth=%d\n", + num_pages, sp->max_frmr_depth); + WARN_ON_ONCE(1); + return NULL; + } + + mr = smbdirect_connection_get_mr_io(sc); + if (!mr) { + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, + "smbdirect_connection_get_mr_io returning NULL\n"); + return NULL; + } + + mutex_lock(&mr->mutex); + + mr->dir = writing ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE; + mr->need_invalidate = need_invalidate; + mr->sgt.nents = 0; + mr->sgt.orig_nents = 0; + + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_INFO, + "num_pages=%u count=%zu depth=%u\n", + num_pages, iov_iter_count(iter), sp->max_frmr_depth); + smbdirect_iter_to_sgt(iter, &mr->sgt, sp->max_frmr_depth); + + ret = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); + if (!ret) { + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, + "ib_dma_map_sg num_pages=%u dir=%x ret=%d (%1pe)\n", + num_pages, mr->dir, ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); + goto dma_map_error; + } + + ret = ib_map_mr_sg(mr->mr, mr->sgt.sgl, mr->sgt.nents, NULL, PAGE_SIZE); + if (ret != mr->sgt.nents) { + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, + "ib_map_mr_sg failed ret = %d nents = %u\n", + ret, mr->sgt.nents); + goto map_mr_error; + } + + ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey)); + reg_wr = &mr->wr; + reg_wr->wr.opcode = IB_WR_REG_MR; + mr->cqe.done = smbdirect_connection_mr_io_register_done; + reg_wr->wr.wr_cqe = &mr->cqe; + reg_wr->wr.num_sge = 0; + reg_wr->wr.send_flags = IB_SEND_SIGNALED; + reg_wr->mr = mr->mr; + reg_wr->key = mr->mr->rkey; + reg_wr->access = writing ? + IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : + IB_ACCESS_REMOTE_READ; + + /* + * There is no need for waiting for completion on ib_post_send + * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution + * on the next ib_post_send when we actually send I/O to remote peer + */ + ret = ib_post_send(sc->ib.qp, &reg_wr->wr, NULL); + if (!ret) { + /* + * smbdirect_connection_get_mr_io() gave us a reference + * via kref_get(&mr->kref), we keep that and let + * the caller use smbdirect_connection_deregister_mr_io() + * to remove it again. 
+ */ + mutex_unlock(&mr->mutex); + return mr; + } + + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, + "ib_post_send failed ret=%d (%1pe) reg_wr->key=0x%x\n", + ret, SMBDIRECT_DEBUG_ERR_PTR(ret), reg_wr->key); + + /* If all failed, attempt to recover this MR by setting it SMBDIRECT_MR_ERROR*/ +map_mr_error: + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); + +dma_map_error: + mr->sgt.nents = 0; + mr->state = SMBDIRECT_MR_ERROR; + if (atomic_dec_and_test(&sc->mr_io.used.count)) + wake_up(&sc->mr_io.cleanup.wait_queue); + + smbdirect_socket_schedule_cleanup(sc, ret); + + /* + * smbdirect_connection_get_mr_io() gave us a reference + * via kref_get(&mr->kref), we need to remove it again + * on error. + * + * No kref_put_mutex() as it's already locked. + * + * If smbdirect_mr_io_free_locked() is called + * and the mutex is unlocked and mr is gone, + * in that case kref_put() returned 1. + * + * If kref_put() returned 0 we know that + * smbdirect_mr_io_free_locked() didn't + * run. Not by us nor by anyone else, as we + * still hold the mutex, so we need to unlock. 
+ */ + if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked)) + mutex_unlock(&mr->mutex); + return NULL; +} +/* + * Deregister a MR after I/O is done + * This function may wait if remote invalidation is not used + * and we have to locally invalidate the buffer to prevent data is being + * modified by remote peer after upper layer consumes it + */ +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr) +{ + struct smbdirect_socket *sc = mr->socket; + int ret = 0; + + mutex_lock(&mr->mutex); + if (mr->state == SMBDIRECT_MR_DISABLED) + goto put_kref; + + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { + smbdirect_mr_io_disable_locked(mr); + goto put_kref; + } + + if (mr->need_invalidate) { + struct ib_send_wr *wr = &mr->inv_wr; + + /* Need to finish local invalidation before returning */ + wr->opcode = IB_WR_LOCAL_INV; + mr->cqe.done = smbdirect_connection_mr_io_local_inv_done; + wr->wr_cqe = &mr->cqe; + wr->num_sge = 0; + wr->ex.invalidate_rkey = mr->mr->rkey; + wr->send_flags = IB_SEND_SIGNALED; + + init_completion(&mr->invalidate_done); + ret = ib_post_send(sc->ib.qp, wr, NULL); + if (ret) { + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, + "ib_post_send failed ret=%d (%1pe)\n", + ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); + smbdirect_mr_io_disable_locked(mr); + smbdirect_socket_schedule_cleanup(sc, ret); + goto done; + } + wait_for_completion(&mr->invalidate_done); + mr->need_invalidate = false; + } else + /* + * For remote invalidation, just set it to SMBDIRECT_MR_INVALIDATED + * and defer to mr_recovery_work to recover the MR for next use + */ + mr->state = SMBDIRECT_MR_INVALIDATED; + + if (mr->sgt.nents) { + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); + mr->sgt.nents = 0; + } + + if (mr->state == SMBDIRECT_MR_INVALIDATED) { + mr->state = SMBDIRECT_MR_READY; + if (atomic_inc_return(&sc->mr_io.ready.count) == 1) + 
wake_up(&sc->mr_io.ready.wait_queue); + } else + /* + * Schedule the work to do MR recovery for future I/Os MR + * recovery is slow and don't want it to block current I/O + */ + queue_work(sc->workqueue, &sc->mr_io.recovery_work); + +done: + if (atomic_dec_and_test(&sc->mr_io.used.count)) + wake_up(&sc->mr_io.cleanup.wait_queue); + +put_kref: + /* + * No kref_put_mutex() as it's already locked. + * + * If smbdirect_mr_io_free_locked() is called + * and the mutex is unlocked and mr is gone, + * in that case kref_put() returned 1. + * + * If kref_put() returned 0 we know that + * smbdirect_mr_io_free_locked() didn't + * run. Not by us nor by anyone else, as we + * still hold the mutex, so we need to unlock + * and keep the mr in SMBDIRECT_MR_READY or + * SMBDIRECT_MR_ERROR state. + */ + if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked)) + mutex_unlock(&mr->mutex); +} From 6cc55655d0bc5836e17f84fd81e450740a78a7bb Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 19 Sep 2025 09:07:03 +0200 Subject: [PATCH 031/145] smb: smbdirect: introduce smbdirect_rw.c with server rw code This is basically contains the following functions copied from the server: wait_for_rw_credits, calc_rw_credits, get_sg_list, smb_direct_free_rdma_rw_msg, read_write_done, read_done, write_done, smb_direct_rdma_xmit. They got new names, some indentation/formatting changes, some variable names are changed too. They also only use struct smbdirect_socket instead of struct smb_direct_transport. But the logic is still the same. They will be used by the server soon. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_all_c_files.c | 1 + fs/smb/common/smbdirect/smbdirect_rw.c | 255 ++++++++++++++++++ fs/smb/common/smbdirect/smbdirect_socket.h | 9 + 3 files changed, 265 insertions(+) create mode 100644 fs/smb/common/smbdirect/smbdirect_rw.c diff --git a/fs/smb/common/smbdirect/smbdirect_all_c_files.c b/fs/smb/common/smbdirect/smbdirect_all_c_files.c index f1afc1120753..963a1fc3b54b 100644 --- a/fs/smb/common/smbdirect/smbdirect_all_c_files.c +++ b/fs/smb/common/smbdirect/smbdirect_all_c_files.c @@ -18,3 +18,4 @@ #include "smbdirect_socket.c" #include "smbdirect_connection.c" #include "smbdirect_mr.c" +#include "smbdirect_rw.c" diff --git a/fs/smb/common/smbdirect/smbdirect_rw.c b/fs/smb/common/smbdirect/smbdirect_rw.c new file mode 100644 index 000000000000..6eeec535b130 --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect_rw.c @@ -0,0 +1,255 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2017, Microsoft Corporation. + * Copyright (C) 2018, LG Electronics. 
+ * Copyright (c) 2025, Stefan Metzmacher + */ + +#include "smbdirect_internal.h" + +static int smbdirect_connection_wait_for_rw_credits(struct smbdirect_socket *sc, + int credits) +{ + return smbdirect_socket_wait_for_credits(sc, + SMBDIRECT_SOCKET_CONNECTED, + -ENOTCONN, + &sc->rw_io.credits.wait_queue, + &sc->rw_io.credits.count, + credits); +} + +static int smbdirect_connection_calc_rw_credits(struct smbdirect_socket *sc, + const void *buf, + size_t len) +{ + return DIV_ROUND_UP(smbdirect_get_buf_page_count(buf, len), + sc->rw_io.credits.num_pages); +} + +static int smbdirect_connection_rdma_get_sg_list(void *buf, + size_t size, + struct scatterlist *sg_list, + size_t nentries) +{ + bool high = is_vmalloc_addr(buf); + struct page *page; + size_t offset, len; + int i = 0; + + if (size == 0 || nentries < smbdirect_get_buf_page_count(buf, size)) + return -EINVAL; + + offset = offset_in_page(buf); + buf -= offset; + while (size > 0) { + len = min_t(size_t, PAGE_SIZE - offset, size); + if (high) + page = vmalloc_to_page(buf); + else + page = kmap_to_page(buf); + + if (!sg_list) + return -EINVAL; + sg_set_page(sg_list, page, len, offset); + sg_list = sg_next(sg_list); + + buf += PAGE_SIZE; + size -= len; + offset = 0; + i++; + } + + return i; +} + +static void smbdirect_connection_rw_io_free(struct smbdirect_rw_io *msg, + enum dma_data_direction dir) +{ + struct smbdirect_socket *sc = msg->socket; + + rdma_rw_ctx_destroy(&msg->rdma_ctx, + sc->ib.qp, + sc->ib.qp->port, + msg->sgt.sgl, + msg->sgt.nents, + dir); + sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); + kfree(msg); +} + +static void smbdirect_connection_rdma_rw_done(struct ib_cq *cq, struct ib_wc *wc, + enum dma_data_direction dir) +{ + struct smbdirect_rw_io *msg = + container_of(wc->wr_cqe, struct smbdirect_rw_io, cqe); + struct smbdirect_socket *sc = msg->socket; + + if (wc->status != IB_WC_SUCCESS) { + msg->error = -EIO; + pr_err("read/write error. 
opcode = %d, status = %s(%d)\n", + wc->opcode, ib_wc_status_msg(wc->status), wc->status); + if (wc->status != IB_WC_WR_FLUSH_ERR) + smbdirect_socket_schedule_cleanup(sc, msg->error); + } + + complete(msg->completion); +} + +static void smbdirect_connection_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc) +{ + smbdirect_connection_rdma_rw_done(cq, wc, DMA_FROM_DEVICE); +} + +static void smbdirect_connection_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) +{ + smbdirect_connection_rdma_rw_done(cq, wc, DMA_TO_DEVICE); +} + +__maybe_unused /* this is temporary while this file is included in others */ +static int smbdirect_connection_rdma_xmit(struct smbdirect_socket *sc, + void *buf, size_t buf_len, + struct smbdirect_buffer_descriptor_v1 *desc, + size_t desc_len, + bool is_read) +{ + const struct smbdirect_socket_parameters *sp = &sc->parameters; + enum dma_data_direction direction = is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + struct smbdirect_rw_io *msg, *next_msg; + size_t i; + int ret; + DECLARE_COMPLETION_ONSTACK(completion); + struct ib_send_wr *first_wr; + LIST_HEAD(msg_list); + u8 *desc_buf; + int credits_needed; + size_t desc_buf_len, desc_num = 0; + + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) + return -ENOTCONN; + + if (buf_len > sp->max_read_write_size) + return -EINVAL; + + /* calculate needed credits */ + credits_needed = 0; + desc_buf = buf; + for (i = 0; i < desc_len / sizeof(*desc); i++) { + if (!buf_len) + break; + + desc_buf_len = le32_to_cpu(desc[i].length); + if (!desc_buf_len) + return -EINVAL; + + if (desc_buf_len > buf_len) { + /* clamp to what is left, buf_len reaches 0 below */ + desc_buf_len = buf_len; + desc[i].length = cpu_to_le32(desc_buf_len); + } + + credits_needed += smbdirect_connection_calc_rw_credits(sc, + desc_buf, + desc_buf_len); + desc_buf += desc_buf_len; + buf_len -= desc_buf_len; + desc_num++; + } + + smbdirect_log_rdma_rw(sc, SMBDIRECT_LOG_INFO, + "RDMA %s, len %zu, needed credits %d\n", + str_read_write(is_read), buf_len, credits_needed); + + ret = 
smbdirect_connection_wait_for_rw_credits(sc, credits_needed); + if (ret < 0) + return ret; + + /* build rdma_rw_ctx for each descriptor */ + desc_buf = buf; + for (i = 0; i < desc_num; i++) { + size_t page_count; + + msg = kzalloc_flex(*msg, sg_list, SG_CHUNK_SIZE, + sc->rw_io.mem.gfp_mask); + if (!msg) { + ret = -ENOMEM; + goto out; + } + + desc_buf_len = le32_to_cpu(desc[i].length); + page_count = smbdirect_get_buf_page_count(desc_buf, desc_buf_len); + + msg->socket = sc; + msg->cqe.done = is_read ? + smbdirect_connection_rdma_read_done : + smbdirect_connection_rdma_write_done; + msg->completion = &completion; + + msg->sgt.sgl = &msg->sg_list[0]; + ret = sg_alloc_table_chained(&msg->sgt, + page_count, + msg->sg_list, + SG_CHUNK_SIZE); + if (ret) { + ret = -ENOMEM; + goto free_msg; + } + + ret = smbdirect_connection_rdma_get_sg_list(desc_buf, + desc_buf_len, + msg->sgt.sgl, + msg->sgt.orig_nents); + if (ret < 0) + goto free_table; + + ret = rdma_rw_ctx_init(&msg->rdma_ctx, + sc->ib.qp, + sc->ib.qp->port, + msg->sgt.sgl, + page_count, + 0, + le64_to_cpu(desc[i].offset), + le32_to_cpu(desc[i].token), + direction); + if (ret < 0) { + pr_err("failed to init rdma_rw_ctx: %d\n", ret); + goto free_table; + } + + list_add_tail(&msg->list, &msg_list); + desc_buf += desc_buf_len; + } + + /* concatenate work requests of rdma_rw_ctxs */ + first_wr = NULL; + list_for_each_entry_reverse(msg, &msg_list, list) { + first_wr = rdma_rw_ctx_wrs(&msg->rdma_ctx, + sc->ib.qp, + sc->ib.qp->port, + &msg->cqe, + first_wr); + } + + ret = ib_post_send(sc->ib.qp, first_wr, NULL); + if (ret) { + pr_err("failed to post send wr for RDMA R/W: %d\n", ret); + goto out; + } + + msg = list_last_entry(&msg_list, struct smbdirect_rw_io, list); + wait_for_completion(&completion); + ret = msg->error; +out: + list_for_each_entry_safe(msg, next_msg, &msg_list, list) { + list_del(&msg->list); + smbdirect_connection_rw_io_free(msg, direction); + } + atomic_add(credits_needed, &sc->rw_io.credits.count); + 
wake_up(&sc->rw_io.credits.wait_queue); + return ret; + +free_table: + sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); +free_msg: + kfree(msg); + goto out; +} diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index b2882935a5d8..36e6822c3795 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -326,6 +326,14 @@ struct smbdirect_socket { * The state for RDMA read/write requests on the server */ struct { + /* + * Memory hints for + * smbdirect_rw_io structs + */ + struct { + gfp_t gfp_mask; + } mem; + /* * The credit state for the send side */ @@ -541,6 +549,7 @@ static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc) spin_lock_init(&sc->recv_io.reassembly.lock); init_waitqueue_head(&sc->recv_io.reassembly.wait_queue); + sc->rw_io.mem.gfp_mask = GFP_KERNEL; atomic_set(&sc->rw_io.credits.count, 0); init_waitqueue_head(&sc->rw_io.credits.wait_queue); From 5fe03dd0c52094a2673b829af0f432bab038edcc Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sat, 20 Sep 2025 04:27:03 +0200 Subject: [PATCH 032/145] smb: smbdirect: define SMBDIRECT_MIN_{RECEIVE,FRAGMENTED}_SIZE These are specified in MS-SMBD... 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_pdu.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_pdu.h b/fs/smb/common/smbdirect/smbdirect_pdu.h index ae9fdb05ce23..7693ba337873 100644 --- a/fs/smb/common/smbdirect/smbdirect_pdu.h +++ b/fs/smb/common/smbdirect/smbdirect_pdu.h @@ -8,6 +8,10 @@ #define SMBDIRECT_V1 0x0100 +/* SMBD minimum receive size and fragmented size as defined in [MS-SMBD] */ +#define SMBDIRECT_MIN_RECEIVE_SIZE 128 +#define SMBDIRECT_MIN_FRAGMENTED_SIZE 131072 + /* SMBD negotiation request packet [MS-SMBD] 2.2.1 */ struct smbdirect_negotiate_req { __le16 min_version; From 822b1f296a4a230425464a7e42a7f180990aed5d Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sat, 20 Sep 2025 23:14:15 +0200 Subject: [PATCH 033/145] smb: smbdirect: define SMBDIRECT_RDMA_CM_[RNR_]RETRY These are copies of {SMBD,SMB_DIRECT}_CM_[RNR_]RETRY. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 36e6822c3795..dec91a102622 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -749,4 +749,13 @@ static inline size_t smbdirect_get_buf_page_count(const void *buf, size_t size) (uintptr_t)buf / PAGE_SIZE; } +/* + * Maximum number of retries on data transfer operations + */ +#define SMBDIRECT_RDMA_CM_RETRY 6 +/* + * No need to retry on Receiver Not Ready since SMB_DIRECT manages credits + */ +#define SMBDIRECT_RDMA_CM_RNR_RETRY 0 + #endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */ From ea4151222a93d3bfd5d5c438d1c286a5dcff846c Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sat, 20 Sep 2025 06:49:55 +0200 Subject: [PATCH 034/145] smb: smbdirect: introduce smbdirect_connection_recv_io_done() This is basically a copy of recv_done() in client and server, with the following additions: - Only handling the SMBDIRECT_EXPECT_DATA_TRANSFER code path, as we'll have separate functions for the negotiate messages. - Using more helper variables - Improved logging - Add credits_requested == 0 error check - Add data_offset not 8 bytes aligned error check - Use disable_work(&sc->recv_io.posted.refill_work) before smbdirect_connection_put_recv_io, when it is followed by smbdirect_socket_schedule_cleanup() This will be used on common between client and server in future and replace the existing recv_done() functions. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 170 ++++++++++++++++++ 1 file changed, 170 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index b3e11c4c437d..5862e58f1152 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -694,6 +694,176 @@ static int smbdirect_connection_post_recv_io(struct smbdirect_recv_io *msg) return ret; } +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_connection_recv_io_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct smbdirect_recv_io *recv_io = + container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); + struct smbdirect_socket *sc = recv_io->socket; + const struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_data_transfer *data_transfer; + int current_recv_credits; + u16 old_recv_credit_target; + u16 credits_requested; + u16 credits_granted; + u16 flags; + u32 data_offset; + u32 data_length; + u32 remaining_data_length; + + if (unlikely(wc->status != IB_WC_SUCCESS || WARN_ON_ONCE(wc->opcode != IB_WC_RECV))) { + if (wc->status != IB_WC_WR_FLUSH_ERR) + smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_ERR, + "wc->status=%s (%d) wc->opcode=%d\n", + ib_wc_status_msg(wc->status), wc->status, wc->opcode); + goto error; + } + + smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_INFO, + "recv_io=0x%p type=%d wc status=%s wc opcode %d byte_len=%d pkey_index=%u\n", + recv_io, sc->recv_io.expected, + ib_wc_status_msg(wc->status), wc->opcode, + wc->byte_len, wc->pkey_index); + + /* + * Reset timer to the keepalive interval in + * order to trigger our next keepalive message. 
+ */ + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->keepalive_interval_msec)); + + ib_dma_sync_single_for_cpu(sc->ib.dev, + recv_io->sge.addr, + recv_io->sge.length, + DMA_FROM_DEVICE); + + if (unlikely(wc->byte_len < + offsetof(struct smbdirect_data_transfer, padding))) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "wc->byte_len=%u < %zu\n", + wc->byte_len, + offsetof(struct smbdirect_data_transfer, padding)); + goto error; + } + + data_transfer = (struct smbdirect_data_transfer *)recv_io->packet; + credits_requested = le16_to_cpu(data_transfer->credits_requested); + credits_granted = le16_to_cpu(data_transfer->credits_granted); + flags = le16_to_cpu(data_transfer->flags); + remaining_data_length = le32_to_cpu(data_transfer->remaining_data_length); + data_offset = le32_to_cpu(data_transfer->data_offset); + data_length = le32_to_cpu(data_transfer->data_length); + + smbdirect_log_incoming(sc, SMBDIRECT_LOG_INFO, + "DataIn: %s=%u, %s=%u, %s=0x%x, %s=%u, %s=%u, %s=%u\n", + "CreditsRequested", + credits_requested, + "CreditsGranted", + credits_granted, + "Flags", + flags, + "RemainingDataLength", + remaining_data_length, + "DataOffset", + data_offset, + "DataLength", + data_length); + + if (unlikely(credits_requested == 0)) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "invalid: credits_requested == 0\n"); + goto error; + } + + if (unlikely(data_offset % 8 != 0)) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "invalid: data_offset=%u (0x%x) not aligned to 8\n", + data_offset, data_offset); + goto error; + } + + if (unlikely(wc->byte_len < data_offset || + (u64)wc->byte_len < (u64)data_offset + data_length)) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "wc->byte_len=%u < data_offset=%u + data_length=%u\n", + wc->byte_len, data_offset, data_length); + goto error; + } + + if (unlikely(remaining_data_length > sp->max_fragmented_recv_size || + data_length > 
sp->max_fragmented_recv_size || + (u64)remaining_data_length + (u64)data_length > (u64)sp->max_fragmented_recv_size)) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "remaining_data_length=%u + data_length=%u > max_fragmented=%u\n", + remaining_data_length, data_length, sp->max_fragmented_recv_size); + goto error; + } + + if (data_length) { + if (sc->recv_io.reassembly.full_packet_received) + recv_io->first_segment = true; + + if (remaining_data_length) + sc->recv_io.reassembly.full_packet_received = false; + else + sc->recv_io.reassembly.full_packet_received = true; + } + + atomic_dec(&sc->recv_io.posted.count); + current_recv_credits = atomic_dec_return(&sc->recv_io.credits.count); + + /* + * We take the value from the peer, which is checked to be higher than 0, + * but we limit it to the max value we support in order to have + * the main logic simpler. + */ + old_recv_credit_target = sc->recv_io.credits.target; + sc->recv_io.credits.target = credits_requested; + sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, + sp->recv_credit_max); + if (credits_granted) { + atomic_add(credits_granted, &sc->send_io.credits.count); + /* + * We have new send credits granted from remote peer + * If any sender is waiting for credits, unblock it + */ + wake_up(&sc->send_io.credits.wait_queue); + } + + /* Send an immediate response right away if requested */ + if (flags & SMBDIRECT_FLAG_RESPONSE_REQUESTED) { + smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_INFO, + "schedule send of immediate response\n"); + queue_work(sc->workqueue, &sc->idle.immediate_work); + } + + /* + * If this is a packet with data payload place the data in + * reassembly queue and wake up the reading thread + */ + if (data_length) { + if (current_recv_credits <= (sc->recv_io.credits.target / 4) || + sc->recv_io.credits.target > old_recv_credit_target) + queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); + + smbdirect_connection_reassembly_append_recv_io(sc, recv_io, data_length); 
+ wake_up(&sc->recv_io.reassembly.wait_queue); + } else + smbdirect_connection_put_recv_io(recv_io); + + return; + +error: + /* + * Make sure smbdirect_connection_put_recv_io() does not + * start recv_io.posted.refill_work. + */ + disable_work(&sc->recv_io.posted.refill_work); + smbdirect_connection_put_recv_io(recv_io); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); +} + static int smbdirect_connection_recv_io_refill(struct smbdirect_socket *sc) { int missing; From 422a2436697da6cc0f2de812a778ff1d249b5335 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 13 Oct 2025 16:13:15 +0200 Subject: [PATCH 035/145] smb: smbdirect: introduce smbdirect_socket_destroy[_sync]() This will be used in common between client and server in order to destroy all resources attached to a connection. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 4 - fs/smb/common/smbdirect/smbdirect_internal.h | 8 ++ fs/smb/common/smbdirect/smbdirect_mr.c | 1 - fs/smb/common/smbdirect/smbdirect_socket.c | 134 ++++++++++++++++++ 4 files changed, 142 insertions(+), 5 deletions(-) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 5862e58f1152..72099dac02db 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -77,8 +77,6 @@ static u32 smbdirect_rdma_rw_send_wrs(struct ib_device *dev, return factor * attr->cap.max_rdma_ctxs; } -static void smbdirect_connection_destroy_qp(struct smbdirect_socket *sc); - __maybe_unused /* this is temporary while this file is included in others */ static int smbdirect_connection_create_qp(struct smbdirect_socket *sc) { @@ -258,8 +256,6 @@ static void smbdirect_connection_destroy_qp(struct smbdirect_socket *sc) } } -static 
void smbdirect_connection_destroy_mem_pools(struct smbdirect_socket *sc); - __maybe_unused /* this is temporary while this file is included in others */ static int smbdirect_connection_create_mem_pools(struct smbdirect_socket *sc) { diff --git a/fs/smb/common/smbdirect/smbdirect_internal.h b/fs/smb/common/smbdirect/smbdirect_internal.h index ff4db1c3f128..9989fe96000d 100644 --- a/fs/smb/common/smbdirect/smbdirect_internal.h +++ b/fs/smb/common/smbdirect/smbdirect_internal.h @@ -39,6 +39,14 @@ static int smbdirect_socket_wait_for_credits(struct smbdirect_socket *sc, atomic_t *total_credits, int needed); +static void smbdirect_connection_destroy_qp(struct smbdirect_socket *sc); + +static void smbdirect_connection_destroy_mem_pools(struct smbdirect_socket *sc); + +static void smbdirect_connection_put_recv_io(struct smbdirect_recv_io *msg); + static void smbdirect_connection_idle_timer_work(struct work_struct *work); +static void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc); + #endif /* __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ */ diff --git a/fs/smb/common/smbdirect/smbdirect_mr.c b/fs/smb/common/smbdirect/smbdirect_mr.c index aed7fd5a3269..249719c916a8 100644 --- a/fs/smb/common/smbdirect/smbdirect_mr.c +++ b/fs/smb/common/smbdirect/smbdirect_mr.c @@ -6,7 +6,6 @@ #include "smbdirect_internal.h" -static void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc); static void smbdirect_connection_mr_io_recovery_work(struct work_struct *work); /* diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index bbd794fddc1e..251d15219173 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -251,6 +251,140 @@ static void smbdirect_socket_cleanup_work(struct work_struct *work) smbdirect_socket_wake_up_all(sc); } +static void smbdirect_socket_destroy(struct smbdirect_socket *sc) +{ + struct smbdirect_recv_io *recv_io; + struct smbdirect_recv_io 
*recv_tmp; + LIST_HEAD(all_list); + unsigned long flags; + + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "status=%s first_error=%1pe", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); + + /* + * This should never be called in an interrupt! + */ + WARN_ON_ONCE(in_interrupt()); + + if (sc->status == SMBDIRECT_SOCKET_DESTROYED) + return; + + WARN_ONCE(sc->status != SMBDIRECT_SOCKET_DISCONNECTED, + "status=%s first_error=%1pe", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); + + /* + * Wake up all waiters in all wait queues + * in order to notice the broken connection. + * + * Most likely this was already called via + * smbdirect_socket_cleanup_work(), but call it again... + */ + smbdirect_socket_wake_up_all(sc); + + disable_work_sync(&sc->disconnect_work); + disable_work_sync(&sc->connect.work); + disable_work_sync(&sc->recv_io.posted.refill_work); + disable_work_sync(&sc->mr_io.recovery_work); + disable_work_sync(&sc->idle.immediate_work); + disable_delayed_work_sync(&sc->idle.timer_work); + + if (sc->rdma.cm_id) + rdma_lock_handler(sc->rdma.cm_id); + + if (sc->ib.qp) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "drain qp\n"); + ib_drain_qp(sc->ib.qp); + } + + /* It's not possible for upper layer to get to reassembly */ + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "drain the reassembly queue\n"); + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); + list_splice_tail_init(&sc->recv_io.reassembly.list, &all_list); + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); + list_for_each_entry_safe(recv_io, recv_tmp, &all_list, list) + smbdirect_connection_put_recv_io(recv_io); + sc->recv_io.reassembly.data_length = 0; + + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "freeing mr list\n"); + smbdirect_connection_destroy_mr_list(sc); + + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "destroying qp\n"); +
smbdirect_connection_destroy_qp(sc); + if (sc->rdma.cm_id) { + rdma_unlock_handler(sc->rdma.cm_id); + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "destroying cm_id\n"); + rdma_destroy_id(sc->rdma.cm_id); + sc->rdma.cm_id = NULL; + } + + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "destroying mem pools\n"); + smbdirect_connection_destroy_mem_pools(sc); + + sc->status = SMBDIRECT_SOCKET_DESTROYED; + + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "rdma session destroyed\n"); +} + +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_socket_destroy_sync(struct smbdirect_socket *sc) +{ + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "status=%s first_error=%1pe", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); + + /* + * This should never be called in an interrupt! + */ + WARN_ON_ONCE(in_interrupt()); + + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "cancelling and disable disconnect_work\n"); + disable_work_sync(&sc->disconnect_work); + + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "destroying rdma session\n"); + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) { + /* + * SMBDIRECT_LOG_INFO is enough here + * as this is the typical case where + * we terminate the connection ourselves.
+ */ + smbdirect_socket_schedule_cleanup_lvl(sc, + SMBDIRECT_LOG_INFO, + -ESHUTDOWN); + smbdirect_socket_cleanup_work(&sc->disconnect_work); + } + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "wait for transport being disconnected\n"); + wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "waited for transport being disconnected\n"); + } + + /* + * Once we reached SMBDIRECT_SOCKET_DISCONNECTED, + * we should call smbdirect_socket_destroy() + */ + smbdirect_socket_destroy(sc); + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "status=%s first_error=%1pe", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); +} + __maybe_unused /* this is temporary while this file is included in others */ static int smbdirect_socket_wait_for_credits(struct smbdirect_socket *sc, enum smbdirect_socket_status expected_status, From b895bc4d215575132c7fec55cf6a2d3787c16a58 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sat, 20 Sep 2025 07:34:44 +0200 Subject: [PATCH 036/145] smb: smbdirect: introduce smbdirect_connection_rdma_{established,event_handler}() This will be used by client and server in future, it will be used after the rdma connection is established in order to simplify the events happening on an established connection. We'll also have smbdirect_{connect,accept}_rdma_event_handler functions which will be used before the rdma connection is established. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 110 ++++++++++++++++++ fs/smb/common/smbdirect/smbdirect_socket.h | 8 ++ 2 files changed, 118 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 72099dac02db..33fa460849b7 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -44,6 +44,116 @@ static void smbdirect_connection_qp_event_handler(struct ib_event *event, void * } } +static int smbdirect_connection_rdma_event_handler(struct rdma_cm_id *id, + struct rdma_cm_event *event) +{ + struct smbdirect_socket *sc = id->context; + int ret = -ECONNRESET; + + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) + ret = -ENETDOWN; + if (IS_ERR(SMBDIRECT_DEBUG_ERR_PTR(event->status))) + ret = event->status; + + /* + * cma_cm_event_handler() has + * lockdep_assert_held(&id_priv->handler_mutex); + * + * Mutexes are not allowed in interrupts, + * and we rely on not being in an interrupt here. + */ + WARN_ON_ONCE(in_interrupt()); + + if (event->event != sc->rdma.expected_event) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "%s (first_error=%1pe, expected=%s) => event=%s status=%d => ret=%1pe\n", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), + rdma_event_msg(sc->rdma.expected_event), + rdma_event_msg(event->event), + event->status, + SMBDIRECT_DEBUG_ERR_PTR(ret)); + + /* + * If we get RDMA_CM_EVENT_DEVICE_REMOVAL, + * we should change to SMBDIRECT_SOCKET_DISCONNECTED, + * so that rdma_disconnect() is avoided later via + * smbdirect_socket_schedule_cleanup[_status]() => + * smbdirect_socket_cleanup_work(). 
+ * + * As otherwise we'd set SMBDIRECT_SOCKET_DISCONNECTING, + * but never ever get RDMA_CM_EVENT_DISCONNECTED and + * never reach SMBDIRECT_SOCKET_DISCONNECTED. + */ + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) + smbdirect_socket_schedule_cleanup_status(sc, + SMBDIRECT_LOG_ERR, + ret, + SMBDIRECT_SOCKET_DISCONNECTED); + else + smbdirect_socket_schedule_cleanup(sc, ret); + if (sc->ib.qp) + ib_drain_qp(sc->ib.qp); + return 0; + } + + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "%s (first_error=%1pe) event=%s\n", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), + rdma_event_msg(event->event)); + + switch (event->event) { + case RDMA_CM_EVENT_DISCONNECTED: + /* + * We need to change to SMBDIRECT_SOCKET_DISCONNECTED, + * so that rdma_disconnect() is avoided later via + * smbdirect_socket_schedule_cleanup_status() => + * smbdirect_socket_cleanup_work(). + * + * As otherwise we'd set SMBDIRECT_SOCKET_DISCONNECTING, + * but never ever get RDMA_CM_EVENT_DISCONNECTED and + * never reach SMBDIRECT_SOCKET_DISCONNECTED. + * + * This is also a normal disconnect so + * SMBDIRECT_LOG_INFO should be good enough + * and avoids spamming the default logs. + */ + smbdirect_socket_schedule_cleanup_status(sc, + SMBDIRECT_LOG_INFO, + ret, + SMBDIRECT_SOCKET_DISCONNECTED); + if (sc->ib.qp) + ib_drain_qp(sc->ib.qp); + return 0; + + default: + break; + } + + /* + * This is an internal error, should be handled above via + * event->event != sc->rdma.expected_event already. 
+ */ + WARN_ON_ONCE(sc->rdma.expected_event != RDMA_CM_EVENT_DISCONNECTED); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return 0; +} + +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_connection_rdma_established(struct smbdirect_socket *sc) +{ + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "rdma established: device: %.*s local: %pISpsfc remote: %pISpsfc\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + &sc->rdma.cm_id->route.addr.src_addr, + &sc->rdma.cm_id->route.addr.dst_addr); + + sc->rdma.cm_id->event_handler = smbdirect_connection_rdma_event_handler; + sc->rdma.expected_event = RDMA_CM_EVENT_DISCONNECTED; +} + static u32 smbdirect_rdma_rw_send_wrs(struct ib_device *dev, const struct ib_qp_init_attr *attr) { diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index dec91a102622..97e6330249cc 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -111,6 +111,12 @@ struct smbdirect_socket { /* RDMA related */ struct { struct rdma_cm_id *cm_id; + /* + * The expected event in our current + * cm_id->event_handler, all other events + * are treated as an error. + */ + enum rdma_cm_event_type expected_event; /* * This is for iWarp MPA v1 */ @@ -507,6 +513,8 @@ static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc) INIT_WORK(&sc->disconnect_work, __smbdirect_socket_disabled_work); disable_work_sync(&sc->disconnect_work); + sc->rdma.expected_event = RDMA_CM_EVENT_INTERNAL; + sc->ib.poll_ctx = IB_POLL_UNBOUND_WORKQUEUE; spin_lock_init(&sc->connect.lock); From 20c55c6910cc305854e7d545d85493d0d383b081 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 13 Oct 2025 18:34:08 +0200 Subject: [PATCH 037/145] smb: smbdirect: introduce smbdirect_connection_recvmsg() This is basically a copy of smbd_recv() in the client. And it's very similar to smb_direct_read() in the server. 
It will replace both in the following commits. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 151 ++++++++++++++++++ 1 file changed, 151 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 33fa460849b7..0eebc7f336f5 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -1078,6 +1078,157 @@ static void smbdirect_connection_recv_io_refill_work(struct work_struct *work) } } +__maybe_unused /* this is temporary while this file is included in others */ +static int smbdirect_connection_recvmsg(struct smbdirect_socket *sc, + struct msghdr *msg, + unsigned int flags) +{ + struct smbdirect_recv_io *response; + struct smbdirect_data_transfer *data_transfer; + size_t size = iov_iter_count(&msg->msg_iter); + int to_copy, to_read, data_read, offset; + u32 data_length, remaining_data_length, data_offset; + int ret; + + if (WARN_ONCE(flags, "unexpected flags=0x%x\n", flags)) + return -EINVAL; /* no flags support for now */ + + if (WARN_ON_ONCE(iov_iter_rw(&msg->msg_iter) != ITER_DEST)) + return -EINVAL; /* It's a bug in upper layer to get there */ + +again: + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { + smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, + "status=%s first_error=%1pe => %1pe\n", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), + SMBDIRECT_DEBUG_ERR_PTR(-ENOTCONN)); + return -ENOTCONN; + } + + /* + * No need to hold the reassembly queue lock all the time as we are + * the only one reading from the front of the queue. 
The transport + * may add more entries to the back of the queue at the same time + */ + smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, + "size=%zd sc->recv_io.reassembly.data_length=%d\n", + size, sc->recv_io.reassembly.data_length); + if (sc->recv_io.reassembly.data_length >= size) { + int queue_length; + int queue_removed = 0; + unsigned long flags; + + /* + * Need to make sure reassembly_data_length is read before + * reading reassembly_queue_length and calling + * smbdirect_connection_reassembly_first_recv_io. This call is lock free + * as we never read at the end of the queue which are being + * updated in SOFTIRQ as more data is received + */ + virt_rmb(); + queue_length = sc->recv_io.reassembly.queue_length; + data_read = 0; + to_read = size; + offset = sc->recv_io.reassembly.first_entry_offset; + while (data_read < size) { + response = smbdirect_connection_reassembly_first_recv_io(sc); + data_transfer = (void *)response->packet; + data_length = le32_to_cpu(data_transfer->data_length); + remaining_data_length = + le32_to_cpu( + data_transfer->remaining_data_length); + data_offset = le32_to_cpu(data_transfer->data_offset); + + /* + * The upper layer expects RFC1002 length at the + * beginning of the payload. Return it to indicate + * the total length of the packet. This minimize the + * change to upper layer packet processing logic. 
This + * will eventually be removed when an intermediate + * transport layer is added + */ + if (response->first_segment && size == 4) { + unsigned int rfc1002_len = + data_length + remaining_data_length; + __be32 rfc1002_hdr = cpu_to_be32(rfc1002_len); + + if (copy_to_iter(&rfc1002_hdr, sizeof(rfc1002_hdr), + &msg->msg_iter) != sizeof(rfc1002_hdr)) + return -EFAULT; + data_read = 4; + response->first_segment = false; + smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, + "returning rfc1002 length %d\n", + rfc1002_len); + goto read_rfc1002_done; + } + + to_copy = min_t(int, data_length - offset, to_read); + if (copy_to_iter((u8 *)data_transfer + data_offset + offset, + to_copy, &msg->msg_iter) != to_copy) + return -EFAULT; + + /* move on to the next buffer? */ + if (to_copy == data_length - offset) { + queue_length--; + /* + * No need to lock if we are not at the + * end of the queue + */ + if (queue_length) + list_del(&response->list); + else { + spin_lock_irqsave( + &sc->recv_io.reassembly.lock, flags); + list_del(&response->list); + spin_unlock_irqrestore( + &sc->recv_io.reassembly.lock, flags); + } + queue_removed++; + sc->statistics.dequeue_reassembly_queue++; + smbdirect_connection_put_recv_io(response); + offset = 0; + smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, + "smbdirect_connection_put_recv_io offset=0\n"); + } else + offset += to_copy; + + to_read -= to_copy; + data_read += to_copy; + + smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, + "memcpy %d bytes len-ofs=%u => todo=%u done=%u ofs=%u\n", + to_copy, data_length - offset, + to_read, data_read, offset); + } + + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); + sc->recv_io.reassembly.data_length -= data_read; + sc->recv_io.reassembly.queue_length -= queue_removed; + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); + + sc->recv_io.reassembly.first_entry_offset = offset; + smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, + "returning data_read=%d reassembly_length=%d first_ofs=%u\n", + data_read,
sc->recv_io.reassembly.data_length, + sc->recv_io.reassembly.first_entry_offset); +read_rfc1002_done: + return data_read; + } + + smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, + "wait_event on more data\n"); + ret = wait_event_interruptible(sc->recv_io.reassembly.wait_queue, + sc->recv_io.reassembly.data_length >= size || + sc->status != SMBDIRECT_SOCKET_CONNECTED); + /* Don't return any data if interrupted */ + if (ret) + return ret; + + goto again; +} + static bool smbdirect_map_sges_single_page(struct smbdirect_map_sges *state, struct page *page, size_t off, size_t len) { From 3514195010828078173dd0608ba04340b718892f Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 15:15:05 +0200 Subject: [PATCH 038/145] smb: smbdirect: introduce smbdirect_connection_grant_recv_credits() This is basically a copy of manage_credits_prior_sending() in the client and the server. It will replace both versions in future. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 0eebc7f336f5..40d830c58f19 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -688,6 +688,44 @@ static void smbdirect_connection_idle_timer_work(struct work_struct *work) queue_work(sc->workqueue, &sc->idle.immediate_work); } +__maybe_unused /* this is temporary while this file is included in others */ +static u16 smbdirect_connection_grant_recv_credits(struct smbdirect_socket *sc) +{ + int missing; + int available; + int new_credits; + + if (atomic_read(&sc->recv_io.credits.count) >= sc->recv_io.credits.target) + return 0; + + missing = 
(int)sc->recv_io.credits.target - atomic_read(&sc->recv_io.credits.count); + available = atomic_xchg(&sc->recv_io.credits.available, 0); + new_credits = min3((int)U16_MAX, missing, available); + if (new_credits <= 0) { + /* + * If credits are available, but not granted + * we need to re-add them. + */ + if (available) + atomic_add(available, &sc->recv_io.credits.available); + return 0; + } + + if (new_credits < available) { + /* + * Re-add the remaining available credits. + */ + available -= new_credits; + atomic_add(available, &sc->recv_io.credits.available); + } + + /* + * Remember we granted the credits + */ + atomic_add(new_credits, &sc->recv_io.credits.count); + return new_credits; +} + __maybe_unused /* this is temporary while this file is included in others */ static void smbdirect_connection_send_io_done(struct ib_cq *cq, struct ib_wc *wc) { From 4908d19640f4e7834acf26a7de2b78b1c1880829 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 15:49:50 +0200 Subject: [PATCH 039/145] smb: smbdirect: introduce smbdirect_connection_request_keep_alive() This is a copy of manage_keep_alive_before_sending() in client and server, it will replace these in the future.
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 40d830c58f19..45a7ff9b7354 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -726,6 +726,25 @@ static u16 smbdirect_connection_grant_recv_credits(struct smbdirect_socket *sc) return new_credits; } +__maybe_unused /* this is temporary while this file is included in others */ +static bool smbdirect_connection_request_keep_alive(struct smbdirect_socket *sc) +{ + const struct smbdirect_socket_parameters *sp = &sc->parameters; + + if (sc->idle.keepalive == SMBDIRECT_KEEPALIVE_PENDING) { + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_SENT; + /* + * Now use the keepalive timeout (instead of keepalive interval) + * in order to wait for a response + */ + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->keepalive_timeout_msec)); + return true; + } + + return false; +} + __maybe_unused /* this is temporary while this file is included in others */ static void smbdirect_connection_send_io_done(struct ib_cq *cq, struct ib_wc *wc) { From 54abc694ebb8fae2bf1c23fa0a5b1652f4d70bf6 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 21:54:09 +0200 Subject: [PATCH 040/145] smb: smbdirect: introduce smbdirect_connection_send_iter() and related functions This is basically a copy of the following functions in the server: smb_direct_post_send => smbdirect_connection_post_send_wr smb_direct_send_ctx_init => smbdirect_connection_send_batch_init smb_direct_flush_send_list => smbdirect_connection_send_batch_flush wait_for_send_lcredit => 
smbdirect_connection_wait_for_send_lcredit wait_for_send_credits => smbdirect_connection_wait_for_send_credits post_sendmsg => smbdirect_connection_post_send_io smb_direct_post_send_data => smbdirect_connection_send_single_iter smb_direct_send_iter => smbdirect_connection_send_iter They will replace the server functions soon and will also be used in the client as smbdirect_connection_send_single_iter() is very similar to smbd_post_send_iter(). There's also a smbdirect_connection_send_wait_zero_pending() helper that can be used together with smbdirect_connection_send_single_iter() in the client until it can use smbdirect_connection_send_iter(), which can happen with David's refactoring. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: David Howells Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 496 +++++++++++++++++- 1 file changed, 495 insertions(+), 1 deletion(-) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 45a7ff9b7354..bebee5bf072c 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -745,7 +745,494 @@ static bool smbdirect_connection_request_keep_alive(struct smbdirect_socket *sc) return false; } +static int smbdirect_connection_post_send_wr(struct smbdirect_socket *sc, + struct ib_send_wr *wr) +{ + int ret; + + if (unlikely(sc->first_error)) + return sc->first_error; + + atomic_inc(&sc->send_io.pending.count); + ret = ib_post_send(sc->ib.qp, wr, NULL); + if (ret) { + atomic_dec(&sc->send_io.pending.count); + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_ERR, + "ib_post_send() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + smbdirect_socket_schedule_cleanup(sc, ret); + } + + return ret; +} + +static void smbdirect_connection_send_batch_init(struct 
smbdirect_send_batch *batch, + bool need_invalidate_rkey, + unsigned int remote_key) +{ + INIT_LIST_HEAD(&batch->msg_list); + batch->wr_cnt = 0; + batch->need_invalidate_rkey = need_invalidate_rkey; + batch->remote_key = remote_key; + batch->credit = 0; +} + +static int smbdirect_connection_send_batch_flush(struct smbdirect_socket *sc, + struct smbdirect_send_batch *batch, + bool is_last) +{ + struct smbdirect_send_io *first, *last; + int ret = 0; + + if (list_empty(&batch->msg_list)) + goto release_credit; + + first = list_first_entry(&batch->msg_list, + struct smbdirect_send_io, + sibling_list); + last = list_last_entry(&batch->msg_list, + struct smbdirect_send_io, + sibling_list); + + if (batch->need_invalidate_rkey) { + first->wr.opcode = IB_WR_SEND_WITH_INV; + first->wr.ex.invalidate_rkey = batch->remote_key; + batch->need_invalidate_rkey = false; + batch->remote_key = 0; + } + + last->wr.send_flags = IB_SEND_SIGNALED; + last->wr.wr_cqe = &last->cqe; + + /* + * Remove last from send_ctx->msg_list + * and splice the rest of send_ctx->msg_list + * to last->sibling_list. + * + * send_ctx->msg_list is a valid empty list + * at the end. 
+ */ + list_del_init(&last->sibling_list); + list_splice_tail_init(&batch->msg_list, &last->sibling_list); + batch->wr_cnt = 0; + + ret = smbdirect_connection_post_send_wr(sc, &first->wr); + if (ret) { + struct smbdirect_send_io *sibling, *next; + + list_for_each_entry_safe(sibling, next, &last->sibling_list, sibling_list) { + list_del_init(&sibling->sibling_list); + smbdirect_connection_free_send_io(sibling); + } + smbdirect_connection_free_send_io(last); + } + +release_credit: + if (is_last && !ret && batch->credit) { + atomic_add(batch->credit, &sc->send_io.bcredits.count); + batch->credit = 0; + wake_up(&sc->send_io.bcredits.wait_queue); + } + + return ret; +} + +static int smbdirect_connection_wait_for_send_bcredit(struct smbdirect_socket *sc, + struct smbdirect_send_batch *batch) +{ + int ret; + + if (batch->credit) + return 0; + + ret = smbdirect_socket_wait_for_credits(sc, + SMBDIRECT_SOCKET_CONNECTED, + -ENOTCONN, + &sc->send_io.bcredits.wait_queue, + &sc->send_io.bcredits.count, + 1); + if (ret) + return ret; + + batch->credit = 1; + return 0; +} + +static int smbdirect_connection_wait_for_send_lcredit(struct smbdirect_socket *sc, + struct smbdirect_send_batch *batch) +{ + if (batch && atomic_read(&sc->send_io.lcredits.count) <= 1) { + int ret; + + ret = smbdirect_connection_send_batch_flush(sc, batch, false); + if (ret) + return ret; + } + + return smbdirect_socket_wait_for_credits(sc, + SMBDIRECT_SOCKET_CONNECTED, + -ENOTCONN, + &sc->send_io.lcredits.wait_queue, + &sc->send_io.lcredits.count, + 1); +} + +static int smbdirect_connection_wait_for_send_credits(struct smbdirect_socket *sc, + struct smbdirect_send_batch *batch) +{ + if (batch && (batch->wr_cnt >= 16 || atomic_read(&sc->send_io.credits.count) <= 1)) { + int ret; + + ret = smbdirect_connection_send_batch_flush(sc, batch, false); + if (ret) + return ret; + } + + return smbdirect_socket_wait_for_credits(sc, + SMBDIRECT_SOCKET_CONNECTED, + -ENOTCONN, + &sc->send_io.credits.wait_queue, + 
&sc->send_io.credits.count, + 1); +} + +static void smbdirect_connection_send_io_done(struct ib_cq *cq, struct ib_wc *wc); + +static int smbdirect_connection_post_send_io(struct smbdirect_socket *sc, + struct smbdirect_send_batch *batch, + struct smbdirect_send_io *msg) +{ + int i; + + for (i = 0; i < msg->num_sge; i++) + ib_dma_sync_single_for_device(sc->ib.dev, + msg->sge[i].addr, msg->sge[i].length, + DMA_TO_DEVICE); + + msg->cqe.done = smbdirect_connection_send_io_done; + msg->wr.wr_cqe = &msg->cqe; + msg->wr.opcode = IB_WR_SEND; + msg->wr.sg_list = &msg->sge[0]; + msg->wr.num_sge = msg->num_sge; + msg->wr.next = NULL; + + if (batch) { + msg->wr.send_flags = 0; + if (!list_empty(&batch->msg_list)) { + struct smbdirect_send_io *last; + + last = list_last_entry(&batch->msg_list, + struct smbdirect_send_io, + sibling_list); + last->wr.next = &msg->wr; + } + list_add_tail(&msg->sibling_list, &batch->msg_list); + batch->wr_cnt++; + return 0; + } + + msg->wr.send_flags = IB_SEND_SIGNALED; + return smbdirect_connection_post_send_wr(sc, &msg->wr); +} + +static int smbdirect_connection_send_single_iter(struct smbdirect_socket *sc, + struct smbdirect_send_batch *batch, + struct iov_iter *iter, + unsigned int flags, + u32 remaining_data_length) +{ + const struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_send_batch _batch; + struct smbdirect_send_io *msg; + struct smbdirect_data_transfer *packet; + size_t header_length; + u16 new_credits = 0; + u32 data_length = 0; + int ret; + + if (WARN_ON_ONCE(flags)) + return -EINVAL; /* no flags support for now */ + + if (iter) { + if (WARN_ON_ONCE(iov_iter_rw(iter) != ITER_SOURCE)) + return -EINVAL; /* It's a bug in upper layer to get there */ + + header_length = sizeof(struct smbdirect_data_transfer); + if (WARN_ON_ONCE(remaining_data_length == 0 || + iov_iter_count(iter) > remaining_data_length)) + return -EINVAL; + } else { + /* If this is a packet without payload, don't send padding */ + header_length 
= offsetof(struct smbdirect_data_transfer, padding); + if (WARN_ON_ONCE(remaining_data_length)) + return -EINVAL; + } + + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { + smbdirect_log_write(sc, SMBDIRECT_LOG_ERR, + "status=%s first_error=%1pe => %1pe\n", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), + SMBDIRECT_DEBUG_ERR_PTR(-ENOTCONN)); + return -ENOTCONN; + } + + if (!batch) { + smbdirect_connection_send_batch_init(&_batch, false, 0); + batch = &_batch; + } + + ret = smbdirect_connection_wait_for_send_bcredit(sc, batch); + if (ret) + goto bcredit_failed; + + ret = smbdirect_connection_wait_for_send_lcredit(sc, batch); + if (ret) + goto lcredit_failed; + + ret = smbdirect_connection_wait_for_send_credits(sc, batch); + if (ret) + goto credit_failed; + + new_credits = smbdirect_connection_grant_recv_credits(sc); + if (new_credits == 0 && + atomic_read(&sc->send_io.credits.count) == 0 && + atomic_read(&sc->recv_io.credits.count) == 0) { + /* + * queue the refill work in order to + * get some new recv credits we can grant to + * the peer. 
+ */ + queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); + + /* + * wait until either the refill work or the peer + * granted new credits + */ + ret = wait_event_interruptible(sc->send_io.credits.wait_queue, + atomic_read(&sc->send_io.credits.count) >= 1 || + atomic_read(&sc->recv_io.credits.available) >= 1 || + sc->status != SMBDIRECT_SOCKET_CONNECTED); + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) + ret = -ENOTCONN; + if (ret < 0) + goto credit_failed; + + new_credits = smbdirect_connection_grant_recv_credits(sc); + } + + msg = smbdirect_connection_alloc_send_io(sc); + if (IS_ERR(msg)) { + ret = PTR_ERR(msg); + goto alloc_failed; + } + + /* Map the packet to DMA */ + msg->sge[0].addr = ib_dma_map_single(sc->ib.dev, + msg->packet, + header_length, + DMA_TO_DEVICE); + ret = ib_dma_mapping_error(sc->ib.dev, msg->sge[0].addr); + if (ret) + goto err; + + msg->sge[0].length = header_length; + msg->sge[0].lkey = sc->ib.pd->local_dma_lkey; + msg->num_sge = 1; + + if (iter) { + struct smbdirect_map_sges extract = { + .num_sge = msg->num_sge, + .max_sge = ARRAY_SIZE(msg->sge), + .sge = msg->sge, + .device = sc->ib.dev, + .local_dma_lkey = sc->ib.pd->local_dma_lkey, + .direction = DMA_TO_DEVICE, + }; + size_t payload_len = umin(iov_iter_count(iter), + sp->max_send_size - sizeof(*packet)); + + ret = smbdirect_map_sges_from_iter(iter, payload_len, &extract); + if (ret < 0) + goto err; + data_length = ret; + remaining_data_length -= data_length; + msg->num_sge = extract.num_sge; + } + + /* Fill in the packet header */ + packet = (struct smbdirect_data_transfer *)msg->packet; + packet->credits_requested = cpu_to_le16(sp->send_credit_target); + packet->credits_granted = cpu_to_le16(new_credits); + + packet->flags = 0; + if (smbdirect_connection_request_keep_alive(sc)) + packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); + + packet->reserved = 0; + if (!data_length) + packet->data_offset = 0; + else + packet->data_offset = cpu_to_le32(24); + 
packet->data_length = cpu_to_le32(data_length); + packet->remaining_data_length = cpu_to_le32(remaining_data_length); + packet->padding = 0; + + smbdirect_log_outgoing(sc, SMBDIRECT_LOG_INFO, + "DataOut: %s=%u, %s=%u, %s=0x%x, %s=%u, %s=%u, %s=%u\n", + "CreditsRequested", + le16_to_cpu(packet->credits_requested), + "CreditsGranted", + le16_to_cpu(packet->credits_granted), + "Flags", + le16_to_cpu(packet->flags), + "RemainingDataLength", + le32_to_cpu(packet->remaining_data_length), + "DataOffset", + le32_to_cpu(packet->data_offset), + "DataLength", + le32_to_cpu(packet->data_length)); + + ret = smbdirect_connection_post_send_io(sc, batch, msg); + if (ret) + goto err; + + /* + * From here msg is moved to send_ctx + * and we should not free it explicitly. + */ + + if (batch == &_batch) { + ret = smbdirect_connection_send_batch_flush(sc, batch, true); + if (ret) + goto flush_failed; + } + + return data_length; +err: + smbdirect_connection_free_send_io(msg); +flush_failed: +alloc_failed: + atomic_inc(&sc->send_io.credits.count); +credit_failed: + atomic_inc(&sc->send_io.lcredits.count); +lcredit_failed: + atomic_add(batch->credit, &sc->send_io.bcredits.count); + batch->credit = 0; +bcredit_failed: + return ret; +} + __maybe_unused /* this is temporary while this file is included in others */ +static int smbdirect_connection_send_wait_zero_pending(struct smbdirect_socket *sc) +{ + /* + * As an optimization, we don't wait for individual I/O to finish + * before sending the next one. 
+ * Send them all and wait for pending send count to get to 0 + * that means all the I/Os have been out and we are good to return + */ + + wait_event(sc->send_io.pending.zero_wait_queue, + atomic_read(&sc->send_io.pending.count) == 0 || + sc->status != SMBDIRECT_SOCKET_CONNECTED); + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { + smbdirect_log_write(sc, SMBDIRECT_LOG_ERR, + "status=%s first_error=%1pe => %1pe\n", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), + SMBDIRECT_DEBUG_ERR_PTR(-ENOTCONN)); + return -ENOTCONN; + } + + return 0; +} + +__maybe_unused /* this is temporary while this file is included in others */ +static int smbdirect_connection_send_iter(struct smbdirect_socket *sc, + struct iov_iter *iter, + unsigned int flags, + bool need_invalidate, + unsigned int remote_key) +{ + const struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_send_batch batch; + int total_count = iov_iter_count(iter); + int ret; + int error = 0; + __be32 hdr; + + if (WARN_ONCE(flags, "unexpected flags=0x%x\n", flags)) + return -EINVAL; /* no flags support for now */ + + if (WARN_ON_ONCE(iov_iter_rw(iter) != ITER_SOURCE)) + return -EINVAL; /* It's a bug in upper layer to get there */ + + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { + smbdirect_log_write(sc, SMBDIRECT_LOG_INFO, + "status=%s first_error=%1pe => %1pe\n", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), + SMBDIRECT_DEBUG_ERR_PTR(-ENOTCONN)); + return -ENOTCONN; + } + + /* + * For now we expect the iter to have the full + * message, including a 4 byte length header. + */ + if (iov_iter_count(iter) <= 4) + return -EINVAL; + if (!copy_from_iter_full(&hdr, sizeof(hdr), iter)) + return -EFAULT; + if (iov_iter_count(iter) != be32_to_cpu(hdr)) + return -EINVAL; + + /* + * The size must fit into the negotiated + * fragmented send size. 
+ */ + if (iov_iter_count(iter) > sp->max_fragmented_send_size) + return -EMSGSIZE; + + smbdirect_log_write(sc, SMBDIRECT_LOG_INFO, + "Sending (RDMA): length=%zu\n", + iov_iter_count(iter)); + + smbdirect_connection_send_batch_init(&batch, need_invalidate, remote_key); + while (iov_iter_count(iter)) { + ret = smbdirect_connection_send_single_iter(sc, + &batch, + iter, + flags, + iov_iter_count(iter)); + if (unlikely(ret < 0)) { + error = ret; + break; + } + } + + ret = smbdirect_connection_send_batch_flush(sc, &batch, true); + if (unlikely(ret && !error)) + error = ret; + + /* + * As an optimization, we don't wait for individual I/O to finish + * before sending the next one. + * Send them all and wait for pending send count to get to 0 + * that means all the I/Os have been out and we are good to return + */ + + ret = smbdirect_connection_send_wait_zero_pending(sc); + if (unlikely(ret && !error)) + error = ret; + + if (unlikely(error)) + return error; + + return total_count; +} + static void smbdirect_connection_send_io_done(struct ib_cq *cq, struct ib_wc *wc) { struct smbdirect_send_io *msg = @@ -1092,6 +1579,14 @@ static int smbdirect_connection_recv_io_refill(struct smbdirect_socket *sc) atomic_add(posted, &sc->recv_io.credits.available); + /* + * If the last send credit is waiting for credits + * it can grant we need to wake it up + */ + if (atomic_read(&sc->send_io.bcredits.count) == 0 && + atomic_read(&sc->send_io.credits.count) == 0) + wake_up(&sc->send_io.credits.wait_queue); + /* * If we posted at least one smbdirect_recv_io buffer, * we need to inform the peer about it and grant @@ -1489,7 +1984,6 @@ static ssize_t smbdirect_map_sges_from_folioq(struct iov_iter *iter, * BVEC-type iterator and the pages pinned, ref'd or otherwise held in some * way. 
*/ -__maybe_unused /* this is temporary while this file is included in others */ static ssize_t smbdirect_map_sges_from_iter(struct iov_iter *iter, size_t len, struct smbdirect_map_sges *state) { From db3092ed2f0bec27eb289755173134e46e3ae7a4 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 22:10:50 +0200 Subject: [PATCH 041/145] smb: smbdirect: introduce smbdirect_connection_send_immediate_work() This is a combination of smb_direct_send_immediate_work() in the server as well as send_immediate_empty_message() and smbd_post_send_empty() in the client. smbdirect_connection_send_immediate_work() replaces all of them in client and server. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index bebee5bf072c..69b00192cb08 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -1302,6 +1302,28 @@ static void smbdirect_connection_send_io_done(struct ib_cq *cq, struct ib_wc *wc wake_up(&sc->send_io.pending.dec_wait_queue); } +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_connection_send_immediate_work(struct work_struct *work) +{ + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, idle.immediate_work); + int ret; + + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) + return; + + smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_INFO, + "send an empty message\n"); + sc->statistics.send_empty++; + ret = smbdirect_connection_send_single_iter(sc, NULL, NULL, 0, 0); + if (ret < 0) { + smbdirect_log_write(sc, SMBDIRECT_LOG_ERR, + 
"smbdirect_connection_send_single_iter ret=%1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + smbdirect_socket_schedule_cleanup(sc, ret); + } +} + __maybe_unused /* this is temporary while this file is included in others */ static int smbdirect_connection_post_recv_io(struct smbdirect_recv_io *msg) { From 099db5093e4d894483163bd7e4b58c99319bc3f3 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 21 Sep 2025 21:47:52 +0200 Subject: [PATCH 042/145] smb: smbdirect: introduce smbdirect_connection_negotiation_done() This will be used by client and server in order to turn the connection into a usable state. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 48 ++++++++++++++++++- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 69b00192cb08..757e9eed4ffd 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -19,6 +19,9 @@ struct smbdirect_map_sges { static ssize_t smbdirect_map_sges_from_iter(struct iov_iter *iter, size_t len, struct smbdirect_map_sges *state); +static void smbdirect_connection_recv_io_refill_work(struct work_struct *work); +static void smbdirect_connection_send_immediate_work(struct work_struct *work); + __maybe_unused /* this is temporary while this file is included in others */ static void smbdirect_connection_qp_event_handler(struct ib_event *event, void *context) { @@ -154,6 +157,49 @@ static void smbdirect_connection_rdma_established(struct smbdirect_socket *sc) sc->rdma.expected_event = RDMA_CM_EVENT_DISCONNECTED; } +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_connection_negotiation_done(struct smbdirect_socket 
*sc) +{ + if (unlikely(sc->first_error)) + return; + + if (sc->status != SMBDIRECT_SOCKET_NEGOTIATE_RUNNING) { + /* + * Something went wrong... + */ + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "status=%s first_error=%1pe local: %pISpsfc remote: %pISpsfc\n", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), + &sc->rdma.cm_id->route.addr.src_addr, + &sc->rdma.cm_id->route.addr.dst_addr); + return; + } + + /* + * We are done, so we can wake up the waiter. + */ + WARN_ONCE(sc->status == SMBDIRECT_SOCKET_CONNECTED, + "status=%s first_error=%1pe", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); + sc->status = SMBDIRECT_SOCKET_CONNECTED; + + /* + * We need to setup the refill and send immediate work + * in order to get a working connection. + */ + INIT_WORK(&sc->recv_io.posted.refill_work, smbdirect_connection_recv_io_refill_work); + INIT_WORK(&sc->idle.immediate_work, smbdirect_connection_send_immediate_work); + + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "negotiated: local: %pISpsfc remote: %pISpsfc\n", + &sc->rdma.cm_id->route.addr.src_addr, + &sc->rdma.cm_id->route.addr.dst_addr); + + wake_up(&sc->status_wait); +} + static u32 smbdirect_rdma_rw_send_wrs(struct ib_device *dev, const struct ib_qp_init_attr *attr) { @@ -1302,7 +1348,6 @@ static void smbdirect_connection_send_io_done(struct ib_cq *cq, struct ib_wc *wc wake_up(&sc->send_io.pending.dec_wait_queue); } -__maybe_unused /* this is temporary while this file is included in others */ static void smbdirect_connection_send_immediate_work(struct work_struct *work) { struct smbdirect_socket *sc = @@ -1633,7 +1678,6 @@ static int smbdirect_connection_recv_io_refill(struct smbdirect_socket *sc) return posted; } -__maybe_unused /* this is temporary while this file is included in others */ static void smbdirect_connection_recv_io_refill_work(struct work_struct *work) { struct smbdirect_socket *sc = From 
b183b7b9b51fc37cc8e1fbad3c0d84e0cb605266 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 21 Oct 2025 17:52:05 +0200 Subject: [PATCH 043/145] smb: smbdirect: introduce smbdirect_mr_io_fill_buffer_descriptor() This will be used by the client instead of dereferencing struct smbdirect_mr_io internals. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_mr.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_mr.c b/fs/smb/common/smbdirect/smbdirect_mr.c index 249719c916a8..d5bf8531e237 100644 --- a/fs/smb/common/smbdirect/smbdirect_mr.c +++ b/fs/smb/common/smbdirect/smbdirect_mr.c @@ -452,6 +452,24 @@ smbdirect_connection_register_mr_io(struct smbdirect_socket *sc, mutex_unlock(&mr->mutex); return NULL; } + +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr, + struct smbdirect_buffer_descriptor_v1 *v1) +{ + mutex_lock(&mr->mutex); + if (mr->state == SMBDIRECT_MR_REGISTERED) { + v1->offset = cpu_to_le64(mr->mr->iova); + v1->token = cpu_to_le32(mr->mr->rkey); + v1->length = cpu_to_le32(mr->mr->length); + } else { + v1->offset = cpu_to_le64(U64_MAX); + v1->token = cpu_to_le32(U32_MAX); + v1->length = cpu_to_le32(U32_MAX); + } + mutex_unlock(&mr->mutex); +} + /* * Deregister a MR after I/O is done * This function may wait if remote invalidation is not used From a93b68d46e1450ca6d395be0ca002f8bdf04d9a6 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 21 Oct 2025 20:55:38 +0200 Subject: [PATCH 044/145] smb: smbdirect: introduce smbdirect_connection_legacy_debug_proc_show() This will be used by the client in order to keep the debug output in the current way without the need to access struct 
smbdirect_socket internals. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_all_c_files.c | 1 + fs/smb/common/smbdirect/smbdirect_debug.c | 88 +++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 fs/smb/common/smbdirect/smbdirect_debug.c diff --git a/fs/smb/common/smbdirect/smbdirect_all_c_files.c b/fs/smb/common/smbdirect/smbdirect_all_c_files.c index 963a1fc3b54b..51b2bcda5596 100644 --- a/fs/smb/common/smbdirect/smbdirect_all_c_files.c +++ b/fs/smb/common/smbdirect/smbdirect_all_c_files.c @@ -19,3 +19,4 @@ #include "smbdirect_connection.c" #include "smbdirect_mr.c" #include "smbdirect_rw.c" +#include "smbdirect_debug.c" diff --git a/fs/smb/common/smbdirect/smbdirect_debug.c b/fs/smb/common/smbdirect/smbdirect_debug.c new file mode 100644 index 000000000000..20b87d8aa6d1 --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect_debug.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2017, Microsoft Corporation. 
+ * Copyright (c) 2025, Stefan Metzmacher + */ + +#include "smbdirect_internal.h" +#include <linux/seq_file.h> + +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_connection_legacy_debug_proc_show(struct smbdirect_socket *sc, + unsigned int rdma_readwrite_threshold, + struct seq_file *m) +{ + const struct smbdirect_socket_parameters *sp; + + if (!sc) + return; + sp = &sc->parameters; + + seq_puts(m, "\n"); + seq_printf(m, "SMBDirect protocol version: 0x%x ", + SMBDIRECT_V1); + seq_printf(m, "transport status: %s (%u)", + smbdirect_socket_status_string(sc->status), + sc->status); + + seq_puts(m, "\n"); + seq_printf(m, "Conn receive_credit_max: %u ", + sp->recv_credit_max); + seq_printf(m, "send_credit_target: %u max_send_size: %u", + sp->send_credit_target, + sp->max_send_size); + + seq_puts(m, "\n"); + seq_printf(m, "Conn max_fragmented_recv_size: %u ", + sp->max_fragmented_recv_size); + seq_printf(m, "max_fragmented_send_size: %u max_receive_size:%u", + sp->max_fragmented_send_size, + sp->max_recv_size); + + seq_puts(m, "\n"); + seq_printf(m, "Conn keep_alive_interval: %u ", + sp->keepalive_interval_msec * 1000); + seq_printf(m, "max_readwrite_size: %u rdma_readwrite_threshold: %u", + sp->max_read_write_size, + rdma_readwrite_threshold); + + seq_puts(m, "\n"); + seq_printf(m, "Debug count_get_receive_buffer: %llu ", + sc->statistics.get_receive_buffer); + seq_printf(m, "count_put_receive_buffer: %llu count_send_empty: %llu", + sc->statistics.put_receive_buffer, + sc->statistics.send_empty); + + seq_puts(m, "\n"); + seq_printf(m, "Read Queue count_enqueue_reassembly_queue: %llu ", + sc->statistics.enqueue_reassembly_queue); + seq_printf(m, "count_dequeue_reassembly_queue: %llu ", + sc->statistics.dequeue_reassembly_queue); + seq_printf(m, "reassembly_data_length: %u ", + sc->recv_io.reassembly.data_length); + seq_printf(m, "reassembly_queue_length: %u", + sc->recv_io.reassembly.queue_length); + + seq_puts(m, "\n"); + seq_printf(m, 
"Current Credits send_credits: %u ", + atomic_read(&sc->send_io.credits.count)); + seq_printf(m, "receive_credits: %u receive_credit_target: %u", + atomic_read(&sc->recv_io.credits.count), + sc->recv_io.credits.target); + + seq_puts(m, "\n"); + seq_printf(m, "Pending send_pending: %u ", + atomic_read(&sc->send_io.pending.count)); + + seq_puts(m, "\n"); + seq_printf(m, "MR responder_resources: %u ", + sp->responder_resources); + seq_printf(m, "max_frmr_depth: %u mr_type: 0x%x", + sp->max_frmr_depth, + sc->mr_io.type); + + seq_puts(m, "\n"); + seq_printf(m, "MR mr_ready_count: %u mr_used_count: %u", + atomic_read(&sc->mr_io.ready.count), + atomic_read(&sc->mr_io.used.count)); +} From 09d617d3121e14309ad5e4287b0da3ec27d386a8 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 21 Oct 2025 17:32:07 +0200 Subject: [PATCH 045/145] smb: smbdirect: introduce smbdirect_connection_wait_for_connected() This will be used by client and server in order to wait for the connect/negotiation to finish in order to get a usable connection. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 757e9eed4ffd..7921c859a3ce 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -703,6 +703,79 @@ static void smbdirect_connection_negotiate_rdma_resources(struct smbdirect_socke peer_responder_resources); } +__maybe_unused /* this is temporary while this file is included in others */ +static int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc) +{ + const struct smbdirect_socket_parameters *sp = &sc->parameters; + union { + struct sockaddr sa; + struct sockaddr_storage ss; + } src_addr, dst_addr; + const struct sockaddr *src = NULL; + const struct sockaddr *dst = NULL; + char _devname[IB_DEVICE_NAME_MAX] = { 0, }; + const char *devname = NULL; + int ret; + + if (sc->rdma.cm_id) { + src_addr.ss = sc->rdma.cm_id->route.addr.src_addr; + if (src_addr.sa.sa_family != AF_UNSPEC) + src = &src_addr.sa; + dst_addr.ss = sc->rdma.cm_id->route.addr.dst_addr; + if (dst_addr.sa.sa_family != AF_UNSPEC) + dst = &dst_addr.sa; + + if (sc->ib.dev) { + memcpy(_devname, sc->ib.dev->name, IB_DEVICE_NAME_MAX); + devname = _devname; + } + } + + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "waiting for connection: device: %.*s local: %pISpsfc remote: %pISpsfc\n", + IB_DEVICE_NAME_MAX, devname, src, dst); + + ret = wait_event_interruptible_timeout(sc->status_wait, + sc->status == SMBDIRECT_SOCKET_CONNECTED || + sc->first_error, + msecs_to_jiffies(sp->negotiate_timeout_msec)); + if (sc->rdma.cm_id) { + /* + * Maybe src and dev are updated in the meantime. 
+ */ + src_addr.ss = sc->rdma.cm_id->route.addr.src_addr; + if (src_addr.sa.sa_family != AF_UNSPEC) + src = &src_addr.sa; + dst_addr.ss = sc->rdma.cm_id->route.addr.dst_addr; + if (dst_addr.sa.sa_family != AF_UNSPEC) + dst = &dst_addr.sa; + + if (sc->ib.dev) { + memcpy(_devname, sc->ib.dev->name, IB_DEVICE_NAME_MAX); + devname = _devname; + } + } + if (ret == 0) + ret = -ETIMEDOUT; + if (ret < 0) + smbdirect_socket_schedule_cleanup(sc, ret); + if (sc->first_error) { + int lvl = SMBDIRECT_LOG_ERR; + + ret = sc->first_error; + if (ret == -ENODEV) + lvl = SMBDIRECT_LOG_INFO; + + smbdirect_log_rdma_event(sc, lvl, + "connection failed %1pe device: %.*s local: %pISpsfc remote: %pISpsfc\n", + SMBDIRECT_DEBUG_ERR_PTR(ret), + IB_DEVICE_NAME_MAX, devname, src, dst); + return ret; + } + + return 0; +} + static void smbdirect_connection_idle_timer_work(struct work_struct *work) { struct smbdirect_socket *sc = From dc24063813ea617394db153cf9203286770ed404 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 21 Oct 2025 19:42:17 +0200 Subject: [PATCH 046/145] smb: smbdirect: introduce smbdirect_connection_is_connected() This is a simple way to check if the connection is still ok without the need to know internals of struct smbdirect_socket. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_connection.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 7921c859a3ce..735d8dc3e9f5 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -703,6 +703,14 @@ static void smbdirect_connection_negotiate_rdma_resources(struct smbdirect_socke peer_responder_resources); } +__maybe_unused /* this is temporary while this file is included in others */ +static bool smbdirect_connection_is_connected(struct smbdirect_socket *sc) +{ + if (unlikely(!sc || sc->first_error || sc->status != SMBDIRECT_SOCKET_CONNECTED)) + return false; + return true; +} + __maybe_unused /* this is temporary while this file is included in others */ static int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc) { From b3e78c651441eaf08e830e260d06fd8d33a8b7f9 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 21 Oct 2025 17:32:07 +0200 Subject: [PATCH 047/145] smb: smbdirect: introduce smbdirect_socket_shutdown() This can be used by client and server to trigger a disconnect of the connection, the idea is to be similar to kernel_sock_shutdown(), but for smbdirect there's no point in shutting down only one direction so there's no 'how' argument. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index 251d15219173..57b6e8e16183 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -385,6 +385,12 @@ static void smbdirect_socket_destroy_sync(struct smbdirect_socket *sc) SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); } +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_socket_shutdown(struct smbdirect_socket *sc) +{ + smbdirect_socket_schedule_cleanup_lvl(sc, SMBDIRECT_LOG_INFO, -ESHUTDOWN); +} + __maybe_unused /* this is temporary while this file is included in others */ static int smbdirect_socket_wait_for_credits(struct smbdirect_socket *sc, enum smbdirect_socket_status expected_status, From dce268ffcddc96f29707c1967c52b036ad92e43d Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 21 Oct 2025 15:17:35 +0200 Subject: [PATCH 048/145] smb: smbdirect: introduce smbdirect_socket_init_{new,accepting}() and helpers These will be used in order to initialize struct smbdirect_socket with rdma.cm_id being valid from the start in order to hold a reference to the correct net namespace, this will allow us to implement async connecting and accepting logic in the next steps. 
This comes with some related helper functions in order to initialize the socket without the need to access internals of struct smbdirect_socket: smbdirect_socket_set_initial_parameters smbdirect_socket_get_current_parameters smbdirect_socket_set_kernel_settings smbdirect_socket_set_custom_workqueue Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.c | 158 ++++++++++++++++++++- 1 file changed, 156 insertions(+), 2 deletions(-) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index 57b6e8e16183..c964c5f8c724 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -24,6 +24,160 @@ static bool smbdirect_frwr_is_supported(const struct ib_device_attr *attrs) static void smbdirect_socket_cleanup_work(struct work_struct *work); +static int smbdirect_socket_rdma_event_handler(struct rdma_cm_id *id, + struct rdma_cm_event *event) +{ + struct smbdirect_socket *sc = id->context; + int ret = -ESTALE; + + /* + * This should be replaced before any real work + * starts! So it should never be called! 
+ */ + + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) + ret = -ENETDOWN; + if (IS_ERR(SMBDIRECT_DEBUG_ERR_PTR(event->status))) + ret = event->status; + pr_err("%s (first_error=%1pe, expected=%s) => event=%s status=%d => ret=%1pe\n", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), + rdma_event_msg(sc->rdma.expected_event), + rdma_event_msg(event->event), + event->status, + SMBDIRECT_DEBUG_ERR_PTR(ret)); + WARN_ONCE(1, "%s should not be called!\n", __func__); + sc->rdma.cm_id = NULL; + return -ESTALE; +} + +__maybe_unused /* this is temporary while this file is included in others */ +static int smbdirect_socket_init_new(struct net *net, struct smbdirect_socket *sc) +{ + struct rdma_cm_id *id; + int ret; + + smbdirect_socket_init(sc); + + id = rdma_create_id(net, + smbdirect_socket_rdma_event_handler, + sc, + RDMA_PS_TCP, + IB_QPT_RC); + if (IS_ERR(id)) { + pr_err("%s: rdma_create_id() failed %1pe\n", __func__, id); + return PTR_ERR(id); + } + + ret = rdma_set_afonly(id, 1); + if (ret) { + rdma_destroy_id(id); + pr_err("%s: rdma_set_afonly() failed %1pe\n", + __func__, SMBDIRECT_DEBUG_ERR_PTR(ret)); + return ret; + } + + sc->rdma.cm_id = id; + + INIT_WORK(&sc->disconnect_work, smbdirect_socket_cleanup_work); + + return 0; +} + +__maybe_unused /* this is temporary while this file is included in others */ +static int smbdirect_socket_init_accepting(struct rdma_cm_id *id, struct smbdirect_socket *sc) +{ + smbdirect_socket_init(sc); + + sc->rdma.cm_id = id; + sc->rdma.cm_id->context = sc; + sc->rdma.cm_id->event_handler = smbdirect_socket_rdma_event_handler; + + sc->ib.dev = sc->rdma.cm_id->device; + + INIT_WORK(&sc->disconnect_work, smbdirect_socket_cleanup_work); + + return 0; +} + +__maybe_unused /* this is temporary while this file is included in others */ +static int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, + const struct smbdirect_socket_parameters *sp) +{ + /* + * This is only allowed before 
connect or accept + */ + WARN_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED, + "status=%s first_error=%1pe", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); + if (sc->status != SMBDIRECT_SOCKET_CREATED) + return -EINVAL; + + /* + * Make a copy of the callers parameters + * from here we only work on the copy + * + * TODO: do we want consistency checking? + */ + sc->parameters = *sp; + + return 0; +} + +__maybe_unused /* this is temporary while this file is included in others */ +static const struct smbdirect_socket_parameters * +smbdirect_socket_get_current_parameters(struct smbdirect_socket *sc) +{ + return &sc->parameters; +} + +__maybe_unused /* this is temporary while this file is included in others */ +static int smbdirect_socket_set_kernel_settings(struct smbdirect_socket *sc, + enum ib_poll_context poll_ctx, + gfp_t gfp_mask) +{ + /* + * This is only allowed before connect or accept + */ + WARN_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED, + "status=%s first_error=%1pe", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); + if (sc->status != SMBDIRECT_SOCKET_CREATED) + return -EINVAL; + + sc->ib.poll_ctx = poll_ctx; + + sc->send_io.mem.gfp_mask = gfp_mask; + sc->recv_io.mem.gfp_mask = gfp_mask; + sc->rw_io.mem.gfp_mask = gfp_mask; + + return 0; +} + +__maybe_unused /* this is temporary while this file is included in others */ +static int smbdirect_socket_set_custom_workqueue(struct smbdirect_socket *sc, + struct workqueue_struct *workqueue) +{ + /* + * This is only allowed before connect or accept + */ + WARN_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED, + "status=%s first_error=%1pe", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); + if (sc->status != SMBDIRECT_SOCKET_CREATED) + return -EINVAL; + + /* + * Remember the callers workqueue + */ + sc->workqueue = workqueue; + + return 0; +} + __maybe_unused /* this is temporary while this file 
is included in others */ static void smbdirect_socket_prepare_create(struct smbdirect_socket *sc, const struct smbdirect_socket_parameters *sp, @@ -35,12 +189,12 @@ static void smbdirect_socket_prepare_create(struct smbdirect_socket *sc, * Make a copy of the callers parameters * from here we only work on the copy */ - sc->parameters = *sp; + smbdirect_socket_set_initial_parameters(sc, sp); /* * Remember the callers workqueue */ - sc->workqueue = workqueue; + smbdirect_socket_set_custom_workqueue(sc, workqueue); INIT_WORK(&sc->disconnect_work, smbdirect_socket_cleanup_work); From ede2b44b0e62378cb8585dda20a4edadbc621bb0 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 18 Dec 2025 20:54:25 +0100 Subject: [PATCH 049/145] smb: smbdirect: let smbdirect_socket_set_initial_parameters() call rdma_restrict_node_type() We allow the caller of smbdirect_socket_set_initial_parameters() to pass SMBDIRECT_FLAG_PORT_RANGE_ONLY_{IB,IW} in order to restrict the rdma devices for the listener or connection to either iWarp (RDMA_NODE_RNIC) or InfiniBand/RoCEv1/RoCEv2 (RDMA_NODE_IB_CA). Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect.h | 7 +++++++ fs/smb/common/smbdirect/smbdirect_socket.c | 11 +++++++++++ 2 files changed, 18 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect.h b/fs/smb/common/smbdirect/smbdirect.h index 821a34c4cc47..e70a21eb57f0 100644 --- a/fs/smb/common/smbdirect/smbdirect.h +++ b/fs/smb/common/smbdirect/smbdirect.h @@ -25,6 +25,9 @@ struct smbdirect_buffer_descriptor_v1 { * Some values are important for the upper layer. 
*/ struct smbdirect_socket_parameters { + __u64 flags; +#define SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB ((__u64)0x1) +#define SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW ((__u64)0x2) __u32 resolve_addr_timeout_msec; __u32 resolve_route_timeout_msec; __u32 rdma_connect_timeout_msec; @@ -43,4 +46,8 @@ struct smbdirect_socket_parameters { __u32 keepalive_timeout_msec; } __packed; +#define SMBDIRECT_FLAG_PORT_RANGE_MASK ( \ + SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB | \ + SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW) + #endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ */ diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index c964c5f8c724..8eb021cd7cee 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -114,6 +114,17 @@ static int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, if (sc->status != SMBDIRECT_SOCKET_CREATED) return -EINVAL; + if (sp->flags & ~SMBDIRECT_FLAG_PORT_RANGE_MASK) + return -EINVAL; + + if (sp->flags & SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB && + sp->flags & SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW) + return -EINVAL; + else if (sp->flags & SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB) + rdma_restrict_node_type(sc->rdma.cm_id, RDMA_NODE_IB_CA); + else if (sp->flags & SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW) + rdma_restrict_node_type(sc->rdma.cm_id, RDMA_NODE_RNIC); + /* * Make a copy of the callers parameters * from here we only work on the copy From d1f187656797fc7434388c1795e05f8abe370d46 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 1 Sep 2025 16:10:49 +0200 Subject: [PATCH 050/145] smb: smbdirect: introduce smbdirect_connect[_sync]() This implements a fully async connect logic over all rdma related operations: rdma_resolve_addr, rdma_resolve_route and rdma_connect_locked until we reach RDMA_CM_EVENT_ESTABLISHED, followed by the smbdirect negotiation request/response handling until we reach SMBDIRECT_SOCKET_CONNECTED. 
smbdirect_connect_sync() is just a useful wrapper around the async logic calling smbdirect_connect() followed by smbdirect_connection_wait_for_connected(), which only waits for SMBDIRECT_SOCKET_CONNECTED or an error. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_all_c_files.c | 1 + fs/smb/common/smbdirect/smbdirect_connect.c | 926 ++++++++++++++++++ 2 files changed, 927 insertions(+) create mode 100644 fs/smb/common/smbdirect/smbdirect_connect.c diff --git a/fs/smb/common/smbdirect/smbdirect_all_c_files.c b/fs/smb/common/smbdirect/smbdirect_all_c_files.c index 51b2bcda5596..5df7da692df3 100644 --- a/fs/smb/common/smbdirect/smbdirect_all_c_files.c +++ b/fs/smb/common/smbdirect/smbdirect_all_c_files.c @@ -20,3 +20,4 @@ #include "smbdirect_mr.c" #include "smbdirect_rw.c" #include "smbdirect_debug.c" +#include "smbdirect_connect.c" diff --git a/fs/smb/common/smbdirect/smbdirect_connect.c b/fs/smb/common/smbdirect/smbdirect_connect.c new file mode 100644 index 000000000000..c32daf788b41 --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect_connect.c @@ -0,0 +1,926 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2012,2016,2017,2025 Stefan Metzmacher + */ + +#include "smbdirect_internal.h" +#include "../../common/smb2status.h" + +static int smbdirect_connect_setup_connection(struct smbdirect_socket *sc); +static int smbdirect_connect_resolve_addr(struct smbdirect_socket *sc, + const struct sockaddr *src, + const struct sockaddr *dst); +static int smbdirect_connect_rdma_event_handler(struct rdma_cm_id *id, + struct rdma_cm_event *event); +static int smbdirect_connect_negotiate_start(struct smbdirect_socket *sc); +static void smbdirect_connect_negotiate_send_done(struct ib_cq *cq, struct ib_wc *wc); +static void smbdirect_connect_negotiate_recv_done(struct 
ib_cq *cq, struct ib_wc *wc); + +__maybe_unused /* this is temporary while this file is included in others */ +static int smbdirect_connect(struct smbdirect_socket *sc, + const struct sockaddr *dst) +{ + const struct sockaddr *src = NULL; + union { + struct sockaddr sa; + struct sockaddr_storage ss; + } src_addr = { + .sa = { + .sa_family = AF_UNSPEC, + }, + }; + int ret; + + if (sc->first_error) + return -ENOTCONN; + + if (sc->status != SMBDIRECT_SOCKET_CREATED) + return -EALREADY; + + if (WARN_ON_ONCE(!sc->rdma.cm_id)) + return -EINVAL; + + src_addr.ss = sc->rdma.cm_id->route.addr.src_addr; + if (src_addr.sa.sa_family != AF_UNSPEC) + src = &src_addr.sa; + + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "connect: src: %pISpsfc dst: %pISpsfc\n", + src, dst); + + ret = smbdirect_connect_setup_connection(sc); + if (ret) + return ret; + + ret = smbdirect_connect_resolve_addr(sc, src, dst); + if (ret) + return ret; + + /* + * The rest happens async via smbdirect_connect_rdma_event_handler() + * the caller will decide to wait or not. 
+ */ + return 0; +} + +static int smbdirect_connect_setup_connection(struct smbdirect_socket *sc) +{ + rdma_lock_handler(sc->rdma.cm_id); + sc->rdma.cm_id->event_handler = smbdirect_connect_rdma_event_handler; + rdma_unlock_handler(sc->rdma.cm_id); + + if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_CREATED)) + return -EINVAL; + sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED; + + return 0; +} + +static int smbdirect_connect_resolve_addr(struct smbdirect_socket *sc, + const struct sockaddr *src, + const struct sockaddr *dst) +{ + const struct smbdirect_socket_parameters *sp = &sc->parameters; + struct sockaddr *src_addr = NULL; + struct sockaddr *dst_addr = NULL; + int ret; + + src_addr = (struct sockaddr *)src; + if (src_addr && src_addr->sa_family == AF_UNSPEC) + src_addr = NULL; + dst_addr = (struct sockaddr *)dst; + + if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED)) + return -EINVAL; + sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING; + sc->rdma.expected_event = RDMA_CM_EVENT_ADDR_RESOLVED; + ret = rdma_resolve_addr(sc->rdma.cm_id, src_addr, dst_addr, + sp->resolve_addr_timeout_msec); + if (ret) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "rdma_resolve_addr() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + return ret; + } + + return 0; +} + +static int smbdirect_connect_resolve_route(struct smbdirect_socket *sc) +{ + const struct smbdirect_socket_parameters *sp = &sc->parameters; + int ret; + + if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED)) + return sc->first_error; + sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING; + sc->rdma.expected_event = RDMA_CM_EVENT_ROUTE_RESOLVED; + ret = rdma_resolve_route(sc->rdma.cm_id, sp->resolve_route_timeout_msec); + if (ret) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "rdma_resolve_route() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + return ret; + } + + return 0; +} + +static int smbdirect_connect_rdma_connect(struct 
smbdirect_socket *sc) +{ + struct smbdirect_socket_parameters *sp = &sc->parameters; + struct rdma_conn_param conn_param; + __be32 ird_ord_hdr[2]; + int ret; + + sc->ib.dev = sc->rdma.cm_id->device; + + if (!smbdirect_frwr_is_supported(&sc->ib.dev->attrs)) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "Fast Registration Work Requests (FRWR) is not supported device %.*s\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name); + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "Device capability flags = %llx max_fast_reg_page_list_len = %u\n", + sc->ib.dev->attrs.device_cap_flags, + sc->ib.dev->attrs.max_fast_reg_page_list_len); + return -EPROTONOSUPPORT; + } + + if (sp->flags & SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB && + !rdma_ib_or_roce(sc->ib.dev, sc->rdma.cm_id->port_num)) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "Not IB: device: %.*s IW:%u local: %pISpsfc remote: %pISpsfc\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + rdma_protocol_iwarp(sc->ib.dev, sc->rdma.cm_id->port_num), + &sc->rdma.cm_id->route.addr.src_addr, + &sc->rdma.cm_id->route.addr.dst_addr); + return -EPROTONOSUPPORT; + } + if (sp->flags & SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW && + !rdma_protocol_iwarp(sc->ib.dev, sc->rdma.cm_id->port_num)) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "Not IW: device: %.*s IB:%u local: %pISpsfc remote: %pISpsfc\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + rdma_ib_or_roce(sc->ib.dev, sc->rdma.cm_id->port_num), + &sc->rdma.cm_id->route.addr.src_addr, + &sc->rdma.cm_id->route.addr.dst_addr); + return -EPROTONOSUPPORT; + } + + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "rdma connect: device: %.*s local: %pISpsfc remote: %pISpsfc\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + &sc->rdma.cm_id->route.addr.src_addr, + &sc->rdma.cm_id->route.addr.dst_addr); + + sp->max_frmr_depth = min_t(u32, sp->max_frmr_depth, + sc->ib.dev->attrs.max_fast_reg_page_list_len); + sc->mr_io.type = IB_MR_TYPE_MEM_REG; + if (sc->ib.dev->attrs.kernel_cap_flags & 
IBK_SG_GAPS_REG) + sc->mr_io.type = IB_MR_TYPE_SG_GAPS; + + sp->responder_resources = min_t(u8, sp->responder_resources, + sc->ib.dev->attrs.max_qp_rd_atom); + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_INFO, + "responder_resources=%d\n", + sp->responder_resources); + + ret = smbdirect_connection_create_qp(sc); + if (ret) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "smbdirect_connection_create_qp() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + return ret; + } + + memset(&conn_param, 0, sizeof(conn_param)); + conn_param.initiator_depth = sp->initiator_depth; + conn_param.responder_resources = sp->responder_resources; + + /* Need to send IRD/ORD in private data for iWARP */ + if (rdma_protocol_iwarp(sc->ib.dev, sc->rdma.cm_id->port_num)) { + ird_ord_hdr[0] = cpu_to_be32(conn_param.responder_resources); + ird_ord_hdr[1] = cpu_to_be32(conn_param.initiator_depth); + conn_param.private_data = ird_ord_hdr; + conn_param.private_data_len = sizeof(ird_ord_hdr); + } else { + conn_param.private_data = NULL; + conn_param.private_data_len = 0; + } + + conn_param.retry_count = SMBDIRECT_RDMA_CM_RETRY; + conn_param.rnr_retry_count = SMBDIRECT_RDMA_CM_RNR_RETRY; + conn_param.flow_control = 0; + + if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED)) + return sc->first_error; + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING; + sc->rdma.expected_event = RDMA_CM_EVENT_ESTABLISHED; + ret = rdma_connect_locked(sc->rdma.cm_id, &conn_param); + if (ret) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "rdma_connect_locked() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + return ret; + } + + /* + * start with the rdma connect timeout and SMBDIRECT_KEEPALIVE_PENDING + * so that the timer will cause a disconnect. 
+ */ + INIT_DELAYED_WORK(&sc->idle.timer_work, smbdirect_connection_idle_timer_work); + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->rdma_connect_timeout_msec)); + + return 0; +} + +static int smbdirect_connect_rdma_event_handler(struct rdma_cm_id *id, + struct rdma_cm_event *event) +{ + struct smbdirect_socket *sc = id->context; + u8 peer_initiator_depth; + u8 peer_responder_resources; + int ret; + + /* + * cma_cm_event_handler() has + * lockdep_assert_held(&id_priv->handler_mutex); + * + * Mutexes are not allowed in interrupts, + * and we rely on not being in an interrupt here, + * as we might sleep. + * + * We didn't timeout so we cancel our idle timer, + * it will be scheduled again if needed. + */ + WARN_ON_ONCE(in_interrupt()); + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; + cancel_delayed_work_sync(&sc->idle.timer_work); + + if (event->status || event->event != sc->rdma.expected_event) { + int lvl = SMBDIRECT_LOG_ERR; + + ret = -ECONNABORTED; + + if (event->event == RDMA_CM_EVENT_REJECTED) + ret = -ECONNREFUSED; + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) + ret = -ENETDOWN; + if (IS_ERR(SMBDIRECT_DEBUG_ERR_PTR(event->status))) + ret = event->status; + + if (ret == -ENODEV) + lvl = SMBDIRECT_LOG_INFO; + + smbdirect_log_rdma_event(sc, lvl, + "%s (first_error=%1pe, expected=%s) => event=%s status=%d => ret=%1pe\n", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), + rdma_event_msg(sc->rdma.expected_event), + rdma_event_msg(event->event), + event->status, + SMBDIRECT_DEBUG_ERR_PTR(ret)); + + smbdirect_socket_schedule_cleanup_lvl(sc, + lvl, + ret); + return 0; + } + + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "%s (first_error=%1pe) event=%s\n", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), + rdma_event_msg(event->event)); + + if (sc->first_error) + return 0; + + switch 
(event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING)) + return 0; + sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED; + + ret = smbdirect_connect_resolve_route(sc); + if (ret) + smbdirect_socket_schedule_cleanup(sc, ret); + return 0; + + case RDMA_CM_EVENT_ROUTE_RESOLVED: + if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING)) + return 0; + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED; + + ret = smbdirect_connect_rdma_connect(sc); + if (ret) + smbdirect_socket_schedule_cleanup(sc, ret); + return 0; + + case RDMA_CM_EVENT_ESTABLISHED: + smbdirect_connection_rdma_established(sc); + + if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING)) + return 0; + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED; + + /* + * Here we work around an inconsistency between + * iWarp and other devices (at least rxe and irdma using RoCEv2) + */ + if (rdma_protocol_iwarp(id->device, id->port_num)) { + /* + * iWarp devices report the peer's values + * with the perspective of the peer here. + * Tested with siw and irdma (in iwarp mode) + * We need to change to our perspective here, + * so we need to switch the values. + */ + peer_initiator_depth = event->param.conn.responder_resources; + peer_responder_resources = event->param.conn.initiator_depth; + } else { + /* + * Non iWarp devices report the peer's values + * already changed to our perspective here. + * Tested with rxe and irdma (in roce mode). 
+ */ + peer_initiator_depth = event->param.conn.initiator_depth; + peer_responder_resources = event->param.conn.responder_resources; + } + smbdirect_connection_negotiate_rdma_resources(sc, + peer_initiator_depth, + peer_responder_resources, + &event->param.conn); + + ret = smbdirect_connect_negotiate_start(sc); + if (ret) + smbdirect_socket_schedule_cleanup(sc, ret); + return 0; + + default: + break; + } + + /* + * This is an internal error + */ + WARN_ON_ONCE(sc->rdma.expected_event != RDMA_CM_EVENT_ESTABLISHED); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return 0; +} + +static int smbdirect_connect_negotiate_start(struct smbdirect_socket *sc) +{ + const struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_recv_io *recv_io = NULL; + struct smbdirect_send_io *send_io = NULL; + struct smbdirect_negotiate_req *nreq = NULL; + int ret; + + if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_NEGOTIATE_NEEDED)) + return sc->first_error; + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING; + + ret = smbdirect_connection_create_mem_pools(sc); + if (ret) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "smbdirect_connection_create_mem_pools() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + goto create_mem_pools_failed; + } + + /* + * There is only a single batch credit + */ + atomic_set(&sc->send_io.bcredits.count, 1); + + /* + * Initialize the local credits to post + * IB_WR_SEND[_WITH_INV]. 
+ */ + atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target); + + recv_io = smbdirect_connection_get_recv_io(sc); + if (WARN_ON_ONCE(!recv_io)) { + ret = -EINVAL; + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "smbdirect_connection_get_recv_io() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + goto get_recv_io_failed; + } + recv_io->cqe.done = smbdirect_connect_negotiate_recv_done; + + send_io = smbdirect_connection_alloc_send_io(sc); + if (IS_ERR(send_io)) { + ret = PTR_ERR(send_io); + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "smbdirect_connection_alloc_send_io() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + goto alloc_send_io_failed; + } + send_io->cqe.done = smbdirect_connect_negotiate_send_done; + + nreq = (struct smbdirect_negotiate_req *)send_io->packet; + nreq->min_version = cpu_to_le16(SMBDIRECT_V1); + nreq->max_version = cpu_to_le16(SMBDIRECT_V1); + nreq->reserved = 0; + nreq->credits_requested = cpu_to_le16(sp->send_credit_target); + nreq->preferred_send_size = cpu_to_le32(sp->max_send_size); + nreq->max_receive_size = cpu_to_le32(sp->max_recv_size); + nreq->max_fragmented_size = cpu_to_le32(sp->max_fragmented_recv_size); + + smbdirect_log_negotiate(sc, SMBDIRECT_LOG_INFO, + "ReqOut: %s%x, %s%x, %s%u, %s%u, %s%u, %s%u\n", + "MinVersion=0x", + le16_to_cpu(nreq->min_version), + "MaxVersion=0x", + le16_to_cpu(nreq->max_version), + "CreditsRequested=", + le16_to_cpu(nreq->credits_requested), + "PreferredSendSize=", + le32_to_cpu(nreq->preferred_send_size), + "MaxRecvSize=", + le32_to_cpu(nreq->max_receive_size), + "MaxFragmentedSize=", + le32_to_cpu(nreq->max_fragmented_size)); + + send_io->sge[0].addr = ib_dma_map_single(sc->ib.dev, + nreq, + sizeof(*nreq), + DMA_TO_DEVICE); + ret = ib_dma_mapping_error(sc->ib.dev, send_io->sge[0].addr); + if (ret) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "ib_dma_mapping_error() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + goto dma_mapping_failed; + } + + 
send_io->sge[0].length = sizeof(*nreq); + send_io->sge[0].lkey = sc->ib.pd->local_dma_lkey; + send_io->num_sge = 1; + + ib_dma_sync_single_for_device(sc->ib.dev, + send_io->sge[0].addr, + send_io->sge[0].length, + DMA_TO_DEVICE); + + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_INFO, + "sge addr=0x%llx length=%u lkey=0x%x\n", + send_io->sge[0].addr, + send_io->sge[0].length, + send_io->sge[0].lkey); + + /* + * Now post the recv_io buffer in order to get + * the negotiate response + */ + sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REP; + ret = smbdirect_connection_post_recv_io(recv_io); + if (ret) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "smbdirect_connection_post_recv_io() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + goto post_recv_io_failed; + } + + send_io->wr.next = NULL; + send_io->wr.wr_cqe = &send_io->cqe; + send_io->wr.sg_list = send_io->sge; + send_io->wr.num_sge = send_io->num_sge; + send_io->wr.opcode = IB_WR_SEND; + send_io->wr.send_flags = IB_SEND_SIGNALED; + + ret = smbdirect_connection_post_send_wr(sc, &send_io->wr); + if (ret) { + /* if we reach here, post send failed */ + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_ERR, + "smbdirect_connection_post_send_wr() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + goto post_send_wr_failed; + } + + /* + * start with the negotiate timeout and SMBDIRECT_KEEPALIVE_PENDING + * so that the timer will cause a disconnect. + */ + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->negotiate_timeout_msec)); + + return 0; + +post_send_wr_failed: + /* + * ib_dma_unmap_single is called in + * smbdirect_connection_free_send_io() + */ + smbdirect_connection_free_send_io(send_io); + /* + * recv_io is given to the rdma layer, + * we should not put it even on error + * nor call smbdirect_connection_destroy_mem_pools() + * it will be cleaned up during disconnect. 
+ */ + return ret; + +post_recv_io_failed: + /* + * ib_dma_unmap_single is called in + * smbdirect_connection_free_send_io() + */ +dma_mapping_failed: + smbdirect_connection_free_send_io(send_io); + +alloc_send_io_failed: + smbdirect_connection_put_recv_io(recv_io); + +get_recv_io_failed: + smbdirect_connection_destroy_mem_pools(sc); + +create_mem_pools_failed: + return ret; +} + +static void smbdirect_connect_negotiate_send_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct smbdirect_send_io *send_io = + container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); + struct smbdirect_socket *sc = send_io->socket; + + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_INFO, + "smbdirect_send_io completed. status='%s (%d)', opcode=%d\n", + ib_wc_status_msg(wc->status), wc->status, wc->opcode); + + /* Note this frees wc->wr_cqe, but not wc */ + smbdirect_connection_free_send_io(send_io); + atomic_dec(&sc->send_io.pending.count); + + if (unlikely(wc->status != IB_WC_SUCCESS || WARN_ON_ONCE(wc->opcode != IB_WC_SEND))) { + if (wc->status != IB_WC_WR_FLUSH_ERR) + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_ERR, + "wc->status=%s (%d) wc->opcode=%d\n", + ib_wc_status_msg(wc->status), wc->status, wc->opcode); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return; + } +} + +static void smbdirect_connect_negotiate_recv_work(struct work_struct *work); + +static void smbdirect_connect_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct smbdirect_recv_io *recv_io = + container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); + struct smbdirect_socket *sc = recv_io->socket; + unsigned long flags; + + if (unlikely(wc->status != IB_WC_SUCCESS || WARN_ON_ONCE(wc->opcode != IB_WC_RECV))) { + if (wc->status != IB_WC_WR_FLUSH_ERR) + smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_ERR, + "wc->status=%s (%d) wc->opcode=%d\n", + ib_wc_status_msg(wc->status), wc->status, wc->opcode); + goto error; + } + + smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_INFO, + "smbdirect_recv_io completed. 
status='%s (%d)', opcode=%d\n", + ib_wc_status_msg(wc->status), wc->status, wc->opcode); + + /* + * This is an internal error! + */ + if (WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_NEGOTIATE_REP)) + goto error; + + /* + * Don't reset timer to the keepalive interval in + * this will be done in smbdirect_accept_direct_negotiate_recv_work. + */ + + ib_dma_sync_single_for_cpu(sc->ib.dev, + recv_io->sge.addr, + recv_io->sge.length, + DMA_FROM_DEVICE); + + /* + * Only remember recv_io if it has enough bytes, + * this gives smbdirect_accept_negotiate_recv_work enough + * information in order to disconnect if it was not + * valid. + */ + sc->recv_io.reassembly.full_packet_received = true; + if (wc->byte_len >= sizeof(struct smbdirect_negotiate_resp)) + smbdirect_connection_reassembly_append_recv_io(sc, recv_io, 0); + else + smbdirect_connection_put_recv_io(recv_io); + + /* + * We continue via the workqueue as we may have + * complex work that might sleep. + * + * So we defer further processing of the negotiation + * to smbdirect_connect_negotiate_recv_work(). + */ + spin_lock_irqsave(&sc->connect.lock, flags); + if (!sc->first_error) { + INIT_WORK(&sc->connect.work, smbdirect_connect_negotiate_recv_work); + if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_RUNNING) + queue_work(sc->workqueue, &sc->connect.work); + } + spin_unlock_irqrestore(&sc->connect.lock, flags); + + return; + +error: + /* + * recv_io.posted.refill_work is still disabled, + * so smbdirect_connection_put_recv_io() won't + * start it. 
+ */ + smbdirect_connection_put_recv_io(recv_io); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); +} + +static void smbdirect_connect_negotiate_recv_work(struct work_struct *work) +{ + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, connect.work); + struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_recv_io *recv_io; + struct smbdirect_negotiate_resp *nrep; + unsigned long flags; + u16 negotiated_version; + u16 credits_requested; + u16 credits_granted; + u32 status; + u32 max_readwrite_size; + u32 preferred_send_size; + u32 max_receive_size; + u32 max_fragmented_size; + int posted; + int ret; + + if (sc->first_error) + return; + + /* + * make sure we won't start again... + */ + disable_work(work); + + /* + * Reset timer to the keepalive interval in + * order to trigger our next keepalive message. + */ + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->keepalive_interval_msec)); + + /* + * If smbdirect_connect_negotiate_recv_done() detected an + * invalid request we want to disconnect. + */ + recv_io = smbdirect_connection_reassembly_first_recv_io(sc); + if (!recv_io) { + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return; + } + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); + sc->recv_io.reassembly.queue_length--; + list_del(&recv_io->list); + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); + smbdirect_connection_put_recv_io(recv_io); + + if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_NEGOTIATE_RUNNING)) + return; + + /* + * Note recv_io is already part of the free list, + * as we just called smbdirect_connection_put_recv_io(), + * but it won't be reused before we call + * smbdirect_connection_recv_io_refill() below. 
+ */ + + nrep = (struct smbdirect_negotiate_resp *)recv_io->packet; + negotiated_version = le16_to_cpu(nrep->negotiated_version); + credits_requested = le16_to_cpu(nrep->credits_requested); + credits_granted = le16_to_cpu(nrep->credits_granted); + status = le32_to_cpu(nrep->status); + max_readwrite_size = le32_to_cpu(nrep->max_readwrite_size); + preferred_send_size = le32_to_cpu(nrep->preferred_send_size); + max_receive_size = le32_to_cpu(nrep->max_receive_size); + max_fragmented_size = le32_to_cpu(nrep->max_fragmented_size); + + smbdirect_log_negotiate(sc, SMBDIRECT_LOG_INFO, + "RepIn: %s%x, %s%x, %s%x, %s%u, %s%u, %s%x, %s%u, %s%u, %s%u, %s%u\n", + "MinVersion=0x", + le16_to_cpu(nrep->min_version), + "MaxVersion=0x", + le16_to_cpu(nrep->max_version), + "NegotiatedVersion=0x", + le16_to_cpu(nrep->negotiated_version), + "CreditsRequested=", + le16_to_cpu(nrep->credits_requested), + "CreditsGranted=", + le16_to_cpu(nrep->credits_granted), + "Status=0x", + le32_to_cpu(nrep->status), + "MaxReadWriteSize=", + le32_to_cpu(nrep->max_readwrite_size), + "PreferredSendSize=", + le32_to_cpu(nrep->preferred_send_size), + "MaxRecvSize=", + le32_to_cpu(nrep->max_receive_size), + "MaxFragmentedSize=", + le32_to_cpu(nrep->max_fragmented_size)); + + if (negotiated_version != SMBDIRECT_V1) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "invalid: negotiated_version=0x%x\n", + negotiated_version); + smbdirect_socket_schedule_cleanup(sc, -ECONNREFUSED); + return; + } + + if (status != le32_to_cpu(STATUS_SUCCESS)) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "invalid: status=0x%x != 0x0\n", + status); + smbdirect_socket_schedule_cleanup(sc, -ECONNREFUSED); + return; + } + + if (max_receive_size < SMBDIRECT_MIN_RECEIVE_SIZE) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "invalid: max_receive_size=%u < %u\n", + max_receive_size, + SMBDIRECT_MIN_RECEIVE_SIZE); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return; + } + + if (max_fragmented_size < 
SMBDIRECT_MIN_FRAGMENTED_SIZE) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "invalid: max_fragmented_size=%u < %u\n", + max_fragmented_size, + SMBDIRECT_MIN_FRAGMENTED_SIZE); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return; + } + + if (credits_granted == 0) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "invalid: credits_granted == 0\n"); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return; + } + + if (credits_requested == 0) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "invalid: credits_requested == 0\n"); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return; + } + + if (preferred_send_size > sp->max_recv_size) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "invalid: preferred_send_size=%u > max_recv_size=%u\n", + preferred_send_size, + sp->max_recv_size); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return; + } + + /* + * We take the value from the peer, which is checked to be higher than 0, + * but we limit it to the max value we support in order to have + * the main logic simpler. + */ + sc->recv_io.credits.target = credits_requested; + sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, + sp->recv_credit_max); + + /* + * At least the value of SMBDIRECT_MIN_RECEIVE_SIZE is used. + */ + sp->max_recv_size = min_t(u32, sp->max_recv_size, preferred_send_size); + sp->max_recv_size = max_t(u32, sp->max_recv_size, SMBDIRECT_MIN_RECEIVE_SIZE); + + /* + * We already sent our sp->max_fragmented_recv_size + * to the peer, so we can't lower it here any more. + * + * TODO: but if the peer lowered sp->max_recv_size + * we will have to adjust our number of buffers. + * + * But for now we keep it as the cifs.ko code + * worked before. + */ + + /* + * Note nrep->max_receive_size was already checked against + * SMBDIRECT_MIN_RECEIVE_SIZE above. 
+ */ + sp->max_send_size = min_t(u32, sp->max_send_size, max_receive_size); + + /* + * Make sure the resulting max_frmr_depth is at least 1, + * which means max_read_write_size needs to be at least PAGE_SIZE. + */ + sp->max_read_write_size = min_t(u32, sp->max_frmr_depth * PAGE_SIZE, + max_readwrite_size); + if (sp->max_read_write_size < PAGE_SIZE) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "invalid: max_readwrite_size=%u < PAGE_SIZE(%lu)\n", + max_readwrite_size, + PAGE_SIZE); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return; + } + sp->max_frmr_depth = sp->max_read_write_size / PAGE_SIZE; + + /* + * Note nrep->credits_granted was already checked against 0 above. + */ + atomic_set(&sc->send_io.credits.count, credits_granted); + + /* + * Note nrep->max_fragmented_size was already checked against + * SMBDIRECT_MIN_FRAGMENTED_SIZE above. + */ + sp->max_fragmented_send_size = max_fragmented_size; + + ret = smbdirect_connection_create_mr_list(sc); + if (ret) { + smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, + "smbdirect_connection_create_mr_list() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + smbdirect_socket_schedule_cleanup(sc, ret); + return; + } + + /* + * Prepare for receiving data_transfer messages + */ + sc->recv_io.reassembly.full_packet_received = true; + sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER; + list_for_each_entry(recv_io, &sc->recv_io.free.list, list) + recv_io->cqe.done = smbdirect_connection_recv_io_done; + recv_io = NULL; + + /* + * We should at least post 1 smbdirect_recv_io! + */ + posted = smbdirect_connection_recv_io_refill(sc); + if (posted < 1) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "smbdirect_connection_recv_io_refill() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return; + } + + /* + * smbdirect_connection_negotiation_done() + * will setup all required things and wake up + * the waiter. 
+ */ + smbdirect_connection_negotiation_done(sc); +} + +__maybe_unused /* this is temporary while this file is included in others */ +static int smbdirect_connect_sync(struct smbdirect_socket *sc, + const struct sockaddr *dst) +{ + int ret; + + ret = smbdirect_connect(sc, dst); + if (ret) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "smbdirect_connect(%pISpsfc) failed %1pe\n", + dst, SMBDIRECT_DEBUG_ERR_PTR(ret)); + return ret; + } + + ret = smbdirect_connection_wait_for_connected(sc); + if (ret) { + int lvl = SMBDIRECT_LOG_ERR; + + if (ret == -ENODEV) + lvl = SMBDIRECT_LOG_INFO; + + smbdirect_log_rdma_event(sc, lvl, + "wait for smbdirect_connect(%pISpsfc) failed %1pe\n", + dst, SMBDIRECT_DEBUG_ERR_PTR(ret)); + return ret; + } + + return 0; +} From eb3ed1e9048cf7b2a38112f48e0f1b772bb7860d Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sat, 20 Sep 2025 08:42:16 +0200 Subject: [PATCH 051/145] smb: smbdirect: introduce smbdirect_accept_connect_request() This will be used by the server to handle new connections. All rdma processing from rdma_accept() to RDMA_CM_EVENT_ESTABLISHED as well as the waiting for the smbdirect negotiation request and sending the negotiation response is done async until we reach SMBDIRECT_SOCKET_CONNECTED. Sync behaviour will be done by the server calling smbdirect_connection_wait_for_connected() in order to reach SMBDIRECT_SOCKET_CONNECTED or an error.
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_accept.c | 737 ++++++++++++++++++ .../common/smbdirect/smbdirect_all_c_files.c | 1 + 2 files changed, 738 insertions(+) create mode 100644 fs/smb/common/smbdirect/smbdirect_accept.c diff --git a/fs/smb/common/smbdirect/smbdirect_accept.c b/fs/smb/common/smbdirect/smbdirect_accept.c new file mode 100644 index 000000000000..768d5864cf67 --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect_accept.c @@ -0,0 +1,737 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2017, Microsoft Corporation. + * Copyright (C) 2018, LG Electronics. + * Copyright (c) 2025, Stefan Metzmacher + */ + +#include "smbdirect_internal.h" +#include "../../common/smb2status.h" + +static int smbdirect_accept_rdma_event_handler(struct rdma_cm_id *id, + struct rdma_cm_event *event); +static int smbdirect_accept_init_params(struct smbdirect_socket *sc); +static void smbdirect_accept_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc); +static void smbdirect_accept_negotiate_send_done(struct ib_cq *cq, struct ib_wc *wc); + +__maybe_unused /* this is temporary while this file is included in others */ +static int smbdirect_accept_connect_request(struct smbdirect_socket *sc, + const struct rdma_conn_param *param) +{ + struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_recv_io *recv_io; + u8 peer_initiator_depth; + u8 peer_responder_resources; + struct rdma_conn_param conn_param; + __be32 ird_ord_hdr[2]; + int ret; + + if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_CREATED)) + return -EINVAL; + + /* + * First set what the we as server are able to support + */ + sp->initiator_depth = min_t(u8, sp->initiator_depth, + sc->ib.dev->attrs.max_qp_rd_atom); + + peer_initiator_depth = param->initiator_depth; + 
peer_responder_resources = param->responder_resources; + smbdirect_connection_negotiate_rdma_resources(sc, + peer_initiator_depth, + peer_responder_resources, + param); + + ret = smbdirect_accept_init_params(sc); + if (ret) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "smbdirect_accept_init_params() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + goto init_params_failed; + } + + ret = smbdirect_connection_create_qp(sc); + if (ret) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "smbdirect_connection_create_qp() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + goto create_qp_failed; + } + + ret = smbdirect_connection_create_mem_pools(sc); + if (ret) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "smbdirect_connection_create_mem_pools() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + goto create_mem_failed; + } + + recv_io = smbdirect_connection_get_recv_io(sc); + if (WARN_ON_ONCE(!recv_io)) { + ret = -EINVAL; + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "smbdirect_connection_get_recv_io() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + goto get_recv_io_failed; + } + recv_io->cqe.done = smbdirect_accept_negotiate_recv_done; + + /* + * Now post the recv_io buffer in order to get + * the negotiate request + */ + sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REQ; + ret = smbdirect_connection_post_recv_io(recv_io); + if (ret) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "smbdirect_connection_post_recv_io() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + goto post_recv_io_failed; + } + /* + * From here recv_io is known to the RDMA QP and needs ib_drain_qp and + * smbdirect_accept_negotiate_recv_done to cleanup... + */ + recv_io = NULL; + + /* already checked with SMBDIRECT_CHECK_STATUS_WARN above */ + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED); + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED; + + /* + * We already negotiated sp->initiator_depth + * and sp->responder_resources above. 
+ */ + memset(&conn_param, 0, sizeof(conn_param)); + conn_param.initiator_depth = sp->initiator_depth; + conn_param.responder_resources = sp->responder_resources; + + if (sc->rdma.legacy_iwarp) { + ird_ord_hdr[0] = cpu_to_be32(conn_param.responder_resources); + ird_ord_hdr[1] = cpu_to_be32(conn_param.initiator_depth); + conn_param.private_data = ird_ord_hdr; + conn_param.private_data_len = sizeof(ird_ord_hdr); + } else { + conn_param.private_data = NULL; + conn_param.private_data_len = 0; + } + conn_param.retry_count = SMBDIRECT_RDMA_CM_RETRY; + conn_param.rnr_retry_count = SMBDIRECT_RDMA_CM_RNR_RETRY; + conn_param.flow_control = 0; + + /* explicitly set above */ + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED); + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING; + sc->rdma.expected_event = RDMA_CM_EVENT_ESTABLISHED; + sc->rdma.cm_id->event_handler = smbdirect_accept_rdma_event_handler; + ret = rdma_accept(sc->rdma.cm_id, &conn_param); + if (ret) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "rdma_accept() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + goto rdma_accept_failed; + } + + /* + * start with the negotiate timeout and SMBDIRECT_KEEPALIVE_PENDING + * so that the timer will cause a disconnect. 
+ */ + INIT_DELAYED_WORK(&sc->idle.timer_work, smbdirect_connection_idle_timer_work); + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->negotiate_timeout_msec)); + + return 0; + +rdma_accept_failed: + /* + * smbdirect_connection_destroy_qp() calls ib_drain_qp(), + * so that smbdirect_accept_negotiate_recv_done() will + * call smbdirect_connection_put_recv_io() + */ +post_recv_io_failed: + if (recv_io) + smbdirect_connection_put_recv_io(recv_io); +get_recv_io_failed: + smbdirect_connection_destroy_mem_pools(sc); +create_mem_failed: + smbdirect_connection_destroy_qp(sc); +create_qp_failed: +init_params_failed: + return ret; +} + +static int smbdirect_accept_init_params(struct smbdirect_socket *sc) +{ + const struct smbdirect_socket_parameters *sp = &sc->parameters; + int max_send_sges; + unsigned int maxpages; + + /* need 3 more sge. because a SMB_DIRECT header, SMB2 header, + * SMB2 response could be mapped. + */ + max_send_sges = DIV_ROUND_UP(sp->max_send_size, PAGE_SIZE) + 3; + if (max_send_sges > SMBDIRECT_SEND_IO_MAX_SGE) { + pr_err("max_send_size %d is too large\n", sp->max_send_size); + return -EINVAL; + } + + /* + * There is only a single batch credit + */ + atomic_set(&sc->send_io.bcredits.count, 1); + + /* + * Initialize the local credits to post + * IB_WR_SEND[_WITH_INV]. 
+ */ + atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target); + + if (sp->max_read_write_size) { + maxpages = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE); + sc->rw_io.credits.max = rdma_rw_mr_factor(sc->ib.dev, + sc->rdma.cm_id->port_num, + maxpages); + sc->rw_io.credits.num_pages = DIV_ROUND_UP(maxpages, sc->rw_io.credits.max); + /* add one extra in order to handle unaligned pages */ + sc->rw_io.credits.max += 1; + } + + sc->recv_io.credits.target = 1; + + atomic_set(&sc->rw_io.credits.count, sc->rw_io.credits.max); + + return 0; +} + +static void smbdirect_accept_negotiate_recv_work(struct work_struct *work); + +static void smbdirect_accept_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct smbdirect_recv_io *recv_io = + container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); + struct smbdirect_socket *sc = recv_io->socket; + unsigned long flags; + + if (unlikely(wc->status != IB_WC_SUCCESS || WARN_ON_ONCE(wc->opcode != IB_WC_RECV))) { + if (wc->status != IB_WC_WR_FLUSH_ERR) + smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_ERR, + "wc->status=%s (%d) wc->opcode=%d\n", + ib_wc_status_msg(wc->status), wc->status, wc->opcode); + goto error; + } + + smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_INFO, + "smbdirect_recv_io completed. status='%s (%d)', opcode=%d\n", + ib_wc_status_msg(wc->status), wc->status, wc->opcode); + + /* + * This is an internal error! + */ + if (WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_NEGOTIATE_REQ)) + goto error; + + /* + * Don't reset timer to the keepalive interval here, + * this will be done in smbdirect_accept_negotiate_recv_work(). + */ + + ib_dma_sync_single_for_cpu(sc->ib.dev, + recv_io->sge.addr, + recv_io->sge.length, + DMA_FROM_DEVICE); + + /* + * Only remember recv_io if it has enough bytes, + * this gives smbdirect_accept_negotiate_recv_work enough + * information in order to disconnect if it was not + * valid. 
+ */ + sc->recv_io.reassembly.full_packet_received = true; + if (wc->byte_len >= sizeof(struct smbdirect_negotiate_req)) + smbdirect_connection_reassembly_append_recv_io(sc, recv_io, 0); + else + smbdirect_connection_put_recv_io(recv_io); + + /* + * Some drivers (at least mlx5_ib and irdma) might post a + * recv completion before RDMA_CM_EVENT_ESTABLISHED, + * we need to adjust our expectation in that case. + * + * So we defer further processing of the negotiation + * to smbdirect_accept_negotiate_recv_work(). + * + * If we are already in SMBDIRECT_SOCKET_NEGOTIATE_NEEDED + * we queue the work directly otherwise + * smbdirect_accept_rdma_event_handler() will do it, when + * RDMA_CM_EVENT_ESTABLISHED arrived. + */ + spin_lock_irqsave(&sc->connect.lock, flags); + if (!sc->first_error) { + INIT_WORK(&sc->connect.work, smbdirect_accept_negotiate_recv_work); + if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_NEEDED) + queue_work(sc->workqueue, &sc->connect.work); + } + spin_unlock_irqrestore(&sc->connect.lock, flags); + + return; + +error: + /* + * recv_io.posted.refill_work is still disabled, + * so smbdirect_connection_put_recv_io() won't + * start it. + */ + smbdirect_connection_put_recv_io(recv_io); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); +} + +static void smbdirect_accept_negotiate_recv_work(struct work_struct *work) +{ + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, connect.work); + struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_recv_io *recv_io; + struct smbdirect_negotiate_req *nreq; + unsigned long flags; + u16 min_version; + u16 max_version; + u16 credits_requested; + u32 preferred_send_size; + u32 max_receive_size; + u32 max_fragmented_size; + struct smbdirect_send_io *send_io = NULL; + struct smbdirect_negotiate_resp *nrep; + u32 ntstatus; + int posted; + u16 new_credits; + int ret; + + if (sc->first_error) + return; + + /* + * make sure we won't start again... 
+ */ + disable_work(work); + + /* + * Reset timer to the keepalive interval in + * order to trigger our next keepalive message. + */ + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->keepalive_interval_msec)); + + /* + * If smbdirect_accept_negotiate_recv_done() detected an + * invalid request we want to disconnect. + */ + recv_io = smbdirect_connection_reassembly_first_recv_io(sc); + if (!recv_io) { + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return; + } + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); + sc->recv_io.reassembly.queue_length--; + list_del(&recv_io->list); + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); + smbdirect_connection_put_recv_io(recv_io); + + if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_NEGOTIATE_NEEDED)) + return; + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING; + + /* + * Note recv_io is already part of the free list, + * as we just called smbdirect_connection_put_recv_io(), + * but it won't be reused before we call + * smbdirect_connection_recv_io_refill() below. 
+ */ + + nreq = (struct smbdirect_negotiate_req *)recv_io->packet; + min_version = le16_to_cpu(nreq->min_version); + max_version = le16_to_cpu(nreq->max_version); + credits_requested = le16_to_cpu(nreq->credits_requested); + preferred_send_size = le32_to_cpu(nreq->preferred_send_size); + max_receive_size = le32_to_cpu(nreq->max_receive_size); + max_fragmented_size = le32_to_cpu(nreq->max_fragmented_size); + + smbdirect_log_negotiate(sc, SMBDIRECT_LOG_INFO, + "ReqIn: %s%x, %s%x, %s%u, %s%u, %s%u, %s%u\n", + "MinVersion=0x", + le16_to_cpu(nreq->min_version), + "MaxVersion=0x", + le16_to_cpu(nreq->max_version), + "CreditsRequested=", + le16_to_cpu(nreq->credits_requested), + "PreferredSendSize=", + le32_to_cpu(nreq->preferred_send_size), + "MaxRecvSize=", + le32_to_cpu(nreq->max_receive_size), + "MaxFragmentedSize=", + le32_to_cpu(nreq->max_fragmented_size)); + + if (!(min_version <= SMBDIRECT_V1 && max_version >= SMBDIRECT_V1)) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "invalid: min_version=0x%x max_version=0x%x\n", + min_version, max_version); + ntstatus = le32_to_cpu(STATUS_NOT_SUPPORTED); + goto not_supported; + } + + if (credits_requested == 0) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "invalid: credits_requested == 0\n"); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return; + } + + if (max_receive_size < SMBDIRECT_MIN_RECEIVE_SIZE) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "invalid: max_receive_size=%u < %u\n", + max_receive_size, + SMBDIRECT_MIN_RECEIVE_SIZE); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return; + } + + if (max_fragmented_size < SMBDIRECT_MIN_FRAGMENTED_SIZE) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "invalid: max_fragmented_size=%u < %u\n", + max_fragmented_size, + SMBDIRECT_MIN_FRAGMENTED_SIZE); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return; + } + + /* + * At least the value of SMBDIRECT_MIN_RECEIVE_SIZE is used. 
+ */ + sp->max_recv_size = min_t(u32, sp->max_recv_size, preferred_send_size); + sp->max_recv_size = max_t(u32, sp->max_recv_size, SMBDIRECT_MIN_RECEIVE_SIZE); + + /* + * The maximum fragmented upper-layer payload receive size supported + * + * Assume max_payload_per_credit is + * smb_direct_receive_credit_max - 24 = 1340 + * + * The maximum number would be + * smb_direct_receive_credit_max * max_payload_per_credit + * + * 1340 * 255 = 341700 (0x536C4) + * + * The minimum value from the spec is 131072 (0x20000) + * + * For now we use the logic we used in ksmbd before: + * (1364 * 255) / 2 = 173910 (0x2A756) + * + * We need to adjust this here in case the peer + * lowered sp->max_recv_size. + * + * TODO: instead of adjusting max_fragmented_recv_size + * we should adjust the number of available buffers, + * but for now we keep the logic as it was used + * in ksmbd before. + */ + sp->max_fragmented_recv_size = (sp->recv_credit_max * sp->max_recv_size) / 2; + + /* + * We take the value from the peer, which is checked to be higher than 0, + * but we limit it to the max value we support in order to have + * the main logic simpler. + */ + sc->recv_io.credits.target = credits_requested; + sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, + sp->recv_credit_max); + + /* + * Note nreq->max_receive_size was already checked against + * SMBDIRECT_MIN_RECEIVE_SIZE above. + */ + sp->max_send_size = min_t(u32, sp->max_send_size, max_receive_size); + + /* + * Note nreq->max_fragmented_size was already checked against + * SMBDIRECT_MIN_FRAGMENTED_SIZE above. 
+ */ + sp->max_fragmented_send_size = max_fragmented_size; + + /* + * Prepare for receiving data_transfer messages + */ + sc->recv_io.reassembly.full_packet_received = true; + sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER; + list_for_each_entry(recv_io, &sc->recv_io.free.list, list) + recv_io->cqe.done = smbdirect_connection_recv_io_done; + recv_io = NULL; + + /* + * We should at least post 1 smbdirect_recv_io! + */ + posted = smbdirect_connection_recv_io_refill(sc); + if (posted < 1) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "smbdirect_connection_recv_io_refill() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(posted)); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return; + } + + /* + * The response will grant credits for all posted + * smbdirect_recv_io messages. + */ + new_credits = smbdirect_connection_grant_recv_credits(sc); + + ntstatus = le32_to_cpu(STATUS_SUCCESS); + +not_supported: + send_io = smbdirect_connection_alloc_send_io(sc); + if (IS_ERR(send_io)) { + ret = PTR_ERR(send_io); + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "smbdirect_connection_alloc_send_io() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + smbdirect_socket_schedule_cleanup(sc, ret); + return; + } + send_io->cqe.done = smbdirect_accept_negotiate_send_done; + + nrep = (struct smbdirect_negotiate_resp *)send_io->packet; + nrep->min_version = cpu_to_le16(SMBDIRECT_V1); + nrep->max_version = cpu_to_le16(SMBDIRECT_V1); + if (ntstatus == 0) { + nrep->negotiated_version = cpu_to_le16(SMBDIRECT_V1); + nrep->reserved = 0; + nrep->credits_requested = cpu_to_le16(sp->send_credit_target); + nrep->credits_granted = cpu_to_le16(new_credits); + nrep->status = cpu_to_le32(ntstatus); + nrep->max_readwrite_size = cpu_to_le32(sp->max_read_write_size); + nrep->preferred_send_size = cpu_to_le32(sp->max_send_size); + nrep->max_receive_size = cpu_to_le32(sp->max_recv_size); + nrep->max_fragmented_size = cpu_to_le32(sp->max_fragmented_recv_size); + } else { + 
nrep->negotiated_version = 0; + nrep->reserved = 0; + nrep->credits_requested = 0; + nrep->credits_granted = 0; + nrep->status = cpu_to_le32(ntstatus); + nrep->max_readwrite_size = 0; + nrep->preferred_send_size = 0; + nrep->max_receive_size = 0; + nrep->max_fragmented_size = 0; + } + + smbdirect_log_negotiate(sc, SMBDIRECT_LOG_INFO, + "RepOut: %s%x, %s%x, %s%x, %s%u, %s%u, %s%x, %s%u, %s%u, %s%u, %s%u\n", + "MinVersion=0x", + le16_to_cpu(nrep->min_version), + "MaxVersion=0x", + le16_to_cpu(nrep->max_version), + "NegotiatedVersion=0x", + le16_to_cpu(nrep->negotiated_version), + "CreditsRequested=", + le16_to_cpu(nrep->credits_requested), + "CreditsGranted=", + le16_to_cpu(nrep->credits_granted), + "Status=0x", + le32_to_cpu(nrep->status), + "MaxReadWriteSize=", + le32_to_cpu(nrep->max_readwrite_size), + "PreferredSendSize=", + le32_to_cpu(nrep->preferred_send_size), + "MaxRecvSize=", + le32_to_cpu(nrep->max_receive_size), + "MaxFragmentedSize=", + le32_to_cpu(nrep->max_fragmented_size)); + + send_io->sge[0].addr = ib_dma_map_single(sc->ib.dev, + nrep, + sizeof(*nrep), + DMA_TO_DEVICE); + ret = ib_dma_mapping_error(sc->ib.dev, send_io->sge[0].addr); + if (ret) { + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "ib_dma_mapping_error() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + smbdirect_connection_free_send_io(send_io); + smbdirect_socket_schedule_cleanup(sc, ret); + return; + } + + send_io->sge[0].length = sizeof(*nrep); + send_io->sge[0].lkey = sc->ib.pd->local_dma_lkey; + send_io->num_sge = 1; + + ib_dma_sync_single_for_device(sc->ib.dev, + send_io->sge[0].addr, + send_io->sge[0].length, + DMA_TO_DEVICE); + + send_io->wr.next = NULL; + send_io->wr.wr_cqe = &send_io->cqe; + send_io->wr.sg_list = send_io->sge; + send_io->wr.num_sge = send_io->num_sge; + send_io->wr.opcode = IB_WR_SEND; + send_io->wr.send_flags = IB_SEND_SIGNALED; + + ret = smbdirect_connection_post_send_wr(sc, &send_io->wr); + if (ret) { + /* if we reach here, post send failed */ + 
smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_ERR, + "smbdirect_connection_post_send_wr() failed %1pe\n", + SMBDIRECT_DEBUG_ERR_PTR(ret)); + /* + * Note smbdirect_connection_free_send_io() + * does ib_dma_unmap_page() + */ + smbdirect_connection_free_send_io(send_io); + smbdirect_socket_schedule_cleanup(sc, ret); + return; + } + + /* + * smbdirect_accept_negotiate_send_done + * will do all remaining work... + */ +} + +static void smbdirect_accept_negotiate_send_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct smbdirect_send_io *send_io = + container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); + struct smbdirect_socket *sc = send_io->socket; + struct smbdirect_negotiate_resp *nrep; + u32 ntstatus; + + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_INFO, + "smbdirect_send_io completed. status='%s (%d)', opcode=%d\n", + ib_wc_status_msg(wc->status), wc->status, wc->opcode); + + nrep = (struct smbdirect_negotiate_resp *)send_io->packet; + ntstatus = le32_to_cpu(nrep->status); + + /* Note this frees wc->wr_cqe, but not wc */ + smbdirect_connection_free_send_io(send_io); + atomic_dec(&sc->send_io.pending.count); + + if (unlikely(wc->status != IB_WC_SUCCESS || WARN_ON_ONCE(wc->opcode != IB_WC_SEND))) { + if (wc->status != IB_WC_WR_FLUSH_ERR) + smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_ERR, + "wc->status=%s (%d) wc->opcode=%d\n", + ib_wc_status_msg(wc->status), wc->status, wc->opcode); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return; + } + + /* + * If we send a smbdirect_negotiate_resp without NT_STATUS_OK (0) + * we need to disconnect now. + * + * Otherwise smbdirect_connection_negotiation_done() + * will setup all required things and wake up + * the waiter. 
+ */ + if (ntstatus) + smbdirect_socket_schedule_cleanup(sc, -EOPNOTSUPP); + else + smbdirect_connection_negotiation_done(sc); +} + +static int smbdirect_accept_rdma_event_handler(struct rdma_cm_id *id, + struct rdma_cm_event *event) +{ + struct smbdirect_socket *sc = id->context; + unsigned long flags; + + /* + * cma_cm_event_handler() has + * lockdep_assert_held(&id_priv->handler_mutex); + * + * Mutexes are not allowed in interrupts, + * and we rely on not being in an interrupt here, + * as we might sleep. + * + * We didn't timeout so we cancel our idle timer, + * it will be scheduled again if needed. + */ + WARN_ON_ONCE(in_interrupt()); + + if (event->status || event->event != sc->rdma.expected_event) { + int ret = -ECONNABORTED; + + if (event->event == RDMA_CM_EVENT_REJECTED) + ret = -ECONNREFUSED; + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) + ret = -ENETDOWN; + if (IS_ERR(SMBDIRECT_DEBUG_ERR_PTR(event->status))) + ret = event->status; + + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, + "%s (first_error=%1pe, expected=%s) => event=%s status=%d => ret=%1pe\n", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), + rdma_event_msg(sc->rdma.expected_event), + rdma_event_msg(event->event), + event->status, + SMBDIRECT_DEBUG_ERR_PTR(ret)); + + smbdirect_socket_schedule_cleanup(sc, ret); + return 0; + } + + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "%s (first_error=%1pe) event=%s\n", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), + rdma_event_msg(event->event)); + + if (sc->first_error) + return 0; + + switch (event->event) { + case RDMA_CM_EVENT_ESTABLISHED: + smbdirect_connection_rdma_established(sc); + + /* + * Some drivers (at least mlx5_ib and irdma) might post a + * recv completion before RDMA_CM_EVENT_ESTABLISHED, + * we need to adjust our expectation in that case. 
+ * + * If smbdirect_accept_negotiate_recv_done was called first + * it initialized sc->connect.work only for us to + * start, so that we turned into + * SMBDIRECT_SOCKET_NEGOTIATE_NEEDED, before + * smbdirect_accept_negotiate_recv_work() runs. + * + * If smbdirect_accept_negotiate_recv_done didn't happen + * yet, sc->connect.work is still disabled and + * queue_work() is a no-op. + */ + if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING)) + return 0; + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED; + spin_lock_irqsave(&sc->connect.lock, flags); + if (!sc->first_error) + queue_work(sc->workqueue, &sc->connect.work); + spin_unlock_irqrestore(&sc->connect.lock, flags); + + /* + * wait for smbdirect_accept_negotiate_recv_done() + * to get the negotiate request. + */ + return 0; + + default: + break; + } + + /* + * This is an internal error + */ + WARN_ON_ONCE(sc->rdma.expected_event != RDMA_CM_EVENT_ESTABLISHED); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); + return 0; +} diff --git a/fs/smb/common/smbdirect/smbdirect_all_c_files.c b/fs/smb/common/smbdirect/smbdirect_all_c_files.c index 5df7da692df3..40e2ceb9a4a4 100644 --- a/fs/smb/common/smbdirect/smbdirect_all_c_files.c +++ b/fs/smb/common/smbdirect/smbdirect_all_c_files.c @@ -21,3 +21,4 @@ #include "smbdirect_rw.c" #include "smbdirect_debug.c" #include "smbdirect_connect.c" +#include "smbdirect_accept.c" From 20cd3cc4420bdb9f63644cd140e2682f634e651e Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 21 Oct 2025 16:48:12 +0200 Subject: [PATCH 052/145] smb: smbdirect: introduce smbdirect_socket_create_{kern,accepting}() and smbdirect_socket_release() This provides functions which also allocate and free struct smbdirect_socket. This allows callers to use the same flow as with sock_create_kern()/sock_release(). 
The end goal would be to use sock_create_kern()/sock_release(), but the first step will be to use smbdirect specific functions without any struct socket nor struct sock. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.c | 95 +++++++++++++++++++++- fs/smb/common/smbdirect/smbdirect_socket.h | 33 ++++++++ 2 files changed, 127 insertions(+), 1 deletion(-) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index 8eb021cd7cee..f70cd395812b 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -51,7 +51,6 @@ static int smbdirect_socket_rdma_event_handler(struct rdma_cm_id *id, return -ESTALE; } -__maybe_unused /* this is temporary while this file is included in others */ static int smbdirect_socket_init_new(struct net *net, struct smbdirect_socket *sc) { struct rdma_cm_id *id; @@ -85,6 +84,31 @@ static int smbdirect_socket_init_new(struct net *net, struct smbdirect_socket *s } __maybe_unused /* this is temporary while this file is included in others */ +static int smbdirect_socket_create_kern(struct net *net, struct smbdirect_socket **_sc) +{ + struct smbdirect_socket *sc; + int ret; + + ret = -ENOMEM; + sc = kzalloc_obj(*sc); + if (!sc) + goto alloc_failed; + + ret = smbdirect_socket_init_new(net, sc); + if (ret) + goto init_failed; + + kref_init(&sc->refs.destroy); + + *_sc = sc; + return 0; + +init_failed: + kfree(sc); +alloc_failed: + return ret; +} + static int smbdirect_socket_init_accepting(struct rdma_cm_id *id, struct smbdirect_socket *sc) { smbdirect_socket_init(sc); @@ -100,6 +124,32 @@ static int smbdirect_socket_init_accepting(struct rdma_cm_id *id, struct smbdire return 0; } +__maybe_unused /* this is temporary while this file is included in others */ 
+static int smbdirect_socket_create_accepting(struct rdma_cm_id *id, struct smbdirect_socket **_sc) +{ + struct smbdirect_socket *sc; + int ret; + + ret = -ENOMEM; + sc = kzalloc_obj(*sc); + if (!sc) + goto alloc_failed; + + ret = smbdirect_socket_init_accepting(id, sc); + if (ret) + goto init_failed; + + kref_init(&sc->refs.destroy); + + *_sc = sc; + return 0; + +init_failed: + kfree(sc); +alloc_failed: + return ret; +} + __maybe_unused /* this is temporary while this file is included in others */ static int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, const struct smbdirect_socket_parameters *sp) @@ -556,6 +606,49 @@ static void smbdirect_socket_shutdown(struct smbdirect_socket *sc) smbdirect_socket_schedule_cleanup_lvl(sc, SMBDIRECT_LOG_INFO, -ESHUTDOWN); } +static void smbdirect_socket_release_disconnect(struct kref *kref) +{ + struct smbdirect_socket *sc = + container_of(kref, struct smbdirect_socket, refs.disconnect); + + /* + * For now do a sync disconnect/destroy + */ + smbdirect_socket_destroy_sync(sc); +} + +static void smbdirect_socket_release_destroy(struct kref *kref) +{ + struct smbdirect_socket *sc = + container_of(kref, struct smbdirect_socket, refs.destroy); + + /* + * Do a sync disconnect/destroy... + * hopefully a no-op, as it should be already + * in DESTROYED state, before we free the memory. + */ + smbdirect_socket_destroy_sync(sc); + kfree(sc); +} + +__maybe_unused /* this is temporary while this file is included in others */ +static void smbdirect_socket_release(struct smbdirect_socket *sc) +{ + /* + * We expect only 1 disconnect reference + * and if it is already 0, it's a use after free! + */ + WARN_ON_ONCE(kref_read(&sc->refs.disconnect) != 1); + WARN_ON(!kref_put(&sc->refs.disconnect, smbdirect_socket_release_disconnect)); + + /* + * This may not trigger smbdirect_socket_release_destroy(), + * if struct smbdirect_socket is embedded in another structure + * indicated by REFCOUNT_MAX. 
+ */ + kref_put(&sc->refs.destroy, smbdirect_socket_release_destroy); +} + __maybe_unused /* this is temporary while this file is included in others */ static int smbdirect_socket_wait_for_credits(struct smbdirect_socket *sc, enum smbdirect_socket_status expected_status, diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 97e6330249cc..5a6386e4a021 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -108,6 +108,36 @@ struct smbdirect_socket { struct work_struct disconnect_work; + /* + * The reference counts. + */ + struct { + /* + * This holds the references by the + * frontend, typically the smb layer. + * + * It is typically 1 and a disconnect + * will happen if it reaches 0. + */ + struct kref disconnect; + + /* + * This holds the reference by the + * backend, the code that manages + * the lifetime of the whole + * struct smbdirect_socket, + * if this reaches 0 it will + * be freed. + * + * Can be REFCOUNT_MAX if it is part + * of another structure. + * + * This is equal or higher than + * the disconnect refcount. + */ + struct kref destroy; + } refs; + /* RDMA related */ struct { struct rdma_cm_id *cm_id; @@ -513,6 +543,9 @@ static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc) INIT_WORK(&sc->disconnect_work, __smbdirect_socket_disabled_work); disable_work_sync(&sc->disconnect_work); + kref_init(&sc->refs.disconnect); + sc->refs.destroy = (struct kref) KREF_INIT(REFCOUNT_MAX); + sc->rdma.expected_event = RDMA_CM_EVENT_INTERNAL; sc->ib.poll_ctx = IB_POLL_UNBOUND_WORKQUEUE; From b1e6277bd1240c3a66aac537b1b43b4bfd2edcd8 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 31 Oct 2025 14:19:26 +0100 Subject: [PATCH 053/145] smb: smbdirect: let smbdirect_socket.h include all headers for used structures Currently they are implicitly included via client and server code, but this is needed when we move to an smbdirect.ko. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 5a6386e4a021..7bd55cbc4227 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -6,6 +6,13 @@ #ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ #define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ +#include +#include +#include +#include +#include +#include +#include #include enum smbdirect_socket_status { From 5e4bf7fadd4a608cace7604b720483f051f8176f Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 28 Oct 2025 11:11:16 +0100 Subject: [PATCH 054/145] smb: smbdirect: let smbdirect_internal.h define pr_fmt without SMBDIRECT_USE_INLINE_C_FILES When we move to smbdirect.ko we want log message prefixed with the module name. Note callers are still using smbdirect_socket_set_logging() in order to redirect the per connection logging to their own log functions. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_internal.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_internal.h b/fs/smb/common/smbdirect/smbdirect_internal.h index 9989fe96000d..63349ce3536c 100644 --- a/fs/smb/common/smbdirect/smbdirect_internal.h +++ b/fs/smb/common/smbdirect/smbdirect_internal.h @@ -6,6 +6,10 @@ #ifndef __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ #define __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ +#ifndef SMBDIRECT_USE_INLINE_C_FILES +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#endif /* ! 
SMBDIRECT_USE_INLINE_C_FILES */ + #include "smbdirect.h" #include "smbdirect_pdu.h" #include "smbdirect_socket.h" From b2261ceedd4a4831957732ebae5ad33bf5c7fc80 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 27 Oct 2025 16:57:39 +0100 Subject: [PATCH 055/145] smb: smbdirect: introduce smbdirect_public.h with prototypes smbdirect_public.h contains functions which will still be exported when we move to an smbdirect.ko. For now this uses the SMBDIRECT_USE_INLINE_C_FILES code path and marks all functions as '__maybe_unused static', but this will make further changes easier. Note this generates the following things from checkpatch.pl, so I passed --ignore=FILE_PATH_CHANGES,EXPORT_SYMBOL,COMPLEX_MACRO ERROR: Macros with complex values should be enclosed in parentheses #514: FILE: fs/smb/common/smbdirect/smbdirect_public.h:18: +#define __SMBDIRECT_PUBLIC__ __maybe_unused static WARNING: EXPORT_SYMBOL(foo); should immediately follow its function/variable #515: FILE: fs/smb/common/smbdirect/smbdirect_public.h:19: +#define __SMBDIRECT_EXPORT_SYMBOL__(__sym) WARNING: EXPORT_SYMBOL(foo); should immediately follow its function/variable #518: FILE: fs/smb/common/smbdirect/smbdirect_public.h:22: +#define __SMBDIRECT_EXPORT_SYMBOL__(__sym) EXPORT_SYMBOL_FOR_MODULES(__sym, "cifs,ksmbd") This is exactly what we want here, so we should ignore the checkpatch.pl problems. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_accept.c | 7 +- fs/smb/common/smbdirect/smbdirect_connect.c | 13 +- .../common/smbdirect/smbdirect_connection.c | 49 +++--- fs/smb/common/smbdirect/smbdirect_debug.c | 9 +- fs/smb/common/smbdirect/smbdirect_internal.h | 1 + fs/smb/common/smbdirect/smbdirect_mr.c | 17 +- fs/smb/common/smbdirect/smbdirect_public.h | 154 ++++++++++++++++++ fs/smb/common/smbdirect/smbdirect_rw.c | 13 +- fs/smb/common/smbdirect/smbdirect_socket.c | 82 ++++++---- fs/smb/common/smbdirect/smbdirect_socket.h | 14 -- 10 files changed, 262 insertions(+), 97 deletions(-) create mode 100644 fs/smb/common/smbdirect/smbdirect_public.h diff --git a/fs/smb/common/smbdirect/smbdirect_accept.c b/fs/smb/common/smbdirect/smbdirect_accept.c index 768d5864cf67..4baa2c1ad31e 100644 --- a/fs/smb/common/smbdirect/smbdirect_accept.c +++ b/fs/smb/common/smbdirect/smbdirect_accept.c @@ -14,9 +14,9 @@ static int smbdirect_accept_init_params(struct smbdirect_socket *sc); static void smbdirect_accept_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void smbdirect_accept_negotiate_send_done(struct ib_cq *cq, struct ib_wc *wc); -__maybe_unused /* this is temporary while this file is included in others */ -static int smbdirect_accept_connect_request(struct smbdirect_socket *sc, - const struct rdma_conn_param *param) +__SMBDIRECT_PUBLIC__ +int smbdirect_accept_connect_request(struct smbdirect_socket *sc, + const struct rdma_conn_param *param) { struct smbdirect_socket_parameters *sp = &sc->parameters; struct smbdirect_recv_io *recv_io; @@ -160,6 +160,7 @@ static int smbdirect_accept_connect_request(struct smbdirect_socket *sc, init_params_failed: return ret; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_accept_connect_request); static int 
smbdirect_accept_init_params(struct smbdirect_socket *sc) { diff --git a/fs/smb/common/smbdirect/smbdirect_connect.c b/fs/smb/common/smbdirect/smbdirect_connect.c index c32daf788b41..f411f6ee66b5 100644 --- a/fs/smb/common/smbdirect/smbdirect_connect.c +++ b/fs/smb/common/smbdirect/smbdirect_connect.c @@ -16,9 +16,8 @@ static int smbdirect_connect_negotiate_start(struct smbdirect_socket *sc); static void smbdirect_connect_negotiate_send_done(struct ib_cq *cq, struct ib_wc *wc); static void smbdirect_connect_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc); -__maybe_unused /* this is temporary while this file is included in others */ -static int smbdirect_connect(struct smbdirect_socket *sc, - const struct sockaddr *dst) +__SMBDIRECT_PUBLIC__ +int smbdirect_connect(struct smbdirect_socket *sc, const struct sockaddr *dst) { const struct sockaddr *src = NULL; union { @@ -62,6 +61,7 @@ static int smbdirect_connect(struct smbdirect_socket *sc, */ return 0; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connect); static int smbdirect_connect_setup_connection(struct smbdirect_socket *sc) { @@ -895,9 +895,9 @@ static void smbdirect_connect_negotiate_recv_work(struct work_struct *work) smbdirect_connection_negotiation_done(sc); } -__maybe_unused /* this is temporary while this file is included in others */ -static int smbdirect_connect_sync(struct smbdirect_socket *sc, - const struct sockaddr *dst) +__SMBDIRECT_PUBLIC__ +int smbdirect_connect_sync(struct smbdirect_socket *sc, + const struct sockaddr *dst) { int ret; @@ -924,3 +924,4 @@ static int smbdirect_connect_sync(struct smbdirect_socket *sc, return 0; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connect_sync); diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 735d8dc3e9f5..7f9fa2b615a2 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -703,16 +703,17 @@ static void 
smbdirect_connection_negotiate_rdma_resources(struct smbdirect_socke peer_responder_resources); } -__maybe_unused /* this is temporary while this file is included in others */ -static bool smbdirect_connection_is_connected(struct smbdirect_socket *sc) +__SMBDIRECT_PUBLIC__ +bool smbdirect_connection_is_connected(struct smbdirect_socket *sc) { if (unlikely(!sc || sc->first_error || sc->status != SMBDIRECT_SOCKET_CONNECTED)) return false; return true; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_is_connected); -__maybe_unused /* this is temporary while this file is included in others */ -static int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc) +__SMBDIRECT_PUBLIC__ +int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc) { const struct smbdirect_socket_parameters *sp = &sc->parameters; union { @@ -783,6 +784,7 @@ static int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc) return 0; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_wait_for_connected); static void smbdirect_connection_idle_timer_work(struct work_struct *work) { @@ -1062,11 +1064,12 @@ static int smbdirect_connection_post_send_io(struct smbdirect_socket *sc, return smbdirect_connection_post_send_wr(sc, &msg->wr); } -static int smbdirect_connection_send_single_iter(struct smbdirect_socket *sc, - struct smbdirect_send_batch *batch, - struct iov_iter *iter, - unsigned int flags, - u32 remaining_data_length) +__SMBDIRECT_PUBLIC__ +int smbdirect_connection_send_single_iter(struct smbdirect_socket *sc, + struct smbdirect_send_batch *batch, + struct iov_iter *iter, + unsigned int flags, + u32 remaining_data_length) { const struct smbdirect_socket_parameters *sp = &sc->parameters; struct smbdirect_send_batch _batch; @@ -1249,9 +1252,10 @@ static int smbdirect_connection_send_single_iter(struct smbdirect_socket *sc, bcredit_failed: return ret; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_single_iter); -__maybe_unused /* this is 
temporary while this file is included in others */ -static int smbdirect_connection_send_wait_zero_pending(struct smbdirect_socket *sc) +__SMBDIRECT_PUBLIC__ +int smbdirect_connection_send_wait_zero_pending(struct smbdirect_socket *sc) { /* * As an optimization, we don't wait for individual I/O to finish @@ -1274,13 +1278,14 @@ static int smbdirect_connection_send_wait_zero_pending(struct smbdirect_socket * return 0; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_wait_zero_pending); -__maybe_unused /* this is temporary while this file is included in others */ -static int smbdirect_connection_send_iter(struct smbdirect_socket *sc, - struct iov_iter *iter, - unsigned int flags, - bool need_invalidate, - unsigned int remote_key) +__SMBDIRECT_PUBLIC__ +int smbdirect_connection_send_iter(struct smbdirect_socket *sc, + struct iov_iter *iter, + unsigned int flags, + bool need_invalidate, + unsigned int remote_key) { const struct smbdirect_socket_parameters *sp = &sc->parameters; struct smbdirect_send_batch batch; @@ -1359,6 +1364,7 @@ static int smbdirect_connection_send_iter(struct smbdirect_socket *sc, return total_count; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_iter); static void smbdirect_connection_send_io_done(struct ib_cq *cq, struct ib_wc *wc) { @@ -1777,10 +1783,10 @@ static void smbdirect_connection_recv_io_refill_work(struct work_struct *work) } } -__maybe_unused /* this is temporary while this file is included in others */ -static int smbdirect_connection_recvmsg(struct smbdirect_socket *sc, - struct msghdr *msg, - unsigned int flags) +__SMBDIRECT_PUBLIC__ +int smbdirect_connection_recvmsg(struct smbdirect_socket *sc, + struct msghdr *msg, + unsigned int flags) { struct smbdirect_recv_io *response; struct smbdirect_data_transfer *data_transfer; @@ -1927,6 +1933,7 @@ static int smbdirect_connection_recvmsg(struct smbdirect_socket *sc, goto again; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_recvmsg); static bool 
smbdirect_map_sges_single_page(struct smbdirect_map_sges *state, struct page *page, size_t off, size_t len) diff --git a/fs/smb/common/smbdirect/smbdirect_debug.c b/fs/smb/common/smbdirect/smbdirect_debug.c index 20b87d8aa6d1..eac924164fd4 100644 --- a/fs/smb/common/smbdirect/smbdirect_debug.c +++ b/fs/smb/common/smbdirect/smbdirect_debug.c @@ -7,10 +7,10 @@ #include "smbdirect_internal.h" #include -__maybe_unused /* this is temporary while this file is included in others */ -static void smbdirect_connection_legacy_debug_proc_show(struct smbdirect_socket *sc, - unsigned int rdma_readwrite_threshold, - struct seq_file *m) +__SMBDIRECT_PUBLIC__ +void smbdirect_connection_legacy_debug_proc_show(struct smbdirect_socket *sc, + unsigned int rdma_readwrite_threshold, + struct seq_file *m) { const struct smbdirect_socket_parameters *sp; @@ -86,3 +86,4 @@ static void smbdirect_connection_legacy_debug_proc_show(struct smbdirect_socket atomic_read(&sc->mr_io.ready.count), atomic_read(&sc->mr_io.used.count)); } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_legacy_debug_proc_show); diff --git a/fs/smb/common/smbdirect/smbdirect_internal.h b/fs/smb/common/smbdirect/smbdirect_internal.h index 63349ce3536c..f8f76bfe5c3e 100644 --- a/fs/smb/common/smbdirect/smbdirect_internal.h +++ b/fs/smb/common/smbdirect/smbdirect_internal.h @@ -12,6 +12,7 @@ #include "smbdirect.h" #include "smbdirect_pdu.h" +#include "smbdirect_public.h" #include "smbdirect_socket.h" static void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, diff --git a/fs/smb/common/smbdirect/smbdirect_mr.c b/fs/smb/common/smbdirect/smbdirect_mr.c index d5bf8531e237..90879bd109c1 100644 --- a/fs/smb/common/smbdirect/smbdirect_mr.c +++ b/fs/smb/common/smbdirect/smbdirect_mr.c @@ -330,8 +330,8 @@ static int smbdirect_iter_to_sgt(struct iov_iter *iter, * need_invalidate: true if this MR needs to be locally invalidated after I/O * return value: the MR registered, NULL if failed. 
*/ -__maybe_unused /* this is temporary while this file is included in others */ -static struct smbdirect_mr_io * +__SMBDIRECT_PUBLIC__ +struct smbdirect_mr_io * smbdirect_connection_register_mr_io(struct smbdirect_socket *sc, struct iov_iter *iter, bool writing, @@ -452,10 +452,11 @@ smbdirect_connection_register_mr_io(struct smbdirect_socket *sc, mutex_unlock(&mr->mutex); return NULL; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_register_mr_io); -__maybe_unused /* this is temporary while this file is included in others */ -static void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr, - struct smbdirect_buffer_descriptor_v1 *v1) +__SMBDIRECT_PUBLIC__ +void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr, + struct smbdirect_buffer_descriptor_v1 *v1) { mutex_lock(&mr->mutex); if (mr->state == SMBDIRECT_MR_REGISTERED) { @@ -469,6 +470,7 @@ static void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr, } mutex_unlock(&mr->mutex); } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_mr_io_fill_buffer_descriptor); /* * Deregister a MR after I/O is done @@ -476,8 +478,8 @@ static void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr, * and we have to locally invalidate the buffer to prevent data is being * modified by remote peer after upper layer consumes it */ -__maybe_unused /* this is temporary while this file is included in others */ -static void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr) +__SMBDIRECT_PUBLIC__ +void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr) { struct smbdirect_socket *sc = mr->socket; int ret = 0; @@ -559,3 +561,4 @@ static void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr) if (!kref_put(&mr->kref, smbdirect_mr_io_free_locked)) mutex_unlock(&mr->mutex); } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_deregister_mr_io); diff --git a/fs/smb/common/smbdirect/smbdirect_public.h b/fs/smb/common/smbdirect/smbdirect_public.h 
new file mode 100644 index 000000000000..a5b15fce840c --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect_public.h @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2025, Stefan Metzmacher + */ + +#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PUBLIC_H__ +#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PUBLIC_H__ + +struct smbdirect_buffer_descriptor_v1; +struct smbdirect_socket_parameters; + +struct smbdirect_socket; +struct smbdirect_send_batch; +struct smbdirect_mr_io; + +#ifdef SMBDIRECT_USE_INLINE_C_FILES +/* this is temporary while this file is included in others */ +#define __SMBDIRECT_PUBLIC__ __maybe_unused static +#define __SMBDIRECT_EXPORT_SYMBOL__(__sym) +#else +#define __SMBDIRECT_PUBLIC__ +#define __SMBDIRECT_EXPORT_SYMBOL__(__sym) EXPORT_SYMBOL_FOR_MODULES(__sym, "cifs,ksmbd") +#endif + +#include + +__SMBDIRECT_PUBLIC__ +bool smbdirect_frwr_is_supported(const struct ib_device_attr *attrs); + +__SMBDIRECT_PUBLIC__ +int smbdirect_socket_create_kern(struct net *net, struct smbdirect_socket **_sc); + +__SMBDIRECT_PUBLIC__ +int smbdirect_socket_create_accepting(struct rdma_cm_id *id, struct smbdirect_socket **_sc); + +__SMBDIRECT_PUBLIC__ +int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, + const struct smbdirect_socket_parameters *sp); + +__SMBDIRECT_PUBLIC__ +const struct smbdirect_socket_parameters * +smbdirect_socket_get_current_parameters(struct smbdirect_socket *sc); + +__SMBDIRECT_PUBLIC__ +int smbdirect_socket_set_kernel_settings(struct smbdirect_socket *sc, + enum ib_poll_context poll_ctx, + gfp_t gfp_mask); + +__SMBDIRECT_PUBLIC__ +int smbdirect_socket_set_custom_workqueue(struct smbdirect_socket *sc, + struct workqueue_struct *workqueue); + +#define SMBDIRECT_LOG_ERR 0x0 +#define SMBDIRECT_LOG_INFO 0x1 + +#define SMBDIRECT_LOG_OUTGOING 0x1 +#define SMBDIRECT_LOG_INCOMING 0x2 +#define SMBDIRECT_LOG_READ 0x4 +#define SMBDIRECT_LOG_WRITE 0x8 +#define SMBDIRECT_LOG_RDMA_SEND 0x10 +#define 
SMBDIRECT_LOG_RDMA_RECV 0x20 +#define SMBDIRECT_LOG_KEEP_ALIVE 0x40 +#define SMBDIRECT_LOG_RDMA_EVENT 0x80 +#define SMBDIRECT_LOG_RDMA_MR 0x100 +#define SMBDIRECT_LOG_RDMA_RW 0x200 +#define SMBDIRECT_LOG_NEGOTIATE 0x400 +__SMBDIRECT_PUBLIC__ +void smbdirect_socket_set_logging(struct smbdirect_socket *sc, + void *private_ptr, + bool (*needed)(struct smbdirect_socket *sc, + void *private_ptr, + unsigned int lvl, + unsigned int cls), + void (*vaprintf)(struct smbdirect_socket *sc, + const char *func, + unsigned int line, + void *private_ptr, + unsigned int lvl, + unsigned int cls, + struct va_format *vaf)); + +__SMBDIRECT_PUBLIC__ +bool smbdirect_connection_is_connected(struct smbdirect_socket *sc); + +__SMBDIRECT_PUBLIC__ +int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc); + +__SMBDIRECT_PUBLIC__ +void smbdirect_socket_shutdown(struct smbdirect_socket *sc); + +__SMBDIRECT_PUBLIC__ +void smbdirect_socket_release(struct smbdirect_socket *sc); + +__SMBDIRECT_PUBLIC__ +int smbdirect_connection_send_single_iter(struct smbdirect_socket *sc, + struct smbdirect_send_batch *batch, + struct iov_iter *iter, + unsigned int flags, + u32 remaining_data_length); + +__SMBDIRECT_PUBLIC__ +int smbdirect_connection_send_wait_zero_pending(struct smbdirect_socket *sc); + +__SMBDIRECT_PUBLIC__ +int smbdirect_connection_send_iter(struct smbdirect_socket *sc, + struct iov_iter *iter, + unsigned int flags, + bool need_invalidate, + unsigned int remote_key); + +__SMBDIRECT_PUBLIC__ +int smbdirect_connection_recvmsg(struct smbdirect_socket *sc, + struct msghdr *msg, + unsigned int flags); + +__SMBDIRECT_PUBLIC__ +int smbdirect_connect(struct smbdirect_socket *sc, + const struct sockaddr *dst); + +__SMBDIRECT_PUBLIC__ +int smbdirect_connect_sync(struct smbdirect_socket *sc, + const struct sockaddr *dst); + +__SMBDIRECT_PUBLIC__ +int smbdirect_accept_connect_request(struct smbdirect_socket *sc, + const struct rdma_conn_param *param); + +__SMBDIRECT_PUBLIC__ +int 
smbdirect_connection_rdma_xmit(struct smbdirect_socket *sc, + void *buf, size_t buf_len, + struct smbdirect_buffer_descriptor_v1 *desc, + size_t desc_len, + bool is_read); + +__SMBDIRECT_PUBLIC__ +struct smbdirect_mr_io * +smbdirect_connection_register_mr_io(struct smbdirect_socket *sc, + struct iov_iter *iter, + bool writing, + bool need_invalidate); + +__SMBDIRECT_PUBLIC__ +void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr, + struct smbdirect_buffer_descriptor_v1 *v1); + +__SMBDIRECT_PUBLIC__ +void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr); + +__SMBDIRECT_PUBLIC__ +void smbdirect_connection_legacy_debug_proc_show(struct smbdirect_socket *sc, + unsigned int rdma_readwrite_threshold, + struct seq_file *m); + +#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PUBLIC_H__ */ diff --git a/fs/smb/common/smbdirect/smbdirect_rw.c b/fs/smb/common/smbdirect/smbdirect_rw.c index 6eeec535b130..bd90dffbf369 100644 --- a/fs/smb/common/smbdirect/smbdirect_rw.c +++ b/fs/smb/common/smbdirect/smbdirect_rw.c @@ -105,12 +105,12 @@ static void smbdirect_connection_rdma_write_done(struct ib_cq *cq, struct ib_wc smbdirect_connection_rdma_rw_done(cq, wc, DMA_TO_DEVICE); } -__maybe_unused /* this is temporary while this file is included in others */ -static int smbdirect_connection_rdma_xmit(struct smbdirect_socket *sc, - void *buf, size_t buf_len, - struct smbdirect_buffer_descriptor_v1 *desc, - size_t desc_len, - bool is_read) +__SMBDIRECT_PUBLIC__ +int smbdirect_connection_rdma_xmit(struct smbdirect_socket *sc, + void *buf, size_t buf_len, + struct smbdirect_buffer_descriptor_v1 *desc, + size_t desc_len, + bool is_read) { const struct smbdirect_socket_parameters *sp = &sc->parameters; enum dma_data_direction direction = is_read ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE; @@ -253,3 +253,4 @@ static int smbdirect_connection_rdma_xmit(struct smbdirect_socket *sc, kfree(msg); goto out; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_rdma_xmit); diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index f70cd395812b..a54c16daffb9 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -6,8 +6,8 @@ #include "smbdirect_internal.h" -__maybe_unused /* this is temporary while this file is included in others */ -static bool smbdirect_frwr_is_supported(const struct ib_device_attr *attrs) +__SMBDIRECT_PUBLIC__ +bool smbdirect_frwr_is_supported(const struct ib_device_attr *attrs) { /* * Test if FRWR (Fast Registration Work Requests) is supported on the @@ -21,6 +21,7 @@ static bool smbdirect_frwr_is_supported(const struct ib_device_attr *attrs) return false; return true; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_frwr_is_supported); static void smbdirect_socket_cleanup_work(struct work_struct *work); @@ -83,8 +84,8 @@ static int smbdirect_socket_init_new(struct net *net, struct smbdirect_socket *s return 0; } -__maybe_unused /* this is temporary while this file is included in others */ -static int smbdirect_socket_create_kern(struct net *net, struct smbdirect_socket **_sc) +__SMBDIRECT_PUBLIC__ +int smbdirect_socket_create_kern(struct net *net, struct smbdirect_socket **_sc) { struct smbdirect_socket *sc; int ret; @@ -108,6 +109,7 @@ static int smbdirect_socket_create_kern(struct net *net, struct smbdirect_socket alloc_failed: return ret; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_create_kern); static int smbdirect_socket_init_accepting(struct rdma_cm_id *id, struct smbdirect_socket *sc) { @@ -124,8 +126,8 @@ static int smbdirect_socket_init_accepting(struct rdma_cm_id *id, struct smbdire return 0; } -__maybe_unused /* this is temporary while this file is included in others */ -static int 
smbdirect_socket_create_accepting(struct rdma_cm_id *id, struct smbdirect_socket **_sc) +__SMBDIRECT_PUBLIC__ +int smbdirect_socket_create_accepting(struct rdma_cm_id *id, struct smbdirect_socket **_sc) { struct smbdirect_socket *sc; int ret; @@ -149,10 +151,11 @@ static int smbdirect_socket_create_accepting(struct rdma_cm_id *id, struct smbdi alloc_failed: return ret; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_create_accepting); -__maybe_unused /* this is temporary while this file is included in others */ -static int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, - const struct smbdirect_socket_parameters *sp) +__SMBDIRECT_PUBLIC__ +int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, + const struct smbdirect_socket_parameters *sp) { /* * This is only allowed before connect or accept @@ -185,18 +188,20 @@ static int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, return 0; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_set_initial_parameters); -__maybe_unused /* this is temporary while this file is included in others */ -static const struct smbdirect_socket_parameters * +__SMBDIRECT_PUBLIC__ +const struct smbdirect_socket_parameters * smbdirect_socket_get_current_parameters(struct smbdirect_socket *sc) { return &sc->parameters; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_get_current_parameters); -__maybe_unused /* this is temporary while this file is included in others */ -static int smbdirect_socket_set_kernel_settings(struct smbdirect_socket *sc, - enum ib_poll_context poll_ctx, - gfp_t gfp_mask) +__SMBDIRECT_PUBLIC__ +int smbdirect_socket_set_kernel_settings(struct smbdirect_socket *sc, + enum ib_poll_context poll_ctx, + gfp_t gfp_mask) { /* * This is only allowed before connect or accept @@ -216,10 +221,11 @@ static int smbdirect_socket_set_kernel_settings(struct smbdirect_socket *sc, return 0; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_set_kernel_settings); -__maybe_unused /* this 
is temporary while this file is included in others */ -static int smbdirect_socket_set_custom_workqueue(struct smbdirect_socket *sc, - struct workqueue_struct *workqueue) +__SMBDIRECT_PUBLIC__ +int smbdirect_socket_set_custom_workqueue(struct smbdirect_socket *sc, + struct workqueue_struct *workqueue) { /* * This is only allowed before connect or accept @@ -238,6 +244,7 @@ static int smbdirect_socket_set_custom_workqueue(struct smbdirect_socket *sc, return 0; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_set_custom_workqueue); __maybe_unused /* this is temporary while this file is included in others */ static void smbdirect_socket_prepare_create(struct smbdirect_socket *sc, @@ -262,25 +269,26 @@ static void smbdirect_socket_prepare_create(struct smbdirect_socket *sc, INIT_DELAYED_WORK(&sc->idle.timer_work, smbdirect_connection_idle_timer_work); } -__maybe_unused /* this is temporary while this file is included in others */ -static void smbdirect_socket_set_logging(struct smbdirect_socket *sc, - void *private_ptr, - bool (*needed)(struct smbdirect_socket *sc, - void *private_ptr, - unsigned int lvl, - unsigned int cls), - void (*vaprintf)(struct smbdirect_socket *sc, - const char *func, - unsigned int line, - void *private_ptr, - unsigned int lvl, - unsigned int cls, - struct va_format *vaf)) +__SMBDIRECT_PUBLIC__ +void smbdirect_socket_set_logging(struct smbdirect_socket *sc, + void *private_ptr, + bool (*needed)(struct smbdirect_socket *sc, + void *private_ptr, + unsigned int lvl, + unsigned int cls), + void (*vaprintf)(struct smbdirect_socket *sc, + const char *func, + unsigned int line, + void *private_ptr, + unsigned int lvl, + unsigned int cls, + struct va_format *vaf)) { sc->logging.private_ptr = private_ptr; sc->logging.needed = needed; sc->logging.vaprintf = vaprintf; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_set_logging); static void smbdirect_socket_wake_up_all(struct smbdirect_socket *sc) { @@ -600,11 +608,12 @@ static void 
smbdirect_socket_destroy_sync(struct smbdirect_socket *sc) SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); } -__maybe_unused /* this is temporary while this file is included in others */ -static void smbdirect_socket_shutdown(struct smbdirect_socket *sc) +__SMBDIRECT_PUBLIC__ +void smbdirect_socket_shutdown(struct smbdirect_socket *sc) { smbdirect_socket_schedule_cleanup_lvl(sc, SMBDIRECT_LOG_INFO, -ESHUTDOWN); } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_shutdown); static void smbdirect_socket_release_disconnect(struct kref *kref) { @@ -631,8 +640,8 @@ static void smbdirect_socket_release_destroy(struct kref *kref) kfree(sc); } -__maybe_unused /* this is temporary while this file is included in others */ -static void smbdirect_socket_release(struct smbdirect_socket *sc) +__SMBDIRECT_PUBLIC__ +void smbdirect_socket_release(struct smbdirect_socket *sc) { /* * We expect only 1 disconnect reference @@ -648,6 +657,7 @@ static void smbdirect_socket_release(struct smbdirect_socket *sc) */ kref_put(&sc->refs.destroy, smbdirect_socket_release_destroy); } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_release); __maybe_unused /* this is temporary while this file is included in others */ static int smbdirect_socket_wait_for_credits(struct smbdirect_socket *sc, diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 7bd55cbc4227..44d04cc63d04 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -406,20 +406,6 @@ struct smbdirect_socket { } statistics; struct { -#define SMBDIRECT_LOG_ERR 0x0 -#define SMBDIRECT_LOG_INFO 0x1 - -#define SMBDIRECT_LOG_OUTGOING 0x1 -#define SMBDIRECT_LOG_INCOMING 0x2 -#define SMBDIRECT_LOG_READ 0x4 -#define SMBDIRECT_LOG_WRITE 0x8 -#define SMBDIRECT_LOG_RDMA_SEND 0x10 -#define SMBDIRECT_LOG_RDMA_RECV 0x20 -#define SMBDIRECT_LOG_KEEP_ALIVE 0x40 -#define SMBDIRECT_LOG_RDMA_EVENT 0x80 -#define SMBDIRECT_LOG_RDMA_MR 0x100 -#define 
SMBDIRECT_LOG_RDMA_RW 0x200 -#define SMBDIRECT_LOG_NEGOTIATE 0x400 void *private_ptr; bool (*needed)(struct smbdirect_socket *sc, void *private_ptr, From 89df0942907894a92dbece12bfa35e1647959b0c Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 27 Oct 2025 20:57:51 +0100 Subject: [PATCH 056/145] smb: smbdirect: provide explicit prototypes for cross .c file functions These prototypes are used between private .c files, when they will be compiled alone into smbdirect.ko. For now this uses the SMBDIRECT_USE_INLINE_C_FILES code path and marks all function as '__maybe_unused static', but this will make further changes easier. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 83 +++++++------ fs/smb/common/smbdirect/smbdirect_internal.h | 114 +++++++++++++++--- fs/smb/common/smbdirect/smbdirect_mr.c | 7 +- fs/smb/common/smbdirect/smbdirect_socket.c | 39 +++--- 4 files changed, 165 insertions(+), 78 deletions(-) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 7f9fa2b615a2..38fb0f34dc86 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -143,8 +143,8 @@ static int smbdirect_connection_rdma_event_handler(struct rdma_cm_id *id, return 0; } -__maybe_unused /* this is temporary while this file is included in others */ -static void smbdirect_connection_rdma_established(struct smbdirect_socket *sc) +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_rdma_established(struct smbdirect_socket *sc) { smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, "rdma established: device: %.*s local: %pISpsfc remote: %pISpsfc\n", @@ -157,8 +157,8 @@ static void smbdirect_connection_rdma_established(struct smbdirect_socket *sc) sc->rdma.expected_event = 
RDMA_CM_EVENT_DISCONNECTED; } -__maybe_unused /* this is temporary while this file is included in others */ -static void smbdirect_connection_negotiation_done(struct smbdirect_socket *sc) +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_negotiation_done(struct smbdirect_socket *sc) { if (unlikely(sc->first_error)) return; @@ -233,8 +233,8 @@ static u32 smbdirect_rdma_rw_send_wrs(struct ib_device *dev, return factor * attr->cap.max_rdma_ctxs; } -__maybe_unused /* this is temporary while this file is included in others */ -static int smbdirect_connection_create_qp(struct smbdirect_socket *sc) +__SMBDIRECT_PRIVATE__ +int smbdirect_connection_create_qp(struct smbdirect_socket *sc) { const struct smbdirect_socket_parameters *sp = &sc->parameters; struct ib_qp_init_attr qp_attr; @@ -391,7 +391,8 @@ static int smbdirect_connection_create_qp(struct smbdirect_socket *sc) return ret; } -static void smbdirect_connection_destroy_qp(struct smbdirect_socket *sc) +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_destroy_qp(struct smbdirect_socket *sc) { if (sc->ib.qp) { ib_drain_qp(sc->ib.qp); @@ -412,8 +413,8 @@ static void smbdirect_connection_destroy_qp(struct smbdirect_socket *sc) } } -__maybe_unused /* this is temporary while this file is included in others */ -static int smbdirect_connection_create_mem_pools(struct smbdirect_socket *sc) +__SMBDIRECT_PRIVATE__ +int smbdirect_connection_create_mem_pools(struct smbdirect_socket *sc) { const struct smbdirect_socket_parameters *sp = &sc->parameters; char name[80]; @@ -490,7 +491,8 @@ static int smbdirect_connection_create_mem_pools(struct smbdirect_socket *sc) return -ENOMEM; } -static void smbdirect_connection_destroy_mem_pools(struct smbdirect_socket *sc) +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_destroy_mem_pools(struct smbdirect_socket *sc) { struct smbdirect_recv_io *recv_io, *next_io; @@ -517,8 +519,8 @@ static void smbdirect_connection_destroy_mem_pools(struct smbdirect_socket *sc) sc->send_io.mem.cache = NULL; } 
-__maybe_unused /* this is temporary while this file is included in others */ -static struct smbdirect_send_io *smbdirect_connection_alloc_send_io(struct smbdirect_socket *sc) +__SMBDIRECT_PRIVATE__ +struct smbdirect_send_io *smbdirect_connection_alloc_send_io(struct smbdirect_socket *sc) { struct smbdirect_send_io *msg; @@ -532,8 +534,8 @@ static struct smbdirect_send_io *smbdirect_connection_alloc_send_io(struct smbdi return msg; } -__maybe_unused /* this is temporary while this file is included in others */ -static void smbdirect_connection_free_send_io(struct smbdirect_send_io *msg) +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_free_send_io(struct smbdirect_send_io *msg) { struct smbdirect_socket *sc = msg->socket; size_t i; @@ -564,8 +566,8 @@ static void smbdirect_connection_free_send_io(struct smbdirect_send_io *msg) mempool_free(msg, sc->send_io.mem.pool); } -__maybe_unused /* this is temporary while this file is included in others */ -static struct smbdirect_recv_io *smbdirect_connection_get_recv_io(struct smbdirect_socket *sc) +__SMBDIRECT_PRIVATE__ +struct smbdirect_recv_io *smbdirect_connection_get_recv_io(struct smbdirect_socket *sc) { struct smbdirect_recv_io *msg = NULL; unsigned long flags; @@ -584,8 +586,8 @@ static struct smbdirect_recv_io *smbdirect_connection_get_recv_io(struct smbdire return msg; } -__maybe_unused /* this is temporary while this file is included in others */ -static void smbdirect_connection_put_recv_io(struct smbdirect_recv_io *msg) +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_put_recv_io(struct smbdirect_recv_io *msg) { struct smbdirect_socket *sc = msg->socket; unsigned long flags; @@ -606,10 +608,10 @@ static void smbdirect_connection_put_recv_io(struct smbdirect_recv_io *msg) queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); } -__maybe_unused /* this is temporary while this file is included in others */ -static void smbdirect_connection_reassembly_append_recv_io(struct smbdirect_socket *sc, - struct 
smbdirect_recv_io *msg, - u32 data_length) +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_reassembly_append_recv_io(struct smbdirect_socket *sc, + struct smbdirect_recv_io *msg, + u32 data_length) { unsigned long flags; @@ -628,8 +630,8 @@ static void smbdirect_connection_reassembly_append_recv_io(struct smbdirect_sock sc->statistics.enqueue_reassembly_queue++; } -__maybe_unused /* this is temporary while this file is included in others */ -static struct smbdirect_recv_io * +__SMBDIRECT_PRIVATE__ +struct smbdirect_recv_io * smbdirect_connection_reassembly_first_recv_io(struct smbdirect_socket *sc) { struct smbdirect_recv_io *msg; @@ -641,11 +643,11 @@ smbdirect_connection_reassembly_first_recv_io(struct smbdirect_socket *sc) return msg; } -__maybe_unused /* this is temporary while this file is included in others */ -static void smbdirect_connection_negotiate_rdma_resources(struct smbdirect_socket *sc, - u8 peer_initiator_depth, - u8 peer_responder_resources, - const struct rdma_conn_param *param) +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_negotiate_rdma_resources(struct smbdirect_socket *sc, + u8 peer_initiator_depth, + u8 peer_responder_resources, + const struct rdma_conn_param *param) { struct smbdirect_socket_parameters *sp = &sc->parameters; @@ -786,7 +788,8 @@ int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc) } __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_wait_for_connected); -static void smbdirect_connection_idle_timer_work(struct work_struct *work) +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_idle_timer_work(struct work_struct *work) { struct smbdirect_socket *sc = container_of(work, struct smbdirect_socket, idle.timer_work.work); @@ -817,8 +820,8 @@ static void smbdirect_connection_idle_timer_work(struct work_struct *work) queue_work(sc->workqueue, &sc->idle.immediate_work); } -__maybe_unused /* this is temporary while this file is included in others */ -static u16 smbdirect_connection_grant_recv_credits(struct 
smbdirect_socket *sc) +__SMBDIRECT_PRIVATE__ +u16 smbdirect_connection_grant_recv_credits(struct smbdirect_socket *sc) { int missing; int available; @@ -874,8 +877,9 @@ static bool smbdirect_connection_request_keep_alive(struct smbdirect_socket *sc) return false; } -static int smbdirect_connection_post_send_wr(struct smbdirect_socket *sc, - struct ib_send_wr *wr) +__SMBDIRECT_PRIVATE__ +int smbdirect_connection_post_send_wr(struct smbdirect_socket *sc, + struct ib_send_wr *wr) { int ret; @@ -1456,8 +1460,8 @@ static void smbdirect_connection_send_immediate_work(struct work_struct *work) } } -__maybe_unused /* this is temporary while this file is included in others */ -static int smbdirect_connection_post_recv_io(struct smbdirect_recv_io *msg) +__SMBDIRECT_PRIVATE__ +int smbdirect_connection_post_recv_io(struct smbdirect_recv_io *msg) { struct smbdirect_socket *sc = msg->socket; const struct smbdirect_socket_parameters *sp = &sc->parameters; @@ -1498,8 +1502,8 @@ static int smbdirect_connection_post_recv_io(struct smbdirect_recv_io *msg) return ret; } -__maybe_unused /* this is temporary while this file is included in others */ -static void smbdirect_connection_recv_io_done(struct ib_cq *cq, struct ib_wc *wc) +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_recv_io_done(struct ib_cq *cq, struct ib_wc *wc) { struct smbdirect_recv_io *recv_io = container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); @@ -1668,7 +1672,8 @@ static void smbdirect_connection_recv_io_done(struct ib_cq *cq, struct ib_wc *wc smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); } -static int smbdirect_connection_recv_io_refill(struct smbdirect_socket *sc) +__SMBDIRECT_PRIVATE__ +int smbdirect_connection_recv_io_refill(struct smbdirect_socket *sc) { int missing; int posted = 0; diff --git a/fs/smb/common/smbdirect/smbdirect_internal.h b/fs/smb/common/smbdirect/smbdirect_internal.h index f8f76bfe5c3e..ae6044ddeb59 100644 --- a/fs/smb/common/smbdirect/smbdirect_internal.h +++ 
b/fs/smb/common/smbdirect/smbdirect_internal.h @@ -15,13 +15,27 @@ #include "smbdirect_public.h" #include "smbdirect_socket.h" -static void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, - const char *macro_name, - unsigned int lvl, - const char *func, - unsigned int line, - int error, - enum smbdirect_socket_status *force_status); +#ifdef SMBDIRECT_USE_INLINE_C_FILES +/* this is temporary while this file is included in others */ +#define __SMBDIRECT_PRIVATE__ __maybe_unused static +#else +#define __SMBDIRECT_PRIVATE__ +#endif + +__SMBDIRECT_PRIVATE__ +int smbdirect_socket_init_new(struct net *net, struct smbdirect_socket *sc); + +__SMBDIRECT_PRIVATE__ +int smbdirect_socket_init_accepting(struct rdma_cm_id *id, struct smbdirect_socket *sc); + +__SMBDIRECT_PRIVATE__ +void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, + const char *macro_name, + unsigned int lvl, + const char *func, + unsigned int line, + int error, + enum smbdirect_socket_status *force_status); #define smbdirect_socket_schedule_cleanup(__sc, __error) \ __smbdirect_socket_schedule_cleanup(__sc, \ "smbdirect_socket_schedule_cleanup", SMBDIRECT_LOG_ERR, \ @@ -37,21 +51,85 @@ static void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, __func__, __LINE__, __error, &__force_status); \ } while (0) -static int smbdirect_socket_wait_for_credits(struct smbdirect_socket *sc, - enum smbdirect_socket_status expected_status, - int unexpected_errno, - wait_queue_head_t *waitq, - atomic_t *total_credits, - int needed); +__SMBDIRECT_PRIVATE__ +void smbdirect_socket_destroy_sync(struct smbdirect_socket *sc); -static void smbdirect_connection_destroy_qp(struct smbdirect_socket *sc); +__SMBDIRECT_PRIVATE__ +int smbdirect_socket_wait_for_credits(struct smbdirect_socket *sc, + enum smbdirect_socket_status expected_status, + int unexpected_errno, + wait_queue_head_t *waitq, + atomic_t *total_credits, + int needed); -static void 
smbdirect_connection_destroy_mem_pools(struct smbdirect_socket *sc); +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_rdma_established(struct smbdirect_socket *sc); -static void smbdirect_connection_put_recv_io(struct smbdirect_recv_io *msg); +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_negotiation_done(struct smbdirect_socket *sc); -static void smbdirect_connection_idle_timer_work(struct work_struct *work); +__SMBDIRECT_PRIVATE__ +int smbdirect_connection_create_qp(struct smbdirect_socket *sc); -static void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc); +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_destroy_qp(struct smbdirect_socket *sc); + +__SMBDIRECT_PRIVATE__ +int smbdirect_connection_create_mem_pools(struct smbdirect_socket *sc); + +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_destroy_mem_pools(struct smbdirect_socket *sc); + +__SMBDIRECT_PRIVATE__ +struct smbdirect_send_io *smbdirect_connection_alloc_send_io(struct smbdirect_socket *sc); + +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_free_send_io(struct smbdirect_send_io *msg); + +__SMBDIRECT_PRIVATE__ +struct smbdirect_recv_io *smbdirect_connection_get_recv_io(struct smbdirect_socket *sc); + +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_put_recv_io(struct smbdirect_recv_io *msg); + +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_reassembly_append_recv_io(struct smbdirect_socket *sc, + struct smbdirect_recv_io *msg, + u32 data_length); + +__SMBDIRECT_PRIVATE__ +struct smbdirect_recv_io * +smbdirect_connection_reassembly_first_recv_io(struct smbdirect_socket *sc); + +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_negotiate_rdma_resources(struct smbdirect_socket *sc, + u8 peer_initiator_depth, + u8 peer_responder_resources, + const struct rdma_conn_param *param); + +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_idle_timer_work(struct work_struct *work); + +__SMBDIRECT_PRIVATE__ +u16 smbdirect_connection_grant_recv_credits(struct smbdirect_socket *sc); + 
+__SMBDIRECT_PRIVATE__ +int smbdirect_connection_post_send_wr(struct smbdirect_socket *sc, + struct ib_send_wr *wr); + +__SMBDIRECT_PRIVATE__ +int smbdirect_connection_post_recv_io(struct smbdirect_recv_io *msg); + +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_recv_io_done(struct ib_cq *cq, struct ib_wc *wc); + +__SMBDIRECT_PRIVATE__ +int smbdirect_connection_recv_io_refill(struct smbdirect_socket *sc); + +__SMBDIRECT_PRIVATE__ +int smbdirect_connection_create_mr_list(struct smbdirect_socket *sc); + +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc); #endif /* __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ */ diff --git a/fs/smb/common/smbdirect/smbdirect_mr.c b/fs/smb/common/smbdirect/smbdirect_mr.c index 90879bd109c1..dc85cced8dc2 100644 --- a/fs/smb/common/smbdirect/smbdirect_mr.c +++ b/fs/smb/common/smbdirect/smbdirect_mr.c @@ -15,8 +15,8 @@ static void smbdirect_connection_mr_io_recovery_work(struct work_struct *work); * Recovery is done in smbd_mr_recovery_work. 
The content of list entry changes * as MRs are used and recovered for I/O, but the list links will not change */ -__maybe_unused /* this is temporary while this file is included in others */ -static int smbdirect_connection_create_mr_list(struct smbdirect_socket *sc) +__SMBDIRECT_PRIVATE__ +int smbdirect_connection_create_mr_list(struct smbdirect_socket *sc) { const struct smbdirect_socket_parameters *sp = &sc->parameters; struct smbdirect_mr_io *mr; @@ -120,7 +120,8 @@ static void smbdirect_mr_io_free_locked(struct kref *kref) kfree(mr); } -static void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc) +__SMBDIRECT_PRIVATE__ +void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc) { struct smbdirect_mr_io *mr, *tmp; LIST_HEAD(all_list); diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index a54c16daffb9..ac75860e4dd6 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -52,7 +52,8 @@ static int smbdirect_socket_rdma_event_handler(struct rdma_cm_id *id, return -ESTALE; } -static int smbdirect_socket_init_new(struct net *net, struct smbdirect_socket *sc) +__SMBDIRECT_PRIVATE__ +int smbdirect_socket_init_new(struct net *net, struct smbdirect_socket *sc) { struct rdma_cm_id *id; int ret; @@ -111,7 +112,8 @@ int smbdirect_socket_create_kern(struct net *net, struct smbdirect_socket **_sc) } __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_create_kern); -static int smbdirect_socket_init_accepting(struct rdma_cm_id *id, struct smbdirect_socket *sc) +__SMBDIRECT_PRIVATE__ +int smbdirect_socket_init_accepting(struct rdma_cm_id *id, struct smbdirect_socket *sc) { smbdirect_socket_init(sc); @@ -308,13 +310,14 @@ static void smbdirect_socket_wake_up_all(struct smbdirect_socket *sc) wake_up_all(&sc->mr_io.cleanup.wait_queue); } -static void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, - const char *macro_name, - unsigned int 
lvl, - const char *func, - unsigned int line, - int error, - enum smbdirect_socket_status *force_status) +__SMBDIRECT_PRIVATE__ +void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, + const char *macro_name, + unsigned int lvl, + const char *func, + unsigned int line, + int error, + enum smbdirect_socket_status *force_status) { bool was_first = false; @@ -559,8 +562,8 @@ static void smbdirect_socket_destroy(struct smbdirect_socket *sc) "rdma session destroyed\n"); } -__maybe_unused /* this is temporary while this file is included in others */ -static void smbdirect_socket_destroy_sync(struct smbdirect_socket *sc) +__SMBDIRECT_PRIVATE__ +void smbdirect_socket_destroy_sync(struct smbdirect_socket *sc) { smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, "status=%s first_error=%1pe", @@ -659,13 +662,13 @@ void smbdirect_socket_release(struct smbdirect_socket *sc) } __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_release); -__maybe_unused /* this is temporary while this file is included in others */ -static int smbdirect_socket_wait_for_credits(struct smbdirect_socket *sc, - enum smbdirect_socket_status expected_status, - int unexpected_errno, - wait_queue_head_t *waitq, - atomic_t *total_credits, - int needed) +__SMBDIRECT_PRIVATE__ +int smbdirect_socket_wait_for_credits(struct smbdirect_socket *sc, + enum smbdirect_socket_status expected_status, + int unexpected_errno, + wait_queue_head_t *waitq, + atomic_t *total_credits, + int needed) { int ret; From 4c9e665cb1132b92812886d08ec784132eb66caf Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 3 Feb 2026 19:26:43 +0100 Subject: [PATCH 057/145] smb: smbdirect: introduce smbdirect_init_send_batch_storage() This makes it possible to use batching via public functions without exposing the internals of struct smbdirect_send_batch. Once the client no longer needs to use smbdirect_connection_send_single_iter() we can remove this again. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- .../common/smbdirect/smbdirect_connection.c | 27 ++++++++++++++++--- fs/smb/common/smbdirect/smbdirect_public.h | 23 ++++++++++++++++ 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 38fb0f34dc86..b7adbd04eb69 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -910,9 +910,10 @@ static void smbdirect_connection_send_batch_init(struct smbdirect_send_batch *ba batch->credit = 0; } -static int smbdirect_connection_send_batch_flush(struct smbdirect_socket *sc, - struct smbdirect_send_batch *batch, - bool is_last) +__SMBDIRECT_PUBLIC__ +int smbdirect_connection_send_batch_flush(struct smbdirect_socket *sc, + struct smbdirect_send_batch *batch, + bool is_last) { struct smbdirect_send_io *first, *last; int ret = 0; @@ -969,6 +970,26 @@ static int smbdirect_connection_send_batch_flush(struct smbdirect_socket *sc, return ret; } +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_batch_flush); + +__SMBDIRECT_PUBLIC__ +struct smbdirect_send_batch * +smbdirect_init_send_batch_storage(struct smbdirect_send_batch_storage *storage, + bool need_invalidate_rkey, + unsigned int remote_key) +{ + struct smbdirect_send_batch *batch = (struct smbdirect_send_batch *)storage; + + memset(storage, 0, sizeof(*storage)); + BUILD_BUG_ON(sizeof(*batch) > sizeof(*storage)); + + smbdirect_connection_send_batch_init(batch, + need_invalidate_rkey, + remote_key); + + return batch; +} +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_init_send_batch_storage); static int smbdirect_connection_wait_for_send_bcredit(struct smbdirect_socket *sc, struct smbdirect_send_batch *batch) diff --git 
a/fs/smb/common/smbdirect/smbdirect_public.h b/fs/smb/common/smbdirect/smbdirect_public.h index a5b15fce840c..3d20d9a87e7b 100644 --- a/fs/smb/common/smbdirect/smbdirect_public.h +++ b/fs/smb/common/smbdirect/smbdirect_public.h @@ -91,6 +91,29 @@ void smbdirect_socket_shutdown(struct smbdirect_socket *sc); __SMBDIRECT_PUBLIC__ void smbdirect_socket_release(struct smbdirect_socket *sc); +__SMBDIRECT_PUBLIC__ +int smbdirect_connection_send_batch_flush(struct smbdirect_socket *sc, + struct smbdirect_send_batch *batch, + bool is_last); + +/* + * This is only temporary and only needed + * as long as the client still requires + * to use smbdirect_connection_send_single_iter() + */ +struct smbdirect_send_batch_storage { + union { + struct list_head __msg_list; + __aligned_u64 __space[5]; + }; +}; + +__SMBDIRECT_PUBLIC__ +struct smbdirect_send_batch * +smbdirect_init_send_batch_storage(struct smbdirect_send_batch_storage *storage, + bool need_invalidate_rkey, + unsigned int remote_key); + __SMBDIRECT_PUBLIC__ int smbdirect_connection_send_single_iter(struct smbdirect_socket *sc, struct smbdirect_send_batch *batch, From 84df3cde16090d5d1de4df31623ef0433fdea041 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 13 Nov 2025 13:10:24 +0100 Subject: [PATCH 058/145] smb: smbdirect: split out smbdirect_accept_negotiate_finish() This will make it easier to support the listen/accept socket interfaces in the next steps. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_accept.c | 26 ++++++++++++++------ fs/smb/common/smbdirect/smbdirect_internal.h | 2 ++ 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/fs/smb/common/smbdirect/smbdirect_accept.c b/fs/smb/common/smbdirect/smbdirect_accept.c index 4baa2c1ad31e..72ee80a185f6 100644 --- a/fs/smb/common/smbdirect/smbdirect_accept.c +++ b/fs/smb/common/smbdirect/smbdirect_accept.c @@ -301,12 +301,7 @@ static void smbdirect_accept_negotiate_recv_work(struct work_struct *work) u32 preferred_send_size; u32 max_receive_size; u32 max_fragmented_size; - struct smbdirect_send_io *send_io = NULL; - struct smbdirect_negotiate_resp *nrep; u32 ntstatus; - int posted; - u16 new_credits; - int ret; if (sc->first_error) return; @@ -459,6 +454,25 @@ static void smbdirect_accept_negotiate_recv_work(struct work_struct *work) */ sp->max_fragmented_send_size = max_fragmented_size; + ntstatus = le32_to_cpu(STATUS_SUCCESS); + +not_supported: + smbdirect_accept_negotiate_finish(sc, ntstatus); +} + +void smbdirect_accept_negotiate_finish(struct smbdirect_socket *sc, u32 ntstatus) +{ + const struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_recv_io *recv_io; + struct smbdirect_send_io *send_io; + struct smbdirect_negotiate_resp *nrep; + int posted; + u16 new_credits; + int ret; + + if (ntstatus) + goto not_supported; + /* * Prepare for receiving data_transfer messages */ @@ -486,8 +500,6 @@ static void smbdirect_accept_negotiate_recv_work(struct work_struct *work) */ new_credits = smbdirect_connection_grant_recv_credits(sc); - ntstatus = le32_to_cpu(STATUS_SUCCESS); - not_supported: send_io = smbdirect_connection_alloc_send_io(sc); if (IS_ERR(send_io)) { diff --git a/fs/smb/common/smbdirect/smbdirect_internal.h 
b/fs/smb/common/smbdirect/smbdirect_internal.h index ae6044ddeb59..f8432c8c1a5f 100644 --- a/fs/smb/common/smbdirect/smbdirect_internal.h +++ b/fs/smb/common/smbdirect/smbdirect_internal.h @@ -132,4 +132,6 @@ int smbdirect_connection_create_mr_list(struct smbdirect_socket *sc); __SMBDIRECT_PRIVATE__ void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc); +void smbdirect_accept_negotiate_finish(struct smbdirect_socket *sc, u32 ntstatus); + #endif /* __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ */ From 03f9e2c15f8fa32b8056a3a59f98f652726f78b8 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 19 Nov 2025 14:56:03 +0100 Subject: [PATCH 059/145] smb: smbdirect: introduce smbdirect_socket_bind() This will be used by the server in the next steps. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_public.h | 3 +++ fs/smb/common/smbdirect/smbdirect_socket.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_public.h b/fs/smb/common/smbdirect/smbdirect_public.h index 3d20d9a87e7b..95837beeece8 100644 --- a/fs/smb/common/smbdirect/smbdirect_public.h +++ b/fs/smb/common/smbdirect/smbdirect_public.h @@ -85,6 +85,9 @@ bool smbdirect_connection_is_connected(struct smbdirect_socket *sc); __SMBDIRECT_PUBLIC__ int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc); +__SMBDIRECT_PUBLIC__ +int smbdirect_socket_bind(struct smbdirect_socket *sc, struct sockaddr *addr); + __SMBDIRECT_PUBLIC__ void smbdirect_socket_shutdown(struct smbdirect_socket *sc); diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index ac75860e4dd6..def67fdac066 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -611,6 +611,22 
@@ void smbdirect_socket_destroy_sync(struct smbdirect_socket *sc) SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); } +__SMBDIRECT_PUBLIC__ +int smbdirect_socket_bind(struct smbdirect_socket *sc, struct sockaddr *addr) +{ + int ret; + + if (sc->status != SMBDIRECT_SOCKET_CREATED) + return -EINVAL; + + ret = rdma_bind_addr(sc->rdma.cm_id, addr); + if (ret) + return ret; + + return 0; +} +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_bind); + __SMBDIRECT_PUBLIC__ void smbdirect_socket_shutdown(struct smbdirect_socket *sc) { From dc691b91ad1677def14582a279e56fd943b52f94 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 19 Nov 2025 11:46:01 +0100 Subject: [PATCH 060/145] smb: smbdirect: introduce smbdirect_socket_{listen,accept}() These will be used by the server soon instead of using smbdirect_accept_connect_request() together with rdma_listen(). Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 2 + fs/smb/common/smbdirect/smbdirect_accept.c | 110 +++++++ .../common/smbdirect/smbdirect_all_c_files.c | 1 + .../common/smbdirect/smbdirect_connection.c | 9 + fs/smb/common/smbdirect/smbdirect_listen.c | 309 ++++++++++++++++++ fs/smb/common/smbdirect/smbdirect_public.h | 8 + fs/smb/common/smbdirect/smbdirect_socket.c | 85 ++++- fs/smb/common/smbdirect/smbdirect_socket.h | 40 +++ fs/smb/server/transport_rdma.c | 2 + 9 files changed, 558 insertions(+), 8 deletions(-) create mode 100644 fs/smb/common/smbdirect/smbdirect_listen.c diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 443ff427e28f..f5ba33460723 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -244,6 +244,7 @@ static void smbd_disconnect_rdma_work(struct work_struct *work) break; case SMBDIRECT_SOCKET_CREATED: + case SMBDIRECT_SOCKET_LISTENING: case 
SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: @@ -323,6 +324,7 @@ static void smbd_disconnect_rdma_connection(struct smbdirect_socket *sc) break; case SMBDIRECT_SOCKET_CREATED: + case SMBDIRECT_SOCKET_LISTENING: sc->status = SMBDIRECT_SOCKET_DISCONNECTED; break; diff --git a/fs/smb/common/smbdirect/smbdirect_accept.c b/fs/smb/common/smbdirect/smbdirect_accept.c index 72ee80a185f6..4fc5983e99b8 100644 --- a/fs/smb/common/smbdirect/smbdirect_accept.c +++ b/fs/smb/common/smbdirect/smbdirect_accept.c @@ -6,6 +6,7 @@ */ #include "smbdirect_internal.h" +#include <net/sock.h> #include "../../common/smb2status.h" static int smbdirect_accept_rdma_event_handler(struct rdma_cm_id *id, @@ -454,6 +455,28 @@ static void smbdirect_accept_negotiate_recv_work(struct work_struct *work) */ sp->max_fragmented_send_size = max_fragmented_size; + if (sc->accept.listener) { + struct smbdirect_socket *lsc = sc->accept.listener; + unsigned long flags; + + spin_lock_irqsave(&lsc->listen.lock, flags); + list_del(&sc->accept.list); + list_add_tail(&sc->accept.list, &lsc->listen.ready); + wake_up(&lsc->listen.wait_queue); + spin_unlock_irqrestore(&lsc->listen.lock, flags); + + /* + * smbdirect_socket_accept() will call + * smbdirect_accept_negotiate_finish(nsc, 0); + * + * So that we don't send the negotiation + * response that grants credits to the peer + * before the socket is accepted by the + * application. 
+ */ + return; + } + ntstatus = le32_to_cpu(STATUS_SUCCESS); not_supported: @@ -748,3 +771,90 @@ static int smbdirect_accept_rdma_event_handler(struct rdma_cm_id *id, smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); return 0; } + +static long smbdirect_socket_wait_for_accept(struct smbdirect_socket *lsc, long timeo) +{ + long ret; + + ret = wait_event_interruptible_timeout(lsc->listen.wait_queue, + !list_empty_careful(&lsc->listen.ready) || + lsc->status != SMBDIRECT_SOCKET_LISTENING || + lsc->first_error, + timeo); + if (lsc->status != SMBDIRECT_SOCKET_LISTENING) + return -EINVAL; + if (lsc->first_error) + return lsc->first_error; + if (!ret) + ret = -ETIMEDOUT; + if (ret < 0) + return ret; + + return 0; +} + +__SMBDIRECT_PUBLIC__ +struct smbdirect_socket *smbdirect_socket_accept(struct smbdirect_socket *lsc, + long timeo, + struct proto_accept_arg *arg) +{ + struct smbdirect_socket *nsc; + unsigned long flags; + + if (lsc->status != SMBDIRECT_SOCKET_LISTENING) { + arg->err = -EINVAL; + return NULL; + } + + if (lsc->first_error) { + arg->err = lsc->first_error; + return NULL; + } + + if (list_empty_careful(&lsc->listen.ready)) { + int ret; + + if (timeo == 0) { + arg->err = -EAGAIN; + return NULL; + } + + ret = smbdirect_socket_wait_for_accept(lsc, timeo); + if (ret) { + arg->err = ret; + return NULL; + } + } + + spin_lock_irqsave(&lsc->listen.lock, flags); + nsc = list_first_entry_or_null(&lsc->listen.ready, + struct smbdirect_socket, + accept.list); + if (nsc) { + nsc->accept.listener = NULL; + list_del_init_careful(&nsc->accept.list); + arg->is_empty = list_empty_careful(&lsc->listen.ready); + } + spin_unlock_irqrestore(&lsc->listen.lock, flags); + if (!nsc) { + arg->err = -EAGAIN; + return NULL; + } + + /* + * We did not send the negotiation response + * yet, so we did not grant any credits to the client, + * so it didn't grant any credits to us. + * + * The caller expects a connected socket + * now as there are no credits anyway. 
+ * + * Then we send the negotiation response in + * order to grant credits to the peer. + */ + nsc->status = SMBDIRECT_SOCKET_CONNECTED; + smbdirect_accept_negotiate_finish(nsc, 0); + + return nsc; +} +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_accept); diff --git a/fs/smb/common/smbdirect/smbdirect_all_c_files.c b/fs/smb/common/smbdirect/smbdirect_all_c_files.c index 40e2ceb9a4a4..03e5852cdf86 100644 --- a/fs/smb/common/smbdirect/smbdirect_all_c_files.c +++ b/fs/smb/common/smbdirect/smbdirect_all_c_files.c @@ -22,3 +22,4 @@ #include "smbdirect_debug.c" #include "smbdirect_connect.c" #include "smbdirect_accept.c" +#include "smbdirect_listen.c" diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index b7adbd04eb69..4afeb4ddadd0 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -163,6 +163,14 @@ void smbdirect_connection_negotiation_done(struct smbdirect_socket *sc) if (unlikely(sc->first_error)) return; + if (sc->status == SMBDIRECT_SOCKET_CONNECTED) + /* + * This is the accept case where + * smbdirect_socket_accept() already sets + * SMBDIRECT_SOCKET_CONNECTED + */ + goto done; + if (sc->status != SMBDIRECT_SOCKET_NEGOTIATE_RUNNING) { /* * Something went wrong... @@ -189,6 +197,7 @@ void smbdirect_connection_negotiation_done(struct smbdirect_socket *sc) * We need to setup the refill and send immediate work * in order to get a working connection. 
*/ +done: INIT_WORK(&sc->recv_io.posted.refill_work, smbdirect_connection_recv_io_refill_work); INIT_WORK(&sc->idle.immediate_work, smbdirect_connection_send_immediate_work); diff --git a/fs/smb/common/smbdirect/smbdirect_listen.c b/fs/smb/common/smbdirect/smbdirect_listen.c new file mode 100644 index 000000000000..ad1ecf64762b --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect_listen.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2017, Microsoft Corporation. + * Copyright (C) 2018, LG Electronics. + * Copyright (c) 2025, Stefan Metzmacher + */ + +#include "smbdirect_internal.h" + +static int smbdirect_listen_rdma_event_handler(struct rdma_cm_id *id, + struct rdma_cm_event *event); + +__SMBDIRECT_PUBLIC__ +int smbdirect_socket_listen(struct smbdirect_socket *sc, int backlog) +{ + int ret; + + if (backlog < 0) + return -EINVAL; + if (!backlog) + backlog = 1; /* use 1 as default for now */ + + if (sc->first_error) + return -EINVAL; + + if (sc->status != SMBDIRECT_SOCKET_CREATED) + return -EINVAL; + + if (WARN_ON_ONCE(!sc->rdma.cm_id)) + return -EINVAL; + + if (sc->rdma.cm_id->device) + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "try to listen on addr: %pISpsfc dev: %.*s\n", + &sc->rdma.cm_id->route.addr.src_addr, + IB_DEVICE_NAME_MAX, + sc->rdma.cm_id->device->name); + else + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "try to listen on addr: %pISpsfc\n", + &sc->rdma.cm_id->route.addr.src_addr); + + /* already checked above */ + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED); + sc->status = SMBDIRECT_SOCKET_LISTENING; + sc->rdma.expected_event = RDMA_CM_EVENT_CONNECT_REQUEST; + rdma_lock_handler(sc->rdma.cm_id); + sc->rdma.cm_id->event_handler = smbdirect_listen_rdma_event_handler; + rdma_unlock_handler(sc->rdma.cm_id); + + ret = rdma_listen(sc->rdma.cm_id, backlog); + if (ret) { + sc->first_error = ret; + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; + if (sc->rdma.cm_id->device) + 
smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "listening failed %1pe on addr: %pISpsfc dev: %.*s\n", + SMBDIRECT_DEBUG_ERR_PTR(ret), + &sc->rdma.cm_id->route.addr.src_addr, + IB_DEVICE_NAME_MAX, + sc->rdma.cm_id->device->name); + else + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "listening failed %1pe on addr: %pISpsfc\n", + SMBDIRECT_DEBUG_ERR_PTR(ret), + &sc->rdma.cm_id->route.addr.src_addr); + return ret; + } + + /* + * This is a value > 0, checked above, + * so we are able to use sc->listen.backlog == -1, + * as indication that the socket was never + * a listener. + */ + sc->listen.backlog = backlog; + + if (sc->rdma.cm_id->device) + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "listening on addr: %pISpsfc dev: %.*s\n", + &sc->rdma.cm_id->route.addr.src_addr, + IB_DEVICE_NAME_MAX, + sc->rdma.cm_id->device->name); + else + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "listening on addr: %pISpsfc\n", + &sc->rdma.cm_id->route.addr.src_addr); + + /* + * The rest happens async via smbdirect_listen_rdma_event_handler() + */ + return 0; +} +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_listen); + +static int smbdirect_new_rdma_event_handler(struct rdma_cm_id *new_id, + struct rdma_cm_event *event) +{ + int ret = -ESTALE; + + /* + * This should be replaced before any real work + * starts! So it should never be called! + */ + + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) + ret = -ENETDOWN; + if (IS_ERR(SMBDIRECT_DEBUG_ERR_PTR(event->status))) + ret = event->status; + WARN_ONCE(1, + "%s should not be called! 
event=%s status=%d => ret=%1pe\n", + __func__, + rdma_event_msg(event->event), + event->status, + SMBDIRECT_DEBUG_ERR_PTR(ret)); + return -ESTALE; +} + +static int smbdirect_listen_connect_request(struct smbdirect_socket *lsc, + struct rdma_cm_id *new_id, + const struct rdma_cm_event *event); + +static int smbdirect_listen_rdma_event_handler(struct rdma_cm_id *new_id, + struct rdma_cm_event *event) +{ + struct smbdirect_socket *lsc = new_id->context; + int ret; + + if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) { + new_id->context = NULL; + new_id->event_handler = smbdirect_new_rdma_event_handler; + } else + new_id = NULL; + + /* + * cma_cm_event_handler() has + * lockdep_assert_held(&id_priv->handler_mutex); + * + * Mutexes are not allowed in interrupts, + * and we rely on not being in an interrupt here, + * as we might sleep. + */ + WARN_ON_ONCE(in_interrupt()); + + if (event->status || event->event != lsc->rdma.expected_event) { + ret = -ECONNABORTED; + + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) + ret = -ENETDOWN; + if (IS_ERR(SMBDIRECT_DEBUG_ERR_PTR(event->status))) + ret = event->status; + + smbdirect_log_rdma_event(lsc, SMBDIRECT_LOG_ERR, + "%s (first_error=%1pe, expected=%s) => event=%s status=%d => ret=%1pe\n", + smbdirect_socket_status_string(lsc->status), + SMBDIRECT_DEBUG_ERR_PTR(lsc->first_error), + rdma_event_msg(lsc->rdma.expected_event), + rdma_event_msg(event->event), + event->status, + SMBDIRECT_DEBUG_ERR_PTR(ret)); + + /* + * In case of error return it and let the caller + * destroy new_id + */ + smbdirect_socket_schedule_cleanup(lsc, ret); + return new_id ? ret : 0; + } + + smbdirect_log_rdma_event(lsc, SMBDIRECT_LOG_INFO, + "%s (first_error=%1pe) event=%s\n", + smbdirect_socket_status_string(lsc->status), + SMBDIRECT_DEBUG_ERR_PTR(lsc->first_error), + rdma_event_msg(event->event)); + + /* + * In case of error return it and let the caller + * destroy new_id + */ + if (lsc->first_error) + return new_id ? 
lsc->first_error : 0; + + switch (event->event) { + case RDMA_CM_EVENT_CONNECT_REQUEST: + WARN_ON_ONCE(lsc->status != SMBDIRECT_SOCKET_LISTENING); + + /* + * In case of error return it and let the caller + * destroy new_id + */ + ret = smbdirect_listen_connect_request(lsc, new_id, event); + if (ret) + return ret; + return 0; + + default: + break; + } + + /* + * This is an internal error + */ + WARN_ON_ONCE(lsc->rdma.expected_event != RDMA_CM_EVENT_CONNECT_REQUEST); + smbdirect_socket_schedule_cleanup(lsc, -EINVAL); + return 0; +} + +static int smbdirect_listen_connect_request(struct smbdirect_socket *lsc, + struct rdma_cm_id *new_id, + const struct rdma_cm_event *event) +{ + const struct smbdirect_socket_parameters *lsp = &lsc->parameters; + struct smbdirect_socket *nsc; + unsigned long flags; + size_t backlog = max_t(size_t, 1, lsc->listen.backlog); + size_t psockets; + size_t rsockets; + int ret; + + if (!smbdirect_frwr_is_supported(&new_id->device->attrs)) { + smbdirect_log_rdma_event(lsc, SMBDIRECT_LOG_ERR, + "Fast Registration Work Requests (FRWR) is not supported device %.*s\n", + IB_DEVICE_NAME_MAX, + new_id->device->name); + smbdirect_log_rdma_event(lsc, SMBDIRECT_LOG_ERR, + "Device capability flags = %llx max_fast_reg_page_list_len = %u\n", + new_id->device->attrs.device_cap_flags, + new_id->device->attrs.max_fast_reg_page_list_len); + return -EPROTONOSUPPORT; + } + + if (lsp->flags & SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB && + !rdma_ib_or_roce(new_id->device, new_id->port_num)) { + smbdirect_log_rdma_event(lsc, SMBDIRECT_LOG_ERR, + "Not IB: device: %.*s IW:%u local: %pISpsfc remote: %pISpsfc\n", + IB_DEVICE_NAME_MAX, + new_id->device->name, + rdma_protocol_iwarp(new_id->device, new_id->port_num), + &new_id->route.addr.src_addr, + &new_id->route.addr.dst_addr); + return -EPROTONOSUPPORT; + } + if (lsp->flags & SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW && + !rdma_protocol_iwarp(new_id->device, new_id->port_num)) { + smbdirect_log_rdma_event(lsc, SMBDIRECT_LOG_ERR, + 
"Not IW: device: %.*s IB:%u local: %pISpsfc remote: %pISpsfc\n", + IB_DEVICE_NAME_MAX, + new_id->device->name, + rdma_ib_or_roce(new_id->device, new_id->port_num), + &new_id->route.addr.src_addr, + &new_id->route.addr.dst_addr); + return -EPROTONOSUPPORT; + } + + spin_lock_irqsave(&lsc->listen.lock, flags); + psockets = list_count_nodes(&lsc->listen.pending); + rsockets = list_count_nodes(&lsc->listen.ready); + spin_unlock_irqrestore(&lsc->listen.lock, flags); + + if (psockets > backlog || + rsockets > backlog || + (psockets + rsockets) > backlog) { + smbdirect_log_rdma_event(lsc, SMBDIRECT_LOG_ERR, + "Backlog[%d][%zu] full pending[%zu] ready[%zu]\n", + lsc->listen.backlog, backlog, psockets, rsockets); + return -EBUSY; + } + + ret = smbdirect_socket_create_accepting(new_id, &nsc); + if (ret) + goto socket_init_failed; + + nsc->logging = lsc->logging; + ret = smbdirect_socket_set_initial_parameters(nsc, &lsc->parameters); + if (ret) + goto set_params_failed; + ret = smbdirect_socket_set_kernel_settings(nsc, + lsc->ib.poll_ctx, + lsc->send_io.mem.gfp_mask); + if (ret) + goto set_settings_failed; + + spin_lock_irqsave(&lsc->listen.lock, flags); + list_add_tail(&nsc->accept.list, &lsc->listen.pending); + nsc->accept.listener = lsc; + spin_unlock_irqrestore(&lsc->listen.lock, flags); + + ret = smbdirect_accept_connect_request(nsc, &event->param.conn); + if (ret) + goto accept_connect_failed; + + return 0; + +accept_connect_failed: + spin_lock_irqsave(&lsc->listen.lock, flags); + list_del_init(&nsc->accept.list); + nsc->accept.listener = NULL; + spin_unlock_irqrestore(&lsc->listen.lock, flags); +set_settings_failed: +set_params_failed: + /* + * The caller will destroy new_id + */ + nsc->ib.dev = NULL; + nsc->rdma.cm_id = NULL; + smbdirect_socket_release(nsc); +socket_init_failed: + return ret; +} diff --git a/fs/smb/common/smbdirect/smbdirect_public.h b/fs/smb/common/smbdirect/smbdirect_public.h index 95837beeece8..c0144c5a808c 100644 --- 
a/fs/smb/common/smbdirect/smbdirect_public.h +++ b/fs/smb/common/smbdirect/smbdirect_public.h @@ -147,10 +147,18 @@ __SMBDIRECT_PUBLIC__ int smbdirect_connect_sync(struct smbdirect_socket *sc, const struct sockaddr *dst); +__SMBDIRECT_PUBLIC__ +int smbdirect_socket_listen(struct smbdirect_socket *sc, int backlog); + __SMBDIRECT_PUBLIC__ int smbdirect_accept_connect_request(struct smbdirect_socket *sc, const struct rdma_conn_param *param); +__SMBDIRECT_PUBLIC__ +struct smbdirect_socket *smbdirect_socket_accept(struct smbdirect_socket *lsc, + long timeo, + struct proto_accept_arg *arg); + __SMBDIRECT_PUBLIC__ int smbdirect_connection_rdma_xmit(struct smbdirect_socket *sc, void *buf, size_t buf_len, diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index def67fdac066..63cdfccedd55 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -299,6 +299,7 @@ static void smbdirect_socket_wake_up_all(struct smbdirect_socket *sc) * in order to notice the broken connection. */ wake_up_all(&sc->status_wait); + wake_up_all(&sc->listen.wait_queue); wake_up_all(&sc->send_io.bcredits.wait_queue); wake_up_all(&sc->send_io.lcredits.wait_queue); wake_up_all(&sc->send_io.credits.wait_queue); @@ -319,6 +320,8 @@ void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, int error, enum smbdirect_socket_status *force_status) { + struct smbdirect_socket *psc, *tsc; + unsigned long flags; bool was_first = false; if (!sc->first_error) { @@ -351,6 +354,18 @@ void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; disable_delayed_work(&sc->idle.timer_work); + /* + * In case we were a listener we need to + * disconnect all pending and ready sockets + * + * First we move ready sockets to pending again. 
+ */ + spin_lock_irqsave(&sc->listen.lock, flags); + list_splice_init(&sc->listen.ready, &sc->listen.pending); + list_for_each_entry_safe(psc, tsc, &sc->listen.pending, accept.list) + smbdirect_socket_schedule_cleanup(psc, sc->first_error); + spin_unlock_irqrestore(&sc->listen.lock, flags); + switch (sc->status) { case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: @@ -386,6 +401,7 @@ void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, break; case SMBDIRECT_SOCKET_CREATED: + case SMBDIRECT_SOCKET_LISTENING: sc->status = SMBDIRECT_SOCKET_DISCONNECTED; break; @@ -410,6 +426,8 @@ static void smbdirect_socket_cleanup_work(struct work_struct *work) { struct smbdirect_socket *sc = container_of(work, struct smbdirect_socket, disconnect_work); + struct smbdirect_socket *psc, *tsc; + unsigned long flags; /* * This should not never be called in an interrupt! @@ -437,6 +455,18 @@ static void smbdirect_socket_cleanup_work(struct work_struct *work) sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; disable_delayed_work(&sc->idle.timer_work); + /* + * In case we were a listener we need to + * disconnect all pending and ready sockets + * + * First we move ready sockets to pending again. 
+ */ + spin_lock_irqsave(&sc->listen.lock, flags); + list_splice_init(&sc->listen.ready, &sc->listen.pending); + list_for_each_entry_safe(psc, tsc, &sc->listen.pending, accept.list) + smbdirect_socket_schedule_cleanup(psc, sc->first_error); + spin_unlock_irqrestore(&sc->listen.lock, flags); + switch (sc->status) { case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: @@ -448,6 +478,7 @@ static void smbdirect_socket_cleanup_work(struct work_struct *work) break; case SMBDIRECT_SOCKET_CREATED: + case SMBDIRECT_SOCKET_LISTENING: case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: @@ -479,6 +510,8 @@ static void smbdirect_socket_cleanup_work(struct work_struct *work) static void smbdirect_socket_destroy(struct smbdirect_socket *sc) { + struct smbdirect_socket *psc, *tsc; + size_t psockets; struct smbdirect_recv_io *recv_io; struct smbdirect_recv_io *recv_tmp; LIST_HEAD(all_list); @@ -502,6 +535,14 @@ static void smbdirect_socket_destroy(struct smbdirect_socket *sc) smbdirect_socket_status_string(sc->status), SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); + /* + * The listener should clear this before we reach this + */ + WARN_ONCE(sc->accept.listener, + "status=%s first_error=%1pe", + smbdirect_socket_status_string(sc->status), + SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); + /* * Wake up all waiters in all wait queues * in order to notice the broken connection. @@ -527,9 +568,34 @@ static void smbdirect_socket_destroy(struct smbdirect_socket *sc) ib_drain_qp(sc->ib.qp); } + /* + * In case we were a listener we need to + * disconnect all pending and ready sockets + * + * We move ready sockets to pending again. 
+ */ + spin_lock_irqsave(&sc->listen.lock, flags); + list_splice_tail_init(&sc->listen.ready, &all_list); + list_splice_tail_init(&sc->listen.pending, &all_list); + spin_unlock_irqrestore(&sc->listen.lock, flags); + psockets = list_count_nodes(&all_list); + if (sc->listen.backlog != -1) /* was a listener */ + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "release %zu pending sockets\n", psockets); + list_for_each_entry_safe(psc, tsc, &all_list, accept.list) { + list_del_init(&psc->accept.list); + psc->accept.listener = NULL; + smbdirect_socket_release(psc); + } + if (sc->listen.backlog != -1) /* was a listener */ + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "released %zu pending sockets\n", psockets); + INIT_LIST_HEAD(&all_list); + /* It's not possible for upper layer to get to reassembly */ - smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, - "drain the reassembly queue\n"); + if (sc->listen.backlog == -1) /* was not a listener */ + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "drain the reassembly queue\n"); spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); list_splice_tail_init(&sc->recv_io.reassembly.list, &all_list); spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); @@ -537,12 +603,14 @@ static void smbdirect_socket_destroy(struct smbdirect_socket *sc) smbdirect_connection_put_recv_io(recv_io); sc->recv_io.reassembly.data_length = 0; - smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, - "freeing mr list\n"); + if (sc->listen.backlog == -1) /* was not a listener */ + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "freeing mr list\n"); smbdirect_connection_destroy_mr_list(sc); - smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, - "destroying qp\n"); + if (sc->listen.backlog == -1) /* was not a listener */ + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "destroying qp\n"); smbdirect_connection_destroy_qp(sc); if (sc->rdma.cm_id) { rdma_unlock_handler(sc->rdma.cm_id); @@ -552,8 +620,9 @@ static void 
smbdirect_socket_destroy(struct smbdirect_socket *sc) sc->rdma.cm_id = NULL; } - smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, - "destroying mem pools\n"); + if (sc->listen.backlog == -1) /* was not a listener */ + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "destroying mem pools\n"); smbdirect_connection_destroy_mem_pools(sc); sc->status = SMBDIRECT_SOCKET_DESTROYED; diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 44d04cc63d04..127197c3e164 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -17,6 +17,7 @@ enum smbdirect_socket_status { SMBDIRECT_SOCKET_CREATED, + SMBDIRECT_SOCKET_LISTENING, SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED, SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING, SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED, @@ -42,6 +43,8 @@ const char *smbdirect_socket_status_string(enum smbdirect_socket_status status) switch (status) { case SMBDIRECT_SOCKET_CREATED: return "CREATED"; + case SMBDIRECT_SOCKET_LISTENING: + return "LISTENING"; case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: return "RESOLVE_ADDR_NEEDED"; case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: @@ -193,6 +196,35 @@ struct smbdirect_socket { struct delayed_work timer_work; } idle; + /* + * The state for listen sockets + */ + struct { + spinlock_t lock; + struct list_head pending; + struct list_head ready; + wait_queue_head_t wait_queue; + /* + * This starts as -1 and a value != -1 + * means this socket was in LISTENING state + * before. Note the valid backlog can + * only be > 0. + */ + int backlog; + } listen; + + /* + * The state for sockets waiting + * for accept, either still waiting + * for the negotiation to finish + * or already ready with a usable + * connection. 
+ */ + struct { + struct smbdirect_socket *listener; + struct list_head list; + } accept; + /* * The state for posted send buffers */ @@ -552,6 +584,14 @@ static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc) INIT_DELAYED_WORK(&sc->idle.timer_work, __smbdirect_socket_disabled_work); disable_delayed_work_sync(&sc->idle.timer_work); + spin_lock_init(&sc->listen.lock); + INIT_LIST_HEAD(&sc->listen.pending); + INIT_LIST_HEAD(&sc->listen.ready); + sc->listen.backlog = -1; /* not a listener */ + init_waitqueue_head(&sc->listen.wait_queue); + + INIT_LIST_HEAD(&sc->accept.list); + sc->send_io.mem.gfp_mask = GFP_KERNEL; atomic_set(&sc->send_io.bcredits.count, 0); diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 12a8def52ff8..2978b8850082 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -284,6 +284,7 @@ static void smb_direct_disconnect_rdma_work(struct work_struct *work) break; case SMBDIRECT_SOCKET_CREATED: + case SMBDIRECT_SOCKET_LISTENING: case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: @@ -364,6 +365,7 @@ smb_direct_disconnect_rdma_connection(struct smbdirect_socket *sc) break; case SMBDIRECT_SOCKET_CREATED: + case SMBDIRECT_SOCKET_LISTENING: sc->status = SMBDIRECT_SOCKET_DISCONNECTED; break; From f9a804da479cc41172f1039b4ffde06a09920506 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 18 Jul 2025 18:44:15 +0200 Subject: [PATCH 061/145] smb: smbdirect: introduce the basic smbdirect.ko This exports the functions needed by cifs.ko and ksmbd.ko. It doesn't yet provide a generic socket layer, but it is a good start to introduce that on top. It will be much easier after Davids refactoring using MSG_SPLICE_PAGES, will make it easier to use the socket layer without an additional copy. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: David Howells Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/Kconfig | 1 + fs/smb/common/Makefile | 1 + fs/smb/common/smbdirect/Kconfig | 9 +++++ fs/smb/common/smbdirect/Makefile | 17 +++++++++ fs/smb/common/smbdirect/smbdirect_internal.h | 9 +++++ fs/smb/common/smbdirect/smbdirect_main.c | 40 ++++++++++++++++++++ 6 files changed, 77 insertions(+) create mode 100644 fs/smb/common/smbdirect/Kconfig create mode 100644 fs/smb/common/smbdirect/Makefile create mode 100644 fs/smb/common/smbdirect/smbdirect_main.c diff --git a/fs/smb/Kconfig b/fs/smb/Kconfig index 85f7ad5fbc5e..b4b2cfdc2a6b 100644 --- a/fs/smb/Kconfig +++ b/fs/smb/Kconfig @@ -4,6 +4,7 @@ source "fs/smb/client/Kconfig" source "fs/smb/server/Kconfig" +source "fs/smb/common/smbdirect/Kconfig" config SMBFS tristate diff --git a/fs/smb/common/Makefile b/fs/smb/common/Makefile index 9e0730a385fb..e6ee65c31b5d 100644 --- a/fs/smb/common/Makefile +++ b/fs/smb/common/Makefile @@ -4,3 +4,4 @@ # obj-$(CONFIG_SMBFS) += cifs_md4.o +obj-$(CONFIG_SMB_COMMON_SMBDIRECT) += smbdirect/ diff --git a/fs/smb/common/smbdirect/Kconfig b/fs/smb/common/smbdirect/Kconfig new file mode 100644 index 000000000000..a46a2e6ec87a --- /dev/null +++ b/fs/smb/common/smbdirect/Kconfig @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# +# smbdirect configuration + +config SMB_COMMON_SMBDIRECT + def_tristate n + depends on INFINIBAND && INFINIBAND_ADDR_TRANS + depends on m || INFINIBAND=y + select SG_POOL diff --git a/fs/smb/common/smbdirect/Makefile b/fs/smb/common/smbdirect/Makefile new file mode 100644 index 000000000000..b41271facfc3 --- /dev/null +++ b/fs/smb/common/smbdirect/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# +# Makefile for smbdirect support +# + +obj-$(CONFIG_SMB_COMMON_SMBDIRECT) += smbdirect.o + +smbdirect-y 
:= \ + smbdirect_socket.o \ + smbdirect_connection.o \ + smbdirect_mr.o \ + smbdirect_rw.o \ + smbdirect_debug.o \ + smbdirect_connect.o \ + smbdirect_listen.o \ + smbdirect_accept.o \ + smbdirect_main.o diff --git a/fs/smb/common/smbdirect/smbdirect_internal.h b/fs/smb/common/smbdirect/smbdirect_internal.h index f8432c8c1a5f..901540d0cbbf 100644 --- a/fs/smb/common/smbdirect/smbdirect_internal.h +++ b/fs/smb/common/smbdirect/smbdirect_internal.h @@ -13,6 +13,15 @@ #include "smbdirect.h" #include "smbdirect_pdu.h" #include "smbdirect_public.h" + +#include <linux/mutex.h> + +struct smbdirect_module_state { + struct mutex mutex; +}; + +extern struct smbdirect_module_state smbdirect_globals; + #include "smbdirect_socket.h" #ifdef SMBDIRECT_USE_INLINE_C_FILES diff --git a/fs/smb/common/smbdirect/smbdirect_main.c b/fs/smb/common/smbdirect/smbdirect_main.c new file mode 100644 index 000000000000..c61ae8d7f4f0 --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect_main.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2025, Stefan Metzmacher + */ + +#include "smbdirect_internal.h" +#include <linux/module.h> + +struct smbdirect_module_state smbdirect_globals = { + .mutex = __MUTEX_INITIALIZER(smbdirect_globals.mutex), +}; + +static __init int smbdirect_module_init(void) +{ + pr_notice("subsystem loading...\n"); + mutex_lock(&smbdirect_globals.mutex); + + /* TODO... */ + + mutex_unlock(&smbdirect_globals.mutex); + pr_notice("subsystem loaded\n"); + return 0; +} + +static __exit void smbdirect_module_exit(void) +{ + pr_notice("subsystem unloading...\n"); + mutex_lock(&smbdirect_globals.mutex); + + /* TODO... 
*/ + + mutex_unlock(&smbdirect_globals.mutex); + pr_notice("subsystem unloaded\n"); +} + +module_init(smbdirect_module_init); +module_exit(smbdirect_module_exit); + +MODULE_DESCRIPTION("smbdirect subsystem"); +MODULE_LICENSE("GPL"); From dd43c2227394472aa6e438ddffc2f58028de7531 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 26 Aug 2025 15:32:48 +0200 Subject: [PATCH 062/145] smb: client: make use of smbdirect_socket_prepare_create() This prepares the use of functions from smbdirect_connection.c. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index f5ba33460723..2d6d8a1e2d99 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2111,6 +2111,7 @@ static struct smbd_connection *_smbd_get_connection( int rc; struct smbd_connection *info; struct smbdirect_socket *sc; + struct smbdirect_socket_parameters init_params = {}; struct smbdirect_socket_parameters *sp; struct rdma_conn_param conn_param; struct ib_qp_cap qp_cap; @@ -2121,20 +2122,10 @@ static struct smbd_connection *_smbd_get_connection( char wq_name[80]; struct workqueue_struct *workqueue; - info = kzalloc_obj(struct smbd_connection); - if (!info) - return NULL; - sc = &info->socket; - scnprintf(wq_name, ARRAY_SIZE(wq_name), "smbd_%p", sc); - workqueue = create_workqueue(wq_name); - if (!workqueue) - goto create_wq_failed; - smbdirect_socket_init(sc); - sc->workqueue = workqueue; - sp = &sc->parameters; - - INIT_WORK(&sc->disconnect_work, smbd_disconnect_rdma_work); - + /* + * Create the initial parameters + */ + sp = &init_params; sp->resolve_addr_timeout_msec = RDMA_RESOLVE_TIMEOUT; sp->resolve_route_timeout_msec = 
RDMA_RESOLVE_TIMEOUT; sp->rdma_connect_timeout_msec = RDMA_RESOLVE_TIMEOUT; @@ -2150,6 +2141,22 @@ static struct smbd_connection *_smbd_get_connection( sp->keepalive_interval_msec = smbd_keep_alive_interval * 1000; sp->keepalive_timeout_msec = KEEPALIVE_RECV_TIMEOUT * 1000; + info = kzalloc_obj(*info); + if (!info) + return NULL; + sc = &info->socket; + scnprintf(wq_name, ARRAY_SIZE(wq_name), "smbd_%p", sc); + workqueue = create_workqueue(wq_name); + if (!workqueue) + goto create_wq_failed; + smbdirect_socket_prepare_create(sc, sp, workqueue); + /* + * from here we operate on the copy. + */ + sp = &sc->parameters; + + INIT_WORK(&sc->disconnect_work, smbd_disconnect_rdma_work); + rc = smbd_ia_open(sc, dstaddr, port); if (rc) { log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc); From 2459505596f57664f61a1be9b50065ebed9da660 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 23 Oct 2025 13:29:46 +0200 Subject: [PATCH 063/145] smb: client: make use of smbdirect_socket_set_logging() This will allow the logging to keep working as before, when we move to common functions in the next commits. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 2d6d8a1e2d99..1d285d364a72 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -175,6 +175,43 @@ static void smbd_disconnect_rdma_connection(struct smbdirect_socket *sc); */ #include "../common/smbdirect/smbdirect_all_c_files.c" +static bool smbd_logging_needed(struct smbdirect_socket *sc, + void *private_ptr, + unsigned int lvl, + unsigned int cls) +{ +#define BUILD_BUG_SAME(x) BUILD_BUG_ON(x != SMBDIRECT_LOG_ ##x) + BUILD_BUG_SAME(ERR); + BUILD_BUG_SAME(INFO); +#undef BUILD_BUG_SAME +#define BUILD_BUG_SAME(x) BUILD_BUG_ON(x != SMBDIRECT_ ##x) + BUILD_BUG_SAME(LOG_OUTGOING); + BUILD_BUG_SAME(LOG_INCOMING); + BUILD_BUG_SAME(LOG_READ); + BUILD_BUG_SAME(LOG_WRITE); + BUILD_BUG_SAME(LOG_RDMA_SEND); + BUILD_BUG_SAME(LOG_RDMA_RECV); + BUILD_BUG_SAME(LOG_KEEP_ALIVE); + BUILD_BUG_SAME(LOG_RDMA_EVENT); + BUILD_BUG_SAME(LOG_RDMA_MR); +#undef BUILD_BUG_SAME + + if (lvl <= smbd_logging_level || cls & smbd_logging_class) + return true; + return false; +} + +static void smbd_logging_vaprintf(struct smbdirect_socket *sc, + const char *func, + unsigned int line, + void *private_ptr, + unsigned int lvl, + unsigned int cls, + struct va_format *vaf) +{ + cifs_dbg(VFS, "%s:%u %pV", func, line, vaf); +} + #define log_rdma(level, class, fmt, args...) 
\ do { \ if (level <= smbd_logging_level || class & smbd_logging_class) \ @@ -2150,6 +2187,7 @@ static struct smbd_connection *_smbd_get_connection( if (!workqueue) goto create_wq_failed; smbdirect_socket_prepare_create(sc, sp, workqueue); + smbdirect_socket_set_logging(sc, NULL, smbd_logging_needed, smbd_logging_vaprintf); /* * from here we operate on the copy. */ From 5f6e338bbb78787933ffcf87959178c4f0a08757 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 2 Sep 2025 12:49:44 +0200 Subject: [PATCH 064/145] smb: client: make use of smbdirect_socket_wake_up_all() This is a superset of smbd_disconnect_wake_up_all() and calling wake_up_all(&sc->rw_io.credits.wait_queue); in addition should not matter as it's not used on the client anyway. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 1d285d364a72..69ff1128e8f0 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -235,22 +235,6 @@ do { \ #define log_rdma_mr(level, fmt, args...) \ log_rdma(level, LOG_RDMA_MR, fmt, ##args) -static void smbd_disconnect_wake_up_all(struct smbdirect_socket *sc) -{ - /* - * Wake up all waiters in all wait queues - * in order to notice the broken connection. 
- */ - wake_up_all(&sc->status_wait); - wake_up_all(&sc->send_io.lcredits.wait_queue); - wake_up_all(&sc->send_io.credits.wait_queue); - wake_up_all(&sc->send_io.pending.dec_wait_queue); - wake_up_all(&sc->send_io.pending.zero_wait_queue); - wake_up_all(&sc->recv_io.reassembly.wait_queue); - wake_up_all(&sc->mr_io.ready.wait_queue); - wake_up_all(&sc->mr_io.cleanup.wait_queue); -} - static void smbd_disconnect_rdma_work(struct work_struct *work) { struct smbdirect_socket *sc = @@ -308,7 +292,7 @@ static void smbd_disconnect_rdma_work(struct work_struct *work) * Wake up all waiters in all wait queues * in order to notice the broken connection. */ - smbd_disconnect_wake_up_all(sc); + smbdirect_socket_wake_up_all(sc); } static void smbd_disconnect_rdma_connection(struct smbdirect_socket *sc) @@ -374,7 +358,7 @@ static void smbd_disconnect_rdma_connection(struct smbdirect_socket *sc) * Wake up all waiters in all wait queues * in order to notice the broken connection. */ - smbd_disconnect_wake_up_all(sc); + smbdirect_socket_wake_up_all(sc); queue_work(sc->workqueue, &sc->disconnect_work); } @@ -1973,7 +1957,7 @@ void smbd_destroy(struct TCP_Server_Info *server) * Most likely this was already called via * smbd_disconnect_rdma_work(), but call it again... */ - smbd_disconnect_wake_up_all(sc); + smbdirect_socket_wake_up_all(sc); log_rdma_event(INFO, "cancelling recv_io.posted.refill_work\n"); disable_work_sync(&sc->recv_io.posted.refill_work); From 872b23ab6d9495e5504ac0a43e9ec977e750052a Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 26 Aug 2025 16:55:52 +0200 Subject: [PATCH 065/145] smb: client: make use of smbdirect_socket_cleanup_work() Note smbdirect_socket_prepare_create() already calls INIT_WORK() with smbdirect_socket_cleanup_work. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 74 ++++----------------------------------- 1 file changed, 6 insertions(+), 68 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 69ff1128e8f0..5204f0ff95d8 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -235,66 +235,6 @@ do { \ #define log_rdma_mr(level, fmt, args...) \ log_rdma(level, LOG_RDMA_MR, fmt, ##args) -static void smbd_disconnect_rdma_work(struct work_struct *work) -{ - struct smbdirect_socket *sc = - container_of(work, struct smbdirect_socket, disconnect_work); - - if (sc->first_error == 0) - sc->first_error = -ECONNABORTED; - - /* - * make sure this and other work is not queued again - * but here we don't block and avoid - * disable[_delayed]_work_sync() - */ - disable_work(&sc->disconnect_work); - disable_work(&sc->recv_io.posted.refill_work); - disable_work(&sc->mr_io.recovery_work); - disable_work(&sc->idle.immediate_work); - disable_delayed_work(&sc->idle.timer_work); - - switch (sc->status) { - case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: - case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: - case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: - case SMBDIRECT_SOCKET_CONNECTED: - case SMBDIRECT_SOCKET_ERROR: - sc->status = SMBDIRECT_SOCKET_DISCONNECTING; - rdma_disconnect(sc->rdma.cm_id); - break; - - case SMBDIRECT_SOCKET_CREATED: - case SMBDIRECT_SOCKET_LISTENING: - case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: - case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: - case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: - case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: - case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: - case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: - /* - 
* rdma_connect() never reached - * RDMA_CM_EVENT_ESTABLISHED - */ - sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - break; - - case SMBDIRECT_SOCKET_DISCONNECTING: - case SMBDIRECT_SOCKET_DISCONNECTED: - case SMBDIRECT_SOCKET_DESTROYED: - break; - } - - /* - * Wake up all waiters in all wait queues - * in order to notice the broken connection. - */ - smbdirect_socket_wake_up_all(sc); -} - static void smbd_disconnect_rdma_connection(struct smbdirect_socket *sc) { if (sc->first_error == 0) @@ -394,13 +334,13 @@ static int smbd_conn_upcall( case RDMA_CM_EVENT_ADDR_ERROR: log_rdma_event(ERR, "connecting failed event=%s\n", event_name); sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED; - smbd_disconnect_rdma_work(&sc->disconnect_work); + smbdirect_socket_cleanup_work(&sc->disconnect_work); break; case RDMA_CM_EVENT_ROUTE_ERROR: log_rdma_event(ERR, "connecting failed event=%s\n", event_name); sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED; - smbd_disconnect_rdma_work(&sc->disconnect_work); + smbdirect_socket_cleanup_work(&sc->disconnect_work); break; case RDMA_CM_EVENT_ESTABLISHED: @@ -495,7 +435,7 @@ static int smbd_conn_upcall( case RDMA_CM_EVENT_REJECTED: log_rdma_event(ERR, "connecting failed event=%s\n", event_name); sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED; - smbd_disconnect_rdma_work(&sc->disconnect_work); + smbdirect_socket_cleanup_work(&sc->disconnect_work); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: @@ -506,7 +446,7 @@ static int smbd_conn_upcall( } sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - smbd_disconnect_rdma_work(&sc->disconnect_work); + smbdirect_socket_cleanup_work(&sc->disconnect_work); break; default: @@ -1943,7 +1883,7 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "destroying rdma session\n"); if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) - smbd_disconnect_rdma_work(&sc->disconnect_work); + smbdirect_socket_cleanup_work(&sc->disconnect_work); if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED) { 
log_rdma_event(INFO, "wait for transport being disconnected\n"); wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); @@ -1955,7 +1895,7 @@ void smbd_destroy(struct TCP_Server_Info *server) * in order to notice the broken connection. * * Most likely this was already called via - * smbd_disconnect_rdma_work(), but call it again... + * smbdirect_socket_cleanup_work(), but call it again... */ smbdirect_socket_wake_up_all(sc); @@ -2177,8 +2117,6 @@ static struct smbd_connection *_smbd_get_connection( */ sp = &sc->parameters; - INIT_WORK(&sc->disconnect_work, smbd_disconnect_rdma_work); - rc = smbd_ia_open(sc, dstaddr, port); if (rc) { log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc); From 43e1fed89e40346578a2f94ae0a87dfa05987fa5 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 26 Aug 2025 17:28:53 +0200 Subject: [PATCH 066/145] smb: client: make use of smbdirect_socket_schedule_cleanup() This removes smbd_disconnect_rdma_connection() which is basically the same as smbdirect_socket_schedule_cleanup(). And we pass more useful errors than -ECONNABORTED if we have them. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 98 ++++++--------------------------------- 1 file changed, 14 insertions(+), 84 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 5204f0ff95d8..9ed0f95c23e6 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -10,7 +10,6 @@ #include #include #include -#define __SMBDIRECT_SOCKET_DISCONNECT(__sc) smbd_disconnect_rdma_connection(__sc) #include "../common/smbdirect/smbdirect_pdu.h" #include "smbdirect.h" #include "cifs_debug.h" @@ -165,8 +164,6 @@ module_param(smbd_logging_level, uint, 0644); MODULE_PARM_DESC(smbd_logging_level, "Logging level for SMBD transport, 0 (default): error, 1: info"); -static void smbd_disconnect_rdma_connection(struct smbdirect_socket *sc); - /* * This is a temporary solution until all code * is moved to smbdirect_all_c_files.c and we @@ -235,74 +232,6 @@ do { \ #define log_rdma_mr(level, fmt, args...) 
\ log_rdma(level, LOG_RDMA_MR, fmt, ##args) -static void smbd_disconnect_rdma_connection(struct smbdirect_socket *sc) -{ - if (sc->first_error == 0) - sc->first_error = -ECONNABORTED; - - /* - * make sure other work (than disconnect_work) is - * not queued again but here we don't block and avoid - * disable[_delayed]_work_sync() - */ - disable_work(&sc->recv_io.posted.refill_work); - disable_work(&sc->mr_io.recovery_work); - disable_work(&sc->idle.immediate_work); - disable_delayed_work(&sc->idle.timer_work); - - switch (sc->status) { - case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: - case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: - case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: - case SMBDIRECT_SOCKET_ERROR: - case SMBDIRECT_SOCKET_DISCONNECTING: - case SMBDIRECT_SOCKET_DISCONNECTED: - case SMBDIRECT_SOCKET_DESTROYED: - /* - * Keep the current error status - */ - break; - - case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: - case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: - sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED; - break; - - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: - sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED; - break; - - case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: - case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: - sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED; - break; - - case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: - case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; - break; - - case SMBDIRECT_SOCKET_CREATED: - case SMBDIRECT_SOCKET_LISTENING: - sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - break; - - case SMBDIRECT_SOCKET_CONNECTED: - sc->status = SMBDIRECT_SOCKET_ERROR; - break; - } - - /* - * Wake up all waiters in all wait queues - * in order to notice the broken connection. 
- */ - smbdirect_socket_wake_up_all(sc); - - queue_work(sc->workqueue, &sc->disconnect_work); -} - /* Upcall from RDMA CM */ static int smbd_conn_upcall( struct rdma_cm_id *id, struct rdma_cm_event *event) @@ -470,7 +399,7 @@ smbd_qp_async_error_upcall(struct ib_event *event, void *context) switch (event->event) { case IB_EVENT_CQ_ERR: case IB_EVENT_QP_FATAL: - smbd_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); break; default: @@ -567,7 +496,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) log_rdma_send(ERR, "unexpected send completion wc->status=%s (%d) wc->opcode=%d\n", ib_wc_status_msg(wc->status), wc->status, wc->opcode); - smbd_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); return; } @@ -588,7 +517,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) if (wc->status != IB_WC_WR_FLUSH_ERR) log_rdma_send(ERR, "wc->status=%s wc->opcode=%d\n", ib_wc_status_msg(wc->status), wc->opcode); - smbd_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); return; } @@ -790,7 +719,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) negotiate_done = false; if (!negotiate_done) { sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; - smbd_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); } else { sc->status = SMBDIRECT_SOCKET_CONNECTED; wake_up(&sc->status_wait); @@ -889,7 +818,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_DATA_TRANSFER); error: put_receive_buffer(sc, response); - smbd_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); } static struct rdma_cm_id *smbd_create_id( @@ -1191,7 +1120,7 @@ static int smbd_ib_post_send(struct smbdirect_socket *sc, ret = ib_post_send(sc->ib.qp, wr, NULL); if (ret) { pr_err("failed to post send: %d\n", ret); - smbd_disconnect_rdma_connection(sc); + 
smbdirect_socket_schedule_cleanup(sc, ret); ret = -EAGAIN; } return ret; @@ -1631,7 +1560,7 @@ static int smbd_post_recv( ib_dma_unmap_single(sc->ib.dev, response->sge.addr, response->sge.length, DMA_FROM_DEVICE); response->sge.length = 0; - smbd_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, rc); log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc); } @@ -1842,7 +1771,7 @@ static void idle_connection_timer(struct work_struct *work) log_keep_alive(ERR, "error status sc->idle.keepalive=%d\n", sc->idle.keepalive); - smbd_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ETIMEDOUT); return; } @@ -2612,7 +2541,7 @@ static void register_mr_done(struct ib_cq *cq, struct ib_wc *wc) if (wc->status) { log_rdma_mr(ERR, "status=%d\n", wc->status); - smbd_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); } } @@ -2642,7 +2571,7 @@ static void smbd_mr_recovery_work(struct work_struct *work) log_rdma_mr(ERR, "ib_dereg_mr failed rc=%x\n", rc); - smbd_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, rc); continue; } @@ -2650,10 +2579,11 @@ static void smbd_mr_recovery_work(struct work_struct *work) sc->ib.pd, sc->mr_io.type, sp->max_frmr_depth); if (IS_ERR(smbdirect_mr->mr)) { + rc = PTR_ERR(smbdirect_mr->mr); log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", sc->mr_io.type, sp->max_frmr_depth); - smbd_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, rc); continue; } } else @@ -2988,7 +2918,7 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, if (atomic_dec_and_test(&sc->mr_io.used.count)) wake_up(&sc->mr_io.cleanup.wait_queue); - smbd_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, rc); /* * get_mr() gave us a reference @@ -3063,7 +2993,7 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr) if (rc) { log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc); smbd_mr_disable_locked(mr); - 
smbd_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, rc); goto done; } wait_for_completion(&mr->invalidate_done); From 927183cdbe4897f9a4bc0f64201fb0f192722d35 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 26 Aug 2025 19:13:27 +0200 Subject: [PATCH 067/145] smb: client: make use of smbdirect_connection_{get,put}_recv_io() These are basically copies of {get,put}_receive_buffer(). Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 78 ++++++--------------------------------- 1 file changed, 11 insertions(+), 67 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 9ed0f95c23e6..daa4c4105938 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -23,11 +23,6 @@ const struct smbdirect_socket_parameters *smbd_get_parameters(struct smbd_connec return &sc->parameters; } -static struct smbdirect_recv_io *get_receive_buffer( - struct smbdirect_socket *sc); -static void put_receive_buffer( - struct smbdirect_socket *sc, - struct smbdirect_recv_io *response); static int allocate_receive_buffers(struct smbdirect_socket *sc, int num_buf); static void destroy_receive_buffers(struct smbdirect_socket *sc); @@ -628,7 +623,7 @@ static void smbd_post_send_credits(struct work_struct *work) if (sc->recv_io.credits.target > atomic_read(&sc->recv_io.credits.count)) { while (true) { - response = get_receive_buffer(sc); + response = smbdirect_connection_get_recv_io(sc); if (!response) break; @@ -637,7 +632,7 @@ static void smbd_post_send_credits(struct work_struct *work) if (rc) { log_rdma_recv(ERR, "post_recv failed rc=%d\n", rc); - put_receive_buffer(sc, response); + smbdirect_connection_put_recv_io(response); break; } @@ -714,7 +709,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) 
sc->recv_io.reassembly.full_packet_received = true; negotiate_done = process_negotiation_response(response, wc->byte_len); - put_receive_buffer(sc, response); + smbdirect_connection_put_recv_io(response); if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_NEGOTIATE_RUNNING)) negotiate_done = false; if (!negotiate_done) { @@ -802,7 +797,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) enqueue_reassembly(sc, response, data_length); wake_up(&sc->recv_io.reassembly.wait_queue); } else - put_receive_buffer(sc, response); + smbdirect_connection_put_recv_io(response); return; @@ -817,7 +812,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) log_rdma_recv(ERR, "unexpected response type=%d\n", sc->recv_io.expected); WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_DATA_TRANSFER); error: - put_receive_buffer(sc, response); + smbdirect_connection_put_recv_io(response); smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); } @@ -1572,7 +1567,7 @@ static int smbd_negotiate(struct smbdirect_socket *sc) { struct smbdirect_socket_parameters *sp = &sc->parameters; int rc; - struct smbdirect_recv_io *response = get_receive_buffer(sc); + struct smbdirect_recv_io *response = smbdirect_connection_get_recv_io(sc); WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED); sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING; @@ -1583,7 +1578,7 @@ static int smbd_negotiate(struct smbdirect_socket *sc) rc, response->sge.addr, response->sge.length, response->sge.lkey); if (rc) { - put_receive_buffer(sc, response); + smbdirect_connection_put_recv_io(response); return rc; } @@ -1659,57 +1654,6 @@ static struct smbdirect_recv_io *_get_first_reassembly(struct smbdirect_socket * return ret; } -/* - * Get a receive buffer - * For each remote send, we need to post a receive. The receive buffers are - * pre-allocated in advance. 
- * return value: the receive buffer, NULL if none is available - */ -static struct smbdirect_recv_io *get_receive_buffer(struct smbdirect_socket *sc) -{ - struct smbdirect_recv_io *ret = NULL; - unsigned long flags; - - spin_lock_irqsave(&sc->recv_io.free.lock, flags); - if (!list_empty(&sc->recv_io.free.list)) { - ret = list_first_entry( - &sc->recv_io.free.list, - struct smbdirect_recv_io, list); - list_del(&ret->list); - sc->statistics.get_receive_buffer++; - } - spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); - - return ret; -} - -/* - * Return a receive buffer - * Upon returning of a receive buffer, we can post new receive and extend - * more receive credits to remote peer. This is done immediately after a - * receive buffer is returned. - */ -static void put_receive_buffer( - struct smbdirect_socket *sc, struct smbdirect_recv_io *response) -{ - unsigned long flags; - - if (likely(response->sge.length != 0)) { - ib_dma_unmap_single(sc->ib.dev, - response->sge.addr, - response->sge.length, - DMA_FROM_DEVICE); - response->sge.length = 0; - } - - spin_lock_irqsave(&sc->recv_io.free.lock, flags); - list_add_tail(&response->list, &sc->recv_io.free.list); - sc->statistics.put_receive_buffer++; - spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); - - queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); -} - /* Preallocate all receive buffer on transport establishment */ static int allocate_receive_buffers(struct smbdirect_socket *sc, int num_buf) { @@ -1744,7 +1688,7 @@ static void destroy_receive_buffers(struct smbdirect_socket *sc) { struct smbdirect_recv_io *response; - while ((response = get_receive_buffer(sc))) + while ((response = smbdirect_connection_get_recv_io(sc))) mempool_free(response, sc->recv_io.mem.pool); } @@ -1850,7 +1794,7 @@ void smbd_destroy(struct TCP_Server_Info *server) list_del(&response->list); spin_unlock_irqrestore( &sc->recv_io.reassembly.lock, flags); - put_receive_buffer(sc, response); + 
smbdirect_connection_put_recv_io(response); } else spin_unlock_irqrestore( &sc->recv_io.reassembly.lock, flags); @@ -2389,9 +2333,9 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) } queue_removed++; sc->statistics.dequeue_reassembly_queue++; - put_receive_buffer(sc, response); + smbdirect_connection_put_recv_io(response); offset = 0; - log_read(INFO, "put_receive_buffer offset=0\n"); + log_read(INFO, "smbdirect_connection_put_recv_io offset=0\n"); } else offset += to_copy; From 018ed87aa5ddc42f4437f6f9df4386e8e18e481d Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 27 Aug 2025 16:48:39 +0200 Subject: [PATCH 068/145] smb: client: make use of smbdirect_connection_reassembly_{append,first}_recv_io() These are exact copies of enqueue_reassembly() and _get_first_reassembly(). Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 63 +++------------------------------------ 1 file changed, 4 insertions(+), 59 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index daa4c4105938..c65bcd819816 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -26,12 +26,6 @@ const struct smbdirect_socket_parameters *smbd_get_parameters(struct smbd_connec static int allocate_receive_buffers(struct smbdirect_socket *sc, int num_buf); static void destroy_receive_buffers(struct smbdirect_socket *sc); -static void enqueue_reassembly( - struct smbdirect_socket *sc, - struct smbdirect_recv_io *response, int data_length); -static struct smbdirect_recv_io *_get_first_reassembly( - struct smbdirect_socket *sc); - static int smbd_post_send(struct smbdirect_socket *sc, struct smbdirect_send_batch *batch, struct smbdirect_send_io *request); @@ -794,7 +788,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) 
sc->recv_io.credits.target > old_recv_credit_target) queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); - enqueue_reassembly(sc, response, data_length); + smbdirect_connection_reassembly_append_recv_io(sc, response, data_length); wake_up(&sc->recv_io.reassembly.wait_queue); } else smbdirect_connection_put_recv_io(response); @@ -1605,55 +1599,6 @@ static int smbd_negotiate(struct smbdirect_socket *sc) return rc; } -/* - * Implement Connection.FragmentReassemblyBuffer defined in [MS-SMBD] 3.1.1.1 - * This is a queue for reassembling upper layer payload and present to upper - * layer. All the inncoming payload go to the reassembly queue, regardless of - * if reassembly is required. The uuper layer code reads from the queue for all - * incoming payloads. - * Put a received packet to the reassembly queue - * response: the packet received - * data_length: the size of payload in this packet - */ -static void enqueue_reassembly( - struct smbdirect_socket *sc, - struct smbdirect_recv_io *response, - int data_length) -{ - unsigned long flags; - - spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); - list_add_tail(&response->list, &sc->recv_io.reassembly.list); - sc->recv_io.reassembly.queue_length++; - /* - * Make sure reassembly_data_length is updated after list and - * reassembly_queue_length are updated. 
On the dequeue side - * reassembly_data_length is checked without a lock to determine - * if reassembly_queue_length and list is up to date - */ - virt_wmb(); - sc->recv_io.reassembly.data_length += data_length; - spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); - sc->statistics.enqueue_reassembly_queue++; -} - -/* - * Get the first entry at the front of reassembly queue - * Caller is responsible for locking - * return value: the first entry if any, NULL if queue is empty - */ -static struct smbdirect_recv_io *_get_first_reassembly(struct smbdirect_socket *sc) -{ - struct smbdirect_recv_io *ret = NULL; - - if (!list_empty(&sc->recv_io.reassembly.list)) { - ret = list_first_entry( - &sc->recv_io.reassembly.list, - struct smbdirect_recv_io, list); - } - return ret; -} - /* Preallocate all receive buffer on transport establishment */ static int allocate_receive_buffers(struct smbdirect_socket *sc, int num_buf) { @@ -1789,7 +1734,7 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "drain the reassembly queue\n"); do { spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); - response = _get_first_reassembly(sc); + response = smbdirect_connection_reassembly_first_recv_io(sc); if (response) { list_del(&response->list); spin_unlock_irqrestore( @@ -2270,7 +2215,7 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) /* * Need to make sure reassembly_data_length is read before * reading reassembly_queue_length and calling - * _get_first_reassembly. This call is lock free + * smbdirect_connection_reassembly_first_recv_io. 
This call is lock free * as we never read at the end of the queue which are being * updated in SOFTIRQ as more data is received */ @@ -2280,7 +2225,7 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) to_read = size; offset = sc->recv_io.reassembly.first_entry_offset; while (data_read < size) { - response = _get_first_reassembly(sc); + response = smbdirect_connection_reassembly_first_recv_io(sc); data_transfer = smbdirect_recv_io_payload(response); data_length = le32_to_cpu(data_transfer->data_length); remaining_data_length = From c85814ca5f3d22f08f3513a6eb91162392f4fe4b Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 27 Aug 2025 17:22:44 +0200 Subject: [PATCH 069/145] smb: client: make use of smbdirect_connection_idle_timer_work() This is basically a copy of idle_connection_timer(). Note smbdirect_socket_prepare_create() already calls INIT_DELAYED_WORK(). Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index c65bcd819816..8fa9621d3f23 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1649,35 +1649,6 @@ static void send_immediate_empty_message(struct work_struct *work) smbd_post_send_empty(sc); } -/* Implement idle connection timer [MS-SMBD] 3.1.6.2 */ -static void idle_connection_timer(struct work_struct *work) -{ - struct smbdirect_socket *sc = - container_of(work, struct smbdirect_socket, idle.timer_work.work); - struct smbdirect_socket_parameters *sp = &sc->parameters; - - if (sc->idle.keepalive != SMBDIRECT_KEEPALIVE_NONE) { - log_keep_alive(ERR, - "error status sc->idle.keepalive=%d\n", - sc->idle.keepalive); - smbdirect_socket_schedule_cleanup(sc, -ETIMEDOUT); - return; - } - - if 
(sc->status != SMBDIRECT_SOCKET_CONNECTED) - return; - - /* - * Now use the keepalive timeout (instead of keepalive interval) - * in order to wait for a response - */ - sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, - msecs_to_jiffies(sp->keepalive_timeout_msec)); - log_keep_alive(INFO, "schedule send of empty idle message\n"); - queue_work(sc->workqueue, &sc->idle.immediate_work); -} - /* * Destroy the transport and related RDMA and memory resources * Need to go through all the pending counters and make sure on one is using @@ -2081,7 +2052,6 @@ static struct smbd_connection *_smbd_get_connection( } INIT_WORK(&sc->idle.immediate_work, send_immediate_empty_message); - INIT_DELAYED_WORK(&sc->idle.timer_work, idle_connection_timer); /* * start with the negotiate timeout and SMBDIRECT_KEEPALIVE_PENDING * so that the timer will cause a disconnect. From 6f9055aa9c8c16c7a9e185e35257dbee3852d42f Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 28 Aug 2025 16:59:00 +0200 Subject: [PATCH 070/145] smb: client: make use of smbdirect_frwr_is_supported() This is an exact copy of frwr_is_supported(). 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 8fa9621d3f23..ba002060fed0 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -922,20 +922,6 @@ static struct rdma_cm_id *smbd_create_id( return ERR_PTR(rc); } -/* - * Test if FRWR (Fast Registration Work Requests) is supported on the device - * This implementation requires FRWR on RDMA read/write - * return value: true if it is supported - */ -static bool frwr_is_supported(struct ib_device_attr *attrs) -{ - if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) - return false; - if (attrs->max_fast_reg_page_list_len == 0) - return false; - return true; -} - static int smbd_ia_open( struct smbdirect_socket *sc, struct sockaddr *dstaddr, int port) @@ -953,7 +939,7 @@ static int smbd_ia_open( } sc->ib.dev = sc->rdma.cm_id->device; - if (!frwr_is_supported(&sc->ib.dev->attrs)) { + if (!smbdirect_frwr_is_supported(&sc->ib.dev->attrs)) { log_rdma_event(ERR, "Fast Registration Work Requests (FRWR) is not supported\n"); log_rdma_event(ERR, "Device capability flags = %llx max_fast_reg_page_list_len = %u\n", sc->ib.dev->attrs.device_cap_flags, From f7a59fff0259592e138c702b3c22b5fbf3c8ea00 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 11 Sep 2025 20:09:24 +0200 Subject: [PATCH 071/145] smb: client: make use of smbdirect_connection_{alloc,free}_send_io() This simplifies the code and allows us to share more code in common with the server. 
The only difference is that we use ib_dma_unmap_page() for all sges, this simplifies the logic and doesn't matter as ib_dma_unmap_single() and ib_dma_unmap_page() both operate on dma_addr_t and dma_unmap_single_attrs() is just an alias for dma_unmap_page_attrs(). We already had such an inconsistency before as we called ib_dma_unmap_single(), while we mapped using ib_dma_map_page() in smb_set_sge(). Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 62 ++++++--------------------------------- 1 file changed, 9 insertions(+), 53 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index ba002060fed0..367ee741d9ae 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -406,51 +406,6 @@ static inline void *smbdirect_recv_io_payload(struct smbdirect_recv_io *response return (void *)response->packet; } -static struct smbdirect_send_io *smbd_alloc_send_io(struct smbdirect_socket *sc) -{ - struct smbdirect_send_io *msg; - - msg = mempool_alloc(sc->send_io.mem.pool, GFP_KERNEL); - if (!msg) - return ERR_PTR(-ENOMEM); - msg->socket = sc; - INIT_LIST_HEAD(&msg->sibling_list); - msg->num_sge = 0; - - return msg; -} - -static void smbd_free_send_io(struct smbdirect_send_io *msg) -{ - struct smbdirect_socket *sc = msg->socket; - size_t i; - - /* - * The list needs to be empty! - * The caller should take care of it. - */ - WARN_ON_ONCE(!list_empty(&msg->sibling_list)); - - /* - * Note we call ib_dma_unmap_page(), even if some sges are mapped using - * ib_dma_map_single(). - * - * The difference between _single() and _page() only matters for the - * ib_dma_map_*() case. - * - * For the ib_dma_unmap_*() case it does not matter as both take the - * dma_addr_t and dma_unmap_single_attrs() is just an alias to - * dma_unmap_page_attrs(). 
- */ - for (i = 0; i < msg->num_sge; i++) - ib_dma_unmap_page(sc->ib.dev, - msg->sge[i].addr, - msg->sge[i].length, - DMA_TO_DEVICE); - - mempool_free(msg, sc->send_io.mem.pool); -} - /* Called when a RDMA send is done */ static void send_done(struct ib_cq *cq, struct ib_wc *wc) { @@ -494,11 +449,11 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) */ list_for_each_entry_safe(sibling, next, &request->sibling_list, sibling_list) { list_del_init(&sibling->sibling_list); - smbd_free_send_io(sibling); + smbdirect_connection_free_send_io(sibling); lcredits += 1; } /* Note this frees wc->wr_cqe, but not wc */ - smbd_free_send_io(request); + smbdirect_connection_free_send_io(request); lcredits += 1; if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { @@ -977,7 +932,7 @@ static int smbd_post_send_negotiate_req(struct smbdirect_socket *sc) struct smbdirect_send_io *request; struct smbdirect_negotiate_req *packet; - request = smbd_alloc_send_io(sc); + request = smbdirect_connection_alloc_send_io(sc); if (IS_ERR(request)) return PTR_ERR(request); @@ -1002,6 +957,7 @@ static int smbd_post_send_negotiate_req(struct smbdirect_socket *sc) request->sge[0].length = sizeof(*packet); request->sge[0].lkey = sc->ib.pd->local_dma_lkey; + request->num_sge = 1; rc = smbd_post_send(sc, NULL, request); if (!rc) @@ -1011,7 +967,7 @@ static int smbd_post_send_negotiate_req(struct smbdirect_socket *sc) rc = -EIO; dma_mapping_failed: - smbd_free_send_io(request); + smbdirect_connection_free_send_io(request); return rc; } @@ -1202,9 +1158,9 @@ static int smbd_send_batch_flush(struct smbdirect_socket *sc, list_for_each_entry_safe(sibling, next, &last->sibling_list, sibling_list) { list_del_init(&sibling->sibling_list); - smbd_free_send_io(sibling); + smbdirect_connection_free_send_io(sibling); } - smbd_free_send_io(last); + smbdirect_connection_free_send_io(last); } release_credit: @@ -1353,7 +1309,7 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, new_credits 
= manage_credits_prior_sending(sc); } - request = smbd_alloc_send_io(sc); + request = smbdirect_connection_alloc_send_io(sc); if (IS_ERR(request)) { rc = PTR_ERR(request); goto err_alloc; @@ -1447,7 +1403,7 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, } err_dma: - smbd_free_send_io(request); + smbdirect_connection_free_send_io(request); err_flush: err_alloc: From 958da403e1db2fad62d1b9398b486e34658396f8 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 11 Sep 2025 20:51:53 +0200 Subject: [PATCH 072/145] smb: client: make use of smbdirect_connection_send_io_done() This handles freeing of siblings too, which is used on the client yet, but that might follow later. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 70 +-------------------------------------- 1 file changed, 1 insertion(+), 69 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 367ee741d9ae..41baa403063b 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -406,74 +406,6 @@ static inline void *smbdirect_recv_io_payload(struct smbdirect_recv_io *response return (void *)response->packet; } -/* Called when a RDMA send is done */ -static void send_done(struct ib_cq *cq, struct ib_wc *wc) -{ - struct smbdirect_send_io *request = - container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); - struct smbdirect_socket *sc = request->socket; - struct smbdirect_send_io *sibling, *next; - int lcredits = 0; - - log_rdma_send(INFO, "smbdirect_send_io 0x%p completed wc->status=%s\n", - request, ib_wc_status_msg(wc->status)); - - if (unlikely(!(request->wr.send_flags & IB_SEND_SIGNALED))) { - /* - * This happens when smbdirect_send_io is a sibling - * before the final message, it is signaled on - * error anyway, so we need to skip - * 
smbdirect_connection_free_send_io here, - * otherwise is will destroy the memory - * of the siblings too, which will cause - * use after free problems for the others - * triggered from ib_drain_qp(). - */ - if (wc->status != IB_WC_SUCCESS) - goto skip_free; - - /* - * This should not happen! - * But we better just close the - * connection... - */ - log_rdma_send(ERR, - "unexpected send completion wc->status=%s (%d) wc->opcode=%d\n", - ib_wc_status_msg(wc->status), wc->status, wc->opcode); - smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); - return; - } - - /* - * Free possible siblings and then the main send_io - */ - list_for_each_entry_safe(sibling, next, &request->sibling_list, sibling_list) { - list_del_init(&sibling->sibling_list); - smbdirect_connection_free_send_io(sibling); - lcredits += 1; - } - /* Note this frees wc->wr_cqe, but not wc */ - smbdirect_connection_free_send_io(request); - lcredits += 1; - - if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { -skip_free: - if (wc->status != IB_WC_WR_FLUSH_ERR) - log_rdma_send(ERR, "wc->status=%s wc->opcode=%d\n", - ib_wc_status_msg(wc->status), wc->opcode); - smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); - return; - } - - atomic_add(lcredits, &sc->send_io.lcredits.count); - wake_up(&sc->send_io.lcredits.wait_queue); - - if (atomic_dec_and_test(&sc->send_io.pending.count)) - wake_up(&sc->send_io.pending.zero_wait_queue); - - wake_up(&sc->send_io.pending.dec_wait_queue); -} - static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp) { log_rdma_event(INFO, "resp message min_version %u max_version %u negotiated_version %u credits_requested %u credits_granted %u status %u max_readwrite_size %u preferred_send_size %u max_receive_size %u max_fragmented_size %u\n", @@ -1075,7 +1007,7 @@ static int smbd_post_send(struct smbdirect_socket *sc, DMA_TO_DEVICE); } - request->cqe.done = send_done; + request->cqe.done = smbdirect_connection_send_io_done; request->wr.next = NULL; 
request->wr.sg_list = request->sge; request->wr.num_sge = request->num_sge; From 116f3eed365143dd8c31a50fe62726966d047577 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sat, 13 Sep 2025 00:10:06 +0200 Subject: [PATCH 073/145] smb: client: make use of smbdirect_connection_{create,destroy}_mem_pools() The main logical differences are the following: We now don't use smbdirect_connection_get_recv_io() on cleanup, instead it uses list_for_each_entry_safe()... For the smbdirect_recv_io payload we expose the whole payload including the smbdirect_data_transfer header as documentation says data_offset = 0 and data_length != 0 would be valid, while the existing client code requires data_offset >= 24. The smbdirect_send_io cache includes header space for sizeof(struct smbdirect_negotiate_resp) = 32 bytes instead of sizeof(struct smbdirect_data_transfer) = 24 bytes. If this ever becomes a problem, we can allocate separate space for the smbdirect_negotiate_resp in the server. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 129 +------------------------------------- 1 file changed, 3 insertions(+), 126 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 41baa403063b..28fb77b2661b 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -23,9 +23,6 @@ const struct smbdirect_socket_parameters *smbd_get_parameters(struct smbd_connec return &sc->parameters; } -static int allocate_receive_buffers(struct smbdirect_socket *sc, int num_buf); -static void destroy_receive_buffers(struct smbdirect_socket *sc); - static int smbd_post_send(struct smbdirect_socket *sc, struct smbdirect_send_batch *batch, struct smbdirect_send_io *request); @@ -1473,44 +1470,6 @@ static int smbd_negotiate(struct smbdirect_socket *sc) return rc; } -/* Preallocate 
all receive buffer on transport establishment */ -static int allocate_receive_buffers(struct smbdirect_socket *sc, int num_buf) -{ - struct smbdirect_recv_io *response; - int i; - - for (i = 0; i < num_buf; i++) { - response = mempool_alloc(sc->recv_io.mem.pool, GFP_KERNEL); - if (!response) - goto allocate_failed; - - response->socket = sc; - response->sge.length = 0; - list_add_tail(&response->list, &sc->recv_io.free.list); - } - - return 0; - -allocate_failed: - while (!list_empty(&sc->recv_io.free.list)) { - response = list_first_entry( - &sc->recv_io.free.list, - struct smbdirect_recv_io, list); - list_del(&response->list); - - mempool_free(response, sc->recv_io.mem.pool); - } - return -ENOMEM; -} - -static void destroy_receive_buffers(struct smbdirect_socket *sc) -{ - struct smbdirect_recv_io *response; - - while ((response = smbdirect_connection_get_recv_io(sc))) - mempool_free(response, sc->recv_io.mem.pool); -} - static void send_immediate_empty_message(struct work_struct *work) { struct smbdirect_socket *sc = @@ -1591,9 +1550,6 @@ void smbd_destroy(struct TCP_Server_Info *server) } while (response); sc->recv_io.reassembly.data_length = 0; - log_rdma_event(INFO, "free receive buffers\n"); - destroy_receive_buffers(sc); - log_rdma_event(INFO, "freeing mr list\n"); destroy_mr_list(sc); @@ -1603,11 +1559,7 @@ void smbd_destroy(struct TCP_Server_Info *server) rdma_destroy_id(sc->rdma.cm_id); /* free mempools */ - mempool_destroy(sc->send_io.mem.pool); - kmem_cache_destroy(sc->send_io.mem.cache); - - mempool_destroy(sc->recv_io.mem.pool); - kmem_cache_destroy(sc->recv_io.mem.cache); + smbdirect_connection_destroy_mem_pools(sc); sc->status = SMBDIRECT_SOCKET_DESTROYED; @@ -1653,81 +1605,6 @@ int smbd_reconnect(struct TCP_Server_Info *server) return -ENOENT; } -static void destroy_caches(struct smbdirect_socket *sc) -{ - destroy_receive_buffers(sc); - mempool_destroy(sc->recv_io.mem.pool); - kmem_cache_destroy(sc->recv_io.mem.cache); - 
mempool_destroy(sc->send_io.mem.pool); - kmem_cache_destroy(sc->send_io.mem.cache); -} - -#define MAX_NAME_LEN 80 -static int allocate_caches(struct smbdirect_socket *sc) -{ - struct smbdirect_socket_parameters *sp = &sc->parameters; - char name[MAX_NAME_LEN]; - int rc; - - if (WARN_ON_ONCE(sp->max_recv_size < sizeof(struct smbdirect_data_transfer))) - return -ENOMEM; - - scnprintf(name, MAX_NAME_LEN, "smbdirect_send_io_%p", sc); - sc->send_io.mem.cache = - kmem_cache_create( - name, - sizeof(struct smbdirect_send_io) + - sizeof(struct smbdirect_data_transfer), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!sc->send_io.mem.cache) - return -ENOMEM; - - sc->send_io.mem.pool = - mempool_create(sp->send_credit_target, mempool_alloc_slab, - mempool_free_slab, sc->send_io.mem.cache); - if (!sc->send_io.mem.pool) - goto out1; - - scnprintf(name, MAX_NAME_LEN, "smbdirect_recv_io_%p", sc); - - struct kmem_cache_args response_args = { - .align = __alignof__(struct smbdirect_recv_io), - .useroffset = (offsetof(struct smbdirect_recv_io, packet) + - sizeof(struct smbdirect_data_transfer)), - .usersize = sp->max_recv_size - sizeof(struct smbdirect_data_transfer), - }; - sc->recv_io.mem.cache = - kmem_cache_create(name, - sizeof(struct smbdirect_recv_io) + sp->max_recv_size, - &response_args, SLAB_HWCACHE_ALIGN); - if (!sc->recv_io.mem.cache) - goto out2; - - sc->recv_io.mem.pool = - mempool_create(sp->recv_credit_max, mempool_alloc_slab, - mempool_free_slab, sc->recv_io.mem.cache); - if (!sc->recv_io.mem.pool) - goto out3; - - rc = allocate_receive_buffers(sc, sp->recv_credit_max); - if (rc) { - log_rdma_event(ERR, "failed to allocate receive buffers\n"); - goto out4; - } - - return 0; - -out4: - mempool_destroy(sc->recv_io.mem.pool); -out3: - kmem_cache_destroy(sc->recv_io.mem.cache); -out2: - mempool_destroy(sc->send_io.mem.pool); -out1: - kmem_cache_destroy(sc->send_io.mem.cache); - return -ENOMEM; -} - /* Create a SMBD connection, called by upper layer */ static struct 
smbd_connection *_smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr, int port) @@ -1919,7 +1796,7 @@ static struct smbd_connection *_smbd_get_connection( log_rdma_event(INFO, "rdma_connect connected\n"); - rc = allocate_caches(sc); + rc = smbdirect_connection_create_mem_pools(sc); if (rc) { log_rdma_event(ERR, "cache allocation failed\n"); goto allocate_cache_failed; @@ -1958,7 +1835,7 @@ static struct smbd_connection *_smbd_get_connection( negotiation_failed: disable_delayed_work_sync(&sc->idle.timer_work); - destroy_caches(sc); + smbdirect_connection_destroy_mem_pools(sc); sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; rdma_disconnect(sc->rdma.cm_id); wait_event(sc->status_wait, From e5fd091663f82ee6c36cffd4bb93fcee9fc644c3 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 15 Sep 2025 02:26:47 +0200 Subject: [PATCH 074/145] smb: client: make use of smbdirect_map_sges_from_iter() This is basically a copy of smb_extract_iter_to_rdma() moved to common code. Before, we had the inconsistency that we called ib_dma_unmap_single() while we mapped using ib_dma_map_page() in smb_set_sge(). Now ib_dma_unmap_page() is used for consistency. It doesn't really matter as ib_dma_unmap_single() and ib_dma_unmap_page() both operate on dma_addr_t and dma_unmap_single_attrs() is just an alias for dma_unmap_page_attrs(). 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 241 +------------------------------------- 1 file changed, 5 insertions(+), 236 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 28fb77b2661b..0ccea382fdaa 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -36,17 +36,6 @@ static int smbd_post_send_empty(struct smbdirect_socket *sc); static void destroy_mr_list(struct smbdirect_socket *sc); static int allocate_mr_list(struct smbdirect_socket *sc); -struct smb_extract_to_rdma { - struct ib_sge *sge; - unsigned int nr_sge; - unsigned int max_sge; - struct ib_device *device; - u32 local_dma_lkey; - enum dma_data_direction direction; -}; -static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len, - struct smb_extract_to_rdma *rdma); - /* Port numbers for SMBD transport */ #define SMB_PORT 445 #define SMBD_PORT 5445 @@ -1268,9 +1257,9 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, /* Fill in the data payload to find out how much data we can add */ if (iter) { - struct smb_extract_to_rdma extract = { - .nr_sge = request->num_sge, - .max_sge = SMBDIRECT_SEND_IO_MAX_SGE, + struct smbdirect_map_sges extract = { + .num_sge = request->num_sge, + .max_sge = ARRAY_SIZE(request->sge), .sge = request->sge, .device = sc->ib.dev, .local_dma_lkey = sc->ib.pd->local_dma_lkey, @@ -1279,12 +1268,11 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, size_t payload_len = umin(*_remaining_data_length, sp->max_send_size - sizeof(*packet)); - rc = smb_extract_iter_to_rdma(iter, payload_len, - &extract); + rc = smbdirect_map_sges_from_iter(iter, payload_len, &extract); if (rc < 0) goto err_dma; data_length = rc; - request->num_sge = extract.nr_sge; + request->num_sge = extract.num_sge; 
*_remaining_data_length -= data_length; } else { data_length = 0; @@ -2653,222 +2641,3 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr) if (!kref_put(&mr->kref, smbd_mr_free_locked)) mutex_unlock(&mr->mutex); } - -static bool smb_set_sge(struct smb_extract_to_rdma *rdma, - struct page *lowest_page, size_t off, size_t len) -{ - struct ib_sge *sge = &rdma->sge[rdma->nr_sge]; - u64 addr; - - addr = ib_dma_map_page(rdma->device, lowest_page, - off, len, rdma->direction); - if (ib_dma_mapping_error(rdma->device, addr)) - return false; - - sge->addr = addr; - sge->length = len; - sge->lkey = rdma->local_dma_lkey; - rdma->nr_sge++; - return true; -} - -/* - * Extract page fragments from a BVEC-class iterator and add them to an RDMA - * element list. The pages are not pinned. - */ -static ssize_t smb_extract_bvec_to_rdma(struct iov_iter *iter, - struct smb_extract_to_rdma *rdma, - ssize_t maxsize) -{ - const struct bio_vec *bv = iter->bvec; - unsigned long start = iter->iov_offset; - unsigned int i; - ssize_t ret = 0; - - for (i = 0; i < iter->nr_segs; i++) { - size_t off, len; - - len = bv[i].bv_len; - if (start >= len) { - start -= len; - continue; - } - - len = min_t(size_t, maxsize, len - start); - off = bv[i].bv_offset + start; - - if (!smb_set_sge(rdma, bv[i].bv_page, off, len)) - return -EIO; - - ret += len; - maxsize -= len; - if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0) - break; - start = 0; - } - - if (ret > 0) - iov_iter_advance(iter, ret); - return ret; -} - -/* - * Extract fragments from a KVEC-class iterator and add them to an RDMA list. - * This can deal with vmalloc'd buffers as well as kmalloc'd or static buffers. - * The pages are not pinned. 
- */ -static ssize_t smb_extract_kvec_to_rdma(struct iov_iter *iter, - struct smb_extract_to_rdma *rdma, - ssize_t maxsize) -{ - const struct kvec *kv = iter->kvec; - unsigned long start = iter->iov_offset; - unsigned int i; - ssize_t ret = 0; - - for (i = 0; i < iter->nr_segs; i++) { - struct page *page; - unsigned long kaddr; - size_t off, len, seg; - - len = kv[i].iov_len; - if (start >= len) { - start -= len; - continue; - } - - kaddr = (unsigned long)kv[i].iov_base + start; - off = kaddr & ~PAGE_MASK; - len = min_t(size_t, maxsize, len - start); - kaddr &= PAGE_MASK; - - maxsize -= len; - do { - seg = min_t(size_t, len, PAGE_SIZE - off); - - if (is_vmalloc_or_module_addr((void *)kaddr)) - page = vmalloc_to_page((void *)kaddr); - else - page = virt_to_page((void *)kaddr); - - if (!smb_set_sge(rdma, page, off, seg)) - return -EIO; - - ret += seg; - len -= seg; - kaddr += PAGE_SIZE; - off = 0; - } while (len > 0 && rdma->nr_sge < rdma->max_sge); - - if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0) - break; - start = 0; - } - - if (ret > 0) - iov_iter_advance(iter, ret); - return ret; -} - -/* - * Extract folio fragments from a FOLIOQ-class iterator and add them to an RDMA - * list. The folios are not pinned. 
- */ -static ssize_t smb_extract_folioq_to_rdma(struct iov_iter *iter, - struct smb_extract_to_rdma *rdma, - ssize_t maxsize) -{ - const struct folio_queue *folioq = iter->folioq; - unsigned int slot = iter->folioq_slot; - ssize_t ret = 0; - size_t offset = iter->iov_offset; - - BUG_ON(!folioq); - - if (slot >= folioq_nr_slots(folioq)) { - folioq = folioq->next; - if (WARN_ON_ONCE(!folioq)) - return -EIO; - slot = 0; - } - - do { - struct folio *folio = folioq_folio(folioq, slot); - size_t fsize = folioq_folio_size(folioq, slot); - - if (offset < fsize) { - size_t part = umin(maxsize, fsize - offset); - - if (!smb_set_sge(rdma, folio_page(folio, 0), offset, part)) - return -EIO; - - offset += part; - ret += part; - maxsize -= part; - } - - if (offset >= fsize) { - offset = 0; - slot++; - if (slot >= folioq_nr_slots(folioq)) { - if (!folioq->next) { - WARN_ON_ONCE(ret < iter->count); - break; - } - folioq = folioq->next; - slot = 0; - } - } - } while (rdma->nr_sge < rdma->max_sge && maxsize > 0); - - iter->folioq = folioq; - iter->folioq_slot = slot; - iter->iov_offset = offset; - iter->count -= ret; - return ret; -} - -/* - * Extract page fragments from up to the given amount of the source iterator - * and build up an RDMA list that refers to all of those bits. The RDMA list - * is appended to, up to the maximum number of elements set in the parameter - * block. - * - * The extracted page fragments are not pinned or ref'd in any way; if an - * IOVEC/UBUF-type iterator is to be used, it should be converted to a - * BVEC-type iterator and the pages pinned, ref'd or otherwise held in some - * way. 
- */ -static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len, - struct smb_extract_to_rdma *rdma) -{ - ssize_t ret; - int before = rdma->nr_sge; - - switch (iov_iter_type(iter)) { - case ITER_BVEC: - ret = smb_extract_bvec_to_rdma(iter, rdma, len); - break; - case ITER_KVEC: - ret = smb_extract_kvec_to_rdma(iter, rdma, len); - break; - case ITER_FOLIOQ: - ret = smb_extract_folioq_to_rdma(iter, rdma, len); - break; - default: - WARN_ON_ONCE(1); - return -EIO; - } - - if (ret < 0) { - while (rdma->nr_sge > before) { - struct ib_sge *sge = &rdma->sge[rdma->nr_sge--]; - - ib_dma_unmap_single(rdma->device, sge->addr, sge->length, - rdma->direction); - sge->addr = 0; - } - } - - return ret; -} From 0b7da58fec9ae573263571d5574d6a44f52c8223 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 15 Sep 2025 07:55:19 +0200 Subject: [PATCH 075/145] smb: client: make use of smbdirect_connection_qp_event_handler() This is a copy of smbd_qp_async_error_upcall()... It will allow more code to be moved to common functions soon. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 0ccea382fdaa..3463d9fabe31 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -362,26 +362,6 @@ static int smbd_conn_upcall( return 0; } -/* Upcall from RDMA QP */ -static void -smbd_qp_async_error_upcall(struct ib_event *event, void *context) -{ - struct smbdirect_socket *sc = context; - - log_rdma_event(ERR, "%s on device %s socket %p\n", - ib_event_msg(event->event), event->device->name, sc); - - switch (event->event) { - case IB_EVENT_CQ_ERR: - case IB_EVENT_QP_FATAL: - smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); - break; - - default: - break; - } -} - static inline void *smbdirect_send_io_payload(struct smbdirect_send_io *request) { return (void *)request->packet; @@ -1724,7 +1704,7 @@ static struct smbd_connection *_smbd_get_connection( } memset(&qp_attr, 0, sizeof(qp_attr)); - qp_attr.event_handler = smbd_qp_async_error_upcall; + qp_attr.event_handler = smbdirect_connection_qp_event_handler; qp_attr.qp_context = sc; qp_attr.cap = qp_cap; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; From 6bcccfb0c0f214e2ee3f09125f0459c9fbfea766 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 18 Sep 2025 18:37:40 +0200 Subject: [PATCH 076/145] smb: client: make use of smbdirect_connection_negotiate_rdma_resources() It's good to have this logic in a central place, it will allow us to share more code soon. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 59 +++------------------------------------ 1 file changed, 4 insertions(+), 55 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 3463d9fabe31..7a77889d4648 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -212,7 +212,6 @@ static int smbd_conn_upcall( struct rdma_cm_id *id, struct rdma_cm_event *event) { struct smbdirect_socket *sc = id->context; - struct smbdirect_socket_parameters *sp = &sc->parameters; const char *event_name = rdma_event_msg(event->event); u8 peer_initiator_depth; u8 peer_responder_resources; @@ -273,60 +272,10 @@ static int smbd_conn_upcall( peer_initiator_depth = event->param.conn.initiator_depth; peer_responder_resources = event->param.conn.responder_resources; } - if (rdma_protocol_iwarp(id->device, id->port_num) && - event->param.conn.private_data_len == 8) { - /* - * Legacy clients with only iWarp MPA v1 support - * need a private blob in order to negotiate - * the IRD/ORD values. - */ - const __be32 *ird_ord_hdr = event->param.conn.private_data; - u32 ird32 = be32_to_cpu(ird_ord_hdr[0]); - u32 ord32 = be32_to_cpu(ird_ord_hdr[1]); - - /* - * cifs.ko sends the legacy IRD/ORD negotiation - * event if iWarp MPA v2 was used. - * - * Here we check that the values match and only - * mark the client as legacy if they don't match. - */ - if ((u32)event->param.conn.initiator_depth != ird32 || - (u32)event->param.conn.responder_resources != ord32) { - /* - * There are broken clients (old cifs.ko) - * using little endian and also - * struct rdma_conn_param only uses u8 - * for initiator_depth and responder_resources, - * so we truncate the value to U8_MAX. 
- * - * smb_direct_accept_client() will then - * do the real negotiation in order to - * select the minimum between client and - * server. - */ - ird32 = min_t(u32, ird32, U8_MAX); - ord32 = min_t(u32, ord32, U8_MAX); - - sc->rdma.legacy_iwarp = true; - peer_initiator_depth = (u8)ird32; - peer_responder_resources = (u8)ord32; - } - } - - /* - * negotiate the value by using the minimum - * between client and server if the client provided - * non 0 values. - */ - if (peer_initiator_depth != 0) - sp->initiator_depth = - min_t(u8, sp->initiator_depth, - peer_initiator_depth); - if (peer_responder_resources != 0) - sp->responder_resources = - min_t(u8, sp->responder_resources, - peer_responder_resources); + smbdirect_connection_negotiate_rdma_resources(sc, + peer_initiator_depth, + peer_responder_resources, + &event->param.conn); if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING)) break; From 8de5571045902b5cadee117ec02c62c2f6cb0886 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 16 Sep 2025 01:53:23 +0200 Subject: [PATCH 077/145] smb: client: make use of smbdirect_connection_{create,destroy}_qp() It's good to use common code for this and it will allow us to share more code in the next steps. Calling ib_drain_qp() twice is ok. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 105 +++----------------------------------- 1 file changed, 7 insertions(+), 98 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 7a77889d4648..6f60bed49d70 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1441,10 +1441,8 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "cancelling recv_io.posted.refill_work\n"); disable_work_sync(&sc->recv_io.posted.refill_work); - log_rdma_event(INFO, "destroying qp\n"); + log_rdma_event(INFO, "drain qp\n"); ib_drain_qp(sc->ib.qp); - rdma_destroy_qp(sc->rdma.cm_id); - sc->ib.qp = NULL; log_rdma_event(INFO, "cancelling idle timer\n"); disable_delayed_work_sync(&sc->idle.timer_work); @@ -1470,9 +1468,8 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "freeing mr list\n"); destroy_mr_list(sc); - ib_free_cq(sc->ib.send_cq); - ib_free_cq(sc->ib.recv_cq); - ib_dealloc_pd(sc->ib.pd); + log_rdma_event(INFO, "destroying qp\n"); + smbdirect_connection_destroy_qp(sc); rdma_destroy_id(sc->rdma.cm_id); /* free mempools */ @@ -1532,8 +1529,6 @@ static struct smbd_connection *_smbd_get_connection( struct smbdirect_socket_parameters init_params = {}; struct smbdirect_socket_parameters *sp; struct rdma_conn_param conn_param; - struct ib_qp_cap qp_cap; - struct ib_qp_init_attr qp_attr; struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr; struct ib_port_immutable port_immutable; __be32 ird_ord_hdr[2]; @@ -1569,6 +1564,7 @@ static struct smbd_connection *_smbd_get_connection( goto create_wq_failed; smbdirect_socket_prepare_create(sc, sp, workqueue); smbdirect_socket_set_logging(sc, NULL, smbd_logging_needed, smbd_logging_vaprintf); + sc->ib.poll_ctx = IB_POLL_SOFTIRQ; /* * from here we operate 
on the copy. */ @@ -1580,94 +1576,17 @@ static struct smbd_connection *_smbd_get_connection( goto create_id_failed; } - if (sp->send_credit_target > sc->ib.dev->attrs.max_cqe || - sp->send_credit_target > sc->ib.dev->attrs.max_qp_wr) { - log_rdma_event(ERR, "consider lowering send_credit_target = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", - sp->send_credit_target, - sc->ib.dev->attrs.max_cqe, - sc->ib.dev->attrs.max_qp_wr); - goto config_failed; - } - - if (sp->recv_credit_max > sc->ib.dev->attrs.max_cqe || - sp->recv_credit_max > sc->ib.dev->attrs.max_qp_wr) { - log_rdma_event(ERR, "consider lowering receive_credit_max = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", - sp->recv_credit_max, - sc->ib.dev->attrs.max_cqe, - sc->ib.dev->attrs.max_qp_wr); - goto config_failed; - } - - if (sc->ib.dev->attrs.max_send_sge < SMBDIRECT_SEND_IO_MAX_SGE || - sc->ib.dev->attrs.max_recv_sge < SMBDIRECT_RECV_IO_MAX_SGE) { - log_rdma_event(ERR, - "device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", - IB_DEVICE_NAME_MAX, - sc->ib.dev->name, - sc->ib.dev->attrs.max_send_sge, - sc->ib.dev->attrs.max_recv_sge); - goto config_failed; - } - sp->responder_resources = min_t(u8, sp->responder_resources, sc->ib.dev->attrs.max_qp_rd_atom); log_rdma_mr(INFO, "responder_resources=%d\n", sp->responder_resources); - /* - * We use allocate sp->responder_resources * 2 MRs - * and each MR needs WRs for REG and INV, so - * we use '* 4'. 
- * - * +1 for ib_drain_qp() - */ - memset(&qp_cap, 0, sizeof(qp_cap)); - qp_cap.max_send_wr = sp->send_credit_target + sp->responder_resources * 4 + 1; - qp_cap.max_recv_wr = sp->recv_credit_max + 1; - qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; - qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; - - sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); - if (IS_ERR(sc->ib.pd)) { - rc = PTR_ERR(sc->ib.pd); - sc->ib.pd = NULL; - log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc); - goto alloc_pd_failed; - } - - sc->ib.send_cq = - ib_alloc_cq_any(sc->ib.dev, sc, - qp_cap.max_send_wr, IB_POLL_SOFTIRQ); - if (IS_ERR(sc->ib.send_cq)) { - sc->ib.send_cq = NULL; - goto alloc_cq_failed; - } - - sc->ib.recv_cq = - ib_alloc_cq_any(sc->ib.dev, sc, - qp_cap.max_recv_wr, IB_POLL_SOFTIRQ); - if (IS_ERR(sc->ib.recv_cq)) { - sc->ib.recv_cq = NULL; - goto alloc_cq_failed; - } - - memset(&qp_attr, 0, sizeof(qp_attr)); - qp_attr.event_handler = smbdirect_connection_qp_event_handler; - qp_attr.qp_context = sc; - qp_attr.cap = qp_cap; - qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; - qp_attr.qp_type = IB_QPT_RC; - qp_attr.send_cq = sc->ib.send_cq; - qp_attr.recv_cq = sc->ib.recv_cq; - qp_attr.port_num = ~0; - - rc = rdma_create_qp(sc->rdma.cm_id, sc->ib.pd, &qp_attr); + rc = smbdirect_connection_create_qp(sc); if (rc) { - log_rdma_event(ERR, "rdma_create_qp failed %i\n", rc); + log_rdma_event(ERR, "smbdirect_connection_create_qp failed %i\n", rc); goto create_qp_failed; } - sc->ib.qp = sc->rdma.cm_id->qp; memset(&conn_param, 0, sizeof(conn_param)); conn_param.initiator_depth = sp->initiator_depth; @@ -1760,19 +1679,9 @@ static struct smbd_connection *_smbd_get_connection( allocate_cache_failed: rdma_connect_failed: - rdma_destroy_qp(sc->rdma.cm_id); + smbdirect_connection_destroy_qp(sc); create_qp_failed: -alloc_cq_failed: - if (sc->ib.send_cq) - ib_free_cq(sc->ib.send_cq); - if (sc->ib.recv_cq) - ib_free_cq(sc->ib.recv_cq); - - ib_dealloc_pd(sc->ib.pd); - -alloc_pd_failed: -config_failed: 
rdma_destroy_id(sc->rdma.cm_id); create_id_failed: From 6a67fe6212028754d3b3b57871916309a16863a6 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 17 Sep 2025 08:54:03 +0200 Subject: [PATCH 078/145] smb: client: initialize recv_io->cqe.done = recv_done just once smbdirect_recv_io structures are pre-allocated so we can set the callback function just once. This will make it easy to move smbd_post_recv() to common code soon. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 6f60bed49d70..6ba2c252ff1e 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1325,8 +1325,6 @@ static int smbd_post_recv( response->sge.length = sp->max_recv_size; response->sge.lkey = sc->ib.pd->local_dma_lkey; - response->cqe.done = recv_done; - recv_wr.wr_cqe = &response->cqe; recv_wr.next = NULL; recv_wr.sg_list = &response->sge; @@ -1534,6 +1532,7 @@ static struct smbd_connection *_smbd_get_connection( __be32 ird_ord_hdr[2]; char wq_name[80]; struct workqueue_struct *workqueue; + struct smbdirect_recv_io *recv_io; /* * Create the initial parameters @@ -1638,6 +1637,9 @@ static struct smbd_connection *_smbd_get_connection( goto allocate_cache_failed; } + list_for_each_entry(recv_io, &sc->recv_io.free.list, list) + recv_io->cqe.done = recv_done; + INIT_WORK(&sc->idle.immediate_work, send_immediate_empty_message); /* * start with the negotiate timeout and SMBDIRECT_KEEPALIVE_PENDING From 26003faa7d477eed4ceb5b5b49e4eb4a77bf2f6f Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 17 Sep 2025 09:18:08 +0200 Subject: [PATCH 079/145] smb: client: make use of smbdirect_connection_post_recv_io() The only difference is that 
smbdirect_connection_post_recv_io() returns early if the connection is already broken. And that the error code from ib_dma_mapping_error() (currently only -ENOMEM is possible) is returned instead of -EIO. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 46 ++------------------------------------- 1 file changed, 2 insertions(+), 44 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 6ba2c252ff1e..f8b204a9d304 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -27,10 +27,6 @@ static int smbd_post_send(struct smbdirect_socket *sc, struct smbdirect_send_batch *batch, struct smbdirect_send_io *request); -static int smbd_post_recv( - struct smbdirect_socket *sc, - struct smbdirect_recv_io *response); - static int smbd_post_send_empty(struct smbdirect_socket *sc); static void destroy_mr_list(struct smbdirect_socket *sc); @@ -424,7 +420,7 @@ static void smbd_post_send_credits(struct work_struct *work) break; response->first_segment = false; - rc = smbd_post_recv(sc, response); + rc = smbdirect_connection_post_recv_io(response); if (rc) { log_rdma_recv(ERR, "post_recv failed rc=%d\n", rc); @@ -1304,44 +1300,6 @@ static int smbd_post_send_full_iter(struct smbdirect_socket *sc, return rc; } -/* - * Post a receive request to the transport - * The remote peer can only send data when a receive request is posted - * The interaction is controlled by send/receive credit system - */ -static int smbd_post_recv( - struct smbdirect_socket *sc, struct smbdirect_recv_io *response) -{ - struct smbdirect_socket_parameters *sp = &sc->parameters; - struct ib_recv_wr recv_wr; - int rc = -EIO; - - response->sge.addr = ib_dma_map_single( - sc->ib.dev, response->packet, - sp->max_recv_size, DMA_FROM_DEVICE); - if (ib_dma_mapping_error(sc->ib.dev, 
response->sge.addr)) - return rc; - - response->sge.length = sp->max_recv_size; - response->sge.lkey = sc->ib.pd->local_dma_lkey; - - recv_wr.wr_cqe = &response->cqe; - recv_wr.next = NULL; - recv_wr.sg_list = &response->sge; - recv_wr.num_sge = 1; - - rc = ib_post_recv(sc->ib.qp, &recv_wr, NULL); - if (rc) { - ib_dma_unmap_single(sc->ib.dev, response->sge.addr, - response->sge.length, DMA_FROM_DEVICE); - response->sge.length = 0; - smbdirect_socket_schedule_cleanup(sc, rc); - log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc); - } - - return rc; -} - /* Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2 */ static int smbd_negotiate(struct smbdirect_socket *sc) { @@ -1353,7 +1311,7 @@ static int smbd_negotiate(struct smbdirect_socket *sc) sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING; sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REP; - rc = smbd_post_recv(sc, response); + rc = smbdirect_connection_post_recv_io(response); log_rdma_event(INFO, "smbd_post_recv rc=%d iov.addr=0x%llx iov.length=%u iov.lkey=0x%x\n", rc, response->sge.addr, response->sge.length, response->sge.lkey); From 73ec624781cd7a43a2dbad8c7d40133703089224 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 18 Sep 2025 21:39:25 +0200 Subject: [PATCH 080/145] smb: client: make use of smbdirect_connection_recv_io_refill_work() This is basically a copy of smbd_post_send_credits(), but there are several improvements compared to the existing function: We calculate the number of missing posted buffers by getting the difference between recv_io.credits.target and recv_io.posted.count, instead of the difference between recv_io.credits.target and recv_io.credits.count, because recv_io.credits.count is only updated once a message is sent to the peer. It was not really a problem before, because we have a fixed number of smbdirect_recv_io buffers, so the loop terminated when smbdirect_connection_get_recv_io() returns NULL. But using recv_io.posted.count makes it easier to understand. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 54 +-------------------------------------- 1 file changed, 1 insertion(+), 53 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index f8b204a9d304..e66fd17ffc67 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -400,58 +400,6 @@ static bool process_negotiation_response( return true; } -static void smbd_post_send_credits(struct work_struct *work) -{ - int rc; - struct smbdirect_recv_io *response; - struct smbdirect_socket *sc = - container_of(work, struct smbdirect_socket, recv_io.posted.refill_work); - int posted = 0; - - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { - return; - } - - if (sc->recv_io.credits.target > - atomic_read(&sc->recv_io.credits.count)) { - while (true) { - response = smbdirect_connection_get_recv_io(sc); - if (!response) - break; - - response->first_segment = false; - rc = smbdirect_connection_post_recv_io(response); - if (rc) { - log_rdma_recv(ERR, - "post_recv failed rc=%d\n", rc); - smbdirect_connection_put_recv_io(response); - break; - } - - atomic_inc(&sc->recv_io.posted.count); - posted += 1; - } - } - - atomic_add(posted, &sc->recv_io.credits.available); - - /* - * If the last send credit is waiting for credits - * it can grant we need to wake it up - */ - if (posted && - atomic_read(&sc->send_io.bcredits.count) == 0 && - atomic_read(&sc->send_io.credits.count) == 0) - wake_up(&sc->send_io.credits.wait_queue); - - /* Promptly send an immediate packet as defined in [MS-SMBD] 3.1.1.1 */ - if (atomic_read(&sc->recv_io.credits.count) < - sc->recv_io.credits.target - 1) { - log_keep_alive(INFO, "schedule send of an empty message\n"); - queue_work(sc->workqueue, &sc->idle.immediate_work); - } -} - /* Called from softirq, when recv is done */ static 
void recv_done(struct ib_cq *cq, struct ib_wc *wc) { @@ -1607,7 +1555,7 @@ static struct smbd_connection *_smbd_get_connection( mod_delayed_work(sc->workqueue, &sc->idle.timer_work, msecs_to_jiffies(sp->negotiate_timeout_msec)); - INIT_WORK(&sc->recv_io.posted.refill_work, smbd_post_send_credits); + INIT_WORK(&sc->recv_io.posted.refill_work, smbdirect_connection_recv_io_refill_work); rc = smbd_negotiate(sc); if (rc) { From 2a49b625189ebf43329299f47dd513840acd89ae Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 19 Sep 2025 08:35:51 +0200 Subject: [PATCH 081/145] smb: client: make use of functions from smbdirect_mr.c The copied code only got new names, some indentation/formatting changes, some variable names are changed too. They also only use struct smbdirect_socket instead of struct smbd_connection. But the logic is still the same. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 495 +------------------------------------- 1 file changed, 4 insertions(+), 491 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index e66fd17ffc67..43e79166b27a 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -29,9 +29,6 @@ static int smbd_post_send(struct smbdirect_socket *sc, static int smbd_post_send_empty(struct smbdirect_socket *sc); -static void destroy_mr_list(struct smbdirect_socket *sc); -static int allocate_mr_list(struct smbdirect_socket *sc); - /* Port numbers for SMBD transport */ #define SMB_PORT 445 #define SMBD_PORT 5445 @@ -1370,7 +1367,7 @@ void smbd_destroy(struct TCP_Server_Info *server) sc->recv_io.reassembly.data_length = 0; log_rdma_event(INFO, "freeing mr list\n"); - destroy_mr_list(sc); + smbdirect_connection_destroy_mr_list(sc); log_rdma_event(INFO, "destroying qp\n"); smbdirect_connection_destroy_qp(sc); 
@@ -1563,7 +1560,7 @@ static struct smbd_connection *_smbd_get_connection( goto negotiation_failed; } - rc = allocate_mr_list(sc); + rc = smbdirect_connection_create_mr_list(sc); if (rc) { log_rdma_mr(ERR, "memory registration allocation failed\n"); goto allocate_mr_failed; @@ -1877,291 +1874,6 @@ int smbd_send(struct TCP_Server_Info *server, return rc; } -static void register_mr_done(struct ib_cq *cq, struct ib_wc *wc) -{ - struct smbdirect_mr_io *mr = - container_of(wc->wr_cqe, struct smbdirect_mr_io, cqe); - struct smbdirect_socket *sc = mr->socket; - - if (wc->status) { - log_rdma_mr(ERR, "status=%d\n", wc->status); - smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); - } -} - -/* - * The work queue function that recovers MRs - * We need to call ib_dereg_mr() and ib_alloc_mr() before this MR can be used - * again. Both calls are slow, so finish them in a workqueue. This will not - * block I/O path. - * There is one workqueue that recovers MRs, there is no need to lock as the - * I/O requests calling smbd_register_mr will never update the links in the - * mr_list. 
- */ -static void smbd_mr_recovery_work(struct work_struct *work) -{ - struct smbdirect_socket *sc = - container_of(work, struct smbdirect_socket, mr_io.recovery_work); - struct smbdirect_socket_parameters *sp = &sc->parameters; - struct smbdirect_mr_io *smbdirect_mr; - int rc; - - list_for_each_entry(smbdirect_mr, &sc->mr_io.all.list, list) { - if (smbdirect_mr->state == SMBDIRECT_MR_ERROR) { - - /* recover this MR entry */ - rc = ib_dereg_mr(smbdirect_mr->mr); - if (rc) { - log_rdma_mr(ERR, - "ib_dereg_mr failed rc=%x\n", - rc); - smbdirect_socket_schedule_cleanup(sc, rc); - continue; - } - - smbdirect_mr->mr = ib_alloc_mr( - sc->ib.pd, sc->mr_io.type, - sp->max_frmr_depth); - if (IS_ERR(smbdirect_mr->mr)) { - rc = PTR_ERR(smbdirect_mr->mr); - log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", - sc->mr_io.type, - sp->max_frmr_depth); - smbdirect_socket_schedule_cleanup(sc, rc); - continue; - } - } else - /* This MR is being used, don't recover it */ - continue; - - smbdirect_mr->state = SMBDIRECT_MR_READY; - - /* smbdirect_mr->state is updated by this function - * and is read and updated by I/O issuing CPUs trying - * to get a MR, the call to atomic_inc_return - * implicates a memory barrier and guarantees this - * value is updated before waking up any calls to - * get_mr() from the I/O issuing CPUs - */ - if (atomic_inc_return(&sc->mr_io.ready.count) == 1) - wake_up(&sc->mr_io.ready.wait_queue); - } -} - -static void smbd_mr_disable_locked(struct smbdirect_mr_io *mr) -{ - struct smbdirect_socket *sc = mr->socket; - - lockdep_assert_held(&mr->mutex); - - if (mr->state == SMBDIRECT_MR_DISABLED) - return; - - if (mr->mr) - ib_dereg_mr(mr->mr); - if (mr->sgt.nents) - ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); - kfree(mr->sgt.sgl); - - mr->mr = NULL; - mr->sgt.sgl = NULL; - mr->sgt.nents = 0; - - mr->state = SMBDIRECT_MR_DISABLED; -} - -static void smbd_mr_free_locked(struct kref *kref) -{ - struct smbdirect_mr_io *mr = - 
container_of(kref, struct smbdirect_mr_io, kref); - - lockdep_assert_held(&mr->mutex); - - /* - * smbd_mr_disable_locked() should already be called! - */ - if (WARN_ON_ONCE(mr->state != SMBDIRECT_MR_DISABLED)) - smbd_mr_disable_locked(mr); - - mutex_unlock(&mr->mutex); - mutex_destroy(&mr->mutex); - kfree(mr); -} - -static void destroy_mr_list(struct smbdirect_socket *sc) -{ - struct smbdirect_mr_io *mr, *tmp; - LIST_HEAD(all_list); - unsigned long flags; - - disable_work_sync(&sc->mr_io.recovery_work); - - spin_lock_irqsave(&sc->mr_io.all.lock, flags); - list_splice_tail_init(&sc->mr_io.all.list, &all_list); - spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); - - list_for_each_entry_safe(mr, tmp, &all_list, list) { - mutex_lock(&mr->mutex); - - smbd_mr_disable_locked(mr); - list_del(&mr->list); - mr->socket = NULL; - - /* - * No kref_put_mutex() as it's already locked. - * - * If smbd_mr_free_locked() is called - * and the mutex is unlocked and mr is gone, - * in that case kref_put() returned 1. - * - * If kref_put() returned 0 we know that - * smbd_mr_free_locked() didn't - * run. Not by us nor by anyone else, as we - * still hold the mutex, so we need to unlock. - * - * If the mr is still registered it will - * be dangling (detached from the connection - * waiting for smbd_deregister_mr() to be - * called in order to free the memory. - */ - if (!kref_put(&mr->kref, smbd_mr_free_locked)) - mutex_unlock(&mr->mutex); - } -} - -/* - * Allocate MRs used for RDMA read/write - * The number of MRs will not exceed hardware capability in responder_resources - * All MRs are kept in mr_list. The MR can be recovered after it's used - * Recovery is done in smbd_mr_recovery_work. 
The content of list entry changes - * as MRs are used and recovered for I/O, but the list links will not change - */ -static int allocate_mr_list(struct smbdirect_socket *sc) -{ - struct smbdirect_socket_parameters *sp = &sc->parameters; - struct smbdirect_mr_io *mr; - int ret; - u32 i; - - if (sp->responder_resources == 0) { - log_rdma_mr(ERR, "responder_resources negotiated as 0\n"); - return -EINVAL; - } - - /* Allocate more MRs (2x) than hardware responder_resources */ - for (i = 0; i < sp->responder_resources * 2; i++) { - mr = kzalloc_obj(*mr); - if (!mr) { - ret = -ENOMEM; - goto kzalloc_mr_failed; - } - - kref_init(&mr->kref); - mutex_init(&mr->mutex); - - mr->mr = ib_alloc_mr(sc->ib.pd, - sc->mr_io.type, - sp->max_frmr_depth); - if (IS_ERR(mr->mr)) { - ret = PTR_ERR(mr->mr); - log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", - sc->mr_io.type, sp->max_frmr_depth); - goto ib_alloc_mr_failed; - } - - mr->sgt.sgl = kzalloc_objs(struct scatterlist, - sp->max_frmr_depth); - if (!mr->sgt.sgl) { - ret = -ENOMEM; - log_rdma_mr(ERR, "failed to allocate sgl\n"); - goto kcalloc_sgl_failed; - } - mr->state = SMBDIRECT_MR_READY; - mr->socket = sc; - - list_add_tail(&mr->list, &sc->mr_io.all.list); - atomic_inc(&sc->mr_io.ready.count); - } - - INIT_WORK(&sc->mr_io.recovery_work, smbd_mr_recovery_work); - - return 0; - -kcalloc_sgl_failed: - ib_dereg_mr(mr->mr); -ib_alloc_mr_failed: - mutex_destroy(&mr->mutex); - kfree(mr); -kzalloc_mr_failed: - destroy_mr_list(sc); - return ret; -} - -/* - * Get a MR from mr_list. This function waits until there is at least one - * MR available in the list. It may access the list while the - * smbd_mr_recovery_work is recovering the MR list. This doesn't need a lock - * as they never modify the same places. However, there may be several CPUs - * issuing I/O trying to get MR at the same time, mr_list_lock is used to - * protect this situation. 
- */ -static struct smbdirect_mr_io *get_mr(struct smbdirect_socket *sc) -{ - struct smbdirect_mr_io *ret; - unsigned long flags; - int rc; -again: - rc = wait_event_interruptible(sc->mr_io.ready.wait_queue, - atomic_read(&sc->mr_io.ready.count) || - sc->status != SMBDIRECT_SOCKET_CONNECTED); - if (rc) { - log_rdma_mr(ERR, "wait_event_interruptible rc=%x\n", rc); - return NULL; - } - - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { - log_rdma_mr(ERR, "sc->status=%x\n", sc->status); - return NULL; - } - - spin_lock_irqsave(&sc->mr_io.all.lock, flags); - list_for_each_entry(ret, &sc->mr_io.all.list, list) { - if (ret->state == SMBDIRECT_MR_READY) { - ret->state = SMBDIRECT_MR_REGISTERED; - kref_get(&ret->kref); - spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); - atomic_dec(&sc->mr_io.ready.count); - atomic_inc(&sc->mr_io.used.count); - return ret; - } - } - - spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); - /* - * It is possible that we could fail to get MR because other processes may - * try to acquire a MR at the same time. If this is the case, retry it. - */ - goto again; -} - -/* - * Transcribe the pages from an iterator into an MR scatterlist. 
- */ -static int smbd_iter_to_mr(struct iov_iter *iter, - struct sg_table *sgt, - unsigned int max_sg) -{ - int ret; - - memset(sgt->sgl, 0, max_sg * sizeof(struct scatterlist)); - - ret = extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0); - WARN_ON(ret < 0); - if (sgt->nents > 0) - sg_mark_end(&sgt->sgl[sgt->nents - 1]); - return ret; -} - /* * Register memory for RDMA read/write * iter: the buffer to register memory with @@ -2174,131 +1886,8 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, bool writing, bool need_invalidate) { struct smbdirect_socket *sc = &info->socket; - struct smbdirect_socket_parameters *sp = &sc->parameters; - struct smbdirect_mr_io *mr; - int rc, num_pages; - struct ib_reg_wr *reg_wr; - num_pages = iov_iter_npages(iter, sp->max_frmr_depth + 1); - if (num_pages > sp->max_frmr_depth) { - log_rdma_mr(ERR, "num_pages=%d max_frmr_depth=%d\n", - num_pages, sp->max_frmr_depth); - WARN_ON_ONCE(1); - return NULL; - } - - mr = get_mr(sc); - if (!mr) { - log_rdma_mr(ERR, "get_mr returning NULL\n"); - return NULL; - } - - mutex_lock(&mr->mutex); - - mr->dir = writing ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE; - mr->need_invalidate = need_invalidate; - mr->sgt.nents = 0; - mr->sgt.orig_nents = 0; - - log_rdma_mr(INFO, "num_pages=0x%x count=0x%zx depth=%u\n", - num_pages, iov_iter_count(iter), sp->max_frmr_depth); - smbd_iter_to_mr(iter, &mr->sgt, sp->max_frmr_depth); - - rc = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); - if (!rc) { - log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n", - num_pages, mr->dir, rc); - goto dma_map_error; - } - - rc = ib_map_mr_sg(mr->mr, mr->sgt.sgl, mr->sgt.nents, NULL, PAGE_SIZE); - if (rc != mr->sgt.nents) { - log_rdma_mr(ERR, - "ib_map_mr_sg failed rc = %d nents = %x\n", - rc, mr->sgt.nents); - goto map_mr_error; - } - - ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey)); - reg_wr = &mr->wr; - reg_wr->wr.opcode = IB_WR_REG_MR; - mr->cqe.done = register_mr_done; - reg_wr->wr.wr_cqe = &mr->cqe; - reg_wr->wr.num_sge = 0; - reg_wr->wr.send_flags = IB_SEND_SIGNALED; - reg_wr->mr = mr->mr; - reg_wr->key = mr->mr->rkey; - reg_wr->access = writing ? - IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : - IB_ACCESS_REMOTE_READ; - - /* - * There is no need for waiting for complemtion on ib_post_send - * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution - * on the next ib_post_send when we actually send I/O to remote peer - */ - rc = ib_post_send(sc->ib.qp, &reg_wr->wr, NULL); - if (!rc) { - /* - * get_mr() gave us a reference - * via kref_get(&mr->kref), we keep that and let - * the caller use smbd_deregister_mr() - * to remove it again. 
- */ - mutex_unlock(&mr->mutex); - return mr; - } - - log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n", - rc, reg_wr->key); - - /* If all failed, attempt to recover this MR by setting it SMBDIRECT_MR_ERROR*/ -map_mr_error: - ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); - -dma_map_error: - mr->sgt.nents = 0; - mr->state = SMBDIRECT_MR_ERROR; - if (atomic_dec_and_test(&sc->mr_io.used.count)) - wake_up(&sc->mr_io.cleanup.wait_queue); - - smbdirect_socket_schedule_cleanup(sc, rc); - - /* - * get_mr() gave us a reference - * via kref_get(&mr->kref), we need to remove it again - * on error. - * - * No kref_put_mutex() as it's already locked. - * - * If smbd_mr_free_locked() is called - * and the mutex is unlocked and mr is gone, - * in that case kref_put() returned 1. - * - * If kref_put() returned 0 we know that - * smbd_mr_free_locked() didn't - * run. Not by us nor by anyone else, as we - * still hold the mutex, so we need to unlock. - */ - if (!kref_put(&mr->kref, smbd_mr_free_locked)) - mutex_unlock(&mr->mutex); - - return NULL; -} - -static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc) -{ - struct smbdirect_mr_io *smbdirect_mr; - struct ib_cqe *cqe; - - cqe = wc->wr_cqe; - smbdirect_mr = container_of(cqe, struct smbdirect_mr_io, cqe); - smbdirect_mr->state = SMBDIRECT_MR_INVALIDATED; - if (wc->status != IB_WC_SUCCESS) { - log_rdma_mr(ERR, "invalidate failed status=%x\n", wc->status); - smbdirect_mr->state = SMBDIRECT_MR_ERROR; - } - complete(&smbdirect_mr->invalidate_done); + return smbdirect_connection_register_mr_io(sc, iter, writing, need_invalidate); } /* @@ -2309,81 +1898,5 @@ static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc) */ void smbd_deregister_mr(struct smbdirect_mr_io *mr) { - struct smbdirect_socket *sc = mr->socket; - - mutex_lock(&mr->mutex); - if (mr->state == SMBDIRECT_MR_DISABLED) - goto put_kref; - - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { - smbd_mr_disable_locked(mr); - goto 
put_kref; - } - - if (mr->need_invalidate) { - struct ib_send_wr *wr = &mr->inv_wr; - int rc; - - /* Need to finish local invalidation before returning */ - wr->opcode = IB_WR_LOCAL_INV; - mr->cqe.done = local_inv_done; - wr->wr_cqe = &mr->cqe; - wr->num_sge = 0; - wr->ex.invalidate_rkey = mr->mr->rkey; - wr->send_flags = IB_SEND_SIGNALED; - - init_completion(&mr->invalidate_done); - rc = ib_post_send(sc->ib.qp, wr, NULL); - if (rc) { - log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc); - smbd_mr_disable_locked(mr); - smbdirect_socket_schedule_cleanup(sc, rc); - goto done; - } - wait_for_completion(&mr->invalidate_done); - mr->need_invalidate = false; - } else - /* - * For remote invalidation, just set it to SMBDIRECT_MR_INVALIDATED - * and defer to mr_recovery_work to recover the MR for next use - */ - mr->state = SMBDIRECT_MR_INVALIDATED; - - if (mr->sgt.nents) { - ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); - mr->sgt.nents = 0; - } - - if (mr->state == SMBDIRECT_MR_INVALIDATED) { - mr->state = SMBDIRECT_MR_READY; - if (atomic_inc_return(&sc->mr_io.ready.count) == 1) - wake_up(&sc->mr_io.ready.wait_queue); - } else - /* - * Schedule the work to do MR recovery for future I/Os MR - * recovery is slow and don't want it to block current I/O - */ - queue_work(sc->workqueue, &sc->mr_io.recovery_work); - -done: - if (atomic_dec_and_test(&sc->mr_io.used.count)) - wake_up(&sc->mr_io.cleanup.wait_queue); - -put_kref: - /* - * No kref_put_mutex() as it's already locked. - * - * If smbd_mr_free_locked() is called - * and the mutex is unlocked and mr is gone, - * in that case kref_put() returned 1. - * - * If kref_put() returned 0 we know that - * smbd_mr_free_locked() didn't - * run. Not by us nor by anyone else, as we - * still hold the mutex, so we need to unlock - * and keep the mr in SMBDIRECT_MR_READY or - * SMBDIRECT_MR_ERROR state. 
- */ - if (!kref_put(&mr->kref, smbd_mr_free_locked)) - mutex_unlock(&mr->mutex); + smbdirect_connection_deregister_mr_io(mr); } From 2cafcddbdada359f36a93bd014eef7ea2186435d Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 13 Oct 2025 16:33:20 +0200 Subject: [PATCH 082/145] smb: client: make use of smbdirect_socket_destroy_sync() This is basically the same logic as before, but we now use common code, which will also be used by the server soon. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 63 +-------------------------------------- 1 file changed, 1 insertion(+), 62 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 43e79166b27a..bd93aee51557 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1309,8 +1309,6 @@ void smbd_destroy(struct TCP_Server_Info *server) { struct smbd_connection *info = server->smbd_conn; struct smbdirect_socket *sc; - struct smbdirect_recv_io *response; - unsigned long flags; if (!info) { log_rdma_event(INFO, "rdma session already destroyed\n"); @@ -1318,68 +1316,9 @@ void smbd_destroy(struct TCP_Server_Info *server) } sc = &info->socket; - log_rdma_event(INFO, "cancelling and disable disconnect_work\n"); - disable_work_sync(&sc->disconnect_work); - - log_rdma_event(INFO, "destroying rdma session\n"); - if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) - smbdirect_socket_cleanup_work(&sc->disconnect_work); - if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED) { - log_rdma_event(INFO, "wait for transport being disconnected\n"); - wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); - log_rdma_event(INFO, "waited for transport being disconnected\n"); - } - - /* - * Wake up all waiters in all wait queues - * in order to notice the broken connection. 
- * - * Most likely this was already called via - * smbdirect_socket_cleanup_work(), but call it again... - */ - smbdirect_socket_wake_up_all(sc); - - log_rdma_event(INFO, "cancelling recv_io.posted.refill_work\n"); - disable_work_sync(&sc->recv_io.posted.refill_work); - - log_rdma_event(INFO, "drain qp\n"); - ib_drain_qp(sc->ib.qp); - - log_rdma_event(INFO, "cancelling idle timer\n"); - disable_delayed_work_sync(&sc->idle.timer_work); - log_rdma_event(INFO, "cancelling send immediate work\n"); - disable_work_sync(&sc->idle.immediate_work); - - /* It's not possible for upper layer to get to reassembly */ - log_rdma_event(INFO, "drain the reassembly queue\n"); - do { - spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); - response = smbdirect_connection_reassembly_first_recv_io(sc); - if (response) { - list_del(&response->list); - spin_unlock_irqrestore( - &sc->recv_io.reassembly.lock, flags); - smbdirect_connection_put_recv_io(response); - } else - spin_unlock_irqrestore( - &sc->recv_io.reassembly.lock, flags); - } while (response); - sc->recv_io.reassembly.data_length = 0; - - log_rdma_event(INFO, "freeing mr list\n"); - smbdirect_connection_destroy_mr_list(sc); - - log_rdma_event(INFO, "destroying qp\n"); - smbdirect_connection_destroy_qp(sc); - rdma_destroy_id(sc->rdma.cm_id); - - /* free mempools */ - smbdirect_connection_destroy_mem_pools(sc); - - sc->status = SMBDIRECT_SOCKET_DESTROYED; + smbdirect_socket_destroy_sync(sc); destroy_workqueue(sc->workqueue); - log_rdma_event(INFO, "rdma session destroyed\n"); kfree(info); server->smbd_conn = NULL; } From edb9e514f0e058a924a169795fb0e34286da9572 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 13 Oct 2025 18:42:08 +0200 Subject: [PATCH 083/145] smb: client: make use of smbdirect_connection_recvmsg() This is basically the same as it was copied before... 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 131 +------------------------------------- 1 file changed, 1 insertion(+), 130 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index bd93aee51557..ffdb87d24b47 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1580,137 +1580,8 @@ struct smbd_connection *smbd_get_connection( int smbd_recv(struct smbd_connection *info, struct msghdr *msg) { struct smbdirect_socket *sc = &info->socket; - struct smbdirect_recv_io *response; - struct smbdirect_data_transfer *data_transfer; - size_t size = iov_iter_count(&msg->msg_iter); - int to_copy, to_read, data_read, offset; - u32 data_length, remaining_data_length, data_offset; - int rc; - if (WARN_ON_ONCE(iov_iter_rw(&msg->msg_iter) == WRITE)) - return -EINVAL; /* It's a bug in upper layer to get there */ - -again: - /* - * No need to hold the reassembly queue lock all the time as we are - * the only one reading from the front of the queue. The transport - * may add more entries to the back of the queue at the same time - */ - log_read(INFO, "size=%zd sc->recv_io.reassembly.data_length=%d\n", size, - sc->recv_io.reassembly.data_length); - if (sc->recv_io.reassembly.data_length >= size) { - int queue_length; - int queue_removed = 0; - unsigned long flags; - - /* - * Need to make sure reassembly_data_length is read before - * reading reassembly_queue_length and calling - * smbdirect_connection_reassembly_first_recv_io. 
This call is lock free - * as we never read at the end of the queue which are being - * updated in SOFTIRQ as more data is received - */ - virt_rmb(); - queue_length = sc->recv_io.reassembly.queue_length; - data_read = 0; - to_read = size; - offset = sc->recv_io.reassembly.first_entry_offset; - while (data_read < size) { - response = smbdirect_connection_reassembly_first_recv_io(sc); - data_transfer = smbdirect_recv_io_payload(response); - data_length = le32_to_cpu(data_transfer->data_length); - remaining_data_length = - le32_to_cpu( - data_transfer->remaining_data_length); - data_offset = le32_to_cpu(data_transfer->data_offset); - - /* - * The upper layer expects RFC1002 length at the - * beginning of the payload. Return it to indicate - * the total length of the packet. This minimize the - * change to upper layer packet processing logic. This - * will be eventually remove when an intermediate - * transport layer is added - */ - if (response->first_segment && size == 4) { - unsigned int rfc1002_len = - data_length + remaining_data_length; - __be32 rfc1002_hdr = cpu_to_be32(rfc1002_len); - if (copy_to_iter(&rfc1002_hdr, sizeof(rfc1002_hdr), - &msg->msg_iter) != sizeof(rfc1002_hdr)) - return -EFAULT; - data_read = 4; - response->first_segment = false; - log_read(INFO, "returning rfc1002 length %d\n", - rfc1002_len); - goto read_rfc1002_done; - } - - to_copy = min_t(int, data_length - offset, to_read); - if (copy_to_iter((char *)data_transfer + data_offset + offset, - to_copy, &msg->msg_iter) != to_copy) - return -EFAULT; - - /* move on to the next buffer? 
*/ - if (to_copy == data_length - offset) { - queue_length--; - /* - * No need to lock if we are not at the - * end of the queue - */ - if (queue_length) - list_del(&response->list); - else { - spin_lock_irqsave( - &sc->recv_io.reassembly.lock, flags); - list_del(&response->list); - spin_unlock_irqrestore( - &sc->recv_io.reassembly.lock, flags); - } - queue_removed++; - sc->statistics.dequeue_reassembly_queue++; - smbdirect_connection_put_recv_io(response); - offset = 0; - log_read(INFO, "smbdirect_connection_put_recv_io offset=0\n"); - } else - offset += to_copy; - - to_read -= to_copy; - data_read += to_copy; - - log_read(INFO, "_get_first_reassembly memcpy %d bytes data_transfer_length-offset=%d after that to_read=%d data_read=%d offset=%d\n", - to_copy, data_length - offset, - to_read, data_read, offset); - } - - spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); - sc->recv_io.reassembly.data_length -= data_read; - sc->recv_io.reassembly.queue_length -= queue_removed; - spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); - - sc->recv_io.reassembly.first_entry_offset = offset; - log_read(INFO, "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", - data_read, sc->recv_io.reassembly.data_length, - sc->recv_io.reassembly.first_entry_offset); -read_rfc1002_done: - return data_read; - } - - log_read(INFO, "wait_event on more data\n"); - rc = wait_event_interruptible( - sc->recv_io.reassembly.wait_queue, - sc->recv_io.reassembly.data_length >= size || - sc->status != SMBDIRECT_SOCKET_CONNECTED); - /* Don't return any data if interrupted */ - if (rc) - return rc; - - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { - log_read(ERR, "disconnected\n"); - return -ECONNABORTED; - } - - goto again; + return smbdirect_connection_recvmsg(sc, msg, 0); } /* From 8b72c199a9626cc1b53c8d579e9e4c6f23af8908 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 15:37:32 +0200 Subject: [PATCH 084/145] smb: client: make use 
of smbdirect_connection_grant_recv_credits() This already calls atomic_add(new_credits, &sc->recv_io.credits.count), so there's no need to do it in the caller anymore. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 51 +++------------------------------------ 1 file changed, 3 insertions(+), 48 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index ffdb87d24b47..4cc5dc825ee4 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -759,51 +759,6 @@ static int smbd_post_send_negotiate_req(struct smbdirect_socket *sc) return rc; } -/* - * Extend the credits to remote peer - * This implements [MS-SMBD] 3.1.5.9 - * The idea is that we should extend credits to remote peer as quickly as - * it's allowed, to maintain data flow. We allocate as much receive - * buffer as possible, and extend the receive credits to remote peer - * return value: the new credtis being granted. - */ -static int manage_credits_prior_sending(struct smbdirect_socket *sc) -{ - int missing; - int available; - int new_credits; - - if (atomic_read(&sc->recv_io.credits.count) >= sc->recv_io.credits.target) - return 0; - - missing = (int)sc->recv_io.credits.target - atomic_read(&sc->recv_io.credits.count); - available = atomic_xchg(&sc->recv_io.credits.available, 0); - new_credits = (u16)min3(U16_MAX, missing, available); - if (new_credits <= 0) { - /* - * If credits are available, but not granted - * we need to re-add them again. - */ - if (available) - atomic_add(available, &sc->recv_io.credits.available); - return 0; - } - - if (new_credits < available) { - /* - * Readd the remaining available again. 
- */ - available -= new_credits; - atomic_add(available, &sc->recv_io.credits.available); - } - - /* - * Remember we granted the credits - */ - atomic_add(new_credits, &sc->recv_io.credits.count); - return new_credits; -} - /* * Check if we need to send a KEEP_ALIVE message * The idle connection timer triggers a KEEP_ALIVE message when expires @@ -1048,7 +1003,7 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, int data_length; struct smbdirect_send_io *request; struct smbdirect_data_transfer *packet; - int new_credits = 0; + u16 new_credits = 0; struct smbdirect_send_batch _batch; if (!batch) { @@ -1077,7 +1032,7 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, goto err_wait_credit; } - new_credits = manage_credits_prior_sending(sc); + new_credits = smbdirect_connection_grant_recv_credits(sc); if (new_credits == 0 && atomic_read(&sc->send_io.credits.count) == 0 && atomic_read(&sc->recv_io.credits.count) == 0) { @@ -1094,7 +1049,7 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, goto err_wait_credit; } - new_credits = manage_credits_prior_sending(sc); + new_credits = smbdirect_connection_grant_recv_credits(sc); } request = smbdirect_connection_alloc_send_io(sc); From b942f351c25051f971a39fac06ebed02da9a648e Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 15:51:26 +0200 Subject: [PATCH 085/145] smb: client: make use of smbdirect_connection_request_keep_alive() This will help to share more common code soon. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 4cc5dc825ee4..2464e0617eec 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -759,32 +759,6 @@ static int smbd_post_send_negotiate_req(struct smbdirect_socket *sc) return rc; } -/* - * Check if we need to send a KEEP_ALIVE message - * The idle connection timer triggers a KEEP_ALIVE message when expires - * SMBDIRECT_FLAG_RESPONSE_REQUESTED is set in the message flag to have peer send - * back a response. - * return value: - * 1 if SMBDIRECT_FLAG_RESPONSE_REQUESTED needs to be set - * 0: otherwise - */ -static int manage_keep_alive_before_sending(struct smbdirect_socket *sc) -{ - struct smbdirect_socket_parameters *sp = &sc->parameters; - - if (sc->idle.keepalive == SMBDIRECT_KEEPALIVE_PENDING) { - sc->idle.keepalive = SMBDIRECT_KEEPALIVE_SENT; - /* - * Now use the keepalive timeout (instead of keepalive interval) - * in order to wait for a response - */ - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, - msecs_to_jiffies(sp->keepalive_timeout_msec)); - return 1; - } - return 0; -} - static int smbd_ib_post_send(struct smbdirect_socket *sc, struct ib_send_wr *wr) { @@ -1108,7 +1082,7 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, packet->credits_granted = cpu_to_le16(new_credits); packet->flags = 0; - if (manage_keep_alive_before_sending(sc)) + if (smbdirect_connection_request_keep_alive(sc)) packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); packet->reserved = 0; From 15c7e492610f001e1ff6480c6b5d9d1653afaa3c Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 20:33:10 +0200 Subject: [PATCH 086/145] 
smb: client: change smbd_post_send_empty() to void return The caller doesn't check, so we better call smbdirect_socket_schedule_cleanup() to handle the error. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 2464e0617eec..4ff593ccb371 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -27,8 +27,6 @@ static int smbd_post_send(struct smbdirect_socket *sc, struct smbdirect_send_batch *batch, struct smbdirect_send_io *request); -static int smbd_post_send_empty(struct smbdirect_socket *sc); - /* Port numbers for SMBD transport */ #define SMB_PORT 445 #define SMBD_PORT 5445 @@ -1144,12 +1142,17 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, * Empty message is used to extend credits to peer to for keep live * while there is no upper layer payload to send at the time */ -static int smbd_post_send_empty(struct smbdirect_socket *sc) +static void smbd_post_send_empty(struct smbdirect_socket *sc) { int remaining_data_length = 0; + int ret; sc->statistics.send_empty++; - return smbd_post_send_iter(sc, NULL, NULL, &remaining_data_length); + ret = smbd_post_send_iter(sc, NULL, NULL, &remaining_data_length); + if (ret < 0) { + log_rdma_send(ERR, "smbd_post_send_iter failed ret=%d\n", ret); + smbdirect_socket_schedule_cleanup(sc, ret); + } } static int smbd_post_send_full_iter(struct smbdirect_socket *sc, From b626ccd251ae9181dd716036718da7b7da042726 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 20:46:29 +0200 Subject: [PATCH 087/145] smb: client: let smbd_post_send_iter() get remaining_length and return data_length This lets the logic be like smb_direct_post_send_data(), so we can share common 
code in the next steps. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 4ff593ccb371..4c19ad453f93 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -967,7 +967,7 @@ static int wait_for_send_credits(struct smbdirect_socket *sc, static int smbd_post_send_iter(struct smbdirect_socket *sc, struct smbdirect_send_batch *batch, struct iov_iter *iter, - int *_remaining_data_length) + u32 remaining_data_length) { struct smbdirect_socket_parameters *sp = &sc->parameters; int rc; @@ -978,6 +978,18 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, u16 new_credits = 0; struct smbdirect_send_batch _batch; + if (iter) { + header_length = sizeof(struct smbdirect_data_transfer); + if (WARN_ON_ONCE(remaining_data_length == 0 || + iov_iter_count(iter) > remaining_data_length)) + return -EINVAL; + } else { + /* If this is a packet without payload, don't send padding */ + header_length = offsetof(struct smbdirect_data_transfer, padding); + if (WARN_ON_ONCE(remaining_data_length)) + return -EINVAL; + } + if (!batch) { smbd_send_batch_init(&_batch, false, 0); batch = &_batch; @@ -1032,12 +1044,6 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, memset(request->sge, 0, sizeof(request->sge)); - /* Map the packet to DMA */ - header_length = sizeof(struct smbdirect_data_transfer); - /* If this is a packet without payload, don't send padding */ - if (!iter) - header_length = offsetof(struct smbdirect_data_transfer, padding); - packet = smbdirect_send_io_payload(request); request->sge[0].addr = ib_dma_map_single(sc->ib.dev, (void *)packet, @@ -1062,7 +1068,7 @@ static int 
smbd_post_send_iter(struct smbdirect_socket *sc, .local_dma_lkey = sc->ib.pd->local_dma_lkey, .direction = DMA_TO_DEVICE, }; - size_t payload_len = umin(*_remaining_data_length, + size_t payload_len = umin(iov_iter_count(iter), sp->max_send_size - sizeof(*packet)); rc = smbdirect_map_sges_from_iter(iter, payload_len, &extract); @@ -1070,7 +1076,7 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, goto err_dma; data_length = rc; request->num_sge = extract.num_sge; - *_remaining_data_length -= data_length; + remaining_data_length -= data_length; } else { data_length = 0; } @@ -1089,7 +1095,7 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, else packet->data_offset = cpu_to_le32(24); packet->data_length = cpu_to_le32(data_length); - packet->remaining_data_length = cpu_to_le32(*_remaining_data_length); + packet->remaining_data_length = cpu_to_le32(remaining_data_length); packet->padding = 0; log_outgoing(INFO, "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n", @@ -1107,11 +1113,11 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, */ if (batch != &_batch) - return 0; + return data_length; rc = smbd_send_batch_flush(sc, batch, true); if (!rc) - return 0; + return data_length; goto err_flush; } @@ -1144,11 +1150,10 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, */ static void smbd_post_send_empty(struct smbdirect_socket *sc) { - int remaining_data_length = 0; int ret; sc->statistics.send_empty++; - ret = smbd_post_send_iter(sc, NULL, NULL, &remaining_data_length); + ret = smbd_post_send_iter(sc, NULL, NULL, 0); if (ret < 0) { log_rdma_send(ERR, "smbd_post_send_iter failed ret=%d\n", ret); smbdirect_socket_schedule_cleanup(sc, ret); @@ -1169,9 +1174,11 @@ static int smbd_post_send_full_iter(struct smbdirect_socket *sc, */ while (iov_iter_count(iter) > 0) { - rc = smbd_post_send_iter(sc, batch, iter, _remaining_data_length); + rc = smbd_post_send_iter(sc, batch, iter, 
*_remaining_data_length); if (rc < 0) break; + *_remaining_data_length -= rc; + rc = 0; } return rc; From 7c81e7bb1338b7c9a45f6f240aec3bc243abf0b6 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 21:03:45 +0200 Subject: [PATCH 088/145] smb: client: let smbd_post_send_full_iter() get remaining_length and return data_length This will simplify further changes in order to share more common code in future. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 4c19ad453f93..ffc6a4b6de39 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1163,9 +1163,9 @@ static void smbd_post_send_empty(struct smbdirect_socket *sc) static int smbd_post_send_full_iter(struct smbdirect_socket *sc, struct smbdirect_send_batch *batch, struct iov_iter *iter, - int *_remaining_data_length) + u32 remaining_data_length) { - int rc = 0; + int bytes = 0; /* * smbd_post_send_iter() respects the @@ -1174,14 +1174,16 @@ static int smbd_post_send_full_iter(struct smbdirect_socket *sc, */ while (iov_iter_count(iter) > 0) { - rc = smbd_post_send_iter(sc, batch, iter, *_remaining_data_length); + int rc; + + rc = smbd_post_send_iter(sc, batch, iter, remaining_data_length); if (rc < 0) - break; - *_remaining_data_length -= rc; - rc = 0; + return rc; + remaining_data_length -= rc; + bytes += rc; } - return rc; + return bytes; } /* Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2 */ @@ -1584,20 +1586,22 @@ int smbd_send(struct TCP_Server_Info *server, klen += rqst->rq_iov[i].iov_len; iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, klen); - rc = smbd_post_send_full_iter(sc, &batch, &iter, &remaining_data_length); 
+ rc = smbd_post_send_full_iter(sc, &batch, &iter, remaining_data_length); if (rc < 0) { error = rc; break; } + remaining_data_length -= rc; if (iov_iter_count(&rqst->rq_iter) > 0) { /* And then the data pages if there are any */ rc = smbd_post_send_full_iter(sc, &batch, &rqst->rq_iter, - &remaining_data_length); + remaining_data_length); if (rc < 0) { error = rc; break; } + remaining_data_length -= rc; } } while (++rqst_idx < num_rqst); From 63972da39f900b98c18b5283dcde74e3ce0909fb Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 22:23:10 +0200 Subject: [PATCH 089/145] smb: client: make use of smbdirect_connection_send_{single_iter,immediate_work}() With this the low level send functions are in common, we'll have to do some more changes in generic smb code in order to use smbdirect_connection_send_iter() instead of looping around smbdirect_connection_send_single_iter(). David's cleanups will allow us to use smbdirect_connection_send_iter(). Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: David Howells Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 389 ++------------------------------------ 1 file changed, 20 insertions(+), 369 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index ffc6a4b6de39..ee3347289c87 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -817,349 +817,6 @@ static int smbd_post_send(struct smbdirect_socket *sc, return smbd_ib_post_send(sc, &request->wr); } -static void smbd_send_batch_init(struct smbdirect_send_batch *batch, - bool need_invalidate_rkey, - unsigned int remote_key) -{ - INIT_LIST_HEAD(&batch->msg_list); - batch->wr_cnt = 0; - batch->need_invalidate_rkey = need_invalidate_rkey; - batch->remote_key = remote_key; - batch->credit = 0; -} - -static int smbd_send_batch_flush(struct smbdirect_socket *sc, - struct 
smbdirect_send_batch *batch, - bool is_last) -{ - struct smbdirect_send_io *first, *last; - int ret = 0; - - if (list_empty(&batch->msg_list)) - goto release_credit; - - first = list_first_entry(&batch->msg_list, - struct smbdirect_send_io, - sibling_list); - last = list_last_entry(&batch->msg_list, - struct smbdirect_send_io, - sibling_list); - - if (batch->need_invalidate_rkey) { - first->wr.opcode = IB_WR_SEND_WITH_INV; - first->wr.ex.invalidate_rkey = batch->remote_key; - batch->need_invalidate_rkey = false; - batch->remote_key = 0; - } - - last->wr.send_flags = IB_SEND_SIGNALED; - last->wr.wr_cqe = &last->cqe; - - /* - * Remove last from batch->msg_list - * and splice the rest of batch->msg_list - * to last->sibling_list. - * - * batch->msg_list is a valid empty list - * at the end. - */ - list_del_init(&last->sibling_list); - list_splice_tail_init(&batch->msg_list, &last->sibling_list); - batch->wr_cnt = 0; - - ret = smbd_ib_post_send(sc, &first->wr); - if (ret) { - struct smbdirect_send_io *sibling, *next; - - list_for_each_entry_safe(sibling, next, &last->sibling_list, sibling_list) { - list_del_init(&sibling->sibling_list); - smbdirect_connection_free_send_io(sibling); - } - smbdirect_connection_free_send_io(last); - } - -release_credit: - if (is_last && !ret && batch->credit) { - atomic_add(batch->credit, &sc->send_io.bcredits.count); - batch->credit = 0; - wake_up(&sc->send_io.bcredits.wait_queue); - } - - return ret; -} - -static int wait_for_credits(struct smbdirect_socket *sc, - wait_queue_head_t *waitq, atomic_t *total_credits, - int needed) -{ - int ret; - - do { - if (atomic_sub_return(needed, total_credits) >= 0) - return 0; - - atomic_add(needed, total_credits); - ret = wait_event_interruptible(*waitq, - atomic_read(total_credits) >= needed || - sc->status != SMBDIRECT_SOCKET_CONNECTED); - - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) - return -ENOTCONN; - else if (ret < 0) - return ret; - } while (true); -} - -static int 
wait_for_send_bcredit(struct smbdirect_socket *sc, - struct smbdirect_send_batch *batch) -{ - int ret; - - if (batch->credit) - return 0; - - ret = wait_for_credits(sc, - &sc->send_io.bcredits.wait_queue, - &sc->send_io.bcredits.count, - 1); - if (ret) - return ret; - - batch->credit = 1; - return 0; -} - -static int wait_for_send_lcredit(struct smbdirect_socket *sc, - struct smbdirect_send_batch *batch) -{ - if (batch && (atomic_read(&sc->send_io.lcredits.count) <= 1)) { - int ret; - - ret = smbd_send_batch_flush(sc, batch, false); - if (ret) - return ret; - } - - return wait_for_credits(sc, - &sc->send_io.lcredits.wait_queue, - &sc->send_io.lcredits.count, - 1); -} - -static int wait_for_send_credits(struct smbdirect_socket *sc, - struct smbdirect_send_batch *batch) -{ - if (batch && - (batch->wr_cnt >= 16 || atomic_read(&sc->send_io.credits.count) <= 1)) { - int ret; - - ret = smbd_send_batch_flush(sc, batch, false); - if (ret) - return ret; - } - - return wait_for_credits(sc, - &sc->send_io.credits.wait_queue, - &sc->send_io.credits.count, - 1); -} - -static int smbd_post_send_iter(struct smbdirect_socket *sc, - struct smbdirect_send_batch *batch, - struct iov_iter *iter, - u32 remaining_data_length) -{ - struct smbdirect_socket_parameters *sp = &sc->parameters; - int rc; - int header_length; - int data_length; - struct smbdirect_send_io *request; - struct smbdirect_data_transfer *packet; - u16 new_credits = 0; - struct smbdirect_send_batch _batch; - - if (iter) { - header_length = sizeof(struct smbdirect_data_transfer); - if (WARN_ON_ONCE(remaining_data_length == 0 || - iov_iter_count(iter) > remaining_data_length)) - return -EINVAL; - } else { - /* If this is a packet without payload, don't send padding */ - header_length = offsetof(struct smbdirect_data_transfer, padding); - if (WARN_ON_ONCE(remaining_data_length)) - return -EINVAL; - } - - if (!batch) { - smbd_send_batch_init(&_batch, false, 0); - batch = &_batch; - } - - rc = wait_for_send_bcredit(sc, 
batch); - if (rc) { - log_outgoing(ERR, "disconnected not sending on wait_bcredit\n"); - rc = -EAGAIN; - goto err_wait_bcredit; - } - - rc = wait_for_send_lcredit(sc, batch); - if (rc) { - log_outgoing(ERR, "disconnected not sending on wait_lcredit\n"); - rc = -EAGAIN; - goto err_wait_lcredit; - } - - rc = wait_for_send_credits(sc, batch); - if (rc) { - log_outgoing(ERR, "disconnected not sending on wait_credit\n"); - rc = -EAGAIN; - goto err_wait_credit; - } - - new_credits = smbdirect_connection_grant_recv_credits(sc); - if (new_credits == 0 && - atomic_read(&sc->send_io.credits.count) == 0 && - atomic_read(&sc->recv_io.credits.count) == 0) { - queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); - rc = wait_event_interruptible(sc->send_io.credits.wait_queue, - atomic_read(&sc->send_io.credits.count) >= 1 || - atomic_read(&sc->recv_io.credits.available) >= 1 || - sc->status != SMBDIRECT_SOCKET_CONNECTED); - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) - rc = -ENOTCONN; - if (rc < 0) { - log_outgoing(ERR, "disconnected not sending on last credit\n"); - rc = -EAGAIN; - goto err_wait_credit; - } - - new_credits = smbdirect_connection_grant_recv_credits(sc); - } - - request = smbdirect_connection_alloc_send_io(sc); - if (IS_ERR(request)) { - rc = PTR_ERR(request); - goto err_alloc; - } - - memset(request->sge, 0, sizeof(request->sge)); - - packet = smbdirect_send_io_payload(request); - request->sge[0].addr = ib_dma_map_single(sc->ib.dev, - (void *)packet, - header_length, - DMA_TO_DEVICE); - if (ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) { - rc = -EIO; - goto err_dma; - } - - request->sge[0].length = header_length; - request->sge[0].lkey = sc->ib.pd->local_dma_lkey; - request->num_sge = 1; - - /* Fill in the data payload to find out how much data we can add */ - if (iter) { - struct smbdirect_map_sges extract = { - .num_sge = request->num_sge, - .max_sge = ARRAY_SIZE(request->sge), - .sge = request->sge, - .device = sc->ib.dev, - .local_dma_lkey 
= sc->ib.pd->local_dma_lkey, - .direction = DMA_TO_DEVICE, - }; - size_t payload_len = umin(iov_iter_count(iter), - sp->max_send_size - sizeof(*packet)); - - rc = smbdirect_map_sges_from_iter(iter, payload_len, &extract); - if (rc < 0) - goto err_dma; - data_length = rc; - request->num_sge = extract.num_sge; - remaining_data_length -= data_length; - } else { - data_length = 0; - } - - /* Fill in the packet header */ - packet->credits_requested = cpu_to_le16(sp->send_credit_target); - packet->credits_granted = cpu_to_le16(new_credits); - - packet->flags = 0; - if (smbdirect_connection_request_keep_alive(sc)) - packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); - - packet->reserved = 0; - if (!data_length) - packet->data_offset = 0; - else - packet->data_offset = cpu_to_le32(24); - packet->data_length = cpu_to_le32(data_length); - packet->remaining_data_length = cpu_to_le32(remaining_data_length); - packet->padding = 0; - - log_outgoing(INFO, "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n", - le16_to_cpu(packet->credits_requested), - le16_to_cpu(packet->credits_granted), - le32_to_cpu(packet->data_offset), - le32_to_cpu(packet->data_length), - le32_to_cpu(packet->remaining_data_length)); - - rc = smbd_post_send(sc, batch, request); - if (!rc) { - /* - * From here request is moved to batch - * and we should not free it explicitly. 
- */ - - if (batch != &_batch) - return data_length; - - rc = smbd_send_batch_flush(sc, batch, true); - if (!rc) - return data_length; - - goto err_flush; - } - -err_dma: - smbdirect_connection_free_send_io(request); - -err_flush: -err_alloc: - atomic_inc(&sc->send_io.credits.count); - wake_up(&sc->send_io.credits.wait_queue); - -err_wait_credit: - atomic_inc(&sc->send_io.lcredits.count); - wake_up(&sc->send_io.lcredits.wait_queue); - -err_wait_lcredit: - atomic_add(batch->credit, &sc->send_io.bcredits.count); - batch->credit = 0; - wake_up(&sc->send_io.bcredits.wait_queue); - -err_wait_bcredit: - return rc; -} - -/* - * Send an empty message - * Empty message is used to extend credits to peer to for keep live - * while there is no upper layer payload to send at the time - */ -static void smbd_post_send_empty(struct smbdirect_socket *sc) -{ - int ret; - - sc->statistics.send_empty++; - ret = smbd_post_send_iter(sc, NULL, NULL, 0); - if (ret < 0) { - log_rdma_send(ERR, "smbd_post_send_iter failed ret=%d\n", ret); - smbdirect_socket_schedule_cleanup(sc, ret); - } -} - static int smbd_post_send_full_iter(struct smbdirect_socket *sc, struct smbdirect_send_batch *batch, struct iov_iter *iter, @@ -1168,7 +825,7 @@ static int smbd_post_send_full_iter(struct smbdirect_socket *sc, int bytes = 0; /* - * smbd_post_send_iter() respects the + * smbdirect_connection_send_single_iter() respects the * negotiated max_send_size, so we need to * loop until the full iter is posted */ @@ -1176,7 +833,11 @@ static int smbd_post_send_full_iter(struct smbdirect_socket *sc, while (iov_iter_count(iter) > 0) { int rc; - rc = smbd_post_send_iter(sc, batch, iter, remaining_data_length); + rc = smbdirect_connection_send_single_iter(sc, + batch, + iter, + 0, /* flags */ + remaining_data_length); if (rc < 0) return rc; remaining_data_length -= rc; @@ -1229,18 +890,6 @@ static int smbd_negotiate(struct smbdirect_socket *sc) return rc; } -static void send_immediate_empty_message(struct work_struct 
*work) -{ - struct smbdirect_socket *sc = - container_of(work, struct smbdirect_socket, idle.immediate_work); - - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) - return; - - log_keep_alive(INFO, "send an empty message\n"); - smbd_post_send_empty(sc); -} - /* * Destroy the transport and related RDMA and memory resources * Need to go through all the pending counters and make sure on one is using @@ -1423,7 +1072,7 @@ static struct smbd_connection *_smbd_get_connection( list_for_each_entry(recv_io, &sc->recv_io.free.list, list) recv_io->cqe.done = recv_done; - INIT_WORK(&sc->idle.immediate_work, send_immediate_empty_message); + INIT_WORK(&sc->idle.immediate_work, smbdirect_connection_send_immediate_work); /* * start with the negotiate timeout and SMBDIRECT_KEEPALIVE_PENDING * so that the timer will cause a disconnect. @@ -1539,7 +1188,8 @@ int smbd_send(struct TCP_Server_Info *server, struct smbdirect_socket_parameters *sp = &sc->parameters; struct smb_rqst *rqst; struct iov_iter iter; - struct smbdirect_send_batch batch; + struct smbdirect_send_batch_storage bstorage; + struct smbdirect_send_batch *batch; unsigned int remaining_data_length, klen; int rc, i, rqst_idx; int error = 0; @@ -1567,7 +1217,7 @@ int smbd_send(struct TCP_Server_Info *server, num_rqst, remaining_data_length); rqst_idx = 0; - smbd_send_batch_init(&batch, false, 0); + batch = smbdirect_init_send_batch_storage(&bstorage, false, 0); do { rqst = &rqst_array[rqst_idx]; @@ -1586,7 +1236,7 @@ int smbd_send(struct TCP_Server_Info *server, klen += rqst->rq_iov[i].iov_len; iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, klen); - rc = smbd_post_send_full_iter(sc, &batch, &iter, remaining_data_length); + rc = smbd_post_send_full_iter(sc, batch, &iter, remaining_data_length); if (rc < 0) { error = rc; break; @@ -1595,7 +1245,7 @@ int smbd_send(struct TCP_Server_Info *server, if (iov_iter_count(&rqst->rq_iter) > 0) { /* And then the data pages if there are any */ - rc = 
smbd_post_send_full_iter(sc, &batch, &rqst->rq_iter, + rc = smbd_post_send_full_iter(sc, batch, &rqst->rq_iter, remaining_data_length); if (rc < 0) { error = rc; @@ -1606,7 +1256,7 @@ int smbd_send(struct TCP_Server_Info *server, } while (++rqst_idx < num_rqst); - rc = smbd_send_batch_flush(sc, &batch, true); + rc = smbdirect_connection_send_batch_flush(sc, batch, true); if (unlikely(!rc && error)) rc = error; @@ -1617,14 +1267,15 @@ int smbd_send(struct TCP_Server_Info *server, * that means all the I/Os have been out and we are good to return */ - wait_event(sc->send_io.pending.zero_wait_queue, - atomic_read(&sc->send_io.pending.count) == 0 || - sc->status != SMBDIRECT_SOCKET_CONNECTED); + error = rc; + rc = smbdirect_connection_send_wait_zero_pending(sc); + if (unlikely(rc && !error)) + error = -EAGAIN; - if (sc->status != SMBDIRECT_SOCKET_CONNECTED && rc == 0) - rc = -EAGAIN; + if (unlikely(error)) + return error; - return rc; + return 0; } /* From 5bd752e7749e4abcae71d95e22b75272ac767b06 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 21 Oct 2025 20:43:44 +0200 Subject: [PATCH 090/145] smb: client: introduce and use smbd_mr_fill_buffer_descriptor() This will allow us to make struct smbdirect_mr_io private in future. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 8 ++------ fs/smb/client/smbdirect.c | 6 ++++++ fs/smb/client/smbdirect.h | 2 ++ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 59d7418cc480..0aeb23aed8eb 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4554,9 +4554,7 @@ smb2_new_read_req(void **buf, unsigned int *total_len, req->ReadChannelInfoLength = cpu_to_le16(sizeof(struct smbdirect_buffer_descriptor_v1)); v1 = (struct smbdirect_buffer_descriptor_v1 *) &req->Buffer[0]; - v1->offset = cpu_to_le64(rdata->mr->mr->iova); - v1->token = cpu_to_le32(rdata->mr->mr->rkey); - v1->length = cpu_to_le32(rdata->mr->mr->length); + smbd_mr_fill_buffer_descriptor(rdata->mr, v1); *total_len += sizeof(*v1) - 1; } @@ -5155,9 +5153,7 @@ smb2_async_writev(struct cifs_io_subrequest *wdata) req->WriteChannelInfoLength = cpu_to_le16(sizeof(struct smbdirect_buffer_descriptor_v1)); v1 = (struct smbdirect_buffer_descriptor_v1 *) &req->Buffer[0]; - v1->offset = cpu_to_le64(wdata->mr->mr->iova); - v1->token = cpu_to_le32(wdata->mr->mr->rkey); - v1->length = cpu_to_le32(wdata->mr->mr->length); + smbd_mr_fill_buffer_descriptor(wdata->mr, v1); rqst.rq_iov[0].iov_len += sizeof(*v1); diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index ee3347289c87..93a91d4e0da5 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1294,6 +1294,12 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, return smbdirect_connection_register_mr_io(sc, iter, writing, need_invalidate); } +void smbd_mr_fill_buffer_descriptor(struct smbdirect_mr_io *mr, + struct smbdirect_buffer_descriptor_v1 *v1) +{ + smbdirect_mr_io_fill_buffer_descriptor(mr, v1); +} + /* * Deregister a MR after 
I/O is done * This function may wait if remote invalidation is not used diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 577d37dbeb8a..09f7dd14b2c1 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -60,6 +60,8 @@ int smbd_send(struct TCP_Server_Info *server, struct smbdirect_mr_io *smbd_register_mr( struct smbd_connection *info, struct iov_iter *iter, bool writing, bool need_invalidate); +void smbd_mr_fill_buffer_descriptor(struct smbdirect_mr_io *mr, + struct smbdirect_buffer_descriptor_v1 *v1); void smbd_deregister_mr(struct smbdirect_mr_io *mr); #else From 0b0a1a3b2d25464ed65a89e2cfbdd41ea78b2502 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 21 Oct 2025 21:07:26 +0200 Subject: [PATCH 091/145] smb: client: introduce and use smbd_debug_proc_show() This will allow us to make struct smbdirect_socket private in future. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 67 +------------------------------------- fs/smb/client/smbdirect.c | 15 +++++++++ fs/smb/client/smbdirect.h | 2 ++ 3 files changed, 18 insertions(+), 66 deletions(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 217444e3e6d0..0691d2a3e04b 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -23,7 +23,6 @@ #endif #ifdef CONFIG_CIFS_SMB_DIRECT #include "smbdirect.h" -#include "../common/smbdirect/smbdirect_pdu.h" #endif #include "cifs_swn.h" #include "cached_dir.h" @@ -452,11 +451,6 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) c = 0; spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { -#ifdef CONFIG_CIFS_SMB_DIRECT - struct smbdirect_socket *sc; - struct smbdirect_socket_parameters *sp; -#endif - /* channel info will be printed as a part 
of sessions below */ if (SERVER_IS_CHAN(server)) continue; @@ -471,66 +465,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, "\nClientGUID: %pUL", server->client_guid); spin_unlock(&server->srv_lock); #ifdef CONFIG_CIFS_SMB_DIRECT - if (!server->rdma) - goto skip_rdma; - - if (!server->smbd_conn) { - seq_printf(m, "\nSMBDirect transport not available"); - goto skip_rdma; - } - sc = &server->smbd_conn->socket; - sp = &sc->parameters; - - seq_printf(m, "\nSMBDirect protocol version: 0x%x " - "transport status: %s (%u)", - SMBDIRECT_V1, - smbdirect_socket_status_string(sc->status), - sc->status); - seq_printf(m, "\nConn receive_credit_max: %u " - "send_credit_target: %u max_send_size: %u", - sp->recv_credit_max, - sp->send_credit_target, - sp->max_send_size); - seq_printf(m, "\nConn max_fragmented_recv_size: %u " - "max_fragmented_send_size: %u max_receive_size:%u", - sp->max_fragmented_recv_size, - sp->max_fragmented_send_size, - sp->max_recv_size); - seq_printf(m, "\nConn keep_alive_interval: %u " - "max_readwrite_size: %u rdma_readwrite_threshold: %u", - sp->keepalive_interval_msec * 1000, - sp->max_read_write_size, - server->rdma_readwrite_threshold); - seq_printf(m, "\nDebug count_get_receive_buffer: %llu " - "count_put_receive_buffer: %llu count_send_empty: %llu", - sc->statistics.get_receive_buffer, - sc->statistics.put_receive_buffer, - sc->statistics.send_empty); - seq_printf(m, "\nRead Queue " - "count_enqueue_reassembly_queue: %llu " - "count_dequeue_reassembly_queue: %llu " - "reassembly_data_length: %u " - "reassembly_queue_length: %u", - sc->statistics.enqueue_reassembly_queue, - sc->statistics.dequeue_reassembly_queue, - sc->recv_io.reassembly.data_length, - sc->recv_io.reassembly.queue_length); - seq_printf(m, "\nCurrent Credits send_credits: %u " - "receive_credits: %u receive_credit_target: %u", - atomic_read(&sc->send_io.credits.count), - atomic_read(&sc->recv_io.credits.count), - sc->recv_io.credits.target); - 
seq_printf(m, "\nPending send_pending: %u ", - atomic_read(&sc->send_io.pending.count)); - seq_printf(m, "\nMR responder_resources: %u " - "max_frmr_depth: %u mr_type: 0x%x", - sp->responder_resources, - sp->max_frmr_depth, - sc->mr_io.type); - seq_printf(m, "\nMR mr_ready_count: %u mr_used_count: %u", - atomic_read(&sc->mr_io.ready.count), - atomic_read(&sc->mr_io.used.count)); -skip_rdma: + smbd_debug_proc_show(server, m); #endif seq_printf(m, "\nNumber of credits: %d,%d,%d Dialect 0x%x", server->credits, diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 93a91d4e0da5..c229f493edb4 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1310,3 +1310,18 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr) { smbdirect_connection_deregister_mr_io(mr); } + +void smbd_debug_proc_show(struct TCP_Server_Info *server, struct seq_file *m) +{ + if (!server->rdma) + return; + + if (!server->smbd_conn) { + seq_puts(m, "\nSMBDirect transport not available"); + return; + } + + smbdirect_connection_legacy_debug_proc_show(&server->smbd_conn->socket, + server->rdma_readwrite_threshold, + m); +} diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 09f7dd14b2c1..7986e87bf384 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -64,6 +64,8 @@ void smbd_mr_fill_buffer_descriptor(struct smbdirect_mr_io *mr, struct smbdirect_buffer_descriptor_v1 *v1); void smbd_deregister_mr(struct smbdirect_mr_io *mr); +void smbd_debug_proc_show(struct TCP_Server_Info *server, struct seq_file *m); + #else #define cifs_rdma_enabled(server) 0 struct smbd_connection {}; From 7dbfc0d910e0364117e01b6c41fb641360852497 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 19 Sep 2025 22:54:36 +0200 Subject: [PATCH 092/145] smb: client: make use of smbdirect_socket_init_new() and smbdirect_connect_sync() This means we finally only use common functions in the client. 
We still use the embedded struct smbdirect_socket and are able to access internals, but they will be removed in the next commits as well. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 891 ++++---------------------------------- fs/smb/client/smbdirect.h | 1 + 2 files changed, 76 insertions(+), 816 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index c229f493edb4..4b29b6ca4e74 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -10,23 +10,11 @@ #include #include #include -#include "../common/smbdirect/smbdirect_pdu.h" #include "smbdirect.h" #include "cifs_debug.h" #include "cifsproto.h" #include "smb2proto.h" -const struct smbdirect_socket_parameters *smbd_get_parameters(struct smbd_connection *conn) -{ - struct smbdirect_socket *sc = &conn->socket; - - return &sc->parameters; -} - -static int smbd_post_send(struct smbdirect_socket *sc, - struct smbdirect_send_batch *batch, - struct smbdirect_send_io *request); - /* Port numbers for SMBD transport */ #define SMB_PORT 445 #define SMBD_PORT 5445 @@ -40,21 +28,12 @@ static int smbd_post_send(struct smbdirect_socket *sc, /* The timeout to wait for a keepalive message from peer in seconds */ #define KEEPALIVE_RECV_TIMEOUT 5 -/* SMBD minimum receive size and fragmented sized defined in [MS-SMBD] */ -#define SMBD_MIN_RECEIVE_SIZE 128 -#define SMBD_MIN_FRAGMENTED_SIZE 131072 - /* * Default maximum number of RDMA read/write outstanding on this connection * This value is possibly decreased during QP creation on hardware limit */ #define SMBD_CM_RESPONDER_RESOURCES 32 -/* Maximum number of retries on data transfer operations */ -#define SMBD_CM_RETRY 6 -/* No need to retry on Receiver Not Ready since SMBD manages credits */ -#define SMBD_CM_RNR_RETRY 0 - /* * User configurable 
initial values per SMBD transport connection * as defined in [MS-SMBD] 3.1.1.1 @@ -198,625 +177,6 @@ do { \ #define log_rdma_mr(level, fmt, args...) \ log_rdma(level, LOG_RDMA_MR, fmt, ##args) -/* Upcall from RDMA CM */ -static int smbd_conn_upcall( - struct rdma_cm_id *id, struct rdma_cm_event *event) -{ - struct smbdirect_socket *sc = id->context; - const char *event_name = rdma_event_msg(event->event); - u8 peer_initiator_depth; - u8 peer_responder_resources; - - log_rdma_event(INFO, "event=%s status=%d\n", - event_name, event->status); - - switch (event->event) { - case RDMA_CM_EVENT_ADDR_RESOLVED: - if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING)) - break; - sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED; - wake_up(&sc->status_wait); - break; - - case RDMA_CM_EVENT_ROUTE_RESOLVED: - if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING)) - break; - sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED; - wake_up(&sc->status_wait); - break; - - case RDMA_CM_EVENT_ADDR_ERROR: - log_rdma_event(ERR, "connecting failed event=%s\n", event_name); - sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED; - smbdirect_socket_cleanup_work(&sc->disconnect_work); - break; - - case RDMA_CM_EVENT_ROUTE_ERROR: - log_rdma_event(ERR, "connecting failed event=%s\n", event_name); - sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED; - smbdirect_socket_cleanup_work(&sc->disconnect_work); - break; - - case RDMA_CM_EVENT_ESTABLISHED: - log_rdma_event(INFO, "connected event=%s\n", event_name); - - /* - * Here we work around an inconsistency between - * iWarp and other devices (at least rxe and irdma using RoCEv2) - */ - if (rdma_protocol_iwarp(id->device, id->port_num)) { - /* - * iWarp devices report the peer's values - * with the perspective of the peer here. - * Tested with siw and irdma (in iwarp mode) - * We need to change to our perspective here, - * so we need to switch the values. 
- */ - peer_initiator_depth = event->param.conn.responder_resources; - peer_responder_resources = event->param.conn.initiator_depth; - } else { - /* - * Non iWarp devices report the peer's values - * already changed to our perspective here. - * Tested with rxe and irdma (in roce mode). - */ - peer_initiator_depth = event->param.conn.initiator_depth; - peer_responder_resources = event->param.conn.responder_resources; - } - smbdirect_connection_negotiate_rdma_resources(sc, - peer_initiator_depth, - peer_responder_resources, - &event->param.conn); - - if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING)) - break; - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED; - wake_up(&sc->status_wait); - break; - - case RDMA_CM_EVENT_CONNECT_ERROR: - case RDMA_CM_EVENT_UNREACHABLE: - case RDMA_CM_EVENT_REJECTED: - log_rdma_event(ERR, "connecting failed event=%s\n", event_name); - sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED; - smbdirect_socket_cleanup_work(&sc->disconnect_work); - break; - - case RDMA_CM_EVENT_DEVICE_REMOVAL: - case RDMA_CM_EVENT_DISCONNECTED: - /* This happens when we fail the negotiation */ - if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_FAILED) { - log_rdma_event(ERR, "event=%s during negotiation\n", event_name); - } - - sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - smbdirect_socket_cleanup_work(&sc->disconnect_work); - break; - - default: - log_rdma_event(ERR, "unexpected event=%s status=%d\n", - event_name, event->status); - break; - } - - return 0; -} - -static inline void *smbdirect_send_io_payload(struct smbdirect_send_io *request) -{ - return (void *)request->packet; -} - -static inline void *smbdirect_recv_io_payload(struct smbdirect_recv_io *response) -{ - return (void *)response->packet; -} - -static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp) -{ - log_rdma_event(INFO, "resp message min_version %u max_version %u negotiated_version %u credits_requested %u credits_granted %u status %u 
max_readwrite_size %u preferred_send_size %u max_receive_size %u max_fragmented_size %u\n", - resp->min_version, resp->max_version, - resp->negotiated_version, resp->credits_requested, - resp->credits_granted, resp->status, - resp->max_readwrite_size, resp->preferred_send_size, - resp->max_receive_size, resp->max_fragmented_size); -} - -/* - * Process a negotiation response message, according to [MS-SMBD]3.1.5.7 - * response, packet_length: the negotiation response message - * return value: true if negotiation is a success, false if failed - */ -static bool process_negotiation_response( - struct smbdirect_recv_io *response, int packet_length) -{ - struct smbdirect_socket *sc = response->socket; - struct smbdirect_socket_parameters *sp = &sc->parameters; - struct smbdirect_negotiate_resp *packet = smbdirect_recv_io_payload(response); - - if (packet_length < sizeof(struct smbdirect_negotiate_resp)) { - log_rdma_event(ERR, - "error: packet_length=%d\n", packet_length); - return false; - } - - if (le16_to_cpu(packet->negotiated_version) != SMBDIRECT_V1) { - log_rdma_event(ERR, "error: negotiated_version=%x\n", - le16_to_cpu(packet->negotiated_version)); - return false; - } - - if (packet->credits_requested == 0) { - log_rdma_event(ERR, "error: credits_requested==0\n"); - return false; - } - sc->recv_io.credits.target = le16_to_cpu(packet->credits_requested); - sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); - - if (packet->credits_granted == 0) { - log_rdma_event(ERR, "error: credits_granted==0\n"); - return false; - } - atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target); - atomic_set(&sc->send_io.credits.count, le16_to_cpu(packet->credits_granted)); - - if (le32_to_cpu(packet->preferred_send_size) > sp->max_recv_size) { - log_rdma_event(ERR, "error: preferred_send_size=%d\n", - le32_to_cpu(packet->preferred_send_size)); - return false; - } - sp->max_recv_size = le32_to_cpu(packet->preferred_send_size); - - if 
(le32_to_cpu(packet->max_receive_size) < SMBD_MIN_RECEIVE_SIZE) { - log_rdma_event(ERR, "error: max_receive_size=%d\n", - le32_to_cpu(packet->max_receive_size)); - return false; - } - sp->max_send_size = min_t(u32, sp->max_send_size, - le32_to_cpu(packet->max_receive_size)); - - if (le32_to_cpu(packet->max_fragmented_size) < - SMBD_MIN_FRAGMENTED_SIZE) { - log_rdma_event(ERR, "error: max_fragmented_size=%d\n", - le32_to_cpu(packet->max_fragmented_size)); - return false; - } - sp->max_fragmented_send_size = - le32_to_cpu(packet->max_fragmented_size); - - - sp->max_read_write_size = min_t(u32, - le32_to_cpu(packet->max_readwrite_size), - sp->max_frmr_depth * PAGE_SIZE); - sp->max_frmr_depth = sp->max_read_write_size / PAGE_SIZE; - - atomic_set(&sc->send_io.bcredits.count, 1); - sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER; - return true; -} - -/* Called from softirq, when recv is done */ -static void recv_done(struct ib_cq *cq, struct ib_wc *wc) -{ - struct smbdirect_data_transfer *data_transfer; - struct smbdirect_recv_io *response = - container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); - struct smbdirect_socket *sc = response->socket; - struct smbdirect_socket_parameters *sp = &sc->parameters; - int current_recv_credits; - u16 old_recv_credit_target; - u32 data_offset = 0; - u32 data_length = 0; - u32 remaining_data_length = 0; - bool negotiate_done = false; - - log_rdma_recv(INFO, - "response=0x%p type=%d wc status=%s wc opcode %d byte_len=%d pkey_index=%u\n", - response, sc->recv_io.expected, - ib_wc_status_msg(wc->status), wc->opcode, - wc->byte_len, wc->pkey_index); - - if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { - if (wc->status != IB_WC_WR_FLUSH_ERR) - log_rdma_recv(ERR, "wc->status=%s opcode=%d\n", - ib_wc_status_msg(wc->status), wc->opcode); - goto error; - } - - ib_dma_sync_single_for_cpu( - wc->qp->device, - response->sge.addr, - response->sge.length, - DMA_FROM_DEVICE); - - /* - * Reset timer to the keepalive interval in 
- * order to trigger our next keepalive message. - */ - sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, - msecs_to_jiffies(sp->keepalive_interval_msec)); - - switch (sc->recv_io.expected) { - /* SMBD negotiation response */ - case SMBDIRECT_EXPECT_NEGOTIATE_REP: - dump_smbdirect_negotiate_resp(smbdirect_recv_io_payload(response)); - sc->recv_io.reassembly.full_packet_received = true; - negotiate_done = - process_negotiation_response(response, wc->byte_len); - smbdirect_connection_put_recv_io(response); - if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_NEGOTIATE_RUNNING)) - negotiate_done = false; - if (!negotiate_done) { - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; - smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); - } else { - sc->status = SMBDIRECT_SOCKET_CONNECTED; - wake_up(&sc->status_wait); - } - - return; - - /* SMBD data transfer packet */ - case SMBDIRECT_EXPECT_DATA_TRANSFER: - data_transfer = smbdirect_recv_io_payload(response); - - if (wc->byte_len < - offsetof(struct smbdirect_data_transfer, padding)) - goto error; - - remaining_data_length = le32_to_cpu(data_transfer->remaining_data_length); - data_offset = le32_to_cpu(data_transfer->data_offset); - data_length = le32_to_cpu(data_transfer->data_length); - if (wc->byte_len < data_offset || - (u64)wc->byte_len < (u64)data_offset + data_length) - goto error; - - if (remaining_data_length > sp->max_fragmented_recv_size || - data_length > sp->max_fragmented_recv_size || - (u64)remaining_data_length + (u64)data_length > (u64)sp->max_fragmented_recv_size) - goto error; - - if (data_length) { - if (sc->recv_io.reassembly.full_packet_received) - response->first_segment = true; - - if (le32_to_cpu(data_transfer->remaining_data_length)) - sc->recv_io.reassembly.full_packet_received = false; - else - sc->recv_io.reassembly.full_packet_received = true; - } - - atomic_dec(&sc->recv_io.posted.count); - current_recv_credits = 
atomic_dec_return(&sc->recv_io.credits.count); - - old_recv_credit_target = sc->recv_io.credits.target; - sc->recv_io.credits.target = - le16_to_cpu(data_transfer->credits_requested); - sc->recv_io.credits.target = - min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); - sc->recv_io.credits.target = - max_t(u16, sc->recv_io.credits.target, 1); - if (le16_to_cpu(data_transfer->credits_granted)) { - atomic_add(le16_to_cpu(data_transfer->credits_granted), - &sc->send_io.credits.count); - /* - * We have new send credits granted from remote peer - * If any sender is waiting for credits, unblock it - */ - wake_up(&sc->send_io.credits.wait_queue); - } - - log_incoming(INFO, "data flags %d data_offset %d data_length %d remaining_data_length %d\n", - le16_to_cpu(data_transfer->flags), - le32_to_cpu(data_transfer->data_offset), - le32_to_cpu(data_transfer->data_length), - le32_to_cpu(data_transfer->remaining_data_length)); - - /* Send an immediate response right away if requested */ - if (le16_to_cpu(data_transfer->flags) & - SMBDIRECT_FLAG_RESPONSE_REQUESTED) { - log_keep_alive(INFO, "schedule send of immediate response\n"); - queue_work(sc->workqueue, &sc->idle.immediate_work); - } - - /* - * If this is a packet with data playload place the data in - * reassembly queue and wake up the reading thread - */ - if (data_length) { - if (current_recv_credits <= (sc->recv_io.credits.target / 4) || - sc->recv_io.credits.target > old_recv_credit_target) - queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); - - smbdirect_connection_reassembly_append_recv_io(sc, response, data_length); - wake_up(&sc->recv_io.reassembly.wait_queue); - } else - smbdirect_connection_put_recv_io(response); - - return; - - case SMBDIRECT_EXPECT_NEGOTIATE_REQ: - /* Only server... */ - break; - } - - /* - * This is an internal error! 
- */ - log_rdma_recv(ERR, "unexpected response type=%d\n", sc->recv_io.expected); - WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_DATA_TRANSFER); -error: - smbdirect_connection_put_recv_io(response); - smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); -} - -static struct rdma_cm_id *smbd_create_id( - struct smbdirect_socket *sc, - struct sockaddr *dstaddr, int port) -{ - struct smbdirect_socket_parameters *sp = &sc->parameters; - struct rdma_cm_id *id; - u8 node_type = RDMA_NODE_UNSPECIFIED; - int rc; - __be16 *sport; - - id = rdma_create_id(&init_net, smbd_conn_upcall, sc, - RDMA_PS_TCP, IB_QPT_RC); - if (IS_ERR(id)) { - rc = PTR_ERR(id); - log_rdma_event(ERR, "rdma_create_id() failed %i\n", rc); - return id; - } - - switch (port) { - case SMBD_PORT: - /* - * only allow iWarp devices - * for port 5445. - */ - node_type = RDMA_NODE_RNIC; - break; - case SMB_PORT: - /* - * only allow InfiniBand, RoCEv1 or RoCEv2 - * devices for port 445. - * - * (Basically don't allow iWarp devices) - */ - node_type = RDMA_NODE_IB_CA; - break; - } - rc = rdma_restrict_node_type(id, node_type); - if (rc) { - log_rdma_event(ERR, "rdma_restrict_node_type(%u) failed %i\n", - node_type, rc); - goto out; - } - - if (dstaddr->sa_family == AF_INET6) - sport = &((struct sockaddr_in6 *)dstaddr)->sin6_port; - else - sport = &((struct sockaddr_in *)dstaddr)->sin_port; - - *sport = htons(port); - - WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED); - sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING; - rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)dstaddr, - sp->resolve_addr_timeout_msec); - if (rc) { - log_rdma_event(ERR, "rdma_resolve_addr() failed %i\n", rc); - goto out; - } - rc = wait_event_interruptible_timeout( - sc->status_wait, - sc->status != SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING, - msecs_to_jiffies(sp->resolve_addr_timeout_msec)); - /* e.g. 
if interrupted returns -ERESTARTSYS */ - if (rc < 0) { - log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc); - goto out; - } - if (sc->status == SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING) { - rc = -ETIMEDOUT; - log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc); - goto out; - } - if (sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED) { - rc = -EHOSTUNREACH; - log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc); - goto out; - } - - WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED); - sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING; - rc = rdma_resolve_route(id, sp->resolve_route_timeout_msec); - if (rc) { - log_rdma_event(ERR, "rdma_resolve_route() failed %i\n", rc); - goto out; - } - rc = wait_event_interruptible_timeout( - sc->status_wait, - sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING, - msecs_to_jiffies(sp->resolve_route_timeout_msec)); - /* e.g. if interrupted returns -ERESTARTSYS */ - if (rc < 0) { - log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc); - goto out; - } - if (sc->status == SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING) { - rc = -ETIMEDOUT; - log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc); - goto out; - } - if (sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED) { - rc = -ENETUNREACH; - log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc); - goto out; - } - - return id; - -out: - rdma_destroy_id(id); - return ERR_PTR(rc); -} - -static int smbd_ia_open( - struct smbdirect_socket *sc, - struct sockaddr *dstaddr, int port) -{ - struct smbdirect_socket_parameters *sp = &sc->parameters; - int rc; - - WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED); - sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED; - - sc->rdma.cm_id = smbd_create_id(sc, dstaddr, port); - if (IS_ERR(sc->rdma.cm_id)) { - rc = PTR_ERR(sc->rdma.cm_id); - goto out1; - } - sc->ib.dev = sc->rdma.cm_id->device; - - if (!smbdirect_frwr_is_supported(&sc->ib.dev->attrs)) { - log_rdma_event(ERR, "Fast 
Registration Work Requests (FRWR) is not supported\n"); - log_rdma_event(ERR, "Device capability flags = %llx max_fast_reg_page_list_len = %u\n", - sc->ib.dev->attrs.device_cap_flags, - sc->ib.dev->attrs.max_fast_reg_page_list_len); - rc = -EPROTONOSUPPORT; - goto out2; - } - sp->max_frmr_depth = min_t(u32, - sp->max_frmr_depth, - sc->ib.dev->attrs.max_fast_reg_page_list_len); - sc->mr_io.type = IB_MR_TYPE_MEM_REG; - if (sc->ib.dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) - sc->mr_io.type = IB_MR_TYPE_SG_GAPS; - - return 0; - -out2: - rdma_destroy_id(sc->rdma.cm_id); - sc->rdma.cm_id = NULL; - -out1: - return rc; -} - -/* - * Send a negotiation request message to the peer - * The negotiation procedure is in [MS-SMBD] 3.1.5.2 and 3.1.5.3 - * After negotiation, the transport is connected and ready for - * carrying upper layer SMB payload - */ -static int smbd_post_send_negotiate_req(struct smbdirect_socket *sc) -{ - struct smbdirect_socket_parameters *sp = &sc->parameters; - int rc; - struct smbdirect_send_io *request; - struct smbdirect_negotiate_req *packet; - - request = smbdirect_connection_alloc_send_io(sc); - if (IS_ERR(request)) - return PTR_ERR(request); - - packet = smbdirect_send_io_payload(request); - packet->min_version = cpu_to_le16(SMBDIRECT_V1); - packet->max_version = cpu_to_le16(SMBDIRECT_V1); - packet->reserved = 0; - packet->credits_requested = cpu_to_le16(sp->send_credit_target); - packet->preferred_send_size = cpu_to_le32(sp->max_send_size); - packet->max_receive_size = cpu_to_le32(sp->max_recv_size); - packet->max_fragmented_size = - cpu_to_le32(sp->max_fragmented_recv_size); - - request->sge[0].addr = ib_dma_map_single( - sc->ib.dev, (void *)packet, - sizeof(*packet), DMA_TO_DEVICE); - if (ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) { - rc = -EIO; - goto dma_mapping_failed; - } - request->num_sge = 1; - - request->sge[0].length = sizeof(*packet); - request->sge[0].lkey = sc->ib.pd->local_dma_lkey; - request->num_sge = 1; - - rc = 
smbd_post_send(sc, NULL, request); - if (!rc) - return 0; - - if (rc == -EAGAIN) - rc = -EIO; - -dma_mapping_failed: - smbdirect_connection_free_send_io(request); - return rc; -} - -static int smbd_ib_post_send(struct smbdirect_socket *sc, - struct ib_send_wr *wr) -{ - int ret; - - atomic_inc(&sc->send_io.pending.count); - ret = ib_post_send(sc->ib.qp, wr, NULL); - if (ret) { - pr_err("failed to post send: %d\n", ret); - smbdirect_socket_schedule_cleanup(sc, ret); - ret = -EAGAIN; - } - return ret; -} - -/* Post the send request */ -static int smbd_post_send(struct smbdirect_socket *sc, - struct smbdirect_send_batch *batch, - struct smbdirect_send_io *request) -{ - int i; - - for (i = 0; i < request->num_sge; i++) { - log_rdma_send(INFO, - "rdma_request sge[%d] addr=0x%llx length=%u\n", - i, request->sge[i].addr, request->sge[i].length); - ib_dma_sync_single_for_device( - sc->ib.dev, - request->sge[i].addr, - request->sge[i].length, - DMA_TO_DEVICE); - } - - request->cqe.done = smbdirect_connection_send_io_done; - request->wr.next = NULL; - request->wr.sg_list = request->sge; - request->wr.num_sge = request->num_sge; - request->wr.opcode = IB_WR_SEND; - - if (batch) { - request->wr.wr_cqe = NULL; - request->wr.send_flags = 0; - if (!list_empty(&batch->msg_list)) { - struct smbdirect_send_io *last; - - last = list_last_entry(&batch->msg_list, - struct smbdirect_send_io, - sibling_list); - last->wr.next = &request->wr; - } - list_add_tail(&request->sibling_list, &batch->msg_list); - batch->wr_cnt++; - return 0; - } - - request->wr.wr_cqe = &request->cqe; - request->wr.send_flags = IB_SEND_SIGNALED; - return smbd_ib_post_send(sc, &request->wr); -} - static int smbd_post_send_full_iter(struct smbdirect_socket *sc, struct smbdirect_send_batch *batch, struct iov_iter *iter, @@ -847,49 +207,6 @@ static int smbd_post_send_full_iter(struct smbdirect_socket *sc, return bytes; } -/* Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2 */ -static int smbd_negotiate(struct 
smbdirect_socket *sc) -{ - struct smbdirect_socket_parameters *sp = &sc->parameters; - int rc; - struct smbdirect_recv_io *response = smbdirect_connection_get_recv_io(sc); - - WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED); - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING; - - sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REP; - rc = smbdirect_connection_post_recv_io(response); - log_rdma_event(INFO, "smbd_post_recv rc=%d iov.addr=0x%llx iov.length=%u iov.lkey=0x%x\n", - rc, response->sge.addr, - response->sge.length, response->sge.lkey); - if (rc) { - smbdirect_connection_put_recv_io(response); - return rc; - } - - rc = smbd_post_send_negotiate_req(sc); - if (rc) - return rc; - - rc = wait_event_interruptible_timeout( - sc->status_wait, - sc->status != SMBDIRECT_SOCKET_NEGOTIATE_RUNNING, - msecs_to_jiffies(sp->negotiate_timeout_msec)); - log_rdma_event(INFO, "wait_event_interruptible_timeout rc=%d\n", rc); - - if (sc->status == SMBDIRECT_SOCKET_CONNECTED) - return 0; - - if (rc == 0) - rc = -ETIMEDOUT; - else if (rc == -ERESTARTSYS) - rc = -EINTR; - else - rc = -ENOTCONN; - - return rc; -} - /* * Destroy the transport and related RDMA and memory resources * Need to go through all the pending counters and make sure on one is using @@ -908,7 +225,7 @@ void smbd_destroy(struct TCP_Server_Info *server) smbdirect_socket_destroy_sync(sc); - destroy_workqueue(sc->workqueue); + destroy_workqueue(info->workqueue); kfree(info); server->smbd_conn = NULL; } @@ -930,10 +247,8 @@ int smbd_reconnect(struct TCP_Server_Info *server) * This is possible if transport is disconnected and we haven't received * notification from RDMA, but upper layer has detected timeout */ - if (server->smbd_conn->socket.status == SMBDIRECT_SOCKET_CONNECTED) { - log_rdma_event(INFO, "disconnecting transport\n"); - smbd_destroy(server); - } + log_rdma_event(INFO, "disconnecting transport\n"); + smbd_destroy(server); create_conn: log_rdma_event(INFO, "creating rdma session\n"); @@ 
-953,23 +268,40 @@ int smbd_reconnect(struct TCP_Server_Info *server) static struct smbd_connection *_smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr, int port) { - int rc; + struct net *net = cifs_net_ns(server); struct smbd_connection *info; struct smbdirect_socket *sc; struct smbdirect_socket_parameters init_params = {}; struct smbdirect_socket_parameters *sp; - struct rdma_conn_param conn_param; - struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr; - struct ib_port_immutable port_immutable; - __be32 ird_ord_hdr[2]; + __be16 *sport; + u64 port_flags = 0; char wq_name[80]; - struct workqueue_struct *workqueue; - struct smbdirect_recv_io *recv_io; + int ret; + + switch (port) { + case SMBD_PORT: + /* + * only allow iWarp devices + * for port 5445. + */ + port_flags |= SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW; + break; + case SMB_PORT: + /* + * only allow InfiniBand, RoCEv1 or RoCEv2 + * devices for port 445. + * + * (Basically don't allow iWarp devices) + */ + port_flags |= SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB; + break; + } /* * Create the initial parameters */ sp = &init_params; + sp->flags = port_flags; sp->resolve_addr_timeout_msec = RDMA_RESOLVE_TIMEOUT; sp->resolve_route_timeout_msec = RDMA_RESOLVE_TIMEOUT; sp->rdma_connect_timeout_msec = RDMA_RESOLVE_TIMEOUT; @@ -988,143 +320,64 @@ static struct smbd_connection *_smbd_get_connection( info = kzalloc_obj(*info); if (!info) return NULL; - sc = &info->socket; - scnprintf(wq_name, ARRAY_SIZE(wq_name), "smbd_%p", sc); - workqueue = create_workqueue(wq_name); - if (!workqueue) + scnprintf(wq_name, ARRAY_SIZE(wq_name), "smbd_%p", info); + info->workqueue = create_workqueue(wq_name); + if (!info->workqueue) goto create_wq_failed; - smbdirect_socket_prepare_create(sc, sp, workqueue); + sc = &info->socket; + ret = smbdirect_socket_init_new(net, sc); + if (ret) + goto socket_init_failed; smbdirect_socket_set_logging(sc, NULL, smbd_logging_needed, smbd_logging_vaprintf); - sc->ib.poll_ctx = 
IB_POLL_SOFTIRQ; - /* - * from here we operate on the copy. - */ - sp = &sc->parameters; + ret = smbdirect_socket_set_initial_parameters(sc, sp); + if (ret) + goto set_params_failed; + ret = smbdirect_socket_set_kernel_settings(sc, IB_POLL_SOFTIRQ, GFP_KERNEL); + if (ret) + goto set_settings_failed; + ret = smbdirect_socket_set_custom_workqueue(sc, info->workqueue); + if (ret) + goto set_workqueue_failed; - rc = smbd_ia_open(sc, dstaddr, port); - if (rc) { - log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc); - goto create_id_failed; - } + if (dstaddr->sa_family == AF_INET6) + sport = &((struct sockaddr_in6 *)dstaddr)->sin6_port; + else + sport = &((struct sockaddr_in *)dstaddr)->sin_port; - sp->responder_resources = - min_t(u8, sp->responder_resources, - sc->ib.dev->attrs.max_qp_rd_atom); - log_rdma_mr(INFO, "responder_resources=%d\n", - sp->responder_resources); + *sport = htons(port); - rc = smbdirect_connection_create_qp(sc); - if (rc) { - log_rdma_event(ERR, "smbdirect_connection_create_qp failed %i\n", rc); - goto create_qp_failed; - } - - memset(&conn_param, 0, sizeof(conn_param)); - conn_param.initiator_depth = sp->initiator_depth; - conn_param.responder_resources = sp->responder_resources; - - /* Need to send IRD/ORD in private data for iWARP */ - sc->ib.dev->ops.get_port_immutable( - sc->ib.dev, sc->rdma.cm_id->port_num, &port_immutable); - if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) { - ird_ord_hdr[0] = cpu_to_be32(conn_param.responder_resources); - ird_ord_hdr[1] = cpu_to_be32(conn_param.initiator_depth); - conn_param.private_data = ird_ord_hdr; - conn_param.private_data_len = sizeof(ird_ord_hdr); - } else { - conn_param.private_data = NULL; - conn_param.private_data_len = 0; - } - - conn_param.retry_count = SMBD_CM_RETRY; - conn_param.rnr_retry_count = SMBD_CM_RNR_RETRY; - conn_param.flow_control = 0; - - log_rdma_event(INFO, "connecting to IP %pI4 port %d\n", - &addr_in->sin_addr, port); - - WARN_ON_ONCE(sc->status != 
SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED); - sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING; - rc = rdma_connect(sc->rdma.cm_id, &conn_param); - if (rc) { - log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc); - goto rdma_connect_failed; - } - - wait_event_interruptible_timeout( - sc->status_wait, - sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING, - msecs_to_jiffies(sp->rdma_connect_timeout_msec)); - - if (sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED) { - log_rdma_event(ERR, "rdma_connect failed port=%d\n", port); - goto rdma_connect_failed; - } - - log_rdma_event(INFO, "rdma_connect connected\n"); - - rc = smbdirect_connection_create_mem_pools(sc); - if (rc) { - log_rdma_event(ERR, "cache allocation failed\n"); - goto allocate_cache_failed; - } - - list_for_each_entry(recv_io, &sc->recv_io.free.list, list) - recv_io->cqe.done = recv_done; - - INIT_WORK(&sc->idle.immediate_work, smbdirect_connection_send_immediate_work); - /* - * start with the negotiate timeout and SMBDIRECT_KEEPALIVE_PENDING - * so that the timer will cause a disconnect. 
- */ - sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, - msecs_to_jiffies(sp->negotiate_timeout_msec)); - - INIT_WORK(&sc->recv_io.posted.refill_work, smbdirect_connection_recv_io_refill_work); - - rc = smbd_negotiate(sc); - if (rc) { - log_rdma_event(ERR, "smbd_negotiate rc=%d\n", rc); - goto negotiation_failed; - } - - rc = smbdirect_connection_create_mr_list(sc); - if (rc) { - log_rdma_mr(ERR, "memory registration allocation failed\n"); - goto allocate_mr_failed; + ret = smbdirect_connect_sync(sc, dstaddr); + if (ret) { + log_rdma_event(ERR, "connect to %pISpsfc failed: %1pe\n", + dstaddr, ERR_PTR(ret)); + goto connect_failed; } return info; -allocate_mr_failed: +connect_failed: +set_workqueue_failed: +set_settings_failed: +set_params_failed: /* At this point, need to a full transport shutdown */ server->smbd_conn = info; smbd_destroy(server); return NULL; -negotiation_failed: - disable_delayed_work_sync(&sc->idle.timer_work); - smbdirect_connection_destroy_mem_pools(sc); - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; - rdma_disconnect(sc->rdma.cm_id); - wait_event(sc->status_wait, - sc->status == SMBDIRECT_SOCKET_DISCONNECTED); - -allocate_cache_failed: -rdma_connect_failed: - smbdirect_connection_destroy_qp(sc); - -create_qp_failed: - rdma_destroy_id(sc->rdma.cm_id); - -create_id_failed: - destroy_workqueue(sc->workqueue); +socket_init_failed: + destroy_workqueue(info->workqueue); create_wq_failed: kfree(info); return NULL; } +const struct smbdirect_socket_parameters *smbd_get_parameters(struct smbd_connection *conn) +{ + struct smbdirect_socket *sc = &conn->socket; + + return smbdirect_socket_get_current_parameters(sc); +} + struct smbd_connection *smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr) { @@ -1143,7 +396,7 @@ struct smbd_connection *smbd_get_connection( if (!ret) return NULL; - sp = &ret->socket.parameters; + sp = smbd_get_parameters(ret); 
server->rdma_readwrite_threshold = rdma_readwrite_threshold > sp->max_fragmented_send_size ? @@ -1171,6 +424,9 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) { struct smbdirect_socket *sc = &info->socket; + if (!smbdirect_connection_is_connected(sc)) + return -ENOTCONN; + return smbdirect_connection_recvmsg(sc, msg, 0); } @@ -1185,7 +441,7 @@ int smbd_send(struct TCP_Server_Info *server, { struct smbd_connection *info = server->smbd_conn; struct smbdirect_socket *sc = &info->socket; - struct smbdirect_socket_parameters *sp = &sc->parameters; + const struct smbdirect_socket_parameters *sp = smbd_get_parameters(info); struct smb_rqst *rqst; struct iov_iter iter; struct smbdirect_send_batch_storage bstorage; @@ -1194,7 +450,7 @@ int smbd_send(struct TCP_Server_Info *server, int rc, i, rqst_idx; int error = 0; - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) + if (!smbdirect_connection_is_connected(sc)) return -EAGAIN; /* @@ -1291,6 +547,9 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, { struct smbdirect_socket *sc = &info->socket; + if (!smbdirect_connection_is_connected(sc)) + return NULL; + return smbdirect_connection_register_mr_io(sc, iter, writing, need_invalidate); } diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 7986e87bf384..3f623a37aedc 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -38,6 +38,7 @@ extern int smbd_receive_credit_max; */ struct smbd_connection { struct smbdirect_socket socket; + struct workqueue_struct *workqueue; }; /* Create a SMBDirect session */ From b8aef8c8808cc78992bec2ab2195c5e0903c0879 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 24 Oct 2025 17:41:03 +0200 Subject: [PATCH 093/145] smb: client: make use of smbdirect_socket_create_kern()/smbdirect_socket_release() With this we no longer embed struct smbdirect_socket, which will allow us to make it private in the following commits. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 30 ++++++++++++++---------------- fs/smb/client/smbdirect.h | 2 +- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 4b29b6ca4e74..e51a91b07e94 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -215,15 +215,13 @@ static int smbd_post_send_full_iter(struct smbdirect_socket *sc, void smbd_destroy(struct TCP_Server_Info *server) { struct smbd_connection *info = server->smbd_conn; - struct smbdirect_socket *sc; if (!info) { log_rdma_event(INFO, "rdma session already destroyed\n"); return; } - sc = &info->socket; - smbdirect_socket_destroy_sync(sc); + smbdirect_socket_release(info->socket); destroy_workqueue(info->workqueue); kfree(info); @@ -324,8 +322,7 @@ static struct smbd_connection *_smbd_get_connection( info->workqueue = create_workqueue(wq_name); if (!info->workqueue) goto create_wq_failed; - sc = &info->socket; - ret = smbdirect_socket_init_new(net, sc); + ret = smbdirect_socket_create_kern(net, &sc); if (ret) goto socket_init_failed; smbdirect_socket_set_logging(sc, NULL, smbd_logging_needed, smbd_logging_vaprintf); @@ -353,17 +350,14 @@ static struct smbd_connection *_smbd_get_connection( goto connect_failed; } + info->socket = sc; return info; connect_failed: set_workqueue_failed: set_settings_failed: set_params_failed: - /* At this point, need to a full transport shutdown */ - server->smbd_conn = info; - smbd_destroy(server); - return NULL; - + smbdirect_socket_release(sc); socket_init_failed: destroy_workqueue(info->workqueue); create_wq_failed: @@ -373,9 +367,13 @@ static struct smbd_connection *_smbd_get_connection( const struct smbdirect_socket_parameters *smbd_get_parameters(struct smbd_connection *conn) { - struct 
smbdirect_socket *sc = &conn->socket; + if (unlikely(!conn->socket)) { + static const struct smbdirect_socket_parameters zero_params; - return smbdirect_socket_get_current_parameters(sc); + return &zero_params; + } + + return smbdirect_socket_get_current_parameters(conn->socket); } struct smbd_connection *smbd_get_connection( @@ -422,7 +420,7 @@ struct smbd_connection *smbd_get_connection( */ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) { - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket *sc = info->socket; if (!smbdirect_connection_is_connected(sc)) return -ENOTCONN; @@ -440,7 +438,7 @@ int smbd_send(struct TCP_Server_Info *server, int num_rqst, struct smb_rqst *rqst_array) { struct smbd_connection *info = server->smbd_conn; - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket *sc = info->socket; const struct smbdirect_socket_parameters *sp = smbd_get_parameters(info); struct smb_rqst *rqst; struct iov_iter iter; @@ -545,7 +543,7 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, struct iov_iter *iter, bool writing, bool need_invalidate) { - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket *sc = info->socket; if (!smbdirect_connection_is_connected(sc)) return NULL; @@ -580,7 +578,7 @@ void smbd_debug_proc_show(struct TCP_Server_Info *server, struct seq_file *m) return; } - smbdirect_connection_legacy_debug_proc_show(&server->smbd_conn->socket, + smbdirect_connection_legacy_debug_proc_show(server->smbd_conn->socket, server->rdma_readwrite_threshold, m); } diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 3f623a37aedc..35172076f2ee 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -37,7 +37,7 @@ extern int smbd_receive_credit_max; * 5. 
mempools for allocating packets */ struct smbd_connection { - struct smbdirect_socket socket; + struct smbdirect_socket *socket; struct workqueue_struct *workqueue; }; From a8e98e392062a9575e41646621f238f3a35203ae Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 27 Oct 2025 21:29:48 +0100 Subject: [PATCH 094/145] smb: client: only use public smbdirect functions Also remove a lot of unused includes... Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 1 - fs/smb/client/smbdirect.c | 20 +++++++++----------- fs/smb/client/smbdirect.h | 13 ------------- 3 files changed, 9 insertions(+), 25 deletions(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 0aeb23aed8eb..957aca2222b5 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -36,7 +36,6 @@ #include "../common/smb2status.h" #include "smb2glob.h" #include "cifs_spnego.h" -#include "../common/smbdirect/smbdirect.h" #include "smbdirect.h" #include "trace.h" #ifdef CONFIG_CIFS_DFS_UPCALL diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index e51a91b07e94..f0b3eebc8279 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -7,13 +7,11 @@ #define SMBDIRECT_USE_INLINE_C_FILES 1 -#include -#include -#include #include "smbdirect.h" #include "cifs_debug.h" #include "cifsproto.h" #include "smb2proto.h" +#include "../common/smbdirect/smbdirect_public.h" /* Port numbers for SMBD transport */ #define SMB_PORT 445 @@ -109,14 +107,6 @@ module_param(smbd_logging_level, uint, 0644); MODULE_PARM_DESC(smbd_logging_level, "Logging level for SMBD transport, 0 (default): error, 1: info"); -/* - * This is a temporary solution until all code - * is moved to smbdirect_all_c_files.c and we - * have an smbdirect.ko that exports the required - * functions. 
- */ -#include "../common/smbdirect/smbdirect_all_c_files.c" - static bool smbd_logging_needed(struct smbdirect_socket *sc, void *private_ptr, unsigned int lvl, @@ -582,3 +572,11 @@ void smbd_debug_proc_show(struct TCP_Server_Info *server, struct seq_file *m) server->rdma_readwrite_threshold, m); } + +/* + * This is a temporary solution until all code + * is moved to smbdirect_all_c_files.c and we + * have an smbdirect.ko that exports the required + * functions. + */ +#include "../common/smbdirect/smbdirect_all_c_files.c" diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 35172076f2ee..bd03ae72e9c8 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -11,12 +11,8 @@ #define cifs_rdma_enabled(server) ((server)->rdma) #include "cifsglob.h" -#include -#include -#include #include "../common/smbdirect/smbdirect.h" -#include "../common/smbdirect/smbdirect_socket.h" extern int rdma_readwrite_threshold; extern int smbd_max_frmr_depth; @@ -27,15 +23,6 @@ extern int smbd_max_send_size; extern int smbd_send_credit_target; extern int smbd_receive_credit_max; -/* - * The context for the SMBDirect transport - * Everything related to the transport is here. It has several logical parts - * 1. RDMA related structures - * 2. SMBDirect connection parameters - * 3. Memory registrations - * 4. Receive and reassembly queues for data receive path - * 5. mempools for allocating packets - */ struct smbd_connection { struct smbdirect_socket *socket; struct workqueue_struct *workqueue; From 6acc747906c5b87657dc313ff6cb777d805a6ec4 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 18 Jul 2025 19:22:33 +0200 Subject: [PATCH 095/145] smb: client: make use of smbdirect.ko This means we no longer inline the common smbdirect .c files and use the exported functions from the module instead. Note the connection specific logging is still redirected to cifs.ko functions via smbdirect_socket_set_logging().
We still don't use a real socket layer, but we're very close... Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: David Howells Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/Kconfig | 5 +++-- fs/smb/client/smbdirect.c | 10 ---------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/fs/smb/client/Kconfig b/fs/smb/client/Kconfig index ec2abfe0a62f..63831242fddf 100644 --- a/fs/smb/client/Kconfig +++ b/fs/smb/client/Kconfig @@ -180,8 +180,9 @@ if CIFS config CIFS_SMB_DIRECT bool "SMB Direct support" - depends on CIFS=m && INFINIBAND && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y - select SG_POOL + depends on CIFS && INFINIBAND && INFINIBAND_ADDR_TRANS + depends on CIFS=m || INFINIBAND=y + select SMB_COMMON_SMBDIRECT help Enables SMB Direct support for SMB 3.0, 3.02 and 3.1.1. SMB Direct allows transferring SMB packets over RDMA. If unsure, diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index f0b3eebc8279..73fc86312bbf 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -5,8 +5,6 @@ * Author(s): Long Li */ -#define SMBDIRECT_USE_INLINE_C_FILES 1 - #include "smbdirect.h" #include "cifs_debug.h" #include "cifsproto.h" @@ -572,11 +570,3 @@ void smbd_debug_proc_show(struct TCP_Server_Info *server, struct seq_file *m) server->rdma_readwrite_threshold, m); } - -/* - * This is a temporary solution until all code - * is moved to smbdirect_all_c_files.c and we - * have an smbdirect.ko that exports the required - * functions. - */ -#include "../common/smbdirect/smbdirect_all_c_files.c" From 4624f1bf1b79bd50ddbd1178aa741b8a7afba5b0 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 26 Aug 2025 15:32:48 +0200 Subject: [PATCH 096/145] smb: server: make use of smbdirect_socket_prepare_create() This prepares the use of functions from smbdirect_connection.c.
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 2978b8850082..55514ce348ac 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -422,20 +422,14 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) { struct smb_direct_transport *t; struct smbdirect_socket *sc; + struct smbdirect_socket_parameters init_params = {}; struct smbdirect_socket_parameters *sp; struct ksmbd_conn *conn; - t = kzalloc_obj(*t, KSMBD_DEFAULT_GFP); - if (!t) - return NULL; - sc = &t->socket; - smbdirect_socket_init(sc); - sp = &sc->parameters; - - sc->workqueue = smb_direct_wq; - - INIT_WORK(&sc->disconnect_work, smb_direct_disconnect_rdma_work); - + /* + * Create the initial parameters + */ + sp = &init_params; sp->negotiate_timeout_msec = SMB_DIRECT_NEGOTIATE_TIMEOUT * 1000; sp->initiator_depth = SMB_DIRECT_CM_INITIATOR_DEPTH; sp->responder_resources = 1; @@ -448,6 +442,18 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) sp->keepalive_interval_msec = SMB_DIRECT_KEEPALIVE_SEND_INTERVAL * 1000; sp->keepalive_timeout_msec = SMB_DIRECT_KEEPALIVE_RECV_TIMEOUT * 1000; + t = kzalloc_obj(*t, KSMBD_DEFAULT_GFP); + if (!t) + return NULL; + sc = &t->socket; + smbdirect_socket_prepare_create(sc, sp, smb_direct_wq); + /* + * from here we operate on the copy. 
+ */ + sp = &sc->parameters; + + INIT_WORK(&sc->disconnect_work, smb_direct_disconnect_rdma_work); + sc->rdma.cm_id = cm_id; cm_id->context = sc; From bbf3559afe5ef7283eaa3112520ce06f73426ee1 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 23 Oct 2025 14:40:48 +0200 Subject: [PATCH 097/145] smb: server: make use of smbdirect_socket_set_logging() This will allow the logging to keep working as before, when we move to common functions in the next commits. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 62 ++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 55514ce348ac..1ea494522d86 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -126,6 +126,65 @@ struct smb_direct_transport { struct smbdirect_socket socket; }; +static bool smb_direct_logging_needed(struct smbdirect_socket *sc, + void *private_ptr, + unsigned int lvl, + unsigned int cls) +{ + if (lvl <= SMBDIRECT_LOG_ERR) + return true; + + if (lvl > SMBDIRECT_LOG_INFO) + return false; + + switch (cls) { + /* + * These were more or less also logged before + * the move to common code. + * + * SMBDIRECT_LOG_RDMA_MR was not used, but + * that's client only code and we should + * notice if it's used on the server... + */ + case SMBDIRECT_LOG_RDMA_EVENT: + case SMBDIRECT_LOG_RDMA_SEND: + case SMBDIRECT_LOG_RDMA_RECV: + case SMBDIRECT_LOG_WRITE: + case SMBDIRECT_LOG_READ: + case SMBDIRECT_LOG_NEGOTIATE: + case SMBDIRECT_LOG_OUTGOING: + case SMBDIRECT_LOG_RDMA_RW: + case SMBDIRECT_LOG_RDMA_MR: + return true; + /* + * These were not logged before the move + * to common code. 
+ */ + case SMBDIRECT_LOG_KEEP_ALIVE: + case SMBDIRECT_LOG_INCOMING: + return false; + } + + /* + * Log all unknown messages + */ + return true; +} + +static void smb_direct_logging_vaprintf(struct smbdirect_socket *sc, + const char *func, + unsigned int line, + void *private_ptr, + unsigned int lvl, + unsigned int cls, + struct va_format *vaf) +{ + if (lvl <= SMBDIRECT_LOG_ERR) + pr_err("%pV", vaf); + else + ksmbd_debug(RDMA, "%pV", vaf); +} + #define KSMBD_TRANS(t) (&(t)->transport) #define SMBD_TRANS(t) (container_of(t, \ struct smb_direct_transport, transport)) @@ -447,6 +506,9 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) return NULL; sc = &t->socket; smbdirect_socket_prepare_create(sc, sp, smb_direct_wq); + smbdirect_socket_set_logging(sc, NULL, + smb_direct_logging_needed, + smb_direct_logging_vaprintf); /* * from here we operate on the copy. */ From 33562021f4151c3d18be696c7c55e323716a0a39 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 2 Sep 2025 12:55:04 +0200 Subject: [PATCH 098/145] smb: server: make use of smbdirect_socket_wake_up_all() This is a superset of smb_direct_disconnect_wake_up_all() and calling wake_up_all(&sc->mr_io.ready.wait_queue); and wake_up_all(&sc->mr_io.cleanup.wait_queue); in addition should not matter as it's not used on the server anyway. 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 1ea494522d86..8724c7fb0b12 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -298,21 +298,6 @@ static struct smbdirect_recv_io *get_first_reassembly(struct smbdirect_socket *s return NULL; } -static void smb_direct_disconnect_wake_up_all(struct smbdirect_socket *sc) -{ - /* - * Wake up all waiters in all wait queues - * in order to notice the broken connection. - */ - wake_up_all(&sc->status_wait); - wake_up_all(&sc->send_io.bcredits.wait_queue); - wake_up_all(&sc->send_io.lcredits.wait_queue); - wake_up_all(&sc->send_io.credits.wait_queue); - wake_up_all(&sc->send_io.pending.zero_wait_queue); - wake_up_all(&sc->recv_io.reassembly.wait_queue); - wake_up_all(&sc->rw_io.credits.wait_queue); -} - static void smb_direct_disconnect_rdma_work(struct work_struct *work) { struct smbdirect_socket *sc = @@ -370,7 +355,7 @@ static void smb_direct_disconnect_rdma_work(struct work_struct *work) * Wake up all waiters in all wait queues * in order to notice the broken connection. */ - smb_direct_disconnect_wake_up_all(sc); + smbdirect_socket_wake_up_all(sc); } static void @@ -437,7 +422,7 @@ smb_direct_disconnect_rdma_connection(struct smbdirect_socket *sc) * Wake up all waiters in all wait queues * in order to notice the broken connection. */ - smb_direct_disconnect_wake_up_all(sc); + smbdirect_socket_wake_up_all(sc); queue_work(sc->workqueue, &sc->disconnect_work); } @@ -563,7 +548,7 @@ static void free_transport(struct smb_direct_transport *t) * Most likely this was already called via * smb_direct_disconnect_rdma_work(), but call it again... 
*/ - smb_direct_disconnect_wake_up_all(sc); + smbdirect_socket_wake_up_all(sc); disable_work_sync(&sc->connect.work); disable_work_sync(&sc->recv_io.posted.refill_work); From 1b1ee1e3ee32115492adc6c746177fca6fc8593b Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 26 Aug 2025 17:20:30 +0200 Subject: [PATCH 099/145] smb: server: make use of smbdirect_socket_cleanup_work() Note smbdirect_socket_prepare_create() already calls INIT_WORK() with smbdirect_socket_cleanup_work. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 72 +++------------------------------- 1 file changed, 5 insertions(+), 67 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 8724c7fb0b12..a191df1bd326 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -298,66 +298,6 @@ static struct smbdirect_recv_io *get_first_reassembly(struct smbdirect_socket *s return NULL; } -static void smb_direct_disconnect_rdma_work(struct work_struct *work) -{ - struct smbdirect_socket *sc = - container_of(work, struct smbdirect_socket, disconnect_work); - - if (sc->first_error == 0) - sc->first_error = -ECONNABORTED; - - /* - * make sure this and other work is not queued again - * but here we don't block and avoid - * disable[_delayed]_work_sync() - */ - disable_work(&sc->disconnect_work); - disable_work(&sc->connect.work); - disable_work(&sc->recv_io.posted.refill_work); - disable_delayed_work(&sc->idle.timer_work); - disable_work(&sc->idle.immediate_work); - - switch (sc->status) { - case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: - case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: - case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: - case SMBDIRECT_SOCKET_CONNECTED: - case SMBDIRECT_SOCKET_ERROR: - sc->status = SMBDIRECT_SOCKET_DISCONNECTING; - rdma_disconnect(sc->rdma.cm_id); 
- break; - - case SMBDIRECT_SOCKET_CREATED: - case SMBDIRECT_SOCKET_LISTENING: - case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: - case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: - case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: - case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: - case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: - case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: - /* - * rdma_accept() never reached - * RDMA_CM_EVENT_ESTABLISHED - */ - sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - break; - - case SMBDIRECT_SOCKET_DISCONNECTING: - case SMBDIRECT_SOCKET_DISCONNECTED: - case SMBDIRECT_SOCKET_DESTROYED: - break; - } - - /* - * Wake up all waiters in all wait queues - * in order to notice the broken connection. - */ - smbdirect_socket_wake_up_all(sc); -} - static void smb_direct_disconnect_rdma_connection(struct smbdirect_socket *sc) { @@ -499,8 +439,6 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) */ sp = &sc->parameters; - INIT_WORK(&sc->disconnect_work, smb_direct_disconnect_rdma_work); - sc->rdma.cm_id = cm_id; cm_id->context = sc; @@ -537,7 +475,7 @@ static void free_transport(struct smb_direct_transport *t) disable_work_sync(&sc->disconnect_work); if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) - smb_direct_disconnect_rdma_work(&sc->disconnect_work); + smbdirect_socket_cleanup_work(&sc->disconnect_work); if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED) wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); @@ -546,7 +484,7 @@ static void free_transport(struct smb_direct_transport *t) * in order to notice the broken connection. * * Most likely this was already called via - * smb_direct_disconnect_rdma_work(), but call it again... + * smbdirect_socket_cleanup_work(), but call it again... 
*/ smbdirect_socket_wake_up_all(sc); @@ -2036,7 +1974,7 @@ static void smb_direct_shutdown(struct ksmbd_transport *t) ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", sc->rdma.cm_id); - smb_direct_disconnect_rdma_work(&sc->disconnect_work); + smbdirect_socket_cleanup_work(&sc->disconnect_work); } static int smb_direct_cm_handler(struct rdma_cm_id *cm_id, @@ -2078,14 +2016,14 @@ static int smb_direct_cm_handler(struct rdma_cm_id *cm_id, case RDMA_CM_EVENT_DEVICE_REMOVAL: case RDMA_CM_EVENT_DISCONNECTED: { sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - smb_direct_disconnect_rdma_work(&sc->disconnect_work); + smbdirect_socket_cleanup_work(&sc->disconnect_work); if (sc->ib.qp) ib_drain_qp(sc->ib.qp); break; } case RDMA_CM_EVENT_CONNECT_ERROR: { sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - smb_direct_disconnect_rdma_work(&sc->disconnect_work); + smbdirect_socket_cleanup_work(&sc->disconnect_work); break; } default: From 0ffbbfdf6a2698d31dc6b38b47fa04ef0cd075a1 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 26 Aug 2025 17:31:39 +0200 Subject: [PATCH 100/145] smb: server: make use of smbdirect_socket_schedule_cleanup() This removes smb_direct_disconnect_rdma_connection() which is basically the same as smbdirect_socket_schedule_cleanup(). And we pass more useful errors than -ECONNABORTED if we have them. 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 105 +++++---------------------------- 1 file changed, 16 insertions(+), 89 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index a191df1bd326..60ce743a9d35 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -21,8 +21,6 @@ #include #include -#define __SMBDIRECT_SOCKET_DISCONNECT(__sc) smb_direct_disconnect_rdma_connection(__sc) - #include "glob.h" #include "connection.h" #include "smb_common.h" @@ -32,8 +30,6 @@ #include "../common/smbdirect/smbdirect_socket.h" #include "transport_rdma.h" -static void smb_direct_disconnect_rdma_connection(struct smbdirect_socket *sc); - /* * This is a temporary solution until all code * is moved to smbdirect_all_c_files.c and we @@ -298,75 +294,6 @@ static struct smbdirect_recv_io *get_first_reassembly(struct smbdirect_socket *s return NULL; } -static void -smb_direct_disconnect_rdma_connection(struct smbdirect_socket *sc) -{ - if (sc->first_error == 0) - sc->first_error = -ECONNABORTED; - - /* - * make sure other work (than disconnect_work) is - * not queued again but here we don't block and avoid - * disable[_delayed]_work_sync() - */ - disable_work(&sc->connect.work); - disable_work(&sc->recv_io.posted.refill_work); - disable_work(&sc->idle.immediate_work); - disable_delayed_work(&sc->idle.timer_work); - - switch (sc->status) { - case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: - case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: - case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: - case SMBDIRECT_SOCKET_ERROR: - case SMBDIRECT_SOCKET_DISCONNECTING: - case SMBDIRECT_SOCKET_DISCONNECTED: - case SMBDIRECT_SOCKET_DESTROYED: - /* - * Keep the current error status - */ - break; - - case 
SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: - case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: - sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED; - break; - - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: - case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: - sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED; - break; - - case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: - case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: - sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED; - break; - - case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: - case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; - break; - - case SMBDIRECT_SOCKET_CREATED: - case SMBDIRECT_SOCKET_LISTENING: - sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - break; - - case SMBDIRECT_SOCKET_CONNECTED: - sc->status = SMBDIRECT_SOCKET_ERROR; - break; - } - - /* - * Wake up all waiters in all wait queues - * in order to notice the broken connection. - */ - smbdirect_socket_wake_up_all(sc); - - queue_work(sc->workqueue, &sc->disconnect_work); -} - static void smb_direct_send_immediate_work(struct work_struct *work) { struct smbdirect_socket *sc = @@ -385,7 +312,7 @@ static void smb_direct_idle_connection_timer(struct work_struct *work) struct smbdirect_socket_parameters *sp = &sc->parameters; if (sc->idle.keepalive != SMBDIRECT_KEEPALIVE_NONE) { - smb_direct_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ETIMEDOUT); return; } @@ -635,7 +562,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) pr_err("Recv error. 
status='%s (%d)' opcode=%d\n", ib_wc_status_msg(wc->status), wc->status, wc->opcode); - smb_direct_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); } return; } @@ -669,7 +596,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) if (wc->byte_len < offsetof(struct smbdirect_data_transfer, padding)) { put_recvmsg(sc, recvmsg); - smb_direct_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); return; } @@ -679,7 +606,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) if (wc->byte_len < data_offset || wc->byte_len < (u64)data_offset + data_length) { put_recvmsg(sc, recvmsg); - smb_direct_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); return; } if (remaining_data_length > sp->max_fragmented_recv_size || @@ -687,7 +614,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) (u64)remaining_data_length + (u64)data_length > (u64)sp->max_fragmented_recv_size) { put_recvmsg(sc, recvmsg); - smb_direct_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); return; } @@ -743,7 +670,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) */ WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_DATA_TRANSFER); put_recvmsg(sc, recvmsg); - smb_direct_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); } static void smb_direct_negotiate_recv_work(struct work_struct *work); @@ -766,7 +693,7 @@ static void smb_direct_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc) pr_err("Negotiate Recv error. 
status='%s (%d)' opcode=%d\n", ib_wc_status_msg(wc->status), wc->status, wc->opcode); - smb_direct_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); } return; } @@ -785,7 +712,7 @@ static void smb_direct_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc) */ if (WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_NEGOTIATE_REQ)) { put_recvmsg(sc, recv_io); - smb_direct_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); return; } @@ -854,12 +781,12 @@ static void smb_direct_negotiate_recv_work(struct work_struct *work) */ recv_io = get_first_reassembly(sc); if (!recv_io) { - smb_direct_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); return; } if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_NEGOTIATE_NEEDED)) { - smb_direct_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); return; } sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING; @@ -895,7 +822,7 @@ static int smb_direct_post_recv(struct smbdirect_socket *sc, recvmsg->sge.addr, recvmsg->sge.length, DMA_FROM_DEVICE); recvmsg->sge.length = 0; - smb_direct_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, ret); return ret; } return ret; @@ -1098,7 +1025,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) */ pr_err("unexpected send completion wc->status=%s (%d) wc->opcode=%d\n", ib_wc_status_msg(wc->status), wc->status, wc->opcode); - smb_direct_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); return; } @@ -1119,7 +1046,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) pr_err("Send error. 
status='%s (%d)', opcode=%d\n", ib_wc_status_msg(wc->status), wc->status, wc->opcode); - smb_direct_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); return; } @@ -1193,7 +1120,7 @@ static int smb_direct_post_send(struct smbdirect_socket *sc, ret = ib_post_send(sc->ib.qp, wr, NULL); if (ret) { pr_err("failed to post send: %d\n", ret); - smb_direct_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, ret); } return ret; } @@ -1791,7 +1718,7 @@ static void read_write_done(struct ib_cq *cq, struct ib_wc *wc, pr_err("read/write error. opcode = %d, status = %s(%d)\n", wc->opcode, ib_wc_status_msg(wc->status), wc->status); if (wc->status != IB_WC_WR_FLUSH_ERR) - smb_direct_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, msg->error); } complete(msg->completion); @@ -2045,7 +1972,7 @@ static void smb_direct_qpair_handler(struct ib_event *event, void *context) switch (event->event) { case IB_EVENT_CQ_ERR: case IB_EVENT_QP_FATAL: - smb_direct_disconnect_rdma_connection(sc); + smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); break; default: break; From 01f26988c8728c5dd993f03b316c32d2cce3b4e3 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 26 Aug 2025 19:16:54 +0200 Subject: [PATCH 101/145] smb: server: make use of smbdirect_connection_{get,put}_recv_io() These are basically copies of {get,put}_receive_buffer() in the client. They are very similar to {get_free,put}_recvmsg() the only logical difference is the updating of the sc->statistics.*. 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 71 ++++++++-------------------------- 1 file changed, 17 insertions(+), 54 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 60ce743a9d35..32ece0140502 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -228,43 +228,6 @@ static inline void return (void *)recvmsg->packet; } -static struct -smbdirect_recv_io *get_free_recvmsg(struct smbdirect_socket *sc) -{ - struct smbdirect_recv_io *recvmsg = NULL; - unsigned long flags; - - spin_lock_irqsave(&sc->recv_io.free.lock, flags); - if (!list_empty(&sc->recv_io.free.list)) { - recvmsg = list_first_entry(&sc->recv_io.free.list, - struct smbdirect_recv_io, - list); - list_del(&recvmsg->list); - } - spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); - return recvmsg; -} - -static void put_recvmsg(struct smbdirect_socket *sc, - struct smbdirect_recv_io *recvmsg) -{ - unsigned long flags; - - if (likely(recvmsg->sge.length != 0)) { - ib_dma_unmap_single(sc->ib.dev, - recvmsg->sge.addr, - recvmsg->sge.length, - DMA_FROM_DEVICE); - recvmsg->sge.length = 0; - } - - spin_lock_irqsave(&sc->recv_io.free.lock, flags); - list_add(&recvmsg->list, &sc->recv_io.free.list); - spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); - - queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); -} - static void enqueue_reassembly(struct smbdirect_socket *sc, struct smbdirect_recv_io *recvmsg, int data_length) @@ -438,7 +401,7 @@ static void free_transport(struct smb_direct_transport *t) if (recvmsg) { list_del(&recvmsg->list); spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); - put_recvmsg(sc, recvmsg); + smbdirect_connection_put_recv_io(recvmsg); } else { spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); } 
@@ -557,7 +520,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) sp = &sc->parameters; if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { - put_recvmsg(sc, recvmsg); + smbdirect_connection_put_recv_io(recvmsg); if (wc->status != IB_WC_WR_FLUSH_ERR) { pr_err("Recv error. status='%s (%d)' opcode=%d\n", ib_wc_status_msg(wc->status), wc->status, @@ -595,7 +558,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) if (wc->byte_len < offsetof(struct smbdirect_data_transfer, padding)) { - put_recvmsg(sc, recvmsg); + smbdirect_connection_put_recv_io(recvmsg); smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); return; } @@ -605,7 +568,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) data_offset = le32_to_cpu(data_transfer->data_offset); if (wc->byte_len < data_offset || wc->byte_len < (u64)data_offset + data_length) { - put_recvmsg(sc, recvmsg); + smbdirect_connection_put_recv_io(recvmsg); smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); return; } @@ -613,7 +576,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) data_length > sp->max_fragmented_recv_size || (u64)remaining_data_length + (u64)data_length > (u64)sp->max_fragmented_recv_size) { - put_recvmsg(sc, recvmsg); + smbdirect_connection_put_recv_io(recvmsg); smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); return; } @@ -656,7 +619,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) enqueue_reassembly(sc, recvmsg, (int)data_length); wake_up(&sc->recv_io.reassembly.wait_queue); } else - put_recvmsg(sc, recvmsg); + smbdirect_connection_put_recv_io(recvmsg); return; } @@ -669,7 +632,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) * This is an internal error! 
*/ WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_DATA_TRANSFER); - put_recvmsg(sc, recvmsg); + smbdirect_connection_put_recv_io(recvmsg); smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); } @@ -688,7 +651,7 @@ static void smb_direct_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc) recv_io->cqe.done = recv_done; if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { - put_recvmsg(sc, recv_io); + smbdirect_connection_put_recv_io(recv_io); if (wc->status != IB_WC_WR_FLUSH_ERR) { pr_err("Negotiate Recv error. status='%s (%d)' opcode=%d\n", ib_wc_status_msg(wc->status), wc->status, @@ -711,7 +674,7 @@ static void smb_direct_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc) * This is an internal error! */ if (WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_NEGOTIATE_REQ)) { - put_recvmsg(sc, recv_io); + smbdirect_connection_put_recv_io(recv_io); smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); return; } @@ -731,7 +694,7 @@ static void smb_direct_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc) if (wc->byte_len >= sizeof(struct smbdirect_negotiate_req)) enqueue_reassembly(sc, recv_io, 0); else - put_recvmsg(sc, recv_io); + smbdirect_connection_put_recv_io(recv_io); /* * Some drivers (at least mlx5_ib and irdma in roce mode) @@ -914,7 +877,7 @@ static int smb_direct_read(struct ksmbd_transport *t, char *buf, spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); } queue_removed++; - put_recvmsg(sc, recvmsg); + smbdirect_connection_put_recv_io(recvmsg); offset = 0; } else { offset += to_copy; @@ -958,7 +921,7 @@ static void smb_direct_post_recv_credits(struct work_struct *work) if (atomic_read(&sc->recv_io.credits.count) < sc->recv_io.credits.target) { while (true) { - recvmsg = get_free_recvmsg(sc); + recvmsg = smbdirect_connection_get_recv_io(sc); if (!recvmsg) break; @@ -967,7 +930,7 @@ static void smb_direct_post_recv_credits(struct work_struct *work) ret = smb_direct_post_recv(sc, recvmsg); if (ret) { 
pr_err("Can't post recv: %d\n", ret); - put_recvmsg(sc, recvmsg); + smbdirect_connection_put_recv_io(recvmsg); break; } credits++; @@ -2105,7 +2068,7 @@ static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc) sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REQ; - recvmsg = get_free_recvmsg(sc); + recvmsg = smbdirect_connection_get_recv_io(sc); if (!recvmsg) return -ENOMEM; recvmsg->cqe.done = smb_direct_negotiate_recv_done; @@ -2132,7 +2095,7 @@ static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc) * will unmap it exactly once. */ if (!recv_posted) - put_recvmsg(sc, recvmsg); + smbdirect_connection_put_recv_io(recvmsg); return ret; } @@ -2172,7 +2135,7 @@ static void smb_direct_destroy_pools(struct smbdirect_socket *sc) { struct smbdirect_recv_io *recvmsg; - while ((recvmsg = get_free_recvmsg(sc))) + while ((recvmsg = smbdirect_connection_get_recv_io(sc))) mempool_free(recvmsg, sc->recv_io.mem.pool); mempool_destroy(sc->recv_io.mem.pool); @@ -2518,7 +2481,7 @@ static int smb_direct_prepare(struct ksmbd_transport *t) sc->recv_io.reassembly.queue_length--; list_del(&recvmsg->list); spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); - put_recvmsg(sc, recvmsg); + smbdirect_connection_put_recv_io(recvmsg); if (ret == -ECONNABORTED) return ret; From aa1255e71ffac6868e9db10ac3b6c2c10711afd9 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 27 Aug 2025 16:57:47 +0200 Subject: [PATCH 102/145] smb: server: make use of smbdirect_connection_reassembly_{append,first}_recv_io() These are basically copies of enqueue_reassembly() and get_first_reassembly(). The only difference is that sc->statistics.enqueue_reassembly_queue is now updated.
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 43 ++++++---------------------------- 1 file changed, 7 insertions(+), 36 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 32ece0140502..bb92000aa338 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -228,35 +228,6 @@ static inline void return (void *)recvmsg->packet; } -static void enqueue_reassembly(struct smbdirect_socket *sc, - struct smbdirect_recv_io *recvmsg, - int data_length) -{ - unsigned long flags; - - spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); - list_add_tail(&recvmsg->list, &sc->recv_io.reassembly.list); - sc->recv_io.reassembly.queue_length++; - /* - * Make sure reassembly_data_length is updated after list and - * reassembly_queue_length are updated. 
On the dequeue side - * reassembly_data_length is checked without a lock to determine - * if reassembly_queue_length and list is up to date - */ - virt_wmb(); - sc->recv_io.reassembly.data_length += data_length; - spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); -} - -static struct smbdirect_recv_io *get_first_reassembly(struct smbdirect_socket *sc) -{ - if (!list_empty(&sc->recv_io.reassembly.list)) - return list_first_entry(&sc->recv_io.reassembly.list, - struct smbdirect_recv_io, list); - else - return NULL; -} - static void smb_direct_send_immediate_work(struct work_struct *work) { struct smbdirect_socket *sc = @@ -397,7 +368,7 @@ static void free_transport(struct smb_direct_transport *t) unsigned long flags; spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); - recvmsg = get_first_reassembly(sc); + recvmsg = smbdirect_connection_reassembly_first_recv_io(sc); if (recvmsg) { list_del(&recvmsg->list); spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); @@ -616,7 +587,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) sc->recv_io.credits.target > old_recv_credit_target) queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); - enqueue_reassembly(sc, recvmsg, (int)data_length); + smbdirect_connection_reassembly_append_recv_io(sc, recvmsg, data_length); wake_up(&sc->recv_io.reassembly.wait_queue); } else smbdirect_connection_put_recv_io(recvmsg); @@ -692,7 +663,7 @@ static void smb_direct_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc) */ sc->recv_io.reassembly.full_packet_received = true; if (wc->byte_len >= sizeof(struct smbdirect_negotiate_req)) - enqueue_reassembly(sc, recv_io, 0); + smbdirect_connection_reassembly_append_recv_io(sc, recv_io, 0); else smbdirect_connection_put_recv_io(recv_io); @@ -742,7 +713,7 @@ static void smb_direct_negotiate_recv_work(struct work_struct *work) * If smb_direct_negotiate_recv_done() detected an * invalid request we want to disconnect. 
*/ - recv_io = get_first_reassembly(sc); + recv_io = smbdirect_connection_reassembly_first_recv_io(sc); if (!recv_io) { smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); return; @@ -821,7 +792,7 @@ static int smb_direct_read(struct ksmbd_transport *t, char *buf, /* * Need to make sure reassembly_data_length is read before * reading reassembly_queue_length and calling - * get_first_reassembly. This call is lock free + * smbdirect_connection_reassembly_first_recv_io. This call is lock free * as we never read at the end of the queue which are being * updated in SOFTIRQ as more data is received */ @@ -831,7 +802,7 @@ static int smb_direct_read(struct ksmbd_transport *t, char *buf, to_read = size; offset = sc->recv_io.reassembly.first_entry_offset; while (data_read < size) { - recvmsg = get_first_reassembly(sc); + recvmsg = smbdirect_connection_reassembly_first_recv_io(sc); data_transfer = smbdirect_recv_io_payload(recvmsg); data_length = le32_to_cpu(data_transfer->data_length); remaining_data_length = @@ -2432,7 +2403,7 @@ static int smb_direct_prepare(struct ksmbd_transport *t) if (ret <= 0 || sc->status != SMBDIRECT_SOCKET_NEGOTIATE_RUNNING) return ret < 0 ? ret : -ETIMEDOUT; - recvmsg = get_first_reassembly(sc); + recvmsg = smbdirect_connection_reassembly_first_recv_io(sc); if (!recvmsg) return -ECONNABORTED; From 84d7085e5f2343877e4b0e0a55569f59f7db92a1 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 27 Aug 2025 17:25:37 +0200 Subject: [PATCH 103/145] smb: server: make use of smbdirect_connection_idle_timer_work() This is basically a copy of smb_direct_idle_connection_timer(). The only difference is that we had no logging before. Note smbdirect_socket_prepare_create() already calls INIT_DELAYED_WORK(). 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index bb92000aa338..1606fc70810e 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -239,30 +239,6 @@ static void smb_direct_send_immediate_work(struct work_struct *work) smb_direct_post_send_data(sc, NULL, NULL, 0, 0); } -static void smb_direct_idle_connection_timer(struct work_struct *work) -{ - struct smbdirect_socket *sc = - container_of(work, struct smbdirect_socket, idle.timer_work.work); - struct smbdirect_socket_parameters *sp = &sc->parameters; - - if (sc->idle.keepalive != SMBDIRECT_KEEPALIVE_NONE) { - smbdirect_socket_schedule_cleanup(sc, -ETIMEDOUT); - return; - } - - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) - return; - - /* - * Now use the keepalive timeout (instead of keepalive interval) - * in order to wait for a response - */ - sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, - msecs_to_jiffies(sp->keepalive_timeout_msec)); - queue_work(sc->workqueue, &sc->idle.immediate_work); -} - static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) { struct smb_direct_transport *t; @@ -305,8 +281,6 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) sc->ib.dev = sc->rdma.cm_id->device; - INIT_DELAYED_WORK(&sc->idle.timer_work, smb_direct_idle_connection_timer); - conn = ksmbd_conn_alloc(); if (!conn) goto err; From c81c66d3c09aa1dfab2137ac6b737a206d228b2f Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 28 Aug 2025 17:01:58 +0200 Subject: [PATCH 104/145] smb: server: make use of smbdirect_frwr_is_supported() This is an exact copy of 
rdma_frwr_is_supported(). Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 1606fc70810e..feb571196824 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -2490,15 +2490,6 @@ static int smb_direct_connect(struct smbdirect_socket *sc) return 0; } -static bool rdma_frwr_is_supported(struct ib_device_attr *attrs) -{ - if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) - return false; - if (attrs->max_fast_reg_page_list_len == 0) - return false; - return true; -} - static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id, struct rdma_cm_event *event) { @@ -2511,7 +2502,7 @@ static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id, u8 peer_responder_resources; int ret; - if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) { + if (!smbdirect_frwr_is_supported(&new_cm_id->device->attrs)) { ksmbd_debug(RDMA, "Fast Registration Work Requests is not supported. 
device capabilities=%llx\n", new_cm_id->device->attrs.device_cap_flags); @@ -2703,7 +2694,7 @@ static int smb_direct_ib_client_add(struct ib_device *ib_dev) { struct smb_direct_device *smb_dev; - if (!rdma_frwr_is_supported(&ib_dev->attrs)) + if (!smbdirect_frwr_is_supported(&ib_dev->attrs)) return 0; smb_dev = kzalloc_obj(*smb_dev, KSMBD_DEFAULT_GFP); @@ -2851,7 +2842,7 @@ static bool ksmbd_find_rdma_capable_netdev(struct net_device *netdev) ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN); if (ibdev) { - rdma_capable = rdma_frwr_is_supported(&ibdev->attrs); + rdma_capable = smbdirect_frwr_is_supported(&ibdev->attrs); ib_device_put(ibdev); } } From 8ecb32ada10e13d608a80f1112daf03c82fa3683 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 11 Sep 2025 20:12:11 +0200 Subject: [PATCH 105/145] smb: server: make use of smbdirect_connection_{alloc,free}_send_io() These are basically copies of smb_direct_{alloc,free}_sendmsg() just a bit simpler and with the gfp_mask mask abstracted. For now we still use KSMBD_DEFAULT_GFP, which includes __GFP_RETRY_MAYFAIL. The only difference is that we use ib_dma_unmap_page() for all sges, this simplifies the logic and doesn't matter as ib_dma_unmap_single() and ib_dma_unmap_page() both operate on dma_addr_t and dma_unmap_single_attrs() is just an alias for dma_unmap_page_attrs(). We already had such an inconsistency in the client code where we use ib_dma_unmap_single(), while we mapped using ib_dma_map_page(). 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 58 +++++++--------------------------- 1 file changed, 11 insertions(+), 47 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index feb571196824..dab8c9e16e8e 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -271,6 +271,7 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) smbdirect_socket_set_logging(sc, NULL, smb_direct_logging_needed, smb_direct_logging_vaprintf); + sc->send_io.mem.gfp_mask = KSMBD_DEFAULT_GFP; /* * from here we operate on the copy. */ @@ -368,43 +369,6 @@ static void free_transport(struct smb_direct_transport *t) ksmbd_conn_free(KSMBD_TRANS(t)->conn); } -static struct smbdirect_send_io -*smb_direct_alloc_sendmsg(struct smbdirect_socket *sc) -{ - struct smbdirect_send_io *msg; - - msg = mempool_alloc(sc->send_io.mem.pool, KSMBD_DEFAULT_GFP); - if (!msg) - return ERR_PTR(-ENOMEM); - msg->socket = sc; - INIT_LIST_HEAD(&msg->sibling_list); - msg->num_sge = 0; - return msg; -} - -static void smb_direct_free_sendmsg(struct smbdirect_socket *sc, - struct smbdirect_send_io *msg) -{ - int i; - - /* - * The list needs to be empty! - * The caller should take care of it. 
- */ - WARN_ON_ONCE(!list_empty(&msg->sibling_list)); - - if (msg->num_sge > 0) { - ib_dma_unmap_single(sc->ib.dev, - msg->sge[0].addr, msg->sge[0].length, - DMA_TO_DEVICE); - for (i = 1; i < msg->num_sge; i++) - ib_dma_unmap_page(sc->ib.dev, - msg->sge[i].addr, msg->sge[i].length, - DMA_TO_DEVICE); - } - mempool_free(msg, sc->send_io.mem.pool); -} - static int smb_direct_check_recvmsg(struct smbdirect_recv_io *recvmsg) { struct smbdirect_socket *sc = recvmsg->socket; @@ -942,11 +906,11 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) */ list_for_each_entry_safe(sibling, next, &sendmsg->sibling_list, sibling_list) { list_del_init(&sibling->sibling_list); - smb_direct_free_sendmsg(sc, sibling); + smbdirect_connection_free_send_io(sibling); lcredits += 1; } /* Note this frees wc->wr_cqe, but not wc */ - smb_direct_free_sendmsg(sc, sendmsg); + smbdirect_connection_free_send_io(sendmsg); lcredits += 1; if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { @@ -1089,9 +1053,9 @@ static int smb_direct_flush_send_list(struct smbdirect_socket *sc, list_for_each_entry_safe(sibling, next, &last->sibling_list, sibling_list) { list_del_init(&sibling->sibling_list); - smb_direct_free_sendmsg(sc, sibling); + smbdirect_connection_free_send_io(sibling); } - smb_direct_free_sendmsg(sc, last); + smbdirect_connection_free_send_io(last); } release_credit: @@ -1203,7 +1167,7 @@ static int smb_direct_create_header(struct smbdirect_socket *sc, int header_length; int ret; - sendmsg = smb_direct_alloc_sendmsg(sc); + sendmsg = smbdirect_connection_alloc_send_io(sc); if (IS_ERR(sendmsg)) return PTR_ERR(sendmsg); @@ -1246,7 +1210,7 @@ static int smb_direct_create_header(struct smbdirect_socket *sc, DMA_TO_DEVICE); ret = ib_dma_mapping_error(sc->ib.dev, sendmsg->sge[0].addr); if (ret) { - smb_direct_free_sendmsg(sc, sendmsg); + smbdirect_connection_free_send_io(sendmsg); return ret; } @@ -1441,7 +1405,7 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, 
return 0; err: - smb_direct_free_sendmsg(sc, msg); + smbdirect_connection_free_send_io(msg); flush_failed: header_failed: atomic_inc(&sc->send_io.credits.count); @@ -1895,7 +1859,7 @@ static int smb_direct_send_negotiate_response(struct smbdirect_socket *sc, struct smbdirect_negotiate_resp *resp; int ret; - sendmsg = smb_direct_alloc_sendmsg(sc); + sendmsg = smbdirect_connection_alloc_send_io(sc); if (IS_ERR(sendmsg)) return -ENOMEM; @@ -1932,7 +1896,7 @@ static int smb_direct_send_negotiate_response(struct smbdirect_socket *sc, DMA_TO_DEVICE); ret = ib_dma_mapping_error(sc->ib.dev, sendmsg->sge[0].addr); if (ret) { - smb_direct_free_sendmsg(sc, sendmsg); + smbdirect_connection_free_send_io(sendmsg); return ret; } @@ -1942,7 +1906,7 @@ static int smb_direct_send_negotiate_response(struct smbdirect_socket *sc, ret = post_sendmsg(sc, NULL, sendmsg); if (ret) { - smb_direct_free_sendmsg(sc, sendmsg); + smbdirect_connection_free_send_io(sendmsg); return ret; } From bb1d5c49d6cdc0be77719123237ead835216f304 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 11 Sep 2025 20:54:31 +0200 Subject: [PATCH 106/145] smb: server: make use of smbdirect_connection_send_io_done() This also wakes up send_io.pending.dec_wait_queue, which is currently always empty in the server, but that might change in future. And we also don't spam the logs on IB_WC_WR_FLUSH_ERR. 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 68 +--------------------------------- 1 file changed, 1 insertion(+), 67 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index dab8c9e16e8e..bbb7e95d302f 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -863,72 +863,6 @@ static void smb_direct_post_recv_credits(struct work_struct *work) queue_work(sc->workqueue, &sc->idle.immediate_work); } -static void send_done(struct ib_cq *cq, struct ib_wc *wc) -{ - struct smbdirect_send_io *sendmsg, *sibling, *next; - struct smbdirect_socket *sc; - int lcredits = 0; - - sendmsg = container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); - sc = sendmsg->socket; - - ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n", - ib_wc_status_msg(wc->status), wc->status, - wc->opcode); - - if (unlikely(!(sendmsg->wr.send_flags & IB_SEND_SIGNALED))) { - /* - * This happens when smbdirect_send_io is a sibling - * before the final message, it is signaled on - * error anyway, so we need to skip - * smbdirect_connection_free_send_io here, - * otherwise is will destroy the memory - * of the siblings too, which will cause - * use after free problems for the others - * triggered from ib_drain_qp(). - */ - if (wc->status != IB_WC_SUCCESS) - goto skip_free; - - /* - * This should not happen! - * But we better just close the - * connection... 
- */ - pr_err("unexpected send completion wc->status=%s (%d) wc->opcode=%d\n", - ib_wc_status_msg(wc->status), wc->status, wc->opcode); - smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); - return; - } - - /* - * Free possible siblings and then the main send_io - */ - list_for_each_entry_safe(sibling, next, &sendmsg->sibling_list, sibling_list) { - list_del_init(&sibling->sibling_list); - smbdirect_connection_free_send_io(sibling); - lcredits += 1; - } - /* Note this frees wc->wr_cqe, but not wc */ - smbdirect_connection_free_send_io(sendmsg); - lcredits += 1; - - if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { -skip_free: - pr_err("Send error. status='%s (%d)', opcode=%d\n", - ib_wc_status_msg(wc->status), wc->status, - wc->opcode); - smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); - return; - } - - atomic_add(lcredits, &sc->send_io.lcredits.count); - wake_up(&sc->send_io.lcredits.wait_queue); - - if (atomic_dec_and_test(&sc->send_io.pending.count)) - wake_up(&sc->send_io.pending.zero_wait_queue); -} - static int manage_credits_prior_sending(struct smbdirect_socket *sc) { int missing; @@ -1275,7 +1209,7 @@ static int post_sendmsg(struct smbdirect_socket *sc, msg->sge[i].addr, msg->sge[i].length, DMA_TO_DEVICE); - msg->cqe.done = send_done; + msg->cqe.done = smbdirect_connection_send_io_done; msg->wr.opcode = IB_WR_SEND; msg->wr.sg_list = &msg->sge[0]; msg->wr.num_sge = msg->num_sge; From 07aec3a151b732cfa06bd00821a1ed99b8f87c89 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sat, 13 Sep 2025 00:18:50 +0200 Subject: [PATCH 107/145] smb: server: make use of smbdirect_connection_{create,destroy}_mem_pools() These were based on smb_direct_{create,destroy}_pools() in the server. The main logical differences are the following: We now don't use smbdirect_connection_get_recv_io() on cleanup, instead it uses list_for_each_entry_safe()... We don't generate warnings if smbdirect_recv_io payload is copied into userspace buffers.
This doesn't happen in the server anyway. And it uses list_add_tail() just to let me feel better when looking at the code... Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 76 ++-------------------------------- 1 file changed, 3 insertions(+), 73 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index bbb7e95d302f..a258e87b6df0 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -215,7 +215,6 @@ static inline int get_buf_page_count(void *buf, int size) (uintptr_t)buf / PAGE_SIZE; } -static void smb_direct_destroy_pools(struct smbdirect_socket *sc); static void smb_direct_post_recv_credits(struct work_struct *work); static int smb_direct_post_send_data(struct smbdirect_socket *sc, struct smbdirect_send_batch *send_ctx, @@ -272,6 +271,7 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) smb_direct_logging_needed, smb_direct_logging_vaprintf); sc->send_io.mem.gfp_mask = KSMBD_DEFAULT_GFP; + sc->recv_io.mem.gfp_mask = KSMBD_DEFAULT_GFP; /* * from here we operate on the copy. 
*/ @@ -365,7 +365,7 @@ static void free_transport(struct smb_direct_transport *t) rdma_destroy_id(sc->rdma.cm_id); } - smb_direct_destroy_pools(sc); + smbdirect_connection_destroy_mem_pools(sc); ksmbd_conn_free(KSMBD_TRANS(t)->conn); } @@ -1974,76 +1974,6 @@ static int smb_direct_init_params(struct smbdirect_socket *sc) return 0; } -static void smb_direct_destroy_pools(struct smbdirect_socket *sc) -{ - struct smbdirect_recv_io *recvmsg; - - while ((recvmsg = smbdirect_connection_get_recv_io(sc))) - mempool_free(recvmsg, sc->recv_io.mem.pool); - - mempool_destroy(sc->recv_io.mem.pool); - sc->recv_io.mem.pool = NULL; - - kmem_cache_destroy(sc->recv_io.mem.cache); - sc->recv_io.mem.cache = NULL; - - mempool_destroy(sc->send_io.mem.pool); - sc->send_io.mem.pool = NULL; - - kmem_cache_destroy(sc->send_io.mem.cache); - sc->send_io.mem.cache = NULL; -} - -static int smb_direct_create_pools(struct smbdirect_socket *sc) -{ - struct smbdirect_socket_parameters *sp = &sc->parameters; - char name[80]; - int i; - struct smbdirect_recv_io *recvmsg; - - snprintf(name, sizeof(name), "smbdirect_send_io_pool_%p", sc); - sc->send_io.mem.cache = kmem_cache_create(name, - sizeof(struct smbdirect_send_io) + - sizeof(struct smbdirect_negotiate_resp), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!sc->send_io.mem.cache) - return -ENOMEM; - - sc->send_io.mem.pool = mempool_create(sp->send_credit_target, - mempool_alloc_slab, mempool_free_slab, - sc->send_io.mem.cache); - if (!sc->send_io.mem.pool) - goto err; - - snprintf(name, sizeof(name), "smbdirect_recv_io_pool_%p", sc); - sc->recv_io.mem.cache = kmem_cache_create(name, - sizeof(struct smbdirect_recv_io) + - sp->max_recv_size, - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!sc->recv_io.mem.cache) - goto err; - - sc->recv_io.mem.pool = - mempool_create(sp->recv_credit_max, mempool_alloc_slab, - mempool_free_slab, sc->recv_io.mem.cache); - if (!sc->recv_io.mem.pool) - goto err; - - for (i = 0; i < sp->recv_credit_max; i++) { - recvmsg = 
mempool_alloc(sc->recv_io.mem.pool, KSMBD_DEFAULT_GFP); - if (!recvmsg) - goto err; - recvmsg->socket = sc; - recvmsg->sge.length = 0; - list_add(&recvmsg->list, &sc->recv_io.free.list); - } - - return 0; -err: - smb_direct_destroy_pools(sc); - return -ENOMEM; -} - static u32 smb_direct_rdma_rw_send_wrs(struct ib_device *dev, const struct ib_qp_init_attr *attr) { /* @@ -2365,7 +2295,7 @@ static int smb_direct_connect(struct smbdirect_socket *sc) return ret; } - ret = smb_direct_create_pools(sc); + ret = smbdirect_connection_create_mem_pools(sc); if (ret) { pr_err("Can't init RDMA pool: %d\n", ret); return ret; From 8688d7a8c712dc507bf4ae6ee07c1eed536e35a5 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 15 Sep 2025 04:39:13 +0200 Subject: [PATCH 108/145] smb: server: make use of smbdirect_map_sges_from_iter() It will make it easier to move stuff into common code when both client and server use smbdirect_map_sges_from_iter(). Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 55 +++++++++++----------------------- 1 file changed, 18 insertions(+), 37 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index a258e87b6df0..5074b8ff332f 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -1188,16 +1188,6 @@ static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nen return i; } -static int get_mapped_sg_list(struct ib_device *device, void *buf, int size, - struct scatterlist *sg_list, int nentries, - enum dma_data_direction dir, int *npages) -{ - *npages = get_sg_list(buf, size, sg_list, nentries); - if (*npages < 0) - return -EINVAL; - return ib_dma_map_sg(device, sg_list, *npages, dir); -} - static int post_sendmsg(struct smbdirect_socket *sc, struct smbdirect_send_batch *send_ctx, 
struct smbdirect_send_io *msg) @@ -1241,10 +1231,9 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, struct kvec *iov, int niov, int remaining_data_length) { - int i, j, ret; + int i, ret; struct smbdirect_send_io *msg; int data_length; - struct scatterlist sg[SMBDIRECT_SEND_IO_MAX_SGE - 1]; struct smbdirect_send_batch _send_ctx; int new_credits; @@ -1291,35 +1280,27 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, if (ret) goto header_failed; - for (i = 0; i < niov; i++) { - struct ib_sge *sge; - int sg_cnt; - int npages; + if (data_length) { + struct smbdirect_map_sges extract = { + .num_sge = msg->num_sge, + .max_sge = ARRAY_SIZE(msg->sge), + .sge = msg->sge, + .device = sc->ib.dev, + .local_dma_lkey = sc->ib.pd->local_dma_lkey, + .direction = DMA_TO_DEVICE, + }; + struct iov_iter iter; - sg_init_table(sg, SMBDIRECT_SEND_IO_MAX_SGE - 1); - sg_cnt = get_mapped_sg_list(sc->ib.dev, - iov[i].iov_base, iov[i].iov_len, - sg, SMBDIRECT_SEND_IO_MAX_SGE - 1, - DMA_TO_DEVICE, &npages); - if (sg_cnt <= 0) { - pr_err("failed to map buffer\n"); - ret = -ENOMEM; + iov_iter_kvec(&iter, ITER_SOURCE, iov, niov, data_length); + + ret = smbdirect_map_sges_from_iter(&iter, data_length, &extract); + if (ret < 0) goto err; - } else if (sg_cnt + msg->num_sge > SMBDIRECT_SEND_IO_MAX_SGE) { - pr_err("buffer not fitted into sges\n"); - ret = -E2BIG; - ib_dma_unmap_sg(sc->ib.dev, sg, npages, - DMA_TO_DEVICE); + if (WARN_ON_ONCE(ret != data_length)) { + ret = -EIO; goto err; } - - for (j = 0; j < sg_cnt; j++) { - sge = &msg->sge[msg->num_sge]; - sge->addr = sg_dma_address(&sg[j]); - sge->length = sg_dma_len(&sg[j]); - sge->lkey = sc->ib.pd->local_dma_lkey; - msg->num_sge++; - } + msg->num_sge = extract.num_sge; } ret = post_sendmsg(sc, send_ctx, msg); From ab8e9249e735f8801039f7eef7ca556d65f64b2b Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 15 Sep 2025 07:59:20 +0200 Subject: [PATCH 109/145] smb: server: make use of 
smbdirect_connection_qp_event_handler() This is a copy of smb_direct_qpair_handler()... It will allow more code to be moved to common functions soon. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 5074b8ff332f..b7672558676d 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -1749,23 +1749,6 @@ static int smb_direct_cm_handler(struct rdma_cm_id *cm_id, return 0; } -static void smb_direct_qpair_handler(struct ib_event *event, void *context) -{ - struct smbdirect_socket *sc = context; - - ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n", - sc->rdma.cm_id, ib_event_msg(event->event), event->event); - - switch (event->event) { - case IB_EVENT_CQ_ERR: - case IB_EVENT_QP_FATAL: - smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); - break; - default: - break; - } -} - static int smb_direct_send_negotiate_response(struct smbdirect_socket *sc, int failed) { @@ -2116,7 +2099,7 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc) * again if max_rdma_ctxs is not 0. */ memset(&qp_attr, 0, sizeof(qp_attr)); - qp_attr.event_handler = smb_direct_qpair_handler; + qp_attr.event_handler = smbdirect_connection_qp_event_handler; qp_attr.qp_context = sc; qp_attr.cap = qp_cap; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; From d5e2bdda493f10ccc8e7c3545f79e2505ee94dff Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 18 Sep 2025 18:38:14 +0200 Subject: [PATCH 110/145] smb: server: make use of smbdirect_connection_negotiate_rdma_resources() It's good to have this logic in a central place, it will allow us to share more code soon.
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 62 ++++------------------------------ 1 file changed, 7 insertions(+), 55 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index b7672558676d..61c84d99acea 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -2307,66 +2307,18 @@ static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id, sc = &t->socket; sp = &sc->parameters; - peer_initiator_depth = event->param.conn.initiator_depth; - peer_responder_resources = event->param.conn.responder_resources; - if (rdma_protocol_iwarp(new_cm_id->device, new_cm_id->port_num) && - event->param.conn.private_data_len == 8) { - /* - * Legacy clients with only iWarp MPA v1 support - * need a private blob in order to negotiate - * the IRD/ORD values. - */ - const __be32 *ird_ord_hdr = event->param.conn.private_data; - u32 ird32 = be32_to_cpu(ird_ord_hdr[0]); - u32 ord32 = be32_to_cpu(ird_ord_hdr[1]); - - /* - * cifs.ko sends the legacy IRD/ORD negotiation - * event if iWarp MPA v2 was used. - * - * Here we check that the values match and only - * mark the client as legacy if they don't match. - */ - if ((u32)event->param.conn.initiator_depth != ird32 || - (u32)event->param.conn.responder_resources != ord32) { - /* - * There are broken clients (old cifs.ko) - * using little endian and also - * struct rdma_conn_param only uses u8 - * for initiator_depth and responder_resources, - * so we truncate the value to U8_MAX. - * - * smb_direct_accept_client() will then - * do the real negotiation in order to - * select the minimum between client and - * server. 
- */ - ird32 = min_t(u32, ird32, U8_MAX); - ord32 = min_t(u32, ord32, U8_MAX); - - sc->rdma.legacy_iwarp = true; - peer_initiator_depth = (u8)ird32; - peer_responder_resources = (u8)ord32; - } - } - /* * First set what the we as server are able to support */ sp->initiator_depth = min_t(u8, sp->initiator_depth, - new_cm_id->device->attrs.max_qp_rd_atom); + sc->ib.dev->attrs.max_qp_rd_atom); - /* - * negotiate the value by using the minimum - * between client and server if the client provided - * non 0 values. - */ - if (peer_initiator_depth != 0) - sp->initiator_depth = min_t(u8, sp->initiator_depth, - peer_initiator_depth); - if (peer_responder_resources != 0) - sp->responder_resources = min_t(u8, sp->responder_resources, - peer_responder_resources); + peer_initiator_depth = event->param.conn.initiator_depth; + peer_responder_resources = event->param.conn.responder_resources; + smbdirect_connection_negotiate_rdma_resources(sc, + peer_initiator_depth, + peer_responder_resources, + &event->param.conn); ret = smb_direct_connect(sc); if (ret) From 5a2999d7d9c550f265b04e1abf5774e97150d220 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 18 Sep 2025 18:48:40 +0200 Subject: [PATCH 111/145] smb: server: make use of smbdirect_connection_{create,destroy}_qp() It's good to use common code for this and it will allow us to share more code in the next steps. Calling ib_drain_qp() twice is ok.
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 217 +-------------------------------- 1 file changed, 6 insertions(+), 211 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 61c84d99acea..5ddf32238fda 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -270,6 +270,7 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) smbdirect_socket_set_logging(sc, NULL, smb_direct_logging_needed, smb_direct_logging_vaprintf); + sc->ib.poll_ctx = IB_POLL_WORKQUEUE; sc->send_io.mem.gfp_mask = KSMBD_DEFAULT_GFP; sc->recv_io.mem.gfp_mask = KSMBD_DEFAULT_GFP; /* @@ -332,11 +333,8 @@ static void free_transport(struct smb_direct_transport *t) if (sc->rdma.cm_id) rdma_lock_handler(sc->rdma.cm_id); - if (sc->ib.qp) { + if (sc->ib.qp) ib_drain_qp(sc->ib.qp); - sc->ib.qp = NULL; - rdma_destroy_qp(sc->rdma.cm_id); - } ksmbd_debug(RDMA, "drain the reassembly queue\n"); do { @@ -354,12 +352,7 @@ static void free_transport(struct smb_direct_transport *t) } while (recvmsg); sc->recv_io.reassembly.data_length = 0; - if (sc->ib.send_cq) - ib_free_cq(sc->ib.send_cq); - if (sc->ib.recv_cq) - ib_free_cq(sc->ib.recv_cq); - if (sc->ib.pd) - ib_dealloc_pd(sc->ib.pd); + smbdirect_connection_destroy_qp(sc); if (sc->rdma.cm_id) { rdma_unlock_handler(sc->rdma.cm_id); rdma_destroy_id(sc->rdma.cm_id); @@ -1938,206 +1931,6 @@ static int smb_direct_init_params(struct smbdirect_socket *sc) return 0; } -static u32 smb_direct_rdma_rw_send_wrs(struct ib_device *dev, const struct ib_qp_init_attr *attr) -{ - /* - * This could be split out of rdma_rw_init_qp() - * and be a helper function next to rdma_rw_mr_factor() - * - * We can't check unlikely(rdma_rw_force_mr) here, - * but that is most likely 0 anyway. 
- */ - u32 factor; - - WARN_ON_ONCE(attr->port_num == 0); - - /* - * Each context needs at least one RDMA READ or WRITE WR. - * - * For some hardware we might need more, eventually we should ask the - * HCA driver for a multiplier here. - */ - factor = 1; - - /* - * If the device needs MRs to perform RDMA READ or WRITE operations, - * we'll need two additional MRs for the registrations and the - * invalidation. - */ - if (rdma_protocol_iwarp(dev, attr->port_num) || dev->attrs.max_sgl_rd) - factor += 2; /* inv + reg */ - - return factor * attr->cap.max_rdma_ctxs; -} - -static int smb_direct_create_qpair(struct smbdirect_socket *sc) -{ - struct smbdirect_socket_parameters *sp = &sc->parameters; - int ret; - struct ib_qp_cap qp_cap; - struct ib_qp_init_attr qp_attr; - u32 max_send_wr; - u32 rdma_send_wr; - - /* - * Note that {rdma,ib}_create_qp() will call - * rdma_rw_init_qp() if cap->max_rdma_ctxs is not 0. - * It will adjust cap->max_send_wr to the required - * number of additional WRs for the RDMA RW operations. - * It will cap cap->max_send_wr to the device limit. - * - * +1 for ib_drain_qp - */ - qp_cap.max_send_wr = sp->send_credit_target + 1; - qp_cap.max_recv_wr = sp->recv_credit_max + 1; - qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; - qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; - qp_cap.max_inline_data = 0; - qp_cap.max_rdma_ctxs = sc->rw_io.credits.max; - - /* - * Find out the number of max_send_wr - * after rdma_rw_init_qp() adjusted it. - * - * We only do it on a temporary variable, - * as rdma_create_qp() will trigger - * rdma_rw_init_qp() again. 
- */ - memset(&qp_attr, 0, sizeof(qp_attr)); - qp_attr.cap = qp_cap; - qp_attr.port_num = sc->rdma.cm_id->port_num; - rdma_send_wr = smb_direct_rdma_rw_send_wrs(sc->ib.dev, &qp_attr); - max_send_wr = qp_cap.max_send_wr + rdma_send_wr; - - if (qp_cap.max_send_wr > sc->ib.dev->attrs.max_cqe || - qp_cap.max_send_wr > sc->ib.dev->attrs.max_qp_wr) { - pr_err("Possible CQE overrun: max_send_wr %d\n", - qp_cap.max_send_wr); - pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", - IB_DEVICE_NAME_MAX, - sc->ib.dev->name, - sc->ib.dev->attrs.max_cqe, - sc->ib.dev->attrs.max_qp_wr); - pr_err("consider lowering send_credit_target = %d\n", - sp->send_credit_target); - return -EINVAL; - } - - if (qp_cap.max_rdma_ctxs && - (max_send_wr >= sc->ib.dev->attrs.max_cqe || - max_send_wr >= sc->ib.dev->attrs.max_qp_wr)) { - pr_err("Possible CQE overrun: rdma_send_wr %d + max_send_wr %d = %d\n", - rdma_send_wr, qp_cap.max_send_wr, max_send_wr); - pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", - IB_DEVICE_NAME_MAX, - sc->ib.dev->name, - sc->ib.dev->attrs.max_cqe, - sc->ib.dev->attrs.max_qp_wr); - pr_err("consider lowering send_credit_target = %d, max_rdma_ctxs = %d\n", - sp->send_credit_target, qp_cap.max_rdma_ctxs); - return -EINVAL; - } - - if (qp_cap.max_recv_wr > sc->ib.dev->attrs.max_cqe || - qp_cap.max_recv_wr > sc->ib.dev->attrs.max_qp_wr) { - pr_err("Possible CQE overrun: max_recv_wr %d\n", - qp_cap.max_recv_wr); - pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", - IB_DEVICE_NAME_MAX, - sc->ib.dev->name, - sc->ib.dev->attrs.max_cqe, - sc->ib.dev->attrs.max_qp_wr); - pr_err("consider lowering receive_credit_max = %d\n", - sp->recv_credit_max); - return -EINVAL; - } - - if (qp_cap.max_send_sge > sc->ib.dev->attrs.max_send_sge || - qp_cap.max_recv_sge > sc->ib.dev->attrs.max_recv_sge) { - pr_err("device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", - IB_DEVICE_NAME_MAX, - sc->ib.dev->name, - sc->ib.dev->attrs.max_send_sge, - 
sc->ib.dev->attrs.max_recv_sge); - return -EINVAL; - } - - sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); - if (IS_ERR(sc->ib.pd)) { - pr_err("Can't create RDMA PD\n"); - ret = PTR_ERR(sc->ib.pd); - sc->ib.pd = NULL; - return ret; - } - - sc->ib.send_cq = ib_alloc_cq_any(sc->ib.dev, sc, - max_send_wr, - IB_POLL_WORKQUEUE); - if (IS_ERR(sc->ib.send_cq)) { - pr_err("Can't create RDMA send CQ\n"); - ret = PTR_ERR(sc->ib.send_cq); - sc->ib.send_cq = NULL; - goto err; - } - - sc->ib.recv_cq = ib_alloc_cq_any(sc->ib.dev, sc, - qp_cap.max_recv_wr, - IB_POLL_WORKQUEUE); - if (IS_ERR(sc->ib.recv_cq)) { - pr_err("Can't create RDMA recv CQ\n"); - ret = PTR_ERR(sc->ib.recv_cq); - sc->ib.recv_cq = NULL; - goto err; - } - - /* - * We reset completely here! - * As the above use was just temporary - * to calc max_send_wr and rdma_send_wr. - * - * rdma_create_qp() will trigger rdma_rw_init_qp() - * again if max_rdma_ctxs is not 0. - */ - memset(&qp_attr, 0, sizeof(qp_attr)); - qp_attr.event_handler = smbdirect_connection_qp_event_handler; - qp_attr.qp_context = sc; - qp_attr.cap = qp_cap; - qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; - qp_attr.qp_type = IB_QPT_RC; - qp_attr.send_cq = sc->ib.send_cq; - qp_attr.recv_cq = sc->ib.recv_cq; - qp_attr.port_num = ~0; - - ret = rdma_create_qp(sc->rdma.cm_id, sc->ib.pd, &qp_attr); - if (ret) { - pr_err("Can't create RDMA QP: %d\n", ret); - goto err; - } - - sc->ib.qp = sc->rdma.cm_id->qp; - sc->rdma.cm_id->event_handler = smb_direct_cm_handler; - - return 0; -err: - if (sc->ib.qp) { - sc->ib.qp = NULL; - rdma_destroy_qp(sc->rdma.cm_id); - } - if (sc->ib.recv_cq) { - ib_destroy_cq(sc->ib.recv_cq); - sc->ib.recv_cq = NULL; - } - if (sc->ib.send_cq) { - ib_destroy_cq(sc->ib.send_cq); - sc->ib.send_cq = NULL; - } - if (sc->ib.pd) { - ib_dealloc_pd(sc->ib.pd); - sc->ib.pd = NULL; - } - return ret; -} - static int smb_direct_prepare(struct ksmbd_transport *t) { struct smb_direct_transport *st = SMBD_TRANS(t); @@ -2253,6 +2046,8 @@ static int 
smb_direct_connect(struct smbdirect_socket *sc) struct smbdirect_recv_io *recv_io; int ret; + sc->rdma.cm_id->event_handler = smb_direct_cm_handler; + ret = smb_direct_init_params(sc); if (ret) { pr_err("Can't configure RDMA parameters\n"); @@ -2268,7 +2063,7 @@ static int smb_direct_connect(struct smbdirect_socket *sc) list_for_each_entry(recv_io, &sc->recv_io.free.list, list) recv_io->cqe.done = recv_done; - ret = smb_direct_create_qpair(sc); + ret = smbdirect_connection_create_qp(sc); if (ret) { pr_err("Can't accept RDMA client: %d\n", ret); return ret; From 62782820e85250bc9919621aa242510d300e9093 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 18 Sep 2025 19:52:03 +0200 Subject: [PATCH 112/145] smb: server: make use of smbdirect_connection_post_recv_io() The only difference is that smbdirect_connection_post_recv_io() returns early if the connection is already broken. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 39 ++-------------------------------- 1 file changed, 2 insertions(+), 37 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 5ddf32238fda..17642ba35d8d 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -658,41 +658,6 @@ static void smb_direct_negotiate_recv_work(struct work_struct *work) wake_up(&sc->status_wait); } -static int smb_direct_post_recv(struct smbdirect_socket *sc, - struct smbdirect_recv_io *recvmsg) -{ - struct smbdirect_socket_parameters *sp = &sc->parameters; - struct ib_recv_wr wr; - int ret; - - recvmsg->sge.addr = ib_dma_map_single(sc->ib.dev, - recvmsg->packet, - sp->max_recv_size, - DMA_FROM_DEVICE); - ret = ib_dma_mapping_error(sc->ib.dev, recvmsg->sge.addr); - if (ret) - return ret; - recvmsg->sge.length = sp->max_recv_size; - recvmsg->sge.lkey = 
sc->ib.pd->local_dma_lkey; - - wr.wr_cqe = &recvmsg->cqe; - wr.next = NULL; - wr.sg_list = &recvmsg->sge; - wr.num_sge = 1; - - ret = ib_post_recv(sc->ib.qp, &wr, NULL); - if (ret) { - pr_err("Can't post recv: %d\n", ret); - ib_dma_unmap_single(sc->ib.dev, - recvmsg->sge.addr, recvmsg->sge.length, - DMA_FROM_DEVICE); - recvmsg->sge.length = 0; - smbdirect_socket_schedule_cleanup(sc, ret); - return ret; - } - return ret; -} - static int smb_direct_read(struct ksmbd_transport *t, char *buf, unsigned int size, int unused) { @@ -829,7 +794,7 @@ static void smb_direct_post_recv_credits(struct work_struct *work) recvmsg->first_segment = false; - ret = smb_direct_post_recv(sc, recvmsg); + ret = smbdirect_connection_post_recv_io(recvmsg); if (ret) { pr_err("Can't post recv: %d\n", ret); smbdirect_connection_put_recv_io(recvmsg); @@ -1873,7 +1838,7 @@ static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc) return -ENOMEM; recvmsg->cqe.done = smb_direct_negotiate_recv_done; - ret = smb_direct_post_recv(sc, recvmsg); + ret = smbdirect_connection_post_recv_io(recvmsg); if (ret) { pr_err("Can't post recv: %d\n", ret); goto out_err; From 8d55169a570944cf68c740ba723987ffcd762728 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 18 Sep 2025 21:50:44 +0200 Subject: [PATCH 113/145] smb: server: make use of smbdirect_connection_recv_io_refill[_work]() This is basically a copy of smb_direct_post_recv_credits(), but there are several improvements compared to the existing function: 1. We calculate the number of missing posted buffers by getting the difference between recv_io.credits.target and recv_io.posted.count, instead of the difference between recv_io.credits.target and recv_io.credits.count, because recv_io.credits.count is only updated once a message is sent to the peer. It was not really a problem before, because we have a fixed number of smbdirect_recv_io buffers, so the loop terminated when smbdirect_connection_get_recv_io() returns NULL. 
But using recv_io.posted.count makes it easier to understand. 2. In order to tell the peer about the newly posted buffer and grant the credits, we only trigger the send immediate when we're not granting only the last possible credit. This is mostly a difference relative to the server's smb_direct_post_recv_credits() implementation, which should avoid useless ping pong messages. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 64 ++++++---------------------------- 1 file changed, 10 insertions(+), 54 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 17642ba35d8d..b03e68bab4b5 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -215,7 +215,6 @@ static inline int get_buf_page_count(void *buf, int size) (uintptr_t)buf / PAGE_SIZE; } -static void smb_direct_post_recv_credits(struct work_struct *work); static int smb_direct_post_send_data(struct smbdirect_socket *sc, struct smbdirect_send_batch *send_ctx, struct kvec *iov, int niov, @@ -778,49 +777,6 @@ static int smb_direct_read(struct ksmbd_transport *t, char *buf, goto again; } -static void smb_direct_post_recv_credits(struct work_struct *work) -{ - struct smbdirect_socket *sc = - container_of(work, struct smbdirect_socket, recv_io.posted.refill_work); - struct smbdirect_recv_io *recvmsg; - int credits = 0; - int ret; - - if (atomic_read(&sc->recv_io.credits.count) < sc->recv_io.credits.target) { - while (true) { - recvmsg = smbdirect_connection_get_recv_io(sc); - if (!recvmsg) - break; - - recvmsg->first_segment = false; - - ret = smbdirect_connection_post_recv_io(recvmsg); - if (ret) { - pr_err("Can't post recv: %d\n", ret); - smbdirect_connection_put_recv_io(recvmsg); - break; - } - credits++; - - atomic_inc(&sc->recv_io.posted.count); - } - } - - 
atomic_add(credits, &sc->recv_io.credits.available); - - /* - * If the last send credit is waiting for credits - * it can grant we need to wake it up - */ - if (credits && - atomic_read(&sc->send_io.bcredits.count) == 0 && - atomic_read(&sc->send_io.credits.count) == 0) - wake_up(&sc->send_io.credits.wait_queue); - - if (credits) - queue_work(sc->workqueue, &sc->idle.immediate_work); -} - static int manage_credits_prior_sending(struct smbdirect_socket *sc) { int missing; @@ -1986,24 +1942,24 @@ static int smb_direct_prepare(struct ksmbd_transport *t) /* * We negotiated with success, so we need to refill the recv queue. - * We do that with sc->idle.immediate_work still being disabled - * via smbdirect_socket_init(), so that queue_work(sc->workqueue, - * &sc->idle.immediate_work) in smb_direct_post_recv_credits() - * is a no-op. * * The message that grants the credits to the client is * the negotiate response. */ - INIT_WORK(&sc->recv_io.posted.refill_work, smb_direct_post_recv_credits); - smb_direct_post_recv_credits(&sc->recv_io.posted.refill_work); - if (unlikely(sc->first_error)) - return sc->first_error; - INIT_WORK(&sc->idle.immediate_work, smb_direct_send_immediate_work); + ret = smbdirect_connection_recv_io_refill(sc); + if (ret < 0) + return ret; + ret = 0; respond: ret = smb_direct_send_negotiate_response(sc, ret); + if (ret) + return ret; - return ret; + INIT_WORK(&sc->recv_io.posted.refill_work, smbdirect_connection_recv_io_refill_work); + INIT_WORK(&sc->idle.immediate_work, smb_direct_send_immediate_work); + + return 0; } static int smb_direct_connect(struct smbdirect_socket *sc) From be0ac9f59f4c7d3399388f8ec90137c5fed1fcd0 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 19 Sep 2025 09:41:16 +0200 Subject: [PATCH 114/145] smb: server: make use of smbdirect_get_buf_page_count() This will allow us to move code into common code between client and server soon. 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index b03e68bab4b5..40ae110b6abf 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -209,12 +209,6 @@ unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt) return sp->max_read_write_size; } -static inline int get_buf_page_count(void *buf, int size) -{ - return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) - - (uintptr_t)buf / PAGE_SIZE; -} - static int smb_direct_post_send_data(struct smbdirect_socket *sc, struct smbdirect_send_batch *send_ctx, struct kvec *iov, int niov, @@ -1000,7 +994,7 @@ static int wait_for_rw_credits(struct smbdirect_socket *sc, int credits) static int calc_rw_credits(struct smbdirect_socket *sc, char *buf, unsigned int len) { - return DIV_ROUND_UP(get_buf_page_count(buf, len), + return DIV_ROUND_UP(smbdirect_get_buf_page_count(buf, len), sc->rw_io.credits.num_pages); } @@ -1077,7 +1071,7 @@ static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nen int offset, len; int i = 0; - if (size <= 0 || nentries < get_buf_page_count(buf, size)) + if (size <= 0 || nentries < smbdirect_get_buf_page_count(buf, size)) return -EINVAL; offset = offset_in_page(buf); @@ -1312,7 +1306,7 @@ static int smb_direct_writev(struct ksmbd_transport *t, v->iov_len = min_t(size_t, iov[iov_idx].iov_len - iov_ofs, possible_bytes); - page_count = get_buf_page_count(v->iov_base, v->iov_len); + page_count = smbdirect_get_buf_page_count(v->iov_base, v->iov_len); if (page_count > possible_vecs) { /* * If the number of pages in the buffer @@ -1341,7 +1335,7 @@ static int smb_direct_writev(struct ksmbd_transport *t, size_t elen = 
min_t(size_t, v->iov_len - fplen, epages*PAGE_SIZE); v->iov_len = fplen + elen; - page_count = get_buf_page_count(v->iov_base, v->iov_len); + page_count = smbdirect_get_buf_page_count(v->iov_base, v->iov_len); if (WARN_ON_ONCE(page_count > possible_vecs)) { /* * Something went wrong in the above @@ -1506,7 +1500,7 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t, msg->sgt.sgl = &msg->sg_list[0]; ret = sg_alloc_table_chained(&msg->sgt, - get_buf_page_count(desc_buf, desc_buf_len), + smbdirect_get_buf_page_count(desc_buf, desc_buf_len), msg->sg_list, SG_CHUNK_SIZE); if (ret) { ret = -ENOMEM; @@ -1520,7 +1514,7 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t, ret = rdma_rw_ctx_init(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port, msg->sgt.sgl, - get_buf_page_count(desc_buf, desc_buf_len), + smbdirect_get_buf_page_count(desc_buf, desc_buf_len), 0, le64_to_cpu(desc[i].offset), le32_to_cpu(desc[i].token), From 0911d32ba20657c1ceafeab83442ef1f24cdff57 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 19 Sep 2025 09:52:35 +0200 Subject: [PATCH 115/145] smb: server: make use of smbdirect_socket_wait_for_credits() This will allow us to share more common code between client and server soon. 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 58 ++++++++++++++-------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 40ae110b6abf..3b052ac0df54 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -910,27 +910,6 @@ static int smb_direct_flush_send_list(struct smbdirect_socket *sc, return ret; } -static int wait_for_credits(struct smbdirect_socket *sc, - wait_queue_head_t *waitq, atomic_t *total_credits, - int needed) -{ - int ret; - - do { - if (atomic_sub_return(needed, total_credits) >= 0) - return 0; - - atomic_add(needed, total_credits); - ret = wait_event_interruptible(*waitq, - atomic_read(total_credits) >= needed || - sc->status != SMBDIRECT_SOCKET_CONNECTED); - - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) - return -ENOTCONN; - else if (ret < 0) - return ret; - } while (true); -} static int wait_for_send_bcredit(struct smbdirect_socket *sc, struct smbdirect_send_batch *send_ctx) @@ -940,10 +919,12 @@ static int wait_for_send_bcredit(struct smbdirect_socket *sc, if (send_ctx->credit) return 0; - ret = wait_for_credits(sc, - &sc->send_io.bcredits.wait_queue, - &sc->send_io.bcredits.count, - 1); + ret = smbdirect_socket_wait_for_credits(sc, + SMBDIRECT_SOCKET_CONNECTED, + -ENOTCONN, + &sc->send_io.bcredits.wait_queue, + &sc->send_io.bcredits.count, + 1); if (ret) return ret; @@ -962,10 +943,12 @@ static int wait_for_send_lcredit(struct smbdirect_socket *sc, return ret; } - return wait_for_credits(sc, - &sc->send_io.lcredits.wait_queue, - &sc->send_io.lcredits.count, - 1); + return smbdirect_socket_wait_for_credits(sc, + SMBDIRECT_SOCKET_CONNECTED, + -ENOTCONN, + &sc->send_io.lcredits.wait_queue, + &sc->send_io.lcredits.count, + 1); } static int 
wait_for_send_credits(struct smbdirect_socket *sc, @@ -980,15 +963,22 @@ static int wait_for_send_credits(struct smbdirect_socket *sc, return ret; } - return wait_for_credits(sc, &sc->send_io.credits.wait_queue, &sc->send_io.credits.count, 1); + return smbdirect_socket_wait_for_credits(sc, + SMBDIRECT_SOCKET_CONNECTED, + -ENOTCONN, + &sc->send_io.credits.wait_queue, + &sc->send_io.credits.count, + 1); } static int wait_for_rw_credits(struct smbdirect_socket *sc, int credits) { - return wait_for_credits(sc, - &sc->rw_io.credits.wait_queue, - &sc->rw_io.credits.count, - credits); + return smbdirect_socket_wait_for_credits(sc, + SMBDIRECT_SOCKET_CONNECTED, + -ENOTCONN, + &sc->rw_io.credits.wait_queue, + &sc->rw_io.credits.count, + credits); } static int calc_rw_credits(struct smbdirect_socket *sc, From 21a72d0900733f19b8b1b846e8318bfe96795636 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 19 Sep 2025 10:23:24 +0200 Subject: [PATCH 116/145] smb: server: make use of functions from smbdirect_rw.c The copied code only got new names, some indentation/formatting changes, some variable names are changed too. They also only use struct smbdirect_socket instead of struct smb_direct_transport. But the logic is still the same. 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 236 ++------------------------------- 1 file changed, 11 insertions(+), 225 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 3b052ac0df54..0cb3c7f24edc 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -266,6 +266,7 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) sc->ib.poll_ctx = IB_POLL_WORKQUEUE; sc->send_io.mem.gfp_mask = KSMBD_DEFAULT_GFP; sc->recv_io.mem.gfp_mask = KSMBD_DEFAULT_GFP; + sc->rw_io.mem.gfp_mask = KSMBD_DEFAULT_GFP; /* * from here we operate on the copy. */ @@ -971,23 +972,6 @@ static int wait_for_send_credits(struct smbdirect_socket *sc, 1); } -static int wait_for_rw_credits(struct smbdirect_socket *sc, int credits) -{ - return smbdirect_socket_wait_for_credits(sc, - SMBDIRECT_SOCKET_CONNECTED, - -ENOTCONN, - &sc->rw_io.credits.wait_queue, - &sc->rw_io.credits.count, - credits); -} - -static int calc_rw_credits(struct smbdirect_socket *sc, - char *buf, unsigned int len) -{ - return DIV_ROUND_UP(smbdirect_get_buf_page_count(buf, len), - sc->rw_io.credits.num_pages); -} - static int smb_direct_create_header(struct smbdirect_socket *sc, int size, int remaining_data_length, int new_credits, @@ -1054,38 +1038,6 @@ static int smb_direct_create_header(struct smbdirect_socket *sc, return 0; } -static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries) -{ - bool high = is_vmalloc_addr(buf); - struct page *page; - int offset, len; - int i = 0; - - if (size <= 0 || nentries < smbdirect_get_buf_page_count(buf, size)) - return -EINVAL; - - offset = offset_in_page(buf); - buf -= offset; - while (size > 0) { - len = min_t(int, PAGE_SIZE - offset, size); - if (high) - page = 
vmalloc_to_page(buf); - else - page = kmap_to_page(buf); - - if (!sg_list) - return -EINVAL; - sg_set_page(sg_list, page, len, offset); - sg_list = sg_next(sg_list); - - buf += PAGE_SIZE; - size -= len; - offset = 0; - i++; - } - return i; -} - static int post_sendmsg(struct smbdirect_socket *sc, struct smbdirect_send_batch *send_ctx, struct smbdirect_send_io *msg) @@ -1379,185 +1331,16 @@ static int smb_direct_writev(struct ksmbd_transport *t, return ret; } -static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t, - struct smbdirect_rw_io *msg, - enum dma_data_direction dir) -{ - struct smbdirect_socket *sc = &t->socket; - - rdma_rw_ctx_destroy(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port, - msg->sgt.sgl, msg->sgt.nents, dir); - sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); - kfree(msg); -} - -static void read_write_done(struct ib_cq *cq, struct ib_wc *wc, - enum dma_data_direction dir) -{ - struct smbdirect_rw_io *msg = - container_of(wc->wr_cqe, struct smbdirect_rw_io, cqe); - struct smbdirect_socket *sc = msg->socket; - - if (wc->status != IB_WC_SUCCESS) { - msg->error = -EIO; - pr_err("read/write error. 
opcode = %d, status = %s(%d)\n", - wc->opcode, ib_wc_status_msg(wc->status), wc->status); - if (wc->status != IB_WC_WR_FLUSH_ERR) - smbdirect_socket_schedule_cleanup(sc, msg->error); - } - - complete(msg->completion); -} - -static void read_done(struct ib_cq *cq, struct ib_wc *wc) -{ - read_write_done(cq, wc, DMA_FROM_DEVICE); -} - -static void write_done(struct ib_cq *cq, struct ib_wc *wc) -{ - read_write_done(cq, wc, DMA_TO_DEVICE); -} - -static int smb_direct_rdma_xmit(struct smb_direct_transport *t, - void *buf, int buf_len, - struct smbdirect_buffer_descriptor_v1 *desc, - unsigned int desc_len, - bool is_read) -{ - struct smbdirect_socket *sc = &t->socket; - struct smbdirect_socket_parameters *sp = &sc->parameters; - struct smbdirect_rw_io *msg, *next_msg; - int i, ret; - DECLARE_COMPLETION_ONSTACK(completion); - struct ib_send_wr *first_wr; - LIST_HEAD(msg_list); - char *desc_buf; - int credits_needed; - unsigned int desc_buf_len, desc_num = 0; - - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) - return -ENOTCONN; - - if (buf_len > sp->max_read_write_size) - return -EINVAL; - - /* calculate needed credits */ - credits_needed = 0; - desc_buf = buf; - for (i = 0; i < desc_len / sizeof(*desc); i++) { - if (!buf_len) - break; - - desc_buf_len = le32_to_cpu(desc[i].length); - if (!desc_buf_len) - return -EINVAL; - - if (desc_buf_len > buf_len) { - desc_buf_len = buf_len; - desc[i].length = cpu_to_le32(desc_buf_len); - buf_len = 0; - } - - credits_needed += calc_rw_credits(sc, desc_buf, desc_buf_len); - desc_buf += desc_buf_len; - buf_len -= desc_buf_len; - desc_num++; - } - - ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n", - str_read_write(is_read), buf_len, credits_needed); - - ret = wait_for_rw_credits(sc, credits_needed); - if (ret < 0) - return ret; - - /* build rdma_rw_ctx for each descriptor */ - desc_buf = buf; - for (i = 0; i < desc_num; i++) { - msg = kzalloc_flex(*msg, sg_list, SG_CHUNK_SIZE, - KSMBD_DEFAULT_GFP); - if (!msg) { - ret = 
-ENOMEM; - goto out; - } - - desc_buf_len = le32_to_cpu(desc[i].length); - - msg->socket = sc; - msg->cqe.done = is_read ? read_done : write_done; - msg->completion = &completion; - - msg->sgt.sgl = &msg->sg_list[0]; - ret = sg_alloc_table_chained(&msg->sgt, - smbdirect_get_buf_page_count(desc_buf, desc_buf_len), - msg->sg_list, SG_CHUNK_SIZE); - if (ret) { - ret = -ENOMEM; - goto free_msg; - } - - ret = get_sg_list(desc_buf, desc_buf_len, - msg->sgt.sgl, msg->sgt.orig_nents); - if (ret < 0) - goto free_table; - - ret = rdma_rw_ctx_init(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port, - msg->sgt.sgl, - smbdirect_get_buf_page_count(desc_buf, desc_buf_len), - 0, - le64_to_cpu(desc[i].offset), - le32_to_cpu(desc[i].token), - is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE); - if (ret < 0) { - pr_err("failed to init rdma_rw_ctx: %d\n", ret); - goto free_table; - } - - list_add_tail(&msg->list, &msg_list); - desc_buf += desc_buf_len; - } - - /* concatenate work requests of rdma_rw_ctxs */ - first_wr = NULL; - list_for_each_entry_reverse(msg, &msg_list, list) { - first_wr = rdma_rw_ctx_wrs(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port, - &msg->cqe, first_wr); - } - - ret = ib_post_send(sc->ib.qp, first_wr, NULL); - if (ret) { - pr_err("failed to post send wr for RDMA R/W: %d\n", ret); - goto out; - } - - msg = list_last_entry(&msg_list, struct smbdirect_rw_io, list); - wait_for_completion(&completion); - ret = msg->error; -out: - list_for_each_entry_safe(msg, next_msg, &msg_list, list) { - list_del(&msg->list); - smb_direct_free_rdma_rw_msg(t, msg, - is_read ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE); - } - atomic_add(credits_needed, &sc->rw_io.credits.count); - wake_up(&sc->rw_io.credits.wait_queue); - return ret; - -free_table: - sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); -free_msg: - kfree(msg); - goto out; -} - static int smb_direct_rdma_write(struct ksmbd_transport *t, void *buf, unsigned int buflen, struct smbdirect_buffer_descriptor_v1 *desc, unsigned int desc_len) { - return smb_direct_rdma_xmit(SMBD_TRANS(t), buf, buflen, - desc, desc_len, false); + struct smb_direct_transport *st = SMBD_TRANS(t); + struct smbdirect_socket *sc = &st->socket; + + return smbdirect_connection_rdma_xmit(sc, buf, buflen, + desc, desc_len, false); } static int smb_direct_rdma_read(struct ksmbd_transport *t, @@ -1565,8 +1348,11 @@ static int smb_direct_rdma_read(struct ksmbd_transport *t, struct smbdirect_buffer_descriptor_v1 *desc, unsigned int desc_len) { - return smb_direct_rdma_xmit(SMBD_TRANS(t), buf, buflen, - desc, desc_len, true); + struct smb_direct_transport *st = SMBD_TRANS(t); + struct smbdirect_socket *sc = &st->socket; + + return smbdirect_connection_rdma_xmit(sc, buf, buflen, + desc, desc_len, true); } static void smb_direct_disconnect(struct ksmbd_transport *t) From a3bf9bfee8370d7a497276c26ee639d1d7e41677 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 13 Oct 2025 16:36:07 +0200 Subject: [PATCH 117/145] smb: server: make use of smbdirect_socket_destroy_sync() This is basically the same logic as before, but we now use common code, which will also be used by the server soon. 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 50 +--------------------------------- 1 file changed, 1 insertion(+), 49 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 0cb3c7f24edc..fbc9d31cde05 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -302,57 +302,9 @@ static void smb_direct_free_transport(struct ksmbd_transport *kt) static void free_transport(struct smb_direct_transport *t) { struct smbdirect_socket *sc = &t->socket; - struct smbdirect_recv_io *recvmsg; - disable_work_sync(&sc->disconnect_work); - if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) - smbdirect_socket_cleanup_work(&sc->disconnect_work); - if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED) - wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); + smbdirect_socket_destroy_sync(sc); - /* - * Wake up all waiters in all wait queues - * in order to notice the broken connection. - * - * Most likely this was already called via - * smbdirect_socket_cleanup_work(), but call it again... 
- smbdirect_socket_wake_up_all(sc); - - disable_work_sync(&sc->connect.work); - disable_work_sync(&sc->recv_io.posted.refill_work); - disable_delayed_work_sync(&sc->idle.timer_work); - disable_work_sync(&sc->idle.immediate_work); - - if (sc->rdma.cm_id) - rdma_lock_handler(sc->rdma.cm_id); - - if (sc->ib.qp) - ib_drain_qp(sc->ib.qp); - - ksmbd_debug(RDMA, "drain the reassembly queue\n"); - do { - unsigned long flags; - - spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); - recvmsg = smbdirect_connection_reassembly_first_recv_io(sc); - if (recvmsg) { - list_del(&recvmsg->list); - spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); - smbdirect_connection_put_recv_io(recvmsg); - } else { - spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); - } - } while (recvmsg); - sc->recv_io.reassembly.data_length = 0; - - smbdirect_connection_destroy_qp(sc); - if (sc->rdma.cm_id) { - rdma_unlock_handler(sc->rdma.cm_id); - rdma_destroy_id(sc->rdma.cm_id); - } - - smbdirect_connection_destroy_mem_pools(sc); ksmbd_conn_free(KSMBD_TRANS(t)->conn); } From 73489efddadc53dbdd4270569c0c00492ace9801 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 13 Oct 2025 18:49:30 +0200 Subject: [PATCH 118/145] smb: server: make use of smbdirect_connection_recvmsg() This is basically the same logic, it just operates on iov_iter_kvec() instead of a raw buffer pointer. This allows us to use common code between client and server. We keep returning -EINTR instead of -ERESTARTSYS if wait_event_interruptible() fails. I don't know if this is required, but changing it is a task for another patch. 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 128 +++------------------------------ 1 file changed, 11 insertions(+), 117 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index fbc9d31cde05..9f13f96978cd 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -214,12 +214,6 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, struct kvec *iov, int niov, int remaining_data_length); -static inline void -*smbdirect_recv_io_payload(struct smbdirect_recv_io *recvmsg) -{ - return (void *)recvmsg->packet; -} - static void smb_direct_send_immediate_work(struct work_struct *work) { struct smbdirect_socket *sc = @@ -607,121 +601,21 @@ static void smb_direct_negotiate_recv_work(struct work_struct *work) static int smb_direct_read(struct ksmbd_transport *t, char *buf, unsigned int size, int unused) { - struct smbdirect_recv_io *recvmsg; - struct smbdirect_data_transfer *data_transfer; - int to_copy, to_read, data_read, offset; - u32 data_length, remaining_data_length, data_offset; - int rc; struct smb_direct_transport *st = SMBD_TRANS(t); struct smbdirect_socket *sc = &st->socket; + struct msghdr msg = { .msg_flags = 0, }; + struct kvec iov = { + .iov_base = buf, + .iov_len = size, + }; + int ret; -again: - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { - pr_err("disconnected\n"); - return -ENOTCONN; - } + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, size); - /* - * No need to hold the reassembly queue lock all the time as we are - * the only one reading from the front of the queue. 
The transport - * may add more entries to the back of the queue at the same time - */ - if (sc->recv_io.reassembly.data_length >= size) { - int queue_length; - int queue_removed = 0; - unsigned long flags; - - /* - * Need to make sure reassembly_data_length is read before - * reading reassembly_queue_length and calling - * smbdirect_connection_reassembly_first_recv_io. This call is lock free - * as we never read at the end of the queue which are being - * updated in SOFTIRQ as more data is received - */ - virt_rmb(); - queue_length = sc->recv_io.reassembly.queue_length; - data_read = 0; - to_read = size; - offset = sc->recv_io.reassembly.first_entry_offset; - while (data_read < size) { - recvmsg = smbdirect_connection_reassembly_first_recv_io(sc); - data_transfer = smbdirect_recv_io_payload(recvmsg); - data_length = le32_to_cpu(data_transfer->data_length); - remaining_data_length = - le32_to_cpu(data_transfer->remaining_data_length); - data_offset = le32_to_cpu(data_transfer->data_offset); - - /* - * The upper layer expects RFC1002 length at the - * beginning of the payload. Return it to indicate - * the total length of the packet. This minimize the - * change to upper layer packet processing logic. This - * will be eventually remove when an intermediate - * transport layer is added - */ - if (recvmsg->first_segment && size == 4) { - unsigned int rfc1002_len = - data_length + remaining_data_length; - *((__be32 *)buf) = cpu_to_be32(rfc1002_len); - data_read = 4; - recvmsg->first_segment = false; - ksmbd_debug(RDMA, - "returning rfc1002 length %d\n", - rfc1002_len); - goto read_rfc1002_done; - } - - to_copy = min_t(int, data_length - offset, to_read); - memcpy(buf + data_read, (char *)data_transfer + data_offset + offset, - to_copy); - - /* move on to the next buffer? 
*/ - if (to_copy == data_length - offset) { - queue_length--; - /* - * No need to lock if we are not at the - * end of the queue - */ - if (queue_length) { - list_del(&recvmsg->list); - } else { - spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); - list_del(&recvmsg->list); - spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); - } - queue_removed++; - smbdirect_connection_put_recv_io(recvmsg); - offset = 0; - } else { - offset += to_copy; - } - - to_read -= to_copy; - data_read += to_copy; - } - - spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); - sc->recv_io.reassembly.data_length -= data_read; - sc->recv_io.reassembly.queue_length -= queue_removed; - spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); - - sc->recv_io.reassembly.first_entry_offset = offset; - ksmbd_debug(RDMA, - "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", - data_read, sc->recv_io.reassembly.data_length, - sc->recv_io.reassembly.first_entry_offset); -read_rfc1002_done: - return data_read; - } - - ksmbd_debug(RDMA, "wait_event on more data\n"); - rc = wait_event_interruptible(sc->recv_io.reassembly.wait_queue, - sc->recv_io.reassembly.data_length >= size || - sc->status != SMBDIRECT_SOCKET_CONNECTED); - if (rc) - return -EINTR; - - goto again; + ret = smbdirect_connection_recvmsg(sc, &msg, 0); + if (ret == -ERESTARTSYS) + ret = -EINTR; + return ret; } static int manage_credits_prior_sending(struct smbdirect_socket *sc) From 0a1702e9319f428e2e24a6f4b7109d212296f812 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 15:45:01 +0200 Subject: [PATCH 119/145] smb: server: make use of smbdirect_connection_grant_recv_credits() This is already used by the client too and will help to share more common code. 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 45 +++------------------------------- 1 file changed, 4 insertions(+), 41 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 9f13f96978cd..278dca912485 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -618,43 +618,6 @@ static int smb_direct_read(struct ksmbd_transport *t, char *buf, return ret; } -static int manage_credits_prior_sending(struct smbdirect_socket *sc) -{ - int missing; - int available; - int new_credits; - - if (atomic_read(&sc->recv_io.credits.count) >= sc->recv_io.credits.target) - return 0; - - missing = (int)sc->recv_io.credits.target - atomic_read(&sc->recv_io.credits.count); - available = atomic_xchg(&sc->recv_io.credits.available, 0); - new_credits = (u16)min3(U16_MAX, missing, available); - if (new_credits <= 0) { - /* - * If credits are available, but not granted - * we need to re-add them again. - */ - if (available) - atomic_add(available, &sc->recv_io.credits.available); - return 0; - } - - if (new_credits < available) { - /* - * Readd the remaining available again. 
- */ - available -= new_credits; - atomic_add(available, &sc->recv_io.credits.available); - } - - /* - * Remember we granted the credits - */ - atomic_add(new_credits, &sc->recv_io.credits.count); - return new_credits; -} - static int manage_keep_alive_before_sending(struct smbdirect_socket *sc) { struct smbdirect_socket_parameters *sp = &sc->parameters; @@ -931,7 +894,7 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, struct smbdirect_send_io *msg; int data_length; struct smbdirect_send_batch _send_ctx; - int new_credits; + u16 new_credits; if (!send_ctx) { smb_direct_send_ctx_init(&_send_ctx, false, 0); @@ -950,7 +913,7 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, if (ret) goto credit_failed; - new_credits = manage_credits_prior_sending(sc); + new_credits = smbdirect_connection_grant_recv_credits(sc); if (new_credits == 0 && atomic_read(&sc->send_io.credits.count) == 0 && atomic_read(&sc->recv_io.credits.count) == 0) { @@ -964,7 +927,7 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, if (ret < 0) goto credit_failed; - new_credits = manage_credits_prior_sending(sc); + new_credits = smbdirect_connection_grant_recv_credits(sc); } data_length = 0; @@ -1307,7 +1270,7 @@ static int smb_direct_send_negotiate_response(struct smbdirect_socket *sc, resp->reserved = 0; resp->credits_requested = cpu_to_le16(sp->send_credit_target); - resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(sc)); + resp->credits_granted = cpu_to_le16(smbdirect_connection_grant_recv_credits(sc)); resp->max_readwrite_size = cpu_to_le32(sp->max_read_write_size); resp->preferred_send_size = cpu_to_le32(sp->max_send_size); resp->max_receive_size = cpu_to_le32(sp->max_recv_size); From 1421d50ea941c450d089d3b296d308f2b2728f6d Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 15:53:04 +0200 Subject: [PATCH 120/145] smb: server: make use of smbdirect_connection_request_keep_alive() This will help to share 
more common code soon. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 278dca912485..c3d4ce423db3 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -618,23 +618,6 @@ static int smb_direct_read(struct ksmbd_transport *t, char *buf, return ret; } -static int manage_keep_alive_before_sending(struct smbdirect_socket *sc) -{ - struct smbdirect_socket_parameters *sp = &sc->parameters; - - if (sc->idle.keepalive == SMBDIRECT_KEEPALIVE_PENDING) { - sc->idle.keepalive = SMBDIRECT_KEEPALIVE_SENT; - /* - * Now use the keepalive timeout (instead of keepalive interval) - * in order to wait for a response - */ - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, - msecs_to_jiffies(sp->keepalive_timeout_msec)); - return 1; - } - return 0; -} - static int smb_direct_post_send(struct smbdirect_socket *sc, struct ib_send_wr *wr) { @@ -802,7 +785,7 @@ static int smb_direct_create_header(struct smbdirect_socket *sc, packet->credits_granted = cpu_to_le16(new_credits); packet->flags = 0; - if (manage_keep_alive_before_sending(sc)) + if (smbdirect_connection_request_keep_alive(sc)) packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); packet->reserved = 0; From 0184d2b386f836925ff2f9b4e6d4f9a8048cf58f Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 16:51:42 +0200 Subject: [PATCH 121/145] smb: server: move iov_iter_kvec() out of smb_direct_post_send_data() This will allow us to make the code more generic in order to move it to common with the client. 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 38 ++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index c3d4ce423db3..fa4f8f9ea11b 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -211,8 +211,8 @@ unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt) static int smb_direct_post_send_data(struct smbdirect_socket *sc, struct smbdirect_send_batch *send_ctx, - struct kvec *iov, int niov, - int remaining_data_length); + struct iov_iter *iter, + size_t *remaining_data_length); static void smb_direct_send_immediate_work(struct work_struct *work) { @@ -222,7 +222,7 @@ static void smb_direct_send_immediate_work(struct work_struct *work) if (sc->status != SMBDIRECT_SOCKET_CONNECTED) return; - smb_direct_post_send_data(sc, NULL, NULL, 0, 0); + smb_direct_post_send_data(sc, NULL, NULL, NULL); } static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) @@ -870,12 +870,13 @@ static int post_sendmsg(struct smbdirect_socket *sc, static int smb_direct_post_send_data(struct smbdirect_socket *sc, struct smbdirect_send_batch *send_ctx, - struct kvec *iov, int niov, - int remaining_data_length) + struct iov_iter *iter, + size_t *_remaining_data_length) { - int i, ret; + int ret; struct smbdirect_send_io *msg; - int data_length; + u32 remaining_data_length = 0; + u32 data_length = 0; struct smbdirect_send_batch _send_ctx; u16 new_credits; @@ -913,16 +914,20 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, new_credits = smbdirect_connection_grant_recv_credits(sc); } - data_length = 0; - for (i = 0; i < niov; i++) - data_length += iov[i].iov_len; + if (iter) + data_length = iov_iter_count(iter); + + if 
(_remaining_data_length) { + *_remaining_data_length -= data_length; + remaining_data_length = *_remaining_data_length; + } ret = smb_direct_create_header(sc, data_length, remaining_data_length, new_credits, &msg); if (ret) goto header_failed; - if (data_length) { + if (iter) { struct smbdirect_map_sges extract = { .num_sge = msg->num_sge, .max_sge = ARRAY_SIZE(msg->sge), @@ -931,11 +936,8 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, .local_dma_lkey = sc->ib.pd->local_dma_lkey, .direction = DMA_TO_DEVICE, }; - struct iov_iter iter; - iov_iter_kvec(&iter, ITER_SOURCE, iov, niov, data_length); - - ret = smbdirect_map_sges_from_iter(&iter, data_length, &extract); + ret = smbdirect_map_sges_from_iter(iter, data_length, &extract); if (ret < 0) goto err; if (WARN_ON_ONCE(ret != data_length)) { @@ -1011,6 +1013,7 @@ static int smb_direct_writev(struct ksmbd_transport *t, size_t possible_vecs; size_t bytes = 0; size_t nvecs = 0; + struct iov_iter iter; /* * For the last message remaining_data_length should be @@ -1091,11 +1094,10 @@ static int smb_direct_writev(struct ksmbd_transport *t, } } - remaining_data_length -= bytes; + iov_iter_kvec(&iter, ITER_SOURCE, vecs, nvecs, bytes); ret = smb_direct_post_send_data(sc, &send_ctx, - vecs, nvecs, - remaining_data_length); + &iter, &remaining_data_length); if (unlikely(ret)) { error = ret; goto done; From 0af87a0a31668d4a0dc8d8140fb51da594935eb4 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 17:24:28 +0200 Subject: [PATCH 122/145] smb: server: inline smb_direct_create_header() into smb_direct_post_send_data() The point is that ib_dma_map_single() is done first, but the 'Fill in the packet header' will be done after smbdirect_map_sges_from_iter(). This will simplify further changes in order to share common code with the client. 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 126 +++++++++++++++------------------ 1 file changed, 56 insertions(+), 70 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index fa4f8f9ea11b..add4b6a27fac 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -764,72 +764,6 @@ static int wait_for_send_credits(struct smbdirect_socket *sc, 1); } -static int smb_direct_create_header(struct smbdirect_socket *sc, - int size, int remaining_data_length, - int new_credits, - struct smbdirect_send_io **sendmsg_out) -{ - struct smbdirect_socket_parameters *sp = &sc->parameters; - struct smbdirect_send_io *sendmsg; - struct smbdirect_data_transfer *packet; - int header_length; - int ret; - - sendmsg = smbdirect_connection_alloc_send_io(sc); - if (IS_ERR(sendmsg)) - return PTR_ERR(sendmsg); - - /* Fill in the packet header */ - packet = (struct smbdirect_data_transfer *)sendmsg->packet; - packet->credits_requested = cpu_to_le16(sp->send_credit_target); - packet->credits_granted = cpu_to_le16(new_credits); - - packet->flags = 0; - if (smbdirect_connection_request_keep_alive(sc)) - packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); - - packet->reserved = 0; - if (!size) - packet->data_offset = 0; - else - packet->data_offset = cpu_to_le32(24); - packet->data_length = cpu_to_le32(size); - packet->remaining_data_length = cpu_to_le32(remaining_data_length); - packet->padding = 0; - - ksmbd_debug(RDMA, - "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n", - le16_to_cpu(packet->credits_requested), - le16_to_cpu(packet->credits_granted), - le32_to_cpu(packet->data_offset), - le32_to_cpu(packet->data_length), - le32_to_cpu(packet->remaining_data_length)); - - /* Map the 
packet to DMA */ - header_length = sizeof(struct smbdirect_data_transfer); - /* If this is a packet without payload, don't send padding */ - if (!size) - header_length = - offsetof(struct smbdirect_data_transfer, padding); - - sendmsg->sge[0].addr = ib_dma_map_single(sc->ib.dev, - (void *)packet, - header_length, - DMA_TO_DEVICE); - ret = ib_dma_mapping_error(sc->ib.dev, sendmsg->sge[0].addr); - if (ret) { - smbdirect_connection_free_send_io(sendmsg); - return ret; - } - - sendmsg->num_sge = 1; - sendmsg->sge[0].length = header_length; - sendmsg->sge[0].lkey = sc->ib.pd->local_dma_lkey; - - *sendmsg_out = sendmsg; - return 0; -} - static int post_sendmsg(struct smbdirect_socket *sc, struct smbdirect_send_batch *send_ctx, struct smbdirect_send_io *msg) @@ -873,13 +807,23 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, struct iov_iter *iter, size_t *_remaining_data_length) { + const struct smbdirect_socket_parameters *sp = &sc->parameters; int ret; struct smbdirect_send_io *msg; + struct smbdirect_data_transfer *packet; + size_t header_length; u32 remaining_data_length = 0; u32 data_length = 0; struct smbdirect_send_batch _send_ctx; u16 new_credits; + if (iter) { + header_length = sizeof(struct smbdirect_data_transfer); + } else { + /* If this is a packet without payload, don't send padding */ + header_length = offsetof(struct smbdirect_data_transfer, padding); + } + if (!send_ctx) { smb_direct_send_ctx_init(&_send_ctx, false, 0); send_ctx = &_send_ctx; @@ -922,10 +866,24 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, remaining_data_length = *_remaining_data_length; } - ret = smb_direct_create_header(sc, data_length, remaining_data_length, - new_credits, &msg); + msg = smbdirect_connection_alloc_send_io(sc); + if (IS_ERR(msg)) { + ret = PTR_ERR(msg); + goto alloc_failed; + } + + /* Map the packet to DMA */ + msg->sge[0].addr = ib_dma_map_single(sc->ib.dev, + msg->packet, + header_length, + DMA_TO_DEVICE); + ret = 
ib_dma_mapping_error(sc->ib.dev, msg->sge[0].addr); if (ret) - goto header_failed; + goto err; + + msg->sge[0].length = header_length; + msg->sge[0].lkey = sc->ib.pd->local_dma_lkey; + msg->num_sge = 1; if (iter) { struct smbdirect_map_sges extract = { @@ -947,6 +905,34 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, msg->num_sge = extract.num_sge; } + /* Fill in the packet header */ + packet = (struct smbdirect_data_transfer *)msg->packet; + packet->credits_requested = cpu_to_le16(sp->send_credit_target); + new_credits = smbdirect_connection_grant_recv_credits(sc); + packet->credits_granted = cpu_to_le16(new_credits); + + packet->flags = 0; + if (smbdirect_connection_request_keep_alive(sc)) + packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); + + packet->reserved = 0; + if (!data_length) + packet->data_offset = 0; + else + packet->data_offset = cpu_to_le32(24); + packet->data_length = cpu_to_le32(data_length); + packet->remaining_data_length = cpu_to_le32(remaining_data_length); + packet->padding = 0; + + ksmbd_debug(RDMA, + "credits_req=%u credits_granted=%u flags=0x%x ofs=%u len=%u remaining=%u\n", + le16_to_cpu(packet->credits_requested), + le16_to_cpu(packet->credits_granted), + le16_to_cpu(packet->flags), + le32_to_cpu(packet->data_offset), + le32_to_cpu(packet->data_length), + le32_to_cpu(packet->remaining_data_length)); + ret = post_sendmsg(sc, send_ctx, msg); if (ret) goto err; @@ -966,7 +952,7 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, err: smbdirect_connection_free_send_io(msg); flush_failed: -header_failed: +alloc_failed: atomic_inc(&sc->send_io.credits.count); credit_failed: atomic_inc(&sc->send_io.lcredits.count); From da20536c508c0f511cf20ceef6757ea4861bf547 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 17:58:16 +0200 Subject: [PATCH 123/145] smb: server: let smbdirect_map_sges_from_iter() truncate the message boundary smbdirect_map_sges_from_iter() already handles 
the case that only a limited number of sges are available. Its return value is data_length and the remaining bytes in the iter are remaining_data_length. This is now much easier and will allow us to share more code with the client soon. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 151 +++++++-------------------------- 1 file changed, 31 insertions(+), 120 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index add4b6a27fac..adec517e91b5 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -212,7 +212,7 @@ unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt) static int smb_direct_post_send_data(struct smbdirect_socket *sc, struct smbdirect_send_batch *send_ctx, struct iov_iter *iter, - size_t *remaining_data_length); + u32 remaining_data_length); static void smb_direct_send_immediate_work(struct work_struct *work) { @@ -222,7 +222,7 @@ static void smb_direct_send_immediate_work(struct work_struct *work) if (sc->status != SMBDIRECT_SOCKET_CONNECTED) return; - smb_direct_post_send_data(sc, NULL, NULL, NULL); + smb_direct_post_send_data(sc, NULL, NULL, 0); } static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) @@ -805,23 +805,27 @@ static int post_sendmsg(struct smbdirect_socket *sc, static int smb_direct_post_send_data(struct smbdirect_socket *sc, struct smbdirect_send_batch *send_ctx, struct iov_iter *iter, - size_t *_remaining_data_length) + u32 remaining_data_length) { const struct smbdirect_socket_parameters *sp = &sc->parameters; int ret; struct smbdirect_send_io *msg; struct smbdirect_data_transfer *packet; size_t header_length; - u32 remaining_data_length = 0; u32 data_length = 0; struct smbdirect_send_batch _send_ctx; u16 new_credits; if (iter) { header_length = 
sizeof(struct smbdirect_data_transfer); + if (WARN_ON_ONCE(remaining_data_length == 0 || + iov_iter_count(iter) > remaining_data_length)) + return -EINVAL; } else { /* If this is a packet without payload, don't send padding */ header_length = offsetof(struct smbdirect_data_transfer, padding); + if (WARN_ON_ONCE(remaining_data_length)) + return -EINVAL; } if (!send_ctx) { @@ -858,14 +862,6 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, new_credits = smbdirect_connection_grant_recv_credits(sc); } - if (iter) - data_length = iov_iter_count(iter); - - if (_remaining_data_length) { - *_remaining_data_length -= data_length; - remaining_data_length = *_remaining_data_length; - } - msg = smbdirect_connection_alloc_send_io(sc); if (IS_ERR(msg)) { ret = PTR_ERR(msg); @@ -894,14 +890,14 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, .local_dma_lkey = sc->ib.pd->local_dma_lkey, .direction = DMA_TO_DEVICE, }; + size_t payload_len = umin(iov_iter_count(iter), + sp->max_send_size - sizeof(*packet)); - ret = smbdirect_map_sges_from_iter(iter, data_length, &extract); + ret = smbdirect_map_sges_from_iter(iter, payload_len, &extract); if (ret < 0) goto err; - if (WARN_ON_ONCE(ret != data_length)) { - ret = -EIO; - goto err; - } + data_length = ret; + remaining_data_length -= data_length; msg->num_sge = extract.num_sge; } @@ -970,13 +966,9 @@ static int smb_direct_writev(struct ksmbd_transport *t, struct smb_direct_transport *st = SMBD_TRANS(t); struct smbdirect_socket *sc = &st->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; - size_t remaining_data_length; - size_t iov_idx; - size_t iov_ofs; - size_t max_iov_size = sp->max_send_size - - sizeof(struct smbdirect_data_transfer); int ret; struct smbdirect_send_batch send_ctx; + struct iov_iter iter; int error = 0; if (sc->status != SMBDIRECT_SOCKET_CONNECTED) @@ -985,112 +977,31 @@ static int smb_direct_writev(struct ksmbd_transport *t, //FIXME: skip RFC1002 header.. 
if (WARN_ON_ONCE(niovs <= 1 || iov[0].iov_len != 4)) return -EINVAL; - buflen -= 4; - iov_idx = 1; - iov_ofs = 0; + iov_iter_kvec(&iter, ITER_SOURCE, iov, niovs, buflen); + iov_iter_advance(&iter, 4); - remaining_data_length = buflen; - ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen); + /* + * The size must fit into the negotiated + * fragmented send size. + */ + if (iov_iter_count(&iter) > sp->max_fragmented_send_size) + return -EMSGSIZE; + + ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%zu\n", + iov_iter_count(&iter)); smb_direct_send_ctx_init(&send_ctx, need_invalidate, remote_key); - while (remaining_data_length) { - struct kvec vecs[SMBDIRECT_SEND_IO_MAX_SGE - 1]; /* minus smbdirect hdr */ - size_t possible_bytes = max_iov_size; - size_t possible_vecs; - size_t bytes = 0; - size_t nvecs = 0; - struct iov_iter iter; - - /* - * For the last message remaining_data_length should be - * have been 0 already! - */ - if (WARN_ON_ONCE(iov_idx >= niovs)) { - error = -EINVAL; - goto done; - } - - /* - * We have 2 factors which limit the arguments we pass - * to smb_direct_post_send_data(): - * - * 1. The number of supported sges for the send, - * while one is reserved for the smbdirect header. - * And we currently need one SGE per page. - * 2. The number of negotiated payload bytes per send. - */ - possible_vecs = min_t(size_t, ARRAY_SIZE(vecs), niovs - iov_idx); - - while (iov_idx < niovs && possible_vecs && possible_bytes) { - struct kvec *v = &vecs[nvecs]; - int page_count; - - v->iov_base = ((u8 *)iov[iov_idx].iov_base) + iov_ofs; - v->iov_len = min_t(size_t, - iov[iov_idx].iov_len - iov_ofs, - possible_bytes); - page_count = smbdirect_get_buf_page_count(v->iov_base, v->iov_len); - if (page_count > possible_vecs) { - /* - * If the number of pages in the buffer - * is to much (because we currently require - * one SGE per page), we need to limit the - * length. - * - * We know possible_vecs is at least 1, - * so we always keep the first page. 
- * - * We need to calculate the number extra - * pages (epages) we can also keep. - * - * We calculate the number of bytes in the - * first page (fplen), this should never be - * larger than v->iov_len because page_count is - * at least 2, but adding a limitation feels - * better. - * - * Then we calculate the number of bytes (elen) - * we can keep for the extra pages. - */ - size_t epages = possible_vecs - 1; - size_t fpofs = offset_in_page(v->iov_base); - size_t fplen = min_t(size_t, PAGE_SIZE - fpofs, v->iov_len); - size_t elen = min_t(size_t, v->iov_len - fplen, epages*PAGE_SIZE); - - v->iov_len = fplen + elen; - page_count = smbdirect_get_buf_page_count(v->iov_base, v->iov_len); - if (WARN_ON_ONCE(page_count > possible_vecs)) { - /* - * Something went wrong in the above - * logic... - */ - error = -EINVAL; - goto done; - } - } - possible_vecs -= page_count; - nvecs += 1; - possible_bytes -= v->iov_len; - bytes += v->iov_len; - - iov_ofs += v->iov_len; - if (iov_ofs >= iov[iov_idx].iov_len) { - iov_idx += 1; - iov_ofs = 0; - } - } - - iov_iter_kvec(&iter, ITER_SOURCE, vecs, nvecs, bytes); - - ret = smb_direct_post_send_data(sc, &send_ctx, - &iter, &remaining_data_length); + while (iov_iter_count(&iter)) { + ret = smb_direct_post_send_data(sc, + &send_ctx, + &iter, + iov_iter_count(&iter)); if (unlikely(ret)) { error = ret; - goto done; + break; } } -done: ret = smb_direct_flush_send_list(sc, &send_ctx, true); if (unlikely(!ret && error)) ret = error; From 08ffdf0c416849615e8bc935839557429ec24194 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 19:25:12 +0200 Subject: [PATCH 124/145] smb: server: split out smb_direct_send_iter() out of smb_direct_writev() This will help to move to common code in future. 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 47 +++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index adec517e91b5..29281005a30e 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -959,43 +959,47 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, return ret; } -static int smb_direct_writev(struct ksmbd_transport *t, - struct kvec *iov, int niovs, int buflen, - bool need_invalidate, unsigned int remote_key) +static int smb_direct_send_iter(struct smbdirect_socket *sc, + struct iov_iter *iter, + bool need_invalidate, + unsigned int remote_key) { - struct smb_direct_transport *st = SMBD_TRANS(t); - struct smbdirect_socket *sc = &st->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; int ret; struct smbdirect_send_batch send_ctx; - struct iov_iter iter; int error = 0; + __be32 hdr; if (sc->status != SMBDIRECT_SOCKET_CONNECTED) return -ENOTCONN; - //FIXME: skip RFC1002 header.. - if (WARN_ON_ONCE(niovs <= 1 || iov[0].iov_len != 4)) + /* + * For now we expect the iter to have the full + * message, including a 4 byte length header. + */ + if (iov_iter_count(iter) <= 4) + return -EINVAL; + if (!copy_from_iter_full(&hdr, sizeof(hdr), iter)) + return -EFAULT; + if (iov_iter_count(iter) != be32_to_cpu(hdr)) return -EINVAL; - iov_iter_kvec(&iter, ITER_SOURCE, iov, niovs, buflen); - iov_iter_advance(&iter, 4); /* * The size must fit into the negotiated * fragmented send size. 
*/ - if (iov_iter_count(&iter) > sp->max_fragmented_send_size) + if (iov_iter_count(iter) > sp->max_fragmented_send_size) return -EMSGSIZE; ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%zu\n", - iov_iter_count(&iter)); + iov_iter_count(iter)); smb_direct_send_ctx_init(&send_ctx, need_invalidate, remote_key); - while (iov_iter_count(&iter)) { + while (iov_iter_count(iter)) { ret = smb_direct_post_send_data(sc, &send_ctx, - &iter, - iov_iter_count(&iter)); + iter, + iov_iter_count(iter)); if (unlikely(ret)) { error = ret; break; @@ -1022,6 +1026,19 @@ static int smb_direct_writev(struct ksmbd_transport *t, return ret; } +static int smb_direct_writev(struct ksmbd_transport *t, + struct kvec *iov, int niovs, int buflen, + bool need_invalidate, unsigned int remote_key) +{ + struct smb_direct_transport *st = SMBD_TRANS(t); + struct smbdirect_socket *sc = &st->socket; + struct iov_iter iter; + + iov_iter_kvec(&iter, ITER_SOURCE, iov, niovs, buflen); + + return smb_direct_send_iter(sc, &iter, need_invalidate, remote_key); +} + static int smb_direct_rdma_write(struct ksmbd_transport *t, void *buf, unsigned int buflen, struct smbdirect_buffer_descriptor_v1 *desc, From c6b077efbc39e0ad6c20733693671ab4a8dbba18 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 20:20:02 +0200 Subject: [PATCH 125/145] smb: server: let smb_direct_post_send_data() return data_length This makes it easier to move to common code shared with the client. 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 29281005a30e..b43582a453e9 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -944,7 +944,7 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, goto flush_failed; } - return 0; + return data_length; err: smbdirect_connection_free_send_io(msg); flush_failed: @@ -1000,7 +1000,7 @@ static int smb_direct_send_iter(struct smbdirect_socket *sc, &send_ctx, iter, iov_iter_count(iter)); - if (unlikely(ret)) { + if (unlikely(ret < 0)) { error = ret; break; } From 4b4c21a7d2204bda49aa9772d407ba1264727d6d Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 22:16:56 +0200 Subject: [PATCH 126/145] smb: server: make use of smbdirect_connection_send_iter() and related functions This makes use of common code for sending messages, which will allow us to make more use of common code in the next commits. 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 431 +-------------------------------- 1 file changed, 4 insertions(+), 427 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index b43582a453e9..2ecb279476c1 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -209,22 +209,6 @@ unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt) return sp->max_read_write_size; } -static int smb_direct_post_send_data(struct smbdirect_socket *sc, - struct smbdirect_send_batch *send_ctx, - struct iov_iter *iter, - u32 remaining_data_length); - -static void smb_direct_send_immediate_work(struct work_struct *work) -{ - struct smbdirect_socket *sc = - container_of(work, struct smbdirect_socket, idle.immediate_work); - - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) - return; - - smb_direct_post_send_data(sc, NULL, NULL, 0); -} - static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) { struct smb_direct_transport *t; @@ -618,414 +602,6 @@ static int smb_direct_read(struct ksmbd_transport *t, char *buf, return ret; } -static int smb_direct_post_send(struct smbdirect_socket *sc, - struct ib_send_wr *wr) -{ - int ret; - - atomic_inc(&sc->send_io.pending.count); - ret = ib_post_send(sc->ib.qp, wr, NULL); - if (ret) { - pr_err("failed to post send: %d\n", ret); - smbdirect_socket_schedule_cleanup(sc, ret); - } - return ret; -} - -static void smb_direct_send_ctx_init(struct smbdirect_send_batch *send_ctx, - bool need_invalidate_rkey, - unsigned int remote_key) -{ - INIT_LIST_HEAD(&send_ctx->msg_list); - send_ctx->wr_cnt = 0; - send_ctx->need_invalidate_rkey = need_invalidate_rkey; - send_ctx->remote_key = remote_key; - send_ctx->credit = 0; -} - -static int smb_direct_flush_send_list(struct 
smbdirect_socket *sc, - struct smbdirect_send_batch *send_ctx, - bool is_last) -{ - struct smbdirect_send_io *first, *last; - int ret = 0; - - if (list_empty(&send_ctx->msg_list)) - goto release_credit; - - first = list_first_entry(&send_ctx->msg_list, - struct smbdirect_send_io, - sibling_list); - last = list_last_entry(&send_ctx->msg_list, - struct smbdirect_send_io, - sibling_list); - - if (send_ctx->need_invalidate_rkey) { - first->wr.opcode = IB_WR_SEND_WITH_INV; - first->wr.ex.invalidate_rkey = send_ctx->remote_key; - send_ctx->need_invalidate_rkey = false; - send_ctx->remote_key = 0; - } - - last->wr.send_flags = IB_SEND_SIGNALED; - last->wr.wr_cqe = &last->cqe; - - /* - * Remove last from send_ctx->msg_list - * and splice the rest of send_ctx->msg_list - * to last->sibling_list. - * - * send_ctx->msg_list is a valid empty list - * at the end. - */ - list_del_init(&last->sibling_list); - list_splice_tail_init(&send_ctx->msg_list, &last->sibling_list); - send_ctx->wr_cnt = 0; - - ret = smb_direct_post_send(sc, &first->wr); - if (ret) { - struct smbdirect_send_io *sibling, *next; - - list_for_each_entry_safe(sibling, next, &last->sibling_list, sibling_list) { - list_del_init(&sibling->sibling_list); - smbdirect_connection_free_send_io(sibling); - } - smbdirect_connection_free_send_io(last); - } - -release_credit: - if (is_last && !ret && send_ctx->credit) { - atomic_add(send_ctx->credit, &sc->send_io.bcredits.count); - send_ctx->credit = 0; - wake_up(&sc->send_io.bcredits.wait_queue); - } - - return ret; -} - - -static int wait_for_send_bcredit(struct smbdirect_socket *sc, - struct smbdirect_send_batch *send_ctx) -{ - int ret; - - if (send_ctx->credit) - return 0; - - ret = smbdirect_socket_wait_for_credits(sc, - SMBDIRECT_SOCKET_CONNECTED, - -ENOTCONN, - &sc->send_io.bcredits.wait_queue, - &sc->send_io.bcredits.count, - 1); - if (ret) - return ret; - - send_ctx->credit = 1; - return 0; -} - -static int wait_for_send_lcredit(struct smbdirect_socket *sc, - 
struct smbdirect_send_batch *send_ctx) -{ - if (send_ctx && (atomic_read(&sc->send_io.lcredits.count) <= 1)) { - int ret; - - ret = smb_direct_flush_send_list(sc, send_ctx, false); - if (ret) - return ret; - } - - return smbdirect_socket_wait_for_credits(sc, - SMBDIRECT_SOCKET_CONNECTED, - -ENOTCONN, - &sc->send_io.lcredits.wait_queue, - &sc->send_io.lcredits.count, - 1); -} - -static int wait_for_send_credits(struct smbdirect_socket *sc, - struct smbdirect_send_batch *send_ctx) -{ - int ret; - - if (send_ctx && - (send_ctx->wr_cnt >= 16 || atomic_read(&sc->send_io.credits.count) <= 1)) { - ret = smb_direct_flush_send_list(sc, send_ctx, false); - if (ret) - return ret; - } - - return smbdirect_socket_wait_for_credits(sc, - SMBDIRECT_SOCKET_CONNECTED, - -ENOTCONN, - &sc->send_io.credits.wait_queue, - &sc->send_io.credits.count, - 1); -} - -static int post_sendmsg(struct smbdirect_socket *sc, - struct smbdirect_send_batch *send_ctx, - struct smbdirect_send_io *msg) -{ - int i; - - for (i = 0; i < msg->num_sge; i++) - ib_dma_sync_single_for_device(sc->ib.dev, - msg->sge[i].addr, msg->sge[i].length, - DMA_TO_DEVICE); - - msg->cqe.done = smbdirect_connection_send_io_done; - msg->wr.opcode = IB_WR_SEND; - msg->wr.sg_list = &msg->sge[0]; - msg->wr.num_sge = msg->num_sge; - msg->wr.next = NULL; - - if (send_ctx) { - msg->wr.wr_cqe = NULL; - msg->wr.send_flags = 0; - if (!list_empty(&send_ctx->msg_list)) { - struct smbdirect_send_io *last; - - last = list_last_entry(&send_ctx->msg_list, - struct smbdirect_send_io, - sibling_list); - last->wr.next = &msg->wr; - } - list_add_tail(&msg->sibling_list, &send_ctx->msg_list); - send_ctx->wr_cnt++; - return 0; - } - - msg->wr.wr_cqe = &msg->cqe; - msg->wr.send_flags = IB_SEND_SIGNALED; - return smb_direct_post_send(sc, &msg->wr); -} - -static int smb_direct_post_send_data(struct smbdirect_socket *sc, - struct smbdirect_send_batch *send_ctx, - struct iov_iter *iter, - u32 remaining_data_length) -{ - const struct 
smbdirect_socket_parameters *sp = &sc->parameters; - int ret; - struct smbdirect_send_io *msg; - struct smbdirect_data_transfer *packet; - size_t header_length; - u32 data_length = 0; - struct smbdirect_send_batch _send_ctx; - u16 new_credits; - - if (iter) { - header_length = sizeof(struct smbdirect_data_transfer); - if (WARN_ON_ONCE(remaining_data_length == 0 || - iov_iter_count(iter) > remaining_data_length)) - return -EINVAL; - } else { - /* If this is a packet without payload, don't send padding */ - header_length = offsetof(struct smbdirect_data_transfer, padding); - if (WARN_ON_ONCE(remaining_data_length)) - return -EINVAL; - } - - if (!send_ctx) { - smb_direct_send_ctx_init(&_send_ctx, false, 0); - send_ctx = &_send_ctx; - } - - ret = wait_for_send_bcredit(sc, send_ctx); - if (ret) - goto bcredit_failed; - - ret = wait_for_send_lcredit(sc, send_ctx); - if (ret) - goto lcredit_failed; - - ret = wait_for_send_credits(sc, send_ctx); - if (ret) - goto credit_failed; - - new_credits = smbdirect_connection_grant_recv_credits(sc); - if (new_credits == 0 && - atomic_read(&sc->send_io.credits.count) == 0 && - atomic_read(&sc->recv_io.credits.count) == 0) { - queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); - ret = wait_event_interruptible(sc->send_io.credits.wait_queue, - atomic_read(&sc->send_io.credits.count) >= 1 || - atomic_read(&sc->recv_io.credits.available) >= 1 || - sc->status != SMBDIRECT_SOCKET_CONNECTED); - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) - ret = -ENOTCONN; - if (ret < 0) - goto credit_failed; - - new_credits = smbdirect_connection_grant_recv_credits(sc); - } - - msg = smbdirect_connection_alloc_send_io(sc); - if (IS_ERR(msg)) { - ret = PTR_ERR(msg); - goto alloc_failed; - } - - /* Map the packet to DMA */ - msg->sge[0].addr = ib_dma_map_single(sc->ib.dev, - msg->packet, - header_length, - DMA_TO_DEVICE); - ret = ib_dma_mapping_error(sc->ib.dev, msg->sge[0].addr); - if (ret) - goto err; - - msg->sge[0].length = header_length; - 
msg->sge[0].lkey = sc->ib.pd->local_dma_lkey; - msg->num_sge = 1; - - if (iter) { - struct smbdirect_map_sges extract = { - .num_sge = msg->num_sge, - .max_sge = ARRAY_SIZE(msg->sge), - .sge = msg->sge, - .device = sc->ib.dev, - .local_dma_lkey = sc->ib.pd->local_dma_lkey, - .direction = DMA_TO_DEVICE, - }; - size_t payload_len = umin(iov_iter_count(iter), - sp->max_send_size - sizeof(*packet)); - - ret = smbdirect_map_sges_from_iter(iter, payload_len, &extract); - if (ret < 0) - goto err; - data_length = ret; - remaining_data_length -= data_length; - msg->num_sge = extract.num_sge; - } - - /* Fill in the packet header */ - packet = (struct smbdirect_data_transfer *)msg->packet; - packet->credits_requested = cpu_to_le16(sp->send_credit_target); - new_credits = smbdirect_connection_grant_recv_credits(sc); - packet->credits_granted = cpu_to_le16(new_credits); - - packet->flags = 0; - if (smbdirect_connection_request_keep_alive(sc)) - packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); - - packet->reserved = 0; - if (!data_length) - packet->data_offset = 0; - else - packet->data_offset = cpu_to_le32(24); - packet->data_length = cpu_to_le32(data_length); - packet->remaining_data_length = cpu_to_le32(remaining_data_length); - packet->padding = 0; - - ksmbd_debug(RDMA, - "credits_req=%u credits_granted=%u flags=0x%x ofs=%u len=%u remaining=%u\n", - le16_to_cpu(packet->credits_requested), - le16_to_cpu(packet->credits_granted), - le16_to_cpu(packet->flags), - le32_to_cpu(packet->data_offset), - le32_to_cpu(packet->data_length), - le32_to_cpu(packet->remaining_data_length)); - - ret = post_sendmsg(sc, send_ctx, msg); - if (ret) - goto err; - - /* - * From here msg is moved to send_ctx - * and we should not free it explicitly. 
- */ - - if (send_ctx == &_send_ctx) { - ret = smb_direct_flush_send_list(sc, send_ctx, true); - if (ret) - goto flush_failed; - } - - return data_length; -err: - smbdirect_connection_free_send_io(msg); -flush_failed: -alloc_failed: - atomic_inc(&sc->send_io.credits.count); -credit_failed: - atomic_inc(&sc->send_io.lcredits.count); -lcredit_failed: - atomic_add(send_ctx->credit, &sc->send_io.bcredits.count); - send_ctx->credit = 0; -bcredit_failed: - return ret; -} - -static int smb_direct_send_iter(struct smbdirect_socket *sc, - struct iov_iter *iter, - bool need_invalidate, - unsigned int remote_key) -{ - struct smbdirect_socket_parameters *sp = &sc->parameters; - int ret; - struct smbdirect_send_batch send_ctx; - int error = 0; - __be32 hdr; - - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) - return -ENOTCONN; - - /* - * For now we expect the iter to have the full - * message, including a 4 byte length header. - */ - if (iov_iter_count(iter) <= 4) - return -EINVAL; - if (!copy_from_iter_full(&hdr, sizeof(hdr), iter)) - return -EFAULT; - if (iov_iter_count(iter) != be32_to_cpu(hdr)) - return -EINVAL; - - /* - * The size must fit into the negotiated - * fragmented send size. - */ - if (iov_iter_count(iter) > sp->max_fragmented_send_size) - return -EMSGSIZE; - - ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%zu\n", - iov_iter_count(iter)); - - smb_direct_send_ctx_init(&send_ctx, need_invalidate, remote_key); - while (iov_iter_count(iter)) { - ret = smb_direct_post_send_data(sc, - &send_ctx, - iter, - iov_iter_count(iter)); - if (unlikely(ret < 0)) { - error = ret; - break; - } - } - - ret = smb_direct_flush_send_list(sc, &send_ctx, true); - if (unlikely(!ret && error)) - ret = error; - - /* - * As an optimization, we don't wait for individual I/O to finish - * before sending the next one. 
- * Send them all and wait for pending send count to get to 0 - * that means all the I/Os have been out and we are good to return - */ - - wait_event(sc->send_io.pending.zero_wait_queue, - atomic_read(&sc->send_io.pending.count) == 0 || - sc->status != SMBDIRECT_SOCKET_CONNECTED); - if (sc->status != SMBDIRECT_SOCKET_CONNECTED && ret == 0) - ret = -ENOTCONN; - - return ret; -} - static int smb_direct_writev(struct ksmbd_transport *t, struct kvec *iov, int niovs, int buflen, bool need_invalidate, unsigned int remote_key) @@ -1036,7 +612,8 @@ static int smb_direct_writev(struct ksmbd_transport *t, iov_iter_kvec(&iter, ITER_SOURCE, iov, niovs, buflen); - return smb_direct_send_iter(sc, &iter, need_invalidate, remote_key); + return smbdirect_connection_send_iter(sc, &iter, 0, + need_invalidate, remote_key); } static int smb_direct_rdma_write(struct ksmbd_transport *t, @@ -1194,7 +771,7 @@ static int smb_direct_send_negotiate_response(struct smbdirect_socket *sc, sendmsg->sge[0].length = sizeof(*resp); sendmsg->sge[0].lkey = sc->ib.pd->local_dma_lkey; - ret = post_sendmsg(sc, NULL, sendmsg); + ret = smbdirect_connection_post_send_io(sc, NULL, sendmsg); if (ret) { smbdirect_connection_free_send_io(sendmsg); return ret; @@ -1435,7 +1012,7 @@ static int smb_direct_prepare(struct ksmbd_transport *t) return ret; INIT_WORK(&sc->recv_io.posted.refill_work, smbdirect_connection_recv_io_refill_work); - INIT_WORK(&sc->idle.immediate_work, smb_direct_send_immediate_work); + INIT_WORK(&sc->idle.immediate_work, smbdirect_connection_send_immediate_work); return 0; } From 94604164871e4c182d1305ab1e43971f41b6cf38 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 21 Sep 2025 00:21:41 +0200 Subject: [PATCH 127/145] smb: server: make use of smbdirect_{socket_init_accepting,connection_wait_for_connected}() This means we finally only use common functions in the server. 
We still use the embedded struct smbdirect_socket and are able to access internals, but they will be removed in the next commits as well. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 742 ++------------------------------- 1 file changed, 29 insertions(+), 713 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 2ecb279476c1..29e90dfcc254 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -197,14 +197,14 @@ unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt) { struct smb_direct_transport *t; struct smbdirect_socket *sc; - struct smbdirect_socket_parameters *sp; + const struct smbdirect_socket_parameters *sp; if (kt->ops != &ksmbd_smb_direct_transport_ops) return 0; t = SMBD_TRANS(kt); sc = &t->socket; - sp = &sc->parameters; + sp = smbdirect_socket_get_current_parameters(sc); return sp->max_read_write_size; } @@ -216,6 +216,7 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) struct smbdirect_socket_parameters init_params = {}; struct smbdirect_socket_parameters *sp; struct ksmbd_conn *conn; + int ret; /* * Create the initial parameters @@ -237,27 +238,25 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) if (!t) return NULL; sc = &t->socket; - smbdirect_socket_prepare_create(sc, sp, smb_direct_wq); + ret = smbdirect_socket_init_accepting(cm_id, sc); + if (ret) + goto socket_init_failed; smbdirect_socket_set_logging(sc, NULL, smb_direct_logging_needed, smb_direct_logging_vaprintf); - sc->ib.poll_ctx = IB_POLL_WORKQUEUE; - sc->send_io.mem.gfp_mask = KSMBD_DEFAULT_GFP; - sc->recv_io.mem.gfp_mask = KSMBD_DEFAULT_GFP; - sc->rw_io.mem.gfp_mask = KSMBD_DEFAULT_GFP; - /* - * from here we operate on the copy.
- */ - sp = &sc->parameters; - - sc->rdma.cm_id = cm_id; - cm_id->context = sc; - - sc->ib.dev = sc->rdma.cm_id->device; + ret = smbdirect_socket_set_initial_parameters(sc, sp); + if (ret) + goto set_params_failed; + ret = smbdirect_socket_set_kernel_settings(sc, IB_POLL_WORKQUEUE, KSMBD_DEFAULT_GFP); + if (ret) + goto set_settings_failed; + ret = smbdirect_socket_set_custom_workqueue(sc, smb_direct_wq); + if (ret) + goto set_workqueue_failed; conn = ksmbd_conn_alloc(); if (!conn) - goto err; + goto conn_alloc_failed; down_write(&conn_list_lock); hash_add(conn_list, &conn->hlist, 0); @@ -267,7 +266,12 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) KSMBD_TRANS(t)->conn = conn; KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops; return t; -err: + +conn_alloc_failed: +set_workqueue_failed: +set_settings_failed: +set_params_failed: +socket_init_failed: kfree(t); return NULL; } @@ -286,302 +290,6 @@ static void free_transport(struct smb_direct_transport *t) ksmbd_conn_free(KSMBD_TRANS(t)->conn); } -static int smb_direct_check_recvmsg(struct smbdirect_recv_io *recvmsg) -{ - struct smbdirect_socket *sc = recvmsg->socket; - - switch (sc->recv_io.expected) { - case SMBDIRECT_EXPECT_DATA_TRANSFER: { - struct smbdirect_data_transfer *req = - (struct smbdirect_data_transfer *)recvmsg->packet; - struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet - + le32_to_cpu(req->data_offset)); - ksmbd_debug(RDMA, - "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n", - le16_to_cpu(req->credits_granted), - le16_to_cpu(req->credits_requested), - req->data_length, req->remaining_data_length, - hdr->ProtocolId, hdr->Command); - return 0; - } - case SMBDIRECT_EXPECT_NEGOTIATE_REQ: { - struct smbdirect_negotiate_req *req = - (struct smbdirect_negotiate_req *)recvmsg->packet; - ksmbd_debug(RDMA, - "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, 
MaxFragmentedSize: %u\n", - le16_to_cpu(req->min_version), - le16_to_cpu(req->max_version), - le16_to_cpu(req->credits_requested), - le32_to_cpu(req->preferred_send_size), - le32_to_cpu(req->max_receive_size), - le32_to_cpu(req->max_fragmented_size)); - if (le16_to_cpu(req->min_version) > 0x0100 || - le16_to_cpu(req->max_version) < 0x0100) - return -EOPNOTSUPP; - if (le16_to_cpu(req->credits_requested) <= 0 || - le32_to_cpu(req->max_receive_size) <= 128 || - le32_to_cpu(req->max_fragmented_size) <= - 128 * 1024) - return -ECONNABORTED; - - return 0; - } - case SMBDIRECT_EXPECT_NEGOTIATE_REP: - /* client only */ - break; - } - - /* This is an internal error */ - return -EINVAL; -} - -static void recv_done(struct ib_cq *cq, struct ib_wc *wc) -{ - struct smbdirect_recv_io *recvmsg; - struct smbdirect_socket *sc; - struct smbdirect_socket_parameters *sp; - - recvmsg = container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); - sc = recvmsg->socket; - sp = &sc->parameters; - - if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { - smbdirect_connection_put_recv_io(recvmsg); - if (wc->status != IB_WC_WR_FLUSH_ERR) { - pr_err("Recv error. status='%s (%d)' opcode=%d\n", - ib_wc_status_msg(wc->status), wc->status, - wc->opcode); - smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); - } - return; - } - - ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n", - ib_wc_status_msg(wc->status), wc->status, - wc->opcode); - - ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr, - recvmsg->sge.length, DMA_FROM_DEVICE); - - /* - * Reset timer to the keepalive interval in - * order to trigger our next keepalive message. 
- */ - sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, - msecs_to_jiffies(sp->keepalive_interval_msec)); - - switch (sc->recv_io.expected) { - case SMBDIRECT_EXPECT_NEGOTIATE_REQ: - /* see smb_direct_negotiate_recv_done */ - break; - case SMBDIRECT_EXPECT_DATA_TRANSFER: { - struct smbdirect_data_transfer *data_transfer = - (struct smbdirect_data_transfer *)recvmsg->packet; - u32 remaining_data_length, data_offset, data_length; - int current_recv_credits; - u16 old_recv_credit_target; - - if (wc->byte_len < - offsetof(struct smbdirect_data_transfer, padding)) { - smbdirect_connection_put_recv_io(recvmsg); - smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); - return; - } - - remaining_data_length = le32_to_cpu(data_transfer->remaining_data_length); - data_length = le32_to_cpu(data_transfer->data_length); - data_offset = le32_to_cpu(data_transfer->data_offset); - if (wc->byte_len < data_offset || - wc->byte_len < (u64)data_offset + data_length) { - smbdirect_connection_put_recv_io(recvmsg); - smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); - return; - } - if (remaining_data_length > sp->max_fragmented_recv_size || - data_length > sp->max_fragmented_recv_size || - (u64)remaining_data_length + (u64)data_length > - (u64)sp->max_fragmented_recv_size) { - smbdirect_connection_put_recv_io(recvmsg); - smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); - return; - } - - if (data_length) { - if (sc->recv_io.reassembly.full_packet_received) - recvmsg->first_segment = true; - - if (le32_to_cpu(data_transfer->remaining_data_length)) - sc->recv_io.reassembly.full_packet_received = false; - else - sc->recv_io.reassembly.full_packet_received = true; - } - - atomic_dec(&sc->recv_io.posted.count); - current_recv_credits = atomic_dec_return(&sc->recv_io.credits.count); - - old_recv_credit_target = sc->recv_io.credits.target; - sc->recv_io.credits.target = - le16_to_cpu(data_transfer->credits_requested); - 
sc->recv_io.credits.target = - min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); - sc->recv_io.credits.target = - max_t(u16, sc->recv_io.credits.target, 1); - atomic_add(le16_to_cpu(data_transfer->credits_granted), - &sc->send_io.credits.count); - - if (le16_to_cpu(data_transfer->flags) & - SMBDIRECT_FLAG_RESPONSE_REQUESTED) - queue_work(sc->workqueue, &sc->idle.immediate_work); - - if (atomic_read(&sc->send_io.credits.count) > 0) - wake_up(&sc->send_io.credits.wait_queue); - - if (data_length) { - if (current_recv_credits <= (sc->recv_io.credits.target / 4) || - sc->recv_io.credits.target > old_recv_credit_target) - queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); - - smbdirect_connection_reassembly_append_recv_io(sc, recvmsg, data_length); - wake_up(&sc->recv_io.reassembly.wait_queue); - } else - smbdirect_connection_put_recv_io(recvmsg); - - return; - } - case SMBDIRECT_EXPECT_NEGOTIATE_REP: - /* client only */ - break; - } - - /* - * This is an internal error! - */ - WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_DATA_TRANSFER); - smbdirect_connection_put_recv_io(recvmsg); - smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); -} - -static void smb_direct_negotiate_recv_work(struct work_struct *work); - -static void smb_direct_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc) -{ - struct smbdirect_recv_io *recv_io = - container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); - struct smbdirect_socket *sc = recv_io->socket; - unsigned long flags; - - /* - * reset the common recv_done for later reuse. - */ - recv_io->cqe.done = recv_done; - - if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { - smbdirect_connection_put_recv_io(recv_io); - if (wc->status != IB_WC_WR_FLUSH_ERR) { - pr_err("Negotiate Recv error. 
status='%s (%d)' opcode=%d\n", - ib_wc_status_msg(wc->status), wc->status, - wc->opcode); - smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); - } - return; - } - - ksmbd_debug(RDMA, "Negotiate Recv completed. status='%s (%d)', opcode=%d\n", - ib_wc_status_msg(wc->status), wc->status, - wc->opcode); - - ib_dma_sync_single_for_cpu(sc->ib.dev, - recv_io->sge.addr, - recv_io->sge.length, - DMA_FROM_DEVICE); - - /* - * This is an internal error! - */ - if (WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_NEGOTIATE_REQ)) { - smbdirect_connection_put_recv_io(recv_io); - smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); - return; - } - - /* - * Don't reset timer to the keepalive interval in - * this will be done in smb_direct_negotiate_recv_work. - */ - - /* - * Only remember the recv_io if it has enough bytes, - * this gives smb_direct_negotiate_recv_work enough - * information in order to disconnect if it was not - * valid. - */ - sc->recv_io.reassembly.full_packet_received = true; - if (wc->byte_len >= sizeof(struct smbdirect_negotiate_req)) - smbdirect_connection_reassembly_append_recv_io(sc, recv_io, 0); - else - smbdirect_connection_put_recv_io(recv_io); - - /* - * Some drivers (at least mlx5_ib and irdma in roce mode) - * might post a recv completion before RDMA_CM_EVENT_ESTABLISHED, - * we need to adjust our expectation in that case. - * - * So we defer further processing of the negotiation - * to smb_direct_negotiate_recv_work(). - * - * If we are already in SMBDIRECT_SOCKET_NEGOTIATE_NEEDED - * we queue the work directly otherwise - * smb_direct_cm_handler() will do it, when - * RDMA_CM_EVENT_ESTABLISHED arrived. 
- */ - spin_lock_irqsave(&sc->connect.lock, flags); - if (!sc->first_error) { - INIT_WORK(&sc->connect.work, smb_direct_negotiate_recv_work); - if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_NEEDED) - queue_work(sc->workqueue, &sc->connect.work); - } - spin_unlock_irqrestore(&sc->connect.lock, flags); -} - -static void smb_direct_negotiate_recv_work(struct work_struct *work) -{ - struct smbdirect_socket *sc = - container_of(work, struct smbdirect_socket, connect.work); - const struct smbdirect_socket_parameters *sp = &sc->parameters; - struct smbdirect_recv_io *recv_io; - - if (sc->first_error) - return; - - ksmbd_debug(RDMA, "Negotiate Recv Work running\n"); - - /* - * Reset timer to the keepalive interval in - * order to trigger our next keepalive message. - */ - sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, - msecs_to_jiffies(sp->keepalive_interval_msec)); - - /* - * If smb_direct_negotiate_recv_done() detected an - * invalid request we want to disconnect. - */ - recv_io = smbdirect_connection_reassembly_first_recv_io(sc); - if (!recv_io) { - smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); - return; - } - - if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_NEGOTIATE_NEEDED)) { - smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); - return; - } - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING; - wake_up(&sc->status_wait); -} - static int smb_direct_read(struct ksmbd_transport *t, char *buf, unsigned int size, int unused) { @@ -657,399 +365,24 @@ static void smb_direct_shutdown(struct ksmbd_transport *t) ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", sc->rdma.cm_id); - smbdirect_socket_cleanup_work(&sc->disconnect_work); -} - -static int smb_direct_cm_handler(struct rdma_cm_id *cm_id, - struct rdma_cm_event *event) -{ - struct smbdirect_socket *sc = cm_id->context; - unsigned long flags; - - ksmbd_debug(RDMA, "RDMA CM event. 
cm_id=%p event=%s (%d)\n", - cm_id, rdma_event_msg(event->event), event->event); - - switch (event->event) { - case RDMA_CM_EVENT_ESTABLISHED: { - /* - * Some drivers (at least mlx5_ib and irdma in roce mode) - * might post a recv completion before RDMA_CM_EVENT_ESTABLISHED, - * we need to adjust our expectation in that case. - * - * If smb_direct_negotiate_recv_done was called first - * it initialized sc->connect.work only for us to - * start, so that we turned into - * SMBDIRECT_SOCKET_NEGOTIATE_NEEDED, before - * smb_direct_negotiate_recv_work() runs. - * - * If smb_direct_negotiate_recv_done didn't happen - * yet. sc->connect.work is still be disabled and - * queue_work() is a no-op. - */ - if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING)) - break; - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED; - spin_lock_irqsave(&sc->connect.lock, flags); - if (!sc->first_error) - queue_work(sc->workqueue, &sc->connect.work); - spin_unlock_irqrestore(&sc->connect.lock, flags); - wake_up(&sc->status_wait); - break; - } - case RDMA_CM_EVENT_DEVICE_REMOVAL: - case RDMA_CM_EVENT_DISCONNECTED: { - sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - smbdirect_socket_cleanup_work(&sc->disconnect_work); - if (sc->ib.qp) - ib_drain_qp(sc->ib.qp); - break; - } - case RDMA_CM_EVENT_CONNECT_ERROR: { - sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - smbdirect_socket_cleanup_work(&sc->disconnect_work); - break; - } - default: - pr_err("Unexpected RDMA CM event. 
cm_id=%p, event=%s (%d)\n", - cm_id, rdma_event_msg(event->event), - event->event); - break; - } - return 0; -} - -static int smb_direct_send_negotiate_response(struct smbdirect_socket *sc, - int failed) -{ - struct smbdirect_socket_parameters *sp = &sc->parameters; - struct smbdirect_send_io *sendmsg; - struct smbdirect_negotiate_resp *resp; - int ret; - - sendmsg = smbdirect_connection_alloc_send_io(sc); - if (IS_ERR(sendmsg)) - return -ENOMEM; - - resp = (struct smbdirect_negotiate_resp *)sendmsg->packet; - if (failed) { - memset(resp, 0, sizeof(*resp)); - resp->min_version = SMB_DIRECT_VERSION_LE; - resp->max_version = SMB_DIRECT_VERSION_LE; - resp->status = STATUS_NOT_SUPPORTED; - - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; - } else { - resp->status = STATUS_SUCCESS; - resp->min_version = SMB_DIRECT_VERSION_LE; - resp->max_version = SMB_DIRECT_VERSION_LE; - resp->negotiated_version = SMB_DIRECT_VERSION_LE; - resp->reserved = 0; - resp->credits_requested = - cpu_to_le16(sp->send_credit_target); - resp->credits_granted = cpu_to_le16(smbdirect_connection_grant_recv_credits(sc)); - resp->max_readwrite_size = cpu_to_le32(sp->max_read_write_size); - resp->preferred_send_size = cpu_to_le32(sp->max_send_size); - resp->max_receive_size = cpu_to_le32(sp->max_recv_size); - resp->max_fragmented_size = - cpu_to_le32(sp->max_fragmented_recv_size); - - atomic_set(&sc->send_io.bcredits.count, 1); - sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER; - sc->status = SMBDIRECT_SOCKET_CONNECTED; - } - - sendmsg->sge[0].addr = ib_dma_map_single(sc->ib.dev, - (void *)resp, sizeof(*resp), - DMA_TO_DEVICE); - ret = ib_dma_mapping_error(sc->ib.dev, sendmsg->sge[0].addr); - if (ret) { - smbdirect_connection_free_send_io(sendmsg); - return ret; - } - - sendmsg->num_sge = 1; - sendmsg->sge[0].length = sizeof(*resp); - sendmsg->sge[0].lkey = sc->ib.pd->local_dma_lkey; - - ret = smbdirect_connection_post_send_io(sc, NULL, sendmsg); - if (ret) { - 
smbdirect_connection_free_send_io(sendmsg); - return ret; - } - - wait_event(sc->send_io.pending.zero_wait_queue, - atomic_read(&sc->send_io.pending.count) == 0 || - sc->status != SMBDIRECT_SOCKET_CONNECTED); - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) - return -ENOTCONN; - - return 0; -} - -static int smb_direct_accept_client(struct smbdirect_socket *sc) -{ - struct smbdirect_socket_parameters *sp = &sc->parameters; - struct rdma_conn_param conn_param; - __be32 ird_ord_hdr[2]; - int ret; - - /* - * smb_direct_handle_connect_request() - * already negotiated sp->initiator_depth - * and sp->responder_resources - */ - memset(&conn_param, 0, sizeof(conn_param)); - conn_param.initiator_depth = sp->initiator_depth; - conn_param.responder_resources = sp->responder_resources; - - if (sc->rdma.legacy_iwarp) { - ird_ord_hdr[0] = cpu_to_be32(conn_param.responder_resources); - ird_ord_hdr[1] = cpu_to_be32(conn_param.initiator_depth); - conn_param.private_data = ird_ord_hdr; - conn_param.private_data_len = sizeof(ird_ord_hdr); - } else { - conn_param.private_data = NULL; - conn_param.private_data_len = 0; - } - conn_param.retry_count = SMB_DIRECT_CM_RETRY; - conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY; - conn_param.flow_control = 0; - - /* - * start with the negotiate timeout and SMBDIRECT_KEEPALIVE_PENDING - * so that the timer will cause a disconnect. 
- */ - sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, - msecs_to_jiffies(sp->negotiate_timeout_msec)); - - WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED); - sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING; - ret = rdma_accept(sc->rdma.cm_id, &conn_param); - if (ret) { - pr_err("error at rdma_accept: %d\n", ret); - return ret; - } - return 0; -} - -static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc) -{ - struct smbdirect_recv_io *recvmsg; - bool recv_posted = false; - int ret; - - WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED); - sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED; - - sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REQ; - - recvmsg = smbdirect_connection_get_recv_io(sc); - if (!recvmsg) - return -ENOMEM; - recvmsg->cqe.done = smb_direct_negotiate_recv_done; - - ret = smbdirect_connection_post_recv_io(recvmsg); - if (ret) { - pr_err("Can't post recv: %d\n", ret); - goto out_err; - } - recv_posted = true; - - ret = smb_direct_accept_client(sc); - if (ret) { - pr_err("Can't accept client\n"); - goto out_err; - } - - return 0; -out_err: - /* - * If the recv was never posted, return it to the free list. - * If it was posted, leave it alone so disconnect teardown can - * drain the QP and complete it (flush) and the completion path - * will unmap it exactly once. - */ - if (!recv_posted) - smbdirect_connection_put_recv_io(recvmsg); - return ret; -} - -static int smb_direct_init_params(struct smbdirect_socket *sc) -{ - struct smbdirect_socket_parameters *sp = &sc->parameters; - int max_send_sges; - unsigned int maxpages; - - /* need 3 more sge. because a SMB_DIRECT header, SMB2 header, - * SMB2 response could be mapped. 
- */ - max_send_sges = DIV_ROUND_UP(sp->max_send_size, PAGE_SIZE) + 3; - if (max_send_sges > SMBDIRECT_SEND_IO_MAX_SGE) { - pr_err("max_send_size %d is too large\n", sp->max_send_size); - return -EINVAL; - } - - atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target); - - maxpages = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE); - sc->rw_io.credits.max = rdma_rw_mr_factor(sc->ib.dev, - sc->rdma.cm_id->port_num, - maxpages); - sc->rw_io.credits.num_pages = DIV_ROUND_UP(maxpages, sc->rw_io.credits.max); - /* add one extra in order to handle unaligned pages */ - sc->rw_io.credits.max += 1; - - sc->recv_io.credits.target = 1; - - atomic_set(&sc->rw_io.credits.count, sc->rw_io.credits.max); - - return 0; + smbdirect_socket_shutdown(sc); } static int smb_direct_prepare(struct ksmbd_transport *t) { struct smb_direct_transport *st = SMBD_TRANS(t); struct smbdirect_socket *sc = &st->socket; - struct smbdirect_socket_parameters *sp = &sc->parameters; - struct smbdirect_recv_io *recvmsg; - struct smbdirect_negotiate_req *req; - unsigned long flags; int ret; - /* - * We are waiting to pass the following states: - * - * SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED - * SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING - * SMBDIRECT_SOCKET_NEGOTIATE_NEEDED - * - * To finally get to SMBDIRECT_SOCKET_NEGOTIATE_RUNNING - * in order to continue below. - * - * Everything else is unexpected and an error. - */ - ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n"); - ret = wait_event_interruptible_timeout(sc->status_wait, - sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED && - sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING && - sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED, - msecs_to_jiffies(sp->negotiate_timeout_msec)); - if (ret <= 0 || sc->status != SMBDIRECT_SOCKET_NEGOTIATE_RUNNING) - return ret < 0 ? 
ret : -ETIMEDOUT; - - recvmsg = smbdirect_connection_reassembly_first_recv_io(sc); - if (!recvmsg) - return -ECONNABORTED; - - ret = smb_direct_check_recvmsg(recvmsg); - if (ret) - goto put; - - req = (struct smbdirect_negotiate_req *)recvmsg->packet; - sp->max_recv_size = min_t(u32, sp->max_recv_size, - le32_to_cpu(req->preferred_send_size)); - sp->max_send_size = min_t(u32, sp->max_send_size, - le32_to_cpu(req->max_receive_size)); - sp->max_fragmented_send_size = - le32_to_cpu(req->max_fragmented_size); - /* - * The maximum fragmented upper-layer payload receive size supported - * - * Assume max_payload_per_credit is - * smb_direct_receive_credit_max - 24 = 1340 - * - * The maximum number would be - * smb_direct_receive_credit_max * max_payload_per_credit - * - * 1340 * 255 = 341700 (0x536C4) - * - * The minimum value from the spec is 131072 (0x20000) - * - * For now we use the logic we used before: - * (1364 * 255) / 2 = 173910 (0x2A756) - * - * We need to adjust this here in case the peer - * lowered sp->max_recv_size. - * - * TODO: instead of adjusting max_fragmented_recv_size - * we should adjust the number of available buffers, - * but for now we keep the current logic. - */ - sp->max_fragmented_recv_size = - (sp->recv_credit_max * sp->max_recv_size) / 2; - sc->recv_io.credits.target = le16_to_cpu(req->credits_requested); - sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); - sc->recv_io.credits.target = max_t(u16, sc->recv_io.credits.target, 1); - -put: - spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); - sc->recv_io.reassembly.queue_length--; - list_del(&recvmsg->list); - spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); - smbdirect_connection_put_recv_io(recvmsg); - - if (ret == -ECONNABORTED) - return ret; - - if (ret) - goto respond; - - /* - * We negotiated with success, so we need to refill the recv queue. - * - * The message that grants the credits to the client is - * the negotiate response. 
- */ - ret = smbdirect_connection_recv_io_refill(sc); - if (ret < 0) - return ret; - ret = 0; - -respond: - ret = smb_direct_send_negotiate_response(sc, ret); - if (ret) - return ret; - - INIT_WORK(&sc->recv_io.posted.refill_work, smbdirect_connection_recv_io_refill_work); - INIT_WORK(&sc->idle.immediate_work, smbdirect_connection_send_immediate_work); - - return 0; -} - -static int smb_direct_connect(struct smbdirect_socket *sc) -{ - struct smbdirect_recv_io *recv_io; - int ret; - - sc->rdma.cm_id->event_handler = smb_direct_cm_handler; - - ret = smb_direct_init_params(sc); + ksmbd_debug(RDMA, "SMB_DIRECT Waiting for connection\n"); + ret = smbdirect_connection_wait_for_connected(sc); if (ret) { - pr_err("Can't configure RDMA parameters\n"); + ksmbd_debug(RDMA, "SMB_DIRECT connection failed %d => %1pe\n", + ret, ERR_PTR(ret)); return ret; } - ret = smbdirect_connection_create_mem_pools(sc); - if (ret) { - pr_err("Can't init RDMA pool: %d\n", ret); - return ret; - } - - list_for_each_entry(recv_io, &sc->recv_io.free.list, list) - recv_io->cqe.done = recv_done; - - ret = smbdirect_connection_create_qp(sc); - if (ret) { - pr_err("Can't accept RDMA client: %d\n", ret); - return ret; - } - - ret = smb_direct_prepare_negotiation(sc); - if (ret) { - pr_err("Can't negotiate: %d\n", ret); - return ret; - } + ksmbd_debug(RDMA, "SMB_DIRECT connection ready\n"); return 0; } @@ -1059,10 +392,7 @@ static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id, struct smb_direct_listener *listener = new_cm_id->context; struct smb_direct_transport *t; struct smbdirect_socket *sc; - struct smbdirect_socket_parameters *sp; struct task_struct *handler; - u8 peer_initiator_depth; - u8 peer_responder_resources; int ret; if (!smbdirect_frwr_is_supported(&new_cm_id->device->attrs)) { @@ -1076,22 +406,8 @@ static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id, if (!t) return -ENOMEM; sc = &t->socket; - sp = &sc->parameters; - /* - * First set what the we as 
server are able to support - */ - sp->initiator_depth = min_t(u8, sp->initiator_depth, - sc->ib.dev->attrs.max_qp_rd_atom); - - peer_initiator_depth = event->param.conn.initiator_depth; - peer_responder_resources = event->param.conn.responder_resources; - smbdirect_connection_negotiate_rdma_resources(sc, - peer_initiator_depth, - peer_responder_resources, - &event->param.conn); - - ret = smb_direct_connect(sc); + ret = smbdirect_accept_connect_request(sc, &event->param.conn); if (ret) goto out_err; From ff7673f6fde8a39d2a693c4ef431a7ce933397d2 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 23 Oct 2025 22:49:27 +0200 Subject: [PATCH 128/145] smb: server: make use of smbdirect_socket_create_accepting()/smbdirect_socket_release() With this we no longer embed struct smbdirect_socket, which will allow us to make it private in the following commits. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 53 +++++++++++++++------------------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 29e90dfcc254..bacdc40c820a 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -41,8 +41,6 @@ #define SMB_DIRECT_PORT_IWARP 5445 #define SMB_DIRECT_PORT_INFINIBAND 445 -#define SMB_DIRECT_VERSION_LE cpu_to_le16(SMBDIRECT_V1) - /* SMB_DIRECT negotiation timeout (for the server) in seconds */ #define SMB_DIRECT_NEGOTIATE_TIMEOUT 5 @@ -58,11 +56,6 @@ */ #define SMB_DIRECT_CM_INITIATOR_DEPTH 8 -/* Maximum number of retries on data transfer operations */ -#define SMB_DIRECT_CM_RETRY 6 -/* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */ -#define SMB_DIRECT_CM_RNR_RETRY 0 - /* * User configurable initial values per SMB_DIRECT transport connection * as defined in 
[MS-SMBD] 3.1.1.1 @@ -119,7 +112,7 @@ static struct workqueue_struct *smb_direct_wq; struct smb_direct_transport { struct ksmbd_transport transport; - struct smbdirect_socket socket; + struct smbdirect_socket *socket; }; static bool smb_direct_logging_needed(struct smbdirect_socket *sc, @@ -196,15 +189,13 @@ void init_smbd_max_io_size(unsigned int sz) unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt) { struct smb_direct_transport *t; - struct smbdirect_socket *sc; const struct smbdirect_socket_parameters *sp; if (kt->ops != &ksmbd_smb_direct_transport_ops) return 0; t = SMBD_TRANS(kt); - sc = &t->socket; - sp = smbdirect_socket_get_current_parameters(sc); + sp = smbdirect_socket_get_current_parameters(t->socket); return sp->max_read_write_size; } @@ -237,10 +228,9 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) t = kzalloc_obj(*t, KSMBD_DEFAULT_GFP); if (!t) return NULL; - sc = &t->socket; - ret = smbdirect_socket_init_accepting(cm_id, sc); + ret = smbdirect_socket_create_accepting(cm_id, &sc); if (ret) - goto socket_init_failed; + goto socket_create_failed; smbdirect_socket_set_logging(sc, NULL, smb_direct_logging_needed, smb_direct_logging_vaprintf); @@ -265,28 +255,31 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) conn->transport = KSMBD_TRANS(t); KSMBD_TRANS(t)->conn = conn; KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops; + + t->socket = sc; return t; conn_alloc_failed: set_workqueue_failed: set_settings_failed: set_params_failed: -socket_init_failed: + smbdirect_socket_release(sc); +socket_create_failed: kfree(t); return NULL; } static void smb_direct_free_transport(struct ksmbd_transport *kt) { - kfree(SMBD_TRANS(kt)); + struct smb_direct_transport *t = SMBD_TRANS(kt); + + smbdirect_socket_release(t->socket); + kfree(t); } static void free_transport(struct smb_direct_transport *t) { - struct smbdirect_socket *sc = &t->socket; - - smbdirect_socket_destroy_sync(sc); - 
+ smbdirect_socket_shutdown(t->socket); ksmbd_conn_free(KSMBD_TRANS(t)->conn); } @@ -294,7 +287,7 @@ static int smb_direct_read(struct ksmbd_transport *t, char *buf, unsigned int size, int unused) { struct smb_direct_transport *st = SMBD_TRANS(t); - struct smbdirect_socket *sc = &st->socket; + struct smbdirect_socket *sc = st->socket; struct msghdr msg = { .msg_flags = 0, }; struct kvec iov = { .iov_base = buf, @@ -315,7 +308,7 @@ static int smb_direct_writev(struct ksmbd_transport *t, bool need_invalidate, unsigned int remote_key) { struct smb_direct_transport *st = SMBD_TRANS(t); - struct smbdirect_socket *sc = &st->socket; + struct smbdirect_socket *sc = st->socket; struct iov_iter iter; iov_iter_kvec(&iter, ITER_SOURCE, iov, niovs, buflen); @@ -330,7 +323,7 @@ static int smb_direct_rdma_write(struct ksmbd_transport *t, unsigned int desc_len) { struct smb_direct_transport *st = SMBD_TRANS(t); - struct smbdirect_socket *sc = &st->socket; + struct smbdirect_socket *sc = st->socket; return smbdirect_connection_rdma_xmit(sc, buf, buflen, desc, desc_len, false); @@ -342,7 +335,7 @@ static int smb_direct_rdma_read(struct ksmbd_transport *t, unsigned int desc_len) { struct smb_direct_transport *st = SMBD_TRANS(t); - struct smbdirect_socket *sc = &st->socket; + struct smbdirect_socket *sc = st->socket; return smbdirect_connection_rdma_xmit(sc, buf, buflen, desc, desc_len, true); @@ -351,9 +344,9 @@ static int smb_direct_rdma_read(struct ksmbd_transport *t, static void smb_direct_disconnect(struct ksmbd_transport *t) { struct smb_direct_transport *st = SMBD_TRANS(t); - struct smbdirect_socket *sc = &st->socket; + struct smbdirect_socket *sc = st->socket; - ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", sc->rdma.cm_id); + ksmbd_debug(RDMA, "Disconnecting sc=%p\n", sc); free_transport(st); } @@ -361,9 +354,9 @@ static void smb_direct_disconnect(struct ksmbd_transport *t) static void smb_direct_shutdown(struct ksmbd_transport *t) { struct smb_direct_transport *st = 
SMBD_TRANS(t); - struct smbdirect_socket *sc = &st->socket; + struct smbdirect_socket *sc = st->socket; - ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", sc->rdma.cm_id); + ksmbd_debug(RDMA, "smb-direct shutdown sc=%p\n", sc); smbdirect_socket_shutdown(sc); } @@ -371,7 +364,7 @@ static void smb_direct_shutdown(struct ksmbd_transport *t) static int smb_direct_prepare(struct ksmbd_transport *t) { struct smb_direct_transport *st = SMBD_TRANS(t); - struct smbdirect_socket *sc = &st->socket; + struct smbdirect_socket *sc = st->socket; int ret; ksmbd_debug(RDMA, "SMB_DIRECT Waiting for connection\n"); @@ -405,7 +398,7 @@ static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id, t = alloc_transport(new_cm_id); if (!t) return -ENOMEM; - sc = &t->socket; + sc = t->socket; ret = smbdirect_accept_connect_request(sc, &event->param.conn); if (ret) From 1b2d94a3c986473fbb05cd6c5a45d67e5f39f3c2 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 27 Oct 2025 21:39:19 +0100 Subject: [PATCH 129/145] smb: server: only use public smbdirect functions Also remove a lot of unused includes... 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 1 - fs/smb/server/transport_rdma.c | 25 +++++++++---------------- fs/smb/server/transport_rdma.h | 2 ++ 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 10ae77dae5a1..ee32e61b6d3c 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -24,7 +24,6 @@ #include "asn1.h" #include "connection.h" #include "transport_ipc.h" -#include "../common/smbdirect/smbdirect.h" #include "transport_rdma.h" #include "vfs.h" #include "vfs_cache.h" diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index bacdc40c820a..e58d7e89da0e 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -13,30 +13,15 @@ #include #include -#include -#include -#include #include -#include -#include -#include #include "glob.h" #include "connection.h" #include "smb_common.h" #include "../common/smb2status.h" -#include "../common/smbdirect/smbdirect.h" -#include "../common/smbdirect/smbdirect_pdu.h" -#include "../common/smbdirect/smbdirect_socket.h" #include "transport_rdma.h" +#include "../common/smbdirect/smbdirect_public.h" -/* - * This is a temporary solution until all code - * is moved to smbdirect_all_c_files.c and we - * have an smbdirect.ko that exports the required - * functions. 
- */ -#include "../common/smbdirect/smbdirect_all_c_files.c" #define SMB_DIRECT_PORT_IWARP 5445 #define SMB_DIRECT_PORT_INFINIBAND 445 @@ -709,3 +694,11 @@ static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { .rdma_write = smb_direct_rdma_write, .free_transport = smb_direct_free_transport, }; + +/* + * This is a temporary solution until all code + * is moved to smbdirect_all_c_files.c and we + * have an smbdirect.ko that exports the required + * functions. + */ +#include "../common/smbdirect/smbdirect_all_c_files.c" diff --git a/fs/smb/server/transport_rdma.h b/fs/smb/server/transport_rdma.h index 3f93c6a9f7e4..e16f625caed2 100644 --- a/fs/smb/server/transport_rdma.h +++ b/fs/smb/server/transport_rdma.h @@ -27,4 +27,6 @@ static inline void init_smbd_max_io_size(unsigned int sz) { } static inline unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt) { return 0; } #endif +#include "../common/smbdirect/smbdirect.h" + #endif /* __KSMBD_TRANSPORT_RDMA_H__ */ From 2eff5e51f97663ad2371115260884396718b5e92 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 14 Nov 2025 15:41:02 +0100 Subject: [PATCH 130/145] smb: server: make use of smbdirect_socket_{listen,accept}() We no longer need the custom rdma listener. The code logic is very similar to transport_tcp.c now using a kernel thread that loops over smbdirect_socket_accept(). This is the first step in the direction of using IPPROTO_SMBDIRECT sockets in future. 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 260 ++++++++++++++++----------------- 1 file changed, 128 insertions(+), 132 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index e58d7e89da0e..7171bde9d078 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -89,7 +89,10 @@ struct smb_direct_device { static struct smb_direct_listener { int port; - struct rdma_cm_id *cm_id; + + struct task_struct *thread; + + struct smbdirect_socket *socket; } smb_direct_ib_listener, smb_direct_iw_listener; static struct workqueue_struct *smb_direct_wq; @@ -185,49 +188,15 @@ unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt) return sp->max_read_write_size; } -static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) +static struct smb_direct_transport *alloc_transport(struct smbdirect_socket *sc) { struct smb_direct_transport *t; - struct smbdirect_socket *sc; - struct smbdirect_socket_parameters init_params = {}; - struct smbdirect_socket_parameters *sp; struct ksmbd_conn *conn; - int ret; - - /* - * Create the initial parameters - */ - sp = &init_params; - sp->negotiate_timeout_msec = SMB_DIRECT_NEGOTIATE_TIMEOUT * 1000; - sp->initiator_depth = SMB_DIRECT_CM_INITIATOR_DEPTH; - sp->responder_resources = 1; - sp->recv_credit_max = smb_direct_receive_credit_max; - sp->send_credit_target = smb_direct_send_credit_target; - sp->max_send_size = smb_direct_max_send_size; - sp->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size; - sp->max_recv_size = smb_direct_max_receive_size; - sp->max_read_write_size = smb_direct_max_read_write_size; - sp->keepalive_interval_msec = SMB_DIRECT_KEEPALIVE_SEND_INTERVAL * 1000; - sp->keepalive_timeout_msec = SMB_DIRECT_KEEPALIVE_RECV_TIMEOUT * 1000; t 
= kzalloc_obj(*t, KSMBD_DEFAULT_GFP); if (!t) return NULL; - ret = smbdirect_socket_create_accepting(cm_id, &sc); - if (ret) - goto socket_create_failed; - smbdirect_socket_set_logging(sc, NULL, - smb_direct_logging_needed, - smb_direct_logging_vaprintf); - ret = smbdirect_socket_set_initial_parameters(sc, sp); - if (ret) - goto set_params_failed; - ret = smbdirect_socket_set_kernel_settings(sc, IB_POLL_WORKQUEUE, KSMBD_DEFAULT_GFP); - if (ret) - goto set_settings_failed; - ret = smbdirect_socket_set_custom_workqueue(sc, smb_direct_wq); - if (ret) - goto set_workqueue_failed; + t->socket = sc; conn = ksmbd_conn_alloc(); if (!conn) @@ -241,15 +210,9 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) KSMBD_TRANS(t)->conn = conn; KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops; - t->socket = sc; return t; conn_alloc_failed: -set_workqueue_failed: -set_settings_failed: -set_params_failed: - smbdirect_socket_release(sc); -socket_create_failed: kfree(t); return NULL; } @@ -346,48 +309,18 @@ static void smb_direct_shutdown(struct ksmbd_transport *t) smbdirect_socket_shutdown(sc); } -static int smb_direct_prepare(struct ksmbd_transport *t) +static int smb_direct_new_connection(struct smb_direct_listener *listener, + struct smbdirect_socket *client_sc) { - struct smb_direct_transport *st = SMBD_TRANS(t); - struct smbdirect_socket *sc = st->socket; - int ret; - - ksmbd_debug(RDMA, "SMB_DIRECT Waiting for connection\n"); - ret = smbdirect_connection_wait_for_connected(sc); - if (ret) { - ksmbd_debug(RDMA, "SMB_DIRECT connection failed %d => %1pe\n", - ret, ERR_PTR(ret)); - return ret; - } - - ksmbd_debug(RDMA, "SMB_DIRECT connection ready\n"); - return 0; -} - -static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id, - struct rdma_cm_event *event) -{ - struct smb_direct_listener *listener = new_cm_id->context; struct smb_direct_transport *t; - struct smbdirect_socket *sc; struct task_struct *handler; int ret; - if 
(!smbdirect_frwr_is_supported(&new_cm_id->device->attrs)) { - ksmbd_debug(RDMA, - "Fast Registration Work Requests is not supported. device capabilities=%llx\n", - new_cm_id->device->attrs.device_cap_flags); - return -EPROTONOSUPPORT; - } - - t = alloc_transport(new_cm_id); - if (!t) + t = alloc_transport(client_sc); + if (!t) { + smbdirect_socket_release(client_sc); return -ENOMEM; - sc = t->socket; - - ret = smbdirect_accept_connect_request(sc, &event->param.conn); - if (ret) - goto out_err; + } handler = kthread_run(ksmbd_conn_handler_loop, KSMBD_TRANS(t)->conn, "ksmbd:r%u", @@ -404,41 +337,68 @@ static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id, return ret; } -static int smb_direct_listen_handler(struct rdma_cm_id *cm_id, - struct rdma_cm_event *event) +static int smb_direct_listener_kthread_fn(void *p) { - switch (event->event) { - case RDMA_CM_EVENT_CONNECT_REQUEST: { - int ret = smb_direct_handle_connect_request(cm_id, event); + struct smb_direct_listener *listener = (struct smb_direct_listener *)p; + struct smbdirect_socket *client_sc = NULL; - if (ret) { - pr_err("Can't create transport: %d\n", ret); - return ret; - } + while (!kthread_should_stop()) { + struct proto_accept_arg arg = { .err = -EINVAL, }; + long timeo = MAX_SCHEDULE_TIMEOUT; - ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n", - cm_id); - break; - } - default: - pr_err("Unexpected listen event. 
cm_id=%p, event=%s (%d)\n", - cm_id, rdma_event_msg(event->event), event->event); - break; + if (!listener->socket) + break; + client_sc = smbdirect_socket_accept(listener->socket, timeo, &arg); + if (!client_sc && arg.err == -EINVAL) + break; + if (!client_sc) + continue; + + ksmbd_debug(CONN, "connect success: accepted new connection\n"); + smb_direct_new_connection(listener, client_sc); } + + ksmbd_debug(CONN, "releasing socket\n"); return 0; } +static void smb_direct_listener_destroy(struct smb_direct_listener *listener) +{ + int ret; + + if (listener->socket) + smbdirect_socket_shutdown(listener->socket); + + if (listener->thread) { + ret = kthread_stop(listener->thread); + if (ret) + pr_err("failed to stop forker thread\n"); + listener->thread = NULL; + } + + if (listener->socket) { + smbdirect_socket_release(listener->socket); + listener->socket = NULL; + } + + listener->port = 0; +} + static int smb_direct_listen(struct smb_direct_listener *listener, int port) { - int ret; - struct rdma_cm_id *cm_id; - u8 node_type = RDMA_NODE_UNSPECIFIED; + struct net *net = current->nsproxy->net_ns; + struct task_struct *kthread; struct sockaddr_in sin = { .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_ANY), .sin_port = htons(port), }; + struct smbdirect_socket_parameters init_params = {}; + struct smbdirect_socket_parameters *sp; + struct smbdirect_socket *sc; + u64 port_flags = 0; + int ret; switch (port) { case SMB_DIRECT_PORT_IWARP: @@ -446,7 +406,7 @@ static int smb_direct_listen(struct smb_direct_listener *listener, * only allow iWarp devices * for port 5445. 
*/ - node_type = RDMA_NODE_RNIC; + port_flags |= SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW; break; case SMB_DIRECT_PORT_INFINIBAND: /* @@ -455,47 +415,90 @@ static int smb_direct_listen(struct smb_direct_listener *listener, * * (Basically don't allow iWarp devices) */ - node_type = RDMA_NODE_IB_CA; + port_flags |= SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB; break; default: pr_err("unsupported smbdirect port=%d!\n", port); return -ENODEV; } - cm_id = rdma_create_id(&init_net, smb_direct_listen_handler, - listener, RDMA_PS_TCP, IB_QPT_RC); - if (IS_ERR(cm_id)) { - pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id)); - return PTR_ERR(cm_id); + ret = smbdirect_socket_create_kern(net, &sc); + if (ret) { + pr_err("smbdirect_socket_create_kern() failed: %d %1pe\n", + ret, ERR_PTR(ret)); + return ret; } - ret = rdma_restrict_node_type(cm_id, node_type); + /* + * Create the initial parameters + */ + sp = &init_params; + sp->flags |= port_flags; + sp->negotiate_timeout_msec = SMB_DIRECT_NEGOTIATE_TIMEOUT * 1000; + sp->initiator_depth = SMB_DIRECT_CM_INITIATOR_DEPTH; + sp->responder_resources = 1; + sp->recv_credit_max = smb_direct_receive_credit_max; + sp->send_credit_target = smb_direct_send_credit_target; + sp->max_send_size = smb_direct_max_send_size; + sp->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size; + sp->max_recv_size = smb_direct_max_receive_size; + sp->max_read_write_size = smb_direct_max_read_write_size; + sp->keepalive_interval_msec = SMB_DIRECT_KEEPALIVE_SEND_INTERVAL * 1000; + sp->keepalive_timeout_msec = SMB_DIRECT_KEEPALIVE_RECV_TIMEOUT * 1000; + + smbdirect_socket_set_logging(sc, NULL, + smb_direct_logging_needed, + smb_direct_logging_vaprintf); + ret = smbdirect_socket_set_initial_parameters(sc, sp); if (ret) { - pr_err("rdma_restrict_node_type(%u) failed %d\n", - node_type, ret); + pr_err("Failed smbdirect_socket_set_initial_parameters(): %d %1pe\n", + ret, ERR_PTR(ret)); + goto err; + } + ret = smbdirect_socket_set_kernel_settings(sc, IB_POLL_WORKQUEUE, 
KSMBD_DEFAULT_GFP); + if (ret) { + pr_err("Failed smbdirect_socket_set_kernel_settings(): %d %1pe\n", + ret, ERR_PTR(ret)); + goto err; + } + ret = smbdirect_socket_set_custom_workqueue(sc, smb_direct_wq); + if (ret) { + pr_err("Failed smbdirect_socket_set_custom_workqueue(): %d %1pe\n", + ret, ERR_PTR(ret)); goto err; } - ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); + ret = smbdirect_socket_bind(sc, (struct sockaddr *)&sin); if (ret) { - pr_err("Can't bind: %d\n", ret); + pr_err("smbdirect_socket_bind() failed: %d %1pe\n", + ret, ERR_PTR(ret)); goto err; } - ret = rdma_listen(cm_id, 10); + ret = smbdirect_socket_listen(sc, 10); if (ret) { - pr_err("Can't listen: %d\n", ret); + pr_err("Port[%d] smbdirect_socket_listen() failed: %d %1pe\n", + port, ret, ERR_PTR(ret)); goto err; } listener->port = port; - listener->cm_id = cm_id; + listener->socket = sc; + kthread = kthread_run(smb_direct_listener_kthread_fn, + listener, + "ksmbd-smbdirect-listener-%u", port); + if (IS_ERR(kthread)) { + ret = PTR_ERR(kthread); + pr_err("Can't start ksmbd listen kthread: %d %1pe\n", + ret, ERR_PTR(ret)); + goto err; + } + + listener->thread = kthread; return 0; err: - listener->port = 0; - listener->cm_id = NULL; - rdma_destroy_id(cm_id); + smb_direct_listener_destroy(listener); return ret; } @@ -546,7 +549,7 @@ int ksmbd_rdma_init(void) int ret; smb_direct_ib_listener = smb_direct_iw_listener = (struct smb_direct_listener) { - .cm_id = NULL, + .socket = NULL, }; ret = ib_register_client(&smb_direct_ib_client); @@ -575,8 +578,8 @@ int ksmbd_rdma_init(void) goto err; } - ksmbd_debug(RDMA, "InfiniBand/RoCEv1/RoCEv2 RDMA listener. cm_id=%p\n", - smb_direct_ib_listener.cm_id); + ksmbd_debug(RDMA, "InfiniBand/RoCEv1/RoCEv2 RDMA listener. socket=%p\n", + smb_direct_ib_listener.socket); ret = smb_direct_listen(&smb_direct_iw_listener, SMB_DIRECT_PORT_IWARP); @@ -585,8 +588,8 @@ int ksmbd_rdma_init(void) goto err; } - ksmbd_debug(RDMA, "iWarp RDMA listener. 
cm_id=%p\n", - smb_direct_iw_listener.cm_id); + ksmbd_debug(RDMA, "iWarp RDMA listener. socket=%p\n", + smb_direct_iw_listener.socket); return 0; err: @@ -597,19 +600,13 @@ int ksmbd_rdma_init(void) void ksmbd_rdma_stop_listening(void) { - if (!smb_direct_ib_listener.cm_id && !smb_direct_iw_listener.cm_id) + if (!smb_direct_ib_listener.socket && !smb_direct_iw_listener.socket) return; ib_unregister_client(&smb_direct_ib_client); - if (smb_direct_ib_listener.cm_id) - rdma_destroy_id(smb_direct_ib_listener.cm_id); - if (smb_direct_iw_listener.cm_id) - rdma_destroy_id(smb_direct_iw_listener.cm_id); - - smb_direct_ib_listener = smb_direct_iw_listener = (struct smb_direct_listener) { - .cm_id = NULL, - }; + smb_direct_listener_destroy(&smb_direct_ib_listener); + smb_direct_listener_destroy(&smb_direct_iw_listener); } void ksmbd_rdma_destroy(void) @@ -685,7 +682,6 @@ bool ksmbd_rdma_capable_netdev(struct net_device *netdev) } static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { - .prepare = smb_direct_prepare, .disconnect = smb_direct_disconnect, .shutdown = smb_direct_shutdown, .writev = smb_direct_writev, From 98bdc5fda9cc425afe608342b372d25970071f96 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 25 Nov 2025 17:44:31 +0100 Subject: [PATCH 131/145] smb: server: remove unused ksmbd_transport_ops.prepare() This is no longer needed for smbdirect. 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.c | 4 ---- fs/smb/server/connection.h | 1 - 2 files changed, 5 deletions(-) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 26cfce344861..708fac40b8ea 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -376,9 +376,6 @@ int ksmbd_conn_handler_loop(void *p) mutex_init(&conn->srv_mutex); __module_get(THIS_MODULE); - if (t->ops->prepare && t->ops->prepare(t)) - goto out; - max_req = server_conf.max_inflight_req; conn->last_active = jiffies; set_freezable(); @@ -470,7 +467,6 @@ int ksmbd_conn_handler_loop(void *p) } } -out: ksmbd_conn_set_releasing(conn); /* Wait till all reference dropped to the Server object*/ ksmbd_debug(CONN, "Wait for all pending requests(%d)\n", atomic_read(&conn->r_count)); diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index 1e2587036bca..ae21a1bd4c70 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -127,7 +127,6 @@ struct ksmbd_conn_ops { }; struct ksmbd_transport_ops { - int (*prepare)(struct ksmbd_transport *t); void (*disconnect)(struct ksmbd_transport *t); void (*shutdown)(struct ksmbd_transport *t); int (*read)(struct ksmbd_transport *t, char *buf, From 50bdab9ae45e6345eaa94adbaefaf1ce5a7e90a1 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 18 Jul 2025 19:22:47 +0200 Subject: [PATCH 132/145] smb: server: make use of smbdirect.ko This means we no longer inline the common smbdirect .c files and use the exported functions from the module instead. Note the connection specific logging is still redirected to ksmbd.ko functions via smbdirect_socket_set_logging(). We still don't use a real socket layer, but we're very close... 
Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/Kconfig | 5 +++-- fs/smb/server/transport_rdma.c | 10 ---------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/fs/smb/server/Kconfig b/fs/smb/server/Kconfig index 96aa8e2a8770..37387410e5bb 100644 --- a/fs/smb/server/Kconfig +++ b/fs/smb/server/Kconfig @@ -47,8 +47,9 @@ if SMB_SERVER config SMB_SERVER_SMBDIRECT bool "Support for SMB Direct protocol" - depends on SMB_SERVER=m && INFINIBAND && INFINIBAND_ADDR_TRANS || SMB_SERVER=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y - select SG_POOL + depends on SMB_SERVER && INFINIBAND && INFINIBAND_ADDR_TRANS + depends on SMB_SERVER=m || INFINIBAND=y + select SMB_COMMON_SMBDIRECT default n help diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 7171bde9d078..5f84d133feff 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -9,8 +9,6 @@ #define SUBMOD_NAME "smb_direct" -#define SMBDIRECT_USE_INLINE_C_FILES 1 - #include #include #include @@ -690,11 +688,3 @@ static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { .rdma_write = smb_direct_rdma_write, .free_transport = smb_direct_free_transport, }; - -/* - * This is a temporary solution until all code - * is moved to smbdirect_all_c_files.c and we - * have an smbdirect.ko that exports the required - * functions. - */ -#include "../common/smbdirect/smbdirect_all_c_files.c" From 81a7a3a0faea7e8e64f83aa58e807a8ad329c97d Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 8 Dec 2025 20:56:45 +0100 Subject: [PATCH 133/145] smb: smbdirect: introduce smbdirect_netdev_rdma_capable_node_type() This is basically a copy of ksmbd_rdma_capable_netdev() in the server, but this also prints a message when a device is renamed. 
The differences are: - It uses rdma_for_each_port() instead of implementing the same logic again. - It returns RDMA_NODE_{UNSPECIFIED,IB_CA,RNIC} values instead of bool Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/Makefile | 1 + fs/smb/common/smbdirect/smbdirect_devices.c | 277 +++++++++++++++++++ fs/smb/common/smbdirect/smbdirect_internal.h | 18 ++ fs/smb/common/smbdirect/smbdirect_main.c | 14 +- fs/smb/common/smbdirect/smbdirect_public.h | 3 + 5 files changed, 311 insertions(+), 2 deletions(-) create mode 100644 fs/smb/common/smbdirect/smbdirect_devices.c diff --git a/fs/smb/common/smbdirect/Makefile b/fs/smb/common/smbdirect/Makefile index b41271facfc3..423f533e1002 100644 --- a/fs/smb/common/smbdirect/Makefile +++ b/fs/smb/common/smbdirect/Makefile @@ -14,4 +14,5 @@ smbdirect-y := \ smbdirect_connect.o \ smbdirect_listen.o \ smbdirect_accept.o \ + smbdirect_devices.o \ smbdirect_main.o diff --git a/fs/smb/common/smbdirect/smbdirect_devices.c b/fs/smb/common/smbdirect/smbdirect_devices.c new file mode 100644 index 000000000000..aaab99e9c045 --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect_devices.c @@ -0,0 +1,277 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2017, Microsoft Corporation. + * Copyright (C) 2018, LG Electronics. 
+ * Copyright (c) 2025 Stefan Metzmacher + */ + +#include "smbdirect_internal.h" + +static u8 smbdirect_ib_device_rdma_capable_node_type(struct ib_device *ib_dev) +{ + if (!smbdirect_frwr_is_supported(&ib_dev->attrs)) + return RDMA_NODE_UNSPECIFIED; + + switch (ib_dev->node_type) { + case RDMA_NODE_IB_CA: /* Infiniband, RoCE v1 and v2 */ + case RDMA_NODE_RNIC: /* iWarp */ + return ib_dev->node_type; + } + + return RDMA_NODE_UNSPECIFIED; +} + +static int smbdirect_ib_client_add(struct ib_device *ib_dev) +{ + u8 node_type = smbdirect_ib_device_rdma_capable_node_type(ib_dev); + struct smbdirect_device *sdev; + const char *node_str; + const char *action; + u32 pidx; + + switch (node_type) { + case RDMA_NODE_IB_CA: + node_str = "IB_CA"; + action = "added"; + break; + case RDMA_NODE_RNIC: + node_str = "RNIC"; + action = "added"; + break; + case RDMA_NODE_UNSPECIFIED: + node_str = "UNSPECIFIED"; + action = "ignored"; + break; + default: + node_str = "UNKNOWN"; + action = "ignored"; + node_type = RDMA_NODE_UNSPECIFIED; + break; + } + + pr_info("ib_dev[%.*s]: %s: %s %s=%u %s=0x%llx %s=0x%llx %s=0x%llx\n", + IB_DEVICE_NAME_MAX, + ib_dev->name, + action, + node_str, + "max_fast_reg_page_list_len", + ib_dev->attrs.max_fast_reg_page_list_len, + "device_cap_flags", + ib_dev->attrs.device_cap_flags, + "kernel_cap_flags", + ib_dev->attrs.kernel_cap_flags, + "page_size_cap", + ib_dev->attrs.page_size_cap); + + if (node_type == RDMA_NODE_UNSPECIFIED) + return 0; + + pr_info("ib_dev[%.*s]: %s=%u %s=%u %s=%u %s=%u %s=%u %s=%u %s=%u %s=%u %s=%u\n", + IB_DEVICE_NAME_MAX, + ib_dev->name, + "num_ports", + rdma_end_port(ib_dev), + "max_qp_rd_atom", + ib_dev->attrs.max_qp_rd_atom, + "max_qp_init_rd_atom", + ib_dev->attrs.max_qp_init_rd_atom, + "max_sgl_rd", + ib_dev->attrs.max_sgl_rd, + "max_sge_rd", + ib_dev->attrs.max_sge_rd, + "max_cqe", + ib_dev->attrs.max_cqe, + "max_qp_wr", + ib_dev->attrs.max_qp_wr, + "max_send_sge", + ib_dev->attrs.max_send_sge, + "max_recv_sge", + 
ib_dev->attrs.max_recv_sge); + + rdma_for_each_port(ib_dev, pidx) { + const struct ib_port_immutable *ib_pi = + ib_port_immutable_read(ib_dev, pidx); + u32 core_cap_flags = ib_pi ? ib_pi->core_cap_flags : 0; + + pr_info("ib_dev[%.*s]PORT[%u]: %s=%u %s=%u %s=%u %s=%u %s=%u %s=0x%x\n", + IB_DEVICE_NAME_MAX, + ib_dev->name, + pidx, + "iwarp", + rdma_protocol_iwarp(ib_dev, pidx), + "ib", + rdma_protocol_ib(ib_dev, pidx), + "roce", + rdma_protocol_roce(ib_dev, pidx), + "v1", + rdma_protocol_roce_eth_encap(ib_dev, pidx), + "v2", + rdma_protocol_roce_udp_encap(ib_dev, pidx), + "core_cap_flags", + core_cap_flags); + } + + sdev = kzalloc_obj(*sdev); + if (!sdev) + return -ENOMEM; + sdev->ib_dev = ib_dev; + snprintf(sdev->ib_name, ARRAY_SIZE(sdev->ib_name), "%.*s", + IB_DEVICE_NAME_MAX, ib_dev->name); + + write_lock(&smbdirect_globals.devices.lock); + list_add(&sdev->list, &smbdirect_globals.devices.list); + write_unlock(&smbdirect_globals.devices.lock); + + return 0; +} + +static void smbdirect_ib_client_remove(struct ib_device *ib_dev, void *client_data) +{ + struct smbdirect_device *sdev, *tmp; + + write_lock(&smbdirect_globals.devices.lock); + list_for_each_entry_safe(sdev, tmp, &smbdirect_globals.devices.list, list) { + if (sdev->ib_dev == ib_dev) { + list_del(&sdev->list); + pr_info("ib_dev[%.*s] removed\n", + IB_DEVICE_NAME_MAX, sdev->ib_name); + kfree(sdev); + break; + } + } + write_unlock(&smbdirect_globals.devices.lock); +} + +static void smbdirect_ib_client_rename(struct ib_device *ib_dev, void *client_data) +{ + struct smbdirect_device *sdev; + + write_lock(&smbdirect_globals.devices.lock); + list_for_each_entry(sdev, &smbdirect_globals.devices.list, list) { + if (sdev->ib_dev == ib_dev) { + pr_info("ib_dev[%.*s] renamed to [%.*s]\n", + IB_DEVICE_NAME_MAX, sdev->ib_name, + IB_DEVICE_NAME_MAX, ib_dev->name); + snprintf(sdev->ib_name, ARRAY_SIZE(sdev->ib_name), "%.*s", + IB_DEVICE_NAME_MAX, ib_dev->name); + break; + } + } + 
write_unlock(&smbdirect_globals.devices.lock); +} + +static struct ib_client smbdirect_ib_client = { + .name = "smbdirect_ib_client", + .add = smbdirect_ib_client_add, + .remove = smbdirect_ib_client_remove, + .rename = smbdirect_ib_client_rename, +}; + +static u8 smbdirect_netdev_find_rdma_capable_node_type(struct net_device *netdev) +{ + struct smbdirect_device *sdev; + u8 node_type = RDMA_NODE_UNSPECIFIED; + + read_lock(&smbdirect_globals.devices.lock); + list_for_each_entry(sdev, &smbdirect_globals.devices.list, list) { + u32 pi; + + rdma_for_each_port(sdev->ib_dev, pi) { + struct net_device *ndev; + + ndev = ib_device_get_netdev(sdev->ib_dev, pi); + if (!ndev) + continue; + + if (ndev == netdev) { + dev_put(ndev); + node_type = sdev->ib_dev->node_type; + goto out; + } + dev_put(ndev); + } + } +out: + read_unlock(&smbdirect_globals.devices.lock); + + if (node_type == RDMA_NODE_UNSPECIFIED) { + struct ib_device *ibdev; + + ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN); + if (ibdev) { + node_type = smbdirect_ib_device_rdma_capable_node_type(ibdev); + ib_device_put(ibdev); + } + } + + return node_type; +} + +/* + * Returns RDMA_NODE_UNSPECIFIED when the netdev has + * no support for smbdirect capable rdma. 
+ * + * Otherwise RDMA_NODE_RNIC is returned for iwarp devices + * and RDMA_NODE_IB_CA for Infiniband and RoCE (v1 and v2) + */ +u8 smbdirect_netdev_rdma_capable_node_type(struct net_device *netdev) +{ + struct net_device *lower_dev; + struct list_head *iter; + u8 node_type = RDMA_NODE_UNSPECIFIED; + + node_type = smbdirect_netdev_find_rdma_capable_node_type(netdev); + if (node_type != RDMA_NODE_UNSPECIFIED) + return node_type; + + /* check if netdev is bridge or VLAN */ + if (netif_is_bridge_master(netdev) || netdev->priv_flags & IFF_802_1Q_VLAN) + netdev_for_each_lower_dev(netdev, lower_dev, iter) { + node_type = smbdirect_netdev_find_rdma_capable_node_type(lower_dev); + if (node_type != RDMA_NODE_UNSPECIFIED) + return node_type; + } + + /* check if netdev is IPoIB safely without layer violation */ + if (netdev->type == ARPHRD_INFINIBAND) + return RDMA_NODE_IB_CA; + + return RDMA_NODE_UNSPECIFIED; +} +__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_netdev_rdma_capable_node_type); + +__init int smbdirect_devices_init(void) +{ + int ret; + + rwlock_init(&smbdirect_globals.devices.lock); + INIT_LIST_HEAD(&smbdirect_globals.devices.list); + + ret = ib_register_client(&smbdirect_ib_client); + if (ret) { + pr_crit("failed to ib_register_client: %d %1pe\n", + ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); + return ret; + } + + return 0; +} + +__exit void smbdirect_devices_exit(void) +{ + struct smbdirect_device *sdev, *tmp; + + /* + * On exit we just clean up so that + * smbdirect_ib_client_remove() won't + * print removals of devices. 
+ */ + write_lock(&smbdirect_globals.devices.lock); + list_for_each_entry_safe(sdev, tmp, &smbdirect_globals.devices.list, list) { + list_del(&sdev->list); + kfree(sdev); + } + write_unlock(&smbdirect_globals.devices.lock); + + ib_unregister_client(&smbdirect_ib_client); +} diff --git a/fs/smb/common/smbdirect/smbdirect_internal.h b/fs/smb/common/smbdirect/smbdirect_internal.h index 901540d0cbbf..03a01fb1ab1c 100644 --- a/fs/smb/common/smbdirect/smbdirect_internal.h +++ b/fs/smb/common/smbdirect/smbdirect_internal.h @@ -18,12 +18,27 @@ struct smbdirect_module_state { struct mutex mutex; + + struct { + rwlock_t lock; + struct list_head list; + } devices; }; extern struct smbdirect_module_state smbdirect_globals; #include "smbdirect_socket.h" +struct smbdirect_device { + struct list_head list; + struct ib_device *ib_dev; + /* + * copy of ib_dev->name, + * in order to print renames + */ + char ib_name[IB_DEVICE_NAME_MAX]; +}; + #ifdef SMBDIRECT_USE_INLINE_C_FILES /* this is temporary while this file is included in others */ #define __SMBDIRECT_PRIVATE__ __maybe_unused static @@ -143,4 +158,7 @@ void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc); void smbdirect_accept_negotiate_finish(struct smbdirect_socket *sc, u32 ntstatus); +__init int smbdirect_devices_init(void); +__exit void smbdirect_devices_exit(void); + #endif /* __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ */ diff --git a/fs/smb/common/smbdirect/smbdirect_main.c b/fs/smb/common/smbdirect/smbdirect_main.c index c61ae8d7f4f0..948964d3fa35 100644 --- a/fs/smb/common/smbdirect/smbdirect_main.c +++ b/fs/smb/common/smbdirect/smbdirect_main.c @@ -12,14 +12,24 @@ struct smbdirect_module_state smbdirect_globals = { static __init int smbdirect_module_init(void) { + int ret; + pr_notice("subsystem loading...\n"); mutex_lock(&smbdirect_globals.mutex); - /* TODO... 
*/ + ret = smbdirect_devices_init(); + if (ret) + goto devices_init_failed; mutex_unlock(&smbdirect_globals.mutex); pr_notice("subsystem loaded\n"); return 0; + +devices_init_failed: + mutex_unlock(&smbdirect_globals.mutex); + pr_crit("failed to loaded: %d (%1pe)\n", + ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); + return ret; } static __exit void smbdirect_module_exit(void) @@ -27,7 +37,7 @@ static __exit void smbdirect_module_exit(void) pr_notice("subsystem unloading...\n"); mutex_lock(&smbdirect_globals.mutex); - /* TODO... */ + smbdirect_devices_exit(); mutex_unlock(&smbdirect_globals.mutex); pr_notice("subsystem unloaded\n"); diff --git a/fs/smb/common/smbdirect/smbdirect_public.h b/fs/smb/common/smbdirect/smbdirect_public.h index c0144c5a808c..231ad7a9c6af 100644 --- a/fs/smb/common/smbdirect/smbdirect_public.h +++ b/fs/smb/common/smbdirect/smbdirect_public.h @@ -24,6 +24,9 @@ struct smbdirect_mr_io; #include +__SMBDIRECT_PUBLIC__ +u8 smbdirect_netdev_rdma_capable_node_type(struct net_device *netdev); + __SMBDIRECT_PUBLIC__ bool smbdirect_frwr_is_supported(const struct ib_device_attr *attrs); From 33b2894e8df76f7faf7253d8784515415511968f Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 9 Dec 2025 13:39:52 +0100 Subject: [PATCH 134/145] smb: server: make use of smbdirect_netdev_rdma_capable_node_type() This removes what is basically the same logic. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 122 +-------------------------------- 1 file changed, 2 insertions(+), 120 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 5f84d133feff..951ac9fec687 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -77,14 +77,6 @@ static int smb_direct_max_receive_size = 1364; static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE; -static LIST_HEAD(smb_direct_device_list); -static DEFINE_RWLOCK(smb_direct_device_lock); - -struct smb_direct_device { - struct ib_device *ib_dev; - struct list_head list; -}; - static struct smb_direct_listener { int port; @@ -500,48 +492,6 @@ static int smb_direct_listen(struct smb_direct_listener *listener, return ret; } -static int smb_direct_ib_client_add(struct ib_device *ib_dev) -{ - struct smb_direct_device *smb_dev; - - if (!smbdirect_frwr_is_supported(&ib_dev->attrs)) - return 0; - - smb_dev = kzalloc_obj(*smb_dev, KSMBD_DEFAULT_GFP); - if (!smb_dev) - return -ENOMEM; - smb_dev->ib_dev = ib_dev; - - write_lock(&smb_direct_device_lock); - list_add(&smb_dev->list, &smb_direct_device_list); - write_unlock(&smb_direct_device_lock); - - ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name); - return 0; -} - -static void smb_direct_ib_client_remove(struct ib_device *ib_dev, - void *client_data) -{ - struct smb_direct_device *smb_dev, *tmp; - - write_lock(&smb_direct_device_lock); - list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) { - if (smb_dev->ib_dev == ib_dev) { - list_del(&smb_dev->list); - kfree(smb_dev); - break; - } - } - write_unlock(&smb_direct_device_lock); -} - -static struct ib_client smb_direct_ib_client = { - .name = "ksmbd_smb_direct_ib", - .add = 
smb_direct_ib_client_add, - .remove = smb_direct_ib_client_remove, -}; - int ksmbd_rdma_init(void) { int ret; @@ -550,12 +500,6 @@ int ksmbd_rdma_init(void) .socket = NULL, }; - ret = ib_register_client(&smb_direct_ib_client); - if (ret) { - pr_err("failed to ib_register_client\n"); - return ret; - } - /* When a client is running out of send credits, the credits are * granted by the server's sending a packet using this queue. * This avoids the situation that a clients cannot send packets @@ -598,11 +542,6 @@ int ksmbd_rdma_init(void) void ksmbd_rdma_stop_listening(void) { - if (!smb_direct_ib_listener.socket && !smb_direct_iw_listener.socket) - return; - - ib_unregister_client(&smb_direct_ib_client); - smb_direct_listener_destroy(&smb_direct_ib_listener); smb_direct_listener_destroy(&smb_direct_iw_listener); } @@ -615,68 +554,11 @@ void ksmbd_rdma_destroy(void) } } -static bool ksmbd_find_rdma_capable_netdev(struct net_device *netdev) -{ - struct smb_direct_device *smb_dev; - int i; - bool rdma_capable = false; - - read_lock(&smb_direct_device_lock); - list_for_each_entry(smb_dev, &smb_direct_device_list, list) { - for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) { - struct net_device *ndev; - - ndev = ib_device_get_netdev(smb_dev->ib_dev, i + 1); - if (!ndev) - continue; - - if (ndev == netdev) { - dev_put(ndev); - rdma_capable = true; - goto out; - } - dev_put(ndev); - } - } -out: - read_unlock(&smb_direct_device_lock); - - if (rdma_capable == false) { - struct ib_device *ibdev; - - ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN); - if (ibdev) { - rdma_capable = smbdirect_frwr_is_supported(&ibdev->attrs); - ib_device_put(ibdev); - } - } - - ksmbd_debug(RDMA, "netdev(%s) rdma capable : %s\n", - netdev->name, str_true_false(rdma_capable)); - - return rdma_capable; -} - bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { - struct net_device *lower_dev; - struct list_head *iter; + u8 node_type = 
smbdirect_netdev_rdma_capable_node_type(netdev); - if (ksmbd_find_rdma_capable_netdev(netdev)) - return true; - - /* check if netdev is bridge or VLAN */ - if (netif_is_bridge_master(netdev) || - netdev->priv_flags & IFF_802_1Q_VLAN) - netdev_for_each_lower_dev(netdev, lower_dev, iter) - if (ksmbd_find_rdma_capable_netdev(lower_dev)) - return true; - - /* check if netdev is IPoIB safely without layer violation */ - if (netdev->type == ARPHRD_INFINIBAND) - return true; - - return false; + return node_type != RDMA_NODE_UNSPECIFIED; } static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { From a40e6f0166e6d5fef4dd7d3b71c333319a0964ab Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 24 Nov 2025 14:49:55 +0100 Subject: [PATCH 135/145] smb: smbdirect: wrap rdma_disconnect() in rdma_[un]lock_handler() This might not be needed, but it controls the order of ib_drain_qp() and rdma_disconnect(). Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index 63cdfccedd55..f69c290f36ca 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -474,7 +474,20 @@ static void smbdirect_socket_cleanup_work(struct work_struct *work) case SMBDIRECT_SOCKET_CONNECTED: case SMBDIRECT_SOCKET_ERROR: sc->status = SMBDIRECT_SOCKET_DISCONNECTING; + /* + * Make sure we hold the callback lock + * in order to coordinate with the + * rdma_event handlers, typically + * smbdirect_connection_rdma_event_handler(), + * and smbdirect_socket_destroy(). + * + * So that the order of ib_drain_qp() + * and rdma_disconnect() is controlled + * by the mutex. 
+ */ + rdma_lock_handler(sc->rdma.cm_id); rdma_disconnect(sc->rdma.cm_id); + rdma_unlock_handler(sc->rdma.cm_id); break; case SMBDIRECT_SOCKET_CREATED: From 00ac2a4fe04af50e65bbac010379d66d87547c0f Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 4 Nov 2025 16:29:38 +0100 Subject: [PATCH 136/145] smb: smbdirect: remove unused smbdirect_connection_mr_io_recovery_work() This would actually never be used as we only move to SMBDIRECT_MR_ERROR when we directly call smbdirect_socket_schedule_cleanup(). Doing an ib_dereg_mr/ib_alloc_mr dance on working connection is not needed and it's also pointless on a broken connection as we don't reuse any ib_pd. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_mr.c | 95 +++----------------------- 1 file changed, 10 insertions(+), 85 deletions(-) diff --git a/fs/smb/common/smbdirect/smbdirect_mr.c b/fs/smb/common/smbdirect/smbdirect_mr.c index dc85cced8dc2..d2e70941f772 100644 --- a/fs/smb/common/smbdirect/smbdirect_mr.c +++ b/fs/smb/common/smbdirect/smbdirect_mr.c @@ -6,8 +6,6 @@ #include "smbdirect_internal.h" -static void smbdirect_connection_mr_io_recovery_work(struct work_struct *work); - /* * Allocate MRs used for RDMA read/write * The number of MRs will not exceed hardware capability in responder_resources @@ -66,8 +64,6 @@ int smbdirect_connection_create_mr_list(struct smbdirect_socket *sc) atomic_inc(&sc->mr_io.ready.count); } - INIT_WORK(&sc->mr_io.recovery_work, smbdirect_connection_mr_io_recovery_work); - return 0; kcalloc_sgl_failed: @@ -127,8 +123,6 @@ void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc) LIST_HEAD(all_list); unsigned long flags; - disable_work_sync(&sc->mr_io.recovery_work); - spin_lock_irqsave(&sc->mr_io.all.lock, flags); list_splice_tail_init(&sc->mr_io.all.list, &all_list); 
spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); @@ -164,11 +158,8 @@ void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc) /* * Get a MR from mr_list. This function waits until there is at least one MR - * available in the list. It may access the list while the - * smbdirect_connection_mr_io_recovery_work is recovering the MR list. This - * doesn't need a lock as they never modify the same places. However, there may - * be several CPUs issuing I/O trying to get MR at the same time, mr_list_lock - * is used to protect this situation. + * available in the list. There may be several CPUs issuing I/O trying to get MR + * at the same time, mr_list_lock is used to protect this situation. */ static struct smbdirect_mr_io * smbdirect_connection_get_mr_io(struct smbdirect_socket *sc) @@ -246,65 +237,6 @@ static void smbdirect_connection_mr_io_local_inv_done(struct ib_cq *cq, struct i complete(&mr->invalidate_done); } -/* - * The work queue function that recovers MRs - * We need to call ib_dereg_mr() and ib_alloc_mr() before this MR can be used - * again. Both calls are slow, so finish them in a workqueue. This will not - * block I/O path. - * There is one workqueue that recovers MRs, there is no need to lock as the - * I/O requests calling smbd_register_mr will never update the links in the - * mr_list. 
- */ -static void smbdirect_connection_mr_io_recovery_work(struct work_struct *work) -{ - struct smbdirect_socket *sc = - container_of(work, struct smbdirect_socket, mr_io.recovery_work); - struct smbdirect_socket_parameters *sp = &sc->parameters; - struct smbdirect_mr_io *mr; - int ret; - - list_for_each_entry(mr, &sc->mr_io.all.list, list) { - if (mr->state != SMBDIRECT_MR_ERROR) - /* This MR is being used, don't recover it */ - continue; - - /* recover this MR entry */ - ret = ib_dereg_mr(mr->mr); - if (ret) { - smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, - "ib_dereg_mr failed ret=%u (%1pe)\n", - ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); - smbdirect_socket_schedule_cleanup(sc, ret); - continue; - } - - mr->mr = ib_alloc_mr(sc->ib.pd, - sc->mr_io.type, - sp->max_frmr_depth); - if (IS_ERR(mr->mr)) { - ret = PTR_ERR(mr->mr); - smbdirect_log_rdma_mr(sc, SMBDIRECT_LOG_ERR, - "ib_alloc_mr failed ret=%d (%1pe) type=0x%x depth=%u\n", - ret, SMBDIRECT_DEBUG_ERR_PTR(ret), - sc->mr_io.type, sp->max_frmr_depth); - smbdirect_socket_schedule_cleanup(sc, ret); - continue; - } - - mr->state = SMBDIRECT_MR_READY; - - /* smbdirect_mr->state is updated by this function - * and is read and updated by I/O issuing CPUs trying - * to get a MR, the call to atomic_inc_return - * implicates a memory barrier and guarantees this - * value is updated before waking up any calls to - * get_mr() from the I/O issuing CPUs - */ - if (atomic_inc_return(&sc->mr_io.ready.count) == 1) - wake_up(&sc->mr_io.ready.wait_queue); - } -} - /* * Transcribe the pages from an iterator into an MR scatterlist. 
*/ @@ -421,15 +353,13 @@ smbdirect_connection_register_mr_io(struct smbdirect_socket *sc, "ib_post_send failed ret=%d (%1pe) reg_wr->key=0x%x\n", ret, SMBDIRECT_DEBUG_ERR_PTR(ret), reg_wr->key); - /* If all failed, attempt to recover this MR by setting it SMBDIRECT_MR_ERROR*/ map_mr_error: ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); dma_map_error: mr->sgt.nents = 0; mr->state = SMBDIRECT_MR_ERROR; - if (atomic_dec_and_test(&sc->mr_io.used.count)) - wake_up(&sc->mr_io.cleanup.wait_queue); + atomic_dec(&sc->mr_io.used.count); smbdirect_socket_schedule_cleanup(sc, ret); @@ -529,20 +459,15 @@ void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr) mr->sgt.nents = 0; } - if (mr->state == SMBDIRECT_MR_INVALIDATED) { - mr->state = SMBDIRECT_MR_READY; - if (atomic_inc_return(&sc->mr_io.ready.count) == 1) - wake_up(&sc->mr_io.ready.wait_queue); - } else - /* - * Schedule the work to do MR recovery for future I/Os MR - * recovery is slow and don't want it to block current I/O - */ - queue_work(sc->workqueue, &sc->mr_io.recovery_work); + WARN_ONCE(mr->state != SMBDIRECT_MR_INVALIDATED, + "mr->state[%u] != SMBDIRECT_MR_INVALIDATED[%u]\n", + mr->state, SMBDIRECT_MR_INVALIDATED); + mr->state = SMBDIRECT_MR_READY; + if (atomic_inc_return(&sc->mr_io.ready.count) == 1) + wake_up(&sc->mr_io.ready.wait_queue); done: - if (atomic_dec_and_test(&sc->mr_io.used.count)) - wake_up(&sc->mr_io.cleanup.wait_queue); + atomic_dec(&sc->mr_io.used.count); put_kref: /* From e4ce1fca0468eb4b6fc2f02667f599bb76df8848 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 4 Nov 2025 17:02:29 +0100 Subject: [PATCH 137/145] smb: smbdirect: prepare use of dedicated workqueues for different steps This is a preparation in order to have global workqueues in the smbdirect module instead of having the caller to provide one. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_accept.c | 8 ++++---- fs/smb/common/smbdirect/smbdirect_connect.c | 8 ++++---- fs/smb/common/smbdirect/smbdirect_connection.c | 18 +++++++++--------- fs/smb/common/smbdirect/smbdirect_socket.c | 9 +++++++-- fs/smb/common/smbdirect/smbdirect_socket.h | 11 +++++++++-- 5 files changed, 33 insertions(+), 21 deletions(-) diff --git a/fs/smb/common/smbdirect/smbdirect_accept.c b/fs/smb/common/smbdirect/smbdirect_accept.c index 4fc5983e99b8..862df982c2ce 100644 --- a/fs/smb/common/smbdirect/smbdirect_accept.c +++ b/fs/smb/common/smbdirect/smbdirect_accept.c @@ -139,7 +139,7 @@ int smbdirect_accept_connect_request(struct smbdirect_socket *sc, */ INIT_DELAYED_WORK(&sc->idle.timer_work, smbdirect_connection_idle_timer_work); sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work, msecs_to_jiffies(sp->negotiate_timeout_msec)); return 0; @@ -272,7 +272,7 @@ static void smbdirect_accept_negotiate_recv_done(struct ib_cq *cq, struct ib_wc if (!sc->first_error) { INIT_WORK(&sc->connect.work, smbdirect_accept_negotiate_recv_work); if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_NEEDED) - queue_work(sc->workqueue, &sc->connect.work); + queue_work(sc->workqueues.accept, &sc->connect.work); } spin_unlock_irqrestore(&sc->connect.lock, flags); @@ -317,7 +317,7 @@ static void smbdirect_accept_negotiate_recv_work(struct work_struct *work) * order to trigger our next keepalive message. 
*/ sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work, msecs_to_jiffies(sp->keepalive_interval_msec)); /* @@ -751,7 +751,7 @@ static int smbdirect_accept_rdma_event_handler(struct rdma_cm_id *id, sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED; spin_lock_irqsave(&sc->connect.lock, flags); if (!sc->first_error) - queue_work(sc->workqueue, &sc->connect.work); + queue_work(sc->workqueues.accept, &sc->connect.work); spin_unlock_irqrestore(&sc->connect.lock, flags); /* diff --git a/fs/smb/common/smbdirect/smbdirect_connect.c b/fs/smb/common/smbdirect/smbdirect_connect.c index f411f6ee66b5..282dc46c943c 100644 --- a/fs/smb/common/smbdirect/smbdirect_connect.c +++ b/fs/smb/common/smbdirect/smbdirect_connect.c @@ -234,7 +234,7 @@ static int smbdirect_connect_rdma_connect(struct smbdirect_socket *sc) */ INIT_DELAYED_WORK(&sc->idle.timer_work, smbdirect_connection_idle_timer_work); sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work, msecs_to_jiffies(sp->rdma_connect_timeout_msec)); return 0; @@ -511,7 +511,7 @@ static int smbdirect_connect_negotiate_start(struct smbdirect_socket *sc) * so that the timer will cause a disconnect. 
*/ sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work, msecs_to_jiffies(sp->negotiate_timeout_msec)); return 0; @@ -632,7 +632,7 @@ static void smbdirect_connect_negotiate_recv_done(struct ib_cq *cq, struct ib_wc if (!sc->first_error) { INIT_WORK(&sc->connect.work, smbdirect_connect_negotiate_recv_work); if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_RUNNING) - queue_work(sc->workqueue, &sc->connect.work); + queue_work(sc->workqueues.connect, &sc->connect.work); } spin_unlock_irqrestore(&sc->connect.lock, flags); @@ -680,7 +680,7 @@ static void smbdirect_connect_negotiate_recv_work(struct work_struct *work) * order to trigger our next keepalive message. */ sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work, msecs_to_jiffies(sp->keepalive_interval_msec)); /* diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index 4afeb4ddadd0..fb947a00e4b6 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -614,7 +614,7 @@ void smbdirect_connection_put_recv_io(struct smbdirect_recv_io *msg) sc->statistics.put_receive_buffer++; spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); - queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); + queue_work(sc->workqueues.refill, &sc->recv_io.posted.refill_work); } __SMBDIRECT_PRIVATE__ @@ -822,11 +822,11 @@ void smbdirect_connection_idle_timer_work(struct work_struct *work) * in order to wait for a response */ sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work, msecs_to_jiffies(sp->keepalive_timeout_msec)); smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_INFO, "schedule send of 
empty idle message\n"); - queue_work(sc->workqueue, &sc->idle.immediate_work); + queue_work(sc->workqueues.immediate, &sc->idle.immediate_work); } __SMBDIRECT_PRIVATE__ @@ -878,7 +878,7 @@ static bool smbdirect_connection_request_keep_alive(struct smbdirect_socket *sc) * Now use the keepalive timeout (instead of keepalive interval) * in order to wait for a response */ - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work, msecs_to_jiffies(sp->keepalive_timeout_msec)); return true; } @@ -1167,7 +1167,7 @@ int smbdirect_connection_send_single_iter(struct smbdirect_socket *sc, * get some new recv credits we can grant to * the peer. */ - queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); + queue_work(sc->workqueues.refill, &sc->recv_io.posted.refill_work); /* * wait until either the refill work or the peer @@ -1568,7 +1568,7 @@ void smbdirect_connection_recv_io_done(struct ib_cq *cq, struct ib_wc *wc) * order to trigger our next keepalive message. 
*/ sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; - mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work, msecs_to_jiffies(sp->keepalive_interval_msec)); ib_dma_sync_single_for_cpu(sc->ib.dev, @@ -1673,7 +1673,7 @@ void smbdirect_connection_recv_io_done(struct ib_cq *cq, struct ib_wc *wc) if (flags & SMBDIRECT_FLAG_RESPONSE_REQUESTED) { smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_INFO, "schedule send of immediate response\n"); - queue_work(sc->workqueue, &sc->idle.immediate_work); + queue_work(sc->workqueues.immediate, &sc->idle.immediate_work); } /* @@ -1683,7 +1683,7 @@ void smbdirect_connection_recv_io_done(struct ib_cq *cq, struct ib_wc *wc) if (data_length) { if (current_recv_credits <= (sc->recv_io.credits.target / 4) || sc->recv_io.credits.target > old_recv_credit_target) - queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); + queue_work(sc->workqueues.refill, &sc->recv_io.posted.refill_work); smbdirect_connection_reassembly_append_recv_io(sc, recv_io, data_length); wake_up(&sc->recv_io.reassembly.wait_queue); @@ -1814,7 +1814,7 @@ static void smbdirect_connection_recv_io_refill_work(struct work_struct *work) if (posted > 0) { smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_INFO, "schedule send of an empty message\n"); - queue_work(sc->workqueue, &sc->idle.immediate_work); + queue_work(sc->workqueues.immediate, &sc->idle.immediate_work); } } diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index f69c290f36ca..fedde477994f 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -242,7 +242,12 @@ int smbdirect_socket_set_custom_workqueue(struct smbdirect_socket *sc, /* * Remember the callers workqueue */ - sc->workqueue = workqueue; + sc->workqueues.accept = workqueue; + sc->workqueues.connect = workqueue; + sc->workqueues.idle = workqueue; + sc->workqueues.refill = workqueue; + 
sc->workqueues.immediate = workqueue; + sc->workqueues.cleanup = workqueue; return 0; } @@ -419,7 +424,7 @@ void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, */ smbdirect_socket_wake_up_all(sc); - queue_work(sc->workqueue, &sc->disconnect_work); + queue_work(sc->workqueues.cleanup, &sc->disconnect_work); } static void smbdirect_socket_cleanup_work(struct work_struct *work) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 127197c3e164..5a3b75c40728 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -109,12 +109,19 @@ struct smbdirect_socket { int first_error; /* - * This points to the workqueue to + * This points to the workqueues to * be used for this socket. * It can be per socket (on the client) * or point to a global workqueue (on the server) */ - struct workqueue_struct *workqueue; + struct { + struct workqueue_struct *accept; + struct workqueue_struct *connect; + struct workqueue_struct *idle; + struct workqueue_struct *refill; + struct workqueue_struct *immediate; + struct workqueue_struct *cleanup; + } workqueues; struct work_struct disconnect_work; From 1adde16a9e28446b5a73a8f0e05f6f977e520528 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 4 Nov 2025 17:25:48 +0100 Subject: [PATCH 138/145] smb: smbdirect: introduce global workqueues These will be used in future and callers should no longer use smbdirect_socket_set_custom_workqueue(). 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_internal.h | 9 +++ fs/smb/common/smbdirect/smbdirect_main.c | 73 +++++++++++++++++++- fs/smb/common/smbdirect/smbdirect_socket.h | 9 ++- 3 files changed, 88 insertions(+), 3 deletions(-) diff --git a/fs/smb/common/smbdirect/smbdirect_internal.h b/fs/smb/common/smbdirect/smbdirect_internal.h index 03a01fb1ab1c..b5fe07b7c54d 100644 --- a/fs/smb/common/smbdirect/smbdirect_internal.h +++ b/fs/smb/common/smbdirect/smbdirect_internal.h @@ -19,6 +19,15 @@ struct smbdirect_module_state { struct mutex mutex; + struct { + struct workqueue_struct *accept; + struct workqueue_struct *connect; + struct workqueue_struct *idle; + struct workqueue_struct *refill; + struct workqueue_struct *immediate; + struct workqueue_struct *cleanup; + } workqueues; + struct { rwlock_t lock; struct list_head list; diff --git a/fs/smb/common/smbdirect/smbdirect_main.c b/fs/smb/common/smbdirect/smbdirect_main.c index 948964d3fa35..fe6e8d93c34c 100644 --- a/fs/smb/common/smbdirect/smbdirect_main.c +++ b/fs/smb/common/smbdirect/smbdirect_main.c @@ -12,11 +12,63 @@ struct smbdirect_module_state smbdirect_globals = { static __init int smbdirect_module_init(void) { - int ret; + int ret = -ENOMEM; pr_notice("subsystem loading...\n"); mutex_lock(&smbdirect_globals.mutex); + smbdirect_globals.workqueues.accept = alloc_workqueue("smbdirect-accept", + WQ_SYSFS | + WQ_PERCPU | + WQ_POWER_EFFICIENT, + 0); + if (smbdirect_globals.workqueues.accept == NULL) + goto alloc_accept_wq_failed; + + smbdirect_globals.workqueues.connect = alloc_workqueue("smbdirect-connect", + WQ_SYSFS | + WQ_PERCPU | + WQ_POWER_EFFICIENT, + 0); + if (smbdirect_globals.workqueues.connect == NULL) + goto alloc_connect_wq_failed; + + smbdirect_globals.workqueues.idle = 
alloc_workqueue("smbdirect-idle", + WQ_SYSFS | + WQ_PERCPU | + WQ_POWER_EFFICIENT, + 0); + if (smbdirect_globals.workqueues.idle == NULL) + goto alloc_idle_wq_failed; + + smbdirect_globals.workqueues.refill = alloc_workqueue("smbdirect-refill", + WQ_HIGHPRI | + WQ_SYSFS | + WQ_PERCPU | + WQ_POWER_EFFICIENT, + 0); + if (smbdirect_globals.workqueues.refill == NULL) + goto alloc_refill_wq_failed; + + smbdirect_globals.workqueues.immediate = alloc_workqueue("smbdirect-immediate", + WQ_HIGHPRI | + WQ_SYSFS | + WQ_PERCPU | + WQ_POWER_EFFICIENT, + 0); + if (smbdirect_globals.workqueues.immediate == NULL) + goto alloc_immediate_wq_failed; + + smbdirect_globals.workqueues.cleanup = alloc_workqueue("smbdirect-cleanup", + WQ_MEM_RECLAIM | + WQ_HIGHPRI | + WQ_SYSFS | + WQ_PERCPU | + WQ_POWER_EFFICIENT, + 0); + if (smbdirect_globals.workqueues.cleanup == NULL) + goto alloc_cleanup_wq_failed; + ret = smbdirect_devices_init(); if (ret) goto devices_init_failed; @@ -26,6 +78,18 @@ static __init int smbdirect_module_init(void) return 0; devices_init_failed: + destroy_workqueue(smbdirect_globals.workqueues.cleanup); +alloc_cleanup_wq_failed: + destroy_workqueue(smbdirect_globals.workqueues.immediate); +alloc_immediate_wq_failed: + destroy_workqueue(smbdirect_globals.workqueues.refill); +alloc_refill_wq_failed: + destroy_workqueue(smbdirect_globals.workqueues.idle); +alloc_idle_wq_failed: + destroy_workqueue(smbdirect_globals.workqueues.connect); +alloc_connect_wq_failed: + destroy_workqueue(smbdirect_globals.workqueues.accept); +alloc_accept_wq_failed: mutex_unlock(&smbdirect_globals.mutex); pr_crit("failed to loaded: %d (%1pe)\n", ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); @@ -39,6 +103,13 @@ static __exit void smbdirect_module_exit(void) smbdirect_devices_exit(); + destroy_workqueue(smbdirect_globals.workqueues.accept); + destroy_workqueue(smbdirect_globals.workqueues.connect); + destroy_workqueue(smbdirect_globals.workqueues.idle); + 
destroy_workqueue(smbdirect_globals.workqueues.refill); + destroy_workqueue(smbdirect_globals.workqueues.immediate); + destroy_workqueue(smbdirect_globals.workqueues.cleanup); + mutex_unlock(&smbdirect_globals.mutex); pr_notice("subsystem unloaded\n"); } diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 5a3b75c40728..f32fee3a6bd2 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -111,8 +111,6 @@ struct smbdirect_socket { /* * This points to the workqueues to * be used for this socket. - * It can be per socket (on the client) - * or point to a global workqueue (on the server) */ struct { struct workqueue_struct *accept; @@ -572,6 +570,13 @@ static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc) init_waitqueue_head(&sc->status_wait); + sc->workqueues.accept = smbdirect_globals.workqueues.accept; + sc->workqueues.connect = smbdirect_globals.workqueues.connect; + sc->workqueues.idle = smbdirect_globals.workqueues.idle; + sc->workqueues.refill = smbdirect_globals.workqueues.refill; + sc->workqueues.immediate = smbdirect_globals.workqueues.immediate; + sc->workqueues.cleanup = smbdirect_globals.workqueues.cleanup; + INIT_WORK(&sc->disconnect_work, __smbdirect_socket_disabled_work); disable_work_sync(&sc->disconnect_work); From 73dc52d2942ccf4d4f680176c1e7f36aadba4ce8 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 4 Nov 2025 17:31:25 +0100 Subject: [PATCH 139/145] smb: client: no longer use smbdirect_socket_set_custom_workqueue() smbdirect.ko has global workqueues now, so we should use these default ones.
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 12 ------------ fs/smb/client/smbdirect.h | 1 - 2 files changed, 13 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 73fc86312bbf..9e67adcdc7d3 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -211,7 +211,6 @@ void smbd_destroy(struct TCP_Server_Info *server) smbdirect_socket_release(info->socket); - destroy_workqueue(info->workqueue); kfree(info); server->smbd_conn = NULL; } @@ -261,7 +260,6 @@ static struct smbd_connection *_smbd_get_connection( struct smbdirect_socket_parameters *sp; __be16 *sport; u64 port_flags = 0; - char wq_name[80]; int ret; switch (port) { @@ -306,10 +304,6 @@ static struct smbd_connection *_smbd_get_connection( info = kzalloc_obj(*info); if (!info) return NULL; - scnprintf(wq_name, ARRAY_SIZE(wq_name), "smbd_%p", info); - info->workqueue = create_workqueue(wq_name); - if (!info->workqueue) - goto create_wq_failed; ret = smbdirect_socket_create_kern(net, &sc); if (ret) goto socket_init_failed; @@ -320,9 +314,6 @@ static struct smbd_connection *_smbd_get_connection( ret = smbdirect_socket_set_kernel_settings(sc, IB_POLL_SOFTIRQ, GFP_KERNEL); if (ret) goto set_settings_failed; - ret = smbdirect_socket_set_custom_workqueue(sc, info->workqueue); - if (ret) - goto set_workqueue_failed; if (dstaddr->sa_family == AF_INET6) sport = &((struct sockaddr_in6 *)dstaddr)->sin6_port; @@ -342,13 +333,10 @@ static struct smbd_connection *_smbd_get_connection( return info; connect_failed: -set_workqueue_failed: set_settings_failed: set_params_failed: smbdirect_socket_release(sc); socket_init_failed: - destroy_workqueue(info->workqueue); -create_wq_failed: kfree(info); return NULL; } diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 
bd03ae72e9c8..0017d5b2de44 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -25,7 +25,6 @@ extern int smbd_receive_credit_max; struct smbd_connection { struct smbdirect_socket *socket; - struct workqueue_struct *workqueue; }; /* Create a SMBDirect session */ From 649c47559a37fdefefc259ab580b537abbc79fbd Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 4 Nov 2025 17:35:46 +0100 Subject: [PATCH 140/145] smb: server: no longer use smbdirect_socket_set_custom_workqueue() smbdirect.ko has global workqueues now, so we should use these default ones. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.c | 1 - fs/smb/server/transport_rdma.c | 30 ------------------------------ fs/smb/server/transport_rdma.h | 2 -- 3 files changed, 33 deletions(-) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 708fac40b8ea..a26899d12df1 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -562,6 +562,5 @@ void ksmbd_conn_transport_destroy(void) ksmbd_tcp_destroy(); ksmbd_rdma_stop_listening(); stop_sessions(); - ksmbd_rdma_destroy(); mutex_unlock(&init_lock); } diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 951ac9fec687..706a2c897948 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -85,8 +85,6 @@ static struct smb_direct_listener { struct smbdirect_socket *socket; } smb_direct_ib_listener, smb_direct_iw_listener; -static struct workqueue_struct *smb_direct_wq; - struct smb_direct_transport { struct ksmbd_transport transport; @@ -451,12 +449,6 @@ static int smb_direct_listen(struct smb_direct_listener *listener, ret, ERR_PTR(ret)); goto err; } - ret = smbdirect_socket_set_custom_workqueue(sc, smb_direct_wq); - if (ret) { - pr_err("Failed
smbdirect_socket_set_custom_workqueue(): %d %1pe\n", - ret, ERR_PTR(ret)); - goto err; - } ret = smbdirect_socket_bind(sc, (struct sockaddr *)&sin); if (ret) { @@ -500,19 +492,6 @@ int ksmbd_rdma_init(void) .socket = NULL, }; - /* When a client is running out of send credits, the credits are - * granted by the server's sending a packet using this queue. - * This avoids the situation that a clients cannot send packets - * for lack of credits - */ - smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq", - WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_PERCPU, - 0); - if (!smb_direct_wq) { - ret = -ENOMEM; - goto err; - } - ret = smb_direct_listen(&smb_direct_ib_listener, SMB_DIRECT_PORT_INFINIBAND); if (ret) { @@ -536,7 +515,6 @@ int ksmbd_rdma_init(void) return 0; err: ksmbd_rdma_stop_listening(); - ksmbd_rdma_destroy(); return ret; } @@ -546,14 +524,6 @@ void ksmbd_rdma_stop_listening(void) smb_direct_listener_destroy(&smb_direct_iw_listener); } -void ksmbd_rdma_destroy(void) -{ - if (smb_direct_wq) { - destroy_workqueue(smb_direct_wq); - smb_direct_wq = NULL; - } -} - bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { u8 node_type = smbdirect_netdev_rdma_capable_node_type(netdev); diff --git a/fs/smb/server/transport_rdma.h b/fs/smb/server/transport_rdma.h index e16f625caed2..05352dc47f95 100644 --- a/fs/smb/server/transport_rdma.h +++ b/fs/smb/server/transport_rdma.h @@ -14,14 +14,12 @@ #ifdef CONFIG_SMB_SERVER_SMBDIRECT int ksmbd_rdma_init(void); void ksmbd_rdma_stop_listening(void); -void ksmbd_rdma_destroy(void); bool ksmbd_rdma_capable_netdev(struct net_device *netdev); void init_smbd_max_io_size(unsigned int sz); unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt); #else static inline int ksmbd_rdma_init(void) { return 0; } static inline void ksmbd_rdma_stop_listening(void) { } -static inline void ksmbd_rdma_destroy(void) { } static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; } static inline void 
init_smbd_max_io_size(unsigned int sz) { } static inline unsigned int get_smbd_max_read_write_size(struct ksmbd_transport *kt) { return 0; } From aa43bb2c0fc0d928bb120f853349c8affcfeb8b4 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 30 Oct 2025 12:48:30 +0100 Subject: [PATCH 141/145] smb: smbdirect: remove unused SMBDIRECT_USE_INLINE_C_FILES logic We always build as standalone module (or as part of the core kernel). This also removes unused elements from struct smbdirect_socket and unused exports. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_accept.c | 3 - .../common/smbdirect/smbdirect_all_c_files.c | 25 ------- fs/smb/common/smbdirect/smbdirect_connect.c | 2 - .../common/smbdirect/smbdirect_connection.c | 31 -------- fs/smb/common/smbdirect/smbdirect_debug.c | 1 - fs/smb/common/smbdirect/smbdirect_internal.h | 38 +--------- fs/smb/common/smbdirect/smbdirect_listen.c | 1 - fs/smb/common/smbdirect/smbdirect_mr.c | 5 -- fs/smb/common/smbdirect/smbdirect_public.h | 43 ----------- fs/smb/common/smbdirect/smbdirect_rw.c | 1 - fs/smb/common/smbdirect/smbdirect_socket.c | 71 ------------------- fs/smb/common/smbdirect/smbdirect_socket.h | 15 ---- 12 files changed, 3 insertions(+), 233 deletions(-) delete mode 100644 fs/smb/common/smbdirect/smbdirect_all_c_files.c diff --git a/fs/smb/common/smbdirect/smbdirect_accept.c b/fs/smb/common/smbdirect/smbdirect_accept.c index 862df982c2ce..d6d5e6a3f5de 100644 --- a/fs/smb/common/smbdirect/smbdirect_accept.c +++ b/fs/smb/common/smbdirect/smbdirect_accept.c @@ -15,7 +15,6 @@ static int smbdirect_accept_init_params(struct smbdirect_socket *sc); static void smbdirect_accept_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void smbdirect_accept_negotiate_send_done(struct ib_cq *cq, struct ib_wc *wc); 
-__SMBDIRECT_PUBLIC__ int smbdirect_accept_connect_request(struct smbdirect_socket *sc, const struct rdma_conn_param *param) { @@ -161,7 +160,6 @@ int smbdirect_accept_connect_request(struct smbdirect_socket *sc, init_params_failed: return ret; } -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_accept_connect_request); static int smbdirect_accept_init_params(struct smbdirect_socket *sc) { @@ -793,7 +791,6 @@ static long smbdirect_socket_wait_for_accept(struct smbdirect_socket *lsc, long return 0; } -__SMBDIRECT_PUBLIC__ struct smbdirect_socket *smbdirect_socket_accept(struct smbdirect_socket *lsc, long timeo, struct proto_accept_arg *arg) diff --git a/fs/smb/common/smbdirect/smbdirect_all_c_files.c b/fs/smb/common/smbdirect/smbdirect_all_c_files.c deleted file mode 100644 index 03e5852cdf86..000000000000 --- a/fs/smb/common/smbdirect/smbdirect_all_c_files.c +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (c) 2025, Stefan Metzmacher - */ - -/* - * This is a temporary solution in order - * to include the common smbdirect functions - * into .c files in order to make a transformation - * in tiny bisectable steps possible. - * - * It will be replaced by a smbdirect.ko with - * exported public functions at the end. 
- */ -#ifndef SMBDIRECT_USE_INLINE_C_FILES -#error SMBDIRECT_USE_INLINE_C_FILES define needed -#endif -#include "smbdirect_socket.c" -#include "smbdirect_connection.c" -#include "smbdirect_mr.c" -#include "smbdirect_rw.c" -#include "smbdirect_debug.c" -#include "smbdirect_connect.c" -#include "smbdirect_accept.c" -#include "smbdirect_listen.c" diff --git a/fs/smb/common/smbdirect/smbdirect_connect.c b/fs/smb/common/smbdirect/smbdirect_connect.c index 282dc46c943c..2b54f79dba43 100644 --- a/fs/smb/common/smbdirect/smbdirect_connect.c +++ b/fs/smb/common/smbdirect/smbdirect_connect.c @@ -16,7 +16,6 @@ static int smbdirect_connect_negotiate_start(struct smbdirect_socket *sc); static void smbdirect_connect_negotiate_send_done(struct ib_cq *cq, struct ib_wc *wc); static void smbdirect_connect_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc); -__SMBDIRECT_PUBLIC__ int smbdirect_connect(struct smbdirect_socket *sc, const struct sockaddr *dst) { const struct sockaddr *src = NULL; @@ -895,7 +894,6 @@ static void smbdirect_connect_negotiate_recv_work(struct work_struct *work) smbdirect_connection_negotiation_done(sc); } -__SMBDIRECT_PUBLIC__ int smbdirect_connect_sync(struct smbdirect_socket *sc, const struct sockaddr *dst) { diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c index fb947a00e4b6..7e4921b9538c 100644 --- a/fs/smb/common/smbdirect/smbdirect_connection.c +++ b/fs/smb/common/smbdirect/smbdirect_connection.c @@ -22,7 +22,6 @@ static ssize_t smbdirect_map_sges_from_iter(struct iov_iter *iter, size_t len, static void smbdirect_connection_recv_io_refill_work(struct work_struct *work); static void smbdirect_connection_send_immediate_work(struct work_struct *work); -__maybe_unused /* this is temporary while this file is included in others */ static void smbdirect_connection_qp_event_handler(struct ib_event *event, void *context) { struct smbdirect_socket *sc = context; @@ -143,7 +142,6 @@ static int 
smbdirect_connection_rdma_event_handler(struct rdma_cm_id *id, return 0; } -__SMBDIRECT_PRIVATE__ void smbdirect_connection_rdma_established(struct smbdirect_socket *sc) { smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, @@ -157,7 +155,6 @@ void smbdirect_connection_rdma_established(struct smbdirect_socket *sc) sc->rdma.expected_event = RDMA_CM_EVENT_DISCONNECTED; } -__SMBDIRECT_PRIVATE__ void smbdirect_connection_negotiation_done(struct smbdirect_socket *sc) { if (unlikely(sc->first_error)) @@ -242,7 +239,6 @@ static u32 smbdirect_rdma_rw_send_wrs(struct ib_device *dev, return factor * attr->cap.max_rdma_ctxs; } -__SMBDIRECT_PRIVATE__ int smbdirect_connection_create_qp(struct smbdirect_socket *sc) { const struct smbdirect_socket_parameters *sp = &sc->parameters; @@ -400,7 +396,6 @@ int smbdirect_connection_create_qp(struct smbdirect_socket *sc) return ret; } -__SMBDIRECT_PRIVATE__ void smbdirect_connection_destroy_qp(struct smbdirect_socket *sc) { if (sc->ib.qp) { @@ -422,7 +417,6 @@ void smbdirect_connection_destroy_qp(struct smbdirect_socket *sc) } } -__SMBDIRECT_PRIVATE__ int smbdirect_connection_create_mem_pools(struct smbdirect_socket *sc) { const struct smbdirect_socket_parameters *sp = &sc->parameters; @@ -500,7 +494,6 @@ int smbdirect_connection_create_mem_pools(struct smbdirect_socket *sc) return -ENOMEM; } -__SMBDIRECT_PRIVATE__ void smbdirect_connection_destroy_mem_pools(struct smbdirect_socket *sc) { struct smbdirect_recv_io *recv_io, *next_io; @@ -528,7 +521,6 @@ void smbdirect_connection_destroy_mem_pools(struct smbdirect_socket *sc) sc->send_io.mem.cache = NULL; } -__SMBDIRECT_PRIVATE__ struct smbdirect_send_io *smbdirect_connection_alloc_send_io(struct smbdirect_socket *sc) { struct smbdirect_send_io *msg; @@ -543,7 +535,6 @@ struct smbdirect_send_io *smbdirect_connection_alloc_send_io(struct smbdirect_so return msg; } -__SMBDIRECT_PRIVATE__ void smbdirect_connection_free_send_io(struct smbdirect_send_io *msg) { struct smbdirect_socket *sc = 
msg->socket; @@ -575,7 +566,6 @@ void smbdirect_connection_free_send_io(struct smbdirect_send_io *msg) mempool_free(msg, sc->send_io.mem.pool); } -__SMBDIRECT_PRIVATE__ struct smbdirect_recv_io *smbdirect_connection_get_recv_io(struct smbdirect_socket *sc) { struct smbdirect_recv_io *msg = NULL; @@ -595,7 +585,6 @@ struct smbdirect_recv_io *smbdirect_connection_get_recv_io(struct smbdirect_sock return msg; } -__SMBDIRECT_PRIVATE__ void smbdirect_connection_put_recv_io(struct smbdirect_recv_io *msg) { struct smbdirect_socket *sc = msg->socket; @@ -617,7 +606,6 @@ void smbdirect_connection_put_recv_io(struct smbdirect_recv_io *msg) queue_work(sc->workqueues.refill, &sc->recv_io.posted.refill_work); } -__SMBDIRECT_PRIVATE__ void smbdirect_connection_reassembly_append_recv_io(struct smbdirect_socket *sc, struct smbdirect_recv_io *msg, u32 data_length) @@ -639,7 +627,6 @@ void smbdirect_connection_reassembly_append_recv_io(struct smbdirect_socket *sc, sc->statistics.enqueue_reassembly_queue++; } -__SMBDIRECT_PRIVATE__ struct smbdirect_recv_io * smbdirect_connection_reassembly_first_recv_io(struct smbdirect_socket *sc) { @@ -652,7 +639,6 @@ smbdirect_connection_reassembly_first_recv_io(struct smbdirect_socket *sc) return msg; } -__SMBDIRECT_PRIVATE__ void smbdirect_connection_negotiate_rdma_resources(struct smbdirect_socket *sc, u8 peer_initiator_depth, u8 peer_responder_resources, @@ -714,7 +700,6 @@ void smbdirect_connection_negotiate_rdma_resources(struct smbdirect_socket *sc, peer_responder_resources); } -__SMBDIRECT_PUBLIC__ bool smbdirect_connection_is_connected(struct smbdirect_socket *sc) { if (unlikely(!sc || sc->first_error || sc->status != SMBDIRECT_SOCKET_CONNECTED)) @@ -723,7 +708,6 @@ bool smbdirect_connection_is_connected(struct smbdirect_socket *sc) } __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_is_connected); -__SMBDIRECT_PUBLIC__ int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc) { const struct smbdirect_socket_parameters *sp = 
&sc->parameters; @@ -797,7 +781,6 @@ int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc) } __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_wait_for_connected); -__SMBDIRECT_PRIVATE__ void smbdirect_connection_idle_timer_work(struct work_struct *work) { struct smbdirect_socket *sc = @@ -829,7 +812,6 @@ void smbdirect_connection_idle_timer_work(struct work_struct *work) queue_work(sc->workqueues.immediate, &sc->idle.immediate_work); } -__SMBDIRECT_PRIVATE__ u16 smbdirect_connection_grant_recv_credits(struct smbdirect_socket *sc) { int missing; @@ -867,7 +849,6 @@ u16 smbdirect_connection_grant_recv_credits(struct smbdirect_socket *sc) return new_credits; } -__maybe_unused /* this is temporary while this file is included in others */ static bool smbdirect_connection_request_keep_alive(struct smbdirect_socket *sc) { const struct smbdirect_socket_parameters *sp = &sc->parameters; @@ -886,7 +867,6 @@ static bool smbdirect_connection_request_keep_alive(struct smbdirect_socket *sc) return false; } -__SMBDIRECT_PRIVATE__ int smbdirect_connection_post_send_wr(struct smbdirect_socket *sc, struct ib_send_wr *wr) { @@ -919,7 +899,6 @@ static void smbdirect_connection_send_batch_init(struct smbdirect_send_batch *ba batch->credit = 0; } -__SMBDIRECT_PUBLIC__ int smbdirect_connection_send_batch_flush(struct smbdirect_socket *sc, struct smbdirect_send_batch *batch, bool is_last) @@ -981,7 +960,6 @@ int smbdirect_connection_send_batch_flush(struct smbdirect_socket *sc, } __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_batch_flush); -__SMBDIRECT_PUBLIC__ struct smbdirect_send_batch * smbdirect_init_send_batch_storage(struct smbdirect_send_batch_storage *storage, bool need_invalidate_rkey, @@ -1098,7 +1076,6 @@ static int smbdirect_connection_post_send_io(struct smbdirect_socket *sc, return smbdirect_connection_post_send_wr(sc, &msg->wr); } -__SMBDIRECT_PUBLIC__ int smbdirect_connection_send_single_iter(struct smbdirect_socket *sc, struct 
smbdirect_send_batch *batch, struct iov_iter *iter, @@ -1288,7 +1265,6 @@ int smbdirect_connection_send_single_iter(struct smbdirect_socket *sc, } __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_single_iter); -__SMBDIRECT_PUBLIC__ int smbdirect_connection_send_wait_zero_pending(struct smbdirect_socket *sc) { /* @@ -1314,7 +1290,6 @@ int smbdirect_connection_send_wait_zero_pending(struct smbdirect_socket *sc) } __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_wait_zero_pending); -__SMBDIRECT_PUBLIC__ int smbdirect_connection_send_iter(struct smbdirect_socket *sc, struct iov_iter *iter, unsigned int flags, @@ -1465,8 +1440,6 @@ static void smbdirect_connection_send_io_done(struct ib_cq *cq, struct ib_wc *wc if (atomic_dec_and_test(&sc->send_io.pending.count)) wake_up(&sc->send_io.pending.zero_wait_queue); - - wake_up(&sc->send_io.pending.dec_wait_queue); } static void smbdirect_connection_send_immediate_work(struct work_struct *work) @@ -1490,7 +1463,6 @@ static void smbdirect_connection_send_immediate_work(struct work_struct *work) } } -__SMBDIRECT_PRIVATE__ int smbdirect_connection_post_recv_io(struct smbdirect_recv_io *msg) { struct smbdirect_socket *sc = msg->socket; @@ -1532,7 +1504,6 @@ int smbdirect_connection_post_recv_io(struct smbdirect_recv_io *msg) return ret; } -__SMBDIRECT_PRIVATE__ void smbdirect_connection_recv_io_done(struct ib_cq *cq, struct ib_wc *wc) { struct smbdirect_recv_io *recv_io = @@ -1702,7 +1673,6 @@ void smbdirect_connection_recv_io_done(struct ib_cq *cq, struct ib_wc *wc) smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); } -__SMBDIRECT_PRIVATE__ int smbdirect_connection_recv_io_refill(struct smbdirect_socket *sc) { int missing; @@ -1818,7 +1788,6 @@ static void smbdirect_connection_recv_io_refill_work(struct work_struct *work) } } -__SMBDIRECT_PUBLIC__ int smbdirect_connection_recvmsg(struct smbdirect_socket *sc, struct msghdr *msg, unsigned int flags) diff --git a/fs/smb/common/smbdirect/smbdirect_debug.c 
b/fs/smb/common/smbdirect/smbdirect_debug.c index eac924164fd4..d8664fd7f71a 100644 --- a/fs/smb/common/smbdirect/smbdirect_debug.c +++ b/fs/smb/common/smbdirect/smbdirect_debug.c @@ -7,7 +7,6 @@ #include "smbdirect_internal.h" #include -__SMBDIRECT_PUBLIC__ void smbdirect_connection_legacy_debug_proc_show(struct smbdirect_socket *sc, unsigned int rdma_readwrite_threshold, struct seq_file *m) diff --git a/fs/smb/common/smbdirect/smbdirect_internal.h b/fs/smb/common/smbdirect/smbdirect_internal.h index b5fe07b7c54d..30a1b8643657 100644 --- a/fs/smb/common/smbdirect/smbdirect_internal.h +++ b/fs/smb/common/smbdirect/smbdirect_internal.h @@ -6,9 +6,7 @@ #ifndef __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ #define __FS_SMB_COMMON_SMBDIRECT_INTERNAL_H__ -#ifndef SMBDIRECT_USE_INLINE_C_FILES #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#endif /* ! SMBDIRECT_USE_INLINE_C_FILES */ #include "smbdirect.h" #include "smbdirect_pdu.h" @@ -48,20 +46,10 @@ struct smbdirect_device { char ib_name[IB_DEVICE_NAME_MAX]; }; -#ifdef SMBDIRECT_USE_INLINE_C_FILES -/* this is temporary while this file is included in others */ -#define __SMBDIRECT_PRIVATE__ __maybe_unused static -#else -#define __SMBDIRECT_PRIVATE__ -#endif - -__SMBDIRECT_PRIVATE__ int smbdirect_socket_init_new(struct net *net, struct smbdirect_socket *sc); -__SMBDIRECT_PRIVATE__ int smbdirect_socket_init_accepting(struct rdma_cm_id *id, struct smbdirect_socket *sc); -__SMBDIRECT_PRIVATE__ void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, const char *macro_name, unsigned int lvl, @@ -84,10 +72,8 @@ void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, __func__, __LINE__, __error, &__force_status); \ } while (0) -__SMBDIRECT_PRIVATE__ void smbdirect_socket_destroy_sync(struct smbdirect_socket *sc); -__SMBDIRECT_PRIVATE__ int smbdirect_socket_wait_for_credits(struct smbdirect_socket *sc, enum smbdirect_socket_status expected_status, int unexpected_errno, @@ -95,76 +81,58 @@ int 
smbdirect_socket_wait_for_credits(struct smbdirect_socket *sc, atomic_t *total_credits, int needed); -__SMBDIRECT_PRIVATE__ void smbdirect_connection_rdma_established(struct smbdirect_socket *sc); -__SMBDIRECT_PRIVATE__ void smbdirect_connection_negotiation_done(struct smbdirect_socket *sc); -__SMBDIRECT_PRIVATE__ int smbdirect_connection_create_qp(struct smbdirect_socket *sc); -__SMBDIRECT_PRIVATE__ void smbdirect_connection_destroy_qp(struct smbdirect_socket *sc); -__SMBDIRECT_PRIVATE__ int smbdirect_connection_create_mem_pools(struct smbdirect_socket *sc); -__SMBDIRECT_PRIVATE__ void smbdirect_connection_destroy_mem_pools(struct smbdirect_socket *sc); -__SMBDIRECT_PRIVATE__ struct smbdirect_send_io *smbdirect_connection_alloc_send_io(struct smbdirect_socket *sc); -__SMBDIRECT_PRIVATE__ void smbdirect_connection_free_send_io(struct smbdirect_send_io *msg); -__SMBDIRECT_PRIVATE__ struct smbdirect_recv_io *smbdirect_connection_get_recv_io(struct smbdirect_socket *sc); -__SMBDIRECT_PRIVATE__ void smbdirect_connection_put_recv_io(struct smbdirect_recv_io *msg); -__SMBDIRECT_PRIVATE__ void smbdirect_connection_reassembly_append_recv_io(struct smbdirect_socket *sc, struct smbdirect_recv_io *msg, u32 data_length); -__SMBDIRECT_PRIVATE__ struct smbdirect_recv_io * smbdirect_connection_reassembly_first_recv_io(struct smbdirect_socket *sc); -__SMBDIRECT_PRIVATE__ void smbdirect_connection_negotiate_rdma_resources(struct smbdirect_socket *sc, u8 peer_initiator_depth, u8 peer_responder_resources, const struct rdma_conn_param *param); -__SMBDIRECT_PRIVATE__ void smbdirect_connection_idle_timer_work(struct work_struct *work); -__SMBDIRECT_PRIVATE__ u16 smbdirect_connection_grant_recv_credits(struct smbdirect_socket *sc); -__SMBDIRECT_PRIVATE__ int smbdirect_connection_post_send_wr(struct smbdirect_socket *sc, struct ib_send_wr *wr); -__SMBDIRECT_PRIVATE__ int smbdirect_connection_post_recv_io(struct smbdirect_recv_io *msg); -__SMBDIRECT_PRIVATE__ void 
smbdirect_connection_recv_io_done(struct ib_cq *cq, struct ib_wc *wc); -__SMBDIRECT_PRIVATE__ int smbdirect_connection_recv_io_refill(struct smbdirect_socket *sc); -__SMBDIRECT_PRIVATE__ int smbdirect_connection_create_mr_list(struct smbdirect_socket *sc); -__SMBDIRECT_PRIVATE__ void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc); +int smbdirect_accept_connect_request(struct smbdirect_socket *sc, + const struct rdma_conn_param *param); + void smbdirect_accept_negotiate_finish(struct smbdirect_socket *sc, u32 ntstatus); __init int smbdirect_devices_init(void); diff --git a/fs/smb/common/smbdirect/smbdirect_listen.c b/fs/smb/common/smbdirect/smbdirect_listen.c index ad1ecf64762b..05c7902e7020 100644 --- a/fs/smb/common/smbdirect/smbdirect_listen.c +++ b/fs/smb/common/smbdirect/smbdirect_listen.c @@ -10,7 +10,6 @@ static int smbdirect_listen_rdma_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event); -__SMBDIRECT_PUBLIC__ int smbdirect_socket_listen(struct smbdirect_socket *sc, int backlog) { int ret; diff --git a/fs/smb/common/smbdirect/smbdirect_mr.c b/fs/smb/common/smbdirect/smbdirect_mr.c index d2e70941f772..4873a2c1066f 100644 --- a/fs/smb/common/smbdirect/smbdirect_mr.c +++ b/fs/smb/common/smbdirect/smbdirect_mr.c @@ -13,7 +13,6 @@ * Recovery is done in smbd_mr_recovery_work. 
The content of list entry changes * as MRs are used and recovered for I/O, but the list links will not change */ -__SMBDIRECT_PRIVATE__ int smbdirect_connection_create_mr_list(struct smbdirect_socket *sc) { const struct smbdirect_socket_parameters *sp = &sc->parameters; @@ -116,7 +115,6 @@ static void smbdirect_mr_io_free_locked(struct kref *kref) kfree(mr); } -__SMBDIRECT_PRIVATE__ void smbdirect_connection_destroy_mr_list(struct smbdirect_socket *sc) { struct smbdirect_mr_io *mr, *tmp; @@ -263,7 +261,6 @@ static int smbdirect_iter_to_sgt(struct iov_iter *iter, * need_invalidate: true if this MR needs to be locally invalidated after I/O * return value: the MR registered, NULL if failed. */ -__SMBDIRECT_PUBLIC__ struct smbdirect_mr_io * smbdirect_connection_register_mr_io(struct smbdirect_socket *sc, struct iov_iter *iter, @@ -385,7 +382,6 @@ smbdirect_connection_register_mr_io(struct smbdirect_socket *sc, } __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_register_mr_io); -__SMBDIRECT_PUBLIC__ void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr, struct smbdirect_buffer_descriptor_v1 *v1) { @@ -409,7 +405,6 @@ __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_mr_io_fill_buffer_descriptor); * and we have to locally invalidate the buffer to prevent data is being * modified by remote peer after upper layer consumes it */ -__SMBDIRECT_PUBLIC__ void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr) { struct smbdirect_socket *sc = mr->socket; diff --git a/fs/smb/common/smbdirect/smbdirect_public.h b/fs/smb/common/smbdirect/smbdirect_public.h index 231ad7a9c6af..50088155e7c3 100644 --- a/fs/smb/common/smbdirect/smbdirect_public.h +++ b/fs/smb/common/smbdirect/smbdirect_public.h @@ -13,46 +13,28 @@ struct smbdirect_socket; struct smbdirect_send_batch; struct smbdirect_mr_io; -#ifdef SMBDIRECT_USE_INLINE_C_FILES -/* this is temporary while this file is included in others */ -#define __SMBDIRECT_PUBLIC__ __maybe_unused static -#define 
__SMBDIRECT_EXPORT_SYMBOL__(__sym) -#else -#define __SMBDIRECT_PUBLIC__ #define __SMBDIRECT_EXPORT_SYMBOL__(__sym) EXPORT_SYMBOL_FOR_MODULES(__sym, "cifs,ksmbd") -#endif #include -__SMBDIRECT_PUBLIC__ u8 smbdirect_netdev_rdma_capable_node_type(struct net_device *netdev); -__SMBDIRECT_PUBLIC__ bool smbdirect_frwr_is_supported(const struct ib_device_attr *attrs); -__SMBDIRECT_PUBLIC__ int smbdirect_socket_create_kern(struct net *net, struct smbdirect_socket **_sc); -__SMBDIRECT_PUBLIC__ int smbdirect_socket_create_accepting(struct rdma_cm_id *id, struct smbdirect_socket **_sc); -__SMBDIRECT_PUBLIC__ int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, const struct smbdirect_socket_parameters *sp); -__SMBDIRECT_PUBLIC__ const struct smbdirect_socket_parameters * smbdirect_socket_get_current_parameters(struct smbdirect_socket *sc); -__SMBDIRECT_PUBLIC__ int smbdirect_socket_set_kernel_settings(struct smbdirect_socket *sc, enum ib_poll_context poll_ctx, gfp_t gfp_mask); -__SMBDIRECT_PUBLIC__ -int smbdirect_socket_set_custom_workqueue(struct smbdirect_socket *sc, - struct workqueue_struct *workqueue); - #define SMBDIRECT_LOG_ERR 0x0 #define SMBDIRECT_LOG_INFO 0x1 @@ -67,7 +49,6 @@ int smbdirect_socket_set_custom_workqueue(struct smbdirect_socket *sc, #define SMBDIRECT_LOG_RDMA_MR 0x100 #define SMBDIRECT_LOG_RDMA_RW 0x200 #define SMBDIRECT_LOG_NEGOTIATE 0x400 -__SMBDIRECT_PUBLIC__ void smbdirect_socket_set_logging(struct smbdirect_socket *sc, void *private_ptr, bool (*needed)(struct smbdirect_socket *sc, @@ -82,22 +63,16 @@ void smbdirect_socket_set_logging(struct smbdirect_socket *sc, unsigned int cls, struct va_format *vaf)); -__SMBDIRECT_PUBLIC__ bool smbdirect_connection_is_connected(struct smbdirect_socket *sc); -__SMBDIRECT_PUBLIC__ int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc); -__SMBDIRECT_PUBLIC__ int smbdirect_socket_bind(struct smbdirect_socket *sc, struct sockaddr *addr); -__SMBDIRECT_PUBLIC__ void 
smbdirect_socket_shutdown(struct smbdirect_socket *sc); -__SMBDIRECT_PUBLIC__ void smbdirect_socket_release(struct smbdirect_socket *sc); -__SMBDIRECT_PUBLIC__ int smbdirect_connection_send_batch_flush(struct smbdirect_socket *sc, struct smbdirect_send_batch *batch, bool is_last); @@ -114,76 +89,58 @@ struct smbdirect_send_batch_storage { }; }; -__SMBDIRECT_PUBLIC__ struct smbdirect_send_batch * smbdirect_init_send_batch_storage(struct smbdirect_send_batch_storage *storage, bool need_invalidate_rkey, unsigned int remote_key); -__SMBDIRECT_PUBLIC__ int smbdirect_connection_send_single_iter(struct smbdirect_socket *sc, struct smbdirect_send_batch *batch, struct iov_iter *iter, unsigned int flags, u32 remaining_data_length); -__SMBDIRECT_PUBLIC__ int smbdirect_connection_send_wait_zero_pending(struct smbdirect_socket *sc); -__SMBDIRECT_PUBLIC__ int smbdirect_connection_send_iter(struct smbdirect_socket *sc, struct iov_iter *iter, unsigned int flags, bool need_invalidate, unsigned int remote_key); -__SMBDIRECT_PUBLIC__ int smbdirect_connection_recvmsg(struct smbdirect_socket *sc, struct msghdr *msg, unsigned int flags); -__SMBDIRECT_PUBLIC__ int smbdirect_connect(struct smbdirect_socket *sc, const struct sockaddr *dst); -__SMBDIRECT_PUBLIC__ int smbdirect_connect_sync(struct smbdirect_socket *sc, const struct sockaddr *dst); -__SMBDIRECT_PUBLIC__ int smbdirect_socket_listen(struct smbdirect_socket *sc, int backlog); -__SMBDIRECT_PUBLIC__ -int smbdirect_accept_connect_request(struct smbdirect_socket *sc, - const struct rdma_conn_param *param); - -__SMBDIRECT_PUBLIC__ struct smbdirect_socket *smbdirect_socket_accept(struct smbdirect_socket *lsc, long timeo, struct proto_accept_arg *arg); -__SMBDIRECT_PUBLIC__ int smbdirect_connection_rdma_xmit(struct smbdirect_socket *sc, void *buf, size_t buf_len, struct smbdirect_buffer_descriptor_v1 *desc, size_t desc_len, bool is_read); -__SMBDIRECT_PUBLIC__ struct smbdirect_mr_io * smbdirect_connection_register_mr_io(struct 
smbdirect_socket *sc, struct iov_iter *iter, bool writing, bool need_invalidate); -__SMBDIRECT_PUBLIC__ void smbdirect_mr_io_fill_buffer_descriptor(struct smbdirect_mr_io *mr, struct smbdirect_buffer_descriptor_v1 *v1); -__SMBDIRECT_PUBLIC__ void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr); -__SMBDIRECT_PUBLIC__ void smbdirect_connection_legacy_debug_proc_show(struct smbdirect_socket *sc, unsigned int rdma_readwrite_threshold, struct seq_file *m); diff --git a/fs/smb/common/smbdirect/smbdirect_rw.c b/fs/smb/common/smbdirect/smbdirect_rw.c index bd90dffbf369..3b2eb8c48efc 100644 --- a/fs/smb/common/smbdirect/smbdirect_rw.c +++ b/fs/smb/common/smbdirect/smbdirect_rw.c @@ -105,7 +105,6 @@ static void smbdirect_connection_rdma_write_done(struct ib_cq *cq, struct ib_wc smbdirect_connection_rdma_rw_done(cq, wc, DMA_TO_DEVICE); } -__SMBDIRECT_PUBLIC__ int smbdirect_connection_rdma_xmit(struct smbdirect_socket *sc, void *buf, size_t buf_len, struct smbdirect_buffer_descriptor_v1 *desc, diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index fedde477994f..1e3b361728cc 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -6,7 +6,6 @@ #include "smbdirect_internal.h" -__SMBDIRECT_PUBLIC__ bool smbdirect_frwr_is_supported(const struct ib_device_attr *attrs) { /* @@ -52,7 +51,6 @@ static int smbdirect_socket_rdma_event_handler(struct rdma_cm_id *id, return -ESTALE; } -__SMBDIRECT_PRIVATE__ int smbdirect_socket_init_new(struct net *net, struct smbdirect_socket *sc) { struct rdma_cm_id *id; @@ -85,7 +83,6 @@ int smbdirect_socket_init_new(struct net *net, struct smbdirect_socket *sc) return 0; } -__SMBDIRECT_PUBLIC__ int smbdirect_socket_create_kern(struct net *net, struct smbdirect_socket **_sc) { struct smbdirect_socket *sc; @@ -112,7 +109,6 @@ int smbdirect_socket_create_kern(struct net *net, struct smbdirect_socket **_sc) } 
__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_create_kern); -__SMBDIRECT_PRIVATE__ int smbdirect_socket_init_accepting(struct rdma_cm_id *id, struct smbdirect_socket *sc) { smbdirect_socket_init(sc); @@ -128,7 +124,6 @@ int smbdirect_socket_init_accepting(struct rdma_cm_id *id, struct smbdirect_sock return 0; } -__SMBDIRECT_PUBLIC__ int smbdirect_socket_create_accepting(struct rdma_cm_id *id, struct smbdirect_socket **_sc) { struct smbdirect_socket *sc; @@ -155,7 +150,6 @@ int smbdirect_socket_create_accepting(struct rdma_cm_id *id, struct smbdirect_so } __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_create_accepting); -__SMBDIRECT_PUBLIC__ int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, const struct smbdirect_socket_parameters *sp) { @@ -192,7 +186,6 @@ int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, } __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_set_initial_parameters); -__SMBDIRECT_PUBLIC__ const struct smbdirect_socket_parameters * smbdirect_socket_get_current_parameters(struct smbdirect_socket *sc) { @@ -200,7 +193,6 @@ smbdirect_socket_get_current_parameters(struct smbdirect_socket *sc) } __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_get_current_parameters); -__SMBDIRECT_PUBLIC__ int smbdirect_socket_set_kernel_settings(struct smbdirect_socket *sc, enum ib_poll_context poll_ctx, gfp_t gfp_mask) @@ -225,58 +217,6 @@ int smbdirect_socket_set_kernel_settings(struct smbdirect_socket *sc, } __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_set_kernel_settings); -__SMBDIRECT_PUBLIC__ -int smbdirect_socket_set_custom_workqueue(struct smbdirect_socket *sc, - struct workqueue_struct *workqueue) -{ - /* - * This is only allowed before connect or accept - */ - WARN_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED, - "status=%s first_error=%1pe", - smbdirect_socket_status_string(sc->status), - SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); - if (sc->status != SMBDIRECT_SOCKET_CREATED) - return -EINVAL; - - /* - * Remember the callers 
workqueue - */ - sc->workqueues.accept = workqueue; - sc->workqueues.connect = workqueue; - sc->workqueues.idle = workqueue; - sc->workqueues.refill = workqueue; - sc->workqueues.immediate = workqueue; - sc->workqueues.cleanup = workqueue; - - return 0; -} -__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_set_custom_workqueue); - -__maybe_unused /* this is temporary while this file is included in others */ -static void smbdirect_socket_prepare_create(struct smbdirect_socket *sc, - const struct smbdirect_socket_parameters *sp, - struct workqueue_struct *workqueue) -{ - smbdirect_socket_init(sc); - - /* - * Make a copy of the callers parameters - * from here we only work on the copy - */ - smbdirect_socket_set_initial_parameters(sc, sp); - - /* - * Remember the callers workqueue - */ - smbdirect_socket_set_custom_workqueue(sc, workqueue); - - INIT_WORK(&sc->disconnect_work, smbdirect_socket_cleanup_work); - - INIT_DELAYED_WORK(&sc->idle.timer_work, smbdirect_connection_idle_timer_work); -} - -__SMBDIRECT_PUBLIC__ void smbdirect_socket_set_logging(struct smbdirect_socket *sc, void *private_ptr, bool (*needed)(struct smbdirect_socket *sc, @@ -308,15 +248,12 @@ static void smbdirect_socket_wake_up_all(struct smbdirect_socket *sc) wake_up_all(&sc->send_io.bcredits.wait_queue); wake_up_all(&sc->send_io.lcredits.wait_queue); wake_up_all(&sc->send_io.credits.wait_queue); - wake_up_all(&sc->send_io.pending.dec_wait_queue); wake_up_all(&sc->send_io.pending.zero_wait_queue); wake_up_all(&sc->recv_io.reassembly.wait_queue); wake_up_all(&sc->rw_io.credits.wait_queue); wake_up_all(&sc->mr_io.ready.wait_queue); - wake_up_all(&sc->mr_io.cleanup.wait_queue); } -__SMBDIRECT_PRIVATE__ void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, const char *macro_name, unsigned int lvl, @@ -354,7 +291,6 @@ void __smbdirect_socket_schedule_cleanup(struct smbdirect_socket *sc, */ disable_work(&sc->connect.work); disable_work(&sc->recv_io.posted.refill_work); - 
disable_work(&sc->mr_io.recovery_work); disable_work(&sc->idle.immediate_work); sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; disable_delayed_work(&sc->idle.timer_work); @@ -455,7 +391,6 @@ static void smbdirect_socket_cleanup_work(struct work_struct *work) disable_work(&sc->disconnect_work); disable_work(&sc->connect.work); disable_work(&sc->recv_io.posted.refill_work); - disable_work(&sc->mr_io.recovery_work); disable_work(&sc->idle.immediate_work); sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; disable_delayed_work(&sc->idle.timer_work); @@ -573,7 +508,6 @@ static void smbdirect_socket_destroy(struct smbdirect_socket *sc) disable_work_sync(&sc->disconnect_work); disable_work_sync(&sc->connect.work); disable_work_sync(&sc->recv_io.posted.refill_work); - disable_work_sync(&sc->mr_io.recovery_work); disable_work_sync(&sc->idle.immediate_work); disable_delayed_work_sync(&sc->idle.timer_work); @@ -649,7 +583,6 @@ static void smbdirect_socket_destroy(struct smbdirect_socket *sc) "rdma session destroyed\n"); } -__SMBDIRECT_PRIVATE__ void smbdirect_socket_destroy_sync(struct smbdirect_socket *sc) { smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, @@ -698,7 +631,6 @@ void smbdirect_socket_destroy_sync(struct smbdirect_socket *sc) SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); } -__SMBDIRECT_PUBLIC__ int smbdirect_socket_bind(struct smbdirect_socket *sc, struct sockaddr *addr) { int ret; @@ -714,7 +646,6 @@ int smbdirect_socket_bind(struct smbdirect_socket *sc, struct sockaddr *addr) } __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_bind); -__SMBDIRECT_PUBLIC__ void smbdirect_socket_shutdown(struct smbdirect_socket *sc) { smbdirect_socket_schedule_cleanup_lvl(sc, SMBDIRECT_LOG_INFO, -ESHUTDOWN); @@ -746,7 +677,6 @@ static void smbdirect_socket_release_destroy(struct kref *kref) kfree(sc); } -__SMBDIRECT_PUBLIC__ void smbdirect_socket_release(struct smbdirect_socket *sc) { /* @@ -765,7 +695,6 @@ void smbdirect_socket_release(struct smbdirect_socket *sc) } 
__SMBDIRECT_EXPORT_SYMBOL__(smbdirect_socket_release); -__SMBDIRECT_PRIVATE__ int smbdirect_socket_wait_for_credits(struct smbdirect_socket *sc, enum smbdirect_socket_status expected_status, int unexpected_errno, diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index f32fee3a6bd2..c09eddd8ad16 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -276,10 +276,6 @@ struct smbdirect_socket { */ struct { atomic_t count; - /* - * woken when count is decremented - */ - wait_queue_head_t dec_wait_queue; /* * woken when count reached zero */ @@ -393,13 +389,6 @@ struct smbdirect_socket { struct { atomic_t count; } used; - - struct work_struct recovery_work; - - /* Used by transport to wait until all MRs are returned */ - struct { - wait_queue_head_t wait_queue; - } cleanup; } mr_io; /* @@ -616,7 +605,6 @@ static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc) init_waitqueue_head(&sc->send_io.credits.wait_queue); atomic_set(&sc->send_io.pending.count, 0); - init_waitqueue_head(&sc->send_io.pending.dec_wait_queue); init_waitqueue_head(&sc->send_io.pending.zero_wait_queue); sc->recv_io.mem.gfp_mask = GFP_KERNEL; @@ -644,9 +632,6 @@ static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc) atomic_set(&sc->mr_io.ready.count, 0); init_waitqueue_head(&sc->mr_io.ready.wait_queue); atomic_set(&sc->mr_io.used.count, 0); - INIT_WORK(&sc->mr_io.recovery_work, __smbdirect_socket_disabled_work); - disable_work_sync(&sc->mr_io.recovery_work); - init_waitqueue_head(&sc->mr_io.cleanup.wait_queue); sc->logging.private_ptr = NULL; sc->logging.needed = __smbdirect_log_needed; From 735610d0cefa9e44b28498b53706ed2ebac3be27 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 7 Apr 2026 16:46:27 +0200 Subject: [PATCH 142/145] smb: smbdirect: change smbdirect_socket_parameters.{initiator_depth,responder_resources} to __u16 We still limit 
this to U8_MAX as the rdma api only uses __u8 and that's also the limit for Infiniband and RoCE*, while iWarp would be able to support larger values at the protocol level. As struct smbdirect_socket_parameters will be part of the uapi for IPPROTO_SMBDIRECT in future, change it now even if userspace sockets won't be supported yet. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: David Howells Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Acked-by: Henrique Carvalho Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect.h | 4 ++-- fs/smb/common/smbdirect/smbdirect_socket.c | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/smb/common/smbdirect/smbdirect.h b/fs/smb/common/smbdirect/smbdirect.h index e70a21eb57f0..8fbffe3eb845 100644 --- a/fs/smb/common/smbdirect/smbdirect.h +++ b/fs/smb/common/smbdirect/smbdirect.h @@ -32,8 +32,8 @@ struct smbdirect_socket_parameters { __u32 resolve_route_timeout_msec; __u32 rdma_connect_timeout_msec; __u32 negotiate_timeout_msec; - __u8 initiator_depth; - __u8 responder_resources; + __u16 initiator_depth; /* limited to U8_MAX */ + __u16 responder_resources; /* limited to U8_MAX */ __u16 recv_credit_max; __u16 send_credit_target; __u32 max_send_size; diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index 1e3b361728cc..4003753bea26 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -166,6 +166,11 @@ int smbdirect_socket_set_initial_parameters(struct smbdirect_socket *sc, if (sp->flags & ~SMBDIRECT_FLAG_PORT_RANGE_MASK) return -EINVAL; + if (sp->initiator_depth > U8_MAX) + return -EINVAL; + if (sp->responder_resources > U8_MAX) + return -EINVAL; + if (sp->flags & SMBDIRECT_FLAG_PORT_RANGE_ONLY_IB && sp->flags & SMBDIRECT_FLAG_PORT_RANGE_ONLY_IW) return -EINVAL; From 3892007f2bbf8ae2df5374de708282d6895402e9 Mon Sep 17 00:00:00 
2001 From: Stefan Metzmacher Date: Tue, 7 Apr 2026 16:46:28 +0200 Subject: [PATCH 143/145] smb: smbdirect: fix copyright header of smbdirect.h Everything in smbdirect.h was taken from my out of tree prototype. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: David Howells Cc: Henrique Carvalho Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/smb/common/smbdirect/smbdirect.h b/fs/smb/common/smbdirect/smbdirect.h index 8fbffe3eb845..bbab5f7f7cc9 100644 --- a/fs/smb/common/smbdirect/smbdirect.h +++ b/fs/smb/common/smbdirect/smbdirect.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * Copyright (C) 2017, Microsoft Corporation. - * Copyright (C) 2018, LG Electronics. + * Copyright (C) 2025 Stefan Metzmacher */ #ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ From 25c2e34931c5f2a02baefd111a4eb7fa31158059 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 7 Apr 2026 16:46:29 +0200 Subject: [PATCH 144/145] smb: smbdirect: fix the logic in smbdirect_socket_destroy_sync() without an error If smbdirect_socket_destroy_sync() is called and sc->first_error was not set, we should set -ESHUTDOWN; that's a better condition than doing it only implicitly with the sc->status < SMBDIRECT_SOCKET_DISCONNECTING check.
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: David Howells Cc: Henrique Carvalho Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.c | 28 ++++++++++++++++------ 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.c b/fs/smb/common/smbdirect/smbdirect_socket.c index 4003753bea26..9153e1dbf53d 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.c +++ b/fs/smb/common/smbdirect/smbdirect_socket.c @@ -600,13 +600,20 @@ void smbdirect_socket_destroy_sync(struct smbdirect_socket *sc) */ WARN_ON_ONCE(in_interrupt()); - smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, - "cancelling and disable disconnect_work\n"); - disable_work_sync(&sc->disconnect_work); + /* + * First we try to disable the work + * without disable_work_sync() in a + * non blocking way, if it's already + * running it will be handled by + * disable_work_sync() below. + * + * Here we just want to make sure queue_work() in + * smbdirect_socket_schedule_cleanup_lvl() + * is a no-op.
+ */ + disable_work(&sc->disconnect_work); - smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, - "destroying rdma session\n"); - if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) { + if (!sc->first_error) /* * SMBDIRECT_LOG_INFO is enough here * as this is the typical case where @@ -615,8 +622,15 @@ void smbdirect_socket_destroy_sync(struct smbdirect_socket *sc) smbdirect_socket_schedule_cleanup_lvl(sc, SMBDIRECT_LOG_INFO, -ESHUTDOWN); + + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "cancelling and disable disconnect_work\n"); + disable_work_sync(&sc->disconnect_work); + + smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, + "destroying rdma session\n"); + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) smbdirect_socket_cleanup_work(&sc->disconnect_work); - } if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED) { smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, "wait for transport being disconnected\n"); From d09a040c186a2083b1cfa9c3c112782ce4b1f6d4 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 7 Apr 2026 16:46:30 +0200 Subject: [PATCH 145/145] smb: smbdirect: let smbdirect_connection_deregister_mr_io unlock while waiting We should not hold a mutex locked during wait_for_completion(); holding a reference is enough.
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: David Howells Cc: Henrique Carvalho Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_mr.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/smb/common/smbdirect/smbdirect_mr.c b/fs/smb/common/smbdirect/smbdirect_mr.c index 4873a2c1066f..fa9be8089925 100644 --- a/fs/smb/common/smbdirect/smbdirect_mr.c +++ b/fs/smb/common/smbdirect/smbdirect_mr.c @@ -410,6 +410,7 @@ void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr) struct smbdirect_socket *sc = mr->socket; int ret = 0; +lock_again: mutex_lock(&mr->mutex); if (mr->state == SMBDIRECT_MR_DISABLED) goto put_kref; @@ -440,8 +441,15 @@ void smbdirect_connection_deregister_mr_io(struct smbdirect_mr_io *mr) smbdirect_socket_schedule_cleanup(sc, ret); goto done; } + + /* + * We still hold the reference to mr + * so we can unlock while waiting. + */ + mutex_unlock(&mr->mutex); wait_for_completion(&mr->invalidate_done); mr->need_invalidate = false; + goto lock_again; } else /* * For remote invalidation, just set it to SMBDIRECT_MR_INVALIDATED