From ce28b772c3c28fb1c62a81533045ae90ed3b496c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 6 May 2026 06:05:14 -0700 Subject: [PATCH 1/8] nvme: make prp passthrough usage less scary The warning is a bit alarming, and it only prints for the very first non-sgl capable device that receives a passthrough command. Just log an informational message on initial discovery for every device. Reviewed-by: Jens Axboe Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 4 ++++ drivers/nvme/host/ioctl.c | 13 ++----------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dc388e24caad..1e7e42b43aa3 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3749,6 +3749,10 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended) ret = nvme_hwmon_init(ctrl); if (ret == -EINTR) return ret; + + if (!nvme_ctrl_sgl_supported(ctrl)) + dev_info(ctrl->device, + "passthrough uses implicit buffer lengths\n"); } clear_bit(NVME_CTRL_DIRTY_CAPABILITY, &ctrl->flags); diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 9597a87cf05d..e232188ccd02 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -120,21 +120,12 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, struct nvme_ns *ns = q->queuedata; struct block_device *bdev = ns ? ns->disk->part0 : NULL; bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk); - struct nvme_ctrl *ctrl = nvme_req(req)->ctrl; bool has_metadata = meta_buffer && meta_len; struct bio *bio = NULL; int ret; - if (!nvme_ctrl_sgl_supported(ctrl)) - dev_warn_once(ctrl->device, "using unchecked data buffer\n"); - if (has_metadata) { - if (!supports_metadata) - return -EINVAL; - - if (!nvme_ctrl_meta_sgl_supported(ctrl)) - dev_warn_once(ctrl->device, - "using unchecked metadata buffer\n"); - } + if (has_metadata && !supports_metadata) + return -EINVAL; if (iter) ret = blk_rq_map_user_iov(q, req, NULL, iter, GFP_KERNEL); From 2279cd9c61a330e5de4d6eb0bc422820dd6fdf36 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 6 May 2026 06:16:02 -0700 Subject: [PATCH 2/8] nvme: fix bio leak on mapping failure The local bio is always NULL, so we'd leak the bio if the integrity mapping failed. Just get it directly from the request. Fixes: d0d1d522316e91f ("blk-map: provide the bdev to bio if one exists") Reviewed-by: Sagi Grimberg Reviewed-by: John Garry Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/ioctl.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index e232188ccd02..08889b20e5d8 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -121,7 +121,6 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, struct block_device *bdev = ns ? ns->disk->part0 : NULL; bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk); bool has_metadata = meta_buffer && meta_len; - struct bio *bio = NULL; int ret; if (has_metadata && !supports_metadata) @@ -145,8 +144,8 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, return ret; out_unmap: - if (bio) - blk_rq_unmap_user(bio); + if (req->bio) + blk_rq_unmap_user(req->bio); return ret; } From a891962ae5e48fb5476c23631df759482e62ab16 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 30 Apr 2026 15:22:32 +0200 Subject: [PATCH 3/8] nvmet-auth: Do not print DH-HMAC-CHAP secrets From a security standpoint we should not allow to print out the DH-HMAC-CHAP secrets, but at the same time having them is useful for debugging authentication failures. So add a Kconfig option NVME_TARGET_AUTH_DEBUG to only enable debugging if explictly requested at build time. Reviewed-by: Sagi Grimberg Signed-off-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/target/Kconfig | 9 +++++++++ drivers/nvme/target/auth.c | 13 ++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index 4904097dfd49..69bde270115e 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -117,6 +117,15 @@ config NVME_TARGET_AUTH If unsure, say N. +config NVME_TARGET_AUTH_DEBUG + bool "NVMe over Fabrics In-band Authentication debug messages" + depends on NVME_TARGET_AUTH + help + This enables additional debug messages including the generated + DH-HMAC-CHAP secrets to help debugging authentication failures. + + If unsure, say N. + config NVME_TARGET_PCI_EPF tristate "NVMe PCI Endpoint Function target support" depends on NVME_TARGET && PCI_ENDPOINT diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index 9a2eccdc8b13..edb9627d97b0 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -144,7 +144,6 @@ u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, bool reset) goto out_unlock; list_for_each_entry(p, &ctrl->subsys->hosts, entry) { - pr_debug("check %s\n", nvmet_host_name(p->host)); if (strcmp(nvmet_host_name(p->host), ctrl->hostnqn)) continue; host = p->host; @@ -189,11 +188,12 @@ u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, bool reset) ctrl->host_key = NULL; goto out_free_hash; } +#ifdef CONFIG_NVME_TARGET_AUTH_DEBUG pr_debug("%s: using hash %s key %*ph\n", __func__, ctrl->host_key->hash > 0 ? nvme_auth_hmac_name(ctrl->host_key->hash) : "none", (int)ctrl->host_key->len, ctrl->host_key->key); - +#endif nvme_auth_free_key(ctrl->ctrl_key); if (!host->dhchap_ctrl_secret) { ctrl->ctrl_key = NULL; @@ -207,11 +207,12 @@ u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, bool reset) ctrl->ctrl_key = NULL; goto out_free_hash; } +#ifdef CONFIG_NVME_TARGET_AUTH_DEBUG pr_debug("%s: using ctrl hash %s key %*ph\n", __func__, ctrl->ctrl_key->hash > 0 ? nvme_auth_hmac_name(ctrl->ctrl_key->hash) : "none", (int)ctrl->ctrl_key->len, ctrl->ctrl_key->key); - +#endif out_free_hash: if (ret) { if (ctrl->host_key) { @@ -317,7 +318,6 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response, if (ret) goto out_free_challenge; } - pr_debug("ctrl %d qid %d host response seq %u transaction %d\n", ctrl->cntlid, req->sq->qid, req->sq->dhchap_s1, req->sq->dhchap_tid); @@ -434,8 +434,10 @@ int nvmet_auth_ctrl_exponential(struct nvmet_req *req, ret = -EINVAL; } else { memcpy(buf, ctrl->dh_key, buf_size); +#ifdef CONFIG_NVME_TARGET_AUTH_DEBUG pr_debug("%s: ctrl %d public key %*ph\n", __func__, ctrl->cntlid, (int)buf_size, buf); +#endif } return ret; @@ -458,11 +460,12 @@ int nvmet_auth_ctrl_sesskey(struct nvmet_req *req, ctrl->shash_id); if (ret) pr_debug("failed to compute session key, err %d\n", ret); +#ifdef CONFIG_NVME_TARGET_AUTH_DEBUG else pr_debug("%s: session key %*ph\n", __func__, (int)req->sq->dhchap_skey_len, req->sq->dhchap_skey); - +#endif return ret; } From b35a13036755c5803168a7cb93bc66035c3e65b8 Mon Sep 17 00:00:00 2001 From: "Chia-Lin Kao (AceLan)" Date: Wed, 29 Apr 2026 16:11:16 +0800 Subject: [PATCH 4/8] nvme-pci: fix use-after-free in nvme_free_host_mem() nvme_free_host_mem() frees dev->hmb_sgt via dma_free_noncontiguous() but never clears the pointer afterward. This leads to a use-after-free if nvme_free_host_mem() is called twice in the same error path. This can happen during nvme_probe() when nvme_setup_host_mem() succeeds in allocating the HMB (setting dev->hmb_sgt) but nvme_set_host_mem() fails with an I/O error: nvme_setup_host_mem() nvme_alloc_host_mem_single() -> sets dev->hmb_sgt nvme_set_host_mem() -> fails with -EIO nvme_free_host_mem() -> frees hmb_sgt, but does NOT NULL it return error nvme_probe() error path: nvme_free_host_mem() -> dev->hmb_sgt is stale, use-after-free The second call dereferences the freed sgt, causing a NULL pointer dereference in iommu_dma_free_noncontiguous() when it accesses sgt->sgl->dma_address (the backing memory has been freed and zeroed). This is reproducible on Thunderbolt-attached NVMe devices (e.g., OWC Envoy Express behind a Dell WD22TB4 dock) where the device intermittently returns I/O errors during HMB setup due to PCIe link instability. BUG: kernel NULL pointer dereference, address: 0000000000000010 RIP: 0010:iommu_dma_free_noncontiguous+0x22/0x80 Call Trace: dma_free_noncontiguous+0x3b/0x130 nvme_free_host_mem+0x30/0xf0 [nvme] nvme_probe.cold+0xcc/0x275 [nvme] local_pci_probe+0x43/0xa0 pci_device_probe+0xeea/0x290 really_probe+0xf9/0x3b0 __driver_probe_device+0x8b/0x170 driver_probe_device+0x24/0xd0 __driver_attach_async_helper+0x6b/0x110 async_run_entry_fn+0x37/0x170 process_one_work+0x1ac/0x3d0 worker_thread+0x1b8/0x360 kthread+0xf7/0x130 ret_from_fork+0x2d8/0x3a0 ret_from_fork_asm+0x1a/0x30 Fix this by setting dev->hmb_sgt to NULL after freeing it, so the second call takes the multi-descriptor path which safely handles the already-cleaned-up state. Fixes: 63a5c7a4b4c4 ("nvme-pci: use dma_alloc_noncontigous if possible") Signed-off-by: Chia-Lin Kao (AceLan) Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9fd04cd7c5cb..94c423bed947 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2533,11 +2533,13 @@ static void nvme_free_host_mem_multi(struct nvme_dev *dev) static void nvme_free_host_mem(struct nvme_dev *dev) { - if (dev->hmb_sgt) + if (dev->hmb_sgt) { dma_free_noncontiguous(dev->dev, dev->host_mem_size, dev->hmb_sgt, DMA_BIDIRECTIONAL); - else + dev->hmb_sgt = NULL; + } else { nvme_free_host_mem_multi(dev); + } dma_free_coherent(dev->dev, dev->host_mem_descs_size, dev->host_mem_descs, dev->host_mem_descs_dma); From dbbd07d0a7020b80f6a7028e561908f7b83b3d5a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 10 May 2026 23:30:29 +0300 Subject: [PATCH 5/8] nvmet-tcp: Fix potential UAF when ddgst mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shivam Kumar found via vulnerability testing: When data digest is enabled on an NVMe/TCP connection and a digest mismatch occurs on a non-final H2C_DATA PDU during an R2T-based data transfer, the digest error handler in nvmet_tcp_try_recv_ddgst() calls nvmet_req_uninit() — which performs percpu_ref_put() on the submission queue — but does NOT mark the command as completed. It does not set cqe->status, does not modify rbytes_done, and does not clear any flag. When the subsequent fatal error triggers queue teardown, nvmet_tcp_uninit_data_in_cmds() iterates all commands, checks nvmet_tcp_need_data_in() for each one, and finds that the already-uninited command still appears to need data (because rbytes_done < transfer_len and cqe->status == 0). It therefore calls nvmet_req_uninit() a second time on the same command — a double percpu_ref_put against a single percpu_ref_get. Reported-by: Shivam Kumar Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 164a564ba3b4..20f150d17a96 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1321,8 +1321,10 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue) queue->idx, cmd->req.cmd->common.command_id, queue->pdu.cmd.hdr.type, le32_to_cpu(cmd->recv_ddgst), le32_to_cpu(cmd->exp_ddgst)); - if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED)) + if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED)) { + cmd->req.cqe->status = NVME_SC_CMD_SEQ_ERROR; nvmet_req_uninit(&cmd->req); + } nvmet_tcp_free_cmd_buffers(cmd); ret = -EPROTO; goto out; From 40df2172853ffc0f84cca3906f5c4d9a6131195d Mon Sep 17 00:00:00 2001 From: AlanCui4080 Date: Fri, 8 May 2026 17:59:12 +0800 Subject: [PATCH 6/8] Revert "nvme: add quirk NVME_QUIRK_IGNORE_DEV_SUBNQN for 144d:a808" This reverts commit 7f991e3f9b8f044640bcb5fa8570350a68932843 ("nvme: add quirk NVME_QUIRK_IGNORE_DEV_SUBNQN for 144d:a808") The incorrect implementations of SUBNQN is a known issue in a massive number of NVMe units. However, the warning "nvme nvmex: missing or invalid SUBNQN field." is usually appropriate and will not affect performance or behavior etc. That is because the support for SUBNQN is mandatory if the controller supports NVMe revision 1.2.1 or greater, and it reported itself without a SUBNQN field which breaks compliance with the specification. It should be not quirked by the Linux Kernel. Signed-off-by: Alan Cui Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 94c423bed947..139a10cd687f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -4109,8 +4109,6 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c5f, 0x0555), /* Memblaze Pblaze5 adapter */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, }, - { PCI_DEVICE(0x144d, 0xa808), /* Samsung PM981/983 */ - .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */ From 37953cec775ed34e59cf9a7d7bb9b0610daa3f3e Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Fri, 8 May 2026 15:33:29 +0200 Subject: [PATCH 7/8] nvme: fix race condition between connected uevent and STARTED_ONCE flag When a controller connects, nvme_start_ctrl() emits the "NVME_EVENT=connected" uevent and sets the NVME_CTRL_STARTED_ONCE flag. Currently, the uevent is emitted before the flag is set. This creates a race condition for userspace tools (like udev rules) that might rely on the "connected" event to configure other attributes. Swap the order of operations in nvme_start_ctrl() so that the NVME_CTRL_STARTED_ONCE flag is set before the uevent is sent. This guarantees that the admin_timeout can already be changed when userspace is notified. Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Daniel Wagner Signed-off-by: Maurizio Lombardi Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 1e7e42b43aa3..c3032d6ad6b1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -5045,8 +5045,8 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) nvme_mpath_update(ctrl); } - nvme_change_uevent(ctrl, "NVME_EVENT=connected"); set_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags); + nvme_change_uevent(ctrl, "NVME_EVENT=connected"); } EXPORT_SYMBOL_GPL(nvme_start_ctrl); From a6ab75639e23169a741b0b2e12191fd8acb32c73 Mon Sep 17 00:00:00 2001 From: Nick Chan Date: Thu, 14 May 2026 21:16:01 +0800 Subject: [PATCH 8/8] nvme-apple: Reset q->sq_tail during queue init Fixes a "duplicate tag error for tag 0" firmware crash during controller reset while setting up a queue on Apple A11 / T8015 caused by stale entries in the submission queue due to an invalid sq_tail offset after reset. Fixes: 04d8ecf37b5e ("nvme: apple: Add Apple A11 support") Cc: stable@vger.kernel.org Suggested-by: Yuriy Havrylyuk Reviewed-by: Sven Peter Signed-off-by: Nick Chan Signed-off-by: Keith Busch --- drivers/nvme/host/apple.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index 423c9c628e7b..c692fc73babf 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -1009,6 +1009,7 @@ static void apple_nvme_init_queue(struct apple_nvme_queue *q) unsigned int depth = apple_nvme_queue_depth(q); struct apple_nvme *anv = queue_to_apple_nvme(q); + q->sq_tail = 0; q->cq_head = 0; q->cq_phase = 1; if (anv->hw->has_lsq_nvmmu)