Merge tag 'nvme-6.15-2025-04-02' of git://git.infradead.org/nvme into block-6.15

Pull final NVMe updates from Keith:

"nvme updates for Linux 6.15

 - PCI endpoint target cleanup (Damien)
 - Early import for uring_cmd fixed buffer (Caleb)
 - Multipath documentation and notification improvements (John)
 - Invalid pci sq doorbell write fix (Maurizio)"

* tag 'nvme-6.15-2025-04-02' of git://git.infradead.org/nvme:
  nvme-pci: skip nvme_write_sq_db on empty rqlist
  nvme-multipath: change the NVME_MULTIPATH config option
  nvme: update the multipath warning in nvme_init_ns_head
  nvme/ioctl: move fixed buffer lookup to nvme_uring_cmd_io()
  nvme/ioctl: move blk_mq_free_request() out of nvme_map_user_request()
  nvme/ioctl: don't warn on vectorized uring_cmd with fixed buffer
  nvmet: pci-epf: Keep completion queues mapped
This commit is contained in:
Jens Axboe
2025-04-02 12:03:26 -06:00
5 changed files with 75 additions and 74 deletions

View File

@@ -18,10 +18,15 @@ config NVME_MULTIPATH
bool "NVMe multipath support"
depends on NVME_CORE
help
This option enables support for multipath access to NVMe
subsystems. If this option is enabled only a single
/dev/nvmeXnY device will show up for each NVMe namespace,
even if it is accessible through multiple controllers.
This option controls support for multipath access to NVMe
subsystems. If this option is enabled support for NVMe multipath
access is included in the kernel. If this option is disabled support
for NVMe multipath access is excluded from the kernel. When this
option is disabled each controller/namespace receives its
own /dev/nvmeXnY device entry and NVMe multipath access is
not supported.
If unsure, say Y.
config NVME_VERBOSE_ERRORS
bool "NVMe verbose error reporting"

View File

@@ -3822,7 +3822,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
"Found shared namespace %d, but multipathing not supported.\n",
info->nsid);
dev_warn_once(ctrl->device,
"Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0.\n");
"Shared namespace support requires core_nvme.multipath=Y.\n");
}
}

View File

@@ -114,8 +114,7 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,
static int nvme_map_user_request(struct request *req, u64 ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
struct io_uring_cmd *ioucmd, unsigned int flags,
unsigned int iou_issue_flags)
struct iov_iter *iter, unsigned int flags)
{
struct request_queue *q = req->q;
struct nvme_ns *ns = q->queuedata;
@@ -129,37 +128,23 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
if (!nvme_ctrl_sgl_supported(ctrl))
dev_warn_once(ctrl->device, "using unchecked data buffer\n");
if (has_metadata) {
if (!supports_metadata) {
ret = -EINVAL;
goto out;
}
if (!supports_metadata)
return -EINVAL;
if (!nvme_ctrl_meta_sgl_supported(ctrl))
dev_warn_once(ctrl->device,
"using unchecked metadata buffer\n");
}
if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
struct iov_iter iter;
/* fixedbufs is only for non-vectored io */
if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) {
ret = -EINVAL;
goto out;
}
ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
rq_data_dir(req), &iter, ioucmd,
iou_issue_flags);
if (ret < 0)
goto out;
ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);
} else {
if (iter)
ret = blk_rq_map_user_iov(q, req, NULL, iter, GFP_KERNEL);
else
ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer),
bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0,
0, rq_data_dir(req));
}
if (ret)
goto out;
return ret;
bio = req->bio;
if (bdev)
@@ -176,8 +161,6 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
out_unmap:
if (bio)
blk_rq_unmap_user(bio);
out:
blk_mq_free_request(req);
return ret;
}
@@ -200,9 +183,9 @@ static int nvme_submit_user_cmd(struct request_queue *q,
req->timeout = timeout;
if (ubuffer && bufflen) {
ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
meta_len, NULL, flags, 0);
meta_len, NULL, flags);
if (ret)
return ret;
goto out_free_req;
}
bio = req->bio;
@@ -218,7 +201,10 @@ static int nvme_submit_user_cmd(struct request_queue *q,
if (effects)
nvme_passthru_end(ctrl, ns, effects, cmd, ret);
return ret;
out_free_req:
blk_mq_free_request(req);
return ret;
}
@@ -469,6 +455,8 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
struct nvme_uring_data d;
struct nvme_command c;
struct iov_iter iter;
struct iov_iter *map_iter = NULL;
struct request *req;
blk_opf_t rq_flags = REQ_ALLOC_CACHE;
blk_mq_req_flags_t blk_flags = 0;
@@ -504,6 +492,20 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
d.metadata_len = READ_ONCE(cmd->metadata_len);
d.timeout_ms = READ_ONCE(cmd->timeout_ms);
if (d.data_len && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
/* fixedbufs is only for non-vectored io */
if (vec)
return -EINVAL;
ret = io_uring_cmd_import_fixed(d.addr, d.data_len,
nvme_is_write(&c) ? WRITE : READ, &iter, ioucmd,
issue_flags);
if (ret < 0)
return ret;
map_iter = &iter;
}
if (issue_flags & IO_URING_F_NONBLOCK) {
rq_flags |= REQ_NOWAIT;
blk_flags = BLK_MQ_REQ_NOWAIT;
@@ -517,11 +519,11 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0;
if (d.data_len) {
ret = nvme_map_user_request(req, d.addr,
d.data_len, nvme_to_user_ptr(d.metadata),
d.metadata_len, ioucmd, vec, issue_flags);
ret = nvme_map_user_request(req, d.addr, d.data_len,
nvme_to_user_ptr(d.metadata), d.metadata_len,
map_iter, vec);
if (ret)
return ret;
goto out_free_req;
}
/* to free bio on completion, as req->bio will be null at that time */
@@ -531,6 +533,10 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
req->end_io = nvme_uring_cmd_end_io;
blk_execute_rq_nowait(req, false);
return -EIOCBQUEUED;
out_free_req:
blk_mq_free_request(req);
return ret;
}
static bool is_ctrl_ioctl(unsigned int cmd)

View File

@@ -986,6 +986,9 @@ static void nvme_submit_cmds(struct nvme_queue *nvmeq, struct rq_list *rqlist)
{
struct request *req;
if (rq_list_empty(rqlist))
return;
spin_lock(&nvmeq->sq_lock);
while ((req = rq_list_pop(rqlist))) {
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

View File

@@ -1264,6 +1264,7 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
struct nvmet_pci_epf_ctrl *ctrl = tctrl->drvdata;
struct nvmet_pci_epf_queue *cq = &ctrl->cq[cqid];
u16 status;
int ret;
if (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags))
return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
@@ -1298,6 +1299,24 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
if (status != NVME_SC_SUCCESS)
goto err;
/*
* Map the CQ PCI address space and since PCI endpoint controllers may
* return a partial mapping, check that the mapping is large enough.
*/
ret = nvmet_pci_epf_mem_map(ctrl->nvme_epf, cq->pci_addr, cq->pci_size,
&cq->pci_map);
if (ret) {
dev_err(ctrl->dev, "Failed to map CQ %u (err=%d)\n",
cq->qid, ret);
goto err_internal;
}
if (cq->pci_map.pci_size < cq->pci_size) {
dev_err(ctrl->dev, "Invalid partial mapping of queue %u\n",
cq->qid);
goto err_unmap_queue;
}
set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags);
dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n",
@@ -1305,6 +1324,10 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
return NVME_SC_SUCCESS;
err_unmap_queue:
nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);
err_internal:
status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
err:
if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
@@ -1322,6 +1345,7 @@ static u16 nvmet_pci_epf_delete_cq(struct nvmet_ctrl *tctrl, u16 cqid)
cancel_delayed_work_sync(&cq->work);
nvmet_pci_epf_drain_queue(cq);
nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);
return NVME_SC_SUCCESS;
}
@@ -1553,36 +1577,6 @@ static void nvmet_pci_epf_free_queues(struct nvmet_pci_epf_ctrl *ctrl)
ctrl->cq = NULL;
}
static int nvmet_pci_epf_map_queue(struct nvmet_pci_epf_ctrl *ctrl,
struct nvmet_pci_epf_queue *queue)
{
struct nvmet_pci_epf *nvme_epf = ctrl->nvme_epf;
int ret;
ret = nvmet_pci_epf_mem_map(nvme_epf, queue->pci_addr,
queue->pci_size, &queue->pci_map);
if (ret) {
dev_err(ctrl->dev, "Failed to map queue %u (err=%d)\n",
queue->qid, ret);
return ret;
}
if (queue->pci_map.pci_size < queue->pci_size) {
dev_err(ctrl->dev, "Invalid partial mapping of queue %u\n",
queue->qid);
nvmet_pci_epf_mem_unmap(nvme_epf, &queue->pci_map);
return -ENOMEM;
}
return 0;
}
static inline void nvmet_pci_epf_unmap_queue(struct nvmet_pci_epf_ctrl *ctrl,
struct nvmet_pci_epf_queue *queue)
{
nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &queue->pci_map);
}
static void nvmet_pci_epf_exec_iod_work(struct work_struct *work)
{
struct nvmet_pci_epf_iod *iod =
@@ -1746,11 +1740,7 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
struct nvme_completion *cqe;
struct nvmet_pci_epf_iod *iod;
unsigned long flags;
int ret, n = 0;
ret = nvmet_pci_epf_map_queue(ctrl, cq);
if (ret)
goto again;
int ret = 0, n = 0;
while (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags) && ctrl->link_up) {
@@ -1797,8 +1787,6 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
n++;
}
nvmet_pci_epf_unmap_queue(ctrl, cq);
/*
* We do not support precise IRQ coalescing time (100ns units as per
* NVMe specifications). So if we have posted completion entries without
@@ -1807,7 +1795,6 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
if (n)
nvmet_pci_epf_raise_irq(ctrl, cq, true);
again:
if (ret < 0)
queue_delayed_work(system_highpri_wq, &cq->work,
NVMET_PCI_EPF_CQ_RETRY_INTERVAL);