mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-14 17:06:24 -04:00
Merge branch 'p2pdma-mmio-6.19.v5' into for-6.19/block
Merge MMIO P2P DMA series from Leon:

"This patch series improves block layer and NVMe driver support for MMIO
memory regions, particularly for peer-to-peer (P2P) DMA transfers that go
through the host bridge.

The series addresses a critical gap where P2P transfers through the host
bridge (PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) were not properly marked as MMIO
memory, leading to potential issues with:

- Inappropriate CPU cache synchronization operations on MMIO regions
- Incorrect DMA mapping/unmapping that doesn't respect MMIO semantics
- Missing IOMMU configuration for MMIO memory handling

This work is extracted from the larger DMA physical API improvement
series [1] and focuses specifically on block layer and NVMe requirements
for MMIO memory support.

[1] https://lore.kernel.org/all/cover.1757423202.git.leonro@nvidia.com/"

Link: https://lore.kernel.org/linux-block/20251114-block-with-mmio-v5-0-69d00f73d766@nvidia.com/
Signed-off-by: Jens Axboe <axboe@kernel.dk>

* p2pdma-mmio-6.19.v5:
  block-dma: properly take MMIO path
  nvme-pci: migrate to dma_map_phys instead of map_page
This commit is contained in:
@@ -92,8 +92,13 @@ static bool blk_dma_map_bus(struct blk_dma_iter *iter, struct phys_vec *vec)
|
||||
static bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
|
||||
struct blk_dma_iter *iter, struct phys_vec *vec)
|
||||
{
|
||||
iter->addr = dma_map_page(dma_dev, phys_to_page(vec->paddr),
|
||||
offset_in_page(vec->paddr), vec->len, rq_dma_dir(req));
|
||||
unsigned int attrs = 0;
|
||||
|
||||
if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
|
||||
attrs |= DMA_ATTR_MMIO;
|
||||
|
||||
iter->addr = dma_map_phys(dma_dev, vec->paddr, vec->len,
|
||||
rq_dma_dir(req), attrs);
|
||||
if (dma_mapping_error(dma_dev, iter->addr)) {
|
||||
iter->status = BLK_STS_RESOURCE;
|
||||
return false;
|
||||
@@ -108,14 +113,18 @@ static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
|
||||
{
|
||||
enum dma_data_direction dir = rq_dma_dir(req);
|
||||
unsigned int mapped = 0;
|
||||
unsigned int attrs = 0;
|
||||
int error;
|
||||
|
||||
iter->addr = state->addr;
|
||||
iter->len = dma_iova_size(state);
|
||||
|
||||
if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
|
||||
attrs |= DMA_ATTR_MMIO;
|
||||
|
||||
do {
|
||||
error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
|
||||
vec->len, dir, 0);
|
||||
vec->len, dir, attrs);
|
||||
if (error)
|
||||
break;
|
||||
mapped += vec->len;
|
||||
@@ -162,6 +171,7 @@ static bool blk_dma_map_iter_start(struct request *req, struct device *dma_dev,
|
||||
|
||||
memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
|
||||
iter->status = BLK_STS_OK;
|
||||
iter->p2pdma.map = PCI_P2PDMA_MAP_NONE;
|
||||
|
||||
/*
|
||||
* Grab the first segment ASAP because we'll need it to check for P2P
|
||||
@@ -173,10 +183,6 @@ static bool blk_dma_map_iter_start(struct request *req, struct device *dma_dev,
|
||||
switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
|
||||
phys_to_page(vec.paddr))) {
|
||||
case PCI_P2PDMA_MAP_BUS_ADDR:
|
||||
if (iter->iter.is_integrity)
|
||||
bio_integrity(req->bio)->bip_flags |= BIP_P2P_DMA;
|
||||
else
|
||||
req->cmd_flags |= REQ_P2PDMA;
|
||||
return blk_dma_map_bus(iter, &vec);
|
||||
case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
|
||||
/*
|
||||
|
||||
@@ -260,8 +260,20 @@ enum nvme_iod_flags {
|
||||
/* single segment dma mapping */
|
||||
IOD_SINGLE_SEGMENT = 1U << 2,
|
||||
|
||||
/* Data payload contains p2p memory */
|
||||
IOD_DATA_P2P = 1U << 3,
|
||||
|
||||
/* Metadata contains p2p memory */
|
||||
IOD_META_P2P = 1U << 4,
|
||||
|
||||
/* Data payload contains MMIO memory */
|
||||
IOD_DATA_MMIO = 1U << 5,
|
||||
|
||||
/* Metadata contains MMIO memory */
|
||||
IOD_META_MMIO = 1U << 6,
|
||||
|
||||
/* Metadata using non-coalesced MPTR */
|
||||
IOD_SINGLE_META_SEGMENT = 1U << 5,
|
||||
IOD_SINGLE_META_SEGMENT = 1U << 7,
|
||||
};
|
||||
|
||||
struct nvme_dma_vec {
|
||||
@@ -698,20 +710,20 @@ static void nvme_free_descriptors(struct request *req)
|
||||
}
|
||||
}
|
||||
|
||||
static void nvme_free_prps(struct request *req)
|
||||
static void nvme_free_prps(struct request *req, unsigned int attrs)
|
||||
{
|
||||
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
||||
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < iod->nr_dma_vecs; i++)
|
||||
dma_unmap_page(nvmeq->dev->dev, iod->dma_vecs[i].addr,
|
||||
iod->dma_vecs[i].len, rq_dma_dir(req));
|
||||
dma_unmap_phys(nvmeq->dev->dev, iod->dma_vecs[i].addr,
|
||||
iod->dma_vecs[i].len, rq_dma_dir(req), attrs);
|
||||
mempool_free(iod->dma_vecs, nvmeq->dev->dmavec_mempool);
|
||||
}
|
||||
|
||||
static void nvme_free_sgls(struct request *req, struct nvme_sgl_desc *sge,
|
||||
struct nvme_sgl_desc *sg_list)
|
||||
struct nvme_sgl_desc *sg_list, unsigned int attrs)
|
||||
{
|
||||
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
|
||||
enum dma_data_direction dir = rq_dma_dir(req);
|
||||
@@ -720,22 +732,25 @@ static void nvme_free_sgls(struct request *req, struct nvme_sgl_desc *sge,
|
||||
unsigned int i;
|
||||
|
||||
if (sge->type == (NVME_SGL_FMT_DATA_DESC << 4)) {
|
||||
dma_unmap_page(dma_dev, le64_to_cpu(sge->addr), len, dir);
|
||||
dma_unmap_phys(dma_dev, le64_to_cpu(sge->addr), len, dir,
|
||||
attrs);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < len / sizeof(*sg_list); i++)
|
||||
dma_unmap_page(dma_dev, le64_to_cpu(sg_list[i].addr),
|
||||
le32_to_cpu(sg_list[i].length), dir);
|
||||
dma_unmap_phys(dma_dev, le64_to_cpu(sg_list[i].addr),
|
||||
le32_to_cpu(sg_list[i].length), dir, attrs);
|
||||
}
|
||||
|
||||
static void nvme_unmap_metadata(struct request *req)
|
||||
{
|
||||
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
|
||||
enum pci_p2pdma_map_type map = PCI_P2PDMA_MAP_NONE;
|
||||
enum dma_data_direction dir = rq_dma_dir(req);
|
||||
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
||||
struct device *dma_dev = nvmeq->dev->dev;
|
||||
struct nvme_sgl_desc *sge = iod->meta_descriptor;
|
||||
unsigned int attrs = 0;
|
||||
|
||||
if (iod->flags & IOD_SINGLE_META_SEGMENT) {
|
||||
dma_unmap_page(dma_dev, iod->meta_dma,
|
||||
@@ -744,13 +759,20 @@ static void nvme_unmap_metadata(struct request *req)
|
||||
return;
|
||||
}
|
||||
|
||||
if (!blk_rq_integrity_dma_unmap(req, dma_dev, &iod->meta_dma_state,
|
||||
iod->meta_total_len)) {
|
||||
if (iod->flags & IOD_META_P2P)
|
||||
map = PCI_P2PDMA_MAP_BUS_ADDR;
|
||||
else if (iod->flags & IOD_META_MMIO) {
|
||||
map = PCI_P2PDMA_MAP_THRU_HOST_BRIDGE;
|
||||
attrs |= DMA_ATTR_MMIO;
|
||||
}
|
||||
|
||||
if (!blk_rq_dma_unmap(req, dma_dev, &iod->meta_dma_state,
|
||||
iod->meta_total_len, map)) {
|
||||
if (nvme_pci_cmd_use_meta_sgl(&iod->cmd))
|
||||
nvme_free_sgls(req, sge, &sge[1]);
|
||||
nvme_free_sgls(req, sge, &sge[1], attrs);
|
||||
else
|
||||
dma_unmap_page(dma_dev, iod->meta_dma,
|
||||
iod->meta_total_len, dir);
|
||||
dma_unmap_phys(dma_dev, iod->meta_dma,
|
||||
iod->meta_total_len, dir, attrs);
|
||||
}
|
||||
|
||||
if (iod->meta_descriptor)
|
||||
@@ -760,9 +782,11 @@ static void nvme_unmap_metadata(struct request *req)
|
||||
|
||||
static void nvme_unmap_data(struct request *req)
|
||||
{
|
||||
enum pci_p2pdma_map_type map = PCI_P2PDMA_MAP_NONE;
|
||||
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
||||
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
|
||||
struct device *dma_dev = nvmeq->dev->dev;
|
||||
unsigned int attrs = 0;
|
||||
|
||||
if (iod->flags & IOD_SINGLE_SEGMENT) {
|
||||
static_assert(offsetof(union nvme_data_ptr, prp1) ==
|
||||
@@ -772,12 +796,20 @@ static void nvme_unmap_data(struct request *req)
|
||||
return;
|
||||
}
|
||||
|
||||
if (!blk_rq_dma_unmap(req, dma_dev, &iod->dma_state, iod->total_len)) {
|
||||
if (iod->flags & IOD_DATA_P2P)
|
||||
map = PCI_P2PDMA_MAP_BUS_ADDR;
|
||||
else if (iod->flags & IOD_DATA_MMIO) {
|
||||
map = PCI_P2PDMA_MAP_THRU_HOST_BRIDGE;
|
||||
attrs |= DMA_ATTR_MMIO;
|
||||
}
|
||||
|
||||
if (!blk_rq_dma_unmap(req, dma_dev, &iod->dma_state, iod->total_len,
|
||||
map)) {
|
||||
if (nvme_pci_cmd_use_sgl(&iod->cmd))
|
||||
nvme_free_sgls(req, iod->descriptors[0],
|
||||
&iod->cmd.common.dptr.sgl);
|
||||
&iod->cmd.common.dptr.sgl, attrs);
|
||||
else
|
||||
nvme_free_prps(req);
|
||||
nvme_free_prps(req, attrs);
|
||||
}
|
||||
|
||||
if (iod->nr_descriptors)
|
||||
@@ -1048,6 +1080,19 @@ static blk_status_t nvme_map_data(struct request *req)
|
||||
if (!blk_rq_dma_map_iter_start(req, dev->dev, &iod->dma_state, &iter))
|
||||
return iter.status;
|
||||
|
||||
switch (iter.p2pdma.map) {
|
||||
case PCI_P2PDMA_MAP_BUS_ADDR:
|
||||
iod->flags |= IOD_DATA_P2P;
|
||||
break;
|
||||
case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
|
||||
iod->flags |= IOD_DATA_MMIO;
|
||||
break;
|
||||
case PCI_P2PDMA_MAP_NONE:
|
||||
break;
|
||||
default:
|
||||
return BLK_STS_RESOURCE;
|
||||
}
|
||||
|
||||
if (use_sgl == SGL_FORCED ||
|
||||
(use_sgl == SGL_SUPPORTED &&
|
||||
(sgl_threshold && nvme_pci_avg_seg_size(req) >= sgl_threshold)))
|
||||
@@ -1070,6 +1115,19 @@ static blk_status_t nvme_pci_setup_meta_sgls(struct request *req)
|
||||
&iod->meta_dma_state, &iter))
|
||||
return iter.status;
|
||||
|
||||
switch (iter.p2pdma.map) {
|
||||
case PCI_P2PDMA_MAP_BUS_ADDR:
|
||||
iod->flags |= IOD_META_P2P;
|
||||
break;
|
||||
case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
|
||||
iod->flags |= IOD_META_MMIO;
|
||||
break;
|
||||
case PCI_P2PDMA_MAP_NONE:
|
||||
break;
|
||||
default:
|
||||
return BLK_STS_RESOURCE;
|
||||
}
|
||||
|
||||
if (blk_rq_dma_map_coalesce(&iod->meta_dma_state))
|
||||
entries = 1;
|
||||
|
||||
|
||||
@@ -13,7 +13,6 @@ enum bip_flags {
|
||||
BIP_CHECK_GUARD = 1 << 5, /* guard check */
|
||||
BIP_CHECK_REFTAG = 1 << 6, /* reftag check */
|
||||
BIP_CHECK_APPTAG = 1 << 7, /* apptag check */
|
||||
BIP_P2P_DMA = 1 << 8, /* using P2P address */
|
||||
|
||||
BIP_MEMPOOL = 1 << 15, /* buffer backed by mempool */
|
||||
};
|
||||
|
||||
@@ -33,14 +33,6 @@ static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t,
|
||||
#ifdef CONFIG_BLK_DEV_INTEGRITY
|
||||
int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
|
||||
|
||||
static inline bool blk_rq_integrity_dma_unmap(struct request *req,
|
||||
struct device *dma_dev, struct dma_iova_state *state,
|
||||
size_t mapped_len)
|
||||
{
|
||||
return blk_dma_unmap(req, dma_dev, state, mapped_len,
|
||||
bio_integrity(req->bio)->bip_flags & BIP_P2P_DMA);
|
||||
}
|
||||
|
||||
int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
|
||||
int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
|
||||
ssize_t bytes);
|
||||
@@ -129,12 +121,6 @@ static inline int blk_rq_map_integrity_sg(struct request *q,
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline bool blk_rq_integrity_dma_unmap(struct request *req,
|
||||
struct device *dma_dev, struct dma_iova_state *state,
|
||||
size_t mapped_len)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline int blk_rq_integrity_map_user(struct request *rq,
|
||||
void __user *ubuf,
|
||||
ssize_t bytes)
|
||||
|
||||
@@ -16,13 +16,13 @@ struct blk_dma_iter {
|
||||
/* Output address range for this iteration */
|
||||
dma_addr_t addr;
|
||||
u32 len;
|
||||
struct pci_p2pdma_map_state p2pdma;
|
||||
|
||||
/* Status code. Only valid when blk_rq_dma_map_iter_* returned false */
|
||||
blk_status_t status;
|
||||
|
||||
/* Internal to blk_rq_dma_map_iter_* */
|
||||
struct blk_map_iter iter;
|
||||
struct pci_p2pdma_map_state p2pdma;
|
||||
};
|
||||
|
||||
bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
|
||||
@@ -43,36 +43,34 @@ static inline bool blk_rq_dma_map_coalesce(struct dma_iova_state *state)
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_dma_unmap - try to DMA unmap a request
|
||||
* blk_rq_dma_unmap - try to DMA unmap a request
|
||||
* @req: request to unmap
|
||||
* @dma_dev: device to unmap from
|
||||
* @state: DMA IOVA state
|
||||
* @mapped_len: number of bytes to unmap
|
||||
* @is_p2p: true if mapped with PCI_P2PDMA_MAP_BUS_ADDR
|
||||
* @map: peer-to-peer mapping type
|
||||
*
|
||||
* Returns %false if the callers need to manually unmap every DMA segment
|
||||
* mapped using @iter or %true if no work is left to be done.
|
||||
*/
|
||||
static inline bool blk_dma_unmap(struct request *req, struct device *dma_dev,
|
||||
struct dma_iova_state *state, size_t mapped_len, bool is_p2p)
|
||||
static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev,
|
||||
struct dma_iova_state *state, size_t mapped_len,
|
||||
enum pci_p2pdma_map_type map)
|
||||
{
|
||||
if (is_p2p)
|
||||
if (map == PCI_P2PDMA_MAP_BUS_ADDR)
|
||||
return true;
|
||||
|
||||
if (dma_use_iova(state)) {
|
||||
unsigned int attrs = 0;
|
||||
|
||||
if (map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
|
||||
attrs |= DMA_ATTR_MMIO;
|
||||
|
||||
dma_iova_destroy(dma_dev, state, mapped_len, rq_dma_dir(req),
|
||||
0);
|
||||
attrs);
|
||||
return true;
|
||||
}
|
||||
|
||||
return !dma_need_unmap(dma_dev);
|
||||
}
|
||||
|
||||
static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev,
|
||||
struct dma_iova_state *state, size_t mapped_len)
|
||||
{
|
||||
return blk_dma_unmap(req, dma_dev, state, mapped_len,
|
||||
req->cmd_flags & REQ_P2PDMA);
|
||||
}
|
||||
|
||||
#endif /* BLK_MQ_DMA_H */
|
||||
|
||||
@@ -393,7 +393,6 @@ enum req_flag_bits {
|
||||
__REQ_DRV, /* for driver use */
|
||||
__REQ_FS_PRIVATE, /* for file system (submitter) use */
|
||||
__REQ_ATOMIC, /* for atomic write operations */
|
||||
__REQ_P2PDMA, /* contains P2P DMA pages */
|
||||
/*
|
||||
* Command specific flags, keep last:
|
||||
*/
|
||||
@@ -426,7 +425,6 @@ enum req_flag_bits {
|
||||
#define REQ_DRV (__force blk_opf_t)(1ULL << __REQ_DRV)
|
||||
#define REQ_FS_PRIVATE (__force blk_opf_t)(1ULL << __REQ_FS_PRIVATE)
|
||||
#define REQ_ATOMIC (__force blk_opf_t)(1ULL << __REQ_ATOMIC)
|
||||
#define REQ_P2PDMA (__force blk_opf_t)(1ULL << __REQ_P2PDMA)
|
||||
|
||||
#define REQ_NOUNMAP (__force blk_opf_t)(1ULL << __REQ_NOUNMAP)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user