vduse: Use fixed 4KB bounce pages for non-4KB page size

The allocation granularity of bounce pages is PAGE_SIZE. This can cause
even a small IO request to occupy an entire bounce page exclusively. This
kind of memory waste becomes more significant when PAGE_SIZE is larger
than 4KB (e.g. arm64 with 64KB pages).

So, optimize it by using a fixed 4KB granularity for bounce maps and IOVA
allocation. A single IO request then occupies at least one 4KB bounce page
instead of an entire PAGE_SIZE memory page.
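
As a rough standalone illustration of the new index/offset arithmetic (the
BOUNCE_MAP_* values mirror the macros the patch introduces; the 64KB page
constants and the sample IOVA are hypothetical, for the example only):

#include <stdio.h>

#define BOUNCE_MAP_SHIFT	12
#define BOUNCE_MAP_SIZE		(1UL << BOUNCE_MAP_SHIFT)
#define PAGE_SHIFT_64K		16			/* e.g. arm64 with 64KB pages */
#define PAGE_SIZE_64K		(1UL << PAGE_SHIFT_64K)
#define PAGE_MASK_64K		(~(PAGE_SIZE_64K - 1))

int main(void)
{
	unsigned long iova = 0x13456;	/* an arbitrary IOVA for the example */

	/* index of the 4KB bounce map that backs this IOVA */
	printf("bounce map index: %lu\n", iova >> BOUNCE_MAP_SHIFT);

	/* index of the "head" map that owns the shared 64KB backing page */
	printf("head map index:   %lu\n", (iova & PAGE_MASK_64K) >> BOUNCE_MAP_SHIFT);

	/* offset within the 4KB bounce map vs. within the 64KB backing page */
	printf("offset in bounce map: 0x%lx\n", iova & (BOUNCE_MAP_SIZE - 1));
	printf("offset in 64KB page:  0x%lx\n", iova & (PAGE_SIZE_64K - 1));
	return 0;
}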

Signed-off-by: Sheng Zhao <sheng.zhao@bytedance.com>
Message-Id: <20250925113516.60305-1-sheng.zhao@bytedance.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
parent 1c14b0e4ba
commit 3fc3068e72
2 changed files with 94 additions and 41 deletions


@@ -103,19 +103,38 @@ void vduse_domain_clear_map(struct vduse_iova_domain *domain,
static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
u64 iova, u64 size, u64 paddr)
{
struct vduse_bounce_map *map;
struct vduse_bounce_map *map, *head_map;
struct page *tmp_page;
u64 last = iova + size - 1;
while (iova <= last) {
map = &domain->bounce_maps[iova >> PAGE_SHIFT];
/*
* When PAGE_SIZE is larger than 4KB, multiple adjacent bounce_maps point
* to the same PAGE_SIZE memory page. Since bounce_maps are filled by
* individual IO requests, the orig_phys values within the same 64KB
* memory page are not guaranteed to be contiguous, so they must be
* stored separately.
*
* Bounce pages are allocated on demand. Several bounce_maps that share
* the same 64KB memory page may therefore try to allocate it at the same
* time, so cmpxchg is used to handle this concurrency.
*/
map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT];
if (!map->bounce_page) {
map->bounce_page = alloc_page(GFP_ATOMIC);
if (!map->bounce_page)
return -ENOMEM;
head_map = &domain->bounce_maps[(iova & PAGE_MASK) >> BOUNCE_MAP_SHIFT];
if (!head_map->bounce_page) {
tmp_page = alloc_page(GFP_ATOMIC);
if (!tmp_page)
return -ENOMEM;
if (cmpxchg(&head_map->bounce_page, NULL, tmp_page))
__free_page(tmp_page);
}
map->bounce_page = head_map->bounce_page;
}
map->orig_phys = paddr;
paddr += PAGE_SIZE;
iova += PAGE_SIZE;
paddr += BOUNCE_MAP_SIZE;
iova += BOUNCE_MAP_SIZE;
}
return 0;
}
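
The on-demand head-page allocation above uses a common allocate-then-publish-with-cmpxchg
pattern: every racer allocates, exactly one publication wins, and the losers free their
copies. A minimal userspace sketch of the same idea (illustrative names, malloc in place
of alloc_page; not the kernel code):

#include <stdatomic.h>
#include <stdlib.h>

struct bounce_slot {
	_Atomic(void *) page;		/* shared backing page, published once */
};

/* Allocate the backing page on first use; a racing loser frees its copy. */
static void *get_or_alloc_page(struct bounce_slot *slot, size_t size)
{
	void *expected = NULL;
	void *cur = atomic_load(&slot->page);

	if (cur)
		return cur;

	void *fresh = malloc(size);
	if (!fresh)
		return NULL;

	if (!atomic_compare_exchange_strong(&slot->page, &expected, fresh)) {
		free(fresh);		/* another thread published first */
		return expected;	/* use the winner's page */
	}
	return fresh;
}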
@@ -127,12 +146,17 @@ static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain,
u64 last = iova + size - 1;
while (iova <= last) {
map = &domain->bounce_maps[iova >> PAGE_SHIFT];
map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT];
map->orig_phys = INVALID_PHYS_ADDR;
iova += PAGE_SIZE;
iova += BOUNCE_MAP_SIZE;
}
}
static unsigned int offset_in_bounce_page(dma_addr_t addr)
{
return (addr & ~BOUNCE_MAP_MASK);
}
static void do_bounce(phys_addr_t orig, void *addr, size_t size,
enum dma_data_direction dir)
{
@@ -163,7 +187,7 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain,
{
struct vduse_bounce_map *map;
struct page *page;
unsigned int offset;
unsigned int offset, head_offset;
void *addr;
size_t sz;
@@ -171,9 +195,10 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain,
return;
while (size) {
map = &domain->bounce_maps[iova >> PAGE_SHIFT];
offset = offset_in_page(iova);
sz = min_t(size_t, PAGE_SIZE - offset, size);
map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT];
head_offset = offset_in_page(iova);
offset = offset_in_bounce_page(iova);
sz = min_t(size_t, BOUNCE_MAP_SIZE - offset, size);
if (WARN_ON(!map->bounce_page ||
map->orig_phys == INVALID_PHYS_ADDR))
@@ -183,7 +208,7 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain,
map->user_bounce_page : map->bounce_page;
addr = kmap_local_page(page);
do_bounce(map->orig_phys + offset, addr + offset, sz, dir);
do_bounce(map->orig_phys + offset, addr + head_offset, sz, dir);
kunmap_local(addr);
size -= sz;
iova += sz;
@@ -218,7 +243,7 @@ vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
struct page *page = NULL;
read_lock(&domain->bounce_lock);
map = &domain->bounce_maps[iova >> PAGE_SHIFT];
map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT];
if (domain->user_bounce_pages || !map->bounce_page)
goto out;
@@ -236,7 +261,7 @@ vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
struct vduse_bounce_map *map;
unsigned long pfn, bounce_pfns;
bounce_pfns = domain->bounce_size >> PAGE_SHIFT;
bounce_pfns = domain->bounce_size >> BOUNCE_MAP_SHIFT;
for (pfn = 0; pfn < bounce_pfns; pfn++) {
map = &domain->bounce_maps[pfn];
@@ -246,7 +271,8 @@ vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
if (!map->bounce_page)
continue;
__free_page(map->bounce_page);
if (!((pfn << BOUNCE_MAP_SHIFT) & ~PAGE_MASK))
__free_page(map->bounce_page);
map->bounce_page = NULL;
}
}
@@ -254,8 +280,12 @@ vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
struct page **pages, int count)
{
struct vduse_bounce_map *map;
int i, ret;
struct vduse_bounce_map *map, *head_map;
int i, j, ret;
int inner_pages = PAGE_SIZE / BOUNCE_MAP_SIZE;
int bounce_pfns = domain->bounce_size >> BOUNCE_MAP_SHIFT;
struct page *head_page = NULL;
bool need_copy;
/* Now we don't support partial mapping */
if (count != (domain->bounce_size >> PAGE_SHIFT))
@@ -267,16 +297,23 @@ int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
goto out;
for (i = 0; i < count; i++) {
map = &domain->bounce_maps[i];
if (map->bounce_page) {
need_copy = false;
head_map = &domain->bounce_maps[(i * inner_pages)];
head_page = head_map->bounce_page;
for (j = 0; j < inner_pages; j++) {
if ((i * inner_pages + j) >= bounce_pfns)
break;
map = &domain->bounce_maps[(i * inner_pages + j)];
/* Copy kernel page to user page if it's in use */
if (map->orig_phys != INVALID_PHYS_ADDR)
memcpy_to_page(pages[i], 0,
page_address(map->bounce_page),
PAGE_SIZE);
if ((head_page) && (map->orig_phys != INVALID_PHYS_ADDR))
need_copy = true;
map->user_bounce_page = pages[i];
}
map->user_bounce_page = pages[i];
get_page(pages[i]);
if ((head_page) && (need_copy))
memcpy_to_page(pages[i], 0,
page_address(head_page),
PAGE_SIZE);
}
domain->user_bounce_pages = true;
ret = 0;
@@ -288,8 +325,12 @@ int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
{
struct vduse_bounce_map *map;
unsigned long i, count;
struct vduse_bounce_map *map, *head_map;
unsigned long i, j, count;
int inner_pages = PAGE_SIZE / BOUNCE_MAP_SIZE;
int bounce_pfns = domain->bounce_size >> BOUNCE_MAP_SHIFT;
struct page *head_page = NULL;
bool need_copy;
write_lock(&domain->bounce_lock);
if (!domain->user_bounce_pages)
@@ -297,20 +338,27 @@ void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
count = domain->bounce_size >> PAGE_SHIFT;
for (i = 0; i < count; i++) {
struct page *page = NULL;
map = &domain->bounce_maps[i];
if (WARN_ON(!map->user_bounce_page))
need_copy = false;
head_map = &domain->bounce_maps[(i * inner_pages)];
if (WARN_ON(!head_map->user_bounce_page))
continue;
head_page = head_map->user_bounce_page;
/* Copy user page to kernel page if it's in use */
if (map->orig_phys != INVALID_PHYS_ADDR) {
page = map->bounce_page;
memcpy_from_page(page_address(page),
map->user_bounce_page, 0, PAGE_SIZE);
for (j = 0; j < inner_pages; j++) {
if ((i * inner_pages + j) >= bounce_pfns)
break;
map = &domain->bounce_maps[(i * inner_pages + j)];
if (WARN_ON(!map->user_bounce_page))
continue;
/* Copy user page to kernel page if it's in use */
if ((map->orig_phys != INVALID_PHYS_ADDR) && (head_map->bounce_page))
need_copy = true;
map->user_bounce_page = NULL;
}
put_page(map->user_bounce_page);
map->user_bounce_page = NULL;
if (need_copy)
memcpy_from_page(page_address(head_map->bounce_page),
head_page, 0, PAGE_SIZE);
put_page(head_page);
}
domain->user_bounce_pages = false;
out:
@@ -581,7 +629,7 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
unsigned long pfn, bounce_pfns;
int ret;
bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
bounce_pfns = PAGE_ALIGN(bounce_size) >> BOUNCE_MAP_SHIFT;
if (iova_limit <= bounce_size)
return NULL;
@@ -613,7 +661,7 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
rwlock_init(&domain->bounce_lock);
spin_lock_init(&domain->iotlb_lock);
init_iova_domain(&domain->stream_iovad,
PAGE_SIZE, IOVA_START_PFN);
BOUNCE_MAP_SIZE, IOVA_START_PFN);
ret = iova_domain_init_rcaches(&domain->stream_iovad);
if (ret)
goto err_iovad_stream;


@@ -19,6 +19,11 @@
#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
#define BOUNCE_MAP_SHIFT 12
#define BOUNCE_MAP_SIZE (1 << BOUNCE_MAP_SHIFT)
#define BOUNCE_MAP_MASK (~(BOUNCE_MAP_SIZE - 1))
#define BOUNCE_MAP_ALIGN(addr) (((addr) + BOUNCE_MAP_SIZE - 1) & ~(BOUNCE_MAP_SIZE - 1))
struct vduse_bounce_map {
struct page *bounce_page;
struct page *user_bounce_page;