From 342ccffd9f77fc29fe1c05fd145e4d842bd2feaa Mon Sep 17 00:00:00 2001 From: Suraj Kandpal Date: Wed, 19 Nov 2025 15:16:50 +0530 Subject: [PATCH 01/94] drm/display/dp_mst: Add protection against 0 vcpi When releasing a timeslot there is a slight chance we end up with the wrong payload mask due to an overflow: if the delayed_destroy_work comes into play after a DP 2.1 monitor gets disconnected, vcpi becomes 0, and we then try to clear BIT(vcpi - 1) in the payload mask, which is a negative shift. A VCPI ID should never really be 0, hence skip changing the payload mask if VCPI is 0. Otherwise it leads to <7> [515.287237] xe 0000:03:00.0: [drm:drm_dp_mst_get_port_malloc [drm_display_helper]] port ffff888126ce9000 (3) <4> [515.287267] -----------[ cut here ]----------- <3> [515.287268] UBSAN: shift-out-of-bounds in ../drivers/gpu/drm/display/drm_dp_mst_topology.c:4575:36 <3> [515.287271] shift exponent -1 is negative <4> [515.287275] CPU: 7 UID: 0 PID: 3108 Comm: kworker/u64:33 Tainted: G S U 6.17.0-rc6-lgci-xe-xe-3795-3e79699fa1b216e92+ #1 PREEMPT(voluntary) <4> [515.287279] Tainted: [S]=CPU_OUT_OF_SPEC, [U]=USER <4> [515.287279] Hardware name: ASUS System Product Name/PRIME Z790-P WIFI, BIOS 1645 03/15/2024 <4> [515.287281] Workqueue: drm_dp_mst_wq drm_dp_delayed_destroy_work [drm_display_helper] <4> [515.287303] Call Trace: <4> [515.287304] <4> [515.287306] dump_stack_lvl+0xc1/0xf0 <4> [515.287313] dump_stack+0x10/0x20 <4> [515.287316] __ubsan_handle_shift_out_of_bounds+0x133/0x2e0 <4> [515.287324] ? drm_atomic_get_private_obj_state+0x186/0x1d0 <4> [515.287333] drm_dp_atomic_release_time_slots.cold+0x17/0x3d [drm_display_helper] <4> [515.287355] mst_connector_atomic_check+0x159/0x180 [xe] <4> [515.287546] drm_atomic_helper_check_modeset+0x4d9/0xfa0 <4> [515.287550] ? __ww_mutex_lock.constprop.0+0x6f/0x1a60 <4> [515.287562] intel_atomic_check+0x119/0x2b80 [xe] <4> [515.287740] ? find_held_lock+0x31/0x90 <4> [515.287747] ? lock_release+0xce/0x2a0 <4> [515.287754] drm_atomic_check_only+0x6a2/0xb40 <4> [515.287758] ? drm_atomic_add_affected_connectors+0x12b/0x140 <4> [515.287765] drm_atomic_commit+0x6e/0xf0 <4> [515.287766] ? _pfx__drm_printfn_info+0x10/0x10 <4> [515.287774] drm_client_modeset_commit_atomic+0x25c/0x2b0 <4> [515.287794] drm_client_modeset_commit_locked+0x60/0x1b0 <4> [515.287795] ? mutex_lock_nested+0x1b/0x30 <4> [515.287801] drm_client_modeset_commit+0x26/0x50 <4> [515.287804] __drm_fb_helper_restore_fbdev_mode_unlocked+0xdc/0x110 <4> [515.287810] drm_fb_helper_hotplug_event+0x120/0x140 <4> [515.287814] drm_fbdev_client_hotplug+0x28/0xd0 <4> [515.287819] drm_client_hotplug+0x6c/0xf0 <4> [515.287824] drm_client_dev_hotplug+0x9e/0xd0 <4> [515.287829] drm_kms_helper_hotplug_event+0x1a/0x30 <4> [515.287834] drm_dp_delayed_destroy_work+0x3df/0x410 [drm_display_helper] <4> [515.287861] process_one_work+0x22b/0x6f0 <4> [515.287874] worker_thread+0x1e8/0x3d0 <4> [515.287879] ? __pfx_worker_thread+0x10/0x10 <4> [515.287882] kthread+0x11c/0x250 <4> [515.287886] ? __pfx_kthread+0x10/0x10 <4> [515.287890] ret_from_fork+0x2d7/0x310 <4> [515.287894] ?
__pfx_kthread+0x10/0x10 <4> [515.287897] ret_from_fork_asm+0x1a/0x30 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6303 Signed-off-by: Suraj Kandpal Reviewed-by: Imre Deak Reviewed-by: Lyude Paul Link: https://patch.msgid.link/20251119094650.799135-1-suraj.kandpal@intel.com --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index 64e5c176d5cc..be749dcad3b5 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -4572,7 +4572,8 @@ int drm_dp_atomic_release_time_slots(struct drm_atomic_state *state, if (!payload->delete) { payload->pbn = 0; payload->delete = true; - topology_state->payload_mask &= ~BIT(payload->vcpi - 1); + if (payload->vcpi > 0) + topology_state->payload_mask &= ~BIT(payload->vcpi - 1); } return 0; From d2c6fde56d451ca48a5e03428535ce3dbc8fc910 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 28 Nov 2025 09:48:35 +0100 Subject: [PATCH 02/94] drm/panthor: Always wait after sending a command to an AS There's currently no situation where we want to issue a command to an AS and not wait for this command to complete. The wait is either explicitly done (LOCK, UNLOCK) or it's missing (UPDATE). So let's turn write_cmd() into as_send_cmd_and_wait() that has the wait after a command is sent. v2: - New patch v3: - Collect R-b v4: - No changes Reviewed-by: Steven Price Link: https://patch.msgid.link/20251128084841.3804658-2-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_mmu.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index 8818d6a8d93e..f59331f89b33 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -524,27 +524,29 @@ static int wait_ready(struct panthor_device *ptdev, u32 as_nr) return ret; } -static int write_cmd(struct panthor_device *ptdev, u32 as_nr, u32 cmd) +static int as_send_cmd_and_wait(struct panthor_device *ptdev, u32 as_nr, u32 cmd) { int status; /* write AS_COMMAND when MMU is ready to accept another command */ status = wait_ready(ptdev, as_nr); - if (!status) + if (!status) { gpu_write(ptdev, AS_COMMAND(as_nr), cmd); + status = wait_ready(ptdev, as_nr); + } return status; } -static void lock_region(struct panthor_device *ptdev, u32 as_nr, - u64 region_start, u64 size) +static int lock_region(struct panthor_device *ptdev, u32 as_nr, + u64 region_start, u64 size) { u8 region_width; u64 region; u64 region_end = region_start + size; if (!size) - return; + return 0; /* * The locked region is a naturally aligned power of 2 block encoded as @@ -567,7 +569,7 @@ static void lock_region(struct panthor_device *ptdev, u32 as_nr, /* Lock the region that needs to be updated */ gpu_write64(ptdev, AS_LOCKADDR(as_nr), region); - write_cmd(ptdev, as_nr, AS_COMMAND_LOCK); + return as_send_cmd_and_wait(ptdev, as_nr, AS_COMMAND_LOCK); } static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr, @@ -600,9 +602,7 @@ static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr, * power it up */ - lock_region(ptdev, as_nr, iova, size); - - ret = wait_ready(ptdev, as_nr); + ret = lock_region(ptdev, as_nr, iova, size); if (ret) return ret; @@ -615,10 +615,7 @@ static int 
mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr, * at the end of the GPU_CONTROL cache flush command, unlike * AS_COMMAND_FLUSH_MEM or AS_COMMAND_FLUSH_PT. */ - write_cmd(ptdev, as_nr, AS_COMMAND_UNLOCK); - - /* Wait for the unlock command to complete */ - return wait_ready(ptdev, as_nr); + return as_send_cmd_and_wait(ptdev, as_nr, AS_COMMAND_UNLOCK); } static int mmu_hw_do_operation(struct panthor_vm *vm, @@ -647,7 +644,7 @@ static int panthor_mmu_as_enable(struct panthor_device *ptdev, u32 as_nr, gpu_write64(ptdev, AS_MEMATTR(as_nr), memattr); gpu_write64(ptdev, AS_TRANSCFG(as_nr), transcfg); - return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE); + return as_send_cmd_and_wait(ptdev, as_nr, AS_COMMAND_UPDATE); } static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr) @@ -662,7 +659,7 @@ static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr) gpu_write64(ptdev, AS_MEMATTR(as_nr), 0); gpu_write64(ptdev, AS_TRANSCFG(as_nr), AS_TRANSCFG_ADRMODE_UNMAPPED); - return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE); + return as_send_cmd_and_wait(ptdev, as_nr, AS_COMMAND_UPDATE); } static u32 panthor_mmu_fault_mask(struct panthor_device *ptdev, u32 value) From 151df689fb75e46a6cafa9a2c407d44969f4bebe Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 28 Nov 2025 09:48:36 +0100 Subject: [PATCH 03/94] drm/panthor: Kill lock_region() The meat in lock_region() is about packing a region range into a single u64. The rest is just a regular reg write plus an as_send_cmd_and_wait() call that can easily be inlined in mmu_hw_do_operation_locked(). v2: - New patch v3: - Don't LOCK if the region has a zero size v4: - Collect R-b Reviewed-by: Steven Price Link: https://patch.msgid.link/20251128084841.3804658-3-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_mmu.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index f59331f89b33..b88a6d3096a0 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -538,14 +538,12 @@ static int as_send_cmd_and_wait(struct panthor_device *ptdev, u32 as_nr, u32 cmd return status; } -static int lock_region(struct panthor_device *ptdev, u32 as_nr, - u64 region_start, u64 size) +static u64 pack_region_range(struct panthor_device *ptdev, u64 region_start, u64 size) { u8 region_width; - u64 region; u64 region_end = region_start + size; - if (!size) + if (drm_WARN_ON_ONCE(&ptdev->base, !size)) return 0; /* @@ -565,11 +563,7 @@ static int lock_region(struct panthor_device *ptdev, u32 as_nr, */ region_start &= GENMASK_ULL(63, region_width); - region = region_width | region_start; - - /* Lock the region that needs to be updated */ - gpu_write64(ptdev, AS_LOCKADDR(as_nr), region); - return as_send_cmd_and_wait(ptdev, as_nr, AS_COMMAND_LOCK); + return region_width | region_start; } static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr, @@ -581,6 +575,9 @@ static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr, lockdep_assert_held(&ptdev->mmu->as.slots_lock); + if (!size) + return 0; + switch (op) { case AS_COMMAND_FLUSH_MEM: lsc_flush_op = CACHE_CLEAN | CACHE_INV; break; case AS_COMMAND_FLUSH_PT: lsc_flush_op = 0; break; default: drm_WARN(&ptdev->base, 1, "Unexpected AS_COMMAND: %d", op); return -EINVAL; } @@ -602,7 +599,10 @@ static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr, * power it up */ - ret = lock_region(ptdev, as_nr, iova, size); + /* Lock the region that needs to be updated */ +
gpu_write64(ptdev, AS_LOCKADDR(as_nr), + pack_region_range(ptdev, iova, size)); + ret = as_send_cmd_and_wait(ptdev, as_nr, AS_COMMAND_LOCK); if (ret) return ret; From 3c0a60195b37af83bbbaf223cd3a78945bace49e Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 28 Nov 2025 09:48:37 +0100 Subject: [PATCH 04/94] drm/panthor: Recover from panthor_gpu_flush_caches() failures We have seen a few cases where the whole memory subsystem is blocked and flush operations never complete. When that happens, we want to: - schedule a reset, so we can recover from this situation - in the reset path, we need to reset the pending_reqs so we can send new commands after the reset - if more panthor_gpu_flush_caches() operations are queued after the timeout, we skip them and return -EIO directly to avoid needless waits (the memory block won't miraculously work again) Note that we drop the WARN_ON()s because these hangs can be triggered with buggy GPU jobs created by the UMD, and there's no way we can prevent it. We do keep the error messages though. v2: - New patch v3: - Collect R-b - Explicitly mention the fact we dropped the WARN_ON()s in the commit message v4: - No changes Fixes: 5cd894e258c4 ("drm/panthor: Add the GPU logical block") Reviewed-by: Steven Price Link: https://patch.msgid.link/20251128084841.3804658-4-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_gpu.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c index 06b231b2460a..9cb5dee93212 100644 --- a/drivers/gpu/drm/panthor/panthor_gpu.c +++ b/drivers/gpu/drm/panthor/panthor_gpu.c @@ -289,38 +289,42 @@ int panthor_gpu_l2_power_on(struct panthor_device *ptdev) int panthor_gpu_flush_caches(struct panthor_device *ptdev, u32 l2, u32 lsc, u32 other) { - bool timedout = false; unsigned long flags; + int ret = 0; /* Serialize cache flush operations. 
*/ guard(mutex)(&ptdev->gpu->cache_flush_lock); spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); - if (!drm_WARN_ON(&ptdev->base, - ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED)) { + if (!(ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED)) { ptdev->gpu->pending_reqs |= GPU_IRQ_CLEAN_CACHES_COMPLETED; gpu_write(ptdev, GPU_CMD, GPU_FLUSH_CACHES(l2, lsc, other)); + } else { + ret = -EIO; } spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); + if (ret) + return ret; + if (!wait_event_timeout(ptdev->gpu->reqs_acked, !(ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED), msecs_to_jiffies(100))) { spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); if ((ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED) != 0 && !(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_CLEAN_CACHES_COMPLETED)) - timedout = true; + ret = -ETIMEDOUT; else ptdev->gpu->pending_reqs &= ~GPU_IRQ_CLEAN_CACHES_COMPLETED; spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); } - if (timedout) { + if (ret) { + panthor_device_schedule_reset(ptdev); drm_err(&ptdev->base, "Flush caches timeout"); - return -ETIMEDOUT; } - return 0; + return ret; } /** @@ -360,6 +364,7 @@ int panthor_gpu_soft_reset(struct panthor_device *ptdev) return -ETIMEDOUT; } + ptdev->gpu->pending_reqs = 0; return 0; } From 6e2d3b3e858942de4dbffffe3a617e80b1262f74 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 28 Nov 2025 09:48:38 +0100 Subject: [PATCH 05/94] drm/panthor: Add support for atomic page table updates Move the lock/flush_mem operations around the gpuvm_sm_[un]map() calls so we can implement true atomic page updates, where any access in the locked range done by the GPU has to wait for the page table updates to land before proceeding. This is needed for vkQueueBindSparse(), so we can replace the dummy page mapped over the entire object by actual BO backed pages in an atomic way. But it's also useful to avoid "AS_ACTIVE bit stuck" failures in the sm_[un]map() path, leading to gpuvm state inconsistencies. v2: - Adjust to match the two new preliminary patches v3: - Collect R-b v4: - No changes Reviewed-by: Steven Price Link: https://patch.msgid.link/20251128084841.3804658-5-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_mmu.c | 190 +++++++++++++------------- 1 file changed, 97 insertions(+), 93 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index b88a6d3096a0..f39e6e799c74 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -389,6 +389,15 @@ struct panthor_vm { * flagged as faulty as a result. */ bool unhandled_fault; + + /** @locked_region: Information about the currently locked region. */ + struct { + /** @locked_region.start: Start of the locked region. */ + u64 start; + + /** @locked_region.size: Size of the locked region.
*/ + u64 size; + } locked_region; }; /** @@ -566,80 +575,9 @@ static u64 pack_region_range(struct panthor_device *ptdev, u64 region_start, u64 return region_width | region_start; } -static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr, - u64 iova, u64 size, u32 op) -{ - const u32 l2_flush_op = CACHE_CLEAN | CACHE_INV; - u32 lsc_flush_op; - int ret; - - lockdep_assert_held(&ptdev->mmu->as.slots_lock); - - if (!size) - return 0; - - switch (op) { - case AS_COMMAND_FLUSH_MEM: - lsc_flush_op = CACHE_CLEAN | CACHE_INV; - break; - case AS_COMMAND_FLUSH_PT: - lsc_flush_op = 0; - break; - default: - drm_WARN(&ptdev->base, 1, "Unexpected AS_COMMAND: %d", op); - return -EINVAL; - } - - if (as_nr < 0) - return 0; - - /* - * If the AS number is greater than zero, then we can be sure - * the device is up and running, so we don't need to explicitly - * power it up - */ - - /* Lock the region that needs to be updated */ - gpu_write64(ptdev, AS_LOCKADDR(as_nr), - pack_region_range(ptdev, iova, size)); - ret = as_send_cmd_and_wait(ptdev, as_nr, AS_COMMAND_LOCK); - if (ret) - return ret; - - ret = panthor_gpu_flush_caches(ptdev, l2_flush_op, lsc_flush_op, 0); - if (ret) - return ret; - - /* - * Explicitly unlock the region as the AS is not unlocked automatically - * at the end of the GPU_CONTROL cache flush command, unlike - * AS_COMMAND_FLUSH_MEM or AS_COMMAND_FLUSH_PT. - */ - return as_send_cmd_and_wait(ptdev, as_nr, AS_COMMAND_UNLOCK); -} - -static int mmu_hw_do_operation(struct panthor_vm *vm, - u64 iova, u64 size, u32 op) -{ - struct panthor_device *ptdev = vm->ptdev; - int ret; - - mutex_lock(&ptdev->mmu->as.slots_lock); - ret = mmu_hw_do_operation_locked(ptdev, vm->as.id, iova, size, op); - mutex_unlock(&ptdev->mmu->as.slots_lock); - - return ret; -} - static int panthor_mmu_as_enable(struct panthor_device *ptdev, u32 as_nr, u64 transtab, u64 transcfg, u64 memattr) { - int ret; - - ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); - if (ret) - return ret; - gpu_write64(ptdev, AS_TRANSTAB(as_nr), transtab); gpu_write64(ptdev, AS_MEMATTR(as_nr), memattr); gpu_write64(ptdev, AS_TRANSCFG(as_nr), transcfg); @@ -651,7 +589,9 @@ static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr) { int ret; - ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); + /* Flush+invalidate RW caches, invalidate RO ones. */ + ret = panthor_gpu_flush_caches(ptdev, CACHE_CLEAN | CACHE_INV, + CACHE_CLEAN | CACHE_INV, CACHE_INV); if (ret) return ret; @@ -733,6 +673,10 @@ int panthor_vm_active(struct panthor_vm *vm) if (refcount_inc_not_zero(&vm->as.active_cnt)) goto out_dev_exit; + /* Make sure we don't race with lock/unlock_region() calls + * happening around VM bind operations. + */ + mutex_lock(&vm->op_lock); mutex_lock(&ptdev->mmu->as.slots_lock); if (refcount_inc_not_zero(&vm->as.active_cnt)) @@ -800,6 +744,10 @@ int panthor_vm_active(struct panthor_vm *vm) gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask); } + /* The VM update is guarded by ::op_lock, which we take at the beginning + * of this function, so we don't expect any locked region here. 
+ */ + drm_WARN_ON(&vm->ptdev->base, vm->locked_region.size > 0); ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr); out_make_active: @@ -810,6 +758,7 @@ int panthor_vm_active(struct panthor_vm *vm) out_unlock: mutex_unlock(&ptdev->mmu->as.slots_lock); + mutex_unlock(&vm->op_lock); out_dev_exit: drm_dev_exit(cookie); @@ -893,24 +842,6 @@ static size_t get_pgsize(u64 addr, size_t size, size_t *count) return SZ_2M; } -static int panthor_vm_flush_range(struct panthor_vm *vm, u64 iova, u64 size) -{ - struct panthor_device *ptdev = vm->ptdev; - int ret = 0, cookie; - - if (vm->as.id < 0) - return 0; - - /* If the device is unplugged, we just silently skip the flush. */ - if (!drm_dev_enter(&ptdev->base, &cookie)) - return 0; - - ret = mmu_hw_do_operation(vm, iova, size, AS_COMMAND_FLUSH_PT); - - drm_dev_exit(cookie); - return ret; -} - static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) { struct panthor_device *ptdev = vm->ptdev; @@ -918,6 +849,10 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) u64 start_iova = iova; u64 offset = 0; + drm_WARN_ON(&ptdev->base, + (iova < vm->locked_region.start) || + (iova + size > vm->locked_region.start + vm->locked_region.size)); + while (offset < size) { size_t unmapped_sz = 0, pgcount; size_t pgsize = get_pgsize(iova + offset, size - offset, &pgcount); @@ -929,7 +864,6 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) iova + offset + unmapped_sz, iova + offset + pgsize * pgcount, iova, iova + size); - panthor_vm_flush_range(vm, iova, offset + unmapped_sz); return -EINVAL; } @@ -941,7 +875,7 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) offset += unmapped_sz; } - return panthor_vm_flush_range(vm, iova, size); + return 0; } static int @@ -959,6 +893,10 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot, if (!size) return 0; + drm_WARN_ON(&ptdev->base, + (iova < vm->locked_region.start) || + (iova + size > vm->locked_region.start + vm->locked_region.size)); + for_each_sgtable_dma_sg(sgt, sgl, count) { dma_addr_t paddr = sg_dma_address(sgl); size_t len = sg_dma_len(sgl); @@ -1009,7 +947,7 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot, offset = 0; } - return panthor_vm_flush_range(vm, start_iova, iova - start_iova); + return 0; } static int flags_to_prot(u32 flags) @@ -1692,6 +1630,62 @@ static const char *access_type_name(struct panthor_device *ptdev, } } +static int panthor_vm_lock_region(struct panthor_vm *vm, u64 start, u64 size) +{ + struct panthor_device *ptdev = vm->ptdev; + int ret = 0; + + mutex_lock(&ptdev->mmu->as.slots_lock); + drm_WARN_ON(&ptdev->base, vm->locked_region.start || vm->locked_region.size); + if (vm->as.id >= 0 && size) { + /* Lock the region that needs to be updated */ + gpu_write64(ptdev, AS_LOCKADDR(vm->as.id), + pack_region_range(ptdev, start, size)); + + /* If the lock succeeded, update the locked_region info. */ + ret = as_send_cmd_and_wait(ptdev, vm->as.id, AS_COMMAND_LOCK); + } + + if (!ret) { + vm->locked_region.start = start; + vm->locked_region.size = size; + } + mutex_unlock(&ptdev->mmu->as.slots_lock); + + return ret; +} + +static void panthor_vm_unlock_region(struct panthor_vm *vm) +{ + struct panthor_device *ptdev = vm->ptdev; + + mutex_lock(&ptdev->mmu->as.slots_lock); + if (vm->as.id >= 0) { + int ret; + + /* flush+invalidate RW caches and invalidate RO ones. 
+ * TODO: See if we can use FLUSH_PA_RANGE when the physical + * range is narrow enough and the HW supports it. + */ + ret = panthor_gpu_flush_caches(ptdev, CACHE_CLEAN | CACHE_INV, + CACHE_CLEAN | CACHE_INV, + CACHE_INV); + + /* Unlock the region if the flush is effective. */ + if (!ret) + ret = as_send_cmd_and_wait(ptdev, vm->as.id, AS_COMMAND_UNLOCK); + + /* If we fail to flush or unlock the region, schedule a GPU reset + * to unblock the situation. + */ + if (ret) + panthor_device_schedule_reset(ptdev); + } + vm->locked_region.start = 0; + vm->locked_region.size = 0; + mutex_unlock(&ptdev->mmu->as.slots_lock); +} + static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status) { bool has_unhandled_faults = false; @@ -1896,6 +1890,7 @@ static void panthor_vm_free(struct drm_gpuvm *gpuvm) drm_sched_entity_destroy(&vm->entity); drm_sched_fini(&vm->sched); + mutex_lock(&vm->op_lock); mutex_lock(&ptdev->mmu->as.slots_lock); if (vm->as.id >= 0) { int cookie; @@ -1910,6 +1905,7 @@ static void panthor_vm_free(struct drm_gpuvm *gpuvm) list_del(&vm->as.lru_node); } mutex_unlock(&ptdev->mmu->as.slots_lock); + mutex_unlock(&vm->op_lock); free_io_pgtable_ops(vm->pgtbl_ops); @@ -2219,6 +2215,11 @@ panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op, mutex_lock(&vm->op_lock); vm->op_ctx = op; + + ret = panthor_vm_lock_region(vm, op->va.addr, op->va.range); + if (ret) + goto out; + switch (op_type) { case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: { const struct drm_gpuvm_map_req map_req = { @@ -2246,6 +2247,9 @@ panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op, break; } + panthor_vm_unlock_region(vm); + +out: if (ret && flag_vm_unusable_on_failure) vm->unusable = true; From 19e8bc9456055e594d0f1c9677ea8a3a9983ec3e Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 28 Nov 2025 09:48:39 +0100 Subject: [PATCH 06/94] drm/panthor: Make panthor_vm_[un]map_pages() more robust There's no reason for panthor_vm_[un]map_pages() to fail unless the drm_gpuvm state and the page table are out of sync, so let's reflect that by making panthor_vm_unmap_pages() a void function and adding WARN_ON()s in various places. We also try to recover from those unexpected mismatches by checking for already unmapped ranges and skipping them. But there's only so much we can do to try and cope with such SW bugs, so when we see a mismatch, we flag the VM unusable and disable the AS to avoid further GPU accesses to the memory. It could be that the as_disable() call fails because the MMU unit is stuck, in which case the whole GPU is frozen, and only a GPU reset can unblock things. After the reset, the VM will be seen as unusable and any attempt to re-use it will fail, so we should be covered for any use-after-unmap issues.
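To make the recovery walk concrete, here is a minimal sketch of the hole-skipping idea (an illustration under stated assumptions, not the driver's actual code): it assumes a 4K granule and uses the io-pgtable iova_to_phys() hook to detect ranges that are already unmapped.

#include <linux/io-pgtable.h>
#include <linux/sizes.h>

/* Advance past IOVAs that are already unmapped (iova_to_phys() returns 0)
 * so the rest of the requested range still gets released.
 */
static size_t skip_unmapped_holes(struct io_pgtable_ops *ops, u64 iova,
				  size_t unmapped, size_t requested)
{
	while (unmapped < requested &&
	       !ops->iova_to_phys(ops, iova + unmapped))
		unmapped += SZ_4K;

	return unmapped;
}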
v2: - Fix double unlock v3: - Collect R-b v4: - No changes Reviewed-by: Steven Price Link: https://patch.msgid.link/20251128084841.3804658-6-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_mmu.c | 81 ++++++++++++++++++--------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index f39e6e799c74..8ba5259e3d28 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -842,13 +842,33 @@ static size_t get_pgsize(u64 addr, size_t size, size_t *count) return SZ_2M; } -static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) +static void panthor_vm_declare_unusable(struct panthor_vm *vm) +{ + struct panthor_device *ptdev = vm->ptdev; + int cookie; + + if (vm->unusable) + return; + + vm->unusable = true; + mutex_lock(&ptdev->mmu->as.slots_lock); + if (vm->as.id >= 0 && drm_dev_enter(&ptdev->base, &cookie)) { + panthor_mmu_as_disable(ptdev, vm->as.id); + drm_dev_exit(cookie); + } + mutex_unlock(&ptdev->mmu->as.slots_lock); +} + +static void panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) { struct panthor_device *ptdev = vm->ptdev; struct io_pgtable_ops *ops = vm->pgtbl_ops; u64 start_iova = iova; u64 offset = 0; + if (!size) + return; + drm_WARN_ON(&ptdev->base, (iova < vm->locked_region.start) || (iova + size > vm->locked_region.start + vm->locked_region.size)); @@ -858,13 +878,28 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) size_t pgsize = get_pgsize(iova + offset, size - offset, &pgcount); unmapped_sz = ops->unmap_pages(ops, iova + offset, pgsize, pgcount, NULL); + if (drm_WARN_ON_ONCE(&ptdev->base, unmapped_sz != pgsize * pgcount)) { + /* Gracefully handle sparsely unmapped regions to avoid leaving + * page table pages behind when the drm_gpuvm and VM page table + * are out-of-sync. This is not supposed to happen, hence the + * above WARN_ON(). + */ + while (!ops->iova_to_phys(ops, iova + unmapped_sz) && + unmapped_sz < pgsize * pgcount) + unmapped_sz += SZ_4K; - if (drm_WARN_ON(&ptdev->base, unmapped_sz != pgsize * pgcount)) { - drm_err(&ptdev->base, "failed to unmap range %llx-%llx (requested range %llx-%llx)\n", - iova + offset + unmapped_sz, - iova + offset + pgsize * pgcount, - iova, iova + size); - return -EINVAL; + /* We're passed the point where we can try to fix things, + * so flag the VM unusable to make sure it's not going + * to be used anymore. + */ + panthor_vm_declare_unusable(vm); + + /* If we don't make progress, we're screwed. That also means + * something else prevents us from unmapping the region, but + * there's not much we can do here: time for debugging. + */ + if (drm_WARN_ON_ONCE(&ptdev->base, !unmapped_sz)) + return; } drm_dbg(&ptdev->base, @@ -874,8 +909,6 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) offset += unmapped_sz; } - - return 0; } static int @@ -927,16 +960,17 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot, paddr += mapped; len -= mapped; - if (drm_WARN_ON(&ptdev->base, !ret && !mapped)) + /* If nothing was mapped, consider it an ENOMEM. */ + if (!ret && !mapped) ret = -ENOMEM; - if (ret) { - /* If something failed, unmap what we've already mapped before - * returning. The unmap call is not supposed to fail. + /* If something fails, we stop there, and flag the VM unusable. 
*/ + if (drm_WARN_ON_ONCE(&ptdev->base, ret)) { + /* Unmap what we've already mapped to avoid leaving page + * table pages behind. */ - drm_WARN_ON(&ptdev->base, - panthor_vm_unmap_pages(vm, start_iova, - iova - start_iova)); + panthor_vm_unmap_pages(vm, start_iova, iova - start_iova); + panthor_vm_declare_unusable(vm); return ret; } } @@ -2120,12 +2154,9 @@ static int panthor_gpuva_sm_step_remap(struct drm_gpuva_op *op, struct panthor_vm_op_ctx *op_ctx = vm->op_ctx; struct panthor_vma *prev_vma = NULL, *next_vma = NULL; u64 unmap_start, unmap_range; - int ret; drm_gpuva_op_remap_to_unmap_range(&op->remap, &unmap_start, &unmap_range); - ret = panthor_vm_unmap_pages(vm, unmap_start, unmap_range); - if (ret) - return ret; + panthor_vm_unmap_pages(vm, unmap_start, unmap_range); if (op->remap.prev) { prev_vma = panthor_vm_op_ctx_get_vma(op_ctx); @@ -2165,13 +2196,9 @@ static int panthor_gpuva_sm_step_unmap(struct drm_gpuva_op *op, { struct panthor_vma *unmap_vma = container_of(op->unmap.va, struct panthor_vma, base); struct panthor_vm *vm = priv; - int ret; - - ret = panthor_vm_unmap_pages(vm, unmap_vma->base.va.addr, - unmap_vma->base.va.range); - if (drm_WARN_ON(&vm->ptdev->base, ret)) - return ret; + panthor_vm_unmap_pages(vm, unmap_vma->base.va.addr, + unmap_vma->base.va.range); drm_gpuva_unmap(&op->unmap); panthor_vma_unlink(vm, unmap_vma); return 0; } @@ -2251,7 +2278,7 @@ panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op, out: if (ret && flag_vm_unusable_on_failure) - vm->unusable = true; + panthor_vm_declare_unusable(vm); vm->op_ctx = NULL; mutex_unlock(&vm->op_lock); From ddf2cb3c9e655dd9b7bb5172249f6c01fc251549 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 28 Nov 2025 09:48:40 +0100 Subject: [PATCH 07/94] drm/panthor: Relax a check in panthor_sched_pre_reset() Groups are only moved out of the runnable lists when panthor_group_stop() is called or when they run out of jobs. What should not happen though is having one group added to one of the runnable lists after reset.in_progress has been set to true, but that's not something we can easily check, so let's just drop the WARN_ON() in panthor_sched_pre_reset(). v2: - Adjust explanation in commit message v3: - Collect R-b v4: - No changes Reviewed-by: Liviu Dudau Reviewed-by: Steven Price Link: https://patch.msgid.link/20251128084841.3804658-7-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_sched.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index b834123a6560..1beddc175722 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -2937,8 +2937,6 @@ void panthor_sched_pre_reset(struct panthor_device *ptdev) * new jobs while we're resetting. */ for (i = 0; i < ARRAY_SIZE(sched->groups.runnable); i++) { - /* All groups should be in the idle lists. */ - drm_WARN_ON(&ptdev->base, !list_empty(&sched->groups.runnable[i])); list_for_each_entry_safe(group, group_tmp, &sched->groups.runnable[i], run_node) panthor_group_stop(group); } From 851f58d02f0d6c9c5fa8aee32fe349aaa9796758 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 28 Nov 2025 10:48:32 +0100 Subject: [PATCH 08/94] drm/panthor: Simplify group idleness tracking csg_slot_sync_queues_state_locked() queries the queues state which can then be used to determine if a group is idle or not.
Let's base our idleness detection logic solely on the {idle,blocked}_queues masks to avoid inconsistencies between the group state and the state of its subqueues. v2: - Add R-b v3: - Collect R-b Reviewed-by: Steven Price Reviewed-by: Chia-I Wu Link: https://patch.msgid.link/20251128094839.3856402-2-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_sched.c | 31 ++----------------------- 1 file changed, 2 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 1beddc175722..5b2ab963ac99 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -108,15 +108,6 @@ struct panthor_csg_slot { /** @priority: Group priority. */ u8 priority; - - /** - * @idle: True if the group bound to this slot is idle. - * - * A group is idle when it has nothing waiting for execution on - * all its queues, or when queues are blocked waiting for something - * to happen (synchronization object). - */ - bool idle; }; /** @@ -1056,13 +1047,8 @@ group_unbind_locked(struct panthor_group *group) static bool group_is_idle(struct panthor_group *group) { - struct panthor_device *ptdev = group->ptdev; - u32 inactive_queues; + u32 inactive_queues = group->idle_queues | group->blocked_queues; - if (group->csg_id >= 0) - return ptdev->scheduler->csg_slots[group->csg_id].idle; - - inactive_queues = group->idle_queues | group->blocked_queues; return hweight32(inactive_queues) == group->queue_count; } @@ -1719,17 +1705,6 @@ static bool cs_slot_process_irq_locked(struct panthor_device *ptdev, return (events & (CS_FAULT | CS_TILER_OOM)) != 0; } -static void csg_slot_sync_idle_state_locked(struct panthor_device *ptdev, u32 csg_id) -{ - struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; - struct panthor_fw_csg_iface *csg_iface; - - lockdep_assert_held(&ptdev->scheduler->lock); - - csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); - csg_slot->idle = csg_iface->output->status_state & CSG_STATUS_STATE_IS_IDLE; -} - static void csg_slot_process_idle_event_locked(struct panthor_device *ptdev, u32 csg_id) { struct panthor_scheduler *sched = ptdev->scheduler; @@ -1991,10 +1966,8 @@ static int csgs_upd_ctx_apply_locked(struct panthor_device *ptdev, if (acked & CSG_STATE_MASK) csg_slot_sync_state_locked(ptdev, csg_id); - if (acked & CSG_STATUS_UPDATE) { + if (acked & CSG_STATUS_UPDATE) csg_slot_sync_queues_state_locked(ptdev, csg_id); - csg_slot_sync_idle_state_locked(ptdev, csg_id); - } if (ret && acked != req_mask && ((csg_iface->input->req ^ csg_iface->output->ack) & req_mask) != 0) { From 5232e84927197d5cb045ddea9c90fc143b64bf65 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 28 Nov 2025 10:48:33 +0100 Subject: [PATCH 09/94] drm/panthor: Don't try to enable extract events Not only does this work just once, because of how extract events operate (an event is enabled when the req and ack bits differ, and it's signalled by the FW setting req and ack to identical values; to re-enable the event we would need to toggle the bit, which we never do). But more importantly, we never do anything with this event, so we're better off dropping it when programming the CS slot.
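The req/ack handshake this relies on can be summarized with a hedged sketch (illustrative helpers, not the driver's actual API): an event is armed while the req and ack bits differ, the FW completes it by making them equal, and re-arming would require toggling the req bit.

/* Armed while req and ack disagree; the FW acks by copying req into ack. */
static bool cs_event_armed(u32 req, u32 ack, u32 event_bit)
{
	return (req ^ ack) & event_bit;
}

/* Re-arming means toggling req, which was never done for CS_EXTRACT_EVENT,
 * hence the event could only ever fire once.
 */
static u32 cs_event_rearm(u32 req, u32 event_bit)
{
	return req ^ event_bit;
}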
v2: - Add R-b v3: - Collect R-b Reviewed-by: Steven Price Reviewed-by: Chia-I Wu Link: https://patch.msgid.link/20251128094839.3856402-3-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_sched.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 5b2ab963ac99..5ec553818c28 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -1180,12 +1180,10 @@ cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id) panthor_fw_update_reqs(cs_iface, req, CS_IDLE_SYNC_WAIT | CS_IDLE_EMPTY | - CS_STATE_START | - CS_EXTRACT_EVENT, + CS_STATE_START, CS_IDLE_SYNC_WAIT | CS_IDLE_EMPTY | - CS_STATE_MASK | - CS_EXTRACT_EVENT); + CS_STATE_MASK); if (queue->iface.input->insert != queue->iface.input->extract) queue_resume_timeout(queue); } From a3c2d0b40b108bd45d44f6c1dfa33c39d577adcd Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 28 Nov 2025 10:48:34 +0100 Subject: [PATCH 10/94] drm/panthor: Fix the full_tick check We have a full tick when the remaining time to the next tick is zero, not the other way around. Declare a full_tick variable so we don't get that test wrong in other places. v2: - Add R-b v3: - Collect R-b Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block") Reviewed-by: Steven Price Reviewed-by: Chia-I Wu Link: https://patch.msgid.link/20251128094839.3856402-4-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_sched.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 5ec553818c28..5d280d9c8225 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -2448,6 +2448,7 @@ static void tick_work(struct work_struct *work) u64 remaining_jiffies = 0, resched_delay; u64 now = get_jiffies_64(); int prio, ret, cookie; + bool full_tick; if (!drm_dev_enter(&ptdev->base, &cookie)) return; @@ -2459,15 +2460,17 @@ static void tick_work(struct work_struct *work) if (time_before64(now, sched->resched_target)) remaining_jiffies = sched->resched_target - now; + full_tick = remaining_jiffies == 0; + mutex_lock(&sched->lock); if (panthor_device_reset_is_pending(sched->ptdev)) goto out_unlock; - tick_ctx_init(sched, &ctx, remaining_jiffies != 0); + tick_ctx_init(sched, &ctx, full_tick); if (ctx.csg_upd_failed_mask) goto out_cleanup_ctx; - if (remaining_jiffies) { + if (!full_tick) { /* Scheduling forced in the middle of a tick. Only RT groups * can preempt non-RT ones. Currently running RT groups can't be * preempted. From 55429c51d5db3db24c2ad561944c6a0ca922d476 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 28 Nov 2025 10:48:35 +0100 Subject: [PATCH 11/94] drm/panthor: Fix the group priority rotation logic When rotating group priorities, we want the group with the highest priority to go back to the end of the queue, and all other active groups to get their priority bumped, otherwise some groups will never get a chance to run with the highest priority. This implies moving the rotation itself to tick_work(), and only dealing with old group ordering in tick_ctx_insert_old_group(). 
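In list terms, the rotation amounts to moving the head of a priority bucket (the group that just ran with the highest slot priority) to the tail so every other active group gets bumped. A hedged sketch using the kernel list API, with an illustrative helper name; the real logic lives in tick_work() and the tick_ctx_*() helpers:

#include <linux/list.h>

/* Rotate one priority bucket: the first entry goes to the back. */
static void rotate_prio_bucket(struct list_head *bucket)
{
	if (!list_empty(bucket))
		list_rotate_left(bucket);
}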
v2: - Add R-b - Fix the commit message v3: - Drop the full_tick argument in tick_ctx_init() - Collect R-b Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block") Reviewed-by: Steven Price Reviewed-by: Chia-I Wu Link: https://patch.msgid.link/20251128094839.3856402-5-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_sched.c | 52 +++++++++++++++---------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 5d280d9c8225..531b52ee3a92 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -2050,31 +2050,22 @@ tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched, static void tick_ctx_insert_old_group(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *ctx, - struct panthor_group *group, - bool full_tick) + struct panthor_group *group) { struct panthor_csg_slot *csg_slot = &sched->csg_slots[group->csg_id]; struct panthor_group *other_group; - if (!full_tick) { - list_add_tail(&group->run_node, &ctx->old_groups[group->priority]); - return; - } - - /* Rotate to make sure groups with lower CSG slot - * priorities have a chance to get a higher CSG slot - * priority next time they get picked. This priority - * has an impact on resource request ordering, so it's - * important to make sure we don't let one group starve - * all other groups with the same group priority. - */ + /* Class groups in descending priority order so we can easily rotate. */ list_for_each_entry(other_group, &ctx->old_groups[csg_slot->group->priority], run_node) { struct panthor_csg_slot *other_csg_slot = &sched->csg_slots[other_group->csg_id]; - if (other_csg_slot->priority > csg_slot->priority) { - list_add_tail(&csg_slot->group->run_node, &other_group->run_node); + /* Our group has a higher prio than the one we're testing against, + * place it just before. + */ + if (csg_slot->priority > other_csg_slot->priority) { + list_add_tail(&group->run_node, &other_group->run_node); return; } } @@ -2084,8 +2075,7 @@ tick_ctx_insert_old_group(struct panthor_scheduler *sched, static void tick_ctx_init(struct panthor_scheduler *sched, - struct panthor_sched_tick_ctx *ctx, - bool full_tick) + struct panthor_sched_tick_ctx *ctx) { struct panthor_device *ptdev = sched->ptdev; struct panthor_csg_slots_upd_ctx upd_ctx; @@ -2123,7 +2113,7 @@ tick_ctx_init(struct panthor_scheduler *sched, group->fatal_queues |= GENMASK(group->queue_count - 1, 0); } - tick_ctx_insert_old_group(sched, ctx, group, full_tick); + tick_ctx_insert_old_group(sched, ctx, group); csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i, csg_iface->output->ack ^ CSG_STATUS_UPDATE, CSG_STATUS_UPDATE); @@ -2466,7 +2456,7 @@ static void tick_work(struct work_struct *work) if (panthor_device_reset_is_pending(sched->ptdev)) goto out_unlock; - tick_ctx_init(sched, &ctx, full_tick); + tick_ctx_init(sched, &ctx); if (ctx.csg_upd_failed_mask) goto out_cleanup_ctx; @@ -2492,9 +2482,29 @@ static void tick_work(struct work_struct *work) for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0 && !tick_ctx_is_full(sched, &ctx); prio--) { + struct panthor_group *old_highest_prio_group = + list_first_entry_or_null(&ctx.old_groups[prio], + struct panthor_group, run_node); + + /* Pull out the group with the highest prio for rotation. 
*/ + if (old_highest_prio_group) + list_del(&old_highest_prio_group->run_node); + + /* Re-insert old active groups so they get a chance to run with higher prio. */ + tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], true, true); + + /* Fill the remaining slots with runnable groups. */ tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.runnable[prio], true, false); - tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], true, true); + + /* Re-insert the old group with the highest prio, and give it a chance to be + * scheduled again (but with a lower prio) if there's room left. + */ + if (old_highest_prio_group) { + list_add_tail(&old_highest_prio_group->run_node, &ctx.old_groups[prio]); + tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], + true, true); + } } /* If we have free CSG slots left, pick idle groups */ From 4356d21994f4ff5c87305b874939b359f16f6677 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 28 Nov 2025 10:48:36 +0100 Subject: [PATCH 12/94] drm/panthor: Fix immediate ticking on a disabled tick We have a few paths where we schedule the tick work immediately without changing the resched_target. If the tick was stopped, this would lead to a remaining_jiffies that's always > 0, and it wouldn't force a full tick in that case. Add extra checks to cover that case properly. v2: - Fix typo - Simplify the code as suggested by Steve v3: - Collect R-b Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block") Reviewed-by: Steven Price Reviewed-by: Chia-I Wu Link: https://patch.msgid.link/20251128094839.3856402-6-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_sched.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 531b52ee3a92..1efeabc4b0ac 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -2435,6 +2435,7 @@ static void tick_work(struct work_struct *work) tick_work.work); struct panthor_device *ptdev = sched->ptdev; struct panthor_sched_tick_ctx ctx; + u64 resched_target = sched->resched_target; u64 remaining_jiffies = 0, resched_delay; u64 now = get_jiffies_64(); int prio, ret, cookie; @@ -2447,8 +2448,12 @@ static void tick_work(struct work_struct *work) if (drm_WARN_ON(&ptdev->base, ret)) goto out_dev_exit; - if (time_before64(now, sched->resched_target)) - remaining_jiffies = sched->resched_target - now; + /* If the tick is stopped, calculate when the next tick would be */ + if (resched_target == U64_MAX) + resched_target = sched->last_tick + sched->tick_period; + + if (time_before64(now, resched_target)) + remaining_jiffies = resched_target - now; full_tick = remaining_jiffies == 0; From 61d9a43d70dc3e1709ecd14a34f6d5f01e21dfc9 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 28 Nov 2025 10:48:37 +0100 Subject: [PATCH 13/94] drm/panthor: Fix the logic that decides when to stop ticking When we have multiple active groups with the same priority, we need to keep ticking for the priority rotation to take place. If we don't do that, we might starve slots with lower priorities. It's annoying to deal with that in tick_ctx_update_resched_target(), so let's add a ::stop_tick field to the tick context which is initialized to true, and downgraded to false as soon as we detect something that requires the tick to happen. This way we can complement the current logic with extra conditions if needed.
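Distilled from the diff below into a hedged helper (the names mirror the patch, but this is not a standalone API): the tick keeps running only when at least two non-idle groups share a priority level, since that is when CSG priority rotation must happen.

static bool need_tick(const struct panthor_sched_tick_ctx *ctx,
		      const struct panthor_group *group)
{
	/* A second non-idle group at the same priority forces rotation. */
	return !group_is_idle(group) &&
	       !list_empty(&ctx->groups[group->priority]);
}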
v2: - Add R-b v3: - Drop panthor_sched_tick_ctx::min_priority (no longer relevant) - Collect R-b Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block") Reviewed-by: Steven Price Reviewed-by: Chia-I Wu Link: https://patch.msgid.link/20251128094839.3856402-7-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_sched.c | 46 ++++++++++--------------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 1efeabc4b0ac..8846608dd127 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -1985,10 +1985,10 @@ struct panthor_sched_tick_ctx { struct list_head groups[PANTHOR_CSG_PRIORITY_COUNT]; u32 idle_group_count; u32 group_count; - enum panthor_csg_priority min_priority; struct panthor_vm *vms[MAX_CS_PER_CSG]; u32 as_count; bool immediate_tick; + bool stop_tick; u32 csg_upd_failed_mask; }; @@ -2031,17 +2031,21 @@ tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched, if (!owned_by_tick_ctx) group_get(group); - list_move_tail(&group->run_node, &ctx->groups[group->priority]); ctx->group_count++; + + /* If we have more than one active group with the same priority, + * we need to keep ticking to rotate the CSG priority. + */ if (group_is_idle(group)) ctx->idle_group_count++; + else if (!list_empty(&ctx->groups[group->priority])) + ctx->stop_tick = false; + + list_move_tail(&group->run_node, &ctx->groups[group->priority]); if (i == ctx->as_count) ctx->vms[ctx->as_count++] = group->vm; - if (ctx->min_priority > group->priority) - ctx->min_priority = group->priority; - if (tick_ctx_is_full(sched, ctx)) return; } @@ -2085,7 +2089,7 @@ tick_ctx_init(struct panthor_scheduler *sched, memset(ctx, 0, sizeof(*ctx)); csgs_upd_ctx_init(&upd_ctx); - ctx->min_priority = PANTHOR_CSG_PRIORITY_COUNT; + ctx->stop_tick = true; for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) { INIT_LIST_HEAD(&ctx->groups[i]); INIT_LIST_HEAD(&ctx->old_groups[i]); @@ -2397,32 +2401,18 @@ static u64 tick_ctx_update_resched_target(struct panthor_scheduler *sched, const struct panthor_sched_tick_ctx *ctx) { - /* We had space left, no need to reschedule until some external event happens. */ - if (!tick_ctx_is_full(sched, ctx)) + u64 resched_target; + + if (ctx->stop_tick) goto no_tick; - /* If idle groups were scheduled, no need to wake up until some external - * event happens (group unblocked, new job submitted, ...). - */ - if (ctx->idle_group_count) - goto no_tick; + resched_target = sched->last_tick + sched->tick_period; - if (drm_WARN_ON(&sched->ptdev->base, ctx->min_priority >= PANTHOR_CSG_PRIORITY_COUNT)) - goto no_tick; + if (time_before64(sched->resched_target, sched->last_tick) || + time_before64(resched_target, sched->resched_target)) + sched->resched_target = resched_target; - /* If there are groups of the same priority waiting, we need to - * keep the scheduler ticking, otherwise, we'll just wait for - * new groups with higher priority to be queued. 
- */ - if (!list_empty(&sched->groups.runnable[ctx->min_priority])) { - u64 resched_target = sched->last_tick + sched->tick_period; - - if (time_before64(sched->resched_target, sched->last_tick) || - time_before64(resched_target, sched->resched_target)) - sched->resched_target = resched_target; - - return sched->resched_target - sched->last_tick; - } + return sched->resched_target - sched->last_tick; no_tick: sched->resched_target = U64_MAX; From 99820b4b7e50d9651f01d2d55b6b9ba92dcc5b99 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 28 Nov 2025 10:48:38 +0100 Subject: [PATCH 14/94] drm/panthor: Make sure we resume the tick when new jobs are submitted If the group is already assigned a slot but was idle before this job submission, we need to make sure the priority rotation happens in the future. Extract the existing logic living in group_schedule_locked() and call this new sched_resume_tick() helper from the "group is assigned a slot" path. v2: - Add R-b v3: - Re-use queue_mask to clear the bit - Collect R-b Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block") Reviewed-by: Steven Price Reviewed-by: Chia-I Wu Link: https://patch.msgid.link/20251128094839.3856402-8-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_sched.c | 43 +++++++++++++++++++------ 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 8846608dd127..7759a8a8565e 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -2624,14 +2624,33 @@ static void sync_upd_work(struct work_struct *work) sched_queue_delayed_work(sched, tick, 0); } +static void sched_resume_tick(struct panthor_device *ptdev) +{ + struct panthor_scheduler *sched = ptdev->scheduler; + u64 delay_jiffies, now; + + drm_WARN_ON(&ptdev->base, sched->resched_target != U64_MAX); + + /* Scheduler tick was off, recalculate the resched_target based on the + * last tick event, and queue the scheduler work. + */ + now = get_jiffies_64(); + sched->resched_target = sched->last_tick + sched->tick_period; + if (sched->used_csg_slot_count == sched->csg_slot_count && + time_before64(now, sched->resched_target)) + delay_jiffies = min_t(unsigned long, sched->resched_target - now, ULONG_MAX); + else + delay_jiffies = 0; + + sched_queue_delayed_work(sched, tick, delay_jiffies); +} + static void group_schedule_locked(struct panthor_group *group, u32 queue_mask) { struct panthor_device *ptdev = group->ptdev; struct panthor_scheduler *sched = ptdev->scheduler; struct list_head *queue = &sched->groups.runnable[group->priority]; - u64 delay_jiffies = 0; bool was_idle; - u64 now; if (!group_can_run(group)) return; @@ -2676,13 +2695,7 @@ static void group_schedule_locked(struct panthor_group *group, u32 queue_mask) /* Scheduler tick was off, recalculate the resched_target based on the * last tick event, and queue the scheduler work. 
- */ - now = get_jiffies_64(); - sched->resched_target = sched->last_tick + sched->tick_period; - if (sched->used_csg_slot_count == sched->csg_slot_count && - time_before64(now, sched->resched_target)) - delay_jiffies = min_t(unsigned long, sched->resched_target - now, ULONG_MAX); - - sched_queue_delayed_work(sched, tick, delay_jiffies); + sched_resume_tick(ptdev); } static void queue_stop(struct panthor_queue *queue, @@ -3314,6 +3327,18 @@ queue_run_job(struct drm_sched_job *sched_job) if (group->csg_id < 0) { group_schedule_locked(group, BIT(job->queue_idx)); } else { + u32 queue_mask = BIT(job->queue_idx); + bool resume_tick = group_is_idle(group) && + (group->idle_queues & queue_mask) && + !(group->blocked_queues & queue_mask) && + sched->resched_target == U64_MAX; + + /* We just added something to the queue, so it's no longer idle. */ + group->idle_queues &= ~queue_mask; + + if (resume_tick) + sched_resume_tick(ptdev); + gpu_write(ptdev, CSF_DOORBELL(queue->doorbell_id), 1); if (!sched->pm.has_ref && !(group->blocked_queues & BIT(job->queue_idx))) { From 36c5dff41fd958c9e32d931eb90b5ae638a7e600 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 28 Nov 2025 10:48:39 +0100 Subject: [PATCH 15/94] drm/panthor: Kill panthor_sched_immediate_tick() It's only used in a couple places and everyone else is just using sched_queue_delayed_work(sched, tick, 0) directly, so let's make this consistent. v2: - Add R-b v3: - Collect R-b Reviewed-by: Steven Price Reviewed-by: Chia-I Wu Link: https://patch.msgid.link/20251128094839.3856402-9-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_sched.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 7759a8a8565e..389d508b3848 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -2753,13 +2753,6 @@ static void panthor_group_start(struct panthor_group *group) group_put(group); } -static void panthor_sched_immediate_tick(struct panthor_device *ptdev) -{ - struct panthor_scheduler *sched = ptdev->scheduler; - - sched_queue_delayed_work(sched, tick, 0); -} - /** * panthor_sched_report_mmu_fault() - Report MMU faults to the scheduler. */ @@ -2767,13 +2760,13 @@ void panthor_sched_report_mmu_fault(struct panthor_device *ptdev) { /* Force a tick to immediately kill faulty groups. */ if (ptdev->scheduler) - panthor_sched_immediate_tick(ptdev); + sched_queue_delayed_work(ptdev->scheduler, tick, 0); } void panthor_sched_resume(struct panthor_device *ptdev) { /* Force a tick to re-evaluate after a resume. */ - panthor_sched_immediate_tick(ptdev); + sched_queue_delayed_work(ptdev->scheduler, tick, 0); } void panthor_sched_suspend(struct panthor_device *ptdev) From e85e9ccf3f8404007f62dff9a02273fcdeb44206 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 27 Nov 2025 09:03:49 +0000 Subject: [PATCH 16/94] drm/panic: Report invalid or unsupported panic modes Currently the user can write anything into the drm.panic_screen modparam, either at runtime via sysfs, or as a kernel boot time argument. Invalid strings will be silently accepted and ignored at use time by defaulting to the 'user' panic mode. Let's instead add some validation in order to have immediate feedback when something has been mistyped, or not compiled in.
For example during kernel boot: Booting kernel: `bsod' invalid for parameter `drm.panic_screen' Or at runtime: # echo -n bsod > /sys/module/drm/parameters/panic_screen -bash: echo: write error: Invalid argument The change of behavior is that, when an invalid mode is configured, the current code defaults to the 'user' mode, while with this change the code ignores it and defaults to the mode set at kernel build time via CONFIG_DRM_PANIC_SCREEN. While at it, let's also fix the module parameter description to include all compiled-in modes. Signed-off-by: Tvrtko Ursulin Cc: Jocelyn Falempe Cc: Javier Martinez Canillas Reviewed-by: Javier Martinez Canillas Reviewed-by: Jocelyn Falempe Signed-off-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20251127090349.92717-1-tvrtko.ursulin@igalia.com --- drivers/gpu/drm/drm_panic.c | 77 ++++++++++++++++++++++++++++++------- 1 file changed, 63 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c index 1d6312fa1429..2635f95cbde5 100644 --- a/drivers/gpu/drm/drm_panic.c +++ b/drivers/gpu/drm/drm_panic.c @@ -39,12 +39,6 @@ MODULE_AUTHOR("Jocelyn Falempe"); MODULE_DESCRIPTION("DRM panic handler"); MODULE_LICENSE("GPL"); -static char drm_panic_screen[16] = CONFIG_DRM_PANIC_SCREEN; -module_param_string(panic_screen, drm_panic_screen, sizeof(drm_panic_screen), 0644); -MODULE_PARM_DESC(panic_screen, - "Choose what will be displayed by drm_panic, 'user' or 'kmsg' [default=" - CONFIG_DRM_PANIC_SCREEN "]"); - /** * DOC: overview * @@ -765,15 +759,60 @@ static void draw_panic_static_qr_code(struct drm_scanout_buffer *sb) draw_panic_static_user(sb); } #else -static void draw_panic_static_qr_code(struct drm_scanout_buffer *sb) -{ - draw_panic_static_user(sb); -} - static void drm_panic_qr_init(void) {}; static void drm_panic_qr_exit(void) {}; #endif +enum drm_panic_type { + DRM_PANIC_TYPE_KMSG, + DRM_PANIC_TYPE_USER, + DRM_PANIC_TYPE_QR, +}; + +static enum drm_panic_type drm_panic_type = -1; + +static const char *drm_panic_type_map[] = { + [DRM_PANIC_TYPE_KMSG] = "kmsg", + [DRM_PANIC_TYPE_USER] = "user", +#if IS_ENABLED(CONFIG_DRM_PANIC_SCREEN_QR_CODE) + [DRM_PANIC_TYPE_QR] = "qr", +#endif +}; + +static int drm_panic_type_set(const char *val, const struct kernel_param *kp) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(drm_panic_type_map); i++) { + if (!strcmp(val, drm_panic_type_map[i])) { + drm_panic_type = i; + return 0; + } + } + + return -EINVAL; +} + +static int drm_panic_type_get(char *buffer, const struct kernel_param *kp) { + return scnprintf(buffer, PAGE_SIZE, "%s\n", + drm_panic_type_map[drm_panic_type]); +} + +static const struct kernel_param_ops drm_panic_ops = { + .set = drm_panic_type_set, + .get = drm_panic_type_get, +}; + +module_param_cb(panic_screen, &drm_panic_ops, NULL, 0644); +MODULE_PARM_DESC(panic_screen, +#if IS_ENABLED(CONFIG_DRM_PANIC_SCREEN_QR_CODE) + "Choose what will be displayed by drm_panic, 'user', 'kmsg' or 'qr' [default=" +#else + "Choose what will be displayed by drm_panic, 'user' or 'kmsg' [default=" +#endif + CONFIG_DRM_PANIC_SCREEN "]"); + /* * drm_panic_is_format_supported() * @format: a fourcc color code * @@ -790,11 +829,19 @@ static bool drm_panic_is_format_supported(const struct drm_format_info *format) static void draw_panic_dispatch(struct drm_scanout_buffer *sb) { - if (!strcmp(drm_panic_screen, "kmsg")) { + switch (drm_panic_type) { + case DRM_PANIC_TYPE_KMSG: draw_panic_static_kmsg(sb); - } else if (!strcmp(drm_panic_screen, "qr_code")) { + break; +
+#if IS_ENABLED(CONFIG_DRM_PANIC_SCREEN_QR_CODE) + case DRM_PANIC_TYPE_QR: draw_panic_static_qr_code(sb); - } else { + break; +#endif + + case DRM_PANIC_TYPE_USER: + default: draw_panic_static_user(sb); } } @@ -977,6 +1024,8 @@ void drm_panic_unregister(struct drm_device *dev) */ void __init drm_panic_init(void) { + if (drm_panic_type == -1) + drm_panic_type_set(CONFIG_DRM_PANIC_SCREEN, NULL); drm_panic_qr_init(); } From 1e759ed22a62680c79aab266d73baaa2bee4de9f Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 26 Nov 2025 10:40:08 +0100 Subject: [PATCH 17/94] drm/ast: Move cursor format conversion into helper function Move the format conversion of the cursor framebuffer into the new helper ast_cursor_plane_get_argb4444(). It returns a buffer in system memory, which the atomic_update handler copies to video memory. The returned buffer is either the GEM buffer itself, or a temporary copy within the plane in ARGB4444 format. As a small change, list supported formats explicitly in the switch statement. Do not assume ARGB8888 input by default. The cursor framebuffer knows its format, so should we. Signed-off-by: Thomas Zimmermann Reviewed-by: Jocelyn Falempe Link: https://patch.msgid.link/20251126094626.41985-2-tzimmermann@suse.de --- drivers/gpu/drm/ast/ast_cursor.c | 58 ++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_cursor.c b/drivers/gpu/drm/ast/ast_cursor.c index 2d3ad7610c2e..24d696df8fcd 100644 --- a/drivers/gpu/drm/ast/ast_cursor.c +++ b/drivers/gpu/drm/ast/ast_cursor.c @@ -181,6 +181,38 @@ static int ast_cursor_plane_helper_atomic_check(struct drm_plane *plane, return 0; } +static const u8 *ast_cursor_plane_get_argb4444(struct ast_cursor_plane *ast_cursor_plane, + struct drm_shadow_plane_state *shadow_plane_state, + const struct drm_rect *clip) +{ + struct drm_plane_state *plane_state = &shadow_plane_state->base; + struct drm_framebuffer *fb = plane_state->fb; + u8 *argb4444 = NULL; + + switch (fb->format->format) { + case DRM_FORMAT_ARGB4444: + argb4444 = shadow_plane_state->data[0].vaddr; + break; + case DRM_FORMAT_ARGB8888: + { + struct iosys_map argb4444_dst[DRM_FORMAT_MAX_PLANES] = { + IOSYS_MAP_INIT_VADDR(ast_cursor_plane->argb4444), + }; + unsigned int argb4444_dst_pitch[DRM_FORMAT_MAX_PLANES] = { + AST_HWC_PITCH, + }; + + drm_fb_argb8888_to_argb4444(argb4444_dst, argb4444_dst_pitch, + shadow_plane_state->data, fb, clip, + &shadow_plane_state->fmtcnv_state); + argb4444 = argb4444_dst[0].vaddr; + } + break; + } + + return argb4444; +} + static void ast_cursor_plane_helper_atomic_update(struct drm_plane *plane, struct drm_atomic_state *state) { @@ -205,29 +237,13 @@ static void ast_cursor_plane_helper_atomic_update(struct drm_plane *plane, */ if (drm_atomic_helper_damage_merged(old_plane_state, plane_state, &damage)) { - u8 *argb4444; + const u8 *argb4444 = ast_cursor_plane_get_argb4444(ast_cursor_plane, + shadow_plane_state, + &damage); - switch (fb->format->format) { - case DRM_FORMAT_ARGB4444: - argb4444 = shadow_plane_state->data[0].vaddr; - break; - default: - argb4444 = ast_cursor_plane->argb4444; - { - struct iosys_map argb4444_dst[DRM_FORMAT_MAX_PLANES] = { - IOSYS_MAP_INIT_VADDR(argb4444), - }; - unsigned int argb4444_dst_pitch[DRM_FORMAT_MAX_PLANES] = { - AST_HWC_PITCH, - }; + if (argb4444) + ast_set_cursor_image(ast, argb4444, fb->width, fb->height); - drm_fb_argb8888_to_argb4444(argb4444_dst, argb4444_dst_pitch, - shadow_plane_state->data, fb, &damage, - 
&shadow_plane_state->fmtcnv_state); - } - break; - } - ast_set_cursor_image(ast, argb4444, fb->width, fb->height); ast_set_cursor_base(ast, dst_off); } From ef4ed8621a15ea4979dd6c5bde436ae228c4bdfe Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 26 Nov 2025 10:40:09 +0100 Subject: [PATCH 18/94] drm/ast: Support cursor buffer objects in I/O memory Copy the ARGB4444 cursor buffer to system memory if it is located in I/O memory. While this cannot happen with ast's native GEM objects, an imported buffer object might be on the external device's I/O memory. If the cursor buffer is located in system memory, continue to use it directly. Signed-off-by: Thomas Zimmermann Reviewed-by: Jocelyn Falempe Link: https://patch.msgid.link/20251126094626.41985-3-tzimmermann@suse.de --- drivers/gpu/drm/ast/ast_cursor.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ast/ast_cursor.c b/drivers/gpu/drm/ast/ast_cursor.c index 24d696df8fcd..8d473ed2738c 100644 --- a/drivers/gpu/drm/ast/ast_cursor.c +++ b/drivers/gpu/drm/ast/ast_cursor.c @@ -191,7 +191,20 @@ static const u8 *ast_cursor_plane_get_argb4444(struct ast_cursor_plane *ast_curs switch (fb->format->format) { case DRM_FORMAT_ARGB4444: - argb4444 = shadow_plane_state->data[0].vaddr; + if (shadow_plane_state->data[0].is_iomem) { + struct iosys_map argb4444_dst[DRM_FORMAT_MAX_PLANES] = { + IOSYS_MAP_INIT_VADDR(ast_cursor_plane->argb4444), + }; + unsigned int argb4444_dst_pitch[DRM_FORMAT_MAX_PLANES] = { + AST_HWC_PITCH, + }; + + drm_fb_memcpy(argb4444_dst, argb4444_dst_pitch, + shadow_plane_state->data, fb, clip); + argb4444 = argb4444_dst[0].vaddr; + } else { + argb4444 = shadow_plane_state->data[0].vaddr; + } break; case DRM_FORMAT_ARGB8888: { From 4dfb97060f22c6c5bea995302f0f58936d8f3271 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 26 Nov 2025 10:40:10 +0100 Subject: [PATCH 19/94] drm/ast: Wrap cursor framebuffer access in drm_gem_fb_begin/end_cpu_access() Call drm_gem_fb_begin_cpu_access() and drm_gem_fb_end_cpu_access() around cursor image updates. Imported buffers might have to be synchronized for CPU access before they can be used. Ignore errors from drm_gem_fb_begin_cpu_access(). These errors can often be transitory. The cursor image will be updated on the next frame. Meanwhile display a white square where the cursor would be.
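As a minimal sketch of this access pattern (illustrative only, not the driver code; example_update_cursor and its staging buffer are hypothetical names):

    /* Requires <drm/drm_gem_framebuffer_helper.h> and <linux/dma-direction.h>.
     * Bracket CPU reads of a possibly imported framebuffer with begin/end
     * CPU access; fall back to an all-white image when the buffer cannot
     * be synchronized right now.
     */
    static void example_update_cursor(struct drm_framebuffer *fb,
                                      u8 *staging, size_t staging_size)
    {
            if (drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE) == 0) {
                    /* ... convert fb contents into the staging buffer ... */
                    drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE);
            } else {
                    /* Transient failure: show a white square instead. */
                    memset(staging, 0xff, staging_size);
            }
    }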
Signed-off-by: Thomas Zimmermann Reviewed-by: Jocelyn Falempe Link: https://patch.msgid.link/20251126094626.41985-4-tzimmermann@suse.de --- drivers/gpu/drm/ast/ast_cursor.c | 70 +++++++++++++++++++------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_cursor.c b/drivers/gpu/drm/ast/ast_cursor.c index 8d473ed2738c..30b62d3f0151 100644 --- a/drivers/gpu/drm/ast/ast_cursor.c +++ b/drivers/gpu/drm/ast/ast_cursor.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "ast_drv.h" @@ -189,38 +190,49 @@ static const u8 *ast_cursor_plane_get_argb4444(struct ast_cursor_plane *ast_curs struct drm_framebuffer *fb = plane_state->fb; u8 *argb4444 = NULL; - switch (fb->format->format) { - case DRM_FORMAT_ARGB4444: - if (shadow_plane_state->data[0].is_iomem) { - struct iosys_map argb4444_dst[DRM_FORMAT_MAX_PLANES] = { - IOSYS_MAP_INIT_VADDR(ast_cursor_plane->argb4444), - }; - unsigned int argb4444_dst_pitch[DRM_FORMAT_MAX_PLANES] = { - AST_HWC_PITCH, - }; + if (drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE) == 0) { + switch (fb->format->format) { + case DRM_FORMAT_ARGB4444: + if (shadow_plane_state->data[0].is_iomem) { + struct iosys_map argb4444_dst[DRM_FORMAT_MAX_PLANES] = { + IOSYS_MAP_INIT_VADDR(ast_cursor_plane->argb4444), + }; + unsigned int argb4444_dst_pitch[DRM_FORMAT_MAX_PLANES] = { + AST_HWC_PITCH, + }; - drm_fb_memcpy(argb4444_dst, argb4444_dst_pitch, - shadow_plane_state->data, fb, clip); - argb4444 = argb4444_dst[0].vaddr; - } else { - argb4444 = shadow_plane_state->data[0].vaddr; - } + drm_fb_memcpy(argb4444_dst, argb4444_dst_pitch, + shadow_plane_state->data, fb, clip); + argb4444 = argb4444_dst[0].vaddr; + } else { + argb4444 = shadow_plane_state->data[0].vaddr; + } + break; + case DRM_FORMAT_ARGB8888: + { + struct iosys_map argb4444_dst[DRM_FORMAT_MAX_PLANES] = { + IOSYS_MAP_INIT_VADDR(ast_cursor_plane->argb4444), + }; + unsigned int argb4444_dst_pitch[DRM_FORMAT_MAX_PLANES] = { + AST_HWC_PITCH, + }; - break; - case DRM_FORMAT_ARGB8888: - { - struct iosys_map argb4444_dst[DRM_FORMAT_MAX_PLANES] = { - IOSYS_MAP_INIT_VADDR(ast_cursor_plane->argb4444), - }; - unsigned int argb4444_dst_pitch[DRM_FORMAT_MAX_PLANES] = { - AST_HWC_PITCH, - }; + drm_fb_argb8888_to_argb4444(argb4444_dst, argb4444_dst_pitch, + shadow_plane_state->data, fb, clip, + &shadow_plane_state->fmtcnv_state); + argb4444 = argb4444_dst[0].vaddr; + } + break; } - drm_fb_argb8888_to_argb4444(argb4444_dst, argb4444_dst_pitch, - shadow_plane_state->data, fb, clip, - &shadow_plane_state->fmtcnv_state); - argb4444 = argb4444_dst[0].vaddr; + break; + + drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); + } else { + /* + * Fall back to white square if GEM object is not ready. Gives + * the user an indication where the cursor is located. + */ + memset(ast_cursor_plane->argb4444, 0xff, sizeof(ast_cursor_plane->argb4444)); + argb4444 = ast_cursor_plane->argb4444; } return argb4444; From b1ea3babb67dcb8b0881c2ab49dfba88b1445856 Mon Sep 17 00:00:00 2001 From: Langyan Ye Date: Thu, 27 Nov 2025 20:16:01 +0800 Subject: [PATCH 20/94] drm/panel-edp: Add CSW MNE007QB3-1 Add support for the CSW MNE007QB3-1, place the EDID here for subsequent reference.
00 ff ff ff ff ff ff 00 0e 77 7c 14 00 00 00 00 00 23 01 04 a5 1e 13 78 07 ee 95 a3 54 4c 99 26 0f 50 54 00 00 00 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 35 3c 80 a0 70 b0 23 40 30 20 36 00 2d bc 10 00 00 18 2b 30 80 a0 70 b0 23 40 30 20 36 00 2d bc 10 00 00 18 00 00 00 fd 00 28 3c 4a 4a 0f 01 0a 20 20 20 20 20 20 00 00 00 fc 00 4d 4e 45 30 30 37 51 42 33 2d 31 0a 20 01 5b 70 20 79 02 00 21 00 1d c8 0b 5d 07 80 07 b0 04 00 3d 8a 54 cd a4 99 66 62 0f 02 45 54 40 5e 40 5e 00 44 12 78 2e 00 06 00 44 40 5e 40 5e 81 00 20 74 1a 00 00 03 01 28 3c 00 00 00 00 00 00 3c 00 00 00 00 8d 00 e3 05 04 00 e6 06 01 00 60 60 ff 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 68 90 Signed-off-by: Langyan Ye Signed-off-by: Douglas Anderson Link: https://patch.msgid.link/20251127121601.1608379-1-yelangyan@huaqin.corp-partner.google.com --- drivers/gpu/drm/panel/panel-edp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index 415b894890ad..023fbbb10eb4 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -2033,6 +2033,7 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('C', 'S', 'W', 0x1462, &delay_200_500_e50, "MNE007QS5-2"), EDP_PANEL_ENTRY('C', 'S', 'W', 0x1468, &delay_200_500_e50, "MNE007QB2-2"), EDP_PANEL_ENTRY('C', 'S', 'W', 0x146e, &delay_80_500_e50_d50, "MNE007QB3-1"), + EDP_PANEL_ENTRY('C', 'S', 'W', 0x147c, &delay_200_500_e50_d100, "MNE007QB3-1"), EDP_PANEL_ENTRY('C', 'S', 'W', 0x1519, &delay_200_500_e80_d50, "MNF601BS1-3"), EDP_PANEL_ENTRY('E', 'T', 'C', 0x0000, &delay_50_500_e200_d200_po2e335, "LP079QX1-SP0V"), From f80e89446a460184a5f76c70522be70351a85a11 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 25 Nov 2025 13:52:13 +0100 Subject: [PATCH 21/94] drm/amdgpu: Do not implement mode_set_base_atomic callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove all implementations of the CRTC helper mode_set_base_atomic from amdgpu. It pretends to provide mode setting for kdb debugging, but has been broken for some time. Kdb output has been supported only for non-atomic mode setting since commit 9c79e0b1d096 ("drm/fb-helper: Give up on kgdb for atomic drivers") from 2017. While amdgpu provides non-atomic mode setting for some devices, kdb assumes that the GEM buffer object is at a fixed location in video memory. This has not been the case since commit 087451f372bf ("drm/amdgpu: use generic fb helpers instead of setting up AMD own's.") from 2021. Fbdev-ttm helpers use a shadow buffer with a movable GEM buffer object. Triggering kdb does not update the display. Hence remove the whole kdb support from amdgpu. 
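For illustration, a minimal sketch of the remaining non-atomic pin path (example_pin_primary_fb is a hypothetical name; the real per-chip helpers follow in the diff):

    /* With the atomic path gone, set_base always reserves and pins the
     * bound primary framebuffer in VRAM before scanout is programmed.
     */
    static int example_pin_primary_fb(struct drm_crtc *crtc, u64 *fb_location)
    {
            struct amdgpu_bo *abo;
            int r;

            if (!crtc->primary->fb)
                    return 0; /* no fb bound */

            abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
            r = amdgpu_bo_reserve(abo, false);
            if (unlikely(r != 0))
                    return r;

            abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
            r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
            if (unlikely(r != 0)) {
                    amdgpu_bo_unreserve(abo);
                    return -EINVAL;
            }

            *fb_location = amdgpu_bo_gpu_offset(abo);
            amdgpu_bo_unreserve(abo);
            return 0;
    }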
Signed-off-by: Thomas Zimmermann Acked-by: Christian König Acked-by: Simona Vetter Acked-by: Daniel Thompson (RISCstar) Link: https://patch.msgid.link/20251125130634.1080966-2-tzimmermann@suse.de --- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 35 ++++++++------------------ drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 35 ++++++++------------------ drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 35 ++++++++------------------ 3 files changed, 33 insertions(+), 72 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 72ca6538b2e4..61302204e9b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -1838,7 +1838,7 @@ static void dce_v10_0_grph_enable(struct drm_crtc *crtc, bool enable) static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, struct drm_framebuffer *fb, - int x, int y, int atomic) + int x, int y) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; @@ -1855,15 +1855,12 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, bool bypass_lut = false; /* no fb bound */ - if (!atomic && !crtc->primary->fb) { + if (!crtc->primary->fb) { DRM_DEBUG_KMS("No FB bound\n"); return 0; } - if (atomic) - target_fb = fb; - else - target_fb = crtc->primary->fb; + target_fb = crtc->primary->fb; /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers @@ -1874,13 +1871,11 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, if (unlikely(r != 0)) return r; - if (!atomic) { - abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); - if (unlikely(r != 0)) { - amdgpu_bo_unreserve(abo); - return -EINVAL; - } + abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); + if (unlikely(r != 0)) { + amdgpu_bo_unreserve(abo); + return -EINVAL; } fb_location = amdgpu_bo_gpu_offset(abo); @@ -2068,7 +2063,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, /* set pageflip to happen anywhere in vblank interval */ WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); - if (!atomic && fb && fb != crtc->primary->fb) { + if (fb && fb != crtc->primary->fb) { abo = gem_to_amdgpu_bo(fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) @@ -2611,7 +2606,7 @@ static int dce_v10_0_crtc_mode_set(struct drm_crtc *crtc, amdgpu_atombios_crtc_set_pll(crtc, adjusted_mode); amdgpu_atombios_crtc_set_dtd_timing(crtc, adjusted_mode); - dce_v10_0_crtc_do_set_base(crtc, old_fb, x, y, 0); + dce_v10_0_crtc_do_set_base(crtc, old_fb, x, y); amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode); amdgpu_atombios_crtc_scaler_setup(crtc); dce_v10_0_cursor_reset(crtc); @@ -2659,14 +2654,7 @@ static bool dce_v10_0_crtc_mode_fixup(struct drm_crtc *crtc, static int dce_v10_0_crtc_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb) { - return dce_v10_0_crtc_do_set_base(crtc, old_fb, x, y, 0); -} - -static int dce_v10_0_crtc_set_base_atomic(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int x, int y, enum mode_set_atomic state) -{ - return dce_v10_0_crtc_do_set_base(crtc, fb, x, y, 1); + return dce_v10_0_crtc_do_set_base(crtc, old_fb, x, y); } static const struct drm_crtc_helper_funcs dce_v10_0_crtc_helper_funcs = { @@ -2674,7 +2662,6 @@ static const struct drm_crtc_helper_funcs dce_v10_0_crtc_helper_funcs = { .mode_fixup = dce_v10_0_crtc_mode_fixup, .mode_set = dce_v10_0_crtc_mode_set, .mode_set_base = 
dce_v10_0_crtc_set_base, - .mode_set_base_atomic = dce_v10_0_crtc_set_base_atomic, .prepare = dce_v10_0_crtc_prepare, .commit = dce_v10_0_crtc_commit, .disable = dce_v10_0_crtc_disable, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index acc887a58518..8f4b4c2e36b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -1876,7 +1876,7 @@ static void dce_v6_0_grph_enable(struct drm_crtc *crtc, bool enable) static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, struct drm_framebuffer *fb, - int x, int y, int atomic) + int x, int y) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; @@ -1892,15 +1892,12 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, bool bypass_lut = false; /* no fb bound */ - if (!atomic && !crtc->primary->fb) { + if (!crtc->primary->fb) { DRM_DEBUG_KMS("No FB bound\n"); return 0; } - if (atomic) - target_fb = fb; - else - target_fb = crtc->primary->fb; + target_fb = crtc->primary->fb; /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers @@ -1911,13 +1908,11 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, if (unlikely(r != 0)) return r; - if (!atomic) { - abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); - if (unlikely(r != 0)) { - amdgpu_bo_unreserve(abo); - return -EINVAL; - } + abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); + if (unlikely(r != 0)) { + amdgpu_bo_unreserve(abo); + return -EINVAL; } fb_location = amdgpu_bo_gpu_offset(abo); @@ -2083,7 +2078,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, /* set pageflip to happen anywhere in vblank interval */ WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); - if (!atomic && fb && fb != crtc->primary->fb) { + if (fb && fb != crtc->primary->fb) { abo = gem_to_amdgpu_bo(fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) @@ -2578,7 +2573,7 @@ static int dce_v6_0_crtc_mode_set(struct drm_crtc *crtc, amdgpu_atombios_crtc_set_pll(crtc, adjusted_mode); amdgpu_atombios_crtc_set_dtd_timing(crtc, adjusted_mode); - dce_v6_0_crtc_do_set_base(crtc, old_fb, x, y, 0); + dce_v6_0_crtc_do_set_base(crtc, old_fb, x, y); amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode); amdgpu_atombios_crtc_scaler_setup(crtc); dce_v6_0_cursor_reset(crtc); @@ -2626,14 +2621,7 @@ static bool dce_v6_0_crtc_mode_fixup(struct drm_crtc *crtc, static int dce_v6_0_crtc_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb) { - return dce_v6_0_crtc_do_set_base(crtc, old_fb, x, y, 0); -} - -static int dce_v6_0_crtc_set_base_atomic(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int x, int y, enum mode_set_atomic state) -{ - return dce_v6_0_crtc_do_set_base(crtc, fb, x, y, 1); + return dce_v6_0_crtc_do_set_base(crtc, old_fb, x, y); } static const struct drm_crtc_helper_funcs dce_v6_0_crtc_helper_funcs = { @@ -2641,7 +2629,6 @@ static const struct drm_crtc_helper_funcs dce_v6_0_crtc_helper_funcs = { .mode_fixup = dce_v6_0_crtc_mode_fixup, .mode_set = dce_v6_0_crtc_mode_set, .mode_set_base = dce_v6_0_crtc_set_base, - .mode_set_base_atomic = dce_v6_0_crtc_set_base_atomic, .prepare = dce_v6_0_crtc_prepare, .commit = dce_v6_0_crtc_commit, .disable = dce_v6_0_crtc_disable, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 
2ccd6aad8dd6..9d1853c41fcd 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -1785,7 +1785,7 @@ static void dce_v8_0_grph_enable(struct drm_crtc *crtc, bool enable) static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, struct drm_framebuffer *fb, - int x, int y, int atomic) + int x, int y) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; @@ -1802,15 +1802,12 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, bool bypass_lut = false; /* no fb bound */ - if (!atomic && !crtc->primary->fb) { + if (!crtc->primary->fb) { DRM_DEBUG_KMS("No FB bound\n"); return 0; } - if (atomic) - target_fb = fb; - else - target_fb = crtc->primary->fb; + target_fb = crtc->primary->fb; /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers @@ -1821,13 +1818,11 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, if (unlikely(r != 0)) return r; - if (!atomic) { - abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); - if (unlikely(r != 0)) { - amdgpu_bo_unreserve(abo); - return -EINVAL; - } + abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); + if (unlikely(r != 0)) { + amdgpu_bo_unreserve(abo); + return -EINVAL; } fb_location = amdgpu_bo_gpu_offset(abo); @@ -1995,7 +1990,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, /* set pageflip to happen anywhere in vblank interval */ WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); - if (!atomic && fb && fb != crtc->primary->fb) { + if (fb && fb != crtc->primary->fb) { abo = gem_to_amdgpu_bo(fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) @@ -2537,7 +2532,7 @@ static int dce_v8_0_crtc_mode_set(struct drm_crtc *crtc, amdgpu_atombios_crtc_set_pll(crtc, adjusted_mode); amdgpu_atombios_crtc_set_dtd_timing(crtc, adjusted_mode); - dce_v8_0_crtc_do_set_base(crtc, old_fb, x, y, 0); + dce_v8_0_crtc_do_set_base(crtc, old_fb, x, y); amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode); amdgpu_atombios_crtc_scaler_setup(crtc); dce_v8_0_cursor_reset(crtc); @@ -2585,14 +2580,7 @@ static bool dce_v8_0_crtc_mode_fixup(struct drm_crtc *crtc, static int dce_v8_0_crtc_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb) { - return dce_v8_0_crtc_do_set_base(crtc, old_fb, x, y, 0); -} - -static int dce_v8_0_crtc_set_base_atomic(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int x, int y, enum mode_set_atomic state) -{ - return dce_v8_0_crtc_do_set_base(crtc, fb, x, y, 1); + return dce_v8_0_crtc_do_set_base(crtc, old_fb, x, y); } static const struct drm_crtc_helper_funcs dce_v8_0_crtc_helper_funcs = { @@ -2600,7 +2588,6 @@ static const struct drm_crtc_helper_funcs dce_v8_0_crtc_helper_funcs = { .mode_fixup = dce_v8_0_crtc_mode_fixup, .mode_set = dce_v8_0_crtc_mode_set, .mode_set_base = dce_v8_0_crtc_set_base, - .mode_set_base_atomic = dce_v8_0_crtc_set_base_atomic, .prepare = dce_v8_0_crtc_prepare, .commit = dce_v8_0_crtc_commit, .disable = dce_v8_0_crtc_disable, From 046a10f4d74fd3e022d9b793c5e910dd9df0b1c0 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 25 Nov 2025 13:52:14 +0100 Subject: [PATCH 22/94] drm/nouveau: Do not implement mode_set_base_atomic callback Remove the implementation of the CRTC helper mode_set_base_atomic from nouveau. It pretends to provide mode setting for kdb debugging, but has been broken for some time. 
Kdb output has been supported only for non-atomic mode setting since commit 9c79e0b1d096 ("drm/fb-helper: Give up on kgdb for atomic drivers") from 2017. While nouveau provides non-atomic mode setting for some devices, kdb assumes that the GEM buffer object is at a fixed location in video memory. This has not been the case since commit 4a16dd9d18a0 ("drm/nouveau/kms: switch to drm fbdev helpers") from 2022. Fbdev-ttm helpers use a shadow buffer with a movable GEM buffer object. Triggering kdb does therefore not update the display. Hence remove the whole kdb support from nouveau. Signed-off-by: Thomas Zimmermann Reviewed-by: Lyude Paul Acked-by: Simona Vetter Acked-by: Daniel Thompson (RISCstar) Link: https://patch.msgid.link/20251125130634.1080966-3-tzimmermann@suse.de --- drivers/gpu/drm/nouveau/dispnv04/crtc.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c index c063756eaea3..80493224eb6c 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c +++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c @@ -837,7 +837,7 @@ nv_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b, static int nv04_crtc_do_mode_set_base(struct drm_crtc *crtc, struct drm_framebuffer *passed_fb, - int x, int y, bool atomic) + int x, int y) { struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); struct drm_device *dev = crtc->dev; @@ -850,19 +850,12 @@ nv04_crtc_do_mode_set_base(struct drm_crtc *crtc, NV_DEBUG(drm, "index %d\n", nv_crtc->index); /* no fb bound */ - if (!atomic && !crtc->primary->fb) { + if (!crtc->primary->fb) { NV_DEBUG(drm, "No FB bound\n"); return 0; } - /* If atomic, we want to switch to the fb we were passed, so - * now we update pointers to do that. - */ - if (atomic) { - drm_fb = passed_fb; - } else { - drm_fb = crtc->primary->fb; - } + drm_fb = crtc->primary->fb; nvbo = nouveau_gem_object(drm_fb->obj[0]); nv_crtc->fb.offset = nvbo->offset; @@ -920,15 +913,7 @@ nv04_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y, int ret = nv_crtc_swap_fbs(crtc, old_fb); if (ret) return ret; - return nv04_crtc_do_mode_set_base(crtc, old_fb, x, y, false); -} - -static int -nv04_crtc_mode_set_base_atomic(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int x, int y, enum mode_set_atomic state) -{ - return nv04_crtc_do_mode_set_base(crtc, fb, x, y, true); + return nv04_crtc_do_mode_set_base(crtc, old_fb, x, y); } static void nv04_cursor_upload(struct drm_device *dev, struct nouveau_bo *src, @@ -1274,7 +1259,6 @@ static const struct drm_crtc_helper_funcs nv04_crtc_helper_funcs = { .commit = nv_crtc_commit, .mode_set = nv_crtc_mode_set, .mode_set_base = nv04_crtc_mode_set_base, - .mode_set_base_atomic = nv04_crtc_mode_set_base_atomic, .disable = nv_crtc_disable, .get_scanout_position = nouveau_display_scanoutpos, }; From a22461eddaf6a0f82ca2d02de2e180dcc16b3937 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 25 Nov 2025 13:52:15 +0100 Subject: [PATCH 23/94] drm/radeon: Do not implement mode_set_base_atomic callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the implementation of the CRTC helper mode_set_base_atomic from radeon. It pretends to provide mode setting for kdb debugging, but has been broken for some time. Kdb output has been supported only for non-atomic mode setting since commit 9c79e0b1d096 ("drm/fb-helper: Give up on kgdb for atomic drivers") from 2017. 
While radeon provides non-atomic mode setting, kdb assumes that the GEM buffer object is at a fixed location in video memory. This assumption currently blocks radeon from converting to generic fbdev emulation. Fbdev-ttm helpers use a shadow buffer with a movable GEM buffer object. Triggering kdb does therefore not update the display. Another problem is that the current implementation does not handle USB keyboard input. Therefore a serial terminal is required. Then when continuing from the debugger, radeon fails with an error: [7]kdb> go [ 40.345523][ C7] BUG: scheduling while atomic: bash/1580/0x00110003 [...] [ 40.345613][ C7] schedule+0x27/0xd0 [ 40.345615][ C7] schedule_timeout+0x7b/0x100 [ 40.345617][ C7] ? __pfx_process_timeout+0x10/0x10 [ 40.345619][ C7] msleep+0x31/0x50 [ 40.345621][ C7] radeon_crtc_load_lut+0x2e4/0xcb0 [radeon 31c1ee785de120fcfd0babcc09babb3770252b4e] [ 40.345698][ C7] radeon_crtc_gamma_set+0xe/0x20 [radeon 31c1ee785de120fcfd0babcc09babb3770252b4e] [ 40.345760][ C7] drm_fb_helper_debug_leave+0xd8/0x130 [ 40.345763][ C7] kgdboc_post_exp_handler+0x54/0x70 [...] and the system hangs. Support for kdb feels pretty much broken. Hence remove the whole kdb support from radeon. Signed-off-by: Thomas Zimmermann Acked-by: Christian König Acked-by: Simona Vetter Acked-by: Daniel Thompson (RISCstar) Link: https://patch.msgid.link/20251125130634.1080966-4-tzimmermann@suse.de --- drivers/gpu/drm/radeon/atombios_crtc.c | 74 ++++++--------------- drivers/gpu/drm/radeon/radeon_legacy_crtc.c | 23 ++----- drivers/gpu/drm/radeon/radeon_mode.h | 10 +-- 3 files changed, 26 insertions(+), 81 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 9b3a3a9d60e2..2fc0334e0d6c 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1133,7 +1133,7 @@ static void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode static int dce4_crtc_do_set_base(struct drm_crtc *crtc, struct drm_framebuffer *fb, - int x, int y, int atomic) + int x, int y) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct drm_device *dev = crtc->dev; @@ -1150,33 +1150,23 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, bool bypass_lut = false; /* no fb bound */ - if (!atomic && !crtc->primary->fb) { + if (!crtc->primary->fb) { DRM_DEBUG_KMS("No FB bound\n"); return 0; } - if (atomic) - target_fb = fb; - else - target_fb = crtc->primary->fb; + target_fb = crtc->primary->fb; - /* If atomic, assume fb object is pinned & idle & fenced and - * just update base pointers - */ obj = target_fb->obj[0]; rbo = gem_to_radeon_bo(obj); r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) return r; - if (atomic) - fb_location = radeon_bo_gpu_offset(rbo); - else { - r = radeon_bo_pin(rbo, RADEON_GEM_DOMAIN_VRAM, &fb_location); - if (unlikely(r != 0)) { - radeon_bo_unreserve(rbo); - return -EINVAL; - } + r = radeon_bo_pin(rbo, RADEON_GEM_DOMAIN_VRAM, &fb_location); + if (unlikely(r != 0)) { + radeon_bo_unreserve(rbo); + return -EINVAL; } radeon_bo_get_tiling_flags(rbo, &tiling_flags, NULL); @@ -1437,7 +1427,7 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, /* set pageflip to happen anywhere in vblank interval */ WREG32(EVERGREEN_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0); - if (!atomic && fb && fb != crtc->primary->fb) { + if (fb && fb != crtc->primary->fb) { rbo = gem_to_radeon_bo(fb->obj[0]); r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) @@ -1454,7 +1444,7 @@ static int 
dce4_crtc_do_set_base(struct drm_crtc *crtc, static int avivo_crtc_do_set_base(struct drm_crtc *crtc, struct drm_framebuffer *fb, - int x, int y, int atomic) + int x, int y) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct drm_device *dev = crtc->dev; @@ -1470,15 +1460,12 @@ static int avivo_crtc_do_set_base(struct drm_crtc *crtc, bool bypass_lut = false; /* no fb bound */ - if (!atomic && !crtc->primary->fb) { + if (!crtc->primary->fb) { DRM_DEBUG_KMS("No FB bound\n"); return 0; } - if (atomic) - target_fb = fb; - else - target_fb = crtc->primary->fb; + target_fb = crtc->primary->fb; obj = target_fb->obj[0]; rbo = gem_to_radeon_bo(obj); @@ -1486,17 +1473,10 @@ static int avivo_crtc_do_set_base(struct drm_crtc *crtc, if (unlikely(r != 0)) return r; - /* If atomic, assume fb object is pinned & idle & fenced and - * just update base pointers - */ - if (atomic) - fb_location = radeon_bo_gpu_offset(rbo); - else { - r = radeon_bo_pin(rbo, RADEON_GEM_DOMAIN_VRAM, &fb_location); - if (unlikely(r != 0)) { - radeon_bo_unreserve(rbo); - return -EINVAL; - } + r = radeon_bo_pin(rbo, RADEON_GEM_DOMAIN_VRAM, &fb_location); + if (unlikely(r != 0)) { + radeon_bo_unreserve(rbo); + return -EINVAL; } radeon_bo_get_tiling_flags(rbo, &tiling_flags, NULL); radeon_bo_unreserve(rbo); @@ -1645,7 +1625,7 @@ static int avivo_crtc_do_set_base(struct drm_crtc *crtc, /* set pageflip to happen only at start of vblank interval (front porch) */ WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 3); - if (!atomic && fb && fb != crtc->primary->fb) { + if (fb && fb != crtc->primary->fb) { rbo = gem_to_radeon_bo(fb->obj[0]); r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) @@ -1667,26 +1647,11 @@ int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y, struct radeon_device *rdev = dev->dev_private; if (ASIC_IS_DCE4(rdev)) - return dce4_crtc_do_set_base(crtc, old_fb, x, y, 0); + return dce4_crtc_do_set_base(crtc, old_fb, x, y); else if (ASIC_IS_AVIVO(rdev)) - return avivo_crtc_do_set_base(crtc, old_fb, x, y, 0); + return avivo_crtc_do_set_base(crtc, old_fb, x, y); else - return radeon_crtc_do_set_base(crtc, old_fb, x, y, 0); -} - -int atombios_crtc_set_base_atomic(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int x, int y, enum mode_set_atomic state) -{ - struct drm_device *dev = crtc->dev; - struct radeon_device *rdev = dev->dev_private; - - if (ASIC_IS_DCE4(rdev)) - return dce4_crtc_do_set_base(crtc, fb, x, y, 1); - else if (ASIC_IS_AVIVO(rdev)) - return avivo_crtc_do_set_base(crtc, fb, x, y, 1); - else - return radeon_crtc_do_set_base(crtc, fb, x, y, 1); + return radeon_crtc_do_set_base(crtc, old_fb, x, y); } /* properly set additional regs when using atombios */ @@ -2215,7 +2180,6 @@ static const struct drm_crtc_helper_funcs atombios_helper_funcs = { .mode_fixup = atombios_crtc_mode_fixup, .mode_set = atombios_crtc_mode_set, .mode_set_base = atombios_crtc_set_base, - .mode_set_base_atomic = atombios_crtc_set_base_atomic, .prepare = atombios_crtc_prepare, .commit = atombios_crtc_commit, .disable = atombios_crtc_disable, diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c index 825b351ff53c..a1054c8094d4 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c @@ -360,19 +360,12 @@ static void radeon_crtc_dpms(struct drm_crtc *crtc, int mode) int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb) { - return 
radeon_crtc_do_set_base(crtc, old_fb, x, y, 0); -} - -int radeon_crtc_set_base_atomic(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int x, int y, enum mode_set_atomic state) -{ - return radeon_crtc_do_set_base(crtc, fb, x, y, 1); + return radeon_crtc_do_set_base(crtc, old_fb, x, y); } int radeon_crtc_do_set_base(struct drm_crtc *crtc, struct drm_framebuffer *fb, - int x, int y, int atomic) + int x, int y) { struct drm_device *dev = crtc->dev; struct radeon_device *rdev = dev->dev_private; @@ -390,15 +383,12 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc, DRM_DEBUG_KMS("\n"); /* no fb bound */ - if (!atomic && !crtc->primary->fb) { + if (!crtc->primary->fb) { DRM_DEBUG_KMS("No FB bound\n"); return 0; } - if (atomic) - target_fb = fb; - else - target_fb = crtc->primary->fb; + target_fb = crtc->primary->fb; switch (target_fb->format->cpp[0] * 8) { case 8: @@ -445,7 +435,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc, * We don't shutdown the display controller because new buffer * will end up in same spot. */ - if (!atomic && fb && fb != crtc->primary->fb) { + if (fb && fb != crtc->primary->fb) { struct radeon_bo *old_rbo; unsigned long nsize, osize; @@ -555,7 +545,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc, WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, crtc_offset); WREG32(RADEON_CRTC_PITCH + radeon_crtc->crtc_offset, crtc_pitch); - if (!atomic && fb && fb != crtc->primary->fb) { + if (fb && fb != crtc->primary->fb) { rbo = gem_to_radeon_bo(fb->obj[0]); r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) @@ -1108,7 +1098,6 @@ static const struct drm_crtc_helper_funcs legacy_helper_funcs = { .mode_fixup = radeon_crtc_mode_fixup, .mode_set = radeon_crtc_mode_set, .mode_set_base = radeon_crtc_set_base, - .mode_set_base_atomic = radeon_crtc_set_base_atomic, .prepare = radeon_crtc_prepare, .commit = radeon_crtc_commit, .disable = radeon_crtc_disable, diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 9e34da2cacef..088af85902f7 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -804,10 +804,6 @@ extern bool radeon_encoder_is_digital(struct drm_encoder *encoder); extern void radeon_crtc_load_lut(struct drm_crtc *crtc); extern int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb); -extern int atombios_crtc_set_base_atomic(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int x, int y, - enum mode_set_atomic state); extern int atombios_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode, @@ -817,13 +813,9 @@ extern void atombios_crtc_dpms(struct drm_crtc *crtc, int mode); extern int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb); -extern int radeon_crtc_set_base_atomic(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int x, int y, - enum mode_set_atomic state); extern int radeon_crtc_do_set_base(struct drm_crtc *crtc, struct drm_framebuffer *fb, - int x, int y, int atomic); + int x, int y); extern int radeon_crtc_cursor_set2(struct drm_crtc *crtc, struct drm_file *file_priv, uint32_t handle, From 6ea3aacc8e89298702812a1556eb1e378a80e02b Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 25 Nov 2025 13:52:16 +0100 Subject: [PATCH 24/94] drm/fbdev-helper: Remove drm_fb_helper_debug_enter/_leave() Remove the debug_enter/debug_leave helpers, as there are no DRM drivers supporting debugging with kgdb. 
Remove code to keep track of existing fbdev-emulation state. None of this is required any longer. Also remove mode_set_base_atomic from struct drm_crtc_helper_funcs, which has no callers or implementations. Signed-off-by: Thomas Zimmermann Reviewed-by: Simona Vetter Acked-by: Daniel Thompson (RISCstar) Link: https://patch.msgid.link/20251125130634.1080966-5-tzimmermann@suse.de --- drivers/gpu/drm/drm_fb_helper.c | 108 ----------------------- include/drm/drm_fb_helper.h | 21 ----- include/drm/drm_modeset_helper_vtables.h | 23 ----- 3 files changed, 152 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index be790fc68707..49a84396b440 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -77,9 +77,6 @@ MODULE_PARM_DESC(drm_leak_fbdev_smem, "Allow unsafe leaking fbdev physical smem address [default=false]"); #endif -static LIST_HEAD(kernel_fb_helper_list); -static DEFINE_MUTEX(kernel_fb_helper_lock); - /** * DOC: fbdev helpers * @@ -117,101 +114,6 @@ static DEFINE_MUTEX(kernel_fb_helper_lock); * mmap page writes. */ -static void drm_fb_helper_restore_lut_atomic(struct drm_crtc *crtc) -{ - uint16_t *r_base, *g_base, *b_base; - - if (crtc->funcs->gamma_set == NULL) - return; - - r_base = crtc->gamma_store; - g_base = r_base + crtc->gamma_size; - b_base = g_base + crtc->gamma_size; - - crtc->funcs->gamma_set(crtc, r_base, g_base, b_base, - crtc->gamma_size, NULL); -} - -/** - * drm_fb_helper_debug_enter - implementation for &fb_ops.fb_debug_enter - * @info: fbdev registered by the helper - */ -int drm_fb_helper_debug_enter(struct fb_info *info) -{ - struct drm_fb_helper *helper = info->par; - const struct drm_crtc_helper_funcs *funcs; - struct drm_mode_set *mode_set; - - list_for_each_entry(helper, &kernel_fb_helper_list, kernel_fb_list) { - mutex_lock(&helper->client.modeset_mutex); - drm_client_for_each_modeset(mode_set, &helper->client) { - if (!mode_set->crtc->enabled) - continue; - - funcs = mode_set->crtc->helper_private; - if (funcs->mode_set_base_atomic == NULL) - continue; - - if (drm_drv_uses_atomic_modeset(mode_set->crtc->dev)) - continue; - - funcs->mode_set_base_atomic(mode_set->crtc, - mode_set->fb, - mode_set->x, - mode_set->y, - ENTER_ATOMIC_MODE_SET); - } - mutex_unlock(&helper->client.modeset_mutex); - } - - return 0; -} -EXPORT_SYMBOL(drm_fb_helper_debug_enter); - -/** - * drm_fb_helper_debug_leave - implementation for &fb_ops.fb_debug_leave - * @info: fbdev registered by the helper - */ -int drm_fb_helper_debug_leave(struct fb_info *info) -{ - struct drm_fb_helper *helper = info->par; - struct drm_client_dev *client = &helper->client; - struct drm_device *dev = helper->dev; - struct drm_crtc *crtc; - const struct drm_crtc_helper_funcs *funcs; - struct drm_mode_set *mode_set; - struct drm_framebuffer *fb; - - mutex_lock(&client->modeset_mutex); - drm_client_for_each_modeset(mode_set, client) { - crtc = mode_set->crtc; - if (drm_drv_uses_atomic_modeset(crtc->dev)) - continue; - - funcs = crtc->helper_private; - fb = crtc->primary->fb; - - if (!crtc->enabled) - continue; - - if (!fb) { - drm_err(dev, "no fb to restore?\n"); - continue; - } - - if (funcs->mode_set_base_atomic == NULL) - continue; - - drm_fb_helper_restore_lut_atomic(mode_set->crtc); - funcs->mode_set_base_atomic(mode_set->crtc, fb, crtc->x, - crtc->y, LEAVE_ATOMIC_MODE_SET); - } - mutex_unlock(&client->modeset_mutex); - - return 0; -} -EXPORT_SYMBOL(drm_fb_helper_debug_leave); - static int __drm_fb_helper_restore_fbdev_mode_unlocked(struct 
drm_fb_helper *fb_helper, bool force) @@ -399,7 +301,6 @@ void drm_fb_helper_prepare(struct drm_device *dev, struct drm_fb_helper *helper, if (!preferred_bpp) preferred_bpp = 32; - INIT_LIST_HEAD(&helper->kernel_fb_list); spin_lock_init(&helper->damage_lock); INIT_WORK(&helper->resume_work, drm_fb_helper_resume_worker); INIT_WORK(&helper->damage_work, drm_fb_helper_damage_work); @@ -541,11 +442,6 @@ void drm_fb_helper_fini(struct drm_fb_helper *fb_helper) drm_fb_helper_release_info(fb_helper); - mutex_lock(&kernel_fb_helper_lock); - if (!list_empty(&fb_helper->kernel_fb_list)) - list_del(&fb_helper->kernel_fb_list); - mutex_unlock(&kernel_fb_helper_lock); - if (!fb_helper->client.funcs) drm_client_release(&fb_helper->client); } @@ -1780,10 +1676,6 @@ __drm_fb_helper_initial_config_and_unlock(struct drm_fb_helper *fb_helper) drm_info(dev, "fb%d: %s frame buffer device\n", info->node, info->fix.id); - mutex_lock(&kernel_fb_helper_lock); - list_add(&fb_helper->kernel_fb_list, &kernel_fb_helper_list); - mutex_unlock(&kernel_fb_helper_lock); - return 0; err_drm_fb_helper_release_info: diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index dd9a18f8de5a..05cca77b7249 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -166,13 +166,6 @@ struct drm_fb_helper { */ struct mutex lock; - /** - * @kernel_fb_list: - * - * Entry on the global kernel_fb_helper_list, used for kgdb entry/exit. - */ - struct list_head kernel_fb_list; - /** * @delayed_hotplug: * @@ -236,8 +229,6 @@ drm_fb_helper_from_client(struct drm_client_dev *client) .fb_setcmap = drm_fb_helper_setcmap, \ .fb_blank = drm_fb_helper_blank, \ .fb_pan_display = drm_fb_helper_pan_display, \ - .fb_debug_enter = drm_fb_helper_debug_enter, \ - .fb_debug_leave = drm_fb_helper_debug_leave, \ .fb_ioctl = drm_fb_helper_ioctl #ifdef CONFIG_DRM_FBDEV_EMULATION @@ -280,8 +271,6 @@ int drm_fb_helper_ioctl(struct fb_info *info, unsigned int cmd, int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper); int drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper); -int drm_fb_helper_debug_enter(struct fb_info *info); -int drm_fb_helper_debug_leave(struct fb_info *info); #else static inline void drm_fb_helper_prepare(struct drm_device *dev, struct drm_fb_helper *helper, @@ -387,16 +376,6 @@ static inline int drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper) { return 0; } - -static inline int drm_fb_helper_debug_enter(struct fb_info *info) -{ - return 0; -} - -static inline int drm_fb_helper_debug_leave(struct fb_info *info) -{ - return 0; -} #endif #endif diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h index fe32854b7ffe..3e68213958dd 100644 --- a/include/drm/drm_modeset_helper_vtables.h +++ b/include/drm/drm_modeset_helper_vtables.h @@ -52,11 +52,6 @@ struct drm_scanout_buffer; struct drm_writeback_connector; struct drm_writeback_job; -enum mode_set_atomic { - LEAVE_ATOMIC_MODE_SET, - ENTER_ATOMIC_MODE_SET, -}; - /** * struct drm_crtc_helper_funcs - helper operations for CRTCs * @@ -253,24 +248,6 @@ struct drm_crtc_helper_funcs { int (*mode_set_base)(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb); - /** - * @mode_set_base_atomic: - * - * This callback is used by the fbdev helpers to set a new framebuffer - * and scanout without sleeping, i.e. from an atomic calling context. It - * is only used to implement kgdb support. - * - * This callback is optional and only needed for kgdb support in the fbdev - * helpers. 
- * - * RETURNS: - * - * 0 on success or a negative error code on failure. - */ - int (*mode_set_base_atomic)(struct drm_crtc *crtc, - struct drm_framebuffer *fb, int x, int y, - enum mode_set_atomic); - /** * @disable: * From 7068d42048dab5eb71a0d65388f64f1e0ca5b9ee Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 25 Nov 2025 13:52:17 +0100 Subject: [PATCH 25/94] fbcon: Remove fb_debug_enter/_leave from struct fb_ops There are no implementations of fb_debug_enter and fb_debug_leave. Remove the callbacks from struct fb_ops and clean up the caller. The field save_graphics in fbcon_par is also no longer required. Remove it as well. Signed-off-by: Thomas Zimmermann Reviewed-by: Simona Vetter Acked-by: Daniel Thompson (RISCstar) Link: https://patch.msgid.link/20251125130634.1080966-6-tzimmermann@suse.de --- Documentation/process/debugging/kgdb.rst | 28 ------------------------ drivers/video/fbdev/core/fbcon.c | 24 -------------------- drivers/video/fbdev/core/fbcon.h | 1 - include/linux/fb.h | 4 ---- 4 files changed, 57 deletions(-) diff --git a/Documentation/process/debugging/kgdb.rst b/Documentation/process/debugging/kgdb.rst index b29b0aac2717..773b19aa1382 100644 --- a/Documentation/process/debugging/kgdb.rst +++ b/Documentation/process/debugging/kgdb.rst @@ -889,34 +889,6 @@ in the virtual console layer. On resuming kernel execution, the kernel debugger calls kgdboc_post_exp_handler() which in turn calls con_debug_leave(). -Any video driver that wants to be compatible with the kernel debugger -and the atomic kms callbacks must implement the ``mode_set_base_atomic``, -``fb_debug_enter`` and ``fb_debug_leave operations``. For the -``fb_debug_enter`` and ``fb_debug_leave`` the option exists to use the -generic drm fb helper functions or implement something custom for the -hardware. The following example shows the initialization of the -.mode_set_base_atomic operation in -drivers/gpu/drm/i915/intel_display.c:: - - - static const struct drm_crtc_helper_funcs intel_helper_funcs = { - [...] - .mode_set_base_atomic = intel_pipe_set_base_atomic, - [...] - }; - - -Here is an example of how the i915 driver initializes the -fb_debug_enter and fb_debug_leave functions to use the generic drm -helpers in ``drivers/gpu/drm/i915/intel_fb.c``:: - - - static struct fb_ops intelfb_ops = { - [...] - .fb_debug_enter = drm_fb_helper_debug_enter, - .fb_debug_leave = drm_fb_helper_debug_leave, - [...] 
- }; Credits diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 7f35ad66b462..e2e69aab6680 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2287,28 +2287,6 @@ static bool fbcon_blank(struct vc_data *vc, enum vesa_blank_mode blank, return false; } -static void fbcon_debug_enter(struct vc_data *vc) -{ - struct fb_info *info = fbcon_info_from_console(vc->vc_num); - struct fbcon_par *par = info->fbcon_par; - - par->save_graphics = par->graphics; - par->graphics = 0; - if (info->fbops->fb_debug_enter) - info->fbops->fb_debug_enter(info); - fbcon_set_palette(vc, color_table); -} - -static void fbcon_debug_leave(struct vc_data *vc) -{ - struct fb_info *info = fbcon_info_from_console(vc->vc_num); - struct fbcon_par *par = info->fbcon_par; - - par->graphics = par->save_graphics; - if (info->fbops->fb_debug_leave) - info->fbops->fb_debug_leave(info); -} - static int fbcon_get_font(struct vc_data *vc, struct console_font *font, unsigned int vpitch) { u8 *fontdata = vc->vc_font.data; @@ -3186,8 +3164,6 @@ static const struct consw fb_con = { .con_set_palette = fbcon_set_palette, .con_invert_region = fbcon_invert_region, .con_resize = fbcon_resize, - .con_debug_enter = fbcon_debug_enter, - .con_debug_leave = fbcon_debug_leave, }; static ssize_t rotate_store(struct device *device, diff --git a/drivers/video/fbdev/core/fbcon.h b/drivers/video/fbdev/core/fbcon.h index 44ea4ae4bba0..1cd10a7faab0 100644 --- a/drivers/video/fbdev/core/fbcon.h +++ b/drivers/video/fbdev/core/fbcon.h @@ -79,7 +79,6 @@ struct fbcon_par { int cursor_reset; int blank_state; int graphics; - int save_graphics; /* for debug enter/leave */ bool initialized; int rotate; int cur_rotate; diff --git a/include/linux/fb.h b/include/linux/fb.h index 05cc251035da..65fb70382675 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -304,10 +304,6 @@ struct fb_ops { /* teardown any resources to do with this framebuffer */ void (*fb_destroy)(struct fb_info *info); - - /* called at KDB enter and leave time to prepare the console */ - int (*fb_debug_enter)(struct fb_info *info); - int (*fb_debug_leave)(struct fb_info *info); }; #ifdef CONFIG_FB_TILEBLITTING From 9d56cbaf12037e8ce7ead9f8f8f9000e4784f2eb Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Fri, 7 Nov 2025 14:57:00 +0100 Subject: [PATCH 26/94] drm/todo: Add section with task for GPU scheduler The GPU scheduler has a great many problems and deserves its own TODO section. Add a section and a first task describing the problem of drm_sched_resubmit_jobs() being deprecated without a successor. Acked-by: Dave Airlie Signed-off-by: Philipp Stanner Link: https://patch.msgid.link/20251107135701.244659-3-phasta@kernel.org --- Documentation/gpu/todo.rst | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 9013ced318cb..572a5611dd0c 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -878,6 +878,37 @@ Contact: Christian König Level: Starter +DRM GPU Scheduler +================= + +Provide a universal successor for drm_sched_resubmit_jobs() +----------------------------------------------------------- + +drm_sched_resubmit_jobs() is deprecated. Main reason being that it leads to +reinitializing dma_fences. See that function's docu for details. The better +approach for valid resubmissions by amdgpu and Xe is (apparently) to figure out +which job (and, through association: which entity) caused the hang. 
Then, the +job's buffer data, together with all other jobs' buffer data currently in the +same hardware ring, must be invalidated. This can for example be done by +overwriting it. amdgpu currently determines which jobs are in the ring and need +to be overwritten by keeping copies of the job. Xe obtains that information by +directly accessing drm_sched's pending_list. + +Tasks: + +1. implement scheduler functionality through which the driver can obtain the + information which *broken* jobs are currently in the hardware ring. +2. Such infrastructure would then typically be used in + drm_sched_backend_ops.timedout_job(). Document that. +3. Port a driver as first user. +4. Document the new alternative in the docu of deprecated + drm_sched_resubmit_jobs(). + +Contact: Christian König + Philipp Stanner + +Level: Advanced + Outside DRM =========== From 439be5c580e553c8777d5533db5892e773f81d40 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Fri, 7 Nov 2025 14:57:01 +0100 Subject: [PATCH 27/94] drm/todo: Add entry for unlocked drm/sched rq readers Runqueues are currently almost everywhere being read unlocked in drm/sched. At XDC 2025, the assembled developers were unsure whether that's legal and whether it can be fixed. Someone should find out. Add a todo entry for the unlocked runqueue reader problem. Acked-by: Dave Airlie Signed-off-by: Philipp Stanner Link: https://patch.msgid.link/20251107135701.244659-4-phasta@kernel.org --- Documentation/gpu/todo.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 572a5611dd0c..22487ac1b011 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -909,6 +909,20 @@ Contact: Christian König Level: Advanced +Add locking for runqueues +------------------------- + +There is an old FIXME by Sima in include/drm/gpu_scheduler.h. It details that +struct drm_sched_rq is read at many places without any locks, not even with a +READ_ONCE. At XDC 2025 no one could really tell why that is the case, whether +locks are needed and whether they could be added. (But for real, that should +probably be locked!). Check whether it's possible to add locks everywhere, and +do so if yes. + +Contact: Philipp Stanner + +Level: Intermediate + Outside DRM =========== From 3d3ac202c7df0923dee7e182c95d170cf9345a9f Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Tue, 2 Dec 2025 08:54:27 -0800 Subject: [PATCH 28/94] accel/amdxdna: Poll MPNPU_PWAITMODE after requesting firmware suspend After issuing a firmware suspend request, the driver must ensure that the suspend operation has completed before proceeding. Add polling of the MPNPU_PWAITMODE register to confirm that the firmware has fully entered the suspended state. This prevents race conditions where subsequent operations assume the firmware is idle before it has actually completed its suspend sequence. 
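A minimal sketch of such a poll (illustrative; it assumes the wait-mode state is reported in bit 0 of the register, and example_poll_waitmode is a hypothetical name):

    /* Requires <linux/iopoll.h>. Poll @reg until the firmware reports
     * wait mode in bit 0, or give up with -ETIMEDOUT.
     */
    static int example_poll_waitmode(void __iomem *reg)
    {
            u32 val;

            /* 100 us between reads, 100 ms total poll window */
            return readx_poll_timeout(readl, reg, val, (val & 0x1) == 1,
                                      100, 100000);
    }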
Reviewed-by: Mario Limonciello (AMD) Reviewed-by: Maciej Falkowski Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20251202165427.507414-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_message.c | 9 ++++++++- drivers/accel/amdxdna/aie2_pci.h | 2 ++ drivers/accel/amdxdna/aie2_psp.c | 15 +++++++++++++++ drivers/accel/amdxdna/npu1_regs.c | 2 ++ drivers/accel/amdxdna/npu2_regs.c | 2 ++ drivers/accel/amdxdna/npu4_regs.c | 2 ++ drivers/accel/amdxdna/npu5_regs.c | 2 ++ drivers/accel/amdxdna/npu6_regs.c | 2 ++ 8 files changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index d493bb1c3360..fee3b0627aba 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -59,8 +59,15 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev, int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev) { DECLARE_AIE2_MSG(suspend, MSG_OP_SUSPEND); + int ret; - return aie2_send_mgmt_msg_wait(ndev, &msg); + ret = aie2_send_mgmt_msg_wait(ndev, &msg); + if (ret) { + XDNA_ERR(ndev->xdna, "Failed to suspend fw, ret %d", ret); + return ret; + } + + return aie2_psp_waitmode_poll(ndev->psp_hdl); } int aie2_resume_fw(struct amdxdna_dev_hdl *ndev) diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h index a5f9c42155d1..cc9f933f80b2 100644 --- a/drivers/accel/amdxdna/aie2_pci.h +++ b/drivers/accel/amdxdna/aie2_pci.h @@ -70,6 +70,7 @@ enum psp_reg_idx { PSP_INTR_REG = PSP_NUM_IN_REGS, PSP_STATUS_REG, PSP_RESP_REG, + PSP_PWAITMODE_REG, PSP_MAX_REGS /* Keep this at the end */ }; @@ -290,6 +291,7 @@ int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf); int aie2_psp_start(struct psp_device *psp); void aie2_psp_stop(struct psp_device *psp); +int aie2_psp_waitmode_poll(struct psp_device *psp); /* aie2_error.c */ int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev); diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c index f28a060a8810..3a7130577e3e 100644 --- a/drivers/accel/amdxdna/aie2_psp.c +++ b/drivers/accel/amdxdna/aie2_psp.c @@ -76,6 +76,21 @@ static int psp_exec(struct psp_device *psp, u32 *reg_vals) return 0; } +int aie2_psp_waitmode_poll(struct psp_device *psp) +{ + struct amdxdna_dev *xdna = to_xdna_dev(psp->ddev); + u32 mode_reg; + int ret; + + ret = readx_poll_timeout(readl, PSP_REG(psp, PSP_PWAITMODE_REG), mode_reg, + (mode_reg & 0x1) == 1, + PSP_POLL_INTERVAL, PSP_POLL_TIMEOUT); + if (ret) + XDNA_ERR(xdna, "fw waitmode reg error, ret %d", ret); + + return ret; +} + void aie2_psp_stop(struct psp_device *psp) { u32 reg_vals[PSP_NUM_IN_REGS] = { PSP_RELEASE_TMR, }; diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c index ec407f3b48fc..ebc6e2802297 100644 --- a/drivers/accel/amdxdna/npu1_regs.c +++ b/drivers/accel/amdxdna/npu1_regs.c @@ -13,6 +13,7 @@ #include "amdxdna_pci_drv.h" /* Address definition from NPU1 docs */ +#define MPNPU_PWAITMODE 0x3010034 #define MPNPU_PUB_SEC_INTR 0x3010090 #define MPNPU_PUB_PWRMGMT_INTR 0x3010094 #define MPNPU_PUB_SCRATCH2 0x30100A0 @@ -92,6 +93,7 @@ static const struct amdxdna_dev_priv npu1_dev_priv = { DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU1_PSP, MPNPU_PUB_SEC_INTR), DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU1_PSP, MPNPU_PUB_SCRATCH2), DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU1_PSP, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_PWAITMODE_REG, NPU1_PSP, 
MPNPU_PWAITMODE), }, .smu_regs_off = { DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU1_SMU, MPNPU_PUB_SCRATCH5), diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c index 86f87d0d1354..ad0743fb06d5 100644 --- a/drivers/accel/amdxdna/npu2_regs.c +++ b/drivers/accel/amdxdna/npu2_regs.c @@ -13,6 +13,7 @@ #include "amdxdna_pci_drv.h" /* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */ +#define MPNPU_PWAITMODE 0x301003C #define MPNPU_PUB_SEC_INTR 0x3010060 #define MPNPU_PUB_PWRMGMT_INTR 0x3010064 #define MPNPU_PUB_SCRATCH0 0x301006C @@ -85,6 +86,7 @@ static const struct amdxdna_dev_priv npu2_dev_priv = { DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU2_PSP, MP0_C2PMSG_73), DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU2_PSP, MP0_C2PMSG_123), DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU2_REG, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_PWAITMODE_REG, NPU2_REG, MPNPU_PWAITMODE), }, .smu_regs_off = { DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU2_SMU, MP1_C2PMSG_0), diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c index 986a5f28ba24..4ca21db70478 100644 --- a/drivers/accel/amdxdna/npu4_regs.c +++ b/drivers/accel/amdxdna/npu4_regs.c @@ -13,6 +13,7 @@ #include "amdxdna_pci_drv.h" /* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */ +#define MPNPU_PWAITMODE 0x301003C #define MPNPU_PUB_SEC_INTR 0x3010060 #define MPNPU_PUB_PWRMGMT_INTR 0x3010064 #define MPNPU_PUB_SCRATCH0 0x301006C @@ -116,6 +117,7 @@ static const struct amdxdna_dev_priv npu4_dev_priv = { DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU4_PSP, MP0_C2PMSG_73), DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU4_PSP, MP0_C2PMSG_123), DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU4_REG, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_PWAITMODE_REG, NPU4_REG, MPNPU_PWAITMODE), }, .smu_regs_off = { DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU4_SMU, MP1_C2PMSG_0), diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c index 75ad97f0b937..131080652ef0 100644 --- a/drivers/accel/amdxdna/npu5_regs.c +++ b/drivers/accel/amdxdna/npu5_regs.c @@ -13,6 +13,7 @@ #include "amdxdna_pci_drv.h" /* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */ +#define MPNPU_PWAITMODE 0x301003C #define MPNPU_PUB_SEC_INTR 0x3010060 #define MPNPU_PUB_PWRMGMT_INTR 0x3010064 #define MPNPU_PUB_SCRATCH0 0x301006C @@ -85,6 +86,7 @@ static const struct amdxdna_dev_priv npu5_dev_priv = { DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU5_PSP, MP0_C2PMSG_73), DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU5_PSP, MP0_C2PMSG_123), DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU5_REG, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_PWAITMODE_REG, NPU5_REG, MPNPU_PWAITMODE), }, .smu_regs_off = { DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU5_SMU, MP1_C2PMSG_0), diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c index 758dc013fe13..1f71285655b2 100644 --- a/drivers/accel/amdxdna/npu6_regs.c +++ b/drivers/accel/amdxdna/npu6_regs.c @@ -13,6 +13,7 @@ #include "amdxdna_pci_drv.h" /* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */ +#define MPNPU_PWAITMODE 0x301003C #define MPNPU_PUB_SEC_INTR 0x3010060 #define MPNPU_PUB_PWRMGMT_INTR 0x3010064 #define MPNPU_PUB_SCRATCH0 0x301006C @@ -85,6 +86,7 @@ static const struct amdxdna_dev_priv npu6_dev_priv = { DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU6_PSP, MP0_C2PMSG_73), DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU6_PSP, MP0_C2PMSG_123), DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU6_REG, MPNPU_PUB_SCRATCH3), + DEFINE_BAR_OFFSET(PSP_PWAITMODE_REG, NPU6_REG, MPNPU_PWAITMODE), }, .smu_regs_off = { 
DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU6_SMU, MP1_C2PMSG_0),
From 41f231179a45068fdde2c6c9aa70cbca2eb11b49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 18 Sep 2025 13:52:54 +0200 Subject: [PATCH 29/94] dma-buf: replace "#if" with just "if" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

No need to conditionally compile that code; let the compiler's dead code elimination handle it instead.

Signed-off-by: Christian König Reviewed-by: Michael J. Ruhl Link: https://lore.kernel.org/r/20251006134713.1846-1-christian.koenig@amd.com --- drivers/dma-buf/dma-buf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 2bcf9ceca997..2305bb2cc1f1 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -1141,8 +1141,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, } mangle_sg_table(sg_table); -#ifdef CONFIG_DMA_API_DEBUG - { + if (IS_ENABLED(CONFIG_DMA_API_DEBUG)) { struct scatterlist *sg; u64 addr; int len; @@ -1154,10 +1153,10 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(len)) { pr_debug("%s: addr %llx or len %x is not page aligned!\n", __func__, addr, len); + break; } } } -#endif /* CONFIG_DMA_API_DEBUG */ return sg_table; error_unmap:

From 657803cbcafde8caf11d207b042384577a3d5f17 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 3 Dec 2025 13:17:48 +0100 Subject: [PATCH 30/94] drm/panthor: Drop a WARN_ON() in group_free_queue()

It appears the timeout can still be enabled when we reach that point, because the asynchronous progress check done on queues resets the timer when jobs are still in-flight but progress was made. We could add more checks to make sure the timer is not re-enabled when a group can't run anymore, but we don't have a group to pass to queue_check_job_completion() in some contexts. It's just as safe (we just want to be sure the timer is stopped before we destroy the queue) and simpler to drop the WARN_ON() in group_free_queue().

v2: - Collect R-bs

Signed-off-by: Boris Brezillon Reviewed-by: Liviu Dudau Reviewed-by: Chia-I Wu Signed-off-by: Liviu Dudau Link: https://patch.msgid.link/20251203121750.404340-2-boris.brezillon@collabora.com --- drivers/gpu/drm/panthor/panthor_sched.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 389d508b3848..203f6a0a6b9a 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -893,9 +893,8 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue * if (IS_ERR_OR_NULL(queue)) return; - /* This should have been disabled before that point. */ - drm_WARN_ON(&group->ptdev->base, - disable_delayed_work_sync(&queue->timeout.work)); + /* Disable the timeout before tearing down drm_sched components. */ + disable_delayed_work_sync(&queue->timeout.work); if (queue->entity.fence_context) drm_sched_entity_destroy(&queue->entity);

From 32e593d74c39249ae14c8f0de88eec677c621aa7 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 3 Dec 2025 13:17:49 +0100 Subject: [PATCH 31/94] drm/panthor: Make sure caches are flushed/invalidated when an AS is recycled

When we re-assign a slot to a different VM, we need to make sure the old VM caches are flushed before doing the switch.
Specialize panthor_mmu_as_disable() so we can skip the slot programming while still getting the cache flushing, and call this helper from panthor_vm_active() when an idle slot is recycled.

v2: - Collect R-bs

Fixes: 6e2d3b3e8589 ("drm/panthor: Add support for atomic page table updates") Signed-off-by: Boris Brezillon Reviewed-by: Liviu Dudau Reviewed-by: Chia-I Wu Signed-off-by: Liviu Dudau Link: https://patch.msgid.link/20251203121750.404340-3-boris.brezillon@collabora.com --- drivers/gpu/drm/panthor/panthor_mmu.c | 26 ++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index 8ba5259e3d28..3644af1a8e56 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -585,7 +585,8 @@ static int panthor_mmu_as_enable(struct panthor_device *ptdev, u32 as_nr, return as_send_cmd_and_wait(ptdev, as_nr, AS_COMMAND_UPDATE); } -static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr) +static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr, + bool recycle_slot) { int ret; @@ -595,6 +596,12 @@ static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr) if (ret) return ret; + /* If the slot is going to be used immediately, don't bother changing + * the config. + */ + if (recycle_slot) + return 0; + gpu_write64(ptdev, AS_TRANSTAB(as_nr), 0); gpu_write64(ptdev, AS_MEMATTR(as_nr), 0); gpu_write64(ptdev, AS_TRANSCFG(as_nr), AS_TRANSCFG_ADRMODE_UNMAPPED); @@ -714,6 +721,11 @@ int panthor_vm_active(struct panthor_vm *vm) drm_WARN_ON(&ptdev->base, refcount_read(&lru_vm->as.active_cnt)); as = lru_vm->as.id; + + ret = panthor_mmu_as_disable(ptdev, as, true); + if (ret) + goto out_unlock; + panthor_vm_release_as_locked(lru_vm); } @@ -853,7 +865,7 @@ static void panthor_vm_declare_unusable(struct panthor_vm *vm) vm->unusable = true; mutex_lock(&ptdev->mmu->as.slots_lock); if (vm->as.id >= 0 && drm_dev_enter(&ptdev->base, &cookie)) { - panthor_mmu_as_disable(ptdev, vm->as.id); + panthor_mmu_as_disable(ptdev, vm->as.id, false); drm_dev_exit(cookie); } mutex_unlock(&ptdev->mmu->as.slots_lock); @@ -1780,7 +1792,7 @@ static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status) ptdev->mmu->as.slots[as].vm->unhandled_fault = true; /* Disable the MMU to kill jobs on this AS.
*/ - panthor_mmu_as_disable(ptdev, as); + panthor_mmu_as_disable(ptdev, as, false); mutex_unlock(&ptdev->mmu->as.slots_lock); status &= ~mask; @@ -1809,7 +1821,8 @@ void panthor_mmu_suspend(struct panthor_device *ptdev) struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm; if (vm) { - drm_WARN_ON(&ptdev->base, panthor_mmu_as_disable(ptdev, i)); + drm_WARN_ON(&ptdev->base, + panthor_mmu_as_disable(ptdev, i, false)); panthor_vm_release_as_locked(vm); } } @@ -1930,7 +1943,7 @@ static void panthor_vm_free(struct drm_gpuvm *gpuvm) int cookie; if (drm_dev_enter(&ptdev->base, &cookie)) { - panthor_mmu_as_disable(ptdev, vm->as.id); + panthor_mmu_as_disable(ptdev, vm->as.id, false); drm_dev_exit(cookie); } @@ -2790,7 +2803,8 @@ void panthor_mmu_unplug(struct panthor_device *ptdev) struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm; if (vm) { - drm_WARN_ON(&ptdev->base, panthor_mmu_as_disable(ptdev, i)); + drm_WARN_ON(&ptdev->base, + panthor_mmu_as_disable(ptdev, i, false)); panthor_vm_release_as_locked(vm); } }

From 276e411604b3a90ec9d243075f976e458139a006 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 3 Dec 2025 13:17:50 +0100 Subject: [PATCH 32/94] drm/panthor: Unlock the locked region before disabling an AS

An AS can be disabled in the middle of a VM operation (VM being evicted from an AS slot, for instance). In that case, we need the locked section to be unlocked before releasing the slot.

v2: - Add a lockdep_assert_held() in panthor_mmu_as_disable() - Collect R-bs v3: - Don't reset the locked_region range in the as_disable() path

Fixes: 6e2d3b3e8589 ("drm/panthor: Add support for atomic page table updates") Signed-off-by: Boris Brezillon Reviewed-by: Liviu Dudau Reviewed-by: Chia-I Wu Signed-off-by: Liviu Dudau Link: https://patch.msgid.link/20251203121750.404340-4-boris.brezillon@collabora.com --- drivers/gpu/drm/panthor/panthor_mmu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index 3644af1a8e56..ca112d874ecb 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -588,14 +588,24 @@ static int panthor_mmu_as_enable(struct panthor_device *ptdev, u32 as_nr, static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr, bool recycle_slot) { + struct panthor_vm *vm = ptdev->mmu->as.slots[as_nr].vm; int ret; + lockdep_assert_held(&ptdev->mmu->as.slots_lock); + /* Flush+invalidate RW caches, invalidate RO ones. */ ret = panthor_gpu_flush_caches(ptdev, CACHE_CLEAN | CACHE_INV, CACHE_CLEAN | CACHE_INV, CACHE_INV); if (ret) return ret; + if (vm && vm->locked_region.size) { + /* Unlock the region if there's a lock pending. */ + ret = as_send_cmd_and_wait(ptdev, vm->as.id, AS_COMMAND_UNLOCK); + if (ret) + return ret; + } + /* If the slot is going to be used immediately, don't bother changing * the config. */

From cedf6765ecfd60197d90437ec648feb8b3e31cb1 Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Wed, 3 Dec 2025 09:19:11 +0000 Subject: [PATCH 33/94] drm/panthor: Remove redundant call to disable the MCU

Remove the redundant call to disable the MCU firmware in the suspend path.
Fixes: 514072549865 ("drm/panthor: Support GLB_REQ.STATE field for Mali-G1 GPUs") Signed-off-by: Akash Goel Signed-off-by: Liviu Dudau Link: https://patch.msgid.link/20251203091911.145623-1-akash.goel@arm.com --- drivers/gpu/drm/panthor/panthor_fw.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c index 1a5e3c1a27fb..94a3cd6dfa6d 100644 --- a/drivers/gpu/drm/panthor/panthor_fw.c +++ b/drivers/gpu/drm/panthor/panthor_fw.c @@ -1187,7 +1187,6 @@ void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang) else ptdev->reset.fast = true; } - panthor_fw_stop(ptdev); panthor_job_irq_suspend(&ptdev->fw->irq); panthor_fw_stop(ptdev); From ac5b392a8c355001c4c3f230a0e4b1f904e359ca Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Tue, 2 Dec 2025 09:40:28 -0800 Subject: [PATCH 34/94] drm/panthor: fix queue_reset_timeout_locked queue_check_job_completion calls queue_reset_timeout_locked to reset the timeout when progress is made. We want the reset to happen when the timeout is running, not when it is suspended. Fixes: 345c5b7cc0f85 ("drm/panthor: Make the timeout per-queue instead of per-job") Signed-off-by: Chia-I Wu Signed-off-by: Liviu Dudau Link: https://patch.msgid.link/20251202174028.1600218-1-olvaffe@gmail.com --- drivers/gpu/drm/panthor/panthor_sched.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 203f6a0a6b9a..33b9ef537e35 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -1051,18 +1051,6 @@ group_is_idle(struct panthor_group *group) return hweight32(inactive_queues) == group->queue_count; } -static void -queue_reset_timeout_locked(struct panthor_queue *queue) -{ - lockdep_assert_held(&queue->fence_ctx.lock); - - if (queue->timeout.remaining != MAX_SCHEDULE_TIMEOUT) { - mod_delayed_work(queue->scheduler.timeout_wq, - &queue->timeout.work, - msecs_to_jiffies(JOB_TIMEOUT_MS)); - } -} - static bool group_can_run(struct panthor_group *group) { @@ -1079,6 +1067,18 @@ queue_timeout_is_suspended(struct panthor_queue *queue) return queue->timeout.remaining != MAX_SCHEDULE_TIMEOUT; } +static void +queue_reset_timeout_locked(struct panthor_queue *queue) +{ + lockdep_assert_held(&queue->fence_ctx.lock); + + if (!queue_timeout_is_suspended(queue)) { + mod_delayed_work(queue->scheduler.timeout_wq, + &queue->timeout.work, + msecs_to_jiffies(JOB_TIMEOUT_MS)); + } +} + static void queue_suspend_timeout_locked(struct panthor_queue *queue) { From ee8721bee80150ed1e4ee5ebb6aaf070802ac81b Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 19 Sep 2025 14:15:27 +0100 Subject: [PATCH 35/94] drm/ttm: Make ttm_bo_init_validate safe against ttm_operation_ctx re-ordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Struct ttm_operation_ctx initializer in ttm_bo_init_validate assumes the order of the structure fields when it is configuring the interruptible flag. Fix it by using named initialization. 
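A minimal standalone sketch of the hazard being fixed (the two-bool layout below is a simplified stand-in for the real struct ttm_operation_ctx, which has more members):

	#include <stdbool.h>
	#include <stdio.h>

	/* Simplified stand-in; only the two fields relevant here. */
	struct ttm_operation_ctx {
		bool interruptible;
		bool no_wait_gpu;
	};

	static void init_ctx(bool interruptible)
	{
		/* Positional: silently binds to whichever field comes
		 * first, so reordering the struct changes the meaning. */
		struct ttm_operation_ctx fragile = { interruptible, false };

		/* Designated: keeps its meaning across reordering; all
		 * unnamed members are zero-initialized. */
		struct ttm_operation_ctx robust = { .interruptible = interruptible };

		printf("%d %d\n", fragile.interruptible, robust.interruptible);
	}

	int main(void)
	{
		init_ctx(true);
		return 0;
	}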
Signed-off-by: Tvrtko Ursulin Acked-by: Thadeu Lima de Souza Cascardo Reviewed-by: Christian König Signed-off-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20250919131530.91247-2-tvrtko.ursulin@igalia.com --- drivers/gpu/drm/ttm/ttm_bo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index bd27607f8076..1df487425e96 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1027,7 +1027,7 @@ int ttm_bo_init_validate(struct ttm_device *bdev, struct ttm_buffer_object *bo, struct sg_table *sg, struct dma_resv *resv, void (*destroy) (struct ttm_buffer_object *)) { - struct ttm_operation_ctx ctx = { interruptible, false }; + struct ttm_operation_ctx ctx = { .interruptible = interruptible }; int ret; ret = ttm_bo_init_reserved(bdev, bo, type, placement, alignment, &ctx,

From feb065155bab2fabc3545bf57ae31e86d02df9a1 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 19 Sep 2025 14:15:28 +0100 Subject: [PATCH 36/94] drm/ttm: Resource cannot be NULL in ttm_resource_intersects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

The function has a single caller and the resource cannot be NULL there, so remove the early return check.

Signed-off-by: Tvrtko Ursulin Reviewed-by: Thadeu Lima de Souza Cascardo Reviewed-by: Christian König Signed-off-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20250919131530.91247-3-tvrtko.ursulin@igalia.com --- drivers/gpu/drm/ttm/ttm_resource.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index f5aa29dc6ec0..2dd19f229d32 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -446,9 +446,6 @@ bool ttm_resource_intersects(struct ttm_device *bdev, { struct ttm_resource_manager *man; - if (!res) - return false; - man = ttm_manager_type(bdev, res->mem_type); if (!place || !man->func->intersects) return true;

From 802620f5a9cf7231933cfce61817577b3b6543d9 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 19 Sep 2025 14:15:29 +0100 Subject: [PATCH 37/94] drm/ttm: Tidy ttm_operation_ctx initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

No need to initialize a subset of fields to zero.
Signed-off-by: Tvrtko Ursulin Acked-by: Thadeu Lima de Souza Cascardo Link: https://lore.kernel.org/r/20250919131530.91247-4-tvrtko.ursulin@igalia.com Reviewed-by: Christian König Signed-off-by: Tvrtko Ursulin [tursulin: fixup conflict in ttm_resource_manager_evict_all] --- drivers/gpu/drm/ttm/ttm_bo_util.c | 10 ++-------- drivers/gpu/drm/ttm/ttm_device.c | 5 +---- drivers/gpu/drm/ttm/ttm_resource.c | 5 +---- drivers/gpu/drm/ttm/ttm_tt.c | 2 +- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 2ff35d55e462..c00371894fa1 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -343,10 +343,7 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo, struct ttm_bo_kmap_obj *map) { struct ttm_resource *mem = bo->resource; - struct ttm_operation_ctx ctx = { - .interruptible = false, - .no_wait_gpu = false - }; + struct ttm_operation_ctx ctx = { }; struct ttm_tt *ttm = bo->ttm; struct ttm_resource_manager *man = ttm_manager_type(bo->bdev, bo->resource->mem_type); @@ -530,10 +527,7 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, struct iosys_map *map) iosys_map_set_vaddr_iomem(map, vaddr_iomem); } else { - struct ttm_operation_ctx ctx = { - .interruptible = false, - .no_wait_gpu = false - }; + struct ttm_operation_ctx ctx = { }; struct ttm_tt *ttm = bo->ttm; pgprot_t prot; void *vaddr; diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 9a51afaf0749..d3bfb9a696a7 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -135,10 +135,7 @@ static int ttm_global_init(void) */ int ttm_device_prepare_hibernation(struct ttm_device *bdev) { - struct ttm_operation_ctx ctx = { - .interruptible = false, - .no_wait_gpu = false, - }; + struct ttm_operation_ctx ctx = { }; int ret; do { diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 2dd19f229d32..a31683f7f310 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -545,10 +545,7 @@ EXPORT_SYMBOL(ttm_resource_manager_init); int ttm_resource_manager_evict_all(struct ttm_device *bdev, struct ttm_resource_manager *man) { - struct ttm_operation_ctx ctx = { - .interruptible = false, - .no_wait_gpu = false, - }; + struct ttm_operation_ctx ctx = { }; struct dma_fence *fence; int ret, i; diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 611d20ab966d..af33fa020249 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -456,7 +456,7 @@ EXPORT_SYMBOL_FOR_TESTS_ONLY(ttm_tt_unpopulate); /* Test the shrinker functions and dump the result */ static int ttm_tt_debugfs_shrink_show(struct seq_file *m, void *data) { - struct ttm_operation_ctx ctx = { false, false }; + struct ttm_operation_ctx ctx = { }; seq_printf(m, "%d\n", ttm_global_swapout(&ctx, GFP_KERNEL)); return 0;

From c06da4b3573a2d3c906c185450d0b1059d02820e Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 19 Sep 2025 14:15:30 +0100 Subject: [PATCH 38/94] drm/ttm: Tidy usage of local variables a little bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

At the moment the TTM code has a few places which exhibit sub-optimal patterns regarding local variable usage (see the sketch after this list):
* Having a local with some object cached but not always using it.
* Having a local for a single use object member access.
* Failed opportunities to use a local to cache a pointer.
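A condensed before/after sketch of the pattern (identifiers taken from the diffs below, bodies trimmed to the locking calls):

	/* Before: bo->bdev dereferenced on every use. */
	spin_lock(&bo->bdev->lru_lock);
	ttm_resource_del_bulk_move(bo->resource, bo);
	spin_unlock(&bo->bdev->lru_lock);

	/* After: cache the pointer in a local once, then reuse it. */
	struct ttm_device *bdev = bo->bdev;

	spin_lock(&bdev->lru_lock);
	ttm_resource_del_bulk_move(bo->resource, bo);
	spin_unlock(&bdev->lru_lock);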
Let's tidy this up a little and apply some more consistency. It is mostly for consistency and readability, but I have also checked that there are no negative code generation effects. In fact there are more positives:

add/remove: 0/0 grow/shrink: 3/9 up/down: 12/-175 (-163)
Function                        old     new   delta
ttm_pool_restore_and_alloc      415     423      +8
ttm_bo_vunmap                   147     149      +2
ttm_bo_evict                    521     523      +2
ttm_bo_vm_fault_reserved        972     970      -2
ttm_bo_vm_dummy_page            155     152      -3
ttm_bo_vm_fault                 203     196      -7
ttm_bo_populate                 158     150      -8
ttm_bo_move_memcpy              600     592      -8
ttm_bo_kmap                     667     644     -23
ttm_bo_shrink                   333     305     -28
ttm_bo_release                  750     720     -30
ttm_bo_swapout_cb               691     625     -66
Total: Before=42717, After=42554, chg -0.38%

Signed-off-by: Tvrtko Ursulin Reviewed-by: Thadeu Lima de Souza Cascardo Link: https://lore.kernel.org/r/20250919131530.91247-5-tvrtko.ursulin@igalia.com Acked-by: Christian König Signed-off-by: Tvrtko Ursulin [tursulin: fixup conflict in ttm_bo_move_pipeline_evict] --- drivers/gpu/drm/ttm/ttm_bo.c | 62 +++++++++++++++--------------- drivers/gpu/drm/ttm/ttm_bo_util.c | 47 +++++++++++----------- drivers/gpu/drm/ttm/ttm_bo_vm.c | 12 +++--- drivers/gpu/drm/ttm/ttm_pool.c | 26 +++++++------ drivers/gpu/drm/ttm/ttm_resource.c | 6 +-- 5 files changed, 77 insertions(+), 76 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 1df487425e96..acb9197db879 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -268,8 +268,8 @@ static void ttm_bo_release(struct kref *kref) 30 * HZ); } - if (bo->bdev->funcs->release_notify) - bo->bdev->funcs->release_notify(bo); + if (bdev->funcs->release_notify) + bdev->funcs->release_notify(bo); drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node); ttm_mem_io_free(bdev, bo->resource); @@ -283,7 +283,7 @@ static void ttm_bo_release(struct kref *kref) ttm_bo_flush_all_fences(bo); bo->deleted = true; - spin_lock(&bo->bdev->lru_lock); + spin_lock(&bdev->lru_lock); /* * Make pinned bos immediately available to @@ -299,7 +299,7 @@ static void ttm_bo_release(struct kref *kref) } kref_init(&bo->kref); - spin_unlock(&bo->bdev->lru_lock); + spin_unlock(&bdev->lru_lock); INIT_WORK(&bo->delayed_delete, ttm_bo_delayed_delete); @@ -359,7 +359,6 @@ static int ttm_bo_bounce_temp_buffer(struct ttm_buffer_object *bo, static int ttm_bo_evict(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx) { - struct ttm_device *bdev = bo->bdev; struct ttm_resource *evict_mem; struct ttm_placement placement; struct ttm_place hop; @@ -370,7 +369,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, dma_resv_assert_held(bo->base.resv); placement.num_placement = 0; - bdev->funcs->evict_flags(bo, &placement); + bo->bdev->funcs->evict_flags(bo, &placement); if (!placement.num_placement) { ret = ttm_bo_wait_ctx(bo, ctx); @@ -423,16 +422,16 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place) { struct ttm_resource *res = bo->resource; - struct ttm_device *bdev = bo->bdev; dma_resv_assert_held(bo->base.resv); - if (bo->resource->mem_type == TTM_PL_SYSTEM) + + if (res->mem_type == TTM_PL_SYSTEM) return true; /* Don't evict this BO if it's outside of the * requested placement range */ - return ttm_resource_intersects(bdev, res, place, bo->base.size); + return ttm_resource_intersects(bo->bdev, res, place, bo->base.size); } EXPORT_SYMBOL(ttm_bo_eviction_valuable); @@ -1108,10 +1107,13 @@ struct ttm_bo_swapout_walk { static s64 ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) { - struct
ttm_place place = {.mem_type = bo->resource->mem_type}; + struct ttm_resource *res = bo->resource; + struct ttm_place place = { .mem_type = res->mem_type }; struct ttm_bo_swapout_walk *swapout_walk = container_of(walk, typeof(*swapout_walk), walk); struct ttm_operation_ctx *ctx = walk->arg.ctx; + struct ttm_device *bdev = bo->bdev; + struct ttm_tt *tt = bo->ttm; s64 ret; /* @@ -1120,20 +1122,19 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) * The driver may use the fact that we're moving from SYSTEM * as an indication that we're about to swap out. */ - if (bo->pin_count || !bo->bdev->funcs->eviction_valuable(bo, &place)) { + if (bo->pin_count || !bdev->funcs->eviction_valuable(bo, &place)) { ret = -EBUSY; goto out; } - if (!bo->ttm || !ttm_tt_is_populated(bo->ttm) || - bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL || - bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED) { + if (!tt || !ttm_tt_is_populated(tt) || + tt->page_flags & (TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_SWAPPED)) { ret = -EBUSY; goto out; } if (bo->deleted) { - pgoff_t num_pages = bo->ttm->num_pages; + pgoff_t num_pages = tt->num_pages; ret = ttm_bo_wait_ctx(bo, ctx); if (ret) @@ -1147,7 +1148,7 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) /* * Move to system cached */ - if (bo->resource->mem_type != TTM_PL_SYSTEM) { + if (res->mem_type != TTM_PL_SYSTEM) { struct ttm_resource *evict_mem; struct ttm_place hop; @@ -1174,21 +1175,21 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) goto out; ttm_bo_unmap_virtual(bo); - if (bo->bdev->funcs->swap_notify) - bo->bdev->funcs->swap_notify(bo); + if (bdev->funcs->swap_notify) + bdev->funcs->swap_notify(bo); - if (ttm_tt_is_populated(bo->ttm)) { - spin_lock(&bo->bdev->lru_lock); - ttm_resource_del_bulk_move(bo->resource, bo); - spin_unlock(&bo->bdev->lru_lock); + if (ttm_tt_is_populated(tt)) { + spin_lock(&bdev->lru_lock); + ttm_resource_del_bulk_move(res, bo); + spin_unlock(&bdev->lru_lock); - ret = ttm_tt_swapout(bo->bdev, bo->ttm, swapout_walk->gfp_flags); + ret = ttm_tt_swapout(bdev, tt, swapout_walk->gfp_flags); - spin_lock(&bo->bdev->lru_lock); + spin_lock(&bdev->lru_lock); if (ret) - ttm_resource_add_bulk_move(bo->resource, bo); - ttm_resource_move_to_lru_tail(bo->resource); - spin_unlock(&bo->bdev->lru_lock); + ttm_resource_add_bulk_move(res, bo); + ttm_resource_move_to_lru_tail(res); + spin_unlock(&bdev->lru_lock); } out: @@ -1261,6 +1262,7 @@ void ttm_bo_tt_destroy(struct ttm_buffer_object *bo) int ttm_bo_populate(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx) { + struct ttm_device *bdev = bo->bdev; struct ttm_tt *tt = bo->ttm; bool swapped; int ret; @@ -1271,16 +1273,16 @@ int ttm_bo_populate(struct ttm_buffer_object *bo, return 0; swapped = ttm_tt_is_swapped(tt); - ret = ttm_tt_populate(bo->bdev, tt, ctx); + ret = ttm_tt_populate(bdev, tt, ctx); if (ret) return ret; if (swapped && !ttm_tt_is_swapped(tt) && !bo->pin_count && bo->resource) { - spin_lock(&bo->bdev->lru_lock); + spin_lock(&bdev->lru_lock); ttm_resource_add_bulk_move(bo->resource, bo); ttm_resource_move_to_lru_tail(bo->resource); - spin_unlock(&bo->bdev->lru_lock); + spin_unlock(&bdev->lru_lock); } return 0; diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index c00371894fa1..cabcfeaa70dc 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -174,13 +174,13 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, dst_iter = 
ttm_kmap_iter_linear_io_init(&_dst_iter.io, bdev, dst_mem); if (PTR_ERR(dst_iter) == -EINVAL && dst_man->use_tt) - dst_iter = ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm); + dst_iter = ttm_kmap_iter_tt_init(&_dst_iter.tt, ttm); if (IS_ERR(dst_iter)) return PTR_ERR(dst_iter); src_iter = ttm_kmap_iter_linear_io_init(&_src_iter.io, bdev, src_mem); if (PTR_ERR(src_iter) == -EINVAL && src_man->use_tt) - src_iter = ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm); + src_iter = ttm_kmap_iter_tt_init(&_src_iter.tt, ttm); if (IS_ERR(src_iter)) { ret = PTR_ERR(src_iter); goto out_src_iter; @@ -318,11 +318,11 @@ static int ttm_bo_ioremap(struct ttm_buffer_object *bo, { struct ttm_resource *mem = bo->resource; - if (bo->resource->bus.addr) { + if (mem->bus.addr) { map->bo_kmap_type = ttm_bo_map_premapped; - map->virtual = ((u8 *)bo->resource->bus.addr) + offset; + map->virtual = ((u8 *)mem->bus.addr) + offset; } else { - resource_size_t res = bo->resource->bus.offset + offset; + resource_size_t res = mem->bus.offset + offset; map->bo_kmap_type = ttm_bo_map_iomap; if (mem->bus.caching == ttm_write_combined) @@ -346,7 +346,7 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo, struct ttm_operation_ctx ctx = { }; struct ttm_tt *ttm = bo->ttm; struct ttm_resource_manager *man = - ttm_manager_type(bo->bdev, bo->resource->mem_type); + ttm_manager_type(bo->bdev, mem->mem_type); pgprot_t prot; int ret; @@ -425,20 +425,21 @@ int ttm_bo_kmap(struct ttm_buffer_object *bo, unsigned long start_page, unsigned long num_pages, struct ttm_bo_kmap_obj *map) { + struct ttm_resource *res = bo->resource; unsigned long offset, size; int ret; map->virtual = NULL; map->bo = bo; - if (num_pages > PFN_UP(bo->resource->size)) + if (num_pages > PFN_UP(res->size)) return -EINVAL; - if ((start_page + num_pages) > PFN_UP(bo->resource->size)) + if ((start_page + num_pages) > PFN_UP(res->size)) return -EINVAL; - ret = ttm_mem_io_reserve(bo->bdev, bo->resource); + ret = ttm_mem_io_reserve(bo->bdev, res); if (ret) return ret; - if (!bo->resource->bus.is_iomem) { + if (!res->bus.is_iomem) { return ttm_bo_kmap_ttm(bo, start_page, num_pages, map); } else { offset = start_page << PAGE_SHIFT; @@ -575,7 +576,7 @@ void ttm_bo_vunmap(struct ttm_buffer_object *bo, struct iosys_map *map) iounmap(map->vaddr_iomem); iosys_map_clear(map); - ttm_mem_io_free(bo->bdev, bo->resource); + ttm_mem_io_free(bo->bdev, mem); } EXPORT_SYMBOL(ttm_bo_vunmap); @@ -638,12 +639,11 @@ static int ttm_bo_move_to_ghost(struct ttm_buffer_object *bo, static void ttm_bo_move_pipeline_evict(struct ttm_buffer_object *bo, struct dma_fence *fence) { - struct ttm_device *bdev = bo->bdev; struct ttm_resource_manager *from; struct dma_fence *tmp; int i; - from = ttm_manager_type(bdev, bo->resource->mem_type); + from = ttm_manager_type(bo->bdev, bo->resource->mem_type); /** * BO doesn't have a TTM we need to bind/unbind. 
Just remember @@ -737,8 +737,8 @@ EXPORT_SYMBOL(ttm_bo_move_accel_cleanup); void ttm_bo_move_sync_cleanup(struct ttm_buffer_object *bo, struct ttm_resource *new_mem) { - struct ttm_device *bdev = bo->bdev; - struct ttm_resource_manager *man = ttm_manager_type(bdev, new_mem->mem_type); + struct ttm_resource_manager *man = + ttm_manager_type(bo->bdev, new_mem->mem_type); int ret; ret = ttm_bo_wait_free_node(bo, man->use_tt); @@ -842,13 +842,12 @@ static int ttm_lru_walk_ticketlock(struct ttm_bo_lru_cursor *curs, struct ttm_buffer_object *bo) { struct ttm_lru_walk_arg *arg = curs->arg; - struct dma_resv *resv = bo->base.resv; int ret; if (arg->ctx->interruptible) - ret = dma_resv_lock_interruptible(resv, arg->ticket); + ret = dma_resv_lock_interruptible(bo->base.resv, arg->ticket); else - ret = dma_resv_lock(resv, arg->ticket); + ret = dma_resv_lock(bo->base.resv, arg->ticket); if (!ret) { curs->needs_unlock = true; @@ -1092,7 +1091,7 @@ long ttm_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, .num_placement = 1, .placement = &sys_placement_flags, }; - struct ttm_tt *tt = bo->ttm; + struct ttm_device *bdev = bo->bdev; long lret; dma_resv_assert_held(bo->base.resv); @@ -1114,19 +1113,19 @@ long ttm_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, return lret; if (bo->bulk_move) { - spin_lock(&bo->bdev->lru_lock); + spin_lock(&bdev->lru_lock); ttm_resource_del_bulk_move(bo->resource, bo); - spin_unlock(&bo->bdev->lru_lock); + spin_unlock(&bdev->lru_lock); } - lret = ttm_tt_backup(bo->bdev, tt, (struct ttm_backup_flags) + lret = ttm_tt_backup(bdev, bo->ttm, (struct ttm_backup_flags) {.purge = flags.purge, .writeback = flags.writeback}); if (lret <= 0 && bo->bulk_move) { - spin_lock(&bo->bdev->lru_lock); + spin_lock(&bdev->lru_lock); ttm_resource_add_bulk_move(bo->resource, bo); - spin_unlock(&bo->bdev->lru_lock); + spin_unlock(&bdev->lru_lock); } if (lret < 0 && lret != -EINTR) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index b47020fca199..772e1193b0c8 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -186,7 +186,6 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, { struct vm_area_struct *vma = vmf->vma; struct ttm_buffer_object *bo = vma->vm_private_data; - struct ttm_device *bdev = bo->bdev; unsigned long page_offset; unsigned long page_last; unsigned long pfn; @@ -205,7 +204,7 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, if (unlikely(ret != 0)) return ret; - err = ttm_mem_io_reserve(bdev, bo->resource); + err = ttm_mem_io_reserve(bo->bdev, bo->resource); if (unlikely(err != 0)) return VM_FAULT_SIGBUS; @@ -293,7 +292,6 @@ vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot) { struct vm_area_struct *vma = vmf->vma; struct ttm_buffer_object *bo = vma->vm_private_data; - struct drm_device *ddev = bo->base.dev; vm_fault_t ret = VM_FAULT_NOPAGE; unsigned long address; unsigned long pfn; @@ -305,7 +303,8 @@ vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot) return VM_FAULT_OOM; /* Set the page to be freed using drmm release action */ - if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page)) + if (drmm_add_action_or_reset(bo->base.dev, ttm_bo_release_dummy_page, + page)) return VM_FAULT_OOM; pfn = page_to_pfn(page); @@ -322,10 +321,9 @@ EXPORT_SYMBOL(ttm_bo_vm_dummy_page); vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; - pgprot_t prot; struct ttm_buffer_object *bo = 
vma->vm_private_data; - struct drm_device *ddev = bo->base.dev; vm_fault_t ret; + pgprot_t prot; int idx; ret = ttm_bo_vm_reserve(bo, vmf); @@ -333,7 +331,7 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) return ret; prot = vma->vm_page_prot; - if (drm_dev_enter(ddev, &idx)) { + if (drm_dev_enter(bo->base.dev, &idx)) { ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT); drm_dev_exit(idx); } else { diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 18b6db015619..217e45958099 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -845,32 +845,34 @@ EXPORT_SYMBOL(ttm_pool_alloc); int ttm_pool_restore_and_alloc(struct ttm_pool *pool, struct ttm_tt *tt, const struct ttm_operation_ctx *ctx) { + struct ttm_pool_tt_restore *restore = tt->restore; struct ttm_pool_alloc_state alloc; if (WARN_ON(!ttm_tt_is_backed_up(tt))) return -EINVAL; - if (!tt->restore) { + if (!restore) { gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; ttm_pool_alloc_state_init(tt, &alloc); if (ctx->gfp_retry_mayfail) gfp |= __GFP_RETRY_MAYFAIL; - tt->restore = kzalloc(sizeof(*tt->restore), gfp); - if (!tt->restore) + restore = kzalloc(sizeof(*restore), gfp); + if (!restore) return -ENOMEM; - tt->restore->snapshot_alloc = alloc; - tt->restore->pool = pool; - tt->restore->restored_pages = 1; - } else { - struct ttm_pool_tt_restore *restore = tt->restore; - int ret; + restore->snapshot_alloc = alloc; + restore->pool = pool; + restore->restored_pages = 1; + tt->restore = restore; + } else { alloc = restore->snapshot_alloc; - if (ttm_pool_restore_valid(tt->restore)) { - ret = ttm_pool_restore_commit(restore, tt->backup, ctx, &alloc); + if (ttm_pool_restore_valid(restore)) { + int ret = ttm_pool_restore_commit(restore, tt->backup, + ctx, &alloc); + if (ret) return ret; } @@ -878,7 +880,7 @@ int ttm_pool_restore_and_alloc(struct ttm_pool *pool, struct ttm_tt *tt, return 0; } - return __ttm_pool_alloc(pool, tt, ctx, &alloc, tt->restore); + return __ttm_pool_alloc(pool, tt, ctx, &alloc, restore); } /** diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index a31683f7f310..192fca24f37e 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -622,11 +622,11 @@ ttm_resource_cursor_check_bulk(struct ttm_resource_cursor *cursor, struct ttm_lru_item *next_lru) { struct ttm_resource *next = ttm_lru_item_to_res(next_lru); - struct ttm_lru_bulk_move *bulk = NULL; - struct ttm_buffer_object *bo = next->bo; + struct ttm_lru_bulk_move *bulk; lockdep_assert_held(&cursor->man->bdev->lru_lock); - bulk = bo->bulk_move; + + bulk = next->bo->bulk_move; if (cursor->bulk != bulk) { if (bulk) { From 2976aeb0de77da599ad37691963efbdcb07435ce Mon Sep 17 00:00:00 2001 From: Alexey Klimov Date: Wed, 3 Dec 2025 07:45:55 +0000 Subject: [PATCH 39/94] gpu/panel-edp: add AUO panel entry for B140HAN06.4 Add an eDP panel entry for AUO B140HAN06.4 that is also used in some variants of Lenovo Flex 5G with Qcom SC8180 SoC. 
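For reference, the 0x643d product id added below comes straight out of the EDID quoted in this message: bytes 8-9 hold the big-endian PNP vendor id ("AUO") and bytes 10-11 the little-endian product code. A standalone decoding sketch (hypothetical helper, not driver code):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		const uint8_t edid[] = { 0x06, 0xaf, 0x3d, 0x64 };	/* EDID bytes 8..11 */
		uint16_t mfg = (edid[0] << 8) | edid[1];		/* big-endian */
		uint16_t product = edid[2] | (edid[3] << 8);		/* little-endian */
		char vendor[4] = {
			'@' + ((mfg >> 10) & 0x1f),	/* 5 bits per letter, 'A' == 1 */
			'@' + ((mfg >> 5) & 0x1f),
			'@' + (mfg & 0x1f),
			'\0',
		};

		printf("%s 0x%04x\n", vendor, product);	/* prints: AUO 0x643d */
		return 0;
	}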
The raw EDID of the panel is:

00 ff ff ff ff ff ff 00 06 af 3d 64 00 00 00 00
2b 1d 01 04 a5 1f 11 78 03 b8 1a a6 54 4a 9b 26
0e 52 55 00 00 00 01 01 01 01 01 01 01 01 01 01
01 01 01 01 01 01 14 37 80 b8 70 38 24 40 10 10
3e 00 35 ae 10 00 00 18 10 2c 80 b8 70 38 24 40
10 10 3e 00 35 ae 10 00 00 18 00 00 00 fe 00 41
55 4f 0a 20 20 20 20 20 20 20 20 20 00 00 00 fe
00 42 31 34 30 48 41 4e 30 36 2e 34 20 0a 00 eb

I do not have access to the datasheet, but the panel has been tested on the above-mentioned laptop for a few weeks and seems to work just fine with the timing info of similar panels.

Cc: Bjorn Andersson Cc: Vinod Koul Signed-off-by: Alexey Klimov Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patch.msgid.link/20251203074555.690613-1-alexey.klimov@linaro.org --- drivers/gpu/drm/panel/panel-edp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index 023fbbb10eb4..2c3597037743 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -1904,6 +1904,7 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('A', 'U', 'O', 0x615c, &delay_200_500_e50, "B116XAN06.1"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x635c, &delay_200_500_e50, "B116XAN06.3"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x639c, &delay_200_500_e50, "B140HAK02.7"), + EDP_PANEL_ENTRY('A', 'U', 'O', 0x643d, &delay_200_500_e50, "B140HAN06.4"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x723c, &delay_200_500_e50, "B140XTN07.2"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x73aa, &delay_200_500_e50, "B116XTN02.3"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x8594, &delay_200_500_e50, "B133UAN01.0"),

From e58b4dea9054c85688c8f639ebdfc8115261dae2 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 1 Dec 2025 11:50:05 +0100 Subject: [PATCH 40/94] dma-buf/dma-fence: Add dma_fence_test_signaled_flag() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

The dma_fence framework checks at many places whether the signaled flag of a fence is already set. The code can be simplified and made more readable by providing a helper function for that.

Add dma_fence_test_signaled_flag(), which only checks whether a fence is signaled. Use it internally.
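A hedged usage sketch contrasting the new helper with dma_fence_is_signaled() (the surrounding caller is illustrative only; both helpers appear in the diff below):

	/* Peeks at the flag only: no lock and no ops->signaled() callback.
	 * Opportunistic by design - the fence may signal right after. */
	if (dma_fence_test_signaled_flag(fence))
		return;

	/* Full check: additionally polls ops->signaled(), and may signal
	 * the fence as a side effect when that reports true. */
	if (dma_fence_is_signaled(fence))
		return;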
Suggested-by: Tvrtko Ursulin Reviewed-by: Christian König Signed-off-by: Philipp Stanner Link: https://patch.msgid.link/20251201105011.19386-3-phasta@kernel.org --- drivers/dma-buf/dma-fence.c | 16 ++++++++-------- include/linux/dma-fence.h | 24 ++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 2bb18af369b9..5e96cb5f1f3b 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -543,7 +543,7 @@ void dma_fence_release(struct kref *kref) trace_dma_fence_destroy(fence); if (!list_empty(&fence->cb_list) && - !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + !dma_fence_test_signaled_flag(fence)) { const char __rcu *timeline; const char __rcu *driver; unsigned long flags; @@ -600,7 +600,7 @@ static bool __dma_fence_enable_signaling(struct dma_fence *fence) was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags); - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + if (dma_fence_test_signaled_flag(fence)) return false; if (!was_set && fence->ops->enable_signaling) { @@ -664,7 +664,7 @@ int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb, if (WARN_ON(!fence || !func)) return -EINVAL; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + if (dma_fence_test_signaled_flag(fence)) { INIT_LIST_HEAD(&cb->node); return -ENOENT; } @@ -781,7 +781,7 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) spin_lock_irqsave(fence->lock, flags); - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + if (dma_fence_test_signaled_flag(fence)) goto out; if (intr && signal_pending(current)) { @@ -798,7 +798,7 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) cb.task = current; list_add(&cb.base.node, &fence->cb_list); - while (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) && ret > 0) { + while (!dma_fence_test_signaled_flag(fence) && ret > 0) { if (intr) __set_current_state(TASK_INTERRUPTIBLE); else @@ -830,7 +830,7 @@ dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count, for (i = 0; i < count; ++i) { struct dma_fence *fence = fences[i]; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + if (dma_fence_test_signaled_flag(fence)) { if (idx) *idx = i; return true; @@ -1108,7 +1108,7 @@ const char __rcu *dma_fence_driver_name(struct dma_fence *fence) RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "RCU protection is required for safe access to returned string"); - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + if (!dma_fence_test_signaled_flag(fence)) return fence->ops->get_driver_name(fence); else return "detached-driver"; @@ -1140,7 +1140,7 @@ const char __rcu *dma_fence_timeline_name(struct dma_fence *fence) RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "RCU protection is required for safe access to returned string"); - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + if (!dma_fence_test_signaled_flag(fence)) return fence->ops->get_driver_name(fence); else return "signaled-timeline"; diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 64639e104110..19972f5d176f 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -401,6 +401,26 @@ void dma_fence_enable_sw_signaling(struct dma_fence *fence); const char __rcu *dma_fence_driver_name(struct dma_fence *fence); const char __rcu *dma_fence_timeline_name(struct dma_fence *fence); +/* + * dma_fence_test_signaled_flag - Only 
check whether a fence is signaled yet. + * @fence: the fence to check + * + * This function just checks whether @fence is signaled, without interacting + * with the fence in any way. The user must, therefore, ensure through other + * means that fences get signaled eventually. + * + * This function uses test_bit(), which is thread-safe. Naturally, this function + * should be used opportunistically; a fence could get signaled at any moment + * after the check is done. + * + * Return: true if signaled, false otherwise. + */ +static inline bool +dma_fence_test_signaled_flag(struct dma_fence *fence) +{ + return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags); +} + /** * dma_fence_is_signaled_locked - Return an indication if the fence * is signaled yet. @@ -418,7 +438,7 @@ const char __rcu *dma_fence_timeline_name(struct dma_fence *fence); static inline bool dma_fence_is_signaled_locked(struct dma_fence *fence) { - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + if (dma_fence_test_signaled_flag(fence)) return true; if (fence->ops->signaled && fence->ops->signaled(fence)) { @@ -448,7 +468,7 @@ dma_fence_is_signaled_locked(struct dma_fence *fence) static inline bool dma_fence_is_signaled(struct dma_fence *fence) { - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + if (dma_fence_test_signaled_flag(fence)) return true; if (fence->ops->signaled && fence->ops->signaled(fence)) {

From c891b99d25ddbb6b8167f9bdb904d4abc5a53b6b Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 1 Dec 2025 11:50:06 +0100 Subject: [PATCH 41/94] dma-buf/dma-fence: Add dma_fence_check_and_signal() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

The overwhelming majority of users of dma_fence signaling functions don't care about whether the fence had already been signaled by someone else. Therefore, the return code shall be removed from those functions. For the few users who rely on the check, a new, specialized function shall be provided.

Add dma_fence_check_and_signal(), which signals a fence if it had not yet been signaled, and informs the user about that. Add a counterpart, dma_fence_check_and_signal_locked(), which doesn't take the spinlock.

Reviewed-by: Christian König Signed-off-by: Philipp Stanner Link: https://patch.msgid.link/20251201105011.19386-4-phasta@kernel.org --- drivers/dma-buf/dma-fence.c | 44 +++++++++++++++++++++++++++++++++++++ include/linux/dma-fence.h | 2 ++ 2 files changed, 46 insertions(+) diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 5e96cb5f1f3b..8d88e84c8c58 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -443,6 +443,50 @@ int dma_fence_signal_locked(struct dma_fence *fence) } EXPORT_SYMBOL(dma_fence_signal_locked); +/** + * dma_fence_check_and_signal_locked - signal the fence if it's not yet signaled + * @fence: the fence to check and signal + * + * Checks whether a fence was signaled and signals it if it was not yet signaled. + * + * Unlike dma_fence_check_and_signal(), this function must be called with + * &struct dma_fence.lock being held. + * + * Return: true if fence has been signaled already, false otherwise.
+ */ +bool dma_fence_check_and_signal_locked(struct dma_fence *fence) +{ + bool ret; + + ret = dma_fence_test_signaled_flag(fence); + dma_fence_signal_locked(fence); + + return ret; +} +EXPORT_SYMBOL(dma_fence_check_and_signal_locked); + +/** + * dma_fence_check_and_signal - signal the fence if it's not yet signaled + * @fence: the fence to check and signal + * + * Checks whether a fence was signaled and signals it if it was not yet signaled. + * All this is done in a race-free manner. + * + * Return: true if fence has been signaled already, false otherwise. + */ +bool dma_fence_check_and_signal(struct dma_fence *fence) +{ + unsigned long flags; + bool ret; + + spin_lock_irqsave(fence->lock, flags); + ret = dma_fence_check_and_signal_locked(fence); + spin_unlock_irqrestore(fence->lock, flags); + + return ret; +} +EXPORT_SYMBOL(dma_fence_check_and_signal); + /** * dma_fence_signal - signal completion of a fence * @fence: the fence to signal diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 19972f5d176f..0504afe52c2a 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -365,6 +365,8 @@ static inline void __dma_fence_might_wait(void) {} #endif int dma_fence_signal(struct dma_fence *fence); +bool dma_fence_check_and_signal(struct dma_fence *fence); +bool dma_fence_check_and_signal_locked(struct dma_fence *fence); int dma_fence_signal_locked(struct dma_fence *fence); int dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp); int dma_fence_signal_timestamp_locked(struct dma_fence *fence,

From 06bc18e0def7d926265480faa555bf0b67a35a90 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 1 Dec 2025 11:50:07 +0100 Subject: [PATCH 42/94] amd/amdkfd: Use dma_fence_check_and_signal() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

amdkfd is one of the few users that rely on the return code of dma_fence_signal(), which, so far, informs the caller whether the fence had already been signaled. As there are barely any users, the dma_fence signaling functions shall have their return type changed to void. To do so, the few users must be ported to a function which preserves the old behavior.

Replace the call to dma_fence_signal() with one to dma_fence_check_and_signal().
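A before/after sketch of the conversion (illustrative, not the amdkfd code itself; note the different sense of the result):

	/* Before: 0 on success, -EINVAL if @fence was already signaled. */
	int err = dma_fence_signal(fence);

	/* After: true if @fence had already been signaled by someone
	 * else, false otherwise. */
	bool was_signaled = dma_fence_check_and_signal(fence);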
Suggested-by: Christian König Reviewed-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Philipp Stanner Link: https://patch.msgid.link/20251201105011.19386-5-phasta@kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index a085faac9fe1..bb252ec43733 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1987,10 +1987,10 @@ kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node, return -EINVAL; } -static int signal_eviction_fence(struct kfd_process *p) +static bool signal_eviction_fence(struct kfd_process *p) { struct dma_fence *ef; - int ret; + bool ret; rcu_read_lock(); ef = dma_fence_get_rcu_safe(&p->ef); @@ -1998,7 +1998,7 @@ static int signal_eviction_fence(struct kfd_process *p) if (!ef) return -EINVAL; - ret = dma_fence_signal(ef); + ret = dma_fence_check_and_signal(ef); dma_fence_put(ef); return ret; From dbcd754b845987f7157260c15e80ae19da81c9df Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 1 Dec 2025 11:50:08 +0100 Subject: [PATCH 43/94] drm/xe: Use dma_fence_check_and_signal_locked() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xe is one of the few users utilizing the return code of dma_fence_signal() to check whether a fence had already been signaled by someone else. To clean up and simplify the dma_fence API, the few kernel users relying on that behavior shall be ported to an alternative function. Replace dma_fence_signal_locked() with dma_fence_check_and_signal_locked(). Acked-by: Christian König Acked-by: Rodrigo Vivi Reviewed-by: Matthew Brost Signed-off-by: Philipp Stanner Link: https://patch.msgid.link/20251201105011.19386-6-phasta@kernel.org --- drivers/gpu/drm/xe/xe_hw_fence.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index b2a0c46dfcd4..f6057456e460 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -85,7 +85,6 @@ void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq) { struct xe_hw_fence *fence, *next; unsigned long flags; - int err; bool tmp; if (XE_WARN_ON(!list_empty(&irq->pending))) { @@ -93,9 +92,8 @@ void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq) spin_lock_irqsave(&irq->lock, flags); list_for_each_entry_safe(fence, next, &irq->pending, irq_link) { list_del_init(&fence->irq_link); - err = dma_fence_signal_locked(&fence->dma); + XE_WARN_ON(dma_fence_check_and_signal_locked(&fence->dma)); dma_fence_put(&fence->dma); - XE_WARN_ON(err); } spin_unlock_irqrestore(&irq->lock, flags); dma_fence_end_signalling(tmp); From 583d1fa19148c261868fa5c2382fe552c06a2164 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 1 Dec 2025 11:50:09 +0100 Subject: [PATCH 44/94] dma-buf: Don't misuse dma_fence_signal() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The return code of dma_fence_signal() is not really useful as there is nothing reasonable to do if a fence was already signaled. That return code shall be removed from the kernel. Moreover, dma_fence_signal() should not be used to check whether fences are signaled. That's what dma_fence_is_signaled() and dma_fence_test_signaled_flag() exist for. Replace the non-canonical usage of dma_fence_signal(). 
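The two replacement idioms, condensed from the diff below:

	/* Signal and learn whether someone beat us to it: */
	if (dma_fence_check_and_signal(f))	/* was: if (dma_fence_signal(f)) */
		pr_err("Fence reported being already signaled\n");

	/* Pure check, without (mis)using signaling for it: */
	if (!dma_fence_test_signaled_flag(f))	/* was: if (!dma_fence_signal(f)) */
		pr_err("Fence reported not being already signaled\n");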
Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Philipp Stanner Link: https://patch.msgid.link/20251201105011.19386-7-phasta@kernel.org --- drivers/dma-buf/st-dma-fence.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma-buf/st-dma-fence.c b/drivers/dma-buf/st-dma-fence.c index 27a36045410b..4dbe39c58bfb 100644 --- a/drivers/dma-buf/st-dma-fence.c +++ b/drivers/dma-buf/st-dma-fence.c @@ -126,7 +126,7 @@ static int test_signaling(void *arg) goto err_free; } - if (dma_fence_signal(f)) { + if (dma_fence_check_and_signal(f)) { pr_err("Fence reported being already signaled\n"); goto err_free; } @@ -136,7 +136,7 @@ static int test_signaling(void *arg) goto err_free; } - if (!dma_fence_signal(f)) { + if (!dma_fence_test_signaled_flag(f)) { pr_err("Fence reported not being already signaled\n"); goto err_free; }

From c6c60a2290b335eb5b45c6c7eeb254f18027b3ec Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 1 Dec 2025 11:50:10 +0100 Subject: [PATCH 45/94] drm/ttm: Use dma_fence_check_and_signal() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

The return code of dma_fence_signal() is not useful and shall be removed from the kernel. To do so, the few users who rely on the return code must be ported.

Use dma_fence_check_and_signal() and map its boolean return code to dma_fence_signal()'s former value for already-signaled fences.

Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Philipp Stanner Link: https://patch.msgid.link/20251201105011.19386-8-phasta@kernel.org --- drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c index 2eda87882e65..6d95447a989d 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c @@ -692,7 +692,7 @@ static int threaded_fence_signal(void *arg) msleep(20); - return dma_fence_signal(fence); + return dma_fence_check_and_signal(fence) ? -EINVAL : 0; } static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test)

From 88e721ab978a86426aa08da520de77430fa7bb84 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 1 Dec 2025 11:50:11 +0100 Subject: [PATCH 46/94] dma-buf/dma-fence: Remove return code of signaling-functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

All functions used for signaling a fence return an error code whose sole purpose is to tell whether a fence was already signaled. This is racy and has been used by almost no party in the kernel, and the few users have been removed in preceding cleanup commits.

Turn all signaling functions into void functions.

Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Philipp Stanner Link: https://patch.msgid.link/20251201105011.19386-9-phasta@kernel.org --- drivers/dma-buf/dma-fence.c | 40 ++++++++++--------------------- include/linux/dma-fence.h | 9 ++++----- 2 files changed, 14 insertions(+), 35 deletions(-) diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 8d88e84c8c58..c82aa8ae1454 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -358,11 +358,8 @@ void __dma_fence_might_wait(void) * * Unlike dma_fence_signal_timestamp(), this function must be called with * &dma_fence.lock held.
- * - * Returns 0 on success and a negative error value when @fence has been - * signalled already. */ -int dma_fence_signal_timestamp_locked(struct dma_fence *fence, +void dma_fence_signal_timestamp_locked(struct dma_fence *fence, ktime_t timestamp) { struct dma_fence_cb *cur, *tmp; @@ -372,7 +369,7 @@ int dma_fence_signal_timestamp_locked(struct dma_fence *fence, if (unlikely(test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))) - return -EINVAL; + return; /* Stash the cb_list before replacing it with the timestamp */ list_replace(&fence->cb_list, &cb_list); @@ -385,8 +382,6 @@ int dma_fence_signal_timestamp_locked(struct dma_fence *fence, INIT_LIST_HEAD(&cur->node); cur->func(fence, cur); } - - return 0; } EXPORT_SYMBOL(dma_fence_signal_timestamp_locked); @@ -401,23 +396,17 @@ EXPORT_SYMBOL(dma_fence_signal_timestamp_locked); * can only go from the unsignaled to the signaled state and not back, it will * only be effective the first time. Set the timestamp provided as the fence * signal timestamp. - * - * Returns 0 on success and a negative error value when @fence has been - * signalled already. */ -int dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp) +void dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp) { unsigned long flags; - int ret; if (WARN_ON(!fence)) - return -EINVAL; + return; spin_lock_irqsave(fence->lock, flags); - ret = dma_fence_signal_timestamp_locked(fence, timestamp); + dma_fence_signal_timestamp_locked(fence, timestamp); spin_unlock_irqrestore(fence->lock, flags); - - return ret; } EXPORT_SYMBOL(dma_fence_signal_timestamp); @@ -433,13 +422,10 @@ EXPORT_SYMBOL(dma_fence_signal_timestamp); * * Unlike dma_fence_signal(), this function must be called with &dma_fence.lock * held. - * - * Returns 0 on success and a negative error value when @fence has been - * signalled already. */ -int dma_fence_signal_locked(struct dma_fence *fence) +void dma_fence_signal_locked(struct dma_fence *fence) { - return dma_fence_signal_timestamp_locked(fence, ktime_get()); + dma_fence_signal_timestamp_locked(fence, ktime_get()); } EXPORT_SYMBOL(dma_fence_signal_locked); @@ -496,28 +482,22 @@ EXPORT_SYMBOL(dma_fence_check_and_signal); * dma_fence_add_callback(). Can be called multiple times, but since a fence * can only go from the unsignaled to the signaled state and not back, it will * only be effective the first time. - * - * Returns 0 on success and a negative error value when @fence has been - * signalled already. 
*/ -int dma_fence_signal(struct dma_fence *fence) +void dma_fence_signal(struct dma_fence *fence) { unsigned long flags; - int ret; bool tmp; if (WARN_ON(!fence)) - return -EINVAL; + return; tmp = dma_fence_begin_signalling(); spin_lock_irqsave(fence->lock, flags); - ret = dma_fence_signal_timestamp_locked(fence, ktime_get()); + dma_fence_signal_timestamp_locked(fence, ktime_get()); spin_unlock_irqrestore(fence->lock, flags); dma_fence_end_signalling(tmp); - - return ret; } EXPORT_SYMBOL(dma_fence_signal); diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 0504afe52c2a..d4c92fd35092 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -364,13 +364,12 @@ static inline void dma_fence_end_signalling(bool cookie) {} static inline void __dma_fence_might_wait(void) {} #endif -int dma_fence_signal(struct dma_fence *fence); +void dma_fence_signal(struct dma_fence *fence); bool dma_fence_check_and_signal(struct dma_fence *fence); bool dma_fence_check_and_signal_locked(struct dma_fence *fence); -int dma_fence_signal_locked(struct dma_fence *fence); -int dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp); -int dma_fence_signal_timestamp_locked(struct dma_fence *fence, - ktime_t timestamp); +void dma_fence_signal_locked(struct dma_fence *fence); +void dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp); +void dma_fence_signal_timestamp_locked(struct dma_fence *fence, ktime_t timestamp); signed long dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout); int dma_fence_add_callback(struct dma_fence *fence, From d8f94cb02af3cc64013373c94a4b7780782ea59c Mon Sep 17 00:00:00 2001 From: Ashley Smith Date: Thu, 27 Nov 2025 11:44:02 +0000 Subject: [PATCH 47/94] drm/panthor: Enable timestamp propagation Set the GLB_COUNTER_EN bit to enable coherent propagation of GPU timestamp values to shader cores. This is a prerequisite for exposing device-coherent timestamp queries. Bump the version to 1.6 so userspace can detect support. 
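A simplified model of the request update applied below (an assumption for illustration; the real panthor_fw_update_reqs() also deals with acks and memory ordering): GLB_IDLE_EN and GLB_COUNTER_EN are absolute enable bits, so they go through the masked update helper rather than panthor_fw_toggle_reqs(), which flips bits relative to the current ack to produce an edge the firmware can observe.

	u32 req = READ_ONCE(glb_iface->input->req);

	/* Clear the masked field, then set the requested value. */
	req &= ~(GLB_IDLE_EN | GLB_COUNTER_EN);
	req |= GLB_IDLE_EN | GLB_COUNTER_EN;
	WRITE_ONCE(glb_iface->input->req, req);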
v2: - GLB_COUNTER_EN is not a toggle bit, move to panthor_fw_update_reqs Signed-off-by: Ashley Smith Reviewed-by: Steven Price Link: https://patch.msgid.link/20251127115019.2113040-1-ashley.smith@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_drv.c | 3 ++- drivers/gpu/drm/panthor/panthor_fw.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index d1d4c50da5bf..0b0ec3b978c6 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -1604,6 +1604,7 @@ static void panthor_debugfs_init(struct drm_minor *minor) * - 1.3 - adds DRM_PANTHOR_GROUP_STATE_INNOCENT flag * - 1.4 - adds DRM_IOCTL_PANTHOR_BO_SET_LABEL ioctl * - 1.5 - adds DRM_PANTHOR_SET_USER_MMIO_OFFSET ioctl + * - 1.6 - enables GLB_COUNTER_EN */ static const struct drm_driver panthor_drm_driver = { .driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ | @@ -1617,7 +1618,7 @@ static const struct drm_driver panthor_drm_driver = { .name = "panthor", .desc = "Panthor DRM driver", .major = 1, - .minor = 5, + .minor = 6, .gem_create_object = panthor_gem_create_object, .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table, diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c index 94a3cd6dfa6d..4beaa589ba66 100644 --- a/drivers/gpu/drm/panthor/panthor_fw.c +++ b/drivers/gpu/drm/panthor/panthor_fw.c @@ -1044,7 +1044,8 @@ static void panthor_fw_init_global_iface(struct panthor_device *ptdev) if (panthor_fw_has_glb_state(ptdev)) glb_iface->input->ack_irq_mask |= GLB_STATE_MASK; - panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN); + panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN | GLB_COUNTER_EN, + GLB_IDLE_EN | GLB_COUNTER_EN); panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_CFG_ALLOC_EN | GLB_CFG_POWEROFF_TIMER | From 9d2d49027c3a9628989c9ec70ebef9d241f49c1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Molinari?= Date: Fri, 5 Dec 2025 19:22:22 +0100 Subject: [PATCH 48/94] drm/shmem-helper: Simplify page offset calculation in fault handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For a fault at address addr, the page offset is

  page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT
              = ((addr & PAGE_MASK) - vma->vm_start) >> PAGE_SHIFT
              = (addr - vma->vm_start) >> PAGE_SHIFT

Since the faulty logical page offset based on the VMA is

  vmf->pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)

we can slightly simplify the calculation using

  page_offset = vmf->pgoff - vma->vm_pgoff

v11: - remove misleading comment - add Boris R-b Signed-off-by: Loïc Molinari Reviewed-by: Boris Brezillon Link: https://patch.msgid.link/20251205182231.194072-2-loic.molinari@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/drm_gem_shmem_helper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index dc94a27710e5..e47ab3ebb5c2 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -577,8 +577,8 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) struct page *page; pgoff_t page_offset; - /* We don't use vmf->pgoff since that has the fake offset */ - page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; + /* Offset to faulty address in the VMA.
*/ + page_offset = vmf->pgoff - vma->vm_pgoff; dma_resv_lock(shmem->base.resv, NULL); From 211b9a39f2619b9c0d85bcd48aeb399397910b42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Molinari?= Date: Fri, 5 Dec 2025 19:22:23 +0100 Subject: [PATCH 49/94] drm/shmem-helper: Map huge pages in fault handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Attempt a PMD sized PFN insertion into the VMA if the faulty address of the fault handler is part of a huge page. On builds with CONFIG_TRANSPARENT_HUGEPAGE enabled, if the mmap() user address is PMD size aligned, if the GEM object is backed by shmem buffers on mountpoints setting the 'huge=' option and if the shmem backing store manages to allocate a huge folio, CPU mapping would then benefit from significantly increased memcpy() performance. When these conditions are met on a system with 2 MiB huge pages, an aligned copy of 2 MiB would raise a single page fault instead of 4096. v4: - implement map_pages instead of huge_fault v6: - get rid of map_pages handler for now (keep it for another series along with arm64 contpte support) v11: - remove page fault validity check helper - rename drm_gem_shmem_map_pmd() to drm_gem_shmem_try_map_pmd() - add Boris R-b v12: - move up ret var decl in fault handler to minimize diff Signed-off-by: Loïc Molinari Reviewed-by: Boris Brezillon Link: https://patch.msgid.link/20251205182231.194072-3-loic.molinari@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/drm_gem_shmem_helper.c | 37 ++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index e47ab3ebb5c2..e67216cbb469 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -567,6 +567,26 @@ int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev, } EXPORT_SYMBOL_GPL(drm_gem_shmem_dumb_create); +static bool drm_gem_shmem_try_map_pmd(struct vm_fault *vmf, unsigned long addr, + struct page *page) +{ +#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP + unsigned long pfn = page_to_pfn(page); + unsigned long paddr = pfn << PAGE_SHIFT; + bool aligned = (addr & ~PMD_MASK) == (paddr & ~PMD_MASK); + + if (aligned && + pmd_none(*vmf->pmd) && + folio_test_pmd_mappable(page_folio(page))) { + pfn &= PMD_MASK >> PAGE_SHIFT; + if (vmf_insert_pfn_pmd(vmf, pfn, false) == VM_FAULT_NOPAGE) + return true; + } +#endif + + return false; +} + static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; @@ -574,8 +594,9 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); loff_t num_pages = obj->size >> PAGE_SHIFT; vm_fault_t ret; - struct page *page; + struct page **pages = shmem->pages; pgoff_t page_offset; + unsigned long pfn; /* Offset to faulty address in the VMA. 
*/ page_offset = vmf->pgoff - vma->vm_pgoff; @@ -586,12 +607,18 @@ drm_WARN_ON_ONCE(obj->dev, !shmem->pages) || shmem->madv < 0) { ret = VM_FAULT_SIGBUS; - } else { - page = shmem->pages[page_offset]; - - ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page)); + goto out; } + if (drm_gem_shmem_try_map_pmd(vmf, vmf->address, pages[page_offset])) { + ret = VM_FAULT_NOPAGE; + goto out; + } + + pfn = page_to_pfn(pages[page_offset]); + ret = vmf_insert_pfn(vma, vmf->address, pfn); + + out: dma_resv_unlock(shmem->base.resv); return ret; From 99bda20d6d4cac30ed6d357658d8bc328c3b27d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Molinari?= Date: Fri, 5 Dec 2025 19:22:24 +0100 Subject: [PATCH 50/94] drm/gem: Introduce drm_gem_get_unmapped_area() fop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mmap() calls on the DRM file pointer currently always end up using mm_get_unmapped_area() to get a free mapping region. On builds with CONFIG_TRANSPARENT_HUGEPAGE enabled, this isn't ideal for GEM objects backed by shmem buffers on mountpoints setting the 'huge=' option because it can't correctly figure out the potentially huge address alignment required. This commit introduces the drm_gem_get_unmapped_area() function which is meant to be used as a get_unmapped_area file operation on the DRM file pointer to look up GEM objects based on their fake offsets and get a properly aligned region by calling shmem_get_unmapped_area() with the right file pointer. If a GEM object isn't available at the given offset or if the caller isn't granted access to it, the function falls back to mm_get_unmapped_area(). This also makes drm_gem_get_unmapped_area() part of the default GEM file operations so that all the DRM drivers can benefit from more efficient mappings thanks to the huge page fault handler introduced in the previous commit 'drm/shmem-helper: Map huge pages in fault handler'. shmem_get_unmapped_area() is reached through the shmem file's f_op, so it no longer needs to be exported for the DRM subsystem to use it. v3: - include in drm_gem.c - forward to shmem layer in builds with CONFIG_TRANSPARENT_HUGEPAGE=n v6: - use GPL variant to export drm_gem_get_unmapped_area() - don't export shmem_get_unmapped_area() anymore (use f_op instead) v11: - rename drm_gem_object_lookup_from_offset() to drm_gem_object_lookup_at_offset() - add Boris R-b Signed-off-by: Loïc Molinari Reviewed-by: Boris Brezillon Link: https://patch.msgid.link/20251205182231.194072-4-loic.molinari@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/drm_gem.c | 108 ++++++++++++++++++++++++++++++-------- include/drm/drm_gem.h | 4 ++ 2 files changed, 90 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index efc79bbf3c73..933fc89dd648 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -1177,36 +1178,27 @@ int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size, } EXPORT_SYMBOL(drm_gem_mmap_obj); -/** - * drm_gem_mmap - memory map routine for GEM objects - * @filp: DRM file pointer - * @vma: VMA for the area to be mapped - * - * If a driver supports GEM object mapping, mmap calls on the DRM file - * descriptor will end up here.
- * - * Look up the GEM object based on the offset passed in (vma->vm_pgoff will - * contain the fake offset we created when the GTT map ioctl was called on - * the object) and map it with a call to drm_gem_mmap_obj(). - * - * If the caller is not granted access to the buffer object, the mmap will fail - * with EACCES. Please see the vma manager for more information. +/* + * Look up a GEM object in offset space based on the exact start address. The + * caller must be granted access to the object. Returns a GEM object on success + * or a negative error code on failure. The returned GEM object needs to be + * released with drm_gem_object_put(). */ -int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) +static struct drm_gem_object * +drm_gem_object_lookup_at_offset(struct file *filp, unsigned long start, + unsigned long pages) { struct drm_file *priv = filp->private_data; struct drm_device *dev = priv->minor->dev; struct drm_gem_object *obj = NULL; struct drm_vma_offset_node *node; - int ret; if (drm_dev_is_unplugged(dev)) - return -ENODEV; + return ERR_PTR(-ENODEV); drm_vma_offset_lock_lookup(dev->vma_offset_manager); node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager, - vma->vm_pgoff, - vma_pages(vma)); + start, pages); if (likely(node)) { obj = container_of(node, struct drm_gem_object, vma_node); /* @@ -1225,14 +1217,86 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) drm_vma_offset_unlock_lookup(dev->vma_offset_manager); if (!obj) - return -EINVAL; + return ERR_PTR(-EINVAL); if (!drm_vma_node_is_allowed(node, priv)) { drm_gem_object_put(obj); - return -EACCES; + return ERR_PTR(-EACCES); } - ret = drm_gem_mmap_obj(obj, drm_vma_node_size(node) << PAGE_SHIFT, + return obj; +} + +/** + * drm_gem_get_unmapped_area - get memory mapping region routine for GEM objects + * @filp: DRM file pointer + * @uaddr: User address hint + * @len: Mapping length + * @pgoff: Offset (in pages) + * @flags: Mapping flags + * + * If a driver supports GEM object mapping, before ending up in drm_gem_mmap(), + * mmap calls on the DRM file descriptor will first try to find a free linear + * address space large enough for a mapping. Since GEM objects are backed by + * shmem buffers, this should preferably be handled by the shmem virtual memory + * filesystem which can appropriately align addresses to huge page sizes when + * needed. + * + * Look up the GEM object based on the offset passed in (vma->vm_pgoff will + * contain the fake offset we created) and call shmem_get_unmapped_area() with + * the right file pointer. + * + * If a GEM object is not available at the given offset or if the caller is not + * granted access to it, fall back to mm_get_unmapped_area(). 
+ */ +unsigned long drm_gem_get_unmapped_area(struct file *filp, unsigned long uaddr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + struct drm_gem_object *obj; + unsigned long ret; + + obj = drm_gem_object_lookup_at_offset(filp, pgoff, len >> PAGE_SHIFT); + if (IS_ERR(obj) || !obj->filp || !obj->filp->f_op->get_unmapped_area) + return mm_get_unmapped_area(current->mm, filp, uaddr, len, 0, + flags); + + ret = obj->filp->f_op->get_unmapped_area(obj->filp, uaddr, len, 0, + flags); + + drm_gem_object_put(obj); + + return ret; +} +EXPORT_SYMBOL_GPL(drm_gem_get_unmapped_area); + +/** + * drm_gem_mmap - memory map routine for GEM objects + * @filp: DRM file pointer + * @vma: VMA for the area to be mapped + * + * If a driver supports GEM object mapping, mmap calls on the DRM file + * descriptor will end up here. + * + * Look up the GEM object based on the offset passed in (vma->vm_pgoff will + * contain the fake offset we created) and map it with a call to + * drm_gem_mmap_obj(). + * + * If the caller is not granted access to the buffer object, the mmap will fail + * with EACCES. Please see the vma manager for more information. + */ +int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_gem_object *obj; + int ret; + + obj = drm_gem_object_lookup_at_offset(filp, vma->vm_pgoff, + vma_pages(vma)); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + ret = drm_gem_mmap_obj(obj, + drm_vma_node_size(&obj->vma_node) << PAGE_SHIFT, vma); drm_gem_object_put(obj); diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 8d48d2af2649..7c8bd67d087c 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -469,6 +469,7 @@ struct drm_gem_object { .poll = drm_poll,\ .read = drm_read,\ .llseek = noop_llseek,\ + .get_unmapped_area = drm_gem_get_unmapped_area,\ .mmap = drm_gem_mmap, \ .fop_flags = FOP_UNSIGNED_OFFSET @@ -506,6 +507,9 @@ void drm_gem_vm_close(struct vm_area_struct *vma); int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size, struct vm_area_struct *vma); int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); +unsigned long drm_gem_get_unmapped_area(struct file *filp, unsigned long uaddr, + unsigned long len, unsigned long pgoff, + unsigned long flags); /** * drm_gem_object_get - acquire a GEM buffer object reference From 6e0b1b82017b9ba16b87685e1e4902cd9dc762d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Molinari?= Date: Fri, 5 Dec 2025 19:22:25 +0100 Subject: [PATCH 51/94] drm/gem: Add huge tmpfs mountpoint helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the drm_gem_huge_mnt_create() and drm_gem_get_huge_mnt() helpers to avoid code duplication in the i915, V3D, Panfrost and Panthor drivers. The former creates and mounts a dedicated huge tmpfs mountpoint, for the lifetime of a DRM device, used at GEM object initialization. The latter retrieves the dedicated huge tmpfs mountpoint used by a DRM device. The next commits will port drivers to these helpers. 
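A minimal usage sketch (the function name is hypothetical; this mirrors the driver ports later in the series, with error handling trimmed):

  /* Probe-time opt-in: once the huge mountpoint exists, subsequent
   * drm_gem_object_init() calls allocate backing storage from it. */
  static void example_huge_mnt_init(struct drm_device *dev)
  {
          int err = drm_gem_huge_mnt_create(dev, "within_size");

          if (drm_gem_get_huge_mnt(dev))
                  drm_info(dev, "Using Transparent Hugepages\n");
          else if (err)
                  drm_warn(dev, "Can't use Transparent Hugepages (%d)\n", err);
  }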
v3: - store huge tmpfs mountpoint in drm_device v4: - return 0 in builds with CONFIG_TRANSPARENT_HUGEPAGE=n - return 0 when huge_mnt already exists - use new vfs_parse_fs_string() helper v5: - remove warning on !dev->huge_mnt and reset to NULL on free - inline drm_gem_huge_mnt_create() to remove func from text and avoid calls in builds with CONFIG_TRANSPARENT_HUGEPAGE=n - compile out drm_device's huge_mnt field in builds with CONFIG_TRANSPARENT_HUGEPAGE=n - add drm_gem_has_huge_mnt() helper v6: - move huge_mnt doc into ifdef'd section - either inline or export drm_gem_huge_mnt_create() v7: - include in drm_gem.h v9: - replace drm_gem_has_huge_mnt() by drm_gem_get_huge_mnt() v11: - doc fixes - add Boris and Maíra R-bs Signed-off-by: Loïc Molinari Reviewed-by: Boris Brezillon Reviewed-by: Maíra Canal Link: https://patch.msgid.link/20251205182231.194072-5-loic.molinari@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/drm_gem.c | 57 +++++++++++++++++++++++++++++++++++++++ include/drm/drm_device.h | 15 +++++++++++ include/drm/drm_gem.h | 33 +++++++++++++++++++++++ 3 files changed, 105 insertions(+) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 933fc89dd648..32dddb23e211 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -29,6 +29,9 @@ #include #include #include +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#include +#endif #include #include #include @@ -82,6 +85,60 @@ * up at a later date, and as our interface with shmfs for memory allocation. */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static void drm_gem_huge_mnt_free(struct drm_device *dev, void *data) +{ + kern_unmount(dev->huge_mnt); +} + +/** + * drm_gem_huge_mnt_create - Create, mount and use a huge tmpfs mountpoint + * @dev: DRM device that will use the huge tmpfs mountpoint + * @value: huge tmpfs mount option value + * + * This function creates and mounts a dedicated huge tmpfs mountpoint for the + * lifetime of the DRM device @dev which is used at GEM object initialization + * with drm_gem_object_init(). + * + * The most common option for @value is "within_size" which only allocates huge + * pages if the page will be fully within the GEM object size. "always", + * "advise" and "never" are supported too but the latter would just create a + * mountpoint similar to the default one (`shm_mnt`). See shmemfs and + * Transparent Hugepage for more information. + * + * Returns: + * 0 on success or a negative error code on failure. 
+ */ +int drm_gem_huge_mnt_create(struct drm_device *dev, const char *value) +{ + struct file_system_type *type; + struct fs_context *fc; + int ret; + + if (unlikely(drm_gem_get_huge_mnt(dev))) + return 0; + + type = get_fs_type("tmpfs"); + if (unlikely(!type)) + return -EOPNOTSUPP; + fc = fs_context_for_mount(type, SB_KERNMOUNT); + if (IS_ERR(fc)) + return PTR_ERR(fc); + ret = vfs_parse_fs_string(fc, "source", "tmpfs"); + if (unlikely(ret)) + return -ENOPARAM; + ret = vfs_parse_fs_string(fc, "huge", value); + if (unlikely(ret)) + return -ENOPARAM; + + dev->huge_mnt = fc_mount_longterm(fc); + put_fs_context(fc); + + return drmm_add_action_or_reset(dev, drm_gem_huge_mnt_free, NULL); +} +EXPORT_SYMBOL_GPL(drm_gem_huge_mnt_create); +#endif + static void drm_gem_init_release(struct drm_device *dev, void *ptr) { diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h index 5af49c5c3778..bc78fb77cc27 100644 --- a/include/drm/drm_device.h +++ b/include/drm/drm_device.h @@ -3,6 +3,9 @@ #include #include +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#include +#endif #include #include #include @@ -168,6 +171,18 @@ struct drm_device { */ struct drm_master *master; +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + /** + * @huge_mnt: + * + * Huge tmpfs mountpoint used at GEM object initialization + * drm_gem_object_init(). Drivers can call drm_gem_huge_mnt_create() to + * create, mount and use it. The default tmpfs mountpoint (`shm_mnt`) is + * used if NULL. + */ + struct vfsmount *huge_mnt; +#endif + /** * @driver_features: per-device driver features * diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 7c8bd67d087c..97b5fca8966d 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -40,6 +40,9 @@ #include #include +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#include +#endif #include struct iosys_map; @@ -492,6 +495,36 @@ struct drm_gem_object { DRM_GEM_FOPS,\ } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +int drm_gem_huge_mnt_create(struct drm_device *dev, const char *value); +#else +static inline int drm_gem_huge_mnt_create(struct drm_device *dev, + const char *value) +{ + return 0; +} +#endif + +/** + * drm_gem_get_huge_mnt - Get the huge tmpfs mountpoint used by a DRM device + * @dev: DRM device + + * This function gets the huge tmpfs mountpoint used by DRM device @dev. A huge + * tmpfs mountpoint is used instead of `shm_mnt` after a successful call to + * drm_gem_huge_mnt_create() when CONFIG_TRANSPARENT_HUGEPAGE is enabled. + + * Returns: + * The huge tmpfs mountpoint in use, NULL otherwise. + */ +static inline struct vfsmount *drm_gem_get_huge_mnt(struct drm_device *dev) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + return dev->huge_mnt; +#else + return NULL; +#endif +} + void drm_gem_object_release(struct drm_gem_object *obj); void drm_gem_object_free(struct kref *kref); int drm_gem_object_init(struct drm_device *dev, From a8a9a590221c1959716277d4b13fe658816afc0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Molinari?= Date: Fri, 5 Dec 2025 19:22:26 +0100 Subject: [PATCH 52/94] drm/i915: Use huge tmpfs mountpoint helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make use of the new drm_gem_huge_mnt_create() and drm_gem_get_huge_mnt() helpers to avoid code duplication. Now that it's just a few lines long, the single function in i915_gemfs.c is moved into i915_gem_shmem.c. 
v3: - use huge tmpfs mountpoint in drm_device - move i915_gemfs.c into i915_gem_shmem.c v4: - clean up mountpoint creation error handling v5: - use drm_gem_has_huge_mnt() helper v7: - include in i915_gem_shmem.c v8: - keep logging notice message with CONFIG_TRANSPARENT_HUGEPAGE=n - don't access huge_mnt field with CONFIG_TRANSPARENT_HUGEPAGE=n v9: - replace drm_gem_has_huge_mnt() by drm_gem_get_huge_mnt() - remove useless ternary op test in selftests/huge_pages.c v12: - fix layering violation in selftests (Tvrtko) - fix incorrect filename in commit message v13: - add Tvrtko A-b Signed-off-by: Loïc Molinari Acked-by: Tvrtko Ursulin Link: https://patch.msgid.link/20251205182231.194072-6-loic.molinari@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/i915/Makefile | 3 +- .../gpu/drm/i915/gem/i915_gem_object_types.h | 9 +-- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 58 ++++++++++----- drivers/gpu/drm/i915/gem/i915_gemfs.c | 71 ------------------- drivers/gpu/drm/i915/gem/i915_gemfs.h | 14 ---- .../gpu/drm/i915/gem/selftests/huge_pages.c | 15 +--- drivers/gpu/drm/i915/i915_drv.h | 5 -- 7 files changed, 50 insertions(+), 125 deletions(-) delete mode 100644 drivers/gpu/drm/i915/gem/i915_gemfs.c delete mode 100644 drivers/gpu/drm/i915/gem/i915_gemfs.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 84ec79b64960..b5a8c0a6b747 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -169,8 +169,7 @@ gem-y += \ gem/i915_gem_ttm_move.o \ gem/i915_gem_ttm_pm.o \ gem/i915_gem_userptr.o \ - gem/i915_gem_wait.o \ - gem/i915_gemfs.o + gem/i915_gem_wait.o i915-y += \ $(gem-y) \ i915_active.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 64600aa8227f..f94409e8ec4c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -348,12 +348,13 @@ struct drm_i915_gem_object { */ #define I915_BO_ALLOC_GPU_ONLY BIT(6) #define I915_BO_ALLOC_CCS_AUX BIT(7) +#define I915_BO_ALLOC_NOTHP BIT(8) /* * Object is allowed to retain its initial data and will not be cleared on first * access if used along with I915_BO_ALLOC_USER. This is mainly to keep * preallocated framebuffer data intact while transitioning it to i915drmfb. */ -#define I915_BO_PREALLOC BIT(8) +#define I915_BO_PREALLOC BIT(9) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ I915_BO_ALLOC_CPU_CLEAR | \ @@ -363,9 +364,9 @@ struct drm_i915_gem_object { I915_BO_ALLOC_GPU_ONLY | \ I915_BO_ALLOC_CCS_AUX | \ I915_BO_PREALLOC) -#define I915_BO_READONLY BIT(9) -#define I915_TILING_QUIRK_BIT 10 /* unknown swizzling; do not release! */ -#define I915_BO_PROTECTED BIT(11) +#define I915_BO_READONLY BIT(10) +#define I915_TILING_QUIRK_BIT 11 /* unknown swizzling; do not release! 
*/ +#define I915_BO_PROTECTED BIT(12) /** * @mem_flags - Mutable placement-related flags * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 26dda55a07ff..6ad1d6f99363 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -9,14 +9,16 @@ #include #include +#include +#include #include "gem/i915_gem_region.h" #include "i915_drv.h" #include "i915_gem_object.h" #include "i915_gem_tiling.h" -#include "i915_gemfs.h" #include "i915_scatterlist.h" #include "i915_trace.h" +#include "i915_utils.h" /* * Move folios to appropriate lru and release the batch, decrementing the @@ -494,9 +496,11 @@ const struct drm_i915_gem_object_ops i915_gem_shmem_ops = { static int __create_shmem(struct drm_i915_private *i915, struct drm_gem_object *obj, - resource_size_t size) + resource_size_t size, + unsigned int flags) { - unsigned long flags = VM_NORESERVE; + unsigned long shmem_flags = VM_NORESERVE; + struct vfsmount *huge_mnt; struct file *filp; drm_gem_private_object_init(&i915->drm, obj, size); @@ -515,11 +519,12 @@ static int __create_shmem(struct drm_i915_private *i915, if (BITS_PER_LONG == 64 && size > MAX_LFS_FILESIZE) return -E2BIG; - if (i915->mm.gemfs) - filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, - flags); + huge_mnt = drm_gem_get_huge_mnt(&i915->drm); + if (!(flags & I915_BO_ALLOC_NOTHP) && huge_mnt) + filp = shmem_file_setup_with_mnt(huge_mnt, "i915", size, + shmem_flags); else - filp = shmem_file_setup("i915", size, flags); + filp = shmem_file_setup("i915", size, shmem_flags); if (IS_ERR(filp)) return PTR_ERR(filp); @@ -548,7 +553,7 @@ static int shmem_object_init(struct intel_memory_region *mem, gfp_t mask; int ret; - ret = __create_shmem(i915, &obj->base, size); + ret = __create_shmem(i915, &obj->base, size, flags); if (ret) return ret; @@ -644,21 +649,40 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915, static int init_shmem(struct intel_memory_region *mem) { - i915_gemfs_init(mem->i915); + struct drm_i915_private *i915 = mem->i915; + + /* + * By creating our own shmemfs mountpoint, we can pass in + * mount flags that better match our usecase. + * + * One example, although it is probably better with a per-file + * control, is selecting huge page allocations ("huge=within_size"). + * However, we only do so on platforms which benefit from it, or to + * offset the overhead of iommu lookups, where with latter it is a net + * win even on platforms which would otherwise see some performance + * regressions such a slow reads issue on Broadwell and Skylake. + */ + + if (GRAPHICS_VER(i915) < 11 && !i915_vtd_active(i915)) + goto no_thp; + + drm_gem_huge_mnt_create(&i915->drm, "within_size"); + if (drm_gem_get_huge_mnt(&i915->drm)) + drm_info(&i915->drm, "Using Transparent Hugepages\n"); + else + drm_notice(&i915->drm, + "Transparent Hugepage support is recommended for optimal performance%s\n", + GRAPHICS_VER(i915) >= 11 ? " on this platform!" : + " when IOMMU is enabled!"); + + no_thp: intel_memory_region_set_name(mem, "system"); - return 0; /* We have fallback to the kernel mnt if gemfs init failed. */ -} - -static int release_shmem(struct intel_memory_region *mem) -{ - i915_gemfs_fini(mem->i915); - return 0; + return 0; /* We have fallback to the kernel mnt if huge mnt failed. 
*/ } static const struct intel_memory_region_ops shmem_region_ops = { .init = init_shmem, - .release = release_shmem, .init_object = shmem_object_init, }; diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c deleted file mode 100644 index 1f1290214031..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.c +++ /dev/null @@ -1,71 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2017 Intel Corporation - */ - -#include -#include -#include - -#include - -#include "i915_drv.h" -#include "i915_gemfs.h" -#include "i915_utils.h" - -void i915_gemfs_init(struct drm_i915_private *i915) -{ - struct file_system_type *type; - struct fs_context *fc; - struct vfsmount *gemfs; - int ret; - - /* - * By creating our own shmemfs mountpoint, we can pass in - * mount flags that better match our usecase. - * - * One example, although it is probably better with a per-file - * control, is selecting huge page allocations ("huge=within_size"). - * However, we only do so on platforms which benefit from it, or to - * offset the overhead of iommu lookups, where with latter it is a net - * win even on platforms which would otherwise see some performance - * regressions such a slow reads issue on Broadwell and Skylake. - */ - - if (GRAPHICS_VER(i915) < 11 && !i915_vtd_active(i915)) - return; - - if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) - goto err; - - type = get_fs_type("tmpfs"); - if (!type) - goto err; - - fc = fs_context_for_mount(type, SB_KERNMOUNT); - if (IS_ERR(fc)) - goto err; - ret = vfs_parse_fs_string(fc, "source", "tmpfs"); - if (!ret) - ret = vfs_parse_fs_string(fc, "huge", "within_size"); - if (!ret) - gemfs = fc_mount_longterm(fc); - put_fs_context(fc); - if (ret) - goto err; - - i915->mm.gemfs = gemfs; - drm_info(&i915->drm, "Using Transparent Hugepages\n"); - return; - -err: - drm_notice(&i915->drm, - "Transparent Hugepage support is recommended for optimal performance%s\n", - GRAPHICS_VER(i915) >= 11 ? " on this platform!" 
: - " when IOMMU is enabled!"); -} - -void i915_gemfs_fini(struct drm_i915_private *i915) -{ - kern_unmount(i915->mm.gemfs); -} diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.h b/drivers/gpu/drm/i915/gem/i915_gemfs.h deleted file mode 100644 index 16d4333c9a4e..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2017 Intel Corporation - */ - -#ifndef __I915_GEMFS_H__ -#define __I915_GEMFS_H__ - -struct drm_i915_private; - -void i915_gemfs_init(struct drm_i915_private *i915); -void i915_gemfs_fini(struct drm_i915_private *i915); - -#endif diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index bd08605a1611..02e9bf87f654 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1316,7 +1316,7 @@ typedef struct drm_i915_gem_object * static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) { - return i915->mm.gemfs && has_transparent_hugepage(); + return !!drm_gem_get_huge_mnt(&i915->drm); } static struct drm_i915_gem_object * @@ -1761,7 +1761,6 @@ static int igt_tmpfs_fallback(void *arg) struct drm_i915_private *i915 = arg; struct i915_address_space *vm; struct i915_gem_context *ctx; - struct vfsmount *gemfs = i915->mm.gemfs; struct drm_i915_gem_object *obj; struct i915_vma *vma; struct file *file; @@ -1779,15 +1778,8 @@ static int igt_tmpfs_fallback(void *arg) } vm = i915_gem_context_get_eb_vm(ctx); - /* - * Make sure that we don't burst into a ball of flames upon falling back - * to tmpfs, which we rely on if on the off-chance we encounter a failure - * when setting up gemfs. - */ - - i915->mm.gemfs = NULL; - - obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); + obj = i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM], + PAGE_SIZE, 0, I915_BO_ALLOC_NOTHP); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto out_restore; @@ -1819,7 +1811,6 @@ static int igt_tmpfs_fallback(void *arg) out_put: i915_gem_object_put(obj); out_restore: - i915->mm.gemfs = gemfs; i915_vm_put(vm); out: diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 95f9ddf22ce4..93a5af3de334 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -141,11 +141,6 @@ struct i915_gem_mm { */ atomic_t free_count; - /** - * tmpfs instance used for shmem backed objects - */ - struct vfsmount *gemfs; - struct intel_memory_region *regions[INTEL_REGION_UNKNOWN]; struct notifier_block oom_notifier; From f19f99bbaf9f91d0b0a95d760f4d6755758b913d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Molinari?= Date: Fri, 5 Dec 2025 19:22:27 +0100 Subject: [PATCH 53/94] drm/v3d: Use huge tmpfs mountpoint helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make use of the new drm_gem_huge_mnt_create() and drm_gem_get_huge_mnt() helpers to avoid code duplication. Now that it's just a few lines long, the single function in v3d_gemfs.c is moved into v3d_gem.c. 
v3: - use huge tmpfs mountpoint in drm_device - move v3d_gemfs.c into v3d_gem.c v4: - clean up mountpoint creation error handling v5: - fix CONFIG_TRANSPARENT_HUGEPAGE check - use drm_gem_has_huge_mnt() helper v8: - don't access huge_mnt field with CONFIG_TRANSPARENT_HUGEPAGE=n v9: - replace drm_gem_has_huge_mnt() by drm_gem_get_huge_mnt() v10: - get rid of CONFIG_TRANSPARENT_HUGEPAGE ifdefs v11: - remove superfluous comment - add Maíra and Boris R-bs Signed-off-by: Loïc Molinari Reviewed-by: Maíra Canal Reviewed-by: Boris Brezillon Link: https://patch.msgid.link/20251205182231.194072-7-loic.molinari@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/v3d/Makefile | 3 +- drivers/gpu/drm/v3d/v3d_bo.c | 5 ++- drivers/gpu/drm/v3d/v3d_drv.c | 2 +- drivers/gpu/drm/v3d/v3d_drv.h | 11 +----- drivers/gpu/drm/v3d/v3d_gem.c | 21 +++++++++-- drivers/gpu/drm/v3d/v3d_gemfs.c | 62 --------------------------------- 6 files changed, 24 insertions(+), 80 deletions(-) delete mode 100644 drivers/gpu/drm/v3d/v3d_gemfs.c diff --git a/drivers/gpu/drm/v3d/Makefile b/drivers/gpu/drm/v3d/Makefile index fcf710926057..b7d673f1153b 100644 --- a/drivers/gpu/drm/v3d/Makefile +++ b/drivers/gpu/drm/v3d/Makefile @@ -13,8 +13,7 @@ v3d-y := \ v3d_trace_points.o \ v3d_sched.o \ v3d_sysfs.o \ - v3d_submit.o \ - v3d_gemfs.o + v3d_submit.o v3d-$(CONFIG_DEBUG_FS) += v3d_debugfs.o diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c index d9547f5117b9..3ee8d9c36d92 100644 --- a/drivers/gpu/drm/v3d/v3d_bo.c +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -114,7 +114,7 @@ v3d_bo_create_finish(struct drm_gem_object *obj) if (IS_ERR(sgt)) return PTR_ERR(sgt); - if (!v3d->gemfs) + if (!drm_gem_get_huge_mnt(obj->dev)) align = SZ_4K; else if (obj->size >= SZ_1M) align = SZ_1M; @@ -150,12 +150,11 @@ struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv, size_t unaligned_size) { struct drm_gem_shmem_object *shmem_obj; - struct v3d_dev *v3d = to_v3d_dev(dev); struct v3d_bo *bo; int ret; shmem_obj = drm_gem_shmem_create_with_mnt(dev, unaligned_size, - v3d->gemfs); + drm_gem_get_huge_mnt(dev)); if (IS_ERR(shmem_obj)) return ERR_CAST(shmem_obj); bo = to_v3d_bo(&shmem_obj->base); diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index e8a46c8bad8a..8faa9382846f 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -107,7 +107,7 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data, args->value = v3d->perfmon_info.max_counters; return 0; case DRM_V3D_PARAM_SUPPORTS_SUPER_PAGES: - args->value = !!v3d->gemfs; + args->value = !!drm_gem_get_huge_mnt(dev); return 0; case DRM_V3D_PARAM_GLOBAL_RESET_COUNTER: mutex_lock(&v3d->reset_lock); diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 1884686985b8..99a39329bb85 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -158,11 +158,6 @@ struct v3d_dev { struct drm_mm mm; spinlock_t mm_lock; - /* - * tmpfs instance used for shmem backed objects - */ - struct vfsmount *gemfs; - struct work_struct overflow_mem_work; struct v3d_queue_state queue[V3D_MAX_QUEUES]; @@ -569,6 +564,7 @@ extern const struct dma_fence_ops v3d_fence_ops; struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue q); /* v3d_gem.c */ +extern bool super_pages; int v3d_gem_init(struct drm_device *dev); void v3d_gem_destroy(struct drm_device *dev); void v3d_reset_sms(struct v3d_dev *v3d); @@ -576,11 +572,6 @@ void v3d_reset(struct v3d_dev *v3d); void 
v3d_invalidate_caches(struct v3d_dev *v3d); void v3d_clean_caches(struct v3d_dev *v3d); -/* v3d_gemfs.c */ -extern bool super_pages; -void v3d_gemfs_init(struct v3d_dev *v3d); -void v3d_gemfs_fini(struct v3d_dev *v3d); - /* v3d_submit.c */ void v3d_job_cleanup(struct v3d_job *job); void v3d_job_put(struct v3d_job *job); diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 5a180dc6c452..697b0b3ca92c 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -259,6 +259,24 @@ v3d_invalidate_caches(struct v3d_dev *v3d) v3d_invalidate_slices(v3d, 0); } +static void +v3d_huge_mnt_init(struct v3d_dev *v3d) +{ + int err = 0; + + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && super_pages) + err = drm_gem_huge_mnt_create(&v3d->drm, "within_size"); + + if (drm_gem_get_huge_mnt(&v3d->drm)) + drm_info(&v3d->drm, "Using Transparent Hugepages\n"); + else if (err) + drm_warn(&v3d->drm, "Can't use Transparent Hugepages (%d)\n", + err); + else + drm_notice(&v3d->drm, + "Transparent Hugepage support is recommended for optimal performance on this platform!\n"); +} + int v3d_gem_init(struct drm_device *dev) { @@ -310,7 +328,7 @@ v3d_gem_init(struct drm_device *dev) v3d_init_hw_state(v3d); v3d_mmu_set_page_table(v3d); - v3d_gemfs_init(v3d); + v3d_huge_mnt_init(v3d); ret = v3d_sched_init(v3d); if (ret) { @@ -330,7 +348,6 @@ v3d_gem_destroy(struct drm_device *dev) enum v3d_queue q; v3d_sched_fini(v3d); - v3d_gemfs_fini(v3d); /* Waiting for jobs to finish would need to be done before * unregistering V3D. diff --git a/drivers/gpu/drm/v3d/v3d_gemfs.c b/drivers/gpu/drm/v3d/v3d_gemfs.c deleted file mode 100644 index bf351fc0d488..000000000000 --- a/drivers/gpu/drm/v3d/v3d_gemfs.c +++ /dev/null @@ -1,62 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* Copyright (C) 2024 Raspberry Pi */ - -#include -#include -#include - -#include - -#include "v3d_drv.h" - -void v3d_gemfs_init(struct v3d_dev *v3d) -{ - struct file_system_type *type; - struct fs_context *fc; - struct vfsmount *gemfs; - int ret; - - /* - * By creating our own shmemfs mountpoint, we can pass in - * mount flags that better match our usecase. However, we - * only do so on platforms which benefit from it. - */ - if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) - goto err; - - /* The user doesn't want to enable Super Pages */ - if (!super_pages) - goto err; - - type = get_fs_type("tmpfs"); - if (!type) - goto err; - - fc = fs_context_for_mount(type, SB_KERNMOUNT); - if (IS_ERR(fc)) - goto err; - ret = vfs_parse_fs_string(fc, "source", "tmpfs"); - if (!ret) - ret = vfs_parse_fs_string(fc, "huge", "within_size"); - if (!ret) - gemfs = fc_mount_longterm(fc); - put_fs_context(fc); - if (ret) - goto err; - - v3d->gemfs = gemfs; - drm_info(&v3d->drm, "Using Transparent Hugepages\n"); - - return; - -err: - v3d->gemfs = NULL; - drm_notice(&v3d->drm, - "Transparent Hugepage support is recommended for optimal performance on this platform!\n"); -} - -void v3d_gemfs_fini(struct v3d_dev *v3d) -{ - if (v3d->gemfs) - kern_unmount(v3d->gemfs); -} From 7cdf69d903759b81abde5973d703c93a742ddab7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Molinari?= Date: Fri, 5 Dec 2025 19:22:28 +0100 Subject: [PATCH 54/94] drm/gem: Get rid of *_with_mnt helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_gem_object_init_with_mnt() and drm_gem_shmem_create_with_mnt() can be removed now that the drivers use the new drm_gem_huge_mnt_create() and drm_gem_get_huge_mnt() helpers. 
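The resulting migration pattern, sketched for a driver that previously plumbed its own mountpoint through object creation ('my_gemfs' is a stand-in for the driver-private vfsmount being removed):

  /* Before: mountpoint passed explicitly at each creation site. */
  shmem_obj = drm_gem_shmem_create_with_mnt(dev, size, my_gemfs);

  /* After: one probe-time call, plain creation everywhere else. */
  drm_gem_huge_mnt_create(dev, "within_size"); /* at device init */
  shmem_obj = drm_gem_shmem_create(dev, size); /* per object */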
v5: - use drm_gem_has_huge_mnt() helper - compile out shmem_file_setup_with_mnt() call in builds with CONFIG_TRANSPARENT_HUGEPAGE=n v9: - replace drm_gem_has_huge_mnt() with drm_gem_get_huge_mnt() Signed-off-by: Loïc Molinari Reviewed-by: Boris Brezillon Reviewed-by: Maíra Canal Link: https://patch.msgid.link/20251205182231.194072-8-loic.molinari@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/drm_gem.c | 37 +++++++------------------ drivers/gpu/drm/drm_gem_shmem_helper.c | 38 ++++++-------------------- drivers/gpu/drm/v3d/v3d_bo.c | 3 +- include/drm/drm_gem.h | 3 -- include/drm/drm_gem_shmem_helper.h | 3 -- 5 files changed, 19 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 32dddb23e211..6021c4087a08 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -171,29 +171,28 @@ drm_gem_init(struct drm_device *dev) } /** - * drm_gem_object_init_with_mnt - initialize an allocated shmem-backed GEM - * object in a given shmfs mountpoint + * drm_gem_object_init - initialize an allocated shmem-backed GEM object * * @dev: drm_device the object should be initialized for * @obj: drm_gem_object to initialize * @size: object size - * @gemfs: tmpfs mount where the GEM object will be created. If NULL, use - * the usual tmpfs mountpoint (`shm_mnt`). * * Initialize an already allocated GEM object of the specified size with - * shmfs backing store. + * shmfs backing store. A huge mountpoint can be used by calling + * drm_gem_huge_mnt_create() beforehand. */ -int drm_gem_object_init_with_mnt(struct drm_device *dev, - struct drm_gem_object *obj, size_t size, - struct vfsmount *gemfs) +int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj, + size_t size) { + struct vfsmount *huge_mnt; struct file *filp; drm_gem_private_object_init(dev, obj, size); - if (gemfs) - filp = shmem_file_setup_with_mnt(gemfs, "drm mm object", size, - VM_NORESERVE); + huge_mnt = drm_gem_get_huge_mnt(dev); + if (huge_mnt) + filp = shmem_file_setup_with_mnt(huge_mnt, "drm mm object", + size, VM_NORESERVE); else filp = shmem_file_setup("drm mm object", size, VM_NORESERVE); @@ -204,22 +203,6 @@ int drm_gem_object_init_with_mnt(struct drm_device *dev, return 0; } -EXPORT_SYMBOL(drm_gem_object_init_with_mnt); - -/** - * drm_gem_object_init - initialize an allocated shmem-backed GEM object - * @dev: drm_device the object should be initialized for - * @obj: drm_gem_object to initialize - * @size: object size - * - * Initialize an already allocated GEM object of the specified size with - * shmfs backing store. 
- */ -int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj, - size_t size) -{ - return drm_gem_object_init_with_mnt(dev, obj, size, NULL); -} EXPORT_SYMBOL(drm_gem_object_init); /** diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index e67216cbb469..f8bcd1b0eb32 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -50,7 +50,7 @@ static const struct drm_gem_object_funcs drm_gem_shmem_funcs = { }; static int __drm_gem_shmem_init(struct drm_device *dev, struct drm_gem_shmem_object *shmem, - size_t size, bool private, struct vfsmount *gemfs) + size_t size, bool private) { struct drm_gem_object *obj = &shmem->base; int ret = 0; @@ -62,7 +62,7 @@ static int __drm_gem_shmem_init(struct drm_device *dev, struct drm_gem_shmem_obj drm_gem_private_object_init(dev, obj, size); shmem->map_wc = false; /* dma-buf mappings use always writecombine */ } else { - ret = drm_gem_object_init_with_mnt(dev, obj, size, gemfs); + ret = drm_gem_object_init(dev, obj, size); } if (ret) { drm_gem_private_object_fini(obj); @@ -103,13 +103,12 @@ static int __drm_gem_shmem_init(struct drm_device *dev, struct drm_gem_shmem_obj */ int drm_gem_shmem_init(struct drm_device *dev, struct drm_gem_shmem_object *shmem, size_t size) { - return __drm_gem_shmem_init(dev, shmem, size, false, NULL); + return __drm_gem_shmem_init(dev, shmem, size, false); } EXPORT_SYMBOL_GPL(drm_gem_shmem_init); static struct drm_gem_shmem_object * -__drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private, - struct vfsmount *gemfs) +__drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private) { struct drm_gem_shmem_object *shmem; struct drm_gem_object *obj; @@ -129,7 +128,7 @@ __drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private, obj = &shmem->base; } - ret = __drm_gem_shmem_init(dev, shmem, size, private, gemfs); + ret = __drm_gem_shmem_init(dev, shmem, size, private); if (ret) { kfree(obj); return ERR_PTR(ret); @@ -150,31 +149,10 @@ __drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private, */ struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t size) { - return __drm_gem_shmem_create(dev, size, false, NULL); + return __drm_gem_shmem_create(dev, size, false); } EXPORT_SYMBOL_GPL(drm_gem_shmem_create); -/** - * drm_gem_shmem_create_with_mnt - Allocate an object with the given size in a - * given mountpoint - * @dev: DRM device - * @size: Size of the object to allocate - * @gemfs: tmpfs mount where the GEM object will be created - * - * This function creates a shmem GEM object in a given tmpfs mountpoint. - * - * Returns: - * A struct drm_gem_shmem_object * on success or an ERR_PTR()-encoded negative - * error code on failure. - */ -struct drm_gem_shmem_object *drm_gem_shmem_create_with_mnt(struct drm_device *dev, - size_t size, - struct vfsmount *gemfs) -{ - return __drm_gem_shmem_create(dev, size, false, gemfs); -} -EXPORT_SYMBOL_GPL(drm_gem_shmem_create_with_mnt); - /** * drm_gem_shmem_release - Release resources associated with a shmem GEM object. 
* @shmem: shmem GEM object @@ -851,7 +829,7 @@ drm_gem_shmem_prime_import_sg_table(struct drm_device *dev, size_t size = PAGE_ALIGN(attach->dmabuf->size); struct drm_gem_shmem_object *shmem; - shmem = __drm_gem_shmem_create(dev, size, true, NULL); + shmem = __drm_gem_shmem_create(dev, size, true); if (IS_ERR(shmem)) return ERR_CAST(shmem); @@ -899,7 +877,7 @@ struct drm_gem_object *drm_gem_shmem_prime_import_no_map(struct drm_device *dev, size = PAGE_ALIGN(attach->dmabuf->size); - shmem = __drm_gem_shmem_create(dev, size, true, NULL); + shmem = __drm_gem_shmem_create(dev, size, true); if (IS_ERR(shmem)) { ret = PTR_ERR(shmem); goto fail_detach; diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c index 3ee8d9c36d92..c4316b768b3d 100644 --- a/drivers/gpu/drm/v3d/v3d_bo.c +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -153,8 +153,7 @@ struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv, struct v3d_bo *bo; int ret; - shmem_obj = drm_gem_shmem_create_with_mnt(dev, unaligned_size, - drm_gem_get_huge_mnt(dev)); + shmem_obj = drm_gem_shmem_create(dev, unaligned_size); if (IS_ERR(shmem_obj)) return ERR_CAST(shmem_obj); bo = to_v3d_bo(&shmem_obj->base); diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 97b5fca8966d..cca815dc87f3 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -529,9 +529,6 @@ void drm_gem_object_release(struct drm_gem_object *obj); void drm_gem_object_free(struct kref *kref); int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size); -int drm_gem_object_init_with_mnt(struct drm_device *dev, - struct drm_gem_object *obj, size_t size, - struct vfsmount *gemfs); void drm_gem_private_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size); void drm_gem_private_object_fini(struct drm_gem_object *obj); diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index 589f7bfe7506..6b6478f5ca24 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -109,9 +109,6 @@ int drm_gem_shmem_init(struct drm_device *dev, struct drm_gem_shmem_object *shmem, size_t size); struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t size); -struct drm_gem_shmem_object *drm_gem_shmem_create_with_mnt(struct drm_device *dev, - size_t size, - struct vfsmount *gemfs); void drm_gem_shmem_release(struct drm_gem_shmem_object *shmem); void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem); From c569b369cc2114526bec2ba0a41a49cfc27609b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Molinari?= Date: Fri, 5 Dec 2025 19:22:29 +0100 Subject: [PATCH 55/94] drm/panthor: Introduce huge tmpfs mountpoint option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the 'panthor.transparent_hugepage' boolean module parameter (true by default). When the parameter is enabled, a new tmpfs mountpoint is created and mounted using the 'huge=within_size' option. It's then used at GEM object creation instead of the default 'shm_mnt' mountpoint in order to enable Transparent Hugepage (THP) for the object (without having to rely on a system-wide parameter).
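Usage note: the parameter is read-only after load (perms 0400), so THP use can only be disabled up front, e.g. with panthor.transparent_hugepage=0 on the kernel command line or 'modprobe panthor transparent_hugepage=0' when loading the module.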
v3: - use huge tmpfs mountpoint in drm_device v4: - fix builds with CONFIG_TRANSPARENT_HUGEPAGE=n - clean up mountpoint creation error handling - print negative error value v5: - use drm_gem_has_huge_tmp() helper - get rid of CONFIG_TRANSPARENT_HUGEPAGE ifdefs v9: - replace drm_gem_has_huge_tmp() by drm_gem_get_huge_tmp() v11: - enable 'panthor.transparent_hugepage' by default Signed-off-by: Loïc Molinari Reviewed-by: Boris Brezillon Link: https://patch.msgid.link/20251205182231.194072-9-loic.molinari@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_device.c | 3 +++ drivers/gpu/drm/panthor/panthor_drv.c | 7 +++++++ drivers/gpu/drm/panthor/panthor_drv.h | 9 +++++++++ drivers/gpu/drm/panthor/panthor_gem.c | 18 ++++++++++++++++++ drivers/gpu/drm/panthor/panthor_gem.h | 2 ++ 5 files changed, 39 insertions(+) create mode 100644 drivers/gpu/drm/panthor/panthor_drv.h diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c index e133b1e0ad6d..2979ee0e52c2 100644 --- a/drivers/gpu/drm/panthor/panthor_device.c +++ b/drivers/gpu/drm/panthor/panthor_device.c @@ -18,6 +18,7 @@ #include "panthor_devfreq.h" #include "panthor_device.h" #include "panthor_fw.h" +#include "panthor_gem.h" #include "panthor_gpu.h" #include "panthor_hw.h" #include "panthor_mmu.h" @@ -294,6 +295,8 @@ int panthor_device_init(struct panthor_device *ptdev) if (ret) goto err_unplug_fw; + panthor_gem_init(ptdev); + /* ~3 frames */ pm_runtime_set_autosuspend_delay(ptdev->base.dev, 50); pm_runtime_use_autosuspend(ptdev->base.dev); diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 0b0ec3b978c6..1cfed4fc3503 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -1559,6 +1559,7 @@ static const struct file_operations panthor_drm_driver_fops = { .read = drm_read, .llseek = noop_llseek, .mmap = panthor_mmap, + .get_unmapped_area = drm_gem_get_unmapped_area, .show_fdinfo = drm_show_fdinfo, .fop_flags = FOP_UNSIGNED_OFFSET, }; @@ -1627,6 +1628,12 @@ static const struct drm_driver panthor_drm_driver = { #endif }; +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +bool panthor_transparent_hugepage = true; +module_param_named(transparent_hugepage, panthor_transparent_hugepage, bool, 0400); +MODULE_PARM_DESC(transparent_hugepage, "Use a dedicated tmpfs mount point with Transparent Hugepage enabled (true = default)"); +#endif + static int panthor_probe(struct platform_device *pdev) { struct panthor_device *ptdev; diff --git a/drivers/gpu/drm/panthor/panthor_drv.h b/drivers/gpu/drm/panthor/panthor_drv.h new file mode 100644 index 000000000000..1bc7ddbad23e --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_drv.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +/* Copyright 2025 Amazon.com, Inc. or its affiliates */ + +#ifndef __PANTHOR_DRV_H__ +#define __PANTHOR_DRV_H__ + +extern bool panthor_transparent_hugepage; + +#endif diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index 10d255cccc09..7ae07a9bc996 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 or MIT /* Copyright 2019 Linaro, Ltd, Rob Herring */ /* Copyright 2023 Collabora ltd. */ +/* Copyright 2025 Amazon.com, Inc. 
or its affiliates */ #include #include @@ -12,10 +13,27 @@ #include #include "panthor_device.h" +#include "panthor_drv.h" #include "panthor_fw.h" #include "panthor_gem.h" #include "panthor_mmu.h" +void panthor_gem_init(struct panthor_device *ptdev) +{ + int err; + + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + !panthor_transparent_hugepage) + return; + + err = drm_gem_huge_mnt_create(&ptdev->base, "within_size"); + if (drm_gem_get_huge_mnt(&ptdev->base)) + drm_info(&ptdev->base, "Using Transparent Hugepage\n"); + else if (err) + drm_warn(&ptdev->base, "Can't use Transparent Hugepage (%d)\n", + err); +} + #ifdef CONFIG_DEBUG_FS static void panthor_gem_debugfs_bo_init(struct panthor_gem_object *bo) { diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h index 80c6e24112d0..2eefe9104e5e 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.h +++ b/drivers/gpu/drm/panthor/panthor_gem.h @@ -136,6 +136,8 @@ struct panthor_gem_object *to_panthor_bo(struct drm_gem_object *obj) return container_of(to_drm_gem_shmem_obj(obj), struct panthor_gem_object, base); } +void panthor_gem_init(struct panthor_device *ptdev); + struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size); int From c12e9fcb5a5a9713e242e8c9c6adecdea5067241 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Molinari?= Date: Fri, 5 Dec 2025 19:22:30 +0100 Subject: [PATCH 56/94] drm/panfrost: Introduce huge tmpfs mountpoint option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the 'panfrost.transparent_hugepage' boolean module parameter (true by default). When the parameter is enabled, a new tmpfs mountpoint is created and mounted using the 'huge=within_size' option. It's then used at GEM object creation instead of the default 'shm_mnt' mountpoint in order to enable Transparent Hugepage (THP) for the object (without having to rely on a system-wide parameter).
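For comparison, the system-wide alternatives this avoids are the 'transparent_hugepage_shmem' kernel parameter and the /sys/kernel/mm/transparent_hugepage/shmem_enabled sysfs knob, which the drm-mm.rst update at the end of this series documents for drivers that don't create a huge mountpoint themselves.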
v3: - use huge tmpfs mountpoint in drm_device v4: - fix builds with CONFIG_TRANSPARENT_HUGEPAGE=n - clean up mountpoint creation error handling - print negative error value v5: - use drm_gem_has_huge_tmp() helper - get rid of CONFIG_TRANSPARENT_HUGEPAGE ifdefs v9: - replace drm_gem_has_huge_tmp() by drm_gem_get_huge_tmp() v11: - enable 'panfrost.transparent_hugepage' by default Signed-off-by: Loïc Molinari Reviewed-by: Boris Brezillon Link: https://patch.msgid.link/20251205182231.194072-10-loic.molinari@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panfrost/panfrost_device.c | 3 +++ drivers/gpu/drm/panfrost/panfrost_drv.c | 6 ++++++ drivers/gpu/drm/panfrost/panfrost_drv.h | 9 +++++++++ drivers/gpu/drm/panfrost/panfrost_gem.c | 18 ++++++++++++++++++ drivers/gpu/drm/panfrost/panfrost_gem.h | 2 ++ 5 files changed, 38 insertions(+) create mode 100644 drivers/gpu/drm/panfrost/panfrost_drv.h diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c index c61b97af120c..dedc13e56631 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.c +++ b/drivers/gpu/drm/panfrost/panfrost_device.c @@ -12,6 +12,7 @@ #include "panfrost_device.h" #include "panfrost_devfreq.h" #include "panfrost_features.h" +#include "panfrost_gem.h" #include "panfrost_issues.h" #include "panfrost_gpu.h" #include "panfrost_job.h" @@ -267,6 +268,8 @@ int panfrost_device_init(struct panfrost_device *pfdev) if (err) goto out_job; + panfrost_gem_init(pfdev); + return 0; out_job: panfrost_jm_fini(pfdev); diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 7d8c7c337606..4f5f19eda587 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -858,6 +858,12 @@ static const struct drm_driver panfrost_drm_driver = { #endif }; +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +bool panfrost_transparent_hugepage = true; +module_param_named(transparent_hugepage, panfrost_transparent_hugepage, bool, 0400); +MODULE_PARM_DESC(transparent_hugepage, "Use a dedicated tmpfs mount point with Transparent Hugepage enabled (true = default)"); +#endif + static int panfrost_probe(struct platform_device *pdev) { struct panfrost_device *pfdev; diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.h b/drivers/gpu/drm/panfrost/panfrost_drv.h new file mode 100644 index 000000000000..a2277ec61aab --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_drv.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +/* Copyright 2025 Amazon.com, Inc. or its affiliates */ + +#ifndef __PANFROST_DRV_H__ +#define __PANFROST_DRV_H__ + +extern bool panfrost_transparent_hugepage; + +#endif diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 8041b65c6609..c1688a542ec2 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright 2019 Linaro, Ltd, Rob Herring */ +/* Copyright 2025 Amazon.com, Inc. 
or its affiliates */ #include #include @@ -10,9 +11,26 @@ #include #include #include "panfrost_device.h" +#include "panfrost_drv.h" #include "panfrost_gem.h" #include "panfrost_mmu.h" +void panfrost_gem_init(struct panfrost_device *pfdev) +{ + int err; + + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + !panfrost_transparent_hugepage) + return; + + err = drm_gem_huge_mnt_create(&pfdev->base, "within_size"); + if (drm_gem_get_huge_mnt(&pfdev->base)) + drm_info(&pfdev->base, "Using Transparent Hugepage\n"); + else if (err) + drm_warn(&pfdev->base, "Can't use Transparent Hugepage (%d)\n", + err); +} + #ifdef CONFIG_DEBUG_FS static void panfrost_gem_debugfs_bo_add(struct panfrost_device *pfdev, struct panfrost_gem_object *bo) diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index 8de3e76f2717..1a62529ff06f 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -124,6 +124,8 @@ drm_mm_node_to_panfrost_mapping(struct drm_mm_node *node) return container_of(node, struct panfrost_gem_mapping, mmnode); } +void panfrost_gem_init(struct panfrost_device *pfdev); + struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t size); struct drm_gem_object * From 70478348fc6d52d5bb7568a035d3cbe5bcc6af4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Molinari?= Date: Fri, 5 Dec 2025 19:22:31 +0100 Subject: [PATCH 57/94] Documentation/gpu/drm-mm: Add THP paragraph to GEM mapping section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a paragraph to the GEM Objects Creation section about the drm_gem_huge_mnt_create() helper and to the GEM objects mapping section explaining how transparent huge pages are handled by GEM. v4: - fix wording after huge_pages handler removal v6: - fix wording after map_pages handler removal v11: - mention drm_gem_huge_mnt_create() helper - add Boris and Maíra R-bs Signed-off-by: Loïc Molinari Reviewed-by: Bagas Sanjaya Reviewed-by: Boris Brezillon Reviewed-by: Maíra Canal Link: https://patch.msgid.link/20251205182231.194072-11-loic.molinari@collabora.com Signed-off-by: Boris Brezillon --- Documentation/gpu/drm-mm.rst | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index d55751cad67c..f22433470c76 100644 --- a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -155,7 +155,12 @@ drm_gem_object_init() will create an shmfs file of the requested size and store it into the struct :c:type:`struct drm_gem_object ` filp field. The memory is used as either main storage for the object when the graphics hardware -uses system memory directly or as a backing store otherwise. +uses system memory directly or as a backing store otherwise. Drivers +can call drm_gem_huge_mnt_create() to create, mount and use a huge +shmem mountpoint instead of the default one ('shm_mnt'). For builds +with CONFIG_TRANSPARENT_HUGEPAGE enabled, further calls to +drm_gem_object_init() will let shmem allocate huge pages when +possible. Drivers are responsible for the actual physical pages allocation by calling shmem_read_mapping_page_gfp() for each page. @@ -290,15 +295,27 @@ The open and close operations must update the GEM object reference count. Drivers can use the drm_gem_vm_open() and drm_gem_vm_close() helper functions directly as open and close handlers. 
-The fault operation handler is responsible for mapping individual pages -to userspace when a page fault occurs. Depending on the memory -allocation scheme, drivers can allocate pages at fault time, or can -decide to allocate memory for the GEM object at the time the object is -created. +The fault operation handler is responsible for mapping pages to +userspace when a page fault occurs. Depending on the memory allocation +scheme, drivers can allocate pages at fault time, or can decide to +allocate memory for the GEM object at the time the object is created. Drivers that want to map the GEM object upfront instead of handling page faults can implement their own mmap file operation handler. +In order to reduce page table overhead, if the internal shmem mountpoint +"shm_mnt" is configured to use transparent huge pages (for builds with +CONFIG_TRANSPARENT_HUGEPAGE enabled) and if the shmem backing store +managed to allocate a huge page for a faulting address, the fault handler +will first attempt to insert that huge page into the VMA before falling +back to individual page insertion. mmap() user address alignment for GEM +objects is handled by providing a custom get_unmapped_area file +operation which forwards to the shmem backing store. For most drivers, +which don't create a huge mountpoint by default or through a module +parameter, transparent huge pages can be enabled by either setting the +"transparent_hugepage_shmem" kernel parameter or the +"/sys/kernel/mm/transparent_hugepage/shmem_enabled" sysfs knob. + For platforms without MMU the GEM core provides a helper method drm_gem_dma_get_unmapped_area(). The mmap() routines will call this to get a proposed address for the mapping. From 4ebaaa3b622238ea44fbaa21998ad76bd8417a8c Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 14 Oct 2025 11:31:45 +0200 Subject: [PATCH 58/94] drm/atomic: Add dev pointer to drm_private_obj All the objects that need to implement some callbacks in KMS have a pointer in their structure to the main drm_device. However, it's not the case for drm_private_objs, which makes it harder than it needs to be to implement some of its callbacks. Let's add that pointer. Reviewed-by: Dmitry Baryshkov Reviewed-by: Tomi Valkeinen Reviewed-by: Luca Ceresoli Tested-by: Luca Ceresoli Link: https://patch.msgid.link/20251014-drm-private-obj-reset-v2-1-6dd60e985e9d@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/drm_atomic.c | 1 + include/drm/drm_atomic.h | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 67e095e398a3..6d3ea8056b60 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -906,6 +906,7 @@ drm_atomic_private_obj_init(struct drm_device *dev, drm_modeset_lock_init(&obj->lock); + obj->dev = dev; obj->state = state; obj->funcs = funcs; list_add_tail(&obj->head, &dev->mode_config.privobj_list); diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index 43783891d359..74ce26fa8838 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -339,6 +339,11 @@ struct drm_private_state_funcs { * drm_atomic_helper_wait_for_dependencies(). */ struct drm_private_obj { + /** + * @dev: parent DRM device + */ + struct drm_device *dev; + /** + * @head: List entry used to attach a private object to a &drm_device * (queued to &drm_mode_config.privobj_list).
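To illustrate what the new pointer buys driver code, here is a minimal sketch of a hypothetical private-object callback (names invented for illustration): with obj->dev populated by drm_atomic_private_obj_init(), callbacks that only receive the private object can reach the device for logging or helpers without driver-specific container_of() plumbing.

	static void my_obj_atomic_destroy_state(struct drm_private_obj *obj,
						struct drm_private_state *state)
	{
		/* obj->dev is now filled in by drm_atomic_private_obj_init() */
		drm_dbg_atomic(obj->dev, "destroying private state %p\n", state);
		kfree(state);
	}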
From 51db5336e16de25074ef63d29b8a762a25d193d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 19 Sep 2025 16:40:06 +0200 Subject: [PATCH 59/94] dma-buf: improve sg_table debugging hack v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This debugging hack is important to enforce the rule that importers should *never* touch the underlying struct page of the exporter. Instead of just mangling the page link, create a copy of the sg_table but only copy over the DMA addresses and not the pages. This will cause a NULL pointer dereference if the importer tries to touch the struct page. Still quite a hack, but this at least allows the exporter to keep its sg_table intact while allowing the DMA-buf maintainer to find and fix misbehaving importers and finally switch over to using a different data structure in the future. v2: improve the hack further by using a wrapper structure and explaining the background a bit more in the commit message. v3: fix some whitespace issues, use sg_assign_page(). v4: give the functions a better name Signed-off-by: Christian König Reviewed-by: Michael J. Ruhl Link: https://lore.kernel.org/r/20251205130604.1582-1-christian.koenig@amd.com --- drivers/dma-buf/dma-buf.c | 72 ++++++++++++++++++++++++++++++++------- 1 file changed, 59 insertions(+), 13 deletions(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 2305bb2cc1f1..533ea17b7175 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -35,6 +35,12 @@ #include "dma-buf-sysfs-stats.h" +/* Wrapper to hide the sg_table page link from the importer */ +struct dma_buf_sg_table_wrapper { + struct sg_table *original; + struct sg_table wrapper; +}; + static inline int is_dma_buf_file(struct file *); static DEFINE_MUTEX(dmabuf_list_mutex); @@ -828,21 +834,59 @@ void dma_buf_put(struct dma_buf *dmabuf) } EXPORT_SYMBOL_NS_GPL(dma_buf_put, "DMA_BUF"); -static void mangle_sg_table(struct sg_table *sg_table) +static int dma_buf_wrap_sg_table(struct sg_table **sg_table) { -#ifdef CONFIG_DMABUF_DEBUG - int i; - struct scatterlist *sg; + struct scatterlist *to_sg, *from_sg; + struct sg_table *from = *sg_table; + struct dma_buf_sg_table_wrapper *to; + int i, ret; - /* To catch abuse of the underlying struct page by importers mix - * up the bits, but take care to preserve the low SG_ bits to - * not corrupt the sgt. The mixing is undone on unmap - * before passing the sgt back to the exporter. + if (!IS_ENABLED(CONFIG_DMABUF_DEBUG)) + return 0; + + /* + * To catch abuse of the underlying struct page by importers, copy the + * sg_table without copying the page_link and give only the copy back to + * the importer.
*/ - for_each_sgtable_sg(sg_table, sg, i) - sg->page_link ^= ~0xffUL; -#endif + to = kzalloc(sizeof(*to), GFP_KERNEL); + if (!to) + return -ENOMEM; + ret = sg_alloc_table(&to->wrapper, from->nents, GFP_KERNEL); + if (ret) + goto free_to; + + to_sg = to->wrapper.sgl; + for_each_sgtable_dma_sg(from, from_sg, i) { + to_sg->offset = 0; + to_sg->length = 0; + sg_assign_page(to_sg, NULL); + sg_dma_address(to_sg) = sg_dma_address(from_sg); + sg_dma_len(to_sg) = sg_dma_len(from_sg); + to_sg = sg_next(to_sg); + } + + to->original = from; + *sg_table = &to->wrapper; + return 0; + +free_to: + kfree(to); + return ret; +} + +static void dma_buf_unwrap_sg_table(struct sg_table **sg_table) +{ + struct dma_buf_sg_table_wrapper *copy; + + if (!IS_ENABLED(CONFIG_DMABUF_DEBUG)) + return; + + copy = container_of(*sg_table, typeof(*copy), wrapper); + *sg_table = copy->original; + sg_free_table(&copy->wrapper); + kfree(copy); } static inline bool @@ -1139,7 +1183,9 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, if (ret < 0) goto error_unmap; } - mangle_sg_table(sg_table); + ret = dma_buf_wrap_sg_table(&sg_table); + if (ret) + goto error_unmap; if (IS_ENABLED(CONFIG_DMA_API_DEBUG)) { struct scatterlist *sg; @@ -1220,7 +1266,7 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach, dma_resv_assert_held(attach->dmabuf->resv); - mangle_sg_table(sg_table); + dma_buf_unwrap_sg_table(&sg_table); attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction); if (dma_buf_pin_on_map(attach)) From 646013f513f38a3e75a60fde31ae0e6154ce19a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 4 Dec 2025 15:56:03 +0100 Subject: [PATCH 60/94] dma-buf: enable DMABUF_DEBUG by default on DEBUG kernels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The overhead of enforcing the DMA-buf rules for importers is now so low that it is safe to enable it by default on DEBUG kernels. This will hopefully result in fixing more issues in importers. Signed-off-by: Christian König Reviewed-by: Michael J. Ruhl Link: https://lore.kernel.org/r/20251205130604.1582-2-christian.koenig@amd.com --- drivers/dma-buf/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig index b46eb8a552d7..fdd823e446cc 100644 --- a/drivers/dma-buf/Kconfig +++ b/drivers/dma-buf/Kconfig @@ -55,7 +55,7 @@ config DMABUF_MOVE_NOTIFY config DMABUF_DEBUG bool "DMA-BUF debug checks" depends on DMA_SHARED_BUFFER - default y if DMA_API_DEBUG + default y if DEBUG help This option enables additional checks for DMA-BUF importers and exporters. Specifically it validates that importers do not peek at the From cd77d5a4aaf8c5c1d819f47cf814bf7d4920b0a2 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Thu, 4 Dec 2025 10:16:03 -0800 Subject: [PATCH 61/94] accel/amdxdna: Fix tail-pointer polling in mailbox_get_msg() In mailbox_get_msg(), mailbox_reg_read_non_zero() is called to poll for a non-zero tail pointer. This assumed that a zero value indicates an error. However, certain corner cases legitimately produce a zero tail pointer. To handle these cases, remove mailbox_reg_read_non_zero(). The zero tail pointer will be treated as a valid rewind event.
Reviewed-by: Maciej Falkowski Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20251204181603.793824-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/amdxdna_mailbox.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c index 858df97cd3fb..a60a85ce564c 100644 --- a/drivers/accel/amdxdna/amdxdna_mailbox.c +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c @@ -112,22 +112,6 @@ static u32 mailbox_reg_read(struct mailbox_channel *mb_chann, u32 mbox_reg) return readl(ringbuf_addr); } -static int mailbox_reg_read_non_zero(struct mailbox_channel *mb_chann, u32 mbox_reg, u32 *val) -{ - struct xdna_mailbox_res *mb_res = &mb_chann->mb->res; - void __iomem *ringbuf_addr = mb_res->mbox_base + mbox_reg; - int ret, value; - - /* Poll till value is not zero */ - ret = readx_poll_timeout(readl, ringbuf_addr, value, - value, 1 /* us */, 100); - if (ret < 0) - return ret; - - *val = value; - return 0; -} - static inline void mailbox_set_headptr(struct mailbox_channel *mb_chann, u32 headptr_val) { @@ -286,8 +270,7 @@ static int mailbox_get_msg(struct mailbox_channel *mb_chann) u32 start_addr; int ret; - if (mailbox_reg_read_non_zero(mb_chann, mb_chann->res[CHAN_RES_I2X].mb_tail_ptr_reg, &tail)) - return -EINVAL; + tail = mailbox_get_tailptr(mb_chann, CHAN_RES_I2X); head = mb_chann->i2x_head; ringbuf_size = mailbox_get_ringbuf_size(mb_chann, CHAN_RES_I2X); start_addr = mb_chann->res[CHAN_RES_I2X].rb_start_addr; From bcd752c706c357229185a330ab450b86236d9031 Mon Sep 17 00:00:00 2001 From: Val Packett Date: Sat, 6 Dec 2025 14:37:28 -0300 Subject: [PATCH 62/94] drm/panel-edp: Add AUO B140QAX01.H panel A 14-inch 2560x1600 60Hz matte touch panel, found on a Dell Latitude 7455 laptop (second-source with BOE NE14QDM), according to online sources it's also found on the Latitude 7440 and some ASUS models. Raw EDID dump: 00 ff ff ff ff ff ff 00 06 af a4 0b 00 00 00 00 00 20 01 04 a5 1e 13 78 03 ad f5 a8 54 47 9c 24 0e 50 54 00 00 00 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 f0 68 00 a0 a0 40 2e 60 30 20 35 00 2d bc 10 00 00 1a f3 53 00 a0 a0 40 2e 60 30 20 35 00 2d bc 10 00 00 1a 00 00 00 fe 00 36 39 52 31 57 80 42 31 34 30 51 41 58 00 00 00 00 00 02 41 21 a8 00 01 00 00 1a 41 0a 20 20 00 a1 Don't have datasheet access, but the same timing as for other panels from the same manufacturer works fine. 
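(For cross-checking the table entry below: bytes 8-11 of the EDID dump, "06 af a4 0b", encode the big-endian PNP vendor ID 'AUO' and the little-endian product code 0x0ba4, which is exactly the value matched by the new EDP_PANEL_ENTRY.)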
Signed-off-by: Val Packett [dianders: Moved to the right location in the table] Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patch.msgid.link/20251206173739.2222940-1-val@packett.cool --- drivers/gpu/drm/panel/panel-edp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index 2c3597037743..85dd3f4cb8e1 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -1880,6 +1880,7 @@ static const struct panel_delay delay_80_500_e50_d50 = { */ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('A', 'U', 'O', 0x04a4, &delay_200_500_e50, "B122UAN01.0"), + EDP_PANEL_ENTRY('A', 'U', 'O', 0x0ba4, &delay_200_500_e50, "B140QAX01.H"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x105c, &delay_200_500_e50, "B116XTN01.0"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x1062, &delay_200_500_e50, "B120XAN01.0"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x125c, &delay_200_500_e50, "Unknown"), From 925ad0c26dd80d345018a3f7559799b7ad8f44e3 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:28 +0100 Subject: [PATCH 63/94] drm/panthor: Provide a custom dma_buf implementation Before we introduce cached CPU mappings, we want a dma_buf implementation satisfying synchronization requests around CPU accesses coming from a dma_buf exported by our driver. Let's provide our own implementation relying on the default gem_shmem_prime helpers designed for that purpose. v5: - New patch v6: - Collect R-b v7: - Hand-roll the dma_buf sync/import logic (was previously done by generic prime/shmem helpers) v8: - No changes Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-2-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_drv.c | 1 + drivers/gpu/drm/panthor/panthor_gem.c | 118 +++++++++++++++++++++++++- drivers/gpu/drm/panthor/panthor_gem.h | 4 + 3 files changed, 122 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 1cfed4fc3503..73d26e17e2a2 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -1623,6 +1623,7 @@ static const struct drm_driver panthor_drm_driver = { .gem_create_object = panthor_gem_create_object, .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table, + .gem_prime_import = panthor_gem_prime_import, #ifdef CONFIG_DEBUG_FS .debugfs_init = panthor_debugfs_init, #endif diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index 7ae07a9bc996..0de37733a2ef 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -202,14 +202,130 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, return ERR_PTR(ret); } +static struct sg_table * +panthor_gem_prime_map_dma_buf(struct dma_buf_attachment *attach, + enum dma_data_direction dir) +{ + struct sg_table *sgt = drm_gem_map_dma_buf(attach, dir); + + if (!IS_ERR(sgt)) + attach->priv = sgt; + + return sgt; +} + +static void +panthor_gem_prime_unmap_dma_buf(struct dma_buf_attachment *attach, + struct sg_table *sgt, + enum dma_data_direction dir) +{ + attach->priv = NULL; + drm_gem_unmap_dma_buf(attach, sgt, dir); +} + +static int +panthor_gem_prime_begin_cpu_access(struct dma_buf *dma_buf, + enum dma_data_direction dir) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct drm_device *dev = obj->dev; + struct drm_gem_shmem_object *shmem 
= to_drm_gem_shmem_obj(obj); + struct dma_buf_attachment *attach; + + dma_resv_lock(obj->resv, NULL); + if (shmem->sgt) + dma_sync_sgtable_for_cpu(dev->dev, shmem->sgt, dir); + + if (shmem->vaddr) + invalidate_kernel_vmap_range(shmem->vaddr, shmem->base.size); + + list_for_each_entry(attach, &dma_buf->attachments, node) { + struct sg_table *sgt = attach->priv; + + if (sgt) + dma_sync_sgtable_for_cpu(attach->dev, sgt, dir); + } + dma_resv_unlock(obj->resv); + + return 0; +} + +static int +panthor_gem_prime_end_cpu_access(struct dma_buf *dma_buf, + enum dma_data_direction dir) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct drm_device *dev = obj->dev; + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); + struct dma_buf_attachment *attach; + + dma_resv_lock(obj->resv, NULL); + list_for_each_entry(attach, &dma_buf->attachments, node) { + struct sg_table *sgt = attach->priv; + + if (sgt) + dma_sync_sgtable_for_device(attach->dev, sgt, dir); + } + + if (shmem->vaddr) + flush_kernel_vmap_range(shmem->vaddr, shmem->base.size); + + if (shmem->sgt) + dma_sync_sgtable_for_device(dev->dev, shmem->sgt, dir); + + dma_resv_unlock(obj->resv); + return 0; +} + +static const struct dma_buf_ops panthor_dma_buf_ops = { + .attach = drm_gem_map_attach, + .detach = drm_gem_map_detach, + .map_dma_buf = panthor_gem_prime_map_dma_buf, + .unmap_dma_buf = panthor_gem_prime_unmap_dma_buf, + .release = drm_gem_dmabuf_release, + .mmap = drm_gem_dmabuf_mmap, + .vmap = drm_gem_dmabuf_vmap, + .vunmap = drm_gem_dmabuf_vunmap, + .begin_cpu_access = panthor_gem_prime_begin_cpu_access, + .end_cpu_access = panthor_gem_prime_end_cpu_access, +}; + static struct dma_buf * panthor_gem_prime_export(struct drm_gem_object *obj, int flags) { + struct drm_device *dev = obj->dev; + struct dma_buf_export_info exp_info = { + .exp_name = KBUILD_MODNAME, + .owner = THIS_MODULE, + .ops = &panthor_dma_buf_ops, + .size = obj->size, + .flags = flags, + .priv = obj, + .resv = obj->resv, + }; + /* We can't export GEMs that have an exclusive VM. */ if (to_panthor_bo(obj)->exclusive_vm_root_gem) return ERR_PTR(-EINVAL); - return drm_gem_prime_export(obj, flags); + return drm_gem_dmabuf_export(dev, &exp_info); +} + +struct drm_gem_object * +panthor_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + struct drm_gem_object *obj = dma_buf->priv; + + if (dma_buf->ops == &panthor_dma_buf_ops && obj->dev == dev) { + /* Importing dmabuf exported from our own gem increases + * refcount on gem itself instead of f_count of dmabuf. 
+ */ + drm_gem_object_get(obj); + return obj; + } + + return drm_gem_prime_import(dev, dma_buf); } static enum drm_gem_object_status panthor_gem_status(struct drm_gem_object *obj) diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h index 2eefe9104e5e..262c77a4d3c1 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.h +++ b/drivers/gpu/drm/panthor/panthor_gem.h @@ -149,6 +149,10 @@ panthor_gem_create_with_handle(struct drm_file *file, void panthor_gem_bo_set_label(struct drm_gem_object *obj, const char *label); void panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label); +struct drm_gem_object * +panthor_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf); + static inline u64 panthor_kernel_bo_gpuva(struct panthor_kernel_bo *bo) { From 9beb8dca9e749e9983e70b22e9823e6fcd519f91 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:29 +0100 Subject: [PATCH 64/94] drm/panthor: Fix panthor_gpu_coherency_set() GPU_COHERENCY_PROTOCOL takes one of GPU_COHERENCY_xx not BIT(GPU_COHERENCY_xx). v3: - New commit v4: - Add Steve's R-b v5: - No changes v6: - No changes v7: - No changes v8: - No changes Cc: Akash Goel Fixes: dd7db8d911a1 ("drm/panthor: Explicitly set the coherency mode") Reported-by: Steven Price Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-3-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c index 9cb5dee93212..ff5231269518 100644 --- a/drivers/gpu/drm/panthor/panthor_gpu.c +++ b/drivers/gpu/drm/panthor/panthor_gpu.c @@ -51,7 +51,7 @@ struct panthor_gpu { static void panthor_gpu_coherency_set(struct panthor_device *ptdev) { gpu_write(ptdev, GPU_COHERENCY_PROTOCOL, - ptdev->coherent ? GPU_COHERENCY_PROT_BIT(ACE_LITE) : GPU_COHERENCY_NONE); + ptdev->coherent ? GPU_COHERENCY_ACE_LITE : GPU_COHERENCY_NONE); } static void panthor_gpu_l2_config_set(struct panthor_device *ptdev) From ea78ec98265339997959eba3c9d764317614675a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:30 +0100 Subject: [PATCH 65/94] drm/panthor: Expose the selected coherency protocol to the UMD If we want to be able to skip CPU cache maintenance operations on CPU-cached mappings, the UMD needs to know the kind of coherency in place. Add a field to drm_panthor_gpu_info to do that. We can re-use a padding field for that since this object is write-only from the KMD perspective, and the UMD should just ignore it. 
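A userspace-side sketch of how a UMD might consume the new field (structure and ioctl names as defined in panthor_drm.h; the fd and error handling are assumed/omitted, and this is illustrative usage rather than part of the patch):

	struct drm_panthor_gpu_info info = {0};
	struct drm_panthor_dev_query query = {
		.type = DRM_PANTHOR_DEV_QUERY_GPU_INFO,
		.size = sizeof(info),
		.pointer = (__u64)(uintptr_t)&info,
	};

	ioctl(fd, DRM_IOCTL_PANTHOR_DEV_QUERY, &query);
	if (info.selected_coherency == DRM_PANTHOR_GPU_COHERENCY_NONE) {
		/* The GPU can't snoop CPU caches: the UMD must flush or
		 * invalidate around GPU accesses to CPU-cached mappings.
		 */
	}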
v2: - New commit v3: - Make coherency protocol a real enum, not a bitmask - Add BUILD_BUG_ON()s to make sure the values in panthor_regs.h and those exposed through the uAPI match v4: - Add Steve's R-b v5: - No changes v6: - No changes v7: - Fix kernel doc v8: - No changes Reviewed-by: Steven Price Reviewed-by: Karunika Choo Link: https://patch.msgid.link/20251208100841.730527-4-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_device.c | 10 +++++- drivers/gpu/drm/panthor/panthor_gpu.c | 2 +- include/uapi/drm/panthor_drm.h | 39 ++++++++++++++++++++++-- 3 files changed, 46 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c index 2979ee0e52c2..54fbb1aa07c5 100644 --- a/drivers/gpu/drm/panthor/panthor_device.c +++ b/drivers/gpu/drm/panthor/panthor_device.c @@ -28,6 +28,12 @@ static int panthor_gpu_coherency_init(struct panthor_device *ptdev) { + BUILD_BUG_ON(GPU_COHERENCY_NONE != DRM_PANTHOR_GPU_COHERENCY_NONE); + BUILD_BUG_ON(GPU_COHERENCY_ACE_LITE != DRM_PANTHOR_GPU_COHERENCY_ACE_LITE); + BUILD_BUG_ON(GPU_COHERENCY_ACE != DRM_PANTHOR_GPU_COHERENCY_ACE); + + /* Start with no coherency, and update it if the device is flagged coherent. */ + ptdev->gpu_info.selected_coherency = GPU_COHERENCY_NONE; ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT; if (!ptdev->coherent) @@ -37,8 +43,10 @@ static int panthor_gpu_coherency_init(struct panthor_device *ptdev) * ACE protocol has never been supported for command stream frontend GPUs. */ if ((gpu_read(ptdev, GPU_COHERENCY_FEATURES) & - GPU_COHERENCY_PROT_BIT(ACE_LITE))) + GPU_COHERENCY_PROT_BIT(ACE_LITE))) { + ptdev->gpu_info.selected_coherency = GPU_COHERENCY_ACE_LITE; return 0; + } drm_err(&ptdev->base, "Coherency not supported by the device"); return -ENOTSUPP; diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c index ff5231269518..057e167468d0 100644 --- a/drivers/gpu/drm/panthor/panthor_gpu.c +++ b/drivers/gpu/drm/panthor/panthor_gpu.c @@ -51,7 +51,7 @@ struct panthor_gpu { static void panthor_gpu_coherency_set(struct panthor_device *ptdev) { gpu_write(ptdev, GPU_COHERENCY_PROTOCOL, - ptdev->coherent ? GPU_COHERENCY_ACE_LITE : GPU_COHERENCY_NONE); + ptdev->gpu_info.selected_coherency); } static void panthor_gpu_l2_config_set(struct panthor_device *ptdev) diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 467d365ed7ba..28cf9e878db6 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -245,6 +245,26 @@ enum drm_panthor_dev_query_type { DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO, }; +/** + * enum drm_panthor_gpu_coherency: Type of GPU coherency + */ +enum drm_panthor_gpu_coherency { + /** + * @DRM_PANTHOR_GPU_COHERENCY_ACE_LITE: ACE Lite coherency. + */ + DRM_PANTHOR_GPU_COHERENCY_ACE_LITE = 0, + + /** + * @DRM_PANTHOR_GPU_COHERENCY_ACE: ACE coherency. + */ + DRM_PANTHOR_GPU_COHERENCY_ACE = 1, + + /** + * @DRM_PANTHOR_GPU_COHERENCY_NONE: No coherency. + */ + DRM_PANTHOR_GPU_COHERENCY_NONE = 31, +}; + /** * struct drm_panthor_gpu_info - GPU information * @@ -301,7 +321,16 @@ struct drm_panthor_gpu_info { */ __u32 thread_max_barrier_size; - /** @coherency_features: Coherency features. */ + /** + * @coherency_features: Coherency features. + * + * Combination of drm_panthor_gpu_coherency flags. 
+ * + * Note that this is just the set of coherency protocols supported by + * the GPU; the actual coherency in place depends on the SoC + * integration and is reflected by + * drm_panthor_gpu_info::selected_coherency. + */ __u32 coherency_features; /** @texture_features: Texture features. */ @@ -310,8 +339,12 @@ struct drm_panthor_gpu_info { /** @as_present: Bitmask encoding the number of address-space exposed by the MMU. */ __u32 as_present; - /** @pad0: MBZ. */ - __u32 pad0; + /** + * @selected_coherency: Coherency selected for this device. + * + * One of drm_panthor_gpu_coherency. + */ + __u32 selected_coherency; /** @shader_present: Bitmask encoding the shader cores exposed by the GPU. */ __u64 shader_present; From e06177ec7a36391c66216b55b7c112d5ba8c4cc1 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:31 +0100 Subject: [PATCH 66/94] drm/panthor: Add a PANTHOR_BO_SYNC ioctl This will be used by the UMD to synchronize CPU-cached mappings when the UMD can't do it directly (no usermode cache maintenance instruction on Arm32). v2: - Change the flags so they better match the drm_gem_shmem_sync() semantics v3: - Add Steve's R-b v4: - No changes v5: - Drop Steve's R-b (the semantics changes call for a new review) v6: - Drop ret initialization in panthor_ioctl_bo_sync() - Bail out early in panthor_ioctl_bo_sync() if ops.count is zero - Drop unused PANTHOR_BO_SYNC_OP_FLAGS definition v7: - Hand-roll the sync logic (was previously provided by gem_shmem) v8: - Collect R-b Signed-off-by: Faith Ekstrand Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-5-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_drv.c | 41 ++++++++++++- drivers/gpu/drm/panthor/panthor_gem.c | 85 +++++++++++++++++++++++++++ drivers/gpu/drm/panthor/panthor_gem.h | 2 + include/uapi/drm/panthor_drm.h | 52 ++++++++++++++++ 4 files changed, 179 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 73d26e17e2a2..2a9f1feac57a 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -177,7 +177,8 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride, PANTHOR_UOBJ_DECL(struct drm_panthor_sync_op, timeline_value), \ PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \ PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, ringbuf_size), \ - PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs)) + PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs), \ + PANTHOR_UOBJ_DECL(struct drm_panthor_bo_sync_op, size)) /** * PANTHOR_UOBJ_SET() - Copy a kernel object to a user object.
@@ -1396,6 +1397,43 @@ static int panthor_ioctl_set_user_mmio_offset(struct drm_device *ddev, return 0; } +static int panthor_ioctl_bo_sync(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_panthor_bo_sync *args = data; + struct drm_panthor_bo_sync_op *ops; + struct drm_gem_object *obj; + int ret; + + if (!args->ops.count) + return 0; + + ret = PANTHOR_UOBJ_GET_ARRAY(ops, &args->ops); + if (ret) + return ret; + + for (u32 i = 0; i < args->ops.count; i++) { + obj = drm_gem_object_lookup(file, ops[i].handle); + if (!obj) { + ret = -ENOENT; + goto err_ops; + } + + ret = panthor_gem_sync(obj, ops[i].type, ops[i].offset, + ops[i].size); + + drm_gem_object_put(obj); + + if (ret) + goto err_ops; + } + +err_ops: + kvfree(ops); + + return ret; +} + static int panthor_open(struct drm_device *ddev, struct drm_file *file) { @@ -1470,6 +1508,7 @@ static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = { PANTHOR_IOCTL(GROUP_SUBMIT, group_submit, DRM_RENDER_ALLOW), PANTHOR_IOCTL(BO_SET_LABEL, bo_set_label, DRM_RENDER_ALLOW), PANTHOR_IOCTL(SET_USER_MMIO_OFFSET, set_user_mmio_offset, DRM_RENDER_ALLOW), + PANTHOR_IOCTL(BO_SYNC, bo_sync, DRM_RENDER_ALLOW), }; static int panthor_mmap(struct file *filp, struct vm_area_struct *vma) diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index 0de37733a2ef..69ee30603e0a 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -465,6 +465,91 @@ panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label) panthor_gem_bo_set_label(bo->obj, str); } +int +panthor_gem_sync(struct drm_gem_object *obj, u32 type, + u64 offset, u64 size) +{ + struct panthor_gem_object *bo = to_panthor_bo(obj); + struct drm_gem_shmem_object *shmem = &bo->base; + const struct drm_device *dev = shmem->base.dev; + struct sg_table *sgt; + struct scatterlist *sgl; + unsigned int count; + + /* Make sure the range is in bounds. */ + if (offset + size < offset || offset + size > shmem->base.size) + return -EINVAL; + + /* Disallow CPU-cache maintenance on imported buffers. */ + if (drm_gem_is_imported(&shmem->base)) + return -EINVAL; + + switch (type) { + case DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH: + case DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE: + break; + + default: + return -EINVAL; + } + + /* Don't bother if it's WC-mapped */ + if (shmem->map_wc) + return 0; + + /* Nothing to do if the size is zero. */ + if (size == 0) + return 0; + + sgt = drm_gem_shmem_get_pages_sgt(shmem); + if (IS_ERR(sgt)) + return PTR_ERR(sgt); + + for_each_sgtable_dma_sg(sgt, sgl, count) { + if (size == 0) + break; + + dma_addr_t paddr = sg_dma_address(sgl); + size_t len = sg_dma_len(sgl); + + if (len <= offset) { + offset -= len; + continue; + } + + paddr += offset; + len -= offset; + len = min_t(size_t, len, size); + size -= len; + offset = 0; + + /* It's unclear whether dma_sync_xxx() is the right API to do CPU + * cache maintenance given an IOMMU can register its own + * implementation doing more than just CPU cache flushes/invalidation, + * and what we really care about here is CPU caches only, but that's + * the best we have that is both arch-agnostic and does at least the + * CPU cache maintenance on a (paddr, len) tuple.
+ * + * Also, I wish we could do a single + * + * dma_sync_single_for_device(BIDIR) + * + * and get a flush+invalidate, but that's not how it's implemented + * in practice (at least on arm64), so we have to make it + * + * dma_sync_single_for_device(TO_DEVICE) + * dma_sync_single_for_cpu(FROM_DEVICE) + * + * for the flush+invalidate case. + */ + dma_sync_single_for_device(dev->dev, paddr, len, DMA_TO_DEVICE); + if (type == DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE) + dma_sync_single_for_cpu(dev->dev, paddr, len, DMA_FROM_DEVICE); + } + + return 0; +} + #ifdef CONFIG_DEBUG_FS struct gem_size_totals { size_t size; diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h index 262c77a4d3c1..22519c570b5a 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.h +++ b/drivers/gpu/drm/panthor/panthor_gem.h @@ -148,6 +148,8 @@ panthor_gem_create_with_handle(struct drm_file *file, void panthor_gem_bo_set_label(struct drm_gem_object *obj, const char *label); void panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label); +int panthor_gem_sync(struct drm_gem_object *obj, + u32 type, u64 offset, u64 size); struct drm_gem_object * panthor_gem_prime_import(struct drm_device *dev, diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 28cf9e878db6..9f810305db6e 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -144,6 +144,9 @@ enum drm_panthor_ioctl_id { * pgoff_t size. */ DRM_PANTHOR_SET_USER_MMIO_OFFSET, + + /** @DRM_PANTHOR_BO_SYNC: Sync BO data to/from the device */ + DRM_PANTHOR_BO_SYNC, }; /** @@ -1073,6 +1076,53 @@ struct drm_panthor_set_user_mmio_offset { __u64 offset; }; +/** + * enum drm_panthor_bo_sync_op_type - BO sync type + */ +enum drm_panthor_bo_sync_op_type { + /** @DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH: Flush CPU caches. */ + DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH = 0, + + /** @DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE: Flush and invalidate CPU caches. */ + DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE = 1, +}; + +/** + * struct drm_panthor_bo_sync_op - BO map sync op + */ +struct drm_panthor_bo_sync_op { + /** @handle: Handle of the buffer object to sync. */ + __u32 handle; + + /** @type: Type of operation. */ + __u32 type; + + /** + * @offset: Offset into the BO at which the sync range starts. + * + * This will be rounded down to the nearest cache line as needed. + */ + __u64 offset; + + /** + * @size: Size of the range to sync + * + * @size + @offset will be rounded up to the nearest cache line as + * needed. + */ + __u64 size; +}; + +/** + * struct drm_panthor_bo_sync - BO map sync request + */ +struct drm_panthor_bo_sync { + /** + * @ops: Array of struct drm_panthor_bo_sync_op sync operations. + */ + struct drm_panthor_obj_array ops; +}; + /** * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number * @__access: Access type. Must be R, W or RW. @@ -1119,6 +1169,8 @@ enum { DRM_IOCTL_PANTHOR(WR, BO_SET_LABEL, bo_set_label), DRM_IOCTL_PANTHOR_SET_USER_MMIO_OFFSET = DRM_IOCTL_PANTHOR(WR, SET_USER_MMIO_OFFSET, set_user_mmio_offset), + DRM_IOCTL_PANTHOR_BO_SYNC = + DRM_IOCTL_PANTHOR(WR, BO_SYNC, bo_sync), }; #if defined(__cplusplus) From c146c82f862e9c7e602a908891c3adf992ef2beb Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:32 +0100 Subject: [PATCH 67/94] drm/panthor: Add an ioctl to query BO flags This is useful when importing BOs, so we can know about cacheability and flush the caches when needed. 
We can also know when the buffer comes from a different subsystem and take proper actions (avoid CPU mappings, or do kernel-based syncs instead of userland cache flushes). v2: - New commit v3: - Add Steve's R-b v4: - No changes v5: - No changes v6: - No changes v7: - No changes v8: - No changes Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-6-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_drv.c | 24 +++++++++++ include/uapi/drm/panthor_drm.h | 57 +++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 2a9f1feac57a..67d694d00ccb 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -1434,6 +1434,29 @@ static int panthor_ioctl_bo_sync(struct drm_device *ddev, void *data, return ret; } +static int panthor_ioctl_bo_query_info(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_panthor_bo_query_info *args = data; + struct panthor_gem_object *bo; + struct drm_gem_object *obj; + + obj = drm_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; + + bo = to_panthor_bo(obj); + args->pad = 0; + args->create_flags = bo->flags; + + args->extra_flags = 0; + if (drm_gem_is_imported(&bo->base.base)) + args->extra_flags |= DRM_PANTHOR_BO_IS_IMPORTED; + + drm_gem_object_put(obj); + return 0; +} + static int panthor_open(struct drm_device *ddev, struct drm_file *file) { @@ -1509,6 +1532,7 @@ static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = { PANTHOR_IOCTL(BO_SET_LABEL, bo_set_label, DRM_RENDER_ALLOW), PANTHOR_IOCTL(SET_USER_MMIO_OFFSET, set_user_mmio_offset, DRM_RENDER_ALLOW), PANTHOR_IOCTL(BO_SYNC, bo_sync, DRM_RENDER_ALLOW), + PANTHOR_IOCTL(BO_QUERY_INFO, bo_query_info, DRM_RENDER_ALLOW), }; static int panthor_mmap(struct file *filp, struct vm_area_struct *vma) diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 9f810305db6e..39d5ce815742 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -147,6 +147,13 @@ enum drm_panthor_ioctl_id { /** @DRM_PANTHOR_BO_SYNC: Sync BO data to/from the device */ DRM_PANTHOR_BO_SYNC, + + /** + * @DRM_PANTHOR_BO_QUERY_INFO: Query information about a BO. + * + * This is useful for imported BOs. + */ + DRM_PANTHOR_BO_QUERY_INFO, }; /** @@ -1123,6 +1130,54 @@ struct drm_panthor_bo_sync { struct drm_panthor_obj_array ops; }; +/** + * enum drm_panthor_bo_extra_flags - Set of flags returned on a BO_QUERY_INFO request + * + * Those are flags reflecting BO properties that are not directly coming from the flags + * passed at creation time, or information on BOs that were imported from other drivers. + */ +enum drm_panthor_bo_extra_flags { + /** + * @DRM_PANTHOR_BO_IS_IMPORTED: BO has been imported from an external driver. + * + * Note that imported dma-buf handles are not flagged as imported if they + * were exported by panthor; only buffers coming from other drivers + * (dma heaps, other GPUs, display controllers, V4L, ...) are. + * + * It's also important to note that all imported BOs are mapped cached and can't + * be considered IO-coherent even if the GPU is.
This means they require explicit + * syncs that must go through the DRM_PANTHOR_BO_SYNC ioctl (userland cache + * maintenance is not allowed in that case, because extra operations might be + * needed to make changes visible to the CPU/device, like buffer migration when the + * exporter is a GPU with its own VRAM). + */ + DRM_PANTHOR_BO_IS_IMPORTED = (1 << 0), +}; + +/** + * struct drm_panthor_bo_query_info - Query BO info + */ +struct drm_panthor_bo_query_info { + /** @handle: Handle of the buffer object to query flags on. */ + __u32 handle; + + /** + * @extra_flags: Combination of enum drm_panthor_bo_extra_flags flags. + */ + __u32 extra_flags; + + /** + * @create_flags: Flags passed at creation time. + * + * Combination of enum drm_panthor_bo_flags flags. + * Will be zero if the buffer comes from a different driver. + */ + __u32 create_flags; + + /** @pad: Will be zero on return. */ + __u32 pad; +}; + /** * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number * @__access: Access type. Must be R, W or RW. @@ -1171,6 +1226,8 @@ enum { DRM_IOCTL_PANTHOR(WR, SET_USER_MMIO_OFFSET, set_user_mmio_offset), DRM_IOCTL_PANTHOR_BO_SYNC = DRM_IOCTL_PANTHOR(WR, BO_SYNC, bo_sync), + DRM_IOCTL_PANTHOR_BO_QUERY_INFO = + DRM_IOCTL_PANTHOR(WR, BO_QUERY_INFO, bo_query_info), }; #if defined(__cplusplus) From cd2c9c3015e642e28e1b528c52c06a79f350d600 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Molinari?= Date: Mon, 8 Dec 2025 11:08:33 +0100 Subject: [PATCH 68/94] drm/panthor: Add flag to map GEM object Write-Back Cacheable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Will be used by the UMD to optimize CPU accesses to buffers that are frequently read by the CPU, or on which the access pattern makes non-cacheable mappings inefficient. Mapping buffers CPU-cached implies taking care of the CPU cache maintenance in the UMD, unless the GPU is IO coherent. 
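As a rough sketch of the intended UMD flow under this series (bo_handle, bo_size and fd are hypothetical; error handling omitted): create the BO with DRM_PANTHOR_BO_WB_MMAP, write to it through a CPU mapping, then flush before the GPU reads it, using the BO_SYNC ioctl added earlier in the series.

	struct drm_panthor_bo_sync_op op = {
		.handle = bo_handle,
		.type = DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH,
		.offset = 0,
		.size = bo_size,
	};
	struct drm_panthor_bo_sync req = {
		.ops = DRM_PANTHOR_OBJ_ARRAY(1, &op),
	};

	/* Flush dirty CPU cachelines so the GPU sees the CPU writes. */
	ioctl(fd, DRM_IOCTL_PANTHOR_BO_SYNC, &req);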
v2: - Add more to the commit message - Tweak the doc - Make sure we sync the section of the BO pointing to the CS syncobj before we read its seqno v3: - Fix formatting/spelling issues v4: - Add Steve's R-b v5: - Drop Steve's R-b (changes in the ioctl semantics requiring new review) v6: - Fix the uAPI doc - Fix inverted logic in some comment v7: - No changes v8: - Collect R-b Signed-off-by: Loïc Molinari Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-7-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_drv.c | 9 ++++++++- drivers/gpu/drm/panthor/panthor_gem.c | 37 +++++++++++++++++++++++-- drivers/gpu/drm/panthor/panthor_sched.c | 18 ++++++++++-- include/uapi/drm/panthor_drm.h | 9 ++++++ 4 files changed, 68 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 67d694d00ccb..598c7ad6f2b6 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -902,7 +902,8 @@ static int panthor_ioctl_vm_destroy(struct drm_device *ddev, void *data, return panthor_vm_pool_destroy_vm(pfile->vms, args->id); } -#define PANTHOR_BO_FLAGS DRM_PANTHOR_BO_NO_MMAP +#define PANTHOR_BO_FLAGS (DRM_PANTHOR_BO_NO_MMAP | \ + DRM_PANTHOR_BO_WB_MMAP) static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data, struct drm_file *file) @@ -921,6 +922,12 @@ static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data, goto out_dev_exit; } + if ((args->flags & DRM_PANTHOR_BO_NO_MMAP) && + (args->flags & DRM_PANTHOR_BO_WB_MMAP)) { + ret = -EINVAL; + goto out_dev_exit; + } + if (args->exclusive_vm_id) { vm = panthor_vm_pool_get_vm(pfile->vms, args->exclusive_vm_id); if (!vm) { diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index 69ee30603e0a..360d05abe891 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -77,6 +77,39 @@ static void panthor_gem_debugfs_set_usage_flags(struct panthor_gem_object *bo, u static void panthor_gem_debugfs_bo_init(struct panthor_gem_object *bo) {} #endif +static bool +should_map_wc(struct panthor_gem_object *bo, struct panthor_vm *exclusive_vm) +{ + struct panthor_device *ptdev = container_of(bo->base.base.dev, struct panthor_device, base); + + /* We can't do uncached mappings if the device is coherent, + * because the zeroing done by the shmem layer at page allocation + * time happens on a cached mapping which isn't CPU-flushed (at least + * not on Arm64 where the flush is deferred to PTE setup time, and + * only done conditionally based on the mapping permissions). We can't + * rely on dma_map_sgtable()/dma_sync_sgtable_for_xxx() either to flush + * those, because they are NOPed if dma_dev_coherent() returns true. + * + * FIXME: Note that this problem is going to pop up again when we + * decide to support mapping buffers with the NO_MMAP flag as + * non-shareable (AKA buffers accessed only by the GPU), because we + * need the same CPU flush to happen after page allocation, otherwise + * there's a risk of data leak or late corruption caused by a dirty + * cacheline being evicted. At this point we'll need a way to force + * CPU cache maintenance regardless of whether the device is coherent + * or not. + */ + if (ptdev->coherent) + return false; + + /* Cached mappings are explicitly requested, so no write-combine. */ + if (bo->flags & DRM_PANTHOR_BO_WB_MMAP) + return false; + + /* The default is write-combine.
*/ + return true; +} + static void panthor_gem_free_object(struct drm_gem_object *obj) { struct panthor_gem_object *bo = to_panthor_bo(obj); @@ -163,6 +196,7 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, bo = to_panthor_bo(&obj->base); kbo->obj = &obj->base; bo->flags = bo_flags; + bo->base.map_wc = should_map_wc(bo, vm); bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm); drm_gem_object_get(bo->exclusive_vm_root_gem); bo->base.base.resv = bo->exclusive_vm_root_gem->resv; @@ -363,7 +397,6 @@ static const struct drm_gem_object_funcs panthor_gem_funcs = { */ struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size) { - struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); struct panthor_gem_object *obj; obj = kzalloc(sizeof(*obj), GFP_KERNEL); @@ -371,7 +404,6 @@ struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t return ERR_PTR(-ENOMEM); obj->base.base.funcs = &panthor_gem_funcs; - obj->base.map_wc = !ptdev->coherent; mutex_init(&obj->label.lock); panthor_gem_debugfs_bo_init(obj); @@ -406,6 +438,7 @@ panthor_gem_create_with_handle(struct drm_file *file, bo = to_panthor_bo(&shmem->base); bo->flags = flags; + bo->base.map_wc = should_map_wc(bo, exclusive_vm); if (exclusive_vm) { bo->exclusive_vm_root_gem = panthor_vm_root_gem(exclusive_vm); diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 33b9ef537e35..5abc5744e5ac 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -863,8 +863,11 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue struct iosys_map map; int ret; - if (queue->syncwait.kmap) - return queue->syncwait.kmap + queue->syncwait.offset; + if (queue->syncwait.kmap) { + bo = container_of(queue->syncwait.obj, + struct panthor_gem_object, base.base); + goto out_sync; + } bo = panthor_vm_get_bo_for_va(group->vm, queue->syncwait.gpu_va, @@ -881,6 +884,17 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap)) goto err_put_syncwait_obj; +out_sync: + /* Make sure the CPU caches are invalidated before the seqno is read. + * panthor_gem_sync() is a NOP if map_wc=true, so no need to check + * it here. + */ + panthor_gem_sync(&bo->base.base, + DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE, + queue->syncwait.offset, + queue->syncwait.sync64 ? + sizeof(struct panthor_syncobj_64b) : + sizeof(struct panthor_syncobj_32b)); + return queue->syncwait.kmap + queue->syncwait.offset; err_put_syncwait_obj: diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 39d5ce815742..e238c6264fa1 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -681,6 +681,15 @@ struct drm_panthor_vm_get_state { enum drm_panthor_bo_flags { /** @DRM_PANTHOR_BO_NO_MMAP: The buffer object will never be CPU-mapped in userspace. */ DRM_PANTHOR_BO_NO_MMAP = (1 << 0), + + /** + * @DRM_PANTHOR_BO_WB_MMAP: Force "Write-Back Cacheable" CPU mapping. + * + * CPU map the buffer object in userspace by forcing the "Write-Back + * Cacheable" cacheability attribute. The mapping otherwise uses the + * "Non-Cacheable" attribute if the GPU is not IO coherent.
+ */ + DRM_PANTHOR_BO_WB_MMAP = (1 << 1), }; /** From ae09426f47ee64bd3f34fbe815ff61eb94cc3a6b Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 8 Dec 2025 11:08:34 +0100 Subject: [PATCH 69/94] drm/panthor: Bump the driver version to 1.7 Bump the driver version to reflect the new cached-CPU mapping capability. v2: - Quickly describe what the new version exposes in the commit message v3: - Add Steve's R-b v4: - No changes v5: - No changes v6: - No changes v7: - No changes v8: - Bump to 1.7 (1.6 was picked in the meantime) Signed-off-by: Faith Ekstrand Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-8-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_drv.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 598c7ad6f2b6..98d4e8d867ed 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -1674,6 +1674,10 @@ static void panthor_debugfs_init(struct drm_minor *minor) * - 1.4 - adds DRM_IOCTL_PANTHOR_BO_SET_LABEL ioctl * - 1.5 - adds DRM_PANTHOR_SET_USER_MMIO_OFFSET ioctl * - 1.6 - enables GLB_COUNTER_EN + * - 1.7 - adds DRM_PANTHOR_BO_WB_MMAP flag + * - adds DRM_IOCTL_PANTHOR_BO_SYNC ioctl + * - adds DRM_IOCTL_PANTHOR_BO_QUERY_INFO ioctl + * - adds drm_panthor_gpu_info::selected_coherency */ static const struct drm_driver panthor_drm_driver = { .driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ | @@ -1687,7 +1691,7 @@ static const struct drm_driver panthor_drm_driver = { .name = "panthor", .desc = "Panthor DRM driver", .major = 1, - .minor = 6, + .minor = 7, .gem_create_object = panthor_gem_create_object, .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table, From 3ae6637378e64e56634ce257cb354636ada96d69 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:35 +0100 Subject: [PATCH 70/94] drm/panfrost: Provide a custom dma_buf implementation Before we introduce cached CPU mappings, we want a dma_buf implementation satisfying synchronization requests around CPU accesses coming from a dma_buf exported by our driver. Let's provide our own implementation relying on the default gem_shmem_prime helpers designed for that purpose. 
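For context, the begin/end_cpu_access hooks implemented in this patch are what userspace reaches through DMA_BUF_IOCTL_SYNC on the exported dma-buf fd. A minimal sketch (hypothetical dmabuf_fd, error handling omitted):

	struct dma_buf_sync sync = {
		.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW,
	};
	ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);	/* -> begin_cpu_access() */

	/* ... CPU reads/writes through the dma-buf mmap ... */

	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
	ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);	/* -> end_cpu_access() */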
v5: - New patch v6: - Collect R-b v7: - Hand-roll our own dma_buf boilerplate v8: - No changes Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-9-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panfrost/panfrost_drv.c | 1 + drivers/gpu/drm/panfrost/panfrost_gem.c | 123 ++++++++++++++++++++++++ drivers/gpu/drm/panfrost/panfrost_gem.h | 3 + 3 files changed, 127 insertions(+) diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 4f5f19eda587..199073cc7d3f 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -852,6 +852,7 @@ static const struct drm_driver panfrost_drm_driver = { .minor = 5, .gem_create_object = panfrost_gem_create_object, + .gem_prime_import = panfrost_gem_prime_import, .gem_prime_import_sg_table = panfrost_gem_prime_import_sg_table, #ifdef CONFIG_DEBUG_FS .debugfs_init = panfrost_debugfs_init, diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index c1688a542ec2..02721863b6ae 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -277,6 +277,128 @@ static size_t panfrost_gem_rss(struct drm_gem_object *obj) return 0; } +static struct sg_table * +panfrost_gem_prime_map_dma_buf(struct dma_buf_attachment *attach, + enum dma_data_direction dir) +{ + struct sg_table *sgt = drm_gem_map_dma_buf(attach, dir); + + if (!IS_ERR(sgt)) + attach->priv = sgt; + + return sgt; +} + +static void +panfrost_gem_prime_unmap_dma_buf(struct dma_buf_attachment *attach, + struct sg_table *sgt, + enum dma_data_direction dir) +{ + attach->priv = NULL; + drm_gem_unmap_dma_buf(attach, sgt, dir); +} + +static int +panfrost_gem_prime_begin_cpu_access(struct dma_buf *dma_buf, + enum dma_data_direction dir) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct drm_device *dev = obj->dev; + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); + struct dma_buf_attachment *attach; + + dma_resv_lock(obj->resv, NULL); + if (shmem->sgt) + dma_sync_sgtable_for_cpu(dev->dev, shmem->sgt, dir); + + if (shmem->vaddr) + invalidate_kernel_vmap_range(shmem->vaddr, shmem->base.size); + + list_for_each_entry(attach, &dma_buf->attachments, node) { + struct sg_table *sgt = attach->priv; + + if (sgt) + dma_sync_sgtable_for_cpu(attach->dev, sgt, dir); + } + dma_resv_unlock(obj->resv); + + return 0; +} + +static int +panfrost_gem_prime_end_cpu_access(struct dma_buf *dma_buf, + enum dma_data_direction dir) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct drm_device *dev = obj->dev; + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); + struct dma_buf_attachment *attach; + + dma_resv_lock(obj->resv, NULL); + list_for_each_entry(attach, &dma_buf->attachments, node) { + struct sg_table *sgt = attach->priv; + + if (sgt) + dma_sync_sgtable_for_device(attach->dev, sgt, dir); + } + + if (shmem->vaddr) + flush_kernel_vmap_range(shmem->vaddr, shmem->base.size); + + if (shmem->sgt) + dma_sync_sgtable_for_device(dev->dev, shmem->sgt, dir); + + dma_resv_unlock(obj->resv); + return 0; +} + +static const struct dma_buf_ops panfrost_dma_buf_ops = { + .attach = drm_gem_map_attach, + .detach = drm_gem_map_detach, + .map_dma_buf = panfrost_gem_prime_map_dma_buf, + .unmap_dma_buf = panfrost_gem_prime_unmap_dma_buf, + .release = drm_gem_dmabuf_release, + .mmap = drm_gem_dmabuf_mmap, + .vmap = drm_gem_dmabuf_vmap, + .vunmap = drm_gem_dmabuf_vunmap, + .begin_cpu_access = 
panfrost_gem_prime_begin_cpu_access, + .end_cpu_access = panfrost_gem_prime_end_cpu_access, +}; + +static struct dma_buf * +panfrost_gem_prime_export(struct drm_gem_object *obj, int flags) +{ + struct drm_device *dev = obj->dev; + struct dma_buf_export_info exp_info = { + .exp_name = KBUILD_MODNAME, + .owner = THIS_MODULE, + .ops = &panfrost_dma_buf_ops, + .size = obj->size, + .flags = flags, + .priv = obj, + .resv = obj->resv, + }; + + return drm_gem_dmabuf_export(dev, &exp_info); +} + +struct drm_gem_object * +panfrost_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + struct drm_gem_object *obj = dma_buf->priv; + + if (dma_buf->ops == &panfrost_dma_buf_ops && obj->dev == dev) { + /* Importing dmabuf exported from our own gem increases + * refcount on gem itself instead of f_count of dmabuf. + */ + drm_gem_object_get(obj); + return obj; + } + + return drm_gem_prime_import(dev, dma_buf); +} + static const struct drm_gem_object_funcs panfrost_gem_funcs = { .free = panfrost_gem_free_object, .open = panfrost_gem_open, @@ -285,6 +407,7 @@ static const struct drm_gem_object_funcs panfrost_gem_funcs = { .pin = panfrost_gem_pin, .unpin = drm_gem_shmem_object_unpin, .get_sg_table = drm_gem_shmem_object_get_sg_table, + .export = panfrost_gem_prime_export, .vmap = drm_gem_shmem_object_vmap, .vunmap = drm_gem_shmem_object_vunmap, .mmap = drm_gem_shmem_object_mmap, diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index 1a62529ff06f..c2470e8255ab 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -132,6 +132,9 @@ struct drm_gem_object * panfrost_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sgt); +struct drm_gem_object * +panfrost_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf); struct panfrost_gem_object * panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags); From 2396d65d94fc75d39f096b9777f9edc9c8e677c1 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:36 +0100 Subject: [PATCH 71/94] drm/panfrost: Expose the selected coherency protocol to the UMD Will be needed if we want to skip CPU cache maintenance operations when the GPU can snoop CPU caches. 
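A userspace sketch of querying the new parameter (standard drm_panfrost_get_param usage; the fd and error handling are assumed/omitted):

	struct drm_panfrost_get_param get = {
		.param = DRM_PANFROST_PARAM_SELECTED_COHERENCY,
	};

	ioctl(fd, DRM_IOCTL_PANFROST_GET_PARAM, &get);
	if (get.value == DRM_PANFROST_GPU_COHERENCY_NONE) {
		/* The UMD must handle CPU cache maintenance itself. */
	}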
v2: - New commit v3: - Fix the coherency values (enum instead of bitmask) v4: - Fix init/test on coherency_features v5: - No changes v6: - Collect R-b v7: - No changes v8: - No changes Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-10-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panfrost/panfrost_device.h | 1 + drivers/gpu/drm/panfrost/panfrost_drv.c | 1 + drivers/gpu/drm/panfrost/panfrost_gpu.c | 26 +++++++++++++++++++--- drivers/gpu/drm/panfrost/panfrost_regs.h | 10 +++++++-- include/uapi/drm/panfrost_drm.h | 7 ++++++ 5 files changed, 40 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h index e61c4329fd07..0f3992412205 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.h +++ b/drivers/gpu/drm/panfrost/panfrost_device.h @@ -79,6 +79,7 @@ struct panfrost_features { u32 thread_max_workgroup_sz; u32 thread_max_barrier_sz; u32 coherency_features; + u32 selected_coherency; u32 afbc_features; u32 texture_features[4]; u32 js_features[16]; diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 199073cc7d3f..b2c3f6c81be0 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -95,6 +95,7 @@ static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct PANFROST_FEATURE_ARRAY(JS_FEATURES, js_features, 15); PANFROST_FEATURE(NR_CORE_GROUPS, nr_core_groups); PANFROST_FEATURE(THREAD_TLS_ALLOC, thread_tls_alloc); + PANFROST_FEATURE(SELECTED_COHERENCY, selected_coherency); case DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP: ret = panfrost_ioctl_query_timestamp(pfdev, ¶m->value); diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c index 483d278eb154..7d555e63e21a 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -159,8 +159,8 @@ static void panfrost_gpu_init_quirks(struct panfrost_device *pfdev) pfdev->features.revision >= 0x2000) quirks |= JM_MAX_JOB_THROTTLE_LIMIT << JM_JOB_THROTTLE_LIMIT_SHIFT; else if (panfrost_model_eq(pfdev, 0x6000) && - pfdev->features.coherency_features == COHERENCY_ACE) - quirks |= (COHERENCY_ACE_LITE | COHERENCY_ACE) << + pfdev->features.coherency_features == BIT(COHERENCY_ACE)) + quirks |= (BIT(COHERENCY_ACE_LITE) | BIT(COHERENCY_ACE)) << JM_FORCE_COHERENCY_FEATURES_SHIFT; if (panfrost_has_hw_feature(pfdev, HW_FEATURE_IDVS_GROUP_SIZE)) @@ -263,7 +263,27 @@ static int panfrost_gpu_init_features(struct panfrost_device *pfdev) pfdev->features.max_threads = gpu_read(pfdev, GPU_THREAD_MAX_THREADS); pfdev->features.thread_max_workgroup_sz = gpu_read(pfdev, GPU_THREAD_MAX_WORKGROUP_SIZE); pfdev->features.thread_max_barrier_sz = gpu_read(pfdev, GPU_THREAD_MAX_BARRIER_SIZE); - pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES); + + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_COHERENCY_REG)) + pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES); + else + pfdev->features.coherency_features = BIT(COHERENCY_ACE_LITE); + + BUILD_BUG_ON(COHERENCY_ACE_LITE != DRM_PANFROST_GPU_COHERENCY_ACE_LITE); + BUILD_BUG_ON(COHERENCY_ACE != DRM_PANFROST_GPU_COHERENCY_ACE); + BUILD_BUG_ON(COHERENCY_NONE != DRM_PANFROST_GPU_COHERENCY_NONE); + + if (!pfdev->coherent) { + pfdev->features.selected_coherency = COHERENCY_NONE; + } else if (pfdev->features.coherency_features & BIT(COHERENCY_ACE)) { + 
pfdev->features.selected_coherency = COHERENCY_ACE; + } else if (pfdev->features.coherency_features & BIT(COHERENCY_ACE_LITE)) { + pfdev->features.selected_coherency = COHERENCY_ACE_LITE; + } else { + drm_WARN(&pfdev->base, true, "No known coherency protocol supported"); + pfdev->features.selected_coherency = COHERENCY_NONE; + } + pfdev->features.afbc_features = gpu_read(pfdev, GPU_AFBC_FEATURES); for (i = 0; i < 4; i++) pfdev->features.texture_features[i] = gpu_read(pfdev, GPU_TEXTURE_FEATURES(i)); diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h index 2b8f1617b836..ee15f6bf6e6f 100644 --- a/drivers/gpu/drm/panfrost/panfrost_regs.h +++ b/drivers/gpu/drm/panfrost/panfrost_regs.h @@ -102,9 +102,15 @@ #define GPU_L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ #define GPU_L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ +/* GPU_COHERENCY_FEATURES is a bitmask of BIT(COHERENCY_xxx) values encoding the + * set of supported coherency protocols. GPU_COHERENCY_ENABLE is passed a + * COHERENCY_xxx value. + */ #define GPU_COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ -#define COHERENCY_ACE_LITE BIT(0) -#define COHERENCY_ACE BIT(1) +#define GPU_COHERENCY_ENABLE 0x304 /* (RW) Coherency protocol selection */ +#define COHERENCY_ACE_LITE 0 +#define COHERENCY_ACE 1 +#define COHERENCY_NONE 31 #define GPU_STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ #define GPU_STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 1956431bb391..0c59714ae42b 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -228,6 +228,13 @@ enum drm_panfrost_param { DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP, DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP_FREQUENCY, DRM_PANFROST_PARAM_ALLOWED_JM_CTX_PRIORITIES, + DRM_PANFROST_PARAM_SELECTED_COHERENCY, +}; + +enum drm_panfrost_gpu_coherency { + DRM_PANFROST_GPU_COHERENCY_ACE_LITE = 0, + DRM_PANFROST_GPU_COHERENCY_ACE = 1, + DRM_PANFROST_GPU_COHERENCY_NONE = 31, }; struct drm_panfrost_get_param { From 7be45f5489769520aa9276137d0f1f543fb81286 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 8 Dec 2025 11:08:37 +0100 Subject: [PATCH 72/94] drm/panfrost: Add a PANFROST_SYNC_BO ioctl This will be used by the UMD to synchronize CPU-cached mappings when the UMD can't do it directly (no usermode cache maintenance instruction on Arm32). 
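As a usage sketch (hypothetical UMD code; the fd, BO handle and error handling are assumed), flushing a CPU-written range of a cached BO before the GPU reads it could look like:

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/panfrost_drm.h>

/* Hypothetical helper: flush one range of one BO in a single ioctl call. */
static int panfrost_flush_bo_range(int fd, uint32_t handle,
				   uint32_t offset, uint32_t size)
{
	struct drm_panfrost_bo_sync_op op = {
		.handle = handle,
		.type = PANFROST_BO_SYNC_CPU_CACHE_FLUSH,
		.offset = offset,
		.size = size,
	};
	struct drm_panfrost_sync_bo args = {
		.ops = (uint64_t)(uintptr_t)&op,
		.op_count = 1,
	};

	return ioctl(fd, DRM_IOCTL_PANFROST_SYNC_BO, &args);
}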
v2: - Add more to the commit message - Change the flags to better match the drm_gem_shmem_sync semantics v3: - Add Steve's R-b v4: - No changes v5: - Drop Steve's R-b (semantics changes requiring a new review) v6: - Bail out early in panfrost_ioctl_sync_bo() if op_count is zero v7: - Hand-roll our own bo_sync() helper v8: - Collect R-b Signed-off-by: Faith Ekstrand Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-11-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panfrost/panfrost_drv.c | 51 +++++++++++++++ drivers/gpu/drm/panfrost/panfrost_gem.c | 84 +++++++++++++++++++++++++ drivers/gpu/drm/panfrost/panfrost_gem.h | 2 + include/uapi/drm/panfrost_drm.h | 45 +++++++++++++ 4 files changed, 182 insertions(+) diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index b2c3f6c81be0..450204fdbe45 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -580,6 +580,56 @@ static int panfrost_ioctl_jm_ctx_destroy(struct drm_device *dev, void *data, return panfrost_jm_ctx_destroy(file, args->handle); } +static int panfrost_ioctl_sync_bo(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_panfrost_sync_bo *args = data; + struct drm_panfrost_bo_sync_op *ops; + struct drm_gem_object *obj; + int ret; + u32 i; + + if (args->pad) + return -EINVAL; + + if (!args->op_count) + return 0; + + ops = kvmalloc_array(args->op_count, sizeof(*ops), GFP_KERNEL); + if (!ops) { + DRM_DEBUG("Failed to allocate incoming BO sync ops array\n"); + return -ENOMEM; + } + + if (copy_from_user(ops, (void __user *)(uintptr_t)args->ops, + args->op_count * sizeof(*ops))) { + DRM_DEBUG("Failed to copy in BO sync ops\n"); + ret = -EFAULT; + goto err_ops; + } + + for (i = 0; i < args->op_count; i++) { + obj = drm_gem_object_lookup(file, ops[i].handle); + if (!obj) { + ret = -ENOENT; + goto err_ops; + } + + ret = panfrost_gem_sync(obj, ops[i].type, + ops[i].offset, ops[i].size); + + drm_gem_object_put(obj); + + if (ret) + goto err_ops; + } + +err_ops: + kvfree(ops); + + return ret; +} + int panfrost_unstable_ioctl_check(void) { if (!unstable_ioctls) @@ -649,6 +699,7 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = { PANFROST_IOCTL(SET_LABEL_BO, set_label_bo, DRM_RENDER_ALLOW), PANFROST_IOCTL(JM_CTX_CREATE, jm_ctx_create, DRM_RENDER_ALLOW), PANFROST_IOCTL(JM_CTX_DESTROY, jm_ctx_destroy, DRM_RENDER_ALLOW), + PANFROST_IOCTL(SYNC_BO, sync_bo, DRM_RENDER_ALLOW), }; static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev, diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 02721863b6ae..62c9e3a6b0e9 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -507,6 +507,90 @@ panfrost_gem_set_label(struct drm_gem_object *obj, const char *label) kfree_const(old_label); } +int +panfrost_gem_sync(struct drm_gem_object *obj, u32 type, u32 offset, u32 size) +{ + struct panfrost_gem_object *bo = to_panfrost_bo(obj); + struct drm_gem_shmem_object *shmem = &bo->base; + const struct drm_device *dev = shmem->base.dev; + struct sg_table *sgt; + struct scatterlist *sgl; + unsigned int count; + + /* Make sure the range is in bounds. */ + if (offset + size < offset || offset + size > shmem->base.size) + return -EINVAL; + + /* Disallow CPU-cache maintenance on imported buffers. 
*/ + if (drm_gem_is_imported(&shmem->base)) + return -EINVAL; + + switch (type) { + case PANFROST_BO_SYNC_CPU_CACHE_FLUSH: + case PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE: + break; + + default: + return -EINVAL; + } + + /* Don't bother if it's WC-mapped */ + if (shmem->map_wc) + return 0; + + /* Nothing to do if the size is zero. */ + if (size == 0) + return 0; + + sgt = drm_gem_shmem_get_pages_sgt(shmem); + if (IS_ERR(sgt)) + return PTR_ERR(sgt); + + for_each_sgtable_dma_sg(sgt, sgl, count) { + if (size == 0) + break; + + dma_addr_t paddr = sg_dma_address(sgl); + size_t len = sg_dma_len(sgl); + + if (len <= offset) { + offset -= len; + continue; + } + + paddr += offset; + len -= offset; + len = min_t(size_t, len, size); + size -= len; + offset = 0; + + /* It's unclear whether dma_sync_xxx() is the right API to do CPU + * cache maintenance given an IOMMU can register its own + * implementation doing more than just CPU cache flushes/invalidation, + * and what we really care about here is CPU caches only, but that's + * the best we have that is both arch-agnostic and does at least the + * CPU cache maintenance on a (paddr, len) tuple. + * + * Also, I wish we could do a single + * + * dma_sync_single_for_device(BIDIR) + * + * and get a flush+invalidate, but that's not how it's implemented + * in practice (at least on arm64), so we have to make it + * + * dma_sync_single_for_device(TO_DEVICE) + * dma_sync_single_for_cpu(FROM_DEVICE) + * + * for the flush+invalidate case. + */ + dma_sync_single_for_device(dev->dev, paddr, len, DMA_TO_DEVICE); + if (type == PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE) + dma_sync_single_for_cpu(dev->dev, paddr, len, DMA_FROM_DEVICE); + } + + return 0; +} + void panfrost_gem_internal_set_label(struct drm_gem_object *obj, const char *label) { diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index c2470e8255ab..45e2aa846cc7 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -153,6 +153,8 @@ int panfrost_gem_shrinker_init(struct drm_device *dev); void panfrost_gem_shrinker_cleanup(struct drm_device *dev); void panfrost_gem_set_label(struct drm_gem_object *obj, const char *label); +int panfrost_gem_sync(struct drm_gem_object *obj, u32 type, + u32 offset, u32 size); void panfrost_gem_internal_set_label(struct drm_gem_object *obj, const char *label); #ifdef CONFIG_DEBUG_FS diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 0c59714ae42b..e194e087a0c8 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -24,6 +24,7 @@ extern "C" { #define DRM_PANFROST_SET_LABEL_BO 0x09 #define DRM_PANFROST_JM_CTX_CREATE 0x0a #define DRM_PANFROST_JM_CTX_DESTROY 0x0b +#define DRM_PANFROST_SYNC_BO 0x0c #define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) @@ -35,6 +36,7 @@ extern "C" { #define DRM_IOCTL_PANFROST_SET_LABEL_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SET_LABEL_BO, struct drm_panfrost_set_label_bo) #define DRM_IOCTL_PANFROST_JM_CTX_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_CREATE, struct drm_panfrost_jm_ctx_create) #define DRM_IOCTL_PANFROST_JM_CTX_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_DESTROY, struct drm_panfrost_jm_ctx_destroy) +#define DRM_IOCTL_PANFROST_SYNC_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SYNC_BO, struct 
drm_panfrost_sync_bo) /* * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module @@ -308,6 +310,49 @@ struct drm_panfrost_set_label_bo { __u64 label; }; +/* Valid values for the @type field of drm_panfrost_bo_sync_op */ +#define PANFROST_BO_SYNC_CPU_CACHE_FLUSH 0 +#define PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE 1 + +/** + * struct drm_panfrost_bo_sync_op - BO sync op + */ +struct drm_panfrost_bo_sync_op { + /** @handle: Handle of the buffer object to sync. */ + __u32 handle; + + /** @type: Type of sync operation. */ + __u32 type; + + /** + * @offset: Offset into the BO at which the sync range starts. + * + * This will be rounded down to the nearest cache line as needed. + */ + __u32 offset; + + /** + * @size: Size of the range to sync + * + * @size + @offset will be rounded up to the nearest cache line as + * needed. + */ + __u32 size; +}; + +/** + * struct drm_panfrost_sync_bo - ioctl argument for syncing BO maps + */ +struct drm_panfrost_sync_bo { + /** Array of struct drm_panfrost_bo_sync_op */ + __u64 ops; + + /** Number of BO sync ops */ + __u32 op_count; + + __u32 pad; +}; + /* Definitions for coredump decoding in user space */ #define PANFROSTDUMP_MAJOR 1 #define PANFROSTDUMP_MINOR 0 From d17592e61fa8e3b2d58df7c4a24abc8ac58b8d3f Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:38 +0100 Subject: [PATCH 73/94] drm/panfrost: Add an ioctl to query BO flags This is useful when importing BOs, so we can know about cacheability and flush the caches when needed. v2: - New commit v3: - Add Steve's R-b v4: - No changes v5: - No changes v6: - No changes v7: - No changes v8: - No changes Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-12-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panfrost/panfrost_drv.c | 33 +++++++++++++++++++++++++ include/uapi/drm/panfrost_drm.h | 19 ++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 450204fdbe45..d461ecf8829d 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -630,6 +630,38 @@ static int panfrost_ioctl_sync_bo(struct drm_device *ddev, void *data, return ret; } +static int panfrost_ioctl_query_bo_info(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_panfrost_query_bo_info *args = data; + struct drm_gem_object *gem_obj; + struct panfrost_gem_object *bo; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + + bo = to_panfrost_bo(gem_obj); + args->pad = 0; + args->create_flags = 0; + args->extra_flags = 0; + + if (drm_gem_is_imported(gem_obj)) { + args->extra_flags |= DRM_PANFROST_BO_IS_IMPORTED; + } else { + if (bo->noexec) + args->create_flags |= PANFROST_BO_NOEXEC; + + if (bo->is_heap) + args->create_flags |= PANFROST_BO_HEAP; + } + + drm_gem_object_put(gem_obj); + return 0; +} + int panfrost_unstable_ioctl_check(void) { if (!unstable_ioctls) @@ -700,6 +732,7 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = { PANFROST_IOCTL(JM_CTX_CREATE, jm_ctx_create, DRM_RENDER_ALLOW), PANFROST_IOCTL(JM_CTX_DESTROY, jm_ctx_destroy, DRM_RENDER_ALLOW), PANFROST_IOCTL(SYNC_BO, sync_bo, DRM_RENDER_ALLOW), + PANFROST_IOCTL(QUERY_BO_INFO, query_bo_info, DRM_RENDER_ALLOW), }; static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev, diff --git 
a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index e194e087a0c8..36ae48ea50d3 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -25,6 +25,7 @@ extern "C" { #define DRM_PANFROST_JM_CTX_CREATE 0x0a #define DRM_PANFROST_JM_CTX_DESTROY 0x0b #define DRM_PANFROST_SYNC_BO 0x0c +#define DRM_PANFROST_QUERY_BO_INFO 0x0d #define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) @@ -37,6 +38,7 @@ extern "C" { #define DRM_IOCTL_PANFROST_JM_CTX_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_CREATE, struct drm_panfrost_jm_ctx_create) #define DRM_IOCTL_PANFROST_JM_CTX_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_DESTROY, struct drm_panfrost_jm_ctx_destroy) #define DRM_IOCTL_PANFROST_SYNC_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SYNC_BO, struct drm_panfrost_sync_bo) +#define DRM_IOCTL_PANFROST_QUERY_BO_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_QUERY_BO_INFO, struct drm_panfrost_query_bo_info) /* * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module @@ -353,6 +355,23 @@ struct drm_panfrost_sync_bo { __u32 pad; }; +/** BO comes from a different subsystem. */ +#define DRM_PANFROST_BO_IS_IMPORTED (1 << 0) + +struct drm_panfrost_query_bo_info { + /** Handle of the object being queried. */ + __u32 handle; + + /** Extra flags that are not coming from the BO_CREATE ioctl(). */ + __u32 extra_flags; + + /** Flags passed at creation time. */ + __u32 create_flags; + + /** Will be zero on return. */ + __u32 pad; +}; + /* Definitions for coredump decoding in user space */ #define PANFROSTDUMP_MAJOR 1 #define PANFROSTDUMP_MINOR 0 From 62eedf1ccba534b318ca85d3890bf0951b9e0f87 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 8 Dec 2025 11:08:39 +0100 Subject: [PATCH 74/94] drm/panfrost: Add flag to map GEM object Write-Back Cacheable Will be used by the UMD to optimize CPU accesses to buffers that are frequently read by the CPU, or on which the access pattern makes non-cacheable mappings inefficient. Mapping buffers CPU-cached implies taking care of the CPU cache maintenance in the UMD, unless the GPU is IO coherent. 
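For example, a UMD could request such a mapping at creation time; a minimal sketch (helper name and flag policy are hypothetical, not part of this patch):

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/panfrost_drm.h>

/* Hypothetical allocation of a CPU write-back cacheable BO. The caller
 * then owns CPU cache maintenance (e.g. via SYNC_BO) unless the GPU is
 * IO coherent. */
static int panfrost_create_cached_bo(int fd, uint32_t size, uint32_t *handle)
{
	struct drm_panfrost_create_bo args = {
		.size = size,
		.flags = PANFROST_BO_NOEXEC | PANFROST_BO_WB_MMAP,
	};
	int ret = ioctl(fd, DRM_IOCTL_PANFROST_CREATE_BO, &args);

	if (!ret)
		*handle = args.handle;
	return ret;
}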
v2: - Add more to the commit message v3: - No changes v4: - Fix the map_wc test in panfrost_ioctl_query_bo_info() v5: - Drop Steve's R-b (enough has changed to justify a new review) v6: - Collect R-b v7: - No changes v8: - Fix double drm_gem_object_funcs::export assignment Signed-off-by: Faith Ekstrand Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-13-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panfrost/panfrost_drv.c | 10 ++++++-- drivers/gpu/drm/panfrost/panfrost_gem.c | 32 +++++++++++++++++++++++++ drivers/gpu/drm/panfrost/panfrost_gem.h | 5 ++++ include/uapi/drm/panfrost_drm.h | 5 +++- 4 files changed, 49 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index d461ecf8829d..34969179544c 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -126,6 +126,10 @@ static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct return 0; } +#define PANFROST_BO_FLAGS (PANFROST_BO_NOEXEC | \ + PANFROST_BO_HEAP | \ + PANFROST_BO_WB_MMAP) + static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *file) { @@ -135,8 +139,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, struct panfrost_gem_mapping *mapping; int ret; - if (!args->size || args->pad || - (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP))) + if (!args->size || args->pad || (args->flags & ~PANFROST_BO_FLAGS)) return -EINVAL; /* Heaps should never be executable */ @@ -656,6 +659,9 @@ static int panfrost_ioctl_query_bo_info(struct drm_device *dev, void *data, if (bo->is_heap) args->create_flags |= PANFROST_BO_HEAP; + + if (!bo->base.map_wc) + args->create_flags |= PANFROST_BO_WB_MMAP; } drm_gem_object_put(gem_obj); diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 62c9e3a6b0e9..44985b515212 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -444,12 +444,42 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t return &obj->base.base; } +static bool +should_map_wc(struct panfrost_gem_object *bo) +{ + struct panfrost_device *pfdev = to_panfrost_device(bo->base.base.dev); + + /* We can't do uncached mappings if the device is coherent, + * because the zeroing done by the shmem layer at page allocation + * time happens on a cached mapping which isn't CPU-flushed (at least + * not on Arm64 where the flush is deferred to PTE setup time, and + * only done conditionally based on the mapping permissions). We can't + * rely on dma_map_sgtable()/dma_sync_sgtable_for_xxx() either to flush + * those, because they are NOPed if dma_dev_coherent() returns true. + */ + if (pfdev->coherent) + return false; + + /* Cached mappings are explicitly requested, so no write-combine. */ + if (bo->wb_mmap) + return false; + + /* The default is write-combine. */ + return true; +} + struct panfrost_gem_object * panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags) { struct drm_gem_shmem_object *shmem; struct panfrost_gem_object *bo; + /* The heap buffer is not supposed to be CPU-visible, so don't allow + * WB_MMAP on those. 
+ */ + if ((flags & PANFROST_BO_HEAP) && (flags & PANFROST_BO_WB_MMAP)) + return ERR_PTR(-EINVAL); + /* Round up heap allocations to 2MB to keep fault handling simple */ if (flags & PANFROST_BO_HEAP) size = roundup(size, SZ_2M); @@ -461,6 +491,8 @@ panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags) bo = to_panfrost_bo(&shmem->base); bo->noexec = !!(flags & PANFROST_BO_NOEXEC); bo->is_heap = !!(flags & PANFROST_BO_HEAP); + bo->wb_mmap = !!(flags & PANFROST_BO_WB_MMAP); + bo->base.map_wc = should_map_wc(bo); return bo; } diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index 45e2aa846cc7..79d4377019e9 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -98,6 +98,11 @@ struct panfrost_gem_object { bool noexec :1; bool is_heap :1; + /* On coherent devices, this reflects the creation flags, not the true + * cacheability attribute of the mapping. + */ + bool wb_mmap :1; + #ifdef CONFIG_DEBUG_FS struct panfrost_gem_debugfs debugfs; #endif diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 36ae48ea50d3..50d5337f35ef 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -124,9 +124,12 @@ struct drm_panfrost_wait_bo { __s64 timeout_ns; }; -/* Valid flags to pass to drm_panfrost_create_bo */ +/* Valid flags to pass to drm_panfrost_create_bo. + * PANFROST_BO_WB_MMAP can't be set if PANFROST_BO_HEAP is. + */ #define PANFROST_BO_NOEXEC 1 #define PANFROST_BO_HEAP 2 +#define PANFROST_BO_WB_MMAP 4 /** * struct drm_panfrost_create_bo - ioctl argument for creating Panfrost BOs. From 0823bd894278e4c0f1acb8f3a8a3c67745e6d1f6 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 8 Dec 2025 11:08:40 +0100 Subject: [PATCH 75/94] drm/panfrost: Bump the driver version to 1.6 Bump the driver version to reflect the new cached-CPU mapping capability. 
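A UMD would typically gate the new uAPI on this version; a sketch using libdrm (assumed available; the helper name is hypothetical):

#include <stdbool.h>
#include <xf86drm.h>

/* Hypothetical check: WB_MMAP, SYNC_BO and QUERY_BO_INFO are only safe
 * to use when panfrost reports version 1.6 or later. */
static bool panfrost_has_cached_mmap_uapi(int fd)
{
	drmVersionPtr ver = drmGetVersion(fd);
	bool ok;

	if (!ver)
		return false;

	ok = ver->version_major == 1 && ver->version_minor >= 6;
	drmFreeVersion(ver);
	return ok;
}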
v2: - Quickly describe what the new version exposes in the commit message v3: - Add Steve's R-b v4: - No changes v5: - No changes v6: - No changes v7: - No changes v8: - No changes Signed-off-by: Faith Ekstrand Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-14-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panfrost/panfrost_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 34969179544c..b95120682a72 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -928,6 +928,9 @@ static void panfrost_debugfs_init(struct drm_minor *minor) * - 1.4 - adds SET_LABEL_BO * - 1.5 - adds JM_CTX_{CREATE,DESTROY} ioctls and extend SUBMIT to allow * context creation with configurable priorities/affinity + * - 1.6 - adds PANFROST_BO_WB_MMAP, PANFROST_IOCTL_SYNC_BO, + * PANFROST_IOCTL_QUERY_BO_INFO and + * DRM_PANFROST_PARAM_SELECTED_COHERENCY */ static const struct drm_driver panfrost_drm_driver = { .driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ, @@ -940,7 +943,7 @@ static const struct drm_driver panfrost_drm_driver = { .name = "panfrost", .desc = "panfrost DRM", .major = 1, - .minor = 5, + .minor = 6, .gem_create_object = panfrost_gem_create_object, .gem_prime_import = panfrost_gem_prime_import, From 00ffe45ece80160aef446d74ded906352f21dd72 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Mon, 8 Dec 2025 08:53:56 -0800 Subject: [PATCH 76/94] accel/amdxdna: Fix race condition when checking rpm_on When autosuspend is triggered, the driver's rpm_on flag is temporarily cleared to indicate that a suspend/resume is already in progress. However, when a userspace application submits a command during this narrow window, amdxdna_pm_resume_get() may incorrectly skip the resume operation because the rpm_on flag is still cleared. This results in commands being submitted while the device has not actually resumed, causing unexpected behavior. set_dpm() is called from the suspend/resume path and relied on the rpm_on flag to avoid calling back into runtime PM suspend/resume recursively. So to fix this, remove the use of the rpm_on flag entirely. Instead, introduce aie2_pm_set_dpm(), which explicitly resumes the device before invoking set_dpm(). With this change, set_dpm() is called directly when already inside the suspend or resume execution path; everywhere else, aie2_pm_set_dpm() is called.
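To make the window concrete, a rough interleaving under the old rpm_on scheme (illustrative sketch only, not code from this patch):

/*
 * CPU0: system/runtime PM               CPU1: userspace submit
 * ------------------------              ----------------------------
 * amdxdna_pm_suspend()
 *   rpm_on = false
 *                                       amdxdna_pm_resume_get()
 *                                         sees !rpm_on -> returns 0,
 *                                         resume skipped
 *   ops->suspend(xdna)                  command reaches a device that
 *   rpm_on = true                       is suspending
 */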
Fixes: 063db451832b ("accel/amdxdna: Enhance runtime power management") Reviewed-by: Mario Limonciello (AMD) Reviewed-by: Maciej Falkowski Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20251208165356.1549237-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_message.c | 1 - drivers/accel/amdxdna/aie2_pci.c | 2 +- drivers/accel/amdxdna/aie2_pci.h | 1 + drivers/accel/amdxdna/aie2_pm.c | 17 +++++++++++++++- drivers/accel/amdxdna/aie2_smu.c | 27 ++++--------------------- drivers/accel/amdxdna/amdxdna_pci_drv.h | 1 - drivers/accel/amdxdna/amdxdna_pm.c | 22 ++------------------ 7 files changed, 24 insertions(+), 47 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index fee3b0627aba..a75156800467 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -39,7 +39,6 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev, if (!ndev->mgmt_chann) return -ENODEV; - drm_WARN_ON(&xdna->ddev, xdna->rpm_on && !mutex_is_locked(&xdna->dev_lock)); ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg); if (ret == -ETIME) { xdna_mailbox_stop_channel(ndev->mgmt_chann); diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index ceef1c502e9e..81a8e4137bfd 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -321,7 +321,7 @@ static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 dpm_level) if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == dpm_level) return 0; - return ndev->priv->hw_ops.set_dpm(ndev, dpm_level); + return aie2_pm_set_dpm(ndev, dpm_level); } static struct xrs_action_ops aie2_xrs_actions = { diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h index cc9f933f80b2..c6b5cf4ae5c4 100644 --- a/drivers/accel/amdxdna/aie2_pci.h +++ b/drivers/accel/amdxdna/aie2_pci.h @@ -286,6 +286,7 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); /* aie2_pm.c */ int aie2_pm_init(struct amdxdna_dev_hdl *ndev); int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target); +int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); /* aie2_psp.c */ struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf); diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c index 426c38fce848..afcd6d4683e5 100644 --- a/drivers/accel/amdxdna/aie2_pm.c +++ b/drivers/accel/amdxdna/aie2_pm.c @@ -10,6 +10,7 @@ #include "aie2_pci.h" #include "amdxdna_pci_drv.h" +#include "amdxdna_pm.h" #define AIE2_CLK_GATING_ENABLE 1 #define AIE2_CLK_GATING_DISABLE 0 @@ -26,6 +27,20 @@ static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 val) return 0; } +int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) +{ + int ret; + + ret = amdxdna_pm_resume_get(ndev->xdna); + if (ret) + return ret; + + ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level); + amdxdna_pm_suspend_put(ndev->xdna); + + return ret; +} + int aie2_pm_init(struct amdxdna_dev_hdl *ndev) { int ret; @@ -94,7 +109,7 @@ int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type return -EOPNOTSUPP; } - ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level); + ret = aie2_pm_set_dpm(ndev, dpm_level); if (ret) return ret; diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c index bd94ee96c2bc..2d195e41f83d 100644 --- a/drivers/accel/amdxdna/aie2_smu.c +++ b/drivers/accel/amdxdna/aie2_smu.c @@ -11,7 +11,6 @@ 
#include "aie2_pci.h" #include "amdxdna_pci_drv.h" -#include "amdxdna_pm.h" #define SMU_RESULT_OK 1 @@ -67,16 +66,12 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) u32 freq; int ret; - ret = amdxdna_pm_resume_get(ndev->xdna); - if (ret) - return ret; - ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq); if (ret) { XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n", ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret); - goto suspend_put; + return ret; } ndev->npuclk_freq = freq; @@ -85,10 +80,9 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) if (ret) { XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n", ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret); - goto suspend_put; + return ret; } - amdxdna_pm_suspend_put(ndev->xdna); ndev->hclk_freq = freq; ndev->dpm_level = dpm_level; ndev->max_tops = 2 * ndev->total_col; @@ -98,35 +92,26 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) ndev->npuclk_freq, ndev->hclk_freq); return 0; - -suspend_put: - amdxdna_pm_suspend_put(ndev->xdna); - return ret; } int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) { int ret; - ret = amdxdna_pm_resume_get(ndev->xdna); - if (ret) - return ret; - ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL); if (ret) { XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ", dpm_level, ret); - goto suspend_put; + return ret; } ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL); if (ret) { XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d", dpm_level, ret); - goto suspend_put; + return ret; } - amdxdna_pm_suspend_put(ndev->xdna); ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk; ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk; ndev->dpm_level = dpm_level; @@ -137,10 +122,6 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) ndev->npuclk_freq, ndev->hclk_freq); return 0; - -suspend_put: - amdxdna_pm_suspend_put(ndev->xdna); - return ret; } int aie2_smu_init(struct amdxdna_dev_hdl *ndev) diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h index c99477f5e454..0d50c4c8b353 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h @@ -101,7 +101,6 @@ struct amdxdna_dev { struct amdxdna_fw_ver fw_ver; struct rw_semaphore notifier_lock; /* for mmu notifier*/ struct workqueue_struct *notifier_wq; - bool rpm_on; }; /* diff --git a/drivers/accel/amdxdna/amdxdna_pm.c b/drivers/accel/amdxdna/amdxdna_pm.c index fa38e65d617c..d024d480521c 100644 --- a/drivers/accel/amdxdna/amdxdna_pm.c +++ b/drivers/accel/amdxdna/amdxdna_pm.c @@ -15,14 +15,9 @@ int amdxdna_pm_suspend(struct device *dev) { struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev)); int ret = -EOPNOTSUPP; - bool rpm; - if (xdna->dev_info->ops->suspend) { - rpm = xdna->rpm_on; - xdna->rpm_on = false; + if (xdna->dev_info->ops->suspend) ret = xdna->dev_info->ops->suspend(xdna); - xdna->rpm_on = rpm; - } XDNA_DBG(xdna, "Suspend done ret %d", ret); return ret; @@ -32,14 +27,9 @@ int amdxdna_pm_resume(struct device *dev) { struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev)); int ret = -EOPNOTSUPP; - bool rpm; - if (xdna->dev_info->ops->resume) { - rpm = xdna->rpm_on; - xdna->rpm_on = false; + if (xdna->dev_info->ops->resume) ret = xdna->dev_info->ops->resume(xdna); - xdna->rpm_on = rpm; - } XDNA_DBG(xdna, "Resume done ret %d", ret); return ret; @@ -50,9 +40,6 @@ int 
amdxdna_pm_resume_get(struct amdxdna_dev *xdna) struct device *dev = xdna->ddev.dev; int ret; - if (!xdna->rpm_on) - return 0; - ret = pm_runtime_resume_and_get(dev); if (ret) { XDNA_ERR(xdna, "Resume failed: %d", ret); @@ -66,9 +53,6 @@ void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna) { struct device *dev = xdna->ddev.dev; - if (!xdna->rpm_on) - return; - pm_runtime_put_autosuspend(dev); } @@ -81,14 +65,12 @@ void amdxdna_pm_init(struct amdxdna_dev *xdna) pm_runtime_use_autosuspend(dev); pm_runtime_allow(dev); pm_runtime_put_autosuspend(dev); - xdna->rpm_on = true; } void amdxdna_pm_fini(struct amdxdna_dev *xdna) { struct device *dev = xdna->ddev.dev; - xdna->rpm_on = false; pm_runtime_get_noresume(dev); pm_runtime_forbid(dev); } From 6d2b55f7d7011ebc11c933bc680ba1b050ce1e88 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 4 Nov 2025 12:25:35 +0200 Subject: [PATCH 77/94] drm/hyperv: move Kconfig under driver directory Almost all DRM driver Kconfig options are in dedicated Kconfig files under driver directories. Follow suit in hyperv. Cc: Deepak Rawat Reviewed-by: Deepak Rawat Reviewed-by: Maarten Lankhorst Link: https://patch.msgid.link/4923196ab968bfdbcc2d7572d9be9886c32c06c9.1762251845.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/Kconfig | 14 +------------- drivers/gpu/drm/hyperv/Kconfig | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 13 deletions(-) create mode 100644 drivers/gpu/drm/hyperv/Kconfig diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 7e6bc0b3a589..15c2f941a4cd 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -398,19 +398,7 @@ source "drivers/gpu/drm/imagination/Kconfig" source "drivers/gpu/drm/tyr/Kconfig" -config DRM_HYPERV - tristate "DRM Support for Hyper-V synthetic video device" - depends on DRM && PCI && HYPERV_VMBUS - select DRM_CLIENT_SELECTION - select DRM_KMS_HELPER - select DRM_GEM_SHMEM_HELPER - help - This is a KMS driver for Hyper-V synthetic video device. Choose this - option if you would like to enable drm driver for Hyper-V virtual - machine. Unselect Hyper-V framebuffer driver (CONFIG_FB_HYPERV) so - that DRM driver is used by default. - - If M is selected the module will be called hyperv_drm. +source "drivers/gpu/drm/hyperv/Kconfig" # Separate option as not all DRM drivers use it config DRM_PANEL_BACKLIGHT_QUIRKS diff --git a/drivers/gpu/drm/hyperv/Kconfig b/drivers/gpu/drm/hyperv/Kconfig new file mode 100644 index 000000000000..86234f6a73f2 --- /dev/null +++ b/drivers/gpu/drm/hyperv/Kconfig @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0-only +config DRM_HYPERV + tristate "DRM Support for Hyper-V synthetic video device" + depends on DRM && PCI && HYPERV_VMBUS + select DRM_CLIENT_SELECTION + select DRM_KMS_HELPER + select DRM_GEM_SHMEM_HELPER + help + This is a KMS driver for Hyper-V synthetic video device. Choose this + option if you would like to enable drm driver for Hyper-V virtual + machine. Unselect Hyper-V framebuffer driver (CONFIG_FB_HYPERV) so + that DRM driver is used by default. + + If M is selected the module will be called hyperv_drm. From f88cb2660bd09fd76b54e6bd2e62f3d7501147b6 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 4 Nov 2025 12:25:36 +0200 Subject: [PATCH 78/94] drm/vgem: move Kconfig under driver directory Almost all DRM driver Kconfig options are in dedicated Kconfig files under driver directories. Follow suit in vgem. 
Reviewed-by: Maarten Lankhorst Link: https://patch.msgid.link/51935bfb299e8c64beae5a654d908231e2ec9c7f.1762251845.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/Kconfig | 9 +-------- drivers/gpu/drm/vgem/Kconfig | 9 +++++++++ 2 files changed, 10 insertions(+), 8 deletions(-) create mode 100644 drivers/gpu/drm/vgem/Kconfig diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 15c2f941a4cd..8ec9c06b45b7 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -283,14 +283,7 @@ source "drivers/gpu/drm/xe/Kconfig" source "drivers/gpu/drm/kmb/Kconfig" -config DRM_VGEM - tristate "Virtual GEM provider" - depends on DRM && MMU - select DRM_GEM_SHMEM_HELPER - help - Choose this option to get a virtual graphics memory manager, - as used by Mesa's software renderer for enhanced performance. - If M is selected the module will be called vgem. +source "drivers/gpu/drm/vgem/Kconfig" source "drivers/gpu/drm/vkms/Kconfig" diff --git a/drivers/gpu/drm/vgem/Kconfig b/drivers/gpu/drm/vgem/Kconfig new file mode 100644 index 000000000000..c419cdadd54c --- /dev/null +++ b/drivers/gpu/drm/vgem/Kconfig @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +config DRM_VGEM + tristate "Virtual GEM provider" + depends on DRM && MMU + select DRM_GEM_SHMEM_HELPER + help + Choose this option to get a virtual graphics memory manager, + as used by Mesa's software renderer for enhanced performance. + If M is selected the module will be called vgem. From 4cabf00fcd09fd1dfbb3f345abe3ed2330157f22 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 4 Nov 2025 12:25:37 +0200 Subject: [PATCH 79/94] drm/Kconfig: move generic Kconfig options above drivers Keep non-driver options together, above drivers. DRM_PANEL_ORIENTATION_QUIRKS remains alone at the end because it's outside of the whole "if DRM" block. Reviewed-by: Maarten Lankhorst Link: https://patch.msgid.link/a0f9e1a31a2190f535f2c2f94af6e22030db199f.1762251845.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/Kconfig | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 8ec9c06b45b7..5a9b5f7a40e2 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -265,6 +265,18 @@ config DRM_SCHED tristate depends on DRM +# Separate option as not all DRM drivers use it +config DRM_PANEL_BACKLIGHT_QUIRKS + tristate + +config DRM_LIB_RANDOM + bool + default n + +config DRM_PRIVACY_SCREEN + bool + default n + source "drivers/gpu/drm/sysfb/Kconfig" source "drivers/gpu/drm/arm/Kconfig" @@ -393,18 +405,6 @@ source "drivers/gpu/drm/tyr/Kconfig" source "drivers/gpu/drm/hyperv/Kconfig" -# Separate option as not all DRM drivers use it -config DRM_PANEL_BACKLIGHT_QUIRKS - tristate - -config DRM_LIB_RANDOM - bool - default n - -config DRM_PRIVACY_SCREEN - bool - default n - endif # Separate option because drm_panel_orientation_quirks.c is shared with fbdev From 22ba3bb3ff2a8e5a509bfa13f3e362d0d36083fd Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 4 Nov 2025 12:25:38 +0200 Subject: [PATCH 80/94] drm/Kconfig: sort driver Kconfig source list Sort the driver Kconfig source list, and remove the superfluous blank lines in between. 
Reviewed-by: Louis Chauvet Reviewed-by: Maarten Lankhorst Link: https://patch.msgid.link/4fa11ab0b938d5c726b6ad78d28c7527b830f696.1762251845.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/Kconfig | 186 ++++++++++++++-------------------------- 1 file changed, 62 insertions(+), 124 deletions(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 5a9b5f7a40e2..a33b90251530 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -277,133 +277,71 @@ config DRM_PRIVACY_SCREEN bool default n -source "drivers/gpu/drm/sysfb/Kconfig" - -source "drivers/gpu/drm/arm/Kconfig" - -source "drivers/gpu/drm/radeon/Kconfig" - -source "drivers/gpu/drm/amd/amdgpu/Kconfig" - -source "drivers/gpu/drm/nouveau/Kconfig" - -source "drivers/gpu/drm/nova/Kconfig" - -source "drivers/gpu/drm/i915/Kconfig" - -source "drivers/gpu/drm/xe/Kconfig" - -source "drivers/gpu/drm/kmb/Kconfig" - -source "drivers/gpu/drm/vgem/Kconfig" - -source "drivers/gpu/drm/vkms/Kconfig" - -source "drivers/gpu/drm/exynos/Kconfig" - -source "drivers/gpu/drm/rockchip/Kconfig" - -source "drivers/gpu/drm/vmwgfx/Kconfig" - -source "drivers/gpu/drm/gma500/Kconfig" - -source "drivers/gpu/drm/udl/Kconfig" - -source "drivers/gpu/drm/ast/Kconfig" - -source "drivers/gpu/drm/mgag200/Kconfig" - -source "drivers/gpu/drm/armada/Kconfig" - -source "drivers/gpu/drm/atmel-hlcdc/Kconfig" - -source "drivers/gpu/drm/renesas/Kconfig" - -source "drivers/gpu/drm/sun4i/Kconfig" - -source "drivers/gpu/drm/omapdrm/Kconfig" - -source "drivers/gpu/drm/tilcdc/Kconfig" - -source "drivers/gpu/drm/qxl/Kconfig" - -source "drivers/gpu/drm/virtio/Kconfig" - -source "drivers/gpu/drm/msm/Kconfig" - -source "drivers/gpu/drm/fsl-dcu/Kconfig" - -source "drivers/gpu/drm/tegra/Kconfig" - -source "drivers/gpu/drm/stm/Kconfig" - -source "drivers/gpu/drm/panel/Kconfig" - -source "drivers/gpu/drm/bridge/Kconfig" - -source "drivers/gpu/drm/sti/Kconfig" - -source "drivers/gpu/drm/imx/Kconfig" - -source "drivers/gpu/drm/ingenic/Kconfig" - -source "drivers/gpu/drm/v3d/Kconfig" - -source "drivers/gpu/drm/vc4/Kconfig" - -source "drivers/gpu/drm/loongson/Kconfig" - -source "drivers/gpu/drm/etnaviv/Kconfig" - -source "drivers/gpu/drm/hisilicon/Kconfig" - -source "drivers/gpu/drm/logicvc/Kconfig" - -source "drivers/gpu/drm/mediatek/Kconfig" - -source "drivers/gpu/drm/mxsfb/Kconfig" - -source "drivers/gpu/drm/meson/Kconfig" - -source "drivers/gpu/drm/tiny/Kconfig" - -source "drivers/gpu/drm/pl111/Kconfig" - -source "drivers/gpu/drm/tve200/Kconfig" - -source "drivers/gpu/drm/xen/Kconfig" - -source "drivers/gpu/drm/vboxvideo/Kconfig" - -source "drivers/gpu/drm/lima/Kconfig" - -source "drivers/gpu/drm/panfrost/Kconfig" - -source "drivers/gpu/drm/panthor/Kconfig" - -source "drivers/gpu/drm/aspeed/Kconfig" - -source "drivers/gpu/drm/mcde/Kconfig" - -source "drivers/gpu/drm/tidss/Kconfig" - +# DRM driver Kconfig files, sorted source "drivers/gpu/drm/adp/Kconfig" - -source "drivers/gpu/drm/xlnx/Kconfig" - +source "drivers/gpu/drm/amd/amdgpu/Kconfig" +source "drivers/gpu/drm/arm/Kconfig" +source "drivers/gpu/drm/armada/Kconfig" +source "drivers/gpu/drm/aspeed/Kconfig" +source "drivers/gpu/drm/ast/Kconfig" +source "drivers/gpu/drm/atmel-hlcdc/Kconfig" +source "drivers/gpu/drm/bridge/Kconfig" +source "drivers/gpu/drm/etnaviv/Kconfig" +source "drivers/gpu/drm/exynos/Kconfig" +source "drivers/gpu/drm/fsl-dcu/Kconfig" +source "drivers/gpu/drm/gma500/Kconfig" source "drivers/gpu/drm/gud/Kconfig" - -source "drivers/gpu/drm/sitronix/Kconfig" - -source 
"drivers/gpu/drm/solomon/Kconfig" - -source "drivers/gpu/drm/sprd/Kconfig" - -source "drivers/gpu/drm/imagination/Kconfig" - -source "drivers/gpu/drm/tyr/Kconfig" - +source "drivers/gpu/drm/hisilicon/Kconfig" source "drivers/gpu/drm/hyperv/Kconfig" +source "drivers/gpu/drm/i915/Kconfig" +source "drivers/gpu/drm/imagination/Kconfig" +source "drivers/gpu/drm/imx/Kconfig" +source "drivers/gpu/drm/ingenic/Kconfig" +source "drivers/gpu/drm/kmb/Kconfig" +source "drivers/gpu/drm/lima/Kconfig" +source "drivers/gpu/drm/logicvc/Kconfig" +source "drivers/gpu/drm/loongson/Kconfig" +source "drivers/gpu/drm/mcde/Kconfig" +source "drivers/gpu/drm/mediatek/Kconfig" +source "drivers/gpu/drm/meson/Kconfig" +source "drivers/gpu/drm/mgag200/Kconfig" +source "drivers/gpu/drm/msm/Kconfig" +source "drivers/gpu/drm/mxsfb/Kconfig" +source "drivers/gpu/drm/nouveau/Kconfig" +source "drivers/gpu/drm/nova/Kconfig" +source "drivers/gpu/drm/omapdrm/Kconfig" +source "drivers/gpu/drm/panel/Kconfig" +source "drivers/gpu/drm/panfrost/Kconfig" +source "drivers/gpu/drm/panthor/Kconfig" +source "drivers/gpu/drm/pl111/Kconfig" +source "drivers/gpu/drm/qxl/Kconfig" +source "drivers/gpu/drm/radeon/Kconfig" +source "drivers/gpu/drm/renesas/Kconfig" +source "drivers/gpu/drm/rockchip/Kconfig" +source "drivers/gpu/drm/sitronix/Kconfig" +source "drivers/gpu/drm/solomon/Kconfig" +source "drivers/gpu/drm/sprd/Kconfig" +source "drivers/gpu/drm/sti/Kconfig" +source "drivers/gpu/drm/stm/Kconfig" +source "drivers/gpu/drm/sun4i/Kconfig" +source "drivers/gpu/drm/sysfb/Kconfig" +source "drivers/gpu/drm/tegra/Kconfig" +source "drivers/gpu/drm/tidss/Kconfig" +source "drivers/gpu/drm/tilcdc/Kconfig" +source "drivers/gpu/drm/tiny/Kconfig" +source "drivers/gpu/drm/tve200/Kconfig" +source "drivers/gpu/drm/tyr/Kconfig" +source "drivers/gpu/drm/udl/Kconfig" +source "drivers/gpu/drm/v3d/Kconfig" +source "drivers/gpu/drm/vboxvideo/Kconfig" +source "drivers/gpu/drm/vc4/Kconfig" +source "drivers/gpu/drm/vgem/Kconfig" +source "drivers/gpu/drm/virtio/Kconfig" +source "drivers/gpu/drm/vkms/Kconfig" +source "drivers/gpu/drm/vmwgfx/Kconfig" +source "drivers/gpu/drm/xe/Kconfig" +source "drivers/gpu/drm/xen/Kconfig" +source "drivers/gpu/drm/xlnx/Kconfig" endif From ad9f266be8b2db26c7cc754d401278959bb7895c Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 9 Dec 2025 18:11:51 +0100 Subject: [PATCH 81/94] drm/gem: Fix builds with CONFIG_MMU=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_gem_get_unmapped_area() relies on mm_get_unmapped_area() which is only available if CONFIG_MMU=y. 
Fixes: 99bda20d6d4c ("drm/gem: Introduce drm_gem_get_unmapped_area() fop") Cc: Loïc Molinari Reviewed-by: Loïc Molinari Link: https://patch.msgid.link/20251209171151.2449120-1-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/drm_gem.c | 2 ++ include/drm/drm_gem.h | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 6021c4087a08..ca1956608261 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1267,6 +1267,7 @@ drm_gem_object_lookup_at_offset(struct file *filp, unsigned long start, return obj; } +#ifdef CONFIG_MMU /** * drm_gem_get_unmapped_area - get memory mapping region routine for GEM objects * @filp: DRM file pointer @@ -1309,6 +1310,7 @@ unsigned long drm_gem_get_unmapped_area(struct file *filp, unsigned long uaddr, return ret; } EXPORT_SYMBOL_GPL(drm_gem_get_unmapped_area); +#endif /** * drm_gem_mmap - memory map routine for GEM objects diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index cca815dc87f3..f4da8ed0d630 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -537,9 +537,14 @@ void drm_gem_vm_close(struct vm_area_struct *vma); int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size, struct vm_area_struct *vma); int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); + +#ifdef CONFIG_MMU unsigned long drm_gem_get_unmapped_area(struct file *filp, unsigned long uaddr, unsigned long len, unsigned long pgoff, unsigned long flags); +#else +#define drm_gem_get_unmapped_area NULL +#endif /** * drm_gem_object_get - acquire a GEM buffer object reference From ff9e240212f6693c293f9e58ade05bc887297a1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Molinari?= Date: Wed, 10 Dec 2025 15:36:17 +0100 Subject: [PATCH 82/94] drm/i915: Fix BO alloc flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I915_BO_ALLOC_NOTHP must be added to the I915_BO_ALLOC_FLAGS mask in order to pass GEM_BUG_ON() valid flags checks. v2: - Add Tvrtko's A-b Reported-by: Chaitanya Kumar Borah Closes: https://lore.kernel.org/intel-gfx/d73adfa8-d61b-46b3-9385-dde53d8db8ad@intel.com/ Fixes: a8a9a590221c ("drm/i915: Use huge tmpfs mountpoint helpers") Suggested-by: Tvrtko Ursulin Signed-off-by: Loïc Molinari Acked-by: Tvrtko Ursulin Acked-by: Tvrtko Ursulin Link: https://patch.msgid.link/20251210143617.712808-1-loic.molinari@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index f94409e8ec4c..35d4c7d0c579 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -363,6 +363,7 @@ struct drm_i915_gem_object { I915_BO_ALLOC_PM_EARLY | \ I915_BO_ALLOC_GPU_ONLY | \ I915_BO_ALLOC_CCS_AUX | \ + I915_BO_ALLOC_NOTHP | \ I915_BO_PREALLOC) #define I915_BO_READONLY BIT(10) #define I915_TILING_QUIRK_BIT 11 /* unknown swizzling; do not release! */ From 3d32eb7a5ecff92d83a5fd34c45c171c17d3d5d0 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Tue, 9 Dec 2025 13:16:39 -0800 Subject: [PATCH 83/94] accel/amdxdna: Fix cu_idx being cleared by memset() during command setup For one command type, cu_idx is assigned before calling memset() on the command structure. 
This results in cu_idx being overwritten, causing the firmware to receive an incomplete or invalid command and leading to unexpected command failures. Fix this by moving the memset() call before initializing cu_idx so that all fields are populated in the correct order. Fixes: 71829d7f2f70 ("accel/amdxdna: Use MSG_OP_CHAIN_EXEC_NPU when supported") Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20251209211639.1636888-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_message.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index a75156800467..03b75757a6e6 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -652,6 +652,7 @@ aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *siz u32 cmd_len; void *cmd; + memset(npu_slot, 0, sizeof(*npu_slot)); cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); if (*size < sizeof(*npu_slot) + cmd_len) return -EINVAL; @@ -660,7 +661,6 @@ aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *siz if (npu_slot->cu_idx == INVALID_CU_IDX) return -EINVAL; - memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->type = EXEC_NPU_TYPE_NON_ELF; npu_slot->arg_cnt = cmd_len / sizeof(u32); memcpy(npu_slot->args, cmd, cmd_len); @@ -677,6 +677,7 @@ aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si u32 cmd_len; u32 arg_sz; + memset(npu_slot, 0, sizeof(*npu_slot)); sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); arg_sz = cmd_len - sizeof(*sn); if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE) @@ -689,7 +690,6 @@ aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si if (npu_slot->cu_idx == INVALID_CU_IDX) return -EINVAL; - memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->type = EXEC_NPU_TYPE_PARTIAL_ELF; npu_slot->inst_buf_addr = sn->buffer; npu_slot->inst_size = sn->buffer_size; @@ -709,6 +709,7 @@ aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t u32 cmd_len; u32 arg_sz; + memset(npu_slot, 0, sizeof(*npu_slot)); pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); arg_sz = cmd_len - sizeof(*pd); if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) @@ -721,7 +722,6 @@ aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t if (npu_slot->cu_idx == INVALID_CU_IDX) return -EINVAL; - memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->type = EXEC_NPU_TYPE_PREEMPT; npu_slot->inst_buf_addr = pd->inst_buf; npu_slot->save_buf_addr = pd->save_buf; @@ -745,6 +745,7 @@ aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si u32 cmd_len; u32 arg_sz; + memset(npu_slot, 0, sizeof(*npu_slot)); pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len); arg_sz = cmd_len - sizeof(*pd); if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE) @@ -753,7 +754,6 @@ aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si if (*size < sizeof(*npu_slot) + arg_sz) return -EINVAL; - memset(npu_slot, 0, sizeof(*npu_slot)); npu_slot->type = EXEC_NPU_TYPE_ELF; npu_slot->inst_buf_addr = pd->inst_buf; npu_slot->save_buf_addr = pd->save_buf; From a1542b8ca6edabbb42ec4141e55d0d0710c9b6df Mon Sep 17 00:00:00 2001 From: Eslam Khafagy Date: Tue, 25 Nov 2025 21:38:45 +0200 Subject: [PATCH 84/94] drm: pl111: replace dev_* print functions with drm_* variants Update the PL111 CLCD driver to use DRM print macros drm_*() instead of 
dev_*(). This change ensures consistency with DRM subsystem logging conventions [1]. [1] Link: https://docs.kernel.org/gpu/todo.html#convert-logging-to-drm-functions-with-drm-device-parameter Signed-off-by: Eslam Khafagy Signed-off-by: Linus Walleij Link: https://patch.msgid.link/20251125193845.425416-1-eslam.medhat1993@gmail.com --- drivers/gpu/drm/pl111/pl111_display.c | 4 +- drivers/gpu/drm/pl111/pl111_drv.c | 29 +++++++------- drivers/gpu/drm/pl111/pl111_nomadik.c | 4 +- drivers/gpu/drm/pl111/pl111_nomadik.h | 4 +- drivers/gpu/drm/pl111/pl111_versatile.c | 53 +++++++++++++------------ drivers/gpu/drm/pl111/pl111_versatile.h | 2 +- 6 files changed, 49 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/pl111/pl111_display.c b/drivers/gpu/drm/pl111/pl111_display.c index 3a9661b9b1fc..5d10bc5fdf1f 100644 --- a/drivers/gpu/drm/pl111/pl111_display.c +++ b/drivers/gpu/drm/pl111/pl111_display.c @@ -138,7 +138,7 @@ static void pl111_display_enable(struct drm_simple_display_pipe *pipe, ret = clk_set_rate(priv->clk, mode->clock * 1000); if (ret) { - dev_err(drm->dev, + drm_err(drm, "Failed to set pixel clock rate to %d: %d\n", mode->clock * 1000, ret); } @@ -553,7 +553,7 @@ pl111_init_clock_divider(struct drm_device *drm) int ret; if (IS_ERR(parent)) { - dev_err(drm->dev, "CLCD: unable to get clcdclk.\n"); + drm_err(drm, "CLCD: unable to get clcdclk.\n"); return PTR_ERR(parent); } diff --git a/drivers/gpu/drm/pl111/pl111_drv.c b/drivers/gpu/drm/pl111/pl111_drv.c index 56ff6a3fb483..ac9e4b6bd2eb 100644 --- a/drivers/gpu/drm/pl111/pl111_drv.c +++ b/drivers/gpu/drm/pl111/pl111_drv.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include @@ -98,7 +99,7 @@ static int pl111_modeset_init(struct drm_device *dev) struct drm_panel *tmp_panel; struct drm_bridge *tmp_bridge; - dev_dbg(dev->dev, "checking endpoint %d\n", i); + drm_dbg(dev, "checking endpoint %d\n", i); ret = drm_of_find_panel_or_bridge(dev->dev->of_node, 0, i, @@ -114,18 +115,18 @@ static int pl111_modeset_init(struct drm_device *dev) defer = true; } else if (ret != -ENODEV) { /* Continue, maybe something else is working */ - dev_err(dev->dev, + drm_err(dev, "endpoint %d returns %d\n", i, ret); } } if (tmp_panel) { - dev_info(dev->dev, + drm_info(dev, "found panel on endpoint %d\n", i); panel = tmp_panel; } if (tmp_bridge) { - dev_info(dev->dev, + drm_info(dev, "found bridge on endpoint %d\n", i); bridge = tmp_bridge; } @@ -149,9 +150,9 @@ static int pl111_modeset_init(struct drm_device *dev) goto finish; } } else if (bridge) { - dev_info(dev->dev, "Using non-panel bridge\n"); + drm_info(dev, "Using non-panel bridge\n"); } else { - dev_err(dev->dev, "No bridge, exiting\n"); + drm_err(dev, "No bridge, exiting\n"); return -ENODEV; } @@ -163,7 +164,7 @@ static int pl111_modeset_init(struct drm_device *dev) ret = pl111_display_init(dev); if (ret != 0) { - dev_err(dev->dev, "Failed to init display\n"); + drm_err(dev, "Failed to init display\n"); goto out_bridge; } @@ -175,7 +176,7 @@ static int pl111_modeset_init(struct drm_device *dev) if (!priv->variant->broken_vblank) { ret = drm_vblank_init(dev, 1); if (ret != 0) { - dev_err(dev->dev, "Failed to init vblank\n"); + drm_err(dev, "Failed to init vblank\n"); goto out_bridge; } } @@ -255,13 +256,13 @@ static int pl111_amba_probe(struct amba_device *amba_dev, ret = of_reserved_mem_device_init(dev); if (!ret) { - dev_info(dev, "using device-specific reserved memory\n"); + drm_info(drm, "using device-specific reserved memory\n"); priv->use_device_memory = true; } if 
(of_property_read_u32(dev->of_node, "max-memory-bandwidth", &priv->memory_bw)) { - dev_info(dev, "no max memory bandwidth specified, assume unlimited\n"); + drm_info(drm, "no max memory bandwidth specified, assume unlimited\n"); priv->memory_bw = 0; } @@ -276,17 +277,17 @@ static int pl111_amba_probe(struct amba_device *amba_dev, priv->regs = devm_ioremap_resource(dev, &amba_dev->res); if (IS_ERR(priv->regs)) { - dev_err(dev, "%s failed mmio\n", __func__); + drm_err(drm, "%s failed mmio\n", __func__); ret = PTR_ERR(priv->regs); goto dev_put; } /* This may override some variant settings */ - ret = pl111_versatile_init(dev, priv); + ret = pl111_versatile_init(drm, priv); if (ret) goto dev_put; - pl111_nomadik_init(dev); + pl111_nomadik_init(drm); /* turn off interrupts before requesting the irq */ writel(0, priv->regs + priv->ienb); @@ -294,7 +295,7 @@ static int pl111_amba_probe(struct amba_device *amba_dev, ret = devm_request_irq(dev, amba_dev->irq[0], pl111_irq, 0, variant->name, priv); if (ret != 0) { - dev_err(dev, "%s failed irq %d\n", __func__, ret); + drm_err(drm, "%s failed irq %d\n", __func__, ret); return ret; } diff --git a/drivers/gpu/drm/pl111/pl111_nomadik.c b/drivers/gpu/drm/pl111/pl111_nomadik.c index 6f385e59be22..f3218d59c5f1 100644 --- a/drivers/gpu/drm/pl111/pl111_nomadik.c +++ b/drivers/gpu/drm/pl111/pl111_nomadik.c @@ -9,7 +9,7 @@ #define PMU_CTRL_OFFSET 0x0000 #define PMU_CTRL_LCDNDIF BIT(26) -void pl111_nomadik_init(struct device *dev) +void pl111_nomadik_init(struct drm_device *dev) { struct regmap *pmu_regmap; @@ -31,6 +31,6 @@ void pl111_nomadik_init(struct device *dev) PMU_CTRL_OFFSET, PMU_CTRL_LCDNDIF, 0); - dev_info(dev, "set Nomadik PMU mux to CLCD mode\n"); + drm_info(dev, "set Nomadik PMU mux to CLCD mode\n"); } EXPORT_SYMBOL_GPL(pl111_nomadik_init); diff --git a/drivers/gpu/drm/pl111/pl111_nomadik.h b/drivers/gpu/drm/pl111/pl111_nomadik.h index 47ccf5c839fc..b2c9f7cc1c8c 100644 --- a/drivers/gpu/drm/pl111/pl111_nomadik.h +++ b/drivers/gpu/drm/pl111/pl111_nomadik.h @@ -8,11 +8,11 @@ struct device; #ifdef CONFIG_ARCH_NOMADIK -void pl111_nomadik_init(struct device *dev); +void pl111_nomadik_init(struct drm_device *dev); #else -static inline void pl111_nomadik_init(struct device *dev) +static inline void pl111_nomadik_init(struct drm_device *dev) { } diff --git a/drivers/gpu/drm/pl111/pl111_versatile.c b/drivers/gpu/drm/pl111/pl111_versatile.c index 5f460b296c0c..0d8331a3909f 100644 --- a/drivers/gpu/drm/pl111/pl111_versatile.c +++ b/drivers/gpu/drm/pl111/pl111_versatile.c @@ -20,6 +20,7 @@ #include #include +#include #include "pl111_versatile.h" #include "pl111_drm.h" @@ -116,7 +117,7 @@ static void pl111_integrator_enable(struct drm_device *drm, u32 format) { u32 val; - dev_info(drm->dev, "enable Integrator CLCD connectors\n"); + drm_info(drm, "enable Integrator CLCD connectors\n"); /* FIXME: really needed? 
*/ val = INTEGRATOR_CLCD_LCD_STATIC1 | INTEGRATOR_CLCD_LCD_STATIC2 | @@ -134,7 +135,7 @@ static void pl111_integrator_enable(struct drm_device *drm, u32 format) val |= INTEGRATOR_CLCD_LCDMUX_VGA555; break; default: - dev_err(drm->dev, "unhandled format on Integrator 0x%08x\n", + drm_err(drm, "unhandled format on Integrator 0x%08x\n", format); break; } @@ -156,7 +157,7 @@ static void pl111_impd1_enable(struct drm_device *drm, u32 format) { u32 val; - dev_info(drm->dev, "enable IM-PD1 CLCD connectors\n"); + drm_info(drm, "enable IM-PD1 CLCD connectors\n"); val = IMPD1_CTRL_DISP_VGA | IMPD1_CTRL_DISP_ENABLE; regmap_update_bits(versatile_syscon_map, @@ -167,7 +168,7 @@ static void pl111_impd1_enable(struct drm_device *drm, u32 format) static void pl111_impd1_disable(struct drm_device *drm) { - dev_info(drm->dev, "disable IM-PD1 CLCD connectors\n"); + drm_info(drm, "disable IM-PD1 CLCD connectors\n"); regmap_update_bits(versatile_syscon_map, IMPD1_CTRL_OFFSET, @@ -194,7 +195,7 @@ static void pl111_impd1_disable(struct drm_device *drm) static void pl111_versatile_disable(struct drm_device *drm) { - dev_info(drm->dev, "disable Versatile CLCD connectors\n"); + drm_info(drm, "disable Versatile CLCD connectors\n"); regmap_update_bits(versatile_syscon_map, SYS_CLCD, SYS_CLCD_CONNECTOR_MASK, @@ -205,7 +206,7 @@ static void pl111_versatile_enable(struct drm_device *drm, u32 format) { u32 val = 0; - dev_info(drm->dev, "enable Versatile CLCD connectors\n"); + drm_info(drm, "enable Versatile CLCD connectors\n"); switch (format) { case DRM_FORMAT_ABGR8888: @@ -227,7 +228,7 @@ static void pl111_versatile_enable(struct drm_device *drm, u32 format) val |= SYS_CLCD_MODE_5551; break; default: - dev_err(drm->dev, "unhandled format on Versatile 0x%08x\n", + drm_err(drm, "unhandled format on Versatile 0x%08x\n", format); break; } @@ -247,7 +248,7 @@ static void pl111_versatile_enable(struct drm_device *drm, u32 format) static void pl111_realview_clcd_disable(struct drm_device *drm) { - dev_info(drm->dev, "disable RealView CLCD connectors\n"); + drm_info(drm, "disable RealView CLCD connectors\n"); regmap_update_bits(versatile_syscon_map, SYS_CLCD, SYS_CLCD_CONNECTOR_MASK, @@ -256,7 +257,7 @@ static void pl111_realview_clcd_disable(struct drm_device *drm) static void pl111_realview_clcd_enable(struct drm_device *drm, u32 format) { - dev_info(drm->dev, "enable RealView CLCD connectors\n"); + drm_info(drm, "enable RealView CLCD connectors\n"); regmap_update_bits(versatile_syscon_map, SYS_CLCD, SYS_CLCD_CONNECTOR_MASK, @@ -376,7 +377,7 @@ static const struct pl111_variant_data pl111_vexpress = { #define VEXPRESS_FPGAMUX_DAUGHTERBOARD_1 0x01 #define VEXPRESS_FPGAMUX_DAUGHTERBOARD_2 0x02 -static int pl111_vexpress_clcd_init(struct device *dev, struct device_node *np, +static int pl111_vexpress_clcd_init(struct drm_device *dev, struct device_node *np, struct pl111_drm_dev_private *priv) { struct platform_device *pdev; @@ -433,22 +434,22 @@ static int pl111_vexpress_clcd_init(struct device *dev, struct device_node *np, mux_motherboard = false; if (mux_motherboard) { - dev_info(dev, "DVI muxed to motherboard CLCD\n"); + drm_info(dev, "DVI muxed to motherboard CLCD\n"); val = VEXPRESS_FPGAMUX_MOTHERBOARD; - } else if (ct_clcd == dev->of_node) { - dev_info(dev, + } else if (ct_clcd == dev->dev->of_node) { + drm_info(dev, "DVI muxed to daughterboard 1 (core tile) CLCD\n"); val = VEXPRESS_FPGAMUX_DAUGHTERBOARD_1; } else { - dev_info(dev, "core tile graphics present\n"); - dev_info(dev, "this device will be deactivated\n"); + 
drm_info(dev, "core tile graphics present\n"); + drm_info(dev, "this device will be deactivated\n"); return -ENODEV; } /* Call into deep Vexpress configuration API */ pdev = of_find_device_by_node(np); if (!pdev) { - dev_err(dev, "can't find the sysreg device, deferring\n"); + drm_err(dev, "can't find the sysreg device, deferring\n"); return -EPROBE_DEFER; } @@ -461,17 +462,17 @@ static int pl111_vexpress_clcd_init(struct device *dev, struct device_node *np, ret = regmap_write(map, 0, val); platform_device_put(pdev); if (ret) { - dev_err(dev, "error setting DVI muxmode\n"); + drm_err(dev, "error setting DVI muxmode\n"); return -ENODEV; } priv->variant = &pl111_vexpress; - dev_info(dev, "initializing Versatile Express PL111\n"); + drm_info(dev, "initializing Versatile Express PL111\n"); return 0; } -int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv) +int pl111_versatile_init(struct drm_device *dev, struct pl111_drm_dev_private *priv) { const struct of_device_id *clcd_id; enum versatile_clcd versatile_clcd_type; @@ -492,7 +493,7 @@ int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv) int ret = pl111_vexpress_clcd_init(dev, np, priv); of_node_put(np); if (ret) - dev_err(dev, "Versatile Express init failed - %d", ret); + drm_err(dev, "Versatile Express init failed - %d", ret); return ret; } @@ -511,7 +512,7 @@ int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv) map = syscon_node_to_regmap(np); of_node_put(np); if (IS_ERR(map)) { - dev_err(dev, "no Versatile syscon regmap\n"); + drm_err(dev, "no Versatile syscon regmap\n"); return PTR_ERR(map); } @@ -520,14 +521,14 @@ int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv) versatile_syscon_map = map; priv->variant = &pl110_integrator; priv->variant_display_enable = pl111_integrator_enable; - dev_info(dev, "set up callbacks for Integrator PL110\n"); + drm_info(dev, "set up callbacks for Integrator PL110\n"); break; case INTEGRATOR_IMPD1: versatile_syscon_map = map; priv->variant = &pl110_impd1; priv->variant_display_enable = pl111_impd1_enable; priv->variant_display_disable = pl111_impd1_disable; - dev_info(dev, "set up callbacks for IM-PD1 PL110\n"); + drm_info(dev, "set up callbacks for IM-PD1 PL110\n"); break; case VERSATILE_CLCD: versatile_syscon_map = map; @@ -542,7 +543,7 @@ int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv) */ priv->ienb = CLCD_PL111_IENB; priv->ctrl = CLCD_PL111_CNTL; - dev_info(dev, "set up callbacks for Versatile PL110\n"); + drm_info(dev, "set up callbacks for Versatile PL110\n"); break; case REALVIEW_CLCD_EB: case REALVIEW_CLCD_PB1176: @@ -553,10 +554,10 @@ int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv) priv->variant = &pl111_realview; priv->variant_display_enable = pl111_realview_clcd_enable; priv->variant_display_disable = pl111_realview_clcd_disable; - dev_info(dev, "set up callbacks for RealView PL111\n"); + drm_info(dev, "set up callbacks for RealView PL111\n"); break; default: - dev_info(dev, "unknown Versatile system controller\n"); + drm_info(dev, "unknown Versatile system controller\n"); break; } diff --git a/drivers/gpu/drm/pl111/pl111_versatile.h b/drivers/gpu/drm/pl111/pl111_versatile.h index 143877010042..7a15c5f7efe8 100644 --- a/drivers/gpu/drm/pl111/pl111_versatile.h +++ b/drivers/gpu/drm/pl111/pl111_versatile.h @@ -7,6 +7,6 @@ struct device; struct pl111_drm_dev_private; -int pl111_versatile_init(struct device *dev, struct 
pl111_drm_dev_private *priv); +int pl111_versatile_init(struct drm_device *dev, struct pl111_drm_dev_private *priv); #endif From d36137085a4aa2d2f039359a0d67d9e07667f2de Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 12 Nov 2025 17:34:34 +0100 Subject: [PATCH 85/94] drm/bridge: add drm_bridge_unplug() and drm_bridge_enter/exit() To allow DRM bridges to be removable, add synchronization functions that tell when the bridge hardware has been physically unplugged and that mark a critical section which should not be entered after that. This is inspired by the drm_dev_unplugged/enter/exit() functions for struct drm_device. Suggested-by: Maxime Ripard Link: https://lore.kernel.org/all/20250106-vigorous-talented-viper-fa49d9@houat/ Reviewed-by: Maxime Ripard Link: https://patch.msgid.link/20251112-drm-bridge-atomic-vs-remove-v3-1-85db717ce094@bootlin.com Signed-off-by: Luca Ceresoli --- drivers/gpu/drm/drm_bridge.c | 62 ++++++++++++++++++++++++++++++++++++ include/drm/drm_bridge.h | 12 +++++++ 2 files changed, 74 insertions(+) diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c index 8f355df883d8..db40c26d1cb3 100644 --- a/drivers/gpu/drm/drm_bridge.c +++ b/drivers/gpu/drm/drm_bridge.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -202,6 +203,67 @@ static DEFINE_MUTEX(bridge_lock); static LIST_HEAD(bridge_list); static LIST_HEAD(bridge_lingering_list); +DEFINE_STATIC_SRCU(drm_bridge_unplug_srcu); + +/** + * drm_bridge_enter - Enter DRM bridge critical section + * @bridge: DRM bridge + * @idx: Pointer to index that will be passed to the matching drm_bridge_exit() + * + * This function marks and protects the beginning of a section that should not + * be entered after the bridge has been unplugged. The section end is marked + * with drm_bridge_exit(). Calls to this function can be nested. + * + * Returns: + * True if it is OK to enter the section, false otherwise. + */ +bool drm_bridge_enter(struct drm_bridge *bridge, int *idx) +{ + *idx = srcu_read_lock(&drm_bridge_unplug_srcu); + + if (bridge->unplugged) { + srcu_read_unlock(&drm_bridge_unplug_srcu, *idx); + return false; + } + + return true; +} +EXPORT_SYMBOL(drm_bridge_enter); + +/** + * drm_bridge_exit - Exit DRM bridge critical section + * @idx: index returned by drm_bridge_enter() + * + * This function marks the end of a section that should not be entered after + * the bridge has been unplugged. + */ +void drm_bridge_exit(int idx) +{ + srcu_read_unlock(&drm_bridge_unplug_srcu, idx); +} +EXPORT_SYMBOL(drm_bridge_exit); + +/** + * drm_bridge_unplug - declare a DRM bridge was unplugged and remove it + * @bridge: DRM bridge + * + * This tells that the bridge has been physically unplugged and that no + * operations on device resources may be performed anymore. Entry-points can use + * drm_bridge_enter() and drm_bridge_exit() to protect device resources in + * a race-free manner. + * + * Also unregisters the bridge.
+ */ +void drm_bridge_unplug(struct drm_bridge *bridge) +{ + bridge->unplugged = true; + + synchronize_srcu(&drm_bridge_unplug_srcu); + + drm_bridge_remove(bridge); +} +EXPORT_SYMBOL(drm_bridge_unplug); + static void __drm_bridge_free(struct kref *kref) { struct drm_bridge *bridge = container_of(kref, struct drm_bridge, refcount); diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 0ff7ab4aa868..d2683846cc61 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -1143,6 +1143,14 @@ struct drm_bridge { */ struct kref refcount; + /** + * @unplugged: + * + * Flag to tell if the bridge has been unplugged. + * See drm_bridge_enter() and drm_bridge_unplug(). + */ + bool unplugged; + /** @driver_private: pointer to the bridge driver's internal context */ void *driver_private; /** @ops: bitmask of operations supported by the bridge */ @@ -1278,6 +1286,10 @@ drm_priv_to_bridge(struct drm_private_obj *priv) return container_of(priv, struct drm_bridge, base); } +bool drm_bridge_enter(struct drm_bridge *bridge, int *idx); +void drm_bridge_exit(int idx); +void drm_bridge_unplug(struct drm_bridge *bridge); + struct drm_bridge *drm_bridge_get(struct drm_bridge *bridge); void drm_bridge_put(struct drm_bridge *bridge); From d2e8d1bc840b849fc23d8812995645cc79990e7b Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 12 Nov 2025 17:34:35 +0100 Subject: [PATCH 86/94] drm/bridge: ti-sn65dsi83: protect device resources on unplug To support hot-unplug of this bridge we need to protect access to device resources in case sn65dsi83_remove() happens concurrently with other code. Some care is needed for the case when the unplug happens before sn65dsi83_atomic_disable() has a chance to enter the critical section (i.e. a successful drm_bridge_enter() call), which occurs whenever the hardware is removed while the display is active. When that happens, sn65dsi83_atomic_disable() is unable to release the resources taken by sn65dsi83_atomic_pre_enable().
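As an aside, a minimal sketch (hypothetical callback name, not taken from the patch) of the drm_bridge_enter()/drm_bridge_exit() guard pattern that the conversions below follow in every callback touching device resources:

	static void example_bridge_callback(struct drm_bridge *bridge,
					    struct drm_atomic_state *state)
	{
		int idx;

		/*
		 * Bail out if the hardware is already gone; otherwise enter
		 * the SRCU read-side section drm_bridge_unplug() synchronizes
		 * on before tearing the bridge down.
		 */
		if (!drm_bridge_enter(bridge, &idx))
			return;

		/* ... safe to access device resources (registers, regmap, ...) ... */

		drm_bridge_exit(idx);
	}

Sections may nest; each successful drm_bridge_enter() must be paired with a drm_bridge_exit() using the idx it returned.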
To ensure those resources are released exactly once on device removal: * move the code to release them to a dedicated function * register that function when the resources are taken in sn65dsi83_atomic_pre_enable() * if sn65dsi83_atomic_disable() happens before sn65dsi83_remove() (typical non-hot-unplug case): * sn65dsi83_atomic_disable() can enter the critical section (drm_bridge_enter() returns true) -> it releases and executes the devres action * if sn65dsi83_atomic_disable() happens after sn65dsi83_remove() (typical hot-unplug case): * sn65dsi83_remove() -> drm_bridge_unplug() prevents sn65dsi83_atomic_disable() from entering the critical section (drm_bridge_enter() returns false), so sn65dsi83_atomic_disable() cannot release and execute the devres action * the devres action is executed at the end of sn65dsi83_remove() Reviewed-by: Maxime Ripard Link: https://patch.msgid.link/20251112-drm-bridge-atomic-vs-remove-v3-2-85db717ce094@bootlin.com Signed-off-by: Luca Ceresoli --- drivers/gpu/drm/bridge/ti-sn65dsi83.c | 86 ++++++++++++++++++++------- 1 file changed, 66 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c index 033c44326552..ac74b9e85b97 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c @@ -406,6 +406,10 @@ static void sn65dsi83_reset_work(struct work_struct *ws) { struct sn65dsi83 *ctx = container_of(ws, struct sn65dsi83, reset_work); int ret; + int idx; + + if (!drm_bridge_enter(&ctx->bridge, &idx)) + return; /* Reset the pipe */ ret = sn65dsi83_reset_pipe(ctx); @@ -415,12 +419,18 @@ static void sn65dsi83_reset_work(struct work_struct *ws) } if (ctx->irq) enable_irq(ctx->irq); + + drm_bridge_exit(idx); } static void sn65dsi83_handle_errors(struct sn65dsi83 *ctx) { unsigned int irq_stat; int ret; + int idx; + + if (!drm_bridge_enter(&ctx->bridge, &idx)) + return; /* * Schedule a reset in case of: @@ -441,6 +451,8 @@ static void sn65dsi83_handle_errors(struct sn65dsi83 *ctx) schedule_work(&ctx->reset_work); } + + drm_bridge_exit(idx); } static void sn65dsi83_monitor_work(struct work_struct *work) @@ -463,6 +475,37 @@ static void sn65dsi83_monitor_stop(struct sn65dsi83 *ctx) cancel_delayed_work_sync(&ctx->monitor_work); } +/* + * Release resources taken by sn65dsi83_atomic_pre_enable(). + * + * Invoked by sn65dsi83_atomic_disable() normally, or by devres after + * sn65dsi83_remove() in case this happens before atomic_disable. + */ +static void sn65dsi83_release_resources(void *data) +{ + struct sn65dsi83 *ctx = (struct sn65dsi83 *)data; + int ret; + + if (ctx->irq) { + /* Disable irq */ + regmap_write(ctx->regmap, REG_IRQ_EN, 0x0); + regmap_write(ctx->regmap, REG_IRQ_GLOBAL, 0x0); + } else { + /* Stop the polling task */ + sn65dsi83_monitor_stop(ctx); + } + + /* Put the chip in reset, pull EN line low, and assure 10ms reset low timing.
*/ + gpiod_set_value_cansleep(ctx->enable_gpio, 0); + usleep_range(10000, 11000); + + ret = regulator_disable(ctx->vcc); + if (ret) + dev_err(ctx->dev, "Failed to disable vcc: %d\n", ret); + + regcache_mark_dirty(ctx->regmap); +} + static void sn65dsi83_atomic_pre_enable(struct drm_bridge *bridge, struct drm_atomic_state *state) { @@ -478,11 +521,15 @@ static void sn65dsi83_atomic_pre_enable(struct drm_bridge *bridge, __le16 le16val; u16 val; int ret; + int idx; + + if (!drm_bridge_enter(bridge, &idx)) + return; ret = regulator_enable(ctx->vcc); if (ret) { dev_err(ctx->dev, "Failed to enable vcc: %d\n", ret); - return; + goto err_exit; } /* Deassert reset */ @@ -625,7 +672,7 @@ static void sn65dsi83_atomic_pre_enable(struct drm_bridge *bridge, dev_err(ctx->dev, "failed to lock PLL, ret=%i\n", ret); /* On failure, disable PLL again and exit. */ regmap_write(ctx->regmap, REG_RC_PLL_EN, 0x00); - return; + goto err_add_action; } /* Trigger reset after CSR register update. */ @@ -633,6 +680,11 @@ static void sn65dsi83_atomic_pre_enable(struct drm_bridge *bridge, /* Wait for 10ms after soft reset as specified in datasheet */ usleep_range(10000, 12000); + +err_add_action: + devm_add_action(ctx->dev, sn65dsi83_release_resources, ctx); +err_exit: + drm_bridge_exit(idx); } static void sn65dsi83_atomic_enable(struct drm_bridge *bridge, @@ -640,6 +692,10 @@ { struct sn65dsi83 *ctx = bridge_to_sn65dsi83(bridge); unsigned int pval; + int idx; + + if (!drm_bridge_enter(bridge, &idx)) + return; /* Clear all errors that got asserted during initialization. */ regmap_read(ctx->regmap, REG_IRQ_STAT, &pval); @@ -659,32 +715,22 @@ static void sn65dsi83_atomic_enable(struct drm_bridge *bridge, /* Use the polling task */ sn65dsi83_monitor_start(ctx); } + + drm_bridge_exit(idx); } static void sn65dsi83_atomic_disable(struct drm_bridge *bridge, struct drm_atomic_state *state) { struct sn65dsi83 *ctx = bridge_to_sn65dsi83(bridge); - int ret; + int idx; - if (ctx->irq) { - /* Disable irq */ - regmap_write(ctx->regmap, REG_IRQ_EN, 0x0); - regmap_write(ctx->regmap, REG_IRQ_GLOBAL, 0x0); - } else { - /* Stop the polling task */ - sn65dsi83_monitor_stop(ctx); - } + if (!drm_bridge_enter(bridge, &idx)) + return; - /* Put the chip in reset, pull EN line low, and assure 10ms reset low timing. */ - gpiod_set_value_cansleep(ctx->enable_gpio, 0); - usleep_range(10000, 11000); + devm_release_action(ctx->dev, sn65dsi83_release_resources, ctx); - ret = regulator_disable(ctx->vcc); - if (ret) - dev_err(ctx->dev, "Failed to disable vcc: %d\n", ret); - - regcache_mark_dirty(ctx->regmap); + drm_bridge_exit(idx); } static enum drm_mode_status @@ -1005,7 +1051,7 @@ static void sn65dsi83_remove(struct i2c_client *client) { struct sn65dsi83 *ctx = i2c_get_clientdata(client); - drm_bridge_remove(&ctx->bridge); + drm_bridge_unplug(&ctx->bridge); } static const struct i2c_device_id sn65dsi83_id[] = { From 041baffb84a64ea792224852778a7ff7ddd3cefc Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 9 Dec 2025 16:23:09 +0200 Subject: [PATCH 87/94] drm/vblank: Unexport drm_wait_one_vblank() Make drm_wait_one_vblank() static. The function is an internal interface and not invoked directly by drivers.
Signed-off-by: Thomas Zimmermann Reviewed-by: Thomas Zimmermann Link: https://patch.msgid.link/b0ab9833a85f5fb6de95ad6cb0216864bf860c9e.1765290097.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/drm_vblank.c | 14 +------------- include/drm/drm_vblank.h | 1 - 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index 32d013c5c8fc..c15d6d9d0082 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -1286,18 +1286,7 @@ void drm_crtc_vblank_put(struct drm_crtc *crtc) } EXPORT_SYMBOL(drm_crtc_vblank_put); -/** - * drm_wait_one_vblank - wait for one vblank - * @dev: DRM device - * @pipe: CRTC index - * - * This waits for one vblank to pass on @pipe, using the irq driver interfaces. - * It is a failure to call this when the vblank irq for @pipe is disabled, e.g. - * due to lack of driver support or because the crtc is off. - * - * This is the legacy version of drm_crtc_wait_one_vblank(). - */ -void drm_wait_one_vblank(struct drm_device *dev, unsigned int pipe) +static void drm_wait_one_vblank(struct drm_device *dev, unsigned int pipe) { struct drm_vblank_crtc *vblank = drm_vblank_crtc(dev, pipe); int ret; @@ -1321,7 +1310,6 @@ void drm_wait_one_vblank(struct drm_device *dev, unsigned int pipe) drm_vblank_put(dev, pipe); } -EXPORT_SYMBOL(drm_wait_one_vblank); /** * drm_crtc_wait_one_vblank - wait for one vblank diff --git a/include/drm/drm_vblank.h b/include/drm/drm_vblank.h index ffa564d79638..94ee09b48895 100644 --- a/include/drm/drm_vblank.h +++ b/include/drm/drm_vblank.h @@ -302,7 +302,6 @@ bool drm_handle_vblank(struct drm_device *dev, unsigned int pipe); bool drm_crtc_handle_vblank(struct drm_crtc *crtc); int drm_crtc_vblank_get(struct drm_crtc *crtc); void drm_crtc_vblank_put(struct drm_crtc *crtc); -void drm_wait_one_vblank(struct drm_device *dev, unsigned int pipe); void drm_crtc_wait_one_vblank(struct drm_crtc *crtc); void drm_crtc_vblank_off(struct drm_crtc *crtc); void drm_crtc_vblank_reset(struct drm_crtc *crtc); From 128d6e6255ea7c0f4d55d45025d1ce0343f6cd46 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 9 Dec 2025 16:23:10 +0200 Subject: [PATCH 88/94] drm/vblank: remove drm_wait_one_vblank() completely There's really no need for the extra static function at all. Reviewed-by: Thomas Zimmermann Link: https://patch.msgid.link/fe969aad198d3f151fafd01faca5b0e73bfd9a03.1765290097.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/drm_vblank.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index c15d6d9d0082..1d12836e3d80 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -1286,8 +1286,18 @@ void drm_crtc_vblank_put(struct drm_crtc *crtc) } EXPORT_SYMBOL(drm_crtc_vblank_put); -static void drm_wait_one_vblank(struct drm_device *dev, unsigned int pipe) +/** + * drm_crtc_wait_one_vblank - wait for one vblank + * @crtc: DRM crtc + * + * This waits for one vblank to pass on @crtc, using the irq driver interfaces. + * It is a failure to call this when the vblank irq for @crtc is disabled, e.g. + * due to lack of driver support or because the crtc is off. 
+ */ +void drm_crtc_wait_one_vblank(struct drm_crtc *crtc) { + struct drm_device *dev = crtc->dev; + int pipe = drm_crtc_index(crtc); struct drm_vblank_crtc *vblank = drm_vblank_crtc(dev, pipe); int ret; u64 last; @@ -1310,19 +1320,6 @@ static void drm_wait_one_vblank(struct drm_device *dev, unsigned int pipe) drm_vblank_put(dev, pipe); } - -/** - * drm_crtc_wait_one_vblank - wait for one vblank - * @crtc: DRM crtc - * - * This waits for one vblank to pass on @crtc, using the irq driver interfaces. - * It is a failure to call this when the vblank irq for @crtc is disabled, e.g. - * due to lack of driver support or because the crtc is off. - */ -void drm_crtc_wait_one_vblank(struct drm_crtc *crtc) -{ - drm_wait_one_vblank(crtc->dev, drm_crtc_index(crtc)); -} EXPORT_SYMBOL(drm_crtc_wait_one_vblank); /** From 04f0aa5d69b88e4b9078d2e5aa3a970c71917850 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 9 Dec 2025 16:23:11 +0200 Subject: [PATCH 89/94] drm/vblank: remove superfluous pipe check Now that the pipe is crtc->pipe, there's no need to check it's within range. Reviewed-by: Thomas Zimmermann Link: https://patch.msgid.link/ced963542bfb00c2f1a653e9e5f717fccbd25132.1765290097.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/drm_vblank.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index 1d12836e3d80..f4d1fe182a4d 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -1302,9 +1302,6 @@ void drm_crtc_wait_one_vblank(struct drm_crtc *crtc) int ret; u64 last; - if (drm_WARN_ON(dev, pipe >= dev->num_crtcs)) - return; - ret = drm_vblank_get(dev, pipe); if (drm_WARN(dev, ret, "vblank not available on crtc %i, ret=%i\n", pipe, ret)) From 65defc4a780885687b9ff669e6276f7ba7ffd8e9 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 9 Dec 2025 16:23:12 +0200 Subject: [PATCH 90/94] drm/vblank: add return value to drm_crtc_wait_one_vblank() Let drivers deal with the vblank wait failures if they so desire. If the current warning backtrace gets toned down to a simple warning message, the drivers may wish to add the backtrace themselves. Reviewed-by: Thomas Zimmermann Link: https://patch.msgid.link/7f2de4dd170771991756073f037c7ca043c3e746.1765290097.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/drm_vblank.c | 8 ++++++-- include/drm/drm_vblank.h | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index f4d1fe182a4d..503eb23d38d2 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -1293,8 +1293,10 @@ EXPORT_SYMBOL(drm_crtc_vblank_put); * This waits for one vblank to pass on @crtc, using the irq driver interfaces. * It is a failure to call this when the vblank irq for @crtc is disabled, e.g. * due to lack of driver support or because the crtc is off. + * + * Returns: 0 on success, negative error on failures. 
*/ -void drm_crtc_wait_one_vblank(struct drm_crtc *crtc) +int drm_crtc_wait_one_vblank(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; int pipe = drm_crtc_index(crtc); @@ -1305,7 +1307,7 @@ void drm_crtc_wait_one_vblank(struct drm_crtc *crtc) ret = drm_vblank_get(dev, pipe); if (drm_WARN(dev, ret, "vblank not available on crtc %i, ret=%i\n", pipe, ret)) - return; + return ret; last = drm_vblank_count(dev, pipe); @@ -1316,6 +1318,8 @@ void drm_crtc_wait_one_vblank(struct drm_crtc *crtc) drm_WARN(dev, ret == 0, "vblank wait timed out on crtc %i\n", pipe); drm_vblank_put(dev, pipe); + + return ret ? 0 : -ETIMEDOUT; } EXPORT_SYMBOL(drm_crtc_wait_one_vblank); diff --git a/include/drm/drm_vblank.h b/include/drm/drm_vblank.h index 94ee09b48895..2fcef9c0f5b1 100644 --- a/include/drm/drm_vblank.h +++ b/include/drm/drm_vblank.h @@ -302,7 +302,7 @@ bool drm_handle_vblank(struct drm_device *dev, unsigned int pipe); bool drm_crtc_handle_vblank(struct drm_crtc *crtc); int drm_crtc_vblank_get(struct drm_crtc *crtc); void drm_crtc_vblank_put(struct drm_crtc *crtc); -void drm_crtc_wait_one_vblank(struct drm_crtc *crtc); +int drm_crtc_wait_one_vblank(struct drm_crtc *crtc); void drm_crtc_vblank_off(struct drm_crtc *crtc); void drm_crtc_vblank_reset(struct drm_crtc *crtc); void drm_crtc_vblank_on_config(struct drm_crtc *crtc, From e547890b9ae32202f280105b8725d64fd3934d73 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 9 Dec 2025 16:23:13 +0200 Subject: [PATCH 91/94] drm/vblank: limit vblank variable scope to atomic In drm_crtc_vblank_helper_get_vblank_timestamp_internal(), we only need the vblank variable for atomic modesetting. Limit the scope to make upcoming changes easier. Reviewed-by: Thomas Zimmermann Link: https://patch.msgid.link/b50f0bff654a6902ffd7ae52c31d46fad9ed7540.1765290097.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/drm_vblank.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index 503eb23d38d2..91e63177daaf 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -710,7 +710,6 @@ drm_crtc_vblank_helper_get_vblank_timestamp_internal( { struct drm_device *dev = crtc->dev; unsigned int pipe = crtc->index; - struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; struct timespec64 ts_etime, ts_vblank_time; ktime_t stime, etime; bool vbl_status; @@ -729,10 +728,13 @@ drm_crtc_vblank_helper_get_vblank_timestamp_internal( return false; } - if (drm_drv_uses_atomic_modeset(dev)) + if (drm_drv_uses_atomic_modeset(dev)) { + struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; + mode = &vblank->hwmode; - else + } else { mode = &crtc->hwmode; + } /* If mode timing undefined, just return as no-op: * Happens during initial modesetting of a crtc. From 976dd750a14d4d0e680aa8f83265451976cddad7 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 9 Dec 2025 16:23:14 +0200 Subject: [PATCH 92/94] drm/vblank: use the drm_vblank_crtc() and drm_crtc_vblank_crtc() helpers more We have the helpers to avoid open coding dev->vblank[pipe] access. 
v2: Rebase Reviewed-by: Thomas Zimmermann Link: https://patch.msgid.link/ad41f25c625d6a263b7e2e1d227cb14c5d0ce204.1765290097.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/drm_vblank.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index 91e63177daaf..a86561c4b999 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -551,7 +551,7 @@ int drm_vblank_init(struct drm_device *dev, unsigned int num_crtcs) dev->num_crtcs = num_crtcs; for (i = 0; i < num_crtcs; i++) { - struct drm_vblank_crtc *vblank = &dev->vblank[i]; + struct drm_vblank_crtc *vblank = drm_vblank_crtc(dev, i); vblank->dev = dev; vblank->pipe = i; @@ -605,7 +605,9 @@ EXPORT_SYMBOL(drm_dev_has_vblank); */ wait_queue_head_t *drm_crtc_vblank_waitqueue(struct drm_crtc *crtc) { - return &crtc->dev->vblank[drm_crtc_index(crtc)].queue; + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); + + return &vblank->queue; } EXPORT_SYMBOL(drm_crtc_vblank_waitqueue); @@ -729,7 +731,7 @@ drm_crtc_vblank_helper_get_vblank_timestamp_internal( } if (drm_drv_uses_atomic_modeset(dev)) { - struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; + struct drm_vblank_crtc *vblank = drm_vblank_crtc(dev, pipe); mode = &vblank->hwmode; } else { @@ -1784,7 +1786,7 @@ int drm_wait_vblank_ioctl(struct drm_device *dev, void *data, if (pipe >= dev->num_crtcs) return -EINVAL; - vblank = &dev->vblank[pipe]; + vblank = drm_vblank_crtc(dev, pipe); /* If the counter is currently enabled and accurate, short-circuit * queries to return the cached timestamp of the last vblank. From 1b3d18de5535f2553d237d64a20f7a1a3947df68 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 9 Dec 2025 16:23:15 +0200 Subject: [PATCH 93/94] drm/vblank: prefer drm_crtc_vblank_crtc() over drm_vblank_crtc() Use the higher-level function where the crtc is available. v2: Rebase Reviewed-by: Thomas Zimmermann Link: https://patch.msgid.link/29a29e746bc90c824d4f2bd15e42817dd7d0b199.1765290097.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/drm_vblank.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index a86561c4b999..498fc91450e6 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -731,7 +731,7 @@ drm_crtc_vblank_helper_get_vblank_timestamp_internal( } if (drm_drv_uses_atomic_modeset(dev)) { - struct drm_vblank_crtc *vblank = drm_vblank_crtc(dev, pipe); + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); mode = &vblank->hwmode; } else { @@ -1304,7 +1304,7 @@ int drm_crtc_wait_one_vblank(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; int pipe = drm_crtc_index(crtc); - struct drm_vblank_crtc *vblank = drm_vblank_crtc(dev, pipe); + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); int ret; u64 last; From 470cb09a2936d3c1ff8aeff46e3c14dcc4314e9b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 Dec 2025 14:09:25 +0300 Subject: [PATCH 94/94] drm/plane: Fix IS_ERR() vs NULL bug in drm_plane_create_color_pipeline_property() The drm_property_create_enum() function returns NULL on error; it never returns error pointers. Fix the error checking to match.
Fixes: 2afc3184f3b3 ("drm/plane: Add COLOR PIPELINE property") Signed-off-by: Dan Carpenter Signed-off-by: Simon Ser Link: https://patch.msgid.link/aTK9ZR0sMgqSACow@stanley.mountain --- drivers/gpu/drm/drm_plane.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index f6cfa8ac090c..b87f5f30d36b 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -1867,9 +1867,9 @@ int drm_plane_create_color_pipeline_property(struct drm_plane *plane, prop = drm_property_create_enum(plane->dev, DRM_MODE_PROP_ATOMIC, "COLOR_PIPELINE", all_pipelines, len); - if (IS_ERR(prop)) { + if (!prop) { kfree(all_pipelines); - return PTR_ERR(prop); + return -ENOMEM; } drm_object_attach_property(&plane->base, prop, 0);
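For context, a hedged sketch (hypothetical function, not part of the patch) contrasting the two kernel error-return conventions the fix distinguishes: allocator-style helpers such as drm_property_create_enum() signal failure with NULL, while ERR_PTR-style helpers such as devm_clk_get() encode an errno in the returned pointer:

	static int example_error_conventions(struct drm_device *drm, struct device *dev,
					     const struct drm_prop_enum_list *list, int len)
	{
		struct drm_property *prop;
		struct clk *clk;

		/* NULL-on-error: the caller picks the errno itself */
		prop = drm_property_create_enum(drm, DRM_MODE_PROP_ATOMIC,
						"EXAMPLE", list, len);
		if (!prop)
			return -ENOMEM;

		/* ERR_PTR-on-error: the caller decodes the embedded errno */
		clk = devm_clk_get(dev, NULL);
		if (IS_ERR(clk))
			return PTR_ERR(clk);

		return 0;
	}

Applying IS_ERR()/PTR_ERR() to a NULL-returning helper, as the old code did, silently treats a failed allocation as success.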