Merge tag 'amd-drm-fixes-7.1-2026-05-06' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes

amd-drm-fixes-7.1-2026-05-06:

amdgpu:
- GFX9 fixes
- Hawaii SMU fixes
- SDMA4 fix
- GART fix
- Userq fixes

amdkfd:
- GPUVM TLB flush fix
- Hotplug fix

radeon:
- Hawaii SMU fixes

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20260506154631.1733034-1-alexander.deucher@amd.com
Committed by Dave Airlie on 2026-05-08 16:13:07 +10:00
14 changed files with 81 additions and 155 deletions

View File

@@ -3149,11 +3149,7 @@ static int __init amdgpu_init(void)
r = amdgpu_sync_init();
if (r)
goto error_sync;
r = amdgpu_userq_fence_slab_init();
if (r)
goto error_fence;
return r;
amdgpu_register_atpx_handler();
amdgpu_acpi_detect();
@@ -3161,7 +3157,7 @@ static int __init amdgpu_init(void)
/* Ignore KFD init failures when CONFIG_HSA_AMD is not set. */
r = amdgpu_amdkfd_init();
if (r && r != -ENOENT)
goto error_fence;
goto error_fini_sync;
if (amdgpu_pp_feature_mask & PP_OVERDRIVE_MASK) {
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
@@ -3172,10 +3168,8 @@ static int __init amdgpu_init(void)
/* let modprobe override vga console setting */
return pci_register_driver(&amdgpu_kms_pci_driver);
error_fence:
error_fini_sync:
amdgpu_sync_fini();
error_sync:
return r;
}
@@ -3186,7 +3180,6 @@ static void __exit amdgpu_exit(void)
amdgpu_unregister_atpx_handler();
amdgpu_acpi_release();
amdgpu_sync_fini();
amdgpu_userq_fence_slab_fini();
mmu_notifier_synchronize();
amdgpu_xcp_drv_release();
}
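The two hunks above remove the user-queue fence slab from module init/exit and rename the error label so a KFD init failure unwinds through amdgpu_sync_fini(). Below is a minimal sketch of the goto-unwind shape this restores; foo_init()/bar_init()/foo_fini() are placeholders, not driver symbols.

        /* Hypothetical helpers standing in for amdgpu_sync_init() and friends. */
        int foo_init(void);
        void foo_fini(void);
        int bar_init(void);

        /* Minimal sketch: a later failure jumps to a label that undoes only
         * the steps that already succeeded. */
        static int example_init(void)
        {
                int r;

                r = foo_init();
                if (r)
                        return r;

                r = bar_init();
                if (r)
                        goto error_fini_foo;

                return 0;

        error_fini_foo:
                foo_fini();
                return r;
        }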

View File

@@ -262,12 +262,19 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev)
*/
int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
{
int r;
if (adev->gart.bo != NULL)
return 0;
return amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo,
NULL, (void *)&adev->gart.ptr);
r = amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo,
NULL, (void *)&adev->gart.ptr);
if (r)
return r;
memset_io(adev->gart.ptr, adev->gart.gart_pte_flags, adev->gart.table_size);
return 0;
}
/**
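The GART fix checks the result of amdgpu_bo_create_kernel() instead of returning it blindly, then seeds the newly allocated VRAM table via memset_io() so stale contents cannot be read back as valid PTEs. A hedged sketch of that allocate, check, initialize pattern follows; the names are invented and the fill value is simply zero for illustration.

        #include <linux/io.h>
        #include <linux/types.h>

        struct example_dev {
                void __iomem *table;    /* CPU mapping of the VRAM page table */
        };

        /* Hypothetical allocator standing in for amdgpu_bo_create_kernel(). */
        int example_bo_create(struct example_dev *dev, size_t size);

        static int example_table_alloc(struct example_dev *dev, size_t size)
        {
                int r;

                r = example_bo_create(dev, size);
                if (r)
                        return r;       /* do not touch dev->table on failure */

                /* the mapping is device (VRAM) memory: memset_io(), not memset() */
                memset_io(dev->table, 0, size);
                return 0;
        }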

View File

@@ -32,29 +32,9 @@
#include "amdgpu.h"
#include "amdgpu_userq_fence.h"
static const struct dma_fence_ops amdgpu_userq_fence_ops;
static struct kmem_cache *amdgpu_userq_fence_slab;
#define AMDGPU_USERQ_MAX_HANDLES (1U << 16)
int amdgpu_userq_fence_slab_init(void)
{
amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
sizeof(struct amdgpu_userq_fence),
0,
SLAB_HWCACHE_ALIGN,
NULL);
if (!amdgpu_userq_fence_slab)
return -ENOMEM;
return 0;
}
void amdgpu_userq_fence_slab_fini(void)
{
rcu_barrier();
kmem_cache_destroy(amdgpu_userq_fence_slab);
}
static const struct dma_fence_ops amdgpu_userq_fence_ops;
static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f)
{
@@ -231,7 +211,7 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence)
{
*userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
*userq_fence = kmalloc(sizeof(**userq_fence), GFP_KERNEL);
return *userq_fence ? 0 : -ENOMEM;
}
@@ -342,7 +322,7 @@ static void amdgpu_userq_fence_free(struct rcu_head *rcu)
amdgpu_userq_fence_driver_put(fence_drv);
kvfree(userq_fence->fence_drv_array);
kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
kfree(userq_fence);
}
static void amdgpu_userq_fence_release(struct dma_fence *f)
@@ -545,7 +525,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
if (r) {
mutex_unlock(&userq_mgr->userq_mutex);
kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
kfree(userq_fence);
goto put_gobj_write;
}
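This file drops its dedicated kmem_cache in favour of plain kmalloc()/kfree(), with the free still deferred through an RCU callback so lookups under rcu_read_lock() remain safe. A generic sketch of that allocate-then-RCU-free pattern; the struct and helper names are illustrative only.

        #include <linux/slab.h>
        #include <linux/rcupdate.h>
        #include <linux/types.h>

        struct example_fence {
                struct rcu_head rcu;
                u64 seq;
        };

        static struct example_fence *example_fence_alloc(void)
        {
                /* plain kmalloc() replaces the former kmem_cache_alloc() */
                return kmalloc(sizeof(struct example_fence), GFP_KERNEL);
        }

        static void example_fence_free_rcu(struct rcu_head *rcu)
        {
                struct example_fence *f = container_of(rcu, struct example_fence, rcu);

                kfree(f);       /* pairs with kmalloc() above */
        }

        static void example_fence_release(struct example_fence *f)
        {
                /* defer the free until an RCU grace period has elapsed */
                call_rcu(&f->rcu, example_fence_free_rcu);
        }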

View File

@@ -58,9 +58,6 @@ struct amdgpu_userq_fence_driver {
char timeline_name[TASK_COMM_LEN];
};
int amdgpu_userq_fence_slab_init(void);
void amdgpu_userq_fence_slab_fini(void);
void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv);
void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv);
int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,

View File

@@ -5660,9 +5660,6 @@ static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
{
struct amdgpu_device *adev = ring->adev;
/* we only allocate 32bit for each seq wb address */
BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
/* write fence seq to the "addr" */
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |

View File

@@ -30,34 +30,6 @@
#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
static int
mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo)
{
int ret;
ret = amdgpu_bo_reserve(bo, true);
if (ret) {
DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
goto err_reserve_bo_failed;
}
ret = amdgpu_ttm_alloc_gart(&bo->tbo);
if (ret) {
DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
goto err_map_bo_gart_failed;
}
amdgpu_bo_unreserve(bo);
bo = amdgpu_bo_ref(bo);
return 0;
err_map_bo_gart_failed:
amdgpu_bo_unreserve(bo);
err_reserve_bo_failed:
return ret;
}
static int
mes_userq_create_wptr_mapping(struct amdgpu_device *adev,
struct amdgpu_userq_mgr *uq_mgr,
@@ -65,55 +37,62 @@ mes_userq_create_wptr_mapping(struct amdgpu_device *adev,
uint64_t wptr)
{
struct amdgpu_bo_va_mapping *wptr_mapping;
struct amdgpu_vm *wptr_vm;
struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj;
struct amdgpu_bo *obj;
struct amdgpu_vm *vm = queue->vm;
struct drm_exec exec;
int ret;
wptr_vm = queue->vm;
ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
if (ret)
return ret;
wptr &= AMDGPU_GMC_HOLE_MASK;
wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT);
amdgpu_bo_unreserve(wptr_vm->root.bo);
if (!wptr_mapping) {
DRM_ERROR("Failed to lookup wptr bo\n");
return -EINVAL;
drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 2);
drm_exec_until_all_locked(&exec) {
ret = amdgpu_vm_lock_pd(vm, &exec, 1);
drm_exec_retry_on_contention(&exec);
if (unlikely(ret))
goto fail_lock;
wptr_mapping = amdgpu_vm_bo_lookup_mapping(vm, wptr >> PAGE_SHIFT);
if (!wptr_mapping) {
ret = -EINVAL;
goto fail_lock;
}
obj = wptr_mapping->bo_va->base.bo;
ret = drm_exec_lock_obj(&exec, &obj->tbo.base);
drm_exec_retry_on_contention(&exec);
if (unlikely(ret))
goto fail_lock;
}
wptr_obj->obj = wptr_mapping->bo_va->base.bo;
wptr_obj->obj = amdgpu_bo_ref(wptr_mapping->bo_va->base.bo);
if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n");
return -EINVAL;
}
ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj);
if (ret) {
DRM_ERROR("Failed to map wptr bo to GART\n");
return ret;
}
ret = amdgpu_bo_reserve(wptr_obj->obj, true);
if (ret) {
DRM_ERROR("Failed to reserve wptr bo\n");
return ret;
ret = -EINVAL;
goto fail_map;
}
/* TODO use eviction fence instead of pinning. */
ret = amdgpu_bo_pin(wptr_obj->obj, AMDGPU_GEM_DOMAIN_GTT);
if (ret) {
drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin wptr bo\n");
goto unresv_bo;
DRM_ERROR("Failed to pin wptr bo. ret %d\n", ret);
goto fail_map;
}
ret = amdgpu_ttm_alloc_gart(&wptr_obj->obj->tbo);
if (ret) {
DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
goto fail_map;
}
queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset(wptr_obj->obj);
amdgpu_bo_unreserve(wptr_obj->obj);
drm_exec_fini(&exec);
return 0;
unresv_bo:
amdgpu_bo_unreserve(wptr_obj->obj);
fail_map:
amdgpu_bo_unref(&wptr_obj->obj);
fail_lock:
drm_exec_fini(&exec);
return ret;
}
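The rewritten wptr-mapping path takes its locks through a drm_exec loop: lock the VM page directory, look up the wptr mapping, lock the backing object, and retry the whole block on contention before pinning and GART-binding the BO. A condensed, hedged sketch of that drm_exec pattern; lookup_object() is a hypothetical stand-in for the mapping lookup, and error handling is trimmed.

        #include <drm/drm_exec.h>

        /* Hypothetical lookup standing in for amdgpu_vm_bo_lookup_mapping(). */
        struct drm_gem_object *lookup_object(void);

        static int example_lock_and_map(void)
        {
                struct drm_exec exec;
                struct drm_gem_object *obj;
                int ret = 0;

                drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 2);
                drm_exec_until_all_locked(&exec) {
                        obj = lookup_object();
                        ret = drm_exec_lock_obj(&exec, obj);
                        /* restart the whole block if another thread took a lock first */
                        drm_exec_retry_on_contention(&exec);
                        if (ret)
                                break;
                }

                if (!ret) {
                        /* everything locked in the loop is still held here,
                         * so it is safe to pin the BO and set up the GART map */
                }

                drm_exec_fini(&exec);   /* drops all locks taken in the loop */
                return ret;
        }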

View File

@@ -889,7 +889,7 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
/* write the fence */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
/* zero in first two bits */
BUG_ON(addr & 0x3);
WARN_ON(addr & 0x3);
amdgpu_ring_write(ring, lower_32_bits(addr));
amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, lower_32_bits(seq));
@@ -899,7 +899,7 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
addr += 4;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
/* zero in first two bits */
BUG_ON(addr & 0x3);
WARN_ON(addr & 0x3);
amdgpu_ring_write(ring, lower_32_bits(addr));
amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, upper_32_bits(seq));
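The SDMA4 hunks downgrade the fence-address alignment asserts from BUG_ON() to WARN_ON(), so a misaligned address logs a warning rather than halting the machine. As a side note, WARN_ON() also returns its condition, so in similar code it can gate an early exit; a tiny sketch, not taken from this patch:

        #include <linux/bug.h>
        #include <linux/types.h>

        static void example_emit_fence(u64 addr)
        {
                /* warn and bail out instead of killing the box like BUG_ON() would */
                if (WARN_ON(addr & 0x3))
                        return;

                /* ... emit the fence packet ... */
        }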

View File

@@ -1360,7 +1360,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
if (WARN_ON_ONCE(!peer_pdd))
continue;
kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
kfd_flush_tlb(peer_pdd);
}
kfree(devices_arr);
@@ -1455,7 +1455,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
if (WARN_ON_ONCE(!peer_pdd))
continue;
if (flush_tlb)
kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
kfd_flush_tlb(peer_pdd);
/* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */
err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);

View File

@@ -1737,37 +1737,6 @@ bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entr
return false;
}
/* check if there is kfd process still uses adev */
static bool kgd2kfd_check_device_idle(struct amdgpu_device *adev)
{
struct kfd_process *p;
struct hlist_node *p_temp;
unsigned int temp;
struct kfd_node *dev;
mutex_lock(&kfd_processes_mutex);
if (hash_empty(kfd_processes_table)) {
mutex_unlock(&kfd_processes_mutex);
return true;
}
/* check if there is device still use adev */
hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) {
for (int i = 0; i < p->n_pdds; i++) {
dev = p->pdds[i]->dev;
if (dev->adev == adev) {
mutex_unlock(&kfd_processes_mutex);
return false;
}
}
}
mutex_unlock(&kfd_processes_mutex);
return true;
}
/** kgd2kfd_teardown_processes - gracefully tear down existing
* kfd processes that use adev
*
@@ -1800,7 +1769,7 @@ void kgd2kfd_teardown_processes(struct amdgpu_device *adev)
mutex_unlock(&kfd_processes_mutex);
/* wait all kfd processes use adev terminate */
while (!kgd2kfd_check_device_idle(adev))
while (!!atomic_read(&adev->kfd.dev->kfd_processes_count))
cond_resched();
}
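The hotplug fix replaces the walk over kfd_processes_table with a per-device atomic process counter that teardown simply polls. A minimal sketch of that scheme, assuming the counter is bumped when a process binds to the device and dropped when it releases it; the names are illustrative.

        #include <linux/atomic.h>
        #include <linux/sched.h>

        struct example_kfd_dev {
                atomic_t kfd_processes_count;   /* processes still using this device */
        };

        static void example_process_bind(struct example_kfd_dev *kfd)
        {
                atomic_inc(&kfd->kfd_processes_count);
        }

        static void example_process_release(struct example_kfd_dev *kfd)
        {
                atomic_dec(&kfd->kfd_processes_count);
        }

        static void example_teardown_wait(struct example_kfd_dev *kfd)
        {
                /* poll politely until the last process has gone away */
                while (atomic_read(&kfd->kfd_processes_count))
                        cond_resched();
        }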

View File

@@ -572,7 +572,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
qpd->vmid,
qpd->page_table_base);
/* invalidate the VM context after pasid and vmid mapping is set up */
kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
kfd_flush_tlb(qpd_to_pdd(qpd));
if (dqm->dev->kfd2kgd->set_scratch_backing_va)
dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
@@ -610,7 +610,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
if (flush_texture_cache_nocpsch(q->device, qpd))
dev_err(dev, "Failed to flush TC\n");
kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
kfd_flush_tlb(qpd_to_pdd(qpd));
/* Release the vmid mapping */
set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
@@ -1284,7 +1284,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
dqm->dev->adev,
qpd->vmid,
qpd->page_table_base);
kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
kfd_flush_tlb(pdd);
}
/* Take a safe reference to the mm_struct, which may otherwise

View File

@@ -1554,13 +1554,13 @@ void kfd_signal_reset_event(struct kfd_node *dev);
void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid);
void kfd_signal_process_terminate_event(struct kfd_process *p);
static inline void kfd_flush_tlb(struct kfd_process_device *pdd,
enum TLB_FLUSH_TYPE type)
static inline void kfd_flush_tlb(struct kfd_process_device *pdd)
{
struct amdgpu_device *adev = pdd->dev->adev;
struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
amdgpu_vm_flush_compute_tlb(adev, vm, type, pdd->dev->xcc_mask);
amdgpu_vm_flush_compute_tlb(adev, vm, TLB_FLUSH_HEAVYWEIGHT,
pdd->dev->xcc_mask);
}
static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)

View File

@@ -1424,7 +1424,7 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
if (r)
break;
}
kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT);
kfd_flush_tlb(pdd);
}
return r;
@@ -1571,7 +1571,7 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
}
}
kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
kfd_flush_tlb(pdd);
}
return r;

View File

@@ -1333,12 +1333,13 @@ static int ci_populate_all_memory_levels(struct pp_hwmgr *hwmgr)
dev_id = adev->pdev->device;
if ((dpm_table->mclk_table.count >= 2)
&& ((dev_id == 0x67B0) || (dev_id == 0x67B1))) {
smu_data->smc_state_table.MemoryLevel[1].MinVddci =
smu_data->smc_state_table.MemoryLevel[0].MinVddci;
smu_data->smc_state_table.MemoryLevel[1].MinMvdd =
smu_data->smc_state_table.MemoryLevel[0].MinMvdd;
if ((dpm_table->mclk_table.count >= 2) &&
((dev_id == 0x67B0) || (dev_id == 0x67B1)) &&
(adev->pdev->revision == 0)) {
smu_data->smc_state_table.MemoryLevel[1].MinVddc =
smu_data->smc_state_table.MemoryLevel[0].MinVddc;
smu_data->smc_state_table.MemoryLevel[1].MinVddcPhases =
smu_data->smc_state_table.MemoryLevel[0].MinVddcPhases;
}
smu_data->smc_state_table.MemoryLevel[0].ActivityLevel = 0x1F;
CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table.MemoryLevel[0].ActivityLevel);
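Both Hawaii SMU fixes (this hunk and the radeon hunks below) narrow the memory-level workaround to device IDs 0x67B0/0x67B1 at PCI revision 0 and copy the VDDC/VDDC-phase fields. The gate itself reduces to a plain device/revision check, roughly:

        #include <linux/pci.h>

        /* Sketch of the revision-gated quirk check added above. */
        static bool example_needs_hawaii_quirk(struct pci_dev *pdev)
        {
                return (pdev->device == 0x67B0 || pdev->device == 0x67B1) &&
                       pdev->revision == 0;
        }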

View File

@@ -2461,7 +2461,8 @@ static void ci_register_patching_mc_arb(struct radeon_device *rdev,
if (patch &&
((rdev->pdev->device == 0x67B0) ||
(rdev->pdev->device == 0x67B1))) {
(rdev->pdev->device == 0x67B1)) &&
(rdev->pdev->revision == 0)) {
if ((memory_clock > 100000) && (memory_clock <= 125000)) {
tmp2 = (((0x31 * engine_clock) / 125000) - 1) & 0xff;
*dram_timimg2 &= ~0x00ff0000;
@@ -3304,7 +3305,8 @@ static int ci_populate_all_memory_levels(struct radeon_device *rdev)
pi->smc_state_table.MemoryLevel[0].EnabledForActivity = 1;
if ((dpm_table->mclk_table.count >= 2) &&
((rdev->pdev->device == 0x67B0) || (rdev->pdev->device == 0x67B1))) {
((rdev->pdev->device == 0x67B0) || (rdev->pdev->device == 0x67B1)) &&
(rdev->pdev->revision == 0)) {
pi->smc_state_table.MemoryLevel[1].MinVddc =
pi->smc_state_table.MemoryLevel[0].MinVddc;
pi->smc_state_table.MemoryLevel[1].MinVddcPhases =
@@ -4493,7 +4495,8 @@ static int ci_register_patching_mc_seq(struct radeon_device *rdev,
if (patch &&
((rdev->pdev->device == 0x67B0) ||
(rdev->pdev->device == 0x67B1))) {
(rdev->pdev->device == 0x67B1)) &&
(rdev->pdev->revision == 0)) {
for (i = 0; i < table->last; i++) {
if (table->last >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE)
return -EINVAL;