Merge tag 'drm-xe-fixes-2026-04-02' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes

uAPI Fix:
 - Accept canonical GPU addresses in xe_vm_madvise_ioctl (Arvind)

Driver Fixes:
 - Disallow writes to read-only VMAs (Jonathan)
 - PXP fixes (Daniele)
 - Disable garbage collector work item on SVM close (Brost)
 - Avoid memory allocations in xe_device_declare_wedged (Brost)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patch.msgid.link/ac5mDHs-McR5cJSV@intel.com
This commit is contained in:
Dave Airlie
2026-04-03 18:36:51 +10:00
5 changed files with 48 additions and 26 deletions

View File

@@ -837,6 +837,14 @@ static void detect_preproduction_hw(struct xe_device *xe)
}
}
/*
 * drmm cleanup action registered at probe time: releases the runtime-PM
 * reference that xe_device_declare_wedged() takes when it first marks the
 * device as wedged. Registering this at probe (rather than at wedge time)
 * avoids allocating memory on the wedging path.
 */
static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
{
struct xe_device *xe = arg;
/* Only drop the PM ref if the device was actually declared wedged. */
if (atomic_read(&xe->wedged.flag))
xe_pm_runtime_put(xe);
}
int xe_device_probe(struct xe_device *xe)
{
struct xe_tile *tile;
@@ -1013,6 +1021,10 @@ int xe_device_probe(struct xe_device *xe)
detect_preproduction_hw(xe);
err = drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe);
if (err)
goto err_unregister_display;
return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
err_unregister_display:
@@ -1216,13 +1228,6 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
return address & GENMASK_ULL(xe->info.va_bits - 1, 0);
}
/*
 * NOTE(review): this is the pre-fix variant being removed by the diff. It
 * unconditionally drops a PM runtime reference, which is only correct when
 * the cleanup action was registered from the wedging path itself; the
 * replacement (registered at probe) gates the put on wedged.flag instead.
 */
static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
{
struct xe_device *xe = arg;
xe_pm_runtime_put(xe);
}
/**
* DOC: Xe Device Wedging
*
@@ -1300,15 +1305,9 @@ void xe_device_declare_wedged(struct xe_device *xe)
return;
}
xe_pm_runtime_get_noresume(xe);
if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) {
drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Although device is wedged.\n");
return;
}
if (!atomic_xchg(&xe->wedged.flag, 1)) {
xe->needs_flr_on_fini = true;
xe_pm_runtime_get_noresume(xe);
drm_err(&xe->drm,
"CRITICAL: Xe has declared device %s as wedged.\n"
"IOCTLs and executions are blocked. Only a rebind may clear the failure\n"

View File

@@ -187,6 +187,12 @@ static int xe_pagefault_service(struct xe_pagefault *pf)
goto unlock_vm;
}
if (xe_vma_read_only(vma) &&
pf->consumer.access_type != XE_PAGEFAULT_ACCESS_TYPE_READ) {
err = -EPERM;
goto unlock_vm;
}
atomic = xe_pagefault_access_is_atomic(pf->consumer.access_type);
if (xe_vma_is_cpu_addr_mirror(vma))

View File

@@ -380,6 +380,18 @@ int xe_pxp_init(struct xe_device *xe)
return 0;
}
/*
* On PTL, older GSC FWs have a bug that can cause them to crash during
* PXP invalidation events, which leads to a complete loss of power
* management on the media GT. Therefore, we can't use PXP on FWs that
* have this bug, which was fixed in PTL GSC build 1396.
*/
if (xe->info.platform == XE_PANTHERLAKE &&
gt->uc.gsc.fw.versions.found[XE_UC_FW_VER_RELEASE].build < 1396) {
drm_info(&xe->drm, "PXP requires PTL GSC build 1396 or newer\n");
return 0;
}
pxp = drmm_kzalloc(&xe->drm, sizeof(struct xe_pxp), GFP_KERNEL);
if (!pxp) {
err = -ENOMEM;
@@ -512,7 +524,7 @@ static int __exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q)
static int pxp_start(struct xe_pxp *pxp, u8 type)
{
int ret = 0;
bool restart = false;
bool restart;
if (!xe_pxp_is_enabled(pxp))
return -ENODEV;
@@ -541,6 +553,8 @@ static int pxp_start(struct xe_pxp *pxp, u8 type)
msecs_to_jiffies(PXP_ACTIVATION_TIMEOUT_MS)))
return -ETIMEDOUT;
restart = false;
mutex_lock(&pxp->mutex);
/* If PXP is not already active, turn it on */
@@ -583,6 +597,7 @@ static int pxp_start(struct xe_pxp *pxp, u8 type)
drm_err(&pxp->xe->drm, "PXP termination failed before start\n");
mutex_lock(&pxp->mutex);
pxp->status = XE_PXP_ERROR;
complete_all(&pxp->termination);
goto out_unlock;
}
@@ -870,11 +885,6 @@ int xe_pxp_pm_suspend(struct xe_pxp *pxp)
pxp->key_instance++;
needs_queue_inval = true;
break;
default:
drm_err(&pxp->xe->drm, "unexpected state during PXP suspend: %u",
pxp->status);
ret = -EIO;
goto out;
}
/*
@@ -899,7 +909,6 @@ int xe_pxp_pm_suspend(struct xe_pxp *pxp)
pxp->last_suspend_key_instance = pxp->key_instance;
out:
return ret;
}

View File

@@ -903,7 +903,7 @@ int xe_svm_init(struct xe_vm *vm)
/* Tear down SVM state for a VM; caller must have already closed the VM. */
void xe_svm_close(struct xe_vm *vm)
{
xe_assert(vm->xe, xe_vm_is_closed(vm));
/* NOTE(review): diff residue — the flush_work() line below is the removed
 * (old) call; the disable_work_sync() line is its replacement. Only one of
 * the two exists in the real source. disable_work_sync() both flushes any
 * in-flight execution and prevents the garbage-collector work item from
 * being queued again after close. */
flush_work(&vm->svm.garbage_collector.work);
disable_work_sync(&vm->svm.garbage_collector.work);
xe_svm_put_pagemaps(vm);
drm_pagemap_release_owner(&vm->svm.peer);
}

View File

@@ -408,8 +408,15 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
struct xe_device *xe = to_xe_device(dev);
struct xe_file *xef = to_xe_file(file);
struct drm_xe_madvise *args = data;
struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start,
.range = args->range, };
struct xe_vmas_in_madvise_range madvise_range = {
/*
* Userspace may pass canonical (sign-extended) addresses.
* Strip the sign extension to get the internal non-canonical
* form used by the GPUVM, matching xe_vm_bind_ioctl() behavior.
*/
.addr = xe_device_uncanonicalize_addr(xe, args->start),
.range = args->range,
};
struct xe_madvise_details details;
struct xe_vm *vm;
struct drm_exec exec;
@@ -439,7 +446,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
if (err)
goto unlock_vm;
err = xe_vm_alloc_madvise_vma(vm, args->start, args->range);
err = xe_vm_alloc_madvise_vma(vm, madvise_range.addr, args->range);
if (err)
goto madv_fini;
@@ -482,7 +489,8 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args,
&details);
err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range);
err = xe_vm_invalidate_madvise_range(vm, madvise_range.addr,
madvise_range.addr + args->range);
if (madvise_range.has_svm_userptr_vmas)
xe_svm_notifier_unlock(vm);