From edce042da7984586ea5c7ed18ea5f58002afb969 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Mon, 27 Oct 2025 14:12:28 +0100 Subject: [PATCH 1/2] drm/xe: Fix uninitialized return value from xe_validation_guard() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the DEFINE_CLASS() macro creates an inline function and the init args are passed down to it; since _ret is passed as an int, whatever value is set inside the function is not visible to the caller. Pass _ret as a pointer so its value propagates to the caller. Fixes: c460bc2311df ("drm/xe: Introduce an xe_validation wrapper around drm_exec") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6220 Cc: Maarten Lankhorst Cc: Matthew Brost Cc: intel-xe@lists.freedesktop.org Signed-off-by: Thomas Hellström Reviewed-by: Lucas De Marchi Reviewed-by: Maarten Lankhorst Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20251027131228.12098-1-thomas.hellstrom@linux.intel.com (cherry picked from commit fcb8c304f4673747d535c74b340b5b8a4823727b) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_validation.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_validation.h b/drivers/gpu/drm/xe/xe_validation.h index fec331d791e7..b2d09c596714 100644 --- a/drivers/gpu/drm/xe/xe_validation.h +++ b/drivers/gpu/drm/xe/xe_validation.h @@ -166,10 +166,10 @@ xe_validation_device_init(struct xe_validation_device *val) */ DEFINE_CLASS(xe_validation, struct xe_validation_ctx *, if (_T) xe_validation_ctx_fini(_T);, - ({_ret = xe_validation_ctx_init(_ctx, _val, _exec, _flags); - _ret ? NULL : _ctx; }), + ({*_ret = xe_validation_ctx_init(_ctx, _val, _exec, _flags); + *_ret ? NULL : _ctx; }), struct xe_validation_ctx *_ctx, struct xe_validation_device *_val, - struct drm_exec *_exec, const struct xe_val_flags _flags, int _ret); + struct drm_exec *_exec, const struct xe_val_flags _flags, int *_ret); static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T) {return *_T; } #define class_xe_validation_is_conditional true @@ -186,7 +186,7 @@ static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T) * exhaustive eviction. */ #define xe_validation_guard(_ctx, _val, _exec, _flags, _ret) \ - scoped_guard(xe_validation, _ctx, _val, _exec, _flags, _ret) \ + scoped_guard(xe_validation, _ctx, _val, _exec, _flags, &_ret) \ drm_exec_until_all_locked(_exec) #endif From b3fbda1a630a9439c885b2a5dc5230cc49a87e9e Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 21 Oct 2025 17:55:37 -0700 Subject: [PATCH 2/2] drm/xe: Do not wake device during a GT reset Waking the device during a GT reset can lead to unintended memory allocation, which is not allowed since GT resets occur in the reclaim path. Prevent this by holding a PM reference while a reset is in flight. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20251022005538.828980-3-matthew.brost@intel.com (cherry picked from commit 480b358e7d8ef69fd8f1b0cad6e07c7d70a36ee4) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 3e0ad7e5b5df..6d3db5e55d98 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -813,12 +813,16 @@ static int gt_reset(struct xe_gt *gt) unsigned int fw_ref; int err; - if (xe_device_wedged(gt_to_xe(gt))) - return -ECANCELED; + if (xe_device_wedged(gt_to_xe(gt))) { + err = -ECANCELED; + goto err_pm_put; + } /* We only support GT resets with GuC submission */ - if (!xe_device_uc_enabled(gt_to_xe(gt))) - return -ENODEV; + if (!xe_device_uc_enabled(gt_to_xe(gt))) { + err = -ENODEV; + goto err_pm_put; + } xe_gt_info(gt, "reset started\n"); @@ -826,8 +830,6 @@ static int gt_reset(struct xe_gt *gt) if (!err) xe_gt_warn(gt, "reset block failed to get lifted"); - xe_pm_runtime_get(gt_to_xe(gt)); - if (xe_fault_inject_gt_reset()) { err = -ECANCELED; goto err_fail; @@ -874,6 +876,7 @@ static int gt_reset(struct xe_gt *gt) xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); xe_device_declare_wedged(gt_to_xe(gt)); +err_pm_put: xe_pm_runtime_put(gt_to_xe(gt)); return err; @@ -895,7 +898,9 @@ void xe_gt_reset_async(struct xe_gt *gt) return; xe_gt_info(gt, "reset queued\n"); - queue_work(gt->ordered_wq, >->reset.worker); + xe_pm_runtime_get_noresume(gt_to_xe(gt)); + if (!queue_work(gt->ordered_wq, >->reset.worker)) + xe_pm_runtime_put(gt_to_xe(gt)); } void xe_gt_suspend_prepare(struct xe_gt *gt)