From e89aacd1ecdd3d13e8f347aa082687878621e03c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 14 Jan 2026 10:49:05 -0800 Subject: [PATCH 001/195] drm/xe: Reduce LRC timestamp stuck message on VFs to notice An LRC timestamp getting stuck is a somewhat normal occurrence. If a single VF submits a job that does not get timesliced, the LRC timestamp will not increment. Reduce the LRC timestamp stuck message on VFs to notice (same log level as job timeout) to avoid false CI bugs in tests where a VF submits a job that does not get timesliced. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/7032 Fixes: bb63e7257e63 ("drm/xe: Avoid toggling schedule state to check LRC timestamp in TDR") Suggested-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Reviewed-by: Stuart Summers Link: https://patch.msgid.link/20260114184905.4189026-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index a27ea931b956..1b2f66f4425b 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1317,9 +1317,14 @@ static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) ctx_timestamp = lower_32_bits(xe_lrc_timestamp(q->lrc[0])); if (ctx_timestamp == job->sample_timestamp) { - xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck", - xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), - q->guc->id); + if (IS_SRIOV_VF(gt_to_xe(gt))) + xe_gt_notice(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck", + xe_sched_job_seqno(job), + xe_sched_job_lrc_seqno(job), q->guc->id); + else + xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck", + xe_sched_job_seqno(job), + xe_sched_job_lrc_seqno(job), q->guc->id); return xe_sched_invalidate_job(job, 0); } From 
c51595b3d25123cb98bd9b1d6f50e57cc6be592b Mon Sep 17 00:00:00 2001 From: Nakshtra Goyal Date: Tue, 13 Jan 2026 14:49:28 +0530 Subject: [PATCH 002/195] drm/xe/xe_query: Remove check for gt There's no need to check a userspace-provided GT ID (which may come from any tile) against the number of GTs that can be present on a single tile. The xe_device_get_gt() lookup already checks that the GT ID passed is valid for the current device. (Matt Roper) Signed-off-by: Nakshtra Goyal Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260113091928.67446-1-nakshtra.goyal@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_query.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 75490683bad2..b7b4261968e0 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -142,9 +142,6 @@ query_engine_cycles(struct xe_device *xe, return -EINVAL; eci = &resp.eci; - if (eci->gt_id >= xe->info.max_gt_per_tile) - return -EINVAL; - gt = xe_device_get_gt(xe, eci->gt_id); if (!gt) return -EINVAL; From bbd36787308413d8564e1b0498fe6c1f765fa6c1 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 16 Jan 2026 14:03:32 -0800 Subject: [PATCH 003/195] drm/xe: Ban entire multi-queue group on any job timeout In multi-queue mode, we only have control over the entire group, so we cannot ban individual queues or signal fences until the whole group is removed from hardware. Implement banning of the entire group if any job within it times out. 
v2: - Fix lock inversion (Niranjana) - Initialize new queues in group to stopped v3: - Blindly call xe_exec_queue_multi_queue_primary (Niranjana) - More comments around temporary list when stopping (Niranjana) - Restart group on false timeout (Niranjana) Cc: Niranjana Vishwanathapura Signed-off-by: Matthew Brost Reviewed-by: Niranjana Vishwanathapura Link: https://patch.msgid.link/20260116220333.861850-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_exec_queue_types.h | 2 + drivers/gpu/drm/xe/xe_guc_submit.c | 120 ++++++++++++++++++----- 2 files changed, 97 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 5fc516b0bb77..562ea75891ba 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -66,6 +66,8 @@ struct xe_exec_queue_group { bool sync_pending; /** @banned: Group banned */ bool banned; + /** @stopped: Group is stopped, protected by list_lock */ + bool stopped; }; /** diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 1b2f66f4425b..dee0f9004024 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -556,6 +556,72 @@ static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) xe_sched_tdr_queue_imm(&q->guc->sched); } +static void xe_guc_exec_queue_group_stop(struct xe_exec_queue *q) +{ + struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); + struct xe_exec_queue_group *group = q->multi_queue.group; + struct xe_exec_queue *eq, *next; + LIST_HEAD(tmp); + + xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), + xe_exec_queue_is_multi_queue(q)); + + mutex_lock(&group->list_lock); + + /* + * Stop all future queues being from executing while group is stopped. 
+ */ + group->stopped = true; + + list_for_each_entry_safe(eq, next, &group->list, multi_queue.link) + /* + * Refcount prevents an attempted removal from &group->list, + * temporary list allows safe iteration after dropping + * &group->list_lock. + */ + if (xe_exec_queue_get_unless_zero(eq)) + list_move_tail(&eq->multi_queue.link, &tmp); + + mutex_unlock(&group->list_lock); + + /* We cannot stop under list lock without getting inversions */ + xe_sched_submission_stop(&primary->guc->sched); + list_for_each_entry(eq, &tmp, multi_queue.link) + xe_sched_submission_stop(&eq->guc->sched); + + mutex_lock(&group->list_lock); + list_for_each_entry_safe(eq, next, &tmp, multi_queue.link) { + /* + * Corner where we got banned while stopping and not on + * &group->list + */ + if (READ_ONCE(group->banned)) + xe_guc_exec_queue_trigger_cleanup(eq); + + list_move_tail(&eq->multi_queue.link, &group->list); + xe_exec_queue_put(eq); + } + mutex_unlock(&group->list_lock); +} + +static void xe_guc_exec_queue_group_start(struct xe_exec_queue *q) +{ + struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); + struct xe_exec_queue_group *group = q->multi_queue.group; + struct xe_exec_queue *eq; + + xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), + xe_exec_queue_is_multi_queue(q)); + + xe_sched_submission_start(&primary->guc->sched); + + mutex_lock(&group->list_lock); + group->stopped = false; + list_for_each_entry(eq, &group->list, multi_queue.link) + xe_sched_submission_start(&eq->guc->sched); + mutex_unlock(&group->list_lock); +} + static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q) { struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); @@ -1414,7 +1480,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) { struct xe_sched_job *job = to_xe_sched_job(drm_job); struct drm_sched_job *tmp_job; - struct xe_exec_queue *q = job->q; + struct xe_exec_queue *q = job->q, *primary; struct xe_gpu_scheduler *sched = &q->guc->sched; struct 
xe_guc *guc = exec_queue_to_guc(q); const char *process_name = "no process"; @@ -1425,6 +1491,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); + primary = xe_exec_queue_multi_queue_primary(q); + /* * TDR has fired before free job worker. Common if exec queue * immediately closed after last fence signaled. Add back to pending @@ -1436,7 +1504,10 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) return DRM_GPU_SCHED_STAT_NO_HANG; /* Kill the run_job entry point */ - xe_sched_submission_stop(sched); + if (xe_exec_queue_is_multi_queue(q)) + xe_guc_exec_queue_group_stop(q); + else + xe_sched_submission_stop(sched); /* Must check all state after stopping scheduler */ skip_timeout_check = exec_queue_reset(q) || @@ -1451,14 +1522,6 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) if (xe_exec_queue_is_lr(q)) xe_gt_assert(guc_to_gt(guc), skip_timeout_check); - /* - * FIXME: In multi-queue scenario, the TDR must ensure that the whole - * multi-queue group is off the HW before signaling the fences to avoid - * possible memory corruptions. This means disabling scheduling on the - * primary queue before or during the secondary queue's TDR. Need to - * implement this in least obtrusive way. 
- */ - /* * If devcoredump not captured and GuC capture for the job is not ready * do manual capture first and decide later if we need to use it @@ -1485,10 +1548,11 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) set_exec_queue_banned(q); /* Kick job / queue off hardware */ - if (!wedged && (exec_queue_enabled(q) || exec_queue_pending_disable(q))) { + if (!wedged && (exec_queue_enabled(primary) || + exec_queue_pending_disable(primary))) { int ret; - if (exec_queue_reset(q)) + if (exec_queue_reset(primary)) err = -EIO; if (xe_uc_fw_is_running(&guc->fw)) { @@ -1497,8 +1561,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * modifying state */ ret = wait_event_timeout(guc->ct.wq, - (!exec_queue_pending_enable(q) && - !exec_queue_pending_disable(q)) || + (!exec_queue_pending_enable(primary) && + !exec_queue_pending_disable(primary)) || xe_guc_read_stopped(guc) || vf_recovery(guc), HZ * 5); if (vf_recovery(guc)) @@ -1506,7 +1570,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) if (!ret || xe_guc_read_stopped(guc)) goto trigger_reset; - disable_scheduling(q, skip_timeout_check); + disable_scheduling(primary, skip_timeout_check); } /* @@ -1520,7 +1584,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) smp_rmb(); ret = wait_event_timeout(guc->ct.wq, !xe_uc_fw_is_running(&guc->fw) || - !exec_queue_pending_disable(q) || + !exec_queue_pending_disable(primary) || xe_guc_read_stopped(guc) || vf_recovery(guc), HZ * 5); if (vf_recovery(guc)) @@ -1530,11 +1594,11 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) if (!ret) xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond, guc_id=%d", - q->guc->id); - xe_devcoredump(q, job, + primary->guc->id); + xe_devcoredump(primary, job, "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d", - q->guc->id, ret, xe_guc_read_stopped(guc)); - xe_gt_reset_async(q->gt); + primary->guc->id, ret, xe_guc_read_stopped(guc)); + xe_gt_reset_async(primary->gt); 
xe_sched_tdr_queue_imm(sched); goto rearm; } @@ -1580,12 +1644,13 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) drm_sched_for_each_pending_job(tmp_job, &sched->base, NULL) xe_sched_job_set_error(to_xe_sched_job(tmp_job), -ECANCELED); - xe_sched_submission_start(sched); - - if (xe_exec_queue_is_multi_queue(q)) + if (xe_exec_queue_is_multi_queue(q)) { + xe_guc_exec_queue_group_start(q); xe_guc_exec_queue_group_trigger_cleanup(q); - else + } else { + xe_sched_submission_start(sched); xe_guc_exec_queue_trigger_cleanup(q); + } /* * We want the job added back to the pending list so it gets freed; this @@ -1599,7 +1664,10 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * but there is not currently an easy way to do in DRM scheduler. With * some thought, do this in a follow up. */ - xe_sched_submission_start(sched); + if (xe_exec_queue_is_multi_queue(q)) + xe_guc_exec_queue_group_start(q); + else + xe_sched_submission_start(sched); handle_vf_resume: return DRM_GPU_SCHED_STAT_NO_HANG; } @@ -1965,6 +2033,8 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) INIT_LIST_HEAD(&q->multi_queue.link); mutex_lock(&group->list_lock); + if (group->stopped) + WRITE_ONCE(q->guc->sched.base.pause_submit, true); list_add_tail(&q->multi_queue.link, &group->list); mutex_unlock(&group->list_lock); } From 769d7774a1b82f8fde8ce1ff8e4d006e68d8c153 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Fri, 16 Jan 2026 14:03:33 -0800 Subject: [PATCH 004/195] drm/xe/multi_queue: Enable multi_queue on xe3p_xpc xe3p_xpc supports multi_queue, enable it. 
v2: Rename multi_queue_enable_mask to multi_queue_engine_class_mask (Matt Brost) Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260116220333.861850-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 09189ff3da44..c000c25b5af9 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -112,6 +112,8 @@ static const struct xe_graphics_desc graphics_xe3p_xpc = { .hw_engine_mask = GENMASK(XE_HW_ENGINE_BCS8, XE_HW_ENGINE_BCS1) | GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0), + .multi_queue_engine_class_mask = BIT(XE_ENGINE_CLASS_COPY) | + BIT(XE_ENGINE_CLASS_COMPUTE), }; static const struct xe_media_desc media_xem = { From 888c7f991ffe608a2c9ad9f9420e16c61adea79d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 16 Jan 2026 14:17:21 -0800 Subject: [PATCH 005/195] drm/xe: Add normalize_invalidation_range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract the code that determines the alignment of TLB invalidation into a helper function — normalize_invalidation_range. This will be useful when adding context-based invalidations to the GuC TLB invalidation backend. 
Signed-off-by: Nirmoy Das Signed-off-by: Matthew Brost Reviewed-by: Stuart Summers Tested-by: Stuart Summers Link: https://patch.msgid.link/20260116221731.868657-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_tlb_inval.c | 71 +++++++++++++-------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c index 774467befbb9..43f6dbcb2bc2 100644 --- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c +++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c @@ -111,6 +111,38 @@ static int send_page_reclaim(struct xe_guc *guc, u32 seqno, G2H_LEN_DW_PAGE_RECLAMATION, 1); } +static u64 normalize_invalidation_range(struct xe_gt *gt, u64 *start, u64 *end) +{ + u64 orig_start = *start; + u64 length = *end - *start; + u64 align; + + if (length < SZ_4K) + length = SZ_4K; + + align = roundup_pow_of_two(length); + *start = ALIGN_DOWN(*start, align); + *end = ALIGN(*end, align); + length = align; + while (*start + length < *end) { + length <<= 1; + *start = ALIGN_DOWN(orig_start, length); + } + + if (length >= SZ_2M) { + length = max_t(u64, SZ_16M, length); + *start = ALIGN_DOWN(orig_start, length); + } + + xe_gt_assert(gt, length >= SZ_4K); + xe_gt_assert(gt, is_power_of_2(length)); + xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, + ilog2(SZ_2M) + 1))); + xe_gt_assert(gt, IS_ALIGNED(*start, length)); + + return length; +} + /* * Ensure that roundup_pow_of_two(length) doesn't overflow. * Note that roundup_pow_of_two() operates on unsigned long, @@ -138,48 +170,15 @@ static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, length > MAX_RANGE_TLB_INVALIDATION_LENGTH) { action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); } else { - u64 orig_start = start; - u64 align; - - if (length < SZ_4K) - length = SZ_4K; - - /* - * We need to invalidate a higher granularity if start address - * is not aligned to length. 
When start is not aligned with - * length we need to find the length large enough to create an - * address mask covering the required range. - */ - align = roundup_pow_of_two(length); - start = ALIGN_DOWN(start, align); - end = ALIGN(end, align); - length = align; - while (start + length < end) { - length <<= 1; - start = ALIGN_DOWN(orig_start, length); - } - - /* - * Minimum invalidation size for a 2MB page that the hardware - * expects is 16MB - */ - if (length >= SZ_2M) { - length = max_t(u64, SZ_16M, length); - start = ALIGN_DOWN(orig_start, length); - } - - xe_gt_assert(gt, length >= SZ_4K); - xe_gt_assert(gt, is_power_of_2(length)); - xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, - ilog2(SZ_2M) + 1))); - xe_gt_assert(gt, IS_ALIGNED(start, length)); + u64 normalize_len = normalize_invalidation_range(gt, &start, + &end); /* Flush on NULL case, Media is not required to modify flush due to no PPC so NOP */ action[len++] = MAKE_INVAL_OP_FLUSH(XE_GUC_TLB_INVAL_PAGE_SELECTIVE, !prl_sa); action[len++] = asid; action[len++] = lower_32_bits(start); action[len++] = upper_32_bits(start); - action[len++] = ilog2(length) - ilog2(SZ_4K); + action[len++] = ilog2(normalize_len) - ilog2(SZ_4K); } xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN); From 444d78578e8a79537e527b50fda17d6aa2d30b79 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 16 Jan 2026 14:17:22 -0800 Subject: [PATCH 006/195] drm/xe: Make usm.asid_to_vm allocation use GFP_NOWAIT Ensure the asid_to_vm lookup is reclaim-safe so it can be performed during TLB invalidations, which is necessary for context-based TLB invalidation support. 
Signed-off-by: Matthew Brost Reviewed-by: Stuart Summers Tested-by: Stuart Summers Link: https://patch.msgid.link/20260116221731.868657-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index bbbc7e71b8ef..24647b128a17 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1653,7 +1653,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) down_write(&xe->usm.lock); err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, XA_LIMIT(1, XE_MAX_ASID - 1), - &xe->usm.next_asid, GFP_KERNEL); + &xe->usm.next_asid, GFP_NOWAIT); up_write(&xe->usm.lock); if (err < 0) goto err_close; From dea333b244818ab06253b8420a7534fd770eef36 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 16 Jan 2026 14:17:23 -0800 Subject: [PATCH 007/195] drm/xe: Add has_ctx_tlb_inval to device info Add has_ctx_tlb_inval to device info indicating a device has context based TLB invalidation. 
Signed-off-by: Matthew Brost Reviewed-by: Stuart Summers Tested-by: Stuart Summers Link: https://patch.msgid.link/20260116221731.868657-4-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_pci.c | 1 + drivers/gpu/drm/xe/xe_pci_types.h | 1 + 3 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index f689766adcb1..72453206267b 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -353,6 +353,8 @@ struct xe_device { u8 has_pre_prod_wa:1; /** @info.has_pxp: Device has PXP support */ u8 has_pxp:1; + /** @info.has_ctx_tlb_inval: Has context based TLB invalidations */ + u8 has_ctx_tlb_inval:1; /** @info.has_range_tlb_inval: Has range based TLB invalidations */ u8 has_range_tlb_inval:1; /** @info.has_soc_remapper_sysctrl: Has SoC remapper system controller */ diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index c000c25b5af9..f367479fe3fb 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -893,6 +893,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.has_device_atomics_on_smem = 1; xe->info.has_range_tlb_inval = graphics_desc->has_range_tlb_inval; + xe->info.has_ctx_tlb_inval = graphics_desc->has_ctx_tlb_inval; xe->info.has_usm = graphics_desc->has_usm; xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index 20acc5349ee6..7ccb0ab7a53b 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -72,6 +72,7 @@ struct xe_graphics_desc { u8 has_atomic_enable_pte_bit:1; u8 has_indirect_ring_state:1; u8 has_range_tlb_inval:1; + u8 has_ctx_tlb_inval:1; u8 has_usm:1; u8 has_64bit_timestamp:1; }; From 43c3e6eacb22c7bcf871bd0220a35a03b5aa0e5c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 16 Jan 2026 14:17:24 -0800 Subject: [PATCH 
008/195] drm/xe: Add xe_device_asid_to_vm helper Introduce the xe_device_asid_to_vm helper, which can be used throughout the driver to resolve the VM from a given ASID. v4: - Move forward declare after includes (Stuart) Signed-off-by: Matthew Brost Reviewed-by: Matt Atwood Reviewed-by: Stuart Summers Tested-by: Stuart Summers Link: https://patch.msgid.link/20260116221731.868657-5-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_device.c | 25 +++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_device.h | 4 ++++ 2 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 495310a624b5..aad4aa53a51f 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -1375,3 +1375,28 @@ const char *xe_wedged_mode_to_string(enum xe_wedged_mode mode) return ""; } } + +/** + * xe_device_asid_to_vm() - Find VM from ASID + * @xe: the &xe_device + * @asid: Address space ID + * + * Find a VM from ASID and take a reference to VM which caller must drop. + * Reclaim safe. 
+ * + * Return: VM on success, ERR_PTR on failure + */ +struct xe_vm *xe_device_asid_to_vm(struct xe_device *xe, u32 asid) +{ + struct xe_vm *vm; + + down_read(&xe->usm.lock); + vm = xa_load(&xe->usm.asid_to_vm, asid); + if (vm) + xe_vm_get(vm); + else + vm = ERR_PTR(-EINVAL); + up_read(&xe->usm.lock); + + return vm; +} diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 3740143790db..d25421e5181c 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -12,6 +12,8 @@ #include "xe_gt_types.h" #include "xe_sriov.h" +struct xe_vm; + static inline struct xe_device *to_xe_device(const struct drm_device *dev) { return container_of(dev, struct xe_device, drm); @@ -204,6 +206,8 @@ int xe_is_injection_active(void); bool xe_is_xe_file(const struct file *file); +struct xe_vm *xe_device_asid_to_vm(struct xe_device *xe, u32 asid); + /* * Occasionally it is seen that the G2H worker starts running after a delay of more than * a second even after being queued and activated by the Linux workqueue subsystem. This From a3866ce7b1221353b795603bb8d0c81d81e60e65 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 16 Jan 2026 14:17:25 -0800 Subject: [PATCH 009/195] drm/xe: Add vm to exec queues association Maintain a list of exec queues per vm which will be used by TLB invalidation code to do context-ID based tlb invalidations. 
v4: - More asserts (Stuart) - Per GT list (CI) - Skip adding / removal if context TLB invalidations not supported (Stuart) Signed-off-by: Nirmoy Das Signed-off-by: Matthew Brost Reviewed-by: Stuart Summers Tested-by: Stuart Summers Link: https://patch.msgid.link/20260116221731.868657-6-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_device.h | 7 --- drivers/gpu/drm/xe/xe_device_types.h | 7 +++ drivers/gpu/drm/xe/xe_exec_queue.c | 7 ++- drivers/gpu/drm/xe/xe_exec_queue_types.h | 3 ++ drivers/gpu/drm/xe/xe_vm.c | 62 ++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_vm.h | 3 ++ drivers/gpu/drm/xe/xe_vm_types.h | 16 ++++++ 7 files changed, 97 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index d25421e5181c..58d7d8b2fea3 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -62,13 +62,6 @@ static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe) return &xe->tiles[0]; } -/* - * Highest GT/tile count for any platform. Used only for memory allocation - * sizing. Any logic looping over GTs or mapping userspace GT IDs into GT - * structures should use the per-platform xe->info.max_gt_per_tile instead. - */ -#define XE_MAX_GT_PER_TILE 2 - static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) { struct xe_tile *tile; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 72453206267b..34feef79fa4e 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -79,6 +79,13 @@ enum xe_wedged_mode { #define XE_GT1 1 #define XE_MAX_TILES_PER_DEVICE (XE_GT1 + 1) +/* + * Highest GT/tile count for any platform. Used only for memory allocation + * sizing. Any logic looping over GTs or mapping userspace GT IDs into GT + * structures should use the per-platform xe->info.max_gt_per_tile instead. 
+ */ +#define XE_MAX_GT_PER_TILE 2 + #define XE_MAX_ASID (BIT(20)) #define IS_PLATFORM_STEP(_xe, _platform, min_step, max_step) \ diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index a940849bb6c7..a58968a0a781 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -152,8 +152,10 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q) if (xe_exec_queue_is_multi_queue(q)) xe_exec_queue_group_cleanup(q); - if (q->vm) + if (q->vm) { + xe_vm_remove_exec_queue(q->vm, q); xe_vm_put(q->vm); + } if (q->xef) xe_file_put(q->xef); @@ -224,6 +226,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, q->ring_ops = gt->ring_ops[hwe->class]; q->ops = gt->exec_queue_ops; INIT_LIST_HEAD(&q->lr.link); + INIT_LIST_HEAD(&q->vm_exec_queue_link); INIT_LIST_HEAD(&q->multi_gt_link); INIT_LIST_HEAD(&q->hw_engine_group_link); INIT_LIST_HEAD(&q->pxp.link); @@ -1203,6 +1206,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, } q->xef = xe_file_get(xef); + if (eci[0].engine_class != DRM_XE_ENGINE_CLASS_VM_BIND) + xe_vm_add_exec_queue(vm, q); /* user id alloc must always be last in ioctl to prevent UAF */ err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 562ea75891ba..e30d295aaaae 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -207,6 +207,9 @@ struct xe_exec_queue { struct dma_fence *last_fence; } tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_COUNT]; + /** @vm_exec_queue_link: Link to track exec queue within a VM's list of exec queues. 
*/ + struct list_head vm_exec_queue_link; + /** @pxp: PXP info tracking */ struct { /** @pxp.type: PXP session type used by this queue */ diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 24647b128a17..e330c794b626 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1529,11 +1529,24 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) INIT_WORK(&vm->destroy_work, vm_destroy_work_func); INIT_LIST_HEAD(&vm->preempt.exec_queues); + for (id = 0; id < XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE; ++id) + INIT_LIST_HEAD(&vm->exec_queues.list[id]); if (flags & XE_VM_FLAG_FAULT_MODE) vm->preempt.min_run_period_ms = xe->min_run_period_pf_ms; else vm->preempt.min_run_period_ms = xe->min_run_period_lr_ms; + init_rwsem(&vm->exec_queues.lock); + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&vm->exec_queues.lock); + fs_reclaim_release(GFP_KERNEL); + + down_read(&vm->exec_queues.lock); + might_lock(&xe_root_mmio_gt(xe)->uc.guc.ct.lock); + up_read(&vm->exec_queues.lock); + } + for_each_tile(tile, xe, id) xe_range_fence_tree_init(&vm->rftree[id]); @@ -4569,3 +4582,52 @@ int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t r return xe_vm_alloc_vma(vm, &map_req, false); } +/** + * xe_vm_add_exec_queue() - Add exec queue to VM + * @vm: The VM. + * @q: The exec_queue + * + * Add exec queue to VM, skipped if the device does not have context based TLB + * invalidations. 
+ */ +void xe_vm_add_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) +{ + struct xe_device *xe = vm->xe; + + /* User VMs and queues only */ + xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL)); + xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); + xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM)); + xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_MIGRATE)); + xe_assert(xe, vm->xef); + xe_assert(xe, vm == q->vm); + + if (!xe->info.has_ctx_tlb_inval) + return; + + down_write(&vm->exec_queues.lock); + list_add(&q->vm_exec_queue_link, &vm->exec_queues.list[q->gt->info.id]); + ++vm->exec_queues.count[q->gt->info.id]; + up_write(&vm->exec_queues.lock); +} + +/** + * xe_vm_remove_exec_queue() - Remove exec queue from VM + * @vm: The VM. + * @q: The exec_queue + * + * Remove exec queue from VM, skipped if the device does not have context based + * TLB invalidations. + */ +void xe_vm_remove_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) +{ + if (!vm->xe->info.has_ctx_tlb_inval) + return; + + down_write(&vm->exec_queues.lock); + if (!list_empty(&q->vm_exec_queue_link)) { + list_del(&q->vm_exec_queue_link); + --vm->exec_queues.count[q->gt->info.id]; + } + up_write(&vm->exec_queues.lock); +} diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 6cc98df47291..288115c7844a 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -287,6 +287,9 @@ static inline struct dma_resv *xe_vm_resv(struct xe_vm *vm) void xe_vm_kill(struct xe_vm *vm, bool unlocked); +void xe_vm_add_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); +void xe_vm_remove_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); + /** * xe_vm_assert_held(vm) - Assert that the vm's reservation object is held. 
* @vm: The vm diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 437f64202f3b..43203e90ee3e 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -298,6 +298,22 @@ struct xe_vm { struct list_head pm_activate_link; } preempt; + /** @exec_queues: Manages list of exec queues attached to this VM, protected by lock. */ + struct { + /** + * @exec_queues.list: list of exec queues attached to this VM, + * per GT + */ + struct list_head list[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; + /** + * @exec_queues.count: count of exec queues attached to this VM, + * per GT + */ + int count[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; + /** @exec_queues.lock: lock to protect exec_queues list */ + struct rw_semaphore lock; + } exec_queues; + /** @um: unified memory state */ struct { /** @asid: address space ID, unique to each VM */ From 8d7a9f801ed72c6a2506bb26f2eec1f8245bface Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 16 Jan 2026 14:17:26 -0800 Subject: [PATCH 010/195] drm/xe: Taint TLB invalidation seqno lock with GFP_KERNEL Taint TLB invalidation seqno lock with GFP_KERNEL as TLB invalidations can be in the path of reclaim (e.g., MMU notifiers). 
Signed-off-by: Matthew Brost Reviewed-by: Matt Atwood Tested-by: Stuart Summers Link: https://patch.msgid.link/20260116221731.868657-7-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_tlb_inval.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c index e837888367c4..21fef337f29c 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval.c +++ b/drivers/gpu/drm/xe/xe_tlb_inval.c @@ -111,6 +111,16 @@ static void tlb_inval_fini(struct drm_device *drm, void *arg) xe_tlb_inval_reset(tlb_inval); } +static void primelockdep(struct xe_tlb_inval *tlb_inval) +{ + if (!IS_ENABLED(CONFIG_LOCKDEP)) + return; + + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&tlb_inval->seqno_lock); + fs_reclaim_release(GFP_KERNEL); +} + /** * xe_gt_tlb_inval_init - Initialize TLB invalidation state * @gt: GT structure @@ -137,6 +147,8 @@ int xe_gt_tlb_inval_init_early(struct xe_gt *gt) if (err) return err; + primelockdep(tlb_inval); + tlb_inval->job_wq = drmm_alloc_ordered_workqueue(&xe->drm, "gt-tbl-inval-job-wq", WQ_MEM_RECLAIM); From edcc15f489c4c30667b78418228d6a84dbf6a464 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 16 Jan 2026 14:17:27 -0800 Subject: [PATCH 011/195] drm/xe: Rename send_tlb_inval_ppgtt to send_tlb_inval_asid_ppgtt Context-based TLB invalidations have their own set of GuC TLB invalidation operations. Rename the current PPGTT invalidation function, which operates on ASIDs, to a more descriptive name that reflects its purpose. 
Signed-off-by: Matthew Brost Reviewed-by: Stuart Summers Tested-by: Stuart Summers Link: https://patch.msgid.link/20260116221731.868657-8-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_tlb_inval.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c index 43f6dbcb2bc2..a6a1c371a28e 100644 --- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c +++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c @@ -150,9 +150,9 @@ static u64 normalize_invalidation_range(struct xe_gt *gt, u64 *start, u64 *end) */ #define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX)) -static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, - u64 start, u64 end, u32 asid, - struct drm_suballoc *prl_sa) +static int send_tlb_inval_asid_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, + u64 start, u64 end, u32 asid, + struct drm_suballoc *prl_sa) { #define MAX_TLB_INVALIDATION_LEN 7 struct xe_guc *guc = tlb_inval->private; @@ -219,7 +219,7 @@ static long tlb_inval_timeout_delay(struct xe_tlb_inval *tlb_inval) static const struct xe_tlb_inval_ops guc_tlb_inval_ops = { .all = send_tlb_inval_all, .ggtt = send_tlb_inval_ggtt, - .ppgtt = send_tlb_inval_ppgtt, + .ppgtt = send_tlb_inval_asid_ppgtt, .initialized = tlb_inval_initialized, .flush = tlb_inval_flush, .timeout_delay = tlb_inval_timeout_delay, From 2d93d5d53024257e686b3aa839e148cde776e35e Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 16 Jan 2026 14:17:28 -0800 Subject: [PATCH 012/195] drm/xe: Add send_tlb_inval_ppgtt helper Extract the common code that issues a TLB invalidation H2G for PPGTTs into a helper function. This helper can be reused for both ASID-based and context-based TLB invalidations. 
Signed-off-by: Matthew Brost Reviewed-by: Stuart Summers Tested-by: Stuart Summers Link: https://patch.msgid.link/20260116221731.868657-9-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_tlb_inval.c | 30 +++++++++++++++++++-------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c index a6a1c371a28e..070d2e2cb7c9 100644 --- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c +++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c @@ -150,20 +150,16 @@ static u64 normalize_invalidation_range(struct xe_gt *gt, u64 *start, u64 *end) */ #define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX)) -static int send_tlb_inval_asid_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, - u64 start, u64 end, u32 asid, - struct drm_suballoc *prl_sa) +static int send_tlb_inval_ppgtt(struct xe_guc *guc, u32 seqno, u64 start, + u64 end, u32 id, u32 type, + struct drm_suballoc *prl_sa) { #define MAX_TLB_INVALIDATION_LEN 7 - struct xe_guc *guc = tlb_inval->private; struct xe_gt *gt = guc_to_gt(guc); u32 action[MAX_TLB_INVALIDATION_LEN]; u64 length = end - start; int len = 0, err; - if (guc_to_xe(guc)->info.force_execlist) - return -ECANCELED; - action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; action[len++] = !prl_sa ? 
seqno : TLB_INVALIDATION_SEQNO_INVALID; if (!gt_to_xe(gt)->info.has_range_tlb_inval || @@ -174,14 +170,15 @@ static int send_tlb_inval_asid_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, &end); /* Flush on NULL case, Media is not required to modify flush due to no PPC so NOP */ - action[len++] = MAKE_INVAL_OP_FLUSH(XE_GUC_TLB_INVAL_PAGE_SELECTIVE, !prl_sa); - action[len++] = asid; + action[len++] = MAKE_INVAL_OP_FLUSH(type, !prl_sa); + action[len++] = id; action[len++] = lower_32_bits(start); action[len++] = upper_32_bits(start); action[len++] = ilog2(normalize_len) - ilog2(SZ_4K); } xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN); +#undef MAX_TLB_INVALIDATION_LEN err = send_tlb_inval(guc, action, len); if (!err && prl_sa) @@ -189,6 +186,21 @@ static int send_tlb_inval_asid_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, return err; } +static int send_tlb_inval_asid_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, + u64 start, u64 end, u32 asid, + struct drm_suballoc *prl_sa) +{ + struct xe_guc *guc = tlb_inval->private; + + lockdep_assert_held(&tlb_inval->seqno_lock); + + if (guc_to_xe(guc)->info.force_execlist) + return -ECANCELED; + + return send_tlb_inval_ppgtt(guc, seqno, start, end, asid, + XE_GUC_TLB_INVAL_PAGE_SELECTIVE, prl_sa); +} + static bool tlb_inval_initialized(struct xe_tlb_inval *tlb_inval) { struct xe_guc *guc = tlb_inval->private; From 6b42b635d6a20fd418ecc7c4c3ad52ef99fe7227 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 16 Jan 2026 14:17:29 -0800 Subject: [PATCH 013/195] drm/xe: Add xe_tlb_inval_idle helper Introduce the xe_tlb_inval_idle helper to detect whether any TLB invalidations are currently in flight. This is used in context-based TLB invalidations to determine whether dummy TLB invalidations need to be sent to maintain proper TLB invalidation fence ordering.. 
v2: - Implement xe_tlb_inval_idle based on pending list Signed-off-by: Matthew Brost Reviewed-by: Stuart Summers Tested-by: Stuart Summers Link: https://patch.msgid.link/20260116221731.868657-10-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_tlb_inval.c | 21 +++++++++++++++++++++ drivers/gpu/drm/xe/xe_tlb_inval.h | 2 ++ 2 files changed, 23 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c index 21fef337f29c..989fe0e7f8ee 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval.c +++ b/drivers/gpu/drm/xe/xe_tlb_inval.c @@ -41,11 +41,14 @@ static void xe_tlb_inval_fence_fini(struct xe_tlb_inval_fence *fence) static void xe_tlb_inval_fence_signal(struct xe_tlb_inval_fence *fence) { + struct xe_tlb_inval *tlb_inval = fence->tlb_inval; bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags); lockdep_assert_held(&fence->tlb_inval->pending_lock); list_del(&fence->link); + if (list_empty(&tlb_inval->pending_fences)) + cancel_delayed_work(&tlb_inval->fence_tdr); trace_xe_tlb_inval_fence_signal(fence->tlb_inval->xe, fence); xe_tlb_inval_fence_fini(fence); dma_fence_signal(&fence->base); @@ -465,3 +468,21 @@ void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval, dma_fence_get(&fence->base); fence->tlb_inval = tlb_inval; } + +/** + * xe_tlb_inval_idle() - Check if TLB invalidation is idle + * @tlb_inval: TLB invalidation client + * + * Check the TLB invalidation seqno to determine if it is idle (i.e., no TLB + * invalidations are in flight). Expected to be called in the backend after the + * fence has been added to the pending list, and takes this into account.
+ * + * Return: True if TLB invalidation client is idle, False otherwise + */ +bool xe_tlb_inval_idle(struct xe_tlb_inval *tlb_inval) +{ + lockdep_assert_held(&tlb_inval->seqno_lock); + + guard(spinlock_irq)(&tlb_inval->pending_lock); + return list_is_singular(&tlb_inval->pending_fences); +} diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.h b/drivers/gpu/drm/xe/xe_tlb_inval.h index 858d0690f995..62089254fa23 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval.h +++ b/drivers/gpu/drm/xe/xe_tlb_inval.h @@ -43,4 +43,6 @@ xe_tlb_inval_fence_wait(struct xe_tlb_inval_fence *fence) void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno); +bool xe_tlb_inval_idle(struct xe_tlb_inval *tlb_inval); + #endif /* _XE_TLB_INVAL_ */ From 628d59392cc571930f52e121892c7a72f7c1d65b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 16 Jan 2026 14:17:30 -0800 Subject: [PATCH 014/195] drm/xe: Add exec queue active vfunc If an exec queue is inactive (e.g., not registered or scheduling is disabled), TLB invalidations are not issued for that queue. Add a virtual function to determine the active state, which TLB invalidation logic can hook into. 
v5: - Operate on primary in active function Signed-off-by: Matthew Brost Tested-by: Stuart Summers Reviewed-by: Stuart Summers Link: https://patch.msgid.link/20260116221731.868657-11-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_exec_queue_types.h | 2 ++ drivers/gpu/drm/xe/xe_execlist.c | 7 +++++++ drivers/gpu/drm/xe/xe_guc_submit.c | 9 +++++++++ 3 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index e30d295aaaae..601e742c79ff 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -300,6 +300,8 @@ struct xe_exec_queue_ops { void (*resume)(struct xe_exec_queue *q); /** @reset_status: check exec queue reset status */ bool (*reset_status)(struct xe_exec_queue *q); + /** @active: check exec queue is active */ + bool (*active)(struct xe_exec_queue *q); }; #endif diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 8bf330aeaec0..005a5b2c36fe 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -468,6 +468,12 @@ static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q) return false; } +static bool execlist_exec_queue_active(struct xe_exec_queue *q) +{ + /* NIY */ + return false; +} + static const struct xe_exec_queue_ops execlist_exec_queue_ops = { .init = execlist_exec_queue_init, .kill = execlist_exec_queue_kill, @@ -480,6 +486,7 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = { .suspend_wait = execlist_exec_queue_suspend_wait, .resume = execlist_exec_queue_resume, .reset_status = execlist_exec_queue_reset_status, + .active = execlist_exec_queue_active, }; int xe_execlist_init(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index dee0f9004024..456f549c16f6 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -2276,6 +2276,14 @@ static bool 
guc_exec_queue_reset_status(struct xe_exec_queue *q) return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q); } +static bool guc_exec_queue_active(struct xe_exec_queue *q) +{ + struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q); + + return exec_queue_enabled(primary) && + !exec_queue_pending_disable(primary); +} + /* * All of these functions are an abstraction layer which other parts of Xe can * use to trap into the GuC backend. All of these functions, aside from init, @@ -2295,6 +2303,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = { .suspend_wait = guc_exec_queue_suspend_wait, .resume = guc_exec_queue_resume, .reset_status = guc_exec_queue_reset_status, + .active = guc_exec_queue_active, }; static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) From 6cdaa5346d6f3f6116e607e49c92c2401390c267 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 16 Jan 2026 14:17:31 -0800 Subject: [PATCH 015/195] drm/xe: Add context-based invalidation to GuC TLB invalidation backend Introduce context-based invalidation support to the GuC TLB invalidation backend. This is implemented by iterating over each exec queue per GT within a VM, skipping inactive queues, and issuing a context-based (GuC ID) H2G TLB invalidation. All H2G messages, except the final one, are sent with an invalid seqno, which the G2H handler drops to ensure the TLB invalidation fence is only signaled once all H2G messages are completed. A watermark mechanism is also added to switch between context-based TLB invalidations and full device-wide invalidations, as the return on investment for context-based invalidation diminishes when many exec queues are mapped. 
v2: - Fix checkpatch warnings v3: - Rebase on PRL - Use ref counting to avoid racing with deregisters v4: - Extra braces (Stuart) - Use per GT list (CI) - Reorder put Signed-off-by: Matthew Brost Tested-by: Stuart Summers Reviewed-by: Stuart Summers Link: https://patch.msgid.link/20260116221731.868657-12-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_tlb_inval.c | 145 +++++++++++++++++++++++++- 1 file changed, 141 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c index 070d2e2cb7c9..ced58f46f846 100644 --- a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c +++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c @@ -6,15 +6,19 @@ #include "abi/guc_actions_abi.h" #include "xe_device.h" +#include "xe_exec_queue.h" +#include "xe_exec_queue_types.h" #include "xe_gt_stats.h" #include "xe_gt_types.h" #include "xe_guc.h" #include "xe_guc_ct.h" +#include "xe_guc_exec_queue_types.h" #include "xe_guc_tlb_inval.h" #include "xe_force_wake.h" #include "xe_mmio.h" #include "xe_sa.h" #include "xe_tlb_inval.h" +#include "xe_vm.h" #include "regs/xe_guc_regs.h" @@ -156,10 +160,16 @@ static int send_tlb_inval_ppgtt(struct xe_guc *guc, u32 seqno, u64 start, { #define MAX_TLB_INVALIDATION_LEN 7 struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = guc_to_xe(guc); u32 action[MAX_TLB_INVALIDATION_LEN]; u64 length = end - start; int len = 0, err; + xe_gt_assert(gt, (type == XE_GUC_TLB_INVAL_PAGE_SELECTIVE && + !xe->info.has_ctx_tlb_inval) || + (type == XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX && + xe->info.has_ctx_tlb_inval)); + action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; action[len++] = !prl_sa ? 
seqno : TLB_INVALIDATION_SEQNO_INVALID; if (!gt_to_xe(gt)->info.has_range_tlb_inval || @@ -168,9 +178,11 @@ static int send_tlb_inval_ppgtt(struct xe_guc *guc, u32 seqno, u64 start, } else { u64 normalize_len = normalize_invalidation_range(gt, &start, &end); + bool need_flush = !prl_sa && + seqno != TLB_INVALIDATION_SEQNO_INVALID; /* Flush on NULL case, Media is not required to modify flush due to no PPC so NOP */ - action[len++] = MAKE_INVAL_OP_FLUSH(type, !prl_sa); + action[len++] = MAKE_INVAL_OP_FLUSH(type, need_flush); action[len++] = id; action[len++] = lower_32_bits(start); action[len++] = upper_32_bits(start); @@ -181,8 +193,10 @@ static int send_tlb_inval_ppgtt(struct xe_guc *guc, u32 seqno, u64 start, #undef MAX_TLB_INVALIDATION_LEN err = send_tlb_inval(guc, action, len); - if (!err && prl_sa) + if (!err && prl_sa) { + xe_gt_assert(gt, seqno != TLB_INVALIDATION_SEQNO_INVALID); err = send_page_reclaim(guc, seqno, xe_sa_bo_gpu_addr(prl_sa)); + } return err; } @@ -201,6 +215,114 @@ static int send_tlb_inval_asid_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, XE_GUC_TLB_INVAL_PAGE_SELECTIVE, prl_sa); } +static int send_tlb_inval_ctx_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, + u64 start, u64 end, u32 asid, + struct drm_suballoc *prl_sa) +{ + struct xe_guc *guc = tlb_inval->private; + struct xe_device *xe = guc_to_xe(guc); + struct xe_exec_queue *q, *next, *last_q = NULL; + struct xe_vm *vm; + LIST_HEAD(tlb_inval_list); + int err = 0, id = guc_to_gt(guc)->info.id; + + lockdep_assert_held(&tlb_inval->seqno_lock); + + if (xe->info.force_execlist) + return -ECANCELED; + + vm = xe_device_asid_to_vm(xe, asid); + if (IS_ERR(vm)) + return PTR_ERR(vm); + + down_read(&vm->exec_queues.lock); + + /* + * XXX: Randomly picking a threshold for now. This will need to be + * tuned based on expected UMD queue counts and performance profiling. 
+ */ +#define EXEC_QUEUE_COUNT_FULL_THRESHOLD 8 + if (vm->exec_queues.count[id] >= EXEC_QUEUE_COUNT_FULL_THRESHOLD) { + u32 action[] = { + XE_GUC_ACTION_TLB_INVALIDATION, + seqno, + MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL), + }; + + err = send_tlb_inval(guc, action, ARRAY_SIZE(action)); + goto err_unlock; + } +#undef EXEC_QUEUE_COUNT_FULL_THRESHOLD + + /* + * Move exec queues to a temporary list to issue invalidations. The exec + * queue must be active and a reference must be taken to prevent concurrent + * deregistrations. + * + * List modification is safe because we hold 'vm->exec_queues.lock' for + * reading, which prevents external modifications. Using a per-GT list + * is also safe since 'tlb_inval->seqno_lock' ensures no other GT users + * can enter this code path. + */ + list_for_each_entry_safe(q, next, &vm->exec_queues.list[id], + vm_exec_queue_link) { + if (q->ops->active(q) && xe_exec_queue_get_unless_zero(q)) { + last_q = q; + list_move_tail(&q->vm_exec_queue_link, &tlb_inval_list); + } + } + + if (!last_q) { + /* + * We can't break fence ordering for TLB invalidation jobs, if + * TLB invalidations are inflight issue a dummy invalidation to + * maintain ordering. Nor can we safely move the seqno_recv when + * returning -ECANCELED if TLB invalidations are in flight. Use + * GGTT invalidation as dummy invalidation given ASID + * invalidations are unsupported here.
+ */ + if (xe_tlb_inval_idle(tlb_inval)) + err = -ECANCELED; + else + err = send_tlb_inval_ggtt(tlb_inval, seqno); + goto err_unlock; + } + + list_for_each_entry_safe(q, next, &tlb_inval_list, vm_exec_queue_link) { + struct drm_suballoc *__prl_sa = NULL; + int __seqno = TLB_INVALIDATION_SEQNO_INVALID; + u32 type = XE_GUC_TLB_INVAL_PAGE_SELECTIVE_CTX; + + xe_assert(xe, q->vm == vm); + + if (err) + goto unref; + + if (last_q == q) { + __prl_sa = prl_sa; + __seqno = seqno; + } + + err = send_tlb_inval_ppgtt(guc, __seqno, start, end, + q->guc->id, type, __prl_sa); + +unref: + /* + * Must always return exec queue to original list / drop + * reference + */ + list_move_tail(&q->vm_exec_queue_link, + &vm->exec_queues.list[id]); + xe_exec_queue_put(q); + } + +err_unlock: + up_read(&vm->exec_queues.lock); + xe_vm_put(vm); + + return err; +} + static bool tlb_inval_initialized(struct xe_tlb_inval *tlb_inval) { struct xe_guc *guc = tlb_inval->private; @@ -228,7 +350,7 @@ static long tlb_inval_timeout_delay(struct xe_tlb_inval *tlb_inval) return hw_tlb_timeout + 2 * delay; } -static const struct xe_tlb_inval_ops guc_tlb_inval_ops = { +static const struct xe_tlb_inval_ops guc_tlb_inval_asid_ops = { .all = send_tlb_inval_all, .ggtt = send_tlb_inval_ggtt, .ppgtt = send_tlb_inval_asid_ppgtt, @@ -237,6 +359,15 @@ static const struct xe_tlb_inval_ops guc_tlb_inval_ops = { .timeout_delay = tlb_inval_timeout_delay, }; +static const struct xe_tlb_inval_ops guc_tlb_inval_ctx_ops = { + .ggtt = send_tlb_inval_ggtt, + .all = send_tlb_inval_all, + .ppgtt = send_tlb_inval_ctx_ppgtt, + .initialized = tlb_inval_initialized, + .flush = tlb_inval_flush, + .timeout_delay = tlb_inval_timeout_delay, +}; + /** * xe_guc_tlb_inval_init_early() - Init GuC TLB invalidation early * @guc: GuC object @@ -248,8 +379,14 @@ static const struct xe_tlb_inval_ops guc_tlb_inval_ops = { void xe_guc_tlb_inval_init_early(struct xe_guc *guc, struct xe_tlb_inval *tlb_inval) { + struct xe_device *xe = guc_to_xe(guc); + 
tlb_inval->private = guc; - tlb_inval->ops = &guc_tlb_inval_ops; + + if (xe->info.has_ctx_tlb_inval) + tlb_inval->ops = &guc_tlb_inval_ctx_ops; + else + tlb_inval->ops = &guc_tlb_inval_asid_ops; } /** From 9dd08fdecc0c98d6516c2d2d1fa189c1332f8dab Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 20 Jan 2026 11:06:10 +0000 Subject: [PATCH 016/195] drm/xe/uapi: disallow bind queue sharing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently this is very broken if someone attempts to create a bind queue and share it across multiple VMs. For example currently we assume it is safe to acquire the user VM lock to protect some of the bind queue state, but if we allow sharing the bind queue with multiple VMs then this quickly breaks down. To fix this reject using a bind queue with any VM that is not the same VM that was originally passed when creating the bind queue. This is a uAPI change, however this was more of an oversight on kernel side that we didn't reject this, and expectation is that userspace shouldn't be using bind queues in this way, so in theory this change should go unnoticed. Based on a patch from Matt Brost. v2 (Matt B): - Hold the vm lock over queue create, to ensure it can't be closed as we attach the user_vm to the queue. - Make sure we actually check for NULL user_vm in destruction path. v3: - Fix error path handling.
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Reported-by: Thomas Hellström Signed-off-by: Matthew Auld Cc: José Roberto de Souza Cc: Matthew Brost Cc: Michal Mrozek Cc: Carl Zhang Cc: # v6.8+ Acked-by: José Roberto de Souza Reviewed-by: Matthew Brost Reviewed-by: Arvind Yadav Acked-by: Michal Mrozek Link: https://patch.msgid.link/20260120110609.77958-3-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 32 +++++++++++++++++++++++- drivers/gpu/drm/xe/xe_exec_queue.h | 1 + drivers/gpu/drm/xe/xe_exec_queue_types.h | 6 +++++ drivers/gpu/drm/xe/xe_sriov_vf_ccs.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 7 +++++- 5 files changed, 45 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index a58968a0a781..7e7e663189e4 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -412,6 +412,7 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe * @xe: Xe device. * @tile: tile which bind exec queue belongs to. * @flags: exec queue creation flags + * @user_vm: The user VM which this exec queue belongs to * @extensions: exec queue creation extensions * * Normalize bind exec queue creation. 
Bind exec queue is tied to migration VM @@ -425,6 +426,7 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe */ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, struct xe_tile *tile, + struct xe_vm *user_vm, u32 flags, u64 extensions) { struct xe_gt *gt = tile->primary_gt; @@ -461,6 +463,9 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, xe_exec_queue_put(q); return ERR_PTR(err); } + + if (user_vm) + q->user_vm = xe_vm_get(user_vm); } return q; @@ -491,6 +496,11 @@ void xe_exec_queue_destroy(struct kref *ref) xe_exec_queue_put(eq); } + if (q->user_vm) { + xe_vm_put(q->user_vm); + q->user_vm = NULL; + } + q->ops->destroy(q); } @@ -1121,6 +1131,22 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) return -EINVAL; + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_DBG(xe, !vm)) + return -ENOENT; + + err = down_read_interruptible(&vm->lock); + if (err) { + xe_vm_put(vm); + return err; + } + + if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { + up_read(&vm->lock); + xe_vm_put(vm); + return -ENOENT; + } + for_each_tile(tile, xe, id) { struct xe_exec_queue *new; @@ -1128,9 +1154,11 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (id) flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD; - new = xe_exec_queue_create_bind(xe, tile, flags, + new = xe_exec_queue_create_bind(xe, tile, vm, flags, args->extensions); if (IS_ERR(new)) { + up_read(&vm->lock); + xe_vm_put(vm); err = PTR_ERR(new); if (q) goto put_exec_queue; @@ -1142,6 +1170,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, list_add_tail(&new->multi_gt_list, &q->multi_gt_link); } + up_read(&vm->lock); + xe_vm_put(vm); } else { logical_mask = calc_validate_logical_mask(xe, eci, args->width, diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index b1e51789128f..c9e3a7c2d249 100644 --- 
a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -28,6 +28,7 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe u32 flags, u64 extensions); struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, struct xe_tile *tile, + struct xe_vm *user_vm, u32 flags, u64 extensions); void xe_exec_queue_fini(struct xe_exec_queue *q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 601e742c79ff..e987d431ce27 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -92,6 +92,12 @@ struct xe_exec_queue { struct kref refcount; /** @vm: VM (address space) for this exec queue */ struct xe_vm *vm; + /** + * @user_vm: User VM (address space) for this exec queue (bind queues + * only) + */ + struct xe_vm *user_vm; + /** @class: class of this exec queue */ enum xe_engine_class class; /** diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c index 052a5071e69f..db023fb66a27 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c @@ -350,7 +350,7 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe) flags = EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT | EXEC_QUEUE_FLAG_MIGRATE; - q = xe_exec_queue_create_bind(xe, tile, flags, 0); + q = xe_exec_queue_create_bind(xe, tile, NULL, flags, 0); if (IS_ERR(q)) { err = PTR_ERR(q); goto err_ret; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index e330c794b626..f7bb21ac1987 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1651,7 +1651,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) if (!vm->pt_root[id]) continue; - q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); + q = xe_exec_queue_create_bind(xe, tile, vm, create_flags, 0); if (IS_ERR(q)) { err = PTR_ERR(q); goto err_close; @@ -3647,6 +3647,11 @@ int 
xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } } + if (XE_IOCTL_DBG(xe, q && vm != q->user_vm)) { + err = -EINVAL; + goto put_exec_queue; + } + /* Ensure all UNMAPs visible */ xe_svm_flush(vm); From 9dd1048bca4fe2aa67c7a286bafb3947537adedb Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 20 Jan 2026 11:06:11 +0000 Subject: [PATCH 017/195] drm/xe/migrate: fix job lock assert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are meant to be checking the user vm for the bind queue, but actually we are checking the migrate vm. For various reasons this is not currently firing but this will likely change in the future. Now that we have the user_vm attached to the bind queue, we can fix this by directly checking that here. Fixes: dba89840a920 ("drm/xe: Add GT TLB invalidation jobs") Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Matthew Brost Reviewed-by: Matthew Brost Reviewed-by: Arvind Yadav Link: https://patch.msgid.link/20260120110609.77958-4-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_migrate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 00eef41a9e36..6e202428aac2 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -2499,7 +2499,7 @@ void xe_migrate_job_lock(struct xe_migrate *m, struct xe_exec_queue *q) if (is_migrate) mutex_lock(&m->job_mutex); else - xe_vm_assert_held(q->vm); /* User queues VM's should be locked */ + xe_vm_assert_held(q->user_vm); /* User queues VM's should be locked */ } /** @@ -2517,7 +2517,7 @@ void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q) if (is_migrate) mutex_unlock(&m->job_mutex); else - xe_vm_assert_held(q->vm); /* User queues VM's should be locked */ + xe_vm_assert_held(q->user_vm); /* User queues VM's should be locked */ } #if IS_ENABLED(CONFIG_PROVE_LOCKING) From 
1e372b246199ca7a35f930177fea91b557dac16e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 21 Jan 2026 10:10:47 +0100 Subject: [PATCH 018/195] drm, drm/xe: Fix xe userptr in the absence of CONFIG_DEVICE_PRIVATE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_DEVICE_PRIVATE is not selected by default by some distros, for example Fedora, and that leads to a regression in the xe driver since userptr support gets compiled out. It turns out that DRM_GPUSVM, which is needed for xe userptr support compiles also without CONFIG_DEVICE_PRIVATE, but doesn't compile without CONFIG_ZONE_DEVICE. Exclude the drm_pagemap files from compilation with !CONFIG_ZONE_DEVICE, and remove the CONFIG_DEVICE_PRIVATE dependency from CONFIG_DRM_GPUSVM and the xe driver's selection of it, re-enabling xe userptr for those configs. v2: - Don't compile the drm_pagemap files unless CONFIG_ZONE_DEVICE is set. - Adjust the drm_pagemap.h header accordingly. 
Fixes: 9e9787414882 ("drm/xe/userptr: replace xe_hmm with gpusvm") Cc: Matthew Auld Cc: Himal Prasad Ghimiray Cc: Thomas Hellström Cc: Matthew Brost Cc: "Thomas Hellström" Cc: Rodrigo Vivi Cc: dri-devel@lists.freedesktop.org Cc: # v6.18+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Acked-by: Maarten Lankhorst Link: https://patch.msgid.link/20260121091048.41371-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/Kconfig | 2 +- drivers/gpu/drm/Makefile | 4 +++- drivers/gpu/drm/xe/Kconfig | 2 +- include/drm/drm_pagemap.h | 18 ++++++++++++++---- 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index a33b90251530..d3d52310c9cc 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -210,7 +210,7 @@ config DRM_GPUVM config DRM_GPUSVM tristate - depends on DRM && DEVICE_PRIVATE + depends on DRM select HMM_MIRROR select MMU_NOTIFIER help diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 0deee72ef935..0c21029c446f 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -108,9 +108,11 @@ obj-$(CONFIG_DRM_EXEC) += drm_exec.o obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o drm_gpusvm_helper-y := \ - drm_gpusvm.o\ + drm_gpusvm.o +drm_gpusvm_helper-$(CONFIG_ZONE_DEVICE) += \ drm_pagemap.o\ drm_pagemap_util.o + obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 4b288eb3f5b0..c34be1be155b 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -39,7 +39,7 @@ config DRM_XE select DRM_TTM select DRM_TTM_HELPER select DRM_EXEC - select DRM_GPUSVM if !UML && DEVICE_PRIVATE + select DRM_GPUSVM if !UML select DRM_GPUVM select DRM_SCHED select MMU_NOTIFIER diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h index 46e9c58f09e0..2baf0861f78f 100644 --- a/include/drm/drm_pagemap.h +++ b/include/drm/drm_pagemap.h @@ -243,6 
+243,8 @@ struct drm_pagemap_devmem_ops { struct dma_fence *pre_migrate_fence); }; +#if IS_ENABLED(CONFIG_ZONE_DEVICE) + int drm_pagemap_init(struct drm_pagemap *dpagemap, struct dev_pagemap *pagemap, struct drm_device *drm, @@ -252,17 +254,22 @@ struct drm_pagemap *drm_pagemap_create(struct drm_device *drm, struct dev_pagemap *pagemap, const struct drm_pagemap_ops *ops); -#if IS_ENABLED(CONFIG_DRM_GPUSVM) +struct drm_pagemap *drm_pagemap_page_to_dpagemap(struct page *page); void drm_pagemap_put(struct drm_pagemap *dpagemap); #else +static inline struct drm_pagemap *drm_pagemap_page_to_dpagemap(struct page *page) +{ + return NULL; +} + static inline void drm_pagemap_put(struct drm_pagemap *dpagemap) { } -#endif /* IS_ENABLED(CONFIG_DRM_GPUSVM) */ +#endif /* IS_ENABLED(CONFIG_ZONE_DEVICE) */ /** * drm_pagemap_get() - Obtain a reference on a struct drm_pagemap @@ -334,6 +341,8 @@ struct drm_pagemap_migrate_details { u32 source_peer_migrates : 1; }; +#if IS_ENABLED(CONFIG_ZONE_DEVICE) + int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, struct mm_struct *mm, unsigned long start, unsigned long end, @@ -343,8 +352,6 @@ int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation); const struct dev_pagemap_ops *drm_pagemap_pagemap_ops_get(void); -struct drm_pagemap *drm_pagemap_page_to_dpagemap(struct page *page); - void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation, struct device *dev, struct mm_struct *mm, const struct drm_pagemap_devmem_ops *ops, @@ -359,4 +366,7 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool is_atomic_or_reclaim); int drm_pagemap_reinit(struct drm_pagemap *dpagemap); + +#endif /* IS_ENABLED(CONFIG_ZONE_DEVICE) */ + #endif From 9386f49316074d2d76fd78d6bd359996de42fb7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 21 Jan 2026 10:10:48 +0100 Subject: [PATCH 019/195] drm/xe: Select 
CONFIG_DEVICE_PRIVATE when DRM_XE_GPUSVM is selected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_DEVICE_PRIVATE is a prerequisite for DRM_XE_GPUSVM. Explicitly select it so that DRM_XE_GPUSVM is not unintentionally left out from distro configs not explicitly enabling CONFIG_DEVICE_PRIVATE. v2: - Select also CONFIG_ZONE_DEVICE since it's needed by CONFIG_DEVICE_PRIVATE. v3: - Depend on CONFIG_ZONE_DEVICE rather than selecting it. Cc: Matthew Auld Cc: Matthew Brost Cc: Rodrigo Vivi Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patch.msgid.link/20260121091048.41371-3-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index c34be1be155b..4d7dcaff2b91 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -80,8 +80,9 @@ config DRM_XE_GPUSVM bool "Enable CPU to GPU address mirroring" depends on DRM_XE depends on !UML - depends on DEVICE_PRIVATE + depends on ZONE_DEVICE default y + select DEVICE_PRIVATE select DRM_GPUSVM help Enable this option if you want support for CPU to GPU address From dc2fc00ba94dee539593228a5e188c8e6a84ba47 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Wed, 21 Jan 2026 16:44:17 +0530 Subject: [PATCH 020/195] drm/xe: Use DRM_BUDDY_CONTIGUOUS_ALLOCATION for contiguous allocations The VRAM/stolen memory managers do not currently set DRM_BUDDY_CONTIGUOUS_ALLOCATION for contiguous allocations. Enabling this flag activates the buddy allocator's try_harder path, which helps handle fragmented memory scenarios. This enables the __alloc_contig_try_harder fallback in the buddy allocator, allowing contiguous allocation requests to succeed even when memory is fragmented by combining allocations from both(RHS and LHS) sides of a large free block. 
v2: (Matt B) - Remove redundant logic for rounding allocation size and trimming when TTM_PL_FLAG_CONTIGUOUS is set, since drm_buddy now handles this when DRM_BUDDY_CONTIGUOUS_ALLOCATION is enabled Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6713 Suggested-by: Matthew Auld Signed-off-by: Sanjay Yadav Reviewed-by: Matthew Brost Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patch.msgid.link/20260121111416.3104399-2-sanjay.kumar.yadav@intel.com --- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 6553a19f7cf2..d6aa61e55f4d 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -81,6 +81,9 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, if (place->flags & TTM_PL_FLAG_TOPDOWN) vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; + if (place->flags & TTM_PL_FLAG_CONTIGUOUS) + vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION; + if (place->fpfn || lpfn != man->size >> PAGE_SHIFT) vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; @@ -110,25 +113,12 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, goto error_unlock; } - if (place->fpfn + (size >> PAGE_SHIFT) != lpfn && - place->flags & TTM_PL_FLAG_CONTIGUOUS) { - size = roundup_pow_of_two(size); - min_page_size = size; - - lpfn = max_t(unsigned long, place->fpfn + (size >> PAGE_SHIFT), lpfn); - } - err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT, (u64)lpfn << PAGE_SHIFT, size, min_page_size, &vres->blocks, vres->flags); if (err) goto error_unlock; - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - if (!drm_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks)) - size = vres->base.size; - } - if (lpfn <= mgr->visible_size >> PAGE_SHIFT) { vres->used_visible_size = size; } else { From 6ef02656c3222b1e12032a40d644ed56806b14fc Mon Sep 17 00:00:00 2001 From: 
Nitin Gote Date: Tue, 20 Jan 2026 11:17:25 +0530 Subject: [PATCH 021/195] drm/xe: derive mem copy capability from graphics version Drop .has_mem_copy_instr from the platform descriptors and set it in xe_info_init() after handle_gmdid() populates graphics_verx100. Centralizing the GRAPHICS_VER(xe) >= 20 check keeps MEM_COPY enabled on Xe2+ and removes redundant per-platform plumbing. Bspec: 57561 Fixes: 1e12dbae9d72 ("drm/xe/migrate: support MEM_COPY instruction") Cc: Matt Roper Reviewed-by: Matthew Auld Suggested-by: Matthew Auld Signed-off-by: Nitin Gote Link: https://patch.msgid.link/20260120054724.1982608-2-nitin.r.gote@intel.com Signed-off-by: Tejas Upadhyay --- drivers/gpu/drm/xe/xe_pci.c | 6 +----- drivers/gpu/drm/xe/xe_pci_types.h | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index f367479fe3fb..ed661bc54c31 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -350,7 +350,6 @@ static const struct xe_device_desc lnl_desc = { .has_display = true, .has_flat_ccs = 1, .has_pxp = true, - .has_mem_copy_instr = true, .max_gt_per_tile = 2, .needs_scratch = true, .va_bits = 48, @@ -375,7 +374,6 @@ static const struct xe_device_desc bmg_desc = { .has_pre_prod_wa = 1, .has_soc_remapper_telem = true, .has_sriov = true, - .has_mem_copy_instr = true, .max_gt_per_tile = 2, .needs_scratch = true, .subplatforms = (const struct xe_subplatform_desc[]) { @@ -392,7 +390,6 @@ static const struct xe_device_desc ptl_desc = { .has_display = true, .has_flat_ccs = 1, .has_sriov = true, - .has_mem_copy_instr = true, .has_pre_prod_wa = 1, .has_pxp = true, .max_gt_per_tile = 2, @@ -407,7 +404,6 @@ static const struct xe_device_desc nvls_desc = { .dma_mask_size = 46, .has_display = true, .has_flat_ccs = 1, - .has_mem_copy_instr = true, .has_pre_prod_wa = 1, .max_gt_per_tile = 2, .require_force_probe = true, @@ -705,7 +701,6 @@ static int xe_info_init_early(struct xe_device *xe, 
xe->info.has_soc_remapper_telem = desc->has_soc_remapper_telem; xe->info.has_sriov = xe_configfs_primary_gt_allowed(to_pci_dev(xe->drm.dev)) && desc->has_sriov; - xe->info.has_mem_copy_instr = desc->has_mem_copy_instr; xe->info.skip_guc_pc = desc->skip_guc_pc; xe->info.skip_mtcfg = desc->skip_mtcfg; xe->info.skip_pcode = desc->skip_pcode; @@ -896,6 +891,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.has_ctx_tlb_inval = graphics_desc->has_ctx_tlb_inval; xe->info.has_usm = graphics_desc->has_usm; xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp; + xe->info.has_mem_copy_instr = GRAPHICS_VER(xe) >= 20; xe_info_probe_tile_count(xe); diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index 7ccb0ab7a53b..8b2ff3f25607 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -49,7 +49,6 @@ struct xe_device_desc { u8 has_llc:1; u8 has_mbx_power_limits:1; u8 has_mbx_thermal_info:1; - u8 has_mem_copy_instr:1; u8 has_mert:1; u8 has_pre_prod_wa:1; u8 has_page_reclaim_hw_assist:1; From 4761791c1e736273d612ff564f318bfbbb04fa4e Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 22 Jan 2026 21:40:54 +0000 Subject: [PATCH 022/195] drm/xe: Skip address copy for sync-only execs For parallel exec queues, xe_exec_ioctl() copied the batch buffer address array from userspace without checking num_batch_buffer. If user creates a sync-only exec that doesn't use the address field, the exec will fail with -EFAULT. Add num_batch_buffer check to skip the copy, and the exec could be executed successfully. 
Here is the sync-only exec: struct drm_xe_exec exec = { .extensions = 0, .exec_queue_id = qid, .num_syncs = 1, .syncs = (uintptr_t)&sync, .address = 0, /* ignored for sync-only */ .num_batch_buffer = 0, /* sync-only */ }; Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260122214053.3189366-2-shuicheng.lin@intel.com --- drivers/gpu/drm/xe/xe_exec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index a5485fe6e3f1..dbe6c006f1d6 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -192,9 +192,9 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto err_syncs; } - if (xe_exec_queue_is_parallel(q)) { - err = copy_from_user(addresses, addresses_user, sizeof(u64) * - q->width); + if (args->num_batch_buffer && xe_exec_queue_is_parallel(q)) { + err = copy_from_user(addresses, addresses_user, + sizeof(u64) * q->width); if (err) { err = -EFAULT; goto err_syncs; From 7fe6cae2f7fad2b5166b0fc096618629f9e2ebcb Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 16 Jan 2026 09:50:40 +0000 Subject: [PATCH 023/195] drm/xe/xelp: Fix Wa_18022495364 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It looks like I mistyped CS_DEBUG_MODE2 as CS_DEBUG_MODE1 when adding the workaround. Fix it.
Signed-off-by: Tvrtko Ursulin Fixes: ca33cd271ef9 ("drm/xe/xelp: Add Wa_18022495364") Cc: Matt Roper Cc: "Thomas Hellström" Cc: Rodrigo Vivi Cc: # v6.18+ Reviewed-by: Matt Roper Signed-off-by: Thomas Hellström Link: https://patch.msgid.link/20260116095040.49335-1-tvrtko.ursulin@igalia.com --- drivers/gpu/drm/xe/xe_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index f4f31bc240d9..3db7968aa5e2 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1203,7 +1203,7 @@ static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc, return -ENOSPC; *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); - *cmd++ = CS_DEBUG_MODE1(0).addr; + *cmd++ = CS_DEBUG_MODE2(0).addr; *cmd++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE); return cmd - batch; From 40ee63f5df2d5c6471b583df800aac89dc0502a4 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Fri, 23 Jan 2026 16:59:17 -0800 Subject: [PATCH 024/195] drm/xe/ptl: Disable DCC on PTL On PTL, the recommendation is to disable DCC(Duty Cycle Control) as it may cause some regressions due to added latencies. Upcoming GuC releases will disable DCC on PTL as well, but we need to force it in KMD so that this behavior is propagated to older kernels. 
v2: Update commit message (Rodrigo) v3: Rebase v4: Fix typo: s/propagted/propagated Fixes: 5cdb71d3b0db ("drm/xe/ptl: Add GuC FW definition for PTL") Cc: Daniele Ceraolo Spurio Cc: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar Link: https://patch.msgid.link/20260124005917.398522-1-vinay.belgaumkar@intel.com Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_pc.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 54702a0fd05b..5e5495a39a3c 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -1198,6 +1198,36 @@ int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf) return ret; } +static int pc_action_set_dcc(struct xe_guc_pc *pc, bool enable) +{ + int ret; + + ret = pc_action_set_param(pc, + SLPC_PARAM_TASK_ENABLE_DCC, + enable); + if (!ret) + return pc_action_set_param(pc, + SLPC_PARAM_TASK_DISABLE_DCC, + !enable); + else + return ret; +} + +static int pc_modify_defaults(struct xe_guc_pc *pc) +{ + struct xe_device *xe = pc_to_xe(pc); + struct xe_gt *gt = pc_to_gt(pc); + int ret = 0; + + if (xe->info.platform == XE_PANTHERLAKE) { + ret = pc_action_set_dcc(pc, false); + if (unlikely(ret)) + xe_gt_err(gt, "Failed to modify DCC default: %pe\n", ERR_PTR(ret)); + } + + return ret; +} + /** * xe_guc_pc_start - Start GuC's Power Conservation component * @pc: Xe_GuC_PC instance @@ -1249,6 +1279,10 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) ktime_ms_delta(ktime_get(), earlier)); } + ret = pc_modify_defaults(pc); + if (ret) + return ret; + ret = pc_init_freqs(pc); if (ret) return ret; From 60bfb8baf8f0d5b0d521744dfd01c880ce1a23f3 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Fri, 9 Jan 2026 21:10:42 +0000 Subject: [PATCH 025/195] drm/xe: Unregister drm device on probe error Call drm_dev_unregister() when xe_device_probe() fails after successful drm_dev_register(). 
This ensures the DRM device is promptly unregistered before returning an error, avoiding leaving it registered on the failure path. Otherwise, there is warn message if xe_device_probe() is called again: " [ 207.322365] [drm:drm_minor_register] [ 207.322381] debugfs: '128' already exists in 'dri' [ 207.322432] sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:01.0/0000:03:00.0/drm/renderD128' [ 207.322435] CPU: 5 UID: 0 PID: 10261 Comm: modprobe Tainted: G B W 6.19.0-rc2-lgci-xe-kernel+ #223 PREEMPT(voluntary) [ 207.322439] Tainted: [B]=BAD_PAGE, [W]=WARN [ 207.322440] Hardware name: ASUS System Product Name/PRIME Z790-P WIFI, BIOS 0812 02/24/2023 [ 207.322441] Call Trace: [ 207.322442] [ 207.322443] dump_stack_lvl+0xa0/0xc0 [ 207.322446] dump_stack+0x10/0x20 [ 207.322448] sysfs_warn_dup+0xd5/0x110 [ 207.322451] sysfs_create_dir_ns+0x1f6/0x280 [ 207.322453] ? __pfx_sysfs_create_dir_ns+0x10/0x10 [ 207.322455] ? lock_acquire+0x1a4/0x2e0 [ 207.322458] ? __kasan_check_read+0x11/0x20 [ 207.322461] kobject_add_internal+0x28d/0x8e0 [ 207.322464] kobject_add+0x11f/0x1f0 [ 207.322465] ? lock_acquire+0x1a4/0x2e0 [ 207.322467] ? __pfx_kobject_add+0x10/0x10 [ 207.322469] ? __kasan_check_write+0x14/0x20 [ 207.322471] ? kobject_put+0x62/0x4a0 [ 207.322473] ? get_device_parent.isra.0+0x1bb/0x4c0 [ 207.322475] ? kobject_put+0x62/0x4a0 [ 207.322477] device_add+0x2d7/0x1500 [ 207.322479] ? __pfx_device_add+0x10/0x10 [ 207.322481] ? drm_debugfs_add_file+0xfa/0x170 [ 207.322483] ? drm_debugfs_add_files+0x82/0xd0 [ 207.322485] ? drm_debugfs_add_files+0x82/0xd0 [ 207.322487] drm_minor_register+0x10a/0x2d0 [ 207.322489] drm_dev_register+0x143/0x860 [ 207.322491] ? xe_configfs_get_psmi_enabled+0x12/0x90 [xe] [ 207.322667] xe_device_probe+0x185b/0x2c40 [xe] [ 207.322812] ? __pfx___drm_dev_dbg+0x10/0x10 [ 207.322815] ? add_dr+0x180/0x220 [ 207.322818] ? __pfx___drmm_mutex_release+0x10/0x10 [ 207.322821] ? 
__pfx_xe_device_probe+0x10/0x10 [xe] [ 207.322966] ? xe_pm_init_early+0x33a/0x410 [xe] [ 207.323136] xe_pci_probe+0x936/0x1250 [xe] [ 207.323298] ? lock_acquire+0x1a4/0x2e0 [ 207.323302] ? __pfx_xe_pci_probe+0x10/0x10 [xe] [ 207.323464] local_pci_probe+0xe6/0x1a0 [ 207.323468] pci_device_probe+0x523/0x840 [ 207.323470] ? __pfx_pci_device_probe+0x10/0x10 [ 207.323473] ? sysfs_do_create_link_sd.isra.0+0x8c/0x110 [ 207.323476] ? sysfs_create_link+0x48/0xc0 [ 207.323479] really_probe+0x1fd/0x8a0 ... " Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Shuicheng Lin Reviewed-by: Jonathan Cavitt Link: https://patch.msgid.link/20260109211041.2446012-2-shuicheng.lin@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index aad4aa53a51f..7c2f8b783c56 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -1017,6 +1017,7 @@ int xe_device_probe(struct xe_device *xe) err_unregister_display: xe_display_unregister(xe); + drm_dev_unregister(&xe->drm); return err; } From 91e0c2fec10c975a0e2a4b91e137825d1447f50f Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 21 Jan 2026 15:57:35 -0700 Subject: [PATCH 026/195] drm/xe: Move _THIS_IP_ usage from xe_vm_create() to dedicated function After commit a3866ce7b122 ("drm/xe: Add vm to exec queues association"), building for an architecture other than x86 (which defines its own _THIS_IP_) with clang fails with: drivers/gpu/drm/xe/xe_vm.c:1586:3: error: cannot jump from this indirect goto statement to one of its possible targets 1586 | drm_exec_retry_on_contention(&exec); | ^ include/drm/drm_exec.h:123:4: note: expanded from macro 'drm_exec_retry_on_contention' 123 | goto *__drm_exec_retry_ptr; \ | ^ drivers/gpu/drm/xe/xe_vm.c:1542:3: note: possible target of indirect goto statement 1542 | might_lock(&vm->exec_queues.lock); | ^ 
include/linux/lockdep.h:553:33: note: expanded from macro 'might_lock' 553 | lock_release(&(lock)->dep_map, _THIS_IP_); \ | ^ include/linux/instruction_pointer.h:10:41: note: expanded from macro '_THIS_IP_' 10 | #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) | ^ drivers/gpu/drm/xe/xe_vm.c:1583:2: note: jump exits scope of variable with __attribute__((cleanup)) 1583 | xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, | ^ drivers/gpu/drm/xe/xe_validation.h:189:2: note: expanded from macro 'xe_validation_guard' 189 | scoped_guard(xe_validation, _ctx, _val, _exec, _flags, &_ret) \ | ^ include/linux/cleanup.h:442:2: note: expanded from macro 'scoped_guard' 442 | __scoped_guard(_name, __UNIQUE_ID(label), args) | ^ include/linux/cleanup.h:433:20: note: expanded from macro '__scoped_guard' 433 | for (CLASS(_name, scope)(args); \ | ^ drivers/gpu/drm/xe/xe_vm.c:1542:3: note: jump enters a statement expression 1542 | might_lock(&vm->exec_queues.lock); | ^ include/linux/lockdep.h:553:33: note: expanded from macro 'might_lock' 553 | lock_release(&(lock)->dep_map, _THIS_IP_); \ | ^ include/linux/instruction_pointer.h:10:20: note: expanded from macro '_THIS_IP_' 10 | #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) | ^ While this is a false positive error because __drm_exec_retry_ptr is only ever assigned the label in drm_exec_until_all_locked() (thus it can never jump over the cleanup variable), this error is not unreasonable in general because the only supported use case for taking the address of a label is computed gotos [1]. The kernel's use of the address of a label in _THIS_IP_ is considered problematic by both GCC [2][3] and clang [4] but they need to provide something equivalent before they can break this use case. Hide the usage of _THIS_IP_ by moving the CONFIG_PROVE_LOCKING if statement to its own function, avoiding the error. 
This is similar to commit 187e16f69de2 ("drm/xe: Work around clang multiple goto-label error") but with the sources of _THIS_IP_. Fixes: a3866ce7b122 ("drm/xe: Add vm to exec queues association") Link: https://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html [1] Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=44298 [2] Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120071 [3] Link: https://github.com/llvm/llvm-project/issues/138272 [4] Signed-off-by: Nathan Chancellor Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260121-xe-vm-fix-clang-goto-error-v1-1-7e121d81512e@kernel.org --- drivers/gpu/drm/xe/xe_vm.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index f7bb21ac1987..293b92ed2fdd 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1474,6 +1474,20 @@ static void xe_vm_pt_destroy(struct xe_vm *vm) } } +static void xe_vm_init_prove_locking(struct xe_device *xe, struct xe_vm *vm) +{ + if (!IS_ENABLED(CONFIG_PROVE_LOCKING)) + return; + + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&vm->exec_queues.lock); + fs_reclaim_release(GFP_KERNEL); + + down_read(&vm->exec_queues.lock); + might_lock(&xe_root_mmio_gt(xe)->uc.guc.ct.lock); + up_read(&vm->exec_queues.lock); +} + struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) { struct drm_gem_object *vm_resv_obj; @@ -1537,15 +1551,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) vm->preempt.min_run_period_ms = xe->min_run_period_lr_ms; init_rwsem(&vm->exec_queues.lock); - if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { - fs_reclaim_acquire(GFP_KERNEL); - might_lock(&vm->exec_queues.lock); - fs_reclaim_release(GFP_KERNEL); - - down_read(&vm->exec_queues.lock); - might_lock(&xe_root_mmio_gt(xe)->uc.guc.ct.lock); - up_read(&vm->exec_queues.lock); - } + xe_vm_init_prove_locking(xe, vm); 
for_each_tile(tile, xe, id) xe_range_fence_tree_init(&vm->rftree[id]); From d043b95983e692718922abdc6ba652322e66d318 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 22 Jan 2026 16:19:24 +0100 Subject: [PATCH 027/195] drm/xe/vf: Reset VF GuC state on fini Unlike native/PF driver, which was explicitly triggering full GuC reset during driver unwind, the VF driver was not notifying GuC that it is about to unwind, and this could lead GuC to access stale data, which in turn could be interpreted as VF's malicious activity. Add managed action to send to GuC VF_RESET message during GT unwind. Signed-off-by: Michal Wajdeczko Cc: Daniele Ceraolo Spurio Reviewed-by: Daniele Ceraolo Spurio Link: https://patch.msgid.link/20260122151924.3726-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 44360437beeb..2efc4678fa73 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -668,6 +668,13 @@ static void guc_fini_hw(void *arg) guc_g2g_fini(guc); } +static void vf_guc_fini_hw(void *arg) +{ + struct xe_guc *guc = arg; + + xe_gt_sriov_vf_reset(guc_to_gt(guc)); +} + /** * xe_guc_comm_init_early - early initialization of GuC communication * @guc: the &xe_guc to initialize @@ -772,6 +779,10 @@ int xe_guc_init(struct xe_guc *guc) xe->info.has_page_reclaim_hw_assist = false; if (IS_SRIOV_VF(xe)) { + ret = devm_add_action_or_reset(xe->drm.dev, vf_guc_fini_hw, guc); + if (ret) + goto out; + ret = xe_guc_ct_init(&guc->ct); if (ret) goto out; From 6edeabacb71cfaadb6b406655cdbc53ed40b521c Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Fri, 23 Jan 2026 18:04:26 +0000 Subject: [PATCH 028/195] drm/xe/gt: Use CLASS() for forcewake in xe_gt_enable_comp_1wcoh Adopt the scoped forcewake management using CLASS(xe_force_wake, ...) to simplify the code and ensure proper resource release. 
Cc: Xin Wang Cc: Matthew Auld Cc: Matt Roper Signed-off-by: Shuicheng Lin Reviewed-by: Xin Wang Reviewed-by: Matt Roper Signed-off-by: Ashutosh Dixit Link: https://patch.msgid.link/20260123180425.3262944-2-shuicheng.lin@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 9d090d0f2438..22132467ff4f 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -141,15 +141,14 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) static void xe_gt_enable_comp_1wcoh(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - unsigned int fw_ref; u32 reg; if (IS_SRIOV_VF(xe)) return; if (GRAPHICS_VER(xe) >= 30 && xe->info.has_flat_ccs) { - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref.domains) return; reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); @@ -163,8 +162,6 @@ static void xe_gt_enable_comp_1wcoh(struct xe_gt *gt) reg |= EN_CMP_1WCOH_GW; xe_gt_mcr_multicast_write(gt, XE2_GAMWALK_CTRL_3D, reg); } - - xe_force_wake_put(gt_to_fw(gt), fw_ref); } } From 63b33604365bdca43dee41bab809da2230491036 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Wed, 21 Jan 2026 17:37:51 +0000 Subject: [PATCH 029/195] drm/xe/configfs: Fix is_bound() pci_dev lifetime Move pci_dev_put() after pci_dbg() to avoid using pdev after dropping its reference. 
Fixes: 2674f1ef29f46 ("drm/xe/configfs: Block runtime attribute changes") Signed-off-by: Shuicheng Lin Reviewed-by: Ashutosh Dixit Signed-off-by: Ashutosh Dixit Link: https://patch.msgid.link/20260121173750.3090907-2-shuicheng.lin@intel.com --- drivers/gpu/drm/xe/xe_configfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index 9f6251b1008b..82edd0466005 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -347,11 +347,10 @@ static bool is_bound(struct xe_config_group_device *dev) return false; ret = pci_get_drvdata(pdev); - pci_dev_put(pdev); - if (ret) pci_dbg(pdev, "Already bound to driver\n"); + pci_dev_put(pdev); return ret; } From 11035eab1b7d88daa7904440046e64d3810b1ca1 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Tue, 20 Jan 2026 18:32:41 +0000 Subject: [PATCH 030/195] drm/xe/nvm: Manage nvm aux cleanup with devres Move nvm teardown to a devm-managed action registered from xe_nvm_init(). This ensures the auxiliary NVM device is deleted on probe failure and device detach without requiring explicit calls from remove paths. As part of this, drop xe_nvm_fini() from xe_device_remove() and from the survivability sysfs teardown, and remove the public xe_nvm_fini() API from the header. 
This is to fix below warn message when there is probe failure after xe_nvm_init(), then xe_device_probe() is called again: " [ 207.318152] sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:01.0/0000:01:00.0/0000:02:01.0/0000:03:00.0/xe.nvm.768' [ 207.318157] CPU: 5 UID: 0 PID: 10261 Comm: modprobe Tainted: G B W 6.19.0-rc2-lgci-xe-kernel+ #223 PREEMPT(voluntary) [ 207.318160] Tainted: [B]=BAD_PAGE, [W]=WARN [ 207.318161] Hardware name: ASUS System Product Name/PRIME Z790-P WIFI, BIOS 0812 02/24/2023 [ 207.318163] Call Trace: [ 207.318163] [ 207.318165] dump_stack_lvl+0xa0/0xc0 [ 207.318170] dump_stack+0x10/0x20 [ 207.318171] sysfs_warn_dup+0xd5/0x110 [ 207.318175] sysfs_create_dir_ns+0x1f6/0x280 [ 207.318177] ? __pfx_sysfs_create_dir_ns+0x10/0x10 [ 207.318179] ? lock_acquire+0x1a4/0x2e0 [ 207.318182] ? __kasan_check_read+0x11/0x20 [ 207.318185] ? do_raw_spin_unlock+0x5c/0x240 [ 207.318187] kobject_add_internal+0x28d/0x8e0 [ 207.318189] kobject_add+0x11f/0x1f0 [ 207.318191] ? __pfx_kobject_add+0x10/0x10 [ 207.318193] ? lockdep_init_map_type+0x4b/0x230 [ 207.318195] ? get_device_parent.isra.0+0x43/0x4c0 [ 207.318197] ? kobject_get+0x55/0xf0 [ 207.318199] device_add+0x2d7/0x1500 [ 207.318201] ? __pfx_device_add+0x10/0x10 [ 207.318203] ? lockdep_init_map_type+0x4b/0x230 [ 207.318205] __auxiliary_device_add+0x99/0x140 [ 207.318208] xe_nvm_init+0x7a2/0xef0 [xe] [ 207.318333] ? xe_devcoredump_init+0x80/0x110 [xe] [ 207.318452] ? __devm_add_action+0x82/0xc0 [ 207.318454] ? fs_reclaim_release+0xc0/0x110 [ 207.318457] xe_device_probe+0x17dd/0x2c40 [xe] [ 207.318574] ? __pfx___drm_dev_dbg+0x10/0x10 [ 207.318576] ? add_dr+0x180/0x220 [ 207.318579] ? __pfx___drmm_mutex_release+0x10/0x10 [ 207.318582] ? __pfx_xe_device_probe+0x10/0x10 [xe] [ 207.318697] ? xe_pm_init_early+0x33a/0x410 [xe] [ 207.318850] xe_pci_probe+0x936/0x1250 [xe] [ 207.318999] ? lock_acquire+0x1a4/0x2e0 [ 207.319003] ? 
__pfx_xe_pci_probe+0x10/0x10 [xe] [ 207.319151] local_pci_probe+0xe6/0x1a0 [ 207.319154] pci_device_probe+0x523/0x840 [ 207.319157] ? __pfx_pci_device_probe+0x10/0x10 [ 207.319159] ? sysfs_do_create_link_sd.isra.0+0x8c/0x110 [ 207.319162] ? sysfs_create_link+0x48/0xc0 ... " Fixes: c28bfb107dac ("drm/xe/nvm: add on-die non-volatile memory device") Reviewed-by: Alexander Usyskin Reviewed-by: Brian Nguyen Cc: Rodrigo Vivi Cc: Riana Tauro Signed-off-by: Shuicheng Lin Signed-off-by: Ashutosh Dixit Link: https://patch.msgid.link/20260120183239.2966782-6-shuicheng.lin@intel.com --- drivers/gpu/drm/xe/xe_device.c | 2 - drivers/gpu/drm/xe/xe_nvm.c | 43 +++++++++++----------- drivers/gpu/drm/xe/xe_nvm.h | 2 - drivers/gpu/drm/xe/xe_survivability_mode.c | 4 -- 4 files changed, 22 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 7c2f8b783c56..9cf82bde36c4 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -1026,8 +1026,6 @@ void xe_device_remove(struct xe_device *xe) { xe_display_unregister(xe); - xe_nvm_fini(xe); - drm_dev_unplug(&xe->drm); xe_bo_pci_dev_remove_all(xe); diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c index 437375046517..77856f460770 100644 --- a/drivers/gpu/drm/xe/xe_nvm.c +++ b/drivers/gpu/drm/xe/xe_nvm.c @@ -94,6 +94,27 @@ static bool xe_nvm_writable_override(struct xe_device *xe) return writable_override; } +static void xe_nvm_fini(void *arg) +{ + struct xe_device *xe = arg; + struct intel_dg_nvm_dev *nvm = xe->nvm; + + if (!xe->info.has_gsc_nvm) + return; + + /* No access to internal NVM from VFs */ + if (IS_SRIOV_VF(xe)) + return; + + /* Nvm pointer should not be NULL here */ + if (WARN_ON(!nvm)) + return; + + auxiliary_device_delete(&nvm->aux_dev); + auxiliary_device_uninit(&nvm->aux_dev); + xe->nvm = NULL; +} + int xe_nvm_init(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -152,30 +173,10 @@ int 
xe_nvm_init(struct xe_device *xe) auxiliary_device_uninit(aux_dev); goto err; } - return 0; + return devm_add_action_or_reset(xe->drm.dev, xe_nvm_fini, xe); err: kfree(nvm); xe->nvm = NULL; return ret; } - -void xe_nvm_fini(struct xe_device *xe) -{ - struct intel_dg_nvm_dev *nvm = xe->nvm; - - if (!xe->info.has_gsc_nvm) - return; - - /* No access to internal NVM from VFs */ - if (IS_SRIOV_VF(xe)) - return; - - /* Nvm pointer should not be NULL here */ - if (WARN_ON(!nvm)) - return; - - auxiliary_device_delete(&nvm->aux_dev); - auxiliary_device_uninit(&nvm->aux_dev); - xe->nvm = NULL; -} diff --git a/drivers/gpu/drm/xe/xe_nvm.h b/drivers/gpu/drm/xe/xe_nvm.h index 7f3d5f57bed0..fd3467ad35a4 100644 --- a/drivers/gpu/drm/xe/xe_nvm.h +++ b/drivers/gpu/drm/xe/xe_nvm.h @@ -10,6 +10,4 @@ struct xe_device; int xe_nvm_init(struct xe_device *xe); -void xe_nvm_fini(struct xe_device *xe); - #endif diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c index 6578ffc77bd5..db64cac39c94 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.c +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c @@ -233,13 +233,9 @@ static SURVIVABILITY_ATTR_RO(fdo_mode, FDO_INFO); static void xe_survivability_mode_fini(void *arg) { struct xe_device *xe = arg; - struct xe_survivability *survivability = &xe->survivability; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); struct device *dev = &pdev->dev; - if (survivability->fdo_mode) - xe_nvm_fini(xe); - device_remove_file(dev, &dev_attr_survivability_mode); } From a3187c0c2bbd947ffff97f90d077ac88f9c2a215 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Tue, 20 Jan 2026 18:32:42 +0000 Subject: [PATCH 031/195] drm/xe/nvm: Fix double-free on aux add failure After a successful auxiliary_device_init(), aux_dev->dev.release (xe_nvm_release_dev()) is responsible for the kfree(nvm). When there is failure with auxiliary_device_add(), driver will call auxiliary_device_uninit(), which call put_device(). 
So that the .release callback will be triggered to free the memory associated with the auxiliary_device. Move the kfree(nvm) into the auxiliary_device_init() failure path and remove the err goto path to fix below error. " [ 13.232905] ================================================================== [ 13.232911] BUG: KASAN: double-free in xe_nvm_init+0x751/0xf10 [xe] [ 13.233112] Free of addr ffff888120635000 by task systemd-udevd/273 [ 13.233120] CPU: 8 UID: 0 PID: 273 Comm: systemd-udevd Not tainted 6.19.0-rc2-lgci-xe-kernel+ #225 PREEMPT(voluntary) ... [ 13.233125] Call Trace: [ 13.233126] [ 13.233127] dump_stack_lvl+0x7f/0xc0 [ 13.233132] print_report+0xce/0x610 [ 13.233136] ? kasan_complete_mode_report_info+0x5d/0x1e0 [ 13.233139] ? xe_nvm_init+0x751/0xf10 [xe] ... " v2: drop err goto path. (Alexander) Fixes: d4c3ed963e41 ("drm/xe: defer free of NVM auxiliary container to device release callback") Reviewed-by: Nitin Gote Reviewed-by: Brian Nguyen Cc: Alexander Usyskin Cc: Rodrigo Vivi Suggested-by: Brian Nguyen Signed-off-by: Shuicheng Lin Signed-off-by: Ashutosh Dixit Link: https://patch.msgid.link/20260120183239.2966782-7-shuicheng.lin@intel.com --- drivers/gpu/drm/xe/xe_nvm.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c index 77856f460770..6f9dd519371c 100644 --- a/drivers/gpu/drm/xe/xe_nvm.c +++ b/drivers/gpu/drm/xe/xe_nvm.c @@ -164,19 +164,17 @@ int xe_nvm_init(struct xe_device *xe) ret = auxiliary_device_init(aux_dev); if (ret) { drm_err(&xe->drm, "xe-nvm aux init failed %d\n", ret); - goto err; + kfree(nvm); + xe->nvm = NULL; + return ret; } ret = auxiliary_device_add(aux_dev); if (ret) { drm_err(&xe->drm, "xe-nvm aux add failed %d\n", ret); auxiliary_device_uninit(aux_dev); - goto err; + xe->nvm = NULL; + return ret; } return devm_add_action_or_reset(xe->drm.dev, xe_nvm_fini, xe); - -err: - kfree(nvm); - xe->nvm = NULL; - return ret; } From 
7755ed58a49f4c7f603e2b7b9fd5073a70d96406 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Tue, 20 Jan 2026 18:32:43 +0000 Subject: [PATCH 032/195] drm/xe/nvm: Defer xe->nvm assignment until init succeeds Allocate and initialize the NVM structure using a local pointer and assign it to xe->nvm only after all initialization steps succeed. This avoids exposing a partially initialized xe->nvm and removes the need to explicitly clear xe->nvm on error paths, simplifying error handling and making the lifetime rules clearer. Cc: Alexander Usyskin Cc: Rodrigo Vivi Cc: Brian Nguyen Signed-off-by: Shuicheng Lin Reviewed-by: Brian Nguyen Signed-off-by: Ashutosh Dixit Link: https://patch.msgid.link/20260120183239.2966782-8-shuicheng.lin@intel.com --- drivers/gpu/drm/xe/xe_nvm.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c index 6f9dd519371c..bc88804de514 100644 --- a/drivers/gpu/drm/xe/xe_nvm.c +++ b/drivers/gpu/drm/xe/xe_nvm.c @@ -133,12 +133,10 @@ int xe_nvm_init(struct xe_device *xe) if (WARN_ON(xe->nvm)) return -EFAULT; - xe->nvm = kzalloc(sizeof(*nvm), GFP_KERNEL); - if (!xe->nvm) + nvm = kzalloc(sizeof(*nvm), GFP_KERNEL); + if (!nvm) return -ENOMEM; - nvm = xe->nvm; - nvm->writable_override = xe_nvm_writable_override(xe); nvm->non_posted_erase = xe_nvm_non_posted_erase(xe); nvm->bar.parent = &pdev->resource[0]; @@ -165,7 +163,6 @@ int xe_nvm_init(struct xe_device *xe) if (ret) { drm_err(&xe->drm, "xe-nvm aux init failed %d\n", ret); kfree(nvm); - xe->nvm = NULL; return ret; } @@ -173,8 +170,9 @@ int xe_nvm_init(struct xe_device *xe) if (ret) { drm_err(&xe->drm, "xe-nvm aux add failed %d\n", ret); auxiliary_device_uninit(aux_dev); - xe->nvm = NULL; return ret; } + + xe->nvm = nvm; return devm_add_action_or_reset(xe->drm.dev, xe_nvm_fini, xe); } From e694179a2c02700087185bc300ba81dd17b7ad40 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Mon, 26 Jan 2026 09:42:42 -0800 
Subject: [PATCH 033/195] drm/xe/multi_queue: Protect priority against concurrent access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use a spinlock to protect multi-queue priority being concurrently updated by multiple set_priority ioctls and to protect against concurrent read and write to this field. v2: Update documentation, remove WRITE/READ_LOCK() (Thomas) Use scoped_guard, reduced lock scope (Matt Brost) v3: Fix author (checkpatch) Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://patch.msgid.link/20260126174241.3470390-2-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 1 + drivers/gpu/drm/xe/xe_exec_queue_types.h | 7 ++++++- drivers/gpu/drm/xe/xe_guc_submit.c | 19 +++++++++++++++---- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 7e7e663189e4..66d0e10ee2c4 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -230,6 +230,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, INIT_LIST_HEAD(&q->multi_gt_link); INIT_LIST_HEAD(&q->hw_engine_group_link); INIT_LIST_HEAD(&q->pxp.link); + spin_lock_init(&q->multi_queue.lock); q->multi_queue.priority = XE_MULTI_QUEUE_PRIORITY_NORMAL; q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index e987d431ce27..3791fed34ffa 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -161,8 +161,13 @@ struct xe_exec_queue { struct xe_exec_queue_group *group; /** @multi_queue.link: Link into group's secondary queues list */ struct list_head link; - /** @multi_queue.priority: Queue priority within the multi-queue group */ + /** + * @multi_queue.priority: Queue priority within the 
multi-queue group. + * It is protected by @multi_queue.lock. + */ enum xe_multi_queue_priority priority; + /** @multi_queue.lock: Lock for protecting certain members */ + spinlock_t lock; /** @multi_queue.pos: Position of queue within the multi-queue group */ u8 pos; /** @multi_queue.valid: Queue belongs to a multi queue group */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 456f549c16f6..1f4625ddae0e 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -804,6 +804,7 @@ static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc, { struct xe_exec_queue_group *group = q->multi_queue.group; struct xe_device *xe = guc_to_xe(guc); + enum xe_multi_queue_priority priority; long ret; /* @@ -827,7 +828,10 @@ static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc, return; } - xe_lrc_set_multi_queue_priority(q->lrc[0], q->multi_queue.priority); + scoped_guard(spinlock, &q->multi_queue.lock) + priority = q->multi_queue.priority; + + xe_lrc_set_multi_queue_priority(q->lrc[0], priority); xe_guc_exec_queue_group_cgp_update(xe, q); WRITE_ONCE(group->sync_pending, true); @@ -2181,15 +2185,22 @@ static int guc_exec_queue_set_multi_queue_priority(struct xe_exec_queue *q, xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), xe_exec_queue_is_multi_queue(q)); - if (q->multi_queue.priority == priority || - exec_queue_killed_or_banned_or_wedged(q)) + if (exec_queue_killed_or_banned_or_wedged(q)) return 0; msg = kmalloc(sizeof(*msg), GFP_KERNEL); if (!msg) return -ENOMEM; - q->multi_queue.priority = priority; + scoped_guard(spinlock, &q->multi_queue.lock) { + if (q->multi_queue.priority == priority) { + kfree(msg); + return 0; + } + + q->multi_queue.priority = priority; + } + guc_exec_queue_add_msg(q, msg, SET_MULTI_QUEUE_PRIORITY); return 0; From b47239bc30ef85e70b93e357ce76b205baac3b77 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 17 Dec 2025 16:07:02 +0100 Subject: [PATCH 034/195] 
drm/xe/pf: Fix typo in function kernel-doc The function name is missing an underscore, which results in: Warning: ../drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c:1261 This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst * xe_gt_sriov_pf_control_trigger restore_vf() - Start ... Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20251217150702.2669-1-michal.wajdeczko@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index bf48b05797de..5cb705c7ee7a 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -1259,7 +1259,7 @@ int xe_gt_sriov_pf_control_process_restore_data(struct xe_gt *gt, unsigned int v } /** - * xe_gt_sriov_pf_control_trigger restore_vf() - Start an SR-IOV VF migration data restore sequence. + * xe_gt_sriov_pf_control_trigger_restore_vf() - Start an SR-IOV VF migration data restore sequence. * @gt: the &xe_gt * @vfid: the VF identifier * From de96c43a69b1caf3190609a6509f22433a118782 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Wed, 28 Jan 2026 11:59:12 +0530 Subject: [PATCH 035/195] drm/xe: Apply WA_16028005424 to Media Apply WA_16028005424 to following IPs: Xe2_LPM, Xe2_HPM, Xe3_LPM, Xe3p_LPM While doing this move the same WA defined for Xe3_LPG under the comment for Xe3_LPG. It was wrongly placed under Xe3_LPM. 
Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260128062911.1456539-2-balasubramani.vivekanandan@intel.com --- drivers/gpu/drm/xe/xe_wa.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index a991ee2b8781..1e8d61ac581b 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -236,6 +236,10 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, + { XE_RTP_NAME("16028005424"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) + }, /* Xe2_HPG */ @@ -262,6 +266,10 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, + { XE_RTP_NAME("16028005424"), + XE_RTP_RULES(MEDIA_VERSION(1301)), + XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) + }, /* Xe3_LPG */ @@ -269,6 +277,10 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(SET(UNSLCGCTL9454, LSCFE_CLKGATE_DIS)) }, + { XE_RTP_NAME("16028005424"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005)), + XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) + }, /* Xe3_LPM */ @@ -303,7 +315,14 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, { XE_RTP_NAME("16028005424"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005)), + XE_RTP_RULES(MEDIA_VERSION_RANGE(3000, 3002)), + XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) + }, + + /* Xe3p_LPM */ + + { XE_RTP_NAME("16028005424"), + XE_RTP_RULES(MEDIA_VERSION(3500)), XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) }, }; From dd8ea2f2ab71b98887fdc426b0651dbb1d1ea760 Mon Sep 17 00:00:00 2001 
From: Daniele Ceraolo Spurio Date: Thu, 29 Jan 2026 10:25:48 -0800 Subject: [PATCH 036/195] drm/xe/guc: Fix CFI violation in debugfs access. xe_guc_print_info is void-returning, but the function pointer it is assigned to expects an int-returning function, leading to the following CFI error: [ 206.873690] CFI failure at guc_debugfs_show+0xa1/0xf0 [xe] (target: xe_guc_print_info+0x0/0x370 [xe]; expected type: 0xbe3bc66a) Fix this by updating xe_guc_print_info to return an integer. Fixes: e15826bb3c2c ("drm/xe/guc: Refactor GuC debugfs initialization") Signed-off-by: Daniele Ceraolo Spurio Cc: Michal Wajdeczko Cc: George D Sworo Reviewed-by: Michal Wajdeczko Link: https://patch.msgid.link/20260129182547.32899-2-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 6 ++++-- drivers/gpu/drm/xe/xe_guc.h | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 2efc4678fa73..c0eeaf08479d 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1672,7 +1672,7 @@ void xe_guc_runtime_resume(struct xe_guc *guc) xe_guc_submit_unpause(guc); } -void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) +int xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) { struct xe_gt *gt = guc_to_gt(guc); u32 status; @@ -1683,7 +1683,7 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) if (!IS_SRIOV_VF(gt_to_xe(gt))) { CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); if (!fw_ref.domains) - return; + return -EIO; status = xe_mmio_read32(>->mmio, GUC_STATUS); @@ -1709,6 +1709,8 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p) drm_puts(p, "\n"); xe_guc_submit_print(guc, p); + + return 0; } /** diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index a169f231cbd8..66e7edc70ed9 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -53,7 +53,7 @@ int xe_guc_self_cfg32(struct xe_guc *guc, 
u16 key, u32 val); int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val); void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir); void xe_guc_sanitize(struct xe_guc *guc); -void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p); +int xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p); int xe_guc_reset_prepare(struct xe_guc *guc); void xe_guc_reset_wait(struct xe_guc *guc); void xe_guc_stop_prepare(struct xe_guc *guc); From f89dbe14a0c8854b7aaf960dd842c10698b3ff19 Mon Sep 17 00:00:00 2001 From: Chaitanya Kumar Borah Date: Fri, 30 Jan 2026 19:22:10 +0530 Subject: [PATCH 037/195] drm/xe/guc: Fix kernel-doc warning in GuC scheduler ABI header The GuC scheduler ABI header contains a file-level comment that is not intended to document a kernel-doc symbol. Using kernel-doc comment syntax (/** */) triggers kernel-doc warnings. With "-Werror", this causes the build to fail. Convert the comment to a regular block comment. HDRTEST drivers/gpu/drm/xe/abi/guc_scheduler_abi.h Warning: drivers/gpu/drm/xe/abi/guc_scheduler_abi.h:11 This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst * Generic defines required for registration with and submissions to the GuC 1 warnings as errors make[6]: *** [drivers/gpu/drm/xe/Makefile:377: drivers/gpu/drm/xe/abi/guc_scheduler_abi.hdrtest] Error 3 make[5]: *** [scripts/Makefile.build:544: drivers/gpu/drm/xe] Error 2 make[4]: *** [scripts/Makefile.build:544: drivers/gpu/drm] Error 2 make[3]: *** [scripts/Makefile.build:544: drivers/gpu] Error 2 make[2]: *** [scripts/Makefile.build:544: drivers] Error 2 make[1]: *** [/home/kbuild2/kernel/Makefile:2088: .] 
Error 2 make: *** [Makefile:248: __sub-make] Error 2 v2: - Add Fixes tag (Daniele) Fixes: b0c5cf4f5917 ("drm/gt/guc: extract scheduler-related defines from guc_fwif.h") Signed-off-by: Chaitanya Kumar Borah Reviewed-by: Shuicheng Lin Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Link: https://patch.msgid.link/20260130135210.2659200-1-chaitanya.kumar.borah@intel.com --- drivers/gpu/drm/xe/abi/guc_scheduler_abi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/abi/guc_scheduler_abi.h b/drivers/gpu/drm/xe/abi/guc_scheduler_abi.h index 513b22a87428..19ec89bf39c5 100644 --- a/drivers/gpu/drm/xe/abi/guc_scheduler_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_scheduler_abi.h @@ -8,7 +8,7 @@ #include -/** +/* * Generic defines required for registration with and submissions to the GuC * scheduler. Includes engine class/instance defines and context attributes * (id, priority, etc) From 568d9d0d83f5b55144c4486fefe26bd132a66ea1 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Fri, 30 Jan 2026 17:53:49 +0000 Subject: [PATCH 038/195] drm/xe: use entry_dump callbacks for xe2+ PAT dumps Move xe2+ PAT entry printing into the entry_dump op so platform specific logic stays localized, simplifying future maintenance. v2: - Do not null xe->pat.ops for VFs. - Skip PAT init and dump on VFs (-EOPNOTSUPP), avoiding NULL ops use. v3: - fixed typo v4: (Matt) - Switch xe2_dump() to use the new ops->entry_dump() vfunc. - Remove xe3p_xpc_dump() and reuse the common xe2_dump() for Xe3p XPC. - This also fixes Xe3p_HPM media PAT dumping by using the proper non-MCR access for the PAT register range (bspec 76445). 
Cc: Matt Roper Suggested-by: Brian Nguyen Signed-off-by: Xin Wang Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260130175349.2249033-1-x.wang@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_pat.c | 67 +++++++++---------------------------- 1 file changed, 16 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 14d0dce5190a..2cd3fd1c3953 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -88,6 +88,7 @@ struct xe_pat_ops { void (*program_media)(struct xe_gt *gt, const struct xe_pat_table_entry table[], int n_entries); int (*dump)(struct xe_gt *gt, struct drm_printer *p); + void (*entry_dump)(struct drm_printer *p, const char *label, u32 pat, bool rsvd); }; static const struct xe_pat_table_entry xelp_pat_table[] = { @@ -458,7 +459,7 @@ static int xe2_dump(struct xe_gt *gt, struct drm_printer *p) pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); xe_pat_index_label(label, sizeof(label), i); - xe2_pat_entry_dump(p, label, pat, !xe->pat.table[i].valid); + xe->pat.ops->entry_dump(p, label, pat, !xe->pat.table[i].valid); } /* @@ -471,7 +472,7 @@ static int xe2_dump(struct xe_gt *gt, struct drm_printer *p) pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA)); drm_printf(p, "Page Table Access:\n"); - xe2_pat_entry_dump(p, "PTA_MODE", pat, false); + xe->pat.ops->entry_dump(p, "PTA_MODE", pat, false); return 0; } @@ -480,44 +481,14 @@ static const struct xe_pat_ops xe2_pat_ops = { .program_graphics = program_pat_mcr, .program_media = program_pat, .dump = xe2_dump, + .entry_dump = xe2_pat_entry_dump, }; -static int xe3p_xpc_dump(struct xe_gt *gt, struct drm_printer *p) -{ - struct xe_device *xe = gt_to_xe(gt); - u32 pat; - int i; - char label[PAT_LABEL_LEN]; - - CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref.domains) - return -ETIMEDOUT; - - drm_printf(p, "PAT table: (* = reserved entry)\n"); - - for (i = 0; i < xe->pat.n_entries; 
i++) { - pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i))); - - xe_pat_index_label(label, sizeof(label), i); - xe3p_xpc_pat_entry_dump(p, label, pat, !xe->pat.table[i].valid); - } - - /* - * Also print PTA_MODE, which describes how the hardware accesses - * PPGTT entries. - */ - pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA)); - - drm_printf(p, "Page Table Access:\n"); - xe3p_xpc_pat_entry_dump(p, "PTA_MODE", pat, false); - - return 0; -} - static const struct xe_pat_ops xe3p_xpc_pat_ops = { .program_graphics = program_pat_mcr, .program_media = program_pat, - .dump = xe3p_xpc_dump, + .dump = xe2_dump, + .entry_dump = xe3p_xpc_pat_entry_dump, }; void xe_pat_init_early(struct xe_device *xe) @@ -600,20 +571,17 @@ void xe_pat_init_early(struct xe_device *xe) GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); } - /* VFs can't program nor dump PAT settings */ - if (IS_SRIOV_VF(xe)) - xe->pat.ops = NULL; - - xe_assert(xe, !xe->pat.ops || xe->pat.ops->dump); - xe_assert(xe, !xe->pat.ops || xe->pat.ops->program_graphics); - xe_assert(xe, !xe->pat.ops || MEDIA_VER(xe) < 13 || xe->pat.ops->program_media); + xe_assert(xe, xe->pat.ops->dump); + xe_assert(xe, xe->pat.ops->program_graphics); + xe_assert(xe, MEDIA_VER(xe) < 13 || xe->pat.ops->program_media); + xe_assert(xe, GRAPHICS_VER(xe) < 20 || xe->pat.ops->entry_dump); } void xe_pat_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - if (!xe->pat.ops) + if (IS_SRIOV_VF(xe)) return; if (xe_gt_is_media_type(gt)) @@ -633,7 +601,7 @@ int xe_pat_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); - if (!xe->pat.ops) + if (IS_SRIOV_VF(xe)) return -EOPNOTSUPP; return xe->pat.ops->dump(gt, p); @@ -658,12 +626,9 @@ int xe_pat_dump_sw_config(struct xe_gt *gt, struct drm_printer *p) for (u32 i = 0; i < xe->pat.n_entries; i++) { u32 pat = xe->pat.table[i].value; - if (GRAPHICS_VERx100(xe) == 3511) { + if (GRAPHICS_VER(xe) >= 20) { xe_pat_index_label(label, sizeof(label), 
i); - xe3p_xpc_pat_entry_dump(p, label, pat, !xe->pat.table[i].valid); - } else if (GRAPHICS_VER(xe) == 30 || GRAPHICS_VER(xe) == 20) { - xe_pat_index_label(label, sizeof(label), i); - xe2_pat_entry_dump(p, label, pat, !xe->pat.table[i].valid); + xe->pat.ops->entry_dump(p, label, pat, !xe->pat.table[i].valid); } else if (xe->info.platform == XE_METEORLAKE) { xelpg_pat_entry_dump(p, i, pat); } else if (xe->info.platform == XE_PVC) { @@ -679,14 +644,14 @@ int xe_pat_dump_sw_config(struct xe_gt *gt, struct drm_printer *p) u32 pat = xe->pat.pat_pta->value; drm_printf(p, "Page Table Access:\n"); - xe2_pat_entry_dump(p, "PTA_MODE", pat, false); + xe->pat.ops->entry_dump(p, "PTA_MODE", pat, false); } if (xe->pat.pat_ats) { u32 pat = xe->pat.pat_ats->value; drm_printf(p, "PCIe ATS/PASID:\n"); - xe2_pat_entry_dump(p, "PAT_ATS ", pat, false); + xe->pat.ops->entry_dump(p, "PAT_ATS ", pat, false); } drm_printf(p, "Cache Level:\n"); From c2a6859138e7f73ad904be17dd7d1da6cc7f06b3 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Fri, 30 Jan 2026 04:39:08 +0000 Subject: [PATCH 039/195] drm/xe/query: Fix topology query pointer advance The topology query helper advanced the user pointer by the size of the pointer, not the size of the structure. This can misalign the output blob and corrupt the following mask. Fix the increment to use sizeof(*topo). There is no issue currently, as sizeof(*topo) happens to be equal to sizeof(topo) on 64-bit systems (both evaluate to 8 bytes). 
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Shuicheng Lin Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260130043907.465128-2-shuicheng.lin@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_query.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index b7b4261968e0..34db266b723f 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -487,7 +487,7 @@ static int copy_mask(void __user **ptr, if (copy_to_user(*ptr, topo, sizeof(*topo))) return -EFAULT; - *ptr += sizeof(topo); + *ptr += sizeof(*topo); if (copy_to_user(*ptr, mask, mask_size)) return -EFAULT; From 9fd8da717934f05125b9ba6782622c459a368dc0 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 29 Jan 2026 23:38:36 +0000 Subject: [PATCH 040/195] drm/xe: Fix kerneldoc for xe_migrate_exec_queue Correct the function name in the kerneldoc. It is for below warning: "Warning: drivers/gpu/drm/xe/xe_migrate.c:1262 expecting prototype for xe_get_migrate_exec_queue(). Prototype was for xe_migrate_exec_queue() instead" Fixes: 916ee4704a865 ("drm/xe/vf: Register CCS read/write contexts with Guc") Reviewed-by: Michal Wajdeczko Signed-off-by: Shuicheng Lin Signed-off-by: Michal Wajdeczko Link: https://patch.msgid.link/20260129233834.419977-6-shuicheng.lin@intel.com --- drivers/gpu/drm/xe/xe_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 6e202428aac2..078a9bc2821d 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1254,7 +1254,7 @@ void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo, } /** - * xe_get_migrate_exec_queue() - Get the execution queue from migrate context. + * xe_migrate_exec_queue() - Get the execution queue from migrate context. * @migrate: Migrate context. 
* * Return: Pointer to execution queue on success, error on failure From 0651dbb9d6a72e99569576fbec4681fd8160d161 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 29 Jan 2026 23:38:37 +0000 Subject: [PATCH 041/195] drm/xe: Fix kerneldoc for xe_gt_tlb_inval_init_early Correct the function name in the kerneldoc. It is for below warning: "Warning: drivers/gpu/drm/xe/xe_tlb_inval.c:136 expecting prototype for xe_gt_tlb_inval_init(). Prototype was for xe_gt_tlb_inval_init_early() instead" v2: add () for the function. (Michal) Fixes: db16f9d90c1d9 ("drm/xe: Split TLB invalidation code in frontend and backend") Reviewed-by: Michal Wajdeczko Signed-off-by: Shuicheng Lin Signed-off-by: Michal Wajdeczko Link: https://patch.msgid.link/20260129233834.419977-7-shuicheng.lin@intel.com --- drivers/gpu/drm/xe/xe_tlb_inval.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c index 989fe0e7f8ee..933f30fb617d 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval.c +++ b/drivers/gpu/drm/xe/xe_tlb_inval.c @@ -125,7 +125,7 @@ static void primelockdep(struct xe_tlb_inval *tlb_inval) } /** - * xe_gt_tlb_inval_init - Initialize TLB invalidation state + * xe_gt_tlb_inval_init_early() - Initialize TLB invalidation state * @gt: GT structure * * Initialize TLB invalidation state, purely software initialization, should From 9f9c117ac566cb567dd56cc5b7564c45653f7a2a Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 29 Jan 2026 23:38:38 +0000 Subject: [PATCH 042/195] drm/xe: Fix kerneldoc for xe_tlb_inval_job_alloc_dep Correct the function name in the kerneldoc. It is for below warning: "Warning: drivers/gpu/drm/xe/xe_tlb_inval_job.c:210 expecting prototype for xe_tlb_inval_alloc_dep(). 
Prototype was for xe_tlb_inval_job_alloc_dep() instead" Fixes: 15366239e2130 ("drm/xe: Decouple TLB invalidations from GT") Reviewed-by: Michal Wajdeczko Signed-off-by: Shuicheng Lin Signed-off-by: Michal Wajdeczko Link: https://patch.msgid.link/20260129233834.419977-8-shuicheng.lin@intel.com --- drivers/gpu/drm/xe/xe_tlb_inval_job.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.c b/drivers/gpu/drm/xe/xe_tlb_inval_job.c index 6a7bd6315797..01c413a2537e 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval_job.c +++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.c @@ -198,7 +198,7 @@ static void xe_tlb_inval_job_destroy(struct kref *ref) } /** - * xe_tlb_inval_alloc_dep() - TLB invalidation job alloc dependency + * xe_tlb_inval_job_alloc_dep() - TLB invalidation job alloc dependency * @job: TLB invalidation job to alloc dependency for * * Allocate storage for a dependency in the TLB invalidation fence. This From 316b05ae7ed90544733f5c01e14f64d6e84a80dc Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 28 Jan 2026 23:27:13 +0100 Subject: [PATCH 043/195] drm/xe/pf: Simplify IS_SRIOV_PF macro Instead of having two variants of the IS_SRIOV_PF macro, move the CONFIG_PCI_IOV check to the xe_device_is_sriov_pf() function and let the compiler optimize that. This will help us drop poor man's type check of the macro parameter that fails on const xe pointer. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260128222714.3056-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_sriov.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h index 6db45df55615..72e55543c30e 100644 --- a/drivers/gpu/drm/xe/xe_sriov.h +++ b/drivers/gpu/drm/xe/xe_sriov.h @@ -28,7 +28,8 @@ static inline enum xe_sriov_mode xe_device_sriov_mode(const struct xe_device *xe static inline bool xe_device_is_sriov_pf(const struct xe_device *xe) { - return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_PF; + return IS_ENABLED(CONFIG_PCI_IOV) && + xe_device_sriov_mode(xe) == XE_SRIOV_MODE_PF; } static inline bool xe_device_is_sriov_vf(const struct xe_device *xe) @@ -36,11 +37,7 @@ static inline bool xe_device_is_sriov_vf(const struct xe_device *xe) return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_VF; } -#ifdef CONFIG_PCI_IOV #define IS_SRIOV_PF(xe) xe_device_is_sriov_pf(xe) -#else -#define IS_SRIOV_PF(xe) (typecheck(struct xe_device *, (xe)) && false) -#endif #define IS_SRIOV_VF(xe) xe_device_is_sriov_vf(xe) #define IS_SRIOV(xe) (IS_SRIOV_PF(xe) || IS_SRIOV_VF(xe)) From 94a2ceb1906d2bcbf2c6afbc5ede400eea3872c8 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 27 Jan 2026 20:37:21 +0100 Subject: [PATCH 044/195] drm/xe: Promote relaxed_ms_sleep We want to have single place with sleep related helpers for better code reuse. Create xe_sleep.h and move relaxed_ms_sleep() there. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260127193727.601-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 23 ++----------------- drivers/gpu/drm/xe/xe_sleep.h | 36 ++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 21 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_sleep.h diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 1f4625ddae0e..bab930ca62e3 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -8,9 +8,7 @@ #include #include #include -#include #include -#include #include @@ -42,6 +40,7 @@ #include "xe_pm.h" #include "xe_ring_ops_types.h" #include "xe_sched_job.h" +#include "xe_sleep.h" #include "xe_trace.h" #include "xe_uc_fw.h" #include "xe_vm.h" @@ -1032,24 +1031,6 @@ static u32 wq_space_until_wrap(struct xe_exec_queue *q) return (WQ_SIZE - q->guc->wqi_tail); } -static inline void relaxed_ms_sleep(unsigned int delay_ms) -{ - unsigned long min_us, max_us; - - if (!delay_ms) - return; - - if (delay_ms > 20) { - msleep(delay_ms); - return; - } - - min_us = mul_u32_u32(delay_ms, 1000); - max_us = min_us + 500; - - usleep_range(min_us, max_us); -} - static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size) { struct xe_guc *guc = exec_queue_to_guc(q); @@ -1834,7 +1815,7 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) since_resume_ms; if (wait_ms > 0 && q->guc->resume_time) - relaxed_ms_sleep(wait_ms); + xe_sleep_relaxed_ms(wait_ms); set_exec_queue_suspended(q); disable_scheduling(q, false); diff --git a/drivers/gpu/drm/xe/xe_sleep.h b/drivers/gpu/drm/xe/xe_sleep.h new file mode 100644 index 000000000000..a772f1a37395 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sleep.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2026 Intel Corporation + */ + +#ifndef _XE_SLEEP_H_ +#define _XE_SLEEP_H_ + +#include +#include + +/** + * 
xe_sleep_relaxed_ms() - Sleep for an approximate time. + * @delay_ms: time in msec to sleep + * + * For smaller timeouts, sleep with 0.5ms accuracy. + */ +static inline void xe_sleep_relaxed_ms(unsigned int delay_ms) +{ + unsigned long min_us, max_us; + + if (!delay_ms) + return; + + if (delay_ms > 20) { + msleep(delay_ms); + return; + } + + min_us = mul_u32_u32(delay_ms, 1000); + max_us = min_us + 500; + + usleep_range(min_us, max_us); +} + +#endif From eec43f368466e0de9a4aaa4d09ac23815186d2a6 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 27 Jan 2026 20:37:22 +0100 Subject: [PATCH 045/195] drm/xe: Move exponential sleep logic to helper We want to reuse the same increased sleep logic in other places. To avoid code duplication, move it to the helper. Signed-off-by: Michal Wajdeczko Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260127193727.601-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 5 +---- drivers/gpu/drm/xe/xe_sleep.h | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index bab930ca62e3..de716c1fb18e 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1049,10 +1049,7 @@ static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size) return -ENODEV; } - msleep(sleep_period_ms); - sleep_total_ms += sleep_period_ms; - if (sleep_period_ms < 64) - sleep_period_ms <<= 1; + sleep_total_ms += xe_sleep_exponential_ms(&sleep_period_ms, 64); goto try_again; } } diff --git a/drivers/gpu/drm/xe/xe_sleep.h b/drivers/gpu/drm/xe/xe_sleep.h index a772f1a37395..2bc3f4c0ee0b 100644 --- a/drivers/gpu/drm/xe/xe_sleep.h +++ b/drivers/gpu/drm/xe/xe_sleep.h @@ -33,4 +33,25 @@ static inline void xe_sleep_relaxed_ms(unsigned int delay_ms) usleep_range(min_us, max_us); } +/** + * xe_sleep_exponential_ms() - Sleep for an exponentially increased time. 
+ * @sleep_period_ms: current time in msec to sleep + * @max_sleep_ms: maximum time in msec to sleep + * + * Sleep for the @sleep_period_ms and exponentially increase this time for the + * next loop, unless reaching the @max_sleep_ms limit. + * + * Return: approximate time in msec the task was delayed. + */ +static inline unsigned int xe_sleep_exponential_ms(unsigned int *sleep_period_ms, + unsigned int max_sleep_ms) +{ + unsigned int delay_ms = *sleep_period_ms; + unsigned int next_delay_ms = 2 * delay_ms; + + xe_sleep_relaxed_ms(delay_ms); + *sleep_period_ms = min(next_delay_ms, max_sleep_ms); + return delay_ms; +} + #endif From 943c4d0637cff358f7be8e71694475538e7b1ce3 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 27 Jan 2026 20:37:23 +0100 Subject: [PATCH 046/195] drm/xe/guc: Limit sleep while waiting for H2G credits Instead of endlessly increasing the sleep timeout while waiting for the H2G credits, use exponential increase only up to the given limit, like it was initially done in the GuC submission code. While here, fix the actual timeout to the 1s as it was documented. 
Suggested-by: Matthew Brost Signed-off-by: Michal Wajdeczko Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260127193727.601-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index dfbf76037b04..d4111124cd5f 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -32,6 +32,7 @@ #include "xe_guc_tlb_inval.h" #include "xe_map.h" #include "xe_pm.h" +#include "xe_sleep.h" #include "xe_sriov_vf.h" #include "xe_trace_guc.h" @@ -1101,7 +1102,8 @@ static int dequeue_one_g2h(struct xe_guc_ct *ct); */ static bool guc_ct_send_wait_for_retry(struct xe_guc_ct *ct, u32 len, u32 g2h_len, struct g2h_fence *g2h_fence, - unsigned int *sleep_period_ms) + unsigned int *sleep_period_ms, + unsigned int *sleep_total_ms) { struct xe_device *xe = ct_to_xe(ct); @@ -1115,15 +1117,14 @@ static bool guc_ct_send_wait_for_retry(struct xe_guc_ct *ct, u32 len, if (!h2g_has_room(ct, len + GUC_CTB_HDR_LEN)) { struct guc_ctb *h2g = &ct->ctbs.h2g; - if (*sleep_period_ms == 1024) + if (*sleep_total_ms > 1000) return false; trace_xe_guc_ct_h2g_flow_control(xe, h2g->info.head, h2g->info.tail, h2g->info.size, h2g->info.space, len + GUC_CTB_HDR_LEN); - msleep(*sleep_period_ms); - *sleep_period_ms <<= 1; + xe_sleep_exponential_ms(sleep_period_ms, 64); } else { struct xe_device *xe = ct_to_xe(ct); struct guc_ctb *g2h = &ct->ctbs.g2h; @@ -1161,6 +1162,7 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, { struct xe_gt *gt = ct_to_gt(ct); unsigned int sleep_period_ms = 1; + unsigned int sleep_total_ms = 0; int ret; xe_gt_assert(gt, !g2h_len || !g2h_fence); @@ -1173,7 +1175,7 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, if (unlikely(ret == -EBUSY)) { if (!guc_ct_send_wait_for_retry(ct, len, g2h_len, g2h_fence, - &sleep_period_ms)) + 
&sleep_period_ms, &sleep_total_ms)) goto broken; goto try_again; } From 09b45fd9d330e52a5b31d1abc0db4a22b16d340a Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 27 Jan 2026 20:37:24 +0100 Subject: [PATCH 047/195] drm/xe/guc: Drop redundant register read The xe_mmio_wait32() already returns the last value of the register for which we were waiting, there is no need to read it again. Signed-off-by: Michal Wajdeczko Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260127193727.601-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index c0eeaf08479d..2ae5a78b52ed 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1405,11 +1405,11 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); struct xe_mmio *mmio = &gt->mmio; - u32 header, reply; struct xe_reg reply_reg = xe_gt_is_media_type(gt) ?
MED_VF_SW_FLAG(0) : VF_SW_FLAG(0); const u32 LAST_INDEX = VF_SW_FLAG_COUNT - 1; bool lost = false; + u32 header; int ret; int i; @@ -1441,21 +1441,20 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, ret = xe_mmio_wait32(mmio, reply_reg, GUC_HXG_MSG_0_ORIGIN, FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC), - 50000, &reply, false); + 50000, &header, false); if (ret) { /* scratch registers might be cleared during FLR, try once more */ - if (!reply && !lost) { + if (!header && !lost) { xe_gt_dbg(gt, "GuC mmio request %#x: lost, trying again\n", request[0]); lost = true; goto retry; } timeout: xe_gt_err(gt, "GuC mmio request %#x: no reply %#x\n", - request[0], reply); + request[0], header); return ret; } - header = xe_mmio_read32(mmio, reply_reg); if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == GUC_HXG_TYPE_NO_RESPONSE_BUSY) { /* From e116fd5c60c4950171fbe7773a34a5841a021b9f Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 27 Jan 2026 20:37:25 +0100 Subject: [PATCH 048/195] drm/xe/guc: Wait before retrying sending H2G We shall resend H2G message after receiving NO_RESPONSE_RETRY reply, but since GuC dropped that H2G due to some interim state, we should give it a little time to stabilize. Wait before sending the same H2G again, start with 1ms delay, then increase exponentially to 256ms. 
Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260127193727.601-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 2ae5a78b52ed..6cc778e7cb57 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -40,6 +40,7 @@ #include "xe_memirq.h" #include "xe_mmio.h" #include "xe_platform_types.h" +#include "xe_sleep.h" #include "xe_sriov.h" #include "xe_sriov_pf_migration.h" #include "xe_uc.h" @@ -1408,6 +1409,7 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, struct xe_reg reply_reg = xe_gt_is_media_type(gt) ? MED_VF_SW_FLAG(0) : VF_SW_FLAG(0); const u32 LAST_INDEX = VF_SW_FLAG_COUNT - 1; + unsigned int sleep_period_ms = 1; bool lost = false; u32 header; int ret; @@ -1490,6 +1492,8 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, xe_gt_dbg(gt, "GuC mmio request %#x: retrying, reason %#x\n", request[0], reason); + + xe_sleep_exponential_ms(&sleep_period_ms, 256); goto retry; } From 65b9886062137ad4d708045f2f4c92d06f285e8b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 27 Jan 2026 20:37:26 +0100 Subject: [PATCH 049/195] drm/xe/guc: Allow second H2G retry on FLR During VF FLR the scratch registers could be cleared both by the GuC and by the PF driver. Allow to retry more times once we find out that the HXG header was cleared and wait at least 256ms before resending the same message again to the GuC. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260127193727.601-7-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 6cc778e7cb57..d5910b0adbaa 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1400,6 +1400,9 @@ int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr) return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); } +#define MAX_RETRIES_ON_FLR 2 +#define MIN_SLEEP_MS_ON_FLR 256 + int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, u32 len, u32 *response_buf) { @@ -1410,7 +1413,7 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, MED_VF_SW_FLAG(0) : VF_SW_FLAG(0); const u32 LAST_INDEX = VF_SW_FLAG_COUNT - 1; unsigned int sleep_period_ms = 1; - bool lost = false; + unsigned int lost = 0; u32 header; int ret; int i; @@ -1446,9 +1449,14 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, 50000, &header, false); if (ret) { /* scratch registers might be cleared during FLR, try once more */ - if (!header && !lost) { + if (!header) { + if (++lost > MAX_RETRIES_ON_FLR) { + xe_gt_err(gt, "GuC mmio request %#x: lost, too many retries %u\n", + request[0], lost); + return -ENOLINK; + } xe_gt_dbg(gt, "GuC mmio request %#x: lost, trying again\n", request[0]); - lost = true; + xe_sleep_relaxed_ms(MIN_SLEEP_MS_ON_FLR); goto retry; } timeout: From fa171b805f25d774a358ad2af72da4c8444859cd Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Mon, 2 Feb 2026 11:37:55 +0100 Subject: [PATCH 050/195] drm/xe: replace use of system_unbound_wq with system_dfl_wq This patch continues the effort to refactor workqueue APIs, which has begun with the changes introducing new workqueues and a new alloc_workqueue flag: commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") commit 
930c2ea566af ("workqueue: Add new WQ_PERCPU flag") The point of the refactoring is to eventually alter the default behavior of workqueues to become unbound by default so that their workload placement is optimized by the scheduler. Before that to happen, workqueue users must be converted to the better named new workqueues with no intended behaviour changes: system_wq -> system_percpu_wq system_unbound_wq -> system_dfl_wq This way the old obsolete workqueues (system_wq, system_unbound_wq) can be removed in the future. Link: https://lore.kernel.org/all/20250221112003.1dSuoGyc@linutronix.de/ Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Reviewed-by: Rodrigo Vivi Link: https://patch.msgid.link/20260202103756.62138-2-marco.crivellari@suse.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_devcoredump.c | 2 +- drivers/gpu/drm/xe/xe_execlist.c | 2 +- drivers/gpu/drm/xe/xe_guc_ct.c | 4 ++-- drivers/gpu/drm/xe/xe_oa.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index cf41bb6d2172..558a1a9841a0 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -356,7 +356,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, xe_engine_snapshot_capture_for_queue(q); - queue_work(system_unbound_wq, &ss->work); + queue_work(system_dfl_wq, &ss->work); dma_fence_end_signalling(cookie); } diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 005a5b2c36fe..dc25caf47813 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -421,7 +421,7 @@ static void execlist_exec_queue_kill(struct xe_exec_queue *q) static void execlist_exec_queue_destroy(struct xe_exec_queue *q) { INIT_WORK(&q->execlist->destroy_async, execlist_exec_queue_destroy_async); - queue_work(system_unbound_wq, &q->execlist->destroy_async); + 
queue_work(system_dfl_wq, &q->execlist->destroy_async); } static int execlist_exec_queue_set_priority(struct xe_exec_queue *q, diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index d4111124cd5f..8a45573f8812 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -644,7 +644,7 @@ static int __xe_guc_ct_start(struct xe_guc_ct *ct, bool needs_register) spin_lock_irq(&ct->dead.lock); if (ct->dead.reason) { ct->dead.reason |= (1 << CT_DEAD_STATE_REARM); - queue_work(system_unbound_wq, &ct->dead.worker); + queue_work(system_dfl_wq, &ct->dead.worker); } spin_unlock_irq(&ct->dead.lock); #endif @@ -2167,7 +2167,7 @@ static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reaso spin_unlock_irqrestore(&ct->dead.lock, flags); - queue_work(system_unbound_wq, &(ct)->dead.worker); + queue_work(system_dfl_wq, &(ct)->dead.worker); } static void ct_dead_print(struct xe_dead_ct *dead) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index abf87fe0b345..8b37e49f639f 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -969,7 +969,7 @@ static void xe_oa_config_cb(struct dma_fence *fence, struct dma_fence_cb *cb) struct xe_oa_fence *ofence = container_of(cb, typeof(*ofence), cb); INIT_DELAYED_WORK(&ofence->work, xe_oa_fence_work_fn); - queue_delayed_work(system_unbound_wq, &ofence->work, + queue_delayed_work(system_dfl_wq, &ofence->work, usecs_to_jiffies(NOA_PROGRAM_ADDITIONAL_DELAY_US)); dma_fence_put(fence); } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 293b92ed2fdd..e6cfa5dc7f62 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1112,7 +1112,7 @@ static void vma_destroy_cb(struct dma_fence *fence, struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); INIT_WORK(&vma->destroy_work, vma_destroy_work_func); - queue_work(system_unbound_wq, &vma->destroy_work); + queue_work(system_dfl_wq, 
&vma->destroy_work); } static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) @@ -1894,7 +1894,7 @@ static void xe_vm_free(struct drm_gpuvm *gpuvm) struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); /* To destroy the VM we need to be able to sleep */ - queue_work(system_unbound_wq, &vm->destroy_work); + queue_work(system_dfl_wq, &vm->destroy_work); } struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) From 0bc2c2e1a388fa4d94d2fca1a40d3619f923870f Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Mon, 2 Feb 2026 11:37:56 +0100 Subject: [PATCH 051/195] drm/xe: add WQ_PERCPU to alloc_workqueue users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This continues the effort to refactor workqueue APIs, which began with the introduction of new workqueues and a new alloc_workqueue flag in: commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag") The refactoring is going to alter the default behavior of alloc_workqueue() to be unbound by default. With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND), any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND must now use WQ_PERCPU. For more details see the Link tag below. In order to keep alloc_workqueue() behavior identical, explicitly request WQ_PERCPU. 
Link: https://lore.kernel.org/all/20250221112003.1dSuoGyc@linutronix.de/ Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Reviewed-by: Rodrigo Vivi Link: https://patch.msgid.link/20260202103756.62138-3-marco.crivellari@suse.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 4 ++-- drivers/gpu/drm/xe/xe_ggtt.c | 2 +- drivers/gpu/drm/xe/xe_hw_engine_group.c | 3 ++- drivers/gpu/drm/xe/xe_sriov.c | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 9cf82bde36c4..9e5fb0d4b8e7 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -508,8 +508,8 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", WQ_MEM_RECLAIM); xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); - xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0); - xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0); + xe->unordered_wq = alloc_workqueue("xe-unordered-wq", WQ_PERCPU, 0); + xe->destroy_wq = alloc_workqueue("xe-destroy-wq", WQ_PERCPU, 0); if (!xe->ordered_wq || !xe->unordered_wq || !xe->preempt_fence_wq || !xe->destroy_wq) { /* diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 60665ad1415b..8b9d7c0bbe90 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -367,7 +367,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) else ggtt->pt_ops = &xelp_pt_ops; - ggtt->wq = alloc_workqueue("xe-ggtt-wq", WQ_MEM_RECLAIM, 0); + ggtt->wq = alloc_workqueue("xe-ggtt-wq", WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!ggtt->wq) return -ENOMEM; diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 2ef33dfbe3a2..4c2b113364d3 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -51,7 +51,8 @@ hw_engine_group_alloc(struct xe_device *xe) if (!group) return ERR_PTR(-ENOMEM); - 
group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0); + group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", WQ_PERCPU, + 0); if (!group->resume_wq) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index ea411944609b..f3835867fce5 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -120,7 +120,7 @@ int xe_sriov_init(struct xe_device *xe) xe_sriov_vf_init_early(xe); xe_assert(xe, !xe->sriov.wq); - xe->sriov.wq = alloc_workqueue("xe-sriov-wq", 0, 0); + xe->sriov.wq = alloc_workqueue("xe-sriov-wq", WQ_PERCPU, 0); if (!xe->sriov.wq) return -ENOMEM; From 44f44d43f991825c856903bed01390c26c21e489 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 21 Jan 2026 22:42:12 +0100 Subject: [PATCH 052/195] drm/xe: Keep all defaults in single header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have most of Xe defaults defined in xe_module.c, where we use them for the modparam initializations, but some were defined elsewhere, which breaks the consistency. Introduce xe_defaults.h file, that will act as a placeholder for all our default values, and can be used from other places. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Acked-by: Rodrigo Vivi Link: https://patch.msgid.link/20260121214218.2817-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_defaults.h | 25 +++++++++++++++++ drivers/gpu/drm/xe/xe_device.c | 3 +- drivers/gpu/drm/xe/xe_device_types.h | 3 -- drivers/gpu/drm/xe/xe_module.c | 42 ++++++++++------------------ 4 files changed, 41 insertions(+), 32 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_defaults.h diff --git a/drivers/gpu/drm/xe/xe_defaults.h b/drivers/gpu/drm/xe/xe_defaults.h new file mode 100644 index 000000000000..9183d05b96e1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_defaults.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2026 Intel Corporation + */ +#ifndef _XE_DEFAULTS_H_ +#define _XE_DEFAULTS_H_ + +#include "xe_device_types.h" + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +#define XE_DEFAULT_GUC_LOG_LEVEL 3 +#else +#define XE_DEFAULT_GUC_LOG_LEVEL 1 +#endif + +#define XE_DEFAULT_PROBE_DISPLAY true +#define XE_DEFAULT_VRAM_BAR_SIZE 0 +#define XE_DEFAULT_FORCE_PROBE CONFIG_DRM_XE_FORCE_PROBE +#define XE_DEFAULT_MAX_VFS ~0 +#define XE_DEFAULT_MAX_VFS_STR "unlimited" +#define XE_DEFAULT_WEDGED_MODE XE_WEDGED_MODE_UPON_CRITICAL_ERROR +#define XE_DEFAULT_WEDGED_MODE_STR "upon-critical-error" +#define XE_DEFAULT_SVM_NOTIFIER_SIZE 512 + +#endif diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 9e5fb0d4b8e7..055084fa50e5 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -26,6 +26,7 @@ #include "xe_bo.h" #include "xe_bo_evict.h" #include "xe_debugfs.h" +#include "xe_defaults.h" #include "xe_devcoredump.h" #include "xe_device_sysfs.h" #include "xe_dma_buf.h" @@ -743,7 +744,7 @@ int xe_device_probe_early(struct xe_device *xe) assert_lmem_ready(xe); xe->wedged.mode = xe_device_validate_wedged_mode(xe, xe_modparam.wedged_mode) ? 
- XE_WEDGED_MODE_DEFAULT : xe_modparam.wedged_mode; + XE_DEFAULT_WEDGED_MODE : xe_modparam.wedged_mode; drm_dbg(&xe->drm, "wedged_mode: setting mode (%u) %s\n", xe->wedged.mode, xe_wedged_mode_to_string(xe->wedged.mode)); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 34feef79fa4e..b4600c5069a9 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -62,9 +62,6 @@ enum xe_wedged_mode { XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET = 2, }; -#define XE_WEDGED_MODE_DEFAULT XE_WEDGED_MODE_UPON_CRITICAL_ERROR -#define XE_WEDGED_MODE_DEFAULT_STR "upon-critical-error" - #define XE_BO_INVALID_OFFSET LONG_MAX #define GRAPHICS_VER(xe) ((xe)->info.graphics_verx100 / 100) diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index a0048f64ed12..903d3b433421 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -10,6 +10,7 @@ #include +#include "xe_defaults.h" #include "xe_device_types.h" #include "xe_drv.h" #include "xe_configfs.h" @@ -19,51 +20,36 @@ #include "xe_observation.h" #include "xe_sched_job.h" -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) -#define DEFAULT_GUC_LOG_LEVEL 3 -#else -#define DEFAULT_GUC_LOG_LEVEL 1 -#endif - -#define DEFAULT_PROBE_DISPLAY true -#define DEFAULT_VRAM_BAR_SIZE 0 -#define DEFAULT_FORCE_PROBE CONFIG_DRM_XE_FORCE_PROBE -#define DEFAULT_MAX_VFS ~0 -#define DEFAULT_MAX_VFS_STR "unlimited" -#define DEFAULT_WEDGED_MODE XE_WEDGED_MODE_DEFAULT -#define DEFAULT_WEDGED_MODE_STR XE_WEDGED_MODE_DEFAULT_STR -#define DEFAULT_SVM_NOTIFIER_SIZE 512 - struct xe_modparam xe_modparam = { - .probe_display = DEFAULT_PROBE_DISPLAY, - .guc_log_level = DEFAULT_GUC_LOG_LEVEL, - .force_probe = DEFAULT_FORCE_PROBE, + .probe_display = XE_DEFAULT_PROBE_DISPLAY, + .guc_log_level = XE_DEFAULT_GUC_LOG_LEVEL, + .force_probe = XE_DEFAULT_FORCE_PROBE, #ifdef CONFIG_PCI_IOV - .max_vfs = DEFAULT_MAX_VFS, + .max_vfs = XE_DEFAULT_MAX_VFS, #endif - 
.wedged_mode = DEFAULT_WEDGED_MODE, - .svm_notifier_size = DEFAULT_SVM_NOTIFIER_SIZE, + .wedged_mode = XE_DEFAULT_WEDGED_MODE, + .svm_notifier_size = XE_DEFAULT_SVM_NOTIFIER_SIZE, /* the rest are 0 by default */ }; module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600); MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size in MiB, must be power of 2 " - "[default=" __stringify(DEFAULT_SVM_NOTIFIER_SIZE) "]"); + "[default=" __stringify(XE_DEFAULT_SVM_NOTIFIER_SIZE) "]"); module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); module_param_named(probe_display, xe_modparam.probe_display, bool, 0444); MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched " - "[default=" __stringify(DEFAULT_PROBE_DISPLAY) "])"); + "[default=" __stringify(XE_DEFAULT_PROBE_DISPLAY) "])"); module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, int, 0600); MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size in MiB (<0=disable-resize, 0=max-needed-size, >0=force-size " - "[default=" __stringify(DEFAULT_VRAM_BAR_SIZE) "])"); + "[default=" __stringify(XE_DEFAULT_VRAM_BAR_SIZE) "])"); module_param_named(guc_log_level, xe_modparam.guc_log_level, int, 0600); MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1=normal, 2..5=verbose-levels " - "[default=" __stringify(DEFAULT_GUC_LOG_LEVEL) "])"); + "[default=" __stringify(XE_DEFAULT_GUC_LOG_LEVEL) "])"); module_param_named_unsafe(guc_firmware_path, xe_modparam.guc_firmware_path, charp, 0400); MODULE_PARM_DESC(guc_firmware_path, @@ -80,20 +66,20 @@ MODULE_PARM_DESC(gsc_firmware_path, module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400); MODULE_PARM_DESC(force_probe, "Force probe options for specified devices. 
See CONFIG_DRM_XE_FORCE_PROBE for details " - "[default=" DEFAULT_FORCE_PROBE "])"); + "[default=" XE_DEFAULT_FORCE_PROBE "])"); #ifdef CONFIG_PCI_IOV module_param_named(max_vfs, xe_modparam.max_vfs, uint, 0400); MODULE_PARM_DESC(max_vfs, "Limit number of Virtual Functions (VFs) that could be managed. " "(0=no VFs; N=allow up to N VFs " - "[default=" DEFAULT_MAX_VFS_STR "])"); + "[default=" XE_DEFAULT_MAX_VFS_STR "])"); #endif module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, uint, 0600); MODULE_PARM_DESC(wedged_mode, "Module's default policy for the wedged mode (0=never, 1=upon-critical-error, 2=upon-any-hang-no-reset " - "[default=" DEFAULT_WEDGED_MODE_STR "])"); + "[default=" XE_DEFAULT_WEDGED_MODE_STR "])"); static int xe_check_nomodeset(void) { From 56dfa9fc39f0d03ca165f84382d088825f5e5404 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 21 Jan 2026 22:42:13 +0100 Subject: [PATCH 053/195] drm/xe/configfs: Use proper notation for local include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For local includes we should use "" notation, not <>. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Acked-by: Rodrigo Vivi Link: https://patch.msgid.link/20260121214218.2817-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_configfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h index fed57be0b90e..510888354a7c 100644 --- a/drivers/gpu/drm/xe/xe_configfs.h +++ b/drivers/gpu/drm/xe/xe_configfs.h @@ -8,7 +8,7 @@ #include #include -#include +#include "xe_hw_engine_types.h" struct pci_dev; From 0dfc7306b9600127afce5dbcb53389ca72237031 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 21 Jan 2026 22:42:14 +0100 Subject: [PATCH 054/195] drm/xe/configfs: Always return consistent max_vfs value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The max_vfs parameter used by the Xe driver has its default value definition, but it could be altered by the module parameter or by the device specific configfs attribute. To avoid mistakes or code duplication, always rely on the configfs helper (or stub), which will provide necessary fallback if needed. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Acked-by: Rodrigo Vivi Link: https://patch.msgid.link/20260121214218.2817-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_configfs.c | 3 ++- drivers/gpu/drm/xe/xe_configfs.h | 8 +++++++- drivers/gpu/drm/xe/xe_sriov_pf.c | 4 +--- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index 82edd0466005..5a54ca67d3dc 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -15,6 +15,7 @@ #include "instructions/xe_mi_commands.h" #include "xe_configfs.h" +#include "xe_defaults.h" #include "xe_gt_types.h" #include "xe_hw_engine_types.h" #include "xe_module.h" @@ -280,7 +281,7 @@ static const struct xe_config_device device_defaults = { .survivability_mode = false, .enable_psmi = false, .sriov = { - .max_vfs = UINT_MAX, + .max_vfs = XE_DEFAULT_MAX_VFS, }, }; diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h index 510888354a7c..e0a555b871b3 100644 --- a/drivers/gpu/drm/xe/xe_configfs.h +++ b/drivers/gpu/drm/xe/xe_configfs.h @@ -9,6 +9,7 @@ #include #include "xe_hw_engine_types.h" +#include "xe_module.h" struct pci_dev; @@ -41,7 +42,12 @@ static inline u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum const u32 **cs) { return 0; } static inline u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class, const u32 **cs) { return 0; } -static inline unsigned int xe_configfs_get_max_vfs(struct pci_dev *pdev) { return UINT_MAX; } +#ifdef CONFIG_PCI_IOV +static inline unsigned int xe_configfs_get_max_vfs(struct pci_dev *pdev) +{ + return xe_modparam.max_vfs; +} +#endif #endif #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index 6ce3c58e003c..33bd754d138f 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -22,9 +22,7 @@ static unsigned int wanted_max_vfs(struct 
xe_device *xe) { - if (IS_ENABLED(CONFIG_CONFIGFS_FS)) - return xe_configfs_get_max_vfs(to_pci_dev(xe->drm.dev)); - return xe_modparam.max_vfs; + return xe_configfs_get_max_vfs(to_pci_dev(xe->drm.dev)); } static int pf_reduce_totalvfs(struct xe_device *xe, int limit) From 10f817c256d7d5fd2680ecdb4ab52c5506fe90b5 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 27 Jan 2026 22:05:01 +0100 Subject: [PATCH 055/195] drm/xe/pf: Define admin_only as real flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of doing guesses each time during the runtime, set flag admin_only once during PF's initialization. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Acked-by: Rodrigo Vivi Link: https://patch.msgid.link/20260127210501.794-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c | 2 +- drivers/gpu/drm/xe/xe_sriov_pf.c | 6 ++++++ drivers/gpu/drm/xe/xe_sriov_pf_helpers.h | 3 ++- drivers/gpu/drm/xe/xe_sriov_pf_types.h | 3 +++ 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c index 42bfc4bcfbcf..3889dc3e49ca 100644 --- a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c +++ b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c @@ -15,7 +15,7 @@ static void pf_set_admin_mode(struct xe_device *xe, bool enable) { /* should match logic of xe_sriov_pf_admin_only() */ - xe->info.probe_display = !enable; + xe->sriov.pf.admin_only = enable; KUNIT_EXPECT_EQ(kunit_get_current_test(), enable, xe_sriov_pf_admin_only(xe)); } diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index 33bd754d138f..919f176a19eb 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -20,6 +20,11 @@ #include "xe_sriov_pf_sysfs.h" #include "xe_sriov_printk.h" +static bool wanted_admin_only(struct xe_device *xe) +{ + 
return !xe->info.probe_display; +} + static unsigned int wanted_max_vfs(struct xe_device *xe) { return xe_configfs_get_max_vfs(to_pci_dev(xe->drm.dev)); @@ -74,6 +79,7 @@ bool xe_sriov_pf_readiness(struct xe_device *xe) pf_reduce_totalvfs(xe, newlimit); + xe->sriov.pf.admin_only = wanted_admin_only(xe); xe->sriov.pf.device_total_vfs = totalvfs; xe->sriov.pf.driver_max_vfs = newlimit; diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h index 9054fdc34597..0fcc6cec4afc 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h @@ -56,7 +56,8 @@ static inline unsigned int xe_sriov_pf_num_vfs(const struct xe_device *xe) */ static inline bool xe_sriov_pf_admin_only(const struct xe_device *xe) { - return !xe->info.probe_display; + xe_assert(xe, IS_SRIOV_PF(xe)); + return xe->sriov.pf.admin_only; } static inline struct mutex *xe_sriov_pf_master_mutex(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_types.h index b0253e1ae5da..080cf10512f4 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_types.h @@ -36,6 +36,9 @@ struct xe_sriov_metadata { * @XE_SRIOV_MODE_PF mode. */ struct xe_device_pf { + /** @admin_only: PF functionality focused on VFs management only. */ + bool admin_only; + /** @device_total_vfs: Maximum number of VFs supported by the device. */ u16 device_total_vfs; From 34ef561a0d497dd86dd4f511a86c7481ddf29aeb Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 21 Jan 2026 22:42:16 +0100 Subject: [PATCH 056/195] drm/xe/configfs: Add sriov.admin_only_pf attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of relying on fixed relation to the display probe flag, add configfs attribute to allow an administrator to configure desired PF operation mode in a more flexible way. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Acked-by: Rodrigo Vivi Link: https://patch.msgid.link/20260121214218.2817-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_configfs.c | 61 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_configfs.h | 6 ++++ drivers/gpu/drm/xe/xe_defaults.h | 1 + drivers/gpu/drm/xe/xe_sriov_pf.c | 2 +- 4 files changed, 69 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index 5a54ca67d3dc..d8c3fbe81aa6 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -264,6 +264,7 @@ struct xe_config_group_device { bool enable_psmi; struct { unsigned int max_vfs; + bool admin_only_pf; } sriov; } config; @@ -282,6 +283,7 @@ static const struct xe_config_device device_defaults = { .enable_psmi = false, .sriov = { .max_vfs = XE_DEFAULT_MAX_VFS, + .admin_only_pf = XE_DEFAULT_ADMIN_ONLY_PF, }, }; @@ -897,10 +899,40 @@ static ssize_t sriov_max_vfs_store(struct config_item *item, const char *page, s return len; } +static ssize_t sriov_admin_only_pf_show(struct config_item *item, char *page) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item->ci_parent); + + guard(mutex)(&dev->lock); + + return sprintf(page, "%s\n", str_yes_no(dev->config.sriov.admin_only_pf)); +} + +static ssize_t sriov_admin_only_pf_store(struct config_item *item, const char *page, size_t len) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item->ci_parent); + bool admin_only_pf; + int ret; + + guard(mutex)(&dev->lock); + + if (is_bound(dev)) + return -EBUSY; + + ret = kstrtobool(page, &admin_only_pf); + if (ret) + return ret; + + dev->config.sriov.admin_only_pf = admin_only_pf; + return len; +} + CONFIGFS_ATTR(sriov_, max_vfs); +CONFIGFS_ATTR(sriov_, admin_only_pf); static struct configfs_attribute *xe_config_sriov_attrs[] = { &sriov_attr_max_vfs, + &sriov_attr_admin_only_pf, NULL, }; @@ -911,6 +943,8 @@ static bool 
xe_config_sriov_is_visible(struct config_item *item, if (attr == &sriov_attr_max_vfs && dev->mode != XE_SRIOV_MODE_PF) return false; + if (attr == &sriov_attr_admin_only_pf && dev->mode != XE_SRIOV_MODE_PF) + return false; return true; } @@ -1064,6 +1098,7 @@ static void dump_custom_dev_config(struct pci_dev *pdev, PRI_CUSTOM_ATTR("%llx", engines_allowed); PRI_CUSTOM_ATTR("%d", enable_psmi); PRI_CUSTOM_ATTR("%d", survivability_mode); + PRI_CUSTOM_ATTR("%u", sriov.admin_only_pf); #undef PRI_CUSTOM_ATTR } @@ -1242,6 +1277,32 @@ u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, } #ifdef CONFIG_PCI_IOV +/** + * xe_configfs_admin_only_pf() - Get PF's operational mode. + * @pdev: the &pci_dev device + * + * Find the configfs group that belongs to the PCI device and return a flag + * whether the PF driver should be dedicated for VFs management only. + * + * If configfs group is not present, use driver's default value. + * + * Return: true if PF driver is dedicated for VFs administration only. 
+ */ +bool xe_configfs_admin_only_pf(struct pci_dev *pdev) +{ + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + bool admin_only_pf; + + if (!dev) + return XE_DEFAULT_ADMIN_ONLY_PF; + + scoped_guard(mutex, &dev->lock) + admin_only_pf = dev->config.sriov.admin_only_pf; + + config_group_put(&dev->group); + + return admin_only_pf; +} /** * xe_configfs_get_max_vfs() - Get number of VFs that could be managed * @pdev: the &pci_dev device diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h index e0a555b871b3..487531269511 100644 --- a/drivers/gpu/drm/xe/xe_configfs.h +++ b/drivers/gpu/drm/xe/xe_configfs.h @@ -8,6 +8,7 @@ #include #include +#include "xe_defaults.h" #include "xe_hw_engine_types.h" #include "xe_module.h" @@ -28,6 +29,7 @@ u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_cla const u32 **cs); #ifdef CONFIG_PCI_IOV unsigned int xe_configfs_get_max_vfs(struct pci_dev *pdev); +bool xe_configfs_admin_only_pf(struct pci_dev *pdev); #endif #else static inline int xe_configfs_init(void) { return 0; } @@ -47,6 +49,10 @@ static inline unsigned int xe_configfs_get_max_vfs(struct pci_dev *pdev) { return xe_modparam.max_vfs; } +static inline bool xe_configfs_admin_only_pf(struct pci_dev *pdev) +{ + return XE_DEFAULT_ADMIN_ONLY_PF; +} #endif #endif diff --git a/drivers/gpu/drm/xe/xe_defaults.h b/drivers/gpu/drm/xe/xe_defaults.h index 9183d05b96e1..5d5d41d067c5 100644 --- a/drivers/gpu/drm/xe/xe_defaults.h +++ b/drivers/gpu/drm/xe/xe_defaults.h @@ -18,6 +18,7 @@ #define XE_DEFAULT_FORCE_PROBE CONFIG_DRM_XE_FORCE_PROBE #define XE_DEFAULT_MAX_VFS ~0 #define XE_DEFAULT_MAX_VFS_STR "unlimited" +#define XE_DEFAULT_ADMIN_ONLY_PF false #define XE_DEFAULT_WEDGED_MODE XE_WEDGED_MODE_UPON_CRITICAL_ERROR #define XE_DEFAULT_WEDGED_MODE_STR "upon-critical-error" #define XE_DEFAULT_SVM_NOTIFIER_SIZE 512 diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index 
919f176a19eb..47a6e0fd66e0 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -22,7 +22,7 @@ static bool wanted_admin_only(struct xe_device *xe) { - return !xe->info.probe_display; + return xe_configfs_admin_only_pf(to_pci_dev(xe->drm.dev)); } static unsigned int wanted_max_vfs(struct xe_device *xe) From 39125eaf8863ab09d70c4b493f58639b08d5a897 Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Fri, 23 Jan 2026 23:02:38 +0530 Subject: [PATCH 057/195] drm/xe/pm: Disable D3Cold for BMG only on specific platforms Restrict D3Cold disablement for BMG to unsupported NUC platforms, instead of disabling it on all platforms. Signed-off-by: Karthik Poosa Fixes: 3e331a6715ee ("drm/xe/pm: Temporarily disable D3Cold on BMG") Link: https://patch.msgid.link/20260123173238.1642383-1-karthik.poosa@intel.com Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pm.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 559cf5490ac0..01185f10a883 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -366,9 +367,15 @@ ALLOW_ERROR_INJECTION(xe_pm_init_early, ERRNO); /* See xe_pci_probe() */ static u32 vram_threshold_value(struct xe_device *xe) { - /* FIXME: D3Cold temporarily disabled by default on BMG */ - if (xe->info.platform == XE_BATTLEMAGE) - return 0; + if (xe->info.platform == XE_BATTLEMAGE) { + const char *product_name; + + product_name = dmi_get_system_info(DMI_PRODUCT_NAME); + if (product_name && strstr(product_name, "NUC13RNG")) { + drm_warn(&xe->drm, "BMG + D3Cold not supported on this platform\n"); + return 0; + } + } return DEFAULT_VRAM_THRESHOLD; } From 556dba95473900073a6c03121361c11f646dc551 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 30 Jan 2026 11:49:27 -0800 Subject: [PATCH 058/195] drm/gpusvm: Force unmapping on 
error in drm_gpusvm_get_pages drm_gpusvm_get_pages() only sets the local flags prior to committing the pages. If an error occurs mid-mapping, has_dma_mapping will be clear, causing the unmap function to skip unmapping pages that were successfully mapped before the error. Fix this by forcibly setting has_dma_mapping in the error path to ensure all previously mapped pages are properly unmapped. Fixes: 99624bdff867 ("drm/gpusvm: Add support for GPU Shared Virtual Memory") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Francois Dugast Link: https://patch.msgid.link/20260130194928.3255613-2-matthew.brost@intel.com --- drivers/gpu/drm/drm_gpusvm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index aa9a0b60e727..d733599ceb9a 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1572,6 +1572,7 @@ int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm, return 0; err_unmap: + svm_pages->flags.has_dma_mapping = true; __drm_gpusvm_unmap_pages(gpusvm, svm_pages, num_dma_mapped); drm_gpusvm_notifier_unlock(gpusvm); err_free: From ec49857ad181f2a68a3bea15422f2936ff366d47 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 30 Jan 2026 11:49:28 -0800 Subject: [PATCH 059/195] drm/gpusvm: Allow device pages to be mapped in mixed mappings after system pages The current code rejects device mappings whenever system pages have already been encountered. This is not the intended behavior when allow_mixed is set. Relax the restriction by permitting a single pagemap to be selected when allow_mixed is enabled, even if system pages were found earlier. 
Fixes: bce13d6ecd6c ("drm/gpusvm, drm/xe: Allow mixed mappings for userptr") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Francois Dugast Link: https://patch.msgid.link/20260130194928.3255613-3-matthew.brost@intel.com --- drivers/gpu/drm/drm_gpusvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index d733599ceb9a..871fcccd128a 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1495,7 +1495,7 @@ int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm, } zdd = page->zone_device_data; if (pagemap != page_pgmap(page)) { - if (i > 0) { + if (pagemap) { err = -EOPNOTSUPP; goto err_unmap; } From f27e64422054ff78566316509120f7ee2ecc7660 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 4 Feb 2026 11:10:26 -0800 Subject: [PATCH 060/195] drm/xe: Drop unnecessary goto in xe_device_create The error label in this function just does an immediate return without any further cleanup or processing. Replace the goto statements with returns. 
Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260204191025.3957211-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_device.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 055084fa50e5..743c18e0c580 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -456,16 +456,16 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->drm.anon_inode->i_mapping, xe->drm.vma_offset_manager, 0); if (WARN_ON(err)) - goto err; + return ERR_PTR(err); xe_bo_dev_init(&xe->bo_device); err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL); if (err) - goto err; + return ERR_PTR(err); err = xe_shrinker_create(xe); if (err) - goto err; + return ERR_PTR(err); xe->info.devid = pdev->device; xe->info.revid = pdev->revision; @@ -475,7 +475,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, err = xe_irq_init(xe); if (err) - goto err; + return ERR_PTR(err); xe_validation_device_init(&xe->val); @@ -485,7 +485,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, err = xe_pagemap_shrinker_create(xe); if (err) - goto err; + return ERR_PTR(err); xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC); @@ -504,7 +504,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, err = xe_bo_pinned_init(xe); if (err) - goto err; + return ERR_PTR(err); xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", WQ_MEM_RECLAIM); @@ -518,18 +518,14 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, * drmm_add_action_or_reset register above */ drm_err(&xe->drm, "Failed to allocate xe workqueues\n"); - err = -ENOMEM; - goto err; + return ERR_PTR(-ENOMEM); } err = drmm_mutex_init(&xe->drm, &xe->pmt.lock); if (err) - goto err; + return ERR_PTR(err); return xe; - -err: - return ERR_PTR(err); } ALLOW_ERROR_INJECTION(xe_device_create, ERRNO); /* See xe_pci_probe() */ From 
98b16727f07e26a5d4de84d88805ce7ffcfdd324 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 4 Feb 2026 00:53:32 +0100 Subject: [PATCH 061/195] drm/xe/pf: Fix sysfs initialization In case of devm_add_action_or_reset() failure the provided cleanup action will be run immediately on the not yet initialized kobject. This may lead to errors like: [ ] kobject: '(null)' (ff110001393608e0): is not initialized, yet kobject_put() is being called. [ ] WARNING: lib/kobject.c:734 at kobject_put+0xd9/0x250, CPU#0: kworker/0:0/9 [ ] RIP: 0010:kobject_put+0xdf/0x250 [ ] Call Trace: [ ] xe_sriov_pf_sysfs_init+0x21/0x100 [xe] [ ] xe_sriov_pf_init_late+0x87/0x2b0 [xe] [ ] xe_sriov_init_late+0x5f/0x2c0 [xe] [ ] xe_device_probe+0x5f2/0xc20 [xe] [ ] xe_pci_probe+0x396/0x610 [xe] [ ] local_pci_probe+0x47/0xb0 [ ] refcount_t: underflow; use-after-free. [ ] WARNING: lib/refcount.c:28 at refcount_warn_saturate+0x68/0xb0, CPU#0: kworker/0:0/9 [ ] RIP: 0010:refcount_warn_saturate+0x68/0xb0 [ ] Call Trace: [ ] kobject_put+0x174/0x250 [ ] xe_sriov_pf_sysfs_init+0x21/0x100 [xe] [ ] xe_sriov_pf_init_late+0x87/0x2b0 [xe] [ ] xe_sriov_init_late+0x5f/0x2c0 [xe] [ ] xe_device_probe+0x5f2/0xc20 [xe] [ ] xe_pci_probe+0x396/0x610 [xe] [ ] local_pci_probe+0x47/0xb0 Fix that by calling kobject_init() and kobject_add() separately and register cleanup action after the kobject is initialized. Also make this cleanup registration a part of the create helper to fix another mistake, as in the loop we were wrongly passing parent kobject while registering cleanup action, and this resulted in some undetected leaks. 
Fixes: 5c170a4d9c53 ("drm/xe/pf: Prepare sysfs for SR-IOV admin attributes") Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260203235332.1350-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c | 54 +++++++++++++------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c index 3d140506ba36..82a1055985ba 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c @@ -349,18 +349,33 @@ static const struct attribute_group *xe_sriov_vf_attr_groups[] = { /* no user serviceable parts below */ -static struct kobject *create_xe_sriov_kobj(struct xe_device *xe, unsigned int vfid) +static void action_put_kobject(void *arg) +{ + struct kobject *kobj = arg; + + kobject_put(kobj); +} + +static struct kobject *create_xe_sriov_kobj(struct xe_device *xe, unsigned int vfid, + const struct kobj_type *ktype) { struct xe_sriov_kobj *vkobj; + int err; xe_sriov_pf_assert_vfid(xe, vfid); vkobj = kzalloc(sizeof(*vkobj), GFP_KERNEL); if (!vkobj) - return NULL; + return ERR_PTR(-ENOMEM); vkobj->xe = xe; vkobj->vfid = vfid; + kobject_init(&vkobj->base, ktype); + + err = devm_add_action_or_reset(xe->drm.dev, action_put_kobject, &vkobj->base); + if (err) + return ERR_PTR(err); + return &vkobj->base; } @@ -463,28 +478,17 @@ static void pf_sysfs_note(struct xe_device *xe, int err, const char *what) xe_sriov_dbg(xe, "Failed to setup sysfs %s (%pe)\n", what, ERR_PTR(err)); } -static void action_put_kobject(void *arg) -{ - struct kobject *kobj = arg; - - kobject_put(kobj); -} - static int pf_setup_root(struct xe_device *xe) { struct kobject *parent = &xe->drm.dev->kobj; struct kobject *root; int err; - root = create_xe_sriov_kobj(xe, PFID); - if (!root) - return pf_sysfs_error(xe, -ENOMEM, "root obj"); + root = create_xe_sriov_kobj(xe, PFID, &xe_sriov_dev_ktype); + if (IS_ERR(root)) + 
return pf_sysfs_error(xe, PTR_ERR(root), "root obj"); - err = devm_add_action_or_reset(xe->drm.dev, action_put_kobject, root); - if (err) - return pf_sysfs_error(xe, err, "root action"); - - err = kobject_init_and_add(root, &xe_sriov_dev_ktype, parent, "sriov_admin"); + err = kobject_add(root, parent, "sriov_admin"); if (err) return pf_sysfs_error(xe, err, "root init"); @@ -505,20 +509,14 @@ static int pf_setup_tree(struct xe_device *xe) root = xe->sriov.pf.sysfs.root; for (n = 0; n <= totalvfs; n++) { - kobj = create_xe_sriov_kobj(xe, VFID(n)); - if (!kobj) - return pf_sysfs_error(xe, -ENOMEM, "tree obj"); - - err = devm_add_action_or_reset(xe->drm.dev, action_put_kobject, root); - if (err) - return pf_sysfs_error(xe, err, "tree action"); + kobj = create_xe_sriov_kobj(xe, VFID(n), &xe_sriov_vf_ktype); + if (IS_ERR(kobj)) + return pf_sysfs_error(xe, PTR_ERR(kobj), "tree obj"); if (n) - err = kobject_init_and_add(kobj, &xe_sriov_vf_ktype, - root, "vf%u", n); + err = kobject_add(kobj, root, "vf%u", n); else - err = kobject_init_and_add(kobj, &xe_sriov_vf_ktype, - root, "pf"); + err = kobject_add(kobj, root, "pf"); if (err) return pf_sysfs_error(xe, err, "tree init"); From 8965e00883cc4d66749e1aa89322588c860077cd Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 3 Feb 2026 22:12:36 +0100 Subject: [PATCH 062/195] drm/xe: Move xe_root_tile_mmio() to xe_device.h It seems to be a better place for this helper function, where we already have other 'root' oriented helpers. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Stuart Summers Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260203211240.745-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_device.h | 5 +++++ drivers/gpu/drm/xe/xe_i2c.c | 2 +- drivers/gpu/drm/xe/xe_mmio.h | 5 ----- drivers/gpu/drm/xe/xe_nvm.c | 2 +- drivers/gpu/drm/xe/xe_soc_remapper.c | 1 + 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 58d7d8b2fea3..39464650533b 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -109,6 +109,11 @@ static inline struct xe_gt *xe_root_mmio_gt(struct xe_device *xe) return xe_device_get_root_tile(xe)->primary_gt; } +static inline struct xe_mmio *xe_root_tile_mmio(struct xe_device *xe) +{ + return &xe->tiles[0].mmio; +} + static inline bool xe_device_uc_enabled(struct xe_device *xe) { return !xe->info.force_execlist; diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c index 1e1fb72e49bf..1deb812fe01d 100644 --- a/drivers/gpu/drm/xe/xe_i2c.c +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -27,7 +27,7 @@ #include "regs/xe_i2c_regs.h" #include "regs/xe_irq_regs.h" -#include "xe_device_types.h" +#include "xe_device.h" #include "xe_i2c.h" #include "xe_mmio.h" #include "xe_sriov.h" diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 15362789ab99..6a8faebb2c29 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -37,11 +37,6 @@ static inline u32 xe_mmio_adjusted_addr(const struct xe_mmio *mmio, u32 addr) return addr; } -static inline struct xe_mmio *xe_root_tile_mmio(struct xe_device *xe) -{ - return &xe->tiles[0].mmio; -} - #ifdef CONFIG_PCI_IOV void xe_mmio_init_vf_view(struct xe_mmio *mmio, const struct xe_mmio *base, unsigned int vfid); #endif diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c index bc88804de514..1fdfb690ea3d 100644 --- a/drivers/gpu/drm/xe/xe_nvm.c +++ 
b/drivers/gpu/drm/xe/xe_nvm.c @@ -6,7 +6,7 @@ #include #include -#include "xe_device_types.h" +#include "xe_device.h" #include "xe_mmio.h" #include "xe_nvm.h" #include "xe_pcode_api.h" diff --git a/drivers/gpu/drm/xe/xe_soc_remapper.c b/drivers/gpu/drm/xe/xe_soc_remapper.c index 1c391d719196..c031336a6d75 100644 --- a/drivers/gpu/drm/xe/xe_soc_remapper.c +++ b/drivers/gpu/drm/xe/xe_soc_remapper.c @@ -4,6 +4,7 @@ */ #include "regs/xe_soc_remapper_regs.h" +#include "xe_device.h" #include "xe_mmio.h" #include "xe_soc_remapper.h" From ed61c18617b1d95a9e0d4917749f28776eccefb7 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 3 Feb 2026 22:12:37 +0100 Subject: [PATCH 063/195] drm/xe: Promote struct xe_mmio definition to own file We already have separate .c and .h files for xe_mmio functions, time to introduce _types.h to follow what other components do. Signed-off-by: Michal Wajdeczko Reviewed-by: Stuart Summers #v1 Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260203211240.745-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_device_types.h | 40 +---------------- drivers/gpu/drm/xe/xe_gt_types.h | 5 --- drivers/gpu/drm/xe/xe_mmio.h | 2 +- drivers/gpu/drm/xe/xe_mmio_types.h | 64 ++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 45 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_mmio_types.h diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index b4600c5069a9..2ea931c1550a 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -18,6 +18,7 @@ #include "xe_lmtt_types.h" #include "xe_memirq_types.h" #include "xe_mert.h" +#include "xe_mmio_types.h" #include "xe_oa_types.h" #include "xe_pagefault_types.h" #include "xe_platform_types.h" @@ -100,45 +101,6 @@ enum xe_wedged_mode { const struct xe_tile * : (const struct xe_device *)((tile__)->xe), \ struct xe_tile * : (tile__)->xe) -/** - * struct xe_mmio - register mmio structure - * - * Represents an MMIO 
region that the CPU may use to access registers. A - * region may share its IO map with other regions (e.g., all GTs within a - * tile share the same map with their parent tile, but represent different - * subregions of the overall IO space). - */ -struct xe_mmio { - /** @tile: Backpointer to tile, used for tracing */ - struct xe_tile *tile; - - /** @regs: Map used to access registers. */ - void __iomem *regs; - - /** - * @sriov_vf_gt: Backpointer to GT. - * - * This pointer is only set for GT MMIO regions and only when running - * as an SRIOV VF structure - */ - struct xe_gt *sriov_vf_gt; - - /** - * @regs_size: Length of the register region within the map. - * - * The size of the iomap set in *regs is generally larger than the - * register mmio space since it includes unused regions and/or - * non-register regions such as the GGTT PTEs. - */ - size_t regs_size; - - /** @adj_limit: adjust MMIO address if address is below this value */ - u32 adj_limit; - - /** @adj_offset: offset to add to MMIO address when adjusting */ - u32 adj_offset; -}; - /** * struct xe_tile - hardware tile structure * diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 5318d92fd473..1d7360b56ac6 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -45,11 +45,6 @@ typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(XE_MAX_DSS_FUSE_BITS)]; typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(XE_MAX_EU_FUSE_BITS)]; typedef unsigned long xe_l3_bank_mask_t[BITS_TO_LONGS(XE_MAX_L3_BANK_MASK_BITS)]; -struct xe_mmio_range { - u32 start; - u32 end; -}; - /* * The hardware has multiple kinds of multicast register ranges that need * special register steering (and future platforms are expected to add diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 6a8faebb2c29..41ae720acbc3 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -6,7 +6,7 @@ #ifndef _XE_MMIO_H_ #define _XE_MMIO_H_ 
-#include "xe_gt_types.h" +#include "xe_mmio_types.h" struct xe_device; struct xe_reg; diff --git a/drivers/gpu/drm/xe/xe_mmio_types.h b/drivers/gpu/drm/xe/xe_mmio_types.h new file mode 100644 index 000000000000..99e8f269eaf2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mmio_types.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022-2026 Intel Corporation + */ + +#ifndef _XE_MMIO_TYPES_H_ +#define _XE_MMIO_TYPES_H_ + +#include + +struct xe_gt; +struct xe_tile; + +/** + * struct xe_mmio - register mmio structure + * + * Represents an MMIO region that the CPU may use to access registers. A + * region may share its IO map with other regions (e.g., all GTs within a + * tile share the same map with their parent tile, but represent different + * subregions of the overall IO space). + */ +struct xe_mmio { + /** @tile: Backpointer to tile, used for tracing */ + struct xe_tile *tile; + + /** @regs: Map used to access registers. */ + void __iomem *regs; + + /** + * @sriov_vf_gt: Backpointer to GT. + * + * This pointer is only set for GT MMIO regions and only when running + * as an SRIOV VF structure + */ + struct xe_gt *sriov_vf_gt; + + /** + * @regs_size: Length of the register region within the map. + * + * The size of the iomap set in *regs is generally larger than the + * register mmio space since it includes unused regions and/or + * non-register regions such as the GGTT PTEs. + */ + size_t regs_size; + + /** @adj_limit: adjust MMIO address if address is below this value */ + u32 adj_limit; + + /** @adj_offset: offset to add to MMIO address when adjusting */ + u32 adj_offset; +}; + +/** + * struct xe_mmio_range - register range structure + * + * @start: first register offset in the range. + * @end: last register offset in the range. 
+ */ +struct xe_mmio_range { + u32 start; + u32 end; +}; + +#endif From e7002e0eb438450822212bb36d16ef4318cde78a Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 3 Feb 2026 22:12:38 +0100 Subject: [PATCH 064/195] drm/xe: Promote struct xe_tile definition to own file We already have separate .c and .h files for xe_tile functions, time to introduce _types.h to follow what other components do. Signed-off-by: Michal Wajdeczko Cc: Matt Roper Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260203211240.745-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_device_types.h | 129 +----------------------- drivers/gpu/drm/xe/xe_tile_types.h | 141 +++++++++++++++++++++++++++ 2 files changed, 142 insertions(+), 128 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_tile_types.h diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 2ea931c1550a..14bf2c027f89 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -15,10 +15,6 @@ #include "xe_devcoredump_types.h" #include "xe_heci_gsc.h" #include "xe_late_bind_fw_types.h" -#include "xe_lmtt_types.h" -#include "xe_memirq_types.h" -#include "xe_mert.h" -#include "xe_mmio_types.h" #include "xe_oa_types.h" #include "xe_pagefault_types.h" #include "xe_platform_types.h" @@ -30,7 +26,7 @@ #include "xe_sriov_vf_ccs_types.h" #include "xe_step_types.h" #include "xe_survivability_mode_types.h" -#include "xe_tile_sriov_vf_types.h" +#include "xe_tile_types.h" #include "xe_validation.h" #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) @@ -96,129 +92,6 @@ enum xe_wedged_mode { (_xe)->info.step.graphics >= (min_step) && \ (_xe)->info.step.graphics < (max_step)) -#define tile_to_xe(tile__) \ - _Generic(tile__, \ - const struct xe_tile * : (const struct xe_device *)((tile__)->xe), \ - struct xe_tile * : (tile__)->xe) - -/** - * struct xe_tile - hardware tile structure - * - * From a driver perspective, a "tile" is effectively a complete GPU, containing - 
* an SGunit, 1-2 GTs, and (for discrete platforms) VRAM. - * - * Multi-tile platforms effectively bundle multiple GPUs behind a single PCI - * device and designate one "root" tile as being responsible for external PCI - * communication. PCI BAR0 exposes the GGTT and MMIO register space for each - * tile in a stacked layout, and PCI BAR2 exposes the local memory associated - * with each tile similarly. Device-wide interrupts can be enabled/disabled - * at the root tile, and the MSTR_TILE_INTR register will report which tiles - * have interrupts that need servicing. - */ -struct xe_tile { - /** @xe: Backpointer to tile's PCI device */ - struct xe_device *xe; - - /** @id: ID of the tile */ - u8 id; - - /** - * @primary_gt: Primary GT - */ - struct xe_gt *primary_gt; - - /** - * @media_gt: Media GT - * - * Only present on devices with media version >= 13. - */ - struct xe_gt *media_gt; - - /** - * @mmio: MMIO info for a tile. - * - * Each tile has its own 16MB space in BAR0, laid out as: - * * 0-4MB: registers - * * 4MB-8MB: reserved - * * 8MB-16MB: global GTT - */ - struct xe_mmio mmio; - - /** @mem: memory management info for tile */ - struct { - /** - * @mem.kernel_vram: kernel-dedicated VRAM info for tile. - * - * Although VRAM is associated with a specific tile, it can - * still be accessed by all tiles' GTs. - */ - struct xe_vram_region *kernel_vram; - - /** - * @mem.vram: general purpose VRAM info for tile. - * - * Although VRAM is associated with a specific tile, it can - * still be accessed by all tiles' GTs. - */ - struct xe_vram_region *vram; - - /** @mem.ggtt: Global graphics translation table */ - struct xe_ggtt *ggtt; - - /** - * @mem.kernel_bb_pool: Pool from which batchbuffers are allocated. - * - * Media GT shares a pool with its primary GT. - */ - struct xe_sa_manager *kernel_bb_pool; - - /** - * @mem.reclaim_pool: Pool for PRLs allocated. - * - * Only main GT has page reclaim list allocations. 
- */ - struct xe_sa_manager *reclaim_pool; - } mem; - - /** @sriov: tile level virtualization data */ - union { - struct { - /** @sriov.pf.lmtt: Local Memory Translation Table. */ - struct xe_lmtt lmtt; - } pf; - struct { - /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ - struct xe_ggtt_node *ggtt_balloon[2]; - /** @sriov.vf.self_config: VF configuration data */ - struct xe_tile_sriov_vf_selfconfig self_config; - } vf; - } sriov; - - /** @memirq: Memory Based Interrupts. */ - struct xe_memirq memirq; - - /** @csc_hw_error_work: worker to report CSC HW errors */ - struct work_struct csc_hw_error_work; - - /** @pcode: tile's PCODE */ - struct { - /** @pcode.lock: protecting tile's PCODE mailbox data */ - struct mutex lock; - } pcode; - - /** @migrate: Migration helper for vram blits and clearing */ - struct xe_migrate *migrate; - - /** @sysfs: sysfs' kobj used by xe_tile_sysfs */ - struct kobject *sysfs; - - /** @debugfs: debugfs directory associated with this tile */ - struct dentry *debugfs; - - /** @mert: MERT-related data */ - struct xe_mert mert; -}; - /** * struct xe_device - Top level struct of Xe device */ diff --git a/drivers/gpu/drm/xe/xe_tile_types.h b/drivers/gpu/drm/xe/xe_tile_types.h new file mode 100644 index 000000000000..33932fd547d7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_types.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022-2026 Intel Corporation + */ + +#ifndef _XE_TILE_TYPES_H_ +#define _XE_TILE_TYPES_H_ + +#include +#include + +#include "xe_lmtt_types.h" +#include "xe_memirq_types.h" +#include "xe_mert.h" +#include "xe_mmio_types.h" +#include "xe_tile_sriov_vf_types.h" + +#define tile_to_xe(tile__) \ + _Generic(tile__, \ + const struct xe_tile * : (const struct xe_device *)((tile__)->xe), \ + struct xe_tile * : (tile__)->xe) + +/** + * struct xe_tile - hardware tile structure + * + * From a driver perspective, a "tile" is effectively a complete GPU, containing + * an SGunit, 1-2 GTs, and 
(for discrete platforms) VRAM. + * + * Multi-tile platforms effectively bundle multiple GPUs behind a single PCI + * device and designate one "root" tile as being responsible for external PCI + * communication. PCI BAR0 exposes the GGTT and MMIO register space for each + * tile in a stacked layout, and PCI BAR2 exposes the local memory associated + * with each tile similarly. Device-wide interrupts can be enabled/disabled + * at the root tile, and the MSTR_TILE_INTR register will report which tiles + * have interrupts that need servicing. + */ +struct xe_tile { + /** @xe: Backpointer to tile's PCI device */ + struct xe_device *xe; + + /** @id: ID of the tile */ + u8 id; + + /** + * @primary_gt: Primary GT + */ + struct xe_gt *primary_gt; + + /** + * @media_gt: Media GT + * + * Only present on devices with media version >= 13. + */ + struct xe_gt *media_gt; + + /** + * @mmio: MMIO info for a tile. + * + * Each tile has its own 16MB space in BAR0, laid out as: + * * 0-4MB: registers + * * 4MB-8MB: reserved + * * 8MB-16MB: global GTT + */ + struct xe_mmio mmio; + + /** @mem: memory management info for tile */ + struct { + /** + * @mem.kernel_vram: kernel-dedicated VRAM info for tile. + * + * Although VRAM is associated with a specific tile, it can + * still be accessed by all tiles' GTs. + */ + struct xe_vram_region *kernel_vram; + + /** + * @mem.vram: general purpose VRAM info for tile. + * + * Although VRAM is associated with a specific tile, it can + * still be accessed by all tiles' GTs. + */ + struct xe_vram_region *vram; + + /** @mem.ggtt: Global graphics translation table */ + struct xe_ggtt *ggtt; + + /** + * @mem.kernel_bb_pool: Pool from which batchbuffers are allocated. + * + * Media GT shares a pool with its primary GT. + */ + struct xe_sa_manager *kernel_bb_pool; + + /** + * @mem.reclaim_pool: Pool for PRLs allocated. + * + * Only main GT has page reclaim list allocations. 
+ */ + struct xe_sa_manager *reclaim_pool; + } mem; + + /** @sriov: tile level virtualization data */ + union { + struct { + /** @sriov.pf.lmtt: Local Memory Translation Table. */ + struct xe_lmtt lmtt; + } pf; + struct { + /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ + struct xe_ggtt_node *ggtt_balloon[2]; + /** @sriov.vf.self_config: VF configuration data */ + struct xe_tile_sriov_vf_selfconfig self_config; + } vf; + } sriov; + + /** @memirq: Memory Based Interrupts. */ + struct xe_memirq memirq; + + /** @csc_hw_error_work: worker to report CSC HW errors */ + struct work_struct csc_hw_error_work; + + /** @pcode: tile's PCODE */ + struct { + /** @pcode.lock: protecting tile's PCODE mailbox data */ + struct mutex lock; + } pcode; + + /** @migrate: Migration helper for vram blits and clearing */ + struct xe_migrate *migrate; + + /** @sysfs: sysfs' kobj used by xe_tile_sysfs */ + struct kobject *sysfs; + + /** @debugfs: debugfs directory associated with this tile */ + struct dentry *debugfs; + + /** @mert: MERT-related data */ + struct xe_mert mert; +}; + +#endif From 18443ff22559c4d1c83356ab75108db30c1f893b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 3 Feb 2026 22:12:39 +0100 Subject: [PATCH 065/195] drm/xe: Drop unnecessary include from xe_tile.h We don't need to include xe_device_types.h there. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Matt Roper Reviewed-by: Tejas Upadhyay Link: https://patch.msgid.link/20260203211240.745-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_tile.h | 4 ++-- drivers/gpu/drm/xe/xe_tile_sysfs.c | 2 +- drivers/gpu/drm/xe/xe_vram_freq.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index 734132eddda5..a2a2c0e936b8 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -6,10 +6,10 @@ #ifndef _XE_TILE_H_ #define _XE_TILE_H_ -#include "xe_device_types.h" +#include "xe_tile_types.h" +struct xe_device; struct xe_pagemap; -struct xe_tile; int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id); int xe_tile_init_noalloc(struct xe_tile *tile); diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c index 9e1236a9ec67..f2bf7314954d 100644 --- a/drivers/gpu/drm/xe/xe_tile_sysfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c @@ -7,8 +7,8 @@ #include #include +#include "xe_device_types.h" #include "xe_pm.h" -#include "xe_tile.h" #include "xe_tile_sysfs.h" #include "xe_vram_freq.h" diff --git a/drivers/gpu/drm/xe/xe_vram_freq.c b/drivers/gpu/drm/xe/xe_vram_freq.c index 6f8281e0b96a..8717367ccd4c 100644 --- a/drivers/gpu/drm/xe/xe_vram_freq.c +++ b/drivers/gpu/drm/xe/xe_vram_freq.c @@ -5,9 +5,9 @@ #include #include +#include "xe_device_types.h" #include "xe_pcode.h" #include "xe_pcode_api.h" -#include "xe_tile.h" #include "xe_tile_sysfs.h" #include "xe_vram_freq.h" From f59cde8a2452b392115d2af8f1143a94725f4827 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 3 Feb 2026 20:37:45 +0100 Subject: [PATCH 066/195] drm/xe/configfs: Fix 'parameter name omitted' errors On some configs and old compilers we can get following build errors: ../drivers/gpu/drm/xe/xe_configfs.h: In function 'xe_configfs_get_ctx_restore_mid_bb': ../drivers/gpu/drm/xe/xe_configfs.h:40:76: error: parameter name 
omitted static inline u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class, ^~~~~~~~~~~~~~~~~~~~ ../drivers/gpu/drm/xe/xe_configfs.h: In function 'xe_configfs_get_ctx_restore_post_bb': ../drivers/gpu/drm/xe/xe_configfs.h:42:77: error: parameter name omitted static inline u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class, ^~~~~~~~~~~~~~~~~~~~ when trying to define our configfs stub functions. Fix that. Fixes: 7a4756b2fd04 ("drm/xe/lrc: Allow to add user commands mid context switch") Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260203193745.576-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_configfs.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h index 487531269511..07d62bf0c152 100644 --- a/drivers/gpu/drm/xe/xe_configfs.h +++ b/drivers/gpu/drm/xe/xe_configfs.h @@ -23,9 +23,11 @@ bool xe_configfs_primary_gt_allowed(struct pci_dev *pdev); bool xe_configfs_media_gt_allowed(struct pci_dev *pdev); u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev); bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev); -u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class, +u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, + enum xe_engine_class class, const u32 **cs); -u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class, +u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, + enum xe_engine_class class, const u32 **cs); #ifdef CONFIG_PCI_IOV unsigned int xe_configfs_get_max_vfs(struct pci_dev *pdev); @@ -40,9 +42,11 @@ static inline bool xe_configfs_primary_gt_allowed(struct pci_dev *pdev) { return static inline bool xe_configfs_media_gt_allowed(struct pci_dev *pdev) { return true; } static inline u64 xe_configfs_get_engines_allowed(struct 
pci_dev *pdev) { return U64_MAX; } static inline bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev) { return false; } -static inline u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class, +static inline u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, + enum xe_engine_class class, const u32 **cs) { return 0; } -static inline u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class, +static inline u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, + enum xe_engine_class class, const u32 **cs) { return 0; } #ifdef CONFIG_PCI_IOV static inline unsigned int xe_configfs_get_max_vfs(struct pci_dev *pdev) From 4e8f602ac3574cf1ebc7acfb6624d06e04b30c91 Mon Sep 17 00:00:00 2001 From: Nareshkumar Gollakoti Date: Mon, 2 Feb 2026 22:38:12 +0530 Subject: [PATCH 067/195] drm/xe: Prevent VFs from exposing the CCS mode sysfs file Skip creating CCS sysfs files in VF mode to ensure VFs do not try to change CCS mode, as it is predefined and immutable in the SR-IOV mode. 
Reviewed-by: Michal Wajdeczko Signed-off-by: Nareshkumar Gollakoti Signed-off-by: Michal Wajdeczko Link: https://patch.msgid.link/20260202170810.1393147-5-naresh.kumar.g@intel.com --- drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index 91ac22ef5703..fe944687728c 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -191,7 +191,7 @@ int xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); int err; - if (!xe_gt_ccs_mode_enabled(gt)) + if (!xe_gt_ccs_mode_enabled(gt) || IS_SRIOV_VF(xe)) return 0; err = sysfs_create_files(gt->sysfs, gt_ccs_mode_attrs); From 9b5e995e61290741b0e2b70fc631cc591e21d499 Mon Sep 17 00:00:00 2001 From: Nareshkumar Gollakoti Date: Mon, 2 Feb 2026 22:38:13 +0530 Subject: [PATCH 068/195] drm/xe: Mutual exclusivity between CCS-mode and PF Due to SLA agreement between PF and VFs, currently we block CCS mode changes if driver is running as PF, even if there are no VFs enabled yet. Use lockdown mechanism provided by the PF to relax that limitation and still enforce above VFs related requirements. 
Reviewed-by: Michal Wajdeczko Signed-off-by: Nareshkumar Gollakoti Signed-off-by: Michal Wajdeczko Link: https://patch.msgid.link/20260202170810.1393147-6-naresh.kumar.g@intel.com --- drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 42 +++++++++++++++++++---------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index fe944687728c..b35be36b0eaa 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -13,6 +13,7 @@ #include "xe_gt_sysfs.h" #include "xe_mmio.h" #include "xe_sriov.h" +#include "xe_sriov_pf.h" static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) { @@ -88,6 +89,11 @@ void xe_gt_apply_ccs_mode(struct xe_gt *gt) __xe_gt_apply_ccs_mode(gt, gt->ccs_mode); } +static bool gt_ccs_mode_default(struct xe_gt *gt) +{ + return gt->ccs_mode == 1; +} + static ssize_t num_cslices_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -117,12 +123,6 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, u32 num_engines, num_slices; int ret; - if (IS_SRIOV(xe)) { - xe_gt_dbg(gt, "Can't change compute mode when running as %s\n", - xe_sriov_mode_to_string(xe_device_sriov_mode(xe))); - return -EOPNOTSUPP; - } - ret = kstrtou32(buff, 0, &num_engines); if (ret) return ret; @@ -139,21 +139,35 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr, } /* CCS mode can only be updated when there are no drm clients */ - mutex_lock(&xe->drm.filelist_mutex); + guard(mutex)(&xe->drm.filelist_mutex); if (!list_empty(&xe->drm.filelist)) { - mutex_unlock(&xe->drm.filelist_mutex); xe_gt_dbg(gt, "Rejecting compute mode change as there are active drm clients\n"); return -EBUSY; } - if (gt->ccs_mode != num_engines) { - xe_gt_info(gt, "Setting compute mode to %d\n", num_engines); - gt->ccs_mode = num_engines; - xe_gt_record_user_engines(gt); - xe_gt_reset(gt); + if (gt->ccs_mode == num_engines) + return count; + + /* 
+ * Changing default CCS mode is only allowed when there + * are no VFs. Try to lockdown PF to find out. + */ + if (gt_ccs_mode_default(gt) && IS_SRIOV_PF(xe)) { + ret = xe_sriov_pf_lockdown(xe); + if (ret) { + xe_gt_dbg(gt, "Can't change CCS Mode: VFs are enabled\n"); + return ret; + } } - mutex_unlock(&xe->drm.filelist_mutex); + xe_gt_info(gt, "Setting compute mode to %d\n", num_engines); + gt->ccs_mode = num_engines; + xe_gt_record_user_engines(gt); + xe_gt_reset(gt); + + /* We may end PF lockdown once CCS mode is default again */ + if (gt_ccs_mode_default(gt) && IS_SRIOV_PF(xe)) + xe_sriov_pf_end_lockdown(xe); return count; } From 106340775a8f5ec45e18bab18666f56e4af13d92 Mon Sep 17 00:00:00 2001 From: Pallavi Mishra Date: Thu, 29 Jan 2026 05:47:22 +0000 Subject: [PATCH 069/195] drm/xe/tests: Fix g2g_test_array indexing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The G2G KUnit test allocates a compact N×N matrix sized by gt_count and verifies entries using dense indices: idx = (j * gt_count) + i The producer path currently computes idx using gt->info.id. However, gt->info.id values are not guaranteed to be contiguous. For example, with gt_count=2 and IDs {0,3}, this formula produces indices beyond the allocated range, causing mismatches and potential out-of-bounds access. Update the producer to map each GT to a dense index in [0..gt_count-1] and compute: idx = (tx_dense * gt_count) + rx_dense Additionally, introduce an event-based delay in g2g_test_in_order() to ensure ordering between sends. 
v2: Add single helper function (Daniele) v3: Modify comment (Daniele) Signed-off-by: Pallavi Mishra Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Vinay Belgaumkar Link: https://patch.msgid.link/20260129054722.2150674-1-pallavi.mishra@intel.com --- drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c | 61 +++++++++++++++++++++- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c index 3b213fcae916..ea8ca03effa9 100644 --- a/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c +++ b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c @@ -48,6 +48,38 @@ struct g2g_test_payload { u32 seqno; }; +static int slot_index_from_gts(struct xe_gt *tx_gt, struct xe_gt *rx_gt) +{ + struct xe_device *xe = gt_to_xe(tx_gt); + int idx = 0, found = 0, id, tx_idx, rx_idx; + struct xe_gt *gt; + struct kunit *test = kunit_get_current_test(); + + for (id = 0; id < xe->info.tile_count * xe->info.max_gt_per_tile; id++) { + gt = xe_device_get_gt(xe, id); + if (!gt) + continue; + if (gt == tx_gt) { + tx_idx = idx; + found++; + } + if (gt == rx_gt) { + rx_idx = idx; + found++; + } + + if (found == 2) + break; + + idx++; + } + + if (found != 2) + KUNIT_FAIL(test, "GT index not found"); + + return (tx_idx * xe->info.gt_count) + rx_idx; +} + static void g2g_test_send(struct kunit *test, struct xe_guc *guc, u32 far_tile, u32 far_dev, struct g2g_test_payload *payload) @@ -163,7 +195,7 @@ int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *msg, u32 len) goto done; } - idx = (tx_gt->info.id * xe->info.gt_count) + rx_gt->info.id; + idx = slot_index_from_gts(tx_gt, rx_gt); if (xe->g2g_test_array[idx] != payload->seqno - 1) { xe_gt_err(rx_gt, "G2G: Seqno mismatch %d vs %d for %d:%d -> %d:%d!\n", @@ -180,13 +212,17 @@ int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *msg, u32 len) return ret; } +#define G2G_WAIT_TIMEOUT_MS 100 +#define G2G_WAIT_POLL_MS 1 + /* * Send the given seqno from all GuCs to all other GuCs in 
tile/GT order */ static void g2g_test_in_order(struct kunit *test, struct xe_device *xe, u32 seqno) { struct xe_gt *near_gt, *far_gt; - int i, j; + int i, j, waited; + u32 idx; for_each_gt(near_gt, xe, i) { u32 near_tile = gt_to_tile(near_gt)->id; @@ -205,6 +241,27 @@ static void g2g_test_in_order(struct kunit *test, struct xe_device *xe, u32 seqn payload.rx_dev = far_dev; payload.rx_tile = far_tile; payload.seqno = seqno; + + /* Calculate idx for event-based wait */ + idx = slot_index_from_gts(near_gt, far_gt); + waited = 0; + + /* + * Wait for previous seqno to be acknowledged before sending, + * to avoid queuing too many back-to-back messages and + * causing a test timeout. Actual correctness of message + * will be checked later in xe_guc_g2g_test_notification() + */ + while (xe->g2g_test_array[idx] != (seqno - 1)) { + msleep(G2G_WAIT_POLL_MS); + waited += G2G_WAIT_POLL_MS; + if (waited >= G2G_WAIT_TIMEOUT_MS) { + kunit_info(test, "Timeout waiting! tx gt: %d, rx gt: %d\n", + near_gt->info.id, far_gt->info.id); + break; + } + } + g2g_test_send(test, &near_gt->uc.guc, far_tile, far_dev, &payload); } } From 40a684f91d267164f9adf0d35b572b4cad0b8d3c Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Tue, 3 Feb 2026 17:42:32 -0800 Subject: [PATCH 070/195] drm/xe: Decouple GuC RC code from xe_guc_pc Move enable/disable GuC RC logic into the new file. This will allow us to independently enable/disable GuC RC and not rely on SLPC related functions. GuC already provides separate H2G interfaces to setup GuC RC and SLPC. 
Cc: Riana Tauro Cc: Michal Wajdeczko Reviewed-by: Michal Wajdeczko Reviewed-by: Riana Tauro Signed-off-by: Vinay Belgaumkar Link: https://patch.msgid.link/20260204014234.2867763-2-vinay.belgaumkar@intel.com --- Documentation/gpu/xe/xe_firmware.rst | 3 + drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt.c | 1 - drivers/gpu/drm/xe/xe_guc.c | 6 ++ drivers/gpu/drm/xe/xe_guc_pc.c | 68 +++----------- drivers/gpu/drm/xe/xe_guc_pc.h | 1 - drivers/gpu/drm/xe/xe_guc_rc.c | 130 +++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_guc_rc.h | 15 ++++ drivers/gpu/drm/xe/xe_uc.c | 10 +-- drivers/gpu/drm/xe/xe_uc.h | 1 - 10 files changed, 172 insertions(+), 64 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_guc_rc.c create mode 100644 drivers/gpu/drm/xe/xe_guc_rc.h diff --git a/Documentation/gpu/xe/xe_firmware.rst b/Documentation/gpu/xe/xe_firmware.rst index 5d23e9f27391..9c15a300bc62 100644 --- a/Documentation/gpu/xe/xe_firmware.rst +++ b/Documentation/gpu/xe/xe_firmware.rst @@ -31,6 +31,9 @@ GuC Power Conservation (PC) .. kernel-doc:: drivers/gpu/drm/xe/xe_guc_pc.c :doc: GuC Power Conservation (PC) +.. 
kernel-doc:: drivers/gpu/drm/xe/xe_guc_rc.c + :doc: GuC Render C-states (GuC RC) + PCIe Gen5 Limitations ===================== diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 3a6a707638b5..4a7eaeeca293 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -74,6 +74,7 @@ xe-y += xe_bb.o \ xe_guc_log.o \ xe_guc_pagefault.o \ xe_guc_pc.o \ + xe_guc_rc.o \ xe_guc_submit.o \ xe_guc_tlb_inval.o \ xe_heci_gsc.o \ diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 22132467ff4f..68c4771de040 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -849,7 +849,6 @@ static void gt_reset_worker(struct work_struct *w) if (IS_SRIOV_PF(gt_to_xe(gt))) xe_gt_sriov_pf_stop_prepare(gt); - xe_uc_gucrc_disable(>->uc); xe_uc_stop_prepare(>->uc); xe_pagefault_reset(gt_to_xe(gt), gt); diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index d5910b0adbaa..303f6ae7c418 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -35,6 +35,7 @@ #include "xe_guc_klv_helpers.h" #include "xe_guc_log.h" #include "xe_guc_pc.h" +#include "xe_guc_rc.h" #include "xe_guc_relay.h" #include "xe_guc_submit.h" #include "xe_memirq.h" @@ -881,6 +882,10 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc) if (ret) return ret; + ret = xe_guc_rc_init(guc); + if (ret) + return ret; + ret = xe_guc_engine_activity_init(guc); if (ret) return ret; @@ -1631,6 +1636,7 @@ void xe_guc_stop_prepare(struct xe_guc *guc) if (!IS_SRIOV_VF(guc_to_xe(guc))) { int err; + xe_guc_rc_disable(guc); err = xe_guc_pc_stop(&guc->pc); xe_gt_WARN(guc_to_gt(guc), err, "Failed to stop GuC PC: %pe\n", ERR_PTR(err)); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 5e5495a39a3c..c8313e308f96 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -92,6 +92,17 @@ * Render-C states is also a GuC PC feature that is now enabled in Xe for * all 
platforms. * + * Implementation details: + * ----------------------- + * The implementation for GuC Power Management features is split as follows: + * + * xe_guc_rc: Logic for handling GuC RC + * xe_gt_idle: Host side logic for RC6 and Coarse Power gating (CPG) + * xe_guc_pc: Logic for all other SLPC related features + * + * There is some cross interaction between these where host C6 will need to be + * enabled when we plan to skip GuC RC. Also, the GuC RC mode is currently + * overridden through 0x3003 which is an SLPC H2G call. */ static struct xe_guc *pc_to_guc(struct xe_guc_pc *pc) @@ -253,22 +264,6 @@ static int pc_action_unset_param(struct xe_guc_pc *pc, u8 id) return ret; } -static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode) -{ - struct xe_guc_ct *ct = pc_to_ct(pc); - u32 action[] = { - GUC_ACTION_HOST2GUC_SETUP_PC_GUCRC, - mode, - }; - int ret; - - ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) - xe_gt_err(pc_to_gt(pc), "GuC RC enable mode=%u failed: %pe\n", - mode, ERR_PTR(ret)); - return ret; -} - static u32 decode_freq(u32 raw) { return DIV_ROUND_CLOSEST(raw * GT_FREQUENCY_MULTIPLIER, @@ -1050,30 +1045,6 @@ int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc) return ret; } -/** - * xe_guc_pc_gucrc_disable - Disable GuC RC - * @pc: Xe_GuC_PC instance - * - * Disables GuC RC by taking control of RC6 back from GuC. - * - * Return: 0 on success, negative error code on error. 
- */ -int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) -{ - struct xe_device *xe = pc_to_xe(pc); - struct xe_gt *gt = pc_to_gt(pc); - int ret = 0; - - if (xe->info.skip_guc_pc) - return 0; - - ret = pc_action_setup_gucrc(pc, GUCRC_HOST_CONTROL); - if (ret) - return ret; - - return xe_gt_idle_disable_c6(gt); -} - /** * xe_guc_pc_override_gucrc_mode - override GUCRC mode * @pc: Xe_GuC_PC instance @@ -1247,9 +1218,6 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) return -ETIMEDOUT; if (xe->info.skip_guc_pc) { - if (xe->info.platform != XE_PVC) - xe_gt_idle_enable_c6(gt); - /* Request max possible since dynamic freq mgmt is not enabled */ pc_set_cur_freq(pc, UINT_MAX); return 0; @@ -1291,15 +1259,6 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) if (ret) return ret; - if (xe->info.platform == XE_PVC) { - xe_guc_pc_gucrc_disable(pc); - return 0; - } - - ret = pc_action_setup_gucrc(pc, GUCRC_FIRMWARE_CONTROL); - if (ret) - return ret; - /* Enable SLPC Optimized Strategy for compute */ ret = pc_action_set_strategy(pc, SLPC_OPTIMIZED_STRATEGY_COMPUTE); @@ -1319,10 +1278,8 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc) { struct xe_device *xe = pc_to_xe(pc); - if (xe->info.skip_guc_pc) { - xe_gt_idle_disable_c6(pc_to_gt(pc)); + if (xe->info.skip_guc_pc) return 0; - } mutex_lock(&pc->freq_lock); pc->freq_ready = false; @@ -1344,7 +1301,6 @@ static void xe_guc_pc_fini_hw(void *arg) return; CLASS(xe_force_wake, fw_ref)(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); - xe_guc_pc_gucrc_disable(pc); XE_WARN_ON(xe_guc_pc_stop(pc)); /* Bind requested freq to mert_freq_cap before unload */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 0e31396f103c..1b95873b262e 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -15,7 +15,6 @@ struct drm_printer; int xe_guc_pc_init(struct xe_guc_pc *pc); int xe_guc_pc_start(struct xe_guc_pc *pc); int xe_guc_pc_stop(struct xe_guc_pc *pc); -int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc); int 
xe_guc_pc_override_gucrc_mode(struct xe_guc_pc *pc, enum slpc_gucrc_mode mode); int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc); void xe_guc_pc_print(struct xe_guc_pc *pc, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_guc_rc.c b/drivers/gpu/drm/xe/xe_guc_rc.c new file mode 100644 index 000000000000..6c4591e712d9 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_rc.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2026 Intel Corporation + */ + +#include + +#include "abi/guc_actions_slpc_abi.h" +#include "xe_device.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_gt_idle.h" +#include "xe_gt_printk.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_guc_rc.h" +#include "xe_pm.h" + +/** + * DOC: GuC RC (Render C-states) + * + * GuC handles the GT transition to deeper C-states in conjunction with Pcode. + * GuC RC can be enabled independently of the frequency component in SLPC, + * which is also controlled by GuC. + * + * This file will contain all H2G related logic for handling Render C-states. + * There are some calls to xe_gt_idle, where we enable host C6 when GuC RC is + * skipped. GuC RC is mostly independent of xe_guc_pc with the exception of + * functions that override the mode for which we have to rely on the SLPC H2G + * calls. + */ + +static int guc_action_setup_gucrc(struct xe_guc *guc, u32 control) +{ + u32 action[] = { + GUC_ACTION_HOST2GUC_SETUP_PC_GUCRC, + control, + }; + int ret; + + ret = xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); + if (ret && !(xe_device_wedged(guc_to_xe(guc)) && ret == -ECANCELED)) + xe_gt_err(guc_to_gt(guc), + "GuC RC setup %s(%u) failed (%pe)\n", + control == GUCRC_HOST_CONTROL ? "HOST_CONTROL" : + control == GUCRC_FIRMWARE_CONTROL ? "FIRMWARE_CONTROL" : + "UNKNOWN", control, ERR_PTR(ret)); + return ret; +} + +/** + * xe_guc_rc_disable() - Disable GuC RC + * @guc: Xe GuC instance + * + * Disables GuC RC by taking control of RC6 back from GuC. 
+ */ +void xe_guc_rc_disable(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + + if (!xe->info.skip_guc_pc && xe->info.platform != XE_PVC) + if (guc_action_setup_gucrc(guc, GUCRC_HOST_CONTROL)) + return; + + xe_gt_WARN_ON(gt, xe_gt_idle_disable_c6(gt)); +} + +static void xe_guc_rc_fini_hw(void *arg) +{ + struct xe_guc *guc = arg; + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + + if (xe_device_wedged(xe)) + return; + + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + xe_guc_rc_disable(guc); +} + +/** + * xe_guc_rc_init() - Init GuC RC + * @guc: Xe GuC instance + * + * Add callback action for GuC RC + * + * Return: 0 on success, negative error code on error. + */ +int xe_guc_rc_init(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + + xe_gt_assert(gt, xe_device_uc_enabled(xe)); + + return devm_add_action_or_reset(xe->drm.dev, xe_guc_rc_fini_hw, guc); +} + +/** + * xe_guc_rc_enable() - Enable GuC RC feature if applicable + * @guc: Xe GuC instance + * + * Enables GuC RC feature. + * + * Return: 0 on success, negative error code on error. 
+ */ +int xe_guc_rc_enable(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + + xe_gt_assert(gt, xe_device_uc_enabled(xe)); + + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FW_GT)) + return -ETIMEDOUT; + + if (xe->info.platform == XE_PVC) { + xe_guc_rc_disable(guc); + return 0; + } + + if (xe->info.skip_guc_pc) { + xe_gt_idle_enable_c6(gt); + return 0; + } + + return guc_action_setup_gucrc(guc, GUCRC_FIRMWARE_CONTROL); +} diff --git a/drivers/gpu/drm/xe/xe_guc_rc.h b/drivers/gpu/drm/xe/xe_guc_rc.h new file mode 100644 index 000000000000..2e7e15e2be50 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_rc.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2026 Intel Corporation + */ + +#ifndef _XE_GUC_RC_H_ +#define _XE_GUC_RC_H_ + +struct xe_guc; + +int xe_guc_rc_init(struct xe_guc *guc); +int xe_guc_rc_enable(struct xe_guc *guc); +void xe_guc_rc_disable(struct xe_guc *guc); + +#endif diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 3f63c2a7e86d..d9aa845a308d 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -13,6 +13,7 @@ #include "xe_gt_sriov_vf.h" #include "xe_guc.h" #include "xe_guc_pc.h" +#include "xe_guc_rc.h" #include "xe_guc_engine_activity.h" #include "xe_huc.h" #include "xe_sriov.h" @@ -214,6 +215,10 @@ int xe_uc_load_hw(struct xe_uc *uc) if (ret) goto err_out; + ret = xe_guc_rc_enable(&uc->guc); + if (ret) + goto err_out; + xe_guc_engine_activity_enable_stats(&uc->guc); /* We don't fail the driver load if HuC fails to auth */ @@ -242,11 +247,6 @@ int xe_uc_reset_prepare(struct xe_uc *uc) return xe_guc_reset_prepare(&uc->guc); } -void xe_uc_gucrc_disable(struct xe_uc *uc) -{ - XE_WARN_ON(xe_guc_pc_gucrc_disable(&uc->guc.pc)); -} - void xe_uc_stop_prepare(struct xe_uc *uc) { xe_gsc_stop_prepare(&uc->gsc); diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h 
index 5398da1a8097..255a54a8f876 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -12,7 +12,6 @@ int xe_uc_init_noalloc(struct xe_uc *uc); int xe_uc_init(struct xe_uc *uc); int xe_uc_init_post_hwconfig(struct xe_uc *uc); int xe_uc_load_hw(struct xe_uc *uc); -void xe_uc_gucrc_disable(struct xe_uc *uc); int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_runtime_resume(struct xe_uc *uc); void xe_uc_runtime_suspend(struct xe_uc *uc); From a3f949cd61e0ea1bc93c00241ea1d7d9e1fcb3e9 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Tue, 3 Feb 2026 17:42:33 -0800 Subject: [PATCH 071/195] drm/xe: Use FORCEWAKE_GT in xe_guc_pc_fini_hw() No need to use FORCEWAKE_ALL since the registers being written are in GT domain. Cc: Michal Wajdeczko Signed-off-by: Vinay Belgaumkar Reviewed-by: Michal Wajdeczko Link: https://patch.msgid.link/20260204014234.2867763-3-vinay.belgaumkar@intel.com --- drivers/gpu/drm/xe/xe_guc_pc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index c8313e308f96..878eb273c3e6 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -1300,7 +1300,7 @@ static void xe_guc_pc_fini_hw(void *arg) if (xe_device_wedged(xe)) return; - CLASS(xe_force_wake, fw_ref)(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); + CLASS(xe_force_wake, fw_ref)(gt_to_fw(pc_to_gt(pc)), XE_FW_GT); XE_WARN_ON(xe_guc_pc_stop(pc)); /* Bind requested freq to mert_freq_cap before unload */ From fabedb758fcb16fcd80230c28356f8aaff972446 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Tue, 3 Feb 2026 17:42:34 -0800 Subject: [PATCH 072/195] drm/xe: Add a wrapper for SLPC set/unset params Also, extract out the GuC RC related set/unset param functions into xe_guc_rc file. GuC still allows us to override GuC RC mode using an SLPC H2G interface. Continue to use that interface, but move the related code to the newly created xe_guc_rc file. 
Cc: Riana Tauro Cc: Michal Wajdeczko Reviewed-by: Riana Tauro Reviewed-by: Michal Wajdeczko Signed-off-by: Vinay Belgaumkar Link: https://patch.msgid.link/20260204014234.2867763-4-vinay.belgaumkar@intel.com --- drivers/gpu/drm/xe/xe_guc_pc.c | 56 +++++++++++++++++++--------------- drivers/gpu/drm/xe/xe_guc_pc.h | 5 ++- drivers/gpu/drm/xe/xe_guc_rc.c | 30 ++++++++++++++++++ drivers/gpu/drm/xe/xe_guc_rc.h | 3 ++ drivers/gpu/drm/xe/xe_oa.c | 9 +++--- 5 files changed, 70 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 878eb273c3e6..21fe73ab4583 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -264,6 +264,37 @@ static int pc_action_unset_param(struct xe_guc_pc *pc, u8 id) return ret; } +/** + * xe_guc_pc_action_set_param() - Set value of SLPC param + * @pc: Xe_GuC_PC instance + * @id: Param id + * @value: Value to set + * + * This function can be used to set any SLPC param. + * + * Return: 0 on Success + */ +int xe_guc_pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value) +{ + xe_device_assert_mem_access(pc_to_xe(pc)); + return pc_action_set_param(pc, id, value); +} + +/** + * xe_guc_pc_action_unset_param() - Revert to default value + * @pc: Xe_GuC_PC instance + * @id: Param id + * + * This function can be used revert any SLPC param to its default value. + * + * Return: 0 on Success + */ +int xe_guc_pc_action_unset_param(struct xe_guc_pc *pc, u8 id) +{ + xe_device_assert_mem_access(pc_to_xe(pc)); + return pc_action_unset_param(pc, id); +} + static u32 decode_freq(u32 raw) { return DIV_ROUND_CLOSEST(raw * GT_FREQUENCY_MULTIPLIER, @@ -1045,31 +1076,6 @@ int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc) return ret; } -/** - * xe_guc_pc_override_gucrc_mode - override GUCRC mode - * @pc: Xe_GuC_PC instance - * @mode: new value of the mode. 
- * - * Return: 0 on success, negative error code on error - */ -int xe_guc_pc_override_gucrc_mode(struct xe_guc_pc *pc, enum slpc_gucrc_mode mode) -{ - guard(xe_pm_runtime)(pc_to_xe(pc)); - return pc_action_set_param(pc, SLPC_PARAM_PWRGATE_RC_MODE, mode); -} - -/** - * xe_guc_pc_unset_gucrc_mode - unset GUCRC mode override - * @pc: Xe_GuC_PC instance - * - * Return: 0 on success, negative error code on error - */ -int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc) -{ - guard(xe_pm_runtime)(pc_to_xe(pc)); - return pc_action_unset_param(pc, SLPC_PARAM_PWRGATE_RC_MODE); -} - static void pc_init_pcode_freq(struct xe_guc_pc *pc) { u32 min = DIV_ROUND_CLOSEST(pc->rpn_freq, GT_FREQUENCY_MULTIPLIER); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 1b95873b262e..0678a4e787b3 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -9,15 +9,14 @@ #include struct xe_guc_pc; -enum slpc_gucrc_mode; struct drm_printer; int xe_guc_pc_init(struct xe_guc_pc *pc); int xe_guc_pc_start(struct xe_guc_pc *pc); int xe_guc_pc_stop(struct xe_guc_pc *pc); -int xe_guc_pc_override_gucrc_mode(struct xe_guc_pc *pc, enum slpc_gucrc_mode mode); -int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc); void xe_guc_pc_print(struct xe_guc_pc *pc, struct drm_printer *p); +int xe_guc_pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value); +int xe_guc_pc_action_unset_param(struct xe_guc_pc *pc, u8 id); u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc); int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq); diff --git a/drivers/gpu/drm/xe/xe_guc_rc.c b/drivers/gpu/drm/xe/xe_guc_rc.c index 6c4591e712d9..427a889b2a1e 100644 --- a/drivers/gpu/drm/xe/xe_guc_rc.c +++ b/drivers/gpu/drm/xe/xe_guc_rc.c @@ -13,6 +13,7 @@ #include "xe_gt_printk.h" #include "xe_guc.h" #include "xe_guc_ct.h" +#include "xe_guc_pc.h" #include "xe_guc_rc.h" #include "xe_pm.h" @@ -128,3 +129,32 @@ int xe_guc_rc_enable(struct xe_guc *guc) return 
guc_action_setup_gucrc(guc, GUCRC_FIRMWARE_CONTROL); } + +/** + * xe_guc_rc_set_mode() - set new GUCRC mode + * @guc: Xe GuC instance + * @mode: new value of the mode. + * + * Function to set GuC RC mode to one of the enum values. + * + * Returns: 0 on success, negative error code on error + */ +int xe_guc_rc_set_mode(struct xe_guc *guc, enum slpc_gucrc_mode mode) +{ + guard(xe_pm_runtime_noresume)(guc_to_xe(guc)); + return xe_guc_pc_action_set_param(&guc->pc, SLPC_PARAM_PWRGATE_RC_MODE, mode); +} + +/** + * xe_guc_rc_unset_mode() - revert to default mode + * @guc: Xe GuC instance + * + * Function to revert GuC RC mode to platform defaults. + * + * Returns: 0 on success, negative error code on error + */ +int xe_guc_rc_unset_mode(struct xe_guc *guc) +{ + guard(xe_pm_runtime_noresume)(guc_to_xe(guc)); + return xe_guc_pc_action_unset_param(&guc->pc, SLPC_PARAM_PWRGATE_RC_MODE); +} diff --git a/drivers/gpu/drm/xe/xe_guc_rc.h b/drivers/gpu/drm/xe/xe_guc_rc.h index 2e7e15e2be50..f1f949e7ecc0 100644 --- a/drivers/gpu/drm/xe/xe_guc_rc.h +++ b/drivers/gpu/drm/xe/xe_guc_rc.h @@ -7,9 +7,12 @@ #define _XE_GUC_RC_H_ struct xe_guc; +enum slpc_gucrc_mode; int xe_guc_rc_init(struct xe_guc *guc); int xe_guc_rc_enable(struct xe_guc *guc); void xe_guc_rc_disable(struct xe_guc *guc); +int xe_guc_rc_set_mode(struct xe_guc *guc, enum slpc_gucrc_mode mode); +int xe_guc_rc_unset_mode(struct xe_guc *guc); #endif diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 8b37e49f639f..a462d6983d8d 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -29,7 +29,7 @@ #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_gt_printk.h" -#include "xe_guc_pc.h" +#include "xe_guc_rc.h" #include "xe_macros.h" #include "xe_mmio.h" #include "xe_oa.h" @@ -875,7 +875,7 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) /* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */ if (stream->override_gucrc) - xe_gt_WARN_ON(gt, 
xe_guc_pc_unset_gucrc_mode(>->uc.guc.pc)); + xe_gt_WARN_ON(gt, xe_guc_rc_unset_mode(>->uc.guc)); xe_oa_free_configs(stream); xe_file_put(stream->xef); @@ -1765,8 +1765,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, * state. Prevent this by overriding GUCRC mode. */ if (XE_GT_WA(stream->gt, 1509372804)) { - ret = xe_guc_pc_override_gucrc_mode(>->uc.guc.pc, - SLPC_GUCRC_MODE_GUCRC_NO_RC6); + ret = xe_guc_rc_set_mode(>->uc.guc, SLPC_GUCRC_MODE_GUCRC_NO_RC6); if (ret) goto err_free_configs; @@ -1824,7 +1823,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, xe_force_wake_put(gt_to_fw(gt), stream->fw_ref); xe_pm_runtime_put(stream->oa->xe); if (stream->override_gucrc) - xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(>->uc.guc.pc)); + xe_gt_WARN_ON(gt, xe_guc_rc_unset_mode(>->uc.guc)); err_free_configs: xe_oa_free_configs(stream); exit: From 2ea05b4b023129f3f70e341bc3b8dc39debb266b Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Wed, 4 Feb 2026 11:03:13 -0800 Subject: [PATCH 073/195] drm/xe: Add GSC to powergate_info Add GSC powergate status to the existing debugfs. 
Reviewed-by: Badal Nilawar Signed-off-by: Vinay Belgaumkar Link: https://patch.msgid.link/20260204190314.2904009-2-vinay.belgaumkar@intel.com --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_gt_idle.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 24fc64fc832e..7d3ec1fe4f7f 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -367,6 +367,7 @@ #define FORCEWAKE_RENDER XE_REG(0xa278) #define POWERGATE_DOMAIN_STATUS XE_REG(0xa2a0) +#define GSC_AWAKE_STATUS REG_BIT(8) #define MEDIA_SLICE3_AWAKE_STATUS REG_BIT(4) #define MEDIA_SLICE2_AWAKE_STATUS REG_BIT(3) #define MEDIA_SLICE1_AWAKE_STATUS REG_BIT(2) diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 7a569e1730a4..94d3403ec11e 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -254,6 +254,11 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "Media Samplers Power Gating Enabled: %s\n", str_yes_no(pg_enabled & MEDIA_SAMPLERS_POWERGATE_ENABLE)); + if (gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)) { + drm_printf(p, "GSC Power Gate Status: %s\n", + str_up_down(pg_status & GSC_AWAKE_STATUS)); + } + return 0; } From 91be6115e4677ad69bf8186988972b319d32ae61 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Wed, 4 Feb 2026 11:03:14 -0800 Subject: [PATCH 074/195] drm/xe: Add forcewake status to powergate_info Dump forcewake status and ref counts for all domains as part of this debugfs. 
This is the sample output from gt1- $ cat /sys/kernel/debug/dri//0/gt1/powergate_info Media Power Gating Enabled: yes Media Slice0 Power Gate Status: down GSC Power Gate Status: down GT.ref_count=0, GT.forcewake=0x10000 VDBox0.ref_count=0, VDBox0.forcewake=0x10000 VEBox0.ref_count=0, VEBox0.forcewake=0x10000 GSC.ref_count=0, GSC.forcewake=0x10000 v2: Fix checkpatch issues Reviewed-by: Badal Nilawar Signed-off-by: Vinay Belgaumkar Link: https://patch.msgid.link/20260204190314.2904009-3-vinay.belgaumkar@intel.com --- drivers/gpu/drm/xe/xe_force_wake.c | 46 ++++++++++++++++++++++++++---- drivers/gpu/drm/xe/xe_force_wake.h | 11 +++++++ drivers/gpu/drm/xe/xe_gt_idle.c | 20 +++++++++++++ 3 files changed, 71 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 76e054f314ee..197e2197bd0a 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -148,12 +148,6 @@ static int domain_sleep_wait(struct xe_gt *gt, return __domain_wait(gt, domain, false); } -#define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \ - for (tmp__ = (mask__); tmp__; tmp__ &= ~BIT(ffs(tmp__) - 1)) \ - for_each_if((domain__ = ((fw__)->domains + \ - (ffs(tmp__) - 1))) && \ - domain__->reg_ctl.addr) - /** * xe_force_wake_get() : Increase the domain refcount * @fw: struct xe_force_wake @@ -266,3 +260,43 @@ void xe_force_wake_put(struct xe_force_wake *fw, unsigned int fw_ref) xe_gt_WARN(gt, ack_fail, "Forcewake domain%s %#x failed to acknowledge sleep request\n", str_plural(hweight_long(ack_fail)), ack_fail); } + +const char *xe_force_wake_domain_to_str(enum xe_force_wake_domain_id id) +{ + switch (id) { + case XE_FW_DOMAIN_ID_GT: + return "GT"; + case XE_FW_DOMAIN_ID_RENDER: + return "Render"; + case XE_FW_DOMAIN_ID_MEDIA: + return "Media"; + case XE_FW_DOMAIN_ID_MEDIA_VDBOX0: + return "VDBox0"; + case XE_FW_DOMAIN_ID_MEDIA_VDBOX1: + return "VDBox1"; + case XE_FW_DOMAIN_ID_MEDIA_VDBOX2: + return 
"VDBox2"; + case XE_FW_DOMAIN_ID_MEDIA_VDBOX3: + return "VDBox3"; + case XE_FW_DOMAIN_ID_MEDIA_VDBOX4: + return "VDBox4"; + case XE_FW_DOMAIN_ID_MEDIA_VDBOX5: + return "VDBox5"; + case XE_FW_DOMAIN_ID_MEDIA_VDBOX6: + return "VDBox6"; + case XE_FW_DOMAIN_ID_MEDIA_VDBOX7: + return "VDBox7"; + case XE_FW_DOMAIN_ID_MEDIA_VEBOX0: + return "VEBox0"; + case XE_FW_DOMAIN_ID_MEDIA_VEBOX1: + return "VEBox1"; + case XE_FW_DOMAIN_ID_MEDIA_VEBOX2: + return "VEBox2"; + case XE_FW_DOMAIN_ID_MEDIA_VEBOX3: + return "VEBox3"; + case XE_FW_DOMAIN_ID_GSC: + return "GSC"; + default: + return "Unknown"; + } +} diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h index 1e2198f6a007..e2721f205d6c 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.h +++ b/drivers/gpu/drm/xe/xe_force_wake.h @@ -19,6 +19,17 @@ unsigned int __must_check xe_force_wake_get(struct xe_force_wake *fw, enum xe_force_wake_domains domains); void xe_force_wake_put(struct xe_force_wake *fw, unsigned int fw_ref); +const char *xe_force_wake_domain_to_str(enum xe_force_wake_domain_id id); + +#define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \ + for (tmp__ = (mask__); tmp__; tmp__ &= ~BIT(ffs(tmp__) - 1)) \ + for_each_if(((domain__) = ((fw__)->domains + \ + (ffs(tmp__) - 1))) && \ + (domain__)->reg_ctl.addr) + +#define for_each_fw_domain(domain__, fw__, tmp__) \ + for_each_fw_domain_masked((domain__), (fw__)->initialized_domains, (fw__), (tmp__)) + static inline int xe_force_wake_ref(struct xe_force_wake *fw, enum xe_force_wake_domains domain) diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 94d3403ec11e..4a2d9edb6a4c 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -168,6 +168,24 @@ void xe_gt_idle_disable_pg(struct xe_gt *gt) xe_mmio_write32(>->mmio, POWERGATE_ENABLE, gtidle->powergate_enable); } +static void force_wake_domains_show(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_force_wake_domain 
*domain; + struct xe_force_wake *fw = gt_to_fw(gt); + unsigned int tmp; + unsigned long flags; + + spin_lock_irqsave(&fw->lock, flags); + for_each_fw_domain(domain, fw, tmp) { + drm_printf(p, "%s.ref_count=%u, %s.fwake=0x%x\n", + xe_force_wake_domain_to_str(domain->id), + READ_ONCE(domain->ref), + xe_force_wake_domain_to_str(domain->id), + xe_mmio_read32(>->mmio, domain->reg_ctl)); + } + spin_unlock_irqrestore(&fw->lock, flags); +} + /** * xe_gt_idle_pg_print - Xe powergating info * @gt: GT object @@ -259,6 +277,8 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p) str_up_down(pg_status & GSC_AWAKE_STATUS)); } + force_wake_domains_show(gt, p); + return 0; } From 6acf3d3ed6c1f0febdd046578ea9cafcd47912f4 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 Feb 2026 13:41:40 -0800 Subject: [PATCH 075/195] drm/xe: Move number of XeCore fuse registers to graphics descriptor The number of registers used to express the XeCore mask has some "special cases" that don't always get inherited by later IP versions so it's cleaner and simpler to record the numbers in the IP descriptor rather than adding extra conditions to the standalone get_num_dss_regs() function. Note that a minor change here is that we now always treat the number of registers as 0 for the media GT. Technically a copy of these fuse registers does exist in the media GT as well (at the usual 0x380000+$offset location), but the value of those is always supposed to read back as 0 because media GTs never have any XeCores or EUs. v2: - Add a kunit assertion to catch descriptors that forget to initialize either count. 
(Gustavo) Cc: Gustavo Sousa Reviewed-by: Gustavo Sousa Link: https://patch.msgid.link/20260205214139.48515-3-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/tests/xe_pci_test.c | 8 ++++++ drivers/gpu/drm/xe/xe_gt_topology.c | 37 +++++--------------------- drivers/gpu/drm/xe/xe_gt_types.h | 10 +++++++ drivers/gpu/drm/xe/xe_pci.c | 12 +++++++++ drivers/gpu/drm/xe/xe_pci_types.h | 2 ++ 5 files changed, 39 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c index 4d10a7e2b570..acf5a5555130 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.c +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c @@ -19,6 +19,8 @@ static void check_graphics_ip(struct kunit *test) const struct xe_ip *param = test->param_value; const struct xe_graphics_desc *graphics = param->desc; u64 mask = graphics->hw_engine_mask; + u8 fuse_regs = graphics->num_geometry_xecore_fuse_regs + + graphics->num_compute_xecore_fuse_regs; /* RCS, CCS, and BCS engines are allowed on the graphics IP */ mask &= ~(XE_HW_ENGINE_RCS_MASK | @@ -27,6 +29,12 @@ static void check_graphics_ip(struct kunit *test) /* Any remaining engines are an error */ KUNIT_ASSERT_EQ(test, mask, 0); + + /* + * All graphics IP should have at least one geometry and/or compute + * XeCore fuse register. 
+ */ + KUNIT_ASSERT_GE(test, fuse_regs, 1); } static void check_media_ip(struct kunit *test) diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index bd5260221d8d..575dcfd5eb9d 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -205,24 +205,6 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) } } -static void -get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs) -{ - if (GRAPHICS_VER(xe) > 20) { - *geometry_regs = 3; - *compute_regs = 3; - } else if (GRAPHICS_VERx100(xe) == 1260) { - *geometry_regs = 0; - *compute_regs = 2; - } else if (GRAPHICS_VERx100(xe) >= 1250) { - *geometry_regs = 1; - *compute_regs = 1; - } else { - *geometry_regs = 1; - *compute_regs = 0; - } -} - void xe_gt_topology_init(struct xe_gt *gt) { @@ -236,23 +218,19 @@ xe_gt_topology_init(struct xe_gt *gt) XEHPC_GT_COMPUTE_DSS_ENABLE_EXT, XE2_GT_COMPUTE_DSS_2, }; - int num_geometry_regs, num_compute_regs; - struct xe_device *xe = gt_to_xe(gt); struct drm_printer p; - get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs); - /* * Register counts returned shouldn't exceed the number of registers * passed as parameters below. 
*/ - xe_gt_assert(gt, num_geometry_regs <= ARRAY_SIZE(geometry_regs)); - xe_gt_assert(gt, num_compute_regs <= ARRAY_SIZE(compute_regs)); + xe_gt_assert(gt, gt->info.num_geometry_xecore_fuse_regs <= ARRAY_SIZE(geometry_regs)); + xe_gt_assert(gt, gt->info.num_compute_xecore_fuse_regs <= ARRAY_SIZE(compute_regs)); load_dss_mask(gt, gt->fuse_topo.g_dss_mask, - num_geometry_regs, geometry_regs); + gt->info.num_geometry_xecore_fuse_regs, geometry_regs); load_dss_mask(gt, gt->fuse_topo.c_dss_mask, - num_compute_regs, compute_regs); + gt->info.num_compute_xecore_fuse_regs, compute_regs); load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, >->fuse_topo.eu_type); load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask); @@ -330,15 +308,14 @@ xe_l3_bank_mask_ffs(const xe_l3_bank_mask_t mask) */ bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad) { - struct xe_device *xe = gt_to_xe(gt); xe_dss_mask_t all_dss; - int g_dss_regs, c_dss_regs, dss_per_quad, quad_first; + int dss_per_quad, quad_first; bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, XE_MAX_DSS_FUSE_BITS); - get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs); - dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4; + dss_per_quad = 32 * max(gt->info.num_geometry_xecore_fuse_regs, + gt->info.num_compute_xecore_fuse_regs) / 4; quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 1d7360b56ac6..44a4e7af11b1 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -144,6 +144,16 @@ struct xe_gt { u8 id; /** @info.has_indirect_ring_state: GT has indirect ring state support */ u8 has_indirect_ring_state:1; + /** + * @info.num_geometry_xecore_fuse_regs: Number of 32b-bit fuse + * registers the geometry XeCore mask spans. + */ + u8 num_geometry_xecore_fuse_regs; + /** + * @info.num_compute_xecore_fuse_regs: Number of 32b-bit fuse + * registers the compute XeCore mask spans. 
+ */ + u8 num_compute_xecore_fuse_regs; } info; #if IS_ENABLED(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index ed661bc54c31..02c192831323 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -52,6 +52,7 @@ __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); static const struct xe_graphics_desc graphics_xelp = { .hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0), + .num_geometry_xecore_fuse_regs = 1, }; #define XE_HP_FEATURES \ @@ -62,6 +63,8 @@ static const struct xe_graphics_desc graphics_xehpg = { BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_CCS0) | BIT(XE_HW_ENGINE_CCS1) | BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3), + .num_geometry_xecore_fuse_regs = 1, + .num_compute_xecore_fuse_regs = 1, XE_HP_FEATURES, }; @@ -81,12 +84,15 @@ static const struct xe_graphics_desc graphics_xehpc = { .has_asid = 1, .has_atomic_enable_pte_bit = 1, .has_usm = 1, + .num_compute_xecore_fuse_regs = 2, }; static const struct xe_graphics_desc graphics_xelpg = { .hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0) | BIT(XE_HW_ENGINE_CCS0), + .num_geometry_xecore_fuse_regs = 1, + .num_compute_xecore_fuse_regs = 1, XE_HP_FEATURES, }; @@ -104,6 +110,8 @@ static const struct xe_graphics_desc graphics_xelpg = { static const struct xe_graphics_desc graphics_xe2 = { XE2_GFX_FEATURES, + .num_geometry_xecore_fuse_regs = 3, + .num_compute_xecore_fuse_regs = 3, }; static const struct xe_graphics_desc graphics_xe3p_xpc = { @@ -114,6 +122,8 @@ static const struct xe_graphics_desc graphics_xe3p_xpc = { GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0), .multi_queue_engine_class_mask = BIT(XE_ENGINE_CLASS_COPY) | BIT(XE_ENGINE_CLASS_COMPUTE), + .num_geometry_xecore_fuse_regs = 3, + .num_compute_xecore_fuse_regs = 3, }; static const struct xe_media_desc media_xem = { @@ -782,6 +792,8 @@ static struct xe_gt *alloc_primary_gt(struct xe_tile *tile, 
gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state; gt->info.multi_queue_engine_class_mask = graphics_desc->multi_queue_engine_class_mask; gt->info.engine_mask = graphics_desc->hw_engine_mask; + gt->info.num_geometry_xecore_fuse_regs = graphics_desc->num_geometry_xecore_fuse_regs; + gt->info.num_compute_xecore_fuse_regs = graphics_desc->num_compute_xecore_fuse_regs; /* * Before media version 13, the media IP was part of the primary GT diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index 8b2ff3f25607..470d31a1f0d6 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -66,6 +66,8 @@ struct xe_device_desc { struct xe_graphics_desc { u64 hw_engine_mask; /* hardware engines provided by graphics IP */ u16 multi_queue_engine_class_mask; /* bitmask of engine classes which support multi queue */ + u8 num_geometry_xecore_fuse_regs; + u8 num_compute_xecore_fuse_regs; u8 has_asid:1; u8 has_atomic_enable_pte_bit:1; From e8100643ff01be0fc74048b8296cfb2b9b5c90ed Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 Feb 2026 13:41:41 -0800 Subject: [PATCH 076/195] drm/xe/xe3p_xpc: XeCore mask spans four registers On Xe3p_XPC, there are now four registers reserved to express the XeCore mask rather than just three. Define the new registers and update the IP descriptor accordingly. Note that this only applies to Xe3p_XPC for now; Xe3p_LPG still only uses three registers to express the mask. 
Reviewed-by: Gustavo Sousa Link: https://patch.msgid.link/20260205214139.48515-4-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 ++ drivers/gpu/drm/xe/xe_gt_topology.c | 2 ++ drivers/gpu/drm/xe/xe_gt_types.h | 2 +- drivers/gpu/drm/xe/xe_pci.c | 4 ++-- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 7d3ec1fe4f7f..52440100a731 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -253,6 +253,8 @@ #define XE2_GT_COMPUTE_DSS_2 XE_REG(0x914c) #define XE2_GT_GEOMETRY_DSS_1 XE_REG(0x9150) #define XE2_GT_GEOMETRY_DSS_2 XE_REG(0x9154) +#define XE3P_XPC_GT_GEOMETRY_DSS_3 XE_REG(0x915c) +#define XE3P_XPC_GT_COMPUTE_DSS_3 XE_REG(0x9160) #define SERVICE_COPY_ENABLE XE_REG(0x9170) #define FUSE_SERVICE_COPY_ENABLE_MASK REG_GENMASK(7, 0) diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 575dcfd5eb9d..bfe87e682879 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -212,11 +212,13 @@ xe_gt_topology_init(struct xe_gt *gt) XELP_GT_GEOMETRY_DSS_ENABLE, XE2_GT_GEOMETRY_DSS_1, XE2_GT_GEOMETRY_DSS_2, + XE3P_XPC_GT_GEOMETRY_DSS_3, }; static const struct xe_reg compute_regs[] = { XEHP_GT_COMPUTE_DSS_ENABLE, XEHPC_GT_COMPUTE_DSS_ENABLE_EXT, XE2_GT_COMPUTE_DSS_2, + XE3P_XPC_GT_COMPUTE_DSS_3, }; struct drm_printer p; diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 44a4e7af11b1..caf7e7e78be9 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -35,7 +35,7 @@ enum xe_gt_eu_type { XE_GT_EU_TYPE_SIMD16, }; -#define XE_MAX_DSS_FUSE_REGS 3 +#define XE_MAX_DSS_FUSE_REGS 4 #define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS) #define XE_MAX_EU_FUSE_REGS 1 #define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS) diff --git a/drivers/gpu/drm/xe/xe_pci.c 
b/drivers/gpu/drm/xe/xe_pci.c index 02c192831323..aec386c5ca9a 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -122,8 +122,8 @@ static const struct xe_graphics_desc graphics_xe3p_xpc = { GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0), .multi_queue_engine_class_mask = BIT(XE_ENGINE_CLASS_COPY) | BIT(XE_ENGINE_CLASS_COMPUTE), - .num_geometry_xecore_fuse_regs = 3, - .num_compute_xecore_fuse_regs = 3, + .num_geometry_xecore_fuse_regs = 4, + .num_compute_xecore_fuse_regs = 4, }; static const struct xe_media_desc media_xem = { From 6fa45759cf43df3508a94dda7b6b02735ab19c4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Fri, 30 Jan 2026 22:56:24 +0100 Subject: [PATCH 077/195] drm/xe/pf: Fix the address range assert in ggtt_get_pte helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ggtt_get_pte helper used for saving VF GGTT incorrectly assumes that ggtt_size == ggtt_end. Fix it to avoid triggering spurious asserts if VF GGTT object lands in high GGTT range. 
Reviewed-by: Michal Wajdeczko Link: https://patch.msgid.link/20260130215624.556099-1-michal.winiarski@intel.com Signed-off-by: Michał Winiarski --- drivers/gpu/drm/xe/xe_ggtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 8b9d7c0bbe90..2ab880772847 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -193,7 +193,7 @@ static void xe_ggtt_set_pte_and_flush(struct xe_ggtt *ggtt, u64 addr, u64 pte) static u64 xe_ggtt_get_pte(struct xe_ggtt *ggtt, u64 addr) { xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK)); - xe_tile_assert(ggtt->tile, addr < ggtt->size); + xe_tile_assert(ggtt->tile, addr < ggtt->start + ggtt->size); return readq(&ggtt->gsm[addr >> XE_PTE_SHIFT]); } From 944a3329b05510d55c69c2ef455136e2fc02de29 Mon Sep 17 00:00:00 2001 From: Jia Yao Date: Thu, 5 Feb 2026 16:15:29 +0000 Subject: [PATCH 078/195] drm/xe: Add bounds check on pat_index to prevent OOB kernel read in madvise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When user provides a bogus pat_index value through the madvise IOCTL, the xe_pat_index_get_coh_mode() function performs an array access without validating bounds. This allows a malicious user to trigger an out-of-bounds kernel read from the xe->pat.table array. The vulnerability exists because the validation in madvise_args_are_sane() directly calls xe_pat_index_get_coh_mode(xe, args->pat_index.val) without first checking if pat_index is within [0, xe->pat.n_entries). Although xe_pat_index_get_coh_mode() has a WARN_ON to catch this in debug builds, it still performs the unsafe array access in production kernels. 
v2(Matthew Auld) - Using array_index_nospec() to mitigate spectre attacks when the value is used v3(Matthew Auld) - Put the declarations at the start of the block Fixes: ada7486c5668 ("drm/xe: Implement madvise ioctl for xe") Reviewed-by: Matthew Auld Cc: # v6.18+ Cc: Matthew Brost Cc: Shuicheng Lin Cc: Himal Prasad Ghimiray Cc: "Thomas Hellström" Cc: Rodrigo Vivi Cc: Matthew Auld Signed-off-by: Jia Yao Signed-off-by: Matthew Auld Link: https://patch.msgid.link/20260205161529.1819276-1-jia.yao@intel.com --- drivers/gpu/drm/xe/xe_vm_madvise.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index add9a6ca2390..52147f5eaaa0 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -291,8 +291,13 @@ static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madv break; case DRM_XE_MEM_RANGE_ATTR_PAT: { - u16 coh_mode = xe_pat_index_get_coh_mode(xe, args->pat_index.val); + u16 pat_index, coh_mode; + if (XE_IOCTL_DBG(xe, args->pat_index.val >= xe->pat.n_entries)) + return false; + + pat_index = array_index_nospec(args->pat_index.val, xe->pat.n_entries); + coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); if (XE_IOCTL_DBG(xe, !coh_mode)) return false; From dc90ead44054736131f73b1dd319b8be06088d36 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 30 Jan 2026 12:51:06 +0000 Subject: [PATCH 079/195] drm/xe/uapi: update used tracking kernel-doc In commit 4d0b035fd6da ("drm/xe/uapi: loosen used tracking restriction") we dropped the CAP_PERFMON restriction but missed updating the corresponding kernel-doc. Fix that. v2 (Sanjay): - Don't drop the note around the extra cpu_visible_used expectations. 
Reported-by: Ulisses Furquim Fixes: 4d0b035fd6da ("drm/xe/uapi: loosen used tracking restriction") Signed-off-by: Matthew Auld Cc: Sanjay Yadav Reviewed-by: Sanjay Yadav Link: https://patch.msgid.link/20260130125105.451229-2-matthew.auld@intel.com --- include/uapi/drm/xe_drm.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 077e66a682e2..c9e70f78e723 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -335,10 +335,6 @@ struct drm_xe_mem_region { __u64 total_size; /** * @used: Estimate of the memory used in bytes for this region. - * - * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable - * accounting. Without this the value here will always equal - * zero. */ __u64 used; /** @@ -363,9 +359,7 @@ struct drm_xe_mem_region { * @cpu_visible_used: Estimate of CPU visible memory used, in * bytes. * - * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable - * accounting. Without this the value here will always equal - * zero. Note this is only currently tracked for + * Note this is only currently tracked for * DRM_XE_MEM_REGION_CLASS_VRAM regions (for other types the value * here will always be zero). */ From c57db41b8d2cd410e7219729f446aa17965485ad Mon Sep 17 00:00:00 2001 From: Sk Anirban Date: Mon, 2 Feb 2026 16:23:15 +0530 Subject: [PATCH 080/195] drm/xe/guc: Add Wa_14025883347 for GuC DMA failure on reset Prevent GuC firmware DMA failures during GuC-only reset by disabling idle flow and verifying SRAM handling completion. Without this, reset can be issued while SRAM handler is copying WOPCM to SRAM, causing GuC HW to get stuck. 
v2: Modify error message (Badal) Rename reg bit name (Daniele) Update WA skip condition (Daniele) Update SRAM handling logic (Daniele) v3: Reorder WA call (Badal) Wait for GuC ready status (Daniele) v4: Update reg name (Badal) Add comment (Daniele) Add extended graphics version (Daniele) Modify rules Signed-off-by: Sk Anirban Reviewed-by: Badal Nilawar Acked-by: Matt Roper Reviewed-by: Daniele Ceraolo Spurio Link: https://patch.msgid.link/20260202105313.3338094-4-sk.anirban@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/regs/xe_guc_regs.h | 8 ++++++ drivers/gpu/drm/xe/xe_guc.c | 38 +++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_wa_oob.rules | 3 +++ 3 files changed, 49 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h index 87984713dd12..5faac8316b66 100644 --- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h @@ -40,6 +40,9 @@ #define GS_BOOTROM_JUMP_PASSED REG_FIELD_PREP(GS_BOOTROM_MASK, 0x76) #define GS_MIA_IN_RESET REG_BIT(0) +#define BOOT_HASH_CHK XE_REG(0xc010) +#define GUC_BOOT_UKERNEL_VALID REG_BIT(31) + #define GUC_HEADER_INFO XE_REG(0xc014) #define GUC_WOPCM_SIZE XE_REG(0xc050) @@ -83,7 +86,12 @@ #define GUC_WOPCM_OFFSET_MASK REG_GENMASK(31, GUC_WOPCM_OFFSET_SHIFT) #define HUC_LOADING_AGENT_GUC REG_BIT(1) #define GUC_WOPCM_OFFSET_VALID REG_BIT(0) + +#define GUC_SRAM_STATUS XE_REG(0xc398) +#define GUC_SRAM_HANDLING_MASK REG_GENMASK(8, 7) + #define GUC_MAX_IDLE_COUNT XE_REG(0xc3e4) +#define GUC_IDLE_FLOW_DISABLE REG_BIT(31) #define GUC_PMTIMESTAMP_LO XE_REG(0xc3e8) #define GUC_PMTIMESTAMP_HI XE_REG(0xc3ec) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 303f6ae7c418..cbbb4d665b8f 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -917,6 +917,41 @@ int xe_guc_post_load_init(struct xe_guc *guc) return xe_guc_submit_enable(guc); } +/* + * Wa_14025883347: Prevent GuC firmware DMA failures during GuC-only 
reset by ensuring + * SRAM save/restore operations are complete before reset. + */ +static void guc_prevent_fw_dma_failure_on_reset(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + u32 boot_hash_chk, guc_status, sram_status; + int ret; + + guc_status = xe_mmio_read32(>->mmio, GUC_STATUS); + if (guc_status & GS_MIA_IN_RESET) + return; + + boot_hash_chk = xe_mmio_read32(>->mmio, BOOT_HASH_CHK); + if (!(boot_hash_chk & GUC_BOOT_UKERNEL_VALID)) + return; + + /* Disable idle flow during reset (GuC reset re-enables it automatically) */ + xe_mmio_rmw32(>->mmio, GUC_MAX_IDLE_COUNT, 0, GUC_IDLE_FLOW_DISABLE); + + ret = xe_mmio_wait32(>->mmio, GUC_STATUS, GS_UKERNEL_MASK, + FIELD_PREP(GS_UKERNEL_MASK, XE_GUC_LOAD_STATUS_READY), + 100000, &guc_status, false); + if (ret) + xe_gt_warn(gt, "GuC not ready after disabling idle flow (GUC_STATUS: 0x%x)\n", + guc_status); + + ret = xe_mmio_wait32(>->mmio, GUC_SRAM_STATUS, GUC_SRAM_HANDLING_MASK, + 0, 5000, &sram_status, false); + if (ret) + xe_gt_warn(gt, "SRAM handling not complete (GUC_SRAM_STATUS: 0x%x)\n", + sram_status); +} + int xe_guc_reset(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); @@ -929,6 +964,9 @@ int xe_guc_reset(struct xe_guc *guc) if (IS_SRIOV_VF(gt_to_xe(gt))) return xe_gt_sriov_vf_bootstrap(gt); + if (XE_GT_WA(gt, 14025883347)) + guc_prevent_fw_dma_failure_on_reset(guc); + xe_mmio_write32(mmio, GDRST, GRDOM_GUC); ret = xe_mmio_wait32(mmio, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false); diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 5cd7fa6d2a5c..ac08f94f90a1 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -73,3 +73,6 @@ 15015404425_disable PLATFORM(PANTHERLAKE), MEDIA_STEP(B0, FOREVER) 16026007364 MEDIA_VERSION(3000) 14020316580 MEDIA_VERSION(1301) + +14025883347 MEDIA_VERSION_RANGE(1301, 3503) + GRAPHICS_VERSION_RANGE(2004, 3005) From 4e2796c8280ad21f1a252c05204845261d988e2f Mon Sep 17 00:00:00 2001 From: 
Michal Wajdeczko Date: Sat, 7 Feb 2026 22:44:28 +0100 Subject: [PATCH 081/195] drm/xe/vf: Allow VF to initialize MCR tables While VFs can't access MCR registers, it's still safe to initialize our per-platform MCR tables, as we might need them later in the LRC programming, as engines themselves may access MCR steer registers and thanks to all our past fixes to the VF probe initialization order, VFs are able to use values of the fuse registers needed here. Signed-off-by: Michal Wajdeczko Cc: Matt Roper Link: https://patch.msgid.link/20260207214428.5205-1-michal.wajdeczko@intel.com Reviewed-by: Matt Roper Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_gt_mcr.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 7c1fe9ac120d..66c4efca80fe 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -505,9 +505,6 @@ void xe_gt_mcr_init_early(struct xe_gt *gt) spin_lock_init(>->mcr_lock); - if (IS_SRIOV_VF(xe)) - return; - if (gt->info.type == XE_GT_TYPE_MEDIA) { drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13); @@ -568,9 +565,6 @@ void xe_gt_mcr_init_early(struct xe_gt *gt) */ void xe_gt_mcr_init(struct xe_gt *gt) { - if (IS_SRIOV_VF(gt_to_xe(gt))) - return; - /* Select non-terminated steering target for each type */ for (int i = 0; i < NUM_STEERING_TYPES; i++) { gt->steering[i].initialized = true; From a30f999681126b128a43137793ac84b6a5b7443f Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Fri, 30 Jan 2026 16:56:22 +0000 Subject: [PATCH 082/195] drm/xe/mmio: Avoid double-adjust in 64-bit reads xe_mmio_read64_2x32() was adjusting register addresses and then calling xe_mmio_read32(), which applies the adjustment again. This may shift accesses twice if adj_offset < adj_limit. There is no issue currently, as for media gt, adj_offset > adj_limit, so the 2nd adjust will be a no-op. But it may not work in future. 
To fix it, replace the adjusted-address comparison with a direct sanity check that ensures the MMIO address adjustment cutoff never falls within the 8-byte range of a 64-bit register. And let xe_mmio_read32() handle address translation. v2: rewrite the sanity check in a more natural way. (Matt) v3: Add Fixes tag. (Jani) Fixes: 07431945d8ae ("drm/xe: Avoid 64-bit register reads") Reviewed-by: Matt Roper Cc: Jani Nikula Cc: Rodrigo Vivi Signed-off-by: Shuicheng Lin Link: https://patch.msgid.link/20260130165621.471408-2-shuicheng.lin@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_mmio.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index bcb6674b7dac..a1a05c68dc7d 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -256,11 +256,11 @@ u64 xe_mmio_read64_2x32(struct xe_mmio *mmio, struct xe_reg reg) struct xe_reg reg_udw = { .addr = reg.addr + 0x4 }; u32 ldw, udw, oldudw, retries; - reg.addr = xe_mmio_adjusted_addr(mmio, reg.addr); - reg_udw.addr = xe_mmio_adjusted_addr(mmio, reg_udw.addr); - - /* we shouldn't adjust just one register address */ - xe_tile_assert(mmio->tile, reg_udw.addr == reg.addr + 0x4); + /* + * The two dwords of a 64-bit register can never straddle the offset + * adjustment cutoff. + */ + xe_tile_assert(mmio->tile, !in_range(mmio->adj_limit, reg.addr + 1, 7)); oldudw = xe_mmio_read32(mmio, reg_udw); for (retries = 5; retries; --retries) { From 8fcb7dfb8bbfeb05073f834d07f3eb44df797f20 Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Fri, 6 Feb 2026 15:35:57 -0300 Subject: [PATCH 083/195] drm/xe/xe3p_lpg: Add support for graphics IP 35.10 Add Xe3p_LPG graphics IP version 35.10. Xe3p_LPG supports all features described by XE2_GFX_FEATURES and also multi-queue feature on BCS and CCS engines. 
As such, create a new struct xe_graphics_desc named graphics_xe3p_lpg that inherits from XE2_GFX_FEATURES and also includes the necessary .multi_queue_engine_class_mask. Here is a list of fields and associated Bspec references for the members of the IP descriptor: .hw_engine_mask (Bspec 60149) .multi_queue_engine_class_mask (Bspec 74110) .has_asid (Bspec 71132) .has_atomic_enable_pte_bit (Bspec 59510, 74675) .has_indirect_ring_state (Bspec 67296) .has_range_tlb_inval (Bspec 71126) .has_usm (Bspec 59651) .has_64bit_timestamp (Bspec 60318) .num_geometry_xecore_fuse_regs (Bspec 62566, 67401, 67536) .num_compute_xecore_fuse_regs (Bspec 62565, 62561, 67537) v2: - Drop non-existing fields from the list in the commit message. (Matt) - Squash patch adding .multi_queue_engine_class_mask here. (Matt) - Rename graphics_xe3p to graphics_xe3p_lpg. (Matt) - Add fields .num_geometry_xecore_fuse_regs and .num_compute_xecore_fuse_regs after rebasing and inheriting commit 6acf3d3ed6c1 ("drm/xe: Move number of XeCore fuse registers to graphics descriptor"). 
(Gustavo) Signed-off-by: Shekhar Chauhan Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260206-nvl-p-upstreaming-v3-1-636e1ad32688@intel.com Signed-off-by: Gustavo Sousa --- drivers/gpu/drm/xe/xe_pci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index aec386c5ca9a..4abd64eccf27 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -114,6 +114,13 @@ static const struct xe_graphics_desc graphics_xe2 = { .num_compute_xecore_fuse_regs = 3, }; +static const struct xe_graphics_desc graphics_xe3p_lpg = { + XE2_GFX_FEATURES, + .multi_queue_engine_class_mask = BIT(XE_ENGINE_CLASS_COPY) | BIT(XE_ENGINE_CLASS_COMPUTE), + .num_geometry_xecore_fuse_regs = 3, + .num_compute_xecore_fuse_regs = 3, +}; + static const struct xe_graphics_desc graphics_xe3p_xpc = { XE2_GFX_FEATURES, .has_indirect_ring_state = 1, @@ -158,6 +165,7 @@ static const struct xe_ip graphics_ips[] = { { 3003, "Xe3_LPG", &graphics_xe2 }, { 3004, "Xe3_LPG", &graphics_xe2 }, { 3005, "Xe3_LPG", &graphics_xe2 }, + { 3510, "Xe3p_LPG", &graphics_xe3p_lpg }, { 3511, "Xe3p_XPC", &graphics_xe3p_xpc }, }; From 835cd6cbb0d0e44c3fc4efc1ba6c79ffb2e2f751 Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Fri, 6 Feb 2026 15:35:58 -0300 Subject: [PATCH 084/195] drm/xe/xe3p_lpg: Add initial workarounds for graphics version 35.10 Add the initial set of workarounds for Xe3p_LPG graphics version 35.10. v2: - Fix spacing style for field LOCALITYDIS. (Matt) - Drop unnecessary Wa_14025780377. 
(Matt) Signed-off-by: Shekhar Chauhan Co-developed-by: Nitin Gote Signed-off-by: Nitin Gote Co-developed-by: Tangudu Tilak Tirumalesh Signed-off-by: Tangudu Tilak Tirumalesh Co-developed-by: Mallesh Koujalagi Signed-off-by: Mallesh Koujalagi Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260206-nvl-p-upstreaming-v3-2-636e1ad32688@intel.com Co-developed-by: Gustavo Sousa Signed-off-by: Gustavo Sousa --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 16 ++++++++++ drivers/gpu/drm/xe/xe_reg_whitelist.c | 8 +++++ drivers/gpu/drm/xe/xe_wa.c | 43 +++++++++++++++++++++++++++ 3 files changed, 67 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 52440100a731..d593331202e8 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -100,6 +100,9 @@ #define VE1_AUX_INV XE_REG(0x42b8) #define AUX_INV REG_BIT(0) +#define GAMSTLB_CTRL2 XE_REG_MCR(0x4788) +#define STLB_SINGLE_BANK_MODE REG_BIT(11) + #define XE2_LMEM_CFG XE_REG(0x48b0) #define XE2_GAMWALK_CTRL 0x47e4 @@ -107,6 +110,9 @@ #define XE2_GAMWALK_CTRL_3D XE_REG_MCR(XE2_GAMWALK_CTRL) #define EN_CMP_1WCOH_GW REG_BIT(14) +#define MMIOATSREQLIMIT_GAM_WALK_3D XE_REG_MCR(0x47f8) +#define DIS_ATS_WRONLY_PG REG_BIT(18) + #define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910) #define XEHP_FLAT_CCS_PTR REG_GENMASK(31, 8) @@ -210,6 +216,9 @@ #define GSCPSMI_BASE XE_REG(0x880c) +#define CCCHKNREG2 XE_REG_MCR(0x881c) +#define LOCALITYDIS REG_BIT(7) + #define CCCHKNREG1 XE_REG_MCR(0x8828) #define L3CMPCTRL REG_BIT(23) #define ENCOMPPERFFIX REG_BIT(18) @@ -423,6 +432,8 @@ #define LSN_DIM_Z_WGT(value) REG_FIELD_PREP(LSN_DIM_Z_WGT_MASK, value) #define L3SQCREG2 XE_REG_MCR(0xb104) +#define L3_SQ_DISABLE_COAMA_2WAY_COH REG_BIT(30) +#define L3_SQ_DISABLE_COAMA REG_BIT(22) #define COMPMEMRD256BOVRFETCHEN REG_BIT(20) #define L3SQCREG3 XE_REG_MCR(0xb108) @@ -553,11 +564,16 @@ #define UGM_FRAGMENT_THRESHOLD_TO_3 REG_BIT(58 - 32) #define 
DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32) #define XE2_ALLOC_DPA_STARVE_FIX_DIS REG_BIT(47 - 32) +#define SAMPLER_LD_LSC_DISABLE REG_BIT(45 - 32) #define ENABLE_SMP_LD_RENDER_SURFACE_CONTROL REG_BIT(44 - 32) #define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32) #define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32) #define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32) #define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32) +#define LSCFE_SAME_ADDRESS_ATOMICS_COALESCING_DISABLE REG_BIT(35 - 32) + +#define ROW_CHICKEN5 XE_REG_MCR(0xe7f0) +#define CPSS_AWARE_DIS REG_BIT(3) #define SARB_CHICKEN1 XE_REG_MCR(0xe90c) #define COMP_CKN_IN REG_GENMASK(30, 29) diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 1d36c09681aa..9c513778d370 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -81,6 +81,14 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { WHITELIST(VFLSKPD, RING_FORCE_TO_NONPRIV_ACCESS_RW)) }, + { XE_RTP_NAME("14024997852"), + XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(WHITELIST(FF_MODE, + RING_FORCE_TO_NONPRIV_ACCESS_RW), + WHITELIST(VFLSKPD, + RING_FORCE_TO_NONPRIV_ACCESS_RW)) + }, #define WHITELIST_OA_MMIO_TRG(trg, status, head) \ WHITELIST(trg, RING_FORCE_TO_NONPRIV_ACCESS_RW), \ diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 1e8d61ac581b..682865f1fc16 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -325,6 +325,31 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_RULES(MEDIA_VERSION(3500)), XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) }, + + /* Xe3P_LPG */ + + { XE_RTP_NAME("14025160223"), + XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(MMIOATSREQLIMIT_GAM_WALK_3D, + DIS_ATS_WRONLY_PG)) + }, + { XE_RTP_NAME("16028780921"), + XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, 
B0)), + XE_RTP_ACTIONS(SET(CCCHKNREG2, LOCALITYDIS)) + }, + { XE_RTP_NAME("14026144927"), + XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(L3SQCREG2, L3_SQ_DISABLE_COAMA_2WAY_COH | + L3_SQ_DISABLE_COAMA)) + }, + { XE_RTP_NAME("14025635424"), + XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(GAMSTLB_CTRL2, STLB_SINGLE_BANK_MODE)) + }, + { XE_RTP_NAME("16028005424"), + XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0)), + XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) + }, }; static const struct xe_rtp_entry_sr engine_was[] = { @@ -699,6 +724,24 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)), XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE)) }, + + /* Xe3p_LPG*/ + + { XE_RTP_NAME("22021149932"), + XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, SAMPLER_LD_LSC_DISABLE)) + }, + { XE_RTP_NAME("14025676848"), + XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, LSCFE_SAME_ADDRESS_ATOMICS_COALESCING_DISABLE)) + }, + { XE_RTP_NAME("16028951944"), + XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN5, CPSS_AWARE_DIS)) + }, }; static const struct xe_rtp_entry_sr lrc_was[] = { From a08104551d08cf5c1542ba552bd7d1cf9d4ecd23 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Fri, 6 Feb 2026 15:35:59 -0300 Subject: [PATCH 085/195] drm/xe/pat: Differentiate between primary and media for PTA Differently from currently supported platforms, in upcoming changes we will need to have different PAT entries for PTA based on the GT type. 
As such, let's prepare the code to support that by having two separate PTA-specific members in the pat struct, one for each type of GT. While at it, also fix the kerneldoc for pat_ats. Co-developed-by: Tejas Upadhyay Signed-off-by: Tejas Upadhyay Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260206-nvl-p-upstreaming-v3-3-636e1ad32688@intel.com Signed-off-by: Gustavo Sousa --- drivers/gpu/drm/xe/xe_device_types.h | 8 +++++--- drivers/gpu/drm/xe/xe_pat.c | 27 ++++++++++++++++++--------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 14bf2c027f89..059f026e80d5 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -400,10 +400,12 @@ struct xe_device { const struct xe_pat_table_entry *table; /** @pat.n_entries: Number of PAT entries */ int n_entries; - /** @pat.ats_entry: PAT entry for PCIe ATS responses */ + /** @pat.pat_ats: PAT entry for PCIe ATS responses */ const struct xe_pat_table_entry *pat_ats; - /** @pat.pta_entry: PAT entry for page table accesses */ - const struct xe_pat_table_entry *pat_pta; + /** @pat.pat_primary_pta: primary GT PAT entry for page table accesses */ + const struct xe_pat_table_entry *pat_primary_pta; + /** @pat.pat_media_pta: media GT PAT entry for page table accesses */ + const struct xe_pat_table_entry *pat_media_pta; u32 idx[__XE_CACHE_LEVEL_COUNT]; } pat; diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 2cd3fd1c3953..5ba650948a4a 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -285,8 +285,10 @@ static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[ if (xe->pat.pat_ats) xe_mmio_write32(>->mmio, XE_REG(_PAT_ATS), xe->pat.pat_ats->value); - if (xe->pat.pat_pta) - xe_mmio_write32(>->mmio, XE_REG(_PAT_PTA), xe->pat.pat_pta->value); + if (xe->pat.pat_primary_pta && xe_gt_is_main_type(gt)) + 
xe_mmio_write32(>->mmio, XE_REG(_PAT_PTA), xe->pat.pat_primary_pta->value); + if (xe->pat.pat_media_pta && xe_gt_is_media_type(gt)) + xe_mmio_write32(>->mmio, XE_REG(_PAT_PTA), xe->pat.pat_media_pta->value); } static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry table[], @@ -302,8 +304,10 @@ static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry ta if (xe->pat.pat_ats) xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe->pat.pat_ats->value); - if (xe->pat.pat_pta) - xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe->pat.pat_pta->value); + if (xe->pat.pat_primary_pta && xe_gt_is_main_type(gt)) + xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe->pat.pat_primary_pta->value); + if (xe->pat.pat_media_pta && xe_gt_is_media_type(gt)) + xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe->pat.pat_media_pta->value); } static int xelp_dump(struct xe_gt *gt, struct drm_printer *p) @@ -498,7 +502,8 @@ void xe_pat_init_early(struct xe_device *xe) xe->pat.ops = &xe3p_xpc_pat_ops; xe->pat.table = xe3p_xpc_pat_table; xe->pat.pat_ats = &xe3p_xpc_pat_ats; - xe->pat.pat_pta = &xe3p_xpc_pat_pta; + xe->pat.pat_primary_pta = &xe3p_xpc_pat_pta; + xe->pat.pat_media_pta = &xe3p_xpc_pat_pta; xe->pat.n_entries = ARRAY_SIZE(xe3p_xpc_pat_table); xe->pat.idx[XE_CACHE_NONE] = 3; xe->pat.idx[XE_CACHE_WT] = 3; /* N/A (no display); use UC */ @@ -512,8 +517,10 @@ void xe_pat_init_early(struct xe_device *xe) xe->pat.table = xe2_pat_table; } xe->pat.pat_ats = &xe2_pat_ats; - if (IS_DGFX(xe)) - xe->pat.pat_pta = &xe2_pat_pta; + if (IS_DGFX(xe)) { + xe->pat.pat_primary_pta = &xe2_pat_pta; + xe->pat.pat_media_pta = &xe2_pat_pta; + } /* Wa_16023588340. 
XXX: Should use XE_WA */ if (GRAPHICS_VERx100(xe) == 2001) @@ -617,6 +624,8 @@ int xe_pat_dump(struct xe_gt *gt, struct drm_printer *p) int xe_pat_dump_sw_config(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); + const struct xe_pat_table_entry *pta_entry = xe_gt_is_main_type(gt) ? + xe->pat.pat_primary_pta : xe->pat.pat_media_pta; char label[PAT_LABEL_LEN]; if (!xe->pat.table || !xe->pat.n_entries) @@ -640,8 +649,8 @@ int xe_pat_dump_sw_config(struct xe_gt *gt, struct drm_printer *p) } } - if (xe->pat.pat_pta) { - u32 pat = xe->pat.pat_pta->value; + if (pta_entry) { + u32 pat = pta_entry->value; drm_printf(p, "Page Table Access:\n"); xe->pat.ops->entry_dump(p, "PTA_MODE", pat, false); From f3e5f71fd6eaa3363df966bad7755980ac276910 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 6 Feb 2026 15:36:00 -0300 Subject: [PATCH 086/195] drm/xe/xe3p_lpg: Add new PAT table PAT programming for Xe3p_LPG is more similar to Xe2 and Xe3 than it is to Xe3p_XPC. Compared to Xe2/Xe3 we have: * There's a slight update to the PAT table, where two new indices (18 and 19) are added to expose a new "WB - Transient App" L3 caching mode. * The PTA_MODE entry must be programmed differently according to the media type, and both differ from Xe2. There are no changes to the underlying registers, so the Xe2 ops can be re-used for Xe3p. 
Bspec: 71582, 74160 Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patch.msgid.link/20260206-nvl-p-upstreaming-v3-4-636e1ad32688@intel.com Signed-off-by: Gustavo Sousa --- drivers/gpu/drm/xe/xe_pat.c | 55 ++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 5ba650948a4a..f840d9a58740 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -124,7 +124,8 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = { * - no_promote: 0=promotable, 1=no promote * - comp_en: 0=disable, 1=enable * - l3clos: L3 class of service (0-3) - * - l3_policy: 0=WB, 1=XD ("WB - Transient Display"), 3=UC + * - l3_policy: 0=WB, 1=XD ("WB - Transient Display"), + * 2=XA ("WB - Transient App" for Xe3p), 3=UC * - l4_policy: 0=WB, 1=WT, 3=UC * - coh_mode: 0=no snoop, 2=1-way coherent, 3=2-way coherent * @@ -253,6 +254,44 @@ static const struct xe_pat_table_entry xe3p_xpc_pat_table[] = { [31] = XE3P_XPC_PAT( 0, 3, 0, 0, 3 ), }; +static const struct xe_pat_table_entry xe3p_primary_pat_pta = XE2_PAT(0, 0, 0, 0, 0, 3); +static const struct xe_pat_table_entry xe3p_media_pat_pta = XE2_PAT(0, 0, 0, 0, 0, 2); + +static const struct xe_pat_table_entry xe3p_lpg_pat_table[] = { + [ 0] = XE2_PAT( 0, 0, 0, 0, 3, 0 ), + [ 1] = XE2_PAT( 0, 0, 0, 0, 3, 2 ), + [ 2] = XE2_PAT( 0, 0, 0, 0, 3, 3 ), + [ 3] = XE2_PAT( 0, 0, 0, 3, 3, 0 ), + [ 4] = XE2_PAT( 0, 0, 0, 3, 0, 2 ), + [ 5] = XE2_PAT( 0, 0, 0, 3, 3, 2 ), + [ 6] = XE2_PAT( 1, 0, 0, 1, 3, 0 ), + [ 7] = XE2_PAT( 0, 0, 0, 3, 0, 3 ), + [ 8] = XE2_PAT( 0, 0, 0, 3, 0, 0 ), + [ 9] = XE2_PAT( 0, 1, 0, 0, 3, 0 ), + [10] = XE2_PAT( 0, 1, 0, 3, 0, 0 ), + [11] = XE2_PAT( 1, 1, 0, 1, 3, 0 ), + [12] = XE2_PAT( 0, 1, 0, 3, 3, 0 ), + [13] = XE2_PAT( 0, 0, 0, 0, 0, 0 ), + [14] = XE2_PAT( 0, 1, 0, 0, 0, 0 ), + [15] = XE2_PAT( 1, 1, 0, 1, 1, 0 ), + [16] = XE2_PAT( 0, 1, 0, 0, 3, 2 ), + /* 17 is reserved; leave set to all 
0's */ + [18] = XE2_PAT( 1, 0, 0, 2, 3, 0 ), + [19] = XE2_PAT( 1, 0, 0, 2, 3, 2 ), + [20] = XE2_PAT( 0, 0, 1, 0, 3, 0 ), + [21] = XE2_PAT( 0, 1, 1, 0, 3, 0 ), + [22] = XE2_PAT( 0, 0, 1, 0, 3, 2 ), + [23] = XE2_PAT( 0, 0, 1, 0, 3, 3 ), + [24] = XE2_PAT( 0, 0, 2, 0, 3, 0 ), + [25] = XE2_PAT( 0, 1, 2, 0, 3, 0 ), + [26] = XE2_PAT( 0, 0, 2, 0, 3, 2 ), + [27] = XE2_PAT( 0, 0, 2, 0, 3, 3 ), + [28] = XE2_PAT( 0, 0, 3, 0, 3, 0 ), + [29] = XE2_PAT( 0, 1, 3, 0, 3, 0 ), + [30] = XE2_PAT( 0, 0, 3, 0, 3, 2 ), + [31] = XE2_PAT( 0, 0, 3, 0, 3, 3 ), +}; + u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index) { WARN_ON(pat_index >= xe->pat.n_entries); @@ -508,6 +547,20 @@ void xe_pat_init_early(struct xe_device *xe) xe->pat.idx[XE_CACHE_NONE] = 3; xe->pat.idx[XE_CACHE_WT] = 3; /* N/A (no display); use UC */ xe->pat.idx[XE_CACHE_WB] = 2; + } else if (GRAPHICS_VER(xe) == 35) { + xe->pat.ops = &xe2_pat_ops; + xe->pat.table = xe3p_lpg_pat_table; + xe->pat.pat_ats = &xe2_pat_ats; + if (!IS_DGFX(xe)) { + xe->pat.pat_primary_pta = &xe3p_primary_pat_pta; + xe->pat.pat_media_pta = &xe3p_media_pat_pta; + } + xe->pat.n_entries = ARRAY_SIZE(xe3p_lpg_pat_table); + xe->pat.idx[XE_CACHE_NONE] = 3; + xe->pat.idx[XE_CACHE_WT] = 15; + xe->pat.idx[XE_CACHE_WB] = 2; + xe->pat.idx[XE_CACHE_NONE_COMPRESSION] = 12; + xe->pat.idx[XE_CACHE_WB_COMPRESSION] = 16; } else if (GRAPHICS_VER(xe) == 30 || GRAPHICS_VER(xe) == 20) { xe->pat.ops = &xe2_pat_ops; if (GRAPHICS_VER(xe) == 30) { From 641a2208c052256242f4e7808d997cd2239bb7e2 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 6 Feb 2026 15:36:01 -0300 Subject: [PATCH 087/195] drm/xe/xe3p_lpg: Add MCR steering Xe3p_LPG has nearly identical steering to Xe2 and Xe3. The only DSS/XeCore change from those IPs is an additional range from 0xDE00-0xDE7F that was previously reserved, so we can simply grow one of the existing ranges in the Xe2 table to include it. 
Similarly, the "instance0" table is also almost identical, but gains one additional PSMI range and requires a separate table. v2: - Drop reserved range from MEMPIPE range. (Dnyaneshwar) Bspec: 75242 Signed-off-by: Matt Roper Reviewed-by: Dnyaneshwar Bhadane Link: https://patch.msgid.link/20260206-nvl-p-upstreaming-v3-5-636e1ad32688@intel.com Signed-off-by: Gustavo Sousa --- drivers/gpu/drm/xe/xe_gt_mcr.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 66c4efca80fe..8989d02ea84d 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -201,7 +201,7 @@ static const struct xe_mmio_range xe2lpg_dss_steering_table[] = { { 0x009680, 0x0096FF }, /* DSS */ { 0x00D800, 0x00D87F }, /* SLICE */ { 0x00DC00, 0x00DCFF }, /* SLICE */ - { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */ + { 0x00DE00, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */ { 0x00E980, 0x00E9FF }, /* SLICE */ { 0x013000, 0x0133FF }, /* DSS (0x13000-0x131FF), SLICE (0x13200-0x133FF) */ {}, @@ -280,6 +280,19 @@ static const struct xe_mmio_range xe3p_xpc_instance0_steering_table[] = { {}, }; +static const struct xe_mmio_range xe3p_lpg_instance0_steering_table[] = { + { 0x004000, 0x004AFF }, /* GAM, rsvd, GAMWKR */ + { 0x008700, 0x00887F }, /* NODE */ + { 0x00B000, 0x00B3FF }, /* NODE, L3BANK */ + { 0x00B500, 0x00B6FF }, /* PSMI */ + { 0x00C800, 0x00CFFF }, /* GAM */ + { 0x00D880, 0x00D8FF }, /* NODE */ + { 0x00DD00, 0x00DD7F }, /* MEMPIPE */ + { 0x00F000, 0x00FFFF }, /* GAM, GAMWKR */ + { 0x013400, 0x0135FF }, /* MEMPIPE */ + {}, +}; + static void init_steering_l3bank(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -530,6 +543,9 @@ void xe_gt_mcr_init_early(struct xe_gt *gt) gt->steering[INSTANCE0].ranges = xe3p_xpc_instance0_steering_table; gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table; gt->steering[NODE].ranges = xe3p_xpc_node_steering_table; + } 
else if (GRAPHICS_VERx100(xe) >= 3510) { + gt->steering[DSS].ranges = xe2lpg_dss_steering_table; + gt->steering[INSTANCE0].ranges = xe3p_lpg_instance0_steering_table; } else if (GRAPHICS_VER(xe) >= 20) { gt->steering[DSS].ranges = xe2lpg_dss_steering_table; gt->steering[SQIDI_PSMI].ranges = xe2lpg_sqidi_psmi_steering_table; From 4a0836a2604f1300b9f8886049e47f5aa4300c57 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 6 Feb 2026 15:36:02 -0300 Subject: [PATCH 088/195] drm/xe/xe3p_lpg: Add LRC parsing for additional RCS engine state Xe3p_LPG adds some additional state instructions to the RCS engine's LRC. Add support for these to the debugfs LRC parser. Note that the bspec's LRC description page seems to have a few mistakes in the name/spelling of these new instructions (e.g., "3DSTATE_TASK_DATA_EXT" instead of "3DSTATE_TASK_SHADER_DATA_EXT" or "3DSTATE_VIEWPORT_STATE_POINTERS_CL_SF_2" instead of "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_2"). Bspec: 65182 Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patch.msgid.link/20260206-nvl-p-upstreaming-v3-6-636e1ad32688@intel.com Signed-off-by: Gustavo Sousa --- drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h | 10 ++++++++++ drivers/gpu/drm/xe/xe_lrc.c | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h index 457881af8af9..4d854c85e588 100644 --- a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h @@ -55,6 +55,7 @@ #define PIPELINE_SELECT GFXPIPE_SINGLE_DW_CMD(0x1, 0x4) #define CMD_3DSTATE_DRAWING_RECTANGLE_FAST GFXPIPE_3D_CMD(0x0, 0x0) +#define CMD_3DSTATE_CUSTOM_SAMPLE_PATTERN GFXPIPE_3D_CMD(0x0, 0x2) #define CMD_3DSTATE_CLEAR_PARAMS GFXPIPE_3D_CMD(0x0, 0x4) #define CMD_3DSTATE_DEPTH_BUFFER GFXPIPE_3D_CMD(0x0, 0x5) #define CMD_3DSTATE_STENCIL_BUFFER GFXPIPE_3D_CMD(0x0, 0x6) @@ -138,8 +139,16 @@ #define 
CMD_3DSTATE_SBE_MESH GFXPIPE_3D_CMD(0x0, 0x82) #define CMD_3DSTATE_CPSIZE_CONTROL_BUFFER GFXPIPE_3D_CMD(0x0, 0x83) #define CMD_3DSTATE_COARSE_PIXEL GFXPIPE_3D_CMD(0x0, 0x89) +#define CMD_3DSTATE_MESH_SHADER_DATA_EXT GFXPIPE_3D_CMD(0x0, 0x8A) +#define CMD_3DSTATE_TASK_SHADER_DATA_EXT GFXPIPE_3D_CMD(0x0, 0x8B) +#define CMD_3DSTATE_VIEWPORT_STATE_POINTERS_CC_2 GFXPIPE_3D_CMD(0x0, 0x8D) +#define CMD_3DSTATE_CC_STATE_POINTERS_2 GFXPIPE_3D_CMD(0x0, 0x8E) +#define CMD_3DSTATE_SCISSOR_STATE_POINTERS_2 GFXPIPE_3D_CMD(0x0, 0x8F) +#define CMD_3DSTATE_BLEND_STATE_POINTERS_2 GFXPIPE_3D_CMD(0x0, 0xA0) +#define CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_2 GFXPIPE_3D_CMD(0x0, 0xA1) #define CMD_3DSTATE_DRAWING_RECTANGLE GFXPIPE_3D_CMD(0x1, 0x0) +#define CMD_3DSTATE_URB_MEMORY GFXPIPE_3D_CMD(0x1, 0x1) #define CMD_3DSTATE_CHROMA_KEY GFXPIPE_3D_CMD(0x1, 0x4) #define CMD_3DSTATE_POLY_STIPPLE_OFFSET GFXPIPE_3D_CMD(0x1, 0x6) #define CMD_3DSTATE_POLY_STIPPLE_PATTERN GFXPIPE_3D_CMD(0x1, 0x7) @@ -160,5 +169,6 @@ #define CMD_3DSTATE_SUBSLICE_HASH_TABLE GFXPIPE_3D_CMD(0x1, 0x1F) #define CMD_3DSTATE_SLICE_TABLE_STATE_POINTERS GFXPIPE_3D_CMD(0x1, 0x20) #define CMD_3DSTATE_PTBR_TILE_PASS_INFO GFXPIPE_3D_CMD(0x1, 0x22) +#define CMD_3DSTATE_SLICE_TABLE_STATE_POINTER_2 GFXPIPE_3D_CMD(0x1, 0xA0) #endif diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 3db7968aa5e2..e58eb8b9db78 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1966,6 +1966,7 @@ static int dump_gfxpipe_command(struct drm_printer *p, MATCH(PIPELINE_SELECT); MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST); + MATCH3D(3DSTATE_CUSTOM_SAMPLE_PATTERN); MATCH3D(3DSTATE_CLEAR_PARAMS); MATCH3D(3DSTATE_DEPTH_BUFFER); MATCH3D(3DSTATE_STENCIL_BUFFER); @@ -2049,8 +2050,16 @@ static int dump_gfxpipe_command(struct drm_printer *p, MATCH3D(3DSTATE_SBE_MESH); MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER); MATCH3D(3DSTATE_COARSE_PIXEL); + MATCH3D(3DSTATE_MESH_SHADER_DATA_EXT); + 
MATCH3D(3DSTATE_TASK_SHADER_DATA_EXT); + MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC_2); + MATCH3D(3DSTATE_CC_STATE_POINTERS_2); + MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS_2); + MATCH3D(3DSTATE_BLEND_STATE_POINTERS_2); + MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_2); MATCH3D(3DSTATE_DRAWING_RECTANGLE); + MATCH3D(3DSTATE_URB_MEMORY); MATCH3D(3DSTATE_CHROMA_KEY); MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET); MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN); @@ -2070,6 +2079,7 @@ static int dump_gfxpipe_command(struct drm_printer *p, MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE); MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS); MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO); + MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTER_2); default: drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n", From e5db97a30594e6b6406b7721d8a734df7af80c2c Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 6 Feb 2026 15:36:03 -0300 Subject: [PATCH 089/195] drm/xe/xe3p_lpg: Disable reporting of context switch status to GHWSP By default the hardware reports context switch status into the global hardware status page. The Xe driver doesn't use this information for anything, and as of Xe3p, leaving this setting enabled will prevent other hardware optimizations from being enabled. Disable this reporting as suggested by the tuning guide. 
Bspec: 72161 Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patch.msgid.link/20260206-nvl-p-upstreaming-v3-7-636e1ad32688@intel.com Signed-off-by: Gustavo Sousa --- drivers/gpu/drm/xe/xe_tuning.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 5766fa7742d3..a97872b3214b 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -10,6 +10,7 @@ #include #include +#include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" #include "xe_gt_types.h" #include "xe_platform_types.h" @@ -107,6 +108,12 @@ static const struct xe_rtp_entry_sr engine_tunings[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) }, + { XE_RTP_NAME("Tuning: disable HW reporting of ctx switch to GHWSP"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3500, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), + GHWSP_CSB_REPORT_DIS, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, }; static const struct xe_rtp_entry_sr lrc_tunings[] = { From ce0e1a6384f7b6f2ee39ef2a747e61455b418dcd Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 6 Feb 2026 15:36:04 -0300 Subject: [PATCH 090/195] drm/xe/xe3p_lpg: Drop unnecessary tuning settings From Xe3p onward, the desired settings are now the hardware's default values and the driver does not need to program them explicitly. Since 35.xx seems to be the starting point for "Xe3p" version numbers, we'll adjust the bounds of the old programming to stop at 34.99. Even though there's no platform with version 35.00 at the moment, this is simplest in case one does show up in the future. 
Bspec: 72161, 59928, 59930 Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patch.msgid.link/20260206-nvl-p-upstreaming-v3-8-636e1ad32688@intel.com Signed-off-by: Gustavo Sousa --- drivers/gpu/drm/xe/xe_tuning.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index a97872b3214b..694385ae75f1 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -32,12 +32,12 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { /* Xe2 */ { XE_RTP_NAME("Tuning: L3 cache"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3499)), XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) }, { XE_RTP_NAME("Tuning: L3 cache - media"), - XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, 3499)), XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) }, @@ -53,7 +53,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { SET(XE2LPM_CCCHKNREG1, L3CMPCTRL)) }, { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3499)), XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN)) }, { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"), From 60fcdf645c47699c04e421382d5b36130b476262 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 6 Feb 2026 15:36:05 -0300 Subject: [PATCH 091/195] drm/xe/xe3p_lpg: Extend 'group ID' mask size Xe3p_LPG extends the 'group ID' register mask by one bit. 
Since the new upper bit (12) was unused on previous platforms, we can safely extend the existing mask size without worrying about adding conditional version checks to the register programming. Bspec: 67175 Signed-off-by: Matt Roper Reviewed-by: Dnyaneshwar Bhadane Link: https://patch.msgid.link/20260206-nvl-p-upstreaming-v3-9-636e1ad32688@intel.com Signed-off-by: Gustavo Sousa --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index d593331202e8..ff77523e823e 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -58,7 +58,7 @@ #define MCR_SLICE(slice) REG_FIELD_PREP(MCR_SLICE_MASK, slice) #define MCR_SUBSLICE_MASK REG_GENMASK(26, 24) #define MCR_SUBSLICE(subslice) REG_FIELD_PREP(MCR_SUBSLICE_MASK, subslice) -#define MTL_MCR_GROUPID REG_GENMASK(11, 8) +#define MTL_MCR_GROUPID REG_GENMASK(12, 8) #define MTL_MCR_INSTANCEID REG_GENMASK(3, 0) #define PS_INVOCATION_COUNT XE_REG(0x2348) From 1888b3397e11f5663452e0fc14811f9b27d633a2 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Fri, 6 Feb 2026 15:36:06 -0300 Subject: [PATCH 092/195] drm/xe/xe3p_lpg: Update LRC sizes Like with previous generations, the engine context images of both RCS and CCS in Xe3p_LPG contain a common layout at the end for the context related to the "Compute Pipeline". The size of the memory area written to such section varies; it depends on the type of preemption that has taken place during the execution and type of command streamer instruction that was used on the pipeline. For Xe3p_LPG, the maximum possible size, including NOOPs for cache line alignment, is 4368 dwords, which would be the case of a mid-thread preemption during the execution of a COMPUTE_WALKER_2 instruction. The maximum size has increased in such a way that we need to update xe_gt_lrc_size() to match the new sizing requirement. 
When we add that to the engine-specific parts, we have: - RCS context image: 6672 dwords = 26688 bytes -> 7 pages - CCS context image: 5024 dwords = 20096 bytes -> 5 pages Bspec: 65182, 55793, 73590 Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260206-nvl-p-upstreaming-v3-10-636e1ad32688@intel.com Signed-off-by: Gustavo Sousa --- drivers/gpu/drm/xe/xe_lrc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index e58eb8b9db78..38f648b98868 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -113,13 +113,17 @@ size_t xe_gt_lrc_hang_replay_size(struct xe_gt *gt, enum xe_engine_class class) /* Engine context image */ switch (class) { case XE_ENGINE_CLASS_RENDER: - if (GRAPHICS_VER(xe) >= 20) + if (GRAPHICS_VERx100(xe) >= 3510) + size += 7 * SZ_4K; + else if (GRAPHICS_VER(xe) >= 20) size += 3 * SZ_4K; else size += 13 * SZ_4K; break; case XE_ENGINE_CLASS_COMPUTE: - if (GRAPHICS_VER(xe) >= 20) + if (GRAPHICS_VERx100(xe) >= 3510) + size += 5 * SZ_4K; + else if (GRAPHICS_VER(xe) >= 20) size += 2 * SZ_4K; else size += 13 * SZ_4K; From 377c89bfaa5d5588214202515d60eacac61395d4 Mon Sep 17 00:00:00 2001 From: Aradhya Bhatia Date: Fri, 6 Feb 2026 15:36:07 -0300 Subject: [PATCH 093/195] drm/xe/xe3p_lpg: Set STLB bank hash mode to 4KB Since the dominant size of the pages referred in an i-gpu, such as Xe3p_LPG, will be 4KB, the HW default of mix of 64K and 2M for STLB bank hash mode does not make sense. Allow the SW to change it to 4KB Mode, for Xe3p_LPG. v2: - Add Bspec reference. 
(Matt) Bspec: 78248 Signed-off-by: Aradhya Bhatia Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260206-nvl-p-upstreaming-v3-11-636e1ad32688@intel.com Signed-off-by: Gustavo Sousa --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 ++ drivers/gpu/drm/xe/xe_tuning.c | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index ff77523e823e..a375ffd666ba 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -473,6 +473,8 @@ #define FORCE_MISS_FTLB REG_BIT(3) #define XEHP_GAMSTLB_CTRL XE_REG_MCR(0xcf4c) +#define BANK_HASH_MODE REG_GENMASK(27, 26) +#define BANK_HASH_4KB_MODE REG_FIELD_PREP(BANK_HASH_MODE, 0x3) #define CONTROL_BLOCK_CLKGATE_DIS REG_BIT(12) #define EGRESS_BLOCK_CLKGATE_DIS REG_BIT(11) #define TAG_BLOCK_CLKGATE_DIS REG_BIT(7) diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 694385ae75f1..316f5e2b2e48 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -90,6 +90,15 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_RULES(MEDIA_VERSION(2000)), XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN)) }, + + /* Xe3p */ + + { XE_RTP_NAME("Tuning: Set STLB Bank Hash Mode to 4KB"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3510, XE_RTP_END_VERSION_UNDEFINED), + IS_INTEGRATED), + XE_RTP_ACTIONS(FIELD_SET(XEHP_GAMSTLB_CTRL, BANK_HASH_MODE, + BANK_HASH_4KB_MODE)) + }, }; static const struct xe_rtp_entry_sr engine_tunings[] = { From be07d8f707e41cb694c4a56364978c30683a687d Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Fri, 6 Feb 2026 15:36:08 -0300 Subject: [PATCH 094/195] drm/xe/nvlp: Add NVL-P platform definition Add platform definition along with device IDs for NVL-P. 
Here is the list of device descriptor fields and associated Bspec references: .dma_mask_size (Bspec 74198) .has_cached_pt (Bspec 71582) .has_display (Bspec 74196) .has_flat_ccs (Bspec 74110) .has_page_reclaim_hw_assist (Bspec 73451) .max_gt_per_tile (Bspec 74196) .va_bits (Bspec 74198) .vm_max_level (Bspec 59507) v2: - Add list of descriptor fields and Bspec references. (Matt) Signed-off-by: Shekhar Chauhan Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260206-nvl-p-upstreaming-v3-12-636e1ad32688@intel.com Signed-off-by: Gustavo Sousa --- drivers/gpu/drm/xe/xe_bo.c | 4 ++-- drivers/gpu/drm/xe/xe_pci.c | 15 +++++++++++++++ drivers/gpu/drm/xe/xe_platform_types.h | 1 + include/drm/intel/pciids.h | 12 ++++++++++++ 4 files changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index e9180b01a4e4..cb8a177ec02b 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -512,8 +512,8 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, /* * Display scanout is always non-coherent with the CPU cache. * - * For Xe_LPG and beyond, PPGTT PTE lookups are also - * non-coherent and require a CPU:WC mapping. + * For Xe_LPG and beyond up to NVL-P (excluding), PPGTT PTE + * lookups are also non-coherent and require a CPU:WC mapping. 
*/ if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) || (!xe->info.has_cached_pt && bo->flags & XE_BO_FLAG_PAGETABLE)) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 4abd64eccf27..3e1a87dd78e0 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -450,6 +450,20 @@ static const struct xe_device_desc cri_desc = { .vm_max_level = 4, }; +static const struct xe_device_desc nvlp_desc = { + PLATFORM(NOVALAKE_P), + .dma_mask_size = 46, + .has_cached_pt = true, + .has_display = true, + .has_flat_ccs = 1, + .has_page_reclaim_hw_assist = true, + .has_pre_prod_wa = true, + .max_gt_per_tile = 2, + .require_force_probe = true, + .va_bits = 48, + .vm_max_level = 4, +}; + #undef PLATFORM __diag_pop(); @@ -479,6 +493,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_WCL_IDS(INTEL_VGA_DEVICE, &ptl_desc), INTEL_NVLS_IDS(INTEL_VGA_DEVICE, &nvls_desc), INTEL_CRI_IDS(INTEL_PCI_DEVICE, &cri_desc), + INTEL_NVLP_IDS(INTEL_VGA_DEVICE, &nvlp_desc), { } }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h index f516dbddfd88..6cff385227ea 100644 --- a/drivers/gpu/drm/xe/xe_platform_types.h +++ b/drivers/gpu/drm/xe/xe_platform_types.h @@ -26,6 +26,7 @@ enum xe_platform { XE_PANTHERLAKE, XE_NOVALAKE_S, XE_CRESCENTISLAND, + XE_NOVALAKE_P, }; enum xe_subplatform { diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index 52520e684ab1..33b91cb2e684 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -900,4 +900,16 @@ #define INTEL_CRI_IDS(MACRO__, ...) \ MACRO__(0x674C, ## __VA_ARGS__) +/* NVL-P */ +#define INTEL_NVLP_IDS(MACRO__, ...) 
\
+	MACRO__(0xD750, ## __VA_ARGS__), \
+	MACRO__(0xD751, ## __VA_ARGS__), \
+	MACRO__(0xD752, ## __VA_ARGS__), \
+	MACRO__(0xD753, ## __VA_ARGS__), \
+	MACRO__(0xD754, ## __VA_ARGS__), \
+	MACRO__(0xD755, ## __VA_ARGS__), \
+	MACRO__(0xD756, ## __VA_ARGS__), \
+	MACRO__(0xD757, ## __VA_ARGS__), \
+	MACRO__(0xD75F, ## __VA_ARGS__)
+
 #endif /* __PCIIDS_H__ */

From b9006dacb8e4a6d2a7bc870c7f037c2ea21bdf34 Mon Sep 17 00:00:00 2001
From: Dnyaneshwar Bhadane
Date: Fri, 6 Feb 2026 15:36:09 -0300
Subject: [PATCH 095/195] drm/xe/nvlp: Attach MOCS table for nvlp

The MOCS table for NVL-P is the same as for Xe2/Xe3 platforms.

Signed-off-by: Dnyaneshwar Bhadane
Reviewed-by: Matt Roper
Link: https://patch.msgid.link/20260206-nvl-p-upstreaming-v3-13-636e1ad32688@intel.com
Signed-off-by: Gustavo Sousa
---
 drivers/gpu/drm/xe/xe_mocs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 54822497c21e..1d19df860bea 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -600,6 +600,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
 		info->wb_index = 4;
 		info->unused_entries_index = 4;
 		break;
+	case XE_NOVALAKE_P:
 	case XE_NOVALAKE_S:
 	case XE_PANTHERLAKE:
 	case XE_LUNARLAKE:

From d59d94f91f056f007bffb27d2689d82a14448889 Mon Sep 17 00:00:00 2001
From: Matt Roper
Date: Fri, 6 Feb 2026 15:36:10 -0300
Subject: [PATCH 096/195] drm/i915/nvlp: Hook up display support

Although NVL-S and NVL-P are quite different on the GT side, they use
identical Xe3p_LPD display IP and should take all the same codepaths.
Signed-off-by: Matt Roper Reviewed-by: Suraj Kandpal Acked-by: Jani Nikula Link: https://patch.msgid.link/20260206-nvl-p-upstreaming-v3-14-636e1ad32688@intel.com Signed-off-by: Gustavo Sousa --- drivers/gpu/drm/i915/display/intel_display_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index 471f236c9ddf..1a7f3ca079e8 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -1500,6 +1500,7 @@ static const struct { INTEL_PTL_IDS(INTEL_DISPLAY_DEVICE, &ptl_desc), INTEL_WCL_IDS(INTEL_DISPLAY_DEVICE, &ptl_desc), INTEL_NVLS_IDS(INTEL_DISPLAY_DEVICE, &nvl_desc), + INTEL_NVLP_IDS(INTEL_DISPLAY_DEVICE, &nvl_desc), }; static const struct { From d2e0540a62693f324d8e1f5ad7440994350cd998 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Fri, 6 Feb 2026 15:36:11 -0300 Subject: [PATCH 097/195] drm/xe/nvlp: Bump maximum WOPCM size On NVL-P, the primary GT's WOPCM gained an extra 8MiB for the Memory URB. As such, we need to bump the maximum size in the driver so that the driver is able to load without erroring out thinking that the WOPCM is too small. FIXME: The wopcm code in xe driver is a bit confusing. For the case where the offsets for GUC WOPCM are already locked, it appears we are using the maximum overall WOPCM size instead of the sizes relative to each type of GT. The function __check_layout() should be checking against the latter. 
Bspec: 67090 Reviewed-by: Dnyaneshwar Bhadane Link: https://patch.msgid.link/20260206-nvl-p-upstreaming-v3-15-636e1ad32688@intel.com Signed-off-by: Gustavo Sousa --- drivers/gpu/drm/xe/xe_wopcm.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c index dde4f4967ca3..900daf1d1b1b 100644 --- a/drivers/gpu/drm/xe/xe_wopcm.c +++ b/drivers/gpu/drm/xe/xe_wopcm.c @@ -55,8 +55,6 @@ #define MTL_WOPCM_SIZE SZ_4M #define WOPCM_SIZE SZ_2M -#define MAX_WOPCM_SIZE SZ_8M - /* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */ #define WOPCM_RESERVED_SIZE SZ_16K @@ -186,6 +184,14 @@ u32 xe_wopcm_size(struct xe_device *xe) WOPCM_SIZE; } +static u32 max_wopcm_size(struct xe_device *xe) +{ + if (xe->info.platform == XE_NOVALAKE_P) + return SZ_16M; + else + return SZ_8M; +} + /** * xe_wopcm_init() - Initialize the WOPCM structure. * @wopcm: pointer to xe_wopcm. @@ -227,8 +233,11 @@ int xe_wopcm_init(struct xe_wopcm *wopcm) * When the GuC wopcm base and size are preprogrammed by * BIOS/IFWI, check against the max allowed wopcm size to * validate if the programmed values align to the wopcm layout. + * + * FIXME: This is giving the maximum overall WOPCM size and not + * the size relative to each GT. */ - wopcm->size = MAX_WOPCM_SIZE; + wopcm->size = max_wopcm_size(xe); goto check; } From e04c609eedf4d6748ac0bcada4de1275b034fed6 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 Feb 2026 14:05:09 -0800 Subject: [PATCH 098/195] drm/xe/xe2_hpg: Fix handling of Wa_14019988906 & Wa_14019877138 The PSS_CHICKEN register has been part of the RCS engine's LRC since it was first introduced in Xe_LP. That means that any workarounds that adjust its value (such as Wa_14019988906 and Wa_14019877138) need to be implemented in the lrc_was[] table so that they become part of the default LRC from which all subsequent LRCs are copied. 
Although these workarounds were implemented correctly on most platforms, they were incorrectly placed on the engine_was[] table for Xe2_HPG. Move the workarounds to the proper lrc_was[] table and switch the 'xe_rtp_match_first_render_or_compute' rule to specifically match the RCS since that's the engine whose LRC manages the register. Bspec: 65182 Fixes: 7f3ee7d88058 ("drm/xe/xe2hpg: Add initial GT workarounds") Reviewed-by: Shekhar Chauhan Link: https://patch.msgid.link/20260205220508.51905-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 682865f1fc16..843ce9fe7aab 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -592,16 +592,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS)) }, - { XE_RTP_NAME("14019988906"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) - }, - { XE_RTP_NAME("14019877138"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) - }, { XE_RTP_NAME("14020338487"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), FUNC(xe_rtp_match_first_render_or_compute)), @@ -895,6 +885,14 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) }, + { XE_RTP_NAME("14019988906"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) + }, + { XE_RTP_NAME("14019877138"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), + 
XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
+	},
 	{ XE_RTP_NAME("14021490052"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(FF_MODE,

From d287dee565c3c32e1ed76ec1847af46809c29b90 Mon Sep 17 00:00:00 2001
From: Maciej Patelczyk
Date: Mon, 9 Feb 2026 13:34:33 +0100
Subject: [PATCH 099/195] drm/gpusvm: Fix unbalanced unlock in
 drm_gpusvm_scan_mm()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is an unbalanced lock/unlock to gpusvm notifier lock:
[  931.045868] =====================================
[  931.046509] WARNING: bad unlock balance detected!
[  931.047149] 6.19.0-rc6+xe-**************** #9 Tainted: G     U
[  931.048150] -------------------------------------
[  931.048790] kworker/u5:0/51 is trying to release lock (&gpusvm->notifier_lock) at:
[  931.049801] [] drm_gpusvm_scan_mm+0x188/0x460 [drm_gpusvm_helper]
[  931.050802] but there are no more locks to release!
[  931.051463]

The drm_gpusvm_notifier_unlock() sits under the err_free label and the
first jump to err_free is just before calling the
drm_gpusvm_notifier_lock() causing an unbalanced unlock.
Fixes: f1d08a586482 ("drm/gpusvm: Introduce a function to scan the current migration state") Signed-off-by: Maciej Patelczyk Cc: Thomas Hellström Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260209123433.1271053-1-maciej.patelczyk@intel.com --- drivers/gpu/drm/drm_gpusvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index 871fcccd128a..c25f50cad6fe 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -819,7 +819,7 @@ enum drm_gpusvm_scan_result drm_gpusvm_scan_mm(struct drm_gpusvm_range *range, if (!(pfns[i] & HMM_PFN_VALID)) { state = DRM_GPUSVM_SCAN_UNPOPULATED; - goto err_free; + break; } page = hmm_pfn_to_page(pfns[i]); @@ -856,9 +856,9 @@ enum drm_gpusvm_scan_result drm_gpusvm_scan_mm(struct drm_gpusvm_range *range, i += 1ul << drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages); } -err_free: drm_gpusvm_notifier_unlock(range->gpusvm); +err_free: kvfree(pfns); return state; } From a69d1ab971a624c6f112cea61536569d579c3215 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 10 Feb 2026 12:56:53 +0100 Subject: [PATCH 100/195] mm: Fix a hmm_range_fault() livelock / starvation problem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If hmm_range_fault() fails a folio_trylock() in do_swap_page, trying to acquire the lock of a device-private folio for migration, to ram, the function will spin until it succeeds grabbing the lock. However, if the process holding the lock is depending on a work item to be completed, which is scheduled on the same CPU as the spinning hmm_range_fault(), that work item might be starved and we end up in a livelock / starvation situation which is never resolved. 
This can happen, for example if the process holding the device-private
folio lock is stuck in migrate_device_unmap()->lru_add_drain_all() since
lru_add_drain_all() requires a short work-item to be run on all online
cpus to complete.

A prerequisite for this to happen is:
a) Both zone device and system memory folios are considered in
migrate_device_unmap(), so that there is a reason to call
lru_add_drain_all() for a system memory folio while a folio lock is held
on a zone device folio.
b) The zone device folio has an initial mapcount > 1 which causes at
least one migration PTE entry insertion to be deferred to
try_to_migrate(), which can happen after the call to
lru_add_drain_all().
c) No or voluntary only preemption.

This all seems pretty unlikely to happen, but indeed is hit by the
"xe_exec_system_allocator" igt test.

Resolve this by waiting for the folio to be unlocked if the
folio_trylock() fails in do_swap_page().

Rename migration_entry_wait_on_locked() to
softleaf_entry_wait_on_locked() and update its documentation to indicate
the new use-case.

Future code improvements might consider moving the lru_add_drain_all()
call in migrate_device_unmap() to be called *after* all pages have
migration entries inserted. That would also eliminate b) above.

v2:
- Instead of a cond_resched() in hmm_range_fault(), eliminate the
  problem by waiting for the folio to be unlocked in do_swap_page()
  (Alistair Popple, Andrew Morton)
v3:
- Add a stub migration_entry_wait_on_locked() for the !CONFIG_MIGRATION
  case. (Kernel Test Robot)
v4:
- Rename migrate_entry_wait_on_locked() to
  softleaf_entry_wait_on_locked() and update docs (Alistair Popple)
v5:
- Add a WARN_ON_ONCE() for the !CONFIG_MIGRATION version of
  softleaf_entry_wait_on_locked().
- Modify wording around function names in the commit message (Andrew Morton) Suggested-by: Alistair Popple Fixes: 1afaeb8293c9 ("mm/migrate: Trylock device page in do_swap_page") Cc: Ralph Campbell Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: Leon Romanovsky Cc: Andrew Morton Cc: Matthew Brost Cc: John Hubbard Cc: Alistair Popple Cc: linux-mm@kvack.org Cc: Signed-off-by: Thomas Hellström Cc: # v6.15+ Reviewed-by: John Hubbard #v3 Reviewed-by: Alistair Popple Link: https://patch.msgid.link/20260210115653.92413-1-thomas.hellstrom@linux.intel.com --- include/linux/migrate.h | 10 +++++++++- mm/filemap.c | 15 ++++++++++----- mm/memory.c | 3 ++- mm/migrate.c | 8 ++++---- mm/migrate_device.c | 2 +- 5 files changed, 26 insertions(+), 12 deletions(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 26ca00c325d9..d5af2b7f577b 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -65,7 +65,7 @@ bool isolate_folio_to_list(struct folio *folio, struct list_head *list); int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src); -void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) +void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) __releases(ptl); void folio_migrate_flags(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, @@ -97,6 +97,14 @@ static inline int set_movable_ops(const struct movable_operations *ops, enum pag return -ENOSYS; } +static inline void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) + __releases(ptl) +{ + WARN_ON_ONCE(1); + + spin_unlock(ptl); +} + #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_NUMA_BALANCING diff --git a/mm/filemap.c b/mm/filemap.c index ebd75684cb0a..d98e4883f13d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1379,14 +1379,16 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, #ifdef CONFIG_MIGRATION /** - * 
migration_entry_wait_on_locked - Wait for a migration entry to be removed
- * @entry: migration swap entry.
+ * softleaf_entry_wait_on_locked - Wait for a migration entry or
+ * device_private entry to be removed.
+ * @entry: migration or device_private swap entry.
  * @ptl: already locked ptl. This function will drop the lock.
  *
- * Wait for a migration entry referencing the given page to be removed. This is
+ * Wait for a migration entry referencing the given page, or device_private
+ * entry referencing a device_private page to be unlocked. This is
  * equivalent to folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE) except
  * this can be called without taking a reference on the page. Instead this
- * should be called while holding the ptl for the migration entry referencing
+ * should be called while holding the ptl for @entry referencing
  * the page.
  *
  * Returns after unlocking the ptl.
@@ -1394,7 +1396,7 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr,
  * This follows the same logic as folio_wait_bit_common() so see the comments
  * there.
  */
-void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
+void softleaf_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
 	__releases(ptl)
 {
 	struct wait_page_queue wait_page;
@@ -1428,6 +1430,9 @@ void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl)
 	 * If a migration entry exists for the page the migration path must hold
 	 * a valid reference to the page, and it must take the ptl to remove the
 	 * migration entry. So the page is valid until the ptl is dropped.
+	 * Similarly any path attempting to drop the last reference to a
+	 * device-private page needs to grab the ptl to remove the device-private
+	 * entry.
*/ spin_unlock(ptl); diff --git a/mm/memory.c b/mm/memory.c index 2a55edc48a65..0ad50df25846 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4681,7 +4681,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) unlock_page(vmf->page); put_page(vmf->page); } else { - pte_unmap_unlock(vmf->pte, vmf->ptl); + pte_unmap(vmf->pte); + softleaf_entry_wait_on_locked(entry, vmf->ptl); } } else if (softleaf_is_hwpoison(entry)) { ret = VM_FAULT_HWPOISON; diff --git a/mm/migrate.c b/mm/migrate.c index 5169f9717f60..75e384b042ef 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -499,7 +499,7 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, if (!softleaf_is_migration(entry)) goto out; - migration_entry_wait_on_locked(entry, ptl); + softleaf_entry_wait_on_locked(entry, ptl); return; out: spin_unlock(ptl); @@ -531,10 +531,10 @@ void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, p * If migration entry existed, safe to release vma lock * here because the pgtable page won't be freed without the * pgtable lock released. See comment right above pgtable - * lock release in migration_entry_wait_on_locked(). + * lock release in softleaf_entry_wait_on_locked(). 
*/ hugetlb_vma_unlock_read(vma); - migration_entry_wait_on_locked(entry, ptl); + softleaf_entry_wait_on_locked(entry, ptl); return; } @@ -552,7 +552,7 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd) ptl = pmd_lock(mm, pmd); if (!pmd_is_migration_entry(*pmd)) goto unlock; - migration_entry_wait_on_locked(softleaf_from_pmd(*pmd), ptl); + softleaf_entry_wait_on_locked(softleaf_from_pmd(*pmd), ptl); return; unlock: spin_unlock(ptl); diff --git a/mm/migrate_device.c b/mm/migrate_device.c index 23379663b1e1..deab89fd4541 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -176,7 +176,7 @@ static int migrate_vma_collect_huge_pmd(pmd_t *pmdp, unsigned long start, } if (softleaf_is_migration(entry)) { - migration_entry_wait_on_locked(entry, ptl); + softleaf_entry_wait_on_locked(entry, ptl); spin_unlock(ptl); return -EAGAIN; } From 0bcacf56dc0b265f9c47056c6a4f0c1394a8a3f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Pi=C3=B3rkowski?= Date: Mon, 2 Feb 2026 12:50:41 +0100 Subject: [PATCH 101/195] drm/xe/vf: Avoid reading media version when media GT is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the media GT is not allowed, a VF must not attempt to read the media version from the GuC. The GuC may not be loaded, and any attempt to communicate with it would result in a timeout and a VF probe failure: (...) [ 1912.406046] xe 0000:01:00.1: [drm] *ERROR* Tile0: GT1: GuC mmio request 0x5507: no reply 0x5507 [ 1912.407277] xe 0000:01:00.1: [drm] *ERROR* Tile0: GT1: [GUC COMMUNICATION] MMIO send failed (-ETIMEDOUT) [ 1912.408689] xe 0000:01:00.1: [drm] *ERROR* VF: Tile0: GT1: Failed to reset GuC state (-ETIMEDOUT) [ 1912.413986] xe 0000:01:00.1: probe with driver xe failed with error -110 Let's skip reading the media version for VFs when the media GT is not allowed. 
v2: move the condition directly to the VF path Fixes: 7abd69278bb5 ("drm/xe/configfs: Add attribute to disable GT types") Signed-off-by: Piotr Piórkowski Cc: Matt Roper Cc: Michal Wajdeczko Cc: Shuicheng Lin Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260202115041.2863357-1-piotr.piorkowski@intel.com Signed-off-by: Michał Winiarski --- drivers/gpu/drm/xe/xe_pci.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 3e1a87dd78e0..56a768f2cfca 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -588,6 +588,12 @@ static int read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u struct xe_gt *gt __free(kfree) = NULL; int err; + /* Don't try to read media ver if media GT is not allowed */ + if (type == GMDID_MEDIA && !xe_configfs_media_gt_allowed(to_pci_dev(xe->drm.dev))) { + *ver = *revid = 0; + return 0; + } + gt = kzalloc(sizeof(*gt), GFP_KERNEL); if (!gt) return -ENOMEM; From 25c9aa4dcb5ef2ad9f354d19f8f1eeb690d1c161 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Mon, 2 Feb 2026 18:18:54 +0000 Subject: [PATCH 102/195] drm/xe: Make xe_modparam.force_vram_bar_size signed vram_bar_size is registered as an int module parameter and is documented to accept negative values to disable BAR resizing. Store it as an int in xe_modparam as well, so negative values work as intended and the module_param type matches. 
Fixes: 80742a1aa26e ("drm/xe: Allow to drop vram resizing") Reviewed-by: Michal Wajdeczko Signed-off-by: Shuicheng Lin Link: https://patch.msgid.link/20260202181853.1095736-2-shuicheng.lin@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index 1c75f38ca393..79cb9639c0f3 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -12,7 +12,7 @@ struct xe_modparam { bool force_execlist; bool probe_display; - u32 force_vram_bar_size; + int force_vram_bar_size; int guc_log_level; char *guc_firmware_path; char *huc_firmware_path; From 1a3c0049b3f56278c9caf2784c53f6ab435fd12c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 11 Feb 2026 11:41:59 +0100 Subject: [PATCH 103/195] Revert "drm/pagemap: Disable device-to-device migration" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With commit a69d1ab971a6 ("mm: Fix a hmm_range_fault() livelock / starvation problem") device-to-device migration is not functional again and the disabling can be reverted. Add the above commit as a Fixes: tag in order for the revert to not take place unless that commit is present. This reverts commit 10dd1eaa80a56d3cf6d7c36b5269c8fed617f001. 
Cc: Matthew Brost Fixes: a69d1ab971a6 ("mm: Fix a hmm_range_fault() livelock / starvation problem") Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260211104159.114947-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/drm_pagemap.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c index 03ee39a761a4..aa43a8475100 100644 --- a/drivers/gpu/drm/drm_pagemap.c +++ b/drivers/gpu/drm/drm_pagemap.c @@ -480,18 +480,8 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, .start = start, .end = end, .pgmap_owner = pagemap->owner, - /* - * FIXME: MIGRATE_VMA_SELECT_DEVICE_PRIVATE intermittently - * causes 'xe_exec_system_allocator --r *race*no*' to trigger aa - * engine reset and a hard hang due to getting stuck on a folio - * lock. This should work and needs to be root-caused. The only - * downside of not selecting MIGRATE_VMA_SELECT_DEVICE_PRIVATE - * is that device-to-device migrations won’t work; instead, - * memory will bounce through system memory. This path should be - * rare and only occur when the madvise attributes of memory are - * changed or atomics are being used. - */ - .flags = MIGRATE_VMA_SELECT_SYSTEM | MIGRATE_VMA_SELECT_DEVICE_COHERENT, + .flags = MIGRATE_VMA_SELECT_SYSTEM | MIGRATE_VMA_SELECT_DEVICE_COHERENT | + MIGRATE_VMA_SELECT_DEVICE_PRIVATE, }; unsigned long i, npages = npages_in_range(start, end); unsigned long own_pages = 0, migrated_pages = 0; From 6d83ef1adaae89c2b85ec486ec90397538deba1b Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Thu, 5 Feb 2026 17:04:24 +0530 Subject: [PATCH 104/195] drm/xe: Update xe_device_declare_wedged() error log Since the introduction of DRM wedged event, there are now a few different procedures to recover the device depending on selected recovery method. Update the error log to reflect this and point the user to correct documentation for it. 
Signed-off-by: Raag Jadav Reviewed-by: Rodrigo Vivi Link: https://patch.msgid.link/20260205113424.1629204-1-raag.jadav@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 743c18e0c580..4b68a2d55651 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -1308,7 +1308,8 @@ void xe_device_declare_wedged(struct xe_device *xe) xe->needs_flr_on_fini = true; drm_err(&xe->drm, "CRITICAL: Xe has declared device %s as wedged.\n" - "IOCTLs and executions are blocked. Only a rebind may clear the failure\n" + "IOCTLs and executions are blocked.\n" + "For recovery procedure, refer to https://docs.kernel.org/gpu/drm-uapi.html#device-wedging\n" "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n", dev_name(xe->drm.dev)); } From b5b55d0932eef682b648e456df177430968e19d5 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 10 Feb 2026 10:25:19 -0800 Subject: [PATCH 105/195] drm/xe/xe3p_xpc: Add new XeCore fuse registers to VF runtime regs SRIOV VFs do not automatically have access to the XeCore fuse registers. Add the two new registers that show up on Xe3p_XPC to the runtime register list to grant VFs access. Since there's a single runtime register list for all Xe3p, this will technically also grant access on Xe3p_LPG platforms where the registers don't exist, but that should be harmless since even if a VF tries to read a non-existent register on those platforms it will just get back a sensible value of 0x0. 
Fixes: e8100643ff01 ("drm/xe/xe3p_xpc: XeCore mask spans four registers") Cc: Michal Wajdeczko Reviewed-by: Ngai-Mint Kwan Link: https://patch.msgid.link/20260210182519.206952-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c index b5e0a5b7723e..6586df2fcb91 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -111,6 +111,8 @@ static const struct xe_reg ver_35_runtime_regs[] = { XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */ XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */ XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */ + XE3P_XPC_GT_GEOMETRY_DSS_3, /* _MMIO(0x915c) */ + XE3P_XPC_GT_COMPUTE_DSS_3, /* _MMIO(0x9160) */ SERVICE_COPY_ENABLE, /* _MMIO(0x9170) */ }; From 1ff4b1730ca421de6bd84b871f68107b9ae46bcf Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 11 Feb 2026 15:47:36 -0800 Subject: [PATCH 106/195] drm/xe: Stop applying Wa_16018737384 from Xe3 onward Wa_16018737384 is one of the rare cases where the hardware teams mark a workaround as "driver change required" rather than "permanent/temporary workaround" in the internal workaround database, signifying that the implementation details of the workaround should just be considered standard programming instructions on all platforms going forward. Cases like this are the only time that using XE_RTP_END_VERSION_UNDEFINED as an upper bound for a workaround's IP range is warranted and correct. However in this specific case, the register bit in question (0xE4F0[1]) simply no longer exists in hardware from Xe3 onward. Trying to write to that bit on Xe3 or Xe3p platforms is harmless and just doesn't have any effect, but it's possible that the register bit could get repurposed to control something else down the road on future platforms. 
To avoid any surprises in the future we should replace the unbounded upper bound in our RTP table with a value that accurately reflects that Wa_16018737384 can only apply to Xe2 platforms. Bspec: 56849 Reviewed-by: Dnyaneshwar Bhadane Link: https://patch.msgid.link/20260211234735.620087-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 843ce9fe7aab..61c4187dc0ae 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -588,7 +588,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS)) }, { XE_RTP_NAME("16018737384"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2999), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS)) }, From aafbb42be589d4a3d282b377f50a1ddb42400628 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Pi=C3=B3rkowski?= Date: Wed, 11 Feb 2026 18:14:41 +0100 Subject: [PATCH 107/195] drm/xe: Force EXEC_QUEUE_FLAG_KERNEL for kernel internal VMs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VMs created without an associated xe_file originate from kernel contexts and should use kernel exec queues. Ensure such VMs create bind exec queues with EXEC_QUEUE_FLAG_KERNEL set. Let's ensure bind exec queues created for kernel VMs are always marked with EXEC_QUEUE_FLAG_KERNEL. 
Signed-off-by: Piotr Piórkowski
Reviewed-by: Matthew Brost
Signed-off-by: Michal Wajdeczko
Link: https://patch.msgid.link/20260211171441.3246686-1-piotr.piorkowski@intel.com
---
 drivers/gpu/drm/xe/xe_vm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index e6cfa5dc7f62..a46f11a71c37 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1657,6 +1657,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
 			if (!vm->pt_root[id])
 				continue;
 
+			if (!xef) /* Not from userspace */
+				create_flags |= EXEC_QUEUE_FLAG_KERNEL;
+
 			q = xe_exec_queue_create_bind(xe, tile, vm, create_flags, 0);
 			if (IS_ERR(q)) {
 				err = PTR_ERR(q);

From c020fff70d757612933711dd3cc3751d7d782d3c Mon Sep 17 00:00:00 2001
From: Raag Jadav
Date: Thu, 12 Feb 2026 11:26:22 +0530
Subject: [PATCH 108/195] drm/xe/bo: Redirect faults to dummy page for wedged
 device

As per uapi documentation[1], the prerequisite for a wedged device is to
redirect page faults to a dummy page. Follow it.
[1] Documentation/gpu/drm-uapi.rst v2: Add uapi reference and fixes tag (Matthew Brost) Fixes: 7bc00751f877 ("drm/xe: Use device wedged event") Signed-off-by: Raag Jadav Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260212055622.2054991-1-raag.jadav@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_bo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index cb8a177ec02b..d6c2cb959cdd 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1941,7 +1941,7 @@ static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf) int err = 0; int idx; - if (!drm_dev_enter(&xe->drm, &idx)) + if (xe_device_wedged(xe) || !drm_dev_enter(&xe->drm, &idx)) return ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); ret = xe_bo_cpu_fault_fastpath(vmf, xe, bo, needs_rpm); From 2882094e0db192c431db14aab09cc08d18dcd059 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Thu, 12 Feb 2026 12:29:20 +0530 Subject: [PATCH 109/195] drm/xe/xe2: Apply Wa_14024997852 Applied Wa_14024997852 to Graphics version 20.01 to 20.04 Whitelist registers needed for userspace to control autostrip on xe2. 
v2: - set Bit 31 of FF_MODE, for TE autostrip disable (Nitin) v3: - Need to whitelist these for Xe2 IPs (MATT R) v4: - Combine these into a single range for simplicity:(2001, 3005) (MATT R) Cc: Tejas Upadhyay Reviewed-by: Nitin Gote Reviewed-by: Matt Roper Signed-off-by: Arvind Yadav Link: https://patch.msgid.link/20260212065920.1815979-1-arvind.yadav@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_reg_whitelist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 9c513778d370..728aba8dbd95 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -75,7 +75,7 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { XE_RTP_ACTIONS(WHITELIST(CSBE_DEBUG_STATUS(RENDER_RING_BASE), 0)) }, { XE_RTP_NAME("14024997852"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3005), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(WHITELIST(FF_MODE, RING_FORCE_TO_NONPRIV_ACCESS_RW), WHITELIST(VFLSKPD, From 68be2bfe4bcd70932a3dcb2eb20398933b5f454f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 12 Feb 2026 12:42:26 -0800 Subject: [PATCH 110/195] drm/xe: Pack fault type and level into a u8 Pack the fault type and level fields into a single u8 to save space in struct xe_pagefault. This also makes future extensions easier. 
Signed-off-by: Matthew Brost Reviewed-by: Francois Dugast Link: https://patch.msgid.link/20260212204227.2764054-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_pagefault.c | 9 ++++++--- drivers/gpu/drm/xe/xe_pagefault.c | 12 +++++++----- drivers/gpu/drm/xe/xe_pagefault_types.h | 14 +++++++------- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pagefault.c b/drivers/gpu/drm/xe/xe_guc_pagefault.c index 719a18187a31..1166b0a5fa21 100644 --- a/drivers/gpu/drm/xe/xe_guc_pagefault.c +++ b/drivers/gpu/drm/xe/xe_guc_pagefault.c @@ -76,11 +76,14 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) PFD_VIRTUAL_ADDR_LO_SHIFT); pf.consumer.asid = FIELD_GET(PFD_ASID, msg[1]); pf.consumer.access_type = FIELD_GET(PFD_ACCESS_TYPE, msg[2]); - pf.consumer.fault_type = FIELD_GET(PFD_FAULT_TYPE, msg[2]); if (FIELD_GET(XE2_PFD_TRVA_FAULT, msg[0])) - pf.consumer.fault_level = XE_PAGEFAULT_LEVEL_NACK; + pf.consumer.fault_type_level = XE_PAGEFAULT_TYPE_LEVEL_NACK; else - pf.consumer.fault_level = FIELD_GET(PFD_FAULT_LEVEL, msg[0]); + pf.consumer.fault_type_level = + FIELD_PREP(XE_PAGEFAULT_LEVEL_MASK, + FIELD_GET(PFD_FAULT_LEVEL, msg[0])) | + FIELD_PREP(XE_PAGEFAULT_TYPE_MASK, + FIELD_GET(PFD_FAULT_TYPE, msg[2])); pf.consumer.engine_class = FIELD_GET(PFD_ENG_CLASS, msg[0]); pf.consumer.engine_instance = FIELD_GET(PFD_ENG_INSTANCE, msg[0]); diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c index 6bee53d6ffc3..72f589fd2b64 100644 --- a/drivers/gpu/drm/xe/xe_pagefault.c +++ b/drivers/gpu/drm/xe/xe_pagefault.c @@ -164,7 +164,7 @@ static int xe_pagefault_service(struct xe_pagefault *pf) bool atomic; /* Producer flagged this fault to be nacked */ - if (pf->consumer.fault_level == XE_PAGEFAULT_LEVEL_NACK) + if (pf->consumer.fault_type_level == XE_PAGEFAULT_TYPE_LEVEL_NACK) return -EFAULT; vm = xe_pagefault_asid_to_vm(xe, pf->consumer.asid); @@ -225,17 +225,19 @@ static void 
xe_pagefault_print(struct xe_pagefault *pf) { xe_gt_info(pf->gt, "\n\tASID: %d\n" "\tFaulted Address: 0x%08x%08x\n" - "\tFaultType: %d\n" + "\tFaultType: %lu\n" "\tAccessType: %d\n" - "\tFaultLevel: %d\n" + "\tFaultLevel: %lu\n" "\tEngineClass: %d %s\n" "\tEngineInstance: %d\n", pf->consumer.asid, upper_32_bits(pf->consumer.page_addr), lower_32_bits(pf->consumer.page_addr), - pf->consumer.fault_type, + FIELD_GET(XE_PAGEFAULT_TYPE_MASK, + pf->consumer.fault_type_level), pf->consumer.access_type, - pf->consumer.fault_level, + FIELD_GET(XE_PAGEFAULT_LEVEL_MASK, + pf->consumer.fault_type_level), pf->consumer.engine_class, xe_hw_engine_class_to_str(pf->consumer.engine_class), pf->consumer.engine_instance); diff --git a/drivers/gpu/drm/xe/xe_pagefault_types.h b/drivers/gpu/drm/xe/xe_pagefault_types.h index d3b516407d60..0e378f41ede6 100644 --- a/drivers/gpu/drm/xe/xe_pagefault_types.h +++ b/drivers/gpu/drm/xe/xe_pagefault_types.h @@ -73,19 +73,19 @@ struct xe_pagefault { */ u8 access_type; /** - * @consumer.fault_type: fault type, u8 rather than enum to - * keep size compact + * @consumer.fault_type_level: fault type and level, u8 rather + * than enum to keep size compact */ - u8 fault_type; -#define XE_PAGEFAULT_LEVEL_NACK 0xff /* Producer indicates nack fault */ - /** @consumer.fault_level: fault level */ - u8 fault_level; + u8 fault_type_level; +#define XE_PAGEFAULT_TYPE_LEVEL_NACK 0xff /* Producer indicates nack fault */ +#define XE_PAGEFAULT_LEVEL_MASK GENMASK(3, 0) +#define XE_PAGEFAULT_TYPE_MASK GENMASK(7, 4) /** @consumer.engine_class: engine class */ u8 engine_class; /** @consumer.engine_instance: engine instance */ u8 engine_instance; /** consumer.reserved: reserved bits for future expansion */ - u8 reserved[7]; + u64 reserved; } consumer; /** * @producer: State for the producer (i.e., HW/FW interface). 
Populated From 2405ba53ffe8dad77c530677bfec4c601bd2110a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 12 Feb 2026 12:42:27 -0800 Subject: [PATCH 111/195] drm/xe: Avoid touching consumer fields in GuC pagefault ack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GuC pagefault acknowledgment code is designed to extract the fields needed for the acknowledgment from the producer-stored message so that the consumer fields can be overloaded to return additional information. The ASID is stored in the producer message; extract it from there to future‑proof this logic. Signed-off-by: Matthew Brost Reviewed-by: Francois Dugast Link: https://patch.msgid.link/20260212204227.2764054-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_pagefault.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pagefault.c b/drivers/gpu/drm/xe/xe_guc_pagefault.c index 1166b0a5fa21..d48f6ed103bb 100644 --- a/drivers/gpu/drm/xe/xe_guc_pagefault.c +++ b/drivers/gpu/drm/xe/xe_guc_pagefault.c @@ -17,6 +17,7 @@ static void guc_ack_fault(struct xe_pagefault *pf, int err) u32 pdata = FIELD_GET(PFD_PDATA_LO, pf->producer.msg[0]) | (FIELD_GET(PFD_PDATA_HI, pf->producer.msg[1]) << PFD_PDATA_HI_SHIFT); + u32 asid = FIELD_GET(PFD_ASID, pf->producer.msg[1]); u32 action[] = { XE_GUC_ACTION_PAGE_FAULT_RES_DESC, @@ -24,7 +25,7 @@ static void guc_ack_fault(struct xe_pagefault *pf, int err) FIELD_PREP(PFR_SUCCESS, !!err) | FIELD_PREP(PFR_REPLY, PFR_ACCESS) | FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) | - FIELD_PREP(PFR_ASID, pf->consumer.asid), + FIELD_PREP(PFR_ASID, asid), FIELD_PREP(PFR_VFID, vfid) | FIELD_PREP(PFR_ENG_INSTANCE, engine_instance) | From 4a175759e30cde8182ae9c65fad5cf35864546a0 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 12 Feb 2026 15:12:06 +0200 Subject: [PATCH 112/195] drm/xe: remove unnecessary struct dram_info forward declaration There's no longer any need for the struct 
dram_info forward declaration. Remove it. Signed-off-by: Jani Nikula Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260212131206.1804113-1-jani.nikula@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_device_types.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 059f026e80d5..8f3ef836541e 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -33,7 +33,6 @@ #define TEST_VM_OPS_ERROR #endif -struct dram_info; struct drm_pagemap_shrinker; struct intel_display; struct intel_dg_nvm_dev; From 7feebdb041a99ff4bfac72f1a18de44de2ed5d63 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 6 Feb 2026 12:21:10 +0100 Subject: [PATCH 113/195] drm/xe: Make xe_ggtt_node offset relative to starting offset Fix all functions that use node->start to use xe_ggtt_node_addr, and add ggtt->start to node->start. This will make node shifting for SR-IOV VF a one-liner, instead of manually changing each GGTT node's base address. Also convert some uses of mutex_lock/unlock to mutex guards. 
Reviewed-by: Michal Wajdeczko Link: https://patch.msgid.link/20260206112108.1453809-8-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/xe/xe_ggtt.c | 53 +++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 2ab880772847..479df63f31bf 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -299,7 +299,7 @@ static void __xe_ggtt_init_early(struct xe_ggtt *ggtt, u64 start, u64 size) { ggtt->start = start; ggtt->size = size; - drm_mm_init(&ggtt->mm, start, size); + drm_mm_init(&ggtt->mm, 0, size); } int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 start, u32 size) @@ -401,7 +401,7 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) /* Display may have allocated inside ggtt, so be careful with clearing here */ mutex_lock(&ggtt->lock); drm_mm_for_each_hole(hole, &ggtt->mm, start, end) - xe_ggtt_clear(ggtt, start, end - start); + xe_ggtt_clear(ggtt, ggtt->start + start, end - start); xe_ggtt_invalidate(ggtt); mutex_unlock(&ggtt->lock); @@ -418,7 +418,7 @@ static void ggtt_node_remove(struct xe_ggtt_node *node) mutex_lock(&ggtt->lock); if (bound) - xe_ggtt_clear(ggtt, node->base.start, node->base.size); + xe_ggtt_clear(ggtt, xe_ggtt_node_addr(node), xe_ggtt_node_size(node)); drm_mm_remove_node(&node->base); node->base.size = 0; mutex_unlock(&ggtt->lock); @@ -570,16 +570,17 @@ int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, u64 start, u64 xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE)); xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE)); xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(&node->base)); + xe_tile_assert(ggtt->tile, start >= ggtt->start); lockdep_assert_held(&ggtt->lock); node->base.color = 0; - node->base.start = start; + node->base.start = start - ggtt->start; node->base.size = end - start; err = drm_mm_reserve_node(&ggtt->mm, &node->base); if 
(xe_tile_WARN(ggtt->tile, err, "Failed to balloon GGTT %#llx-%#llx (%pe)\n", - node->base.start, node->base.start + node->base.size, ERR_PTR(err))) + xe_ggtt_node_addr(node), xe_ggtt_node_addr(node) + node->base.size, ERR_PTR(err))) return err; xe_ggtt_dump_node(ggtt, &node->base, "balloon"); @@ -770,7 +771,7 @@ static void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, if (XE_WARN_ON(!node)) return; - start = node->base.start; + start = xe_ggtt_node_addr(node); end = start + xe_bo_size(bo); if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) { @@ -891,6 +892,14 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, } mutex_lock(&ggtt->lock); + xe_tile_assert(ggtt->tile, start >= ggtt->start || !start); + xe_tile_assert(ggtt->tile, end >= ggtt->start); + + if (start) + start -= ggtt->start; + + end -= ggtt->start; + err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base, xe_bo_size(bo), alignment, 0, start, end, 0); if (err) { @@ -1002,16 +1011,17 @@ static u64 xe_encode_vfid_pte(u16 vfid) return FIELD_PREP(GGTT_PTE_VFID, vfid) | XE_PAGE_PRESENT; } -static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid) +static void xe_ggtt_assign_locked(const struct xe_ggtt_node *node, u16 vfid) { - u64 start = node->start; - u64 size = node->size; + struct xe_ggtt *ggtt = node->ggtt; + u64 start = xe_ggtt_node_addr(node); + u64 size = xe_ggtt_node_size(node); u64 end = start + size - 1; u64 pte = xe_encode_vfid_pte(vfid); lockdep_assert_held(&ggtt->lock); - if (!drm_mm_node_allocated(node)) + if (!xe_ggtt_node_allocated(node)) return; while (start < end) { @@ -1033,9 +1043,8 @@ static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node */ void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid) { - mutex_lock(&node->ggtt->lock); - xe_ggtt_assign_locked(node->ggtt, &node->base, vfid); - mutex_unlock(&node->ggtt->lock); + guard(mutex)(&node->ggtt->lock); + 
xe_ggtt_assign_locked(node, vfid); } /** @@ -1057,14 +1066,14 @@ int xe_ggtt_node_save(struct xe_ggtt_node *node, void *dst, size_t size, u16 vfi if (!node) return -ENOENT; - guard(mutex)(&node->ggtt->lock); + ggtt = node->ggtt; + guard(mutex)(&ggtt->lock); if (xe_ggtt_node_pt_size(node) != size) return -EINVAL; - ggtt = node->ggtt; - start = node->base.start; - end = start + node->base.size - 1; + start = xe_ggtt_node_addr(node); + end = start + xe_ggtt_node_size(node) - 1; while (start < end) { pte = ggtt->pt_ops->ggtt_get_pte(ggtt, start); @@ -1097,14 +1106,14 @@ int xe_ggtt_node_load(struct xe_ggtt_node *node, const void *src, size_t size, u if (!node) return -ENOENT; - guard(mutex)(&node->ggtt->lock); + ggtt = node->ggtt; + guard(mutex)(&ggtt->lock); if (xe_ggtt_node_pt_size(node) != size) return -EINVAL; - ggtt = node->ggtt; - start = node->base.start; - end = start + node->base.size - 1; + start = xe_ggtt_node_addr(node); + end = start + xe_ggtt_node_size(node) - 1; while (start < end) { vfid_pte = u64_replace_bits(*buf++, vfid, GGTT_PTE_VFID); @@ -1211,7 +1220,7 @@ u64 xe_ggtt_read_pte(struct xe_ggtt *ggtt, u64 offset) */ u64 xe_ggtt_node_addr(const struct xe_ggtt_node *node) { - return node->base.start; + return node->base.start + node->ggtt->start; } /** From e904c56ba6e0d4eff5f48a70356fd5d764c2a966 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 6 Feb 2026 12:21:11 +0100 Subject: [PATCH 114/195] drm/xe: Rewrite GGTT VF initialization The previous code was using a complicated system with 2 balloons to set GGTT size and adjust GGTT offset. While it works, it's overly complicated. A better approach is to set the offset and size when initializing GGTT, this removes the need for adding balloons. The resize function only needs to readjust ggtt->start to have GGTT at the new offset. This removes the need to manipulate the internals of xe_ggtt outside of xe_ggtt, and cleans up a lot of now unneeded code. 
Co-developed-by: Matthew Brost Reviewed-by: Michal Wajdeczko Link: https://patch.msgid.link/20260206112108.1453809-9-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/xe/xe_ggtt.c | 187 +++++++----------------- drivers/gpu/drm/xe/xe_ggtt.h | 5 +- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 35 +++-- drivers/gpu/drm/xe/xe_tile_sriov_vf.c | 198 +------------------------- drivers/gpu/drm/xe/xe_tile_sriov_vf.h | 3 - 5 files changed, 76 insertions(+), 352 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 479df63f31bf..df9def284c28 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -70,8 +70,8 @@ * struct xe_ggtt_node - A node in GGTT. * * This struct needs to be initialized (only-once) with xe_ggtt_node_init() before any node - * insertion, reservation, or 'ballooning'. - * It will, then, be finalized by either xe_ggtt_node_remove() or xe_ggtt_node_deballoon(). + * insertion or reservation. + * It will, then, be finalized by xe_ggtt_node_remove(). 
*/ struct xe_ggtt_node { /** @ggtt: Back pointer to xe_ggtt where this region will be inserted at */ @@ -347,9 +347,15 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) ggtt_start = wopcm; ggtt_size = (gsm_size / 8) * (u64)XE_PAGE_SIZE - ggtt_start; } else { - /* GGTT is expected to be 4GiB */ - ggtt_start = wopcm; - ggtt_size = SZ_4G - ggtt_start; + ggtt_start = xe_tile_sriov_vf_ggtt_base(ggtt->tile); + ggtt_size = xe_tile_sriov_vf_ggtt(ggtt->tile); + + if (ggtt_start < wopcm || + ggtt_start + ggtt_size > GUC_GGTT_TOP) { + xe_tile_err(ggtt->tile, "Invalid GGTT configuration: %#llx-%#llx\n", + ggtt_start, ggtt_start + ggtt_size - 1); + return -ERANGE; + } } ggtt->gsm = ggtt->tile->mmio.regs + SZ_8M; @@ -377,17 +383,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) if (err) return err; - err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); - if (err) - return err; - - if (IS_SRIOV_VF(xe)) { - err = xe_tile_sriov_vf_prepare_ggtt(ggtt->tile); - if (err) - return err; - } - - return 0; + return devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); } ALLOW_ERROR_INJECTION(xe_ggtt_init_early, ERRNO); /* See xe_pci_probe() */ @@ -538,120 +534,28 @@ static void xe_ggtt_invalidate(struct xe_ggtt *ggtt) ggtt_invalidate_gt_tlb(ggtt->tile->media_gt); } -static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, - const struct drm_mm_node *node, const char *description) -{ - char buf[10]; - - if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { - string_get_size(node->size, 1, STRING_UNITS_2, buf, sizeof(buf)); - xe_tile_dbg(ggtt->tile, "GGTT %#llx-%#llx (%s) %s\n", - node->start, node->start + node->size, buf, description); - } -} - /** - * xe_ggtt_node_insert_balloon_locked - prevent allocation of specified GGTT addresses - * @node: the &xe_ggtt_node to hold reserved GGTT node - * @start: the starting GGTT address of the reserved region - * @end: then end GGTT address of the reserved region - * - * To be used in cases where ggtt->lock is already taken. 
- * Use xe_ggtt_node_remove_balloon_locked() to release a reserved GGTT node. - * - * Return: 0 on success or a negative error code on failure. - */ -int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, u64 start, u64 end) -{ - struct xe_ggtt *ggtt = node->ggtt; - int err; - - xe_tile_assert(ggtt->tile, start < end); - xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE)); - xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE)); - xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(&node->base)); - xe_tile_assert(ggtt->tile, start >= ggtt->start); - lockdep_assert_held(&ggtt->lock); - - node->base.color = 0; - node->base.start = start - ggtt->start; - node->base.size = end - start; - - err = drm_mm_reserve_node(&ggtt->mm, &node->base); - - if (xe_tile_WARN(ggtt->tile, err, "Failed to balloon GGTT %#llx-%#llx (%pe)\n", - xe_ggtt_node_addr(node), xe_ggtt_node_addr(node) + node->base.size, ERR_PTR(err))) - return err; - - xe_ggtt_dump_node(ggtt, &node->base, "balloon"); - return 0; -} - -/** - * xe_ggtt_node_remove_balloon_locked - release a reserved GGTT region - * @node: the &xe_ggtt_node with reserved GGTT region - * - * To be used in cases where ggtt->lock is already taken. - * See xe_ggtt_node_insert_balloon_locked() for details. - */ -void xe_ggtt_node_remove_balloon_locked(struct xe_ggtt_node *node) -{ - if (!xe_ggtt_node_allocated(node)) - return; - - lockdep_assert_held(&node->ggtt->lock); - - xe_ggtt_dump_node(node->ggtt, &node->base, "remove-balloon"); - - drm_mm_remove_node(&node->base); -} - -static void xe_ggtt_assert_fit(struct xe_ggtt *ggtt, u64 start, u64 size) -{ - struct xe_tile *tile = ggtt->tile; - - xe_tile_assert(tile, start >= ggtt->start); - xe_tile_assert(tile, start + size <= ggtt->start + ggtt->size); -} - -/** - * xe_ggtt_shift_nodes_locked - Shift GGTT nodes to adjust for a change in usable address range. + * xe_ggtt_shift_nodes() - Shift GGTT nodes to adjust for a change in usable address range. 
* @ggtt: the &xe_ggtt struct instance - * @shift: change to the location of area provisioned for current VF + * @new_start: new location of area provisioned for current VF * - * This function moves all nodes from the GGTT VM, to a temp list. These nodes are expected - * to represent allocations in range formerly assigned to current VF, before the range changed. - * When the GGTT VM is completely clear of any nodes, they are re-added with shifted offsets. + * Ensure that all struct &xe_ggtt_node are moved to the @new_start base address + * by changing the base offset of the GGTT. * - * The function has no ability of failing - because it shifts existing nodes, without - * any additional processing. If the nodes were successfully existing at the old address, - * they will do the same at the new one. A fail inside this function would indicate that - * the list of nodes was either already damaged, or that the shift brings the address range - * outside of valid bounds. Both cases justify an assert rather than error code. + * This function may be called multiple times during recovery, but if + * @new_start is unchanged from the current base, it's a noop. + * + * @new_start should be a value between xe_wopcm_size() and #GUC_GGTT_TOP. 
*/ -void xe_ggtt_shift_nodes_locked(struct xe_ggtt *ggtt, s64 shift) +void xe_ggtt_shift_nodes(struct xe_ggtt *ggtt, u64 new_start) { - struct xe_tile *tile __maybe_unused = ggtt->tile; - struct drm_mm_node *node, *tmpn; - LIST_HEAD(temp_list_head); + guard(mutex)(&ggtt->lock); - lockdep_assert_held(&ggtt->lock); + xe_tile_assert(ggtt->tile, new_start >= xe_wopcm_size(tile_to_xe(ggtt->tile))); + xe_tile_assert(ggtt->tile, new_start + ggtt->size <= GUC_GGTT_TOP); - if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) - drm_mm_for_each_node_safe(node, tmpn, &ggtt->mm) - xe_ggtt_assert_fit(ggtt, node->start + shift, node->size); - - drm_mm_for_each_node_safe(node, tmpn, &ggtt->mm) { - drm_mm_remove_node(node); - list_add(&node->node_list, &temp_list_head); - } - - list_for_each_entry_safe(node, tmpn, &temp_list_head, node_list) { - list_del(&node->node_list); - node->start += shift; - drm_mm_reserve_node(&ggtt->mm, node); - xe_tile_assert(tile, drm_mm_node_allocated(node)); - } + /* pairs with READ_ONCE in xe_ggtt_node_addr() */ + WRITE_ONCE(ggtt->start, new_start); } static int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, @@ -692,12 +596,8 @@ int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align) * * This function will allocate the struct %xe_ggtt_node and return its pointer. * This struct will then be freed after the node removal upon xe_ggtt_node_remove() - * or xe_ggtt_node_remove_balloon_locked(). - * - * Having %xe_ggtt_node struct allocated doesn't mean that the node is already - * allocated in GGTT. Only xe_ggtt_node_insert(), allocation through - * xe_ggtt_node_insert_transform(), or xe_ggtt_node_insert_balloon_locked() will ensure the node is inserted or reserved - * in GGTT. + * Having %xe_ggtt_node struct allocated doesn't mean that the node is already allocated + * in GGTT. Only xe_ggtt_node_insert() will ensure the node is inserted or reserved in GGTT. * * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise. 
**/ @@ -718,9 +618,9 @@ struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt) * xe_ggtt_node_fini - Forcebly finalize %xe_ggtt_node struct * @node: the &xe_ggtt_node to be freed * - * If anything went wrong with either xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), - * or xe_ggtt_node_insert_balloon_locked(); and this @node is not going to be reused, then, - * this function needs to be called to free the %xe_ggtt_node struct + * If anything went wrong with either xe_ggtt_node_insert() and this @node is + * not going to be reused, then this function needs to be called to free the + * %xe_ggtt_node struct **/ void xe_ggtt_node_fini(struct xe_ggtt_node *node) { @@ -892,13 +792,25 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, } mutex_lock(&ggtt->lock); - xe_tile_assert(ggtt->tile, start >= ggtt->start || !start); - xe_tile_assert(ggtt->tile, end >= ggtt->start); - - if (start) + /* + * When inheriting the initial framebuffer, the framebuffer is + * physically located at VRAM address 0, and usually at GGTT address 0 too. + * + * The display code will ask for a GGTT allocation between end of BO and + * remainder of GGTT, unaware that the start is reserved by WOPCM. 
+ */ + if (start >= ggtt->start) start -= ggtt->start; + else + start = 0; - end -= ggtt->start; + /* Should never happen, but since we handle start, fail graciously for end */ + if (end >= ggtt->start) + end -= ggtt->start; + else + end = 0; + + xe_tile_assert(ggtt->tile, end >= start + xe_bo_size(bo)); err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base, xe_bo_size(bo), alignment, 0, start, end, 0); @@ -1220,7 +1132,8 @@ u64 xe_ggtt_read_pte(struct xe_ggtt *ggtt, u64 offset) */ u64 xe_ggtt_node_addr(const struct xe_ggtt_node *node) { - return node->base.start + node->ggtt->start; + /* pairs with WRITE_ONCE in xe_ggtt_shift_nodes() */ + return node->base.start + READ_ONCE(node->ggtt->start); } /** diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 70d5e07ac4b6..49ea8e7ecc10 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -19,10 +19,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt); struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt); void xe_ggtt_node_fini(struct xe_ggtt_node *node); -int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, - u64 start, u64 size); -void xe_ggtt_node_remove_balloon_locked(struct xe_ggtt_node *node); -void xe_ggtt_shift_nodes_locked(struct xe_ggtt *ggtt, s64 shift); +void xe_ggtt_shift_nodes(struct xe_ggtt *ggtt, u64 new_base); u64 xe_ggtt_start(struct xe_ggtt *ggtt); u64 xe_ggtt_size(struct xe_ggtt *ggtt); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 30e8c2cf5f09..527ded3c9c22 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -488,16 +488,12 @@ u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt) static int vf_get_ggtt_info(struct xe_gt *gt) { struct xe_tile *tile = gt_to_tile(gt); - struct xe_ggtt *ggtt = tile->mem.ggtt; struct xe_guc *guc = >->uc.guc; u64 start, size, ggtt_size; - s64 shift; int err; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - 
guard(mutex)(&ggtt->lock); - err = guc_action_query_single_klv64(guc, GUC_KLV_VF_CFG_GGTT_START_KEY, &start); if (unlikely(err)) return err; @@ -509,8 +505,21 @@ static int vf_get_ggtt_info(struct xe_gt *gt) if (!size) return -ENODATA; + xe_tile_sriov_vf_ggtt_base_store(tile, start); ggtt_size = xe_tile_sriov_vf_ggtt(tile); - if (ggtt_size && ggtt_size != size) { + if (!ggtt_size) { + /* + * This function is called once during xe_guc_init_noalloc(), + * at which point ggtt_size = 0 and we have to initialize everything, + * and GGTT is not yet initialized. + * + * Return early as there's nothing to fixup. + */ + xe_tile_sriov_vf_ggtt_store(tile, size); + return 0; + } + + if (ggtt_size != size) { xe_gt_sriov_err(gt, "Unexpected GGTT reassignment: %lluK != %lluK\n", size / SZ_1K, ggtt_size / SZ_1K); return -EREMCHG; @@ -519,15 +528,13 @@ static int vf_get_ggtt_info(struct xe_gt *gt) xe_gt_sriov_dbg_verbose(gt, "GGTT %#llx-%#llx = %lluK\n", start, start + size - 1, size / SZ_1K); - shift = start - (s64)xe_tile_sriov_vf_ggtt_base(tile); - xe_tile_sriov_vf_ggtt_base_store(tile, start); - xe_tile_sriov_vf_ggtt_store(tile, size); - - if (shift && shift != start) { - xe_gt_sriov_info(gt, "Shifting GGTT base by %lld to 0x%016llx\n", - shift, start); - xe_tile_sriov_vf_fixup_ggtt_nodes_locked(gt_to_tile(gt), shift); - } + /* + * This function can be called repeatedly from post migration fixups, + * at which point we inform the GGTT of the new base address. + * xe_ggtt_shift_nodes() may be called multiple times for each migration, + * but will be a noop if the base is unchanged. 
+ */ + xe_ggtt_shift_nodes(tile->mem.ggtt, start); if (xe_sriov_vf_migration_supported(gt_to_xe(gt))) { WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, false); diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf.c b/drivers/gpu/drm/xe/xe_tile_sriov_vf.c index c9bac2cfdd04..24293521e090 100644 --- a/drivers/gpu/drm/xe/xe_tile_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf.c @@ -14,173 +14,12 @@ #include "xe_tile_sriov_vf.h" #include "xe_wopcm.h" -static int vf_init_ggtt_balloons(struct xe_tile *tile) -{ - struct xe_ggtt *ggtt = tile->mem.ggtt; - - xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); - - tile->sriov.vf.ggtt_balloon[0] = xe_ggtt_node_init(ggtt); - if (IS_ERR(tile->sriov.vf.ggtt_balloon[0])) - return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]); - - tile->sriov.vf.ggtt_balloon[1] = xe_ggtt_node_init(ggtt); - if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) { - xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]); - return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]); - } - - return 0; -} - -/** - * xe_tile_sriov_vf_balloon_ggtt_locked - Insert balloon nodes to limit used GGTT address range. - * @tile: the &xe_tile struct instance - * - * Return: 0 on success or a negative error code on failure. 
- */ -static int xe_tile_sriov_vf_balloon_ggtt_locked(struct xe_tile *tile) -{ - u64 ggtt_base = tile->sriov.vf.self_config.ggtt_base; - u64 ggtt_size = tile->sriov.vf.self_config.ggtt_size; - struct xe_device *xe = tile_to_xe(tile); - u64 wopcm = xe_wopcm_size(xe); - u64 start, end; - int err; - - xe_tile_assert(tile, IS_SRIOV_VF(xe)); - xe_tile_assert(tile, ggtt_size); - lockdep_assert_held(&tile->mem.ggtt->lock); - - /* - * VF can only use part of the GGTT as allocated by the PF: - * - * WOPCM GUC_GGTT_TOP - * |<------------ Total GGTT size ------------------>| - * - * VF GGTT base -->|<- size ->| - * - * +--------------------+----------+-----------------+ - * |////////////////////| block |\\\\\\\\\\\\\\\\\| - * +--------------------+----------+-----------------+ - * - * |<--- balloon[0] --->|<-- VF -->|<-- balloon[1] ->| - */ - - if (ggtt_base < wopcm || ggtt_base > GUC_GGTT_TOP || - ggtt_size > GUC_GGTT_TOP - ggtt_base) { - xe_sriov_err(xe, "tile%u: Invalid GGTT configuration: %#llx-%#llx\n", - tile->id, ggtt_base, ggtt_base + ggtt_size - 1); - return -ERANGE; - } - - start = wopcm; - end = ggtt_base; - if (end != start) { - err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[0], - start, end); - if (err) - return err; - } - - start = ggtt_base + ggtt_size; - end = GUC_GGTT_TOP; - if (end != start) { - err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[1], - start, end); - if (err) { - xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]); - return err; - } - } - - return 0; -} - -static int vf_balloon_ggtt(struct xe_tile *tile) -{ - struct xe_ggtt *ggtt = tile->mem.ggtt; - int err; - - mutex_lock(&ggtt->lock); - err = xe_tile_sriov_vf_balloon_ggtt_locked(tile); - mutex_unlock(&ggtt->lock); - - return err; -} - -/** - * xe_tile_sriov_vf_deballoon_ggtt_locked - Remove balloon nodes. 
- * @tile: the &xe_tile struct instance - */ -void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile) -{ - xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); - - xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[1]); - xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]); -} - -static void vf_deballoon_ggtt(struct xe_tile *tile) -{ - mutex_lock(&tile->mem.ggtt->lock); - xe_tile_sriov_vf_deballoon_ggtt_locked(tile); - mutex_unlock(&tile->mem.ggtt->lock); -} - -static void vf_fini_ggtt_balloons(struct xe_tile *tile) -{ - xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); - - xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[1]); - xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]); -} - -static void cleanup_ggtt(struct drm_device *drm, void *arg) -{ - struct xe_tile *tile = arg; - - vf_deballoon_ggtt(tile); - vf_fini_ggtt_balloons(tile); -} - -/** - * xe_tile_sriov_vf_prepare_ggtt - Prepare a VF's GGTT configuration. - * @tile: the &xe_tile - * - * This function is for VF use only. - * - * Return: 0 on success or a negative error code on failure. - */ -int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile) -{ - struct xe_device *xe = tile_to_xe(tile); - int err; - - err = vf_init_ggtt_balloons(tile); - if (err) - return err; - - err = vf_balloon_ggtt(tile); - if (err) { - vf_fini_ggtt_balloons(tile); - return err; - } - - return drmm_add_action_or_reset(&xe->drm, cleanup_ggtt, tile); -} - /** * DOC: GGTT nodes shifting during VF post-migration recovery * * The first fixup applied to the VF KMD structures as part of post-migration * recovery is shifting nodes within &xe_ggtt instance. The nodes are moved * from range previously assigned to this VF, into newly provisioned area. - * The changes include balloons, which are resized accordingly. - * - * The balloon nodes are there to eliminate unavailable ranges from use: one - * reserves the GGTT area below the range for current VF, and another one - * reserves area above. 
* * Below is a GGTT layout of example VF, with a certain address range assigned to * said VF, and inaccessible areas above and below: @@ -198,10 +37,6 @@ int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile) * * |<------- inaccessible for VF ------->||<-- inaccessible for VF ->| * - * GGTT nodes used for tracking allocations: - * - * |<---------- balloon ------------>|<- nodes->|<----- balloon ------>| - * * After the migration, GGTT area assigned to the VF might have shifted, either * to lower or to higher address. But we expect the total size and extra areas to * be identical, as migration can only happen between matching platforms. @@ -219,37 +54,12 @@ int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile) * So the VF has a new slice of GGTT assigned, and during migration process, the * memory content was copied to that new area. But the &xe_ggtt nodes are still * tracking allocations using the old addresses. The nodes within VF owned area - * have to be shifted, and balloon nodes need to be resized to properly mask out - * areas not owned by the VF. + * have to be shifted, and the start offset for GGTT adjusted. * - * Fixed &xe_ggtt nodes used for tracking allocations: - * - * |<------ balloon ------>|<- nodes->|<----------- balloon ----------->| - * - * Due to use of GPU profiles, we do not expect the old and new GGTT ares to + * Due to use of GPU profiles, we do not expect the old and new GGTT areas to * overlap; but our node shifting will fix addresses properly regardless. */ -/** - * xe_tile_sriov_vf_fixup_ggtt_nodes_locked - Shift GGTT allocations to match assigned range. - * @tile: the &xe_tile struct instance - * @shift: the shift value - * - * Since Global GTT is not virtualized, each VF has an assigned range - * within the global space. This range might have changed during migration, - * which requires all memory addresses pointing to GGTT to be shifted. 
- */ -void xe_tile_sriov_vf_fixup_ggtt_nodes_locked(struct xe_tile *tile, s64 shift) -{ - struct xe_ggtt *ggtt = tile->mem.ggtt; - - lockdep_assert_held(&ggtt->lock); - - xe_tile_sriov_vf_deballoon_ggtt_locked(tile); - xe_ggtt_shift_nodes_locked(ggtt, shift); - xe_tile_sriov_vf_balloon_ggtt_locked(tile); -} - /** * xe_tile_sriov_vf_lmem - VF LMEM configuration. * @tile: the &xe_tile @@ -330,7 +140,7 @@ u64 xe_tile_sriov_vf_ggtt_base(struct xe_tile *tile) xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); - return config->ggtt_base; + return READ_ONCE(config->ggtt_base); } /** @@ -346,5 +156,5 @@ void xe_tile_sriov_vf_ggtt_base_store(struct xe_tile *tile, u64 ggtt_base) xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); - config->ggtt_base = ggtt_base; + WRITE_ONCE(config->ggtt_base, ggtt_base); } diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf.h b/drivers/gpu/drm/xe/xe_tile_sriov_vf.h index 749f41504883..f2bbc4fc5734 100644 --- a/drivers/gpu/drm/xe/xe_tile_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf.h @@ -10,9 +10,6 @@ struct xe_tile; -int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile); -void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile); -void xe_tile_sriov_vf_fixup_ggtt_nodes_locked(struct xe_tile *tile, s64 shift); u64 xe_tile_sriov_vf_ggtt(struct xe_tile *tile); void xe_tile_sriov_vf_ggtt_store(struct xe_tile *tile, u64 ggtt_size); u64 xe_tile_sriov_vf_ggtt_base(struct xe_tile *tile); From 95f5f9a96dcfb2982af28d0915598bad3abb8b86 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 6 Feb 2026 12:21:12 +0100 Subject: [PATCH 115/195] drm/xe: Move struct xe_ggtt to xe_ggtt.c No users left outside of xe_ggtt.c, so we can make the struct private. This prevents us from accidentally touching it before init. 
Reviewed-by: Matthew Brost Reviewed-by: Michal Wajdeczko Link: https://patch.msgid.link/20260206112108.1453809-10-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/xe/xe_ggtt.c | 55 +++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_ggtt.h | 1 + drivers/gpu/drm/xe/xe_ggtt_types.h | 60 +----------------------------- 3 files changed, 58 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index df9def284c28..3ea94d9c257c 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -84,6 +84,61 @@ struct xe_ggtt_node { bool invalidate_on_remove; }; +/** + * struct xe_ggtt_pt_ops - GGTT Page table operations + * Which can vary from platform to platform. + */ +struct xe_ggtt_pt_ops { + /** @pte_encode_flags: Encode PTE flags for a given BO */ + u64 (*pte_encode_flags)(struct xe_bo *bo, u16 pat_index); + + /** @ggtt_set_pte: Directly write into GGTT's PTE */ + xe_ggtt_set_pte_fn ggtt_set_pte; + + /** @ggtt_get_pte: Directly read from GGTT's PTE */ + u64 (*ggtt_get_pte)(struct xe_ggtt *ggtt, u64 addr); +}; + +/** + * struct xe_ggtt - Main GGTT struct + * + * In general, each tile can contains its own Global Graphics Translation Table + * (GGTT) instance. + */ +struct xe_ggtt { + /** @tile: Back pointer to tile where this GGTT belongs */ + struct xe_tile *tile; + /** @start: Start offset of GGTT */ + u64 start; + /** @size: Total usable size of this GGTT */ + u64 size; + +#define XE_GGTT_FLAGS_64K BIT(0) + /** + * @flags: Flags for this GGTT + * Acceptable flags: + * - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K. 
+ */ + unsigned int flags; + /** @scratch: Internal object allocation used as a scratch page */ + struct xe_bo *scratch; + /** @lock: Mutex lock to protect GGTT data */ + struct mutex lock; + /** + * @gsm: The iomem pointer to the actual location of the translation + * table located in the GSM for easy PTE manipulation + */ + u64 __iomem *gsm; + /** @pt_ops: Page Table operations per platform */ + const struct xe_ggtt_pt_ops *pt_ops; + /** @mm: The memory manager used to manage individual GGTT allocations */ + struct drm_mm mm; + /** @access_count: counts GGTT writes */ + unsigned int access_count; + /** @wq: Dedicated unordered work queue to process node removals */ + struct workqueue_struct *wq; +}; + static u64 xelp_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index) { u64 pte = XE_PAGE_PRESENT; diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 49ea8e7ecc10..403eb5c0db49 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -9,6 +9,7 @@ #include "xe_ggtt_types.h" struct drm_printer; +struct xe_bo; struct xe_tile; struct drm_exec; diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index d82b71a198bc..cf754e4d502a 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -6,72 +6,16 @@ #ifndef _XE_GGTT_TYPES_H_ #define _XE_GGTT_TYPES_H_ +#include #include -#include "xe_pt_types.h" - -struct xe_bo; +struct xe_ggtt; struct xe_ggtt_node; -struct xe_gt; - -/** - * struct xe_ggtt - Main GGTT struct - * - * In general, each tile can contains its own Global Graphics Translation Table - * (GGTT) instance. 
- */ -struct xe_ggtt { - /** @tile: Back pointer to tile where this GGTT belongs */ - struct xe_tile *tile; - /** @start: Start offset of GGTT */ - u64 start; - /** @size: Total usable size of this GGTT */ - u64 size; - -#define XE_GGTT_FLAGS_64K BIT(0) - /** - * @flags: Flags for this GGTT - * Acceptable flags: - * - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K. - */ - unsigned int flags; - /** @scratch: Internal object allocation used as a scratch page */ - struct xe_bo *scratch; - /** @lock: Mutex lock to protect GGTT data */ - struct mutex lock; - /** - * @gsm: The iomem pointer to the actual location of the translation - * table located in the GSM for easy PTE manipulation - */ - u64 __iomem *gsm; - /** @pt_ops: Page Table operations per platform */ - const struct xe_ggtt_pt_ops *pt_ops; - /** @mm: The memory manager used to manage individual GGTT allocations */ - struct drm_mm mm; - /** @access_count: counts GGTT writes */ - unsigned int access_count; - /** @wq: Dedicated unordered work queue to process node removals */ - struct workqueue_struct *wq; -}; typedef void (*xe_ggtt_set_pte_fn)(struct xe_ggtt *ggtt, u64 addr, u64 pte); typedef void (*xe_ggtt_transform_cb)(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, u64 pte_flags, xe_ggtt_set_pte_fn set_pte, void *arg); -/** - * struct xe_ggtt_pt_ops - GGTT Page table operations - * Which can vary from platform to platform. 
- */ -struct xe_ggtt_pt_ops { - /** @pte_encode_flags: Encode PTE flags for a given BO */ - u64 (*pte_encode_flags)(struct xe_bo *bo, u16 pat_index); - - /** @ggtt_set_pte: Directly write into GGTT's PTE */ - xe_ggtt_set_pte_fn ggtt_set_pte; - - /** @ggtt_get_pte: Directly read from GGTT's PTE */ - u64 (*ggtt_get_pte)(struct xe_ggtt *ggtt, u64 addr); -}; #endif From a4eac88e313529e6c0bc67e28061a481b08a6477 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 6 Feb 2026 12:21:13 +0100 Subject: [PATCH 116/195] drm/xe: Make xe_ggtt_node_insert return a node This extra step is easier to handle inside xe_ggtt.c and makes xe_ggtt_node_allocated a simple null check instead, as the intermediate state 'allocated but not inserted' is no longer used. Privatize xe_ggtt_node_fini() and init() as they're no longer used outside of xe_ggtt.c Reviewed-by: Matthew Brost #v1 Reviewed-by: Michal Wajdeczko Link: https://patch.msgid.link/20260206112108.1453809-11-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 2 +- drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c | 6 +- drivers/gpu/drm/xe/xe_ggtt.c | 97 +++++++++------------ drivers/gpu/drm/xe/xe_ggtt.h | 7 +- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 24 +---- 5 files changed, 48 insertions(+), 88 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 77b05b556ba6..03e93aeabdbd 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -256,7 +256,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, size = intel_rotation_info_size(&view->rotated) * XE_PAGE_SIZE; pte = xe_ggtt_encode_pte_flags(ggtt, bo, xe->pat.idx[XE_CACHE_NONE]); - vma->node = xe_ggtt_node_insert_transform(ggtt, bo, pte, + vma->node = xe_ggtt_insert_node_transform(ggtt, bo, pte, ALIGN(size, align), align, view->type == I915_GTT_VIEW_NORMAL ? 
NULL : write_ggtt_rotated_node, diff --git a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c index acddbedcf17c..51e1e04001ac 100644 --- a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c +++ b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c @@ -38,12 +38,8 @@ static struct xe_bo *replacement_xe_managed_bo_create_pin_map(struct xe_device * if (flags & XE_BO_FLAG_GGTT) { struct xe_ggtt *ggtt = tile->mem.ggtt; - bo->ggtt_node[tile->id] = xe_ggtt_node_init(ggtt); + bo->ggtt_node[tile->id] = xe_ggtt_insert_node(ggtt, xe_bo_size(bo), SZ_4K); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo->ggtt_node[tile->id]); - - KUNIT_ASSERT_EQ(test, 0, - xe_ggtt_node_insert(bo->ggtt_node[tile->id], - xe_bo_size(bo), SZ_4K)); } return bo; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 3ea94d9c257c..9eec820b7b8d 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -69,9 +69,8 @@ /** * struct xe_ggtt_node - A node in GGTT. * - * This struct needs to be initialized (only-once) with xe_ggtt_node_init() before any node - * insertion or reservation. - * It will, then, be finalized by xe_ggtt_node_remove(). + * This struct is allocated with xe_ggtt_insert_node(,_transform) or xe_ggtt_insert_bo(,_at). + * It will be deallocated using xe_ggtt_node_remove(). 
*/ struct xe_ggtt_node { /** @ggtt: Back pointer to xe_ggtt where this region will be inserted at */ @@ -458,6 +457,11 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) mutex_unlock(&ggtt->lock); } +static void ggtt_node_fini(struct xe_ggtt_node *node) +{ + kfree(node); +} + static void ggtt_node_remove(struct xe_ggtt_node *node) { struct xe_ggtt *ggtt = node->ggtt; @@ -483,7 +487,7 @@ static void ggtt_node_remove(struct xe_ggtt_node *node) drm_dev_exit(idx); free_node: - xe_ggtt_node_fini(node); + ggtt_node_fini(node); } static void ggtt_node_remove_work_func(struct work_struct *work) @@ -613,50 +617,14 @@ void xe_ggtt_shift_nodes(struct xe_ggtt *ggtt, u64 new_start) WRITE_ONCE(ggtt->start, new_start); } -static int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, +static int xe_ggtt_insert_node_locked(struct xe_ggtt_node *node, u32 size, u32 align, u32 mm_flags) { return drm_mm_insert_node_generic(&node->ggtt->mm, &node->base, size, align, 0, mm_flags); } -/** - * xe_ggtt_node_insert - Insert a &xe_ggtt_node into the GGTT - * @node: the &xe_ggtt_node to be inserted - * @size: size of the node - * @align: alignment constrain of the node - * - * It cannot be called without first having called xe_ggtt_init() once. - * - * Return: 0 on success or a negative error code on failure. - */ -int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align) -{ - int ret; - - if (!node || !node->ggtt) - return -ENOENT; - - mutex_lock(&node->ggtt->lock); - ret = xe_ggtt_node_insert_locked(node, size, align, - DRM_MM_INSERT_HIGH); - mutex_unlock(&node->ggtt->lock); - - return ret; -} - -/** - * xe_ggtt_node_init - Initialize %xe_ggtt_node struct - * @ggtt: the &xe_ggtt where the new node will later be inserted/reserved. - * - * This function will allocate the struct %xe_ggtt_node and return its pointer. 
- * This struct will then be freed after the node removal upon xe_ggtt_node_remove() - * Having %xe_ggtt_node struct allocated doesn't mean that the node is already allocated - * in GGTT. Only xe_ggtt_node_insert() will ensure the node is inserted or reserved in GGTT. - * - * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise. - **/ -struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt) +static struct xe_ggtt_node *ggtt_node_init(struct xe_ggtt *ggtt) { struct xe_ggtt_node *node = kzalloc(sizeof(*node), GFP_NOFS); @@ -670,16 +638,31 @@ struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt) } /** - * xe_ggtt_node_fini - Forcebly finalize %xe_ggtt_node struct - * @node: the &xe_ggtt_node to be freed + * xe_ggtt_insert_node - Insert a &xe_ggtt_node into the GGTT + * @ggtt: the &xe_ggtt into which the node should be inserted. + * @size: size of the node + * @align: alignment constrain of the node * - * If anything went wrong with either xe_ggtt_node_insert() and this @node is - * not going to be reused, then this function needs to be called to free the - * %xe_ggtt_node struct - **/ -void xe_ggtt_node_fini(struct xe_ggtt_node *node) + * Return: &xe_ggtt_node on success or a ERR_PTR on failure. + */ +struct xe_ggtt_node *xe_ggtt_insert_node(struct xe_ggtt *ggtt, u32 size, u32 align) { - kfree(node); + struct xe_ggtt_node *node; + int ret; + + node = ggtt_node_init(ggtt); + if (IS_ERR(node)) + return node; + + guard(mutex)(&ggtt->lock); + ret = xe_ggtt_insert_node_locked(node, size, align, + DRM_MM_INSERT_HIGH); + if (ret) { + ggtt_node_fini(node); + return ERR_PTR(ret); + } + + return node; } /** @@ -767,7 +750,7 @@ void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo) } /** - * xe_ggtt_node_insert_transform - Insert a newly allocated &xe_ggtt_node into the GGTT + * xe_ggtt_insert_node_transform - Insert a newly allocated &xe_ggtt_node into the GGTT * @ggtt: the &xe_ggtt where the node will inserted/reserved. 
* @bo: The bo to be transformed * @pte_flags: The extra GGTT flags to add to mapping. @@ -781,7 +764,7 @@ void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo) * * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise. */ -struct xe_ggtt_node *xe_ggtt_node_insert_transform(struct xe_ggtt *ggtt, +struct xe_ggtt_node *xe_ggtt_insert_node_transform(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 pte_flags, u64 size, u32 align, xe_ggtt_transform_cb transform, void *arg) @@ -789,7 +772,7 @@ struct xe_ggtt_node *xe_ggtt_node_insert_transform(struct xe_ggtt *ggtt, struct xe_ggtt_node *node; int ret; - node = xe_ggtt_node_init(ggtt); + node = ggtt_node_init(ggtt); if (IS_ERR(node)) return ERR_CAST(node); @@ -798,7 +781,7 @@ struct xe_ggtt_node *xe_ggtt_node_insert_transform(struct xe_ggtt *ggtt, goto err; } - ret = xe_ggtt_node_insert_locked(node, size, align, 0); + ret = xe_ggtt_insert_node_locked(node, size, align, 0); if (ret) goto err_unlock; @@ -813,7 +796,7 @@ struct xe_ggtt_node *xe_ggtt_node_insert_transform(struct xe_ggtt *ggtt, err_unlock: mutex_unlock(&ggtt->lock); err: - xe_ggtt_node_fini(node); + ggtt_node_fini(node); return ERR_PTR(ret); } @@ -839,7 +822,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); - bo->ggtt_node[tile_id] = xe_ggtt_node_init(ggtt); + bo->ggtt_node[tile_id] = ggtt_node_init(ggtt); if (IS_ERR(bo->ggtt_node[tile_id])) { err = PTR_ERR(bo->ggtt_node[tile_id]); bo->ggtt_node[tile_id] = NULL; @@ -870,7 +853,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base, xe_bo_size(bo), alignment, 0, start, end, 0); if (err) { - xe_ggtt_node_fini(bo->ggtt_node[tile_id]); + ggtt_node_fini(bo->ggtt_node[tile_id]); bo->ggtt_node[tile_id] = NULL; } else { u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? 
XE_CACHE_NONE : XE_CACHE_WB; diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 403eb5c0db49..9e6210c6f44e 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -18,15 +18,14 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 reserved, u32 size); int xe_ggtt_init(struct xe_ggtt *ggtt); -struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt); -void xe_ggtt_node_fini(struct xe_ggtt_node *node); void xe_ggtt_shift_nodes(struct xe_ggtt *ggtt, u64 new_base); u64 xe_ggtt_start(struct xe_ggtt *ggtt); u64 xe_ggtt_size(struct xe_ggtt *ggtt); -int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align); struct xe_ggtt_node * -xe_ggtt_node_insert_transform(struct xe_ggtt *ggtt, +xe_ggtt_insert_node(struct xe_ggtt *ggtt, u32 size, u32 align); +struct xe_ggtt_node * +xe_ggtt_insert_node_transform(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 pte, u64 size, u32 align, xe_ggtt_transform_cb transform, void *arg); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 23601ce79348..3fe664cd3b88 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -482,23 +482,9 @@ static int pf_distribute_config_ggtt(struct xe_tile *tile, unsigned int vfid, u6 return err ?: err2; } -static void pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node) -{ - if (xe_ggtt_node_allocated(node)) { - /* - * explicit GGTT PTE assignment to the PF using xe_ggtt_assign() - * is redundant, as PTE will be implicitly re-assigned to PF by - * the xe_ggtt_clear() called by below xe_ggtt_remove_node(). 
- */ - xe_ggtt_node_remove(node, false); - } else { - xe_ggtt_node_fini(node); - } -} - static void pf_release_vf_config_ggtt(struct xe_gt *gt, struct xe_gt_sriov_config *config) { - pf_release_ggtt(gt_to_tile(gt), config->ggtt_region); + xe_ggtt_node_remove(config->ggtt_region, false); config->ggtt_region = NULL; } @@ -533,14 +519,10 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) if (!size) return 0; - node = xe_ggtt_node_init(ggtt); + node = xe_ggtt_insert_node(ggtt, size, alignment); if (IS_ERR(node)) return PTR_ERR(node); - err = xe_ggtt_node_insert(node, size, alignment); - if (unlikely(err)) - goto err; - xe_ggtt_assign(node, vfid); xe_gt_sriov_dbg_verbose(gt, "VF%u assigned GGTT %llx-%llx\n", vfid, xe_ggtt_node_addr(node), xe_ggtt_node_addr(node) + size - 1); @@ -552,7 +534,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) config->ggtt_region = node; return 0; err: - pf_release_ggtt(tile, node); + xe_ggtt_node_remove(node, false); return err; } From 08d05c736605fb3dd3852a37c8bf20cd0fc2e08b Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 6 Feb 2026 12:21:14 +0100 Subject: [PATCH 117/195] drm/xe: Remove xe_ggtt_node_allocated With the intermediate state gone, no longer useful. Just check against NULL where needed. After looking carefully, the check for allocated in xe_fb_pin.c is unneeded. vma->node is never NULL. The check is specifically only to check if vma->node == the bo's root tile ggtt_obj. 
Reviewed-by: Michal Wajdeczko Link: https://patch.msgid.link/20260206112108.1453809-12-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 3 +-- drivers/gpu/drm/xe/xe_ggtt.c | 17 ----------------- drivers/gpu/drm/xe/xe_ggtt.h | 1 - drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 12 ++++++------ 4 files changed, 7 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 03e93aeabdbd..e53fc2bb59ce 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -352,8 +352,7 @@ static void __xe_unpin_fb_vma(struct i915_vma *vma) if (vma->dpt) xe_bo_unpin_map_no_vm(vma->dpt); - else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node[tile_id]) || - vma->bo->ggtt_node[tile_id] != vma->node) + else if (vma->bo->ggtt_node[tile_id] != vma->node) xe_ggtt_node_remove(vma->node, false); ttm_bo_reserve(&vma->bo->ttm, false, false, NULL); diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 9eec820b7b8d..79310f565fe3 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -665,20 +665,6 @@ struct xe_ggtt_node *xe_ggtt_insert_node(struct xe_ggtt *ggtt, u32 size, u32 ali return node; } -/** - * xe_ggtt_node_allocated - Check if node is allocated in GGTT - * @node: the &xe_ggtt_node to be inspected - * - * Return: True if allocated, False otherwise. - */ -bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node) -{ - if (!node || !node->ggtt) - return false; - - return drm_mm_node_allocated(&node->base); -} - /** * xe_ggtt_node_pt_size() - Get the size of page table entries needed to map a GGTT node. 
* @node: the &xe_ggtt_node @@ -971,9 +957,6 @@ static void xe_ggtt_assign_locked(const struct xe_ggtt_node *node, u16 vfid) lockdep_assert_held(&ggtt->lock); - if (!xe_ggtt_node_allocated(node)) - return; - while (start < end) { ggtt->pt_ops->ggtt_set_pte(ggtt, start, pte); start += XE_PAGE_SIZE; diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 9e6210c6f44e..c864cc975a69 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -30,7 +30,6 @@ xe_ggtt_insert_node_transform(struct xe_ggtt *ggtt, u64 size, u32 align, xe_ggtt_transform_cb transform, void *arg); void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate); -bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node); size_t xe_ggtt_node_pt_size(const struct xe_ggtt_node *node); void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo, struct drm_exec *exec); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 3fe664cd3b88..888193e1d2c5 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -279,7 +279,7 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, { struct xe_ggtt_node *node = config->ggtt_region; - if (!xe_ggtt_node_allocated(node)) + if (!node) return 0; return encode_ggtt(cfg, xe_ggtt_node_addr(node), xe_ggtt_node_size(node), details); @@ -503,7 +503,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) size = round_up(size, alignment); - if (xe_ggtt_node_allocated(config->ggtt_region)) { + if (config->ggtt_region) { err = pf_distribute_config_ggtt(tile, vfid, 0, 0); if (unlikely(err)) return err; @@ -514,7 +514,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) if (unlikely(err)) return err; } - xe_gt_assert(gt, !xe_ggtt_node_allocated(config->ggtt_region)); + 
 xe_gt_assert(gt, !config->ggtt_region);
 
 	if (!size)
 		return 0;
@@ -544,7 +544,7 @@ static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid)
 	struct xe_ggtt_node *node = config->ggtt_region;
 
 	xe_gt_assert(gt, xe_gt_is_main_type(gt));
-	return xe_ggtt_node_allocated(node) ? xe_ggtt_node_size(node) : 0;
+	return node ? xe_ggtt_node_size(node) : 0;
 }
 
 /**
@@ -2558,7 +2558,7 @@ int xe_gt_sriov_pf_config_release(struct xe_gt *gt, unsigned int vfid, bool forc
 
 static void pf_sanitize_ggtt(struct xe_ggtt_node *ggtt_region, unsigned int vfid)
 {
-	if (xe_ggtt_node_allocated(ggtt_region))
+	if (ggtt_region)
 		xe_ggtt_assign(ggtt_region, vfid);
 }
 
@@ -3017,7 +3017,7 @@ int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p)
 	for (n = 1; n <= total_vfs; n++) {
 		config = &gt->sriov.pf.vfs[n].config;
-		if (!xe_ggtt_node_allocated(config->ggtt_region))
+		if (!config->ggtt_region)
 			continue;
 
 		string_get_size(xe_ggtt_node_size(config->ggtt_region), 1, STRING_UNITS_2,

From 95162db0208aee122d10ac1342fe97a1721cd258 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Mon, 16 Feb 2026 14:46:01 +0100
Subject: [PATCH 118/195] drm/pagemap: pass pagemap_addr by reference
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Passing a structure by value into a function is sometimes problematic,
for a number of reasons. One of these is a warning from the 32-bit arm
compiler:

drivers/gpu/drm/drm_gpusvm.c: In function '__drm_gpusvm_unmap_pages':
drivers/gpu/drm/drm_gpusvm.c:1152:33: note: parameter passing for argument of type 'struct drm_pagemap_addr' changed in GCC 9.1
 1152 |                                 dpagemap->ops->device_unmap(dpagemap,
      |                                 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1153 |                                                             dev, *addr);
      |                                                             ~~~~~~~~~~~

This particular problem is harmless since we are not mixing compiler
versions inside of the kernel. However, passing this by reference avoids
the warning along with providing slightly better calling conventions as
it avoids an extra copy on the stack.
Fixes: 75af93b3f5d0 ("drm/pagemap, drm/xe: Support destination migration over interconnect") Fixes: 2df55d9e66a2 ("drm/xe: Support pcie p2p dma as a fast interconnect") Signed-off-by: Arnd Bergmann Reviewed-by: Thomas Hellström Signed-off-by: Thomas Hellström Link: https://patch.msgid.link/20260216134644.1025365-1-arnd@kernel.org Acked-by: Maarten Lankhorst --- drivers/gpu/drm/drm_gpusvm.c | 2 +- drivers/gpu/drm/drm_pagemap.c | 2 +- drivers/gpu/drm/xe/xe_svm.c | 8 ++++---- include/drm/drm_pagemap.h | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index c25f50cad6fe..81626b00b755 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1150,7 +1150,7 @@ static void __drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm, addr->dir); else if (dpagemap && dpagemap->ops->device_unmap) dpagemap->ops->device_unmap(dpagemap, - dev, *addr); + dev, addr); i += 1 << addr->order; } diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c index aa43a8475100..5f28f035bb1f 100644 --- a/drivers/gpu/drm/drm_pagemap.c +++ b/drivers/gpu/drm/drm_pagemap.c @@ -318,7 +318,7 @@ static void drm_pagemap_migrate_unmap_pages(struct device *dev, struct drm_pagemap_zdd *zdd = page->zone_device_data; struct drm_pagemap *dpagemap = zdd->dpagemap; - dpagemap->ops->device_unmap(dpagemap, dev, pagemap_addr[i]); + dpagemap->ops->device_unmap(dpagemap, dev, &pagemap_addr[i]); } else { dma_unmap_page(dev, pagemap_addr[i].addr, PAGE_SIZE << pagemap_addr[i].order, dir); diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 213f0334518a..78f4b2c60670 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -1676,13 +1676,13 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, static void xe_drm_pagemap_device_unmap(struct drm_pagemap *dpagemap, struct device *dev, - struct drm_pagemap_addr addr) + const struct drm_pagemap_addr *addr) { - 
if (addr.proto != XE_INTERCONNECT_P2P) + if (addr->proto != XE_INTERCONNECT_P2P) return; - dma_unmap_resource(dev, addr.addr, PAGE_SIZE << addr.order, - addr.dir, DMA_ATTR_SKIP_CPU_SYNC); + dma_unmap_resource(dev, addr->addr, PAGE_SIZE << addr->order, + addr->dir, DMA_ATTR_SKIP_CPU_SYNC); } static void xe_pagemap_destroy_work(struct work_struct *work) diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h index 2baf0861f78f..c848f578e3da 100644 --- a/include/drm/drm_pagemap.h +++ b/include/drm/drm_pagemap.h @@ -95,7 +95,7 @@ struct drm_pagemap_ops { */ void (*device_unmap)(struct drm_pagemap *dpagemap, struct device *dev, - struct drm_pagemap_addr addr); + const struct drm_pagemap_addr *addr); /** * @populate_mm: Populate part of the mm with @dpagemap memory, From 48eb073c7d95883eca2789447f94e1e8cafbabe5 Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Fri, 6 Feb 2026 13:46:55 +0530 Subject: [PATCH 119/195] drm/xe/hwmon: Prevent unintended VRAM channel creation Remove the unnecessary VRAM channel entry introduced in xe_hwmon_channel. Without this, adding any new hwmon channel causes extra VRAM channel to appear. This remained unnoticed earlier because VRAM was the final xe hwmon channel. v2: Use MAX_VRAM_CHANNELS with in_range() instead of CHANNEL_VRAM_N_MAX. 
(Raag) Fixes: 49a498338417 ("drm/xe/hwmon: Expose individual VRAM channel temperature") Signed-off-by: Karthik Poosa Reviewed-by: Raag Jadav Link: https://patch.msgid.link/20260206081655.2115439-1-karthik.poosa@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hwmon.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index baf277955b33..0fd4d4f1014a 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -48,7 +48,7 @@ enum xe_hwmon_channel { CHANNEL_MCTRL, CHANNEL_PCIE, CHANNEL_VRAM_N, - CHANNEL_VRAM_N_MAX = CHANNEL_VRAM_N + MAX_VRAM_CHANNELS, + CHANNEL_VRAM_N_MAX = CHANNEL_VRAM_N + MAX_VRAM_CHANNELS - 1, CHANNEL_MAX, }; @@ -264,7 +264,7 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg return BMG_PACKAGE_TEMPERATURE; else if (channel == CHANNEL_VRAM) return BMG_VRAM_TEMPERATURE; - else if (in_range(channel, CHANNEL_VRAM_N, CHANNEL_VRAM_N_MAX)) + else if (in_range(channel, CHANNEL_VRAM_N, MAX_VRAM_CHANNELS)) return BMG_VRAM_TEMPERATURE_N(channel - CHANNEL_VRAM_N); } else if (xe->info.platform == XE_DG2) { if (channel == CHANNEL_PKG) @@ -1427,7 +1427,7 @@ static int xe_hwmon_read_label(struct device *dev, *str = "mctrl"; else if (channel == CHANNEL_PCIE) *str = "pcie"; - else if (in_range(channel, CHANNEL_VRAM_N, CHANNEL_VRAM_N_MAX)) + else if (in_range(channel, CHANNEL_VRAM_N, MAX_VRAM_CHANNELS)) *str = hwmon->temp.vram_label[channel - CHANNEL_VRAM_N]; return 0; case hwmon_power: From 9ff885ef8b428febbf41f13a511755d74704949e Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 17 Feb 2026 12:05:52 -0800 Subject: [PATCH 120/195] drm/xe: Convert GT stats to per-cpu counters Current GT statistics use atomic64_t counters. Atomic operations incur a global coherency penalty. Transition to dynamic per-cpu counters using alloc_percpu(). 
This allows stats to be incremented via this_cpu_add(), which compiles to a single non-locking instruction. This approach keeps the hot-path updates local to the CPU, avoiding expensive cross-core cache invalidation traffic. Use for_each_possible_cpu() during aggregation and clear operations to ensure data consistency across CPU hotplug events. Signed-off-by: Matthew Brost Reviewed-by: Stuart Summers Link: https://patch.msgid.link/20260217200552.596718-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 5 ++ drivers/gpu/drm/xe/xe_gt_stats.c | 63 +++++++++++++++++++++----- drivers/gpu/drm/xe/xe_gt_stats.h | 6 +++ drivers/gpu/drm/xe/xe_gt_stats_types.h | 19 ++++++++ drivers/gpu/drm/xe/xe_gt_types.h | 5 +- 5 files changed, 82 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 68c4771de040..1203d087b68f 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -33,6 +33,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_vf.h" +#include "xe_gt_stats.h" #include "xe_gt_sysfs.h" #include "xe_gt_topology.h" #include "xe_guc_exec_queue_types.h" @@ -455,6 +456,10 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; + err = xe_gt_stats_init(gt); + if (err) + return err; + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT); if (!fw_ref.domains) return -ETIMEDOUT; diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c index 37506434d7a3..8ed0160a6041 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.c +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -3,12 +3,37 @@ * Copyright © 2024 Intel Corporation */ -#include - +#include #include +#include "xe_device.h" #include "xe_gt_stats.h" -#include "xe_gt_types.h" + +static void xe_gt_stats_fini(struct drm_device *drm, void *arg) +{ + struct xe_gt *gt = arg; + + free_percpu(gt->stats); +} + +/** + * xe_gt_stats_init() - Initialize GT statistics + * @gt: GT structure + * + * Allocate per-CPU GT 
statistics. Using per-CPU stats allows increments + * to occur without cross-CPU atomics. + * + * Return: 0 on success, -ENOMEM on failure. + */ +int xe_gt_stats_init(struct xe_gt *gt) +{ + gt->stats = alloc_percpu(struct xe_gt_stats); + if (!gt->stats) + return -ENOMEM; + + return drmm_add_action_or_reset(>_to_xe(gt)->drm, xe_gt_stats_fini, + gt); +} /** * xe_gt_stats_incr - Increments the specified stats counter @@ -23,7 +48,7 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) if (id >= __XE_GT_STATS_NUM_IDS) return; - atomic64_add(incr, >->stats.counters[id]); + this_cpu_add(gt->stats->counters[id], incr); } #define DEF_STAT_STR(ID, name) [XE_GT_STATS_ID_##ID] = name @@ -94,23 +119,37 @@ int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p) { enum xe_gt_stats_id id; - for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id) - drm_printf(p, "%s: %lld\n", stat_description[id], - atomic64_read(>->stats.counters[id])); + for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id) { + u64 total = 0; + int cpu; + + for_each_possible_cpu(cpu) { + struct xe_gt_stats *s = per_cpu_ptr(gt->stats, cpu); + + total += s->counters[id]; + } + + drm_printf(p, "%s: %lld\n", stat_description[id], total); + } return 0; } /** - * xe_gt_stats_clear - Clear the GT stats + * xe_gt_stats_clear() - Clear the GT stats * @gt: GT structure * - * This clear (zeros) all the available GT stats. + * Clear (zero) all available GT stats. Note that if the stats are being + * updated while this function is running, the results may be unpredictable. + * Intended to be called on an idle GPU. 
*/ void xe_gt_stats_clear(struct xe_gt *gt) { - int id; + int cpu; - for (id = 0; id < ARRAY_SIZE(gt->stats.counters); ++id) - atomic64_set(>->stats.counters[id], 0); + for_each_possible_cpu(cpu) { + struct xe_gt_stats *s = per_cpu_ptr(gt->stats, cpu); + + memset(s, 0, sizeof(*s)); + } } diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h index 59a7bf60e242..3d0defab9b30 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.h +++ b/drivers/gpu/drm/xe/xe_gt_stats.h @@ -14,10 +14,16 @@ struct xe_gt; struct drm_printer; #ifdef CONFIG_DEBUG_FS +int xe_gt_stats_init(struct xe_gt *gt); int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p); void xe_gt_stats_clear(struct xe_gt *gt); void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr); #else +static inline int xe_gt_stats_init(struct xe_gt *gt) +{ + return 0; +} + static inline void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h index b8accdbc54eb..79568591bd67 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats_types.h +++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h @@ -6,6 +6,8 @@ #ifndef _XE_GT_STATS_TYPES_H_ #define _XE_GT_STATS_TYPES_H_ +#include + enum xe_gt_stats_id { XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, XE_GT_STATS_ID_TLB_INVAL, @@ -58,4 +60,21 @@ enum xe_gt_stats_id { __XE_GT_STATS_NUM_IDS, }; +/** + * struct xe_gt_stats - Per-CPU GT statistics counters + * @counters: Array of 64-bit counters indexed by &enum xe_gt_stats_id + * + * This structure is used for high-frequency, per-CPU statistics collection + * in the Xe driver. By using a per-CPU allocation and ensuring the structure + * is cache-line aligned, we avoid the performance-heavy atomics and cache + * coherency traffic. 
+ * + * Updates to these counters should be performed using the this_cpu_add() + * macro to ensure they are atomic with respect to local interrupts and + * preemption-safe without the overhead of explicit locking. + */ +struct xe_gt_stats { + u64 counters[__XE_GT_STATS_NUM_IDS]; +} ____cacheline_aligned; + #endif diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index caf7e7e78be9..8b55cf25a75f 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -158,10 +158,7 @@ struct xe_gt { #if IS_ENABLED(CONFIG_DEBUG_FS) /** @stats: GT stats */ - struct { - /** @stats.counters: counters for various GT stats */ - atomic64_t counters[__XE_GT_STATS_NUM_IDS]; - } stats; + struct xe_gt_stats __percpu *stats; #endif /** From 6c2e331c915ba9e774aa847921262805feb00863 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 6 Feb 2026 14:30:59 -0800 Subject: [PATCH 121/195] drm/xe/wa: Steer RMW of MCR registers while building default LRC When generating the default LRC, if a register is not masked, we apply any save-restore programming necessary via a read-modify-write sequence that will ensure we only update the relevant bits/fields without clobbering the rest of the register. However some of the registers that need to be updated might be MCR registers which require steering to a non-terminated instance to ensure we can read back a valid, non-zero value. The steering of reads originating from a command streamer is controlled by register CS_MMIO_GROUP_INSTANCE_SELECT. Emit additional MI_LRI commands to update the steering before any RMW of an MCR register to ensure the reads are performed properly. Note that needing to perform a RMW of an MCR register while building the default LRC is pretty rare. Most of the MCR registers that are part of an engine's LRCs are also masked registers, so no MCR is necessary. 
Fixes: f2f90989ccff ("drm/xe: Avoid reading RMW registers in emit_wa_job") Cc: Michal Wajdeczko Reviewed-by: Balasubramani Vivekanandan Link: https://patch.msgid.link/20260206223058.387014-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 6 +++ drivers/gpu/drm/xe/xe_gt.c | 66 +++++++++++++++++++----- 2 files changed, 60 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 68172b0248a6..dc5a4fafa70c 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -96,6 +96,12 @@ #define ENABLE_SEMAPHORE_POLL_BIT REG_BIT(13) #define RING_CMD_CCTL(base) XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED) + +#define CS_MMIO_GROUP_INSTANCE_SELECT(base) XE_REG((base) + 0xcc) +#define SELECTIVE_READ_ADDRESSING REG_BIT(30) +#define SELECTIVE_READ_GROUP REG_GENMASK(29, 23) +#define SELECTIVE_READ_INSTANCE REG_GENMASK(22, 16) + /* * CMD_CCTL read/write fields take a MOCS value and _not_ a table index. * The lsb of each can be considered a separate enabling bit for encryption. 
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 1203d087b68f..b455af1e6072 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -208,11 +208,15 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) return ret; } +/* Dwords required to emit a RMW of a register */ +#define EMIT_RMW_DW 20 + static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) { - struct xe_reg_sr *sr = &q->hwe->reg_lrc; + struct xe_hw_engine *hwe = q->hwe; + struct xe_reg_sr *sr = &hwe->reg_lrc; struct xe_reg_sr_entry *entry; - int count_rmw = 0, count = 0, ret; + int count_rmw = 0, count_rmw_mcr = 0, count = 0, ret; unsigned long idx; struct xe_bb *bb; size_t bb_len = 0; @@ -222,6 +226,8 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) xa_for_each(&sr->xa, idx, entry) { if (entry->reg.masked || entry->clr_bits == ~0) ++count; + else if (entry->reg.mcr) + ++count_rmw_mcr; else ++count_rmw; } @@ -229,17 +235,35 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) if (count) bb_len += count * 2 + 1; - if (count_rmw) - bb_len += count_rmw * 20 + 7; + /* + * RMW of MCR registers is the same as a normal RMW, except an + * additional LRI (3 dwords) is required per register to steer the read + * to a nom-terminated instance. + * + * We could probably shorten the batch slightly by eliding the + * steering for consecutive MCR registers that have the same + * group/instance target, but it's not worth the extra complexity to do + * so. + */ + bb_len += count_rmw * EMIT_RMW_DW; + bb_len += count_rmw_mcr * (EMIT_RMW_DW + 3); - if (q->hwe->class == XE_ENGINE_CLASS_RENDER) + /* + * After doing all RMW, we need 7 trailing dwords to clean up, + * plus an additional 3 dwords to reset steering if any of the + * registers were MCR. + */ + if (count_rmw || count_rmw_mcr) + bb_len += 7 + (count_rmw_mcr ? 
3 : 0); + + if (hwe->class == XE_ENGINE_CLASS_RENDER) /* * Big enough to emit all of the context's 3DSTATE via * xe_lrc_emit_hwe_state_instructions() */ - bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32); + bb_len += xe_gt_lrc_size(gt, hwe->class) / sizeof(u32); - xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len); + xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", hwe->name, bb_len); bb = xe_bb_new(gt, bb_len, false); if (IS_ERR(bb)) @@ -274,13 +298,23 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) } } - if (count_rmw) { - /* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */ - + if (count_rmw || count_rmw_mcr) { xa_for_each(&sr->xa, idx, entry) { if (entry->reg.masked || entry->clr_bits == ~0) continue; + if (entry->reg.mcr) { + struct xe_reg_mcr reg = { .__reg.raw = entry->reg.raw }; + u8 group, instance; + + xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance); + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); + *cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(hwe->mmio_base).addr; + *cs++ = SELECTIVE_READ_ADDRESSING | + REG_FIELD_PREP(SELECTIVE_READ_GROUP, group) | + REG_FIELD_PREP(SELECTIVE_READ_INSTANCE, instance); + } + *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; *cs++ = entry->reg.addr; *cs++ = CS_GPR_REG(0, 0).addr; @@ -306,8 +340,9 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) *cs++ = CS_GPR_REG(0, 0).addr; *cs++ = entry->reg.addr; - xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n", - entry->reg.addr, entry->clr_bits, entry->set_bits); + xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x%s\n", + entry->reg.addr, entry->clr_bits, entry->set_bits, + entry->reg.mcr ? 
" (MCR)" : ""); } /* reset used GPR */ @@ -319,6 +354,13 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) *cs++ = 0; *cs++ = CS_GPR_REG(0, 2).addr; *cs++ = 0; + + /* reset steering */ + if (count_rmw_mcr) { + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); + *cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(q->hwe->mmio_base).addr; + *cs++ = 0; + } } cs = xe_lrc_emit_hwe_state_instructions(q, cs); From a41ee215b59d78f8cd0a4b05e373335944e0ecb0 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 18 Feb 2026 14:09:12 -0800 Subject: [PATCH 122/195] drm/xe/reg_sr: Don't process gt/hwe lists in VF There are a few different reg_sr lists managed by the driver for workarounds/tuning: - gt->reg_sr - hwe->reg_sr - hwe->reg_lrc The first two are not relevant to SRIOV VFs; a VF KMD does not have access to the registers that appear on this list and it is the PF KMD's responsibility to apply such programming on behalf of the entire system. However the third list contains per-client values that the VF KMD needs to ensure are incorporated whenever a new LRC is created. Handling of reg_sr lists comes in two steps: processing an RTP table to build a reg_sr from the relevant entries, and then applying the contents of the reg_sr. Skipping the RTP processing (resulting in an empty reg_sr) or skipping the application of a reg_sr are both valid ways to avoid having a VF accidentally try to write registers it doesn't have access to. In commit c19e705ec981 ("drm/xe/vf: Stop applying save-restore MMIOs if VF") and commit 92a5bd302458 ("drm/xe/vf: Unblock xe_rtp_process_to_sr for VFs") we adjusted the drivers behavior to always process the RTP table into a reg_sr and just skipped the application step. This works fine functionally, but can lead to confusion during debugging since facilities like the debugfs 'register-save-restore' will still report a bunch of registers that the VF KMD isn't actually trying to handle. It will also mislead other upcoming debug changes. 
Let's go back to skipping the RTP => reg_sr processing step, but only for GT / hwe tables this time. This will allow LRC reg_sr handling to continue to work, but will ensure that gt->reg_sr and hwe->reg_sr remain empty and that debugfs reporting more accurately reflects the KMD's behavior. v2: - Also skip the hwe processing in hw_engine_setup_default_state() and xe_reg_whitelist_process_engine(). v3: - Handle skipping via an additional parameter passed to xe_rtp_process_to_sr() rather than adding conditions at each callsite. (Ashutosh) Cc: Michal Wajdeczko Cc: Ashutosh Dixit Cc: Harish Chegondi Reviewed-by: Ashutosh Dixit Link: https://patch.msgid.link/20260218-sr_verify-v4-1-35d6deeb3421@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/tests/xe_rtp_test.c | 3 ++- drivers/gpu/drm/xe/xe_hw_engine.c | 6 ++++-- drivers/gpu/drm/xe/xe_reg_sr.c | 8 ++++++-- drivers/gpu/drm/xe/xe_reg_whitelist.c | 2 +- drivers/gpu/drm/xe/xe_rtp.c | 8 +++++++- drivers/gpu/drm/xe/xe_rtp.h | 3 ++- drivers/gpu/drm/xe/xe_tuning.c | 9 ++++++--- drivers/gpu/drm/xe/xe_wa.c | 9 ++++++--- 8 files changed, 34 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index d2255a59e58f..e5a0f985a700 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -322,7 +322,8 @@ static void xe_rtp_process_to_sr_tests(struct kunit *test) count_rtp_entries++; xe_rtp_process_ctx_enable_active_tracking(&ctx, &active, count_rtp_entries); - xe_rtp_process_to_sr(&ctx, param->entries, count_rtp_entries, reg_sr); + xe_rtp_process_to_sr(&ctx, param->entries, count_rtp_entries, + reg_sr, false); xa_for_each(®_sr->xa, idx, sre) { if (idx == param->expected_reg.addr) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 4d3ee5226e3a..05810428236e 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -408,7 +408,8 @@ 
xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) }, }; - xe_rtp_process_to_sr(&ctx, lrc_setup, ARRAY_SIZE(lrc_setup), &hwe->reg_lrc); + xe_rtp_process_to_sr(&ctx, lrc_setup, ARRAY_SIZE(lrc_setup), + &hwe->reg_lrc, true); } static void @@ -472,7 +473,8 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) }, }; - xe_rtp_process_to_sr(&ctx, engine_entries, ARRAY_SIZE(engine_entries), &hwe->reg_sr); + xe_rtp_process_to_sr(&ctx, engine_entries, ARRAY_SIZE(engine_entries), + &hwe->reg_sr, false); } static const struct engine_info *find_engine_info(enum xe_engine_class class, int instance) diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index d3e13ea33123..1ac911fc6e94 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -13,6 +13,7 @@ #include #include +#include "xe_assert.h" #include "xe_device.h" #include "xe_device_types.h" #include "xe_force_wake.h" @@ -169,8 +170,11 @@ void xe_reg_sr_apply_mmio(struct xe_reg_sr *sr, struct xe_gt *gt) if (xa_empty(&sr->xa)) return; - if (IS_SRIOV_VF(gt_to_xe(gt))) - return; + /* + * We don't process non-LRC reg_sr lists in VF, so they should have + * been empty in the check above. 
+ */ + xe_gt_assert(gt, !IS_SRIOV_VF(gt_to_xe(gt))); xe_gt_dbg(gt, "Applying %s save-restore MMIOs\n", sr->name); diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 728aba8dbd95..80577e4b7437 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -189,7 +189,7 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe) struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); xe_rtp_process_to_sr(&ctx, register_whitelist, ARRAY_SIZE(register_whitelist), - &hwe->reg_whitelist); + &hwe->reg_whitelist, false); whitelist_apply_to_hwe(hwe); } diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index b7c26e2fb411..7bfdc6795ce6 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -270,6 +270,8 @@ static void rtp_mark_active(struct xe_device *xe, * @sr: Save-restore struct where matching rules execute the action. This can be * viewed as the "coalesced view" of multiple the tables. The bits for each * register set are expected not to collide with previously added entries + * @process_in_vf: Whether this RTP table should get processed for SR-IOV VF + * devices. Should generally only be 'true' for LRC tables. * * Walk the table pointed by @entries (with an empty sentinel) and add all * entries with matching rules to @sr. 
If @hwe is not NULL, its mmio_base is @@ -278,7 +280,8 @@ static void rtp_mark_active(struct xe_device *xe, void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, const struct xe_rtp_entry_sr *entries, size_t n_entries, - struct xe_reg_sr *sr) + struct xe_reg_sr *sr, + bool process_in_vf) { const struct xe_rtp_entry_sr *entry; struct xe_hw_engine *hwe = NULL; @@ -287,6 +290,9 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, rtp_get_context(ctx, &hwe, >, &xe); + if (!process_in_vf && IS_SRIOV_VF(xe)) + return; + xe_assert(xe, entries); for (entry = entries; entry - entries < n_entries; entry++) { diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index ba5f940c0a96..be4195264286 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -431,7 +431,8 @@ void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx, void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, const struct xe_rtp_entry_sr *entries, - size_t n_entries, struct xe_reg_sr *sr); + size_t n_entries, struct xe_reg_sr *sr, + bool process_in_vf); void xe_rtp_process(struct xe_rtp_process_ctx *ctx, const struct xe_rtp_entry *entries); diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 316f5e2b2e48..ea90e8c99754 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -15,6 +15,7 @@ #include "xe_gt_types.h" #include "xe_platform_types.h" #include "xe_rtp.h" +#include "xe_sriov.h" #undef XE_REG_MCR #define XE_REG_MCR(...) 
XE_REG(__VA_ARGS__, .mcr = 1) @@ -200,7 +201,8 @@ void xe_tuning_process_gt(struct xe_gt *gt) xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings)); - xe_rtp_process_to_sr(&ctx, gt_tunings, ARRAY_SIZE(gt_tunings), >->reg_sr); + xe_rtp_process_to_sr(&ctx, gt_tunings, ARRAY_SIZE(gt_tunings), + >->reg_sr, false); } EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt); @@ -212,7 +214,7 @@ void xe_tuning_process_engine(struct xe_hw_engine *hwe) hwe->gt->tuning_active.engine, ARRAY_SIZE(engine_tunings)); xe_rtp_process_to_sr(&ctx, engine_tunings, ARRAY_SIZE(engine_tunings), - &hwe->reg_sr); + &hwe->reg_sr, false); } EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine); @@ -231,7 +233,8 @@ void xe_tuning_process_lrc(struct xe_hw_engine *hwe) xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->tuning_active.lrc, ARRAY_SIZE(lrc_tunings)); - xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), &hwe->reg_lrc); + xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), + &hwe->reg_lrc, true); } /** diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 61c4187dc0ae..e6b7f65f2fc1 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -1005,7 +1005,8 @@ void xe_wa_process_gt(struct xe_gt *gt) xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.gt, ARRAY_SIZE(gt_was)); - xe_rtp_process_to_sr(&ctx, gt_was, ARRAY_SIZE(gt_was), >->reg_sr); + xe_rtp_process_to_sr(&ctx, gt_was, ARRAY_SIZE(gt_was), + >->reg_sr, false); } EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt); @@ -1023,7 +1024,8 @@ void xe_wa_process_engine(struct xe_hw_engine *hwe) xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.engine, ARRAY_SIZE(engine_was)); - xe_rtp_process_to_sr(&ctx, engine_was, ARRAY_SIZE(engine_was), &hwe->reg_sr); + xe_rtp_process_to_sr(&ctx, engine_was, ARRAY_SIZE(engine_was), + &hwe->reg_sr, false); } /** @@ -1040,7 +1042,8 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe) 
xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.lrc, ARRAY_SIZE(lrc_was)); - xe_rtp_process_to_sr(&ctx, lrc_was, ARRAY_SIZE(lrc_was), &hwe->reg_lrc); + xe_rtp_process_to_sr(&ctx, lrc_was, ARRAY_SIZE(lrc_was), + &hwe->reg_lrc, true); } /** From d389489225b85aac3ad90ed8b5661679f27a2da4 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 18 Feb 2026 14:09:13 -0800 Subject: [PATCH 123/195] drm/xe/reg_sr: Add debugfs to verify status of reg_sr programming When applying save-restore register programming for workarounds, tuning settings, and general device configuration we assume the programming was successful. However there are a number of cases where the desired reg_sr programming can become lost: - workarounds implemented on the wrong RTP table might not get saved/restored at the right time leading to, for example, failure to re-apply the programming after engine resets - some hardware registers become "locked" and can no longer be updated after firmware or the driver finishes initializing them - sometimes the hardware teams just made a mistake when documenting the register and/or bits that needed to be programmed Add a debugfs entry that will read back the registers referenced on a GT's save-restore lists and print any cases where the desired programming is no longer in effect. Such cases might indicate the presence of a driver/firmware bug, might indicate that the documentation we were following has a mistake, or might be benign (occasionally registers have broken read-back capability preventing verification, but previous writes were still successful and effective). For now we only verify the GT and engine reg_sr lists. Verifying the LRC list will require checking the expected programming against the default_lrc contents, not the live registers (which may not reflect the reg_sr programming if no context is actively running). 
Reviewed-by: Ashutosh Dixit Link: https://patch.msgid.link/20260218-sr_verify-v4-2-35d6deeb3421@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 26 +++++++++++++++++++++++ drivers/gpu/drm/xe/xe_reg_sr.c | 34 ++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_reg_sr.h | 3 +++ 3 files changed, 63 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 4363bc9c3606..aa43427a9f4b 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -155,6 +155,30 @@ static int register_save_restore(struct xe_gt *gt, struct drm_printer *p) return 0; } +/* + * Check the registers referenced on a save-restore list and report any + * save-restore entries that did not get applied. + */ +static int register_save_restore_check(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) { + drm_printf(p, "ERROR: Could not acquire forcewake\n"); + return -ETIMEDOUT; + } + + xe_reg_sr_readback_check(>->reg_sr, gt, p); + for_each_hw_engine(hwe, gt, id) + xe_reg_sr_readback_check(&hwe->reg_sr, gt, p); + + /* TODO: Check hwe->reg_lrc against contents of default_lrc. 
*/ + + return 0; +} + static int rcs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_RENDER); @@ -209,6 +233,8 @@ static const struct drm_info_list vf_safe_debugfs_list[] = { { "default_lrc_vecs", .show = xe_gt_debugfs_show_with_rpm, .data = vecs_default_lrc }, { "hwconfig", .show = xe_gt_debugfs_show_with_rpm, .data = hwconfig }, { "pat_sw_config", .show = xe_gt_debugfs_simple_show, .data = xe_pat_dump_sw_config }, + { "register-save-restore-check", + .show = xe_gt_debugfs_show_with_rpm, .data = register_save_restore_check }, }; /* everything else should be added here */ diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 1ac911fc6e94..75aa4426b3ec 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -208,3 +208,37 @@ void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p) str_yes_no(entry->reg.masked), str_yes_no(entry->reg.mcr)); } + +static u32 readback_reg(struct xe_gt *gt, struct xe_reg reg) +{ + struct xe_reg_mcr mcr_reg = to_xe_reg_mcr(reg); + + if (reg.mcr) + return xe_gt_mcr_unicast_read_any(gt, mcr_reg); + else + return xe_mmio_read32(>->mmio, reg); +} + +/** + * xe_reg_sr_readback_check() - Readback registers referenced in save/restore + * entries and check whether the programming is in place. 
+ * @sr: Save/restore entries + * @gt: GT to read register from + * @p: DRM printer to report discrepancies on + */ +void xe_reg_sr_readback_check(struct xe_reg_sr *sr, + struct xe_gt *gt, + struct drm_printer *p) +{ + struct xe_reg_sr_entry *entry; + unsigned long offset; + + xa_for_each(&sr->xa, offset, entry) { + u32 val = readback_reg(gt, entry->reg); + u32 mask = entry->clr_bits | entry->set_bits; + + if ((val & mask) != entry->set_bits) + drm_printf(p, "%#8lx & %#10x :: expected %#10x got %#10x\n", + offset, mask, entry->set_bits, val & mask); + } +} diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h index 51fbba423e27..cd133a09aa9b 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.h +++ b/drivers/gpu/drm/xe/xe_reg_sr.h @@ -19,6 +19,9 @@ struct drm_printer; int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe); void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p); +void xe_reg_sr_readback_check(struct xe_reg_sr *sr, + struct xe_gt *gt, + struct drm_printer *p); int xe_reg_sr_add(struct xe_reg_sr *sr, const struct xe_reg_sr_entry *e, struct xe_gt *gt); From e950b06014793c035f5328915f2d6d93ec0b5874 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 18 Feb 2026 14:09:14 -0800 Subject: [PATCH 124/195] drm/xe: Add facility to lookup the value of a register in a default LRC An LRC is stored in memory as a special batchbuffer that hardware will execute to re-load state when switching to the context; it's a collection of register values (encoded as MI_LOAD_REGISTER_IMM commands) and other state instructions (e.g., 3DSTATE_*). The value that will be loaded for a given register can be determined by parsing the batchbuffer to find MI_LRI commands and extracting the value from the offset/value pairs it contains. Add functions to do this, which will be used in a future patch to help verify that our expected reg_sr programming is in place. 
The implementation here returns the value as soon as it finds a match in the LRC. Technically a register could appear multiple times (either due to memory corruption or a hardware defect) and the last value encountered would be the one in effect when the context resumes execution. We can adjust the logic to keep looking and return the last match instead of first in the future if we encounter real-world cases where this would assist with debugging. Reviewed-by: Ashutosh Dixit Link: https://patch.msgid.link/20260218-sr_verify-v4-3-35d6deeb3421@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_lrc.c | 96 +++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_lrc.h | 4 ++ 2 files changed, 100 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 38f648b98868..57ef4f527ed0 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -2155,6 +2155,102 @@ void xe_lrc_dump_default(struct drm_printer *p, } } +/* + * Lookup the value of a register within the offset/value pairs of an + * MI_LOAD_REGISTER_IMM instruction. + * + * Return -ENOENT if the register is not present in the MI_LRI instruction. + */ +static int lookup_reg_in_mi_lri(u32 offset, u32 *value, + const u32 *dword_pair, int num_regs) +{ + for (int i = 0; i < num_regs; i++) { + if (dword_pair[2 * i] == offset) { + *value = dword_pair[2 * i + 1]; + return 0; + } + } + + return -ENOENT; +} + +/* + * Lookup the value of a register in a specific engine type's default LRC. + * + * Return -EINVAL if the default LRC doesn't exist, or ENOENT if the register + * cannot be found in the default LRC. + */ +int xe_lrc_lookup_default_reg_value(struct xe_gt *gt, + enum xe_engine_class hwe_class, + u32 offset, + u32 *value) +{ + u32 *dw; + int remaining_dw, ret; + + if (!gt->default_lrc[hwe_class]) + return -EINVAL; + + /* + * Skip the beginning of the LRC since it contains the per-process + * hardware status page. 
+ */ + dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; + remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4; + + while (remaining_dw > 0) { + u32 num_dw = instr_dw(*dw); + + if (num_dw > remaining_dw) + num_dw = remaining_dw; + + switch (*dw & XE_INSTR_CMD_TYPE) { + case XE_INSTR_MI: + switch (*dw & MI_OPCODE) { + case MI_BATCH_BUFFER_END: + /* End of LRC; register not found */ + return -ENOENT; + + case MI_NOOP: + case MI_TOPOLOGY_FILTER: + /* + * MI_NOOP and MI_TOPOLOGY_FILTER don't have + * a length field and are always 1-dword + * instructions. + */ + remaining_dw--; + dw++; + break; + + case MI_LOAD_REGISTER_IMM: + ret = lookup_reg_in_mi_lri(offset, value, + dw + 1, (num_dw - 1) / 2); + if (ret == 0) + return 0; + + fallthrough; + + default: + /* + * Jump to next instruction based on length + * field. + */ + remaining_dw -= num_dw; + dw += num_dw; + break; + } + break; + + default: + /* Jump to next instruction based on length field. */ + remaining_dw -= num_dw; + dw += num_dw; + } + } + + return -ENOENT; +} + struct instr_state { u32 instr; u16 num_dw; diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index c307a3fd9ea2..3e500004f1ae 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -133,6 +133,10 @@ size_t xe_lrc_skip_size(struct xe_device *xe); void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt, enum xe_engine_class); +int xe_lrc_lookup_default_reg_value(struct xe_gt *gt, + enum xe_engine_class hwe_class, + u32 offset, + u32 *value); u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs); From 764af38af22a6231af2b3685f74d214a9175b822 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 18 Feb 2026 14:09:15 -0800 Subject: [PATCH 125/195] drm/xe/reg_sr: Allow register_save_restore_check debugfs to verify LRC values reg_sr programming that applies to an engines LRC cannot be verified by a simple CPU-based register readout because the reg_sr's values may not 
be in effect if no context is executing on the hardware at the time we check. Instead, we should verify correct reg_sr application by searching for the register in the default_lrc. Reviewed-by: Ashutosh Dixit Link: https://patch.msgid.link/20260218-sr_verify-v4-4-35d6deeb3421@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 4 ++-- drivers/gpu/drm/xe/xe_reg_sr.c | 30 ++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_reg_sr.h | 4 ++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index aa43427a9f4b..f45306308cd6 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -173,8 +173,8 @@ static int register_save_restore_check(struct xe_gt *gt, struct drm_printer *p) xe_reg_sr_readback_check(>->reg_sr, gt, p); for_each_hw_engine(hwe, gt, id) xe_reg_sr_readback_check(&hwe->reg_sr, gt, p); - - /* TODO: Check hwe->reg_lrc against contents of default_lrc. */ + for_each_hw_engine(hwe, gt, id) + xe_reg_sr_lrc_check(&hwe->reg_lrc, gt, hwe, p); return 0; } diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 75aa4426b3ec..83a668f2a0d5 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -21,6 +21,7 @@ #include "xe_gt_printk.h" #include "xe_gt_types.h" #include "xe_hw_engine_types.h" +#include "xe_lrc.h" #include "xe_mmio.h" #include "xe_rtp_types.h" @@ -242,3 +243,32 @@ void xe_reg_sr_readback_check(struct xe_reg_sr *sr, offset, mask, entry->set_bits, val & mask); } } + +/** + * xe_reg_sr_lrc_check() - Check LRC for registers referenced in save/restore + * entries and check whether the programming is in place. 
+ * @sr: Save/restore entries + * @gt: GT to read register from + * @hwe: Hardware engine type to check LRC for + * @p: DRM printer to report discrepancies on + */ +void xe_reg_sr_lrc_check(struct xe_reg_sr *sr, + struct xe_gt *gt, + struct xe_hw_engine *hwe, + struct drm_printer *p) +{ + struct xe_reg_sr_entry *entry; + unsigned long offset; + + xa_for_each(&sr->xa, offset, entry) { + u32 val; + int ret = xe_lrc_lookup_default_reg_value(gt, hwe->class, offset, &val); + u32 mask = entry->clr_bits | entry->set_bits; + + if (ret == -ENOENT) + drm_printf(p, "%#8lx :: not found in LRC for %s\n", offset, hwe->name); + else if ((val & mask) != entry->set_bits) + drm_printf(p, "%#8lx & %#10x :: expected %#10x got %#10x\n", + offset, mask, entry->set_bits, val & mask); + } +} diff --git a/drivers/gpu/drm/xe/xe_reg_sr.h b/drivers/gpu/drm/xe/xe_reg_sr.h index cd133a09aa9b..1ec6e8ecf278 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.h +++ b/drivers/gpu/drm/xe/xe_reg_sr.h @@ -22,6 +22,10 @@ void xe_reg_sr_dump(struct xe_reg_sr *sr, struct drm_printer *p); void xe_reg_sr_readback_check(struct xe_reg_sr *sr, struct xe_gt *gt, struct drm_printer *p); +void xe_reg_sr_lrc_check(struct xe_reg_sr *sr, + struct xe_gt *gt, + struct xe_hw_engine *hwe, + struct drm_printer *p); int xe_reg_sr_add(struct xe_reg_sr *sr, const struct xe_reg_sr_entry *e, struct xe_gt *gt); From 9812865cc6d029313a607e54ff5ba76f56278cdc Mon Sep 17 00:00:00 2001 From: Nitin Gote Date: Thu, 19 Feb 2026 13:59:32 +0530 Subject: [PATCH 126/195] drm/xe/xe3p_lpg: Add Wa_14026781792 Wa_14026781792 applies Xe3p_LPG graphics version 35.10. 
Signed-off-by: Nitin Gote Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260219082931.2199618-2-nitin.r.gote@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index a375ffd666ba..90b9017770ea 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -131,6 +131,7 @@ #define VS_HIT_MAX_VALUE_MASK REG_GENMASK(25, 20) #define DIS_MESH_PARTIAL_AUTOSTRIP REG_BIT(16) #define DIS_MESH_AUTOSTRIP REG_BIT(15) +#define DIS_TE_PATCH_CTRL REG_BIT(4) #define VFLSKPD XE_REG_MCR(0x62a8, XE_REG_OPTION_MASKED) #define DIS_PARTIAL_AUTOSTRIP REG_BIT(9) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index e6b7f65f2fc1..78f205869086 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -939,6 +939,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) }, + { XE_RTP_NAME("14026781792"), + XE_RTP_RULES(GRAPHICS_VERSION(3510), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(FF_MODE, DIS_TE_PATCH_CTRL)) + }, }; static __maybe_unused const struct xe_rtp_entry oob_was[] = { From a800b95c2498b1f67a8a37ca98b827042d0e926e Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Tue, 10 Feb 2026 13:58:25 -0800 Subject: [PATCH 127/195] drm/xe/xe2hpg: Remove SRIOV VF check for Wa_18041344222 Engine WAs are not applied for SRIOV VF, even though they are processed. Remove the SRIOV VF check. 
Cc: Matt Roper Signed-off-by: Harish Chegondi Reviewed-by: Matt Roper Link: https://patch.msgid.link/4043a30d6a971cda3c13145e081e4eed7cc4e440.1770760591.git.harish.chegondi@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 78f205869086..0cf752446a53 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -641,7 +641,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { { XE_RTP_NAME("18041344222"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), FUNC(xe_rtp_match_first_render_or_compute), - FUNC(xe_rtp_match_not_sriov_vf), FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)), XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE)) }, From 0ffe9dcf260b3cd3885fa2e43f592bd55adfddd7 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Tue, 10 Feb 2026 13:58:26 -0800 Subject: [PATCH 128/195] drm/xe/xe3: Remove SRIOV VF check for Wa_18041344222 Engine WAs are not applied for SRIOV VF, even though they are processed. Remove the SRIOV VF check. 
Cc: Matt Roper Signed-off-by: Harish Chegondi Reviewed-by: Matt Roper Link: https://patch.msgid.link/5879396bf202b64d9b5c4cb8c720f3e65d358fc1.1770760591.git.harish.chegondi@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 0cf752446a53..7cb4ef26eca7 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -709,7 +709,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { { XE_RTP_NAME("18041344222"), XE_RTP_RULES(GRAPHICS_VERSION(3000), FUNC(xe_rtp_match_first_render_or_compute), - FUNC(xe_rtp_match_not_sriov_vf), FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)), XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE)) }, From 7c9b2de8a95c4b65b6e83c1312d225c6444dfbf7 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Tue, 10 Feb 2026 13:58:27 -0800 Subject: [PATCH 129/195] drm/xe/xe2lpg: Extend Wa_18041344222 to graphics IP 20.04 Apply WA 18041344222 to Xe2 LPG graphics IP version 20.04 too. 
Bspec: 56024 Cc: Matt Roper Cc: Dnyaneshwar Bhadane Signed-off-by: Harish Chegondi Reviewed-by: Matt Roper Link: https://patch.msgid.link/6e66746246439249a278f3d157f06071d83504b6.1770760591.git.harish.chegondi@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 7cb4ef26eca7..76fa6d510aad 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -579,6 +579,12 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) }, + { XE_RTP_NAME("18041344222"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), + FUNC(xe_rtp_match_first_render_or_compute), + FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)), + XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE)) + }, /* Xe2_HPG */ From c2366539d3746219000f58d821fdf8607bd8cfec Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Fri, 13 Feb 2026 15:00:08 +0100 Subject: [PATCH 130/195] drm/xe/guc: Increase GuC log sizes in debug builds Increase event log size for GuC debug to 16MB, and for general debug to 8MB. This allows for useful debug even if performance-affecting DRM_XE_DEBUG_GUC is not enabled. Without this change, GuC logs gathered by CI are useless for debug due to limited size, which translates to time frame not even able to cover cleanup after test. 
Signed-off-by: Tomasz Lis Cc: Michal Wajdeczko Cc: Daniele Ceraolo Spurio Cc: Matthew Brost Cc: Matt Roper Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Michal Wajdeczko Link: https://patch.msgid.link/20260213140008.1473400-1-tomasz.lis@intel.com --- drivers/gpu/drm/xe/xe_guc_log.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h index 1b05bb60c1c7..4649a260755e 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.h +++ b/drivers/gpu/drm/xe/xe_guc_log.h @@ -13,9 +13,13 @@ struct drm_printer; struct xe_device; #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) -#define XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE SZ_8M +#define XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE SZ_16M #define XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE SZ_1M #define XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE SZ_2M +#elif IS_ENABLED(CONFIG_DRM_XE_DEBUG) +#define XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE SZ_8M +#define XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE SZ_1M +#define XE_GUC_LOG_STATE_CAPTURE_BUFFER_SIZE SZ_1M #else #define XE_GUC_LOG_EVENT_DATA_BUFFER_SIZE SZ_64K #define XE_GUC_LOG_CRASH_DUMP_BUFFER_SIZE SZ_16K From 2d892455f38b82fb4e93f45e7a8e8ea41683ca78 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 18 Feb 2026 21:55:43 +0100 Subject: [PATCH 131/195] drm/xe/pf: Expose LMTT page size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The underlying LMTT implementation already provides the info about the page size it is using. There is no need to have a separate helper function that is making assumption about the required size. 
Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Reviewed-by: Piotr Piórkowski Link: https://patch.msgid.link/20260218205553.3561-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 3 +-- drivers/gpu/drm/xe/xe_lmtt.c | 17 +++++++++++++++++ drivers/gpu/drm/xe/xe_lmtt.h | 1 + 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 888193e1d2c5..e06baf12e108 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1451,8 +1451,7 @@ int xe_gt_sriov_pf_config_set_fair_dbs(struct xe_gt *gt, unsigned int vfid, static u64 pf_get_lmem_alignment(struct xe_gt *gt) { - /* this might be platform dependent */ - return SZ_2M; + return xe_lmtt_page_size(>->tile->sriov.pf.lmtt); } static u64 pf_get_min_spare_lmem(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 2077e1ef8b43..b583e0f20183 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -57,6 +57,23 @@ static u64 lmtt_page_size(struct xe_lmtt *lmtt) return BIT_ULL(lmtt->ops->lmtt_pte_shift(0)); } +/** + * xe_lmtt_page_size() - Get LMTT page size. + * @lmtt: the &xe_lmtt + * + * This function shall be called only by PF. + * + * Return: LMTT page size. + */ +u64 xe_lmtt_page_size(struct xe_lmtt *lmtt) +{ + lmtt_assert(lmtt, IS_SRIOV_PF(lmtt_to_xe(lmtt))); + lmtt_assert(lmtt, xe_device_has_lmtt(lmtt_to_xe(lmtt))); + lmtt_assert(lmtt, lmtt->ops); + + return lmtt_page_size(lmtt); +} + static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level) { unsigned int num_entries = level ? 
lmtt->ops->lmtt_pte_num(level) : 0; diff --git a/drivers/gpu/drm/xe/xe_lmtt.h b/drivers/gpu/drm/xe/xe_lmtt.h index 75a234fbf367..8fa387b38c52 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.h +++ b/drivers/gpu/drm/xe/xe_lmtt.h @@ -20,6 +20,7 @@ int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range); int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset); void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid); u64 xe_lmtt_estimate_pt_size(struct xe_lmtt *lmtt, u64 size); +u64 xe_lmtt_page_size(struct xe_lmtt *lmtt); #else static inline int xe_lmtt_init(struct xe_lmtt *lmtt) { return 0; } static inline void xe_lmtt_init_hw(struct xe_lmtt *lmtt) { } From 146f25b40ce4b97b193ec19ae747629e44dfdce9 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 18 Feb 2026 21:55:44 +0100 Subject: [PATCH 132/195] drm/xe/pf: Add locked variants of VRAM configuration functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have few functions to configure LMEM (aka VRAM) but they all are taking master mutex. Split them and expose locked variants to allow use by the caller who already hold this mutex. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patch.msgid.link/20260218205553.3561-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 95 ++++++++++++++++++---- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h | 4 + 2 files changed, 83 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index e06baf12e108..f67c8822e592 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1735,7 +1735,44 @@ int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size } /** - * xe_gt_sriov_pf_config_bulk_set_lmem - Provision many VFs with LMEM. 
+ * xe_gt_sriov_pf_config_bulk_set_lmem_locked() - Provision many VFs with LMEM. + * @gt: the &xe_gt (can't be media) + * @vfid: starting VF identifier (can't be 0) + * @num_vfs: number of VFs to provision + * @size: requested LMEM size + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_bulk_set_lmem_locked(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs, u64 size) +{ + unsigned int n; + int err = 0; + + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + xe_gt_assert(gt, xe_device_has_lmtt(gt_to_xe(gt))); + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); + xe_gt_assert(gt, vfid); + + if (!num_vfs) + return 0; + + for (n = vfid; n < vfid + num_vfs; n++) { + err = pf_provision_vf_lmem(gt, n, size); + if (err) + break; + } + + return pf_config_bulk_set_u64_done(gt, vfid, num_vfs, size, + pf_get_vf_config_lmem, + "LMEM", n, err); +} + +/** + * xe_gt_sriov_pf_config_bulk_set_lmem() - Provision many VFs with LMEM. * @gt: the &xe_gt (can't be media) * @vfid: starting VF identifier (can't be 0) * @num_vfs: number of VFs to provision @@ -1748,26 +1785,52 @@ int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs, u64 size) { - unsigned int n; - int err = 0; + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + return xe_gt_sriov_pf_config_bulk_set_lmem_locked(gt, vfid, num_vfs, size); +} + +/** + * xe_gt_sriov_pf_config_get_lmem_locked() - Get VF's LMEM quota. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be 0 == PFID) + * + * This function can only be called on PF. + * + * Return: VF's LMEM quota. 
+ */ +u64 xe_gt_sriov_pf_config_get_lmem_locked(struct xe_gt *gt, unsigned int vfid) +{ + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); xe_gt_assert(gt, vfid); + + return pf_get_vf_config_lmem(gt, vfid); +} + +/** + * xe_gt_sriov_pf_config_set_lmem_locked() - Provision VF with LMEM. + * @gt: the &xe_gt (can't be media) + * @vfid: the VF identifier (can't be 0 == PFID) + * @size: requested LMEM size + * + * This function can only be called on PF. + */ +int xe_gt_sriov_pf_config_set_lmem_locked(struct xe_gt *gt, unsigned int vfid, u64 size) +{ + int err; + + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + xe_gt_assert(gt, xe_device_has_lmtt(gt_to_xe(gt))); + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); xe_gt_assert(gt, xe_gt_is_main_type(gt)); + xe_gt_assert(gt, vfid); - if (!num_vfs) - return 0; + err = pf_provision_vf_lmem(gt, vfid, size); - mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); - for (n = vfid; n < vfid + num_vfs; n++) { - err = pf_provision_vf_lmem(gt, n, size); - if (err) - break; - } - mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); - - return pf_config_bulk_set_u64_done(gt, vfid, num_vfs, size, - xe_gt_sriov_pf_config_get_lmem, - "LMEM", n, err); + return pf_config_set_u64_done(gt, vfid, size, + pf_get_vf_config_lmem(gt, vfid), + "LMEM", err); } static struct xe_bo *pf_get_vf_config_lmem_obj(struct xe_gt *gt, unsigned int vfid) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h index 3c6c8b6655af..4a004ecd6140 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -36,6 +36,10 @@ int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs); int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs, u64 size); +u64 
xe_gt_sriov_pf_config_get_lmem_locked(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_lmem_locked(struct xe_gt *gt, unsigned int vfid, u64 size); +int xe_gt_sriov_pf_config_bulk_set_lmem_locked(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs, u64 size); struct xe_bo *xe_gt_sriov_pf_config_get_lmem_obj(struct xe_gt *gt, unsigned int vfid); u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid); From 5ae3c886a1f5d54fbd5e477bcbfb4f3154a7247e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 18 Feb 2026 21:55:45 +0100 Subject: [PATCH 133/195] drm/xe/pf: Add functions for VRAM provisioning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have functions to configure VF LMEM (aka VRAM) on the tile/GT level, used by the auto-provisioning and debugfs, but we also need functions that will work on the device level that will configure VRAM on all tiles at once. We will use these new functions in upcoming patch. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patch.msgid.link/20260218205553.3561-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_sriov_pf_provision.c | 106 +++++++++++++++++++++ drivers/gpu/drm/xe/xe_sriov_pf_provision.h | 4 + 2 files changed, 110 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c index 01470c42e8a7..f22ff65c59aa 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c @@ -7,6 +7,7 @@ #include "xe_device.h" #include "xe_gt_sriov_pf_config.h" #include "xe_gt_sriov_pf_policy.h" +#include "xe_lmtt.h" #include "xe_sriov.h" #include "xe_sriov_pf_helpers.h" #include "xe_sriov_pf_provision.h" @@ -436,3 +437,108 @@ int xe_sriov_pf_provision_query_vf_priority(struct xe_device *xe, unsigned int v return !count ? 
-ENODATA : 0; } + +static u64 vram_per_tile(struct xe_tile *tile, u64 total) +{ + struct xe_device *xe = tile->xe; + unsigned int tcount = xe->info.tile_count; + u64 alignment = xe_lmtt_page_size(&tile->sriov.pf.lmtt); + + total = round_up(total, tcount * alignment); + return div_u64(total, tcount); +} + +/** + * xe_sriov_pf_provision_bulk_apply_vram() - Change VRAM provisioning for all VFs. + * @xe: the PF &xe_device + * @size: the VRAM size in [bytes] to set + * + * Change all VFs VRAM (LMEM) provisioning on all tiles. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_bulk_apply_vram(struct xe_device *xe, u64 size) +{ + unsigned int num_vfs = xe_sriov_pf_get_totalvfs(xe); + struct xe_tile *tile; + unsigned int id; + int result = 0; + int err; + + xe_assert(xe, xe_device_has_lmtt(xe)); + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_tile(tile, xe, id) { + err = xe_gt_sriov_pf_config_bulk_set_lmem_locked(tile->primary_gt, + VFID(1), num_vfs, + vram_per_tile(tile, size)); + result = result ?: err; + } + + return result; +} + +/** + * xe_sriov_pf_provision_apply_vf_vram() - Change single VF VRAM allocation. + * @xe: the PF &xe_device + * @vfid: the VF identifier (can't be 0 == PFID) + * @size: VRAM size to set + * + * Change VF's VRAM provisioning on all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_provision_apply_vf_vram(struct xe_device *xe, unsigned int vfid, u64 size) +{ + struct xe_tile *tile; + unsigned int id; + int result = 0; + int err; + + xe_assert(xe, vfid); + xe_assert(xe, xe_device_has_lmtt(xe)); + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_tile(tile, xe, id) { + err = xe_gt_sriov_pf_config_set_lmem_locked(tile->primary_gt, vfid, + vram_per_tile(tile, size)); + result = result ?: err; + } + + return result; +} + +/** + * xe_sriov_pf_provision_query_vf_vram() - Query VF's VRAM allocation. + * @xe: the PF &xe_device + * @vfid: the VF identifier (can't be 0 == PFID) + * @size: placeholder for the returned VRAM size + * + * Query VF's VRAM provisioning from all tiles/GTs. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_provision_query_vf_vram(struct xe_device *xe, unsigned int vfid, u64 *size) +{ + struct xe_tile *tile; + unsigned int id; + u64 total = 0; + + xe_assert(xe, vfid); + + guard(mutex)(xe_sriov_pf_master_mutex(xe)); + + for_each_tile(tile, xe, id) + total += xe_gt_sriov_pf_config_get_lmem_locked(tile->primary_gt, vfid); + + *size = total; + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.h b/drivers/gpu/drm/xe/xe_sriov_pf_provision.h index bccf23d51396..f26f49539697 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_provision.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.h @@ -24,6 +24,10 @@ int xe_sriov_pf_provision_bulk_apply_priority(struct xe_device *xe, u32 prio); int xe_sriov_pf_provision_apply_vf_priority(struct xe_device *xe, unsigned int vfid, u32 prio); int xe_sriov_pf_provision_query_vf_priority(struct xe_device *xe, unsigned int vfid, u32 *prio); +int xe_sriov_pf_provision_bulk_apply_vram(struct xe_device *xe, u64 size); +int xe_sriov_pf_provision_apply_vf_vram(struct xe_device *xe, unsigned int vfid, u64 size); +int xe_sriov_pf_provision_query_vf_vram(struct xe_device *xe, unsigned int 
vfid, u64 *size); + int xe_sriov_pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs); int xe_sriov_pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs); From b1d2746aa5af17d1c901c36564e52da5c6bedae5 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 18 Feb 2026 21:55:46 +0100 Subject: [PATCH 134/195] drm/xe/pf: Allow to change VFs VRAM quota using sysfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On current discrete platforms, PF will provision all VFs with a fair amount of the VRAM (LMEM) during VFs enabling. However, in some cases this automatic VRAM provisioning might be either non-reproducible or sub-optimal. This could break VF's migration or impact performance. Expose per-VF VRAM quota read-write sysfs attributes to allow admin change default VRAM provisioning performed by the PF. /sys/bus/pci/drivers/xe/BDF/ ├── sriov_admin/ ├── .bulk_profile │ └── vram_quota [RW] unsigned integer ├── vf1/ │ └── profile │ └── vram_quota [RW] unsigned integer ├── vf2/ │ └── profile │ └── vram_quota [RW] unsigned integer Above values represent total provisioned VRAM from all tiles where VFs were assigned, and currently it's from all tiles always. Note that changing VRAM provisioning is only possible when VF is not running, otherwise GuC will complain. To make sure that given VF is idle, triggering VF FLR might be needed. 
Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Reviewed-by: Piotr Piórkowski Acked-by: Rodrigo Vivi Link: https://patch.msgid.link/20260218205553.3561-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c | 31 ++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c index 82a1055985ba..aa05c143a4d6 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c @@ -9,6 +9,7 @@ #include #include "xe_assert.h" +#include "xe_device.h" #include "xe_pci_sriov.h" #include "xe_pm.h" #include "xe_sriov.h" @@ -44,7 +45,8 @@ static int emit_choice(char *buf, int choice, const char * const *array, size_t * ├── .bulk_profile * │ ├── exec_quantum_ms * │ ├── preempt_timeout_us - * │ └── sched_priority + * │ ├── sched_priority + * │ └── vram_quota * ├── pf/ * │ ├── ... * │ ├── device -> ../../../BDF @@ -59,7 +61,8 @@ static int emit_choice(char *buf, int choice, const char * const *array, size_t * │ └── profile * │ ├── exec_quantum_ms * │ ├── preempt_timeout_us - * │ └── sched_priority + * │ ├── sched_priority + * │ └── vram_quota * ├── vf2/ * : * └── vfN/ @@ -132,6 +135,7 @@ static XE_SRIOV_DEV_ATTR_WO(NAME) DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(exec_quantum_ms, eq, u32); DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(preempt_timeout_us, pt, u32); +DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(vram_quota, vram, u64); static const char * const sched_priority_names[] = { [GUC_SCHED_PRIORITY_LOW] = "low", @@ -181,12 +185,26 @@ static struct attribute *bulk_profile_dev_attrs[] = { &xe_sriov_dev_attr_exec_quantum_ms.attr, &xe_sriov_dev_attr_preempt_timeout_us.attr, &xe_sriov_dev_attr_sched_priority.attr, + &xe_sriov_dev_attr_vram_quota.attr, NULL }; +static umode_t profile_dev_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int index) +{ + struct xe_sriov_kobj *vkobj = 
to_xe_sriov_kobj(kobj); + + if (attr == &xe_sriov_dev_attr_vram_quota.attr && + !xe_device_has_lmtt(vkobj->xe)) + return 0; + + return attr->mode; +} + static const struct attribute_group bulk_profile_dev_attr_group = { .name = ".bulk_profile", .attrs = bulk_profile_dev_attrs, + .is_visible = profile_dev_attr_is_visible, }; static const struct attribute_group *xe_sriov_dev_attr_groups[] = { @@ -228,6 +246,7 @@ static XE_SRIOV_VF_ATTR(NAME) DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(exec_quantum_ms, eq, u32, "%u\n"); DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(preempt_timeout_us, pt, u32, "%u\n"); +DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(vram_quota, vram, u64, "%llu\n"); static ssize_t xe_sriov_vf_attr_sched_priority_show(struct xe_device *xe, unsigned int vfid, char *buf) @@ -274,6 +293,7 @@ static struct attribute *profile_vf_attrs[] = { &xe_sriov_vf_attr_exec_quantum_ms.attr, &xe_sriov_vf_attr_preempt_timeout_us.attr, &xe_sriov_vf_attr_sched_priority.attr, + &xe_sriov_vf_attr_vram_quota.attr, NULL }; @@ -286,6 +306,13 @@ static umode_t profile_vf_attr_is_visible(struct kobject *kobj, !sched_priority_change_allowed(vkobj->vfid)) return attr->mode & 0444; + if (attr == &xe_sriov_vf_attr_vram_quota.attr) { + if (!IS_DGFX(vkobj->xe) || vkobj->vfid == PFID) + return 0; + if (!xe_device_has_lmtt(vkobj->xe)) + return attr->mode & 0444; + } + return attr->mode; } From 81d417d56a23f94b288587af59111f20aaf83c03 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 18 Feb 2026 21:55:47 +0100 Subject: [PATCH 135/195] drm/xe/pf: Use migration-friendly VRAM auto-provisioning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of trying very hard to find the largest fair VRAM (aka LMEM) size that could be allocated for VFs on the current tile, pick some smaller rounded down to power-of-two value that is more likely to be provisioned in the same manner by the other PF instances. 
In some cases, the outcome of above calculation might not be optimal, but it's expected that admin will do fine-tuning using sysfs files. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patch.msgid.link/20260218205553.3561-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index f67c8822e592..72cd46554230 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1900,6 +1900,28 @@ static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs) return fair; } +static u64 pf_profile_fair_lmem(struct xe_gt *gt, unsigned int num_vfs) +{ + struct xe_tile *tile = gt_to_tile(gt); + bool admin_only_pf = xe_sriov_pf_admin_only(tile->xe); + u64 usable = xe_vram_region_usable_size(tile->mem.vram); + u64 spare = pf_get_min_spare_lmem(gt); + u64 available = usable > spare ? usable - spare : 0; + u64 shareable = ALIGN_DOWN(available, SZ_1G); + u64 alignment = pf_get_lmem_alignment(gt); + u64 fair; + + if (admin_only_pf) + fair = div_u64(shareable, num_vfs); + else + fair = div_u64(shareable, 1 + num_vfs); + + if (!admin_only_pf && fair) + fair = rounddown_pow_of_two(fair); + + return ALIGN_DOWN(fair, alignment); +} + /** * xe_gt_sriov_pf_config_set_fair_lmem - Provision many VFs with fair LMEM. 
* @gt: the &xe_gt (can't be media) @@ -1913,6 +1935,7 @@ static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs) int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs) { + u64 profile; u64 fair; xe_gt_assert(gt, vfid); @@ -1929,6 +1952,12 @@ int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, if (!fair) return -ENOSPC; + profile = pf_profile_fair_lmem(gt, num_vfs); + fair = min(fair, profile); + if (fair < profile) + xe_gt_sriov_info(gt, "Using non-profile provisioning (%s %llu vs %llu)\n", + "VRAM", fair, profile); + return xe_gt_sriov_pf_config_bulk_set_lmem(gt, vfid, num_vfs, fair); } From cbe29da6f7c0d0541ec135e7292b35b97f8ca402 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 18 Feb 2026 21:55:48 +0100 Subject: [PATCH 136/195] drm/xe/tests: Add KUnit tests for new VRAM fair provisioning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add basic test cases to check outcome of the fair VRAM provisioning for regular and admin-only PF mode. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patch.msgid.link/20260218205553.3561-7-michal.wajdeczko@intel.com --- .../xe/tests/xe_gt_sriov_pf_config_kunit.c | 96 ++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c index 3889dc3e49ca..305dbd4e5d1a 100644 --- a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c +++ b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c @@ -11,6 +11,7 @@ #include "xe_pci_test.h" #define TEST_MAX_VFS 63 +#define TEST_VRAM 0x37a800000ull static void pf_set_admin_mode(struct xe_device *xe, bool enable) { @@ -19,6 +20,17 @@ static void pf_set_admin_mode(struct xe_device *xe, bool enable) KUNIT_EXPECT_EQ(kunit_get_current_test(), enable, xe_sriov_pf_admin_only(xe)); } +static void pf_set_usable_vram(struct xe_device *xe, u64 usable) +{ + struct xe_tile *tile = xe_device_get_root_tile(xe); + struct kunit *test = kunit_get_current_test(); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tile); + xe->mem.vram->usable_size = usable; + tile->mem.vram->usable_size = usable; + KUNIT_ASSERT_EQ(test, usable, xe_vram_region_usable_size(tile->mem.vram)); +} + static const void *num_vfs_gen_param(struct kunit *test, const void *prev, char *desc) { unsigned long next = 1 + (unsigned long)prev; @@ -34,9 +46,11 @@ static int pf_gt_config_test_init(struct kunit *test) { struct xe_pci_fake_data fake = { .sriov_mode = XE_SRIOV_MODE_PF, - .platform = XE_TIGERLAKE, /* any random platform with SR-IOV */ + .platform = XE_BATTLEMAGE, /* any random DGFX platform with SR-IOV */ .subplatform = XE_SUBPLATFORM_NONE, + .graphics_verx100 = 2001, }; + struct xe_vram_region *vram; struct xe_device *xe; struct xe_gt *gt; @@ -50,6 +64,19 @@ static int pf_gt_config_test_init(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gt); test->priv = gt; + /* pretend it has some VRAM */ + KUNIT_ASSERT_TRUE(test, 
IS_DGFX(xe)); + vram = kunit_kzalloc(test, sizeof(*vram), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, vram); + vram->usable_size = TEST_VRAM; + xe->mem.vram = vram; + xe->tiles[0].mem.vram = vram; + + /* pretend we have a valid LMTT */ + KUNIT_ASSERT_TRUE(test, xe_device_has_lmtt(xe)); + KUNIT_ASSERT_GE(test, GRAPHICS_VERx100(xe), 1260); + xe->tiles[0].sriov.pf.lmtt.ops = &lmtt_ml_ops; + /* pretend it can support up to 63 VFs */ xe->sriov.pf.device_total_vfs = TEST_MAX_VFS; xe->sriov.pf.driver_max_vfs = TEST_MAX_VFS; @@ -189,13 +216,80 @@ static void fair_ggtt(struct kunit *test) KUNIT_ASSERT_EQ(test, SZ_2G, pf_profile_fair_ggtt(gt, num_vfs)); } +static const u64 vram_sizes[] = { + SZ_4G - SZ_512M, + SZ_8G + SZ_4G - SZ_512M, + SZ_16G - SZ_512M, + SZ_32G - SZ_512M, + SZ_64G - SZ_512M, + TEST_VRAM, +}; + +static void u64_param_get_desc(const u64 *p, char *desc) +{ + string_get_size(*p, 1, STRING_UNITS_2, desc, KUNIT_PARAM_DESC_SIZE); +} + +KUNIT_ARRAY_PARAM(vram_size, vram_sizes, u64_param_get_desc); + +static void fair_vram_1vf(struct kunit *test) +{ + const u64 usable = *(const u64 *)test->param_value; + struct xe_gt *gt = test->priv; + struct xe_device *xe = gt_to_xe(gt); + + pf_set_admin_mode(xe, false); + pf_set_usable_vram(xe, usable); + + KUNIT_EXPECT_NE(test, 0, pf_profile_fair_lmem(gt, 1)); + KUNIT_EXPECT_GE(test, usable, pf_profile_fair_lmem(gt, 1)); + KUNIT_EXPECT_TRUE(test, is_power_of_2(pf_profile_fair_lmem(gt, 1))); + KUNIT_EXPECT_GE(test, usable - pf_profile_fair_lmem(gt, 1), pf_profile_fair_lmem(gt, 1)); +} + +static void fair_vram_1vf_admin_only(struct kunit *test) +{ + const u64 usable = *(const u64 *)test->param_value; + struct xe_gt *gt = test->priv; + struct xe_device *xe = gt_to_xe(gt); + + pf_set_admin_mode(xe, true); + pf_set_usable_vram(xe, usable); + + KUNIT_EXPECT_NE(test, 0, pf_profile_fair_lmem(gt, 1)); + KUNIT_EXPECT_GE(test, usable, pf_profile_fair_lmem(gt, 1)); + KUNIT_EXPECT_LT(test, usable - pf_profile_fair_lmem(gt, 1), 
pf_profile_fair_lmem(gt, 1)); + KUNIT_EXPECT_TRUE(test, IS_ALIGNED(pf_profile_fair_lmem(gt, 1), SZ_1G)); +} + +static void fair_vram(struct kunit *test) +{ + unsigned int num_vfs = (unsigned long)test->param_value; + struct xe_gt *gt = test->priv; + struct xe_device *xe = gt_to_xe(gt); + u64 alignment = pf_get_lmem_alignment(gt); + char size[10]; + + pf_set_admin_mode(xe, false); + + string_get_size(pf_profile_fair_lmem(gt, num_vfs), 1, STRING_UNITS_2, size, sizeof(size)); + kunit_info(test, "fair %s %llx\n", size, pf_profile_fair_lmem(gt, num_vfs)); + + KUNIT_EXPECT_TRUE(test, is_power_of_2(pf_profile_fair_lmem(gt, num_vfs))); + KUNIT_EXPECT_TRUE(test, IS_ALIGNED(pf_profile_fair_lmem(gt, num_vfs), alignment)); + KUNIT_EXPECT_GE(test, TEST_VRAM, num_vfs * pf_profile_fair_lmem(gt, num_vfs)); +} + static struct kunit_case pf_gt_config_test_cases[] = { KUNIT_CASE(fair_contexts_1vf), KUNIT_CASE(fair_doorbells_1vf), KUNIT_CASE(fair_ggtt_1vf), + KUNIT_CASE_PARAM(fair_vram_1vf, vram_size_gen_params), + KUNIT_CASE_PARAM(fair_vram_1vf_admin_only, vram_size_gen_params), KUNIT_CASE_PARAM(fair_contexts, num_vfs_gen_param), KUNIT_CASE_PARAM(fair_doorbells, num_vfs_gen_param), KUNIT_CASE_PARAM(fair_ggtt, num_vfs_gen_param), + KUNIT_CASE_PARAM(fair_vram, num_vfs_gen_param), {} }; From 62acbb1dd5c281ad708f7985031230b0268ddc61 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 18 Feb 2026 21:55:49 +0100 Subject: [PATCH 137/195] drm/xe/pf: Don't check for empty config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already turn off VFs auto-provisioning once we detect manual VFs provisioning over the debugfs, so we can skip additional check for all VFs configs being still empty. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patch.msgid.link/20260218205553.3561-8-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_sriov_pf_provision.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c index f22ff65c59aa..abe3677d33ed 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c @@ -33,17 +33,6 @@ static bool pf_auto_provisioning_mode(struct xe_device *xe) return xe->sriov.pf.provision.mode == XE_SRIOV_PROVISIONING_MODE_AUTO; } -static bool pf_needs_provisioning(struct xe_gt *gt, unsigned int num_vfs) -{ - unsigned int n; - - for (n = 1; n <= num_vfs; n++) - if (!xe_gt_sriov_pf_config_is_empty(gt, n)) - return false; - - return true; -} - static int pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs) { struct xe_gt *gt; @@ -52,8 +41,6 @@ static int pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs) int err; for_each_gt(gt, xe, id) { - if (!pf_needs_provisioning(gt, num_vfs)) - return -EUCLEAN; err = xe_gt_sriov_pf_config_set_fair(gt, VFID(1), num_vfs); result = result ?: err; } From 67a716b693f96177be253c1fa6a205db743d5445 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 18 Feb 2026 21:55:50 +0100 Subject: [PATCH 138/195] drm/xe/pf: Prefer guard(mutex) when doing fair LMEM provisioning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will add more code there and with guard() it will be easier to avoid mistakes in unlocking. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patch.msgid.link/20260218205553.3561-9-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 72cd46554230..d2a2201440b8 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1945,10 +1945,9 @@ int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, if (!xe_device_has_lmtt(gt_to_xe(gt))) return 0; - mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); - fair = pf_estimate_fair_lmem(gt, num_vfs); - mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + fair = pf_estimate_fair_lmem(gt, num_vfs); if (!fair) return -ENOSPC; @@ -1958,7 +1957,7 @@ int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, xe_gt_sriov_info(gt, "Using non-profile provisioning (%s %llu vs %llu)\n", "VRAM", fair, profile); - return xe_gt_sriov_pf_config_bulk_set_lmem(gt, vfid, num_vfs, fair); + return xe_gt_sriov_pf_config_bulk_set_lmem_locked(gt, vfid, num_vfs, fair); } /** From d039fa856ee190ae8cd799800972137a00b55d43 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 18 Feb 2026 21:55:51 +0100 Subject: [PATCH 139/195] drm/xe/pf: Skip VRAM auto-provisioning if already provisioned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case VF's VRAM provisioning using sysfs is done by the admin prior to VFs enabling, this provisioning will be lost as PF will run VRAM auto-provisioning anyway. To avoid that skip this auto- provisioning if any VF has been already provisioned with VRAM. To help admin find any mistakes, add diagnostics messages about which VFs were provisioned with VRAM and which were missed. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patch.msgid.link/20260218205553.3561-10-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 56 ++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index d2a2201440b8..cba20eb6b36b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1922,6 +1922,59 @@ static u64 pf_profile_fair_lmem(struct xe_gt *gt, unsigned int num_vfs) return ALIGN_DOWN(fair, alignment); } +static void __pf_show_provisioning_lmem(struct xe_gt *gt, unsigned int first_vf, + unsigned int num_vfs, bool provisioned) +{ + unsigned int allvfs = 1 + xe_gt_sriov_pf_get_totalvfs(gt); /* PF plus VFs */ + unsigned long *bitmap __free(bitmap) = bitmap_zalloc(allvfs, GFP_KERNEL); + unsigned int weight; + unsigned int n; + + if (!bitmap) + return; + + for (n = first_vf; n < first_vf + num_vfs; n++) { + if (!!pf_get_vf_config_lmem(gt, VFID(n)) == provisioned) + bitmap_set(bitmap, n, 1); + } + + weight = bitmap_weight(bitmap, allvfs); + if (!weight) + return; + + xe_gt_sriov_info(gt, "VF%s%*pbl %s provisioned with VRAM\n", + weight > 1 ? "s " : "", allvfs, bitmap, + provisioned ? 
"already" : "not"); +} + +static void pf_show_all_provisioned_lmem(struct xe_gt *gt) +{ + __pf_show_provisioning_lmem(gt, VFID(1), xe_gt_sriov_pf_get_totalvfs(gt), true); +} + +static void pf_show_unprovisioned_lmem(struct xe_gt *gt, unsigned int first_vf, + unsigned int num_vfs) +{ + __pf_show_provisioning_lmem(gt, first_vf, num_vfs, false); +} + +static bool pf_needs_provision_lmem(struct xe_gt *gt, unsigned int first_vf, + unsigned int num_vfs) +{ + unsigned int vfid; + + for (vfid = first_vf; vfid < first_vf + num_vfs; vfid++) { + if (pf_get_vf_config_lmem(gt, vfid)) { + pf_show_all_provisioned_lmem(gt); + pf_show_unprovisioned_lmem(gt, first_vf, num_vfs); + return false; + } + } + + pf_show_all_provisioned_lmem(gt); + return true; +} + /** * xe_gt_sriov_pf_config_set_fair_lmem - Provision many VFs with fair LMEM. * @gt: the &xe_gt (can't be media) @@ -1947,6 +2000,9 @@ int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, guard(mutex)(xe_gt_sriov_pf_master_mutex(gt)); + if (!pf_needs_provision_lmem(gt, vfid, num_vfs)) + return 0; + fair = pf_estimate_fair_lmem(gt, num_vfs); if (!fair) return -ENOSPC; From 9ca192cbcd5b9baefdc3c4a0d2740e04a427cd18 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 18 Feb 2026 21:55:52 +0100 Subject: [PATCH 140/195] drm/xe/pf: Add documentation for vram_quota MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add initial documentation for recently added VRAM provisioning Xe driver specific SR-IOV sysfs files under device/sriov_admin. 
Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Reviewed-by: Piotr Piórkowski Acked-by: Rodrigo Vivi Link: https://patch.msgid.link/20260218205553.3561-11-michal.wajdeczko@intel.com --- .../ABI/testing/sysfs-driver-intel-xe-sriov | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov b/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov index 7f5ef9eada53..1d6eaff6882f 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-sriov @@ -129,6 +129,37 @@ Description: -EIO if FW refuses to change the provisioning. +What: /sys/bus/pci/drivers/xe/.../sriov_admin/.bulk_profile/vram_quota +What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf/profile/vram_quota +Date: February 2026 +KernelVersion: 7.0 +Contact: intel-xe@lists.freedesktop.org +Description: + These files allow to perform initial VFs VRAM provisioning prior to VFs + enabling or to change VFs VRAM provisioning once the VFs are enabled. + Any non-zero initial VRAM provisioning will block VFs auto-provisioning. + Without initial VRAM provisioning those files will show result of the + VRAM auto-provisioning performed by the PF once the VFs are enabled. + Once the VFs are disabled, all VRAM provisioning will be released. + These files are visible only on discrete Intel Xe platforms with VRAM + and are writeable only if dynamic VFs VRAM provisioning is supported. + + .bulk_profile/vram_quota: (WO) unsigned integer + The amount of the provisioned VRAM in [bytes] for each VF. + Actual quota value might be aligned per HW/FW requirements. + + profile/vram_quota: (RW) unsigned integer + The amount of the provisioned VRAM in [bytes] for this VF. + Actual quota value might be aligned per HW/FW requirements. + + Default is 0 (unprovisioned). 
+ + Writes to these attributes may fail with errors like: + -EINVAL if provided input is malformed or not recognized, + -EPERM if change is not applicable on given HW/FW, + -EIO if FW refuses to change the provisioning. + + What: /sys/bus/pci/drivers/xe/.../sriov_admin/vf/stop Date: October 2025 KernelVersion: 6.19 From f939bdd9207a5d1fc55cced5459858480686ce22 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 19 Feb 2026 23:35:18 +0000 Subject: [PATCH 141/195] drm/xe/sync: Cleanup partially initialized sync on parse failure xe_sync_entry_parse() can allocate references (syncobj, fence, chain fence, or user fence) before hitting a later failure path. Several of those paths returned directly, leaving partially initialized state and leaking refs. Route these error paths through a common free_sync label and call xe_sync_entry_cleanup(sync) before returning the error. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260219233516.2938172-5-shuicheng.lin@intel.com --- drivers/gpu/drm/xe/xe_sync.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index c8fdcdbd6ae7..f03ab2e27284 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -146,8 +146,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (!signal) { sync->fence = drm_syncobj_fence_get(sync->syncobj); - if (XE_IOCTL_DBG(xe, !sync->fence)) - return -EINVAL; + if (XE_IOCTL_DBG(xe, !sync->fence)) { + err = -EINVAL; + goto free_sync; + } } break; @@ -167,17 +169,21 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (signal) { sync->chain_fence = dma_fence_chain_alloc(); - if (!sync->chain_fence) - return -ENOMEM; + if (!sync->chain_fence) { + err = -ENOMEM; + goto free_sync; + } } else 
{ sync->fence = drm_syncobj_fence_get(sync->syncobj); - if (XE_IOCTL_DBG(xe, !sync->fence)) - return -EINVAL; + if (XE_IOCTL_DBG(xe, !sync->fence)) { + err = -EINVAL; + goto free_sync; + } err = dma_fence_chain_find_seqno(&sync->fence, sync_in.timeline_value); if (err) - return err; + goto free_sync; } break; @@ -216,6 +222,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, sync->timeline_value = sync_in.timeline_value; return 0; + +free_sync: + xe_sync_entry_cleanup(sync); + return err; } ALLOW_ERROR_INJECTION(xe_sync_entry_parse, ERRNO); From a5d5634cde48a9fcd68c8504aa07f89f175074a0 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 19 Feb 2026 23:35:19 +0000 Subject: [PATCH 142/195] drm/xe/sync: Fix user fence leak on alloc failure When dma_fence_chain_alloc() fails, properly release the user fence reference to prevent a memory leak. Fixes: adda4e855ab6 ("drm/xe: Enforce correct user fence signaling order using") Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260219233516.2938172-6-shuicheng.lin@intel.com --- drivers/gpu/drm/xe/xe_sync.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index f03ab2e27284..52cefbd985ac 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -206,8 +206,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence))) return PTR_ERR(sync->ufence); sync->ufence_chain_fence = dma_fence_chain_alloc(); - if (!sync->ufence_chain_fence) - return -ENOMEM; + if (!sync->ufence_chain_fence) { + err = -ENOMEM; + goto free_sync; + } sync->ufence_syncobj = ufence_syncobj; } From 16843e6638b743dd0376a1fc0845f2fd34daff98 Mon Sep 17 00:00:00 2001 From: Satyanarayana K V P Date: Fri, 20 Feb 2026 05:55:21 +0000 Subject: [PATCH 143/195] drm/sa: Split drm_suballoc_new() into SA 
alloc and init helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_suballoc_new() currently both allocates the SA object using kmalloc() and searches for a suitable hole in the sub-allocator for the requested size. If SA allocation is done by holding sub-allocator mutex, this design can lead to reclaim safety issues. By splitting the kmalloc() step outside of the critical section, we allow the memory allocation to use GFP_KERNEL (reclaim-safe) while ensuring that the initialization step that holds reclaim-tainted locks (sub-allocator mutex) operates in a reclaim-unsafe context with pre-allocated memory. This separation prevents potential deadlocks where memory reclaim could attempt to acquire locks that are already held during the sub-allocator operations. Signed-off-by: Satyanarayana K V P Suggested-by: Matthew Brost Cc: Thomas Hellström Cc: Michal Wajdeczko Cc: Matthew Auld Cc: Christian König Cc: dri-devel@lists.freedesktop.org Cc: Maarten Lankhorst Reviewed-by: Christian König Reviewed-by: Thomas Hellström Reviewed-by: Matthew Brost Acked-by: Maarten Lankhorst Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260220055519.2485681-6-satyanarayana.k.v.p@intel.com --- drivers/gpu/drm/drm_suballoc.c | 106 ++++++++++++++++++++++++++------- include/drm/drm_suballoc.h | 6 ++ 2 files changed, 92 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/drm_suballoc.c b/drivers/gpu/drm/drm_suballoc.c index 879ea33dbbc4..dc9bef3c0419 100644 --- a/drivers/gpu/drm/drm_suballoc.c +++ b/drivers/gpu/drm/drm_suballoc.c @@ -293,45 +293,66 @@ static bool drm_suballoc_next_hole(struct drm_suballoc_manager *sa_manager, } /** - * drm_suballoc_new() - Make a suballocation. + * drm_suballoc_alloc() - Allocate uninitialized suballoc object. + * @gfp: gfp flags used for memory allocation. + * + * Allocate memory for an uninitialized suballoc object. 
Intended usage is + * allocate memory for suballoc object outside of a reclaim tainted context + * and then be initialized at a later time in a reclaim tainted context. + * + * @drm_suballoc_free() should be used to release the memory if returned + * suballoc object is in uninitialized state. + * + * Return: a new uninitialized suballoc object, or an ERR_PTR(-ENOMEM). + */ +struct drm_suballoc *drm_suballoc_alloc(gfp_t gfp) +{ + struct drm_suballoc *sa; + + sa = kmalloc(sizeof(*sa), gfp); + if (!sa) + return ERR_PTR(-ENOMEM); + + sa->manager = NULL; + + return sa; +} +EXPORT_SYMBOL(drm_suballoc_alloc); + +/** + * drm_suballoc_insert() - Initialize a suballocation and insert a hole. * @sa_manager: pointer to the sa_manager + * @sa: The struct drm_suballoc. * @size: number of bytes we want to suballocate. - * @gfp: gfp flags used for memory allocation. Typically GFP_KERNEL but - * the argument is provided for suballocations from reclaim context or - * where the caller wants to avoid pipelining rather than wait for - * reclaim. * @intr: Whether to perform waits interruptible. This should typically * always be true, unless the caller needs to propagate a * non-interruptible context from above layers. * @align: Alignment. Must not exceed the default manager alignment. * If @align is zero, then the manager alignment is used. * - * Try to make a suballocation of size @size, which will be rounded - * up to the alignment specified in specified in drm_suballoc_manager_init(). + * Try to make a suballocation on a pre-allocated suballoc object of size @size, + * which will be rounded up to the alignment specified in specified in + * drm_suballoc_manager_init(). * - * Return: a new suballocated bo, or an ERR_PTR. + * Return: zero on success, errno on failure. 
*/ -struct drm_suballoc * -drm_suballoc_new(struct drm_suballoc_manager *sa_manager, size_t size, - gfp_t gfp, bool intr, size_t align) +int drm_suballoc_insert(struct drm_suballoc_manager *sa_manager, + struct drm_suballoc *sa, size_t size, + bool intr, size_t align) { struct dma_fence *fences[DRM_SUBALLOC_MAX_QUEUES]; unsigned int tries[DRM_SUBALLOC_MAX_QUEUES]; unsigned int count; int i, r; - struct drm_suballoc *sa; if (WARN_ON_ONCE(align > sa_manager->align)) - return ERR_PTR(-EINVAL); + return -EINVAL; if (WARN_ON_ONCE(size > sa_manager->size || !size)) - return ERR_PTR(-EINVAL); + return -EINVAL; if (!align) align = sa_manager->align; - sa = kmalloc(sizeof(*sa), gfp); - if (!sa) - return ERR_PTR(-ENOMEM); sa->manager = sa_manager; sa->fence = NULL; INIT_LIST_HEAD(&sa->olist); @@ -348,7 +369,7 @@ drm_suballoc_new(struct drm_suballoc_manager *sa_manager, size_t size, if (drm_suballoc_try_alloc(sa_manager, sa, size, align)) { spin_unlock(&sa_manager->wq.lock); - return sa; + return 0; } /* see if we can skip over some allocations */ @@ -385,8 +406,48 @@ drm_suballoc_new(struct drm_suballoc_manager *sa_manager, size_t size, } while (!r); spin_unlock(&sa_manager->wq.lock); - kfree(sa); - return ERR_PTR(r); + sa->manager = NULL; + return r; +} +EXPORT_SYMBOL(drm_suballoc_insert); + +/** + * drm_suballoc_new() - Make a suballocation. + * @sa_manager: pointer to the sa_manager + * @size: number of bytes we want to suballocate. + * @gfp: gfp flags used for memory allocation. Typically GFP_KERNEL but + * the argument is provided for suballocations from reclaim context or + * where the caller wants to avoid pipelining rather than wait for + * reclaim. + * @intr: Whether to perform waits interruptible. This should typically + * always be true, unless the caller needs to propagate a + * non-interruptible context from above layers. + * @align: Alignment. Must not exceed the default manager alignment. + * If @align is zero, then the manager alignment is used. 
+ * + * Try to make a suballocation of size @size, which will be rounded + * up to the alignment specified in + * drm_suballoc_manager_init(). + * + * Return: a new suballocated bo, or an ERR_PTR. + */ +struct drm_suballoc * +drm_suballoc_new(struct drm_suballoc_manager *sa_manager, size_t size, + gfp_t gfp, bool intr, size_t align) +{ + struct drm_suballoc *sa; + int err; + + sa = drm_suballoc_alloc(gfp); + if (IS_ERR(sa)) + return sa; + + err = drm_suballoc_insert(sa_manager, sa, size, intr, align); + if (err) { + drm_suballoc_free(sa, NULL); + return ERR_PTR(err); + } + + return sa; } EXPORT_SYMBOL(drm_suballoc_new); @@ -405,6 +466,11 @@ void drm_suballoc_free(struct drm_suballoc *suballoc, if (!suballoc) return; + if (!suballoc->manager) { + kfree(suballoc); + return; + } + sa_manager = suballoc->manager; spin_lock(&sa_manager->wq.lock); diff --git a/include/drm/drm_suballoc.h b/include/drm/drm_suballoc.h index 7ba72a81a808..29befdda35d2 100644 --- a/include/drm/drm_suballoc.h +++ b/include/drm/drm_suballoc.h @@ -53,6 +53,12 @@ void drm_suballoc_manager_init(struct drm_suballoc_manager *sa_manager, void drm_suballoc_manager_fini(struct drm_suballoc_manager *sa_manager); +struct drm_suballoc *drm_suballoc_alloc(gfp_t gfp); + +int drm_suballoc_insert(struct drm_suballoc_manager *sa_manager, + struct drm_suballoc *sa, size_t size, bool intr, + size_t align); + struct drm_suballoc * drm_suballoc_new(struct drm_suballoc_manager *sa_manager, size_t size, gfp_t gfp, bool intr, size_t align); From bcd768d787e7bb4e06d77709fa17d5bafec8612e Mon Sep 17 00:00:00 2001 From: Satyanarayana K V P Date: Fri, 20 Feb 2026 05:55:22 +0000 Subject: [PATCH 144/195] drm/xe/vf: Fix fs_reclaim warning with CCS save/restore BB allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CCS save/restore batch buffers are attached during BO allocation and detached during BO teardown. 
The shrinker triggers xe_bo_move(), which is used for both allocation and deletion paths. When BO allocation and shrinking occur concurrently, a circular locking dependency involving fs_reclaim and swap_guard can occur, leading to a deadlock such as: *===============================================================* * WARNING: possible circular locking dependency detected * *---------------------------------------------------------------* * * * CPU0 CPU1 * * ---- ---- * * lock(fs_reclaim); * * lock(&sa_manager->swap_guard); * * lock(fs_reclaim); * * lock(&sa_manager->swap_guard); * * * * *** DEADLOCK *** * *===============================================================* To avoid this, the BB pointer and SA are allocated using xe_bb_alloc() before taking lock and SA is initialized using xe_bb_init() preventing reclaim from being invoked in this context. Fixes: 864690cf4dd62 ("drm/xe/vf: Attach and detach CCS copy commands with BO") Signed-off-by: Satyanarayana K V P Cc: Matthew Brost Cc: Michal Wajdeczko Cc: Matthew Auld Cc: Thomas Hellström Cc: Maarten Lankhorst Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260220055519.2485681-7-satyanarayana.k.v.p@intel.com --- drivers/gpu/drm/xe/xe_bb.c | 59 +++++++++++----- drivers/gpu/drm/xe/xe_bb.h | 6 +- drivers/gpu/drm/xe/xe_migrate.c | 119 +++++++++++++++++--------------- drivers/gpu/drm/xe/xe_sa.c | 30 ++++++++ drivers/gpu/drm/xe/xe_sa.h | 2 + 5 files changed, 142 insertions(+), 74 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 8b678297aaa2..b0aceaec2685 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -59,16 +59,51 @@ struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm) return ERR_PTR(err); } -struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords, - enum xe_sriov_vf_ccs_rw_ctxs ctx_id) +/** + * xe_bb_alloc() - Allocate a new batch buffer structure + * @gt: the &xe_gt + * + * Allocates and 
initializes a new xe_bb structure with an associated + * uninitialized suballoc object. + * + * Returns: Batch buffer structure or an ERR_PTR(-ENOMEM). + */ +struct xe_bb *xe_bb_alloc(struct xe_gt *gt) { struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL); - struct xe_device *xe = gt_to_xe(gt); - struct xe_sa_manager *bb_pool; int err; if (!bb) return ERR_PTR(-ENOMEM); + + bb->bo = xe_sa_bo_alloc(GFP_KERNEL); + if (IS_ERR(bb->bo)) { + err = PTR_ERR(bb->bo); + goto err; + } + + return bb; + +err: + kfree(bb); + return ERR_PTR(err); +} + +/** + * xe_bb_init() - Initialize a batch buffer with memory from a sub-allocator pool + * @bb: Batch buffer structure to initialize + * @bb_pool: Suballoc memory pool to allocate from + * @dwords: Number of dwords to be allocated + * + * Initializes the batch buffer by allocating memory from the specified + * suballoc pool. + * + * Return: 0 on success, negative error code on failure. + */ +int xe_bb_init(struct xe_bb *bb, struct xe_sa_manager *bb_pool, u32 dwords) +{ + int err; + /* * We need to allocate space for the requested number of dwords & * one additional MI_BATCH_BUFFER_END dword. Since the whole SA @@ -76,22 +111,14 @@ struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords, * is not over written when the last chunk of SA is allocated for BB. * So, this extra DW acts as a guard here. 
*/ - - bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool; - bb->bo = xe_sa_bo_new(bb_pool, 4 * (dwords + 1)); - - if (IS_ERR(bb->bo)) { - err = PTR_ERR(bb->bo); - goto err; - } + err = xe_sa_bo_init(bb_pool, bb->bo, 4 * (dwords + 1)); + if (err) + return err; bb->cs = xe_sa_bo_cpu_addr(bb->bo); bb->len = 0; - return bb; -err: - kfree(bb); - return ERR_PTR(err); + return 0; } static struct xe_sched_job * diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h index 2a8adc9a6dee..231870b24c2f 100644 --- a/drivers/gpu/drm/xe/xe_bb.h +++ b/drivers/gpu/drm/xe/xe_bb.h @@ -12,12 +12,12 @@ struct dma_fence; struct xe_gt; struct xe_exec_queue; +struct xe_sa_manager; struct xe_sched_job; -enum xe_sriov_vf_ccs_rw_ctxs; struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm); -struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords, - enum xe_sriov_vf_ccs_rw_ctxs ctx_id); +struct xe_bb *xe_bb_alloc(struct xe_gt *gt); +int xe_bb_init(struct xe_bb *bb, struct xe_sa_manager *bb_pool, u32 dwords); struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb); struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 078a9bc2821d..333af7b57ae9 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -25,6 +25,7 @@ #include "xe_exec_queue.h" #include "xe_ggtt.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_hw_engine.h" #include "xe_lrc.h" #include "xe_map.h" @@ -1148,65 +1149,73 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, size -= src_L0; } + bb = xe_bb_alloc(gt); + if (IS_ERR(bb)) + return PTR_ERR(bb); + bb_pool = ctx->mem.ccs_bb_pool; - guard(mutex) (xe_sa_bo_swap_guard(bb_pool)); - xe_sa_bo_swap_shadow(bb_pool); + scoped_guard(mutex, xe_sa_bo_swap_guard(bb_pool)) { + xe_sa_bo_swap_shadow(bb_pool); - bb = xe_bb_ccs_new(gt, batch_size, read_write); - if 
(IS_ERR(bb)) { - drm_err(&xe->drm, "BB allocation failed.\n"); - err = PTR_ERR(bb); - return err; + err = xe_bb_init(bb, bb_pool, batch_size); + if (err) { + xe_gt_err(gt, "BB allocation failed.\n"); + xe_bb_free(bb, NULL); + return err; + } + + batch_size_allocated = batch_size; + size = xe_bo_size(src_bo); + batch_size = 0; + + /* + * Emit PTE and copy commands here. + * The CCS copy command can only support limited size. If the size to be + * copied is more than the limit, divide copy into chunks. So, calculate + * sizes here again before copy command is emitted. + */ + + while (size) { + batch_size += 10; /* Flush + ggtt addr + 2 NOP */ + u32 flush_flags = 0; + u64 ccs_ofs, ccs_size; + u32 ccs_pt; + + u32 avail_pts = max_mem_transfer_per_pass(xe) / + LEVEL0_PAGE_TABLE_ENCODE_SIZE; + + src_L0 = xe_migrate_res_sizes(m, &src_it); + + batch_size += pte_update_size(m, false, src, &src_it, &src_L0, + &src_L0_ofs, &src_L0_pt, 0, 0, + avail_pts); + + ccs_size = xe_device_ccs_bytes(xe, src_L0); + batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size, &ccs_ofs, + &ccs_pt, 0, avail_pts, avail_pts); + xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE)); + batch_size += EMIT_COPY_CCS_DW; + + emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src); + + emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src); + + bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags); + flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt, + src_L0_ofs, dst_is_pltt, + src_L0, ccs_ofs, true); + bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags); + + size -= src_L0; + } + + xe_assert(xe, (batch_size_allocated == bb->len)); + src_bo->bb_ccs[read_write] = bb; + + xe_sriov_vf_ccs_rw_update_bb_addr(ctx); + xe_sa_bo_sync_shadow(bb->bo); } - batch_size_allocated = batch_size; - size = xe_bo_size(src_bo); - batch_size = 0; - - /* - * Emit PTE and copy commands here. - * The CCS copy command can only support limited size. 
If the size to be - * copied is more than the limit, divide copy into chunks. So, calculate - * sizes here again before copy command is emitted. - */ - while (size) { - batch_size += 10; /* Flush + ggtt addr + 2 NOP */ - u32 flush_flags = 0; - u64 ccs_ofs, ccs_size; - u32 ccs_pt; - - u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; - - src_L0 = xe_migrate_res_sizes(m, &src_it); - - batch_size += pte_update_size(m, false, src, &src_it, &src_L0, - &src_L0_ofs, &src_L0_pt, 0, 0, - avail_pts); - - ccs_size = xe_device_ccs_bytes(xe, src_L0); - batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size, &ccs_ofs, - &ccs_pt, 0, avail_pts, avail_pts); - xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE)); - batch_size += EMIT_COPY_CCS_DW; - - emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src); - - emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src); - - bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags); - flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt, - src_L0_ofs, dst_is_pltt, - src_L0, ccs_ofs, true); - bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags); - - size -= src_L0; - } - - xe_assert(xe, (batch_size_allocated == bb->len)); - src_bo->bb_ccs[read_write] = bb; - - xe_sriov_vf_ccs_rw_update_bb_addr(ctx); - xe_sa_bo_sync_shadow(bb->bo); return 0; } diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index b738102575d4..c7ee952e8914 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -175,6 +175,36 @@ struct drm_suballoc *__xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size, return drm_suballoc_new(&sa_manager->base, size, gfp, true, 0); } +/** + * xe_sa_bo_alloc() - Allocate uninitialized suballoc object. + * @gfp: gfp flags used for memory allocation. + * + * Allocate memory for an uninitialized suballoc object. 
Intended usage is + * allocate memory for suballoc object outside of a reclaim tainted context + * and then be initialized at a later time in a reclaim tainted context. + * + * Return: a new uninitialized suballoc object, or an ERR_PTR(-ENOMEM). + */ +struct drm_suballoc *xe_sa_bo_alloc(gfp_t gfp) +{ + return drm_suballoc_alloc(gfp); +} + +/** + * xe_sa_bo_init() - Initialize a suballocation. + * @sa_manager: pointer to the sa_manager + * @sa: The struct drm_suballoc. + * @size: number of bytes we want to suballocate. + * + * Try to make a suballocation on a pre-allocated suballoc object of size @size. + * + * Return: zero on success, errno on failure. + */ +int xe_sa_bo_init(struct xe_sa_manager *sa_manager, struct drm_suballoc *sa, size_t size) +{ + return drm_suballoc_insert(&sa_manager->base, sa, size, true, 0); +} + /** * xe_sa_bo_flush_write() - Copy the data from the sub-allocation to the GPU memory. * @sa_bo: the &drm_suballoc to flush diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h index 05e9a4e00e78..50218b0d1404 100644 --- a/drivers/gpu/drm/xe/xe_sa.h +++ b/drivers/gpu/drm/xe/xe_sa.h @@ -38,6 +38,8 @@ static inline struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager return __xe_sa_bo_new(sa_manager, size, GFP_KERNEL); } +struct drm_suballoc *xe_sa_bo_alloc(gfp_t gfp); +int xe_sa_bo_init(struct xe_sa_manager *sa_manager, struct drm_suballoc *sa, size_t size); void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo); void xe_sa_bo_sync_read(struct drm_suballoc *sa_bo); void xe_sa_bo_free(struct drm_suballoc *sa_bo, struct dma_fence *fence); From dfeef98e55d0e5f622886d0a9980c235ff08fd97 Mon Sep 17 00:00:00 2001 From: Satyanarayana K V P Date: Fri, 20 Feb 2026 05:55:23 +0000 Subject: [PATCH 145/195] drm/xe/sa: Add lockdep annotations for SA manager swap_guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Annotate the SA manager init path to model taking swap_guard while under 
reclaim context. This helps lockdep catch potential circular dependencies between fs_reclaim and swap_guard in debug builds. Without this annotation, lockdep is unaware of this chain until the shrinker runs. Signed-off-by: Satyanarayana K V P Suggested-by: Matthew Brost Cc: Michal Wajdeczko Cc: Matthew Auld Reviewed-by: Matthew Brost Reviewed-by: Thomas Hellström Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260220055519.2485681-8-satyanarayana.k.v.p@intel.com --- drivers/gpu/drm/xe/xe_sa.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index c7ee952e8914..f32045f40b7a 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -89,6 +89,12 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, if (ret) return ERR_PTR(ret); + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&sa_manager->swap_guard); + fs_reclaim_release(GFP_KERNEL); + } + shadow = xe_managed_bo_create_pin_map(xe, tile, size, XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | From c129f8ebca1750bde614a101c3cd03945d27646a Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 23 Feb 2026 11:49:05 +0530 Subject: [PATCH 146/195] drm/xe: Add counter for invalid prefetch pagefaults Add a stats counter for invalid prefetch page faults to avoid excessive logging. 
Cc: Matthew Brost Cc: Matt Roper Reviewed-by: Matthew Brost Signed-off-by: Lucas De Marchi Signed-off-by: Varun Gupta Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260223061906.1420883-2-varun.gupta@intel.com --- drivers/gpu/drm/xe/xe_gt_stats.c | 1 + drivers/gpu/drm/xe/xe_gt_stats_types.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c index 8ed0160a6041..81cec441b449 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.c +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -60,6 +60,7 @@ static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { DEF_STAT_STR(SVM_TLB_INVAL_US, "svm_tlb_inval_us"), DEF_STAT_STR(VMA_PAGEFAULT_COUNT, "vma_pagefault_count"), DEF_STAT_STR(VMA_PAGEFAULT_KB, "vma_pagefault_kb"), + DEF_STAT_STR(INVALID_PREFETCH_PAGEFAULT_COUNT, "invalid_prefetch_pagefault_count"), DEF_STAT_STR(SVM_4K_PAGEFAULT_COUNT, "svm_4K_pagefault_count"), DEF_STAT_STR(SVM_64K_PAGEFAULT_COUNT, "svm_64K_pagefault_count"), DEF_STAT_STR(SVM_2M_PAGEFAULT_COUNT, "svm_2M_pagefault_count"), diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h index 79568591bd67..b6081c312474 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats_types.h +++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h @@ -15,6 +15,7 @@ enum xe_gt_stats_id { XE_GT_STATS_ID_SVM_TLB_INVAL_US, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, + XE_GT_STATS_ID_INVALID_PREFETCH_PAGEFAULT_COUNT, XE_GT_STATS_ID_SVM_4K_PAGEFAULT_COUNT, XE_GT_STATS_ID_SVM_64K_PAGEFAULT_COUNT, XE_GT_STATS_ID_SVM_2M_PAGEFAULT_COUNT, From f5ab554a6a4a1303dc6b7485ecc84d6523ca54c7 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Mon, 23 Feb 2026 11:49:06 +0530 Subject: [PATCH 147/195] drm/xe: Add prefetch fault support for Xe3p Xe3p hardware prefetches memory ranges and notifies software via an additional bit (bit 11) in the page fault descriptor that the fault was caused by prefetch. 
Extract the prefetch bit from the fault descriptor and echo it in the response (bit 6) only when the page fault handling fails. This allows the HW to suppress CAT errors for unsuccessful prefetch faults. For prefetch faults that fail, increment stats counter without verbose logging to avoid spamming the log. The prefetch flag is packed into BIT(7) of the access_type field to avoid growing the consumer struct. Based on original patches by Brian Welty and Priyanka Dandamudi . Bspec: 59311 Cc: Matthew Brost Cc: Priyanka Dandamudi Cc: Matt Roper Reviewed-by: Matthew Brost Signed-off-by: Lucas De Marchi Signed-off-by: Varun Gupta Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260223061906.1420883-3-varun.gupta@intel.com --- drivers/gpu/drm/xe/xe_guc_fwif.h | 5 +++-- drivers/gpu/drm/xe/xe_guc_pagefault.c | 6 +++++- drivers/gpu/drm/xe/xe_pagefault.c | 19 +++++++++++++------ drivers/gpu/drm/xe/xe_pagefault_types.h | 6 ++++-- 4 files changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index a33ea288b907..bb8f71d38611 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -261,7 +261,8 @@ struct xe_guc_pagefault_desc { #define PFD_ACCESS_TYPE GENMASK(1, 0) #define PFD_FAULT_TYPE GENMASK(3, 2) #define PFD_VFID GENMASK(9, 4) -#define PFD_RSVD_1 GENMASK(11, 10) +#define PFD_RSVD_1 BIT(10) +#define PFD_PREFETCH BIT(11) /* Only valid on Xe3+, reserved on prior platforms */ #define PFD_VIRTUAL_ADDR_LO GENMASK(31, 12) #define PFD_VIRTUAL_ADDR_LO_SHIFT 12 @@ -281,7 +282,7 @@ struct xe_guc_pagefault_reply { u32 dw1; #define PFR_VFID GENMASK(5, 0) -#define PFR_RSVD_1 BIT(6) +#define PFR_PREFETCH BIT(6) /* Only valid on Xe3+, reserved on prior platforms */ #define PFR_ENG_INSTANCE GENMASK(12, 7) #define PFR_ENG_CLASS GENMASK(15, 13) #define PFR_PDATA GENMASK(31, 16) diff --git a/drivers/gpu/drm/xe/xe_guc_pagefault.c b/drivers/gpu/drm/xe/xe_guc_pagefault.c index 
d48f6ed103bb..607e32392f46 100644 --- a/drivers/gpu/drm/xe/xe_guc_pagefault.c +++ b/drivers/gpu/drm/xe/xe_guc_pagefault.c @@ -8,10 +8,12 @@ #include "xe_guc_ct.h" #include "xe_guc_pagefault.h" #include "xe_pagefault.h" +#include "xe_pagefault_types.h" static void guc_ack_fault(struct xe_pagefault *pf, int err) { u32 vfid = FIELD_GET(PFD_VFID, pf->producer.msg[2]); + u32 prefetch = FIELD_GET(PFD_PREFETCH, pf->producer.msg[2]); u32 engine_instance = FIELD_GET(PFD_ENG_INSTANCE, pf->producer.msg[0]); u32 engine_class = FIELD_GET(PFD_ENG_CLASS, pf->producer.msg[0]); u32 pdata = FIELD_GET(PFD_PDATA_LO, pf->producer.msg[0]) | @@ -28,6 +30,7 @@ static void guc_ack_fault(struct xe_pagefault *pf, int err) FIELD_PREP(PFR_ASID, asid), FIELD_PREP(PFR_VFID, vfid) | + FIELD_PREP(PFR_PREFETCH, err ? prefetch : 0) | FIELD_PREP(PFR_ENG_INSTANCE, engine_instance) | FIELD_PREP(PFR_ENG_CLASS, engine_class) | FIELD_PREP(PFR_PDATA, pdata), @@ -76,7 +79,8 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) (FIELD_GET(PFD_VIRTUAL_ADDR_LO, msg[2]) << PFD_VIRTUAL_ADDR_LO_SHIFT); pf.consumer.asid = FIELD_GET(PFD_ASID, msg[1]); - pf.consumer.access_type = FIELD_GET(PFD_ACCESS_TYPE, msg[2]); + pf.consumer.access_type = FIELD_GET(PFD_ACCESS_TYPE, msg[2]) | + (FIELD_GET(PFD_PREFETCH, msg[2]) ? 
XE_PAGEFAULT_ACCESS_PREFETCH : 0); if (FIELD_GET(XE2_PFD_TRVA_FAULT, msg[0])) pf.consumer.fault_type_level = XE_PAGEFAULT_TYPE_LEVEL_NACK; else diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c index 72f589fd2b64..ea4857acf28d 100644 --- a/drivers/gpu/drm/xe/xe_pagefault.c +++ b/drivers/gpu/drm/xe/xe_pagefault.c @@ -136,7 +136,7 @@ static int xe_pagefault_handle_vma(struct xe_gt *gt, struct xe_vma *vma, static bool xe_pagefault_access_is_atomic(enum xe_pagefault_access_type access_type) { - return access_type == XE_PAGEFAULT_ACCESS_TYPE_ATOMIC; + return (access_type & XE_PAGEFAULT_ACCESS_TYPE_MASK) == XE_PAGEFAULT_ACCESS_TYPE_ATOMIC; } static struct xe_vm *xe_pagefault_asid_to_vm(struct xe_device *xe, u32 asid) @@ -226,7 +226,7 @@ static void xe_pagefault_print(struct xe_pagefault *pf) xe_gt_info(pf->gt, "\n\tASID: %d\n" "\tFaulted Address: 0x%08x%08x\n" "\tFaultType: %lu\n" - "\tAccessType: %d\n" + "\tAccessType: %lu\n" "\tFaultLevel: %lu\n" "\tEngineClass: %d %s\n" "\tEngineInstance: %d\n", @@ -235,7 +235,8 @@ static void xe_pagefault_print(struct xe_pagefault *pf) lower_32_bits(pf->consumer.page_addr), FIELD_GET(XE_PAGEFAULT_TYPE_MASK, pf->consumer.fault_type_level), - pf->consumer.access_type, + FIELD_GET(XE_PAGEFAULT_ACCESS_TYPE_MASK, + pf->consumer.access_type), FIELD_GET(XE_PAGEFAULT_LEVEL_MASK, pf->consumer.fault_type_level), pf->consumer.engine_class, @@ -261,9 +262,15 @@ static void xe_pagefault_queue_work(struct work_struct *w) err = xe_pagefault_service(&pf); if (err) { - xe_pagefault_print(&pf); - xe_gt_info(pf.gt, "Fault response: Unsuccessful %pe\n", - ERR_PTR(err)); + if (!(pf.consumer.access_type & XE_PAGEFAULT_ACCESS_PREFETCH)) { + xe_pagefault_print(&pf); + xe_gt_info(pf.gt, "Fault response: Unsuccessful %pe\n", + ERR_PTR(err)); + } else { + xe_gt_stats_incr(pf.gt, XE_GT_STATS_ID_INVALID_PREFETCH_PAGEFAULT_COUNT, 1); + xe_gt_dbg(pf.gt, "Prefetch Fault response: Unsuccessful %pe\n", + ERR_PTR(err)); + } } 
pf.producer.ops->ack_fault(&pf, err); diff --git a/drivers/gpu/drm/xe/xe_pagefault_types.h b/drivers/gpu/drm/xe/xe_pagefault_types.h index 0e378f41ede6..b3289219b1be 100644 --- a/drivers/gpu/drm/xe/xe_pagefault_types.h +++ b/drivers/gpu/drm/xe/xe_pagefault_types.h @@ -68,10 +68,12 @@ struct xe_pagefault { /** @consumer.asid: address space ID */ u32 asid; /** - * @consumer.access_type: access type, u8 rather than enum to - * keep size compact + * @consumer.access_type: access type and prefetch flag packed + * into a u8. */ u8 access_type; +#define XE_PAGEFAULT_ACCESS_TYPE_MASK GENMASK(1, 0) +#define XE_PAGEFAULT_ACCESS_PREFETCH BIT(7) /** * @consumer.fault_type_level: fault type and level, u8 rather * than enum to keep size compact From ad41e9418d97aa48a30517845a56ae8032c24a87 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:23 -0800 Subject: [PATCH 148/195] drm/xe/mtl: Drop pre-prod workarounds Wa_14015795083 & Wa_14014475959 Wa_14015795083 and Wa_14014475959 only apply to early steppings of Xe_LPG that appeared only in pre-production hardware (in fact Wa_14014475959 wasn't supposed to apply to _any_ steppings of version 12.71). Xe1 platforms already aren't officially supported by the Xe driver, but pre-production steppings are especially out of scope (and 'has_pre_prod_wa' is not set in the device descriptor). Drop both workarounds. 
Bspec: 55420 Reviewed-by: Balasubramani Vivekanandan Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-1-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 4 ---- drivers/gpu/drm/xe/xe_wa_oob.rules | 3 +-- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 76fa6d510aad..aa5755bcdc04 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -191,10 +191,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { /* Xe_LPG */ - { XE_RTP_NAME("14015795083"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE)) - }, { XE_RTP_NAME("14018575942"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274)), XE_RTP_ACTIONS(SET(COMP_MOD_CTRL, FORCE_MISS_FTLB)) diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index ac08f94f90a1..cc988f2a18d3 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -2,8 +2,7 @@ 16010904313 GRAPHICS_VERSION_RANGE(1200, 1210) 18022495364 GRAPHICS_VERSION_RANGE(1200, 1210) 22012773006 GRAPHICS_VERSION_RANGE(1200, 1250) -14014475959 GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0) - PLATFORM(DG2) +14014475959 PLATFORM(DG2) 22011391025 PLATFORM(DG2) 22012727170 SUBPLATFORM(DG2, G11) 22012727685 SUBPLATFORM(DG2, G11) From 4405938293631604f0bec290c2e30bd392608393 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:24 -0800 Subject: [PATCH 149/195] drm/xe/pvc: Drop pre-prod workarounds Production PVC hardware had a graphics stepping of C0. Xe1 platforms already aren't officially supported by the Xe driver, but pre-production steppings are especially out of scope (and 'has_pre_prod_wa' is not set in the device descriptor). Drop the workarounds that aren't relevant to production hardware. 
v2: - Drop the stream->override_gucrc which is no longer set anywhere after the removal of Wa_1509372804. (Bala) - Drop xe_guc_rc_set_mode / xe_guc_rc_unset_mode which are no longer used after the removal of Wa_1509372804. Bspec: 44484 Cc: Balasubramani Vivekanandan Reviewed-by: Balasubramani Vivekanandan Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-2-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_guc.c | 3 --- drivers/gpu/drm/xe/xe_guc_rc.c | 29 ----------------------------- drivers/gpu/drm/xe/xe_guc_rc.h | 2 -- drivers/gpu/drm/xe/xe_oa.c | 19 ------------------- drivers/gpu/drm/xe/xe_oa_types.h | 3 --- drivers/gpu/drm/xe/xe_wa.c | 5 ----- drivers/gpu/drm/xe/xe_wa_oob.rules | 2 -- 7 files changed, 63 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index cbbb4d665b8f..54d2fc780127 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -213,9 +213,6 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) !xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER)) flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; - if (XE_GT_WA(gt, 1509372804)) - flags |= GUC_WA_RENDER_RST_RC6_EXIT; - if (XE_GT_WA(gt, 14018913170)) flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6; diff --git a/drivers/gpu/drm/xe/xe_guc_rc.c b/drivers/gpu/drm/xe/xe_guc_rc.c index 427a889b2a1e..99fa127b261f 100644 --- a/drivers/gpu/drm/xe/xe_guc_rc.c +++ b/drivers/gpu/drm/xe/xe_guc_rc.c @@ -129,32 +129,3 @@ int xe_guc_rc_enable(struct xe_guc *guc) return guc_action_setup_gucrc(guc, GUCRC_FIRMWARE_CONTROL); } - -/** - * xe_guc_rc_set_mode() - set new GUCRC mode - * @guc: Xe GuC instance - * @mode: new value of the mode. - * - * Function to set GuC RC mode to one of the enum values. 
- * - * Returns: 0 on success, negative error code on error - */ -int xe_guc_rc_set_mode(struct xe_guc *guc, enum slpc_gucrc_mode mode) -{ - guard(xe_pm_runtime_noresume)(guc_to_xe(guc)); - return xe_guc_pc_action_set_param(&guc->pc, SLPC_PARAM_PWRGATE_RC_MODE, mode); -} - -/** - * xe_guc_rc_unset_mode() - revert to default mode - * @guc: Xe GuC instance - * - * Function to revert GuC RC mode to platform defaults. - * - * Returns: 0 on success, negative error code on error - */ -int xe_guc_rc_unset_mode(struct xe_guc *guc) -{ - guard(xe_pm_runtime_noresume)(guc_to_xe(guc)); - return xe_guc_pc_action_unset_param(&guc->pc, SLPC_PARAM_PWRGATE_RC_MODE); -} diff --git a/drivers/gpu/drm/xe/xe_guc_rc.h b/drivers/gpu/drm/xe/xe_guc_rc.h index f1f949e7ecc0..b083fc364dd4 100644 --- a/drivers/gpu/drm/xe/xe_guc_rc.h +++ b/drivers/gpu/drm/xe/xe_guc_rc.h @@ -12,7 +12,5 @@ enum slpc_gucrc_mode; int xe_guc_rc_init(struct xe_guc *guc); int xe_guc_rc_enable(struct xe_guc *guc); void xe_guc_rc_disable(struct xe_guc *guc); -int xe_guc_rc_set_mode(struct xe_guc *guc, enum slpc_gucrc_mode mode); -int xe_guc_rc_unset_mode(struct xe_guc *guc); #endif diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index a462d6983d8d..72fc4424017b 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -873,10 +873,6 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) xe_force_wake_put(gt_to_fw(gt), stream->fw_ref); xe_pm_runtime_put(stream->oa->xe); - /* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */ - if (stream->override_gucrc) - xe_gt_WARN_ON(gt, xe_guc_rc_unset_mode(>->uc.guc)); - xe_oa_free_configs(stream); xe_file_put(stream->xef); } @@ -1760,18 +1756,6 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, goto exit; } - /* - * GuC reset of engines causes OA to lose configuration - * state. Prevent this by overriding GUCRC mode. 
- */ - if (XE_GT_WA(stream->gt, 1509372804)) { - ret = xe_guc_rc_set_mode(>->uc.guc, SLPC_GUCRC_MODE_GUCRC_NO_RC6); - if (ret) - goto err_free_configs; - - stream->override_gucrc = true; - } - /* Take runtime pm ref and forcewake to disable RC6 */ xe_pm_runtime_get(stream->oa->xe); stream->fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); @@ -1822,9 +1806,6 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, err_fw_put: xe_force_wake_put(gt_to_fw(gt), stream->fw_ref); xe_pm_runtime_put(stream->oa->xe); - if (stream->override_gucrc) - xe_gt_WARN_ON(gt, xe_guc_rc_unset_mode(>->uc.guc)); -err_free_configs: xe_oa_free_configs(stream); exit: xe_file_put(stream->xef); diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 08cc8d7c2215..b03ffd513483 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -239,9 +239,6 @@ struct xe_oa_stream { /** @poll_period_ns: hrtimer period for checking OA buffer for available data */ u64 poll_period_ns; - /** @override_gucrc: GuC RC has been overridden for the OA stream */ - bool override_gucrc; - /** @oa_status: temporary storage for oa_status register value */ u32 oa_status; diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index aa5755bcdc04..4b2c77f51fd8 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -499,11 +499,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { ENGINE_CLASS(COMPUTE)), XE_RTP_ACTIONS(SET(RING_HWSTAM(RENDER_RING_BASE), ~0)) }, - { XE_RTP_NAME("14014999345"), - XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE), - GRAPHICS_STEP(B0, C0)), - XE_RTP_ACTIONS(SET(CACHE_MODE_SS, DISABLE_ECC)) - }, /* Xe_LPG */ diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index cc988f2a18d3..c3222d950488 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -8,7 +8,6 @@ 22012727685 SUBPLATFORM(DG2, G11) 22016596838 
PLATFORM(PVC) 18020744125 PLATFORM(PVC) -1509372804 PLATFORM(PVC), GRAPHICS_STEP(A0, C0) 1409600907 GRAPHICS_VERSION_RANGE(1200, 1250) 22014953428 SUBPLATFORM(DG2, G10) SUBPLATFORM(DG2, G12) @@ -53,7 +52,6 @@ 18013179988 GRAPHICS_VERSION(1255) GRAPHICS_VERSION_RANGE(1270, 1274) 1508761755 GRAPHICS_VERSION(1255) - GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0) 16023105232 GRAPHICS_VERSION_RANGE(2001, 3001) MEDIA_VERSION_RANGE(1301, 3000) MEDIA_VERSION(3002) From 32fbd22860326af33db6b421dd478211d2106e1d Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:25 -0800 Subject: [PATCH 150/195] drm/xe/wa: Document new policy regarding workaround IP ranges During early Xe driver development, our policy for applying workarounds to ranges of IP versions was to only use GRAPHICS_VERSION_RANGE and MEDIA_VERSION_RANGE rules when all of the affected IP versions had consecutive version numbers; otherwise separate RTP entries should be used. For example, a workaround that applies to all Xe2-based platforms would be implemented in the driver with two RTP entries: one using GRAPHICS_VERSION_RANGE(2001, 2002) and the other using GRAPHICS_VERSION(2004). This ensured that if a new IP variant showed up in the future with currently unused version 20.03, an old workaround entry wouldn't automatically apply to it by accident (and we could always consolidate those two distinct entries in the future if the workaround database did explicitly indicate that 20.03 also needed the workaround). Now that we're a couple years down the road with this driver, the number of IP versions supported is much larger (several Xe2 20.xx versions, several Xe3 30.xx versions, and a couple Xe3p 35.xx versions). 
When new workarounds are discovered that need to apply to a wide range of IPs, it's becoming more of a pain to create independent entries for each non-contiguous range of versions, and the general consensus is that we should revisit our previous policy and start allowing use of VERSION_RANGE constructs for non-contiguous version ranges. Note that allowing ranges that cover currently unused versions will require additional care if/when some of those intermediate version numbers start being used in the future. We'll need to re-check every workaround that has a range including the new IP version and check the hardware database to see whether the workaround also applies to the new version (no code change required) or whether we need to split the existing range into two separate ranges that don't cover the new version. The platform enabling engineers are willing to take on this extra review burden at the time we first enable a new IP in the driver (see lore link below for one recent discussion). Update the kerneldoc for the workaround file to make the new policy official. Link: https://lore.kernel.org/all/20260203233600.GT458797@mdroper-desk1.amr.corp.intel.com/ Reviewed-by: Balasubramani Vivekanandan Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-3-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 4b2c77f51fd8..c3d694947bd5 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -111,6 +111,17 @@ * difference of how they are maintained in the code. In xe it uses the * xe_rtp infrastructure so the workarounds can be kept in tables, following * a more declarative approach rather than procedural. + * + * .. 
note:: + * When a workaround applies to every single known IP version in a range, + * the preferred handling is to use a single range-based RTP entry rather + * than individual entries for each version, even if some of the intermediate + * version numbers are currently unused. If a new intermediate IP version + * appears in the future and is enabled in the driver, any existing + * range-based entries that contain the new version number will need to be + * analyzed to determine whether their workarounds should apply to the new + * version, or whether any existing range-based entries need to be split + * into two entries that do not include the new intermediate version. */ #undef XE_REG_MCR From 941f538b0af81512212374031ad2db8cde0e3b6f Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:26 -0800 Subject: [PATCH 151/195] drm/xe: Consolidate workaround entries for Wa_16021867713 Wa_16021867713 applies to every single media IP from 13.00 to 30.02 (inclusive). We can consolidate the multiple per-version entries down to a single range entry.
Reviewed-by: Balasubramani Vivekanandan Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-4-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 38 ++++++++------------------------------ 1 file changed, 8 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index c3d694947bd5..4a8685f2ea98 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -131,6 +131,8 @@ __diag_push(); __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); static const struct xe_rtp_entry_sr gt_was[] = { + /* Workarounds applying over a range of IPs */ + { XE_RTP_NAME("14011060649"), XE_RTP_RULES(MEDIA_VERSION_RANGE(1200, 1255), ENGINE_CLASS(VIDEO_DECODE), @@ -146,6 +148,12 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1260)), XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE)) }, + { XE_RTP_NAME("16021867713"), + XE_RTP_RULES(MEDIA_VERSION_RANGE(1300, 3002), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, /* DG1 */ @@ -213,12 +221,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { /* Xe_LPM+ */ - { XE_RTP_NAME("16021867713"), - XE_RTP_RULES(MEDIA_VERSION(1300), - ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), - }, { XE_RTP_NAME("22016670082"), XE_RTP_RULES(MEDIA_VERSION(1300)), XE_RTP_ACTIONS(SET(XELPMP_SQCNT1, ENFORCE_RAR)) @@ -232,12 +234,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, - { XE_RTP_NAME("16021867713"), - XE_RTP_RULES(MEDIA_VERSION(2000), - ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), - }, { XE_RTP_NAME("14019449301"), 
XE_RTP_RULES(MEDIA_VERSION(2000), ENGINE_CLASS(VIDEO_DECODE)), XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)), @@ -262,12 +258,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { /* Xe2_HPM */ - { XE_RTP_NAME("16021867713"), - XE_RTP_RULES(MEDIA_VERSION(1301), - ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), - }, { XE_RTP_NAME("14019449301"), XE_RTP_RULES(MEDIA_VERSION(1301), ENGINE_CLASS(VIDEO_DECODE)), XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)), @@ -291,12 +281,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { /* Xe3_LPM */ - { XE_RTP_NAME("16021867713"), - XE_RTP_RULES(MEDIA_VERSION(3000), - ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), - }, { XE_RTP_NAME("16021865536"), XE_RTP_RULES(MEDIA_VERSION(3000), ENGINE_CLASS(VIDEO_DECODE)), @@ -309,12 +293,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, - { XE_RTP_NAME("16021867713"), - XE_RTP_RULES(MEDIA_VERSION(3002), - ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), - }, { XE_RTP_NAME("14021486841"), XE_RTP_RULES(MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), ENGINE_CLASS(VIDEO_DECODE)), From aa0f0a678370b026235481d2a05eebe6e3cc90d8 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:27 -0800 Subject: [PATCH 152/195] drm/xe: Consolidate workaround entries for Wa_14019449301 Wa_14019449301 applies to both media IP 13.01 and 20.00 and none of the version numbers between those are used. Consolidate the two entries into a single range entry.
Reviewed-by: Balasubramani Vivekanandan Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-5-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 4a8685f2ea98..33e7e33db831 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -154,6 +154,11 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, + { XE_RTP_NAME("14019449301"), + XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, 2000), ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, /* DG1 */ @@ -234,11 +239,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, - { XE_RTP_NAME("14019449301"), - XE_RTP_RULES(MEDIA_VERSION(2000), ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), - }, { XE_RTP_NAME("16028005424"), XE_RTP_RULES(MEDIA_VERSION(2000)), XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) @@ -258,11 +258,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { /* Xe2_HPM */ - { XE_RTP_NAME("14019449301"), - XE_RTP_RULES(MEDIA_VERSION(1301), ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), - }, { XE_RTP_NAME("16028005424"), XE_RTP_RULES(MEDIA_VERSION(1301)), XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) From c9459e8c8baa9ff6f90b047505a03b394b3e27f4 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:28 -0800 Subject: [PATCH 153/195] drm/xe: Consolidate workaround entries for Wa_16028005424 Wa_16028005424 applies to all 
media IPs from 13.01 to 35.00 (inclusive) and all graphics IPs from 30.00 to 30.05 (inclusive). Consolidate the multiple RTP entries into a single range-based entry. Reviewed-by: Balasubramani Vivekanandan Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-6-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 31 +++++-------------------------- 1 file changed, 5 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 33e7e33db831..4009323da802 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -159,6 +159,11 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, + { XE_RTP_NAME("16028005424"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), OR, + MEDIA_VERSION_RANGE(1301, 3500)), + XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) + }, /* DG1 */ @@ -239,10 +244,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, - { XE_RTP_NAME("16028005424"), - XE_RTP_RULES(MEDIA_VERSION(2000)), - XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) - }, /* Xe2_HPG */ @@ -256,23 +257,12 @@ static const struct xe_rtp_entry_sr gt_was[] = { LSN_DIM_Z_WGT(1))) }, - /* Xe2_HPM */ - - { XE_RTP_NAME("16028005424"), - XE_RTP_RULES(MEDIA_VERSION(1301)), - XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) - }, - /* Xe3_LPG */ { XE_RTP_NAME("14021871409"), XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0)), XE_RTP_ACTIONS(SET(UNSLCGCTL9454, LSCFE_CLKGATE_DIS)) }, - { XE_RTP_NAME("16028005424"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005)), - XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) - }, /* Xe3_LPM */ @@ -294,17 +284,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { 
XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), RAMDFTUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, - { XE_RTP_NAME("16028005424"), - XE_RTP_RULES(MEDIA_VERSION_RANGE(3000, 3002)), - XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) - }, - - /* Xe3p_LPM */ - - { XE_RTP_NAME("16028005424"), - XE_RTP_RULES(MEDIA_VERSION(3500)), - XE_RTP_ACTIONS(SET(GUC_INTR_CHICKEN, DISABLE_SIGNALING_ENGINES)) - }, /* Xe3P_LPG */ From 6b4578b7c1b5bb1e0ec46cea87858bb4eed7092f Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:29 -0800 Subject: [PATCH 154/195] drm/xe: Consolidate workaround entries for Wa_16021865536 Wa_16021865536 applies to both media versions 30.00 and 30.02; since version 30.01 is currently unused we can consolidate the two RTP entries into a single range-based entry. Reviewed-by: Balasubramani Vivekanandan Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-7-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 4009323da802..092d46f48165 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -267,13 +267,7 @@ static const struct xe_rtp_entry_sr gt_was[] = { /* Xe3_LPM */ { XE_RTP_NAME("16021865536"), - XE_RTP_RULES(MEDIA_VERSION(3000), - ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), - }, - { XE_RTP_NAME("16021865536"), - XE_RTP_RULES(MEDIA_VERSION(3002), + XE_RTP_RULES(MEDIA_VERSION_RANGE(3000, 3002), ENGINE_CLASS(VIDEO_DECODE)), XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), From ef32868d58490f62a67458f57f69a11c466e64dd Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:30 -0800 Subject: [PATCH 155/195] drm/xe: Consolidate workaround entries for Wa_18032247524 Wa_18032247524 
applies to all graphics versions from 20.01 through 20.04 (inclusive). Consolidate the two RTP entries into a single range-based entry. Reviewed-by: Balasubramani Vivekanandan Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-8-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 092d46f48165..a483a76c7665 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -306,6 +306,8 @@ static const struct xe_rtp_entry_sr gt_was[] = { }; static const struct xe_rtp_entry_sr engine_was[] = { + /* Workarounds applying over a range of IPs */ + { XE_RTP_NAME("22010931296, 18011464164, 14010919138"), XE_RTP_RULES(GRAPHICS_VERSION(1200), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(FF_THREAD_MODE(RENDER_RING_BASE), @@ -341,6 +343,11 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN1(RENDER_RING_BASE), FFSC_PERCTX_PREEMPT_CTRL)) }, + { XE_RTP_NAME("18032247524"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE)) + }, /* TGL */ @@ -478,11 +485,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { /* Xe2_LPG */ - { XE_RTP_NAME("18032247524"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE)) - }, { XE_RTP_NAME("16018712365"), XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS)) @@ -551,11 +553,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) }, - { XE_RTP_NAME("18032247524"), - 
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE)) - }, { XE_RTP_NAME("14018471104"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), FUNC(xe_rtp_match_first_render_or_compute)), From 1aaea53115870d2acce80ce9c0f3566438545755 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:31 -0800 Subject: [PATCH 156/195] drm/xe: Consolidate workaround entries for Wa_16018712365 Wa_16018712365 applies to all graphics versions from 20.01 through 20.04 (inclusive). Consolidate the two RTP entries into a single range-based entry. Reviewed-by: Balasubramani Vivekanandan Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-9-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index a483a76c7665..f99f1fbe3aee 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -348,6 +348,11 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE)) }, + { XE_RTP_NAME("16018712365"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS)) + }, /* TGL */ @@ -485,10 +490,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { /* Xe2_LPG */ - { XE_RTP_NAME("16018712365"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS)) - }, { XE_RTP_NAME("14020338487"), XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) @@ -538,11 +539,6 @@ static const 
struct xe_rtp_entry_sr engine_was[] = { /* Xe2_HPG */ - { XE_RTP_NAME("16018712365"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS)) - }, { XE_RTP_NAME("16018737384"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2999), FUNC(xe_rtp_match_first_render_or_compute)), From 37681c32f25970aa7d7968343e467e0e10453819 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:32 -0800 Subject: [PATCH 157/195] drm/xe: Consolidate workaround entries for Wa_14020338487 Wa_14020338487 applies to all graphics versions from 20.01 through 20.04 (inclusive). Consolidate the two RTP entries into a single range-based entry. Reviewed-by: Balasubramani Vivekanandan Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-10-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index f99f1fbe3aee..4809166ba8ec 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -353,6 +353,11 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS)) }, + { XE_RTP_NAME("14020338487"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) + }, /* TGL */ @@ -490,10 +495,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { /* Xe2_LPG */ - { XE_RTP_NAME("14020338487"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) - }, { XE_RTP_NAME("18034896535, 16021540221"), /* 16021540221: GRAPHICS_STEP(A0, B0) */ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), 
FUNC(xe_rtp_match_first_render_or_compute)), @@ -544,11 +545,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS)) }, - { XE_RTP_NAME("14020338487"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) - }, { XE_RTP_NAME("14018471104"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), FUNC(xe_rtp_match_first_render_or_compute)), From 792880cada369a92da375c0393c7f0359706e547 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:33 -0800 Subject: [PATCH 158/195] drm/xe: Consolidate workaround entries for Wa_14018471104 Wa_14018471104 applies to all graphics versions from 20.01 through 20.04 (inclusive). Consolidate the two RTP entries into a single range-based entry. Reviewed-by: Balasubramani Vivekanandan Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-11-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 4809166ba8ec..9119ca9ba103 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -358,6 +358,11 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) }, + { XE_RTP_NAME("14018471104"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) + }, /* TGL */ @@ -500,10 +505,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) }, - { XE_RTP_NAME("14018471104"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), 
FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) - }, /* * These two workarounds are the same, just applying to different * engines. Although Wa_18032095049 (for the RCS) isn't required on @@ -545,11 +546,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS)) }, - { XE_RTP_NAME("14018471104"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) - }, /* * Although this workaround isn't required for the RCS, disabling these * reports has no impact for our driver or the GuC, so we go ahead and From 909a0bcd37c6c9e1fb7452820bcfb76d64fc84f1 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:34 -0800 Subject: [PATCH 159/195] drm/xe: Consolidate workaround entries for Wa_16021639441 Wa_16021639441 applies to all graphics versions from 20.01 through 20.04 (inclusive) and all media versions from 13.01 to 20.00 (inclusive). Consolidate the RTP entries into a single range-based entry. Also drop the reference to Wa_18032095049 which was only relevant to pre-production platforms that we no longer support. 
Reviewed-by: Balasubramani Vivekanandan Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-12-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 59 +++++++++----------------------------- 1 file changed, 13 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 9119ca9ba103..f0c42be06214 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -363,6 +363,19 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) }, + /* + * Although this workaround isn't required for the RCS, disabling these + * reports has no impact for our driver or the GuC, so we go ahead and + * apply this to all engines for simplicity. + */ + { XE_RTP_NAME("16021639441"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), OR, + MEDIA_VERSION_RANGE(1301, 2000)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), + GHWSP_CSB_REPORT_DIS | + PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, /* TGL */ @@ -505,20 +518,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) }, - /* - * These two workarounds are the same, just applying to different - * engines. Although Wa_18032095049 (for the RCS) isn't required on - * all steppings, disabling these reports has no impact for our - * driver or the GuC, so we go ahead and treat it the same as - * Wa_16021639441 which does apply to all steppings. 
- */ - { XE_RTP_NAME("18032095049, 16021639441"), - XE_RTP_RULES(GRAPHICS_VERSION(2004)), - XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), - GHWSP_CSB_REPORT_DIS | - PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, - XE_RTP_ACTION_FLAG(ENGINE_BASE))) - }, { XE_RTP_NAME("16018610683"), XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE)) @@ -546,18 +545,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS)) }, - /* - * Although this workaround isn't required for the RCS, disabling these - * reports has no impact for our driver or the GuC, so we go ahead and - * apply this to all engines for simplicity. - */ - { XE_RTP_NAME("16021639441"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002)), - XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), - GHWSP_CSB_REPORT_DIS | - PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, - XE_RTP_ACTION_FLAG(ENGINE_BASE))) - }, { XE_RTP_NAME("14019811474"), XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), @@ -584,26 +571,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE)) }, - /* Xe2_LPM */ - - { XE_RTP_NAME("16021639441"), - XE_RTP_RULES(MEDIA_VERSION(2000)), - XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), - GHWSP_CSB_REPORT_DIS | - PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, - XE_RTP_ACTION_FLAG(ENGINE_BASE))) - }, - - /* Xe2_HPM */ - - { XE_RTP_NAME("16021639441"), - XE_RTP_RULES(MEDIA_VERSION(1301)), - XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), - GHWSP_CSB_REPORT_DIS | - PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, - XE_RTP_ACTION_FLAG(ENGINE_BASE))) - }, - /* Xe3_LPG */ { XE_RTP_NAME("14021402888"), From a9b2f1467464fd4521c938d15cc6e1fdf35f1f28 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:35 -0800 Subject: [PATCH 160/195] drm/xe: Consolidate workaround entries for Wa_14021402888 Wa_14021402888 applies to 
all graphics versions from 20.01 through 30.05 (inclusive). Consolidate the RTP entries into a single range-based entry. Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-13-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index f0c42be06214..5d71cb9dca51 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -376,6 +376,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + { XE_RTP_NAME("14021402888"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3005), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) + }, /* TGL */ @@ -522,10 +526,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE)) }, - { XE_RTP_NAME("14021402888"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) - }, { XE_RTP_NAME("13012615864"), XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), @@ -550,10 +550,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, WR_REQ_CHAINING_DIS)) }, - { XE_RTP_NAME("14021402888"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) - }, { XE_RTP_NAME("14021821874, 14022954250"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), FUNC(xe_rtp_match_first_render_or_compute)), @@ -573,11 +569,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { /* Xe3_LPG */ - { 
XE_RTP_NAME("14021402888"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) - }, { XE_RTP_NAME("18034896535"), XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0), FUNC(xe_rtp_match_first_render_or_compute)), @@ -608,10 +599,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(RING_PSMI_CTL(0), RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, - { XE_RTP_NAME("14021402888"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3003, 3005), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) - }, { XE_RTP_NAME("18041344222"), XE_RTP_RULES(GRAPHICS_VERSION(3000), FUNC(xe_rtp_match_first_render_or_compute), From 7b6f615d2b1f01e2860e9aa7977884c31af30df1 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:36 -0800 Subject: [PATCH 161/195] drm/xe: Consolidate workaround entries for Wa_13012615864 Wa_13012615864 applies to all graphics versions from 20.01 through 30.05 (inclusive). Consolidate the RTP entries into a single range-based entry. 
Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-14-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 5d71cb9dca51..190b31a9d49d 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -380,6 +380,11 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3005), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, + { XE_RTP_NAME("13012615864"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3005), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) + }, /* TGL */ @@ -526,11 +531,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE)) }, - { XE_RTP_NAME("13012615864"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) - }, { XE_RTP_NAME("18041344222"), XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute), @@ -555,11 +555,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT)) }, - { XE_RTP_NAME("13012615864"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) - }, { XE_RTP_NAME("18041344222"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), FUNC(xe_rtp_match_first_render_or_compute), @@ -587,12 +582,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_CHICKEN, 
QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE)) }, - { XE_RTP_NAME("13012615864"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), OR, - GRAPHICS_VERSION_RANGE(3003, 3005), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) - }, { XE_RTP_NAME("16023105232"), XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, 3000), OR, GRAPHICS_VERSION_RANGE(2001, 3001)), From 55b19abb6c44db40fe1ebd01e9c16aa02c4cf663 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:38 -0800 Subject: [PATCH 162/195] drm/xe: Consolidate workaround entries for Wa_14019877138 Wa_14019877138 applies to all graphics versions from 12.55 through 20.04 (inclusive) that have a render engine. Consolidate the RTP entries into a single range-based entry. Note that the DG2 entry for this workaround was missing an ENGINE_CLASS(RENDER) rule; that mistake is fixed by this consolidation. Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-16-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 190b31a9d49d..dd871bea3528 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -648,6 +648,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(1200)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC)) }, + { XE_RTP_NAME("14019877138"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1255, 2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) + }, /* DG1 */ @@ -684,10 +688,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) }, - { XE_RTP_NAME("14019877138"), - XE_RTP_RULES(PLATFORM(DG2)), - XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) - }, /* PVC */ 
@@ -705,10 +705,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274)), XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) }, - { XE_RTP_NAME("14019877138"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) - }, /* Xe2_LPG */ @@ -716,10 +712,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE)) }, - { XE_RTP_NAME("14019877138"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) - }, { XE_RTP_NAME("14019988906"), XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) @@ -769,10 +761,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) }, - { XE_RTP_NAME("14019877138"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) - }, { XE_RTP_NAME("14021490052"), XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(FF_MODE, From f0d6d356f8ac427d1f3eb8fb783a64ac3efd6fc7 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:39 -0800 Subject: [PATCH 163/195] drm/xe: Consolidate workaround entries for Wa_14019386621 Wa_14019386621 applies to all graphics versions from 20.01 through 20.04 (inclusive). Consolidate the RTP entries into a single range-based entry. 
Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-17-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index dd871bea3528..a9730020103c 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -652,6 +652,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1255, 2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) }, + { XE_RTP_NAME("14019386621"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE)) + }, /* DG1 */ @@ -708,10 +712,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { /* Xe2_LPG */ - { XE_RTP_NAME("14019386621"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE)) - }, { XE_RTP_NAME("14019988906"), XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) @@ -749,10 +749,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN)) }, - { XE_RTP_NAME("14019386621"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE)) - }, { XE_RTP_NAME("14020756599"), XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) From c2142a1a841525d897ef69b3e6a5ab48183e1fcf Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:40 -0800 Subject: [PATCH 164/195] drm/xe: Consolidate workaround entries for Wa_14019988906 Wa_14019988906 applies to all 
graphics versions from 20.01 through 20.04 (inclusive). Consolidate the RTP entries into a single range-based entry. Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-18-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index a9730020103c..ddf747fdf340 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -656,6 +656,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE)) }, + { XE_RTP_NAME("14019988906"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) + }, /* DG1 */ @@ -712,10 +716,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { /* Xe2_LPG */ - { XE_RTP_NAME("14019988906"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) - }, { XE_RTP_NAME("18033852989"), XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) @@ -753,10 +753,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) }, - { XE_RTP_NAME("14019988906"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) - }, { XE_RTP_NAME("14021490052"), XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(FF_MODE, From fe681e7b44d78fd77d79de21eca58c3b6bdcda0e Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:41 -0800 Subject: [PATCH 165/195] drm/xe: Consolidate 
workaround entries for Wa_18033852989 Wa_18033852989 applies to all graphics versions from 20.01 through 20.04 (inclusive). Consolidate the RTP entries into a single range-based entry. Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-19-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index ddf747fdf340..30e93e1a8114 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -660,6 +660,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) }, + { XE_RTP_NAME("18033852989"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) + }, /* DG1 */ @@ -716,10 +720,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { /* Xe2_LPG */ - { XE_RTP_NAME("18033852989"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) - }, { XE_RTP_NAME("14021567978"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), ENGINE_CLASS(RENDER)), @@ -770,10 +770,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) }, - { XE_RTP_NAME("18033852989"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) - }, /* Xe3_LPG */ { XE_RTP_NAME("14021490052"), From 97ec6cb8109cea75c8feb0ec540c124c9b4be749 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:42 -0800 Subject: [PATCH 166/195] 
drm/xe: Consolidate workaround entries for Wa_15016589081 Wa_15016589081 applies to all graphics versions from 20.01 through 20.04 (inclusive). Consolidate the RTP entries into a single range-based entry. Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-20-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 30e93e1a8114..b57b783a9d98 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -664,6 +664,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) }, + { XE_RTP_NAME("15016589081"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) + }, /* DG1 */ @@ -739,10 +743,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_PARTIAL_AUTOSTRIP | DIS_AUTOSTRIP)) }, - { XE_RTP_NAME("15016589081"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) - }, /* Xe2_HPG */ { XE_RTP_NAME("15010599737"), @@ -762,10 +762,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_PARTIAL_AUTOSTRIP | DIS_AUTOSTRIP)) }, - { XE_RTP_NAME("15016589081"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) - }, { XE_RTP_NAME("22021007897"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) From a2d0403de6492a32e5c175b22ebac053f579d350 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:44 -0800 Subject: [PATCH 167/195] 
drm/xe/wa_oob: Consolidate some OOB rules Given the new policy of allowing graphics/media IP ranges to extend over unused IP versions, we can consolidate some of the OOB workaround rules and simplify the table. If new IP variants eventually show up that use these unused versions (e.g., media version 30.01, graphics versions 20.03 / 30.02, etc.), and if an existing workaround does not extend to that new intermediate version, the ranges will be split back apart as part of the enablement work for that new IP version. Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-22-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa_oob.rules | 31 ++++++++---------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index c3222d950488..80b54b195f20 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -24,18 +24,9 @@ MEDIA_VERSION(2000) 16022287689 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) -13011645652 GRAPHICS_VERSION(2004) - GRAPHICS_VERSION_RANGE(3000, 3001) - GRAPHICS_VERSION(3003) - GRAPHICS_VERSION_RANGE(3004, 3005) -14022293748 GRAPHICS_VERSION_RANGE(2001, 2002) - GRAPHICS_VERSION(2004) - GRAPHICS_VERSION_RANGE(3000, 3005) -22019794406 GRAPHICS_VERSION_RANGE(2001, 2002) - GRAPHICS_VERSION(2004) - GRAPHICS_VERSION_RANGE(3000, 3001) - GRAPHICS_VERSION(3003) - GRAPHICS_VERSION_RANGE(3004, 3005) +13011645652 GRAPHICS_VERSION_RANGE(2004, 3005) +14022293748 GRAPHICS_VERSION_RANGE(2001, 3005) +22019794406 GRAPHICS_VERSION_RANGE(2001, 3005) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf) @@ -52,18 +43,12 @@ 18013179988 GRAPHICS_VERSION(1255) GRAPHICS_VERSION_RANGE(1270, 1274) 1508761755 GRAPHICS_VERSION(1255) -16023105232 GRAPHICS_VERSION_RANGE(2001, 3001) - 
MEDIA_VERSION_RANGE(1301, 3000) - MEDIA_VERSION(3002) - GRAPHICS_VERSION_RANGE(3003, 3005) -16026508708 GRAPHICS_VERSION_RANGE(1200, 3001) - MEDIA_VERSION_RANGE(1300, 3000) - MEDIA_VERSION(3002) - GRAPHICS_VERSION_RANGE(3003, 3005) +16023105232 GRAPHICS_VERSION_RANGE(2001, 3005) + MEDIA_VERSION_RANGE(1301, 3002) +16026508708 GRAPHICS_VERSION_RANGE(1200, 3005) + MEDIA_VERSION_RANGE(1300, 3002) 14020001231 GRAPHICS_VERSION_RANGE(2001,2004), FUNC(xe_rtp_match_psmi_enabled) - MEDIA_VERSION(2000), FUNC(xe_rtp_match_psmi_enabled) - MEDIA_VERSION(3000), FUNC(xe_rtp_match_psmi_enabled) - MEDIA_VERSION(3002), FUNC(xe_rtp_match_psmi_enabled) + MEDIA_VERSION_RANGE(2000, 3002), FUNC(xe_rtp_match_psmi_enabled) 16023683509 MEDIA_VERSION(2000), FUNC(xe_rtp_match_psmi_enabled) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_psmi_enabled) From 1aafc0a9916b95b3f2bdecc8951bd0bb5c3a352b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 20 Feb 2026 09:27:45 -0800 Subject: [PATCH 168/195] drm/xe: Consolidate workaround entries for Wa_14023061436 Wa_14023061436 applies to all graphics versions from 30.00 through 30.05 (inclusive) since there is currently no IP that uses version 30.02. Consolidate the RTP rules into a single range. 
Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260220-forupstream-wa_cleanup-v2-23-b12005a05af6@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index b57b783a9d98..50f5d3381b4c 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -576,9 +576,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { SMP_FORCE_128B_OVERFETCH)) }, { XE_RTP_NAME("14023061436"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), - FUNC(xe_rtp_match_first_render_or_compute), OR, - GRAPHICS_VERSION_RANGE(3003, 3005), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_CHICKEN, QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE)) }, From 369cc88049855269b7620426bda4fb9ce2a2d1ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 4 Feb 2026 16:33:20 +0100 Subject: [PATCH 169/195] drm/xe/uapi: Introduce a flag to disallow vm overcommit in fault mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some compute applications may try to allocate device memory to probe how much device memory is actually available, assuming that the application will be the only one running on the particular GPU. That strategy fails in fault mode since it allows VM overcommit. While this could be resolved in user-space it's further complicated by cgroups potentially restricting the amount of memory available to the application. Introduce a vm create flag, DRM_XE_VM_CREATE_NO_VM_OVERCOMMIT, that allows fault mode to mimic the behaviour of !fault mode WRT this. It blocks evicting same vm bos during VM_BIND processing. However, it does *not* block evicting same-vm bos during pagefault processing, preferring eviction rather than VM banning in OOM situations. 
Cc: John Falkowski Cc: Michal Mrozek Cc: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260204153320.17989-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_vm.c | 11 +++++++++-- drivers/gpu/drm/xe/xe_vm.h | 7 +++++++ drivers/gpu/drm/xe/xe_vm_types.h | 1 + include/uapi/drm/xe_drm.h | 6 ++++++ 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index a46f11a71c37..550208ef63f8 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1941,7 +1941,8 @@ find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ DRM_XE_VM_CREATE_FLAG_LR_MODE | \ - DRM_XE_VM_CREATE_FLAG_FAULT_MODE) + DRM_XE_VM_CREATE_FLAG_FAULT_MODE | \ + DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT) int xe_vm_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -1980,12 +1981,18 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) return -EINVAL; + if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) && + args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)) + return -EINVAL; + if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) flags |= XE_VM_FLAG_SCRATCH_PAGE; if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) flags |= XE_VM_FLAG_LR_MODE; if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) flags |= XE_VM_FLAG_FAULT_MODE; + if (args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT) + flags |= XE_VM_FLAG_NO_VM_OVERCOMMIT; vm = xe_vm_create(xe, flags, xef); if (IS_ERR(vm)) @@ -2906,7 +2913,7 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, err = drm_exec_lock_obj(exec, &bo->ttm.base); if (!err && validate) err = xe_bo_validate(bo, vm, - !xe_vm_in_preempt_fence_mode(vm) && + xe_vm_allow_vm_eviction(vm) && res_evict, exec); } diff --git 
a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 288115c7844a..f849e369432b 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -220,6 +220,13 @@ static inline bool xe_vm_in_preempt_fence_mode(struct xe_vm *vm) return xe_vm_in_lr_mode(vm) && !xe_vm_in_fault_mode(vm); } +static inline bool xe_vm_allow_vm_eviction(struct xe_vm *vm) +{ + return !xe_vm_in_lr_mode(vm) || + (xe_vm_in_fault_mode(vm) && + !(vm->flags & XE_VM_FLAG_NO_VM_OVERCOMMIT)); +} + int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 43203e90ee3e..1f6f7e30e751 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -232,6 +232,7 @@ struct xe_vm { #define XE_VM_FLAG_TILE_ID(flags) FIELD_GET(GENMASK(7, 6), flags) #define XE_VM_FLAG_SET_TILE_ID(tile) FIELD_PREP(GENMASK(7, 6), (tile)->id) #define XE_VM_FLAG_GSC BIT(8) +#define XE_VM_FLAG_NO_VM_OVERCOMMIT BIT(9) unsigned long flags; /** diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index c9e70f78e723..ef2565048bdf 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -969,6 +969,11 @@ struct drm_xe_gem_mmap_offset { * demand when accessed, and also allows per-VM overcommit of memory. * The xe driver internally uses recoverable pagefaults to implement * this. + * - %DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT - Requires also + * DRM_XE_VM_CREATE_FLAG_FAULT_MODE. This disallows per-VM overcommit + * but only during a &DRM_IOCTL_XE_VM_BIND operation with the + * %DRM_XE_VM_BIND_FLAG_IMMEDIATE flag set. This may be useful for + * user-space naively probing the amount of available memory. 
*/ struct drm_xe_vm_create { /** @extensions: Pointer to the first extension struct, if any */ @@ -977,6 +982,7 @@ struct drm_xe_vm_create { #define DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE (1 << 0) #define DRM_XE_VM_CREATE_FLAG_LR_MODE (1 << 1) #define DRM_XE_VM_CREATE_FLAG_FAULT_MODE (1 << 2) +#define DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT (1 << 3) /** @flags: Flags */ __u32 flags; From ef547cf1b823ee73c52bd2fedfc77b99a17198e9 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 23 Feb 2026 16:11:32 -0800 Subject: [PATCH 170/195] drm/xe: Consolidate workaround entries for Wa_18041344222 Wa_18041344222 applies to all graphics versions from 20.01 through 30.00 (inclusive). Consolidate the RTP entries into a single range-based entry. v2: - Drop the FUNC(xe_rtp_match_not_sriov_vf) to align with commit a800b95c2498 ("drm/xe/xe2hpg: Remove SRIOV VF check for Wa_18041344222") and commit 0ffe9dcf260b ("drm/xe/xe3: Remove SRIOV VF check for Wa_18041344222") which just landed. (Shuicheng) Cc: Shuicheng Lin Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260223-forupstream-wa_cleanup-v3-1-7f201eb2f172@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 50f5d3381b4c..d1a8c375ba03 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -385,6 +385,12 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) }, + { XE_RTP_NAME("18041344222"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 3000), + FUNC(xe_rtp_match_first_render_or_compute), + FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)), + XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE)) + }, /* TGL */ @@ -531,12 +537,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), 
FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE)) }, - { XE_RTP_NAME("18041344222"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), - FUNC(xe_rtp_match_first_render_or_compute), - FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)), - XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE)) - }, /* Xe2_HPG */ @@ -555,12 +555,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT)) }, - { XE_RTP_NAME("18041344222"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), - FUNC(xe_rtp_match_first_render_or_compute), - FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)), - XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE)) - }, /* Xe3_LPG */ @@ -586,12 +580,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(RING_PSMI_CTL(0), RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, - { XE_RTP_NAME("18041344222"), - XE_RTP_RULES(GRAPHICS_VERSION(3000), - FUNC(xe_rtp_match_first_render_or_compute), - FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)), - XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE)) - }, /* Xe3p_LPG*/ From 1046bc7b416814833a43af8e66c52b0ea71c2021 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 23 Feb 2026 16:11:33 -0800 Subject: [PATCH 171/195] drm/xe/xe2_hpg: Drop invalid workaround Wa_15010599737 Wa_15010599737 was a workaround originally proposed (and ultimately rejected) for DG2-G10. There's no record of it ever being relevant or even considered for any other platforms. The specific bit this workaround was setting is documented as "This bit should be set to 1 for the DX9 API and 0 for all other APIs" which means that it should almost always be left at the default value of 0 on Linux. 
The register itself is directly accessible from userspace, so in the special cases where it might be relevant (e.g., Wine/Proton running Windows DX9 apps), the userspace drivers already have the ability to change the setting without involvement of the kernel. Fixes: 7f3ee7d88058 ("drm/xe/xe2hpg: Add initial GT workarounds") Reviewed-by: Balasubramani Vivekanandan Link: https://patch.msgid.link/20260223-forupstream-wa_cleanup-v3-2-7f201eb2f172@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index d1a8c375ba03..26950b8a7543 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -731,10 +731,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = { }, /* Xe2_HPG */ - { XE_RTP_NAME("15010599737"), - XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN)) - }, + { XE_RTP_NAME("14020756599"), XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) From 4953d806a423087fcc49d4f8884fde85cd23ec1e Mon Sep 17 00:00:00 2001 From: Uma Shankar Date: Mon, 23 Feb 2026 22:40:15 +0530 Subject: [PATCH 172/195] drm/xe/compat: Remove unused i915_reg.h from compat header Display Code is made independent of i915_reg.h, hence it can be dropped from compat header. 
Signed-off-by: Uma Shankar Reviewed-by: Jani Nikula Link: https://patch.msgid.link/20260223171015.1035550-1-uma.shankar@intel.com --- drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h deleted file mode 100644 index 8619ec015ad4..000000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h +++ /dev/null @@ -1,6 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#include "../../i915/i915_reg.h" From 8313c938dc1a070256f07b1dbfc06cab8bd58311 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Mon, 23 Feb 2026 16:23:52 +0000 Subject: [PATCH 173/195] drm/xe/guc: Accumulate CT H2G retry sleep budget guc_ct_send_wait_for_retry() introduced sleep_total_ms as a budget guard, but never incremented it. As a result, the "about 1 second" bailout condition never triggers in the H2G backpressure path. Accumulate the delay returned by xe_sleep_exponential_ms() into sleep_total_ms so the timeout logic works as intended. 
Fixes: 943c4d0637cf ("drm/xe/guc: Limit sleep while waiting for H2G credits") Cc: Michal Wajdeczko Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260223162350.3205364-5-shuicheng.lin@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_guc_ct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 8a45573f8812..1c37813d2ccd 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1124,7 +1124,7 @@ static bool guc_ct_send_wait_for_retry(struct xe_guc_ct *ct, u32 len, h2g->info.size, h2g->info.space, len + GUC_CTB_HDR_LEN); - xe_sleep_exponential_ms(sleep_period_ms, 64); + *sleep_total_ms += xe_sleep_exponential_ms(sleep_period_ms, 64); } else { struct xe_device *xe = ct_to_xe(ct); struct guc_ctb *g2h = &ct->ctbs.g2h; From 9c1ea3962e0a24db88fd0fa3663f88416a2dacbc Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Mon, 23 Feb 2026 16:23:53 +0000 Subject: [PATCH 174/195] drm/xe/guc: Refine CT queue checks and log formatting Fix three code-level cleanups in xe_guc_ct.c: - Use SZ_4K for the queue size alignment assertion in xe_guc_ct_queue_proc_time_jiffies(). - Drop an unused local variable in guc_ct_send_wait_for_retry(). - Add missing trailing newlines in CT error/warn log messages. These changes keep behavior unchanged while improving correctness checks and log formatting. 
Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260223162350.3205364-6-shuicheng.lin@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_guc_ct.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 1c37813d2ccd..d0f0586121bc 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -275,7 +275,7 @@ static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence) */ long xe_guc_ct_queue_proc_time_jiffies(struct xe_guc_ct *ct) { - BUILD_BUG_ON(!IS_ALIGNED(CTB_H2G_BUFFER_SIZE, SZ_4)); + BUILD_BUG_ON(!IS_ALIGNED(CTB_H2G_BUFFER_SIZE, SZ_4K)); return (CTB_H2G_BUFFER_SIZE / SZ_4K) * HZ; } @@ -1126,7 +1126,6 @@ static bool guc_ct_send_wait_for_retry(struct xe_guc_ct *ct, u32 len, len + GUC_CTB_HDR_LEN); *sleep_total_ms += xe_sleep_exponential_ms(sleep_period_ms, 64); } else { - struct xe_device *xe = ct_to_xe(ct); struct guc_ctb *g2h = &ct->ctbs.g2h; int ret; @@ -1148,7 +1147,7 @@ static bool guc_ct_send_wait_for_retry(struct xe_guc_ct *ct, u32 len, ret = dequeue_one_g2h(ct); if (ret < 0) { if (ret != -ECANCELED) - xe_gt_err(ct_to_gt(ct), "CTB receive failed (%pe)", + xe_gt_err(ct_to_gt(ct), "CTB receive failed (%pe)\n", ERR_PTR(ret)); return false; } @@ -1324,7 +1323,7 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, */ mutex_lock(&ct->lock); if (!ret) { - xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s", + xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s\n", g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done)); xa_erase(&ct->fence_lookup, g2h_fence.seqno); mutex_unlock(&ct->lock); @@ -1834,7 +1833,7 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len); break; default: - xe_gt_warn(gt, "NOT_POSSIBLE"); + xe_gt_warn(gt, "NOT_POSSIBLE\n"); } if (ret) { @@ 
-1937,7 +1936,7 @@ static void receive_g2h(struct xe_guc_ct *ct) mutex_unlock(&ct->lock); if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) { - xe_gt_err(ct_to_gt(ct), "CT dequeue failed: %d", ret); + xe_gt_err(ct_to_gt(ct), "CT dequeue failed: %d\n", ret); CT_DEAD(ct, NULL, G2H_RECV); kick_reset(ct); } From 57a5422d72c293b8e48da8b4b5c76a0d5f84c79e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sat, 21 Feb 2026 16:22:28 +0100 Subject: [PATCH 175/195] drm/xe/pf: Use explicit VRAM BO flag for VRAM provisioning When we are about to provision VRAM/LMEM for VF, there is no point in using semi-automatic flag that supports fallback to the SMEM. Signed-off-by: Michal Wajdeczko Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260221152230.7071-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index cba20eb6b36b..6383763d02ab 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1652,7 +1652,7 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) bo = xe_bo_create_pin_range_novm(xe, tile, ALIGN(size, PAGE_SIZE), 0, ~0ull, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_VRAM(tile->mem.vram) | XE_BO_FLAG_NEEDS_2M | XE_BO_FLAG_PINNED | XE_BO_FLAG_PINNED_LATE_RESTORE | From 494752bdc41547b7525d94e6a3de3e44c691520a Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sat, 21 Feb 2026 16:22:29 +0100 Subject: [PATCH 176/195] drm/xe/pf: Don't force 2MB VRAM alignment There is no need to always request VRAM BO to have 2MB alignment as for now this is required by the LMTT only, which could be not present on some platforms with VRAM. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260221152230.7071-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 6383763d02ab..53f03e7075d2 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1626,13 +1626,15 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) struct xe_device *xe = gt_to_xe(gt); struct xe_tile *tile = gt_to_tile(gt); struct xe_bo *bo; + u64 alignment; int err; xe_gt_assert(gt, vfid); xe_gt_assert(gt, IS_DGFX(xe)); xe_gt_assert(gt, xe_gt_is_main_type(gt)); - size = round_up(size, pf_get_lmem_alignment(gt)); + alignment = pf_get_lmem_alignment(gt); + size = round_up(size, alignment); if (config->lmem_obj) { err = pf_distribute_config_lmem(gt, vfid, 0); @@ -1648,12 +1650,12 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) if (!size) return 0; - xe_gt_assert(gt, pf_get_lmem_alignment(gt) == SZ_2M); + xe_gt_assert(gt, alignment == XE_PAGE_SIZE || alignment == SZ_2M); bo = xe_bo_create_pin_range_novm(xe, tile, ALIGN(size, PAGE_SIZE), 0, ~0ull, ttm_bo_type_kernel, XE_BO_FLAG_VRAM(tile->mem.vram) | - XE_BO_FLAG_NEEDS_2M | + (alignment == SZ_2M ? XE_BO_FLAG_NEEDS_2M : 0) | XE_BO_FLAG_PINNED | XE_BO_FLAG_PINNED_LATE_RESTORE | XE_BO_FLAG_FORCE_USER_VRAM); From 6d09a56b49380d996f225fcbcb96a64512bbe8e8 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sat, 21 Feb 2026 16:22:30 +0100 Subject: [PATCH 177/195] drm/xe/pf: Don't use LMTT page size if no LMTT While today all our DGFX platforms have LMTT, we already started preparation to do not rely on this assumption. Add check for the LMTT presence and return default page size as VRAM/LMEM alignment if there is no LMTT. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260221152230.7071-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 53f03e7075d2..b867203b4997 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1451,7 +1451,8 @@ int xe_gt_sriov_pf_config_set_fair_dbs(struct xe_gt *gt, unsigned int vfid, static u64 pf_get_lmem_alignment(struct xe_gt *gt) { - return xe_lmtt_page_size(>->tile->sriov.pf.lmtt); + return xe_device_has_lmtt(gt_to_xe(gt)) ? + xe_lmtt_page_size(>_to_tile(gt)->sriov.pf.lmtt) : XE_PAGE_SIZE; } static u64 pf_get_min_spare_lmem(struct xe_gt *gt) From 4cb522015a87ae0fdb6110fcda09f992ca61dac0 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 16 Feb 2026 19:32:40 +0100 Subject: [PATCH 178/195] drm/xe: Don't expose display modparam if no display support With CONFIG_DRM_XE_DISPLAY=n the display support will be compiled out, so exposing probe_display modparam is pointless. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Rodrigo Vivi Link: https://patch.msgid.link/20260216183240.1308-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_defaults.h | 2 +- drivers/gpu/drm/xe/xe_module.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_defaults.h b/drivers/gpu/drm/xe/xe_defaults.h index 5d5d41d067c5..c8ae1d5f3d60 100644 --- a/drivers/gpu/drm/xe/xe_defaults.h +++ b/drivers/gpu/drm/xe/xe_defaults.h @@ -13,7 +13,7 @@ #define XE_DEFAULT_GUC_LOG_LEVEL 1 #endif -#define XE_DEFAULT_PROBE_DISPLAY true +#define XE_DEFAULT_PROBE_DISPLAY IS_ENABLED(CONFIG_DRM_XE_DISPLAY) #define XE_DEFAULT_VRAM_BAR_SIZE 0 #define XE_DEFAULT_FORCE_PROBE CONFIG_DRM_XE_FORCE_PROBE #define XE_DEFAULT_MAX_VFS ~0 diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 903d3b433421..4cb578182912 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -39,9 +39,11 @@ MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size in MiB, must be p module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); +#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) module_param_named(probe_display, xe_modparam.probe_display, bool, 0444); MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched " "[default=" __stringify(XE_DEFAULT_PROBE_DISPLAY) "])"); +#endif module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, int, 0600); MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size in MiB (<0=disable-resize, 0=max-needed-size, >0=force-size " From c2190f11dc3bf24b177e364c23e232b859c8716f Mon Sep 17 00:00:00 2001 From: Uma Shankar Date: Thu, 26 Feb 2026 17:04:13 +0530 Subject: [PATCH 179/195] Revert "drm/xe/compat: Remove unused i915_reg.h from compat header" This reverts commit 4953d806a423087fcc49d4f8884fde85cd23ec1e. Change breaks build for drm-xe-next, hence revert. 
Signed-off-by: Uma Shankar Acked-by: Jani Nikula Link: https://patch.msgid.link/20260226113413.2045899-1-uma.shankar@intel.com --- drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h new file mode 100644 index 000000000000..8619ec015ad4 --- /dev/null +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#include "../../i915/i915_reg.h" From 2bcbf2dcde0c839a73af664a3c77d4e77d58a3eb Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 14 Jan 2026 16:45:46 -0800 Subject: [PATCH 180/195] drm/xe: Do not preempt fence signaling CS instructions If a batch buffer is complete, it makes little sense to preempt the fence signaling instructions in the ring, as the largest portion of the work (the batch buffer) is already done and fence signaling consists of only a few instructions. If these instructions are preempted, the GuC would need to perform a context switch just to signal the fence, which is costly and delays fence signaling. Avoid this scenario by disabling preemption immediately after the BB start instruction and re-enabling it after executing the fence signaling instructions. 
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Daniele Ceraolo Spurio Cc: Carlos Santa Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Link: https://patch.msgid.link/20260115004546.58060-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_ring_ops.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 248620b0901d..53d420d72164 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -280,6 +280,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + /* Don't preempt fence signaling */ + dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; + if (job->user_fence.used) { i = emit_flush_dw(dw, i); i = emit_store_imm_ppgtt_posted(job->user_fence.addr, @@ -345,6 +348,9 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + /* Don't preempt fence signaling */ + dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; + if (job->user_fence.used) { i = emit_flush_dw(dw, i); i = emit_store_imm_ppgtt_posted(job->user_fence.addr, @@ -397,6 +403,9 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); + /* Don't preempt fence signaling */ + dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; + i = emit_render_cache_flush(job, dw, i); if (job->user_fence.used) From af3de6cf06f9497a60510b87bf77e8fb4124ba9f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 17 Feb 2026 20:33:17 -0800 Subject: [PATCH 181/195] drm/xe: Split H2G and G2H into separate buffer objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit H2G and G2H buffers have different access patterns (H2G is CPU-write, GuC-read, while G2H is GPU-write, CPU-read). 
On dGPU, these patterns benefit from different memory placements: H2G in VRAM and G2H in system memory. Split the CT buffer into two separate buffers—one for H2G and one for G2H—and select the optimal placement for each. This provides a significant performance improvement on the G2H read path, reducing a single read from ~20 µs to under 1 µs on BMG. Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://patch.msgid.link/20260218043319.809548-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 67 +++++++++++++++++++--------- drivers/gpu/drm/xe/xe_guc_ct_types.h | 4 +- 2 files changed, 47 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index d0f0586121bc..018dd64ab1d5 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -255,6 +255,7 @@ static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence) #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) #define CTB_H2G_BUFFER_OFFSET (CTB_DESC_SIZE * 2) +#define CTB_G2H_BUFFER_OFFSET (CTB_DESC_SIZE * 2) #define CTB_H2G_BUFFER_SIZE (SZ_4K) #define CTB_H2G_BUFFER_DWORDS (CTB_H2G_BUFFER_SIZE / sizeof(u32)) #define CTB_G2H_BUFFER_SIZE (SZ_128K) @@ -279,10 +280,14 @@ long xe_guc_ct_queue_proc_time_jiffies(struct xe_guc_ct *ct) return (CTB_H2G_BUFFER_SIZE / SZ_4K) * HZ; } -static size_t guc_ct_size(void) +static size_t guc_h2g_size(void) { - return CTB_H2G_BUFFER_OFFSET + CTB_H2G_BUFFER_SIZE + - CTB_G2H_BUFFER_SIZE; + return CTB_H2G_BUFFER_OFFSET + CTB_H2G_BUFFER_SIZE; +} + +static size_t guc_g2h_size(void) +{ + return CTB_G2H_BUFFER_OFFSET + CTB_G2H_BUFFER_SIZE; } static void guc_ct_fini(struct drm_device *drm, void *arg) @@ -311,7 +316,8 @@ int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct) struct xe_gt *gt = ct_to_gt(ct); int err; - xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE)); + xe_gt_assert(gt, !(guc_h2g_size() % PAGE_SIZE)); + xe_gt_assert(gt, !(guc_g2h_size() % PAGE_SIZE)); err = 
drmm_mutex_init(&xe->drm, &ct->lock); if (err) @@ -356,7 +362,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) struct xe_tile *tile = gt_to_tile(gt); struct xe_bo *bo; - bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(), + bo = xe_managed_bo_create_pin_map(xe, tile, guc_h2g_size(), XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE | @@ -364,7 +370,17 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) if (IS_ERR(bo)) return PTR_ERR(bo); - ct->bo = bo; + ct->ctbs.h2g.bo = bo; + + bo = xe_managed_bo_create_pin_map(xe, tile, guc_g2h_size(), + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + ct->ctbs.g2h.bo = bo; return devm_add_action_or_reset(xe->drm.dev, guc_action_disable_ct, ct); } @@ -389,7 +405,7 @@ int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct) xe_assert(xe, !xe_guc_ct_enabled(ct)); if (IS_DGFX(xe)) { - ret = xe_managed_bo_reinit_in_vram(xe, tile, &ct->bo); + ret = xe_managed_bo_reinit_in_vram(xe, tile, &ct->ctbs.h2g.bo); if (ret) return ret; } @@ -439,8 +455,7 @@ static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h, g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE); xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc)); - g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_H2G_BUFFER_OFFSET + - CTB_H2G_BUFFER_SIZE); + g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_G2H_BUFFER_OFFSET); } static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct) @@ -449,8 +464,8 @@ static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct) u32 desc_addr, ctb_addr, size; int err; - desc_addr = xe_bo_ggtt_addr(ct->bo); - ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_H2G_BUFFER_OFFSET; + desc_addr = xe_bo_ggtt_addr(ct->ctbs.h2g.bo); + ctb_addr = xe_bo_ggtt_addr(ct->ctbs.h2g.bo) + CTB_H2G_BUFFER_OFFSET; size = ct->ctbs.h2g.info.size * sizeof(u32); err = xe_guc_self_cfg64(guc, @@ -476,9 +491,8 @@ static int guc_ct_ctb_g2h_register(struct xe_guc_ct 
*ct) u32 desc_addr, ctb_addr, size; int err; - desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE; - ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_H2G_BUFFER_OFFSET + - CTB_H2G_BUFFER_SIZE; + desc_addr = xe_bo_ggtt_addr(ct->ctbs.g2h.bo) + CTB_DESC_SIZE; + ctb_addr = xe_bo_ggtt_addr(ct->ctbs.g2h.bo) + CTB_G2H_BUFFER_OFFSET; size = ct->ctbs.g2h.info.size * sizeof(u32); err = xe_guc_self_cfg64(guc, @@ -605,9 +619,12 @@ static int __xe_guc_ct_start(struct xe_guc_ct *ct, bool needs_register) xe_gt_assert(gt, !xe_guc_ct_enabled(ct)); if (needs_register) { - xe_map_memset(xe, &ct->bo->vmap, 0, 0, xe_bo_size(ct->bo)); - guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); - guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); + xe_map_memset(xe, &ct->ctbs.h2g.bo->vmap, 0, 0, + xe_bo_size(ct->ctbs.h2g.bo)); + xe_map_memset(xe, &ct->ctbs.g2h.bo->vmap, 0, 0, + xe_bo_size(ct->ctbs.g2h.bo)); + guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->ctbs.h2g.bo->vmap); + guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->ctbs.g2h.bo->vmap); err = guc_ct_ctb_h2g_register(ct); if (err) @@ -624,7 +641,7 @@ static int __xe_guc_ct_start(struct xe_guc_ct *ct, bool needs_register) ct->ctbs.h2g.info.broken = false; ct->ctbs.g2h.info.broken = false; /* Skip everything in H2G buffer */ - xe_map_memset(xe, &ct->bo->vmap, CTB_H2G_BUFFER_OFFSET, 0, + xe_map_memset(xe, &ct->ctbs.h2g.bo->vmap, CTB_H2G_BUFFER_OFFSET, 0, CTB_H2G_BUFFER_SIZE); } @@ -1962,8 +1979,9 @@ static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bo if (!snapshot) return NULL; - if (ct->bo && want_ctb) { - snapshot->ctb_size = xe_bo_size(ct->bo); + if (ct->ctbs.h2g.bo && ct->ctbs.g2h.bo && want_ctb) { + snapshot->ctb_size = xe_bo_size(ct->ctbs.h2g.bo) + + xe_bo_size(ct->ctbs.g2h.bo); snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? 
GFP_ATOMIC : GFP_KERNEL); } @@ -2011,8 +2029,13 @@ static struct xe_guc_ct_snapshot *guc_ct_snapshot_capture(struct xe_guc_ct *ct, guc_ctb_snapshot_capture(xe, &ct->ctbs.g2h, &snapshot->g2h); } - if (ct->bo && snapshot->ctb) - xe_map_memcpy_from(xe, snapshot->ctb, &ct->bo->vmap, 0, snapshot->ctb_size); + if (ct->ctbs.h2g.bo && ct->ctbs.g2h.bo && snapshot->ctb) { + xe_map_memcpy_from(xe, snapshot->ctb, &ct->ctbs.h2g.bo->vmap, 0, + xe_bo_size(ct->ctbs.h2g.bo)); + xe_map_memcpy_from(xe, snapshot->ctb + xe_bo_size(ct->ctbs.h2g.bo), + &ct->ctbs.g2h.bo->vmap, 0, + xe_bo_size(ct->ctbs.g2h.bo)); + } return snapshot; } diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h index 09d7ff1ef42a..46ad1402347d 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -39,6 +39,8 @@ struct guc_ctb_info { * struct guc_ctb - GuC command transport buffer (CTB) */ struct guc_ctb { + /** @bo: Xe BO for CTB */ + struct xe_bo *bo; /** @desc: dma buffer map for CTB descriptor */ struct iosys_map desc; /** @cmds: dma buffer map for CTB commands */ @@ -126,8 +128,6 @@ struct xe_fast_req_fence { * for the H2G and G2H requests sent and received through the buffers. */ struct xe_guc_ct { - /** @bo: Xe BO for CT */ - struct xe_bo *bo; /** @lock: protects everything in CT layer */ struct mutex lock; /** @fast_lock: protects G2H channel and credits */ From 74bbd87dcc5c102147e24058e8db97a228d6ee03 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 17 Feb 2026 20:33:18 -0800 Subject: [PATCH 182/195] drm/xe: Avoid unconditional VRAM reads in H2G path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit desc_read() issues a VRAM read which serializes the CPU and drains posted writes on dGPU platforms. The H2G tracepoint evaluated its arguments unconditionally, so even with tracing disabled the submission path paid the full VRAM read latency. 
Guard the tracepoint with trace_xe_guc_ctb_h2g_enabled(). Also move the descriptor status verification under CONFIG_DRM_XE_DEBUG. This removes another unnecessary VRAM read in non-debug builds. This results in ~10× faster H2G submission and significantly reduces lock contention across the driver. Signed-off-by: Matthew Brost Reviewed-by: Stuart Summers Reviewed-by: Thomas Hellström Link: https://patch.msgid.link/20260218043319.809548-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 018dd64ab1d5..10fbdeb0550c 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -939,22 +939,22 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 full_len; struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds, tail * sizeof(u32)); - u32 desc_status; full_len = len + GUC_CTB_HDR_LEN; lockdep_assert_held(&ct->lock); xe_gt_assert(gt, full_len <= GUC_CTB_MSG_MAX_LEN); - desc_status = desc_read(xe, h2g, status); - if (desc_status) { - xe_gt_err(gt, "CT write: non-zero status: %u\n", desc_status); - goto corrupted; - } - if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { u32 desc_tail = desc_read(xe, h2g, tail); u32 desc_head = desc_read(xe, h2g, head); + u32 desc_status; + + desc_status = desc_read(xe, h2g, status); + if (desc_status) { + xe_gt_err(gt, "CT write: non-zero status: %u\n", desc_status); + goto corrupted; + } if (tail != desc_tail) { desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_MISMATCH); @@ -1023,8 +1023,15 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, /* Update descriptor */ desc_write(xe, h2g, tail, h2g->info.tail); - trace_xe_guc_ctb_h2g(xe, gt->info.id, *(action - 1), full_len, - desc_read(xe, h2g, head), h2g->info.tail); + /* + * desc_read() performs a VRAM read which serializes the CPU and drains + * posted 
writes on dGPU platforms. Tracepoints evaluate arguments even + * when disabled, so guard the event to avoid adding µs-scale latency to + * the fast H2G submission path when tracing is not active. + */ + if (trace_xe_guc_ctb_h2g_enabled()) + trace_xe_guc_ctb_h2g(xe, gt->info.id, *(action - 1), full_len, + desc_read(xe, h2g, head), h2g->info.tail); return 0; From 3954313b39e4ce37d444c66e1fd1219a43a719c7 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 17 Feb 2026 20:33:19 -0800 Subject: [PATCH 183/195] drm/xe: Move LRC seqno to system memory to avoid slow dGPU reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LRC seqno is read by the CPU in the fence signaling path. On dGPU that read can turn into a PCIe transaction when the seqno lives in the main LRC BO, making the hot-path poll/peek much more expensive. Allocate a small dedicated seqno BO in system memory and map the seqno and start_seqno fields from there instead. The GPU still updates the values, but CPU reads stay in cached system memory and avoid PCIe read latency. Update the LRC map/address helpers to accept a BO expression and use the new lrc->seqno_bo for seqno mappings. Unpin/unmap seqno_bo during teardown. 
Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://patch.msgid.link/20260218043319.809548-4-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_lrc.c | 57 +++++++++++++++++++------------ drivers/gpu/drm/xe/xe_lrc_types.h | 6 ++++ 2 files changed, 42 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 57ef4f527ed0..84360fcdf743 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -715,12 +715,13 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) #define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset #define __xe_lrc_regs_offset xe_lrc_regs_offset -#define LRC_SEQNO_PPHWSP_OFFSET 512 -#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) -#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) +#define LRC_CTX_JOB_TIMESTAMP_OFFSET 512 #define LRC_ENGINE_ID_PPHWSP_OFFSET 1024 #define LRC_PARALLEL_PPHWSP_OFFSET 2048 +#define LRC_SEQNO_OFFSET 0 +#define LRC_START_SEQNO_OFFSET (LRC_SEQNO_OFFSET + 8) + u32 xe_lrc_regs_offset(struct xe_lrc *lrc) { return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; @@ -747,14 +748,12 @@ size_t xe_lrc_skip_size(struct xe_device *xe) static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc) { - /* The seqno is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET; + return LRC_SEQNO_OFFSET; } static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) { - /* The start seqno is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET; + return LRC_START_SEQNO_OFFSET; } static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc) @@ -805,10 +804,11 @@ static inline u32 __xe_lrc_wa_bb_offset(struct xe_lrc *lrc) return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE; } -#define DECL_MAP_ADDR_HELPERS(elem) \ +#define DECL_MAP_ADDR_HELPERS(elem, bo_expr) \ static inline struct iosys_map 
__xe_lrc_##elem##_map(struct xe_lrc *lrc) \ { \ - struct iosys_map map = lrc->bo->vmap; \ + struct xe_bo *bo = (bo_expr); \ + struct iosys_map map = bo->vmap; \ \ xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \ iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \ @@ -816,20 +816,22 @@ static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ } \ static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \ { \ - return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \ + struct xe_bo *bo = (bo_expr); \ +\ + return xe_bo_ggtt_addr(bo) + __xe_lrc_##elem##_offset(lrc); \ } \ -DECL_MAP_ADDR_HELPERS(ring) -DECL_MAP_ADDR_HELPERS(pphwsp) -DECL_MAP_ADDR_HELPERS(seqno) -DECL_MAP_ADDR_HELPERS(regs) -DECL_MAP_ADDR_HELPERS(start_seqno) -DECL_MAP_ADDR_HELPERS(ctx_job_timestamp) -DECL_MAP_ADDR_HELPERS(ctx_timestamp) -DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw) -DECL_MAP_ADDR_HELPERS(parallel) -DECL_MAP_ADDR_HELPERS(indirect_ring) -DECL_MAP_ADDR_HELPERS(engine_id) +DECL_MAP_ADDR_HELPERS(ring, lrc->bo) +DECL_MAP_ADDR_HELPERS(pphwsp, lrc->bo) +DECL_MAP_ADDR_HELPERS(seqno, lrc->seqno_bo) +DECL_MAP_ADDR_HELPERS(regs, lrc->bo) +DECL_MAP_ADDR_HELPERS(start_seqno, lrc->seqno_bo) +DECL_MAP_ADDR_HELPERS(ctx_job_timestamp, lrc->bo) +DECL_MAP_ADDR_HELPERS(ctx_timestamp, lrc->bo) +DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw, lrc->bo) +DECL_MAP_ADDR_HELPERS(parallel, lrc->bo) +DECL_MAP_ADDR_HELPERS(indirect_ring, lrc->bo) +DECL_MAP_ADDR_HELPERS(engine_id, lrc->bo) #undef DECL_MAP_ADDR_HELPERS @@ -1036,6 +1038,7 @@ static void xe_lrc_finish(struct xe_lrc *lrc) { xe_hw_fence_ctx_finish(&lrc->fence_ctx); xe_bo_unpin_map_no_vm(lrc->bo); + xe_bo_unpin_map_no_vm(lrc->seqno_bo); } /* @@ -1445,6 +1448,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE; struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = gt_to_xe(gt); + struct xe_bo *seqno_bo; struct iosys_map map; u32 
arb_enable; u32 bo_flags; @@ -1479,6 +1483,17 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); + seqno_bo = xe_bo_create_pin_map_novm(xe, tile, PAGE_SIZE, + ttm_bo_type_kernel, + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_SYSTEM, false); + if (IS_ERR(seqno_bo)) { + err = PTR_ERR(seqno_bo); + goto err_lrc_finish; + } + lrc->seqno_bo = seqno_bo; + xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, hwe->fence_irq, hwe->name); diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index a4373d280c39..5a718f759ed6 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -22,6 +22,12 @@ struct xe_lrc { */ struct xe_bo *bo; + /** + * @seqno_bo: Buffer object (memory) for seqno numbers. Always in system + * memory as this a CPU read, GPU write path object. + */ + struct xe_bo *seqno_bo; + /** @size: size of the lrc and optional indirect ring state */ u32 size; From 9941b3ca10977a583a6e78707023066fd9cd6311 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 25 Feb 2026 12:37:20 -0800 Subject: [PATCH 184/195] drm/xe/xe3p_xpc: Drop stale MCR steering TODO comment Proper steering+termination for Xe3p_XPC NODE and BANK multicast ranges was added in commit 6d5511e56b2d ("drm/xe/xe3p_xpc: Add MCR steering for NODE and L3BANK ranges") but we forgot to remove the corresponding TODO comment at the top of the block. Drop it now to avoid confusion. 
Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260225203719.3310570-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_gt_mcr.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 8989d02ea84d..7c6f039c880d 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -532,12 +532,6 @@ void xe_gt_mcr_init_early(struct xe_gt *gt) } } else { if (GRAPHICS_VERx100(xe) == 3511) { - /* - * TODO: there are some ranges in bspec with missing - * termination: [0x00B000, 0x00B0FF] and - * [0x00D880, 0x00D8FF] (NODE); [0x00B100, 0x00B3FF] - * (L3BANK). Update them here once bspec is updated. - */ gt->steering[DSS].ranges = xe3p_xpc_xecore_steering_table; gt->steering[GAM1].ranges = xe3p_xpc_gam_grp1_steering_table; gt->steering[INSTANCE0].ranges = xe3p_xpc_instance0_steering_table; From a235e7d0098337c3f2d1e8f3610c719a589e115f Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Wed, 25 Feb 2026 01:34:49 +0000 Subject: [PATCH 185/195] drm/xe/configfs: Free ctx_restore_mid_bb in release ctx_restore_mid_bb memory is allocated in wa_bb_store(), but xe_config_device_release() only frees ctx_restore_post_bb. Free ctx_restore_mid_bb[0].cs as well to avoid leaking the allocation when the configfs device is removed. 
Fixes: b30d5de3d40c ("drm/xe/configfs: Add mid context restore bb") Signed-off-by: Shuicheng Lin Reviewed-by: Nitin Gote Link: https://patch.msgid.link/20260225013448.3547687-2-shuicheng.lin@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_configfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index d8c3fbe81aa6..af1599f21338 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -833,6 +833,7 @@ static void xe_config_device_release(struct config_item *item) mutex_destroy(&dev->lock); + kfree(dev->config.ctx_restore_mid_bb[0].cs); kfree(dev->config.ctx_restore_post_bb[0].cs); kfree(dev); } From 8ccf5f6b2295164962bbee5b0770f4366fd9bee2 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 24 Feb 2026 15:50:56 -0800 Subject: [PATCH 186/195] drm/xe/tuning: Apply windower hardware filtering setting on Xe3 and Xe3p A recent bspec tuning guide update asks us to program COMMON_SLICE_CHICKEN4[5] on Xe3 and Xe3p platforms. Add this setting to our LRC tuning RTP table so that the setting will become part of each context's LRC. 
Bspec: 72161, 55902 Reviewed-by: Shuicheng Lin Link: https://patch.msgid.link/20260224235055.3038710-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_tuning.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 90b9017770ea..66ddad767ad4 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -176,6 +176,7 @@ #define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED) #define SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE REG_BIT(12) #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) +#define HW_FILTERING REG_BIT(5) #define COMMON_SLICE_CHICKEN3 XE_REG(0x7304, XE_REG_OPTION_MASKED) #define XEHP_COMMON_SLICE_CHICKEN3 XE_REG_MCR(0x7304, XE_REG_OPTION_MASKED) diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index ea90e8c99754..f8de6a4bf189 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -127,6 +127,11 @@ static const struct xe_rtp_entry_sr engine_tunings[] = { }; static const struct xe_rtp_entry_sr lrc_tunings[] = { + { XE_RTP_NAME("Tuning: Windower HW Filtering"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3599), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, HW_FILTERING)) + }, + /* DG2 */ { XE_RTP_NAME("Tuning: L3 cache"), From 223b2f51ba6bbf70dbebdb69b93b66b074289e04 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Tue, 24 Feb 2026 18:20:14 -0800 Subject: [PATCH 187/195] drm/xe: restrict multi-lrc to VCS/VECS engines Tighten uapi validation to restrict multi-lrc support to VIDEO_DECODE and VIDEO_ENHANCE engines only. This check should have been in place from the start, as the driver typically avoids allowing uapi cases that we have no userspace consumer for. Additionally, the GuC firmware on ModSched platforms no longer supports multi-lrc on non-media engines. 
V4: - use a unified mask for all platforms since engine instance count is an independent runtime check (Matt Roper, Matthew Brost) V3: - store a multi-lrc enable class mask in xe->info and populate from xe_device_desc in xe_pci.c (Matthew Brost) V2: - correct the typo (Shuicheng) - move the check earlier to avoid VM lookup (Shuicheng, Matt Roper) - remove the graphics version check (Matt Roper) - input more details in the commit info (Matt Roper) Cc: Shuicheng Lin Cc: Matt Roper Cc: Matthew Brost Signed-off-by: Xin Wang Reviewed-by: Shuicheng Lin Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260225022014.45394-1-x.wang@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_exec_queue.c | 5 +++++ drivers/gpu/drm/xe/xe_pci.c | 21 +++++++++++++++++++++ drivers/gpu/drm/xe/xe_pci_types.h | 1 + 4 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 8f3ef836541e..caa8f34a6744 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -138,6 +138,8 @@ struct xe_device { u8 tile_count; /** @info.max_gt_per_tile: Number of GT IDs allocated to each tile */ u8 max_gt_per_tile; + /** @info.multi_lrc_mask: bitmask of engine classes which support multi-lrc */ + u8 multi_lrc_mask; /** @info.gt_count: Total number of GTs for entire device */ u8 gt_count; /** @info.vm_max_level: Max VM level */ diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 66d0e10ee2c4..5abb29454d1f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -1184,6 +1184,11 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !hwe)) return -EINVAL; + /* multi-lrc is only supported on select engine classes */ + if (XE_IOCTL_DBG(xe, args->width > 1 && + !(xe->info.multi_lrc_mask & BIT(hwe->class)))) + return -EOPNOTSUPP; + vm = 
xe_vm_lookup(xef, args->vm_id); if (XE_IOCTL_DBG(xe, !vm)) return -ENOENT; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 56a768f2cfca..b48e84549888 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -184,6 +184,10 @@ static const struct xe_ip media_ips[] = { { 3503, "Xe3p_HPM", &media_xelpmp }, }; +#define MULTI_LRC_MASK \ + .multi_lrc_mask = BIT(XE_ENGINE_CLASS_VIDEO_DECODE) | \ + BIT(XE_ENGINE_CLASS_VIDEO_ENHANCE) + static const struct xe_device_desc tgl_desc = { .pre_gmdid_graphics_ip = &graphics_ip_xelp, .pre_gmdid_media_ip = &media_ip_xem, @@ -194,6 +198,7 @@ static const struct xe_device_desc tgl_desc = { .has_llc = true, .has_sriov = true, .max_gt_per_tile = 1, + MULTI_LRC_MASK, .require_force_probe = true, .va_bits = 48, .vm_max_level = 3, @@ -208,6 +213,7 @@ static const struct xe_device_desc rkl_desc = { .has_display = true, .has_llc = true, .max_gt_per_tile = 1, + MULTI_LRC_MASK, .require_force_probe = true, .va_bits = 48, .vm_max_level = 3, @@ -225,6 +231,7 @@ static const struct xe_device_desc adl_s_desc = { .has_llc = true, .has_sriov = true, .max_gt_per_tile = 1, + MULTI_LRC_MASK, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids }, @@ -246,6 +253,7 @@ static const struct xe_device_desc adl_p_desc = { .has_llc = true, .has_sriov = true, .max_gt_per_tile = 1, + MULTI_LRC_MASK, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids }, @@ -265,6 +273,7 @@ static const struct xe_device_desc adl_n_desc = { .has_llc = true, .has_sriov = true, .max_gt_per_tile = 1, + MULTI_LRC_MASK, .require_force_probe = true, .va_bits = 48, .vm_max_level = 3, @@ -283,6 +292,7 @@ static const struct xe_device_desc dg1_desc = { .has_gsc_nvm = 1, .has_heci_gscfi = 1, .max_gt_per_tile = 1, + MULTI_LRC_MASK, .require_force_probe = true, .va_bits 
= 48, .vm_max_level = 3, @@ -313,6 +323,7 @@ static const struct xe_device_desc ats_m_desc = { .pre_gmdid_media_ip = &media_ip_xehpm, .dma_mask_size = 46, .max_gt_per_tile = 1, + MULTI_LRC_MASK, .require_force_probe = true, DG2_FEATURES, @@ -325,6 +336,7 @@ static const struct xe_device_desc dg2_desc = { .pre_gmdid_media_ip = &media_ip_xehpm, .dma_mask_size = 46, .max_gt_per_tile = 1, + MULTI_LRC_MASK, .require_force_probe = true, DG2_FEATURES, @@ -343,6 +355,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { .has_heci_gscfi = 1, .max_gt_per_tile = 1, .max_remote_tiles = 1, + MULTI_LRC_MASK, .require_force_probe = true, .va_bits = 57, .vm_max_level = 4, @@ -358,6 +371,7 @@ static const struct xe_device_desc mtl_desc = { .has_display = true, .has_pxp = true, .max_gt_per_tile = 2, + MULTI_LRC_MASK, .va_bits = 48, .vm_max_level = 3, }; @@ -369,6 +383,7 @@ static const struct xe_device_desc lnl_desc = { .has_flat_ccs = 1, .has_pxp = true, .max_gt_per_tile = 2, + MULTI_LRC_MASK, .needs_scratch = true, .va_bits = 48, .vm_max_level = 4, @@ -393,6 +408,7 @@ static const struct xe_device_desc bmg_desc = { .has_soc_remapper_telem = true, .has_sriov = true, .max_gt_per_tile = 2, + MULTI_LRC_MASK, .needs_scratch = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_BATTLEMAGE_G21, "G21", bmg_g21_ids }, @@ -411,6 +427,7 @@ static const struct xe_device_desc ptl_desc = { .has_pre_prod_wa = 1, .has_pxp = true, .max_gt_per_tile = 2, + MULTI_LRC_MASK, .needs_scratch = true, .needs_shared_vf_gt_wq = true, .va_bits = 48, @@ -424,6 +441,7 @@ static const struct xe_device_desc nvls_desc = { .has_flat_ccs = 1, .has_pre_prod_wa = 1, .max_gt_per_tile = 2, + MULTI_LRC_MASK, .require_force_probe = true, .va_bits = 48, .vm_max_level = 4, @@ -445,6 +463,7 @@ static const struct xe_device_desc cri_desc = { .has_soc_remapper_telem = true, .has_sriov = true, .max_gt_per_tile = 2, + MULTI_LRC_MASK, .require_force_probe = true, .va_bits = 57, 
.vm_max_level = 4, @@ -459,6 +478,7 @@ static const struct xe_device_desc nvlp_desc = { .has_page_reclaim_hw_assist = true, .has_pre_prod_wa = true, .max_gt_per_tile = 2, + MULTI_LRC_MASK, .require_force_probe = true, .va_bits = 48, .vm_max_level = 4, @@ -745,6 +765,7 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.skip_pcode = desc->skip_pcode; xe->info.needs_scratch = desc->needs_scratch; xe->info.needs_shared_vf_gt_wq = desc->needs_shared_vf_gt_wq; + xe->info.multi_lrc_mask = desc->multi_lrc_mask; xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && xe_modparam.probe_display && diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index 470d31a1f0d6..47e8a1552c2b 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -30,6 +30,7 @@ struct xe_device_desc { u8 dma_mask_size; u8 max_remote_tiles:2; u8 max_gt_per_tile:2; + u8 multi_lrc_mask; u8 va_bits; u8 vm_max_level; u8 vram_flags; From 393e5fea6f7d7054abc2c3d97a4cfe8306cd6079 Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Thu, 26 Feb 2026 22:26:58 +0100 Subject: [PATCH 188/195] drm/xe/queue: Call fini on exec queue creation fail Every call to queue init should have a corresponding fini call. Skipping this would mean skipping removal of the queue from GuC list (which is part of guc_id allocation). A damaged queue stored in exec_queue_lookup list would lead to invalid memory reference, sooner or later. Call fini to free guc_id. This must be done before any internal LRCs are freed. Since the finalization with this extra call became very similar to __xe_exec_queue_fini(), reuse that. To make this reuse possible, alter xe_lrc_put() so it can survive NULL parameters, like other similar functions. v2: Reuse _xe_exec_queue_fini(). Make xe_lrc_put() aware of NULLs. 
Fixes: 3c1fa4aa60b1 ("drm/xe: Move queue init before LRC creation") Signed-off-by: Tomasz Lis Reviewed-by: Matthew Brost (v1) Signed-off-by: Michal Wajdeczko Link: https://patch.msgid.link/20260226212701.2937065-2-tomasz.lis@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 23 +++++++++++------------ drivers/gpu/drm/xe/xe_lrc.h | 3 ++- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 5abb29454d1f..a1075755e3d2 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -270,6 +270,16 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, return q; } +static void __xe_exec_queue_fini(struct xe_exec_queue *q) +{ + int i; + + q->ops->fini(q); + + for (i = 0; i < q->width; ++i) + xe_lrc_put(q->lrc[i]); +} + static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags) { int i, err; @@ -324,21 +334,10 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags) return 0; err_lrc: - for (i = i - 1; i >= 0; --i) - xe_lrc_put(q->lrc[i]); + __xe_exec_queue_fini(q); return err; } -static void __xe_exec_queue_fini(struct xe_exec_queue *q) -{ - int i; - - q->ops->fini(q); - - for (i = 0; i < q->width; ++i) - xe_lrc_put(q->lrc[i]); -} - struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, u16 width, struct xe_hw_engine *hwe, u32 flags, diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 3e500004f1ae..48f7c26cf129 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -75,7 +75,8 @@ static inline struct xe_lrc *xe_lrc_get(struct xe_lrc *lrc) */ static inline void xe_lrc_put(struct xe_lrc *lrc) { - kref_put(&lrc->refcount, xe_lrc_destroy); + if (lrc) + kref_put(&lrc->refcount, xe_lrc_destroy); } /** From ec172c7befc4a48ea7d6afe6f0fa23c533222233 Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Thu, 26 Feb 
2026 22:26:59 +0100 Subject: [PATCH 189/195] drm/xe: Wrappers for setting and getting LRC references There is a small but non-zero chance that VF post migration fixups are running on an exec queue during teardown. The chances are decreased by starting the teardown by releasing guc_id, but remain non-zero. On the other hand the sync between fixups and EQ creation (wait_valid_ggtt) drastically increases the chance for such parallel teardown if queue creation error path is entered (err_lrc label). The exec queue itself is not going to cause an issue, but LRCs have a small chance of getting freed during the fixups. Creating a setter and a getter makes it easier to protect the fixup operations with a lock. For other driver activities, the original access method (without any protection) can still be used. v2: Separate lock, only for LRCs. Kerneldoc fixes. Subject tag fix. Signed-off-by: Tomasz Lis Reviewed-by: Matthew Brost Signed-off-by: Michal Wajdeczko Link: https://patch.msgid.link/20260226212701.2937065-3-tomasz.lis@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 73 ++++++++++++++++++------ drivers/gpu/drm/xe/xe_exec_queue.h | 1 + drivers/gpu/drm/xe/xe_exec_queue_types.h | 5 ++ 3 files changed, 60 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index a1075755e3d2..0e46cbed9006 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -231,6 +231,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, INIT_LIST_HEAD(&q->hw_engine_group_link); INIT_LIST_HEAD(&q->pxp.link); spin_lock_init(&q->multi_queue.lock); + spin_lock_init(&q->lrc_lookup_lock); q->multi_queue.priority = XE_MULTI_QUEUE_PRIORITY_NORMAL; q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; @@ -270,6 +271,56 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, return q; } +static void xe_exec_queue_set_lrc(struct xe_exec_queue *q, struct 
xe_lrc *lrc, u16 idx) +{ + xe_assert(gt_to_xe(q->gt), idx < q->width); + + scoped_guard(spinlock, &q->lrc_lookup_lock) + q->lrc[idx] = lrc; +} + +/** + * xe_exec_queue_get_lrc() - Get the LRC from exec queue. + * @q: The exec queue instance. + * @idx: Index within multi-LRC array. + * + * Retrieves LRC of given index for the exec queue under lock + * and takes reference. + * + * Return: Pointer to LRC on success, error on failure, NULL on + * lookup failure. + */ +struct xe_lrc *xe_exec_queue_get_lrc(struct xe_exec_queue *q, u16 idx) +{ + struct xe_lrc *lrc; + + xe_assert(gt_to_xe(q->gt), idx < q->width); + + scoped_guard(spinlock, &q->lrc_lookup_lock) { + lrc = q->lrc[idx]; + if (lrc) + xe_lrc_get(lrc); + } + + return lrc; +} + +/** + * xe_exec_queue_lrc() - Get the LRC from exec queue. + * @q: The exec queue instance. + * + * Retrieves the primary LRC for the exec queue. Note that this function + * returns only the first LRC instance, even when multiple parallel LRCs + * are configured. This function does not increment reference count, + * so the reference can be just forgotten after use. + * + * Return: Pointer to LRC on success, error on failure + */ +struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q) +{ + return q->lrc[0]; +} + static void __xe_exec_queue_fini(struct xe_exec_queue *q) { int i; @@ -327,8 +378,7 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags) goto err_lrc; } - /* Pairs with READ_ONCE to xe_exec_queue_contexts_hwsp_rebase */ - WRITE_ONCE(q->lrc[i], lrc); + xe_exec_queue_set_lrc(q, lrc, i); } return 0; @@ -1293,21 +1343,6 @@ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, return ret; } -/** - * xe_exec_queue_lrc() - Get the LRC from exec queue. - * @q: The exec_queue. - * - * Retrieves the primary LRC for the exec queue. Note that this function - * returns only the first LRC instance, even when multiple parallel LRCs - * are configured. 
- * - * Return: Pointer to LRC on success, error on failure - */ -struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q) -{ - return q->lrc[0]; -} - /** * xe_exec_queue_is_lr() - Whether an exec_queue is long-running * @q: The exec_queue @@ -1667,14 +1702,14 @@ int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch) for (i = 0; i < q->width; ++i) { struct xe_lrc *lrc; - /* Pairs with WRITE_ONCE in __xe_exec_queue_init */ - lrc = READ_ONCE(q->lrc[i]); + lrc = xe_exec_queue_get_lrc(q, i); if (!lrc) continue; xe_lrc_update_memirq_regs_with_address(lrc, q->hwe, scratch); xe_lrc_update_hwctx_regs_with_address(lrc); err = xe_lrc_setup_wa_bb_with_scratch(lrc, q->hwe, scratch); + xe_lrc_put(lrc); if (err) break; } diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index c9e3a7c2d249..a82d99bd77bc 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -160,6 +160,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q); int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch); struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q); +struct xe_lrc *xe_exec_queue_get_lrc(struct xe_exec_queue *q, u16 idx); /** * xe_exec_queue_idle_skip_suspend() - Can exec queue skip suspend diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 3791fed34ffa..a1f3938f4173 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -257,6 +257,11 @@ struct xe_exec_queue { u64 tlb_flush_seqno; /** @hw_engine_group_link: link into exec queues in the same hw engine group */ struct list_head hw_engine_group_link; + /** + * @lrc_lookup_lock: Lock for protecting lrc array access. Only used when + * running in parallel to queue creation is possible. 
+ */ + spinlock_t lrc_lookup_lock; /** @lrc: logical ring context for this exec queue */ struct xe_lrc *lrc[] __counted_by(width); }; From f3fb5f1ebbf39e685dd2885c9dbc8bb0a80be7c6 Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Thu, 26 Feb 2026 22:27:00 +0100 Subject: [PATCH 190/195] drm/xe/vf: Wait for all fixups before using default LRCs When a context is being created during save/restore, the LRC creation needs to wait for GGTT address space to be shifted. But it also needs to have fixed default LRCs. This is mandatory to avoid the situation where LRC will be created based on data from before the fixups, but reference within exec queue will be set too late for fixups. This fixes an issue where contexts created during save/restore have a large chance of having one unfixed LRC, due to the xe_lrc_create() being synced for equal start to race with default LRC fixups. v2: Move the fixups confirmation further, behind all fixups. Revert some renames. Signed-off-by: Tomasz Lis Reviewed-by: Matthew Brost Signed-off-by: Michal Wajdeczko Link: https://patch.msgid.link/20260226212701.2937065-4-tomasz.lis@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 16 +++++++++------- drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 527ded3c9c22..7f83c0d3b099 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -536,12 +536,6 @@ static int vf_get_ggtt_info(struct xe_gt *gt) */ xe_ggtt_shift_nodes(tile->mem.ggtt, start); - if (xe_sriov_vf_migration_supported(gt_to_xe(gt))) { - WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, false); - smp_wmb(); /* Ensure above write visible before wake */ - wake_up_all(>->sriov.vf.migration.wq); - } - return 0; } @@ -846,6 +840,13 @@ static void xe_gt_sriov_vf_default_lrcs_hwsp_rebase(struct xe_gt *gt) xe_default_lrc_update_memirq_regs_with_address(hwe); } +static 
void vf_post_migration_mark_fixups_done(struct xe_gt *gt) +{ + WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, false); + smp_wmb(); /* Ensure above write visible before wake */ + wake_up_all(>->sriov.vf.migration.wq); +} + static void vf_start_migration_recovery(struct xe_gt *gt) { bool started; @@ -1380,6 +1381,7 @@ static void vf_post_migration_recovery(struct xe_gt *gt) if (err) goto fail; + vf_post_migration_mark_fixups_done(gt); vf_post_migration_rearm(gt); err = vf_post_migration_resfix_done(gt, marker); @@ -1514,7 +1516,7 @@ static bool vf_valid_ggtt(struct xe_gt *gt) } /** - * xe_gt_sriov_vf_wait_valid_ggtt() - VF wait for valid GGTT addresses + * xe_gt_sriov_vf_wait_valid_ggtt() - wait for valid GGTT nodes and address refs * @gt: the &xe_gt */ void xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h index 4ef881b9b662..fca18be589db 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h @@ -73,7 +73,7 @@ struct xe_gt_sriov_vf_migration { bool recovery_queued; /** @recovery_inprogress: VF post migration recovery in progress */ bool recovery_inprogress; - /** @ggtt_need_fixes: VF GGTT needs fixes */ + /** @ggtt_need_fixes: VF GGTT and references to it need fixes */ bool ggtt_need_fixes; }; From c692ae39e9fd33d0e58ac24bf3d98b352b5064da Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Thu, 26 Feb 2026 22:27:01 +0100 Subject: [PATCH 191/195] drm/xe/vf: Redo LRC creation while in VF fixups If the xe module within a VM was creating a new LRC during save/ restore, this LRC will be invalid. The fixups procedure may not be able to reach it, as there will be a race to add the new LRC reference to an exec queue. Even if the new LRC which was being created during VM migration is added to EQ in time for fixups, said LRC may still remain damaged. 
In a small percentage of specially crafted test cases, the resulting LRC was still damaged and caused GPU hang. Any LRC which could be created in such a situation, have to be re-created. Due to VM having arbitrarily set amount of CPU cores, it is possible to limit the amount to 1. In such case, there is a possibility that kernel will switch CPU contexts in a way which allows to miss VF migration recovery running in parallel (by simply not switching to the LRC creation thread during recovery). Therefore checking if the migration is in progress just after LRC creation, is not enough to ensure detection. Free the incorrectly created LRC, and trigger a re-run of the creation, but only after waiting for default LRC to get fixups. Use additional atomic value increased after fixups, to ensure any VF migration that avoided detection by just checking for recovery in progress, will be caught. v2: Merge marker and wait for default LRC, reducing amount of calls within xe_init_eq(). Alter the LRC creation loop to remove a race with post-migration fixups worker. v3: Kerneldoc fixes. Rename fixups_complete_count. Signed-off-by: Tomasz Lis Reviewed-by: Matthew Brost Signed-off-by: Michal Wajdeczko Link: https://patch.msgid.link/20260226212701.2937065-5-tomasz.lis@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 29 +++++++++++----- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 40 ++++++++++++++++++++--- drivers/gpu/drm/xe/xe_gt_sriov_vf.h | 3 +- drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h | 2 ++ 4 files changed, 60 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 0e46cbed9006..bb273ca02d28 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -368,17 +368,28 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags) * from the moment vCPU resumes execution. 
*/ for (i = 0; i < q->width; ++i) { - struct xe_lrc *lrc; + struct xe_lrc *__lrc = NULL; + int marker; - xe_gt_sriov_vf_wait_valid_ggtt(q->gt); - lrc = xe_lrc_create(q->hwe, q->vm, q->replay_state, - xe_lrc_ring_size(), q->msix_vec, flags); - if (IS_ERR(lrc)) { - err = PTR_ERR(lrc); - goto err_lrc; - } + do { + struct xe_lrc *lrc; - xe_exec_queue_set_lrc(q, lrc, i); + marker = xe_gt_sriov_vf_wait_valid_ggtt(q->gt); + + lrc = xe_lrc_create(q->hwe, q->vm, q->replay_state, + xe_lrc_ring_size(), q->msix_vec, flags); + if (IS_ERR(lrc)) { + err = PTR_ERR(lrc); + goto err_lrc; + } + + xe_exec_queue_set_lrc(q, lrc, i); + + if (__lrc) + xe_lrc_put(__lrc); + __lrc = lrc; + + } while (marker != xe_vf_migration_fixups_complete_count(q->gt)); } return 0; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 7f83c0d3b099..8989c8e1be95 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -1277,6 +1277,8 @@ static int vf_post_migration_fixups(struct xe_gt *gt) if (err) return err; + atomic_inc(>->sriov.vf.migration.fixups_complete_count); + return 0; } @@ -1516,19 +1518,49 @@ static bool vf_valid_ggtt(struct xe_gt *gt) } /** - * xe_gt_sriov_vf_wait_valid_ggtt() - wait for valid GGTT nodes and address refs - * @gt: the &xe_gt + * xe_vf_migration_fixups_complete_count() - Get count of VF fixups completions. + * @gt: the &xe_gt instance which contains affected Global GTT + * + * Return: number of times VF fixups were completed since driver + * probe, or 0 if migration is not available, or -1 if fixups are + * pending or being applied right now. 
*/ -void xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt) +int xe_vf_migration_fixups_complete_count(struct xe_gt *gt) +{ + if (!IS_SRIOV_VF(gt_to_xe(gt)) || + !xe_sriov_vf_migration_supported(gt_to_xe(gt))) + return 0; + + /* should never match fixups_complete_count value */ + if (!vf_valid_ggtt(gt)) + return -1; + + return atomic_read(>->sriov.vf.migration.fixups_complete_count); +} + +/** + * xe_gt_sriov_vf_wait_valid_ggtt() - wait for valid GGTT nodes and address refs + * @gt: the &xe_gt instance which contains affected Global GTT + * + * Return: number of times VF fixups were completed since driver + * probe, or 0 if migration is not available. + */ +int xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt) { int ret; + /* + * this condition needs to be identical to one in + * xe_vf_migration_fixups_complete_count() + */ if (!IS_SRIOV_VF(gt_to_xe(gt)) || !xe_sriov_vf_migration_supported(gt_to_xe(gt))) - return; + return 0; ret = wait_event_interruptible_timeout(gt->sriov.vf.migration.wq, vf_valid_ggtt(gt), HZ * 5); xe_gt_WARN_ON(gt, !ret); + + return atomic_read(>->sriov.vf.migration.fixups_complete_count); } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index 7d97189c2d3d..a6f7127521a5 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -39,6 +39,7 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p); void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p); void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p); -void xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt); +int xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt); +int xe_vf_migration_fixups_complete_count(struct xe_gt *gt); #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h index fca18be589db..80562ffadb16 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h 
@@ -54,6 +54,8 @@ struct xe_gt_sriov_vf_migration { wait_queue_head_t wq; /** @scratch: Scratch memory for VF recovery */ void *scratch; + /** @fixups_complete_count: Counts completed fixups stages */ + atomic_t fixups_complete_count; /** @debug: Debug hooks for delaying migration */ struct { /** From 2b37c401b265c07b46408b5cb36a4b757c9b5060 Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Fri, 20 Feb 2026 17:53:08 -0500 Subject: [PATCH 192/195] drm/xe/gsc: Fix GSC proxy cleanup on early initialization failure xe_gsc_proxy_remove undoes what is done in both xe_gsc_proxy_init and xe_gsc_proxy_start; however, if we fail between those 2 calls, it is possible that the HW forcewake access hasn't been initialized yet and so we hit errors when the cleanup code tries to write GSC register. To avoid that, split the cleanup in 2 functions so that the HW cleanup is only called if the HW setup was completed successfully. Since the HW cleanup (interrupt disabling) is now removed from xe_gsc_proxy_remove, the cleanup on error paths in xe_gsc_proxy_start must be updated to disable interrupts before returning. 
Fixes: ff6cd29b690b ("drm/xe: Cleanup unwind of gt initialization") Signed-off-by: Zhanjun Dong Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Link: https://patch.msgid.link/20260220225308.101469-1-zhanjun.dong@intel.com --- drivers/gpu/drm/xe/xe_gsc_proxy.c | 43 +++++++++++++++++++++++++------ drivers/gpu/drm/xe/xe_gsc_types.h | 2 ++ 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 42438b21f235..707db650a2ae 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -435,15 +435,11 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) return 0; } -static void xe_gsc_proxy_remove(void *arg) +static void xe_gsc_proxy_stop(struct xe_gsc *gsc) { - struct xe_gsc *gsc = arg; struct xe_gt *gt = gsc_to_gt(gsc); struct xe_device *xe = gt_to_xe(gt); - if (!gsc->proxy.component_added) - return; - /* disable HECI2 IRQs */ scoped_guard(xe_pm_runtime, xe) { CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GSC); @@ -455,6 +451,30 @@ static void xe_gsc_proxy_remove(void *arg) } xe_gsc_wait_for_worker_completion(gsc); + gsc->proxy.started = false; +} + +static void xe_gsc_proxy_remove(void *arg) +{ + struct xe_gsc *gsc = arg; + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_device *xe = gt_to_xe(gt); + + if (!gsc->proxy.component_added) + return; + + /* + * GSC proxy start is an async process that can be ongoing during + * Xe module load/unload. Using devm managed action to register + * xe_gsc_proxy_stop could cause issues if Xe module unload has + * already started when the action is registered, potentially leading + * to the cleanup being called at the wrong time. Therefore, instead + * of registering a separate devm action to undo what is done in + * proxy start, we call it from here, but only if the start has + * completed successfully (tracked with the 'started' flag). 
+ */ + if (gsc->proxy.started) + xe_gsc_proxy_stop(gsc); component_del(xe->drm.dev, &xe_gsc_proxy_component_ops); gsc->proxy.component_added = false; @@ -510,6 +530,7 @@ int xe_gsc_proxy_init(struct xe_gsc *gsc) */ int xe_gsc_proxy_start(struct xe_gsc *gsc) { + struct xe_gt *gt = gsc_to_gt(gsc); int err; /* enable the proxy interrupt in the GSC shim layer */ @@ -521,12 +542,18 @@ int xe_gsc_proxy_start(struct xe_gsc *gsc) */ err = xe_gsc_proxy_request_handler(gsc); if (err) - return err; + goto err_irq_disable; if (!xe_gsc_proxy_init_done(gsc)) { - xe_gt_err(gsc_to_gt(gsc), "GSC FW reports proxy init not completed\n"); - return -EIO; + xe_gt_err(gt, "GSC FW reports proxy init not completed\n"); + err = -EIO; + goto err_irq_disable; } + gsc->proxy.started = true; return 0; + +err_irq_disable: + gsc_proxy_irq_toggle(gsc, false); + return err; } diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h index 97c056656df0..5aaa2a75861f 100644 --- a/drivers/gpu/drm/xe/xe_gsc_types.h +++ b/drivers/gpu/drm/xe/xe_gsc_types.h @@ -58,6 +58,8 @@ struct xe_gsc { struct mutex mutex; /** @proxy.component_added: whether the component has been added */ bool component_added; + /** @proxy.started: whether the proxy has been started */ + bool started; /** @proxy.bo: object to store message to and from the GSC */ struct xe_bo *bo; /** @proxy.to_gsc: map of the memory used to send messages to the GSC */ From 89340099c6a45884d5bf3995e466359ddb31a6f7 Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Mon, 2 Mar 2026 13:57:57 +0530 Subject: [PATCH 193/195] drm/xe/lrc: Refactor context init into xe_lrc_ctx_init() Currently xe_lrc_init() does two things. 1. Allocates LRC bo based on exec queue parameters. 2. Initializes LRC bo with actual context details. Introduce xe_lrc_ctx_init() and split these two implementations for better maintainability. 
Signed-off-by: Raag Jadav Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20260302082757.3516577-1-raag.jadav@intel.com --- drivers/gpu/drm/xe/xe_lrc.c | 142 ++++++++++++++++++++---------------- 1 file changed, 79 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 84360fcdf743..ebab5d78f7cc 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1438,65 +1438,16 @@ void xe_lrc_set_multi_queue_priority(struct xe_lrc *lrc, enum xe_multi_queue_pri lrc->desc |= FIELD_PREP(LRC_PRIORITY, xe_multi_queue_prio_to_lrc(lrc, priority)); } -static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, - struct xe_vm *vm, void *replay_state, u32 ring_size, - u16 msix_vec, - u32 init_flags) +static int xe_lrc_ctx_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_vm *vm, + void *replay_state, u16 msix_vec, u32 init_flags) { struct xe_gt *gt = hwe->gt; - const u32 lrc_size = xe_gt_lrc_size(gt, hwe->class); - u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE; struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = gt_to_xe(gt); - struct xe_bo *seqno_bo; struct iosys_map map; u32 arb_enable; - u32 bo_flags; int err; - kref_init(&lrc->refcount); - lrc->gt = gt; - lrc->replay_size = xe_gt_lrc_hang_replay_size(gt, hwe->class); - lrc->size = lrc_size; - lrc->flags = 0; - lrc->ring.size = ring_size; - lrc->ring.tail = 0; - - if (gt_engine_needs_indirect_ctx(gt, hwe->class)) { - lrc->flags |= XE_LRC_FLAG_INDIRECT_CTX; - bo_size += LRC_INDIRECT_CTX_BO_SIZE; - } - - if (xe_gt_has_indirect_ring_state(gt)) - lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; - - bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE; - - if ((vm && vm->xef) || init_flags & XE_LRC_CREATE_USER_CTX) /* userspace */ - bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE | XE_BO_FLAG_FORCE_USER_VRAM; - - lrc->bo = xe_bo_create_pin_map_novm(xe, tile, - 
bo_size, - ttm_bo_type_kernel, - bo_flags, false); - if (IS_ERR(lrc->bo)) - return PTR_ERR(lrc->bo); - - seqno_bo = xe_bo_create_pin_map_novm(xe, tile, PAGE_SIZE, - ttm_bo_type_kernel, - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE | - XE_BO_FLAG_SYSTEM, false); - if (IS_ERR(seqno_bo)) { - err = PTR_ERR(seqno_bo); - goto err_lrc_finish; - } - lrc->seqno_bo = seqno_bo; - - xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, - hwe->fence_irq, hwe->name); - /* * Init Per-Process of HW status Page, LRC / context state to known * values. If there's already a primed default_lrc, just copy it, otherwise @@ -1508,7 +1459,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, - lrc_size - LRC_PPHWSP_SIZE); + lrc->size - LRC_PPHWSP_SIZE); if (replay_state) xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, replay_state, lrc->replay_size); @@ -1516,21 +1467,16 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, void *init_data = empty_lrc_data(hwe); if (!init_data) { - err = -ENOMEM; - goto err_lrc_finish; + return -ENOMEM; } - xe_map_memcpy_to(xe, &map, 0, init_data, lrc_size); + xe_map_memcpy_to(xe, &map, 0, init_data, lrc->size); kfree(init_data); } - if (vm) { + if (vm) xe_lrc_set_ppgtt(lrc, vm); - if (vm->xef) - xe_drm_client_add_bo(vm->xef->client, lrc->bo); - } - if (xe_device_has_msix(xe)) { xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR, xe_memirq_status_ptr(&tile->memirq, hwe)); @@ -1546,14 +1492,20 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0); - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0); + + /* Match head and tail pointers */ + xe_lrc_write_indirect_ctx_reg(lrc, 
INDIRECT_CTX_RING_HEAD, lrc->ring.tail); xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail); + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL, RING_CTL_SIZE(lrc->ring.size) | RING_VALID); } else { xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); - xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0); + + /* Match head and tail pointers */ + xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, lrc->ring.tail); xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); + xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, RING_CTL_SIZE(lrc->ring.size) | RING_VALID); } @@ -1602,12 +1554,76 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, err = setup_wa_bb(lrc, hwe); if (err) - goto err_lrc_finish; + return err; err = setup_indirect_ctx(lrc, hwe); + + return err; +} + +static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_vm *vm, + void *replay_state, u32 ring_size, u16 msix_vec, u32 init_flags) +{ + struct xe_gt *gt = hwe->gt; + const u32 lrc_size = xe_gt_lrc_size(gt, hwe->class); + u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE; + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = gt_to_xe(gt); + struct xe_bo *bo; + u32 bo_flags; + int err; + + kref_init(&lrc->refcount); + lrc->gt = gt; + lrc->replay_size = xe_gt_lrc_hang_replay_size(gt, hwe->class); + lrc->size = lrc_size; + lrc->flags = 0; + lrc->ring.size = ring_size; + lrc->ring.tail = 0; + + if (gt_engine_needs_indirect_ctx(gt, hwe->class)) { + lrc->flags |= XE_LRC_FLAG_INDIRECT_CTX; + bo_size += LRC_INDIRECT_CTX_BO_SIZE; + } + + if (xe_gt_has_indirect_ring_state(gt)) + lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; + + bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE; + + if ((vm && vm->xef) || init_flags & XE_LRC_CREATE_USER_CTX) /* userspace */ + bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE | XE_BO_FLAG_FORCE_USER_VRAM; + + bo = xe_bo_create_pin_map_novm(xe, tile, bo_size, + 
ttm_bo_type_kernel, + bo_flags, false); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + lrc->bo = bo; + + bo = xe_bo_create_pin_map_novm(xe, tile, PAGE_SIZE, + ttm_bo_type_kernel, + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_SYSTEM, false); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + goto err_lrc_finish; + } + lrc->seqno_bo = bo; + + xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, + hwe->fence_irq, hwe->name); + + err = xe_lrc_ctx_init(lrc, hwe, vm, replay_state, msix_vec, init_flags); if (err) goto err_lrc_finish; + if (vm && vm->xef) + xe_drm_client_add_bo(vm->xef->client, lrc->bo); + return 0; err_lrc_finish: From d139209ef88e48af1f6731cd45440421c757b6b5 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 27 Feb 2026 08:43:41 -0800 Subject: [PATCH 194/195] drm/xe/xe2_hpg: Correct implementation of Wa_16025250150 Wa_16025250150 asks us to set five register fields of the register to 0x1 each. However we were just OR'ing this into the existing register value (which has a default of 0x4 for each nibble-sized field) resulting in final field values of 0x5 instead of the desired 0x1. Correct the RTP programming (use FIELD_SET instead of SET) to ensure each field is assigned to exactly the value we want. 
Cc: Aradhya Bhatia Cc: Tejas Upadhyay Cc: stable@vger.kernel.org # v6.16+ Fixes: 7654d51f1fd8 ("drm/xe/xe2hpg: Add Wa_16025250150") Reviewed-by: Ngai-Mint Kwan Link: https://patch.msgid.link/20260227164341.3600098-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_wa.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 26950b8a7543..183c5c86c35a 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -249,12 +249,13 @@ static const struct xe_rtp_entry_sr gt_was[] = { { XE_RTP_NAME("16025250150"), XE_RTP_RULES(GRAPHICS_VERSION(2001)), - XE_RTP_ACTIONS(SET(LSN_VC_REG2, - LSN_LNI_WGT(1) | - LSN_LNE_WGT(1) | - LSN_DIM_X_WGT(1) | - LSN_DIM_Y_WGT(1) | - LSN_DIM_Z_WGT(1))) + XE_RTP_ACTIONS(FIELD_SET(LSN_VC_REG2, + LSN_LNI_WGT_MASK | LSN_LNE_WGT_MASK | + LSN_DIM_X_WGT_MASK | LSN_DIM_Y_WGT_MASK | + LSN_DIM_Z_WGT_MASK, + LSN_LNI_WGT(1) | LSN_LNE_WGT(1) | + LSN_DIM_X_WGT(1) | LSN_DIM_Y_WGT(1) | + LSN_DIM_Z_WGT(1))) }, /* Xe3_LPG */ From 6bc6fec71ac45f52db609af4e62bdb96b9f5fadb Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Wed, 4 Feb 2026 17:28:11 +0000 Subject: [PATCH 195/195] drm/xe/reg_sr: Fix leak on xa_store failure Free the newly allocated entry when xa_store() fails to avoid a memory leak on the error path. v2: use goto fail_free. 
(Bala) Fixes: e5283bd4dfec ("drm/xe/reg_sr: Remove register pool") Cc: Balasubramani Vivekanandan Cc: Matt Roper Signed-off-by: Shuicheng Lin Reviewed-by: Matt Roper Link: https://patch.msgid.link/20260204172810.1486719-2-shuicheng.lin@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_reg_sr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 83a668f2a0d5..7d377f20f84e 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -100,10 +100,12 @@ int xe_reg_sr_add(struct xe_reg_sr *sr, *pentry = *e; ret = xa_err(xa_store(&sr->xa, idx, pentry, GFP_KERNEL)); if (ret) - goto fail; + goto fail_free; return 0; +fail_free: + kfree(pentry); fail: xe_gt_err(gt, "discarding save-restore reg %04lx (clear: %08x, set: %08x, masked: %s, mcr: %s): ret=%d\n",