Merge tag 'drm-xe-fixes-2024-10-31' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes

Driver Changes:
- Fix missing HPD interrupt enabling, bringing one PM refactor with it
  (Imre / Maarten)
- Workaround LNL GGTT invalidation not being visible to GuC
  (Matthew Brost)
- Avoid getting jobs stuck without a protecting timeout (Matthew Brost)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/tsbftadm7owyizzdaqnqu7u4tqggxgeqeztlfvmj5fryxlfomi@5m5bfv2zvzmw
Committer: Dave Airlie
Date:      2024-11-02 04:44:02 +10:00

 5 files changed, 75 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
--- a/drivers/gpu/drm/xe/display/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
@@ -309,18 +309,7 @@ static void xe_display_flush_cleanup_work(struct xe_device *xe)
 }
 
 /* TODO: System and runtime suspend/resume sequences will be sanitized as a follow-up. */
-void xe_display_pm_runtime_suspend(struct xe_device *xe)
-{
-	if (!xe->info.probe_display)
-		return;
-
-	if (xe->d3cold.allowed)
-		xe_display_pm_suspend(xe, true);
-
-	intel_hpd_poll_enable(xe);
-}
-
-void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
+static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime)
 {
 	struct intel_display *display = &xe->display;
 	bool s2idle = suspend_to_idle();
@@ -353,6 +342,27 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
 	intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold);
 
 	intel_dmc_suspend(xe);
+
+	if (runtime && has_display(xe))
+		intel_hpd_poll_enable(xe);
+}
+
+void xe_display_pm_suspend(struct xe_device *xe)
+{
+	__xe_display_pm_suspend(xe, false);
+}
+
+void xe_display_pm_runtime_suspend(struct xe_device *xe)
+{
+	if (!xe->info.probe_display)
+		return;
+
+	if (xe->d3cold.allowed) {
+		__xe_display_pm_suspend(xe, true);
+		return;
+	}
+
+	intel_hpd_poll_enable(xe);
 }
 
 void xe_display_pm_suspend_late(struct xe_device *xe)
@@ -366,17 +376,6 @@ void xe_display_pm_suspend_late(struct xe_device *xe)
 	intel_display_power_suspend_late(xe);
 }
 
-void xe_display_pm_runtime_resume(struct xe_device *xe)
-{
-	if (!xe->info.probe_display)
-		return;
-
-	intel_hpd_poll_disable(xe);
-
-	if (xe->d3cold.allowed)
-		xe_display_pm_resume(xe, true);
-}
-
 void xe_display_pm_resume_early(struct xe_device *xe)
 {
 	if (!xe->info.probe_display)
@@ -387,7 +386,7 @@ void xe_display_pm_resume_early(struct xe_device *xe)
 	intel_power_domains_resume(xe);
 }
 
-void xe_display_pm_resume(struct xe_device *xe, bool runtime)
+static void __xe_display_pm_resume(struct xe_device *xe, bool runtime)
 {
 	struct intel_display *display = &xe->display;
 
@@ -411,9 +410,11 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime)
 		intel_display_driver_resume(xe);
 		drm_kms_helper_poll_enable(&xe->drm);
 		intel_display_driver_enable_user_access(xe);
-		intel_hpd_poll_disable(xe);
 	}
 
+	if (has_display(xe))
+		intel_hpd_poll_disable(xe);
+
 	intel_opregion_resume(display);
 
 	intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false);
@@ -421,6 +422,26 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime)
 	intel_power_domains_enable(xe);
 }
 
+void xe_display_pm_resume(struct xe_device *xe)
+{
+	__xe_display_pm_resume(xe, false);
+}
+
+void xe_display_pm_runtime_resume(struct xe_device *xe)
+{
+	if (!xe->info.probe_display)
+		return;
+
+	if (xe->d3cold.allowed) {
+		__xe_display_pm_resume(xe, true);
+		return;
+	}
+
+	intel_hpd_init(xe);
+	intel_hpd_poll_disable(xe);
+}
+
 static void display_device_remove(struct drm_device *dev, void *arg)
 {
 	struct xe_device *xe = arg;

diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h
--- a/drivers/gpu/drm/xe/display/xe_display.h
+++ b/drivers/gpu/drm/xe/display/xe_display.h
@@ -34,10 +34,10 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir);
 void xe_display_irq_reset(struct xe_device *xe);
 void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt);
 
-void xe_display_pm_suspend(struct xe_device *xe, bool runtime);
+void xe_display_pm_suspend(struct xe_device *xe);
 void xe_display_pm_suspend_late(struct xe_device *xe);
 void xe_display_pm_resume_early(struct xe_device *xe);
-void xe_display_pm_resume(struct xe_device *xe, bool runtime);
+void xe_display_pm_resume(struct xe_device *xe);
 void xe_display_pm_runtime_suspend(struct xe_device *xe);
 void xe_display_pm_runtime_resume(struct xe_device *xe);
@@ -65,10 +65,10 @@ static inline void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir)
 static inline void xe_display_irq_reset(struct xe_device *xe) {}
 static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {}
 
-static inline void xe_display_pm_suspend(struct xe_device *xe, bool runtime) {}
+static inline void xe_display_pm_suspend(struct xe_device *xe) {}
 static inline void xe_display_pm_suspend_late(struct xe_device *xe) {}
 static inline void xe_display_pm_resume_early(struct xe_device *xe) {}
-static inline void xe_display_pm_resume(struct xe_device *xe, bool runtime) {}
+static inline void xe_display_pm_resume(struct xe_device *xe) {}
 static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {}
 static inline void xe_display_pm_runtime_resume(struct xe_device *xe) {}

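Taken together, the display hunks above make the entry points self-describing:
system PM always takes the full path, while runtime PM now decides internally
between the full d3cold path and a lightweight HPD-only path. A condensed
sketch of that decision follows; the sketch_* wrappers are hypothetical names
for illustration, the real entry points being xe_display_pm_runtime_suspend()
and xe_display_pm_runtime_resume():

static void sketch_runtime_suspend(struct xe_device *xe)
{
	if (xe->d3cold.allowed) {
		__xe_display_pm_suspend(xe, true);	/* full display suspend */
		return;
	}

	/* Display stays powered; poll for hotplug while HPD IRQs are off. */
	intel_hpd_poll_enable(xe);
}

static void sketch_runtime_resume(struct xe_device *xe)
{
	if (xe->d3cold.allowed) {
		__xe_display_pm_resume(xe, true);	/* full display resume */
		return;
	}

	intel_hpd_init(xe);		/* re-enable HPD interrupts */
	intel_hpd_poll_disable(xe);	/* polling is no longer needed */
}

The intel_hpd_init() call is the "missing HPD interrupt enabling" from the
summary: without it, the non-d3cold resume only disabled polling, leaving HPD
interrupts unarmed until the next full resume.
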
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -397,6 +397,16 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
 
 static void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
 {
+	struct xe_device *xe = tile_to_xe(ggtt->tile);
+
+	/*
+	 * XXX: Barrier for GGTT pages. Unsure exactly why this required but
+	 * without this LNL is having issues with the GuC reading scratch page
+	 * vs. correct GGTT page. Not particularly a hot code path so blindly
+	 * do a mmio read here which results in GuC reading correct GGTT page.
+	 */
+	xe_mmio_read32(xe_root_mmio_gt(xe), VF_CAP_REG);
+
 	/* Each GT in a tile has its own TLB to cache GGTT lookups */
 	ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
 	ggtt_invalidate_gt_tlb(ggtt->tile->media_gt);

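The hunk above has the shape of a classic posted-write flush, even though the
commit comment itself hedges on the exact mechanism: MMIO/GGTT writes may be
posted, and a read on the same interface forces them to complete before a
dependent agent (here the GuC, after the TLB invalidation) can observe stale
data. A generic sketch of the idiom, with made-up names rather than the
driver's code:

#include <linux/io.h>

/* Illustrative only: write a PTE, then read back any register on the
 * same device so the posted write is globally visible before anything
 * that depends on the new contents runs.
 */
static void write_pte_then_flush(void __iomem *pte_slot, u64 pte,
				 void __iomem *any_readable_reg)
{
	writeq(pte, pte_slot);		/* may sit in a write buffer */
	(void)readl(any_readable_reg);	/* read-back flushes posted writes */
}

In the workaround the read is xe_mmio_read32() of VF_CAP_REG; per the comment,
this is not a hot path, so the cost of a blind MMIO read is acceptable.
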
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -916,12 +916,22 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
 static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
 {
 	struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
-	u32 ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
-	u32 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
+	u32 ctx_timestamp, ctx_job_timestamp;
 	u32 timeout_ms = q->sched_props.job_timeout_ms;
 	u32 diff;
 	u64 running_time_ms;
 
+	if (!xe_sched_job_started(job)) {
+		xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
+			   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
+			   q->guc->id);
+
+		return xe_sched_invalidate_job(job, 2);
+	}
+
+	ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
+	ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
+
 	/*
 	 * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch
 	 * possible overflows with a high timeout.
@@ -1049,10 +1059,6 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
 		exec_queue_killed_or_banned_or_wedged(q) ||
 		exec_queue_destroyed(q);
 
-	/* Job hasn't started, can't be timed out */
-	if (!skip_timeout_check && !xe_sched_job_started(job))
-		goto rearm;
-
 	/*
 	 * XXX: Sampling timeout doesn't work in wedged mode as we have to
 	 * modify scheduling state to read timestamp. We could read the

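The two hunks above move the "job never started" case out of
guc_exec_queue_timedout_job(), where it silently re-armed the timer and could
leave a never-starting job without any timeout protection, and into
check_timeout(), which now warns and invalidates the job instead. The ~223s
in the comment is simply u32 wraparound at the 19.2 MHz reference clock, and
the unsigned subtraction the code relies on stays correct across a single
wrap; a worked sketch:

/* 2^32 ticks / 19,200,000 Hz ~= 223.7 s between wraps. */
static u32 ticks_since(u32 now, u32 then)
{
	return now - then;	/* modulo-2^32 arithmetic */
}

/* Example: then = 0xFFFFFFF0 (just before a wrap), now = 0x00000010
 * (just after): now - then = 0x20 ticks, as expected, despite now < then.
 */
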
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -123,7 +123,7 @@ int xe_pm_suspend(struct xe_device *xe)
 	for_each_gt(gt, xe, id)
 		xe_gt_suspend_prepare(gt);
 
-	xe_display_pm_suspend(xe, false);
+	xe_display_pm_suspend(xe);
 
 	/* FIXME: Super racey... */
 	err = xe_bo_evict_all(xe);
@@ -133,7 +133,7 @@ int xe_pm_suspend(struct xe_device *xe)
 	for_each_gt(gt, xe, id) {
 		err = xe_gt_suspend(gt);
 		if (err) {
-			xe_display_pm_resume(xe, false);
+			xe_display_pm_resume(xe);
 			goto err;
 		}
 	}
@@ -187,7 +187,7 @@ int xe_pm_resume(struct xe_device *xe)
 	for_each_gt(gt, xe, id)
 		xe_gt_resume(gt);
 
-	xe_display_pm_resume(xe, false);
+	xe_display_pm_resume(xe);
 
 	err = xe_bo_restore_user(xe);
 	if (err)