From 7f7a2da3bf8bc0e0f6c239af495b7050056e889c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 26 Jul 2024 18:22:16 -0700 Subject: [PATCH 001/108] drm/xe: Use dma_fence_chain_free in chain fence unused as a sync A chain fence is uninitialized if not installed in a drm sync obj. Thus if xe_sync_entry_cleanup is called and sync->chain_fence is non-NULL the proper cleanup is dma_fence_chain_free rather than a dma-fence put. Reported-by: Paulo Zanoni Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2411 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2261 Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240727012216.2118276-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 533246f42256..3aa6270e5dd7 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -259,7 +259,7 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync) if (sync->fence) dma_fence_put(sync->fence); if (sync->chain_fence) - dma_fence_put(&sync->chain_fence->base); + dma_fence_chain_free(sync->chain_fence); if (sync->ufence) user_fence_put(sync->ufence); } From 6f20fc09936e786a4ba18f5514fe185e0451ada9 Mon Sep 17 00:00:00 2001 From: Dominik Grzegorzek Date: Mon, 29 Jul 2024 16:01:52 +0300 Subject: [PATCH 002/108] drm/xe: Move and export xe_hw_engine lookup. Move and export xe_hw_engine lookup. This is in preparation to use this in eudebug code where we want to find active engine. v2: s/tile/gt due to uapi changes (Mika) Signed-off-by: Dominik Grzegorzek Signed-off-by: Mika Kuoppala Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240729130152.100130-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 37 ++++-------------------------- drivers/gpu/drm/xe/xe_hw_engine.c | 31 +++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_hw_engine.h | 7 ++++++ 3 files changed, 42 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 69867a7b7c77..956dc15b432a 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -413,34 +413,6 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue return 0; } -static const enum xe_engine_class user_to_xe_engine_class[] = { - [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, - [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, - [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, - [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, - [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, -}; - -static struct xe_hw_engine * -find_hw_engine(struct xe_device *xe, - struct drm_xe_engine_class_instance eci) -{ - u32 idx; - - if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) - return NULL; - - if (eci.gt_id >= xe->info.gt_count) - return NULL; - - idx = array_index_nospec(eci.engine_class, - ARRAY_SIZE(user_to_xe_engine_class)); - - return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), - user_to_xe_engine_class[idx], - eci.engine_instance, true); -} - static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt, struct drm_xe_engine_class_instance *eci, u16 width, u16 num_placements) @@ -462,8 +434,7 @@ 
static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt, if (xe_hw_engine_is_reserved(hwe)) continue; - if (hwe->class == - user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY] + if (hwe->class == XE_ENGINE_CLASS_COPY) logical_mask |= BIT(hwe->logical_instance); } @@ -492,7 +463,7 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, n = j * width + i; - hwe = find_hw_engine(xe, eci[n]); + hwe = xe_hw_engine_lookup(xe, eci[n]); if (XE_IOCTL_DBG(xe, !hwe)) return 0; @@ -571,7 +542,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !logical_mask)) return -EINVAL; - hwe = find_hw_engine(xe, eci[0]); + hwe = xe_hw_engine_lookup(xe, eci[0]); if (XE_IOCTL_DBG(xe, !hwe)) return -EINVAL; @@ -608,7 +579,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !logical_mask)) return -EINVAL; - hwe = find_hw_engine(xe, eci[0]); + hwe = xe_hw_engine_lookup(xe, eci[0]); if (XE_IOCTL_DBG(xe, !hwe)) return -EINVAL;
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 07ed9fd28f19..00ace5fcc284 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -5,7 +5,10 @@ #include "xe_hw_engine.h" +#include <linux/nospec.h> + #include <drm/drm_managed.h> +#include <uapi/drm/xe_drm.h> #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" @@ -1135,3 +1138,31 @@ enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe) { return engine_infos[hwe->engine_id].domain; } + +static const enum xe_engine_class user_to_xe_engine_class[] = { + [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, + [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, + [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, + [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, + [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, +}; + +struct xe_hw_engine * +xe_hw_engine_lookup(struct xe_device *xe, + struct drm_xe_engine_class_instance eci) +{ + unsigned int idx; + + if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) + return NULL; + + if (eci.gt_id >= xe->info.gt_count) + return NULL; + + idx = array_index_nospec(eci.engine_class, + ARRAY_SIZE(user_to_xe_engine_class)); + + return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), + user_to_xe_engine_class[idx], + eci.engine_instance, true); +}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index 900c8c991430..d227ffe557eb 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -9,6 +9,8 @@ #include "xe_hw_engine_types.h" struct drm_printer; +struct drm_xe_engine_class_instance; +struct xe_device; #ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN #define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN @@ -62,6 +64,11 @@ void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p); void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe); bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe); + +struct xe_hw_engine * +xe_hw_engine_lookup(struct xe_device *xe, + struct drm_xe_engine_class_instance eci); + static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe) { return hwe->name;
From f1e6f89506e48b42235755fb3c2b73022697d8e5 Mon Sep 17 00:00:00 2001 From: Pallavi Mishra Date: Thu, 1 Aug 2024 01:26:22 +0530 Subject: [PATCH 003/108] drm/xe/xe2: Enable Priority Mem Read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable feature to allow memory
reads to take a priority memory path. This will reduce latency on the read path, but may introduce read after write (RAW) hazards as reads and writes will no longer be ordered. To avoid RAW hazards, SW can use the MI_MEM_FENCE command or any other MI command that generates non-posted memory writes. This will ensure data is coherent in memory prior to execution of commands which read data from memory. RCS, BCS and CCS support this feature. No pattern identified in KMD that could lead to a hazard. v2: Modify commit message, enable priority mem read feature for media, modify version range, modify bspec detail (Matt Roper) v3: Rebase, fix cramped line-wrapping (jcavitt) v4: Rebase v5: Media does not support Priority Mem Read. Modify commit to reflect the same. v6: Rebase Bspec: 60298, 60237, 60187, 60188 Signed-off-by: Pallavi Mishra Reviewed-by: Matt Roper Acked-by: José Roberto de Souza Acked-by: Carl Zhang Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240731195622.1868401-1-pallavi.mishra@intel.com --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 1 + drivers/gpu/drm/xe/xe_hw_engine.c | 6 ++++++ 2 files changed, 7 insertions(+)
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index c38db2a74614..81b71903675e 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -104,6 +104,7 @@ #define CSFE_CHICKEN1(base) XE_REG((base) + 0xd4, XE_REG_OPTION_MASKED) #define GHWSP_CSB_REPORT_DIS REG_BIT(15) #define PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS REG_BIT(14) +#define CS_PRIORITY_MEM_READ REG_BIT(7) #define FF_SLICE_CS_CHICKEN1(base) XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED) #define FFSC_PERCTX_PREEMPT_CTRL REG_BIT(14)
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 00ace5fcc284..403eb1d2d20a 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -428,6 +428,12 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) 0xA, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + /* Enable Priority Mem Read */ + { XE_RTP_NAME("Priority_Mem_Read"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, {} };
From 146458645e505f5eac498759bcd865cf7c0dfd9a Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Thu, 1 Aug 2024 16:54:24 +0530 Subject: [PATCH 004/108] drm/xe/hwmon: Fix PL1 disable flow in xe_hwmon_power_max_write In xe_hwmon_power_max_write, for the PL1-disable-supported case, the PL1 enable path was also being run instead of returning after PL1 disable. Fixed it by returning after the disable. v2: Correct typo and grammar in commit message.
(Jonathan) Signed-off-by: Karthik Poosa Fixes: fef6dd12b45a ("drm/xe/hwmon: Protect hwmon rw attributes with hwmon_lock") Reviewed-by: Jonathan Cavitt Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240801112424.1841766-1-karthik.poosa@intel.com --- drivers/gpu/drm/xe/xe_hwmon.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 0c8ce09e5025..832ea81faeee 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -203,9 +203,10 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long va reg_val = xe_mmio_rmw32(hwmon->gt, rapl_limit, PKG_PWR_LIM_1_EN, 0); reg_val = xe_mmio_read32(hwmon->gt, rapl_limit); if (reg_val & PKG_PWR_LIM_1_EN) { + drm_warn(&gt_to_xe(hwmon->gt)->drm, "PL1 disable is not supported!\n"); ret = -EOPNOTSUPP; - goto unlock; } + goto unlock; } /* Computation in 64-bits to avoid overflow. Round to nearest. */
From c3bc97d2f102ddd5a8341eeb2dbae2a3e98bb46a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 1 Aug 2024 08:41:16 -0700 Subject: [PATCH 005/108] drm/xe: Take ref to VM in delayed snapshot Kernel BOs don't take a ref to the VM, but we need the VM for the delayed snapshot, so take a ref to the VM in the delayed snapshot. v2: - Check for lrc_bo before taking a VM ref (CI) - Check lrc_bo->vm before taking / dropping a VM ref (CI) - Drop VM in xe_lrc_snapshot_free v5: - Fix commit message wording (Johnathan) Fixes: 47058633d9c5 ("drm/xe: Move lrc snapshot capturing to xe_lrc.c") Cc: Maarten Lankhorst Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240801154118.2547543-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_lrc.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 94ff62e1d95e..58121821f081 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1634,6 +1634,9 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) if (!snapshot) return NULL; + if (lrc->bo && lrc->bo->vm) + xe_vm_get(lrc->bo->vm); + snapshot->context_desc = xe_lrc_ggtt_addr(lrc); snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); snapshot->head = xe_lrc_ring_head(lrc); @@ -1653,12 +1656,14 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) { struct xe_bo *bo; + struct xe_vm *vm; struct iosys_map src; if (!snapshot) return; bo = snapshot->lrc_bo; + vm = bo->vm; snapshot->lrc_bo = NULL; snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); @@ -1678,6 +1683,8 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) xe_bo_unlock(bo); put_bo: xe_bo_put(bo); + if (vm) + xe_vm_put(vm); } void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p) @@ -1727,8 +1734,14 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) return; kvfree(snapshot->lrc_snapshot); - if (snapshot->lrc_bo) + if (snapshot->lrc_bo) { + struct xe_vm *vm; + + vm = snapshot->lrc_bo->vm; xe_bo_put(snapshot->lrc_bo); + if (vm) + xe_vm_put(vm); + } kfree(snapshot); }
From 53369581dc0c68a5700ed51e1660f44c4b2bb524 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 1 Aug 2024 08:41:17 -0700 Subject: [PATCH 006/108] drm/printer: Allow NULL data in devcoredump printer We want to determine the size of the
devcoredump before writing it out. To that end, we will run the devcoredump printer with NULL data to get the size, alloc data based on the generated offset, then run the devcoredump again with a valid data pointer to print. This necessitates not writing data to the data pointer on the initial pass, when it is NULL. v5: - Better commit message (Jonathan) - Add kernel doc with examples (Jani) Cc: Maarten Lankhorst Acked-by: Maarten Lankhorst Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240801154118.2547543-3-matthew.brost@intel.com --- drivers/gpu/drm/drm_print.c | 13 +++++---- include/drm/drm_print.h | 54 ++++++++++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c index cf24dfdeb6b2..0081190201a7 100644 --- a/drivers/gpu/drm/drm_print.c +++ b/drivers/gpu/drm/drm_print.c @@ -100,8 +100,9 @@ void __drm_puts_coredump(struct drm_printer *p, const char *str) copy = iterator->remain; /* Copy out the bit of the string that we need */ - memcpy(iterator->data, - str + (iterator->start - iterator->offset), copy); + if (iterator->data) + memcpy(iterator->data, + str + (iterator->start - iterator->offset), copy); iterator->offset = iterator->start + copy; iterator->remain -= copy; @@ -110,7 +111,8 @@ void __drm_puts_coredump(struct drm_printer *p, const char *str) len = min_t(ssize_t, strlen(str), iterator->remain); - memcpy(iterator->data + pos, str, len); + if (iterator->data) + memcpy(iterator->data + pos, str, len); iterator->offset += len; iterator->remain -= len; @@ -140,8 +142,9 @@ void __drm_printfn_coredump(struct drm_printer *p, struct va_format *vaf) if ((iterator->offset >= iterator->start) && (len < iterator->remain)) { ssize_t pos = iterator->offset - iterator->start; - snprintf(((char *) iterator->data) + pos, - iterator->remain, "%pV", vaf); + if (iterator->data) + snprintf(((char *) iterator->data) + pos, + iterator->remain, "%pV", vaf); iterator->offset += len; iterator->remain -= len;
diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index 5d9dff5149c9..d2676831d765 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -221,7 +221,8 @@ drm_vprintf(struct drm_printer *p, const char *fmt, va_list *va) /** * struct drm_print_iterator - local struct used with drm_printer_coredump - * @data: Pointer to the devcoredump output buffer + * @data: Pointer to the devcoredump output buffer, can be NULL if using + * drm_printer_coredump to determine size of devcoredump * @start: The offset within the buffer to start writing * @remain: The number of bytes to write for this iteration */ @@ -266,6 +267,57 @@ struct drm_print_iterator { * coredump_read, ...) * } * + * The above example has a time complexity of O(N^2), where N is the size of the + * devcoredump. This is acceptable for small devcoredumps but scales poorly for + * larger ones. + * + * Another use case for drm_coredump_printer is to capture the devcoredump into + * a saved buffer before the dev_coredump() callback. This involves two passes: + * one to determine the size of the devcoredump and another to print it to a + * buffer. Then, in dev_coredump(), copy from the saved buffer into the + * devcoredump read buffer. + * + * For example:: + * + * char *devcoredump_saved_buffer; + * + * ssize_t __coredump_print(char *buffer, ssize_t count, ...)
+ * { + * struct drm_print_iterator iter; + * struct drm_printer p; + * + * iter.data = buffer; + * iter.start = 0; + * iter.remain = count; + * + * p = drm_coredump_printer(&iter); + * + * drm_printf(&p, "foo=%d\n", foo); + * ... + * return count - iter.remain; + * } + * + * void coredump_print(...) + * { + * ssize_t count; + * + * count = __coredump_print(NULL, INT_MAX, ...); + * devcoredump_saved_buffer = kvmalloc(count, GFP_KERNEL); + * __coredump_print(devcoredump_saved_buffer, count, ...); + * } + * + * void coredump_read(char *buffer, loff_t offset, size_t count, + * void *data, size_t datalen) + * { + * ... + * memcpy(buffer, devcoredump_saved_buffer + offset, count); + * ... + * } + * + * The above example has a time complexity of O(N*2), where N is the size of the + * devcoredump. This scales better than the previous example for larger + * devcoredumps. + * * RETURNS: * The &drm_printer object */
From 4f04d07c0a94b0b30db839e44f1b8364a1b508ac Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 1 Aug 2024 08:41:18 -0700 Subject: [PATCH 007/108] drm/xe: Faster devcoredump The current algorithm to read out devcoredump is O(N*N) where N is the size of the coredump due to usage of the drm_coredump_printer in xe_devcoredump_read. Switch to an O(N) algorithm which prints the devcoredump into a readable format in snapshot work and update xe_devcoredump_read to memcpy from the readable format directly. v2: - Fix double free on devcoredump removal (Testing) - Set read_data_size after snap work flush - Adjust remaining in iterator upon realloc (Testing) - Set read_data upon realloc (Testing) v3: - Kernel doc v4: - Two pass algorithm to determine size (Maarten) v5: - Use scope for reading variables (Johnathan) Reported-by: Paulo Zanoni Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2408 Cc: Rodrigo Vivi Cc: Maarten Lankhorst Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240801154118.2547543-4-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_devcoredump.c | 111 ++++++++++++++++------ drivers/gpu/drm/xe/xe_devcoredump_types.h | 8 ++ 2 files changed, 88 insertions(+), 31 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index d8d8ca2c19d3..bdb76e834e4c 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -66,22 +66,9 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q) return &q->gt->uc.guc; } -static void xe_devcoredump_deferred_snap_work(struct work_struct *work) +static ssize_t __xe_devcoredump_read(char *buffer, size_t count, + struct xe_devcoredump *coredump) { - struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); - - /* keep going if fw fails as we still want to save the memory and SW data */ - if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL)) - xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); - xe_vm_snapshot_capture_delayed(ss->vm); - xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); - xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); -} - -static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, - size_t count, void *data, size_t datalen) -{ - struct xe_devcoredump *coredump = data; struct xe_device *xe; struct xe_devcoredump_snapshot *ss; struct drm_printer p; @@ -89,18 +76,11 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, struct timespec64 ts; int i; - if (!coredump) - return -ENODEV; - xe =
coredump_to_xe(coredump); ss = &coredump->snapshot; - /* Ensure delayed work is captured before continuing */ - flush_work(&ss->work); - iter.data = buffer; - iter.offset = 0; - iter.start = offset; + iter.start = 0; iter.remain = count; p = drm_coredump_printer(&iter); @@ -134,10 +114,83 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, return count - iter.remain; } +static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) +{ + int i; + + xe_guc_ct_snapshot_free(ss->ct); + ss->ct = NULL; + + xe_guc_exec_queue_snapshot_free(ss->ge); + ss->ge = NULL; + + xe_sched_job_snapshot_free(ss->job); + ss->job = NULL; + + for (i = 0; i < XE_NUM_HW_ENGINES; i++) + if (ss->hwe[i]) { + xe_hw_engine_snapshot_free(ss->hwe[i]); + ss->hwe[i] = NULL; + } + + xe_vm_snapshot_free(ss->vm); + ss->vm = NULL; +} + +static void xe_devcoredump_deferred_snap_work(struct work_struct *work) +{ + struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); + struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot); + + /* keep going if fw fails as we still want to save the memory and SW data */ + if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL)) + xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); + xe_vm_snapshot_capture_delayed(ss->vm); + xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); + xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); + + /* Calculate devcoredump size */ + ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); + + ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); + if (!ss->read.buffer) + return; + + __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump); + xe_devcoredump_snapshot_free(ss); +} + +static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, + size_t count, void *data, size_t datalen) +{ + struct xe_devcoredump *coredump = data; + struct xe_devcoredump_snapshot *ss; + ssize_t byte_copied; + + if (!coredump) + return -ENODEV; + + ss = &coredump->snapshot; + + /* Ensure delayed work is captured before continuing */ + flush_work(&ss->work); + + if (!ss->read.buffer) + return -ENODEV; + + if (offset >= ss->read.size) + return 0; + + byte_copied = count < ss->read.size - offset ? count : + ss->read.size - offset; + memcpy(buffer, ss->read.buffer + offset, byte_copied); + + return byte_copied; +} + static void xe_devcoredump_free(void *data) { struct xe_devcoredump *coredump = data; - int i; /* Our device is gone. Nothing to do... 
*/ if (!data || !coredump_to_xe(coredump)) @@ -145,13 +198,8 @@ static void xe_devcoredump_free(void *data) cancel_work_sync(&coredump->snapshot.work); - xe_guc_ct_snapshot_free(coredump->snapshot.ct); - xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge); - xe_sched_job_snapshot_free(coredump->snapshot.job); - for (i = 0; i < XE_NUM_HW_ENGINES; i++) - if (coredump->snapshot.hwe[i]) - xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]); - xe_vm_snapshot_free(coredump->snapshot.vm); + xe_devcoredump_snapshot_free(&coredump->snapshot); + kvfree(coredump->snapshot.read.buffer); /* To prevent stale data on next snapshot, clear everything */ memset(&coredump->snapshot, 0, sizeof(coredump->snapshot)); @@ -260,4 +308,5 @@ int xe_devcoredump_init(struct xe_device *xe) { return devm_add_action_or_reset(xe->drm.dev, xe_driver_devcoredump_fini, &xe->drm); } + #endif diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index 923cdf72a816..440d05d77a5a 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -46,6 +46,14 @@ struct xe_devcoredump_snapshot { struct xe_sched_job_snapshot *job; /** @vm: Snapshot of VM state */ struct xe_vm_snapshot *vm; + + /** @read: devcoredump in human readable format */ + struct { + /** @read.size: size of devcoredump in human readable format */ + ssize_t size; + /** @read.buffer: buffer of devcoredump in human readable format */ + char *buffer; + } read; }; /** From 2009e808bc3e0df6d4d83e2271bc25ae63a4ac05 Mon Sep 17 00:00:00 2001 From: Akshata Jahagirdar Date: Fri, 2 Aug 2024 07:12:03 -0700 Subject: [PATCH 008/108] drm/xe/xe2: Introduce performance changes Add Compression Performance Improvement Changes in Xe2 v2: Rebase v3: Rebase, updated as per latest changes on bspec, Removed unnecessary default actions (Matt) formatting nits (Tejas) v4: Formatting nits, removed default set action for bit 14 (Matt) Bspec: 72161 Signed-off-by: Akshata Jahagirdar Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/c2dd753fdc55df6a6432026f2df9c2684a0d25c1.1722607628.git.akshata.jahagirdar@intel.com --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 +++ drivers/gpu/drm/xe/xe_tuning.c | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 3b87f95f9ecf..c50643ab4c84 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -367,6 +367,9 @@ #define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4) #define XEHP_LNESPARE REG_BIT(19) +#define L3SQCREG2 XE_REG_MCR(0xb104) +#define COMPMEMRD256BOVRFETCHEN REG_BIT(20) + #define L3SQCREG3 XE_REG_MCR(0xb108) #define COMPPWOVERFETCHEN REG_BIT(28) diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 77d4eec0118d..3817b7743b0c 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -45,6 +45,11 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN)) }, + { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(SET(L3SQCREG2, + COMPMEMRD256BOVRFETCHEN)) + }, {} }; From be1dec570b6f5a29ce9c99334c52bea94c28914b Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Sun, 4 Aug 2024 23:20:57 -0700 Subject: [PATCH 009/108] 
drm/xe/observation: Drop empty sysctl table entry An empty sysctl table entry was inadvertently left behind for observation sysctl. This breaks on 6.11 with the following errors: [ 219.654850] sysctl table check failed: dev/xe/(null) procname is null [ 219.654862] sysctl table check failed: dev/xe/(null) No proc_handler Drop the empty entry. Fixes: 8169b2097d88 ("drm/xe/uapi: Rename xe perf layer as xe observation layer") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2419 Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20240805062057.3547560-1-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_observation.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/xe_observation.c b/drivers/gpu/drm/xe/xe_observation.c index fcb584b42a7d..a78c92a44ec2 100644 --- a/drivers/gpu/drm/xe/xe_observation.c +++ b/drivers/gpu/drm/xe/xe_observation.c @@ -66,7 +66,6 @@ static struct ctl_table observation_ctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, - {} }; /**
From 8d5309b7f67571bc0c95d430f4722a99d38f3b79 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 5 Aug 2024 13:02:33 -0700 Subject: [PATCH 010/108] drm/xe: Only check last fence on user binds We only set the last fence on user binds, so no need to check the last fence on kernel issued binds. Will avoid blowing up last fence lockdep asserts. Cc: Francois Dugast Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240805200233.3050325-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_pt.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 97a6a0b0b8ba..579ed31b46db 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1149,10 +1149,12 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job, return err; } - if (job) - err = xe_sched_job_last_fence_add_dep(job, vm); - else - err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); + if (!(pt_update_ops->q->flags & EXEC_QUEUE_FLAG_KERNEL)) { + if (job) + err = xe_sched_job_last_fence_add_dep(job, vm); + else + err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); + } for (i = 0; job && !err && i < vops->num_syncs; i++) err = xe_sync_entry_add_deps(&vops->syncs[i], job);
From ecabb5e6ce54711c28706fc794d77adb3ecd0605 Mon Sep 17 00:00:00 2001 From: Shekhar Chauhan Date: Mon, 5 Aug 2024 11:07:10 +0530 Subject: [PATCH 011/108] drm/xe/xe2: Add performance tuning changes Update performance tuning according to the hardware spec.
Bspec: 72161 Signed-off-by: Shekhar Chauhan Reviewed-by: Sai Teja Pottumuttu Reviewed-by: Akshata Jahagirdar Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240805053710.877119-1-shekhar.chauhan@intel.com --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 4 ++++ drivers/gpu/drm/xe/xe_tuning.c | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index c50643ab4c84..2c8c4d4218db 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -80,6 +80,9 @@ #define LE_CACHEABILITY_MASK REG_GENMASK(1, 0) #define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value) +#define STATELESS_COMPRESSION_CTRL XE_REG(0x4148) +#define UNIFIED_COMPRESSION_FORMAT REG_GENMASK(3, 0) + #define XE2_GAMREQSTRM_CTRL XE_REG(0x4194) #define CG_DIS_CNTLBUS REG_BIT(6) @@ -193,6 +196,7 @@ #define GSCPSMI_BASE XE_REG(0x880c) #define CCCHKNREG1 XE_REG_MCR(0x8828) +#define L3CMPCTRL REG_BIT(23) #define ENCOMPPERFFIX REG_BIT(18) /* Fuse readout registers for GT */ diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 3817b7743b0c..faa1bf42e50e 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -39,7 +39,8 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { }, { XE_RTP_NAME("Tuning: Compression Overfetch"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), - XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX)), + XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX), + SET(CCCHKNREG1, L3CMPCTRL)) }, { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), @@ -50,6 +51,11 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_ACTIONS(SET(L3SQCREG2, COMPMEMRD256BOVRFETCHEN)) }, + { XE_RTP_NAME("Tuning: Stateless compression control"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, + REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) + }, {} }; From 4eb0aab6e4434ada240286d934651dfdb2e08301 Mon Sep 17 00:00:00 2001 From: Julia Filipchuk Date: Fri, 2 Aug 2024 15:21:27 -0700 Subject: [PATCH 012/108] drm/xe/guc: Bump minimum required GuC version to v70.29.2 The VF API version for this release is 1.13.4. Bumping the minimum required GuC version just before force-probe removal allows us to set a baseline for what API features are expected to be available. I.e., at this point there is no need for any version checking in the code before using a feature. Of course, if/when the API is extended in future GuC releases, those new features will need API version checks in the code. Bump the recommended GuC versions to match. Also add numerical comparison helpers to simplify the version number checks. 
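As an illustration only (not part of the patch): because each version component fits in 8 bits, the 8.8.8 packing turns version ordering into a single integer comparison. A minimal standalone sketch of the helper's arithmetic:

    #include <stdio.h>

    /* mirrors MAKE_GUC_VER from the patch below: 8 bits per component */
    #define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat))

    int main(void)
    {
            /* v70.19.2 packs to 0x461302, v70.29.2 packs to 0x461d02 */
            printf("%#x < %#x -> %d\n",
                   MAKE_GUC_VER(70, 19, 2), MAKE_GUC_VER(70, 29, 2),
                   MAKE_GUC_VER(70, 19, 2) < MAKE_GUC_VER(70, 29, 2));
            return 0;
    }

This is also why the 8-bit limit matters: a component of 256 or more would carry into the next field and break the ordering.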
v2: Reword commit message and make comparison helpers GuC specific - review feedback from Daniele, done by JohnH Signed-off-by: Julia Filipchuk Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20240802222129.3976212-3-John.C.Harrison@Intel.com --- drivers/gpu/drm/xe/xe_guc.h | 10 ++++++++++ drivers/gpu/drm/xe/xe_uc_fw.c | 27 ++++++++++++++------------- 2 files changed, 24 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index e0bbf98f849d..c3e6b51f7a09 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -11,6 +11,16 @@ #include "xe_hw_engine_types.h" #include "xe_macros.h" +/* + * GuC version number components are defined to be only 8-bit size, + * so converting to a 32bit 8.8.8 integer allows simple (and safe) + * numerical comparisons. + */ +#define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) +#define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch) +#define GUC_SUBMIT_VER(guc) MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]) +#define GUC_FIRMWARE_VER(guc) MAKE_GUC_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE]) + struct drm_printer; void xe_guc_comm_init_early(struct xe_guc *guc);
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 5b70d23724c4..6bd1962bce36 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -15,6 +15,7 @@ #include "xe_gsc.h" #include "xe_gt.h" #include "xe_gt_printk.h" +#include "xe_guc.h" #include "xe_map.h" #include "xe_mmio.h" #include "xe_module.h" @@ -105,15 +106,15 @@ struct fw_blobs_by_type { }; #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 19, 2)) \ - fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 19, 2)) \ - fw_def(DG2, major_ver(i915, guc, dg2, 70, 19, 2)) \ - fw_def(DG1, major_ver(i915, guc, dg1, 70, 19, 2)) \ - fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 19, 2)) \ - fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 19, 2)) \ - fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 19, 2)) \ - fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 19, 2)) \ - fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 19, 2)) + fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 29, 2)) \ + fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 29, 2)) \ + fw_def(DG2, major_ver(i915, guc, dg2, 70, 29, 2)) \ + fw_def(DG1, major_ver(i915, guc, dg1, 70, 29, 2)) \ + fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 29, 2)) \ + fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 29, 2)) \ + fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 29, 2)) \ + fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 29, 2)) \ + fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 29, 2)) #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ fw_def(BATTLEMAGE, no_ver(xe, huc, bmg)) \ @@ -309,10 +310,10 @@ static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) xe_gt_assert(gt, uc_fw->type == XE_UC_FW_TYPE_GUC); - /* We don't support GuC releases older than 70.19 */ - if (release->major < 70 || (release->major == 70 && release->minor < 19)) { - xe_gt_err(gt, "Unsupported GuC v%u.%u!
v70.19 or newer is required\n", - release->major, release->minor); + /* We don't support GuC releases older than 70.29.2 */ + if (MAKE_GUC_VER_STRUCT(*release) < MAKE_GUC_VER(70, 29, 2)) { + xe_gt_err(gt, "Unsupported GuC v%u.%u.%u! v70.29.2 or newer is required\n", + release->major, release->minor, release->patch); return -EINVAL; } From 9cc033e07d025dee1fac178c20ba96c56f8d9a91 Mon Sep 17 00:00:00 2001 From: Julia Filipchuk Date: Fri, 2 Aug 2024 15:21:28 -0700 Subject: [PATCH 013/108] drm/xe/guc: Define GuC version v70.29.2 for BMG UAPI version 1.13.4 Signed-off-by: Julia Filipchuk Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20240802222129.3976212-4-John.C.Harrison@Intel.com --- drivers/gpu/drm/xe/xe_uc_fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 6bd1962bce36..4bb2a4a80ddc 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -106,6 +106,7 @@ struct fw_blobs_by_type { }; #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ + fw_def(BATTLEMAGE, major_ver(xe, guc, bmg, 70, 29, 2)) \ fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 29, 2)) \ fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 29, 2)) \ fw_def(DG2, major_ver(i915, guc, dg2, 70, 29, 2)) \ From dae5d79a3bcc3101e1e02a1c2999ca74d1efde05 Mon Sep 17 00:00:00 2001 From: Dominik Grzegorzek Date: Tue, 6 Aug 2024 18:30:08 +0300 Subject: [PATCH 014/108] drm/xe: Export xe_hw_engine's mmio accessors Export hw engine's mmio accessors. This is in preparation to use these from eudebug code. v2: s/hw_engine_mmio/xe_hw_engine_mmio (Matthew) v3: kernel doc (Matthew) Cc: Matthew Brost Signed-off-by: Dominik Grzegorzek Signed-off-by: Mika Kuoppala Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240806153009.1081382-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/xe/xe_hw_engine.c | 88 +++++++++++++++++++------------ drivers/gpu/drm/xe/xe_hw_engine.h | 3 ++ 2 files changed, 57 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 403eb1d2d20a..c7c44de7c30f 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -277,8 +277,18 @@ static void hw_engine_fini(struct drm_device *drm, void *arg) hwe->gt = NULL; } -static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, - u32 val) +/** + * xe_hw_engine_mmio_write32() - Write engine register + * @hwe: engine + * @reg: register to write into + * @val: desired 32-bit value to write + * + * This function will write val into an engine specific register. + * Forcewake must be held by the caller. + * + */ +void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe, + struct xe_reg reg, u32 val) { xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base)); xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); @@ -288,7 +298,17 @@ static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, xe_mmio_write32(hwe->gt, reg, val); } -static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg) +/** + * xe_hw_engine_mmio_read32() - Read engine register + * @hwe: engine + * @reg: register to read from + * + * This function will read from an engine specific register. + * Forcewake must be held by the caller. + * + * Return: value of the 32-bit register. 
+ */ +u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg) { xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base)); xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); @@ -307,14 +327,14 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) xe_mmio_write32(hwe->gt, RCU_MODE, _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE)); - hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); - hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), + xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); + xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), xe_bo_ggtt_addr(hwe->hwsp)); - hw_engine_mmio_write32(hwe, RING_MODE(0), + xe_hw_engine_mmio_write32(hwe, RING_MODE(0), _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); - hw_engine_mmio_write32(hwe, RING_MI_MODE(0), + xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0), _MASKED_BIT_DISABLE(STOP_RING)); - hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); + xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); } static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt, @@ -800,7 +820,7 @@ xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe, unsigned int dss; u16 group, instance; - snapshot->reg.instdone.ring = hw_engine_mmio_read32(hwe, RING_INSTDONE(0)); + snapshot->reg.instdone.ring = xe_hw_engine_mmio_read32(hwe, RING_INSTDONE(0)); if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER) return; @@ -896,53 +916,53 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) return snapshot; snapshot->reg.ring_execlist_status = - hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)); - val = hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); + xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); snapshot->reg.ring_execlist_status |= val << 32; snapshot->reg.ring_execlist_sq_contents = - hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0)); - val = hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0)); + xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0)); snapshot->reg.ring_execlist_sq_contents |= val << 32; - snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0)); - val = hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); + snapshot->reg.ring_acthd = xe_hw_engine_mmio_read32(hwe, RING_ACTHD(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); snapshot->reg.ring_acthd |= val << 32; - snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0)); - val = hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); + snapshot->reg.ring_bbaddr = xe_hw_engine_mmio_read32(hwe, RING_BBADDR(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); snapshot->reg.ring_bbaddr |= val << 32; snapshot->reg.ring_dma_fadd = - hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); - val = hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); + xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); snapshot->reg.ring_dma_fadd |= val << 32; - snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); - snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); - snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0)); + snapshot->reg.ring_hwstam = xe_hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); + snapshot->reg.ring_hws_pga = xe_hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); + snapshot->reg.ring_start = xe_hw_engine_mmio_read32(hwe, RING_START(0)); if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 
2000) { - val = hw_engine_mmio_read32(hwe, RING_START_UDW(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_START_UDW(0)); snapshot->reg.ring_start |= val << 32; } if (xe_gt_has_indirect_ring_state(hwe->gt)) { snapshot->reg.indirect_ring_state = - hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); + xe_hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); } snapshot->reg.ring_head = - hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; + xe_hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; snapshot->reg.ring_tail = - hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR; - snapshot->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0)); + xe_hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR; + snapshot->reg.ring_ctl = xe_hw_engine_mmio_read32(hwe, RING_CTL(0)); snapshot->reg.ring_mi_mode = - hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); - snapshot->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0)); - snapshot->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0)); - snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0)); - snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0)); - snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0)); - snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0)); + xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); + snapshot->reg.ring_mode = xe_hw_engine_mmio_read32(hwe, RING_MODE(0)); + snapshot->reg.ring_imr = xe_hw_engine_mmio_read32(hwe, RING_IMR(0)); + snapshot->reg.ring_esr = xe_hw_engine_mmio_read32(hwe, RING_ESR(0)); + snapshot->reg.ring_emr = xe_hw_engine_mmio_read32(hwe, RING_EMR(0)); + snapshot->reg.ring_eir = xe_hw_engine_mmio_read32(hwe, RING_EIR(0)); + snapshot->reg.ipehr = xe_hw_engine_mmio_read32(hwe, RING_IPEHR(0)); xe_hw_engine_snapshot_instdone_capture(hwe, snapshot); if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index d227ffe557eb..022819a4a8eb 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -78,4 +78,7 @@ const char *xe_hw_engine_class_to_str(enum xe_engine_class class); u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe); enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe); +void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, u32 val); +u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg); + #endif From b62ef7e4021f74bb12445987efd8109ccca289c4 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 6 Aug 2024 18:30:09 +0300 Subject: [PATCH 015/108] drm/xe: Add kernel doc for xe_hw_engine_lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kerneldoc was missing from earlier commit where we exported xe_hw_engine_lookup. Add it. 
Cc: Dominik Grzegorzek Cc: Mika Kuoppala Cc: Matthew Brost Cc: Lucas De Marchi Cc: "Thomas Hellström" Cc: intel-xe@lists.freedesktop.org Signed-off-by: Mika Kuoppala Reviewed-by: Jonathan Cavitt Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240806153009.1081382-2-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/xe/xe_hw_engine.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index c7c44de7c30f..402dfa748e16 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -329,11 +329,11 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), - xe_bo_ggtt_addr(hwe->hwsp)); + xe_bo_ggtt_addr(hwe->hwsp)); xe_hw_engine_mmio_write32(hwe, RING_MODE(0), - _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); + _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0), - _MASKED_BIT_DISABLE(STOP_RING)); + _MASKED_BIT_DISABLE(STOP_RING)); xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); } @@ -1173,6 +1173,16 @@ static const enum xe_engine_class user_to_xe_engine_class[] = { [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, }; +/** + * xe_hw_engine_lookup() - Lookup hardware engine for class:instance + * @xe: xe device + * @eci: engine class and instance + * + * This function will find a hardware engine for given engine + * class and instance. + * + * Return: If found xe_hw_engine pointer, NULL otherwise. + */ struct xe_hw_engine * xe_hw_engine_lookup(struct xe_device *xe, struct drm_xe_engine_class_instance eci) From e102b5ed6e283a144793cab8fcd95f61d0ddbadb Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 6 Aug 2024 13:07:22 +0200 Subject: [PATCH 016/108] drm/xe: Fix access_ok check in user_fence_create Check size of the data not size of the pointer. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202407300421.IBkAja96-lkp@intel.com/ Fixes: 0fde907da2d5 ("drm/xe: Validate user fence during creation") Cc: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Auld Reviewed-by: Tejas Upadhyay Reviewed-by: Apoorva Singh Link: https://patchwork.freedesktop.org/patch/msgid/20240806110722.28661-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 3aa6270e5dd7..ca826aeb41ea 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -55,7 +55,7 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, struct xe_user_fence *ufence; u64 __user *ptr = u64_to_user_ptr(addr); - if (!access_ok(ptr, sizeof(ptr))) + if (!access_ok(ptr, sizeof(*ptr))) return ERR_PTR(-EFAULT); ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); From 636cdf6fbddff4780ff4452a82afb099292b88b1 Mon Sep 17 00:00:00 2001 From: Julia Filipchuk Date: Mon, 5 Aug 2024 13:54:35 -0700 Subject: [PATCH 017/108] drm/xe/guc: Enable w/a 14022293748 and 22019794406 Enable workarounds for HW bug where render engine reset fails. Given that we're bumping the minimum required GuC version to 70.29, we're guaranteed to always have support for this KLV in the GuC. v2: Enable KLV correctly for either workaround (Lucas) v4: Add check for minimum supported GuC firmware version. Enable w/a for hw version 20.01 too. 
(Daniele) v5 (Daniele): remove now unneeded fw type and version checks (JohnH) Signed-off-by: Julia Filipchuk Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Reviewed-by: John Harrison Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240805205435.921921-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 1 + drivers/gpu/drm/xe/xe_guc_ads.c | 6 ++++++ drivers/gpu/drm/xe/xe_wa_oob.rules | 4 ++++ 3 files changed, 11 insertions(+)
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 8f9f60b28306..6b30743a2f6c 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -351,6 +351,7 @@ enum xe_guc_klv_ids { GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING = 0x9005, GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007, GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008, + GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009, }; #endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 1c60b685dbc6..d1902a8581ca 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -24,6 +24,7 @@ #include "xe_map.h" #include "xe_mmio.h" #include "xe_platform_types.h" +#include "xe_uc_fw.h" #include "xe_wa.h" /* Slack of a few additional entries per engine */ @@ -367,6 +368,11 @@ static void guc_waklv_init(struct xe_guc_ads *ads) 0xC40, &offset, &remain); + if (XE_WA(gt, 14022293748) || XE_WA(gt, 22019794406)) + guc_waklv_enable_simple(ads, + GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET, + &offset, &remain); + size = guc_ads_waklv_size(ads) - remain; if (!size) return;
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 540d38603f32..5cf27ff27ce6 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -27,6 +27,10 @@ 16022287689 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) 13011645652 GRAPHICS_VERSION(2004) +14022293748 GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2004) +22019794406 GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2004) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001) 22019338487_display PLATFORM(LUNARLAKE)
From e422c0bfd9e47e399e86bcc483f49d8b54064fc2 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 7 Aug 2024 16:53:32 -0700 Subject: [PATCH 018/108] drm/xe: fix WA 14018094691 This WA is applied while initializing the media GT, but it is a primary GT WA (because it modifies a register on the primary GT), so the XE_WA macro is returning false even when the WA should be applied. Fix this by using the primary GT in the macro. Note that this WA only applies to PXP and we don't yet support that in Xe, so there are no negative effects to this bug, which is why we didn't see any errors in testing. v2: use the primary GT in the macro instead of marking the WA as platform-wide (Lucas, Matt).
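An illustrative sketch of the failure mode (an assumption based on the description above: XE_WA() consults the workaround list of whichever GT it is passed, and this WA is only recorded against the primary GT):

    /* during media GT init, gt is the media GT */
    XE_WA(gt, 14018094691);                 /* false: WA not in the media GT's list */
    XE_WA(tile->primary_gt, 14018094691);   /* true on affected platforms */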
Signed-off-by: Daniele Ceraolo Spurio Cc: Matt Roper Cc: Lucas De Marchi Reviewed-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240807235333.1370915-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_gsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index f8239a13fa2b..77ce44e845c5 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -260,7 +260,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) struct xe_tile *tile = gt_to_tile(gt); int ret; - if (XE_WA(gt, 14018094691)) { + if (XE_WA(tile->primary_gt, 14018094691)) { ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); /* @@ -278,7 +278,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) ret = gsc_upload(gsc); - if (XE_WA(gt, 14018094691)) + if (XE_WA(tile->primary_gt, 14018094691)) xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); if (ret)
From 5bdacb0907c1f531995b6ba47b832ac3a0182ae9 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 6 Aug 2024 20:05:16 +0200 Subject: [PATCH 019/108] drm/xe/pf: Fix VF config validation on multi-GT platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When validating VF config on the media GT, we may wrongly report that a VF is already partially configured on it, as we consider GGTT and LMEM provisioning done on the primary GT (since both GGTT and LMEM are tile-level resources, not GT-level ones). This will cause VF auto-provisioning on the media GT to be skipped and, as a result, will block the VF from successfully initializing that GT. Fix that by considering GGTT and LMEM configurations only when checking if a VF provisioning is complete, and omit GGTT and LMEM when reporting empty/partial provisioning.
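For illustration, a standalone model of the fixed "partially provisioned" predicate (an assumption-level sketch, not driver code; the flags are simplified booleans):

    #include <stdbool.h>

    /* tile-level GGTT only counts toward "partial" on the primary GT */
    static bool vf_provisioned_any(bool is_primary, bool ggtt, bool ctxs, bool dbs)
    {
            return ctxs || dbs || (ggtt && is_primary);
    }

With this, a VF on a media GT whose only configured resource is the tile-level GGTT is treated as unprovisioned there, so auto-provisioning of the GT-level resources proceeds instead of being skipped as already partially configured.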
Fixes: 234670cea9a2 ("drm/xe/pf: Skip fair VFs provisioning if already provisioned") Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240806180516.618-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 52c7277d243d..227527785afd 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1928,6 +1928,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt; struct xe_device *xe = gt_to_xe(gt); + bool is_primary = !xe_gt_is_media_type(gt); bool valid_ggtt, valid_ctxs, valid_dbs; bool valid_any, valid_all; @@ -1936,13 +1937,17 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) valid_dbs = pf_get_vf_config_dbs(gt, vfid); /* note that GuC doorbells are optional */ - valid_any = valid_ggtt || valid_ctxs || valid_dbs; - valid_all = valid_ggtt && valid_ctxs; + valid_any = valid_ctxs || valid_dbs; + valid_all = valid_ctxs; + + /* and GGTT/LMEM is configured on primary GT only */ + valid_all = valid_all && valid_ggtt; + valid_any = valid_any || (valid_ggtt && is_primary); if (IS_DGFX(xe)) { bool valid_lmem = pf_get_vf_config_ggtt(primary_gt, vfid); - valid_any = valid_any || valid_lmem; + valid_any = valid_any || (valid_lmem && is_primary); valid_all = valid_all && valid_lmem; }
From 549dd786b61cd3db903f5d94d07fc5a89ccdbeb9 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Jul 2024 08:28:31 -0700 Subject: [PATCH 020/108] drm/xe: Move VM dma-resv lock from xe_exec_queue_create to __xe_exec_queue_init The critical section which requires the VM dma-resv is the call to xe_lrc_create in __xe_exec_queue_init. Move this lock to __xe_exec_queue_init, holding it just around xe_lrc_create. Not only is this good practice, it also fixes a double locking of the VM dma-resv in the error paths of __xe_exec_queue_init, as xe_lrc_put tries to acquire this too, resulting in a deadlock.
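A sketch of the deadlock being fixed (simplified from the description above; error handling elided):

    /* old flow, in xe_exec_queue_create(): */
    xe_vm_lock(vm, true);           /* takes the VM dma-resv */
    err = __xe_exec_queue_init(q);  /* on failure its error path calls ...   */
                                    /* ... xe_lrc_put(), which also needs    */
                                    /* the VM dma-resv -> deadlock           */
    xe_vm_unlock(vm);

After the move, __xe_exec_queue_init() drops the lock before any error path that reaches xe_lrc_put() runs.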
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Maarten Lankhorst Signed-off-by: Matthew Brost Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240724152831.1848325-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 956dc15b432a..971e1234b8ea 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -105,22 +105,35 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, static int __xe_exec_queue_init(struct xe_exec_queue *q) { + struct xe_vm *vm = q->vm; int i, err; + if (vm) { + err = xe_vm_lock(vm, true); + if (err) + return err; + } + for (i = 0; i < q->width; ++i) { q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K); if (IS_ERR(q->lrc[i])) { err = PTR_ERR(q->lrc[i]); - goto err_lrc; + goto err_unlock; } } + if (vm) + xe_vm_unlock(vm); + err = q->ops->init(q); if (err) goto err_lrc; return 0; +err_unlock: + if (vm) + xe_vm_unlock(vm); err_lrc: for (i = i - 1; i >= 0; --i) xe_lrc_put(q->lrc[i]); @@ -140,15 +153,7 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v if (IS_ERR(q)) return q; - if (vm) { - err = xe_vm_lock(vm, true); - if (err) - goto err_post_alloc; - } - err = __xe_exec_queue_init(q); - if (vm) - xe_vm_unlock(vm); if (err) goto err_post_alloc; From a86ee96ce819800a399e0260c5ffad793c9c6ac3 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 9 Aug 2024 12:19:25 -0700 Subject: [PATCH 021/108] drm/xe: Add xe_sched_msg_lock/unlock helper Will help callers to own locking when adding messages to scheduler. Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240809191929.3138956-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gpu_scheduler.c | 12 ++++++------ drivers/gpu/drm/xe/xe_gpu_scheduler.h | 10 ++++++++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c index e4ad1d6ce1d5..eea71c67cf2a 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -15,11 +15,11 @@ static void xe_sched_process_msg_queue_if_ready(struct xe_gpu_scheduler *sched) { struct xe_sched_msg *msg; - spin_lock(&sched->base.job_list_lock); + xe_sched_msg_lock(sched); msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link); if (msg) xe_sched_process_msg_queue(sched); - spin_unlock(&sched->base.job_list_lock); + xe_sched_msg_unlock(sched); } static struct xe_sched_msg * @@ -27,12 +27,12 @@ xe_sched_get_msg(struct xe_gpu_scheduler *sched) { struct xe_sched_msg *msg; - spin_lock(&sched->base.job_list_lock); + xe_sched_msg_lock(sched); msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link); if (msg) list_del(&msg->link); - spin_unlock(&sched->base.job_list_lock); + xe_sched_msg_unlock(sched); return msg; } @@ -93,9 +93,9 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched) void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg) { - spin_lock(&sched->base.job_list_lock); + xe_sched_msg_lock(sched); list_add_tail(&msg->link, &sched->msgs); - spin_unlock(&sched->base.job_list_lock); + xe_sched_msg_unlock(sched); xe_sched_process_msg_queue(sched); } diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h 
b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index 10c6bb9c9386..a54038fb3094 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -25,6 +25,16 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched); void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg); +static inline void xe_sched_msg_lock(struct xe_gpu_scheduler *sched) +{ + spin_lock(&sched->base.job_list_lock); +} + +static inline void xe_sched_msg_unlock(struct xe_gpu_scheduler *sched) +{ + spin_unlock(&sched->base.job_list_lock); +} + static inline void xe_sched_stop(struct xe_gpu_scheduler *sched) { drm_sched_stop(&sched->base, NULL); From fc33077765e9104f84b49d9b9e0702a41d5269d6 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 9 Aug 2024 12:19:26 -0700 Subject: [PATCH 022/108] drm/xe: Reinit msg link when processing a message Will help to avoid adding a static message twice. Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240809191929.3138956-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gpu_scheduler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c index eea71c67cf2a..1c703e8423de 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -31,7 +31,7 @@ xe_sched_get_msg(struct xe_gpu_scheduler *sched) msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link); if (msg) - list_del(&msg->link); + list_del_init(&msg->link); xe_sched_msg_unlock(sched); return msg; From 17d6abcbf6249b4ef22f41b255de2be2691ec32e Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 9 Aug 2024 12:19:27 -0700 Subject: [PATCH 023/108] drm/xe: Add xe_sched_add_msg_locked helper Will help by allowing callers to own message locking. 
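For clarity, a short sketch (not part of the diff) of the caller-owned locking these helpers enable; a later patch in this series uses exactly this shape:

	xe_sched_msg_lock(sched);
	if (list_empty(&msg->link))		/* message not already queued */
		xe_sched_add_msg_locked(sched, msg);
	xe_sched_msg_unlock(sched);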
Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240809191929.3138956-4-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gpu_scheduler.c | 9 ++++++++- drivers/gpu/drm/xe/xe_gpu_scheduler.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c index 1c703e8423de..c518d1d16d82 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -94,8 +94,15 @@ void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg) { xe_sched_msg_lock(sched); - list_add_tail(&msg->link, &sched->msgs); + xe_sched_add_msg_locked(sched, msg); xe_sched_msg_unlock(sched); +} +void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg) +{ + lockdep_assert_held(&sched->base.job_list_lock); + + list_add_tail(&msg->link, &sched->msgs); xe_sched_process_msg_queue(sched); } diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index a54038fb3094..cee9c6809fc0 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -24,6 +24,8 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched); void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg); +void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg); static inline void xe_sched_msg_lock(struct xe_gpu_scheduler *sched) { From 885c31382509d13fd70f6a9c42637eb72056a6ce Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 9 Aug 2024 12:19:28 -0700 Subject: [PATCH 024/108] drm/xe: Only enable scheduling upon resume if needed No need to enable scheduling if already enabled. Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240809191929.3138956-5-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 460808507947..a9af33ff8aa7 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1374,9 +1374,11 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) struct xe_exec_queue *q = msg->private_data; if (guc_exec_queue_allowed_to_change_state(q)) { - q->guc->resume_time = RESUME_PENDING; clear_exec_queue_suspended(q); - enable_scheduling(q); + if (!exec_queue_enabled(q)) { + q->guc->resume_time = RESUME_PENDING; + enable_scheduling(q); + } } else { clear_exec_queue_suspended(q); } From d79fdaef2b55deea0df3fc6af4d4ac60e81a527c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 9 Aug 2024 12:19:29 -0700 Subject: [PATCH 025/108] drm/xe: Allow suspend / resume to be safely called multiple times Switching modes between LR and dma-fence can result in multiple calls to suspend / resume. Make these calls safe while still enforcing call order.
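Sketch of the resulting idempotent suspend request, lifted from the diff below; guc_exec_queue_try_add_msg() returns false (and does nothing) if the static message is still on the list:

	xe_sched_msg_lock(sched);
	if (guc_exec_queue_try_add_msg(q, msg, SUSPEND))
		q->guc->suspend_pending = true;
	xe_sched_msg_unlock(sched);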
Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240809191929.3138956-6-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 40 ++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index a9af33ff8aa7..da63be550d4d 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1388,6 +1388,8 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) #define SET_SCHED_PROPS 2 #define SUSPEND 3 #define RESUME 4 +#define OPCODE_MASK 0xf +#define MSG_LOCKED BIT(8) static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) { @@ -1432,7 +1434,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) struct xe_device *xe = guc_to_xe(guc); struct xe_guc_exec_queue *ge; long timeout; - int err; + int err, i; xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); @@ -1444,6 +1446,9 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) ge->q = q; init_waitqueue_head(&ge->suspend_wait); + for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) + INIT_LIST_HEAD(&ge->static_msgs[i].link); + timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(q->sched_props.job_timeout_ms); err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, @@ -1506,11 +1511,26 @@ static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); INIT_LIST_HEAD(&msg->link); - msg->opcode = opcode; + msg->opcode = opcode & OPCODE_MASK; msg->private_data = q; trace_xe_sched_msg_add(msg); - xe_sched_add_msg(&q->guc->sched, msg); + if (opcode & MSG_LOCKED) + xe_sched_add_msg_locked(&q->guc->sched, msg); + else + xe_sched_add_msg(&q->guc->sched, msg); +} + +static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, + struct xe_sched_msg *msg, + u32 opcode) +{ + if (!list_empty(&msg->link)) + return false; + + guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED); + + return true; } #define STATIC_MSG_CLEANUP 0 @@ -1584,13 +1604,16 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, static int guc_exec_queue_suspend(struct xe_exec_queue *q) { + struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; - if (exec_queue_killed_or_banned_or_wedged(q) || q->guc->suspend_pending) + if (exec_queue_killed_or_banned_or_wedged(q)) return -EINVAL; - q->guc->suspend_pending = true; - guc_exec_queue_add_msg(q, msg, SUSPEND); + xe_sched_msg_lock(sched); + if (guc_exec_queue_try_add_msg(q, msg, SUSPEND)) + q->guc->suspend_pending = true; + xe_sched_msg_unlock(sched); return 0; } @@ -1624,13 +1647,16 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) static void guc_exec_queue_resume(struct xe_exec_queue *q) { + struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); xe_assert(xe, !q->guc->suspend_pending); - guc_exec_queue_add_msg(q, msg, RESUME); + xe_sched_msg_lock(sched); + guc_exec_queue_try_add_msg(q, msg, RESUME); + xe_sched_msg_unlock(sched); } static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) From 08b5a4798713f4331317272752b27b1c4f6a246d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 8 Aug 2024 10:11:21 -0700 Subject: [PATCH 026/108] 
drm/xe: Allow to compile out debugfs Use a dummy xe_debugfs_register() if debugfs is not enabled and move all debugfs-related files under `ifeq ($(CONFIG_DEBUG_FS),y)` in the Makefile. This is similar to what was done for display in commit 439987f6f471 ("drm/xe: don't build debugfs files when CONFIG_DEBUG_FS=n"). This removes the following warning while loading xe with CONFIG_DEBUG_FS=n: xe 0000:03:00.0: [drm] Create GT directory failed Reviewed-by: Nirmoy Das Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240808171121.2484237-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/Makefile | 16 +++++++++------- drivers/gpu/drm/xe/xe_debugfs.h | 4 ++++ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 1ff9602a52f6..b846b36a7bfd 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -28,7 +28,6 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ xe-y += xe_bb.o \ xe_bo.o \ xe_bo_evict.o \ - xe_debugfs.o \ xe_devcoredump.o \ xe_device.o \ xe_device_sysfs.o \ @@ -46,7 +45,6 @@ xe-y += xe_bb.o \ xe_gt.o \ xe_gt_ccs_mode.o \ xe_gt_clock.o \ - xe_gt_debugfs.o \ xe_gt_freq.o \ xe_gt_idle.o \ xe_gt_mcr.o \ @@ -59,7 +57,6 @@ xe-y += xe_bb.o \ xe_guc_ads.o \ xe_guc_ct.o \ xe_guc_db_mgr.o \ - xe_guc_debugfs.o \ xe_guc_hwconfig.o \ xe_guc_id_mgr.o \ xe_guc_klv_helpers.o \ @@ -71,7 +68,6 @@ xe-y += xe_bb.o \ xe_hw_engine_class_sysfs.o \ xe_hw_fence.o \ xe_huc.o \ - xe_huc_debugfs.o \ xe_irq.o \ xe_lrc.o \ xe_migrate.o \ @@ -107,7 +103,6 @@ xe-y += xe_bb.o \ xe_ttm_vram_mgr.o \ xe_tuning.o \ xe_uc.o \ - xe_uc_debugfs.o \ xe_uc_fw.o \ xe_vm.o \ xe_vram.o \ @@ -124,7 +119,6 @@ xe-$(CONFIG_HWMON) += xe_hwmon.o # graphics virtualization (SR-IOV) support xe-y += \ xe_gt_sriov_vf.o \ - xe_gt_sriov_vf_debugfs.o \ xe_guc_relay.o \ xe_memirq.o \ xe_sriov.o @@ -133,7 +127,6 @@ xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf.o \ xe_gt_sriov_pf_config.o \ xe_gt_sriov_pf_control.o \ - xe_gt_sriov_pf_debugfs.o \ xe_gt_sriov_pf_monitor.o \ xe_gt_sriov_pf_policy.o \ xe_gt_sriov_pf_service.o \ @@ -281,6 +274,15 @@ ifeq ($(CONFIG_DRM_FBDEV_EMULATION),y) endif ifeq ($(CONFIG_DEBUG_FS),y) + xe-y += xe_debugfs.o \ + xe_gt_debugfs.o \ + xe_gt_sriov_vf_debugfs.o \ + xe_guc_debugfs.o \ + xe_huc_debugfs.o \ + xe_uc_debugfs.o + + xe-$(CONFIG_PCI_IOV) += xe_gt_sriov_pf_debugfs.o + xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_display_debugfs.o \ i915-display/intel_display_debugfs_params.o \ diff --git a/drivers/gpu/drm/xe/xe_debugfs.h b/drivers/gpu/drm/xe/xe_debugfs.h index 715b8e2e0bd9..17f4c2f1b5e4 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.h +++ b/drivers/gpu/drm/xe/xe_debugfs.h @@ -8,6 +8,10 @@ struct xe_device; +#ifdef CONFIG_DEBUG_FS void xe_debugfs_register(struct xe_device *xe); +#else +static inline void xe_debugfs_register(struct xe_device *xe) { } +#endif #endif From acc4e41ec41fa28ae9fbdb1a2750525a7a242743 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Sat, 10 Aug 2024 21:15:18 +0200 Subject: [PATCH 027/108] drm/xe/gt: Add APIs for printing stats over debugfs Add skeleton APIs for recording and printing various stats over debugfs. This currently adds only counter-type stats, which are backed by atomic_t and wrapped with CONFIG_DRM_XE_STATS so they can be disabled on production systems.
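Intended usage, as a sketch: a counter is bumped from a hot path and read back via debugfs. Note that XE_GT_STATS_ID_TLB_INVAL is only added by a later patch in this series:

	/* cheap atomic_add on a per-GT counter; compiled out without debugfs */
	xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1);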
v4: Rebase and other minor fixes (Matt) v3: s/CONFIG_DRM_XE_STATS/CONFIG_DEBUG_FS(Lucas) v2: add missing docs Add boundary checks for stats id and other improvements (Michal) Fix build when CONFIG_DRM_XE_STATS is disabled(Matt) Cc: Lucas De Marchi Cc: Matthew Brost Cc: Michal Wajdeczko Cc: Sai Gowtham Ch Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240810191522.18616-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_gt_debugfs.c | 2 ++ drivers/gpu/drm/xe/xe_gt_stats.c | 48 ++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_stats.h | 28 +++++++++++++++++ drivers/gpu/drm/xe/xe_gt_types.h | 9 ++++++ 5 files changed, 88 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_gt_stats.c create mode 100644 drivers/gpu/drm/xe/xe_gt_stats.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index b846b36a7bfd..e11392b5dd3d 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -277,6 +277,7 @@ ifeq ($(CONFIG_DEBUG_FS),y) xe-y += xe_debugfs.o \ xe_gt_debugfs.o \ xe_gt_sriov_vf_debugfs.o \ + xe_gt_stats.o \ xe_guc_debugfs.o \ xe_huc_debugfs.o \ xe_uc_debugfs.o diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 5e7fd937917a..5125d76ccfac 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -17,6 +17,7 @@ #include "xe_gt_mcr.h" #include "xe_gt_sriov_pf_debugfs.h" #include "xe_gt_sriov_vf_debugfs.h" +#include "xe_gt_stats.h" #include "xe_gt_topology.h" #include "xe_hw_engine.h" #include "xe_lrc.h" @@ -286,6 +287,7 @@ static const struct drm_info_list debugfs_list[] = { {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc}, {"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc}, {"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc}, + {"stats", .show = xe_gt_debugfs_simple_show, .data = xe_gt_stats_print_info}, }; void xe_gt_debugfs_register(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c new file mode 100644 index 000000000000..a054cd52329c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include + +#include + +#include "xe_gt.h" +#include "xe_gt_stats.h" + +/** + * xe_gt_stats_incr - Increments the specified stats counter + * @gt: graphics tile + * @id: xe_gt_stats_id type id that needs to be incremented + * @incr: value to be incremented with + * + * Increments the specified stats counter. + */ +void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) +{ + if (id >= __XE_GT_STATS_NUM_IDS) + return; + + atomic_add(incr, >->stats.counters[id]); +} + +static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { +}; + +/** + * xe_gt_stats_print_info - Print the GT stats + * @gt: graphics tile + * @p: drm_printer where it will be printed out. + * + * This prints out all the available GT stats. 
+ */ +int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p) +{ + enum xe_gt_stats_id id; + + for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id) + drm_printf(p, "%s: %d\n", stat_description[id], + atomic_read(>->stats.counters[id])); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h new file mode 100644 index 000000000000..1be71ce533db --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_stats.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GT_STATS_H_ +#define _XE_GT_STATS_H_ + +struct xe_gt; +struct drm_printer; + +enum xe_gt_stats_id { + /* must be the last entry */ + __XE_GT_STATS_NUM_IDS, +}; + +#ifdef CONFIG_DEBUG_FS +int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p); +void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr); +#else +static inline void +xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, + int incr) +{ +} + +#endif +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 631928258d71..8e46c0969bc7 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -10,6 +10,7 @@ #include "xe_gt_idle_types.h" #include "xe_gt_sriov_pf_types.h" #include "xe_gt_sriov_vf_types.h" +#include "xe_gt_stats.h" #include "xe_hw_engine_types.h" #include "xe_hw_fence_types.h" #include "xe_oa.h" @@ -133,6 +134,14 @@ struct xe_gt { u8 has_indirect_ring_state:1; } info; +#if IS_ENABLED(CONFIG_DEBUG_FS) + /** @stats: GT stats */ + struct { + /** @stats.counters: counters for various GT stats */ + atomic_t counters[__XE_GT_STATS_NUM_IDS]; + } stats; +#endif + /** * @mmio: mmio info for GT. All GTs within a tile share the same * register space, but have their own copy of GSI registers at a From 39fa14e5bdd037f50df0af408da7251e400b41fd Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Sat, 10 Aug 2024 21:15:19 +0200 Subject: [PATCH 028/108] drm/xe: Add stats for tlb invalidation count Add stats for tlb invalidation count which can be viewed with per GT stat debugfs file. 
Example output: cat /sys/kernel/debug/dri/0/gt0/stats tlb_inval_count: 22 v2: fix #include order(Tejas) Cc: Lucas De Marchi Cc: Matthew Brost Cc: Michal Wajdeczko Cc: Sai Gowtham Ch Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20240810191522.18616-2-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_gt_stats.c | 1 + drivers/gpu/drm/xe/xe_gt_stats.h | 1 + drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c index a054cd52329c..c7364a5aef8f 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.c +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -27,6 +27,7 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) } static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { + "tlb_inval_count", }; /** diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h index 1be71ce533db..91d944f6c4e4 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.h +++ b/drivers/gpu/drm/xe/xe_gt_stats.h @@ -10,6 +10,7 @@ struct xe_gt; struct drm_printer; enum xe_gt_stats_id { + XE_GT_STATS_ID_TLB_INVAL, /* must be the last entry */ __XE_GT_STATS_NUM_IDS, }; diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 87cb76a8718c..cca9cf536f76 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -12,6 +12,7 @@ #include "xe_gt_printk.h" #include "xe_guc.h" #include "xe_guc_ct.h" +#include "xe_gt_stats.h" #include "xe_mmio.h" #include "xe_pm.h" #include "xe_sriov.h" @@ -213,6 +214,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, gt->tlb_invalidation.seqno = 1; } mutex_unlock(&guc->ct.lock); + xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1); return ret; } From 21ff3a16e92e2fa4f906a61d148aca1423c58298 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Mon, 12 Aug 2024 19:11:17 +0530 Subject: [PATCH 029/108] drm/xe/xe2hpg: Add Wa_14021821874 Wa_14021821874 applies to xe2_hpg V2(Himal): - Use space after define Cc: Matt Roper Reviewed-by: Himal Prasad Ghimiray Signed-off-by: Tejas Upadhyay Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240812134117.813670-1-tejas.upadhyay@intel.com --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 2c8c4d4218db..5f33dee35f8a 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -451,6 +451,7 @@ #define DIS_FIX_EOT1_FLUSH REG_BIT(9) #define TDL_TSL_CHICKEN XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED) +#define STK_ID_RESTRICT REG_BIT(12) #define SLM_WMTP_RESTORE REG_BIT(11) #define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 564e32e44e3b..28b7f95b6c2f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -542,6 +542,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, + { XE_RTP_NAME("14021821874"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT)) + }, /* Xe2_LPM */ From 
b8cdc47adf059f60a39555eeba1db92c503a5061 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Sat, 10 Aug 2024 00:03:47 +0200 Subject: [PATCH 030/108] drm/xe/migrate: Parameterize ccs and bo data clear in xe_migrate_clear() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parameterize clearing ccs and bo data in xe_migrate_clear() which higher layers can utilize. This patch will be used later on when doing bo data clear for igfx as well. v2: Replace multiple params with flags in xe_migrate_clear (Matt B) v3: s/CLEAR_BO_DATA_FLAG_*/XE_MIGRATE_CLEAR_FLAG_* and move to xe_migrate.h. other nits(Matt B) Cc: Himal Prasad Ghimiray Cc: Matthew Auld Cc: Matthew Brost Cc: Thomas Hellström Signed-off-by: Akshata Jahagirdar Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240809220347.25330-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/tests/xe_bo.c | 3 ++- drivers/gpu/drm/xe/tests/xe_migrate.c | 12 ++++++++---- drivers/gpu/drm/xe/xe_bo.c | 12 ++++++++++-- drivers/gpu/drm/xe/xe_migrate.c | 27 +++++++++++++++++++-------- drivers/gpu/drm/xe/xe_migrate.h | 7 ++++++- 5 files changed, 45 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 1768483da1b7..df9fd907edd4 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -36,7 +36,8 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, /* Optionally clear bo *and* CCS data in VRAM. */ if (clear) { - fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource); + fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (IS_ERR(fence)) { KUNIT_FAIL(test, "Failed to submit bo clear.\n"); return PTR_ERR(fence); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 4344a1724029..47ae9d0b8864 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -105,7 +105,8 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, } xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); - fence = xe_migrate_clear(m, remote, remote->ttm.resource); + fence = xe_migrate_clear(m, remote, remote->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (!sanity_fence_failed(xe, fence, big ? 
"Clearing remote big bo" : "Clearing remote small bo", test)) { retval = xe_map_rd(xe, &remote->vmap, 0, u64); @@ -279,7 +280,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) kunit_info(test, "Clearing small buffer object\n"); xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size); expected = 0; - fence = xe_migrate_clear(m, tiny, tiny->ttm.resource); + fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (sanity_fence_failed(xe, fence, "Clearing small bo", test)) goto out; @@ -300,7 +302,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) kunit_info(test, "Clearing big buffer object\n"); xe_map_memset(xe, &big->vmap, 0, 0x11, big->size); expected = 0; - fence = xe_migrate_clear(m, big, big->ttm.resource); + fence = xe_migrate_clear(m, big, big->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (sanity_fence_failed(xe, fence, "Clearing big bo", test)) goto out; @@ -603,7 +606,8 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, kunit_info(test, "Clear vram buffer object\n"); expected = 0x0000000000000000; - fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource); + fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (sanity_fence_failed(xe, fence, "Clear vram_bo", test)) return; dma_fence_put(fence); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 3295bc92d7aa..56a089aa3916 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -793,8 +793,16 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } } } else { - if (move_lacks_source) - fence = xe_migrate_clear(migrate, bo, new_mem); + if (move_lacks_source) { + u32 flags = 0; + + if (mem_type_is_vram(new_mem->mem_type)) + flags |= XE_MIGRATE_CLEAR_FLAG_FULL; + else if (handle_system_ccs) + flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA; + + fence = xe_migrate_clear(migrate, bo, new_mem, flags); + } else fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem, handle_system_ccs); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 6f24aaf58252..a2d0ce3c59bf 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1037,9 +1037,11 @@ static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, * @m: The migration context. * @bo: The buffer object @dst is currently bound to. * @dst: The dst TTM resource to be cleared. + * @clear_flags: flags to specify which data to clear: CCS, BO, or both. * - * Clear the contents of @dst to zero. On flat CCS devices, - * the CCS metadata is cleared to zero as well on VRAM destinations. + * Clear the contents of @dst to zero when XE_MIGRATE_CLEAR_FLAG_BO_DATA is set. + * On flat CCS devices, the CCS metadata is cleared to zero with XE_MIGRATE_CLEAR_FLAG_CCS_DATA. + * Set XE_MIGRATE_CLEAR_FLAG_FULL to clear bo as well as CCS metadata. * TODO: Eliminate the @bo argument. 
* * Return: Pointer to a dma_fence representing the last clear batch, or @@ -1048,18 +1050,27 @@ static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, */ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_bo *bo, - struct ttm_resource *dst) + struct ttm_resource *dst, + u32 clear_flags) { bool clear_vram = mem_type_is_vram(dst->mem_type); + bool clear_bo_data = XE_MIGRATE_CLEAR_FLAG_BO_DATA & clear_flags; + bool clear_ccs = XE_MIGRATE_CLEAR_FLAG_CCS_DATA & clear_flags; struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); - bool clear_system_ccs = (xe_bo_needs_ccs_pages(bo) && !IS_DGFX(xe)) ? true : false; + bool clear_only_system_ccs = false; struct dma_fence *fence = NULL; u64 size = bo->size; struct xe_res_cursor src_it; struct ttm_resource *src = dst; int err; + if (WARN_ON(!clear_bo_data && !clear_ccs)) + return NULL; + + if (!clear_bo_data && clear_ccs && !IS_DGFX(xe)) + clear_only_system_ccs = true; + if (!clear_vram) xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it); else @@ -1085,7 +1096,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, batch_size = 2 + pte_update_size(m, pte_flags, src, &src_it, &clear_L0, &clear_L0_ofs, &clear_L0_pt, - clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0, + clear_bo_data ? emit_clear_cmd_len(gt) : 0, 0, avail_pts); if (xe_migrate_needs_ccs_emit(xe)) @@ -1107,13 +1118,13 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) xe_res_next(&src_it, clear_L0); else - emit_pte(m, bb, clear_L0_pt, clear_vram, clear_system_ccs, + emit_pte(m, bb, clear_L0_pt, clear_vram, clear_only_system_ccs, &src_it, clear_L0, dst); bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; - if (!clear_system_ccs) + if (clear_bo_data) emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram); if (xe_migrate_needs_ccs_emit(xe)) { @@ -1172,7 +1183,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, return ERR_PTR(err); } - if (clear_system_ccs) + if (clear_ccs) bo->ccs_cleared = true; return fence; diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 453e0ecf5034..7929cc2425e8 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -102,9 +102,14 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct ttm_resource *dst, bool copy_only_ccs); +#define XE_MIGRATE_CLEAR_FLAG_BO_DATA BIT(0) +#define XE_MIGRATE_CLEAR_FLAG_CCS_DATA BIT(1) +#define XE_MIGRATE_CLEAR_FLAG_FULL (XE_MIGRATE_CLEAR_FLAG_BO_DATA | \ + XE_MIGRATE_CLEAR_FLAG_CCS_DATA) struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_bo *bo, - struct ttm_resource *dst); + struct ttm_resource *dst, + u32 clear_flags); struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m); From 50680d1698f4d4c9651822398805cb943b7c04aa Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 12 Aug 2024 13:30:19 +0300 Subject: [PATCH 031/108] drm/xe/tests: remove unused leftover xe_call_for_each_device() xe_call_for_each_device() has been unused since commit 57ecead343e7 ("drm/xe/tests: Convert xe_mocs live tests"). Remove it and the related dev_to_xe_device_fn() and struct kunit_test_data. 
Cc: Lucas De Marchi Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/fa3bb23d005313c9797f557e1211fde09fcb59cc.1723458544.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/tests/xe_pci.c | 52 -------------------------- drivers/gpu/drm/xe/tests/xe_pci_test.h | 1 - 2 files changed, 53 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index 577ee7d14381..67404863087e 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -12,58 +12,6 @@ #include #include -struct kunit_test_data { - int ndevs; - xe_device_fn xe_fn; -}; - -static int dev_to_xe_device_fn(struct device *dev, void *__data) - -{ - struct drm_device *drm = dev_get_drvdata(dev); - struct kunit_test_data *data = __data; - int ret = 0; - int idx; - - data->ndevs++; - - if (drm_dev_enter(drm, &idx)) - ret = data->xe_fn(to_xe_device(dev_get_drvdata(dev))); - drm_dev_exit(idx); - - return ret; -} - -/** - * xe_call_for_each_device - Iterate over all devices this driver binds to - * @xe_fn: Function to call for each device. - * - * This function iterated over all devices this driver binds to, and calls - * @xe_fn: for each one of them. If the called function returns anything else - * than 0, iteration is stopped and the return value is returned by this - * function. Across each function call, drm_dev_enter() / drm_dev_exit() is - * called for the corresponding drm device. - * - * Return: Number of devices iterated or - * the error code of a call to @xe_fn returning an error code. - */ -int xe_call_for_each_device(xe_device_fn xe_fn) -{ - int ret; - struct kunit_test_data data = { - .xe_fn = xe_fn, - .ndevs = 0, - }; - - ret = driver_for_each_device(&xe_pci_driver.driver, NULL, - &data, dev_to_xe_device_fn); - - if (!data.ndevs) - kunit_skip(current->kunit_test, "test runs only on hardware\n"); - - return ret ?: data.ndevs; -} - /** * xe_call_for_each_graphics_ip - Iterate over all recognized graphics IPs * @xe_fn: Function to call for each device. diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h index 3e2558bc3c90..ede46800aff1 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.h +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -19,7 +19,6 @@ typedef int (*xe_device_fn)(struct xe_device *); typedef void (*xe_graphics_fn)(const struct xe_graphics_desc *); typedef void (*xe_media_fn)(const struct xe_media_desc *); -int xe_call_for_each_device(xe_device_fn xe_fn); void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn); void xe_call_for_each_media_ip(xe_media_fn xe_fn); From ec0796e6446352e9e4ecb1804bf470d1ca47c5f9 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 12 Aug 2024 13:30:20 +0300 Subject: [PATCH 032/108] drm/xe: use pdev_to_xe_device() instead of pci_get_drvdata() directly We have a helper for converting pci device to xe device, use it. 
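For reference, a sketch of the helper-based conversions this and the next patch standardize on (pdev_to_xe_device() is the existing helper; kdev_to_xe_device() is added by the following patch):

	struct xe_device *xe = pdev_to_xe_device(pdev);	/* rather than pci_get_drvdata(pdev) */
	struct xe_device *xe2 = kdev_to_xe_device(kdev);	/* rather than dev_get_drvdata(kdev) */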
Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/1b87c2e56200e001ce3a5d2f4a93eb26b294df32.1723458544.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/xe_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 3c4a3c91377a..bc575bbee42d 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -747,7 +747,7 @@ static void xe_pci_remove(struct pci_dev *pdev) { struct xe_device *xe; - xe = pci_get_drvdata(pdev); + xe = pdev_to_xe_device(pdev); if (!xe) /* driver load aborted, nothing to cleanup */ return; From d408d6f8cbbb5ad92b383f33d091f027f5740aea Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 12 Aug 2024 13:30:21 +0300 Subject: [PATCH 033/108] drm/xe: add kdev_to_xe_device() helper and use it There are enough users for kernel device to xe device conversion, add a helper for it. Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/38c80846e70c7e410850530426384e17cff9d031.1723458544.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/xe_device.h | 5 +++++ drivers/gpu/drm/xe/xe_gsc_proxy.c | 9 ++------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index db6cc8d0d6b8..2c96f1b2aafd 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -15,6 +15,11 @@ static inline struct xe_device *to_xe_device(const struct drm_device *dev) return container_of(dev, struct xe_device, drm); } +static inline struct xe_device *kdev_to_xe_device(struct device *kdev) +{ + return dev_get_drvdata(kdev); +} + static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev) { return pci_get_drvdata(pdev); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index aa812a2bc3ed..28e6a7a1d282 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -62,11 +62,6 @@ gsc_to_gt(struct xe_gsc *gsc) return container_of(gsc, struct xe_gt, uc.gsc); } -static inline struct xe_device *kdev_to_xe(struct device *kdev) -{ - return dev_get_drvdata(kdev); -} - bool xe_gsc_proxy_init_done(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); @@ -345,7 +340,7 @@ void xe_gsc_proxy_irq_handler(struct xe_gsc *gsc, u32 iir) static int xe_gsc_proxy_component_bind(struct device *xe_kdev, struct device *mei_kdev, void *data) { - struct xe_device *xe = kdev_to_xe(xe_kdev); + struct xe_device *xe = kdev_to_xe_device(xe_kdev); struct xe_gt *gt = xe->tiles[0].media_gt; struct xe_gsc *gsc = >->uc.gsc; @@ -360,7 +355,7 @@ static int xe_gsc_proxy_component_bind(struct device *xe_kdev, static void xe_gsc_proxy_component_unbind(struct device *xe_kdev, struct device *mei_kdev, void *data) { - struct xe_device *xe = kdev_to_xe(xe_kdev); + struct xe_device *xe = kdev_to_xe_device(xe_kdev); struct xe_gt *gt = xe->tiles[0].media_gt; struct xe_gsc *gsc = >->uc.gsc; From 1d734a3e5d6bb266f52eaf2b1400c5d3f1875a54 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 12 Aug 2024 11:10:43 -0700 Subject: [PATCH 034/108] drm/xe: Name and document Wa_14019789679 Early in the development of Xe we identified an issue with SVG state handling on DG2 and MTL (and later on Xe2 as well). 
In commit 72ac304769dd ("drm/xe: Emit SVG state on RCS during driver load on DG2 and MTL") and commit fb24b858a20d ("drm/xe/xe2: Update SVG state handling") we implemented our own workaround to prevent SVG state from leaking from context A to context B in cases where context B never issues a specific state setting. The hardware teams have now created official workaround Wa_14019789679 to cover this issue. The workaround description only requires emitting 3DSTATE_MESH_CONTROL, since they believe that's the only SVG instruction that would potentially remain unset by a context B, but still cause notable issues if unwanted values were inherited from context A. However since we already have a more extensive implementation that emits the entire SVG state and prevents _any_ SVG state from unintentionally leaking, we'll stick with our existing implementation just to be safe. Signed-off-by: Matt Roper Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240812181042.2013508-2-matthew.d.roper@intel.com --- drivers/gpu/drm/xe/xe_lrc.c | 35 +++++++++++++++++++++--------- drivers/gpu/drm/xe/xe_wa_oob.rules | 2 ++ 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 58121821f081..974a9cd8c379 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -5,6 +5,8 @@ #include "xe_lrc.h" +#include + #include #include "instructions/xe_mi_commands.h" @@ -24,6 +26,7 @@ #include "xe_memirq.h" #include "xe_sriov.h" #include "xe_vm.h" +#include "xe_wa.h" #define LRC_VALID BIT_ULL(0) #define LRC_PRIVILEGE BIT_ULL(8) @@ -1581,19 +1584,31 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b int state_table_size = 0; /* - * At the moment we only need to emit non-register state for the RCS - * engine. + * Wa_14019789679 + * + * If the driver doesn't explicitly emit the SVG instructions while + * setting up the default LRC, the context switch will write 0's + * (noops) into the LRC memory rather than the expected instruction + * headers. Application contexts start out as a copy of the default + * LRC, and if they also do not emit specific settings for some SVG + * state, then on context restore they'll unintentionally inherit + * whatever state setting the previous context had programmed into the + * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will + * prevent the hardware from resetting that state back to any specific + * value). + * + * The official workaround only requires emitting 3DSTATE_MESH_CONTROL + * since that's a specific state setting that can easily cause GPU + * hangs if unintentionally inherited. However to be safe we'll + * continue to emit all of the SVG state since it's best not to leak + * any of the state between contexts, even if that leakage is harmless. */ - if (q->hwe->class != XE_ENGINE_CLASS_RENDER) - return; - - switch (GRAPHICS_VERx100(xe)) { - case 1255: - case 1270 ... 
2004: + if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) { state_table = xe_hpg_svg_state; state_table_size = ARRAY_SIZE(xe_hpg_svg_state); - break; - default: + } + + if (!state_table) { xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); return; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 5cf27ff27ce6..920ca5060146 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -35,3 +35,5 @@ GRAPHICS_VERSION(2001) 22019338487_display PLATFORM(LUNARLAKE) 16023588340 GRAPHICS_VERSION(2001) +14019789679 GRAPHICS_VERSION(1255) + GRAPHICS_VERSION_RANGE(1270, 2004) From 4b498d19610c6acd36a8ddf622afdefe4ab093fe Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 13 Aug 2024 16:14:19 +0530 Subject: [PATCH 035/108] drm/xe: Remove unused xe parameter Remove the xe parameter from the pde_encode_pat_index and pte_encode_pat_index functions, as it is no longer used. Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Tejas Upadhyay Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240813104419.2958046-1-himal.prasad.ghimiray@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_vm.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index f225107bdd65..89d86b7cdb02 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1191,7 +1191,7 @@ static const struct drm_gpuvm_ops gpuvm_ops = { .vm_free = xe_vm_free, }; -static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index) +static u64 pde_encode_pat_index(u16 pat_index) { u64 pte = 0; @@ -1204,8 +1204,7 @@ static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index) return pte; } -static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index, - u32 pt_level) +static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) { u64 pte = 0; @@ -1246,12 +1245,11 @@ static u64 pte_encode_ps(u32 pt_level) static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, const u16 pat_index) { - struct xe_device *xe = xe_bo_device(bo); u64 pde; pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); pde |= XE_PAGE_PRESENT | XE_PAGE_RW; - pde |= pde_encode_pat_index(xe, pat_index); + pde |= pde_encode_pat_index(pat_index); return pde; } @@ -1259,12 +1257,11 @@ static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, u16 pat_index, u32 pt_level) { - struct xe_device *xe = xe_bo_device(bo); u64 pte; pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - pte |= pte_encode_pat_index(xe, pat_index, pt_level); + pte |= pte_encode_pat_index(pat_index, pt_level); pte |= pte_encode_ps(pt_level); if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) @@ -1276,14 +1273,12 @@ static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, u16 pat_index, u32 pt_level) { - struct xe_device *xe = xe_vma_vm(vma)->xe; - pte |= XE_PAGE_PRESENT; if (likely(!xe_vma_read_only(vma))) pte |= XE_PAGE_RW; - pte |= pte_encode_pat_index(xe, pat_index, pt_level); + pte |= pte_encode_pat_index(pat_index, pt_level); pte |= pte_encode_ps(pt_level); if (unlikely(xe_vma_is_null(vma))) @@ -1303,7 +1298,7 @@ static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, pte = addr; pte |= XE_PAGE_PRESENT | 
XE_PAGE_RW; - pte |= pte_encode_pat_index(xe, pat_index, pt_level); + pte |= pte_encode_pat_index(pat_index, pt_level); pte |= pte_encode_ps(pt_level); if (devmem) From 1eda95cba9df35f1fb0fb4b89511beb6603c614e Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 13 Aug 2024 07:19:31 -0700 Subject: [PATCH 036/108] drm/xe: Rename enable_display module param The different approach used by xe regarding the initialization of display HW has been proved a great addition for early driver bring up: core xe can be tested without having all the bits sorted out on the display side. On the other hand, the approach exposed by i915-display is to *actively* disable the display by programming it if needed, i.e. if it was left enabled by firmware. It also has its use to make sure the HW is actually disabled and not wasting power. However having both the way it is in xe doesn't expose a good interface wrt module params. From modinfo: disable_display:Disable display (default: false) (bool) enable_display:Enable display (bool) Rename enable_display to probe_display to try to convey the message that the HW is being touched and improve the module param description. To avoid confusion, the enable_display is renamed everywhere, not only in the module param. New description for the parameters: disable_display:Disable display (default: false) (bool) probe_display:Probe display HW, otherwise it's left untouched (default: true) (bool) Reviewed-by: Matt Roper Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240813141931.3141395-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/display/xe_display.c | 46 ++++++++++++------------- drivers/gpu/drm/xe/xe_device.c | 2 +- drivers/gpu/drm/xe/xe_device_types.h | 11 ++++-- drivers/gpu/drm/xe/xe_module.c | 6 ++-- drivers/gpu/drm/xe/xe_module.h | 2 +- drivers/gpu/drm/xe/xe_pci.c | 8 ++--- 6 files changed, 41 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 78cccbe28947..56a940b39412 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -45,7 +45,7 @@ static bool has_display(struct xe_device *xe) */ bool xe_display_driver_probe_defer(struct pci_dev *pdev) { - if (!xe_modparam.enable_display) + if (!xe_modparam.probe_display) return 0; return intel_display_driver_probe_defer(pdev); @@ -61,7 +61,7 @@ bool xe_display_driver_probe_defer(struct pci_dev *pdev) */ void xe_display_driver_set_hooks(struct drm_driver *driver) { - if (!xe_modparam.enable_display) + if (!xe_modparam.probe_display) return; driver->driver_features |= DRIVER_MODESET | DRIVER_ATOMIC; @@ -103,7 +103,7 @@ static void xe_display_fini_nommio(struct drm_device *dev, void *dummy) { struct xe_device *xe = to_xe_device(dev); - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_power_domains_cleanup(xe); @@ -111,7 +111,7 @@ static void xe_display_fini_nommio(struct drm_device *dev, void *dummy) int xe_display_init_nommio(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; /* Fake uncore lock */ @@ -127,7 +127,7 @@ static void xe_display_fini_noirq(void *arg) { struct xe_device *xe = arg; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_remove_noirq(xe); @@ -137,7 +137,7 @@ int xe_display_init_noirq(struct xe_device *xe) { int err; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; 
intel_display_driver_early_probe(xe); @@ -166,7 +166,7 @@ static void xe_display_fini_noaccel(void *arg) { struct xe_device *xe = arg; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_remove_nogem(xe); @@ -176,7 +176,7 @@ int xe_display_init_noaccel(struct xe_device *xe) { int err; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; err = intel_display_driver_probe_nogem(xe); @@ -188,7 +188,7 @@ int xe_display_init_noaccel(struct xe_device *xe) int xe_display_init(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; return intel_display_driver_probe(xe); @@ -196,7 +196,7 @@ int xe_display_init(struct xe_device *xe) void xe_display_fini(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_hpd_poll_fini(xe); @@ -207,7 +207,7 @@ void xe_display_fini(struct xe_device *xe) void xe_display_register(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_register(xe); @@ -217,7 +217,7 @@ void xe_display_register(struct xe_device *xe) void xe_display_unregister(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_unregister_dsm_handler(); @@ -227,7 +227,7 @@ void xe_display_unregister(struct xe_device *xe) void xe_display_driver_remove(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_remove(xe); @@ -237,7 +237,7 @@ void xe_display_driver_remove(struct xe_device *xe) void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; if (master_ctl & DISPLAY_IRQ) @@ -246,7 +246,7 @@ void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; if (gu_misc_iir & GU_MISC_GSE) @@ -255,7 +255,7 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) void xe_display_irq_reset(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; gen11_display_irq_reset(xe); @@ -263,7 +263,7 @@ void xe_display_irq_reset(struct xe_device *xe) void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; if (gt->info.id == XE_GT0) @@ -297,7 +297,7 @@ static bool suspend_to_idle(void) void xe_display_pm_suspend(struct xe_device *xe, bool runtime) { bool s2idle = suspend_to_idle(); - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; /* @@ -327,7 +327,7 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) void xe_display_pm_suspend_late(struct xe_device *xe) { bool s2idle = suspend_to_idle(); - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_power_domains_suspend(xe, s2idle); @@ -337,7 +337,7 @@ void xe_display_pm_suspend_late(struct xe_device *xe) void xe_display_pm_resume_early(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_power_resume_early(xe); @@ -347,7 +347,7 @@ void xe_display_pm_resume_early(struct xe_device *xe) void xe_display_pm_resume(struct xe_device *xe, bool runtime) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_dmc_resume(xe); @@ -385,7 +385,7 @@ int xe_display_probe(struct xe_device *xe) 
{ int err; - if (!xe->info.enable_display) + if (!xe->info.probe_display) goto no_display; intel_display_device_probe(xe); @@ -398,7 +398,7 @@ int xe_display_probe(struct xe_device *xe) return 0; no_display: - xe->info.enable_display = false; + xe->info.probe_display = false; unset_display_features(xe); return 0; } diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 1aba6f9eaa19..206328387150 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -543,7 +543,7 @@ static void update_device_info(struct xe_device *xe) { /* disable features that are not available/applicable to VFs */ if (IS_SRIOV_VF(xe)) { - xe->info.enable_display = 0; + xe->info.probe_display = 0; xe->info.has_heci_gscfi = 0; xe->info.skip_guc_pc = 1; xe->info.skip_pcode = 1; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 5b7292a9a66d..16a24eadd94b 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -282,8 +282,15 @@ struct xe_device { u8 has_sriov:1; /** @info.has_usm: Device has unified shared memory support */ u8 has_usm:1; - /** @info.enable_display: display enabled */ - u8 enable_display:1; + /** + * @info.probe_display: Probe display hardware. If set to + * false, the driver will behave as if there is no display + * hardware present and will not try to read/write to it in any + * way. The display hardware, if it exists, will not be + * exposed to userspace and will be left untouched in whatever + * state the firmware or bootloader left it in. + */ + u8 probe_display:1; /** @info.skip_mtcfg: skip Multi-Tile configuration from MTCFG register */ u8 skip_mtcfg:1; /** @info.skip_pcode: skip access to PCODE uC */ diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 499540add465..e7e4b1ebab50 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -15,7 +15,7 @@ #include "xe_sched_job.h" struct xe_modparam xe_modparam = { - .enable_display = true, + .probe_display = true, .guc_log_level = 5, .force_probe = CONFIG_DRM_XE_FORCE_PROBE, .wedged_mode = 1, @@ -25,8 +25,8 @@ struct xe_modparam xe_modparam = { module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); -module_param_named(enable_display, xe_modparam.enable_display, bool, 0444); -MODULE_PARM_DESC(enable_display, "Enable display"); +module_param_named(probe_display, xe_modparam.probe_display, bool, 0444); +MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched (default: true)"); module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, uint, 0600); MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size(in MiB)"); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index 61a0d28a28c8..161a5e6f717f 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -11,7 +11,7 @@ /* Module modprobe variables */ struct xe_modparam { bool force_execlist; - bool enable_display; + bool probe_display; u32 force_vram_bar_size; int guc_log_level; char *guc_firmware_path; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index bc575bbee42d..0f66cf067e2a 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -616,9 +616,9 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.skip_mtcfg = desc->skip_mtcfg; xe->info.skip_pcode = desc->skip_pcode; - 
xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && - xe_modparam.enable_display && - desc->has_display; + xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && + xe_modparam.probe_display && + desc->has_display; err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0); if (err) @@ -829,7 +829,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xe->info.media_name, xe->info.media_verx100 / 100, xe->info.media_verx100 % 100, - str_yes_no(xe->info.enable_display), + str_yes_no(xe->info.probe_display), xe->info.dma_mask_size, xe->info.tile_count, xe->info.has_heci_gscfi, xe->info.has_heci_cscfi); From ab0d6ef864c5fa820e894ee1a07f861e63851664 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 14 Aug 2024 15:26:12 +0530 Subject: [PATCH 037/108] drm/xe: Move enable host l2 VRAM post MCR init xe_gt_enable_host_l2_vram() is reading the XE2_GAMREQSTRM_CTRL register that is currently missing the MCR annotation. However, just adding the annotation doesn't work as this function is called before MCR handling is initialized in xe_gt_mcr_init(). xe_gt_enable_host_l2_vram() is used to implement WA 16023588340 that needs to be done as early as possible during initialization in order to be effective since the MMIO writes impact it. In the failure scenario, driver would simply not be able to bind successfully. Moving xe_gt_enable_host_l2_vram() later, after MCR initialization is done, only incurs a few additional HW accesses, particularly when loading GuC for hwconfig. Binding/unbinding the driver 100 times in BMG still works so it should be ok to start handling the WA a little bit later. This is sufficient to allow adding the MCR annotation to XE2_GAMREQSTRM_CTRL. Cc: Lucas De Marchi Signed-off-by: Tejas Upadhyay Reviewed-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240814095614.909774-2-tejas.upadhyay@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 58895ed22f6e..238c7d1053f0 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -557,7 +557,6 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) xe_gt_mcr_init_early(gt); xe_pat_init(gt); - xe_gt_enable_host_l2_vram(gt); err = xe_uc_init(>->uc); if (err) @@ -569,6 +568,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) xe_gt_topology_init(gt); xe_gt_mcr_init(gt); + xe_gt_enable_host_l2_vram(gt); out_fw: xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); From f0ffa657e9f3913c7921cbd4d876343401f15f52 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 14 Aug 2024 15:26:13 +0530 Subject: [PATCH 038/108] drm/xe: Write all slices if its mcr register Register GAMREQSTRM_CTRL should be considered mcr register which should write to all slices as per documentation. 
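Sketch of the MCR-safe read-modify-write this change switches to (mirrors the xe_gt.c hunks below): read from any single slice, write back to all slices:

	reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
	reg |= CG_DIS_CNTLBUS;
	xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);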
Bspec: 71185 Fixes: 01570b446939 ("drm/xe/bmg: implement Wa_16023588340") Reviewed-by: Matt Roper Signed-off-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20240814095614.909774-3-tejas.upadhyay@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 +- drivers/gpu/drm/xe/xe_gt.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 5f33dee35f8a..aeb17fcb27ac 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -83,7 +83,7 @@ #define STATELESS_COMPRESSION_CTRL XE_REG(0x4148) #define UNIFIED_COMPRESSION_FORMAT REG_GENMASK(3, 0) -#define XE2_GAMREQSTRM_CTRL XE_REG(0x4194) +#define XE2_GAMREQSTRM_CTRL XE_REG_MCR(0x4194) #define CG_DIS_CNTLBUS REG_BIT(6) #define CCS_AUX_INV XE_REG(0x4208) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 238c7d1053f0..224c137967c3 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -110,9 +110,9 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) if (!xe_gt_is_media_type(gt)) { xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH); - reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); + reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg |= CG_DIS_CNTLBUS; - xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); + xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); } xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); @@ -134,9 +134,9 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) if (WARN_ON(err)) return; - reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); + reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg &= ~CG_DIS_CNTLBUS; - xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); + xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); } From 4551d60299b5ddc2655b6b365a4b92634e14e04f Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 14 Aug 2024 15:26:14 +0530 Subject: [PATCH 039/108] drm/xe: Define STATELESS_COMPRESSION_CTRL as mcr register Register STATELESS_COMPRESSION_CTRL should be considered an MCR register whose writes should go to all slices, as per documentation.
Bspec: 71185 Fixes: ecabb5e6ce54 ("drm/xe/xe2: Add performance turning changes") Signed-off-by: Tejas Upadhyay Reviewed-by: Shekhar Chauhan Link: https://patchwork.freedesktop.org/patch/msgid/20240814095614.909774-4-tejas.upadhyay@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index aeb17fcb27ac..0d1a4a9f4e11 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -80,7 +80,7 @@ #define LE_CACHEABILITY_MASK REG_GENMASK(1, 0) #define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value) -#define STATELESS_COMPRESSION_CTRL XE_REG(0x4148) +#define STATELESS_COMPRESSION_CTRL XE_REG_MCR(0x4148) #define UNIFIED_COMPRESSION_FORMAT REG_GENMASK(3, 0) #define XE2_GAMREQSTRM_CTRL XE_REG_MCR(0x4194) From 23ab1cb6591dba7c97b65eb407cd71147bd878b8 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 12 Aug 2024 15:13:32 +0100 Subject: [PATCH 040/108] drm/xe: fix engine_class bounds check again This was fixed in commit b7dce525c4fc ("drm/xe/queue: fix engine_class bounds check"), but then re-introduced in commit 6f20fc09936e ("drm/xe: Move and export xe_hw_engine lookup.") which should only be simple code movement of the existing function. Fixes: 6f20fc09936e ("drm/xe: Move and export xe_hw_engine lookup.") Signed-off-by: Matthew Auld Cc: Dominik Grzegorzek Cc: Mika Kuoppala Cc: Matthew Brost Reviewed-by: Jonathan Cavitt Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240812141331.729843-2-matthew.auld@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hw_engine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 402dfa748e16..e195022ca836 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -1189,7 +1189,7 @@ xe_hw_engine_lookup(struct xe_device *xe, { unsigned int idx; - if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class)) + if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) return NULL; if (eci.gt_id >= xe->info.gt_count) From b0ee81dac3205db1e01019629c83595e9433d96b Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 14 Aug 2024 13:56:54 -0700 Subject: [PATCH 041/108] drm/xe: Make exec_queue_kill safe to call twice An upcoming PXP patch will kill queues at runtime when a PXP invalidation event occurs, so we need exec_queue_kill to be safe to call multiple times. v2: Add documentation (Matt B) Signed-off-by: Daniele Ceraolo Spurio Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240814205654.1716586-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 9 +++++++++ drivers/gpu/drm/xe/xe_vm.c | 8 ++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 971e1234b8ea..7d170d37fdbe 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -770,6 +770,15 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; } +/** + * xe_exec_queue_kill - permanently stop all execution from an exec queue + * @q: The exec queue + * + * This function permanently stops all activity on an exec queue. 
If the queue + * is actively executing on the HW, it will be kicked off the engine; any + * pending jobs are discarded and all future submissions are rejected. + * This function is safe to call multiple times. + */ void xe_exec_queue_kill(struct xe_exec_queue *q) { struct xe_exec_queue *eq = q, *next; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 89d86b7cdb02..d6386be79e91 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -275,6 +275,8 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM * @vm: The VM. * @q: The exec_queue + * + * Note that this function might be called multiple times on the same queue. */ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) { @@ -282,8 +284,10 @@ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) return; down_write(&vm->lock); - list_del(&q->lr.link); - --vm->preempt.num_exec_queues; + if (!list_empty(&q->lr.link)) { + list_del_init(&q->lr.link); + --vm->preempt.num_exec_queues; + } if (q->lr.pfence) { dma_fence_enable_sw_signaling(q->lr.pfence); dma_fence_put(q->lr.pfence); From 8d3a2d3d766a823c7510cdc17e6ff7c042c63b61 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 9 Aug 2024 16:12:35 -0700 Subject: [PATCH 042/108] drm/xe: use devm instead of drmm for managed bo The BO cleanup touches the GGTT and therefore requires the HW to be available, so we need to use devm instead of drmm. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1160 Signed-off-by: Daniele Ceraolo Spurio Cc: Lucas De Marchi Cc: Matthew Auld Reviewed-by: Matthew Auld Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240809231237.1503796-2-daniele.ceraolospurio@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_bo.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 56a089aa3916..800119c8fc8d 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1584,7 +1584,7 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, return bo; } -static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg) +static void __xe_bo_unpin_map_no_vm(void *arg) { xe_bo_unpin_map_no_vm(arg); } @@ -1599,7 +1599,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile if (IS_ERR(bo)) return bo; - ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo); + ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo); if (ret) return ERR_PTR(ret); @@ -1647,7 +1647,7 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str if (IS_ERR(bo)) return PTR_ERR(bo); - drmm_release_action(&xe->drm, __xe_bo_unpin_map_no_vm, *src); + devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src); *src = bo; return 0; From 3396900aa273903639a1792afa4d23dc09bec291 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 9 Aug 2024 16:28:30 -0700 Subject: [PATCH 043/108] drm/xe: Fix tile fini sequence Only set tile->mmio.regs to NULL if not the root tile in tile_fini. The root tile's MMIO regs are set up earlier in MMIO init, thus they should be set to NULL in mmio_fini.
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240809232830.3302251-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_mmio.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index bdcc7282385c..f5bdb540e823 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -30,7 +30,8 @@ static void tiles_fini(void *arg) int id; for_each_tile(tile, xe, id) - tile->mmio.regs = NULL; + if (tile != xe_device_get_root_tile(xe)) + tile->mmio.regs = NULL; } /* @@ -146,9 +147,11 @@ int xe_mmio_probe_tiles(struct xe_device *xe) static void mmio_fini(void *arg) { struct xe_device *xe = arg; + struct xe_tile *root_tile = xe_device_get_root_tile(xe); pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); xe->mmio.regs = NULL; + root_tile->mmio.regs = NULL; } int xe_mmio_init(struct xe_device *xe) From 2e5d47fe7839298fa096970e184aac9bf82c3bd3 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 15 Aug 2024 16:05:39 -0700 Subject: [PATCH 044/108] drm/xe/uc: Use managed bo for HuC and GSC objects Drmm actions are not the right ones to clean up BOs and we should use devm instead. However, we can also instead just allocate the objects using the managed_bo function, which will internally register the correct cleanup call and therefore allows us to simplify the code. While at it, switch to drmm_kzalloc for the GSC proxy allocation to further simplify the cleanup. Cc: John Harrison Cc: Alan Previn Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Lucas De Marchi Reviewed-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240815230541.3828206-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gsc.c | 12 +++-------- drivers/gpu/drm/xe/xe_gsc_proxy.c | 36 ++++++------------------------- drivers/gpu/drm/xe/xe_huc.c | 19 +++++----------- 3 files changed, 14 insertions(+), 53 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 77ce44e845c5..8a9b3c50a588 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -450,11 +450,6 @@ static void free_resources(struct drm_device *drm, void *arg) xe_exec_queue_put(gsc->q); gsc->q = NULL; } - - if (gsc->private) { - xe_bo_unpin_map_no_vm(gsc->private); - gsc->private = NULL; - } } int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) @@ -474,10 +469,9 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) if (!hwe) return -ENODEV; - bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M, - ttm_bo_type_kernel, - XE_BO_FLAG_STOLEN | - XE_BO_FLAG_GGTT); + bo = xe_managed_bo_create_pin_map(xe, tile, SZ_4M, + XE_BO_FLAG_STOLEN | + XE_BO_FLAG_GGTT); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 28e6a7a1d282..2d6ea8c01445 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -371,27 +371,6 @@ static const struct component_ops xe_gsc_proxy_component_ops = { .unbind = xe_gsc_proxy_component_unbind, }; -static void proxy_channel_free(struct drm_device *drm, void *arg) -{ - struct xe_gsc *gsc = arg; - - if (!gsc->proxy.bo) - return; - - if (gsc->proxy.to_csme) { - kfree(gsc->proxy.to_csme); - gsc->proxy.to_csme = NULL; - gsc->proxy.from_csme = NULL; - } - - if 
(gsc->proxy.bo) { - iosys_map_clear(&gsc->proxy.to_gsc); - iosys_map_clear(&gsc->proxy.from_gsc); - xe_bo_unpin_map_no_vm(gsc->proxy.bo); - gsc->proxy.bo = NULL; - } -} - static int proxy_channel_alloc(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); @@ -400,18 +379,15 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) struct xe_bo *bo; void *csme; - csme = kzalloc(GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL); + csme = drmm_kzalloc(&xe->drm, GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL); if (!csme) return -ENOMEM; - bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_PROXY_CHANNEL_SIZE, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT); - if (IS_ERR(bo)) { - kfree(csme); + bo = xe_managed_bo_create_pin_map(xe, tile, GSC_PROXY_CHANNEL_SIZE, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT); + if (IS_ERR(bo)) return PTR_ERR(bo); - } gsc->proxy.bo = bo; gsc->proxy.to_gsc = IOSYS_MAP_INIT_OFFSET(&bo->vmap, 0); @@ -419,7 +395,7 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) gsc->proxy.to_csme = csme; gsc->proxy.from_csme = csme + GSC_PROXY_BUFFER_SIZE; - return drmm_add_action_or_reset(&xe->drm, proxy_channel_free, gsc); + return 0; } /** diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index bec4366e5513..f5459f97af23 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -43,14 +43,6 @@ huc_to_guc(struct xe_huc *huc) return &container_of(huc, struct xe_uc, huc)->guc; } -static void free_gsc_pkt(struct drm_device *drm, void *arg) -{ - struct xe_huc *huc = arg; - - xe_bo_unpin_map_no_vm(huc->gsc_pkt); - huc->gsc_pkt = NULL; -} - #define PXP43_HUC_AUTH_INOUT_SIZE SZ_4K static int huc_alloc_gsc_pkt(struct xe_huc *huc) { @@ -59,17 +51,16 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc) struct xe_bo *bo; /* we use a single object for both input and output */ - bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL, - PXP43_HUC_AUTH_INOUT_SIZE * 2, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT); + bo = xe_managed_bo_create_pin_map(xe, gt_to_tile(gt), + PXP43_HUC_AUTH_INOUT_SIZE * 2, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT); if (IS_ERR(bo)) return PTR_ERR(bo); huc->gsc_pkt = bo; - return drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc); + return 0; } int xe_huc_init(struct xe_huc *huc) From 5a891a0e69f134f53cc91b409f38e5ea1cafaf0a Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 15 Aug 2024 16:05:40 -0700 Subject: [PATCH 045/108] drm/xe/uc: Use devm to register cleanup that includes exec_queues Exec_queue cleanup requires HW access, so we need to use devm instead of drmm for it. 
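The distinction matters because devm actions run when the device is unbound, while the HW is still reachable, whereas drmm actions only run once the last reference to the drm_device is dropped, which can be long after the underlying device is gone.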
Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: Alan Previn Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240815230541.3828206-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gsc.c | 4 ++-- drivers/gpu/drm/xe/xe_guc_submit.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 8a9b3c50a588..8a137cb83318 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -437,7 +437,7 @@ int xe_gsc_init(struct xe_gsc *gsc) return ret; } -static void free_resources(struct drm_device *drm, void *arg) +static void free_resources(void *arg) { struct xe_gsc *gsc = arg; @@ -495,7 +495,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) gsc->q = q; gsc->wq = wq; - err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc); + err = devm_add_action_or_reset(xe->drm.dev, free_resources, gsc); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index da63be550d4d..be5b9d8cfa5f 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -284,7 +284,7 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) free_submit_wq(guc); } -static void guc_submit_wedged_fini(struct drm_device *drm, void *arg) +static void guc_submit_wedged_fini(void *arg) { struct xe_guc *guc = arg; struct xe_exec_queue *q; @@ -877,7 +877,7 @@ void xe_guc_submit_wedge(struct xe_guc *guc) xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); - err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm, + err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, guc_submit_wedged_fini, guc); if (err) { drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n"); From db3461a7743817ad7c73553902231b096616813a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 15 Aug 2024 21:02:08 -0700 Subject: [PATCH 046/108] drm/xe: Use for_each_remote_tile rather than manual check Replace for_each_tile plus a check against primary tile with for_each_remote_tile in tiles_fini. The latter macro does this for us. Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240816040208.62695-1-matthew.brost@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_mmio.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index f5bdb540e823..3fd462fda625 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -29,9 +29,8 @@ static void tiles_fini(void *arg) struct xe_tile *tile; int id; - for_each_tile(tile, xe, id) - if (tile != xe_device_get_root_tile(xe)) - tile->mmio.regs = NULL; + for_each_remote_tile(tile, xe, id) + tile->mmio.regs = NULL; } /* From 8a0f58ec4728ec04c654645bf8ed4070821ebbf3 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 15 Aug 2024 10:26:03 -0700 Subject: [PATCH 047/108] drm/xe: Add debugfs to dump GuC's hwconfig Although the query uapi is the official way to get at the GuC's hwconfig table contents, it's still useful to have a quick debugfs interface to dump the table in a human-readable format while debugging the driver. 
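With this in place, the table can be inspected directly from a shell, e.g. (illustrative path, assuming debugfs is mounted in the usual location and the device is card 0):

    # cat /sys/kernel/debug/dri/0/gt0/hwconfig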
Signed-off-by: Matt Roper Reviewed-by: Jonathan Cavitt Reviewed-by: Jagmeet Randhawa Link: https://patchwork.freedesktop.org/patch/msgid/20240815172602.2729146-3-matthew.d.roper@intel.com --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 11 ++++++ drivers/gpu/drm/xe/xe_guc_hwconfig.c | 57 ++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_guc_hwconfig.h | 2 + 3 files changed, 70 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 5125d76ccfac..8f95d3a5949b 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -19,6 +19,7 @@ #include "xe_gt_sriov_vf_debugfs.h" #include "xe_gt_stats.h" #include "xe_gt_topology.h" +#include "xe_guc_hwconfig.h" #include "xe_hw_engine.h" #include "xe_lrc.h" #include "xe_macros.h" @@ -270,6 +271,15 @@ static int vecs_default_lrc(struct xe_gt *gt, struct drm_printer *p) return 0; } +static int hwconfig(struct xe_gt *gt, struct drm_printer *p) +{ + xe_pm_runtime_get(gt_to_xe(gt)); + xe_guc_hwconfig_dump(&gt->uc.guc, p); + xe_pm_runtime_put(gt_to_xe(gt)); + + return 0; +} + static const struct drm_info_list debugfs_list[] = { {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines}, {"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset}, @@ -288,6 +298,7 @@ static const struct drm_info_list debugfs_list[] = { {"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc}, {"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc}, {"stats", .show = xe_gt_debugfs_simple_show, .data = xe_gt_stats_print_info}, + {"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig}, }; void xe_gt_debugfs_register(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c index d9b570a154a2..025bad701556 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -6,6 +6,7 @@ #include "xe_guc_hwconfig.h" #include +#include #include "abi/guc_actions_abi.h" #include "xe_bo.h" @@ -103,3 +104,59 @@ void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst) xe_map_memcpy_from(xe, dst, &guc->hwconfig.bo->vmap, 0, guc->hwconfig.size); } + +void xe_guc_hwconfig_dump(struct xe_guc *guc, struct drm_printer *p) +{ + size_t size = xe_guc_hwconfig_size(guc); + u32 *hwconfig; + u64 num_dw; + u32 extra_bytes; + int i = 0; + + if (size == 0) { + drm_printf(p, "No hwconfig available\n"); + return; + } + + num_dw = div_u64_rem(size, sizeof(u32), &extra_bytes); + + hwconfig = kzalloc(size, GFP_KERNEL); + if (!hwconfig) { + drm_printf(p, "Error: could not allocate hwconfig memory\n"); + return; + } + + xe_guc_hwconfig_copy(guc, hwconfig); + + /* An entry requires at least three dwords for key, length, value */ + while (i + 3 <= num_dw) { + u32 attribute = hwconfig[i++]; + u32 len_dw = hwconfig[i++]; + + if (i + len_dw > num_dw) { + drm_printf(p, "Error: Attribute %u is %u dwords, but only %llu remain\n", + attribute, len_dw, num_dw - i); + len_dw = num_dw - i; + } + + /* + * If it's a single dword (as most hwconfig attributes are), + * then it's probably a number that makes sense to display + * in decimal form. In the rare cases where it's more than + * one dword, just print it in hex form and let the user + * figure out how to interpret it.
+ */ + if (len_dw == 1) + drm_printf(p, "[%2u] = %u\n", attribute, hwconfig[i]); + else + drm_printf(p, "[%2u] = { %*ph }\n", attribute, + (int)(len_dw * sizeof(u32)), &hwconfig[i]); + i += len_dw; + } + + if (i < num_dw || extra_bytes) + drm_printf(p, "Error: %llu extra bytes at end of hwconfig\n", + (num_dw - i) * sizeof(u32) + extra_bytes); + + kfree(hwconfig); +} diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.h b/drivers/gpu/drm/xe/xe_guc_hwconfig.h index b5794d641900..7df315900e1c 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.h +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.h @@ -8,10 +8,12 @@ #include +struct drm_printer; struct xe_guc; int xe_guc_hwconfig_init(struct xe_guc *guc); u32 xe_guc_hwconfig_size(struct xe_guc *guc); void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst); +void xe_guc_hwconfig_dump(struct xe_guc *guc, struct drm_printer *p); #endif From fc7c7498db3da52efe874029a9c2ccc0ad2646ee Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 15 Aug 2024 10:26:04 -0700 Subject: [PATCH 048/108] drm/xe/mcr: Try to derive dss_per_grp from hwconfig attributes When steering MCR register ranges of type "DSS," the group_id and instance_id values are calculated by dividing the DSS pool according to the size of a gslice or cslice, depending on the platform. These values haven't changed much on past platforms, so we've been able to hardcode the proper divisor so far. However the layout may not be so fixed on future platforms so the proper, future-proof way to determine this is by using some of the attributes from the GuC's hwconfig table. The hwconfig has two attributes reflecting the architectural maximum slice and subslice counts (i.e., before any fusing is considered) that can be used for the purposes of calculating MCR steering targets. If the hwconfig is lacking the necessary values (which should only be possible on older platforms before these attributes were added), we can still fall back to the old hardcoded values. Going forward the hwconfig is expected to always provide the information we need on newer platforms, and any failure to do so will be considered a bug in the firmware that will prevent us from switching to the buggy firmware release. It's worth noting that over time GuC's hwconfig has provided a couple different keys with similar-sounding descriptions. For our purposes here, we only trust the newer key "70" which has supplanted the similarly-named key "2" that existed on older platforms. 
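As a worked example with made-up numbers: a platform whose hwconfig reported a maximum of 8 slices and 64 subslices would steer with DIV_ROUND_UP(64, 8) = 8 DSS per group; the same division using the values a given platform actually reports is what replaces the old hardcoded divisors.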
Bspec: 73210 Signed-off-by: Matt Roper Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240815172602.2729146-4-matthew.d.roper@intel.com --- drivers/gpu/drm/xe/xe_gt_mcr.c | 40 +++++++++++++++++++++++++--- drivers/gpu/drm/xe/xe_gt_types.h | 6 +++++ drivers/gpu/drm/xe/xe_guc_hwconfig.c | 40 ++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_guc_hwconfig.h | 1 + 4 files changed, 83 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 6d948a469126..7d7bd0be6233 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -8,8 +8,10 @@ #include "regs/xe_gt_regs.h" #include "xe_assert.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_gt_topology.h" #include "xe_gt_types.h" +#include "xe_guc_hwconfig.h" #include "xe_mmio.h" #include "xe_sriov.h" @@ -297,6 +299,36 @@ static void init_steering_mslice(struct xe_gt *gt) static unsigned int dss_per_group(struct xe_gt *gt) { + struct xe_guc *guc = &gt->uc.guc; + u32 max_slices = 0, max_subslices = 0; + int ret; + + /* + * Try to query the GuC's hwconfig table for the maximum number of + * slices and subslices. These don't reflect the platform's actual + * slice/DSS counts, just the physical layout by which we should + * determine the steering targets. On older platforms with older GuC + * firmware releases it's possible that these attributes may not be + * included in the table, so we can always fall back to the old + * hardcoded layouts. + */ +#define HWCONFIG_ATTR_MAX_SLICES 1 +#define HWCONFIG_ATTR_MAX_SUBSLICES 70 + + ret = xe_guc_hwconfig_lookup_u32(guc, HWCONFIG_ATTR_MAX_SLICES, + &max_slices); + if (ret < 0 || max_slices == 0) + goto fallback; + + ret = xe_guc_hwconfig_lookup_u32(guc, HWCONFIG_ATTR_MAX_SUBSLICES, + &max_subslices); + if (ret < 0 || max_subslices == 0) + goto fallback; + + return DIV_ROUND_UP(max_subslices, max_slices); + +fallback: + xe_gt_dbg(gt, "GuC hwconfig cannot provide dss/slice; using typical fallback values\n"); if (gt_to_xe(gt)->info.platform == XE_PVC) return 8; else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) @@ -314,16 +346,16 @@ static unsigned int dss_per_group(struct xe_gt *gt) */ void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance) { - int dss_per_grp = dss_per_group(gt); - xe_gt_assert(gt, dss < XE_MAX_DSS_FUSE_BITS); - *group = dss / dss_per_grp; - *instance = dss % dss_per_grp; + *group = dss / gt->steering_dss_per_grp; + *instance = dss % gt->steering_dss_per_grp; } static void init_steering_dss(struct xe_gt *gt) { + gt->steering_dss_per_grp = dss_per_group(gt); + xe_gt_mcr_get_dss_steering(gt, min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0), xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0)), diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 8e46c0969bc7..82cf9f631736 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -376,6 +376,12 @@ struct xe_gt { u16 instance_target; } steering[NUM_STEERING_TYPES]; + /** + * @steering_dss_per_grp: number of DSS per steering group (gslice, + * cslice, etc.).
+ */ + unsigned int steering_dss_per_grp; + + /** * @mcr_lock: protects the MCR_SELECTOR register for the duration * of a steered operation diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c index 025bad701556..af2c817d552c 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -160,3 +160,43 @@ void xe_guc_hwconfig_dump(struct xe_guc *guc, struct drm_printer *p) kfree(hwconfig); } + +/* + * Lookup a specific 32-bit attribute value in the GuC's hwconfig table. + */ +int xe_guc_hwconfig_lookup_u32(struct xe_guc *guc, u32 attribute, u32 *val) +{ + size_t size = xe_guc_hwconfig_size(guc); + u64 num_dw = div_u64(size, sizeof(u32)); + u32 *hwconfig; + bool found = false; + int i = 0; + + if (num_dw == 0) + return -EINVAL; + + hwconfig = kzalloc(size, GFP_KERNEL); + if (!hwconfig) + return -ENOMEM; + + xe_guc_hwconfig_copy(guc, hwconfig); + + /* An entry requires at least three dwords for key, length, value */ + while (i + 3 <= num_dw) { + u32 key = hwconfig[i++]; + u32 len_dw = hwconfig[i++]; + + if (key != attribute) { + i += len_dw; + continue; + } + + *val = hwconfig[i]; + found = true; + break; + } + + kfree(hwconfig); + + return found ? 0 : -ENOENT; } diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.h b/drivers/gpu/drm/xe/xe_guc_hwconfig.h index 7df315900e1c..ab4e5038236e 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.h +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.h @@ -15,5 +15,6 @@ int xe_guc_hwconfig_init(struct xe_guc *guc); u32 xe_guc_hwconfig_size(struct xe_guc *guc); void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst); void xe_guc_hwconfig_dump(struct xe_guc *guc, struct drm_printer *p); +int xe_guc_hwconfig_lookup_u32(struct xe_guc *guc, u32 attribute, u32 *val); #endif From 852856e3b6f679c694dd5ec41e5a3c11aa46640b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 15 Aug 2024 20:40:33 -0700 Subject: [PATCH 049/108] drm/xe: Use reserved copy engine for user binds on faulting devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User binds map to engines which can fault, and faults depend on user bind completion, thus we can deadlock. Avoid this by using the reserved copy engine for user binds on faulting devices. While we are here, normalize bind queue creation with a helper.
v2: - Pass in extensions to bind queue creation (CI) v3: - s/resevered/reserved (Lucas) - Fix NULL hwe check (Jonathan) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Thomas Hellström Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240816034033.53837-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 122 +++++++++++++++-------------- drivers/gpu/drm/xe/xe_exec_queue.h | 6 +- drivers/gpu/drm/xe/xe_migrate.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 8 +- 4 files changed, 70 insertions(+), 68 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 7d170d37fdbe..7f0b33098182 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -166,7 +166,8 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, struct xe_vm *vm, - enum xe_engine_class class, u32 flags) + enum xe_engine_class class, + u32 flags, u64 extensions) { struct xe_hw_engine *hwe, *hwe0 = NULL; enum xe_hw_engine_id id; @@ -186,7 +187,54 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe if (!logical_mask) return ERR_PTR(-ENODEV); - return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, 0); + return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions); +} + +/** + * xe_exec_queue_create_bind() - Create bind exec queue. + * @xe: Xe device. + * @tile: tile which bind exec queue belongs to. + * @flags: exec queue creation flags + * @extensions: exec queue creation extensions + * + * Normalize bind exec queue creation. Bind exec queue is tied to migration VM + * for access to physical memory required for page table programming. On + * faulting devices the reserved copy engine instance must be used to avoid + * deadlocking (user binds cannot get stuck behind faults as kernel binds which + * resolve faults depend on user binds). On non-faulting devices any copy engine + * can be used.
+ * + * Returns exec queue on success, ERR_PTR on failure + */ +struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, + struct xe_tile *tile, + u32 flags, u64 extensions) +{ + struct xe_gt *gt = tile->primary_gt; + struct xe_exec_queue *q; + struct xe_vm *migrate_vm; + + migrate_vm = xe_migrate_get_vm(tile->migrate); + if (xe->info.has_usm) { + struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, + XE_ENGINE_CLASS_COPY, + gt->usm.reserved_bcs_instance, + false); + + if (!hwe) + return ERR_PTR(-EINVAL); + + q = xe_exec_queue_create(xe, migrate_vm, + BIT(hwe->logical_instance), 1, hwe, + flags, extensions); + } else { + q = xe_exec_queue_create_class(xe, gt, migrate_vm, + XE_ENGINE_CLASS_COPY, flags, + extensions); + } + xe_vm_put(migrate_vm); + + return q; } void xe_exec_queue_destroy(struct kref *ref) @@ -418,34 +466,6 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue return 0; } -static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt, - struct drm_xe_engine_class_instance *eci, - u16 width, u16 num_placements) -{ - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - u32 logical_mask = 0; - - if (XE_IOCTL_DBG(xe, width != 1)) - return 0; - if (XE_IOCTL_DBG(xe, num_placements != 1)) - return 0; - if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) - return 0; - - eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY; - - for_each_hw_engine(hwe, gt, id) { - if (xe_hw_engine_is_reserved(hwe)) - continue; - - if (hwe->class == XE_ENGINE_CLASS_COPY) - logical_mask |= BIT(hwe->logical_instance); - } - - return logical_mask; -} - static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, struct drm_xe_engine_class_instance *eci, u16 width, u16 num_placements) @@ -507,8 +527,9 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, struct drm_xe_engine_class_instance __user *user_eci = u64_to_user_ptr(args->instances); struct xe_hw_engine *hwe; - struct xe_vm *vm, *migrate_vm; + struct xe_vm *vm; struct xe_gt *gt; + struct xe_tile *tile; struct xe_exec_queue *q = NULL; u32 logical_mask; u32 id; @@ -533,37 +554,20 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { - for_each_gt(gt, xe, id) { + if (XE_IOCTL_DBG(xe, args->width != 1) || + XE_IOCTL_DBG(xe, args->num_placements != 1) || + XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) + return -EINVAL; + + for_each_tile(tile, xe, id) { struct xe_exec_queue *new; - u32 flags; + u32 flags = EXEC_QUEUE_FLAG_VM; - if (xe_gt_is_media_type(gt)) - continue; + if (id) + flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD; - eci[0].gt_id = gt->info.id; - logical_mask = bind_exec_queue_logical_mask(xe, gt, eci, - args->width, - args->num_placements); - if (XE_IOCTL_DBG(xe, !logical_mask)) - return -EINVAL; - - hwe = xe_hw_engine_lookup(xe, eci[0]); - if (XE_IOCTL_DBG(xe, !hwe)) - return -EINVAL; - - /* The migration vm doesn't hold rpm ref */ - xe_pm_runtime_get_noresume(xe); - - flags = EXEC_QUEUE_FLAG_VM | (id ? 
EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0); - - migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate); - new = xe_exec_queue_create(xe, migrate_vm, logical_mask, - args->width, hwe, flags, - args->extensions); - - xe_pm_runtime_put(xe); /* now held by engine */ - - xe_vm_put(migrate_vm); + new = xe_exec_queue_create_bind(xe, tile, flags, + args->extensions); if (IS_ERR(new)) { err = PTR_ERR(new); if (q) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index ded77b0f3b90..99139368ba6e 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -20,7 +20,11 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v u64 extensions); struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, struct xe_vm *vm, - enum xe_engine_class class, u32 flags); + enum xe_engine_class class, + u32 flags, u64 extensions); +struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, + struct xe_tile *tile, + u32 flags, u64 extensions); void xe_exec_queue_fini(struct xe_exec_queue *q); void xe_exec_queue_destroy(struct kref *ref); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index a2d0ce3c59bf..cbf54be224c9 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -442,7 +442,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) m->q = xe_exec_queue_create_class(xe, primary_gt, vm, XE_ENGINE_CLASS_COPY, EXEC_QUEUE_FLAG_KERNEL | - EXEC_QUEUE_FLAG_PERMANENT); + EXEC_QUEUE_FLAG_PERMANENT, 0); } if (IS_ERR(m->q)) { xe_vm_close_and_put(vm); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index f54594db643e..b15348d4809a 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1482,19 +1482,13 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) /* Kernel migration VM shouldn't have a circular loop.. */ if (!(flags & XE_VM_FLAG_MIGRATION)) { for_each_tile(tile, xe, id) { - struct xe_gt *gt = tile->primary_gt; - struct xe_vm *migrate_vm; struct xe_exec_queue *q; u32 create_flags = EXEC_QUEUE_FLAG_VM; if (!vm->pt_root[id]) continue; - migrate_vm = xe_migrate_get_vm(tile->migrate); - q = xe_exec_queue_create_class(xe, gt, migrate_vm, - XE_ENGINE_CLASS_COPY, - create_flags); - xe_vm_put(migrate_vm); + q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); if (IS_ERR(q)) { err = PTR_ERR(q); goto err_close; From f784750c670f7c5ac572590ddad77a89b4c997bf Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:26 +0200 Subject: [PATCH 050/108] drm/xe/hw_engine_group: Introduce xe_hw_engine_group An xe_hw_engine_group is a group of hw engines. Two hw engines belong to the same xe_hw_engine_group if one hw engine cannot make progress while the other is stuck on a page fault. Typically, hw engines of the same group share some resources such as EUs, but this really depends on the hardware configuration of the platforms. The simple engine partitioning proposed here might be too conservative but is intended to work for existing platforms. It can be optimized later if more sets of independent engines are identified. The hw engine groups are intended to be used in the context of faulting long-running job submissions.
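Concretely, the partitioning implemented below yields up to three groups per GT: one for the render and compute engines (which share EUs), one for the copy engines, and one for the video decode/enhance engines, while XE_ENGINE_CLASS_OTHER engines are left ungrouped.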
v2: Move to own files, improve error handling (Matt Brost) v3: Fix build issue reported by CI, improve commit message (Matt Roper) v4: Fix kernel doc v5: Add switch case for XE_ENGINE_CLASS_OTHER Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-2-francois.dugast@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_hw_engine.c | 4 + drivers/gpu/drm/xe/xe_hw_engine_group.c | 102 ++++++++++++++++++ drivers/gpu/drm/xe/xe_hw_engine_group.h | 16 +++ drivers/gpu/drm/xe/xe_hw_engine_group_types.h | 51 +++++++++ drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 + 6 files changed, 176 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_group.c create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_group.h create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_group_types.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index e11392b5dd3d..b9670ae09a9e 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -66,6 +66,7 @@ xe-y += xe_bb.o \ xe_heci_gsc.o \ xe_hw_engine.o \ xe_hw_engine_class_sysfs.o \ + xe_hw_engine_group.o \ xe_hw_fence.o \ xe_huc.o \ xe_irq.o \ diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index e195022ca836..aa9b2b16a6e0 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -23,6 +23,7 @@ #include "xe_gt_printk.h" #include "xe_gt_mcr.h" #include "xe_gt_topology.h" +#include "xe_hw_engine_group.h" #include "xe_hw_fence.h" #include "xe_irq.h" #include "xe_lrc.h" @@ -790,6 +791,9 @@ int xe_hw_engines_init(struct xe_gt *gt) } hw_engine_setup_logical_mapping(gt); + err = xe_hw_engine_setup_groups(gt); + if (err) + return err; return 0; } diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c new file mode 100644 index 000000000000..1d109c08c7a6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_hw_engine_group.h" + +static void +hw_engine_group_free(struct drm_device *drm, void *arg) +{ + struct xe_hw_engine_group *group = arg; + + kfree(group); +} + +static struct xe_hw_engine_group * +hw_engine_group_alloc(struct xe_device *xe) +{ + struct xe_hw_engine_group *group; + int err; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + + init_rwsem(&group->mode_sem); + INIT_LIST_HEAD(&group->exec_queue_list); + + err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group); + if (err) + return ERR_PTR(err); + + return group; +} + +/** + * xe_hw_engine_setup_groups() - Setup the hw engine groups for the gt + * @gt: The gt for which groups are setup + * + * Return: 0 on success, negative error code on error. 
+ */ +int xe_hw_engine_setup_groups(struct xe_gt *gt) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs; + struct xe_device *xe = gt_to_xe(gt); + int err; + + group_rcs_ccs = hw_engine_group_alloc(xe); + if (IS_ERR(group_rcs_ccs)) { + err = PTR_ERR(group_rcs_ccs); + goto err_group_rcs_ccs; + } + + group_bcs = hw_engine_group_alloc(xe); + if (IS_ERR(group_bcs)) { + err = PTR_ERR(group_bcs); + goto err_group_bcs; + } + + group_vcs_vecs = hw_engine_group_alloc(xe); + if (IS_ERR(group_vcs_vecs)) { + err = PTR_ERR(group_vcs_vecs); + goto err_group_vcs_vecs; + } + + for_each_hw_engine(hwe, gt, id) { + switch (hwe->class) { + case XE_ENGINE_CLASS_COPY: + hwe->hw_engine_group = group_bcs; + break; + case XE_ENGINE_CLASS_RENDER: + case XE_ENGINE_CLASS_COMPUTE: + hwe->hw_engine_group = group_rcs_ccs; + break; + case XE_ENGINE_CLASS_VIDEO_DECODE: + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + hwe->hw_engine_group = group_vcs_vecs; + break; + case XE_ENGINE_CLASS_OTHER: + break; + default: + drm_warn(&xe->drm, "NOT POSSIBLE"); + } + } + + return 0; + +err_group_vcs_vecs: + kfree(group_vcs_vecs); +err_group_bcs: + kfree(group_bcs); +err_group_rcs_ccs: + kfree(group_rcs_ccs); + + return err; +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h b/drivers/gpu/drm/xe/xe_hw_engine_group.h new file mode 100644 index 000000000000..c2648f87f7ef --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_HW_ENGINE_GROUP_H_ +#define _XE_HW_ENGINE_GROUP_H_ + +#include "xe_hw_engine_group_types.h" + +struct drm_device; +struct xe_gt; + +int xe_hw_engine_setup_groups(struct xe_gt *gt); + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group_types.h b/drivers/gpu/drm/xe/xe_hw_engine_group_types.h new file mode 100644 index 000000000000..92b6e0712c03 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_group_types.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_HW_ENGINE_GROUP_TYPES_H_ +#define _XE_HW_ENGINE_GROUP_TYPES_H_ + +#include "xe_force_wake_types.h" +#include "xe_lrc_types.h" +#include "xe_reg_sr_types.h" + +/** + * enum xe_hw_engine_group_execution_mode - possible execution modes of a hw + * engine group + * + * @EXEC_MODE_LR: execution in long-running mode + * @EXEC_MODE_DMA_FENCE: execution in dma fence mode + */ +enum xe_hw_engine_group_execution_mode { + EXEC_MODE_LR, + EXEC_MODE_DMA_FENCE, +}; + +/** + * struct xe_hw_engine_group - Hardware engine group + * + * hw engines belong to the same group if they share hardware resources in a way + * that prevents them from making progress when one is stuck on a page fault. 
+ */ +struct xe_hw_engine_group { + /** + * @exec_queue_list: list of exec queues attached to this + * xe_hw_engine_group + */ + struct list_head exec_queue_list; + /** @resume_work: worker to resume faulting LR exec queues */ + struct work_struct resume_work; + /** @resume_wq: workqueue to resume faulting LR exec queues */ + struct workqueue_struct *resume_wq; + /** + * @mode_sem: used to protect this group's hardware resources and ensure + * mutual exclusion between execution only in faulting LR mode and + * execution only in DMA_FENCE mode + */ + struct rw_semaphore mode_sem; + /** @cur_mode: current execution mode of this hw engine group */ + enum xe_hw_engine_group_execution_mode cur_mode; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 70e6434f150d..39f24012d0f4 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -150,6 +150,8 @@ struct xe_hw_engine { struct xe_hw_engine_class_intf *eclass; /** @oa_unit: oa unit for this hw engine */ struct xe_oa_unit *oa_unit; + /** @hw_engine_group: the group of hw engines this one belongs to */ + struct xe_hw_engine_group *hw_engine_group; }; /** From 3dc6da76ae55d667fca2c9f9497e8ea1a27497b1 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:27 +0200 Subject: [PATCH 051/108] drm/xe/guc_submit: Make suspend_wait interruptible Rely on wait_event_interruptible_timeout() to put the process to sleep with TASK_INTERRUPTIBLE. It allows using this function in an interruptible context. v2: Propagate error on wait_event_interruptible_timeout (Matt Brost) Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-3-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index be5b9d8cfa5f..fbbe6a487bbb 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1628,11 +1628,11 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) * suspend_pending upon kill but to be paranoid but races in which * suspend_pending is set after kill also check kill here. */ - ret = wait_event_timeout(q->guc->suspend_wait, - !READ_ONCE(q->guc->suspend_pending) || - exec_queue_killed(q) || - guc_read_stopped(guc), - HZ * 5); + ret = wait_event_interruptible_timeout(q->guc->suspend_wait, - !READ_ONCE(q->guc->suspend_pending) || - exec_queue_killed(q) || - guc_read_stopped(guc), - HZ * 5); if (!ret) { xe_gt_warn(guc_to_gt(guc), @@ -1642,7 +1642,7 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) return -ETIME; } - return 0; + return ret < 0 ? ret : 0; } static void guc_exec_queue_resume(struct xe_exec_queue *q) From 7970cb36966c9b9183255dc097ae0446300eebcf Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:28 +0200 Subject: [PATCH 052/108] drm/xe/hw_engine_group: Register hw engine group's exec queues Add helpers to safely add and delete the exec queues attached to a hw engine group, and make use of them at the time of creation and destruction of the exec queues. Keeping track of them is required to control the execution mode of the hw engine group.
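The intended pairing, simplified from the ioctl paths below (sketch only):

    /* at exec queue creation */
    if (q->vm && q->hwe->hw_engine_group) {
        err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
        if (err)
            goto put_exec_queue;
    }

    /* at exec queue destroy or file close */
    if (q->vm && q->hwe->hw_engine_group)
        xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
    xe_exec_queue_kill(q);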
v2: Improve error handling and robustness, suspend exec queues created in fault mode if group in dma-fence mode, init queue link (Matt Brost) v3: Delete queue from hw engine group when it is destroyed by the user, also clean up at the time of closing the file in case the user did not destroy the queue v4: Use correct list when checking if empty, do not add the queue if VM is in xe_vm_in_preempt_fence_mode (Matt Brost) v5: Remove unrelated newline, add checks and asserts for group, unwind on suspend failure (Matt Brost) Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-4-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_device.c | 3 ++ drivers/gpu/drm/xe/xe_exec_queue.c | 11 ++++ drivers/gpu/drm/xe/xe_exec_queue_types.h | 2 + drivers/gpu/drm/xe/xe_hw_engine_group.c | 66 ++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_hw_engine_group.h | 4 ++ 5 files changed, 86 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 206328387150..b6db7e082d88 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -37,6 +37,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_guc.h" +#include "xe_hw_engine_group.h" #include "xe_hwmon.h" #include "xe_irq.h" #include "xe_memirq.h" @@ -165,6 +166,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) * vm->lock taken during xe_exec_queue_kill(). */ xa_for_each(&xef->exec_queue.xa, idx, q) { + if (q->vm && q->hwe->hw_engine_group) + xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); xe_exec_queue_kill(q); xe_exec_queue_put(q); } diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 7f0b33098182..ebc80add10ac 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -14,6 +14,7 @@ #include "xe_device.h" #include "xe_gt.h" #include "xe_hw_engine_class_sysfs.h" +#include "xe_hw_engine_group.h" #include "xe_hw_fence.h" #include "xe_lrc.h" #include "xe_macros.h" @@ -73,6 +74,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, q->ops = gt->exec_queue_ops; INIT_LIST_HEAD(&q->lr.link); INIT_LIST_HEAD(&q->multi_gt_link); + INIT_LIST_HEAD(&q->hw_engine_group_link); q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; q->sched_props.preempt_timeout_us = @@ -624,6 +626,12 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, err)) goto put_exec_queue; } + + if (q->vm && q->hwe->hw_engine_group) { + err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q); + if (err) + goto put_exec_queue; + } } mutex_lock(&xef->exec_queue.lock); @@ -815,6 +823,9 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; + if (q->vm && q->hwe->hw_engine_group) + xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); + xe_exec_queue_kill(q); trace_xe_exec_queue_close(q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 1408b02eea53..315f874426bd 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -142,6 +142,8 @@ struct xe_exec_queue { * Protected by @vm's resv. Unused if @vm == NULL. 
*/ u64 tlb_flush_seqno; + /** @hw_engine_group_link: link into exec queues in the same hw engine group */ + struct list_head hw_engine_group_link; /** @lrc: logical ring context for this exec queue */ struct xe_lrc *lrc[]; }; diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 1d109c08c7a6..b362af7ffab5 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -5,9 +5,12 @@ #include +#include "xe_assert.h" #include "xe_device.h" +#include "xe_exec_queue.h" #include "xe_gt.h" #include "xe_hw_engine_group.h" +#include "xe_vm.h" static void hw_engine_group_free(struct drm_device *drm, void *arg) @@ -100,3 +103,66 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) return err; } + +/** + * xe_hw_engine_group_add_exec_queue() - Add an exec queue to a hw engine group + * @group: The hw engine group + * @q: The exec_queue + * + * Return: 0 on success, + * -EINTR if the lock could not be acquired + */ +int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q) +{ + int err; + struct xe_device *xe = gt_to_xe(q->gt); + + xe_assert(xe, group); + xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM)); + xe_assert(xe, q->vm); + + if (xe_vm_in_preempt_fence_mode(q->vm)) + return 0; + + err = down_write_killable(&group->mode_sem); + if (err) + return err; + + if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) { + q->ops->suspend(q); + err = q->ops->suspend_wait(q); + if (err) + goto err_suspend; + + queue_work(group->resume_wq, &group->resume_work); + } + + list_add(&q->hw_engine_group_link, &group->exec_queue_list); + up_write(&group->mode_sem); + + return 0; + +err_suspend: + up_write(&group->mode_sem); + return err; +} + +/** + * xe_hw_engine_group_del_exec_queue() - Delete an exec queue from a hw engine group + * @group: The hw engine group + * @q: The exec_queue + */ +void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q) +{ + struct xe_device *xe = gt_to_xe(q->gt); + + xe_assert(xe, group); + xe_assert(xe, q->vm); + + down_write(&group->mode_sem); + + if (!list_empty(&q->hw_engine_group_link)) + list_del(&q->hw_engine_group_link); + + up_write(&group->mode_sem); +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h b/drivers/gpu/drm/xe/xe_hw_engine_group.h index c2648f87f7ef..857a83787504 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h @@ -9,8 +9,12 @@ #include "xe_hw_engine_group_types.h" struct drm_device; +struct xe_exec_queue; struct xe_gt; int xe_hw_engine_setup_groups(struct xe_gt *gt); +int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q); +void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q); + #endif From 53fdfa19e6a9220a14c0bb21273880774ea70dbd Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:29 +0200 Subject: [PATCH 053/108] drm/xe/hw_engine_group: Add helper to suspend faulting LR jobs This is a required feature for dma fence jobs to preempt faulting long running jobs in order to ensure mutual exclusion on a given hw engine group. 
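The suspend and wait calls are issued in two separate passes over the group's queues, so preemption of all faulting LR queues is pipelined rather than serialized: every q->ops->suspend(q) is kicked off first, and only then does the helper block in q->ops->suspend_wait(q) for each of them.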
v2: Pipeline calls to suspend(q) and suspend_wait(q) to improve efficiency, switch to lockdep_assert_held_write (Matt Brost) v3: Return error on suspend_wait failure to propagate on the call stack up to IOCTL (Matt Brost) Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-5-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_hw_engine_group.c | 36 +++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index b362af7ffab5..8659332012dd 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -166,3 +166,39 @@ void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct up_write(&group->mode_sem); } + +/** + * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group + * @group: The hw engine group + * + * Return: 0 on success, negative error code on error. + */ +static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group) +{ + int err; + struct xe_exec_queue *q; + + lockdep_assert_held_write(&group->mode_sem); + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (!xe_vm_in_fault_mode(q->vm)) + continue; + + q->ops->suspend(q); + } + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (!xe_vm_in_fault_mode(q->vm)) + continue; + + err = q->ops->suspend_wait(q); + if (err) + goto err_suspend; + } + + return 0; + +err_suspend: + up_write(&group->mode_sem); + return err; +} From 7f0d7bee2079fc899c8280e177b0c0feb8b9debe Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:30 +0200 Subject: [PATCH 054/108] drm/xe/exec_queue: Remove duplicated code This code section is the same as the body of xe_exec_queue_last_fence_put_unlocked() so call the function instead and remove duplicated code to make maintenance easier. Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-6-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index ebc80add10ac..3ce4582504f9 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -852,10 +852,7 @@ void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm) { xe_exec_queue_last_fence_lockdep_assert(q, vm); - if (q->last_fence) { - dma_fence_put(q->last_fence); - q->last_fence = NULL; - } + xe_exec_queue_last_fence_put_unlocked(q); } /** From 0d92cd8935a3fffbbfee0fd59cdc89ac5167b14a Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:31 +0200 Subject: [PATCH 055/108] drm/xe/exec_queue: Prepare last fence for hw engine group resume context Ensure we can safely take a ref of the exec queue's last fence from the context of resuming jobs from the hw engine group. The locking requirements differ from the general case, hence the introduction of this new function. 
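The key difference from xe_exec_queue_last_fence_get() is the lockdep annotation: instead of requiring the VM's lock or reservation, the new function asserts that the caller holds the hw engine group's mode_sem in write mode, which is the context the resume path runs in.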
v2: Add kernel doc, rework the code to prevent code duplication v3: Fix kernel doc, remove now unnecessary lockdep variants (Matt Brost) v4: Remove new put function (Matt Brost) Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-7-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 33 ++++++++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_exec_queue.h | 2 ++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 3ce4582504f9..6d3b44cbc4c7 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -837,10 +837,12 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, struct xe_vm *vm) { - if (q->flags & EXEC_QUEUE_FLAG_VM) + if (q->flags & EXEC_QUEUE_FLAG_VM) { lockdep_assert_held(&vm->lock); - else + } else { xe_vm_assert_held(vm); + lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem); + } } /** @@ -894,6 +896,33 @@ struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q, return fence; } +/** + * xe_exec_queue_last_fence_get_for_resume() - Get last fence + * @q: The exec queue + * @vm: The VM the engine does a bind or exec for + * + * Get last fence, takes a ref. Only safe to be called in the context of + * resuming the hw engine group's long-running exec queue, when the group + * semaphore is held. + * + * Returns: last fence if not signaled, dma fence stub if signaled + */ +struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *q, + struct xe_vm *vm) +{ + struct dma_fence *fence; + + lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem); + + if (q->last_fence && + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags)) + xe_exec_queue_last_fence_put_unlocked(q); + + fence = q->last_fence ? q->last_fence : dma_fence_get_stub(); + dma_fence_get(fence); + return fence; +} + /** * xe_exec_queue_last_fence_set() - Set last fence * @q: The exec queue diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index 99139368ba6e..90c7f73eab88 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -77,6 +77,8 @@ void xe_exec_queue_last_fence_put(struct xe_exec_queue *e, struct xe_vm *vm); void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *e); struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *e, struct xe_vm *vm); +struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *e, + struct xe_vm *vm); void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm, struct dma_fence *fence); int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, From 2750ff97ee385b85195c5579ee911a551fbb0dd9 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:32 +0200 Subject: [PATCH 056/108] drm/xe/hw_engine_group: Add helper to wait for dma fence jobs This is a required feature for faulting long running jobs not to be submitted while dma fence jobs are running on the hw engine group. 
v2: Switch to lockdep_assert_held_write in worker, get a proper reference for the last fence (Matt Brost) v3: Directly call dma_fence_put with the fence ref (Matt Brost) Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-8-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_hw_engine_group.c | 33 +++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 8659332012dd..8d3ddfc4ee82 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -202,3 +202,36 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group up_write(&group->mode_sem); return err; } + +/** + * xe_hw_engine_group_wait_for_dma_fence_jobs() - Wait for dma fence jobs to complete + * @group: The hw engine group + * + * This function is not meant to be called directly from a user IOCTL as dma_fence_wait() + * is not interruptible. + * + * Return: 0 on success, + * -ETIME if waiting for one job failed + */ +static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group *group) +{ + long timeout; + struct xe_exec_queue *q; + struct dma_fence *fence; + + lockdep_assert_held_write(&group->mode_sem); + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (xe_vm_in_lr_mode(q->vm)) + continue; + + fence = xe_exec_queue_last_fence_get_for_resume(q, q->vm); + timeout = dma_fence_wait(fence, false); + dma_fence_put(fence); + + if (timeout < 0) + return -ETIME; + } + + return 0; +} From 770bd1d341130ff38feda169177159cd78389cfc Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:33 +0200 Subject: [PATCH 057/108] drm/xe/hw_engine_group: Ensure safe transition between execution modes Provide a way to safely transition execution modes of the hw engine group ahead of the actual execution. When necessary, either wait for running jobs to complete or preempt them, thus ensuring mutual exclusion between execution modes. Unlike a mutex, the rw_semaphore used in this context allows multiple submissions in the same mode. 
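A simplified sketch of the scheme (the real code below uses the interruptible
and killable lock variants and rechecks the mode after taking the write lock):

    /* submission in the current mode: multiple holders allowed */
    down_read(&group->mode_sem);
    /* ... submit ... */
    up_read(&group->mode_sem);

    /* mode switch: exclusive, waits for in-flight submissions to drain */
    down_write(&group->mode_sem);
    switch_mode(group);
    downgrade_write(&group->mode_sem);  /* carry on as a reader */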
v2: Use lockdep_assert_held_write, add annotations (Matt Brost) v3: Fix kernel doc, remove redundant code (Matt Brost) v4: Now that xe_hw_engine_group_suspend_faulting_lr_jobs can fail, propagate the error to the caller (Matt Brost) Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-9-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_hw_engine_group.c | 75 +++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_hw_engine_group.h | 5 ++ 2 files changed, 80 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 8d3ddfc4ee82..e6c235119351 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -235,3 +235,78 @@ static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group return 0; } + +static int switch_mode(struct xe_hw_engine_group *group) +{ + int err = 0; + enum xe_hw_engine_group_execution_mode new_mode; + + lockdep_assert_held_write(&group->mode_sem); + + switch (group->cur_mode) { + case EXEC_MODE_LR: + new_mode = EXEC_MODE_DMA_FENCE; + err = xe_hw_engine_group_suspend_faulting_lr_jobs(group); + break; + case EXEC_MODE_DMA_FENCE: + new_mode = EXEC_MODE_LR; + err = xe_hw_engine_group_wait_for_dma_fence_jobs(group); + break; + } + + if (err) + return err; + + group->cur_mode = new_mode; + + return 0; +} + +/** + * xe_hw_engine_group_get_mode() - Get the group to execute in the new mode + * @group: The hw engine group + * @new_mode: The new execution mode + * @previous_mode: Pointer to the previous mode provided for use by caller + * + * Return: 0 if successful, -EINTR if locking failed. + */ +int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group, + enum xe_hw_engine_group_execution_mode new_mode, + enum xe_hw_engine_group_execution_mode *previous_mode) +__acquires(&group->mode_sem) +{ + int err = down_read_interruptible(&group->mode_sem); + + if (err) + return err; + + *previous_mode = group->cur_mode; + + if (new_mode != group->cur_mode) { + up_read(&group->mode_sem); + err = down_write_killable(&group->mode_sem); + if (err) + return err; + + if (new_mode != group->cur_mode) { + err = switch_mode(group); + if (err) { + up_write(&group->mode_sem); + return err; + } + } + downgrade_write(&group->mode_sem); + } + + return err; +} + +/** + * xe_hw_engine_group_put() - Put the group + * @group: The hw engine group + */ +void xe_hw_engine_group_put(struct xe_hw_engine_group *group) +__releases(&group->mode_sem) +{ + up_read(&group->mode_sem); +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h b/drivers/gpu/drm/xe/xe_hw_engine_group.h index 857a83787504..e0deb7c7bb5b 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h @@ -17,4 +17,9 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt); int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q); void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q); +int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group, + enum xe_hw_engine_group_execution_mode new_mode, + enum xe_hw_engine_group_execution_mode *previous_mode); +void xe_hw_engine_group_put(struct xe_hw_engine_group *group); + #endif From d16ef1a18e39a5086a419d8b3c71adb30273881a Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:34 +0200 Subject: [PATCH 058/108] drm/xe/exec: Switch hw engine 
group execution mode upon job submission If the job about to be submitted is a dma-fence job, update the current execution mode of the hw engine group. This triggers an immediate suspend of the exec queues running faulting long-running jobs. If the job about to be submitted is a long-running job, kick a new worker used to resume the exec queues running faulting long-running jobs once the dma-fence jobs have completed. v2: Kick the resume worker from exec IOCTL, switch to unordered workqueue, destroy it after use (Matt Brost) v3: Do not resume if no exec queue was suspended (Matt Brost) v4: Squash commits (Matt Brost) v5: Do not kick the worker when xe_vm_in_preempt_fence_mode (Matt Brost) Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-10-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_exec.c | 20 +++++++- drivers/gpu/drm/xe/xe_hw_engine_group.c | 62 ++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_hw_engine_group.h | 4 ++ 3 files changed, 84 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index f36980aa26e6..484acfbe0e61 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -14,6 +14,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" +#include "xe_hw_engine_group.h" #include "xe_macros.h" #include "xe_ring_ops_types.h" #include "xe_sched_job.h" @@ -124,6 +125,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) bool write_locked, skip_retry = false; ktime_t end = 0; int err = 0; + struct xe_hw_engine_group *group; + enum xe_hw_engine_group_execution_mode mode, previous_mode; if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) || @@ -182,6 +185,15 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } } + group = q->hwe->hw_engine_group; + mode = xe_hw_engine_group_find_exec_mode(q); + + if (mode == EXEC_MODE_DMA_FENCE) { + err = xe_hw_engine_group_get_mode(group, mode, &previous_mode); + if (err) + goto err_syncs; + } + retry: if (!xe_vm_in_lr_mode(vm) && xe_vm_userptr_check_repin(vm)) { err = down_write_killable(&vm->lock); @@ -199,7 +211,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) downgrade_write(&vm->lock); write_locked = false; if (err) - goto err_unlock_list; + goto err_hw_exec_mode; } if (!args->num_batch_buffer) { @@ -312,6 +324,9 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) spin_unlock(&xe->ttm.lru_lock); } + if (mode == EXEC_MODE_LR) + xe_hw_engine_group_resume_faulting_lr_jobs(group); + err_repin: if (!xe_vm_in_lr_mode(vm)) up_read(&vm->userptr.notifier_lock); @@ -324,6 +339,9 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) up_read(&vm->lock); if (err == -EAGAIN && !skip_retry) goto retry; +err_hw_exec_mode: + if (mode == EXEC_MODE_DMA_FENCE) + xe_hw_engine_group_put(group); err_syncs: while (num_syncs--) xe_sync_entry_cleanup(&syncs[num_syncs]); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index e6c235119351..82750520a90a 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -17,9 +17,36 @@ hw_engine_group_free(struct drm_device *drm, void *arg) { struct xe_hw_engine_group *group = arg; + destroy_workqueue(group->resume_wq); kfree(group); } +static void 
+hw_engine_group_resume_lr_jobs_func(struct work_struct *w) +{ + struct xe_exec_queue *q; + struct xe_hw_engine_group *group = container_of(w, struct xe_hw_engine_group, resume_work); + int err; + enum xe_hw_engine_group_execution_mode previous_mode; + + err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode); + if (err) + return; + + if (previous_mode == EXEC_MODE_LR) + goto put; + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (!xe_vm_in_fault_mode(q->vm)) + continue; + + q->ops->resume(q); + } + +put: + xe_hw_engine_group_put(group); +} + static struct xe_hw_engine_group * hw_engine_group_alloc(struct xe_device *xe) { @@ -30,7 +57,12 @@ hw_engine_group_alloc(struct xe_device *xe) if (!group) return ERR_PTR(-ENOMEM); + group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0); + if (!group->resume_wq) + return ERR_PTR(-ENOMEM); + init_rwsem(&group->mode_sem); + INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func); INIT_LIST_HEAD(&group->exec_queue_list); err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group); @@ -134,7 +166,7 @@ int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct x if (err) goto err_suspend; - queue_work(group->resume_wq, &group->resume_work); + xe_hw_engine_group_resume_faulting_lr_jobs(group); } list_add(&q->hw_engine_group_link, &group->exec_queue_list); @@ -167,6 +199,16 @@ void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct up_write(&group->mode_sem); } +/** + * xe_hw_engine_group_resume_faulting_lr_jobs() - Asynchronously resume the hw engine group's + * faulting LR jobs + * @group: The hw engine group + */ +void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group) +{ + queue_work(group->resume_wq, &group->resume_work); +} + /** * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group * @group: The hw engine group @@ -177,6 +219,7 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group { int err; struct xe_exec_queue *q; + bool need_resume = false; lockdep_assert_held_write(&group->mode_sem); @@ -184,6 +227,7 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group if (!xe_vm_in_fault_mode(q->vm)) continue; + need_resume = true; q->ops->suspend(q); } @@ -196,6 +240,9 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group goto err_suspend; } + if (need_resume) + xe_hw_engine_group_resume_faulting_lr_jobs(group); + return 0; err_suspend: @@ -310,3 +357,16 @@ __releases(&group->mode_sem) { up_read(&group->mode_sem); } + +/** + * xe_hw_engine_group_find_exec_mode() - Find the execution mode for this exec queue + * @q: The exec_queue + */ +enum xe_hw_engine_group_execution_mode +xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q) +{ + if (xe_vm_in_fault_mode(q->vm)) + return EXEC_MODE_LR; + else + return EXEC_MODE_DMA_FENCE; +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h b/drivers/gpu/drm/xe/xe_hw_engine_group.h index e0deb7c7bb5b..797ee81acbf2 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h @@ -22,4 +22,8 @@ int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group, enum xe_hw_engine_group_execution_mode *previous_mode); void xe_hw_engine_group_put(struct xe_hw_engine_group *group); +enum xe_hw_engine_group_execution_mode +xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q); +void 
xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group); + #endif From 226d92e49a7e7a7f5a3a37a34df7c319b72009fc Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:35 +0200 Subject: [PATCH 059/108] drm/xe/vm: Remove restriction that all VMs must be faulting if one is With this restriction, all VMs on the device must be faulting VMs if there is already one faulting VM, in which case the device is considered in fault mode. This prevents for example an application from running 3D jobs for the compositor while submitting a SVM compute job on the same device. Now that mutual exclusion of faulting LR jobs and dma fence jobs is ensured on the hw engine group, remove this restriction to allow running faulting and non-faulting VMs on the same device. Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-11-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index b15348d4809a..cd542de23234 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1762,14 +1762,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && - xe_device_in_non_fault_mode(xe))) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) && - xe_device_in_fault_mode(xe))) - return -EINVAL; - if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; From 4099cfda9dd856222d7cab6970a65896375616d5 Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Fri, 9 Aug 2024 17:51:36 +0200 Subject: [PATCH 060/108] drm/xe/device: Remove unused xe_device::usm::num_vm_in_* Those counters were used to keep track of the numbers VMs in fault mode and in non-fault mode, to determine if the whole device was in fault mode or not. This is no longer needed so remove those variables and their usages. 
Signed-off-by: Francois Dugast Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240809155156.1955925-12-francois.dugast@intel.com --- drivers/gpu/drm/xe/xe_device.h | 10 ---------- drivers/gpu/drm/xe/xe_device_types.h | 4 ---- drivers/gpu/drm/xe/xe_vm.c | 12 ------------ 3 files changed, 26 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 2c96f1b2aafd..f052c06a2d2f 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -139,16 +139,6 @@ static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt) void xe_device_assert_mem_access(struct xe_device *xe); -static inline bool xe_device_in_fault_mode(struct xe_device *xe) -{ - return xe->usm.num_vm_in_fault_mode != 0; -} - -static inline bool xe_device_in_non_fault_mode(struct xe_device *xe) -{ - return xe->usm.num_vm_in_non_fault_mode != 0; -} - static inline bool xe_device_has_flat_ccs(struct xe_device *xe) { return xe->info.has_flat_ccs; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 16a24eadd94b..fc89420d0ba6 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -368,10 +368,6 @@ struct xe_device { struct xarray asid_to_vm; /** @usm.next_asid: next ASID, used to cyclical alloc asids */ u32 next_asid; - /** @usm.num_vm_in_fault_mode: number of VM in fault mode */ - u32 num_vm_in_fault_mode; - /** @usm.num_vm_in_non_fault_mode: number of VM in non-fault mode */ - u32 num_vm_in_non_fault_mode; /** @usm.lock: protects UM state */ struct mutex lock; } usm; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index cd542de23234..3cff3ae57643 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1501,13 +1501,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) if (number_tiles > 1) vm->composite_fence_ctx = dma_fence_context_alloc(1); - mutex_lock(&xe->usm.lock); - if (flags & XE_VM_FLAG_FAULT_MODE) - xe->usm.num_vm_in_fault_mode++; - else if (!(flags & XE_VM_FLAG_MIGRATION)) - xe->usm.num_vm_in_non_fault_mode++; - mutex_unlock(&xe->usm.lock); - trace_xe_vm_create(vm); return vm; @@ -1621,11 +1614,6 @@ void xe_vm_close_and_put(struct xe_vm *vm) up_write(&vm->lock); mutex_lock(&xe->usm.lock); - if (vm->flags & XE_VM_FLAG_FAULT_MODE) - xe->usm.num_vm_in_fault_mode--; - else if (!(vm->flags & XE_VM_FLAG_MIGRATION)) - xe->usm.num_vm_in_non_fault_mode--; - if (vm->usm.asid) { void *lookup; From 6b77dab5da721f2fa9d3c3611e1cbaead8030706 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 16 Aug 2024 12:22:48 +0200 Subject: [PATCH 061/108] drm/xe: Remove redundant param from xe_bo_create_user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BO from xe_bo_create_user() will always be of type, ttm_bo_type_device. So remove that redundant parameter. 
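With the parameter gone, a call site is simply, for example:

    bo = xe_bo_create_user(xe, NULL, vm, size,
                           DRM_XE_GEM_CPU_CACHING_WC, bo_flags);

and ttm_bo_type_device is supplied internally by the function.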
Cc: Matthew Auld Cc: Matthew Brost Cc: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240816102248.25628-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/tests/xe_bo.c | 5 ++--- drivers/gpu/drm/xe/tests/xe_dma_buf.c | 2 +- drivers/gpu/drm/xe/tests/xe_migrate.c | 12 +++++++----- drivers/gpu/drm/xe/xe_bo.c | 6 ++---- drivers/gpu/drm/xe/xe_bo.h | 1 - 5 files changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index df9fd907edd4..8dac069483e8 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -125,7 +125,7 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, kunit_info(test, "Testing system memory\n"); bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags); + bo_flags); if (IS_ERR(bo)) { KUNIT_FAIL(test, "Failed to create bo.\n"); return; @@ -206,7 +206,6 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc xe_vm_lock(vm, false); bo = xe_bo_create_user(xe, NULL, vm, 0x10000, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags); xe_vm_unlock(vm); if (IS_ERR(bo)) { @@ -216,7 +215,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc external = xe_bo_create_user(xe, NULL, NULL, 0x10000, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags); + bo_flags); if (IS_ERR(external)) { KUNIT_FAIL(test, "external bo create err=%pe\n", external); goto cleanup_bo; diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index c24c8509227e..13db6c0530b3 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -126,7 +126,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) kunit_info(test, "running %s\n", __func__); bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, params->mem_mask); + params->mem_mask); if (IS_ERR(bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(bo)); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 47ae9d0b8864..1a192a2a941b 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -641,7 +641,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til long ret; sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, - DRM_XE_GEM_CPU_CACHING_WC, ttm_bo_type_device, + DRM_XE_GEM_CPU_CACHING_WC, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(sys_bo)) { @@ -664,8 +664,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_unlock(sys_bo); - ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); + ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + DRM_XE_GEM_CPU_CACHING_WC, + bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(ccs_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", @@ -687,8 +688,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_unlock(ccs_bo); - vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); + vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + DRM_XE_GEM_CPU_CACHING_WC, + bo_flags | 
XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(vram_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(vram_bo)); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 800119c8fc8d..ce8282e67e84 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1499,11 +1499,10 @@ struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u16 cpu_caching, - enum ttm_bo_type type, u32 flags) { struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, - cpu_caching, type, + cpu_caching, ttm_bo_type_device, flags | XE_BO_FLAG_USER); if (!IS_ERR(bo)) xe_bo_unlock_vm_held(bo); @@ -2027,7 +2026,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, } bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching, - ttm_bo_type_device, bo_flags); + bo_flags); if (vm) xe_vm_unlock(vm); @@ -2333,7 +2332,6 @@ int xe_bo_dumb_create(struct drm_file *file_priv, bo = xe_bo_create_user(xe, NULL, NULL, args->size, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | XE_BO_FLAG_SCANOUT | XE_BO_FLAG_NEEDS_CPU_ACCESS); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 1c9dc8adaaa3..935a94279026 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -87,7 +87,6 @@ struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile, struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u16 cpu_caching, - enum ttm_bo_type type, u32 flags); struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, From 7116c35aacedc38be6d15bd21b2fc936eed0008b Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 14 Aug 2024 12:01:30 +0100 Subject: [PATCH 062/108] drm/xe: prevent UAF around preempt fence The fence lock is part of the queue, therefore in the current design anything locking the fence should then also hold a ref to the queue to prevent the queue from being freed. However, currently it looks like we signal the fence and then drop the queue ref, but if something is waiting on the fence, the waiter is kicked to wake up at some later point, where upon waking up it first grabs the lock before checking the fence state. But if we have already dropped the queue ref, then the lock might already be freed as part of the queue, leading to uaf. To prevent this, move the fence lock into the fence itself so we don't run into lifetime issues. Alternative might be to have device level lock, or only release the queue in the fence release callback, however that might require pushing to another worker to avoid locking issues. 
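The race, spelled out as an illustrative interleaving:

    waiter:   dma_fence_wait(&pfence->base, false);  /* goes to sleep */
    signaler: dma_fence_signal(&pfence->base);
              xe_exec_queue_put(q);    /* may free q, and q->lr.lock with it */
    waiter:   /* wakes up */
              spin_lock(fence->lock);  /* fence->lock was &q->lr.lock: UAF */

Embedding the spinlock in struct xe_preempt_fence ties the lock lifetime to
the fence itself, which is the object the waiter actually holds a reference
to.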
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2454 References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2342 References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2020 Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: # v6.8+ Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240814110129.825847-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 1 - drivers/gpu/drm/xe/xe_exec_queue_types.h | 2 -- drivers/gpu/drm/xe/xe_preempt_fence.c | 3 ++- drivers/gpu/drm/xe/xe_preempt_fence_types.h | 2 ++ 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 6d3b44cbc4c7..e53937fafd14 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -620,7 +620,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (xe_vm_in_preempt_fence_mode(vm)) { q->lr.context = dma_fence_context_alloc(1); - spin_lock_init(&q->lr.lock); err = xe_vm_add_compute_exec_queue(vm, q); if (XE_IOCTL_DBG(xe, err)) diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 315f874426bd..7deb480e26af 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -126,8 +126,6 @@ struct xe_exec_queue { u32 seqno; /** @lr.link: link into VM's list of exec queues */ struct list_head link; - /** @lr.lock: preemption fences lock */ - spinlock_t lock; } lr; /** @ops: submission backend exec queue operations */ diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index 56e709d2fb30..83fbeea5aa20 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -134,8 +134,9 @@ xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q, { list_del_init(&pfence->link); pfence->q = xe_exec_queue_get(q); + spin_lock_init(&pfence->lock); dma_fence_init(&pfence->base, &preempt_fence_ops, - &q->lr.lock, context, seqno); + &pfence->lock, context, seqno); return &pfence->base; } diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h index b54b5c29b533..312c3372a49f 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence_types.h +++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h @@ -25,6 +25,8 @@ struct xe_preempt_fence { struct xe_exec_queue *q; /** @preempt_work: work struct which issues preemption */ struct work_struct preempt_work; + /** @lock: dma-fence fence lock */ + spinlock_t lock; /** @error: preempt fence is in error state */ int error; }; From 492be2a070f023c66aaef6ebd664567fda28c2a6 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 6 Aug 2024 12:50:43 +0200 Subject: [PATCH 063/108] drm/xe/display: Match i915 driver suspend/resume sequences better Suspend fbdev sooner, and disable user access before suspending to prevent some races. I've noticed this when comparing xe suspend to i915's. Matches the following commits from i915: 24b412b1bfeb ("drm/i915: Disable intel HPD poll after DRM poll init/enable") 1ef28d86bea9 ("drm/i915: Suspend the framebuffer console earlier during system suspend") bd738d859e71 ("drm/i915: Prevent modesets during driver init/shutdown") Thanks to Imre for pointing me to those commits. Driver shutdown is currently missing, but I have some idea how to implement it next. 
Signed-off-by: Maarten Lankhorst Cc: Imre Deak Reviewed-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20240806105044.596842-2-maarten.lankhorst@linux.intel.com Signed-off-by: Maarten Lankhorst,,, --- drivers/gpu/drm/xe/display/xe_display.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 982b9c5b440f..8b4fd90a9c35 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -299,8 +299,11 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) * properly. */ intel_power_domains_disable(xe); - if (has_display(xe)) + intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); + if (has_display(xe)) { drm_kms_helper_poll_disable(&xe->drm); + intel_display_driver_disable_user_access(xe); + } if (!runtime) intel_display_driver_suspend(xe); @@ -309,12 +312,13 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) intel_hpd_cancel_work(xe); + if (has_display(xe)) + intel_display_driver_suspend_access(xe); + intel_encoder_suspend_all(&xe->display); intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold); - intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); - intel_dmc_suspend(xe); } @@ -354,14 +358,19 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime) intel_display_driver_init_hw(xe); intel_hpd_init(xe); + if (has_display(xe)) + intel_display_driver_resume_access(xe); + /* MST sideband requires HPD interrupts enabled */ intel_dp_mst_resume(xe); if (!runtime) intel_display_driver_resume(xe); - intel_hpd_poll_disable(xe); - if (has_display(xe)) + if (has_display(xe)) { drm_kms_helper_poll_enable(&xe->drm); + intel_display_driver_enable_user_access(xe); + } + intel_hpd_poll_disable(xe); intel_opregion_resume(display); From cb8f81c1753187995b7a43e79c12959f14eb32d3 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 6 Aug 2024 12:50:44 +0200 Subject: [PATCH 064/108] drm/xe/display: Make display suspend/resume work on discrete We should unpin before evicting all memory, and repin after GT resume. This way, we preserve the contents of the framebuffers, and won't hang on resume due to migration engine not being restored yet. 
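The resulting order of operations is roughly (sketch of the xe_pm paths
touched below):

    suspend: xe_display_pm_suspend()  /* unpins framebuffers */
             xe_bo_evict_all()        /* VRAM contents now evictable */
             xe_gt_suspend()

    resume:  xe_gt_resume()           /* migration engine available again */
             xe_display_pm_resume()   /* repins framebuffers */
             xe_bo_restore_user()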
Signed-off-by: Maarten Lankhorst Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: stable@vger.kernel.org # v6.8+ Reviewed-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20240806105044.596842-3-maarten.lankhorst@linux.intel.com Signed-off-by: Maarten Lankhorst,,, --- drivers/gpu/drm/xe/display/xe_display.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/xe/xe_pm.c | 11 ++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 8b4fd90a9c35..30dfdac9f6fa 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -287,6 +287,27 @@ static bool suspend_to_idle(void) return false; } +static void xe_display_flush_cleanup_work(struct xe_device *xe) +{ + struct intel_crtc *crtc; + + for_each_intel_crtc(&xe->drm, crtc) { + struct drm_crtc_commit *commit; + + spin_lock(&crtc->base.commit_lock); + commit = list_first_entry_or_null(&crtc->base.commit_list, + struct drm_crtc_commit, commit_entry); + if (commit) + drm_crtc_commit_get(commit); + spin_unlock(&crtc->base.commit_lock); + + if (commit) { + wait_for_completion(&commit->cleanup_done); + drm_crtc_commit_put(commit); + } + } +} + void xe_display_pm_suspend(struct xe_device *xe, bool runtime) { struct intel_display *display = &xe->display; @@ -308,6 +329,8 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) if (!runtime) intel_display_driver_suspend(xe); + xe_display_flush_cleanup_work(xe); + intel_dp_mst_suspend(xe); intel_hpd_cancel_work(xe); diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 9f3c14fd9f33..fcfb49af8c89 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -93,13 +93,13 @@ int xe_pm_suspend(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_suspend_prepare(gt); + xe_display_pm_suspend(xe, false); + /* FIXME: Super racey... 
*/ err = xe_bo_evict_all(xe); if (err) goto err; - xe_display_pm_suspend(xe, false); - for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); if (err) { @@ -154,11 +154,11 @@ int xe_pm_resume(struct xe_device *xe) xe_irq_resume(xe); - xe_display_pm_resume(xe, false); - for_each_gt(gt, xe, id) xe_gt_resume(gt); + xe_display_pm_resume(xe, false); + err = xe_bo_restore_user(xe); if (err) goto err; @@ -367,10 +367,11 @@ int xe_pm_runtime_suspend(struct xe_device *xe) mutex_unlock(&xe->mem_access.vram_userfault.lock); if (xe->d3cold.allowed) { + xe_display_pm_suspend(xe, true); + err = xe_bo_evict_all(xe); if (err) goto out; - xe_display_pm_suspend(xe, true); } for_each_gt(gt, xe, id) { From 6841a26e2c6715dc52a1cdd888e958dd2a92a5bc Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 19 Aug 2024 11:57:52 +0200 Subject: [PATCH 065/108] drm/xe/oa: Use vma_pages() helper function in xe_oa_mmap() Use the vma_pages() helper function and remove the following Coccinelle/coccicheck warning reported by vma_pages.cocci: WARNING: Consider using vma_pages helper on vma Reviewed-by: Ashutosh Dixit Signed-off-by: Thorsten Blum Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240819095751.539645-2-thorsten.blum@toblux.com --- drivers/gpu/drm/xe/xe_oa.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 3ef92eb8fbb1..4d4541e0b24c 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1244,8 +1244,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) vm_flags_mod(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY, VM_MAYWRITE | VM_MAYEXEC); - xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == - (vma->vm_end - vma->vm_start) >> PAGE_SHIFT); + xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == vma_pages(vma)); for (i = 0; i < bo->ttm.ttm->num_pages; i++) { ret = remap_pfn_range(vma, start, page_to_pfn(bo->ttm.ttm->pages[i]), PAGE_SIZE, vma->vm_page_prot); From decbfaf06db05fa1f9b33149ebb3c145b44e878f Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 16 Aug 2024 15:51:53 +0200 Subject: [PATCH 066/108] drm/ttm: Add a flag to allow drivers to skip clear-on-free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TTM_TT_FLAG_CLEARED_ON_FREE, which DRM drivers can set before releasing backing stores if they want to skip clear-on-free. 
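A driver that has already cleared the pages itself (for example with its copy
engine) would opt in along these lines (sketch; the check shown is
hypothetical, the actual Xe wiring follows in the next patch):

    if (pages_were_cleared_by_gpu(tt))
        tt->page_flags |= TTM_TT_FLAG_CLEARED_ON_FREE;

    /* ttm_pool_free_range() then skips clear_page()/clear_highpage() */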
Cc: Matthew Auld Cc: Thomas Hellström Suggested-by: Christian König Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240816135154.19678-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/ttm/ttm_pool.c | 18 +++++++++++------- include/drm/ttm/ttm_tt.h | 6 +++++- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 8504dbe19c1a..935ab3cfd046 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -222,15 +222,18 @@ static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr, } /* Give pages into a specific pool_type */ -static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p) +static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p, + bool cleared) { unsigned int i, num_pages = 1 << pt->order; - for (i = 0; i < num_pages; ++i) { - if (PageHighMem(p)) - clear_highpage(p + i); - else - clear_page(page_address(p + i)); + if (!cleared) { + for (i = 0; i < num_pages; ++i) { + if (PageHighMem(p)) + clear_highpage(p + i); + else + clear_page(page_address(p + i)); + } } spin_lock(&pt->lock); @@ -394,6 +397,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, pgoff_t start_page, pgoff_t end_page) { struct page **pages = &tt->pages[start_page]; + bool cleared = tt->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE; unsigned int order; pgoff_t i, nr; @@ -407,7 +411,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, pt = ttm_pool_select_type(pool, caching, order); if (pt) - ttm_pool_type_give(pt, *pages); + ttm_pool_type_give(pt, *pages, cleared); else ttm_pool_free_page(pool, caching, order, *pages); } diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 2b9d856ff388..cfaf49de2419 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -85,6 +85,9 @@ struct ttm_tt { * fault handling abuses the DMA api a bit and dma_map_attrs can't be * used to assure pgprot always matches. * + * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm driver handles + * clearing backing store + * * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. This is * set by TTM after ttm_tt_populate() has successfully returned, and is * then unset when TTM calls ttm_tt_unpopulate(). @@ -94,8 +97,9 @@ struct ttm_tt { #define TTM_TT_FLAG_EXTERNAL BIT(2) #define TTM_TT_FLAG_EXTERNAL_MAPPABLE BIT(3) #define TTM_TT_FLAG_DECRYPTED BIT(4) +#define TTM_TT_FLAG_CLEARED_ON_FREE BIT(5) -#define TTM_TT_FLAG_PRIV_POPULATED BIT(5) +#define TTM_TT_FLAG_PRIV_POPULATED BIT(6) uint32_t page_flags; /** @num_pages: Number of pages in the page array. */ uint32_t num_pages; From 23683061805be368c8d1c7e7ff52abc470cac275 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 16 Aug 2024 15:51:54 +0200 Subject: [PATCH 067/108] drm/xe/lnl: Offload system clear page activity to GPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On LNL because of flat CCS, driver creates migrates job to clear CCS meta data. Extend that to also clear system pages using GPU. Inform TTM to allocate pages without __GFP_ZERO to avoid double page clearing by clearing out TTM_TT_FLAG_ZERO_ALLOC flag and set TTM_TT_FLAG_CLEARED_ON_FREE while freeing to skip ttm pool's clear on free as XE now takes care of clearing pages. 
If a bo is in system placement such as BO created with DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING and there is a cpu map then for such BO gpu clear will be avoided as there is no dma mapping for such BO at that moment to create migration jobs. Tested this patch api_overhead_benchmark_l0 from https://github.com/intel/compute-benchmarks Without the patch: api_overhead_benchmark_l0 --testFilter=UsmMemoryAllocation: UsmMemoryAllocation(api=l0 type=Host size=4KB) 84.206 us UsmMemoryAllocation(api=l0 type=Host size=1GB) 105775.56 us erf tool top 5 entries: 71.44% api_overhead_be [kernel.kallsyms] [k] clear_page_erms 6.34% api_overhead_be [kernel.kallsyms] [k] __pageblock_pfn_to_page 2.24% api_overhead_be [kernel.kallsyms] [k] cpa_flush 2.15% api_overhead_be [kernel.kallsyms] [k] pages_are_mergeable 1.94% api_overhead_be [kernel.kallsyms] [k] find_next_iomem_res With the patch: api_overhead_benchmark_l0 --testFilter=UsmMemoryAllocation: UsmMemoryAllocation(api=l0 type=Host size=4KB) 79.439 us UsmMemoryAllocation(api=l0 type=Host size=1GB) 98677.75 us Perf tool top 5 entries: 11.16% api_overhead_be [kernel.kallsyms] [k] __pageblock_pfn_to_page 7.85% api_overhead_be [kernel.kallsyms] [k] cpa_flush 7.59% api_overhead_be [kernel.kallsyms] [k] find_next_iomem_res 7.24% api_overhead_be [kernel.kallsyms] [k] pages_are_mergeable 5.53% api_overhead_be [kernel.kallsyms] [k] lookup_address_in_pgd_attr Without this patch clear_page_erms() dominates execution time which is also not pipelined with migration jobs. With this patch page clearing will get pipelined with migration job and will free CPU for more work. v2: Handle regression on dgfx(Himal) Update commit message as no ttm API changes needed. v3: Fix Kunit test. v4: handle data leak on cpu mmap(Thomas) v5: s/gpu_page_clear/gpu_page_clear_sys and move setting it to xe_ttm_sys_mgr_init() and other nits (Matt Auld) v6: Disable it when init_on_alloc and/or init_on_free is active(Matt) Use compute-benchmarks as reporter used it to report this allocation latency issue also a proper test application than mime. In v5, the test showed significant reduction in alloc latency but that is not the case any more, I think this was mostly because previous test was done on IFWI which had low mem BW from CPU. Cc: Himal Prasad Ghimiray Cc: Matthew Auld Cc: Matthew Brost Cc: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240816135154.19678-2-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_bo.c | 26 ++++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_ttm_sys_mgr.c | 12 ++++++++++++ 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index ce8282e67e84..6ed0e1955215 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -396,6 +396,14 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, caching = ttm_uncached; } + /* + * If the device can support gpu clear system pages then set proper ttm + * flag. Zeroed pages are only required for ttm_bo_type_device so + * unwanted data is not leaked to userspace. 
+ */ + if (ttm_bo->type == ttm_bo_type_device && xe->mem.gpu_page_clear_sys) + page_flags |= TTM_TT_FLAG_CLEARED_ON_FREE; + err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages); if (err) { kfree(tt); @@ -417,6 +425,10 @@ static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt, if (tt->page_flags & TTM_TT_FLAG_EXTERNAL) return 0; + /* Clear TTM_TT_FLAG_ZERO_ALLOC when GPU is set to clear system pages */ + if (tt->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE) + tt->page_flags &= ~TTM_TT_FLAG_ZERO_ALLOC; + err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx); if (err) return err; @@ -659,8 +671,16 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, bool needs_clear; bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) && ttm && ttm_tt_is_populated(ttm)) ? true : false; + bool clear_system_pages; int ret = 0; + /* + * Clear TTM_TT_FLAG_CLEARED_ON_FREE on bo creation path when + * moving to system as the bo doesn't have dma_mapping. + */ + if (!old_mem && ttm && !ttm_tt_is_populated(ttm)) + ttm->page_flags &= ~TTM_TT_FLAG_CLEARED_ON_FREE; + /* Bo creation path, moving to system or TT. */ if ((!old_mem && ttm) && !handle_system_ccs) { if (new_mem->mem_type == XE_PL_TT) @@ -683,8 +703,10 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared) : (!mem_type_is_vram(old_mem_type) && !tt_has_data); + clear_system_pages = ttm && (ttm->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE); needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || - (!ttm && ttm_bo->type == ttm_bo_type_device); + (!ttm && ttm_bo->type == ttm_bo_type_device) || + clear_system_pages; if (new_mem->mem_type == XE_PL_TT) { ret = xe_tt_map_sg(ttm); @@ -796,7 +818,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, if (move_lacks_source) { u32 flags = 0; - if (mem_type_is_vram(new_mem->mem_type)) + if (mem_type_is_vram(new_mem->mem_type) || clear_system_pages) flags |= XE_MIGRATE_CLEAR_FLAG_FULL; else if (handle_system_ccs) flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index fc89420d0ba6..cb60bc5ec21b 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -339,6 +339,8 @@ struct xe_device { struct xe_mem_region vram; /** @mem.sys_mgr: system TTM manager */ struct ttm_resource_manager sys_mgr; + /** @mem.gpu_page_clear_sys: clear system pages offloaded to GPU */ + bool gpu_page_clear_sys; } mem; /** @sriov: device level virtualization data */ diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c index 9844a8edbfe1..e0ac20f20758 100644 --- a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c @@ -117,5 +117,17 @@ int xe_ttm_sys_mgr_init(struct xe_device *xe) ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT); ttm_set_driver_manager(&xe->ttm, XE_PL_TT, man); ttm_resource_manager_set_used(man, true); + + /* + * On iGFX device with flat CCS, we clear CCS metadata, let's extend that + * and use GPU to clear pages as well. + * + * Disable this when init_on_free and/or init_on_alloc is on to avoid double + * zeroing pages with CPU and GPU. 
+ */ + if (xe_device_has_flat_ccs(xe) && !IS_DGFX(xe) && + !want_init_on_alloc(GFP_USER) && !want_init_on_free()) + xe->mem.gpu_page_clear_sys = true; + return drmm_add_action_or_reset(&xe->drm, ttm_sys_mgr_fini, xe); } From 7586fc52b14e0b8edd0d1f8a434e0de2078b7b2b Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Sat, 17 Aug 2024 02:47:30 +0000 Subject: [PATCH 068/108] drm/xe: Fix missing workqueue destroy in xe_gt_pagefault On driver reload we never free up the memory for the pagefault and access counter workqueues. Add those destroy calls here. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Stuart Summers Reviewed-by: Rodrigo Vivi Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/c9a951505271dc3a7aee76de7656679f69c11518.1723862633.git.stuart.summers@intel.com --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 9292d5468868..b2a7fa55bd18 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -382,6 +382,18 @@ static void pf_queue_work_func(struct work_struct *w) static void acc_queue_work_func(struct work_struct *w); +static void pagefault_fini(void *arg) +{ + struct xe_gt *gt = arg; + struct xe_device *xe = gt_to_xe(gt); + + if (!xe->info.has_usm) + return; + + destroy_workqueue(gt->usm.acc_wq); + destroy_workqueue(gt->usm.pf_wq); +} + int xe_gt_pagefault_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -409,10 +421,12 @@ int xe_gt_pagefault_init(struct xe_gt *gt) gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue", WQ_UNBOUND | WQ_HIGHPRI, NUM_ACC_QUEUE); - if (!gt->usm.acc_wq) + if (!gt->usm.acc_wq) { + destroy_workqueue(gt->usm.pf_wq); return -ENOMEM; + } - return 0; + return devm_add_action_or_reset(xe->drm.dev, pagefault_fini, gt); } void xe_gt_pagefault_reset(struct xe_gt *gt) From 3338e4f90c143cf32f77d64f464cb7f2c2d24700 Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Sat, 17 Aug 2024 02:47:31 +0000 Subject: [PATCH 069/108] drm/xe: Use topology to determine page fault queue size Currently the page fault queue size is hard coded. However the hardware supports faulting for each EU and each CS. For some applications running on hardware with a large number of EUs and CSs, this can result in an overflow of the page fault queue. Add a small calculation to determine the page fault queue size based on the number of EUs and CSs in the platform as detmined by fuses. 
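As a worked example with made-up topology numbers: a GT with 64 enabled DSS
and 16 EUs per DSS gives num_eus = 64 * 16 = 1024, so

    pf_queue->num_dw = (1024 + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW;

which scales with the fused topology instead of the previous fixed
PF_QUEUE_NUM_DW of 128.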
Signed-off-by: Stuart Summers Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/24d582a3b48c97793b8b6a402f34b4b469471636.1723862633.git.stuart.summers@intel.com --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 54 +++++++++++++++++++++------- drivers/gpu/drm/xe/xe_gt_types.h | 9 +++-- 2 files changed, 49 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index b2a7fa55bd18..401c0527d914 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -287,7 +287,7 @@ static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) PFD_VIRTUAL_ADDR_LO_SHIFT; pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) % - PF_QUEUE_NUM_DW; + pf_queue->num_dw; ret = true; } spin_unlock_irq(&pf_queue->lock); @@ -299,7 +299,8 @@ static bool pf_queue_full(struct pf_queue *pf_queue) { lockdep_assert_held(&pf_queue->lock); - return CIRC_SPACE(pf_queue->head, pf_queue->tail, PF_QUEUE_NUM_DW) <= + return CIRC_SPACE(pf_queue->head, pf_queue->tail, + pf_queue->num_dw) <= PF_MSG_LEN_DW; } @@ -312,22 +313,23 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) u32 asid; bool full; - /* - * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0 - */ - BUILD_BUG_ON(PF_QUEUE_NUM_DW % PF_MSG_LEN_DW); - if (unlikely(len != PF_MSG_LEN_DW)) return -EPROTO; asid = FIELD_GET(PFD_ASID, msg[1]); pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE); + /* + * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0 + */ + xe_gt_assert(gt, !(pf_queue->num_dw % PF_MSG_LEN_DW)); + spin_lock_irqsave(&pf_queue->lock, flags); full = pf_queue_full(pf_queue); if (!full) { memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32)); - pf_queue->head = (pf_queue->head + len) % PF_QUEUE_NUM_DW; + pf_queue->head = (pf_queue->head + len) % + pf_queue->num_dw; queue_work(gt->usm.pf_wq, &pf_queue->worker); } else { drm_warn(&xe->drm, "PF Queue full, shouldn't be possible"); @@ -386,26 +388,54 @@ static void pagefault_fini(void *arg) { struct xe_gt *gt = arg; struct xe_device *xe = gt_to_xe(gt); + int i; if (!xe->info.has_usm) return; destroy_workqueue(gt->usm.acc_wq); destroy_workqueue(gt->usm.pf_wq); + + for (i = 0; i < NUM_PF_QUEUE; ++i) + kfree(gt->usm.pf_queue[i].data); +} + +static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) +{ + xe_dss_mask_t all_dss; + int num_dss, num_eus; + + bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, + XE_MAX_DSS_FUSE_BITS); + + num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS); + num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, + XE_MAX_EU_FUSE_BITS) * num_dss; + + /* user can issue separate page faults per EU and per CS */ + pf_queue->num_dw = + (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW; + + pf_queue->gt = gt; + pf_queue->data = kzalloc(pf_queue->num_dw, GFP_KERNEL); + spin_lock_init(&pf_queue->lock); + INIT_WORK(&pf_queue->worker, pf_queue_work_func); + + return 0; } int xe_gt_pagefault_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - int i; + int i, ret = 0; if (!xe->info.has_usm) return 0; for (i = 0; i < NUM_PF_QUEUE; ++i) { - gt->usm.pf_queue[i].gt = gt; - spin_lock_init(>->usm.pf_queue[i].lock); - INIT_WORK(>->usm.pf_queue[i].worker, pf_queue_work_func); + ret = xe_alloc_pf_queue(gt, >->usm.pf_queue[i]); + if (ret) + return ret; } for (i = 0; i < NUM_ACC_QUEUE; ++i) { gt->usm.acc_queue[i].gt = gt; diff --git 
a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 82cf9f631736..31946d7fe701 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -247,9 +247,14 @@ struct xe_gt { struct pf_queue { /** @usm.pf_queue.gt: back pointer to GT */ struct xe_gt *gt; -#define PF_QUEUE_NUM_DW 128 /** @usm.pf_queue.data: data in the page fault queue */ - u32 data[PF_QUEUE_NUM_DW]; + u32 *data; + /** + * @usm.pf_queue.num_dw: number of DWORDS in the page + * fault queue. Dynamically calculated based on the number + * of compute resources available. + */ + u32 num_dw; /** * @usm.pf_queue.tail: tail pointer in DWs for page fault queue, * moved by worker which processes faults (consumer). From df2dbc925fad3274b952b865f85d26d1e056c1cc Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Sat, 17 Aug 2024 02:47:32 +0000 Subject: [PATCH 070/108] drm/xe/guc: Bump the G2H queue size to account for page faults With the increase in the size of the recoverable page fault queue, we want to ensure the initial messages from GuC in the G2H buffer have space while we transfer those out to the actual pf_queue. Bump the G2H queue size to account for this increase in the pf_queue size. Reviewed-by: Matthew Brost Signed-off-by: Stuart Summers Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/4c2b6974801bcffd8a010d838c8733fa4092573d.1723862633.git.stuart.summers@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index beeeb120d1fc..f24dd5223926 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -105,12 +105,20 @@ ct_to_xe(struct xe_guc_ct *ct) * enough space to avoid backpressure on the driver. We increase the size * of the receive buffer (relative to the send) to ensure a G2H response * CTB has a landing spot. + * + * In addition to submissions, the G2H buffer needs to be able to hold + * enough space for recoverable page fault notifications. The number of + * page faults is interrupt driven and can be as much as the number of + * compute resources available. However, most of the actual work for these + * is in a separate page fault worker thread. Therefore we only need to + * make sure the queue has enough space to handle all of the submissions + * and responses and an extra buffer for incoming page faults. */ #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) #define CTB_H2G_BUFFER_SIZE (SZ_4K) -#define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE) -#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 4) +#define CTB_G2H_BUFFER_SIZE (SZ_128K) +#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 2) /** * xe_guc_ct_queue_proc_time_jiffies - Return maximum time to process a full From 60db6f540af9f93144d5039140aa2ed17171d168 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 15 Aug 2024 12:35:22 -0700 Subject: [PATCH 071/108] drm/xe: Drop HW fence pointer to HW fence ctx The HW fence ctx objects are not ref counted rather tied to the life of an LRC object. HW fences reference the HW fence ctx, HW fences can outlive LRCs thus resulting in UAF. Drop the HW fence pointer to HW fence ctx rather just store what is needed directly in HW fence. 
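The lifetime problem, illustrated (sketch):

    /* LRC is destroyed, freeing its embedded xe_hw_fence_ctx */
    ...
    /* a still-pending HW fence is later inspected */
    fence->ops->get_timeline_name(fence);  /* read fence->ctx->name: UAF */

After this change the fence carries its own copy of the context name and a
pointer to the xe device, so nothing reaches through the freed ctx.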
v2: - Fix typo in commit (Ashutosh) - Use snprintf (Ashutosh) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240815193522.16008-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_hw_fence.c | 9 +++++---- drivers/gpu/drm/xe/xe_hw_fence_types.h | 7 +++++-- drivers/gpu/drm/xe/xe_trace.h | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index 45a9789cf501..0b4f12be3692 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -148,20 +148,20 @@ static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - return dev_name(gt_to_xe(fence->ctx->gt)->drm.dev); + return dev_name(fence->xe->drm.dev); } static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - return fence->ctx->name; + return fence->name; } static bool xe_hw_fence_signaled(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - struct xe_device *xe = gt_to_xe(fence->ctx->gt); + struct xe_device *xe = fence->xe; u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32); return dma_fence->error || @@ -253,7 +253,8 @@ void xe_hw_fence_init(struct dma_fence *fence, struct xe_hw_fence_ctx *ctx, struct xe_hw_fence *hw_fence = container_of(fence, typeof(*hw_fence), dma); - hw_fence->ctx = ctx; + hw_fence->xe = gt_to_xe(ctx->gt); + snprintf(hw_fence->name, sizeof(hw_fence->name), "%s", ctx->name); hw_fence->seqno_map = seqno_map; INIT_LIST_HEAD(&hw_fence->irq_link); diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h index b33c4956e8ea..364a61f4bfda 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence_types.h +++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h @@ -12,6 +12,7 @@ #include #include +struct xe_device; struct xe_gt; /** @@ -61,8 +62,10 @@ struct xe_hw_fence_ctx { struct xe_hw_fence { /** @dma: base dma fence for hardware fence context */ struct dma_fence dma; - /** @ctx: hardware fence context */ - struct xe_hw_fence_ctx *ctx; + /** @xe: Xe device for hw fence driver name */ + struct xe_device *xe; + /** @name: name of hardware fence context */ + char name[MAX_FENCE_NAME_LEN]; /** @seqno_map: I/O map for seqno */ struct iosys_map seqno_map; /** @irq_link: Link in struct xe_hw_fence_irq.pending */ diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 1abdb30cb7ad..8573d7a87d84 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -309,7 +309,7 @@ DECLARE_EVENT_CLASS(xe_hw_fence, TP_ARGS(fence), TP_STRUCT__entry( - __string(dev, __dev_name_gt(fence->ctx->gt)) + __string(dev, __dev_name_xe(fence->xe)) __field(u64, ctx) __field(u32, seqno) __field(struct xe_hw_fence *, fence) From 32a42c93b74c8ca6d0915ea3eba21bceff53042f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 13:23:09 -0700 Subject: [PATCH 072/108] drm/xe: Free job before xe_exec_queue_put Free job depends on job->vm being valid, the last xe_exec_queue_put can destroy the VM. Prevent UAF by freeing job before xe_exec_queue_put. 
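In other words, a sketch of the fixed ordering:

    q = job->q;
    ...
    job_free(job);         /* may still dereference state under job->vm */
    xe_exec_queue_put(q);  /* may drop the last reference to the VM */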
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Nirmoy Das Reviewed-by: Jagmeet Randhawa Link: https://patchwork.freedesktop.org/patch/msgid/20240820202309.1260755-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_sched_job.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 44d534e362cd..9628f9deb3c0 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -171,12 +171,13 @@ void xe_sched_job_destroy(struct kref *ref) struct xe_sched_job *job = container_of(ref, struct xe_sched_job, refcount); struct xe_device *xe = job_to_xe(job); + struct xe_exec_queue *q = job->q; xe_sched_job_free_fences(job); - xe_exec_queue_put(job->q); dma_fence_put(job->fence); drm_sched_job_cleanup(&job->drm); job_free(job); + xe_exec_queue_put(q); xe_pm_runtime_put(xe); } From 77cc3f6c58b1b28cee73904946c46a1415187d04 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 09:01:29 -0700 Subject: [PATCH 073/108] drm/xe: Invalidate media_gt TLBs Testing on LNL has shown media TLBs need to be invalidated via the GuC, update xe_vm_invalidate_vma appropriately. v2: Fix 2 tile case v3: Include missing local change Fixes: 3330361543fc ("drm/xe/lnl: Add LNL platform definition") Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240820160129.986889-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 3cff3ae57643..bfa4880a1673 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3157,9 +3157,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) { struct xe_device *xe = xe_vma_vm(vma)->xe; struct xe_tile *tile; - struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE]; - u32 tile_needs_invalidate = 0; + struct xe_gt_tlb_invalidation_fence + fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; u8 id; + u32 fence_id = 0; int ret = 0; xe_assert(xe, !xe_vma_is_null(vma)); @@ -3187,27 +3188,37 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) if (xe_pt_zap_ptes(tile, vma)) { xe_device_wmb(xe); xe_gt_tlb_invalidation_fence_init(tile->primary_gt, - &fence[id], true); + &fence[fence_id], + true); - /* - * FIXME: We potentially need to invalidate multiple - * GTs within the tile - */ ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, - &fence[id], vma); + &fence[fence_id], vma); if (ret < 0) { - xe_gt_tlb_invalidation_fence_fini(&fence[id]); + xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]); goto wait; } + ++fence_id; - tile_needs_invalidate |= BIT(id); + if (!tile->media_gt) + continue; + + xe_gt_tlb_invalidation_fence_init(tile->media_gt, + &fence[fence_id], + true); + + ret = xe_gt_tlb_invalidation_vma(tile->media_gt, + &fence[fence_id], vma); + if (ret < 0) { + xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]); + goto wait; + } + ++fence_id; } } wait: - for_each_tile(tile, xe, id) - if (tile_needs_invalidate & BIT(id)) - xe_gt_tlb_invalidation_fence_wait(&fence[id]); + for (id = 0; id < fence_id; ++id) + xe_gt_tlb_invalidation_fence_wait(&fence[id]); vma->tile_invalidated = vma->tile_mask; From 40520283e0fd11237ed9dfc0991503b3403d5fa4 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 09:16:32 -0700 Subject: [PATCH 074/108] drm/xe: 
Invalidate media_gt TLBs in PT code Testing on LNL has shown media GT's TLBs need to be invalidated via the GuC, update PT code appropriately. v2: - Do dma_fence_get before first call of invalidation_fence_init (Himal) - No need to check for valid chain fence (Himal) Fixes: 3330361543fc ("drm/xe/lnl: Add LNL platform definition") Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240820161632.987369-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_pt.c | 99 ++++++++++++++++++++++++++++++-------- 1 file changed, 80 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 579ed31b46db..6c6714af3d5d 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -3,6 +3,8 @@ * Copyright © 2022 Intel Corporation */ +#include + #include "xe_pt.h" #include "regs/xe_gtt_defs.h" @@ -1849,13 +1851,20 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence) + struct xe_vma *vma, struct dma_fence *fence, + struct dma_fence *fence2) { - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, pt_update_ops->wait_vm_bookkeep ? DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); + if (fence2) + dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, + pt_update_ops->wait_vm_bookkeep ? + DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + } vma->tile_present |= BIT(tile->id); vma->tile_staged &= ~BIT(tile->id); if (xe_vma_is_userptr(vma)) { @@ -1875,13 +1884,20 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence) + struct xe_vma *vma, struct dma_fence *fence, + struct dma_fence *fence2) { - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, pt_update_ops->wait_vm_bookkeep ? DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); + if (fence2) + dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, + pt_update_ops->wait_vm_bookkeep ? 
+ DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); + } vma->tile_present &= ~BIT(tile->id); if (!vma->tile_present) { list_del_init(&vma->combined_links.rebind); @@ -1898,7 +1914,8 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, static void op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op, struct dma_fence *fence) + struct xe_vma_op *op, struct dma_fence *fence, + struct dma_fence *fence2) { xe_vm_assert_held(vm); @@ -1907,26 +1924,28 @@ static void op_commit(struct xe_vm *vm, if (!op->map.immediate && xe_vm_in_fault_mode(vm)) break; - bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence); + bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, + fence2); break; case DRM_GPUVA_OP_REMAP: unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va), fence); + gpuva_to_vma(op->base.remap.unmap->va), fence, + fence2); if (op->remap.prev) bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence); + fence, fence2); if (op->remap.next) bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence); + fence, fence2); break; case DRM_GPUVA_OP_UNMAP: unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va), fence); + gpuva_to_vma(op->base.unmap.va), fence, fence2); break; case DRM_GPUVA_OP_PREFETCH: bind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va), fence); + gpuva_to_vma(op->base.prefetch.va), fence, fence2); break; default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); @@ -1963,7 +1982,8 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) struct xe_vm_pgtable_update_ops *pt_update_ops = &vops->pt_update_ops[tile->id]; struct dma_fence *fence; - struct invalidation_fence *ifence = NULL; + struct invalidation_fence *ifence = NULL, *mfence = NULL; + struct dma_fence_chain *chain_fence = NULL; struct xe_range_fence *rfence; struct xe_vma_op *op; int err = 0, i; @@ -1996,6 +2016,18 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) err = -ENOMEM; goto kill_vm_tile1; } + if (tile->media_gt) { + mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); + if (!mfence) { + err = -ENOMEM; + goto free_ifence; + } + chain_fence = dma_fence_chain_alloc(); + if (!chain_fence) { + err = -ENOMEM; + goto free_ifence; + } + } } rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); @@ -2027,19 +2059,46 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) /* tlb invalidation must be done before signaling rebind */ if (ifence) { + if (mfence) + dma_fence_get(fence); invalidation_fence_init(tile->primary_gt, ifence, fence, pt_update_ops->start, pt_update_ops->last, vm->usm.asid); - fence = &ifence->base.base; + if (mfence) { + invalidation_fence_init(tile->media_gt, mfence, fence, + pt_update_ops->start, + pt_update_ops->last, vm->usm.asid); + dma_fence_chain_init(chain_fence, &ifence->base.base, + &mfence->base.base, 0); + fence = &chain_fence->base; + } else { + fence = &ifence->base.base; + } } - dma_resv_add_fence(xe_vm_resv(vm), fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); + if (!mfence) { + dma_resv_add_fence(xe_vm_resv(vm), fence, + pt_update_ops->wait_vm_bookkeep ? 
+ DMA_RESV_USAGE_KERNEL :
+ DMA_RESV_USAGE_BOOKKEEP);
- list_for_each_entry(op, &vops->list, link)
- op_commit(vops->vm, tile, pt_update_ops, op, fence);
+ list_for_each_entry(op, &vops->list, link)
+ op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL);
+ } else {
+ dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base,
+ pt_update_ops->wait_vm_bookkeep ?
+ DMA_RESV_USAGE_KERNEL :
+ DMA_RESV_USAGE_BOOKKEEP);
+
+ dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base,
+ pt_update_ops->wait_vm_bookkeep ?
+ DMA_RESV_USAGE_KERNEL :
+ DMA_RESV_USAGE_BOOKKEEP);
+
+ list_for_each_entry(op, &vops->list, link)
+ op_commit(vops->vm, tile, pt_update_ops, op,
+ &ifence->base.base, &mfence->base.base);
+ }
 if (pt_update_ops->needs_userptr_lock)
 up_read(&vm->userptr.notifier_lock);
@@ -2049,6 +2108,8 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
 free_rfence:
 kfree(rfence);
 free_ifence:
+ dma_fence_chain_free(chain_fence);
+ kfree(mfence);
 kfree(ifence);
 kill_vm_tile1:
 if (err != -EAGAIN && tile->id)

From 321d6b4b9cbe3dd0bc99937d5e5b4d730b5b5798 Mon Sep 17 00:00:00 2001
From: Matthew Auld
Date: Wed, 21 Aug 2024 18:19:18 +0100
Subject: [PATCH 075/108] drm/xe: fixup xe_alloc_pf_queue

kzalloc expects a number of bytes; therefore we should convert the number of dwords into bytes, otherwise we are likely accessing beyond the array, causing all kinds of carnage. Also fixup the error handling while we are here.

v2:
 - Prefer kcalloc (dim)

Fixes: 3338e4f90c14 ("drm/xe: Use topology to determine page fault queue size")
Signed-off-by: Matthew Auld
Cc: Stuart Summers
Cc: Matthew Brost
Reviewed-by: Nirmoy Das
Signed-off-by: Matthew Brost
Link: https://patchwork.freedesktop.org/patch/msgid/20240821171917.417386-2-matthew.auld@intel.com
---
 drivers/gpu/drm/xe/xe_gt_pagefault.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 401c0527d914..0be4687bfc20 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -417,7 +417,10 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue)
 (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW;
 pf_queue->gt = gt;
- pf_queue->data = kzalloc(pf_queue->num_dw, GFP_KERNEL);
+ pf_queue->data = kcalloc(pf_queue->num_dw, sizeof(u32), GFP_KERNEL);
+ if (!pf_queue->data)
+ return -ENOMEM;
+
 spin_lock_init(&pf_queue->lock);
 INIT_WORK(&pf_queue->worker, pf_queue_work_func);

From 69f0925c67c21c1e3ccda152d2b6ce21c363e563 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi
Date: Wed, 21 Aug 2024 15:38:31 -0400
Subject: [PATCH 076/108] drm/xe: Remove unused xe_ggtt_printk

Apparently this was only useful when enabling ggtt support for the very first time and was never used again. It is also not useful now that we have the ggtt_dump available through debugfs.
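For reference, the debugfs route covers the same need. A minimal sketch of how such a dump hook can be wired (the show-callback name and the m->private wiring here are illustrative; xe_ggtt_dump() is the existing helper):

	static int ggtt_dump_show(struct seq_file *m, void *data)
	{
		struct xe_ggtt *ggtt = m->private;
		struct drm_printer p = drm_seq_file_printer(m);

		return xe_ggtt_dump(ggtt, &p);
	}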
Reviewed-by: Himal Prasad Ghimiray Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 20 -------------------- drivers/gpu/drm/xe/xe_ggtt.h | 1 - 2 files changed, 21 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 0cdbc1296e88..add14f3dea1f 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -314,26 +314,6 @@ static void xe_ggtt_invalidate(struct xe_ggtt *ggtt) ggtt_invalidate_gt_tlb(ggtt->tile->media_gt); } -void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix) -{ - u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; - u64 addr, scratch_pte; - - scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, pat_index); - - printk("%sGlobal GTT:", prefix); - for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) { - unsigned int i = addr / XE_PAGE_SIZE; - - xe_tile_assert(ggtt->tile, addr <= U32_MAX); - if (ggtt->gsm[i] == scratch_pte) - continue; - - printk("%s ggtt[0x%08x] = 0x%016llx", - prefix, (u32)addr, ggtt->gsm[i]); - } -} - static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, const struct drm_mm_node *node, const char *description) { diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 6a96fd54bf60..2546bab97507 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -12,7 +12,6 @@ struct drm_printer; int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init(struct xe_ggtt *ggtt); -void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix); int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 size, struct drm_mm_node *node); void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node); From 244fe1666364865154930f34d8df5489df1922b6 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:32 -0400 Subject: [PATCH 077/108] drm/xe: Introduce GGTT documentation Document xe_ggtt and ensure it is part of the built kernel docs. v2: - Accepted all Michal's suggestions - Rebased on top of new set_pte per platform/wa function pointer v3: - Typos and other acronym fixes (Michal) Cc: Matthew Brost Cc: Michal Wajdeczko Reviewed-by: Himal Prasad Ghimiray #v1 Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-2-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- Documentation/gpu/xe/xe_mm.rst | 15 ++++ drivers/gpu/drm/xe/xe_ggtt.c | 136 +++++++++++++++++++++++------ drivers/gpu/drm/xe/xe_ggtt_types.h | 34 ++++++-- 3 files changed, 150 insertions(+), 35 deletions(-) diff --git a/Documentation/gpu/xe/xe_mm.rst b/Documentation/gpu/xe/xe_mm.rst index 6c8fd8b4a466..95864a4502dd 100644 --- a/Documentation/gpu/xe/xe_mm.rst +++ b/Documentation/gpu/xe/xe_mm.rst @@ -7,6 +7,21 @@ Memory Management .. kernel-doc:: drivers/gpu/drm/xe/xe_bo_doc.h :doc: Buffer Objects (BO) +GGTT +==== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_ggtt.c + :doc: Global Graphics Translation Table (GGTT) + +GGTT Internal API +----------------- + +.. kernel-doc:: drivers/gpu/drm/xe/xe_ggtt_types.h + :internal: + +.. 
kernel-doc:: drivers/gpu/drm/xe/xe_ggtt.c
+ :internal:
+
 Pagetable building
 ==================

diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index add14f3dea1f..dd5cd0df705c 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -30,6 +30,39 @@
 #include "xe_wa.h"
 #include "xe_wopcm.h"
+/**
+ * DOC: Global Graphics Translation Table (GGTT)
+ *
+ * Xe GGTT implements support for a Global Virtual Address space that is used
+ * for resources that are accessible to privileged (i.e. kernel-mode) processes,
+ * and not tied to a specific user-level process. For example, the Graphics
+ * micro-Controller (GuC) and Display Engine (if present) utilize this Global
+ * address space.
+ *
+ * The Global GTT (GGTT) translates from the Global virtual address to a physical
+ * address that can be accessed by HW. The GGTT is a flat, single-level table.
+ *
+ * Xe implements a simplified version of the GGTT specifically managing only a
+ * certain range of it that goes from the Write Once Protected Content Memory (WOPCM)
+ * Layout to a predefined GUC_GGTT_TOP. This approach avoids complications related to
+ * the GuC (Graphics Microcontroller) hardware limitations. The GuC address space
+ * is limited on both ends of the GGTT, because the GuC shim HW redirects
+ * accesses to those addresses to other HW areas instead of going through the
+ * GGTT. On the bottom end, the GuC can't access offsets below the WOPCM size,
+ * while on the top side the limit is fixed at GUC_GGTT_TOP. To keep things
+ * simple, instead of checking each object to see if they are accessed by GuC or
+ * not, we just exclude those areas from the allocator. Additionally, to simplify
+ * the driver load, we use the maximum WOPCM size in this logic instead of the
+ * programmed one, so we don't need to wait until the actual size to be
+ * programmed is determined (which requires FW fetch) before initializing the
+ * GGTT. These simplifications might waste space in the GGTT (about 20-25 MBs
+ * depending on the platform) but we can live with this. Another benefit of this
+ * is the GuC bootrom can't access anything below the WOPCM max size so anything
+ * the bootrom needs to access (e.g. a RSA key) needs to be placed in the GGTT
+ * above the WOPCM max size. Starting the GGTT allocations above the WOPCM max
+ * gives us the correct placement for free.
+ */
+
 static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
 u16 pat_index)
 {
@@ -164,12 +197,16 @@ static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = {
 .ggtt_set_pte = xe_ggtt_set_pte_and_flush,
 };
-/*
- * Early GGTT initialization, which allows to create new mappings usable by the
- * GuC.
- * Mappings are not usable by the HW engines, as it doesn't have scratch /
+/**
+ * xe_ggtt_init_early - Early GGTT initialization
+ * @ggtt: the &xe_ggtt to be initialized
+ *
+ * It allows the creation of new mappings usable by the GuC.
+ * Mappings are not usable by the HW engines, as it doesn't have scratch or
 * initial clear done to it yet. That will happen in the regular, non-early
- * GGTT init.
+ * GGTT initialization.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
 int xe_ggtt_init_early(struct xe_ggtt *ggtt)
 {
@@ -194,29 +231,6 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
 ggtt->flags |= XE_GGTT_FLAGS_64K;
- /*
- * 8B per entry, each points to a 4KB page.
- *
- * The GuC address space is limited on both ends of the GGTT, because
- * the GuC shim HW redirects accesses to those addresses to other HW
- * areas instead of going through the GGTT. On the bottom end, the GuC
- * can't access offsets below the WOPCM size, while on the top side the
- * limit is fixed at GUC_GGTT_TOP. To keep things simple, instead of
- * checking each object to see if they are accessed by GuC or
- * not, we
- * just exclude those areas from the allocator. Additionally, to
- * simplify the driver load, we use the maximum WOPCM size in this logic
- * instead of the programmed one, so we don't need to wait until the
- * actual size to be programmed is determined (which requires FW fetch)
- * before initializing the GGTT. These simplifications might waste space
- * in the GGTT (about 20-25 MBs depending on the platform) but we can
- * live with this.
- *
- * Another benifit of this is the GuC bootrom can't access anything
- * below the WOPCM max size so anything the bootom needs to access (e.g.
- * a RSA key) needs to be placed in the GGTT above the WOPCM max size.
- * Starting the GGTT allocations above the WOPCM max give us the correct
- * placement for free.
- */
 if (ggtt->size > GUC_GGTT_TOP)
 ggtt->size = GUC_GGTT_TOP;
@@ -262,6 +276,12 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
 mutex_unlock(&ggtt->lock);
 }
+/**
+ * xe_ggtt_init - Regular non-early GGTT initialization
+ * @ggtt: the &xe_ggtt to be initialized
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
 int xe_ggtt_init(struct xe_ggtt *ggtt)
 {
 struct xe_device *xe = tile_to_xe(ggtt->tile);
@@ -382,6 +402,18 @@ void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node)
 mutex_unlock(&ggtt->lock);
 }
+/**
+ * xe_ggtt_insert_special_node_locked - Locked version to insert a &drm_mm_node into the GGTT
+ * @ggtt: the &xe_ggtt where node will be inserted
+ * @node: the &drm_mm_node to be inserted
+ * @size: size of the node
+ * @align: alignment constraint of the node
+ * @mm_flags: flags to control the node behavior
+ *
+ * To be used in cases where ggtt->lock is already taken.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
 int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node,
 u32 size, u32 align, u32 mm_flags)
 {
@@ -389,6 +421,15 @@ int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node
 mm_flags);
 }
+/**
+ * xe_ggtt_insert_special_node - Insert a &drm_mm_node into the GGTT
+ * @ggtt: the &xe_ggtt where node will be inserted
+ * @node: the &drm_mm_node to be inserted
+ * @size: size of the node
+ * @align: alignment constraint of the node
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
 int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
 u32 size, u32 align)
 {
@@ -402,6 +443,11 @@ int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
 return ret;
 }
+/**
+ * xe_ggtt_map_bo - Map the BO into GGTT
+ * @ggtt: the &xe_ggtt where node will be mapped
+ * @bo: the &xe_bo to be mapped
+ */
 void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 {
 u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ?
XE_CACHE_NONE : XE_CACHE_WB;
@@ -449,17 +495,39 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
 return err;
 }
+/**
+ * xe_ggtt_insert_bo_at - Insert BO at a specific GGTT space
+ * @ggtt: the &xe_ggtt where bo will be inserted
+ * @bo: the &xe_bo to be inserted
+ * @start: address where it will be inserted
+ * @end: end of the range where it will be inserted
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
 int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
 u64 start, u64 end)
 {
 return __xe_ggtt_insert_bo_at(ggtt, bo, start, end);
 }
+/**
+ * xe_ggtt_insert_bo - Insert BO into GGTT
+ * @ggtt: the &xe_ggtt where bo will be inserted
+ * @bo: the &xe_bo to be inserted
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
 int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 {
 return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX);
 }
+/**
+ * xe_ggtt_remove_node - Remove a &drm_mm_node from the GGTT
+ * @ggtt: the &xe_ggtt where node will be removed
+ * @node: the &drm_mm_node to be removed
+ * @invalidate: if node needs invalidation upon removal
+ */
 void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
 bool invalidate)
 {
@@ -488,6 +556,11 @@ void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
 drm_dev_exit(idx);
 }
+/**
+ * xe_ggtt_remove_bo - Remove a BO from the GGTT
+ * @ggtt: the &xe_ggtt where node will be removed
+ * @bo: the &xe_bo to be removed
+ */
 void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 {
 if (XE_WARN_ON(!bo->ggtt_node.size))
@@ -544,6 +617,13 @@ void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vf
 }
 #endif
+/**
+ * xe_ggtt_dump - Dump GGTT for debug
+ * @ggtt: the &xe_ggtt to be dumped
+ * @p: the &drm_printer helper handle to be used to dump the information
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
 int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p)
 {
 int err;

diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index 2245d88d8f39..154de298a4e3 100644
--- a/drivers/gpu/drm/xe/xe_ggtt_types.h
+++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
@@ -13,30 +13,50 @@
 struct xe_bo;
 struct xe_gt;
+/**
+ * struct xe_ggtt - Main GGTT struct
+ *
+ * In general, each tile can contain its own Global Graphics Translation Table
+ * (GGTT) instance.
+ */
 struct xe_ggtt {
+ /** @tile: Back pointer to tile where this GGTT belongs */
 struct xe_tile *tile;
-
+ /** @size: Total size of this GGTT */
 u64 size;
 #define XE_GGTT_FLAGS_64K BIT(0)
+ /**
+ * @flags: Flags for this GGTT
+ * Acceptable flags:
+ * - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K.
+ */
 unsigned int flags;
-
+ /** @scratch: Internal object allocation used as a scratch page */
 struct xe_bo *scratch;
-
+ /** @lock: Mutex lock to protect GGTT data */
 struct mutex lock;
-
+ /**
+ * @gsm: The iomem pointer to the actual location of the translation
+ * table located in the GSM for easy PTE manipulation
+ */
 u64 __iomem *gsm;
-
+ /** @pt_ops: Page Table operations per platform */
 const struct xe_ggtt_pt_ops *pt_ops;
-
+ /** @mm: The memory manager used to manage individual GGTT allocations */
 struct drm_mm mm;
-
 /** @access_count: counts GGTT writes */
 unsigned int access_count;
 };
+/**
+ * struct xe_ggtt_pt_ops - GGTT Page table operations
+ * Which can vary from platform to platform.
+ */
 struct xe_ggtt_pt_ops {
+ /** @pte_encode_bo: Encode PTE address for a given BO */
 u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index);
+ /** @ggtt_set_pte: Directly write into GGTT's PTE */
 void (*ggtt_set_pte)(struct xe_ggtt *ggtt, u64 addr, u64 pte);
 };

From df99acc7ba1be9f111debdb75e00539fed8ad21a Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi
Date: Wed, 21 Aug 2024 15:38:33 -0400
Subject: [PATCH 078/108] drm/xe: Remove unnecessary drm_mm.h includes

These includes are no longer necessary, and where appropriate are replaced by the linux/types.h one.

Reviewed-by: Jonathan Cavitt
Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-3-rodrigo.vivi@intel.com
Signed-off-by: Rodrigo Vivi
---
 drivers/gpu/drm/xe/xe_migrate.h | 2 +-
 drivers/gpu/drm/xe/xe_res_cursor.h | 1 -
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 1 -
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 7929cc2425e8..0109866e398a 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -6,7 +6,7 @@
 #ifndef _XE_MIGRATE_
 #define _XE_MIGRATE_
-#include
+#include
 struct dma_fence;
 struct iosys_map;

diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h index 655af89b31a9..dca374b6521c 100644
--- a/drivers/gpu/drm/xe/xe_res_cursor.h
+++ b/drivers/gpu/drm/xe/xe_res_cursor.h
@@ -26,7 +26,6 @@
 #include
-#include
 #include
 #include
 #include

diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index f46fd2df84de..f7113cf6109d 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -5,7 +5,6 @@
 */
 #include
-#include
 #include
 #include

From 6dbd43dcedf3b58a18eb3518e5c19e38a97aa68a Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi
Date: Wed, 21 Aug 2024 15:38:34 -0400
Subject: [PATCH 079/108] drm/{i915, xe}: Avoid direct inspection of dpt_vma from outside dpt

DPT code is heavily dependent on the i915 vma implementation and has not yet been ported to Xe. This patch limits inspection of DPT's VMA struct to the intel_dpt component only, so the Xe GGTT code can evolve.
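I.e. all consumers go through one accessor; distilled from the change below:

	/* i915: the real thing - the DPT VMA lives in a drm_mm node */
	u64 intel_dpt_offset(struct i915_vma *dpt_vma)
	{
		return dpt_vma->node.start;
	}

	/* Xe: a stub, since the fb offset within the DPT is always 0 */
	u64 intel_dpt_offset(struct i915_vma *dpt_vma)
	{
		return 0;
	}

With callers such as skl_surf_address() using intel_dpt_offset() instead of touching dpt_vma->node directly, the node's underlying type can change without any display code churn.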
Cc: Matthew Brost
Cc: Maarten Lankhorst
Cc: Juha-Pekka Heikkila
Reviewed-by: Jonathan Cavitt
Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-4-rodrigo.vivi@intel.com
Signed-off-by: Rodrigo Vivi
---
 drivers/gpu/drm/i915/display/intel_dpt.c | 4 ++++
 drivers/gpu/drm/i915/display/intel_dpt.h | 3 +++
 drivers/gpu/drm/i915/display/skl_universal_plane.c | 3 ++-
 drivers/gpu/drm/xe/display/xe_fb_pin.c | 9 +++++++--
 4 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index 73a1918e2537..3a6d99044828 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -317,3 +317,7 @@ void intel_dpt_destroy(struct i915_address_space *vm)
 i915_vm_put(&dpt->vm);
 }
+u64 intel_dpt_offset(struct i915_vma *dpt_vma)
+{
+ return dpt_vma->node.start;
+}

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.h b/drivers/gpu/drm/i915/display/intel_dpt.h index ff18a525bfbe..1f88b0ee17e7 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.h
+++ b/drivers/gpu/drm/i915/display/intel_dpt.h
@@ -6,6 +6,8 @@
 #ifndef __INTEL_DPT_H__
 #define __INTEL_DPT_H__
+#include
+
 struct drm_i915_private;
 struct i915_address_space;
@@ -20,5 +22,6 @@ void intel_dpt_suspend(struct drm_i915_private *i915);
 void intel_dpt_resume(struct drm_i915_private *i915);
 struct i915_address_space *
 intel_dpt_create(struct intel_framebuffer *fb);
+u64 intel_dpt_offset(struct i915_vma *dpt_vma);
 #endif /* __INTEL_DPT_H__ */

diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index ba5a628b4757..1cf1d5c8b9dc 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -14,6 +14,7 @@
 #include "intel_de.h"
 #include "intel_display_irq.h"
 #include "intel_display_types.h"
+#include "intel_dpt.h"
 #include "intel_fb.h"
 #include "intel_fbc.h"
 #include "intel_frontbuffer.h"
@@ -1162,7 +1163,7 @@ static u32 skl_surf_address(const struct intel_plane_state *plane_state,
 * within the DPT is always 0.
 */
 drm_WARN_ON(&i915->drm, plane_state->dpt_vma &&
- plane_state->dpt_vma->node.start);
+ intel_dpt_offset(plane_state->dpt_vma));
 drm_WARN_ON(&i915->drm, offset & 0x1fffff);
 return offset >> 9;
 } else {

diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index d7db44e79eaf..42d431ff14e7 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -377,8 +377,8 @@ void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state)
 }
 /*
- * For Xe introduce dummy intel_dpt_create which just return NULL and
- * intel_dpt_destroy which does nothing.
+ * For Xe, introduce a dummy intel_dpt_create() which just returns NULL, an
+ * intel_dpt_destroy() which does nothing, and a fake intel_dpt_offset() returning 0.
 */
 struct i915_address_space *intel_dpt_create(struct intel_framebuffer *fb)
 {
@@ -389,3 +389,8 @@ void intel_dpt_destroy(struct i915_address_space *vm)
 {
 return;
 }
+
+u64 intel_dpt_offset(struct i915_vma *dpt_vma)
+{
+ return 0;
+}

From 6062ea9398d3ec09c521774f9d81f604d1a85fbd Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi
Date: Wed, 21 Aug 2024 15:38:35 -0400
Subject: [PATCH 080/108] drm/xe: Encapsulate drm_mm_node inside xe_ggtt_node

The xe_ggtt component uses drm_mm to manage the GGTT. The drm_mm_node is just a node inside drm_mm, but in Xe we use that only in the GGTT context.
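Distilled, the new type is just a wrapper (the shape, as added below):

	struct xe_ggtt_node {
		struct drm_mm_node base;
	};

Every access then becomes node->base.<member>, and GGTT-specific state can later live next to @base without leaking drm_mm details to the callers.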
So, this patch encapsulates the drm_mm_node into a xe_ggtt's new struct. This is the first step towards limiting all the drm_mm access through xe_ggtt. The ultimate goal is to have a better control of the node insertion and removal, so the removal can be delegated to a delayed workqueue. v2: Fix includes and typos (Michal and Brost) Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-5-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- .../gpu/drm/xe/compat-i915-headers/i915_vma.h | 7 +- drivers/gpu/drm/xe/display/xe_fb_pin.c | 10 +-- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_bo.h | 6 +- drivers/gpu/drm/xe/xe_bo_types.h | 5 +- drivers/gpu/drm/xe/xe_device_types.h | 2 +- drivers/gpu/drm/xe/xe_ggtt.c | 72 +++++++++---------- drivers/gpu/drm/xe/xe_ggtt.h | 12 ++-- drivers/gpu/drm/xe/xe_ggtt_types.h | 8 +++ drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 39 +++++----- .../gpu/drm/xe/xe_gt_sriov_pf_config_types.h | 5 +- 11 files changed, 90 insertions(+), 78 deletions(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h index a20d2638ea7a..3028ac1ba72f 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h @@ -7,7 +7,8 @@ #define I915_VMA_H #include -#include + +#include "xe_ggtt_types.h" /* We don't want these from i915_drm.h in case of Xe */ #undef I915_TILING_X @@ -19,7 +20,7 @@ struct xe_bo; struct i915_vma { struct xe_bo *bo, *dpt; - struct drm_mm_node node; + struct xe_ggtt_node node; }; #define i915_ggtt_clear_scanout(bo) do { } while (0) @@ -28,7 +29,7 @@ struct i915_vma { static inline u32 i915_ggtt_offset(const struct i915_vma *vma) { - return vma->node.start; + return vma->node.base.start; } #endif diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 42d431ff14e7..a93923fb8721 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -204,7 +204,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) align = max_t(u32, align, SZ_64K); - if (bo->ggtt_node.size && view->type == I915_GTT_VIEW_NORMAL) { + if (bo->ggtt_node.base.size && view->type == I915_GTT_VIEW_NORMAL) { vma->node = bo->ggtt_node; } else if (view->type == I915_GTT_VIEW_NORMAL) { u32 x, size = bo->ttm.base.size; @@ -218,7 +218,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x, xe->pat.idx[XE_CACHE_NONE]); - ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node.start + x, pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node.base.start + x, pte); } } else { u32 i, ggtt_ofs; @@ -232,7 +232,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, if (ret) goto out_unlock; - ggtt_ofs = vma->node.start; + ggtt_ofs = vma->node.base.start; for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++) write_ggtt_rotated(bo, ggtt, &ggtt_ofs, @@ -325,8 +325,8 @@ static void __xe_unpin_fb_vma(struct i915_vma *vma) if (vma->dpt) xe_bo_unpin_map_no_vm(vma->dpt); - else if (!drm_mm_node_allocated(&vma->bo->ggtt_node) || - vma->bo->ggtt_node.start != vma->node.start) + else if (!drm_mm_node_allocated(&vma->bo->ggtt_node.base) || + vma->bo->ggtt_node.base.start != vma->node.base.start) xe_ggtt_remove_node(ggtt, &vma->node, false); ttm_bo_reserve(&vma->bo->ttm, false, false, NULL); diff --git a/drivers/gpu/drm/xe/xe_bo.c 
b/drivers/gpu/drm/xe/xe_bo.c index 6ed0e1955215..1faef9903b87 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1120,7 +1120,7 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list)); - if (bo->ggtt_node.size) + if (bo->ggtt_node.base.size) xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo); #ifdef CONFIG_PROC_FS diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 935a94279026..9904b410be37 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -194,9 +194,9 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size) static inline u32 xe_bo_ggtt_addr(struct xe_bo *bo) { - XE_WARN_ON(bo->ggtt_node.size > bo->size); - XE_WARN_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32)); - return bo->ggtt_node.start; + XE_WARN_ON(bo->ggtt_node.base.size > bo->size); + XE_WARN_ON(bo->ggtt_node.base.start + bo->ggtt_node.base.size > (1ull << 32)); + return bo->ggtt_node.base.start; } int xe_bo_vmap(struct xe_bo *bo); diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index ebc8abf7930a..4b1de9f5be00 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -8,12 +8,13 @@ #include -#include #include #include #include #include +#include "xe_ggtt_types.h" + struct xe_device; struct xe_vm; @@ -39,7 +40,7 @@ struct xe_bo { /** @placement: current placement for this BO */ struct ttm_placement placement; /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */ - struct drm_mm_node ggtt_node; + struct xe_ggtt_node ggtt_node; /** @vmap: iosys map of this buffer */ struct iosys_map vmap; /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */ diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index cb60bc5ec21b..9f7d46a3260c 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -204,7 +204,7 @@ struct xe_tile { struct xe_memirq memirq; /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ - struct drm_mm_node ggtt_balloon[2]; + struct xe_ggtt_node ggtt_balloon[2]; } vf; } sriov; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index dd5cd0df705c..1cf682d15253 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -351,61 +351,61 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, * @ggtt: the &xe_ggtt where we want to make reservation * @start: the starting GGTT address of the reserved region * @end: then end GGTT address of the reserved region - * @node: the &drm_mm_node to hold reserved GGTT node + * @node: the &xe_ggtt_node to hold reserved GGTT node * * Use xe_ggtt_deballoon() to release a reserved GGTT node. * * Return: 0 on success or a negative error code on failure. 
*/ -int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct drm_mm_node *node) +int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct xe_ggtt_node *node) { int err; xe_tile_assert(ggtt->tile, start < end); xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE)); xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE)); - xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(node)); + xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(&node->base)); - node->color = 0; - node->start = start; - node->size = end - start; + node->base.color = 0; + node->base.start = start; + node->base.size = end - start; mutex_lock(&ggtt->lock); - err = drm_mm_reserve_node(&ggtt->mm, node); + err = drm_mm_reserve_node(&ggtt->mm, &node->base); mutex_unlock(&ggtt->lock); if (xe_gt_WARN(ggtt->tile->primary_gt, err, "Failed to balloon GGTT %#llx-%#llx (%pe)\n", - node->start, node->start + node->size, ERR_PTR(err))) + node->base.start, node->base.start + node->base.size, ERR_PTR(err))) return err; - xe_ggtt_dump_node(ggtt, node, "balloon"); + xe_ggtt_dump_node(ggtt, &node->base, "balloon"); return 0; } /** * xe_ggtt_deballoon - release a reserved GGTT region * @ggtt: the &xe_ggtt where reserved node belongs - * @node: the &drm_mm_node with reserved GGTT region + * @node: the &xe_ggtt_node with reserved GGTT region * * See xe_ggtt_balloon() for details. */ -void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node) +void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node) { - if (!drm_mm_node_allocated(node)) + if (!drm_mm_node_allocated(&node->base)) return; - xe_ggtt_dump_node(ggtt, node, "deballoon"); + xe_ggtt_dump_node(ggtt, &node->base, "deballoon"); mutex_lock(&ggtt->lock); - drm_mm_remove_node(node); + drm_mm_remove_node(&node->base); mutex_unlock(&ggtt->lock); } /** - * xe_ggtt_insert_special_node_locked - Locked version to insert a &drm_mm_node into the GGTT + * xe_ggtt_insert_special_node_locked - Locked version to insert a &xe_ggtt_node into the GGTT * @ggtt: the &xe_ggtt where node will be inserted - * @node: the &drm_mm_node to be inserted + * @node: the &xe_ggtt_node to be inserted * @size: size of the node * @align: alignment constrain of the node * @mm_flags: flags to control the node behavior @@ -414,23 +414,23 @@ void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node) * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node, +int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, u32 size, u32 align, u32 mm_flags) { - return drm_mm_insert_node_generic(&ggtt->mm, node, size, align, 0, + return drm_mm_insert_node_generic(&ggtt->mm, &node->base, size, align, 0, mm_flags); } /** - * xe_ggtt_insert_special_node - Insert a &drm_mm_node into the GGTT + * xe_ggtt_insert_special_node - Insert a &xe_ggtt_node into the GGTT * @ggtt: the &xe_ggtt where node will be inserted - * @node: the &drm_mm_node to be inserted + * @node: the &xe_ggtt_node to be inserted * @size: size of the node * @align: alignment constrain of the node * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, +int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, u32 size, u32 align) { int ret; @@ -452,7 +452,7 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? 
XE_CACHE_NONE : XE_CACHE_WB; u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; - u64 start = bo->ggtt_node.start; + u64 start = bo->ggtt_node.base.start; u64 offset, pte; for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { @@ -470,9 +470,9 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) alignment = SZ_64K; - if (XE_WARN_ON(bo->ggtt_node.size)) { + if (XE_WARN_ON(bo->ggtt_node.base.size)) { /* Someone's already inserted this BO in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node.base.size == bo->size); return 0; } @@ -482,7 +482,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); mutex_lock(&ggtt->lock); - err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size, + err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node.base, bo->size, alignment, 0, start, end, 0); if (!err) xe_ggtt_map_bo(ggtt, bo); @@ -523,12 +523,12 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) } /** - * xe_ggtt_remove_node - Remove a &drm_mm_node from the GGTT + * xe_ggtt_remove_node - Remove a &xe_ggtt_node from the GGTT * @ggtt: the &xe_ggtt where node will be removed - * @node: the &drm_mm_node to be removed + * @node: the &xe_ggtt_node to be removed * @invalidate: if node needs invalidation upon removal */ -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, +void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, bool invalidate) { struct xe_device *xe = tile_to_xe(ggtt->tile); @@ -541,9 +541,9 @@ void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, mutex_lock(&ggtt->lock); if (bound) - xe_ggtt_clear(ggtt, node->start, node->size); - drm_mm_remove_node(node); - node->size = 0; + xe_ggtt_clear(ggtt, node->base.start, node->base.size); + drm_mm_remove_node(&node->base); + node->base.size = 0; mutex_unlock(&ggtt->lock); if (!bound) @@ -563,11 +563,11 @@ void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, */ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { - if (XE_WARN_ON(!bo->ggtt_node.size)) + if (XE_WARN_ON(!bo->ggtt_node.base.size)) return; /* This BO is not currently in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node.base.size == bo->size); xe_ggtt_remove_node(ggtt, &bo->ggtt_node, bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); @@ -602,17 +602,17 @@ static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node /** * xe_ggtt_assign - assign a GGTT region to the VF * @ggtt: the &xe_ggtt where the node belongs - * @node: the &drm_mm_node to update + * @node: the &xe_ggtt_node to update * @vfid: the VF identifier * * This function is used by the PF driver to assign a GGTT region to the VF. * In addition to PTE's VFID bits 11:2 also PRESENT bit 0 is set as on some * platforms VFs can't modify that either. 
*/ -void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid) +void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct xe_ggtt_node *node, u16 vfid) { mutex_lock(&ggtt->lock); - xe_ggtt_assign_locked(ggtt, node, vfid); + xe_ggtt_assign_locked(ggtt, &node->base, vfid); mutex_unlock(&ggtt->lock); } #endif diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 2546bab97507..30a521f7b075 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -13,15 +13,15 @@ struct drm_printer; int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init(struct xe_ggtt *ggtt); -int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 size, struct drm_mm_node *node); -void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node); +int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 size, struct xe_ggtt_node *node); +void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node); -int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, +int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, u32 size, u32 align); int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, - struct drm_mm_node *node, + struct xe_ggtt_node *node, u32 size, u32 align, u32 mm_flags); -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, +void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, bool invalidate); void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); @@ -32,7 +32,7 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p); #ifdef CONFIG_PCI_IOV -void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid); +void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct xe_ggtt_node *node, u16 vfid); #endif #endif diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index 154de298a4e3..af312a7d1031 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -49,6 +49,14 @@ struct xe_ggtt { unsigned int access_count; }; +/** + * struct xe_ggtt_node - A node in GGTT + */ +struct xe_ggtt_node { + /** @base: A drm_mm_node */ + struct drm_mm_node base; +}; + /** * struct xe_ggtt_pt_ops - GGTT Page table operations * Which can vary from platform to platform. 
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 227527785afd..f107ae550c0f 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -6,6 +6,9 @@ #include #include +/* FIXME: remove this after encapsulating all drm_mm_node access into xe_ggtt */ +#include + #include "abi/guc_actions_sriov_abi.h" #include "abi/guc_klvs_abi.h" @@ -232,14 +235,14 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config) { u32 n = 0; - if (drm_mm_node_allocated(&config->ggtt_region)) { + if (drm_mm_node_allocated(&config->ggtt_region.base)) { cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); - cfg[n++] = lower_32_bits(config->ggtt_region.start); - cfg[n++] = upper_32_bits(config->ggtt_region.start); + cfg[n++] = lower_32_bits(config->ggtt_region.base.start); + cfg[n++] = upper_32_bits(config->ggtt_region.base.start); cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); - cfg[n++] = lower_32_bits(config->ggtt_region.size); - cfg[n++] = upper_32_bits(config->ggtt_region.size); + cfg[n++] = lower_32_bits(config->ggtt_region.base.size); + cfg[n++] = upper_32_bits(config->ggtt_region.base.size); } return n; @@ -369,11 +372,11 @@ static int pf_distribute_config_ggtt(struct xe_tile *tile, unsigned int vfid, u6 return err ?: err2; } -static void pf_release_ggtt(struct xe_tile *tile, struct drm_mm_node *node) +static void pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node) { struct xe_ggtt *ggtt = tile->mem.ggtt; - if (drm_mm_node_allocated(node)) { + if (drm_mm_node_allocated(&node->base)) { /* * explicit GGTT PTE assignment to the PF using xe_ggtt_assign() * is redundant, as PTE will be implicitly re-assigned to PF by @@ -391,7 +394,7 @@ static void pf_release_vf_config_ggtt(struct xe_gt *gt, struct xe_gt_sriov_confi static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - struct drm_mm_node *node = &config->ggtt_region; + struct xe_ggtt_node *node = &config->ggtt_region; struct xe_tile *tile = gt_to_tile(gt); struct xe_ggtt *ggtt = tile->mem.ggtt; u64 alignment = pf_get_ggtt_alignment(gt); @@ -403,14 +406,14 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) size = round_up(size, alignment); - if (drm_mm_node_allocated(node)) { + if (drm_mm_node_allocated(&node->base)) { err = pf_distribute_config_ggtt(tile, vfid, 0, 0); if (unlikely(err)) return err; pf_release_ggtt(tile, node); } - xe_gt_assert(gt, !drm_mm_node_allocated(node)); + xe_gt_assert(gt, !drm_mm_node_allocated(&node->base)); if (!size) return 0; @@ -421,9 +424,9 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) xe_ggtt_assign(ggtt, node, vfid); xe_gt_sriov_dbg_verbose(gt, "VF%u assigned GGTT %llx-%llx\n", - vfid, node->start, node->start + node->size - 1); + vfid, node->base.start, node->base.start + node->base.size - 1); - err = pf_distribute_config_ggtt(gt->tile, vfid, node->start, node->size); + err = pf_distribute_config_ggtt(gt->tile, vfid, node->base.start, node->base.size); if (unlikely(err)) return err; @@ -433,10 +436,10 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - struct drm_mm_node *node = &config->ggtt_region; + struct xe_ggtt_node *node = &config->ggtt_region; xe_gt_assert(gt, 
!xe_gt_is_media_type(gt));
- return drm_mm_node_allocated(node) ? node->size : 0;
+ return drm_mm_node_allocated(&node->base) ? node->base.size : 0;
 }
 /**
@@ -2025,13 +2028,13 @@ int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p)
 for (n = 1; n <= total_vfs; n++) {
 config = &gt->sriov.pf.vfs[n].config;
- if (!drm_mm_node_allocated(&config->ggtt_region))
+ if (!drm_mm_node_allocated(&config->ggtt_region.base))
 continue;
- string_get_size(config->ggtt_region.size, 1, STRING_UNITS_2, buf, sizeof(buf));
+ string_get_size(config->ggtt_region.base.size, 1, STRING_UNITS_2, buf, sizeof(buf));
 drm_printf(p, "VF%u:\t%#0llx-%#llx\t(%s)\n",
- n, config->ggtt_region.start,
- config->ggtt_region.start + config->ggtt_region.size - 1, buf);
+ n, config->ggtt_region.base.start,
+ config->ggtt_region.base.start + config->ggtt_region.base.size - 1, buf);
 }
 return 0;

diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h index 7bc66656fcc7..a73d9a4b9e64 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h
@@ -6,8 +6,7 @@
 #ifndef _XE_GT_SRIOV_PF_CONFIG_TYPES_H_
 #define _XE_GT_SRIOV_PF_CONFIG_TYPES_H_
-#include
-
+#include "xe_ggtt_types.h"
 #include "xe_guc_klv_thresholds_set_types.h"
 struct xe_bo;
@@ -19,7 +18,7 @@ struct xe_bo;
 */
 struct xe_gt_sriov_config {
 /** @ggtt_region: GGTT region assigned to the VF. */
- struct drm_mm_node ggtt_region;
+ struct xe_ggtt_node ggtt_region;
 /** @lmem_obj: LMEM allocation for use by the VF. */
 struct xe_bo *lmem_obj;
 /** @num_ctxs: number of GuC contexts IDs. */

From 0567f18e0757a260031e59487fe01f402c16c0de Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi
Date: Wed, 21 Aug 2024 15:38:36 -0400
Subject: [PATCH 081/108] drm/xe: Rename xe_ggtt_node related functions

Bring some consistency and prepare for more xe_ggtt_node related functions to be introduced.

Reviewed-by: Matthew Brost
Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-6-rodrigo.vivi@intel.com
Signed-off-by: Rodrigo Vivi
---
 drivers/gpu/drm/xe/display/xe_fb_pin.c | 8 +-
 drivers/gpu/drm/xe/xe_ggtt.c | 86 +++++++++++-----------
 drivers/gpu/drm/xe/xe_ggtt.h | 12 +--
 drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 4 +-
 4 files changed, 54 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index a93923fb8721..db74c3395ef8 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -209,8 +209,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
 } else if (view->type == I915_GTT_VIEW_NORMAL) {
 u32 x, size = bo->ttm.base.size;
- ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size,
- align, 0);
+ ret = xe_ggtt_node_insert_locked(ggtt, &vma->node, size, align, 0);
 if (ret)
 goto out_unlock;
@@ -227,8 +226,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
 /* display seems to use tiles instead of bytes here, so convert it back..
*/ u32 size = intel_rotation_info_size(rot_info) * XE_PAGE_SIZE; - ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size, - align, 0); + ret = xe_ggtt_node_insert_locked(ggtt, &vma->node, size, align, 0); if (ret) goto out_unlock; @@ -327,7 +325,7 @@ static void __xe_unpin_fb_vma(struct i915_vma *vma) xe_bo_unpin_map_no_vm(vma->dpt); else if (!drm_mm_node_allocated(&vma->bo->ggtt_node.base) || vma->bo->ggtt_node.base.start != vma->node.base.start) - xe_ggtt_remove_node(ggtt, &vma->node, false); + xe_ggtt_node_remove(ggtt, &vma->node, false); ttm_bo_reserve(&vma->bo->ttm, false, false, NULL); ttm_bo_unpin(&vma->bo->ttm); diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 1cf682d15253..e0da24bb5774 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -403,7 +403,7 @@ void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node) } /** - * xe_ggtt_insert_special_node_locked - Locked version to insert a &xe_ggtt_node into the GGTT + * xe_ggtt_node_insert_locked - Locked version to insert a &xe_ggtt_node into the GGTT * @ggtt: the &xe_ggtt where node will be inserted * @node: the &xe_ggtt_node to be inserted * @size: size of the node @@ -414,15 +414,15 @@ void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node) * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, - u32 size, u32 align, u32 mm_flags) +int xe_ggtt_node_insert_locked(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, + u32 size, u32 align, u32 mm_flags) { return drm_mm_insert_node_generic(&ggtt->mm, &node->base, size, align, 0, mm_flags); } /** - * xe_ggtt_insert_special_node - Insert a &xe_ggtt_node into the GGTT + * xe_ggtt_node_insert - Insert a &xe_ggtt_node into the GGTT * @ggtt: the &xe_ggtt where node will be inserted * @node: the &xe_ggtt_node to be inserted * @size: size of the node @@ -430,19 +430,53 @@ int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct xe_ggtt_node * * Return: 0 on success or a negative error code on failure. 
*/ -int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, - u32 size, u32 align) +int xe_ggtt_node_insert(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, + u32 size, u32 align) { int ret; mutex_lock(&ggtt->lock); - ret = xe_ggtt_insert_special_node_locked(ggtt, node, size, - align, DRM_MM_INSERT_HIGH); + ret = xe_ggtt_node_insert_locked(ggtt, node, size, + align, DRM_MM_INSERT_HIGH); mutex_unlock(&ggtt->lock); return ret; } +/** + * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT + * @ggtt: the &xe_ggtt where node will be removed + * @node: the &xe_ggtt_node to be removed + * @invalidate: if node needs invalidation upon removal + */ +void xe_ggtt_node_remove(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, + bool invalidate) +{ + struct xe_device *xe = tile_to_xe(ggtt->tile); + bool bound; + int idx; + + bound = drm_dev_enter(&xe->drm, &idx); + if (bound) + xe_pm_runtime_get_noresume(xe); + + mutex_lock(&ggtt->lock); + if (bound) + xe_ggtt_clear(ggtt, node->base.start, node->base.size); + drm_mm_remove_node(&node->base); + node->base.size = 0; + mutex_unlock(&ggtt->lock); + + if (!bound) + return; + + if (invalidate) + xe_ggtt_invalidate(ggtt); + + xe_pm_runtime_put(xe); + drm_dev_exit(idx); +} + /** * xe_ggtt_map_bo - Map the BO into GGTT * @ggtt: the &xe_ggtt where node will be mapped @@ -522,40 +556,6 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX); } -/** - * xe_ggtt_remove_node - Remove a &xe_ggtt_node from the GGTT - * @ggtt: the &xe_ggtt where node will be removed - * @node: the &xe_ggtt_node to be removed - * @invalidate: if node needs invalidation upon removal - */ -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, - bool invalidate) -{ - struct xe_device *xe = tile_to_xe(ggtt->tile); - bool bound; - int idx; - - bound = drm_dev_enter(&xe->drm, &idx); - if (bound) - xe_pm_runtime_get_noresume(xe); - - mutex_lock(&ggtt->lock); - if (bound) - xe_ggtt_clear(ggtt, node->base.start, node->base.size); - drm_mm_remove_node(&node->base); - node->base.size = 0; - mutex_unlock(&ggtt->lock); - - if (!bound) - return; - - if (invalidate) - xe_ggtt_invalidate(ggtt); - - xe_pm_runtime_put(xe); - drm_dev_exit(idx); -} - /** * xe_ggtt_remove_bo - Remove a BO from the GGTT * @ggtt: the &xe_ggtt where node will be removed @@ -569,7 +569,7 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) /* This BO is not currently in the GGTT */ xe_tile_assert(ggtt->tile, bo->ggtt_node.base.size == bo->size); - xe_ggtt_remove_node(ggtt, &bo->ggtt_node, + xe_ggtt_node_remove(ggtt, &bo->ggtt_node, bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); } diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 30a521f7b075..5147930357de 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -16,12 +16,12 @@ int xe_ggtt_init(struct xe_ggtt *ggtt); int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 size, struct xe_ggtt_node *node); void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node); -int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, - u32 size, u32 align); -int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, - struct xe_ggtt_node *node, - u32 size, u32 align, u32 mm_flags); -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, +int xe_ggtt_node_insert(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, + u32 size, u32 align); +int 
xe_ggtt_node_insert_locked(struct xe_ggtt *ggtt, + struct xe_ggtt_node *node, + u32 size, u32 align, u32 mm_flags); +void xe_ggtt_node_remove(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, bool invalidate); void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index f107ae550c0f..3ca98f8a0eae 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -382,7 +382,7 @@ static void pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node) * is redundant, as PTE will be implicitly re-assigned to PF by * the xe_ggtt_clear() called by below xe_ggtt_remove_node(). */ - xe_ggtt_remove_node(ggtt, node, false); + xe_ggtt_node_remove(ggtt, node, false); } } @@ -418,7 +418,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) if (!size) return 0; - err = xe_ggtt_insert_special_node(ggtt, node, size, alignment); + err = xe_ggtt_node_insert(ggtt, node, size, alignment); if (unlikely(err)) return err; From 8b5ccc9743ab026b12075eb5e3883cc9e42bc683 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:37 -0400 Subject: [PATCH 082/108] drm/xe: Limit drm_mm_node_allocated access to xe_ggtt_node Continue with the encapsulation of drm_mm_node inside xe_ggtt. Cc: Michal Wajdeczko Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-7-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 2 +- drivers/gpu/drm/xe/xe_ggtt.c | 11 +++++++++++ drivers/gpu/drm/xe/xe_ggtt.h | 1 + drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 12 ++++++------ 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index db74c3395ef8..de4930b67a29 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -323,7 +323,7 @@ static void __xe_unpin_fb_vma(struct i915_vma *vma) if (vma->dpt) xe_bo_unpin_map_no_vm(vma->dpt); - else if (!drm_mm_node_allocated(&vma->bo->ggtt_node.base) || + else if (!xe_ggtt_node_allocated(&vma->bo->ggtt_node) || vma->bo->ggtt_node.base.start != vma->node.base.start) xe_ggtt_node_remove(ggtt, &vma->node, false); diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index e0da24bb5774..7c8bbaa30fca 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -477,6 +477,17 @@ void xe_ggtt_node_remove(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, drm_dev_exit(idx); } +/** + * xe_ggtt_node_allocated - Check if node is allocated + * @node: the &xe_ggtt_node to be inspected + * + * Return: True if allocated, False otherwise. 
+ */ +bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node) +{ + return drm_mm_node_allocated(&node->base); +} + /** * xe_ggtt_map_bo - Map the BO into GGTT * @ggtt: the &xe_ggtt where node will be mapped diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 5147930357de..f816b3c0732b 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -23,6 +23,7 @@ int xe_ggtt_node_insert_locked(struct xe_ggtt *ggtt, u32 size, u32 align, u32 mm_flags); void xe_ggtt_node_remove(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, bool invalidate); +bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node); void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 3ca98f8a0eae..947750d97d7d 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -235,7 +235,7 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config) { u32 n = 0; - if (drm_mm_node_allocated(&config->ggtt_region.base)) { + if (xe_ggtt_node_allocated(&config->ggtt_region)) { cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); cfg[n++] = lower_32_bits(config->ggtt_region.base.start); cfg[n++] = upper_32_bits(config->ggtt_region.base.start); @@ -376,7 +376,7 @@ static void pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node) { struct xe_ggtt *ggtt = tile->mem.ggtt; - if (drm_mm_node_allocated(&node->base)) { + if (xe_ggtt_node_allocated(node)) { /* * explicit GGTT PTE assignment to the PF using xe_ggtt_assign() * is redundant, as PTE will be implicitly re-assigned to PF by @@ -406,14 +406,14 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) size = round_up(size, alignment); - if (drm_mm_node_allocated(&node->base)) { + if (xe_ggtt_node_allocated(node)) { err = pf_distribute_config_ggtt(tile, vfid, 0, 0); if (unlikely(err)) return err; pf_release_ggtt(tile, node); } - xe_gt_assert(gt, !drm_mm_node_allocated(&node->base)); + xe_gt_assert(gt, !xe_ggtt_node_allocated(node)); if (!size) return 0; @@ -439,7 +439,7 @@ static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid) struct xe_ggtt_node *node = &config->ggtt_region; xe_gt_assert(gt, !xe_gt_is_media_type(gt)); - return drm_mm_node_allocated(&node->base) ? node->base.size : 0; + return xe_ggtt_node_allocated(node) ? node->base.size : 0; } /** @@ -2028,7 +2028,7 @@ int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p) for (n = 1; n <= total_vfs; n++) { config = &gt->sriov.pf.vfs[n].config; - if (!drm_mm_node_allocated(&config->ggtt_region.base)) + if (!xe_ggtt_node_allocated(&config->ggtt_region)) continue; string_get_size(config->ggtt_region.base.size, 1, STRING_UNITS_2, buf, sizeof(buf)); From 1144e0dff5e68907cb8d3e2d64d1c00e2a96d1b2 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:38 -0400 Subject: [PATCH 083/108] drm/xe: Introduce xe_ggtt_largest_hole Introduce a new xe_ggtt_largest_hole helper that addresses the SRIOV demand and continues with the goal of limiting drm_mm access to xe_ggtt.
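For readers coming from outside the driver, the drm_mm hole walk that this helper centralizes can be modeled in plain C. The sketch below is illustrative only (hypothetical hole list and values, userspace stand-ins for the kernel's ALIGN helpers and the drm_mm hole iterator), but it mirrors the same steps: clamp each hole to the minimum start, align the bounds inward, skip degenerate holes, shave the requested spare, and track the largest hole.

#include <stdint.h>
#include <stdio.h>

#define ALIGN_UP(x, a)   (((x) + (a) - 1) & ~((uint64_t)(a) - 1))
#define ALIGN_DOWN(x, a) ((x) & ~((uint64_t)(a) - 1))

struct hole { uint64_t start, end; };

static uint64_t min3_u64(uint64_t a, uint64_t b, uint64_t c)
{
	uint64_t m = a < b ? a : b;
	return m < c ? m : c;
}

/* Same scan as the kernel helper: clamp, align, skip empty, adjust spare. */
static uint64_t largest_hole(const struct hole *holes, int n, uint64_t min_start,
			     uint64_t align, uint64_t *spare)
{
	uint64_t max_hole = 0;

	for (int i = 0; i < n; i++) {
		uint64_t start = holes[i].start > min_start ? holes[i].start : min_start;
		uint64_t end = holes[i].end;

		start = ALIGN_UP(start, align);
		end = ALIGN_DOWN(end, align);
		if (start >= end)
			continue;

		uint64_t size = end - start;

		if (spare)
			*spare -= min3_u64(*spare, size, max_hole);
		if (size > max_hole)
			max_hole = size;
	}
	return max_hole;
}

int main(void)
{
	struct hole holes[] = { { 0x1000, 0x9000 }, { 0x20000, 0x24000 } };
	uint64_t spare = 0x2000;
	uint64_t max = largest_hole(holes, 2, 0x2000, 0x1000, &spare);

	printf("largest: %#llx, spare left: %#llx\n",
	       (unsigned long long)max, (unsigned long long)spare);
	return 0;
}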
v2: Fix a typo (Michal) Cc: Michal Wajdeczko Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-8-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 35 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_ggtt.h | 1 + drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 23 ++------------ 3 files changed, 38 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 7c8bbaa30fca..2d055f489879 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -584,6 +584,41 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); } +/** + * xe_ggtt_largest_hole - Largest GGTT hole + * @ggtt: the &xe_ggtt that will be inspected + * @alignment: minimum alignment + * @spare: If not NULL: in: desired memory size to be spared / out: Adjusted possible spare + * + * Return: size of the largest continuous GGTT region + */ +u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare) +{ + const struct drm_mm *mm = &ggtt->mm; + const struct drm_mm_node *entry; + u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile)); + u64 hole_start, hole_end, hole_size; + u64 max_hole = 0; + + mutex_lock(&ggtt->lock); + + drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { + hole_start = max(hole_start, hole_min_start); + hole_start = ALIGN(hole_start, alignment); + hole_end = ALIGN_DOWN(hole_end, alignment); + if (hole_start >= hole_end) + continue; + hole_size = hole_end - hole_start; + if (spare) + *spare -= min3(*spare, hole_size, max_hole); + max_hole = max(max_hole, hole_size); + } + + mutex_unlock(&ggtt->lock); + + return max_hole; +} + #ifdef CONFIG_PCI_IOV static u64 xe_encode_vfid_pte(u16 vfid) { diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index f816b3c0732b..31060fe7644b 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -29,6 +29,7 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end); void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); +u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare); int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 947750d97d7d..1852ff45bea4 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -590,30 +590,11 @@ int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, unsigned int vfid, static u64 pf_get_max_ggtt(struct xe_gt *gt) { struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt; - const struct drm_mm *mm = &ggtt->mm; - const struct drm_mm_node *entry; u64 alignment = pf_get_ggtt_alignment(gt); u64 spare = pf_get_spare_ggtt(gt); - u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt)); - u64 hole_start, hole_end, hole_size; - u64 max_hole = 0; + u64 max_hole; - mutex_lock(&ggtt->lock); - - drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { - hole_start = max(hole_start, hole_min_start); - hole_start = ALIGN(hole_start, alignment); - hole_end = ALIGN_DOWN(hole_end, alignment); - if (hole_start >= hole_end) - continue; - hole_size = hole_end - hole_start; - xe_gt_sriov_dbg_verbose(gt, "HOLE start %llx size %lluK\n", - hole_start, hole_size / SZ_1K); - spare -= min3(spare, hole_size, max_hole); - max_hole = max(max_hole, 
hole_size); - } - - mutex_unlock(&ggtt->lock); + max_hole = xe_ggtt_largest_hole(ggtt, alignment, &spare); xe_gt_sriov_dbg_verbose(gt, "HOLE max %lluK reserved %lluK\n", max_hole / SZ_1K, spare / SZ_1K); From 136367290ea5d7b5d05696189e9fd6162b9d9742 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:39 -0400 Subject: [PATCH 084/108] drm/xe: Introduce xe_ggtt_print_holes Introduce a new xe_ggtt_print_holes helper that addresses the SRIOV demand and finishes the goal of limiting drm_mm access to xe_ggtt. Cc: Michal Wajdeczko Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-9-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 40 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_ggtt.h | 1 + drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 25 +------------- 3 files changed, 42 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 2d055f489879..960f5a28b7ed 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -682,3 +682,43 @@ int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p) mutex_unlock(&ggtt->lock); return err; } + +/** + * xe_ggtt_print_holes - Print holes + * @ggtt: the &xe_ggtt to be inspected + * @alignment: min alignment + * @p: the &drm_printer + * + * Print GGTT ranges that are available and return total size available. + * + * Return: Total available size. + */ +u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p) +{ + const struct drm_mm *mm = &ggtt->mm; + const struct drm_mm_node *entry; + u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile)); + u64 hole_start, hole_end, hole_size; + u64 total = 0; + char buf[10]; + + mutex_lock(&ggtt->lock); + + drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { + hole_start = max(hole_start, hole_min_start); + hole_start = ALIGN(hole_start, alignment); + hole_end = ALIGN_DOWN(hole_end, alignment); + if (hole_start >= hole_end) + continue; + hole_size = hole_end - hole_start; + total += hole_size; + + string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n", + hole_start, hole_end - 1, buf); + } + + mutex_unlock(&ggtt->lock); + + return total; } diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 31060fe7644b..67ae5f1602a3 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -32,6 +32,7 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare); int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p); +u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 1852ff45bea4..e133594cc6bd 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -6,9 +6,6 @@ #include #include -/* FIXME: remove this after encapsulating all drm_mm_node access into xe_ggtt */ -#include - #include "abi/guc_actions_sriov_abi.h" #include "abi/guc_klvs_abi.h" @@ -2103,11 +2100,7 @@ int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p) int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p) { struct xe_ggtt *ggtt
= gt_to_tile(gt)->mem.ggtt; - const struct drm_mm *mm = &ggtt->mm; - const struct drm_mm_node *entry; u64 alignment = pf_get_ggtt_alignment(gt); - u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt)); - u64 hole_start, hole_end, hole_size; u64 spare, avail, total = 0; char buf[10]; @@ -2116,24 +2109,8 @@ int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_prin mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); spare = pf_get_spare_ggtt(gt); + total = xe_ggtt_print_holes(ggtt, alignment, p); - mutex_lock(&ggtt->lock); - - drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { - hole_start = max(hole_start, hole_min_start); - hole_start = ALIGN(hole_start, alignment); - hole_end = ALIGN_DOWN(hole_end, alignment); - if (hole_start >= hole_end) - continue; - hole_size = hole_end - hole_start; - total += hole_size; - - string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf)); - drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n", - hole_start, hole_end - 1, buf); - } - - mutex_unlock(&ggtt->lock); mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); string_get_size(total, 1, STRING_UNITS_2, buf, sizeof(buf)); From 15ca09499bc669b600dcdaf01fc0bf8c55e15b35 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:40 -0400 Subject: [PATCH 085/108] drm/xe: Refactor xe_ggtt balloon functions to make the node clear These operations are related to node. Convert them to the new appropriate name space xe_ggtt_node. v2: Also move arguments around for consistency (Lucas). v3: s/node_balloon/node_insert_balloon and s/node_deballoon/node_remove_balloon (Michal). Reviewed-by: Lucas De Marchi Cc: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-10-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 16 ++++++++-------- drivers/gpu/drm/xe/xe_ggtt.h | 5 +++-- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 12 +++++++----- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 960f5a28b7ed..d21474715bdb 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -347,17 +347,17 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, } /** - * xe_ggtt_balloon - prevent allocation of specified GGTT addresses + * xe_ggtt_node_insert_balloon - prevent allocation of specified GGTT addresses * @ggtt: the &xe_ggtt where we want to make reservation + * @node: the &xe_ggtt_node to hold reserved GGTT node * @start: the starting GGTT address of the reserved region * @end: then end GGTT address of the reserved region - * @node: the &xe_ggtt_node to hold reserved GGTT node * - * Use xe_ggtt_deballoon() to release a reserved GGTT node. + * Use xe_ggtt_node_remove_balloon() to release a reserved GGTT node. * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct xe_ggtt_node *node) +int xe_ggtt_node_insert_balloon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, u64 start, u64 end) { int err; @@ -384,18 +384,18 @@ int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct xe_ggtt_nod } /** - * xe_ggtt_deballoon - release a reserved GGTT region + * xe_ggtt_node_remove_balloon - release a reserved GGTT region * @ggtt: the &xe_ggtt where reserved node belongs * @node: the &xe_ggtt_node with reserved GGTT region * - * See xe_ggtt_balloon() for details. + * See xe_ggtt_node_insert_balloon() for details. 
 */ -void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node) +void xe_ggtt_node_remove_balloon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node) { if (!drm_mm_node_allocated(&node->base)) return; - xe_ggtt_dump_node(ggtt, &node->base, "deballoon"); + xe_ggtt_dump_node(ggtt, &node->base, "remove-balloon"); mutex_lock(&ggtt->lock); drm_mm_remove_node(&node->base); diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 67ae5f1602a3..e258a4f381b4 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -13,8 +13,9 @@ struct drm_printer; int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init(struct xe_ggtt *ggtt); -int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 size, struct xe_ggtt_node *node); -void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node); +int xe_ggtt_node_insert_balloon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, + u64 start, u64 size); +void xe_ggtt_node_remove_balloon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node); int xe_ggtt_node_insert(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, u32 size, u32 align); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 47222bd9988d..39ff4e7f902e 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -528,7 +528,8 @@ static int vf_balloon_ggtt(struct xe_gt *gt) start = xe_wopcm_size(xe); end = config->ggtt_base; if (end != start) { - err = xe_ggtt_balloon(ggtt, start, end, &tile->sriov.vf.ggtt_balloon[0]); + err = xe_ggtt_node_insert_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[0], + start, end); if (err) goto failed; } @@ -536,7 +537,8 @@ static int vf_balloon_ggtt(struct xe_gt *gt) start = config->ggtt_base + config->ggtt_size; end = GUC_GGTT_TOP; if (end != start) { - err = xe_ggtt_balloon(ggtt, start, end, &tile->sriov.vf.ggtt_balloon[1]); + err = xe_ggtt_node_insert_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[1], + start, end); if (err) goto deballoon; } @@ -544,7 +546,7 @@ static int vf_balloon_ggtt(struct xe_gt *gt) return 0; deballoon: - xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); + xe_ggtt_node_remove_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); failed: return err; } @@ -555,8 +557,8 @@ static void deballoon_ggtt(struct drm_device *drm, void *arg) struct xe_ggtt *ggtt = tile->mem.ggtt; xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); - xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[1]); - xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); + xe_ggtt_node_remove_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[1]); + xe_ggtt_node_remove_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); } /** From 34e804220f693fe6bb1c4e50f27dd855e9313f0c Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:41 -0400 Subject: [PATCH 086/108] drm/xe: Make xe_ggtt_node struct independent In some rare cases, the drm_mm node cannot be removed synchronously due to runtime PM conditions. In this situation, the node removal will be delegated to a workqueue that will be able to wake up the device before removing the node. However, in that case, the lifetime of the xe_ggtt_node cannot be restricted to the lifetime of the parent object. So, this patch introduces the infrastructure so the xe_ggtt_node struct can be allocated in advance and freed when needed. By having the ggtt backpointer, it also ensures that the init function is always called before any attempt to insert or reserve the node in the GGTT.
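To make the intended lifecycle concrete, here is a minimal, illustrative caller-side sketch (not part of the patch): the xe_ggtt_node_* calls are the API introduced below, and the SZ_4K size/alignment values are arbitrary examples.

/* Illustrative only: the new allocate-in-advance lifecycle. */
static int example_ggtt_reserve(struct xe_ggtt *ggtt)
{
	struct xe_ggtt_node *node;
	int err;

	node = xe_ggtt_node_init(ggtt);	/* allocates the handle, stores the backpointer */
	if (IS_ERR(node))
		return PTR_ERR(node);

	err = xe_ggtt_node_insert(node, SZ_4K, SZ_4K);
	if (err) {
		xe_ggtt_node_fini(node);	/* never inserted: free the handle only */
		return err;
	}

	/* ... use node->base.start ... */

	xe_ggtt_node_remove(node, false);	/* releases the range and frees the handle */
	return 0;
}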
v2: s/xe_ggtt_node_force_fini/xe_ggtt_node_fini and use it internally (Brost) v3: - Use GFP_NOFS for node allocation (CI) - Avoid ggtt argument, now that we have it inside the node (Lucas) - Fix some missed fini cases (CI) v4: - Fix SRIOV critical case where config->ggtt_region was lost (Michal) - Avoid ggtt argument also on removal (missed case on v3) (Michal) - Remove useless checks (Michal) - Return 0 instead of negative errno on a u32 addr. (Michal) - s/xe_ggtt_assign/xe_ggtt_node_assign for coherence, while we are touching it (Michal) v5: - Fix VFs' ggtt_balloon Cc: Matthew Auld Cc: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-11-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- .../gpu/drm/xe/compat-i915-headers/i915_vma.h | 4 +- drivers/gpu/drm/xe/display/xe_fb_pin.c | 39 +++-- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_bo.h | 9 +- drivers/gpu/drm/xe/xe_bo_types.h | 2 +- drivers/gpu/drm/xe/xe_device_types.h | 2 +- drivers/gpu/drm/xe/xe_ggtt.c | 140 +++++++++++++----- drivers/gpu/drm/xe/xe_ggtt.h | 17 +-- drivers/gpu/drm/xe/xe_ggtt_types.h | 8 +- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 49 +++--- .../gpu/drm/xe/xe_gt_sriov_pf_config_types.h | 2 +- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 46 +++--- 12 files changed, 215 insertions(+), 105 deletions(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h index 3028ac1ba72f..bdae8392e125 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h @@ -20,7 +20,7 @@ struct xe_bo; struct i915_vma { struct xe_bo *bo, *dpt; - struct xe_ggtt_node node; + struct xe_ggtt_node *node; }; #define i915_ggtt_clear_scanout(bo) do { } while (0) @@ -29,7 +29,7 @@ struct i915_vma { static inline u32 i915_ggtt_offset(const struct i915_vma *vma) { - return vma->node.base.start; + return vma->node->base.start; } #endif diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index de4930b67a29..d650c5ac41a4 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -204,20 +204,28 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) align = max_t(u32, align, SZ_64K); - if (bo->ggtt_node.base.size && view->type == I915_GTT_VIEW_NORMAL) { + if (bo->ggtt_node && view->type == I915_GTT_VIEW_NORMAL) { vma->node = bo->ggtt_node; } else if (view->type == I915_GTT_VIEW_NORMAL) { u32 x, size = bo->ttm.base.size; - ret = xe_ggtt_node_insert_locked(ggtt, &vma->node, size, align, 0); - if (ret) + vma->node = xe_ggtt_node_init(ggtt); + if (IS_ERR(vma->node)) { + ret = PTR_ERR(vma->node); goto out_unlock; + } + + ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0); + if (ret) { + xe_ggtt_node_fini(vma->node); + goto out_unlock; + } for (x = 0; x < size; x += XE_PAGE_SIZE) { u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x, xe->pat.idx[XE_CACHE_NONE]); - ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node.base.start + x, pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node->base.start + x, pte); } } else { u32 i, ggtt_ofs; @@ -226,11 +234,19 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, /* display seems to use tiles instead of bytes here, so convert it back..
*/ u32 size = intel_rotation_info_size(rot_info) * XE_PAGE_SIZE; - ret = xe_ggtt_node_insert_locked(ggtt, &vma->node, size, align, 0); - if (ret) + vma->node = xe_ggtt_node_init(ggtt); + if (IS_ERR(vma->node)) { + ret = PTR_ERR(vma->node); goto out_unlock; + } - ggtt_ofs = vma->node.base.start; + ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0); + if (ret) { + xe_ggtt_node_fini(vma->node); + goto out_unlock; + } + + ggtt_ofs = vma->node->base.start; for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++) write_ggtt_rotated(bo, ggtt, &ggtt_ofs, @@ -318,14 +334,11 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, static void __xe_unpin_fb_vma(struct i915_vma *vma) { - struct xe_device *xe = to_xe_device(vma->bo->ttm.base.dev); - struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; - if (vma->dpt) xe_bo_unpin_map_no_vm(vma->dpt); - else if (!xe_ggtt_node_allocated(&vma->bo->ggtt_node) || - vma->bo->ggtt_node.base.start != vma->node.base.start) - xe_ggtt_node_remove(ggtt, &vma->node, false); + else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node) || + vma->bo->ggtt_node->base.start != vma->node->base.start) + xe_ggtt_node_remove(vma->node, false); ttm_bo_reserve(&vma->bo->ttm, false, false, NULL); ttm_bo_unpin(&vma->bo->ttm); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 1faef9903b87..cbe7bf098970 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1120,7 +1120,7 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list)); - if (bo->ggtt_node.base.size) + if (bo->ggtt_node && bo->ggtt_node->base.size) xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo); #ifdef CONFIG_PROC_FS diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 9904b410be37..dbfb3209615d 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -194,9 +194,12 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size) static inline u32 xe_bo_ggtt_addr(struct xe_bo *bo) { - XE_WARN_ON(bo->ggtt_node.base.size > bo->size); - XE_WARN_ON(bo->ggtt_node.base.start + bo->ggtt_node.base.size > (1ull << 32)); - return bo->ggtt_node.base.start; + if (XE_WARN_ON(!bo->ggtt_node)) + return 0; + + XE_WARN_ON(bo->ggtt_node->base.size > bo->size); + XE_WARN_ON(bo->ggtt_node->base.start + bo->ggtt_node->base.size > (1ull << 32)); + return bo->ggtt_node->base.start; } int xe_bo_vmap(struct xe_bo *bo); diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 4b1de9f5be00..2ed558ac2264 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -40,7 +40,7 @@ struct xe_bo { /** @placement: current placement for this BO */ struct ttm_placement placement; /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */ - struct xe_ggtt_node ggtt_node; + struct xe_ggtt_node *ggtt_node; /** @vmap: iosys map of this buffer */ struct iosys_map vmap; /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */ diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 9f7d46a3260c..121a403a1478 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -204,7 +204,7 @@ struct xe_tile { struct xe_memirq memirq; /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. 
*/ - struct xe_ggtt_node ggtt_balloon[2]; + struct xe_ggtt_node *ggtt_balloon[2]; } vf; } sriov; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index d21474715bdb..58d7cad2425b 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -348,7 +348,6 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, /** * xe_ggtt_node_insert_balloon - prevent allocation of specified GGTT addresses - * @ggtt: the &xe_ggtt where we want to make reservation * @node: the &xe_ggtt_node to hold reserved GGTT node * @start: the starting GGTT address of the reserved region * @end: then end GGTT address of the reserved region @@ -357,8 +356,9 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_node_insert_balloon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, u64 start, u64 end) +int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end) { + struct xe_ggtt *ggtt = node->ggtt; int err; xe_tile_assert(ggtt->tile, start < end); @@ -385,77 +385,124 @@ int xe_ggtt_node_insert_balloon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, /** * xe_ggtt_node_remove_balloon - release a reserved GGTT region - * @ggtt: the &xe_ggtt where reserved node belongs * @node: the &xe_ggtt_node with reserved GGTT region * * See xe_ggtt_node_insert_balloon() for details. */ -void xe_ggtt_node_remove_balloon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node) +void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node) { - if (!drm_mm_node_allocated(&node->base)) + if (!node || !node->ggtt) return; - xe_ggtt_dump_node(ggtt, &node->base, "remove-balloon"); + if (!drm_mm_node_allocated(&node->base)) + goto free_node; - mutex_lock(&ggtt->lock); + xe_ggtt_dump_node(node->ggtt, &node->base, "remove-balloon"); + + mutex_lock(&node->ggtt->lock); drm_mm_remove_node(&node->base); - mutex_unlock(&ggtt->lock); + mutex_unlock(&node->ggtt->lock); + +free_node: + xe_ggtt_node_fini(node); } /** * xe_ggtt_node_insert_locked - Locked version to insert a &xe_ggtt_node into the GGTT - * @ggtt: the &xe_ggtt where node will be inserted * @node: the &xe_ggtt_node to be inserted * @size: size of the node * @align: alignment constrain of the node * @mm_flags: flags to control the node behavior * + * It cannot be called without first having called xe_ggtt_init() once. * To be used in cases where ggtt->lock is already taken. * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_node_insert_locked(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, +int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, u32 size, u32 align, u32 mm_flags) { - return drm_mm_insert_node_generic(&ggtt->mm, &node->base, size, align, 0, + return drm_mm_insert_node_generic(&node->ggtt->mm, &node->base, size, align, 0, mm_flags); } /** * xe_ggtt_node_insert - Insert a &xe_ggtt_node into the GGTT - * @ggtt: the &xe_ggtt where node will be inserted * @node: the &xe_ggtt_node to be inserted * @size: size of the node * @align: alignment constrain of the node * + * It cannot be called without first having called xe_ggtt_init() once. + * * Return: 0 on success or a negative error code on failure. 
 */ -int xe_ggtt_node_insert(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, - u32 size, u32 align) +int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align) { int ret; - mutex_lock(&ggtt->lock); - ret = xe_ggtt_node_insert_locked(ggtt, node, size, - align, DRM_MM_INSERT_HIGH); - mutex_unlock(&ggtt->lock); + if (!node || !node->ggtt) + return -ENOENT; + + mutex_lock(&node->ggtt->lock); + ret = xe_ggtt_node_insert_locked(node, size, align, + DRM_MM_INSERT_HIGH); + mutex_unlock(&node->ggtt->lock); return ret; } +/** + * xe_ggtt_node_init - Initialize %xe_ggtt_node struct + * @ggtt: the &xe_ggtt where the new node will later be inserted/reserved. + * + * This function will allocate the struct %xe_ggtt_node and return its pointer. + * This struct will then be freed after the node removal upon xe_ggtt_node_remove() + * or xe_ggtt_node_remove_balloon(). + * Having %xe_ggtt_node struct allocated doesn't mean that the node is already allocated + * in GGTT. Only xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), + * or xe_ggtt_node_insert_balloon() will ensure the node is inserted or reserved in GGTT. + * + * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise. + **/ +struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt) +{ + struct xe_ggtt_node *node = kzalloc(sizeof(*node), GFP_NOFS); + + if (!node) + return ERR_PTR(-ENOMEM); + + node->ggtt = ggtt; + return node; +} + +/** + * xe_ggtt_node_fini - Forcibly finalize %xe_ggtt_node struct + * @node: the &xe_ggtt_node to be freed + * + * If anything went wrong with either xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), + * or xe_ggtt_node_insert_balloon(), and this @node is not going to be reused, then + * this function needs to be called to free the %xe_ggtt_node struct. + **/ +void xe_ggtt_node_fini(struct xe_ggtt_node *node) +{ + kfree(node); +} + /** * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT - * @ggtt: the &xe_ggtt where node will be removed * @node: the &xe_ggtt_node to be removed * @invalidate: if node needs invalidation upon removal */ -void xe_ggtt_node_remove(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, - bool invalidate) +void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate) { + struct xe_ggtt *ggtt = node->ggtt; struct xe_device *xe = tile_to_xe(ggtt->tile); bool bound; int idx; + if (!node || !node->ggtt) + return; + bound = drm_dev_enter(&xe->drm, &idx); if (bound) xe_pm_runtime_get_noresume(xe); @@ -468,23 +515,29 @@ void xe_ggtt_node_remove(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, mutex_unlock(&ggtt->lock); if (!bound) - return; + goto free_node; if (invalidate) xe_ggtt_invalidate(ggtt); xe_pm_runtime_put(xe); drm_dev_exit(idx); + +free_node: + xe_ggtt_node_fini(node); } /** - * xe_ggtt_node_allocated - Check if node is allocated + * xe_ggtt_node_allocated - Check if node is allocated in GGTT * @node: the &xe_ggtt_node to be inspected * * Return: True if allocated, False otherwise. */ bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node) { + if (!node || !node->ggtt) + return false; + return drm_mm_node_allocated(&node->base); } @@ -497,9 +550,14 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ?
XE_CACHE_NONE : XE_CACHE_WB; u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; - u64 start = bo->ggtt_node.base.start; + u64 start; u64 offset, pte; + if (XE_WARN_ON(!bo->ggtt_node)) + return; + + start = bo->ggtt_node->base.start; + for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); ggtt->pt_ops->ggtt_set_pte(ggtt, start + offset, pte); @@ -515,9 +573,9 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) alignment = SZ_64K; - if (XE_WARN_ON(bo->ggtt_node.base.size)) { + if (XE_WARN_ON(bo->ggtt_node)) { /* Someone's already inserted this BO in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node.base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size); return 0; } @@ -526,15 +584,26 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, return err; xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); + + bo->ggtt_node = xe_ggtt_node_init(ggtt); + if (IS_ERR(bo->ggtt_node)) { + err = PTR_ERR(bo->ggtt_node); + goto out; + } + mutex_lock(&ggtt->lock); - err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node.base, bo->size, + err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node->base, bo->size, alignment, 0, start, end, 0); - if (!err) + if (err) + xe_ggtt_node_fini(bo->ggtt_node); + else xe_ggtt_map_bo(ggtt, bo); mutex_unlock(&ggtt->lock); if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE) xe_ggtt_invalidate(ggtt); + +out: xe_pm_runtime_put(tile_to_xe(ggtt->tile)); return err; @@ -574,13 +643,13 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) */ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { - if (XE_WARN_ON(!bo->ggtt_node.base.size)) + if (XE_WARN_ON(!bo->ggtt_node)) return; /* This BO is not currently in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node.base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size); - xe_ggtt_node_remove(ggtt, &bo->ggtt_node, + xe_ggtt_node_remove(bo->ggtt_node, bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); } @@ -647,7 +716,6 @@ static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node /** * xe_ggtt_assign - assign a GGTT region to the VF - * @ggtt: the &xe_ggtt where the node belongs * @node: the &xe_ggtt_node to update * @vfid: the VF identifier * @@ -655,11 +723,11 @@ static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node * In addition to PTE's VFID bits 11:2 also PRESENT bit 0 is set as on some * platforms VFs can't modify that either. 
 */ -void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct xe_ggtt_node *node, u16 vfid) +void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid) { - mutex_lock(&ggtt->lock); - xe_ggtt_assign_locked(ggtt, &node->base, vfid); - mutex_unlock(&ggtt->lock); + mutex_lock(&node->ggtt->lock); + xe_ggtt_assign_locked(node->ggtt, &node->base, vfid); + mutex_unlock(&node->ggtt->lock); } #endif diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index e258a4f381b4..27e7d67de004 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -13,17 +13,16 @@ struct drm_printer; int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init(struct xe_ggtt *ggtt); -int xe_ggtt_node_insert_balloon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, +struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt); +void xe_ggtt_node_fini(struct xe_ggtt_node *node); +int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 size); -void xe_ggtt_node_remove_balloon(struct xe_ggtt *ggtt, struct xe_ggtt_node *node); +void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node); -int xe_ggtt_node_insert(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, - u32 size, u32 align); -int xe_ggtt_node_insert_locked(struct xe_ggtt *ggtt, - struct xe_ggtt_node *node, +int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align); +int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, u32 size, u32 align, u32 mm_flags); -void xe_ggtt_node_remove(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, - bool invalidate); +void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate); bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node); void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); @@ -36,7 +35,7 @@ int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p); u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p); #ifdef CONFIG_PCI_IOV -void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct xe_ggtt_node *node, u16 vfid); +void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid); #endif #endif diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index af312a7d1031..0e8822ae13fc 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -50,9 +50,15 @@ struct xe_ggtt { }; /** - * struct xe_ggtt_node - A node in GGTT + * struct xe_ggtt_node - A node in GGTT. + * + * This struct needs to be initialized (only-once) with xe_ggtt_node_init() before any node + * insertion, reservation, or 'ballooning'. + * It will then be finalized by either xe_ggtt_node_remove() or xe_ggtt_node_remove_balloon().
*/ struct xe_ggtt_node { + /** @ggtt: Back pointer to xe_ggtt where this region will be inserted at */ + struct xe_ggtt *ggtt; /** @base: A drm_mm_node */ struct drm_mm_node base; }; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index e133594cc6bd..c8835d9eead6 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -232,14 +232,14 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config) { u32 n = 0; - if (xe_ggtt_node_allocated(&config->ggtt_region)) { + if (xe_ggtt_node_allocated(config->ggtt_region)) { cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); - cfg[n++] = lower_32_bits(config->ggtt_region.base.start); - cfg[n++] = upper_32_bits(config->ggtt_region.base.start); + cfg[n++] = lower_32_bits(config->ggtt_region->base.start); + cfg[n++] = upper_32_bits(config->ggtt_region->base.start); cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); - cfg[n++] = lower_32_bits(config->ggtt_region.base.size); - cfg[n++] = upper_32_bits(config->ggtt_region.base.size); + cfg[n++] = lower_32_bits(config->ggtt_region->base.size); + cfg[n++] = upper_32_bits(config->ggtt_region->base.size); } return n; @@ -371,27 +371,26 @@ static int pf_distribute_config_ggtt(struct xe_tile *tile, unsigned int vfid, u6 static void pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node) { - struct xe_ggtt *ggtt = tile->mem.ggtt; - if (xe_ggtt_node_allocated(node)) { /* * explicit GGTT PTE assignment to the PF using xe_ggtt_assign() * is redundant, as PTE will be implicitly re-assigned to PF by * the xe_ggtt_clear() called by below xe_ggtt_remove_node(). */ - xe_ggtt_node_remove(ggtt, node, false); + xe_ggtt_node_remove(node, false); } } static void pf_release_vf_config_ggtt(struct xe_gt *gt, struct xe_gt_sriov_config *config) { - pf_release_ggtt(gt_to_tile(gt), &config->ggtt_region); + pf_release_ggtt(gt_to_tile(gt), config->ggtt_region); + config->ggtt_region = NULL; } static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - struct xe_ggtt_node *node = &config->ggtt_region; + struct xe_ggtt_node *node = config->ggtt_region; struct xe_tile *tile = gt_to_tile(gt); struct xe_ggtt *ggtt = tile->mem.ggtt; u64 alignment = pf_get_ggtt_alignment(gt); @@ -415,25 +414,33 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) if (!size) return 0; - err = xe_ggtt_node_insert(ggtt, node, size, alignment); - if (unlikely(err)) - return err; + node = xe_ggtt_node_init(ggtt); + if (IS_ERR(node)) + return PTR_ERR(node); - xe_ggtt_assign(ggtt, node, vfid); + err = xe_ggtt_node_insert(node, size, alignment); + if (unlikely(err)) + goto err; + + xe_ggtt_assign(node, vfid); xe_gt_sriov_dbg_verbose(gt, "VF%u assigned GGTT %llx-%llx\n", vfid, node->base.start, node->base.start + node->base.size - 1); err = pf_distribute_config_ggtt(gt->tile, vfid, node->base.start, node->base.size); if (unlikely(err)) - return err; + goto err; + config->ggtt_region = node; return 0; +err: + xe_ggtt_node_fini(node); + return err; } static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - struct xe_ggtt_node *node = &config->ggtt_region; + struct xe_ggtt_node *node = config->ggtt_region; xe_gt_assert(gt, !xe_gt_is_media_type(gt)); return xe_ggtt_node_allocated(node) ? 
node->base.size : 0; @@ -2006,13 +2013,15 @@ int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p) for (n = 1; n <= total_vfs; n++) { config = &gt->sriov.pf.vfs[n].config; - if (!xe_ggtt_node_allocated(&config->ggtt_region)) + if (!xe_ggtt_node_allocated(config->ggtt_region)) continue; - string_get_size(config->ggtt_region.base.size, 1, STRING_UNITS_2, buf, sizeof(buf)); + string_get_size(config->ggtt_region->base.size, 1, STRING_UNITS_2, + buf, sizeof(buf)); drm_printf(p, "VF%u:\t%#0llx-%#llx\t(%s)\n", - n, config->ggtt_region.base.start, - config->ggtt_region.base.start + config->ggtt_region.base.size - 1, buf); + n, config->ggtt_region->base.start, + config->ggtt_region->base.start + config->ggtt_region->base.size - 1, + buf); } return 0; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h index a73d9a4b9e64..2d3b73d78f14 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h @@ -18,7 +18,7 @@ struct xe_bo; */ struct xe_gt_sriov_config { /** @ggtt_region: GGTT region assigned to the VF. */ - struct xe_ggtt_node ggtt_region; + struct xe_ggtt_node *ggtt_region; /** @lmem_obj: LMEM allocation for use by the VF. */ struct xe_bo *lmem_obj; /** @num_ctxs: number of GuC contexts IDs. */ diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 39ff4e7f902e..4ebc82e607af 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -495,6 +495,25 @@ u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt) return gt->sriov.vf.self_config.lmem_size; } +static struct xe_ggtt_node * +vf_balloon_ggtt_node(struct xe_ggtt *ggtt, u64 start, u64 end) +{ + struct xe_ggtt_node *node; + int err; + + node = xe_ggtt_node_init(ggtt); + if (IS_ERR(node)) + return node; + + err = xe_ggtt_node_insert_balloon(node, start, end); + if (err) { + xe_ggtt_node_fini(node); + return ERR_PTR(err); + } + + return node; +} + static int vf_balloon_ggtt(struct xe_gt *gt) { struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config; struct xe_tile *tile = gt_to_tile(gt); struct xe_ggtt *ggtt = tile->mem.ggtt; struct xe_device *xe = gt_to_xe(gt); u64 start, end; - int err; xe_gt_assert(gt, IS_SRIOV_VF(xe)); xe_gt_assert(gt, !xe_gt_is_media_type(gt)); @@ -528,37 +546,31 @@ static int vf_balloon_ggtt(struct xe_gt *gt) start = xe_wopcm_size(xe); end = config->ggtt_base; if (end != start) { - err = xe_ggtt_node_insert_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[0], - start, end); - if (err) - goto failed; + tile->sriov.vf.ggtt_balloon[0] = vf_balloon_ggtt_node(ggtt, start, end); + if (IS_ERR(tile->sriov.vf.ggtt_balloon[0])) + return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]); } start = config->ggtt_base + config->ggtt_size; end = GUC_GGTT_TOP; if (end != start) { - err = xe_ggtt_node_insert_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[1], - start, end); - if (err) - goto deballoon; + tile->sriov.vf.ggtt_balloon[1] = vf_balloon_ggtt_node(ggtt, start, end); + if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) { + xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); + return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]); + } } return 0; - -deballoon: - xe_ggtt_node_remove_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); -failed: - return err; } static void deballoon_ggtt(struct drm_device *drm, void *arg) { struct xe_tile *tile = arg; - struct xe_ggtt *ggtt = tile->mem.ggtt; xe_tile_assert(tile,
IS_SRIOV_VF(tile_to_xe(tile))); - xe_ggtt_node_remove_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[1]); - xe_ggtt_node_remove_balloon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); + xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[1]); + xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); } /** From 919bb54e989c1edef87e9797be125c94c450fc65 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:42 -0400 Subject: [PATCH 087/108] drm/xe: Fix missing runtime outer protection for ggtt_remove_node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Defer the ggtt node removal to a thread if runtime_pm is not active. The ggtt node removal can be called from multiple places, including places where the callers cannot provide the outer protection and places where we are already inside other locks. So, try to grab the runtime reference if the device is already active; otherwise, defer the removal to a separate thread from which we are sure we can wake the device up. v2: - use xe wq instead of system wq (Matt and CI) - Avoid GFP_KERNEL to be future proof since this removal can be called from outside our drivers and we don't want to block if atomic is needed. (Brost) v3: amend forgotten chunk declaring xe_device. v4: Use a xe_ggtt_region to encapsulate the node and removal info, without the need for any memory allocation at runtime. v5: Actually fill the delayed_removal.invalidate (Brost) v6: - Ensure that ggtt_region is not freed before work finishes (Auld) - Own wq to ensure that the queued works are flushed before ggtt_fini (Brost) v7: also free ggtt_region on early !bound return (Auld) v8: Address the null deref (CI) v9: Based on the new xe_ggtt_node for the proper care of the lifetime of the object. v10: Redo the lost v5 change.
(Brost) v11: Simplify the invalidate_on_remove (Lucas) Cc: Matthew Auld Cc: Paulo Zanoni Cc: Francois Dugast Cc: Thomas Hellström Cc: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-12-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 106 ++++++++++++++++++----------- drivers/gpu/drm/xe/xe_ggtt_types.h | 6 ++ 2 files changed, 73 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 58d7cad2425b..b4a0cd2b62ed 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -161,6 +161,7 @@ static void ggtt_fini_early(struct drm_device *drm, void *arg) { struct xe_ggtt *ggtt = arg; + destroy_workqueue(ggtt->wq); mutex_destroy(&ggtt->lock); drm_mm_takedown(&ggtt->mm); } @@ -242,6 +243,8 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) else ggtt->pt_ops = &xelp_pt_ops; + ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, 0); + drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), ggtt->size - xe_wopcm_size(xe)); mutex_init(&ggtt->lock); @@ -276,6 +279,68 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) mutex_unlock(&ggtt->lock); } +static void ggtt_node_remove(struct xe_ggtt_node *node) +{ + struct xe_ggtt *ggtt = node->ggtt; + struct xe_device *xe = tile_to_xe(ggtt->tile); + bool bound; + int idx; + + if (!node || !node->ggtt) + return; + + bound = drm_dev_enter(&xe->drm, &idx); + + mutex_lock(&ggtt->lock); + if (bound) + xe_ggtt_clear(ggtt, node->base.start, node->base.size); + drm_mm_remove_node(&node->base); + node->base.size = 0; + mutex_unlock(&ggtt->lock); + + if (!bound) + goto free_node; + + if (node->invalidate_on_remove) + xe_ggtt_invalidate(ggtt); + + drm_dev_exit(idx); + +free_node: + xe_ggtt_node_fini(node); +} + +static void ggtt_node_remove_work_func(struct work_struct *work) +{ + struct xe_ggtt_node *node = container_of(work, typeof(*node), + delayed_removal_work); + struct xe_device *xe = tile_to_xe(node->ggtt->tile); + + xe_pm_runtime_get(xe); + ggtt_node_remove(node); + xe_pm_runtime_put(xe); +} + +/** + * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT + * @node: the &xe_ggtt_node to be removed + * @invalidate: if node needs invalidation upon removal + */ +void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate) +{ + struct xe_ggtt *ggtt = node->ggtt; + struct xe_device *xe = tile_to_xe(ggtt->tile); + + node->invalidate_on_remove = invalidate; + + if (xe_pm_runtime_get_if_active(xe)) { + ggtt_node_remove(node); + xe_pm_runtime_put(xe); + } else { + queue_work(ggtt->wq, &node->delayed_removal_work); + } +} + /** * xe_ggtt_init - Regular non-early GGTT initialization * @ggtt: the &xe_ggtt to be initialized @@ -471,7 +536,9 @@ struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt) if (!node) return ERR_PTR(-ENOMEM); + INIT_WORK(&node->delayed_removal_work, ggtt_node_remove_work_func); node->ggtt = ggtt; + return node; } @@ -488,45 +555,6 @@ void xe_ggtt_node_fini(struct xe_ggtt_node *node) kfree(node); } -/** - * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT - * @node: the &xe_ggtt_node to be removed - * @invalidate: if node needs invalidation upon removal - */ -void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate) -{ - struct xe_ggtt *ggtt = node->ggtt; - struct xe_device *xe = tile_to_xe(ggtt->tile); - bool bound; - int idx; - - if (!node || !node->ggtt) - return; - - bound = drm_dev_enter(&xe->drm, &idx); - if (bound) - xe_pm_runtime_get_noresume(xe); - 
- mutex_lock(&ggtt->lock); - if (bound) - xe_ggtt_clear(ggtt, node->base.start, node->base.size); - drm_mm_remove_node(&node->base); - node->base.size = 0; - mutex_unlock(&ggtt->lock); - - if (!bound) - goto free_node; - - if (invalidate) - xe_ggtt_invalidate(ggtt); - - xe_pm_runtime_put(xe); - drm_dev_exit(idx); - -free_node: - xe_ggtt_node_fini(node); -} - /** * xe_ggtt_node_allocated - Check if node is allocated in GGTT * @node: the &xe_ggtt_node to be inspected diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index 0e8822ae13fc..cb02b7994a9a 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -47,6 +47,8 @@ struct xe_ggtt { struct drm_mm mm; /** @access_count: counts GGTT writes */ unsigned int access_count; + /** @wq: Dedicated unordered work queue to process node removals */ + struct workqueue_struct *wq; }; /** @@ -61,6 +63,10 @@ struct xe_ggtt_node { struct xe_ggtt *ggtt; /** @base: A drm_mm_node */ struct drm_mm_node base; + /** @delayed_removal_work: The work struct for the delayed removal */ + struct work_struct delayed_removal_work; + /** @invalidate_on_remove: If it needs invalidation upon removal */ + bool invalidate_on_remove; }; /** From 25ebe10e3f4c897a2d348a9d4b674ee9ea28225c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 23 Aug 2024 09:22:07 -0700 Subject: [PATCH 088/108] Revert "drm/xe: Invalidate media_gt TLBs in PT code" This reverts commit 40520283e0fd11237ed9dfc0991503b3403d5fa4. We can't install dma-fence-chain in timeline sync objs. Signed-off-by: Matthew Brost Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240823162207.2168887-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_pt.c | 99 ++++++++------------------------------ 1 file changed, 19 insertions(+), 80 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 6c6714af3d5d..579ed31b46db 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -3,8 +3,6 @@ * Copyright © 2022 Intel Corporation */ -#include - #include "xe_pt.h" #include "regs/xe_gtt_defs.h" @@ -1851,20 +1849,13 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) + struct xe_vma *vma, struct dma_fence *fence) { - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, pt_update_ops->wait_vm_bookkeep ? DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } vma->tile_present |= BIT(tile->id); vma->tile_staged &= ~BIT(tile->id); if (xe_vma_is_userptr(vma)) { @@ -1884,20 +1875,13 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) + struct xe_vma *vma, struct dma_fence *fence) { - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { + if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, pt_update_ops->wait_vm_bookkeep ? 
DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } vma->tile_present &= ~BIT(tile->id); if (!vma->tile_present) { list_del_init(&vma->combined_links.rebind); @@ -1914,8 +1898,7 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, static void op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op, struct dma_fence *fence, - struct dma_fence *fence2) + struct xe_vma_op *op, struct dma_fence *fence) { xe_vm_assert_held(vm); @@ -1924,28 +1907,26 @@ static void op_commit(struct xe_vm *vm, if (!op->map.immediate && xe_vm_in_fault_mode(vm)) break; - bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, - fence2); + bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence); break; case DRM_GPUVA_OP_REMAP: unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va), fence, - fence2); + gpuva_to_vma(op->base.remap.unmap->va), fence); if (op->remap.prev) bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence, fence2); + fence); if (op->remap.next) bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence, fence2); + fence); break; case DRM_GPUVA_OP_UNMAP: unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va), fence, fence2); + gpuva_to_vma(op->base.unmap.va), fence); break; case DRM_GPUVA_OP_PREFETCH: bind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va), fence, fence2); + gpuva_to_vma(op->base.prefetch.va), fence); break; default: drm_warn(&vm->xe->drm, "NOT POSSIBLE"); @@ -1982,8 +1963,7 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) struct xe_vm_pgtable_update_ops *pt_update_ops = &vops->pt_update_ops[tile->id]; struct dma_fence *fence; - struct invalidation_fence *ifence = NULL, *mfence = NULL; - struct dma_fence_chain *chain_fence = NULL; + struct invalidation_fence *ifence = NULL; struct xe_range_fence *rfence; struct xe_vma_op *op; int err = 0, i; @@ -2016,18 +1996,6 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) err = -ENOMEM; goto kill_vm_tile1; } - if (tile->media_gt) { - mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!mfence) { - err = -ENOMEM; - goto free_ifence; - } - chain_fence = dma_fence_chain_alloc(); - if (!chain_fence) { - err = -ENOMEM; - goto free_ifence; - } - } } rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); @@ -2059,46 +2027,19 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) /* tlb invalidation must be done before signaling rebind */ if (ifence) { - if (mfence) - dma_fence_get(fence); invalidation_fence_init(tile->primary_gt, ifence, fence, pt_update_ops->start, pt_update_ops->last, vm->usm.asid); - if (mfence) { - invalidation_fence_init(tile->media_gt, mfence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - dma_fence_chain_init(chain_fence, &ifence->base.base, - &mfence->base.base, 0); - fence = &chain_fence->base; - } else { - fence = &ifence->base.base; - } + fence = &ifence->base.base; } - if (!mfence) { - dma_resv_add_fence(xe_vm_resv(vm), fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); + dma_resv_add_fence(xe_vm_resv(vm), fence, + pt_update_ops->wait_vm_bookkeep ? 
+ DMA_RESV_USAGE_KERNEL : + DMA_RESV_USAGE_BOOKKEEP); - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); - } else { - dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, - &ifence->base.base, &mfence->base.base); - } + list_for_each_entry(op, &vops->list, link) + op_commit(vops->vm, tile, pt_update_ops, op, fence); if (pt_update_ops->needs_userptr_lock) up_read(&vm->userptr.notifier_lock); @@ -2108,8 +2049,6 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) free_rfence: kfree(rfence); free_ifence: - dma_fence_chain_free(chain_fence); - kfree(mfence); kfree(ifence); kill_vm_tile1: if (err != -EAGAIN && tile->id) From 5b993d00d7f0c970a5e5d34c1031069fb13b6986 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 10:29:53 -0700 Subject: [PATCH 089/108] drm/xe: Move ggtt_fini to devm managed ggtt->scratch is destroyed via devm, ggtt_fini sets ggtt->scratch to NULL, and GGTT clears use ggtt->scratch, so move ggtt_fini to devm as well to ensure ggtt->scratch is set to NULL before the BO is destroyed. Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240820172958.1095143-2-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_ggtt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index b4a0cd2b62ed..a3ce91decdce 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -166,7 +166,7 @@ static void ggtt_fini_early(struct drm_device *drm, void *arg) drm_mm_takedown(&ggtt->mm); } -static void ggtt_fini(struct drm_device *drm, void *arg) +static void ggtt_fini(void *arg) { struct xe_ggtt *ggtt = arg; @@ -374,7 +374,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) xe_ggtt_initial_clear(ggtt); - return drmm_add_action_or_reset(&xe->drm, ggtt_fini, ggtt); + return devm_add_action_or_reset(xe->drm.dev, ggtt_fini, ggtt); err: ggtt->scratch = NULL; return err; From b5de6a5ced074910b8fe57d3b0ab7f8843f85a3a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 10:29:54 -0700 Subject: [PATCH 090/108] drm/xe: Set firmware state to loadable before registering guc_fini_hw The registered guc_fini_hw action calls __xe_uc_fw_status(), which is only expected to be called after the firmware state has been initialized. Move the firmware state initialization before registering guc_fini_hw.
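A minimal sketch of the resulting order, mirroring the diff below (devm_add_action_or_reset() invokes the action immediately when registration fails, so the firmware state must already be valid at the point of registration):

    /* state must be valid before the devm action can possibly run */
    xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);

    ret = devm_add_action_or_reset(xe->drm.dev, guc_fini_hw, guc);
    if (ret)
            goto out;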
Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240820172958.1095143-3-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index de0fe9e65746..52df28032a6f 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -350,6 +350,8 @@ int xe_guc_init(struct xe_guc *guc) if (ret) goto out; + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); + ret = devm_add_action_or_reset(xe->drm.dev, guc_fini_hw, guc); if (ret) goto out; @@ -358,8 +360,6 @@ int xe_guc_init(struct xe_guc *guc) xe_guc_comm_init_early(guc); - xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); - return 0; out: From a323782567812ee925e9b7926445532c7afe331b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 10:29:55 -0700 Subject: [PATCH 091/108] drm/xe: Drop warn on xe_guc_pc_gucrc_disable in guc pc fini Not a big deal if CT is down as driver is unloading, no need to warn. Signed-off-by: Matthew Brost Reviewed-by: Jagmeet Randhawa Link: https://patchwork.freedesktop.org/patch/msgid/20240820172958.1095143-4-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_pc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 32e93a8127d4..def503abeed5 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -1042,7 +1042,7 @@ static void xe_guc_pc_fini_hw(void *arg) return; XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL)); - XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); + xe_guc_pc_gucrc_disable(pc); XE_WARN_ON(xe_guc_pc_stop(pc)); /* Bind requested freq to mert_freq_cap before unload */ From 6eb2aad402cc94ab4dd3780d8fd94c5c9bb7ed4a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 10:29:56 -0700 Subject: [PATCH 092/108] drm/xe: Move hw_engine_fini to devm managed Kernel BOs are destroyed with GGTT mappings, this is hardware interaction so use devm. Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240820172958.1095143-5-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_hw_engine.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index aa9b2b16a6e0..18980238a2ea 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -267,7 +267,7 @@ static const struct engine_info engine_infos[] = { }, }; -static void hw_engine_fini(struct drm_device *drm, void *arg) +static void hw_engine_fini(void *arg) { struct xe_hw_engine *hwe = arg; @@ -585,7 +585,7 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY) gt->usm.reserved_bcs_instance = hwe->instance; - return drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe); + return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe); err_kernel_lrc: xe_lrc_put(hwe->kernel_lrc); From 501d94389310bb282915e730386d1150b13ae321 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 10:29:58 -0700 Subject: [PATCH 093/108] drm/xe: Update xe_sa to use xe_managed_bo_create_pin_map Preferred way to create kernel BOs is xe_managed_bo_create_pin_map, use it. 
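As a short illustration of the preferred pattern (taken from the diff below), the managed variant ties the BO lifetime to the driver, so the fini and error paths no longer need an explicit xe_bo_unpin_map_no_vm():

    bo = xe_managed_bo_create_pin_map(xe, tile, size,
                                      XE_BO_FLAG_VRAM_IF_DGFX(tile) |
                                      XE_BO_FLAG_GGTT |
                                      XE_BO_FLAG_GGTT_INVALIDATE);
    if (IS_ERR(bo))
            return (struct xe_sa_manager *)bo;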
Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240820172958.1095143-7-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_sa.c | 13 ++++++------- drivers/gpu/drm/xe/xe_sa_types.h | 1 + 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index f3060979e63f..fe2cb2a96f78 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -25,10 +25,9 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg) drm_suballoc_manager_fini(&sa_manager->base); - if (bo->vmap.is_iomem) + if (sa_manager->is_iomem) kvfree(sa_manager->cpu_ptr); - xe_bo_unpin_map_no_vm(bo); sa_manager->bo = NULL; } @@ -47,16 +46,17 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 sa_manager->bo = NULL; - bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + bo = xe_managed_bo_create_pin_map(xe, tile, size, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(bo)) { drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n", PTR_ERR(bo)); return (struct xe_sa_manager *)bo; } sa_manager->bo = bo; + sa_manager->is_iomem = bo->vmap.is_iomem; drm_suballoc_manager_init(&sa_manager->base, managed_size, align); sa_manager->gpu_addr = xe_bo_ggtt_addr(bo); @@ -64,7 +64,6 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 if (bo->vmap.is_iomem) { sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL); if (!sa_manager->cpu_ptr) { - xe_bo_unpin_map_no_vm(sa_manager->bo); sa_manager->bo = NULL; return ERR_PTR(-ENOMEM); } diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h index 2ef896aeca1d..2b070ff1292e 100644 --- a/drivers/gpu/drm/xe/xe_sa_types.h +++ b/drivers/gpu/drm/xe/xe_sa_types.h @@ -14,6 +14,7 @@ struct xe_sa_manager { struct xe_bo *bo; u64 gpu_addr; void *cpu_ptr; + bool is_iomem; }; #endif From a64e7e5b05e014dad9ae5858c9644d61400ec6ef Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 23 Aug 2024 14:21:46 +0300 Subject: [PATCH 094/108] drm/xe: Suspend/resume user access only during system s/r Enable/Disable user access only during system suspend/resume. 
This should not happen during runtime s/r v2: rebased Reviewed-by: Arun R Murthy Signed-off-by: Imre Deak Signed-off-by: Vinod Govindapillai Link: https://patchwork.freedesktop.org/patch/msgid/20240823112148.327015-2-vinod.govindapillai@intel.com --- drivers/gpu/drm/xe/display/xe_display.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 30dfdac9f6fa..ad7fc5137b42 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -323,7 +323,8 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); if (has_display(xe)) { drm_kms_helper_poll_disable(&xe->drm); - intel_display_driver_disable_user_access(xe); + if (!runtime) + intel_display_driver_disable_user_access(xe); } if (!runtime) @@ -335,7 +336,7 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) intel_hpd_cancel_work(xe); - if (has_display(xe)) + if (!runtime && has_display(xe)) intel_display_driver_suspend_access(xe); intel_encoder_suspend_all(&xe->display); @@ -381,7 +382,7 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime) intel_display_driver_init_hw(xe); intel_hpd_init(xe); - if (has_display(xe)) + if (!runtime && has_display(xe)) intel_display_driver_resume_access(xe); /* MST sideband requires HPD interrupts enabled */ @@ -391,7 +392,8 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime) if (has_display(xe)) { drm_kms_helper_poll_enable(&xe->drm); - intel_display_driver_enable_user_access(xe); + if (!runtime) + intel_display_driver_enable_user_access(xe); } intel_hpd_poll_disable(xe); From 122824165471ea492d8b07d15384345940aababb Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 23 Aug 2024 14:21:47 +0300 Subject: [PATCH 095/108] drm/xe: Handle polling only for system s/r in xe_display_pm_suspend/resume() This is a preparation for the follow-up patch where polling will be handled properly for all cases during runtime suspend/resume. 
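The resulting gating pattern (a sketch matching the diff below) folds polling, user access and driver suspend under a single system-suspend-only branch:

    if (!runtime && has_display(xe)) {
            drm_kms_helper_poll_disable(&xe->drm);
            intel_display_driver_disable_user_access(xe);
            intel_display_driver_suspend(xe);
    }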
v2: rebased Reviewed-by: Arun R Murthy Signed-off-by: Imre Deak Signed-off-by: Vinod Govindapillai Link: https://patchwork.freedesktop.org/patch/msgid/20240823112148.327015-3-vinod.govindapillai@intel.com --- drivers/gpu/drm/xe/display/xe_display.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index ad7fc5137b42..00dd38f6177b 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -321,14 +321,11 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) */ intel_power_domains_disable(xe); intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); - if (has_display(xe)) { + if (!runtime && has_display(xe)) { drm_kms_helper_poll_disable(&xe->drm); - if (!runtime) - intel_display_driver_disable_user_access(xe); - } - - if (!runtime) + intel_display_driver_disable_user_access(xe); intel_display_driver_suspend(xe); + } xe_display_flush_cleanup_work(xe); @@ -387,15 +384,12 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime) /* MST sideband requires HPD interrupts enabled */ intel_dp_mst_resume(xe); - if (!runtime) + if (!runtime && has_display(xe)) { intel_display_driver_resume(xe); - - if (has_display(xe)) { drm_kms_helper_poll_enable(&xe->drm); - if (!runtime) - intel_display_driver_enable_user_access(xe); + intel_display_driver_enable_user_access(xe); + intel_hpd_poll_disable(xe); } - intel_hpd_poll_disable(xe); intel_opregion_resume(display); From 66a0f6b9f5fc205272035b6ffa4830be51e3f787 Mon Sep 17 00:00:00 2001 From: Vinod Govindapillai Date: Fri, 23 Aug 2024 14:21:48 +0300 Subject: [PATCH 096/108] drm/xe/display: handle HPD polling in display runtime suspend/resume In XE, display runtime suspend / resume routines are called only if d3cold is allowed. This makes the driver unable to detect any HPDs once the device goes into runtime suspend state in platforms like LNL. Update the display runtime suspend / resume routines to include HPD polling regardless of d3cold status. While xe_display_pm_suspend/resume() performs steps during runtime suspend/resume that shouldn't happen (like suspending MST) and misses other steps (like enabling DC9), this patchset is meant to keep the current behavior in these respects, leaving the corresponding updates for a follow-up. v2: have a separate function for display runtime s/r (Rodrigo) v3: better streamlining of system s/r and runtime s/r calls (Imre) v4: rebased Reviewed-by: Arun R Murthy Signed-off-by: Vinod Govindapillai Link: https://patchwork.freedesktop.org/patch/msgid/20240823112148.327015-4-vinod.govindapillai@intel.com --- drivers/gpu/drm/xe/display/xe_display.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/xe/display/xe_display.h | 4 ++++ drivers/gpu/drm/xe/xe_pm.c | 8 +++++--- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 00dd38f6177b..710b1e2170c1 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -308,6 +308,18 @@ static void xe_display_flush_cleanup_work(struct xe_device *xe) } } +/* TODO: System and runtime suspend/resume sequences will be sanitized as a follow-up.
*/ +void xe_display_pm_runtime_suspend(struct xe_device *xe) +{ + if (!xe->info.probe_display) + return; + + if (xe->d3cold.allowed) + xe_display_pm_suspend(xe, true); + + intel_hpd_poll_enable(xe); +} + void xe_display_pm_suspend(struct xe_device *xe, bool runtime) { struct intel_display *display = &xe->display; @@ -354,6 +366,17 @@ void xe_display_pm_suspend_late(struct xe_device *xe) intel_display_power_suspend_late(xe); } +void xe_display_pm_runtime_resume(struct xe_device *xe) +{ + if (!xe->info.probe_display) + return; + + intel_hpd_poll_disable(xe); + + if (xe->d3cold.allowed) + xe_display_pm_resume(xe, true); +} + void xe_display_pm_resume_early(struct xe_device *xe) { if (!xe->info.probe_display) diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h index 000fb5799df5..53d727fd792b 100644 --- a/drivers/gpu/drm/xe/display/xe_display.h +++ b/drivers/gpu/drm/xe/display/xe_display.h @@ -38,6 +38,8 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime); void xe_display_pm_suspend_late(struct xe_device *xe); void xe_display_pm_resume_early(struct xe_device *xe); void xe_display_pm_resume(struct xe_device *xe, bool runtime); +void xe_display_pm_runtime_suspend(struct xe_device *xe); +void xe_display_pm_runtime_resume(struct xe_device *xe); #else @@ -67,6 +69,8 @@ static inline void xe_display_pm_suspend(struct xe_device *xe, bool runtime) {} static inline void xe_display_pm_suspend_late(struct xe_device *xe) {} static inline void xe_display_pm_resume_early(struct xe_device *xe) {} static inline void xe_display_pm_resume(struct xe_device *xe, bool runtime) {} +static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {} +static inline void xe_display_pm_runtime_resume(struct xe_device *xe) {} #endif /* CONFIG_DRM_XE_DISPLAY */ #endif /* _XE_DISPLAY_H_ */ diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index fcfb49af8c89..c247e1cb8aba 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -366,9 +366,9 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_bo_runtime_pm_release_mmap_offset(bo); mutex_unlock(&xe->mem_access.vram_userfault.lock); - if (xe->d3cold.allowed) { - xe_display_pm_suspend(xe, true); + xe_display_pm_runtime_suspend(xe); + if (xe->d3cold.allowed) { err = xe_bo_evict_all(xe); if (err) goto out; @@ -431,12 +431,14 @@ int xe_pm_runtime_resume(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_resume(gt); + xe_display_pm_runtime_resume(xe); + if (xe->d3cold.allowed) { - xe_display_pm_resume(xe, true); err = xe_bo_restore_user(xe); if (err) goto out; } + out: lock_map_release(&xe_pm_runtime_lockdep_map); xe_pm_write_callback_task(xe, NULL); From ff9c674d1127e768050fe418470e74586985c87b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 23 Aug 2024 20:47:13 -0700 Subject: [PATCH 097/108] drm/xe: Fix total initialization in xe_ggtt_print_holes() Clang warns (or errors with CONFIG_DRM_WERROR or CONFIG_WERROR): drivers/gpu/drm/xe/xe_ggtt.c:810:3: error: variable 'total' is uninitialized when used here [-Werror,-Wuninitialized] 810 | total += hole_size; | ^~~~~ drivers/gpu/drm/xe/xe_ggtt.c:798:11: note: initialize the variable 'total' to silence this warning 798 | u64 total; | ^ | = 0 1 error generated. Move the zero initialization of total from xe_gt_sriov_pf_config_print_available_ggtt() to xe_ggtt_print_holes() to resolve the warning. 
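The fix in miniature (a simplified sketch of the accumulation in xe_ggtt_print_holes(); the alignment and hole_min_start handling of the real function is omitted here):

    u64 total = 0;  /* must be zero-initialized before accumulating */

    drm_mm_for_each_hole(entry, &ggtt->mm, hole_start, hole_end) {
            hole_size = hole_end - hole_start;
            total += hole_size;
    }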
Fixes: 136367290ea5 ("drm/xe: Introduce xe_ggtt_print_holes") Signed-off-by: Nathan Chancellor Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240823-drm-xe-fix-total-in-xe_ggtt_print_holes-v1-1-12b02d079327@kernel.org Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_ggtt.c | 2 +- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index a3ce91decdce..86fc6afa43bd 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -795,7 +795,7 @@ u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer const struct drm_mm_node *entry; u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile)); u64 hole_start, hole_end, hole_size; - u64 total; + u64 total = 0; char buf[10]; mutex_lock(&ggtt->lock); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index c8835d9eead6..41ed07b153b5 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -2110,7 +2110,7 @@ int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_prin { struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt; u64 alignment = pf_get_ggtt_alignment(gt); - u64 spare, avail, total = 0; + u64 spare, avail, total; char buf[10]; xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); From 65112db0c21022cb1caed5a03c6392eaaf984c14 Mon Sep 17 00:00:00 2001 From: Apoorva Singh Date: Fri, 16 Aug 2024 13:33:55 +0530 Subject: [PATCH 098/108] drm/xe: Remove NULL check of lrc->bo in xe_lrc_snapshot_capture() - lrc->bo NULL check is not needed in xe_lrc_snapshot_capture() as it's already been taken care of in xe_lrc_init(). Signed-off-by: Apoorva Singh Acked-by: Rodrigo Vivi Reviewed-by: Matthew Brost Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240816080355.897256-1-apoorva.singh@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 974a9cd8c379..aec7db39c061 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1649,7 +1649,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) if (!snapshot) return NULL; - if (lrc->bo && lrc->bo->vm) + if (lrc->bo->vm) xe_vm_get(lrc->bo->vm); snapshot->context_desc = xe_lrc_ggtt_addr(lrc); From 9c57bc08652a7c8e0e355c52c023fddc090fe9bb Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 22 Aug 2024 15:46:15 -0700 Subject: [PATCH 099/108] drm/xe/lnl: Drop force_probe requirement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lunar Lake has been usable for a while in a desktop setup. Bugs are sporadically showing up in CI but are being promptly fixed. Nothing very concerning. All the uapi changes related to fundamental platform usage have been finalized. Remove the force_probe requirement and enable the platform by default.
Cc: Thomas Hellström Cc: Rodrigo Vivi Cc: Jani Nikula Reviewed-by: Thomas Hellström Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240822224615.793540-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 0f66cf067e2a..703822d39800 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -338,7 +338,6 @@ static const struct xe_device_desc mtl_desc = { static const struct xe_device_desc lnl_desc = { PLATFORM(LUNARLAKE), .has_display = true, - .require_force_probe = true, }; static const struct xe_device_desc bmg_desc = { From 11b7309dbe9fa98d9b8d18cd51db4f385c37ae30 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 20 Aug 2024 14:32:28 +0530 Subject: [PATCH 100/108] drm/xe: Remove extra dma_fence_put on xe_sync_entry_add_deps failure drm_sched_job_add_dependency() drops references even in case of error, no need for caller to call dma_fence_put. Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Matthew Brost Reviewed-by: Ashutosh Dixit Acked-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240820090230.3258128-1-himal.prasad.ghimiray@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_sync.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index ca826aeb41ea..a0675f57a398 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -206,16 +206,9 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job) { - int err; - - if (sync->fence) { - err = drm_sched_job_add_dependency(&job->drm, - dma_fence_get(sync->fence)); - if (err) { - dma_fence_put(sync->fence); - return err; - } - } + if (sync->fence) + return drm_sched_job_add_dependency(&job->drm, + dma_fence_get(sync->fence)); return 0; } From 19f01d4bbe9daf71901b200ab5c52591946b022a Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 20 Aug 2024 14:32:29 +0530 Subject: [PATCH 101/108] drm/xe: Remove unrequired NULL checks in xe_sync_entry_cleanup dma_fence_put() and dma_fence_chain_free() can handle NULL input, there is no need for NULL check by caller. 
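Both helpers follow the kfree() convention of being a no-op on a NULL argument, so the cleanup collapses to unconditional calls (as in the diff below):

    dma_fence_put(sync->fence);              /* NULL-safe */
    dma_fence_chain_free(sync->chain_fence); /* NULL-safe */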
Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Matthew Brost Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240820090230.3258128-2-himal.prasad.ghimiray@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_sync.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index a0675f57a398..436faff09bac 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -249,10 +249,8 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync) { if (sync->syncobj) drm_syncobj_put(sync->syncobj); - if (sync->fence) - dma_fence_put(sync->fence); - if (sync->chain_fence) - dma_fence_chain_free(sync->chain_fence); + dma_fence_put(sync->fence); + dma_fence_chain_free(sync->chain_fence); if (sync->ufence) user_fence_put(sync->ufence); } From 8a04e342684a75ad1455fc3f23fab8e67c1aa9fc Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Tue, 20 Aug 2024 14:32:30 +0530 Subject: [PATCH 102/108] drm/xe: Remove unrequired NULL check in xe_sched_job_free_fences dma_fence_chain_free() can handle NULL input, there is no need for NULL check by caller. Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Matthew Brost Reviewed-by: Jagmeet Randhawa Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240820090230.3258128-3-himal.prasad.ghimiray@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/xe_sched_job.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 9628f9deb3c0..55d47450b2c6 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -89,8 +89,7 @@ static void xe_sched_job_free_fences(struct xe_sched_job *job) if (ptrs->lrc_fence) xe_lrc_free_seqno_fence(ptrs->lrc_fence); - if (ptrs->chain_fence) - dma_fence_chain_free(ptrs->chain_fence); + dma_fence_chain_free(ptrs->chain_fence); } } From 014125c64d09e58e90dde49fbb57d802a13e2559 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 27 Aug 2024 14:09:05 +0200 Subject: [PATCH 103/108] drm/xe: Support 'nomodeset' kernel command-line option Setting 'nomodeset' on the kernel command line disables all graphics drivers with modesetting capabilities, leaving only firmware drivers, such as simpledrm or efifb. Most DRM drivers automatically support 'nomodeset' via DRM's module helper macros. In xe, which uses regular module_init(), manually call drm_firmware_drivers_only() to test for 'nomodeset'. Do not register the driver if set. 
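A minimal sketch of the gate (matching the diff below); making it the first entry of the init table aborts xe_init() before any other init function runs:

    static int xe_check_nomodeset(void)
    {
            if (drm_firmware_drivers_only())
                    return -ENODEV;

            return 0;
    }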
v2: - use xe's init table (Lucas) - do NULL test for init/exit functions Signed-off-by: Thomas Zimmermann Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240827121003.97429-1-tzimmermann@suse.de Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_module.c | 39 +++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index e7e4b1ebab50..cbc444ec1854 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -8,6 +8,8 @@ #include #include +#include + #include "xe_drv.h" #include "xe_hw_fence.h" #include "xe_pci.h" @@ -61,12 +63,23 @@ module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600); MODULE_PARM_DESC(wedged_mode, "Module's default policy for the wedged mode - 0=never, 1=upon-critical-errors[default], 2=upon-any-hang"); +static int xe_check_nomodeset(void) +{ + if (drm_firmware_drivers_only()) + return -ENODEV; + + return 0; +} + struct init_funcs { int (*init)(void); void (*exit)(void); }; static const struct init_funcs init_funcs[] = { + { + .init = xe_check_nomodeset, + }, { .init = xe_hw_fence_module_init, .exit = xe_hw_fence_module_exit, @@ -85,15 +98,35 @@ static const struct init_funcs init_funcs[] = { }, }; +static int __init xe_call_init_func(unsigned int i) +{ + if (WARN_ON(i >= ARRAY_SIZE(init_funcs))) + return 0; + if (!init_funcs[i].init) + return 0; + + return init_funcs[i].init(); +} + +static void xe_call_exit_func(unsigned int i) +{ + if (WARN_ON(i >= ARRAY_SIZE(init_funcs))) + return; + if (!init_funcs[i].exit) + return; + + init_funcs[i].exit(); +} + static int __init xe_init(void) { int err, i; for (i = 0; i < ARRAY_SIZE(init_funcs); i++) { - err = init_funcs[i].init(); + err = xe_call_init_func(i); if (err) { while (i--) - init_funcs[i].exit(); + xe_call_exit_func(i); return err; } } @@ -106,7 +139,7 @@ static void __exit xe_exit(void) int i; for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--) - init_funcs[i].exit(); + xe_call_exit_func(i); } module_init(xe_init); From 7546a8201ba288530ed1bbf2a8bdcce5e034a234 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 28 Aug 2024 10:36:34 +0200 Subject: [PATCH 104/108] Revert "drm/xe/lnl: Offload system clear page activity to GPU" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This optimization relied on having to clear CCS on allocations. If there is no need to clear CCS on allocations, then this would mostly help in reducing CPU utilization. Revert this patch at this moment because of: 1) Currently Xe can't do clear-on-free and is using an invalid ttm flag, TTM_TT_FLAG_CLEARED_ON_FREE, which could poison the global ttm pool on a multi-device setup. 2) Also, for LNL, CPU:WB doesn't require clearing CCS, as such a BO will not be allowed to bind with a compression PTE. Subsequent patch will disable clearing CCS for CPU:WB BOs for LNL. This reverts commit 23683061805be368c8d1c7e7ff52abc470cac275.
Cc: Christian König Cc: Himal Prasad Ghimiray Cc: Lucas De Marchi Cc: Matthew Auld Cc: Matthew Brost Cc: Thomas Hellström Reviewed-by: Thomas Hellström Signed-off-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240828083635.23601-1-nirmoy.das@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_bo.c | 26 ++------------------------ drivers/gpu/drm/xe/xe_device_types.h | 2 -- drivers/gpu/drm/xe/xe_ttm_sys_mgr.c | 12 ------------ 3 files changed, 2 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index cbe7bf098970..e3d68710a91a 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -396,14 +396,6 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, caching = ttm_uncached; } - /* - * If the device can support gpu clear system pages then set proper ttm - * flag. Zeroed pages are only required for ttm_bo_type_device so - * unwanted data is not leaked to userspace. - */ - if (ttm_bo->type == ttm_bo_type_device && xe->mem.gpu_page_clear_sys) - page_flags |= TTM_TT_FLAG_CLEARED_ON_FREE; - err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages); if (err) { kfree(tt); @@ -425,10 +417,6 @@ static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt, if (tt->page_flags & TTM_TT_FLAG_EXTERNAL) return 0; - /* Clear TTM_TT_FLAG_ZERO_ALLOC when GPU is set to clear system pages */ - if (tt->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE) - tt->page_flags &= ~TTM_TT_FLAG_ZERO_ALLOC; - err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx); if (err) return err; @@ -671,16 +659,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, bool needs_clear; bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) && ttm && ttm_tt_is_populated(ttm)) ? true : false; - bool clear_system_pages; int ret = 0; - /* - * Clear TTM_TT_FLAG_CLEARED_ON_FREE on bo creation path when - * moving to system as the bo doesn't have dma_mapping. - */ - if (!old_mem && ttm && !ttm_tt_is_populated(ttm)) - ttm->page_flags &= ~TTM_TT_FLAG_CLEARED_ON_FREE; - /* Bo creation path, moving to system or TT. */ if ((!old_mem && ttm) && !handle_system_ccs) { if (new_mem->mem_type == XE_PL_TT) @@ -703,10 +683,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, move_lacks_source = handle_system_ccs ? 
(!bo->ccs_cleared) : (!mem_type_is_vram(old_mem_type) && !tt_has_data); - clear_system_pages = ttm && (ttm->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE); needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || - (!ttm && ttm_bo->type == ttm_bo_type_device) || - clear_system_pages; + (!ttm && ttm_bo->type == ttm_bo_type_device); if (new_mem->mem_type == XE_PL_TT) { ret = xe_tt_map_sg(ttm); @@ -818,7 +796,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, if (move_lacks_source) { u32 flags = 0; - if (mem_type_is_vram(new_mem->mem_type) || clear_system_pages) + if (mem_type_is_vram(new_mem->mem_type)) flags |= XE_MIGRATE_CLEAR_FLAG_FULL; else if (handle_system_ccs) flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 121a403a1478..856db4020048 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -339,8 +339,6 @@ struct xe_device { struct xe_mem_region vram; /** @mem.sys_mgr: system TTM manager */ struct ttm_resource_manager sys_mgr; - /** @mem.gpu_page_clear_sys: clear system pages offloaded to GPU */ - bool gpu_page_clear_sys; } mem; /** @sriov: device level virtualization data */ diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c index e0ac20f20758..9844a8edbfe1 100644 --- a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c @@ -117,17 +117,5 @@ int xe_ttm_sys_mgr_init(struct xe_device *xe) ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT); ttm_set_driver_manager(&xe->ttm, XE_PL_TT, man); ttm_resource_manager_set_used(man, true); - - /* - * On iGFX device with flat CCS, we clear CCS metadata, let's extend that - * and use GPU to clear pages as well. - * - * Disable this when init_on_free and/or init_on_alloc is on to avoid double - * zeroing pages with CPU and GPU. - */ - if (xe_device_has_flat_ccs(xe) && !IS_DGFX(xe) && - !want_init_on_alloc(GFP_USER) && !want_init_on_free()) - xe->mem.gpu_page_clear_sys = true; - return drmm_add_action_or_reset(&xe->drm, ttm_sys_mgr_fini, xe); } From 789e51597d33ec0053b029127d797d86c0d857eb Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 28 Aug 2024 10:36:35 +0200 Subject: [PATCH 105/108] Revert "drm/ttm: Add a flag to allow drivers to skip clear-on-free" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove TTM_TT_FLAG_CLEARED_ON_FREE now that XE stopped using this flag. This reverts commit decbfaf06db05fa1f9b33149ebb3c145b44e878f. 
Cc: Christian König Cc: Himal Prasad Ghimiray Cc: Lucas De Marchi Cc: Matthew Auld Cc: Matthew Brost Cc: Thomas Hellström Signed-off-by: Nirmoy Das Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240828083635.23601-2-nirmoy.das@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/ttm/ttm_pool.c | 18 +++++++----------- include/drm/ttm/ttm_tt.h | 6 +----- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 935ab3cfd046..8504dbe19c1a 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -222,18 +222,15 @@ static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr, } /* Give pages into a specific pool_type */ -static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p, - bool cleared) +static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p) { unsigned int i, num_pages = 1 << pt->order; - if (!cleared) { - for (i = 0; i < num_pages; ++i) { - if (PageHighMem(p)) - clear_highpage(p + i); - else - clear_page(page_address(p + i)); - } + for (i = 0; i < num_pages; ++i) { + if (PageHighMem(p)) + clear_highpage(p + i); + else + clear_page(page_address(p + i)); } spin_lock(&pt->lock); @@ -397,7 +394,6 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, pgoff_t start_page, pgoff_t end_page) { struct page **pages = &tt->pages[start_page]; - bool cleared = tt->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE; unsigned int order; pgoff_t i, nr; @@ -411,7 +407,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, pt = ttm_pool_select_type(pool, caching, order); if (pt) - ttm_pool_type_give(pt, *pages, cleared); + ttm_pool_type_give(pt, *pages); else ttm_pool_free_page(pool, caching, order, *pages); } diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index cfaf49de2419..2b9d856ff388 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -85,9 +85,6 @@ struct ttm_tt { * fault handling abuses the DMA api a bit and dma_map_attrs can't be * used to assure pgprot always matches. * - * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm driver handles - * clearing backing store - * * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. This is * set by TTM after ttm_tt_populate() has successfully returned, and is * then unset when TTM calls ttm_tt_unpopulate(). @@ -97,9 +94,8 @@ struct ttm_tt { #define TTM_TT_FLAG_EXTERNAL BIT(2) #define TTM_TT_FLAG_EXTERNAL_MAPPABLE BIT(3) #define TTM_TT_FLAG_DECRYPTED BIT(4) -#define TTM_TT_FLAG_CLEARED_ON_FREE BIT(5) -#define TTM_TT_FLAG_PRIV_POPULATED BIT(6) +#define TTM_TT_FLAG_PRIV_POPULATED BIT(5) uint32_t page_flags; /** @num_pages: Number of pages in the page array. */ uint32_t num_pages; From 379cad69bdfe522e840ed5f5c01ac8769006d53e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Mon, 26 Aug 2024 16:34:50 +0200 Subject: [PATCH 106/108] drm/xe: Use separate rpm lockdep map for non-d3cold-capable devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For non-d3cold-capable devices we'd like to be able to wake up the device from reclaim. In particular, for Lunar Lake we'd like to be able to blit CCS metadata to system at shrink time; at least from kswapd where it's reasonably OK to wait for rpm resume and a preceding rpm suspend. Therefore use a separate lockdep map for such devices and prime it reclaim-tainted.
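Priming means teaching lockdep the expected lock ordering up front at module init rather than waiting for it to occur at runtime; a sketch of the reclaim-tainted case, as in the diff below:

    /* shrinkers may wake the device: reclaim -> rpm resume is a legal order */
    fs_reclaim_acquire(GFP_KERNEL);
    lock_map_acquire(&xe_pm_runtime_nod3cold_map);
    lock_map_release(&xe_pm_runtime_nod3cold_map);
    fs_reclaim_release(GFP_KERNEL);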
v2: - Rename lockmap acquire- and release functions. (Rodrigo Vivi). - Reinstate the old xe_pm_runtime_lockdep_prime() function and rename it to xe_rpm_might_enter_cb(). (Matthew Auld). - Introduce a separate xe_pm_runtime_lockdep_prime function called from module init for known required locking orders. v3: - Actually hook up the prime function at module init. v4: - Rebase. v5: - Don't use reclaim-safe RPM with sriov. Cc: "Vivi, Rodrigo" Cc: "Auld, Matthew" Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240826143450.92511-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_module.c | 9 ++++ drivers/gpu/drm/xe/xe_pm.c | 84 ++++++++++++++++++++++++++++------ drivers/gpu/drm/xe/xe_pm.h | 1 + 3 files changed, 80 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index cbc444ec1854..bfc3deebdaa2 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -13,6 +13,7 @@ #include "xe_drv.h" #include "xe_hw_fence.h" #include "xe_pci.h" +#include "xe_pm.h" #include "xe_observation.h" #include "xe_sched_job.h" @@ -76,6 +77,10 @@ struct init_funcs { void (*exit)(void); }; +static void xe_dummy_exit(void) +{ +} + static const struct init_funcs init_funcs[] = { { .init = xe_check_nomodeset, @@ -96,6 +101,10 @@ static const struct init_funcs init_funcs[] = { .init = xe_observation_sysctl_register, .exit = xe_observation_sysctl_unregister, }, + { + .init = xe_pm_module_init, + .exit = xe_dummy_exit, + }, }; static int __init xe_call_init_func(unsigned int i) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index c247e1cb8aba..2600c936527e 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -70,11 +70,34 @@ */ #ifdef CONFIG_LOCKDEP -static struct lockdep_map xe_pm_runtime_lockdep_map = { - .name = "xe_pm_runtime_lockdep_map" +static struct lockdep_map xe_pm_runtime_d3cold_map = { + .name = "xe_rpm_d3cold_map" +}; + +static struct lockdep_map xe_pm_runtime_nod3cold_map = { + .name = "xe_rpm_nod3cold_map" }; #endif +static bool __maybe_unused xe_rpm_reclaim_safe(const struct xe_device *xe) +{ + return !xe->d3cold.capable && !xe->info.has_sriov; +} + +static void xe_rpm_lockmap_acquire(const struct xe_device *xe) +{ + lock_map_acquire(xe_rpm_reclaim_safe(xe) ? + &xe_pm_runtime_nod3cold_map : + &xe_pm_runtime_d3cold_map); +} + +static void xe_rpm_lockmap_release(const struct xe_device *xe) +{ + lock_map_release(xe_rpm_reclaim_safe(xe) ? + &xe_pm_runtime_nod3cold_map : + &xe_pm_runtime_d3cold_map); +} + /** * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle * @xe: xe device instance @@ -354,7 +377,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) * annotation here and in xe_pm_runtime_get() lockdep will see * the potential lock inversion and give us a nice splat. 
*/ - lock_map_acquire(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_acquire(xe); /* * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify @@ -387,7 +410,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) out: if (err) xe_display_pm_resume(xe, true); - lock_map_release(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return err; } @@ -408,7 +431,7 @@ int xe_pm_runtime_resume(struct xe_device *xe) /* Disable access_ongoing asserts and prevent recursive pm calls */ xe_pm_write_callback_task(xe, current); - lock_map_acquire(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_acquire(xe); if (xe->d3cold.allowed) { err = xe_pcode_ready(xe, true); @@ -440,7 +463,7 @@ int xe_pm_runtime_resume(struct xe_device *xe) } out: - lock_map_release(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return err; } @@ -454,15 +477,37 @@ int xe_pm_runtime_resume(struct xe_device *xe) * stuff that can happen inside the runtime_resume callback by acquiring * a dummy lock (it doesn't protect anything and gets compiled out on * non-debug builds). Lockdep then only needs to see the - * xe_pm_runtime_lockdep_map -> runtime_resume callback once, and then can - * hopefully validate all the (callers_locks) -> xe_pm_runtime_lockdep_map. + * xe_pm_runtime_xxx_map -> runtime_resume callback once, and then can + * hopefully validate all the (callers_locks) -> xe_pm_runtime_xxx_map. * For example if the (callers_locks) are ever grabbed in the * runtime_resume callback, lockdep should give us a nice splat. */ -static void pm_runtime_lockdep_prime(void) +static void xe_rpm_might_enter_cb(const struct xe_device *xe) { - lock_map_acquire(&xe_pm_runtime_lockdep_map); - lock_map_release(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_acquire(xe); + xe_rpm_lockmap_release(xe); +} + +/* + * Prime the lockdep maps for known locking orders that need to + * be supported but that may not always occur on all systems. + */ +static void xe_pm_runtime_lockdep_prime(void) +{ + struct dma_resv lockdep_resv; + + dma_resv_init(&lockdep_resv); + lock_map_acquire(&xe_pm_runtime_d3cold_map); + /* D3Cold takes the dma_resv locks to evict bos */ + dma_resv_lock(&lockdep_resv, NULL); + dma_resv_unlock(&lockdep_resv); + lock_map_release(&xe_pm_runtime_d3cold_map); + + /* Shrinkers might like to wake up the device under reclaim. */ + fs_reclaim_acquire(GFP_KERNEL); + lock_map_acquire(&xe_pm_runtime_nod3cold_map); + lock_map_release(&xe_pm_runtime_nod3cold_map); + fs_reclaim_release(GFP_KERNEL); } /** @@ -477,7 +522,7 @@ void xe_pm_runtime_get(struct xe_device *xe) if (xe_pm_read_callback_task(xe) == current) return; - pm_runtime_lockdep_prime(); + xe_rpm_might_enter_cb(xe); pm_runtime_resume(xe->drm.dev); } @@ -509,7 +554,7 @@ int xe_pm_runtime_get_ioctl(struct xe_device *xe) if (WARN_ON(xe_pm_read_callback_task(xe) == current)) return -ELOOP; - pm_runtime_lockdep_prime(); + xe_rpm_might_enter_cb(xe); return pm_runtime_get_sync(xe->drm.dev); } @@ -577,7 +622,7 @@ bool xe_pm_runtime_resume_and_get(struct xe_device *xe) return true; } - pm_runtime_lockdep_prime(); + xe_rpm_might_enter_cb(xe); return pm_runtime_resume_and_get(xe->drm.dev) >= 0; } @@ -669,3 +714,14 @@ void xe_pm_d3cold_allowed_toggle(struct xe_device *xe) drm_dbg(&xe->drm, "d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed)); } + +/** + * xe_pm_module_init() - Perform xe_pm specific module initialization. + * + * Return: 0 on success. Currently doesn't fail. 
+ */ +int __init xe_pm_module_init(void) +{ + xe_pm_runtime_lockdep_prime(); + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 104a21ae6dfd..9aef673b1c8a 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -32,5 +32,6 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe); int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold); void xe_pm_d3cold_allowed_toggle(struct xe_device *xe); struct task_struct *xe_pm_read_callback_task(struct xe_device *xe); +int xe_pm_module_init(void); #endif From c72084163cd22ebf59d936669ec25b1fc2b7494c Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Wed, 28 Aug 2024 14:52:29 +0530 Subject: [PATCH 107/108] drm/xe: Fix NPD in ggtt_node_remove() Make sure that ggtt_node_remove() is invoked only if both node and ggtt are not null. Move the null checks to the caller function xe_ggtt_node_remove(). v2: Move null check below declarations (Tejas) Fixes: 919bb54e989c ("drm/xe: Fix missing runtime outer protection for ggtt_remove_node") Cc: Rodrigo Vivi Cc: Lucas De Marchi Cc: Tejas Upadhyay Reviewed-by: Tejas Upadhyay Signed-off-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240828092229.3606503-1-himal.prasad.ghimiray@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_ggtt.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 86fc6afa43bd..f3fca5565d32 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -286,9 +286,6 @@ static void ggtt_node_remove(struct xe_ggtt_node *node) bool bound; int idx; - if (!node || !node->ggtt) - return; - bound = drm_dev_enter(&xe->drm, &idx); mutex_lock(&ggtt->lock); @@ -328,8 +325,14 @@ static void ggtt_node_remove_work_func(struct work_struct *work) */ void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate) { - struct xe_ggtt *ggtt = node->ggtt; - struct xe_device *xe = tile_to_xe(ggtt->tile); + struct xe_ggtt *ggtt; + struct xe_device *xe; + + if (!node || !node->ggtt) + return; + + ggtt = node->ggtt; + xe = tile_to_xe(ggtt->tile); node->invalidate_on_remove = invalidate; From 3adcf970dc7ec0469ec3116a5a8be9161d17a335 Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Wed, 28 Aug 2024 13:51:52 +0530 Subject: [PATCH 108/108] drm/xe/bmg: Drop force_probe requirement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Battlemage platform is sufficiently tested and found stable. CI is also pretty stable. Remove the force_probe requirement to enable the platform support by default. Cc: Thomas Hellström Cc: Rodrigo Vivi Cc: Jani Nikula Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Rodrigo Vivi Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240828082152.3194814-1-balasubramani.vivekanandan@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 703822d39800..7eb00a87b786 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -344,7 +344,6 @@ static const struct xe_device_desc bmg_desc = { DGFX_FEATURES, PLATFORM(BATTLEMAGE), .has_display = true, - .require_force_probe = true, .has_heci_cscfi = 1, };