mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-16 04:21:09 -04:00
drm/xe/xe3p_lpg: flush shrinker bo cachelines manually
XA, a new pat_index introduced post xe3p_lpg, marks memory shared between
the CPU and GPU; such memory is treated differently from other GPU memory
when the Media engine is power-gated.
XA is *always* flushed, like at the end-of-submission (and maybe other
places), just that internally as an optimisation hw doesn't need to make
that a full flush (which will also include XA) when Media is
off/powergated, since it doesn't need to worry about GT caches vs Media
coherency, and only CPU vs GPU coherency, so can make that flush a
targeted XA flush, since stuff tagged with XA now means it's shared with
the CPU. The main implication is that we now need to somehow flush non-XA
before freeing system memory pages, otherwise dirty cachelines could be
flushed after the free (like if Media suddenly turns on and does a full
flush)
V4: Add comments for L2 flush path
V3(Thomas/MattA/MattR): Restrict userptr with non-xa, then no need to
flush manually
V2(MattA): Expand commit description
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Link: https://patch.msgid.link/20260305121902.1892593-7-tejas.upadhyay@intel.com
Signed-off-by: Tejas Upadhyay <tejas.upadhyay@intel.com>
This commit is contained in:
@@ -689,7 +689,12 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
|
||||
|
||||
if (!xe_vm_in_fault_mode(vm)) {
|
||||
drm_gpuvm_bo_evict(vm_bo, true);
|
||||
continue;
|
||||
/*
|
||||
* L2 cache may not be flushed, so ensure that is done in
|
||||
* xe_vm_invalidate_vma() below
|
||||
*/
|
||||
if (!xe_device_is_l2_flush_optimized(xe))
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!idle) {
|
||||
|
||||
@@ -1094,6 +1094,29 @@ static void tdf_request_sync(struct xe_device *xe)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_device_is_l2_flush_optimized - if L2 flush is optimized by HW
|
||||
* @xe: The device to check.
|
||||
*
|
||||
* Return: true if the HW device optimizing L2 flush, false otherwise.
|
||||
*/
|
||||
bool xe_device_is_l2_flush_optimized(struct xe_device *xe)
|
||||
{
|
||||
/* XA is *always* flushed, like at the end-of-submssion (and maybe other
|
||||
* places), just that internally as an optimisation hw doesn't need to make
|
||||
* that a full flush (which will also include XA) when Media is
|
||||
* off/powergated, since it doesn't need to worry about GT caches vs Media
|
||||
* coherency, and only CPU vs GPU coherency, so can make that flush a
|
||||
* targeted XA flush, since stuff tagged with XA now means it's shared with
|
||||
* the CPU. The main implication is that we now need to somehow flush non-XA before
|
||||
* freeing system memory pages, otherwise dirty cachelines could be flushed after the free
|
||||
* (like if Media suddenly turns on and does a full flush)
|
||||
*/
|
||||
if (GRAPHICS_VER(xe) >= 35 && !IS_DGFX(xe))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
void xe_device_l2_flush(struct xe_device *xe)
|
||||
{
|
||||
struct xe_gt *gt;
|
||||
|
||||
@@ -188,6 +188,7 @@ void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p);
|
||||
u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address);
|
||||
u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address);
|
||||
|
||||
bool xe_device_is_l2_flush_optimized(struct xe_device *xe);
|
||||
void xe_device_td_flush(struct xe_device *xe);
|
||||
void xe_device_l2_flush(struct xe_device *xe);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user