Merge tag 'drm-xe-fixes-2026-04-30' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes

API Fixes:
 - Add missing pad and extensions check (Jonathan)
 - Reject unsafe PAT indices for CPU cached memory (Jia)

Driver Fixes:
 - Drop registration of guc_submit_wedged_fini from xe_guc_submit_wedge (Brost)
 - Xe3p tuning and workaround fixes (Roper, Gustavo)
 - Use drm_mm instead of drm suballocator for CCS read/write (Satya)
 - Fix leaks and null derefs (Shuicheng)
 - Fix Wa_18022495364 (Tvrtko)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patch.msgid.link/afO05KvmFMn_7qcY@intel.com
Committed by Dave Airlie on 2026-05-01 12:49:22 +10:00
22 changed files with 621 additions and 105 deletions

View File

@@ -88,6 +88,7 @@ xe-y += xe_bb.o \
xe_irq.o \
xe_late_bind_fw.o \
xe_lrc.o \
xe_mem_pool.o \
xe_migrate.o \
xe_mmio.o \
xe_mmio_gem.o \

View File

@@ -583,7 +583,7 @@
#define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32)
#define LSCFE_SAME_ADDRESS_ATOMICS_COALESCING_DISABLE REG_BIT(35 - 32)
#define ROW_CHICKEN5 XE_REG_MCR(0xe7f0)
#define ROW_CHICKEN5 XE_REG_MCR(0xe7f0, XE_REG_OPTION_MASKED)
#define CPSS_AWARE_DIS REG_BIT(3)
#define SARB_CHICKEN1 XE_REG_MCR(0xe90c)

View File

@@ -2322,8 +2322,10 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
}
/* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */
if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT)) {
xe_bo_free(bo);
return ERR_PTR(-EINVAL);
}
if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
!(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
@@ -2342,8 +2344,10 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
alignment = SZ_4K >> PAGE_SHIFT;
}
if (type == ttm_bo_type_device && aligned_size != size)
if (type == ttm_bo_type_device && aligned_size != size) {
xe_bo_free(bo);
return ERR_PTR(-EINVAL);
}
if (!bo) {
bo = xe_bo_alloc();

View File

@@ -18,6 +18,7 @@
#include "xe_ggtt_types.h"
struct xe_device;
struct xe_mem_pool_node;
struct xe_vm;
#define XE_BO_MAX_PLACEMENTS 3
@@ -88,7 +89,7 @@ struct xe_bo {
bool ccs_cleared;
/** @bb_ccs: BB instructions of CCS read/write. Valid only for VF */
struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
struct xe_mem_pool_node *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
/**
* @cpu_caching: CPU caching mode. Currently only used for userspace

View File

@@ -258,6 +258,13 @@ struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags)
return ERR_PTR(ret);
}
/*
* Takes ownership of @storage: on success it is transferred to the returned
* drm_gem_object; on failure it is freed before returning the error.
* This matches the contract of xe_bo_init_locked() which frees @storage on
* its error paths, so callers need not (and must not) free @storage after
* this call.
*/
static struct drm_gem_object *
xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
struct dma_buf *dma_buf)
@@ -271,8 +278,10 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
int ret = 0;
dummy_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
if (!dummy_obj)
if (!dummy_obj) {
xe_bo_free(storage);
return ERR_PTR(-ENOMEM);
}
dummy_obj->resv = resv;
xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, ret) {
@@ -281,6 +290,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
if (ret)
break;
/* xe_bo_init_locked() frees storage on error */
bo = xe_bo_init_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
0, /* Will require 1way or 2way for vm_bind */
ttm_bo_type_sg, XE_BO_FLAG_SYSTEM, &exec);
@@ -368,12 +378,15 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
goto out_err;
}
/* Errors here will take care of freeing the bo. */
/*
* xe_dma_buf_init_obj() takes ownership of bo on both success
* and failure, so we must not touch bo after this call.
*/
obj = xe_dma_buf_init_obj(dev, bo, dma_buf);
if (IS_ERR(obj))
if (IS_ERR(obj)) {
dma_buf_detach(dma_buf, attach);
return obj;
}
get_dma_buf(dma_buf);
obj->import_attach = attach;
return obj;
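
The two comments above codify a consume-on-failure ownership rule: once @storage/@bo has been handed to the init helper, the caller must not free it on any path, because the helper already does so on error. As a hedged illustration of the same idiom (the struct and helper names below are hypothetical, not part of the driver):

#include <linux/err.h>
#include <linux/slab.h>

struct thing { int payload; };

/*
 * Hypothetical helper following the consume-on-failure contract:
 * on any error it frees @t itself before returning.
 */
static int thing_init(struct thing *t)
{
	/* ... real setup work would go here ... */
	return 0;
}

static struct thing *thing_create(void)
{
	struct thing *t = kzalloc(sizeof(*t), GFP_KERNEL);
	int err;

	if (!t)
		return ERR_PTR(-ENOMEM);

	err = thing_init(t);
	if (err)
		return ERR_PTR(err);	/* @t already freed; a kfree(t) here would double-free */

	return t;
}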

View File

@@ -869,14 +869,14 @@ static int xe_eu_stall_stream_close(struct inode *inode, struct file *file)
struct xe_eu_stall_data_stream *stream = file->private_data;
struct xe_gt *gt = stream->gt;
drm_dev_put(&gt->tile->xe->drm);
mutex_lock(&gt->eu_stall->stream_lock);
xe_eu_stall_disable_locked(stream);
xe_eu_stall_data_buf_destroy(stream);
xe_eu_stall_stream_free(stream);
mutex_unlock(&gt->eu_stall->stream_lock);
drm_dev_put(&gt->tile->xe->drm);
return 0;
}

View File

@@ -1405,7 +1405,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
if (q->vm && q->hwe->hw_engine_group) {
err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
if (err)
goto put_exec_queue;
goto kill_exec_queue;
}
}
@@ -1416,12 +1416,15 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
/* user id alloc must always be last in ioctl to prevent UAF */
err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
if (err)
goto kill_exec_queue;
goto del_hw_engine_group;
args->exec_queue_id = id;
return 0;
del_hw_engine_group:
if (q->vm && q->hwe && q->hwe->hw_engine_group)
xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
kill_exec_queue:
xe_exec_queue_kill(q);
delete_queue_group:
@@ -1760,7 +1763,7 @@ void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
unsigned int type)
{
xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
xe_assert(gt_to_xe(q->gt), type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
dma_fence_put(q->tlb_inval[type].last_fence);

View File

@@ -166,7 +166,7 @@ static int query_compatibility_version(struct xe_gsc *gsc)
&rd_offset);
if (err) {
xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err);
return err;
goto out_bo;
}
compat->major = version_query_rd(xe, &bo->vmap, rd_offset, proj_major);

View File

@@ -259,24 +259,12 @@ static void guc_submit_sw_fini(struct drm_device *drm, void *arg)
}
static void guc_submit_fini(void *arg)
{
struct xe_guc *guc = arg;
/* Forcefully kill any remaining exec queues */
xe_guc_ct_stop(&guc->ct);
guc_submit_reset_prepare(guc);
xe_guc_softreset(guc);
xe_guc_submit_stop(guc);
xe_uc_fw_sanitize(&guc->fw);
xe_guc_submit_pause_abort(guc);
}
static void guc_submit_wedged_fini(void *arg)
{
struct xe_guc *guc = arg;
struct xe_exec_queue *q;
unsigned long index;
/* Drop any wedged queue refs */
mutex_lock(&guc->submission_state.lock);
xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
if (exec_queue_wedged(q)) {
@@ -286,6 +274,14 @@ static void guc_submit_wedged_fini(void *arg)
}
}
mutex_unlock(&guc->submission_state.lock);
/* Forcefully kill any remaining exec queues */
xe_guc_ct_stop(&guc->ct);
guc_submit_reset_prepare(guc);
xe_guc_softreset(guc);
xe_guc_submit_stop(guc);
xe_uc_fw_sanitize(&guc->fw);
xe_guc_submit_pause_abort(guc);
}
static const struct xe_exec_queue_ops guc_exec_queue_ops;
@@ -1320,10 +1316,8 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
void xe_guc_submit_wedge(struct xe_guc *guc)
{
struct xe_device *xe = guc_to_xe(guc);
struct xe_gt *gt = guc_to_gt(guc);
struct xe_exec_queue *q;
unsigned long index;
int err;
xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);
@@ -1335,15 +1329,6 @@ void xe_guc_submit_wedge(struct xe_guc *guc)
return;
if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) {
err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
guc_submit_wedged_fini, guc);
if (err) {
xe_gt_err(gt, "Failed to register clean-up on wedged.mode=%s; "
"Although device is wedged.\n",
xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET));
return;
}
mutex_lock(&guc->submission_state.lock);
xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
if (xe_exec_queue_get_unless_zero(q))

View File

@@ -1214,7 +1214,7 @@ static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc,
if (xe_gt_WARN_ON(lrc->gt, max_len < 3))
return -ENOSPC;
*cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
*cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_LRM_CS_MMIO | MI_LRI_NUM_REGS(1);
*cmd++ = CS_DEBUG_MODE2(0).addr;
*cmd++ = REG_MASKED_FIELD_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);

View File

@@ -0,0 +1,403 @@
// SPDX-License-Identifier: MIT
/*
* Copyright © 2026 Intel Corporation
*/
#include <linux/kernel.h>
#include <drm/drm_managed.h>
#include "instructions/xe_mi_commands.h"
#include "xe_bo.h"
#include "xe_device_types.h"
#include "xe_map.h"
#include "xe_mem_pool.h"
#include "xe_mem_pool_types.h"
#include "xe_tile_printk.h"
/**
* struct xe_mem_pool - DRM MM pool for sub-allocating memory from a BO on an
* XE tile.
*
* The XE memory pool is a DRM MM manager that provides sub-allocation of memory
* from a backing buffer object (BO) on a specific XE tile. It is designed to
* manage memory for GPU workloads, allowing for efficient allocation and
* deallocation of memory regions within the BO.
*
* The memory pool maintains a primary BO that is pinned in the GGTT and mapped
* into the CPU address space for direct access. Optionally, it can also maintain
* a shadow BO that can be used for atomic updates to the primary BO's contents.
*
* The API provided by the memory pool allows clients to allocate and free memory
* regions, retrieve GPU and CPU addresses, and synchronize data between the
* primary and shadow BOs as needed.
*/
struct xe_mem_pool {
/** @base: Range allocator over [0, @size) in bytes */
struct drm_mm base;
/** @bo: Active pool BO (GGTT-pinned, CPU-mapped). */
struct xe_bo *bo;
/** @shadow: Shadow BO for atomic command updates. */
struct xe_bo *shadow;
/** @swap_guard: Mutex serializing swaps of @bo and @shadow and updates to their contents. */
struct mutex swap_guard;
/** @cpu_addr: CPU virtual address of the active BO. */
void *cpu_addr;
/** @is_iomem: Indicates if the BO mapping is I/O memory. */
bool is_iomem;
};
static struct xe_mem_pool *node_to_pool(struct xe_mem_pool_node *node)
{
return container_of(node->sa_node.mm, struct xe_mem_pool, base);
}
static struct xe_tile *pool_to_tile(struct xe_mem_pool *pool)
{
return pool->bo->tile;
}
static void fini_pool_action(struct drm_device *drm, void *arg)
{
struct xe_mem_pool *pool = arg;
if (pool->is_iomem)
kvfree(pool->cpu_addr);
drm_mm_takedown(&pool->base);
}
static int pool_shadow_init(struct xe_mem_pool *pool)
{
struct xe_tile *tile = pool->bo->tile;
struct xe_device *xe = tile_to_xe(tile);
struct xe_bo *shadow;
int ret;
xe_assert(xe, !pool->shadow);
ret = drmm_mutex_init(&xe->drm, &pool->swap_guard);
if (ret)
return ret;
if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
fs_reclaim_acquire(GFP_KERNEL);
might_lock(&pool->swap_guard);
fs_reclaim_release(GFP_KERNEL);
}
shadow = xe_managed_bo_create_pin_map(xe, tile,
xe_bo_size(pool->bo),
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_GGTT |
XE_BO_FLAG_GGTT_INVALIDATE |
XE_BO_FLAG_PINNED_NORESTORE);
if (IS_ERR(shadow))
return PTR_ERR(shadow);
pool->shadow = shadow;
return 0;
}
/**
* xe_mem_pool_init() - Initialize memory pool.
* @tile: the &xe_tile on which the pool's backing BO is allocated.
* @size: number of bytes to allocate.
* @guard: the size of the guard region at the end of the BO that is not
* sub-allocated, in bytes.
* @flags: flags to use to create shadow pool.
*
* Initializes a memory pool for sub-allocating memory from a backing BO on the
* specified XE tile. The backing BO is pinned in the GGTT and mapped into
* the CPU address space for direct access. Optionally, a shadow BO can also be
* initialized for atomic updates to the primary BO's contents.
*
* Returns: a pointer to the &xe_mem_pool, or an error pointer on failure.
*/
struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size,
u32 guard, int flags)
{
struct xe_device *xe = tile_to_xe(tile);
struct xe_mem_pool *pool;
struct xe_bo *bo;
u32 managed_size;
int ret;
xe_tile_assert(tile, size > guard);
managed_size = size - guard;
pool = drmm_kzalloc(&xe->drm, sizeof(*pool), GFP_KERNEL);
if (!pool)
return ERR_PTR(-ENOMEM);
bo = xe_managed_bo_create_pin_map(xe, tile, size,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_GGTT |
XE_BO_FLAG_GGTT_INVALIDATE |
XE_BO_FLAG_PINNED_NORESTORE);
if (IS_ERR(bo)) {
xe_tile_err(tile, "Failed to prepare %uKiB BO for mem pool (%pe)\n",
size / SZ_1K, bo);
return ERR_CAST(bo);
}
pool->bo = bo;
pool->is_iomem = bo->vmap.is_iomem;
if (pool->is_iomem) {
pool->cpu_addr = kvzalloc(size, GFP_KERNEL);
if (!pool->cpu_addr)
return ERR_PTR(-ENOMEM);
} else {
pool->cpu_addr = bo->vmap.vaddr;
}
if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY) {
ret = pool_shadow_init(pool);
if (ret)
goto out_err;
}
drm_mm_init(&pool->base, 0, managed_size);
ret = drmm_add_action_or_reset(&xe->drm, fini_pool_action, pool);
if (ret)
return ERR_PTR(ret);
return pool;
out_err:
if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY)
xe_tile_err(tile,
"Failed to initialize shadow BO for mem pool (%d)\n", ret);
if (bo->vmap.is_iomem)
kvfree(pool->cpu_addr);
return ERR_PTR(ret);
}
/**
* xe_mem_pool_sync() - Copy the entire contents of the main pool to the shadow pool.
* @pool: the memory pool containing the primary and shadow BOs.
*
* Copies the entire contents of the primary pool to the shadow pool. This must
* be done after xe_mem_pool_init() with the XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY
* flag to ensure that the shadow pool has the same initial contents as the primary
* pool. After this initial synchronization, clients can choose to synchronize the
* shadow pool with the primary pool on a per-node basis using
* xe_mem_pool_sync_shadow_locked() as needed.
*
* Return: None.
*/
void xe_mem_pool_sync(struct xe_mem_pool *pool)
{
struct xe_tile *tile = pool_to_tile(pool);
struct xe_device *xe = tile_to_xe(tile);
xe_tile_assert(tile, pool->shadow);
xe_map_memcpy_to(xe, &pool->shadow->vmap, 0,
pool->cpu_addr, xe_bo_size(pool->bo));
}
/**
* xe_mem_pool_swap_shadow_locked() - Swap the primary BO with the shadow BO.
* @pool: the memory pool containing the primary and shadow BOs.
*
* Swaps the primary buffer object with the shadow buffer object in the mem
* pool. This allows for atomic updates to the contents of the primary BO
* by first writing to the shadow BO and then swapping it with the primary BO.
* The pool's swap_guard mutex must be held to serialize against any concurrent
* swap operations.
*
* Return: None.
*/
void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool)
{
struct xe_tile *tile = pool_to_tile(pool);
xe_tile_assert(tile, pool->shadow);
lockdep_assert_held(&pool->swap_guard);
swap(pool->bo, pool->shadow);
if (!pool->bo->vmap.is_iomem)
pool->cpu_addr = pool->bo->vmap.vaddr;
}
/**
* xe_mem_pool_sync_shadow_locked() - Copy node from primary pool to shadow pool.
* @node: the node allocated in the memory pool.
*
* Copies the contents of the specified node from the primary pool to the shadow
* pool. The pool's swap_guard mutex must be held to serialize against any
* concurrent swap operations.
*
* Return: None.
*/
void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node)
{
struct xe_mem_pool *pool = node_to_pool(node);
struct xe_tile *tile = pool_to_tile(pool);
struct xe_device *xe = tile_to_xe(tile);
struct drm_mm_node *sa_node = &node->sa_node;
xe_tile_assert(tile, pool->shadow);
lockdep_assert_held(&pool->swap_guard);
xe_map_memcpy_to(xe, &pool->shadow->vmap,
sa_node->start,
pool->cpu_addr + sa_node->start,
sa_node->size);
}
/**
* xe_mem_pool_gpu_addr() - Retrieve GPU address of memory pool.
* @pool: the memory pool
*
* Returns: GGTT address of the memory pool.
*/
u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool)
{
return xe_bo_ggtt_addr(pool->bo);
}
/**
* xe_mem_pool_cpu_addr() - Retrieve CPU address of the memory pool.
* @pool: the memory pool
*
* Returns: CPU virtual address of memory pool.
*/
void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool)
{
return pool->cpu_addr;
}
/**
* xe_mem_pool_bo_swap_guard() - Retrieve the mutex used to guard swap
* operations on a memory pool.
* @pool: the memory pool
*
* Returns: Swap guard mutex or NULL if shadow pool is not created.
*/
struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool)
{
if (!pool->shadow)
return NULL;
return &pool->swap_guard;
}
/**
* xe_mem_pool_bo_flush_write() - Copy the data from the sub-allocation
* to the GPU memory.
* @node: the node allocated in the memory pool to flush.
*/
void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node)
{
struct xe_mem_pool *pool = node_to_pool(node);
struct xe_tile *tile = pool_to_tile(pool);
struct xe_device *xe = tile_to_xe(tile);
struct drm_mm_node *sa_node = &node->sa_node;
if (!pool->bo->vmap.is_iomem)
return;
xe_map_memcpy_to(xe, &pool->bo->vmap, sa_node->start,
pool->cpu_addr + sa_node->start,
sa_node->size);
}
/**
* xe_mem_pool_bo_sync_read() - Copy the data from GPU memory to the
* sub-allocation.
* @node: the node allocated in the memory pool to read back.
*/
void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node)
{
struct xe_mem_pool *pool = node_to_pool(node);
struct xe_tile *tile = pool_to_tile(pool);
struct xe_device *xe = tile_to_xe(tile);
struct drm_mm_node *sa_node = &node->sa_node;
if (!pool->bo->vmap.is_iomem)
return;
xe_map_memcpy_from(xe, pool->cpu_addr + sa_node->start,
&pool->bo->vmap, sa_node->start, sa_node->size);
}
/**
* xe_mem_pool_alloc_node() - Allocate a new node for use with xe_mem_pool.
*
* Returns: node structure or an ERR_PTR(-ENOMEM).
*/
struct xe_mem_pool_node *xe_mem_pool_alloc_node(void)
{
struct xe_mem_pool_node *node = kzalloc_obj(*node);
if (!node)
return ERR_PTR(-ENOMEM);
return node;
}
/**
* xe_mem_pool_insert_node() - Insert a node into the memory pool.
* @pool: the memory pool to insert into
* @node: the node to insert
* @size: the size of the node to be allocated in bytes.
*
* Inserts a node into the specified memory pool using drm_mm for
* allocation.
*
* Returns: 0 on success or a negative error code on failure.
*/
int xe_mem_pool_insert_node(struct xe_mem_pool *pool,
struct xe_mem_pool_node *node, u32 size)
{
if (!pool)
return -EINVAL;
return drm_mm_insert_node(&pool->base, &node->sa_node, size);
}
/**
* xe_mem_pool_free_node() - Free a node allocated from the memory pool.
* @node: the node to free
*
* Returns: None.
*/
void xe_mem_pool_free_node(struct xe_mem_pool_node *node)
{
if (!node)
return;
drm_mm_remove_node(&node->sa_node);
kfree(node);
}
/**
* xe_mem_pool_node_cpu_addr() - Retrieve CPU address of the node.
* @node: the node allocated in the memory pool
*
* Returns: CPU virtual address of the node.
*/
void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node)
{
struct xe_mem_pool *pool = node_to_pool(node);
return xe_mem_pool_cpu_addr(pool) + node->sa_node.start;
}
/**
* xe_mem_pool_dump() - Dump the state of the DRM MM manager for debugging.
* @pool: the memory pool to be dumped.
* @p: The DRM printer to use for output.
*
* Only the drm_mm-managed range is dumped, not the state of the BOs or any other
* pool information.
*
* Returns: None.
*/
void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p)
{
drm_mm_print(&pool->base, p);
}
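
Putting the new API together, a typical lifecycle looks roughly like the sketch below. This is a hedged usage example based only on the functions defined above and mirroring how xe_migrate_ccs_rw_copy() drives the pool later in this diff; the caller name, sizes and error handling are illustrative, not part of this commit.

#include <linux/cleanup.h>
#include <linux/err.h>
#include <linux/mutex.h>
#include <linux/slab.h>

#include "instructions/xe_mi_commands.h"
#include "xe_mem_pool.h"

static int example_mem_pool_usage(struct xe_tile *tile)
{
	struct xe_mem_pool *pool;
	struct xe_mem_pool_node *node;
	u32 *cs;
	int err;

	/* Pool with a shadow copy; reserve the last dword as a guard. */
	pool = xe_mem_pool_init(tile, SZ_2M, sizeof(u32),
				XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY);
	if (IS_ERR(pool))
		return PTR_ERR(pool);

	/* Make the shadow a replica of the freshly created main copy. */
	xe_mem_pool_sync(pool);

	node = xe_mem_pool_alloc_node();
	if (IS_ERR(node))
		return PTR_ERR(node);

	scoped_guard(mutex, xe_mem_pool_bo_swap_guard(pool)) {
		/* Build the new contents in what is currently the shadow BO. */
		xe_mem_pool_swap_shadow_locked(pool);

		err = xe_mem_pool_insert_node(pool, node, SZ_4K);
		if (err) {
			kfree(node);	/* not yet inserted, so not xe_mem_pool_free_node() */
			return err;
		}

		cs = xe_mem_pool_node_cpu_addr(node);
		cs[0] = MI_NOOP;			/* write commands through the CPU mapping */
		xe_mem_pool_bo_flush_write(node);	/* push to the BO if the mapping is iomem */
		xe_mem_pool_sync_shadow_locked(node);	/* mirror this node into the other copy */
	}

	/* GPU address of the allocation: pool base + node offset. */
	/* u64 gpu_addr = xe_mem_pool_gpu_addr(pool) + node->sa_node.start; */

	xe_mem_pool_free_node(node);
	return 0;
}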

View File

@@ -0,0 +1,35 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2026 Intel Corporation
*/
#ifndef _XE_MEM_POOL_H_
#define _XE_MEM_POOL_H_
#include <linux/sizes.h>
#include <linux/types.h>
#include <drm/drm_mm.h>
#include "xe_mem_pool_types.h"
struct drm_printer;
struct xe_mem_pool;
struct xe_tile;
struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size,
u32 guard, int flags);
void xe_mem_pool_sync(struct xe_mem_pool *pool);
void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool);
void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node);
u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool);
void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool);
struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool);
void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node);
void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node);
struct xe_mem_pool_node *xe_mem_pool_alloc_node(void);
int xe_mem_pool_insert_node(struct xe_mem_pool *pool,
struct xe_mem_pool_node *node, u32 size);
void xe_mem_pool_free_node(struct xe_mem_pool_node *node);
void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node);
void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p);
#endif

View File

@@ -0,0 +1,21 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2026 Intel Corporation
*/
#ifndef _XE_MEM_POOL_TYPES_H_
#define _XE_MEM_POOL_TYPES_H_
#include <drm/drm_mm.h>
#define XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY BIT(0)
/**
* struct xe_mem_pool_node - A sub-range allocation from a memory pool.
*/
struct xe_mem_pool_node {
/** @sa_node: drm_mm_node for this allocation. */
struct drm_mm_node sa_node;
};
#endif

View File

@@ -29,6 +29,7 @@
#include "xe_hw_engine.h"
#include "xe_lrc.h"
#include "xe_map.h"
#include "xe_mem_pool.h"
#include "xe_mocs.h"
#include "xe_printk.h"
#include "xe_pt.h"
@@ -1166,11 +1167,12 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
u32 batch_size, batch_size_allocated;
struct xe_device *xe = gt_to_xe(gt);
struct xe_res_cursor src_it, ccs_it;
struct xe_mem_pool *bb_pool;
struct xe_sriov_vf_ccs_ctx *ctx;
struct xe_sa_manager *bb_pool;
u64 size = xe_bo_size(src_bo);
struct xe_bb *bb = NULL;
struct xe_mem_pool_node *bb;
u64 src_L0, src_L0_ofs;
struct xe_bb xe_bb_tmp;
u32 src_L0_pt;
int err;
@@ -1208,18 +1210,18 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
size -= src_L0;
}
bb = xe_bb_alloc(gt);
bb = xe_mem_pool_alloc_node();
if (IS_ERR(bb))
return PTR_ERR(bb);
bb_pool = ctx->mem.ccs_bb_pool;
scoped_guard(mutex, xe_sa_bo_swap_guard(bb_pool)) {
xe_sa_bo_swap_shadow(bb_pool);
scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) {
xe_mem_pool_swap_shadow_locked(bb_pool);
err = xe_bb_init(bb, bb_pool, batch_size);
err = xe_mem_pool_insert_node(bb_pool, bb, batch_size * sizeof(u32));
if (err) {
xe_gt_err(gt, "BB allocation failed.\n");
xe_bb_free(bb, NULL);
kfree(bb);
return err;
}
@@ -1227,6 +1229,7 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
size = xe_bo_size(src_bo);
batch_size = 0;
xe_bb_tmp = (struct xe_bb){ .cs = xe_mem_pool_node_cpu_addr(bb), .len = 0 };
/*
* Emit PTE and copy commands here.
* The CCS copy command can only support limited size. If the size to be
@@ -1255,24 +1258,27 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
batch_size += EMIT_COPY_CCS_DW;
emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src);
emit_pte(m, &xe_bb_tmp, src_L0_pt, false, true, &src_it, src_L0, src);
emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src);
emit_pte(m, &xe_bb_tmp, ccs_pt, false, false, &ccs_it, ccs_size, src);
bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt,
xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len,
flush_flags);
flush_flags = xe_migrate_ccs_copy(m, &xe_bb_tmp, src_L0_ofs, src_is_pltt,
src_L0_ofs, dst_is_pltt,
src_L0, ccs_ofs, true);
bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len,
flush_flags);
size -= src_L0;
}
xe_assert(xe, (batch_size_allocated == bb->len));
xe_assert(xe, (batch_size_allocated == xe_bb_tmp.len));
xe_assert(xe, bb->sa_node.size == xe_bb_tmp.len * sizeof(u32));
src_bo->bb_ccs[read_write] = bb;
xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
xe_sa_bo_sync_shadow(bb->bo);
xe_mem_pool_sync_shadow_locked(bb);
}
return 0;
@@ -1297,10 +1303,10 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
enum xe_sriov_vf_ccs_rw_ctxs read_write)
{
struct xe_bb *bb = src_bo->bb_ccs[read_write];
struct xe_mem_pool_node *bb = src_bo->bb_ccs[read_write];
struct xe_device *xe = xe_bo_device(src_bo);
struct xe_mem_pool *bb_pool;
struct xe_sriov_vf_ccs_ctx *ctx;
struct xe_sa_manager *bb_pool;
u32 *cs;
xe_assert(xe, IS_SRIOV_VF(xe));
@@ -1308,17 +1314,17 @@ void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
ctx = &xe->sriov.vf.ccs.contexts[read_write];
bb_pool = ctx->mem.ccs_bb_pool;
guard(mutex) (xe_sa_bo_swap_guard(bb_pool));
xe_sa_bo_swap_shadow(bb_pool);
scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) {
xe_mem_pool_swap_shadow_locked(bb_pool);
cs = xe_sa_bo_cpu_addr(bb->bo);
memset(cs, MI_NOOP, bb->len * sizeof(u32));
xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
cs = xe_mem_pool_node_cpu_addr(bb);
memset(cs, MI_NOOP, bb->sa_node.size);
xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
xe_sa_bo_sync_shadow(bb->bo);
xe_bb_free(bb, NULL);
src_bo->bb_ccs[read_write] = NULL;
xe_mem_pool_sync_shadow_locked(bb);
xe_mem_pool_free_node(bb);
src_bo->bb_ccs[read_write] = NULL;
}
}
/**

View File

@@ -118,6 +118,7 @@ static const struct xe_graphics_desc graphics_xe2 = {
static const struct xe_graphics_desc graphics_xe3p_lpg = {
XE2_GFX_FEATURES,
.has_indirect_ring_state = 1,
.multi_queue_engine_class_mask = BIT(XE_ENGINE_CLASS_COPY) | BIT(XE_ENGINE_CLASS_COMPUTE),
.num_geometry_xecore_fuse_regs = 3,
.num_compute_xecore_fuse_regs = 3,

View File

@@ -226,7 +226,7 @@ void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent,
}
range_start = reg & REG_GENMASK(25, range_bit);
range_end = range_start | REG_GENMASK(range_bit, 0);
range_end = range_start | REG_GENMASK(range_bit - 1, 0);
switch (val & RING_FORCE_TO_NONPRIV_ACCESS_MASK) {
case RING_FORCE_TO_NONPRIV_ACCESS_RW:

View File

@@ -14,9 +14,9 @@
#include "xe_guc.h"
#include "xe_guc_submit.h"
#include "xe_lrc.h"
#include "xe_mem_pool.h"
#include "xe_migrate.h"
#include "xe_pm.h"
#include "xe_sa.h"
#include "xe_sriov_printk.h"
#include "xe_sriov_vf.h"
#include "xe_sriov_vf_ccs.h"
@@ -141,43 +141,47 @@ static u64 get_ccs_bb_pool_size(struct xe_device *xe)
static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
{
struct xe_mem_pool *pool;
struct xe_device *xe = tile_to_xe(tile);
struct xe_sa_manager *sa_manager;
u32 *pool_cpu_addr, *last_dw_addr;
u64 bb_pool_size;
int offset, err;
int err;
bb_pool_size = get_ccs_bb_pool_size(xe);
xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n",
ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);
sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16,
XE_SA_BO_MANAGER_FLAG_SHADOW);
if (IS_ERR(sa_manager)) {
xe_sriov_err(xe, "Suballocator init failed with error: %pe\n",
sa_manager);
err = PTR_ERR(sa_manager);
pool = xe_mem_pool_init(tile, bb_pool_size, sizeof(u32),
XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY);
if (IS_ERR(pool)) {
xe_sriov_err(xe, "xe_mem_pool_init failed with error: %pe\n",
pool);
err = PTR_ERR(pool);
return err;
}
offset = 0;
xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP,
bb_pool_size);
xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP,
bb_pool_size);
pool_cpu_addr = xe_mem_pool_cpu_addr(pool);
memset(pool_cpu_addr, 0, bb_pool_size);
offset = bb_pool_size - sizeof(u32);
xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END);
xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END);
last_dw_addr = pool_cpu_addr + (bb_pool_size / sizeof(u32)) - 1;
*last_dw_addr = MI_BATCH_BUFFER_END;
ctx->mem.ccs_bb_pool = sa_manager;
/*
* Sync the main copy and the shadow copy so that the shadow copy is a
* replica of the main copy. Only individual BBs are synced after this
* init step, so the main pool and the shadow copy must be in sync at
* this point, as the GuC may read BB commands from the shadow copy.
*/
xe_mem_pool_sync(pool);
ctx->mem.ccs_bb_pool = pool;
return 0;
}
static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx)
{
u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
u32 dw[10], i = 0;
@@ -388,7 +392,7 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
#define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET (2 * sizeof(u32))
void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx)
{
u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
struct xe_device *xe = gt_to_xe(ctx->mig_q->gt);
@@ -412,8 +416,8 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
struct xe_device *xe = xe_bo_device(bo);
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
struct xe_sriov_vf_ccs_ctx *ctx;
struct xe_mem_pool_node *bb;
struct xe_tile *tile;
struct xe_bb *bb;
int err = 0;
xe_assert(xe, IS_VF_CCS_READY(xe));
@@ -445,7 +449,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
{
struct xe_device *xe = xe_bo_device(bo);
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
struct xe_bb *bb;
struct xe_mem_pool_node *bb;
xe_assert(xe, IS_VF_CCS_READY(xe));
@@ -471,8 +475,8 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
*/
void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
{
struct xe_sa_manager *bb_pool;
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
struct xe_mem_pool *bb_pool;
if (!IS_VF_CCS_READY(xe))
return;
@@ -485,7 +489,7 @@ void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read");
drm_printf(p, "-------------------------\n");
drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool));
xe_mem_pool_dump(bb_pool, p);
drm_puts(p, "\n");
}
}

View File

@@ -17,9 +17,6 @@ enum xe_sriov_vf_ccs_rw_ctxs {
XE_SRIOV_VF_CCS_CTX_COUNT
};
struct xe_migrate;
struct xe_sa_manager;
/**
* struct xe_sriov_vf_ccs_ctx - VF CCS migration context data.
*/
@@ -33,7 +30,7 @@ struct xe_sriov_vf_ccs_ctx {
/** @mem: memory data */
struct {
/** @mem.ccs_bb_pool: Pool from which batch buffers are allocated. */
struct xe_sa_manager *ccs_bb_pool;
struct xe_mem_pool *ccs_bb_pool;
} mem;
};

View File

@@ -97,7 +97,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
{ XE_RTP_NAME("Tuning: Set STLB Bank Hash Mode to 4KB"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3510, XE_RTP_END_VERSION_UNDEFINED),
IS_INTEGRATED),
XE_RTP_ACTIONS(FIELD_SET(XEHP_GAMSTLB_CTRL, BANK_HASH_MODE,
XE_RTP_ACTIONS(FIELD_SET(GAMSTLB_CTRL, BANK_HASH_MODE,
BANK_HASH_4KB_MODE))
},
};

View File

@@ -3658,6 +3658,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
XE_IOCTL_DBG(xe, !IS_DGFX(xe) && coh_mode == XE_COH_NONE &&
is_cpu_addr_mirror) ||
XE_IOCTL_DBG(xe, xe_device_is_l2_flush_optimized(xe) &&
(op == DRM_XE_VM_BIND_OP_MAP_USERPTR ||
is_cpu_addr_mirror) &&
@@ -4156,7 +4158,8 @@ int xe_vm_get_property_ioctl(struct drm_device *drm, void *data,
int ret = 0;
if (XE_IOCTL_DBG(xe, (args->reserved[0] || args->reserved[1] ||
args->reserved[2])))
args->reserved[2] || args->extensions ||
args->pad)))
return -EINVAL;
vm = xe_vm_lookup(xef, args->vm_id);

View File

@@ -621,6 +621,45 @@ static int xe_madvise_purgeable_retained_to_user(const struct xe_madvise_details
return 0;
}
static bool check_pat_args_are_sane(struct xe_device *xe,
struct xe_vmas_in_madvise_range *madvise_range,
u16 pat_index)
{
u16 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
int i;
/*
* Using coh_none with CPU cached buffers is not allowed on iGPU.
* On iGPU the GPU shares the LLC with the CPU, so with coh_none
* the GPU bypasses CPU caches and reads directly from DRAM,
* potentially seeing stale sensitive data from previously freed
* pages. On dGPU this restriction does not apply, because the
* platform does not provide a non-coherent system memory access
* path that would violate the DMA coherency contract.
*/
if (coh_mode != XE_COH_NONE || IS_DGFX(xe))
return true;
for (i = 0; i < madvise_range->num_vmas; i++) {
struct xe_vma *vma = madvise_range->vmas[i];
struct xe_bo *bo = xe_vma_bo(vma);
if (bo) {
/* BO with WB caching + COH_NONE is not allowed */
if (XE_IOCTL_DBG(xe, bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
return false;
/* Imported dma-buf without caching info, assume cached */
if (XE_IOCTL_DBG(xe, !bo->cpu_caching))
return false;
} else if (XE_IOCTL_DBG(xe, xe_vma_is_cpu_addr_mirror(vma) ||
xe_vma_is_userptr(vma)))
/* System memory (userptr/SVM) is always CPU cached */
return false;
}
return true;
}
static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
int num_vmas, u32 atomic_val)
{
@@ -750,6 +789,14 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
}
}
if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) {
if (!check_pat_args_are_sane(xe, &madvise_range,
args->pat_index.val)) {
err = -EINVAL;
goto free_vmas;
}
}
if (madvise_range.has_bo_vmas) {
if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
if (!check_bo_args_are_sane(vm, madvise_range.vmas,

View File

@@ -743,14 +743,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS))
},
{ XE_RTP_NAME("14019988906"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD))
},
{ XE_RTP_NAME("14019877138"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
},
{ XE_RTP_NAME("14021490052"),
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(FF_MODE,