mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-15 22:31:47 -04:00
Merge tag 'drm-xe-fixes-2026-04-30' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes
API Fixes: - Add missing pad and extensions check (Jonathan) - Reject unsafe PAT indices for CPU cached memory (Jia) Driver Fixes: - Drop registration of guc_submit_wedged_fini from xe_guc_submit_wedge (Brost) - Xe3p tuning and workaround fixes (Roper, Gustavo) - Use drm mm instead of drm SA for CCS read/write (Satya) - Fix leaks and null derefs (Shuicheng) - Fix Wa_18022495364 (Tvrtko) Signed-off-by: Dave Airlie <airlied@redhat.com> From: Rodrigo Vivi <rodrigo.vivi@intel.com> Link: https://patch.msgid.link/afO05KvmFMn_7qcY@intel.com
This commit is contained in:
@@ -88,6 +88,7 @@ xe-y += xe_bb.o \
|
||||
xe_irq.o \
|
||||
xe_late_bind_fw.o \
|
||||
xe_lrc.o \
|
||||
xe_mem_pool.o \
|
||||
xe_migrate.o \
|
||||
xe_mmio.o \
|
||||
xe_mmio_gem.o \
|
||||
|
||||
@@ -583,7 +583,7 @@
|
||||
#define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32)
|
||||
#define LSCFE_SAME_ADDRESS_ATOMICS_COALESCING_DISABLE REG_BIT(35 - 32)
|
||||
|
||||
#define ROW_CHICKEN5 XE_REG_MCR(0xe7f0)
|
||||
#define ROW_CHICKEN5 XE_REG_MCR(0xe7f0, XE_REG_OPTION_MASKED)
|
||||
#define CPSS_AWARE_DIS REG_BIT(3)
|
||||
|
||||
#define SARB_CHICKEN1 XE_REG_MCR(0xe90c)
|
||||
|
||||
@@ -2322,8 +2322,10 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
|
||||
}
|
||||
|
||||
/* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */
|
||||
if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
|
||||
if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT)) {
|
||||
xe_bo_free(bo);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
|
||||
!(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
|
||||
@@ -2342,8 +2344,10 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
|
||||
alignment = SZ_4K >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
if (type == ttm_bo_type_device && aligned_size != size)
|
||||
if (type == ttm_bo_type_device && aligned_size != size) {
|
||||
xe_bo_free(bo);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
if (!bo) {
|
||||
bo = xe_bo_alloc();
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include "xe_ggtt_types.h"
|
||||
|
||||
struct xe_device;
|
||||
struct xe_mem_pool_node;
|
||||
struct xe_vm;
|
||||
|
||||
#define XE_BO_MAX_PLACEMENTS 3
|
||||
@@ -88,7 +89,7 @@ struct xe_bo {
|
||||
bool ccs_cleared;
|
||||
|
||||
/** @bb_ccs: BB instructions of CCS read/write. Valid only for VF */
|
||||
struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
|
||||
struct xe_mem_pool_node *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
|
||||
|
||||
/**
|
||||
* @cpu_caching: CPU caching mode. Currently only used for userspace
|
||||
|
||||
@@ -258,6 +258,13 @@ struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags)
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Takes ownership of @storage: on success it is transferred to the returned
|
||||
* drm_gem_object; on failure it is freed before returning the error.
|
||||
* This matches the contract of xe_bo_init_locked() which frees @storage on
|
||||
* its error paths, so callers need not (and must not) free @storage after
|
||||
* this call.
|
||||
*/
|
||||
static struct drm_gem_object *
|
||||
xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
|
||||
struct dma_buf *dma_buf)
|
||||
@@ -271,8 +278,10 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
|
||||
int ret = 0;
|
||||
|
||||
dummy_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
|
||||
if (!dummy_obj)
|
||||
if (!dummy_obj) {
|
||||
xe_bo_free(storage);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
dummy_obj->resv = resv;
|
||||
xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, ret) {
|
||||
@@ -281,6 +290,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
/* xe_bo_init_locked() frees storage on error */
|
||||
bo = xe_bo_init_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
|
||||
0, /* Will require 1way or 2way for vm_bind */
|
||||
ttm_bo_type_sg, XE_BO_FLAG_SYSTEM, &exec);
|
||||
@@ -368,12 +378,15 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
/* Errors here will take care of freeing the bo. */
|
||||
/*
|
||||
* xe_dma_buf_init_obj() takes ownership of bo on both success
|
||||
* and failure, so we must not touch bo after this call.
|
||||
*/
|
||||
obj = xe_dma_buf_init_obj(dev, bo, dma_buf);
|
||||
if (IS_ERR(obj))
|
||||
if (IS_ERR(obj)) {
|
||||
dma_buf_detach(dma_buf, attach);
|
||||
return obj;
|
||||
|
||||
|
||||
}
|
||||
get_dma_buf(dma_buf);
|
||||
obj->import_attach = attach;
|
||||
return obj;
|
||||
|
||||
@@ -869,14 +869,14 @@ static int xe_eu_stall_stream_close(struct inode *inode, struct file *file)
|
||||
struct xe_eu_stall_data_stream *stream = file->private_data;
|
||||
struct xe_gt *gt = stream->gt;
|
||||
|
||||
drm_dev_put(>->tile->xe->drm);
|
||||
|
||||
mutex_lock(>->eu_stall->stream_lock);
|
||||
xe_eu_stall_disable_locked(stream);
|
||||
xe_eu_stall_data_buf_destroy(stream);
|
||||
xe_eu_stall_stream_free(stream);
|
||||
mutex_unlock(>->eu_stall->stream_lock);
|
||||
|
||||
drm_dev_put(>->tile->xe->drm);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -1405,7 +1405,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
|
||||
if (q->vm && q->hwe->hw_engine_group) {
|
||||
err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
|
||||
if (err)
|
||||
goto put_exec_queue;
|
||||
goto kill_exec_queue;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1416,12 +1416,15 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
|
||||
/* user id alloc must always be last in ioctl to prevent UAF */
|
||||
err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
|
||||
if (err)
|
||||
goto kill_exec_queue;
|
||||
goto del_hw_engine_group;
|
||||
|
||||
args->exec_queue_id = id;
|
||||
|
||||
return 0;
|
||||
|
||||
del_hw_engine_group:
|
||||
if (q->vm && q->hwe && q->hwe->hw_engine_group)
|
||||
xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
|
||||
kill_exec_queue:
|
||||
xe_exec_queue_kill(q);
|
||||
delete_queue_group:
|
||||
@@ -1760,7 +1763,7 @@ void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
|
||||
void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
|
||||
unsigned int type)
|
||||
{
|
||||
xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
|
||||
xe_assert(gt_to_xe(q->gt), type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
|
||||
type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
|
||||
|
||||
dma_fence_put(q->tlb_inval[type].last_fence);
|
||||
|
||||
@@ -166,7 +166,7 @@ static int query_compatibility_version(struct xe_gsc *gsc)
|
||||
&rd_offset);
|
||||
if (err) {
|
||||
xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err);
|
||||
return err;
|
||||
goto out_bo;
|
||||
}
|
||||
|
||||
compat->major = version_query_rd(xe, &bo->vmap, rd_offset, proj_major);
|
||||
|
||||
@@ -259,24 +259,12 @@ static void guc_submit_sw_fini(struct drm_device *drm, void *arg)
|
||||
}
|
||||
|
||||
static void guc_submit_fini(void *arg)
|
||||
{
|
||||
struct xe_guc *guc = arg;
|
||||
|
||||
/* Forcefully kill any remaining exec queues */
|
||||
xe_guc_ct_stop(&guc->ct);
|
||||
guc_submit_reset_prepare(guc);
|
||||
xe_guc_softreset(guc);
|
||||
xe_guc_submit_stop(guc);
|
||||
xe_uc_fw_sanitize(&guc->fw);
|
||||
xe_guc_submit_pause_abort(guc);
|
||||
}
|
||||
|
||||
static void guc_submit_wedged_fini(void *arg)
|
||||
{
|
||||
struct xe_guc *guc = arg;
|
||||
struct xe_exec_queue *q;
|
||||
unsigned long index;
|
||||
|
||||
/* Drop any wedged queue refs */
|
||||
mutex_lock(&guc->submission_state.lock);
|
||||
xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
|
||||
if (exec_queue_wedged(q)) {
|
||||
@@ -286,6 +274,14 @@ static void guc_submit_wedged_fini(void *arg)
|
||||
}
|
||||
}
|
||||
mutex_unlock(&guc->submission_state.lock);
|
||||
|
||||
/* Forcefully kill any remaining exec queues */
|
||||
xe_guc_ct_stop(&guc->ct);
|
||||
guc_submit_reset_prepare(guc);
|
||||
xe_guc_softreset(guc);
|
||||
xe_guc_submit_stop(guc);
|
||||
xe_uc_fw_sanitize(&guc->fw);
|
||||
xe_guc_submit_pause_abort(guc);
|
||||
}
|
||||
|
||||
static const struct xe_exec_queue_ops guc_exec_queue_ops;
|
||||
@@ -1320,10 +1316,8 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
|
||||
void xe_guc_submit_wedge(struct xe_guc *guc)
|
||||
{
|
||||
struct xe_device *xe = guc_to_xe(guc);
|
||||
struct xe_gt *gt = guc_to_gt(guc);
|
||||
struct xe_exec_queue *q;
|
||||
unsigned long index;
|
||||
int err;
|
||||
|
||||
xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);
|
||||
|
||||
@@ -1335,15 +1329,6 @@ void xe_guc_submit_wedge(struct xe_guc *guc)
|
||||
return;
|
||||
|
||||
if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) {
|
||||
err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
|
||||
guc_submit_wedged_fini, guc);
|
||||
if (err) {
|
||||
xe_gt_err(gt, "Failed to register clean-up on wedged.mode=%s; "
|
||||
"Although device is wedged.\n",
|
||||
xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET));
|
||||
return;
|
||||
}
|
||||
|
||||
mutex_lock(&guc->submission_state.lock);
|
||||
xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
|
||||
if (xe_exec_queue_get_unless_zero(q))
|
||||
|
||||
@@ -1214,7 +1214,7 @@ static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc,
|
||||
if (xe_gt_WARN_ON(lrc->gt, max_len < 3))
|
||||
return -ENOSPC;
|
||||
|
||||
*cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
|
||||
*cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_LRM_CS_MMIO | MI_LRI_NUM_REGS(1);
|
||||
*cmd++ = CS_DEBUG_MODE2(0).addr;
|
||||
*cmd++ = REG_MASKED_FIELD_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);
|
||||
|
||||
|
||||
403
drivers/gpu/drm/xe/xe_mem_pool.c
Normal file
403
drivers/gpu/drm/xe/xe_mem_pool.c
Normal file
@@ -0,0 +1,403 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
* Copyright © 2026 Intel Corporation
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#include <drm/drm_managed.h>
|
||||
|
||||
#include "instructions/xe_mi_commands.h"
|
||||
#include "xe_bo.h"
|
||||
#include "xe_device_types.h"
|
||||
#include "xe_map.h"
|
||||
#include "xe_mem_pool.h"
|
||||
#include "xe_mem_pool_types.h"
|
||||
#include "xe_tile_printk.h"
|
||||
|
||||
/**
 * struct xe_mem_pool - DRM MM pool for sub-allocating memory from a BO on an
 * XE tile.
 *
 * The XE memory pool is a DRM MM manager that provides sub-allocation of memory
 * from a backing buffer object (BO) on a specific XE tile. It is designed to
 * manage memory for GPU workloads, allowing for efficient allocation and
 * deallocation of memory regions within the BO.
 *
 * The memory pool maintains a primary BO that is pinned in the GGTT and mapped
 * into the CPU address space for direct access. Optionally, it can also maintain
 * a shadow BO that can be used for atomic updates to the primary BO's contents.
 *
 * The API provided by the memory pool allows clients to allocate and free memory
 * regions, retrieve GPU and CPU addresses, and synchronize data between the
 * primary and shadow BOs as needed.
 */
struct xe_mem_pool {
	/** @base: Range allocator over the managed bytes of @bo (guard region excluded) */
	struct drm_mm base;
	/** @bo: Active pool BO (GGTT-pinned, CPU-mapped). */
	struct xe_bo *bo;
	/** @shadow: Shadow BO for atomic command updates; NULL when not requested. */
	struct xe_bo *shadow;
	/** @swap_guard: Serializes swap/sync operations on @bo and @shadow. */
	struct mutex swap_guard;
	/**
	 * @cpu_addr: CPU virtual address of the active BO. When the BO mapping
	 * is not iomem this aliases the BO's kernel vaddr; otherwise it points
	 * at a separately kvzalloc'ed staging buffer that is flushed to /
	 * read from the BO explicitly.
	 */
	void *cpu_addr;
	/** @is_iomem: Indicates if the BO mapping is I/O memory (see @cpu_addr). */
	bool is_iomem;
};
|
||||
|
||||
static struct xe_mem_pool *node_to_pool(struct xe_mem_pool_node *node)
|
||||
{
|
||||
return container_of(node->sa_node.mm, struct xe_mem_pool, base);
|
||||
}
|
||||
|
||||
static struct xe_tile *pool_to_tile(struct xe_mem_pool *pool)
|
||||
{
|
||||
return pool->bo->tile;
|
||||
}
|
||||
|
||||
static void fini_pool_action(struct drm_device *drm, void *arg)
|
||||
{
|
||||
struct xe_mem_pool *pool = arg;
|
||||
|
||||
if (pool->is_iomem)
|
||||
kvfree(pool->cpu_addr);
|
||||
|
||||
drm_mm_takedown(&pool->base);
|
||||
}
|
||||
|
||||
static int pool_shadow_init(struct xe_mem_pool *pool)
|
||||
{
|
||||
struct xe_tile *tile = pool->bo->tile;
|
||||
struct xe_device *xe = tile_to_xe(tile);
|
||||
struct xe_bo *shadow;
|
||||
int ret;
|
||||
|
||||
xe_assert(xe, !pool->shadow);
|
||||
|
||||
ret = drmm_mutex_init(&xe->drm, &pool->swap_guard);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
|
||||
fs_reclaim_acquire(GFP_KERNEL);
|
||||
might_lock(&pool->swap_guard);
|
||||
fs_reclaim_release(GFP_KERNEL);
|
||||
}
|
||||
shadow = xe_managed_bo_create_pin_map(xe, tile,
|
||||
xe_bo_size(pool->bo),
|
||||
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
|
||||
XE_BO_FLAG_GGTT |
|
||||
XE_BO_FLAG_GGTT_INVALIDATE |
|
||||
XE_BO_FLAG_PINNED_NORESTORE);
|
||||
if (IS_ERR(shadow))
|
||||
return PTR_ERR(shadow);
|
||||
|
||||
pool->shadow = shadow;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_mem_pool_init() - Initialize memory pool.
|
||||
* @tile: the &xe_tile where allocate.
|
||||
* @size: number of bytes to allocate.
|
||||
* @guard: the size of the guard region at the end of the BO that is not
|
||||
* sub-allocated, in bytes.
|
||||
* @flags: flags to use to create shadow pool.
|
||||
*
|
||||
* Initializes a memory pool for sub-allocating memory from a backing BO on the
|
||||
* specified XE tile. The backing BO is pinned in the GGTT and mapped into
|
||||
* the CPU address space for direct access. Optionally, a shadow BO can also be
|
||||
* initialized for atomic updates to the primary BO's contents.
|
||||
*
|
||||
* Returns: a pointer to the &xe_mem_pool, or an error pointer on failure.
|
||||
*/
|
||||
struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size,
|
||||
u32 guard, int flags)
|
||||
{
|
||||
struct xe_device *xe = tile_to_xe(tile);
|
||||
struct xe_mem_pool *pool;
|
||||
struct xe_bo *bo;
|
||||
u32 managed_size;
|
||||
int ret;
|
||||
|
||||
xe_tile_assert(tile, size > guard);
|
||||
managed_size = size - guard;
|
||||
|
||||
pool = drmm_kzalloc(&xe->drm, sizeof(*pool), GFP_KERNEL);
|
||||
if (!pool)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
bo = xe_managed_bo_create_pin_map(xe, tile, size,
|
||||
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
|
||||
XE_BO_FLAG_GGTT |
|
||||
XE_BO_FLAG_GGTT_INVALIDATE |
|
||||
XE_BO_FLAG_PINNED_NORESTORE);
|
||||
if (IS_ERR(bo)) {
|
||||
xe_tile_err(tile, "Failed to prepare %uKiB BO for mem pool (%pe)\n",
|
||||
size / SZ_1K, bo);
|
||||
return ERR_CAST(bo);
|
||||
}
|
||||
pool->bo = bo;
|
||||
pool->is_iomem = bo->vmap.is_iomem;
|
||||
|
||||
if (pool->is_iomem) {
|
||||
pool->cpu_addr = kvzalloc(size, GFP_KERNEL);
|
||||
if (!pool->cpu_addr)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
} else {
|
||||
pool->cpu_addr = bo->vmap.vaddr;
|
||||
}
|
||||
|
||||
if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY) {
|
||||
ret = pool_shadow_init(pool);
|
||||
|
||||
if (ret)
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
drm_mm_init(&pool->base, 0, managed_size);
|
||||
ret = drmm_add_action_or_reset(&xe->drm, fini_pool_action, pool);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
return pool;
|
||||
|
||||
out_err:
|
||||
if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY)
|
||||
xe_tile_err(tile,
|
||||
"Failed to initialize shadow BO for mem pool (%d)\n", ret);
|
||||
if (bo->vmap.is_iomem)
|
||||
kvfree(pool->cpu_addr);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_mem_pool_sync() - Copy the entire contents of the main pool to shadow pool.
|
||||
* @pool: the memory pool containing the primary and shadow BOs.
|
||||
*
|
||||
* Copies the entire contents of the primary pool to the shadow pool. This must
|
||||
* be done after xe_mem_pool_init() with the XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY
|
||||
* flag to ensure that the shadow pool has the same initial contents as the primary
|
||||
* pool. After this initial synchronization, clients can choose to synchronize the
|
||||
* shadow pool with the primary pool on a node basis using
|
||||
* xe_mem_pool_sync_shadow_locked() as needed.
|
||||
*
|
||||
* Return: None.
|
||||
*/
|
||||
void xe_mem_pool_sync(struct xe_mem_pool *pool)
|
||||
{
|
||||
struct xe_tile *tile = pool_to_tile(pool);
|
||||
struct xe_device *xe = tile_to_xe(tile);
|
||||
|
||||
xe_tile_assert(tile, pool->shadow);
|
||||
|
||||
xe_map_memcpy_to(xe, &pool->shadow->vmap, 0,
|
||||
pool->cpu_addr, xe_bo_size(pool->bo));
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_mem_pool_swap_shadow_locked() - Swap the primary BO with the shadow BO.
|
||||
* @pool: the memory pool containing the primary and shadow BOs.
|
||||
*
|
||||
* Swaps the primary buffer object with the shadow buffer object in the mem
|
||||
* pool. This allows for atomic updates to the contents of the primary BO
|
||||
* by first writing to the shadow BO and then swapping it with the primary BO.
|
||||
* Swap_guard must be held to ensure synchronization with any concurrent swap
|
||||
* operations.
|
||||
*
|
||||
* Return: None.
|
||||
*/
|
||||
void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool)
|
||||
{
|
||||
struct xe_tile *tile = pool_to_tile(pool);
|
||||
|
||||
xe_tile_assert(tile, pool->shadow);
|
||||
lockdep_assert_held(&pool->swap_guard);
|
||||
|
||||
swap(pool->bo, pool->shadow);
|
||||
if (!pool->bo->vmap.is_iomem)
|
||||
pool->cpu_addr = pool->bo->vmap.vaddr;
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_mem_pool_sync_shadow_locked() - Copy node from primary pool to shadow pool.
|
||||
* @node: the node allocated in the memory pool.
|
||||
*
|
||||
* Copies the specified batch buffer from the primary pool to the shadow pool.
|
||||
* Swap_guard must be held to ensure synchronization with any concurrent swap
|
||||
* operations.
|
||||
*
|
||||
* Return: None.
|
||||
*/
|
||||
void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node)
|
||||
{
|
||||
struct xe_mem_pool *pool = node_to_pool(node);
|
||||
struct xe_tile *tile = pool_to_tile(pool);
|
||||
struct xe_device *xe = tile_to_xe(tile);
|
||||
struct drm_mm_node *sa_node = &node->sa_node;
|
||||
|
||||
xe_tile_assert(tile, pool->shadow);
|
||||
lockdep_assert_held(&pool->swap_guard);
|
||||
|
||||
xe_map_memcpy_to(xe, &pool->shadow->vmap,
|
||||
sa_node->start,
|
||||
pool->cpu_addr + sa_node->start,
|
||||
sa_node->size);
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_mem_pool_gpu_addr() - Retrieve GPU address of memory pool.
|
||||
* @pool: the memory pool
|
||||
*
|
||||
* Returns: GGTT address of the memory pool.
|
||||
*/
|
||||
u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool)
|
||||
{
|
||||
return xe_bo_ggtt_addr(pool->bo);
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_mem_pool_cpu_addr() - Retrieve CPU address of manager pool.
|
||||
* @pool: the memory pool
|
||||
*
|
||||
* Returns: CPU virtual address of memory pool.
|
||||
*/
|
||||
void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool)
|
||||
{
|
||||
return pool->cpu_addr;
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_mem_pool_bo_swap_guard() - Retrieve the mutex used to guard swap
|
||||
* operations on a memory pool.
|
||||
* @pool: the memory pool
|
||||
*
|
||||
* Returns: Swap guard mutex or NULL if shadow pool is not created.
|
||||
*/
|
||||
struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool)
|
||||
{
|
||||
if (!pool->shadow)
|
||||
return NULL;
|
||||
|
||||
return &pool->swap_guard;
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_mem_pool_bo_flush_write() - Copy the data from the sub-allocation
|
||||
* to the GPU memory.
|
||||
* @node: the node allocated in the memory pool to flush.
|
||||
*/
|
||||
void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node)
|
||||
{
|
||||
struct xe_mem_pool *pool = node_to_pool(node);
|
||||
struct xe_tile *tile = pool_to_tile(pool);
|
||||
struct xe_device *xe = tile_to_xe(tile);
|
||||
struct drm_mm_node *sa_node = &node->sa_node;
|
||||
|
||||
if (!pool->bo->vmap.is_iomem)
|
||||
return;
|
||||
|
||||
xe_map_memcpy_to(xe, &pool->bo->vmap, sa_node->start,
|
||||
pool->cpu_addr + sa_node->start,
|
||||
sa_node->size);
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_mem_pool_bo_sync_read() - Copy the data from GPU memory to the
|
||||
* sub-allocation.
|
||||
* @node: the node allocated in the memory pool to read back.
|
||||
*/
|
||||
void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node)
|
||||
{
|
||||
struct xe_mem_pool *pool = node_to_pool(node);
|
||||
struct xe_tile *tile = pool_to_tile(pool);
|
||||
struct xe_device *xe = tile_to_xe(tile);
|
||||
struct drm_mm_node *sa_node = &node->sa_node;
|
||||
|
||||
if (!pool->bo->vmap.is_iomem)
|
||||
return;
|
||||
|
||||
xe_map_memcpy_from(xe, pool->cpu_addr + sa_node->start,
|
||||
&pool->bo->vmap, sa_node->start, sa_node->size);
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_mem_pool_alloc_node() - Allocate a new node for use with xe_mem_pool.
|
||||
*
|
||||
* Returns: node structure or an ERR_PTR(-ENOMEM).
|
||||
*/
|
||||
struct xe_mem_pool_node *xe_mem_pool_alloc_node(void)
|
||||
{
|
||||
struct xe_mem_pool_node *node = kzalloc_obj(*node);
|
||||
|
||||
if (!node)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_mem_pool_insert_node() - Insert a node into the memory pool.
|
||||
* @pool: the memory pool to insert into
|
||||
* @node: the node to insert
|
||||
* @size: the size of the node to be allocated in bytes.
|
||||
*
|
||||
* Inserts a node into the specified memory pool using drm_mm for
|
||||
* allocation.
|
||||
*
|
||||
* Returns: 0 on success or a negative error code on failure.
|
||||
*/
|
||||
int xe_mem_pool_insert_node(struct xe_mem_pool *pool,
|
||||
struct xe_mem_pool_node *node, u32 size)
|
||||
{
|
||||
if (!pool)
|
||||
return -EINVAL;
|
||||
|
||||
return drm_mm_insert_node(&pool->base, &node->sa_node, size);
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_mem_pool_free_node() - Free a node allocated from the memory pool.
|
||||
* @node: the node to free
|
||||
*
|
||||
* Returns: None.
|
||||
*/
|
||||
void xe_mem_pool_free_node(struct xe_mem_pool_node *node)
|
||||
{
|
||||
if (!node)
|
||||
return;
|
||||
|
||||
drm_mm_remove_node(&node->sa_node);
|
||||
kfree(node);
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_mem_pool_node_cpu_addr() - Retrieve CPU address of the node.
|
||||
* @node: the node allocated in the memory pool
|
||||
*
|
||||
* Returns: CPU virtual address of the node.
|
||||
*/
|
||||
void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node)
|
||||
{
|
||||
struct xe_mem_pool *pool = node_to_pool(node);
|
||||
|
||||
return xe_mem_pool_cpu_addr(pool) + node->sa_node.start;
|
||||
}
|
||||
|
||||
/**
|
||||
* xe_mem_pool_dump() - Dump the state of the DRM MM manager for debugging.
|
||||
* @pool: the memory pool info be dumped.
|
||||
* @p: The DRM printer to use for output.
|
||||
*
|
||||
* Only the drm managed region is dumped, not the state of the BOs or any other
|
||||
* pool information.
|
||||
*
|
||||
* Returns: None.
|
||||
*/
|
||||
void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p)
|
||||
{
|
||||
drm_mm_print(&pool->base, p);
|
||||
}
|
||||
35
drivers/gpu/drm/xe/xe_mem_pool.h
Normal file
35
drivers/gpu/drm/xe/xe_mem_pool.h
Normal file
@@ -0,0 +1,35 @@
|
||||
/* SPDX-License-Identifier: MIT */
|
||||
/*
|
||||
* Copyright © 2026 Intel Corporation
|
||||
*/
|
||||
#ifndef _XE_MEM_POOL_H_
|
||||
#define _XE_MEM_POOL_H_
|
||||
|
||||
#include <linux/sizes.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <drm/drm_mm.h>
|
||||
#include "xe_mem_pool_types.h"
|
||||
|
||||
struct drm_printer;
|
||||
struct xe_mem_pool;
|
||||
struct xe_tile;
|
||||
|
||||
struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size,
|
||||
u32 guard, int flags);
|
||||
void xe_mem_pool_sync(struct xe_mem_pool *pool);
|
||||
void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool);
|
||||
void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node);
|
||||
u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool);
|
||||
void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool);
|
||||
struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool);
|
||||
void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node);
|
||||
void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node);
|
||||
struct xe_mem_pool_node *xe_mem_pool_alloc_node(void);
|
||||
int xe_mem_pool_insert_node(struct xe_mem_pool *pool,
|
||||
struct xe_mem_pool_node *node, u32 size);
|
||||
void xe_mem_pool_free_node(struct xe_mem_pool_node *node);
|
||||
void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node);
|
||||
void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p);
|
||||
|
||||
#endif
|
||||
21
drivers/gpu/drm/xe/xe_mem_pool_types.h
Normal file
21
drivers/gpu/drm/xe/xe_mem_pool_types.h
Normal file
@@ -0,0 +1,21 @@
|
||||
/* SPDX-License-Identifier: MIT */
|
||||
/*
|
||||
* Copyright © 2026 Intel Corporation
|
||||
*/
|
||||
|
||||
#ifndef _XE_MEM_POOL_TYPES_H_
|
||||
#define _XE_MEM_POOL_TYPES_H_
|
||||
|
||||
#include <drm/drm_mm.h>
|
||||
|
||||
#define XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY BIT(0)
|
||||
|
||||
/**
 * struct xe_mem_pool_node - Sub-range allocations from mem pool.
 *
 * Obtained from xe_mem_pool_alloc_node(), placed with
 * xe_mem_pool_insert_node() and released with xe_mem_pool_free_node().
 */
struct xe_mem_pool_node {
	/**
	 * @sa_node: drm_mm_node for this allocation. Its .mm field links
	 * back to the owning pool's allocator.
	 */
	struct drm_mm_node sa_node;
};
|
||||
|
||||
#endif
|
||||
@@ -29,6 +29,7 @@
|
||||
#include "xe_hw_engine.h"
|
||||
#include "xe_lrc.h"
|
||||
#include "xe_map.h"
|
||||
#include "xe_mem_pool.h"
|
||||
#include "xe_mocs.h"
|
||||
#include "xe_printk.h"
|
||||
#include "xe_pt.h"
|
||||
@@ -1166,11 +1167,12 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
|
||||
u32 batch_size, batch_size_allocated;
|
||||
struct xe_device *xe = gt_to_xe(gt);
|
||||
struct xe_res_cursor src_it, ccs_it;
|
||||
struct xe_mem_pool *bb_pool;
|
||||
struct xe_sriov_vf_ccs_ctx *ctx;
|
||||
struct xe_sa_manager *bb_pool;
|
||||
u64 size = xe_bo_size(src_bo);
|
||||
struct xe_bb *bb = NULL;
|
||||
struct xe_mem_pool_node *bb;
|
||||
u64 src_L0, src_L0_ofs;
|
||||
struct xe_bb xe_bb_tmp;
|
||||
u32 src_L0_pt;
|
||||
int err;
|
||||
|
||||
@@ -1208,18 +1210,18 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
|
||||
size -= src_L0;
|
||||
}
|
||||
|
||||
bb = xe_bb_alloc(gt);
|
||||
bb = xe_mem_pool_alloc_node();
|
||||
if (IS_ERR(bb))
|
||||
return PTR_ERR(bb);
|
||||
|
||||
bb_pool = ctx->mem.ccs_bb_pool;
|
||||
scoped_guard(mutex, xe_sa_bo_swap_guard(bb_pool)) {
|
||||
xe_sa_bo_swap_shadow(bb_pool);
|
||||
scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) {
|
||||
xe_mem_pool_swap_shadow_locked(bb_pool);
|
||||
|
||||
err = xe_bb_init(bb, bb_pool, batch_size);
|
||||
err = xe_mem_pool_insert_node(bb_pool, bb, batch_size * sizeof(u32));
|
||||
if (err) {
|
||||
xe_gt_err(gt, "BB allocation failed.\n");
|
||||
xe_bb_free(bb, NULL);
|
||||
kfree(bb);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -1227,6 +1229,7 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
|
||||
size = xe_bo_size(src_bo);
|
||||
batch_size = 0;
|
||||
|
||||
xe_bb_tmp = (struct xe_bb){ .cs = xe_mem_pool_node_cpu_addr(bb), .len = 0 };
|
||||
/*
|
||||
* Emit PTE and copy commands here.
|
||||
* The CCS copy command can only support limited size. If the size to be
|
||||
@@ -1255,24 +1258,27 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
|
||||
xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
|
||||
batch_size += EMIT_COPY_CCS_DW;
|
||||
|
||||
emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src);
|
||||
emit_pte(m, &xe_bb_tmp, src_L0_pt, false, true, &src_it, src_L0, src);
|
||||
|
||||
emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src);
|
||||
emit_pte(m, &xe_bb_tmp, ccs_pt, false, false, &ccs_it, ccs_size, src);
|
||||
|
||||
bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
|
||||
flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt,
|
||||
xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len,
|
||||
flush_flags);
|
||||
flush_flags = xe_migrate_ccs_copy(m, &xe_bb_tmp, src_L0_ofs, src_is_pltt,
|
||||
src_L0_ofs, dst_is_pltt,
|
||||
src_L0, ccs_ofs, true);
|
||||
bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
|
||||
xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len,
|
||||
flush_flags);
|
||||
|
||||
size -= src_L0;
|
||||
}
|
||||
|
||||
xe_assert(xe, (batch_size_allocated == bb->len));
|
||||
xe_assert(xe, (batch_size_allocated == xe_bb_tmp.len));
|
||||
xe_assert(xe, bb->sa_node.size == xe_bb_tmp.len * sizeof(u32));
|
||||
src_bo->bb_ccs[read_write] = bb;
|
||||
|
||||
xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
|
||||
xe_sa_bo_sync_shadow(bb->bo);
|
||||
xe_mem_pool_sync_shadow_locked(bb);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -1297,10 +1303,10 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
|
||||
void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
|
||||
enum xe_sriov_vf_ccs_rw_ctxs read_write)
|
||||
{
|
||||
struct xe_bb *bb = src_bo->bb_ccs[read_write];
|
||||
struct xe_mem_pool_node *bb = src_bo->bb_ccs[read_write];
|
||||
struct xe_device *xe = xe_bo_device(src_bo);
|
||||
struct xe_mem_pool *bb_pool;
|
||||
struct xe_sriov_vf_ccs_ctx *ctx;
|
||||
struct xe_sa_manager *bb_pool;
|
||||
u32 *cs;
|
||||
|
||||
xe_assert(xe, IS_SRIOV_VF(xe));
|
||||
@@ -1308,17 +1314,17 @@ void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
|
||||
ctx = &xe->sriov.vf.ccs.contexts[read_write];
|
||||
bb_pool = ctx->mem.ccs_bb_pool;
|
||||
|
||||
guard(mutex) (xe_sa_bo_swap_guard(bb_pool));
|
||||
xe_sa_bo_swap_shadow(bb_pool);
|
||||
scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) {
|
||||
xe_mem_pool_swap_shadow_locked(bb_pool);
|
||||
|
||||
cs = xe_sa_bo_cpu_addr(bb->bo);
|
||||
memset(cs, MI_NOOP, bb->len * sizeof(u32));
|
||||
xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
|
||||
cs = xe_mem_pool_node_cpu_addr(bb);
|
||||
memset(cs, MI_NOOP, bb->sa_node.size);
|
||||
xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
|
||||
|
||||
xe_sa_bo_sync_shadow(bb->bo);
|
||||
|
||||
xe_bb_free(bb, NULL);
|
||||
src_bo->bb_ccs[read_write] = NULL;
|
||||
xe_mem_pool_sync_shadow_locked(bb);
|
||||
xe_mem_pool_free_node(bb);
|
||||
src_bo->bb_ccs[read_write] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -118,6 +118,7 @@ static const struct xe_graphics_desc graphics_xe2 = {
|
||||
|
||||
static const struct xe_graphics_desc graphics_xe3p_lpg = {
|
||||
XE2_GFX_FEATURES,
|
||||
.has_indirect_ring_state = 1,
|
||||
.multi_queue_engine_class_mask = BIT(XE_ENGINE_CLASS_COPY) | BIT(XE_ENGINE_CLASS_COMPUTE),
|
||||
.num_geometry_xecore_fuse_regs = 3,
|
||||
.num_compute_xecore_fuse_regs = 3,
|
||||
|
||||
@@ -226,7 +226,7 @@ void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent,
|
||||
}
|
||||
|
||||
range_start = reg & REG_GENMASK(25, range_bit);
|
||||
range_end = range_start | REG_GENMASK(range_bit, 0);
|
||||
range_end = range_start | REG_GENMASK(range_bit - 1, 0);
|
||||
|
||||
switch (val & RING_FORCE_TO_NONPRIV_ACCESS_MASK) {
|
||||
case RING_FORCE_TO_NONPRIV_ACCESS_RW:
|
||||
|
||||
@@ -14,9 +14,9 @@
|
||||
#include "xe_guc.h"
|
||||
#include "xe_guc_submit.h"
|
||||
#include "xe_lrc.h"
|
||||
#include "xe_mem_pool.h"
|
||||
#include "xe_migrate.h"
|
||||
#include "xe_pm.h"
|
||||
#include "xe_sa.h"
|
||||
#include "xe_sriov_printk.h"
|
||||
#include "xe_sriov_vf.h"
|
||||
#include "xe_sriov_vf_ccs.h"
|
||||
@@ -141,43 +141,47 @@ static u64 get_ccs_bb_pool_size(struct xe_device *xe)
|
||||
|
||||
static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
|
||||
{
|
||||
struct xe_mem_pool *pool;
|
||||
struct xe_device *xe = tile_to_xe(tile);
|
||||
struct xe_sa_manager *sa_manager;
|
||||
u32 *pool_cpu_addr, *last_dw_addr;
|
||||
u64 bb_pool_size;
|
||||
int offset, err;
|
||||
int err;
|
||||
|
||||
bb_pool_size = get_ccs_bb_pool_size(xe);
|
||||
xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n",
|
||||
ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);
|
||||
|
||||
sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16,
|
||||
XE_SA_BO_MANAGER_FLAG_SHADOW);
|
||||
|
||||
if (IS_ERR(sa_manager)) {
|
||||
xe_sriov_err(xe, "Suballocator init failed with error: %pe\n",
|
||||
sa_manager);
|
||||
err = PTR_ERR(sa_manager);
|
||||
pool = xe_mem_pool_init(tile, bb_pool_size, sizeof(u32),
|
||||
XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY);
|
||||
if (IS_ERR(pool)) {
|
||||
xe_sriov_err(xe, "xe_mem_pool_init failed with error: %pe\n",
|
||||
pool);
|
||||
err = PTR_ERR(pool);
|
||||
return err;
|
||||
}
|
||||
|
||||
offset = 0;
|
||||
xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP,
|
||||
bb_pool_size);
|
||||
xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP,
|
||||
bb_pool_size);
|
||||
pool_cpu_addr = xe_mem_pool_cpu_addr(pool);
|
||||
memset(pool_cpu_addr, 0, bb_pool_size);
|
||||
|
||||
offset = bb_pool_size - sizeof(u32);
|
||||
xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END);
|
||||
xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END);
|
||||
last_dw_addr = pool_cpu_addr + (bb_pool_size / sizeof(u32)) - 1;
|
||||
*last_dw_addr = MI_BATCH_BUFFER_END;
|
||||
|
||||
ctx->mem.ccs_bb_pool = sa_manager;
|
||||
/**
|
||||
* Sync the main copy and shadow copy so that the shadow copy is
|
||||
* replica of main copy. We sync only BBs after init part. So, we
|
||||
* need to make sure the main pool and shadow copy are in sync after
|
||||
* this point. This is needed as GuC may read the BB commands from
|
||||
* shadow copy.
|
||||
*/
|
||||
xe_mem_pool_sync(pool);
|
||||
|
||||
ctx->mem.ccs_bb_pool = pool;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx)
|
||||
{
|
||||
u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
|
||||
u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
|
||||
struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
|
||||
u32 dw[10], i = 0;
|
||||
|
||||
@@ -388,7 +392,7 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
|
||||
#define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET (2 * sizeof(u32))
|
||||
void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx)
|
||||
{
|
||||
u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
|
||||
u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
|
||||
struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
|
||||
struct xe_device *xe = gt_to_xe(ctx->mig_q->gt);
|
||||
|
||||
@@ -412,8 +416,8 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
|
||||
struct xe_device *xe = xe_bo_device(bo);
|
||||
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
|
||||
struct xe_sriov_vf_ccs_ctx *ctx;
|
||||
struct xe_mem_pool_node *bb;
|
||||
struct xe_tile *tile;
|
||||
struct xe_bb *bb;
|
||||
int err = 0;
|
||||
|
||||
xe_assert(xe, IS_VF_CCS_READY(xe));
|
||||
@@ -445,7 +449,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
|
||||
{
|
||||
struct xe_device *xe = xe_bo_device(bo);
|
||||
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
|
||||
struct xe_bb *bb;
|
||||
struct xe_mem_pool_node *bb;
|
||||
|
||||
xe_assert(xe, IS_VF_CCS_READY(xe));
|
||||
|
||||
@@ -471,8 +475,8 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
|
||||
*/
|
||||
void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
|
||||
{
|
||||
struct xe_sa_manager *bb_pool;
|
||||
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
|
||||
struct xe_mem_pool *bb_pool;
|
||||
|
||||
if (!IS_VF_CCS_READY(xe))
|
||||
return;
|
||||
@@ -485,7 +489,7 @@ void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
|
||||
|
||||
drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read");
|
||||
drm_printf(p, "-------------------------\n");
|
||||
drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool));
|
||||
xe_mem_pool_dump(bb_pool, p);
|
||||
drm_puts(p, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,9 +17,6 @@ enum xe_sriov_vf_ccs_rw_ctxs {
|
||||
XE_SRIOV_VF_CCS_CTX_COUNT
|
||||
};
|
||||
|
||||
struct xe_migrate;
|
||||
struct xe_sa_manager;
|
||||
|
||||
/**
|
||||
* struct xe_sriov_vf_ccs_ctx - VF CCS migration context data.
|
||||
*/
|
||||
@@ -33,7 +30,7 @@ struct xe_sriov_vf_ccs_ctx {
|
||||
/** @mem: memory data */
|
||||
struct {
|
||||
/** @mem.ccs_bb_pool: Pool from which batch buffers are allocated. */
|
||||
struct xe_sa_manager *ccs_bb_pool;
|
||||
struct xe_mem_pool *ccs_bb_pool;
|
||||
} mem;
|
||||
};
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
|
||||
{ XE_RTP_NAME("Tuning: Set STLB Bank Hash Mode to 4KB"),
|
||||
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3510, XE_RTP_END_VERSION_UNDEFINED),
|
||||
IS_INTEGRATED),
|
||||
XE_RTP_ACTIONS(FIELD_SET(XEHP_GAMSTLB_CTRL, BANK_HASH_MODE,
|
||||
XE_RTP_ACTIONS(FIELD_SET(GAMSTLB_CTRL, BANK_HASH_MODE,
|
||||
BANK_HASH_4KB_MODE))
|
||||
},
|
||||
};
|
||||
|
||||
@@ -3658,6 +3658,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
|
||||
op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
|
||||
XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
|
||||
op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
|
||||
XE_IOCTL_DBG(xe, !IS_DGFX(xe) && coh_mode == XE_COH_NONE &&
|
||||
is_cpu_addr_mirror) ||
|
||||
XE_IOCTL_DBG(xe, xe_device_is_l2_flush_optimized(xe) &&
|
||||
(op == DRM_XE_VM_BIND_OP_MAP_USERPTR ||
|
||||
is_cpu_addr_mirror) &&
|
||||
@@ -4156,7 +4158,8 @@ int xe_vm_get_property_ioctl(struct drm_device *drm, void *data,
|
||||
int ret = 0;
|
||||
|
||||
if (XE_IOCTL_DBG(xe, (args->reserved[0] || args->reserved[1] ||
|
||||
args->reserved[2])))
|
||||
args->reserved[2] || args->extensions ||
|
||||
args->pad)))
|
||||
return -EINVAL;
|
||||
|
||||
vm = xe_vm_lookup(xef, args->vm_id);
|
||||
|
||||
@@ -621,6 +621,45 @@ static int xe_madvise_purgeable_retained_to_user(const struct xe_madvise_details
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool check_pat_args_are_sane(struct xe_device *xe,
|
||||
struct xe_vmas_in_madvise_range *madvise_range,
|
||||
u16 pat_index)
|
||||
{
|
||||
u16 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Using coh_none with CPU cached buffers is not allowed on iGPU.
|
||||
* On iGPU the GPU shares the LLC with the CPU, so with coh_none
|
||||
* the GPU bypasses CPU caches and reads directly from DRAM,
|
||||
* potentially seeing stale sensitive data from previously freed
|
||||
* pages. On dGPU this restriction does not apply, because the
|
||||
* platform does not provide a non-coherent system memory access
|
||||
* path that would violate the DMA coherency contract.
|
||||
*/
|
||||
if (coh_mode != XE_COH_NONE || IS_DGFX(xe))
|
||||
return true;
|
||||
|
||||
for (i = 0; i < madvise_range->num_vmas; i++) {
|
||||
struct xe_vma *vma = madvise_range->vmas[i];
|
||||
struct xe_bo *bo = xe_vma_bo(vma);
|
||||
|
||||
if (bo) {
|
||||
/* BO with WB caching + COH_NONE is not allowed */
|
||||
if (XE_IOCTL_DBG(xe, bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
|
||||
return false;
|
||||
/* Imported dma-buf without caching info, assume cached */
|
||||
if (XE_IOCTL_DBG(xe, !bo->cpu_caching))
|
||||
return false;
|
||||
} else if (XE_IOCTL_DBG(xe, xe_vma_is_cpu_addr_mirror(vma) ||
|
||||
xe_vma_is_userptr(vma)))
|
||||
/* System memory (userptr/SVM) is always CPU cached */
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
|
||||
int num_vmas, u32 atomic_val)
|
||||
{
|
||||
@@ -750,6 +789,14 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
|
||||
}
|
||||
}
|
||||
|
||||
if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) {
|
||||
if (!check_pat_args_are_sane(xe, &madvise_range,
|
||||
args->pat_index.val)) {
|
||||
err = -EINVAL;
|
||||
goto free_vmas;
|
||||
}
|
||||
}
|
||||
|
||||
if (madvise_range.has_bo_vmas) {
|
||||
if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
|
||||
if (!check_bo_args_are_sane(vm, madvise_range.vmas,
|
||||
|
||||
@@ -743,14 +743,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
|
||||
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
|
||||
XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS))
|
||||
},
|
||||
{ XE_RTP_NAME("14019988906"),
|
||||
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)),
|
||||
XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD))
|
||||
},
|
||||
{ XE_RTP_NAME("14019877138"),
|
||||
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)),
|
||||
XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
|
||||
},
|
||||
{ XE_RTP_NAME("14021490052"),
|
||||
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
|
||||
XE_RTP_ACTIONS(SET(FF_MODE,
|
||||
|
||||
Reference in New Issue
Block a user