mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-16 12:31:52 -04:00
UAPI Changes: - restrict multi-lrc to VCS/VECS engines (Xin Wang) - Introduce a flag to disallow vm overcommit in fault mode (Thomas) - update used tracking kernel-doc (Auld, Fixes) - Some bind queue fixes (Auld, Fixes) Cross-subsystem Changes: - Split drm_suballoc_new() into SA alloc and init helpers (Satya, Fixes) - pass pagemap_addr by reference (Arnd, Fixes) - Revert "drm/pagemap: Disable device-to-device migration" (Thomas) - Fix unbalanced unlock in drm_gpusvm_scan_mm (Maciej, Fixes) - Small GPUSVM fixes (Brost, Fixes) - Fix xe SVM configs (Thomas, Fixes) Core Changes: - Fix a hmm_range_fault() livelock / starvation problem (Thomas, Fixes) Driver Changes: - Fix leak on xa_store failure (Shuicheng, Fixes) - Correct implementation of Wa_16025250150 (Roper, Fixes) - Refactor context init into xe_lrc_ctx_init (Raag) - Fix GSC proxy cleanup on early initialization failure (Zhanjun) - Fix exec queue creation during post-migration recovery (Tomasz, Fixes) - Apply windower hardware filtering setting on Xe3 and Xe3p (Roper) - Free ctx_restore_mid_bb in release (Shuicheng, Fixes) - Drop stale MCR steering TODO comment (Roper) - dGPU memory optimizations (Brost) - Do not preempt fence signaling CS instructions (Brost, Fixes) - Revert "drm/xe/compat: Remove unused i915_reg.h from compat header" (Uma) - Don't expose display modparam if no display support (Wajdeczko) - Some VRAM flag improvements (Wajdeczko) - Misc fix for xe_guc_ct.c (Shuicheng, Fixes) - Remove unused i915_reg.h from compat header (Uma) - Workaround cleanup & simplification (Roper) - Add prefetch pagefault support for Xe3p (Varun) - Fix fs_reclaim deadlock caused by CCS save/restore (Satya, Fixes) - Cleanup partially initialized sync on parse failure (Shuicheng, Fixes) - Allow to change VFs VRAM quota using sysfs (Michal) - Increase GuC log sizes in debug builds (Tomasz) - Wa_18041344222 changes (Harish) - Add Wa_14026781792 (Niton) - Add debugfs facility to catch RTP mistakes (Roper) - Convert GT stats 
to per-cpu counters (Brost) - Prevent unintended VRAM channel creation (Karthik) - Privatize struct xe_ggtt (Maarten) - remove unnecessary struct dram_info forward declaration (Jani) - pagefault refactors (Brost) - Apply Wa_14024997852 (Arvind) - Redirect faults to dummy page for wedged device (Raag, Fixes) - Force EXEC_QUEUE_FLAG_KERNEL for kernel internal VMs (Piotr) - Stop applying Wa_16018737384 from Xe3 onward (Roper) - Add new XeCore fuse registers to VF runtime regs (Roper) - Update xe_device_declare_wedged() error log (Raag) - Make xe_modparam.force_vram_bar_size signed (Shuicheng, Fixes) - Avoid reading media version when media GT is disabled (Piotr, Fixes) - Fix handling of Wa_14019988906 & Wa_14019877138 (Roper, Fixes) - Basic enabling patches for Xe3p_LPG and NVL-P (Gustavo, Roper, Shekhar) - Avoid double-adjust in 64-bit reads (Shuicheng, Fixes) - Allow VF to initialize MCR tables (Wajdeczko) - Add Wa_14025883347 for GuC DMA failure on reset (Anirban) - Add bounds check on pat_index to prevent OOB kernel read in madvise (Jia, Fixes) - Fix the address range assert in ggtt_get_pte helper (Winiarski) - XeCore fuse register changes (Roper) - Add more info to powergate_info debugfs (Vinay) - Separate out GuC RC code (Vinay) - Fix g2g_test_array indexing (Pallavi) - Mutual exclusivity between CCS-mode and PF (Nareshkumar, Fixes) - Some more _types.h cleanups (Wajdeczko) - Fix sysfs initialization (Wajdeczko, Fixes) - Drop unnecessary goto in xe_device_create (Roper) - Disable D3Cold for BMG only on specific platforms (Karthik, Fixes) - Add sriov.admin_only_pf attribute (Wajdeczko) - replace old wq(s), add WQ_PERCPU to alloc_workqueue (Marco) - Make MMIO communication more robust (Wajdeczko) - Fix warning of kerneldoc (Shuicheng, Fixes) - Fix topology query pointer advance (Shuicheng, Fixes) - use entry_dump callbacks for xe2+ PAT dumps (Xin Wang) - Fix kernel-doc warning in GuC scheduler ABI header (Chaitanya, Fixes) - Fix CFI violation in debugfs access 
(Daniele, Fixes) - Apply WA_16028005424 to Media (Balasubramani) - Fix typo in function kernel-doc (Wajdeczko) - Protect priority against concurrent access (Niranjana) - Fix nvm aux resource cleanup (Shuicheng, Fixes) - Fix is_bound() pci_dev lifetime (Shuicheng, Fixes) - Use CLASS() for forcewake in xe_gt_enable_comp_1wcoh (Shuicheng) - Reset VF GuC state on fini (Wajdeczko) - Move _THIS_IP_ usage from xe_vm_create() to dedicated function (Nathan Chancellor, Fixes) - Unregister drm device on probe error (Shuicheng, Fixes) - Disable DCC on PTL (Vinay, Fixes) - Fix Wa_18022495364 (Tvrtko, Fixes) - Skip address copy for sync-only execs (Shuicheng, Fixes) - derive mem copy capability from graphics version (Nitin, Fixes) - Use DRM_BUDDY_CONTIGUOUS_ALLOCATION for contiguous allocations (Sanjay) - Context based TLB invalidations (Brost) - Enable multi_queue on xe3p_xpc (Brost, Niranjana) - Remove check for gt in xe_query (Nakshtra) - Reduce LRC timestamp stuck message on VFs to notice (Brost, Fixes) Signed-off-by: Dave Airlie <airlied@redhat.com> From: Matthew Brost <matthew.brost@intel.com> Link: https://patch.msgid.link/aaYR5G2MHjOEMXPW@lstrano-desk.jf.intel.com
175 lines
3.9 KiB
C
175 lines
3.9 KiB
C
// SPDX-License-Identifier: MIT
|
|
/*
|
|
* Copyright © 2022 Intel Corporation
|
|
*/
|
|
|
|
#include "xe_bb.h"
|
|
|
|
#include "instructions/xe_mi_commands.h"
|
|
#include "xe_assert.h"
|
|
#include "xe_device_types.h"
|
|
#include "xe_exec_queue_types.h"
|
|
#include "xe_gt.h"
|
|
#include "xe_sa.h"
|
|
#include "xe_sched_job.h"
|
|
#include "xe_vm_types.h"
|
|
|
|
static int bb_prefetch(struct xe_gt *gt)
|
|
{
|
|
struct xe_device *xe = gt_to_xe(gt);
|
|
|
|
if (GRAPHICS_VERx100(xe) >= 1250 && xe_gt_is_main_type(gt))
|
|
/*
|
|
* RCS and CCS require 1K, although other engines would be
|
|
* okay with 512.
|
|
*/
|
|
return SZ_1K;
|
|
else
|
|
return SZ_512;
|
|
}
|
|
|
|
struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
{
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_bb *bb = kmalloc_obj(*bb);
	struct xe_sa_manager *pool;
	int ret;

	if (!bb)
		return ERR_PTR(-ENOMEM);

	/*
	 * Reserve room for the requested number of dwords, one additional
	 * MI_BATCH_BUFFER_END dword, and enough extra buffer space to
	 * accommodate the platform-specific hardware prefetch requirements.
	 */
	pool = usm ? gt->usm.bb_pool : tile->mem.kernel_bb_pool;
	bb->bo = xe_sa_bo_new(pool, 4 * (dwords + 1) + bb_prefetch(gt));
	if (IS_ERR(bb->bo)) {
		ret = PTR_ERR(bb->bo);
		kfree(bb);
		return ERR_PTR(ret);
	}

	bb->cs = xe_sa_bo_cpu_addr(bb->bo);
	bb->len = 0;

	return bb;
}
|
|
|
|
/**
|
|
* xe_bb_alloc() - Allocate a new batch buffer structure
|
|
* @gt: the &xe_gt
|
|
*
|
|
* Allocates and initializes a new xe_bb structure with an associated
|
|
* uninitialized suballoc object.
|
|
*
|
|
* Returns: Batch buffer structure or an ERR_PTR(-ENOMEM).
|
|
*/
|
|
struct xe_bb *xe_bb_alloc(struct xe_gt *gt)
|
|
{
|
|
struct xe_bb *bb = kmalloc_obj(*bb);
|
|
int err;
|
|
|
|
if (!bb)
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
bb->bo = xe_sa_bo_alloc(GFP_KERNEL);
|
|
if (IS_ERR(bb->bo)) {
|
|
err = PTR_ERR(bb->bo);
|
|
goto err;
|
|
}
|
|
|
|
return bb;
|
|
|
|
err:
|
|
kfree(bb);
|
|
return ERR_PTR(err);
|
|
}
|
|
|
|
/**
|
|
* xe_bb_init() - Initialize a batch buffer with memory from a sub-allocator pool
|
|
* @bb: Batch buffer structure to initialize
|
|
* @bb_pool: Suballoc memory pool to allocate from
|
|
* @dwords: Number of dwords to be allocated
|
|
*
|
|
* Initializes the batch buffer by allocating memory from the specified
|
|
* suballoc pool.
|
|
*
|
|
* Return: 0 on success, negative error code on failure.
|
|
*/
|
|
int xe_bb_init(struct xe_bb *bb, struct xe_sa_manager *bb_pool, u32 dwords)
|
|
{
|
|
int err;
|
|
|
|
/*
|
|
* We need to allocate space for the requested number of dwords &
|
|
* one additional MI_BATCH_BUFFER_END dword. Since the whole SA
|
|
* is submitted to HW, we need to make sure that the last instruction
|
|
* is not over written when the last chunk of SA is allocated for BB.
|
|
* So, this extra DW acts as a guard here.
|
|
*/
|
|
err = xe_sa_bo_init(bb_pool, bb->bo, 4 * (dwords + 1));
|
|
if (err)
|
|
return err;
|
|
|
|
bb->cs = xe_sa_bo_cpu_addr(bb->bo);
|
|
bb->len = 0;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct xe_sched_job *
|
|
__xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr)
|
|
{
|
|
u32 size = drm_suballoc_size(bb->bo);
|
|
|
|
if (bb->len == 0 || bb->cs[bb->len - 1] != MI_BATCH_BUFFER_END)
|
|
bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
|
|
|
|
xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size);
|
|
|
|
xe_sa_bo_flush_write(bb->bo);
|
|
|
|
return xe_sched_job_create(q, addr);
|
|
}
|
|
|
|
struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
|
|
struct xe_bb *bb,
|
|
u64 batch_base_ofs,
|
|
u32 second_idx)
|
|
{
|
|
u64 addr[2] = {
|
|
batch_base_ofs + drm_suballoc_soffset(bb->bo),
|
|
batch_base_ofs + drm_suballoc_soffset(bb->bo) +
|
|
4 * second_idx,
|
|
};
|
|
|
|
xe_gt_assert(q->gt, second_idx <= bb->len);
|
|
xe_gt_assert(q->gt, xe_sched_job_is_migration(q));
|
|
xe_gt_assert(q->gt, q->width == 1);
|
|
|
|
return __xe_bb_create_job(q, bb, addr);
|
|
}
|
|
|
|
struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
|
|
struct xe_bb *bb)
|
|
{
|
|
u64 addr = xe_sa_bo_gpu_addr(bb->bo);
|
|
|
|
xe_gt_assert(q->gt, !xe_sched_job_is_migration(q));
|
|
xe_gt_assert(q->gt, q->width == 1);
|
|
return __xe_bb_create_job(q, bb, &addr);
|
|
}
|
|
|
|
void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence)
|
|
{
|
|
if (!bb)
|
|
return;
|
|
|
|
xe_sa_bo_free(bb->bo, fence);
|
|
kfree(bb);
|
|
}
|