mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-16 11:21:26 -04:00
Merge tag 'amd-drm-fixes-7.0-2026-04-01' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes
amd-drm-fixes-7.0-2026-04-01:

amdgpu:
- UserQ fixes
- PASID handling fix
- S4 fix for smu11 chips
- Misc small fixes

amdkfd:
- Non-4K page fixes

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20260401174731.3576021-1-alexander.deucher@amd.com
This commit is contained in:
@@ -2703,8 +2703,12 @@ static int amdgpu_pmops_freeze(struct device *dev)
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (amdgpu_acpi_should_gpu_reset(adev))
|
||||
return amdgpu_asic_reset(adev);
|
||||
if (amdgpu_acpi_should_gpu_reset(adev)) {
|
||||
amdgpu_device_lock_reset_domain(adev->reset_domain);
|
||||
r = amdgpu_asic_reset(adev);
|
||||
amdgpu_device_unlock_reset_domain(adev->reset_domain);
|
||||
return r;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -403,6 +403,50 @@ void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa,
|
||||
drm_dev_exit(idx);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gart_map_gfx9_mqd - map mqd and ctrl_stack dma_addresses into GART entries
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @offset: offset into the GPU's gart aperture
|
||||
* @pages: number of pages to bind
|
||||
* @dma_addr: DMA addresses of pages
|
||||
* @flags: page table entry flags
|
||||
*
|
||||
* Map the MQD and control stack addresses into GART entries with the correct
|
||||
* memory types on gfxv9. The MQD occupies the first 4KB and is followed by
|
||||
* the control stack. The MQD uses UC (uncached) memory, while the control stack
|
||||
* uses NC (non-coherent) memory.
|
||||
*/
|
||||
void amdgpu_gart_map_gfx9_mqd(struct amdgpu_device *adev, uint64_t offset,
|
||||
int pages, dma_addr_t *dma_addr, uint64_t flags)
|
||||
{
|
||||
uint64_t page_base;
|
||||
unsigned int i, j, t;
|
||||
int idx;
|
||||
uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
|
||||
void *dst;
|
||||
|
||||
if (!adev->gart.ptr)
|
||||
return;
|
||||
|
||||
if (!drm_dev_enter(adev_to_drm(adev), &idx))
|
||||
return;
|
||||
|
||||
t = offset / AMDGPU_GPU_PAGE_SIZE;
|
||||
dst = adev->gart.ptr;
|
||||
for (i = 0; i < pages; i++) {
|
||||
page_base = dma_addr[i];
|
||||
for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {
|
||||
if ((i == 0) && (j == 0))
|
||||
amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags);
|
||||
else
|
||||
amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, ctrl_flags);
|
||||
page_base += AMDGPU_GPU_PAGE_SIZE;
|
||||
}
|
||||
}
|
||||
drm_dev_exit(idx);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gart_bind - bind pages into the gart page table
|
||||
*
|
||||
|
||||
@@ -62,6 +62,8 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
|
||||
void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
|
||||
int pages, dma_addr_t *dma_addr, uint64_t flags,
|
||||
void *dst);
|
||||
void amdgpu_gart_map_gfx9_mqd(struct amdgpu_device *adev, uint64_t offset,
|
||||
int pages, dma_addr_t *dma_addr, uint64_t flags);
|
||||
void amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
|
||||
int pages, dma_addr_t *dma_addr, uint64_t flags);
|
||||
void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa,
|
||||
|
||||
@@ -68,8 +68,11 @@ int amdgpu_pasid_alloc(unsigned int bits)
|
||||
return -EINVAL;
|
||||
|
||||
spin_lock(&amdgpu_pasid_idr_lock);
|
||||
/* TODO: Need to replace the idr with an xarray, and then
|
||||
* handle the internal locking with ATOMIC safe paths.
|
||||
*/
|
||||
pasid = idr_alloc_cyclic(&amdgpu_pasid_idr, NULL, 1,
|
||||
1U << bits, GFP_KERNEL);
|
||||
1U << bits, GFP_ATOMIC);
|
||||
spin_unlock(&amdgpu_pasid_idr_lock);
|
||||
|
||||
if (pasid >= 0)
|
||||
|
||||
@@ -853,25 +853,15 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
|
||||
int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
|
||||
uint64_t page_idx, pages_per_xcc;
|
||||
int i;
|
||||
uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
|
||||
|
||||
pages_per_xcc = total_pages;
|
||||
do_div(pages_per_xcc, num_xcc);
|
||||
|
||||
for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
|
||||
/* MQD page: use default flags */
|
||||
amdgpu_gart_bind(adev,
|
||||
amdgpu_gart_map_gfx9_mqd(adev,
|
||||
gtt->offset + (page_idx << PAGE_SHIFT),
|
||||
1, &gtt->ttm.dma_address[page_idx], flags);
|
||||
/*
|
||||
* Ctrl pages - modify the memory type to NC (ctrl_flags) from
|
||||
* the second page of the BO onward.
|
||||
*/
|
||||
amdgpu_gart_bind(adev,
|
||||
gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
|
||||
pages_per_xcc - 1,
|
||||
&gtt->ttm.dma_address[page_idx + 1],
|
||||
ctrl_flags);
|
||||
pages_per_xcc, &gtt->ttm.dma_address[page_idx],
|
||||
flags);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -600,6 +600,13 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
|
||||
goto unpin_bo;
|
||||
}
|
||||
|
||||
/* Validate doorbell_offset is within the doorbell BO */
|
||||
if ((u64)db_info->doorbell_offset * db_size + db_size >
|
||||
amdgpu_bo_size(db_obj->obj)) {
|
||||
r = -EINVAL;
|
||||
goto unpin_bo;
|
||||
}
|
||||
|
||||
index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj,
|
||||
db_info->doorbell_offset, db_size);
|
||||
drm_dbg_driver(adev_to_drm(uq_mgr->adev),
|
||||
|
||||
@@ -173,7 +173,7 @@ struct amdgpu_bo_vm;
|
||||
#define AMDGPU_VA_RESERVED_SEQ64_SIZE (2ULL << 20)
|
||||
#define AMDGPU_VA_RESERVED_SEQ64_START(adev) (AMDGPU_VA_RESERVED_CSA_START(adev) \
|
||||
- AMDGPU_VA_RESERVED_SEQ64_SIZE)
|
||||
#define AMDGPU_VA_RESERVED_TRAP_SIZE (2ULL << 12)
|
||||
#define AMDGPU_VA_RESERVED_TRAP_SIZE (1ULL << 16)
|
||||
#define AMDGPU_VA_RESERVED_TRAP_START(adev) (AMDGPU_VA_RESERVED_SEQ64_START(adev) \
|
||||
- AMDGPU_VA_RESERVED_TRAP_SIZE)
|
||||
#define AMDGPU_VA_RESERVED_BOTTOM (1ULL << 16)
|
||||
|
||||
@@ -324,8 +324,10 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
|
||||
|
||||
r = amdgpu_userq_input_va_validate(adev, queue, compute_mqd->eop_va,
|
||||
2048);
|
||||
if (r)
|
||||
if (r) {
|
||||
kfree(compute_mqd);
|
||||
goto free_mqd;
|
||||
}
|
||||
|
||||
userq_props->eop_gpu_addr = compute_mqd->eop_va;
|
||||
userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
|
||||
@@ -365,12 +367,16 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
|
||||
|
||||
r = amdgpu_userq_input_va_validate(adev, queue, mqd_gfx_v11->shadow_va,
|
||||
shadow_info.shadow_size);
|
||||
if (r)
|
||||
if (r) {
|
||||
kfree(mqd_gfx_v11);
|
||||
goto free_mqd;
|
||||
}
|
||||
r = amdgpu_userq_input_va_validate(adev, queue, mqd_gfx_v11->csa_va,
|
||||
shadow_info.csa_size);
|
||||
if (r)
|
||||
if (r) {
|
||||
kfree(mqd_gfx_v11);
|
||||
goto free_mqd;
|
||||
}
|
||||
|
||||
kfree(mqd_gfx_v11);
|
||||
} else if (queue->queue_type == AMDGPU_HW_IP_DMA) {
|
||||
@@ -390,8 +396,10 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
|
||||
}
|
||||
r = amdgpu_userq_input_va_validate(adev, queue, mqd_sdma_v11->csa_va,
|
||||
32);
|
||||
if (r)
|
||||
if (r) {
|
||||
kfree(mqd_sdma_v11);
|
||||
goto free_mqd;
|
||||
}
|
||||
|
||||
userq_props->csa_addr = mqd_sdma_v11->csa_va;
|
||||
kfree(mqd_sdma_v11);
|
||||
|
||||
@@ -170,7 +170,8 @@ static int psp_v11_0_wait_for_bootloader(struct psp_context *psp)
|
||||
int retry_loop;
|
||||
|
||||
/* For a reset done at the end of S3, only wait for TOS to be unloaded */
|
||||
if (adev->in_s3 && !(adev->flags & AMD_IS_APU) && amdgpu_in_reset(adev))
|
||||
if ((adev->in_s4 || adev->in_s3) && !(adev->flags & AMD_IS_APU) &&
|
||||
amdgpu_in_reset(adev))
|
||||
return psp_v11_wait_for_tos_unload(psp);
|
||||
|
||||
for (retry_loop = 0; retry_loop < 20; retry_loop++) {
|
||||
|
||||
@@ -42,9 +42,16 @@ static uint64_t mqd_stride_v9(struct mqd_manager *mm,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
if (mm->dev->kfd->cwsr_enabled &&
|
||||
q->type == KFD_QUEUE_TYPE_COMPUTE)
|
||||
return ALIGN(q->ctl_stack_size, PAGE_SIZE) +
|
||||
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE);
|
||||
q->type == KFD_QUEUE_TYPE_COMPUTE) {
|
||||
|
||||
/* On gfxv9, the MQD resides in the first 4K page,
|
||||
* followed by the control stack. Align both to
|
||||
* AMDGPU_GPU_PAGE_SIZE to maintain the required 4K boundary.
|
||||
*/
|
||||
|
||||
return ALIGN(ALIGN(q->ctl_stack_size, AMDGPU_GPU_PAGE_SIZE) +
|
||||
ALIGN(sizeof(struct v9_mqd), AMDGPU_GPU_PAGE_SIZE), PAGE_SIZE);
|
||||
}
|
||||
|
||||
return mm->mqd_size;
|
||||
}
|
||||
@@ -151,8 +158,8 @@ static struct kfd_mem_obj *allocate_mqd(struct mqd_manager *mm,
|
||||
if (!mqd_mem_obj)
|
||||
return NULL;
|
||||
retval = amdgpu_amdkfd_alloc_kernel_mem(node->adev,
|
||||
(ALIGN(q->ctl_stack_size, PAGE_SIZE) +
|
||||
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE)) *
|
||||
(ALIGN(ALIGN(q->ctl_stack_size, AMDGPU_GPU_PAGE_SIZE) +
|
||||
ALIGN(sizeof(struct v9_mqd), AMDGPU_GPU_PAGE_SIZE), PAGE_SIZE)) *
|
||||
NUM_XCC(node->xcc_mask),
|
||||
mqd_on_vram(node->adev) ? AMDGPU_GEM_DOMAIN_VRAM :
|
||||
AMDGPU_GEM_DOMAIN_GTT,
|
||||
@@ -360,7 +367,7 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
|
||||
struct kfd_context_save_area_header header;
|
||||
|
||||
/* Control stack is located one page after MQD. */
|
||||
void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
|
||||
void *mqd_ctl_stack = (void *)((uintptr_t)mqd + AMDGPU_GPU_PAGE_SIZE);
|
||||
|
||||
m = get_mqd(mqd);
|
||||
|
||||
@@ -397,7 +404,7 @@ static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, voi
|
||||
{
|
||||
struct v9_mqd *m;
|
||||
/* Control stack is located one page after MQD. */
|
||||
void *ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
|
||||
void *ctl_stack = (void *)((uintptr_t)mqd + AMDGPU_GPU_PAGE_SIZE);
|
||||
|
||||
m = get_mqd(mqd);
|
||||
|
||||
@@ -443,7 +450,7 @@ static void restore_mqd(struct mqd_manager *mm, void **mqd,
|
||||
*gart_addr = addr;
|
||||
|
||||
/* Control stack is located one page after MQD. */
|
||||
ctl_stack = (void *)((uintptr_t)*mqd + PAGE_SIZE);
|
||||
ctl_stack = (void *)((uintptr_t)*mqd + AMDGPU_GPU_PAGE_SIZE);
|
||||
memcpy(ctl_stack, ctl_stack_src, ctl_stack_size);
|
||||
|
||||
m->cp_hqd_pq_doorbell_control =
|
||||
|
||||
@@ -102,8 +102,8 @@
|
||||
* The first chunk is the TBA used for the CWSR ISA code. The second
|
||||
* chunk is used as TMA for user-mode trap handler setup in daisy-chain mode.
|
||||
*/
|
||||
#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
|
||||
#define KFD_CWSR_TMA_OFFSET (PAGE_SIZE + 2048)
|
||||
#define KFD_CWSR_TBA_TMA_SIZE (AMDGPU_GPU_PAGE_SIZE * 2)
|
||||
#define KFD_CWSR_TMA_OFFSET (AMDGPU_GPU_PAGE_SIZE + 2048)
|
||||
|
||||
#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \
|
||||
(KFD_MAX_NUM_OF_PROCESSES * \
|
||||
|
||||
@@ -249,10 +249,10 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope
|
||||
topo_dev->node_props.gfx_target_version < 90000)
|
||||
/* metadata_queue_size not supported on GFX7/GFX8 */
|
||||
expected_queue_size =
|
||||
properties->queue_size / 2;
|
||||
PAGE_ALIGN(properties->queue_size / 2);
|
||||
else
|
||||
expected_queue_size =
|
||||
properties->queue_size + properties->metadata_queue_size;
|
||||
PAGE_ALIGN(properties->queue_size + properties->metadata_queue_size);
|
||||
|
||||
vm = drm_priv_to_vm(pdd->drm_priv);
|
||||
err = amdgpu_bo_reserve(vm->root.bo, false);
|
||||
@@ -492,10 +492,11 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev)
|
||||
cu_num = props->simd_count / props->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask);
|
||||
wave_num = get_num_waves(props, gfxv, cu_num);
|
||||
|
||||
wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), PAGE_SIZE);
|
||||
wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props),
|
||||
AMDGPU_GPU_PAGE_SIZE);
|
||||
ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8;
|
||||
ctl_stack_size = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size,
|
||||
PAGE_SIZE);
|
||||
AMDGPU_GPU_PAGE_SIZE);
|
||||
|
||||
if ((gfxv / 10000 * 10000) == 100000) {
|
||||
/* HW design limits control stack size to 0x7000.
|
||||
@@ -507,7 +508,7 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev)
|
||||
|
||||
props->ctl_stack_size = ctl_stack_size;
|
||||
props->debug_memory_size = ALIGN(wave_num * DEBUGGER_BYTES_PER_WAVE, DEBUGGER_BYTES_ALIGN);
|
||||
props->cwsr_size = ctl_stack_size + wg_data_size;
|
||||
props->cwsr_size = ALIGN(ctl_stack_size + wg_data_size, PAGE_SIZE);
|
||||
|
||||
if (gfxv == 80002) /* GFX_VERSION_TONGA */
|
||||
props->eop_buffer_size = 0x8000;
|
||||
|
||||
@@ -147,6 +147,7 @@ void dcn401_init_hw(struct dc *dc)
|
||||
int edp_num;
|
||||
uint32_t backlight = MAX_BACKLIGHT_LEVEL;
|
||||
uint32_t user_level = MAX_BACKLIGHT_LEVEL;
|
||||
bool dchub_ref_freq_changed;
|
||||
int current_dchub_ref_freq = 0;
|
||||
|
||||
if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->init_clocks) {
|
||||
@@ -360,14 +361,18 @@ void dcn401_init_hw(struct dc *dc)
|
||||
dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr;
|
||||
dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver > 0;
|
||||
dc->caps.dmub_caps.fams_ver = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver;
|
||||
|
||||
/* sw and fw FAMS versions must match for support */
|
||||
dc->debug.fams2_config.bits.enable &=
|
||||
dc->caps.dmub_caps.fams_ver == dc->debug.fams_version.ver; // sw & fw fams versions must match for support
|
||||
if ((!dc->debug.fams2_config.bits.enable && dc->res_pool->funcs->update_bw_bounding_box)
|
||||
|| res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000 != current_dchub_ref_freq) {
|
||||
dc->caps.dmub_caps.fams_ver == dc->debug.fams_version.ver;
|
||||
dchub_ref_freq_changed =
|
||||
res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000 != current_dchub_ref_freq;
|
||||
if ((!dc->debug.fams2_config.bits.enable || dchub_ref_freq_changed) &&
|
||||
dc->res_pool->funcs->update_bw_bounding_box &&
|
||||
dc->clk_mgr && dc->clk_mgr->bw_params) {
|
||||
/* update bounding box if FAMS2 disabled, or if dchub clk has changed */
|
||||
if (dc->clk_mgr)
|
||||
dc->res_pool->funcs->update_bw_bounding_box(dc,
|
||||
dc->clk_mgr->bw_params);
|
||||
dc->res_pool->funcs->update_bw_bounding_box(dc,
|
||||
dc->clk_mgr->bw_params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -262,7 +262,6 @@ int smu_v11_0_check_fw_version(struct smu_context *smu)
|
||||
"smu fw program = %d, version = 0x%08x (%d.%d.%d)\n",
|
||||
smu->smc_driver_if_version, if_version,
|
||||
smu_program, smu_version, smu_major, smu_minor, smu_debug);
|
||||
dev_info(smu->adev->dev, "SMU driver if version not matched\n");
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
||||
@@ -101,7 +101,6 @@ int smu_v12_0_check_fw_version(struct smu_context *smu)
|
||||
"smu fw program = %d, smu fw version = 0x%08x (%d.%d.%d)\n",
|
||||
smu->smc_driver_if_version, if_version,
|
||||
smu_program, smu_version, smu_major, smu_minor, smu_debug);
|
||||
dev_info(smu->adev->dev, "SMU driver if version not matched\n");
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
||||
@@ -284,7 +284,6 @@ int smu_v14_0_check_fw_version(struct smu_context *smu)
|
||||
"smu fw program = %d, smu fw version = 0x%08x (%d.%d.%d)\n",
|
||||
smu->smc_driver_if_version, if_version,
|
||||
smu_program, smu_version, smu_major, smu_minor, smu_debug);
|
||||
dev_info(adev->dev, "SMU driver if version not matched\n");
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
||||
Reference in New Issue
Block a user