mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-04-04 11:15:39 -04:00
accel/amdxdna: Fix command hang on suspended hardware context
When a hardware context is suspended, the job scheduler is stopped. If a
command is submitted while the context is suspended, the job is queued in
the scheduler but aie2_sched_job_run() is never invoked to restart the
hardware context. As a result, the command hangs.
Fix this by modifying the hardware context suspend routine to keep the job
scheduler running so that queued jobs can trigger context restart properly.
Fixes: aac243092b ("accel/amdxdna: Add command execution")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260211205341.722982-1-lizhi.hou@amd.com
This commit is contained in:
@@ -53,6 +53,7 @@ static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwct
|
||||
{
|
||||
drm_sched_stop(&hwctx->priv->sched, bad_job);
|
||||
aie2_destroy_context(xdna->dev_handle, hwctx);
|
||||
drm_sched_start(&hwctx->priv->sched, 0);
|
||||
}
|
||||
|
||||
static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
|
||||
@@ -80,7 +81,6 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hw
|
||||
}
|
||||
|
||||
out:
|
||||
drm_sched_start(&hwctx->priv->sched, 0);
|
||||
XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
|
||||
return ret;
|
||||
}
|
||||
@@ -297,19 +297,23 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
|
||||
struct dma_fence *fence;
|
||||
int ret;
|
||||
|
||||
if (!hwctx->priv->mbox_chann)
|
||||
ret = amdxdna_pm_resume_get(hwctx->client->xdna);
|
||||
if (ret)
|
||||
return NULL;
|
||||
|
||||
if (!mmget_not_zero(job->mm))
|
||||
if (!hwctx->priv->mbox_chann) {
|
||||
amdxdna_pm_suspend_put(hwctx->client->xdna);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!mmget_not_zero(job->mm)) {
|
||||
amdxdna_pm_suspend_put(hwctx->client->xdna);
|
||||
return ERR_PTR(-ESRCH);
|
||||
}
|
||||
|
||||
kref_get(&job->refcnt);
|
||||
fence = dma_fence_get(job->fence);
|
||||
|
||||
ret = amdxdna_pm_resume_get(hwctx->client->xdna);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (job->drv_cmd) {
|
||||
switch (job->drv_cmd->opcode) {
|
||||
case SYNC_DEBUG_BO:
|
||||
|
||||
Reference in New Issue
Block a user