mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-05-05 05:09:17 -04:00
drm/amdgpu: handle eviction fence race
The eviction process can hit a race condition between the eviction
fence suspend work (which replaces the old fence with a new one) and kms_close
(which destroys the fence and doesn't expect a new one).
This patch:
- adds a flag to indicate that fd is closing, so fence replacement is
not required (evf_mgr->fd_closing)
- adds a flush_delayed_work() call to the ev_fence_destroy routine
V2: Addressed review comments from Christian:
- Do not use mutex to sync
- Use flush_work and wait for suspend_work to be done
V3: Fixed the state machine for queue->queue_active, which contributed to the
race between suspend/resume and queue ops
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Arvind Yadav <arvind.yadav@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
committed by
Alex Deucher
parent
44cfdf368f
commit
b8e6d3f68c
@@ -117,6 +117,10 @@ amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
|
||||
/* Signal old eviction fence */
|
||||
amdgpu_eviction_fence_signal(evf_mgr);
|
||||
|
||||
/* Do not replace eviction fence if fd is getting closed */
|
||||
if (evf_mgr->fd_closing)
|
||||
return;
|
||||
|
||||
/* Prepare the objects to replace eviction fence */
|
||||
drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
|
||||
drm_exec_until_all_locked(&exec) {
|
||||
@@ -199,6 +203,9 @@ void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr)
|
||||
{
|
||||
struct amdgpu_eviction_fence *ev_fence;
|
||||
|
||||
/* Wait for any pending work to execute */
|
||||
flush_delayed_work(&evf_mgr->suspend_work);
|
||||
|
||||
spin_lock(&evf_mgr->ev_fence_lock);
|
||||
ev_fence = evf_mgr->ev_fence;
|
||||
spin_unlock(&evf_mgr->ev_fence_lock);
|
||||
|
||||
@@ -1490,10 +1490,12 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
|
||||
amdgpu_bo_unreserve(pd);
|
||||
}
|
||||
|
||||
fpriv->evf_mgr.fd_closing = true;
|
||||
amdgpu_userq_mgr_fini(&fpriv->userq_mgr);
|
||||
amdgpu_eviction_fence_destroy(&fpriv->evf_mgr);
|
||||
|
||||
amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
|
||||
amdgpu_vm_fini(adev, &fpriv->vm);
|
||||
amdgpu_userq_mgr_fini(&fpriv->userq_mgr);
|
||||
|
||||
if (pasid)
|
||||
amdgpu_pasid_free_delayed(pd->tbo.base.resv, pasid);
|
||||
|
||||
@@ -614,9 +614,10 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
|
||||
|
||||
cancel_delayed_work(&userq_mgr->resume_work);
|
||||
|
||||
mutex_lock(&userq_mgr->userq_mutex);
|
||||
idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id)
|
||||
amdgpu_userqueue_cleanup(userq_mgr, queue, queue_id);
|
||||
|
||||
idr_destroy(&userq_mgr->userq_idr);
|
||||
mutex_unlock(&userq_mgr->userq_mutex);
|
||||
mutex_destroy(&userq_mgr->userq_mutex);
|
||||
}
|
||||
|
||||
@@ -139,6 +139,7 @@ static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr,
|
||||
return r;
|
||||
}
|
||||
|
||||
queue->queue_active = true;
|
||||
DRM_DEBUG_DRIVER("Queue (doorbell:%d) mapped successfully\n", userq_props->doorbell_index);
|
||||
return 0;
|
||||
}
|
||||
@@ -160,6 +161,7 @@ static void mes_v11_0_userq_unmap(struct amdgpu_userq_mgr *uq_mgr,
|
||||
amdgpu_mes_unlock(&adev->mes);
|
||||
if (r)
|
||||
DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", r);
|
||||
queue->queue_active = false;
|
||||
}
|
||||
|
||||
static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
|
||||
@@ -331,7 +333,6 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
|
||||
goto free_ctx;
|
||||
}
|
||||
|
||||
queue->queue_active = true;
|
||||
return 0;
|
||||
|
||||
free_ctx:
|
||||
@@ -350,12 +351,12 @@ static void
|
||||
mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_usermode_queue *queue)
|
||||
{
|
||||
mes_v11_0_userq_unmap(uq_mgr, queue);
|
||||
amdgpu_bo_unref(&queue->wptr_obj.obj);
|
||||
if (queue->queue_active)
|
||||
mes_v11_0_userq_unmap(uq_mgr, queue);
|
||||
|
||||
amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj);
|
||||
kfree(queue->userq_prop);
|
||||
amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd);
|
||||
queue->queue_active = false;
|
||||
}
|
||||
|
||||
static int mes_v11_0_userq_suspend(struct amdgpu_userq_mgr *uq_mgr,
|
||||
|
||||
Reference in New Issue
Block a user