mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-02-15 08:32:44 -05:00
drm/amd: Clean up kfd node on surprise disconnect
When an eGPU is unplugged the KFD topology should also be destroyed
for that GPU. This never happens because the fini_sw callbacks never
get to run. Run them manually before calling amdgpu_device_ip_fini_early()
when a device has already been disconnected.
This location is intentionally chosen to make sure that the kfd locking
refcount doesn't get incremented unintentionally.
Cc: kent.russell@amd.com
Closes: https://community.frame.work/t/amd-egpu-on-linux/8691/33
Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>
Reviewed-by: Kent Russell <kent.russell@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 6a23e7b433)
Cc: stable@vger.kernel.org
This commit is contained in:
committed by
Alex Deucher
parent
9cb6278b44
commit
28695ca09d
@@ -5063,6 +5063,14 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
|
||||
|
||||
amdgpu_ttm_set_buffer_funcs_status(adev, false);
|
||||
|
||||
/*
|
||||
* device went through surprise hotplug; we need to destroy topology
|
||||
* before ip_fini_early to prevent kfd locking refcount issues by calling
|
||||
* amdgpu_amdkfd_suspend()
|
||||
*/
|
||||
if (drm_dev_is_unplugged(adev_to_drm(adev)))
|
||||
amdgpu_amdkfd_device_fini_sw(adev);
|
||||
|
||||
amdgpu_device_ip_fini_early(adev);
|
||||
|
||||
amdgpu_irq_fini_hw(adev);
|
||||
|
||||
Reference in New Issue
Block a user