mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-04-05 05:57:16 -04:00
drm/amdgpu/userq: add a detect and reset callback
Add a detect and reset callback and add the implementation for MES. The callback detects all hung queues of a particular IP type (e.g., GFX, compute, or SDMA) and resets them. v2: Increase the reset counter and force fence completion. v3: Remove userq_mutex from mes_userq_detect_and_reset, since the driver already holds it when calling. Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
committed by
Alex Deucher
parent
cbda64f3f5
commit
54d18bc600
@@ -82,6 +82,8 @@ struct amdgpu_userq_funcs {
|
||||
struct amdgpu_usermode_queue *queue);
|
||||
int (*restore)(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct amdgpu_usermode_queue *queue);
|
||||
int (*detect_and_reset)(struct amdgpu_device *adev,
|
||||
int queue_type);
|
||||
};
|
||||
|
||||
/* Usermode queues for gfx */
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include <drm/drm_drv.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_gfx.h"
|
||||
#include "mes_userqueue.h"
|
||||
@@ -198,6 +199,53 @@ static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mes_userq_detect_and_reset(struct amdgpu_device *adev,
|
||||
int queue_type)
|
||||
{
|
||||
int db_array_size = amdgpu_mes_get_hung_queue_db_array_size(adev);
|
||||
struct mes_detect_and_reset_queue_input input;
|
||||
struct amdgpu_usermode_queue *queue;
|
||||
struct amdgpu_userq_mgr *uqm, *tmp;
|
||||
unsigned int hung_db_num = 0;
|
||||
int queue_id, r, i;
|
||||
u32 db_array[4];
|
||||
|
||||
if (db_array_size > 4) {
|
||||
dev_err(adev->dev, "DB array size (%d vs 4) too small\n",
|
||||
db_array_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memset(&input, 0x0, sizeof(struct mes_detect_and_reset_queue_input));
|
||||
|
||||
input.queue_type = queue_type;
|
||||
|
||||
amdgpu_mes_lock(&adev->mes);
|
||||
r = amdgpu_mes_detect_and_reset_hung_queues(adev, queue_type, false,
|
||||
&hung_db_num, db_array);
|
||||
amdgpu_mes_unlock(&adev->mes);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "Failed to detect and reset queues, err (%d)\n", r);
|
||||
} else if (hung_db_num) {
|
||||
list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
|
||||
idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
|
||||
if (queue->queue_type == queue_type) {
|
||||
for (i = 0; i < hung_db_num; i++) {
|
||||
if (queue->doorbell_index == db_array[i]) {
|
||||
queue->state = AMDGPU_USERQ_STATE_HUNG;
|
||||
atomic_inc(&adev->gpu_reset_counter);
|
||||
amdgpu_userq_fence_driver_force_completion(queue);
|
||||
drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
|
||||
struct drm_amdgpu_userq_in *args_in,
|
||||
struct amdgpu_usermode_queue *queue)
|
||||
@@ -352,4 +400,5 @@ const struct amdgpu_userq_funcs userq_mes_funcs = {
|
||||
.mqd_destroy = mes_userq_mqd_destroy,
|
||||
.unmap = mes_userq_unmap,
|
||||
.map = mes_userq_map,
|
||||
.detect_and_reset = mes_userq_detect_and_reset,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user