drm/amd/pm: add support for checking SDMA reset capability

This patch introduces a new function to check if the SMU supports resetting the SDMA engine.
This capability check ensures that the driver does not attempt to reset the SDMA engine
on hardware that does not support it.

The following changes are included:
- New function `amdgpu_dpm_reset_sdma_is_supported` to check SDMA reset
  support at the AMDGPU driver level.
- New function `smu_reset_sdma_is_supported` to check SDMA reset support
  at the SMU level.
- Implementation of `smu_v13_0_6_reset_sdma_is_supported` for the specific
  SMU version v13.0.6.
- Updated `smu_v13_0_6_reset_sdma` to use the new capability check before
  attempting to reset the SDMA engine.

v2: change smu_reset_sdma_is_supported type to bool (Tim)

Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
Signed-off-by: Jesse Zhang <jesse.zhang@amd.com>
Reviewed-by: Tim Huang <tim.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Jesse.zhang@amd.com
2025-02-21 14:02:05 +08:00
committed by Alex Deucher
parent 8225254492
commit d190e4d0f7
5 changed files with 68 additions and 1 deletions

View File

@@ -722,6 +722,29 @@ int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev)
return ret;
}
/**
 * amdgpu_dpm_reset_sdma_is_supported - Query whether SDMA reset is available
 * @adev: amdgpu_device pointer
 *
 * Forwards the question to the SMU layer via smu_reset_sdma_is_supported().
 * The query is issued while holding adev->pm.mutex.
 *
 * Return: false when the device does not use the software SMU; otherwise
 * whatever smu_reset_sdma_is_supported() reports.
 */
bool amdgpu_dpm_reset_sdma_is_supported(struct amdgpu_device *adev)
{
	struct smu_context *smu_ctx = adev->powerplay.pp_handle;
	bool supported = false;

	if (is_support_sw_smu(adev)) {
		/* The SMU query is made under the power-management lock. */
		mutex_lock(&adev->pm.mutex);
		supported = smu_reset_sdma_is_supported(smu_ctx);
		mutex_unlock(&adev->pm.mutex);
	}

	return supported;
}
int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask)
{
struct smu_context *smu = adev->powerplay.pp_handle;

View File

@@ -603,5 +603,6 @@ int amdgpu_dpm_set_pm_policy(struct amdgpu_device *adev, int policy_type,
ssize_t amdgpu_dpm_get_pm_policy_info(struct amdgpu_device *adev,
enum pp_pm_policy p_type, char *buf);
int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask);
bool amdgpu_dpm_reset_sdma_is_supported(struct amdgpu_device *adev);
#endif

View File

@@ -3907,6 +3907,23 @@ int smu_send_rma_reason(struct smu_context *smu)
return ret;
}
/**
 * smu_reset_sdma_is_supported - Ask the SMU backend whether SDMA reset works
 * @smu: smu_context pointer
 *
 * Dispatches to the reset_sdma_is_supported callback of the ppt_funcs table
 * attached to @smu.
 *
 * Return: the callback's result, or false when @smu has no ppt_funcs table
 * or the table does not provide the callback.
 */
bool smu_reset_sdma_is_supported(struct smu_context *smu)
{
	/* No table or no callback: report the feature as unsupported. */
	if (!smu->ppt_funcs || !smu->ppt_funcs->reset_sdma_is_supported)
		return false;

	return smu->ppt_funcs->reset_sdma_is_supported(smu);
}
int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask)
{
int ret = 0;

View File

@@ -1376,6 +1376,10 @@ struct pptable_funcs {
* @reset_sdma: message SMU to soft reset sdma instance.
*/
int (*reset_sdma)(struct smu_context *smu, uint32_t inst_mask);
/**
* @reset_sdma_is_supported: Check whether the SMU supports resetting the SDMA engine.
*/
bool (*reset_sdma_is_supported)(struct smu_context *smu);
/**
* @get_ecc_table: message SMU to get ECC INFO table.
@@ -1637,6 +1641,7 @@ int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size);
int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size);
int smu_send_rma_reason(struct smu_context *smu);
int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask);
bool smu_reset_sdma_is_supported(struct smu_context *smu);
int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type,
int level);
ssize_t smu_get_pm_policy_info(struct smu_context *smu,

View File

@@ -2902,11 +2902,31 @@ static int smu_v13_0_6_send_rma_reason(struct smu_context *smu)
return ret;
}
/**
 * smu_v13_0_6_reset_sdma_is_supported - Report SDMA reset capability
 * @smu: smu_context pointer
 *
 * Tests the SDMA_RESET capability through smu_v13_0_6_cap_supported().
 * When the capability is absent, an informational message is logged
 * against the device.
 *
 * Return: true if the SDMA_RESET capability is present, false otherwise.
 */
static bool smu_v13_0_6_reset_sdma_is_supported(struct smu_context *smu)
{
	if (smu_v13_0_6_cap_supported(smu, SMU_CAP(SDMA_RESET)))
		return true;

	dev_info(smu->adev->dev,
		 "SDMA reset capability is not supported\n");
	return false;
}
static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t inst_mask)
{
int ret = 0;
if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SDMA_RESET)))
if (!smu_v13_0_6_reset_sdma_is_supported(smu))
return -EOPNOTSUPP;
ret = smu_cmn_send_smc_msg_with_param(smu,
@@ -3590,6 +3610,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
.send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num,
.send_rma_reason = smu_v13_0_6_send_rma_reason,
.reset_sdma = smu_v13_0_6_reset_sdma,
.reset_sdma_is_supported = smu_v13_0_6_reset_sdma_is_supported,
};
void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)