drm/amd/ras: Support physical address convert

Support converting a physical address to the pages of the
current NPS mode in uniras.

Signed-off-by: Jinzhou Su <jinzhou.su@amd.com>
Reviewed-by: YiPeng Chai <YiPeng.Chai@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Jinzhou Su
2025-12-02 16:09:10 +08:00
committed by Alex Deucher
parent 364f168f61
commit d3336c935e
6 changed files with 68 additions and 11 deletions

View File

@@ -671,3 +671,18 @@ int amdgpu_ras_mgr_post_reset(struct amdgpu_device *adev)
amdgpu_ras_process_post_reset(adev);
return 0;
}
/**
 * amdgpu_ras_mgr_lookup_bad_pages_in_a_row - convert an address to NPS pages
 * @adev: amdgpu device whose RAS manager is queried
 * @addr: address handed to the RAS core as the SOC physical address
 * @nps_page_addr: caller-provided output array
 * @max_page_count: number of entries available in @nps_page_addr
 *
 * Thin wrapper that forwards the request to
 * ras_core_convert_soc_pa_to_cur_nps_pages() using the RAS core owned by the
 * device's RAS manager.
 *
 * Return: -EPERM if the RAS manager is not ready, -EINVAL if @nps_page_addr
 * is NULL or @max_page_count is zero, otherwise whatever
 * ras_core_convert_soc_pa_to_cur_nps_pages() returns.
 */
int amdgpu_ras_mgr_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
		uint64_t addr, uint64_t *nps_page_addr, uint32_t max_page_count)
{
	struct amdgpu_ras_mgr *mgr = amdgpu_ras_mgr_get_context(adev);

	/* Readiness is tested first, so an unready manager always yields -EPERM. */
	if (!amdgpu_ras_mgr_is_ready(adev))
		return -EPERM;

	if (!(nps_page_addr && max_page_count))
		return -EINVAL;

	return ras_core_convert_soc_pa_to_cur_nps_pages(mgr->ras_core, addr,
			nps_page_addr, max_page_count);
}

View File

@@ -81,4 +81,6 @@ int amdgpu_ras_mgr_handle_ras_cmd(struct amdgpu_device *adev,
void *output, uint32_t out_size);
int amdgpu_ras_mgr_pre_reset(struct amdgpu_device *adev);
int amdgpu_ras_mgr_post_reset(struct amdgpu_device *adev);
/*
 * Convert @addr into pages of the current NPS mode, stored in @nps_page_addr
 * (at most @max_page_count entries), by delegating to
 * ras_core_convert_soc_pa_to_cur_nps_pages().
 * Returns -EPERM when the RAS manager is not ready and -EINVAL when
 * @nps_page_addr is NULL or @max_page_count is zero; otherwise the result of
 * the RAS core conversion is returned unchanged.
 */
int amdgpu_ras_mgr_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
uint64_t addr, uint64_t *nps_page_addr, uint32_t max_page_count);
#endif

View File

@@ -367,4 +367,6 @@ int ras_core_event_notify(struct ras_core_context *ras_core,
enum ras_notify_event event_id, void *data);
int ras_core_get_device_system_info(struct ras_core_context *ras_core,
struct device_system_info *dev_info);
/*
 * Convert the SOC physical address @soc_pa into page frame numbers for the
 * current NPS mode, written to @page_pfn (at most @max_pages entries).
 * Returns -EINVAL when @ras_core or @page_pfn is NULL, when @max_pages is
 * zero, or when the current NPS mode is zero or greater than
 * AMDGPU_NPS8_PARTITION_MODE; otherwise returns the result of
 * ras_umc_convert_record_to_nps_pages().
 */
int ras_core_convert_soc_pa_to_cur_nps_pages(struct ras_core_context *ras_core,
uint64_t soc_pa, uint64_t *page_pfn, uint32_t max_pages);
#endif

View File

@@ -601,3 +601,26 @@ int ras_core_get_device_system_info(struct ras_core_context *ras_core,
return -RAS_CORE_NOT_SUPPORTED;
}
int ras_core_convert_soc_pa_to_cur_nps_pages(struct ras_core_context *ras_core,
uint64_t soc_pa, uint64_t *page_pfn, uint32_t max_pages)
{
struct eeprom_umc_record record;
uint32_t cur_nps_mode;
int count = 0;
if (!ras_core || !page_pfn || !max_pages)
return -EINVAL;
cur_nps_mode = ras_core_get_curr_nps_mode(ras_core);
if (!cur_nps_mode || cur_nps_mode > AMDGPU_NPS8_PARTITION_MODE)
return -EINVAL;
memset(&record, 0, sizeof(record));
record.cur_nps_retired_row_pfn = RAS_ADDR_TO_PFN(soc_pa);
count = ras_umc_convert_record_to_nps_pages(ras_core,
&record, cur_nps_mode, page_pfn, max_pages);
return count;
}

View File

@@ -154,22 +154,36 @@ int ras_umc_clear_logged_ecc(struct ras_core_context *ras_core)
return 0;
}
/**
 * ras_umc_convert_record_to_nps_pages - expand an EEPROM record into NPS pages
 * @ras_core: RAS core context that owns the UMC state
 * @record: EEPROM UMC record to convert
 * @nps: NPS mode passed through to the IP-specific converter
 * @page_pfn: caller-provided output array of page frame numbers
 * @max_pages: number of entries available in @page_pfn
 *
 * Dispatches to the IP-specific eeprom_record_to_nps_pages() callback when one
 * is registered.
 *
 * Return: -EINVAL if @page_pfn is NULL or @max_pages is zero, 0 if no
 * converter callback is registered, otherwise the callback's return value.
 * Callers in this file treat a value <= 0 as a conversion failure.
 */
int ras_umc_convert_record_to_nps_pages(struct ras_core_context *ras_core,
		struct eeprom_umc_record *record, uint32_t nps,
		uint64_t *page_pfn, uint32_t max_pages)
{
	struct ras_umc *umc = &ras_core->ras_umc;

	if (!(page_pfn && max_pages))
		return -EINVAL;

	/* Without an IP-specific converter there is nothing to report. */
	if (!umc->ip_func || !umc->ip_func->eeprom_record_to_nps_pages)
		return 0;

	return umc->ip_func->eeprom_record_to_nps_pages(ras_core, record, nps,
			page_pfn, max_pages);
}
static void ras_umc_reserve_eeprom_record(struct ras_core_context *ras_core,
struct eeprom_umc_record *record)
{
struct ras_umc *ras_umc = &ras_core->ras_umc;
uint64_t page_pfn[16];
int count = 0, i;
memset(page_pfn, 0, sizeof(page_pfn));
if (ras_umc->ip_func && ras_umc->ip_func->eeprom_record_to_nps_pages) {
count = ras_umc->ip_func->eeprom_record_to_nps_pages(ras_core,
count = ras_umc_convert_record_to_nps_pages(ras_core,
record, record->cur_nps, page_pfn, ARRAY_SIZE(page_pfn));
if (count <= 0) {
RAS_DEV_ERR(ras_core->dev,
"Fail to convert error address! count:%d\n", count);
return;
}
if (count <= 0) {
RAS_DEV_ERR(ras_core->dev,
"Fail to convert error address! count:%d\n", count);
return;
}
/* Reserve memory */
@@ -367,10 +381,8 @@ static int ras_umc_update_eeprom_ram_data(struct ras_core_context *ras_core,
}
memset(page_pfn, 0, sizeof(page_pfn));
if (ras_umc->ip_func && ras_umc->ip_func->eeprom_record_to_nps_pages)
count = ras_umc->ip_func->eeprom_record_to_nps_pages(ras_core,
count = ras_umc_convert_record_to_nps_pages(ras_core,
bps, bps->cur_nps, page_pfn, ARRAY_SIZE(page_pfn));
if (count > 0) {
for (j = 0; j < count; j++) {
bps->cur_nps_retired_row_pfn = page_pfn[j];

View File

@@ -163,4 +163,7 @@ int ras_umc_get_badpage_record(struct ras_core_context *ras_core, uint32_t index
bool ras_umc_check_retired_addr(struct ras_core_context *ras_core, uint64_t addr);
int ras_umc_translate_soc_pa_and_bank(struct ras_core_context *ras_core,
uint64_t *soc_pa, struct umc_bank_addr *bank_addr, bool bank_to_pa);
/*
 * Convert @record into page frame numbers for NPS mode @nps through the
 * IP-specific eeprom_record_to_nps_pages() callback, writing to @page_pfn
 * (at most @max_pages entries).
 * Returns -EINVAL when @page_pfn is NULL or @max_pages is zero, 0 when no
 * callback is registered, otherwise the callback's return value.
 */
int ras_umc_convert_record_to_nps_pages(struct ras_core_context *ras_core,
struct eeprom_umc_record *record, uint32_t nps,
uint64_t *page_pfn, uint32_t max_pages);
#endif