mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-04-04 19:22:08 -04:00
drm/amdgpu: add range check for RAS bad page address
Exclude invalid bad pages.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
@@ -139,9 +139,9 @@ enum amdgpu_ras_retire_page_reservation {
|
||||
|
||||
atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
|
||||
|
||||
static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
|
||||
static int amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
|
||||
uint64_t addr);
|
||||
static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
|
||||
static int amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
|
||||
uint64_t addr);
|
||||
#ifdef CONFIG_X86_MCE_AMD
|
||||
static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev);
|
||||
@@ -172,18 +172,16 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre
|
||||
struct eeprom_table_record err_rec;
|
||||
int ret;
|
||||
|
||||
if ((address >= adev->gmc.mc_vram_size) ||
|
||||
(address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
|
||||
ret = amdgpu_ras_check_bad_page(adev, address);
|
||||
if (ret == -EINVAL) {
|
||||
dev_warn(adev->dev,
|
||||
"RAS WARN: input address 0x%llx is invalid.\n",
|
||||
address);
|
||||
"RAS WARN: input address 0x%llx is invalid.\n",
|
||||
address);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (amdgpu_ras_check_bad_page(adev, address)) {
|
||||
} else if (ret == 1) {
|
||||
dev_warn(adev->dev,
|
||||
"RAS WARN: 0x%llx has already been marked as bad page!\n",
|
||||
address);
|
||||
"RAS WARN: 0x%llx has already been marked as bad page!\n",
|
||||
address);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -573,22 +571,16 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
|
||||
ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
|
||||
break;
|
||||
case 2:
|
||||
if ((data.inject.address >= adev->gmc.mc_vram_size &&
|
||||
adev->gmc.mc_vram_size) ||
|
||||
(data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
|
||||
dev_warn(adev->dev, "RAS WARN: input address "
|
||||
"0x%llx is invalid.",
|
||||
data.inject.address);
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
/* umc ce/ue error injection for a bad page is not allowed */
|
||||
if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
|
||||
amdgpu_ras_check_bad_page(adev, data.inject.address)) {
|
||||
dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has "
|
||||
"already been marked as bad!\n",
|
||||
data.inject.address);
|
||||
if (data.head.block == AMDGPU_RAS_BLOCK__UMC)
|
||||
ret = amdgpu_ras_check_bad_page(adev, data.inject.address);
|
||||
if (ret == -EINVAL) {
|
||||
dev_warn(adev->dev, "RAS WARN: input address 0x%llx is invalid.",
|
||||
data.inject.address);
|
||||
break;
|
||||
} else if (ret == 1) {
|
||||
dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has already been marked as bad!\n",
|
||||
data.inject.address);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -3194,18 +3186,24 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
|
||||
static int amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
|
||||
uint64_t addr)
|
||||
{
|
||||
struct ras_err_handler_data *data = con->eh_data;
|
||||
struct amdgpu_device *adev = con->adev;
|
||||
int i;
|
||||
|
||||
if ((addr >= adev->gmc.mc_vram_size &&
|
||||
adev->gmc.mc_vram_size) ||
|
||||
(addr >= RAS_UMC_INJECT_ADDR_LIMIT))
|
||||
return -EINVAL;
|
||||
|
||||
addr >>= AMDGPU_GPU_PAGE_SHIFT;
|
||||
for (i = 0; i < data->count; i++)
|
||||
if (addr == data->bps[i].retired_page)
|
||||
return true;
|
||||
return 1;
|
||||
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -3213,11 +3211,11 @@ static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
|
||||
*
|
||||
* Note: this check is only for umc block
|
||||
*/
|
||||
static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
|
||||
static int amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
|
||||
uint64_t addr)
|
||||
{
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
bool ret = false;
|
||||
int ret = 0;
|
||||
|
||||
if (!con || !con->eh_data)
|
||||
return ret;
|
||||
|
||||
Reference in New Issue
Block a user