mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-02-16 20:05:13 -05:00
drm/amd/pm: Fetch and fill temperature metrics
Fetch the system metrics table to fill gpuboard/baseboard temperature metrics data for smu_v13_0_12.

v2: Remove unnecessary checks; use a separate metrics time for the temperature metrics table (Lijo)
v3: Use cached values for back-to-back system metrics queries (Lijo)

Signed-off-by: Asad Kamal <asad.kamal@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
@@ -766,6 +766,7 @@ static int smu_set_funcs(struct amdgpu_device *adev)
|
||||
case IP_VERSION(13, 0, 14):
|
||||
case IP_VERSION(13, 0, 12):
|
||||
smu_v13_0_6_set_ppt_funcs(smu);
|
||||
smu_v13_0_6_set_temp_funcs(smu);
|
||||
/* Enable pp_od_clk_voltage node */
|
||||
smu->od_enabled = true;
|
||||
break;
|
||||
|
||||
@@ -257,6 +257,7 @@ struct smu_table {
|
||||
void *cpu_addr;
|
||||
struct amdgpu_bo *bo;
|
||||
uint32_t version;
|
||||
unsigned long metrics_time;
|
||||
};
|
||||
|
||||
enum smu_perf_level_designation {
|
||||
@@ -322,6 +323,7 @@ enum smu_table_id {
|
||||
SMU_TABLE_ECCINFO,
|
||||
SMU_TABLE_COMBO_PPTABLE,
|
||||
SMU_TABLE_WIFIBAND,
|
||||
SMU_TABLE_TEMP_METRICS,
|
||||
SMU_TABLE_COUNT,
|
||||
};
|
||||
|
||||
|
||||
@@ -278,7 +278,8 @@
|
||||
__SMU_DUMMY_MAP(MALLPowerState), \
|
||||
__SMU_DUMMY_MAP(ResetSDMA), \
|
||||
__SMU_DUMMY_MAP(ResetVCN), \
|
||||
__SMU_DUMMY_MAP(GetStaticMetricsTable),
|
||||
__SMU_DUMMY_MAP(GetStaticMetricsTable), \
|
||||
__SMU_DUMMY_MAP(GetSystemMetricsTable),
|
||||
|
||||
#undef __SMU_DUMMY_MAP
|
||||
#define __SMU_DUMMY_MAP(type) SMU_MSG_##type
|
||||
|
||||
@@ -138,6 +138,7 @@ const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] =
|
||||
MSG_MAP(SetThrottlingPolicy, PPSMC_MSG_SetThrottlingPolicy, 0),
|
||||
MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0),
|
||||
MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 1),
|
||||
MSG_MAP(GetSystemMetricsTable, PPSMC_MSG_GetSystemMetricsTable, 0),
|
||||
};
|
||||
|
||||
static int smu_v13_0_12_get_enabled_mask(struct smu_context *smu,
|
||||
@@ -184,7 +185,8 @@ static int smu_v13_0_12_fru_get_product_info(struct smu_context *smu,
|
||||
|
||||
int smu_v13_0_12_get_max_metrics_size(void)
|
||||
{
|
||||
return max(sizeof(StaticMetricsTable_t), sizeof(MetricsTable_t));
|
||||
return max3(sizeof(StaticMetricsTable_t), sizeof(MetricsTable_t),
|
||||
sizeof(SystemMetricsTable_t));
|
||||
}
|
||||
|
||||
static void smu_v13_0_12_init_xgmi_data(struct smu_context *smu,
|
||||
@@ -359,6 +361,245 @@ int smu_v13_0_12_get_smu_metrics_data(struct smu_context *smu,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int smu_v13_0_12_get_system_metrics_table(struct smu_context *smu, void *metrics_table,
|
||||
bool bypass_cache)
|
||||
{
|
||||
struct smu_table_context *smu_table = &smu->smu_table;
|
||||
uint32_t table_size = smu_table->tables[SMU_TABLE_SMU_METRICS].size;
|
||||
struct smu_table *table = &smu_table->driver_table;
|
||||
int ret;
|
||||
|
||||
if (bypass_cache || !smu_table->tables[SMU_TABLE_TEMP_METRICS].metrics_time ||
|
||||
time_after(jiffies,
|
||||
smu_table->tables[SMU_TABLE_TEMP_METRICS].metrics_time +
|
||||
msecs_to_jiffies(1))) {
|
||||
ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetSystemMetricsTable, NULL);
|
||||
if (ret) {
|
||||
dev_info(smu->adev->dev,
|
||||
"Failed to export system metrics table!\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
amdgpu_asic_invalidate_hdp(smu->adev, NULL);
|
||||
memcpy(smu_table->metrics_table, table->cpu_addr, table_size);
|
||||
|
||||
smu_table->tables[SMU_TABLE_TEMP_METRICS].metrics_time = jiffies;
|
||||
}
|
||||
|
||||
if (metrics_table)
|
||||
memcpy(metrics_table, smu_table->metrics_table, sizeof(SystemMetricsTable_t));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Translate a NODE_TEMP_e index into the matching amdgpu_node_temp
 * identifier. An index with no counterpart yields -EINVAL.
 */
static enum amdgpu_node_temp smu_v13_0_12_get_node_sensor_type(NODE_TEMP_e type)
{
	enum amdgpu_node_temp mapped;

	switch (type) {
	case NODE_TEMP_RETIMER:
		mapped = AMDGPU_RETIMER_X_TEMP;
		break;
	case NODE_TEMP_IBC_TEMP:
		mapped = AMDGPU_OAM_X_IBC_TEMP;
		break;
	case NODE_TEMP_IBC_2_TEMP:
		mapped = AMDGPU_OAM_X_IBC_2_TEMP;
		break;
	case NODE_TEMP_VDD18_VR_TEMP:
		mapped = AMDGPU_OAM_X_VDD18_VR_TEMP;
		break;
	case NODE_TEMP_04_HBM_B_VR_TEMP:
		mapped = AMDGPU_OAM_X_04_HBM_B_VR_TEMP;
		break;
	case NODE_TEMP_04_HBM_D_VR_TEMP:
		mapped = AMDGPU_OAM_X_04_HBM_D_VR_TEMP;
		break;
	default:
		mapped = -EINVAL;
		break;
	}

	return mapped;
}
|
||||
|
||||
/*
 * Translate an SVI_TEMP_e index into the matching amdgpu_vr_temp identifier.
 * An index with no counterpart yields -EINVAL.
 */
static enum amdgpu_vr_temp smu_v13_0_12_get_vr_sensor_type(SVI_TEMP_e type)
{
	enum amdgpu_vr_temp mapped;

	switch (type) {
	case SVI_VDDCR_VDD0_TEMP:
		mapped = AMDGPU_VDDCR_VDD0_TEMP;
		break;
	case SVI_VDDCR_VDD1_TEMP:
		mapped = AMDGPU_VDDCR_VDD1_TEMP;
		break;
	case SVI_VDDCR_VDD2_TEMP:
		mapped = AMDGPU_VDDCR_VDD2_TEMP;
		break;
	case SVI_VDDCR_VDD3_TEMP:
		mapped = AMDGPU_VDDCR_VDD3_TEMP;
		break;
	case SVI_VDDCR_SOC_A_TEMP:
		mapped = AMDGPU_VDDCR_SOC_A_TEMP;
		break;
	case SVI_VDDCR_SOC_C_TEMP:
		mapped = AMDGPU_VDDCR_SOC_C_TEMP;
		break;
	case SVI_VDDCR_SOCIO_A_TEMP:
		mapped = AMDGPU_VDDCR_SOCIO_A_TEMP;
		break;
	case SVI_VDDCR_SOCIO_C_TEMP:
		mapped = AMDGPU_VDDCR_SOCIO_C_TEMP;
		break;
	case SVI_VDD_085_HBM_TEMP:
		mapped = AMDGPU_VDD_085_HBM_TEMP;
		break;
	case SVI_VDDCR_11_HBM_B_TEMP:
		mapped = AMDGPU_VDDCR_11_HBM_B_TEMP;
		break;
	case SVI_VDDCR_11_HBM_D_TEMP:
		mapped = AMDGPU_VDDCR_11_HBM_D_TEMP;
		break;
	case SVI_VDD_USR_TEMP:
		mapped = AMDGPU_VDD_USR_TEMP;
		break;
	case SVI_VDDIO_11_E32_TEMP:
		mapped = AMDGPU_VDDIO_11_E32_TEMP;
		break;
	default:
		mapped = -EINVAL;
		break;
	}

	return mapped;
}
|
||||
|
||||
/*
 * Translate a SYSTEM_TEMP_e index into the matching amdgpu_system_temp
 * identifier. An index with no counterpart yields -EINVAL.
 */
static enum amdgpu_system_temp smu_v13_0_12_get_system_sensor_type(SYSTEM_TEMP_e type)
{
	enum amdgpu_system_temp mapped;

	switch (type) {
	case SYSTEM_TEMP_UBB_FPGA:
		mapped = AMDGPU_UBB_FPGA_TEMP;
		break;
	case SYSTEM_TEMP_UBB_FRONT:
		mapped = AMDGPU_UBB_FRONT_TEMP;
		break;
	case SYSTEM_TEMP_UBB_BACK:
		mapped = AMDGPU_UBB_BACK_TEMP;
		break;
	case SYSTEM_TEMP_UBB_OAM7:
		mapped = AMDGPU_UBB_OAM7_TEMP;
		break;
	case SYSTEM_TEMP_UBB_IBC:
		mapped = AMDGPU_UBB_IBC_TEMP;
		break;
	case SYSTEM_TEMP_UBB_UFPGA:
		mapped = AMDGPU_UBB_UFPGA_TEMP;
		break;
	case SYSTEM_TEMP_UBB_OAM1:
		mapped = AMDGPU_UBB_OAM1_TEMP;
		break;
	case SYSTEM_TEMP_OAM_0_1_HSC:
		mapped = AMDGPU_OAM_0_1_HSC_TEMP;
		break;
	case SYSTEM_TEMP_OAM_2_3_HSC:
		mapped = AMDGPU_OAM_2_3_HSC_TEMP;
		break;
	case SYSTEM_TEMP_OAM_4_5_HSC:
		mapped = AMDGPU_OAM_4_5_HSC_TEMP;
		break;
	case SYSTEM_TEMP_OAM_6_7_HSC:
		mapped = AMDGPU_OAM_6_7_HSC_TEMP;
		break;
	case SYSTEM_TEMP_UBB_FPGA_0V72_VR:
		mapped = AMDGPU_UBB_FPGA_0V72_VR_TEMP;
		break;
	case SYSTEM_TEMP_UBB_FPGA_3V3_VR:
		mapped = AMDGPU_UBB_FPGA_3V3_VR_TEMP;
		break;
	case SYSTEM_TEMP_RETIMER_0_1_2_3_1V2_VR:
		mapped = AMDGPU_RETIMER_0_1_2_3_1V2_VR_TEMP;
		break;
	case SYSTEM_TEMP_RETIMER_4_5_6_7_1V2_VR:
		mapped = AMDGPU_RETIMER_4_5_6_7_1V2_VR_TEMP;
		break;
	case SYSTEM_TEMP_RETIMER_0_1_0V9_VR:
		mapped = AMDGPU_RETIMER_0_1_0V9_VR_TEMP;
		break;
	case SYSTEM_TEMP_RETIMER_4_5_0V9_VR:
		mapped = AMDGPU_RETIMER_4_5_0V9_VR_TEMP;
		break;
	case SYSTEM_TEMP_RETIMER_2_3_0V9_VR:
		mapped = AMDGPU_RETIMER_2_3_0V9_VR_TEMP;
		break;
	case SYSTEM_TEMP_RETIMER_6_7_0V9_VR:
		mapped = AMDGPU_RETIMER_6_7_0V9_VR_TEMP;
		break;
	case SYSTEM_TEMP_OAM_0_1_2_3_3V3_VR:
		mapped = AMDGPU_OAM_0_1_2_3_3V3_VR_TEMP;
		break;
	case SYSTEM_TEMP_OAM_4_5_6_7_3V3_VR:
		mapped = AMDGPU_OAM_4_5_6_7_3V3_VR_TEMP;
		break;
	case SYSTEM_TEMP_IBC_HSC:
		mapped = AMDGPU_IBC_HSC_TEMP;
		break;
	case SYSTEM_TEMP_IBC:
		mapped = AMDGPU_IBC_TEMP;
		break;
	default:
		mapped = -EINVAL;
		break;
	}

	return mapped;
}
|
||||
|
||||
static bool smu_v13_0_12_is_temp_metrics_supported(struct smu_context *smu,
|
||||
enum smu_temp_metric_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case SMU_TEMP_METRIC_BASEBOARD:
|
||||
if (smu->adev->gmc.xgmi.physical_node_id == 0 &&
|
||||
smu->adev->gmc.xgmi.num_physical_nodes > 1 &&
|
||||
smu_v13_0_6_cap_supported(smu, SMU_CAP(TEMP_METRICS)))
|
||||
return true;
|
||||
break;
|
||||
case SMU_TEMP_METRIC_GPUBOARD:
|
||||
return smu_v13_0_6_cap_supported(smu, SMU_CAP(TEMP_METRICS));
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static ssize_t smu_v13_0_12_get_temp_metrics(struct smu_context *smu,
|
||||
enum smu_temp_metric_type type, void *table)
|
||||
{
|
||||
struct amdgpu_gpuboard_temp_metrics_v1_0 *gpuboard_temp_metrics;
|
||||
struct amdgpu_baseboard_temp_metrics_v1_0 *baseboard_temp_metrics;
|
||||
SystemMetricsTable_t *metrics;
|
||||
int ret, sensor_type;
|
||||
u32 idx, sensors;
|
||||
ssize_t size;
|
||||
|
||||
size = (type == SMU_TEMP_METRIC_GPUBOARD) ?
|
||||
sizeof(*gpuboard_temp_metrics) : sizeof(*baseboard_temp_metrics);
|
||||
|
||||
if (!table)
|
||||
goto out;
|
||||
metrics = kzalloc(sizeof(SystemMetricsTable_t), GFP_KERNEL);
|
||||
if (!metrics)
|
||||
return -ENOMEM;
|
||||
gpuboard_temp_metrics = (struct amdgpu_gpuboard_temp_metrics_v1_0 *)table;
|
||||
baseboard_temp_metrics = (struct amdgpu_baseboard_temp_metrics_v1_0 *)table;
|
||||
if (type == SMU_TEMP_METRIC_GPUBOARD)
|
||||
smu_cmn_init_gpuboard_temp_metrics(gpuboard_temp_metrics, 1, 0);
|
||||
else if (type == SMU_TEMP_METRIC_BASEBOARD)
|
||||
smu_cmn_init_baseboard_temp_metrics(baseboard_temp_metrics, 1, 0);
|
||||
|
||||
ret = smu_v13_0_12_get_system_metrics_table(smu, metrics, false);
|
||||
if (ret) {
|
||||
kfree(metrics);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (type == SMU_TEMP_METRIC_GPUBOARD) {
|
||||
gpuboard_temp_metrics->accumulation_counter = metrics->AccumulationCounter;
|
||||
gpuboard_temp_metrics->label_version = metrics->LabelVersion;
|
||||
gpuboard_temp_metrics->node_id = metrics->NodeIdentifier;
|
||||
|
||||
idx = 0;
|
||||
for (sensors = 0; sensors < NODE_TEMP_MAX_TEMP_ENTRIES; sensors++) {
|
||||
if (metrics->NodeTemperatures[sensors] != -1) {
|
||||
sensor_type = smu_v13_0_12_get_node_sensor_type(sensors);
|
||||
gpuboard_temp_metrics->node_temp[idx] =
|
||||
((int)metrics->NodeTemperatures[sensors]) & 0xFFFFFF;
|
||||
gpuboard_temp_metrics->node_temp[idx] |= (sensor_type << 24);
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
|
||||
idx = 0;
|
||||
|
||||
for (sensors = 0; sensors < SVI_MAX_TEMP_ENTRIES; sensors++) {
|
||||
if (metrics->VrTemperatures[sensors] != -1) {
|
||||
sensor_type = smu_v13_0_12_get_vr_sensor_type(sensors);
|
||||
gpuboard_temp_metrics->vr_temp[idx] =
|
||||
((int)metrics->VrTemperatures[sensors]) & 0xFFFFFF;
|
||||
gpuboard_temp_metrics->vr_temp[idx] |= (sensor_type << 24);
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
} else if (type == SMU_TEMP_METRIC_BASEBOARD) {
|
||||
baseboard_temp_metrics->accumulation_counter = metrics->AccumulationCounter;
|
||||
baseboard_temp_metrics->label_version = metrics->LabelVersion;
|
||||
baseboard_temp_metrics->node_id = metrics->NodeIdentifier;
|
||||
|
||||
idx = 0;
|
||||
for (sensors = 0; sensors < SYSTEM_TEMP_MAX_ENTRIES; sensors++) {
|
||||
if (metrics->SystemTemperatures[sensors] != -1) {
|
||||
sensor_type = smu_v13_0_12_get_system_sensor_type(sensors);
|
||||
baseboard_temp_metrics->system_temp[idx] =
|
||||
((int)metrics->SystemTemperatures[sensors]) & 0xFFFFFF;
|
||||
baseboard_temp_metrics->system_temp[idx] |= (sensor_type << 24);
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
kfree(metrics);
|
||||
|
||||
out:
|
||||
return size;
|
||||
}
|
||||
|
||||
ssize_t smu_v13_0_12_get_xcp_metrics(struct smu_context *smu, struct amdgpu_xcp *xcp, void *table, void *smu_metrics)
|
||||
{
|
||||
const u8 num_jpeg_rings = NUM_JPEG_RINGS_FW;
|
||||
@@ -572,3 +813,8 @@ ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table, void
|
||||
|
||||
return sizeof(*gpu_metrics);
|
||||
}
|
||||
|
||||
const struct smu_temp_funcs smu_v13_0_12_temp_funcs = {
|
||||
.temp_metrics_is_supported = smu_v13_0_12_is_temp_metrics_supported,
|
||||
.get_temp_metrics = smu_v13_0_12_get_temp_metrics,
|
||||
};
|
||||
|
||||
@@ -3871,3 +3871,9 @@ void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)
|
||||
amdgpu_mca_smu_init_funcs(smu->adev, &smu_v13_0_6_mca_smu_funcs);
|
||||
amdgpu_aca_set_smu_funcs(smu->adev, &smu_v13_0_6_aca_smu_funcs);
|
||||
}
|
||||
|
||||
void smu_v13_0_6_set_temp_funcs(struct smu_context *smu)
|
||||
{
|
||||
smu->smu_temp.temp_funcs = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)
|
||||
== IP_VERSION(13, 0, 12)) ? &smu_v13_0_12_temp_funcs : NULL;
|
||||
}
|
||||
|
||||
@@ -68,10 +68,12 @@ enum smu_v13_0_6_caps {
|
||||
SMU_CAP(HST_LIMIT_METRICS),
|
||||
SMU_CAP(BOARD_VOLTAGE),
|
||||
SMU_CAP(PLDM_VERSION),
|
||||
SMU_CAP(TEMP_METRICS),
|
||||
SMU_CAP(ALL),
|
||||
};
|
||||
|
||||
extern void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu);
|
||||
extern void smu_v13_0_6_set_temp_funcs(struct smu_context *smu);
|
||||
bool smu_v13_0_6_cap_supported(struct smu_context *smu, enum smu_v13_0_6_caps cap);
|
||||
int smu_v13_0_6_get_static_metrics_table(struct smu_context *smu);
|
||||
int smu_v13_0_6_get_metrics_table(struct smu_context *smu, void *metrics_table,
|
||||
@@ -88,4 +90,5 @@ ssize_t smu_v13_0_12_get_xcp_metrics(struct smu_context *smu,
|
||||
void *smu_metrics);
|
||||
extern const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[];
|
||||
extern const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[];
|
||||
extern const struct smu_temp_funcs smu_v13_0_12_temp_funcs;
|
||||
#endif
|
||||
|
||||
@@ -65,6 +65,32 @@
|
||||
header->structure_size = sizeof(*tmp); \
|
||||
} while (0)
|
||||
|
||||
/*
 * Prepare a baseboard temperature metrics table of revision v<fr>_<cr>.
 *
 * @ptr must be of type struct amdgpu_baseboard_temp_metrics_v<fr>_<cr> *
 * (enforced with typecheck()). The whole structure is filled with 0xFF
 * bytes, then the metrics_table_header at its start receives the format
 * revision, the content revision and the structure size.
 */
#define smu_cmn_init_baseboard_temp_metrics(ptr, fr, cr)                       \
	do {                                                                    \
		typecheck(struct amdgpu_baseboard_temp_metrics_v##fr##_##cr *,  \
			  (ptr));                                               \
		struct amdgpu_baseboard_temp_metrics_v##fr##_##cr *tmp = (ptr); \
		struct metrics_table_header *header =                           \
			(struct metrics_table_header *)tmp;                     \
		memset(tmp, 0xFF, sizeof(*tmp));                                \
		header->structure_size = sizeof(*tmp);                          \
		header->format_revision = fr;                                   \
		header->content_revision = cr;                                  \
	} while (0)
|
||||
|
||||
/*
 * Prepare a GPU board temperature metrics table of revision v<fr>_<cr>.
 *
 * @ptr must be of type struct amdgpu_gpuboard_temp_metrics_v<fr>_<cr> *
 * (enforced with typecheck()). The whole structure is filled with 0xFF
 * bytes, then the metrics_table_header at its start receives the format
 * revision, the content revision and the structure size.
 */
#define smu_cmn_init_gpuboard_temp_metrics(ptr, fr, cr)                        \
	do {                                                                    \
		typecheck(struct amdgpu_gpuboard_temp_metrics_v##fr##_##cr *,   \
			  (ptr));                                               \
		struct amdgpu_gpuboard_temp_metrics_v##fr##_##cr *tmp = (ptr);  \
		struct metrics_table_header *header =                           \
			(struct metrics_table_header *)tmp;                     \
		memset(tmp, 0xFF, sizeof(*tmp));                                \
		header->structure_size = sizeof(*tmp);                          \
		header->format_revision = fr;                                   \
		header->content_revision = cr;                                  \
	} while (0)
|
||||
|
||||
extern const int link_speed[];
|
||||
|
||||
/* Helper to Convert from PCIE Gen 1/2/3/4/5/6 to 0.1 GT/s speed units */
|
||||
|
||||
Reference in New Issue
Block a user