From 0d6516efff2cf275591c57faadce249257d58980 Mon Sep 17 00:00:00 2001 From: Li Ma Date: Mon, 5 Sep 2022 17:20:58 +0800 Subject: drm/amd/pm:add new gpu_metrics_v2_3 to acquire average temperature info Add new gpu_metrics_v2_3 to acquire average temperature info from SMU metrics. To acquire average temp info from gpu_metrics interface, but gpu_metrics_v2_2 only has members to show current temp info. --- v1: Only add average_temperature_gfx in gpu_metrics_v2_3. v2: Add average temp members for soc, core and l3 in gpu_metrics_v2_3 and put these new members at the end of gpu_metrics_v2_3. Add operation to read average temp info from metrics table. v3: Merge v1 and v2 and rename the patch. v4: Merge v3. Add firmware version judgment in vangogh_common_get_gpu_metrics to maintain backward compatibility and rename the patch. "return ret" on error scenario in smu_cmn_get_smc_version. Signed-off-by: Li Ma Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 58 ++++++++ drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 160 +++++++++++++++++++++-- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 3 + 3 files changed, 209 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 7e3231c2191c..a40ead44778a 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -824,4 +824,62 @@ struct gpu_metrics_v2_2 { uint64_t indep_throttle_status; }; +struct gpu_metrics_v2_3 { + struct metrics_table_header common_header; + + /* Temperature */ + uint16_t temperature_gfx; // gfx temperature on APUs + uint16_t temperature_soc; // soc temperature on APUs + uint16_t temperature_core[8]; // CPU core temperature on APUs + uint16_t temperature_l3[2]; + + /* Utilization */ + uint16_t average_gfx_activity; + uint16_t average_mm_activity; // UVD or VCN + + /* Driver attached timestamp (in ns) */ + uint64_t system_clock_counter; + + /* Power/Energy */ + uint16_t average_socket_power; // dGPU + APU power on A + A platform + uint16_t average_cpu_power; + uint16_t average_soc_power; + uint16_t average_gfx_power; + uint16_t average_core_power[8]; // CPU core power on APUs + + /* Average clocks */ + uint16_t average_gfxclk_frequency; + uint16_t average_socclk_frequency; + uint16_t average_uclk_frequency; + uint16_t average_fclk_frequency; + uint16_t average_vclk_frequency; + uint16_t average_dclk_frequency; + + /* Current clocks */ + uint16_t current_gfxclk; + uint16_t current_socclk; + uint16_t current_uclk; + uint16_t current_fclk; + uint16_t current_vclk; + uint16_t current_dclk; + uint16_t current_coreclk[8]; // CPU core clocks + uint16_t current_l3clk[2]; + + /* Throttle status (ASIC dependent) */ + uint32_t throttle_status; + + /* Fans */ + uint16_t fan_pwm; + + uint16_t padding[3]; + + /* Throttle status (ASIC independent) */ + uint64_t indep_throttle_status; + + /* Average Temperature */ + uint16_t average_temperature_gfx; // average gfx temperature on APUs + uint16_t average_temperature_soc; // average soc temperature on APUs + uint16_t average_temperature_core[8]; // average CPU core temperature on APUs + uint16_t average_temperature_l3[2]; +}; #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 847990145dcd..cb10c7e31264 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -223,14 +223,13 @@ static int vangogh_tables_init(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; struct smu_table *tables = smu_table->tables; - struct amdgpu_device *adev = smu->adev; uint32_t if_version; + uint32_t smu_version; uint32_t ret = 0; - ret = smu_cmn_get_smc_version(smu, &if_version, NULL); + ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version); if (ret) { - dev_err(adev->dev, "Failed to get smu if version!\n"); - goto err0_out; + return ret; } SMU_TABLE_INIT(tables, SMU_TABLE_WATERMARKS, sizeof(Watermarks_t), @@ -255,7 +254,10 @@ static int vangogh_tables_init(struct smu_context *smu) goto err0_out; smu_table->metrics_time = 0; - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_2); + if (smu_version >= 0x043F3E00) + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_3); + else + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_2); smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); if (!smu_table->gpu_metrics_table) goto err1_out; @@ -1648,6 +1650,63 @@ static int vangogh_set_watermarks_table(struct smu_context *smu, return 0; } +static ssize_t vangogh_get_legacy_gpu_metrics_v2_3(struct smu_context *smu, + void **table) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct gpu_metrics_v2_3 *gpu_metrics = + (struct gpu_metrics_v2_3 *)smu_table->gpu_metrics_table; + SmuMetrics_legacy_t metrics; + int ret = 0; + + ret = smu_cmn_get_metrics_table(smu, &metrics, true); + if (ret) + return ret; + + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 3); + + gpu_metrics->temperature_gfx = metrics.GfxTemperature; + gpu_metrics->temperature_soc = metrics.SocTemperature; + memcpy(&gpu_metrics->temperature_core[0], + &metrics.CoreTemperature[0], + sizeof(uint16_t) * 4); + gpu_metrics->temperature_l3[0] = metrics.L3Temperature[0]; + + gpu_metrics->average_gfx_activity = metrics.GfxActivity; + gpu_metrics->average_mm_activity = metrics.UvdActivity; + + gpu_metrics->average_socket_power = metrics.CurrentSocketPower; + gpu_metrics->average_cpu_power = metrics.Power[0]; + gpu_metrics->average_soc_power = metrics.Power[1]; + gpu_metrics->average_gfx_power = metrics.Power[2]; + memcpy(&gpu_metrics->average_core_power[0], + &metrics.CorePower[0], + sizeof(uint16_t) * 4); + + gpu_metrics->average_gfxclk_frequency = metrics.GfxclkFrequency; + gpu_metrics->average_socclk_frequency = metrics.SocclkFrequency; + gpu_metrics->average_uclk_frequency = metrics.MemclkFrequency; + gpu_metrics->average_fclk_frequency = metrics.MemclkFrequency; + gpu_metrics->average_vclk_frequency = metrics.VclkFrequency; + gpu_metrics->average_dclk_frequency = metrics.DclkFrequency; + + memcpy(&gpu_metrics->current_coreclk[0], + &metrics.CoreFrequency[0], + sizeof(uint16_t) * 4); + gpu_metrics->current_l3clk[0] = metrics.L3Frequency[0]; + + gpu_metrics->throttle_status = metrics.ThrottlerStatus; + gpu_metrics->indep_throttle_status = + smu_cmn_get_indep_throttler_status(metrics.ThrottlerStatus, + vangogh_throttler_map); + + gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); + + *table = (void *)gpu_metrics; + + return sizeof(struct gpu_metrics_v2_3); +} + static ssize_t vangogh_get_legacy_gpu_metrics(struct smu_context *smu, void **table) { @@ -1705,6 +1764,77 @@ static ssize_t vangogh_get_legacy_gpu_metrics(struct smu_context *smu, return sizeof(struct gpu_metrics_v2_2); } +static ssize_t vangogh_get_gpu_metrics_v2_3(struct smu_context *smu, + void **table) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct gpu_metrics_v2_3 *gpu_metrics = + (struct gpu_metrics_v2_3 *)smu_table->gpu_metrics_table; + SmuMetrics_t metrics; + int ret = 0; + + ret = smu_cmn_get_metrics_table(smu, &metrics, true); + if (ret) + return ret; + + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 3); + + gpu_metrics->temperature_gfx = metrics.Current.GfxTemperature; + gpu_metrics->temperature_soc = metrics.Current.SocTemperature; + memcpy(&gpu_metrics->temperature_core[0], + &metrics.Current.CoreTemperature[0], + sizeof(uint16_t) * 4); + gpu_metrics->temperature_l3[0] = metrics.Current.L3Temperature[0]; + + gpu_metrics->average_temperature_gfx = metrics.Average.GfxTemperature; + gpu_metrics->average_temperature_soc = metrics.Average.SocTemperature; + memcpy(&gpu_metrics->average_temperature_core[0], + &metrics.Average.CoreTemperature[0], + sizeof(uint16_t) * 4); + gpu_metrics->average_temperature_l3[0] = metrics.Average.L3Temperature[0]; + + gpu_metrics->average_gfx_activity = metrics.Current.GfxActivity; + gpu_metrics->average_mm_activity = metrics.Current.UvdActivity; + + gpu_metrics->average_socket_power = metrics.Current.CurrentSocketPower; + gpu_metrics->average_cpu_power = metrics.Current.Power[0]; + gpu_metrics->average_soc_power = metrics.Current.Power[1]; + gpu_metrics->average_gfx_power = metrics.Current.Power[2]; + memcpy(&gpu_metrics->average_core_power[0], + &metrics.Average.CorePower[0], + sizeof(uint16_t) * 4); + + gpu_metrics->average_gfxclk_frequency = metrics.Average.GfxclkFrequency; + gpu_metrics->average_socclk_frequency = metrics.Average.SocclkFrequency; + gpu_metrics->average_uclk_frequency = metrics.Average.MemclkFrequency; + gpu_metrics->average_fclk_frequency = metrics.Average.MemclkFrequency; + gpu_metrics->average_vclk_frequency = metrics.Average.VclkFrequency; + gpu_metrics->average_dclk_frequency = metrics.Average.DclkFrequency; + + gpu_metrics->current_gfxclk = metrics.Current.GfxclkFrequency; + gpu_metrics->current_socclk = metrics.Current.SocclkFrequency; + gpu_metrics->current_uclk = metrics.Current.MemclkFrequency; + gpu_metrics->current_fclk = metrics.Current.MemclkFrequency; + gpu_metrics->current_vclk = metrics.Current.VclkFrequency; + gpu_metrics->current_dclk = metrics.Current.DclkFrequency; + + memcpy(&gpu_metrics->current_coreclk[0], + &metrics.Current.CoreFrequency[0], + sizeof(uint16_t) * 4); + gpu_metrics->current_l3clk[0] = metrics.Current.L3Frequency[0]; + + gpu_metrics->throttle_status = metrics.Current.ThrottlerStatus; + gpu_metrics->indep_throttle_status = + smu_cmn_get_indep_throttler_status(metrics.Current.ThrottlerStatus, + vangogh_throttler_map); + + gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); + + *table = (void *)gpu_metrics; + + return sizeof(struct gpu_metrics_v2_3); +} + static ssize_t vangogh_get_gpu_metrics(struct smu_context *smu, void **table) { @@ -1772,20 +1902,26 @@ static ssize_t vangogh_get_gpu_metrics(struct smu_context *smu, static ssize_t vangogh_common_get_gpu_metrics(struct smu_context *smu, void **table) { - struct amdgpu_device *adev = smu->adev; uint32_t if_version; + uint32_t smu_version; int ret = 0; - ret = smu_cmn_get_smc_version(smu, &if_version, NULL); + ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version); if (ret) { - dev_err(adev->dev, "Failed to get smu if version!\n"); return ret; } - if (if_version < 0x3) - ret = vangogh_get_legacy_gpu_metrics(smu, table); - else - ret = vangogh_get_gpu_metrics(smu, table); + if (smu_version >= 0x043F3E00) { + if (if_version < 0x3) + ret = vangogh_get_legacy_gpu_metrics_v2_3(smu, table); + else + ret = vangogh_get_gpu_metrics_v2_3(smu, table); + } else { + if (if_version < 0x3) + ret = vangogh_get_legacy_gpu_metrics(smu, table); + else + ret = vangogh_get_gpu_metrics(smu, table); + } return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 15e4298c7cc8..e4f8f90ac5aa 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -969,6 +969,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev) case METRICS_VERSION(2, 2): structure_size = sizeof(struct gpu_metrics_v2_2); break; + case METRICS_VERSION(2, 3): + structure_size = sizeof(struct gpu_metrics_v2_3); + break; default: return; } -- cgit