diff options
Diffstat (limited to 'drivers/gpu/drm/xe/xe_guc_pc.c')
| -rw-r--r-- | drivers/gpu/drm/xe/xe_guc_pc.c | 659 |
1 files changed, 501 insertions, 158 deletions
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index e8b9faeaef64..951a49fb1d3e 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -5,9 +5,16 @@ #include "xe_guc_pc.h" +#include <linux/cleanup.h> #include <linux/delay.h> +#include <linux/iopoll.h> +#include <linux/jiffies.h> +#include <linux/ktime.h> +#include <linux/wait_bit.h> #include <drm/drm_managed.h> +#include <drm/drm_print.h> +#include <generated/xe_device_wa_oob.h> #include <generated/xe_wa_oob.h> #include "abi/guc_actions_slpc_abi.h" @@ -19,6 +26,7 @@ #include "xe_gt.h" #include "xe_gt_idle.h" #include "xe_gt_printk.h" +#include "xe_gt_throttle.h" #include "xe_gt_types.h" #include "xe_guc.h" #include "xe_guc_ct.h" @@ -38,6 +46,7 @@ #define FREQ_INFO_REC XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x5ef0) #define RPE_MASK REG_GENMASK(15, 8) +#define RPA_MASK REG_GENMASK(31, 16) #define GT_PERF_STATUS XE_REG(0x1381b4) #define CAGF_MASK REG_GENMASK(19, 11) @@ -47,6 +56,12 @@ #define LNL_MERT_FREQ_CAP 800 #define BMG_MERT_FREQ_CAP 2133 +#define BMG_MIN_FREQ 1200 +#define BMG_MERT_FLUSH_FREQ_CAP 2600 + +#define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */ +#define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */ +#define SLPC_ACT_FREQ_TIMEOUT_MS 100 /** * DOC: GuC Power Conservation (PC) @@ -66,6 +81,11 @@ * Xe driver enables SLPC with all of its defaults features and frequency * selection, which varies per platform. * + * Power profiles add another level of control to SLPC. When power saving + * profile is chosen, SLPC will use conservative thresholds to ramp frequency, + * thus saving power. Base profile is default and ensures balanced performance + * for any workload. + * * Render-C States: * ================ * @@ -112,25 +132,37 @@ static struct iosys_map *pc_to_maps(struct xe_guc_pc *pc) FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count)) static int wait_for_pc_state(struct xe_guc_pc *pc, - enum slpc_global_state state) + enum slpc_global_state target_state, + int timeout_ms) { - int timeout_us = 5000; /* rought 5ms, but no need for precision */ - int slept, wait = 10; + enum slpc_global_state state; xe_device_assert_mem_access(pc_to_xe(pc)); - for (slept = 0; slept < timeout_us;) { - if (slpc_shared_data_read(pc, header.global_state) == state) - return 0; + return poll_timeout_us(state = slpc_shared_data_read(pc, header.global_state), + state == target_state, + 20, timeout_ms * USEC_PER_MSEC, false); +} - usleep_range(wait, wait << 1); - slept += wait; - wait <<= 1; - if (slept + wait > timeout_us) - wait = timeout_us - slept; - } +static int wait_for_flush_complete(struct xe_guc_pc *pc) +{ + const unsigned long timeout = msecs_to_jiffies(30); + + if (!wait_var_event_timeout(&pc->flush_freq_limit, + !atomic_read(&pc->flush_freq_limit), + timeout)) + return -ETIMEDOUT; + + return 0; +} - return -ETIMEDOUT; +static int wait_for_act_freq_max_limit(struct xe_guc_pc *pc, u32 max_limit) +{ + u32 freq; + + return poll_timeout_us(freq = xe_guc_pc_get_act_freq(pc), + freq <= max_limit, + 20, SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC, false); } static int pc_action_reset(struct xe_guc_pc *pc) @@ -145,7 +177,7 @@ static int pc_action_reset(struct xe_guc_pc *pc) int ret; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC PC reset failed: %pe\n", ERR_PTR(ret)); @@ -163,12 +195,13 @@ static int pc_action_query_task_state(struct xe_guc_pc *pc) }; int ret; - if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, + SLPC_RESET_TIMEOUT_MS)) return -EAGAIN; /* Blocking here to ensure the results are ready before reading them */ ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action)); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC PC query task state failed: %pe\n", ERR_PTR(ret)); @@ -186,11 +219,12 @@ static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value) }; int ret; - if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, + SLPC_RESET_TIMEOUT_MS)) return -EAGAIN; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC PC set param[%u]=%u failed: %pe\n", id, value, ERR_PTR(ret)); @@ -207,11 +241,12 @@ static int pc_action_unset_param(struct xe_guc_pc *pc, u8 id) struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; int ret; - if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, + SLPC_RESET_TIMEOUT_MS)) return -EAGAIN; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC PC unset param failed: %pe", ERR_PTR(ret)); @@ -228,7 +263,7 @@ static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode) int ret; ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret) + if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED)) xe_gt_err(pc_to_gt(pc), "GuC RC enable mode=%u failed: %pe\n", mode, ERR_PTR(ret)); return ret; @@ -296,7 +331,7 @@ static int pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) * Our goal is to have the admin choices respected. */ pc_action_set_param(pc, SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, - freq < pc->rpe_freq); + freq < xe_guc_pc_get_rpe_freq(pc)); return pc_action_set_param(pc, SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, @@ -328,54 +363,79 @@ static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) freq); } -static void mtl_update_rpe_value(struct xe_guc_pc *pc) +static u32 mtl_get_rpa_freq(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); u32 reg; if (xe_gt_is_media_type(gt)) - reg = xe_mmio_read32(>->mmio, MTL_MPE_FREQUENCY); + reg = xe_mmio_read32(>->mmio, MTL_MPA_FREQUENCY); else - reg = xe_mmio_read32(>->mmio, MTL_GT_RPE_FREQUENCY); + reg = xe_mmio_read32(>->mmio, MTL_GT_RPA_FREQUENCY); - pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg)); + return decode_freq(REG_FIELD_GET(MTL_RPA_MASK, reg)); } -static void tgl_update_rpe_value(struct xe_guc_pc *pc) +static u32 mtl_get_rpe_freq(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); - struct xe_device *xe = gt_to_xe(gt); u32 reg; + if (xe_gt_is_media_type(gt)) + reg = xe_mmio_read32(>->mmio, MTL_MPE_FREQUENCY); + else + reg = xe_mmio_read32(>->mmio, MTL_GT_RPE_FREQUENCY); + + return decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg)); +} + +static u32 pvc_get_rpa_freq(struct xe_guc_pc *pc) +{ /* - * For PVC we still need to use fused RP1 as the approximation for RPe - * For other platforms than PVC we get the resolved RPe directly from + * For PVC we still need to use fused RP0 as the approximation for RPa + * For other platforms than PVC we get the resolved RPa directly from * PCODE at a different register */ - if (xe->info.platform == XE_PVC) - reg = xe_mmio_read32(>->mmio, PVC_RP_STATE_CAP); - else - reg = xe_mmio_read32(>->mmio, FREQ_INFO_REC); - pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; + struct xe_gt *gt = pc_to_gt(pc); + u32 reg; + + reg = xe_mmio_read32(>->mmio, PVC_RP_STATE_CAP); + return REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER; } -static void pc_update_rp_values(struct xe_guc_pc *pc) +static u32 tgl_get_rpa_freq(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); - struct xe_device *xe = gt_to_xe(gt); + u32 reg; - if (GRAPHICS_VERx100(xe) >= 1270) - mtl_update_rpe_value(pc); - else - tgl_update_rpe_value(pc); + reg = xe_mmio_read32(>->mmio, FREQ_INFO_REC); + return REG_FIELD_GET(RPA_MASK, reg) * GT_FREQUENCY_MULTIPLIER; +} + +static u32 pvc_get_rpe_freq(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 reg; + + /* + * For PVC we still need to use fused RP1 as the approximation for RPe + */ + reg = xe_mmio_read32(>->mmio, PVC_RP_STATE_CAP); + return REG_FIELD_GET(RP1_MASK, reg) * GT_FREQUENCY_MULTIPLIER; +} + +static u32 tgl_get_rpe_freq(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 reg; /* - * RPe is decided at runtime by PCODE. In the rare case where that's - * smaller than the fused min, we will trust the PCODE and use that - * as our minimum one. + * For other platforms than PVC, we get the resolved RPe directly from + * PCODE at a different register */ - pc->rpn_freq = min(pc->rpn_freq, pc->rpe_freq); + reg = xe_mmio_read32(>->mmio, FREQ_INFO_REC); + return REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER; } /** @@ -404,6 +464,30 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc) return freq; } +static u32 get_cur_freq(struct xe_gt *gt) +{ + u32 freq; + + freq = xe_mmio_read32(>->mmio, RPNSWREQ); + freq = REG_FIELD_GET(REQ_RATIO_MASK, freq); + return decode_freq(freq); +} + +/** + * xe_guc_pc_get_cur_freq_fw - With fw held, get requested frequency + * @pc: The GuC PC + * + * Returns: the requested frequency for that GT instance + */ +u32 xe_guc_pc_get_cur_freq_fw(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); + + return get_cur_freq(gt); +} + /** * xe_guc_pc_get_cur_freq - Get Current requested frequency * @pc: The GuC PC @@ -421,16 +505,13 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq) * GuC SLPC plays with cur freq request when GuCRC is enabled * Block RC6 for a more reliable read. */ - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) { xe_force_wake_put(gt_to_fw(gt), fw_ref); return -ETIMEDOUT; } - *freq = xe_mmio_read32(>->mmio, RPNSWREQ); - - *freq = REG_FIELD_GET(REQ_RATIO_MASK, *freq); - *freq = decode_freq(*freq); + *freq = get_cur_freq(gt); xe_force_wake_put(gt_to_fw(gt), fw_ref); return 0; @@ -448,6 +529,25 @@ u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc) } /** + * xe_guc_pc_get_rpa_freq - Get the RPa freq + * @pc: The GuC PC + * + * Returns: RPa freq. + */ +u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + struct xe_device *xe = gt_to_xe(gt); + + if (GRAPHICS_VERx100(xe) == 1260) + return pvc_get_rpa_freq(pc); + else if (GRAPHICS_VERx100(xe) >= 1270) + return mtl_get_rpa_freq(pc); + else + return tgl_get_rpa_freq(pc); +} + +/** * xe_guc_pc_get_rpe_freq - Get the RPe freq * @pc: The GuC PC * @@ -455,9 +555,17 @@ u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc) */ u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc) { - pc_update_rp_values(pc); + struct xe_device *xe = pc_to_xe(pc); + u32 freq; + + if (GRAPHICS_VERx100(xe) == 1260) + freq = pvc_get_rpe_freq(pc); + else if (GRAPHICS_VERx100(xe) >= 1270) + freq = mtl_get_rpe_freq(pc); + else + freq = tgl_get_rpe_freq(pc); - return pc->rpe_freq; + return freq; } /** @@ -471,6 +579,25 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) return pc->rpn_freq; } +static int xe_guc_pc_get_min_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ + int ret; + + lockdep_assert_held(&pc->freq_lock); + + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); + if (ret) + return ret; + + *freq = pc_get_min_freq(pc); + + return 0; +} + /** * xe_guc_pc_get_min_freq - Get the min operational frequency * @pc: The GuC PC @@ -481,38 +608,28 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) */ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) { - struct xe_gt *gt = pc_to_gt(pc); - unsigned int fw_ref; + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_min_freq_locked(struct xe_guc_pc *pc, u32 freq) +{ int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - /* - * GuC SLPC plays with min freq request when GuCRC is enabled - * Block RC6 for a more reliable read. - */ - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { - ret = -ETIMEDOUT; - goto fw; - } + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; - ret = pc_action_query_task_state(pc); + ret = pc_set_min_freq(pc, freq); if (ret) - goto fw; + return ret; - *freq = pc_get_min_freq(pc); + pc->user_requested_min = freq; -fw: - xe_force_wake_put(gt_to_fw(gt), fw_ref); -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -526,24 +643,28 @@ out: */ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_set_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_get_max_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_set_min_freq(pc, freq); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); if (ret) - goto out; + return ret; - pc->user_requested_min = freq; + *freq = pc_get_max_freq(pc); -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -556,24 +677,28 @@ out: */ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_max_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq) +{ int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_action_query_task_state(pc); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_set_max_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_max_freq(pc); + pc->user_requested_max = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -587,24 +712,14 @@ out: */ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) { - int ret; - - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; + if (XE_GT_WA(pc_to_gt(pc), 22019338487)) { + if (wait_for_flush_complete(pc) != 0) + return -EAGAIN; } - ret = pc_set_max_freq(pc, freq); - if (ret) - goto out; + guard(mutex)(&pc->freq_lock); - pc->user_requested_max = freq; - -out: - mutex_unlock(&pc->freq_lock); - return ret; + return xe_guc_pc_set_max_freq_locked(pc, freq); } /** @@ -710,7 +825,7 @@ static u32 pc_max_freq_cap(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); - if (XE_WA(gt, 22019338487)) { + if (XE_GT_WA(gt, 22019338487)) { if (xe_gt_is_media_type(gt)) return min(LNL_MERT_FREQ_CAP, pc->rp0_freq); else @@ -747,6 +862,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc) static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) { + struct xe_tile *tile = gt_to_tile(pc_to_gt(pc)); int ret; lockdep_assert_held(&pc->freq_lock); @@ -773,6 +889,9 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) if (pc_get_min_freq(pc) > pc->rp0_freq) ret = pc_set_min_freq(pc, pc->rp0_freq); + if (XE_DEVICE_WA(tile_to_xe(tile), 14022085890)) + ret = pc_set_min_freq(pc, max(BMG_MIN_FREQ, pc_get_min_freq(pc))); + out: return ret; } @@ -798,30 +917,117 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc) return ret; } -static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +static bool needs_flush_freq_limit(struct xe_guc_pc *pc) { - int ret = 0; + struct xe_gt *gt = pc_to_gt(pc); - if (XE_WA(pc_to_gt(pc), 22019338487)) { - /* - * Get updated min/max and stash them. - */ - ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq); - if (!ret) - ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq); - if (ret) - return ret; + return XE_GT_WA(gt, 22019338487) && + pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP; +} + +/** + * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush + * @pc: the xe_guc_pc object + * + * As per the WA, reduce max GT frequency during L2 cache flush + */ +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 max_freq; + int ret; + + if (!needs_flush_freq_limit(pc)) + return; + + guard(mutex)(&pc->freq_lock); + + ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq); + if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) { + ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) { + xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); + return; + } + + atomic_set(&pc->flush_freq_limit, 1); /* - * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. + * If user has previously changed max freq, stash that value to + * restore later, otherwise use the current max. New user + * requests wait on flush. */ - mutex_lock(&pc->freq_lock); - ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); - if (!ret) - ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); - mutex_unlock(&pc->freq_lock); + if (pc->user_requested_max != 0) + pc->stashed_max_freq = pc->user_requested_max; + else + pc->stashed_max_freq = max_freq; } + /* + * Wait for actual freq to go below the flush cap: even if the previous + * max was below cap, the current one might still be above it + */ + ret = wait_for_act_freq_max_limit(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) + xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); +} + +/** + * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes. + * @pc: the xe_guc_pc object + * + * Retrieve the previous GT max frequency value. + */ +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + int ret = 0; + + if (!needs_flush_freq_limit(pc)) + return; + + if (!atomic_read(&pc->flush_freq_limit)) + return; + + mutex_lock(&pc->freq_lock); + + ret = pc_set_max_freq(>->uc.guc.pc, pc->stashed_max_freq); + if (ret) + xe_gt_err_once(gt, "Failed to restore max freq %u:%d", + pc->stashed_max_freq, ret); + + atomic_set(&pc->flush_freq_limit, 0); + mutex_unlock(&pc->freq_lock); + wake_up_var(&pc->flush_freq_limit); +} + +static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +{ + int ret; + + if (!XE_GT_WA(pc_to_gt(pc), 22019338487)) + return 0; + + guard(mutex)(&pc->freq_lock); + + /* + * Get updated min/max and stash them. + */ + ret = xe_guc_pc_get_min_freq_locked(pc, &pc->stashed_min_freq); + if (!ret) + ret = xe_guc_pc_get_max_freq_locked(pc, &pc->stashed_max_freq); + if (ret) + return ret; + + /* + * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. + */ + ret = pc_set_min_freq(pc, min(xe_guc_pc_get_rpe_freq(pc), pc_max_freq_cap(pc))); + if (!ret) + ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); + return ret; } @@ -860,7 +1066,6 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) { struct xe_device *xe = pc_to_xe(pc); struct xe_gt *gt = pc_to_gt(pc); - unsigned int fw_ref; int ret = 0; if (xe->info.skip_guc_pc) @@ -870,17 +1075,7 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) if (ret) return ret; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return -ETIMEDOUT; - } - - xe_gt_idle_disable_c6(gt); - - xe_force_wake_put(gt_to_fw(gt), fw_ref); - - return 0; + return xe_gt_idle_disable_c6(gt); } /** @@ -940,8 +1135,6 @@ static int pc_init_freqs(struct xe_guc_pc *pc) if (ret) goto out; - pc_update_rp_values(pc); - pc_init_pcode_freq(pc); /* @@ -955,6 +1148,72 @@ out: return ret; } +static int pc_action_set_strategy(struct xe_guc_pc *pc, u32 val) +{ + int ret = 0; + + ret = pc_action_set_param(pc, + SLPC_PARAM_STRATEGIES, + val); + + return ret; +} + +static const char *power_profile_to_string(struct xe_guc_pc *pc) +{ + switch (pc->power_profile) { + case SLPC_POWER_PROFILE_BASE: + return "base"; + case SLPC_POWER_PROFILE_POWER_SAVING: + return "power_saving"; + default: + return "invalid"; + } +} + +void xe_guc_pc_get_power_profile(struct xe_guc_pc *pc, char *profile) +{ + switch (pc->power_profile) { + case SLPC_POWER_PROFILE_BASE: + sprintf(profile, "[%s] %s\n", "base", "power_saving"); + break; + case SLPC_POWER_PROFILE_POWER_SAVING: + sprintf(profile, "%s [%s]\n", "base", "power_saving"); + break; + default: + sprintf(profile, "invalid"); + } +} + +int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf) +{ + int ret = 0; + u32 val; + + if (strncmp("base", buf, strlen("base")) == 0) + val = SLPC_POWER_PROFILE_BASE; + else if (strncmp("power_saving", buf, strlen("power_saving")) == 0) + val = SLPC_POWER_PROFILE_POWER_SAVING; + else + return -EINVAL; + + guard(mutex)(&pc->freq_lock); + xe_pm_runtime_get_noresume(pc_to_xe(pc)); + + ret = pc_action_set_param(pc, + SLPC_PARAM_POWER_PROFILE, + val); + if (ret) + xe_gt_err_once(pc_to_gt(pc), "Failed to set power profile to %d: %pe\n", + val, ERR_PTR(ret)); + else + pc->power_profile = val; + + xe_pm_runtime_put(pc_to_xe(pc)); + + return ret; +} + /** * xe_guc_pc_start - Start GuC's Power Conservation component * @pc: Xe_GuC_PC instance @@ -965,12 +1224,13 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) struct xe_gt *gt = pc_to_gt(pc); u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); unsigned int fw_ref; + ktime_t earlier; int ret; xe_gt_assert(gt, xe_device_uc_enabled(xe)); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_GT)) { xe_force_wake_put(gt_to_fw(gt), fw_ref); return -ETIMEDOUT; } @@ -986,17 +1246,29 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) goto out; } - memset(pc->bo->vmap.vaddr, 0, size); + xe_map_memset(xe, &pc->bo->vmap, 0, 0, size); slpc_shared_data_write(pc, header.size, size); + earlier = ktime_get(); ret = pc_action_reset(pc); if (ret) goto out; - if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) { - xe_gt_err(gt, "GuC PC Start failed\n"); - ret = -EIO; - goto out; + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, + SLPC_RESET_TIMEOUT_MS)) { + xe_gt_warn(gt, "GuC PC start taking longer than normal [freq = %dMHz (req = %dMHz), perf_limit_reasons = 0x%08X]\n", + xe_guc_pc_get_act_freq(pc), get_cur_freq(gt), + xe_gt_throttle_get_limit_reasons(gt)); + + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, + SLPC_RESET_EXTENDED_TIMEOUT_MS)) { + xe_gt_err(gt, "GuC PC Start failed: Dynamic GT frequency control and GT sleep states are now disabled.\n"); + ret = -EIO; + goto out; + } + + xe_gt_warn(gt, "GuC PC excessive start time: %lldms", + ktime_ms_delta(ktime_get(), earlier)); } ret = pc_init_freqs(pc); @@ -1014,6 +1286,16 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) } ret = pc_action_setup_gucrc(pc, GUCRC_FIRMWARE_CONTROL); + if (ret) + goto out; + + /* Enable SLPC Optimized Strategy for compute */ + ret = pc_action_set_strategy(pc, SLPC_OPTIMIZED_STRATEGY_COMPUTE); + + /* Set cached value of power_profile */ + ret = xe_guc_pc_set_power_profile(pc, power_profile_to_string(pc)); + if (unlikely(ret)) + xe_gt_err(gt, "Failed to set SLPC power profile: %pe\n", ERR_PTR(ret)); out: xe_force_wake_put(gt_to_fw(gt), fw_ref); @@ -1058,7 +1340,7 @@ static void xe_guc_pc_fini_hw(void *arg) XE_WARN_ON(xe_guc_pc_stop(pc)); /* Bind requested freq to mert_freq_cap before unload */ - pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq)); + pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), xe_guc_pc_get_rpe_freq(pc))); xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), fw_ref); } @@ -1086,11 +1368,72 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) bo = xe_managed_bo_create_pin_map(xe, tile, size, XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); if (IS_ERR(bo)) return PTR_ERR(bo); pc->bo = bo; + pc->power_profile = SLPC_POWER_PROFILE_BASE; + return devm_add_action_or_reset(xe->drm.dev, xe_guc_pc_fini_hw, pc); } + +static const char *pc_get_state_string(struct xe_guc_pc *pc) +{ + switch (slpc_shared_data_read(pc, header.global_state)) { + case SLPC_GLOBAL_STATE_NOT_RUNNING: + return "not running"; + case SLPC_GLOBAL_STATE_INITIALIZING: + return "initializing"; + case SLPC_GLOBAL_STATE_RESETTING: + return "resetting"; + case SLPC_GLOBAL_STATE_RUNNING: + return "running"; + case SLPC_GLOBAL_STATE_SHUTTING_DOWN: + return "shutting down"; + case SLPC_GLOBAL_STATE_ERROR: + return "error"; + default: + return "unknown"; + } +} + +/** + * xe_guc_pc_print - Print GuC's Power Conservation information for debug + * @pc: Xe_GuC_PC instance + * @p: drm_printer + */ +void xe_guc_pc_print(struct xe_guc_pc *pc, struct drm_printer *p) +{ + drm_printf(p, "SLPC Shared Data Header:\n"); + drm_printf(p, "\tSize: %x\n", slpc_shared_data_read(pc, header.size)); + drm_printf(p, "\tGlobal State: %s\n", pc_get_state_string(pc)); + + if (pc_action_query_task_state(pc)) + return; + + drm_printf(p, "\nSLPC Tasks Status:\n"); + drm_printf(p, "\tGTPERF enabled: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_GTPERF_TASK_ENABLED)); + drm_printf(p, "\tDCC enabled: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_DCC_TASK_ENABLED)); + drm_printf(p, "\tDCC in use: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_IN_DCC)); + drm_printf(p, "\tBalancer enabled: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_BALANCER_ENABLED)); + drm_printf(p, "\tIBC enabled: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_IBC_TASK_ENABLED)); + drm_printf(p, "\tBalancer IA LMT enabled: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_BALANCER_IA_LMT_ENABLED)); + drm_printf(p, "\tBalancer IA LMT active: %s\n", + str_yes_no(slpc_shared_data_read(pc, task_state_data.status) & + SLPC_BALANCER_IA_LMT_ACTIVE)); +} |
