summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_pmu.c
diff options
context:
space:
mode:
authorTvrtko Ursulin <tvrtko.ursulin@intel.com>2023-05-19 08:49:45 -0700
committerUmesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>2023-05-22 11:07:52 -0700
commitbc4be0a38b63b6d4d00a58b10e164f56049be2c2 (patch)
tree6f16b441ff953df6ca132d5b0f85c5cd961c6f4c /drivers/gpu/drm/i915/i915_pmu.c
parentb319cc594e666061b22331da654606b92730343a (diff)
drm/i915/pmu: Prepare for multi-tile non-engine counters
Reserve some bits in the counter config namespace which will carry the tile id and prepare the code to handle this. No per tile counters have been added yet. v2: - Fix checkpatch issues - Use 4 bits for gt id in non-engine counters. Drop FIXME. - Set MAX GTs to 4. Drop FIXME. v3: (Ashutosh, Tvrtko) - Drop BUG_ON that would never fire - Make enable u64 - Pull in some code from next patch v4: Set I915_PMU_MAX_GTS to 2 (Tvrtko) v5: s/u64/u32 where needed (Ashutosh) Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20230519154946.3751971-7-umesh.nerlige.ramappa@intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/i915_pmu.c')
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.c146
1 files changed, 105 insertions, 41 deletions
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index ecb57a94143e..5cfc322e69b4 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -56,11 +56,21 @@ static bool is_engine_config(const u64 config)
return config < __I915_PMU_OTHER(0);
}
+static unsigned int config_gt_id(const u64 config)
+{
+ return config >> __I915_PMU_GT_SHIFT;
+}
+
+static u64 config_counter(const u64 config)
+{
+ return config & ~(~0ULL << __I915_PMU_GT_SHIFT);
+}
+
static unsigned int other_bit(const u64 config)
{
unsigned int val;
- switch (config) {
+ switch (config_counter(config)) {
case I915_PMU_ACTUAL_FREQUENCY:
val = __I915_PMU_ACTUAL_FREQUENCY_ENABLED;
break;
@@ -78,7 +88,9 @@ static unsigned int other_bit(const u64 config)
return -1;
}
- return I915_ENGINE_SAMPLE_COUNT + val;
+ return I915_ENGINE_SAMPLE_COUNT +
+ config_gt_id(config) * __I915_PMU_TRACKED_EVENT_COUNT +
+ val;
}
static unsigned int config_bit(const u64 config)
@@ -115,6 +127,18 @@ static unsigned int event_bit(struct perf_event *event)
return config_bit(event->attr.config);
}
+static u32 frequency_enabled_mask(void)
+{
+ unsigned int i;
+ u32 mask = 0;
+
+ for (i = 0; i < I915_PMU_MAX_GTS; i++)
+ mask |= config_mask(__I915_PMU_ACTUAL_FREQUENCY(i)) |
+ config_mask(__I915_PMU_REQUESTED_FREQUENCY(i));
+
+ return mask;
+}
+
static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
{
struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
@@ -131,9 +155,7 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
* Mask out all the ones which do not need the timer, or in
* other words keep all the ones that could need the timer.
*/
- enable &= config_mask(I915_PMU_ACTUAL_FREQUENCY) |
- config_mask(I915_PMU_REQUESTED_FREQUENCY) |
- ENGINE_SAMPLE_MASK;
+ enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK;
/*
* When the GPU is idle per-engine counters do not need to be
@@ -175,9 +197,37 @@ static inline s64 ktime_since_raw(const ktime_t kt)
return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
}
+static unsigned int
+__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
+{
+ unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
+
+ GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
+
+ return idx;
+}
+
+static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
+{
+ return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
+}
+
+static void
+store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
+{
+ pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
+}
+
+static void
+add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
+{
+ pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
+}
+
static u64 get_rc6(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
+ const unsigned int gt_id = gt->info.id;
struct i915_pmu *pmu = &i915->pmu;
unsigned long flags;
bool awake = false;
@@ -192,7 +242,7 @@ static u64 get_rc6(struct intel_gt *gt)
spin_lock_irqsave(&pmu->lock, flags);
if (awake) {
- pmu->sample[__I915_SAMPLE_RC6].cur = val;
+ store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
} else {
/*
* We think we are runtime suspended.
@@ -201,14 +251,14 @@ static u64 get_rc6(struct intel_gt *gt)
* on top of the last known real value, as the approximated RC6
* counter value.
*/
- val = ktime_since_raw(pmu->sleep_last);
- val += pmu->sample[__I915_SAMPLE_RC6].cur;
+ val = ktime_since_raw(pmu->sleep_last[gt_id]);
+ val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
}
- if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
- val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
+ if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
+ val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
else
- pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
+ store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
spin_unlock_irqrestore(&pmu->lock, flags);
@@ -218,13 +268,20 @@ static u64 get_rc6(struct intel_gt *gt)
static void init_rc6(struct i915_pmu *pmu)
{
struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
- intel_wakeref_t wakeref;
+ struct intel_gt *gt;
+ unsigned int i;
+
+ for_each_gt(gt, i915, i) {
+ intel_wakeref_t wakeref;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
+ u64 val = __get_rc6(gt);
- with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref) {
- pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
- pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur =
- pmu->sample[__I915_SAMPLE_RC6].cur;
- pmu->sleep_last = ktime_get_raw();
+ store_sample(pmu, i, __I915_SAMPLE_RC6, val);
+ store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
+ val);
+ pmu->sleep_last[i] = ktime_get_raw();
+ }
}
}
@@ -232,8 +289,8 @@ static void park_rc6(struct intel_gt *gt)
{
struct i915_pmu *pmu = &gt->i915->pmu;
- pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(gt);
- pmu->sleep_last = ktime_get_raw();
+ store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
+ pmu->sleep_last[gt->info.id] = ktime_get_raw();
}
static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
@@ -373,34 +430,30 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
}
}
-static void
-add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
-{
- sample->cur += mul_u32_u32(val, mul);
-}
-
-static bool frequency_sampling_enabled(struct i915_pmu *pmu)
+static bool
+frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
{
return pmu->enable &
- (config_mask(I915_PMU_ACTUAL_FREQUENCY) |
- config_mask(I915_PMU_REQUESTED_FREQUENCY));
+ (config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt)) |
+ config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt)));
}
static void
frequency_sample(struct intel_gt *gt, unsigned int period_ns)
{
struct drm_i915_private *i915 = gt->i915;
+ const unsigned int gt_id = gt->info.id;
struct i915_pmu *pmu = &i915->pmu;
struct intel_rps *rps = &gt->rps;
- if (!frequency_sampling_enabled(pmu))
+ if (!frequency_sampling_enabled(pmu, gt_id))
return;
/* Report 0/0 (actual/requested) frequency while parked. */
if (!intel_gt_pm_get_if_awake(gt))
return;
- if (pmu->enable & config_mask(I915_PMU_ACTUAL_FREQUENCY)) {
+ if (pmu->enable & config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt_id))) {
u32 val;
/*
@@ -416,12 +469,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
if (!val)
val = intel_gpu_freq(rps, rps->cur_freq);
- add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
+ add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
val, period_ns / 1000);
}
- if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) {
- add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
+ if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
+ add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
intel_rps_get_requested_frequency(rps),
period_ns / 1000);
}
@@ -458,9 +511,7 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
continue;
engines_sample(gt, period_ns);
-
- if (i == 0) /* FIXME */
- frequency_sample(gt, period_ns);
+ frequency_sample(gt, period_ns);
}
hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
@@ -502,7 +553,13 @@ config_status(struct drm_i915_private *i915, u64 config)
{
struct intel_gt *gt = to_gt(i915);
- switch (config) {
+ unsigned int gt_id = config_gt_id(config);
+ unsigned int max_gt_id = HAS_EXTRA_GT_LIST(i915) ? 1 : 0;
+
+ if (gt_id > max_gt_id)
+ return -ENOENT;
+
+ switch (config_counter(config)) {
case I915_PMU_ACTUAL_FREQUENCY:
if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
/* Requires a mutex for sampling! */
@@ -513,6 +570,8 @@ config_status(struct drm_i915_private *i915, u64 config)
return -ENODEV;
break;
case I915_PMU_INTERRUPTS:
+ if (gt_id)
+ return -ENOENT;
break;
case I915_PMU_RC6_RESIDENCY:
if (!gt->rc6.supported)
@@ -610,22 +669,27 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
val = engine->pmu.sample[sample].cur;
}
} else {
- switch (event->attr.config) {
+ const unsigned int gt_id = config_gt_id(event->attr.config);
+ const u64 config = config_counter(event->attr.config);
+
+ switch (config) {
case I915_PMU_ACTUAL_FREQUENCY:
val =
- div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
+ div_u64(read_sample(pmu, gt_id,
+ __I915_SAMPLE_FREQ_ACT),
USEC_PER_SEC /* to MHz */);
break;
case I915_PMU_REQUESTED_FREQUENCY:
val =
- div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
+ div_u64(read_sample(pmu, gt_id,
+ __I915_SAMPLE_FREQ_REQ),
USEC_PER_SEC /* to MHz */);
break;
case I915_PMU_INTERRUPTS:
val = READ_ONCE(pmu->irq_count);
break;
case I915_PMU_RC6_RESIDENCY:
- val = get_rc6(to_gt(i915));
+ val = get_rc6(i915->gt[gt_id]);
break;
case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
val = ktime_to_ns(intel_gt_get_awake_time(to_gt(i915)));