Diffstat (limited to 'drivers/gpu/drm/i915/i915_perf.c')
 drivers/gpu/drm/i915/i915_perf.c | 1419 ++++++++++++++++++++++++-------
 1 file changed, 1024 insertions(+), 395 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 0defbb43ceea..0b9d9f3f7813 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -192,6 +192,7 @@
*/
#include <linux/anon_inodes.h>
+#include <linux/nospec.h>
#include <linux/sizes.h>
#include <linux/uuid.h>
@@ -204,15 +205,21 @@
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
+#include "gt/intel_gt_mcr.h"
+#include "gt/intel_gt_print.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_lrc.h"
#include "gt/intel_lrc_reg.h"
+#include "gt/intel_rc6.h"
#include "gt/intel_ring.h"
+#include "gt/uc/intel_guc_slpc.h"
#include "i915_drv.h"
#include "i915_file_private.h"
#include "i915_perf.h"
#include "i915_perf_oa_regs.h"
+#include "i915_reg.h"
+#include "i915_mmio_range.h"
/* HW requires this to be a power of two, between 128k and 16M, though driver
* is currently generally designed assuming the largest 16M size is used such
@@ -286,6 +293,7 @@ static u32 i915_perf_stream_paranoid = true;
#define OAREPORT_REASON_CTX_SWITCH (1<<3)
#define OAREPORT_REASON_CLK_RATIO (1<<5)
+#define HAS_MI_SET_PREDICATE(i915) (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
/* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
*
@@ -320,6 +328,14 @@ static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
[I915_OA_FORMAT_A12] = { 0, 64 },
[I915_OA_FORMAT_A12_B8_C8] = { 2, 128 },
[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
+ [I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
+ [I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 },
+ [I915_OAM_FORMAT_MPEC8u64_B8_C8] = { 1, 192, TYPE_OAM, HDR_64_BIT },
+ [I915_OAM_FORMAT_MPEC8u32_B8_C8] = { 2, 128, TYPE_OAM, HDR_64_BIT },
+};
+
+static const u32 mtl_oa_base[] = {
+ [PERF_GROUP_OAM_SAMEDIA_0] = 0x393000,
};
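/*
 * A sketch inferred from the positional initializers above (hedged; the
 * real definition lives in the perf headers, this is only what the
 * initializers imply):
 */
struct i915_oa_format_sketch {
	u32 format;		/* HW format id programmed into OACONTROL */
	int size;		/* report size in bytes */
	int type;		/* OA unit type, e.g. TYPE_OAM vs default OAG */
	enum report_header header; /* HDR_64_BIT for 64-bit id/timestamp */
};
/*
 * So { 1, 192, TYPE_OAM, HDR_64_BIT } above reads: HW format 1, 192-byte
 * reports, produced by an OAM unit, with 64-bit report id and timestamp.
 */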
#define SAMPLE_OA_REPORT (1<<0)
@@ -412,11 +428,17 @@ static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
kfree(oa_bo);
}
+static inline const
+struct i915_perf_regs *__oa_regs(struct i915_perf_stream *stream)
+{
+ return &stream->engine->oa_group->regs;
+}
+
static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
- return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) &
+ return intel_uncore_read(uncore, __oa_regs(stream)->oa_tail_ptr) &
GEN12_OAG_OATAILPTR_MASK;
}
@@ -435,6 +457,66 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
}
+#define oa_report_header_64bit(__s) \
+ ((__s)->oa_buffer.format->header == HDR_64_BIT)
+
+static u64 oa_report_id(struct i915_perf_stream *stream, void *report)
+{
+ return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
+}
+
+static u64 oa_report_reason(struct i915_perf_stream *stream, void *report)
+{
+ return (oa_report_id(stream, report) >> OAREPORT_REASON_SHIFT) &
+ (GRAPHICS_VER(stream->perf->i915) == 12 ?
+ OAREPORT_REASON_MASK_EXTENDED :
+ OAREPORT_REASON_MASK);
+}
+
+static void oa_report_id_clear(struct i915_perf_stream *stream, u32 *report)
+{
+ if (oa_report_header_64bit(stream))
+ *(u64 *)report = 0;
+ else
+ *report = 0;
+}
+
+static bool oa_report_ctx_invalid(struct i915_perf_stream *stream, void *report)
+{
+ return !(oa_report_id(stream, report) &
+ stream->perf->gen8_valid_ctx_bit);
+}
+
+static u64 oa_timestamp(struct i915_perf_stream *stream, void *report)
+{
+ return oa_report_header_64bit(stream) ?
+ *((u64 *)report + 1) :
+ *((u32 *)report + 1);
+}
+
+static void oa_timestamp_clear(struct i915_perf_stream *stream, u32 *report)
+{
+ if (oa_report_header_64bit(stream))
+ *(u64 *)&report[2] = 0;
+ else
+ report[1] = 0;
+}
+
+static u32 oa_context_id(struct i915_perf_stream *stream, u32 *report)
+{
+ u32 ctx_id = oa_report_header_64bit(stream) ? report[4] : report[2];
+
+ return ctx_id & stream->specific_ctx_id_mask;
+}
+
+static void oa_context_id_squash(struct i915_perf_stream *stream, u32 *report)
+{
+ if (oa_report_header_64bit(stream))
+ report[4] = INVALID_CTX_ID;
+ else
+ report[2] = INVALID_CTX_ID;
+}
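/*
 * Taken together, the accessors above imply two report header layouts
 * (a summary sketch derived from the code, not from a spec):
 *
 *	field		HDR_32_BIT (legacy)	HDR_64_BIT (OAM formats)
 *	report id	dword 0			dwords 0-1
 *	timestamp	dword 1			dwords 2-3
 *	context id	dword 2			dword 4
 *
 * which is why oa_timestamp_clear() zeroes report[1] in one case and the
 * u64 at &report[2] in the other.
 */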
+
/**
* oa_buffer_check_unlocked - check for data and update tail ptr state
* @stream: i915 stream instance
@@ -450,8 +532,7 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
* (See description of OA_TAIL_MARGIN_NSEC above for further details.)
*
* Besides returning true when there is data available to read() this function
- * also updates the tail, aging_tail and aging_timestamp in the oa_buffer
- * object.
+ * also updates the tail in the oa_buffer object.
*
* Note: It's safe to read OA config state here unlocked, assuming that this is
* only called while the stream is enabled, while the global OA configuration
@@ -462,80 +543,67 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
{
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
- int report_size = stream->oa_buffer.format_size;
+ int report_size = stream->oa_buffer.format->size;
+ u32 tail, hw_tail;
unsigned long flags;
bool pollin;
- u32 hw_tail;
- u64 now;
+ u32 partial_report_size;
- /* We have to consider the (unlikely) possibility that read() errors
+ /*
+ * We have to consider the (unlikely) possibility that read() errors
* could result in an OA buffer reset which might reset the head and
* tail state.
*/
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
+ hw_tail -= gtt_offset;
- /* The tail pointer increases in 64 byte increments,
- * not in report_size steps...
+ /*
+ * The tail pointer increases in 64 byte increments, not in report_size
+ * steps. Also the report size may not be a power of 2. Compute the
+ * size of a potentially partially landed report in the OA buffer.
*/
- hw_tail &= ~(report_size - 1);
+ partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail);
+ partial_report_size %= report_size;
- now = ktime_get_mono_fast_ns();
+ /* Subtract partial amount off the tail */
+ hw_tail = OA_TAKEN(hw_tail, partial_report_size);
- if (hw_tail == stream->oa_buffer.aging_tail &&
- (now - stream->oa_buffer.aging_timestamp) > OA_TAIL_MARGIN_NSEC) {
- /* If the HW tail hasn't move since the last check and the HW
- * tail has been aging for long enough, declare it the new
- * tail.
- */
- stream->oa_buffer.tail = stream->oa_buffer.aging_tail;
- } else {
- u32 head, tail, aged_tail;
+ tail = hw_tail;
- /* NB: The head we observe here might effectively be a little
- * out of date. If a read() is in progress, the head could be
- * anywhere between this head and stream->oa_buffer.tail.
- */
- head = stream->oa_buffer.head - gtt_offset;
- aged_tail = stream->oa_buffer.tail - gtt_offset;
-
- hw_tail -= gtt_offset;
- tail = hw_tail;
-
- /* Walk the stream backward until we find a report with dword 0
- * & 1 not at 0. Since the circular buffer pointers progress by
- * increments of 64 bytes and that reports can be up to 256
- * bytes long, we can't tell whether a report has fully landed
- * in memory before the first 2 dwords of the following report
- * have effectively landed.
- *
- * This is assuming that the writes of the OA unit land in
- * memory in the order they were written to.
- * If not : (╯°□°)╯︵ ┻━┻
- */
- while (OA_TAKEN(tail, aged_tail) >= report_size) {
- u32 *report32 = (void *)(stream->oa_buffer.vaddr + tail);
+ /*
+ * Walk the stream backward until we find a report with report
+ * id and timestamp not at 0. Since the circular buffer pointers
+ * progress by increments of 64 bytes and that reports can be up
+ * to 256 bytes long, we can't tell whether a report has fully
+ * landed in memory before the report id and timestamp of the
+ * following report have effectively landed.
+ *
+ * This is assuming that the writes of the OA unit land in
+ * memory in the order they were written to.
+ * If not : (╯°□°)╯︵ ┻━┻
+ */
+ while (OA_TAKEN(tail, stream->oa_buffer.tail) >= report_size) {
+ void *report = stream->oa_buffer.vaddr + tail;
- if (report32[0] != 0 || report32[1] != 0)
- break;
+ if (oa_report_id(stream, report) ||
+ oa_timestamp(stream, report))
+ break;
- tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
- }
+ tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
+ }
- if (OA_TAKEN(hw_tail, tail) > report_size &&
- __ratelimit(&stream->perf->tail_pointer_race))
- DRM_NOTE("unlanded report(s) head=0x%x "
- "tail=0x%x hw_tail=0x%x\n",
- head, tail, hw_tail);
+ if (OA_TAKEN(hw_tail, tail) > report_size &&
+ __ratelimit(&stream->perf->tail_pointer_race))
+ drm_notice(&stream->uncore->i915->drm,
+ "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n",
+ stream->oa_buffer.head, tail, hw_tail);
- stream->oa_buffer.tail = gtt_offset + tail;
- stream->oa_buffer.aging_tail = gtt_offset + hw_tail;
- stream->oa_buffer.aging_timestamp = now;
- }
+ stream->oa_buffer.tail = tail;
- pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
- stream->oa_buffer.head - gtt_offset) >= report_size;
+ pollin = OA_TAKEN(stream->oa_buffer.tail,
+ stream->oa_buffer.head) >= report_size;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
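/*
 * Worked example of the partial-report math above, assuming the usual
 * wrap-around OA_TAKEN() (i.e. (tail - head) & (OA_BUFFER_SIZE - 1)) and
 * a 192-byte, non-power-of-2 report format:
 *
 *	stream->oa_buffer.tail = 0x1000
 *	hw_tail (64-byte granular) = 0x1140	-> 0x140 = 320 bytes landed
 *	partial_report_size = 320 % 192 = 128	-> second report half-landed
 *	hw_tail = OA_TAKEN(0x1140, 128) = 0x10c0 -> exactly one full report
 *
 * The walk-back loop then only publishes the report at 0x1000 once its
 * report id or timestamp reads back non-zero.
 */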
@@ -599,8 +667,10 @@ static int append_oa_sample(struct i915_perf_stream *stream,
size_t *offset,
const u8 *report)
{
- int report_size = stream->oa_buffer.format_size;
+ int report_size = stream->oa_buffer.format->size;
struct drm_i915_perf_record_header header;
+ int report_size_partial;
+ u8 *oa_buf_end;
header.type = DRM_I915_PERF_RECORD_SAMPLE;
header.pad = 0;
@@ -614,8 +684,20 @@ static int append_oa_sample(struct i915_perf_stream *stream,
return -EFAULT;
buf += sizeof(header);
- if (copy_to_user(buf, report, report_size))
+ oa_buf_end = stream->oa_buffer.vaddr + OA_BUFFER_SIZE;
+ report_size_partial = oa_buf_end - report;
+
+ if (report_size_partial < report_size) {
+ if (copy_to_user(buf, report, report_size_partial))
+ return -EFAULT;
+ buf += report_size_partial;
+
+ if (copy_to_user(buf, stream->oa_buffer.vaddr,
+ report_size - report_size_partial))
+ return -EFAULT;
+ } else if (copy_to_user(buf, report, report_size)) {
return -EFAULT;
+ }
(*offset) += header.size;
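/*
 * Worked example of the split copy above: report sizes no longer need to
 * divide OA_BUFFER_SIZE, so a report may straddle the buffer end. With a
 * 16M buffer and 192-byte reports, the slot at offset 16M - 64 (which is
 * 87381 * 192) has report_size_partial = 64: the first 64 bytes are
 * copied from the end of the vmap and the remaining 128 bytes from
 * stream->oa_buffer.vaddr, matching the head wrap in the append loops.
 */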
@@ -649,14 +731,13 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
size_t *offset)
{
struct intel_uncore *uncore = stream->uncore;
- int report_size = stream->oa_buffer.format_size;
+ int report_size = stream->oa_buffer.format->size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
u32 mask = (OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
u32 head, tail;
- u32 taken;
int ret = 0;
if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
@@ -670,66 +751,34 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
/*
- * NB: oa_buffer.head/tail include the gtt_offset which we don't want
- * while indexing relative to oa_buf_base.
- */
- head -= gtt_offset;
- tail -= gtt_offset;
-
- /*
* An out of bounds or misaligned head or tail pointer implies a driver
* bug since we validate + align the tail pointers we read from the
* hardware and we are in full control of the head pointer which should
- * only be incremented by multiples of the report size (notably also
- * all a power of two).
+ * only be incremented by multiples of the report size.
*/
if (drm_WARN_ONCE(&uncore->i915->drm,
- head > OA_BUFFER_SIZE || head % report_size ||
- tail > OA_BUFFER_SIZE || tail % report_size,
+ head > OA_BUFFER_SIZE ||
+ tail > OA_BUFFER_SIZE,
"Inconsistent OA buffer pointers: head = %u, tail = %u\n",
head, tail))
return -EIO;
for (/* none */;
- (taken = OA_TAKEN(tail, head));
+ OA_TAKEN(tail, head);
head = (head + report_size) & mask) {
u8 *report = oa_buf_base + head;
u32 *report32 = (void *)report;
u32 ctx_id;
- u32 reason;
-
- /*
- * All the report sizes factor neatly into the buffer
- * size so we never expect to see a report split
- * between the beginning and end of the buffer.
- *
- * Given the initial alignment check a misalignment
- * here would imply a driver bug that would result
- * in an overrun.
- */
- if (drm_WARN_ON(&uncore->i915->drm,
- (OA_BUFFER_SIZE - head) < report_size)) {
- drm_err(&uncore->i915->drm,
- "Spurious OA head ptr: non-integral report offset\n");
- break;
- }
+ u64 reason;
/*
* The reason field includes flags identifying what
* triggered this specific report (mostly timer
* triggered or e.g. due to a context switch).
- *
- * This field is never expected to be zero so we can
- * check that the report isn't invalid before copying
- * it to userspace...
*/
- reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
- (GRAPHICS_VER(stream->perf->i915) == 12 ?
- OAREPORT_REASON_MASK_EXTENDED :
- OAREPORT_REASON_MASK));
-
- ctx_id = report32[2] & stream->specific_ctx_id_mask;
+ reason = oa_report_reason(stream, report);
+ ctx_id = oa_context_id(stream, report32);
/*
* Squash whatever is in the CTX_ID field if it's marked as
@@ -738,10 +787,44 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
*
* Note: that we don't clear the valid_ctx_bit so userspace can
* understand that the ID has been squashed by the kernel.
+ *
+ * Update:
+ *
+ * On XEHP platforms the behavior of context id valid bit has
+ * changed compared to prior platforms. To describe this, we
+ * define a few terms:
+ *
+ * context-switch-report: This is a report with the reason type
+ * being context-switch. It is generated when a context switches
+ * out.
+ *
+ * context-valid-bit: A bit that is set in the report ID field
+ * to indicate that a valid context has been loaded.
+ *
+ * gpu-idle: A condition characterized by a
+ * context-switch-report with context-valid-bit set to 0.
+ *
+ * On prior platforms, context-valid-bit is set to 0 only
+ * when GPU goes idle. In all other reports, it is set to 1.
+ *
+ * On XEHP platforms, context-valid-bit is set to 1 in a context
+ * switch report if a new context switched in. For all other
+ * reports it is set to 0.
+ *
+ * This change in behavior causes an issue with MMIO triggered
+ * reports. MMIO triggered reports have the markers in the
+ * context ID field and the context-valid-bit is 0. The logic
+ * below to squash the context ID would render the report
+ * useless since the user will not be able to find it in the OA
+ * buffer. Since MMIO triggered reports exist only on XEHP,
+ * we should avoid squashing these for XEHP platforms.
*/
- if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) &&
- GRAPHICS_VER(stream->perf->i915) <= 11)
- ctx_id = report32[2] = INVALID_CTX_ID;
+
+ if (oa_report_ctx_invalid(stream, report) &&
+ GRAPHICS_VER_FULL(stream->engine->i915) < IP_VER(12, 55)) {
+ ctx_id = INVALID_CTX_ID;
+ oa_context_id_squash(stream, report32);
+ }
/*
* NB: For Gen 8 the OA unit no longer supports clock gating
@@ -774,7 +857,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* switches since it's not-uncommon for periodic samples to
* identify a switch before any 'context switch' report.
*/
- if (!stream->perf->exclusive_stream->ctx ||
+ if (!stream->ctx ||
stream->specific_ctx_id == ctx_id ||
stream->oa_buffer.last_ctx_id == stream->specific_ctx_id ||
reason & OAREPORT_REASON_CTX_SWITCH) {
@@ -783,9 +866,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* While filtering for a single context we avoid
* leaking the IDs of other contexts.
*/
- if (stream->perf->exclusive_stream->ctx &&
+ if (stream->ctx &&
stream->specific_ctx_id != ctx_id) {
- report32[2] = INVALID_CTX_ID;
+ oa_context_id_squash(stream, report32);
}
ret = append_oa_sample(stream, buf, count, offset,
@@ -796,19 +879,34 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
stream->oa_buffer.last_ctx_id = ctx_id;
}
- /*
- * Clear out the first 2 dword as a mean to detect unlanded
- * reports.
- */
- report32[0] = 0;
- report32[1] = 0;
+ if (is_power_of_2(report_size)) {
+ /*
+ * Clear out the report id and timestamp as a means
+ * to detect unlanded reports.
+ */
+ oa_report_id_clear(stream, report32);
+ oa_timestamp_clear(stream, report32);
+ } else {
+ u8 *oa_buf_end = stream->oa_buffer.vaddr +
+ OA_BUFFER_SIZE;
+ u32 part = oa_buf_end - (u8 *)report32;
+
+ /* Zero out the entire report */
+ if (report_size <= part) {
+ memset(report32, 0, report_size);
+ } else {
+ memset(report32, 0, part);
+ memset(oa_buf_base, 0, report_size - part);
+ }
+ }
}
if (start_offset != *offset) {
i915_reg_t oaheadptr;
oaheadptr = GRAPHICS_VER(stream->perf->i915) == 12 ?
- GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR;
+ __oa_regs(stream)->oa_head_ptr :
+ GEN8_OAHEADPTR;
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
@@ -816,9 +914,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* We removed the gtt_offset for the copy loop above, indexing
* relative to oa_buf_base so put back here...
*/
- head += gtt_offset;
intel_uncore_write(uncore, oaheadptr,
- head & GEN12_OAG_OAHEADPTR_MASK);
+ (head + gtt_offset) & GEN12_OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = head;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
@@ -861,7 +958,8 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
return -EIO;
oastatus_reg = GRAPHICS_VER(stream->perf->i915) == 12 ?
- GEN12_OAG_OASTATUS : GEN8_OASTATUS;
+ __oa_regs(stream)->oa_status :
+ GEN8_OASTATUS;
oastatus = intel_uncore_read(uncore, oastatus_reg);
@@ -943,14 +1041,13 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
size_t *offset)
{
struct intel_uncore *uncore = stream->uncore;
- int report_size = stream->oa_buffer.format_size;
+ int report_size = stream->oa_buffer.format->size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
u32 mask = (OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
u32 head, tail;
- u32 taken;
int ret = 0;
if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
@@ -963,12 +1060,6 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
- /* NB: oa_buffer.head/tail include the gtt_offset which we don't want
- * while indexing relative to oa_buf_base.
- */
- head -= gtt_offset;
- tail -= gtt_offset;
-
/* An out of bounds or misaligned head or tail pointer implies a driver
* bug since we validate + align the tail pointers we read from the
* hardware and we are in full control of the head pointer which should
@@ -984,7 +1075,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
for (/* none */;
- (taken = OA_TAKEN(tail, head));
+ OA_TAKEN(tail, head);
head = (head + report_size) & mask) {
u8 *report = oa_buf_base + head;
u32 *report32 = (void *)report;
@@ -1012,7 +1103,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
*/
if (report32[0] == 0) {
if (__ratelimit(&stream->perf->spurious_report_rs))
- DRM_NOTE("Skipping spurious, invalid OA report\n");
+ drm_notice(&uncore->i915->drm,
+ "Skipping spurious, invalid OA report\n");
continue;
}
@@ -1030,13 +1122,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
if (start_offset != *offset) {
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- /* We removed the gtt_offset for the copy loop above, indexing
- * relative to oa_buf_base so put back here...
- */
- head += gtt_offset;
-
intel_uncore_write(uncore, GEN7_OASTATUS2,
- (head & GEN7_OASTATUS2_HEAD_MASK) |
+ ((head + gtt_offset) & GEN7_OASTATUS2_HEAD_MASK) |
GEN7_OASTATUS2_MEM_SELECT_GGTT);
stream->oa_buffer.head = head;
@@ -1233,6 +1320,199 @@ retry:
return stream->pinned_ctx;
}
+static int
+__store_reg_to_mem(struct i915_request *rq, i915_reg_t reg, u32 ggtt_offset)
+{
+ u32 *cs, cmd;
+
+ cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+ if (GRAPHICS_VER(rq->i915) >= 8)
+ cmd++;
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = cmd;
+ *cs++ = i915_mmio_reg_offset(reg);
+ *cs++ = ggtt_offset;
+ *cs++ = 0;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int
+__read_reg(struct intel_context *ce, i915_reg_t reg, u32 ggtt_offset)
+{
+ struct i915_request *rq;
+ int err;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ i915_request_get(rq);
+
+ err = __store_reg_to_mem(rq, reg, ggtt_offset);
+
+ i915_request_add(rq);
+ if (!err && i915_request_wait(rq, 0, HZ / 2) < 0)
+ err = -ETIME;
+
+ i915_request_put(rq);
+
+ return err;
+}
+
+static int
+gen12_guc_sw_ctx_id(struct intel_context *ce, u32 *ctx_id)
+{
+ struct i915_vma *scratch;
+ u32 *val;
+ int err;
+
+ scratch = __vm_create_scratch_for_read_pinned(&ce->engine->gt->ggtt->vm, 4);
+ if (IS_ERR(scratch))
+ return PTR_ERR(scratch);
+
+ err = i915_vma_sync(scratch);
+ if (err)
+ goto err_scratch;
+
+ err = __read_reg(ce, RING_EXECLIST_STATUS_HI(ce->engine->mmio_base),
+ i915_ggtt_offset(scratch));
+ if (err)
+ goto err_scratch;
+
+ val = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
+ if (IS_ERR(val)) {
+ err = PTR_ERR(val);
+ goto err_scratch;
+ }
+
+ *ctx_id = *val;
+ i915_gem_object_unpin_map(scratch->obj);
+
+err_scratch:
+ i915_vma_unpin_and_release(&scratch, 0);
+ return err;
+}
+
+/*
+ * For execlist mode of submission, pick an unused context id
+ * 0 - (NUM_CONTEXT_TAG - 1) are used by other contexts
+ * XXX_MAX_CONTEXT_HW_ID is used by idle context
+ *
+ * For GuC mode of submission read context id from the upper dword of the
+ * EXECLIST_STATUS register. Note that we read this value only once and expect
+ * that the value stays fixed for the entire OA use case. There are cases where
+ * GuC KMD implementation may deregister a context to reuse its context id, but
+ * we prevent that from happening to the OA context by pinning it.
+ */
+static int gen12_get_render_context_id(struct i915_perf_stream *stream)
+{
+ u32 ctx_id, mask;
+ int ret;
+
+ if (intel_engine_uses_guc(stream->engine)) {
+ ret = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id);
+ if (ret)
+ return ret;
+
+ mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) <<
+ (GEN12_GUC_SW_CTX_ID_SHIFT - 32);
+ } else if (GRAPHICS_VER_FULL(stream->engine->i915) >= IP_VER(12, 55)) {
+ ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) <<
+ (XEHP_SW_CTX_ID_SHIFT - 32);
+
+ mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
+ (XEHP_SW_CTX_ID_SHIFT - 32);
+ } else {
+ ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) <<
+ (GEN11_SW_CTX_ID_SHIFT - 32);
+
+ mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
+ (GEN11_SW_CTX_ID_SHIFT - 32);
+ }
+ stream->specific_ctx_id = ctx_id & mask;
+ stream->specific_ctx_id_mask = mask;
+
+ return 0;
+}
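/*
 * The mask math above isolates the sw ctx id field in the upper dword of
 * the lrc descriptor. With illustrative values (WIDTH = 16, SHIFT = 39;
 * the real macros live in the lrc headers):
 *
 *	mask = ((1U << 16) - 1) << (39 - 32) = 0xffff << 7 = 0x007fff80
 *
 * and (MAX_CONTEXT_HW_ID - 1) is then parked in that field so the OA
 * stream's id cannot collide with a normally scheduled context.
 */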
+
+static bool oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end)
+{
+ u32 idx = *offset;
+ u32 len = min(MI_LRI_LEN(state[idx]) + idx, end);
+ bool found = false;
+
+ idx++;
+ for (; idx < len; idx += 2) {
+ if (state[idx] == reg) {
+ found = true;
+ break;
+ }
+ }
+
+ *offset = idx;
+ return found;
+}
+
+static u32 oa_context_image_offset(struct intel_context *ce, u32 reg)
+{
+ u32 offset, len = (ce->engine->context_size - PAGE_SIZE) / 4;
+ u32 *state = ce->lrc_reg_state;
+
+ if (drm_WARN_ON(&ce->engine->i915->drm, !state))
+ return U32_MAX;
+
+ for (offset = 0; offset < len; ) {
+ if (IS_MI_LRI_CMD(state[offset])) {
+ /*
+ * We expect reg-value pairs in MI_LRI command, so
+ * MI_LRI_LEN() should be even, if not, issue a warning.
+ */
+ drm_WARN_ON(&ce->engine->i915->drm,
+ MI_LRI_LEN(state[offset]) & 0x1);
+
+ if (oa_find_reg_in_lri(state, reg, &offset, len))
+ break;
+ } else {
+ offset++;
+ }
+ }
+
+ return offset < len ? offset : U32_MAX;
+}
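/*
 * For reference while reading the scan above, an MI_LOAD_REGISTER_IMM
 * packet in the context image has (roughly) this shape:
 *
 *	dword 0: MI_LRI header; MI_LRI_LEN() as used here evaluates to the
 *		 number of payload dwords, 2 per register, hence the
 *		 warning when it is odd
 *	dword 1: register offset  \
 *	dword 2: value             }- repeated, hence the "idx += 2" walk
 *	dword 3: register offset  /
 *
 * oa_find_reg_in_lri() returns the index of the matching offset dword,
 * so callers can later poke the value slot at offset + 1.
 */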
+
+static int set_oa_ctx_ctrl_offset(struct intel_context *ce)
+{
+ i915_reg_t reg = GEN12_OACTXCONTROL(ce->engine->mmio_base);
+ struct i915_perf *perf = &ce->engine->i915->perf;
+ u32 offset = perf->ctx_oactxctrl_offset;
+
+	/* Do this only once. Failure is stored as an offset of U32_MAX */
+ if (offset)
+ goto exit;
+
+ offset = oa_context_image_offset(ce, i915_mmio_reg_offset(reg));
+ perf->ctx_oactxctrl_offset = offset;
+
+ drm_dbg(&ce->engine->i915->drm,
+ "%s oa ctx control at 0x%08x dword offset\n",
+ ce->engine->name, offset);
+
+exit:
+ return offset && offset != U32_MAX ? 0 : -ENODEV;
+}
+
+static bool engine_supports_mi_query(struct intel_engine_cs *engine)
+{
+ return engine->class == RENDER_CLASS;
+}
+
/**
* oa_get_render_ctx_id - determine and hold ctx hw id
* @stream: An i915-perf stream opened for OA metrics
@@ -1246,11 +1526,28 @@ retry:
static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
{
struct intel_context *ce;
+ int ret = 0;
ce = oa_pin_context(stream);
if (IS_ERR(ce))
return PTR_ERR(ce);
+ if (engine_supports_mi_query(stream->engine) &&
+ HAS_LOGICAL_RING_CONTEXTS(stream->perf->i915)) {
+ /*
+ * We are enabling perf query here. If we don't find the context
+ * offset here, just return an error.
+ */
+ ret = set_oa_ctx_ctrl_offset(ce);
+ if (ret) {
+ intel_context_unpin(ce);
+ drm_err(&stream->perf->i915->drm,
+ "Enabling perf query failed for %s\n",
+ stream->engine->name);
+ return ret;
+ }
+ }
+
switch (GRAPHICS_VER(ce->engine->i915)) {
case 7: {
/*
@@ -1292,24 +1589,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
case 11:
case 12:
- if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 50)) {
- stream->specific_ctx_id_mask =
- ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
- (XEHP_SW_CTX_ID_SHIFT - 32);
- stream->specific_ctx_id =
- (XEHP_MAX_CONTEXT_HW_ID - 1) <<
- (XEHP_SW_CTX_ID_SHIFT - 32);
- } else {
- stream->specific_ctx_id_mask =
- ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
- /*
- * Pick an unused context id
- * 0 - BITS_PER_LONG are used by other contexts
- * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
- */
- stream->specific_ctx_id =
- (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
- }
+ ret = gen12_get_render_context_id(stream);
break;
default:
@@ -1323,7 +1603,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
stream->specific_ctx_id,
stream->specific_ctx_id_mask);
- return 0;
+ return ret;
}
/**
@@ -1372,11 +1652,24 @@ free_noa_wait(struct i915_perf_stream *stream)
i915_vma_unpin_and_release(&stream->noa_wait, 0);
}
+static bool engine_supports_oa(const struct intel_engine_cs *engine)
+{
+ return engine->oa_group;
+}
+
+static bool engine_supports_oa_format(struct intel_engine_cs *engine, int type)
+{
+ return engine->oa_group && engine->oa_group->type == type;
+}
+
static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
struct i915_perf *perf = stream->perf;
+ struct intel_gt *gt = stream->engine->gt;
+ struct i915_perf_group *g = stream->engine->oa_group;
+ int m;
- if (WARN_ON(stream != perf->exclusive_stream))
+ if (WARN_ON(stream != g->exclusive_stream))
return;
/*
@@ -1385,7 +1678,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
*
* See i915_oa_init_reg_state() and lrc_configure_all_contexts()
*/
- WRITE_ONCE(perf->exclusive_stream, NULL);
+ WRITE_ONCE(g->exclusive_stream, NULL);
perf->ops.disable_metric_set(stream);
free_oa_buffer(stream);
@@ -1399,10 +1692,9 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
free_oa_configs(stream);
free_noa_wait(stream);
- if (perf->spurious_report_rs.missed) {
- DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
- perf->spurious_report_rs.missed);
- }
+ m = ratelimit_state_get_miss(&perf->spurious_report_rs);
+ if (m)
+ gt_notice(gt, "%d spurious OA report notices suppressed due to ratelimiting\n", m);
}
static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
@@ -1418,7 +1710,7 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
*/
intel_uncore_write(uncore, GEN7_OASTATUS2, /* head */
gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT);
- stream->oa_buffer.head = gtt_offset;
+ stream->oa_buffer.head = 0;
intel_uncore_write(uncore, GEN7_OABUFFER, gtt_offset);
@@ -1426,8 +1718,7 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
gtt_offset | OABUFFER_SIZE_16M);
/* Mark that we need updated tail pointers to read from... */
- stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
- stream->oa_buffer.tail = gtt_offset;
+ stream->oa_buffer.tail = 0;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
@@ -1461,7 +1752,7 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
intel_uncore_write(uncore, GEN8_OASTATUS, 0);
intel_uncore_write(uncore, GEN8_OAHEADPTR, gtt_offset);
- stream->oa_buffer.head = gtt_offset;
+ stream->oa_buffer.head = 0;
intel_uncore_write(uncore, GEN8_OABUFFER_UDW, 0);
@@ -1478,8 +1769,7 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */
- stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
- stream->oa_buffer.tail = gtt_offset;
+ stream->oa_buffer.tail = 0;
/*
* Reset state used to recognise context switches, affecting which
@@ -1513,10 +1803,10 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0);
- intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR,
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_status, 0);
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_head_ptr,
gtt_offset & GEN12_OAG_OAHEADPTR_MASK);
- stream->oa_buffer.head = gtt_offset;
+ stream->oa_buffer.head = 0;
/*
* PRM says:
@@ -1526,14 +1816,13 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
* to enable proper functionality of the overflow
* bit."
*/
- intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset |
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_buffer, gtt_offset |
OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
- intel_uncore_write(uncore, GEN12_OAG_OATAILPTR,
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_tail_ptr,
gtt_offset & GEN12_OAG_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */
- stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
- stream->oa_buffer.tail = gtt_offset;
+ stream->oa_buffer.tail = 0;
/*
* Reset state used to recognise context switches, affecting which
@@ -1563,6 +1852,7 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
static int alloc_oa_buffer(struct i915_perf_stream *stream)
{
struct drm_i915_private *i915 = stream->perf->i915;
+ struct intel_gt *gt = stream->engine->gt;
struct drm_i915_gem_object *bo;
struct i915_vma *vma;
int ret;
@@ -1582,11 +1872,22 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);
/* PreHSW required 512K alignment, HSW requires 16M */
- vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
+ vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto err_unref;
}
+
+ /*
+ * PreHSW required 512K alignment.
+ * HSW and onwards, align to requested size of OA buffer.
+ */
+ ret = i915_vma_pin(vma, 0, SZ_16M, PIN_GLOBAL | PIN_HIGH);
+ if (ret) {
+ gt_err(gt, "Failed to pin OA buffer %d\n", ret);
+ goto err_unref;
+ }
+
stream->oa_buffer.vma = vma;
stream->oa_buffer.vaddr =
@@ -1625,8 +1926,7 @@ static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
for (d = 0; d < dword_count; d++) {
*cs++ = cmd;
*cs++ = i915_mmio_reg_offset(reg) + 4 * d;
- *cs++ = intel_gt_scratch_offset(stream->engine->gt,
- offset) + 4 * d;
+ *cs++ = i915_ggtt_offset(stream->noa_wait) + offset + 4 * d;
*cs++ = 0;
}
@@ -1636,6 +1936,7 @@ static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
static int alloc_noa_wait(struct i915_perf_stream *stream)
{
struct drm_i915_private *i915 = stream->perf->i915;
+ struct intel_gt *gt = stream->engine->gt;
struct drm_i915_gem_object *bo;
struct i915_vma *vma;
const u64 delay_ticks = 0xffffffffffffffff -
@@ -1654,8 +1955,17 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
DELTA_TARGET,
N_CS_GPR
};
+ i915_reg_t mi_predicate_result = HAS_MI_SET_PREDICATE(i915) ?
+ MI_PREDICATE_RESULT_2_ENGINE(base) :
+ MI_PREDICATE_RESULT_1(RENDER_RING_BASE);
- bo = i915_gem_object_create_internal(i915, 4096);
+ /*
+ * gt->scratch was being used to save/restore the GPR registers, but on
+ * MTL the scratch uses stolen lmem. An MI_SRM to this memory region
+ * causes an engine hang. Instead allocate an additional page here to
+ * save/restore GPR registers
+ */
+ bo = i915_gem_object_create_internal(i915, 8192);
if (IS_ERR(bo)) {
drm_err(&i915->drm,
"Failed to allocate NOA wait batchbuffer\n");
@@ -1673,26 +1983,35 @@ retry:
* multiple OA config BOs will have a jump to this address and it
* needs to be fixed during the lifetime of the i915/perf stream.
*/
- vma = i915_gem_object_ggtt_pin_ww(bo, &ww, NULL, 0, 0, PIN_HIGH);
+ vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto out_ww;
}
+ ret = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ if (ret)
+ goto out_ww;
+
batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
if (IS_ERR(batch)) {
ret = PTR_ERR(batch);
goto err_unpin;
}
+ stream->noa_wait = vma;
+
+#define GPR_SAVE_OFFSET 4096
+#define PREDICATE_SAVE_OFFSET 4160
+
/* Save registers. */
for (i = 0; i < N_CS_GPR; i++)
cs = save_restore_register(
stream, cs, true /* save */, CS_GPR(i),
- INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
+ GPR_SAVE_OFFSET + 8 * i, 2);
cs = save_restore_register(
- stream, cs, true /* save */, MI_PREDICATE_RESULT_1(RENDER_RING_BASE),
- INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
+ stream, cs, true /* save */, mi_predicate_result,
+ PREDICATE_SAVE_OFFSET, 1);
/* First timestamp snapshot location. */
ts0 = cs;
@@ -1745,7 +2064,10 @@ retry:
*/
*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
*cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
- *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1(RENDER_RING_BASE));
+ *cs++ = i915_mmio_reg_offset(mi_predicate_result);
+
+ if (HAS_MI_SET_PREDICATE(i915))
+ *cs++ = MI_SET_PREDICATE | 1;
/* Restart from the beginning if we had timestamps roll over. */
*cs++ = (GRAPHICS_VER(i915) < 8 ?
@@ -1755,6 +2077,9 @@ retry:
*cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4;
*cs++ = 0;
+ if (HAS_MI_SET_PREDICATE(i915))
+ *cs++ = MI_SET_PREDICATE;
+
/*
* Now add the diff between the two previous timestamps and add it to:
* (((1 << 64) - 1) - delay_ns)
@@ -1782,7 +2107,10 @@ retry:
*/
*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
*cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
- *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1(RENDER_RING_BASE));
+ *cs++ = i915_mmio_reg_offset(mi_predicate_result);
+
+ if (HAS_MI_SET_PREDICATE(i915))
+ *cs++ = MI_SET_PREDICATE | 1;
/* Predicate the jump. */
*cs++ = (GRAPHICS_VER(i915) < 8 ?
@@ -1792,14 +2120,17 @@ retry:
*cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4;
*cs++ = 0;
+ if (HAS_MI_SET_PREDICATE(i915))
+ *cs++ = MI_SET_PREDICATE;
+
/* Restore registers. */
for (i = 0; i < N_CS_GPR; i++)
cs = save_restore_register(
stream, cs, false /* restore */, CS_GPR(i),
- INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
+ GPR_SAVE_OFFSET + 8 * i, 2);
cs = save_restore_register(
- stream, cs, false /* restore */, MI_PREDICATE_RESULT_1(RENDER_RING_BASE),
- INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
+ stream, cs, false /* restore */, mi_predicate_result,
+ PREDICATE_SAVE_OFFSET, 1);
/* And return to the ring. */
*cs++ = MI_BATCH_BUFFER_END;
@@ -1809,7 +2140,6 @@ retry:
i915_gem_object_flush_map(bo);
__i915_gem_object_release_map(bo);
- stream->noa_wait = vma;
goto out_ww;
err_unpin:
@@ -2017,14 +2347,12 @@ retry:
goto err_add_request;
}
- err = i915_request_await_object(rq, vma->obj, 0);
- if (!err)
- err = i915_vma_move_to_active(vma, rq, 0);
+ err = i915_vma_move_to_active(vma, rq, 0);
if (err)
goto err_add_request;
err = rq->engine->emit_bb_start(rq,
- vma->node.start, 0,
+ i915_vma_offset(vma), 0,
I915_DISPATCH_SECURE);
if (err)
goto err_add_request;
@@ -2249,7 +2577,8 @@ err_add_request:
return err;
}
-static int gen8_configure_context(struct i915_gem_context *ctx,
+static int gen8_configure_context(struct i915_perf_stream *stream,
+ struct i915_gem_context *ctx,
struct flex *flex, unsigned int count)
{
struct i915_gem_engines_iter it;
@@ -2283,11 +2612,12 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
{
int err;
struct intel_context *ce = stream->pinned_ctx;
- u32 format = stream->oa_buffer.format;
+ u32 format = stream->oa_buffer.format->format;
+ u32 offset = stream->perf->ctx_oactxctrl_offset;
struct flex regs_context[] = {
{
GEN8_OACTXCONTROL,
- stream->perf->ctx_oactxctrl_offset + 1,
+ offset + 1,
active ? GEN8_OA_COUNTER_RESUME : 0,
},
};
@@ -2312,12 +2642,13 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
},
};
- /* Modify the context image of pinned context with regs_context*/
+ /* Modify the context image of pinned context with regs_context */
err = intel_context_lock_pinned(ce);
if (err)
return err;
- err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
+ err = gen8_modify_context(ce, regs_context,
+ ARRAY_SIZE(regs_context));
intel_context_unlock_pinned(ce);
if (err)
return err;
@@ -2359,10 +2690,11 @@ oa_configure_all_contexts(struct i915_perf_stream *stream,
{
struct drm_i915_private *i915 = stream->perf->i915;
struct intel_engine_cs *engine;
+ struct intel_gt *gt = stream->engine->gt;
struct i915_gem_context *ctx, *cn;
int err;
- lockdep_assert_held(&stream->perf->lock);
+ lockdep_assert_held(&gt->perf.lock);
/*
* The OA register config is setup through the context image. This image
@@ -2387,7 +2719,7 @@ oa_configure_all_contexts(struct i915_perf_stream *stream,
spin_unlock(&i915->gem.contexts.lock);
- err = gen8_configure_context(ctx, regs, num_regs);
+ err = gen8_configure_context(stream, ctx, regs, num_regs);
if (err) {
i915_gem_context_put(ctx);
return err;
@@ -2421,27 +2753,11 @@ oa_configure_all_contexts(struct i915_perf_stream *stream,
}
static int
-gen12_configure_all_contexts(struct i915_perf_stream *stream,
- const struct i915_oa_config *oa_config,
- struct i915_active *active)
-{
- struct flex regs[] = {
- {
- GEN8_R_PWR_CLK_STATE(RENDER_RING_BASE),
- CTX_R_PWR_CLK_STATE,
- },
- };
-
- return oa_configure_all_contexts(stream,
- regs, ARRAY_SIZE(regs),
- active);
-}
-
-static int
lrc_configure_all_contexts(struct i915_perf_stream *stream,
const struct i915_oa_config *oa_config,
struct i915_active *active)
{
+ u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
/* The MMIO offsets for Flex EU registers aren't contiguous */
const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
@@ -2452,7 +2768,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream,
},
{
GEN8_OACTXCONTROL,
- stream->perf->ctx_oactxctrl_offset + 1,
+ ctx_oactxctrl + 1,
},
{ EU_PERF_CNTL0, ctx_flexeuN(0) },
{ EU_PERF_CNTL1, ctx_flexeuN(1) },
@@ -2540,13 +2856,26 @@ static int
gen12_enable_metric_set(struct i915_perf_stream *stream,
struct i915_active *active)
{
+ struct drm_i915_private *i915 = stream->perf->i915;
struct intel_uncore *uncore = stream->uncore;
- struct i915_oa_config *oa_config = stream->oa_config;
bool periodic = stream->periodic;
u32 period_exponent = stream->period_exponent;
+ u32 sqcnt1;
int ret;
- intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
+ /*
+ * Wa_1508761755
+ * EU NOA signals behave incorrectly if EU clock gating is enabled.
+ * Disable thread stall DOP gating and EU DOP gating.
+ */
+ if (IS_DG2(i915)) {
+ intel_gt_mcr_multicast_write(uncore->gt, GEN8_ROW_CHICKEN,
+ _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
+ intel_uncore_write(uncore, GEN7_ROW_CHICKEN2,
+ _MASKED_BIT_ENABLE(GEN12_DISABLE_DOP_GATING));
+ }
+
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_debug,
/* Disable clk ratio reports, like previous Gens. */
_MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) |
@@ -2556,20 +2885,21 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
*/
oag_report_ctx_switches(stream));
- intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ?
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_ctx_ctrl, periodic ?
(GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME |
GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE |
(period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
: 0);
/*
- * Update all contexts prior writing the mux configurations as we need
- * to make sure all slices/subslices are ON before writing to NOA
- * registers.
+ * Initialize Super Queue Internal Cnt Register
+ * Set PMON Enable in order to collect valid metrics.
+ * Enable bytes per clock reporting in OA.
*/
- ret = gen12_configure_all_contexts(stream, oa_config, active);
- if (ret)
- return ret;
+ sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
+ (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
+
+ intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1);
/*
* For Gen12, performance counters are context
@@ -2611,9 +2941,18 @@ static void gen11_disable_metric_set(struct i915_perf_stream *stream)
static void gen12_disable_metric_set(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
+ struct drm_i915_private *i915 = stream->perf->i915;
+ u32 sqcnt1;
- /* Reset all contexts' slices/subslices configurations. */
- gen12_configure_all_contexts(stream, NULL, NULL);
+ /*
+ * Wa_1508761755: Enable thread stall DOP gating and EU DOP gating.
+ */
+ if (IS_DG2(i915)) {
+ intel_gt_mcr_multicast_write(uncore->gt, GEN8_ROW_CHICKEN,
+ _MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE));
+ intel_uncore_write(uncore, GEN7_ROW_CHICKEN2,
+ _MASKED_BIT_DISABLE(GEN12_DISABLE_DOP_GATING));
+ }
/* disable the context save/restore or OAR counters */
if (stream->ctx)
@@ -2621,6 +2960,12 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
/* Make sure we disable noa to save power. */
intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
+
+ sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
+ (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
+
+ /* Reset PMON Enable to save power. */
+ intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0);
}
static void gen7_oa_enable(struct i915_perf_stream *stream)
@@ -2630,7 +2975,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream)
u32 ctx_id = stream->specific_ctx_id;
bool periodic = stream->periodic;
u32 period_exponent = stream->period_exponent;
- u32 report_format = stream->oa_buffer.format;
+ u32 report_format = stream->oa_buffer.format->format;
/*
* Reset buf pointers so we don't forward reports from before now.
@@ -2656,7 +3001,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream)
static void gen8_oa_enable(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
- u32 report_format = stream->oa_buffer.format;
+ u32 report_format = stream->oa_buffer.format->format;
/*
* Reset buf pointers so we don't forward reports from before now.
@@ -2681,8 +3026,8 @@ static void gen8_oa_enable(struct i915_perf_stream *stream)
static void gen12_oa_enable(struct i915_perf_stream *stream)
{
- struct intel_uncore *uncore = stream->uncore;
- u32 report_format = stream->oa_buffer.format;
+ const struct i915_perf_regs *regs;
+ u32 val;
/*
* If we don't want OA reports from the OA buffer, then we don't even
@@ -2693,9 +3038,11 @@ static void gen12_oa_enable(struct i915_perf_stream *stream)
gen12_init_oa_buffer(stream);
- intel_uncore_write(uncore, GEN12_OAG_OACONTROL,
- (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) |
- GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE);
+ regs = __oa_regs(stream);
+ val = (stream->oa_buffer.format->format << regs->oa_ctrl_counter_format_shift) |
+ GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE;
+
+ intel_uncore_write(stream->uncore, regs->oa_ctrl, val);
}
/**
@@ -2747,9 +3094,9 @@ static void gen12_oa_disable(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
- intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0);
+ intel_uncore_write(uncore, __oa_regs(stream)->oa_ctrl, 0);
if (intel_wait_for_register(uncore,
- GEN12_OAG_OACONTROL,
+ __oa_regs(stream)->oa_ctrl,
GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0,
50))
drm_err(&stream->perf->i915->drm,
@@ -2838,6 +3185,32 @@ get_sseu_config(struct intel_sseu *out_sseu,
return i915_gem_user_to_context_sseu(engine->gt, drm_sseu, out_sseu);
}
+/*
+ * OA timestamp frequency = CS timestamp frequency on most platforms. On some
+ * platforms the OA unit ignores the CTC_SHIFT and the two timestamps differ.
+ * In such cases, return the adjusted CS timestamp frequency to the user.
+ */
+u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915)
+{
+ struct intel_gt *gt = to_gt(i915);
+
+ /* Wa_18013179988 */
+ if (IS_DG2(i915) || IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) {
+ intel_wakeref_t wakeref;
+ u32 reg, shift;
+
+ with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref)
+ reg = intel_uncore_read(to_gt(i915)->uncore, RPM_CONFIG0);
+
+ shift = REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK,
+ reg);
+
+ return to_gt(i915)->clock_frequency << (3 - shift);
+ }
+
+ return to_gt(i915)->clock_frequency;
+}
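/*
 * Worked example with illustrative numbers: if gt->clock_frequency is
 * 19200000 (19.2 MHz, i.e. already reduced by the CTC_SHIFT that the CS
 * timestamp honours) and the RPM_CONFIG0 shift field reads 0, the OA
 * unit is actually ticking at 19200000 << (3 - 0) = 153.6 MHz, and that
 * is the frequency reported to userspace.
 */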
+
/**
* i915_oa_stream_init - validate combined props for OA stream and init
* @stream: An i915 perf stream
@@ -2862,7 +3235,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
{
struct drm_i915_private *i915 = stream->perf->i915;
struct i915_perf *perf = stream->perf;
- int format_size;
+ struct i915_perf_group *g;
int ret;
if (!props->engine) {
@@ -2870,6 +3243,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
"OA engine not specified\n");
return -EINVAL;
}
+ g = props->engine->oa_group;
/*
* If the sysfs metrics/ directory wasn't registered for some
@@ -2900,7 +3274,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
* counter reports and marshal to the appropriate client
* we currently only allow exclusive access
*/
- if (perf->exclusive_stream) {
+ if (g->exclusive_stream) {
drm_dbg(&stream->perf->i915->drm,
"OA unit already in use\n");
return -EBUSY;
@@ -2917,20 +3291,15 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
stream->sample_size = sizeof(struct drm_i915_perf_record_header);
- format_size = perf->oa_formats[props->oa_format].size;
+ stream->oa_buffer.format = &perf->oa_formats[props->oa_format];
+ if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format->size == 0))
+ return -EINVAL;
stream->sample_flags = props->sample_flags;
- stream->sample_size += format_size;
-
- stream->oa_buffer.format_size = format_size;
- if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format_size == 0))
- return -EINVAL;
+ stream->sample_size += stream->oa_buffer.format->size;
stream->hold_preemption = props->hold_preemption;
- stream->oa_buffer.format =
- perf->oa_formats[props->oa_format].format;
-
stream->periodic = props->oa_periodic;
if (stream->periodic)
stream->period_exponent = props->oa_period_exponent;
@@ -2980,8 +3349,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
stream->ops = &i915_oa_stream_ops;
- perf->sseu = props->sseu;
- WRITE_ONCE(perf->exclusive_stream, stream);
+ stream->engine->gt->perf.sseu = props->sseu;
+ WRITE_ONCE(g->exclusive_stream, stream);
ret = i915_perf_stream_enable_sync(stream);
if (ret) {
@@ -2994,26 +3363,26 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
"opening stream oa config uuid=%s\n",
stream->oa_config->uuid);
- hrtimer_init(&stream->poll_check_timer,
- CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- stream->poll_check_timer.function = oa_poll_check_timer_cb;
+ hrtimer_setup(&stream->poll_check_timer, oa_poll_check_timer_cb, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
init_waitqueue_head(&stream->poll_wq);
spin_lock_init(&stream->oa_buffer.ptr_lock);
+ mutex_init(&stream->lock);
return 0;
err_enable:
- WRITE_ONCE(perf->exclusive_stream, NULL);
+ WRITE_ONCE(g->exclusive_stream, NULL);
perf->ops.disable_metric_set(stream);
free_oa_buffer(stream);
err_oa_buf_alloc:
- free_oa_configs(stream);
-
intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
intel_engine_pm_put(stream->engine);
+ free_oa_configs(stream);
+
err_config:
free_noa_wait(stream);
@@ -3033,7 +3402,7 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
return;
/* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
- stream = READ_ONCE(engine->i915->perf.exclusive_stream);
+ stream = READ_ONCE(engine->oa_group->exclusive_stream);
if (stream && GRAPHICS_VER(stream->perf->i915) < 12)
gen8_update_reg_state_unlocked(ce, stream);
}
@@ -3062,7 +3431,6 @@ static ssize_t i915_perf_read(struct file *file,
loff_t *ppos)
{
struct i915_perf_stream *stream = file->private_data;
- struct i915_perf *perf = stream->perf;
size_t offset = 0;
int ret;
@@ -3086,14 +3454,14 @@ static ssize_t i915_perf_read(struct file *file,
if (ret)
return ret;
- mutex_lock(&perf->lock);
+ mutex_lock(&stream->lock);
ret = stream->ops->read(stream, buf, count, &offset);
- mutex_unlock(&perf->lock);
+ mutex_unlock(&stream->lock);
} while (!offset && !ret);
} else {
- mutex_lock(&perf->lock);
+ mutex_lock(&stream->lock);
ret = stream->ops->read(stream, buf, count, &offset);
- mutex_unlock(&perf->lock);
+ mutex_unlock(&stream->lock);
}
/* We allow the poll checking to sometimes report false positive EPOLLIN
@@ -3140,9 +3508,6 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
* &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that
* will be woken for new stream data.
*
- * Note: The &perf->lock mutex has been taken to serialize
- * with any non-file-operation driver hooks.
- *
* Returns: any poll events that are ready without sleeping
*/
static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream,
@@ -3181,12 +3546,11 @@ static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream,
static __poll_t i915_perf_poll(struct file *file, poll_table *wait)
{
struct i915_perf_stream *stream = file->private_data;
- struct i915_perf *perf = stream->perf;
__poll_t ret;
- mutex_lock(&perf->lock);
+ mutex_lock(&stream->lock);
ret = i915_perf_poll_locked(stream, file, wait);
- mutex_unlock(&perf->lock);
+ mutex_unlock(&stream->lock);
return ret;
}
@@ -3285,9 +3649,6 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
* @cmd: the ioctl request
* @arg: the ioctl data
*
- * Note: The &perf->lock mutex has been taken to serialize
- * with any non-file-operation driver hooks.
- *
* Returns: zero on success or a negative error code. Returns -EINVAL for
* an unknown ioctl request.
*/
@@ -3325,12 +3686,11 @@ static long i915_perf_ioctl(struct file *file,
unsigned long arg)
{
struct i915_perf_stream *stream = file->private_data;
- struct i915_perf *perf = stream->perf;
long ret;
- mutex_lock(&perf->lock);
+ mutex_lock(&stream->lock);
ret = i915_perf_ioctl_locked(stream, cmd, arg);
- mutex_unlock(&perf->lock);
+ mutex_unlock(&stream->lock);
return ret;
}
@@ -3342,7 +3702,7 @@ static long i915_perf_ioctl(struct file *file,
* Frees all resources associated with the given i915 perf @stream, disabling
* any associated data capture in the process.
*
- * Note: The &perf->lock mutex has been taken to serialize
+ * Note: The &gt->perf.lock mutex has been taken to serialize
* with any non-file-operation driver hooks.
*/
static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
@@ -3374,10 +3734,16 @@ static int i915_perf_release(struct inode *inode, struct file *file)
{
struct i915_perf_stream *stream = file->private_data;
struct i915_perf *perf = stream->perf;
+ struct intel_gt *gt = stream->engine->gt;
- mutex_lock(&perf->lock);
+ /*
+ * Within this call, we know that the fd is being closed and we have no
+ * other user of stream->lock. Use the perf lock to destroy the stream
+ * here.
+ */
+ mutex_lock(&gt->perf.lock);
i915_perf_destroy_locked(stream);
- mutex_unlock(&perf->lock);
+ mutex_unlock(&gt->perf.lock);
/* Release the reference the perf stream kept on the driver. */
drm_dev_put(&perf->i915->drm);
@@ -3388,7 +3754,6 @@ static int i915_perf_release(struct inode *inode, struct file *file)
static const struct file_operations fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.release = i915_perf_release,
.poll = i915_perf_poll,
.read = i915_perf_read,
@@ -3410,7 +3775,7 @@ static const struct file_operations fops = {
* See i915_perf_ioctl_open() for interface details.
*
* Implements further stream config validation and stream initialization on
- * behalf of i915_perf_open_ioctl() with the &perf->lock mutex
+ * behalf of i915_perf_open_ioctl() with the &gt->perf.lock mutex
* taken to serialize with any non-file-operation driver hooks.
*
* Note: at this point the @props have only been validated in isolation and
@@ -3487,7 +3852,7 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
}
/*
- * Asking for SSEU configuration is a priviliged operation.
+ * Asking for SSEU configuration is a privileged operation.
*/
if (props->has_sseu)
privileged_op = true;
@@ -3565,8 +3930,10 @@ err:
static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
{
- return intel_gt_clock_interval_to_ns(to_gt(perf->i915),
- 2ULL << exponent);
+ u64 nom = (2ULL << exponent) * NSEC_PER_SEC;
+ u32 den = i915_perf_oa_timestamp_frequency(perf->i915);
+
+ return div_u64(nom + den - 1, den);
}
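/*
 * Example of the round-up division above: the timer period is
 * (2 << exponent) OA timestamp ticks. With exponent = 5 (64 ticks) and a
 * 19.2 MHz OA timestamp frequency:
 *
 *	nom = 64 * NSEC_PER_SEC = 64000000000
 *	period = (64000000000 + 19199999) / 19200000 = 3334 ns
 *
 * i.e. 3333.3 ns rounded up to a whole nanosecond.
 */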
static __always_inline bool
@@ -3601,41 +3968,35 @@ static int read_properties_unlocked(struct i915_perf *perf,
u32 n_props,
struct perf_open_properties *props)
{
+ struct drm_i915_gem_context_param_sseu user_sseu;
+ const struct i915_oa_format *f;
u64 __user *uprop = uprops;
+ bool config_instance = false;
+ bool config_class = false;
+ bool config_sseu = false;
+ u8 class, instance;
u32 i;
int ret;
memset(props, 0, sizeof(struct perf_open_properties));
props->poll_oa_period = DEFAULT_POLL_PERIOD_NS;
- if (!n_props) {
- drm_dbg(&perf->i915->drm,
- "No i915 perf properties given\n");
- return -EINVAL;
- }
-
- /* At the moment we only support using i915-perf on the RCS. */
- props->engine = intel_engine_lookup_user(perf->i915,
- I915_ENGINE_CLASS_RENDER,
- 0);
- if (!props->engine) {
- drm_dbg(&perf->i915->drm,
- "No RENDER-capable engines\n");
- return -EINVAL;
- }
-
/* Considering that ID = 0 is reserved and assuming that we don't
* (currently) expect any configurations to ever specify duplicate
* values for a particular property ID then the last _PROP_MAX value is
* one greater than the maximum number of properties we expect to get
* from userspace.
*/
- if (n_props >= DRM_I915_PERF_PROP_MAX) {
+ if (!n_props || n_props >= DRM_I915_PERF_PROP_MAX) {
drm_dbg(&perf->i915->drm,
- "More i915 perf properties specified than exist\n");
+ "Invalid number of i915 perf properties given\n");
return -EINVAL;
}
+ /* Defaults when class:instance is not passed */
+ class = I915_ENGINE_CLASS_RENDER;
+ instance = 0;
+
for (i = 0; i < n_props; i++) {
u64 oa_period, oa_freq_hz;
u64 id, value;
@@ -3730,9 +4091,7 @@ static int read_properties_unlocked(struct i915_perf *perf,
props->hold_preemption = !!value;
break;
case DRM_I915_PERF_PROP_GLOBAL_SSEU: {
- struct drm_i915_gem_context_param_sseu user_sseu;
-
- if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 50)) {
+ if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 55)) {
drm_dbg(&perf->i915->drm,
"SSEU config not supported on gfx %x\n",
GRAPHICS_VER_FULL(perf->i915));
@@ -3746,14 +4105,7 @@ static int read_properties_unlocked(struct i915_perf *perf,
"Unable to copy global sseu parameter\n");
return -EFAULT;
}
-
- ret = get_sseu_config(&props->sseu, props->engine, &user_sseu);
- if (ret) {
- drm_dbg(&perf->i915->drm,
- "Invalid SSEU configuration\n");
- return ret;
- }
- props->has_sseu = true;
+ config_sseu = true;
break;
}
case DRM_I915_PERF_PROP_POLL_OA_PERIOD:
@@ -3765,7 +4117,15 @@ static int read_properties_unlocked(struct i915_perf *perf,
}
props->poll_oa_period = value;
break;
- case DRM_I915_PERF_PROP_MAX:
+ case DRM_I915_PERF_PROP_OA_ENGINE_CLASS:
+ class = (u8)value;
+ config_class = true;
+ break;
+ case DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE:
+ instance = (u8)value;
+ config_instance = true;
+ break;
+ default:
MISSING_CASE(id);
return -EINVAL;
}
@@ -3773,6 +4133,60 @@ static int read_properties_unlocked(struct i915_perf *perf,
uprop += 2;
}
+ if ((config_class && !config_instance) ||
+ (config_instance && !config_class)) {
+ drm_dbg(&perf->i915->drm,
+ "OA engine-class and engine-instance parameters must be passed together\n");
+ return -EINVAL;
+ }
+
+ props->engine = intel_engine_lookup_user(perf->i915, class, instance);
+ if (!props->engine) {
+ drm_dbg(&perf->i915->drm,
+ "OA engine class and instance invalid %d:%d\n",
+ class, instance);
+ return -EINVAL;
+ }
+
+ if (!engine_supports_oa(props->engine)) {
+ drm_dbg(&perf->i915->drm,
+ "Engine not supported by OA %d:%d\n",
+ class, instance);
+ return -EINVAL;
+ }
+
+ /*
+ * Wa_14017512683: mtl[a0..c0): Use of OAM must be preceded with Media
+ * C6 disable in BIOS. Fail if Media C6 is enabled on steppings where OAM
+ * does not work as expected.
+ */
+ if (IS_MEDIA_GT_IP_STEP(props->engine->gt, IP_VER(13, 0), STEP_A0, STEP_C0) &&
+ props->engine->oa_group->type == TYPE_OAM &&
+ intel_check_bios_c6_setup(&props->engine->gt->rc6)) {
+ drm_dbg(&perf->i915->drm,
+ "OAM requires media C6 to be disabled in BIOS\n");
+ return -EINVAL;
+ }
+
+ i = array_index_nospec(props->oa_format, I915_OA_FORMAT_MAX);
+ f = &perf->oa_formats[i];
+ if (!engine_supports_oa_format(props->engine, f->type)) {
+ drm_dbg(&perf->i915->drm,
+ "Invalid OA format %d for class %d\n",
+ f->type, props->engine->class);
+ return -EINVAL;
+ }
+
+ if (config_sseu) {
+ ret = get_sseu_config(&props->sseu, props->engine, &user_sseu);
+ if (ret) {
+ drm_dbg(&perf->i915->drm,
+ "Invalid SSEU configuration\n");
+ return ret;
+ }
+ props->has_sseu = true;
+ }
+
return 0;
}
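
With the properties above parsed, userspace can target any OA-capable engine instead of only the render engine. A hedged userspace sketch of an open call using the new class/instance pair (the metrics-set id, format, and exponent values are illustrative, and error handling is elided):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	/*
	 * Open an OA stream on a media engine; config_id would come from
	 * DRM_IOCTL_I915_PERF_ADD_CONFIG or the sysfs metrics directory.
	 * Returns the new stream fd on success, -1 on error.
	 */
	static int open_oam_stream(int drm_fd, uint64_t config_id)
	{
		uint64_t props[] = {
			DRM_I915_PERF_PROP_SAMPLE_OA, 1,
			DRM_I915_PERF_PROP_OA_METRICS_SET, config_id,
			DRM_I915_PERF_PROP_OA_FORMAT, I915_OAM_FORMAT_MPEC8u32_B8_C8,
			DRM_I915_PERF_PROP_OA_EXPONENT, 16,
			/* must be passed together, as enforced above */
			DRM_I915_PERF_PROP_OA_ENGINE_CLASS, I915_ENGINE_CLASS_VIDEO,
			DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE, 0,
		};
		struct drm_i915_perf_open_param param = {
			.flags = I915_PERF_FLAG_FD_CLOEXEC,
			.num_properties = sizeof(props) / (2 * sizeof(uint64_t)),
			.properties_ptr = (uintptr_t)props,
		};

		return ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
	}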
@@ -3794,7 +4208,7 @@ static int read_properties_unlocked(struct i915_perf *perf,
* mutex to avoid an awkward lockdep with mmap_lock.
*
* Most of the implementation details are handled by
- * i915_perf_open_ioctl_locked() after taking the &perf->lock
+ * i915_perf_open_ioctl_locked() after taking the &gt->perf.lock
* mutex for serializing with any non-file-operation driver hooks.
*
* Return: A newly opened i915 Perf stream file descriptor or negative
@@ -3805,15 +4219,13 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
{
struct i915_perf *perf = &to_i915(dev)->perf;
struct drm_i915_perf_open_param *param = data;
+ struct intel_gt *gt;
struct perf_open_properties props;
u32 known_open_flags;
int ret;
- if (!perf->i915) {
- drm_dbg(&perf->i915->drm,
- "i915 perf interface not available for this system\n");
+ if (!perf->i915)
return -ENOTSUPP;
- }
known_open_flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
@@ -3831,9 +4243,11 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
if (ret)
return ret;
- mutex_lock(&perf->lock);
+ gt = props.engine->gt;
+
+ mutex_lock(&gt->perf.lock);
ret = i915_perf_open_ioctl_locked(perf, param, &props, file);
- mutex_unlock(&perf->lock);
+ mutex_unlock(&gt->perf.lock);
return ret;
}
@@ -3849,6 +4263,7 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
void i915_perf_register(struct drm_i915_private *i915)
{
struct i915_perf *perf = &i915->perf;
+ struct intel_gt *gt = to_gt(i915);
if (!perf->i915)
return;
@@ -3857,13 +4272,13 @@ void i915_perf_register(struct drm_i915_private *i915)
* i915_perf_open_ioctl(); considering that we register after
* being exposed to userspace.
*/
- mutex_lock(&perf->lock);
+ mutex_lock(&gt->perf.lock);
perf->metrics_kobj =
kobject_create_and_add("metrics",
&i915->drm.primary->kdev->kobj);
- mutex_unlock(&perf->lock);
+ mutex_unlock(&gt->perf.lock);
}
/**
@@ -3906,29 +4321,17 @@ static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr)
return false;
}
-static bool reg_in_range_table(u32 addr, const struct i915_range *table)
-{
- while (table->start || table->end) {
- if (addr >= table->start && addr <= table->end)
- return true;
-
- table++;
- }
-
- return false;
-}
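
The open-coded reg_in_range_table() removed here presumably moves behind the new i915_mmio_range.h include as i915_mmio_range_table_contains(), given the call sites below. A sketch of the equivalent semantics, keeping the zero-terminated-table convention (the actual header contents are not part of this diff, so treat this as an assumption):

	/* Assumed shape of the shared helper; tables end with a {0, 0} entry. */
	static inline bool
	i915_mmio_range_table_contains(u32 addr,
				       const struct i915_mmio_range *table)
	{
		for (; table->start || table->end; table++) {
			if (addr >= table->start && addr <= table->end)
				return true;
		}

		return false;
	}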
-
#define REG_EQUAL(addr, mmio) \
((addr) == i915_mmio_reg_offset(mmio))
-static const struct i915_range gen7_oa_b_counters[] = {
+static const struct i915_mmio_range gen7_oa_b_counters[] = {
{ .start = 0x2710, .end = 0x272c }, /* OASTARTTRIG[1-8] */
{ .start = 0x2740, .end = 0x275c }, /* OAREPORTTRIG[1-8] */
{ .start = 0x2770, .end = 0x27ac }, /* OACEC[0-7][0-1] */
{}
};
-static const struct i915_range gen12_oa_b_counters[] = {
+static const struct i915_mmio_range gen12_oa_b_counters[] = {
{ .start = 0x2b2c, .end = 0x2b2c }, /* GEN12_OAG_OA_PESS */
{ .start = 0xd900, .end = 0xd91c }, /* GEN12_OAG_OASTARTTRIG[1-8] */
{ .start = 0xd920, .end = 0xd93c }, /* GEN12_OAG_OAREPORTTRIG1[1-8] */
@@ -3939,37 +4342,51 @@ static const struct i915_range gen12_oa_b_counters[] = {
{}
};
-static const struct i915_range gen7_oa_mux_regs[] = {
+static const struct i915_mmio_range mtl_oam_b_counters[] = {
+ { .start = 0x393000, .end = 0x39301c }, /* GEN12_OAM_STARTTRIG1[1-8] */
+ { .start = 0x393020, .end = 0x39303c }, /* GEN12_OAM_REPORTTRIG1[1-8] */
+ { .start = 0x393040, .end = 0x39307c }, /* GEN12_OAM_CEC[0-7][0-1] */
+ { .start = 0x393200, .end = 0x39323c }, /* MPES[0-7] */
+ {}
+};
+
+static const struct i915_mmio_range xehp_oa_b_counters[] = {
+ { .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */
+ { .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */
+ {}
+};
+
+static const struct i915_mmio_range gen7_oa_mux_regs[] = {
{ .start = 0x91b8, .end = 0x91cc }, /* OA_PERFCNT[1-2], OA_PERFMATRIX */
{ .start = 0x9800, .end = 0x9888 }, /* MICRO_BP0_0 - NOA_WRITE */
{ .start = 0xe180, .end = 0xe180 }, /* HALF_SLICE_CHICKEN2 */
{}
};
-static const struct i915_range hsw_oa_mux_regs[] = {
+static const struct i915_mmio_range hsw_oa_mux_regs[] = {
{ .start = 0x09e80, .end = 0x09ea4 }, /* HSW_MBVID2_NOA[0-9] */
{ .start = 0x09ec0, .end = 0x09ec0 }, /* HSW_MBVID2_MISR0 */
{ .start = 0x25100, .end = 0x2ff90 },
{}
};
-static const struct i915_range chv_oa_mux_regs[] = {
+static const struct i915_mmio_range chv_oa_mux_regs[] = {
{ .start = 0x182300, .end = 0x1823a4 },
{}
};
-static const struct i915_range gen8_oa_mux_regs[] = {
+static const struct i915_mmio_range gen8_oa_mux_regs[] = {
{ .start = 0x0d00, .end = 0x0d2c }, /* RPM_CONFIG[0-1], NOA_CONFIG[0-8] */
{ .start = 0x20cc, .end = 0x20cc }, /* WAIT_FOR_RC6_EXIT */
{}
};
-static const struct i915_range gen11_oa_mux_regs[] = {
+static const struct i915_mmio_range gen11_oa_mux_regs[] = {
{ .start = 0x91c8, .end = 0x91dc }, /* OA_PERFCNT[3-4] */
{}
};
-static const struct i915_range gen12_oa_mux_regs[] = {
+static const struct i915_mmio_range gen12_oa_mux_regs[] = {
{ .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */
{ .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */
{ .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */
@@ -3978,56 +4395,90 @@ static const struct i915_range gen12_oa_mux_regs[] = {
{}
};
+/*
+ * Ref: 14010536224:
+ * 0x20cc is repurposed on MTL, so use a separate array for MTL.
+ */
+static const struct i915_mmio_range mtl_oa_mux_regs[] = {
+ { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */
+ { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */
+ { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */
+ { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */
+ { .start = 0x38d100, .end = 0x38d114 }, /* VISACTL */
+ {}
+};
+
static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
{
- return reg_in_range_table(addr, gen7_oa_b_counters);
+ return i915_mmio_range_table_contains(addr, gen7_oa_b_counters);
}
static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
- return reg_in_range_table(addr, gen7_oa_mux_regs) ||
- reg_in_range_table(addr, gen8_oa_mux_regs);
+ return i915_mmio_range_table_contains(addr, gen7_oa_mux_regs) ||
+ i915_mmio_range_table_contains(addr, gen8_oa_mux_regs);
}
static bool gen11_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
- return reg_in_range_table(addr, gen7_oa_mux_regs) ||
- reg_in_range_table(addr, gen8_oa_mux_regs) ||
- reg_in_range_table(addr, gen11_oa_mux_regs);
+ return i915_mmio_range_table_contains(addr, gen7_oa_mux_regs) ||
+ i915_mmio_range_table_contains(addr, gen8_oa_mux_regs) ||
+ i915_mmio_range_table_contains(addr, gen11_oa_mux_regs);
}
static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
- return reg_in_range_table(addr, gen7_oa_mux_regs) ||
- reg_in_range_table(addr, hsw_oa_mux_regs);
+ return i915_mmio_range_table_contains(addr, gen7_oa_mux_regs) ||
+ i915_mmio_range_table_contains(addr, hsw_oa_mux_regs);
}
static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
- return reg_in_range_table(addr, gen7_oa_mux_regs) ||
- reg_in_range_table(addr, chv_oa_mux_regs);
+ return i915_mmio_range_table_contains(addr, gen7_oa_mux_regs) ||
+ i915_mmio_range_table_contains(addr, chv_oa_mux_regs);
}
static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
{
- return reg_in_range_table(addr, gen12_oa_b_counters);
+ return i915_mmio_range_table_contains(addr, gen12_oa_b_counters);
+}
+
+static bool mtl_is_valid_oam_b_counter_addr(struct i915_perf *perf, u32 addr)
+{
+ if (HAS_OAM(perf->i915) &&
+ GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70))
+ return i915_mmio_range_table_contains(addr, mtl_oam_b_counters);
+
+ return false;
+}
+
+static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
+{
+ return i915_mmio_range_table_contains(addr, xehp_oa_b_counters) ||
+ i915_mmio_range_table_contains(addr, gen12_oa_b_counters) ||
+ mtl_is_valid_oam_b_counter_addr(perf, addr);
}
static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
- return reg_in_range_table(addr, gen12_oa_mux_regs);
+ if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70))
+ return i915_mmio_range_table_contains(addr, mtl_oa_mux_regs);
+ else
+ return i915_mmio_range_table_contains(addr, gen12_oa_mux_regs);
}
static u32 mask_reg_value(u32 reg, u32 val)
{
- /* HALF_SLICE_CHICKEN2 is programmed with a the
+ /*
+ * HALF_SLICE_CHICKEN2 is programmed with the
* WaDisableSTUnitPowerOptimization workaround. Make sure the value
* programmed by userspace doesn't change this.
*/
if (REG_EQUAL(reg, HALF_SLICE_CHICKEN2))
val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE);
- /* WAIT_FOR_RC6_EXIT has only one bit fullfilling the function
+ /*
+ * WAIT_FOR_RC6_EXIT has only one bit fulfilling the function
* indicated by its name and a bunch of selection fields used by OA
* configs.
*/
@@ -4140,11 +4591,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
struct i915_oa_reg *regs;
int err, id;
- if (!perf->i915) {
- drm_dbg(&perf->i915->drm,
- "i915 perf interface not available for this system\n");
+ if (!perf->i915)
return -ENOTSUPP;
- }
if (!perf->metrics_kobj) {
drm_dbg(&perf->i915->drm,
@@ -4270,13 +4718,13 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
err = oa_config->id;
goto sysfs_err;
}
-
- mutex_unlock(&perf->metrics_lock);
+ id = oa_config->id;
drm_dbg(&perf->i915->drm,
"Added config %s id=%i\n", oa_config->uuid, oa_config->id);
+ mutex_unlock(&perf->metrics_lock);
- return oa_config->id;
+ return id;
sysfs_err:
mutex_unlock(&perf->metrics_lock);
@@ -4306,11 +4754,8 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
struct i915_oa_config *oa_config;
int ret;
- if (!perf->i915) {
- drm_dbg(&perf->i915->drm,
- "i915 perf interface not available for this system\n");
+ if (!perf->i915)
return -ENOTSUPP;
- }
if (i915_perf_stream_paranoid && !perfmon_capable()) {
drm_dbg(&perf->i915->drm,
@@ -4350,7 +4795,7 @@ err_unlock:
return ret;
}
-static struct ctl_table oa_table[] = {
+static const struct ctl_table oa_table[] = {
{
.procname = "perf_stream_paranoid",
.data = &i915_perf_stream_paranoid,
@@ -4369,9 +4814,138 @@ static struct ctl_table oa_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &oa_sample_rate_hard_limit,
},
- {}
};
+static u32 num_perf_groups_per_gt(struct intel_gt *gt)
+{
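+ /*
+ * Currently a single group per gt: OAG on the primary gt, OAM
+ * (PERF_GROUP_OAM_SAMEDIA_0) on the SAMEDIA gt; see oa_init_groups().
+ */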
+ return 1;
+}
+
+static u32 __oam_engine_group(struct intel_engine_cs *engine)
+{
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70)) {
+ /*
+ * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices
+ * within the gt use the same OAM. All MTL SKUs list 1 SA MEDIA.
+ */
+ drm_WARN_ON(&engine->i915->drm,
+ engine->gt->type != GT_MEDIA);
+
+ return PERF_GROUP_OAM_SAMEDIA_0;
+ }
+
+ return PERF_GROUP_INVALID;
+}
+
+static u32 __oa_engine_group(struct intel_engine_cs *engine)
+{
+ switch (engine->class) {
+ case RENDER_CLASS:
+ return PERF_GROUP_OAG;
+
+ case VIDEO_DECODE_CLASS:
+ case VIDEO_ENHANCEMENT_CLASS:
+ return __oam_engine_group(engine);
+
+ default:
+ return PERF_GROUP_INVALID;
+ }
+}
+
+static struct i915_perf_regs __oam_regs(u32 base)
+{
+ return (struct i915_perf_regs) {
+ base,
+ GEN12_OAM_HEAD_POINTER(base),
+ GEN12_OAM_TAIL_POINTER(base),
+ GEN12_OAM_BUFFER(base),
+ GEN12_OAM_CONTEXT_CONTROL(base),
+ GEN12_OAM_CONTROL(base),
+ GEN12_OAM_DEBUG(base),
+ GEN12_OAM_STATUS(base),
+ GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT,
+ };
+}
+
+static struct i915_perf_regs __oag_regs(void)
+{
+ return (struct i915_perf_regs) {
+ 0,
+ GEN12_OAG_OAHEADPTR,
+ GEN12_OAG_OATAILPTR,
+ GEN12_OAG_OABUFFER,
+ GEN12_OAG_OAGLBCTXCTRL,
+ GEN12_OAG_OACONTROL,
+ GEN12_OAG_OA_DEBUG,
+ GEN12_OAG_OASTATUS,
+ GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT,
+ };
+}
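
Both initializers above are positional, so the field order of struct i915_perf_regs matters. A sketch of the assumed layout, with names inferred from accessors such as __oa_regs(stream)->oa_tail_ptr (the real definition lives outside this diff):

	/* Assumed field order matching the positional initializers above. */
	struct i915_perf_regs {
		u32 base;
		i915_reg_t oa_head_ptr;
		i915_reg_t oa_tail_ptr;
		i915_reg_t oa_buffer;
		i915_reg_t oa_ctx_ctrl;
		i915_reg_t oa_ctrl;
		i915_reg_t oa_debug;
		i915_reg_t oa_status;
		u32 oa_ctrl_counter_format_shift;
	};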
+
+static void oa_init_groups(struct intel_gt *gt)
+{
+ int i, num_groups = gt->perf.num_perf_groups;
+
+ for (i = 0; i < num_groups; i++) {
+ struct i915_perf_group *g = &gt->perf.group[i];
+
+ /* Fused off engines can result in a group with num_engines == 0 */
+ if (g->num_engines == 0)
+ continue;
+
+ if (i == PERF_GROUP_OAG && gt->type != GT_MEDIA) {
+ g->regs = __oag_regs();
+ g->type = TYPE_OAG;
+ } else if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) {
+ g->regs = __oam_regs(mtl_oa_base[i]);
+ g->type = TYPE_OAM;
+ }
+ }
+}
+
+static int oa_init_gt(struct intel_gt *gt)
+{
+ u32 num_groups = num_perf_groups_per_gt(gt);
+ struct intel_engine_cs *engine;
+ struct i915_perf_group *g;
+ intel_engine_mask_t tmp;
+
+ g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
+ if (!g)
+ return -ENOMEM;
+
+ for_each_engine_masked(engine, gt, ALL_ENGINES, tmp) {
+ u32 index = __oa_engine_group(engine);
+
+ engine->oa_group = NULL;
+ if (index < num_groups) {
+ g[index].num_engines++;
+ engine->oa_group = &g[index];
+ }
+ }
+
+ gt->perf.num_perf_groups = num_groups;
+ gt->perf.group = g;
+
+ oa_init_groups(gt);
+
+ return 0;
+}
+
+static int oa_init_engine_groups(struct i915_perf *perf)
+{
+ struct intel_gt *gt;
+ int i, ret;
+
+ for_each_gt(gt, perf->i915, i) {
+ ret = oa_init_gt(gt);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
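
Once each engine's oa_group pointer is set up here, the engine_supports_oa() and engine_supports_oa_format() checks used by read_properties_unlocked() reduce to simple pointer and type tests. A sketch of what those helpers presumably look like (their definitions sit outside this hunk):

	static bool engine_supports_oa(const struct intel_engine_cs *engine)
	{
		return engine->oa_group;
	}

	static bool engine_supports_oa_format(struct intel_engine_cs *engine,
					      int type)
	{
		return engine->oa_group && engine->oa_group->type == type;
	}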
+
static void oa_init_supported_formats(struct i915_perf *perf)
{
struct drm_i915_private *i915 = perf->i915;
@@ -4411,11 +4985,55 @@ static void oa_init_supported_formats(struct i915_perf *perf)
oa_format_add(perf, I915_OA_FORMAT_C4_B8);
break;
+ case INTEL_DG2:
+ oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
+ oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
+ break;
+
+ case INTEL_METEORLAKE:
+ oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
+ oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
+ oa_format_add(perf, I915_OAM_FORMAT_MPEC8u64_B8_C8);
+ oa_format_add(perf, I915_OAM_FORMAT_MPEC8u32_B8_C8);
+ break;
+
default:
MISSING_CASE(platform);
}
}
+static void i915_perf_init_info(struct drm_i915_private *i915)
+{
+ struct i915_perf *perf = &i915->perf;
+
+ switch (GRAPHICS_VER(i915)) {
+ case 8:
+ perf->ctx_oactxctrl_offset = 0x120;
+ perf->ctx_flexeu0_offset = 0x2ce;
+ perf->gen8_valid_ctx_bit = BIT(25);
+ break;
+ case 9:
+ perf->ctx_oactxctrl_offset = 0x128;
+ perf->ctx_flexeu0_offset = 0x3de;
+ perf->gen8_valid_ctx_bit = BIT(16);
+ break;
+ case 11:
+ perf->ctx_oactxctrl_offset = 0x124;
+ perf->ctx_flexeu0_offset = 0x78e;
+ perf->gen8_valid_ctx_bit = BIT(16);
+ break;
+ case 12:
+ perf->gen8_valid_ctx_bit = BIT(16);
+ /*
+ * Calculate offset at runtime in oa_pin_context for gen12 and
+ * cache the value in perf->ctx_oactxctrl_offset.
+ */
+ break;
+ default:
+ MISSING_CASE(GRAPHICS_VER(i915));
+ }
+}
+
/**
* i915_perf_init - initialize i915-perf state on module bind
* @i915: i915 device instance
@@ -4425,16 +5043,10 @@ static void oa_init_supported_formats(struct i915_perf *perf)
* Note: i915-perf initialization is split into an 'init' and 'register'
* phase with the i915_perf_register() exposing state to userspace.
*/
-void i915_perf_init(struct drm_i915_private *i915)
+int i915_perf_init(struct drm_i915_private *i915)
{
struct i915_perf *perf = &i915->perf;
- /* XXX const struct i915_perf_ops! */
-
- /* i915_perf is not enabled for DG2 yet */
- if (IS_DG2(i915))
- return;
-
perf->oa_formats = oa_formats;
if (IS_HASWELL(i915)) {
perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
@@ -4454,6 +5066,7 @@ void i915_perf_init(struct drm_i915_private *i915)
* execlist mode by default.
*/
perf->ops.read = gen8_oa_read;
+ i915_perf_init_info(i915);
if (IS_GRAPHICS_VER(i915, 8, 9)) {
perf->ops.is_valid_b_counter_reg =
@@ -4473,18 +5086,6 @@ void i915_perf_init(struct drm_i915_private *i915)
perf->ops.enable_metric_set = gen8_enable_metric_set;
perf->ops.disable_metric_set = gen8_disable_metric_set;
perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
-
- if (GRAPHICS_VER(i915) == 8) {
- perf->ctx_oactxctrl_offset = 0x120;
- perf->ctx_flexeu0_offset = 0x2ce;
-
- perf->gen8_valid_ctx_bit = BIT(25);
- } else {
- perf->ctx_oactxctrl_offset = 0x128;
- perf->ctx_flexeu0_offset = 0x3de;
-
- perf->gen8_valid_ctx_bit = BIT(16);
- }
} else if (GRAPHICS_VER(i915) == 11) {
perf->ops.is_valid_b_counter_reg =
gen7_is_valid_b_counter_addr;
@@ -4498,13 +5099,10 @@ void i915_perf_init(struct drm_i915_private *i915)
perf->ops.enable_metric_set = gen8_enable_metric_set;
perf->ops.disable_metric_set = gen11_disable_metric_set;
perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
-
- perf->ctx_oactxctrl_offset = 0x124;
- perf->ctx_flexeu0_offset = 0x78e;
-
- perf->gen8_valid_ctx_bit = BIT(16);
} else if (GRAPHICS_VER(i915) == 12) {
perf->ops.is_valid_b_counter_reg =
+ HAS_OA_SLICE_CONTRIB_LIMITS(i915) ?
+ xehp_is_valid_b_counter_addr :
gen12_is_valid_b_counter_addr;
perf->ops.is_valid_mux_reg =
gen12_is_valid_mux_addr;
@@ -4516,14 +5114,15 @@ void i915_perf_init(struct drm_i915_private *i915)
perf->ops.enable_metric_set = gen12_enable_metric_set;
perf->ops.disable_metric_set = gen12_disable_metric_set;
perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;
-
- perf->ctx_flexeu0_offset = 0;
- perf->ctx_oactxctrl_offset = 0x144;
}
}
if (perf->ops.enable_metric_set) {
- mutex_init(&perf->lock);
+ struct intel_gt *gt;
+ int i, ret;
+
+ for_each_gt(gt, i915, i)
+ mutex_init(&gt->perf.lock);
/* Choose a representative limit */
oa_sample_rate_hard_limit = to_gt(i915)->clock_frequency / 2;
@@ -4559,8 +5158,17 @@ void i915_perf_init(struct drm_i915_private *i915)
perf->i915 = i915;
+ ret = oa_init_engine_groups(perf);
+ if (ret) {
+ drm_err(&i915->drm,
+ "OA initialization failed %d\n", ret);
+ return ret;
+ }
+
oa_init_supported_formats(perf);
}
+
+ return 0;
}
static int destroy_config(int id, void *p, void *data)
@@ -4587,10 +5195,15 @@ void i915_perf_sysctl_unregister(void)
void i915_perf_fini(struct drm_i915_private *i915)
{
struct i915_perf *perf = &i915->perf;
+ struct intel_gt *gt;
+ int i;
if (!perf->i915)
return;
+ for_each_gt(gt, perf->i915, i)
+ kfree(gt->perf.group);
+
idr_for_each(&perf->metrics_idr, destroy_config, perf);
idr_destroy(&perf->metrics_idr);
@@ -4600,10 +5213,11 @@ void i915_perf_fini(struct drm_i915_private *i915)
/**
* i915_perf_ioctl_version - Version of the i915-perf subsystem
+ * @i915: The i915 device
*
* This version number is used by userspace to detect available features.
*/
-int i915_perf_ioctl_version(void)
+int i915_perf_ioctl_version(struct drm_i915_private *i915)
{
/*
* 1: Initial version
@@ -4624,8 +5238,23 @@ int i915_perf_ioctl_version(void)
*
* 5: Add DRM_I915_PERF_PROP_POLL_OA_PERIOD parameter that controls the
* interval for the hrtimer used to check for OA data.
+ *
+ * 6: Add DRM_I915_PERF_PROP_OA_ENGINE_CLASS and
+ * DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE
+ *
+ * 7: Add support for video decode and enhancement classes.
*/
- return 5;
+
+ /*
+ * Wa_14017512683: mtl[a0..c0): Use of OAM must be preceded with Media
+ * C6 disable in BIOS. If Media C6 is enabled in BIOS, return version 6
+ * to indicate that OA media is not supported.
+ */
+ if (IS_MEDIA_GT_IP_STEP(i915->media_gt, IP_VER(13, 0), STEP_A0, STEP_C0) &&
+ intel_check_bios_c6_setup(&i915->media_gt->rc6))
+ return 6;
+
+ return 7;
}
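
Userspace reads this value through the I915_PARAM_PERF_REVISION getparam and should gate OAM (media OA) usage on revision 7 or newer, since the workaround above can cap the reported revision at 6. A hedged sketch of the query:

	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	/* Returns the i915-perf revision, or -1 on error. */
	static int perf_revision(int drm_fd)
	{
		int value = 0;
		struct drm_i915_getparam gp = {
			.param = I915_PARAM_PERF_REVISION,
			.value = &value,
		};

		if (ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp))
			return -1;

		return value;	/* >= 7 means OAM streams are usable */
	}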
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)