diff options
Diffstat (limited to 'drivers/gpu/drm/xe/xe_oa.c')
-rw-r--r-- | drivers/gpu/drm/xe/xe_oa.c | 224 |
1 files changed, 165 insertions, 59 deletions
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index fb842fa0552e..d991fbd90f20 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -43,6 +43,12 @@ #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) #define XE_OA_UNIT_INVALID U32_MAX +enum xe_oam_unit_type { + XE_OAM_UNIT_SAG, + XE_OAM_UNIT_SCMI_0, + XE_OAM_UNIT_SCMI_1, +}; + enum xe_oa_submit_deps { XE_OA_SUBMIT_NO_DEPS, XE_OA_SUBMIT_ADD_DEPS, @@ -77,7 +83,7 @@ struct xe_oa_config { struct xe_oa_open_param { struct xe_file *xef; - u32 oa_unit_id; + struct xe_oa_unit *oa_unit; bool sample; u32 metric_set; enum xe_oa_format_name oa_format; @@ -194,7 +200,7 @@ static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo, struct dma_fence *l static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream) { - return &stream->hwe->oa_unit->regs; + return &stream->oa_unit->regs; } static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream) @@ -397,7 +403,7 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); - int size_exponent = __ffs(stream->oa_buffer.bo->size); + int size_exponent = __ffs(xe_bo_size(stream->oa_buffer.bo)); u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT; struct xe_mmio *mmio = &stream->gt->mmio; unsigned long flags; @@ -429,7 +435,7 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ - memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size); + memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo)); } static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) @@ -454,7 +460,7 @@ static u32 __oa_ccs_select(struct xe_oa_stream *stream) static u32 __oactrl_used_bits(struct xe_oa_stream *stream) { - return stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ? + return stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ? OAG_OACONTROL_USED_BITS : OAM_OACONTROL_USED_BITS; } @@ -475,7 +481,7 @@ static void xe_oa_enable(struct xe_oa_stream *stream) __oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE; if (GRAPHICS_VER(stream->oa->xe) >= 20 && - stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) val |= OAG_OACONTROL_OA_PES_DISAG_EN; xe_mmio_rmw32(&stream->gt->mmio, regs->oa_ctrl, __oactrl_used_bits(stream), val); @@ -838,11 +844,16 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) /* Reset PMON Enable to save power. */ xe_mmio_rmw32(mmio, XELPMP_SQCNT1, sqcnt1, 0); + + if ((stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM || + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM_SAG) && + GRAPHICS_VER(stream->oa->xe) >= 30) + xe_mmio_rmw32(mmio, OAM_COMPRESSION_T3_CONTROL, OAM_LAT_MEASURE_ENABLE, 0); } static void xe_oa_stream_destroy(struct xe_oa_stream *stream) { - struct xe_oa_unit *u = stream->hwe->oa_unit; + struct xe_oa_unit *u = stream->oa_unit; struct xe_gt *gt = stream->hwe->gt; if (WARN_ON(stream != u->exclusive_stream)) @@ -1054,7 +1065,7 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream) static u32 oag_buf_size_select(const struct xe_oa_stream *stream) { return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT, - stream->oa_buffer.bo->size > SZ_16M ? + xe_bo_size(stream->oa_buffer.bo) > SZ_16M ? OAG_OA_DEBUG_BUF_SIZE_SELECT : 0); } @@ -1105,9 +1116,13 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) */ sqcnt1 = SQCNT1_PMON_ENABLE | (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0); - xe_mmio_rmw32(mmio, XELPMP_SQCNT1, 0, sqcnt1); + if ((stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM || + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM_SAG) && + GRAPHICS_VER(stream->oa->xe) >= 30) + xe_mmio_rmw32(mmio, OAM_COMPRESSION_T3_CONTROL, 0, OAM_LAT_MEASURE_ENABLE); + /* Configure OAR/OAC */ if (stream->exec_q) { ret = xe_oa_configure_oa_context(stream, true); @@ -1139,14 +1154,31 @@ static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *n return -EINVAL; } +static struct xe_oa_unit *xe_oa_lookup_oa_unit(struct xe_oa *oa, u32 oa_unit_id) +{ + struct xe_gt *gt; + int gt_id, i; + + for_each_gt(gt, oa->xe, gt_id) { + for (i = 0; i < gt->oa.num_oa_units; i++) { + struct xe_oa_unit *u = >->oa.oa_unit[i]; + + if (u->oa_unit_id == oa_unit_id) + return u; + } + } + + return NULL; +} + static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, struct xe_oa_open_param *param) { - if (value >= oa->oa_unit_ids) { + param->oa_unit = xe_oa_lookup_oa_unit(oa, value); + if (!param->oa_unit) { drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); return -EINVAL; } - param->oa_unit_id = value; return 0; } @@ -1550,7 +1582,7 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) { - struct drm_xe_oa_stream_info info = { .oa_buf_size = stream->oa_buffer.bo->size, }; + struct drm_xe_oa_stream_info info = { .oa_buf_size = xe_bo_size(stream->oa_buffer.bo), }; void __user *uaddr = (void __user *)arg; if (copy_to_user(uaddr, &info, sizeof(info))) @@ -1636,7 +1668,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) } /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ - if (vma->vm_end - vma->vm_start != stream->oa_buffer.bo->size) { + if (vma->vm_end - vma->vm_start != xe_bo_size(stream->oa_buffer.bo)) { drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); return -EINVAL; } @@ -1677,13 +1709,13 @@ static const struct file_operations xe_oa_fops = { static int xe_oa_stream_init(struct xe_oa_stream *stream, struct xe_oa_open_param *param) { - struct xe_oa_unit *u = param->hwe->oa_unit; struct xe_gt *gt = param->hwe->gt; unsigned int fw_ref; int ret; stream->exec_q = param->exec_q; stream->poll_period_ns = DEFAULT_POLL_PERIOD_NS; + stream->oa_unit = param->oa_unit; stream->hwe = param->hwe; stream->gt = stream->hwe->gt; stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format]; @@ -1704,7 +1736,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, * buffer whose size, circ_size, is a multiple of the report size */ if (GRAPHICS_VER(stream->oa->xe) >= 20 && - stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) + stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) stream->oa_buffer.circ_size = param->oa_buffer_size - param->oa_buffer_size % stream->oa_buffer.format->size; @@ -1762,7 +1794,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, drm_dbg(&stream->oa->xe->drm, "opening stream oa config uuid=%s\n", stream->oa_config->uuid); - WRITE_ONCE(u->exclusive_stream, stream); + WRITE_ONCE(stream->oa_unit->exclusive_stream, stream); hrtimer_setup(&stream->poll_check_timer, xe_oa_poll_check_timer_cb, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -1798,7 +1830,7 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, int ret; /* We currently only allow exclusive access */ - if (param->hwe->oa_unit->exclusive_stream) { + if (param->oa_unit->exclusive_stream) { drm_dbg(&oa->xe->drm, "OA unit already in use\n"); ret = -EBUSY; goto exit; @@ -1874,13 +1906,14 @@ static u64 oa_exponent_to_ns(struct xe_gt *gt, int exponent) return div_u64(nom + den - 1, den); } -static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type) +static bool oa_unit_supports_oa_format(struct xe_oa_open_param *param, int type) { - switch (hwe->oa_unit->type) { + switch (param->oa_unit->type) { case DRM_XE_OA_UNIT_TYPE_OAG: return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR || type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC; case DRM_XE_OA_UNIT_TYPE_OAM: + case DRM_XE_OA_UNIT_TYPE_OAM_SAG: return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC; default: return false; @@ -1899,37 +1932,48 @@ u16 xe_oa_unit_id(struct xe_hw_engine *hwe) hwe->oa_unit->oa_unit_id : U16_MAX; } +/* A hwe must be assigned to stream/oa_unit for batch submissions */ static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param) { - struct xe_gt *gt; - int i, ret = 0; + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + int ret = 0; + + /* If not provided, OA unit defaults to OA unit 0 as per uapi */ + if (!param->oa_unit) + param->oa_unit = &xe_device_get_gt(oa->xe, 0)->oa.oa_unit[0]; + /* When we have an exec_q, get hwe from the exec_q */ if (param->exec_q) { - /* When we have an exec_q, get hwe from the exec_q */ param->hwe = xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class, param->engine_instance, true); - } else { - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - - /* Else just get the first hwe attached to the oa unit */ - for_each_gt(gt, oa->xe, i) { - for_each_hw_engine(hwe, gt, id) { - if (xe_oa_unit_id(hwe) == param->oa_unit_id) { - param->hwe = hwe; - goto out; - } - } - } + if (!param->hwe || param->hwe->oa_unit != param->oa_unit) + goto err; + goto out; } -out: - if (!param->hwe || xe_oa_unit_id(param->hwe) != param->oa_unit_id) { - drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n", - param->exec_q ? param->exec_q->class : -1, - param->engine_instance, param->oa_unit_id); - ret = -EINVAL; + + /* Else just get the first hwe attached to the oa unit */ + for_each_hw_engine(hwe, param->oa_unit->gt, id) { + if (hwe->oa_unit == param->oa_unit) { + param->hwe = hwe; + goto out; + } } + /* If we still didn't find a hwe, just get one with a valid oa_unit from the same gt */ + for_each_hw_engine(hwe, param->oa_unit->gt, id) { + if (!hwe->oa_unit) + continue; + + param->hwe = hwe; + goto out; + } +err: + drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n", + param->exec_q ? param->exec_q->class : -1, + param->engine_instance, param->oa_unit->oa_unit_id); + ret = -EINVAL; +out: return ret; } @@ -2007,7 +2051,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f f = &oa->oa_formats[param.oa_format]; if (!param.oa_format || !f->size || - !engine_supports_oa_format(param.hwe, f->type)) { + !oa_unit_supports_oa_format(¶m, f->type)) { drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n", param.oa_format, f->type, f->size, param.hwe->class); ret = -EINVAL; @@ -2155,6 +2199,7 @@ static const struct xe_mmio_range gen12_oa_mux_regs[] = { static const struct xe_mmio_range xe2_oa_mux_regs[] = { { .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */ { .start = 0x8704, .end = 0x8704 }, /* LMEM_LAT_MEASURE_MCFG_GRP */ + { .start = 0xB01C, .end = 0xB01C }, /* LNCF_MISC_CONFIG_REGISTER0 */ { .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */ { .start = 0xD0E0, .end = 0xD0F4 }, /* VISACTL */ { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ @@ -2448,20 +2493,38 @@ int xe_oa_register(struct xe_device *xe) static u32 num_oa_units_per_gt(struct xe_gt *gt) { - return 1; + if (xe_gt_is_main_type(gt) || GRAPHICS_VER(gt_to_xe(gt)) < 20) + return 1; + else if (!IS_DGFX(gt_to_xe(gt))) + return XE_OAM_UNIT_SCMI_0 + 1; /* SAG + SCMI_0 */ + else + return XE_OAM_UNIT_SCMI_1 + 1; /* SAG + SCMI_0 + SCMI_1 */ } static u32 __hwe_oam_unit(struct xe_hw_engine *hwe) { - if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) { - /* - * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices - * within the gt use the same OAM. All MTL/LNL SKUs list 1 SA MEDIA - */ - xe_gt_WARN_ON(hwe->gt, hwe->gt->info.type != XE_GT_TYPE_MEDIA); + if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) < 1270) + return XE_OA_UNIT_INVALID; + + xe_gt_WARN_ON(hwe->gt, xe_gt_is_main_type(hwe->gt)); + if (GRAPHICS_VER(gt_to_xe(hwe->gt)) < 20) return 0; - } + /* + * XE_OAM_UNIT_SAG has only GSCCS attached to it, but only on some platforms. Also + * GSCCS cannot be used to submit batches to program the OAM unit. Therefore we don't + * assign an OA unit to GSCCS. This means that XE_OAM_UNIT_SAG is exposed as an OA + * unit without attached engines. Fused off engines can also result in oa_unit's with + * num_engines == 0. OA streams can be opened on all OA units. + */ + else if (hwe->engine_id == XE_HW_ENGINE_GSCCS0) + return XE_OA_UNIT_INVALID; + else if (!IS_DGFX(gt_to_xe(hwe->gt))) + return XE_OAM_UNIT_SCMI_0; + else if (hwe->class == XE_ENGINE_CLASS_VIDEO_DECODE) + return (hwe->instance / 2 & 0x1) + 1; + else if (hwe->class == XE_ENGINE_CLASS_VIDEO_ENHANCE) + return (hwe->instance & 0x1) + 1; return XE_OA_UNIT_INVALID; } @@ -2475,6 +2538,7 @@ static u32 __hwe_oa_unit(struct xe_hw_engine *hwe) case XE_ENGINE_CLASS_VIDEO_DECODE: case XE_ENGINE_CLASS_VIDEO_ENHANCE: + case XE_ENGINE_CLASS_OTHER: return __hwe_oam_unit(hwe); default: @@ -2514,20 +2578,29 @@ static struct xe_oa_regs __oag_regs(void) static void __xe_oa_init_oa_units(struct xe_gt *gt) { - const u32 mtl_oa_base[] = { 0x13000 }; + /* Actual address is MEDIA_GT_GSI_OFFSET + oam_base_addr[i] */ + const u32 oam_base_addr[] = { + [XE_OAM_UNIT_SAG] = 0x13000, + [XE_OAM_UNIT_SCMI_0] = 0x14000, + [XE_OAM_UNIT_SCMI_1] = 0x14800, + }; int i, num_units = gt->oa.num_oa_units; for (i = 0; i < num_units; i++) { struct xe_oa_unit *u = >->oa.oa_unit[i]; - if (gt->info.type != XE_GT_TYPE_MEDIA) { + if (xe_gt_is_main_type(gt)) { u->regs = __oag_regs(); u->type = DRM_XE_OA_UNIT_TYPE_OAG; - } else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { - u->regs = __oam_regs(mtl_oa_base[i]); - u->type = DRM_XE_OA_UNIT_TYPE_OAM; + } else { + xe_gt_assert(gt, GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270); + u->regs = __oam_regs(oam_base_addr[i]); + u->type = i == XE_OAM_UNIT_SAG && GRAPHICS_VER(gt_to_xe(gt)) >= 20 ? + DRM_XE_OA_UNIT_TYPE_OAM_SAG : DRM_XE_OA_UNIT_TYPE_OAM; } + u->gt = gt; + xe_mmio_write32(>->mmio, u->regs.oa_ctrl, 0); /* Ensure MMIO trigger remains disabled till there is a stream */ @@ -2560,10 +2633,6 @@ static int xe_oa_init_gt(struct xe_gt *gt) } } - /* - * Fused off engines can result in oa_unit's with num_engines == 0. These units - * will appear in OA unit query, but no OA streams can be opened on them. - */ gt->oa.num_oa_units = num_oa_units; gt->oa.oa_unit = u; @@ -2574,17 +2643,54 @@ static int xe_oa_init_gt(struct xe_gt *gt) return 0; } +static void xe_oa_print_gt_oa_units(struct xe_gt *gt) +{ + enum xe_hw_engine_id hwe_id; + struct xe_hw_engine *hwe; + struct xe_oa_unit *u; + char buf[256]; + int i, n; + + for (i = 0; i < gt->oa.num_oa_units; i++) { + u = >->oa.oa_unit[i]; + buf[0] = '\0'; + n = 0; + + for_each_hw_engine(hwe, gt, hwe_id) + if (xe_oa_unit_id(hwe) == u->oa_unit_id) + n += scnprintf(buf + n, sizeof(buf) - n, "%s ", hwe->name); + + xe_gt_dbg(gt, "oa_unit %d, type %d, Engines: %s\n", u->oa_unit_id, u->type, buf); + } +} + +static void xe_oa_print_oa_units(struct xe_oa *oa) +{ + struct xe_gt *gt; + int gt_id; + + for_each_gt(gt, oa->xe, gt_id) + xe_oa_print_gt_oa_units(gt); +} + static int xe_oa_init_oa_units(struct xe_oa *oa) { struct xe_gt *gt; int i, ret; + /* Needed for OAM implementation here */ + BUILD_BUG_ON(XE_OAM_UNIT_SAG != 0); + BUILD_BUG_ON(XE_OAM_UNIT_SCMI_0 != 1); + BUILD_BUG_ON(XE_OAM_UNIT_SCMI_1 != 2); + for_each_gt(gt, oa->xe, i) { ret = xe_oa_init_gt(gt); if (ret) return ret; } + xe_oa_print_oa_units(oa); + return 0; } |