summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/xe
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/xe')
-rw-r--r--drivers/gpu/drm/xe/Kconfig11
-rw-r--r--drivers/gpu/drm/xe/display/xe_display.c2
-rw-r--r--drivers/gpu/drm/xe/display/xe_dsb_buffer.c11
-rw-r--r--drivers/gpu/drm/xe/display/xe_fb_pin.c5
-rw-r--r--drivers/gpu/drm/xe/regs/xe_mchbar_regs.h11
-rw-r--r--drivers/gpu/drm/xe/regs/xe_pcode_regs.h4
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c48
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump.c38
-rw-r--r--drivers/gpu/drm/xe/xe_device.c72
-rw-r--r--drivers/gpu/drm/xe/xe_device_sysfs.c2
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h4
-rw-r--r--drivers/gpu/drm/xe/xe_drv.h2
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c15
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt.c11
-rw-r--r--drivers/gpu/drm/xe/xe_gpu_scheduler.h10
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c15
-rw-r--r--drivers/gpu/drm/xe/xe_gt_freq.c5
-rw-r--r--drivers/gpu/drm/xe/xe_gt_pagefault.c1
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf.c19
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf.h5
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c27
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c8
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.c17
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.h5
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.c286
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.h2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c24
-rw-r--r--drivers/gpu/drm/xe/xe_hwmon.c380
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt.c11
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c72
-rw-r--r--drivers/gpu/drm/xe/xe_lrc_types.h3
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c26
-rw-r--r--drivers/gpu/drm/xe/xe_module.c2
-rw-r--r--drivers/gpu/drm/xe/xe_pci.c6
-rw-r--r--drivers/gpu/drm/xe/xe_pcode.c11
-rw-r--r--drivers/gpu/drm/xe/xe_pcode.h3
-rw-r--r--drivers/gpu/drm/xe/xe_pcode_api.h7
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c11
-rw-r--r--drivers/gpu/drm/xe/xe_pxp.c8
-rw-r--r--drivers/gpu/drm/xe/xe_ring_ops.c22
-rw-r--r--drivers/gpu/drm/xe/xe_svm.c2
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw.c6
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c27
-rw-r--r--drivers/gpu/drm/xe/xe_vm.h69
-rw-r--r--drivers/gpu/drm/xe/xe_vm_types.h8
-rw-r--r--drivers/gpu/drm/xe/xe_wa_oob.rules11
47 files changed, 977 insertions, 370 deletions
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 9bce047901b2..99a91355842e 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -1,7 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
config DRM_XE
- tristate "Intel Xe Graphics"
- depends on DRM && PCI && MMU && (m || (y && KUNIT=y))
+ tristate "Intel Xe2 Graphics"
+ depends on DRM && PCI
+ depends on KUNIT || !KUNIT
+ depends on INTEL_VSEC || !INTEL_VSEC
+ depends on X86_PLATFORM_DEVICES || !(X86 && ACPI)
select INTERVAL_TREE
# we need shmfs for the swappable backing store, and in particular
# the shmem_readpage() which depends upon tmpfs
@@ -27,7 +30,6 @@ config DRM_XE
select BACKLIGHT_CLASS_DEVICE if ACPI
select INPUT if ACPI
select ACPI_VIDEO if X86 && ACPI
- select X86_PLATFORM_DEVICES if X86 && ACPI
select ACPI_WMI if X86 && ACPI
select SYNC_FILE
select IOSF_MBI
@@ -45,7 +47,8 @@ config DRM_XE
select AUXILIARY_BUS
select HMM_MIRROR
help
- Experimental driver for Intel Xe series GPUs
+ Driver for Intel Xe2 series GPUs and later. Experimental support
+ for Xe series is also available.
If "M" is selected, the module will be called xe.
diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index 68f064f33d4b..9f4ade25787a 100644
--- a/drivers/gpu/drm/xe/display/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
@@ -104,6 +104,8 @@ int xe_display_create(struct xe_device *xe)
spin_lock_init(&xe->display.fb_tracking.lock);
xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0);
+ if (!xe->display.hotplug.dp_wq)
+ return -ENOMEM;
return drmm_add_action_or_reset(&xe->drm, display_destroy, NULL);
}
diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
index f95375451e2f..9f941fc2e36b 100644
--- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
+++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
@@ -17,10 +17,7 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val)
{
- struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
-
iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val);
- xe_device_l2_flush(xe);
}
u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
@@ -30,12 +27,9 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size)
{
- struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
-
WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf));
iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size);
- xe_device_l2_flush(xe);
}
bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size)
@@ -74,9 +68,12 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf)
void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
{
+ struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
+
/*
* The memory barrier here is to ensure coherency of DSB vs MMIO,
* both for weak ordering archs and discrete cards.
*/
- xe_device_wmb(dsb_buf->vma->bo->tile->xe);
+ xe_device_wmb(xe);
+ xe_device_l2_flush(xe);
}
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index d918ae1c8061..55259969480b 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -164,6 +164,9 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
vma->dpt = dpt;
vma->node = dpt->ggtt_node[tile0->id];
+
+ /* Ensure DPT writes are flushed */
+ xe_device_l2_flush(xe);
return 0;
}
@@ -333,8 +336,6 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
if (ret)
goto err_unpin;
- /* Ensure DPT writes are flushed */
- xe_device_l2_flush(xe);
return vma;
err_unpin:
diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
index f5e5234857c1..ef2bf984723f 100644
--- a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
@@ -38,10 +38,11 @@
#define TEMP_MASK REG_GENMASK(7, 0)
#define PCU_CR_PACKAGE_RAPL_LIMIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0)
-#define PKG_PWR_LIM_1 REG_GENMASK(14, 0)
-#define PKG_PWR_LIM_1_EN REG_BIT(15)
-#define PKG_PWR_LIM_1_TIME REG_GENMASK(23, 17)
-#define PKG_PWR_LIM_1_TIME_X REG_GENMASK(23, 22)
-#define PKG_PWR_LIM_1_TIME_Y REG_GENMASK(21, 17)
+#define PWR_LIM_VAL REG_GENMASK(14, 0)
+#define PWR_LIM_EN REG_BIT(15)
+#define PWR_LIM REG_GENMASK(15, 0)
+#define PWR_LIM_TIME REG_GENMASK(23, 17)
+#define PWR_LIM_TIME_X REG_GENMASK(23, 22)
+#define PWR_LIM_TIME_Y REG_GENMASK(21, 17)
#endif /* _XE_MCHBAR_REGS_H_ */
diff --git a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h
index c7d5d782e3f9..c556a04670ee 100644
--- a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h
@@ -18,16 +18,12 @@
#define PVC_GT0_PLATFORM_ENERGY_STATUS XE_REG(0x28106c)
#define PVC_GT0_PACKAGE_POWER_SKU XE_REG(0x281080)
-#define BMG_PACKAGE_POWER_SKU XE_REG(0x138098)
-#define BMG_PACKAGE_POWER_SKU_UNIT XE_REG(0x1380dc)
#define BMG_PACKAGE_ENERGY_STATUS XE_REG(0x138120)
#define BMG_FAN_1_SPEED XE_REG(0x138140)
#define BMG_FAN_2_SPEED XE_REG(0x138170)
#define BMG_FAN_3_SPEED XE_REG(0x1381a0)
#define BMG_VRAM_TEMPERATURE XE_REG(0x1382c0)
#define BMG_PACKAGE_TEMPERATURE XE_REG(0x138434)
-#define BMG_PACKAGE_RAPL_LIMIT XE_REG(0x138440)
#define BMG_PLATFORM_ENERGY_STATUS XE_REG(0x138458)
-#define BMG_PLATFORM_POWER_LIMIT XE_REG(0x138460)
#endif /* _XE_PCODE_REGS_H_ */
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index d99d91fe8aa9..7aa2c17825da 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -841,21 +841,6 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
goto out;
}
- /* Reject BO eviction if BO is bound to current VM. */
- if (evict && ctx->resv) {
- struct drm_gpuvm_bo *vm_bo;
-
- drm_gem_for_each_gpuvm_bo(vm_bo, &bo->ttm.base) {
- struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
-
- if (xe_vm_resv(vm) == ctx->resv &&
- xe_vm_in_preempt_fence_mode(vm)) {
- ret = -EBUSY;
- goto out;
- }
- }
- }
-
/*
* Failed multi-hop where the old_mem is still marked as
* TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
@@ -1013,6 +998,25 @@ static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx,
return lret;
}
+static bool
+xe_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place)
+{
+ struct drm_gpuvm_bo *vm_bo;
+
+ if (!ttm_bo_eviction_valuable(bo, place))
+ return false;
+
+ if (!xe_bo_is_xe_bo(bo))
+ return true;
+
+ drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
+ if (xe_vm_is_validating(gpuvm_to_vm(vm_bo->vm)))
+ return false;
+ }
+
+ return true;
+}
+
/**
* xe_bo_shrink() - Try to shrink an xe bo.
* @ctx: The struct ttm_operation_ctx used for shrinking.
@@ -1047,7 +1051,7 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
(flags.purge && !xe_tt->purgeable))
return -EBUSY;
- if (!ttm_bo_eviction_valuable(bo, &place))
+ if (!xe_bo_eviction_valuable(bo, &place))
return -EBUSY;
if (!xe_bo_is_xe_bo(bo) || !xe_bo_get_unless_zero(xe_bo))
@@ -1588,7 +1592,7 @@ const struct ttm_device_funcs xe_ttm_funcs = {
.io_mem_pfn = xe_ttm_io_mem_pfn,
.access_memory = xe_ttm_access_memory,
.release_notify = xe_ttm_bo_release_notify,
- .eviction_valuable = ttm_bo_eviction_valuable,
+ .eviction_valuable = xe_bo_eviction_valuable,
.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
.swap_notify = xe_ttm_bo_swap_notify,
};
@@ -2431,6 +2435,8 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
.no_wait_gpu = false,
.gfp_retry_mayfail = true,
};
+ struct pin_cookie cookie;
+ int ret;
if (vm) {
lockdep_assert_held(&vm->lock);
@@ -2440,8 +2446,12 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
ctx.resv = xe_vm_resv(vm);
}
+ cookie = xe_vm_set_validating(vm, allow_res_evict);
trace_xe_bo_validate(bo);
- return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
+ ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
+ xe_vm_clear_validating(vm, allow_res_evict, cookie);
+
+ return ret;
}
bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
@@ -2557,7 +2567,7 @@ typedef int (*xe_gem_create_set_property_fn)(struct xe_device *xe,
u64 value);
static const xe_gem_create_set_property_fn gem_create_set_property_funcs[] = {
- [DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY] = gem_create_set_pxp_type,
+ [DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE] = gem_create_set_pxp_type,
};
static int gem_create_user_ext_set_property(struct xe_device *xe,
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 7a8af2311318..11e60d687572 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -171,14 +171,32 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
#define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G)
+/**
+ * xe_devcoredump_read() - Read data from the Xe device coredump snapshot
+ * @buffer: Destination buffer to copy the coredump data into
+ * @offset: Offset in the coredump data to start reading from
+ * @count: Number of bytes to read
+ * @data: Pointer to the xe_devcoredump structure
+ * @datalen: Length of the data (unused)
+ *
+ * Reads a chunk of the coredump snapshot data into the provided buffer.
+ * If the devcoredump is smaller than 1.5 GB (XE_DEVCOREDUMP_CHUNK_MAX),
+ * it is read directly from a pre-written buffer. For larger devcoredumps,
+ * the pre-written buffer must be periodically repopulated from the snapshot
+ * state due to kmalloc size limitations.
+ *
+ * Return: Number of bytes copied on success, or a negative error code on failure.
+ */
static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
size_t count, void *data, size_t datalen)
{
struct xe_devcoredump *coredump = data;
struct xe_devcoredump_snapshot *ss;
- ssize_t byte_copied;
+ ssize_t byte_copied = 0;
u32 chunk_offset;
ssize_t new_chunk_position;
+ bool pm_needed = false;
+ int ret = 0;
if (!coredump)
return -ENODEV;
@@ -188,20 +206,19 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
/* Ensure delayed work is captured before continuing */
flush_work(&ss->work);
- if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX)
+ pm_needed = ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX;
+ if (pm_needed)
xe_pm_runtime_get(gt_to_xe(ss->gt));
mutex_lock(&coredump->lock);
if (!ss->read.buffer) {
- mutex_unlock(&coredump->lock);
- return -ENODEV;
+ ret = -ENODEV;
+ goto unlock;
}
- if (offset >= ss->read.size) {
- mutex_unlock(&coredump->lock);
- return 0;
- }
+ if (offset >= ss->read.size)
+ goto unlock;
new_chunk_position = div_u64_rem(offset,
XE_DEVCOREDUMP_CHUNK_MAX,
@@ -221,12 +238,13 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
ss->read.size - offset;
memcpy(buffer, ss->read.buffer + chunk_offset, byte_copied);
+unlock:
mutex_unlock(&coredump->lock);
- if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX)
+ if (pm_needed)
xe_pm_runtime_put(gt_to_xe(ss->gt));
- return byte_copied;
+ return byte_copied ? byte_copied : ret;
}
static void xe_devcoredump_free(void *data)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index c02c4c4e9412..e9f3c1a53db2 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -40,6 +40,7 @@
#include "xe_gt_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_guc.h"
+#include "xe_guc_pc.h"
#include "xe_hw_engine_group.h"
#include "xe_hwmon.h"
#include "xe_irq.h"
@@ -986,38 +987,15 @@ void xe_device_wmb(struct xe_device *xe)
xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0);
}
-/**
- * xe_device_td_flush() - Flush transient L3 cache entries
- * @xe: The device
- *
- * Display engine has direct access to memory and is never coherent with L3/L4
- * caches (or CPU caches), however KMD is responsible for specifically flushing
- * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
- * can happen from such a surface without seeing corruption.
- *
- * Display surfaces can be tagged as transient by mapping it using one of the
- * various L3:XD PAT index modes on Xe2.
- *
- * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
- * at the end of each submission via PIPE_CONTROL for compute/render, since SA
- * Media is not coherent with L3 and we want to support render-vs-media
- * usescases. For other engines like copy/blt the HW internally forces uncached
- * behaviour, hence why we can skip the TDF on such platforms.
+/*
+ * Issue a TRANSIENT_FLUSH_REQUEST and wait for completion on each gt.
*/
-void xe_device_td_flush(struct xe_device *xe)
+static void tdf_request_sync(struct xe_device *xe)
{
- struct xe_gt *gt;
unsigned int fw_ref;
+ struct xe_gt *gt;
u8 id;
- if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
- return;
-
- if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
- xe_device_l2_flush(xe);
- return;
- }
-
for_each_gt(gt, xe, id) {
if (xe_gt_is_media_type(gt))
continue;
@@ -1027,6 +1005,7 @@ void xe_device_td_flush(struct xe_device *xe)
return;
xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
+
/*
* FIXME: We can likely do better here with our choice of
* timeout. Currently we just assume the worst case, i.e. 150us,
@@ -1057,15 +1036,52 @@ void xe_device_l2_flush(struct xe_device *xe)
return;
spin_lock(&gt->global_invl_lock);
- xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
+ xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
xe_gt_err_once(gt, "Global invalidation timeout\n");
+
spin_unlock(&gt->global_invl_lock);
xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
+/**
+ * xe_device_td_flush() - Flush transient L3 cache entries
+ * @xe: The device
+ *
+ * Display engine has direct access to memory and is never coherent with L3/L4
+ * caches (or CPU caches), however KMD is responsible for specifically flushing
+ * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
+ * can happen from such a surface without seeing corruption.
+ *
+ * Display surfaces can be tagged as transient by mapping it using one of the
+ * various L3:XD PAT index modes on Xe2.
+ *
+ * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
+ * at the end of each submission via PIPE_CONTROL for compute/render, since SA
+ * Media is not coherent with L3 and we want to support render-vs-media
+ * usescases. For other engines like copy/blt the HW internally forces uncached
+ * behaviour, hence why we can skip the TDF on such platforms.
+ */
+void xe_device_td_flush(struct xe_device *xe)
+{
+ struct xe_gt *root_gt;
+
+ if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
+ return;
+
+ root_gt = xe_root_mmio_gt(xe);
+ if (XE_WA(root_gt, 16023588340)) {
+ /* A transient flush is not sufficient: flush the L2 */
+ xe_device_l2_flush(xe);
+ } else {
+ xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc);
+ tdf_request_sync(xe);
+ xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc);
+ }
+}
+
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{
return xe_device_has_flat_ccs(xe) ?
diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c
index 2e657692e5b5..b9440f8c781e 100644
--- a/drivers/gpu/drm/xe/xe_device_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_device_sysfs.c
@@ -115,7 +115,7 @@ auto_link_downgrade_capable_show(struct device *dev, struct device_attribute *at
xe_pm_runtime_put(xe);
cap = REG_FIELD_GET(LINK_DOWNGRADE, val);
- return sysfs_emit(buf, "%u\n", cap == DOWNGRADE_CAPABLE ? true : false);
+ return sysfs_emit(buf, "%u\n", cap == DOWNGRADE_CAPABLE);
}
static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_capable);
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index c8fa2c011666..6383a1c0d478 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -325,6 +325,10 @@ struct xe_device {
u8 has_heci_gscfi:1;
/** @info.has_llc: Device has a shared CPU+GPU last level cache */
u8 has_llc:1;
+ /** @info.has_mbx_power_limits: Device has support to manage power limits using
+ * pcode mailbox commands.
+ */
+ u8 has_mbx_power_limits:1;
/** @info.has_pxp: Device has PXP support */
u8 has_pxp:1;
/** @info.has_range_tlb_invalidation: Has range based TLB invalidations */
diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h
index d61650d4aa0b..95242a375e54 100644
--- a/drivers/gpu/drm/xe/xe_drv.h
+++ b/drivers/gpu/drm/xe/xe_drv.h
@@ -9,7 +9,7 @@
#include <drm/drm_drv.h>
#define DRIVER_NAME "xe"
-#define DRIVER_DESC "Intel Xe Graphics"
+#define DRIVER_DESC "Intel Xe2 Graphics"
/* Interface history:
*
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index ce78cee5dec6..fee22358cc09 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -114,7 +114,6 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
static int __xe_exec_queue_init(struct xe_exec_queue *q)
{
- struct xe_vm *vm = q->vm;
int i, err;
u32 flags = 0;
@@ -132,32 +131,20 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q)
flags |= XE_LRC_CREATE_RUNALONE;
}
- if (vm) {
- err = xe_vm_lock(vm, true);
- if (err)
- return err;
- }
-
for (i = 0; i < q->width; ++i) {
q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec, flags);
if (IS_ERR(q->lrc[i])) {
err = PTR_ERR(q->lrc[i]);
- goto err_unlock;
+ goto err_lrc;
}
}
- if (vm)
- xe_vm_unlock(vm);
-
err = q->ops->init(q);
if (err)
goto err_lrc;
return 0;
-err_unlock:
- if (vm)
- xe_vm_unlock(vm);
err_lrc:
for (i = i - 1; i >= 0; --i)
xe_lrc_put(q->lrc[i]);
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 7062115909f2..2c799958c1e4 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -201,6 +201,13 @@ static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = {
.ggtt_set_pte = xe_ggtt_set_pte_and_flush,
};
+static void dev_fini_ggtt(void *arg)
+{
+ struct xe_ggtt *ggtt = arg;
+
+ drain_workqueue(ggtt->wq);
+}
+
/**
* xe_ggtt_init_early - Early GGTT initialization
* @ggtt: the &xe_ggtt to be initialized
@@ -257,6 +264,10 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
if (err)
return err;
+ err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt);
+ if (err)
+ return err;
+
if (IS_SRIOV_VF(xe)) {
err = xe_gt_sriov_vf_prepare_ggtt(xe_tile_get_gt(ggtt->tile, 0));
if (err)
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
index c250ea773491..308061f0cf37 100644
--- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
@@ -51,7 +51,15 @@ static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched)
static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched)
{
- drm_sched_resubmit_jobs(&sched->base);
+ struct drm_sched_job *s_job;
+
+ list_for_each_entry(s_job, &sched->base.pending_list, list) {
+ struct drm_sched_fence *s_fence = s_job->s_fence;
+ struct dma_fence *hw_fence = s_fence->parent;
+
+ if (hw_fence && !dma_fence_is_signaled(hw_fence))
+ sched->base.ops->run_job(s_job);
+ }
}
static inline bool
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 0e5d243c9451..e3517ce2e18c 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -118,7 +118,7 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
}
- xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3);
+ xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0xF);
xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
@@ -417,6 +417,8 @@ int xe_gt_init_early(struct xe_gt *gt)
if (err)
return err;
+ xe_mocs_init_early(gt);
+
return 0;
}
@@ -630,17 +632,15 @@ int xe_gt_init(struct xe_gt *gt)
if (err)
return err;
- err = xe_gt_pagefault_init(gt);
+ err = xe_gt_sysfs_init(gt);
if (err)
return err;
- xe_mocs_init_early(gt);
-
- err = xe_gt_sysfs_init(gt);
+ err = gt_fw_domain_init(gt);
if (err)
return err;
- err = gt_fw_domain_init(gt);
+ err = xe_gt_pagefault_init(gt);
if (err)
return err;
@@ -839,6 +839,9 @@ static int gt_reset(struct xe_gt *gt)
goto err_out;
}
+ if (IS_SRIOV_PF(gt_to_xe(gt)))
+ xe_gt_sriov_pf_stop_prepare(gt);
+
xe_uc_gucrc_disable(&gt->uc);
xe_uc_stop_prepare(&gt->uc);
xe_gt_pagefault_reset(gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index 868a5d2c1a52..60d9354e7dbf 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -32,13 +32,18 @@
* Xe's Freq provides a sysfs API for frequency management:
*
* device/tile#/gt#/freq0/<item>_freq *read-only* files:
+ *
* - act_freq: The actual resolved frequency decided by PCODE.
* - cur_freq: The current one requested by GuC PC to the PCODE.
* - rpn_freq: The Render Performance (RP) N level, which is the minimal one.
+ * - rpa_freq: The Render Performance (RP) A level, which is the achiveable one.
+ * Calculated by PCODE at runtime based on multiple running conditions
* - rpe_freq: The Render Performance (RP) E level, which is the efficient one.
+ * Calculated by PCODE at runtime based on multiple running conditions
* - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one.
*
* device/tile#/gt#/freq0/<item>_freq *read-write* files:
+ *
* - min_freq: Min frequency request.
* - max_freq: Max frequency request.
* If max <= min, then freq_min becomes a fixed frequency request.
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 10622ca471a2..6717a636b1d9 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -444,6 +444,7 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue)
#define PF_MULTIPLIER 8
pf_queue->num_dw =
(num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER;
+ pf_queue->num_dw = roundup_pow_of_two(pf_queue->num_dw);
#undef PF_MULTIPLIER
pf_queue->gt = gt;
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
index c08efca6420e..35489fa81825 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
@@ -172,6 +172,25 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid)
pf_clear_vf_scratch_regs(gt, vfid);
}
+static void pf_cancel_restart(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ if (cancel_work_sync(&gt->sriov.pf.workers.restart))
+ xe_gt_sriov_dbg_verbose(gt, "pending restart canceled!\n");
+}
+
+/**
+ * xe_gt_sriov_pf_stop_prepare() - Prepare to stop SR-IOV support.
+ * @gt: the &xe_gt
+ *
+ * This function can only be called on the PF.
+ */
+void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt)
+{
+ pf_cancel_restart(gt);
+}
+
static void pf_restart(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
index f474509411c0..e2b2ff8132dc 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
@@ -13,6 +13,7 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt);
int xe_gt_sriov_pf_init(struct xe_gt *gt);
void xe_gt_sriov_pf_init_hw(struct xe_gt *gt);
void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid);
+void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt);
void xe_gt_sriov_pf_restart(struct xe_gt *gt);
#else
static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
@@ -29,6 +30,10 @@ static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt)
{
}
+static inline void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt)
+{
+}
+
static inline void xe_gt_sriov_pf_restart(struct xe_gt *gt)
{
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index 2420a548cacc..53a44702c04a 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -2364,6 +2364,21 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid,
return err;
}
+static int pf_push_self_config(struct xe_gt *gt)
+{
+ int err;
+
+ err = pf_push_full_vf_config(gt, PFID);
+ if (err) {
+ xe_gt_sriov_err(gt, "Failed to push self configuration (%pe)\n",
+ ERR_PTR(err));
+ return err;
+ }
+
+ xe_gt_sriov_dbg_verbose(gt, "self configuration completed\n");
+ return 0;
+}
+
static void fini_config(void *arg)
{
struct xe_gt *gt = arg;
@@ -2387,9 +2402,17 @@ static void fini_config(void *arg)
int xe_gt_sriov_pf_config_init(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
+ int err;
xe_gt_assert(gt, IS_SRIOV_PF(xe));
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ err = pf_push_self_config(gt);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ if (err)
+ return err;
+
return devm_add_action_or_reset(xe->drm.dev, fini_config, gt);
}
@@ -2407,6 +2430,10 @@ void xe_gt_sriov_pf_config_restart(struct xe_gt *gt)
unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt));
unsigned int fail = 0, skip = 0;
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ pf_push_self_config(gt);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
for (n = 1; n <= total_vfs; n++) {
if (xe_gt_sriov_pf_config_is_empty(gt, n))
skip++;
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 084cbdeba8ea..e1362e608146 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -138,6 +138,14 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
int pending_seqno;
/*
+ * we can get here before the CTs are even initialized if we're wedging
+ * very early, in which case there are not going to be any pending
+ * fences so we can bail immediately.
+ */
+ if (!xe_guc_ct_initialized(&gt->uc.guc.ct))
+ return;
+
+ /*
* CT channel is already disabled at this point. No new TLB requests can
* appear.
*/
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 2447de0ebedf..bbcbb348256f 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -34,6 +34,11 @@
#include "xe_pm.h"
#include "xe_trace_guc.h"
+static void receive_g2h(struct xe_guc_ct *ct);
+static void g2h_worker_func(struct work_struct *w);
+static void safe_mode_worker_func(struct work_struct *w);
+static void ct_exit_safe_mode(struct xe_guc_ct *ct);
+
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
enum {
/* Internal states, not error conditions */
@@ -186,14 +191,11 @@ static void guc_ct_fini(struct drm_device *drm, void *arg)
{
struct xe_guc_ct *ct = arg;
+ ct_exit_safe_mode(ct);
destroy_workqueue(ct->g2h_wq);
xa_destroy(&ct->fence_lookup);
}
-static void receive_g2h(struct xe_guc_ct *ct);
-static void g2h_worker_func(struct work_struct *w);
-static void safe_mode_worker_func(struct work_struct *w);
-
static void primelockdep(struct xe_guc_ct *ct)
{
if (!IS_ENABLED(CONFIG_LOCKDEP))
@@ -514,6 +516,9 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct)
*/
void xe_guc_ct_stop(struct xe_guc_ct *ct)
{
+ if (!xe_guc_ct_initialized(ct))
+ return;
+
xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED);
stop_g2h_handler(ct);
}
@@ -760,7 +765,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
u16 seqno;
int ret;
- xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
+ xe_gt_assert(gt, xe_guc_ct_initialized(ct));
xe_gt_assert(gt, !g2h_len || !g2h_fence);
xe_gt_assert(gt, !num_g2h || !g2h_fence);
xe_gt_assert(gt, !g2h_len || num_g2h);
@@ -1344,7 +1349,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
u32 action;
u32 *hxg;
- xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
+ xe_gt_assert(gt, xe_guc_ct_initialized(ct));
lockdep_assert_held(&ct->fast_lock);
if (ct->state == XE_GUC_CT_STATE_DISABLED)
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
index 82c4ae458dda..582aac106469 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -22,6 +22,11 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_pr
void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb);
+static inline bool xe_guc_ct_initialized(struct xe_guc_ct *ct)
+{
+ return ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED;
+}
+
static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct)
{
return ct->state == XE_GUC_CT_STATE_ENABLED;
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 18c623992035..c0ca61695d76 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -5,8 +5,11 @@
#include "xe_guc_pc.h"
+#include <linux/cleanup.h>
#include <linux/delay.h>
+#include <linux/jiffies.h>
#include <linux/ktime.h>
+#include <linux/wait_bit.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
@@ -51,9 +54,12 @@
#define LNL_MERT_FREQ_CAP 800
#define BMG_MERT_FREQ_CAP 2133
+#define BMG_MIN_FREQ 1200
+#define BMG_MERT_FLUSH_FREQ_CAP 2600
#define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */
#define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */
+#define SLPC_ACT_FREQ_TIMEOUT_MS 100
/**
* DOC: GuC Power Conservation (PC)
@@ -141,6 +147,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc,
return -ETIMEDOUT;
}
+static int wait_for_flush_complete(struct xe_guc_pc *pc)
+{
+ const unsigned long timeout = msecs_to_jiffies(30);
+
+ if (!wait_var_event_timeout(&pc->flush_freq_limit,
+ !atomic_read(&pc->flush_freq_limit),
+ timeout))
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq)
+{
+ int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC;
+ int slept, wait = 10;
+
+ for (slept = 0; slept < timeout_us;) {
+ if (xe_guc_pc_get_act_freq(pc) <= freq)
+ return 0;
+
+ usleep_range(wait, wait << 1);
+ slept += wait;
+ wait <<= 1;
+ if (slept + wait > timeout_us)
+ wait = timeout_us - slept;
+ }
+
+ return -ETIMEDOUT;
+}
static int pc_action_reset(struct xe_guc_pc *pc)
{
struct xe_guc_ct *ct = pc_to_ct(pc);
@@ -553,6 +589,25 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
return pc->rpn_freq;
}
+static int xe_guc_pc_get_min_freq_locked(struct xe_guc_pc *pc, u32 *freq)
+{
+ int ret;
+
+ lockdep_assert_held(&pc->freq_lock);
+
+ /* Might be in the middle of a gt reset */
+ if (!pc->freq_ready)
+ return -EAGAIN;
+
+ ret = pc_action_query_task_state(pc);
+ if (ret)
+ return ret;
+
+ *freq = pc_get_min_freq(pc);
+
+ return 0;
+}
+
/**
* xe_guc_pc_get_min_freq - Get the min operational frequency
* @pc: The GuC PC
@@ -563,26 +618,28 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
*/
int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
{
+ guard(mutex)(&pc->freq_lock);
+
+ return xe_guc_pc_get_min_freq_locked(pc, freq);
+}
+
+static int xe_guc_pc_set_min_freq_locked(struct xe_guc_pc *pc, u32 freq)
+{
int ret;
- xe_device_assert_mem_access(pc_to_xe(pc));
+ lockdep_assert_held(&pc->freq_lock);
- mutex_lock(&pc->freq_lock);
- if (!pc->freq_ready) {
- /* Might be in the middle of a gt reset */
- ret = -EAGAIN;
- goto out;
- }
+ /* Might be in the middle of a gt reset */
+ if (!pc->freq_ready)
+ return -EAGAIN;
- ret = pc_action_query_task_state(pc);
+ ret = pc_set_min_freq(pc, freq);
if (ret)
- goto out;
+ return ret;
- *freq = pc_get_min_freq(pc);
+ pc->user_requested_min = freq;
-out:
- mutex_unlock(&pc->freq_lock);
- return ret;
+ return 0;
}
/**
@@ -596,24 +653,28 @@ out:
*/
int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
{
+ guard(mutex)(&pc->freq_lock);
+
+ return xe_guc_pc_set_min_freq_locked(pc, freq);
+}
+
+static int xe_guc_pc_get_max_freq_locked(struct xe_guc_pc *pc, u32 *freq)
+{
int ret;
- mutex_lock(&pc->freq_lock);
- if (!pc->freq_ready) {
- /* Might be in the middle of a gt reset */
- ret = -EAGAIN;
- goto out;
- }
+ lockdep_assert_held(&pc->freq_lock);
- ret = pc_set_min_freq(pc, freq);
+ /* Might be in the middle of a gt reset */
+ if (!pc->freq_ready)
+ return -EAGAIN;
+
+ ret = pc_action_query_task_state(pc);
if (ret)
- goto out;
+ return ret;
- pc->user_requested_min = freq;
+ *freq = pc_get_max_freq(pc);
-out:
- mutex_unlock(&pc->freq_lock);
- return ret;
+ return 0;
}
/**
@@ -626,24 +687,28 @@ out:
*/
int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq)
{
+ guard(mutex)(&pc->freq_lock);
+
+ return xe_guc_pc_get_max_freq_locked(pc, freq);
+}
+
+static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq)
+{
int ret;
- mutex_lock(&pc->freq_lock);
- if (!pc->freq_ready) {
- /* Might be in the middle of a gt reset */
- ret = -EAGAIN;
- goto out;
- }
+ lockdep_assert_held(&pc->freq_lock);
- ret = pc_action_query_task_state(pc);
+ /* Might be in the middle of a gt reset */
+ if (!pc->freq_ready)
+ return -EAGAIN;
+
+ ret = pc_set_max_freq(pc, freq);
if (ret)
- goto out;
+ return ret;
- *freq = pc_get_max_freq(pc);
+ pc->user_requested_max = freq;
-out:
- mutex_unlock(&pc->freq_lock);
- return ret;
+ return 0;
}
/**
@@ -657,24 +722,14 @@ out:
*/
int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
{
- int ret;
-
- mutex_lock(&pc->freq_lock);
- if (!pc->freq_ready) {
- /* Might be in the middle of a gt reset */
- ret = -EAGAIN;
- goto out;
+ if (XE_WA(pc_to_gt(pc), 22019338487)) {
+ if (wait_for_flush_complete(pc) != 0)
+ return -EAGAIN;
}
- ret = pc_set_max_freq(pc, freq);
- if (ret)
- goto out;
-
- pc->user_requested_max = freq;
+ guard(mutex)(&pc->freq_lock);
-out:
- mutex_unlock(&pc->freq_lock);
- return ret;
+ return xe_guc_pc_set_max_freq_locked(pc, freq);
}
/**
@@ -817,6 +872,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc)
static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
{
+ struct xe_tile *tile = gt_to_tile(pc_to_gt(pc));
int ret;
lockdep_assert_held(&pc->freq_lock);
@@ -843,6 +899,9 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
if (pc_get_min_freq(pc) > pc->rp0_freq)
ret = pc_set_min_freq(pc, pc->rp0_freq);
+ if (XE_WA(tile->primary_gt, 14022085890))
+ ret = pc_set_min_freq(pc, max(BMG_MIN_FREQ, pc_get_min_freq(pc)));
+
out:
return ret;
}
@@ -868,30 +927,117 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
return ret;
}
-static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
+static bool needs_flush_freq_limit(struct xe_guc_pc *pc)
{
- int ret = 0;
+ struct xe_gt *gt = pc_to_gt(pc);
- if (XE_WA(pc_to_gt(pc), 22019338487)) {
- /*
- * Get updated min/max and stash them.
- */
- ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq);
- if (!ret)
- ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq);
- if (ret)
- return ret;
+ return XE_WA(gt, 22019338487) &&
+ pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP;
+}
+
+/**
+ * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush
+ * @pc: the xe_guc_pc object
+ *
+ * As per the WA, reduce max GT frequency during L2 cache flush
+ */
+void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+ u32 max_freq;
+ int ret;
+
+ if (!needs_flush_freq_limit(pc))
+ return;
+
+ guard(mutex)(&pc->freq_lock);
+
+ ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq);
+ if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) {
+ ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP);
+ if (ret) {
+ xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n",
+ BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
+ return;
+ }
+
+ atomic_set(&pc->flush_freq_limit, 1);
/*
- * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
+ * If user has previously changed max freq, stash that value to
+ * restore later, otherwise use the current max. New user
+ * requests wait on flush.
*/
- mutex_lock(&pc->freq_lock);
- ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc)));
- if (!ret)
- ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc)));
- mutex_unlock(&pc->freq_lock);
+ if (pc->user_requested_max != 0)
+ pc->stashed_max_freq = pc->user_requested_max;
+ else
+ pc->stashed_max_freq = max_freq;
}
+ /*
+ * Wait for actual freq to go below the flush cap: even if the previous
+ * max was below cap, the current one might still be above it
+ */
+ ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP);
+ if (ret)
+ xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n",
+ BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
+}
+
+/**
+ * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes.
+ * @pc: the xe_guc_pc object
+ *
+ * Retrieve the previous GT max frequency value.
+ */
+void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+ int ret = 0;
+
+ if (!needs_flush_freq_limit(pc))
+ return;
+
+ if (!atomic_read(&pc->flush_freq_limit))
+ return;
+
+ mutex_lock(&pc->freq_lock);
+
+ ret = pc_set_max_freq(&gt->uc.guc.pc, pc->stashed_max_freq);
+ if (ret)
+ xe_gt_err_once(gt, "Failed to restore max freq %u:%d",
+ pc->stashed_max_freq, ret);
+
+ atomic_set(&pc->flush_freq_limit, 0);
+ mutex_unlock(&pc->freq_lock);
+ wake_up_var(&pc->flush_freq_limit);
+}
+
+static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
+{
+ int ret;
+
+ if (!XE_WA(pc_to_gt(pc), 22019338487))
+ return 0;
+
+ guard(mutex)(&pc->freq_lock);
+
+ /*
+ * Get updated min/max and stash them.
+ */
+ ret = xe_guc_pc_get_min_freq_locked(pc, &pc->stashed_min_freq);
+ if (!ret)
+ ret = xe_guc_pc_get_max_freq_locked(pc, &pc->stashed_max_freq);
+ if (ret)
+ return ret;
+
+ /*
+ * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
+ */
+ ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc)));
+ if (!ret)
+ ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc)));
+
return ret;
}
@@ -1068,7 +1214,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
goto out;
}
- memset(pc->bo->vmap.vaddr, 0, size);
+ xe_map_memset(xe, &pc->bo->vmap, 0, 0, size);
slpc_shared_data_write(pc, header.size, size);
earlier = ktime_get();
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index 0a2664d5c811..52ecdd5ddbff 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -38,5 +38,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
void xe_guc_pc_init_early(struct xe_guc_pc *pc);
int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc);
+void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc);
+void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc);
#endif /* _XE_GUC_PC_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
index 2978ac9a249b..c02053948a57 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
@@ -15,6 +15,8 @@
struct xe_guc_pc {
/** @bo: GGTT buffer object that is shared with GuC PC */
struct xe_bo *bo;
+ /** @flush_freq_limit: 1 when max freq changes are limited by driver */
+ atomic_t flush_freq_limit;
/** @rp0_freq: HW RP0 frequency - The Maximum one */
u32 rp0_freq;
/** @rpa_freq: HW RPa frequency - The Achievable one */
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 2ad38f6b103e..2ac87ff4a057 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -229,6 +229,17 @@ static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
static void guc_submit_fini(struct drm_device *drm, void *arg)
{
struct xe_guc *guc = arg;
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
+ int ret;
+
+ ret = wait_event_timeout(guc->submission_state.fini_wq,
+ xa_empty(&guc->submission_state.exec_queue_lookup),
+ HZ * 5);
+
+ drain_workqueue(xe->destroy_wq);
+
+ xe_gt_assert(gt, ret);
xa_destroy(&guc->submission_state.exec_queue_lookup);
}
@@ -880,12 +891,13 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
struct xe_exec_queue *q = ge->q;
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_gpu_scheduler *sched = &ge->sched;
- bool wedged;
+ bool wedged = false;
xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q));
trace_xe_exec_queue_lr_cleanup(q);
- wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
+ if (!exec_queue_killed(q))
+ wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
/* Kill the run_job / process_msg entry points */
xe_sched_submission_stop(sched);
@@ -1059,7 +1071,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
int err = -ETIME;
pid_t pid = -1;
int i = 0;
- bool wedged, skip_timeout_check;
+ bool wedged = false, skip_timeout_check;
/*
* TDR has fired before free job worker. Common if exec queue
@@ -1105,7 +1117,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
* doesn't work for SRIOV. For now assuming timeouts in wedged mode are
* genuine timeouts.
*/
- wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
+ if (!exec_queue_killed(q))
+ wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
/* Engine state now stable, disable scheduling to check timestamp */
if (!wedged && exec_queue_registered(q)) {
@@ -1751,6 +1764,9 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
int ret;
+ if (!guc->submission_state.initialized)
+ return 0;
+
/*
* Using an atomic here rather than submission_state.lock as this
* function can be called while holding the CT lock (engine reset
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index eb293aec36a0..f008e8049700 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -52,6 +52,14 @@ enum xe_fan_channel {
};
/*
+ * For platforms that support mailbox commands for power limits, REG_PKG_POWER_SKU_UNIT is
+ * not supported and below are SKU units to be used.
+ */
+#define PWR_UNIT 0x3
+#define ENERGY_UNIT 0xe
+#define TIME_UNIT 0xa
+
+/*
* SF_* - scale factors for particular quantities according to hwmon spec.
*/
#define SF_POWER 1000000 /* microwatts */
@@ -60,6 +68,18 @@ enum xe_fan_channel {
#define SF_ENERGY 1000000 /* microjoules */
#define SF_TIME 1000 /* milliseconds */
+/*
+ * PL*_HWMON_ATTR - mapping of hardware power limits to corresponding hwmon power attribute.
+ */
+#define PL1_HWMON_ATTR hwmon_power_max
+
+#define PWR_ATTR_TO_STR(attr) (((attr) == hwmon_power_max) ? "PL1" : "Invalid")
+
+/*
+ * Timeout for power limit write mailbox command.
+ */
+#define PL_WRITE_MBX_TIMEOUT_MS (1)
+
/**
* struct xe_hwmon_energy_info - to accumulate energy
*/
@@ -100,8 +120,80 @@ struct xe_hwmon {
struct xe_hwmon_energy_info ei[CHANNEL_MAX];
/** @fi: Fan info for fanN_input */
struct xe_hwmon_fan_info fi[FAN_MAX];
+ /** @boot_power_limit_read: is boot power limits read */
+ bool boot_power_limit_read;
+ /** @pl1_on_boot: power limit PL1 on boot */
+ u32 pl1_on_boot[CHANNEL_MAX];
};
+static int xe_hwmon_pcode_read_power_limit(const struct xe_hwmon *hwmon, u32 attr, int channel,
+ u32 *uval)
+{
+ struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe);
+ u32 val0 = 0, val1 = 0;
+ int ret = 0;
+
+ ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_POWER_SETUP,
+ (channel == CHANNEL_CARD) ?
+ READ_PSYSGPU_POWER_LIMIT :
+ READ_PACKAGE_POWER_LIMIT,
+ hwmon->boot_power_limit_read ?
+ READ_PL_FROM_PCODE : READ_PL_FROM_FW),
+ &val0, &val1);
+
+ if (ret) {
+ drm_dbg(&hwmon->xe->drm, "read failed ch %d val0 0x%08x, val1 0x%08x, ret %d\n",
+ channel, val0, val1, ret);
+ *uval = 0;
+ return ret;
+ }
+
+ /* return the value only if limit is enabled */
+ if (attr == PL1_HWMON_ATTR)
+ *uval = (val0 & PWR_LIM_EN) ? val0 : 0;
+ else if (attr == hwmon_power_label)
+ *uval = (val0 & PWR_LIM_EN) ? 1 : 0;
+ else
+ *uval = 0;
+
+ return ret;
+}
+
+static int xe_hwmon_pcode_rmw_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel,
+ u32 clr, u32 set)
+{
+ struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe);
+ u32 val0, val1;
+ int ret = 0;
+
+ ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_POWER_SETUP,
+ (channel == CHANNEL_CARD) ?
+ READ_PSYSGPU_POWER_LIMIT :
+ READ_PACKAGE_POWER_LIMIT,
+ hwmon->boot_power_limit_read ?
+ READ_PL_FROM_PCODE : READ_PL_FROM_FW),
+ &val0, &val1);
+
+ if (ret)
+ drm_dbg(&hwmon->xe->drm, "read failed ch %d val0 0x%08x, val1 0x%08x, ret %d\n",
+ channel, val0, val1, ret);
+
+ if (attr == PL1_HWMON_ATTR)
+ val0 = (val0 & ~clr) | set;
+ else
+ return -EIO;
+
+ ret = xe_pcode_write64_timeout(root_tile, PCODE_MBOX(PCODE_POWER_SETUP,
+ (channel == CHANNEL_CARD) ?
+ WRITE_PSYSGPU_POWER_LIMIT :
+ WRITE_PACKAGE_POWER_LIMIT, 0),
+ val0, val1, PL_WRITE_MBX_TIMEOUT_MS);
+ if (ret)
+ drm_dbg(&hwmon->xe->drm, "write failed ch %d val0 0x%08x, val1 0x%08x, ret %d\n",
+ channel, val0, val1, ret);
+ return ret;
+}
+
static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg,
int channel)
{
@@ -122,29 +214,19 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg
}
break;
case REG_PKG_RAPL_LIMIT:
- if (xe->info.platform == XE_BATTLEMAGE) {
- if (channel == CHANNEL_PKG)
- return BMG_PACKAGE_RAPL_LIMIT;
- else
- return BMG_PLATFORM_POWER_LIMIT;
- } else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) {
+ if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG)
return PVC_GT0_PACKAGE_RAPL_LIMIT;
- } else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) {
+ else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG))
return PCU_CR_PACKAGE_RAPL_LIMIT;
- }
break;
case REG_PKG_POWER_SKU:
- if (xe->info.platform == XE_BATTLEMAGE)
- return BMG_PACKAGE_POWER_SKU;
- else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG)
+ if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG)
return PVC_GT0_PACKAGE_POWER_SKU;
else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG))
return PCU_CR_PACKAGE_POWER_SKU;
break;
case REG_PKG_POWER_SKU_UNIT:
- if (xe->info.platform == XE_BATTLEMAGE)
- return BMG_PACKAGE_POWER_SKU_UNIT;
- else if (xe->info.platform == XE_PVC)
+ if (xe->info.platform == XE_PVC)
return PVC_GT0_PACKAGE_POWER_SKU_UNIT;
else if (xe->info.platform == XE_DG2)
return PCU_CR_PACKAGE_POWER_SKU_UNIT;
@@ -181,7 +263,7 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg
return XE_REG(0);
}
-#define PL1_DISABLE 0
+#define PL_DISABLE 0
/*
* HW allows arbitrary PL1 limits to be set but silently clamps these values to
@@ -189,67 +271,83 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg
* same pattern for sysfs, allow arbitrary PL1 limits to be set but display
* clamped values when read.
*/
-static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *value)
+static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *value)
{
u64 reg_val, min, max;
struct xe_device *xe = hwmon->xe;
struct xe_reg rapl_limit, pkg_power_sku;
struct xe_mmio *mmio = xe_root_tile_mmio(xe);
- rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel);
- pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel);
+ mutex_lock(&hwmon->hwmon_lock);
- /*
- * Valid check of REG_PKG_RAPL_LIMIT is already done in xe_hwmon_power_is_visible.
- * So not checking it again here.
- */
- if (!xe_reg_is_valid(pkg_power_sku)) {
- drm_warn(&xe->drm, "pkg_power_sku invalid\n");
- *value = 0;
- return;
+ if (hwmon->xe->info.has_mbx_power_limits) {
+ xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, (u32 *)&reg_val);
+ } else {
+ rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel);
+ pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel);
+
+ /*
+ * Valid check of REG_PKG_RAPL_LIMIT is already done in xe_hwmon_power_is_visible.
+ * So not checking it again here.
+ */
+ if (!xe_reg_is_valid(pkg_power_sku)) {
+ drm_warn(&xe->drm, "pkg_power_sku invalid\n");
+ *value = 0;
+ goto unlock;
+ }
+ reg_val = xe_mmio_read32(mmio, rapl_limit);
}
- mutex_lock(&hwmon->hwmon_lock);
-
- reg_val = xe_mmio_read32(mmio, rapl_limit);
- /* Check if PL1 limit is disabled */
- if (!(reg_val & PKG_PWR_LIM_1_EN)) {
- *value = PL1_DISABLE;
+ /* Check if PL limits are disabled. */
+ if (!(reg_val & PWR_LIM_EN)) {
+ *value = PL_DISABLE;
+ drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%016llx\n",
+ PWR_ATTR_TO_STR(attr), channel, reg_val);
goto unlock;
}
- reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val);
+ reg_val = REG_FIELD_GET(PWR_LIM_VAL, reg_val);
*value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
- reg_val = xe_mmio_read64_2x32(mmio, pkg_power_sku);
- min = REG_FIELD_GET(PKG_MIN_PWR, reg_val);
- min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power);
- max = REG_FIELD_GET(PKG_MAX_PWR, reg_val);
- max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power);
-
- if (min && max)
- *value = clamp_t(u64, *value, min, max);
+ /* For platforms with mailbox power limit support clamping would be done by pcode. */
+ if (!hwmon->xe->info.has_mbx_power_limits) {
+ reg_val = xe_mmio_read64_2x32(mmio, pkg_power_sku);
+ min = REG_FIELD_GET(PKG_MIN_PWR, reg_val);
+ max = REG_FIELD_GET(PKG_MAX_PWR, reg_val);
+ min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power);
+ max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power);
+ if (min && max)
+ *value = clamp_t(u64, *value, min, max);
+ }
unlock:
mutex_unlock(&hwmon->hwmon_lock);
}
-static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long value)
+static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channel, long value)
{
struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
int ret = 0;
- u64 reg_val;
+ u32 reg_val;
struct xe_reg rapl_limit;
+ mutex_lock(&hwmon->hwmon_lock);
+
rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel);
- mutex_lock(&hwmon->hwmon_lock);
+ /* Disable Power Limit and verify, as limit cannot be disabled on all platforms. */
+ if (value == PL_DISABLE) {
+ if (hwmon->xe->info.has_mbx_power_limits) {
+ drm_dbg(&hwmon->xe->drm, "disabling %s on channel %d\n",
+ PWR_ATTR_TO_STR(attr), channel);
+ xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM_EN, 0);
+ xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &reg_val);
+ } else {
+ reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN, 0);
+ reg_val = xe_mmio_read32(mmio, rapl_limit);
+ }
- /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */
- if (value == PL1_DISABLE) {
- reg_val = xe_mmio_rmw32(mmio, rapl_limit, PKG_PWR_LIM_1_EN, 0);
- reg_val = xe_mmio_read32(mmio, rapl_limit);
- if (reg_val & PKG_PWR_LIM_1_EN) {
- drm_warn(&hwmon->xe->drm, "PL1 disable is not supported!\n");
+ if (reg_val & PWR_LIM_EN) {
+ drm_warn(&hwmon->xe->drm, "Power limit disable is not supported!\n");
ret = -EOPNOTSUPP;
}
goto unlock;
@@ -257,26 +355,49 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long va
/* Computation in 64-bits to avoid overflow. Round to nearest. */
reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER);
- reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val);
- reg_val = xe_mmio_rmw32(mmio, rapl_limit, PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val);
+ reg_val = PWR_LIM_EN | REG_FIELD_PREP(PWR_LIM_VAL, reg_val);
+
+ /*
+ * Clamp power limit to card-firmware default as maximum, as an additional protection to
+ * pcode clamp.
+ */
+ if (hwmon->xe->info.has_mbx_power_limits) {
+ if (reg_val > REG_FIELD_GET(PWR_LIM_VAL, hwmon->pl1_on_boot[channel])) {
+ reg_val = REG_FIELD_GET(PWR_LIM_VAL, hwmon->pl1_on_boot[channel]);
+ drm_dbg(&hwmon->xe->drm, "Clamping power limit to firmware default 0x%x\n",
+ reg_val);
+ }
+ }
+ if (hwmon->xe->info.has_mbx_power_limits)
+ ret = xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM, reg_val);
+ else
+ reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM, reg_val);
unlock:
mutex_unlock(&hwmon->hwmon_lock);
return ret;
}
-static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, long *value)
+static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, u32 attr, int channel,
+ long *value)
{
struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
- struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel);
- u64 reg_val;
+ u32 reg_val;
+
+ if (hwmon->xe->info.has_mbx_power_limits) {
+ /* PL1 is rated max if supported. */
+ xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, channel, &reg_val);
+ } else {
+ /*
+ * This sysfs file won't be visible if REG_PKG_POWER_SKU is invalid, so valid check
+ * for this register can be skipped.
+ * See xe_hwmon_power_is_visible.
+ */
+ struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel);
+
+ reg_val = xe_mmio_read32(mmio, reg);
+ }
- /*
- * This sysfs file won't be visible if REG_PKG_POWER_SKU is invalid, so valid check
- * for this register can be skipped.
- * See xe_hwmon_power_is_visible.
- */
- reg_val = xe_mmio_read32(mmio, reg);
reg_val = REG_FIELD_GET(PKG_TDP, reg_val);
*value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
}
@@ -330,23 +451,35 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at
struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
u32 x, y, x_w = 2; /* 2 bits */
u64 r, tau4, out;
- int sensor_index = to_sensor_dev_attr(attr)->index;
+ int channel = to_sensor_dev_attr(attr)->index;
+ u32 power_attr = PL1_HWMON_ATTR;
+ int ret = 0;
xe_pm_runtime_get(hwmon->xe);
mutex_lock(&hwmon->hwmon_lock);
- r = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index));
+ if (hwmon->xe->info.has_mbx_power_limits) {
+ ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, (u32 *)&r);
+ if (ret) {
+ drm_err(&hwmon->xe->drm,
+ "power interval read fail, ch %d, attr %d, r 0%llx, ret %d\n",
+ channel, power_attr, r, ret);
+ r = 0;
+ }
+ } else {
+ r = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel));
+ }
mutex_unlock(&hwmon->hwmon_lock);
xe_pm_runtime_put(hwmon->xe);
- x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r);
- y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r);
+ x = REG_FIELD_GET(PWR_LIM_TIME_X, r);
+ y = REG_FIELD_GET(PWR_LIM_TIME_Y, r);
/*
- * tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17)
+ * tau = (1 + (x / 4)) * power(2,y), x = bits(23:22), y = bits(21:17)
* = (4 | x) << (y - 2)
*
* Here (y - 2) ensures a 1.x fixed point representation of 1.x
@@ -373,14 +506,15 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a
u64 tau4, r, max_win;
unsigned long val;
int ret;
- int sensor_index = to_sensor_dev_attr(attr)->index;
+ int channel = to_sensor_dev_attr(attr)->index;
+ u32 power_attr = PL1_HWMON_ATTR;
ret = kstrtoul(buf, 0, &val);
if (ret)
return ret;
/*
- * Max HW supported tau in '1.x * power(2,y)' format, x = 0, y = 0x12.
+ * Max HW supported tau in '(1 + (x / 4)) * power(2,y)' format, x = 0, y = 0x12.
* The hwmon->scl_shift_time default of 0xa results in a max tau of 256 seconds.
*
* The ideal scenario is for PKG_MAX_WIN to be read from the PKG_PWR_SKU register.
@@ -400,11 +534,13 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a
tau4 = (u64)((1 << x_w) | x) << y;
max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
- if (val > max_win)
+ if (val > max_win) {
+ drm_warn(&hwmon->xe->drm, "power_interval invalid val 0x%lx\n", val);
return -EINVAL;
+ }
/* val in hw units */
- val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME);
+ val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME) + 1;
/*
* Convert val to 1.x * power(2,y)
@@ -419,14 +555,18 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a
x = (val - (1ul << y)) << x_w >> y;
}
- rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y);
+ rxy = REG_FIELD_PREP(PWR_LIM_TIME_X, x) |
+ REG_FIELD_PREP(PWR_LIM_TIME_Y, y);
xe_pm_runtime_get(hwmon->xe);
mutex_lock(&hwmon->hwmon_lock);
- r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index),
- PKG_PWR_LIM_1_TIME, rxy);
+ if (hwmon->xe->info.has_mbx_power_limits)
+ xe_hwmon_pcode_rmw_power_limit(hwmon, power_attr, channel, PWR_LIM_TIME, rxy);
+ else
+ r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel),
+ PWR_LIM_TIME, rxy);
mutex_unlock(&hwmon->hwmon_lock);
@@ -435,6 +575,7 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a
return count;
}
+/* PSYS PL1 */
static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
xe_hwmon_power_max_interval_show,
xe_hwmon_power_max_interval_store, CHANNEL_CARD);
@@ -455,10 +596,19 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj,
struct device *dev = kobj_to_dev(kobj);
struct xe_hwmon *hwmon = dev_get_drvdata(dev);
int ret = 0;
+ int channel = index ? CHANNEL_PKG : CHANNEL_CARD;
+ u32 power_attr = PL1_HWMON_ATTR;
+ u32 uval;
xe_pm_runtime_get(hwmon->xe);
- ret = xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, index)) ? attr->mode : 0;
+ if (hwmon->xe->info.has_mbx_power_limits) {
+ xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, &uval);
+ ret = (uval & PWR_LIM_EN) ? attr->mode : 0;
+ } else {
+ ret = xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT,
+ channel)) ? attr->mode : 0;
+ }
xe_pm_runtime_put(hwmon->xe);
@@ -478,8 +628,8 @@ static const struct attribute_group *hwmon_groups[] = {
static const struct hwmon_channel_info * const hwmon_info[] = {
HWMON_CHANNEL_INFO(temp, HWMON_T_LABEL, HWMON_T_INPUT | HWMON_T_LABEL,
HWMON_T_INPUT | HWMON_T_LABEL),
- HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL,
- HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT | HWMON_P_LABEL),
+ HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CRIT,
+ HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL),
HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL),
HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL),
HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL),
@@ -604,19 +754,27 @@ xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
switch (attr) {
case hwmon_power_max:
- return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT,
+ if (hwmon->xe->info.has_mbx_power_limits) {
+ xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &uval);
+ return (uval) ? 0664 : 0;
+ } else {
+ return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT,
channel)) ? 0664 : 0;
+ }
case hwmon_power_rated_max:
- return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU,
- channel)) ? 0444 : 0;
+ if (hwmon->xe->info.has_mbx_power_limits)
+ return 0;
+ else
+ return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU,
+ channel)) ? 0444 : 0;
case hwmon_power_crit:
- if (channel == CHANNEL_PKG)
- return (xe_hwmon_pcode_read_i1(hwmon, &uval) ||
- !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
- break;
case hwmon_power_label:
- return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT,
- channel)) ? 0444 : 0;
+ if (channel == CHANNEL_CARD) {
+ xe_hwmon_pcode_read_i1(hwmon, &uval);
+ return (uval & POWER_SETUP_I1_WATTS) ? (attr == hwmon_power_label) ?
+ 0444 : 0644 : 0;
+ }
+ break;
default:
return 0;
}
@@ -628,10 +786,10 @@ xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
{
switch (attr) {
case hwmon_power_max:
- xe_hwmon_power_max_read(hwmon, channel, val);
+ xe_hwmon_power_max_read(hwmon, attr, channel, val);
return 0;
case hwmon_power_rated_max:
- xe_hwmon_power_rated_max_read(hwmon, channel, val);
+ xe_hwmon_power_rated_max_read(hwmon, attr, channel, val);
return 0;
case hwmon_power_crit:
return xe_hwmon_power_curr_crit_read(hwmon, channel, val, SF_POWER);
@@ -645,7 +803,7 @@ xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val)
{
switch (attr) {
case hwmon_power_max:
- return xe_hwmon_power_max_write(hwmon, channel, val);
+ return xe_hwmon_power_max_write(hwmon, attr, channel, val);
case hwmon_power_crit:
return xe_hwmon_power_curr_crit_write(hwmon, channel, val, SF_POWER);
default:
@@ -965,18 +1123,42 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon)
int channel;
struct xe_reg pkg_power_sku_unit;
- /*
- * The contents of register PKG_POWER_SKU_UNIT do not change,
- * so read it once and store the shift values.
- */
- pkg_power_sku_unit = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0);
- if (xe_reg_is_valid(pkg_power_sku_unit)) {
- val_sku_unit = xe_mmio_read32(mmio, pkg_power_sku_unit);
- hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit);
- hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit);
- hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit);
+ if (hwmon->xe->info.has_mbx_power_limits) {
+ /* Check if card firmware support mailbox power limits commands. */
+ if (xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, CHANNEL_CARD,
+ &hwmon->pl1_on_boot[CHANNEL_CARD]) |
+ xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, CHANNEL_PKG,
+ &hwmon->pl1_on_boot[CHANNEL_PKG])) {
+ drm_warn(&hwmon->xe->drm,
+ "Failed to read power limits, check card firmware !\n");
+ } else {
+ drm_info(&hwmon->xe->drm, "Using mailbox commands for power limits\n");
+ /* Write default limits to read from pcode from now on. */
+ xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR,
+ CHANNEL_CARD, PWR_LIM | PWR_LIM_TIME,
+ hwmon->pl1_on_boot[CHANNEL_CARD]);
+ xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR,
+ CHANNEL_PKG, PWR_LIM | PWR_LIM_TIME,
+ hwmon->pl1_on_boot[CHANNEL_PKG]);
+ hwmon->scl_shift_power = PWR_UNIT;
+ hwmon->scl_shift_energy = ENERGY_UNIT;
+ hwmon->scl_shift_time = TIME_UNIT;
+ hwmon->boot_power_limit_read = true;
+ }
+ } else {
+ drm_info(&hwmon->xe->drm, "Using register for power limits\n");
+ /*
+ * The contents of register PKG_POWER_SKU_UNIT do not change,
+ * so read it once and store the shift values.
+ */
+ pkg_power_sku_unit = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0);
+ if (xe_reg_is_valid(pkg_power_sku_unit)) {
+ val_sku_unit = xe_mmio_read32(mmio, pkg_power_sku_unit);
+ hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit);
+ hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit);
+ hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit);
+ }
}
-
/*
* Initialize 'struct xe_hwmon_energy_info', i.e. set fields to the
* first value of the energy register read
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index 63db66df064b..023ed6a6b49d 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -78,6 +78,9 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level
}
lmtt_assert(lmtt, xe_bo_is_vram(bo));
+ lmtt_debug(lmtt, "level=%u addr=%#llx\n", level, (u64)xe_bo_main_addr(bo, XE_PAGE_SIZE));
+
+ xe_map_memset(lmtt_to_xe(lmtt), &bo->vmap, 0, 0, bo->size);
pt->level = level;
pt->bo = bo;
@@ -91,6 +94,9 @@ out:
static void lmtt_pt_free(struct xe_lmtt_pt *pt)
{
+ lmtt_debug(&pt->bo->tile->sriov.pf.lmtt, "level=%u addr=%llx\n",
+ pt->level, (u64)xe_bo_main_addr(pt->bo, XE_PAGE_SIZE));
+
xe_bo_unpin_map_no_vm(pt->bo);
kfree(pt);
}
@@ -226,9 +232,14 @@ static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt,
switch (lmtt->ops->lmtt_pte_size(level)) {
case sizeof(u32):
+ lmtt_assert(lmtt, !overflows_type(pte, u32));
+ lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u32), u32));
+
xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte);
break;
case sizeof(u64):
+ lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u64), u64));
+
xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte);
break;
default:
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 61a2e87990a9..6e7b70532d11 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -40,6 +40,7 @@
#define LRC_PPHWSP_SIZE SZ_4K
#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
+#define LRC_WA_BB_SIZE SZ_4K
static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
@@ -909,11 +910,12 @@ static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
static void xe_lrc_finish(struct xe_lrc *lrc)
{
xe_hw_fence_ctx_finish(&lrc->fence_ctx);
- xe_bo_lock(lrc->bo, false);
- xe_bo_unpin(lrc->bo);
- xe_bo_unlock(lrc->bo);
- xe_bo_put(lrc->bo);
- xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo);
+ xe_bo_unpin_map_no_vm(lrc->bo);
+}
+
+static size_t wa_bb_offset(struct xe_lrc *lrc)
+{
+ return lrc->bo->size - LRC_WA_BB_SIZE;
}
/*
@@ -944,11 +946,19 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
* store it in the PPHSWP.
*/
#define CONTEXT_ACTIVE 1ULL
-static void xe_lrc_setup_utilization(struct xe_lrc *lrc)
+static int xe_lrc_setup_utilization(struct xe_lrc *lrc)
{
- u32 *cmd;
+ const size_t max_size = LRC_WA_BB_SIZE;
+ u32 *cmd, *buf = NULL;
- cmd = lrc->bb_per_ctx_bo->vmap.vaddr;
+ if (lrc->bo->vmap.is_iomem) {
+ buf = kmalloc(max_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ cmd = buf;
+ } else {
+ cmd = lrc->bo->vmap.vaddr + wa_bb_offset(lrc);
+ }
*cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
*cmd++ = ENGINE_ID(0).addr;
@@ -969,9 +979,17 @@ static void xe_lrc_setup_utilization(struct xe_lrc *lrc)
*cmd++ = MI_BATCH_BUFFER_END;
- xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR,
- xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1);
+ if (buf) {
+ xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap,
+ wa_bb_offset(lrc), buf,
+ (cmd - buf) * sizeof(*cmd));
+ kfree(buf);
+ }
+
+ xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) +
+ wa_bb_offset(lrc) + 1);
+ return 0;
}
#define PVC_CTX_ASID (0x2e + 1)
@@ -1007,20 +1025,13 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
* FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
* via VM bind calls.
*/
- lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
+ lrc->bo = xe_bo_create_pin_map(xe, tile, NULL,
+ lrc_size + LRC_WA_BB_SIZE,
ttm_bo_type_kernel,
bo_flags);
if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo);
- lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
- ttm_bo_type_kernel,
- bo_flags);
- if (IS_ERR(lrc->bb_per_ctx_bo)) {
- err = PTR_ERR(lrc->bb_per_ctx_bo);
- goto err_lrc_finish;
- }
-
lrc->size = lrc_size;
lrc->ring.size = ring_size;
lrc->ring.tail = 0;
@@ -1128,7 +1139,9 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
map = __xe_lrc_start_seqno_map(lrc);
xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
- xe_lrc_setup_utilization(lrc);
+ err = xe_lrc_setup_utilization(lrc);
+ if (err)
+ goto err_lrc_finish;
return 0;
@@ -1795,9 +1808,6 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
if (!snapshot)
return NULL;
- if (lrc->bo->vm)
- xe_vm_get(lrc->bo->vm);
-
snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc);
snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
@@ -1809,7 +1819,8 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
snapshot->seqno = xe_lrc_seqno(lrc);
snapshot->lrc_bo = xe_bo_get(lrc->bo);
snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
- snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
+ snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset -
+ LRC_WA_BB_SIZE;
snapshot->lrc_snapshot = NULL;
snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
@@ -1819,14 +1830,12 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
{
struct xe_bo *bo;
- struct xe_vm *vm;
struct iosys_map src;
if (!snapshot)
return;
bo = snapshot->lrc_bo;
- vm = bo->vm;
snapshot->lrc_bo = NULL;
snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
@@ -1846,8 +1855,6 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
xe_bo_unlock(bo);
put_bo:
xe_bo_put(bo);
- if (vm)
- xe_vm_put(vm);
}
void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
@@ -1900,14 +1907,9 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
return;
kvfree(snapshot->lrc_snapshot);
- if (snapshot->lrc_bo) {
- struct xe_vm *vm;
-
- vm = snapshot->lrc_bo->vm;
+ if (snapshot->lrc_bo)
xe_bo_put(snapshot->lrc_bo);
- if (vm)
- xe_vm_put(vm);
- }
+
kfree(snapshot);
}
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
index ae24cf6f8dd9..883e550a9423 100644
--- a/drivers/gpu/drm/xe/xe_lrc_types.h
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -53,9 +53,6 @@ struct xe_lrc {
/** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */
u64 ctx_timestamp;
-
- /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */
- struct xe_bo *bb_per_ctx_bo;
};
struct xe_lrc_snapshot;
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 8f8e9fdfb2a8..07a5161c7d5b 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -82,7 +82,7 @@ struct xe_migrate {
* of the instruction. Subtracting the instruction header (1 dword) and
* address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values.
*/
-#define MAX_PTE_PER_SDI 0x1FE
+#define MAX_PTE_PER_SDI 0x1FEU
/**
* xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue.
@@ -863,7 +863,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it))
xe_res_next(&src_it, src_L0);
else
- emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs,
+ emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs || use_comp_pat,
&src_it, src_L0, src);
if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it))
@@ -1553,15 +1553,17 @@ static u32 pte_update_cmd_size(u64 size)
u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE);
XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER);
+
/*
* MI_STORE_DATA_IMM command is used to update page table. Each
- * instruction can update maximumly 0x1ff pte entries. To update
- * n (n <= 0x1ff) pte entries, we need:
- * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc)
- * 2 dword for the page table's physical location
- * 2*n dword for value of pte to fill (each pte entry is 2 dwords)
+ * instruction can update maximumly MAX_PTE_PER_SDI pte entries. To
+ * update n (n <= MAX_PTE_PER_SDI) pte entries, we need:
+ *
+ * - 1 dword for the MI_STORE_DATA_IMM command header (opcode etc)
+ * - 2 dword for the page table's physical location
+ * - 2*n dword for value of pte to fill (each pte entry is 2 dwords)
*/
- num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff);
+ num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, MAX_PTE_PER_SDI);
num_dword += entries * 2;
return num_dword;
@@ -1577,7 +1579,7 @@ static void build_pt_update_batch_sram(struct xe_migrate *m,
ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
while (ptes) {
- u32 chunk = min(0x1ffU, ptes);
+ u32 chunk = min(MAX_PTE_PER_SDI, ptes);
bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
bb->cs[bb->len++] = pt_offset;
@@ -1815,8 +1817,8 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
xe_bo_assert_held(bo);
/* Use bounce buffer for small access and unaligned access */
- if (len & XE_CACHELINE_MASK ||
- ((uintptr_t)buf | offset) & XE_CACHELINE_MASK) {
+ if (!IS_ALIGNED(len, XE_CACHELINE_BYTES) ||
+ !IS_ALIGNED((unsigned long)buf + offset, XE_CACHELINE_BYTES)) {
int buf_offset = 0;
/*
@@ -1846,7 +1848,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
err = xe_migrate_access_memory(m, bo,
offset & ~XE_CACHELINE_MASK,
(void *)ptr,
- sizeof(bounce), 0);
+ sizeof(bounce), write);
if (err)
return err;
} else {
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index e4742e27e2cd..da6793c2f991 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -20,7 +20,7 @@
struct xe_modparam xe_modparam = {
.probe_display = true,
- .guc_log_level = 3,
+ .guc_log_level = IS_ENABLED(CONFIG_DRM_XE_DEBUG) ? 3 : 1,
.force_probe = CONFIG_DRM_XE_FORCE_PROBE,
.wedged_mode = 1,
.svm_notifier_size = 512,
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 024175cfe61e..278af53c74dc 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -66,6 +66,7 @@ struct xe_device_desc {
u8 has_heci_gscfi:1;
u8 has_heci_cscfi:1;
u8 has_llc:1;
+ u8 has_mbx_power_limits:1;
u8 has_pxp:1;
u8 has_sriov:1;
u8 needs_scratch:1;
@@ -139,7 +140,6 @@ static const struct xe_graphics_desc graphics_xelpg = {
.has_asid = 1, \
.has_atomic_enable_pte_bit = 1, \
.has_flat_ccs = 1, \
- .has_indirect_ring_state = 1, \
.has_range_tlb_invalidation = 1, \
.has_usm = 1, \
.has_64bit_timestamp = 1, \
@@ -306,6 +306,7 @@ static const struct xe_device_desc dg2_desc = {
DG2_FEATURES,
.has_display = true,
.has_fan_control = true,
+ .has_mbx_power_limits = false,
};
static const __maybe_unused struct xe_device_desc pvc_desc = {
@@ -317,6 +318,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = {
.has_heci_gscfi = 1,
.max_remote_tiles = 1,
.require_force_probe = true,
+ .has_mbx_power_limits = false,
};
static const struct xe_device_desc mtl_desc = {
@@ -342,6 +344,7 @@ static const struct xe_device_desc bmg_desc = {
.dma_mask_size = 46,
.has_display = true,
.has_fan_control = true,
+ .has_mbx_power_limits = true,
.has_heci_cscfi = 1,
.needs_scratch = true,
};
@@ -584,6 +587,7 @@ static int xe_info_init_early(struct xe_device *xe,
xe->info.dma_mask_size = desc->dma_mask_size;
xe->info.is_dgfx = desc->is_dgfx;
xe->info.has_fan_control = desc->has_fan_control;
+ xe->info.has_mbx_power_limits = desc->has_mbx_power_limits;
xe->info.has_heci_gscfi = desc->has_heci_gscfi;
xe->info.has_heci_cscfi = desc->has_heci_cscfi;
xe->info.has_llc = desc->has_llc;
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index cf955b3ed52c..9189117fe825 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -109,6 +109,17 @@ int xe_pcode_write_timeout(struct xe_tile *tile, u32 mbox, u32 data, int timeout
return err;
}
+int xe_pcode_write64_timeout(struct xe_tile *tile, u32 mbox, u32 data0, u32 data1, int timeout)
+{
+ int err;
+
+ mutex_lock(&tile->pcode.lock);
+ err = pcode_mailbox_rw(tile, mbox, &data0, &data1, timeout, false, false);
+ mutex_unlock(&tile->pcode.lock);
+
+ return err;
+}
+
int xe_pcode_read(struct xe_tile *tile, u32 mbox, u32 *val, u32 *val1)
{
int err;
diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h
index ba33991d72a7..de38f44f3201 100644
--- a/drivers/gpu/drm/xe/xe_pcode.h
+++ b/drivers/gpu/drm/xe/xe_pcode.h
@@ -18,6 +18,9 @@ int xe_pcode_init_min_freq_table(struct xe_tile *tile, u32 min_gt_freq,
int xe_pcode_read(struct xe_tile *tile, u32 mbox, u32 *val, u32 *val1);
int xe_pcode_write_timeout(struct xe_tile *tile, u32 mbox, u32 val,
int timeout_ms);
+int xe_pcode_write64_timeout(struct xe_tile *tile, u32 mbox, u32 data0,
+ u32 data1, int timeout);
+
#define xe_pcode_write(tile, mbox, val) \
xe_pcode_write_timeout(tile, mbox, val, 1)
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
index 127d4d26c4cf..0befdea77db1 100644
--- a/drivers/gpu/drm/xe/xe_pcode_api.h
+++ b/drivers/gpu/drm/xe/xe_pcode_api.h
@@ -43,6 +43,13 @@
#define POWER_SETUP_I1_SHIFT 6 /* 10.6 fixed point format */
#define POWER_SETUP_I1_DATA_MASK REG_GENMASK(15, 0)
+#define READ_PSYSGPU_POWER_LIMIT 0x6
+#define WRITE_PSYSGPU_POWER_LIMIT 0x7
+#define READ_PACKAGE_POWER_LIMIT 0x8
+#define WRITE_PACKAGE_POWER_LIMIT 0x9
+#define READ_PL_FROM_FW 0x1
+#define READ_PL_FROM_PCODE 0x0
+
#define PCODE_FREQUENCY_CONFIG 0x6e
/* Frequency Config Sub Commands (param1) */
#define PCODE_MBOX_FC_SC_READ_FUSED_P0 0x0
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index ff749edc005b..ad263de44111 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -134,7 +134,7 @@ int xe_pm_suspend(struct xe_device *xe)
/* FIXME: Super racey... */
err = xe_bo_evict_all(xe);
if (err)
- goto err_pxp;
+ goto err_display;
for_each_gt(gt, xe, id) {
err = xe_gt_suspend(gt);
@@ -151,7 +151,6 @@ int xe_pm_suspend(struct xe_device *xe)
err_display:
xe_display_pm_resume(xe);
-err_pxp:
xe_pxp_pm_resume(xe->pxp);
err:
drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
@@ -753,11 +752,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
}
/**
- * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold
+ * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold
* @xe: xe device instance
- * @threshold: VRAM size in bites for the D3cold threshold
+ * @threshold: VRAM size in MiB for the D3cold threshold
*
- * Returns 0 for success, negative error code otherwise.
+ * Return:
+ * * 0 - success
+ * * -EINVAL - invalid argument
*/
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
{
diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c
index 454ea7dc08ac..b5bc15f436fa 100644
--- a/drivers/gpu/drm/xe/xe_pxp.c
+++ b/drivers/gpu/drm/xe/xe_pxp.c
@@ -541,10 +541,14 @@ int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q)
*/
xe_pm_runtime_get(pxp->xe);
- if (!pxp_prerequisites_done(pxp)) {
- ret = -EBUSY;
+ /* get_readiness_status() returns 0 for in-progress and 1 for done */
+ ret = xe_pxp_get_readiness_status(pxp);
+ if (ret <= 0) {
+ if (!ret)
+ ret = -EBUSY;
goto out;
}
+ ret = 0;
wait_for_idle:
/*
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index bc1689db4cd7..7b50c7c1ee21 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -110,13 +110,14 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
return i;
}
-static int emit_flush_invalidate(u32 *dw, int i)
+static int emit_flush_invalidate(u32 addr, u32 val, u32 *dw, int i)
{
dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW |
- MI_FLUSH_IMM_DW | MI_FLUSH_DW_STORE_INDEX;
- dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR;
- dw[i++] = 0;
+ MI_FLUSH_IMM_DW;
+
+ dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
dw[i++] = 0;
+ dw[i++] = val;
return i;
}
@@ -397,23 +398,20 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
static void emit_migration_job_gen12(struct xe_sched_job *job,
struct xe_lrc *lrc, u32 seqno)
{
+ u32 saddr = xe_lrc_start_seqno_ggtt_addr(lrc);
u32 dw[MAX_JOB_SIZE_DW], i = 0;
i = emit_copy_timestamp(lrc, dw, i);
- i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
- seqno, dw, i);
+ i = emit_store_imm_ggtt(saddr, seqno, dw, i);
dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* Enabled again below */
i = emit_bb_start(job->ptrs[0].batch_addr, BIT(8), dw, i);
- if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) {
- /* XXX: Do we need this? Leaving for now. */
- dw[i++] = preparser_disable(true);
- i = emit_flush_invalidate(dw, i);
- dw[i++] = preparser_disable(false);
- }
+ dw[i++] = preparser_disable(true);
+ i = emit_flush_invalidate(saddr, seqno, dw, i);
+ dw[i++] = preparser_disable(false);
i = emit_bb_start(job->ptrs[1].batch_addr, BIT(8), dw, i);
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 6345896585de..f0b167b3fb6a 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -764,7 +764,7 @@ static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range,
return false;
}
- if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) {
+ if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) {
drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
return false;
}
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 2741849bbf4d..a6612105201a 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -114,10 +114,10 @@ struct fw_blobs_by_type {
#define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED
#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \
- fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 44, 1)) \
- fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 44, 1)) \
+ fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 45, 2)) \
+ fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \
fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \
- fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 44, 1)) \
+ fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 45, 2)) \
fw_def(DG1, GT_TYPE_ANY, major_ver(i915, guc, dg1, 70, 44, 1)) \
fw_def(ALDERLAKE_N, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \
fw_def(ALDERLAKE_P, GT_TYPE_ANY, major_ver(i915, guc, adlp, 70, 44, 1)) \
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 79323c78130f..861577746929 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1678,13 +1678,21 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
* scheduler drops all the references of it, hence protecting the VM
* for this case is necessary.
*/
- if (flags & XE_VM_FLAG_LR_MODE)
+ if (flags & XE_VM_FLAG_LR_MODE) {
+ INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
xe_pm_runtime_get_noresume(xe);
+ }
+
+ if (flags & XE_VM_FLAG_FAULT_MODE) {
+ err = xe_svm_init(vm);
+ if (err)
+ goto err_no_resv;
+ }
vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
if (!vm_resv_obj) {
err = -ENOMEM;
- goto err_no_resv;
+ goto err_svm_fini;
}
drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
@@ -1724,10 +1732,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
vm->batch_invalidate_tlb = true;
}
- if (vm->flags & XE_VM_FLAG_LR_MODE) {
- INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
+ if (vm->flags & XE_VM_FLAG_LR_MODE)
vm->batch_invalidate_tlb = false;
- }
/* Fill pt_root after allocating scratch tables */
for_each_tile(tile, xe, id) {
@@ -1757,12 +1763,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
}
}
- if (flags & XE_VM_FLAG_FAULT_MODE) {
- err = xe_svm_init(vm);
- if (err)
- goto err_close;
- }
-
if (number_tiles > 1)
vm->composite_fence_ctx = dma_fence_context_alloc(1);
@@ -1776,6 +1776,11 @@ err_close:
xe_vm_close_and_put(vm);
return ERR_PTR(err);
+err_svm_fini:
+ if (flags & XE_VM_FLAG_FAULT_MODE) {
+ vm->size = 0; /* close the vm */
+ xe_svm_fini(vm);
+ }
err_no_resv:
mutex_destroy(&vm->snap_mutex);
for_each_tile(tile, xe, id)
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 0ef811fc2bde..494af6bdc646 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -301,6 +301,75 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap);
void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p);
void xe_vm_snapshot_free(struct xe_vm_snapshot *snap);
+/**
+ * xe_vm_set_validating() - Register this task as currently making bos resident
+ * @allow_res_evict: Allow eviction of buffer objects bound to @vm when
+ * validating.
+ * @vm: Pointer to the vm or NULL.
+ *
+ * Register this task as currently making bos resident for the vm. Intended
+ * to avoid eviction by the same task of shared bos bound to the vm.
+ * Call with the vm's resv lock held.
+ *
+ * Return: A pin cookie that should be used for xe_vm_clear_validating().
+ */
+static inline struct pin_cookie xe_vm_set_validating(struct xe_vm *vm,
+ bool allow_res_evict)
+{
+ struct pin_cookie cookie = {};
+
+ if (vm && !allow_res_evict) {
+ xe_vm_assert_held(vm);
+ cookie = lockdep_pin_lock(&xe_vm_resv(vm)->lock.base);
+ /* Pairs with READ_ONCE in xe_vm_is_validating() */
+ WRITE_ONCE(vm->validating, current);
+ }
+
+ return cookie;
+}
+
+/**
+ * xe_vm_clear_validating() - Unregister this task as currently making bos resident
+ * @vm: Pointer to the vm or NULL
+ * @allow_res_evict: Eviction from @vm was allowed. Must be set to the same
+ * value as for xe_vm_set_validation().
+ * @cookie: Cookie obtained from xe_vm_set_validating().
+ *
+ * Register this task as currently making bos resident for the vm. Intended
+ * to avoid eviction by the same task of shared bos bound to the vm.
+ * Call with the vm's resv lock held.
+ */
+static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict,
+ struct pin_cookie cookie)
+{
+ if (vm && !allow_res_evict) {
+ lockdep_unpin_lock(&xe_vm_resv(vm)->lock.base, cookie);
+ /* Pairs with READ_ONCE in xe_vm_is_validating() */
+ WRITE_ONCE(vm->validating, NULL);
+ }
+}
+
+/**
+ * xe_vm_is_validating() - Whether bos bound to the vm are currently being made resident
+ * by the current task.
+ * @vm: Pointer to the vm.
+ *
+ * If this function returns %true, we should be in a vm resv locked region, since
+ * the current process is the same task that called xe_vm_set_validating().
+ * The function asserts that that's indeed the case.
+ *
+ * Return: %true if the task is currently making bos resident, %false otherwise.
+ */
+static inline bool xe_vm_is_validating(struct xe_vm *vm)
+{
+ /* Pairs with WRITE_ONCE in xe_vm_is_validating() */
+ if (READ_ONCE(vm->validating) == current) {
+ xe_vm_assert_held(vm);
+ return true;
+ }
+ return false;
+}
+
#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma);
#else
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 1662604c4486..1979e9bdbdf3 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -310,6 +310,14 @@ struct xe_vm {
* protected by the vm resv.
*/
u64 tlb_flush_seqno;
+ /**
+ * @validating: The task that is currently making bos resident for this vm.
+ * Protected by the VM's resv for writing. Opportunistic reading can be done
+ * using READ_ONCE. Note: This is a workaround for the
+ * TTM eviction_valuable() callback not being passed a struct
+ * ttm_operation_context(). Future work might want to address this.
+ */
+ struct task_struct *validating;
/** @batch_invalidate_tlb: Always invalidate TLB before batch start */
bool batch_invalidate_tlb;
/** @xef: XE file handle for tracking this VM's drm client */
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 9efc5accd43d..6d70109fcc43 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -21,7 +21,8 @@
GRAPHICS_VERSION_RANGE(1270, 1274)
MEDIA_VERSION(1300)
PLATFORM(DG2)
-14018094691 GRAPHICS_VERSION(2004)
+14018094691 GRAPHICS_VERSION_RANGE(2001, 2002)
+ GRAPHICS_VERSION(2004)
14019882105 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
18024947630 GRAPHICS_VERSION(2001)
GRAPHICS_VERSION(2004)
@@ -37,10 +38,10 @@
GRAPHICS_VERSION(2004)
GRAPHICS_VERSION_RANGE(3000, 3001)
22019338487 MEDIA_VERSION(2000)
- GRAPHICS_VERSION(2001)
+ GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf)
MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf)
22019338487_display PLATFORM(LUNARLAKE)
-16023588340 GRAPHICS_VERSION(2001)
+16023588340 GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf)
14019789679 GRAPHICS_VERSION(1255)
GRAPHICS_VERSION_RANGE(1270, 2004)
no_media_l3 MEDIA_VERSION(3000)
@@ -59,3 +60,7 @@ no_media_l3 MEDIA_VERSION(3000)
MEDIA_VERSION_RANGE(1301, 3000)
16026508708 GRAPHICS_VERSION_RANGE(1200, 3001)
MEDIA_VERSION_RANGE(1300, 3000)
+
+# SoC workaround - currently applies to all platforms with the following
+# primary GT GMDID
+14022085890 GRAPHICS_VERSION(2001)