Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process.c | 1
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 4
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 6
-rw-r--r--  drivers/gpu/drm/amd/display/dc/core/dc.c | 10
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dsc/Makefile | 2
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 18
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c | 151
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h | 5
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c | 27
-rw-r--r--  drivers/gpu/drm/amd/display/modules/color/color_gamma.c | 4
-rw-r--r--  drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c | 2
-rw-r--r--  drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c | 11
-rw-r--r--  drivers/gpu/drm/drm_encoder_slave.c | 15
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_dma.c | 4
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_g2d.c | 1
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_mic.c | 4
-rw-r--r--  drivers/gpu/drm/i915/display/intel_ddi.c | 12
-rw-r--r--  drivers/gpu/drm/i915/display/intel_dp_mst.c | 8
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.c | 52
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring.c | 4
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_workarounds.c | 241
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 25
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_lrc.c | 185
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_mocs.c | 18
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_ring.c | 110
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_rps.c | 69
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_timeline.c | 15
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_workarounds.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/shaders/README | 46
-rw-r--r--  drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm | 119
-rw-r--r--  drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm | 117
-rw-r--r--  drivers/gpu/drm/i915/gvt/debugfs.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gvt/handlers.c | 24
-rw-r--r--  drivers/gpu/drm/i915/gvt/mmio_context.h | 6
-rw-r--r--  drivers/gpu/drm/i915/gvt/reg.h | 5
-rw-r--r--  drivers/gpu/drm/i915/i915_irq.c | 1
-rw-r--r--  drivers/gpu/drm/i915/i915_pmu.c | 84
-rw-r--r--  drivers/gpu/drm/i915/i915_priolist_types.h | 2
-rw-r--r--  drivers/gpu/drm/i915/i915_reg.h | 2
-rw-r--r--  drivers/gpu/drm/i915/intel_pm.c | 206
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_mock_selftests.h | 1
-rw-r--r--  drivers/gpu/drm/mediatek/Kconfig | 2
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 8
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_drm_drv.c | 6
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_drm_plane.c | 25
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_dsi.c | 5
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_hdmi.c | 12
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c | 52
-rw-r--r--  drivers/gpu/drm/msm/adreno/a2xx_gpu.c | 2
-rw-r--r--  drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 2
-rw-r--r--  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 2
-rw-r--r--  drivers/gpu/drm/msm/adreno/adreno_gpu.c | 2
-rw-r--r--  drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 18
-rw-r--r--  drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 2
-rw-r--r--  drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c | 2
-rw-r--r--  drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 2
-rw-r--r--  drivers/gpu/drm/msm/msm_submitqueue.c | 4
-rw-r--r--  drivers/gpu/drm/radeon/ni_dpm.c | 2
-rw-r--r--  drivers/gpu/drm/rcar-du/Kconfig | 1
-rw-r--r--  drivers/gpu/drm/tegra/dc.c | 1
-rw-r--r--  drivers/gpu/drm/tegra/hub.c | 17
-rw-r--r--  drivers/gpu/host1x/bus.c | 9
-rw-r--r--  drivers/gpu/host1x/dev.c | 11
69 files changed, 1252 insertions(+), 581 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 58f9d8c3a17a..44f927641b89 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -204,6 +204,7 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
(mode_info->atom_context->bios + data_offset);
switch (crev) {
case 11:
+ case 12:
mem_channel_number = igp_info->v11.umachannelnumber;
/* channel width is 64 */
if (vram_width)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 775e389c9a13..02e6f8c4dde0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -696,7 +696,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
* default power levels, write "r" (reset) to the file to reset them.
*
*
- * < For Vega20 >
+ * < For Vega20 and newer ASICs >
*
* Reading the file will display:
*
@@ -1668,7 +1668,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
}
/**
- * DOC: busy_percent
+ * DOC: gpu_busy_percent
*
* The amdgpu driver provides a sysfs API for reading how busy the GPU
* is as a percentage. The file gpu_busy_percent is used for this.
@@ -2784,7 +2784,7 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
if (r)
return r;
- return snprintf(buf, PAGE_SIZE, "%d\n", sclk * 10 * 1000);
+ return snprintf(buf, PAGE_SIZE, "%u\n", sclk * 10 * 1000);
}
static ssize_t amdgpu_hwmon_show_sclk_label(struct device *dev,
@@ -2819,7 +2819,7 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
if (r)
return r;
- return snprintf(buf, PAGE_SIZE, "%d\n", mclk * 10 * 1000);
+ return snprintf(buf, PAGE_SIZE, "%u\n", mclk * 10 * 1000);
}
static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index b544baf306f2..5d71c23e2640 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1298,8 +1298,12 @@ static int sdma_v5_0_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i;
- for (i = 0; i < adev->sdma.num_instances; i++)
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (adev->sdma.instance[i].fw != NULL)
+ release_firmware(adev->sdma.instance[i].fw);
+
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ }
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index f0587d94294d..fee60921fccf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -40,6 +40,7 @@
#include <drm/drm_file.h>
#include <drm/drm_drv.h>
#include <drm/drm_device.h>
+#include <drm/drm_ioctl.h>
#include <kgd_kfd_interface.h>
#include <linux/swap.h>
@@ -1076,7 +1077,7 @@ static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd)
#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF)
struct drm_device *ddev = kfd->ddev;
- return devcgroup_check_permission(DEVCG_DEV_CHAR, ddev->driver->major,
+ return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR,
ddev->render->index,
DEVCG_ACC_WRITE | DEVCG_ACC_READ);
#else
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d27221ddcdeb..0e0c42e9f6a3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -428,6 +428,7 @@ struct kfd_process *kfd_create_process(struct file *filep)
(int)process->lead_thread->pid);
if (ret) {
pr_warn("Creating procfs pid directory failed");
+ kobject_put(process->kobj);
goto out;
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 7ced9f87be97..10ac8076d4f2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5024,7 +5024,8 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector,
struct drm_connector *connector = &aconnector->base;
struct amdgpu_device *adev = connector->dev->dev_private;
struct dc_stream_state *stream;
- int requested_bpc = connector->state ? connector->state->max_requested_bpc : 8;
+ const struct drm_connector_state *drm_state = dm_state ? &dm_state->base : NULL;
+ int requested_bpc = drm_state ? drm_state->max_requested_bpc : 8;
enum dc_status dc_result = DC_OK;
do {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 076af267b488..1d692f4f42f3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -1058,7 +1058,6 @@ static const struct {
{"link_settings", &dp_link_settings_debugfs_fops},
{"phy_settings", &dp_phy_settings_debugfs_fop},
{"test_pattern", &dp_phy_test_pattern_fops},
- {"output_bpc", &output_bpc_fops},
{"vrr_range", &vrr_range_fops},
#ifdef CONFIG_DRM_AMD_DC_HDCP
{"hdcp_sink_capability", &hdcp_sink_capability_fops},
@@ -1142,6 +1141,9 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector)
debugfs_create_file_unsafe("force_yuv420_output", 0644, dir, connector,
&force_yuv420_output_fops);
+ debugfs_create_file("output_bpc", 0644, dir, connector,
+ &output_bpc_fops);
+
connector->debugfs_dpcd_address = 0;
connector->debugfs_dpcd_size = 0;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
index dcf84a61de37..949d10ef8304 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -510,8 +510,10 @@ static ssize_t srm_data_read(struct file *filp, struct kobject *kobj, struct bin
srm = psp_get_srm(work->hdcp.config.psp.handle, &srm_version, &srm_size);
- if (!srm)
- return -EINVAL;
+ if (!srm) {
+ ret = -EINVAL;
+ goto ret;
+ }
if (pos >= srm_size)
ret = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 6f93a6ca4cf0..d016f50e187c 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -2538,10 +2538,12 @@ void dc_commit_updates_for_stream(struct dc *dc,
copy_stream_update_to_stream(dc, context, stream, stream_update);
- if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) {
- DC_ERROR("Mode validation failed for stream update!\n");
- dc_release_state(context);
- return;
+ if (update_type > UPDATE_TYPE_FAST) {
+ if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) {
+ DC_ERROR("Mode validation failed for stream update!\n");
+ dc_release_state(context);
+ return;
+ }
}
commit_planes_for_stream(
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
index 3f66868df171..ea29cf95d470 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
@@ -28,8 +28,6 @@ endif
endif
CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc.o := $(dsc_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc_dpi.o := $(dsc_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dsc/dc_dsc.o := $(dsc_ccflags)
DSC = dc_dsc.o rc_calc.o rc_calc_dpi.o
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
index 0ea6662a1563..0c7f247bb7de 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
@@ -22,10 +22,12 @@
* Author: AMD
*/
+#include <drm/drm_dsc.h>
#include "dc_hw_types.h"
#include "dsc.h"
#include <drm/drm_dp_helper.h>
#include "dc.h"
+#include "rc_calc.h"
/* This module's internal functions */
@@ -304,22 +306,6 @@ static inline uint32_t dsc_div_by_10_round_up(uint32_t value)
return (value + 9) / 10;
}
-static inline uint32_t calc_dsc_bpp_x16(uint32_t stream_bandwidth_kbps, uint32_t pix_clk_100hz, uint32_t bpp_increment_div)
-{
- uint32_t dsc_target_bpp_x16;
- float f_dsc_target_bpp;
- float f_stream_bandwidth_100bps = stream_bandwidth_kbps * 10.0f;
- uint32_t precision = bpp_increment_div; // bpp_increment_div is actually precision
-
- f_dsc_target_bpp = f_stream_bandwidth_100bps / pix_clk_100hz;
-
- // Round down to the nearest precision stop to bring it into DSC spec range
- dsc_target_bpp_x16 = (uint32_t)(f_dsc_target_bpp * precision);
- dsc_target_bpp_x16 = (dsc_target_bpp_x16 * 16) / precision;
-
- return dsc_target_bpp_x16;
-}
-
/* Get DSC bandwidth range based on [min_bpp, max_bpp] target bitrate range, and timing's pixel clock
* and uncompressed bandwidth.
*/
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c
index 03ae15946c6d..667afbc260f9 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c
@@ -23,6 +23,7 @@
* Authors: AMD
*
*/
+#include <drm/drm_dsc.h>
#include "os_types.h"
#include "rc_calc.h"
@@ -40,7 +41,8 @@
break
-void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, enum max_min max_min, float bpp)
+static void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc,
+ enum max_min max_min, float bpp)
{
int mode = MODE_SELECT(444, 422, 420);
int sel = table_hash(mode, bpc, max_min);
@@ -85,7 +87,7 @@ void get_qp_set(qp_set qps, enum colour_mode cm, enum bits_per_comp bpc, enum ma
memcpy(qps, table[index].qps, sizeof(qp_set));
}
-double dsc_roundf(double num)
+static double dsc_roundf(double num)
{
if (num < 0.0)
num = num - 0.5;
@@ -95,7 +97,7 @@ double dsc_roundf(double num)
return (int)(num);
}
-double dsc_ceil(double num)
+static double dsc_ceil(double num)
{
double retval = (int)num;
@@ -105,7 +107,7 @@ double dsc_ceil(double num)
return (int)retval;
}
-void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
+static void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
{
int *p = ofs;
@@ -160,7 +162,7 @@ void get_ofs_set(qp_set ofs, enum colour_mode mode, float bpp)
}
}
-int median3(int a, int b, int c)
+static int median3(int a, int b, int c)
{
if (a > b)
swap(a, b);
@@ -172,13 +174,25 @@ int median3(int a, int b, int c)
return b;
}
-void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_comp bpc, float bpp, int slice_width, int slice_height, int minor_version)
+static void _do_calc_rc_params(struct rc_params *rc, enum colour_mode cm,
+ enum bits_per_comp bpc, u8 drm_bpp,
+ bool is_navite_422_or_420,
+ int slice_width, int slice_height,
+ int minor_version)
{
+ float bpp;
float bpp_group;
float initial_xmit_delay_factor;
int padding_pixels;
int i;
+ bpp = ((float)drm_bpp / 16.0);
+ /* in native_422 or native_420 modes, the bits_per_pixel is double the
+ * target bpp (the latter is what calc_rc_params expects)
+ */
+ if (is_navite_422_or_420)
+ bpp /= 2.0;
+
rc->rc_quant_incr_limit0 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
rc->rc_quant_incr_limit1 = ((bpc == BPC_8) ? 11 : (bpc == BPC_10 ? 15 : 19)) - ((minor_version == 1 && cm == CM_444) ? 1 : 0);
@@ -251,3 +265,128 @@ void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_com
rc->rc_buf_thresh[13] = 8064;
}
+static u32 _do_bytes_per_pixel_calc(int slice_width, u8 drm_bpp,
+ bool is_navite_422_or_420)
+{
+ float bpp;
+ u32 bytes_per_pixel;
+ double d_bytes_per_pixel;
+
+ bpp = ((float)drm_bpp / 16.0);
+ d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width;
+ // TODO: Make sure the formula for calculating this is precise (ceiling
+ // vs. floor, and at what point they should be applied)
+ if (is_navite_422_or_420)
+ d_bytes_per_pixel /= 2;
+
+ bytes_per_pixel = (u32)dsc_ceil(d_bytes_per_pixel * 0x10000000);
+
+ return bytes_per_pixel;
+}
+
+static u32 _do_calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz,
+ u32 bpp_increment_div)
+{
+ u32 dsc_target_bpp_x16;
+ float f_dsc_target_bpp;
+ float f_stream_bandwidth_100bps;
+ // bpp_increment_div is actually precision
+ u32 precision = bpp_increment_div;
+
+ f_stream_bandwidth_100bps = stream_bandwidth_kbps * 10.0f;
+ f_dsc_target_bpp = f_stream_bandwidth_100bps / pix_clk_100hz;
+
+ // Round down to the nearest precision stop to bring it into DSC spec
+ // range
+ dsc_target_bpp_x16 = (u32)(f_dsc_target_bpp * precision);
+ dsc_target_bpp_x16 = (dsc_target_bpp_x16 * 16) / precision;
+
+ return dsc_target_bpp_x16;
+}
+
+/**
+ * calc_rc_params - reads the user's cmdline mode
+ * @rc: DC internal DSC parameters
+ * @pps: DRM struct with all required DSC values
+ *
+ * This function expects a drm_dsc_config data struct with all the required DSC
+ * values previously filled out by our driver and based on this information it
+ * computes some of the DSC values.
+ *
+ * @note This calculation requires float point operation, most of it executes
+ * under kernel_fpu_{begin,end}.
+ */
+void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps)
+{
+ enum colour_mode mode;
+ enum bits_per_comp bpc;
+ bool is_navite_422_or_420;
+ u8 drm_bpp = pps->bits_per_pixel;
+ int slice_width = pps->slice_width;
+ int slice_height = pps->slice_height;
+
+ mode = pps->convert_rgb ? CM_RGB : (pps->simple_422 ? CM_444 :
+ (pps->native_422 ? CM_422 :
+ pps->native_420 ? CM_420 : CM_444));
+ bpc = (pps->bits_per_component == 8) ? BPC_8 : (pps->bits_per_component == 10)
+ ? BPC_10 : BPC_12;
+
+ is_navite_422_or_420 = pps->native_422 || pps->native_420;
+
+ DC_FP_START();
+ _do_calc_rc_params(rc, mode, bpc, drm_bpp, is_navite_422_or_420,
+ slice_width, slice_height,
+ pps->dsc_version_minor);
+ DC_FP_END();
+}
+
+/**
+ * calc_dsc_bytes_per_pixel - calculate bytes per pixel
+ * @pps: DRM struct with all required DSC values
+ *
+ * Based on the information inside drm_dsc_config, this function calculates the
+ * total of bytes per pixel.
+ *
+ * @note This calculation requires float point operation, most of it executes
+ * under kernel_fpu_{begin,end}.
+ *
+ * Return:
+ * Return the number of bytes per pixel
+ */
+u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps)
+
+{
+ u32 ret;
+ u8 drm_bpp = pps->bits_per_pixel;
+ int slice_width = pps->slice_width;
+ bool is_navite_422_or_420 = pps->native_422 || pps->native_420;
+
+ DC_FP_START();
+ ret = _do_bytes_per_pixel_calc(slice_width, drm_bpp,
+ is_navite_422_or_420);
+ DC_FP_END();
+ return ret;
+}
+
+/**
+ * calc_dsc_bpp_x16 - retrieve the dsc bits per pixel
+ * @stream_bandwidth_kbps:
+ * @pix_clk_100hz:
+ * @bpp_increment_div:
+ *
+ * Calculate the total of bits per pixel for DSC configuration.
+ *
+ * @note This calculation requires float point operation, most of it executes
+ * under kernel_fpu_{begin,end}.
+ */
+u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz,
+ u32 bpp_increment_div)
+{
+ u32 dsc_bpp;
+
+ DC_FP_START();
+ dsc_bpp = _do_calc_dsc_bpp_x16(stream_bandwidth_kbps, pix_clk_100hz,
+ bpp_increment_div);
+ DC_FP_END();
+ return dsc_bpp;
+}
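
For reference, the rounding performed by the new _do_calc_dsc_bpp_x16() helper above is easy to follow with concrete numbers. The sketch below is a plain userspace rework of the same arithmetic; it is not part of the patch, and the bandwidth and pixel-clock values are invented purely to show the round-down to 1/precision bpp steps.

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as _do_calc_dsc_bpp_x16(): bandwidth in 100 bps units
 * divided by pixel clock in 100 Hz units gives bits per pixel, which is
 * rounded DOWN to the nearest 1/precision step and returned in units of
 * 1/16 bpp.
 */
static uint32_t dsc_bpp_x16(uint32_t bw_kbps, uint32_t pix_clk_100hz,
			    uint32_t precision)
{
	float bpp = (bw_kbps * 10.0f) / pix_clk_100hz;
	uint32_t steps = (uint32_t)(bpp * precision);	/* truncation = round down */

	return steps * 16 / precision;
}

int main(void)
{
	/* e.g. 8 Gbps of stream bandwidth on a 594 MHz pixel clock */
	uint32_t x16 = dsc_bpp_x16(8000000, 5940000, 16);

	/* 13.468... bpp rounds down to 215 sixteenths = 13.4375 bpp */
	printf("%u (%.4f bpp)\n", (unsigned int)x16, x16 / 16.0);
	return 0;
}
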
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h
index b6b1f09c2009..21723fa6561e 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h
+++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h
@@ -77,7 +77,10 @@ struct qp_entry {
typedef struct qp_entry qp_table[];
-void calc_rc_params(struct rc_params *rc, enum colour_mode cm, enum bits_per_comp bpc, float bpp, int slice_width, int slice_height, int minor_version);
+void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps);
+u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps);
+u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz,
+ u32 bpp_increment_div);
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c
index 1f6e63b71456..ef830aded5b1 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c
@@ -27,8 +27,6 @@
#include "dscc_types.h"
#include "rc_calc.h"
-double dsc_ceil(double num);
-
static void copy_pps_fields(struct drm_dsc_config *to, const struct drm_dsc_config *from)
{
to->line_buf_depth = from->line_buf_depth;
@@ -100,34 +98,13 @@ static void copy_rc_to_cfg(struct drm_dsc_config *dsc_cfg, const struct rc_param
int dscc_compute_dsc_parameters(const struct drm_dsc_config *pps, struct dsc_parameters *dsc_params)
{
- enum colour_mode mode = pps->convert_rgb ? CM_RGB :
- (pps->simple_422 ? CM_444 :
- (pps->native_422 ? CM_422 :
- pps->native_420 ? CM_420 : CM_444));
- enum bits_per_comp bpc = (pps->bits_per_component == 8) ? BPC_8 :
- (pps->bits_per_component == 10) ? BPC_10 : BPC_12;
- float bpp = ((float) pps->bits_per_pixel / 16.0);
- int slice_width = pps->slice_width;
- int slice_height = pps->slice_height;
int ret;
struct rc_params rc;
struct drm_dsc_config dsc_cfg;
- double d_bytes_per_pixel = dsc_ceil(bpp * slice_width / 8.0) / slice_width;
-
- // TODO: Make sure the formula for calculating this is precise (ceiling vs. floor, and at what point they should be applied)
- if (pps->native_422 || pps->native_420)
- d_bytes_per_pixel /= 2;
-
- dsc_params->bytes_per_pixel = (uint32_t)dsc_ceil(d_bytes_per_pixel * 0x10000000);
-
- /* in native_422 or native_420 modes, the bits_per_pixel is double the target bpp
- * (the latter is what calc_rc_params expects)
- */
- if (pps->native_422 || pps->native_420)
- bpp /= 2.0;
+ dsc_params->bytes_per_pixel = calc_dsc_bytes_per_pixel(pps);
- calc_rc_params(&rc, mode, bpc, bpp, slice_width, slice_height, pps->dsc_version_minor);
+ calc_rc_params(&rc, pps);
dsc_params->pps = *pps;
dsc_params->pps.initial_scale_value = 8 * rc.rc_model_size / (rc.rc_model_size - rc.initial_fullness_offset);
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
index 9431b48aecb4..bcfe34ef8c28 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
@@ -843,7 +843,7 @@ static bool build_regamma(struct pwl_float_data_ex *rgb_regamma,
pow_buffer_ptr = -1; // reset back to no optimize
ret = true;
release:
- kfree(coeff);
+ kvfree(coeff);
return ret;
}
@@ -1777,7 +1777,7 @@ bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf,
kfree(rgb_regamma);
rgb_regamma_alloc_fail:
- kvfree(rgb_user);
+ kfree(rgb_user);
rgb_user_alloc_fail:
return ret;
}
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
index 85e5b1ed22c2..56923a96b450 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
@@ -239,7 +239,7 @@ static void ci_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr)
switch (dev_id) {
case 0x67BA:
- case 0x66B1:
+ case 0x67B1:
smu_data->power_tune_defaults = &defaults_hawaii_pro;
break;
case 0x67B8:
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c
index 2fb97554134f..c2e0fbbccf56 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega20_smumgr.c
@@ -522,9 +522,11 @@ static int vega20_smu_init(struct pp_hwmgr *hwmgr)
priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].version = 0x01;
priv->smu_tables.entry[TABLE_ACTIVITY_MONITOR_COEFF].size = sizeof(DpmActivityMonitorCoeffInt_t);
- ret = smu_v11_0_i2c_eeprom_control_init(&adev->pm.smu_i2c);
- if (ret)
- goto err4;
+ if (adev->psp.ras.ras) {
+ ret = smu_v11_0_i2c_eeprom_control_init(&adev->pm.smu_i2c);
+ if (ret)
+ goto err4;
+ }
return 0;
@@ -560,7 +562,8 @@ static int vega20_smu_fini(struct pp_hwmgr *hwmgr)
(struct vega20_smumgr *)(hwmgr->smu_backend);
struct amdgpu_device *adev = hwmgr->adev;
- smu_v11_0_i2c_eeprom_control_fini(&adev->pm.smu_i2c);
+ if (adev->psp.ras.ras)
+ smu_v11_0_i2c_eeprom_control_fini(&adev->pm.smu_i2c);
if (priv) {
amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_PPTABLE].handle,
diff --git a/drivers/gpu/drm/drm_encoder_slave.c b/drivers/gpu/drm/drm_encoder_slave.c
index cf804389f5ec..e464429d32df 100644
--- a/drivers/gpu/drm/drm_encoder_slave.c
+++ b/drivers/gpu/drm/drm_encoder_slave.c
@@ -61,13 +61,8 @@ int drm_i2c_encoder_init(struct drm_device *dev,
request_module("%s%s", I2C_MODULE_PREFIX, info->type);
- client = i2c_new_device(adap, info);
- if (!client) {
- err = -ENOMEM;
- goto fail;
- }
-
- if (!client->dev.driver) {
+ client = i2c_new_client_device(adap, info);
+ if (!i2c_client_has_driver(client)) {
err = -ENODEV;
goto fail_unregister;
}
@@ -84,7 +79,7 @@ int drm_i2c_encoder_init(struct drm_device *dev,
err = encoder_drv->encoder_init(client, dev, encoder);
if (err)
- goto fail_unregister;
+ goto fail_module_put;
if (info->platform_data)
encoder->slave_funcs->set_config(&encoder->base,
@@ -92,10 +87,10 @@ int drm_i2c_encoder_init(struct drm_device *dev,
return 0;
+fail_module_put:
+ module_put(module);
fail_unregister:
i2c_unregister_device(client);
- module_put(module);
-fail:
return err;
}
EXPORT_SYMBOL(drm_i2c_encoder_init);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dma.c b/drivers/gpu/drm/exynos/exynos_drm_dma.c
index 619f81435c1b..58b89ec11b0e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dma.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dma.c
@@ -61,7 +61,7 @@ static int drm_iommu_attach_device(struct drm_device *drm_dev,
struct device *subdrv_dev, void **dma_priv)
{
struct exynos_drm_private *priv = drm_dev->dev_private;
- int ret;
+ int ret = 0;
if (get_dma_ops(priv->dma_dev) != get_dma_ops(subdrv_dev)) {
DRM_DEV_ERROR(subdrv_dev, "Device %s lacks support for IOMMU\n",
@@ -92,7 +92,7 @@ static int drm_iommu_attach_device(struct drm_device *drm_dev,
if (ret)
clear_dma_max_seg_size(subdrv_dev);
- return 0;
+ return ret;
}
/*
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index fcee33a43aca..03be31427181 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -1498,7 +1498,6 @@ static int g2d_probe(struct platform_device *pdev)
g2d->irq = platform_get_irq(pdev, 0);
if (g2d->irq < 0) {
- dev_err(dev, "failed to get irq\n");
ret = g2d->irq;
goto err_put_clk;
}
diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c
index a86abc173605..3821ea76a703 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_mic.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c
@@ -269,8 +269,10 @@ static void mic_pre_enable(struct drm_bridge *bridge)
goto unlock;
ret = pm_runtime_get_sync(mic->dev);
- if (ret < 0)
+ if (ret < 0) {
+ pm_runtime_put_noidle(mic->dev);
goto unlock;
+ }
mic_set_path(mic, 1);
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index aa22465bb56e..0575a1eea2a1 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -2579,14 +2579,14 @@ static void icl_ddi_vswing_sequence(struct intel_encoder *encoder,
static void
tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock,
- u32 level)
+ u32 level, enum intel_output_type type)
{
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
enum tc_port tc_port = intel_port_to_tc(dev_priv, encoder->port);
const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations;
u32 n_entries, val, ln, dpcnt_mask, dpcnt_val;
- if (encoder->type == INTEL_OUTPUT_HDMI) {
+ if (type == INTEL_OUTPUT_HDMI) {
n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans);
ddi_translations = tgl_dkl_phy_hdmi_ddi_trans;
} else {
@@ -2638,7 +2638,7 @@ static void tgl_ddi_vswing_sequence(struct intel_encoder *encoder,
if (intel_phy_is_combo(dev_priv, phy))
icl_combo_phy_ddi_vswing_sequence(encoder, level, type);
else
- tgl_dkl_phy_ddi_vswing_sequence(encoder, link_clock, level);
+ tgl_dkl_phy_ddi_vswing_sequence(encoder, link_clock, level, type);
}
static u32 translate_signal_level(struct intel_dp *intel_dp, int signal_levels)
@@ -2987,7 +2987,7 @@ icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port,
ln1 = intel_de_read(dev_priv, MG_DP_MODE(1, tc_port));
}
- ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X1_MODE);
+ ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE);
ln1 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE);
/* DPPATC */
@@ -3472,7 +3472,9 @@ static void intel_ddi_post_disable_dp(struct intel_atomic_state *state,
INTEL_OUTPUT_DP_MST);
enum phy phy = intel_port_to_phy(dev_priv, encoder->port);
- intel_dp_set_infoframes(encoder, false, old_crtc_state, old_conn_state);
+ if (!is_mst)
+ intel_dp_set_infoframes(encoder, false,
+ old_crtc_state, old_conn_state);
/*
* Power down sink before disabling the port, otherwise we end
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index d18b406f2a7d..f29e51ce489c 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -397,6 +397,14 @@ static void intel_mst_post_disable_dp(struct intel_atomic_state *state,
*/
drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port,
false);
+
+ /*
+ * BSpec 4287: disable DIP after the transcoder is disabled and before
+ * the transcoder clock select is set to none.
+ */
+ if (last_mst_stream)
+ intel_dp_set_infoframes(&intel_dig_port->base, false,
+ old_crtc_state, NULL);
/*
* From TGL spec: "If multi-stream slave transcoder: Configure
* Transcoder Clock Select to direct no clock to the transcoder"
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index da5b61085257..8691eb61e185 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -646,7 +646,7 @@ static int engine_setup_common(struct intel_engine_cs *engine)
struct measure_breadcrumb {
struct i915_request rq;
struct intel_ring ring;
- u32 cs[1024];
+ u32 cs[2048];
};
static int measure_breadcrumb_dw(struct intel_context *ce)
@@ -668,6 +668,8 @@ static int measure_breadcrumb_dw(struct intel_context *ce)
frame->ring.vaddr = frame->cs;
frame->ring.size = sizeof(frame->cs);
+ frame->ring.wrap =
+ BITS_PER_TYPE(frame->ring.size) - ilog2(frame->ring.size);
frame->ring.effective_size = frame->ring.size;
intel_ring_update_space(&frame->ring);
frame->rq.ring = &frame->ring;
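
The frame->ring.wrap initialization above follows the same formula the driver uses for its rings, wrap = BITS_PER_TYPE(ring->size) - ilog2(ring->size). Shifting a tail difference left by that amount keeps only the bits of the difference modulo the ring size and puts the topmost of them in the sign bit, so the sign distinguishes forward from backward movement within half the ring (the size/2 precision limit the new selftest_ring.c notes). Below is a minimal standalone sketch of that arithmetic, for illustration only and assuming a power-of-two ring size; it is not the driver's helper.

#include <stdint.h>
#include <stdio.h>

/* For an 8 KiB ring (u32 cs[2048]): wrap = 32 - ilog2(8192) = 19. */
static int ring_direction(uint32_t size, uint32_t next, uint32_t prev)
{
	unsigned int wrap = 32 - __builtin_ctz(size);	/* ilog2() for a power of two */

	return (int32_t)((next - prev) << wrap);	/* sign = direction, within size/2 */
}

int main(void)
{
	uint32_t size = 8192;

	/* 8160 -> 64 wrapped forward by 96 bytes: prints 1 */
	printf("%d\n", ring_direction(size, 64, 8160) > 0);
	/* 64 -> 8160 reads as 96 bytes backward (precision is size/2): prints 0 */
	printf("%d\n", ring_direction(size, 8160, 64) > 0);
	return 0;
}
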
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 87e6c5bdd2dc..7c3d8ef4a47c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1134,6 +1134,13 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
list_move(&rq->sched.link, pl);
set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+ /* Check in case we rollback so far we wrap [size/2] */
+ if (intel_ring_direction(rq->ring,
+ intel_ring_wrap(rq->ring,
+ rq->tail),
+ rq->ring->tail) > 0)
+ rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
+
active = rq;
} else {
struct intel_engine_cs *owner = rq->context->engine;
@@ -1498,8 +1505,9 @@ static u64 execlists_update_context(struct i915_request *rq)
* HW has a tendency to ignore us rewinding the TAIL to the end of
* an earlier request.
*/
+ GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail);
+ prev = rq->ring->tail;
tail = intel_ring_set_tail(rq->ring, rq->tail);
- prev = ce->lrc_reg_state[CTX_RING_TAIL];
if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
desc |= CTX_DESC_FORCE_RESTORE;
ce->lrc_reg_state[CTX_RING_TAIL] = tail;
@@ -1895,7 +1903,8 @@ static void defer_active(struct intel_engine_cs *engine)
static bool
need_timeslice(const struct intel_engine_cs *engine,
- const struct i915_request *rq)
+ const struct i915_request *rq,
+ const struct rb_node *rb)
{
int hint;
@@ -1903,9 +1912,28 @@ need_timeslice(const struct intel_engine_cs *engine,
return false;
hint = engine->execlists.queue_priority_hint;
+
+ if (rb) {
+ const struct virtual_engine *ve =
+ rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+ const struct intel_engine_cs *inflight =
+ intel_context_inflight(&ve->context);
+
+ if (!inflight || inflight == engine) {
+ struct i915_request *next;
+
+ rcu_read_lock();
+ next = READ_ONCE(ve->request);
+ if (next)
+ hint = max(hint, rq_prio(next));
+ rcu_read_unlock();
+ }
+ }
+
if (!list_is_last(&rq->sched.link, &engine->active.requests))
hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
+ GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE);
return hint >= effective_prio(rq);
}
@@ -1977,10 +2005,9 @@ static void set_timeslice(struct intel_engine_cs *engine)
set_timer_ms(&engine->execlists.timer, duration);
}
-static void start_timeslice(struct intel_engine_cs *engine)
+static void start_timeslice(struct intel_engine_cs *engine, int prio)
{
struct intel_engine_execlists *execlists = &engine->execlists;
- const int prio = queue_prio(execlists);
unsigned long duration;
if (!intel_engine_has_timeslices(engine))
@@ -2140,7 +2167,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
__unwind_incomplete_requests(engine);
last = NULL;
- } else if (need_timeslice(engine, last) &&
+ } else if (need_timeslice(engine, last, rb) &&
timeslice_expired(execlists, last)) {
if (i915_request_completed(last)) {
tasklet_hi_schedule(&execlists->tasklet);
@@ -2188,7 +2215,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* Even if ELSP[1] is occupied and not worthy
* of timeslices, our queue might be.
*/
- start_timeslice(engine);
+ start_timeslice(engine, queue_prio(execlists));
return;
}
}
@@ -2223,7 +2250,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.active.lock);
- start_timeslice(engine);
+ start_timeslice(engine, rq_prio(rq));
return; /* leave this for another sibling */
}
@@ -4739,6 +4766,14 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode)
return 0;
}
+static void assert_request_valid(struct i915_request *rq)
+{
+ struct intel_ring *ring __maybe_unused = rq->ring;
+
+ /* Can we unwind this request without appearing to go forwards? */
+ GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
+}
+
/*
* Reserve space for 2 NOOPs at the end of each request to be
* used as a workaround for not being allowed to do lite
@@ -4751,6 +4786,9 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
*cs++ = MI_NOOP;
request->wa_tail = intel_ring_offset(request, cs);
+ /* Check that entire request is less than half the ring */
+ assert_request_valid(request);
+
return cs;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index 8cda1b7e17ba..bdb324167ef3 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -315,3 +315,7 @@ int intel_ring_cacheline_align(struct i915_request *rq)
GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
return 0;
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_ring.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 90a2b9e399b0..85d2bef51524 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -179,6 +179,12 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
}
static void
+wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
+{
+ wa_write_masked_or(wal, reg, clr, 0);
+}
+
+static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val);
@@ -687,6 +693,227 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
}
static void
+gen4_gt_workarounds_init(struct drm_i915_private *i915,
+ struct i915_wa_list *wal)
+{
+ /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
+ wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
+}
+
+static void
+g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ gen4_gt_workarounds_init(i915, wal);
+
+ /* WaDisableRenderCachePipelinedFlush:g4x,ilk */
+ wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
+}
+
+static void
+ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ g4x_gt_workarounds_init(i915, wal);
+
+ wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
+}
+
+static void
+snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
+ wa_masked_en(wal,
+ _3D_CHICKEN,
+ _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);
+
+ /* WaDisable_RenderCache_OperationalFlush:snb */
+ wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
+
+ /*
+ * BSpec recommends 8x4 when MSAA is used,
+ * however in practice 16x4 seems fastest.
+ *
+ * Note that PS/WM thread counts depend on the WIZ hashing
+ * disable bit, which we don't touch here, but it's good
+ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+ */
+ wa_add(wal,
+ GEN6_GT_MODE, 0,
+ _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
+ GEN6_WIZ_HASHING_16x4);
+
+ wa_masked_dis(wal, CACHE_MODE_0, CM0_STC_EVICT_DISABLE_LRA_SNB);
+
+ wa_masked_en(wal,
+ _3D_CHICKEN3,
+ /* WaStripsFansDisableFastClipPerformanceFix:snb */
+ _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
+ /*
+ * Bspec says:
+ * "This bit must be set if 3DSTATE_CLIP clip mode is set
+ * to normal and 3DSTATE_SF number of SF output attributes
+ * is more than 16."
+ */
+ _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);
+}
+
+static void
+ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ /* WaDisableEarlyCull:ivb */
+ wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
+
+ /* WaDisablePSDDualDispatchEnable:ivb */
+ if (IS_IVB_GT1(i915))
+ wa_masked_en(wal,
+ GEN7_HALF_SLICE_CHICKEN1,
+ GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
+
+ /* WaDisable_RenderCache_OperationalFlush:ivb */
+ wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
+
+ /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
+ wa_masked_dis(wal,
+ GEN7_COMMON_SLICE_CHICKEN1,
+ GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
+
+ /* WaApplyL3ControlAndL3ChickenMode:ivb */
+ wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
+ wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
+
+ /* WaForceL3Serialization:ivb */
+ wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
+
+ /*
+ * WaVSThreadDispatchOverride:ivb,vlv
+ *
+ * This actually overrides the dispatch
+ * mode for all thread types.
+ */
+ wa_write_masked_or(wal, GEN7_FF_THREAD_MODE,
+ GEN7_FF_SCHED_MASK,
+ GEN7_FF_TS_SCHED_HW |
+ GEN7_FF_VS_SCHED_HW |
+ GEN7_FF_DS_SCHED_HW);
+
+ if (0) { /* causes HiZ corruption on ivb:gt1 */
+ /* enable HiZ Raw Stall Optimization */
+ wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
+ }
+
+ /* WaDisable4x2SubspanOptimization:ivb */
+ wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
+
+ /*
+ * BSpec recommends 8x4 when MSAA is used,
+ * however in practice 16x4 seems fastest.
+ *
+ * Note that PS/WM thread counts depend on the WIZ hashing
+ * disable bit, which we don't touch here, but it's good
+ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+ */
+ wa_add(wal, GEN7_GT_MODE, 0,
+ _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
+ GEN6_WIZ_HASHING_16x4);
+}
+
+static void
+vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ /* WaDisableEarlyCull:vlv */
+ wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
+
+ /* WaPsdDispatchEnable:vlv */
+ /* WaDisablePSDDualDispatchEnable:vlv */
+ wa_masked_en(wal,
+ GEN7_HALF_SLICE_CHICKEN1,
+ GEN7_MAX_PS_THREAD_DEP |
+ GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
+
+ /* WaDisable_RenderCache_OperationalFlush:vlv */
+ wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
+
+ /* WaForceL3Serialization:vlv */
+ wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
+
+ /*
+ * WaVSThreadDispatchOverride:ivb,vlv
+ *
+ * This actually overrides the dispatch
+ * mode for all thread types.
+ */
+ wa_write_masked_or(wal,
+ GEN7_FF_THREAD_MODE,
+ GEN7_FF_SCHED_MASK,
+ GEN7_FF_TS_SCHED_HW |
+ GEN7_FF_VS_SCHED_HW |
+ GEN7_FF_DS_SCHED_HW);
+
+ /*
+ * BSpec says this must be set, even though
+ * WaDisable4x2SubspanOptimization isn't listed for VLV.
+ */
+ wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
+
+ /*
+ * BSpec recommends 8x4 when MSAA is used,
+ * however in practice 16x4 seems fastest.
+ *
+ * Note that PS/WM thread counts depend on the WIZ hashing
+ * disable bit, which we don't touch here, but it's good
+ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+ */
+ wa_add(wal, GEN7_GT_MODE, 0,
+ _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
+ GEN6_WIZ_HASHING_16x4);
+
+ /*
+ * WaIncreaseL3CreditsForVLVB0:vlv
+ * This is the hardware default actually.
+ */
+ wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
+}
+
+static void
+hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ /* L3 caching of data atomics doesn't work -- disable it. */
+ wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
+
+ wa_add(wal,
+ HSW_ROW_CHICKEN3, 0,
+ _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
+ 0 /* XXX does this reg exist? */);
+
+ /* WaVSRefCountFullforceMissDisable:hsw */
+ wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
+
+ wa_masked_dis(wal,
+ CACHE_MODE_0_GEN7,
+ /* WaDisable_RenderCache_OperationalFlush:hsw */
+ RC_OP_FLUSH_ENABLE |
+ /* enable HiZ Raw Stall Optimization */
+ HIZ_RAW_STALL_OPT_DISABLE);
+
+ /* WaDisable4x2SubspanOptimization:hsw */
+ wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
+
+ /*
+ * BSpec recommends 8x4 when MSAA is used,
+ * however in practice 16x4 seems fastest.
+ *
+ * Note that PS/WM thread counts depend on the WIZ hashing
+ * disable bit, which we don't touch here, but it's good
+ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+ */
+ wa_add(wal, GEN7_GT_MODE, 0,
+ _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
+ GEN6_WIZ_HASHING_16x4);
+
+ /* WaSampleCChickenBitEnable:hsw */
+ wa_masked_en(wal, HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
+}
+
+static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
/* WaDisableKillLogic:bxt,skl,kbl */
@@ -963,6 +1190,20 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
bxt_gt_workarounds_init(i915, wal);
else if (IS_SKYLAKE(i915))
skl_gt_workarounds_init(i915, wal);
+ else if (IS_HASWELL(i915))
+ hsw_gt_workarounds_init(i915, wal);
+ else if (IS_VALLEYVIEW(i915))
+ vlv_gt_workarounds_init(i915, wal);
+ else if (IS_IVYBRIDGE(i915))
+ ivb_gt_workarounds_init(i915, wal);
+ else if (IS_GEN(i915, 6))
+ snb_gt_workarounds_init(i915, wal);
+ else if (IS_GEN(i915, 5))
+ ilk_gt_workarounds_init(i915, wal);
+ else if (IS_G4X(i915))
+ g4x_gt_workarounds_init(i915, wal);
+ else if (IS_GEN(i915, 4))
+ gen4_gt_workarounds_init(i915, wal);
else if (INTEL_GEN(i915) <= 8)
return;
else
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 2b2efff6e19d..4aa4cc917d8b 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -310,22 +310,20 @@ static bool wait_until_running(struct hang *h, struct i915_request *rq)
1000));
}
-static void engine_heartbeat_disable(struct intel_engine_cs *engine,
- unsigned long *saved)
+static void engine_heartbeat_disable(struct intel_engine_cs *engine)
{
- *saved = engine->props.heartbeat_interval_ms;
engine->props.heartbeat_interval_ms = 0;
intel_engine_pm_get(engine);
intel_engine_park_heartbeat(engine);
}
-static void engine_heartbeat_enable(struct intel_engine_cs *engine,
- unsigned long saved)
+static void engine_heartbeat_enable(struct intel_engine_cs *engine)
{
intel_engine_pm_put(engine);
- engine->props.heartbeat_interval_ms = saved;
+ engine->props.heartbeat_interval_ms =
+ engine->defaults.heartbeat_interval_ms;
}
static int igt_hang_sanitycheck(void *arg)
@@ -473,7 +471,6 @@ static int igt_reset_nop_engine(void *arg)
for_each_engine(engine, gt, id) {
unsigned int reset_count, reset_engine_count, count;
struct intel_context *ce;
- unsigned long heartbeat;
IGT_TIMEOUT(end_time);
int err;
@@ -485,7 +482,7 @@ static int igt_reset_nop_engine(void *arg)
reset_engine_count = i915_reset_engine_count(global, engine);
count = 0;
- engine_heartbeat_disable(engine, &heartbeat);
+ engine_heartbeat_disable(engine);
set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
do {
int i;
@@ -529,7 +526,7 @@ static int igt_reset_nop_engine(void *arg)
}
} while (time_before(jiffies, end_time));
clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
- engine_heartbeat_enable(engine, heartbeat);
+ engine_heartbeat_enable(engine);
pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
@@ -564,7 +561,6 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
for_each_engine(engine, gt, id) {
unsigned int reset_count, reset_engine_count;
- unsigned long heartbeat;
IGT_TIMEOUT(end_time);
if (active && !intel_engine_can_store_dword(engine))
@@ -580,7 +576,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
reset_count = i915_reset_count(global);
reset_engine_count = i915_reset_engine_count(global, engine);
- engine_heartbeat_disable(engine, &heartbeat);
+ engine_heartbeat_disable(engine);
set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
do {
if (active) {
@@ -632,7 +628,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
}
} while (time_before(jiffies, end_time));
clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
- engine_heartbeat_enable(engine, heartbeat);
+ engine_heartbeat_enable(engine);
if (err)
break;
@@ -789,7 +785,6 @@ static int __igt_reset_engines(struct intel_gt *gt,
struct active_engine threads[I915_NUM_ENGINES] = {};
unsigned long device = i915_reset_count(global);
unsigned long count = 0, reported;
- unsigned long heartbeat;
IGT_TIMEOUT(end_time);
if (flags & TEST_ACTIVE &&
@@ -832,7 +827,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
yield(); /* start all threads before we begin */
- engine_heartbeat_disable(engine, &heartbeat);
+ engine_heartbeat_disable(engine);
set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
do {
struct i915_request *rq = NULL;
@@ -906,7 +901,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
}
} while (time_before(jiffies, end_time));
clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
- engine_heartbeat_enable(engine, heartbeat);
+ engine_heartbeat_enable(engine);
pr_info("i915_reset_engine(%s:%s): %lu resets\n",
engine->name, test_name, count);
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 824f99c4cc7c..924bc01ef526 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -51,22 +51,20 @@ static struct i915_vma *create_scratch(struct intel_gt *gt)
return vma;
}
-static void engine_heartbeat_disable(struct intel_engine_cs *engine,
- unsigned long *saved)
+static void engine_heartbeat_disable(struct intel_engine_cs *engine)
{
- *saved = engine->props.heartbeat_interval_ms;
engine->props.heartbeat_interval_ms = 0;
intel_engine_pm_get(engine);
intel_engine_park_heartbeat(engine);
}
-static void engine_heartbeat_enable(struct intel_engine_cs *engine,
- unsigned long saved)
+static void engine_heartbeat_enable(struct intel_engine_cs *engine)
{
intel_engine_pm_put(engine);
- engine->props.heartbeat_interval_ms = saved;
+ engine->props.heartbeat_interval_ms =
+ engine->defaults.heartbeat_interval_ms;
}
static bool is_active(struct i915_request *rq)
@@ -224,7 +222,6 @@ static int live_unlite_restore(struct intel_gt *gt, int prio)
struct intel_context *ce[2] = {};
struct i915_request *rq[2];
struct igt_live_test t;
- unsigned long saved;
int n;
if (prio && !intel_engine_has_preemption(engine))
@@ -237,7 +234,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio)
err = -EIO;
break;
}
- engine_heartbeat_disable(engine, &saved);
+ engine_heartbeat_disable(engine);
for (n = 0; n < ARRAY_SIZE(ce); n++) {
struct intel_context *tmp;
@@ -345,7 +342,7 @@ err_ce:
intel_context_put(ce[n]);
}
- engine_heartbeat_enable(engine, saved);
+ engine_heartbeat_enable(engine);
if (igt_live_test_end(&t))
err = -EIO;
if (err)
@@ -466,7 +463,6 @@ static int live_hold_reset(void *arg)
for_each_engine(engine, gt, id) {
struct intel_context *ce;
- unsigned long heartbeat;
struct i915_request *rq;
ce = intel_context_create(engine);
@@ -475,7 +471,7 @@ static int live_hold_reset(void *arg)
break;
}
- engine_heartbeat_disable(engine, &heartbeat);
+ engine_heartbeat_disable(engine);
rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
if (IS_ERR(rq)) {
@@ -535,7 +531,7 @@ static int live_hold_reset(void *arg)
i915_request_put(rq);
out:
- engine_heartbeat_enable(engine, heartbeat);
+ engine_heartbeat_enable(engine);
intel_context_put(ce);
if (err)
break;
@@ -580,10 +576,9 @@ static int live_error_interrupt(void *arg)
for_each_engine(engine, gt, id) {
const struct error_phase *p;
- unsigned long heartbeat;
int err = 0;
- engine_heartbeat_disable(engine, &heartbeat);
+ engine_heartbeat_disable(engine);
for (p = phases; p->error[0] != GOOD; p++) {
struct i915_request *client[ARRAY_SIZE(phases->error)];
@@ -682,7 +677,7 @@ out:
}
}
- engine_heartbeat_enable(engine, heartbeat);
+ engine_heartbeat_enable(engine);
if (err) {
intel_gt_set_wedged(gt);
return err;
@@ -828,7 +823,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
}
}
- err = release_queue(outer, vma, n, INT_MAX);
+ err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
if (err)
goto out;
@@ -895,16 +890,14 @@ static int live_timeslice_preempt(void *arg)
enum intel_engine_id id;
for_each_engine(engine, gt, id) {
- unsigned long saved;
-
if (!intel_engine_has_preemption(engine))
continue;
memset(vaddr, 0, PAGE_SIZE);
- engine_heartbeat_disable(engine, &saved);
+ engine_heartbeat_disable(engine);
err = slice_semaphore_queue(engine, vma, count);
- engine_heartbeat_enable(engine, saved);
+ engine_heartbeat_enable(engine);
if (err)
goto err_pin;
@@ -1009,7 +1002,6 @@ static int live_timeslice_rewind(void *arg)
enum { X = 1, Z, Y };
struct i915_request *rq[3] = {};
struct intel_context *ce;
- unsigned long heartbeat;
unsigned long timeslice;
int i, err = 0;
u32 *slot;
@@ -1028,7 +1020,7 @@ static int live_timeslice_rewind(void *arg)
* Expect execution/evaluation order XZY
*/
- engine_heartbeat_disable(engine, &heartbeat);
+ engine_heartbeat_disable(engine);
timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
slot = memset32(engine->status_page.addr + 1000, 0, 4);
@@ -1122,7 +1114,7 @@ err:
wmb();
engine->props.timeslice_duration_ms = timeslice;
- engine_heartbeat_enable(engine, heartbeat);
+ engine_heartbeat_enable(engine);
for (i = 0; i < 3; i++)
i915_request_put(rq[i]);
if (igt_flush_test(gt->i915))
@@ -1202,12 +1194,11 @@ static int live_timeslice_queue(void *arg)
.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
};
struct i915_request *rq, *nop;
- unsigned long saved;
if (!intel_engine_has_preemption(engine))
continue;
- engine_heartbeat_disable(engine, &saved);
+ engine_heartbeat_disable(engine);
memset(vaddr, 0, PAGE_SIZE);
/* ELSP[0]: semaphore wait */
@@ -1284,7 +1275,7 @@ static int live_timeslice_queue(void *arg)
err_rq:
i915_request_put(rq);
err_heartbeat:
- engine_heartbeat_enable(engine, saved);
+ engine_heartbeat_enable(engine);
if (err)
break;
}
@@ -1298,6 +1289,121 @@ err_obj:
return err;
}
+static int live_timeslice_nopreempt(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = 0;
+
+ /*
+ * We should not timeslice into a request that is marked with
+ * I915_REQUEST_NOPREEMPT.
+ */
+ if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ return 0;
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
+ struct i915_request *rq;
+ unsigned long timeslice;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ break;
+ }
+
+ engine_heartbeat_disable(engine);
+ timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
+
+ /* Create an unpreemptible spinner */
+
+ rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+ intel_context_put(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_heartbeat;
+ }
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ i915_request_put(rq);
+ err = -ETIME;
+ goto out_spin;
+ }
+
+ set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
+ i915_request_put(rq);
+
+ /* Followed by a maximum priority barrier (heartbeat) */
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(rq);
+ goto out_spin;
+ }
+
+ rq = intel_context_create_request(ce);
+ intel_context_put(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_spin;
+ }
+
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ /*
+ * Wait until the barrier is in ELSP, and we know timeslicing
+ * will have been activated.
+ */
+ if (wait_for_submit(engine, rq, HZ / 2)) {
+ i915_request_put(rq);
+ err = -ETIME;
+ goto out_spin;
+ }
+
+ /*
+ * Since the ELSP[0] request is unpreemptible, it should not
+ * allow the maximum priority barrier through. Wait long
+ * enough to see if it is timesliced in by mistake.
+ */
+ if (i915_request_wait(rq, 0, timeslice_threshold(engine)) >= 0) {
+ pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
+ engine->name);
+ err = -EINVAL;
+ }
+ i915_request_put(rq);
+
+out_spin:
+ igt_spinner_end(&spin);
+out_heartbeat:
+ xchg(&engine->props.timeslice_duration_ms, timeslice);
+ engine_heartbeat_enable(engine);
+ if (err)
+ break;
+
+ if (igt_flush_test(gt->i915)) {
+ err = -EIO;
+ break;
+ }
+ }
+
+ igt_spinner_fini(&spin);
+ return err;
+}
+
static int live_busywait_preempt(void *arg)
{
struct intel_gt *gt = arg;
@@ -4153,7 +4259,6 @@ static int reset_virtual_engine(struct intel_gt *gt,
{
struct intel_engine_cs *engine;
struct intel_context *ve;
- unsigned long *heartbeat;
struct igt_spinner spin;
struct i915_request *rq;
unsigned int n;
@@ -4165,15 +4270,9 @@ static int reset_virtual_engine(struct intel_gt *gt,
* descendents are not executed while the capture is in progress.
*/
- heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL);
- if (!heartbeat)
+ if (igt_spinner_init(&spin, gt))
return -ENOMEM;
- if (igt_spinner_init(&spin, gt)) {
- err = -ENOMEM;
- goto out_free;
- }
-
ve = intel_execlists_create_virtual(siblings, nsibling);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
@@ -4181,7 +4280,7 @@ static int reset_virtual_engine(struct intel_gt *gt,
}
for (n = 0; n < nsibling; n++)
- engine_heartbeat_disable(siblings[n], &heartbeat[n]);
+ engine_heartbeat_disable(siblings[n]);
rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
if (IS_ERR(rq)) {
@@ -4252,13 +4351,11 @@ out_rq:
i915_request_put(rq);
out_heartbeat:
for (n = 0; n < nsibling; n++)
- engine_heartbeat_enable(siblings[n], heartbeat[n]);
+ engine_heartbeat_enable(siblings[n]);
intel_context_put(ve);
out_spin:
igt_spinner_fini(&spin);
-out_free:
- kfree(heartbeat);
return err;
}
@@ -4314,6 +4411,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_timeslice_preempt),
SUBTEST(live_timeslice_rewind),
SUBTEST(live_timeslice_queue),
+ SUBTEST(live_timeslice_nopreempt),
SUBTEST(live_busywait_preempt),
SUBTEST(live_preempt),
SUBTEST(live_late_preempt),
@@ -4932,9 +5030,7 @@ static int live_lrc_gpr(void *arg)
return PTR_ERR(scratch);
for_each_engine(engine, gt, id) {
- unsigned long heartbeat;
-
- engine_heartbeat_disable(engine, &heartbeat);
+ engine_heartbeat_disable(engine);
err = __live_lrc_gpr(engine, scratch, false);
if (err)
@@ -4945,7 +5041,7 @@ static int live_lrc_gpr(void *arg)
goto err;
err:
- engine_heartbeat_enable(engine, heartbeat);
+ engine_heartbeat_enable(engine);
if (igt_flush_test(gt->i915))
err = -EIO;
if (err)
@@ -5092,10 +5188,9 @@ static int live_lrc_timestamp(void *arg)
*/
for_each_engine(data.engine, gt, id) {
- unsigned long heartbeat;
int i, err = 0;
- engine_heartbeat_disable(data.engine, &heartbeat);
+ engine_heartbeat_disable(data.engine);
for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
struct intel_context *tmp;
@@ -5128,7 +5223,7 @@ static int live_lrc_timestamp(void *arg)
}
err:
- engine_heartbeat_enable(data.engine, heartbeat);
+ engine_heartbeat_enable(data.engine);
for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
if (!data.ce[i])
break;
diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
index 8831ffee2061..63f87d8608c3 100644
--- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
@@ -18,6 +18,20 @@ struct live_mocs {
void *vaddr;
};
+static struct intel_context *mocs_context_create(struct intel_engine_cs *engine)
+{
+ struct intel_context *ce;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return ce;
+
+ /* We build large requests to read the registers from the ring */
+ ce->ring = __intel_context_ring_size(SZ_16K);
+
+ return ce;
+}
+
static int request_add_sync(struct i915_request *rq, int err)
{
i915_request_get(rq);
@@ -301,7 +315,7 @@ static int live_mocs_clean(void *arg)
for_each_engine(engine, gt, id) {
struct intel_context *ce;
- ce = intel_context_create(engine);
+ ce = mocs_context_create(engine);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
break;
@@ -395,7 +409,7 @@ static int live_mocs_reset(void *arg)
for_each_engine(engine, gt, id) {
struct intel_context *ce;
- ce = intel_context_create(engine);
+ ce = mocs_context_create(engine);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
break;
diff --git a/drivers/gpu/drm/i915/gt/selftest_ring.c b/drivers/gpu/drm/i915/gt/selftest_ring.c
new file mode 100644
index 000000000000..2a8c534dc125
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_ring.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+static struct intel_ring *mock_ring(unsigned long sz)
+{
+ struct intel_ring *ring;
+
+ ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
+ if (!ring)
+ return NULL;
+
+ kref_init(&ring->ref);
+ ring->size = sz;
+ ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(sz);
+ ring->effective_size = sz;
+ ring->vaddr = (void *)(ring + 1);
+ atomic_set(&ring->pin_count, 1);
+
+ intel_ring_update_space(ring);
+
+ return ring;
+}
+
+static void mock_ring_free(struct intel_ring *ring)
+{
+ kfree(ring);
+}
+
+static int check_ring_direction(struct intel_ring *ring,
+ u32 next, u32 prev,
+ int expected)
+{
+ int result;
+
+ result = intel_ring_direction(ring, next, prev);
+ if (result < 0)
+ result = -1;
+ else if (result > 0)
+ result = 1;
+
+ if (result != expected) {
+ pr_err("intel_ring_direction(%u, %u):%d != %d\n",
+ next, prev, result, expected);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int check_ring_step(struct intel_ring *ring, u32 x, u32 step)
+{
+ u32 prev = x, next = intel_ring_wrap(ring, x + step);
+ int err = 0;
+
+ err |= check_ring_direction(ring, next, next, 0);
+ err |= check_ring_direction(ring, prev, prev, 0);
+ err |= check_ring_direction(ring, next, prev, 1);
+ err |= check_ring_direction(ring, prev, next, -1);
+
+ return err;
+}
+
+static int check_ring_offset(struct intel_ring *ring, u32 x, u32 step)
+{
+ int err = 0;
+
+ err |= check_ring_step(ring, x, step);
+ err |= check_ring_step(ring, intel_ring_wrap(ring, x + 1), step);
+ err |= check_ring_step(ring, intel_ring_wrap(ring, x - 1), step);
+
+ return err;
+}
+
+static int igt_ring_direction(void *dummy)
+{
+ struct intel_ring *ring;
+ unsigned int half = 2048;
+ int step, err = 0;
+
+ ring = mock_ring(2 * half);
+ if (!ring)
+ return -ENOMEM;
+
+ GEM_BUG_ON(ring->size != 2 * half);
+
+ /* Precision of wrap detection is limited to ring->size / 2 */
+ for (step = 1; step < half; step <<= 1) {
+ err |= check_ring_offset(ring, 0, step);
+ err |= check_ring_offset(ring, half, step);
+ }
+ err |= check_ring_step(ring, 0, half - 64);
+
+ /* And check unwrapped handling for good measure */
+ err |= check_ring_offset(ring, 0, 2 * half + 64);
+ err |= check_ring_offset(ring, 3 * half, 1);
+
+ mock_ring_free(ring);
+ return err;
+}
+
+int intel_ring_mock_selftests(void)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_ring_direction),
+ };
+
+ return i915_subtests(tests, NULL);
+}
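For reference, the wrap-direction comparison this new selftest exercises can be sketched as below. This is a minimal illustration, assuming the offsets have already been wrapped to the ring size; it mirrors the shift trick implied by mock_ring()'s ring->wrap, not necessarily the driver's exact helper.

	/*
	 * Compare two wrapped ring offsets by shifting their difference so
	 * the bit just above the ring size becomes the sign bit.  The result
	 * is only meaningful while the offsets are less than half the ring
	 * size apart, which is exactly the limit igt_ring_direction() notes.
	 */
	static inline int ring_direction_sketch(u32 size, u32 next, u32 prev)
	{
		unsigned int wrap = 32 - ilog2(size);	/* cf. mock_ring() above */

		return (int)((next - prev) << wrap);	/* >0: next ahead, <0: next behind */
	}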
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index 6275d69aa9cc..5049c3dd08a6 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -20,24 +20,20 @@
/* Try to isolate the impact of cstates from determing frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
-static unsigned long engine_heartbeat_disable(struct intel_engine_cs *engine)
+static void engine_heartbeat_disable(struct intel_engine_cs *engine)
{
- unsigned long old;
-
- old = fetch_and_zero(&engine->props.heartbeat_interval_ms);
+ engine->props.heartbeat_interval_ms = 0;
intel_engine_pm_get(engine);
intel_engine_park_heartbeat(engine);
-
- return old;
}
-static void engine_heartbeat_enable(struct intel_engine_cs *engine,
- unsigned long saved)
+static void engine_heartbeat_enable(struct intel_engine_cs *engine)
{
intel_engine_pm_put(engine);
- engine->props.heartbeat_interval_ms = saved;
+ engine->props.heartbeat_interval_ms =
+ engine->defaults.heartbeat_interval_ms;
}
static void dummy_rps_work(struct work_struct *wrk)
@@ -246,7 +242,6 @@ int live_rps_clock_interval(void *arg)
intel_gt_check_clock_frequency(gt);
for_each_engine(engine, gt, id) {
- unsigned long saved_heartbeat;
struct i915_request *rq;
u32 cycles;
u64 dt;
@@ -254,13 +249,13 @@ int live_rps_clock_interval(void *arg)
if (!intel_engine_can_store_dword(engine))
continue;
- saved_heartbeat = engine_heartbeat_disable(engine);
+ engine_heartbeat_disable(engine);
rq = igt_spinner_create_request(&spin,
engine->kernel_context,
MI_NOOP);
if (IS_ERR(rq)) {
- engine_heartbeat_enable(engine, saved_heartbeat);
+ engine_heartbeat_enable(engine);
err = PTR_ERR(rq);
break;
}
@@ -271,7 +266,7 @@ int live_rps_clock_interval(void *arg)
pr_err("%s: RPS spinner did not start\n",
engine->name);
igt_spinner_end(&spin);
- engine_heartbeat_enable(engine, saved_heartbeat);
+ engine_heartbeat_enable(engine);
intel_gt_set_wedged(engine->gt);
err = -EIO;
break;
@@ -327,7 +322,7 @@ int live_rps_clock_interval(void *arg)
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
igt_spinner_end(&spin);
- engine_heartbeat_enable(engine, saved_heartbeat);
+ engine_heartbeat_enable(engine);
if (err == 0) {
u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
@@ -405,7 +400,6 @@ int live_rps_control(void *arg)
intel_gt_pm_get(gt);
for_each_engine(engine, gt, id) {
- unsigned long saved_heartbeat;
struct i915_request *rq;
ktime_t min_dt, max_dt;
int f, limit;
@@ -414,7 +408,7 @@ int live_rps_control(void *arg)
if (!intel_engine_can_store_dword(engine))
continue;
- saved_heartbeat = engine_heartbeat_disable(engine);
+ engine_heartbeat_disable(engine);
rq = igt_spinner_create_request(&spin,
engine->kernel_context,
@@ -430,7 +424,7 @@ int live_rps_control(void *arg)
pr_err("%s: RPS spinner did not start\n",
engine->name);
igt_spinner_end(&spin);
- engine_heartbeat_enable(engine, saved_heartbeat);
+ engine_heartbeat_enable(engine);
intel_gt_set_wedged(engine->gt);
err = -EIO;
break;
@@ -440,7 +434,7 @@ int live_rps_control(void *arg)
pr_err("%s: could not set minimum frequency [%x], only %x!\n",
engine->name, rps->min_freq, read_cagf(rps));
igt_spinner_end(&spin);
- engine_heartbeat_enable(engine, saved_heartbeat);
+ engine_heartbeat_enable(engine);
show_pstate_limits(rps);
err = -EINVAL;
break;
@@ -457,7 +451,7 @@ int live_rps_control(void *arg)
pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
engine->name, rps->min_freq, read_cagf(rps));
igt_spinner_end(&spin);
- engine_heartbeat_enable(engine, saved_heartbeat);
+ engine_heartbeat_enable(engine);
show_pstate_limits(rps);
err = -EINVAL;
break;
@@ -472,7 +466,7 @@ int live_rps_control(void *arg)
min_dt = ktime_sub(ktime_get(), min_dt);
igt_spinner_end(&spin);
- engine_heartbeat_enable(engine, saved_heartbeat);
+ engine_heartbeat_enable(engine);
pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
engine->name,
@@ -635,7 +629,6 @@ int live_rps_frequency_cs(void *arg)
rps->work.func = dummy_rps_work;
for_each_engine(engine, gt, id) {
- unsigned long saved_heartbeat;
struct i915_request *rq;
struct i915_vma *vma;
u32 *cancel, *cntr;
@@ -644,14 +637,14 @@ int live_rps_frequency_cs(void *arg)
int freq;
} min, max;
- saved_heartbeat = engine_heartbeat_disable(engine);
+ engine_heartbeat_disable(engine);
vma = create_spin_counter(engine,
engine->kernel_context->vm, false,
&cancel, &cntr);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
- engine_heartbeat_enable(engine, saved_heartbeat);
+ engine_heartbeat_enable(engine);
break;
}
@@ -732,7 +725,7 @@ err_vma:
i915_vma_unpin(vma);
i915_vma_put(vma);
- engine_heartbeat_enable(engine, saved_heartbeat);
+ engine_heartbeat_enable(engine);
if (igt_flush_test(gt->i915))
err = -EIO;
if (err)
@@ -778,7 +771,6 @@ int live_rps_frequency_srm(void *arg)
rps->work.func = dummy_rps_work;
for_each_engine(engine, gt, id) {
- unsigned long saved_heartbeat;
struct i915_request *rq;
struct i915_vma *vma;
u32 *cancel, *cntr;
@@ -787,14 +779,14 @@ int live_rps_frequency_srm(void *arg)
int freq;
} min, max;
- saved_heartbeat = engine_heartbeat_disable(engine);
+ engine_heartbeat_disable(engine);
vma = create_spin_counter(engine,
engine->kernel_context->vm, true,
&cancel, &cntr);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
- engine_heartbeat_enable(engine, saved_heartbeat);
+ engine_heartbeat_enable(engine);
break;
}
@@ -874,7 +866,7 @@ err_vma:
i915_vma_unpin(vma);
i915_vma_put(vma);
- engine_heartbeat_enable(engine, saved_heartbeat);
+ engine_heartbeat_enable(engine);
if (igt_flush_test(gt->i915))
err = -EIO;
if (err)
@@ -1066,16 +1058,14 @@ int live_rps_interrupt(void *arg)
for_each_engine(engine, gt, id) {
/* Keep the engine busy with a spinner; expect an UP! */
if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
- unsigned long saved_heartbeat;
-
intel_gt_pm_wait_for_idle(engine->gt);
GEM_BUG_ON(intel_rps_is_active(rps));
- saved_heartbeat = engine_heartbeat_disable(engine);
+ engine_heartbeat_disable(engine);
err = __rps_up_interrupt(rps, engine, &spin);
- engine_heartbeat_enable(engine, saved_heartbeat);
+ engine_heartbeat_enable(engine);
if (err)
goto out;
@@ -1084,15 +1074,13 @@ int live_rps_interrupt(void *arg)
/* Keep the engine awake but idle and check for DOWN */
if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
- unsigned long saved_heartbeat;
-
- saved_heartbeat = engine_heartbeat_disable(engine);
+ engine_heartbeat_disable(engine);
intel_rc6_disable(&gt->rc6);
err = __rps_down_interrupt(rps, engine);
intel_rc6_enable(&gt->rc6);
- engine_heartbeat_enable(engine, saved_heartbeat);
+ engine_heartbeat_enable(engine);
if (err)
goto out;
}
@@ -1168,7 +1156,6 @@ int live_rps_power(void *arg)
rps->work.func = dummy_rps_work;
for_each_engine(engine, gt, id) {
- unsigned long saved_heartbeat;
struct i915_request *rq;
struct {
u64 power;
@@ -1178,13 +1165,13 @@ int live_rps_power(void *arg)
if (!intel_engine_can_store_dword(engine))
continue;
- saved_heartbeat = engine_heartbeat_disable(engine);
+ engine_heartbeat_disable(engine);
rq = igt_spinner_create_request(&spin,
engine->kernel_context,
MI_NOOP);
if (IS_ERR(rq)) {
- engine_heartbeat_enable(engine, saved_heartbeat);
+ engine_heartbeat_enable(engine);
err = PTR_ERR(rq);
break;
}
@@ -1195,7 +1182,7 @@ int live_rps_power(void *arg)
pr_err("%s: RPS spinner did not start\n",
engine->name);
igt_spinner_end(&spin);
- engine_heartbeat_enable(engine, saved_heartbeat);
+ engine_heartbeat_enable(engine);
intel_gt_set_wedged(engine->gt);
err = -EIO;
break;
@@ -1208,7 +1195,7 @@ int live_rps_power(void *arg)
min.power = measure_power_at(rps, &min.freq);
igt_spinner_end(&spin);
- engine_heartbeat_enable(engine, saved_heartbeat);
+ engine_heartbeat_enable(engine);
pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
engine->name,
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index c2578a0f2f14..ef1c35073dc0 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -751,22 +751,20 @@ out_free:
return err;
}
-static void engine_heartbeat_disable(struct intel_engine_cs *engine,
- unsigned long *saved)
+static void engine_heartbeat_disable(struct intel_engine_cs *engine)
{
- *saved = engine->props.heartbeat_interval_ms;
engine->props.heartbeat_interval_ms = 0;
intel_engine_pm_get(engine);
intel_engine_park_heartbeat(engine);
}
-static void engine_heartbeat_enable(struct intel_engine_cs *engine,
- unsigned long saved)
+static void engine_heartbeat_enable(struct intel_engine_cs *engine)
{
intel_engine_pm_put(engine);
- engine->props.heartbeat_interval_ms = saved;
+ engine->props.heartbeat_interval_ms =
+ engine->defaults.heartbeat_interval_ms;
}
static int live_hwsp_rollover_kernel(void *arg)
@@ -785,10 +783,9 @@ static int live_hwsp_rollover_kernel(void *arg)
struct intel_context *ce = engine->kernel_context;
struct intel_timeline *tl = ce->timeline;
struct i915_request *rq[3] = {};
- unsigned long heartbeat;
int i;
- engine_heartbeat_disable(engine, &heartbeat);
+ engine_heartbeat_disable(engine);
if (intel_gt_wait_for_idle(gt, HZ / 2)) {
err = -EIO;
goto out;
@@ -839,7 +836,7 @@ static int live_hwsp_rollover_kernel(void *arg)
out:
for (i = 0; i < ARRAY_SIZE(rq); i++)
i915_request_put(rq[i]);
- engine_heartbeat_enable(engine, heartbeat);
+ engine_heartbeat_enable(engine);
if (err)
break;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 5ed323254ee1..32785463ec9e 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -623,6 +623,8 @@ err_request:
err = -EINVAL;
goto out_unpin;
}
+ } else {
+ rsvd = 0;
}
expect = results[0];
diff --git a/drivers/gpu/drm/i915/gt/shaders/README b/drivers/gpu/drm/i915/gt/shaders/README
new file mode 100644
index 000000000000..e7e96d7073c7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/shaders/README
@@ -0,0 +1,46 @@
+ASM sources for auto generated shaders
+======================================
+
+The i915/gt/hsw_clear_kernel.c and i915/gt/ivb_clear_kernel.c files contain
+pre-compiled batch chunks that will clear any residual render cache during
+context switch.
+
+They are generated from their respective platform ASM files present in the
+i915/gt/shaders/clear_kernel directory.
+
+The generated .c files should never be modified directly. Instead, any
+modification needs to be done on their respective ASM files, and the build
+instructions below need to be followed.
+
+Building
+========
+
+Environment
+-----------
+
+IGT GPU tools scripts and Mesa's i965 instruction assembler tool are used
+for building.
+
+Please make sure your Mesa tool is compiled with "-Dtools=intel" and
+"-Ddri-drivers=i965", and run this script from the IGT source root directory.
+
+The instructions below assume:
+ * IGT GPU tools source code is located in your home directory (~) as ~/igt
+ * Mesa source code is located in your home directory (~) as ~/mesa
+   and built under the ~/mesa/build directory
+ * Linux kernel source code is under your home directory (~) as ~/linux
+
+Instructions
+------------
+
+~ $ cp ~/linux/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm \
+ ~/igt/lib/i915/shaders/clear_kernel/ivb.asm
+~ $ cd ~/igt
+igt $ ./scripts/generate_clear_kernel.sh -g ivb \
+ -m ~/mesa/build/src/intel/tools/i965_asm
+
+~ $ cp ~/linux/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm \
+ ~/igt/lib/i915/shaders/clear_kernel/hsw.asm
+~ $ cd ~/igt
+igt $ ./scripts/generate_clear_kernel.sh -g hsw \
+ -m ~/mesa/build/src/intel/tools/i965_asm \ No newline at end of file
diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm
new file mode 100644
index 000000000000..5fdf384bb621
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+/*
+ * Kernel for PAVP buffer clear.
+ *
+ * 1. Clear all 64 GRF registers assigned to the kernel with designated value;
+ * 2. Write 32x16 block of all "0" to render target buffer which indirectly clears
+ * 512 bytes of Render Cache.
+ */
+
+/* Store designated "clear GRF" value */
+mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N };
+
+/**
+ * Curbe Format
+ *
+ * DW 1.0 - Block Offset to write Render Cache
+ * DW 1.1 [15:0] - Clear Word
+ * DW 1.2 - Delay iterations
+ * DW 1.3 - Enable Instrumentation (only for debug)
+ * DW 1.4 - Rsvd (intended for context ID)
+ * DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount
+ * DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count)
+ * DW 1.7 - Rsvd MBZ (intended for Total Thread Count)
+ *
+ * Binding Table
+ *
+ * BTI 0: 2D Surface to help clear L3 (Render/Data Cache)
+ * BTI 1: Wait/Instrumentation Buffer
+ * Size : (SliceCount * SubSliceCount * 16 EUs/SubSlice) rows * (16 threads/EU) cols (Format R32_UINT)
+ * Expected to be initialized to 0 by driver/another kernel
+ * Layout:
+ * RowN: Histogram for EU-N: (SliceID*SubSlicePerSliceCount + SSID)*16 + EUID [assume max 16 EUs / SS]
+ * Col-k[DW-k]: Threads Executed on ThreadID-k for EU-N
+ */
+add(1) g1.2<1>UD g1.2<0,1,0>UD 0x00000001UD { align1 1N }; /* Loop count to delay kernel: Init to (g1.2 + 1) */
+cmp.z.f0.0(1) null<1>UD g1.3<0,1,0>UD 0x00000000UD { align1 1N };
+(+f0.0) jmpi(1) 352D { align1 WE_all 1N };
+
+/**
+ * State Register has info on where this thread is running
+ * IVB: sr0.0 :: [15:13]: MBZ, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID
+ * HSW: sr0.0 :: 15: MBZ, [14:13]: SliceID, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID
+ */
+mov(8) g3<1>UD 0x00000000UD { align1 1Q };
+shr(1) g3<1>D sr0<0,1,0>D 12D { align1 1N };
+and(1) g3<1>D g3<0,1,0>D 1D { align1 1N }; /* g3 has HSID */
+shr(1) g3.1<1>D sr0<0,1,0>D 13D { align1 1N };
+and(1) g3.1<1>D g3.1<0,1,0>D 3D { align1 1N }; /* g3.1 has sliceID */
+mul(1) g3.5<1>D g3.1<0,1,0>D g1.10<0,1,0>UW { align1 1N };
+add(1) g3<1>D g3<0,1,0>D g3.5<0,1,0>D { align1 1N }; /* g3 = sliceID * SubSlicePerSliceCount + HSID */
+shr(1) g3.2<1>D sr0<0,1,0>D 8D { align1 1N };
+and(1) g3.2<1>D g3.2<0,1,0>D 15D { align1 1N }; /* g3.2 = EUID */
+mul(1) g3.4<1>D g3<0,1,0>D 16D { align1 1N };
+add(1) g3.2<1>D g3.2<0,1,0>D g3.4<0,1,0>D { align1 1N }; /* g3.2 now points to EU row number (Y-pixel = V address ) in instrumentation surf */
+
+mov(8) g5<1>UD 0x00000000UD { align1 1Q };
+and(1) g3.3<1>D sr0<0,1,0>D 7D { align1 1N };
+mul(1) g3.3<1>D g3.3<0,1,0>D 4D { align1 1N };
+
+mov(8) g4<1>UD g0<8,8,1>UD { align1 1Q }; /* Initialize message header with g0 */
+mov(1) g4<1>UD g3.3<0,1,0>UD { align1 1N }; /* Block offset */
+mov(1) g4.1<1>UD g3.2<0,1,0>UD { align1 1N }; /* Block offset */
+mov(1) g4.2<1>UD 0x00000003UD { align1 1N }; /* Block size (1 row x 4 bytes) */
+and(1) g4.3<1>UD g4.3<0,1,0>UW 0xffffffffUD { align1 1N };
+
+/* Media block read to fetch current value at specified location in instrumentation buffer */
+sendc(8) g5<1>UD g4<8,8,1>F 0x02190001
+
+ render MsgDesc: media block read MsgCtrl = 0x0 Surface = 1 mlen 1 rlen 1 { align1 1Q };
+add(1) g5<1>D g5<0,1,0>D 1D { align1 1N };
+
+/* Media block write for updated value at specified location in instrumentation buffer */
+sendc(8) g5<1>UD g4<8,8,1>F 0x040a8001
+ render MsgDesc: media block write MsgCtrl = 0x0 Surface = 1 mlen 2 rlen 0 { align1 1Q };
+
+/* Delay thread for specified parameter */
+add.nz.f0.0(1) g1.2<1>UD g1.2<0,1,0>UD -1D { align1 1N };
+(+f0.0) jmpi(1) -32D { align1 WE_all 1N };
+
+/* Store designated "clear GRF" value */
+mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N };
+
+/* Initialize looping parameters */
+mov(1) a0<1>D 0D { align1 1N }; /* Initialize a0.0:w=0 */
+mov(1) a0.4<1>W 127W { align1 1N }; /* Loop count. Each loop contains 16 GRF's */
+
+/* Write 32x16 all "0" block */
+mov(8) g2<1>UD g0<8,8,1>UD { align1 1Q };
+mov(8) g127<1>UD g0<8,8,1>UD { align1 1Q };
+mov(2) g2<1>UD g1<2,2,1>UW { align1 1N };
+mov(1) g2.2<1>UD 0x000f000fUD { align1 1N }; /* Block size (16x16) */
+and(1) g2.3<1>UD g2.3<0,1,0>UW 0xffffffefUD { align1 1N };
+mov(16) g3<1>UD 0x00000000UD { align1 1H };
+mov(16) g4<1>UD 0x00000000UD { align1 1H };
+mov(16) g5<1>UD 0x00000000UD { align1 1H };
+mov(16) g6<1>UD 0x00000000UD { align1 1H };
+mov(16) g7<1>UD 0x00000000UD { align1 1H };
+mov(16) g8<1>UD 0x00000000UD { align1 1H };
+mov(16) g9<1>UD 0x00000000UD { align1 1H };
+mov(16) g10<1>UD 0x00000000UD { align1 1H };
+sendc(8) null<1>UD g2<8,8,1>F 0x120a8000
+ render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q };
+add(1) g2<1>UD g1<0,1,0>UW 0x0010UW { align1 1N };
+sendc(8) null<1>UD g2<8,8,1>F 0x120a8000
+ render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q };
+
+/* Now, clear all GRF registers */
+add.nz.f0.0(1) a0.4<1>W a0.4<0,1,0>W -1W { align1 1N };
+mov(16) g[a0]<1>UW f0.1<0,1,0>UW { align1 1H };
+add(1) a0<1>D a0<0,1,0>D 32D { align1 1N };
+(+f0.0) jmpi(1) -64D { align1 WE_all 1N };
+
+/* Terminate the thread */
+sendc(8) null<1>UD g127<8,8,1>F 0x82000010
+ thread_spawner MsgDesc: mlen 1 rlen 0 { align1 1Q EOT };
diff --git a/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm
new file mode 100644
index 000000000000..97c7ac9e3854
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+/*
+ * Kernel for PAVP buffer clear.
+ *
+ * 1. Clear all 64 GRF registers assigned to the kernel with designated value;
+ * 2. Write 32x16 block of all "0" to render target buffer which indirectly clears
+ * 512 bytes of Render Cache.
+ */
+
+/* Store designated "clear GRF" value */
+mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N };
+
+/**
+ * Curbe Format
+ *
+ * DW 1.0 - Block Offset to write Render Cache
+ * DW 1.1 [15:0] - Clear Word
+ * DW 1.2 - Delay iterations
+ * DW 1.3 - Enable Instrumentation (only for debug)
+ * DW 1.4 - Rsvd (intended for context ID)
+ * DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount
+ * DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count)
+ * DW 1.7 - Rsvd MBZ (intended for Total Thread Count)
+ *
+ * Binding Table
+ *
+ * BTI 0: 2D Surface to help clear L3 (Render/Data Cache)
+ * BTI 1: Wait/Instrumentation Buffer
+ * Size : (SliceCount * SubSliceCount * 16 EUs/SubSlice) rows * (16 threads/EU) cols (Format R32_UINT)
+ * Expected to be initialized to 0 by driver/another kernel
+ * Layout :
+ * RowN: Histogram for EU-N: (SliceID*SubSlicePerSliceCount + SSID)*16 + EUID [assume max 16 EUs / SS]
+ * Col-k[DW-k]: Threads Executed on ThreadID-k for EU-N
+ */
+add(1) g1.2<1>UD g1.2<0,1,0>UD 0x00000001UD { align1 1N }; /* Loop count to delay kernel: Init to (g1.2 + 1) */
+cmp.z.f0.0(1) null<1>UD g1.3<0,1,0>UD 0x00000000UD { align1 1N };
+(+f0.0) jmpi(1) 44D { align1 WE_all 1N };
+
+/**
+ * State Register has info on where this thread is running
+ * IVB: sr0.0 :: [15:13]: MBZ, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID
+ * HSW: sr0.0 :: 15: MBZ, [14:13]: SliceID, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID
+ */
+mov(8) g3<1>UD 0x00000000UD { align1 1Q };
+shr(1) g3<1>D sr0<0,1,0>D 12D { align1 1N };
+and(1) g3<1>D g3<0,1,0>D 1D { align1 1N }; /* g3 has HSID */
+shr(1) g3.1<1>D sr0<0,1,0>D 13D { align1 1N };
+and(1) g3.1<1>D g3.1<0,1,0>D 3D { align1 1N }; /* g3.1 has sliceID */
+mul(1) g3.5<1>D g3.1<0,1,0>D g1.10<0,1,0>UW { align1 1N };
+add(1) g3<1>D g3<0,1,0>D g3.5<0,1,0>D { align1 1N }; /* g3 = sliceID * SubSlicePerSliceCount + HSID */
+shr(1) g3.2<1>D sr0<0,1,0>D 8D { align1 1N };
+and(1) g3.2<1>D g3.2<0,1,0>D 15D { align1 1N }; /* g3.2 = EUID */
+mul(1) g3.4<1>D g3<0,1,0>D 16D { align1 1N };
+add(1) g3.2<1>D g3.2<0,1,0>D g3.4<0,1,0>D { align1 1N }; /* g3.2 now points to EU row number (Y-pixel = V address ) in instrumentation surf */
+
+mov(8) g5<1>UD 0x00000000UD { align1 1Q };
+and(1) g3.3<1>D sr0<0,1,0>D 7D { align1 1N };
+mul(1) g3.3<1>D g3.3<0,1,0>D 4D { align1 1N };
+
+mov(8) g4<1>UD g0<8,8,1>UD { align1 1Q }; /* Initialize message header with g0 */
+mov(1) g4<1>UD g3.3<0,1,0>UD { align1 1N }; /* Block offset */
+mov(1) g4.1<1>UD g3.2<0,1,0>UD { align1 1N }; /* Block offset */
+mov(1) g4.2<1>UD 0x00000003UD { align1 1N }; /* Block size (1 row x 4 bytes) */
+and(1) g4.3<1>UD g4.3<0,1,0>UW 0xffffffffUD { align1 1N };
+
+/* Media block read to fetch current value at specified location in instrumentation buffer */
+sendc(8) g5<1>UD g4<8,8,1>F 0x02190001
+ render MsgDesc: media block read MsgCtrl = 0x0 Surface = 1 mlen 1 rlen 1 { align1 1Q };
+add(1) g5<1>D g5<0,1,0>D 1D { align1 1N };
+
+/* Media block write for updated value at specified location in instrumentation buffer */
+sendc(8) g5<1>UD g4<8,8,1>F 0x040a8001
+ render MsgDesc: media block write MsgCtrl = 0x0 Surface = 1 mlen 2 rlen 0 { align1 1Q };
+/* Delay thread for specified parameter */
+add.nz.f0.0(1) g1.2<1>UD g1.2<0,1,0>UD -1D { align1 1N };
+(+f0.0) jmpi(1) -4D { align1 WE_all 1N };
+
+/* Store designated "clear GRF" value */
+mov(1) f0.1<1>UW g1.2<0,1,0>UW { align1 1N };
+
+/* Initialize looping parameters */
+mov(1) a0<1>D 0D { align1 1N }; /* Initialize a0.0:w=0 */
+mov(1) a0.4<1>W 127W { align1 1N }; /* Loop count. Each loop contains 16 GRF's */
+
+/* Write 32x16 all "0" block */
+mov(8) g2<1>UD g0<8,8,1>UD { align1 1Q };
+mov(8) g127<1>UD g0<8,8,1>UD { align1 1Q };
+mov(2) g2<1>UD g1<2,2,1>UW { align1 1N };
+mov(1) g2.2<1>UD 0x000f000fUD { align1 1N }; /* Block size (16x16) */
+and(1) g2.3<1>UD g2.3<0,1,0>UW 0xffffffefUD { align1 1N };
+mov(16) g3<1>UD 0x00000000UD { align1 1H };
+mov(16) g4<1>UD 0x00000000UD { align1 1H };
+mov(16) g5<1>UD 0x00000000UD { align1 1H };
+mov(16) g6<1>UD 0x00000000UD { align1 1H };
+mov(16) g7<1>UD 0x00000000UD { align1 1H };
+mov(16) g8<1>UD 0x00000000UD { align1 1H };
+mov(16) g9<1>UD 0x00000000UD { align1 1H };
+mov(16) g10<1>UD 0x00000000UD { align1 1H };
+sendc(8) null<1>UD g2<8,8,1>F 0x120a8000
+ render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q };
+add(1) g2<1>UD g1<0,1,0>UW 0x0010UW { align1 1N };
+sendc(8) null<1>UD g2<8,8,1>F 0x120a8000
+ render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q };
+
+/* Now, clear all GRF registers */
+add.nz.f0.0(1) a0.4<1>W a0.4<0,1,0>W -1W { align1 1N };
+mov(16) g[a0]<1>UW f0.1<0,1,0>UW { align1 1H };
+add(1) a0<1>D a0<0,1,0>D 32D { align1 1N };
+(+f0.0) jmpi(1) -8D { align1 WE_all 1N };
+
+/* Terminate the thread */
+sendc(8) null<1>UD g127<8,8,1>F 0x82000010
+ thread_spawner MsgDesc: mlen 1 rlen 0 { align1 1Q EOT };
diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c
index ec47d4114554..62e6a14ad58e 100644
--- a/drivers/gpu/drm/i915/gvt/debugfs.c
+++ b/drivers/gpu/drm/i915/gvt/debugfs.c
@@ -66,7 +66,7 @@ static inline int mmio_diff_handler(struct intel_gvt *gvt,
vreg = vgpu_vreg(param->vgpu, offset);
if (preg != vreg) {
- node = kmalloc(sizeof(*node), GFP_KERNEL);
+ node = kmalloc(sizeof(*node), GFP_ATOMIC);
if (!node)
return -ENOMEM;
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 3e88e3b5c43a..fadd2adb8030 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1726,13 +1726,13 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
(*(u32 *)p_data) &= ~_MASKED_BIT_ENABLE(2);
write_vreg(vgpu, offset, p_data, bytes);
- if (data & _MASKED_BIT_ENABLE(1)) {
+ if (IS_MASKED_BITS_ENABLED(data, 1)) {
enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST);
return 0;
}
if (IS_COFFEELAKE(vgpu->gvt->gt->i915) &&
- data & _MASKED_BIT_ENABLE(2)) {
+ IS_MASKED_BITS_ENABLED(data, 2)) {
enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST);
return 0;
}
@@ -1741,14 +1741,14 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
* pvinfo, if not, we will treat this guest as non-gvtg-aware
* guest, and stop emulating its cfg space, mmio, gtt, etc.
*/
- if (((data & _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)) ||
- (data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)))
- && !vgpu->pv_notified) {
+ if ((IS_MASKED_BITS_ENABLED(data, GFX_PPGTT_ENABLE) ||
+ IS_MASKED_BITS_ENABLED(data, GFX_RUN_LIST_ENABLE)) &&
+ !vgpu->pv_notified) {
enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST);
return 0;
}
- if ((data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE))
- || (data & _MASKED_BIT_DISABLE(GFX_RUN_LIST_ENABLE))) {
+ if (IS_MASKED_BITS_ENABLED(data, GFX_RUN_LIST_ENABLE) ||
+ IS_MASKED_BITS_DISABLED(data, GFX_RUN_LIST_ENABLE)) {
enable_execlist = !!(data & GFX_RUN_LIST_ENABLE);
gvt_dbg_core("EXECLIST %s on ring %s\n",
@@ -1809,7 +1809,7 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu,
write_vreg(vgpu, offset, p_data, bytes);
data = vgpu_vreg(vgpu, offset);
- if (data & _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET))
+ if (IS_MASKED_BITS_ENABLED(data, RESET_CTL_REQUEST_RESET))
data |= RESET_CTL_READY_TO_RESET;
else if (data & _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET))
data &= ~RESET_CTL_READY_TO_RESET;
@@ -1827,7 +1827,8 @@ static int csfe_chicken1_mmio_write(struct intel_vgpu *vgpu,
(*(u32 *)p_data) &= ~_MASKED_BIT_ENABLE(0x18);
write_vreg(vgpu, offset, p_data, bytes);
- if (data & _MASKED_BIT_ENABLE(0x10) || data & _MASKED_BIT_ENABLE(0x8))
+ if (IS_MASKED_BITS_ENABLED(data, 0x10) ||
+ IS_MASKED_BITS_ENABLED(data, 0x8))
enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST);
return 0;
@@ -3055,6 +3056,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
MMIO_D(_MMIO(0x72380), D_SKL_PLUS);
MMIO_D(_MMIO(0x7239c), D_SKL_PLUS);
MMIO_D(_MMIO(_PLANE_SURF_3_A), D_SKL_PLUS);
+ MMIO_D(_MMIO(_PLANE_SURF_3_B), D_SKL_PLUS);
MMIO_D(CSR_SSP_BASE, D_SKL_PLUS);
MMIO_D(CSR_HTP_SKL, D_SKL_PLUS);
@@ -3131,8 +3133,8 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
MMIO_DFH(GEN9_WM_CHICKEN3, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS,
NULL, NULL);
- MMIO_D(GAMT_CHKN_BIT_REG, D_KBL);
- MMIO_D(GEN9_CTX_PREEMPT_REG, D_KBL | D_SKL);
+ MMIO_D(GAMT_CHKN_BIT_REG, D_KBL | D_CFL);
+ MMIO_D(GEN9_CTX_PREEMPT_REG, D_SKL_PLUS);
return 0;
}
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.h b/drivers/gpu/drm/i915/gvt/mmio_context.h
index 970704b18f23..3b25e7fe32f6 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.h
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.h
@@ -54,8 +54,8 @@ bool is_inhibit_context(struct intel_context *ce);
int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu,
struct i915_request *req);
-#define IS_RESTORE_INHIBIT(a) \
- (_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) == \
- ((a) & _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT)))
+
+#define IS_RESTORE_INHIBIT(a) \
+ IS_MASKED_BITS_ENABLED(a, CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT)
#endif
diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h
index 5b66e14c5b7b..b88e033cbed4 100644
--- a/drivers/gpu/drm/i915/gvt/reg.h
+++ b/drivers/gpu/drm/i915/gvt/reg.h
@@ -94,6 +94,11 @@
#define GFX_MODE_BIT_SET_IN_MASK(val, bit) \
((((bit) & 0xffff0000) == 0) && !!((val) & (((bit) << 16))))
+#define IS_MASKED_BITS_ENABLED(_val, _b) \
+ (((_val) & _MASKED_BIT_ENABLE(_b)) == _MASKED_BIT_ENABLE(_b))
+#define IS_MASKED_BITS_DISABLED(_val, _b) \
+ ((_val) & _MASKED_BIT_DISABLE(_b))
+
#define FORCEWAKE_RENDER_GEN9_REG 0xa278
#define FORCEWAKE_ACK_RENDER_GEN9_REG 0x0D84
#define FORCEWAKE_BLITTER_GEN9_REG 0xa188
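For context, the new helpers above decode the usual i915 masked-register convention: the upper 16 bits of a write carry a write-enable mask for the value in the lower 16 bits. A minimal sketch of that convention (illustrative definitions only, not the real i915 macros):

	#define SKETCH_MASKED_BIT_ENABLE(b)	(((b) << 16) | (b))	/* set mask and value */
	#define SKETCH_MASKED_BIT_DISABLE(b)	((b) << 16)		/* set mask only      */

IS_MASKED_BITS_ENABLED() is therefore true only when both the mask half and the value half are set, while IS_MASKED_BITS_DISABLED() only tests the mask half, matching a write of the disable form.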
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 4dc601dffc08..284cf078135a 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3125,6 +3125,7 @@ static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv)
val = I915_READ(GEN11_DE_HPD_IMR);
val &= ~hotplug_irqs;
+ val |= ~enabled_irqs & hotplug_irqs;
I915_WRITE(GEN11_DE_HPD_IMR, val);
POSTING_READ(GEN11_DE_HPD_IMR);
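A short worked example of the IMR update above, assuming (as elsewhere in i915) that a set bit in GEN11_DE_HPD_IMR masks the corresponding interrupt, and using hypothetical pin masks:

	u32 hotplug_irqs = 0x0f;	/* four hotplug pins, hypothetical      */
	u32 enabled_irqs = 0x05;	/* only two of them actually requested  */
	u32 val = 0xff;			/* previous IMR contents                */

	val &= ~hotplug_irqs;			/* 0xf0: unmask every hotplug pin      */
	val |= ~enabled_irqs & hotplug_irqs;	/* 0xfa: re-mask the unused pins, so   */
						/* only bits 0 and 2 stay unmasked     */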
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index e991a707bdb7..962ded9ce73f 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -269,12 +269,48 @@ static bool exclusive_mmio_access(const struct drm_i915_private *i915)
return IS_GEN(i915, 7);
}
+static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
+{
+ struct intel_engine_pmu *pmu = &engine->pmu;
+ bool busy;
+ u32 val;
+
+ val = ENGINE_READ_FW(engine, RING_CTL);
+ if (val == 0) /* powerwell off => engine idle */
+ return;
+
+ if (val & RING_WAIT)
+ add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
+ if (val & RING_WAIT_SEMAPHORE)
+ add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
+
+ /* No need to sample when busy stats are supported. */
+ if (intel_engine_supports_stats(engine))
+ return;
+
+ /*
+ * While waiting on a semaphore or event, MI_MODE reports the
+ * ring as idle. However, previously using the seqno, and with
+ * execlists sampling, we account for the ring waiting as the
+ * engine being busy. Therefore, we record the sample as being
+ * busy if either waiting or !idle.
+ */
+ busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
+ if (!busy) {
+ val = ENGINE_READ_FW(engine, RING_MI_MODE);
+ busy = !(val & MODE_IDLE);
+ }
+ if (busy)
+ add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
+}
+
static void
engines_sample(struct intel_gt *gt, unsigned int period_ns)
{
struct drm_i915_private *i915 = gt->i915;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ unsigned long flags;
if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
return;
@@ -283,53 +319,17 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
return;
for_each_engine(engine, gt, id) {
- struct intel_engine_pmu *pmu = &engine->pmu;
- spinlock_t *mmio_lock;
- unsigned long flags;
- bool busy;
- u32 val;
-
if (!intel_engine_pm_get_if_awake(engine))
continue;
- mmio_lock = NULL;
- if (exclusive_mmio_access(i915))
- mmio_lock = &engine->uncore->lock;
-
- if (unlikely(mmio_lock))
- spin_lock_irqsave(mmio_lock, flags);
-
- val = ENGINE_READ_FW(engine, RING_CTL);
- if (val == 0) /* powerwell off => engine idle */
- goto skip;
-
- if (val & RING_WAIT)
- add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
- if (val & RING_WAIT_SEMAPHORE)
- add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);
-
- /* No need to sample when busy stats are supported. */
- if (intel_engine_supports_stats(engine))
- goto skip;
-
- /*
- * While waiting on a semaphore or event, MI_MODE reports the
- * ring as idle. However, previously using the seqno, and with
- * execlists sampling, we account for the ring waiting as the
- * engine being busy. Therefore, we record the sample as being
- * busy if either waiting or !idle.
- */
- busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
- if (!busy) {
- val = ENGINE_READ_FW(engine, RING_MI_MODE);
- busy = !(val & MODE_IDLE);
+ if (exclusive_mmio_access(i915)) {
+ spin_lock_irqsave(&engine->uncore->lock, flags);
+ engine_sample(engine, period_ns);
+ spin_unlock_irqrestore(&engine->uncore->lock, flags);
+ } else {
+ engine_sample(engine, period_ns);
}
- if (busy)
- add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
-skip:
- if (unlikely(mmio_lock))
- spin_unlock_irqrestore(mmio_lock, flags);
intel_engine_pm_put_async(engine);
}
}
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index 5003a71113cb..8aa7866ec6b6 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -42,7 +42,7 @@ enum {
* active request.
*/
#define I915_PRIORITY_UNPREEMPTABLE INT_MAX
-#define I915_PRIORITY_BARRIER INT_MAX
+#define I915_PRIORITY_BARRIER (I915_PRIORITY_UNPREEMPTABLE - 1)
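A one-line check of the ordering this change establishes, which the live_timeslice_nopreempt selftest added earlier in this diff relies on:

	/* INT_MAX - 1 < INT_MAX: a barrier no longer outranks a request that
	 * has been marked unpreemptable while active on the ELSP. */
	BUILD_BUG_ON(I915_PRIORITY_BARRIER >= I915_PRIORITY_UNPREEMPTABLE);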
struct i915_priolist {
struct list_head requests[I915_PRIORITY_COUNT];
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 7717581350bd..06cd1d28a176 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7896,7 +7896,7 @@ enum {
/* GEN7 chicken */
#define GEN7_COMMON_SLICE_CHICKEN1 _MMIO(0x7010)
- #define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC ((1 << 10) | (1 << 26))
+ #define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC (1 << 10)
#define GEN9_RHWO_OPTIMIZATION_DISABLE (1 << 14)
#define COMMON_SLICE_CHICKEN2 _MMIO(0x7014)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 696491d71a1d..07f663cd2d1c 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6830,16 +6830,6 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
I915_WRITE(ILK_DISPLAY_CHICKEN2,
I915_READ(ILK_DISPLAY_CHICKEN2) |
ILK_ELPIN_409_SELECT);
- I915_WRITE(_3D_CHICKEN2,
- _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
- _3D_CHICKEN2_WM_READ_PIPELINED);
-
- /* WaDisableRenderCachePipelinedFlush:ilk */
- I915_WRITE(CACHE_MODE_0,
- _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
-
- /* WaDisable_RenderCache_OperationalFlush:ilk */
- I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
g4x_disable_trickle_feed(dev_priv);
@@ -6902,27 +6892,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
I915_READ(ILK_DISPLAY_CHICKEN2) |
ILK_ELPIN_409_SELECT);
- /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
- I915_WRITE(_3D_CHICKEN,
- _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
-
- /* WaDisable_RenderCache_OperationalFlush:snb */
- I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
-
- /*
- * BSpec recoomends 8x4 when MSAA is used,
- * however in practice 16x4 seems fastest.
- *
- * Note that PS/WM thread counts depend on the WIZ hashing
- * disable bit, which we don't touch here, but it's good
- * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
- */
- I915_WRITE(GEN6_GT_MODE,
- _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
-
- I915_WRITE(CACHE_MODE_0,
- _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
-
I915_WRITE(GEN6_UCGCTL1,
I915_READ(GEN6_UCGCTL1) |
GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
@@ -6945,18 +6914,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
- /* WaStripsFansDisableFastClipPerformanceFix:snb */
- I915_WRITE(_3D_CHICKEN3,
- _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
-
- /*
- * Bspec says:
- * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
- * 3DSTATE_SF number of SF output attributes is more than 16."
- */
- I915_WRITE(_3D_CHICKEN3,
- _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
-
/*
* According to the spec the following bits should be
* set in order to enable memory self-refresh and fbc:
@@ -6986,24 +6943,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
gen6_check_mch_setup(dev_priv);
}
-static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
-{
- u32 reg = I915_READ(GEN7_FF_THREAD_MODE);
-
- /*
- * WaVSThreadDispatchOverride:ivb,vlv
- *
- * This actually overrides the dispatch
- * mode for all thread types.
- */
- reg &= ~GEN7_FF_SCHED_MASK;
- reg |= GEN7_FF_TS_SCHED_HW;
- reg |= GEN7_FF_VS_SCHED_HW;
- reg |= GEN7_FF_DS_SCHED_HW;
-
- I915_WRITE(GEN7_FF_THREAD_MODE, reg);
-}
-
static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
{
/*
@@ -7230,45 +7169,10 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
{
- /* L3 caching of data atomics doesn't work -- disable it. */
- I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
- I915_WRITE(HSW_ROW_CHICKEN3,
- _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
-
/* This is required by WaCatErrorRejectionIssue:hsw */
I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
- I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
- GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
-
- /* WaVSRefCountFullforceMissDisable:hsw */
- I915_WRITE(GEN7_FF_THREAD_MODE,
- I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
-
- /* WaDisable_RenderCache_OperationalFlush:hsw */
- I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
-
- /* enable HiZ Raw Stall Optimization */
- I915_WRITE(CACHE_MODE_0_GEN7,
- _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
-
- /* WaDisable4x2SubspanOptimization:hsw */
- I915_WRITE(CACHE_MODE_1,
- _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
-
- /*
- * BSpec recommends 8x4 when MSAA is used,
- * however in practice 16x4 seems fastest.
- *
- * Note that PS/WM thread counts depend on the WIZ hashing
- * disable bit, which we don't touch here, but it's good
- * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
- */
- I915_WRITE(GEN7_GT_MODE,
- _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
-
- /* WaSampleCChickenBitEnable:hsw */
- I915_WRITE(HALF_SLICE_CHICKEN3,
- _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
+ I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
+ GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
/* WaSwitchSolVfFArbitrationPriority:hsw */
I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
@@ -7282,32 +7186,11 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
- /* WaDisableEarlyCull:ivb */
- I915_WRITE(_3D_CHICKEN3,
- _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
-
/* WaDisableBackToBackFlipFix:ivb */
I915_WRITE(IVB_CHICKEN3,
CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
CHICKEN3_DGMG_DONE_FIX_DISABLE);
- /* WaDisablePSDDualDispatchEnable:ivb */
- if (IS_IVB_GT1(dev_priv))
- I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
- _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
-
- /* WaDisable_RenderCache_OperationalFlush:ivb */
- I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
-
- /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
- I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
- GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
-
- /* WaApplyL3ControlAndL3ChickenMode:ivb */
- I915_WRITE(GEN7_L3CNTLREG1,
- GEN7_WA_FOR_GEN7_L3_CONTROL);
- I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
- GEN7_WA_L3_CHICKEN_MODE);
if (IS_IVB_GT1(dev_priv))
I915_WRITE(GEN7_ROW_CHICKEN2,
_MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
@@ -7319,10 +7202,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
_MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
}
- /* WaForceL3Serialization:ivb */
- I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
- ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
-
/*
* According to the spec, bit 13 (RCZUNIT) must be set on IVB.
* This implements the WaDisableRCZUnitClockGating:ivb workaround.
@@ -7337,29 +7216,6 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
g4x_disable_trickle_feed(dev_priv);
- gen7_setup_fixed_func_scheduler(dev_priv);
-
- if (0) { /* causes HiZ corruption on ivb:gt1 */
- /* enable HiZ Raw Stall Optimization */
- I915_WRITE(CACHE_MODE_0_GEN7,
- _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
- }
-
- /* WaDisable4x2SubspanOptimization:ivb */
- I915_WRITE(CACHE_MODE_1,
- _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
-
- /*
- * BSpec recommends 8x4 when MSAA is used,
- * however in practice 16x4 seems fastest.
- *
- * Note that PS/WM thread counts depend on the WIZ hashing
- * disable bit, which we don't touch here, but it's good
- * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
- */
- I915_WRITE(GEN7_GT_MODE,
- _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
-
snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
snpcr &= ~GEN6_MBC_SNPCR_MASK;
snpcr |= GEN6_MBC_SNPCR_MED;
@@ -7373,28 +7229,11 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
{
- /* WaDisableEarlyCull:vlv */
- I915_WRITE(_3D_CHICKEN3,
- _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
-
/* WaDisableBackToBackFlipFix:vlv */
I915_WRITE(IVB_CHICKEN3,
CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
CHICKEN3_DGMG_DONE_FIX_DISABLE);
- /* WaPsdDispatchEnable:vlv */
- /* WaDisablePSDDualDispatchEnable:vlv */
- I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
- _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
- GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
-
- /* WaDisable_RenderCache_OperationalFlush:vlv */
- I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
-
- /* WaForceL3Serialization:vlv */
- I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
- ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
-
/* WaDisableDopClockGating:vlv */
I915_WRITE(GEN7_ROW_CHICKEN2,
_MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
@@ -7404,8 +7243,6 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
- gen7_setup_fixed_func_scheduler(dev_priv);
-
/*
* According to the spec, bit 13 (RCZUNIT) must be set on IVB.
* This implements the WaDisableRCZUnitClockGating:vlv workaround.
@@ -7420,30 +7257,6 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
/*
- * BSpec says this must be set, even though
- * WaDisable4x2SubspanOptimization isn't listed for VLV.
- */
- I915_WRITE(CACHE_MODE_1,
- _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
-
- /*
- * BSpec recommends 8x4 when MSAA is used,
- * however in practice 16x4 seems fastest.
- *
- * Note that PS/WM thread counts depend on the WIZ hashing
- * disable bit, which we don't touch here, but it's good
- * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
- */
- I915_WRITE(GEN7_GT_MODE,
- _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
-
- /*
- * WaIncreaseL3CreditsForVLVB0:vlv
- * This is the hardware default actually.
- */
- I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
-
- /*
* WaDisableVLVClockGating_VBIIssue:vlv
* Disable clock gating on th GCFG unit to prevent a delay
* in the reporting of vblank events.
@@ -7495,13 +7308,6 @@ static void g4x_init_clock_gating(struct drm_i915_private *dev_priv)
dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
- /* WaDisableRenderCachePipelinedFlush */
- I915_WRITE(CACHE_MODE_0,
- _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
-
- /* WaDisable_RenderCache_OperationalFlush:g4x */
- I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
-
g4x_disable_trickle_feed(dev_priv);
}
@@ -7517,11 +7323,6 @@ static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv)
intel_uncore_write(uncore,
MI_ARB_STATE,
_MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
-
- /* WaDisable_RenderCache_OperationalFlush:gen4 */
- intel_uncore_write(uncore,
- CACHE_MODE_0,
- _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
}
static void i965g_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -7534,9 +7335,6 @@ static void i965g_init_clock_gating(struct drm_i915_private *dev_priv)
I915_WRITE(RENCLK_GATE_D2, 0);
I915_WRITE(MI_ARB_STATE,
_MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
-
- /* WaDisable_RenderCache_OperationalFlush:gen4 */
- I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
}
static void gen3_init_clock_gating(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index 6a2be7d0dd95..6090ce35226b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -21,6 +21,7 @@ selftest(fence, i915_sw_fence_mock_selftests)
selftest(scatterlist, scatterlist_mock_selftests)
selftest(syncmap, i915_syncmap_mock_selftests)
selftest(uncore, intel_uncore_mock_selftests)
+selftest(ring, intel_ring_mock_selftests)
selftest(engine, intel_engine_cs_mock_selftests)
selftest(timelines, intel_timeline_mock_selftests)
selftest(requests, i915_request_mock_selftests)
diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig
index c420f5a3d33b..aa74aac3cbcc 100644
--- a/drivers/gpu/drm/mediatek/Kconfig
+++ b/drivers/gpu/drm/mediatek/Kconfig
@@ -6,12 +6,12 @@ config DRM_MEDIATEK
depends on COMMON_CLK
depends on HAVE_ARM_SMCCC
depends on OF
+ depends on MTK_MMSYS
select DRM_GEM_CMA_HELPER
select DRM_KMS_HELPER
select DRM_MIPI_DSI
select DRM_PANEL
select MEMORY
- select MTK_MMSYS
select MTK_SMI
select VIDEOMODE_HELPERS
help
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
index fe46c4bac64d..7cd8f415fd02 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
@@ -193,7 +193,6 @@ static int mtk_crtc_ddp_clk_enable(struct mtk_drm_crtc *mtk_crtc)
int ret;
int i;
- DRM_DEBUG_DRIVER("%s\n", __func__);
for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) {
ret = clk_prepare_enable(mtk_crtc->ddp_comp[i]->clk);
if (ret) {
@@ -213,7 +212,6 @@ static void mtk_crtc_ddp_clk_disable(struct mtk_drm_crtc *mtk_crtc)
{
int i;
- DRM_DEBUG_DRIVER("%s\n", __func__);
for (i = 0; i < mtk_crtc->ddp_comp_nr; i++)
clk_disable_unprepare(mtk_crtc->ddp_comp[i]->clk);
}
@@ -258,7 +256,6 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc)
int ret;
int i;
- DRM_DEBUG_DRIVER("%s\n", __func__);
if (WARN_ON(!crtc->state))
return -EINVAL;
@@ -299,7 +296,6 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc)
goto err_mutex_unprepare;
}
- DRM_DEBUG_DRIVER("mediatek_ddp_ddp_path_setup\n");
for (i = 0; i < mtk_crtc->ddp_comp_nr - 1; i++) {
mtk_mmsys_ddp_connect(mtk_crtc->mmsys_dev,
mtk_crtc->ddp_comp[i]->id,
@@ -349,7 +345,6 @@ static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc)
struct drm_crtc *crtc = &mtk_crtc->base;
int i;
- DRM_DEBUG_DRIVER("%s\n", __func__);
for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) {
mtk_ddp_comp_stop(mtk_crtc->ddp_comp[i]);
if (i == 1)
@@ -831,7 +826,8 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
#if IS_REACHABLE(CONFIG_MTK_CMDQ)
mtk_crtc->cmdq_client =
- cmdq_mbox_create(dev, drm_crtc_index(&mtk_crtc->base),
+ cmdq_mbox_create(mtk_crtc->mmsys_dev,
+ drm_crtc_index(&mtk_crtc->base),
2000);
if (IS_ERR(mtk_crtc->cmdq_client)) {
dev_dbg(dev, "mtk_crtc %d failed to create mailbox client, writing register by CPU now\n",
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
index 6bd369434d9d..040a8f393fe2 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
@@ -444,7 +444,6 @@ static int mtk_drm_probe(struct platform_device *pdev)
if (!private)
return -ENOMEM;
- private->data = of_device_get_match_data(dev);
private->mmsys_dev = dev->parent;
if (!private->mmsys_dev) {
dev_err(dev, "Failed to get MMSYS device\n");
@@ -514,7 +513,8 @@ static int mtk_drm_probe(struct platform_device *pdev)
goto err_node;
}
- ret = mtk_ddp_comp_init(dev, node, comp, comp_id, NULL);
+ ret = mtk_ddp_comp_init(dev->parent, node, comp,
+ comp_id, NULL);
if (ret) {
of_node_put(node);
goto err_node;
@@ -571,7 +571,6 @@ static int mtk_drm_sys_suspend(struct device *dev)
int ret;
ret = drm_mode_config_helper_suspend(drm);
- DRM_DEBUG_DRIVER("mtk_drm_sys_suspend\n");
return ret;
}
@@ -583,7 +582,6 @@ static int mtk_drm_sys_resume(struct device *dev)
int ret;
ret = drm_mode_config_helper_resume(drm);
- DRM_DEBUG_DRIVER("mtk_drm_sys_resume\n");
return ret;
}
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
index c2bd683a87c8..92141a19681b 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
@@ -164,6 +164,16 @@ static int mtk_plane_atomic_check(struct drm_plane *plane,
true, true);
}
+static void mtk_plane_atomic_disable(struct drm_plane *plane,
+ struct drm_plane_state *old_state)
+{
+ struct mtk_plane_state *state = to_mtk_plane_state(plane->state);
+
+ state->pending.enable = false;
+ wmb(); /* Make sure the above parameter is set before update */
+ state->pending.dirty = true;
+}
+
static void mtk_plane_atomic_update(struct drm_plane *plane,
struct drm_plane_state *old_state)
{
@@ -178,6 +188,11 @@ static void mtk_plane_atomic_update(struct drm_plane *plane,
if (!crtc || WARN_ON(!fb))
return;
+ if (!plane->state->visible) {
+ mtk_plane_atomic_disable(plane, old_state);
+ return;
+ }
+
gem = fb->obj[0];
mtk_gem = to_mtk_gem_obj(gem);
addr = mtk_gem->dma_addr;
@@ -200,16 +215,6 @@ static void mtk_plane_atomic_update(struct drm_plane *plane,
state->pending.dirty = true;
}
-static void mtk_plane_atomic_disable(struct drm_plane *plane,
- struct drm_plane_state *old_state)
-{
- struct mtk_plane_state *state = to_mtk_plane_state(plane->state);
-
- state->pending.enable = false;
- wmb(); /* Make sure the above parameter is set before update */
- state->pending.dirty = true;
-}
-
static const struct drm_plane_helper_funcs mtk_plane_helper_funcs = {
.prepare_fb = drm_gem_fb_prepare_fb,
.atomic_check = mtk_plane_atomic_check,
diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
index 270bf22c98fe..02ac55c13a80 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -316,10 +316,7 @@ static void mtk_dsi_lane0_ulp_mode_leave(struct mtk_dsi *dsi)
static bool mtk_dsi_clk_hs_state(struct mtk_dsi *dsi)
{
- u32 tmp_reg1;
-
- tmp_reg1 = readl(dsi->regs + DSI_PHY_LCCON);
- return ((tmp_reg1 & LC_HS_TX_EN) == 1) ? true : false;
+ return readl(dsi->regs + DSI_PHY_LCCON) & LC_HS_TX_EN;
}
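For illustration, why the simplified form above is more robust: comparing a masked register value against 1 only gives the right answer when the flag happens to be bit 0, whereas relying on the implicit conversion to bool works for any single-bit flag. The bit value below is hypothetical:

	#define EXAMPLE_HS_TX_EN	BIT(2)	/* stand-in for LC_HS_TX_EN */

	static bool hs_state_old(u32 reg)	/* only correct if the flag is bit 0 */
	{
		return ((reg & EXAMPLE_HS_TX_EN) == 1) ? true : false;
	}

	static bool hs_state_new(u32 reg)	/* correct for any single-bit flag */
	{
		return reg & EXAMPLE_HS_TX_EN;
	}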
static void mtk_dsi_clk_hs_mode(struct mtk_dsi *dsi, bool enter)
diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c
index 5feb760617cb..1eebe310470a 100644
--- a/drivers/gpu/drm/mediatek/mtk_hdmi.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c
@@ -1630,8 +1630,6 @@ static int mtk_hdmi_audio_startup(struct device *dev, void *data)
{
struct mtk_hdmi *hdmi = dev_get_drvdata(dev);
- dev_dbg(dev, "%s\n", __func__);
-
mtk_hdmi_audio_enable(hdmi);
return 0;
@@ -1641,8 +1639,6 @@ static void mtk_hdmi_audio_shutdown(struct device *dev, void *data)
{
struct mtk_hdmi *hdmi = dev_get_drvdata(dev);
- dev_dbg(dev, "%s\n", __func__);
-
mtk_hdmi_audio_disable(hdmi);
}
@@ -1651,8 +1647,6 @@ mtk_hdmi_audio_digital_mute(struct device *dev, void *data, bool enable)
{
struct mtk_hdmi *hdmi = dev_get_drvdata(dev);
- dev_dbg(dev, "%s(%d)\n", __func__, enable);
-
if (enable)
mtk_hdmi_hw_aud_mute(hdmi);
else
@@ -1665,8 +1659,6 @@ static int mtk_hdmi_audio_get_eld(struct device *dev, void *data, uint8_t *buf,
{
struct mtk_hdmi *hdmi = dev_get_drvdata(dev);
- dev_dbg(dev, "%s\n", __func__);
-
memcpy(buf, hdmi->conn.eld, min(sizeof(hdmi->conn.eld), len));
return 0;
@@ -1766,7 +1758,6 @@ static int mtk_drm_hdmi_probe(struct platform_device *pdev)
goto err_bridge_remove;
}
- dev_dbg(dev, "mediatek hdmi probe success\n");
return 0;
err_bridge_remove:
@@ -1789,7 +1780,7 @@ static int mtk_hdmi_suspend(struct device *dev)
struct mtk_hdmi *hdmi = dev_get_drvdata(dev);
mtk_hdmi_clk_disable_audio(hdmi);
- dev_dbg(dev, "hdmi suspend success!\n");
+
return 0;
}
@@ -1804,7 +1795,6 @@ static int mtk_hdmi_resume(struct device *dev)
return ret;
}
- dev_dbg(dev, "hdmi resume success!\n");
return 0;
}
#endif
diff --git a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c
index b55f51675205..827b93786fac 100644
--- a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c
+++ b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c
@@ -107,60 +107,10 @@
#define RGS_HDMITX_5T1_EDG (0xf << 4)
#define RGS_HDMITX_PLUG_TST BIT(0)
-static const u8 PREDIV[3][4] = {
- {0x0, 0x0, 0x0, 0x0}, /* 27Mhz */
- {0x1, 0x1, 0x1, 0x1}, /* 74Mhz */
- {0x1, 0x1, 0x1, 0x1} /* 148Mhz */
-};
-
-static const u8 TXDIV[3][4] = {
- {0x3, 0x3, 0x3, 0x2}, /* 27Mhz */
- {0x2, 0x1, 0x1, 0x1}, /* 74Mhz */
- {0x1, 0x0, 0x0, 0x0} /* 148Mhz */
-};
-
-static const u8 FBKSEL[3][4] = {
- {0x1, 0x1, 0x1, 0x1}, /* 27Mhz */
- {0x1, 0x0, 0x1, 0x1}, /* 74Mhz */
- {0x1, 0x0, 0x1, 0x1} /* 148Mhz */
-};
-
-static const u8 FBKDIV[3][4] = {
- {19, 24, 29, 19}, /* 27Mhz */
- {19, 24, 14, 19}, /* 74Mhz */
- {19, 24, 14, 19} /* 148Mhz */
-};
-
-static const u8 DIVEN[3][4] = {
- {0x2, 0x1, 0x1, 0x2}, /* 27Mhz */
- {0x2, 0x2, 0x2, 0x2}, /* 74Mhz */
- {0x2, 0x2, 0x2, 0x2} /* 148Mhz */
-};
-
-static const u8 HTPLLBP[3][4] = {
- {0xc, 0xc, 0x8, 0xc}, /* 27Mhz */
- {0xc, 0xf, 0xf, 0xc}, /* 74Mhz */
- {0xc, 0xf, 0xf, 0xc} /* 148Mhz */
-};
-
-static const u8 HTPLLBC[3][4] = {
- {0x2, 0x3, 0x3, 0x2}, /* 27Mhz */
- {0x2, 0x3, 0x3, 0x2}, /* 74Mhz */
- {0x2, 0x3, 0x3, 0x2} /* 148Mhz */
-};
-
-static const u8 HTPLLBR[3][4] = {
- {0x1, 0x1, 0x0, 0x1}, /* 27Mhz */
- {0x1, 0x2, 0x2, 0x1}, /* 74Mhz */
- {0x1, 0x2, 0x2, 0x1} /* 148Mhz */
-};
-
static int mtk_hdmi_pll_prepare(struct clk_hw *hw)
{
struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw);
- dev_dbg(hdmi_phy->dev, "%s\n", __func__);
-
mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON1, RG_HDMITX_PLL_AUTOK_EN);
mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON0, RG_HDMITX_PLL_POSDIV);
mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON3, RG_HDMITX_MHLCK_EN);
@@ -178,8 +128,6 @@ static void mtk_hdmi_pll_unprepare(struct clk_hw *hw)
{
struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw);
- dev_dbg(hdmi_phy->dev, "%s\n", __func__);
-
mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON1, RG_HDMITX_PLL_TXDIV_EN);
mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON1, RG_HDMITX_PLL_BIAS_LPF_EN);
usleep_range(100, 150);
diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
index 60f6472a3e58..6021f8d9efd1 100644
--- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
@@ -408,7 +408,7 @@ a2xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev)
struct msm_gem_address_space *aspace;
aspace = msm_gem_address_space_create(mmu, "gpu", SZ_16M,
- SZ_16M + 0xfff * SZ_64K);
+ 0xfff * SZ_64K);
if (IS_ERR(aspace) && !IS_ERR(mmu))
mmu->funcs->destroy(mmu);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 096be97ce9f9..21e77d67151f 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -1121,7 +1121,7 @@ static int a6xx_gmu_memory_probe(struct a6xx_gmu *gmu)
return -ENODEV;
mmu = msm_iommu_new(gmu->dev, domain);
- gmu->aspace = msm_gem_address_space_create(mmu, "gmu", 0x0, 0x7fffffff);
+ gmu->aspace = msm_gem_address_space_create(mmu, "gmu", 0x0, 0x80000000);
if (IS_ERR(gmu->aspace)) {
iommu_domain_free(domain);
return PTR_ERR(gmu->aspace);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index a1589e040c57..7768557cdfb2 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -893,8 +893,8 @@ static const struct adreno_gpu_funcs funcs = {
#if defined(CONFIG_DRM_MSM_GPU_STATE)
.gpu_state_get = a6xx_gpu_state_get,
.gpu_state_put = a6xx_gpu_state_put,
- .create_address_space = adreno_iommu_create_address_space,
#endif
+ .create_address_space = adreno_iommu_create_address_space,
},
.get_timestamp = a6xx_get_timestamp,
};
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 89673c7ed473..5db06b590943 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -194,7 +194,7 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu,
struct msm_gem_address_space *aspace;
aspace = msm_gem_address_space_create(mmu, "gpu", SZ_16M,
- 0xfffffff);
+ 0xffffffff - SZ_16M);
if (IS_ERR(aspace) && !IS_ERR(mmu))
mmu->funcs->destroy(mmu);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 63976dcd2ac8..0946a86b37b2 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -521,7 +521,7 @@ static struct msm_display_topology dpu_encoder_get_topology(
struct dpu_kms *dpu_kms,
struct drm_display_mode *mode)
{
- struct msm_display_topology topology;
+ struct msm_display_topology topology = {0};
int i, intf_count = 0;
for (i = 0; i < MAX_PHYS_ENCODERS_PER_VIRTUAL; i++)
@@ -537,7 +537,8 @@ static struct msm_display_topology dpu_encoder_get_topology(
* 1 LM, 1 INTF
* 2 LM, 1 INTF (stream merge to support high resolution interfaces)
*
- * Adding color blocks only to primary interface
+ * Adding color blocks only to primary interface if available in
+ * sufficient number
*/
if (intf_count == 2)
topology.num_lm = 2;
@@ -546,8 +547,11 @@ static struct msm_display_topology dpu_encoder_get_topology(
else
topology.num_lm = (mode->hdisplay > MAX_HDISPLAY_SPLIT) ? 2 : 1;
- if (dpu_enc->disp_info.intf_type == DRM_MODE_ENCODER_DSI)
- topology.num_dspp = topology.num_lm;
+ if (dpu_enc->disp_info.intf_type == DRM_MODE_ENCODER_DSI) {
+ if (dpu_kms->catalog->dspp &&
+ (dpu_kms->catalog->dspp_count >= topology.num_lm))
+ topology.num_dspp = topology.num_lm;
+ }
topology.num_enc = 0;
topology.num_intf = intf_count;
@@ -2136,7 +2140,6 @@ int dpu_encoder_setup(struct drm_device *dev, struct drm_encoder *enc,
dpu_enc = to_dpu_encoder_virt(enc);
- mutex_init(&dpu_enc->enc_lock);
ret = dpu_encoder_setup_display(dpu_enc, dpu_kms, disp_info);
if (ret)
goto fail;
@@ -2151,7 +2154,6 @@ int dpu_encoder_setup(struct drm_device *dev, struct drm_encoder *enc,
0);
- mutex_init(&dpu_enc->rc_lock);
INIT_DELAYED_WORK(&dpu_enc->delayed_off_work,
dpu_encoder_off_work);
dpu_enc->idle_timeout = IDLE_TIMEOUT;
@@ -2183,7 +2185,7 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev,
dpu_enc = devm_kzalloc(dev->dev, sizeof(*dpu_enc), GFP_KERNEL);
if (!dpu_enc)
- return ERR_PTR(ENOMEM);
+ return ERR_PTR(-ENOMEM);
rc = drm_encoder_init(dev, &dpu_enc->base, &dpu_encoder_funcs,
drm_enc_mode, NULL);
@@ -2196,6 +2198,8 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev,
spin_lock_init(&dpu_enc->enc_spinlock);
dpu_enc->enabled = false;
+ mutex_init(&dpu_enc->enc_lock);
+ mutex_init(&dpu_enc->rc_lock);
return &dpu_enc->base;
}
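
Editor's note: the dpu_encoder_init() hunk above fixes the error-pointer encoding. IS_ERR() only recognizes pointers built from negative errno values, so the old ERR_PTR(ENOMEM) would not have been seen as a failure by callers. A minimal user-space sketch of that behaviour; err_ptr()/is_err() are local stand-ins for the kernel macros and MAX_ERRNO is assumed to be the usual 4095:

```c
#include <stdio.h>

#define MAX_ERRNO  4095   /* kernel convention: errors live in the last page */
#define ENOMEM_VAL 12

/* Local stand-ins for the kernel's ERR_PTR()/IS_ERR() helpers. */
static void *err_ptr(long error) { return (void *)error; }
static int is_err(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
	/* Positive errno encodes as pointer 0xc: not in the error range, so the
	 * allocation failure would be missed. */
	printf("ERR_PTR(ENOMEM):  is_err = %d\n", is_err(err_ptr(ENOMEM_VAL)));

	/* Negative errno wraps into the top error range and is detected. */
	printf("ERR_PTR(-ENOMEM): is_err = %d\n", is_err(err_ptr(-ENOMEM_VAL)));
	return 0;
}
```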
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index b8615d4fe8a3..680527e28d09 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -780,7 +780,7 @@ static int _dpu_kms_mmu_init(struct dpu_kms *dpu_kms)
mmu = msm_iommu_new(dpu_kms->dev->dev, domain);
aspace = msm_gem_address_space_create(mmu, "dpu1",
- 0x1000, 0xfffffff);
+ 0x1000, 0x100000000 - 0x1000);
if (IS_ERR(aspace)) {
mmu->funcs->destroy(mmu);
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
index 08897184b1d9..fc6a3f8134c7 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
@@ -514,7 +514,7 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev)
config->iommu);
aspace = msm_gem_address_space_create(mmu,
- "mdp4", 0x1000, 0xffffffff);
+ "mdp4", 0x1000, 0x100000000 - 0x1000);
if (IS_ERR(aspace)) {
if (!IS_ERR(mmu))
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
index 19ec48695ffb..e193865ce9a2 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
@@ -633,7 +633,7 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev)
mmu = msm_iommu_new(iommu_dev, config->platform.iommu);
aspace = msm_gem_address_space_create(mmu, "mdp5",
- 0x1000, 0xffffffff);
+ 0x1000, 0x100000000 - 0x1000);
if (IS_ERR(aspace)) {
if (!IS_ERR(mmu))
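
Editor's note: the a2xx, a6xx GMU, adreno, dpu1, mdp4 and mdp5 hunks all rewrite the last argument of msm_gem_address_space_create() in the same direction: values that read as inclusive end addresses (0xffffffff, 0x7fffffff, SZ_16M + 0xfff * SZ_64K) become sizes (0x100000000 - 0x1000, 0x80000000, 0xfff * SZ_64K). Assuming the helper now takes (start, size), which is what the converted values suggest, a quick check of the arithmetic using the numbers from the hunks (the helper itself is not reimplemented here); note the old adreno end value 0xfffffff was a nibble short of 0xffffffff, so that hunk also widens the GPU range:

```c
#include <stdint.h>
#include <stdio.h>

#define SZ_16M 0x01000000ULL
#define SZ_64K 0x00010000ULL

int main(void)
{
	/* dpu1/mdp4/mdp5: start 0x1000, size 4 GiB minus the first page. */
	uint64_t start = 0x1000, size = 0x100000000ULL - 0x1000;
	printf("display: 0x%llx .. 0x%llx\n",
	       (unsigned long long)start,
	       (unsigned long long)(start + size - 1));   /* 0x1000 .. 0xffffffff */

	/* adreno: start 16 MiB, size reaching the old 0xffffffff end address. */
	start = SZ_16M;
	size  = 0xffffffffULL - SZ_16M;
	printf("adreno:  0x%llx .. 0x%llx\n",
	       (unsigned long long)start,
	       (unsigned long long)(start + size - 1));   /* 0x1000000 .. 0xfffffffe */
	return 0;
}
```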
diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c
index 001fbf537440..a1d94be7883a 100644
--- a/drivers/gpu/drm/msm/msm_submitqueue.c
+++ b/drivers/gpu/drm/msm/msm_submitqueue.c
@@ -71,8 +71,10 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
queue->flags = flags;
if (priv->gpu) {
- if (prio >= priv->gpu->nr_rings)
+ if (prio >= priv->gpu->nr_rings) {
+ kfree(queue);
return -EINVAL;
+ }
queue->prio = prio;
}
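
Editor's note: the msm_submitqueue_create() hunk plugs a memory leak. Once the queue has been allocated, the prio >= nr_rings early return must release it before bailing out. A minimal user-space sketch of the same pattern; the names are illustrative and calloc()/free() stand in for the driver's kzalloc()/kfree():

```c
#include <errno.h>
#include <stdlib.h>

struct submitqueue { int prio; };

/* Returns 0 on success or a negative errno; the allocation is only kept
 * (handed back via *out) when the function succeeds. */
static int submitqueue_create(int prio, int nr_rings, struct submitqueue **out)
{
	struct submitqueue *queue = calloc(1, sizeof(*queue));

	if (!queue)
		return -ENOMEM;

	if (prio >= nr_rings) {
		free(queue);        /* the pre-patch code leaked the allocation here */
		return -EINVAL;
	}

	queue->prio = prio;
	*out = queue;
	return 0;
}

int main(void)
{
	struct submitqueue *q = NULL;

	if (submitqueue_create(0, 4, &q))
		return 1;
	free(q);
	return 0;
}
```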
diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c
index b57c37ddd164..c7fbb7932f37 100644
--- a/drivers/gpu/drm/radeon/ni_dpm.c
+++ b/drivers/gpu/drm/radeon/ni_dpm.c
@@ -2127,7 +2127,7 @@ static int ni_init_smc_spll_table(struct radeon_device *rdev)
if (clk_s & ~(SMC_NISLANDS_SPLL_DIV_TABLE_CLKS_MASK >> SMC_NISLANDS_SPLL_DIV_TABLE_CLKS_SHIFT))
ret = -EINVAL;
- if (clk_s & ~(SMC_NISLANDS_SPLL_DIV_TABLE_CLKS_MASK >> SMC_NISLANDS_SPLL_DIV_TABLE_CLKS_SHIFT))
+ if (fb_div & ~(SMC_NISLANDS_SPLL_DIV_TABLE_FBDIV_MASK >> SMC_NISLANDS_SPLL_DIV_TABLE_FBDIV_SHIFT))
ret = -EINVAL;
if (clk_v & ~(SMC_NISLANDS_SPLL_DIV_TABLE_CLKV_MASK >> SMC_NISLANDS_SPLL_DIV_TABLE_CLKV_SHIFT))
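
Editor's note: the ni_init_smc_spll_table() hunk fixes a copy-paste slip: fb_div was never range-checked because the second test repeated the clk_s comparison. The pattern being restored is "reject a value that has bits outside its register field". A small sketch with illustrative mask/shift values; the real SMC_NISLANDS_* constants are not reproduced here:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* True if val fits entirely inside the field described by mask/shift. */
static bool fits_field(uint32_t val, uint32_t mask, uint32_t shift)
{
	return (val & ~(mask >> shift)) == 0;
}

int main(void)
{
	/* Illustrative 9-bit field at bits 22..30: mask 0x7fc00000, shift 22. */
	const uint32_t mask = 0x7fc00000, shift = 22;

	printf("0x1ff: %d\n", fits_field(0x1ff, mask, shift)); /* 1: fits */
	printf("0x200: %d\n", fits_field(0x200, mask, shift)); /* 0: too wide */
	return 0;
}
```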
diff --git a/drivers/gpu/drm/rcar-du/Kconfig b/drivers/gpu/drm/rcar-du/Kconfig
index 0919f1f159a4..f65d1489dc50 100644
--- a/drivers/gpu/drm/rcar-du/Kconfig
+++ b/drivers/gpu/drm/rcar-du/Kconfig
@@ -31,6 +31,7 @@ config DRM_RCAR_DW_HDMI
config DRM_RCAR_LVDS
tristate "R-Car DU LVDS Encoder Support"
depends on DRM && DRM_BRIDGE && OF
+ select DRM_KMS_HELPER
select DRM_PANEL
select OF_FLATTREE
select OF_OVERLAY
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 83f31c6e891c..04d6848d19fc 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -957,6 +957,7 @@ static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm,
}
drm_plane_helper_add(&plane->base, &tegra_cursor_plane_helper_funcs);
+ drm_plane_create_zpos_immutable_property(&plane->base, 255);
return &plane->base;
}
diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c
index 8183e617bf6b..22a03f7ffdc1 100644
--- a/drivers/gpu/drm/tegra/hub.c
+++ b/drivers/gpu/drm/tegra/hub.c
@@ -149,7 +149,9 @@ int tegra_display_hub_prepare(struct tegra_display_hub *hub)
for (i = 0; i < hub->soc->num_wgrps; i++) {
struct tegra_windowgroup *wgrp = &hub->wgrps[i];
- tegra_windowgroup_enable(wgrp);
+ /* Skip orphaned window group whose parent DC is disabled */
+ if (wgrp->parent)
+ tegra_windowgroup_enable(wgrp);
}
return 0;
@@ -166,7 +168,9 @@ void tegra_display_hub_cleanup(struct tegra_display_hub *hub)
for (i = 0; i < hub->soc->num_wgrps; i++) {
struct tegra_windowgroup *wgrp = &hub->wgrps[i];
- tegra_windowgroup_disable(wgrp);
+ /* Skip orphaned window group whose parent DC is disabled */
+ if (wgrp->parent)
+ tegra_windowgroup_disable(wgrp);
}
}
@@ -944,6 +948,15 @@ static int tegra_display_hub_probe(struct platform_device *pdev)
dev_err(&pdev->dev, "failed to register host1x client: %d\n",
err);
+ err = devm_of_platform_populate(&pdev->dev);
+ if (err < 0)
+ goto unregister;
+
+ return err;
+
+unregister:
+ host1x_client_unregister(&hub->client);
+ pm_runtime_disable(&pdev->dev);
return err;
}
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 6a995db51d6d..e201f62d62c0 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -686,8 +686,17 @@ EXPORT_SYMBOL(host1x_driver_register_full);
*/
void host1x_driver_unregister(struct host1x_driver *driver)
{
+ struct host1x *host1x;
+
driver_unregister(&driver->driver);
+ mutex_lock(&devices_lock);
+
+ list_for_each_entry(host1x, &devices, list)
+ host1x_detach_driver(host1x, driver);
+
+ mutex_unlock(&devices_lock);
+
mutex_lock(&drivers_lock);
list_del_init(&driver->list);
mutex_unlock(&drivers_lock);
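
Editor's note: the host1x_driver_unregister() hunk walks the global device list under devices_lock and detaches the driver from each host1x instance, mirroring what registration set up. A deliberately simplified sketch of that register/unregister symmetry; the array, the names and the unconditional attach are illustrative only (the real code only attaches where the device-tree topology matches, and takes devices_lock around the walk):

```c
#include <stddef.h>

#define NUM_DEVICES 4

struct bus_driver { int id; };
struct bus_device { const struct bus_driver *bound; };

static struct bus_device devices[NUM_DEVICES];

/* Registration binds the driver to the existing devices... */
static void bus_driver_register(const struct bus_driver *drv)
{
	for (size_t i = 0; i < NUM_DEVICES; i++)
		devices[i].bound = drv;
}

/* ...so unregistration must walk the same list and undo the binding,
 * which is the step the patch adds. */
static void bus_driver_unregister(const struct bus_driver *drv)
{
	for (size_t i = 0; i < NUM_DEVICES; i++)
		if (devices[i].bound == drv)
			devices[i].bound = NULL;
}

int main(void)
{
	struct bus_driver drv = { .id = 1 };

	bus_driver_register(&drv);
	bus_driver_unregister(&drv);
	return devices[0].bound != NULL;   /* 0 when the detach worked */
}
```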
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index d24344e91922..d0ebb70e2fdd 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -468,11 +468,18 @@ static int host1x_probe(struct platform_device *pdev)
err = host1x_register(host);
if (err < 0)
- goto deinit_intr;
+ goto deinit_debugfs;
+
+ err = devm_of_platform_populate(&pdev->dev);
+ if (err < 0)
+ goto unregister;
return 0;
-deinit_intr:
+unregister:
+ host1x_unregister(host);
+deinit_debugfs:
+ host1x_debug_deinit(host);
host1x_intr_deinit(host);
deinit_syncpt:
host1x_syncpt_deinit(host);
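
Editor's note: the host1x_probe() hunk extends the error unwinding. A devm_of_platform_populate() failure now goes through host1x_unregister() and host1x_debug_deinit() before the existing interrupt and syncpt teardown, and a host1x_register() failure now also tears down debugfs, keeping the cleanup in strict reverse order of setup. A compact sketch of that goto-ladder convention; the init/teardown functions are placeholders, not host1x calls:

```c
#include <stdio.h>

static int  init_a(void) { puts("init a"); return 0; }
static int  init_b(void) { puts("init b"); return 0; }
static int  init_c(void) { puts("init c"); return -1; } /* pretend this fails */
static void teardown_b(void) { puts("undo b"); }
static void teardown_a(void) { puts("undo a"); }

/* Each later step jumps to a label that undoes everything set up before it,
 * in reverse order, like the unregister/deinit_debugfs ladder above. */
static int probe(void)
{
	int err;

	err = init_a();
	if (err)
		return err;

	err = init_b();
	if (err)
		goto cleanup_a;

	err = init_c();
	if (err)
		goto cleanup_b;

	return 0;

cleanup_b:
	teardown_b();
cleanup_a:
	teardown_a();
	return err;
}

int main(void)
{
	return probe() ? 1 : 0;
}
```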