From efc52308c6a51125138c0034b3c7c24acbdd4173 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 30 Mar 2021 19:24:15 +0300 Subject: drm/i915: Split out glk_plane_min_cdclk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the glk+ stuff into its own version of the .min_cdclk() vfunc. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210330162416.18616-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/skl_universal_plane.c | 53 ++++++++++++++-------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 7ffd7b570b54..5127489a0446 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -287,39 +287,54 @@ bool icl_is_hdr_plane(struct drm_i915_private *dev_priv, enum plane_id plane_id) } static void -skl_plane_ratio(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state, +glk_plane_ratio(const struct intel_plane_state *plane_state, unsigned int *num, unsigned int *den) { - struct drm_i915_private *dev_priv = to_i915(plane_state->uapi.plane->dev); const struct drm_framebuffer *fb = plane_state->hw.fb; if (fb->format->cpp[0] == 8) { - if (DISPLAY_VER(dev_priv) >= 10) { - *num = 10; - *den = 8; - } else { - *num = 9; - *den = 8; - } + *num = 10; + *den = 8; } else { *num = 1; *den = 1; } } -static int skl_plane_min_cdclk(const struct intel_crtc_state *crtc_state, +static int glk_plane_min_cdclk(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = to_i915(plane_state->uapi.plane->dev); - unsigned int num, den; unsigned int pixel_rate = intel_plane_pixel_rate(crtc_state, plane_state); + unsigned int num, den; - skl_plane_ratio(crtc_state, plane_state, &num, &den); + glk_plane_ratio(plane_state, &num, &den); - /* two pixels per clock on glk+ */ - if (DISPLAY_VER(dev_priv) >= 10) - den *= 2; + /* two pixels per clock */ + return DIV_ROUND_UP(pixel_rate * num, 2 * den); +} + +static void +skl_plane_ratio(const struct intel_plane_state *plane_state, + unsigned int *num, unsigned int *den) +{ + const struct drm_framebuffer *fb = plane_state->hw.fb; + + if (fb->format->cpp[0] == 8) { + *num = 9; + *den = 8; + } else { + *num = 1; + *den = 1; + } +} + +static int skl_plane_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { + unsigned int pixel_rate = intel_plane_pixel_rate(crtc_state, plane_state); + unsigned int num, den; + + skl_plane_ratio(plane_state, &num, &den); return DIV_ROUND_UP(pixel_rate * num, den); } @@ -1965,12 +1980,15 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, plane->min_width = icl_plane_min_width; plane->max_width = icl_plane_max_width; plane->max_height = icl_plane_max_height; + plane->min_cdclk = glk_plane_min_cdclk; } else if (DISPLAY_VER(dev_priv) >= 10) { plane->max_width = glk_plane_max_width; plane->max_height = skl_plane_max_height; + plane->min_cdclk = glk_plane_min_cdclk; } else { plane->max_width = skl_plane_max_width; plane->max_height = skl_plane_max_height; + plane->min_cdclk = skl_plane_min_cdclk; } plane->max_stride = skl_plane_max_stride; @@ -1978,7 +1996,6 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, plane->disable_plane = skl_disable_plane; plane->get_hw_state
= skl_plane_get_hw_state; plane->check_plane = skl_plane_check; - plane->min_cdclk = skl_plane_min_cdclk; if (plane_id == PLANE_PRIMARY) { plane->need_async_flip_disable_wa = IS_DISPLAY_RANGE(dev_priv, -- cgit From 6195f8502dad4d6d2e715797c994a969fe7e209a Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 30 Mar 2021 19:24:16 +0300 Subject: drm/i915: Update plane ratio for icl+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to bspec, icl+ no longer needs any extra cdclk guardband for 64bpp formats. Make it so. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210330162416.18616-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/skl_universal_plane.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 5127489a0446..098636c811a8 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -286,6 +286,15 @@ bool icl_is_hdr_plane(struct drm_i915_private *dev_priv, enum plane_id plane_id) icl_hdr_plane_mask() & BIT(plane_id); } +static int icl_plane_min_cdclk(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + unsigned int pixel_rate = intel_plane_pixel_rate(crtc_state, plane_state); + + /* two pixels per clock */ + return DIV_ROUND_UP(pixel_rate, 2); +} + static void glk_plane_ratio(const struct intel_plane_state *plane_state, unsigned int *num, unsigned int *den) @@ -1980,7 +1989,7 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, plane->min_width = icl_plane_min_width; plane->max_width = icl_plane_max_width; plane->max_height = icl_plane_max_height; - plane->min_cdclk = glk_plane_min_cdclk; + plane->min_cdclk = icl_plane_min_cdclk; } else if (DISPLAY_VER(dev_priv) >= 10) { plane->max_width = glk_plane_max_width; plane->max_height = skl_plane_max_height; -- cgit From d339ef1c4d6bb351251211d296ecbe05c2a54819 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 11 Mar 2021 16:45:29 +0200 Subject: drm/i915: Uninit the DMC FW loader state during shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to wait for the DMC FW loader work to complete during shutdown, even if it's unlikely to still be pending by that time; fix this. This also fixes the wakeref tracking WARN during shutdown about the leaked reference we hold due to a missing DMC firmware. While at it, add a TODO comment about unifying the shutdown and PM power-off sequences and later these sequences with the driver remove and system/runtime suspend sequences.
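For context, the fix simply routes the shutdown path through the driver's existing CSR/DMC suspend helper. A minimal sketch of what intel_csr_ucode_suspend() is expected to do, reconstructed from the description above (the body shown here is an assumption, not part of this patch):

	/* Hedged sketch of intel_csr_ucode_suspend(); the real body lives
	 * in intel_csr.c and may differ in detail.
	 */
	void intel_csr_ucode_suspend(struct drm_i915_private *dev_priv)
	{
		if (!HAS_CSR(dev_priv))
			return;

		/* Wait for a still-pending async DMC firmware load to finish. */
		flush_work(&dev_priv->csr.work);

		/* Drop the wakeref held while no DMC payload is loaded; this
		 * is what silences the wakeref tracking WARN at shutdown.
		 */
		if (!dev_priv->csr.dmc_payload)
			intel_csr_runtime_pm_put(dev_priv);
	}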
Cc: Ville Syrjälä References: https://lore.kernel.org/lkml/20210303055517.GB2708@xsang-OptiPlex-9020 Reported-and-tested-by: kernel test robot Reported-and-tested-by: Edward Baker Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210311144529.3059024-1-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 8bd80a46ba0a..abb0152b56f4 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1029,10 +1029,18 @@ void i915_driver_shutdown(struct drm_i915_private *i915) intel_suspend_encoders(i915); intel_shutdown_encoders(i915); + intel_csr_ucode_suspend(i915); + /* * The only requirement is to reboot with display DC states disabled, * for now leaving all display power wells in the INIT power domain - * enabled matching the driver reload sequence. + * enabled. + * + * TODO: + * - unify the pci_driver::shutdown sequence here with the + * pci_driver.driver.pm.poweroff,poweroff_late sequence. + * - unify the driver remove and system/runtime suspend sequences with + * the above unified shutdown/poweroff sequence. */ intel_power_domains_driver_remove(i915); enable_rpm_wakeref_asserts(&i915->runtime_pm); -- cgit From 2d667442dbe70e7e78c3450b17ba5aa5032a7b6b Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 1 Apr 2021 20:02:37 +0300 Subject: drm/i915/display/psr: Disable DC3CO when PSR2 is used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DC3CO activation/deactivation sequence has changed; disable DC3CO until the updated sequence is implemented. References: https://gitlab.freedesktop.org/drm/intel/-/issues/3134 Signed-off-by: Gwan-gyeong Mun Reviewed-by: José Roberto de Souza Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210401170237.40472-1-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 1d561812fcad..32d3d56259c2 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -654,6 +654,13 @@ tgl_dc3co_exitline_compute_config(struct intel_dp *intel_dp, struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); u32 exit_scanlines; + /* + * FIXME: Due to the changed sequence of activating/deactivating DC3CO, + * disable DC3CO until the changed dc3co activating/deactivating sequence + * is applied. B.Specs:49196 + */ + return; + /* * DMC's DC3CO exit mechanism has an issue with Selective Fetch * TODO: when the issue is addressed, this restriction should be removed.
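Note how the early return above neutralizes the feature: tgl_dc3co_exitline_compute_config() is where the DC3CO exitline would be computed, so bailing out first leaves the exitline at zero and the PSR2 code never arms DC3CO, keeping the display on the regular DC5/DC6 path. A sketch of the assumed downstream check (the crtc_state field name is inferred from the function's purpose; it is not shown in this excerpt):

	/* Assumed consumer of the computed exitline: with the FIXME return
	 * in place the value stays zero, so DC3CO is never enabled.
	 */
	if (!crtc_state->dc3co_exitline)
		return; /* stay on the regular DC5/DC6 states */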
-- cgit From 11cda4967a33003dc19e90f5df3470031a864d8d Mon Sep 17 00:00:00 2001 From: Xuezhi Zhang Date: Sun, 4 Apr 2021 08:41:03 +0000 Subject: drm/i915/sysfs: convert snprintf to sysfs_emit Fix the following coccicheck warnings: drivers/gpu/drm/i915//i915_sysfs.c:266:8-16: WARNING: use scnprintf or sprintf drivers/gpu/drm/i915//i915_sysfs.c:285:8-16: WARNING: use scnprintf or sprintf drivers/gpu/drm/i915//i915_sysfs.c:276:8-16: WARNING: use scnprintf or sprintf drivers/gpu/drm/i915//i915_sysfs.c:335:8-16: WARNING: use scnprintf or sprintf drivers/gpu/drm/i915//i915_sysfs.c:390:8-16: WARNING: use scnprintf or sprintf drivers/gpu/drm/i915//i915_sysfs.c:465:8-16: WARNING: use scnprintf or sprintf drivers/gpu/drm/i915//i915_sysfs.c:107:8-16: WARNING: use scnprintf or sprintf drivers/gpu/drm/i915//i915_sysfs.c:75:8-16: WARNING: use scnprintf or sprintf drivers/gpu/drm/i915//i915_sysfs.c:83:8-16: WARNING: use scnprintf or sprintf drivers/gpu/drm/i915//i915_sysfs.c:91:8-16: WARNING: use scnprintf or sprintf drivers/gpu/drm/i915//i915_sysfs.c:99:8-16: WARNING: use scnprintf or sprintf drivers/gpu/drm/i915//i915_sysfs.c:326:8-16: WARNING: use scnprintf or sprintf Signed-off-by: Xuezhi Zhang Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210404084103.528211-1-llyz108@163.com --- drivers/gpu/drm/i915/i915_sysfs.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 45d32ef42787..4c6b5d52b5ca 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -72,7 +72,7 @@ show_rc6_mask(struct device *kdev, struct device_attribute *attr, char *buf) if (HAS_RC6pp(dev_priv)) mask |= BIT(2); - return snprintf(buf, PAGE_SIZE, "%x\n", mask); + return sysfs_emit(buf, "%x\n", mask); } static ssize_t @@ -80,7 +80,7 @@ show_rc6_ms(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); u32 rc6_residency = calc_residency(dev_priv, GEN6_GT_GFX_RC6); - return snprintf(buf, PAGE_SIZE, "%u\n", rc6_residency); + return sysfs_emit(buf, "%u\n", rc6_residency); } static ssize_t @@ -88,7 +88,7 @@ show_rc6p_ms(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); u32 rc6p_residency = calc_residency(dev_priv, GEN6_GT_GFX_RC6p); - return snprintf(buf, PAGE_SIZE, "%u\n", rc6p_residency); + return sysfs_emit(buf, "%u\n", rc6p_residency); } static ssize_t @@ -96,7 +96,7 @@ show_rc6pp_ms(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); u32 rc6pp_residency = calc_residency(dev_priv, GEN6_GT_GFX_RC6pp); - return snprintf(buf, PAGE_SIZE, "%u\n", rc6pp_residency); + return sysfs_emit(buf, "%u\n", rc6pp_residency); } static ssize_t @@ -104,7 +104,7 @@ show_media_rc6_ms(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); u32 rc6_residency = calc_residency(dev_priv, VLV_GT_MEDIA_RC6); - return snprintf(buf, PAGE_SIZE, "%u\n", rc6_residency); + return sysfs_emit(buf, "%u\n", rc6_residency); } static DEVICE_ATTR(rc6_enable, S_IRUGO, show_rc6_mask, NULL); @@ -263,8 +263,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); struct intel_rps *rps = &i915->gt.rps; - return snprintf(buf, PAGE_SIZE, "%d\n", -
intel_rps_read_actual_frequency(rps)); + return sysfs_emit(buf, "%d\n", intel_rps_read_actual_frequency(rps)); } static ssize_t gt_cur_freq_mhz_show(struct device *kdev, @@ -273,8 +272,7 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev, struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); struct intel_rps *rps = &i915->gt.rps; - return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(rps, rps->cur_freq)); + return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->cur_freq)); } static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -282,8 +280,7 @@ static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribu struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); struct intel_rps *rps = &i915->gt.rps; - return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(rps, rps->boost_freq)); + return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->boost_freq)); } static ssize_t gt_boost_freq_mhz_store(struct device *kdev, @@ -323,8 +320,7 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); struct intel_rps *rps = &dev_priv->gt.rps; - return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(rps, rps->efficient_freq)); + return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->efficient_freq)); } static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -332,8 +328,7 @@ static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); struct intel_rps *rps = &dev_priv->gt.rps; - return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(rps, rps->max_freq_softlimit)); + return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->max_freq_softlimit)); } static ssize_t gt_max_freq_mhz_store(struct device *kdev, @@ -387,8 +382,7 @@ static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); struct intel_rps *rps = &dev_priv->gt.rps; - return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(rps, rps->min_freq_softlimit)); + return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->min_freq_softlimit)); } static ssize_t gt_min_freq_mhz_store(struct device *kdev, @@ -462,7 +456,7 @@ static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr else BUG(); - return snprintf(buf, PAGE_SIZE, "%d\n", val); + return sysfs_emit(buf, "%d\n", val); } static const struct attribute * const gen6_attrs[] = { -- cgit From 337d7a1621c7f02af867229990ac67c97da1b53a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 2 Apr 2021 10:23:17 +0200 Subject: drm/i915: Fix invalid access to ACPI _DSM objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_dsm_platform_mux_info() tries to parse the ACPI package data from _DSM for the debug information, but it assumes a fixed format without actually checking what the elements contain. When an unexpected value is returned from the BIOS, it may lead to a GPF or NULL dereference, as reported recently. Add checks of the contents of the returned values and skip invalid entries.
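For reference, this is the package shape the parser now validates rather than assumes, along with a hypothetical helper mirroring the new per-entry checks in the hunk below (the helper name is illustrative, not part of the patch):

	/* Expected _DSM MUX info layout, reconstructed from the added checks:
	 *   package {
	 *       integer connector_count;      // element 0
	 *       package {                     // elements 1..N-1
	 *           integer connector_id;
	 *           buffer  info;             // at least 4 bytes
	 *       };
	 *       ...
	 *   }
	 */
	static bool intel_dsm_mux_entry_valid(const union acpi_object *obj)
	{
		if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count < 2)
			return false;

		return obj->package.elements[1].type == ACPI_TYPE_BUFFER &&
		       obj->package.elements[1].buffer.length >= 4;
	}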
v1->v2: Check the info contents before dereferencing, too BugLink: http://bugzilla.opensuse.org/show_bug.cgi?id=1184074 Cc: Signed-off-by: Takashi Iwai Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210402082317.871-1-tiwai@suse.de --- drivers/gpu/drm/i915/display/intel_acpi.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_acpi.c b/drivers/gpu/drm/i915/display/intel_acpi.c index e21fb14d5e07..833d0c1be4f1 100644 --- a/drivers/gpu/drm/i915/display/intel_acpi.c +++ b/drivers/gpu/drm/i915/display/intel_acpi.c @@ -84,13 +84,31 @@ static void intel_dsm_platform_mux_info(acpi_handle dhandle) return; } + if (!pkg->package.count) { + DRM_DEBUG_DRIVER("no connection in _DSM\n"); + return; + } + connector_count = &pkg->package.elements[0]; DRM_DEBUG_DRIVER("MUX info connectors: %lld\n", (unsigned long long)connector_count->integer.value); for (i = 1; i < pkg->package.count; i++) { union acpi_object *obj = &pkg->package.elements[i]; - union acpi_object *connector_id = &obj->package.elements[0]; - union acpi_object *info = &obj->package.elements[1]; + union acpi_object *connector_id; + union acpi_object *info; + + if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count < 2) { + DRM_DEBUG_DRIVER("Invalid object for MUX #%d\n", i); + continue; + } + + connector_id = &obj->package.elements[0]; + info = &obj->package.elements[1]; + if (info->type != ACPI_TYPE_BUFFER || info->buffer.length < 4) { + DRM_DEBUG_DRIVER("Invalid info for MUX obj #%d\n", i); + continue; + } + DRM_DEBUG_DRIVER("Connector id: 0x%016llx\n", (unsigned long long)connector_id->integer.value); DRM_DEBUG_DRIVER(" port id: %s\n", -- cgit From 70bfb30743d5da73058b0a2271e9c127a84fb494 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 7 Apr 2021 13:39:45 -0700 Subject: drm/i915/display: Eliminate IS_GEN9_{BC,LP} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we've eliminated INTEL_GEN(), IS_GEN_RANGE(), etc. from the display code, we should also kill off our use of the IS_GEN9_* macros. We'll do the conversion manually this time instead of using Coccinelle since the most logical substitution can depend heavily on the code context, and sometimes we can keep the code simpler if we make additional adjustments such as swapping the order of if/else arms. v2: - Restore a lost negation in intel_pll_is_valid().
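For readers tracking the substitutions in the diff below, here are the removed macros and the typical replacements used throughout (macro bodies quoted from i915_drv.h of this era; treat the exact form as approximate):

	/* The macros this patch removes from display code (approximate
	 * definitions at the time of the series):
	 */
	#define IS_GEN9_LP(i915)	(IS_GEN(i915, 9) && IS_LP(i915))	/* BXT, GLK */
	#define IS_GEN9_BC(i915)	(IS_GEN(i915, 9) && !IS_LP(i915))	/* SKL, KBL, CFL */

	/* Typical replacements seen in the diff. GLK carries display
	 * version 10 while BXT stays at 9, which is why the two macros
	 * translate differently:
	 *
	 *   IS_GEN9_LP(i915)  ->  IS_GEMINILAKE(i915) || IS_BROXTON(i915)
	 *   IS_GEN9_BC(i915)  ->  IS_DISPLAY_VER(i915, 9) && !IS_BROXTON(i915)
	 */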
Cc: Ville Syrjälä Cc: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20210407203945.1432531-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/display/intel_bios.c | 7 ++-- drivers/gpu/drm/i915/display/intel_cdclk.c | 34 ++++++++-------- drivers/gpu/drm/i915/display/intel_csr.c | 2 +- drivers/gpu/drm/i915/display/intel_ddi.c | 37 +++++++++-------- drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c | 8 ++-- drivers/gpu/drm/i915/display/intel_display.c | 11 +++--- .../gpu/drm/i915/display/intel_display_debugfs.c | 2 +- drivers/gpu/drm/i915/display/intel_display_power.c | 43 ++++++++++---------- drivers/gpu/drm/i915/display/intel_dp.c | 4 +- drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +- drivers/gpu/drm/i915/display/intel_dpll.c | 6 +-- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 6 +-- drivers/gpu/drm/i915/display/intel_fbc.c | 7 ++-- drivers/gpu/drm/i915/display/intel_gmbus.c | 12 +++--- drivers/gpu/drm/i915/display/intel_hdcp.c | 9 +++-- drivers/gpu/drm/i915/display/intel_hdmi.c | 7 ++-- drivers/gpu/drm/i915/display/intel_panel.c | 2 +- drivers/gpu/drm/i915/display/intel_pps.c | 14 +++---- drivers/gpu/drm/i915/display/vlv_dsi.c | 46 +++++++++++----------- drivers/gpu/drm/i915/i915_irq.c | 13 +++--- drivers/gpu/drm/i915/intel_pm.c | 8 ++-- 21 files changed, 146 insertions(+), 134 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 3d0c035b5e38..ea4837d485a1 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -917,7 +917,7 @@ parse_psr(struct drm_i915_private *i915, const struct bdb_header *bdb) * Old decimal value is wake up time in multiples of 100 us. */ if (bdb->version >= 205 && - (IS_GEN9_BC(i915) || DISPLAY_VER(i915) >= 10)) { + (DISPLAY_VER(i915) >= 9 && !IS_BROXTON(i915))) { switch (psr_table->tp1_wakeup_time) { case 0: i915->vbt.psr.tp1_wakeup_time_us = 500; @@ -1659,7 +1659,7 @@ static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin) } else if (IS_ROCKETLAKE(i915) && INTEL_PCH_TYPE(i915) == PCH_TGP) { ddc_pin_map = rkl_pch_tgp_ddc_pin_map; n_entries = ARRAY_SIZE(rkl_pch_tgp_ddc_pin_map); - } else if (HAS_PCH_TGP(i915) && IS_GEN9_BC(i915)) { + } else if (HAS_PCH_TGP(i915) && IS_DISPLAY_VER(i915, 9)) { ddc_pin_map = gen9bc_tgp_ddc_pin_map; n_entries = ARRAY_SIZE(gen9bc_tgp_ddc_pin_map); } else if (INTEL_PCH_TYPE(i915) >= PCH_ICP) { @@ -2770,7 +2770,8 @@ intel_bios_is_port_hpd_inverted(const struct drm_i915_private *i915, const struct intel_bios_encoder_data *devdata = i915->vbt.ddi_port_info[port].devdata; - if (drm_WARN_ON_ONCE(&i915->drm, !IS_GEN9_LP(i915))) + if (drm_WARN_ON_ONCE(&i915->drm, + !IS_GEMINILAKE(i915) && !IS_BROXTON(i915))) return false; return devdata && devdata->child.hpd_invert; diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 3f43ad4d7362..489acf6b5cf1 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1631,7 +1631,8 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, * Disable SSA Precharge when CD clock frequency < 500 MHz, * enable otherwise. 
*/ - if (IS_GEN9_LP(dev_priv) && cdclk >= 500000) + if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) && + cdclk >= 500000) val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; intel_de_write(dev_priv, CDCLK_CTL, val); @@ -1732,7 +1733,8 @@ static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) * Disable SSA Precharge when CD clock frequency < 500 MHz, * enable otherwise. */ - if (IS_GEN9_LP(dev_priv) && dev_priv->cdclk.hw.cdclk >= 500000) + if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) && + dev_priv->cdclk.hw.cdclk >= 500000) expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; if (cdctl == expected) @@ -1797,9 +1799,9 @@ static void bxt_cdclk_uninit_hw(struct drm_i915_private *dev_priv) */ void intel_cdclk_init_hw(struct drm_i915_private *i915) { - if (IS_GEN9_LP(i915) || DISPLAY_VER(i915) >= 10) + if (DISPLAY_VER(i915) >= 10 || IS_BROXTON(i915)) bxt_cdclk_init_hw(i915); - else if (IS_GEN9_BC(i915)) + else if (IS_DISPLAY_VER(i915, 9)) skl_cdclk_init_hw(i915); } @@ -1812,9 +1814,9 @@ void intel_cdclk_init_hw(struct drm_i915_private *i915) */ void intel_cdclk_uninit_hw(struct drm_i915_private *i915) { - if (DISPLAY_VER(i915) >= 10 || IS_GEN9_LP(i915)) + if (DISPLAY_VER(i915) >= 10 || IS_BROXTON(i915)) bxt_cdclk_uninit_hw(i915); - else if (IS_GEN9_BC(i915)) + else if (IS_DISPLAY_VER(i915, 9)) skl_cdclk_uninit_hw(i915); } @@ -1852,7 +1854,7 @@ static bool intel_cdclk_can_cd2x_update(struct drm_i915_private *dev_priv, const struct intel_cdclk_config *b) { /* Older hw doesn't have the capability */ - if (DISPLAY_VER(dev_priv) < 10 && !IS_GEN9_LP(dev_priv)) + if (DISPLAY_VER(dev_priv) < 10 && !IS_BROXTON(dev_priv)) return false; return a->cdclk != b->cdclk && @@ -2625,7 +2627,11 @@ void intel_update_max_cdclk(struct drm_i915_private *dev_priv) dev_priv->max_cdclk_freq = 652800; } else if (IS_CANNONLAKE(dev_priv)) { dev_priv->max_cdclk_freq = 528000; - } else if (IS_GEN9_BC(dev_priv)) { + } else if (IS_GEMINILAKE(dev_priv)) { + dev_priv->max_cdclk_freq = 316800; + } else if (IS_BROXTON(dev_priv)) { + dev_priv->max_cdclk_freq = 624000; + } else if (IS_DISPLAY_VER(dev_priv, 9)) { u32 limit = intel_de_read(dev_priv, SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK; int max_cdclk, vco; @@ -2647,10 +2653,6 @@ void intel_update_max_cdclk(struct drm_i915_private *dev_priv) max_cdclk = 308571; dev_priv->max_cdclk_freq = skl_calc_cdclk(max_cdclk, vco); - } else if (IS_GEMINILAKE(dev_priv)) { - dev_priv->max_cdclk_freq = 316800; - } else if (IS_BROXTON(dev_priv)) { - dev_priv->max_cdclk_freq = 624000; } else if (IS_BROADWELL(dev_priv)) { /* * FIXME with extra cooling we can allow @@ -2878,7 +2880,7 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; dev_priv->display.calc_voltage_level = cnl_calc_voltage_level; dev_priv->cdclk.table = cnl_cdclk_table; - } else if (IS_GEN9_LP(dev_priv)) { + } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk; dev_priv->display.set_cdclk = bxt_set_cdclk; dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; @@ -2887,7 +2889,7 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) dev_priv->cdclk.table = glk_cdclk_table; else dev_priv->cdclk.table = bxt_cdclk_table; - } else if (IS_GEN9_BC(dev_priv)) { + } else if (IS_DISPLAY_VER(dev_priv, 9)) { dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk; dev_priv->display.set_cdclk = skl_set_cdclk; dev_priv->display.modeset_calc_cdclk = skl_modeset_calc_cdclk; @@ -2908,9 
+2910,9 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) dev_priv->display.modeset_calc_cdclk = fixed_modeset_calc_cdclk; } - if (DISPLAY_VER(dev_priv) >= 10 || IS_GEN9_LP(dev_priv)) + if (DISPLAY_VER(dev_priv) >= 10 || IS_BROXTON(dev_priv)) dev_priv->display.get_cdclk = bxt_get_cdclk; - else if (IS_GEN9_BC(dev_priv)) + else if (IS_DISPLAY_VER(dev_priv, 9)) dev_priv->display.get_cdclk = skl_get_cdclk; else if (IS_BROADWELL(dev_priv)) dev_priv->display.get_cdclk = bdw_get_cdclk; diff --git a/drivers/gpu/drm/i915/display/intel_csr.c b/drivers/gpu/drm/i915/display/intel_csr.c index 794efcc3ca08..e54521d7b931 100644 --- a/drivers/gpu/drm/i915/display/intel_csr.c +++ b/drivers/gpu/drm/i915/display/intel_csr.c @@ -284,7 +284,7 @@ static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv) mask = DC_STATE_DEBUG_MASK_MEMORY_UP; - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) mask |= DC_STATE_DEBUG_MASK_CORES; /* The below bit doesn't need to be cleared ever afterwards */ diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 953de42e277c..deef24da00b5 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -113,7 +113,8 @@ void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder, &n_entries); /* If we're boosting the current, set bit 31 of trans1 */ - if (IS_GEN9_BC(dev_priv) && intel_bios_encoder_dp_boost_level(encoder->devdata)) + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv) && + intel_bios_encoder_dp_boost_level(encoder->devdata)) iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; for (i = 0; i < n_entries; i++) { @@ -146,7 +147,8 @@ static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder, level = n_entries - 1; /* If we're boosting the current, set bit 31 of trans1 */ - if (IS_GEN9_BC(dev_priv) && intel_bios_encoder_hdmi_boost_level(encoder->devdata)) + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv) && + intel_bios_encoder_hdmi_boost_level(encoder->devdata)) iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; /* Entry 9 is for HDMI: */ @@ -759,7 +761,7 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, *is_dp_mst = mst_pipe_mask; out: - if (*pipe_mask && IS_GEN9_LP(dev_priv)) { + if (*pipe_mask && (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv))) { tmp = intel_de_read(dev_priv, BXT_PHY_CTL(port)); if ((tmp & (BXT_PHY_CMNLANE_POWERDOWN_ACK | BXT_PHY_LANE_POWERDOWN_ACK | @@ -987,7 +989,7 @@ static u8 intel_ddi_dp_voltage_max(struct intel_dp *intel_dp, icl_get_mg_buf_trans(encoder, crtc_state, &n_entries); } else if (IS_CANNONLAKE(dev_priv)) { cnl_get_buf_trans(encoder, crtc_state, &n_entries); - } else if (IS_GEN9_LP(dev_priv)) { + } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { bxt_get_buf_trans(encoder, crtc_state, &n_entries); } else { if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) @@ -1555,7 +1557,7 @@ hsw_set_signal_levels(struct intel_dp *intel_dp, intel_dp->DP &= ~DDI_BUF_EMP_MASK; intel_dp->DP |= signal_levels; - if (IS_GEN9_BC(dev_priv)) + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) skl_ddi_set_iboost(encoder, crtc_state, level); intel_de_write(dev_priv, DDI_BUF_CTL(port), intel_dp->DP); @@ -2648,7 +2650,7 @@ static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state, icl_ddi_vswing_sequence(encoder, crtc_state, level); else if (IS_CANNONLAKE(dev_priv)) cnl_ddi_vswing_sequence(encoder, crtc_state, level); - else if 
(IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_ddi_vswing_sequence(encoder, crtc_state, level); else intel_prepare_dp_ddi_buffers(encoder, crtc_state); @@ -3096,16 +3098,16 @@ static void intel_enable_ddi_hdmi(struct intel_atomic_state *state, icl_ddi_vswing_sequence(encoder, crtc_state, level); else if (IS_CANNONLAKE(dev_priv)) cnl_ddi_vswing_sequence(encoder, crtc_state, level); - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_ddi_vswing_sequence(encoder, crtc_state, level); else intel_prepare_hdmi_ddi_buffers(encoder, level); - if (IS_GEN9_BC(dev_priv)) + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) skl_ddi_set_iboost(encoder, crtc_state, level); /* Display WA #1143: skl,kbl,cfl */ - if (IS_GEN9_BC(dev_priv)) { + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) { /* * For some reason these chicken bits have been * stuffed into a transcoder register, event though @@ -3321,7 +3323,7 @@ intel_ddi_pre_pll_enable(struct intel_atomic_state *state, * Type-C ports. Skip this step for TBT. */ intel_tc_port_set_fia_lane_count(dig_port, crtc_state->lane_count); - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_ddi_phy_set_lane_optim_mask(encoder, crtc_state->lane_lat_optim_mask); } @@ -3679,7 +3681,7 @@ static void intel_ddi_get_config(struct intel_encoder *encoder, if (!pipe_config->bigjoiner_slave) ddi_dotclock_get(pipe_config); - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) pipe_config->lane_lat_optim_mask = bxt_ddi_phy_get_lane_lat_optim_mask(encoder); @@ -3885,7 +3887,7 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder, pipe_config->pch_pfit.enabled || pipe_config->crc_enabled; - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) pipe_config->lane_lat_optim_mask = bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count); @@ -4053,7 +4055,7 @@ intel_ddi_init_dp_connector(struct intel_digital_port *dig_port) dig_port->dp.set_signal_levels = icl_set_signal_levels; else if (IS_CANNONLAKE(dev_priv)) dig_port->dp.set_signal_levels = cnl_set_signal_levels; - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) dig_port->dp.set_signal_levels = bxt_set_signal_levels; else dig_port->dp.set_signal_levels = hsw_set_signal_levels; @@ -4296,7 +4298,7 @@ static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dig_port) /* Broxton/Geminilake: Bspec says that DDI_A_4_LANES is the only * supported configuration */ - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) return true; /* Cannonlake: Most of SKUs don't support DDI_E, and the only @@ -4585,10 +4587,10 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) encoder->disable_clock = cnl_ddi_disable_clock; encoder->is_clock_enabled = cnl_ddi_is_clock_enabled; encoder->get_config = cnl_ddi_get_config; - } else if (IS_GEN9_LP(dev_priv)) { + } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { /* BXT/GLK have fixed PLL->port mapping */ encoder->get_config = bxt_ddi_get_config; - } else if (IS_GEN9_BC(dev_priv)) { + } else if (IS_DISPLAY_VER(dev_priv, 9)) { encoder->enable_clock = skl_ddi_enable_clock; encoder->disable_clock = skl_ddi_disable_clock; encoder->is_clock_enabled = skl_ddi_is_clock_enabled; @@ -4672,7 +4674,8 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) else 
dig_port->connected = lpt_digital_port_connected; } else if (DISPLAY_VER(dev_priv) >= 8) { - if (port == PORT_A || IS_GEN9_LP(dev_priv)) + if (port == PORT_A || IS_GEMINILAKE(dev_priv) || + IS_BROXTON(dev_priv)) dig_port->connected = bdw_digital_port_connected; else dig_port->connected = lpt_digital_port_connected; diff --git a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c index 5d9ce6042e87..fdd25861edd5 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c +++ b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c @@ -881,7 +881,7 @@ intel_ddi_get_buf_trans_edp(struct intel_encoder *encoder, int *n_entries) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (IS_GEN9_BC(dev_priv)) { + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) { const struct ddi_buf_trans *ddi_translations = skl_get_buf_trans_edp(encoder, n_entries); *n_entries = skl_buf_trans_num_entries(encoder->port, *n_entries); @@ -919,7 +919,7 @@ intel_ddi_get_buf_trans_hdmi(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (IS_GEN9_BC(dev_priv)) { + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) { return skl_get_buf_trans_hdmi(dev_priv, n_entries); } else if (IS_BROADWELL(dev_priv)) { *n_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); @@ -1370,10 +1370,10 @@ int intel_ddi_hdmi_num_entries(struct intel_encoder *encoder, } else if (IS_CANNONLAKE(dev_priv)) { cnl_get_buf_trans_hdmi(encoder, &n_entries); *default_entry = n_entries - 1; - } else if (IS_GEN9_LP(dev_priv)) { + } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { bxt_get_buf_trans_hdmi(encoder, &n_entries); *default_entry = n_entries - 1; - } else if (IS_GEN9_BC(dev_priv)) { + } else if (IS_DISPLAY_VER(dev_priv, 9)) { intel_ddi_get_buf_trans_hdmi(encoder, &n_entries); *default_entry = 8; } else if (IS_BROADWELL(dev_priv)) { diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index bdb2adb4d748..d3af51e63380 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -5921,7 +5921,7 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc, active = hsw_get_transcoder_state(crtc, pipe_config, &power_domain_set); - if (IS_GEN9_LP(dev_priv) && + if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) && bxt_get_dsi_transcoder_state(crtc, pipe_config, &power_domain_set)) { drm_WARN_ON(&dev_priv->drm, active); active = true; @@ -6839,7 +6839,8 @@ static u16 skl_linetime_wm(const struct intel_crtc_state *crtc_state) crtc_state->pixel_rate); /* Display WA #1135: BXT:ALL GLK:ALL */ - if (IS_GEN9_LP(dev_priv) && dev_priv->ipc_enabled) + if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) && + dev_priv->ipc_enabled) linetime_wm /= 2; return min(linetime_wm, 0x1ff); @@ -10858,7 +10859,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_ddi_init(dev_priv, PORT_F); icl_dsi_init(dev_priv); - } else if (IS_GEN9_LP(dev_priv)) { + } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { /* * FIXME: Broxton doesn't support port detection via the * DDI_BUF_CTL_A or SFUSE_STRAP registers, find another way to @@ -10883,7 +10884,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) */ found = intel_de_read(dev_priv, DDI_BUF_CTL(PORT_A)) & DDI_INIT_DISPLAY_DETECTED; /* WaIgnoreDDIAStrap: skl */ - if (found || IS_GEN9_BC(dev_priv)) + if (found || IS_DISPLAY_VER(dev_priv, 
9)) intel_ddi_init(dev_priv, PORT_A); /* DDI B, C, D, and F detection is indicated by the SFUSE_STRAP @@ -10908,7 +10909,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) /* * On SKL we don't have a way to detect DDI-E so we rely on VBT. */ - if (IS_GEN9_BC(dev_priv) && + if (IS_DISPLAY_VER(dev_priv, 9) && intel_bios_is_port_present(dev_priv, PORT_E)) intel_ddi_init(dev_priv, PORT_E); diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 1f5aa98bfcd5..490b6550a810 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -569,7 +569,7 @@ static int i915_dmc_info(struct seq_file *m, void *unused) } else { dc5_reg = IS_BROXTON(dev_priv) ? BXT_CSR_DC3_DC5_COUNT : SKL_CSR_DC3_DC5_COUNT; - if (!IS_GEN9_LP(dev_priv)) + if (!IS_GEMINILAKE(dev_priv) && !IS_BROXTON(dev_priv)) dc6_reg = SKL_CSR_DC5_DC6_COUNT; } diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 99126caf5747..9419ae8c6111 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -709,7 +709,7 @@ static bool hsw_power_well_enabled(struct drm_i915_private *dev_priv, * BIOS's own request bits, which are forced-on for these power wells * when exiting DC5/6. */ - if (IS_DISPLAY_VER(dev_priv, 9) && !IS_GEN9_LP(dev_priv) && + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv) && (id == SKL_DISP_PW_1 || id == SKL_DISP_PW_MISC_IO)) val |= intel_de_read(dev_priv, regs->bios); @@ -809,7 +809,7 @@ static u32 gen9_dc_mask(struct drm_i915_private *dev_priv) | DC_STATE_EN_DC9; else if (IS_DISPLAY_VER(dev_priv, 11)) mask |= DC_STATE_EN_UPTO_DC6 | DC_STATE_EN_DC9; - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) mask |= DC_STATE_EN_DC9; else mask |= DC_STATE_EN_UPTO_DC6; @@ -1060,7 +1060,7 @@ static void gen9_enable_dc5(struct drm_i915_private *dev_priv) drm_dbg_kms(&dev_priv->drm, "Enabling DC5\n"); /* Wa Display #1183: skl,kbl,cfl */ - if (IS_GEN9_BC(dev_priv)) + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) intel_de_write(dev_priv, GEN8_CHICKEN_DCPR_1, intel_de_read(dev_priv, GEN8_CHICKEN_DCPR_1) | SKL_SELECT_ALTERNATE_DC_EXIT); @@ -1087,7 +1087,7 @@ static void skl_enable_dc6(struct drm_i915_private *dev_priv) drm_dbg_kms(&dev_priv->drm, "Enabling DC6\n"); /* Wa Display #1183: skl,kbl,cfl */ - if (IS_GEN9_BC(dev_priv)) + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) intel_de_write(dev_priv, GEN8_CHICKEN_DCPR_1, intel_de_read(dev_priv, GEN8_CHICKEN_DCPR_1) | SKL_SELECT_ALTERNATE_DC_EXIT); @@ -1189,7 +1189,7 @@ static void gen9_disable_dc_states(struct drm_i915_private *dev_priv) gen9_assert_dbuf_enabled(dev_priv); - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_verify_ddi_phy_power_wells(dev_priv); if (DISPLAY_VER(dev_priv) >= 11) @@ -4537,10 +4537,10 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, max_dc = 3; else if (DISPLAY_VER(dev_priv) >= 12) max_dc = 4; - else if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv) || IS_GEN9_BC(dev_priv)) - max_dc = 2; - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) max_dc = 1; + else if (DISPLAY_VER(dev_priv) >= 9) + max_dc = 2; else max_dc = 0; @@ -4549,7 +4549,8 @@ static u32 get_allowed_dc_mask(const struct 
drm_i915_private *dev_priv, * not depending on the DMC firmware. It's needed by system * suspend/resume, so allow it unconditionally. */ - mask = IS_GEN9_LP(dev_priv) || DISPLAY_VER(dev_priv) >= 11 ? + mask = IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) || + DISPLAY_VER(dev_priv) >= 11 ? DC_STATE_EN_DC9 : 0; if (!dev_priv->params.disable_power_well) @@ -4692,7 +4693,7 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) err = set_power_wells(power_domains, glk_power_wells); } else if (IS_BROXTON(dev_priv)) { err = set_power_wells(power_domains, bxt_power_wells); - } else if (IS_GEN9_BC(dev_priv)) { + } else if (IS_DISPLAY_VER(dev_priv, 9)) { err = set_power_wells(power_domains, skl_power_wells); } else if (IS_CHERRYVIEW(dev_priv)) { err = set_power_wells(power_domains, chv_power_wells); @@ -5623,10 +5624,10 @@ void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume) icl_display_core_init(i915, resume); } else if (IS_CANNONLAKE(i915)) { cnl_display_core_init(i915, resume); - } else if (IS_GEN9_BC(i915)) { - skl_display_core_init(i915, resume); - } else if (IS_GEN9_LP(i915)) { + } else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) { bxt_display_core_init(i915, resume); + } else if (IS_DISPLAY_VER(i915, 9)) { + skl_display_core_init(i915, resume); } else if (IS_CHERRYVIEW(i915)) { mutex_lock(&power_domains->lock); chv_phy_control_init(i915); @@ -5784,10 +5785,10 @@ void intel_power_domains_suspend(struct drm_i915_private *i915, icl_display_core_uninit(i915); else if (IS_CANNONLAKE(i915)) cnl_display_core_uninit(i915); - else if (IS_GEN9_BC(i915)) - skl_display_core_uninit(i915); - else if (IS_GEN9_LP(i915)) + else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) bxt_display_core_uninit(i915); + else if (IS_DISPLAY_VER(i915, 9)) + skl_display_core_uninit(i915); power_domains->display_core_suspended = true; } @@ -5908,7 +5909,8 @@ static void intel_power_domains_verify_state(struct drm_i915_private *i915) void intel_display_power_suspend_late(struct drm_i915_private *i915) { - if (DISPLAY_VER(i915) >= 11 || IS_GEN9_LP(i915)) { + if (DISPLAY_VER(i915) >= 11 || IS_GEMINILAKE(i915) || + IS_BROXTON(i915)) { bxt_enable_dc9(i915); /* Tweaked Wa_14010685332:icp,jsp,mcc */ if (INTEL_PCH_TYPE(i915) >= PCH_ICP && INTEL_PCH_TYPE(i915) <= PCH_MCC) @@ -5921,7 +5923,8 @@ void intel_display_power_suspend_late(struct drm_i915_private *i915) void intel_display_power_resume_early(struct drm_i915_private *i915) { - if (DISPLAY_VER(i915) >= 11 || IS_GEN9_LP(i915)) { + if (DISPLAY_VER(i915) >= 11 || IS_GEMINILAKE(i915) || + IS_BROXTON(i915)) { gen9_sanitize_dc_state(i915); bxt_disable_dc9(i915); /* Tweaked Wa_14010685332:icp,jsp,mcc */ @@ -5938,7 +5941,7 @@ void intel_display_power_suspend(struct drm_i915_private *i915) if (DISPLAY_VER(i915) >= 11) { icl_display_core_uninit(i915); bxt_enable_dc9(i915); - } else if (IS_GEN9_LP(i915)) { + } else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) { bxt_display_core_uninit(i915); bxt_enable_dc9(i915); } else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) { @@ -5959,7 +5962,7 @@ void intel_display_power_resume(struct drm_i915_private *i915) DC_STATE_EN_UPTO_DC5) gen9_enable_dc5(i915); } - } else if (IS_GEN9_LP(i915)) { + } else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) { bxt_disable_dc9(i915); bxt_display_core_init(i915, true); if (i915->csr.dmc_payload && diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index a560468765c0..a5231ac3443a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ 
b/drivers/gpu/drm/i915/display/intel_dp.c @@ -301,10 +301,10 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) max_rate = ehl_max_source_rate(intel_dp); else max_rate = icl_max_source_rate(intel_dp); - } else if (IS_GEN9_LP(dev_priv)) { + } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { source_rates = bxt_rates; size = ARRAY_SIZE(bxt_rates); - } else if (IS_GEN9_BC(dev_priv)) { + } else if (IS_DISPLAY_VER(dev_priv, 9)) { source_rates = skl_rates; size = ARRAY_SIZE(skl_rates); } else if ((IS_HASWELL(dev_priv) && !IS_HSW_ULX(dev_priv)) || diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 2daa3f67791e..180f97cd74cb 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -154,7 +154,7 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->limited_color_range = intel_dp_limited_color_range(pipe_config, conn_state); - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) pipe_config->lane_lat_optim_mask = bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count); diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c b/drivers/gpu/drm/i915/display/intel_dpll.c index 3e3c5eed1600..158f271299a4 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll.c +++ b/drivers/gpu/drm/i915/display/intel_dpll.c @@ -366,13 +366,11 @@ static bool intel_pll_is_valid(struct drm_i915_private *dev_priv, if (clock->m1 < limit->m1.min || limit->m1.max < clock->m1) return false; - if (!IS_PINEVIEW(dev_priv) && !IS_VALLEYVIEW(dev_priv) && - !IS_CHERRYVIEW(dev_priv) && !IS_GEN9_LP(dev_priv)) + if (!IS_PINEVIEW(dev_priv) && !IS_LP(dev_priv)) if (clock->m1 <= clock->m2) return false; - if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv) && - !IS_GEN9_LP(dev_priv)) { + if (!IS_LP(dev_priv)) { if (clock->p < limit->p.min || limit->p.max < clock->p) return false; if (clock->m < limit->m.min || limit->m.max < clock->m) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 1ae158d12c07..e32de7c848e9 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -4441,10 +4441,10 @@ void intel_shared_dpll_init(struct drm_device *dev) dpll_mgr = &icl_pll_mgr; else if (IS_CANNONLAKE(dev_priv)) dpll_mgr = &cnl_pll_mgr; - else if (IS_GEN9_BC(dev_priv)) - dpll_mgr = &skl_pll_mgr; - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) dpll_mgr = &bxt_pll_mgr; + else if (IS_DISPLAY_VER(dev_priv, 9)) + dpll_mgr = &skl_pll_mgr; else if (HAS_DDI(dev_priv)) dpll_mgr = &hsw_pll_mgr; else if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 986bbbe3b12f..04d9c7d22b04 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -302,7 +302,7 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) int threshold = dev_priv->fbc.threshold; /* Display WA #0529: skl, kbl, bxt. */ - if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) { + if (IS_DISPLAY_VER(dev_priv, 9)) { u32 val = intel_de_read(dev_priv, CHICKEN_MISC_4); val &= ~(FBC_STRIDE_OVERRIDE | FBC_STRIDE_MASK); @@ -445,7 +445,8 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, * reserved range size, so it always assumes the maximum (8mb) is used. 
* If we enable FBC using a CFB on that memory range we'll get FIFO * underruns, even if that range is not reserved by the BIOS. */ - if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv)) + if (IS_BROADWELL(dev_priv) || (IS_DISPLAY_VER(dev_priv, 9) && + !IS_BROXTON(dev_priv))) end = resource_size(&dev_priv->dsm) - 8 * 1024 * 1024; else end = U64_MAX; @@ -759,7 +760,7 @@ static u16 intel_fbc_gen9_wa_cfb_stride(struct drm_i915_private *dev_priv) struct intel_fbc *fbc = &dev_priv->fbc; struct intel_fbc_state_cache *cache = &fbc->state_cache; - if ((IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) && + if ((IS_DISPLAY_VER(dev_priv, 9)) && cache->fb.modifier != I915_FORMAT_MOD_X_TILED) return DIV_ROUND_UP(cache->plane.src_w, 32 * fbc->threshold) * 8; else diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index 8ddc20daef64..13ba280d0228 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -107,9 +107,9 @@ static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *dev_priv, return &gmbus_pins_icp[pin]; else if (HAS_PCH_CNP(dev_priv)) return &gmbus_pins_cnp[pin]; - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) return &gmbus_pins_bxt[pin]; - else if (IS_GEN9_BC(dev_priv)) + else if (IS_DISPLAY_VER(dev_priv, 9)) return &gmbus_pins_skl[pin]; else if (IS_BROADWELL(dev_priv)) return &gmbus_pins_bdw[pin]; @@ -128,9 +128,9 @@ bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv, size = ARRAY_SIZE(gmbus_pins_icp); else if (HAS_PCH_CNP(dev_priv)) size = ARRAY_SIZE(gmbus_pins_cnp); - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) size = ARRAY_SIZE(gmbus_pins_bxt); - else if (IS_GEN9_BC(dev_priv)) + else if (IS_DISPLAY_VER(dev_priv, 9)) size = ARRAY_SIZE(gmbus_pins_skl); else if (IS_BROADWELL(dev_priv)) size = ARRAY_SIZE(gmbus_pins_bdw); @@ -600,7 +600,7 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num, int ret = 0; /* Display WA #0868: skl,bxt,kbl,cfl,glk,cnl */ - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_gmbus_clock_gating(dev_priv, false); else if (HAS_PCH_SPT(dev_priv) || HAS_PCH_CNP(dev_priv)) pch_gmbus_clock_gating(dev_priv, false); @@ -713,7 +713,7 @@ timeout: out: /* Display WA #0868: skl,bxt,kbl,cfl,glk,cnl */ - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_gmbus_clock_gating(dev_priv, true); else if (HAS_PCH_SPT(dev_priv) || HAS_PCH_CNP(dev_priv)) pch_gmbus_clock_gating(dev_priv, true); diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index d8570e14fe60..75050a040577 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -286,11 +286,12 @@ static int intel_hdcp_load_keys(struct drm_i915_private *dev_priv) /* * Initiate loading the HDCP key from fuses. * - * BXT+ platforms, HDCP key needs to be loaded by SW. Only Gen 9 - * platforms except BXT and GLK, differ in the key load trigger process - * from other platforms. So GEN9_BC uses the GT Driver Mailbox i/f. + * BXT+ platforms, HDCP key needs to be loaded by SW. Only display + * version 9 platforms (minus BXT) differ in the key load trigger + * process from other platforms. These platforms use the GT Driver + * Mailbox interface. 
*/ - if (IS_GEN9_BC(dev_priv)) { + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) { ret = sandybridge_pcode_write(dev_priv, SKL_PCODE_LOAD_HDCP_KEYS, 1); if (ret) { diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index d69f0a6dc26d..f2d1fef8bd9d 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1841,7 +1841,8 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi, return MODE_CLOCK_RANGE; /* BXT/GLK DPLL can't generate 223-240 MHz */ - if (IS_GEN9_LP(dev_priv) && clock > 223333 && clock < 240000) + if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) && + clock > 223333 && clock < 240000) return MODE_CLOCK_RANGE; /* CHV DPLL can't generate 216-240 MHz */ @@ -2714,7 +2715,7 @@ static u8 intel_hdmi_ddc_pin(struct intel_encoder *encoder) ddc_pin = dg1_port_to_ddc_pin(dev_priv, port); else if (IS_ROCKETLAKE(dev_priv)) ddc_pin = rkl_port_to_ddc_pin(dev_priv, port); - else if (IS_GEN9_BC(dev_priv) && HAS_PCH_TGP(dev_priv)) + else if (IS_DISPLAY_VER(dev_priv, 9) && HAS_PCH_TGP(dev_priv)) ddc_pin = gen9bc_tgp_port_to_ddc_pin(dev_priv, port); else if (HAS_PCH_MCC(dev_priv)) ddc_pin = mcc_port_to_ddc_pin(dev_priv, port); @@ -2722,7 +2723,7 @@ static u8 intel_hdmi_ddc_pin(struct intel_encoder *encoder) ddc_pin = icl_port_to_ddc_pin(dev_priv, port); else if (HAS_PCH_CNP(dev_priv)) ddc_pin = cnp_port_to_ddc_pin(dev_priv, port); - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) ddc_pin = bxt_port_to_ddc_pin(dev_priv, port); else if (IS_CHERRYVIEW(dev_priv)) ddc_pin = chv_port_to_ddc_pin(dev_priv, port); diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index 10022d1575e1..2fcbb2ba2d78 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -2161,7 +2161,7 @@ intel_panel_init_backlight_funcs(struct intel_panel *panel) intel_dsi_dcs_init_backlight_funcs(connector) == 0) return; - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { panel->backlight.pwm_funcs = &bxt_pwm_funcs; } else if (INTEL_PCH_TYPE(dev_priv) >= PCH_CNP) { panel->backlight.pwm_funcs = &cnp_pwm_funcs; diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index c55da130773b..3a0985f1f531 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -313,10 +313,7 @@ void intel_pps_reset_all(struct drm_i915_private *dev_priv) { struct intel_encoder *encoder; - if (drm_WARN_ON(&dev_priv->drm, - !(IS_VALLEYVIEW(dev_priv) || - IS_CHERRYVIEW(dev_priv) || - IS_GEN9_LP(dev_priv)))) + if (drm_WARN_ON(&dev_priv->drm, !IS_LP(dev_priv))) return; /* @@ -338,7 +335,7 @@ void intel_pps_reset_all(struct drm_i915_private *dev_priv) if (encoder->type != INTEL_OUTPUT_EDP) continue; - if (IS_GEN9_LP(dev_priv)) + if (DISPLAY_VER(dev_priv) >= 9) intel_dp->pps.pps_reset = true; else intel_dp->pps.pps_pipe = INVALID_PIPE; @@ -361,7 +358,7 @@ static void intel_pps_get_registers(struct intel_dp *intel_dp, memset(regs, 0, sizeof(*regs)); - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) pps_idx = bxt_power_sequencer_idx(intel_dp); else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) pps_idx = vlv_power_sequencer_pipe(intel_dp); @@ -372,7 +369,8 @@ static void intel_pps_get_registers(struct intel_dp *intel_dp, regs->pp_off = PP_OFF_DELAYS(pps_idx); /* 
Cycle delay moved from PP_DIVISOR to PP_CONTROL */ - if (IS_GEN9_LP(dev_priv) || INTEL_PCH_TYPE(dev_priv) >= PCH_CNP) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) || + INTEL_PCH_TYPE(dev_priv) >= PCH_CNP) regs->pp_div = INVALID_MMIO_REG; else regs->pp_div = PP_DIVISOR(pps_idx); @@ -1399,7 +1397,7 @@ void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv) void intel_pps_setup(struct drm_i915_private *i915) { - if (HAS_PCH_SPLIT(i915) || IS_GEN9_LP(i915)) + if (HAS_PCH_SPLIT(i915) || IS_GEMINILAKE(i915) || IS_BROXTON(i915)) i915->pps_mmio_base = PCH_PPS_BASE; else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) i915->pps_mmio_base = VLV_PPS_BASE; diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 1059a26c1f58..9bee99fe5495 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -297,7 +297,7 @@ static int intel_dsi_compute_config(struct intel_encoder *encoder, else pipe_config->pipe_bpp = 18; - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { /* Enable Frame time stamp based scanline reporting */ pipe_config->mode_flags |= I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP; @@ -522,7 +522,7 @@ static void intel_dsi_device_ready(struct intel_encoder *encoder) if (IS_GEMINILAKE(dev_priv)) glk_dsi_device_ready(encoder); - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_dsi_device_ready(encoder); else vlv_dsi_device_ready(encoder); @@ -601,7 +601,7 @@ static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder) drm_dbg_kms(&dev_priv->drm, "\n"); for_each_dsi_port(port, intel_dsi->ports) { /* Common bit for both MIPI Port A & MIPI Port C on VLV/CHV */ - i915_reg_t port_ctrl = IS_GEN9_LP(dev_priv) ? + i915_reg_t port_ctrl = IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) ? BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(PORT_A); u32 val; @@ -621,7 +621,7 @@ static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder) * On VLV/CHV, wait till Clock lanes are in LP-00 state for MIPI * Port A only. MIPI Port C has no similar bit for checking. */ - if ((IS_GEN9_LP(dev_priv) || port == PORT_A) && + if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) || port == PORT_A) && intel_de_wait_for_clear(dev_priv, port_ctrl, AFE_LATCHOUT, 30)) drm_err(&dev_priv->drm, "DSI LP not going Low\n"); @@ -646,7 +646,7 @@ static void intel_dsi_port_enable(struct intel_encoder *encoder, if (intel_dsi->dual_link == DSI_DUAL_LINK_FRONT_BACK) { u32 temp; - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { for_each_dsi_port(port, intel_dsi->ports) { temp = intel_de_read(dev_priv, MIPI_CTRL(port)); @@ -666,7 +666,7 @@ static void intel_dsi_port_enable(struct intel_encoder *encoder, } for_each_dsi_port(port, intel_dsi->ports) { - i915_reg_t port_ctrl = IS_GEN9_LP(dev_priv) ? + i915_reg_t port_ctrl = IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) ? BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port); u32 temp; @@ -703,7 +703,7 @@ static void intel_dsi_port_disable(struct intel_encoder *encoder) enum port port; for_each_dsi_port(port, intel_dsi->ports) { - i915_reg_t port_ctrl = IS_GEN9_LP(dev_priv) ? + i915_reg_t port_ctrl = IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) ? BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port); u32 temp; @@ -781,7 +781,7 @@ static void intel_dsi_pre_enable(struct intel_atomic_state *state, * The BIOS may leave the PLL in a wonky state where it doesn't * lock. 
It needs to be fully powered down to fix it. */ - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { bxt_dsi_pll_disable(encoder); bxt_dsi_pll_enable(encoder, pipe_config); } else { @@ -932,7 +932,7 @@ static void intel_dsi_post_disable(struct intel_atomic_state *state, drm_dbg_kms(&dev_priv->drm, "\n"); - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { intel_crtc_vblank_off(old_crtc_state); skl_scaler_disable(old_crtc_state); @@ -971,7 +971,7 @@ static void intel_dsi_post_disable(struct intel_atomic_state *state, val & ~MIPIO_RST_CTRL); } - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { bxt_dsi_pll_disable(encoder); } else { u32 val; @@ -1024,12 +1024,13 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, * configuration, otherwise accessing DSI registers will hang the * machine. See BSpec North Display Engine registers/MIPI[BXT]. */ - if (IS_GEN9_LP(dev_priv) && !bxt_dsi_pll_is_enabled(dev_priv)) + if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) && + !bxt_dsi_pll_is_enabled(dev_priv)) goto out_put_power; /* XXX: this only works for one DSI output */ for_each_dsi_port(port, intel_dsi->ports) { - i915_reg_t ctrl_reg = IS_GEN9_LP(dev_priv) ? + i915_reg_t ctrl_reg = IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) ? BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port); bool enabled = intel_de_read(dev_priv, ctrl_reg) & DPI_ENABLE; @@ -1055,7 +1056,7 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, if (!(intel_de_read(dev_priv, MIPI_DEVICE_READY(port)) & DEVICE_READY)) continue; - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { u32 tmp = intel_de_read(dev_priv, MIPI_CTRL(port)); tmp &= BXT_PIPE_SELECT_MASK; tmp >>= BXT_PIPE_SELECT_SHIFT; @@ -1251,7 +1252,7 @@ static void intel_dsi_get_config(struct intel_encoder *encoder, pipe_config->output_types |= BIT(INTEL_OUTPUT_DSI); - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { bxt_dsi_get_pipe_config(encoder, pipe_config); pclk = bxt_dsi_get_pclk(encoder, pipe_config); } else { @@ -1317,7 +1318,7 @@ static void set_dsi_timings(struct drm_encoder *encoder, hbp = txbyteclkhs(hbp, bpp, lane_count, intel_dsi->burst_mode_ratio); for_each_dsi_port(port, intel_dsi->ports) { - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { /* * Program hdisplay and vdisplay on MIPI transcoder. 
* This is different from calculated hactive and @@ -1407,7 +1408,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, tmp &= ~READ_REQUEST_PRIORITY_MASK; intel_de_write(dev_priv, MIPI_CTRL(port), tmp | READ_REQUEST_PRIORITY_HIGH); - } else if (IS_GEN9_LP(dev_priv)) { + } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { enum pipe pipe = intel_crtc->pipe; tmp = intel_de_read(dev_priv, MIPI_CTRL(port)); @@ -1445,7 +1446,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, if (intel_dsi->clock_stop) tmp |= CLOCKSTOP; - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { tmp |= BXT_DPHY_DEFEATURE_EN; if (!is_cmd_mode(intel_dsi)) tmp |= BXT_DEFEATURE_DPI_FIFO_CTR; @@ -1492,7 +1493,8 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, intel_de_write(dev_priv, MIPI_INIT_COUNT(port), txclkesc(intel_dsi->escape_clk_div, 100)); - if (IS_GEN9_LP(dev_priv) && (!intel_dsi->dual_link)) { + if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) && + !intel_dsi->dual_link) { /* * BXT spec says write MIPI_INIT_COUNT for * both the ports, even if only one is @@ -1570,7 +1572,7 @@ static void intel_dsi_unprepare(struct intel_encoder *encoder) /* Panel commands can be sent when clock is in LP11 */ intel_de_write(dev_priv, MIPI_DEVICE_READY(port), 0x0); - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_dsi_reset_clocks(encoder, port); else vlv_dsi_reset_clocks(encoder, port); @@ -1828,7 +1830,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) if (!intel_bios_is_dsi_present(dev_priv, &port)) return; - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) dev_priv->mipi_mmio_base = BXT_MIPI_BASE; else dev_priv->mipi_mmio_base = VLV_MIPI_BASE; @@ -1854,7 +1856,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) intel_encoder->compute_config = intel_dsi_compute_config; intel_encoder->pre_enable = intel_dsi_pre_enable; - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) intel_encoder->enable = bxt_dsi_enable; intel_encoder->disable = intel_dsi_disable; intel_encoder->post_disable = intel_dsi_post_disable; @@ -1874,7 +1876,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) * On BYT/CHV, pipe A maps to MIPI DSI port A, pipe B maps to MIPI DSI * port C. BXT isn't limited like this. 
*/ - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) intel_encoder->pipe_mask = ~0; else if (port == PORT_A) intel_encoder->pipe_mask = BIT(PIPE_A); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7eefbdec25a2..9aff790817c0 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -194,7 +194,7 @@ static void intel_hpd_init_pins(struct drm_i915_private *dev_priv) if (DISPLAY_VER(dev_priv) >= 11) hpd->hpd = hpd_gen11; - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) hpd->hpd = hpd_bxt; else if (DISPLAY_VER(dev_priv) >= 8) hpd->hpd = hpd_bdw; @@ -2458,7 +2458,7 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) found = true; } - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { u32 hotplug_trigger = iir & BXT_DE_PORT_HOTPLUG_MASK; if (hotplug_trigger) { @@ -2474,7 +2474,8 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) } } - if (IS_GEN9_LP(dev_priv) && (iir & BXT_DE_PORT_GMBUS)) { + if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) && + (iir & BXT_DE_PORT_GMBUS)) { gmbus_irq_handler(dev_priv); found = true; } @@ -3717,7 +3718,7 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) if (DISPLAY_VER(dev_priv) <= 10) de_misc_masked |= GEN8_DE_MISC_GSE; - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) de_port_masked |= BXT_DE_PORT_GMBUS; if (DISPLAY_VER(dev_priv) >= 11) { @@ -3732,7 +3733,7 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) gen8_de_pipe_flip_done_mask(dev_priv); de_port_enables = de_port_masked; - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) de_port_enables |= BXT_DE_PORT_HOTPLUG_MASK; else if (IS_BROADWELL(dev_priv)) de_port_enables |= BDW_DE_PORT_HOTPLUG_MASK; @@ -4317,7 +4318,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv) dev_priv->display.hpd_irq_setup = dg1_hpd_irq_setup; else if (DISPLAY_VER(dev_priv) >= 11) dev_priv->display.hpd_irq_setup = gen11_hpd_irq_setup; - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) dev_priv->display.hpd_irq_setup = bxt_hpd_irq_setup; else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) dev_priv->display.hpd_irq_setup = icp_hpd_irq_setup; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 066abaa73a06..32f301ca3ab0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3654,13 +3654,13 @@ u8 intel_enabled_dbuf_slices_mask(struct drm_i915_private *dev_priv) */ static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv) { - return IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv); + return IS_DISPLAY_VER(dev_priv, 9); } static bool intel_has_sagv(struct drm_i915_private *dev_priv) { - return (IS_GEN9_BC(dev_priv) || DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) && + return DISPLAY_VER(dev_priv) >= 9 && !IS_LP(dev_priv) && dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED; } @@ -5258,7 +5258,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, lines = div_round_up_fixed16(selected_result, wp->plane_blocks_per_line); - if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) { + if (IS_DISPLAY_VER(dev_priv, 9)) { /* Display WA #1125: skl,bxt,kbl */ if (level == 0 && wp->rc_surface) blocks += fixed16_to_u32_round_up(wp->y_tile_minimum); @@ -5375,7 +5375,7 @@ static 
void skl_compute_transition_wm(struct drm_i915_private *dev_priv, * WaDisableTWM:skl,kbl,cfl,bxt * Transition WM are not recommended by HW team for GEN9 */ - if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) + if (IS_DISPLAY_VER(dev_priv, 9)) return; if (DISPLAY_VER(dev_priv) >= 11) -- cgit From 9e2eb6d5380e9dadcd2baecb51f238e5eba94bee Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 18 Mar 2021 13:02:02 -0400 Subject: drm/i915/dpcd_bl: Don't try vesa interface unless specified by VBT It looks like there is actually another subset of laptops on the market that don't support the Intel HDR backlight interface, but do advertise support for the VESA DPCD backlight interface despite the fact that it doesn't seem to work. Note, though, that I'm not entirely clear on this: on one of the machines where this issue was observed, I also noticed that we appeared to be rejecting the VBT defined backlight frequency in intel_dp_aux_vesa_calc_max_backlight(). It's noted in this function that: /* Use highest possible value of Pn for more granularity of brightness * adjustment while satisfying the conditions below. * ... * - FxP is within 25% of desired value. * Note: 25% is an arbitrary value and may need some tweaking. */ So it's possible that this value might just need to be tweaked, but for now let's just disable the VESA backlight interface unless it's specified in the VBT, just to be safe. We might be able to try enabling this again by default in the future. Fixes: 2227816e647a ("drm/i915/dp: Allow forcing specific interfaces through enable_dpcd_backlight") Cc: Jani Nikula Cc: Rodrigo Vivi Bugzilla: https://gitlab.freedesktop.org/drm/intel/-/issues/3169 Signed-off-by: Lyude Paul Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210318170204.513000-1-lyude@redhat.com --- drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 651884390137..4f8337c7fd2e 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -646,7 +646,6 @@ int intel_dp_aux_init_backlight_funcs(struct intel_connector *connector) break; case INTEL_BACKLIGHT_DISPLAY_DDI: try_intel_interface = true; - try_vesa_interface = true; break; default: return -ENODEV; -- cgit From f99b805fb9413ff007ca0b6add871737664117dd Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Sat, 27 Mar 2021 02:59:45 +0200 Subject: drm/i915: Don't zero out the Y plane's watermarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't zero out the watermarks for the Y plane since we've already computed them when computing the UV plane's watermarks (the UV plane always appears before the Y plane when iterating through the planes). This leads to allocating no DDB for the Y plane since .min_ddb_alloc also gets zeroed. And that of course leads to underruns when scanning out planar formats.
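A minimal sketch of the ordering issue the fix below addresses, with names simplified rather than taken verbatim from i915: the planar slave (Y) plane has to bail out before the memset, otherwise it wipes the watermarks its master (UV) plane already filled in for it.

	static int build_plane_wm(struct skl_plane_wm *wm,
				  const struct intel_plane_state *plane_state)
	{
		/* Slave watermarks were already computed by the master */
		if (plane_state->planar_slave)
			return 0;

		/* Only planes that compute their own WM may zero it */
		memset(wm, 0, sizeof(*wm));

		/* ... compute watermarks for this plane (and its slave) ... */
		return 0;
	}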
Cc: stable@vger.kernel.org Cc: Stanislav Lisovskiy Fixes: dbf71381d733 ("drm/i915: Nuke intel_atomic_crtc_state_for_each_plane_state() from skl+ wm code") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210327005945.4929-1-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 32f301ca3ab0..3ecbd62915cf 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5511,12 +5511,12 @@ static int icl_build_plane_wm(struct intel_crtc_state *crtc_state, struct skl_plane_wm *wm = &crtc_state->wm.skl.raw.planes[plane_id]; int ret; - memset(wm, 0, sizeof(*wm)); - /* Watermarks calculated in master */ if (plane_state->planar_slave) return 0; + memset(wm, 0, sizeof(*wm)); + if (plane_state->planar_linked_plane) { const struct drm_framebuffer *fb = plane_state->hw.fb; enum plane_id y_plane_id = plane_state->planar_linked_plane->id; -- cgit From 28972f821ea773edaae2d40444bd08a33c9a46a5 Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Thu, 8 Apr 2021 13:56:42 +0530 Subject: drm/i915/hdcp: Fix uninitialized symbol 'msg_end' Fix an uninitialized symbol error reported by static analysis tools. v2: - use ktime_set(0, 0) instead to initialize to zero. [Ankit] Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Anshuman Gupta Reviewed-by: Ankit Nautiyal Link: https://patchwork.freedesktop.org/patch/msgid/20210408082642.27066-1-anshuman.gupta@intel.com --- drivers/gpu/drm/i915/display/intel_dp_hdcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c index 90868e156c69..2dd9360bdf9a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -532,7 +532,7 @@ int intel_dp_hdcp2_read_msg(struct intel_digital_port *dig_port, u8 *byte = buf; ssize_t ret, bytes_to_recv, len; const struct hdcp2_dp_msg_data *hdcp2_msg_data; - ktime_t msg_end; + ktime_t msg_end = ktime_set(0, 0); bool msg_expired; hdcp2_msg_data = get_hdcp2_dp_msg_data(msg_id); -- cgit From a844cfbe648d15d9f1031c45508c194f2d61c917 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 8 Apr 2021 13:31:48 -0700 Subject: drm/i915: Skip display interruption setup when display is not available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return earlier in the functions doing interrupt setup for GEN8+, also adding a warning in gen8_de_irq_handler() to let us know that something else is still missing.
Reviewed-by: Radhakrishna Sripada Cc: Ville Syrjälä Cc: Lucas De Marchi Signed-off-by: José Roberto de Souza Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210408203150.237947-1-jose.souza@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 9aff790817c0..920327bdcb10 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2421,6 +2421,8 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) u32 iir; enum pipe pipe; + drm_WARN_ON_ONCE(&dev_priv->drm, !HAS_DISPLAY(dev_priv)); + if (master_ctl & GEN8_DE_MISC_IRQ) { iir = intel_uncore_read(&dev_priv->uncore, GEN8_DE_MISC_IIR); if (iir) { @@ -3059,14 +3061,13 @@ static void cnp_display_clock_wa(struct drm_i915_private *dev_priv) } } -static void gen8_irq_reset(struct drm_i915_private *dev_priv) +static void gen8_display_irq_reset(struct drm_i915_private *dev_priv) { struct intel_uncore *uncore = &dev_priv->uncore; enum pipe pipe; - gen8_master_intr_disable(dev_priv->uncore.regs); - - gen8_gt_irq_reset(&dev_priv->gt); + if (!HAS_DISPLAY(dev_priv)) + return; intel_uncore_write(uncore, EDP_PSR_IMR, 0xffffffff); intel_uncore_write(uncore, EDP_PSR_IIR, 0xffffffff); @@ -3078,6 +3079,16 @@ static void gen8_irq_reset(struct drm_i915_private *dev_priv) GEN3_IRQ_RESET(uncore, GEN8_DE_PORT_); GEN3_IRQ_RESET(uncore, GEN8_DE_MISC_); +} + +static void gen8_irq_reset(struct drm_i915_private *dev_priv) +{ + struct intel_uncore *uncore = &dev_priv->uncore; + + gen8_master_intr_disable(dev_priv->uncore.regs); + + gen8_gt_irq_reset(&dev_priv->gt); + gen8_display_irq_reset(dev_priv); GEN3_IRQ_RESET(uncore, GEN8_PCU_); if (HAS_PCH_SPLIT(dev_priv)) @@ -3093,6 +3104,9 @@ static void gen11_display_irq_reset(struct drm_i915_private *dev_priv) u32 trans_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C) | BIT(TRANSCODER_D); + if (!HAS_DISPLAY(dev_priv)) + return; + intel_uncore_write(uncore, GEN11_DISPLAY_INT_CTL, 0); if (DISPLAY_VER(dev_priv) >= 12) { @@ -3715,6 +3729,9 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) BIT(TRANSCODER_C) | BIT(TRANSCODER_D); enum pipe pipe; + if (!HAS_DISPLAY(dev_priv)) + return; + if (DISPLAY_VER(dev_priv) <= 10) de_misc_masked |= GEN8_DE_MISC_GSE; @@ -3798,6 +3815,16 @@ static void gen8_irq_postinstall(struct drm_i915_private *dev_priv) gen8_master_intr_enable(dev_priv->uncore.regs); } +static void gen11_de_irq_postinstall(struct drm_i915_private *dev_priv) +{ + if (!HAS_DISPLAY(dev_priv)) + return; + + gen8_de_irq_postinstall(dev_priv); + + intel_uncore_write(&dev_priv->uncore, GEN11_DISPLAY_INT_CTL, + GEN11_DISPLAY_IRQ_ENABLE); +} static void gen11_irq_postinstall(struct drm_i915_private *dev_priv) { @@ -3808,12 +3835,10 @@ static void gen11_irq_postinstall(struct drm_i915_private *dev_priv) icp_irq_postinstall(dev_priv); gen11_gt_irq_postinstall(&dev_priv->gt); - gen8_de_irq_postinstall(dev_priv); + gen11_de_irq_postinstall(dev_priv); GEN3_IRQ_INIT(uncore, GEN11_GU_MISC_, ~gu_misc_masked, gu_misc_masked); - intel_uncore_write(&dev_priv->uncore, GEN11_DISPLAY_INT_CTL, GEN11_DISPLAY_IRQ_ENABLE); - if (HAS_MASTER_UNIT_IRQ(dev_priv)) { dg1_master_intr_enable(uncore->regs); intel_uncore_posting_read(&dev_priv->uncore, DG1_MSTR_UNIT_INTR); -- cgit From 61a60d7962a6e8ed94ece62764f94368cd6082b2 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza 
Date: Thu, 8 Apr 2021 13:31:49 -0700 Subject: drm/i915: Do not set any power wells when there is no display MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Power wells are only part of the display block and are not necessary when running a headless driver. Reviewed-by: Radhakrishna Sripada Cc: Lucas De Marchi Signed-off-by: José Roberto de Souza Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210408203150.237947-2-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_display_power.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 9419ae8c6111..53311b9764dc 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -4674,7 +4674,10 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) * The enabling order will be from lower to higher indexed wells, * the disabling order is reversed. */ - if (IS_ALDERLAKE_S(dev_priv) || IS_DG1(dev_priv)) { + if (!HAS_DISPLAY(dev_priv)) { + power_domains->power_well_count = 0; + err = 0; + } else if (IS_ALDERLAKE_S(dev_priv) || IS_DG1(dev_priv)) { err = set_power_wells_mask(power_domains, tgl_power_wells, BIT_ULL(TGL_DISP_PW_TC_COLD_OFF)); } else if (IS_ROCKETLAKE(dev_priv)) { -- cgit From 5df7bd130818dfdc9047c2a81b19737d66e55f9a Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 8 Apr 2021 13:31:50 -0700 Subject: drm/i915: skip display initialization when there is no display MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Display features should not be initialized or de-initialized when there is no display. Skip modeset initialization, output setup, plane, crtc, encoder, connector registration, display cdclk and rawclk initialization, display core initialization, etc. Skip the functionality at as high a level as possible, and remove any redundant checks. If the functionality is conditional on *other* display checks, do not add more. If the un-initialization has checks for initialization, do not add more. We explicitly do not care about any GMCH/VLV/CHV code paths, as they've always had and will have display. The recurring shape of these changes is sketched below.
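An illustrative sketch of the guard pattern this series applies (simplified; the real call sites are in the diff that follows): each display-only entry point returns early on headless hardware.

	int intel_display_suspend(struct drm_device *dev)
	{
		struct drm_i915_private *dev_priv = to_i915(dev);

		/* Nothing to suspend on headless hardware */
		if (!HAS_DISPLAY(dev_priv))
			return 0;

		/* ... suspend the display as before ... */
		return 0;
	}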
Reviewed-by: Radhakrishna Sripada Cc: Lucas De Marchi Signed-off-by: José Roberto de Souza Signed-off-by: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210408203150.237947-3-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 44 +++++++++++++++++----- drivers/gpu/drm/i915/display/intel_display_power.c | 36 ++++++++++++++++++ drivers/gpu/drm/i915/display/intel_dp.c | 6 +++ drivers/gpu/drm/i915/display/intel_gmbus.c | 3 -- drivers/gpu/drm/i915/display/intel_hotplug.c | 12 ++++++ drivers/gpu/drm/i915/display/intel_pps.c | 5 ++- drivers/gpu/drm/i915/i915_drv.c | 28 ++++++++++---- drivers/gpu/drm/i915/i915_suspend.c | 6 +++ 8 files changed, 120 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 4f8f994a639f..411b46c012f8 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1335,6 +1335,9 @@ u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv, struct intel_crtc *crtc; struct intel_plane *plane; + if (!HAS_DISPLAY(dev_priv)) + return 0; + /* * We assume the primary plane for pipe A has * the highest stride limits of them all, @@ -3838,6 +3841,9 @@ int intel_display_suspend(struct drm_device *dev) struct drm_atomic_state *state; int ret; + if (!HAS_DISPLAY(dev_priv)) + return 0; + state = drm_atomic_helper_suspend(dev); ret = PTR_ERR_OR_ZERO(state); if (ret) @@ -11429,6 +11435,9 @@ static const struct drm_mode_config_funcs intel_mode_funcs = { */ void intel_init_display_hooks(struct drm_i915_private *dev_priv) { + if (!HAS_DISPLAY(dev_priv)) + return; + intel_init_cdclk_hooks(dev_priv); intel_init_audio_hooks(dev_priv); @@ -11471,8 +11480,12 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) void intel_modeset_init_hw(struct drm_i915_private *i915) { - struct intel_cdclk_state *cdclk_state = - to_intel_cdclk_state(i915->cdclk.obj.state); + struct intel_cdclk_state *cdclk_state; + + if (!HAS_DISPLAY(i915)) + return; + + cdclk_state = to_intel_cdclk_state(i915->cdclk.obj.state); intel_update_cdclk(i915); intel_dump_cdclk_config(&i915->cdclk.hw, "Current CDCLK"); @@ -11788,6 +11801,9 @@ int intel_modeset_init_noirq(struct drm_i915_private *i915) /* FIXME: completely on the wrong abstraction layer */ intel_power_domains_init_hw(i915, false); + if (!HAS_DISPLAY(i915)) + return 0; + intel_csr_ucode_init(i915); i915->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0); @@ -11838,6 +11854,9 @@ int intel_modeset_init_nogem(struct drm_i915_private *i915) struct intel_crtc *crtc; int ret; + if (!HAS_DISPLAY(i915)) + return 0; + intel_init_pm(i915); intel_panel_sanitize_ssc(i915); @@ -11850,13 +11869,11 @@ int intel_modeset_init_nogem(struct drm_i915_private *i915) INTEL_NUM_PIPES(i915), INTEL_NUM_PIPES(i915) > 1 ? 
"s" : ""); - if (HAS_DISPLAY(i915)) { - for_each_pipe(i915, pipe) { - ret = intel_crtc_init(i915, pipe); - if (ret) { - intel_mode_config_cleanup(i915); - return ret; - } + for_each_pipe(i915, pipe) { + ret = intel_crtc_init(i915, pipe); + if (ret) { + intel_mode_config_cleanup(i915); + return ret; } } @@ -12791,6 +12808,9 @@ void intel_display_resume(struct drm_device *dev) struct drm_modeset_acquire_ctx ctx; int ret; + if (!HAS_DISPLAY(dev_priv)) + return; + dev_priv->modeset_restore_state = NULL; if (state) state->acquire_ctx = &ctx; @@ -12840,6 +12860,9 @@ static void intel_hpd_poll_fini(struct drm_i915_private *i915) /* part #1: call before irq uninstall */ void intel_modeset_driver_remove(struct drm_i915_private *i915) { + if (!HAS_DISPLAY(i915)) + return; + flush_workqueue(i915->flip_wq); flush_workqueue(i915->modeset_wq); @@ -12850,6 +12873,9 @@ void intel_modeset_driver_remove(struct drm_i915_private *i915) /* part #2: call after irq uninstall */ void intel_modeset_driver_remove_noirq(struct drm_i915_private *i915) { + if (!HAS_DISPLAY(i915)) + return; + /* * Due to the hpd irq storm handling the hotplug work can re-arm the * poll handlers. Hence disable polling after hpd handling is shut down. diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 53311b9764dc..ad30947c58a3 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -821,6 +821,9 @@ static void gen9_sanitize_dc_state(struct drm_i915_private *dev_priv) { u32 val; + if (!HAS_DISPLAY(dev_priv)) + return; + val = intel_de_read(dev_priv, DC_STATE_EN) & gen9_dc_mask(dev_priv); drm_dbg_kms(&dev_priv->drm, @@ -857,6 +860,9 @@ static void gen9_set_dc_state(struct drm_i915_private *dev_priv, u32 state) u32 val; u32 mask; + if (!HAS_DISPLAY(dev_priv)) + return; + if (drm_WARN_ON_ONCE(&dev_priv->drm, state & ~dev_priv->csr.allowed_dc_mask)) state &= dev_priv->csr.allowed_dc_mask; @@ -1181,6 +1187,9 @@ static void gen9_disable_dc_states(struct drm_i915_private *dev_priv) gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); + if (!HAS_DISPLAY(dev_priv)) + return; + dev_priv->display.get_cdclk(dev_priv, &cdclk_config); /* Can't read out voltage_level so can't use intel_cdclk_changed() */ drm_WARN_ON(&dev_priv->drm, @@ -4533,6 +4542,9 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, int requested_dc; int max_dc; + if (!HAS_DISPLAY(dev_priv)) + return 0; + if (IS_DG1(dev_priv)) max_dc = 3; else if (DISPLAY_VER(dev_priv) >= 12) @@ -5126,6 +5138,9 @@ static void skl_display_core_init(struct drm_i915_private *dev_priv, /* enable PCH reset handshake */ intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv)); + if (!HAS_DISPLAY(dev_priv)) + return; + /* enable PG1 and Misc I/O */ mutex_lock(&power_domains->lock); @@ -5150,6 +5165,9 @@ static void skl_display_core_uninit(struct drm_i915_private *dev_priv) struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *well; + if (!HAS_DISPLAY(dev_priv)) + return; + gen9_disable_dc_states(dev_priv); gen9_dbuf_disable(dev_priv); @@ -5190,6 +5208,9 @@ static void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume */ intel_pch_reset_handshake(dev_priv, false); + if (!HAS_DISPLAY(dev_priv)) + return; + /* Enable PG1 */ mutex_lock(&power_domains->lock); @@ -5211,6 +5232,9 @@ static void bxt_display_core_uninit(struct drm_i915_private *dev_priv) struct i915_power_domains *power_domains = 
&dev_priv->power_domains; struct i915_power_well *well; + if (!HAS_DISPLAY(dev_priv)) + return; + gen9_disable_dc_states(dev_priv); gen9_dbuf_disable(dev_priv); @@ -5244,6 +5268,9 @@ static void cnl_display_core_init(struct drm_i915_private *dev_priv, bool resume /* 1. Enable PCH Reset Handshake */ intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv)); + if (!HAS_DISPLAY(dev_priv)) + return; + /* 2-3. */ intel_combo_phy_init(dev_priv); @@ -5271,6 +5298,9 @@ static void cnl_display_core_uninit(struct drm_i915_private *dev_priv) struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *well; + if (!HAS_DISPLAY(dev_priv)) + return; + gen9_disable_dc_states(dev_priv); /* 1. Disable all display engine functions -> aready done */ @@ -5385,6 +5415,9 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv, /* 1. Enable PCH reset handshake. */ intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv)); + if (!HAS_DISPLAY(dev_priv)) + return; + /* 2. Initialize all combo phys */ intel_combo_phy_init(dev_priv); @@ -5429,6 +5462,9 @@ static void icl_display_core_uninit(struct drm_i915_private *dev_priv) struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *well; + if (!HAS_DISPLAY(dev_priv)) + return; + gen9_disable_dc_states(dev_priv); /* 1. Disable all display engine functions -> aready done */ diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index a5231ac3443a..6750949aa261 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5430,6 +5430,9 @@ void intel_dp_mst_suspend(struct drm_i915_private *dev_priv) { struct intel_encoder *encoder; + if (!HAS_DISPLAY(dev_priv)) + return; + for_each_intel_encoder(&dev_priv->drm, encoder) { struct intel_dp *intel_dp; @@ -5450,6 +5453,9 @@ void intel_dp_mst_resume(struct drm_i915_private *dev_priv) { struct intel_encoder *encoder; + if (!HAS_DISPLAY(dev_priv)) + return; + for_each_intel_encoder(&dev_priv->drm, encoder) { struct intel_dp *intel_dp; int ret; diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index 13ba280d0228..2ea6adc3bd3e 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -845,9 +845,6 @@ int intel_gmbus_setup(struct drm_i915_private *dev_priv) unsigned int pin; int ret; - if (!HAS_DISPLAY(dev_priv)) - return 0; - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) dev_priv->gpio_mmio_base = VLV_DISPLAY_BASE; else if (!HAS_GMCH(dev_priv)) diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c index f46a1b7190b8..47c85ac97c87 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -595,6 +595,9 @@ void intel_hpd_init(struct drm_i915_private *dev_priv) { int i; + if (!HAS_DISPLAY(dev_priv)) + return; + for_each_hpd_pin(i) { dev_priv->hotplug.stats[i].count = 0; dev_priv->hotplug.stats[i].state = HPD_ENABLED; @@ -670,6 +673,9 @@ static void i915_hpd_poll_init_work(struct work_struct *work) */ void intel_hpd_poll_enable(struct drm_i915_private *dev_priv) { + if (!HAS_DISPLAY(dev_priv)) + return; + WRITE_ONCE(dev_priv->hotplug.poll_enabled, true); /* @@ -702,6 +708,9 @@ void intel_hpd_poll_enable(struct drm_i915_private *dev_priv) */ void intel_hpd_poll_disable(struct drm_i915_private *dev_priv) { + if (!HAS_DISPLAY(dev_priv)) + return; + 
WRITE_ONCE(dev_priv->hotplug.poll_enabled, false); schedule_work(&dev_priv->hotplug.poll_init_work); } @@ -718,6 +727,9 @@ void intel_hpd_init_work(struct drm_i915_private *dev_priv) void intel_hpd_cancel_work(struct drm_i915_private *dev_priv) { + if (!HAS_DISPLAY(dev_priv)) + return; + spin_lock_irq(&dev_priv->irq_lock); dev_priv->hotplug.long_port_mask = 0; diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index 3a0985f1f531..0fd28902d779 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -316,6 +316,9 @@ void intel_pps_reset_all(struct drm_i915_private *dev_priv) if (drm_WARN_ON(&dev_priv->drm, !IS_LP(dev_priv))) return; + if (!HAS_DISPLAY(dev_priv)) + return; + /* * We can't grab pps_mutex here due to deadlock with power_domain * mutex when power_domain functions are called while holding pps_mutex. @@ -1376,7 +1379,7 @@ void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv) int pps_num; int pps_idx; - if (HAS_DDI(dev_priv)) + if (!HAS_DISPLAY(dev_priv) || HAS_DDI(dev_priv)) return; /* * This w/a is needed at least on CPT/PPT, but to be sure apply it diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 2483447ff8dc..305557e1942a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -973,8 +973,12 @@ static int i915_driver_open(struct drm_device *dev, struct drm_file *file) */ static void i915_driver_lastclose(struct drm_device *dev) { + struct drm_i915_private *i915 = to_i915(dev); + intel_fbdev_restore_mode(dev); - vga_switcheroo_process_delayed_switch(); + + if (HAS_DISPLAY(i915)) + vga_switcheroo_process_delayed_switch(); } static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) @@ -994,6 +998,9 @@ static void intel_suspend_encoders(struct drm_i915_private *dev_priv) struct drm_device *dev = &dev_priv->drm; struct intel_encoder *encoder; + if (!HAS_DISPLAY(dev_priv)) + return; + drm_modeset_lock_all(dev); for_each_intel_encoder(dev, encoder) if (encoder->suspend) @@ -1006,6 +1013,9 @@ static void intel_shutdown_encoders(struct drm_i915_private *dev_priv) struct drm_device *dev = &dev_priv->drm; struct intel_encoder *encoder; + if (!HAS_DISPLAY(dev_priv)) + return; + drm_modeset_lock_all(dev); for_each_intel_encoder(dev, encoder) if (encoder->shutdown) @@ -1021,9 +1031,11 @@ void i915_driver_shutdown(struct drm_i915_private *i915) i915_gem_suspend(i915); - drm_kms_helper_poll_disable(&i915->drm); + if (HAS_DISPLAY(i915)) { + drm_kms_helper_poll_disable(&i915->drm); - drm_atomic_helper_shutdown(&i915->drm); + drm_atomic_helper_shutdown(&i915->drm); + } intel_dp_mst_suspend(i915); @@ -1087,8 +1099,8 @@ static int i915_drm_suspend(struct drm_device *dev) /* We do a lot of poking in a lot of registers, make sure they work * properly. 
*/ intel_power_domains_disable(dev_priv); - - drm_kms_helper_poll_disable(dev); + if (HAS_DISPLAY(dev_priv)) + drm_kms_helper_poll_disable(dev); pci_save_state(pdev); @@ -1235,7 +1247,8 @@ static int i915_drm_resume(struct drm_device *dev) */ intel_runtime_pm_enable_interrupts(dev_priv); - drm_mode_config_reset(dev); + if (HAS_DISPLAY(dev_priv)) + drm_mode_config_reset(dev); i915_gem_resume(dev_priv); @@ -1248,7 +1261,8 @@ static int i915_drm_resume(struct drm_device *dev) intel_display_resume(dev); intel_hpd_poll_disable(dev_priv); - drm_kms_helper_poll_enable(dev); + if (HAS_DISPLAY(dev_priv)) + drm_kms_helper_poll_enable(dev); intel_opregion_resume(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 0bc7b49f843c..5fcc32821e18 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -87,6 +87,9 @@ void i915_save_display(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); + if (!HAS_DISPLAY(dev_priv)) + return; + /* Display arbitration control */ if (INTEL_GEN(dev_priv) <= 4) dev_priv->regfile.saveDSPARB = intel_de_read(dev_priv, DSPARB); @@ -102,6 +105,9 @@ void i915_restore_display(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); + if (!HAS_DISPLAY(dev_priv)) + return; + intel_restore_swf(dev_priv); if (IS_GEN(dev_priv, 4)) -- cgit From 8f2546dd3e3e29d3a13e28746ba75cee2b3c850d Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 8 Apr 2021 14:42:05 -0700 Subject: drm/i915/display: Defeature PSR2 for RKL and ADL-S MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PSR2 is defeatured for RKL and ADL-S. This has no significant power impact, as those are desktop CPUs and PSR2 was not even enabled by default yet on platforms without PSR2 HW tracking. HSDES: 14011750631 HSDES: 14011741325 BSpec: 53273 Cc: Caz Yokoyama Cc: Gwan-gyeong Mun Signed-off-by: José Roberto de Souza Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210408214205.327704-1-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 32d3d56259c2..2627d0b558f3 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -739,6 +739,12 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, return false; } + /* Wa_16011181250 */ + if (IS_ROCKETLAKE(dev_priv) || IS_ALDERLAKE_S(dev_priv)) { + drm_dbg_kms(&dev_priv->drm, "PSR2 is defeatured for this platform\n"); + return false; + } + if (!transcoder_has_psr2(dev_priv, crtc_state->cpu_transcoder)) { drm_dbg_kms(&dev_priv->drm, "PSR2 not supported in transcoder %s\n", -- cgit From 2878b29fc25a0dac0e1c6c94177f07c7f94240f0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 25 Mar 2021 12:48:22 +0100 Subject: drm/i915/display/vlv_dsi: Do not skip panel_pwr_cycle_delay when disabling the panel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the recently added commit fe0f1e3bfdfe ("drm/i915: Shut down displays gracefully on reboot"), the DSI panel on a Cherry Trail based Predia Basic tablet would no longer properly light up after reboot.
I've managed to reproduce this without rebooting by doing: chvt 3; echo 1 > /sys/class/graphics/fb0/blank;\ echo 0 > /sys/class/graphics/fb0/blank which rapidly turns the panel off and back on again. The vlv_dsi.c code uses an intel_dsi_msleep() helper for the various delays used for panel on/off, since starting with MIPI-sequences version >= 3 the delays are already included inside the MIPI-sequences. The problems exposed by the "Shut down displays gracefully on reboot" change show that using this helper for the panel_pwr_cycle_delay is not the right thing to do. This has not been noticed until now because normally the panel is never cycled off and directly back on again in quick succession. Change the intel_dsi_msleep() for the panel_pwr_cycle_delay to a normal msleep() call to avoid the panel staying black after a quick off + on cycle. Cc: Ville Syrjälä Fixes: fe0f1e3bfdfe ("drm/i915: Shut down displays gracefully on reboot") Signed-off-by: Hans de Goede Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210325114823.44922-1-hdegoede@redhat.com --- drivers/gpu/drm/i915/display/vlv_dsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 9bee99fe5495..f461a8f03e19 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -993,14 +993,14 @@ static void intel_dsi_post_disable(struct intel_atomic_state *state, * FIXME As we do with eDP, just make a note of the time here * and perform the wait before the next panel power on. */ - intel_dsi_msleep(intel_dsi, intel_dsi->panel_pwr_cycle_delay); + msleep(intel_dsi->panel_pwr_cycle_delay); } static void intel_dsi_shutdown(struct intel_encoder *encoder) { struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); - intel_dsi_msleep(intel_dsi, intel_dsi->panel_pwr_cycle_delay); + msleep(intel_dsi->panel_pwr_cycle_delay); } static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, -- cgit From c87eba80470e4103f4c2cbb915e297e5242f85fa Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 25 Mar 2021 12:48:23 +0100 Subject: drm/i915/display/vlv_dsi: Move panel_pwr_cycle_delay to next panel-on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of sleeping panel_pwr_cycle_delay ms when turning the panel off, record the time it is turned off and, if necessary, wait any (remaining) time when the panel is turned on again. Also sleep the remaining time on shutdown, because on reboot the GOP will immediately turn on the panel again.
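The bookkeeping, sketched here with simplified surroundings (the actual helper, intel_dsi_wait_panel_power_cycle(), is in the diff below): sleep only for whatever remains of the power cycle delay.

	s64 off_ms = ktime_ms_delta(ktime_get_boottime(),
				    intel_dsi->panel_power_off_time);

	if (off_ms < (s64)intel_dsi->panel_pwr_cycle_delay)
		msleep(intel_dsi->panel_pwr_cycle_delay - off_ms);

For example, with a 500 ms panel_pwr_cycle_delay and the panel turned back on 300 ms after it was powered off, only the remaining 200 ms are slept.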
Cc: Ville Syrjälä Signed-off-by: Hans de Goede Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210325114823.44922-2-hdegoede@redhat.com --- drivers/gpu/drm/i915/display/intel_dsi.h | 1 + drivers/gpu/drm/i915/display/vlv_dsi.c | 25 +++++++++++++++++++------ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsi.h b/drivers/gpu/drm/i915/display/intel_dsi.h index 625f2f1ae061..50d6da0b2419 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi.h +++ b/drivers/gpu/drm/i915/display/intel_dsi.h @@ -124,6 +124,7 @@ struct intel_dsi { u16 panel_on_delay; u16 panel_off_delay; u16 panel_pwr_cycle_delay; + ktime_t panel_power_off_time; }; struct intel_dsi_host { diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index f461a8f03e19..cf51860a1429 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -714,6 +714,19 @@ static void intel_dsi_port_disable(struct intel_encoder *encoder) } } +static void intel_dsi_wait_panel_power_cycle(struct intel_dsi *intel_dsi) +{ + ktime_t panel_power_on_time; + s64 panel_power_off_duration; + + panel_power_on_time = ktime_get_boottime(); + panel_power_off_duration = ktime_ms_delta(panel_power_on_time, + intel_dsi->panel_power_off_time); + + if (panel_power_off_duration < (s64)intel_dsi->panel_pwr_cycle_delay) + msleep(intel_dsi->panel_pwr_cycle_delay - panel_power_off_duration); +} + static void intel_dsi_prepare(struct intel_encoder *intel_encoder, const struct intel_crtc_state *pipe_config); static void intel_dsi_unprepare(struct intel_encoder *encoder); @@ -775,6 +788,8 @@ static void intel_dsi_pre_enable(struct intel_atomic_state *state, drm_dbg_kms(&dev_priv->drm, "\n"); + intel_dsi_wait_panel_power_cycle(intel_dsi); + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); /* @@ -989,18 +1004,14 @@ static void intel_dsi_post_disable(struct intel_atomic_state *state, intel_dsi_msleep(intel_dsi, intel_dsi->panel_off_delay); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_POWER_OFF); - /* - * FIXME As we do with eDP, just make a note of the time here - * and perform the wait before the next panel power on. - */ - msleep(intel_dsi->panel_pwr_cycle_delay); + intel_dsi->panel_power_off_time = ktime_get_boottime(); } static void intel_dsi_shutdown(struct intel_encoder *encoder) { struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); - msleep(intel_dsi->panel_pwr_cycle_delay); + intel_dsi_wait_panel_power_cycle(intel_dsi); } static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, @@ -1883,6 +1894,8 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) else intel_encoder->pipe_mask = BIT(PIPE_B); + intel_dsi->panel_power_off_time = ktime_get_boottime(); + if (dev_priv->vbt.dsi.config->dual_link) intel_dsi->ports = BIT(PORT_A) | BIT(PORT_C); else -- cgit From 41c70d2b7170274b8aa93b7f1d33eabd55871a0e Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 8 Apr 2021 13:49:16 -0700 Subject: drm/i915/display: Implement Wa_14013723622 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This WA fixes some display glitches when the system is under high memory pressure.
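Assuming the usual (uncore, reg, clear, set) argument order of intel_uncore_rmw() — a sketch, not a statement about the helper's exact signature — the workaround in the diff below is equivalent to this open-coded read-modify-write, which clears the override bit without touching the rest of CLKREQ_POLICY:

	u32 val = intel_uncore_read(&dev_priv->uncore, CLKREQ_POLICY);

	/* Clear MEM_UP_OVRD, preserve all other bits */
	val &= ~CLKREQ_POLICY_MEM_UP_OVRD;
	intel_uncore_write(&dev_priv->uncore, CLKREQ_POLICY, val);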
BSpec: 52890 Cc: Gwan-gyeong Mun Signed-off-by: José Roberto de Souza Reviewed-by: Gwan-gyeong Mun Tested-by: Gwan-gyeong Mun Link: https://patchwork.freedesktop.org/patch/msgid/20210408204917.254272-1-jose.souza@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 3 +++ drivers/gpu/drm/i915/intel_pm.c | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index cbf7a60afe54..f4a779643f4d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -12549,4 +12549,7 @@ enum skl_power_gate { #define TGL_ROOT_DEVICE_SKU_ULX 0x2 #define TGL_ROOT_DEVICE_SKU_ULT 0x4 +#define CLKREQ_POLICY _MMIO(0x101038) +#define CLKREQ_POLICY_MEM_UP_OVRD REG_BIT(1) + #endif /* _I915_REG_H_ */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3ecbd62915cf..3f6d8b502a61 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7141,6 +7141,11 @@ static void gen12lp_init_clock_gating(struct drm_i915_private *dev_priv) /* Wa_14011059788:tgl,rkl,adl_s,dg1 */ intel_uncore_rmw(&dev_priv->uncore, GEN10_DFR_RATIO_EN_AND_CHICKEN, 0, DFR_DISABLE); + + /* Wa_14013723622:tgl,rkl,dg1,adl-s */ + if (DISPLAY_VER(dev_priv) == 12) + intel_uncore_rmw(&dev_priv->uncore, CLKREQ_POLICY, + CLKREQ_POLICY_MEM_UP_OVRD, 0); } static void dg1_init_clock_gating(struct drm_i915_private *dev_priv) -- cgit From f10fe8f89d85ab170ec4491cd15ba7410a03a4c8 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 8 Apr 2021 13:49:17 -0700 Subject: Revert "drm/i915/tgl/psr: Fix glitches when doing frontbuffer modifications" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 71c1a4998320962f7b8362b2c5ee36610d49e8fb. The proper fix is Wa_14013723622, so now we can revert this WA and get back some power savings. Cc: Gwan-gyeong Mun Signed-off-by: José Roberto de Souza Reviewed-by: Gwan-gyeong Mun Tested-by: Gwan-gyeong Mun Link: https://patchwork.freedesktop.org/patch/msgid/20210408204917.254272-2-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 2627d0b558f3..bf8e4ede2a6c 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1167,21 +1167,7 @@ static void psr_force_hw_tracking_exit(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - if (IS_TIGERLAKE(dev_priv)) - /* - * Writes to CURSURFLIVE in TGL are causing IOMMU errors and - * visual glitches that are often reproduced when executing - * CPU intensive workloads while a eDP 4K panel is attached. - * - * Manually exiting PSR causes the frontbuffer to be updated - * without glitches and the IOMMU errors are also gone but - * this comes at the cost of less time with PSR active. - * - * So using this workaround until this issue is root caused - * and a better fix is found. 
- */ - intel_psr_exit(intel_dp); - else if (DISPLAY_VER(dev_priv) >= 9) + if (DISPLAY_VER(dev_priv) >= 9) /* * Display WA #0884: skl+ * This documented WA for bxt can be safely applied -- cgit From 2446e1d6433b4b99d69e8fe0522bcdfbdef715b5 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 7 Apr 2021 13:39:45 -0700 Subject: drm/i915/display: Eliminate IS_GEN9_{BC,LP} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we've eliminated INTEL_GEN(), IS_GEN_RANGE(), etc. from the display code, we should also kill off our use of the IS_GEN9_* macros too. We'll do the conversion manually this time instead of using Coccinelle since the most logical substitution can depend heavily on the code context, and sometimes we can keep the code simpler if we make additional adjustments such as swapping the order of if/else arms. v2: - Restore a lost negation in intel_pll_is_valid(). Cc: Ville Syrjälä Cc: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20210407203945.1432531-1-matthew.d.roper@intel.com (cherry picked from commit 70bfb30743d5da73058b0a2271e9c127a84fb494) [Jani: cherry picked to topic branch to reduce conflicts] Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_bios.c | 7 ++-- drivers/gpu/drm/i915/display/intel_cdclk.c | 34 ++++++++-------- drivers/gpu/drm/i915/display/intel_csr.c | 2 +- drivers/gpu/drm/i915/display/intel_ddi.c | 37 +++++++++-------- drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c | 8 ++-- drivers/gpu/drm/i915/display/intel_display.c | 11 +++--- .../gpu/drm/i915/display/intel_display_debugfs.c | 2 +- drivers/gpu/drm/i915/display/intel_display_power.c | 43 ++++++++++---------- drivers/gpu/drm/i915/display/intel_dp.c | 4 +- drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +- drivers/gpu/drm/i915/display/intel_dpll.c | 6 +-- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 6 +-- drivers/gpu/drm/i915/display/intel_fbc.c | 7 ++-- drivers/gpu/drm/i915/display/intel_gmbus.c | 12 +++--- drivers/gpu/drm/i915/display/intel_hdcp.c | 9 +++-- drivers/gpu/drm/i915/display/intel_hdmi.c | 7 ++-- drivers/gpu/drm/i915/display/intel_panel.c | 2 +- drivers/gpu/drm/i915/display/intel_pps.c | 14 +++---- drivers/gpu/drm/i915/display/vlv_dsi.c | 46 +++++++++++----------- drivers/gpu/drm/i915/i915_irq.c | 13 +++--- drivers/gpu/drm/i915/intel_pm.c | 8 ++-- 21 files changed, 146 insertions(+), 134 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 3d0c035b5e38..ea4837d485a1 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -917,7 +917,7 @@ parse_psr(struct drm_i915_private *i915, const struct bdb_header *bdb) * Old decimal value is wake up time in multiples of 100 us. 
*/ if (bdb->version >= 205 && - (IS_GEN9_BC(i915) || DISPLAY_VER(i915) >= 10)) { + (DISPLAY_VER(i915) >= 9 && !IS_BROXTON(i915))) { switch (psr_table->tp1_wakeup_time) { case 0: i915->vbt.psr.tp1_wakeup_time_us = 500; @@ -1659,7 +1659,7 @@ static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin) } else if (IS_ROCKETLAKE(i915) && INTEL_PCH_TYPE(i915) == PCH_TGP) { ddc_pin_map = rkl_pch_tgp_ddc_pin_map; n_entries = ARRAY_SIZE(rkl_pch_tgp_ddc_pin_map); - } else if (HAS_PCH_TGP(i915) && IS_GEN9_BC(i915)) { + } else if (HAS_PCH_TGP(i915) && IS_DISPLAY_VER(i915, 9)) { ddc_pin_map = gen9bc_tgp_ddc_pin_map; n_entries = ARRAY_SIZE(gen9bc_tgp_ddc_pin_map); } else if (INTEL_PCH_TYPE(i915) >= PCH_ICP) { @@ -2770,7 +2770,8 @@ intel_bios_is_port_hpd_inverted(const struct drm_i915_private *i915, const struct intel_bios_encoder_data *devdata = i915->vbt.ddi_port_info[port].devdata; - if (drm_WARN_ON_ONCE(&i915->drm, !IS_GEN9_LP(i915))) + if (drm_WARN_ON_ONCE(&i915->drm, + !IS_GEMINILAKE(i915) && !IS_BROXTON(i915))) return false; return devdata && devdata->child.hpd_invert; diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 3f43ad4d7362..489acf6b5cf1 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1631,7 +1631,8 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, * Disable SSA Precharge when CD clock frequency < 500 MHz, * enable otherwise. */ - if (IS_GEN9_LP(dev_priv) && cdclk >= 500000) + if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) && + cdclk >= 500000) val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; intel_de_write(dev_priv, CDCLK_CTL, val); @@ -1732,7 +1733,8 @@ static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) * Disable SSA Precharge when CD clock frequency < 500 MHz, * enable otherwise. 
*/ - if (IS_GEN9_LP(dev_priv) && dev_priv->cdclk.hw.cdclk >= 500000) + if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) && + dev_priv->cdclk.hw.cdclk >= 500000) expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; if (cdctl == expected) @@ -1797,9 +1799,9 @@ static void bxt_cdclk_uninit_hw(struct drm_i915_private *dev_priv) */ void intel_cdclk_init_hw(struct drm_i915_private *i915) { - if (IS_GEN9_LP(i915) || DISPLAY_VER(i915) >= 10) + if (DISPLAY_VER(i915) >= 10 || IS_BROXTON(i915)) bxt_cdclk_init_hw(i915); - else if (IS_GEN9_BC(i915)) + else if (IS_DISPLAY_VER(i915, 9)) skl_cdclk_init_hw(i915); } @@ -1812,9 +1814,9 @@ void intel_cdclk_init_hw(struct drm_i915_private *i915) */ void intel_cdclk_uninit_hw(struct drm_i915_private *i915) { - if (DISPLAY_VER(i915) >= 10 || IS_GEN9_LP(i915)) + if (DISPLAY_VER(i915) >= 10 || IS_BROXTON(i915)) bxt_cdclk_uninit_hw(i915); - else if (IS_GEN9_BC(i915)) + else if (IS_DISPLAY_VER(i915, 9)) skl_cdclk_uninit_hw(i915); } @@ -1852,7 +1854,7 @@ static bool intel_cdclk_can_cd2x_update(struct drm_i915_private *dev_priv, const struct intel_cdclk_config *b) { /* Older hw doesn't have the capability */ - if (DISPLAY_VER(dev_priv) < 10 && !IS_GEN9_LP(dev_priv)) + if (DISPLAY_VER(dev_priv) < 10 && !IS_BROXTON(dev_priv)) return false; return a->cdclk != b->cdclk && @@ -2625,7 +2627,11 @@ void intel_update_max_cdclk(struct drm_i915_private *dev_priv) dev_priv->max_cdclk_freq = 652800; } else if (IS_CANNONLAKE(dev_priv)) { dev_priv->max_cdclk_freq = 528000; - } else if (IS_GEN9_BC(dev_priv)) { + } else if (IS_GEMINILAKE(dev_priv)) { + dev_priv->max_cdclk_freq = 316800; + } else if (IS_BROXTON(dev_priv)) { + dev_priv->max_cdclk_freq = 624000; + } else if (IS_DISPLAY_VER(dev_priv, 9)) { u32 limit = intel_de_read(dev_priv, SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK; int max_cdclk, vco; @@ -2647,10 +2653,6 @@ void intel_update_max_cdclk(struct drm_i915_private *dev_priv) max_cdclk = 308571; dev_priv->max_cdclk_freq = skl_calc_cdclk(max_cdclk, vco); - } else if (IS_GEMINILAKE(dev_priv)) { - dev_priv->max_cdclk_freq = 316800; - } else if (IS_BROXTON(dev_priv)) { - dev_priv->max_cdclk_freq = 624000; } else if (IS_BROADWELL(dev_priv)) { /* * FIXME with extra cooling we can allow @@ -2878,7 +2880,7 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; dev_priv->display.calc_voltage_level = cnl_calc_voltage_level; dev_priv->cdclk.table = cnl_cdclk_table; - } else if (IS_GEN9_LP(dev_priv)) { + } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk; dev_priv->display.set_cdclk = bxt_set_cdclk; dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; @@ -2887,7 +2889,7 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) dev_priv->cdclk.table = glk_cdclk_table; else dev_priv->cdclk.table = bxt_cdclk_table; - } else if (IS_GEN9_BC(dev_priv)) { + } else if (IS_DISPLAY_VER(dev_priv, 9)) { dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk; dev_priv->display.set_cdclk = skl_set_cdclk; dev_priv->display.modeset_calc_cdclk = skl_modeset_calc_cdclk; @@ -2908,9 +2910,9 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) dev_priv->display.modeset_calc_cdclk = fixed_modeset_calc_cdclk; } - if (DISPLAY_VER(dev_priv) >= 10 || IS_GEN9_LP(dev_priv)) + if (DISPLAY_VER(dev_priv) >= 10 || IS_BROXTON(dev_priv)) dev_priv->display.get_cdclk = bxt_get_cdclk; - else if (IS_GEN9_BC(dev_priv)) + else if (IS_DISPLAY_VER(dev_priv, 
9)) dev_priv->display.get_cdclk = skl_get_cdclk; else if (IS_BROADWELL(dev_priv)) dev_priv->display.get_cdclk = bdw_get_cdclk; diff --git a/drivers/gpu/drm/i915/display/intel_csr.c b/drivers/gpu/drm/i915/display/intel_csr.c index 794efcc3ca08..e54521d7b931 100644 --- a/drivers/gpu/drm/i915/display/intel_csr.c +++ b/drivers/gpu/drm/i915/display/intel_csr.c @@ -284,7 +284,7 @@ static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv) mask = DC_STATE_DEBUG_MASK_MEMORY_UP; - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) mask |= DC_STATE_DEBUG_MASK_CORES; /* The below bit doesn't need to be cleared ever afterwards */ diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 953de42e277c..deef24da00b5 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -113,7 +113,8 @@ void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder, &n_entries); /* If we're boosting the current, set bit 31 of trans1 */ - if (IS_GEN9_BC(dev_priv) && intel_bios_encoder_dp_boost_level(encoder->devdata)) + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv) && + intel_bios_encoder_dp_boost_level(encoder->devdata)) iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; for (i = 0; i < n_entries; i++) { @@ -146,7 +147,8 @@ static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder, level = n_entries - 1; /* If we're boosting the current, set bit 31 of trans1 */ - if (IS_GEN9_BC(dev_priv) && intel_bios_encoder_hdmi_boost_level(encoder->devdata)) + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv) && + intel_bios_encoder_hdmi_boost_level(encoder->devdata)) iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; /* Entry 9 is for HDMI: */ @@ -759,7 +761,7 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, *is_dp_mst = mst_pipe_mask; out: - if (*pipe_mask && IS_GEN9_LP(dev_priv)) { + if (*pipe_mask && (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv))) { tmp = intel_de_read(dev_priv, BXT_PHY_CTL(port)); if ((tmp & (BXT_PHY_CMNLANE_POWERDOWN_ACK | BXT_PHY_LANE_POWERDOWN_ACK | @@ -987,7 +989,7 @@ static u8 intel_ddi_dp_voltage_max(struct intel_dp *intel_dp, icl_get_mg_buf_trans(encoder, crtc_state, &n_entries); } else if (IS_CANNONLAKE(dev_priv)) { cnl_get_buf_trans(encoder, crtc_state, &n_entries); - } else if (IS_GEN9_LP(dev_priv)) { + } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { bxt_get_buf_trans(encoder, crtc_state, &n_entries); } else { if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) @@ -1555,7 +1557,7 @@ hsw_set_signal_levels(struct intel_dp *intel_dp, intel_dp->DP &= ~DDI_BUF_EMP_MASK; intel_dp->DP |= signal_levels; - if (IS_GEN9_BC(dev_priv)) + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) skl_ddi_set_iboost(encoder, crtc_state, level); intel_de_write(dev_priv, DDI_BUF_CTL(port), intel_dp->DP); @@ -2648,7 +2650,7 @@ static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state, icl_ddi_vswing_sequence(encoder, crtc_state, level); else if (IS_CANNONLAKE(dev_priv)) cnl_ddi_vswing_sequence(encoder, crtc_state, level); - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_ddi_vswing_sequence(encoder, crtc_state, level); else intel_prepare_dp_ddi_buffers(encoder, crtc_state); @@ -3096,16 +3098,16 @@ static void intel_enable_ddi_hdmi(struct intel_atomic_state *state, icl_ddi_vswing_sequence(encoder, crtc_state, level); else if (IS_CANNONLAKE(dev_priv)) 
cnl_ddi_vswing_sequence(encoder, crtc_state, level); - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_ddi_vswing_sequence(encoder, crtc_state, level); else intel_prepare_hdmi_ddi_buffers(encoder, level); - if (IS_GEN9_BC(dev_priv)) + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) skl_ddi_set_iboost(encoder, crtc_state, level); /* Display WA #1143: skl,kbl,cfl */ - if (IS_GEN9_BC(dev_priv)) { + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) { /* * For some reason these chicken bits have been * stuffed into a transcoder register, event though @@ -3321,7 +3323,7 @@ intel_ddi_pre_pll_enable(struct intel_atomic_state *state, * Type-C ports. Skip this step for TBT. */ intel_tc_port_set_fia_lane_count(dig_port, crtc_state->lane_count); - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_ddi_phy_set_lane_optim_mask(encoder, crtc_state->lane_lat_optim_mask); } @@ -3679,7 +3681,7 @@ static void intel_ddi_get_config(struct intel_encoder *encoder, if (!pipe_config->bigjoiner_slave) ddi_dotclock_get(pipe_config); - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) pipe_config->lane_lat_optim_mask = bxt_ddi_phy_get_lane_lat_optim_mask(encoder); @@ -3885,7 +3887,7 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder, pipe_config->pch_pfit.enabled || pipe_config->crc_enabled; - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) pipe_config->lane_lat_optim_mask = bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count); @@ -4053,7 +4055,7 @@ intel_ddi_init_dp_connector(struct intel_digital_port *dig_port) dig_port->dp.set_signal_levels = icl_set_signal_levels; else if (IS_CANNONLAKE(dev_priv)) dig_port->dp.set_signal_levels = cnl_set_signal_levels; - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) dig_port->dp.set_signal_levels = bxt_set_signal_levels; else dig_port->dp.set_signal_levels = hsw_set_signal_levels; @@ -4296,7 +4298,7 @@ static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dig_port) /* Broxton/Geminilake: Bspec says that DDI_A_4_LANES is the only * supported configuration */ - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) return true; /* Cannonlake: Most of SKUs don't support DDI_E, and the only @@ -4585,10 +4587,10 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) encoder->disable_clock = cnl_ddi_disable_clock; encoder->is_clock_enabled = cnl_ddi_is_clock_enabled; encoder->get_config = cnl_ddi_get_config; - } else if (IS_GEN9_LP(dev_priv)) { + } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { /* BXT/GLK have fixed PLL->port mapping */ encoder->get_config = bxt_ddi_get_config; - } else if (IS_GEN9_BC(dev_priv)) { + } else if (IS_DISPLAY_VER(dev_priv, 9)) { encoder->enable_clock = skl_ddi_enable_clock; encoder->disable_clock = skl_ddi_disable_clock; encoder->is_clock_enabled = skl_ddi_is_clock_enabled; @@ -4672,7 +4674,8 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) else dig_port->connected = lpt_digital_port_connected; } else if (DISPLAY_VER(dev_priv) >= 8) { - if (port == PORT_A || IS_GEN9_LP(dev_priv)) + if (port == PORT_A || IS_GEMINILAKE(dev_priv) || + IS_BROXTON(dev_priv)) dig_port->connected = bdw_digital_port_connected; else dig_port->connected = lpt_digital_port_connected; diff --git a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c 
b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c index 5d9ce6042e87..fdd25861edd5 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c +++ b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c @@ -881,7 +881,7 @@ intel_ddi_get_buf_trans_edp(struct intel_encoder *encoder, int *n_entries) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (IS_GEN9_BC(dev_priv)) { + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) { const struct ddi_buf_trans *ddi_translations = skl_get_buf_trans_edp(encoder, n_entries); *n_entries = skl_buf_trans_num_entries(encoder->port, *n_entries); @@ -919,7 +919,7 @@ intel_ddi_get_buf_trans_hdmi(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (IS_GEN9_BC(dev_priv)) { + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) { return skl_get_buf_trans_hdmi(dev_priv, n_entries); } else if (IS_BROADWELL(dev_priv)) { *n_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); @@ -1370,10 +1370,10 @@ int intel_ddi_hdmi_num_entries(struct intel_encoder *encoder, } else if (IS_CANNONLAKE(dev_priv)) { cnl_get_buf_trans_hdmi(encoder, &n_entries); *default_entry = n_entries - 1; - } else if (IS_GEN9_LP(dev_priv)) { + } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { bxt_get_buf_trans_hdmi(encoder, &n_entries); *default_entry = n_entries - 1; - } else if (IS_GEN9_BC(dev_priv)) { + } else if (IS_DISPLAY_VER(dev_priv, 9)) { intel_ddi_get_buf_trans_hdmi(encoder, &n_entries); *default_entry = 8; } else if (IS_BROADWELL(dev_priv)) { diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index d74b263c5f4e..4f8f994a639f 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -5951,7 +5951,7 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc, active = hsw_get_transcoder_state(crtc, pipe_config, &power_domain_set); - if (IS_GEN9_LP(dev_priv) && + if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) && bxt_get_dsi_transcoder_state(crtc, pipe_config, &power_domain_set)) { drm_WARN_ON(&dev_priv->drm, active); active = true; @@ -6869,7 +6869,8 @@ static u16 skl_linetime_wm(const struct intel_crtc_state *crtc_state) crtc_state->pixel_rate); /* Display WA #1135: BXT:ALL GLK:ALL */ - if (IS_GEN9_LP(dev_priv) && dev_priv->ipc_enabled) + if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) && + dev_priv->ipc_enabled) linetime_wm /= 2; return min(linetime_wm, 0x1ff); @@ -10871,7 +10872,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_ddi_init(dev_priv, PORT_F); icl_dsi_init(dev_priv); - } else if (IS_GEN9_LP(dev_priv)) { + } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { /* * FIXME: Broxton doesn't support port detection via the * DDI_BUF_CTL_A or SFUSE_STRAP registers, find another way to @@ -10896,7 +10897,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) */ found = intel_de_read(dev_priv, DDI_BUF_CTL(PORT_A)) & DDI_INIT_DISPLAY_DETECTED; /* WaIgnoreDDIAStrap: skl */ - if (found || IS_GEN9_BC(dev_priv)) + if (found || IS_DISPLAY_VER(dev_priv, 9)) intel_ddi_init(dev_priv, PORT_A); /* DDI B, C, D, and F detection is indicated by the SFUSE_STRAP @@ -10921,7 +10922,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) /* * On SKL we don't have a way to detect DDI-E so we rely on VBT. 
*/ - if (IS_GEN9_BC(dev_priv) && + if (IS_DISPLAY_VER(dev_priv, 9) && intel_bios_is_port_present(dev_priv, PORT_E)) intel_ddi_init(dev_priv, PORT_E); diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 564509a4e666..183c414d554a 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -569,7 +569,7 @@ static int i915_dmc_info(struct seq_file *m, void *unused) } else { dc5_reg = IS_BROXTON(dev_priv) ? BXT_CSR_DC3_DC5_COUNT : SKL_CSR_DC3_DC5_COUNT; - if (!IS_GEN9_LP(dev_priv)) + if (!IS_GEMINILAKE(dev_priv) && !IS_BROXTON(dev_priv)) dc6_reg = SKL_CSR_DC5_DC6_COUNT; } diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 99126caf5747..9419ae8c6111 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -709,7 +709,7 @@ static bool hsw_power_well_enabled(struct drm_i915_private *dev_priv, * BIOS's own request bits, which are forced-on for these power wells * when exiting DC5/6. */ - if (IS_DISPLAY_VER(dev_priv, 9) && !IS_GEN9_LP(dev_priv) && + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv) && (id == SKL_DISP_PW_1 || id == SKL_DISP_PW_MISC_IO)) val |= intel_de_read(dev_priv, regs->bios); @@ -809,7 +809,7 @@ static u32 gen9_dc_mask(struct drm_i915_private *dev_priv) | DC_STATE_EN_DC9; else if (IS_DISPLAY_VER(dev_priv, 11)) mask |= DC_STATE_EN_UPTO_DC6 | DC_STATE_EN_DC9; - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) mask |= DC_STATE_EN_DC9; else mask |= DC_STATE_EN_UPTO_DC6; @@ -1060,7 +1060,7 @@ static void gen9_enable_dc5(struct drm_i915_private *dev_priv) drm_dbg_kms(&dev_priv->drm, "Enabling DC5\n"); /* Wa Display #1183: skl,kbl,cfl */ - if (IS_GEN9_BC(dev_priv)) + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) intel_de_write(dev_priv, GEN8_CHICKEN_DCPR_1, intel_de_read(dev_priv, GEN8_CHICKEN_DCPR_1) | SKL_SELECT_ALTERNATE_DC_EXIT); @@ -1087,7 +1087,7 @@ static void skl_enable_dc6(struct drm_i915_private *dev_priv) drm_dbg_kms(&dev_priv->drm, "Enabling DC6\n"); /* Wa Display #1183: skl,kbl,cfl */ - if (IS_GEN9_BC(dev_priv)) + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) intel_de_write(dev_priv, GEN8_CHICKEN_DCPR_1, intel_de_read(dev_priv, GEN8_CHICKEN_DCPR_1) | SKL_SELECT_ALTERNATE_DC_EXIT); @@ -1189,7 +1189,7 @@ static void gen9_disable_dc_states(struct drm_i915_private *dev_priv) gen9_assert_dbuf_enabled(dev_priv); - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_verify_ddi_phy_power_wells(dev_priv); if (DISPLAY_VER(dev_priv) >= 11) @@ -4537,10 +4537,10 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, max_dc = 3; else if (DISPLAY_VER(dev_priv) >= 12) max_dc = 4; - else if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv) || IS_GEN9_BC(dev_priv)) - max_dc = 2; - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) max_dc = 1; + else if (DISPLAY_VER(dev_priv) >= 9) + max_dc = 2; else max_dc = 0; @@ -4549,7 +4549,8 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, * not depending on the DMC firmware. It's needed by system * suspend/resume, so allow it unconditionally. */ - mask = IS_GEN9_LP(dev_priv) || DISPLAY_VER(dev_priv) >= 11 ? 
+ mask = IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) || + DISPLAY_VER(dev_priv) >= 11 ? DC_STATE_EN_DC9 : 0; if (!dev_priv->params.disable_power_well) @@ -4692,7 +4693,7 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) err = set_power_wells(power_domains, glk_power_wells); } else if (IS_BROXTON(dev_priv)) { err = set_power_wells(power_domains, bxt_power_wells); - } else if (IS_GEN9_BC(dev_priv)) { + } else if (IS_DISPLAY_VER(dev_priv, 9)) { err = set_power_wells(power_domains, skl_power_wells); } else if (IS_CHERRYVIEW(dev_priv)) { err = set_power_wells(power_domains, chv_power_wells); @@ -5623,10 +5624,10 @@ void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume) icl_display_core_init(i915, resume); } else if (IS_CANNONLAKE(i915)) { cnl_display_core_init(i915, resume); - } else if (IS_GEN9_BC(i915)) { - skl_display_core_init(i915, resume); - } else if (IS_GEN9_LP(i915)) { + } else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) { bxt_display_core_init(i915, resume); + } else if (IS_DISPLAY_VER(i915, 9)) { + skl_display_core_init(i915, resume); } else if (IS_CHERRYVIEW(i915)) { mutex_lock(&power_domains->lock); chv_phy_control_init(i915); @@ -5784,10 +5785,10 @@ void intel_power_domains_suspend(struct drm_i915_private *i915, icl_display_core_uninit(i915); else if (IS_CANNONLAKE(i915)) cnl_display_core_uninit(i915); - else if (IS_GEN9_BC(i915)) - skl_display_core_uninit(i915); - else if (IS_GEN9_LP(i915)) + else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) bxt_display_core_uninit(i915); + else if (IS_DISPLAY_VER(i915, 9)) + skl_display_core_uninit(i915); power_domains->display_core_suspended = true; } @@ -5908,7 +5909,8 @@ static void intel_power_domains_verify_state(struct drm_i915_private *i915) void intel_display_power_suspend_late(struct drm_i915_private *i915) { - if (DISPLAY_VER(i915) >= 11 || IS_GEN9_LP(i915)) { + if (DISPLAY_VER(i915) >= 11 || IS_GEMINILAKE(i915) || + IS_BROXTON(i915)) { bxt_enable_dc9(i915); /* Tweaked Wa_14010685332:icp,jsp,mcc */ if (INTEL_PCH_TYPE(i915) >= PCH_ICP && INTEL_PCH_TYPE(i915) <= PCH_MCC) @@ -5921,7 +5923,8 @@ void intel_display_power_suspend_late(struct drm_i915_private *i915) void intel_display_power_resume_early(struct drm_i915_private *i915) { - if (DISPLAY_VER(i915) >= 11 || IS_GEN9_LP(i915)) { + if (DISPLAY_VER(i915) >= 11 || IS_GEMINILAKE(i915) || + IS_BROXTON(i915)) { gen9_sanitize_dc_state(i915); bxt_disable_dc9(i915); /* Tweaked Wa_14010685332:icp,jsp,mcc */ @@ -5938,7 +5941,7 @@ void intel_display_power_suspend(struct drm_i915_private *i915) if (DISPLAY_VER(i915) >= 11) { icl_display_core_uninit(i915); bxt_enable_dc9(i915); - } else if (IS_GEN9_LP(i915)) { + } else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) { bxt_display_core_uninit(i915); bxt_enable_dc9(i915); } else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) { @@ -5959,7 +5962,7 @@ void intel_display_power_resume(struct drm_i915_private *i915) DC_STATE_EN_UPTO_DC5) gen9_enable_dc5(i915); } - } else if (IS_GEN9_LP(i915)) { + } else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) { bxt_disable_dc9(i915); bxt_display_core_init(i915, true); if (i915->csr.dmc_payload && diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index a560468765c0..a5231ac3443a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -301,10 +301,10 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) max_rate = ehl_max_source_rate(intel_dp); else max_rate = icl_max_source_rate(intel_dp); 
- } else if (IS_GEN9_LP(dev_priv)) { + } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { source_rates = bxt_rates; size = ARRAY_SIZE(bxt_rates); - } else if (IS_GEN9_BC(dev_priv)) { + } else if (IS_DISPLAY_VER(dev_priv, 9)) { source_rates = skl_rates; size = ARRAY_SIZE(skl_rates); } else if ((IS_HASWELL(dev_priv) && !IS_HSW_ULX(dev_priv)) || diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 2daa3f67791e..180f97cd74cb 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -154,7 +154,7 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->limited_color_range = intel_dp_limited_color_range(pipe_config, conn_state); - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) pipe_config->lane_lat_optim_mask = bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count); diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c b/drivers/gpu/drm/i915/display/intel_dpll.c index 3e3c5eed1600..158f271299a4 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll.c +++ b/drivers/gpu/drm/i915/display/intel_dpll.c @@ -366,13 +366,11 @@ static bool intel_pll_is_valid(struct drm_i915_private *dev_priv, if (clock->m1 < limit->m1.min || limit->m1.max < clock->m1) return false; - if (!IS_PINEVIEW(dev_priv) && !IS_VALLEYVIEW(dev_priv) && - !IS_CHERRYVIEW(dev_priv) && !IS_GEN9_LP(dev_priv)) + if (!IS_PINEVIEW(dev_priv) && !IS_LP(dev_priv)) if (clock->m1 <= clock->m2) return false; - if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv) && - !IS_GEN9_LP(dev_priv)) { + if (!IS_LP(dev_priv)) { if (clock->p < limit->p.min || limit->p.max < clock->p) return false; if (clock->m < limit->m.min || limit->m.max < clock->m) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 1ae158d12c07..e32de7c848e9 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -4441,10 +4441,10 @@ void intel_shared_dpll_init(struct drm_device *dev) dpll_mgr = &icl_pll_mgr; else if (IS_CANNONLAKE(dev_priv)) dpll_mgr = &cnl_pll_mgr; - else if (IS_GEN9_BC(dev_priv)) - dpll_mgr = &skl_pll_mgr; - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) dpll_mgr = &bxt_pll_mgr; + else if (IS_DISPLAY_VER(dev_priv, 9)) + dpll_mgr = &skl_pll_mgr; else if (HAS_DDI(dev_priv)) dpll_mgr = &hsw_pll_mgr; else if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 986bbbe3b12f..04d9c7d22b04 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -302,7 +302,7 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) int threshold = dev_priv->fbc.threshold; /* Display WA #0529: skl, kbl, bxt. */ - if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) { + if (IS_DISPLAY_VER(dev_priv, 9)) { u32 val = intel_de_read(dev_priv, CHICKEN_MISC_4); val &= ~(FBC_STRIDE_OVERRIDE | FBC_STRIDE_MASK); @@ -445,7 +445,8 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, * reserved range size, so it always assumes the maximum (8mb) is used. * If we enable FBC using a CFB on that memory range we'll get FIFO * underruns, even if that range is not reserved by the BIOS. 
*/ - if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv)) + if (IS_BROADWELL(dev_priv) || (IS_DISPLAY_VER(dev_priv, 9) && + !IS_BROXTON(dev_priv))) end = resource_size(&dev_priv->dsm) - 8 * 1024 * 1024; else end = U64_MAX; @@ -759,7 +760,7 @@ static u16 intel_fbc_gen9_wa_cfb_stride(struct drm_i915_private *dev_priv) struct intel_fbc *fbc = &dev_priv->fbc; struct intel_fbc_state_cache *cache = &fbc->state_cache; - if ((IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) && + if ((IS_DISPLAY_VER(dev_priv, 9)) && cache->fb.modifier != I915_FORMAT_MOD_X_TILED) return DIV_ROUND_UP(cache->plane.src_w, 32 * fbc->threshold) * 8; else diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index 8ddc20daef64..13ba280d0228 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -107,9 +107,9 @@ static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *dev_priv, return &gmbus_pins_icp[pin]; else if (HAS_PCH_CNP(dev_priv)) return &gmbus_pins_cnp[pin]; - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) return &gmbus_pins_bxt[pin]; - else if (IS_GEN9_BC(dev_priv)) + else if (IS_DISPLAY_VER(dev_priv, 9)) return &gmbus_pins_skl[pin]; else if (IS_BROADWELL(dev_priv)) return &gmbus_pins_bdw[pin]; @@ -128,9 +128,9 @@ bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv, size = ARRAY_SIZE(gmbus_pins_icp); else if (HAS_PCH_CNP(dev_priv)) size = ARRAY_SIZE(gmbus_pins_cnp); - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) size = ARRAY_SIZE(gmbus_pins_bxt); - else if (IS_GEN9_BC(dev_priv)) + else if (IS_DISPLAY_VER(dev_priv, 9)) size = ARRAY_SIZE(gmbus_pins_skl); else if (IS_BROADWELL(dev_priv)) size = ARRAY_SIZE(gmbus_pins_bdw); @@ -600,7 +600,7 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num, int ret = 0; /* Display WA #0868: skl,bxt,kbl,cfl,glk,cnl */ - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_gmbus_clock_gating(dev_priv, false); else if (HAS_PCH_SPT(dev_priv) || HAS_PCH_CNP(dev_priv)) pch_gmbus_clock_gating(dev_priv, false); @@ -713,7 +713,7 @@ timeout: out: /* Display WA #0868: skl,bxt,kbl,cfl,glk,cnl */ - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_gmbus_clock_gating(dev_priv, true); else if (HAS_PCH_SPT(dev_priv) || HAS_PCH_CNP(dev_priv)) pch_gmbus_clock_gating(dev_priv, true); diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index d8570e14fe60..75050a040577 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -286,11 +286,12 @@ static int intel_hdcp_load_keys(struct drm_i915_private *dev_priv) /* * Initiate loading the HDCP key from fuses. * - * BXT+ platforms, HDCP key needs to be loaded by SW. Only Gen 9 - * platforms except BXT and GLK, differ in the key load trigger process - * from other platforms. So GEN9_BC uses the GT Driver Mailbox i/f. + * BXT+ platforms, HDCP key needs to be loaded by SW. Only display + * version 9 platforms (minus BXT) differ in the key load trigger + * process from other platforms. These platforms use the GT Driver + * Mailbox interface. 
*/ - if (IS_GEN9_BC(dev_priv)) { + if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) { ret = sandybridge_pcode_write(dev_priv, SKL_PCODE_LOAD_HDCP_KEYS, 1); if (ret) { diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index d69f0a6dc26d..f2d1fef8bd9d 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1841,7 +1841,8 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi, return MODE_CLOCK_RANGE; /* BXT/GLK DPLL can't generate 223-240 MHz */ - if (IS_GEN9_LP(dev_priv) && clock > 223333 && clock < 240000) + if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) && + clock > 223333 && clock < 240000) return MODE_CLOCK_RANGE; /* CHV DPLL can't generate 216-240 MHz */ @@ -2714,7 +2715,7 @@ static u8 intel_hdmi_ddc_pin(struct intel_encoder *encoder) ddc_pin = dg1_port_to_ddc_pin(dev_priv, port); else if (IS_ROCKETLAKE(dev_priv)) ddc_pin = rkl_port_to_ddc_pin(dev_priv, port); - else if (IS_GEN9_BC(dev_priv) && HAS_PCH_TGP(dev_priv)) + else if (IS_DISPLAY_VER(dev_priv, 9) && HAS_PCH_TGP(dev_priv)) ddc_pin = gen9bc_tgp_port_to_ddc_pin(dev_priv, port); else if (HAS_PCH_MCC(dev_priv)) ddc_pin = mcc_port_to_ddc_pin(dev_priv, port); @@ -2722,7 +2723,7 @@ static u8 intel_hdmi_ddc_pin(struct intel_encoder *encoder) ddc_pin = icl_port_to_ddc_pin(dev_priv, port); else if (HAS_PCH_CNP(dev_priv)) ddc_pin = cnp_port_to_ddc_pin(dev_priv, port); - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) ddc_pin = bxt_port_to_ddc_pin(dev_priv, port); else if (IS_CHERRYVIEW(dev_priv)) ddc_pin = chv_port_to_ddc_pin(dev_priv, port); diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index 10022d1575e1..2fcbb2ba2d78 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -2161,7 +2161,7 @@ intel_panel_init_backlight_funcs(struct intel_panel *panel) intel_dsi_dcs_init_backlight_funcs(connector) == 0) return; - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { panel->backlight.pwm_funcs = &bxt_pwm_funcs; } else if (INTEL_PCH_TYPE(dev_priv) >= PCH_CNP) { panel->backlight.pwm_funcs = &cnp_pwm_funcs; diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index c55da130773b..3a0985f1f531 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -313,10 +313,7 @@ void intel_pps_reset_all(struct drm_i915_private *dev_priv) { struct intel_encoder *encoder; - if (drm_WARN_ON(&dev_priv->drm, - !(IS_VALLEYVIEW(dev_priv) || - IS_CHERRYVIEW(dev_priv) || - IS_GEN9_LP(dev_priv)))) + if (drm_WARN_ON(&dev_priv->drm, !IS_LP(dev_priv))) return; /* @@ -338,7 +335,7 @@ void intel_pps_reset_all(struct drm_i915_private *dev_priv) if (encoder->type != INTEL_OUTPUT_EDP) continue; - if (IS_GEN9_LP(dev_priv)) + if (DISPLAY_VER(dev_priv) >= 9) intel_dp->pps.pps_reset = true; else intel_dp->pps.pps_pipe = INVALID_PIPE; @@ -361,7 +358,7 @@ static void intel_pps_get_registers(struct intel_dp *intel_dp, memset(regs, 0, sizeof(*regs)); - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) pps_idx = bxt_power_sequencer_idx(intel_dp); else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) pps_idx = vlv_power_sequencer_pipe(intel_dp); @@ -372,7 +369,8 @@ static void intel_pps_get_registers(struct intel_dp *intel_dp, regs->pp_off = PP_OFF_DELAYS(pps_idx); /* 
Cycle delay moved from PP_DIVISOR to PP_CONTROL */ - if (IS_GEN9_LP(dev_priv) || INTEL_PCH_TYPE(dev_priv) >= PCH_CNP) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) || + INTEL_PCH_TYPE(dev_priv) >= PCH_CNP) regs->pp_div = INVALID_MMIO_REG; else regs->pp_div = PP_DIVISOR(pps_idx); @@ -1399,7 +1397,7 @@ void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv) void intel_pps_setup(struct drm_i915_private *i915) { - if (HAS_PCH_SPLIT(i915) || IS_GEN9_LP(i915)) + if (HAS_PCH_SPLIT(i915) || IS_GEMINILAKE(i915) || IS_BROXTON(i915)) i915->pps_mmio_base = PCH_PPS_BASE; else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) i915->pps_mmio_base = VLV_PPS_BASE; diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 1059a26c1f58..9bee99fe5495 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -297,7 +297,7 @@ static int intel_dsi_compute_config(struct intel_encoder *encoder, else pipe_config->pipe_bpp = 18; - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { /* Enable Frame time stamp based scanline reporting */ pipe_config->mode_flags |= I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP; @@ -522,7 +522,7 @@ static void intel_dsi_device_ready(struct intel_encoder *encoder) if (IS_GEMINILAKE(dev_priv)) glk_dsi_device_ready(encoder); - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_dsi_device_ready(encoder); else vlv_dsi_device_ready(encoder); @@ -601,7 +601,7 @@ static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder) drm_dbg_kms(&dev_priv->drm, "\n"); for_each_dsi_port(port, intel_dsi->ports) { /* Common bit for both MIPI Port A & MIPI Port C on VLV/CHV */ - i915_reg_t port_ctrl = IS_GEN9_LP(dev_priv) ? + i915_reg_t port_ctrl = IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) ? BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(PORT_A); u32 val; @@ -621,7 +621,7 @@ static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder) * On VLV/CHV, wait till Clock lanes are in LP-00 state for MIPI * Port A only. MIPI Port C has no similar bit for checking. */ - if ((IS_GEN9_LP(dev_priv) || port == PORT_A) && + if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) || port == PORT_A) && intel_de_wait_for_clear(dev_priv, port_ctrl, AFE_LATCHOUT, 30)) drm_err(&dev_priv->drm, "DSI LP not going Low\n"); @@ -646,7 +646,7 @@ static void intel_dsi_port_enable(struct intel_encoder *encoder, if (intel_dsi->dual_link == DSI_DUAL_LINK_FRONT_BACK) { u32 temp; - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { for_each_dsi_port(port, intel_dsi->ports) { temp = intel_de_read(dev_priv, MIPI_CTRL(port)); @@ -666,7 +666,7 @@ static void intel_dsi_port_enable(struct intel_encoder *encoder, } for_each_dsi_port(port, intel_dsi->ports) { - i915_reg_t port_ctrl = IS_GEN9_LP(dev_priv) ? + i915_reg_t port_ctrl = IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) ? BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port); u32 temp; @@ -703,7 +703,7 @@ static void intel_dsi_port_disable(struct intel_encoder *encoder) enum port port; for_each_dsi_port(port, intel_dsi->ports) { - i915_reg_t port_ctrl = IS_GEN9_LP(dev_priv) ? + i915_reg_t port_ctrl = IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) ? BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port); u32 temp; @@ -781,7 +781,7 @@ static void intel_dsi_pre_enable(struct intel_atomic_state *state, * The BIOS may leave the PLL in a wonky state where it doesn't * lock. 
It needs to be fully powered down to fix it. */ - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { bxt_dsi_pll_disable(encoder); bxt_dsi_pll_enable(encoder, pipe_config); } else { @@ -932,7 +932,7 @@ static void intel_dsi_post_disable(struct intel_atomic_state *state, drm_dbg_kms(&dev_priv->drm, "\n"); - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { intel_crtc_vblank_off(old_crtc_state); skl_scaler_disable(old_crtc_state); @@ -971,7 +971,7 @@ static void intel_dsi_post_disable(struct intel_atomic_state *state, val & ~MIPIO_RST_CTRL); } - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { bxt_dsi_pll_disable(encoder); } else { u32 val; @@ -1024,12 +1024,13 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, * configuration, otherwise accessing DSI registers will hang the * machine. See BSpec North Display Engine registers/MIPI[BXT]. */ - if (IS_GEN9_LP(dev_priv) && !bxt_dsi_pll_is_enabled(dev_priv)) + if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) && + !bxt_dsi_pll_is_enabled(dev_priv)) goto out_put_power; /* XXX: this only works for one DSI output */ for_each_dsi_port(port, intel_dsi->ports) { - i915_reg_t ctrl_reg = IS_GEN9_LP(dev_priv) ? + i915_reg_t ctrl_reg = IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv) ? BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port); bool enabled = intel_de_read(dev_priv, ctrl_reg) & DPI_ENABLE; @@ -1055,7 +1056,7 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, if (!(intel_de_read(dev_priv, MIPI_DEVICE_READY(port)) & DEVICE_READY)) continue; - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { u32 tmp = intel_de_read(dev_priv, MIPI_CTRL(port)); tmp &= BXT_PIPE_SELECT_MASK; tmp >>= BXT_PIPE_SELECT_SHIFT; @@ -1251,7 +1252,7 @@ static void intel_dsi_get_config(struct intel_encoder *encoder, pipe_config->output_types |= BIT(INTEL_OUTPUT_DSI); - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { bxt_dsi_get_pipe_config(encoder, pipe_config); pclk = bxt_dsi_get_pclk(encoder, pipe_config); } else { @@ -1317,7 +1318,7 @@ static void set_dsi_timings(struct drm_encoder *encoder, hbp = txbyteclkhs(hbp, bpp, lane_count, intel_dsi->burst_mode_ratio); for_each_dsi_port(port, intel_dsi->ports) { - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { /* * Program hdisplay and vdisplay on MIPI transcoder. 
* This is different from calculated hactive and @@ -1407,7 +1408,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, tmp &= ~READ_REQUEST_PRIORITY_MASK; intel_de_write(dev_priv, MIPI_CTRL(port), tmp | READ_REQUEST_PRIORITY_HIGH); - } else if (IS_GEN9_LP(dev_priv)) { + } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { enum pipe pipe = intel_crtc->pipe; tmp = intel_de_read(dev_priv, MIPI_CTRL(port)); @@ -1445,7 +1446,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, if (intel_dsi->clock_stop) tmp |= CLOCKSTOP; - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { tmp |= BXT_DPHY_DEFEATURE_EN; if (!is_cmd_mode(intel_dsi)) tmp |= BXT_DEFEATURE_DPI_FIFO_CTR; @@ -1492,7 +1493,8 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, intel_de_write(dev_priv, MIPI_INIT_COUNT(port), txclkesc(intel_dsi->escape_clk_div, 100)); - if (IS_GEN9_LP(dev_priv) && (!intel_dsi->dual_link)) { + if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) && + !intel_dsi->dual_link) { /* * BXT spec says write MIPI_INIT_COUNT for * both the ports, even if only one is @@ -1570,7 +1572,7 @@ static void intel_dsi_unprepare(struct intel_encoder *encoder) /* Panel commands can be sent when clock is in LP11 */ intel_de_write(dev_priv, MIPI_DEVICE_READY(port), 0x0); - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_dsi_reset_clocks(encoder, port); else vlv_dsi_reset_clocks(encoder, port); @@ -1828,7 +1830,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) if (!intel_bios_is_dsi_present(dev_priv, &port)) return; - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) dev_priv->mipi_mmio_base = BXT_MIPI_BASE; else dev_priv->mipi_mmio_base = VLV_MIPI_BASE; @@ -1854,7 +1856,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) intel_encoder->compute_config = intel_dsi_compute_config; intel_encoder->pre_enable = intel_dsi_pre_enable; - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) intel_encoder->enable = bxt_dsi_enable; intel_encoder->disable = intel_dsi_disable; intel_encoder->post_disable = intel_dsi_post_disable; @@ -1874,7 +1876,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) * On BYT/CHV, pipe A maps to MIPI DSI port A, pipe B maps to MIPI DSI * port C. BXT isn't limited like this. 
*/ - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) intel_encoder->pipe_mask = ~0; else if (port == PORT_A) intel_encoder->pipe_mask = BIT(PIPE_A); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7eefbdec25a2..9aff790817c0 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -194,7 +194,7 @@ static void intel_hpd_init_pins(struct drm_i915_private *dev_priv) if (DISPLAY_VER(dev_priv) >= 11) hpd->hpd = hpd_gen11; - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) hpd->hpd = hpd_bxt; else if (DISPLAY_VER(dev_priv) >= 8) hpd->hpd = hpd_bdw; @@ -2458,7 +2458,7 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) found = true; } - if (IS_GEN9_LP(dev_priv)) { + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { u32 hotplug_trigger = iir & BXT_DE_PORT_HOTPLUG_MASK; if (hotplug_trigger) { @@ -2474,7 +2474,8 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) } } - if (IS_GEN9_LP(dev_priv) && (iir & BXT_DE_PORT_GMBUS)) { + if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) && + (iir & BXT_DE_PORT_GMBUS)) { gmbus_irq_handler(dev_priv); found = true; } @@ -3717,7 +3718,7 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) if (DISPLAY_VER(dev_priv) <= 10) de_misc_masked |= GEN8_DE_MISC_GSE; - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) de_port_masked |= BXT_DE_PORT_GMBUS; if (DISPLAY_VER(dev_priv) >= 11) { @@ -3732,7 +3733,7 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) gen8_de_pipe_flip_done_mask(dev_priv); de_port_enables = de_port_masked; - if (IS_GEN9_LP(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) de_port_enables |= BXT_DE_PORT_HOTPLUG_MASK; else if (IS_BROADWELL(dev_priv)) de_port_enables |= BDW_DE_PORT_HOTPLUG_MASK; @@ -4317,7 +4318,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv) dev_priv->display.hpd_irq_setup = dg1_hpd_irq_setup; else if (DISPLAY_VER(dev_priv) >= 11) dev_priv->display.hpd_irq_setup = gen11_hpd_irq_setup; - else if (IS_GEN9_LP(dev_priv)) + else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) dev_priv->display.hpd_irq_setup = bxt_hpd_irq_setup; else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) dev_priv->display.hpd_irq_setup = icp_hpd_irq_setup; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 066abaa73a06..32f301ca3ab0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3654,13 +3654,13 @@ u8 intel_enabled_dbuf_slices_mask(struct drm_i915_private *dev_priv) */ static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv) { - return IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv); + return IS_DISPLAY_VER(dev_priv, 9); } static bool intel_has_sagv(struct drm_i915_private *dev_priv) { - return (IS_GEN9_BC(dev_priv) || DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) && + return DISPLAY_VER(dev_priv) >= 9 && !IS_LP(dev_priv) && dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED; } @@ -5258,7 +5258,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, lines = div_round_up_fixed16(selected_result, wp->plane_blocks_per_line); - if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) { + if (IS_DISPLAY_VER(dev_priv, 9)) { /* Display WA #1125: skl,bxt,kbl */ if (level == 0 && wp->rc_surface) blocks += fixed16_to_u32_round_up(wp->y_tile_minimum); @@ -5375,7 +5375,7 @@ static 
void skl_compute_transition_wm(struct drm_i915_private *dev_priv, * WaDisableTWM:skl,kbl,cfl,bxt * Transition WM are not recommended by HW team for GEN9 */ - if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) + if (IS_DISPLAY_VER(dev_priv, 9)) return; if (DISPLAY_VER(dev_priv) >= 11) -- cgit From 87b8c3bc8d27270c9abd5e895ea9066e918ef89b Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 12 Apr 2021 08:46:03 +0300 Subject: drm/i915: Restore lost glk FBC 16bpp w/a MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We lost the FBC 16bpp 512byte stride requirement on glk when we switched from display version 9 to 10. Restore the w/a to avoid enabling FBC with a bad stride and thus display garbage. Cc: Matt Roper Fixes: 2b5a4562edd0 ("drm/i915/display: Simplify GLK display version tests") Reviewed-by: Jani Nikula Signed-off-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210412054607.18133-2-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_fbc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 04d9c7d22b04..dcc0f73ea225 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -598,7 +598,7 @@ static bool stride_is_valid(struct drm_i915_private *dev_priv, return false; /* Display WA #1105: skl,bxt,kbl,cfl,glk */ - if (IS_DISPLAY_VER(dev_priv, 9) && + if ((IS_DISPLAY_VER(dev_priv, 9) || IS_GEMINILAKE(dev_priv)) && modifier == DRM_FORMAT_MOD_LINEAR && stride & 511) return false; -- cgit From 0fe6637d9852a33c2873e59ae7e5225f92ac4cc2 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 12 Apr 2021 08:46:04 +0300 Subject: drm/i915: Restore lost glk ccs w/a MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We lost a CCS related w/a on glk when the display version became 10 instead of 9. Restore the correct check. Cc: Matt Roper Fixes: 2b5a4562edd0 ("drm/i915/display: Simplify GLK display version tests") Reviewed-by: Jani Nikula Signed-off-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210412054607.18133-3-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 4f8f994a639f..00120697e59b 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1403,7 +1403,8 @@ intel_fb_stride_alignment(const struct drm_framebuffer *fb, int color_plane) * require the entire fb to accommodate that to avoid * potential runtime errors at plane configuration time. */ - if (IS_DISPLAY_VER(dev_priv, 9) && color_plane == 0 && fb->width > 3840) + if ((IS_DISPLAY_VER(dev_priv, 9) || IS_GEMINILAKE(dev_priv)) && + color_plane == 0 && fb->width > 3840) tile_width *= 4; /* * The main surface pitch must be padded to a multiple of four -- cgit From 543d592a73d3948737d0ae4880a407c6da28662f Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 12 Apr 2021 08:46:05 +0300 Subject: drm/i915: Disable LTTPR detection on GLK once again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The glk display version change is causing us to again attempt LTTPR detection on glk. 
We must not do that since glk doesn't have a long enough AUX timeout. Restore the correct logic to skip the detection. Cc: Matt Roper Fixes: 2b5a4562edd0 ("drm/i915/display: Simplify GLK display version tests") Reviewed-by: Jani Nikula Signed-off-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210412054607.18133-4-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_dp_link_training.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 5e9c3c74310c..597634e4f35f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -96,7 +96,7 @@ static bool intel_dp_read_lttpr_common_caps(struct intel_dp *intel_dp) * Detecting LTTPRs must be avoided on platforms with an AUX timeout * period < 3.2ms. (see DP Standard v2.0, 2.11.2, 3.6.6.1). */ - if (DISPLAY_VER(i915) < 10) + if (DISPLAY_VER(i915) < 10 || IS_GEMINILAKE(i915)) return false; if (drm_dp_read_lttpr_common_caps(&intel_dp->aux, -- cgit From 6c51f288b41f5dfc3061ab1fd8a576513edfce6b Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 12 Apr 2021 08:46:06 +0300 Subject: drm/i915: Don't use {skl, cnl}_hpd_pin() for bxt/glk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just let bxt/glk fall back to intel_hpd_pin_default() instead of using skl_hpd_pin() or cnl_hpd_pin(). Doesn't really matter since both functions will end up returning the correct hpd pin anyway, but I find it a bit less confusing when bxt/glk are fully separated from the logic for the other platforms. Reviewed-by: Jani Nikula Signed-off-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210412054607.18133-5-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index deef24da00b5..e208ded9a688 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4612,9 +4612,9 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) encoder->hpd_pin = ehl_hpd_pin(dev_priv, port); else if (IS_DISPLAY_VER(dev_priv, 11)) encoder->hpd_pin = icl_hpd_pin(dev_priv, port); - else if (IS_DISPLAY_VER(dev_priv, 10)) + else if (IS_CANNONLAKE(dev_priv)) encoder->hpd_pin = cnl_hpd_pin(dev_priv, port); - else if (IS_DISPLAY_VER(dev_priv, 9)) + else if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) encoder->hpd_pin = skl_hpd_pin(dev_priv, port); else encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port); -- cgit From ad314fec4d0551c31688dc7530b12d04af326f9e Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 12 Apr 2021 08:46:07 +0300 Subject: drm/i915: Remove a few redundant glk checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the glk display version is 10, we can drop a few more glk checks.
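As a minimal illustration of why such checks become redundant, here is a standalone C sketch; the struct and helper names are toy stand-ins assumed only for this example, not the real i915 macros:

#include <assert.h>
#include <stdbool.h>

/* Toy model of the platform info; field names are illustrative only. */
struct toy_i915 {
	int display_ver;	/* what DISPLAY_VER() would report */
	bool is_geminilake;	/* what IS_GEMINILAKE() would report */
};

/* Old form: pre-display-10 hardware, with an explicit glk carve-out. */
static bool old_check(const struct toy_i915 *i915)
{
	return i915->display_ver < 10 && !i915->is_geminilake;
}

/* New form: glk now reports display version 10, so the version
 * comparison alone already excludes it. */
static bool new_check(const struct toy_i915 *i915)
{
	return i915->display_ver < 10;
}

int main(void)
{
	const struct toy_i915 glk = { .display_ver = 10, .is_geminilake = true };
	const struct toy_i915 skl = { .display_ver = 9, .is_geminilake = false };
	const struct toy_i915 cnl = { .display_ver = 10, .is_geminilake = false };

	/* Both forms agree on every platform once glk is version 10. */
	assert(old_check(&glk) == new_check(&glk));
	assert(old_check(&skl) == new_check(&skl));
	assert(old_check(&cnl) == new_check(&cnl));
	return 0;
}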
Reviewed-by: Jani Nikula Signed-off-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210412054607.18133-6-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 2 +- drivers/gpu/drm/i915/display/skl_universal_plane.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index e208ded9a688..79ece9e45d66 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -176,7 +176,7 @@ static void intel_wait_ddi_buf_active(struct drm_i915_private *dev_priv, enum port port) { /* Wait > 518 usecs for DDI_BUF_CTL to be non idle */ - if (DISPLAY_VER(dev_priv) < 10 && !IS_GEMINILAKE(dev_priv)) { + if (DISPLAY_VER(dev_priv) < 10) { usleep_range(518, 1000); return; } diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 7ffd7b570b54..32bfd984be5c 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -829,7 +829,7 @@ static u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, plane_ctl = PLANE_CTL_ENABLE; - if (DISPLAY_VER(dev_priv) < 10 && !IS_GEMINILAKE(dev_priv)) { + if (DISPLAY_VER(dev_priv) < 10) { plane_ctl |= skl_plane_ctl_alpha(plane_state); plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE; -- cgit From 27ec10b3eb2e14588b2ce5b4c8ea5460e7143179 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 12 Apr 2021 22:09:51 -0700 Subject: drm/i915/display: use DISPLAY_VER() on remaining users Commit 989634fb49ad ("drm/i915/audio: set HDA link parameters in driver") added INTEL_GEN() in the display code, where it should actually be using DISPLAY_VER(). Switch to the new macro. Cc: Kai Vehmanen Reviewed-by: Jani Nikula Signed-off-by: Lucas De Marchi Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210413051002.92589-2-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/intel_audio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 9671c8f6e892..9fe3a25710b8 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -1309,7 +1309,7 @@ static void i915_audio_component_init(struct drm_i915_private *dev_priv) if (DISPLAY_VER(dev_priv) >= 9) { aud_freq_init = intel_de_read(dev_priv, AUD_FREQ_CNTRL); - if (INTEL_GEN(dev_priv) >= 12) + if (DISPLAY_VER(dev_priv) >= 12) aud_freq = AUD_FREQ_GEN12; else aud_freq = aud_freq_init; -- cgit From 4df9c1ae7a4bb9366c298e6a4eddf54b65a0714c Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 12 Apr 2021 22:09:52 -0700 Subject: drm/i915: rename display.version to display.ver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The macro we use to check is called DISPLAY_VER(). While using this macro and the new ones being added in following changes, I made the mistake multiple times when mixing both "ver" and "version". Although it's usually better to prefer the complete name, the shorthand DISPLAY_VER() / GRAPHICS_VER / MEDIA_VER are clear and cause less visual pollution. Another issue is when copying the variable to other places. "display.version" would be copied to a "display_version" variable which is long and would make people abbreviate as "version", or "display_ver". 
In the first case it's not always clear what version refers to, and in the second case it just hints it should be the name in the first place. So, in the same way we used "gen" rather than "generation", use "ver" instead of "version". Signed-off-by: Lucas De Marchi Reviewed-by: José Roberto de Souza Acked-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210413051002.92589-3-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_pci.c | 4 ++-- drivers/gpu/drm/i915/intel_device_info.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 69e43bf91a15..8c62bb2abd31 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1237,7 +1237,7 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) #define INTEL_GEN(dev_priv) (INTEL_INFO(dev_priv)->gen) #define INTEL_DEVID(dev_priv) (RUNTIME_INFO(dev_priv)->device_id) -#define DISPLAY_VER(i915) (INTEL_INFO(i915)->display.version) +#define DISPLAY_VER(i915) (INTEL_INFO(i915)->display.ver) #define IS_DISPLAY_RANGE(i915, from, until) \ (DISPLAY_VER(i915) >= (from) && DISPLAY_VER(i915) <= (until)) #define IS_DISPLAY_VER(i915, v) (DISPLAY_VER(i915) == (v)) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 480553746794..ce5cbeaf036d 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -36,7 +36,7 @@ #include "i915_selftest.h" #define PLATFORM(x) .platform = (x) -#define GEN(x) .gen = (x), .gen_mask = BIT((x) - 1), .display.version = (x) +#define GEN(x) .gen = (x), .gen_mask = BIT((x) - 1), .display.ver = (x) #define I845_PIPE_OFFSETS \ .pipe_offsets = { \ @@ -723,7 +723,7 @@ static const struct intel_device_info bxt_info = { static const struct intel_device_info glk_info = { GEN9_LP_FEATURES, PLATFORM(INTEL_GEMINILAKE), - .display.version = 10, + .display.ver = 10, .ddb_size = 1024, GLK_COLORS, }; diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 2f442d418a15..b16c75927a12 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -189,7 +189,7 @@ struct intel_device_info { #undef DEFINE_FLAG struct { - u8 version; + u8 ver; #define DEFINE_FLAG(name) u8 name:1 DEV_INFO_DISPLAY_FOR_EACH_FLAG(DEFINE_FLAG); -- cgit From 93e7e61eb448318e5793c4b20b21a8fd92d4f949 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 12 Apr 2021 22:09:53 -0700 Subject: drm/i915/display: rename display version macros While converting the rest of the driver to use GRAPHICS_VER() and MEDIA_VER(), following what was done for display, some discussions went back on what we did for display: 1) Why is the == comparison so special that it deserves a separate macro, instead of just getting the version and comparing directly like is done for >, >=, <=? 2) IS_DISPLAY_RANGE() is weird in that it omits the "_VER" for brevity. If we remove the current users of IS_DISPLAY_VER(), we could actually repurpose it for a range check. With (1) there could be an advantage if we used gen_mask, since multiple conditionals can be combined by the compiler into a single and instruction plus a check of the result. However a) INTEL_GEN() doesn't use the mask since it would make the code bigger everywhere else and b) in the cases it made sense, it also made sense to convert to the _RANGE() variant. 
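For context, a toy C sketch of that bitmask idea (macro and function names here are assumptions for illustration, not driver code): encoding each version as one bit lets the compiler fold an "either version" test into a single and instruction.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Toy version mask: version N is represented by bit N-1. */
#define TOY_BIT(n)		(UINT32_C(1) << (n))
#define TOY_VER_MASK(ver)	TOY_BIT((ver) - 1)

/* "ver == 11 || ver == 12" becomes one AND against a constant mask. */
static bool toy_is_ver_11_or_12(uint32_t ver_mask)
{
	return ver_mask & (TOY_VER_MASK(11) | TOY_VER_MASK(12));
}

int main(void)
{
	assert(toy_is_ver_11_or_12(TOY_VER_MASK(11)));
	assert(toy_is_ver_11_or_12(TOY_VER_MASK(12)));
	assert(!toy_is_ver_11_or_12(TOY_VER_MASK(9)));
	return 0;
}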
So here we repurpose IS_DISPLAY_VER() to work with a [ from, to ] range, as IS_DISPLAY_RANGE() did, and convert the current IS_DISPLAY_VER() users to use == and != operators. Aside from the definition changes, this was done by the following semantic patch: @@ expression dev_priv, E1; @@ - !IS_DISPLAY_VER(dev_priv, E1) + DISPLAY_VER(dev_priv) != E1 @@ expression dev_priv, E1; @@ - IS_DISPLAY_VER(dev_priv, E1) + DISPLAY_VER(dev_priv) == E1 @@ expression dev_priv, from, until; @@ - IS_DISPLAY_RANGE(dev_priv, from, until) + IS_DISPLAY_VER(dev_priv, from, until) Cc: Jani Nikula Cc: Matt Roper Reviewed-by: Jani Nikula Signed-off-by: Lucas De Marchi [Jani: Minor conflict resolve while applying.] Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20210413051002.92589-4-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/i9xx_plane.c | 2 +- drivers/gpu/drm/i915/display/icl_dsi.c | 4 +- drivers/gpu/drm/i915/display/intel_atomic.c | 2 +- drivers/gpu/drm/i915/display/intel_audio.c | 2 +- drivers/gpu/drm/i915/display/intel_bios.c | 4 +- drivers/gpu/drm/i915/display/intel_bw.c | 8 ++-- drivers/gpu/drm/i915/display/intel_cdclk.c | 18 +++---- drivers/gpu/drm/i915/display/intel_color.c | 6 +-- drivers/gpu/drm/i915/display/intel_crt.c | 6 +-- drivers/gpu/drm/i915/display/intel_crtc.c | 4 +- drivers/gpu/drm/i915/display/intel_csr.c | 2 +- drivers/gpu/drm/i915/display/intel_ddi.c | 24 +++++----- drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c | 8 ++-- drivers/gpu/drm/i915/display/intel_display.c | 56 +++++++++++----------- drivers/gpu/drm/i915/display/intel_display_power.c | 26 +++++----- drivers/gpu/drm/i915/display/intel_dp.c | 8 ++-- drivers/gpu/drm/i915/display/intel_dpll.c | 2 +- drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 2 +- drivers/gpu/drm/i915/display/intel_fb.c | 2 +- drivers/gpu/drm/i915/display/intel_fbc.c | 20 ++++---- drivers/gpu/drm/i915/display/intel_fifo_underrun.c | 4 +- drivers/gpu/drm/i915/display/intel_gmbus.c | 4 +- drivers/gpu/drm/i915/display/intel_hdcp.c | 2 +- drivers/gpu/drm/i915/display/intel_hdmi.c | 4 +- drivers/gpu/drm/i915/display/intel_lvds.c | 2 +- drivers/gpu/drm/i915/display/intel_overlay.c | 10 ++-- drivers/gpu/drm/i915/display/intel_panel.c | 8 ++-- drivers/gpu/drm/i915/display/intel_pipe_crc.c | 4 +- drivers/gpu/drm/i915/display/intel_psr.c | 4 +- drivers/gpu/drm/i915/display/intel_tc.c | 6 +-- drivers/gpu/drm/i915/display/intel_tv.c | 6 +-- drivers/gpu/drm/i915/display/skl_universal_plane.c | 8 ++-- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_irq.c | 10 ++-- drivers/gpu/drm/i915/intel_pm.c | 48 +++++++++---------- 35 files changed, 164 insertions(+), 165 deletions(-) diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.c b/drivers/gpu/drm/i915/display/i9xx_plane.c index 456374ddf37a..80da0e3571a4 100644 --- a/drivers/gpu/drm/i915/display/i9xx_plane.c +++ b/drivers/gpu/drm/i915/display/i9xx_plane.c @@ -144,7 +144,7 @@ static bool i9xx_plane_has_windowing(struct intel_plane *plane) return i9xx_plane == PLANE_B; else if (DISPLAY_VER(dev_priv) >= 5 || IS_G4X(dev_priv)) return false; - else if (IS_DISPLAY_VER(dev_priv, 4)) + else if (DISPLAY_VER(dev_priv) == 4) return i9xx_plane == PLANE_C; else return i9xx_plane == PLANE_B || diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 9282978060b0..37e2d93d064c 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -592,7 +592,7 @@ gen11_dsi_setup_dphy_timings(struct 
intel_encoder *encoder, * a value '0' inside TA_PARAM_REGISTERS otherwise * leave all fields at HW default values. */ - if (IS_DISPLAY_VER(dev_priv, 11)) { + if (DISPLAY_VER(dev_priv) == 11) { if (afe_clk(encoder, crtc_state) <= 800000) { for_each_dsi_port(port, intel_dsi->ports) { tmp = intel_de_read(dev_priv, @@ -1158,7 +1158,7 @@ gen11_dsi_enable_port_and_phy(struct intel_encoder *encoder, gen11_dsi_configure_transcoder(encoder, crtc_state); /* Step 4l: Gate DDI clocks */ - if (IS_DISPLAY_VER(dev_priv, 11)) + if (DISPLAY_VER(dev_priv) == 11) gen11_dsi_gate_clocks(encoder); } diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 4fa389fce8cb..45feaaddab26 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -332,7 +332,7 @@ static void intel_atomic_setup_scaler(struct intel_crtc_scaler_state *scaler_sta plane_state->hw.fb->format->is_yuv && plane_state->hw.fb->format->num_planes > 1) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); - if (IS_DISPLAY_VER(dev_priv, 9)) { + if (DISPLAY_VER(dev_priv) == 9) { mode = SKL_PS_SCALER_MODE_NV12; } else if (icl_is_hdr_plane(dev_priv, plane->id)) { /* diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 9fe3a25710b8..b40e929a167e 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -591,7 +591,7 @@ static void enable_audio_dsc_wa(struct intel_encoder *encoder, val = intel_de_read(i915, AUD_CONFIG_BE); - if (IS_DISPLAY_VER(i915, 11)) + if (DISPLAY_VER(i915) == 11) val |= HBLANK_EARLY_ENABLE_ICL(pipe); else if (DISPLAY_VER(i915) >= 12) val |= HBLANK_EARLY_ENABLE_TGL(pipe); diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index ea4837d485a1..befab891a6b9 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -610,7 +610,7 @@ parse_sdvo_device_mapping(struct drm_i915_private *i915) * Only parse SDVO mappings on gens that could have SDVO. This isn't * accurate and doesn't have to be, as long as it's not too strict. 
*/ - if (!IS_DISPLAY_RANGE(i915, 3, 7)) { + if (!IS_DISPLAY_VER(i915, 3, 7)) { drm_dbg_kms(&i915->drm, "Skipping SDVO device mapping\n"); return; } @@ -1659,7 +1659,7 @@ static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin) } else if (IS_ROCKETLAKE(i915) && INTEL_PCH_TYPE(i915) == PCH_TGP) { ddc_pin_map = rkl_pch_tgp_ddc_pin_map; n_entries = ARRAY_SIZE(rkl_pch_tgp_ddc_pin_map); - } else if (HAS_PCH_TGP(i915) && IS_DISPLAY_VER(i915, 9)) { + } else if (HAS_PCH_TGP(i915) && DISPLAY_VER(i915) == 9) { ddc_pin_map = gen9bc_tgp_ddc_pin_map; n_entries = ARRAY_SIZE(gen9bc_tgp_ddc_pin_map); } else if (INTEL_PCH_TYPE(i915) >= PCH_ICP) { diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 584ab5ce4106..20dbc3759d27 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -77,7 +77,7 @@ static int icl_get_qgv_points(struct drm_i915_private *dev_priv, qi->num_points = dram_info->num_qgv_points; - if (IS_DISPLAY_VER(dev_priv, 12)) + if (DISPLAY_VER(dev_priv) == 12) switch (dram_info->type) { case INTEL_DRAM_DDR4: qi->t_bl = 4; @@ -89,7 +89,7 @@ static int icl_get_qgv_points(struct drm_i915_private *dev_priv, qi->t_bl = 16; break; } - else if (IS_DISPLAY_VER(dev_priv, 11)) + else if (DISPLAY_VER(dev_priv) == 11) qi->t_bl = dev_priv->dram_info.type == INTEL_DRAM_DDR4 ? 4 : 8; if (drm_WARN_ON(&dev_priv->drm, @@ -271,9 +271,9 @@ void intel_bw_init_hw(struct drm_i915_private *dev_priv) icl_get_bw_info(dev_priv, &adls_sa_info); else if (IS_ROCKETLAKE(dev_priv)) icl_get_bw_info(dev_priv, &rkl_sa_info); - else if (IS_DISPLAY_VER(dev_priv, 12)) + else if (DISPLAY_VER(dev_priv) == 12) icl_get_bw_info(dev_priv, &tgl_sa_info); - else if (IS_DISPLAY_VER(dev_priv, 11)) + else if (DISPLAY_VER(dev_priv) == 11) icl_get_bw_info(dev_priv, &icl_sa_info); } diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 489acf6b5cf1..1f0bd23bb883 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1801,7 +1801,7 @@ void intel_cdclk_init_hw(struct drm_i915_private *i915) { if (DISPLAY_VER(i915) >= 10 || IS_BROXTON(i915)) bxt_cdclk_init_hw(i915); - else if (IS_DISPLAY_VER(i915, 9)) + else if (DISPLAY_VER(i915) == 9) skl_cdclk_init_hw(i915); } @@ -1816,7 +1816,7 @@ void intel_cdclk_uninit_hw(struct drm_i915_private *i915) { if (DISPLAY_VER(i915) >= 10 || IS_BROXTON(i915)) bxt_cdclk_uninit_hw(i915); - else if (IS_DISPLAY_VER(i915, 9)) + else if (DISPLAY_VER(i915) == 9) skl_cdclk_uninit_hw(i915); } @@ -2004,7 +2004,7 @@ static int intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state) if (DISPLAY_VER(dev_priv) >= 10) return DIV_ROUND_UP(pixel_rate, 2); - else if (IS_DISPLAY_VER(dev_priv, 9) || + else if (DISPLAY_VER(dev_priv) == 9 || IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) return pixel_rate; else if (IS_CHERRYVIEW(dev_priv)) @@ -2052,10 +2052,10 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) crtc_state->has_audio && crtc_state->port_clock >= 540000 && crtc_state->lane_count == 4) { - if (IS_DISPLAY_VER(dev_priv, 10)) { + if (DISPLAY_VER(dev_priv) == 10) { /* Display WA #1145: glk,cnl */ min_cdclk = max(316800, min_cdclk); - } else if (IS_DISPLAY_VER(dev_priv, 9) || IS_BROADWELL(dev_priv)) { + } else if (DISPLAY_VER(dev_priv) == 9 || IS_BROADWELL(dev_priv)) { /* Display WA #1144: skl,bxt */ min_cdclk = max(432000, min_cdclk); } @@ -2594,7 +2594,7 @@ static int 
intel_compute_max_dotclk(struct drm_i915_private *dev_priv) if (DISPLAY_VER(dev_priv) >= 10) return 2 * max_cdclk_freq; - else if (IS_DISPLAY_VER(dev_priv, 9) || + else if (DISPLAY_VER(dev_priv) == 9 || IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) return max_cdclk_freq; else if (IS_CHERRYVIEW(dev_priv)) @@ -2631,7 +2631,7 @@ void intel_update_max_cdclk(struct drm_i915_private *dev_priv) dev_priv->max_cdclk_freq = 316800; } else if (IS_BROXTON(dev_priv)) { dev_priv->max_cdclk_freq = 624000; - } else if (IS_DISPLAY_VER(dev_priv, 9)) { + } else if (DISPLAY_VER(dev_priv) == 9) { u32 limit = intel_de_read(dev_priv, SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK; int max_cdclk, vco; @@ -2889,7 +2889,7 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) dev_priv->cdclk.table = glk_cdclk_table; else dev_priv->cdclk.table = bxt_cdclk_table; - } else if (IS_DISPLAY_VER(dev_priv, 9)) { + } else if (DISPLAY_VER(dev_priv) == 9) { dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk; dev_priv->display.set_cdclk = skl_set_cdclk; dev_priv->display.modeset_calc_cdclk = skl_modeset_calc_cdclk; @@ -2912,7 +2912,7 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) if (DISPLAY_VER(dev_priv) >= 10 || IS_BROXTON(dev_priv)) dev_priv->display.get_cdclk = bxt_get_cdclk; - else if (IS_DISPLAY_VER(dev_priv, 9)) + else if (DISPLAY_VER(dev_priv) == 9) dev_priv->display.get_cdclk = skl_get_cdclk; else if (IS_BROADWELL(dev_priv)) dev_priv->display.get_cdclk = bdw_get_cdclk; diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index c75d7124d57a..5fae69879adf 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -225,7 +225,7 @@ static bool ilk_csc_limited_range(const struct intel_crtc_state *crtc_state) */ return crtc_state->limited_color_range && (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || - IS_DISPLAY_RANGE(dev_priv, 9, 10)); + IS_DISPLAY_VER(dev_priv, 9, 10)); } static void ilk_csc_convert_ctm(const struct intel_crtc_state *crtc_state, @@ -1711,7 +1711,7 @@ int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_stat } else { if (DISPLAY_VER(dev_priv) >= 11) return icl_gamma_precision(crtc_state); - else if (IS_DISPLAY_VER(dev_priv, 10)) + else if (DISPLAY_VER(dev_priv) == 10) return glk_gamma_precision(crtc_state); else if (IS_IRONLAKE(dev_priv)) return ilk_gamma_precision(crtc_state); @@ -2136,7 +2136,7 @@ void intel_color_init(struct intel_crtc *crtc) if (DISPLAY_VER(dev_priv) >= 11) { dev_priv->display.load_luts = icl_load_luts; dev_priv->display.read_luts = icl_read_luts; - } else if (IS_DISPLAY_VER(dev_priv, 10)) { + } else if (DISPLAY_VER(dev_priv) == 10) { dev_priv->display.load_luts = glk_load_luts; dev_priv->display.read_luts = glk_read_luts; } else if (DISPLAY_VER(dev_priv) >= 8) { diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 580d652c3276..c85092eaa5c2 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -356,7 +356,7 @@ intel_crt_mode_valid(struct drm_connector *connector, * DAC limit supposedly 355 MHz. */ max_clock = 270000; - else if (IS_DISPLAY_RANGE(dev_priv, 3, 4)) + else if (IS_DISPLAY_VER(dev_priv, 3, 4)) max_clock = 400000; else max_clock = 350000; @@ -711,7 +711,7 @@ intel_crt_load_detect(struct intel_crt *crt, u32 pipe) /* Set the border color to purple. 
*/ intel_uncore_write(uncore, bclrpat_reg, 0x500050); - if (!IS_DISPLAY_VER(dev_priv, 2)) { + if (DISPLAY_VER(dev_priv) != 2) { u32 pipeconf = intel_uncore_read(uncore, pipeconf_reg); intel_uncore_write(uncore, pipeconf_reg, @@ -1047,7 +1047,7 @@ void intel_crt_init(struct drm_i915_private *dev_priv) else crt->base.pipe_mask = ~0; - if (IS_DISPLAY_VER(dev_priv, 2)) + if (DISPLAY_VER(dev_priv) == 2) connector->interlace_allowed = 0; else connector->interlace_allowed = 1; diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c index 39358076c05b..95ff1707b4bd 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -302,11 +302,11 @@ int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) if (IS_CHERRYVIEW(dev_priv) || IS_VALLEYVIEW(dev_priv) || IS_G4X(dev_priv)) funcs = &g4x_crtc_funcs; - else if (IS_DISPLAY_VER(dev_priv, 4)) + else if (DISPLAY_VER(dev_priv) == 4) funcs = &i965_crtc_funcs; else if (IS_I945GM(dev_priv) || IS_I915GM(dev_priv)) funcs = &i915gm_crtc_funcs; - else if (IS_DISPLAY_VER(dev_priv, 3)) + else if (DISPLAY_VER(dev_priv) == 3) funcs = &i915_crtc_funcs; else funcs = &i8xx_crtc_funcs; diff --git a/drivers/gpu/drm/i915/display/intel_csr.c b/drivers/gpu/drm/i915/display/intel_csr.c index e54521d7b931..26a3c6787e9e 100644 --- a/drivers/gpu/drm/i915/display/intel_csr.c +++ b/drivers/gpu/drm/i915/display/intel_csr.c @@ -709,7 +709,7 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv) csr->fw_path = TGL_CSR_PATH; csr->required_version = TGL_CSR_VERSION_REQUIRED; csr->max_fw_size = GEN12_CSR_MAX_FW_SIZE; - } else if (IS_DISPLAY_VER(dev_priv, 11)) { + } else if (DISPLAY_VER(dev_priv) == 11) { csr->fw_path = ICL_CSR_PATH; csr->required_version = ICL_CSR_VERSION_REQUIRED; csr->max_fw_size = ICL_CSR_MAX_FW_SIZE; diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 79ece9e45d66..4ef573883412 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -113,7 +113,7 @@ void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder, &n_entries); /* If we're boosting the current, set bit 31 of trans1 */ - if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv) && + if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv) && intel_bios_encoder_dp_boost_level(encoder->devdata)) iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; @@ -147,7 +147,7 @@ static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder, level = n_entries - 1; /* If we're boosting the current, set bit 31 of trans1 */ - if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv) && + if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv) && intel_bios_encoder_hdmi_boost_level(encoder->devdata)) iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; @@ -473,7 +473,7 @@ intel_ddi_transcoder_func_reg_val_get(struct intel_encoder *encoder, temp |= DDI_PORT_WIDTH(crtc_state->lane_count); } - if (IS_DISPLAY_RANGE(dev_priv, 8, 10) && + if (IS_DISPLAY_VER(dev_priv, 8, 10) && crtc_state->master_transcoder != INVALID_TRANSCODER) { u8 master_select = bdw_trans_port_sync_master_select(crtc_state->master_transcoder); @@ -548,7 +548,7 @@ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state ctl &= ~TRANS_DDI_FUNC_ENABLE; - if (IS_DISPLAY_RANGE(dev_priv, 8, 10)) + if (IS_DISPLAY_VER(dev_priv, 8, 10)) ctl &= ~(TRANS_DDI_PORT_SYNC_ENABLE | TRANS_DDI_PORT_SYNC_MASTER_SELECT_MASK); @@ -978,7 +978,7 @@ static u8 
intel_ddi_dp_voltage_max(struct intel_dp *intel_dp, tgl_get_combo_buf_trans(encoder, crtc_state, &n_entries); else tgl_get_dkl_buf_trans(encoder, crtc_state, &n_entries); - } else if (IS_DISPLAY_VER(dev_priv, 11)) { + } else if (DISPLAY_VER(dev_priv) == 11) { if (IS_PLATFORM(dev_priv, INTEL_JASPERLAKE)) jsl_get_combo_buf_trans(encoder, crtc_state, &n_entries); else if (IS_PLATFORM(dev_priv, INTEL_ELKHARTLAKE)) @@ -1557,7 +1557,7 @@ hsw_set_signal_levels(struct intel_dp *intel_dp, intel_dp->DP &= ~DDI_BUF_EMP_MASK; intel_dp->DP |= signal_levels; - if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) + if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv)) skl_ddi_set_iboost(encoder, crtc_state, level); intel_de_write(dev_priv, DDI_BUF_CTL(port), intel_dp->DP); @@ -3094,7 +3094,7 @@ static void intel_enable_ddi_hdmi(struct intel_atomic_state *state, if (DISPLAY_VER(dev_priv) >= 12) tgl_ddi_vswing_sequence(encoder, crtc_state, level); - else if (IS_DISPLAY_VER(dev_priv, 11)) + else if (DISPLAY_VER(dev_priv) == 11) icl_ddi_vswing_sequence(encoder, crtc_state, level); else if (IS_CANNONLAKE(dev_priv)) cnl_ddi_vswing_sequence(encoder, crtc_state, level); @@ -3103,11 +3103,11 @@ static void intel_enable_ddi_hdmi(struct intel_atomic_state *state, else intel_prepare_hdmi_ddi_buffers(encoder, level); - if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) + if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv)) skl_ddi_set_iboost(encoder, crtc_state, level); /* Display WA #1143: skl,kbl,cfl */ - if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) { + if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv)) { /* * For some reason these chicken bits have been * stuffed into a transcoder register, event though @@ -4590,7 +4590,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { /* BXT/GLK have fixed PLL->port mapping */ encoder->get_config = bxt_ddi_get_config; - } else if (IS_DISPLAY_VER(dev_priv, 9)) { + } else if (DISPLAY_VER(dev_priv) == 9) { encoder->enable_clock = skl_ddi_enable_clock; encoder->disable_clock = skl_ddi_disable_clock; encoder->is_clock_enabled = skl_ddi_is_clock_enabled; @@ -4610,11 +4610,11 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) encoder->hpd_pin = tgl_hpd_pin(dev_priv, port); else if (IS_JSL_EHL(dev_priv)) encoder->hpd_pin = ehl_hpd_pin(dev_priv, port); - else if (IS_DISPLAY_VER(dev_priv, 11)) + else if (DISPLAY_VER(dev_priv) == 11) encoder->hpd_pin = icl_hpd_pin(dev_priv, port); else if (IS_CANNONLAKE(dev_priv)) encoder->hpd_pin = cnl_hpd_pin(dev_priv, port); - else if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) + else if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv)) encoder->hpd_pin = skl_hpd_pin(dev_priv, port); else encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port); diff --git a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c index fdd25861edd5..58d6417b8f3e 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c +++ b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c @@ -881,7 +881,7 @@ intel_ddi_get_buf_trans_edp(struct intel_encoder *encoder, int *n_entries) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) { + if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv)) { const struct ddi_buf_trans *ddi_translations = skl_get_buf_trans_edp(encoder, n_entries); *n_entries = 
skl_buf_trans_num_entries(encoder->port, *n_entries); @@ -919,7 +919,7 @@ intel_ddi_get_buf_trans_hdmi(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) { + if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv)) { return skl_get_buf_trans_hdmi(dev_priv, n_entries); } else if (IS_BROADWELL(dev_priv)) { *n_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); @@ -1361,7 +1361,7 @@ int intel_ddi_hdmi_num_entries(struct intel_encoder *encoder, else tgl_get_dkl_buf_trans_hdmi(encoder, crtc_state, &n_entries); *default_entry = n_entries - 1; - } else if (IS_DISPLAY_VER(dev_priv, 11)) { + } else if (DISPLAY_VER(dev_priv) == 11) { if (intel_phy_is_combo(dev_priv, phy)) icl_get_combo_buf_trans_hdmi(encoder, crtc_state, &n_entries); else @@ -1373,7 +1373,7 @@ int intel_ddi_hdmi_num_entries(struct intel_encoder *encoder, } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { bxt_get_buf_trans_hdmi(encoder, &n_entries); *default_entry = n_entries - 1; - } else if (IS_DISPLAY_VER(dev_priv, 9)) { + } else if (DISPLAY_VER(dev_priv) == 9) { intel_ddi_get_buf_trans_hdmi(encoder, &n_entries); *default_entry = 8; } else if (IS_BROADWELL(dev_priv)) { diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 00120697e59b..7cd19ff40ed3 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -230,7 +230,7 @@ static bool pipe_scanline_is_moving(struct drm_i915_private *dev_priv, u32 line1, line2; u32 line_mask; - if (IS_DISPLAY_VER(dev_priv, 2)) + if (DISPLAY_VER(dev_priv) == 2) line_mask = DSL_LINEMASK_GEN2; else line_mask = DSL_LINEMASK_GEN3; @@ -874,7 +874,7 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane) case DRM_FORMAT_MOD_LINEAR: return intel_tile_size(dev_priv); case I915_FORMAT_MOD_X_TILED: - if (IS_DISPLAY_VER(dev_priv, 2)) + if (DISPLAY_VER(dev_priv) == 2) return 128; else return 512; @@ -889,7 +889,7 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane) return 64; fallthrough; case I915_FORMAT_MOD_Y_TILED: - if (IS_DISPLAY_VER(dev_priv, 2) || HAS_128_BYTE_Y_TILING(dev_priv)) + if (DISPLAY_VER(dev_priv) == 2 || HAS_128_BYTE_Y_TILING(dev_priv)) return 128; else return 512; @@ -1403,7 +1403,7 @@ intel_fb_stride_alignment(const struct drm_framebuffer *fb, int color_plane) * require the entire fb to accommodate that to avoid * potential runtime errors at plane configuration time. */ - if ((IS_DISPLAY_VER(dev_priv, 9) || IS_GEMINILAKE(dev_priv)) && + if ((DISPLAY_VER(dev_priv) == 9 || IS_GEMINILAKE(dev_priv)) && color_plane == 0 && fb->width > 3840) tile_width *= 4; /* @@ -1606,7 +1606,7 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc, * Gen2 reports pipe underruns whenever all planes are disabled. * So disable underrun reporting before all the planes get disabled. 
*/ - if (IS_DISPLAY_VER(dev_priv, 2) && !crtc_state->active_planes) + if (DISPLAY_VER(dev_priv) == 2 && !crtc_state->active_planes) intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false); intel_disable_plane(plane, crtc_state); @@ -2469,7 +2469,7 @@ static bool needs_nv12_wa(const struct intel_crtc_state *crtc_state) return false; /* WA Display #0827: Gen9:all */ - if (IS_DISPLAY_VER(dev_priv, 9)) + if (DISPLAY_VER(dev_priv) == 9) return true; return false; @@ -2480,7 +2480,7 @@ static bool needs_scalerclk_wa(const struct intel_crtc_state *crtc_state) struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); /* Wa_2006604312:icl,ehl */ - if (crtc_state->scaler_state.scaler_users > 0 && IS_DISPLAY_VER(dev_priv, 11)) + if (crtc_state->scaler_state.scaler_users > 0 && DISPLAY_VER(dev_priv) == 11) return true; return false; @@ -2680,7 +2680,7 @@ static void intel_pre_plane_update(struct intel_atomic_state *state, * chance of catching underruns with the intermediate watermarks * vs. the old plane configuration. */ - if (IS_DISPLAY_VER(dev_priv, 2) && planes_disabling(old_crtc_state, new_crtc_state)) + if (DISPLAY_VER(dev_priv) == 2 && planes_disabling(old_crtc_state, new_crtc_state)) intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); /* @@ -3199,7 +3199,7 @@ static void hsw_crtc_enable(struct intel_atomic_state *state, crtc->active = true; /* Display WA #1180: WaDisableScalarClockGating: glk, cnl */ - psl_clkgate_wa = IS_DISPLAY_VER(dev_priv, 10) && + psl_clkgate_wa = DISPLAY_VER(dev_priv) == 10 && new_crtc_state->pch_pfit.enabled; if (psl_clkgate_wa) glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, true); @@ -3653,7 +3653,7 @@ static void i9xx_crtc_enable(struct intel_atomic_state *state, crtc->active = true; - if (!IS_DISPLAY_VER(dev_priv, 2)) + if (DISPLAY_VER(dev_priv) != 2) intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); intel_encoders_pre_enable(state, crtc); @@ -3678,7 +3678,7 @@ static void i9xx_crtc_enable(struct intel_atomic_state *state, intel_encoders_enable(state, crtc); /* prevents spurious underruns */ - if (IS_DISPLAY_VER(dev_priv, 2)) + if (DISPLAY_VER(dev_priv) == 2) intel_wait_for_vblank(dev_priv, pipe); } @@ -3709,7 +3709,7 @@ static void i9xx_crtc_disable(struct intel_atomic_state *state, * On gen2 planes are double buffered but the pipe isn't, so we must * wait for planes to fully turn off before disabling the pipe. 
*/ - if (IS_DISPLAY_VER(dev_priv, 2)) + if (DISPLAY_VER(dev_priv) == 2) intel_wait_for_vblank(dev_priv, pipe); intel_encoders_disable(state, crtc); @@ -3733,7 +3733,7 @@ static void i9xx_crtc_disable(struct intel_atomic_state *state, intel_encoders_post_pll_disable(state, crtc); - if (!IS_DISPLAY_VER(dev_priv, 2)) + if (DISPLAY_VER(dev_priv) != 2) intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); if (!dev_priv->display.initial_watermarks) @@ -4297,7 +4297,7 @@ static bool transcoder_has_m2_n2(struct drm_i915_private *dev_priv, * Strictly speaking some registers are available before * gen7, but we only support DRRS on gen7+ */ - return IS_DISPLAY_VER(dev_priv, 7) || IS_CHERRYVIEW(dev_priv); + return DISPLAY_VER(dev_priv) == 7 || IS_CHERRYVIEW(dev_priv); } static void intel_cpu_transcoder_set_m_n(const struct intel_crtc_state *crtc_state, @@ -4444,7 +4444,7 @@ static bool intel_pipe_is_interlaced(const struct intel_crtc_state *crtc_state) struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - if (IS_DISPLAY_VER(dev_priv, 2)) + if (DISPLAY_VER(dev_priv) == 2) return false; if (DISPLAY_VER(dev_priv) >= 9 || @@ -5639,7 +5639,7 @@ static void ilk_get_pfit_config(struct intel_crtc_state *crtc_state) * ivb/hsw (since we don't use the higher upscaling modes which * differentiates them) so just WARN about this case for now. */ - drm_WARN_ON(&dev_priv->drm, IS_DISPLAY_VER(dev_priv, 7) && + drm_WARN_ON(&dev_priv->drm, DISPLAY_VER(dev_priv) == 7 && (ctl & PF_PIPE_SEL_MASK_IVB) != PF_PIPE_SEL_IVB(crtc->pipe)); } @@ -6322,7 +6322,7 @@ static int i9xx_pll_refclk(struct drm_device *dev, return dev_priv->vbt.lvds_ssc_freq; else if (HAS_PCH_SPLIT(dev_priv)) return 120000; - else if (!IS_DISPLAY_VER(dev_priv, 2)) + else if (DISPLAY_VER(dev_priv) != 2) return 96000; else return 48000; @@ -6355,7 +6355,7 @@ static void i9xx_crtc_clock_get(struct intel_crtc *crtc, clock.m2 = (fp & FP_M2_DIV_MASK) >> FP_M2_DIV_SHIFT; } - if (!IS_DISPLAY_VER(dev_priv, 2)) { + if (DISPLAY_VER(dev_priv) != 2) { if (IS_PINEVIEW(dev_priv)) clock.p1 = ffs((dpll & DPLL_FPA01_P1_POST_DIV_MASK_PINEVIEW) >> DPLL_FPA01_P1_POST_DIV_SHIFT_PINEVIEW); @@ -8783,7 +8783,7 @@ intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state) * However if queried just before the start of vblank we'll get an * answer that's slightly in the future. */ - if (IS_DISPLAY_VER(dev_priv, 2)) { + if (DISPLAY_VER(dev_priv) == 2) { int vtotal; vtotal = adjusted_mode.crtc_vtotal; @@ -9660,7 +9660,7 @@ void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - if (!IS_DISPLAY_VER(dev_priv, 2) || crtc_state->active_planes) + if (DISPLAY_VER(dev_priv) != 2 || crtc_state->active_planes) intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true); if (crtc_state->has_pch_encoder) { @@ -10278,7 +10278,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) * chance of catching underruns with the intermediate watermarks * vs. the new plane configuration. 
*/ - if (IS_DISPLAY_VER(dev_priv, 2) && planes_enabling(old_crtc_state, new_crtc_state)) + if (DISPLAY_VER(dev_priv) == 2 && planes_enabling(old_crtc_state, new_crtc_state)) intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true); if (dev_priv->display.optimize_watermarks) @@ -10857,7 +10857,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_ddi_init(dev_priv, PORT_C); intel_ddi_init(dev_priv, PORT_D); icl_dsi_init(dev_priv); - } else if (IS_DISPLAY_VER(dev_priv, 11)) { + } else if (DISPLAY_VER(dev_priv) == 11) { intel_ddi_init(dev_priv, PORT_A); intel_ddi_init(dev_priv, PORT_B); intel_ddi_init(dev_priv, PORT_C); @@ -10898,7 +10898,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) */ found = intel_de_read(dev_priv, DDI_BUF_CTL(PORT_A)) & DDI_INIT_DISPLAY_DETECTED; /* WaIgnoreDDIAStrap: skl */ - if (found || IS_DISPLAY_VER(dev_priv, 9)) + if (found || DISPLAY_VER(dev_priv) == 9) intel_ddi_init(dev_priv, PORT_A); /* DDI B, C, D, and F detection is indicated by the SFUSE_STRAP @@ -10923,7 +10923,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) /* * On SKL we don't have a way to detect DDI-E so we rely on VBT. */ - if (IS_DISPLAY_VER(dev_priv, 9) && + if (DISPLAY_VER(dev_priv) == 9 && intel_bios_is_port_present(dev_priv, PORT_E)) intel_ddi_init(dev_priv, PORT_E); @@ -11014,7 +11014,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) } else if (IS_PINEVIEW(dev_priv)) { intel_lvds_init(dev_priv); intel_crt_init(dev_priv); - } else if (IS_DISPLAY_RANGE(dev_priv, 3, 4)) { + } else if (IS_DISPLAY_VER(dev_priv, 3, 4)) { bool found = false; if (IS_MOBILE(dev_priv)) @@ -11058,7 +11058,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) if (SUPPORTS_TV(dev_priv)) intel_tv_init(dev_priv); - } else if (IS_DISPLAY_VER(dev_priv, 2)) { + } else if (DISPLAY_VER(dev_priv) == 2) { if (IS_I85X(dev_priv)) intel_lvds_init(dev_priv); @@ -11722,7 +11722,7 @@ static void intel_mode_config_init(struct drm_i915_private *i915) } else if (DISPLAY_VER(i915) >= 4) { mode_config->max_width = 8192; mode_config->max_height = 8192; - } else if (IS_DISPLAY_VER(i915, 3)) { + } else if (DISPLAY_VER(i915) == 3) { mode_config->max_width = 4096; mode_config->max_height = 4096; } else { @@ -12611,7 +12611,7 @@ static void intel_early_display_was(struct drm_i915_private *dev_priv) * Display WA #1185 WaDisableDARBFClkGating:cnl,glk,icl,ehl,tgl * Also known as Wa_14010480278. */ - if (IS_DISPLAY_RANGE(dev_priv, 10, 12)) + if (IS_DISPLAY_VER(dev_priv, 10, 12)) intel_de_write(dev_priv, GEN9_CLKGATE_DIS_0, intel_de_read(dev_priv, GEN9_CLKGATE_DIS_0) | DARBF_GATING_DIS); diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 9419ae8c6111..723dd1e815bc 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -550,7 +550,7 @@ static void icl_tc_port_assert_ref_held(struct drm_i915_private *dev_priv, if (drm_WARN_ON(&dev_priv->drm, !dig_port)) return; - if (IS_DISPLAY_VER(dev_priv, 11) && dig_port->tc_legacy_port) + if (DISPLAY_VER(dev_priv) == 11 && dig_port->tc_legacy_port) return; drm_WARN_ON(&dev_priv->drm, !intel_tc_port_ref_held(dig_port)); @@ -619,7 +619,7 @@ icl_tc_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, * exit sequence. 
*/ timeout_expected = is_tbt; - if (IS_DISPLAY_VER(dev_priv, 11) && dig_port->tc_legacy_port) { + if (DISPLAY_VER(dev_priv) == 11 && dig_port->tc_legacy_port) { icl_tc_cold_exit(dev_priv); timeout_expected = true; } @@ -709,7 +709,7 @@ static bool hsw_power_well_enabled(struct drm_i915_private *dev_priv, * BIOS's own request bits, which are forced-on for these power wells * when exiting DC5/6. */ - if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv) && + if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv) && (id == SKL_DISP_PW_1 || id == SKL_DISP_PW_MISC_IO)) val |= intel_de_read(dev_priv, regs->bios); @@ -807,7 +807,7 @@ static u32 gen9_dc_mask(struct drm_i915_private *dev_priv) if (DISPLAY_VER(dev_priv) >= 12) mask |= DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC6 | DC_STATE_EN_DC9; - else if (IS_DISPLAY_VER(dev_priv, 11)) + else if (DISPLAY_VER(dev_priv) == 11) mask |= DC_STATE_EN_UPTO_DC6 | DC_STATE_EN_DC9; else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) mask |= DC_STATE_EN_DC9; @@ -1060,7 +1060,7 @@ static void gen9_enable_dc5(struct drm_i915_private *dev_priv) drm_dbg_kms(&dev_priv->drm, "Enabling DC5\n"); /* Wa Display #1183: skl,kbl,cfl */ - if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) + if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv)) intel_de_write(dev_priv, GEN8_CHICKEN_DCPR_1, intel_de_read(dev_priv, GEN8_CHICKEN_DCPR_1) | SKL_SELECT_ALTERNATE_DC_EXIT); @@ -1087,7 +1087,7 @@ static void skl_enable_dc6(struct drm_i915_private *dev_priv) drm_dbg_kms(&dev_priv->drm, "Enabling DC6\n"); /* Wa Display #1183: skl,kbl,cfl */ - if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) + if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv)) intel_de_write(dev_priv, GEN8_CHICKEN_DCPR_1, intel_de_read(dev_priv, GEN8_CHICKEN_DCPR_1) | SKL_SELECT_ALTERNATE_DC_EXIT); @@ -4679,9 +4679,9 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) BIT_ULL(TGL_DISP_PW_TC_COLD_OFF)); } else if (IS_ROCKETLAKE(dev_priv)) { err = set_power_wells(power_domains, rkl_power_wells); - } else if (IS_DISPLAY_VER(dev_priv, 12)) { + } else if (DISPLAY_VER(dev_priv) == 12) { err = set_power_wells(power_domains, tgl_power_wells); - } else if (IS_DISPLAY_VER(dev_priv, 11)) { + } else if (DISPLAY_VER(dev_priv) == 11) { err = set_power_wells(power_domains, icl_power_wells); } else if (IS_CNL_WITH_PORT_F(dev_priv)) { err = set_power_wells(power_domains, cnl_power_wells); @@ -4693,7 +4693,7 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) err = set_power_wells(power_domains, glk_power_wells); } else if (IS_BROXTON(dev_priv)) { err = set_power_wells(power_domains, bxt_power_wells); - } else if (IS_DISPLAY_VER(dev_priv, 9)) { + } else if (DISPLAY_VER(dev_priv) == 9) { err = set_power_wells(power_domains, skl_power_wells); } else if (IS_CHERRYVIEW(dev_priv)) { err = set_power_wells(power_domains, chv_power_wells); @@ -4838,7 +4838,7 @@ static void icl_mbus_init(struct drm_i915_private *dev_priv) * expect us to program the abox_ctl0 register as well, even though * we don't have to program other instance-0 registers like BW_BUDDY. 
*/ - if (IS_DISPLAY_VER(dev_priv, 12)) + if (DISPLAY_VER(dev_priv) == 12) abox_regs |= BIT(0); for_each_set_bit(i, &abox_regs, sizeof(abox_regs)) @@ -5414,7 +5414,7 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv, intel_csr_load_program(dev_priv); /* Wa_14011508470 */ - if (IS_DISPLAY_VER(dev_priv, 12)) { + if (DISPLAY_VER(dev_priv) == 12) { val = DCPR_CLEAR_MEMSTAT_DIS | DCPR_SEND_RESP_IMM | DCPR_MASK_LPMODE | DCPR_MASK_MAXLATENCY_MEMUP_CLR; intel_uncore_rmw(&dev_priv->uncore, GEN11_CHICKEN_DCPR_2, 0, val); @@ -5626,7 +5626,7 @@ void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume) cnl_display_core_init(i915, resume); } else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) { bxt_display_core_init(i915, resume); - } else if (IS_DISPLAY_VER(i915, 9)) { + } else if (DISPLAY_VER(i915) == 9) { skl_display_core_init(i915, resume); } else if (IS_CHERRYVIEW(i915)) { mutex_lock(&power_domains->lock); @@ -5787,7 +5787,7 @@ void intel_power_domains_suspend(struct drm_i915_private *i915, cnl_display_core_uninit(i915); else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) bxt_display_core_uninit(i915); - else if (IS_DISPLAY_VER(i915, 9)) + else if (DISPLAY_VER(i915) == 9) skl_display_core_uninit(i915); power_domains->display_core_suspended = true; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index a5231ac3443a..2a686f14c42f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -215,7 +215,7 @@ bool intel_dp_can_bigjoiner(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); return DISPLAY_VER(dev_priv) >= 12 || - (IS_DISPLAY_VER(dev_priv, 11) && + (DISPLAY_VER(dev_priv) == 11 && encoder->port != PORT_A); } @@ -295,7 +295,7 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) { source_rates = cnl_rates; size = ARRAY_SIZE(cnl_rates); - if (IS_DISPLAY_VER(dev_priv, 10)) + if (DISPLAY_VER(dev_priv) == 10) max_rate = cnl_max_source_rate(intel_dp); else if (IS_JSL_EHL(dev_priv)) max_rate = ehl_max_source_rate(intel_dp); @@ -304,7 +304,7 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { source_rates = bxt_rates; size = ARRAY_SIZE(bxt_rates); - } else if (IS_DISPLAY_VER(dev_priv, 9)) { + } else if (DISPLAY_VER(dev_priv) == 9) { source_rates = skl_rates; size = ARRAY_SIZE(skl_rates); } else if ((IS_HASWELL(dev_priv) && !IS_HSW_ULX(dev_priv)) || @@ -916,7 +916,7 @@ static bool intel_dp_source_supports_fec(struct intel_dp *intel_dp, if (DISPLAY_VER(dev_priv) >= 12) return true; - if (IS_DISPLAY_VER(dev_priv, 11) && pipe_config->cpu_transcoder != TRANSCODER_A) + if (DISPLAY_VER(dev_priv) == 11 && pipe_config->cpu_transcoder != TRANSCODER_A) return true; return false; diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c b/drivers/gpu/drm/i915/display/intel_dpll.c index 158f271299a4..9114953f57f1 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll.c +++ b/drivers/gpu/drm/i915/display/intel_dpll.c @@ -1356,7 +1356,7 @@ intel_dpll_init_clock_hook(struct drm_i915_private *dev_priv) dev_priv->display.crtc_compute_clock = g4x_crtc_compute_clock; else if (IS_PINEVIEW(dev_priv)) dev_priv->display.crtc_compute_clock = pnv_crtc_compute_clock; - else if (!IS_DISPLAY_VER(dev_priv, 2)) + else if (DISPLAY_VER(dev_priv) != 2) dev_priv->display.crtc_compute_clock = i9xx_crtc_compute_clock; else 
dev_priv->display.crtc_compute_clock = i8xx_crtc_compute_clock; diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index e32de7c848e9..e1c916640768 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -4443,7 +4443,7 @@ void intel_shared_dpll_init(struct drm_device *dev) dpll_mgr = &cnl_pll_mgr; else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) dpll_mgr = &bxt_pll_mgr; - else if (IS_DISPLAY_VER(dev_priv, 9)) + else if (DISPLAY_VER(dev_priv) == 9) dpll_mgr = &skl_pll_mgr; else if (HAS_DDI(dev_priv)) dpll_mgr = &hsw_pll_mgr; diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index fca41ac5b8e1..0ec9ad7220a1 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -84,7 +84,7 @@ int skl_main_to_aux_plane(const struct drm_framebuffer *fb, int main_plane) unsigned int intel_tile_size(const struct drm_i915_private *i915) { - return IS_DISPLAY_VER(i915, 2) ? 2048 : 4096; + return DISPLAY_VER(i915) == 2 ? 2048 : 4096; } unsigned int intel_tile_height(const struct drm_framebuffer *fb, int color_plane) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index dcc0f73ea225..91dad8004c34 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -67,7 +67,7 @@ static int intel_fbc_calculate_cfb_size(struct drm_i915_private *dev_priv, int lines; intel_fbc_get_plane_source_size(cache, NULL, &lines); - if (IS_DISPLAY_VER(dev_priv, 7)) + if (DISPLAY_VER(dev_priv) == 7) lines = min(lines, 2048); else if (DISPLAY_VER(dev_priv) >= 8) lines = min(lines, 2560); @@ -109,7 +109,7 @@ static void i8xx_fbc_activate(struct drm_i915_private *dev_priv) cfb_pitch = params->fb.stride; /* FBC_CTL wants 32B or 64B units */ - if (IS_DISPLAY_VER(dev_priv, 2)) + if (DISPLAY_VER(dev_priv) == 2) cfb_pitch = (cfb_pitch / 32) - 1; else cfb_pitch = (cfb_pitch / 64) - 1; @@ -118,7 +118,7 @@ static void i8xx_fbc_activate(struct drm_i915_private *dev_priv) for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++) intel_de_write(dev_priv, FBC_TAG(i), 0); - if (IS_DISPLAY_VER(dev_priv, 4)) { + if (DISPLAY_VER(dev_priv) == 4) { u32 fbc_ctl2; /* Set it up... */ @@ -302,7 +302,7 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) int threshold = dev_priv->fbc.threshold; /* Display WA #0529: skl, kbl, bxt. */ - if (IS_DISPLAY_VER(dev_priv, 9)) { + if (DISPLAY_VER(dev_priv) == 9) { u32 val = intel_de_read(dev_priv, CHICKEN_MISC_4); val &= ~(FBC_STRIDE_OVERRIDE | FBC_STRIDE_MASK); @@ -445,7 +445,7 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, * reserved range size, so it always assumes the maximum (8mb) is used. * If we enable FBC using a CFB on that memory range we'll get FIFO * underruns, even if that range is not reserved by the BIOS. 
*/ - if (IS_BROADWELL(dev_priv) || (IS_DISPLAY_VER(dev_priv, 9) && + if (IS_BROADWELL(dev_priv) || (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv))) end = resource_size(&dev_priv->dsm) - 8 * 1024 * 1024; else @@ -591,14 +591,14 @@ static bool stride_is_valid(struct drm_i915_private *dev_priv, if (stride < 512) return false; - if (IS_DISPLAY_VER(dev_priv, 2) || IS_DISPLAY_VER(dev_priv, 3)) + if (DISPLAY_VER(dev_priv) == 2 || DISPLAY_VER(dev_priv) == 3) return stride == 4096 || stride == 8192; - if (IS_DISPLAY_VER(dev_priv, 4) && !IS_G4X(dev_priv) && stride < 2048) + if (DISPLAY_VER(dev_priv) == 4 && !IS_G4X(dev_priv) && stride < 2048) return false; /* Display WA #1105: skl,bxt,kbl,cfl,glk */ - if ((IS_DISPLAY_VER(dev_priv, 9) || IS_GEMINILAKE(dev_priv)) && + if ((DISPLAY_VER(dev_priv) == 9 || IS_GEMINILAKE(dev_priv)) && modifier == DRM_FORMAT_MOD_LINEAR && stride & 511) return false; @@ -618,7 +618,7 @@ static bool pixel_format_is_valid(struct drm_i915_private *dev_priv, case DRM_FORMAT_XRGB1555: case DRM_FORMAT_RGB565: /* 16bpp not supported on gen2 */ - if (IS_DISPLAY_VER(dev_priv, 2)) + if (DISPLAY_VER(dev_priv) == 2) return false; /* WaFbcOnly1to1Ratio:ctg */ if (IS_G4X(dev_priv)) @@ -760,7 +760,7 @@ static u16 intel_fbc_gen9_wa_cfb_stride(struct drm_i915_private *dev_priv) struct intel_fbc *fbc = &dev_priv->fbc; struct intel_fbc_state_cache *cache = &fbc->state_cache; - if ((IS_DISPLAY_VER(dev_priv, 9)) && + if ((DISPLAY_VER(dev_priv) == 9) && cache->fb.modifier != I915_FORMAT_MOD_X_TILED) return DIV_ROUND_UP(cache->plane.src_w, 32 * fbc->threshold) * 8; else diff --git a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c index 9605a1064366..0fce9fd6e0a9 100644 --- a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c @@ -271,7 +271,7 @@ static bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, i9xx_set_fifo_underrun_reporting(dev, pipe, enable, old); else if (IS_IRONLAKE(dev_priv) || IS_SANDYBRIDGE(dev_priv)) ilk_set_fifo_underrun_reporting(dev, pipe, enable); - else if (IS_DISPLAY_VER(dev_priv, 7)) + else if (DISPLAY_VER(dev_priv) == 7) ivb_set_fifo_underrun_reporting(dev, pipe, enable, old); else if (DISPLAY_VER(dev_priv) >= 8) bdw_set_fifo_underrun_reporting(dev, pipe, enable); @@ -432,7 +432,7 @@ void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv) if (HAS_GMCH(dev_priv)) i9xx_check_fifo_underruns(crtc); - else if (IS_DISPLAY_VER(dev_priv, 7)) + else if (DISPLAY_VER(dev_priv) == 7) ivb_check_fifo_underruns(crtc); } diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index 13ba280d0228..d3a5e4224969 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -109,7 +109,7 @@ static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *dev_priv, return &gmbus_pins_cnp[pin]; else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) return &gmbus_pins_bxt[pin]; - else if (IS_DISPLAY_VER(dev_priv, 9)) + else if (DISPLAY_VER(dev_priv) == 9) return &gmbus_pins_skl[pin]; else if (IS_BROADWELL(dev_priv)) return &gmbus_pins_bdw[pin]; @@ -130,7 +130,7 @@ bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv, size = ARRAY_SIZE(gmbus_pins_cnp); else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) size = ARRAY_SIZE(gmbus_pins_bxt); - else if (IS_DISPLAY_VER(dev_priv, 9)) + else if (DISPLAY_VER(dev_priv) == 9) size = 
ARRAY_SIZE(gmbus_pins_skl); else if (IS_BROADWELL(dev_priv)) size = ARRAY_SIZE(gmbus_pins_bdw); diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 75050a040577..d254fe67ab7f 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -291,7 +291,7 @@ static int intel_hdcp_load_keys(struct drm_i915_private *dev_priv) * process from other platforms. These platforms use the GT Driver * Mailbox interface. */ - if (IS_DISPLAY_VER(dev_priv, 9) && !IS_BROXTON(dev_priv)) { + if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv)) { ret = sandybridge_pcode_write(dev_priv, SKL_PCODE_LOAD_HDCP_KEYS, 1); if (ret) { diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index f2d1fef8bd9d..47a8f0a1c5e2 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1978,7 +1978,7 @@ static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, /* Display Wa_1405510057:icl,ehl */ if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 && - bpc == 10 && IS_DISPLAY_VER(dev_priv, 11) && + bpc == 10 && DISPLAY_VER(dev_priv) == 11 && (adjusted_mode->crtc_hblank_end - adjusted_mode->crtc_hblank_start) % 8 == 2) return false; @@ -2715,7 +2715,7 @@ static u8 intel_hdmi_ddc_pin(struct intel_encoder *encoder) ddc_pin = dg1_port_to_ddc_pin(dev_priv, port); else if (IS_ROCKETLAKE(dev_priv)) ddc_pin = rkl_port_to_ddc_pin(dev_priv, port); - else if (IS_DISPLAY_VER(dev_priv, 9) && HAS_PCH_TGP(dev_priv)) + else if (DISPLAY_VER(dev_priv) == 9 && HAS_PCH_TGP(dev_priv)) ddc_pin = gen9bc_tgp_port_to_ddc_pin(dev_priv, port); else if (HAS_PCH_MCC(dev_priv)) ddc_pin = mcc_port_to_ddc_pin(dev_priv, port); diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index f31a368f34c5..dd12d15f47c7 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -280,7 +280,7 @@ static void intel_pre_enable_lvds(struct intel_atomic_state *state, * special lvds dither control bit on pch-split platforms, dithering is * only controlled through the PIPECONF reg. */ - if (IS_DISPLAY_VER(dev_priv, 4)) { + if (DISPLAY_VER(dev_priv) == 4) { /* * Bspec wording suggests that LVDS port dithering only exists * for 18bpp panels. diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index e477b6114a60..d1255911a327 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -550,7 +550,7 @@ static u32 calc_swidthsw(struct drm_i915_private *dev_priv, u32 offset, u32 widt { u32 sw; - if (IS_DISPLAY_VER(dev_priv, 2)) + if (DISPLAY_VER(dev_priv) == 2) sw = ALIGN((offset & 31) + width, 32); else sw = ALIGN((offset & 63) + width, 64); @@ -818,7 +818,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, oconfig |= OCONF_CC_OUT_8BIT; if (crtc_state->gamma_enable) oconfig |= OCONF_GAMMA2_ENABLE; - if (IS_DISPLAY_VER(dev_priv, 4)) + if (DISPLAY_VER(dev_priv) == 4) oconfig |= OCONF_CSC_MODE_BT709; oconfig |= pipe == 0 ? 
OCONF_PIPE_A : OCONF_PIPE_B; @@ -1052,7 +1052,7 @@ static int check_overlay_src(struct drm_i915_private *dev_priv, if (rec->stride_Y & stride_mask || rec->stride_UV & stride_mask) return -EINVAL; - if (IS_DISPLAY_VER(dev_priv, 4) && rec->stride_Y < 512) + if (DISPLAY_VER(dev_priv) == 4 && rec->stride_Y < 512) return -EINVAL; tmp = (rec->flags & I915_OVERLAY_TYPE_MASK) == I915_OVERLAY_YUV_PLANAR ? @@ -1279,7 +1279,7 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data, attrs->contrast = overlay->contrast; attrs->saturation = overlay->saturation; - if (!IS_DISPLAY_VER(dev_priv, 2)) { + if (DISPLAY_VER(dev_priv) != 2) { attrs->gamma0 = intel_de_read(dev_priv, OGAMC0); attrs->gamma1 = intel_de_read(dev_priv, OGAMC1); attrs->gamma2 = intel_de_read(dev_priv, OGAMC2); @@ -1303,7 +1303,7 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data, update_reg_attrs(overlay, overlay->regs); if (attrs->flags & I915_OVERLAY_UPDATE_GAMMA) { - if (IS_DISPLAY_VER(dev_priv, 2)) + if (DISPLAY_VER(dev_priv) == 2) goto out_unlock; if (overlay->active) { diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index 2fcbb2ba2d78..551fcaa77c2c 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -667,7 +667,7 @@ static void i9xx_set_backlight(const struct drm_connector_state *conn_state, u32 pci_write_config_byte(to_pci_dev(dev_priv->drm.dev), LBPC, lbpc); } - if (IS_DISPLAY_VER(dev_priv, 4)) { + if (DISPLAY_VER(dev_priv) == 4) { mask = BACKLIGHT_DUTY_CYCLE_MASK; } else { level <<= 1; @@ -1040,7 +1040,7 @@ static void i9xx_enable_backlight(const struct intel_crtc_state *crtc_state, * 855gm only, but checking for gen2 is safe, as 855gm is the only gen2 * that has backlight. 
*/ - if (IS_DISPLAY_VER(dev_priv, 2)) + if (DISPLAY_VER(dev_priv) == 2) intel_de_write(dev_priv, BLC_HIST_CTL, BLM_HISTOGRAM_ENABLE); } @@ -1728,7 +1728,7 @@ static int i9xx_setup_backlight(struct intel_connector *connector, enum pipe unu ctl = intel_de_read(dev_priv, BLC_PWM_CTL); - if (IS_DISPLAY_VER(dev_priv, 2) || IS_I915GM(dev_priv) || IS_I945GM(dev_priv)) + if (DISPLAY_VER(dev_priv) == 2 || IS_I915GM(dev_priv) || IS_I945GM(dev_priv)) panel->backlight.combination_mode = ctl & BLM_LEGACY_MODE; if (IS_PINEVIEW(dev_priv)) @@ -2178,7 +2178,7 @@ intel_panel_init_backlight_funcs(struct intel_panel *panel) } else { panel->backlight.pwm_funcs = &vlv_pwm_funcs; } - } else if (IS_DISPLAY_VER(dev_priv, 4)) { + } else if (DISPLAY_VER(dev_priv) == 4) { panel->backlight.pwm_funcs = &i965_pwm_funcs; } else { panel->backlight.pwm_funcs = &i9xx_pwm_funcs; diff --git a/drivers/gpu/drm/i915/display/intel_pipe_crc.c b/drivers/gpu/drm/i915/display/intel_pipe_crc.c index 7c8e0d76207f..0f6de96e6d43 100644 --- a/drivers/gpu/drm/i915/display/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/display/intel_pipe_crc.c @@ -409,7 +409,7 @@ static int get_new_crc_ctl_reg(struct drm_i915_private *dev_priv, enum pipe pipe, enum intel_pipe_crc_source *source, u32 *val) { - if (IS_DISPLAY_VER(dev_priv, 2)) + if (DISPLAY_VER(dev_priv) == 2) return i8xx_pipe_crc_ctl_reg(source, val); else if (DISPLAY_VER(dev_priv) < 5) return i9xx_pipe_crc_ctl_reg(dev_priv, pipe, source, val); @@ -539,7 +539,7 @@ static int intel_is_valid_crc_source(struct drm_i915_private *dev_priv, const enum intel_pipe_crc_source source) { - if (IS_DISPLAY_VER(dev_priv, 2)) + if (DISPLAY_VER(dev_priv) == 2) return i8xx_crc_source_valid(dev_priv, source); else if (DISPLAY_VER(dev_priv) < 5) return i9xx_crc_source_valid(dev_priv, source); diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 1d561812fcad..cd5452110095 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -769,7 +769,7 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, psr_max_h = 4096; psr_max_v = 2304; max_bpp = 24; - } else if (IS_DISPLAY_VER(dev_priv, 9)) { + } else if (DISPLAY_VER(dev_priv) == 9) { psr_max_h = 3640; psr_max_v = 2304; max_bpp = 24; @@ -909,7 +909,7 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) hsw_psr_setup_aux(intel_dp); - if (intel_dp->psr.psr2_enabled && IS_DISPLAY_VER(dev_priv, 9)) { + if (intel_dp->psr.psr2_enabled && DISPLAY_VER(dev_priv) == 9) { i915_reg_t reg = CHICKEN_TRANS(cpu_transcoder); u32 chicken = intel_de_read(dev_priv, reg); diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 71b8edafb1c3..88085486ee59 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -28,7 +28,7 @@ tc_cold_get_power_domain(struct intel_digital_port *dig_port) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - if (IS_DISPLAY_VER(i915, 11)) + if (DISPLAY_VER(i915) == 11) return intel_legacy_aux_to_power_domain(dig_port->aux_ch); else return POWER_DOMAIN_TC_COLD_OFF; @@ -40,7 +40,7 @@ tc_cold_block(struct intel_digital_port *dig_port) struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); enum intel_display_power_domain domain; - if (IS_DISPLAY_VER(i915, 11) && !dig_port->tc_legacy_port) + if (DISPLAY_VER(i915) == 11 && !dig_port->tc_legacy_port) return 0; domain = 
tc_cold_get_power_domain(dig_port); @@ -71,7 +71,7 @@ assert_tc_cold_blocked(struct intel_digital_port *dig_port) struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); bool enabled; - if (IS_DISPLAY_VER(i915, 11) && !dig_port->tc_legacy_port) + if (DISPLAY_VER(i915) == 11 && !dig_port->tc_legacy_port) return; enabled = intel_display_power_is_enabled(i915, diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index e558f121ec4e..2c5c77693474 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -1165,7 +1165,7 @@ intel_tv_get_config(struct intel_encoder *encoder, static bool intel_tv_source_too_wide(struct drm_i915_private *dev_priv, int hdisplay) { - return IS_DISPLAY_VER(dev_priv, 3) && hdisplay > 1024; + return DISPLAY_VER(dev_priv) == 3 && hdisplay > 1024; } static bool intel_tv_vert_scaling(const struct drm_display_mode *tv_mode, @@ -1789,7 +1789,7 @@ intel_tv_get_modes(struct drm_connector *connector) continue; /* no vertical scaling with wide sources on gen3 */ - if (IS_DISPLAY_VER(dev_priv, 3) && input->w > 1024 && + if (DISPLAY_VER(dev_priv) == 3 && input->w > 1024 && input->h > intel_tv_mode_vdisplay(tv_mode)) continue; @@ -1978,7 +1978,7 @@ intel_tv_init(struct drm_i915_private *dev_priv) /* Create TV properties then attach current values */ for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { /* 1080p50/1080p60 not supported on gen3 */ - if (IS_DISPLAY_VER(dev_priv, 3) && + if (DISPLAY_VER(dev_priv) == 3 && tv_modes[i].oversample == 1) break; diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 32bfd984be5c..b59636d898b9 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -1184,7 +1184,7 @@ static int skl_plane_check_dst_coordinates(const struct intel_crtc_state *crtc_s * than the cursor ending less than 4 pixels from the left edge of the * screen may cause FIFO underflow and display corruption. 
*/ - if (IS_DISPLAY_VER(dev_priv, 10) && + if (DISPLAY_VER(dev_priv) == 10 && (crtc_x + crtc_w < 4 || crtc_x > pipe_src_w - 4)) { drm_dbg_kms(&dev_priv->drm, "requested plane X %s position %d invalid (valid range %d-%d)\n", @@ -1671,7 +1671,7 @@ static bool skl_plane_has_planar(struct drm_i915_private *dev_priv, if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) return false; - if (IS_DISPLAY_VER(dev_priv, 9) && pipe == PIPE_C) + if (DISPLAY_VER(dev_priv) == 9 && pipe == PIPE_C) return false; if (plane_id != PLANE_PRIMARY && plane_id != PLANE_SPRITE0) @@ -1981,8 +1981,8 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, plane->min_cdclk = skl_plane_min_cdclk; if (plane_id == PLANE_PRIMARY) { - plane->need_async_flip_disable_wa = IS_DISPLAY_RANGE(dev_priv, - 9, 10); + plane->need_async_flip_disable_wa = IS_DISPLAY_VER(dev_priv, + 9, 10); plane->async_flip = skl_plane_async_flip; plane->enable_flip_done = skl_plane_enable_flip_done; plane->disable_flip_done = skl_plane_disable_flip_done; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8c62bb2abd31..907c66efb469 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1238,9 +1238,8 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) #define INTEL_DEVID(dev_priv) (RUNTIME_INFO(dev_priv)->device_id) #define DISPLAY_VER(i915) (INTEL_INFO(i915)->display.ver) -#define IS_DISPLAY_RANGE(i915, from, until) \ +#define IS_DISPLAY_VER(i915, from, until) \ (DISPLAY_VER(i915) >= (from) && DISPLAY_VER(i915) <= (until)) -#define IS_DISPLAY_VER(i915, v) (DISPLAY_VER(i915) == (v)) #define REVID_FOREVER 0xff #define INTEL_REVID(dev_priv) (to_pci_dev((dev_priv)->drm.dev)->revision) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 9aff790817c0..ef1911f11739 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -806,7 +806,7 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) if (mode->flags & DRM_MODE_FLAG_INTERLACE) vtotal /= 2; - if (IS_DISPLAY_VER(dev_priv, 2)) + if (DISPLAY_VER(dev_priv) == 2) position = intel_de_read_fw(dev_priv, PIPEDSL(pipe)) & DSL_LINEMASK_GEN2; else position = intel_de_read_fw(dev_priv, PIPEDSL(pipe)) & DSL_LINEMASK_GEN3; @@ -857,7 +857,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, int vbl_start, vbl_end, hsync_start, htotal, vtotal; unsigned long irqflags; bool use_scanline_counter = DISPLAY_VER(dev_priv) >= 5 || - IS_G4X(dev_priv) || IS_DISPLAY_VER(dev_priv, 2) || + IS_G4X(dev_priv) || DISPLAY_VER(dev_priv) == 2 || crtc->mode_flags & I915_MODE_FLAG_USE_SCANLINE_COUNTER; if (drm_WARN_ON(&dev_priv->drm, !mode->crtc_clock)) { @@ -2077,7 +2077,7 @@ static void ilk_display_irq_handler(struct drm_i915_private *dev_priv, intel_uncore_write(&dev_priv->uncore, SDEIIR, pch_iir); } - if (IS_DISPLAY_VER(dev_priv, 5) && de_iir & DE_PCU_EVENT) + if (DISPLAY_VER(dev_priv) == 5 && de_iir & DE_PCU_EVENT) gen5_rps_irq_handler(&dev_priv->gt.rps); } @@ -2287,10 +2287,10 @@ static u32 gen8_de_port_aux_mask(struct drm_i915_private *dev_priv) GEN9_AUX_CHANNEL_C | GEN9_AUX_CHANNEL_D; - if (IS_CNL_WITH_PORT_F(dev_priv) || IS_DISPLAY_VER(dev_priv, 11)) + if (IS_CNL_WITH_PORT_F(dev_priv) || DISPLAY_VER(dev_priv) == 11) mask |= CNL_AUX_CHANNEL_F; - if (IS_DISPLAY_VER(dev_priv, 11)) + if (DISPLAY_VER(dev_priv) == 11) mask |= ICL_AUX_CHANNEL_E; return mask; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 
32f301ca3ab0..db4040811306 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2339,7 +2339,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) if (IS_I945GM(dev_priv)) wm_info = &i945_wm_info; - else if (!IS_DISPLAY_VER(dev_priv, 2)) + else if (DISPLAY_VER(dev_priv) != 2) wm_info = &i915_wm_info; else wm_info = &i830_a_wm_info; @@ -2353,7 +2353,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) crtc->base.primary->state->fb; int cpp; - if (IS_DISPLAY_VER(dev_priv, 2)) + if (DISPLAY_VER(dev_priv) == 2) cpp = 4; else cpp = fb->format->cpp[0]; @@ -2368,7 +2368,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) planea_wm = wm_info->max_wm; } - if (IS_DISPLAY_VER(dev_priv, 2)) + if (DISPLAY_VER(dev_priv) == 2) wm_info = &i830_bc_wm_info; fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_B); @@ -2380,7 +2380,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) crtc->base.primary->state->fb; int cpp; - if (IS_DISPLAY_VER(dev_priv, 2)) + if (DISPLAY_VER(dev_priv) == 2) cpp = 4; else cpp = fb->format->cpp[0]; @@ -2967,7 +2967,7 @@ static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv, u16 wm[5]) { /* ILK sprite LP0 latency is 1300 ns */ - if (IS_DISPLAY_VER(dev_priv, 5)) + if (DISPLAY_VER(dev_priv) == 5) wm[0] = 13; } @@ -2975,7 +2975,7 @@ static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv, u16 wm[5]) { /* ILK cursor LP0 latency is 1300 ns */ - if (IS_DISPLAY_VER(dev_priv, 5)) + if (DISPLAY_VER(dev_priv) == 5) wm[0] = 13; } @@ -3105,7 +3105,7 @@ static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv) intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency); intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency); - if (IS_DISPLAY_VER(dev_priv, 6)) { + if (DISPLAY_VER(dev_priv) == 6) { snb_wm_latency_quirk(dev_priv); snb_wm_lp3_irq_quirk(dev_priv); } @@ -3354,7 +3354,7 @@ static void ilk_wm_merge(struct drm_i915_private *dev_priv, * What we should check here is whether FBC can be * enabled sometime later. 
*/ - if (IS_DISPLAY_VER(dev_priv, 5) && !merged->fbc_wm_enabled && + if (DISPLAY_VER(dev_priv) == 5 && !merged->fbc_wm_enabled && intel_fbc_is_active(dev_priv)) { for (level = 2; level <= max_level; level++) { struct intel_wm_level *wm = &merged->wm[level]; @@ -3654,7 +3654,7 @@ u8 intel_enabled_dbuf_slices_mask(struct drm_i915_private *dev_priv) */ static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv) { - return IS_DISPLAY_VER(dev_priv, 9); + return DISPLAY_VER(dev_priv) == 9; } static bool @@ -3680,13 +3680,13 @@ skl_setup_sagv_block_time(struct drm_i915_private *dev_priv) } drm_dbg(&dev_priv->drm, "Couldn't read SAGV block time!\n"); - } else if (IS_DISPLAY_VER(dev_priv, 11)) { + } else if (DISPLAY_VER(dev_priv) == 11) { dev_priv->sagv_block_time_us = 10; return; - } else if (IS_DISPLAY_VER(dev_priv, 10)) { + } else if (DISPLAY_VER(dev_priv) == 10) { dev_priv->sagv_block_time_us = 20; return; - } else if (IS_DISPLAY_VER(dev_priv, 9)) { + } else if (DISPLAY_VER(dev_priv) == 9) { dev_priv->sagv_block_time_us = 30; return; } else { @@ -4613,9 +4613,9 @@ static u8 skl_compute_dbuf_slices(struct intel_crtc *crtc, u8 active_pipes) struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; - if (IS_DISPLAY_VER(dev_priv, 12)) + if (DISPLAY_VER(dev_priv) == 12) return tgl_compute_dbuf_slices(pipe, active_pipes); - else if (IS_DISPLAY_VER(dev_priv, 11)) + else if (DISPLAY_VER(dev_priv) == 11) return icl_compute_dbuf_slices(pipe, active_pipes); /* * For anything else just return one slice yet. @@ -4986,7 +4986,7 @@ skl_allocate_plane_ddb(struct intel_atomic_state *state, * Wa_1408961008:icl, ehl * Underruns with WM1+ disabled */ - if (IS_DISPLAY_VER(dev_priv, 11) && + if (DISPLAY_VER(dev_priv) == 11 && level == 1 && wm->wm[0].enable) { wm->wm[level].blocks = wm->wm[0].blocks; wm->wm[level].lines = wm->wm[0].lines; @@ -5245,7 +5245,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, (wp->plane_bytes_per_line / wp->dbuf_block_size < 1)) { selected_result = method2; } else if (latency >= wp->linetime_us) { - if (IS_DISPLAY_VER(dev_priv, 9)) + if (DISPLAY_VER(dev_priv) == 9) selected_result = min_fixed16(method1, method2); else selected_result = method2; @@ -5258,7 +5258,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, lines = div_round_up_fixed16(selected_result, wp->plane_blocks_per_line); - if (IS_DISPLAY_VER(dev_priv, 9)) { + if (DISPLAY_VER(dev_priv) == 9) { /* Display WA #1125: skl,bxt,kbl */ if (level == 0 && wp->rc_surface) blocks += fixed16_to_u32_round_up(wp->y_tile_minimum); @@ -5375,7 +5375,7 @@ static void skl_compute_transition_wm(struct drm_i915_private *dev_priv, * WaDisableTWM:skl,kbl,cfl,bxt * Transition WM are not recommended by HW team for GEN9 */ - if (IS_DISPLAY_VER(dev_priv, 9)) + if (DISPLAY_VER(dev_priv) == 9) return; if (DISPLAY_VER(dev_priv) >= 11) @@ -5384,7 +5384,7 @@ static void skl_compute_transition_wm(struct drm_i915_private *dev_priv, trans_min = 14; /* Display WA #1140: glk,cnl */ - if (IS_DISPLAY_VER(dev_priv, 10)) + if (DISPLAY_VER(dev_priv) == 10) trans_amount = 0; else trans_amount = 10; /* This is configurable amount */ @@ -7689,9 +7689,9 @@ void intel_init_pm(struct drm_i915_private *dev_priv) } else if (HAS_PCH_SPLIT(dev_priv)) { ilk_setup_wm_latency(dev_priv); - if ((IS_DISPLAY_VER(dev_priv, 5) && dev_priv->wm.pri_latency[1] && + if ((DISPLAY_VER(dev_priv) == 5 && dev_priv->wm.pri_latency[1] && dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) 
|| - (!IS_DISPLAY_VER(dev_priv, 5) && dev_priv->wm.pri_latency[0] && + (DISPLAY_VER(dev_priv) != 5 && dev_priv->wm.pri_latency[0] && dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) { dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm; dev_priv->display.compute_intermediate_wm = @@ -7734,12 +7734,12 @@ void intel_init_pm(struct drm_i915_private *dev_priv) dev_priv->display.update_wm = NULL; } else dev_priv->display.update_wm = pnv_update_wm; - } else if (IS_DISPLAY_VER(dev_priv, 4)) { + } else if (DISPLAY_VER(dev_priv) == 4) { dev_priv->display.update_wm = i965_update_wm; - } else if (IS_DISPLAY_VER(dev_priv, 3)) { + } else if (DISPLAY_VER(dev_priv) == 3) { dev_priv->display.update_wm = i9xx_update_wm; dev_priv->display.get_fifo_size = i9xx_get_fifo_size; - } else if (IS_DISPLAY_VER(dev_priv, 2)) { + } else if (DISPLAY_VER(dev_priv) == 2) { if (INTEL_NUM_PIPES(dev_priv) == 1) { dev_priv->display.update_wm = i845_update_wm; dev_priv->display.get_fifo_size = i845_get_fifo_size; -- cgit From 93babb061e2ab5e486cc83c7024448c1727ce38b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 12 Apr 2021 22:09:54 -0700 Subject: drm/i915: add macros for graphics and media versions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As was done in commit 01eb15c9165e ("drm/i915: Add DISPLAY_VER() and related macros"), add the corresponding macros for graphics and media. Going forward we will prefer checking the versions for the specific IPs (graphics, media and display) rather than grouping everything under a "gen" version. For consistency, and to make maintenance easier, it is preferable not to mix the *GEN* macros with the new ones. For older platforms we can simply consider that the previous "gen" number will extend to all 3 IPs. Then we can start replacing its use in the driver. Right now this replacement is not done and only the infrastructure is put in place. We also leave gen and gen_mask inside struct intel_device_info while it's still being used throughout the code. v2: Repurpose IS_{GRAPHICS,MEDIA}_VER() macros to work with a range Signed-off-by: Lucas De Marchi Reviewed-by: José Roberto de Souza Reviewed-by: Jani Nikula [Jani: Minor code comment change while applying.]
Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210413051002.92589-5-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 15 ++++++++++++++- drivers/gpu/drm/i915/i915_pci.c | 7 ++++++- drivers/gpu/drm/i915/intel_device_info.h | 3 +++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 907c66efb469..23316c35bf85 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1234,9 +1234,22 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) #define RUNTIME_INFO(dev_priv) (&(dev_priv)->__runtime) #define DRIVER_CAPS(dev_priv) (&(dev_priv)->caps) -#define INTEL_GEN(dev_priv) (INTEL_INFO(dev_priv)->gen) #define INTEL_DEVID(dev_priv) (RUNTIME_INFO(dev_priv)->device_id) +/* + * Deprecated: this will be replaced by individual IP checks: + * GRAPHICS_VER(), MEDIA_VER() and DISPLAY_VER() + */ +#define INTEL_GEN(dev_priv) (INTEL_INFO(dev_priv)->gen) + +#define GRAPHICS_VER(i915) (INTEL_INFO(i915)->graphics_ver) +#define IS_GRAPHICS_VER(i915, from, until) \ + (GRAPHICS_VER(i915) >= (from) && GRAPHICS_VER(i915) <= (until)) + +#define MEDIA_VER(i915) (INTEL_INFO(i915)->media_ver) +#define IS_MEDIA_VER(i915, from, until) \ + (MEDIA_VER(i915) >= (from) && MEDIA_VER(i915) <= (until)) + #define DISPLAY_VER(i915) (INTEL_INFO(i915)->display.ver) #define IS_DISPLAY_VER(i915, from, until) \ (DISPLAY_VER(i915) >= (from) && DISPLAY_VER(i915) <= (until)) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index ce5cbeaf036d..97ab73276334 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -36,7 +36,12 @@ #include "i915_selftest.h" #define PLATFORM(x) .platform = (x) -#define GEN(x) .gen = (x), .gen_mask = BIT((x) - 1), .display.ver = (x) +#define GEN(x) \ + .gen_mask = BIT((x) - 1), \ + .gen = (x), \ + .graphics_ver = (x), \ + .media_ver = (x), \ + .display.ver = (x) #define I845_PIPE_OFFSETS \ .pipe_offsets = { \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index b16c75927a12..405883a8cc84 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -162,6 +162,9 @@ enum intel_ppgtt_type { struct intel_device_info { u16 gen_mask; + u8 graphics_ver; + u8 media_ver; + u8 gen; u8 gt; /* GT number, 0 if undefined */ intel_engine_mask_t platform_engine_mask; /* Engines supported by the HW */ -- cgit From 1b9d840682965842ca89079201eefbd2e3195575 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 12 Apr 2021 22:09:55 -0700 Subject: drm/i915/gt: replace gen use in intel_engine_cs Start using the new graphics_ver field for the previous gen checks. Here we rename the "gen" field in the engine's mmio_bases table and convert the comparisons that used it to the new GRAPHICS_VER(). Other uses of INTEL_GEN() were left as is for automatic conversion later.
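For clarity, a minimal standalone sketch (demo code, not from the driver) of the reverse-sorted table walk that __engine_mmio_base() performs below:

	struct demo_mmio_base {
		unsigned int graphics_ver;
		unsigned int base;
	};

	/* first entry whose graphics_ver is <= the platform version wins,
	 * which is why the table must be sorted in reverse order */
	static unsigned int demo_mmio_base(unsigned int ver,
					   const struct demo_mmio_base *bases,
					   int count)
	{
		int i;

		for (i = 0; i < count; i++)
			if (ver >= bases[i].graphics_ver)
				return bases[i].base;
		return 0;
	}

For example, with the VCS0 table below (entries for versions 11, 6 and 4), a graphics version 9 platform selects the version 6 entry, i.e. GEN6_BSD_RING_BASE.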
Reviewed-by: Jani Nikula Signed-off-by: Lucas De Marchi Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210413051002.92589-6-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 40 ++++++++++++++-------------- drivers/gpu/drm/i915/gt/selftest_engine_cs.c | 18 ++++++------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index efe935f80c1a..6dbdbde00f14 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -45,9 +45,9 @@ struct engine_info { unsigned int hw_id; u8 class; u8 instance; - /* mmio bases table *must* be sorted in reverse gen order */ + /* mmio bases table *must* be sorted in reverse graphics_ver order */ struct engine_mmio_base { - u32 gen : 8; + u32 graphics_ver : 8; u32 base : 24; } mmio_bases[MAX_MMIO_BASES]; }; @@ -58,7 +58,7 @@ static const struct engine_info intel_engines[] = { .class = RENDER_CLASS, .instance = 0, .mmio_bases = { - { .gen = 1, .base = RENDER_RING_BASE } + { .graphics_ver = 1, .base = RENDER_RING_BASE } }, }, [BCS0] = { @@ -66,7 +66,7 @@ static const struct engine_info intel_engines[] = { .class = COPY_ENGINE_CLASS, .instance = 0, .mmio_bases = { - { .gen = 6, .base = BLT_RING_BASE } + { .graphics_ver = 6, .base = BLT_RING_BASE } }, }, [VCS0] = { @@ -74,9 +74,9 @@ static const struct engine_info intel_engines[] = { .class = VIDEO_DECODE_CLASS, .instance = 0, .mmio_bases = { - { .gen = 11, .base = GEN11_BSD_RING_BASE }, - { .gen = 6, .base = GEN6_BSD_RING_BASE }, - { .gen = 4, .base = BSD_RING_BASE } + { .graphics_ver = 11, .base = GEN11_BSD_RING_BASE }, + { .graphics_ver = 6, .base = GEN6_BSD_RING_BASE }, + { .graphics_ver = 4, .base = BSD_RING_BASE } }, }, [VCS1] = { @@ -84,8 +84,8 @@ static const struct engine_info intel_engines[] = { .class = VIDEO_DECODE_CLASS, .instance = 1, .mmio_bases = { - { .gen = 11, .base = GEN11_BSD2_RING_BASE }, - { .gen = 8, .base = GEN8_BSD2_RING_BASE } + { .graphics_ver = 11, .base = GEN11_BSD2_RING_BASE }, + { .graphics_ver = 8, .base = GEN8_BSD2_RING_BASE } }, }, [VCS2] = { @@ -93,7 +93,7 @@ static const struct engine_info intel_engines[] = { .class = VIDEO_DECODE_CLASS, .instance = 2, .mmio_bases = { - { .gen = 11, .base = GEN11_BSD3_RING_BASE } + { .graphics_ver = 11, .base = GEN11_BSD3_RING_BASE } }, }, [VCS3] = { @@ -101,7 +101,7 @@ static const struct engine_info intel_engines[] = { .class = VIDEO_DECODE_CLASS, .instance = 3, .mmio_bases = { - { .gen = 11, .base = GEN11_BSD4_RING_BASE } + { .graphics_ver = 11, .base = GEN11_BSD4_RING_BASE } }, }, [VECS0] = { @@ -109,8 +109,8 @@ static const struct engine_info intel_engines[] = { .class = VIDEO_ENHANCEMENT_CLASS, .instance = 0, .mmio_bases = { - { .gen = 11, .base = GEN11_VEBOX_RING_BASE }, - { .gen = 7, .base = VEBOX_RING_BASE } + { .graphics_ver = 11, .base = GEN11_VEBOX_RING_BASE }, + { .graphics_ver = 7, .base = VEBOX_RING_BASE } }, }, [VECS1] = { @@ -118,7 +118,7 @@ static const struct engine_info intel_engines[] = { .class = VIDEO_ENHANCEMENT_CLASS, .instance = 1, .mmio_bases = { - { .gen = 11, .base = GEN11_VEBOX2_RING_BASE } + { .graphics_ver = 11, .base = GEN11_VEBOX2_RING_BASE } }, }, }; @@ -146,9 +146,9 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class) switch (class) { case RENDER_CLASS: - switch (INTEL_GEN(gt->i915)) { + switch (GRAPHICS_VER(gt->i915)) { default: - MISSING_CASE(INTEL_GEN(gt->i915)); + MISSING_CASE(GRAPHICS_VER(gt->i915)); 
return DEFAULT_LR_CONTEXT_RENDER_SIZE; case 12: case 11: @@ -184,8 +184,8 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class) */ cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1; drm_dbg(&gt->i915->drm, - "gen%d CXT_SIZE = %d bytes [0x%08x]\n", - INTEL_GEN(gt->i915), cxt_size * 64, + "graphics_ver = %d CXT_SIZE = %d bytes [0x%08x]\n", + GRAPHICS_VER(gt->i915), cxt_size * 64, cxt_size - 1); return round_up(cxt_size * 64, PAGE_SIZE); case 3: @@ -201,7 +201,7 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class) case VIDEO_DECODE_CLASS: case VIDEO_ENHANCEMENT_CLASS: case COPY_ENGINE_CLASS: - if (INTEL_GEN(gt->i915) < 8) + if (GRAPHICS_VER(gt->i915) < 8) return 0; return GEN8_LR_CONTEXT_OTHER_SIZE; } @@ -213,7 +213,7 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915, int i; for (i = 0; i < MAX_MMIO_BASES; i++) - if (INTEL_GEN(i915) >= bases[i].gen) + if (GRAPHICS_VER(i915) >= bases[i].graphics_ver) break; GEM_BUG_ON(i == MAX_MMIO_BASES); diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index b32814a1f20b..3453eb77c498 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -376,34 +376,34 @@ static int intel_mmio_bases_check(void *arg) u8 prev = U8_MAX; for (j = 0; j < MAX_MMIO_BASES; j++) { - u8 gen = info->mmio_bases[j].gen; + u8 ver = info->mmio_bases[j].graphics_ver; u32 base = info->mmio_bases[j].base; - if (gen >= prev) { - pr_err("%s(%s, class:%d, instance:%d): mmio base for gen %x is before the one for gen %x\n", + if (ver >= prev) { + pr_err("%s(%s, class:%d, instance:%d): mmio base for graphics ver %u is before the one for ver %u\n", __func__, intel_engine_class_repr(info->class), info->class, info->instance, - prev, gen); + prev, ver); return -EINVAL; } - if (gen == 0) + if (ver == 0) break; if (!base) { - pr_err("%s(%s, class:%d, instance:%d): invalid mmio base (%x) for gen %x at entry %u\n", + pr_err("%s(%s, class:%d, instance:%d): invalid mmio base (%x) for graphics ver %u at entry %u\n", __func__, intel_engine_class_repr(info->class), info->class, info->instance, - base, gen, j); + base, ver, j); return -EINVAL; } - prev = gen; + prev = ver; } - pr_debug("%s: min gen supported for %s%d is %d\n", + pr_debug("%s: min graphics version supported for %s%d is %u\n", __func__, intel_engine_class_repr(info->class), info->instance, -- cgit From 3e6e4c21df34c16820ca68425f90d5b2741a8a39 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 12 Apr 2021 22:09:56 -0700 Subject: drm/i915/selftests: replace unused mask with simple version Since its introduction 2 years ago, we have never used the mask to span more than one gen. Replace gen_mask with a single number and start using the new GRAPHICS_VER().
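Concretely, the match in find_reg() goes from a bitmask test to an exact compare (sketch; every entry in these tables encoded a single gen anyway):

	/* before: range bitmask, INTEL_GEN_MASK(9, 9) == BIT(8) */
	match = INTEL_INFO(i915)->gen_mask & tbl->gen_mask;

	/* after: compare against the one version the mask used to encode */
	match = GRAPHICS_VER(i915) == tbl->graphics_ver;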
Reviewed-by: Jani Nikula Signed-off-by: Lucas De Marchi Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210413051002.92589-7-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 19850489a3fc..64937ec3f2dc 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -927,7 +927,7 @@ err_batch: struct regmask { i915_reg_t reg; - unsigned long gen_mask; + u8 graphics_ver; }; static bool find_reg(struct drm_i915_private *i915, @@ -938,7 +938,7 @@ static bool find_reg(struct drm_i915_private *i915, u32 offset = i915_mmio_reg_offset(reg); while (count--) { - if (INTEL_INFO(i915)->gen_mask & tbl->gen_mask && + if (GRAPHICS_VER(i915) == tbl->graphics_ver && i915_mmio_reg_offset(tbl->reg) == offset) return true; tbl++; @@ -951,8 +951,8 @@ static bool pardon_reg(struct drm_i915_private *i915, i915_reg_t reg) { /* Alas, we must pardon some whitelists. Mistakes already made */ static const struct regmask pardon[] = { - { GEN9_CTX_PREEMPT_REG, INTEL_GEN_MASK(9, 9) }, - { GEN8_L3SQCREG4, INTEL_GEN_MASK(9, 9) }, + { GEN9_CTX_PREEMPT_REG, 9 }, + { GEN8_L3SQCREG4, 9 }, }; return find_reg(i915, reg, pardon, ARRAY_SIZE(pardon)); @@ -974,7 +974,7 @@ static bool writeonly_reg(struct drm_i915_private *i915, i915_reg_t reg) { /* Some registers do not seem to behave and our writes unreadable */ static const struct regmask wo[] = { - { GEN9_SLICE_COMMON_ECO_CHICKEN1, INTEL_GEN_MASK(9, 9) }, + { GEN9_SLICE_COMMON_ECO_CHICKEN1, 9 }, }; return find_reg(i915, reg, wo, ARRAY_SIZE(wo)); -- cgit From 33adf482af73ffdde00a25ba15d22b4047cc78f2 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 12 Apr 2021 22:09:57 -0700 Subject: drm/i915/selftests: eliminate use of gen_mask Remove the remaining uses of INTEL_GEN_MASK() and the corresponding gen_mask in struct intel_device_info. This will allow the removal of gen_mask later since it's incompatible with the new per-IP versioning scheme.
Reviewed-by: Jani Nikula Signed-off-by: Lucas De Marchi Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210413051002.92589-8-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 8 +++++--- drivers/gpu/drm/i915/selftests/intel_uncore.c | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 661b50191f2b..ed5abe7be498 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -2008,12 +2008,14 @@ void intel_uncore_fini_mmio(struct intel_uncore *uncore) static const struct reg_whitelist { i915_reg_t offset_ldw; i915_reg_t offset_udw; - u16 gen_mask; + u8 min_graphics_ver; + u8 max_graphics_ver; u8 size; } reg_read_whitelist[] = { { .offset_ldw = RING_TIMESTAMP(RENDER_RING_BASE), .offset_udw = RING_TIMESTAMP_UDW(RENDER_RING_BASE), - .gen_mask = INTEL_GEN_MASK(4, 12), + .min_graphics_ver = 4, + .max_graphics_ver = 12, .size = 8 } }; @@ -2038,7 +2040,7 @@ int i915_reg_read_ioctl(struct drm_device *dev, GEM_BUG_ON(entry->size > 8); GEM_BUG_ON(entry_offset & (entry->size - 1)); - if (INTEL_INFO(i915)->gen_mask & entry->gen_mask && + if (IS_GRAPHICS_VER(i915, entry->min_graphics_ver, entry->max_graphics_ver) && entry_offset == (reg->offset & -entry->size)) break; entry++; diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 0e4e6be0101d..f76c9bcec735 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -125,17 +125,19 @@ static int live_forcewake_ops(void *arg) { static const struct reg { const char *name; + u8 min_graphics_ver; + u8 max_graphics_ver; unsigned long platforms; unsigned int offset; } registers[] = { { "RING_START", - INTEL_GEN_MASK(6, 7), + 6, 7, 0x38, }, { "RING_MI_MODE", - INTEL_GEN_MASK(8, BITS_PER_LONG), + 8, U8_MAX, 0x9c, } }; @@ -170,7 +172,7 @@ static int live_forcewake_ops(void *arg) /* We have to pick carefully to get the exact behaviour we need */ for (r = registers; r->name; r++) - if (r->platforms & INTEL_INFO(gt->i915)->gen_mask) + if (IS_GRAPHICS_VER(gt->i915, r->min_graphics_ver, r->max_graphics_ver)) break; if (!r->name) { pr_debug("Forcewaked register not known for %s; skipping\n", -- cgit From 58e0d8a0f529761a77acd220f70ebef38bcc3bfe Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 12 Apr 2021 22:09:58 -0700 Subject: drm/i915: finish removal of gen_mask Now that it's not used anywhere, remove it from struct intel_device_info. To allow a period in which code will be converted to the new macro, keep IS_GEN_RANGE() around, just redefining it to use the new fields. The size advantage from IS_GEN_RANGE() using a mask is not that big as it has pretty limited use throughout the driver: text data bss dec hex filename 2758497 95965 6496 2860958 2ba79e drivers/gpu/drm/i915/i915.ko.old 2758586 95953 6496 2861035 2ba7eb drivers/gpu/drm/i915/i915.ko.new Reviewed-by: Jani Nikula Signed-off-by: Lucas De Marchi [Jani: Minor code comment change while applying.]
Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210413051002.92589-9-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 2 -- drivers/gpu/drm/i915/i915_drv.h | 14 +++++--------- drivers/gpu/drm/i915/i915_pci.c | 1 - drivers/gpu/drm/i915/intel_device_info.h | 2 -- 4 files changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c2329bc44f55..91cbe2060f10 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -768,8 +768,6 @@ i915_driver_create(struct pci_dev *pdev, const struct pci_device_id *ent) memcpy(device_info, match_info, sizeof(*device_info)); RUNTIME_INFO(i915)->device_id = pdev->device; - BUG_ON(device_info->gen > BITS_PER_TYPE(device_info->gen_mask)); - return i915; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 23316c35bf85..b022e3d7d296 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1241,6 +1241,11 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) * GRAPHICS_VER(), MEDIA_VER() and DISPLAY_VER() */ #define INTEL_GEN(dev_priv) (INTEL_INFO(dev_priv)->gen) +/* + * Deprecated: use IS_GRAPHICS_VER(), IS_MEDIA_VER() and IS_DISPLAY_VER() as + * appropriate. + */ +#define IS_GEN_RANGE(dev_priv, s, e) IS_GRAPHICS_VER(dev_priv, (s), (e)) #define GRAPHICS_VER(i915) (INTEL_INFO(i915)->graphics_ver) #define IS_GRAPHICS_VER(i915, from, until) \ @@ -1257,15 +1262,6 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) #define REVID_FOREVER 0xff #define INTEL_REVID(dev_priv) (to_pci_dev((dev_priv)->drm.dev)->revision) -#define INTEL_GEN_MASK(s, e) ( \ - BUILD_BUG_ON_ZERO(!__builtin_constant_p(s)) + \ - BUILD_BUG_ON_ZERO(!__builtin_constant_p(e)) + \ - GENMASK((e) - 1, (s) - 1)) - -/* Returns true if Gen is in inclusive range [Start, End] */ -#define IS_GEN_RANGE(dev_priv, s, e) \ - (!!(INTEL_INFO(dev_priv)->gen_mask & INTEL_GEN_MASK((s), (e)))) - #define IS_GEN(dev_priv, n) \ (BUILD_BUG_ON_ZERO(!__builtin_constant_p(n)) + \ INTEL_INFO(dev_priv)->gen == (n)) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 97ab73276334..3b9cd1af0f28 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -37,7 +37,6 @@ #define PLATFORM(x) .platform = (x) #define GEN(x) \ - .gen_mask = BIT((x) - 1), \ .gen = (x), \ .graphics_ver = (x), \ .media_ver = (x), \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 405883a8cc84..b8f7b996f140 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -160,8 +160,6 @@ enum intel_ppgtt_type { func(supports_tv); struct intel_device_info { - u16 gen_mask; - u8 graphics_ver; u8 media_ver; -- cgit From 8802190f642034a919c8e757987c964d3d9df39f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 12 Apr 2021 22:09:59 -0700 Subject: drm/i915: eliminate remaining uses of intel_device_info->gen Replace gen with the new graphics_ver value and use GRAPHICS_VER() in those places. 
Reviewed-by: Jani Nikula Signed-off-by: Lucas De Marchi Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210413051002.92589-10-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 22 +++++++++++----------- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/intel_device_info.c | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 5964e67c7d36..297143511f99 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -274,7 +274,7 @@ struct i915_execbuffer { struct drm_mm_node node; /** temporary GTT binding */ unsigned long vaddr; /** Current kmap address */ unsigned long page; /** Currently mapped page index */ - unsigned int gen; /** Cached value of INTEL_GEN */ + unsigned int graphics_ver; /** Cached value of GRAPHICS_VER */ bool use_64bit_reloc : 1; bool has_llc : 1; bool has_fence : 1; @@ -1049,10 +1049,10 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->page = -1; cache->vaddr = 0; /* Must be a variable in the struct to allow GCC to unroll. */ - cache->gen = INTEL_GEN(i915); + cache->graphics_ver = GRAPHICS_VER(i915); cache->has_llc = HAS_LLC(i915); cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); - cache->has_fence = cache->gen < 4; + cache->has_fence = cache->graphics_ver < 4; cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->node.flags = 0; reloc_cache_clear(cache); @@ -1402,7 +1402,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, err = eb->engine->emit_bb_start(rq, batch->node.start, PAGE_SIZE, - cache->gen > 5 ? 0 : I915_DISPATCH_SECURE); + cache->graphics_ver > 5 ? 0 : I915_DISPATCH_SECURE); if (err) goto skip_request; @@ -1503,14 +1503,14 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb, u64 offset, u64 target_addr) { - const unsigned int gen = eb->reloc_cache.gen; + const unsigned int ver = eb->reloc_cache.graphics_ver; unsigned int len; u32 *batch; u64 addr; - if (gen >= 8) + if (ver >= 8) len = offset & 7 ? 
8 : 5; - else if (gen >= 4) + else if (ver >= 4) len = 4; else len = 3; @@ -1522,7 +1522,7 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb, return false; addr = gen8_canonical_addr(vma->node.start + offset); - if (gen >= 8) { + if (ver >= 8) { if (offset & 7) { *batch++ = MI_STORE_DWORD_IMM_GEN4; *batch++ = lower_32_bits(addr); @@ -1542,7 +1542,7 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb, *batch++ = lower_32_bits(target_addr); *batch++ = upper_32_bits(target_addr); } - } else if (gen >= 6) { + } else if (ver >= 6) { *batch++ = MI_STORE_DWORD_IMM_GEN4; *batch++ = 0; *batch++ = addr; @@ -1552,12 +1552,12 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb, *batch++ = 0; *batch++ = vma_phys_addr(vma, offset); *batch++ = target_addr; - } else if (gen >= 4) { + } else if (ver >= 4) { *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *batch++ = 0; *batch++ = addr; *batch++ = target_addr; - } else if (gen >= 3 && + } else if (ver >= 3 && !(IS_I915G(eb->i915) || IS_I915GM(eb->i915))) { *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; *batch++ = addr; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 91cbe2060f10..3286bcd0a6d5 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -794,7 +794,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return PTR_ERR(i915); /* Disable nuclear pageflip by default on pre-ILK */ - if (!i915->params.nuclear_pageflip && match_info->gen < 5) + if (!i915->params.nuclear_pageflip && match_info->graphics_ver < 5) i915->drm.driver_features &= ~DRIVER_ATOMIC; /* diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index de02207f6ec6..b58bc7bff65e 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -95,7 +95,7 @@ static const char *iommu_name(void) void intel_device_info_print_static(const struct intel_device_info *info, struct drm_printer *p) { - drm_printf(p, "gen: %d\n", info->gen); + drm_printf(p, "graphics_ver: %u\n", info->graphics_ver); drm_printf(p, "gt: %d\n", info->gt); drm_printf(p, "iommu: %s\n", iommu_name()); drm_printf(p, "memory-regions: %x\n", info->memory_regions); -- cgit From 7b36b7d3be3ebcfcf28e61cc65868e277aa9f36a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 12 Apr 2021 22:10:00 -0700 Subject: drm/i915: finish removal of gen from intel_device_info Now that it's not being used anymore, finish its removal. Like for gen_mask, we replace INTEL_GEN() and IS_GEN() macros to use the new field. Reviewed-by: Jani Nikula Signed-off-by: Lucas De Marchi [Jani: Minor code comment change while applying.] 
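As a rough guide for the follow-up call-site conversions (illustrative examples, not taken from this patch), the deprecated forms map over as:

	-	if (IS_GEN(i915, 7))
	+	if (GRAPHICS_VER(i915) == 7)

	-	if (INTEL_GEN(i915) >= 9)
	+	if (GRAPHICS_VER(i915) >= 9)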
Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210413051002.92589-11-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 10 +++++----- drivers/gpu/drm/i915/i915_pci.c | 1 - drivers/gpu/drm/i915/intel_device_info.h | 1 - drivers/gpu/drm/i915/selftests/mock_gem_device.c | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b022e3d7d296..e20294e9227a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1240,12 +1240,16 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) * Deprecated: this will be replaced by individual IP checks: * GRAPHICS_VER(), MEDIA_VER() and DISPLAY_VER() */ -#define INTEL_GEN(dev_priv) (INTEL_INFO(dev_priv)->gen) +#define INTEL_GEN(dev_priv) GRAPHICS_VER(dev_priv) /* * Deprecated: use IS_GRAPHICS_VER(), IS_MEDIA_VER() and IS_DISPLAY_VER() as * appropriate. */ #define IS_GEN_RANGE(dev_priv, s, e) IS_GRAPHICS_VER(dev_priv, (s), (e)) +/* + * Deprecated: use GRAPHICS_VER(), MEDIA_VER() and DISPLAY_VER() as appropriate. + */ +#define IS_GEN(dev_priv, n) (GRAPHICS_VER(dev_priv) == (n)) #define GRAPHICS_VER(i915) (INTEL_INFO(i915)->graphics_ver) #define IS_GRAPHICS_VER(i915, from, until) \ @@ -1262,10 +1266,6 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) #define REVID_FOREVER 0xff #define INTEL_REVID(dev_priv) (to_pci_dev((dev_priv)->drm.dev)->revision) -#define IS_GEN(dev_priv, n) \ - (BUILD_BUG_ON_ZERO(!__builtin_constant_p(n)) + \ - INTEL_INFO(dev_priv)->gen == (n)) - #define HAS_DSB(dev_priv) (INTEL_INFO(dev_priv)->display.has_dsb) /* diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 3b9cd1af0f28..1453c1436f31 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -37,7 +37,6 @@ #define PLATFORM(x) .platform = (x) #define GEN(x) \ - .gen = (x), \ .graphics_ver = (x), \ .media_ver = (x), \ .display.ver = (x) diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index b8f7b996f140..8ab4fa6c7fdd 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -163,7 +163,6 @@ struct intel_device_info { u8 graphics_ver; u8 media_ver; - u8 gen; u8 gt; /* GT number, 0 if undefined */ intel_engine_mask_t platform_engine_mask; /* Engines supported by the HW */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 0188f877cab2..2ffc763fe90d 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -162,7 +162,7 @@ struct drm_i915_private *mock_gem_device(void) /* Using the global GTT may ask questions about KMS users, so prepare */ drm_mode_config_init(&i915->drm); - mkwrite_device_info(i915)->gen = -1; + mkwrite_device_info(i915)->graphics_ver = -1; mkwrite_device_info(i915)->page_sizes = I915_GTT_PAGE_SIZE_4K | -- cgit From 1ab52b4fc008bb0ea4a3a279aadd05f8a24dd79f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 12 Apr 2021 22:10:01 -0700 Subject: drm/i915: add media and display versions to device_info print Since we are now converting from a single gen version to graphics_ver, media_ver and display_ver, add the last 2 when printing the device info. 
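With the two extra drm_printf() lines the static info dump would start roughly like this (illustrative values for a hypothetical version 12 part):

	graphics_ver: 12
	media_ver: 12
	display_ver: 12
	gt: 1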
Reviewed-by: Jani Nikula Signed-off-by: Lucas De Marchi Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210413051002.92589-12-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_device_info.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index b58bc7bff65e..6a351a709417 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -96,6 +96,8 @@ void intel_device_info_print_static(const struct intel_device_info *info, struct drm_printer *p) { drm_printf(p, "graphics_ver: %u\n", info->graphics_ver); + drm_printf(p, "media_ver: %u\n", info->media_ver); + drm_printf(p, "display_ver: %u\n", info->display.ver); drm_printf(p, "gt: %d\n", info->gt); drm_printf(p, "iommu: %s\n", iommu_name()); drm_printf(p, "memory-regions: %x\n", info->memory_regions); -- cgit From 425390c5dce6da76578389629d19517fcd79c959 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 12 Apr 2021 22:10:02 -0700 Subject: drm/i915: split dgfx features from gen 12 Make them independent so we can use DGFX_FEATURES more generically. For future platforms that do not use the GEN nomenclature we will define graphics, media and display separately, so we avoid setting graphics_ver with the GEN() macro. Reviewed-by: Jani Nikula Signed-off-by: Lucas De Marchi Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210413051002.92589-13-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 1453c1436f31..44e7b94db63d 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -907,8 +907,7 @@ static const struct intel_device_info rkl_info = { BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0), }; -#define GEN12_DGFX_FEATURES \ - GEN12_FEATURES, \ +#define DGFX_FEATURES \ .memory_regions = REGION_SMEM | REGION_LMEM, \ .has_master_unit_irq = 1, \ .has_llc = 0, \ @@ -916,7 +915,8 @@ static const struct intel_device_info rkl_info = { .is_dgfx = 1 static const struct intel_device_info dg1_info __maybe_unused = { - GEN12_DGFX_FEATURES, + GEN12_FEATURES, + DGFX_FEATURES, PLATFORM(INTEL_DG1), .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), .require_force_probe = 1, -- cgit From e42e7e585984b85b0fb9dd1fefc85ee4800ca629 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 13 Apr 2021 02:24:12 +0300 Subject: drm/i915: Fix modesetting in case of unexpected AUX timeouts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case AUX failures happen unexpectedly during a modeset, the driver should still complete the modeset. In particular the driver should perform the link training sequence steps even in case of an AUX failure, as this sequence also includes port initialization steps. Not doing that can leave the port/pipe in a broken state and lead for instance to a flip done timeout. Fix this by continuing with link training (in a no-LTTPR mode) if the DPRX DPCD readout failed for some reason at the beginning of link training. After a successful connector detection we already have the DPCD read out and cached, so the failed repeated read for it should not cause a problem. 
Note that a partial AUX read could in theory partly overwrite the cached DPCD (and return error) but this overwrite should not happen if the returned values are corrupted (due to a timeout or some other IO error). Kudos to Ville for root-causing the problem. Fixes: 264613b406eb ("drm/i915: Disable LTTPR support when the DPCD rev < 1.4") References: https://gitlab.freedesktop.org/drm/intel/-/issues/3308 Cc: stable@vger.kernel.org # 5.11 Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210412232413.2755054-1-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp_link_training.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 5e9c3c74310c..cbcfb0c4c370 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -882,7 +882,8 @@ void intel_dp_start_link_train(struct intel_dp *intel_dp, int lttpr_count = intel_dp_init_lttpr_and_dprx_caps(intel_dp); if (lttpr_count < 0) - return; + /* Still continue with enabling the port and link training. */ + lttpr_count = 0; if (!intel_dp_link_train_all_phys(intel_dp, crtc_state, lttpr_count)) intel_dp_schedule_fallback_link_training(intel_dp, crtc_state); -- cgit From 770d4c71eca7285216dc862849564b33c0d6a210 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 13 Apr 2021 02:24:13 +0300 Subject: drm/i915: Drop redundant address-of op before lttpr_common_caps array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The address-of op in front of an array is just an alias to using the array on its own, so drop the op. Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210412232413.2755054-2-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_dp_link_training.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index cbcfb0c4c370..7f684d33314f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -37,7 +37,7 @@ intel_dp_dump_link_status(struct drm_device *drm, static void intel_dp_reset_lttpr_common_caps(struct intel_dp *intel_dp) { - memset(&intel_dp->lttpr_common_caps, 0, sizeof(intel_dp->lttpr_common_caps)); + memset(intel_dp->lttpr_common_caps, 0, sizeof(intel_dp->lttpr_common_caps)); } static void intel_dp_reset_lttpr_count(struct intel_dp *intel_dp) -- cgit From 1884b579c0cfbb52a92462184406558ac633cafb Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Fri, 9 Apr 2021 16:17:38 -0700 Subject: drm/i915/display/psr: Fix cppcheck warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix redundant condition, caught in cppcheck by kernel test robot.
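The simplification is the standard boolean identity, with a = psr.enabled and b = psr2_enabled:

	!a || (a && b)  ==  !a || b

so dropping the second psr.enabled test cannot change behavior.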
Reported-by: kernel test robot Cc: Gwan-gyeong Mun Fixes: b64d6c51380b ("drm/i915/display: Support PSR Multiple Instances") Signed-off-by: José Roberto de Souza Reviewed-by: Harish Chegondi Link: https://patchwork.freedesktop.org/patch/msgid/20210409231738.238682-1-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index bf8e4ede2a6c..10a2e8fae333 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1518,8 +1518,7 @@ void intel_psr_wait_for_idle(const struct intel_crtc_state *new_crtc_state) u32 psr_status; mutex_lock(&intel_dp->psr.lock); - if (!intel_dp->psr.enabled || - (intel_dp->psr.enabled && intel_dp->psr.psr2_enabled)) { + if (!intel_dp->psr.enabled || intel_dp->psr.psr2_enabled) { mutex_unlock(&intel_dp->psr.lock); continue; } -- cgit From f7c475b8dfc23d461a47dfac5e498f8cc96faea5 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Wed, 24 Feb 2021 11:28:08 +0800 Subject: drm/ttm: Do not add non-system domain BO into swap list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A BO is added to the swap list when it is validated into the system domain. If the BO is later validated into a non-system domain, say the VRAM domain, it should no longer be on the swap list. Acked-by: Alex Deucher Signed-off-by: xinhui pan Signed-off-by: Alex Deucher Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210224032808.150465-1-xinhui.pan@amd.com Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_bo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 101a68dc615b..799ec7a7caa4 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -153,6 +153,8 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, swap = &ttm_bo_glob.swap_lru[bo->priority]; list_move_tail(&bo->swap, swap); + } else { + list_del_init(&bo->swap); } if (bdev->driver->del_from_lru_notify) -- cgit From b876e79d7bffb2e6f78c291ddcd1f76c4e3fe5c5 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 1 Apr 2021 18:40:43 +0300 Subject: drm/i915: Extract intel_adjusted_rate() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract a small helper to calculate the downscaling adjusted pixel rate/data rate/etc.
v2: Drop the plane visibility check and add a comment explaining why Reviewed-by: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210401154043.19466-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_atomic_plane.c | 36 ++++++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index c3f2962aa1eb..07fcfec58c49 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -133,25 +133,45 @@ intel_plane_destroy_state(struct drm_plane *plane, kfree(plane_state); } -unsigned int intel_plane_pixel_rate(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static unsigned int intel_adjusted_rate(const struct drm_rect *src, + const struct drm_rect *dst, + unsigned int rate) { unsigned int src_w, src_h, dst_w, dst_h; - unsigned int pixel_rate = crtc_state->pixel_rate; - src_w = drm_rect_width(&plane_state->uapi.src) >> 16; - src_h = drm_rect_height(&plane_state->uapi.src) >> 16; - dst_w = drm_rect_width(&plane_state->uapi.dst); - dst_h = drm_rect_height(&plane_state->uapi.dst); + src_w = drm_rect_width(src) >> 16; + src_h = drm_rect_height(src) >> 16; + dst_w = drm_rect_width(dst); + dst_h = drm_rect_height(dst); /* Downscaling limits the maximum pixel rate */ dst_w = min(src_w, dst_w); dst_h = min(src_h, dst_h); - return DIV_ROUND_UP_ULL(mul_u32_u32(pixel_rate, src_w * src_h), + return DIV_ROUND_UP_ULL(mul_u32_u32(rate, src_w * src_h), dst_w * dst_h); } +unsigned int intel_plane_pixel_rate(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + /* + * Note we don't check for plane visibility here as + * we want to use this when calculating the cursor + * watermarks even if the cursor is fully offscreen. + * That depends on the src/dst rectangles being + * correctly populated whenever the watermark code + * considers the cursor to be visible, whether or not + * it is actually visible. + * + * See: intel_wm_plane_visible() and intel_check_cursor() + */ + + return intel_adjusted_rate(&plane_state->uapi.src, + &plane_state->uapi.dst, + crtc_state->pixel_rate); +} + unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { -- cgit From 4028988ea52971891e8b9c672673fd0de53bcf61 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 30 Mar 2021 21:42:54 +0300 Subject: drm/i915: Reuse intel_adjusted_rate() for pfit pixel rate adjustment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the hand rolled pfit downscale calculations with intel_adjusted_rate(). 
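A quick worked example of what the helper computes here (numbers chosen for illustration): a 3840x2160 pipe source panel-fitted down to a 1920x1080 destination yields

	adjusted = DIV_ROUND_UP_ULL(pixel_rate * (3840 * 2160), 1920 * 1080)
	         = pixel_rate * 4

while for upscaling the destination is clamped to min(src, dst) per axis, so the pixel rate is left unchanged.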
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210330184254.6290-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_atomic_plane.c | 6 +++--- drivers/gpu/drm/i915/display/intel_atomic_plane.h | 4 ++++ drivers/gpu/drm/i915/display/intel_display.c | 23 ++++++----------------- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index 07fcfec58c49..7bfb26ca0bd0 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -133,9 +133,9 @@ intel_plane_destroy_state(struct drm_plane *plane, kfree(plane_state); } -static unsigned int intel_adjusted_rate(const struct drm_rect *src, - const struct drm_rect *dst, - unsigned int rate) +unsigned int intel_adjusted_rate(const struct drm_rect *src, + const struct drm_rect *dst, + unsigned int rate) { unsigned int src_w, src_h, dst_w, dst_h; diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.h b/drivers/gpu/drm/i915/display/intel_atomic_plane.h index 5c78a087ed86..dc4d05e75e1c 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.h +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.h @@ -10,6 +10,7 @@ struct drm_plane; struct drm_property; +struct drm_rect; struct intel_atomic_state; struct intel_crtc; struct intel_crtc_state; @@ -18,6 +19,9 @@ struct intel_plane_state; extern const struct drm_plane_helper_funcs intel_plane_helper_funcs; +unsigned int intel_adjusted_rate(const struct drm_rect *src, + const struct drm_rect *dst, + unsigned int rate); unsigned int intel_plane_pixel_rate(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 411b46c012f8..67691f593ee6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3984,7 +3984,7 @@ static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc) static u32 ilk_pipe_pixel_rate(const struct intel_crtc_state *crtc_state) { u32 pixel_rate = crtc_state->hw.pipe_mode.crtc_clock; - unsigned int pipe_w, pipe_h, pfit_w, pfit_h; + struct drm_rect src; /* * We only use IF-ID interlacing. 
If we ever use @@ -3994,23 +3994,12 @@ static u32 ilk_pipe_pixel_rate(const struct intel_crtc_state *crtc_state) if (!crtc_state->pch_pfit.enabled) return pixel_rate; - pipe_w = crtc_state->pipe_src_w; - pipe_h = crtc_state->pipe_src_h; + drm_rect_init(&src, 0, 0, + crtc_state->pipe_src_w << 16, + crtc_state->pipe_src_h << 16); - pfit_w = drm_rect_width(&crtc_state->pch_pfit.dst); - pfit_h = drm_rect_height(&crtc_state->pch_pfit.dst); - - if (pipe_w < pfit_w) - pipe_w = pfit_w; - if (pipe_h < pfit_h) - pipe_h = pfit_h; - - if (drm_WARN_ON(crtc_state->uapi.crtc->dev, - !pfit_w || !pfit_h)) - return pixel_rate; - - return div_u64(mul_u32_u32(pixel_rate, pipe_w * pipe_h), - pfit_w * pfit_h); + return intel_adjusted_rate(&src, &crtc_state->pch_pfit.dst, + pixel_rate); } static void intel_mode_from_crtc_timings(struct drm_display_mode *mode, -- cgit From c6deb5e97ded2623dfaec9b15a9bdd1b56464735 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 13 Apr 2021 12:32:59 -0500 Subject: drm/i915/pm: Make the wm parameter of print_wm_latency a pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the following build error with GCC 11: In function ‘snb_wm_latency_quirk’, inlined from ‘ilk_setup_wm_latency’ at drivers/gpu/drm/i915/intel_pm.c:3109:3, inlined from ‘intel_init_pm’ at drivers/gpu/drm/i915/intel_pm.c:7695:3: drivers/gpu/drm/i915/intel_pm.c:3058:9: error: ‘intel_print_wm_latency’ reading 16 bytes from a region of size 10 [-Werror=stringop-overread] 3058 | intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/intel_pm.c: In function ‘intel_init_pm’: drivers/gpu/drm/i915/intel_pm.c:3058:9: note: referencing argument 3 of type ‘const u16 *’ {aka ‘const short unsigned int *’} drivers/gpu/drm/i915/intel_pm.c:2995:13: note: in a call to function ‘intel_print_wm_latency’ 2995 | static void intel_print_wm_latency(struct drm_i915_private *dev_priv, | ^~~~~~~~~~~~~~~~~~~~~~ As far as I can tell, we don't actually need 8 elements except on SKL and that uses dev_priv->wm.skl_latency which has enough. Signed-off-by: Jason Ekstrand Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210413173259.472405-1-jason@jlekstrand.net Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index eaf4c072ade0..105bce19d1df 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2994,7 +2994,7 @@ int ilk_wm_max_level(const struct drm_i915_private *dev_priv) static void intel_print_wm_latency(struct drm_i915_private *dev_priv, const char *name, - const u16 wm[8]) + const u16 *wm) { int level, max_level = ilk_wm_max_level(dev_priv); -- cgit From 103b8cbac28ea2965f24ca52e6a92d76d3851b3c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 14 Apr 2021 09:02:24 +0300 Subject: drm/i915: fix an error code in intel_overlay_do_put_image() This code should propagate the error from intel_overlay_pin_fb() but currently it returns success. 
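The fix below is the usual ERR_PTR idiom; a generic sketch of the pattern (pin_helper() is a made-up stand-in):

	ptr = pin_helper();		/* valid pointer on success, ERR_PTR(-E...) on failure */
	if (IS_ERR(ptr)) {
		ret = PTR_ERR(ptr);	/* propagate the encoded errno */
		goto out;
	}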
Fixes: 1b321026e213 ("drm/i915: Pass ww ctx to intel_pin_to_display_plane") Signed-off-by: Dan Carpenter Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/YHaFcEzcnh/hk1/Q@mwanda --- drivers/gpu/drm/i915/display/intel_overlay.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index d1255911a327..fffbde4256db 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -803,8 +803,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, atomic_inc(&dev_priv->gpu_error.pending_fb_pin); vma = intel_overlay_pin_fb(new_bo); - if (IS_ERR(vma)) + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); goto out_pin_section; + } i915_gem_object_flush_frontbuffer(new_bo, ORIGIN_DIRTYFB); -- cgit From 708de86e10e59352eb57a3ad6a5a524d8f5c9f13 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 16 Apr 2021 20:10:04 +0300 Subject: drm/i915: Collect dbuf device info into a sub-struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Collect the related dbuf information into a struct. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210416171011.19012-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_power.c | 4 ++-- drivers/gpu/drm/i915/i915_pci.c | 16 ++++++++-------- drivers/gpu/drm/i915/intel_device_info.h | 6 ++++-- drivers/gpu/drm/i915/intel_pm.c | 14 +++++++------- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 0af1dee1ac95..0e433a0e1fce 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -4777,7 +4777,7 @@ static void gen9_dbuf_slice_set(struct drm_i915_private *dev_priv, void gen9_dbuf_slices_update(struct drm_i915_private *dev_priv, u8 req_slices) { - int num_slices = INTEL_INFO(dev_priv)->num_supported_dbuf_slices; + int num_slices = INTEL_INFO(dev_priv)->dbuf.num_slices; struct i915_power_domains *power_domains = &dev_priv->power_domains; enum dbuf_slice slice; @@ -4825,7 +4825,7 @@ static void gen9_dbuf_disable(struct drm_i915_private *dev_priv) static void gen12_dbuf_slices_config(struct drm_i915_private *dev_priv) { - const int num_slices = INTEL_INFO(dev_priv)->num_supported_dbuf_slices; + int num_slices = INTEL_INFO(dev_priv)->dbuf.num_slices; enum dbuf_slice slice; for (slice = DBUF_S1; slice < (DBUF_S1 + num_slices); slice++) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 44e7b94db63d..484d2633894a 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -647,8 +647,8 @@ static const struct intel_device_info chv_info = { .has_gt_uc = 1, \ .display.has_hdcp = 1, \ .display.has_ipc = 1, \ - .ddb_size = 896, \ - .num_supported_dbuf_slices = 1 + .dbuf.size = 896, \ + .dbuf.num_slices = 1 #define SKL_PLATFORM \ GEN9_FEATURES, \ @@ -683,7 +683,7 @@ static const struct intel_device_info skl_gt4_info = { #define GEN9_LP_FEATURES \ GEN(9), \ .is_lp = 1, \ - .num_supported_dbuf_slices = 1, \ + .dbuf.num_slices = 1, \ .display.has_hotplug = 1, \ .platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \ .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), \ @@ -720,14 
+720,14 @@ static const struct intel_device_info skl_gt4_info = { static const struct intel_device_info bxt_info = { GEN9_LP_FEATURES, PLATFORM(INTEL_BROXTON), - .ddb_size = 512, + .dbuf.size = 512, }; static const struct intel_device_info glk_info = { GEN9_LP_FEATURES, PLATFORM(INTEL_GEMINILAKE), .display.ver = 10, - .ddb_size = 1024, + .dbuf.size = 1024, GLK_COLORS, }; @@ -790,7 +790,7 @@ static const struct intel_device_info cml_gt2_info = { #define GEN10_FEATURES \ GEN9_FEATURES, \ GEN(10), \ - .ddb_size = 1024, \ + .dbuf.size = 1024, \ .display.has_dsc = 1, \ .has_coherent_ggtt = false, \ GLK_COLORS @@ -830,8 +830,8 @@ static const struct intel_device_info cnl_info = { [TRANSCODER_DSI_1] = TRANSCODER_DSI1_OFFSET, \ }, \ GEN(11), \ - .ddb_size = 2048, \ - .num_supported_dbuf_slices = 2, \ + .dbuf.size = 2048, \ + .dbuf.num_slices = 2, \ .has_logical_ring_elsq = 1, \ .color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 } diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 8ab4fa6c7fdd..74591e4f9c44 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -196,8 +196,10 @@ struct intel_device_info { #undef DEFINE_FLAG } display; - u16 ddb_size; /* in blocks */ - u8 num_supported_dbuf_slices; /* number of DBuf slices */ + struct { + u16 size; /* in blocks */ + u8 num_slices; + } dbuf; /* Register offsets for the various display pipes and transcoders */ int pipe_offsets[I915_MAX_TRANSCODERS]; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 105bce19d1df..02d64db2c591 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3637,10 +3637,10 @@ bool ilk_disable_lp_wm(struct drm_i915_private *dev_priv) u8 intel_enabled_dbuf_slices_mask(struct drm_i915_private *dev_priv) { int i; - int max_slices = INTEL_INFO(dev_priv)->num_supported_dbuf_slices; + int num_slices = INTEL_INFO(dev_priv)->dbuf.num_slices; u8 enabled_slices_mask = 0; - for (i = 0; i < max_slices; i++) { + for (i = 0; i < num_slices; i++) { if (intel_uncore_read(&dev_priv->uncore, DBUF_CTL_S(i)) & DBUF_POWER_STATE) enabled_slices_mask |= BIT(i); } @@ -4030,7 +4030,7 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state) static int intel_dbuf_size(struct drm_i915_private *dev_priv) { - int ddb_size = INTEL_INFO(dev_priv)->ddb_size; + int ddb_size = INTEL_INFO(dev_priv)->dbuf.size; drm_WARN_ON(&dev_priv->drm, ddb_size == 0); @@ -4043,7 +4043,7 @@ static int intel_dbuf_size(struct drm_i915_private *dev_priv) static int intel_dbuf_slice_size(struct drm_i915_private *dev_priv) { return intel_dbuf_size(dev_priv) / - INTEL_INFO(dev_priv)->num_supported_dbuf_slices; + INTEL_INFO(dev_priv)->dbuf.num_slices; } static void @@ -4070,8 +4070,8 @@ u32 skl_ddb_dbuf_slice_mask(struct drm_i915_private *dev_priv, { u32 slice_mask = 0; u16 ddb_size = intel_dbuf_size(dev_priv); - u16 num_supported_slices = INTEL_INFO(dev_priv)->num_supported_dbuf_slices; - u16 slice_size = ddb_size / num_supported_slices; + int num_slices = INTEL_INFO(dev_priv)->dbuf.num_slices; + u16 slice_size = ddb_size / num_slices; u16 start_slice; u16 end_slice; @@ -5828,7 +5828,7 @@ skl_compute_ddb(struct intel_atomic_state *state) "Enabled dbuf slices 0x%x -> 0x%x (out of %d dbuf slices)\n", old_dbuf_state->enabled_slices, new_dbuf_state->enabled_slices, - INTEL_INFO(dev_priv)->num_supported_dbuf_slices); + INTEL_INFO(dev_priv)->dbuf.num_slices); } for_each_new_intel_crtc_in_state(state, 
crtc, new_crtc_state, i) { -- cgit From 77531b0ef621b62e5164d33411106a3415eb7b67 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 16 Apr 2021 20:10:05 +0300 Subject: drm/i915: Handle dbuf bypass path allocation earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We always reserve the same 4 dbuf blocks for the bypass path allocation, so might as well do that when declaring the dbuf size. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210416171011.19012-3-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/i915_pci.c | 8 ++++---- drivers/gpu/drm/i915/intel_pm.c | 9 +-------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 484d2633894a..8b04fe35e51a 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -647,7 +647,7 @@ static const struct intel_device_info chv_info = { .has_gt_uc = 1, \ .display.has_hdcp = 1, \ .display.has_ipc = 1, \ - .dbuf.size = 896, \ + .dbuf.size = 896 - 4, /* 4 blocks for bypass path allocation */ \ .dbuf.num_slices = 1 #define SKL_PLATFORM \ @@ -720,14 +720,14 @@ static const struct intel_device_info skl_gt4_info = { static const struct intel_device_info bxt_info = { GEN9_LP_FEATURES, PLATFORM(INTEL_BROXTON), - .dbuf.size = 512, + .dbuf.size = 512 - 4, /* 4 blocks for bypass path allocation */ }; static const struct intel_device_info glk_info = { GEN9_LP_FEATURES, PLATFORM(INTEL_GEMINILAKE), .display.ver = 10, - .dbuf.size = 1024, + .dbuf.size = 1024 - 4, /* 4 blocks for bypass path allocation */ GLK_COLORS, }; @@ -790,7 +790,7 @@ static const struct intel_device_info cml_gt2_info = { #define GEN10_FEATURES \ GEN9_FEATURES, \ GEN(10), \ - .dbuf.size = 1024, \ + .dbuf.size = 1024 - 4, /* 4 blocks for bypass path allocation */ \ .display.has_dsc = 1, \ .has_coherent_ggtt = false, \ GLK_COLORS diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 02d64db2c591..ad3344ec6284 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4030,14 +4030,7 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state) static int intel_dbuf_size(struct drm_i915_private *dev_priv) { - int ddb_size = INTEL_INFO(dev_priv)->dbuf.size; - - drm_WARN_ON(&dev_priv->drm, ddb_size == 0); - - if (DISPLAY_VER(dev_priv) < 11) - return ddb_size - 4; /* 4 blocks for bypass path allocation */ - - return ddb_size; + return INTEL_INFO(dev_priv)->dbuf.size; } static int intel_dbuf_slice_size(struct drm_i915_private *dev_priv) -- cgit From 578e6edec45cf883681ae4e0b0d53ec62460af93 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 16 Apr 2021 20:10:06 +0300 Subject: drm/i915: Store dbuf slice mask in device info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's just store the dbuf slice information as a bitmask in the device info. Makes life a little easier later. 
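As a worked example (values as in the icelake entry below): with .dbuf.slice_mask = BIT(DBUF_S1) | BIT(DBUF_S2), the slice count is just a popcount,

	hweight8(BIT(DBUF_S1) | BIT(DBUF_S2)) == 2

which is all intel_dbuf_num_slices() has to compute.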
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210416171011.19012-4-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_power.c | 4 ++-- drivers/gpu/drm/i915/i915_pci.c | 6 +++--- drivers/gpu/drm/i915/intel_device_info.h | 2 +- drivers/gpu/drm/i915/intel_pm.c | 13 +++++++++---- drivers/gpu/drm/i915/intel_pm.h | 1 + 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 0e433a0e1fce..0435103082eb 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -4777,7 +4777,7 @@ static void gen9_dbuf_slice_set(struct drm_i915_private *dev_priv, void gen9_dbuf_slices_update(struct drm_i915_private *dev_priv, u8 req_slices) { - int num_slices = INTEL_INFO(dev_priv)->dbuf.num_slices; + int num_slices = intel_dbuf_num_slices(dev_priv); struct i915_power_domains *power_domains = &dev_priv->power_domains; enum dbuf_slice slice; @@ -4825,7 +4825,7 @@ static void gen9_dbuf_disable(struct drm_i915_private *dev_priv) static void gen12_dbuf_slices_config(struct drm_i915_private *dev_priv) { - int num_slices = INTEL_INFO(dev_priv)->dbuf.num_slices; + int num_slices = intel_dbuf_num_slices(dev_priv); enum dbuf_slice slice; for (slice = DBUF_S1; slice < (DBUF_S1 + num_slices); slice++) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 8b04fe35e51a..7786217638ed 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -648,7 +648,7 @@ static const struct intel_device_info chv_info = { .display.has_hdcp = 1, \ .display.has_ipc = 1, \ .dbuf.size = 896 - 4, /* 4 blocks for bypass path allocation */ \ - .dbuf.num_slices = 1 + .dbuf.slice_mask = BIT(DBUF_S1) #define SKL_PLATFORM \ GEN9_FEATURES, \ @@ -683,7 +683,7 @@ static const struct intel_device_info skl_gt4_info = { #define GEN9_LP_FEATURES \ GEN(9), \ .is_lp = 1, \ - .dbuf.num_slices = 1, \ + .dbuf.slice_mask = BIT(DBUF_S1), \ .display.has_hotplug = 1, \ .platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \ .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), \ @@ -831,7 +831,7 @@ static const struct intel_device_info cnl_info = { }, \ GEN(11), \ .dbuf.size = 2048, \ - .dbuf.num_slices = 2, \ + .dbuf.slice_mask = BIT(DBUF_S1) | BIT(DBUF_S2), \ .has_logical_ring_elsq = 1, \ .color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 } diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 74591e4f9c44..6aefe4fde197 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -198,7 +198,7 @@ struct intel_device_info { struct { u16 size; /* in blocks */ - u8 num_slices; + u8 slice_mask; } dbuf; /* Register offsets for the various display pipes and transcoders */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ad3344ec6284..ca4710a211c9 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3637,7 +3637,7 @@ bool ilk_disable_lp_wm(struct drm_i915_private *dev_priv) u8 intel_enabled_dbuf_slices_mask(struct drm_i915_private *dev_priv) { int i; - int num_slices = INTEL_INFO(dev_priv)->dbuf.num_slices; + int num_slices = intel_dbuf_num_slices(dev_priv); u8 enabled_slices_mask = 0; for (i = 0; i < num_slices; i++) { @@ -4033,10 +4033,15 @@ static int 
intel_dbuf_size(struct drm_i915_private *dev_priv) return INTEL_INFO(dev_priv)->dbuf.size; } +int intel_dbuf_num_slices(struct drm_i915_private *dev_priv) +{ + return hweight8(INTEL_INFO(dev_priv)->dbuf.slice_mask); +} + static int intel_dbuf_slice_size(struct drm_i915_private *dev_priv) { return intel_dbuf_size(dev_priv) / - INTEL_INFO(dev_priv)->dbuf.num_slices; + intel_dbuf_num_slices(dev_priv); } static void @@ -4063,7 +4068,7 @@ u32 skl_ddb_dbuf_slice_mask(struct drm_i915_private *dev_priv, { u32 slice_mask = 0; u16 ddb_size = intel_dbuf_size(dev_priv); - int num_slices = INTEL_INFO(dev_priv)->dbuf.num_slices; + int num_slices = intel_dbuf_num_slices(dev_priv); u16 slice_size = ddb_size / num_slices; u16 start_slice; u16 end_slice; @@ -5821,7 +5826,7 @@ skl_compute_ddb(struct intel_atomic_state *state) "Enabled dbuf slices 0x%x -> 0x%x (out of %d dbuf slices)\n", old_dbuf_state->enabled_slices, new_dbuf_state->enabled_slices, - INTEL_INFO(dev_priv)->dbuf.num_slices); + intel_dbuf_num_slices(dev_priv)); } for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index 669c8d505677..7dc11dec8984 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -38,6 +38,7 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv); void ilk_wm_get_hw_state(struct drm_i915_private *dev_priv); void skl_wm_get_hw_state(struct drm_i915_private *dev_priv); u8 intel_enabled_dbuf_slices_mask(struct drm_i915_private *dev_priv); +int intel_dbuf_num_slices(struct drm_i915_private *dev_priv); void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc, struct skl_ddb_entry *ddb_y, struct skl_ddb_entry *ddb_uv); -- cgit From 6390e5aa139c2be0f54a702e01dbaee4c9112616 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 16 Apr 2021 20:10:07 +0300 Subject: drm/i915: Use intel_dbuf_slice_size() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use intel_dbuf_slice_size() instead of hand rolling it. Also clean up some of the types. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210416171011.19012-5-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/intel_pm.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ca4710a211c9..baaab58e01fb 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4066,12 +4066,9 @@ skl_ddb_entry_for_slices(struct drm_i915_private *dev_priv, u8 slice_mask, u32 skl_ddb_dbuf_slice_mask(struct drm_i915_private *dev_priv, const struct skl_ddb_entry *entry) { - u32 slice_mask = 0; - u16 ddb_size = intel_dbuf_size(dev_priv); - int num_slices = intel_dbuf_num_slices(dev_priv); - u16 slice_size = ddb_size / num_slices; - u16 start_slice; - u16 end_slice; + int slice_size = intel_dbuf_slice_size(dev_priv); + enum dbuf_slice start_slice, end_slice; + u8 slice_mask = 0; if (!skl_ddb_entry_size(entry)) return 0; -- cgit From d152bb1f672b773432209b81b689e7f1687ca77a Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 16 Apr 2021 20:10:08 +0300 Subject: drm/i915: Use intel_de_rmw() for DBUF_POWER_REQUEST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use intel_de_rmw() instead of hand rolling it. 
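For reference, a sketch of the transformation (the real call site is in gen9_dbuf_slice_set() in the diff below):

	/* before: open-coded read-modify-write of the register */
	u32 val = intel_de_read(dev_priv, reg);
	if (enable)
		val |= DBUF_POWER_REQUEST;
	else
		val &= ~DBUF_POWER_REQUEST;
	intel_de_write(dev_priv, reg, val);

	/*
	 * after: one call; the third argument is the mask of bits to
	 * clear, the fourth the bits to set within that mask
	 */
	intel_de_rmw(dev_priv, reg, DBUF_POWER_REQUEST,
		     enable ? DBUF_POWER_REQUEST : 0);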
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210416171011.19012-6-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_power.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 0435103082eb..528fbede0ee7 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -4757,14 +4757,9 @@ static void gen9_dbuf_slice_set(struct drm_i915_private *dev_priv, { i915_reg_t reg = DBUF_CTL_S(slice); bool state; - u32 val; - val = intel_de_read(dev_priv, reg); - if (enable) - val |= DBUF_POWER_REQUEST; - else - val &= ~DBUF_POWER_REQUEST; - intel_de_write(dev_priv, reg, val); + intel_de_rmw(dev_priv, reg, DBUF_POWER_REQUEST, + enable ? DBUF_POWER_REQUEST : 0); intel_de_posting_read(dev_priv, reg); udelay(10); -- cgit From b88da66055a966fdf913f7152a22e97dc9d8a68e Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 16 Apr 2021 20:10:09 +0300 Subject: drm/i915: Polish for_each_dbuf_slice() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have the dbuf slice mask stored in the device info let's use it for for_each_dbuf_slice_in_mask*(). With this we can also rip out intel_dbuf_size() and intel_dbuf_num_slices(). Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210416171011.19012-7-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_bw.c | 11 +++---- drivers/gpu/drm/i915/display/intel_display.h | 9 +++--- drivers/gpu/drm/i915/display/intel_display_power.c | 13 ++++----- drivers/gpu/drm/i915/intel_pm.c | 34 ++++++++-------------- drivers/gpu/drm/i915/intel_pm.h | 1 - 5 files changed, 29 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 20dbc3759d27..969169743630 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -390,7 +390,6 @@ int skl_bw_calc_min_cdclk(struct intel_atomic_state *state) const struct intel_crtc_state *crtc_state; struct intel_crtc *crtc; int max_bw = 0; - int slice_id; enum pipe pipe; int i; @@ -418,6 +417,7 @@ int skl_bw_calc_min_cdclk(struct intel_atomic_state *state) &crtc_state->wm.skl.plane_ddb_uv[plane_id]; unsigned int data_rate = crtc_state->data_rate[plane_id]; unsigned int dbuf_mask = 0; + enum dbuf_slice slice; dbuf_mask |= skl_ddb_dbuf_slice_mask(dev_priv, plane_alloc); dbuf_mask |= skl_ddb_dbuf_slice_mask(dev_priv, uv_plane_alloc); @@ -435,8 +435,8 @@ int skl_bw_calc_min_cdclk(struct intel_atomic_state *state) * pessimistic, which shouldn't pose any significant * problem anyway.
*/ - for_each_dbuf_slice_in_mask(slice_id, dbuf_mask) - crtc_bw->used_bw[slice_id] += data_rate; + for_each_dbuf_slice_in_mask(dev_priv, slice, dbuf_mask) + crtc_bw->used_bw[slice] += data_rate; } } @@ -445,10 +445,11 @@ int skl_bw_calc_min_cdclk(struct intel_atomic_state *state) for_each_pipe(dev_priv, pipe) { struct intel_dbuf_bw *crtc_bw; + enum dbuf_slice slice; crtc_bw = &new_bw_state->dbuf_bw[pipe]; - for_each_dbuf_slice(slice_id) { + for_each_dbuf_slice(dev_priv, slice) { /* * Current experimental observations show that contrary * to BSpec we get underruns once we exceed 64 * CDCLK @@ -457,7 +458,7 @@ int skl_bw_calc_min_cdclk(struct intel_atomic_state *state) * bumped up all the time we calculate CDCLK according * to this formula for overall bw consumed by slices. */ - max_bw += crtc_bw->used_bw[slice_id]; + max_bw += crtc_bw->used_bw[slice]; } } diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 105294ec2dcc..b68bcd502206 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -188,12 +188,13 @@ enum plane_id { for ((__p) = PLANE_PRIMARY; (__p) < I915_MAX_PLANES; (__p)++) \ for_each_if((__crtc)->plane_ids_mask & BIT(__p)) -#define for_each_dbuf_slice_in_mask(__slice, __mask) \ +#define for_each_dbuf_slice(__dev_priv, __slice) \ for ((__slice) = DBUF_S1; (__slice) < I915_MAX_DBUF_SLICES; (__slice)++) \ - for_each_if((BIT(__slice)) & (__mask)) + for_each_if(INTEL_INFO(__dev_priv)->dbuf.slice_mask & BIT(__slice)) -#define for_each_dbuf_slice(__slice) \ - for_each_dbuf_slice_in_mask(__slice, BIT(I915_MAX_DBUF_SLICES) - 1) +#define for_each_dbuf_slice_in_mask(__dev_priv, __slice, __mask) \ + for_each_dbuf_slice((__dev_priv), (__slice)) \ + for_each_if((__mask) & BIT(__slice)) enum port { PORT_NONE = -1, diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 528fbede0ee7..0fb4864a191a 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -4772,13 +4772,13 @@ static void gen9_dbuf_slice_set(struct drm_i915_private *dev_priv, void gen9_dbuf_slices_update(struct drm_i915_private *dev_priv, u8 req_slices) { - int num_slices = intel_dbuf_num_slices(dev_priv); struct i915_power_domains *power_domains = &dev_priv->power_domains; + u8 slice_mask = INTEL_INFO(dev_priv)->dbuf.slice_mask; enum dbuf_slice slice; - drm_WARN(&dev_priv->drm, req_slices & ~(BIT(num_slices) - 1), - "Invalid set of dbuf slices (0x%x) requested (num dbuf slices %d)\n", - req_slices, num_slices); + drm_WARN(&dev_priv->drm, req_slices & ~slice_mask, + "Invalid set of dbuf slices (0x%x) requested (total dbuf slices 0x%x)\n", + req_slices, slice_mask); drm_dbg_kms(&dev_priv->drm, "Updating dbuf slices to 0x%x\n", req_slices); @@ -4792,7 +4792,7 @@ void gen9_dbuf_slices_update(struct drm_i915_private *dev_priv, */ mutex_lock(&power_domains->lock); - for (slice = DBUF_S1; slice < num_slices; slice++) + for_each_dbuf_slice(dev_priv, slice) gen9_dbuf_slice_set(dev_priv, slice, req_slices & BIT(slice)); dev_priv->dbuf.enabled_slices = req_slices; @@ -4820,10 +4820,9 @@ static void gen9_dbuf_disable(struct drm_i915_private *dev_priv) static void gen12_dbuf_slices_config(struct drm_i915_private *dev_priv) { - int num_slices = intel_dbuf_num_slices(dev_priv); enum dbuf_slice slice; - for (slice = DBUF_S1; slice < (DBUF_S1 + num_slices); slice++) + for_each_dbuf_slice(dev_priv, slice) 
intel_de_rmw(dev_priv, DBUF_CTL_S(slice), DBUF_TRACKER_STATE_SERVICE_MASK, DBUF_TRACKER_STATE_SERVICE(8)); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index baaab58e01fb..cd584474d1e8 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3636,16 +3636,16 @@ bool ilk_disable_lp_wm(struct drm_i915_private *dev_priv) u8 intel_enabled_dbuf_slices_mask(struct drm_i915_private *dev_priv) { - int i; - int num_slices = intel_dbuf_num_slices(dev_priv); - u8 enabled_slices_mask = 0; + u8 enabled_slices = 0; + enum dbuf_slice slice; - for (i = 0; i < num_slices; i++) { - if (intel_uncore_read(&dev_priv->uncore, DBUF_CTL_S(i)) & DBUF_POWER_STATE) - enabled_slices_mask |= BIT(i); + for_each_dbuf_slice(dev_priv, slice) { + if (intel_uncore_read(&dev_priv->uncore, + DBUF_CTL_S(slice)) & DBUF_POWER_STATE) + enabled_slices |= BIT(slice); } - return enabled_slices_mask; + return enabled_slices; } /* @@ -4028,20 +4028,10 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state) return 0; } -static int intel_dbuf_size(struct drm_i915_private *dev_priv) -{ - return INTEL_INFO(dev_priv)->dbuf.size; -} - -int intel_dbuf_num_slices(struct drm_i915_private *dev_priv) -{ - return hweight8(INTEL_INFO(dev_priv)->dbuf.slice_mask); -} - static int intel_dbuf_slice_size(struct drm_i915_private *dev_priv) { - return intel_dbuf_size(dev_priv) / - intel_dbuf_num_slices(dev_priv); + return INTEL_INFO(dev_priv)->dbuf.size / + hweight8(INTEL_INFO(dev_priv)->dbuf.slice_mask); } static void @@ -4060,7 +4050,7 @@ skl_ddb_entry_for_slices(struct drm_i915_private *dev_priv, u8 slice_mask, ddb->end = fls(slice_mask) * slice_size; WARN_ON(ddb->start >= ddb->end); - WARN_ON(ddb->end > intel_dbuf_size(dev_priv)); + WARN_ON(ddb->end > INTEL_INFO(dev_priv)->dbuf.size); } u32 skl_ddb_dbuf_slice_mask(struct drm_i915_private *dev_priv, @@ -5820,10 +5810,10 @@ skl_compute_ddb(struct intel_atomic_state *state) return ret; drm_dbg_kms(&dev_priv->drm, - "Enabled dbuf slices 0x%x -> 0x%x (out of %d dbuf slices)\n", + "Enabled dbuf slices 0x%x -> 0x%x (total dbuf slices 0x%x)\n", old_dbuf_state->enabled_slices, new_dbuf_state->enabled_slices, - intel_dbuf_num_slices(dev_priv)); + INTEL_INFO(dev_priv)->dbuf.slice_mask); } for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index 7dc11dec8984..669c8d505677 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -38,7 +38,6 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv); void ilk_wm_get_hw_state(struct drm_i915_private *dev_priv); void skl_wm_get_hw_state(struct drm_i915_private *dev_priv); u8 intel_enabled_dbuf_slices_mask(struct drm_i915_private *dev_priv); -int intel_dbuf_num_slices(struct drm_i915_private *dev_priv); void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc, struct skl_ddb_entry *ddb_y, struct skl_ddb_entry *ddb_uv); -- cgit From 0868b1ce8772058da85786caac4baa61abb4f93a Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 16 Apr 2021 20:10:10 +0300 Subject: drm/i915: Add enabledisable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'enable ? "enable" : "disable"' is a fairly common pattern in our debug prints. Let's introduce a helper for it. 
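For illustration, the helper and a typical call site (the definition lands in i915_utils.h below, next to the existing onoff() and enableddisabled() helpers):

	static inline const char *enabledisable(bool v)
	{
		return v ? "enable" : "disable";
	}

	/* usage in a debug print: */
	drm_dbg_kms(&i915->drm, "Failed to %s aux backlight\n",
		    enabledisable(enable));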
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210416171011.19012-8-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_ddi.c | 4 ++-- drivers/gpu/drm/i915/display/intel_display_power.c | 2 +- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c | 2 +- drivers/gpu/drm/i915/i915_utils.h | 5 +++++ 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 4ef573883412..f4249f087fa7 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -2334,8 +2334,8 @@ static void intel_dp_sink_set_msa_timing_par_ignore_state(struct intel_dp *intel if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_DOWNSPREAD_CTRL, enable ? DP_MSA_TIMING_PAR_IGNORE_EN : 0) <= 0) drm_dbg_kms(&i915->drm, - "Failed to set MSA_TIMING_PAR_IGNORE %s in the sink\n", - enable ? "enable" : "disable"); + "Failed to %s MSA_TIMING_PAR_IGNORE in the sink\n", + enabledisable(enable)); } static void intel_dp_sink_set_fec_ready(struct intel_dp *intel_dp, diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 0fb4864a191a..d48dd15a4f6e 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -4766,7 +4766,7 @@ static void gen9_dbuf_slice_set(struct drm_i915_private *dev_priv, state = intel_de_read(dev_priv, reg) & DBUF_POWER_STATE; drm_WARN(&dev_priv->drm, enable != state, "DBuf slice %d power %s timeout!\n", - slice, enable ? "enable" : "disable"); + slice, enabledisable(enable)); } void gen9_dbuf_slices_update(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 5ee953aaa00c..44109a4b69aa 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1861,7 +1861,7 @@ void intel_dp_sink_set_decompression_state(struct intel_dp *intel_dp, if (ret < 0) drm_dbg_kms(&i915->drm, "Failed to %s sink decompression state\n", - enable ? "enable" : "disable"); + enabledisable(enable)); } static void diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 4f8337c7fd2e..8e9ac9ba1d38 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -291,7 +291,7 @@ static void set_vesa_backlight_enable(struct intel_dp *intel_dp, bool enable) if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_EDP_DISPLAY_CONTROL_REGISTER, reg_val) != 1) { drm_dbg_kms(&i915->drm, "Failed to %s aux backlight\n", - enable ? "enable" : "disable"); + enabledisable(enable)); } } diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index abd4dcd9f79c..f02f52ab5070 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -418,6 +418,11 @@ static inline const char *onoff(bool v) return v ? "on" : "off"; } +static inline const char *enabledisable(bool v) +{ + return v ? "enable" : "disable"; +} + static inline const char *enableddisabled(bool v) { return v ? 
"enabled" : "disabled"; -- cgit From 3a11529d615ed74e9be5eef953bd136a943663b4 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 16 Apr 2021 20:10:11 +0300 Subject: drm/i915: Say "enable foo" instead of "set foo to enabled" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use simpler sentences. Just say "enable foo" instead of "set foo to enabled" etc. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210416171011.19012-9-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp.c | 12 ++++++------ drivers/gpu/drm/i915/display/intel_tc.c | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 44109a4b69aa..52ea09fc5e70 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2293,8 +2293,8 @@ void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp, if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_PROTOCOL_CONVERTER_CONTROL_0, tmp) != 1) - drm_dbg_kms(&i915->drm, "Failed to set protocol converter HDMI mode to %s\n", - enableddisabled(intel_dp->has_hdmi_sink)); + drm_dbg_kms(&i915->drm, "Failed to %s protocol converter HDMI mode\n", + enabledisable(intel_dp->has_hdmi_sink)); tmp = crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR444 && intel_dp->dfp.ycbcr_444_to_420 ? DP_CONVERSION_TO_YCBCR420_ENABLE : 0; @@ -2302,8 +2302,8 @@ void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp, if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_PROTOCOL_CONVERTER_CONTROL_1, tmp) != 1) drm_dbg_kms(&i915->drm, - "Failed to set protocol converter YCbCr 4:2:0 conversion mode to %s\n", - enableddisabled(intel_dp->dfp.ycbcr_444_to_420)); + "Failed to %s protocol converter YCbCr 4:2:0 conversion mode\n", + enabledisable(intel_dp->dfp.ycbcr_444_to_420)); tmp = 0; if (intel_dp->dfp.rgb_to_ycbcr) { @@ -2340,8 +2340,8 @@ void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp, if (drm_dp_pcon_convert_rgb_to_ycbcr(&intel_dp->aux, tmp) < 0) drm_dbg_kms(&i915->drm, - "Failed to set protocol converter RGB->YCbCr conversion mode to %s\n", - enableddisabled(tmp ? true : false)); + "Failed to %s protocol converter RGB->YCbCr conversion mode\n", + enabledisable(tmp)); } diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 88085486ee59..59de6ca436db 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -267,8 +267,8 @@ static bool icl_tc_phy_set_safe_mode(struct intel_digital_port *dig_port, PORT_TX_DFLEXDPCSSS(dig_port->tc_phy_fia)); if (val == 0xffffffff) { drm_dbg_kms(&i915->drm, - "Port %s: PHY in TCCOLD, can't set safe-mode to %s\n", - dig_port->tc_port_name, enableddisabled(enable)); + "Port %s: PHY in TCCOLD, can't %s safe-mode\n", + dig_port->tc_port_name, enabledisable(enable)); return false; } -- cgit From b40a6ab2cf9213923bf8e821ce7fa7f6a0a26990 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 7 Apr 2021 18:19:58 -0400 Subject: drm/amdkfd: Use drm_priv to pass VM from KFD to amdgpu amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu needs the drm_priv to allow mmap to access the BO through the corresponding file descriptor. The VM can also be extracted from drm_priv, so drm_priv can replace the vm parameter in the kfd2kgd interface. 
Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 14 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 69 +++++++++++++--------- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 8 +-- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 23 ++++---- 6 files changed, 67 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 5ffb07b02810..0d59bebd92af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -236,20 +236,20 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s /* GPUVM API */ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, struct file *filp, u32 pasid, - void **vm, void **process_info, + void **process_info, struct dma_fence **ef); -void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm); -uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); +void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv); +uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct kgd_dev *kgd, uint64_t va, uint64_t size, - void *vm, struct kgd_mem **mem, + void *drm_priv, struct kgd_mem **mem, uint64_t *offset, uint32_t flags); int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size); int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_sync_memory( struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, @@ -260,7 +260,7 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, struct kfd_vm_fault_info *info); int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, struct dma_buf *dmabuf, - uint64_t va, void *vm, + uint64_t va, void *drm_priv, struct kgd_mem **mem, uint64_t *size, uint64_t *mmap_offset); int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7d4118c8128a..dc86faa03b88 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -948,6 +948,13 @@ static int process_update_pds(struct amdkfd_process_info *process_info, return 0; } +static struct amdgpu_vm *drm_priv_to_vm(struct drm_file *drm_priv) +{ + struct amdgpu_fpriv *fpriv = drm_priv->driver_priv; + + return &fpriv->vm; +} + static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, struct dma_fence **ef) { @@ -1036,15 +1043,19 @@ create_evict_fence_fail: int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, struct file *filp, u32 pasid, - void **vm, void **process_info, + void **process_info, struct dma_fence **ef) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct drm_file *drm_priv = filp->private_data; - struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv; - struct amdgpu_vm *avm = &drv_priv->vm; + struct amdgpu_fpriv *drv_priv; + struct amdgpu_vm *avm; int 
ret; + ret = amdgpu_file_to_fpriv(filp, &drv_priv); + if (ret) + return ret; + avm = &drv_priv->vm; + /* Already a compute VM? */ if (avm->process_info) return -EINVAL; @@ -1059,7 +1070,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, if (ret) return ret; - *vm = (void *)avm; + amdgpu_vm_set_task_info(avm); return 0; } @@ -1100,15 +1111,17 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, } } -void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm) +void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + struct amdgpu_vm *avm; - if (WARN_ON(!kgd || !vm)) + if (WARN_ON(!kgd || !drm_priv)) return; - pr_debug("Releasing process vm %p\n", vm); + avm = drm_priv_to_vm(drm_priv); + + pr_debug("Releasing process vm %p\n", avm); /* The original pasid of amdgpu vm has already been * released during making a amdgpu vm to a compute vm @@ -1119,9 +1132,9 @@ void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm) amdgpu_vm_release_compute(adev, avm); } -uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) +uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv) { - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); struct amdgpu_bo *pd = avm->root.base.bo; struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); @@ -1132,11 +1145,11 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct kgd_dev *kgd, uint64_t va, uint64_t size, - void *vm, struct kgd_mem **mem, + void *drm_priv, struct kgd_mem **mem, uint64_t *offset, uint32_t flags) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); enum ttm_bo_type bo_type = ttm_bo_type_device; struct sg_table *sg = NULL; uint64_t user_addr = 0; @@ -1347,10 +1360,10 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( } int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); int ret; struct amdgpu_bo *bo; uint32_t domain; @@ -1391,9 +1404,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n", mem->va, mem->va + bo_size * (1 + mem->aql_queue), - vm, domain_string(domain)); + avm, domain_string(domain)); - ret = reserve_bo_and_vm(mem, vm, &ctx); + ret = reserve_bo_and_vm(mem, avm, &ctx); if (unlikely(ret)) goto out; @@ -1437,7 +1450,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( } list_for_each_entry(entry, &mem->bo_va_list, bo_list) { - if (entry->bo_va->base.vm == vm && !entry->is_mapped) { + if (entry->bo_va->base.vm == avm && !entry->is_mapped) { pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n", entry->va, entry->va + bo_size, entry); @@ -1449,7 +1462,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( goto map_bo_to_gpuvm_failed; } - ret = vm_update_pds(vm, ctx.sync); + ret = vm_update_pds(avm, ctx.sync); if (ret) { pr_err("Failed to update page directories\n"); goto map_bo_to_gpuvm_failed; @@ -1485,11 +1498,11 @@ out: } int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) + struct kgd_dev 
*kgd, struct kgd_mem *mem, void *drm_priv) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdkfd_process_info *process_info = - ((struct amdgpu_vm *)vm)->process_info; + struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); + struct amdkfd_process_info *process_info = avm->process_info; unsigned long bo_size = mem->bo->tbo.base.size; struct kfd_bo_va_list *entry; struct bo_vm_reservation_context ctx; @@ -1497,7 +1510,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( mutex_lock(&mem->lock); - ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx); + ret = reserve_bo_and_cond_vms(mem, avm, BO_VM_MAPPED, &ctx); if (unlikely(ret)) goto out; /* If no VMs were reserved, it means the BO wasn't actually mapped */ @@ -1506,17 +1519,17 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( goto unreserve_out; } - ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm); + ret = vm_validate_pt_pd_bos(avm); if (unlikely(ret)) goto unreserve_out; pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n", mem->va, mem->va + bo_size * (1 + mem->aql_queue), - vm); + avm); list_for_each_entry(entry, &mem->bo_va_list, bo_list) { - if (entry->bo_va->base.vm == vm && entry->is_mapped) { + if (entry->bo_va->base.vm == avm && entry->is_mapped) { pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n", entry->va, entry->va + bo_size, @@ -1642,14 +1655,14 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, struct dma_buf *dma_buf, - uint64_t va, void *vm, + uint64_t va, void *drm_priv, struct kgd_mem **mem, uint64_t *size, uint64_t *mmap_offset) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); struct drm_gem_object *obj; struct amdgpu_bo *bo; - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; if (dma_buf->ops != &amdgpu_dmabuf_ops) /* Can't handle non-graphics buffers */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 43de260b2230..97da1632f504 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1297,7 +1297,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( dev->kgd, args->va_addr, args->size, - pdd->vm, (struct kgd_mem **) &mem, &offset, + pdd->drm_priv, (struct kgd_mem **) &mem, &offset, flags); if (err) @@ -1448,7 +1448,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, goto get_mem_obj_from_handle_failed; } err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); + peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv); if (err) { pr_err("Failed to map to gpu %d/%d\n", i, args->n_devices); @@ -1555,7 +1555,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, goto get_mem_obj_from_handle_failed; } err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); + peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv); if (err) { pr_err("Failed to unmap from gpu %d/%d\n", i, args->n_devices); @@ -1701,7 +1701,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, } r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf, - args->va_addr, pdd->vm, + args->va_addr, pdd->drm_priv, (struct kgd_mem **)&mem, &size, NULL); if (r) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d3eaa1549bd7..98c2046c7331 100644 --- 
a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -738,7 +738,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, pdd = qpd_to_pdd(qpd); /* Retrieve PD base */ - pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm); + pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); dqm_lock(dqm); if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ @@ -821,7 +821,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, pdd = qpd_to_pdd(qpd); /* Retrieve PD base */ - pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm); + pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); dqm_lock(dqm); if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ @@ -873,7 +873,7 @@ static int register_process(struct device_queue_manager *dqm, pdd = qpd_to_pdd(qpd); /* Retrieve PD base */ - pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm); + pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); dqm_lock(dqm); list_add(&n->list, &dqm->queues); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 0b6595f7acda..c6357ceb247d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -669,7 +669,7 @@ struct kfd_process_device { /* VM context for GPUVM allocations */ struct file *drm_file; - void *vm; + void *drm_priv; /* GPUVM allocations storage */ struct idr alloc_idr; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index d97e330a5022..209e9edd6ddd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -647,7 +647,7 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem, { struct kfd_dev *dev = pdd->dev; - amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm); + amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv); amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL); } @@ -667,11 +667,11 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, int err; err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size, - pdd->vm, &mem, NULL, flags); + pdd->drm_priv, &mem, NULL, flags); if (err) goto err_alloc_mem; - err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm); + err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->drm_priv); if (err) goto err_map_mem; @@ -901,10 +901,10 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd) for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *peer_pdd = p->pdds[i]; - if (!peer_pdd->vm) + if (!peer_pdd->drm_priv) continue; amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - peer_pdd->dev->kgd, mem, peer_pdd->vm); + peer_pdd->dev->kgd, mem, peer_pdd->drm_priv); } amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, NULL); @@ -932,7 +932,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) if (pdd->drm_file) { amdgpu_amdkfd_gpuvm_release_process_vm( - pdd->dev->kgd, pdd->vm); + pdd->dev->kgd, pdd->drm_priv); fput(pdd->drm_file); } @@ -1375,7 +1375,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, if (!drm_file) return -EINVAL; - if (pdd->vm) + if (pdd->drm_priv) return -EBUSY; p = pdd->process; @@ -1383,13 +1383,12 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, ret = amdgpu_amdkfd_gpuvm_acquire_process_vm( dev->kgd, drm_file, p->pasid, - 
&pdd->vm, &p->kgd_process_info, &p->ef); + &p->kgd_process_info, &p->ef); if (ret) { pr_err("Failed to create process VM object\n"); return ret; } - - amdgpu_vm_set_task_info(pdd->vm); + pdd->drm_priv = drm_file->private_data; ret = kfd_process_device_reserve_ib_mem(pdd); if (ret) @@ -1405,7 +1404,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, err_init_cwsr: err_reserve_ib_mem: kfd_process_device_free_bos(pdd); - pdd->vm = NULL; + pdd->drm_priv = NULL; return ret; } @@ -1429,7 +1428,7 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, return ERR_PTR(-ENOMEM); } - if (!pdd->vm) + if (!pdd->drm_priv) return ERR_PTR(-ENODEV); /* -- cgit From d4ec4bdc0bd5ad352854473ba4dcbdb39fd5bfdd Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 7 Apr 2021 18:46:26 -0400 Subject: drm/amdkfd: Allow access for mmapping KFD BOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DRM render node file handles are used for CPU mapping of BOs using mmap by the Thunk. It uses the DRM render node of the GPU where the BO was allocated. DRM allows mmap access automatically when it creates a GEM handle for a BO. KFD BOs don't have GEM handles, so KFD needs to manage access manually. Use drm_vma_node_allow to allow user mode to mmap BOs allocated with kfd_ioctl_alloc_memory_of_gpu through the DRM render node that was used in the kfd_ioctl_acquire_vm call for the same GPU. Signed-off-by: Felix Kuehling Acked-by: Christian König Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 18 +++++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 8 +++++--- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 9 ++++++--- 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 0d59bebd92af..7c8c5e469707 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -245,7 +245,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( void *drm_priv, struct kgd_mem **mem, uint64_t *offset, uint32_t flags); int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size); + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, + uint64_t *size); int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index dc86faa03b88..f96c331c9b6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1229,6 +1229,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( domain_string(alloc_domain), ret); goto err_bo_create; } + ret = drm_vma_node_allow(&gobj->vma_node, drm_priv); + if (ret) { + pr_debug("Failed to allow vma node access. 
ret %d\n", ret); + goto err_node_allow; + } bo = gem_to_amdgpu_bo(gobj); if (bo_type == ttm_bo_type_sg) { bo->tbo.sg = sg; @@ -1258,6 +1263,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( allocate_init_user_pages_failed: remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); + drm_vma_node_revoke(&gobj->vma_node, drm_priv); +err_node_allow: amdgpu_bo_unref(&bo); /* Don't unreserve system mem limit twice */ goto err_reserve_limit; @@ -1275,7 +1282,8 @@ err: } int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size) + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, + uint64_t *size) { struct amdkfd_process_info *process_info = mem->process_info; unsigned long bo_size = mem->bo->tbo.base.size; @@ -1352,6 +1360,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( } /* Free the BO*/ + drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv); drm_gem_object_put(&mem->bo->tbo.base); mutex_destroy(&mem->lock); kfree(mem); @@ -1663,6 +1672,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); struct drm_gem_object *obj; struct amdgpu_bo *bo; + int ret; if (dma_buf->ops != &amdgpu_dmabuf_ops) /* Can't handle non-graphics buffers */ @@ -1683,6 +1693,12 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, if (!*mem) return -ENOMEM; + ret = drm_vma_node_allow(&obj->vma_node, drm_priv); + if (ret) { + kfree(mem); + return ret; + } + if (size) *size = amdgpu_bo_size(bo); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 97da1632f504..242e8b28feac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1328,7 +1328,8 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, return 0; err_free: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, + pdd->drm_priv, NULL); err_unlock: mutex_unlock(&p->mutex); return err; @@ -1365,7 +1366,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, } ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, - (struct kgd_mem *)mem, &size); + (struct kgd_mem *)mem, pdd->drm_priv, &size); /* If freeing the buffer failed, leave the handle in place for * clean-up during process tear-down. 
@@ -1721,7 +1722,8 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, return 0; err_free: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, + pdd->drm_priv, NULL); err_unlock: mutex_unlock(&p->mutex); dma_buf_put(dmabuf); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 209e9edd6ddd..0a5379439642 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -648,7 +648,8 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem, struct kfd_dev *dev = pdd->dev; amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv); - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv, + NULL); } /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process @@ -712,7 +713,8 @@ sync_memory_failed: return err; err_map_mem: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, NULL); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, pdd->drm_priv, + NULL); err_alloc_mem: *kptr = NULL; return err; @@ -907,7 +909,8 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd) peer_pdd->dev->kgd, mem, peer_pdd->drm_priv); } - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, NULL); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, + pdd->drm_priv, NULL); kfd_process_device_remove_obj_handle(pdd, id); } } -- cgit From cccbeb6209bddee35539d9353de9fd775ce9bb55 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 7 Apr 2021 18:48:09 -0400 Subject: drm/amdgpu: Remove verify_access shortcut for KFD BOs This shortcut is no longer needed with access managed properly by KFD. Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 3bef0432cac2..1485f33c3cc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -165,13 +165,6 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) { struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); - /* - * Don't verify access for KFD BOs. They don't have a GEM - * object associated with them. - */ - if (abo->kfd_bo) - return 0; - if (amdgpu_ttm_tt_get_usermm(bo->ttm)) return -EPERM; return drm_vma_node_verify_access(&abo->tbo.base.vma_node, -- cgit From 2aeb742b72121c539cfc9cff8c00c5ca32ba4b1c Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Tue, 7 Apr 2020 23:08:45 -0500 Subject: drm/amdkfd: helper to convert gpu id and idx An svm range uses a gpu bitmap to store which GPUs the range maps to. Applications pass a driver gpu id to specify a GPU, so a helper is needed to convert a gpu id to a gpu bitmap idx. Access goes through the kfd_process_device pointers array in kfd_process.
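A sketch of the intended use (assuming a kfd_process *p and a user-supplied gpu_id; the helpers themselves are in the diff below):

	struct kfd_process_device *pdd;
	int gpuidx;

	/* resolve the gpu id to its index in the p->pdds[] array */
	gpuidx = kfd_process_gpuidx_from_gpuid(p, gpu_id);
	if (gpuidx < 0)
		return -EINVAL;		/* no such GPU in this process */

	pdd = kfd_process_device_from_gpuidx(p, gpuidx);
	/* BIT(gpuidx) is the GPU's bit in an svm range's gpu bitmap */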
Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 11 +++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 10 ++++++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index c6357ceb247d..f887f2eb0a66 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -842,6 +842,17 @@ struct kfd_process *kfd_create_process(struct file *filep); struct kfd_process *kfd_get_process(const struct task_struct *); struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid); struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); + +int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id); +static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p, + uint32_t gpuidx, uint32_t *gpuid) { + return gpuidx < p->n_pdds ? p->pdds[gpuidx]->dev->id : -EINVAL; +} +static inline struct kfd_process_device *kfd_process_device_from_gpuidx( + struct kfd_process *p, uint32_t gpuidx) { + return gpuidx < p->n_pdds ? p->pdds[gpuidx] : NULL; +} + void kfd_unref_process(struct kfd_process *p); int kfd_process_evict_queues(struct kfd_process *p); int kfd_process_restore_queues(struct kfd_process *p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 0a5379439642..d40cd6c10096 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1602,6 +1602,16 @@ int kfd_process_restore_queues(struct kfd_process *p) return ret; } +int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id) +{ + int i; + + for (i = 0; i < p->n_pdds; i++) + if (p->pdds[i] && gpu_id == p->pdds[i]->dev->id) + return i; + return -EINVAL; +} + static void evict_process_worker(struct work_struct *work) { int ret; -- cgit From 40ce74d1b28d38e5debc14b5a6ddd9071ae2d310 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 5 Feb 2020 17:59:15 -0500 Subject: drm/amdkfd: add svm ioctl API Add svm (shared virtual memory) ioctl data structure and API definition. The svm ioctl API is designed to be extensible in the future. All operations are provided by a single IOCTL to preserve ioctl number space. The arguments structure ends with a variable size array of attributes that can be used to set or get one or multiple attributes. 
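A hypothetical user-space sketch of the calling convention (not part of this patch; kfd_fd is assumed to be an open /dev/kfd file descriptor and addr/size a page-aligned range; needs <stdlib.h>, <sys/ioctl.h> and linux/kfd_ioctl.h):

	/* set two attributes on a range with one AMDKFD_IOC_SVM call */
	struct kfd_ioctl_svm_args *args;
	uint32_t nattr = 2;

	/* attrs[] is variable length: allocate room for nattr entries */
	args = calloc(1, sizeof(*args) + nattr * sizeof(args->attrs[0]));
	args->start_addr = addr;
	args->size = size;
	args->op = KFD_IOCTL_SVM_OP_SET_ATTR;
	args->nattr = nattr;
	args->attrs[0].type = KFD_IOCTL_SVM_ATTR_SET_FLAGS;
	args->attrs[0].value = KFD_IOCTL_SVM_FLAG_HOST_ACCESS;
	args->attrs[1].type = KFD_IOCTL_SVM_ATTR_GRANULARITY;
	args->attrs[1].value = 9;	/* log2(pages): 2 MiB granularity */

	if (ioctl(kfd_fd, AMDKFD_IOC_SVM, args) < 0)
		perror("AMDKFD_IOC_SVM");
	free(args);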
Signed-off-by: Philip Yang Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 12 +++ drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 4 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 1 + include/uapi/linux/kfd_ioctl.h | 130 ++++++++++++++++++++++++++- 5 files changed, 147 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 242e8b28feac..f888c2c673bb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1744,6 +1744,16 @@ static int kfd_ioctl_smi_events(struct file *filep, return kfd_smi_event_open(dev, &args->anon_fd); } +static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) +{ + int r = 0; + + if (p->svm_disabled) + return -EPERM; + + return r; +} + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ .cmd_drv = 0, .name = #ioctl} @@ -1842,6 +1852,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS, kfd_ioctl_smi_events, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index a2c9063076cc..72815e86a3b8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -405,6 +405,10 @@ int kfd_init_apertures(struct kfd_process *process) case CHIP_POLARIS12: case CHIP_VEGAM: kfd_init_apertures_vi(pdd, id); + /* VI GPUs cannot support SVM with only + * 40 bits of virtual address space. 
+ */ + process->svm_disabled = true; break; case CHIP_VEGA10: case CHIP_VEGA12: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index f887f2eb0a66..afc8f879b452 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -809,6 +809,8 @@ struct kfd_process { struct kobject *kobj; struct kobject *kobj_queues; struct attribute attr_pasid; + + bool svm_disabled; }; #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index d40cd6c10096..aaff85c13398 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1208,6 +1208,7 @@ static struct kfd_process *create_process(const struct task_struct *thread) process->mm = thread->mm; process->lead_thread = thread->group_leader; process->n_pdds = 0; + process->svm_disabled = false; INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker); INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); process->last_restore_timestamp = get_jiffies_64(); diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index bf5e7d7846dd..247b57baa94f 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -30,9 +30,10 @@ * - 1.1 - initial version * - 1.3 - Add SMI events support * - 1.4 - Indicate new SRAM EDC bit in device properties + * - 1.5 - Add SVM API */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 4 +#define KFD_IOCTL_MINOR_VERSION 5 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -473,6 +474,129 @@ enum kfd_mmio_remap { KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4, }; +/* Guarantee host access to memory */ +#define KFD_IOCTL_SVM_FLAG_HOST_ACCESS 0x00000001 +/* Fine grained coherency between all devices with access */ +#define KFD_IOCTL_SVM_FLAG_COHERENT 0x00000002 +/* Use any GPU in same hive as preferred device */ +#define KFD_IOCTL_SVM_FLAG_HIVE_LOCAL 0x00000004 +/* GPUs only read, allows replication */ +#define KFD_IOCTL_SVM_FLAG_GPU_RO 0x00000008 +/* Allow execution on GPU */ +#define KFD_IOCTL_SVM_FLAG_GPU_EXEC 0x00000010 +/* GPUs mostly read, may allow similar optimizations as RO, but writes fault */ +#define KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY 0x00000020 + +/** + * kfd_ioctl_svm_op - SVM ioctl operations + * + * @KFD_IOCTL_SVM_OP_SET_ATTR: Modify one or more attributes + * @KFD_IOCTL_SVM_OP_GET_ATTR: Query one or more attributes + */ +enum kfd_ioctl_svm_op { + KFD_IOCTL_SVM_OP_SET_ATTR, + KFD_IOCTL_SVM_OP_GET_ATTR +}; + +/** kfd_ioctl_svm_location - Enum for preferred and prefetch locations + * + * GPU IDs are used to specify GPUs as preferred and prefetch locations. + * Below definitions are used for system memory or for leaving the preferred + * location unspecified. + */ +enum kfd_ioctl_svm_location { + KFD_IOCTL_SVM_LOCATION_SYSMEM = 0, + KFD_IOCTL_SVM_LOCATION_UNDEFINED = 0xffffffff +}; + +/** + * kfd_ioctl_svm_attr_type - SVM attribute types + * + * @KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: gpuid of the preferred location, 0 for + * system memory + * @KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: gpuid of the prefetch location, 0 for + * system memory. Setting this triggers an + * immediate prefetch (migration). 
+ * @KFD_IOCTL_SVM_ATTR_ACCESS: + * @KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: + * @KFD_IOCTL_SVM_ATTR_NO_ACCESS: specify memory access for the gpuid given + * by the attribute value + * @KFD_IOCTL_SVM_ATTR_SET_FLAGS: bitmask of flags to set (see + * KFD_IOCTL_SVM_FLAG_...) + * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS: bitmask of flags to clear + * @KFD_IOCTL_SVM_ATTR_GRANULARITY: migration granularity + * (log2 num pages) + */ +enum kfd_ioctl_svm_attr_type { + KFD_IOCTL_SVM_ATTR_PREFERRED_LOC, + KFD_IOCTL_SVM_ATTR_PREFETCH_LOC, + KFD_IOCTL_SVM_ATTR_ACCESS, + KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE, + KFD_IOCTL_SVM_ATTR_NO_ACCESS, + KFD_IOCTL_SVM_ATTR_SET_FLAGS, + KFD_IOCTL_SVM_ATTR_CLR_FLAGS, + KFD_IOCTL_SVM_ATTR_GRANULARITY +}; + +/** + * kfd_ioctl_svm_attribute - Attributes as pairs of type and value + * + * The meaning of the @value depends on the attribute type. + * + * @type: attribute type (see enum @kfd_ioctl_svm_attr_type) + * @value: attribute value + */ +struct kfd_ioctl_svm_attribute { + __u32 type; + __u32 value; +}; + +/** + * kfd_ioctl_svm_args - Arguments for SVM ioctl + * + * @op specifies the operation to perform (see enum + * @kfd_ioctl_svm_op). @start_addr and @size are common for all + * operations. + * + * A variable number of attributes can be given in @attrs. + * @nattr specifies the number of attributes. New attributes can be + * added in the future without breaking the ABI. If unknown attributes + * are given, the function returns -EINVAL. + * + * @KFD_IOCTL_SVM_OP_SET_ATTR sets attributes for a virtual address + * range. It may overlap existing virtual address ranges. If it does, + * the existing ranges will be split such that the attribute changes + * only apply to the specified address range. + * + * @KFD_IOCTL_SVM_OP_GET_ATTR returns the intersection of attributes + * over all memory in the given range and returns the result as the + * attribute value. If different pages have different preferred or + * prefetch locations, 0xffffffff will be returned for + * @KFD_IOCTL_SVM_ATTR_PREFERRED_LOC or + * @KFD_IOCTL_SVM_ATTR_PREFETCH_LOC respectively. For + * @KFD_IOCTL_SVM_ATTR_SET_FLAGS, flags of all pages will be + * aggregated by bitwise AND. The minimum migration granularity + * throughout the range will be returned for + * @KFD_IOCTL_SVM_ATTR_GRANULARITY. + * + * Querying of accessibility attributes works by initializing the + * attribute type to @KFD_IOCTL_SVM_ATTR_ACCESS and the value to the + * GPUID being queried. Multiple attributes can be given to allow + * querying multiple GPUIDs. The ioctl function overwrites the + * attribute type to indicate the access for the specified GPU. + * + * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS is invalid for + * @KFD_IOCTL_SVM_OP_GET_ATTR.
+ */ +struct kfd_ioctl_svm_args { + __u64 start_addr; + __u64 size; + __u32 op; + __u32 nattr; + /* Variable length array of attributes */ + struct kfd_ioctl_svm_attribute attrs[0]; +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -573,7 +697,9 @@ enum kfd_mmio_remap { #define AMDKFD_IOC_SMI_EVENTS \ AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args) +#define AMDKFD_IOC_SVM AMDKFD_IOWR(0x20, struct kfd_ioctl_svm_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x20 +#define AMDKFD_COMMAND_END 0x21 #endif -- cgit From 42de677f79999791bee4e21be318c32d90ab62c6 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 6 Feb 2020 13:43:56 -0500 Subject: drm/amdkfd: register svm range The svm range structure stores the range start address, size, attributes, flags, prefetch location and a gpu bitmap which indicates which GPUs this range maps to. The same virtual address is shared by the CPU and GPUs. Each process has an svm range list which uses both an interval tree and a list to store all svm ranges registered by the process. The interval tree is used by the GPU vm fault handler and the CPU page fault handler to get the svm range structure for a specific address. The list is used to scan all ranges in the eviction restore work. No overlapping range intervals [start, last] exist in the svms object interval tree. If a process registers a new range which overlaps an old range, the old range is split into 2 ranges, depending on whether the overlap happens at the head or tail part of the old range. The attributes (preferred location, prefetch location, mapping flags, migration granularity) are applied to the svm range, and the mapping gpu index is stored in the bitmap. Signed-off-by: Philip Yang Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/Makefile | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 17 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 8 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 9 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 728 +++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 86 ++++ 6 files changed, 850 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_svm.c create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_svm.h diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index e1e4115dcf78..387ce0217d35 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -54,7 +54,8 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_dbgdev.o \ $(AMDKFD_PATH)/kfd_dbgmgr.o \ $(AMDKFD_PATH)/kfd_smi_events.o \ - $(AMDKFD_PATH)/kfd_crat.o + $(AMDKFD_PATH)/kfd_crat.o \ + $(AMDKFD_PATH)/kfd_svm.o ifneq ($(CONFIG_AMD_IOMMU_V2),) AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f888c2c673bb..c31ce8c88df4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -38,6 +38,7 @@ #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_dbgmgr.h" +#include "kfd_svm.h" #include "amdgpu_amdkfd.h" #include "kfd_smi_events.h" @@ -1746,11 +1747,27 @@ static int kfd_ioctl_smi_events(struct file *filep, static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) { + struct kfd_ioctl_svm_args *args = data; int r = 0; if (p->svm_disabled) return -EPERM; + pr_debug("start 0x%llx size 0x%llx op 0x%x nattr
0x%x\n", + args->start_addr, args->size, args->op, args->nattr); + + if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK)) + return -EINVAL; + if (!args->start_addr || !args->size) + return -EINVAL; + + mutex_lock(&p->mutex); + + r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr, + args->attrs); + + mutex_unlock(&p->mutex); + return r; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index afc8f879b452..cfdd34c2223c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -731,6 +731,12 @@ struct kfd_process_device { #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) +struct svm_range_list { + struct mutex lock; + struct rb_root_cached objects; + struct list_head list; +}; + /* Process data */ struct kfd_process { /* @@ -810,6 +816,8 @@ struct kfd_process { struct kobject *kobj_queues; struct attribute attr_pasid; + /* shared virtual memory registered by this process */ + struct svm_range_list svms; bool svm_disabled; }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index aaff85c13398..5f1ec7553509 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -35,6 +35,7 @@ #include #include "amdgpu_amdkfd.h" #include "amdgpu.h" +#include "kfd_svm.h" struct mm_struct; @@ -42,6 +43,7 @@ struct mm_struct; #include "kfd_device_queue_manager.h" #include "kfd_dbgmgr.h" #include "kfd_iommu.h" +#include "kfd_svm.h" /* * List of struct kfd_process (field kfd_process). @@ -1003,6 +1005,7 @@ static void kfd_process_wq_release(struct work_struct *work) kfd_iommu_unbind_process(p); kfd_process_free_outstanding_kfd_bos(p); + svm_range_list_fini(p); kfd_process_destroy_pdds(p); dma_fence_put(p->ef); @@ -1228,6 +1231,10 @@ static struct kfd_process *create_process(const struct task_struct *thread) if (err != 0) goto err_init_apertures; + err = svm_range_list_init(process); + if (err) + goto err_init_svm_range_list; + /* alloc_notifier needs to find the process in the hash table */ hash_add_rcu(kfd_processes_table, &process->kfd_processes, (uintptr_t)process->mm); @@ -1250,6 +1257,8 @@ static struct kfd_process *create_process(const struct task_struct *thread) err_register_notifier: hash_del_rcu(&process->kfd_processes); + svm_range_list_fini(process); +err_init_svm_range_list: kfd_process_free_outstanding_kfd_bos(process); kfd_process_destroy_pdds(process); err_init_apertures: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c new file mode 100644 index 000000000000..1bf909ee8d56 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -0,0 +1,728 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2020-2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include "amdgpu_sync.h" +#include "amdgpu_object.h" +#include "amdgpu_vm.h" +#include "amdgpu_mn.h" +#include "kfd_priv.h" +#include "kfd_svm.h" + +/** + * svm_range_unlink - unlink svm_range from lists and interval tree + * @prange: svm range structure to be removed + * + * Remove the svm range from svms interval tree and link list + * + * Context: The caller must hold svms->lock + */ +static void svm_range_unlink(struct svm_range *prange) +{ + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, + prange, prange->start, prange->last); + + list_del(&prange->list); + if (prange->it_node.start != 0 && prange->it_node.last != 0) + interval_tree_remove(&prange->it_node, &prange->svms->objects); +} + +/** + * svm_range_add_to_svms - add svm range to svms + * @prange: svm range structure to be added + * + * Add the svm range to svms interval tree and link list + * + * Context: The caller must hold svms->lock + */ +static void svm_range_add_to_svms(struct svm_range *prange) +{ + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, + prange, prange->start, prange->last); + + list_add_tail(&prange->list, &prange->svms->list); + prange->it_node.start = prange->start; + prange->it_node.last = prange->last; + interval_tree_insert(&prange->it_node, &prange->svms->objects); +} + +static void svm_range_free(struct svm_range *prange) +{ + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, + prange->start, prange->last); + + kfree(prange); +} + +static void +svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc, + uint8_t *granularity, uint32_t *flags) +{ + *location = KFD_IOCTL_SVM_LOCATION_UNDEFINED; + *prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED; + *granularity = 9; + *flags = + KFD_IOCTL_SVM_FLAG_HOST_ACCESS | KFD_IOCTL_SVM_FLAG_COHERENT; +} + +static struct +svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, + uint64_t last) +{ + uint64_t size = last - start + 1; + struct svm_range *prange; + + prange = kzalloc(sizeof(*prange), GFP_KERNEL); + if (!prange) + return NULL; + prange->npages = size; + prange->svms = svms; + prange->start = start; + prange->last = last; + INIT_LIST_HEAD(&prange->list); + INIT_LIST_HEAD(&prange->update_list); + INIT_LIST_HEAD(&prange->remove_list); + INIT_LIST_HEAD(&prange->insert_list); + svm_range_set_default_attributes(&prange->preferred_loc, + &prange->prefetch_loc, + &prange->granularity, &prange->flags); + + pr_debug("svms 0x%p [0x%llx 0x%llx]\n", svms, start, last); + + return prange; +} + +static int +svm_range_check_attr(struct kfd_process *p, + uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) +{ + uint32_t i; + int gpuidx; + + for (i = 0; i < nattr; i++) { + switch (attrs[i].type) { + case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: + if (attrs[i].value != KFD_IOCTL_SVM_LOCATION_SYSMEM && + attrs[i].value != KFD_IOCTL_SVM_LOCATION_UNDEFINED && + kfd_process_gpuidx_from_gpuid(p, + attrs[i].value) < 0) { + pr_debug("no GPU 0x%x found\n", attrs[i].value); + return -EINVAL; + } + 
break; + case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: + if (attrs[i].value != KFD_IOCTL_SVM_LOCATION_SYSMEM && + kfd_process_gpuidx_from_gpuid(p, + attrs[i].value) < 0) { + pr_debug("no GPU 0x%x found\n", attrs[i].value); + return -EINVAL; + } + break; + case KFD_IOCTL_SVM_ATTR_ACCESS: + case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: + case KFD_IOCTL_SVM_ATTR_NO_ACCESS: + gpuidx = kfd_process_gpuidx_from_gpuid(p, + attrs[i].value); + if (gpuidx < 0) { + pr_debug("no GPU 0x%x found\n", attrs[i].value); + return -EINVAL; + } + break; + case KFD_IOCTL_SVM_ATTR_SET_FLAGS: + break; + case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: + break; + case KFD_IOCTL_SVM_ATTR_GRANULARITY: + break; + default: + pr_debug("unknown attr type 0x%x\n", attrs[i].type); + return -EINVAL; + } + } + + return 0; +} + +static void +svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange, + uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) +{ + uint32_t i; + int gpuidx; + + for (i = 0; i < nattr; i++) { + switch (attrs[i].type) { + case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: + prange->preferred_loc = attrs[i].value; + break; + case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: + prange->prefetch_loc = attrs[i].value; + break; + case KFD_IOCTL_SVM_ATTR_ACCESS: + case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: + case KFD_IOCTL_SVM_ATTR_NO_ACCESS: + gpuidx = kfd_process_gpuidx_from_gpuid(p, + attrs[i].value); + if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) { + bitmap_clear(prange->bitmap_access, gpuidx, 1); + bitmap_clear(prange->bitmap_aip, gpuidx, 1); + } else if (attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS) { + bitmap_set(prange->bitmap_access, gpuidx, 1); + bitmap_clear(prange->bitmap_aip, gpuidx, 1); + } else { + bitmap_clear(prange->bitmap_access, gpuidx, 1); + bitmap_set(prange->bitmap_aip, gpuidx, 1); + } + break; + case KFD_IOCTL_SVM_ATTR_SET_FLAGS: + prange->flags |= attrs[i].value; + break; + case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: + prange->flags &= ~attrs[i].value; + break; + case KFD_IOCTL_SVM_ATTR_GRANULARITY: + prange->granularity = attrs[i].value; + break; + default: + WARN_ONCE(1, "svm_range_check_attrs wasn't called?"); + } + } +} + +/** + * svm_range_debug_dump - print all range information from svms + * @svms: svm range list header + * + * debug output svm range start, end, prefetch location from svms + * interval tree and link list + * + * Context: The caller must hold svms->lock + */ +static void svm_range_debug_dump(struct svm_range_list *svms) +{ + struct interval_tree_node *node; + struct svm_range *prange; + + pr_debug("dump svms 0x%p list\n", svms); + pr_debug("range\tstart\tpage\tend\t\tlocation\n"); + + list_for_each_entry(prange, &svms->list, list) { + pr_debug("0x%p 0x%lx\t0x%llx\t0x%llx\t0x%x\n", + prange, prange->start, prange->npages, + prange->start + prange->npages - 1, + prange->actual_loc); + } + + pr_debug("dump svms 0x%p interval tree\n", svms); + pr_debug("range\tstart\tpage\tend\t\tlocation\n"); + node = interval_tree_iter_first(&svms->objects, 0, ~0ULL); + while (node) { + prange = container_of(node, struct svm_range, it_node); + pr_debug("0x%p 0x%lx\t0x%llx\t0x%llx\t0x%x\n", + prange, prange->start, prange->npages, + prange->start + prange->npages - 1, + prange->actual_loc); + node = interval_tree_iter_next(node, 0, ~0ULL); + } +} + +static bool +svm_range_is_same_attrs(struct svm_range *old, struct svm_range *new) +{ + return (old->prefetch_loc == new->prefetch_loc && + old->flags == new->flags && + old->granularity == new->granularity); +} + +/** + * svm_range_split_adjust - split range and adjust + * + * 
@new: new range + * @old: the old range + * @start: the old range adjust to start address in pages + * @last: the old range adjust to last address in pages + * + * Copy attributes in old range to new + * range from new_start up to size new->npages, the remaining old range is from + * start to last + * + * Return: + * 0 - OK, -ENOMEM - out of memory + */ +static int +svm_range_split_adjust(struct svm_range *new, struct svm_range *old, + uint64_t start, uint64_t last) +{ + pr_debug("svms 0x%p new 0x%lx old [0x%lx 0x%lx] => [0x%llx 0x%llx]\n", + new->svms, new->start, old->start, old->last, start, last); + + if (new->start < old->start || + new->last > old->last) { + WARN_ONCE(1, "invalid new range start or last\n"); + return -EINVAL; + } + + old->npages = last - start + 1; + old->start = start; + old->last = last; + new->flags = old->flags; + new->preferred_loc = old->preferred_loc; + new->prefetch_loc = old->prefetch_loc; + new->actual_loc = old->actual_loc; + new->granularity = old->granularity; + bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); + bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); + + return 0; +} + +/** + * svm_range_split - split a range in 2 ranges + * + * @prange: the svm range to split + * @start: the remaining range start address in pages + * @last: the remaining range last address in pages + * @new: the result new range generated + * + * Two cases only: + * case 1: if start == prange->start + * prange ==> prange[start, last] + * new range [last + 1, prange->last] + * + * case 2: if last == prange->last + * prange ==> prange[start, last] + * new range [prange->start, start - 1] + * + * Return: + * 0 - OK, -ENOMEM - out of memory, -EINVAL - invalid start, last + */ +static int +svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last, + struct svm_range **new) +{ + uint64_t old_start = prange->start; + uint64_t old_last = prange->last; + struct svm_range_list *svms; + int r = 0; + + pr_debug("svms 0x%p [0x%llx 0x%llx] to [0x%llx 0x%llx]\n", prange->svms, + old_start, old_last, start, last); + + if (old_start != start && old_last != last) + return -EINVAL; + if (start < old_start || last > old_last) + return -EINVAL; + + svms = prange->svms; + if (old_start == start) + *new = svm_range_new(svms, last + 1, old_last); + else + *new = svm_range_new(svms, old_start, start - 1); + if (!*new) + return -ENOMEM; + + r = svm_range_split_adjust(*new, prange, start, last); + if (r) { + pr_debug("failed %d split [0x%llx 0x%llx] to [0x%llx 0x%llx]\n", + r, old_start, old_last, start, last); + svm_range_free(*new); + *new = NULL; + } + + return r; +} + +static int +svm_range_split_tail(struct svm_range *prange, struct svm_range *new, + uint64_t new_last, struct list_head *insert_list) +{ + struct svm_range *tail; + int r = svm_range_split(prange, prange->start, new_last, &tail); + + if (!r) + list_add(&tail->insert_list, insert_list); + return r; +} + +static int +svm_range_split_head(struct svm_range *prange, struct svm_range *new, + uint64_t new_start, struct list_head *insert_list) +{ + struct svm_range *head; + int r = svm_range_split(prange, new_start, prange->last, &head); + + if (!r) + list_add(&head->insert_list, insert_list); + return r; +} + +static struct svm_range *svm_range_clone(struct svm_range *old) +{ + struct svm_range *new; + + new = svm_range_new(old->svms, old->start, old->last); + if (!new) + return NULL; + + new->flags = old->flags; + new->preferred_loc = old->preferred_loc; + new->prefetch_loc = 
old->prefetch_loc;
+	new->actual_loc = old->actual_loc;
+	new->granularity = old->granularity;
+	bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
+	bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
+
+	return new;
+}
+
+/**
+ * svm_range_handle_overlap - split overlap ranges
+ * @svms: svm range list header
+ * @new: range to be added, carrying the new attributes
+ * @start: range added start address, in pages
+ * @last: range last address, in pages
+ * @update_list: output, the ranges whose attributes are updated. For set_attr,
+ *               these will be validated and mapped to GPUs. For unmap, these
+ *               will be removed and unmapped from GPUs
+ * @insert_list: output, the ranges to be inserted into svms, attributes are
+ *               not changed. For set_attr, these will be added into svms.
+ * @remove_list: output, the ranges to be removed from svms
+ * @left: the remaining range after overlap; for set_attr, this will be added
+ *        as a new range.
+ *
+ * There are five overlap cases in total.
+ *
+ * This function handles overlap of an address interval with existing
+ * struct svm_ranges for applying new attributes. This may require
+ * splitting existing struct svm_ranges. All changes should be applied to
+ * the range_list and interval tree transactionally. If any split operation
+ * fails, the entire update fails. Therefore the existing overlapping
+ * svm_ranges are cloned and the original svm_ranges left unchanged. If the
+ * transaction succeeds, the modified clones are added and the originals
+ * freed. Otherwise the clones are removed and the old svm_ranges remain.
+ *
+ * Context: The caller must hold svms->lock
+ */
+static int
+svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,
+			 unsigned long start, unsigned long last,
+			 struct list_head *update_list,
+			 struct list_head *insert_list,
+			 struct list_head *remove_list,
+			 unsigned long *left)
+{
+	struct interval_tree_node *node;
+	struct svm_range *prange;
+	struct svm_range *tmp;
+	int r = 0;
+
+	INIT_LIST_HEAD(update_list);
+	INIT_LIST_HEAD(insert_list);
+	INIT_LIST_HEAD(remove_list);
+
+	node = interval_tree_iter_first(&svms->objects, start, last);
+	while (node) {
+		struct interval_tree_node *next;
+		struct svm_range *old;
+		unsigned long next_start;
+
+		pr_debug("found overlap node [0x%lx 0x%lx]\n", node->start,
+			 node->last);
+
+		old = container_of(node, struct svm_range, it_node);
+		next = interval_tree_iter_next(node, start, last);
+		next_start = min(node->last, last) + 1;
+
+		if (node->start < start || node->last > last) {
+			/* node intersects the updated range, clone+split it */
+			prange = svm_range_clone(old);
+			if (!prange) {
+				r = -ENOMEM;
+				goto out;
+			}
+
+			list_add(&old->remove_list, remove_list);
+			list_add(&prange->insert_list, insert_list);
+
+			if (node->start < start) {
+				pr_debug("change old range start\n");
+				r = svm_range_split_head(prange, new, start,
+							 insert_list);
+				if (r)
+					goto out;
+			}
+			if (node->last > last) {
+				pr_debug("change old range last\n");
+				r = svm_range_split_tail(prange, new, last,
+							 insert_list);
+				if (r)
+					goto out;
+			}
+		} else {
+			/* The node is contained within start..last,
+			 * just update it
+			 */
+			prange = old;
+		}
+
+		if (!svm_range_is_same_attrs(prange, new))
+			list_add(&prange->update_list, update_list);
+
+		/* insert a new node if needed */
+		if (node->start > start) {
+			prange = svm_range_new(prange->svms, start,
+					       node->start - 1);
+			if (!prange) {
+				r = -ENOMEM;
+				goto out;
+			}
+
+			list_add(&prange->insert_list, insert_list);
+			list_add(&prange->update_list, update_list);
+ } + + node = next; + start = next_start; + } + + if (left && start <= last) + *left = last - start + 1; + +out: + if (r) + list_for_each_entry_safe(prange, tmp, insert_list, insert_list) + svm_range_free(prange); + + return r; +} + +void svm_range_list_fini(struct kfd_process *p) +{ + mutex_destroy(&p->svms.lock); + + pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms); +} + +int svm_range_list_init(struct kfd_process *p) +{ + struct svm_range_list *svms = &p->svms; + + svms->objects = RB_ROOT_CACHED; + mutex_init(&svms->lock); + INIT_LIST_HEAD(&svms->list); + + return 0; +} + +/** + * svm_range_is_valid - check if virtual address range is valid + * @mm: current process mm_struct + * @start: range start address, in pages + * @size: range size, in pages + * + * Valid virtual address range means it belongs to one or more VMAs + * + * Context: Process context + * + * Return: + * true - valid svm range + * false - invalid svm range + */ +static bool +svm_range_is_valid(struct mm_struct *mm, uint64_t start, uint64_t size) +{ + const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP; + struct vm_area_struct *vma; + unsigned long end; + + start <<= PAGE_SHIFT; + end = start + (size << PAGE_SHIFT); + + do { + vma = find_vma(mm, start); + if (!vma || start < vma->vm_start || + (vma->vm_flags & device_vma)) + return false; + start = min(end, vma->vm_end); + } while (start < end); + + return true; +} + +/** + * svm_range_add - add svm range and handle overlap + * @p: the range add to this process svms + * @start: page size aligned + * @size: page size aligned + * @nattr: number of attributes + * @attrs: array of attributes + * @update_list: output, the ranges need validate and update GPU mapping + * @insert_list: output, the ranges need insert to svms + * @remove_list: output, the ranges are replaced and need remove from svms + * + * Check if the virtual address range has overlap with the registered ranges, + * split the overlapped range, copy and adjust pages address and vram nodes in + * old and new ranges. 
+ * + * Context: Process context, caller must hold svms->lock + * + * Return: + * 0 - OK, otherwise error code + */ +static int +svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, + uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs, + struct list_head *update_list, struct list_head *insert_list, + struct list_head *remove_list) +{ + uint64_t last = start + size - 1UL; + struct svm_range_list *svms; + struct svm_range new = {0}; + struct svm_range *prange; + unsigned long left = 0; + int r = 0; + + pr_debug("svms 0x%p [0x%llx 0x%llx]\n", &p->svms, start, last); + + svm_range_apply_attrs(p, &new, nattr, attrs); + + svms = &p->svms; + + r = svm_range_handle_overlap(svms, &new, start, last, update_list, + insert_list, remove_list, &left); + if (r) + return r; + + if (left) { + prange = svm_range_new(svms, last - left + 1, last); + list_add(&prange->insert_list, insert_list); + list_add(&prange->update_list, update_list); + } + + return 0; +} + +static int +svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, + uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) +{ + struct amdkfd_process_info *process_info = p->kgd_process_info; + struct mm_struct *mm = current->mm; + struct list_head update_list; + struct list_head insert_list; + struct list_head remove_list; + struct svm_range_list *svms; + struct svm_range *prange; + struct svm_range *next; + int r = 0; + + pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n", + p->pasid, &p->svms, start, start + size - 1, size); + + r = svm_range_check_attr(p, nattr, attrs); + if (r) + return r; + + svms = &p->svms; + + mutex_lock(&process_info->lock); + + mmap_write_lock(mm); + + if (!svm_range_is_valid(mm, start, size)) { + pr_debug("invalid range\n"); + r = -EFAULT; + mmap_write_unlock(mm); + goto out; + } + + mutex_lock(&svms->lock); + + /* Add new range and split existing ranges as needed */ + r = svm_range_add(p, start, size, nattr, attrs, &update_list, + &insert_list, &remove_list); + if (r) { + mutex_unlock(&svms->lock); + mmap_write_unlock(mm); + goto out; + } + /* Apply changes as a transaction */ + list_for_each_entry_safe(prange, next, &insert_list, insert_list) { + svm_range_add_to_svms(prange); + } + list_for_each_entry(prange, &update_list, update_list) { + svm_range_apply_attrs(p, prange, nattr, attrs); + /* TODO: unmap ranges from GPU that lost access */ + } + list_for_each_entry_safe(prange, next, &remove_list, + remove_list) { + pr_debug("unlink old 0x%p prange 0x%p [0x%lx 0x%lx]\n", + prange->svms, prange, prange->start, + prange->last); + svm_range_unlink(prange); + svm_range_free(prange); + } + + mmap_write_downgrade(mm); + /* Trigger migrations and revalidate and map to GPUs as needed. If + * this fails we may be left with partially completed actions. There + * is no clean way of rolling back to the previous state in such a + * case because the rollback wouldn't be guaranteed to work either. 
+ */
+	list_for_each_entry(prange, &update_list, update_list) {
+		/* TODO */
+	}
+
+	svm_range_debug_dump(svms);
+
+	mutex_unlock(&svms->lock);
+	mmap_read_unlock(mm);
+out:
+	mutex_unlock(&process_info->lock);
+
+	pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,
+		 &p->svms, start, start + size - 1, r);
+
+	return r;
+}
+
+int
+svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
+	  uint64_t size, uint32_t nattrs, struct kfd_ioctl_svm_attribute *attrs)
+{
+	int r;
+
+	start >>= PAGE_SHIFT;
+	size >>= PAGE_SHIFT;
+
+	switch (op) {
+	case KFD_IOCTL_SVM_OP_SET_ATTR:
+		r = svm_range_set_attr(p, start, size, nattrs, attrs);
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
new file mode 100644
index 000000000000..537101dd19a6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_SVM_H_
+#define KFD_SVM_H_
+
+#include
+#include
+#include
+#include
+#include
+#include "amdgpu.h"
+#include "kfd_priv.h"
+
+/**
+ * struct svm_range - shared virtual memory range
+ *
+ * @svms:       list of svm ranges, structure defined in kfd_process
+ * @start:      range start address in pages
+ * @last:       range last address in pages
+ * @it_node:    node [start, last] stored in interval tree, start, last are page
+ *              aligned, page size is (last - start + 1)
+ * @list:       link list node, used to scan all ranges of svms
+ * @update_list:link list node used to add to update_list
+ * @remove_list:link list node used to add to remove list
+ * @insert_list:link list node used to add to insert list
+ * @npages:     number of pages
+ * @flags:      flags defined as KFD_IOCTL_SVM_FLAG_*
+ * @preferred_loc: preferred location, 0 for CPU, or GPU id
+ * @prefetch_loc: last prefetch location, 0 for CPU, or GPU id
+ * @actual_loc: the actual location, 0 for CPU, or GPU id
+ * @granularity:migration granularity, log2 num pages
+ * @bitmap_access: index bitmap of GPUs which can access the range
+ * @bitmap_aip: index bitmap of GPUs which can access the range in place
+ *
+ * Data structure for virtual memory range shared by CPU and GPUs, it can be
+ * allocated from system memory ram or device vram, and migrated from ram to
+ * vram or from vram to ram.
+ */ +struct svm_range { + struct svm_range_list *svms; + unsigned long start; + unsigned long last; + struct interval_tree_node it_node; + struct list_head list; + struct list_head update_list; + struct list_head remove_list; + struct list_head insert_list; + uint64_t npages; + uint32_t flags; + uint32_t preferred_loc; + uint32_t prefetch_loc; + uint32_t actual_loc; + uint8_t granularity; + DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE); + DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE); +}; + +int svm_range_list_init(struct kfd_process *p); +void svm_range_list_fini(struct kfd_process *p); +int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start, + uint64_t size, uint32_t nattrs, + struct kfd_ioctl_svm_attribute *attrs); + +#endif /* KFD_SVM_H_ */ -- cgit From c5e2e4781ac5760c6b5efb09fed6ac7725fcf041 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Sun, 16 Feb 2020 12:42:00 -0500 Subject: drm/amdkfd: add svm ioctl GET_ATTR op Get the intersection of attributes over all memory in the given range Signed-off-by: Philip Yang Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 164 +++++++++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 1bf909ee8d56..bab61dcb9187 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -706,6 +706,167 @@ out: return r; } +static int +svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size, + uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) +{ + DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE); + DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE); + bool get_preferred_loc = false; + bool get_prefetch_loc = false; + bool get_granularity = false; + bool get_accessible = false; + bool get_flags = false; + uint64_t last = start + size - 1UL; + struct mm_struct *mm = current->mm; + uint8_t granularity = 0xff; + struct interval_tree_node *node; + struct svm_range_list *svms; + struct svm_range *prange; + uint32_t prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED; + uint32_t location = KFD_IOCTL_SVM_LOCATION_UNDEFINED; + uint32_t flags = 0xffffffff; + int gpuidx; + uint32_t i; + + pr_debug("svms 0x%p [0x%llx 0x%llx] nattr 0x%x\n", &p->svms, start, + start + size - 1, nattr); + + mmap_read_lock(mm); + if (!svm_range_is_valid(mm, start, size)) { + pr_debug("invalid range\n"); + mmap_read_unlock(mm); + return -EINVAL; + } + mmap_read_unlock(mm); + + for (i = 0; i < nattr; i++) { + switch (attrs[i].type) { + case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: + get_preferred_loc = true; + break; + case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: + get_prefetch_loc = true; + break; + case KFD_IOCTL_SVM_ATTR_ACCESS: + get_accessible = true; + break; + case KFD_IOCTL_SVM_ATTR_SET_FLAGS: + get_flags = true; + break; + case KFD_IOCTL_SVM_ATTR_GRANULARITY: + get_granularity = true; + break; + case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: + case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: + case KFD_IOCTL_SVM_ATTR_NO_ACCESS: + fallthrough; + default: + pr_debug("get invalid attr type 0x%x\n", attrs[i].type); + return -EINVAL; + } + } + + svms = &p->svms; + + mutex_lock(&svms->lock); + + node = interval_tree_iter_first(&svms->objects, start, last); + if (!node) { + pr_debug("range attrs not found return default values\n"); + svm_range_set_default_attributes(&location, &prefetch_loc, + &granularity, &flags); + /* TODO: Automatically create SVM 
ranges and map them on
+		 * GPU page faults
+		if (p->xnack_enabled)
+			bitmap_fill(bitmap_access, MAX_GPU_INSTANCE);
+		*/
+
+		goto fill_values;
+	}
+	bitmap_fill(bitmap_access, MAX_GPU_INSTANCE);
+	bitmap_fill(bitmap_aip, MAX_GPU_INSTANCE);
+
+	while (node) {
+		struct interval_tree_node *next;
+
+		prange = container_of(node, struct svm_range, it_node);
+		next = interval_tree_iter_next(node, start, last);
+
+		if (get_preferred_loc) {
+			if (prange->preferred_loc ==
+			    KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
+			    (location != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
+			     location != prange->preferred_loc)) {
+				location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+				get_preferred_loc = false;
+			} else {
+				location = prange->preferred_loc;
+			}
+		}
+		if (get_prefetch_loc) {
+			if (prange->prefetch_loc ==
+			    KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
+			    (prefetch_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
+			     prefetch_loc != prange->prefetch_loc)) {
+				prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+				get_prefetch_loc = false;
+			} else {
+				prefetch_loc = prange->prefetch_loc;
+			}
+		}
+		if (get_accessible) {
+			bitmap_and(bitmap_access, bitmap_access,
+				   prange->bitmap_access, MAX_GPU_INSTANCE);
+			bitmap_and(bitmap_aip, bitmap_aip,
+				   prange->bitmap_aip, MAX_GPU_INSTANCE);
+		}
+		if (get_flags)
+			flags &= prange->flags;
+
+		if (get_granularity && prange->granularity < granularity)
+			granularity = prange->granularity;
+
+		node = next;
+	}
+fill_values:
+	mutex_unlock(&svms->lock);
+
+	for (i = 0; i < nattr; i++) {
+		switch (attrs[i].type) {
+		case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+			attrs[i].value = location;
+			break;
+		case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+			attrs[i].value = prefetch_loc;
+			break;
+		case KFD_IOCTL_SVM_ATTR_ACCESS:
+			gpuidx = kfd_process_gpuidx_from_gpuid(p,
+							       attrs[i].value);
+			if (gpuidx < 0) {
+				pr_debug("invalid gpuid %x\n", attrs[i].value);
+				return -EINVAL;
+			}
+			if (test_bit(gpuidx, bitmap_access))
+				attrs[i].type = KFD_IOCTL_SVM_ATTR_ACCESS;
+			else if (test_bit(gpuidx, bitmap_aip))
+				attrs[i].type =
+					KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE;
+			else
+				attrs[i].type = KFD_IOCTL_SVM_ATTR_NO_ACCESS;
+			break;
+		case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+			attrs[i].value = flags;
+			break;
+		case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+			attrs[i].value = (uint32_t)granularity;
+			break;
+		}
+	}
+
+	return 0;
+}
+
 int
 svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
 	  uint64_t size, uint32_t nattrs, struct kfd_ioctl_svm_attribute *attrs)
@@ -719,6 +880,9 @@ svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
 	case KFD_IOCTL_SVM_OP_SET_ATTR:
 		r = svm_range_set_attr(p, start, size, nattrs, attrs);
 		break;
+	case KFD_IOCTL_SVM_OP_GET_ATTR:
+		r = svm_range_get_attr(p, start, size, nattrs, attrs);
+		break;
 	default:
 		r = -EINVAL;
 		break;
-- 
cgit 

From 04d8d73dbcbe645a378fca6adc6f0e7111e46c17 Mon Sep 17 00:00:00 2001
From: Philip Yang
Date: Mon, 24 Feb 2020 21:17:30 -0500
Subject: drm/amdgpu: add common HMM get pages function

Move the HMM get pages function from amdgpu_ttm to amdgpu_mn. This
common function will be used by new svm APIs.
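The expected calling pattern pairs the two helpers around an HMM retry
loop. The following minimal sketch shows that pattern; the retry label,
the notifier and the surrounding driver state are illustrative and not
part of this patch:

	struct hmm_range *hmm_range;
	int r;

retry:
	/* Fault the pages and collect struct page pointers. */
	r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
				       npages, &hmm_range,
				       false /* readonly */,
				       false /* mmap_locked */);
	if (r)
		return r;

	/* ... program the GPU mapping while holding the notifier lock ... */

	/* Non-zero means a concurrent CPU invalidation hit the range, the
	 * pfn array and hmm_range are freed, and the whole fault-and-map
	 * sequence must be repeated.
	 */
	if (amdgpu_hmm_range_get_pages_done(hmm_range))
		goto retry;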
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 83 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 7 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 76 ++++-------------------------- 3 files changed, 100 insertions(+), 66 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 828b5167ff12..997da4237a10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -155,3 +155,86 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) mmu_interval_notifier_remove(&bo->notifier); bo->notifier.mm = NULL; } + +int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, + struct mm_struct *mm, struct page **pages, + uint64_t start, uint64_t npages, + struct hmm_range **phmm_range, bool readonly, + bool mmap_locked) +{ + struct hmm_range *hmm_range; + unsigned long timeout; + unsigned long i; + unsigned long *pfns; + int r = 0; + + hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL); + if (unlikely(!hmm_range)) + return -ENOMEM; + + pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); + if (unlikely(!pfns)) { + r = -ENOMEM; + goto out_free_range; + } + + hmm_range->notifier = notifier; + hmm_range->default_flags = HMM_PFN_REQ_FAULT; + if (!readonly) + hmm_range->default_flags |= HMM_PFN_REQ_WRITE; + hmm_range->hmm_pfns = pfns; + hmm_range->start = start; + hmm_range->end = start + npages * PAGE_SIZE; + timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); + +retry: + hmm_range->notifier_seq = mmu_interval_read_begin(notifier); + + if (likely(!mmap_locked)) + mmap_read_lock(mm); + + r = hmm_range_fault(hmm_range); + + if (likely(!mmap_locked)) + mmap_read_unlock(mm); + if (unlikely(r)) { + /* + * FIXME: This timeout should encompass the retry from + * mmu_interval_read_retry() as well. + */ + if (r == -EBUSY && !time_after(jiffies, timeout)) + goto retry; + goto out_free_pfns; + } + + /* + * Due to default_flags, all pages are HMM_PFN_VALID or + * hmm_range_fault() fails. FIXME: The pages cannot be touched outside + * the notifier_lock, and mmu_interval_read_retry() must be done first. 
+ */ + for (i = 0; pages && i < npages; i++) + pages[i] = hmm_pfn_to_page(pfns[i]); + + *phmm_range = hmm_range; + + return 0; + +out_free_pfns: + kvfree(pfns); +out_free_range: + kfree(hmm_range); + + return r; +} + +int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range) +{ + int r; + + r = mmu_interval_read_retry(hmm_range->notifier, + hmm_range->notifier_seq); + kvfree(hmm_range->hmm_pfns); + kfree(hmm_range); + + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index a292238f75eb..7f7d37a457c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -30,6 +30,13 @@ #include #include +int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, + struct mm_struct *mm, struct page **pages, + uint64_t start, uint64_t npages, + struct hmm_range **phmm_range, bool readonly, + bool mmap_locked); +int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range); + #if defined(CONFIG_HMM_MIRROR) int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 1485f33c3cc7..b007e7d472b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -32,7 +32,6 @@ #include #include -#include #include #include #include @@ -663,10 +662,8 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) struct amdgpu_ttm_tt *gtt = (void *)ttm; unsigned long start = gtt->userptr; struct vm_area_struct *vma; - struct hmm_range *range; - unsigned long timeout; struct mm_struct *mm; - unsigned long i; + bool readonly; int r = 0; mm = bo->notifier.mm; @@ -682,76 +679,26 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) if (!mmget_not_zero(mm)) /* Happens during process shutdown */ return -ESRCH; - range = kzalloc(sizeof(*range), GFP_KERNEL); - if (unlikely(!range)) { - r = -ENOMEM; - goto out; - } - range->notifier = &bo->notifier; - range->start = bo->notifier.interval_tree.start; - range->end = bo->notifier.interval_tree.last + 1; - range->default_flags = HMM_PFN_REQ_FAULT; - if (!amdgpu_ttm_tt_is_readonly(ttm)) - range->default_flags |= HMM_PFN_REQ_WRITE; - - range->hmm_pfns = kvmalloc_array(ttm->num_pages, - sizeof(*range->hmm_pfns), GFP_KERNEL); - if (unlikely(!range->hmm_pfns)) { - r = -ENOMEM; - goto out_free_ranges; - } - mmap_read_lock(mm); vma = find_vma(mm, start); + mmap_read_unlock(mm); if (unlikely(!vma || start < vma->vm_start)) { r = -EFAULT; - goto out_unlock; + goto out_putmm; } if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && vma->vm_file)) { r = -EPERM; - goto out_unlock; - } - mmap_read_unlock(mm); - timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); - -retry: - range->notifier_seq = mmu_interval_read_begin(&bo->notifier); - - mmap_read_lock(mm); - r = hmm_range_fault(range); - mmap_read_unlock(mm); - if (unlikely(r)) { - /* - * FIXME: This timeout should encompass the retry from - * mmu_interval_read_retry() as well. - */ - if (r == -EBUSY && !time_after(jiffies, timeout)) - goto retry; - goto out_free_pfns; + goto out_putmm; } - /* - * Due to default_flags, all pages are HMM_PFN_VALID or - * hmm_range_fault() fails. FIXME: The pages cannot be touched outside - * the notifier_lock, and mmu_interval_read_retry() must be done first. 
- */
-	for (i = 0; i < ttm->num_pages; i++)
-		pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
-
-	gtt->range = range;
+	readonly = amdgpu_ttm_tt_is_readonly(ttm);
+	r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
+				       ttm->num_pages, &gtt->range, readonly,
+				       false);
+out_putmm:
+	mmput(mm);
-	return 0;
-
-out_unlock:
-	mmap_read_unlock(mm);
-out_free_pfns:
-	kvfree(range->hmm_pfns);
-out_free_ranges:
-	kfree(range);
-out:
-	mmput(mm);
 	return r;
 }
 
@@ -780,10 +727,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
 		 * FIXME: Must always hold notifier_lock for this, and must
 		 * not ignore the return code.
 		 */
-		r = mmu_interval_read_retry(gtt->range->notifier,
-					    gtt->range->notifier_seq);
-		kvfree(gtt->range->hmm_pfns);
-		kfree(gtt->range);
+		r = amdgpu_hmm_range_get_pages_done(gtt->range);
 		gtt->range = NULL;
 	}
-- 
cgit 

From d8a3c1c80ceb656de00b45fe1595a040e5535202 Mon Sep 17 00:00:00 2001
From: Philip Yang
Date: Tue, 30 Mar 2021 14:02:53 -0400
Subject: drm/amdkfd: support larger svm range allocation

For larger range allocations, if hmm_range_fault returns -EBUSY, set
the retry timeout based on 1 second for every 512MB; this is a safe
timeout value. In the code below, npages >> 17 is the range size in
512MB units (2^17 pages of 4KB each are 512MB), so a 2GB range, for
example, gets 4 * HMM_RANGE_DEFAULT_TIMEOUT.

Signed-off-by: Philip Yang
Reviewed-by: Felix Kuehling
Signed-off-by: Felix Kuehling
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 997da4237a10..2741c28ff1b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -185,7 +185,10 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
 	hmm_range->hmm_pfns = pfns;
 	hmm_range->start = start;
 	hmm_range->end = start + npages * PAGE_SIZE;
-	timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+
+	/* Assuming 512MB takes maximum 1 second to fault page addresses */
+	timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT;
+	timeout = jiffies + msecs_to_jiffies(timeout);
 
 retry:
 	hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
-- 
cgit 

From b1c46c7d622bed62898579548a7e9a21e1320c9d Mon Sep 17 00:00:00 2001
From: Philip Yang
Date: Sat, 15 Feb 2020 15:02:49 -0500
Subject: drm/amdkfd: validate svm range system memory

Use HMM to get system memory page addresses, which will be used to map
to GPUs or migrate to vram.
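One detail worth noting before the diff: amdgpu_hmm_range_get_pages
tolerates a NULL pages array (its copy loop is guarded by "pages &&"),
and this patch relies on that, since validation only needs the fault to
complete and the notifier sequence to be rechecked afterwards. A
minimal sketch of that usage, with the surrounding locking elided and
the error handling shortened for illustration:

	struct hmm_range *hmm_range;
	int r;

	/* Fault in the range without collecting struct page pointers. */
	r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
				       prange->start << PAGE_SHIFT,
				       prange->npages, &hmm_range,
				       false /* readonly */,
				       true /* mmap lock already held */);
	if (r)
		return r;

	/* Later, under prange->lock: non-zero means a CPU invalidation
	 * raced with us and the validation must be retried.
	 */
	if (amdgpu_hmm_range_get_pages_done(hmm_range))
		r = -EAGAIN;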
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 116 ++++++++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 18 ++++++ 2 files changed, 133 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index bab61dcb9187..2f92b92739bc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -29,6 +29,15 @@ #include "kfd_priv.h" #include "kfd_svm.h" +static bool +svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq); + +static const struct mmu_interval_notifier_ops svm_range_mn_ops = { + .invalidate = svm_range_cpu_invalidate_pagetables, +}; + /** * svm_range_unlink - unlink svm_range from lists and interval tree * @prange: svm range structure to be removed @@ -47,6 +56,18 @@ static void svm_range_unlink(struct svm_range *prange) interval_tree_remove(&prange->it_node, &prange->svms->objects); } +static void +svm_range_add_notifier_locked(struct mm_struct *mm, struct svm_range *prange) +{ + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, + prange, prange->start, prange->last); + + mmu_interval_notifier_insert_locked(&prange->notifier, mm, + prange->start << PAGE_SHIFT, + prange->npages << PAGE_SHIFT, + &svm_range_mn_ops); +} + /** * svm_range_add_to_svms - add svm range to svms * @prange: svm range structure to be added @@ -66,11 +87,24 @@ static void svm_range_add_to_svms(struct svm_range *prange) interval_tree_insert(&prange->it_node, &prange->svms->objects); } +static void svm_range_remove_notifier(struct svm_range *prange) +{ + pr_debug("remove notifier svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", + prange->svms, prange, + prange->notifier.interval_tree.start >> PAGE_SHIFT, + prange->notifier.interval_tree.last >> PAGE_SHIFT); + + if (prange->notifier.interval_tree.start != 0 && + prange->notifier.interval_tree.last != 0) + mmu_interval_notifier_remove(&prange->notifier); +} + static void svm_range_free(struct svm_range *prange) { pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, prange->start, prange->last); + mutex_destroy(&prange->lock); kfree(prange); } @@ -103,6 +137,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, INIT_LIST_HEAD(&prange->update_list); INIT_LIST_HEAD(&prange->remove_list); INIT_LIST_HEAD(&prange->insert_list); + mutex_init(&prange->lock); svm_range_set_default_attributes(&prange->preferred_loc, &prange->prefetch_loc, &prange->granularity, &prange->flags); @@ -376,6 +411,65 @@ svm_range_split_head(struct svm_range *prange, struct svm_range *new, return r; } +/* + * Validation+GPU mapping with concurrent invalidation (MMU notifiers) + * + * To prevent concurrent destruction or change of range attributes, the + * svm_read_lock must be held. The caller must not hold the svm_write_lock + * because that would block concurrent evictions and lead to deadlocks. To + * serialize concurrent migrations or validations of the same range, the + * prange->migrate_mutex must be held. + * + * For VRAM ranges, the SVM BO must be allocated and valid (protected by its + * eviction fence. + * + * The following sequence ensures race-free validation and GPU mapping: + * + * 1. Reserve page table (and SVM BO if range is in VRAM) + * 2. hmm_range_fault to get page addresses (if system memory) + * 3. 
DMA-map pages (if system memory) + * 4-a. Take notifier lock + * 4-b. Check that pages still valid (mmu_interval_read_retry) + * 4-c. Check that the range was not split or otherwise invalidated + * 4-d. Update GPU page table + * 4.e. Release notifier lock + * 5. Release page table (and SVM BO) reservation + */ +static int svm_range_validate_and_map(struct mm_struct *mm, + struct svm_range *prange, + uint32_t gpuidx, bool intr, bool wait) +{ + struct hmm_range *hmm_range; + int r = 0; + + if (!prange->actual_loc) { + r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, + prange->start << PAGE_SHIFT, + prange->npages, &hmm_range, + false, true); + if (r) { + pr_debug("failed %d to get svm range pages\n", r); + goto unreserve_out; + } + } + + svm_range_lock(prange); + if (!prange->actual_loc) { + if (amdgpu_hmm_range_get_pages_done(hmm_range)) { + r = -EAGAIN; + goto unlock_out; + } + } + + /* TODO: map to GPU */ + +unlock_out: + svm_range_unlock(prange); +unreserve_out: + + return r; +} + static struct svm_range *svm_range_clone(struct svm_range *old) { struct svm_range *new; @@ -516,6 +610,18 @@ out: return r; } +/** + * svm_range_cpu_invalidate_pagetables - interval notifier callback + * + */ +static bool +svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + return true; +} + void svm_range_list_fini(struct kfd_process *p) { mutex_destroy(&p->svms.lock); @@ -669,6 +775,7 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, /* Apply changes as a transaction */ list_for_each_entry_safe(prange, next, &insert_list, insert_list) { svm_range_add_to_svms(prange); + svm_range_add_notifier_locked(mm, prange); } list_for_each_entry(prange, &update_list, update_list) { svm_range_apply_attrs(p, prange, nattr, attrs); @@ -680,6 +787,7 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, prange->svms, prange, prange->start, prange->last); svm_range_unlink(prange); + svm_range_remove_notifier(prange); svm_range_free(prange); } @@ -690,7 +798,13 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, * case because the rollback wouldn't be guaranteed to work either. 
 */
 	list_for_each_entry(prange, &update_list, update_list) {
-		/* TODO */
+		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+					       true, true);
+		if (r) {
+			pr_debug("failed %d to map 0x%lx to gpus\n", r,
+				 prange->start);
+			break;
+		}
 	}
 
 	svm_range_debug_dump(svms);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 537101dd19a6..10c73348c2c0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -46,11 +46,14 @@
  * @remove_list:link list node used to add to remove list
  * @insert_list:link list node used to add to insert list
  * @npages:     number of pages
+ * @lock:       protect prange start, last, child_list, svm_bo_list
+ * @saved_flags:save/restore current PF_MEMALLOC flags
  * @flags:      flags defined as KFD_IOCTL_SVM_FLAG_*
  * @preferred_loc: preferred location, 0 for CPU, or GPU id
  * @prefetch_loc: last prefetch location, 0 for CPU, or GPU id
  * @actual_loc: the actual location, 0 for CPU, or GPU id
  * @granularity:migration granularity, log2 num pages
+ * @notifier:   register mmu interval notifier
  * @bitmap_access: index bitmap of GPUs which can access the range
  * @bitmap_aip: index bitmap of GPUs which can access the range in place
  *
@@ -68,15 +71,30 @@ struct svm_range {
 	struct list_head	remove_list;
 	struct list_head	insert_list;
 	uint64_t		npages;
+	struct mutex		lock;
+	unsigned int		saved_flags;
 	uint32_t		flags;
 	uint32_t		preferred_loc;
 	uint32_t		prefetch_loc;
 	uint32_t		actual_loc;
 	uint8_t			granularity;
+	struct mmu_interval_notifier	notifier;
 	DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
 	DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
 };
 
+static inline void svm_range_lock(struct svm_range *prange)
+{
+	mutex_lock(&prange->lock);
+	prange->saved_flags = memalloc_noreclaim_save();
+
+}
+static inline void svm_range_unlock(struct svm_range *prange)
+{
+	memalloc_noreclaim_restore(prange->saved_flags);
+	mutex_unlock(&prange->lock);
+}
+
 int svm_range_list_init(struct kfd_process *p);
 void svm_range_list_fini(struct kfd_process *p);
 int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
-- 
cgit 

From 4683cfecadeb383b03019ad11aeea1efac38c1ba Mon Sep 17 00:00:00 2001
From: Philip Yang
Date: Wed, 8 Apr 2020 11:09:45 -0400
Subject: drm/amdkfd: deregister svm range

When an application explicitly calls munmap, or when memory is
unmapped from mmput as the application exits, the driver receives an
MMU_NOTIFY_UNMAP event and first removes the svm range from the process
svms object tree and list, then unmaps it from GPUs (in the following
patch).

Split the svm ranges to handle partial unmapping of svm ranges. To
avoid deadlocks, updating MMU notifiers, range lists and interval trees
is done in a deferred worker. New child ranges are attached to their
parent range's child_list until the worker can update the
svm_range_list. svm_range_set_attr flushes deferred work and takes the
mmap_write_lock to guarantee that it has an up-to-date svm_range_list.
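The split cases in svm_range_unmap_split below are easiest to see
schematically. For a partial unmap of [start, last] that lands in the
middle of an existing range (all intervals in pages), the result is:

	prange:  [prange->start ..................... prange->last]
	unmap:              [start ......... last]

	[prange->start, start - 1]  kept as prange itself; its notifier
	                            is refreshed via
	                            SVM_OP_UPDATE_RANGE_NOTIFIER
	[start, last]               child marked SVM_OP_UNMAP_RANGE; the
	                            deferred worker unlinks and frees it
	[last + 1, prange->last]    child marked SVM_OP_ADD_RANGE; the
	                            worker re-inserts it as a new range

If the unmap covers only the head or only the tail, a single child is
split off and marked SVM_OP_UNMAP_RANGE; if it covers the whole range,
no split is needed and the range itself is queued for removal.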
Signed-off-by: Philip Yang Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 291 +++++++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 18 +++ 3 files changed, 311 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index cfdd34c2223c..6cb0c4168fa0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -735,6 +735,9 @@ struct svm_range_list { struct mutex lock; struct rb_root_cached objects; struct list_head list; + struct work_struct deferred_list_work; + struct list_head deferred_range_list; + spinlock_t deferred_list_lock; }; /* Process data */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 2f92b92739bc..72d54cf4acc8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -137,6 +137,8 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, INIT_LIST_HEAD(&prange->update_list); INIT_LIST_HEAD(&prange->remove_list); INIT_LIST_HEAD(&prange->insert_list); + INIT_LIST_HEAD(&prange->deferred_list); + INIT_LIST_HEAD(&prange->child_list); mutex_init(&prange->lock); svm_range_set_default_attributes(&prange->preferred_loc, &prange->prefetch_loc, @@ -411,6 +413,18 @@ svm_range_split_head(struct svm_range *prange, struct svm_range *new, return r; } +static void +svm_range_add_child(struct svm_range *prange, struct mm_struct *mm, + struct svm_range *pchild, enum svm_work_list_ops op) +{ + pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n", + pchild, pchild->start, pchild->last, prange, op); + + pchild->work_item.mm = mm; + pchild->work_item.op = op; + list_add_tail(&pchild->child_list, &prange->child_list); +} + /* * Validation+GPU mapping with concurrent invalidation (MMU notifiers) * @@ -470,6 +484,30 @@ unreserve_out: return r; } +/** + * svm_range_list_lock_and_flush_work - flush pending deferred work + * + * @svms: the svm range list + * @mm: the mm structure + * + * Context: Returns with mmap write lock held, pending deferred work flushed + * + */ +static void +svm_range_list_lock_and_flush_work(struct svm_range_list *svms, + struct mm_struct *mm) +{ +retry_flush_work: + flush_work(&svms->deferred_list_work); + mmap_write_lock(mm); + + if (list_empty(&svms->deferred_range_list)) + return; + mmap_write_unlock(mm); + pr_debug("retry flush\n"); + goto retry_flush_work; +} + static struct svm_range *svm_range_clone(struct svm_range *old) { struct svm_range *new; @@ -610,15 +648,260 @@ out: return r; } +static void +svm_range_update_notifier_and_interval_tree(struct mm_struct *mm, + struct svm_range *prange) +{ + unsigned long start; + unsigned long last; + + start = prange->notifier.interval_tree.start >> PAGE_SHIFT; + last = prange->notifier.interval_tree.last >> PAGE_SHIFT; + + if (prange->start == start && prange->last == last) + return; + + pr_debug("up notifier 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", + prange->svms, prange, start, last, prange->start, + prange->last); + + if (start != 0 && last != 0) { + interval_tree_remove(&prange->it_node, &prange->svms->objects); + svm_range_remove_notifier(prange); + } + prange->it_node.start = prange->start; + prange->it_node.last = prange->last; + + interval_tree_insert(&prange->it_node, &prange->svms->objects); + 
svm_range_add_notifier_locked(mm, prange); +} + +static void +svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange) +{ + struct mm_struct *mm = prange->work_item.mm; + + switch (prange->work_item.op) { + case SVM_OP_NULL: + pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n", + svms, prange, prange->start, prange->last); + break; + case SVM_OP_UNMAP_RANGE: + pr_debug("remove 0x%p prange 0x%p [0x%lx 0x%lx]\n", + svms, prange, prange->start, prange->last); + svm_range_unlink(prange); + svm_range_remove_notifier(prange); + svm_range_free(prange); + break; + case SVM_OP_UPDATE_RANGE_NOTIFIER: + pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n", + svms, prange, prange->start, prange->last); + svm_range_update_notifier_and_interval_tree(mm, prange); + break; + case SVM_OP_ADD_RANGE: + pr_debug("add 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange, + prange->start, prange->last); + svm_range_add_to_svms(prange); + svm_range_add_notifier_locked(mm, prange); + break; + default: + WARN_ONCE(1, "Unknown prange 0x%p work op %d\n", prange, + prange->work_item.op); + } +} + +static void svm_range_deferred_list_work(struct work_struct *work) +{ + struct svm_range_list *svms; + struct svm_range *prange; + struct mm_struct *mm; + + svms = container_of(work, struct svm_range_list, deferred_list_work); + pr_debug("enter svms 0x%p\n", svms); + + spin_lock(&svms->deferred_list_lock); + while (!list_empty(&svms->deferred_range_list)) { + prange = list_first_entry(&svms->deferred_range_list, + struct svm_range, deferred_list); + spin_unlock(&svms->deferred_list_lock); + pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange, + prange->start, prange->last, prange->work_item.op); + + mm = prange->work_item.mm; + mmap_write_lock(mm); + mutex_lock(&svms->lock); + + /* Remove from deferred_list must be inside mmap write lock, + * otherwise, svm_range_list_lock_and_flush_work may hold mmap + * write lock, and continue because deferred_list is empty, then + * deferred_list handle is blocked by mmap write lock. 
+ */ + spin_lock(&svms->deferred_list_lock); + list_del_init(&prange->deferred_list); + spin_unlock(&svms->deferred_list_lock); + + while (!list_empty(&prange->child_list)) { + struct svm_range *pchild; + + pchild = list_first_entry(&prange->child_list, + struct svm_range, child_list); + pr_debug("child prange 0x%p op %d\n", pchild, + pchild->work_item.op); + list_del_init(&pchild->child_list); + svm_range_handle_list_op(svms, pchild); + } + + svm_range_handle_list_op(svms, prange); + mutex_unlock(&svms->lock); + mmap_write_unlock(mm); + + spin_lock(&svms->deferred_list_lock); + } + spin_unlock(&svms->deferred_list_lock); + + pr_debug("exit svms 0x%p\n", svms); +} + +static void +svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, + struct mm_struct *mm, enum svm_work_list_ops op) +{ + spin_lock(&svms->deferred_list_lock); + /* if prange is on the deferred list */ + if (!list_empty(&prange->deferred_list)) { + pr_debug("update exist prange 0x%p work op %d\n", prange, op); + WARN_ONCE(prange->work_item.mm != mm, "unmatch mm\n"); + if (op != SVM_OP_NULL && + prange->work_item.op != SVM_OP_UNMAP_RANGE) + prange->work_item.op = op; + } else { + prange->work_item.op = op; + prange->work_item.mm = mm; + list_add_tail(&prange->deferred_list, + &prange->svms->deferred_range_list); + pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n", + prange, prange->start, prange->last, op); + } + spin_unlock(&svms->deferred_list_lock); +} + +static void schedule_deferred_list_work(struct svm_range_list *svms) +{ + spin_lock(&svms->deferred_list_lock); + if (!list_empty(&svms->deferred_range_list)) + schedule_work(&svms->deferred_list_work); + spin_unlock(&svms->deferred_list_lock); +} + +static void +svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent, + struct svm_range *prange, unsigned long start, + unsigned long last) +{ + struct svm_range *head; + struct svm_range *tail; + + if (prange->work_item.op == SVM_OP_UNMAP_RANGE) { + pr_debug("prange 0x%p [0x%lx 0x%lx] is already freed\n", prange, + prange->start, prange->last); + return; + } + if (start > prange->last || last < prange->start) + return; + + head = tail = prange; + if (start > prange->start) + svm_range_split(prange, prange->start, start - 1, &tail); + if (last < tail->last) + svm_range_split(tail, last + 1, tail->last, &head); + + if (head != prange && tail != prange) { + svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE); + } else if (tail != prange) { + svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE); + } else if (head != prange) { + svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE); + } else if (parent != prange) { + prange->work_item.op = SVM_OP_UNMAP_RANGE; + } +} + +static void +svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, + unsigned long start, unsigned long last) +{ + struct svm_range_list *svms; + struct svm_range *pchild; + struct kfd_process *p; + bool unmap_parent; + + p = kfd_lookup_process_by_mm(mm); + if (!p) + return; + svms = &p->svms; + + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms, + prange, prange->start, prange->last, start, last); + + unmap_parent = start <= prange->start && last >= prange->last; + + list_for_each_entry(pchild, &prange->child_list, child_list) + svm_range_unmap_split(mm, prange, pchild, start, last); + svm_range_unmap_split(mm, prange, prange, start, last); + + if (unmap_parent) + svm_range_add_list_work(svms, prange, 
mm, SVM_OP_UNMAP_RANGE); + else + svm_range_add_list_work(svms, prange, mm, + SVM_OP_UPDATE_RANGE_NOTIFIER); + schedule_deferred_list_work(svms); + + kfd_unref_process(p); +} + /** * svm_range_cpu_invalidate_pagetables - interval notifier callback * + * MMU range unmap notifier to remove svm ranges */ static bool svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, const struct mmu_notifier_range *range, unsigned long cur_seq) { + struct svm_range *prange; + unsigned long start; + unsigned long last; + + if (range->event == MMU_NOTIFY_RELEASE) + return true; + + start = mni->interval_tree.start; + last = mni->interval_tree.last; + start = (start > range->start ? start : range->start) >> PAGE_SHIFT; + last = (last < (range->end - 1) ? last : range->end - 1) >> PAGE_SHIFT; + pr_debug("[0x%lx 0x%lx] range[0x%lx 0x%lx] notifier[0x%lx 0x%lx] %d\n", + start, last, range->start >> PAGE_SHIFT, + (range->end - 1) >> PAGE_SHIFT, + mni->interval_tree.start >> PAGE_SHIFT, + mni->interval_tree.last >> PAGE_SHIFT, range->event); + + prange = container_of(mni, struct svm_range, notifier); + + svm_range_lock(prange); + mmu_interval_set_seq(mni, cur_seq); + + switch (range->event) { + case MMU_NOTIFY_UNMAP: + svm_range_unmap_from_cpu(mni->mm, prange, start, last); + break; + default: + break; + } + + svm_range_unlock(prange); + return true; } @@ -627,6 +910,9 @@ void svm_range_list_fini(struct kfd_process *p) mutex_destroy(&p->svms.lock); pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms); + + /* Ensure list work is finished before process is destroyed */ + flush_work(&p->svms.deferred_list_work); } int svm_range_list_init(struct kfd_process *p) @@ -636,6 +922,9 @@ int svm_range_list_init(struct kfd_process *p) svms->objects = RB_ROOT_CACHED; mutex_init(&svms->lock); INIT_LIST_HEAD(&svms->list); + INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work); + INIT_LIST_HEAD(&svms->deferred_range_list); + spin_lock_init(&svms->deferred_list_lock); return 0; } @@ -753,7 +1042,7 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, mutex_lock(&process_info->lock); - mmap_write_lock(mm); + svm_range_list_lock_and_flush_work(svms, mm); if (!svm_range_is_valid(mm, start, size)) { pr_debug("invalid range\n"); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 10c73348c2c0..173e93e138a9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -33,6 +33,18 @@ #include "amdgpu.h" #include "kfd_priv.h" +enum svm_work_list_ops { + SVM_OP_NULL, + SVM_OP_UNMAP_RANGE, + SVM_OP_UPDATE_RANGE_NOTIFIER, + SVM_OP_ADD_RANGE +}; + +struct svm_work_list_item { + enum svm_work_list_ops op; + struct mm_struct *mm; +}; + /** * struct svm_range - shared virtual memory range * @@ -54,6 +66,9 @@ * @actual_loc: the actual location, 0 for CPU, or GPU id * @granularity:migration granularity, log2 num pages * @notifier: register mmu interval notifier + * @work_item: deferred work item information + * @deferred_list: list header used to add range to deferred list + * @child_list: list header for split ranges which are not added to svms yet * @bitmap_access: index bitmap of GPUs which can access the range * @bitmap_aip: index bitmap of GPUs which can access the range in place * @@ -79,6 +94,9 @@ struct svm_range { uint32_t actual_loc; uint8_t granularity; struct mmu_interval_notifier notifier; + struct svm_work_list_item work_item; + struct list_head deferred_list; + struct list_head child_list; 
	DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
	DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
 };
--
cgit

From d27afacfead5af7a38c1a65c93c07b7bca6f2420 Mon Sep 17 00:00:00 2001
From: Philip Yang
Date: Mon, 23 Sep 2019 11:53:08 -0400
Subject: drm/amdgpu: export vm update mapping interface

It will be used by KFD to map svm ranges to GPUs. Because an svm range
has no amdgpu_bo and bo_va, it cannot use the amdgpu_bo_update
interface; it uses the amdgpu vm update interface directly instead.

Signed-off-by: Philip Yang
Reviewed-by: Felix Kuehling
Signed-off-by: Felix Kuehling
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 +++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 11 +++++++++++
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0ffdf847cad0..2dd5b0e4baf9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1592,15 +1592,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
  * Returns:
  * 0 for success, -EINVAL for failure.
  */
-static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
-				       struct amdgpu_device *bo_adev,
-				       struct amdgpu_vm *vm, bool immediate,
-				       bool unlocked, struct dma_resv *resv,
-				       uint64_t start, uint64_t last,
-				       uint64_t flags, uint64_t offset,
-				       struct drm_mm_node *nodes,
-				       dma_addr_t *pages_addr,
-				       struct dma_fence **fence)
+int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+				struct amdgpu_device *bo_adev,
+				struct amdgpu_vm *vm, bool immediate,
+				bool unlocked, struct dma_resv *resv,
+				uint64_t start, uint64_t last,
+				uint64_t flags, uint64_t offset,
+				struct drm_mm_node *nodes,
+				dma_addr_t *pages_addr,
+				struct dma_fence **fence)
 {
 	struct amdgpu_vm_update_params params;
 	enum amdgpu_sync_mode sync_mode;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 976a12e5a8b9..848e175e99ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -366,6 +366,8 @@ struct amdgpu_vm_manager {
 	spinlock_t pasid_lock;
 };
 
+struct amdgpu_bo_va_mapping;
+
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
@@ -397,6 +399,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			  struct dma_fence **fence);
 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 			   struct amdgpu_vm *vm);
+int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+				struct amdgpu_device *bo_adev,
+				struct amdgpu_vm *vm, bool immediate,
+				bool unlocked, struct dma_resv *resv,
+				uint64_t start, uint64_t last,
+				uint64_t flags, uint64_t offset,
+				struct drm_mm_node *nodes,
+				dma_addr_t *pages_addr,
+				struct dma_fence **fence);
 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			struct amdgpu_bo_va *bo_va,
 			bool clear);
--
cgit

From f80fe9d3c1149bb2ad0d7807aaaf5eff7c4e80a6 Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Wed, 24 Feb 2021 18:29:06 -0500
Subject: drm/amdkfd: map svm range to GPUs

Use amdgpu_vm_bo_update_mapping to update the GPU page table, mapping
or unmapping svm range system memory page addresses to/from GPUs.
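For reference, the exported interface can be driven without any buffer
object at all. A minimal sketch of an unmap through it (this mirrors the
svm_range_unmap_from_gpu() helper added further below; the wrapper name
is illustrative):

	/* Clearing PTEs needs no BO: init PTE value 0, no nodes or pages. */
	static int example_clear_ptes(struct amdgpu_device *adev,
				      struct amdgpu_vm *vm,
				      uint64_t start, uint64_t last,
				      struct dma_fence **fence)
	{
		return amdgpu_vm_bo_update_mapping(adev, adev, vm,
						   false,  /* immediate */
						   true,   /* unlocked */
						   NULL,   /* resv */
						   start, last,
						   0,      /* flags: invalid PTEs */
						   0,      /* offset */
						   NULL,   /* nodes */
						   NULL,   /* pages_addr */
						   fence);
	}

Mapping works the same way, with valid PTE flags and a dma_addr_t array
describing the system memory pages.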
Signed-off-by: Philip Yang Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 7 - drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 473 ++++++++++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 4 + 4 files changed, 478 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 7c8c5e469707..670e9f8d58b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -234,6 +234,10 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s }) /* GPUVM API */ +#define drm_priv_to_vm(drm_priv) \ + (&((struct amdgpu_fpriv *) \ + ((struct drm_file *)(drm_priv))->driver_priv)->vm) + int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, struct file *filp, u32 pasid, void **process_info, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index f96c331c9b6d..618614fe3eb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -948,13 +948,6 @@ static int process_update_pds(struct amdkfd_process_info *process_info, return 0; } -static struct amdgpu_vm *drm_priv_to_vm(struct drm_file *drm_priv) -{ - struct amdgpu_fpriv *fpriv = drm_priv->driver_priv; - - return &fpriv->vm; -} - static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, struct dma_fence **ef) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 72d54cf4acc8..3ccb75d45f13 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -99,11 +99,123 @@ static void svm_range_remove_notifier(struct svm_range *prange) mmu_interval_notifier_remove(&prange->notifier); } +static int +svm_range_dma_map_dev(struct device *dev, dma_addr_t **dma_addr, + unsigned long *hmm_pfns, uint64_t npages) +{ + enum dma_data_direction dir = DMA_BIDIRECTIONAL; + dma_addr_t *addr = *dma_addr; + struct page *page; + int i, r; + + if (!addr) { + addr = kvmalloc_array(npages, sizeof(*addr), + GFP_KERNEL | __GFP_ZERO); + if (!addr) + return -ENOMEM; + *dma_addr = addr; + } + + for (i = 0; i < npages; i++) { + if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]), + "leaking dma mapping\n")) + dma_unmap_page(dev, addr[i], PAGE_SIZE, dir); + + page = hmm_pfn_to_page(hmm_pfns[i]); + addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir); + r = dma_mapping_error(dev, addr[i]); + if (r) { + pr_debug("failed %d dma_map_page\n", r); + return r; + } + pr_debug("dma mapping 0x%llx for page addr 0x%lx\n", + addr[i] >> PAGE_SHIFT, page_to_pfn(page)); + } + return 0; +} + +static int +svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, + unsigned long *hmm_pfns) +{ + struct kfd_process *p; + uint32_t gpuidx; + int r; + + p = container_of(prange->svms, struct kfd_process, svms); + + for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { + struct kfd_process_device *pdd; + struct amdgpu_device *adev; + + pr_debug("mapping to gpu idx 0x%x\n", gpuidx); + pdd = kfd_process_device_from_gpuidx(p, gpuidx); + if (!pdd) { + pr_debug("failed to find device idx %d\n", gpuidx); + return -EINVAL; + } + adev = (struct amdgpu_device *)pdd->dev->kgd; + + r = svm_range_dma_map_dev(adev->dev, &prange->dma_addr[gpuidx], + hmm_pfns, prange->npages); + if (r) + 
break; + } + + return r; +} + +static void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, + unsigned long offset, unsigned long npages) +{ + enum dma_data_direction dir = DMA_BIDIRECTIONAL; + int i; + + if (!dma_addr) + return; + + for (i = offset; i < offset + npages; i++) { + if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i])) + continue; + pr_debug("dma unmapping 0x%llx\n", dma_addr[i] >> PAGE_SHIFT); + dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir); + dma_addr[i] = 0; + } +} + +static void svm_range_free_dma_mappings(struct svm_range *prange) +{ + struct kfd_process_device *pdd; + dma_addr_t *dma_addr; + struct device *dev; + struct kfd_process *p; + uint32_t gpuidx; + + p = container_of(prange->svms, struct kfd_process, svms); + + for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) { + dma_addr = prange->dma_addr[gpuidx]; + if (!dma_addr) + continue; + + pdd = kfd_process_device_from_gpuidx(p, gpuidx); + if (!pdd) { + pr_debug("failed to find device idx %d\n", gpuidx); + continue; + } + dev = &pdd->dev->pdev->dev; + svm_range_dma_unmap(dev, dma_addr, 0, prange->npages); + kvfree(dma_addr); + prange->dma_addr[gpuidx] = NULL; + } +} + static void svm_range_free(struct svm_range *prange) { pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, prange->start, prange->last); + svm_range_free_dma_mappings(prange); mutex_destroy(&prange->lock); kfree(prange); } @@ -149,6 +261,15 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, return prange; } +static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo) +{ + struct ttm_operation_ctx ctx = { false, false }; + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM); + + return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); +} + static int svm_range_check_attr(struct kfd_process *p, uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) @@ -291,6 +412,61 @@ svm_range_is_same_attrs(struct svm_range *old, struct svm_range *new) old->granularity == new->granularity); } +static int +svm_range_split_array(void *ppnew, void *ppold, size_t size, + uint64_t old_start, uint64_t old_n, + uint64_t new_start, uint64_t new_n) +{ + unsigned char *new, *old, *pold; + uint64_t d; + + if (!ppold) + return 0; + pold = *(unsigned char **)ppold; + if (!pold) + return 0; + + new = kvmalloc_array(new_n, size, GFP_KERNEL); + if (!new) + return -ENOMEM; + + d = (new_start - old_start) * size; + memcpy(new, pold + d, new_n * size); + + old = kvmalloc_array(old_n, size, GFP_KERNEL); + if (!old) { + kvfree(new); + return -ENOMEM; + } + + d = (new_start == old_start) ? 
new_n * size : 0; + memcpy(old, pold + d, old_n * size); + + kvfree(pold); + *(void **)ppold = old; + *(void **)ppnew = new; + + return 0; +} + +static int +svm_range_split_pages(struct svm_range *new, struct svm_range *old, + uint64_t start, uint64_t last) +{ + uint64_t npages = last - start + 1; + int i, r; + + for (i = 0; i < MAX_GPU_INSTANCE; i++) { + r = svm_range_split_array(&new->dma_addr[i], &old->dma_addr[i], + sizeof(*old->dma_addr[i]), old->start, + npages, new->start, new->npages); + if (r) + return r; + } + + return 0; +} + /** * svm_range_split_adjust - split range and adjust * @@ -299,7 +475,7 @@ svm_range_is_same_attrs(struct svm_range *old, struct svm_range *new) * @start: the old range adjust to start address in pages * @last: the old range adjust to last address in pages * - * Copy attributes in old range to new + * Copy system memory dma_addr in old range to new * range from new_start up to size new->npages, the remaining old range is from * start to last * @@ -310,6 +486,8 @@ static int svm_range_split_adjust(struct svm_range *new, struct svm_range *old, uint64_t start, uint64_t last) { + int r; + pr_debug("svms 0x%p new 0x%lx old [0x%lx 0x%lx] => [0x%llx 0x%llx]\n", new->svms, new->start, old->start, old->last, start, last); @@ -319,6 +497,10 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old, return -EINVAL; } + r = svm_range_split_pages(new, old, start, last); + if (r) + return r; + old->npages = last - start + 1; old->start = start; old->last = last; @@ -425,6 +607,249 @@ svm_range_add_child(struct svm_range *prange, struct mm_struct *mm, list_add_tail(&pchild->child_list, &prange->child_list); } +static uint64_t +svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange) +{ + uint32_t flags = prange->flags; + uint32_t mapping_flags; + uint64_t pte_flags; + + pte_flags = AMDGPU_PTE_VALID; + pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED; + + mapping_flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE; + + if (flags & KFD_IOCTL_SVM_FLAG_GPU_RO) + mapping_flags &= ~AMDGPU_VM_PAGE_WRITEABLE; + if (flags & KFD_IOCTL_SVM_FLAG_GPU_EXEC) + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; + if (flags & KFD_IOCTL_SVM_FLAG_COHERENT) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; + + /* TODO: add CHIP_ARCTURUS new flags for vram mapping */ + + pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags); + + /* Apply ASIC specific mapping flags */ + amdgpu_gmc_get_vm_pte(adev, &prange->mapping, &pte_flags); + + pr_debug("PTE flags 0x%llx\n", pte_flags); + + return pte_flags; +} + +static int +svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, + uint64_t start, uint64_t last, + struct dma_fence **fence) +{ + uint64_t init_pte_value = 0; + + pr_debug("[0x%llx 0x%llx]\n", start, last); + + return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL, + start, last, init_pte_value, 0, + NULL, NULL, fence); +} + +static int +svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, + unsigned long last) +{ + DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); + struct kfd_process_device *pdd; + struct dma_fence *fence = NULL; + struct amdgpu_device *adev; + struct kfd_process *p; + uint32_t gpuidx; + int r = 0; + + bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip, + MAX_GPU_INSTANCE); + p = container_of(prange->svms, struct kfd_process, svms); + + for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { + pr_debug("unmap from gpu idx 0x%x\n", gpuidx); + pdd 
= kfd_process_device_from_gpuidx(p, gpuidx); + if (!pdd) { + pr_debug("failed to find device idx %d\n", gpuidx); + return -EINVAL; + } + adev = (struct amdgpu_device *)pdd->dev->kgd; + + r = svm_range_unmap_from_gpu(adev, drm_priv_to_vm(pdd->drm_priv), + start, last, &fence); + if (r) + break; + + if (fence) { + r = dma_fence_wait(fence, false); + dma_fence_put(fence); + fence = NULL; + if (r) + break; + } + amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev, + p->pasid); + } + + return r; +} + +static int +svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct svm_range *prange, dma_addr_t *dma_addr, + struct dma_fence **fence) +{ + uint64_t pte_flags; + int r = 0; + + pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, + prange->last); + + prange->mapping.start = prange->start; + prange->mapping.last = prange->last; + prange->mapping.offset = 0; + pte_flags = svm_range_get_pte_flags(adev, prange); + + r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, NULL, + prange->mapping.start, + prange->mapping.last, pte_flags, + prange->mapping.offset, NULL, + dma_addr, &vm->last_update); + if (r) { + pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start); + goto out; + } + + r = amdgpu_vm_update_pdes(adev, vm, false); + if (r) { + pr_debug("failed %d to update directories 0x%lx\n", r, + prange->start); + goto out; + } + + if (fence) + *fence = dma_fence_get(vm->last_update); + +out: + return r; +} + +static int svm_range_map_to_gpus(struct svm_range *prange, + unsigned long *bitmap, bool wait) +{ + struct kfd_process_device *pdd; + struct amdgpu_device *adev; + struct kfd_process *p; + struct dma_fence *fence = NULL; + uint32_t gpuidx; + int r = 0; + + p = container_of(prange->svms, struct kfd_process, svms); + for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { + pdd = kfd_process_device_from_gpuidx(p, gpuidx); + if (!pdd) { + pr_debug("failed to find device idx %d\n", gpuidx); + return -EINVAL; + } + adev = (struct amdgpu_device *)pdd->dev->kgd; + + pdd = kfd_bind_process_to_device(pdd->dev, p); + if (IS_ERR(pdd)) + return -EINVAL; + + r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv), + prange, prange->dma_addr[gpuidx], + wait ? 
&fence : NULL); + if (r) + break; + + if (fence) { + r = dma_fence_wait(fence, false); + dma_fence_put(fence); + fence = NULL; + if (r) { + pr_debug("failed %d to dma fence wait\n", r); + break; + } + } + } + + return r; +} + +struct svm_validate_context { + struct kfd_process *process; + struct svm_range *prange; + bool intr; + unsigned long bitmap[MAX_GPU_INSTANCE]; + struct ttm_validate_buffer tv[MAX_GPU_INSTANCE+1]; + struct list_head validate_list; + struct ww_acquire_ctx ticket; +}; + +static int svm_range_reserve_bos(struct svm_validate_context *ctx) +{ + struct kfd_process_device *pdd; + struct amdgpu_device *adev; + struct amdgpu_vm *vm; + uint32_t gpuidx; + int r; + + INIT_LIST_HEAD(&ctx->validate_list); + for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) { + pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx); + if (!pdd) { + pr_debug("failed to find device idx %d\n", gpuidx); + return -EINVAL; + } + adev = (struct amdgpu_device *)pdd->dev->kgd; + vm = drm_priv_to_vm(pdd->drm_priv); + + ctx->tv[gpuidx].bo = &vm->root.base.bo->tbo; + ctx->tv[gpuidx].num_shared = 4; + list_add(&ctx->tv[gpuidx].head, &ctx->validate_list); + } + + r = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->validate_list, + ctx->intr, NULL); + if (r) { + pr_debug("failed %d to reserve bo\n", r); + return r; + } + + for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) { + pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx); + if (!pdd) { + pr_debug("failed to find device idx %d\n", gpuidx); + r = -EINVAL; + goto unreserve_out; + } + adev = (struct amdgpu_device *)pdd->dev->kgd; + + r = amdgpu_vm_validate_pt_bos(adev, drm_priv_to_vm(pdd->drm_priv), + svm_range_bo_validate, NULL); + if (r) { + pr_debug("failed %d validate pt bos\n", r); + goto unreserve_out; + } + } + + return 0; + +unreserve_out: + ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list); + return r; +} + +static void svm_range_unreserve_bos(struct svm_validate_context *ctx) +{ + ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list); +} + /* * Validation+GPU mapping with concurrent invalidation (MMU notifiers) * @@ -453,9 +878,27 @@ static int svm_range_validate_and_map(struct mm_struct *mm, struct svm_range *prange, uint32_t gpuidx, bool intr, bool wait) { + struct svm_validate_context ctx; struct hmm_range *hmm_range; int r = 0; + ctx.process = container_of(prange->svms, struct kfd_process, svms); + ctx.prange = prange; + ctx.intr = intr; + + if (gpuidx < MAX_GPU_INSTANCE) { + bitmap_zero(ctx.bitmap, MAX_GPU_INSTANCE); + bitmap_set(ctx.bitmap, gpuidx, 1); + } else { + bitmap_or(ctx.bitmap, prange->bitmap_access, + prange->bitmap_aip, MAX_GPU_INSTANCE); + } + + if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) + return 0; + + svm_range_reserve_bos(&ctx); + if (!prange->actual_loc) { r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, prange->start << PAGE_SHIFT, @@ -465,6 +908,13 @@ static int svm_range_validate_and_map(struct mm_struct *mm, pr_debug("failed %d to get svm range pages\n", r); goto unreserve_out; } + + r = svm_range_dma_map(prange, ctx.bitmap, + hmm_range->hmm_pfns); + if (r) { + pr_debug("failed %d to dma map range\n", r); + goto unreserve_out; + } } svm_range_lock(prange); @@ -474,12 +924,17 @@ static int svm_range_validate_and_map(struct mm_struct *mm, goto unlock_out; } } + if (!list_empty(&prange->child_list)) { + r = -EAGAIN; + goto unlock_out; + } - /* TODO: map to GPU */ + r = svm_range_map_to_gpus(prange, ctx.bitmap, wait); unlock_out: svm_range_unlock(prange); unreserve_out: + 
svm_range_unreserve_bos(&ctx);
 
 	return r;
 }
 
@@ -834,6 +1289,7 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
 	struct svm_range_list *svms;
 	struct svm_range *pchild;
 	struct kfd_process *p;
+	unsigned long s, l;
 	bool unmap_parent;
 
 	p = kfd_lookup_process_by_mm(mm);
@@ -846,8 +1302,19 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
 
 	unmap_parent = start <= prange->start && last >= prange->last;
 
-	list_for_each_entry(pchild, &prange->child_list, child_list)
+	list_for_each_entry(pchild, &prange->child_list, child_list) {
+		mutex_lock_nested(&pchild->lock, 1);
+		s = max(start, pchild->start);
+		l = min(last, pchild->last);
+		if (l >= s)
+			svm_range_unmap_from_gpus(pchild, s, l);
 		svm_range_unmap_split(mm, prange, pchild, start, last);
+		mutex_unlock(&pchild->lock);
+	}
+	s = max(start, prange->start);
+	l = min(last, prange->last);
+	if (l >= s)
+		svm_range_unmap_from_gpus(prange, s, l);
 	svm_range_unmap_split(mm, prange, prange, start, last);
 
 	if (unmap_parent)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 173e93e138a9..5949890bf48c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -57,7 +57,9 @@ struct svm_work_list_item {
  * @update_list:link list node used to add to update_list
  * @remove_list:link list node used to add to remove list
  * @insert_list:link list node used to add to insert list
+ * @mapping:    bo_va mapping structure to create and update GPU page table
  * @npages:     number of pages
+ * @dma_addr:   dma mapping address on each GPU for system memory physical page
  * @lock:       protect prange start, last, child_list, svm_bo_list
  * @saved_flags:save/restore current PF_MEMALLOC flags
  * @flags:      flags defined as KFD_IOCTL_SVM_FLAG_*
@@ -85,7 +87,9 @@ struct svm_range {
 	struct list_head		update_list;
 	struct list_head		remove_list;
 	struct list_head		insert_list;
+	struct amdgpu_bo_va_mapping	mapping;
 	uint64_t			npages;
+	dma_addr_t			*dma_addr[MAX_GPU_INSTANCE];
 	struct mutex			lock;
 	unsigned int			saved_flags;
 	uint32_t			flags;
--
cgit

From 8a7c184a16d73ab25360e0f5997f82306ff5d5a2 Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Wed, 24 Feb 2021 18:47:52 -0500
Subject: drm/amdkfd: svm range eviction and restore

The HMM interval notifier callback notifies us that the CPU page table
will be updated. Stop the process queues if the updated address range
belongs to an svm range registered in the process svms objects tree.
Schedule restore work to update the GPU page table with the new page
addresses in the updated svm range.

The restore worker flushes any deferred work to make sure it restores
an up-to-date svm_range_list.
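The notifier and the restore worker synchronize through two counters,
prange->invalid and svms->evicted_ranges, using atomics instead of a
shared lock. Distilled to its core, the pattern looks like this (a
sketch, not a drop-in; error handling and list walking omitted):

	/* Notifier side: flag the range, quiesce queues on first eviction. */
	atomic_inc(&prange->invalid);
	if (atomic_inc_return(&svms->evicted_ranges) == 1)
		kgd2kfd_quiesce_mm(mm);	/* stop the user queues once */
	schedule_delayed_work(&svms->restore_work,
			      msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));

	/* Worker side: revalidate, then detect races with cmpxchg. */
	invalid = atomic_read(&prange->invalid);
	/* ... revalidate and remap the range to the GPUs ... */
	if (atomic_cmpxchg(&prange->invalid, invalid, 0) != invalid)
		goto retry;	/* a new invalidation arrived meanwhile */

If the cmpxchg loses, another CPU invalidation raced with the remap, so
the worker gives up and lets the rescheduled work pick up the latest
page addresses.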
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 135 +++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 2 + 4 files changed, 140 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 6cb0c4168fa0..81f71c4079a6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -738,6 +738,8 @@ struct svm_range_list { struct work_struct deferred_list_work; struct list_head deferred_range_list; spinlock_t deferred_list_lock; + atomic_t evicted_ranges; + struct delayed_work restore_work; }; /* Process data */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 5f1ec7553509..3c72e9dc6422 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1064,6 +1064,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, cancel_delayed_work_sync(&p->eviction_work); cancel_delayed_work_sync(&p->restore_work); + cancel_delayed_work_sync(&p->svms.restore_work); mutex_lock(&p->mutex); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 3ccb75d45f13..1fe6913242d7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -22,6 +22,7 @@ */ #include +#include #include "amdgpu_sync.h" #include "amdgpu_object.h" #include "amdgpu_vm.h" @@ -29,6 +30,8 @@ #include "kfd_priv.h" #include "kfd_svm.h" +#define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1 + static bool svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, const struct mmu_notifier_range *range, @@ -251,6 +254,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, INIT_LIST_HEAD(&prange->insert_list); INIT_LIST_HEAD(&prange->deferred_list); INIT_LIST_HEAD(&prange->child_list); + atomic_set(&prange->invalid, 0); mutex_init(&prange->lock); svm_range_set_default_attributes(&prange->preferred_loc, &prange->prefetch_loc, @@ -963,6 +967,129 @@ retry_flush_work: goto retry_flush_work; } +static void svm_range_restore_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct amdkfd_process_info *process_info; + struct svm_range_list *svms; + struct svm_range *prange; + struct kfd_process *p; + struct mm_struct *mm; + int evicted_ranges; + int invalid; + int r; + + svms = container_of(dwork, struct svm_range_list, restore_work); + evicted_ranges = atomic_read(&svms->evicted_ranges); + if (!evicted_ranges) + return; + + pr_debug("restore svm ranges\n"); + + /* kfd_process_notifier_release destroys this worker thread. So during + * the lifetime of this thread, kfd_process and mm will be valid. 
+ */ + p = container_of(svms, struct kfd_process, svms); + process_info = p->kgd_process_info; + mm = p->mm; + if (!mm) + return; + + mutex_lock(&process_info->lock); + svm_range_list_lock_and_flush_work(svms, mm); + mutex_lock(&svms->lock); + + evicted_ranges = atomic_read(&svms->evicted_ranges); + + list_for_each_entry(prange, &svms->list, list) { + invalid = atomic_read(&prange->invalid); + if (!invalid) + continue; + + pr_debug("restoring svms 0x%p prange 0x%p [0x%lx %lx] inv %d\n", + prange->svms, prange, prange->start, prange->last, + invalid); + + r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, + false, true); + if (r) { + pr_debug("failed %d to map 0x%lx to gpus\n", r, + prange->start); + goto unlock_out; + } + + if (atomic_cmpxchg(&prange->invalid, invalid, 0) != invalid) + goto unlock_out; + } + + if (atomic_cmpxchg(&svms->evicted_ranges, evicted_ranges, 0) != + evicted_ranges) + goto unlock_out; + + evicted_ranges = 0; + + r = kgd2kfd_resume_mm(mm); + if (r) { + /* No recovery from this failure. Probably the CP is + * hanging. No point trying again. + */ + pr_debug("failed %d to resume KFD\n", r); + } + + pr_debug("restore svm ranges successfully\n"); + +unlock_out: + mutex_unlock(&svms->lock); + mmap_write_unlock(mm); + mutex_unlock(&process_info->lock); + + /* If validation failed, reschedule another attempt */ + if (evicted_ranges) { + pr_debug("reschedule to restore svm range\n"); + schedule_delayed_work(&svms->restore_work, + msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); + } +} + +/** + * svm_range_evict - evict svm range + * + * Stop all queues of the process to ensure GPU doesn't access the memory, then + * return to let CPU evict the buffer and proceed CPU pagetable update. + * + * Don't need use lock to sync cpu pagetable invalidation with GPU execution. + * If invalidation happens while restore work is running, restore work will + * restart to ensure to get the latest CPU pages mapping to GPU, then start + * the queues. + */ +static int +svm_range_evict(struct svm_range *prange, struct mm_struct *mm, + unsigned long start, unsigned long last) +{ + struct svm_range_list *svms = prange->svms; + int evicted_ranges; + int r = 0; + + atomic_inc(&prange->invalid); + evicted_ranges = atomic_inc_return(&svms->evicted_ranges); + if (evicted_ranges != 1) + return r; + + pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n", + prange->svms, prange->start, prange->last); + + /* First eviction, stop the queues */ + r = kgd2kfd_quiesce_mm(mm); + if (r) + pr_debug("failed to quiesce KFD\n"); + + pr_debug("schedule to restore svm %p ranges\n", svms); + schedule_delayed_work(&svms->restore_work, + msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); + + return r; +} + static struct svm_range *svm_range_clone(struct svm_range *old) { struct svm_range *new; @@ -1331,6 +1458,11 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, * svm_range_cpu_invalidate_pagetables - interval notifier callback * * MMU range unmap notifier to remove svm ranges + * + * If GPU vm fault retry is not enabled, evict the svm range, then restore + * work will update GPU mapping. + * If GPU vm fault retry is enabled, unmap the svm range from GPU, vm fault + * will update GPU mapping. 
*/ static bool svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, @@ -1364,6 +1496,7 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, svm_range_unmap_from_cpu(mni->mm, prange, start, last); break; default: + svm_range_evict(prange, mni->mm, start, last); break; } @@ -1389,6 +1522,8 @@ int svm_range_list_init(struct kfd_process *p) svms->objects = RB_ROOT_CACHED; mutex_init(&svms->lock); INIT_LIST_HEAD(&svms->list); + atomic_set(&svms->evicted_ranges, 0); + INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work); INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work); INIT_LIST_HEAD(&svms->deferred_range_list); spin_lock_init(&svms->deferred_list_lock); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 5949890bf48c..3c94899c5c40 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -67,6 +67,7 @@ struct svm_work_list_item { * @perfetch_loc: last prefetch location, 0 for CPU, or GPU id * @actual_loc: the actual location, 0 for CPU, or GPU id * @granularity:migration granularity, log2 num pages + * @invalid: not 0 means cpu page table is invalidated * @notifier: register mmu interval notifier * @work_item: deferred work item information * @deferred_list: list header used to add range to deferred list @@ -97,6 +98,7 @@ struct svm_range { uint32_t prefetch_loc; uint32_t actual_loc; uint8_t granularity; + atomic_t invalid; struct mmu_interval_notifier notifier; struct svm_work_list_item work_item; struct list_head deferred_list; -- cgit From 9705c85ff2dc0e421255a1a68171c3b78116b313 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 11 Feb 2021 15:57:20 -0500 Subject: drm/amdgpu: Enable retry faults unconditionally on Aldebaran This is needed to allow per-process XNACK mode selection in the SQ when booting with XNACK off by default. Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Tested-by: Alex Sierra Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 8 ++++++-- drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c | 6 ++++-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 922938931e1a..4f62c78485ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -640,7 +640,8 @@ module_param_named(mes, amdgpu_mes, int, 0444); /** * DOC: noretry (int) - * Disable retry faults in the GPU memory controller. + * Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that + * do not support per-process XNACK this also disables retry page faults. * (0 = retry enabled, 1 = retry disabled, -1 auto (default)) */ MODULE_PARM_DESC(noretry, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 1e4678cb98f0..a03fdd41212b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -283,10 +283,14 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, block_size); - /* Send no-retry XNACK on fault to suppress VM fault storm. */ + /* Send no-retry XNACK on fault to suppress VM fault storm. + * On Aldebaran, XNACK can be enabled in the SQ per-process. + * Retry faults need to be enabled for that to work. 
+	 */
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
 			    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
-			    !adev->gmc.noretry);
+			    !adev->gmc.noretry ||
+			    adev->asic_type == CHIP_ALDEBARAN);
 	WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL,
 			    i * hub->ctx_distance, tmp);
 	WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
index 0103a5ab28e6..9aaa137662b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
@@ -296,10 +296,12 @@ static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev)
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
 				    PAGE_TABLE_BLOCK_SIZE,
 				    block_size);
-		/* Send no-retry XNACK on fault to suppress VM fault storm. */
+		/* On Aldebaran, XNACK can be enabled in the SQ per-process.
+		 * Retry faults need to be enabled for that to work.
+		 */
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
 				    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
-				    !adev->gmc.noretry);
+				    1);
 		WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL,
 				    i * hub->ctx_distance, tmp);
 		WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
--
cgit

From 063e33c5469cf5f46407701f1121a978a3af2ce8 Mon Sep 17 00:00:00 2001
From: Alex Sierra
Date: Thu, 28 May 2020 18:03:15 -0500
Subject: drm/amdkfd: add xnack enabled flag to kfd_process

XNACK mode controls the SQ RETRY_DISABLE setting that determines
whether recoverable page faults can be supported on GFXv9 hardware.

Only on Aldebaran can we support different processes running with
different XNACK modes. On older chips all processes must use the same
RETRY_DISABLE setting. However, processes not relying on recoverable
page faults can work with RETRY enabled. This means XNACK off is always
available as a fallback, so we can use the same mode on all GPUs in a
process.
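To make the interplay of the knobs explicit, the per-process decision
can be written as a single predicate (distilled from the update_qpd_v9()
change in this patch; the helper itself is illustrative):

	/* Should RETRY_DISABLE be set in SH_MEM_CONFIG for this process? */
	static bool sq_retry_disabled(struct kfd_dev *dev, bool xnack_enabled)
	{
		/* Aldebaran: a true per-process choice. */
		if (dev->device_info->asic_family == CHIP_ALDEBARAN)
			return !xnack_enabled;

		/* Everyone else follows the boot-time noretry setting. */
		return dev->noretry && !dev->use_iommu_v2;
	}

Because the previous patch forces hub-level retry on for Aldebaran, this
SQ bit alone then decides whether a process observes XNACK replay.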
Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../drm/amd/amdkfd/kfd_device_queue_manager_v9.c | 13 +++++- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 ++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 53 ++++++++++++++++++++++ 3 files changed, 68 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c index eca6331efa94..b5c3d13643f1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -61,10 +61,19 @@ static int update_qpd_v9(struct device_queue_manager *dqm, qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; - if (dqm->dev->noretry && - !dqm->dev->use_iommu_v2) + + if (dqm->dev->device_info->asic_family == CHIP_ALDEBARAN) { + /* Aldebaran can safely support different XNACK modes + * per process + */ + if (!pdd->process->xnack_enabled) + qpd->sh_mem_config |= + 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; + } else if (dqm->dev->noretry && + !dqm->dev->use_iommu_v2) { qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; + } qpd->sh_mem_ape1_limit = 0; qpd->sh_mem_ape1_base = 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 81f71c4079a6..59423c886930 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -824,6 +824,8 @@ struct kfd_process { /* shared virtual memory registered by this process */ struct svm_range_list svms; bool svm_disabled; + + bool xnack_enabled; }; #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ @@ -883,6 +885,8 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, struct kfd_process *p); +bool kfd_process_xnack_mode(struct kfd_process *p, bool supported); + int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, struct vm_area_struct *vma); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 3c72e9dc6422..b8db509e2bbd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1193,6 +1193,56 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd, } } +bool kfd_process_xnack_mode(struct kfd_process *p, bool supported) +{ + int i; + + /* On most GFXv9 GPUs, the retry mode in the SQ must match the + * boot time retry setting. Mixing processes with different + * XNACK/retry settings can hang the GPU. + * + * Different GPUs can have different noretry settings depending + * on HW bugs or limitations. We need to find at least one + * XNACK mode for this process that's compatible with all GPUs. + * Fortunately GPUs with retry enabled (noretry=0) can run code + * built for XNACK-off. On GFXv9 it may perform slower. + * + * Therefore applications built for XNACK-off can always be + * supported and will be our fallback if any GPU does not + * support retry. + */ + for (i = 0; i < p->n_pdds; i++) { + struct kfd_dev *dev = p->pdds[i]->dev; + + /* Only consider GFXv9 and higher GPUs. Older GPUs don't + * support the SVM APIs and don't need to be considered + * for the XNACK mode selection. + */ + if (dev->device_info->asic_family < CHIP_VEGA10) + continue; + /* Aldebaran can always support XNACK because it can support + * per-process XNACK mode selection. 
But let the dev->noretry + * setting still influence the default XNACK mode. + */ + if (supported && + dev->device_info->asic_family == CHIP_ALDEBARAN) + continue; + + /* GFXv10 and later GPUs do not support shader preemption + * during page faults. This can lead to poor QoS for queue + * management and memory-manager-related preemptions or + * even deadlocks. + */ + if (dev->device_info->asic_family >= CHIP_NAVI10) + return false; + + if (dev->noretry) + return false; + } + + return true; +} + /* * On return the kfd_process is fully operational and will be freed when the * mm is released @@ -1232,6 +1282,9 @@ static struct kfd_process *create_process(const struct task_struct *thread) if (err != 0) goto err_init_apertures; + /* Check XNACK support after PDDs are created in kfd_init_apertures */ + process->xnack_enabled = kfd_process_xnack_mode(process, false); + err = svm_range_list_init(process); if (err) goto err_init_svm_range_list; -- cgit From 0f7b5c44d4c53710993e4773bd6eaf171f1888e6 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Thu, 28 May 2020 18:27:05 -0500 Subject: drm/amdkfd: add ioctl to configure and query xnack retries Xnack retries are used for page fault recovery. Some AMD chip families support continuously retry while page table entries are invalid. The driver must handle the page fault interrupt and fill in a valid entry for the GPU to continue. This ioctl allows to enable/disable XNACK retries per KFD process. Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 28 +++++++++++++++++++++ include/uapi/linux/kfd_ioctl.h | 43 +++++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index c31ce8c88df4..913cc29d8857 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1745,6 +1745,31 @@ static int kfd_ioctl_smi_events(struct file *filep, return kfd_smi_event_open(dev, &args->anon_fd); } +static int kfd_ioctl_set_xnack_mode(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_set_xnack_mode_args *args = data; + int r = 0; + + mutex_lock(&p->mutex); + if (args->xnack_enabled >= 0) { + if (!list_empty(&p->pqm.queues)) { + pr_debug("Process has user queues running\n"); + mutex_unlock(&p->mutex); + return -EBUSY; + } + if (args->xnack_enabled && !kfd_process_xnack_mode(p, true)) + r = -EPERM; + else + p->xnack_enabled = args->xnack_enabled; + } else { + args->xnack_enabled = p->xnack_enabled; + } + mutex_unlock(&p->mutex); + + return r; +} + static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) { struct kfd_ioctl_svm_args *args = data; @@ -1871,6 +1896,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { kfd_ioctl_smi_events, 0), AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE, + kfd_ioctl_set_xnack_mode, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 247b57baa94f..3cb5b5dd9f77 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -597,6 +597,44 @@ struct kfd_ioctl_svm_args { struct kfd_ioctl_svm_attribute attrs[0]; }; +/** + * kfd_ioctl_set_xnack_mode_args - Arguments for set_xnack_mode + * + * @xnack_enabled: [in/out] Whether to enable XNACK mode for this process 
+ *
+ * @xnack_enabled indicates whether recoverable page faults should be
+ * enabled for the current process. 0 means disabled, positive means
+ * enabled, negative means leave unchanged. If enabled, virtual address
+ * translations on GFXv9 and later AMD GPUs can return XNACK and retry
+ * the access until a valid PTE is available. This is used to implement
+ * device page faults.
+ *
+ * On output, @xnack_enabled returns the (new) current mode (0 or
+ * positive). Therefore, a negative input value can be used to query
+ * the current mode without changing it.
+ *
+ * The XNACK mode fundamentally changes the way SVM managed memory works
+ * in the driver, with subtle effects on application performance and
+ * functionality.
+ *
+ * Enabling XNACK mode requires shader programs to be compiled
+ * differently. Furthermore, not all GPUs support changing the mode
+ * per-process. Therefore changing the mode is only allowed while no
+ * user mode queues exist in the process. This ensures that no shader
+ * code is running that may be compiled for the wrong mode. Requesting
+ * an XNACK mode that any GPU in the process cannot support will fail.
+ * All GPUs used by the process must be in the same XNACK mode.
+ *
+ * GFXv8 or older GPUs do not support 48 bit virtual addresses or SVM.
+ * Therefore those GPUs are not considered for the XNACK mode switch.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+struct kfd_ioctl_set_xnack_mode_args {
+	__s32 xnack_enabled;
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)			_IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)		_IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -699,7 +737,10 @@ struct kfd_ioctl_svm_args {
 #define AMDKFD_IOC_SVM	AMDKFD_IOWR(0x20, struct kfd_ioctl_svm_args)
 
+#define AMDKFD_IOC_SET_XNACK_MODE \
+		AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x21
+#define AMDKFD_COMMAND_END		0x22
 
 #endif
--
cgit

From 814ab9930cfd709768439799eae3c7ef0a658b54 Mon Sep 17 00:00:00 2001
From: Philip Yang
Date: Fri, 7 Feb 2020 15:34:33 -0500
Subject: drm/amdkfd: register HMM device private zone

Register vram memory as a MEMORY_DEVICE_PRIVATE type resource, to
allocate vram backing pages for page migration.
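Once devm_memremap_pages() has created the zone, every VRAM page gets a
struct page whose pgmap identifies its owner. A sketch of how such pages
can later be distinguished from ordinary system pages (the helper is
illustrative; the real checks live in the migration code built on top of
this zone):

	#include <linux/mm.h>
	#include <linux/memremap.h>
	#include <linux/hmm.h>

	/* Does this HMM pfn point into our device-private VRAM zone? */
	static bool page_is_our_vram(unsigned long hmm_pfn, void *owner)
	{
		struct page *page = hmm_pfn_to_page(hmm_pfn);

		return page && is_zone_device_page(page) &&
		       page->pgmap->type == MEMORY_DEVICE_PRIVATE &&
		       page->pgmap->owner == owner;
	}

Setting pgmap->owner to the adev below is what makes this kind of
ownership test possible when several devices register private zones.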
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/Kconfig | 2 + drivers/gpu/drm/amd/amdkfd/Makefile | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 ++ drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 104 +++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 49 +++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 + 6 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index f02c938f75da..fb8d85716599 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -8,6 +8,8 @@ config HSA_AMD depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64) imply AMD_IOMMU_V2 if X86_64 select HMM_MIRROR + select ZONE_DEVICE + select DEVICE_PRIVATE select MMU_NOTIFIER select DRM_AMDGPU_USERPTR help diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 387ce0217d35..a93301dbc464 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -55,7 +55,8 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_dbgmgr.o \ $(AMDKFD_PATH)/kfd_smi_events.o \ $(AMDKFD_PATH)/kfd_crat.o \ - $(AMDKFD_PATH)/kfd_svm.o + $(AMDKFD_PATH)/kfd_svm.o \ + $(AMDKFD_PATH)/kfd_migrate.o ifneq ($(CONFIG_AMD_IOMMU_V2),) AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 357b9bf62a1c..b31bae91fbd0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -30,6 +30,7 @@ #include "kfd_iommu.h" #include "amdgpu_amdkfd.h" #include "kfd_smi_events.h" +#include "kfd_migrate.h" #define MQD_SIZE_ALIGNED 768 @@ -814,6 +815,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd_cwsr_init(kfd); + svm_migrate_init((struct amdgpu_device *)kfd->kgd); + if (kfd_resume(kfd)) goto kfd_resume_error; @@ -862,6 +865,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) { if (kfd->init_complete) { kgd2kfd_suspend(kfd, false); + svm_migrate_fini((struct amdgpu_device *)kfd->kgd); device_queue_manager_uninit(kfd->dqm); kfd_interrupt_exit(kfd); kfd_topology_remove_device(kfd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c new file mode 100644 index 000000000000..b32d8fdfe8e9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2020-2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include "amdgpu_sync.h" +#include "amdgpu_object.h" +#include "amdgpu_vm.h" +#include "amdgpu_mn.h" +#include "kfd_priv.h" +#include "kfd_svm.h" +#include "kfd_migrate.h" + +static void svm_migrate_page_free(struct page *page) +{ +} + +/** + * svm_migrate_to_ram - CPU page fault handler + * @vmf: CPU vm fault vma, address + * + * Context: vm fault handler, mm->mmap_sem is taken + * + * Return: + * 0 - OK + * VM_FAULT_SIGBUS - notice application to have SIGBUS page fault + */ +static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + +static const struct dev_pagemap_ops svm_migrate_pgmap_ops = { + .page_free = svm_migrate_page_free, + .migrate_to_ram = svm_migrate_to_ram, +}; + +int svm_migrate_init(struct amdgpu_device *adev) +{ + struct kfd_dev *kfddev = adev->kfd.dev; + struct dev_pagemap *pgmap; + struct resource *res; + unsigned long size; + void *r; + + /* Page migration works on Vega10 or newer */ + if (kfddev->device_info->asic_family < CHIP_VEGA10) + return -EINVAL; + + pgmap = &kfddev->pgmap; + memset(pgmap, 0, sizeof(*pgmap)); + + /* TODO: register all vram to HMM for now. + * should remove reserved size + */ + size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20); + res = devm_request_free_mem_region(adev->dev, &iomem_resource, size); + if (IS_ERR(res)) + return -ENOMEM; + + pgmap->type = MEMORY_DEVICE_PRIVATE; + pgmap->nr_range = 1; + pgmap->range.start = res->start; + pgmap->range.end = res->end; + pgmap->ops = &svm_migrate_pgmap_ops; + pgmap->owner = adev; + pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; + r = devm_memremap_pages(adev->dev, pgmap); + if (IS_ERR(r)) { + pr_err("failed to register HMM device memory\n"); + return PTR_ERR(r); + } + + pr_info("HMM registered %ldMB device memory\n", size >> 20); + + return 0; +} + +void svm_migrate_fini(struct amdgpu_device *adev) +{ + memunmap_pages(&adev->kfd.dev->pgmap); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h new file mode 100644 index 000000000000..89392548ec44 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright 2020-2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_MIGRATE_H_
+#define KFD_MIGRATE_H_
+
+#include
+#include
+#include
+#include
+#include
+#include "kfd_priv.h"
+#include "kfd_svm.h"
+
+#if defined(CONFIG_DEVICE_PRIVATE)
+int svm_migrate_init(struct amdgpu_device *adev);
+void svm_migrate_fini(struct amdgpu_device *adev);
+
+#else
+static inline int svm_migrate_init(struct amdgpu_device *adev)
+{
+	DRM_WARN_ONCE("DEVICE_PRIVATE kernel config option is not enabled, "
+		      "add CONFIG_DEVICE_PRIVATE=y in config file to fix\n");
+	return -ENODEV;
+}
+static inline void svm_migrate_fini(struct amdgpu_device *adev) {}
+#endif
+#endif /* KFD_MIGRATE_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 59423c886930..a58bea31e23c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -322,6 +322,9 @@ struct kfd_dev {
 	unsigned int max_doorbell_slices;
 
 	int noretry;
+
+	/* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
+	struct dev_pagemap pgmap;
 };
 
 enum kfd_mempool {
--
cgit

From c46ebb6a6d9d28fce66595b56db070b0cc4fab71 Mon Sep 17 00:00:00 2001
From: Philip Yang
Date: Thu, 15 Apr 2021 17:43:32 -0400
Subject: drm/amdkfd: set memory limit to avoid OOM with HMM enabled

HMM migration allocates sizeof(struct page) of system memory for each
VRAM page, which amounts to 1GB of system memory reserved for 64GB of
VRAM. To avoid application OOM, increase the accounted system memory
usage based on the VRAM size of all GPUs; application memory
allocations will then fail once system memory usage reaches the limit.
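The 1GB-per-64GB figure follows directly from the reservation macro
added below: the overhead ratio is sizeof(struct page) / PAGE_SIZE, i.e.
64 / 4096 = 1/64 on a typical x86-64 configuration. A self-contained
sanity check (user-space C; the 64-byte struct page size is an
assumption of this sketch, not something the patch depends on):

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SZ          4096ULL
	#define STRUCT_PAGE_SZ     64ULL /* assumed sizeof(struct page) */
	/* Mirrors SVM_HMM_PAGE_STRUCT_SIZE() from the patch below. */
	#define PAGE_STRUCT_RESERVE(vram) ((vram) / PAGE_SZ * STRUCT_PAGE_SZ)

	int main(void)
	{
		uint64_t vram = 64ULL << 30; /* 64 GiB of VRAM */

		printf("%llu MiB reserved\n",
		       (unsigned long long)(PAGE_STRUCT_RESERVE(vram) >> 20));
		return 0; /* prints: 1024 MiB reserved */
	}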
Signed-off-by: Philip Yang
Reviewed-by: Oak Zeng
Reviewed-by: Felix Kuehling
Signed-off-by: Felix Kuehling
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h       | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 5 +++++
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c         | 8 ++++++++
 3 files changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 670e9f8d58b4..b3e5ecec316c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -275,6 +275,7 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
 				struct amdgpu_vm *vm);
 void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
+void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
 #else
 static inline
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 618614fe3eb4..d03088d1eb60 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -108,6 +108,11 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 		(kfd_mem_limit.max_ttm_mem_limit >> 20));
 }
 
+void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
+{
+	kfd_mem_limit.system_mem_used += size;
+}
+
 /* Estimate page table size needed to represent a given memory size
  *
  * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index b32d8fdfe8e9..d8cec5ebe1d4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -57,6 +57,9 @@ static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
 	.migrate_to_ram = svm_migrate_to_ram,
 };
 
+/* Each VRAM page uses sizeof(struct page) on system memory */
+#define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))
+
 int svm_migrate_init(struct amdgpu_device *adev)
 {
 	struct kfd_dev *kfddev = adev->kfd.dev;
@@ -93,6 +96,11 @@ int svm_migrate_init(struct amdgpu_device *adev)
 		return PTR_ERR(r);
 	}
 
+	pr_debug("reserve %ldMB system memory for VRAM pages struct\n",
+		 SVM_HMM_PAGE_STRUCT_SIZE(size) >> 20);
+
+	amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size));
+
 	pr_info("HMM registered %ldMB device memory\n", size >> 20);
 
 	return 0;
--
cgit

From e49fe4040a10c1cd3b215c511f658d15aa7c8be9 Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Wed, 24 Feb 2021 19:56:09 -0500
Subject: drm/amdkfd: validate vram svm range from TTM

If the svm range prefetch location is not zero, use TTM to allocate
amdgpu_bo vram nodes to validate the svm range, then map the vram nodes
to GPUs.

Use an offset to sub-allocate from the same amdgpu_bo to handle
overlapping vram ranges while adding a new range or unmapping a range.
svm_bo has a ref count to track the ranges sharing it. If all ranges of
a shared amdgpu_bo are migrated to ram, the ref count becomes 0, the
amdgpu_bo is released, and every range's svm_bo is set to NULL. To
migrate a range from ram back to vram, allocate from the same amdgpu_bo
at the previous offset if the range still has an svm_bo.

If a prange is migrated to VRAM, no CPU mapping exists, so process exit
will not get an unmap callback for this prange to free the prange and
svm_bo. Free outstanding pranges from the svms list before the process
is freed in svm_range_list_fini.
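To make the offset bookkeeping concrete, here is a worked example of
what svm_range_split_nodes() (added below) does when a VRAM-backed range
is split; the page numbers are illustrative:

	/* Parent prange covers pages [0x1000..0x13ff] (0x400 pages) at
	 * offset 0x100 into its svm_bo.
	 *
	 * Splitting off the tail [0x1200..0x13ff]:
	 *   old keeps [0x1000..0x11ff] -> 0x200 pages, offset stays 0x100
	 *   new->start != old->start   -> new->offset = 0x100 + 0x200 = 0x300
	 *
	 * Splitting off the head [0x1000..0x11ff] instead:
	 *   new->start == old->start   -> new->offset = 0x100 (inherited),
	 *                                 old->offset += new->npages -> 0x300
	 *
	 * Both halves reference the same svm_bo via svm_range_bo_ref(), so
	 * the backing VRAM is only freed when the last split range drops
	 * its reference.
	 */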
Signed-off-by: Philip Yang Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 296 ++++++++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 20 +++ 2 files changed, 309 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 1fe6913242d7..261160d3de9e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -45,7 +45,8 @@ static const struct mmu_interval_notifier_ops svm_range_mn_ops = { * svm_range_unlink - unlink svm_range from lists and interval tree * @prange: svm range structure to be removed * - * Remove the svm range from svms interval tree and link list + * Remove the svm_range from the svms and svm_bo lists and the svms + * interval tree. * * Context: The caller must hold svms->lock */ @@ -54,6 +55,12 @@ static void svm_range_unlink(struct svm_range *prange) pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, prange->start, prange->last); + if (prange->svm_bo) { + spin_lock(&prange->svm_bo->list_lock); + list_del(&prange->svm_bo_list); + spin_unlock(&prange->svm_bo->list_lock); + } + list_del(&prange->list); if (prange->it_node.start != 0 && prange->it_node.last != 0) interval_tree_remove(&prange->it_node, &prange->svms->objects); @@ -218,6 +225,7 @@ static void svm_range_free(struct svm_range *prange) pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, prange->start, prange->last); + svm_range_vram_node_free(prange); svm_range_free_dma_mappings(prange); mutex_destroy(&prange->lock); kfree(prange); @@ -252,6 +260,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, INIT_LIST_HEAD(&prange->update_list); INIT_LIST_HEAD(&prange->remove_list); INIT_LIST_HEAD(&prange->insert_list); + INIT_LIST_HEAD(&prange->svm_bo_list); INIT_LIST_HEAD(&prange->deferred_list); INIT_LIST_HEAD(&prange->child_list); atomic_set(&prange->invalid, 0); @@ -265,6 +274,210 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, return prange; } +static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo) +{ + if (!svm_bo || !kref_get_unless_zero(&svm_bo->kref)) + return false; + + return true; +} + +static struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo) +{ + if (svm_bo) + kref_get(&svm_bo->kref); + + return svm_bo; +} + +static void svm_range_bo_release(struct kref *kref) +{ + struct svm_range_bo *svm_bo; + + svm_bo = container_of(kref, struct svm_range_bo, kref); + spin_lock(&svm_bo->list_lock); + while (!list_empty(&svm_bo->range_list)) { + struct svm_range *prange = + list_first_entry(&svm_bo->range_list, + struct svm_range, svm_bo_list); + /* list_del_init tells a concurrent svm_range_vram_node_new when + * it's safe to reuse the svm_bo pointer and svm_bo_list head. 
+ */ + list_del_init(&prange->svm_bo_list); + spin_unlock(&svm_bo->list_lock); + + pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, + prange->start, prange->last); + mutex_lock(&prange->lock); + prange->svm_bo = NULL; + mutex_unlock(&prange->lock); + + spin_lock(&svm_bo->list_lock); + } + spin_unlock(&svm_bo->list_lock); + + amdgpu_bo_unref(&svm_bo->bo); + kfree(svm_bo); +} + +static void svm_range_bo_unref(struct svm_range_bo *svm_bo) +{ + if (!svm_bo) + return; + + kref_put(&svm_bo->kref, svm_range_bo_release); +} + +static struct svm_range_bo *svm_range_bo_new(void) +{ + struct svm_range_bo *svm_bo; + + svm_bo = kzalloc(sizeof(*svm_bo), GFP_KERNEL); + if (!svm_bo) + return NULL; + + kref_init(&svm_bo->kref); + INIT_LIST_HEAD(&svm_bo->range_list); + spin_lock_init(&svm_bo->list_lock); + + return svm_bo; +} + +int +svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, + bool clear) +{ + struct amdkfd_process_info *process_info; + struct amdgpu_bo_param bp; + struct svm_range_bo *svm_bo; + struct amdgpu_bo_user *ubo; + struct amdgpu_bo *bo; + struct kfd_process *p; + int r; + + pr_debug("[0x%lx 0x%lx]\n", prange->start, prange->last); + mutex_lock(&prange->lock); + if (prange->svm_bo) { + if (prange->ttm_res) { + /* We still have a reference, all is well */ + mutex_unlock(&prange->lock); + return 0; + } + if (svm_bo_ref_unless_zero(prange->svm_bo)) { + /* The BO was still around and we got + * a new reference to it + */ + mutex_unlock(&prange->lock); + pr_debug("reuse old bo [0x%lx 0x%lx]\n", + prange->start, prange->last); + + prange->ttm_res = &prange->svm_bo->bo->tbo.mem; + return 0; + } + + mutex_unlock(&prange->lock); + + /* We need a new svm_bo. Spin-loop to wait for concurrent + * svm_range_bo_release to finish removing this range from + * its range list. After this, it is safe to reuse the + * svm_bo pointer and svm_bo_list head. + */ + while (!list_empty_careful(&prange->svm_bo_list)) + ; + + } else { + mutex_unlock(&prange->lock); + } + + svm_bo = svm_range_bo_new(); + if (!svm_bo) { + pr_debug("failed to alloc svm bo\n"); + return -ENOMEM; + } + + memset(&bp, 0, sizeof(bp)); + bp.size = prange->npages * PAGE_SIZE; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + bp.flags |= clear ? 
AMDGPU_GEM_CREATE_VRAM_CLEARED : 0; + bp.type = ttm_bo_type_device; + bp.resv = NULL; + + r = amdgpu_bo_create_user(adev, &bp, &ubo); + if (r) { + pr_debug("failed %d to create bo\n", r); + kfree(svm_bo); + return r; + } + bo = &ubo->bo; + + p = container_of(prange->svms, struct kfd_process, svms); + r = amdgpu_bo_reserve(bo, true); + if (r) { + pr_debug("failed %d to reserve bo\n", r); + goto reserve_bo_failed; + } + + r = dma_resv_reserve_shared(bo->tbo.base.resv, 1); + if (r) { + pr_debug("failed %d to reserve bo\n", r); + amdgpu_bo_unreserve(bo); + goto reserve_bo_failed; + } + process_info = p->kgd_process_info; + amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true); + + amdgpu_bo_unreserve(bo); + + svm_bo->bo = bo; + prange->svm_bo = svm_bo; + prange->ttm_res = &bo->tbo.mem; + prange->offset = 0; + + spin_lock(&svm_bo->list_lock); + list_add(&prange->svm_bo_list, &svm_bo->range_list); + spin_unlock(&svm_bo->list_lock); + + return 0; + +reserve_bo_failed: + kfree(svm_bo); + amdgpu_bo_unref(&bo); + prange->ttm_res = NULL; + + return r; +} + +void svm_range_vram_node_free(struct svm_range *prange) +{ + svm_range_bo_unref(prange->svm_bo); + prange->ttm_res = NULL; +} + +struct amdgpu_device * +svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id) +{ + struct kfd_process_device *pdd; + struct kfd_process *p; + int32_t gpu_idx; + + p = container_of(prange->svms, struct kfd_process, svms); + + gpu_idx = kfd_process_gpuidx_from_gpuid(p, gpu_id); + if (gpu_idx < 0) { + pr_debug("failed to get device by id 0x%x\n", gpu_id); + return NULL; + } + pdd = kfd_process_device_from_gpuidx(p, gpu_idx); + if (!pdd) { + pr_debug("failed to get device by idx 0x%x\n", gpu_idx); + return NULL; + } + + return (struct amdgpu_device *)pdd->dev->kgd; +} + static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo) { struct ttm_operation_ctx ctx = { false, false }; @@ -471,6 +684,32 @@ svm_range_split_pages(struct svm_range *new, struct svm_range *old, return 0; } +static int +svm_range_split_nodes(struct svm_range *new, struct svm_range *old, + uint64_t start, uint64_t last) +{ + uint64_t npages = last - start + 1; + + pr_debug("svms 0x%p new prange 0x%p start 0x%lx [0x%llx 0x%llx]\n", + new->svms, new, new->start, start, last); + + if (new->start == old->start) { + new->offset = old->offset; + old->offset += new->npages; + } else { + new->offset = old->offset + npages; + } + + new->svm_bo = svm_range_bo_ref(old->svm_bo); + new->ttm_res = old->ttm_res; + + spin_lock(&new->svm_bo->list_lock); + list_add(&new->svm_bo_list, &new->svm_bo->range_list); + spin_unlock(&new->svm_bo->list_lock); + + return 0; +} + /** * svm_range_split_adjust - split range and adjust * @@ -479,7 +718,7 @@ svm_range_split_pages(struct svm_range *new, struct svm_range *old, * @start: the old range adjust to start address in pages * @last: the old range adjust to last address in pages * - * Copy system memory dma_addr in old range to new + * Copy system memory dma_addr or vram ttm_res in old range to new * range from new_start up to size new->npages, the remaining old range is from * start to last * @@ -505,6 +744,12 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old, if (r) return r; + if (old->actual_loc && old->ttm_res) { + r = svm_range_split_nodes(new, old, start, last); + if (r) + return r; + } + old->npages = last - start + 1; old->start = start; old->last = last; @@ -619,7 +864,8 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange) uint64_t pte_flags; 
pte_flags = AMDGPU_PTE_VALID; - pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED; + if (!prange->ttm_res) + pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED; mapping_flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE; @@ -639,7 +885,9 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange) /* Apply ASIC specific mapping flags */ amdgpu_gmc_get_vm_pte(adev, &prange->mapping, &pte_flags); - pr_debug("PTE flags 0x%llx\n", pte_flags); + pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE flags 0x%llx\n", + prange->svms, prange->start, prange->last, + prange->ttm_res ? 1:0, pte_flags); return pte_flags; } @@ -715,13 +963,15 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, prange->mapping.start = prange->start; prange->mapping.last = prange->last; - prange->mapping.offset = 0; + prange->mapping.offset = prange->offset; pte_flags = svm_range_get_pte_flags(adev, prange); r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, NULL, prange->mapping.start, prange->mapping.last, pte_flags, - prange->mapping.offset, NULL, + prange->mapping.offset, + prange->ttm_res ? + prange->ttm_res->mm_node : NULL, dma_addr, &vm->last_update); if (r) { pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start); @@ -817,6 +1067,11 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx) ctx->tv[gpuidx].num_shared = 4; list_add(&ctx->tv[gpuidx].head, &ctx->validate_list); } + if (ctx->prange->svm_bo && ctx->prange->ttm_res) { + ctx->tv[MAX_GPU_INSTANCE].bo = &ctx->prange->svm_bo->bo->tbo; + ctx->tv[MAX_GPU_INSTANCE].num_shared = 1; + list_add(&ctx->tv[MAX_GPU_INSTANCE].head, &ctx->validate_list); + } r = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->validate_list, ctx->intr, NULL); @@ -901,6 +1156,14 @@ static int svm_range_validate_and_map(struct mm_struct *mm, if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) return 0; + if (prange->actual_loc && !prange->ttm_res) { + /* This should never happen. actual_loc gets set by + * svm_migrate_ram_to_vram after allocating a BO. 
+ */ + WARN(1, "VRAM BO missing during validation\n"); + return -EINVAL; + } + svm_range_reserve_bos(&ctx); if (!prange->actual_loc) { @@ -1098,6 +1361,14 @@ static struct svm_range *svm_range_clone(struct svm_range *old) if (!new) return NULL; + if (old->svm_bo) { + new->ttm_res = old->ttm_res; + new->offset = old->offset; + new->svm_bo = svm_range_bo_ref(old->svm_bo); + spin_lock(&new->svm_bo->list_lock); + list_add(&new->svm_bo_list, &new->svm_bo->range_list); + spin_unlock(&new->svm_bo->list_lock); + } new->flags = old->flags; new->preferred_loc = old->preferred_loc; new->prefetch_loc = old->prefetch_loc; @@ -1507,12 +1778,23 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, void svm_range_list_fini(struct kfd_process *p) { - mutex_destroy(&p->svms.lock); + struct svm_range *prange; + struct svm_range *next; pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms); /* Ensure list work is finished before process is destroyed */ flush_work(&p->svms.deferred_list_work); + + list_for_each_entry_safe(prange, next, &p->svms.list, list) { + svm_range_unlink(prange); + svm_range_remove_notifier(prange); + svm_range_free(prange); + } + + mutex_destroy(&p->svms.lock); + + pr_debug("pasid 0x%x svms 0x%p done\n", p->pasid, &p->svms); } int svm_range_list_init(struct kfd_process *p) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 3c94899c5c40..0aab88c71855 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -33,6 +33,13 @@ #include "amdgpu.h" #include "kfd_priv.h" +struct svm_range_bo { + struct amdgpu_bo *bo; + struct kref kref; + struct list_head range_list; /* all svm ranges shared this bo */ + spinlock_t list_lock; +}; + enum svm_work_list_ops { SVM_OP_NULL, SVM_OP_UNMAP_RANGE, @@ -60,6 +67,10 @@ struct svm_work_list_item { * @mapping: bo_va mapping structure to create and update GPU page table * @npages: number of pages * @dma_addr: dma mapping address on each GPU for system memory physical page + * @ttm_res: vram ttm resource map + * @offset: range start offset within mm_nodes + * @svm_bo: struct to manage splited amdgpu_bo + * @svm_bo_list:link list node, to scan all ranges which share same svm_bo * @lock: protect prange start, last, child_list, svm_bo_list * @saved_flags:save/restore current PF_MEMALLOC flags * @flags: flags defined as KFD_IOCTL_SVM_FLAG_* @@ -91,6 +102,10 @@ struct svm_range { struct amdgpu_bo_va_mapping mapping; uint64_t npages; dma_addr_t *dma_addr[MAX_GPU_INSTANCE]; + struct ttm_resource *ttm_res; + uint64_t offset; + struct svm_range_bo *svm_bo; + struct list_head svm_bo_list; struct mutex lock; unsigned int saved_flags; uint32_t flags; @@ -124,5 +139,10 @@ void svm_range_list_fini(struct kfd_process *p); int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start, uint64_t size, uint32_t nattrs, struct kfd_ioctl_svm_attribute *attrs); +struct amdgpu_device *svm_range_get_adev_by_id(struct svm_range *prange, + uint32_t id); +int svm_range_vram_node_new(struct amdgpu_device *adev, + struct svm_range *prange, bool clear); +void svm_range_vram_node_free(struct svm_range *prange); #endif /* KFD_SVM_H_ */ -- cgit From b53fa124acdcec6d05bcdb36b55bf0f84471b1b7 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Tue, 23 Jun 2020 17:06:53 -0400 Subject: drm/amdkfd: support xgmi same hive mapping amdgpu_gmc_get_vm_pte use bo_va->is_xgmi same hive information to set pte flags to update GPU mapping. 
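The per-ASIC cache-type choice this drives can be summarized as a small decision table. The sketch below distills the Arcturus branch of svm_range_get_pte_flags() from the diff that follows into standalone C; the enum, struct and helper names here are illustrative, not driver API, and Aldebaran adds one more case (snooping local VRAM when the GPU is XGMI-connected to the CPU).

#include <stdbool.h>
#include <stdio.h>

enum mtype { MTYPE_NC, MTYPE_UC, MTYPE_RW, MTYPE_CC };

struct map_decision {
	enum mtype mtype;
	bool snoop;
};

/* Decide cache type and snooping for one mapping, Arcturus-style. */
static struct map_decision decide(bool in_vram, bool local_gpu,
				  bool same_xgmi_hive, bool coherent)
{
	struct map_decision d;

	if (!in_vram) {
		/* System memory is always snooped. */
		d.mtype = coherent ? MTYPE_UC : MTYPE_NC;
		d.snoop = true;
	} else if (local_gpu) {
		/* VRAM of the GPU doing the mapping. */
		d.mtype = coherent ? MTYPE_CC : MTYPE_RW;
		d.snoop = false;
	} else {
		/* VRAM of a peer GPU: uncached; snoop only inside a hive. */
		d.mtype = MTYPE_UC;
		d.snoop = same_xgmi_hive;
	}
	return d;
}

int main(void)
{
	struct map_decision d = decide(true, false, true, false);

	printf("peer xgmi vram: mtype=%d snoop=%d\n", d.mtype, d.snoop);
	return 0;
}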
Add local structure variable bo_va, and update bo_va.is_xgmi, pass it to mapping->bo_va while mapping to GPU. Assuming xgmi pstate is hi after boot. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 94 ++++++++++++++++++++++++++++-------- 1 file changed, 75 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 261160d3de9e..7a70f5e92f18 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -27,6 +27,8 @@ #include "amdgpu_object.h" #include "amdgpu_vm.h" #include "amdgpu_mn.h" +#include "amdgpu.h" +#include "amdgpu_xgmi.h" #include "kfd_priv.h" #include "kfd_svm.h" @@ -859,35 +861,70 @@ svm_range_add_child(struct svm_range *prange, struct mm_struct *mm, static uint64_t svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange) { + struct amdgpu_device *bo_adev; uint32_t flags = prange->flags; - uint32_t mapping_flags; + uint32_t mapping_flags = 0; uint64_t pte_flags; + bool snoop = !prange->ttm_res; + bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT; + + if (prange->svm_bo && prange->ttm_res) + bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); + + switch (adev->asic_type) { + case CHIP_ARCTURUS: + if (prange->svm_bo && prange->ttm_res) { + if (bo_adev == adev) { + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; + } else { + mapping_flags |= AMDGPU_VM_MTYPE_UC; + if (amdgpu_xgmi_same_hive(adev, bo_adev)) + snoop = true; + } + } else { + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + } + break; + case CHIP_ALDEBARAN: + if (prange->svm_bo && prange->ttm_res) { + if (bo_adev == adev) { + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; + if (adev->gmc.xgmi.connected_to_cpu) + snoop = true; + } else { + mapping_flags |= AMDGPU_VM_MTYPE_UC; + if (amdgpu_xgmi_same_hive(adev, bo_adev)) + snoop = true; + } + } else { + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + } + break; + default: + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + } - pte_flags = AMDGPU_PTE_VALID; - if (!prange->ttm_res) - pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED; - - mapping_flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE; + mapping_flags |= AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE; if (flags & KFD_IOCTL_SVM_FLAG_GPU_RO) mapping_flags &= ~AMDGPU_VM_PAGE_WRITEABLE; if (flags & KFD_IOCTL_SVM_FLAG_GPU_EXEC) mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; - if (flags & KFD_IOCTL_SVM_FLAG_COHERENT) - mapping_flags |= AMDGPU_VM_MTYPE_UC; - else - mapping_flags |= AMDGPU_VM_MTYPE_NC; - /* TODO: add CHIP_ARCTURUS new flags for vram mapping */ + pte_flags = AMDGPU_PTE_VALID; + pte_flags |= prange->ttm_res ? 0 : AMDGPU_PTE_SYSTEM; + pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0; pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags); - /* Apply ASIC specific mapping flags */ - amdgpu_gmc_get_vm_pte(adev, &prange->mapping, &pte_flags); - - pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE flags 0x%llx\n", + pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n", prange->svms, prange->start, prange->last, - prange->ttm_res ? 1:0, pte_flags); + prange->ttm_res ? 
1:0, pte_flags, mapping_flags); return pte_flags; } @@ -953,20 +990,26 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, static int svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct svm_range *prange, dma_addr_t *dma_addr, - struct dma_fence **fence) + struct amdgpu_device *bo_adev, struct dma_fence **fence) { + struct amdgpu_bo_va bo_va; uint64_t pte_flags; int r = 0; pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, prange->last); + if (prange->svm_bo && prange->ttm_res) { + bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev); + prange->mapping.bo_va = &bo_va; + } + prange->mapping.start = prange->start; prange->mapping.last = prange->last; prange->mapping.offset = prange->offset; pte_flags = svm_range_get_pte_flags(adev, prange); - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, NULL, + r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL, prange->mapping.start, prange->mapping.last, pte_flags, prange->mapping.offset, @@ -989,6 +1032,7 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, *fence = dma_fence_get(vm->last_update); out: + prange->mapping.bo_va = NULL; return r; } @@ -996,12 +1040,18 @@ static int svm_range_map_to_gpus(struct svm_range *prange, unsigned long *bitmap, bool wait) { struct kfd_process_device *pdd; + struct amdgpu_device *bo_adev; struct amdgpu_device *adev; struct kfd_process *p; struct dma_fence *fence = NULL; uint32_t gpuidx; int r = 0; + if (prange->svm_bo && prange->ttm_res) + bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); + else + bo_adev = NULL; + p = container_of(prange->svms, struct kfd_process, svms); for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { pdd = kfd_process_device_from_gpuidx(p, gpuidx); @@ -1015,9 +1065,15 @@ static int svm_range_map_to_gpus(struct svm_range *prange, if (IS_ERR(pdd)) return -EINVAL; + if (bo_adev && adev != bo_adev && + !amdgpu_xgmi_same_hive(adev, bo_adev)) { + pr_debug("cannot map to device idx %d\n", gpuidx); + continue; + } + r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv), prange, prange->dma_addr[gpuidx], - wait ? &fence : NULL); + bo_adev, wait ? &fence : NULL); if (r) break; -- cgit From 50ea50cf6f6d31d3235ad1853c5dbea766a3ed11 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 7 Feb 2020 17:08:04 -0500 Subject: drm/amdkfd: copy memory through gart table Use sdma linear copy to migrate data between ram and vram. The sdma linear copy command uses kernel buffer function queue to access system memory through gart table. Use reserved gart table window 0 to map system page address, and vram page address is direct mapping. Use the same kernel buffer function to fill in gart table mapping, so this is serialized with memory copy by sdma job submit. We only need wait for the last memory copy sdma fence for larger buffer migration. 
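The chunking and fencing scheme just described is easy to model outside the kernel. The sketch below walks a copy in GTT_MAX_PAGES-sized windows and keeps only the most recent fence, relying on the fact that jobs submitted to a single ring complete in order. The window size, the fake fence type and all names here are illustrative assumptions, not the SDMA or dma_fence API.

#include <stdint.h>
#include <stdio.h>

#define GTT_MAX_PAGES 128ULL	/* stand-in for AMDGPU_GTT_MAX_TRANSFER_SIZE */

struct fake_fence { uint64_t seqno; };

/* Pretend to map 'size' system pages into the GART window, then copy. */
static struct fake_fence copy_chunk(uint64_t sys_page, uint64_t vram_page,
				    uint64_t size, uint64_t seqno)
{
	printf("map %llu pages: sys 0x%llx -> vram 0x%llx (job %llu)\n",
	       (unsigned long long)size, (unsigned long long)sys_page,
	       (unsigned long long)vram_page, (unsigned long long)seqno);
	return (struct fake_fence){ seqno };
}

int main(void)
{
	uint64_t npages = 300, sys = 0, vram = 0, seqno = 0;
	struct fake_fence last = { 0 };

	while (npages) {
		uint64_t size = npages < GTT_MAX_PAGES ? npages : GTT_MAX_PAGES;

		/* Jobs on one ring are serialized, so earlier fences are
		 * implicitly signaled once the last one signals. */
		last = copy_chunk(sys, vram, size, ++seqno);
		sys += size;
		vram += size;
		npages -= size;
	}
	printf("wait only on fence %llu\n", (unsigned long long)last.seqno);
	return 0;
}

This single completion point is why svm_migrate_copy_done() in the diff below only has to wait on one fence, no matter how many windows the copy was split into.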
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 172 +++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 5 + 2 files changed, 177 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index d8cec5ebe1d4..74b38856cce3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -33,6 +33,178 @@ #include "kfd_svm.h" #include "kfd_migrate.h" +static uint64_t +svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr) +{ + return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM); +} + +static int +svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages, + dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_job *job; + unsigned int num_dw, num_bytes; + struct dma_fence *fence; + uint64_t src_addr, dst_addr; + uint64_t pte_flags; + void *cpu_addr; + int r; + + /* use gart window 0 */ + *gart_addr = adev->gmc.gart_start; + + num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); + num_bytes = npages * 8; + + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, + AMDGPU_IB_POOL_DELAYED, &job); + if (r) + return r; + + src_addr = num_dw * 4; + src_addr += job->ibs[0].gpu_addr; + + dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); + amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, + dst_addr, num_bytes, false); + + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + WARN_ON(job->ibs[0].length_dw > num_dw); + + pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE; + pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED; + if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO)) + pte_flags |= AMDGPU_PTE_WRITEABLE; + pte_flags |= adev->gart.gart_pte_flags; + + cpu_addr = &job->ibs[0].ptr[num_dw]; + + r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr); + if (r) + goto error_free; + + r = amdgpu_job_submit(job, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, &fence); + if (r) + goto error_free; + + dma_fence_put(fence); + + return r; + +error_free: + amdgpu_job_free(job); + return r; +} + +/** + * svm_migrate_copy_memory_gart - sdma copy data between ram and vram + * + * @adev: amdgpu device the sdma ring running + * @src: source page address array + * @dst: destination page address array + * @npages: number of pages to copy + * @direction: enum MIGRATION_COPY_DIR + * @mfence: output, sdma fence to signal after sdma is done + * + * ram address uses GART table continuous entries mapping to ram pages, + * vram address uses direct mapping of vram pages, which must have npages + * number of continuous pages. + * GART update and sdma uses same buf copy function ring, sdma is splited to + * multiple GTT_MAX_PAGES transfer, all sdma operations are serialized, wait for + * the last sdma finish fence which is returned to check copy memory is done. 
+ * + * Context: Process context, takes and releases gtt_window_lock + * + * Return: + * 0 - OK, otherwise error code + */ + +static int +svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys, + uint64_t *vram, uint64_t npages, + enum MIGRATION_COPY_DIR direction, + struct dma_fence **mfence) +{ + const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE; + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + uint64_t gart_s, gart_d; + struct dma_fence *next; + uint64_t size; + int r; + + mutex_lock(&adev->mman.gtt_window_lock); + + while (npages) { + size = min(GTT_MAX_PAGES, npages); + + if (direction == FROM_VRAM_TO_RAM) { + gart_s = svm_migrate_direct_mapping_addr(adev, *vram); + r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0); + + } else if (direction == FROM_RAM_TO_VRAM) { + r = svm_migrate_gart_map(ring, size, sys, &gart_s, + KFD_IOCTL_SVM_FLAG_GPU_RO); + gart_d = svm_migrate_direct_mapping_addr(adev, *vram); + } + if (r) { + pr_debug("failed %d to create gart mapping\n", r); + goto out_unlock; + } + + r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE, + NULL, &next, false, true, false); + if (r) { + pr_debug("failed %d to copy memory\n", r); + goto out_unlock; + } + + dma_fence_put(*mfence); + *mfence = next; + npages -= size; + if (npages) { + sys += size; + vram += size; + } + } + +out_unlock: + mutex_unlock(&adev->mman.gtt_window_lock); + + return r; +} + +/** + * svm_migrate_copy_done - wait for memory copy sdma is done + * + * @adev: amdgpu device the sdma memory copy is executing on + * @mfence: migrate fence + * + * Wait for dma fence is signaled, if the copy ssplit into multiple sdma + * operations, this is the last sdma operation fence. + * + * Context: called after svm_migrate_copy_memory + * + * Return: + * 0 - success + * otherwise - error code from dma fence signal + */ +int +svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence) +{ + int r = 0; + + if (mfence) { + r = dma_fence_wait(mfence, false); + dma_fence_put(mfence); + pr_debug("sdma copy memory fence done\n"); + } + + return r; +} + static void svm_migrate_page_free(struct page *page) { } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index 89392548ec44..df84e4143e25 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -33,6 +33,11 @@ #include "kfd_priv.h" #include "kfd_svm.h" +enum MIGRATION_COPY_DIR { + FROM_RAM_TO_VRAM = 0, + FROM_VRAM_TO_RAM +}; + #if defined(CONFIG_DEVICE_PRIVATE) int svm_migrate_init(struct amdgpu_device *adev); void svm_migrate_fini(struct amdgpu_device *adev); -- cgit From 0b0e518d61af8e1cb73cbbfb313b215640c8a6f3 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 24 Feb 2021 20:40:20 -0500 Subject: drm/amdkfd: HMM migrate ram to vram Register svm range with same address and size but perferred_location is changed from CPU to GPU or from GPU to CPU, trigger migration the svm range from ram to vram or from vram to ram. If svm range prefetch location is GPU with flags KFD_IOCTL_SVM_FLAG_HOST_ACCESS, validate the svm range on ram first, then migrate it from ram to vram. After migrating to vram is done, CPU access will have cpu page fault, page fault handler migrate it back to ram and resume cpu access. Migration steps: 1. migrate_vma_pages get svm range ram pages, notify the interval is invalidated and unmap from CPU page table, HMM interval notifier callback evict process queues 2. Allocate new pages in vram using TTM 3. 
Use svm copy memory to sdma copy data from ram to vram 4. migrate_vma_pages copy ram pages structure to vram pages structure 5. migrate_vma_finalize put ram pages to free ram pages and memory 6. Restore work wait for migration is finished, then update GPUs page table mapping to new vram pages, resume process queues If migrate_vma_setup failed to collect all ram pages of range, retry 3 times until success to start migration. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 305 +++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 2 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 201 ++++++++++++++++++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 7 + 4 files changed, 502 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 74b38856cce3..7b025c169935 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -205,6 +205,311 @@ svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence) return r; } +static uint64_t +svm_migrate_node_physical_addr(struct amdgpu_device *adev, + struct drm_mm_node **mm_node, uint64_t *offset) +{ + struct drm_mm_node *node = *mm_node; + uint64_t pos = *offset; + + if (node->start == AMDGPU_BO_INVALID_OFFSET) { + pr_debug("drm node is not validated\n"); + return 0; + } + + pr_debug("vram node start 0x%llx npages 0x%llx\n", node->start, + node->size); + + if (pos >= node->size) { + do { + pos -= node->size; + node++; + } while (pos >= node->size); + + *mm_node = node; + *offset = pos; + } + + return (node->start + pos) << PAGE_SHIFT; +} + +unsigned long +svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr) +{ + return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT; +} + +static void +svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn) +{ + struct page *page; + + page = pfn_to_page(pfn); + page->zone_device_data = prange; + get_page(page); + lock_page(page); +} + +static void +svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr) +{ + struct page *page; + + page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr)); + unlock_page(page); + put_page(page); +} + + +static int +svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, + struct migrate_vma *migrate, struct dma_fence **mfence, + dma_addr_t *scratch) +{ + uint64_t npages = migrate->cpages; + struct device *dev = adev->dev; + struct drm_mm_node *node; + dma_addr_t *src; + uint64_t *dst; + uint64_t vram_addr; + uint64_t offset; + uint64_t i, j; + int r = -ENOMEM; + + pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, + prange->last); + + src = scratch; + dst = (uint64_t *)(scratch + npages); + + r = svm_range_vram_node_new(adev, prange, true); + if (r) { + pr_debug("failed %d get 0x%llx pages from vram\n", r, npages); + goto out; + } + + node = prange->ttm_res->mm_node; + offset = prange->offset; + vram_addr = svm_migrate_node_physical_addr(adev, &node, &offset); + if (!vram_addr) { + WARN_ONCE(1, "vram node address is 0\n"); + r = -ENOMEM; + goto out; + } + + for (i = j = 0; i < npages; i++) { + struct page *spage; + + dst[i] = vram_addr + (j << PAGE_SHIFT); + migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]); + svm_migrate_get_vram_page(prange, migrate->dst[i]); + + migrate->dst[i] = migrate_pfn(migrate->dst[i]); + migrate->dst[i] |= 
MIGRATE_PFN_LOCKED; + + if (migrate->src[i] & MIGRATE_PFN_VALID) { + spage = migrate_pfn_to_page(migrate->src[i]); + src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, + DMA_TO_DEVICE); + r = dma_mapping_error(dev, src[i]); + if (r) { + pr_debug("failed %d dma_map_page\n", r); + goto out_free_vram_pages; + } + } else { + if (j) { + r = svm_migrate_copy_memory_gart( + adev, src + i - j, + dst + i - j, j, + FROM_RAM_TO_VRAM, + mfence); + if (r) + goto out_free_vram_pages; + offset += j; + vram_addr = (node->start + offset) << PAGE_SHIFT; + j = 0; + } else { + offset++; + vram_addr += PAGE_SIZE; + } + if (offset >= node->size) { + node++; + pr_debug("next node size 0x%llx\n", node->size); + vram_addr = node->start << PAGE_SHIFT; + offset = 0; + } + continue; + } + + pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n", + src[i] >> PAGE_SHIFT, page_to_pfn(spage)); + + if (j + offset >= node->size - 1 && i < npages - 1) { + r = svm_migrate_copy_memory_gart(adev, src + i - j, + dst + i - j, j + 1, + FROM_RAM_TO_VRAM, + mfence); + if (r) + goto out_free_vram_pages; + + node++; + pr_debug("next node size 0x%llx\n", node->size); + vram_addr = node->start << PAGE_SHIFT; + offset = 0; + j = 0; + } else { + j++; + } + } + + r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j, + FROM_RAM_TO_VRAM, mfence); + +out_free_vram_pages: + if (r) { + pr_debug("failed %d to copy memory to vram\n", r); + while (i--) { + svm_migrate_put_vram_page(adev, dst[i]); + migrate->dst[i] = 0; + } + } + +out: + return r; +} + +static int +svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, + struct vm_area_struct *vma, uint64_t start, + uint64_t end) +{ + uint64_t npages = (end - start) >> PAGE_SHIFT; + struct dma_fence *mfence = NULL; + struct migrate_vma migrate; + dma_addr_t *scratch; + size_t size; + void *buf; + int r = -ENOMEM; + int retry = 0; + + memset(&migrate, 0, sizeof(migrate)); + migrate.vma = vma; + migrate.start = start; + migrate.end = end; + migrate.flags = MIGRATE_VMA_SELECT_SYSTEM; + migrate.pgmap_owner = adev; + + size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t); + size *= npages; + buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO); + if (!buf) + goto out; + + migrate.src = buf; + migrate.dst = migrate.src + npages; + scratch = (dma_addr_t *)(migrate.dst + npages); + +retry: + r = migrate_vma_setup(&migrate); + if (r) { + pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n", + r, prange->svms, prange->start, prange->last); + goto out_free; + } + if (migrate.cpages != npages) { + pr_debug("collect 0x%lx/0x%llx pages, retry\n", migrate.cpages, + npages); + migrate_vma_finalize(&migrate); + if (retry++ >= 3) { + r = -ENOMEM; + pr_debug("failed %d migrate svms 0x%p [0x%lx 0x%lx]\n", + r, prange->svms, prange->start, prange->last); + goto out_free; + } + + goto retry; + } + + if (migrate.cpages) { + svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, + scratch); + migrate_vma_pages(&migrate); + svm_migrate_copy_done(adev, mfence); + migrate_vma_finalize(&migrate); + } + + svm_range_dma_unmap(adev->dev, scratch, 0, npages); + svm_range_free_dma_mappings(prange); + +out_free: + kvfree(buf); +out: + return r; +} + +/** + * svm_migrate_ram_to_vram - migrate svm range from system to device + * @prange: range structure + * @best_loc: the device to migrate to + * + * Context: Process context, caller hold mmap read lock, svms lock, prange lock + * + * Return: + * 0 - OK, otherwise error code + */ +int svm_migrate_ram_to_vram(struct 
svm_range *prange, uint32_t best_loc) +{ + unsigned long addr, start, end; + struct vm_area_struct *vma; + struct amdgpu_device *adev; + struct mm_struct *mm; + int r = 0; + + if (prange->actual_loc == best_loc) { + pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n", + prange->svms, prange->start, prange->last, best_loc); + return 0; + } + + adev = svm_range_get_adev_by_id(prange, best_loc); + if (!adev) { + pr_debug("failed to get device by id 0x%x\n", best_loc); + return -ENODEV; + } + + pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms, + prange->start, prange->last, best_loc); + + /* FIXME: workaround for page locking bug with invalid pages */ + svm_range_prefault(prange, mm); + + start = prange->start << PAGE_SHIFT; + end = (prange->last + 1) << PAGE_SHIFT; + + mm = current->mm; + + for (addr = start; addr < end;) { + unsigned long next; + + vma = find_vma(mm, addr); + if (!vma || addr < vma->vm_start) + break; + + next = min(vma->vm_end, end); + r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next); + if (r) { + pr_debug("failed to migrate\n"); + break; + } + addr = next; + } + + if (!r) + prange->actual_loc = best_loc; + + return r; +} + static void svm_migrate_page_free(struct page *page) { } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index df84e4143e25..d9cee0f6285a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -38,6 +38,8 @@ enum MIGRATION_COPY_DIR { FROM_VRAM_TO_RAM }; +int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc); + #if defined(CONFIG_DEVICE_PRIVATE) int svm_migrate_init(struct amdgpu_device *adev); void svm_migrate_fini(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 7a70f5e92f18..c49fb8513b2b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -31,6 +31,7 @@ #include "amdgpu_xgmi.h" #include "kfd_priv.h" #include "kfd_svm.h" +#include "kfd_migrate.h" #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1 @@ -177,8 +178,8 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, return r; } -static void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, - unsigned long offset, unsigned long npages) +void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, + unsigned long offset, unsigned long npages) { enum dma_data_direction dir = DMA_BIDIRECTIONAL; int i; @@ -195,7 +196,7 @@ static void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, } } -static void svm_range_free_dma_mappings(struct svm_range *prange) +void svm_range_free_dma_mappings(struct svm_range *prange) { struct kfd_process_device *pdd; dma_addr_t *dma_addr; @@ -230,6 +231,7 @@ static void svm_range_free(struct svm_range *prange) svm_range_vram_node_free(prange); svm_range_free_dma_mappings(prange); mutex_destroy(&prange->lock); + mutex_destroy(&prange->migrate_mutex); kfree(prange); } @@ -266,6 +268,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, INIT_LIST_HEAD(&prange->deferred_list); INIT_LIST_HEAD(&prange->child_list); atomic_set(&prange->invalid, 0); + mutex_init(&prange->migrate_mutex); mutex_init(&prange->lock); svm_range_set_default_attributes(&prange->preferred_loc, &prange->prefetch_loc, @@ -1238,6 +1241,8 @@ static int svm_range_validate_and_map(struct mm_struct *mm, pr_debug("failed %d to dma map range\n", r); goto unreserve_out; } + + prange->validated_once 
= true; } svm_range_lock(prange); @@ -1329,21 +1334,28 @@ static void svm_range_restore_work(struct work_struct *work) prange->svms, prange, prange->start, prange->last, invalid); + /* + * If range is migrating, wait for migration is done. + */ + mutex_lock(&prange->migrate_mutex); + r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, false, true); - if (r) { + if (r) pr_debug("failed %d to map 0x%lx to gpus\n", r, prange->start); - goto unlock_out; - } + + mutex_unlock(&prange->migrate_mutex); + if (r) + goto out_reschedule; if (atomic_cmpxchg(&prange->invalid, invalid, 0) != invalid) - goto unlock_out; + goto out_reschedule; } if (atomic_cmpxchg(&svms->evicted_ranges, evicted_ranges, 0) != evicted_ranges) - goto unlock_out; + goto out_reschedule; evicted_ranges = 0; @@ -1357,7 +1369,7 @@ static void svm_range_restore_work(struct work_struct *work) pr_debug("restore svm ranges successfully\n"); -unlock_out: +out_reschedule: mutex_unlock(&svms->lock); mmap_write_unlock(mm); mutex_unlock(&process_info->lock); @@ -1649,6 +1661,7 @@ static void svm_range_deferred_list_work(struct work_struct *work) list_del_init(&prange->deferred_list); spin_unlock(&svms->deferred_list_lock); + mutex_lock(&prange->migrate_mutex); while (!list_empty(&prange->child_list)) { struct svm_range *pchild; @@ -1659,6 +1672,7 @@ static void svm_range_deferred_list_work(struct work_struct *work) list_del_init(&pchild->child_list); svm_range_handle_list_op(svms, pchild); } + mutex_unlock(&prange->migrate_mutex); svm_range_handle_list_op(svms, prange); mutex_unlock(&svms->lock); @@ -1957,6 +1971,151 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, return 0; } +/* svm_range_best_location - decide the best actual location + * @prange: svm range structure + * + * For xnack off: + * If range map to single GPU, the best acutal location is prefetch loc, which + * can be CPU or GPU. + * + * If range map to multiple GPUs, only if mGPU connection on xgmi same hive, + * the best actual location could be prefetch_loc GPU. If mGPU connection on + * PCIe, the best actual location is always CPU, because GPU cannot access vram + * of other GPUs, assuming PCIe small bar (large bar support is not upstream). + * + * For xnack on: + * The best actual location is prefetch location. If mGPU connection on xgmi + * same hive, range map to multiple GPUs. Otherwise, the range only map to + * actual location GPU. Other GPU access vm fault will trigger migration. 
+ * + * Context: Process context + * + * Return: + * 0 for CPU or GPU id + */ +static uint32_t svm_range_best_location(struct svm_range *prange) +{ + DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); + uint32_t best_loc = prange->prefetch_loc; + struct kfd_process_device *pdd; + struct amdgpu_device *bo_adev; + struct amdgpu_device *adev; + struct kfd_process *p; + uint32_t gpuidx; + + p = container_of(prange->svms, struct kfd_process, svms); + + /* xnack on */ + if (p->xnack_enabled) + goto out; + + /* xnack off */ + if (!best_loc || best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) + goto out; + + bo_adev = svm_range_get_adev_by_id(prange, best_loc); + bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip, + MAX_GPU_INSTANCE); + + for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { + pdd = kfd_process_device_from_gpuidx(p, gpuidx); + if (!pdd) { + pr_debug("failed to get device by idx 0x%x\n", gpuidx); + continue; + } + adev = (struct amdgpu_device *)pdd->dev->kgd; + + if (adev == bo_adev) + continue; + + if (!amdgpu_xgmi_same_hive(adev, bo_adev)) { + best_loc = 0; + break; + } + } + +out: + pr_debug("xnack %d svms 0x%p [0x%lx 0x%lx] best loc 0x%x\n", + p->xnack_enabled, &p->svms, prange->start, prange->last, + best_loc); + + return best_loc; +} + +/* FIXME: This is a workaround for page locking bug when some pages are + * invalid during migration to VRAM + */ +void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm) +{ + struct hmm_range *hmm_range; + int r; + + if (prange->validated_once) + return; + + r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, + prange->start << PAGE_SHIFT, + prange->npages, &hmm_range, + false, true); + if (!r) { + amdgpu_hmm_range_get_pages_done(hmm_range); + prange->validated_once = true; + } +} + +/* svm_range_trigger_migration - start page migration if prefetch loc changed + * @mm: current process mm_struct + * @prange: svm range structure + * @migrated: output, true if migration is triggered + * + * If range perfetch_loc is GPU, actual loc is cpu 0, then migrate the range + * from ram to vram. + * If range prefetch_loc is cpu 0, actual loc is GPU, then migrate the range + * from vram to ram. + * + * If GPU vm fault retry is not enabled, migration interact with MMU notifier + * and restore work: + * 1. migrate_vma_setup invalidate pages, MMU notifier callback svm_range_evict + * stops all queues, schedule restore work + * 2. svm_range_restore_work wait for migration is done by + * a. svm_range_validate_vram takes prange->migrate_mutex + * b. svm_range_validate_ram HMM get pages wait for CPU fault handle returns + * 3. restore work update mappings of GPU, resume all queues. 
+ * + * Context: Process context + * + * Return: + * 0 - OK, otherwise - error code of migration + */ +static int +svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, + bool *migrated) +{ + uint32_t best_loc; + int r = 0; + + *migrated = false; + best_loc = svm_range_best_location(prange); + + if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED || + best_loc == prange->actual_loc) + return 0; + + if (best_loc && !prange->actual_loc && + !(prange->flags & KFD_IOCTL_SVM_FLAG_HOST_ACCESS)) + return 0; + + if (best_loc) { + pr_debug("migrate from ram to vram\n"); + r = svm_migrate_ram_to_vram(prange, best_loc); + + if (!r) + *migrated = true; + } + + return r; +} + static int svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) @@ -2027,13 +2186,29 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, * case because the rollback wouldn't be guaranteed to work either. */ list_for_each_entry(prange, &update_list, update_list) { + bool migrated; + + mutex_lock(&prange->migrate_mutex); + + r = svm_range_trigger_migration(mm, prange, &migrated); + if (r) + goto out_unlock_range; + + if (migrated) { + pr_debug("restore_work will update mappings of GPUs\n"); + mutex_unlock(&prange->migrate_mutex); + continue; + } + r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, true, true); - if (r) { - pr_debug("failed %d to map 0x%lx to gpus\n", r, - prange->start); + if (r) + pr_debug("failed %d to map svm range\n", r); + +out_unlock_range: + mutex_unlock(&prange->migrate_mutex); + if (r) break; - } } svm_range_debug_dump(svms); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 0aab88c71855..34214a44b099 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -56,6 +56,7 @@ struct svm_work_list_item { * struct svm_range - shared virtual memory range * * @svms: list of svm ranges, structure defined in kfd_process + * @migrate_mutex: to serialize range migration, validation and mapping update * @start: range start address in pages * @last: range last address in pages * @it_node: node [start, last] stored in interval tree, start, last are page @@ -92,6 +93,7 @@ struct svm_work_list_item { */ struct svm_range { struct svm_range_list *svms; + struct mutex migrate_mutex; unsigned long start; unsigned long last; struct interval_tree_node it_node; @@ -120,6 +122,7 @@ struct svm_range { struct list_head child_list; DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE); DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE); + bool validated_once; }; static inline void svm_range_lock(struct svm_range *prange) @@ -144,5 +147,9 @@ struct amdgpu_device *svm_range_get_adev_by_id(struct svm_range *prange, int svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, bool clear); void svm_range_vram_node_free(struct svm_range *prange); +void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, + unsigned long offset, unsigned long npages); +void svm_range_free_dma_mappings(struct svm_range *prange); +void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm); #endif /* KFD_SVM_H_ */ -- cgit From 48ff079b28d82dbce000cc45c0fd35b6ae9ffbda Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 17 Mar 2021 00:24:12 -0400 Subject: drm/amdkfd: HMM migrate vram to ram If CPU page fault happens, HMM pgmap_ops callback migrate_to_ram start migrate memory from vram to ram in steps: 1. 
migrate_vma_pages get vram pages, and notify HMM to invalidate the pages, HMM interval notifier callback evict process queues 2. Allocate system memory pages 3. Use svm copy memory to migrate data from vram to ram 4. migrate_vma_pages copy pages structure from vram pages to ram pages 5. Return VM_FAULT_SIGBUS if migration failed, to notify application 6. migrate_vma_finalize put vram pages, page_free callback free vram pages and vram nodes 7. Restore work wait for migration is finished, then update GPU page table mapping to system memory, and resume process queues Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 302 ++++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 3 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 127 ++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 10 + 4 files changed, 429 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 7b025c169935..73c10dad0489 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -191,7 +191,7 @@ out_unlock: * 0 - success * otherwise - error code from dma fence signal */ -int +static int svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence) { int r = 0; @@ -260,6 +260,35 @@ svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr) put_page(page); } +static unsigned long +svm_migrate_addr(struct amdgpu_device *adev, struct page *page) +{ + unsigned long addr; + + addr = page_to_pfn(page) << PAGE_SHIFT; + return (addr - adev->kfd.dev->pgmap.range.start); +} + +static struct page * +svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr) +{ + struct page *page; + + page = alloc_page_vma(GFP_HIGHUSER, vma, addr); + if (page) + lock_page(page); + + return page; +} + +void svm_migrate_put_sys_page(unsigned long addr) +{ + struct page *page; + + page = pfn_to_page(addr >> PAGE_SHIFT); + unlock_page(page); + put_page(page); +} static int svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, @@ -512,13 +541,213 @@ int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc) static void svm_migrate_page_free(struct page *page) { + /* Keep this function to avoid warning */ +} + +static int +svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, + struct migrate_vma *migrate, struct dma_fence **mfence, + dma_addr_t *scratch) +{ + uint64_t npages = migrate->cpages; + struct device *dev = adev->dev; + uint64_t *src; + dma_addr_t *dst; + struct page *dpage; + uint64_t i = 0, j; + uint64_t addr; + int r = 0; + + pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, + prange->last); + + addr = prange->start << PAGE_SHIFT; + + src = (uint64_t *)(scratch + npages); + dst = scratch; + + for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) { + struct page *spage; + + spage = migrate_pfn_to_page(migrate->src[i]); + if (!spage) { + pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n", + prange->svms, prange->start, prange->last); + r = -ENOMEM; + goto out_oom; + } + src[i] = svm_migrate_addr(adev, spage); + if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) { + r = svm_migrate_copy_memory_gart(adev, dst + i - j, + src + i - j, j, + FROM_VRAM_TO_RAM, + mfence); + if (r) + goto out_oom; + j = 0; + } + + dpage = svm_migrate_get_sys_page(migrate->vma, addr); + if (!dpage) { + 
pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n", + prange->svms, prange->start, prange->last); + r = -ENOMEM; + goto out_oom; + } + + dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE); + r = dma_mapping_error(dev, dst[i]); + if (r) { + pr_debug("failed %d dma_map_page\n", r); + goto out_oom; + } + + pr_debug("dma mapping dst to 0x%llx, page_to_pfn 0x%lx\n", + dst[i] >> PAGE_SHIFT, page_to_pfn(dpage)); + + migrate->dst[i] = migrate_pfn(page_to_pfn(dpage)); + migrate->dst[i] |= MIGRATE_PFN_LOCKED; + } + + r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j, + FROM_VRAM_TO_RAM, mfence); + +out_oom: + if (r) { + pr_debug("failed %d copy to ram\n", r); + while (i--) { + svm_migrate_put_sys_page(dst[i]); + migrate->dst[i] = 0; + } + } + + return r; +} + +static int +svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, + struct vm_area_struct *vma, uint64_t start, uint64_t end) +{ + uint64_t npages = (end - start) >> PAGE_SHIFT; + struct dma_fence *mfence = NULL; + struct migrate_vma migrate; + dma_addr_t *scratch; + size_t size; + void *buf; + int r = -ENOMEM; + + memset(&migrate, 0, sizeof(migrate)); + migrate.vma = vma; + migrate.start = start; + migrate.end = end; + migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; + migrate.pgmap_owner = adev; + + size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t); + size *= npages; + buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO); + if (!buf) + goto out; + + migrate.src = buf; + migrate.dst = migrate.src + npages; + scratch = (dma_addr_t *)(migrate.dst + npages); + + r = migrate_vma_setup(&migrate); + if (r) { + pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n", + r, prange->svms, prange->start, prange->last); + goto out_free; + } + + pr_debug("cpages %ld\n", migrate.cpages); + + if (migrate.cpages) { + svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence, + scratch); + migrate_vma_pages(&migrate); + svm_migrate_copy_done(adev, mfence); + migrate_vma_finalize(&migrate); + } else { + pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n", + prange->start, prange->last); + } + + svm_range_dma_unmap(adev->dev, scratch, 0, npages); + +out_free: + kvfree(buf); +out: + return r; +} + +/** + * svm_migrate_vram_to_ram - migrate svm range from device to system + * @prange: range structure + * @mm: process mm, use current->mm if NULL + * + * Context: Process context, caller hold mmap read lock, svms lock, prange lock + * + * Return: + * 0 - OK, otherwise error code + */ +int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) +{ + struct amdgpu_device *adev; + struct vm_area_struct *vma; + unsigned long addr; + unsigned long start; + unsigned long end; + int r = 0; + + if (!prange->actual_loc) { + pr_debug("[0x%lx 0x%lx] already migrated to ram\n", + prange->start, prange->last); + return 0; + } + + adev = svm_range_get_adev_by_id(prange, prange->actual_loc); + if (!adev) { + pr_debug("failed to get device by id 0x%x\n", + prange->actual_loc); + return -ENODEV; + } + + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n", + prange->svms, prange, prange->start, prange->last, + prange->actual_loc); + + start = prange->start << PAGE_SHIFT; + end = (prange->last + 1) << PAGE_SHIFT; + + for (addr = start; addr < end;) { + unsigned long next; + + vma = find_vma(mm, addr); + if (!vma || addr < vma->vm_start) + break; + + next = min(vma->vm_end, end); + r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next); + if (r) { + 
pr_debug("failed %d to migrate\n", r); + break; + } + addr = next; + } + + if (!r) { + svm_range_vram_node_free(prange); + prange->actual_loc = 0; + } + return r; } /** * svm_migrate_to_ram - CPU page fault handler * @vmf: CPU vm fault vma, address * - * Context: vm fault handler, mm->mmap_sem is taken + * Context: vm fault handler, caller holds the mmap read lock * * Return: * 0 - OK @@ -526,7 +755,74 @@ static void svm_migrate_page_free(struct page *page) */ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) { - return VM_FAULT_SIGBUS; + unsigned long addr = vmf->address; + struct vm_area_struct *vma; + enum svm_work_list_ops op; + struct svm_range *parent; + struct svm_range *prange; + struct kfd_process *p; + struct mm_struct *mm; + int r = 0; + + vma = vmf->vma; + mm = vma->vm_mm; + + p = kfd_lookup_process_by_mm(vma->vm_mm); + if (!p) { + pr_debug("failed find process at fault address 0x%lx\n", addr); + return VM_FAULT_SIGBUS; + } + addr >>= PAGE_SHIFT; + pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr); + + mutex_lock(&p->svms.lock); + + prange = svm_range_from_addr(&p->svms, addr, &parent); + if (!prange) { + pr_debug("cannot find svm range at 0x%lx\n", addr); + r = -EFAULT; + goto out; + } + + mutex_lock(&parent->migrate_mutex); + if (prange != parent) + mutex_lock_nested(&prange->migrate_mutex, 1); + + if (!prange->actual_loc) + goto out_unlock_prange; + + svm_range_lock(parent); + if (prange != parent) + mutex_lock_nested(&prange->lock, 1); + r = svm_range_split_by_granularity(p, mm, addr, parent, prange); + if (prange != parent) + mutex_unlock(&prange->lock); + svm_range_unlock(parent); + if (r) { + pr_debug("failed %d to split range by granularity\n", r); + goto out_unlock_prange; + } + + r = svm_migrate_vram_to_ram(prange, mm); + if (r) + pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r, + prange, prange->start, prange->last); + + op = SVM_OP_UPDATE_RANGE_NOTIFIER; + svm_range_add_list_work(&p->svms, parent, mm, op); + schedule_deferred_list_work(&p->svms); + +out_unlock_prange: + if (prange != parent) + mutex_unlock(&prange->migrate_mutex); + mutex_unlock(&parent->migrate_mutex); +out: + mutex_unlock(&p->svms.lock); + kfd_unref_process(p); + + pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr); + + return r ? 
VM_FAULT_SIGBUS : 0; } static const struct dev_pagemap_ops svm_migrate_pgmap_ops = { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index d9cee0f6285a..082b9bb22270 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -39,6 +39,9 @@ enum MIGRATION_COPY_DIR { }; int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc); +int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm); +unsigned long +svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); #if defined(CONFIG_DEVICE_PRIVATE) int svm_migrate_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index c49fb8513b2b..6fcfb9fa1b37 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -861,6 +861,60 @@ svm_range_add_child(struct svm_range *prange, struct mm_struct *mm, list_add_tail(&pchild->child_list, &prange->child_list); } +/** + * svm_range_split_by_granularity - collect ranges within granularity boundary + * + * @p: the process with svms list + * @mm: mm structure + * @addr: the vm fault address in pages, to split the prange + * @parent: parent range if prange is from child list + * @prange: prange to split + * + * Trims @prange to be a single aligned block of prange->granularity if + * possible. The head and tail are added to the child_list in @parent. + * + * Context: caller must hold mmap_read_lock and prange->lock + * + * Return: + * 0 - OK, otherwise error code + */ +int +svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, + unsigned long addr, struct svm_range *parent, + struct svm_range *prange) +{ + struct svm_range *head, *tail; + unsigned long start, last, size; + int r; + + /* Align splited range start and size to granularity size, then a single + * PTE will be used for whole range, this reduces the number of PTE + * updated and the L1 TLB space used for translation. 
+ */ + size = 1UL << prange->granularity; + start = ALIGN_DOWN(addr, size); + last = ALIGN(addr + 1, size) - 1; + + pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n", + prange->svms, prange->start, prange->last, start, last, size); + + if (start > prange->start) { + r = svm_range_split(prange, start, prange->last, &head); + if (r) + return r; + svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE); + } + + if (last < prange->last) { + r = svm_range_split(prange, prange->start, last, &tail); + if (r) + return r; + svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE); + } + + return 0; +} + static uint64_t svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange) { @@ -1685,7 +1739,7 @@ static void svm_range_deferred_list_work(struct work_struct *work) pr_debug("exit svms 0x%p\n", svms); } -static void +void svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, struct mm_struct *mm, enum svm_work_list_ops op) { @@ -1708,7 +1762,7 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, spin_unlock(&svms->deferred_list_lock); } -static void schedule_deferred_list_work(struct svm_range_list *svms) +void schedule_deferred_list_work(struct svm_range_list *svms) { spin_lock(&svms->deferred_list_lock); if (!list_empty(&svms->deferred_range_list)) @@ -1798,12 +1852,19 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, /** * svm_range_cpu_invalidate_pagetables - interval notifier callback * - * MMU range unmap notifier to remove svm ranges + * If event is MMU_NOTIFY_UNMAP, this is from CPU unmap range, otherwise, it + * is from migration, or CPU page invalidation callback. + * + * For unmap event, unmap range from GPUs, remove prange from svms in a delayed + * work thread, and split prange if only part of prange is unmapped. + * + * For invalidation event, if GPU retry fault is not enabled, evict the queues, + * then schedule svm_range_restore_work to update GPU mapping and resume queues. + * If GPU retry fault is enabled, unmap the svm range from GPU, retry fault will + * update GPU mapping to recover. * - * If GPU vm fault retry is not enabled, evict the svm range, then restore - * work will update GPU mapping. - * If GPU vm fault retry is enabled, unmap the svm range from GPU, vm fault - * will update GPU mapping. 
+ * Context: mmap lock and notifier_invalidate_start lock are held for an + * invalidate event; the prange lock is held if this is from migration */ static bool svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, @@ -1846,6 +1907,49 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, return true; } +/** + * svm_range_from_addr - find svm range from fault address + * @svms: svm range list header + * @addr: address to search range interval tree, in pages + * @parent: parent range if range is on child list + * + * Context: The caller must hold svms->lock + * + * Return: the svm_range found or NULL + */ +struct svm_range * +svm_range_from_addr(struct svm_range_list *svms, unsigned long addr, + struct svm_range **parent) +{ + struct interval_tree_node *node; + struct svm_range *prange; + struct svm_range *pchild; + + node = interval_tree_iter_first(&svms->objects, addr, addr); + if (!node) + return NULL; + + prange = container_of(node, struct svm_range, it_node); + pr_debug("address 0x%lx prange [0x%lx 0x%lx] node [0x%lx 0x%lx]\n", + addr, prange->start, prange->last, node->start, node->last); + + if (addr >= prange->start && addr <= prange->last) { + if (parent) + *parent = prange; + return prange; + } + list_for_each_entry(pchild, &prange->child_list, child_list) + if (addr >= pchild->start && addr <= pchild->last) { + pr_debug("found address 0x%lx pchild [0x%lx 0x%lx]\n", + addr, pchild->start, pchild->last); + if (parent) + *parent = prange; + return pchild; + } + + return NULL; +} + void svm_range_list_fini(struct kfd_process *p) { struct svm_range *prange; @@ -2108,11 +2212,14 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, if (best_loc) { pr_debug("migrate from ram to vram\n"); r = svm_migrate_ram_to_vram(prange, best_loc); - - if (!r) - *migrated = true; + } else { + pr_debug("migrate from vram to ram\n"); + r = svm_migrate_vram_to_ram(prange, current->mm); } + if (!r) + *migrated = true; + return r; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 34214a44b099..37cfa1689c4f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -142,11 +142,21 @@ void svm_range_list_fini(struct kfd_process *p); int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start, uint64_t size, uint32_t nattrs, struct kfd_ioctl_svm_attribute *attrs); +struct svm_range *svm_range_from_addr(struct svm_range_list *svms, + unsigned long addr, + struct svm_range **parent); struct amdgpu_device *svm_range_get_adev_by_id(struct svm_range *prange, uint32_t id); int svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, bool clear); void svm_range_vram_node_free(struct svm_range *prange); +int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, + unsigned long addr, struct svm_range *parent, + struct svm_range *prange); +void svm_range_add_list_work(struct svm_range_list *svms, + struct svm_range *prange, struct mm_struct *mm, + enum svm_work_list_ops op); +void schedule_deferred_list_work(struct svm_range_list *svms); void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, unsigned long offset, unsigned long npages); void svm_range_free_dma_mappings(struct svm_range *prange); -- cgit From 90d7d3eda5796fa48048f1114b0aa090c5465d17 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 24 Feb 2021 22:34:11 -0500 Subject: drm/amdkfd: invalidate tables on page retry fault GPU page tables are
invalidated by unmapping the prange directly at the mmu notifier, when page fault retry is enabled through the amdgpu_noretry global parameter. Page table restore is performed in the page fault handler. If xnack is on, we update GPU mappings after migration to avoid unnecessary GPUVM faults. Signed-off-by: Alex Sierra Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 6 ++- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 77 +++++++++++++++++++++++++------- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 4 +- 3 files changed, 70 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 73c10dad0489..2a32c423a393 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -808,7 +808,11 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r, prange, prange->start, prange->last); - op = SVM_OP_UPDATE_RANGE_NOTIFIER; + /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ + if (p->xnack_enabled && parent == prange) + op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP; + else + op = SVM_OP_UPDATE_RANGE_NOTIFIER; svm_range_add_list_work(&p->svms, parent, mm, op); schedule_deferred_list_work(&p->svms); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 6fcfb9fa1b37..6d3ed1957496 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -912,6 +912,13 @@ svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE); } + /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ + if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) { + prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP; + pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n", + prange, prange->start, prange->last, + SVM_OP_ADD_RANGE_AND_MAP); + } return 0; } @@ -1452,25 +1459,52 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, unsigned long start, unsigned long last) { struct svm_range_list *svms = prange->svms; - int evicted_ranges; + struct kfd_process *p; int r = 0; - atomic_inc(&prange->invalid); - evicted_ranges = atomic_inc_return(&svms->evicted_ranges); - if (evicted_ranges != 1) - return r; + p = container_of(svms, struct kfd_process, svms); - pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n", - prange->svms, prange->start, prange->last); + pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 0x%lx]\n", + svms, prange->start, prange->last, start, last); - /* First eviction, stop the queues */ - r = kgd2kfd_quiesce_mm(mm); - if (r) - pr_debug("failed to quiesce KFD\n"); + if (!p->xnack_enabled) { + int evicted_ranges; - pr_debug("schedule to restore svm %p ranges\n", svms); - schedule_delayed_work(&svms->restore_work, - msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); + atomic_inc(&prange->invalid); + evicted_ranges = atomic_inc_return(&svms->evicted_ranges); + if (evicted_ranges != 1) + return r; + + pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n", + prange->svms, prange->start, prange->last); + + /* First eviction, stop the queues */ + r = kgd2kfd_quiesce_mm(mm); + if (r) + pr_debug("failed to quiesce KFD\n"); + + pr_debug("schedule to restore svm %p ranges\n", svms); + schedule_delayed_work(&svms->restore_work, +
msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); + } else { + struct svm_range *pchild; + unsigned long s, l; + + pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n", + prange->svms, start, last); + list_for_each_entry(pchild, &prange->child_list, child_list) { + mutex_lock_nested(&pchild->lock, 1); + s = max(start, pchild->start); + l = min(last, pchild->last); + if (l >= s) + svm_range_unmap_from_gpus(pchild, s, l); + mutex_unlock(&pchild->lock); + } + s = max(start, prange->start); + l = min(last, prange->last); + if (l >= s) + svm_range_unmap_from_gpus(prange, s, l); + } return r; } @@ -1673,12 +1707,25 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange) svms, prange, prange->start, prange->last); svm_range_update_notifier_and_interval_tree(mm, prange); break; + case SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP: + pr_debug("update and map 0x%p prange 0x%p [0x%lx 0x%lx]\n", + svms, prange, prange->start, prange->last); + svm_range_update_notifier_and_interval_tree(mm, prange); + /* TODO: implement deferred validation and mapping */ + break; case SVM_OP_ADD_RANGE: pr_debug("add 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange, prange->start, prange->last); svm_range_add_to_svms(prange); svm_range_add_notifier_locked(mm, prange); break; + case SVM_OP_ADD_RANGE_AND_MAP: + pr_debug("add and map 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, + prange, prange->start, prange->last); + svm_range_add_to_svms(prange); + svm_range_add_notifier_locked(mm, prange); + /* TODO: implement deferred validation and mapping */ + break; default: WARN_ONCE(1, "Unknown prange 0x%p work op %d\n", prange, prange->work_item.op); @@ -2301,7 +2348,7 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, if (r) goto out_unlock_range; - if (migrated) { + if (migrated && !p->xnack_enabled) { pr_debug("restore_work will update mappings of GPUs\n"); mutex_unlock(&prange->migrate_mutex); continue; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 37cfa1689c4f..d88926d2855f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -44,7 +44,9 @@ enum svm_work_list_ops { SVM_OP_NULL, SVM_OP_UNMAP_RANGE, SVM_OP_UPDATE_RANGE_NOTIFIER, - SVM_OP_ADD_RANGE + SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP, + SVM_OP_ADD_RANGE, + SVM_OP_ADD_RANGE_AND_MAP }; struct svm_work_list_item { -- cgit From 9dd9cc2f7433cdf389049c91c87c09eaccece373 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Fri, 15 Jan 2021 17:03:18 -0600 Subject: drm/amdgpu: enable 48-bit IH timestamp counter By default this timestamp is a 32-bit counter, which overflows in around 10 minutes. Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index ca8efa5c6978..2f17c8a57015 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -104,6 +104,7 @@ static int vega10_ih_toggle_ring_interrupts(struct amdgpu_device *adev, tmp = RREG32(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0)); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1); /* enable_intr field is only valid in ring0 */ if (ih == &adev->irq.ih) tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ?
1 : 0)); -- cgit From 2383f56bbe4ae1460d11ae77b93c1730c4a20c26 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 24 Feb 2021 23:02:21 -0500 Subject: drm/amdkfd: page table restore through svm API Page table restore implementation in the SVM API. This is called from the fault handler at amdgpu_vm to update page tables through the page fault retry IH. Signed-off-by: Alex Sierra Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 59 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 2 ++ 2 files changed, 61 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 6d3ed1957496..a992fa8bce0f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1997,6 +1997,65 @@ svm_range_from_addr(struct svm_range_list *svms, unsigned long addr, return NULL; } +int +svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, + uint64_t addr) +{ + int r = 0; + struct mm_struct *mm = NULL; + struct svm_range *prange; + struct svm_range_list *svms; + struct kfd_process *p; + + p = kfd_lookup_process_by_pasid(pasid); + if (!p) { + pr_debug("kfd process not found pasid 0x%x\n", pasid); + return -ESRCH; + } + if (!p->xnack_enabled) { + pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); + return -EFAULT; + } + svms = &p->svms; + + pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr); + + mm = get_task_mm(p->lead_thread); + if (!mm) { + pr_debug("svms 0x%p failed to get mm\n", svms); + r = -ESRCH; + goto out; + } + + mmap_read_lock(mm); + mutex_lock(&svms->lock); + prange = svm_range_from_addr(svms, addr, NULL); + + if (!prange) { + pr_debug("failed to find prange svms 0x%p address [0x%llx]\n", + svms, addr); + r = -EFAULT; + goto out_unlock_svms; + } + + mutex_lock(&prange->migrate_mutex); + + r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, false, false); + if (r) + pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpu\n", r, + svms, prange->start, prange->last); + + mutex_unlock(&prange->migrate_mutex); +out_unlock_svms: + mutex_unlock(&svms->lock); + mmap_read_unlock(mm); + mmput(mm); +out: + kfd_unref_process(p); + + return r; +} + void svm_range_list_fini(struct kfd_process *p) { struct svm_range *prange; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index d88926d2855f..9a078d354cba 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -155,6 +155,8 @@ void svm_range_vram_node_free(struct svm_range *prange); int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, unsigned long addr, struct svm_range *parent, struct svm_range *prange); +int svm_range_restore_pages(struct amdgpu_device *adev, + unsigned int pasid, uint64_t addr); void svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, struct mm_struct *mm, enum svm_work_list_ops op); -- cgit From ea53af8a59c89b1bb6743d0956da53eee4cb4cd2 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Mon, 11 May 2020 22:01:24 -0500 Subject: drm/amdkfd: SVM API call to restore page tables Use the SVM API to restore page tables when retry fault and compute context are enabled.
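This call takes over fault handling for compute contexts. For reference, the callee added in the previous patch nests several locks; a condensed sketch of the ordering (derived from svm_range_restore_pages() above, with error handling elided):

	mm = get_task_mm(p->lead_thread);
	mmap_read_lock(mm);                 /* stabilize VMAs for validation */
	mutex_lock(&svms->lock);            /* protects the range interval tree */
	prange = svm_range_from_addr(svms, addr, NULL);
	mutex_lock(&prange->migrate_mutex); /* serialize against migration */
	r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, false, false);
	/* unlock in reverse order, then mmput(mm) and kfd_unref_process(p) */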
Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 2dd5b0e4baf9..c00094acfced 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -37,6 +37,7 @@ #include "amdgpu_gmc.h" #include "amdgpu_xgmi.h" #include "amdgpu_dma_buf.h" +#include "kfd_svm.h" /** * DOC: GPUVM @@ -3298,6 +3299,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, uint64_t addr) { + bool is_compute_context = false; struct amdgpu_bo *root; uint64_t value, flags; struct amdgpu_vm *vm; @@ -3305,15 +3307,25 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, spin_lock(&adev->vm_manager.pasid_lock); vm = idr_find(&adev->vm_manager.pasid_idr, pasid); - if (vm) + if (vm) { root = amdgpu_bo_ref(vm->root.base.bo); - else + is_compute_context = vm->is_compute_context; + } else { root = NULL; + } spin_unlock(&adev->vm_manager.pasid_lock); if (!root) return false; + addr /= AMDGPU_GPU_PAGE_SIZE; + + if (is_compute_context && + !svm_range_restore_pages(adev, pasid, addr)) { + amdgpu_bo_unref(&root); + return true; + } + r = amdgpu_bo_reserve(root, true); if (r) goto error_unref; @@ -3327,18 +3339,16 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, if (!vm) goto error_unlock; - addr /= AMDGPU_GPU_PAGE_SIZE; flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED | AMDGPU_PTE_SYSTEM; - if (vm->is_compute_context) { + if (is_compute_context) { /* Intentionally setting invalid PTE flag * combination to force a no-retry-fault */ flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE | AMDGPU_PTE_TF; value = 0; - } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { /* Redirect the access to the dummy page */ value = adev->dummy_page_addr; -- cgit From eb2cec5537bbab46a319b4b0bcd71c320c382d2b Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Tue, 28 Jul 2020 13:35:32 -0500 Subject: drm/amdkfd: add svm_bo reference for eviction fence [why] As part of the SVM functionality, the eviction mechanism used for SVM_BOs is different. This mechanism uses one eviction fence per prange, instead of one fence per kfd_process. [how] Add an svm_bo reference to amdgpu_amdkfd_fence to allow differentiating between SVM_BO and regular BO evictions. This also includes modifications to set the reference at the fence creation call.
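To make the new reference concrete, the two kinds of eviction fence end up being created as follows (condensed from this and later patches in the series; a NULL svm_bo marks a process eviction fence):

	/* per-process eviction fence: no svm_bo backref */
	info->eviction_fence =
		amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
					   current->mm, NULL);

	/* per-svm_bo eviction fence: carries the svm_bo backref */
	svm_bo->eviction_fence =
		amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
					   mm, svm_bo);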
Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 ++++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index b3e5ecec316c..313ee49b9f17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -75,6 +75,7 @@ struct amdgpu_amdkfd_fence { struct mm_struct *mm; spinlock_t lock; char timeline_name[TASK_COMM_LEN]; + struct svm_range_bo *svm_bo; }; struct amdgpu_kfd_dev { @@ -148,7 +149,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, int queue_bit); struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, - struct mm_struct *mm); + struct mm_struct *mm, + struct svm_range_bo *svm_bo); #if IS_ENABLED(CONFIG_HSA_AMD) bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 5af464933976..53559643c712 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -60,7 +60,8 @@ static atomic_t fence_seq = ATOMIC_INIT(0); */ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, - struct mm_struct *mm) + struct mm_struct *mm, + struct svm_range_bo *svm_bo) { struct amdgpu_amdkfd_fence *fence; @@ -73,7 +74,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, fence->mm = mm; get_task_comm(fence->timeline_name, current); spin_lock_init(&fence->lock); - + fence->svm_bo = svm_bo; dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock, context, atomic_inc_return(&fence_seq)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d03088d1eb60..6c4df1f1796e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -972,7 +972,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, info->eviction_fence = amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), - current->mm); + current->mm, + NULL); if (!info->eviction_fence) { pr_err("Failed to create eviction fence\n"); ret = -ENOMEM; @@ -2162,7 +2163,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) */ new_fence = amdgpu_amdkfd_fence_create( process_info->eviction_fence->base.context, - process_info->eviction_fence->mm); + process_info->eviction_fence->mm, + NULL); if (!new_fence) { pr_err("Failed to create eviction fence\n"); ret = -ENOMEM; -- cgit From f04c79cfba7e01db060d17c8d46f23cf8e02845a Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Tue, 28 Jul 2020 13:38:29 -0500 Subject: drm/amdgpu: add param bit flag to create SVM BOs Add CREATE_SVM_BO define bit for SVM BOs. Another define flag was moved to concentrate these KFD type flags in one include file. 
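A short usage sketch of the new flag (this mirrors the svm_range_vram_node_new() and amdgpu_evict_flags() changes later in this series, rather than code in this patch):

	struct amdgpu_bo_param bp = {};

	bp.size = prange->npages * PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
	bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
		   AMDGPU_AMDKFD_CREATE_SVM_BO;	/* mark as SVM BO */
	...
	if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO)
		/* take the SVM-specific eviction path */;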
Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 7 ++----- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 4 ++++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 6c4df1f1796e..dfa025d694f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -33,9 +33,6 @@ #include #include "amdgpu_xgmi.h" -/* BO flag to indicate a KFD userptr BO */ -#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) - /* Userptr restore delay, just long enough to allow consecutive VM * changes to accumulate */ @@ -222,7 +219,7 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) u32 domain = bo->preferred_domains; bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU); - if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { + if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) { domain = AMDGPU_GEM_DOMAIN_CPU; sg = false; } @@ -1241,7 +1238,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( bo->kfd_bo = *mem; (*mem)->bo = bo; if (user_addr) - bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; + bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO; (*mem)->va = va; (*mem)->domain = domain; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 2d1fefbe1e99..973c88bdf37b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -37,6 +37,10 @@ #define AMDGPU_BO_INVALID_OFFSET LONG_MAX #define AMDGPU_BO_MAX_PLACEMENTS 3 +/* BO flag to indicate a KFD userptr BO */ +#define AMDGPU_AMDKFD_CREATE_USERPTR_BO (1ULL << 63) +#define AMDGPU_AMDKFD_CREATE_SVM_BO (1ULL << 62) + #define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo) struct amdgpu_bo_param { -- cgit From b41896e3eeb5cfb9f906f5d72d9b9c735dc234d4 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 24 Feb 2021 23:24:53 -0500 Subject: drm/amdkfd: add svm_bo eviction mechanism support The svm_bo eviction mechanism is different from that of regular BOs. Every SVM_BO created contains one eviction fence and one worker item for the eviction process. SVM_BOs can be attached to one or more pranges. For the SVM_BO eviction mechanism, TTM will call the enable_signal callback for every SVM_BO until VRAM space is available. Here, all the ttm_evict calls are synchronous; this guarantees that each eviction has completed and the fence has signaled before it returns.
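The resulting handshake between TTM's enable_signaling path and the eviction worker can be condensed to the following sketch (not the literal code; see the diff below for the full flow):

	/* enable_signaling side: atomic context, cannot block */
	WRITE_ONCE(fence->svm_bo->evicting, 1);
	schedule_work(&fence->svm_bo->eviction_work);

	/* worker side: migrate each attached prange back to system memory,
	 * then signal so the synchronous TTM evict can complete
	 */
	svm_migrate_vram_to_ram(prange, svm_bo->eviction_fence->mm);
	dma_fence_signal(&svm_bo->eviction_fence->base);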
Signed-off-by: Alex Sierra Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 201 +++++++++++++++++++++++++++-------- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 13 ++- 2 files changed, 168 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index a992fa8bce0f..c771532420dc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -35,6 +35,7 @@ #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1 +static void svm_range_evict_svm_bo_worker(struct work_struct *work); static bool svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, const struct mmu_notifier_range *range, @@ -320,7 +321,15 @@ static void svm_range_bo_release(struct kref *kref) spin_lock(&svm_bo->list_lock); } spin_unlock(&svm_bo->list_lock); - + if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) { + /* We're not in the eviction worker. + * Signal the fence and synchronize with any + * pending eviction work. + */ + dma_fence_signal(&svm_bo->eviction_fence->base); + cancel_work_sync(&svm_bo->eviction_work); + } + dma_fence_put(&svm_bo->eviction_fence->base); amdgpu_bo_unref(&svm_bo->bo); kfree(svm_bo); } @@ -333,6 +342,61 @@ static void svm_range_bo_unref(struct svm_range_bo *svm_bo) kref_put(&svm_bo->kref, svm_range_bo_release); } +static bool svm_range_validate_svm_bo(struct svm_range *prange) +{ + mutex_lock(&prange->lock); + if (!prange->svm_bo) { + mutex_unlock(&prange->lock); + return false; + } + if (prange->ttm_res) { + /* We still have a reference, all is well */ + mutex_unlock(&prange->lock); + return true; + } + if (svm_bo_ref_unless_zero(prange->svm_bo)) { + if (READ_ONCE(prange->svm_bo->evicting)) { + struct dma_fence *f; + struct svm_range_bo *svm_bo; + /* The BO is getting evicted, + * we need to get a new one + */ + mutex_unlock(&prange->lock); + svm_bo = prange->svm_bo; + f = dma_fence_get(&svm_bo->eviction_fence->base); + svm_range_bo_unref(prange->svm_bo); + /* wait for the fence to avoid long spin-loop + * at list_empty_careful + */ + dma_fence_wait(f, false); + dma_fence_put(f); + } else { + /* The BO was still around and we got + * a new reference to it + */ + mutex_unlock(&prange->lock); + pr_debug("reuse old bo svms 0x%p [0x%lx 0x%lx]\n", + prange->svms, prange->start, prange->last); + + prange->ttm_res = &prange->svm_bo->bo->tbo.mem; + return true; + } + + } else { + mutex_unlock(&prange->lock); + } + + /* We need a new svm_bo. Spin-loop to wait for concurrent + * svm_range_bo_release to finish removing this range from + * its range list. After this, it is safe to reuse the + * svm_bo pointer and svm_bo_list head. 
+ */ + while (!list_empty_careful(&prange->svm_bo_list)) + ; + + return false; +} + static struct svm_range_bo *svm_range_bo_new(void) { struct svm_range_bo *svm_bo; @@ -352,72 +416,56 @@ int svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, bool clear) { - struct amdkfd_process_info *process_info; struct amdgpu_bo_param bp; struct svm_range_bo *svm_bo; struct amdgpu_bo_user *ubo; struct amdgpu_bo *bo; struct kfd_process *p; + struct mm_struct *mm; int r; - pr_debug("[0x%lx 0x%lx]\n", prange->start, prange->last); - mutex_lock(&prange->lock); - if (prange->svm_bo) { - if (prange->ttm_res) { - /* We still have a reference, all is well */ - mutex_unlock(&prange->lock); - return 0; - } - if (svm_bo_ref_unless_zero(prange->svm_bo)) { - /* The BO was still around and we got - * a new reference to it - */ - mutex_unlock(&prange->lock); - pr_debug("reuse old bo [0x%lx 0x%lx]\n", - prange->start, prange->last); - - prange->ttm_res = &prange->svm_bo->bo->tbo.mem; - return 0; - } - - mutex_unlock(&prange->lock); - - /* We need a new svm_bo. Spin-loop to wait for concurrent - * svm_range_bo_release to finish removing this range from - * its range list. After this, it is safe to reuse the - * svm_bo pointer and svm_bo_list head. - */ - while (!list_empty_careful(&prange->svm_bo_list)) - ; + p = container_of(prange->svms, struct kfd_process, svms); + pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms, + prange->start, prange->last); - } else { - mutex_unlock(&prange->lock); - } + if (svm_range_validate_svm_bo(prange)) + return 0; svm_bo = svm_range_bo_new(); if (!svm_bo) { pr_debug("failed to alloc svm bo\n"); return -ENOMEM; } - + mm = get_task_mm(p->lead_thread); + if (!mm) { + pr_debug("failed to get mm\n"); + kfree(svm_bo); + return -ESRCH; + } + svm_bo->svms = prange->svms; + svm_bo->eviction_fence = + amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), + mm, + svm_bo); + mmput(mm); + INIT_WORK(&svm_bo->eviction_work, svm_range_evict_svm_bo_worker); + svm_bo->evicting = 0; memset(&bp, 0, sizeof(bp)); bp.size = prange->npages * PAGE_SIZE; bp.byte_align = PAGE_SIZE; bp.domain = AMDGPU_GEM_DOMAIN_VRAM; bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS; bp.flags |= clear ? 
AMDGPU_GEM_CREATE_VRAM_CLEARED : 0; + bp.flags |= AMDGPU_AMDKFD_CREATE_SVM_BO; bp.type = ttm_bo_type_device; bp.resv = NULL; r = amdgpu_bo_create_user(adev, &bp, &ubo); if (r) { pr_debug("failed %d to create bo\n", r); - kfree(svm_bo); - return r; + goto create_bo_failed; } bo = &ubo->bo; - - p = container_of(prange->svms, struct kfd_process, svms); r = amdgpu_bo_reserve(bo, true); if (r) { pr_debug("failed %d to reserve bo\n", r); @@ -430,8 +478,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, amdgpu_bo_unreserve(bo); goto reserve_bo_failed; } - process_info = p->kgd_process_info; - amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true); + amdgpu_bo_fence(bo, &svm_bo->eviction_fence->base, true); amdgpu_bo_unreserve(bo); @@ -447,8 +494,10 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, return 0; reserve_bo_failed: - kfree(svm_bo); amdgpu_bo_unref(&bo); +create_bo_failed: + dma_fence_put(&svm_bo->eviction_fence->base); + kfree(svm_bo); prange->ttm_res = NULL; return r; @@ -2329,6 +2378,74 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, return r; } +int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence) +{ + if (!fence) + return -EINVAL; + + if (dma_fence_is_signaled(&fence->base)) + return 0; + + if (fence->svm_bo) { + WRITE_ONCE(fence->svm_bo->evicting, 1); + schedule_work(&fence->svm_bo->eviction_work); + } + + return 0; +} + +static void svm_range_evict_svm_bo_worker(struct work_struct *work) +{ + struct svm_range_bo *svm_bo; + struct kfd_process *p; + struct mm_struct *mm; + + svm_bo = container_of(work, struct svm_range_bo, eviction_work); + if (!svm_bo_ref_unless_zero(svm_bo)) + return; /* svm_bo was freed while eviction was pending */ + + /* svm_range_bo_release destroys this worker thread. So during + * the lifetime of this thread, kfd_process and mm will be valid. 
+ */ + p = container_of(svm_bo->svms, struct kfd_process, svms); + mm = p->mm; + if (!mm) + return; + + mmap_read_lock(mm); + spin_lock(&svm_bo->list_lock); + while (!list_empty(&svm_bo->range_list)) { + struct svm_range *prange = + list_first_entry(&svm_bo->range_list, + struct svm_range, svm_bo_list); + list_del_init(&prange->svm_bo_list); + spin_unlock(&svm_bo->list_lock); + + pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, + prange->start, prange->last); + + mutex_lock(&prange->migrate_mutex); + svm_migrate_vram_to_ram(prange, svm_bo->eviction_fence->mm); + + mutex_lock(&prange->lock); + prange->svm_bo = NULL; + mutex_unlock(&prange->lock); + + mutex_unlock(&prange->migrate_mutex); + + spin_lock(&svm_bo->list_lock); + } + spin_unlock(&svm_bo->list_lock); + mmap_read_unlock(mm); + + dma_fence_signal(&svm_bo->eviction_fence->base); + /* This is the last reference to svm_bo, after svm_range_vram_node_free + * has been called in svm_migrate_vram_to_ram + */ + WARN_ONCE(kref_read(&svm_bo->kref) != 1, "This was not the last reference\n"); + svm_range_bo_unref(svm_bo); +} + static int svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 9a078d354cba..11b04a706ff9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -34,10 +34,14 @@ #include "kfd_priv.h" struct svm_range_bo { - struct amdgpu_bo *bo; - struct kref kref; - struct list_head range_list; /* all svm ranges shared this bo */ - spinlock_t list_lock; + struct amdgpu_bo *bo; + struct kref kref; + struct list_head range_list; /* all svm ranges sharing this bo */ + spinlock_t list_lock; + struct amdgpu_amdkfd_fence *eviction_fence; + struct work_struct eviction_work; + struct svm_range_list *svms; + uint32_t evicting; }; enum svm_work_list_ops { @@ -157,6 +161,7 @@ int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, struct svm_range *prange); int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint64_t addr); +int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence); void svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, struct mm_struct *mm, enum svm_work_list_ops op); -- cgit From 5f319c5c21b5909abb43d8aadc92a8aa549ee443 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Tue, 28 Jul 2020 16:04:41 -0500 Subject: drm/amdgpu: svm bo enable_signal call condition [why] To support the svm_bo eviction mechanism. [how] If the BO created has the AMDGPU_AMDKFD_CREATE_SVM_BO flag set, the enable_signal callback will be called inside amdgpu_evict_flags. This also causes gutting of the BO by removing all placements, so that TTM won't actually do an eviction. Instead it will discard the memory held by the BO. This is needed for HMM migration to user mode system memory pages.
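The gutting itself is two assignments in amdgpu_evict_flags() (shown in the diff below); with no placements left, TTM has nowhere to move the BO and, per the description above, discards its VRAM backing once the eviction fence signals:

	placement->num_placement = 0;      /* no target placement ... */
	placement->num_busy_placement = 0; /* ... even under memory pressure */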
Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b007e7d472b2..7a3489fa302e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -111,6 +111,20 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, } abo = ttm_to_amdgpu_bo(bo); + if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) { + struct dma_fence *fence; + struct dma_resv *resv = &bo->base._resv; + + rcu_read_lock(); + fence = rcu_dereference(resv->fence_excl); + if (fence && !fence->ops->signaled) + dma_fence_enable_sw_signaling(fence); + + placement->num_placement = 0; + placement->num_busy_placement = 0; + rcu_read_unlock(); + return; + } switch (bo->mem.mem_type) { case AMDGPU_PL_GDS: case AMDGPU_PL_GWS: -- cgit From 485bea1f90b3347bb5c1f8adad533f14d8d8ff1c Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Tue, 28 Jul 2020 16:03:05 -0500 Subject: drm/amdgpu: add svm_bo eviction to enable_signal cb Add support for svm_bo fence eviction to the amdgpu_amdkfd_fence.enable_signal callback. Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 53559643c712..1fe233cddb20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -28,6 +28,7 @@ #include #include #include "amdgpu_amdkfd.h" +#include "kfd_svm.h" static const struct dma_fence_ops amdkfd_fence_ops; static atomic_t fence_seq = ATOMIC_INIT(0); @@ -123,9 +124,13 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f) if (dma_fence_is_signaled(f)) return true; - if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f)) - return true; - + if (!fence->svm_bo) { + if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f)) + return true; + } else { + if (!svm_range_schedule_evict_svm_bo(fence)) + return true; + } return false; } -- cgit From cda0f85bfa5e5fddc51b94cfd6680c6697707a89 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 24 Feb 2021 23:46:28 -0500 Subject: drm/amdkfd: refine migration policy with xnack on With xnack on, the GPU vm fault handler decides the best restore location, then migrates the range there and updates the GPU mapping to recover from the GPU vm fault.
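The decision logic added below (svm_range_best_restore_location()) can be summarized as the following table, where "faulting GPU" is the GPU that took the vm fault:

	faulting GPU == preferred_loc             -> preferred_loc
	faulting GPU set in bitmap_access         -> faulting GPU
	faulting GPU set in bitmap_aip:
		actual_loc == 0 (system memory)   -> 0 (CPU)
		same XGMI hive as actual_loc GPU  -> actual_loc
		otherwise                         -> 0 (CPU)
	none of the above                         -> -1 (no access)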
Signed-off-by: Philip Yang Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 7 +- drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 16 ++++ drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 136 ++++++++++++++++++++++++++++--- 5 files changed, 150 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 2a32c423a393..3726a671d7d8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -480,18 +480,19 @@ out: * svm_migrate_ram_to_vram - migrate svm range from system to device * @prange: range structure * @best_loc: the device to migrate to + * @mm: the process mm structure * * Context: Process context, caller hold mmap read lock, svms lock, prange lock * * Return: * 0 - OK, otherwise error code */ -int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc) +int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm) { unsigned long addr, start, end; struct vm_area_struct *vma; struct amdgpu_device *adev; - struct mm_struct *mm; int r = 0; if (prange->actual_loc == best_loc) { @@ -515,8 +516,6 @@ int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc) start = prange->start << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT; - mm = current->mm; - for (addr = start; addr < end;) { unsigned long next; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index 082b9bb22270..53c899b80b85 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -38,7 +38,8 @@ enum MIGRATION_COPY_DIR { FROM_VRAM_TO_RAM }; -int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc); +int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm); int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm); unsigned long svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index a58bea31e23c..a1ddcf6446db 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -864,6 +864,9 @@ struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid); struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id); +int kfd_process_gpuid_from_kgd(struct kfd_process *p, + struct amdgpu_device *adev, uint32_t *gpuid, + uint32_t *gpuidx); static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p, uint32_t gpuidx, uint32_t *gpuid) { return gpuidx < p->n_pdds ? 
p->pdds[gpuidx]->dev->id : -EINVAL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index b8db509e2bbd..d7006ef2388f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1676,6 +1676,22 @@ int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id) return -EINVAL; } +int +kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev, + uint32_t *gpuid, uint32_t *gpuidx) +{ + struct kgd_dev *kgd = (struct kgd_dev *)adev; + int i; + + for (i = 0; i < p->n_pdds; i++) + if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) { + *gpuid = p->pdds[i]->dev->id; + *gpuidx = i; + return 0; + } + return -EINVAL; +} + static void evict_process_worker(struct work_struct *work) { int ret; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index c771532420dc..7206e0b7b422 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1317,6 +1317,24 @@ static int svm_range_validate_and_map(struct mm_struct *mm, if (gpuidx < MAX_GPU_INSTANCE) { bitmap_zero(ctx.bitmap, MAX_GPU_INSTANCE); bitmap_set(ctx.bitmap, gpuidx, 1); + } else if (ctx.process->xnack_enabled) { + bitmap_copy(ctx.bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE); + + /* If the range is prefetched to a GPU, or a GPU retry fault + * migrates the range to a GPU that has the ACCESS attribute to + * the range, create the mapping on that GPU. + */ + if (prange->actual_loc) { + gpuidx = kfd_process_gpuidx_from_gpuid(ctx.process, + prange->actual_loc); + if (gpuidx < 0) { + WARN_ONCE(1, "failed get device by id 0x%x\n", + prange->actual_loc); + return -EINVAL; + } + if (test_bit(gpuidx, prange->bitmap_access)) + bitmap_set(ctx.bitmap, gpuidx, 1); + } } else { bitmap_or(ctx.bitmap, prange->bitmap_access, prange->bitmap_aip, MAX_GPU_INSTANCE); @@ -2046,15 +2064,75 @@ svm_range_from_addr(struct svm_range_list *svms, unsigned long addr, return NULL; } +/* svm_range_best_restore_location - decide the best fault restore location + * @prange: svm range structure + * @adev: the GPU on which vm fault happened + * + * This is only called when xnack is on, to decide the best location to restore + * the range mapping after a GPU vm fault. The caller uses the best location to + * do migration if the actual loc is not the best location, then updates the + * GPU page table mapping to the best location. + * + * If the vm fault gpu is the range preferred loc, best_loc is the preferred loc. + * If the vm fault gpu idx is on the range ACCESSIBLE bitmap, best_loc is the + * vm fault gpu. + * If the vm fault gpu idx is on the range ACCESSIBLE_IN_PLACE bitmap, then: + * if the range actual loc is cpu, best_loc is cpu; + * if the vm fault gpu is on the same xgmi hive as the range actual loc gpu, + * best_loc is the range actual loc. + * Otherwise, the GPU has no access, and best_loc is -1.
+ * + * Return: + * -1 means vm fault GPU no access + * 0 for CPU or GPU id + */ +static int32_t +svm_range_best_restore_location(struct svm_range *prange, + struct amdgpu_device *adev, + int32_t *gpuidx) +{ + struct amdgpu_device *bo_adev; + struct kfd_process *p; + uint32_t gpuid; + int r; + + p = container_of(prange->svms, struct kfd_process, svms); + + r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, gpuidx); + if (r < 0) { + pr_debug("failed to get gpuid from kgd\n"); + return -1; + } + + if (prange->preferred_loc == gpuid) + return prange->preferred_loc; + + if (test_bit(*gpuidx, prange->bitmap_access)) + return gpuid; + + if (test_bit(*gpuidx, prange->bitmap_aip)) { + if (!prange->actual_loc) + return 0; + + bo_adev = svm_range_get_adev_by_id(prange, prange->actual_loc); + if (amdgpu_xgmi_same_hive(adev, bo_adev)) + return prange->actual_loc; + else + return 0; + } + + return -1; +} + int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint64_t addr) { - int r = 0; struct mm_struct *mm = NULL; - struct svm_range *prange; struct svm_range_list *svms; + struct svm_range *prange; struct kfd_process *p; + int32_t best_loc, gpuidx; + int r = 0; p = kfd_lookup_process_by_pasid(pasid); if (!p) { @@ -2089,11 +2167,48 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, mutex_lock(&prange->migrate_mutex); - r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, false, false); - if (r) - pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpu\n", r, + best_loc = svm_range_best_restore_location(prange, adev, &gpuidx); + if (best_loc == -1) { + pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n", svms, prange->start, prange->last); + r = -EACCES; + goto out_unlock_range; + } + + pr_debug("svms %p [0x%lx 0x%lx] best restore 0x%x, actual loc 0x%x\n", + svms, prange->start, prange->last, best_loc, + prange->actual_loc); + if (prange->actual_loc != best_loc) { + if (best_loc) { + r = svm_migrate_ram_to_vram(prange, best_loc, mm); + if (r) { + pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n", + r, addr); + /* Fallback to system memory if migration to + * VRAM failed + */ + if (prange->actual_loc) + r = svm_migrate_vram_to_ram(prange, mm); + else + r = 0; + } + } else { + r = svm_migrate_vram_to_ram(prange, mm); + } + if (r) { + pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n", + r, svms, prange->start, prange->last); + goto out_unlock_range; + } + } + + r = svm_range_validate_and_map(mm, prange, gpuidx, false, false); + if (r) + pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n", + r, svms, prange->start, prange->last); + +out_unlock_range: mutex_unlock(&prange->migrate_mutex); out_unlock_svms: mutex_unlock(&svms->lock); @@ -2230,7 +2345,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, return 0; } -/* svm_range_best_location - decide the best actual location +/* svm_range_best_prefetch_location - decide the best prefetch location * @prange: svm range structure * * For xnack off: @@ -2252,7 +2367,8 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, * Return: * 0 for CPU or GPU id */ -static uint32_t svm_range_best_location(struct svm_range *prange) +static uint32_t +svm_range_best_prefetch_location(struct svm_range *prange) { DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); uint32_t best_loc = prange->prefetch_loc; @@ -2354,7 +2470,7 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, int r = 0; *migrated = false; - 
best_loc = svm_range_best_location(prange); + best_loc = svm_range_best_prefetch_location(prange); if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED || best_loc == prange->actual_loc) @@ -2366,10 +2482,10 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, if (best_loc) { pr_debug("migrate from ram to vram\n"); - r = svm_migrate_ram_to_vram(prange, best_loc); + r = svm_migrate_ram_to_vram(prange, best_loc, mm); } else { pr_debug("migrate from vram to ram\n"); - r = svm_migrate_vram_to_ram(prange, current->mm); + r = svm_migrate_vram_to_ram(prange, mm); } if (!r) -- cgit From 564d2b92c7d4569cdc76a08fd700de1309faa5e8 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 24 Feb 2021 23:55:27 -0500 Subject: drm/amdkfd: add svm range validate timestamp With xnack on, add a validate timestamp in order to handle GPU vm faults from multiple GPUs. If a GPU retry fault needs to migrate the range to the best restore location, use the range validate timestamp to record the system timestamp after the range is restored to update the GPU page table. Because multiple pages of the same range generate multiple retry faults, define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING as the time period during which pending retry faults may still arrive after the page table update, to skip duplicate retry faults of the same range. If the difference between the system timestamp and the range's last validate timestamp is bigger than AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING, the retry fault is from another GPU; continue to handle the retry fault and recover. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 17 +++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 2 ++ 2 files changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 7206e0b7b422..0694211a118b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -35,6 +35,11 @@ #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1 +/* Long enough to ensure no retry fault comes after the svm range is restored + * and the page table is updated.
+ */ +#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING 2000 + static void svm_range_evict_svm_bo_worker(struct work_struct *work); static bool svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, @@ -269,6 +274,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, INIT_LIST_HEAD(&prange->deferred_list); INIT_LIST_HEAD(&prange->child_list); atomic_set(&prange->invalid, 0); + prange->validate_timestamp = ktime_to_us(ktime_get()); mutex_init(&prange->migrate_mutex); mutex_init(&prange->lock); svm_range_set_default_attributes(&prange->preferred_loc, @@ -1392,6 +1398,9 @@ unlock_out: unreserve_out: svm_range_unreserve_bos(&ctx); + if (!r) + prange->validate_timestamp = ktime_to_us(ktime_get()); + return r; } @@ -2131,6 +2140,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, struct svm_range_list *svms; struct svm_range *prange; struct kfd_process *p; + uint64_t timestamp; int32_t best_loc, gpuidx; int r = 0; @@ -2166,6 +2176,13 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, } mutex_lock(&prange->migrate_mutex); + timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp; + /* skip duplicate vm fault on different pages of same range */ + if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) { + pr_debug("svms 0x%p [0x%lx %lx] already restored\n", + svms, prange->start, prange->last); + goto out_unlock_range; + } best_loc = svm_range_best_restore_location(prange, adev, &gpuidx); if (best_loc == -1) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 11b04a706ff9..aa829b3c992d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -86,6 +86,7 @@ struct svm_work_list_item { * @actual_loc: the actual location, 0 for CPU, or GPU id * @granularity: migration granularity, log2 num pages * @invalid: not 0 means cpu page table is invalidated + * @validate_timestamp: system timestamp when range is validated * @notifier: register mmu interval notifier * @work_item: deferred work item information * @deferred_list: list header used to add range to deferred list @@ -122,6 +123,7 @@ struct svm_range { uint32_t actual_loc; uint8_t granularity; atomic_t invalid; + uint64_t validate_timestamp; struct mmu_interval_notifier notifier; struct svm_work_list_item work_item; struct list_head deferred_list; -- cgit From 1a3b2b5dca1924f2e7eae618ad79471c4a253236 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 24 Feb 2021 23:57:33 -0500 Subject: drm/amdkfd: multiple gpu migrate vram to vram If a range is prefetched to a gpu while its actual location is another gpu, or GPU retry fault restore pages migrates a range whose actual location is a gpu, then migrate from one gpu to another gpu. Use system memory as a bridge because the sdma engine may not be able to access another gpu's vram: use the sdma of the source gpu to migrate to system memory, then use the sdma of the destination gpu to migrate from system memory to the gpu. Print out gpuid or gpuidx in debug messages.
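The two-hop bridge migration reduces to two existing migrations (condensed from svm_migrate_vram_to_vram() in the diff below):

	/* source GPU's SDMA copies VRAM -> system memory */
	r = svm_migrate_vram_to_ram(prange, mm);
	if (r)
		return r;
	/* destination GPU's SDMA copies system memory -> its VRAM */
	return svm_migrate_ram_to_vram(prange, best_loc, mm);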
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 47 +++++++++++++++++++++++++++-- drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 4 +-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 51 +++++++++++++++++++++++++------- 3 files changed, 87 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 3726a671d7d8..d44a46eb00d6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -487,8 +487,9 @@ out: * Return: * 0 - OK, otherwise error code */ -int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, - struct mm_struct *mm) +static int +svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm) { unsigned long addr, start, end; struct vm_area_struct *vma; @@ -742,6 +743,48 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) return r; } +/** + * svm_migrate_vram_to_vram - migrate svm range from device to device + * @prange: range structure + * @best_loc: the device to migrate to + * @mm: process mm, use current->mm if NULL + * + * Context: Process context, caller hold mmap read lock, svms lock, prange lock + * + * Return: + * 0 - OK, otherwise error code + */ +static int +svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm) +{ + int r; + + /* + * TODO: for both devices with PCIe large bar or on same xgmi hive, skip + * system memory as migration bridge + */ + + pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc); + + r = svm_migrate_vram_to_ram(prange, mm); + if (r) + return r; + + return svm_migrate_ram_to_vram(prange, best_loc, mm); +} + +int +svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm) +{ + if (!prange->actual_loc) + return svm_migrate_ram_to_vram(prange, best_loc, mm); + else + return svm_migrate_vram_to_vram(prange, best_loc, mm); + +} + /** * svm_migrate_to_ram - CPU page fault handler * @vmf: CPU vm fault vma, address diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index 53c899b80b85..37ad99cb073d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -38,8 +38,8 @@ enum MIGRATION_COPY_DIR { FROM_VRAM_TO_RAM }; -int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, - struct mm_struct *mm); +int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm); int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm); unsigned long svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 0694211a118b..0e0b4ffd20ab 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -348,8 +348,11 @@ static void svm_range_bo_unref(struct svm_range_bo *svm_bo) kref_put(&svm_bo->kref, svm_range_bo_release); } -static bool svm_range_validate_svm_bo(struct svm_range *prange) +static bool +svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange) { + struct amdgpu_device *bo_adev; + mutex_lock(&prange->lock); if (!prange->svm_bo) { mutex_unlock(&prange->lock); @@ -361,6 +364,22 @@ static bool svm_range_validate_svm_bo(struct svm_range *prange) return true; } if 
(svm_bo_ref_unless_zero(prange->svm_bo)) { + /* + * Migrate from GPU to GPU, remove range from source bo_adev + * svm_bo range list, and return false to allocate svm_bo from + * destination adev. + */ + bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); + if (bo_adev != adev) { + mutex_unlock(&prange->lock); + + spin_lock(&prange->svm_bo->list_lock); + list_del_init(&prange->svm_bo_list); + spin_unlock(&prange->svm_bo->list_lock); + + svm_range_bo_unref(prange->svm_bo); + return false; + } if (READ_ONCE(prange->svm_bo->evicting)) { struct dma_fence *f; struct svm_range_bo *svm_bo; @@ -434,7 +453,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms, prange->start, prange->last); - if (svm_range_validate_svm_bo(prange)) + if (svm_range_validate_svm_bo(adev, prange)) return 0; svm_bo = svm_range_bo_new(); @@ -1173,6 +1192,7 @@ static int svm_range_map_to_gpus(struct svm_range *prange, p = container_of(prange->svms, struct kfd_process, svms); for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { + pr_debug("mapping to gpu idx 0x%x\n", gpuidx); pdd = kfd_process_device_from_gpuidx(p, gpuidx); if (!pdd) { pr_debug("failed to find device idx %d\n", gpuidx); @@ -2198,7 +2218,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, if (prange->actual_loc != best_loc) { if (best_loc) { - r = svm_migrate_ram_to_vram(prange, best_loc, mm); + r = svm_migrate_to_vram(prange, best_loc, mm); if (r) { pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n", r, addr); @@ -2406,6 +2426,11 @@ svm_range_best_prefetch_location(struct svm_range *prange) goto out; bo_adev = svm_range_get_adev_by_id(prange, best_loc); + if (!bo_adev) { + WARN_ONCE(1, "failed to get device by id 0x%x\n", best_loc); + best_loc = 0; + goto out; + } bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip, MAX_GPU_INSTANCE); @@ -2493,20 +2518,24 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, best_loc == prange->actual_loc) return 0; + /* + * Prefetch to GPU without the host access flag: set actual_loc to the + * gpu; validation on the gpu and mapping to gpus will be handled + * afterwards. + */ if (best_loc && !prange->actual_loc && - !(prange->flags & KFD_IOCTL_SVM_FLAG_HOST_ACCESS)) + !(prange->flags & KFD_IOCTL_SVM_FLAG_HOST_ACCESS)) { + prange->actual_loc = best_loc; return 0; + } - if (best_loc) { - pr_debug("migrate from ram to vram\n"); - r = svm_migrate_ram_to_vram(prange, best_loc, mm); - } else { - pr_debug("migrate from vram to ram\n"); + if (!best_loc) { r = svm_migrate_vram_to_ram(prange, mm); + *migrated = !r; + return r; } - if (!r) - *migrated = true; + r = svm_migrate_to_vram(prange, best_loc, mm); + *migrated = !r; return r; } -- cgit From 4c166eb95decf03cfee42d03b42145513a15554a Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 9 Dec 2020 21:51:50 -0500 Subject: drm/amdkfd: Add SVM API support capability bits SVMAPISupported property added to HSA_CAPABILITY; the value matches HSA_CAPABILITY defined in the Thunk spec: SVMAPISupported: it will not be supported on older kernels that don't have HMM or on systems with GFXv8 or older GPUs without support for 48-bit virtual addresses. CoherentHostAccess property added to HSA_MEMORYPROPERTY; the value matches HSA_MEMORYPROPERTY defined in the Thunk spec: CoherentHostAccess: whether or not device memory can be coherently accessed by the host CPU.
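For reference, userspace can test the new capability bit from the topology sysfs properties. A hypothetical sketch follows; the path layout and the "capability <decimal>" line format are assumptions based on existing KFD topology conventions, not something this patch defines:

#include <stdio.h>
#include <string.h>

#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000ULL

/* Hypothetical userspace helper: returns 1 if the node's "capability"
 * property has the SVM API bit set. path would be something like
 * /sys/class/kfd/kfd/topology/nodes/0/properties (assumed layout).
 */
static int node_supports_svm(const char *path)
{
	char name[64];
	unsigned long long val;
	FILE *f = fopen(path, "r");

	if (!f)
		return 0;
	while (fscanf(f, "%63s %llu", name, &val) == 2) {
		if (!strcmp(name, "capability")) {
			fclose(f);
			return !!(val & HSA_CAP_SVMAPI_SUPPORTED);
		}
	}
	fclose(f);
	return 0;
}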
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 6 ++++++ drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 10 ++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index cdef608db4f4..083ac9babfa8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1419,6 +1419,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu) dev->node_props.capability |= (adev->ras_features != 0) ? HSA_CAP_RASEVENTNOTIFY : 0; + /* SVM API and HMM page migration work together; device memory type + * is initialized to non-zero when page migration registers device memory. + */ + if (adev->kfd.dev->pgmap.type != 0) + dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED; + kfd_debug_print_topology(); if (!res) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index b8b68087bd7a..6bd6380b0ee0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -53,8 +53,9 @@ #define HSA_CAP_ASIC_REVISION_MASK 0x03c00000 #define HSA_CAP_ASIC_REVISION_SHIFT 22 #define HSA_CAP_SRAM_EDCSUPPORTED 0x04000000 +#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000 -#define HSA_CAP_RESERVED 0xf80f8000 +#define HSA_CAP_RESERVED 0xf00f8000 struct kfd_node_properties { uint64_t hive_id; @@ -98,9 +99,10 @@ struct kfd_node_properties { #define HSA_MEM_HEAP_TYPE_GPU_LDS 4 #define HSA_MEM_HEAP_TYPE_GPU_SCRATCH 5 -#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001 -#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002 -#define HSA_MEM_FLAGS_RESERVED 0xfffffffc +#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001 +#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002 +#define HSA_MEM_FLAGS_COHERENTHOSTACCESS 0x00000004 +#define HSA_MEM_FLAGS_RESERVED 0xfffffff8 struct kfd_mem_properties { struct list_head list; -- cgit From 4ab159d2547c26b34a4ff4770598b72660da1461 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 29 Mar 2021 18:49:12 -0400 Subject: drm/amdkfd: Add CONFIG_HSA_AMD_SVM Control whether to build SVM support into amdgpu with a Kconfig option. This makes it easier to disable the feature in production kernels if it causes problems. Use "depends on" instead of "select" for DEVICE_PRIVATE, as is recommended for visible options.
+ +config HSA_AMD_SVM + bool "Enable HMM-based shared virtual memory manager" + depends on HSA_AMD && DEVICE_PRIVATE + default y + select HMM_MIRROR + select MMU_NOTIFIER + help + Enable this to use unified memory and managed memory in HIP. This + memory manager supports two modes of operation: one based on + preemptions and one based on page faults. To enable page fault + based memory management on most GFXv9 GPUs, set the module + parameter amdgpu.noretry=0. diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index a93301dbc464..c4f3aff11072 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -54,9 +54,7 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_dbgdev.o \ $(AMDKFD_PATH)/kfd_dbgmgr.o \ $(AMDKFD_PATH)/kfd_smi_events.o \ - $(AMDKFD_PATH)/kfd_crat.o \ - $(AMDKFD_PATH)/kfd_svm.o \ - $(AMDKFD_PATH)/kfd_migrate.o + $(AMDKFD_PATH)/kfd_crat.o ifneq ($(CONFIG_AMD_IOMMU_V2),) AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o @@ -65,3 +63,8 @@ endif ifneq ($(CONFIG_DEBUG_FS),) AMDKFD_FILES += $(AMDKFD_PATH)/kfd_debugfs.o endif + +ifneq ($(CONFIG_HSA_AMD_SVM),) +AMDKFD_FILES += $(AMDKFD_PATH)/kfd_svm.o \ + $(AMDKFD_PATH)/kfd_migrate.o +endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 913cc29d8857..059c3f1ca27d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1770,6 +1770,7 @@ static int kfd_ioctl_set_xnack_mode(struct file *filep, return r; } +#if IS_ENABLED(CONFIG_HSA_AMD_SVM) static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) { struct kfd_ioctl_svm_args *args = data; @@ -1795,6 +1796,12 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) return r; } +#else +static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) +{ + return -EPERM; +} +#endif #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index 37ad99cb073d..0de76b5d4973 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -25,6 +25,8 @@ #ifndef KFD_MIGRATE_H_ #define KFD_MIGRATE_H_ +#if IS_ENABLED(CONFIG_HSA_AMD_SVM) + #include #include #include @@ -44,17 +46,20 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm); unsigned long svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); -#if defined(CONFIG_DEVICE_PRIVATE) int svm_migrate_init(struct amdgpu_device *adev); void svm_migrate_fini(struct amdgpu_device *adev); #else + static inline int svm_migrate_init(struct amdgpu_device *adev) { - DRM_WARN_ONCE("DEVICE_PRIVATE kernel config option is not enabled, " - "add CONFIG_DEVICE_PRIVATE=y in config file to fix\n"); - return -ENODEV; + return 0; +} +static inline void svm_migrate_fini(struct amdgpu_device *adev) +{ + /* empty */ } -static inline void svm_migrate_fini(struct amdgpu_device *adev) {} -#endif + +#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */ + #endif /* KFD_MIGRATE_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index aa829b3c992d..573f984b81fe 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -25,6 +25,8 @@ #ifndef KFD_SVM_H_ #define KFD_SVM_H_ +#if IS_ENABLED(CONFIG_HSA_AMD_SVM) + #include
#include #include @@ -173,4 +175,32 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, void svm_range_free_dma_mappings(struct svm_range *prange); void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm); +#else + +struct kfd_process; + +static inline int svm_range_list_init(struct kfd_process *p) +{ + return 0; +} +static inline void svm_range_list_fini(struct kfd_process *p) +{ + /* empty */ +} + +static inline int svm_range_restore_pages(struct amdgpu_device *adev, + unsigned int pasid, uint64_t addr) +{ + return -EFAULT; +} + +static inline int svm_range_schedule_evict_svm_bo( + struct amdgpu_amdkfd_fence *fence) +{ + WARN_ONCE(1, "SVM eviction fence triggered, but SVM is disabled"); + return -EINVAL; +} + +#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */ + #endif /* KFD_SVM_H_ */ -- cgit From 2196927bcb4f4ed45bd1e6d44c1c8f805d984173 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 16 Apr 2021 15:37:11 +0100 Subject: drm/amd/amdgpu/amdgpu_device: Remove unused variable 'r' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/amd/amdgpu/amdgpu_device.c: In function ‘amdgpu_device_suspend’: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:3733:6: warning: variable ‘r’ set but not used [-Wunused-but-set-variable] Cc: Alex Deucher Cc: "Christian König" Cc: David Airlie Cc: Daniel Vetter Cc: Sumit Semwal Cc: amd-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Signed-off-by: Lee Jones Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b4ad1c055c70..eef54b265ffd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3730,7 +3730,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev) int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) { struct amdgpu_device *adev = drm_to_adev(dev); - int r; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; @@ -3745,7 +3744,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) amdgpu_ras_suspend(adev); - r = amdgpu_device_ip_suspend_phase1(adev); + amdgpu_device_ip_suspend_phase1(adev); if (!adev->in_s0ix) amdgpu_amdkfd_suspend(adev, adev->in_runpm); @@ -3755,7 +3754,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) amdgpu_fence_driver_suspend(adev); - r = amdgpu_device_ip_suspend_phase2(adev); + amdgpu_device_ip_suspend_phase2(adev); /* evict remaining vram memory * This second call to evict vram is to evict the gart page table * using the CPU. -- cgit From d3da76aa770bf2fe523f2d3365dfb4d0c11fdaf6 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 16 Apr 2021 15:37:10 +0100 Subject: drm/radeon/radeon_device: Provide function name in kernel-doc header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/radeon/radeon_device.c:1101: warning: This comment starts with '/**', but isn't a kernel-doc comment. 
Refer Documentation/doc-guide/kernel-doc.rst Cc: Alex Deucher Cc: "Christian König" Cc: David Airlie Cc: Daniel Vetter Cc: amd-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Reviewed-by: Christian König Signed-off-by: Lee Jones Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index cc445c4cba2e..46eea01950cb 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1098,7 +1098,8 @@ static bool radeon_check_pot_argument(int arg) } /** - * Determine a sensible default GART size according to ASIC family. + * radeon_gart_size_auto - Determine a sensible default GART size + * according to ASIC family. * * @family: ASIC family name */ -- cgit From b16cc4bb1abbe3c725228f4020f9ebd8ea1d1188 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 16 Apr 2021 15:37:14 +0100 Subject: drm/amd/amdgpu/amdgpu_fence: Provide description for 'sched_score' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c:444: warning: Function parameter or member 'sched_score' not described in 'amdgpu_fence_driver_init_ring' Cc: Alex Deucher Cc: "Christian König" Cc: David Airlie Cc: Daniel Vetter Cc: Sumit Semwal Cc: Jerome Glisse Cc: amd-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Reviewed-by: Christian König Signed-off-by: Lee Jones Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 47ea46859618..30772608eac6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -434,6 +434,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, * * @ring: ring to init the fence driver on * @num_hw_submission: number of entries on the hardware queue + * @sched_score: optional score atomic shared with other schedulers * * Init the fence driver for the requested ring (all asics). * Helper function for amdgpu_fence_driver_init(). -- cgit From 777d9000d91b8ac92e55b6ab8496890fbcbbdf73 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 16 Apr 2021 15:37:16 +0100 Subject: drm/amd/amdgpu/amdgpu_gart: Correct a couple of function names in the docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c:73: warning: expecting prototype for amdgpu_dummy_page_init(). Prototype was for amdgpu_gart_dummy_page_init() instead drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c:96: warning: expecting prototype for amdgpu_dummy_page_fini(). 
Prototype was for amdgpu_gart_dummy_page_fini() instead Cc: Alex Deucher Cc: "Christian König" Cc: David Airlie Cc: Daniel Vetter Cc: Nirmoy Das Cc: amd-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Reviewed-by: Nirmoy Das Reviewed-by: Christian König Signed-off-by: Lee Jones Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index c5a9a4fb10d2..5562b5c90c03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -60,7 +60,7 @@ */ /** - * amdgpu_dummy_page_init - init dummy page used by the driver + * amdgpu_gart_dummy_page_init - init dummy page used by the driver * * @adev: amdgpu_device pointer * @@ -86,7 +86,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev) } /** - * amdgpu_dummy_page_fini - free dummy page used by the driver + * amdgpu_gart_dummy_page_fini - free dummy page used by the driver * * @adev: amdgpu_device pointer * -- cgit From 27aa4a69b4ac61c2e1c8263ac2e4851db8d706e9 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 16 Apr 2021 15:37:17 +0100 Subject: drm/amd/amdgpu/amdgpu_ttm: Fix incorrectly documented function 'amdgpu_ttm_copy_mem_to_mem()' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c:311: warning: expecting prototype for amdgpu_copy_ttm_mem_to_mem(). Prototype was for amdgpu_ttm_copy_mem_to_mem() instead Cc: Alex Deucher Cc: "Christian König" Cc: David Airlie Cc: Daniel Vetter Cc: Sumit Semwal Cc: Jerome Glisse Cc: amd-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Reviewed-by: Christian König Signed-off-by: Lee Jones Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 7a3489fa302e..3251f6b67e23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -294,7 +294,7 @@ error_free: } /** - * amdgpu_copy_ttm_mem_to_mem - Helper function for copy + * amdgpu_ttm_copy_mem_to_mem - Helper function for copy * @adev: amdgpu device * @src: buffer/address where to read from * @dst: buffer/address where to write to -- cgit From 03691f55028a93da0d979737950823e24e473961 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 16 Apr 2021 15:37:18 +0100 Subject: drm/amd/amdgpu/amdgpu_ring: Provide description for 'sched_score' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c:169: warning: Function parameter or member 'sched_score' not described in 'amdgpu_ring_init' Cc: Alex Deucher Cc: "Christian König" Cc: David Airlie Cc: Daniel Vetter Cc: Sumit Semwal Cc: amd-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Reviewed-by: Christian König Signed-off-by: Lee Jones Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 
688624ebe421..7b634a1517f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -158,6 +158,7 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) * @irq_src: interrupt source to use for this ring * @irq_type: interrupt type to use for this ring * @hw_prio: ring priority (NORMAL/HIGH) + * @sched_score: optional score atomic shared with other schedulers * * Initialize the driver information for the selected ring (all asics). * Returns 0 on success, error on failure. -- cgit From 3bffd71debc68feb571bf6e81be286675acbb972 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 16 Apr 2021 15:37:20 +0100 Subject: drm/amd/amdgpu/amdgpu_cs: Repair some function naming disparity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c:685: warning: expecting prototype for cs_parser_fini(). Prototype was for amdgpu_cs_parser_fini() instead drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c:1502: warning: expecting prototype for amdgpu_cs_wait_all_fence(). Prototype was for amdgpu_cs_wait_all_fences() instead drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c:1656: warning: expecting prototype for amdgpu_cs_find_bo_va(). Prototype was for amdgpu_cs_find_mapping() instead Cc: Alex Deucher Cc: "Christian König" Cc: David Airlie Cc: Daniel Vetter Cc: Sumit Semwal Cc: Jerome Glisse Cc: amd-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Reviewed-by: Christian König Signed-off-by: Lee Jones Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b5c766998045..90136f9dedd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -672,7 +672,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) } /** - * cs_parser_fini() - clean parser states + * amdgpu_cs_parser_fini() - clean parser states * @parser: parser structure holding parsing context. * @error: error number * @backoff: indicator to backoff the reservation @@ -1488,7 +1488,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, } /** - * amdgpu_cs_wait_all_fence - wait on all fences to signal + * amdgpu_cs_wait_all_fences - wait on all fences to signal * * @adev: amdgpu device * @filp: file private @@ -1639,7 +1639,7 @@ err_free_fences: } /** - * amdgpu_cs_find_bo_va - find bo_va for VM address + * amdgpu_cs_find_mapping - find bo_va for VM address * * @parser: command submission parser context * @addr: VM address -- cgit From ffe8768fb8f391cb478466778c55e2110525c15c Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 15 Apr 2021 16:45:25 +0800 Subject: drm/vc4: remove unused function Fix the following clang warning: drivers/gpu/drm/vc4/vc4_vec.c:201:1: warning: unused function 'to_vc4_vec_connector' [-Wunused-function]. 
Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/1618476325-112629-1-git-send-email-jiapeng.chong@linux.alibaba.com --- drivers/gpu/drm/vc4/vc4_vec.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_vec.c b/drivers/gpu/drm/vc4/vc4_vec.c index bd5b8eb58b18..090529d0d5dc 100644 --- a/drivers/gpu/drm/vc4/vc4_vec.c +++ b/drivers/gpu/drm/vc4/vc4_vec.c @@ -197,12 +197,6 @@ struct vc4_vec_connector { struct drm_encoder *encoder; }; -static inline struct vc4_vec_connector * -to_vc4_vec_connector(struct drm_connector *connector) -{ - return container_of(connector, struct vc4_vec_connector, base); -} - enum vc4_vec_tv_mode_id { VC4_VEC_TV_MODE_NTSC, VC4_VEC_TV_MODE_NTSC_J, -- cgit From acca7762eb71bc05a8f28d29320d193150051f79 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 21 Apr 2021 13:20:31 +0800 Subject: drm/i915/dp: Use slow and wide link training for everything MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The screen flickers on an Innolux eDP 1.3 panel when clock rate 540000 is in use. According to the panel vendor, although clock rate 540000 is advertised, the maximum clock rate the panel really supports is 270000. Ville Syrjälä mentioned that fast and narrow also breaks some eDP 1.4 panels, so use slow and wide training for all panels to resolve the issue. A user also confirmed that the new strategy doesn't introduce any regression on XPS 9380. v2: - Use slow and wide for everything. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3384 References: https://gitlab.freedesktop.org/drm/intel/-/issues/272 Signed-off-by: Kai-Heng Feng Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210421052054.1434718-1-kai.heng.feng@canonical.com --- drivers/gpu/drm/i915/display/intel_dp.c | 59 +++++------------------------------------------------ 1 file changed, 5 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 52ea09fc5e70..4ad12dde5938 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1095,44 +1095,6 @@ intel_dp_compute_link_config_wide(struct intel_dp *intel_dp, return -EINVAL; } -/* Optimize link config in order: max bpp, min lanes, min clock */ -static int -intel_dp_compute_link_config_fast(struct intel_dp *intel_dp, - struct intel_crtc_state *pipe_config, - const struct link_config_limits *limits) -{ - const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; - int bpp, clock, lane_count; - int mode_rate, link_clock, link_avail; - - for (bpp = limits->max_bpp; bpp >= limits->min_bpp; bpp -= 2 * 3) { - int output_bpp = intel_dp_output_bpp(pipe_config->output_format, bpp); - - mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock, - output_bpp); - - for (lane_count = limits->min_lane_count; - lane_count <= limits->max_lane_count; - lane_count <<= 1) { - for (clock = limits->min_clock; clock <= limits->max_clock; clock++) { - link_clock = intel_dp->common_rates[clock]; - link_avail = intel_dp_max_data_rate(link_clock, - lane_count); - - if (mode_rate <= link_avail) { - pipe_config->lane_count = lane_count; - pipe_config->pipe_bpp = bpp; - pipe_config->port_clock = link_clock; - - return 0; - } - } - } - } - - return -EINVAL; -} - static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 dsc_max_bpc) { int i, num_bpc; @@ -1382,22 +1344,11 @@
intel_dp_compute_link_config(struct intel_encoder *encoder, intel_dp_can_bigjoiner(intel_dp)) pipe_config->bigjoiner = true; - if (intel_dp_is_edp(intel_dp)) - /* - * Optimize for fast and narrow. eDP 1.3 section 3.3 and eDP 1.4 - * section A.1: "It is recommended that the minimum number of - * lanes be used, using the minimum link rate allowed for that - * lane configuration." - * - * Note that we fall back to the max clock and lane count for eDP - * panels that fail with the fast optimal settings (see - * intel_dp->use_max_params), in which case the fast vs. wide - * choice doesn't matter. - */ - ret = intel_dp_compute_link_config_fast(intel_dp, pipe_config, &limits); - else - /* Optimize for slow and wide. */ - ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits); + /* + * Optimize for slow and wide for everything, because there are some + * eDP 1.3 and 1.4 panels that don't work well with fast and narrow. + */ + ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits); /* enable compression if the mode doesn't fit available BW */ drm_dbg_kms(&i915->drm, "Force DSC en = %d\n", intel_dp->force_dsc_en); -- cgit From dc09b30969fd1cf4b9a9c8075d62efeb2303b9b7 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Apr 2021 05:44:06 -0400 Subject: drm/i915/dmc: Let's abstract the dmc path. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although this abstraction removes the convenience of grepping for the file name, it: - makes addition easier. - makes it easier to tweak the global path when experiments are needed. - gets in sync with guc/huc, without getting overly abstracted. - allows a future junction with CSR_VERSION for simplicity. - enforces that new dmc files never deviate from this standard. v2: define DMC_PATH inside .c (Lucas) Cc: Fei Yang Cc: Jani Nikula Cc: Lucas De Marchi Signed-off-by: Rodrigo Vivi Reviewed-by: José Roberto de Souza #v1 Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210421094406.2017733-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/display/intel_csr.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_csr.c b/drivers/gpu/drm/i915/display/intel_csr.c index 26a3c6787e9e..26a922d34263 100644 --- a/drivers/gpu/drm/i915/display/intel_csr.c +++ b/drivers/gpu/drm/i915/display/intel_csr.c @@ -38,50 +38,56 @@ * low-power state and comes back to normal.
*/ +#define DMC_PATH(platform, major, minor) \ + "i915/" \ + __stringify(platform) "_dmc_ver" \ + __stringify(major) "_" \ + __stringify(minor) ".bin" + #define GEN12_CSR_MAX_FW_SIZE ICL_CSR_MAX_FW_SIZE -#define ADLS_CSR_PATH "i915/adls_dmc_ver2_01.bin" +#define ADLS_CSR_PATH DMC_PATH(adls, 2, 01) #define ADLS_CSR_VERSION_REQUIRED CSR_VERSION(2, 1) MODULE_FIRMWARE(ADLS_CSR_PATH); -#define DG1_CSR_PATH "i915/dg1_dmc_ver2_02.bin" +#define DG1_CSR_PATH DMC_PATH(dg1, 2, 02) #define DG1_CSR_VERSION_REQUIRED CSR_VERSION(2, 2) MODULE_FIRMWARE(DG1_CSR_PATH); -#define RKL_CSR_PATH "i915/rkl_dmc_ver2_02.bin" +#define RKL_CSR_PATH DMC_PATH(rkl, 2, 02) #define RKL_CSR_VERSION_REQUIRED CSR_VERSION(2, 2) MODULE_FIRMWARE(RKL_CSR_PATH); -#define TGL_CSR_PATH "i915/tgl_dmc_ver2_08.bin" +#define TGL_CSR_PATH DMC_PATH(tgl, 2, 08) #define TGL_CSR_VERSION_REQUIRED CSR_VERSION(2, 8) MODULE_FIRMWARE(TGL_CSR_PATH); -#define ICL_CSR_PATH "i915/icl_dmc_ver1_09.bin" +#define ICL_CSR_PATH DMC_PATH(icl, 1, 09) #define ICL_CSR_VERSION_REQUIRED CSR_VERSION(1, 9) #define ICL_CSR_MAX_FW_SIZE 0x6000 MODULE_FIRMWARE(ICL_CSR_PATH); -#define CNL_CSR_PATH "i915/cnl_dmc_ver1_07.bin" +#define CNL_CSR_PATH DMC_PATH(cnl, 1, 07) #define CNL_CSR_VERSION_REQUIRED CSR_VERSION(1, 7) #define CNL_CSR_MAX_FW_SIZE GLK_CSR_MAX_FW_SIZE MODULE_FIRMWARE(CNL_CSR_PATH); -#define GLK_CSR_PATH "i915/glk_dmc_ver1_04.bin" +#define GLK_CSR_PATH DMC_PATH(glk, 1, 04) #define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 4) #define GLK_CSR_MAX_FW_SIZE 0x4000 MODULE_FIRMWARE(GLK_CSR_PATH); -#define KBL_CSR_PATH "i915/kbl_dmc_ver1_04.bin" +#define KBL_CSR_PATH DMC_PATH(kbl, 1, 04) #define KBL_CSR_VERSION_REQUIRED CSR_VERSION(1, 4) #define KBL_CSR_MAX_FW_SIZE BXT_CSR_MAX_FW_SIZE MODULE_FIRMWARE(KBL_CSR_PATH); -#define SKL_CSR_PATH "i915/skl_dmc_ver1_27.bin" +#define SKL_CSR_PATH DMC_PATH(skl, 1, 27) #define SKL_CSR_VERSION_REQUIRED CSR_VERSION(1, 27) #define SKL_CSR_MAX_FW_SIZE BXT_CSR_MAX_FW_SIZE MODULE_FIRMWARE(SKL_CSR_PATH); -#define BXT_CSR_PATH "i915/bxt_dmc_ver1_07.bin" +#define BXT_CSR_PATH DMC_PATH(bxt, 1, 07) #define BXT_CSR_VERSION_REQUIRED CSR_VERSION(1, 7) #define BXT_CSR_MAX_FW_SIZE 0x3000 MODULE_FIRMWARE(BXT_CSR_PATH); -- cgit From 38f46186b11b58f4ef7e5ba768d246d433395410 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Wed, 21 Apr 2021 15:02:24 -0700 Subject: drm/i915/display/xelpd: Do not program EDP_Y_COORDINATE_ENABLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit EDP_Y_COORDINATE_ENABLE became a reserved bit in display 13. EDP_Y_COORDINATE_VALID has the same fate as EDP_Y_COORDINATE_ENABLE, but since we don't need it, remove its macro definition.
BSpec: 50422 Cc: Gwan-gyeong Mun Cc: Anusha Srivatsa Signed-off-by: José Roberto de Souza Reviewed-by: Gwan-gyeong Mun Link: https://patchwork.freedesktop.org/patch/msgid/20210421220224.200729-2-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 2 +- drivers/gpu/drm/i915/i915_reg.h | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 4ad756e238c5..66335ec6b7d1 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -524,7 +524,7 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) val = psr_compute_idle_frames(intel_dp) << EDP_PSR2_IDLE_FRAME_SHIFT; val |= EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE; - if (DISPLAY_VER(dev_priv) >= 10) + if (DISPLAY_VER(dev_priv) >= 10 && DISPLAY_VER(dev_priv) <= 12) val |= EDP_Y_COORDINATE_ENABLE; val |= EDP_PSR2_FRAME_BEFORE_SU(intel_dp->psr.sink_sync_latency + 1); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f4a779643f4d..a47399781773 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4563,8 +4563,7 @@ enum { #define EDP_SU_TRACK_ENABLE (1 << 30) #define TGL_EDP_PSR2_BLOCK_COUNT_NUM_2 (0 << 28) #define TGL_EDP_PSR2_BLOCK_COUNT_NUM_3 (1 << 28) -#define EDP_Y_COORDINATE_VALID (1 << 26) /* GLK and CNL+ */ -#define EDP_Y_COORDINATE_ENABLE (1 << 25) /* GLK and CNL+ */ +#define EDP_Y_COORDINATE_ENABLE REG_BIT(25) /* display 10, 11 and 12 */ #define EDP_MAX_SU_DISABLE_TIME(t) ((t) << 20) #define EDP_MAX_SU_DISABLE_TIME_MASK (0x1f << 20) #define EDP_PSR2_IO_BUFFER_WAKE_MAX_LINES 8 -- cgit From 6effe779726738ea06ba4454e127ab953ce3848c Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Wed, 21 Apr 2021 12:50:22 +0800 Subject: drm/amdgpu: refine gprs init shaders to check coverage Add code to check whether all SIMDs are covered, to make sure that all GPRs are initialized.
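As a reading aid, a hedged sketch of the writeback-buffer layout that the new coverage check relies on: one u32 slot per SIMD, 16 CUs x 4 SIMDs per shader engine, with 0xF as the value each SIMD leaves behind once its init shader has run (the helper names below are illustrative only):

/*
 * Hedged sketch of the indexing used by gfx_v9_4_2_check_gprs_init_coverage()
 * in this patch; these helpers are illustrative, not part of the patch.
 */
static inline u32 gpr_init_simd_slot(u32 se_id, u32 cu_id, u32 simd_id)
{
	return se_id * 16 * 4 + cu_id * 4 + simd_id; /* 16 CUs x 4 SIMDs per SE */
}

static inline bool gpr_init_simd_covered(const u32 *wb, u32 se_id,
					 u32 cu_id, u32 simd_id)
{
	return wb[gpr_init_simd_slot(se_id, cu_id, simd_id)] == 0xF;
}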
Signed-off-by: Dennis Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 9 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 377 +++++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h | 1 + 3 files changed, 382 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index a078a38c2cee..16a3b279a9ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4559,8 +4559,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) if (!ring->sched.ready) return 0; - if (adev->asic_type == CHIP_ARCTURUS || - adev->asic_type == CHIP_ALDEBARAN) { + if (adev->asic_type == CHIP_ARCTURUS) { vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus; vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus); vgpr_init_regs_ptr = vgpr_init_regs_arcturus; @@ -4745,7 +4744,11 @@ static int gfx_v9_0_ecc_late_init(void *handle) } /* requires IBs so do in late init after IB pool is initialized */ - r = gfx_v9_0_do_edc_gpr_workarounds(adev); + if (adev->asic_type == CHIP_ALDEBARAN) + r = gfx_v9_4_2_do_edc_gpr_workarounds(adev); + else + r = gfx_v9_0_do_edc_gpr_workarounds(adev); + if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index a30c7c10cd9a..d17e57dea178 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -22,6 +22,7 @@ */ #include "amdgpu.h" #include "soc15.h" +#include "soc15d.h" #include "gc/gc_9_4_2_offset.h" #include "gc/gc_9_4_2_sh_mask.h" @@ -79,6 +80,377 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_2_alde[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, regTCI_CNTL_3, 0xff, 0x20), }; +static const u32 vgpr_init_compute_shader_aldebaran[] = { + 0xb8840904, 0xb8851a04, 0xb8861344, 0x9207c006, 0x92088405, 0x81070807, + 0x81070407, 0x8e078207, 0xbe88008f, 0xc0410200, 0x00000007, 0xd3d94000, + 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 0xd3d94003, + 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 0xd3d94006, + 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 0xd3d94009, + 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 0xd3d9400c, + 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 0xd3d9400f, + 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 0xd3d94012, + 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 0xd3d94015, + 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 0xd3d94018, + 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 0xd3d9401b, + 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 0xd3d9401e, + 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 0xd3d94021, + 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 0xd3d94024, + 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 0xd3d94027, + 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 0xd3d9402a, + 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 0xd3d9402d, + 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 0xd3d94030, + 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 0xd3d94033, + 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 0xd3d94036, + 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 0xd3d94039, + 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 0xd3d9403c, + 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 
0x18000080, 0xd3d9403f, + 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 0xd3d94042, + 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 0xd3d94045, + 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 0xd3d94048, + 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 0xd3d9404b, + 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 0xd3d9404e, + 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 0xd3d94051, + 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 0xd3d94054, + 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 0xd3d94057, + 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 0xd3d9405a, + 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 0xd3d9405d, + 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 0xd3d94060, + 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 0xd3d94063, + 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 0xd3d94066, + 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 0xd3d94069, + 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 0xd3d9406c, + 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 0xd3d9406f, + 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080, 0xd3d94072, + 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 0xd3d94075, + 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 0xd3d94078, + 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 0xd3d9407b, + 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 0xd3d9407e, + 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 0xd3d94081, + 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 0xd3d94084, + 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 0xd3d94087, + 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 0xd3d9408a, + 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 0xd3d9408d, + 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 0xd3d94090, + 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 0xd3d94093, + 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 0xd3d94096, + 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 0xd3d94099, + 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 0xd3d9409c, + 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 0xd3d9409f, + 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 0xd3d940a2, + 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 0xd3d940a5, + 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 0xd3d940a8, + 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 0xd3d940ab, + 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 0xd3d940ae, + 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 0xd3d940b1, + 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 0xd3d940b4, + 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 0xd3d940b7, + 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 0xd3d940ba, + 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 0xd3d940bd, + 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 0xd3d940c0, + 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 0xd3d940c3, + 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 0xd3d940c6, + 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 0xd3d940c9, + 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 0xd3d940cc, + 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 
0x18000080, 0xd3d940cf, + 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 0xd3d940d2, + 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 0xd3d940d5, + 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 0xd3d940d8, + 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 0xd3d940db, + 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 0xd3d940de, + 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 0xd3d940e1, + 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 0xd3d940e4, + 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 0xd3d940e7, + 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 0xd3d940ea, + 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 0xd3d940ed, + 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 0xd3d940f0, + 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 0xd3d940f3, + 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 0xd3d940f6, + 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 0xd3d940f9, + 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080, 0xd3d940fc, + 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 0xd3d940ff, + 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 0x7e000280, + 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 0x7e0c0280, + 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 0xd28c0001, + 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904, 0xb78b4000, + 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 0x00020201, + 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 0xbf84fff8, + 0xbf810000, +}; + +const struct soc15_reg_entry vgpr_init_regs_aldebaran[] = { + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 4 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0xbf }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x400004 }, /* 64KB LDS */ + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x3F }, /* 63 - accum-offset = 256 */ + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, +}; + +static const u32 sgpr_init_compute_shader_aldebaran[] = { + 0xb8840904, 0xb8851a04, 0xb8861344, 0x9207c006, 0x92088405, 0x81070807, + 0x81070407, 0x8e078207, 0xbefc0006, 0xbf800000, 0xbf900001, 0xbe88008f, + 0xc0410200, 0x00000007, 0xb07c0000, 0xbe8000ff, 0x0000005f, 0xbee50080, + 0xbe812c65, 0xbe822c65, 0xbe832c65, 0xbe842c65, 0xbe852c65, 0xb77c0005, + 0x80808500, 0xbf84fff8, 0xbe800080, 0xbf810000, +}; + +static const struct soc15_reg_entry sgpr1_init_regs_aldebaran[] = { + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 8 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 }, + { 
SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS): SGPRS[9:6] VGPRS[5:0] */ + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x4 }, /* USER_SGPR[5:1]*/ + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x3F }, /* 63 - accum-offset = 256 */ + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, +}; + +static const struct soc15_reg_entry sgpr2_init_regs_aldebaran[] = { + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 8 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x4 }, /* USER_SGPR[5:1]*/ + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x3F }, /* 63 - accum-offset = 256 */ + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, +}; + +static int gfx_v9_4_2_check_gprs_init_coverage(struct amdgpu_device *adev, + uint32_t *wb) +{ + uint32_t se_id, cu_id, simd_id; + uint32_t simd_cnt = 0; + uint32_t se_offset, cu_offset, data; + + for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) { + se_offset = se_id * 16 * 4; + for (cu_id = 0; cu_id < 16; cu_id++) { + cu_offset = cu_id * 4; + for (simd_id = 0; simd_id < 4; simd_id++) { + data = wb[se_offset + cu_offset + simd_id]; + if (data == 0xF) + simd_cnt++; + } + } + } + + if (adev->gfx.cu_info.number * 4 == simd_cnt) + return 0; + + dev_warn(adev->dev, "SIMD Count: %d, %d\n", + adev->gfx.cu_info.number * 4, simd_cnt); + + for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) { + se_offset = se_id * 16 * 4; + for (cu_id = 0; cu_id < 16; cu_id++) { + cu_offset = cu_id * 4; + for (simd_id = 0; simd_id < 4; simd_id++) { + data = wb[se_offset + cu_offset + simd_id]; + if (data != 0xF) + dev_warn(adev->dev, "SE[%d]CU[%d]SIMD[%d]: isn't inited\n", + se_id, cu_id, simd_id); + } + } + } + + return -EFAULT; +} + +static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev, + const uint32_t *shader_ptr, uint32_t shader_size, + const struct soc15_reg_entry *init_regs, uint32_t regs_size, + uint32_t compute_dim_x, u64 wb_gpu_addr) +{ + struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + int r, i; + uint32_t total_size, shader_offset; + u64 gpu_addr; + + total_size = (regs_size * 3 + 
4 + 4 + 5 + 2) * 4; + total_size = ALIGN(total_size, 256); + shader_offset = total_size; + total_size += ALIGN(shader_size, 256); + + /* allocate an indirect buffer to put the commands in */ + memset(&ib, 0, sizeof(ib)); + r = amdgpu_ib_get(adev, NULL, total_size, + AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); + return r; + } + + /* load the compute shaders */ + for (i = 0; i < shader_size/sizeof(u32); i++) + ib.ptr[i + (shader_offset / 4)] = shader_ptr[i]; + + /* init the ib length to 0 */ + ib.length_dw = 0; + + /* write the register state for the compute dispatch */ + for (i = 0; i < regs_size; i++) { + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(init_regs[i]) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = init_regs[i].reg_value; + } + + /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ + gpu_addr = (ib.gpu_addr + (u64)shader_offset) >> 8; + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); + ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_PGM_LO) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); + ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); + + /* write the wb buffer address */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); + ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_USER_DATA_0) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = lower_32_bits(wb_gpu_addr); + ib.ptr[ib.length_dw++] = upper_32_bits(wb_gpu_addr); + + /* write dispatch packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ib.ptr[ib.length_dw++] = compute_dim_x; /* x */ + ib.ptr[ib.length_dw++] = 1; /* y */ + ib.ptr[ib.length_dw++] = 1; /* z */ + ib.ptr[ib.length_dw++] = + REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); + + /* write CS partial flush packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); + ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); + + /* schedule the ib on the ring */ + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) { + DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); + goto fail; + } + + /* wait for the GPU to finish processing the IB */ + r = dma_fence_wait(f, false); + if (r) { + DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); + goto fail; + } +fail: + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); + + return r; +} + +int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; + int r; + int compute_dim_x = adev->gfx.config.max_shader_engines * + adev->gfx.config.max_cu_per_sh * + adev->gfx.config.max_sh_per_se; + int sgpr_work_group_size = 5; + /* CU_ID: 0~15, SIMD_ID: 0~3 */ + int wb_size = adev->gfx.config.max_shader_engines * 16 * 4; + struct amdgpu_ib ib; + + /* only support when RAS is enabled */ + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return 0; + + /* bail if the compute ring is not ready */ + if (!ring->sched.ready) + return 0; + + /* allocate an indirect buffer to put the commands in */ + memset(&ib, 0, sizeof(ib)); + r = amdgpu_ib_get(adev, NULL, wb_size * sizeof(uint32_t), + AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); + return r; + } + + memset(ib.ptr, 0, wb_size * sizeof(uint32_t)); + r = gfx_v9_4_2_run_shader(adev, vgpr_init_compute_shader_aldebaran, + sizeof(vgpr_init_compute_shader_aldebaran), + vgpr_init_regs_aldebaran,
ARRAY_SIZE(vgpr_init_regs_aldebaran), + compute_dim_x * 2, ib.gpu_addr); + if (r) { + dev_err(adev->dev, "Init VGPRS: failed to run shader\n"); + goto failed; + } + + r = gfx_v9_4_2_check_gprs_init_coverage(adev, ib.ptr); + if (r) { + dev_err(adev->dev, "Init VGPRS: failed to cover all SIMDs\n"); + goto failed; + } else { + dev_info(adev->dev, "Init VGPRS Successfully\n"); + } + + memset(ib.ptr, 0, wb_size * sizeof(uint32_t)); + r = gfx_v9_4_2_run_shader(adev, sgpr_init_compute_shader_aldebaran, + sizeof(sgpr_init_compute_shader_aldebaran), + sgpr1_init_regs_aldebaran, + ARRAY_SIZE(sgpr1_init_regs_aldebaran), + compute_dim_x / 2 * sgpr_work_group_size, + ib.gpu_addr); + if (r) { + dev_err(adev->dev, "Init SGPRS Part1: failed to run shader\n"); + goto failed; + } + + r = gfx_v9_4_2_run_shader(adev, sgpr_init_compute_shader_aldebaran, + sizeof(sgpr_init_compute_shader_aldebaran), + sgpr2_init_regs_aldebaran, + ARRAY_SIZE(sgpr2_init_regs_aldebaran), + compute_dim_x / 2 * sgpr_work_group_size, + ib.gpu_addr); + if (r) { + dev_err(adev->dev, "Init SGPRS Part2: failed to run shader\n"); + goto failed; + } + + r = gfx_v9_4_2_check_gprs_init_coverage(adev, ib.ptr); + if (r) + dev_err(adev->dev, + "Init SGPRS: failed to cover all SIMDs\n"); + else + dev_info(adev->dev, "Init SGPRS Successfully\n"); + +failed: + amdgpu_ib_free(adev, &ib, NULL); + return r; +} + static void gfx_v9_4_2_query_sq_timeout_status(struct amdgpu_device *adev); static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev); @@ -808,8 +1180,9 @@ static struct gfx_v9_4_2_utc_block gfx_v9_4_2_utc_blocks[] = { REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL, WRITE_COUNTERS, 1) }, }; -static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs = - { SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 }; +static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs = { + SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 +}; static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev, const struct soc15_reg_entry *reg, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h index 81c5833b6b9f..6db1f88509af 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h @@ -29,6 +29,7 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev, void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev, uint32_t die_id); void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev); +int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev); extern const struct amdgpu_gfx_ras_funcs gfx_v9_4_2_ras_funcs; -- cgit From 7e882aee845fa11da043a2e7f872551763b3a9dc Mon Sep 17 00:00:00 2001 From: John Clements Date: Wed, 21 Apr 2021 15:01:54 +0800 Subject: drm/amdgpu: add support for ras init flags Conditionally configure RAS for dGPU mode or poison propagation mode. Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 9 +++++++-- drivers/gpu/drm/amd/amdgpu/ta_ras_if.h | 7 +++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index a09483beb968..9311dcc94cb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1037,6 +1037,13 @@ static int psp_ras_load(struct psp_context *psp) memset(psp->fw_pri_buf, 0, PSP_1_MEG); memcpy(psp->fw_pri_buf, psp->ta_ras_start_addr, psp->ta_ras_ucode_size);
ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + + if (psp->adev->gmc.xgmi.connected_to_cpu) + ras_cmd->ras_in_message.init_flags.poison_mode_en = 1; + else + ras_cmd->ras_in_message.init_flags.dgpu_mode = 1; + psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->ta_ras_ucode_size, @@ -1046,8 +1053,6 @@ static int psp_ras_load(struct psp_context *psp) ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; - if (!ret) { psp->ras.session_id = cmd->resp.session_id; diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 745ed0fba1ed..0f214a398dd8 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -105,6 +105,12 @@ struct ta_ras_trigger_error_input { uint64_t value; // method of error injection, i.e. persistent, coherent etc. }; +struct ta_ras_init_flags +{ + uint8_t poison_mode_en; + uint8_t dgpu_mode; +}; + struct ta_ras_output_flags { uint8_t ras_init_success_flag; @@ -115,6 +121,7 @@ struct ta_ras_output_flags /* Common input structure for RAS callbacks */ /**********************************************************/ union ta_ras_cmd_input { + struct ta_ras_init_flags init_flags; struct ta_ras_enable_features_input enable_features; struct ta_ras_disable_features_input disable_features; struct ta_ras_trigger_error_input trigger_error; -- cgit From ef9bcfde9e1f6f3ebefabf2c72d5fd04f9745d9d Mon Sep 17 00:00:00 2001 From: Jinzhou Su Date: Wed, 21 Apr 2021 10:59:13 +0800 Subject: drm/amdgpu: Enable SDMA MGCG for Vangogh Add the AMD_CG_SUPPORT_SDMA_MGCG flag for Vangogh. Enable SDMA MGCG starting from SDMA firmware version 70. Signed-off-by: Jinzhou Su Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index d54af7f8801b..0142f6760ad2 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1118,6 +1118,7 @@ static int nv_common_early_init(void *handle) AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_GFX_FGCG | AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_SDMA_MGCG | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_VCN | diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index b1ad9e52b234..4ba7fce4c0b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1556,6 +1556,10 @@ static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *ade int i; for (i = 0; i < adev->sdma.num_instances; i++) { + + if (adev->sdma.instance[i].fw_version < 70 && adev->asic_type == CHIP_VANGOGH) + adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_MGCG; + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { /* Enable sdma clock gating */ def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL)); -- cgit From 19d0dfda4c75a23012dd714e57b4f569df61089d Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Mon, 19 Apr 2021 11:33:10 +0800 Subject: drm/amdgpu: optimize gfx ras features flag clean Signed-off-by: Stanley.Yang Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
b0d2fc9454ca..bb0d02755f11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -658,11 +658,7 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev, con->features |= BIT(head->block); } else { if (obj && amdgpu_ras_is_feature_enabled(adev, head)) { - /* skip clean gfx ras context feature for VEGA20 Gaming. - * will clean later - */ - if (!(!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX))) - con->features &= ~BIT(head->block); + con->features &= ~BIT(head->block); put_obj(obj); } } @@ -770,6 +766,10 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev, con->features |= BIT(head->block); ret = amdgpu_ras_feature_enable(adev, head, 0); + + /* clean gfx block ras features flag */ + if (adev->ras_features && head->block == AMDGPU_RAS_BLOCK__GFX) + con->features &= ~BIT(head->block); } } else ret = amdgpu_ras_feature_enable(adev, head, enable); -- cgit From 1d712be90a6d8af1d6d693cc936fc103376f7d36 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Thu, 22 Apr 2021 17:31:04 +0800 Subject: drm/amd/amdgpu: add cgls enable cgls to improve the runtime power efficiency. Signed-off-by: Kenneth Feng Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 0142f6760ad2..9c4f232e81c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1065,6 +1065,7 @@ static int nv_common_early_init(void *handle) case CHIP_SIENNA_CICHLID: adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_MC_MGCG | AMD_CG_SUPPORT_VCN_MGCG | @@ -1088,6 +1089,7 @@ static int nv_common_early_init(void *handle) case CHIP_NAVY_FLOUNDER: adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG | @@ -1130,6 +1132,7 @@ static int nv_common_early_init(void *handle) case CHIP_DIMGREY_CAVEFISH: adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG | -- cgit From fd6a440ebc83d3ca3e3699fad6b34c9c523c008b Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 2 Mar 2021 11:28:36 -0500 Subject: drm/amdkfd: add per-vmid-debug map_process_support In order to support multi-process debugging, HWS PM4 packet MAP_PROCESS requires an extension of 5 DWORDS to support targeting of per-vmid SPI debug control registers as well as watch points per process. 
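For reference, a hedged sketch of how those 5 extra DWORDS break down; the field names mirror the pm4_mes_map_process_aldebaran structure added below, while the wrapper struct itself is only an illustration:

/*
 * Illustrative only: the 5-DWORD debug extension carried by the
 * aldebaran MAP_PROCESS packet relative to the gfx9 layout.
 */
struct map_process_debug_ext {
	uint32_t spi_gdbg_per_vmid_cntl;	/* per-VMID SPI debug control */
	uint32_t tcp_watch_cntl[4];		/* per-process watch points */
};
/* 1 + 4 = 5 DWORDS */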
Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 9 ++- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c | 58 +++++++++++++- .../gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h | 93 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + 5 files changed, 161 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index b31bae91fbd0..348fd3e49017 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -26,6 +26,7 @@ #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_vi.h" +#include "kfd_pm4_headers_aldebaran.h" #include "cwsr_trap_handler.h" #include "kfd_iommu.h" #include "amdgpu_amdkfd.h" @@ -714,7 +715,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources) { - unsigned int size; + unsigned int size, map_process_packet_size; kfd->ddev = ddev; kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, @@ -749,7 +750,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, * calculate max size of runlist packet. * There can be only 2 packets at once */ - size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_mes_map_process) + + map_process_packet_size = + kfd->device_info->asic_family == CHIP_ALDEBARAN ? + sizeof(struct pm4_mes_map_process_aldebaran) : + sizeof(struct pm4_mes_map_process); + size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size + max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues) + sizeof(struct pm4_mes_runlist)) * 2; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index e840dd581719..0ce507d7208a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -242,7 +242,6 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) case CHIP_RAVEN: case CHIP_RENOIR: case CHIP_ARCTURUS: - case CHIP_ALDEBARAN: case CHIP_NAVI10: case CHIP_NAVI12: case CHIP_NAVI14: @@ -252,6 +251,9 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) case CHIP_DIMGREY_CAVEFISH: pm->pmf = &kfd_v9_pm_funcs; break; + case CHIP_ALDEBARAN: + pm->pmf = &kfd_aldebaran_pm_funcs; + break; default: WARN(1, "Unexpected ASIC family %u", dqm->dev->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index e3ba0cd3b6fa..7ea3f671b325 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -24,6 +24,7 @@ #include "kfd_kernel_queue.h" #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_ai.h" +#include "kfd_pm4_headers_aldebaran.h" #include "kfd_pm4_opcodes.h" #include "gc/gc_10_1_0_sh_mask.h" @@ -35,7 +36,6 @@ static int pm_map_process_v9(struct packet_manager *pm, packet = (struct pm4_mes_map_process *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_map_process)); - packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process)); packet->bitfields2.diq_enable = (qpd->is_debug) ? 
1 : 0; @@ -73,6 +73,45 @@ static int pm_map_process_v9(struct packet_manager *pm, return 0; } +static int pm_map_process_aldebaran(struct packet_manager *pm, + uint32_t *buffer, struct qcm_process_device *qpd) +{ + struct pm4_mes_map_process_aldebaran *packet; + uint64_t vm_page_table_base_addr = qpd->page_table_base; + + packet = (struct pm4_mes_map_process_aldebaran *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran)); + packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, + sizeof(struct pm4_mes_map_process_aldebaran)); + packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; + packet->bitfields2.process_quantum = 10; + packet->bitfields2.pasid = qpd->pqm->process->pasid; + packet->bitfields14.gds_size = qpd->gds_size & 0x3F; + packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF; + packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0; + packet->bitfields14.num_oac = qpd->num_oac; + packet->bitfields14.sdma_enable = 1; + packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; + + packet->sh_mem_config = qpd->sh_mem_config; + packet->sh_mem_bases = qpd->sh_mem_bases; + if (qpd->tba_addr) { + packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8); + packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8); + packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8); + } + + packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); + packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); + + packet->vm_context_page_table_base_addr_lo32 = + lower_32_bits(vm_page_table_base_addr); + packet->vm_context_page_table_base_addr_hi32 = + upper_32_bits(vm_page_table_base_addr); + + return 0; +} + static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, uint64_t ib, size_t ib_size_in_dwords, bool chain) { @@ -324,3 +363,20 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = { .query_status_size = sizeof(struct pm4_mes_query_status), .release_mem_size = 0, }; + +const struct packet_manager_funcs kfd_aldebaran_pm_funcs = { + .map_process = pm_map_process_aldebaran, + .runlist = pm_runlist_v9, + .set_resources = pm_set_resources_v9, + .map_queues = pm_map_queues_v9, + .unmap_queues = pm_unmap_queues_v9, + .query_status = pm_query_status_v9, + .release_mem = NULL, + .map_process_size = sizeof(struct pm4_mes_map_process_aldebaran), + .runlist_size = sizeof(struct pm4_mes_runlist), + .set_resources_size = sizeof(struct pm4_mes_set_resources), + .map_queues_size = sizeof(struct pm4_mes_map_queues), + .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), + .query_status_size = sizeof(struct pm4_mes_query_status), + .release_mem_size = 0, +}; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h new file mode 100644 index 000000000000..f795ec815e2a --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h @@ -0,0 +1,93 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/*--------------------MES_MAP_PROCESS (PER DEBUG VMID)--------------------*/ + +#ifndef PM4_MES_MAP_PROCESS_PER_DEBUG_VMID_DEFINED +#define PM4_MES_MAP_PROCESS_PER_DEBUG_VMID_DEFINED + +struct pm4_mes_map_process_aldebaran { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t pasid:16; /* 0 - 15 */ + uint32_t single_memops:1; /* 16 */ + uint32_t reserved1:1; /* 17 */ + uint32_t debug_vmid:4; /* 18 - 21 */ + uint32_t new_debug:1; /* 22 */ + uint32_t tmz:1; /* 23 */ + uint32_t diq_enable:1; /* 24 */ + uint32_t process_quantum:7; /* 25 - 31 */ + } bitfields2; + uint32_t ordinal2; + }; + + uint32_t vm_context_page_table_base_addr_lo32; + + uint32_t vm_context_page_table_base_addr_hi32; + + uint32_t sh_mem_bases; + + uint32_t sh_mem_config; + + uint32_t sq_shader_tba_lo; + + uint32_t sq_shader_tba_hi; + + uint32_t sq_shader_tma_lo; + + uint32_t sq_shader_tma_hi; + + uint32_t reserved6; + + uint32_t gds_addr_lo; + + uint32_t gds_addr_hi; + + union { + struct { + uint32_t num_gws:7; + uint32_t sdma_enable:1; + uint32_t num_oac:4; + uint32_t gds_size_hi:4; + uint32_t gds_size:6; + uint32_t num_queues:10; + } bitfields14; + uint32_t ordinal14; + }; + + uint32_t spi_gdbg_per_vmid_cntl; + + uint32_t tcp_watch_cntl[4]; + + uint32_t completion_signal_lo; + + uint32_t completion_signal_hi; + +}; + +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index a1ddcf6446db..64552f6b8ba4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1088,6 +1088,7 @@ struct packet_manager_funcs { extern const struct packet_manager_funcs kfd_vi_pm_funcs; extern const struct packet_manager_funcs kfd_v9_pm_funcs; +extern const struct packet_manager_funcs kfd_aldebaran_pm_funcs; int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm); void pm_uninit(struct packet_manager *pm, bool hanging); -- cgit From 502f0e28042b4ef271c70f54dc2e4feb230fdb64 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 22 Apr 2021 21:58:08 +0800 Subject: drm/amdgpu: disable gfx ras by default in aldebaran aldebaran gfx ras is still under development Signed-off-by: Hawking Zhang Reviewed-by: Dennis Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index bb0d02755f11..f62873fbf249 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2066,8 +2066,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev, } else { /* driver only manages a few IP blocks RAS feature * when GPU is connected cpu through XGMI */ - *hw_supported |= (1 << AMDGPU_RAS_BLOCK__GFX | - 1 << AMDGPU_RAS_BLOCK__SDMA | + *hw_supported |= (1 << AMDGPU_RAS_BLOCK__SDMA | 1 << AMDGPU_RAS_BLOCK__MMHUB); } -- cgit From 126bbd4ab524160e63725d04e838c0f18c917e11 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Mon, 12 Apr 2021 13:34:57 -0500 Subject: drm/amdgpu: extend xnack limit page fault timeout Extending this timeout prevents the IH from being flooded by storm interrupts coming from SDMA while a page fault is active. Currently, on Aldebaran, handling that many interrupts can take a lot of CPU time (up to 4 seconds). This eventually causes timeouts in other tasks. Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 5715be6770ec..823a367990bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1109,6 +1109,8 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) if (adev->asic_type == CHIP_ARCTURUS && adev->sdma.instance[i].fw_version >= 14) WREG32_SDMA(i, mmSDMA0_PUB_DUMMY_REG2, enable); + /* Extend page fault timeout to avoid interrupt storm */ + WREG32_SDMA(i, mmSDMA0_UTCL1_TIMEOUT, 0x00800080); } } -- cgit From b19dbb7a90b42e0ba22878e647cfb62e2b09ed13 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Mon, 12 Apr 2021 13:35:18 -0500 Subject: drm/amdkfd: svm ranges creation for unregistered memory SVM ranges are created for unregistered memory, triggered by page faults. These ranges are migrated/mapped to GPU VRAM memory.
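The interesting detail below is how the new range's boundaries are chosen: the range is clamped to the faulting VMA, to a 512-page aligned window around the fault (2UL << 8 pages, i.e. 2 MiB with 4 KiB pages), and to any neighbouring ranges already in the interval tree. A self-contained toy model of the first two clamps (all values are invented page numbers; the interval-tree clamp is omitted):

    #include <stdio.h>

    #define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))
    #define ALIGN(x, a)      ALIGN_DOWN((x) + (a) - 1, (a))

    int main(void)
    {
            unsigned long vma_start = 0x100, vma_end = 0x10000;
            unsigned long addr = 0x1234;    /* faulting page */
            unsigned long win = 2UL << 8;   /* 512 pages = 2 MiB */
            unsigned long start, last;

            start = ALIGN_DOWN(addr, win);
            if (start < vma_start)
                    start = vma_start;
            last = ALIGN(addr + 1, win) - 1;
            if (last > vma_end - 1)
                    last = vma_end - 1;

            /* prints "new range: [0x1200 0x13ff]" */
            printf("new range: [0x%lx 0x%lx]\n", start, last);
            return 0;
    }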
Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 107 ++++++++++++++++++++++++++++++++++- 1 file changed, 104 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 0e0b4ffd20ab..46591ce71b1c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -274,7 +274,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, INIT_LIST_HEAD(&prange->deferred_list); INIT_LIST_HEAD(&prange->child_list); atomic_set(&prange->invalid, 0); - prange->validate_timestamp = ktime_to_us(ktime_get()); + prange->validate_timestamp = 0; mutex_init(&prange->migrate_mutex); mutex_init(&prange->lock); svm_range_set_default_attributes(&prange->preferred_loc, @@ -2151,6 +2151,86 @@ svm_range_best_restore_location(struct svm_range *prange, return -1; } +static int +svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr, + unsigned long *start, unsigned long *last) +{ + struct vm_area_struct *vma; + struct interval_tree_node *node; + unsigned long start_limit, end_limit; + + vma = find_vma(p->mm, addr << PAGE_SHIFT); + if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) { + pr_debug("VMA does not exist in address [0x%llx]\n", addr); + return -EFAULT; + } + start_limit = max(vma->vm_start >> PAGE_SHIFT, + (unsigned long)ALIGN_DOWN(addr, 2UL << 8)); + end_limit = min(vma->vm_end >> PAGE_SHIFT, + (unsigned long)ALIGN(addr + 1, 2UL << 8)); + /* First range that starts after the fault address */ + node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX); + if (node) { + end_limit = min(end_limit, node->start); + /* Last range that ends before the fault address */ + node = container_of(rb_prev(&node->rb), + struct interval_tree_node, rb); + } else { + /* Last range must end before addr because + * there was no range after addr + */ + node = container_of(rb_last(&p->svms.objects.rb_root), + struct interval_tree_node, rb); + } + if (node) { + if (node->last >= addr) { + WARN(1, "Overlap with prev node and page fault addr\n"); + return -EFAULT; + } + start_limit = max(start_limit, node->last + 1); + } + + *start = start_limit; + *last = end_limit - 1; + + pr_debug("vma start: 0x%lx start: 0x%lx vma end: 0x%lx last: 0x%lx\n", + vma->vm_start >> PAGE_SHIFT, *start, + vma->vm_end >> PAGE_SHIFT, *last); + + return 0; + +} +static struct +svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev, + struct kfd_process *p, + struct mm_struct *mm, + int64_t addr) +{ + struct svm_range *prange = NULL; + unsigned long start, last; + uint32_t gpuid, gpuidx; + + if (svm_range_get_range_boundaries(p, addr, &start, &last)) + return NULL; + + prange = svm_range_new(&p->svms, start, last); + if (!prange) { + pr_debug("Failed to create prange in address [0x%llx]\n", addr); + return NULL; + } + if (kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx)) { + pr_debug("failed to get gpuid from kgd\n"); + svm_range_free(prange); + return NULL; + } + prange->preferred_loc = gpuid; + prange->actual_loc = 0; + /* Guarantee prange is migrated */ + svm_range_add_to_svms(prange); + svm_range_add_notifier_locked(mm, prange); + + return prange; +} int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, @@ -2162,6 +2242,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, struct kfd_process *p; uint64_t timestamp; int32_t best_loc, gpuidx; + bool write_locked = false; int r =
0; p = kfd_lookup_process_by_pasid(pasid); @@ -2185,15 +2266,35 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, } mmap_read_lock(mm); +retry_write_locked: mutex_lock(&svms->lock); prange = svm_range_from_addr(svms, addr, NULL); if (!prange) { pr_debug("failed to find prange svms 0x%p address [0x%llx]\n", svms, addr); - r = -EFAULT; - goto out_unlock_svms; + if (!write_locked) { + /* Need the write lock to create new range with MMU notifier. + * Also flush pending deferred work to make sure the interval + * tree is up to date before we add a new range + */ + mutex_unlock(&svms->lock); + mmap_read_unlock(mm); + mmap_write_lock(mm); + write_locked = true; + goto retry_write_locked; + } + prange = svm_range_create_unregistered_range(adev, p, mm, addr); + if (!prange) { + pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n", + svms, addr); + mmap_write_downgrade(mm); + r = -EFAULT; + goto out_unlock_svms; + } } + if (write_locked) + mmap_write_downgrade(mm); mutex_lock(&prange->migrate_mutex); timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp; -- cgit From 63f1af83ae4614314834eab727ad94cd8d4278b0 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Wed, 21 Apr 2021 13:54:09 -0500 Subject: drm/amdkfd: set attribute access for default ranges The access attribute for default ranges is set based on whether the process runs with XNACK on or off: with XNACK on, unregistered ranges get a GPU access attribute through the page fault path, while with XNACK off they get no access attribute. Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 46591ce71b1c..cfd8065fe0f6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2887,12 +2887,11 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size, pr_debug("range attrs not found return default values\n"); svm_range_set_default_attributes(&location, &prefetch_loc, &granularity, &flags); - /* TODO: Automatically create SVM ranges and map them on - * GPU page faults if (p->xnack_enabled) bitmap_fill(bitmap_access, MAX_GPU_INSTANCE); - */ - + else + bitmap_zero(bitmap_access, MAX_GPU_INSTANCE); + bitmap_zero(bitmap_aip, MAX_GPU_INSTANCE); goto fill_values; } bitmap_fill(bitmap_access, MAX_GPU_INSTANCE); -- cgit From 65f8db8150ee30623cd6b870d6f836c4339026a5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 22 Apr 2021 13:31:58 +0100 Subject: drm/amdkfd: fix uint32 variable compared to less than zero Currently the call to kfd_process_gpuidx_from_gpuid returns an int that is assigned to the uint32_t variable gpuidx and then checked for a negative error return, a check that is always false. Fix this by making gpuidx an int32_t, which also makes it type consistent with how the callers use gpuidx.
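The bug class is easy to reproduce in isolation; a self-contained sketch (names invented, compilers typically warn about the tautological comparison):

    #include <stdio.h>
    #include <stdint.h>

    static int lookup(void)
    {
            return -22;     /* an -EINVAL style error code */
    }

    int main(void)
    {
            uint32_t ugpuidx = lookup();    /* stores 0xffffffea */
            int32_t sgpuidx = lookup();

            if (ugpuidx < 0)        /* always false: the error is missed */
                    printf("unsigned: caught error\n");
            if (sgpuidx < 0)        /* behaves as intended */
                    printf("signed: caught error %d\n", sgpuidx);
            return 0;
    }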
Addresses-Coverity: ("Unsigned compared against 0") Fixes: cda0f85bfa5e ("drm/amdkfd: refine migration policy with xnack on") Signed-off-by: Colin Ian King Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index cfd8065fe0f6..4cc2539bed5b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1330,7 +1330,7 @@ static void svm_range_unreserve_bos(struct svm_validate_context *ctx) */ static int svm_range_validate_and_map(struct mm_struct *mm, struct svm_range *prange, - uint32_t gpuidx, bool intr, bool wait) + int32_t gpuidx, bool intr, bool wait) { struct svm_validate_context ctx; struct hmm_range *hmm_range; -- cgit From a40eb089b46e082239b3461eaf50f966e5f51e4c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 22 Apr 2021 13:44:52 +0100 Subject: drm/amdkfd: remove redundant initialization to variable r The variable r is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index d44a46eb00d6..a66b67083d83 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -303,7 +303,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, uint64_t vram_addr; uint64_t offset; uint64_t i, j; - int r = -ENOMEM; + int r; pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, prange->last); -- cgit From a614b336f1c16d2febcf5ab93d4f9f92f851f0fe Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 11 Feb 2021 16:32:04 +0100 Subject: drm/amdgpu: fix coding style and documentation in amdgpu_gtt_mgr.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid the forward define, fix coding style, add documentation. No functional change. 
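The structural part of the cleanup is ordering: amdgpu_gtt_mgr_init() and amdgpu_gtt_mgr_fini() move below the ops table they reference, so the forward define of amdgpu_gtt_mgr_func can be dropped. The same pattern in miniature (toy names, for illustration only):

    #include <stdio.h>

    struct ops {
            void (*run)(void);
    };

    static void do_run(void)
    {
            printf("run\n");
    }

    /* Defining the table before its users removes the need for a
     * forward declaration of my_ops.
     */
    static const struct ops my_ops = {
            .run = do_run,
    };

    int main(void)
    {
            my_ops.run();   /* stands in for init code installing the table */
            return 0;
    }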
Signed-off-by: Christian König Reviewed-by: Nirmoy Das Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 169 +++++++++++++++------------- 1 file changed, 90 insertions(+), 79 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 540c01052b21..8860545344c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -24,7 +24,8 @@ #include "amdgpu.h" -static inline struct amdgpu_gtt_mgr *to_gtt_mgr(struct ttm_resource_manager *man) +static inline struct amdgpu_gtt_mgr * +to_gtt_mgr(struct ttm_resource_manager *man) { return container_of(man, struct amdgpu_gtt_mgr, manager); } @@ -43,12 +44,14 @@ struct amdgpu_gtt_node { * the GTT block, in bytes */ static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); + struct ttm_resource_manager *man; + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); return sysfs_emit(buf, "%llu\n", man->size * PAGE_SIZE); } @@ -61,12 +64,14 @@ static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev, * size of the GTT block, in bytes */ static ssize_t amdgpu_mem_info_gtt_used_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); + struct ttm_resource_manager *man; + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); return sysfs_emit(buf, "%llu\n", amdgpu_gtt_mgr_usage(man)); } @@ -75,80 +80,6 @@ static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO, static DEVICE_ATTR(mem_info_gtt_used, S_IRUGO, amdgpu_mem_info_gtt_used_show, NULL); -static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func; -/** - * amdgpu_gtt_mgr_init - init GTT manager and DRM MM - * - * @adev: amdgpu_device pointer - * @gtt_size: maximum size of GTT - * - * Allocate and initialize the GTT manager. 
- */ -int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size) -{ - struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr; - struct ttm_resource_manager *man = &mgr->manager; - uint64_t start, size; - int ret; - - man->use_tt = true; - man->func = &amdgpu_gtt_mgr_func; - - ttm_resource_manager_init(man, gtt_size >> PAGE_SHIFT); - - start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS; - size = (adev->gmc.gart_size >> PAGE_SHIFT) - start; - drm_mm_init(&mgr->mm, start, size); - spin_lock_init(&mgr->lock); - atomic64_set(&mgr->available, gtt_size >> PAGE_SHIFT); - - ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total); - if (ret) { - DRM_ERROR("Failed to create device file mem_info_gtt_total\n"); - return ret; - } - ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_used); - if (ret) { - DRM_ERROR("Failed to create device file mem_info_gtt_used\n"); - return ret; - } - - ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager); - ttm_resource_manager_set_used(man, true); - return 0; -} - -/** - * amdgpu_gtt_mgr_fini - free and destroy GTT manager - * - * @adev: amdgpu_device pointer - * - * Destroy and free the GTT manager, returns -EBUSY if ranges are still - * allocated inside it. - */ -void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev) -{ - struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr; - struct ttm_resource_manager *man = &mgr->manager; - int ret; - - ttm_resource_manager_set_used(man, false); - - ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man); - if (ret) - return; - - spin_lock(&mgr->lock); - drm_mm_takedown(&mgr->mm); - spin_unlock(&mgr->lock); - - device_remove_file(adev->dev, &dev_attr_mem_info_gtt_total); - device_remove_file(adev->dev, &dev_attr_mem_info_gtt_used); - - ttm_resource_manager_cleanup(man); - ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL); -} - /** * amdgpu_gtt_mgr_has_gart_addr - Check if mem has address space * @@ -265,6 +196,13 @@ uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man) return (result > 0 ? result : 0) * PAGE_SIZE; } +/** + * amdgpu_gtt_mgr_recover - re-init gart + * + * @man: TTM memory type manager + * + * Re-init the gart for each known BO in the GTT. + */ int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man) { struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man); @@ -311,3 +249,76 @@ static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = { .free = amdgpu_gtt_mgr_del, .debug = amdgpu_gtt_mgr_debug }; + +/** + * amdgpu_gtt_mgr_init - init GTT manager and DRM MM + * + * @adev: amdgpu_device pointer + * @gtt_size: maximum size of GTT + * + * Allocate and initialize the GTT manager. 
+ */ +int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size) +{ + struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr; + struct ttm_resource_manager *man = &mgr->manager; + uint64_t start, size; + int ret; + + man->use_tt = true; + man->func = &amdgpu_gtt_mgr_func; + + ttm_resource_manager_init(man, gtt_size >> PAGE_SHIFT); + + start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS; + size = (adev->gmc.gart_size >> PAGE_SHIFT) - start; + drm_mm_init(&mgr->mm, start, size); + spin_lock_init(&mgr->lock); + atomic64_set(&mgr->available, gtt_size >> PAGE_SHIFT); + + ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_gtt_total\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_used); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_gtt_used\n"); + return ret; + } + + ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager); + ttm_resource_manager_set_used(man, true); + return 0; +} + +/** + * amdgpu_gtt_mgr_fini - free and destroy GTT manager + * + * @adev: amdgpu_device pointer + * + * Destroy and free the GTT manager, returns -EBUSY if ranges are still + * allocated inside it. + */ +void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev) +{ + struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr; + struct ttm_resource_manager *man = &mgr->manager; + int ret; + + ttm_resource_manager_set_used(man, false); + + ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man); + if (ret) + return; + + spin_lock(&mgr->lock); + drm_mm_takedown(&mgr->mm); + spin_unlock(&mgr->lock); + + device_remove_file(adev->dev, &dev_attr_mem_info_gtt_total); + device_remove_file(adev->dev, &dev_attr_mem_info_gtt_used); + + ttm_resource_manager_cleanup(man); + ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL); +} -- cgit From 589939d40116a4d023f667928d751e541719e8fc Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 12 Feb 2021 10:46:30 +0100 Subject: drm/amdgpu: fix coding style and documentation in amdgpu_vram_mgr.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No functional changes, just cleaning up some leftovers and improve documentation. 
Signed-off-by: Christian König Reviewed-by: Nirmoy Das Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 173 ++++++++++++++------------- 1 file changed, 93 insertions(+), 80 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index bce105e2973e..529c5c32a205 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -29,12 +29,14 @@ #include "amdgpu_atomfirmware.h" #include "atom.h" -static inline struct amdgpu_vram_mgr *to_vram_mgr(struct ttm_resource_manager *man) +static inline struct amdgpu_vram_mgr * +to_vram_mgr(struct ttm_resource_manager *man) { return container_of(man, struct amdgpu_vram_mgr, manager); } -static inline struct amdgpu_device *to_amdgpu_device(struct amdgpu_vram_mgr *mgr) +static inline struct amdgpu_device * +to_amdgpu_device(struct amdgpu_vram_mgr *mgr) { return container_of(mgr, struct amdgpu_device, mman.vram_mgr); } @@ -82,12 +84,14 @@ static ssize_t amdgpu_mem_info_vis_vram_total_show(struct device *dev, * amount of currently used VRAM in bytes */ static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); + struct ttm_resource_manager *man; + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_usage(man)); } @@ -100,18 +104,28 @@ static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev, * amount of currently used visible VRAM in bytes */ static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); + struct ttm_resource_manager *man; + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_vis_usage(man)); } +/** + * DOC: mem_info_vram_vendor + * + * The amdgpu driver provides a sysfs API for reporting the vendor of the + * installed VRAM + * The file mem_info_vram_vendor is used for this and returns the name of the + * vendor. + */ static ssize_t amdgpu_mem_info_vram_vendor(struct device *dev, - struct device_attribute *attr, - char *buf) + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); @@ -162,78 +176,6 @@ static const struct attribute *amdgpu_vram_mgr_attributes[] = { NULL }; -static const struct ttm_resource_manager_func amdgpu_vram_mgr_func; - -/** - * amdgpu_vram_mgr_init - init VRAM manager and DRM MM - * - * @adev: amdgpu_device pointer - * - * Allocate and initialize the VRAM manager. 
- */ -int amdgpu_vram_mgr_init(struct amdgpu_device *adev) -{ - struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; - struct ttm_resource_manager *man = &mgr->manager; - int ret; - - ttm_resource_manager_init(man, adev->gmc.real_vram_size >> PAGE_SHIFT); - - man->func = &amdgpu_vram_mgr_func; - - drm_mm_init(&mgr->mm, 0, man->size); - spin_lock_init(&mgr->lock); - INIT_LIST_HEAD(&mgr->reservations_pending); - INIT_LIST_HEAD(&mgr->reserved_pages); - - /* Add the two VRAM-related sysfs files */ - ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes); - if (ret) - DRM_ERROR("Failed to register sysfs\n"); - - ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager); - ttm_resource_manager_set_used(man, true); - return 0; -} - -/** - * amdgpu_vram_mgr_fini - free and destroy VRAM manager - * - * @adev: amdgpu_device pointer - * - * Destroy and free the VRAM manager, returns -EBUSY if ranges are still - * allocated inside it. - */ -void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) -{ - struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; - struct ttm_resource_manager *man = &mgr->manager; - int ret; - struct amdgpu_vram_reservation *rsv, *temp; - - ttm_resource_manager_set_used(man, false); - - ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man); - if (ret) - return; - - spin_lock(&mgr->lock); - list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) - kfree(rsv); - - list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) { - drm_mm_remove_node(&rsv->mm_node); - kfree(rsv); - } - drm_mm_takedown(&mgr->mm); - spin_unlock(&mgr->lock); - - sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes); - - ttm_resource_manager_cleanup(man); - ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL); -} - /** * amdgpu_vram_mgr_vis_size - Calculate visible node size * @@ -283,6 +225,7 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) return usage; } +/* Commit the reservation of VRAM pages */ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man) { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); @@ -727,3 +670,73 @@ static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = { .free = amdgpu_vram_mgr_del, .debug = amdgpu_vram_mgr_debug }; + +/** + * amdgpu_vram_mgr_init - init VRAM manager and DRM MM + * + * @adev: amdgpu_device pointer + * + * Allocate and initialize the VRAM manager. + */ +int amdgpu_vram_mgr_init(struct amdgpu_device *adev) +{ + struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; + struct ttm_resource_manager *man = &mgr->manager; + int ret; + + ttm_resource_manager_init(man, adev->gmc.real_vram_size >> PAGE_SHIFT); + + man->func = &amdgpu_vram_mgr_func; + + drm_mm_init(&mgr->mm, 0, man->size); + spin_lock_init(&mgr->lock); + INIT_LIST_HEAD(&mgr->reservations_pending); + INIT_LIST_HEAD(&mgr->reserved_pages); + + /* Add the two VRAM-related sysfs files */ + ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes); + if (ret) + DRM_ERROR("Failed to register sysfs\n"); + + ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager); + ttm_resource_manager_set_used(man, true); + return 0; +} + +/** + * amdgpu_vram_mgr_fini - free and destroy VRAM manager + * + * @adev: amdgpu_device pointer + * + * Destroy and free the VRAM manager, returns -EBUSY if ranges are still + * allocated inside it. 
+ */ +void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) +{ + struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; + struct ttm_resource_manager *man = &mgr->manager; + int ret; + struct amdgpu_vram_reservation *rsv, *temp; + + ttm_resource_manager_set_used(man, false); + + ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man); + if (ret) + return; + + spin_lock(&mgr->lock); + list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) + kfree(rsv); + + list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) { + drm_mm_remove_node(&rsv->mm_node); + kfree(rsv); + } + drm_mm_takedown(&mgr->mm); + spin_unlock(&mgr->lock); + + sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes); + + ttm_resource_manager_cleanup(man); + ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL); +} -- cgit From 25e9146ae69ea303234092242514f72b0ccdb40c Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 21 Apr 2021 17:27:46 +0200 Subject: drm/amdgpu: expose amdgpu_bo_create_shadow() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exposed amdgpu_bo_create_shadow() will be needed for amdgpu_vm handling. Signed-off-by: Nirmoy Das Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 1345f7eba011..9cdeb20fb6cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -625,9 +625,9 @@ fail_unreserve: return r; } -static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, - unsigned long size, - struct amdgpu_bo *bo) +int amdgpu_bo_create_shadow(struct amdgpu_device *adev, + unsigned long size, + struct amdgpu_bo *bo) { struct amdgpu_bo_param bp; int r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 973c88bdf37b..e0ec48d6a3fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -271,6 +271,9 @@ int amdgpu_bo_create_user(struct amdgpu_device *adev, struct amdgpu_bo_user **ubo_ptr); void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, void **cpu_addr); +int amdgpu_bo_create_shadow(struct amdgpu_device *adev, + unsigned long size, + struct amdgpu_bo *bo); int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr); void *amdgpu_bo_kptr(struct amdgpu_bo *bo); void amdgpu_bo_kunmap(struct amdgpu_bo *bo); -- cgit From a35455d065c544669d7a80b9bc422ecb9208e282 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Thu, 22 Apr 2021 12:53:16 +0200 Subject: drm/amdgpu: cleanup amdgpu_vm_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently only way to create compute vm is through amdgpu_vm_make_compute(). So vm_context isn't required anymore for amdgpu_vm_init(). 
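A sketch of the resulting flow, using identifiers from the hunks below (not a complete caller):

    /* Every VM now starts life as a graphics VM ... */
    r = amdgpu_vm_init(adev, &fpriv->vm, pasid);
    if (r)
            goto error_pasid;

    /* ... and only the KFD path converts it later: */
    r = amdgpu_vm_make_compute(adev, vm, pasid);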
Signed-off-by: Nirmoy Das Reviewed-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 +++------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 +-- 3 files changed, 6 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 39ee88d29cca..07e8a7c28561 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1114,7 +1114,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) dev_warn(adev->dev, "No more PASIDs available!"); pasid = 0; } - r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX, pasid); + + r = amdgpu_vm_init(adev, &fpriv->vm, pasid); if (r) goto error_pasid; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index c00094acfced..9736aca73733 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2782,8 +2782,7 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) * Returns: * 0 for success, error for failure. */ -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int vm_context, u32 pasid) +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid) { struct amdgpu_bo_param bp; struct amdgpu_bo *root; @@ -2817,16 +2816,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->pte_support_ats = false; vm->is_compute_context = false; - if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { - vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & - AMDGPU_VM_USE_CPU_FOR_COMPUTE); + vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & + AMDGPU_VM_USE_CPU_FOR_GFX); - if (adev->asic_type == CHIP_RAVEN) - vm->pte_support_ats = true; - } else { - vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & - AMDGPU_VM_USE_CPU_FOR_GFX); - } DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? 
"CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update && @@ -2844,8 +2836,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->evicting = false; amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp); - if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) - bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW; r = amdgpu_bo_create(adev, &bp, &root); if (r) goto error_free_delayed; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 848e175e99ff..7f07acae447b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -379,8 +379,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout); -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int vm_context, u32 pasid); +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid); int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid); void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); -- cgit From 77df5c131d4d3413acf8da259e3cad22426ff90d Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Thu, 22 Apr 2021 16:38:58 +0200 Subject: drm/amdgpu: remove unused vm context flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove unused AMDGPU_VM_CONTEXT_GFX and AMDGPU_VM_CONTEXT_COMPUTE flags. Signed-off-by: Nirmoy Das Reviewed-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 7f07acae447b..6a9dcedfcf89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -121,9 +121,6 @@ struct amdgpu_bo_list_entry; /* max vmids dedicated for process */ #define AMDGPU_VM_MAX_RESERVED_VMID 1 -#define AMDGPU_VM_CONTEXT_GFX 0 -#define AMDGPU_VM_CONTEXT_COMPUTE 1 - /* See vm_update_mode */ #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) -- cgit From adf6f5c51ea14121e2302fb1b0ab3a90921ede59 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Thu, 22 Apr 2021 16:54:01 +0200 Subject: drm/amdgpu: create shadow bo using amdgpu_bo_create_shadow() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shadow BOs are only needed for vm code so call amdgpu_bo_create_shadow() directly instead of depending on amdgpu_bo_create(). 
Signed-off-by: Nirmoy Das Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 69 ++++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9736aca73733..116b75413a35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -850,35 +850,60 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, } /** - * amdgpu_vm_bo_param - fill in parameters for PD/PT allocation + * amdgpu_vm_pt_create - create bo for PD/PT * * @adev: amdgpu_device pointer * @vm: requesting vm * @level: the page table level * @immediate: use a immediate update - * @bp: resulting BO allocation parameters + * @bo: pointer to the buffer object pointer */ -static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, +static int amdgpu_vm_pt_create(struct amdgpu_device *adev, + struct amdgpu_vm *vm, int level, bool immediate, - struct amdgpu_bo_param *bp) + struct amdgpu_bo **bo) { - memset(bp, 0, sizeof(*bp)); + struct amdgpu_bo_param bp; + int r; - bp->size = amdgpu_vm_bo_size(adev, level); - bp->byte_align = AMDGPU_GPU_PAGE_SIZE; - bp->domain = AMDGPU_GEM_DOMAIN_VRAM; - bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain); - bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + memset(&bp, 0, sizeof(bp)); + + bp.size = amdgpu_vm_bo_size(adev, level); + bp.byte_align = AMDGPU_GPU_PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.domain = amdgpu_bo_get_preferred_pin_domain(adev, bp.domain); + bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | AMDGPU_GEM_CREATE_CPU_GTT_USWC; - bp->bo_ptr_size = sizeof(struct amdgpu_bo); + bp.bo_ptr_size = sizeof(struct amdgpu_bo); if (vm->use_cpu_for_update) - bp->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - else if (!vm->root.base.bo || vm->root.base.bo->shadow) - bp->flags |= AMDGPU_GEM_CREATE_SHADOW; - bp->type = ttm_bo_type_kernel; - bp->no_wait_gpu = immediate; + bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + + bp.type = ttm_bo_type_kernel; + bp.no_wait_gpu = immediate; if (vm->root.base.bo) - bp->resv = vm->root.base.bo->tbo.base.resv; + bp.resv = vm->root.base.bo->tbo.base.resv; + + r = amdgpu_bo_create(adev, &bp, bo); + if (r) + return r; + + if (vm->is_compute_context && (adev->flags & AMD_IS_APU)) + return 0; + + if (!bp.resv) + WARN_ON(dma_resv_lock((*bo)->tbo.base.resv, + NULL)); + r = amdgpu_bo_create_shadow(adev, bp.size, *bo); + + if (!bp.resv) + dma_resv_unlock((*bo)->tbo.base.resv); + + if (r) { + amdgpu_bo_unref(bo); + return r; + } + + return 0; } /** @@ -901,7 +926,6 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, bool immediate) { struct amdgpu_vm_pt *entry = cursor->entry; - struct amdgpu_bo_param bp; struct amdgpu_bo *pt; int r; @@ -919,9 +943,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, if (entry->base.bo) return 0; - amdgpu_vm_bo_param(adev, vm, cursor->level, immediate, &bp); - - r = amdgpu_bo_create(adev, &bp, &pt); + r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt); if (r) return r; @@ -2784,7 +2806,6 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) */ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid) { - struct amdgpu_bo_param bp; struct amdgpu_bo *root; int r, i; @@ -2835,8 +2856,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid) mutex_init(&vm->eviction_lock); vm->evicting = 
false; - amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp); - r = amdgpu_bo_create(adev, &bp, &root); + r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level, + false, &root); if (r) goto error_free_delayed; -- cgit From cd2454d6cd970d4c8cbf6dfed61ff1f36b65e561 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 21 Apr 2021 18:09:46 +0200 Subject: drm/amdgpu: cleanup amdgpu_bo_create() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove shadow bo related code as vm code is creating shadow bo using proper API. Without shadow bo code, amdgpu_bo_create() is basically a wrapper around amdgpu_bo_do_create(). So rename amdgpu_bo_do_create() to amdgpu_bo_create(). Signed-off-by: Nirmoy Das Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 61 +++++++----------------------- 1 file changed, 14 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 9cdeb20fb6cd..39f88e4a8eb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -515,7 +515,18 @@ bool amdgpu_bo_support_uswc(u64 bo_flags) #endif } -static int amdgpu_bo_do_create(struct amdgpu_device *adev, +/** + * amdgpu_bo_create - create an &amdgpu_bo buffer object + * @adev: amdgpu device object + * @bp: parameters to be used for the buffer object + * @bo_ptr: pointer to the buffer object pointer + * + * Creates an &amdgpu_bo buffer object. + * + * Returns: + * 0 for success or a negative error code on failure. + */ +int amdgpu_bo_create(struct amdgpu_device *adev, struct amdgpu_bo_param *bp, struct amdgpu_bo **bo_ptr) { @@ -644,7 +655,7 @@ int amdgpu_bo_create_shadow(struct amdgpu_device *adev, bp.resv = bo->tbo.base.resv; bp.bo_ptr_size = sizeof(struct amdgpu_bo); - r = amdgpu_bo_do_create(adev, &bp, &bo->shadow); + r = amdgpu_bo_create(adev, &bp, &bo->shadow); if (!r) { bo->shadow->parent = amdgpu_bo_ref(bo); mutex_lock(&adev->shadow_list_lock); @@ -655,50 +666,6 @@ int amdgpu_bo_create_shadow(struct amdgpu_device *adev, return r; } -/** - * amdgpu_bo_create - create an &amdgpu_bo buffer object - * @adev: amdgpu device object - * @bp: parameters to be used for the buffer object - * @bo_ptr: pointer to the buffer object pointer - * - * Creates an &amdgpu_bo buffer object; and if requested, also creates a - * shadow object. - * Shadow object is used to backup the original buffer object, and is always - * in GTT. - * - * Returns: - * 0 for success or a negative error code on failure. 
- */ -int amdgpu_bo_create(struct amdgpu_device *adev, - struct amdgpu_bo_param *bp, - struct amdgpu_bo **bo_ptr) -{ - u64 flags = bp->flags; - int r; - - bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW; - - r = amdgpu_bo_do_create(adev, bp, bo_ptr); - if (r) - return r; - - if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) { - if (!bp->resv) - WARN_ON(dma_resv_lock((*bo_ptr)->tbo.base.resv, - NULL)); - - r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr); - - if (!bp->resv) - dma_resv_unlock((*bo_ptr)->tbo.base.resv); - - if (r) - amdgpu_bo_unref(bo_ptr); - } - - return r; -} - /** * amdgpu_bo_create_user - create an &amdgpu_bo_user buffer object * @adev: amdgpu device object @@ -720,7 +687,7 @@ int amdgpu_bo_create_user(struct amdgpu_device *adev, bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW; bp->bo_ptr_size = sizeof(struct amdgpu_bo_user); - r = amdgpu_bo_do_create(adev, bp, &bo_ptr); + r = amdgpu_bo_create(adev, bp, &bo_ptr); if (r) return r; -- cgit From 42daecfc2069da98e084e2e2457be2a3d50b0a30 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Thu, 22 Apr 2021 17:20:20 +0200 Subject: drm/amdgpu: remove AMDGPU_GEM_CREATE_SHADOW flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove unused AMDGPU_GEM_CREATE_SHADOW flag. Userspace is never allowed to use this. Signed-off-by: Nirmoy Das Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 +---- include/uapi/drm/amdgpu_drm.h | 2 -- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 39f88e4a8eb5..da6d4ee0a132 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -649,8 +649,7 @@ int amdgpu_bo_create_shadow(struct amdgpu_device *adev, memset(&bp, 0, sizeof(bp)); bp.size = size; bp.domain = AMDGPU_GEM_DOMAIN_GTT; - bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC | - AMDGPU_GEM_CREATE_SHADOW; + bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; bp.type = ttm_bo_type_kernel; bp.resv = bo->tbo.base.resv; bp.bo_ptr_size = sizeof(struct amdgpu_bo); @@ -685,7 +684,6 @@ int amdgpu_bo_create_user(struct amdgpu_device *adev, struct amdgpu_bo *bo_ptr; int r; - bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW; bp->bo_ptr_size = sizeof(struct amdgpu_bo_user); r = amdgpu_bo_create(adev, bp, &bo_ptr); if (r) @@ -1559,7 +1557,6 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) amdgpu_bo_print_flag(m, bo, NO_CPU_ACCESS); amdgpu_bo_print_flag(m, bo, CPU_GTT_USWC); amdgpu_bo_print_flag(m, bo, VRAM_CLEARED); - amdgpu_bo_print_flag(m, bo, SHADOW); amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS); amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID); amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 728566542f8a..2063a1c10f79 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -116,8 +116,6 @@ extern "C" { #define AMDGPU_GEM_CREATE_CPU_GTT_USWC (1 << 2) /* Flag that the memory should be in VRAM and cleared */ #define AMDGPU_GEM_CREATE_VRAM_CLEARED (1 << 3) -/* Flag that create shadow bo(GTT) while allocating vram bo */ -#define AMDGPU_GEM_CREATE_SHADOW (1 << 4) /* Flag that allocating the BO should use linear VRAM */ #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) /* Flag that BO is always valid in this VM */ -- cgit From 3dc7216c1d5058f2f809c26cbc44f4dfa440b07e Mon Sep 
17 00:00:00 2001 From: Christian König Date: Thu, 22 Apr 2021 13:11:39 +0200 Subject: drm/amdgpu: fix concurrent VM flushes on Vega/Navi v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting with Vega the hardware supports concurrent flushes of VMID which can be used to implement per process VMID allocation. But concurrent flushes are mutual exclusive with back to back VMID allocations, fix this to avoid a VMID used in two ways at the same time. v2: don't set ring to NULL Signed-off-by: Christian König Reviewed-by: James Zhu Tested-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 19 +++++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 + 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 94b069630db3..b4971e90b98c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -215,7 +215,11 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, /* Check if we have an idle VMID */ i = 0; list_for_each_entry((*idle), &id_mgr->ids_lru, list) { - fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, ring); + /* Don't use per engine and per process VMID at the same time */ + struct amdgpu_ring *r = adev->vm_manager.concurrent_flush ? + NULL : ring; + + fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, r); if (!fences[i]) break; ++i; @@ -281,7 +285,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, if (updates && (*id)->flushed_updates && updates->context == (*id)->flushed_updates->context && !dma_fence_is_later(updates, (*id)->flushed_updates)) - updates = NULL; + updates = NULL; if ((*id)->owner != vm->immediate.fence_context || job->vm_pd_addr != (*id)->pd_gpu_addr || @@ -290,6 +294,10 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, !dma_fence_is_signaled((*id)->last_flush))) { struct dma_fence *tmp; + /* Don't use per engine and per process VMID at the same time */ + if (adev->vm_manager.concurrent_flush) + ring = NULL; + /* to prevent one context starved by another context */ (*id)->pd_gpu_addr = 0; tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); @@ -365,12 +373,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, if (updates && (!flushed || dma_fence_is_later(updates, flushed))) needs_flush = true; - /* Concurrent flushes are only possible starting with Vega10 and - * are broken on Navi10 and Navi14. - */ - if (needs_flush && (adev->asic_type < CHIP_VEGA10 || - adev->asic_type == CHIP_NAVI10 || - adev->asic_type == CHIP_NAVI14)) + if (needs_flush && !adev->vm_manager.concurrent_flush) continue; /* Good, we can use this VMID. Remember this submission as diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 116b75413a35..16252d48e5a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -3160,6 +3160,12 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) { unsigned i; + /* Concurrent flushes are only possible starting with Vega10 and + * are broken on Navi10 and Navi14. 
+ */ + adev->vm_manager.concurrent_flush = !(adev->asic_type < CHIP_VEGA10 || + adev->asic_type == CHIP_NAVI10 || + adev->asic_type == CHIP_NAVI14); amdgpu_vmid_mgr_init(adev); adev->vm_manager.fence_context = diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 6a9dcedfcf89..e5a3f18be2b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -328,6 +328,7 @@ struct amdgpu_vm_manager { /* Handling of VMIDs */ struct amdgpu_vmid_mgr id_mgr[AMDGPU_MAX_VMHUBS]; unsigned int first_kfd_vmid; + bool concurrent_flush; /* Handling of VM fences */ u64 fence_context; -- cgit From c6c6a712199ab355ce333fa5764a59506bb107c1 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Thu, 22 Apr 2021 19:10:52 -0400 Subject: drm/amd/display: Reject non-zero src_y and src_x for video planes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] This hasn't been well tested and leads to complete system hangs on DCN1 based systems, possibly others. The system hang can be reproduced by gesturing the video on the YouTube Android app on ChromeOS into full screen. [How] Reject atomic commits with non-zero drm_plane_state.src_x or src_y values. v2: - Add code comment describing the reason we're rejecting non-zero src_x and src_y - Drop gerrit Change-Id - Add stable CC - Based on amd-staging-drm-next v3: removed trailing whitespace Signed-off-by: Harry Wentland Cc: stable@vger.kernel.org Cc: nicholas.kazlauskas@amd.com Cc: amd-gfx@lists.freedesktop.org Cc: alexander.deucher@amd.com Cc: Roman.Li@amd.com Cc: hersenxs.wu@amd.com Cc: danny.wang@amd.com Reviewed-by: Nicholas Kazlauskas Acked-by: Christian König Reviewed-by: Hersen Wu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b34ab76c5f4c..78f762320532 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4015,6 +4015,23 @@ static int fill_dc_scaling_info(const struct drm_plane_state *state, scaling_info->src_rect.x = state->src_x >> 16; scaling_info->src_rect.y = state->src_y >> 16; + /* + * For reasons we don't (yet) fully understand a non-zero + * src_y coordinate into an NV12 buffer can cause a + * system hang. To avoid hangs (and maybe be overly cautious) + * let's reject both non-zero src_x and src_y. + * + * We currently know of only one use-case to reproduce a + * scenario with non-zero src_x and src_y for NV12, which + * is to gesture the YouTube Android app into full screen + * on ChromeOS. + */ + if (state->fb && + state->fb->format->format == DRM_FORMAT_NV12 && + (scaling_info->src_rect.x != 0 || + scaling_info->src_rect.y != 0)) + return -EINVAL; + scaling_info->src_rect.width = state->src_w >> 16; if (scaling_info->src_rect.width == 0) return -EINVAL; -- cgit From 655c0ed19772d92c9665ed08bdc5202acc096dda Mon Sep 17 00:00:00 2001 From: Yingjie Wang Date: Thu, 8 Apr 2021 17:57:20 -0700 Subject: drm/amd/dc: Fix a missing check bug in dm_dp_mst_detect() In dm_dp_mst_detect(), We should check whether or not @connector has been unregistered from userspace. If the connector is unregistered, we should return disconnected status. 
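Isolated from the hunk below, the fix is a single early return; once userspace has unregistered the connector, reporting it disconnected is the conservative answer rather than querying the MST topology:

    if (drm_connector_is_unregistered(connector))
            return connector_status_disconnected;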
Fixes: 4562236b3bc0 ("drm/amd/dc: Add dc display driver (v2)") Signed-off-by: Yingjie Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 9b221db526dc..d62460b69d95 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -278,6 +278,9 @@ dm_dp_mst_detect(struct drm_connector *connector, struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); struct amdgpu_dm_connector *master = aconnector->mst_port; + if (drm_connector_is_unregistered(connector)) + return connector_status_disconnected; + return drm_dp_mst_detect_port(connector, ctx, &master->mst_mgr, aconnector->port); } -- cgit From b83478b698141cc2d1fef78b167d1c2348a9125c Mon Sep 17 00:00:00 2001 From: zuoqilin Date: Wed, 17 Mar 2021 15:42:28 +0800 Subject: drm/i915/dsi: Fix comment typo Change 'befor' to 'before'. Signed-off-by: zuoqilin [Jani: Fix comment marker placement while applying.] Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210317074228.1147-1-zuoqilin1@163.com --- drivers/gpu/drm/i915/display/vlv_dsi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index cf51860a1429..0d52da613101 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -861,8 +861,10 @@ static void intel_dsi_pre_enable(struct intel_atomic_state *state, /* Send initialization commands in LP mode */ intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); - /* Enable port in pre-enable phase itself because as per hw team - * recommendation, port should be enabled befor plane & pipe */ + /* + * Enable port in pre-enable phase itself because as per hw team + * recommendation, port should be enabled before plane & pipe + */ if (is_cmd_mode(intel_dsi)) { for_each_dsi_port(port, intel_dsi->ports) intel_de_write(dev_priv, -- cgit From 7a6e95f375bb17d917a8213f0cfe9b55b882f342 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 21 Apr 2021 20:32:20 +0300 Subject: drm/i915: Simplify CCS and UV plane alignment handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can handle the surface alignment of CCS and UV color planes for all modifiers at one place, so do this. An AUX color plane can be a CCS or a UV plane, use only the more specific query functions and remove is_aux_plane() becoming redundant. While at it add a TODO for linear UV color plane alignments. The spec requires this to be stride-in-bytes * 64 on all platforms, whereas the driver uses an alignment of 4k for gen<12 and 256k for gen>=12 for linear UV planes. v2: - Restore previous alignment for linear UV surfaces. 
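Summarizing the policy implemented by the reworked intel_surf_alignment() below:

    /* Decision order after this change:
     *   CCS plane                          -> 4 KiB
     *   semiplanar UV plane, gen12+:
     *       linear                         -> intel_linear_alignment()
     *       tiled                          -> intel_tile_row_size()
     *   semiplanar UV plane, pre-gen12     -> 4 KiB
     *   main plane (color_plane 0)         -> per-modifier switch
     */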
Signed-off-by: Imre Deak Reviewed-by: Juha-Pekka Heikkila Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210421173220.3587009-1-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 27 ++++++++++++++++++--------- drivers/gpu/drm/i915/display/intel_fb.c | 8 -------- drivers/gpu/drm/i915/display/intel_fb.h | 1 - 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index a10e26380ef3..e246e5cf7586 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -973,10 +973,26 @@ unsigned int intel_surf_alignment(const struct drm_framebuffer *fb, struct drm_i915_private *dev_priv = to_i915(fb->dev); /* AUX_DIST needs only 4K alignment */ - if ((DISPLAY_VER(dev_priv) < 12 && is_aux_plane(fb, color_plane)) || - is_ccs_plane(fb, color_plane)) + if (is_ccs_plane(fb, color_plane)) return 4096; + if (is_semiplanar_uv_plane(fb, color_plane)) { + /* + * TODO: cross-check wrt. the bspec stride in bytes * 64 bytes + * alignment for linear UV planes on all platforms. + */ + if (DISPLAY_VER(dev_priv) >= 12) { + if (fb->modifier == DRM_FORMAT_MOD_LINEAR) + return intel_linear_alignment(dev_priv); + + return intel_tile_row_size(fb, color_plane); + } + + return 4096; + } + + drm_WARN_ON(&dev_priv->drm, color_plane != 0); + switch (fb->modifier) { case DRM_FORMAT_MOD_LINEAR: return intel_linear_alignment(dev_priv); @@ -985,19 +1001,12 @@ unsigned int intel_surf_alignment(const struct drm_framebuffer *fb, return 256 * 1024; return 0; case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS: - if (is_semiplanar_uv_plane(fb, color_plane)) - return intel_tile_row_size(fb, color_plane); - fallthrough; case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS: case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC: return 16 * 1024; case I915_FORMAT_MOD_Y_TILED_CCS: case I915_FORMAT_MOD_Yf_TILED_CCS: case I915_FORMAT_MOD_Y_TILED: - if (DISPLAY_VER(dev_priv) >= 12 && - is_semiplanar_uv_plane(fb, color_plane)) - return intel_tile_row_size(fb, color_plane); - fallthrough; case I915_FORMAT_MOD_Yf_TILED: return 1 * 1024 * 1024; default: diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 0ec9ad7220a1..c8aaca3e79e9 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -30,14 +30,6 @@ bool is_gen12_ccs_cc_plane(const struct drm_framebuffer *fb, int plane) plane == 2; } -bool is_aux_plane(const struct drm_framebuffer *fb, int plane) -{ - if (is_ccs_modifier(fb->modifier)) - return is_ccs_plane(fb, plane); - - return plane == 1; -} - bool is_semiplanar_uv_plane(const struct drm_framebuffer *fb, int color_plane) { return intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier) && diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h index 6acf792a8c44..13244ec1ad21 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fb.h @@ -19,7 +19,6 @@ struct intel_plane_state; bool is_ccs_plane(const struct drm_framebuffer *fb, int plane); bool is_gen12_ccs_plane(const struct drm_framebuffer *fb, int plane); bool is_gen12_ccs_cc_plane(const struct drm_framebuffer *fb, int plane); -bool is_aux_plane(const struct drm_framebuffer *fb, int plane); bool is_semiplanar_uv_plane(const struct drm_framebuffer *fb, int color_plane); bool is_surface_linear(const struct drm_framebuffer *fb, int color_plane); -- 
cgit From d5b5f63cc5b0e7cf9b16b694215bc43fb6b71441 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Wed, 21 Apr 2021 15:02:23 -0700 Subject: drm: Rename DP_PSR_SELECTIVE_UPDATE to better match eDP spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DP_PSR_EN_CFG bit 5 aka "Selective Update Region Scan Line Capture Indication" in eDP spec has an ambiguous name, so renaming to better match specification. While at it, replacing bit shift by BIT() macro and adding the eDP specification version in which some registers were added. Cc: Cc: Rodrigo Vivi Cc: Jani Nikula Cc: Gwan-gyeong Mun Signed-off-by: José Roberto de Souza Acked-by: Daniel Vetter Reviewed-by: Gwan-gyeong Mun Link: https://patchwork.freedesktop.org/patch/msgid/20210421220224.200729-1-jose.souza@intel.com --- include/drm/drm_dp_helper.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 85d728f4aad0..e3c4130dab07 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -687,14 +687,14 @@ struct drm_device; #define DP_DSC_ENABLE 0x160 /* DP 1.4 */ # define DP_DECOMPRESSION_EN (1 << 0) -#define DP_PSR_EN_CFG 0x170 /* XXX 1.2? */ -# define DP_PSR_ENABLE (1 << 0) -# define DP_PSR_MAIN_LINK_ACTIVE (1 << 1) -# define DP_PSR_CRC_VERIFICATION (1 << 2) -# define DP_PSR_FRAME_CAPTURE (1 << 3) -# define DP_PSR_SELECTIVE_UPDATE (1 << 4) -# define DP_PSR_IRQ_HPD_WITH_CRC_ERRORS (1 << 5) -# define DP_PSR_ENABLE_PSR2 (1 << 6) /* eDP 1.4a */ +#define DP_PSR_EN_CFG 0x170 /* XXX 1.2? */ +# define DP_PSR_ENABLE BIT(0) +# define DP_PSR_MAIN_LINK_ACTIVE BIT(1) +# define DP_PSR_CRC_VERIFICATION BIT(2) +# define DP_PSR_FRAME_CAPTURE BIT(3) +# define DP_PSR_SU_REGION_SCANLINE_CAPTURE BIT(4) /* eDP 1.4a */ +# define DP_PSR_IRQ_HPD_WITH_CRC_ERRORS BIT(5) /* eDP 1.4a */ +# define DP_PSR_ENABLE_PSR2 BIT(6) /* eDP 1.4a */ #define DP_ADAPTER_CTRL 0x1a0 # define DP_ADAPTER_CTRL_FORCE_LOAD_SENSE (1 << 0) -- cgit From 8804325861be242a420d68c0175127a5947ac35a Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 22 Apr 2021 19:05:44 +0300 Subject: drm/i915/display: Disable PSR2 if TGL Display stepping is B1 from A0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TGL PSR2 hardware tracking shows momentary flicker and screen shift if TGL Display stepping is B1 from A0. It has been fixed from TGL Display stepping C0.
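[Editorial sketch] The check being added is a stepping-gated workaround; a hedged sketch with a hypothetical stepping encoding (the real code uses the IS_TGL_DISPLAY_STEP() macro, see the diff below, and the bounds are assumed inclusive here):

#include <stdbool.h>

enum toy_step { TOY_STEP_A0, TOY_STEP_B0, TOY_STEP_B1, TOY_STEP_C0 };

/* Inclusive range check standing in for IS_TGL_DISPLAY_STEP(). */
static bool step_in_range(enum toy_step cur, enum toy_step from, enum toy_step to)
{
	return cur >= from && cur <= to;
}

/* Wa_2209313811: PSR2 HW tracking flickers on TGL display steppings
 * A0..B1; selective-fetch (manual tracking) configs are unaffected. */
static bool psr2_hw_tracking_usable(enum toy_step step, bool sel_fetch)
{
	return sel_fetch || !step_in_range(step, TOY_STEP_A0, TOY_STEP_B1);
}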
HSDES: 18015970021 HSDES: 2209313811 BSpec: 55378 v2: Add checking of PSR2 manual tracking (Jose) Cc: José Roberto de Souza Signed-off-by: Gwan-gyeong Mun Reviewed-by: José Roberto de Souza Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210422160544.2427123-1-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 66335ec6b7d1..c61e1d774667 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -817,6 +817,13 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, } } + /* Wa_2209313811 */ + if (!crtc_state->enable_psr2_sel_fetch && + IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B1)) { + drm_dbg_kms(&dev_priv->drm, "PSR2 HW tracking is not supported this Display stepping\n"); + return false; + } + if (!crtc_state->enable_psr2_sel_fetch && (crtc_hdisplay > psr_max_h || crtc_vdisplay > psr_max_v)) { drm_dbg_kms(&dev_priv->drm, -- cgit From 10f76165d30bf568214e75767f2d8d8682cd4040 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 26 Apr 2021 16:53:25 -0700 Subject: drm/msm: Do not unpin/evict exported dma-buf's Our initial logic for excluding dma-bufs was not quite right. In particular we want msm_gem_get/put_pages() path used for exported dma-bufs to increment/decrement the pin-count. Also, in case the importer is vmap'ing the dma-buf, we need to be sure to update the object's status, because it is now no longer potentially evictable. Fixes: 63f17ef83428 drm/msm: Support evicting GEM objects to swap Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20210426235326.1230125-1-robdclark@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 16 +++++++++++++++- drivers/gpu/drm/msm/msm_gem.h | 4 ++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index b199942266a2..56df86e5f740 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -190,13 +190,25 @@ struct page **msm_gem_get_pages(struct drm_gem_object *obj) } p = get_pages(obj); + + if (!IS_ERR(p)) { + msm_obj->pin_count++; + update_inactive(msm_obj); + } + msm_gem_unlock(obj); return p; } void msm_gem_put_pages(struct drm_gem_object *obj) { - /* when we start tracking the pin count, then do something here */ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + + msm_gem_lock(obj); + msm_obj->pin_count--; + GEM_WARN_ON(msm_obj->pin_count < 0); + update_inactive(msm_obj); + msm_gem_unlock(obj); } int msm_gem_mmap_obj(struct drm_gem_object *obj, @@ -646,6 +658,8 @@ static void *get_vaddr(struct drm_gem_object *obj, unsigned madv) ret = -ENOMEM; goto fail; } + + update_inactive(msm_obj); } return msm_obj->vaddr; diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index a6480d2c81b2..03e2cc2a2ce1 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -221,7 +221,7 @@ static inline bool is_active(struct msm_gem_object *msm_obj) /* imported/exported objects are not purgeable: */ static inline bool is_unpurgeable(struct msm_gem_object *msm_obj) { - return msm_obj->base.dma_buf && msm_obj->base.import_attach; + return msm_obj->base.import_attach || msm_obj->pin_count; } static inline bool is_purgeable(struct msm_gem_object *msm_obj) @@ -271,7 +271,7 @@ static inline void mark_unpurgeable(struct msm_gem_object 
*msm_obj) static inline bool is_unevictable(struct msm_gem_object *msm_obj) { - return is_unpurgeable(msm_obj) || msm_obj->pin_count || msm_obj->vaddr; + return is_unpurgeable(msm_obj) || msm_obj->vaddr; } static inline void mark_evictable(struct msm_gem_object *msm_obj) -- cgit From 4b95d371fb001185af84d177e69a23d55bd0167a Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Fri, 23 Apr 2021 21:49:26 -0400 Subject: drm/msm: fix LLC not being enabled for mmu500 targets mmu500 targets don't have a "cx_mem" region, set llc_mmio to NULL in that case to avoid the IS_ERR() condition in a6xx_llc_activate(). Fixes: 3d247123b5a1 ("drm/msm/a6xx: Add support for using system cache on MMU500 based targets") Signed-off-by: Jonathan Marek Link: https://lore.kernel.org/r/20210424014927.1661-1-jonathan@marek.ca Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index d553f62f4eeb..b4d8e1b01ee4 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1153,10 +1153,6 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, { struct device_node *phandle; - a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx"); - if (IS_ERR(a6xx_gpu->llc_mmio)) - return; - /* * There is a different programming path for targets with an mmu500 * attached, so detect if that is the case @@ -1166,6 +1162,11 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, of_device_is_compatible(phandle, "arm,mmu-500")); of_node_put(phandle); + if (a6xx_gpu->have_mmu500) + a6xx_gpu->llc_mmio = NULL; + else + a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx"); + a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU); a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW); -- cgit From 08811c057b3e22f7a3df3955c138a59f3b651df0 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 10 Apr 2021 04:19:01 +0300 Subject: drm/msm/dsi: dsi_phy_28nm_8960: fix uninitialized variable access The parent_name initialization was lost in refactoring, restore it now. Fixes: 5d13459650b3 ("drm/msm/dsi: push provided clocks handling into a generic code") Reported-by: kernel test robot Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Link: https://lore.kernel.org/r/20210410011901.1735866-1-dmitry.baryshkov@linaro.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c index 582b1428f971..86e40a0d41a3 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c @@ -405,6 +405,10 @@ static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm, struct clk_hw **prov if (!vco_name) return -ENOMEM; + parent_name = devm_kzalloc(dev, 32, GFP_KERNEL); + if (!parent_name) + return -ENOMEM; + clk_name = devm_kzalloc(dev, 32, GFP_KERNEL); if (!clk_name) return -ENOMEM; -- cgit From 094c7f39ba4b5ae7e4c448527834428b79e3baf9 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 12 Apr 2021 03:01:58 +0300 Subject: drm/msm/dsi: fix msm_dsi_phy_get_clk_provider return code msm_dsi_phy_get_clk_provider() always returns two provided clocks, so return 0 instead of returning incorrect -EINVAL error code. 
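[Editorial sketch] Distilled, the bug is an out-parameter getter that unconditionally falls through to an error return; a minimal sketch with hypothetical names:

struct clk;

struct toy_phy {
	struct clk *byte_clk;
	struct clk *pixel_clk;
};

/* Both provided clocks always exist here, so the getter cannot fail;
 * with the old unconditional -EINVAL, callers bailed out needlessly. */
static int toy_get_clk_providers(struct toy_phy *phy,
				 struct clk **byte_out, struct clk **pixel_out)
{
	if (byte_out)
		*byte_out = phy->byte_clk;
	if (pixel_out)
		*pixel_out = phy->pixel_clk;

	return 0; /* previously: return -EINVAL; */
}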
Fixes: 5d13459650b3 ("drm/msm/dsi: push provided clocks handling into a generic code") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Tested-by: Jonathan Marek Link: https://lore.kernel.org/r/20210412000158.2049066-1-dmitry.baryshkov@linaro.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index f0a2ddf96a4b..ff7f2ec42030 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -843,7 +843,7 @@ int msm_dsi_phy_get_clk_provider(struct msm_dsi_phy *phy, if (pixel_clk_provider) *pixel_clk_provider = phy->provided_clocks->hws[DSI_PIXEL_PLL_CLK]->clk; - return -EINVAL; + return 0; } void msm_dsi_phy_pll_save_state(struct msm_dsi_phy *phy) -- cgit From b23109c5b5368ab7512edd78c6fd23ad35a6886b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 27 Apr 2021 14:45:20 +0300 Subject: drm/i915/hdcp: add intel_dp_hdcp.h and rename init accordingly Add separate intel_dp_hdcp.h to go with intel_dp_hdcp.c, and rename the init function intel_dp_hdcp_init() to follow naming where function prefix matches the file name. Reviewed-by: Rodrigo Vivi Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210427114520.4740-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 5 +++-- drivers/gpu/drm/i915/display/intel_dp.h | 3 --- drivers/gpu/drm/i915/display/intel_dp_hdcp.c | 5 +++-- drivers/gpu/drm/i915/display/intel_dp_hdcp.h | 15 +++++++++++++++ drivers/gpu/drm/i915/display/intel_dp_mst.c | 5 +++-- 5 files changed, 24 insertions(+), 9 deletions(-) create mode 100644 drivers/gpu/drm/i915/display/intel_dp_hdcp.h diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 4ad12dde5938..dfa7da928ae5 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -49,10 +49,11 @@ #include "intel_display_types.h" #include "intel_dp.h" #include "intel_dp_aux.h" +#include "intel_dp_hdcp.h" #include "intel_dp_link_training.h" #include "intel_dp_mst.h" -#include "intel_dpll.h" #include "intel_dpio_phy.h" +#include "intel_dpll.h" #include "intel_fifo_underrun.h" #include "intel_hdcp.h" #include "intel_hdmi.h" @@ -5348,7 +5349,7 @@ intel_dp_init_connector(struct intel_digital_port *dig_port, intel_dp_add_properties(intel_dp, connector); if (is_hdcp_supported(dev_priv, port) && !intel_dp_is_edp(intel_dp)) { - int ret = intel_dp_init_hdcp(dig_port, intel_connector); + int ret = intel_dp_hdcp_init(dig_port, intel_connector); if (ret) drm_dbg_kms(&dev_priv->drm, "HDCP init failed, skipping.\n"); diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 8db5062f6c4a..680631b5b437 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -119,9 +119,6 @@ void intel_ddi_update_pipe(struct intel_atomic_state *state, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); -int intel_dp_init_hdcp(struct intel_digital_port *dig_port, - struct intel_connector *intel_connector); - bool intel_dp_initial_fastset_check(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state); void intel_dp_sync_state(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c index 2dd9360bdf9a..d7c3a74b81a3 100644 
--- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -11,9 +11,10 @@ #include #include -#include "intel_display_types.h" #include "intel_ddi.h" +#include "intel_display_types.h" #include "intel_dp.h" +#include "intel_dp_hdcp.h" #include "intel_hdcp.h" static unsigned int transcoder_to_stream_enc_status(enum transcoder cpu_transcoder) @@ -835,7 +836,7 @@ static const struct intel_hdcp_shim intel_dp_mst_hdcp_shim = { .protocol = HDCP_PROTOCOL_DP, }; -int intel_dp_init_hdcp(struct intel_digital_port *dig_port, +int intel_dp_hdcp_init(struct intel_digital_port *dig_port, struct intel_connector *intel_connector) { struct drm_device *dev = intel_connector->base.dev; diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.h b/drivers/gpu/drm/i915/display/intel_dp_hdcp.h new file mode 100644 index 000000000000..eff5ec5c5021 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __INTEL_DP_HDCP___ +#define __INTEL_DP_HDCP___ + +struct intel_connector; +struct intel_digital_port; + +int intel_dp_hdcp_init(struct intel_digital_port *dig_port, + struct intel_connector *intel_connector); + +#endif /* __INTEL_DP_HDCP___ */ diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 180f97cd74cb..3558bce242ee 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -34,11 +34,12 @@ #include "intel_connector.h" #include "intel_ddi.h" #include "intel_display_types.h" -#include "intel_hotplug.h" #include "intel_dp.h" +#include "intel_dp_hdcp.h" #include "intel_dp_mst.h" #include "intel_dpio_phy.h" #include "intel_hdcp.h" +#include "intel_hotplug.h" #include "skl_scaler.h" static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, @@ -832,7 +833,7 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo intel_attach_broadcast_rgb_property(connector); if (DISPLAY_VER(dev_priv) <= 12) { - ret = intel_dp_init_hdcp(dig_port, intel_connector); + ret = intel_dp_hdcp_init(dig_port, intel_connector); if (ret) drm_dbg_kms(&dev_priv->drm, "[%s:%d] HDCP MST init failed, skipping.\n", connector->name, connector->base.id); -- cgit From 7c53e628344bf17aa9282b795aa7cd9c46958949 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 27 Apr 2021 15:03:15 +0300 Subject: drm/i915/display: move crtc and dpll declarations where they belong MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The definitions are in the crtc and dpll files; move the declarations to the corresponding headers. 
Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210427120315.12342-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/display/icl_dsi.c | 1 + drivers/gpu/drm/i915/display/intel_crt.c | 1 + drivers/gpu/drm/i915/display/intel_crtc.h | 3 +++ drivers/gpu/drm/i915/display/intel_ddi.c | 1 + drivers/gpu/drm/i915/display/intel_display.h | 6 ------ drivers/gpu/drm/i915/display/intel_dp_mst.c | 1 + drivers/gpu/drm/i915/display/intel_dpll.h | 5 +++++ drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 1 + drivers/gpu/drm/i915/display/intel_sdvo.c | 1 + drivers/gpu/drm/i915/display/vlv_dsi.c | 1 + drivers/gpu/drm/i915/i915_trace.h | 1 + 11 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 37e2d93d064c..781630a40f06 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -31,6 +31,7 @@ #include "intel_atomic.h" #include "intel_combo_phy.h" #include "intel_connector.h" +#include "intel_crtc.h" #include "intel_ddi.h" #include "intel_dsi.h" #include "intel_panel.h" diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index c85092eaa5c2..1aac8bead4eb 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -36,6 +36,7 @@ #include "i915_drv.h" #include "intel_connector.h" #include "intel_crt.h" +#include "intel_crtc.h" #include "intel_ddi.h" #include "intel_display_types.h" #include "intel_fdi.h" diff --git a/drivers/gpu/drm/i915/display/intel_crtc.h b/drivers/gpu/drm/i915/display/intel_crtc.h index 08112d557411..a5ae997581aa 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.h +++ b/drivers/gpu/drm/i915/display/intel_crtc.h @@ -18,5 +18,8 @@ int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe); struct intel_crtc_state *intel_crtc_state_alloc(struct intel_crtc *crtc); void intel_crtc_state_reset(struct intel_crtc_state *crtc_state, struct intel_crtc *crtc); +u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc); +void intel_crtc_vblank_on(const struct intel_crtc_state *crtc_state); +void intel_crtc_vblank_off(const struct intel_crtc_state *crtc_state); #endif diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index f4249f087fa7..93d94d50b63d 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -31,6 +31,7 @@ #include "intel_audio.h" #include "intel_combo_phy.h" #include "intel_connector.h" +#include "intel_crtc.h" #include "intel_ddi.h" #include "intel_ddi_buf_trans.h" #include "intel_display_types.h" diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index b68bcd502206..fc0df4c63e8d 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -557,9 +557,6 @@ enum tc_port intel_port_to_tc(struct drm_i915_private *dev_priv, enum port port); int intel_get_pipe_from_crtc_id_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc); -void intel_crtc_vblank_on(const struct intel_crtc_state *crtc_state); -void intel_crtc_vblank_off(const struct intel_crtc_state *crtc_state); int ilk_get_lanes_required(int target_clock, int link_bw, int bpp); void vlv_wait_port_ready(struct drm_i915_private *dev_priv, @@ -598,9 +595,6 @@ void 
intel_dp_get_m_n(struct intel_crtc *crtc, void intel_dp_set_m_n(const struct intel_crtc_state *crtc_state, enum link_m_n_set m_n); int intel_dotclock_calculate(int link_freq, const struct intel_link_m_n *m_n); -bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state, - struct dpll *best_clock); -int chv_calc_dpll_params(int refclk, struct dpll *pll_clock); bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state); void hsw_enable_ips(const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 3558bce242ee..a30ca4380a06 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -32,6 +32,7 @@ #include "intel_atomic.h" #include "intel_audio.h" #include "intel_connector.h" +#include "intel_crtc.h" #include "intel_ddi.h" #include "intel_display_types.h" #include "intel_dp.h" diff --git a/drivers/gpu/drm/i915/display/intel_dpll.h b/drivers/gpu/drm/i915/display/intel_dpll.h index 7ff4b0d29ed1..88247027fd5a 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll.h +++ b/drivers/gpu/drm/i915/display/intel_dpll.h @@ -6,6 +6,8 @@ #ifndef _INTEL_DPLL_H_ #define _INTEL_DPLL_H_ +#include + struct dpll; struct drm_i915_private; struct intel_crtc; @@ -37,5 +39,8 @@ void vlv_prepare_pll(struct intel_crtc *crtc, const struct intel_crtc_state *pipe_config); void chv_prepare_pll(struct intel_crtc *crtc, const struct intel_crtc_state *pipe_config); +bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state, + struct dpll *best_clock); +int chv_calc_dpll_params(int refclk, struct dpll *pll_clock); #endif diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index e1c916640768..da2ff0b3ceac 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -23,6 +23,7 @@ #include "intel_display_types.h" #include "intel_dpio_phy.h" +#include "intel_dpll.h" #include "intel_dpll_mgr.h" /** diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index f770d6bcd2c9..f6eb95c717d2 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -38,6 +38,7 @@ #include "i915_drv.h" #include "intel_atomic.h" #include "intel_connector.h" +#include "intel_crtc.h" #include "intel_display_types.h" #include "intel_fifo_underrun.h" #include "intel_gmbus.h" diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 0d52da613101..ac0553d492aa 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -33,6 +33,7 @@ #include "i915_drv.h" #include "intel_atomic.h" #include "intel_connector.h" +#include "intel_crtc.h" #include "intel_display_types.h" #include "intel_dsi.h" #include "intel_fifo_underrun.h" diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index a4addcc64978..cac385e526c1 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -8,6 +8,7 @@ #include +#include "display/intel_crtc.h" #include "display/intel_display_types.h" #include "gt/intel_engine.h" -- cgit From 8ec7d10a54798718f6c166639a79991c8b53ddee Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 18 Mar 2021 20:10:38 +0200 Subject: drm/i915: Fix pre-skl DP AUX precharge length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DP 
v1.1+ says: "The DisplayPort transmitter, which is the driving end for a request transaction, pre-charges the AUX-CH+ and AUX-CH- to a common mode voltage by transmitting 10 to 16 consecutive 0’s in Manchester II code. After the active pre-charge, the transmitter sends an AUX Sync pattern. The AUX Sync pattern must be as follows: Start with 16 consecutive 0s in Manchester-II code, which results in a transition from low to high in the middle of each bit period. Including active pre-charge pulses, there shall be 26 to 32 consecutive 0s before the end of the AUX_SYNC pattern." BDW bspec says: "Used to determine the precharge time for the Aux Channel. During this time the Aux Channel will drive the SYNC pattern. Every microsecond gives one additional SYNC pulse beyond the hard coded 26 SYNC pulses. The value is the number of microseconds times 2. Default is 3 decimal which gives 6us of precharge which is 6 extra SYN pulses for a total of 32." CPT bspec says the same thing apart from: "... Default is 5 decimal which gives 10us of precharge which is 10 extra SYNC pulses for a total of 36." So it looks like to match the max of 32 of the DP spec we should just always program this extra precharge time to 3. Unfortunately g4x/ibx bspec doesn't have this clarification, but since the cpt default was still the same 5 as for g4x/ibx let's assume the behaviour was always the same. I also did a bit more archaeology and found the following: commit e3421a189447 ("drm/i915: enable DP/eDP for Sandybridge/Cougarpoint") added the precharge==3 for snb; commit 092945e11c5b ("drm/i915/dp: Use auxch precharge value of 5 everywhere") tried to change it to be 5 for snb; commit 6b4e0a93ff6e ("Revert "drm/i915/dp: Use auxch precharge value of 5 everywhere"") went back to 3 for snb due to a regression. So I think the value of 5 was just always wrong, but I guess very few displays actually get upset if we do too many SYNCs. Also DP 1.0 did not specify any max value for this, whereas DP 1.1+ added the max==32 wording. Additionally I hooked up a scope to a few machines with the following findings: - ibx and cpt both give us the expected 32 total sync pulses with precharge==3 - ctg is a bit different, it has the 10 hardcoded precharge sync pulses, same as later platforms (so we get at least 26 sync pulses in total). However the additional precharge length (which is what we're changing here) is not done with sync pulses. Instead ctg does this part of the precharge with a steady DC voltage. If we wanted to 100% match DP 1.1+ here we should perhaps set precharge length to 0, but less precharge might make AUX less reliable, and so far we're not aware of any problems due to the DC precharge. Hence I think precharge==3 is probably the best choice here too to make the total length of precharge consistent with the later platforms.
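[Editorial sketch] The arithmetic behind the chosen value can be checked directly from the bspec text quoted above; a small self-contained program:

#include <stdio.h>

#define BASE_SYNC_PULSES 26 /* hard-coded pulses per the bspec quote */

/* The register field is in 2us units, and every microsecond of extra
 * precharge adds one SYNC pulse beyond the hard-coded ones. */
static unsigned int total_sync_pulses(unsigned int precharge_2us)
{
	return BASE_SYNC_PULSES + 2 * precharge_2us;
}

int main(void)
{
	printf("precharge=3 -> %u pulses\n", total_sync_pulses(3)); /* 32: DP 1.1+ max */
	printf("precharge=5 -> %u pulses\n", total_sync_pulses(5)); /* 36: over spec */
	return 0;
}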
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210318181039.17260-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/display/intel_dp_aux.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c index 7e83bc2cc34a..805f6953c048 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c @@ -126,12 +126,7 @@ static u32 g4x_get_aux_send_ctl(struct intel_dp *intel_dp, struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - u32 precharge, timeout; - - if (IS_SANDYBRIDGE(dev_priv)) - precharge = 3; - else - precharge = 5; + u32 timeout; /* Max timeout value on G4x-BDW: 1.6ms */ if (IS_BROADWELL(dev_priv)) @@ -146,7 +141,7 @@ static u32 g4x_get_aux_send_ctl(struct intel_dp *intel_dp, timeout | DP_AUX_CH_CTL_RECEIVE_ERROR | (send_bytes << DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT) | - (precharge << DP_AUX_CH_CTL_PRECHARGE_2US_SHIFT) | + (3 << DP_AUX_CH_CTL_PRECHARGE_2US_SHIFT) | (aux_clock_divider << DP_AUX_CH_CTL_BIT_CLOCK_2X_SHIFT); } -- cgit From 661af37946f32d9bf28b11a6e0626c1012813190 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 18 Mar 2021 20:10:39 +0200 Subject: drm/i915: Remove stray newlines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A bunch of files have a stray newline at the end. Remove it. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210318181039.17260-2-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/display/i9xx_plane.c | 1 - drivers/gpu/drm/i915/display/skl_universal_plane.c | 1 - drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c | 1 - drivers/gpu/drm/i915/i915_params.h | 1 - drivers/gpu/drm/i915/i915_vma_types.h | 1 - 5 files changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.c b/drivers/gpu/drm/i915/display/i9xx_plane.c index 80da0e3571a4..393ef09ba0ac 100644 --- a/drivers/gpu/drm/i915/display/i9xx_plane.c +++ b/drivers/gpu/drm/i915/display/i9xx_plane.c @@ -1039,4 +1039,3 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, plane_config->fb = intel_fb; } - diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 75d3ca3dbb37..553bc937ad90 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -2241,4 +2241,3 @@ skl_get_initial_plane_config(struct intel_crtc *crtc, error: kfree(intel_fb); } - diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c index 129e0cf7dfe2..64e0b86bf258 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c @@ -121,4 +121,3 @@ void intel_guc_log_debugfs_register(struct intel_guc_log *log, intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), log); } - diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 34ebb0662547..14cd64cc61d0 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -97,4 +97,3 @@ void i915_params_copy(struct i915_params *dest, const struct i915_params *src); void i915_params_free(struct i915_params *params); #endif - diff --git 
a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h index 6b1bfa230b82..995b502d7e5d 100644 --- a/drivers/gpu/drm/i915/i915_vma_types.h +++ b/drivers/gpu/drm/i915/i915_vma_types.h @@ -286,4 +286,3 @@ struct i915_vma { }; #endif - -- cgit From b109da48e6baceeb7b80e6b3292324f46c7d1255 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 26 Apr 2021 21:56:12 +0300 Subject: drm/i915: Stop using crtc->index as the pipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pipe crc code slipped through the net when we tried to eliminate all crtc->index==pipe abuses. Remedy that. And while at it get rid of those nasty intel_crtc+drm_crtc pointer aliases. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210426185612.13223-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula Reviewed-by: José Roberto de Souza --- drivers/gpu/drm/i915/display/intel_pipe_crc.c | 51 ++++++++++++++------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_pipe_crc.c b/drivers/gpu/drm/i915/display/intel_pipe_crc.c index 0f6de96e6d43..acc64b87d29f 100644 --- a/drivers/gpu/drm/i915/display/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/display/intel_pipe_crc.c @@ -580,13 +580,14 @@ int intel_crtc_verify_crc_source(struct drm_crtc *crtc, const char *source_name, return -EINVAL; } -int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name) +int intel_crtc_set_crc_source(struct drm_crtc *_crtc, const char *source_name) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_pipe_crc *pipe_crc = &intel_crtc->pipe_crc; + struct intel_crtc *crtc = to_intel_crtc(_crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_pipe_crc *pipe_crc = &crtc->pipe_crc; enum intel_display_power_domain power_domain; enum intel_pipe_crc_source source; + enum pipe pipe = crtc->pipe; intel_wakeref_t wakeref; u32 val = 0; /* shut up gcc */ int ret = 0; @@ -597,7 +598,7 @@ int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name) return -EINVAL; } - power_domain = POWER_DOMAIN_PIPE(crtc->index); + power_domain = POWER_DOMAIN_PIPE(pipe); wakeref = intel_display_power_get_if_enabled(dev_priv, power_domain); if (!wakeref) { drm_dbg_kms(&dev_priv->drm, @@ -607,64 +608,64 @@ int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name) enable = source != INTEL_PIPE_CRC_SOURCE_NONE; if (enable) - intel_crtc_crc_setup_workarounds(to_intel_crtc(crtc), true); + intel_crtc_crc_setup_workarounds(crtc, true); - ret = get_new_crc_ctl_reg(dev_priv, crtc->index, &source, &val); + ret = get_new_crc_ctl_reg(dev_priv, pipe, &source, &val); if (ret != 0) goto out; pipe_crc->source = source; - intel_de_write(dev_priv, PIPE_CRC_CTL(crtc->index), val); - intel_de_posting_read(dev_priv, PIPE_CRC_CTL(crtc->index)); + intel_de_write(dev_priv, PIPE_CRC_CTL(pipe), val); + intel_de_posting_read(dev_priv, PIPE_CRC_CTL(pipe)); if (!source) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - vlv_undo_pipe_scramble_reset(dev_priv, crtc->index); + vlv_undo_pipe_scramble_reset(dev_priv, pipe); } pipe_crc->skipped = 0; out: if (!enable) - intel_crtc_crc_setup_workarounds(to_intel_crtc(crtc), false); + intel_crtc_crc_setup_workarounds(crtc, false); intel_display_power_put(dev_priv, power_domain, wakeref); return ret; } -void intel_crtc_enable_pipe_crc(struct
intel_crtc *intel_crtc) +void intel_crtc_enable_pipe_crc(struct intel_crtc *crtc) { - struct drm_crtc *crtc = &intel_crtc->base; - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_pipe_crc *pipe_crc = &intel_crtc->pipe_crc; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_pipe_crc *pipe_crc = &crtc->pipe_crc; + enum pipe pipe = crtc->pipe; u32 val = 0; - if (!crtc->crc.opened) + if (!crtc->base.crc.opened) return; - if (get_new_crc_ctl_reg(dev_priv, crtc->index, &pipe_crc->source, &val) < 0) + if (get_new_crc_ctl_reg(dev_priv, pipe, &pipe_crc->source, &val) < 0) return; /* Don't need pipe_crc->lock here, IRQs are not generated. */ pipe_crc->skipped = 0; - intel_de_write(dev_priv, PIPE_CRC_CTL(crtc->index), val); - intel_de_posting_read(dev_priv, PIPE_CRC_CTL(crtc->index)); + intel_de_write(dev_priv, PIPE_CRC_CTL(pipe), val); + intel_de_posting_read(dev_priv, PIPE_CRC_CTL(pipe)); } -void intel_crtc_disable_pipe_crc(struct intel_crtc *intel_crtc) +void intel_crtc_disable_pipe_crc(struct intel_crtc *crtc) { - struct drm_crtc *crtc = &intel_crtc->base; - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_pipe_crc *pipe_crc = &intel_crtc->pipe_crc; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_pipe_crc *pipe_crc = &crtc->pipe_crc; + enum pipe pipe = crtc->pipe; /* Swallow crc's until we stop generating them. */ spin_lock_irq(&pipe_crc->lock); pipe_crc->skipped = INT_MIN; spin_unlock_irq(&pipe_crc->lock); - intel_de_write(dev_priv, PIPE_CRC_CTL(crtc->index), 0); - intel_de_posting_read(dev_priv, PIPE_CRC_CTL(crtc->index)); + intel_de_write(dev_priv, PIPE_CRC_CTL(pipe), 0); + intel_de_posting_read(dev_priv, PIPE_CRC_CTL(pipe)); intel_synchronize_irq(dev_priv); } -- cgit From 1a86ac792f67ebd50cdafb20d4aad6b23c8495f7 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 14 Apr 2021 05:23:02 +0300 Subject: drm/i915: Add frontbuffer tracking tracepoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add some tracepoints for frontbuffer tracking so we can try to figure out what's going on.
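[Editorial sketch] For context, frontbuffer tracking is essentially a bitmask of planes whose content may be stale; the two new events log that mask plus the origin of the access. A toy userspace model of the bookkeeping (not the driver code, which lives in intel_frontbuffer.c):

#include <stdio.h>

static unsigned int busy_bits; /* frontbuffers with writes in flight */

static void toy_invalidate(unsigned int bits, unsigned int origin)
{
	busy_bits |= bits; /* trace_intel_frontbuffer_invalidate() point */
	printf("invalidate bits=0x%08x origin=%u\n", bits, origin);
}

static void toy_flush(unsigned int bits, unsigned int origin)
{
	busy_bits &= ~bits; /* trace_intel_frontbuffer_flush() point */
	printf("flush bits=0x%08x origin=%u\n", bits, origin);
}

int main(void)
{
	toy_invalidate(0x1, 2); /* e.g. a CPU write to one plane starts */
	toy_flush(0x1, 2);      /* write retired: PSR/FBC may re-enable */
	return 0;
}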
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210414022309.30898-2-ville.syrjala@linux.intel.com Acked-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_frontbuffer.c | 5 ++++ drivers/gpu/drm/i915/i915_trace.h | 38 ++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index 6fc6965b6133..8161d49e78ba 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -58,6 +58,7 @@ #include "display/intel_dp.h" #include "i915_drv.h" +#include "i915_trace.h" #include "intel_display_types.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" @@ -87,6 +88,8 @@ static void frontbuffer_flush(struct drm_i915_private *i915, if (!frontbuffer_bits) return; + trace_intel_frontbuffer_flush(frontbuffer_bits, origin); + might_sleep(); intel_edp_drrs_flush(i915, frontbuffer_bits); intel_psr_flush(i915, frontbuffer_bits, origin); @@ -173,6 +176,8 @@ void __intel_fb_invalidate(struct intel_frontbuffer *front, spin_unlock(&i915->fb_tracking.lock); } + trace_intel_frontbuffer_invalidate(frontbuffer_bits, origin); + might_sleep(); intel_psr_invalidate(i915, frontbuffer_bits, origin); intel_edp_drrs_invalidate(i915, frontbuffer_bits); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index cac385e526c1..6778ad2a14a4 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -475,6 +475,44 @@ TRACE_EVENT(intel_pipe_update_end, __entry->scanline) ); +/* frontbuffer tracking */ + +TRACE_EVENT(intel_frontbuffer_invalidate, + TP_PROTO(unsigned int frontbuffer_bits, unsigned int origin), + TP_ARGS(frontbuffer_bits, origin), + + TP_STRUCT__entry( + __field(unsigned int, frontbuffer_bits) + __field(unsigned int, origin) + ), + + TP_fast_assign( + __entry->frontbuffer_bits = frontbuffer_bits; + __entry->origin = origin; + ), + + TP_printk("frontbuffer_bits=0x%08x, origin=%u", + __entry->frontbuffer_bits, __entry->origin) +); + +TRACE_EVENT(intel_frontbuffer_flush, + TP_PROTO(unsigned int frontbuffer_bits, unsigned int origin), + TP_ARGS(frontbuffer_bits, origin), + + TP_STRUCT__entry( + __field(unsigned int, frontbuffer_bits) + __field(unsigned int, origin) + ), + + TP_fast_assign( + __entry->frontbuffer_bits = frontbuffer_bits; + __entry->origin = origin; + ), + + TP_printk("frontbuffer_bits=0x%08x, origin=%u", + __entry->frontbuffer_bits, __entry->origin) +); + /* object tracking */ TRACE_EVENT(i915_gem_object_create, -- cgit From adbd914dcde0b03bfc08ffe40b81f31b0457833f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 21 Apr 2021 14:31:50 +0100 Subject: btrfs: zoned: fix silent data loss after failure splitting ordered extent On a zoned filesystem, sometimes we need to split an ordered extent into 3 different ordered extents. The original ordered extent is shortened, at the front and at the rear, and we create two other new ordered extents to represent the trimmed parts of the original ordered extent. After adjusting the original ordered extent, we create an ordered extent to represent the pre-range, and that may fail with ENOMEM for example. After that we always try to create the ordered extent for the post-range, and if that happens to succeed we end up returning success to the caller as we overwrite the 'ret' variable which contained the previous error. 
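[Editorial sketch] The error-handling pattern at fault, distilled into a few lines; clone_part() is a hypothetical stand-in for clone_ordered_extent(), rigged so the pre range fails and the post range succeeds:

#include <stdio.h>
#include <errno.h>

/* Stub: the pre-range clone fails, the post-range clone succeeds. */
static int clone_part(unsigned long long off, unsigned long long len)
{
	(void)len;
	return off == 0 ? -ENOMEM : 0;
}

static int toy_split(unsigned long long pre, unsigned long long post)
{
	int ret = 0;

	if (pre)
		ret = clone_part(0, pre);    /* fails with -ENOMEM */
	if (post)                            /* bug: runs even after a failure */
		ret = clone_part(pre, post); /* a success here masks the error */

	return ret; /* fix: only attempt the post range when ret == 0 */
}

int main(void)
{
	printf("toy_split() = %d\n", toy_split(4096, 4096)); /* prints 0: error lost */
	return 0;
}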
This means we end up with a file range for which there is no ordered extent, which results in the range never getting a new file extent item pointing to the new data location. And since the split operation did not return an error, writeback does not fail and the inode's mapping is not flagged with an error, resulting in a subsequent fsync not reporting an error either. It's possibly very unlikely to have the creation of the post-range ordered extent succeed after the creation of the pre-range ordered extent failed, but it's not impossible. So fix this by making sure we only create the post-range ordered extent if there was no error creating the ordered extent for the pre-range. Fixes: d22002fd37bd97 ("btrfs: zoned: split ordered extent when bio is sent") CC: stable@vger.kernel.org # 5.12+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ordered-data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 07b0b4218791..6c413bb451a3 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -984,7 +984,7 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre, if (pre) ret = clone_ordered_extent(ordered, 0, pre); - if (post) + if (ret == 0 && post) ret = clone_ordered_extent(ordered, pre + ordered->disk_num_bytes, post); -- cgit From ffb7c2e923cb3232454a513dcb5636e73091aa88 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 22 Apr 2021 12:09:21 +0100 Subject: btrfs: do not consider send context as valid when trying to flush qgroups At qgroup.c:try_flush_qgroup() we are asserting that current->journal_info is either NULL or has the value BTRFS_SEND_TRANS_STUB. However allowing for BTRFS_SEND_TRANS_STUB makes no sense because: 1) It is misleading, because send operations are read-only and do not ever need to reserve qgroup space; 2) We already assert that current->journal_info != BTRFS_SEND_TRANS_STUB at transaction.c:start_transaction(); 3) On a kernel without CONFIG_BTRFS_ASSERT=y set, it would result in a crash if try_flush_qgroup() is ever called in a send context, because at transaction.c:start_transaction we cast current->journal_info into a struct btrfs_trans_handle pointer and then dereference it. So just do not allow a send context at try_flush_qgroup() and update the comment about it. Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 2319c923c9e6..b1caf5acf1e2 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3545,11 +3545,15 @@ static int try_flush_qgroup(struct btrfs_root *root) struct btrfs_trans_handle *trans; int ret; - /* Can't hold an open transaction or we run the risk of deadlocking */ - ASSERT(current->journal_info == NULL || - current->journal_info == BTRFS_SEND_TRANS_STUB); - if (WARN_ON(current->journal_info && - current->journal_info != BTRFS_SEND_TRANS_STUB)) + /* + * Can't hold an open transaction or we run the risk of deadlocking, + * and can't either be under the context of a send operation (where + * current->journal_info is set to BTRFS_SEND_TRANS_STUB), as that + * would result in a crash when starting a transaction and does not + * make sense either (send is a read-only operation).
+ */ + ASSERT(current->journal_info == NULL); + if (WARN_ON(current->journal_info)) return 0; /* -- cgit From 626e9f41f7c281ba3e02843702f68471706aa6d9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 27 Apr 2021 11:27:20 +0100 Subject: btrfs: fix race leading to unpersisted data and metadata on fsync When doing a fast fsync on a file, there is a race which can result in the fsync returning success to user space without logging the inode and without durably persisting new data. The following example shows one possible scenario for this: $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ touch /mnt/bar $ xfs_io -f -c "pwrite -S 0xab 0 1M" -c "fsync" /mnt/baz # Now we have: # file bar == inode 257 # file baz == inode 258 $ mv /mnt/baz /mnt/foo # Now we have: # file bar == inode 257 # file foo == inode 258 $ xfs_io -c "pwrite -S 0xcd 0 1M" /mnt/foo # fsync bar before foo, it is important to trigger the race. $ xfs_io -c "fsync" /mnt/bar $ xfs_io -c "fsync" /mnt/foo # After this: # inode 257, file bar, is empty # inode 258, file foo, has 1M filled with 0xcd # Replay the log: $ mount /dev/sdc /mnt # After this point file foo should have 1M filled with 0xcd and not 0xab The following steps explain how the race happens: 1) Before the first fsync of inode 258, when it has the "baz" name, its ->logged_trans is 0, ->last_sub_trans is 0 and ->last_log_commit is -1. The inode also has the full sync flag set; 2) After the first fsync, we set inode 258 ->logged_trans to 6, which is the generation of the current transaction, and set ->last_log_commit to 0, which is the current value of ->last_sub_trans (done at btrfs_log_inode()). The full sync flag is cleared from the inode during the fsync. The log sub transaction that was committed had an ID of 0 and when we synced the log, at btrfs_sync_log(), we incremented root->log_transid from 0 to 1; 3) During the rename: We update inode 258, through btrfs_update_inode(), and that causes its ->last_sub_trans to be set to 1 (the current log transaction ID), and ->last_log_commit remains with a value of 0. After updating inode 258, because we have previously logged the inode in the previous fsync, we log again the inode through the call to btrfs_log_new_name(). This results in updating the inode's ->last_log_commit from 0 to 1 (the current value of its ->last_sub_trans). The ->last_sub_trans of inode 257 is updated to 1, which is the ID of the next log transaction; 4) Then a buffered write against inode 258 is made. This leaves the value of ->last_sub_trans as 1 (the ID of the current log transaction, stored at root->log_transid); 5) Then an fsync against inode 257 (or any other inode other than 258), happens. This results in committing the log transaction with ID 1, which results in updating root->last_log_commit to 1 and bumping root->log_transid from 1 to 2; 6) Then an fsync against inode 258 starts. We flush delalloc and wait only for writeback to complete, since the full sync flag is not set in the inode's runtime flags - we do not wait for ordered extents to complete. Then, at btrfs_sync_file(), we call btrfs_inode_in_log() before the ordered extent completes. The call returns true: static inline bool btrfs_inode_in_log(...) 
{ bool ret = false; spin_lock(&inode->lock); if (inode->logged_trans == generation && inode->last_sub_trans <= inode->last_log_commit && inode->last_sub_trans <= inode->root->last_log_commit) ret = true; spin_unlock(&inode->lock); return ret; } generation has a value of 6 (fs_info->generation), ->logged_trans also has a value of 6 (set when we logged the inode during the first fsync and when logging it during the rename), ->last_sub_trans has a value of 1, set during the rename (step 3), ->last_log_commit also has a value of 1 (set in step 3) and root->last_log_commit has a value of 1, which was set in step 5 when fsyncing inode 257. As a consequence we don't log the inode, any new extents and do not sync the log, resulting in a data loss if a power failure happens after the fsync and before the current transaction commits. Also, because we do not log the inode, after a power failure the mtime and ctime of the inode do not match those we had before. When the ordered extent completes before we call btrfs_inode_in_log(), then the call returns false and we log the inode and sync the log, since at the end of ordered extent completion we update the inode and set ->last_sub_trans to 2 (the value of root->log_transid) and ->last_log_commit to 1. This problem is found after removing the check for the emptiness of the inode's list of modified extents in the recent commit 209ecbb8585bf6 ("btrfs: remove stale comment and logic from btrfs_inode_in_log()"), added in the 5.13 merge window. However checking the emptiness of the list is not really the way to solve this problem, and was never intended to, because while that solves the problem for COW writes, the problem persists for NOCOW writes because in that case the list is always empty. In the case of NOCOW writes, even though we wait for the writeback to complete before returning from btrfs_sync_file(), we end up not logging the inode, which has a new mtime/ctime, and because we don't sync the log, we never issue disk barriers (send REQ_PREFLUSH to the device) since that only happens when we sync the log (when we write super blocks at btrfs_sync_log()). So effectively, for a NOCOW case, when we return from btrfs_sync_file() to user space, we are not guaranteeing that the data is durably persisted on disk. Also, while the example above uses a rename exchange to show how the problem happens, it is not the only way to trigger it. An alternative could be adding a new hard link to inode 258, since that also results in calling btrfs_log_new_name() and updating the inode in the log. An example reproducer using the addition of a hard link instead of a rename operation: $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ touch /mnt/bar $ xfs_io -f -c "pwrite -S 0xab 0 1M" -c "fsync" /mnt/foo $ ln /mnt/foo /mnt/foo_link $ xfs_io -c "pwrite -S 0xcd 0 1M" /mnt/foo $ xfs_io -c "fsync" /mnt/bar $ xfs_io -c "fsync" /mnt/foo # Replay the log: $ mount /dev/sdc /mnt # After this point file foo often has 1M filled with 0xab and not 0xcd The reasons leading to the final fsync of file foo, inode 258, not persisting the new data are the same as for the previous example with a rename operation. So fix by never skipping logging and log syncing when there are still any ordered extents in flight. To avoid making the conditional if statement that checks if logging an inode is needed harder to read, place all the logic into a helper function with separate if statements to make it more manageable and easier to read. A test case for fstests will follow soon.
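[Editorial sketch] The state evolution in the walkthrough above can be condensed into a small simulation; the predicate mirrors the btrfs_inode_in_log() body quoted above and the numbers mirror steps 1-6 (toy code, not btrfs internals):

#include <stdio.h>
#include <stdbool.h>

struct toy_inode { long logged_trans, last_sub_trans, last_log_commit; };

/* Same check as the quoted btrfs_inode_in_log() body. */
static bool toy_in_log(const struct toy_inode *i, long generation,
		       long root_last_log_commit)
{
	return i->logged_trans == generation &&
	       i->last_sub_trans <= i->last_log_commit &&
	       i->last_sub_trans <= root_last_log_commit;
}

int main(void)
{
	struct toy_inode foo = {
		.logged_trans = 6,    /* step 2: first fsync */
		.last_sub_trans = 1,  /* step 3: rename */
		.last_log_commit = 1, /* step 3: btrfs_log_new_name() */
	};

	/* Step 6: the ordered extent from step 4 has not completed, so
	 * last_sub_trans never became 2, and the check wrongly reports
	 * the inode as already in the log. */
	printf("in_log = %d (1 means the fsync is skipped)\n",
	       toy_in_log(&foo, 6 /* generation */, 1 /* step 5 */));
	return 0;
}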
For NOCOW writes, the problem existed before commit b5e6c3e170b770 ("btrfs: always wait on ordered extents at fsync time"), introduced in kernel 4.19, then it went away with that commit since we started to always wait for ordered extent completion before logging. The problem came back again once the fast fsync path was changed again to avoid waiting for ordered extent completion, in commit 487781796d3022 ("btrfs: make fast fsyncs wait only for writeback"), added in kernel 5.10. However, for COW writes, the race only happens after the recent commit 209ecbb8585bf6 ("btrfs: remove stale comment and logic from btrfs_inode_in_log()"), introduced in the 5.13 merge window. For NOCOW writes, the bug existed before that commit. So tag 5.10+ as the release for stable backports. CC: stable@vger.kernel.org # 5.10+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 35 +++++++++++++++++++++++++---------- fs/btrfs/tree-log.c | 3 ++- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 864c08d08a35..3b10d98b4ebb 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2067,6 +2067,30 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end) return ret; } +static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx) +{ + struct btrfs_inode *inode = BTRFS_I(ctx->inode); + struct btrfs_fs_info *fs_info = inode->root->fs_info; + + if (btrfs_inode_in_log(inode, fs_info->generation) && + list_empty(&ctx->ordered_extents)) + return true; + + /* + * If we are doing a fast fsync we can not bail out if the inode's + * last_trans is <= then the last committed transaction, because we only + * update the last_trans of the inode during ordered extent completion, + * and for a fast fsync we don't wait for that, we only wait for the + * writeback to complete. + */ + if (inode->last_trans <= fs_info->last_trans_committed && + (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) || + list_empty(&ctx->ordered_extents))) + return true; + + return false; +} + /* * fsync call for both files and directories. This logs the inode into * the tree log instead of forcing full commits whenever possible. @@ -2185,17 +2209,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) atomic_inc(&root->log_batch); - /* - * If we are doing a fast fsync we can not bail out if the inode's - * last_trans is <= then the last committed transaction, because we only - * update the last_trans of the inode during ordered extent completion, - * and for a fast fsync we don't wait for that, we only wait for the - * writeback to complete. - */ smp_mb(); - if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) || - (BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed && - (full_sync || list_empty(&ctx.ordered_extents)))) { + if (skip_inode_logging(&ctx)) { /* * We've had everything committed since the last time we were * modified so clear this flag in case it was set for whatever diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c1353b84ae54..a0fc3a1390ab 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -6060,7 +6060,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, * (since logging them is pointless, a link count of 0 means they * will never be accessible). 
*/ - if (btrfs_inode_in_log(inode, trans->transid) || + if ((btrfs_inode_in_log(inode, trans->transid) && + list_empty(&ctx->ordered_extents)) || inode->vfs_inode.i_nlink == 0) { ret = BTRFS_NO_LOG_SYNC; goto end_no_trans; -- cgit From f9baa501b4fd6962257853d46ddffbc21f27e344 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 22 Apr 2021 12:08:05 +0100 Subject: btrfs: fix deadlock when cloning inline extents and using qgroups There are a few exceptional cases where cloning an inline extent needs to copy the inline extent data into a page of the destination inode. When this happens, we end up starting a transaction while having a dirty page for the destination inode and while having the range locked in the destination's inode iotree too. Because when reserving metadata space for a transaction we may need to flush existing delalloc in case there is not enough free space, we have a mechanism in place to prevent a deadlock, which was introduced in commit 3d45f221ce627d ("btrfs: fix deadlock when cloning inline extent and low on free metadata space"). However when using qgroups, a transaction also reserves metadata qgroup space, which can also result in flushing delalloc in case there is not enough available space at the moment. When this happens we deadlock, since flushing delalloc requires locking the file range in the inode's iotree and the range was already locked at the very beginning of the clone operation, before attempting to start the transaction. When this issue happens, stack traces like the following are reported: [72747.556262] task:kworker/u81:9 state:D stack: 0 pid: 225 ppid: 2 flags:0x00004000 [72747.556268] Workqueue: writeback wb_workfn (flush-btrfs-1142) [72747.556271] Call Trace: [72747.556273] __schedule+0x296/0x760 [72747.556277] schedule+0x3c/0xa0 [72747.556279] io_schedule+0x12/0x40 [72747.556284] __lock_page+0x13c/0x280 [72747.556287] ? generic_file_readonly_mmap+0x70/0x70 [72747.556325] extent_write_cache_pages+0x22a/0x440 [btrfs] [72747.556331] ? __set_page_dirty_nobuffers+0xe7/0x160 [72747.556358] ? set_extent_buffer_dirty+0x5e/0x80 [btrfs] [72747.556362] ? update_group_capacity+0x25/0x210 [72747.556366] ? cpumask_next_and+0x1a/0x20 [72747.556391] extent_writepages+0x44/0xa0 [btrfs] [72747.556394] do_writepages+0x41/0xd0 [72747.556398] __writeback_single_inode+0x39/0x2a0 [72747.556403] writeback_sb_inodes+0x1ea/0x440 [72747.556407] __writeback_inodes_wb+0x5f/0xc0 [72747.556410] wb_writeback+0x235/0x2b0 [72747.556414] ? get_nr_inodes+0x35/0x50 [72747.556417] wb_workfn+0x354/0x490 [72747.556420] ? newidle_balance+0x2c5/0x3e0 [72747.556424] process_one_work+0x1aa/0x340 [72747.556426] worker_thread+0x30/0x390 [72747.556429] ? create_worker+0x1a0/0x1a0 [72747.556432] kthread+0x116/0x130 [72747.556435] ? kthread_park+0x80/0x80 [72747.556438] ret_from_fork+0x1f/0x30 [72747.566958] Workqueue: btrfs-flush_delalloc btrfs_work_helper [btrfs] [72747.566961] Call Trace: [72747.566964] __schedule+0x296/0x760 [72747.566968] ? finish_wait+0x80/0x80 [72747.566970] schedule+0x3c/0xa0 [72747.566995] wait_extent_bit.constprop.68+0x13b/0x1c0 [btrfs] [72747.566999] ? finish_wait+0x80/0x80 [72747.567024] lock_extent_bits+0x37/0x90 [btrfs] [72747.567047] btrfs_invalidatepage+0x299/0x2c0 [btrfs] [72747.567051] ? find_get_pages_range_tag+0x2cd/0x380 [72747.567076] __extent_writepage+0x203/0x320 [btrfs] [72747.567102] extent_write_cache_pages+0x2bb/0x440 [btrfs] [72747.567106] ? update_load_avg+0x7e/0x5f0 [72747.567109] ? 
enqueue_entity+0xf4/0x6f0 [72747.567134] extent_writepages+0x44/0xa0 [btrfs] [72747.567137] ? enqueue_task_fair+0x93/0x6f0 [72747.567140] do_writepages+0x41/0xd0 [72747.567144] __filemap_fdatawrite_range+0xc7/0x100 [72747.567167] btrfs_run_delalloc_work+0x17/0x40 [btrfs] [72747.567195] btrfs_work_helper+0xc2/0x300 [btrfs] [72747.567200] process_one_work+0x1aa/0x340 [72747.567202] worker_thread+0x30/0x390 [72747.567205] ? create_worker+0x1a0/0x1a0 [72747.567208] kthread+0x116/0x130 [72747.567211] ? kthread_park+0x80/0x80 [72747.567214] ret_from_fork+0x1f/0x30 [72747.569686] task:fsstress state:D stack: 0 pid:841421 ppid:841417 flags:0x00000000 [72747.569689] Call Trace: [72747.569691] __schedule+0x296/0x760 [72747.569694] schedule+0x3c/0xa0 [72747.569721] try_flush_qgroup+0x95/0x140 [btrfs] [72747.569725] ? finish_wait+0x80/0x80 [72747.569753] btrfs_qgroup_reserve_data+0x34/0x50 [btrfs] [72747.569781] btrfs_check_data_free_space+0x5f/0xa0 [btrfs] [72747.569804] btrfs_buffered_write+0x1f7/0x7f0 [btrfs] [72747.569810] ? path_lookupat.isra.48+0x97/0x140 [72747.569833] btrfs_file_write_iter+0x81/0x410 [btrfs] [72747.569836] ? __kmalloc+0x16a/0x2c0 [72747.569839] do_iter_readv_writev+0x160/0x1c0 [72747.569843] do_iter_write+0x80/0x1b0 [72747.569847] vfs_writev+0x84/0x140 [72747.569869] ? btrfs_file_llseek+0x38/0x270 [btrfs] [72747.569873] do_writev+0x65/0x100 [72747.569876] do_syscall_64+0x33/0x40 [72747.569879] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [72747.569899] task:fsstress state:D stack: 0 pid:841424 ppid:841417 flags:0x00004000 [72747.569903] Call Trace: [72747.569906] __schedule+0x296/0x760 [72747.569909] schedule+0x3c/0xa0 [72747.569936] try_flush_qgroup+0x95/0x140 [btrfs] [72747.569940] ? finish_wait+0x80/0x80 [72747.569967] __btrfs_qgroup_reserve_meta+0x36/0x50 [btrfs] [72747.569989] start_transaction+0x279/0x580 [btrfs] [72747.570014] clone_copy_inline_extent+0x332/0x490 [btrfs] [72747.570041] btrfs_clone+0x5b7/0x7a0 [btrfs] [72747.570068] ? lock_extent_bits+0x64/0x90 [btrfs] [72747.570095] btrfs_clone_files+0xfc/0x150 [btrfs] [72747.570122] btrfs_remap_file_range+0x3d8/0x4a0 [btrfs] [72747.570126] do_clone_file_range+0xed/0x200 [72747.570131] vfs_clone_file_range+0x37/0x110 [72747.570134] ioctl_file_clone+0x7d/0xb0 [72747.570137] do_vfs_ioctl+0x138/0x630 [72747.570140] __x64_sys_ioctl+0x62/0xc0 [72747.570143] do_syscall_64+0x33/0x40 [72747.570146] entry_SYSCALL_64_after_hwframe+0x44/0xa9 So fix this by skipping the flush of delalloc for an inode that is flagged with BTRFS_INODE_NO_DELALLOC_FLUSH, meaning it is currently under such a special case of cloning an inline extent, when flushing delalloc during qgroup metadata reservation. The special cases for cloning inline extents were added in kernel 5.7 by commit 05a5a7621ce66c ("Btrfs: implement full reflink support for inline extents"), while having qgroup metadata space reservation flushing delalloc when low on space was added in kernel 5.9 by commit c53e9653605dbf ("btrfs: qgroup: try to flush qgroup space when we get -EDQUOT"). So use a "Fixes:" tag for the later commit to ease stable kernel backports.
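As an illustration of the mechanism (a minimal plain-C sketch, not the kernel source; the struct and function names here are hypothetical stand-ins for the btrfs internals), the fix boils down to a per-inode flag that the delalloc flush path honors only when it runs from a reclaim context:

    #include <stdbool.h>

    /* Models the relevant bit of a btrfs inode. */
    struct demo_inode {
        /* Set while the inode is in the special inline-extent clone path. */
        bool no_delalloc_flush;
    };

    /*
     * Models the decision in the delalloc flush loop: when flushing is
     * triggered from reclaim (e.g. a qgroup flush), skip inodes that
     * already hold their file range locked, instead of deadlocking on it.
     */
    static bool demo_should_flush(const struct demo_inode *inode,
                                  bool in_reclaim_context)
    {
        if (in_reclaim_context && inode->no_delalloc_flush)
            return false;
        return true;
    }

The in_reclaim_context parameter threaded through btrfs_start_delalloc_snapshot() in the diff below plays exactly this role: only the qgroup flush path passes true.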
Reported-by: Wang Yugui Link: https://lore.kernel.org/linux-btrfs/20210421083137.31E3.409509F4@e16-tech.com/ Fixes: c53e9653605dbf ("btrfs: qgroup: try to flush qgroup space when we get -EDQUOT") CC: stable@vger.kernel.org # 5.9+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/inode.c | 4 ++-- fs/btrfs/ioctl.c | 2 +- fs/btrfs/qgroup.c | 2 +- fs/btrfs/send.c | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 278e0cbc9a98..0f5b0b12762b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3127,7 +3127,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, u64 new_size, u32 min_type); -int btrfs_start_delalloc_snapshot(struct btrfs_root *root); +int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context); int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr, bool in_reclaim_context); int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1a349759efae..69fcdf8f0b1c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9691,7 +9691,7 @@ out: return ret; } -int btrfs_start_delalloc_snapshot(struct btrfs_root *root) +int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context) { struct writeback_control wbc = { .nr_to_write = LONG_MAX, @@ -9704,7 +9704,7 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root) if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) return -EROFS; - return start_delalloc_inodes(root, &wbc, true, false); + return start_delalloc_inodes(root, &wbc, true, in_reclaim_context); } int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b1328f17607e..0ba0e4ddaf6b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1051,7 +1051,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent, */ btrfs_drew_read_lock(&root->snapshot_lock); - ret = btrfs_start_delalloc_snapshot(root); + ret = btrfs_start_delalloc_snapshot(root, false); if (ret) goto out; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index b1caf5acf1e2..3ded812f522c 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3566,7 +3566,7 @@ static int try_flush_qgroup(struct btrfs_root *root) return 0; } - ret = btrfs_start_delalloc_snapshot(root); + ret = btrfs_start_delalloc_snapshot(root, true); if (ret < 0) goto out; btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 55741adf9071..bd69db72acc5 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -7170,7 +7170,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx) int i; if (root) { - ret = btrfs_start_delalloc_snapshot(root); + ret = btrfs_start_delalloc_snapshot(root, false); if (ret) return ret; btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX); @@ -7178,7 +7178,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx) for (i = 0; i < sctx->clone_roots_cnt; i++) { root = sctx->clone_roots[i].root; - ret = btrfs_start_delalloc_snapshot(root); + ret = btrfs_start_delalloc_snapshot(root, false); if (ret) return ret; btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX); -- cgit From 02ded1314a465a89267be38231d9858206853d80 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Fri, 23 Apr 2021 15:04:20 -0400 Subject: drm/msm: fix minor version to indicate 
MSM_PARAM_SUSPENDS support Increase the minor version to indicate that MSM_PARAM_SUSPENDS is supported. Fixes: 3ab1c5cc3939 ("drm/msm: Add param for userspace to query suspend count") Signed-off-by: Jonathan Marek Link: https://lore.kernel.org/r/20210423190420.25217-1-jonathan@marek.ca Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index e1104d2454e2..fe7d17cd35ec 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -42,7 +42,7 @@ * - 1.7.0 - Add MSM_PARAM_SUSPENDS to access suspend count */ #define MSM_VERSION_MAJOR 1 -#define MSM_VERSION_MINOR 6 +#define MSM_VERSION_MINOR 7 #define MSM_VERSION_PATCHLEVEL 0 static const struct drm_mode_config_funcs mode_config_funcs = { -- cgit From f876549dd002e8f5788c281096a7310bc53fbe20 Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Fri, 23 Apr 2021 14:57:55 -0400 Subject: drm/amd/pm: Update energy_accumulator in gpu metrics Now that it is available. v2: squash in typecast fix (Alex) Signed-off-by: Harish Kasiviswanathan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index dcbe3a72da09..771e2f0acdd3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1657,7 +1657,9 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_mm_activity = 0; gpu_metrics->average_socket_power = metrics.AverageSocketPower; - gpu_metrics->energy_accumulator = 0; + gpu_metrics->energy_accumulator = + (uint64_t)metrics.EnergyAcc64bitHigh << 32 | + metrics.EnergyAcc64bitLow; gpu_metrics->average_gfxclk_frequency = metrics.AverageGfxclkFrequency; gpu_metrics->average_socclk_frequency = metrics.AverageSocclkFrequency; -- cgit From 1e4a53de01c68d4ec9800b9a0f6efe9f26184a77 Mon Sep 17 00:00:00 2001 From: Darren Powell Date: Fri, 9 Apr 2021 18:28:24 -0400 Subject: amdgpu/pm: add extra info to SMU msg pre-check failed message Insert the value of the response into the error message emitted when the SMU msg pre-check fails. Signed-off-by: Darren Powell Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index dc7d2e71aa6f..5d1743f3321e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -104,8 +104,8 @@ int smu_cmn_send_msg_without_waiting(struct smu_context *smu, ret = smu_cmn_wait_for_response(smu); if (ret != 0x1) { - dev_err(adev->dev, "Msg issuing pre-check failed and " - "SMU may be not in the right state!\n"); + dev_err(adev->dev, "Msg issuing pre-check failed(0x%x) and " + "SMU may be not in the right state!\n", ret); if (ret != -ETIME) ret = -EIO; return ret; -- cgit From 51ec699275d91f52d861926f176635ce08b8af11 Mon Sep 17 00:00:00 2001 From: Darren Powell Date: Wed, 7 Apr 2021 00:34:35 -0400 Subject: amdgpu/pm: Prevent force of DCEFCLK on NAVI10 and SIENNA_CICHLID Writing to dcefclk causes the gpu to become unresponsive, and requires a reboot. The patch ignores a .force_clk_levels(SMU_DCEFCLK) call and issues an info message instead.
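The shape of the change, as a standalone sketch (plain C with hypothetical enum and function names; the real code lives in the navi10/sienna_cichlid force_clk_levels handlers shown below):

    #include <stdio.h>

    enum demo_clk { DEMO_SCLK, DEMO_FCLK, DEMO_DCEFCLK };

    static int demo_force_clk_levels(enum demo_clk clk)
    {
        switch (clk) {
        case DEMO_SCLK:
        case DEMO_FCLK:
            /* program min/max DPM levels for supported clocks here */
            return 0;
        case DEMO_DCEFCLK:
            /* accepted but deliberately ignored; just inform the user */
            printf("Setting DCEFCLK min/max dpm level is not supported!\n");
            return 0;
        default:
            return 0;
        }
    }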
Signed-off-by: Darren Powell Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 5 ++++- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index f827096dc849..ac13042672ea 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -1443,7 +1443,6 @@ static int navi10_force_clk_levels(struct smu_context *smu, case SMU_SOCCLK: case SMU_MCLK: case SMU_UCLK: - case SMU_DCEFCLK: case SMU_FCLK: /* There is only 2 levels for fine grained DPM */ if (navi10_is_support_fine_grained_dpm(smu, clk_type)) { @@ -1463,6 +1462,10 @@ static int navi10_force_clk_levels(struct smu_context *smu, if (ret) return size; break; + case SMU_DCEFCLK: + dev_info(smu->adev->dev,"Setting DCEFCLK min/max dpm level is not supported!\n"); + break; + default: break; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 72d9c1be1835..d2fd44b903ca 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -1127,7 +1127,6 @@ static int sienna_cichlid_force_clk_levels(struct smu_context *smu, case SMU_SOCCLK: case SMU_MCLK: case SMU_UCLK: - case SMU_DCEFCLK: case SMU_FCLK: /* There is only 2 levels for fine grained DPM */ if (sienna_cichlid_is_support_fine_grained_dpm(smu, clk_type)) { @@ -1147,6 +1146,9 @@ static int sienna_cichlid_force_clk_levels(struct smu_context *smu, if (ret) goto forec_level_out; break; + case SMU_DCEFCLK: + dev_info(smu->adev->dev,"Setting DCEFCLK min/max dpm level is not supported!\n"); + break; default: break; } -- cgit From ede14a1b32234685e2d6893205ff24c959b3f021 Mon Sep 17 00:00:00 2001 From: Darren Powell Date: Wed, 7 Apr 2021 18:40:34 -0400 Subject: amdgpu/pm: set pp_dpm_dcefclk to readonly on NAVI10 and newer gpus v2: change condition to apply to all chips after NAVI10 Writing to dcefclk causes the gpu to become unresponsive, and requires a reboot. The patch prevents the user from successfully writing to the file pp_dpm_dcefclk on NAVI10 and newer parts, and gives better user feedback that this operation is not allowed.
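In isolation, the idea is simply to strip the write-permission bits and the store hook from the attribute; a minimal sketch (plain C with a hypothetical struct, mirroring the default_attr_update() hunk below):

    #include <stddef.h>
    #include <sys/types.h>

    #define DEMO_S_IWUGO 0222  /* user/group/other write bits */

    struct demo_attr {
        unsigned int mode;                      /* permission bits */
        ssize_t (*store)(const char *, size_t); /* write handler */
    };

    static void demo_make_read_only(struct demo_attr *attr)
    {
        attr->mode &= ~DEMO_S_IWUGO; /* file now shows up as read-only */
        attr->store = NULL;          /* and writes have no handler at all */
    }

With both the mode bits and the store callback gone, a write attempt fails cleanly at the VFS layer instead of reaching the SMU.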
Signed-off-by: Darren Powell Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 8128603ef495..4e459ef632ef 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -1893,6 +1893,14 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ } } + if (DEVICE_ATTR_IS(pp_dpm_dcefclk)) { + /* SMU MP1 does not support dcefclk level setting */ + if (asic_type >= CHIP_NAVI10) { + dev_attr->attr.mode &= ~S_IWUGO; + dev_attr->store = NULL; + } + } + #undef DEVICE_ATTR_IS return 0; -- cgit From 5f5cb2afd67f5d1b2468b7a195b9c3c4963949af Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Sat, 24 Apr 2021 15:10:20 +0530 Subject: drm/amdgpu: Added missing prototype Kernel test robot throws below warning -> >> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c:125:5: warning: >> no previous prototype for 'kgd_arcturus_hqd_sdma_load' >> [-Wmissing-prototypes] 125 | int kgd_arcturus_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, >> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c:195:5: warning: >> no previous prototype for 'kgd_arcturus_hqd_sdma_dump' >> [-Wmissing-prototypes] 195 | int kgd_arcturus_hqd_sdma_dump(struct kgd_dev *kgd, >> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c:227:6: warning: >> no previous prototype for 'kgd_arcturus_hqd_sdma_is_occupied' >> [-Wmissing-prototypes] 227 | bool kgd_arcturus_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c:246:5: warning: >> no previous prototype for 'kgd_arcturus_hqd_sdma_destroy' >> [-Wmissing-prototypes] 246 | int kgd_arcturus_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Added prototype for these functions. Reported-by: kernel test robot Signed-off-by: Souptick Joarder Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 9ef9f3ddad48..6409d6b1b2df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -25,6 +25,7 @@ #include #include "amdgpu.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_amdkfd_arcturus.h" #include "sdma0/sdma0_4_2_2_offset.h" #include "sdma0/sdma0_4_2_2_sh_mask.h" #include "sdma1/sdma1_4_2_2_offset.h" -- cgit From 71ff0b4d965f528214e3dd26b71fdcbd015d19b4 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Sat, 24 Apr 2021 18:50:39 +0200 Subject: drm/amdkfd: Fix kernel-doc syntax error Fixed a kernel-doc error in the documentation of a function. Signed-off-by: Fabio M. 
De Francesco Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index d7006ef2388f..9d4f527bda7c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -252,7 +252,7 @@ cleanup: } /** - * @kfd_get_cu_occupancy() - Collect number of waves in-flight on this device + * @kfd_get_cu_occupancy - Collect number of waves in-flight on this device * by current process. Translates acquired wave count into number of compute units * that are occupied. * -- cgit From ec0f72cb959d9b8d6ce51457b392945fe1719d46 Mon Sep 17 00:00:00 2001 From: Jinzhou Su Date: Fri, 23 Apr 2021 16:29:14 +0800 Subject: drm/amdgpu: Enable SDMA LS for Vangogh Add flags AMD_CG_SUPPORT_SDMA_LS for Vangogh. Start to open sdma ls from firmware version 70. Signed-off-by: Jinzhou Su Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 9c4f232e81c0..82a380be8368 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1121,6 +1121,7 @@ static int nv_common_early_init(void *handle) AMD_CG_SUPPORT_GFX_FGCG | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_VCN | diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 4ba7fce4c0b4..7c4e0586e26d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1593,6 +1593,10 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev int i; for (i = 0; i < adev->sdma.num_instances; i++) { + + if (adev->sdma.instance[i].fw_version < 70 && adev->asic_type == CHIP_VANGOGH) + adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_LS; + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) { /* Enable sdma mem light sleep */ def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL)); -- cgit From dfdd4b8a95197d3769563e6f504357d98dd63887 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Fri, 23 Apr 2021 14:37:10 +0800 Subject: drm/amdgpu: implement smuio callback to query socket id get_socket_id is used to query socket id Signed-off-by: Hawking Zhang Reviewed-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h | 1 + drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h index b860ec913ac5..484bb3dcec47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h @@ -29,6 +29,7 @@ struct amdgpu_smuio_funcs { void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable); void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags); u32 (*get_die_id)(struct amdgpu_device *adev); + u32 (*get_socket_id)(struct amdgpu_device *adev); bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev); }; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c index 079b094c48ad..3c47c94846d6 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c @@ -88,6 +88,23 @@ static u32 smuio_v13_0_get_die_id(struct amdgpu_device *adev) return die_id; } +/** + * smuio_v13_0_get_socket_id - query socket id from FCH + * + * @adev: amdgpu device pointer + * + * Returns socket id + */ +static u32 smuio_v13_0_get_socket_id(struct amdgpu_device *adev) +{ + u32 data, socket_id; + + data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG); + socket_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, SOCKET_ID); + + return socket_id; +} + /** * smuio_v13_0_supports_host_gpu_xgmi - detect xgmi interface between cpu and gpu/s. * @@ -115,6 +132,7 @@ const struct amdgpu_smuio_funcs smuio_v13_0_funcs = { .get_rom_index_offset = smuio_v13_0_get_rom_index_offset, .get_rom_data_offset = smuio_v13_0_get_rom_data_offset, .get_die_id = smuio_v13_0_get_die_id, + .get_socket_id = smuio_v13_0_get_socket_id, .is_host_gpu_xgmi_supported = smuio_v13_0_is_host_gpu_xgmi_supported, .update_rom_clock_gating = smuio_v13_0_update_rom_clock_gating, .get_clock_gating_state = smuio_v13_0_get_clock_gating_state, -- cgit From a30f128602001c986bb2b3ede074b6193d43905f Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Sun, 25 Apr 2021 14:34:25 +0800 Subject: drm/amdgpu: provide socket/die id info in RAS message Add socket/die information in RAS messages for platforms that support query those information Signed-off-by: Hawking Zhang Reviewed-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index f62873fbf249..ae9fb2025259 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -901,17 +901,42 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, info->ce_count = obj->err_data.ce_count; if (err_data.ce_count) { - dev_info(adev->dev, "%ld correctable hardware errors " + if (adev->smuio.funcs && + adev->smuio.funcs->get_socket_id && + adev->smuio.funcs->get_die_id) { + dev_info(adev->dev, "socket: %d, die: %d " + "%ld correctable hardware errors " "detected in %s block, no user " "action is needed.\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), obj->err_data.ce_count, ras_block_str(info->head.block)); + } else { + dev_info(adev->dev, "%ld correctable hardware errors " + "detected in %s block, no user " + "action is needed.\n", + obj->err_data.ce_count, + ras_block_str(info->head.block)); + } } if (err_data.ue_count) { - dev_info(adev->dev, "%ld uncorrectable hardware errors " + if (adev->smuio.funcs && + adev->smuio.funcs->get_socket_id && + adev->smuio.funcs->get_die_id) { + dev_info(adev->dev, "socket: %d, die: %d " + "%ld uncorrectable hardware errors " "detected in %s block\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), obj->err_data.ue_count, ras_block_str(info->head.block)); + } else { + dev_info(adev->dev, "%ld uncorrectable hardware errors " + "detected in %s block\n", + obj->err_data.ue_count, + ras_block_str(info->head.block)); + } } return 0; -- cgit From be9064b7bcaab6b35d6040499af3fd5db8863ba4 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Sun, 25 Apr 2021 14:29:25 +0800 Subject: drm/amdgpu: remove unnecessary header include amdgpu.h is included in kfd_priv.h Signed-off-by: Hawking Zhang Reviewed-by: John Clements Signed-off-by: Alex Deucher --- 
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 696944fa0177..97c36e3c8c80 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -25,7 +25,6 @@ #include "soc15_int.h" #include "kfd_device_queue_manager.h" #include "kfd_smi_events.h" -#include "amdgpu.h" enum SQ_INTERRUPT_WORD_ENCODING { SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0, -- cgit From 760d2d818d2dbf232d2e1df2317381c59b95f86d Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Wed, 7 Apr 2021 11:21:44 -0400 Subject: drm/amd/display: Add new case to get spread spectrum info [WHY] New minor revision needs to be handled [HOW] Add switch case and assert to catch missing switch cases in the future Signed-off-by: Michael Strauss Reviewed-by: Eric Yang Acked-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index d79f4fe06c47..49126a0f66af 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -836,8 +836,10 @@ static enum bp_result bios_parser_get_spread_spectrum_info( return get_ss_info_v4_1(bp, signal, index, ss_info); case 2: case 3: + case 4: return get_ss_info_v4_2(bp, signal, index, ss_info); default: + ASSERT(0); break; } break; -- cgit From 99c248c41c2199bd34232ce8e729d18c4b343b64 Mon Sep 17 00:00:00 2001 From: Brandon Syu Date: Fri, 9 Apr 2021 14:47:46 +0800 Subject: drm/amd/display: fix HDCP reset sequence on reinitialize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [why] When setup is called after hdcp has already been set up, the disable HDCP flow won't execute. [how] Don't zero out the hdcp content. Signed-off-by: Brandon Syu Reviewed-by: Wenjing Liu Acked-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c index 68a6481d7f8f..b963226e8af4 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c @@ -260,7 +260,6 @@ enum mod_hdcp_status mod_hdcp_setup(struct mod_hdcp *hdcp, struct mod_hdcp_output output; enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; - memset(hdcp, 0, sizeof(struct mod_hdcp)); memset(&output, 0, sizeof(output)); hdcp->config = *config; HDCP_TOP_INTERFACE_TRACE(hdcp); -- cgit From d5433a9f692f57c814286f8af2746c567ef79fc8 Mon Sep 17 00:00:00 2001 From: Lewis Huang Date: Tue, 13 Apr 2021 07:02:28 +0800 Subject: drm/amd/display: Revert wait vblank on update dpp clock [Why] This change only fixes the case of the dpp clock switching to a lower frequency.
A later solution, "dc: skip program clock when allow seamless boot", can fix both cases. [How] This reverts commit "dc: wait vblank when stream enabled and update dpp clock" Signed-off-by: Lewis Huang Reviewed-by: Eric Yang Acked-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 10 +--------- drivers/gpu/drm/amd/display/dc/core/dc.c | 13 ------------- drivers/gpu/drm/amd/display/dc/dc.h | 1 - 3 files changed, 1 insertion(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index a06e86853bb9..49d19fdd750b 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -128,7 +128,7 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base, struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; struct dc *dc = clk_mgr_base->ctx->dc; - int display_count, i; + int display_count; bool update_dppclk = false; bool update_dispclk = false; bool dpp_clock_lowered = false; @@ -210,14 +210,6 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base, clk_mgr_base->clks.dppclk_khz, safe_to_lower); - for (i = 0; i < context->stream_count; i++) { - if (context->streams[i]->signal == SIGNAL_TYPE_EDP && - context->streams[i]->apply_seamless_boot_optimization) { - dc_wait_for_vblank(dc, context->streams[i]); - break; - } - } - clk_mgr_base->clks.actual_dppclk_khz = rn_vbios_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 4713f09bcbf1..e57df2f6f824 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3219,19 +3219,6 @@ void dc_link_remove_remote_sink(struct dc_link *link, struct dc_sink *sink) } } -void dc_wait_for_vblank(struct dc *dc, struct dc_stream_state *stream) -{ - int i; - - for (i = 0; i < dc->res_pool->pipe_count; i++) - if (dc->current_state->res_ctx.pipe_ctx[i].stream == stream) { - struct timing_generator *tg = - dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg; - tg->funcs->wait_for_state(tg, CRTC_STATE_VBLANK); - break; - } -} - void get_clock_requirements_for_state(struct dc_state *state, struct AsicStateEx *info) { info->displayClock = (unsigned int)state->bw_ctx.bw.dcn.clk.dispclk_khz; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 100d434f7a03..65f801b50686 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -719,7 +719,6 @@ void dc_init_callbacks(struct dc *dc, void dc_deinit_callbacks(struct dc *dc); void dc_destroy(struct dc **dc); -void dc_wait_for_vblank(struct dc *dc, struct dc_stream_state *stream); /******************************************************************************* * Surface Interfaces ******************************************************************************/ -- cgit From 47c02af751d168372d6ba3189799671eab606ef1 Mon Sep 17 00:00:00 2001 From: Lewis Huang Date: Fri, 9 Apr 2021 18:39:43 +0800 Subject: drm/amd/display: skip program clock when allow seamless boot [Why] The driver programs a dpp clock calculated from the pipe split config, but the hw config is a single pipe. [How] Skip programming the clock when seamless boot is allowed. After the pipe config is programmed, the seamless boot flag will be cleared.
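Reduced to its essentials (a plain-C sketch with hypothetical names; the real check sits in dc_link_detect() in the hunk below), the guard is a single scan over the current streams:

    #include <stdbool.h>

    struct demo_stream {
        bool apply_seamless_boot_optimization;
    };

    /* True if any stream still carries the seamless-boot flag, in which
     * case the low-power state transitions around detection are skipped. */
    static bool demo_any_seamless_boot(const struct demo_stream *streams,
                                       int stream_count)
    {
        int i;

        for (i = 0; i < stream_count; i++) {
            if (streams[i].apply_seamless_boot_optimization)
                return true;
        }
        return false;
    }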
Signed-off-by: Lewis Huang Reviewed-by: Eric Yang Acked-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index f4374d83662a..931fbb4d6169 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1206,14 +1206,25 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) { const struct dc *dc = link->dc; bool ret; + bool can_apply_seamless_boot = false; + int i; + + for (i = 0; i < dc->current_state->stream_count; i++) { + if (dc->current_state->streams[i]->apply_seamless_boot_optimization) { + can_apply_seamless_boot = true; + break; + } + } /* get out of low power state */ - clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr); + if (!can_apply_seamless_boot && reason != DETECT_REASON_BOOT) + clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr); ret = dc_link_detect_helper(link, reason); /* Go back to power optimized state */ - clk_mgr_optimize_pwr_state(dc, dc->clk_mgr); + if (!can_apply_seamless_boot && reason != DETECT_REASON_BOOT) + clk_mgr_optimize_pwr_state(dc, dc->clk_mgr); return ret; } -- cgit From b7cc1312c110f8760a3d3e8e41a6d84e32f71a61 Mon Sep 17 00:00:00 2001 From: Stylon Wang Date: Wed, 7 Apr 2021 18:14:55 +0800 Subject: drm/amd/display: Expose internal display flag via debugfs [Why & How] Add a per-connector debugfs entry to expose internal display flag, which is indication that the display is "internally connected" and not hotpluggable. Signed-off-by: Stylon Wang Reviewed-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 529545045a3e..656bc8f00a42 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -925,6 +925,22 @@ static int hdcp_sink_capability_show(struct seq_file *m, void *data) return 0; } #endif + +/* + * Returns whether the connected display is internal and not hotpluggable. 
+ * Example usage: cat /sys/kernel/debug/dri/0/DP-1/internal_display + */ +static int internal_display_show(struct seq_file *m, void *data) +{ + struct drm_connector *connector = m->private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct dc_link *link = aconnector->dc_link; + + seq_printf(m, "Internal: %u\n", link->is_internal_display); + + return 0; +} + /* function description * * generic SDP message access for testing @@ -2369,6 +2385,7 @@ DEFINE_SHOW_ATTRIBUTE(dp_lttpr_status); #ifdef CONFIG_DRM_AMD_DC_HDCP DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability); #endif +DEFINE_SHOW_ATTRIBUTE(internal_display); static const struct file_operations dp_dsc_clock_en_debugfs_fops = { .owner = THIS_MODULE, @@ -2600,7 +2617,8 @@ static const struct { } connector_debugfs_entries[] = { {"force_yuv420_output", &force_yuv420_output_fops}, {"output_bpc", &output_bpc_fops}, - {"trigger_hotplug", &trigger_hotplug_debugfs_fops} + {"trigger_hotplug", &trigger_hotplug_debugfs_fops}, + {"internal_display", &internal_display_fops} }; void connector_debugfs_init(struct amdgpu_dm_connector *connector) -- cgit From 130ac6d8c69bbecf3331a8427ddb743c6cf4a8f1 Mon Sep 17 00:00:00 2001 From: Yu-ting Shen Date: Tue, 13 Apr 2021 13:47:23 +0800 Subject: drm/amd/display: ddc resource data need to be initialized [WHY] to initial ddc structure before we use them to avoid error checking. [HOW] add 0 as structure initialization value in DCN resource construct Signed-off-by: Yu-ting Shen Reviewed-by: Meenakshikumar Somasundaram Acked-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 527e56c353cb..6a56a03cfba3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -3667,7 +3667,7 @@ static bool dcn20_resource_construct( int i; struct dc_context *ctx = dc->ctx; struct irq_service_init_data init_data; - struct ddc_service_init_data ddc_init_data; + struct ddc_service_init_data ddc_init_data = {0}; struct _vcs_dpi_soc_bounding_box_st *loaded_bb = get_asic_rev_soc_bb(ctx->asic_id.hw_internal_rev); struct _vcs_dpi_ip_params_st *loaded_ip = diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 4a5fa23d8e7b..45f96221a094 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -2538,7 +2538,7 @@ static bool dcn30_resource_construct( int i; struct dc_context *ctx = dc->ctx; struct irq_service_init_data init_data; - struct ddc_service_init_data ddc_init_data; + struct ddc_service_init_data ddc_init_data = {0}; uint32_t pipe_fuses = read_pipe_fuses(ctx); uint32_t num_pipes = 0; -- cgit From 63de4f0413fe5487ba5a1ed04c92fa10ca6af7c7 Mon Sep 17 00:00:00 2001 From: Jake Wang Date: Mon, 12 Apr 2021 17:46:49 -0400 Subject: drm/amd/display: Added multi instance support for ABM [WHY & HOW] ABM assumes only 1 eDP is connected. Refactored existing ABM interface to support multiple instances. 
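The multi-instance plumbing centers on a small bitmask; a self-contained sketch (plain C, hypothetical helper name) of how the panel mask carried by the new command fields below is built:

    #include <stdint.h>

    /* Bit i of the mask targets panel-control HW instance i, so one eDP
     * yields 0x01, two eDPs yield 0x03, and so on. */
    static uint8_t demo_build_panel_mask(int edp_count)
    {
        uint8_t panel_mask = 0;
        int i;

        for (i = 0; i < edp_count; i++)
            panel_mask |= 0x01 << i;

        return panel_mask;
    }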
Signed-off-by: Jake Wang Reviewed-by: Nicholas Kazlauskas Acked-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c | 30 ++++++++ drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c | 2 +- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 79 ++++++++++++++++++++-- 3 files changed, 105 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c index eb1698d54a48..6939ca2e8212 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c @@ -56,11 +56,19 @@ static void dmub_abm_enable_fractional_pwm(struct dc_context *dc) { union dmub_rb_cmd cmd; uint32_t fractional_pwm = (dc->dc->config.disable_fractional_pwm == false) ? 1 : 0; + uint32_t edp_id_count = dc->dc_edp_id_count; + int i; + uint8_t panel_mask = 0; + + for (i = 0; i < edp_id_count; i++) + panel_mask |= 0x01 << i; memset(&cmd, 0, sizeof(cmd)); cmd.abm_set_pwm_frac.header.type = DMUB_CMD__ABM; cmd.abm_set_pwm_frac.header.sub_type = DMUB_CMD__ABM_SET_PWM_FRAC; cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.fractional_pwm = fractional_pwm; + cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1; + cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.panel_mask = panel_mask; cmd.abm_set_pwm_frac.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pwm_frac_data); dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); @@ -135,11 +143,24 @@ static bool dmub_abm_set_level(struct abm *abm, uint32_t level) { union dmub_rb_cmd cmd; struct dc_context *dc = abm->ctx; + struct dc_link *edp_links[MAX_NUM_EDP]; + int i; + int edp_num; + uint8_t panel_mask = 0; + + get_edp_links(dc->dc, edp_links, &edp_num); + + for (i = 0; i < edp_num; i++) { + if (edp_links[i]->link_status.link_active) + panel_mask |= (0x01 << i); + } memset(&cmd, 0, sizeof(cmd)); cmd.abm_set_level.header.type = DMUB_CMD__ABM; cmd.abm_set_level.header.sub_type = DMUB_CMD__ABM_SET_LEVEL; cmd.abm_set_level.abm_set_level_data.level = level; + cmd.abm_set_level.abm_set_level_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1; + cmd.abm_set_level.abm_set_level_data.panel_mask = panel_mask; cmd.abm_set_level.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_level_data); dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); @@ -155,6 +176,12 @@ static bool dmub_abm_init_config(struct abm *abm, { union dmub_rb_cmd cmd; struct dc_context *dc = abm->ctx; + uint32_t edp_id_count = dc->dc_edp_id_count; + int i; + uint8_t panel_mask = 0; + + for (i = 0; i < edp_id_count; i++) + panel_mask |= 0x01 << i; // TODO: Optimize by only reading back final 4 bytes dmub_flush_buffer_mem(&dc->dmub_srv->dmub->scratch_mem_fb); @@ -168,6 +195,9 @@ static bool dmub_abm_init_config(struct abm *abm, cmd.abm_init_config.header.sub_type = DMUB_CMD__ABM_INIT_CONFIG; cmd.abm_init_config.abm_init_config_data.src.quad_part = dc->dmub_srv->dmub->scratch_mem_fb.gpu_addr; cmd.abm_init_config.abm_init_config_data.bytes = bytes; + cmd.abm_init_config.abm_init_config_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1; + cmd.abm_init_config.abm_init_config_data.panel_mask = panel_mask; + cmd.abm_init_config.header.payload_bytes = sizeof(struct dmub_cmd_abm_init_config_data); dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c index 8fccee5a3036..69cc192a7e71 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c +++ 
b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c @@ -218,7 +218,7 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx, cmd.abm_set_backlight.header.sub_type = DMUB_CMD__ABM_SET_BACKLIGHT; cmd.abm_set_backlight.abm_set_backlight_data.frame_ramp = frame_ramp; cmd.abm_set_backlight.abm_set_backlight_data.backlight_user_level = backlight_pwm_u16_16; - cmd.abm_set_backlight.abm_set_backlight_data.version = DMUB_CMD_ABM_SET_BACKLIGHT_VERSION_1; + cmd.abm_set_backlight.abm_set_backlight_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1; cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = (0x01 << panel_cntl->inst); cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data); diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 4195ff10c514..82c6e8a8a7c9 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -121,14 +121,14 @@ #define TRACE_BUFFER_ENTRY_OFFSET 16 /** - * ABM backlight control version legacy + * ABM control version legacy */ -#define DMUB_CMD_ABM_SET_BACKLIGHT_VERSION_UNKNOWN 0x0 +#define DMUB_CMD_ABM_CONTROL_VERSION_UNKNOWN 0x0 /** - * ABM backlight control version with multi edp support + * ABM control version with multi edp support */ -#define DMUB_CMD_ABM_SET_BACKLIGHT_VERSION_1 0x1 +#define DMUB_CMD_ABM_CONTROL_VERSION_1 0x1 /** * Physical framebuffer address location, 64-bit. @@ -1637,7 +1637,7 @@ struct dmub_cmd_abm_set_backlight_data { uint32_t backlight_user_level; /** - * Backlight data version. + * ABM control version. */ uint8_t version; @@ -1677,6 +1677,23 @@ struct dmub_cmd_abm_set_level_data { * Set current ABM operating/aggression level. */ uint32_t level; + + /** + * ABM control version. + */ + uint8_t version; + + /** + * Panel Control HW instance mask. + * Bit 0 is Panel Control HW instance 0. + * Bit 1 is Panel Control HW instance 1. + */ + uint8_t panel_mask; + + /** + * Explicit padding to 4 byte boundary. + */ + uint8_t pad[2]; }; /** @@ -1702,6 +1719,23 @@ struct dmub_cmd_abm_set_ambient_level_data { * Ambient light sensor reading from OS. */ uint32_t ambient_lux; + + /** + * ABM control version. + */ + uint8_t version; + + /** + * Panel Control HW instance mask. + * Bit 0 is Panel Control HW instance 0. + * Bit 1 is Panel Control HW instance 1. + */ + uint8_t panel_mask; + + /** + * Explicit padding to 4 byte boundary. + */ + uint8_t pad[2]; }; /** @@ -1728,6 +1762,23 @@ struct dmub_cmd_abm_set_pwm_frac_data { * TODO: Convert to uint8_t. */ uint32_t fractional_pwm; + + /** + * ABM control version. + */ + uint8_t version; + + /** + * Panel Control HW instance mask. + * Bit 0 is Panel Control HW instance 0. + * Bit 1 is Panel Control HW instance 1. + */ + uint8_t panel_mask; + + /** + * Explicit padding to 4 byte boundary. + */ + uint8_t pad[2]; }; /** @@ -1758,6 +1809,24 @@ struct dmub_cmd_abm_init_config_data { * Indirect buffer length. */ uint16_t bytes; + + + /** + * ABM control version. + */ + uint8_t version; + + /** + * Panel Control HW instance mask. + * Bit 0 is Panel Control HW instance 0. + * Bit 1 is Panel Control HW instance 1. + */ + uint8_t panel_mask; + + /** + * Explicit padding to 4 byte boundary. + */ + uint8_t pad[2]; }; /** -- cgit From b2d4b9f72fb14c1e6e4f0128964a84539a72d831 Mon Sep 17 00:00:00 2001 From: Chris Park Date: Tue, 13 Apr 2021 22:25:23 -0400 Subject: drm/amd/display: Fix BSOD with NULL check [Why] CLK mgr is null for server settings. 
[How] Guard the function with NULL check. Signed-off-by: Chris Park Reviewed-by: Nicholas Kazlauskas Acked-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index e57df2f6f824..a869702d77af 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3274,7 +3274,7 @@ void dc_allow_idle_optimizations(struct dc *dc, bool allow) if (dc->debug.disable_idle_power_optimizations) return; - if (dc->clk_mgr->funcs->is_smu_present) + if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->is_smu_present) if (!dc->clk_mgr->funcs->is_smu_present(dc->clk_mgr)) return; -- cgit From dd0ef5992b441340991258087b14f29ca84c6f7f Mon Sep 17 00:00:00 2001 From: "Max.Tseng" Date: Thu, 15 Apr 2021 11:35:03 +0800 Subject: drm/amd/display: Add new DP_SEC registers for programming SDP Line number [Why] Add register for programming in new platform Signed-off-by: Max.Tseng Reviewed-by: Anthony Koo Acked-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h index 0b1755f1dea8..a8f49ecb84ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h @@ -169,7 +169,9 @@ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP5_LINE_REFERENCE, mask_sh),\ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_SEND, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL5, DP_SEC_GSP5_LINE_NUM, mask_sh),\ SE_SF(DP0_DP_SEC_CNTL6, DP_SEC_GSP7_LINE_NUM, mask_sh),\ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP11_PPS, mask_sh),\ SE_SF(DP0_DP_GSP11_CNTL, DP_SEC_GSP11_ENABLE, mask_sh),\ -- cgit From 831c95c966014cabc7ed5dc4a6b87c1b8440f48f Mon Sep 17 00:00:00 2001 From: Robin Chen Date: Thu, 15 Apr 2021 10:08:10 +0800 Subject: drm/amd/display: Clear MASTER_UPDATE_LOCK_DB_EN when disable doublebuffer lock [Why] It seems there's a typo to set MASTER_UPDATE_LOCK_DB_EN when disable doublebuffer lock. 
[How] Clear MASTER_UPDATE_LOCK_DB_EN when disable doublebuffer lock Signed-off-by: Robin Chen Reviewed-by: Joshua Aberback Acked-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c index 8980c90b2277..ac478bdcfb2a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c @@ -97,7 +97,7 @@ void optc3_lock_doublebuffer_disable(struct timing_generator *optc) MASTER_UPDATE_LOCK_DB_END_Y, 0); REG_UPDATE(OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, 0); - REG_UPDATE(OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_EN, 1); + REG_UPDATE(OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_EN, 0); } void optc3_lock(struct timing_generator *optc) -- cgit From abf1f863e08dc23764ce617dd55e1b533e525a3f Mon Sep 17 00:00:00 2001 From: Mikita Lipski Date: Wed, 14 Apr 2021 14:15:52 -0400 Subject: drm/amd/display: fix wrong statement in mst hpd debugfs [why] Previous statement would always evaluate to true making it meaningless [how] Just check if a connector is MST by checking if its port exists. Reported-by: kernel test robot Signed-off-by: Mikita Lipski Reviewed-by: Nicholas Kazlauskas Acked-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 656bc8f00a42..8bf0b566612b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -3030,7 +3030,7 @@ static int trigger_hpd_mst_set(void *data, u64 val) if (!aconnector->dc_link) continue; - if (!(aconnector->port && &aconnector->mst_port->mst_mgr)) + if (!aconnector->mst_port) continue; link = aconnector->dc_link; -- cgit From 642d3a2bf234578da83ed1fb6ef87dba36d31dea Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Fri, 9 Apr 2021 14:55:18 -0400 Subject: drm/amd/display: take max dsc stream bandwidth overhead into account [why] As hardware team suggested that we need to add a max dsc bw overhead into existing stream bandwidth when DSC is used. 
The formula as below: max_dsc_bw_overhead = v_addressable * slice_count * 256 bit * pixel clock / v_total / h_total effective stream bandwidth = pixel clock * bpp stream bandwidth = effective stream bandwidth + dsc stream overhead Signed-off-by: Wenjing Liu Reviewed-by: Eric Bernstein Acked-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 7 +- drivers/gpu/drm/amd/display/dc/dc_dsc.h | 5 +- drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 113 ++++++++++++++++++++------ drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c | 43 ---------- drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h | 2 - 5 files changed, 94 insertions(+), 76 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 931fbb4d6169..8bbed9c90c5d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -3486,9 +3486,10 @@ uint32_t dc_bandwidth_in_kbps_from_timing( uint32_t kbps; #if defined(CONFIG_DRM_AMD_DC_DCN) - if (timing->flags.DSC) { - return dc_dsc_stream_bandwidth_in_kbps(timing->pix_clk_100hz, timing->dsc_cfg.bits_per_pixel); - } + if (timing->flags.DSC) + return dc_dsc_stream_bandwidth_in_kbps(timing, + timing->dsc_cfg.bits_per_pixel, + timing->dsc_cfg.num_slices_h); #endif switch (timing->display_color_depth) { diff --git a/drivers/gpu/drm/amd/display/dc/dc_dsc.h b/drivers/gpu/drm/amd/display/dc/dc_dsc.h index c51d2d961b7a..afddb8b7d3e4 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dsc.h @@ -78,7 +78,8 @@ bool dc_dsc_compute_config( const struct dc_crtc_timing *timing, struct dc_dsc_config *dsc_cfg); -uint32_t dc_dsc_stream_bandwidth_in_kbps(uint32_t pix_clk_100hz, uint32_t bpp_x16); +uint32_t dc_dsc_stream_bandwidth_in_kbps(const struct dc_crtc_timing *timing, + uint32_t bpp_x16, uint32_t num_slices_h); void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, uint32_t max_target_bpp_limit_override_x16, @@ -88,6 +89,4 @@ void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit); void dc_dsc_policy_set_enable_dsc_when_not_needed(bool enable); -uint32_t dc_dsc_stream_bandwidth_in_kbps(uint32_t pix_clk_100hz, uint32_t bpp_x16); - #endif diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index be57088d185d..bfe3ad58070a 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -258,12 +258,58 @@ static inline uint32_t dsc_div_by_10_round_up(uint32_t value) return (value + 9) / 10; } +static struct fixed31_32 compute_dsc_max_bandwidth_overhead( + const struct dc_crtc_timing *timing, + const int num_slices_h) +{ + struct fixed31_32 max_dsc_overhead; + struct fixed31_32 refresh_rate; + + /* use target bpp that can take entire target bandwidth */ + refresh_rate = dc_fixpt_from_int(timing->pix_clk_100hz); + refresh_rate = dc_fixpt_div_int(refresh_rate, timing->h_total); + refresh_rate = dc_fixpt_div_int(refresh_rate, timing->v_total); + refresh_rate = dc_fixpt_mul_int(refresh_rate, 100); + + max_dsc_overhead = dc_fixpt_from_int(num_slices_h); + max_dsc_overhead = dc_fixpt_mul_int(max_dsc_overhead, timing->v_addressable); + max_dsc_overhead = dc_fixpt_mul_int(max_dsc_overhead, 256); + max_dsc_overhead = dc_fixpt_div_int(max_dsc_overhead, 1000); + max_dsc_overhead = dc_fixpt_mul(max_dsc_overhead, refresh_rate); + + return max_dsc_overhead; +} + +static uint32_t 
compute_bpp_x16_from_target_bandwidth( + const uint32_t bandwidth_in_kbps, + const struct dc_crtc_timing *timing, + const uint32_t num_slices_h, + const uint32_t bpp_increment_div) +{ + struct fixed31_32 overhead_in_kbps; + struct fixed31_32 effective_bandwidth_in_kbps; + struct fixed31_32 bpp_x16; + + overhead_in_kbps = compute_dsc_max_bandwidth_overhead( + timing, num_slices_h); + effective_bandwidth_in_kbps = dc_fixpt_from_int(bandwidth_in_kbps); + effective_bandwidth_in_kbps = dc_fixpt_sub(effective_bandwidth_in_kbps, + overhead_in_kbps); + bpp_x16 = dc_fixpt_mul_int(effective_bandwidth_in_kbps, 10); + bpp_x16 = dc_fixpt_div_int(bpp_x16, timing->pix_clk_100hz); + bpp_x16 = dc_fixpt_from_int(dc_fixpt_floor(dc_fixpt_mul_int(bpp_x16, bpp_increment_div))); + bpp_x16 = dc_fixpt_div_int(bpp_x16, bpp_increment_div); + bpp_x16 = dc_fixpt_mul_int(bpp_x16, 16); + return dc_fixpt_floor(bpp_x16); +} + /* Get DSC bandwidth range based on [min_bpp, max_bpp] target bitrate range, and timing's pixel clock * and uncompressed bandwidth. */ static void get_dsc_bandwidth_range( const uint32_t min_bpp_x16, const uint32_t max_bpp_x16, + const uint32_t num_slices_h, const struct dsc_enc_caps *dsc_caps, const struct dc_crtc_timing *timing, struct dc_dsc_bw_range *range) @@ -272,16 +318,20 @@ static void get_dsc_bandwidth_range( range->stream_kbps = dc_bandwidth_in_kbps_from_timing(timing); /* max dsc target bpp */ - range->max_kbps = dc_dsc_stream_bandwidth_in_kbps(timing->pix_clk_100hz, max_bpp_x16); + range->max_kbps = dc_dsc_stream_bandwidth_in_kbps(timing, + max_bpp_x16, num_slices_h); range->max_target_bpp_x16 = max_bpp_x16; if (range->max_kbps > range->stream_kbps) { /* max dsc target bpp is capped to native bandwidth */ range->max_kbps = range->stream_kbps; - range->max_target_bpp_x16 = calc_dsc_bpp_x16(range->stream_kbps, timing->pix_clk_100hz, dsc_caps->bpp_increment_div); + range->max_target_bpp_x16 = compute_bpp_x16_from_target_bandwidth( + range->max_kbps, timing, num_slices_h, + dsc_caps->bpp_increment_div); } /* min dsc target bpp */ - range->min_kbps = dc_dsc_stream_bandwidth_in_kbps(timing->pix_clk_100hz, min_bpp_x16); + range->min_kbps = dc_dsc_stream_bandwidth_in_kbps(timing, + min_bpp_x16, num_slices_h); range->min_target_bpp_x16 = min_bpp_x16; if (range->min_kbps > range->max_kbps) { /* min dsc target bpp is capped to max dsc bandwidth*/ @@ -290,7 +340,6 @@ static void get_dsc_bandwidth_range( } } - /* Decides if DSC should be used and calculates target bpp if it should, applying DSC policy. 
* * Returns: @@ -303,6 +352,7 @@ static bool decide_dsc_target_bpp_x16( const struct dsc_enc_caps *dsc_common_caps, const int target_bandwidth_kbps, const struct dc_crtc_timing *timing, + const int num_slices_h, int *target_bpp_x16) { bool should_use_dsc = false; @@ -311,7 +361,7 @@ static bool decide_dsc_target_bpp_x16( memset(&range, 0, sizeof(range)); get_dsc_bandwidth_range(policy->min_target_bpp * 16, policy->max_target_bpp * 16, - dsc_common_caps, timing, &range); + num_slices_h, dsc_common_caps, timing, &range); if (!policy->enable_dsc_when_not_needed && target_bandwidth_kbps >= range.stream_kbps) { /* enough bandwidth without dsc */ *target_bpp_x16 = 0; @@ -327,7 +377,11 @@ static bool decide_dsc_target_bpp_x16( should_use_dsc = true; } else if (target_bandwidth_kbps >= range.min_kbps) { /* use target bpp that can take entire target bandwidth */ - *target_bpp_x16 = calc_dsc_bpp_x16(target_bandwidth_kbps, timing->pix_clk_100hz, dsc_common_caps->bpp_increment_div); + *target_bpp_x16 = compute_bpp_x16_from_target_bandwidth( + range.max_kbps, timing, num_slices_h, + dsc_common_caps->bpp_increment_div); + if (*target_bpp_x16 < range.min_kbps) + *target_bpp_x16 = range.min_kbps; should_use_dsc = true; } else { /* not enough bandwidth to fulfill minimum requirement */ @@ -531,18 +585,6 @@ static bool setup_dsc_config( if (!is_dsc_possible) goto done; - if (target_bandwidth_kbps > 0) { - is_dsc_possible = decide_dsc_target_bpp_x16( - &policy, - &dsc_common_caps, - target_bandwidth_kbps, - timing, - &target_bpp); - dsc_cfg->bits_per_pixel = target_bpp; - } - if (!is_dsc_possible) - goto done; - sink_per_slice_throughput_mps = 0; // Validate available DSC settings against the mode timing @@ -690,6 +732,19 @@ static bool setup_dsc_config( dsc_cfg->num_slices_v = pic_height/slice_height; + if (target_bandwidth_kbps > 0) { + is_dsc_possible = decide_dsc_target_bpp_x16( + &policy, + &dsc_common_caps, + target_bandwidth_kbps, + timing, + num_slices_h, + &target_bpp); + dsc_cfg->bits_per_pixel = target_bpp; + } + if (!is_dsc_possible) + goto done; + // Final decission: can we do DSC or not? 
if (is_dsc_possible) { // Fill out the rest of DSC settings @@ -838,7 +893,8 @@ bool dc_dsc_compute_bandwidth_range( dsc_min_slice_height_override, max_bpp_x16, &config); if (is_dsc_possible) - get_dsc_bandwidth_range(min_bpp_x16, max_bpp_x16, &dsc_common_caps, timing, range); + get_dsc_bandwidth_range(min_bpp_x16, max_bpp_x16, + config.num_slices_h, &dsc_common_caps, timing, range); return is_dsc_possible; } @@ -864,13 +920,20 @@ bool dc_dsc_compute_config( return is_dsc_possible; } -uint32_t dc_dsc_stream_bandwidth_in_kbps(uint32_t pix_clk_100hz, uint32_t bpp_x16) +uint32_t dc_dsc_stream_bandwidth_in_kbps(const struct dc_crtc_timing *timing, + uint32_t bpp_x16, uint32_t num_slices_h) { - struct fixed31_32 link_bw_kbps; - link_bw_kbps = dc_fixpt_from_int(pix_clk_100hz); - link_bw_kbps = dc_fixpt_div_int(link_bw_kbps, 160); - link_bw_kbps = dc_fixpt_mul_int(link_bw_kbps, bpp_x16); - return dc_fixpt_ceil(link_bw_kbps); + struct fixed31_32 overhead_in_kbps; + struct fixed31_32 bpp; + struct fixed31_32 actual_bandwidth_in_kbps; + + overhead_in_kbps = compute_dsc_max_bandwidth_overhead( + timing, num_slices_h); + bpp = dc_fixpt_from_fraction(bpp_x16, 16); + actual_bandwidth_in_kbps = dc_fixpt_from_fraction(timing->pix_clk_100hz, 10); + actual_bandwidth_in_kbps = dc_fixpt_mul(actual_bandwidth_in_kbps, bpp); + actual_bandwidth_in_kbps = dc_fixpt_add(actual_bandwidth_in_kbps, overhead_in_kbps); + return dc_fixpt_ceil(actual_bandwidth_in_kbps); } void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, uint32_t max_target_bpp_limit_override_x16, struct dc_dsc_policy *policy) diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c index c6a1cd80aeae..7b294f637881 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c @@ -284,26 +284,6 @@ static u32 _do_bytes_per_pixel_calc(int slice_width, u16 drm_bpp, return bytes_per_pixel; } -static u32 _do_calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz, - u32 bpp_increment_div) -{ - u32 dsc_target_bpp_x16; - float f_dsc_target_bpp; - float f_stream_bandwidth_100bps; - // bpp_increment_div is actually precision - u32 precision = bpp_increment_div; - - f_stream_bandwidth_100bps = stream_bandwidth_kbps * 10.0f; - f_dsc_target_bpp = f_stream_bandwidth_100bps / pix_clk_100hz; - - // Round down to the nearest precision stop to bring it into DSC spec - // range - dsc_target_bpp_x16 = (u32)(f_dsc_target_bpp * precision); - dsc_target_bpp_x16 = (dsc_target_bpp_x16 * 16) / precision; - - return dsc_target_bpp_x16; -} - /** * calc_rc_params - reads the user's cmdline mode * @rc: DC internal DSC parameters @@ -367,26 +347,3 @@ u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps) DC_FP_END(); return ret; } - -/** - * calc_dsc_bpp_x16 - retrieve the dsc bits per pixel - * @stream_bandwidth_kbps: - * @pix_clk_100hz: - * @bpp_increment_div: - * - * Calculate the total of bits per pixel for DSC configuration. - * - * @note This calculation requires float point operation, most of it executes - * under kernel_fpu_{begin,end}. 
- */ -u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz, - u32 bpp_increment_div) -{ - u32 dsc_bpp; - - DC_FP_START(); - dsc_bpp = _do_calc_dsc_bpp_x16(stream_bandwidth_kbps, pix_clk_100hz, - bpp_increment_div); - DC_FP_END(); - return dsc_bpp; -} diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h index 8123827840c5..262f06afcbf9 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h @@ -79,8 +79,6 @@ typedef struct qp_entry qp_table[]; void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps); u32 calc_dsc_bytes_per_pixel(const struct drm_dsc_config *pps); -u32 calc_dsc_bpp_x16(u32 stream_bandwidth_kbps, u32 pix_clk_100hz, - u32 bpp_increment_div); #endif -- cgit From 088bebc79ee8c5ee9d61fa0381af278d5ff7da45 Mon Sep 17 00:00:00 2001 From: Yu-ting Shen Date: Wed, 14 Apr 2021 15:33:33 +0800 Subject: drm/amd/display: avoid authentication when DEVICE_COUNT=0 [why] We don't support authentication when DEVICE_COUNT=0. [how] Check the DEVICE_COUNT value before doing authentication. Signed-off-by: Yu-ting Shen Reviewed-by: Wenjing Liu Acked-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c | 5 +++++ drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c index 2cbd931363bd..43e6f8b17e79 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c @@ -128,6 +128,11 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp) static inline enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp) { + /* Avoid device count == 0 to do authentication */ + if (0 == get_device_count(hdcp)) { + return MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE; + } + /* Some MST display may choose to report the internal panel as an HDCP RX. * To update this condition with 1(because the immediate repeater's internal * panel is possibly not included in DEVICE_COUNT) + get_device_count(hdcp). diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c index c1331facdcb4..117c6b45f718 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c @@ -207,6 +207,11 @@ static inline uint8_t get_device_count(struct mod_hdcp *hdcp) static enum mod_hdcp_status check_device_count(struct mod_hdcp *hdcp) { + /* Avoid device count == 0 to do authentication */ + if (0 == get_device_count(hdcp)) { + return MOD_HDCP_STATUS_HDCP1_DEVICE_COUNT_MISMATCH_FAILURE; + } + /* Some MST display may choose to report the internal panel as an HDCP RX. */ /* To update this condition with 1(because the immediate repeater's internal */ /* panel is possibly not included in DEVICE_COUNT) + get_device_count(hdcp). */ -- cgit From 069a11cca5b649f6e16ea3d97408c4f289d300ed Mon Sep 17 00:00:00 2001 From: "Max.Tseng" Date: Fri, 16 Apr 2021 10:04:51 +0800 Subject: drm/amd/display: Add SE_DCN3_REG_LIST for control SDP num [Why] New platform.
Need to add corresponding register control Signed-off-by: Max.Tseng Reviewed-by: Anthony Koo Acked-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h index a8f49ecb84ba..9566b9037458 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h @@ -85,7 +85,9 @@ SRI(DP_MSE_RATE_UPDATE, DP, id), \ SRI(DP_PIXEL_FORMAT, DP, id), \ SRI(DP_SEC_CNTL, DP, id), \ + SRI(DP_SEC_CNTL1, DP, id), \ SRI(DP_SEC_CNTL2, DP, id), \ + SRI(DP_SEC_CNTL5, DP, id), \ SRI(DP_SEC_CNTL6, DP, id), \ SRI(DP_STEER_FIFO, DP, id), \ SRI(DP_VID_M, DP, id), \ -- cgit From 8167538ffb22b299958ee1ad7873fadd12254c1d Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Sat, 17 Apr 2021 15:13:15 -0400 Subject: drm/amd/display: [FW Promotion] Release 0.0.63 Signed-off-by: Anthony Koo Acked-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 82c6e8a8a7c9..95fab1d3d7af 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -47,10 +47,10 @@ /* Firmware versioning. */ #ifdef DMUB_EXPOSE_VERSION -#define DMUB_FW_VERSION_GIT_HASH 0x23db9b126 +#define DMUB_FW_VERSION_GIT_HASH 0x41548deb6 #define DMUB_FW_VERSION_MAJOR 0 #define DMUB_FW_VERSION_MINOR 0 -#define DMUB_FW_VERSION_REVISION 62 +#define DMUB_FW_VERSION_REVISION 63 #define DMUB_FW_VERSION_TEST 0 #define DMUB_FW_VERSION_VBIOS 0 #define DMUB_FW_VERSION_HOTFIX 0 @@ -117,6 +117,8 @@ /* Maximum number of planes on any ASIC. 
*/ #define DMUB_MAX_PLANES 6 +#define DMUB_MAX_SUBVP_STREAMS 2 + /* Trace buffer offset for entry */ #define TRACE_BUFFER_ENTRY_OFFSET 16 @@ -328,7 +330,8 @@ union dmub_fw_boot_options { uint32_t skip_phy_access : 1; /**< 1 if PHY access should be skipped */ uint32_t disable_clk_gate: 1; /**< 1 if clock gating should be disabled */ uint32_t skip_phy_init_panel_sequence: 1; /**< 1 to skip panel init seq */ - uint32_t reserved : 26; /**< reserved */ + uint32_t reserved_unreleased: 1; /**< reserved for an unreleased feature */ + uint32_t reserved : 25; /**< reserved */ } bits; /**< boot bits */ uint32_t all; /**< 32-bit access to bits */ }; -- cgit From 18fa44625c6bf9375734fb9e564bd5911200ddc1 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 19 Apr 2021 00:46:28 -0400 Subject: drm/amd/display: 3.2.133 Signed-off-by: Aric Cyr Acked-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 65f801b50686..dba2584e8986 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -45,7 +45,7 @@ /* forward declaration */ struct aux_payload; -#define DC_VER "3.2.132" +#define DC_VER "3.2.133" #define MAX_SURFACES 3 #define MAX_PLANES 6 -- cgit From 041e69160d161f5b4ad77cd915640c5cfe9c9bc3 Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Thu, 22 Apr 2021 17:38:09 +0800 Subject: drm/amdgpu/sriov: Remove clear vf fw support PSP clear_vf_fw feature is outdated and has been removed. Remove the related functions. Signed-off-by: Victor Zhao Reviewed-by: Emily Deng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 32 -------------------------------- drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 1 - 2 files changed, 33 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 9311dcc94cb6..623044414bb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -417,26 +417,6 @@ static int psp_tmr_init(struct psp_context *psp) return ret; } -static int psp_clear_vf_fw(struct psp_context *psp) -{ - int ret; - struct psp_gfx_cmd_resp *cmd; - - if (!amdgpu_sriov_vf(psp->adev) || psp->adev->asic_type != CHIP_NAVI12) - return 0; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - cmd->cmd_id = GFX_CMD_ID_CLEAR_VF_FW; - - ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); - - return ret; -} - static bool psp_skip_tmr(struct psp_context *psp) { switch (psp->adev->asic_type) { @@ -1925,12 +1905,6 @@ static int psp_hw_start(struct psp_context *psp) return ret; } - ret = psp_clear_vf_fw(psp); - if (ret) { - DRM_ERROR("PSP clear vf fw!\n"); - return ret; - } - ret = psp_boot_config_set(adev); if (ret) { DRM_WARN("PSP set boot config@\n"); @@ -2439,7 +2413,6 @@ static int psp_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; - int ret; if (psp->adev->psp.ta_fw) { psp_ras_terminate(psp); @@ -2450,11 +2423,6 @@ static int psp_hw_fini(void *handle) } psp_asd_unload(psp); - ret = psp_clear_vf_fw(psp); - if (ret) { - DRM_ERROR("PSP clear vf fw!\n"); - return ret; - } psp_tmr_terminate(psp); psp_ring_destroy(psp, PSP_RING_TYPE__KM); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 
96064c343163..f6d3180febc4 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -97,7 +97,6 @@ enum psp_gfx_cmd_id GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */ GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */ GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */ - GFX_CMD_ID_CLEAR_VF_FW = 0x0000000D, /* Clear VF FW, to be used on VF shutdown. */ GFX_CMD_ID_GET_FW_ATTESTATION = 0x0000000F, /* Query GPUVA of the Fw Attestation DB */ /* IDs upto 0x1F are reserved for older programs (Raven, Vega 10/12/20) */ GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */ -- cgit From 3d2bee9188f24211b56874c7753a304d43278fad Mon Sep 17 00:00:00 2001 From: Feifei Xu Date: Sun, 25 Apr 2021 15:26:15 +0800 Subject: drm/amdgpu: Change the sdma interrupt print level Change the print level to debug and rate-limit the output. Signed-off-by: Feifei Xu Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 823a367990bf..85ab9bbcd551 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2229,7 +2229,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev, memset(&task_info, 0, sizeof(struct amdgpu_task_info)); amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_info(adev->dev, + dev_dbg_ratelimited(adev->dev, "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u " "pasid:%u, for process %s pid %d thread %s pid %d\n", instance, addr, entry->src_id, entry->ring_id, entry->vmid, @@ -2242,7 +2242,7 @@ static int sdma_v4_0_process_vm_hole_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - dev_err(adev->dev, "MC or SEM address in VM hole\n"); + dev_dbg_ratelimited(adev->dev, "MC or SEM address in VM hole\n"); sdma_v4_0_print_iv_entry(adev, entry); return 0; } @@ -2251,7 +2251,7 @@ static int sdma_v4_0_process_doorbell_invalid_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - dev_err(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n"); + dev_dbg_ratelimited(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n"); sdma_v4_0_print_iv_entry(adev, entry); return 0; } @@ -2260,7 +2260,7 @@ static int sdma_v4_0_process_pool_timeout_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - dev_err(adev->dev, + dev_dbg_ratelimited(adev->dev, "Polling register/memory timeout executing POLL_REG/MEM with finite timer\n"); sdma_v4_0_print_iv_entry(adev, entry); return 0; @@ -2270,7 +2270,7 @@ static int sdma_v4_0_process_srbm_write_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - dev_err(adev->dev, + dev_dbg_ratelimited(adev->dev, "SDMA gets an Register Write SRBM_WRITE command in non-privilege command buffer\n"); sdma_v4_0_print_iv_entry(adev, entry); return 0; -- cgit From 5d11699914b9993d7c56ff74450f815c7a54e99f Mon Sep 17 00:00:00 2001 From: Feifei Xu Date: Sun, 25 Apr 2021 15:49:23 +0800 Subject: drm/amdgpu: Correct and simplify sdma 4.x irq.num_types Correct and init the sdma4.x irq.num_types.
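For illustration, the idea of this patch, sizing each per-instance IRQ table from the instance count instead of a hard-coded per-ASIC enum ladder, in a minimal user-space C sketch; the struct and field names below are invented, not the driver's:

#include <stdio.h>

struct irq_src {
	int num_types;		/* one IRQ type per hardware instance */
};

struct sdma_block {
	int num_instances;
	struct irq_src trap_irq;
	struct irq_src ecc_irq;
};

/* derive table sizes from the instance count; no per-ASIC switch needed */
static void set_irq_types(struct sdma_block *sdma)
{
	sdma->trap_irq.num_types = sdma->num_instances;
	sdma->ecc_irq.num_types = sdma->num_instances;
}

int main(void)
{
	struct sdma_block sdma = { .num_instances = 5 };

	set_irq_types(&sdma);
	printf("trap:%d ecc:%d\n", sdma.trap_irq.num_types,
	       sdma.ecc_irq.num_types);
	return 0;
}

The same code then covers one, two, five or eight instances without listing each case.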
v2: squash in fix (Alex) Signed-off-by: Feifei Xu Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 85ab9bbcd551..d197185f7789 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2599,27 +2599,18 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_srbm_write_irq_funcs = { static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev) { + adev->sdma.trap_irq.num_types = adev->sdma.num_instances; + adev->sdma.ecc_irq.num_types = adev->sdma.num_instances; + /*For Arcturus and Aldebaran, add another 4 irq handler*/ switch (adev->sdma.num_instances) { - case 1: - adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1; - adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1; - break; case 5: - adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5; - adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5; - break; case 8: - adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; - adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST; - adev->sdma.vm_hole_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5; - adev->sdma.doorbell_invalid_irq.num_types = AMDGPU_SDMA_IRQ_LAST; - adev->sdma.pool_timeout_irq.num_types = AMDGPU_SDMA_IRQ_LAST; - adev->sdma.srbm_write_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + adev->sdma.vm_hole_irq.num_types = adev->sdma.num_instances; + adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances; + adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances; + adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances; break; - case 2: default: - adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2; - adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2; break; } adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs; -- cgit From dd57e65f7c9b1ab4e9b04f2b2bcd28f60abc4b38 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 26 Apr 2021 13:13:04 +0100 Subject: drm/amdkfd: Fix spelling mistake "unregisterd" -> "unregistered" There is a spelling mistake in a pr_debug message. Fix it. Signed-off-by: Colin Ian King Reviewed-by: Nirmoy Das Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 4cc2539bed5b..e4ce97ab6e26 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2286,7 +2286,7 @@ retry_write_locked: } prange = svm_range_create_unregistered_range(adev, p, mm, addr); if (!prange) { - pr_debug("failed to create unregisterd range svms 0x%p address [0x%llx]\n", + pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n", svms, addr); mmap_write_downgrade(mm); r = -EFAULT; -- cgit From c0f76fc8ad5f3511bdee37efc130555b39d3f711 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 26 Apr 2021 14:25:37 -0400 Subject: drm/amdkfd: fix double free device pgmap resource Use devm_memunmap_pages instead of memunmap_pages to release the pgmap and remove it from the device actions, to avoid a double free of the pgmap when unloading the driver module. Also release the device memory region if creating the device memory page structures fails.
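For illustration of why the fix works: devm_*-managed resources are owned by the driver core and released exactly once on device unwind, so an early teardown must go through the manager (devm_memunmap_pages) rather than the raw API (memunmap_pages), or the resource is freed twice. A minimal user-space analogue of that ownership rule, all names invented:

#include <stdio.h>
#include <stdlib.h>

/* toy "devres" manager: it owns every registered resource */
#define MAX_RES 8
static void *managed[MAX_RES];
static int nmanaged;

static void *devm_alloc(size_t sz)
{
	void *p = calloc(1, sz);

	if (p && nmanaged < MAX_RES)
		managed[nmanaged++] = p;
	return p;
}

/* early teardown goes through the manager, like devm_memunmap_pages() */
static void devm_release(void *p)
{
	for (int i = 0; i < nmanaged; i++) {
		if (managed[i] == p) {
			free(p);
			managed[i] = managed[--nmanaged];
			return;
		}
	}
}

/* driver-core unwind: frees whatever is still registered, exactly once */
static void devres_release_all(void)
{
	while (nmanaged)
		free(managed[--nmanaged]);
}

int main(void)
{
	void *pgmap = devm_alloc(64);

	devm_release(pgmap);	/* a plain free(pgmap) here would be freed again at unwind */
	devres_release_all();
	return 0;
}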
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index a66b67083d83..6b810863f6ba 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -912,6 +912,8 @@ int svm_migrate_init(struct amdgpu_device *adev) r = devm_memremap_pages(adev->dev, pgmap); if (IS_ERR(r)) { pr_err("failed to register HMM device memory\n"); + devm_release_mem_region(adev->dev, res->start, + res->end - res->start + 1); return PTR_ERR(r); } @@ -927,5 +929,9 @@ int svm_migrate_init(struct amdgpu_device *adev) void svm_migrate_fini(struct amdgpu_device *adev) { - memunmap_pages(&adev->kfd.dev->pgmap); + struct dev_pagemap *pgmap = &adev->kfd.dev->pgmap; + + devm_memunmap_pages(adev->dev, pgmap); + devm_release_mem_region(adev->dev, pgmap->range.start, + pgmap->range.end - pgmap->range.start + 1); } -- cgit From dd03daec0ff170c0622c9545137e8ad9846b967d Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 26 Apr 2021 10:06:13 +0200 Subject: drm/amdgpu: restructure amdgpu_vram_mgr_new MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merge the two loops, loosen the restriction for big allocations. This reduces the CPU overhead in the good case, but increases it a bit under memory pressure. Signed-off-by: Christian König Acked-and-Tested-by: Nirmoy Das Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 58 +++++++++++++--------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 529c5c32a205..e2cbe19404c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -358,13 +358,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, const struct ttm_place *place, struct ttm_resource *mem) { + unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages; struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); + uint64_t vis_usage = 0, mem_bytes, max_bytes; struct drm_mm *mm = &mgr->mm; - struct drm_mm_node *nodes; enum drm_mm_insert_mode mode; - unsigned long lpfn, num_nodes, pages_per_node, pages_left; - uint64_t vis_usage = 0, mem_bytes, max_bytes; + struct drm_mm_node *nodes; unsigned i; int r; @@ -391,9 +391,10 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, pages_per_node = HPAGE_PMD_NR; #else /* default to 2MB */ - pages_per_node = (2UL << (20UL - PAGE_SHIFT)); + pages_per_node = 2UL << (20UL - PAGE_SHIFT); #endif - pages_per_node = max((uint32_t)pages_per_node, mem->page_alignment); + pages_per_node = max_t(uint32_t, pages_per_node, + mem->page_alignment); num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node); } @@ -411,42 +412,37 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, mem->start = 0; pages_left = mem->num_pages; - spin_lock(&mgr->lock); - for (i = 0; pages_left >= pages_per_node; ++i) { - unsigned long pages = rounddown_pow_of_two(pages_left); - - /* Limit maximum size to 2GB due to SG table limitations */ - pages = min(pages, (2UL << (30 - PAGE_SHIFT))); + /* Limit maximum size to 2GB due to SG table limitations */ + pages = min(pages_left, 2UL << (30 - 
PAGE_SHIFT)); - r = drm_mm_insert_node_in_range(mm, &nodes[i], pages, - pages_per_node, 0, - place->fpfn, lpfn, - mode); - if (unlikely(r)) - break; - - vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]); - amdgpu_vram_mgr_virt_start(mem, &nodes[i]); - pages_left -= pages; - } - - for (; pages_left; ++i) { - unsigned long pages = min(pages_left, pages_per_node); + i = 0; + spin_lock(&mgr->lock); + while (pages_left) { uint32_t alignment = mem->page_alignment; - if (pages == pages_per_node) + if (pages >= pages_per_node) alignment = pages_per_node; - r = drm_mm_insert_node_in_range(mm, &nodes[i], - pages, alignment, 0, - place->fpfn, lpfn, - mode); - if (unlikely(r)) + r = drm_mm_insert_node_in_range(mm, &nodes[i], pages, alignment, + 0, place->fpfn, lpfn, mode); + if (unlikely(r)) { + if (pages > pages_per_node) { + if (is_power_of_2(pages)) + pages = pages / 2; + else + pages = rounddown_pow_of_two(pages); + continue; + } goto error; + } vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]); amdgpu_vram_mgr_virt_start(mem, &nodes[i]); pages_left -= pages; + ++i; + + if (pages > pages_left) + pages = pages_left; } spin_unlock(&mgr->lock); -- cgit From 95ea3dbc4e9548d35ab6fbf67675cef8c293e2f5 Mon Sep 17 00:00:00 2001 From: Jack Zhang Date: Tue, 27 Apr 2021 17:08:47 +0800 Subject: drm/amd/amdgpu/sriov disable all ip hw status by default Set all IP blocks' hw status to false before any hw_init, and only set a block's status to true once its hw_init has executed. The old 5.9 branch has this change but somehow the 5.11 kernel does not have this fix. Without this change, SR-IOV TDR fails the gfx IB test. Signed-off-by: Jack Zhang Reviewed-by: Emily Deng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index eef54b265ffd..5cb171c2273c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2843,7 +2843,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) AMD_IP_BLOCK_TYPE_IH, }; - for (i = 0; i < ARRAY_SIZE(ip_order); i++) { + for (i = 0; i < adev->num_ip_blocks; i++) { int j; struct amdgpu_ip_block *block; -- cgit From c3c5cc9a83dca59b3a883eb0200fcbf467a9115e Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Mon, 26 Apr 2021 15:38:18 -0400 Subject: drm/amdkfd: fix spelling mistake in packet manager The plural of 'process' should be 'processes'.
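Referring back to the amdgpu_vram_mgr_new() restructure two patches above: the merged loop asks for the largest block first and, on allocation failure, rounds the request down to the next power of two instead of giving up. A standalone sketch of that strategy with a fake allocator; the 512-page limit and counts here are invented:

#include <stdio.h>

static int is_power_of_2(unsigned long n)
{
	return n && !(n & (n - 1));
}

static unsigned long rounddown_pow_of_two(unsigned long n)
{
	while (n & (n - 1))
		n &= n - 1;	/* drop low bits until one remains */
	return n;
}

/* fake drm_mm: only runs of up to 512 contiguous pages succeed */
static int mm_insert(unsigned long pages)
{
	return pages <= 512 ? 0 : -1;
}

int main(void)
{
	unsigned long pages_left = 1300, pages_per_node = 512;
	unsigned long pages = 2048;	/* start at the large-allocation cap */

	while (pages_left) {
		if (mm_insert(pages)) {
			/* failure: shrink the request and retry */
			if (pages > pages_per_node) {
				pages = is_power_of_2(pages) ?
					pages / 2 : rounddown_pow_of_two(pages);
				continue;
			}
			fprintf(stderr, "out of space\n");
			return 1;
		}
		printf("placed %lu pages\n", pages);
		pages_left -= pages;
		if (pages > pages_left)
			pages = pages_left;
	}
	return 0;
}

Run stand-alone this places 512, 512 and 276 pages: the good case costs one loop, and only contention pays for the halving retries.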
Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 0ce507d7208a..f688451cb299 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -124,14 +124,14 @@ static int pm_create_runlist_ib(struct packet_manager *pm, { unsigned int alloc_size_bytes; unsigned int *rl_buffer, rl_wptr, i; - int retval, proccesses_mapped; + int retval, processes_mapped; struct device_process_node *cur; struct qcm_process_device *qpd; struct queue *q; struct kernel_queue *kq; bool is_over_subscription; - rl_wptr = retval = proccesses_mapped = 0; + rl_wptr = retval = processes_mapped = 0; retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr, &alloc_size_bytes, &is_over_subscription); @@ -148,7 +148,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, list_for_each_entry(cur, queues, list) { qpd = cur->qpd; /* build map process packet */ - if (proccesses_mapped >= pm->dqm->processes_count) { + if (processes_mapped >= pm->dqm->processes_count) { pr_debug("Not enough space left in runlist IB\n"); pm_release_ib(pm); return -ENOMEM; @@ -158,7 +158,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, if (retval) return retval; - proccesses_mapped++; + processes_mapped++; inc_wptr(&rl_wptr, pm->pmf->map_process_size, alloc_size_bytes); -- cgit From 4999e398e281b336c7e08a3bf0da014d9cc2119f Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 19 Apr 2021 21:51:27 -0400 Subject: drm/amdkfd: retry validation to recover range GPU vm retry fault range recovery needs to retry validation if: 1. the range was split in parallel by an unmap while recovering, or 2. the range was migrated to system memory and updated there while recovering. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index e4ce97ab6e26..30d142f6272f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1402,11 +1402,13 @@ static int svm_range_validate_and_map(struct mm_struct *mm, svm_range_lock(prange); if (!prange->actual_loc) { if (amdgpu_hmm_range_get_pages_done(hmm_range)) { + pr_debug("hmm update the range, need validate again\n"); r = -EAGAIN; goto unlock_out; } } if (!list_empty(&prange->child_list)) { + pr_debug("range split by unmap in parallel, validate again\n"); r = -EAGAIN; goto unlock_out; } @@ -2355,6 +2357,10 @@ out_unlock_svms: out: kfd_unref_process(p); + if (r == -EAGAIN) { + pr_debug("recover vm fault later\n"); + r = 0; + } return r; } -- cgit From 11dd55d1743881c7c8f62171066292fc034e1c3c Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Sat, 17 Apr 2021 22:37:21 -0400 Subject: drm/amdgpu: return IH ring drain finished if ring is empty Sometimes the IH does not set the ring wptr overflow flag after the wptr passes the rptr. As a workaround, if the IH rptr equals the wptr, the ring is empty, so return true to indicate that the IH ring checkpoint has been processed and the IH ring drain is finished.
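For illustration of the pointer arithmetic this workaround relies on: the driver extends the wrapped hardware rptr into a monotonic counter and compares it with the checkpoint wptr, additionally treating "rptr equals hw wptr" (empty ring) as drained. A self-contained sketch of power-of-two ring-pointer handling; the ring size and values are invented:

#include <stdio.h>
#include <stdint.h>

#define RING_SIZE 256			/* must be a power of two */
#define PTR_MASK  (RING_SIZE - 1)

/* extend a wrapped hardware rptr into a monotonic 64-bit counter */
static uint64_t extend_rptr(uint64_t prev, uint32_t hw_rptr)
{
	uint64_t cur = (prev & ~(uint64_t)PTR_MASK) | hw_rptr;

	if (cur < prev)			/* hardware pointer wrapped around */
		cur += PTR_MASK + 1;
	return cur;
}

static int checkpoint_processed(uint64_t rptr, uint64_t checkpoint_wptr,
				uint32_t hw_wptr)
{
	/* an empty ring also counts as drained (missing-overflow workaround) */
	return rptr >= checkpoint_wptr || (rptr & PTR_MASK) == hw_wptr;
}

int main(void)
{
	uint64_t rptr = 250;

	rptr = extend_rptr(rptr, 10);	/* hw rptr wrapped past 255 back to 10 */
	printf("extended rptr = %llu, drained = %d\n",
	       (unsigned long long)rptr,
	       checkpoint_processed(rptr, 266, 10));
	return 0;
}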
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index faaa6aa2faaf..a36e191cf086 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -175,7 +175,9 @@ static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev, cur_rptr += ih->ptr_mask + 1; *prev_rptr = cur_rptr; - return cur_rptr >= checkpoint_wptr; + /* check ring is empty to workaround missing wptr overflow flag */ + return cur_rptr >= checkpoint_wptr || + (cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih); } /** -- cgit From 373e3ccd859b4bc52ccb511e00b0c14e7e57430a Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 19 Apr 2021 21:19:57 -0400 Subject: drm/amdkfd: handle stale retry fault Retry fault interrupts may still be pending in the IH ring after the GPU page table has been updated to recover the vm fault, because each page of the range generates a retry fault interrupt. There is a race if the application unmaps the range, removing and freeing it first, and the retry fault work restore_pages then handles the retry fault interrupt: because the range can no longer be found, the vm fault cannot be recovered and an incorrect GPU vm fault is reported to the application. Before an unmap removes and frees the range, drain the retry fault interrupts from IH ring1 to ensure no retry fault arrives after the range is removed. Draining the retry fault interrupts skips a range that is on the deferred list for removal, or a child range that was split off by an unmap and has not yet been added to svms or been given an interval notifier. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 69 +++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 30d142f6272f..00d759b257f4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1830,6 +1830,28 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange) } } +static void svm_range_drain_retry_fault(struct svm_range_list *svms) +{ + struct kfd_process_device *pdd; + struct amdgpu_device *adev; + struct kfd_process *p; + uint32_t i; + + p = container_of(svms, struct kfd_process, svms); + + for (i = 0; i < p->n_pdds; i++) { + pdd = p->pdds[i]; + if (!pdd) + continue; + + pr_debug("drain retry fault gpu %d svms %p\n", i, svms); + adev = (struct amdgpu_device *)pdd->dev->kgd; + + amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1); + pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms); + } +} + static void svm_range_deferred_list_work(struct work_struct *work) { struct svm_range_list *svms; @@ -1847,6 +1869,10 @@ static void svm_range_deferred_list_work(struct work_struct *work) pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange, prange->start, prange->last, prange->work_item.op); + /* Make sure no stale retry fault coming after range is freed */ + if (prange->work_item.op == SVM_OP_UNMAP_RANGE) + svm_range_drain_retry_fault(prange->svms); + mm = prange->work_item.mm; mmap_write_lock(mm); mutex_lock(&svms->lock); @@ -2234,6 +2260,44 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev, return prange; } +/* svm_range_skip_recover - decide if prange can be recovered + * @prange: svm range structure + * + * GPU
vm retry fault handle skip recover the range for cases: + * 1. prange is on deferred list to be removed after unmap, it is stale fault, + * deferred list work will drain the stale fault before free the prange. + * 2. prange is on deferred list to add interval notifier after split, or + * 3. prange is child range, it is split from parent prange, recover later + * after interval notifier is added. + * + * Return: true to skip recover, false to recover + */ +static bool svm_range_skip_recover(struct svm_range *prange) +{ + struct svm_range_list *svms = prange->svms; + + spin_lock(&svms->deferred_list_lock); + if (list_empty(&prange->deferred_list) && + list_empty(&prange->child_list)) { + spin_unlock(&svms->deferred_list_lock); + return false; + } + spin_unlock(&svms->deferred_list_lock); + + if (prange->work_item.op == SVM_OP_UNMAP_RANGE) { + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] unmapped\n", + svms, prange, prange->start, prange->last); + return true; + } + if (prange->work_item.op == SVM_OP_ADD_RANGE_AND_MAP || + prange->work_item.op == SVM_OP_ADD_RANGE) { + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] not added yet\n", + svms, prange, prange->start, prange->last); + return true; + } + return false; +} + int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint64_t addr) @@ -2271,7 +2335,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, retry_write_locked: mutex_lock(&svms->lock); prange = svm_range_from_addr(svms, addr, NULL); - if (!prange) { pr_debug("failed to find prange svms 0x%p address [0x%llx]\n", svms, addr); @@ -2299,6 +2362,10 @@ retry_write_locked: mmap_write_downgrade(mm); mutex_lock(&prange->migrate_mutex); + + if (svm_range_skip_recover(prange)) + goto out_unlock_range; + timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp; /* skip duplicate vm fault on different pages of same range */ if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) { -- cgit From 36255b5f619594504e9d76de23b58e99c5e08ceb Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Tue, 20 Apr 2021 10:05:44 -0400 Subject: drm/amdgpu: address remove from fault filter Add an interface to remove an address from the fault filter ring by resetting the fault ring entry key, so that a future vm fault on that address will be processed for recovery. Define the fault key as atomic64_t and use atomic read/set/cmpxchg on the key to protect fault ring access between the interrupt handler and the interrupt deferred work on vg20. Change fault->timestamp to 48 bits so it shares a uint64_t with the 8-bit fault->next; 48 bits are enough for the IH timestamp.
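For illustration of the filter's keying and removal: each fault is keyed as (addr << 4 | pasid) and removal clears the key with an atomic compare-exchange so the interrupt handler and the deferred work do not race. A condensed user-space sketch; the real ring also chains entries per hash bucket and ages them by timestamp, and the table size and hash here are invented:

#include <stdio.h>
#include <stdint.h>
#include <stdatomic.h>

#define FILTER_SLOTS 128

static _Atomic uint64_t filter[FILTER_SLOTS];

/* pack the page-aligned address and 16-bit pasid into one key */
static uint64_t fault_key(uint64_t addr, uint16_t pasid)
{
	return addr << 4 | pasid;
}

static unsigned slot(uint64_t key)
{
	/* multiplicative hash, top 7 bits select the slot */
	return (unsigned)(key * 0x9e3779b97f4a7c15ull >> 57) & (FILTER_SLOTS - 1);
}

/* returns 1 if this fault was already seen and should be dropped */
static int filter_fault(uint64_t addr, uint16_t pasid)
{
	uint64_t key = fault_key(addr, pasid);
	_Atomic uint64_t *e = &filter[slot(key)];

	if (atomic_load(e) == key)
		return 1;
	atomic_store(e, key);
	return 0;
}

/* let the next retry fault on addr through again */
static void filter_remove(uint64_t addr, uint16_t pasid)
{
	uint64_t key = fault_key(addr, pasid);
	uint64_t expected = key;

	atomic_compare_exchange_strong(&filter[slot(key)], &expected, 0);
}

int main(void)
{
	printf("first:  %d\n", filter_fault(0x1000, 42));	/* 0: handled */
	printf("repeat: %d\n", filter_fault(0x1000, 42));	/* 1: filtered */
	filter_remove(0x1000, 42);
	printf("after remove: %d\n", filter_fault(0x1000, 42));/* 0 again */
	return 0;
}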
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 48 ++++++++++++++++++++++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 +++-- 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index c39ed9eb0987..dfa67c2255f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -332,6 +332,17 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) mc->agp_size >> 20, mc->agp_start, mc->agp_end); } +/** + * amdgpu_gmc_fault_key - get hask key from vm fault address and pasid + * + * @addr: 48 bit physical address, page aligned (36 significant bits) + * @pasid: 16 bit process address space identifier + */ +static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid) +{ + return addr << 4 | pasid; +} + /** * amdgpu_gmc_filter_faults - filter VM faults * @@ -348,8 +359,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, uint16_t pasid, uint64_t timestamp) { struct amdgpu_gmc *gmc = &adev->gmc; - - uint64_t stamp, key = addr << 4 | pasid; + uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid); struct amdgpu_gmc_fault *fault; uint32_t hash; @@ -365,7 +375,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, while (fault->timestamp >= stamp) { uint64_t tmp; - if (fault->key == key) + if (atomic64_read(&fault->key) == key) return true; tmp = fault->timestamp; @@ -378,7 +388,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, /* Add the fault to the ring */ fault = &gmc->fault_ring[gmc->last_fault]; - fault->key = key; + atomic64_set(&fault->key, key); fault->timestamp = timestamp; /* And update the hash */ @@ -387,6 +397,36 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, return false; } +/** + * amdgpu_gmc_filter_faults_remove - remove address from VM faults filter + * + * @adev: amdgpu device structure + * @addr: address of the VM fault + * @pasid: PASID of the process causing the fault + * + * Remove the address from fault filter, then future vm fault on this address + * will pass to retry fault handler to recover. 
+ */ +void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, + uint16_t pasid) +{ + struct amdgpu_gmc *gmc = &adev->gmc; + uint64_t key = amdgpu_gmc_fault_key(addr, pasid); + struct amdgpu_gmc_fault *fault; + uint32_t hash; + uint64_t tmp; + + hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER); + fault = &gmc->fault_ring[gmc->fault_hash[hash].idx]; + do { + if (atomic64_cmpxchg(&fault->key, key, 0) == key) + break; + + tmp = fault->timestamp; + fault = &gmc->fault_ring[fault->next]; + } while (fault->timestamp < tmp); +} + int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) { int r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 9d11c02a3938..6aa1d52d3aee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -66,9 +66,9 @@ struct firmware; * GMC page fault information */ struct amdgpu_gmc_fault { - uint64_t timestamp; + uint64_t timestamp:48; uint64_t next:AMDGPU_GMC_FAULT_RING_ORDER; - uint64_t key:52; + atomic64_t key; }; /* @@ -318,6 +318,8 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc); bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, uint16_t pasid, uint64_t timestamp); +void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, + uint16_t pasid); int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev); void amdgpu_gmc_ras_fini(struct amdgpu_device *adev); int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev); -- cgit From b3dc91f973172dd71594076eb20484471d981a89 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Tue, 20 Apr 2021 15:13:59 -0400 Subject: drm/amdkfd: enable subsequent retry fault After draining a stale retry fault, or after failing to validate the range for recovery, remove the fault address from the fault filter ring so that a subsequent retry interrupt on the same address can be handled. Otherwise the retry fault will not be processed for recovery until the timeout has passed.
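For illustration of the control flow this patch establishes: when recovery returns -EAGAIN the handler reports success but clears the filter entry, so the hardware's next retry interrupt on the same address re-enters recovery instead of being dropped as a duplicate. A toy sketch of that lifecycle; the recovery stub and counts are invented, and filter_remove() stands in for amdgpu_gmc_filter_faults_remove():

#include <stdio.h>

#define EAGAIN 11	/* matches the kernel errno value */

/* pretend recovery: fails twice with -EAGAIN, then succeeds */
static int recover(int *attempts)
{
	return ++(*attempts) < 3 ? -EAGAIN : 0;
}

static void filter_remove(void)
{
	printf("  filter entry cleared, next retry fault passes through\n");
}

static int restore_pages(int *attempts)
{
	int r = recover(attempts);

	if (r == -EAGAIN) {
		/* let the next retry interrupt through and try again later */
		filter_remove();
		r = 0;
	}
	return r;
}

int main(void)
{
	int attempts = 0;

	for (int fault = 1; fault <= 3; fault++)
		printf("retry fault %d -> r=%d\n", fault,
		       restore_pages(&attempts));
	return 0;
}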
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 00d759b257f4..d9111fea724b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2363,8 +2363,10 @@ retry_write_locked: mutex_lock(&prange->migrate_mutex); - if (svm_range_skip_recover(prange)) + if (svm_range_skip_recover(prange)) { + amdgpu_gmc_filter_faults_remove(adev, addr, pasid); goto out_unlock_range; + } timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp; /* skip duplicate vm fault on different pages of same range */ @@ -2426,6 +2428,7 @@ out: if (r == -EAGAIN) { pr_debug("recover vm fault later\n"); + amdgpu_gmc_filter_faults_remove(adev, addr, pasid); r = 0; } return r; -- cgit From 8baa6018b750570e43c160874f754083e7bf7eb4 Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Tue, 20 Apr 2021 21:09:10 -0400 Subject: drm/amdkfd: Add Aldebaran gws support v2: updated MEC FW version after validating gws with debugger Signed-off-by: Harish Kasiviswanathan Reviewed-by: Joseph Greathouse Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 348fd3e49017..ae3cabb47a26 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -699,7 +699,9 @@ static int kfd_gws_init(struct kfd_dev *kfd) && kfd->device_info->asic_family <= CHIP_RAVEN && kfd->mec2_fw_version >= 0x1b3) || (kfd->device_info->asic_family == CHIP_ARCTURUS - && kfd->mec2_fw_version >= 0x30)) + && kfd->mec2_fw_version >= 0x30) + || (kfd->device_info->asic_family == CHIP_ALDEBARAN + && kfd->mec2_fw_version >= 0x28)) ret = amdgpu_amdkfd_alloc_gws(kfd->kgd, amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws); -- cgit From 0e0036c7d13b945260ff1ce8377eca7ea877c008 Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Tue, 27 Apr 2021 22:21:03 +0800 Subject: drm/amdgpu: fix no full coverage issue for gprs initialization The number of waves per SIMD in Aldebaran has changed to 8, so it is impossible for the old algorithm to initialize all SGPRs with one threadgroup. The new algorithm first uses three threadgroups to initialize most SGPRs simultaneously, and then uses another threadgroup with 4 waves to cover the remaining uninitialized SGPRs.
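For illustration of how the driver verifies that the init shaders reached every SIMD: each wave writes a bit into a writeback buffer indexed by SE/CU/SIMD, and the check expects every word to read 0xF (all 4 waves). A stand-alone sketch with the dimensions used by the reworked gfx_v9_4_2 coverage check; the buffer contents here are synthetic:

#include <stdio.h>
#include <stdint.h>

#define NUM_SE   8
#define NUM_CU   16
#define NUM_SIMD 4

int main(void)
{
	static uint32_t wb[NUM_SE * NUM_CU * NUM_SIMD];
	unsigned covered = 0;

	/* synthetic result: every SIMD but one reported all 4 waves (0xF) */
	for (unsigned i = 0; i < NUM_SE * NUM_CU * NUM_SIMD; i++)
		wb[i] = 0xF;
	wb[5] = 0x7;	/* one SIMD missed a wave */

	for (unsigned se = 0; se < NUM_SE; se++)
		for (unsigned cu = 0; cu < NUM_CU; cu++)
			for (unsigned simd = 0; simd < NUM_SIMD; simd++) {
				uint32_t v = wb[(se * NUM_CU + cu) * NUM_SIMD + simd];

				if (v == 0xF)
					covered++;
				else
					printf("SE[%u]CU[%u]SIMD[%u] not initialized (0x%x)\n",
					       se, cu, simd, v);
			}

	printf("%u of %u SIMDs covered\n", covered, NUM_SE * NUM_CU * NUM_SIMD);
	return 0;
}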
v2: Add more description about the new algorithm to clear sgprs and add some comment for shader binaries Signed-off-by: Dennis Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 779 +++++++++++++++++++++----------- 2 files changed, 516 insertions(+), 265 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index a2fe2dac32c1..2e6789a7dc46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -328,7 +328,7 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) { if (i == AMDGPU_IB_POOL_DIRECT) - size = PAGE_SIZE * 2; + size = PAGE_SIZE * 6; else size = AMDGPU_IB_POOL_SIZE; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index d17e57dea178..025b1e42e31b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -32,6 +32,11 @@ #include "amdgpu_ras.h" #include "amdgpu_gfx.h" +#define SE_ID_MAX 8 +#define CU_ID_MAX 16 +#define SIMD_ID_MAX 4 +#define WAVE_ID_MAX 10 + enum gfx_v9_4_2_utc_type { VML2_MEM, VML2_WALKER_MEM, @@ -80,101 +85,106 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_2_alde[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, regTCI_CNTL_3, 0xff, 0x20), }; +/** + * This shader is used to clear VGPRS and LDS, and also write the input + * pattern into the write back buffer, which will be used by driver to + * check whether all SIMDs have been covered. +*/ static const u32 vgpr_init_compute_shader_aldebaran[] = { - 0xb8840904, 0xb8851a04, 0xb8861344, 0x9207c006, 0x92088405, 0x81070807, - 0x81070407, 0x8e078207, 0xbe88008f, 0xc0410200, 0x00000007, 0xd3d94000, - 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 0xd3d94003, - 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 0xd3d94006, - 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 0xd3d94009, - 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 0xd3d9400c, - 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 0xd3d9400f, - 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 0xd3d94012, - 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 0xd3d94015, - 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 0xd3d94018, - 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 0xd3d9401b, - 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 0xd3d9401e, - 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 0xd3d94021, - 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 0xd3d94024, - 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 0xd3d94027, - 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 0xd3d9402a, - 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 0xd3d9402d, - 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 0xd3d94030, - 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 0xd3d94033, - 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 0xd3d94036, - 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 0xd3d94039, - 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 0xd3d9403c, - 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 0xd3d9403f, - 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 0xd3d94042, - 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 0xd3d94045, - 0x18000080, 0xd3d94046, 
0x18000080, 0xd3d94047, 0x18000080, 0xd3d94048, - 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 0xd3d9404b, - 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 0xd3d9404e, - 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 0xd3d94051, - 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 0xd3d94054, - 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 0xd3d94057, - 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 0xd3d9405a, - 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 0xd3d9405d, - 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 0xd3d94060, - 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 0xd3d94063, - 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 0xd3d94066, - 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 0xd3d94069, - 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 0xd3d9406c, - 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 0xd3d9406f, - 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080, 0xd3d94072, - 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 0xd3d94075, - 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 0xd3d94078, - 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 0xd3d9407b, - 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 0xd3d9407e, - 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 0xd3d94081, - 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 0xd3d94084, - 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 0xd3d94087, - 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 0xd3d9408a, - 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 0xd3d9408d, - 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 0xd3d94090, - 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 0xd3d94093, - 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 0xd3d94096, - 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 0xd3d94099, - 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 0xd3d9409c, - 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 0xd3d9409f, - 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 0xd3d940a2, - 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 0xd3d940a5, - 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 0xd3d940a8, - 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 0xd3d940ab, - 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 0xd3d940ae, - 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 0xd3d940b1, - 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 0xd3d940b4, - 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 0xd3d940b7, - 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 0xd3d940ba, - 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 0xd3d940bd, - 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 0xd3d940c0, - 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 0xd3d940c3, - 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 0xd3d940c6, - 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 0xd3d940c9, - 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 0xd3d940cc, - 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 0xd3d940cf, - 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 0xd3d940d2, - 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 0xd3d940d5, - 0x18000080, 0xd3d940d6, 
0x18000080, 0xd3d940d7, 0x18000080, 0xd3d940d8, - 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 0xd3d940db, - 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 0xd3d940de, - 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 0xd3d940e1, - 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 0xd3d940e4, - 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 0xd3d940e7, - 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 0xd3d940ea, - 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 0xd3d940ed, - 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 0xd3d940f0, - 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 0xd3d940f3, - 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 0xd3d940f6, - 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 0xd3d940f9, - 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080, 0xd3d940fc, - 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 0xd3d940ff, - 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 0x7e000280, - 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 0x7e0c0280, - 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 0xd28c0001, - 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904, 0xb78b4000, - 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 0x00020201, - 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 0xbf84fff8, - 0xbf810000, + 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280, + 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208, + 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xd3d94000, 0x18000080, + 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 0xd3d94003, 0x18000080, + 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 0xd3d94006, 0x18000080, + 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 0xd3d94009, 0x18000080, + 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 0xd3d9400c, 0x18000080, + 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 0xd3d9400f, 0x18000080, + 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 0xd3d94012, 0x18000080, + 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 0xd3d94015, 0x18000080, + 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 0xd3d94018, 0x18000080, + 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 0xd3d9401b, 0x18000080, + 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 0xd3d9401e, 0x18000080, + 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 0xd3d94021, 0x18000080, + 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 0xd3d94024, 0x18000080, + 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 0xd3d94027, 0x18000080, + 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 0xd3d9402a, 0x18000080, + 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 0xd3d9402d, 0x18000080, + 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 0xd3d94030, 0x18000080, + 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 0xd3d94033, 0x18000080, + 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 0xd3d94036, 0x18000080, + 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 0xd3d94039, 0x18000080, + 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 0xd3d9403c, 0x18000080, + 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 0xd3d9403f, 0x18000080, + 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 0xd3d94042, 0x18000080, + 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 0xd3d94045, 0x18000080, + 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 0xd3d94048, 0x18000080, + 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 0xd3d9404b, 0x18000080, + 0xd3d9404c, 
0x18000080, 0xd3d9404d, 0x18000080, 0xd3d9404e, 0x18000080, + 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 0xd3d94051, 0x18000080, + 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 0xd3d94054, 0x18000080, + 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 0xd3d94057, 0x18000080, + 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 0xd3d9405a, 0x18000080, + 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 0xd3d9405d, 0x18000080, + 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 0xd3d94060, 0x18000080, + 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 0xd3d94063, 0x18000080, + 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 0xd3d94066, 0x18000080, + 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 0xd3d94069, 0x18000080, + 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 0xd3d9406c, 0x18000080, + 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 0xd3d9406f, 0x18000080, + 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080, 0xd3d94072, 0x18000080, + 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 0xd3d94075, 0x18000080, + 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 0xd3d94078, 0x18000080, + 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 0xd3d9407b, 0x18000080, + 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 0xd3d9407e, 0x18000080, + 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 0xd3d94081, 0x18000080, + 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 0xd3d94084, 0x18000080, + 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 0xd3d94087, 0x18000080, + 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 0xd3d9408a, 0x18000080, + 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 0xd3d9408d, 0x18000080, + 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 0xd3d94090, 0x18000080, + 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 0xd3d94093, 0x18000080, + 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 0xd3d94096, 0x18000080, + 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 0xd3d94099, 0x18000080, + 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 0xd3d9409c, 0x18000080, + 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 0xd3d9409f, 0x18000080, + 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 0xd3d940a2, 0x18000080, + 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 0xd3d940a5, 0x18000080, + 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 0xd3d940a8, 0x18000080, + 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 0xd3d940ab, 0x18000080, + 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 0xd3d940ae, 0x18000080, + 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 0xd3d940b1, 0x18000080, + 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 0xd3d940b4, 0x18000080, + 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 0xd3d940b7, 0x18000080, + 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 0xd3d940ba, 0x18000080, + 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 0xd3d940bd, 0x18000080, + 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 0xd3d940c0, 0x18000080, + 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 0xd3d940c3, 0x18000080, + 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 0xd3d940c6, 0x18000080, + 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 0xd3d940c9, 0x18000080, + 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 0xd3d940cc, 0x18000080, + 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 0xd3d940cf, 0x18000080, + 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 0xd3d940d2, 0x18000080, + 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 0xd3d940d5, 0x18000080, + 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 0xd3d940d8, 0x18000080, + 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 0xd3d940db, 0x18000080, + 0xd3d940dc, 
0x18000080, 0xd3d940dd, 0x18000080, 0xd3d940de, 0x18000080, + 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 0xd3d940e1, 0x18000080, + 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 0xd3d940e4, 0x18000080, + 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 0xd3d940e7, 0x18000080, + 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 0xd3d940ea, 0x18000080, + 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 0xd3d940ed, 0x18000080, + 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 0xd3d940f0, 0x18000080, + 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 0xd3d940f3, 0x18000080, + 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 0xd3d940f6, 0x18000080, + 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 0xd3d940f9, 0x18000080, + 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080, 0xd3d940fc, 0x18000080, + 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 0xd3d940ff, 0x18000080, + 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 0x7e000280, 0x7e020280, + 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 0x7e0c0280, 0x7e0e0280, + 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 0xd28c0001, 0x0001007f, + 0xd28d0001, 0x0002027e, 0x10020288, 0xbe8b0004, 0xb78b4000, 0xd1196a01, + 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 0x00020201, 0xd89cc080, + 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 0xbf84fff8, 0xbf810000, }; const struct soc15_reg_entry vgpr_init_regs_aldebaran[] = { @@ -183,7 +193,7 @@ const struct soc15_reg_entry vgpr_init_regs_aldebaran[] = { { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 4 }, { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 }, { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0xbf }, - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x400004 }, /* 64KB LDS */ + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x400006 }, /* 64KB LDS */ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x3F }, /* 63 - accum-offset = 256 */ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, @@ -195,262 +205,503 @@ const struct soc15_reg_entry vgpr_init_regs_aldebaran[] = { { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, }; -static const u32 sgpr_init_compute_shader_aldebaran[] = { - 0xb8840904, 0xb8851a04, 0xb8861344, 0x9207c006, 0x92088405, 0x81070807, - 0x81070407, 0x8e078207, 0xbefc0006, 0xbf800000, 0xbf900001, 0xbe88008f, - 0xc0410200, 0x00000007, 0xb07c0000, 0xbe8000ff, 0x0000005f, 0xbee50080, - 0xbe812c65, 0xbe822c65, 0xbe832c65, 0xbe842c65, 0xbe852c65, 0xb77c0005, - 0x80808500, 0xbf84fff8, 0xbe800080, 0xbf810000, +/** + * The below shaders are used to clear SGPRS, and also write the input + * pattern into the write back buffer. The first two dispatch should be + * scheduled simultaneously which make sure that all SGPRS could be + * allocated, so the dispatch 1 need check write back buffer before scheduled, + * make sure that waves of dispatch 0 are all dispacthed to all simds + * balanced. both dispatch 0 and dispatch 1 should be halted until all waves + * are dispatched, and then driver write a pattern to the shared memory to make + * all waves continue. 
+*/ +static const u32 sgpr112_init_compute_shader_aldebaran[] = { + 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280, + 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208, + 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200, + 0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102, + 0xc0410080, 0x00000007, 0xbf8c0000, 0xbefc0080, 0xbe880080, 0xbe890080, + 0xbe8a0080, 0xbe8b0080, 0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, + 0xbe900080, 0xbe910080, 0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, + 0xbe960080, 0xbe970080, 0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, + 0xbe9c0080, 0xbe9d0080, 0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, + 0xbea20080, 0xbea30080, 0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, + 0xbea80080, 0xbea90080, 0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, + 0xbeae0080, 0xbeaf0080, 0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, + 0xbeb40080, 0xbeb50080, 0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, + 0xbeba0080, 0xbebb0080, 0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, + 0xbec00080, 0xbec10080, 0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, + 0xbec60080, 0xbec70080, 0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, + 0xbecc0080, 0xbecd0080, 0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, + 0xbed20080, 0xbed30080, 0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, + 0xbed80080, 0xbed90080, 0xbeda0080, 0xbedb0080, 0xbedc0080, 0xbedd0080, + 0xbede0080, 0xbedf0080, 0xbee00080, 0xbee10080, 0xbee20080, 0xbee30080, + 0xbee40080, 0xbee50080, 0xbf810000 }; -static const struct soc15_reg_entry sgpr1_init_regs_aldebaran[] = { +const struct soc15_reg_entry sgpr112_init_regs_aldebaran[] = { { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 }, { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 8 }, { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 }, - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS): SGPRS[9:6] VGPRS[5:0] */ - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x4 }, /* USER_SGPR[5:1]*/ - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x3F }, /* 63 - accum-offset = 256 */ - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x2c0 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, + { 
SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, +}; + +static const u32 sgpr96_init_compute_shader_aldebaran[] = { + 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280, + 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208, + 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200, + 0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102, + 0xc0410080, 0x00000007, 0xbf8c0000, 0xbefc0080, 0xbe880080, 0xbe890080, + 0xbe8a0080, 0xbe8b0080, 0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, + 0xbe900080, 0xbe910080, 0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, + 0xbe960080, 0xbe970080, 0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, + 0xbe9c0080, 0xbe9d0080, 0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, + 0xbea20080, 0xbea30080, 0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, + 0xbea80080, 0xbea90080, 0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, + 0xbeae0080, 0xbeaf0080, 0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, + 0xbeb40080, 0xbeb50080, 0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, + 0xbeba0080, 0xbebb0080, 0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, + 0xbec00080, 0xbec10080, 0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, + 0xbec60080, 0xbec70080, 0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, + 0xbecc0080, 0xbecd0080, 0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, + 0xbed20080, 0xbed30080, 0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, + 0xbed80080, 0xbed90080, 0xbf810000, }; -static const struct soc15_reg_entry sgpr2_init_regs_aldebaran[] = { +const struct soc15_reg_entry sgpr96_init_regs_aldebaran[] = { { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 }, - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 8 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0xc }, { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 }, - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x4 }, /* USER_SGPR[5:1]*/ - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x3F }, /* 63 - accum-offset = 256 */ - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, - { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x240 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, 
regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
};
-static int gfx_v9_4_2_check_gprs_init_coverage(struct amdgpu_device *adev,
- uint32_t *wb)
-{
- uint32_t se_id, cu_id, simd_id;
- uint32_t simd_cnt = 0;
- uint32_t se_offset, cu_offset, data;
-
- for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
- se_offset = se_id * 16 * 4;
- for (cu_id = 0; cu_id < 16; cu_id++) {
- cu_offset = cu_id * 4;
- for (simd_id = 0; simd_id < 4; simd_id++) {
- data = wb[se_offset + cu_offset + simd_id];
- if (data == 0xF)
- simd_cnt++;
- }
- }
- }
-
- if (adev->gfx.cu_info.number * 4 == simd_cnt)
- return 0;
-
- dev_warn(adev->dev, "SIMD Count: %d, %d\n",
- adev->gfx.cu_info.number * 4, simd_cnt);
-
- for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
- se_offset = se_id * 16 * 4;
- for (cu_id = 0; cu_id < 16; cu_id++) {
- cu_offset = cu_id * 4;
- for (simd_id = 0; simd_id < 4; simd_id++) {
- data = wb[se_offset + cu_offset + simd_id];
- if (data != 0xF)
- dev_warn(adev->dev, "SE[%d]CU[%d]SIMD[%d]: isn't inited\n",
- se_id, cu_id, simd_id);
- }
- }
- }
+/**
+ * This shader is used to clear the uninitialized sgprs after the above two
+ * dispatches; because of a hardware limitation, dispatch 0 couldn't clear
+ * the top hole sgprs. Therefore 4 waves per SIMD are needed to cover these
+ * sgprs
+*/
+static const u32 sgpr64_init_compute_shader_aldebaran[] = {
+ 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
+ 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
+ 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbefc0080, 0xbe880080,
+ 0xbe890080, 0xbe8a0080, 0xbe8b0080, 0xbe8c0080, 0xbe8d0080, 0xbe8e0080,
+ 0xbe8f0080, 0xbe900080, 0xbe910080, 0xbe920080, 0xbe930080, 0xbe940080,
+ 0xbe950080, 0xbe960080, 0xbe970080, 0xbe980080, 0xbe990080, 0xbe9a0080,
+ 0xbe9b0080, 0xbe9c0080, 0xbe9d0080, 0xbe9e0080, 0xbe9f0080, 0xbea00080,
+ 0xbea10080, 0xbea20080, 0xbea30080, 0xbea40080, 0xbea50080, 0xbea60080,
+ 0xbea70080, 0xbea80080, 0xbea90080, 0xbeaa0080, 0xbeab0080, 0xbeac0080,
+ 0xbead0080, 0xbeae0080, 0xbeaf0080, 0xbeb00080, 0xbeb10080, 0xbeb20080,
+ 0xbeb30080, 0xbeb40080, 0xbeb50080, 0xbeb60080, 0xbeb70080, 0xbeb80080,
+ 0xbeb90080, 0xbf810000,
+};
- return -EFAULT;
-}
+const struct soc15_reg_entry sgpr64_init_regs_aldebaran[] = {
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0x10 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x1c0 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev,
- const uint32_t *shader_ptr, uint32_t shader_size,
- const struct soc15_reg_entry *init_regs, uint32_t regs_size,
- uint32_t compute_dim_x, u64
wb_gpu_addr) + struct amdgpu_ring *ring, + struct amdgpu_ib *ib, + const u32 *shader_ptr, u32 shader_size, + const struct soc15_reg_entry *init_regs, u32 regs_size, + u32 compute_dim_x, u64 wb_gpu_addr, u32 pattern, + struct dma_fence **fence_ptr) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; - struct amdgpu_ib ib; - struct dma_fence *f = NULL; int r, i; uint32_t total_size, shader_offset; u64 gpu_addr; - total_size = (regs_size * 3 + 4 + 4 + 5 + 2) * 4; + total_size = (regs_size * 3 + 4 + 5 + 5) * 4; total_size = ALIGN(total_size, 256); shader_offset = total_size; total_size += ALIGN(shader_size, 256); /* allocate an indirect buffer to put the commands in */ - memset(&ib, 0, sizeof(ib)); + memset(ib, 0, sizeof(*ib)); r = amdgpu_ib_get(adev, NULL, total_size, - AMDGPU_IB_POOL_DIRECT, &ib); + AMDGPU_IB_POOL_DIRECT, ib); if (r) { - DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); + dev_err(adev->dev, "failed to get ib (%d).\n", r); return r; } /* load the compute shaders */ for (i = 0; i < shader_size/sizeof(u32); i++) - ib.ptr[i + (shader_offset / 4)] = shader_ptr[i]; + ib->ptr[i + (shader_offset / 4)] = shader_ptr[i]; /* init the ib length to 0 */ - ib.length_dw = 0; + ib->length_dw = 0; /* write the register state for the compute dispatch */ for (i = 0; i < regs_size; i++) { - ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); - ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(init_regs[i]) + ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); + ib->ptr[ib->length_dw++] = SOC15_REG_ENTRY_OFFSET(init_regs[i]) - PACKET3_SET_SH_REG_START; - ib.ptr[ib.length_dw++] = init_regs[i].reg_value; + ib->ptr[ib->length_dw++] = init_regs[i].reg_value; } /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ - gpu_addr = (ib.gpu_addr + (u64)shader_offset) >> 8; - ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); - ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_PGM_LO) + gpu_addr = (ib->gpu_addr + (u64)shader_offset) >> 8; + ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); + ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_PGM_LO) - PACKET3_SET_SH_REG_START; - ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); - ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); + ib->ptr[ib->length_dw++] = lower_32_bits(gpu_addr); + ib->ptr[ib->length_dw++] = upper_32_bits(gpu_addr); /* write the wb buffer address */ - ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); - ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_USER_DATA_0) + ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 3); + ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_USER_DATA_0) - PACKET3_SET_SH_REG_START; - ib.ptr[ib.length_dw++] = lower_32_bits(wb_gpu_addr); - ib.ptr[ib.length_dw++] = upper_32_bits(wb_gpu_addr); + ib->ptr[ib->length_dw++] = lower_32_bits(wb_gpu_addr); + ib->ptr[ib->length_dw++] = upper_32_bits(wb_gpu_addr); + ib->ptr[ib->length_dw++] = pattern; /* write dispatch packet */ - ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); - ib.ptr[ib.length_dw++] = compute_dim_x; /* x */ - ib.ptr[ib.length_dw++] = 1; /* y */ - ib.ptr[ib.length_dw++] = 1; /* z */ - ib.ptr[ib.length_dw++] = + ib->ptr[ib->length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ib->ptr[ib->length_dw++] = compute_dim_x; /* x */ + ib->ptr[ib->length_dw++] = 1; /* y */ + ib->ptr[ib->length_dw++] = 1; /* z */ + ib->ptr[ib->length_dw++] = REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); - /* write CS partial flush packet */ - 
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); - ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); - /* shedule the ib on the ring */ - r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, ib, NULL, fence_ptr); if (r) { - DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); - goto fail; + dev_err(adev->dev, "ib submit failed (%d).\n", r); + amdgpu_ib_free(adev, ib, NULL); } + return r; +} - /* wait for the GPU to finish processing the IB */ - r = dma_fence_wait(f, false); - if (r) { - DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); - goto fail; +static void gfx_v9_4_2_log_wave_assignment(struct amdgpu_device *adev, uint32_t *wb_ptr) +{ + uint32_t se, cu, simd, wave; + uint32_t offset = 0; + char *str; + int size; + + str = kmalloc(256, GFP_KERNEL); + if (!str) + return; + + dev_dbg(adev->dev, "wave assignment:\n"); + + for (se = 0; se < adev->gfx.config.max_shader_engines; se++) { + for (cu = 0; cu < CU_ID_MAX; cu++) { + memset(str, 0, 256); + size = sprintf(str, "SE[%02d]CU[%02d]: ", se, cu); + for (simd = 0; simd < SIMD_ID_MAX; simd++) { + size += sprintf(str + size, "["); + for (wave = 0; wave < WAVE_ID_MAX; wave++) { + size += sprintf(str + size, "%x", wb_ptr[offset]); + offset++; + } + size += sprintf(str + size, "] "); + } + dev_dbg(adev->dev, "%s\n", str); + } } -fail: - amdgpu_ib_free(adev, &ib, NULL); - dma_fence_put(f); - return r; + kfree(str); } -int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev) +static int gfx_v9_4_2_wait_for_waves_assigned(struct amdgpu_device *adev, + uint32_t *wb_ptr, uint32_t mask, + uint32_t pattern, uint32_t num_wave, bool wait) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; - int r; - int compute_dim_x = adev->gfx.config.max_shader_engines * - adev->gfx.config.max_cu_per_sh * - adev->gfx.config.max_sh_per_se; - int sgpr_work_group_size = 5; - /* CU_ID: 0~15, SIMD_ID: 0~3 */ - int wb_size = adev->gfx.config.max_shader_engines * 16 * 4; - struct amdgpu_ib ib; + uint32_t se, cu, simd, wave; + uint32_t loop = 0; + uint32_t wave_cnt; + uint32_t offset; - /* only support when RAS is enabled */ - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) - return 0; + do { + wave_cnt = 0; + offset = 0; + + for (se = 0; se < adev->gfx.config.max_shader_engines; se++) + for (cu = 0; cu < CU_ID_MAX; cu++) + for (simd = 0; simd < SIMD_ID_MAX; simd++) + for (wave = 0; wave < WAVE_ID_MAX; wave++) { + if (((1 << wave) & mask) && + (wb_ptr[offset] == pattern)) + wave_cnt++; + + offset++; + } + + if (wave_cnt == num_wave) + return 0; + + mdelay(1); + } while (++loop < 2000 && wait); + + dev_err(adev->dev, "actual wave num: %d, expected wave num: %d\n", + wave_cnt, num_wave); + + gfx_v9_4_2_log_wave_assignment(adev, wb_ptr); + + return -EBADSLT; +} + +static int gfx_v9_4_2_do_sgprs_init(struct amdgpu_device *adev) +{ + int r; + int wb_size = adev->gfx.config.max_shader_engines * + CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX; + struct amdgpu_ib wb_ib; + struct amdgpu_ib disp_ibs[3]; + struct dma_fence *fences[3]; + u32 pattern[3] = { 0x1, 0x5, 0xa }; /* bail if the compute ring is not ready */ - if (!ring->sched.ready) + if (!adev->gfx.compute_ring[0].sched.ready || + !adev->gfx.compute_ring[1].sched.ready) return 0; - /* allocate an indirect buffer to put the commands in */ - memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, wb_size * sizeof(uint32_t), - AMDGPU_IB_POOL_DIRECT, &ib); + /* allocate the write-back buffer from IB */ + memset(&wb_ib, 0, sizeof(wb_ib)); + r = amdgpu_ib_get(adev, 
NULL, (1 + wb_size) * sizeof(uint32_t),
+ AMDGPU_IB_POOL_DIRECT, &wb_ib);
if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+ dev_err(adev->dev, "failed to get ib (%d) for wb\n", r);
return r;
}
+ memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
+
+ r = gfx_v9_4_2_run_shader(adev,
+ &adev->gfx.compute_ring[0],
+ &disp_ibs[0],
+ sgpr112_init_compute_shader_aldebaran,
+ sizeof(sgpr112_init_compute_shader_aldebaran),
+ sgpr112_init_regs_aldebaran,
+ ARRAY_SIZE(sgpr112_init_regs_aldebaran),
+ adev->gfx.cu_info.number,
+ wb_ib.gpu_addr, pattern[0], &fences[0]);
+ if (r) {
+ dev_err(adev->dev, "failed to clear first 224 sgprs\n");
+ goto pro_end;
+ }
- memset(ib.ptr, 0, wb_size * sizeof(uint32_t));
- r = gfx_v9_4_2_run_shader(adev, vgpr_init_compute_shader_aldebaran,
- sizeof(vgpr_init_compute_shader_aldebaran),
- vgpr_init_regs_aldebaran,
- ARRAY_SIZE(vgpr_init_regs_aldebaran),
- compute_dim_x * 2, ib.gpu_addr);
+ r = gfx_v9_4_2_wait_for_waves_assigned(adev,
+ &wb_ib.ptr[1], 0b11,
+ pattern[0],
+ adev->gfx.cu_info.number * SIMD_ID_MAX * 2,
+ true);
if (r) {
- dev_err(adev->dev, "Init VGPRS: failed to run shader\n");
- goto failed;
+ dev_err(adev->dev, "wave coverage failed when clearing first 224 sgprs\n");
+ wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+ goto disp0_failed;
}
- r = gfx_v9_4_2_check_gprs_init_coverage(adev, ib.ptr);
+ r = gfx_v9_4_2_run_shader(adev,
+ &adev->gfx.compute_ring[1],
+ &disp_ibs[1],
+ sgpr96_init_compute_shader_aldebaran,
+ sizeof(sgpr96_init_compute_shader_aldebaran),
+ sgpr96_init_regs_aldebaran,
+ ARRAY_SIZE(sgpr96_init_regs_aldebaran),
+ adev->gfx.cu_info.number * 2,
+ wb_ib.gpu_addr, pattern[1], &fences[1]);
if (r) {
- dev_err(adev->dev, "Init VGPRS: failed to cover all SIMDs\n");
- goto failed;
- } else {
- dev_info(adev->dev, "Init VGPRS Successfully\n");
+ dev_err(adev->dev, "failed to clear next 576 sgprs\n");
+ goto disp0_failed;
}
- memset(ib.ptr, 0, wb_size * sizeof(uint32_t));
- r = gfx_v9_4_2_run_shader(adev, sgpr_init_compute_shader_aldebaran,
- sizeof(sgpr_init_compute_shader_aldebaran),
- sgpr1_init_regs_aldebaran,
- ARRAY_SIZE(sgpr1_init_regs_aldebaran),
- compute_dim_x / 2 * sgpr_work_group_size,
- ib.gpu_addr);
+ r = gfx_v9_4_2_wait_for_waves_assigned(adev,
+ &wb_ib.ptr[1], 0b11111100,
+ pattern[1], adev->gfx.cu_info.number * SIMD_ID_MAX * 6,
+ true);
if (r) {
- dev_err(adev->dev, "Init SGPRS Part1: failed to run shader\n");
- goto failed;
+ dev_err(adev->dev, "wave coverage failed when clearing next 576 sgprs\n");
+ wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+ goto disp1_failed;
}
- r = gfx_v9_4_2_run_shader(adev, sgpr_init_compute_shader_aldebaran,
- sizeof(sgpr_init_compute_shader_aldebaran),
- sgpr2_init_regs_aldebaran,
- ARRAY_SIZE(sgpr2_init_regs_aldebaran),
- compute_dim_x / 2 * sgpr_work_group_size,
- ib.gpu_addr);
+ wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+
+ /* wait for the GPU to finish processing the IB */
+ r = dma_fence_wait(fences[0], false);
if (r) {
- dev_err(adev->dev, "Init SGPRS Part2: failed to run shader\n");
- goto failed;
+ dev_err(adev->dev, "timeout to clear first 224 sgprs\n");
+ goto disp1_failed;
}
- r = gfx_v9_4_2_check_gprs_init_coverage(adev, ib.ptr);
+ r = dma_fence_wait(fences[1], false);
+ if (r) {
+ dev_err(adev->dev, "timeout to clear next 576 sgprs\n");
+ goto disp1_failed;
+ }
+
+ memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
+ r = gfx_v9_4_2_run_shader(adev,
+ &adev->gfx.compute_ring[0],
+ &disp_ibs[2],
+ sgpr64_init_compute_shader_aldebaran,
+
sizeof(sgpr64_init_compute_shader_aldebaran), + sgpr64_init_regs_aldebaran, + ARRAY_SIZE(sgpr64_init_regs_aldebaran), + adev->gfx.cu_info.number, + wb_ib.gpu_addr, pattern[2], &fences[2]); + if (r) { + dev_err(adev->dev, "failed to clear first 256 sgprs\n"); + goto disp1_failed; + } + + r = dma_fence_wait(fences[2], false); + if (r) { + dev_err(adev->dev, "timeout to clear first 256 sgprs\n"); + goto disp2_failed; + } + + r = gfx_v9_4_2_wait_for_waves_assigned(adev, + &wb_ib.ptr[1], 0b1111, + pattern[2], + adev->gfx.cu_info.number * SIMD_ID_MAX * 4, + false); + if (r) { + dev_err(adev->dev, "wave coverage failed when clear first 256 sgprs\n"); + goto disp2_failed; + } + +disp2_failed: + amdgpu_ib_free(adev, &disp_ibs[2], NULL); + dma_fence_put(fences[2]); +disp1_failed: + amdgpu_ib_free(adev, &disp_ibs[1], NULL); + dma_fence_put(fences[1]); +disp0_failed: + amdgpu_ib_free(adev, &disp_ibs[0], NULL); + dma_fence_put(fences[0]); +pro_end: + amdgpu_ib_free(adev, &wb_ib, NULL); + if (r) - dev_err(adev->dev, - "Init SGPRS: failed to cover all SIMDs\n"); + dev_info(adev->dev, "Init SGPRS Failed\n"); else dev_info(adev->dev, "Init SGPRS Successfully\n"); -failed: - amdgpu_ib_free(adev, &ib, NULL); return r; } +static int gfx_v9_4_2_do_vgprs_init(struct amdgpu_device *adev) +{ + int r; + /* CU_ID: 0~15, SIMD_ID: 0~3, WAVE_ID: 0 ~ 9 */ + int wb_size = adev->gfx.config.max_shader_engines * + CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX; + struct amdgpu_ib wb_ib; + struct amdgpu_ib disp_ib; + struct dma_fence *fence; + u32 pattern = 0xa; + + /* bail if the compute ring is not ready */ + if (!adev->gfx.compute_ring[0].sched.ready) + return 0; + + /* allocate the write-back buffer from IB */ + memset(&wb_ib, 0, sizeof(wb_ib)); + r = amdgpu_ib_get(adev, NULL, (1 + wb_size) * sizeof(uint32_t), + AMDGPU_IB_POOL_DIRECT, &wb_ib); + if (r) { + dev_err(adev->dev, "failed to get ib (%d) for wb.\n", r); + return r; + } + memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t)); + + r = gfx_v9_4_2_run_shader(adev, + &adev->gfx.compute_ring[0], + &disp_ib, + vgpr_init_compute_shader_aldebaran, + sizeof(vgpr_init_compute_shader_aldebaran), + vgpr_init_regs_aldebaran, + ARRAY_SIZE(vgpr_init_regs_aldebaran), + adev->gfx.cu_info.number, + wb_ib.gpu_addr, pattern, &fence); + if (r) { + dev_err(adev->dev, "failed to clear vgprs\n"); + goto pro_end; + } + + /* wait for the GPU to finish processing the IB */ + r = dma_fence_wait(fence, false); + if (r) { + dev_err(adev->dev, "timeout to clear vgprs\n"); + goto disp_failed; + } + + r = gfx_v9_4_2_wait_for_waves_assigned(adev, + &wb_ib.ptr[1], 0b1, + pattern, + adev->gfx.cu_info.number * SIMD_ID_MAX, + false); + if (r) { + dev_err(adev->dev, "failed to cover all simds when clearing vgprs\n"); + goto disp_failed; + } + +disp_failed: + amdgpu_ib_free(adev, &disp_ib, NULL); + dma_fence_put(fence); +pro_end: + amdgpu_ib_free(adev, &wb_ib, NULL); + + if (r) + dev_info(adev->dev, "Init VGPRS Failed\n"); + else + dev_info(adev->dev, "Init VGPRS Successfully\n"); + + return r; +} + +int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev) +{ + /* only support when RAS is enabled */ + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return 0; + + gfx_v9_4_2_do_sgprs_init(adev); + + gfx_v9_4_2_do_vgprs_init(adev); + + return 0; +} + static void gfx_v9_4_2_query_sq_timeout_status(struct amdgpu_device *adev); static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev); -- cgit From 3f50033dd88af5b3acfc164c53925189cad4c677 Mon Sep 17 00:00:00 2001 From: Anand 
Moon Date: Wed, 3 Feb 2021 14:40:29 +0530 Subject: drm/i915/adl_s: ADL-S platform Update PCI ids for Mobile BGA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As per Bspec: 53655 Update PCI ids for Mobile BGA. Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: David Airlie Cc: Daniel Vetter Signed-off-by: Anand Moon Reviewed-by: Aditya Swarup Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210203091029.2089-1-anandx.ram.moon@intel.com --- include/drm/i915_pciids.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index ebd0dd1c35b3..3be25768321d 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -640,6 +640,8 @@ INTEL_VGA_DEVICE(0x4681, info), \ INTEL_VGA_DEVICE(0x4682, info), \ INTEL_VGA_DEVICE(0x4683, info), \ + INTEL_VGA_DEVICE(0x4688, info), \ + INTEL_VGA_DEVICE(0x4689, info), \ INTEL_VGA_DEVICE(0x4690, info), \ INTEL_VGA_DEVICE(0x4691, info), \ INTEL_VGA_DEVICE(0x4692, info), \ -- cgit From b08a759df332c4152ef75c7e011195c04abfac04 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 28 Apr 2021 13:14:28 +0300 Subject: drm/i915/backlight: clean up backlight device register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add connector and backlight device name to logging, and propagate error code from backlight_device_register() instead of flattening to -ENODEV. Storing the name in an allocated buffer is unnecessary here, but makes follow-up work on names much cleaner. Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/271206461d9c0f42755792236330b588df3b532e.1619604743.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_panel.c | 37 +++++++++++++++++++----------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index 551fcaa77c2c..3088677ab8a7 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -1372,6 +1372,9 @@ int intel_backlight_device_register(struct intel_connector *connector) struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; struct backlight_properties props; + struct backlight_device *bd; + const char *name; + int ret = 0; if (WARN_ON(panel->backlight.device)) return -ENODEV; @@ -1402,24 +1405,30 @@ int intel_backlight_device_register(struct intel_connector *connector) * Note: using the same name independent of the connector prevents * registration of multiple backlight devices in the driver. 
*/
- panel->backlight.device =
- backlight_device_register("intel_backlight",
- connector->base.kdev,
- connector,
- &intel_backlight_device_ops, &props);
-
- if (IS_ERR(panel->backlight.device)) {
- drm_err(&i915->drm, "Failed to register backlight: %ld\n",
- PTR_ERR(panel->backlight.device));
- panel->backlight.device = NULL;
- return -ENODEV;
+ name = kstrdup("intel_backlight", GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ bd = backlight_device_register(name, connector->base.kdev, connector,
+ &intel_backlight_device_ops, &props);
+ if (IS_ERR(bd)) {
+ drm_err(&i915->drm,
+ "[CONNECTOR:%d:%s] backlight device %s register failed: %ld\n",
+ connector->base.base.id, connector->base.name, name, PTR_ERR(bd));
+ ret = PTR_ERR(bd);
+ goto out;
}
+ panel->backlight.device = bd;
+
drm_dbg_kms(&i915->drm,
- "Connector %s backlight sysfs interface registered\n",
- connector->base.name);
+ "[CONNECTOR:%d:%s] backlight device %s registered\n",
+ connector->base.base.id, connector->base.name, name);
- return 0;
+out:
+ kfree(name);
+
+ return ret;
}
void intel_backlight_device_unregister(struct intel_connector *connector)
-- cgit From 20f85ef89d94e12041bc9b0a335a0e7d6c61daf7 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 28 Apr 2021 13:14:29 +0300 Subject: drm/i915/backlight: use unique backlight device names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Registering multiple backlight devices with intel_backlight name will obviously fail, regardless of whether they're two connectors in the same drm device or two different drm devices. It would be preferable to switch to completely unique names, and sunset the generic intel_backlight name. However, there are apparently users out there that hardcode the name, so the change would break backward compatibility. As a compromise, register the first device with intel_backlight name. In the common case, this is the only backlight device anyway. From the second device on, use card%d-%s-backlight format, for example card0-eDP-2-backlight, to make the name unique. This approach does not preclude us from registering the first device using the same naming scheme in the future. v2: Keep using intel_backlight name for first backlight device Cc: Ville Syrjälä Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2794 Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/7dc3f6974711ce44522189dc9db05d1e6e24e6d8.1619604743.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_panel.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index 3088677ab8a7..a20761079ae0 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -1401,16 +1401,31 @@ int intel_backlight_device_register(struct intel_connector *connector) else props.power = FB_BLANK_POWERDOWN;
- /*
- * Note: using the same name independent of the connector prevents
- * registration of multiple backlight devices in the driver.
- */
name = kstrdup("intel_backlight", GFP_KERNEL);
if (!name)
return -ENOMEM;
bd = backlight_device_register(name, connector->base.kdev, connector,
&intel_backlight_device_ops, &props);
+
+ /*
+ * Using the same name independent of the drm device or connector
+ * prevents registration of multiple backlight devices in the
+ * driver.
However, we need to use the default name for backward
+ * compatibility. Use unique names for subsequent backlight devices as a
+ * fallback when the default name already exists.
+ */
+ if (IS_ERR(bd) && PTR_ERR(bd) == -EEXIST) {
+ kfree(name);
+ name = kasprintf(GFP_KERNEL, "card%d-%s-backlight",
+ i915->drm.primary->index, connector->base.name);
+ if (!name)
+ return -ENOMEM;
+
+ bd = backlight_device_register(name, connector->base.kdev, connector,
+ &intel_backlight_device_ops, &props);
+ }
+
if (IS_ERR(bd)) { drm_err(&i915->drm, "[CONNECTOR:%d:%s] backlight device %s register failed: %ld\n",
-- cgit From ff76d506030daeeeb967be8b8a189bf7aee8e7a8 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Thu, 29 Apr 2021 16:12:26 +1200 Subject: KVM: x86/mmu: Avoid unnecessary page table allocation in kvm_tdp_mmu_map() In kvm_tdp_mmu_map(), while iterating TDP MMU page table entries, it is possible that an SPTE has already been frozen by another thread but the freeze is not done yet, for instance when another thread is still in the middle of zapping a large page. In this case, the !is_shadow_present_pte() check for the old SPTE in tdp_mmu_for_each_pte() may hit true, and allocating a new page table is then unnecessary, since tdp_mmu_set_spte_atomic() will later return false and the page table will need to be freed. Add an is_removed_spte() check before allocating a new page table to avoid this. Signed-off-by: Kai Huang Message-Id: <20210429041226.50279-1-kai.huang@intel.com> Reviewed-by: Ben Gardon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 88f69a6cc492..3c8284841bed 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1009,6 +1009,14 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, }
if (!is_shadow_present_pte(iter.old_spte)) {
+ /*
+ * If the SPTE has been frozen by another thread, just
+ * give up and retry, avoiding unnecessary page table
+ * allocation and freeing.
+ */
+ if (is_removed_spte(iter.old_spte))
+ break;
+
sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level); child_pt = sp->spt;
-- cgit From 1699f65c8b658d434fe92563c906cd1a136c9cb6 Mon Sep 17 00:00:00 2001 From: "Shahin, Md Shahadat Hossain" Date: Fri, 30 Apr 2021 11:52:31 +0000 Subject: kvm/x86: Fix 'lpages' kvm stat for TDP MMU Large pages not being created properly may result in increased memory access time. The 'lpages' kvm stat used to keep track of the current number of large pages in the system, but with the TDP MMU enabled the stat does not show the correct number. This patch extends the lpages counter to cover the TDP case.
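To make the counting rule concrete, here is a minimal user-space sketch of the same idea, with plain C11 atomics standing in for the kernel's atomic64_t; all names are illustrative, not KVM API:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_llong lpages;

/* Mirror of the is_large_pte(old) != is_large_pte(new) logic: only a
 * state *transition* may change the counter. */
static void account_spte_change(bool old_is_large, bool new_is_large)
{
	if (old_is_large == new_is_large)
		return;

	if (old_is_large)
		atomic_fetch_sub(&lpages, 1);
	else
		atomic_fetch_add(&lpages, 1);
}

int main(void)
{
	account_spte_change(false, true);	/* 4K -> 2M: +1 */
	account_spte_change(true, true);	/* same size: no change */
	account_spte_change(true, false);	/* 2M -> 4K: -1 */
	printf("lpages = %lld\n", (long long)atomic_load(&lpages)); /* 0 */
	return 0;
}

The point is that only a large<->small transition touches the counter, so rewriting an SPTE with the same page size leaves the stat unchanged.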
Signed-off-by: Md Shahadat Hossain Shahin Cc: Bartosz Szczepanek Message-Id: <1619783551459.35424@amazon.de> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 3c8284841bed..c743894fe0b7 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -444,6 +444,13 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte); + if (is_large_pte(old_spte) != is_large_pte(new_spte)) { + if (is_large_pte(old_spte)) + atomic64_sub(1, (atomic64_t*)&kvm->stat.lpages); + else + atomic64_add(1, (atomic64_t*)&kvm->stat.lpages); + } + /* * The only times a SPTE should be changed from a non-present to * non-present state is when an MMIO entry is installed/modified/ -- cgit From d981dd15498b188636ec5a7d8ad485e650f63d8d Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 28 Apr 2021 19:08:02 +0800 Subject: KVM: LAPIC: Accurately guarantee busy wait for timer to expire when using hv_timer Commit ee66e453db13d (KVM: lapic: Busy wait for timer to expire when using hv_timer) tries to set ktime->expired_tscdeadline by checking ktime->hv_timer_in_use since lapic timer oneshot/periodic modes which are emulated by vmx preemption timer also get advanced, they leverage the same vmx preemption timer logic with tsc-deadline mode. However, ktime->hv_timer_in_use is cleared before apic_timer_expired() handling, let's delay this clearing in preemption-disabled region. Fixes: ee66e453db13d ("KVM: lapic: Busy wait for timer to expire when using hv_timer") Reviewed-by: Sean Christopherson Signed-off-by: Wanpeng Li Message-Id: <1619608082-4187-1-git-send-email-wanpengli@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 152591f9243a..c0ebef560bd1 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1913,8 +1913,8 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) if (!apic->lapic_timer.hv_timer_in_use) goto out; WARN_ON(rcuwait_active(&vcpu->wait)); - cancel_hv_timer(apic); apic_timer_expired(apic, false); + cancel_hv_timer(apic); if (apic_lvtt_period(apic) && apic->lapic_timer.period) { advance_periodic_target_expiration(apic); -- cgit From 262de4102c7bb8e59f26a967a8ffe8cce85cc537 Mon Sep 17 00:00:00 2001 From: Benjamin Segall Date: Thu, 29 Apr 2021 16:22:34 +0000 Subject: kvm: exit halt polling on need_resched() as well single_task_running() is usually more general than need_resched() but CFS_BANDWIDTH throttling will use resched_task() when there is just one task to get the task to block. This was causing long-need_resched warnings and was likely allowing VMs to overrun their quota when halt polling. 
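A minimal user-space sketch of the loop shape after the change; the three predicates are stand-ins for event delivery, single_task_running() and need_resched(), not the kernel implementation:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static bool single_task_running_stub(void) { return true; }	/* stand-in */
static bool need_resched_stub(void) { return false; }		/* stand-in */
static bool event_arrived(void) { return false; }		/* stand-in */

static long elapsed_ns(const struct timespec *a, const struct timespec *b)
{
	return (b->tv_sec - a->tv_sec) * 1000000000L + (b->tv_nsec - a->tv_nsec);
}

static bool poll_for_event(long budget_ns)
{
	struct timespec start, now;

	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		if (event_arrived())
			return true;
		clock_gettime(CLOCK_MONOTONIC, &now);
		/*
		 * Bail out not only when another task becomes runnable but
		 * also when the scheduler asks us to yield; spinning past
		 * need_resched() is what let a throttled vCPU overrun its
		 * CFS quota.
		 */
	} while (single_task_running_stub() && !need_resched_stub() &&
		 elapsed_ns(&start, &now) < budget_ns);

	return false;
}

int main(void)
{
	printf("event seen: %d\n", poll_for_event(100000));
	return 0;
}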
Signed-off-by: Ben Segall Signed-off-by: Venkatesh Srinivas Message-Id: <20210429162233.116849-1-venkateshs@chromium.org> Signed-off-by: Paolo Bonzini Cc: stable@vger.kernel.org Reviewed-by: Jim Mattson --- virt/kvm/kvm_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2799c6660cce..b9f12da6af0e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2973,7 +2973,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) goto out; } poll_end = cur = ktime_get(); - } while (single_task_running() && ktime_before(cur, stop)); + } while (single_task_running() && !need_resched() && + ktime_before(cur, stop)); } prepare_to_rcuwait(&vcpu->wait); -- cgit From deee59bacb2402c20e6b1b6800f9a5127367eb2a Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 3 May 2021 15:54:42 +0300 Subject: KVM: nSVM: fix a typo in svm_leave_nested When forcibly leaving the nested mode, we should switch to vmcb01 Fixes: 4995a3685f1b ("KVM: SVM: Use a separate vmcb for the nested L2 guest") Signed-off-by: Maxim Levitsky Message-Id: <20210503125446.1353307-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 540d43ba2cf4..3321220f3deb 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -886,7 +886,7 @@ void svm_leave_nested(struct vcpu_svm *svm) svm->nested.nested_run_pending = 0; leave_guest_mode(vcpu); - svm_switch_vmcb(svm, &svm->nested.vmcb02); + svm_switch_vmcb(svm, &svm->vmcb01); nested_svm_uninit_mmu_context(vcpu); vmcb_mark_all_dirty(svm->vmcb); -- cgit From c74ad08f3333db2e44d3346b863f6d10d35e37dd Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 3 May 2021 15:54:43 +0300 Subject: KVM: nSVM: fix few bugs in the vmcb02 caching logic * Define and use an invalid GPA (all ones) for init value of last and current nested vmcb physical addresses. * Reset the current vmcb12 gpa to the invalid value when leaving the nested mode, similar to what is done on nested vmexit. * Reset the last seen vmcb12 address when disabling the nested SVM, as it relies on vmcb02 fields which are freed at that point. Fixes: 4995a3685f1b ("KVM: SVM: Use a separate vmcb for the nested L2 guest") Signed-off-by: Maxim Levitsky Message-Id: <20210503125446.1353307-3-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/nested.c | 11 +++++++++++ arch/x86/kvm/svm/svm.c | 4 ++-- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index cbbcee0a84f9..848956bb3cf1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -113,6 +113,7 @@ #define VALID_PAGE(x) ((x) != INVALID_PAGE) #define UNMAPPED_GVA (~(gpa_t)0) +#define INVALID_GPA (~(gpa_t)0) /* KVM Hugepage definitions for x86 */ #define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 3321220f3deb..a88c64e004c3 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -872,6 +872,15 @@ void svm_free_nested(struct vcpu_svm *svm) __free_page(virt_to_page(svm->nested.vmcb02.ptr)); svm->nested.vmcb02.ptr = NULL; + /* + * When last_vmcb12_gpa matches the current vmcb12 gpa, + * some vmcb12 fields are not loaded if they are marked clean + * in the vmcb12, since in this case they are up to date already. 
+ * + * When the vmcb02 is freed, this optimization becomes invalid. + */ + svm->nested.last_vmcb12_gpa = INVALID_GPA; + svm->nested.initialized = false; } @@ -884,6 +893,8 @@ void svm_leave_nested(struct vcpu_svm *svm) if (is_guest_mode(vcpu)) { svm->nested.nested_run_pending = 0; + svm->nested.vmcb12_gpa = INVALID_GPA; + leave_guest_mode(vcpu); svm_switch_vmcb(svm, &svm->vmcb01); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9790c73f2a32..be5cf612ab1f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1235,8 +1235,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm->current_vmcb->asid_generation = 0; svm->asid = 0; - svm->nested.vmcb12_gpa = 0; - svm->nested.last_vmcb12_gpa = 0; + svm->nested.vmcb12_gpa = INVALID_GPA; + svm->nested.last_vmcb12_gpa = INVALID_GPA; vcpu->arch.hflags = 0; if (!kvm_pause_in_guest(vcpu->kvm)) { -- cgit From 9d290e16432cacd448475d38dec2753b75b9665f Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 3 May 2021 15:54:44 +0300 Subject: KVM: nSVM: leave the guest mode prior to loading a nested state This allows the KVM to load the nested state more than once without warnings. Signed-off-by: Maxim Levitsky Message-Id: <20210503125446.1353307-4-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index a88c64e004c3..32400cba608d 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1309,12 +1309,15 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, * L2 registers if needed are moved from the current VMCB to VMCB02. */ + if (is_guest_mode(vcpu)) + svm_leave_nested(svm); + else + svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save; + svm->nested.nested_run_pending = !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa; - if (svm->current_vmcb == &svm->vmcb01) - svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save; svm->vmcb01.ptr->save.es = save->es; svm->vmcb01.ptr->save.cs = save->cs; -- cgit From 7f6231a39117c2781beead59d6ae4923c2703147 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Mon, 3 May 2021 16:24:46 +1200 Subject: KVM: x86/mmu: Fix kdoc of __handle_changed_spte The function name of kdoc of __handle_changed_spte() should be itself, rather than handle_changed_spte(). Fix the typo. Signed-off-by: Kai Huang Message-Id: <20210503042446.154695-1-kai.huang@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index c743894fe0b7..95eeb5ac6a8a 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -388,7 +388,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt, } /** - * handle_changed_spte - handle bookkeeping associated with an SPTE change + * __handle_changed_spte - handle bookkeeping associated with an SPTE change * @kvm: kvm instance * @as_id: the address space of the paging structure the SPTE was a part of * @gfn: the base GFN that was mapped by the SPTE -- cgit From 8899a5fc7da516460f841189a28aac0b52b554fd Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 30 Apr 2021 18:03:03 +0100 Subject: KVM: x86: Fix potential fput on a null source_kvm_file The fget can potentially return null, so the fput on the error return path can cause a null pointer dereference. 
Fix this by checking for a null source_kvm_file before doing a fput. Addresses-Coverity: ("Dereference null return") Fixes: 54526d1fd593 ("KVM: x86: Support KVM VMs sharing SEV context") Signed-off-by: Colin Ian King Message-Id: <20210430170303.131924-1-colin.king@canonical.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 1356ee095cd5..8b11c711a0e4 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1764,7 +1764,8 @@ e_mirror_unlock: e_source_unlock: mutex_unlock(&source_kvm->lock); e_source_put: - fput(source_kvm_file); + if (source_kvm_file) + fput(source_kvm_file); return ret; } -- cgit From f4c50deecaed632e9f413135fc3c13bacfbe71f5 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 4 May 2021 11:14:00 +0300 Subject: drm/i915/audio: simplify, don't mask out in all branches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lift the masking outside of the if branches. No functional changes. Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/a87fd5e66b52c4d52a568888e1b8037841786fd2.1620115982.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_audio.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index b40e929a167e..60083431228c 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -600,31 +600,24 @@ static void enable_audio_dsc_wa(struct intel_encoder *encoder, (crtc_state->hw.adjusted_mode.hdisplay >= 3840 && crtc_state->hw.adjusted_mode.vdisplay >= 2160)) { /* Get hblank early enable value required */ + val &= ~HBLANK_START_COUNT_MASK(pipe); hblank_early_prog = calc_hblank_early_prog(encoder, crtc_state); - if (hblank_early_prog < 32) { - val &= ~HBLANK_START_COUNT_MASK(pipe); + if (hblank_early_prog < 32) val |= HBLANK_START_COUNT(pipe, HBLANK_START_COUNT_32); - } else if (hblank_early_prog < 64) { - val &= ~HBLANK_START_COUNT_MASK(pipe); + else if (hblank_early_prog < 64) val |= HBLANK_START_COUNT(pipe, HBLANK_START_COUNT_64); - } else if (hblank_early_prog < 96) { - val &= ~HBLANK_START_COUNT_MASK(pipe); + else if (hblank_early_prog < 96) val |= HBLANK_START_COUNT(pipe, HBLANK_START_COUNT_96); - } else { - val &= ~HBLANK_START_COUNT_MASK(pipe); + else val |= HBLANK_START_COUNT(pipe, HBLANK_START_COUNT_128); - } /* Get samples room value required */ + val &= ~NUMBER_SAMPLES_PER_LINE_MASK(pipe); samples_room = calc_samples_room(crtc_state); - if (samples_room < 3) { - val &= ~NUMBER_SAMPLES_PER_LINE_MASK(pipe); + if (samples_room < 3) val |= NUMBER_SAMPLES_PER_LINE(pipe, samples_room); - } else { - /* Program 0 i.e "All Samples available in buffer" */ - val &= ~NUMBER_SAMPLES_PER_LINE_MASK(pipe); + else /* Program 0 i.e "All Samples available in buffer" */ val |= NUMBER_SAMPLES_PER_LINE(pipe, 0x0); - } } intel_de_write(i915, AUD_CONFIG_BE, val); -- cgit From 31824c03faac432171032fe9a50567ea7973d68e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 4 May 2021 11:14:01 +0300 Subject: drm/i915/audio: fix indentation, remove extra braces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cleanup the code. No functional changes. 
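The previous audio patch ("simplify, don't mask out in all branches") follows a refactoring pattern worth spelling out; a minimal sketch, with an invented register field layout (COUNT_MASK and the COUNT_* values are not real i915 definitions):

#include <stdint.h>
#include <stdio.h>

#define COUNT_MASK	0x7u	/* invented field layout */

enum { COUNT_32 = 1, COUNT_64 = 2, COUNT_96 = 3, COUNT_128 = 4 };

static uint32_t program_hblank_count(uint32_t val, unsigned int prog)
{
	val &= ~COUNT_MASK;	/* clear the field once, not in every branch */

	if (prog < 32)
		val |= COUNT_32;
	else if (prog < 64)
		val |= COUNT_64;
	else if (prog < 96)
		val |= COUNT_96;
	else
		val |= COUNT_128;

	return val;
}

int main(void)
{
	/* the field is cleared and re-programmed regardless of the branch taken */
	printf("0x%08x\n", program_hblank_count(0xffffffffu, 70));
	return 0;
}

Hoisting the clear also makes it impossible to forget the mask in a newly added branch.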
Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/6c2f6afa4c8866f8c1714b4f8dba9ea2d1509e4a.1620115983.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/display/intel_audio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 60083431228c..75871ee544a7 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -597,8 +597,8 @@ static void enable_audio_dsc_wa(struct intel_encoder *encoder, val |= HBLANK_EARLY_ENABLE_TGL(pipe); if (crtc_state->dsc.compression_enable && - (crtc_state->hw.adjusted_mode.hdisplay >= 3840 && - crtc_state->hw.adjusted_mode.vdisplay >= 2160)) { + crtc_state->hw.adjusted_mode.hdisplay >= 3840 && + crtc_state->hw.adjusted_mode.vdisplay >= 2160) { /* Get hblank early enable value required */ val &= ~HBLANK_START_COUNT_MASK(pipe); hblank_early_prog = calc_hblank_early_prog(encoder, crtc_state); -- cgit From 5e753a817b2d5991dfe8a801b7b1e8e79a1c5a20 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 30 Apr 2021 19:59:51 +0800 Subject: btrfs: fix unmountable seed device after fstrim The following test case reproduces an issue of wrongly freeing in-use blocks on the readonly seed device when fstrim is called on the rw sprout device. As shown below. Create a seed device and add a sprout device to it: $ mkfs.btrfs -fq -dsingle -msingle /dev/loop0 $ btrfstune -S 1 /dev/loop0 $ mount /dev/loop0 /btrfs $ btrfs dev add -f /dev/loop1 /btrfs BTRFS info (device loop0): relocating block group 290455552 flags system BTRFS info (device loop0): relocating block group 1048576 flags system BTRFS info (device loop0): disk added /dev/loop1 $ umount /btrfs Mount the sprout device and run fstrim: $ mount /dev/loop1 /btrfs $ fstrim /btrfs $ umount /btrfs Now try to mount the seed device, and it fails: $ mount /dev/loop0 /btrfs mount: /btrfs: wrong fs type, bad option, bad superblock on /dev/loop0, missing codepage or helper program, or other error. Block 5292032 is missing on the readonly seed device: $ dmesg -kt | tail BTRFS error (device loop0): bad tree block start, want 5292032 have 0 BTRFS warning (device loop0): couldn't read-tree root BTRFS error (device loop0): open_ctree failed From the dump-tree of the seed device (taken before the fstrim). Block 5292032 belonged to the block group starting at 5242880: $ btrfs inspect dump-tree -e /dev/loop0 | grep -A1 BLOCK_GROUP item 3 key (5242880 BLOCK_GROUP_ITEM 8388608) itemoff 16169 itemsize 24 block group used 114688 chunk_objectid 256 flags METADATA From the dump-tree of the sprout device (taken before the fstrim). fstrim used block-group 5242880 to find the related free space to free: $ btrfs inspect dump-tree -e /dev/loop1 | grep -A1 BLOCK_GROUP item 1 key (5242880 BLOCK_GROUP_ITEM 8388608) itemoff 16226 itemsize 24 block group used 32768 chunk_objectid 256 flags METADATA BPF kernel tracing the fstrim command finds the missing block 5292032 within the range of the discarded blocks as below: kprobe:btrfs_discard_extent { printf("freeing start %llu end %llu num_bytes %llu:\n", arg1, arg1+arg2, arg2); } freeing start 5259264 end 5406720 num_bytes 147456 Fix this by avoiding the discard command to the readonly seed device. 
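A minimal sketch of the guard the fix adds, using simplified stand-in structures rather than the real btrfs types:

#include <stdbool.h>
#include <stdio.h>

struct dev_stub {
	const char *name;
	bool present;	/* has a bdev */
	bool writeable;	/* BTRFS_DEV_STATE_WRITEABLE stand-in */
};

static void discard_stripes(const struct dev_stub *devs, int n)
{
	for (int i = 0; i < n; i++) {
		if (!devs[i].present)
			continue;	/* degraded mount: device missing */
		if (!devs[i].writeable)
			continue;	/* read-only seed device: never trim it */
		printf("discard issued on %s\n", devs[i].name);
	}
}

int main(void)
{
	const struct dev_stub devs[] = {
		{ "seed (ro)",   true, false },
		{ "sprout (rw)", true, true },
	};

	discard_stripes(devs, 2);	/* only the sprout is trimmed */
	return 0;
}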
Reported-by: Chris Murphy CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Signed-off-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7a28314189b4..f1d15b68994a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1340,12 +1340,16 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, stripe = bbio->stripes; for (i = 0; i < bbio->num_stripes; i++, stripe++) { u64 bytes;
+ struct btrfs_device *device = stripe->dev;
- if (!stripe->dev->bdev) {
+ if (!device->bdev) {
ASSERT(btrfs_test_opt(fs_info, DEGRADED));
continue;
}
+ if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
+ continue;
+
ret = do_discard_extent(stripe, &bytes);
if (!ret) {
discarded_bytes += bytes;
-- cgit From 784daf2b9628f2d0117f1f0b578cfe5ab6634919 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Fri, 30 Apr 2021 15:34:17 +0200 Subject: btrfs: zoned: sanity check zone type The fstests test case generic/475 creates a dm-linear device that gets changed to a dm-error device. This leads to errors in loading the block group's zone information when running on a zoned file system, ultimately resulting in a list corruption. When running on a kernel with list debugging enabled this leads to the following crash. BTRFS: error (device dm-2) in cleanup_transaction:1953: errno=-5 IO failure kernel BUG at lib/list_debug.c:54! invalid opcode: 0000 [#1] SMP PTI CPU: 1 PID: 2433 Comm: umount Tainted: G W 5.12.0+ #1018 RIP: 0010:__list_del_entry_valid.cold+0x1d/0x47 RSP: 0018:ffffc90001473df0 EFLAGS: 00010296 RAX: 0000000000000054 RBX: ffff8881038fd000 RCX: ffffc90001473c90 RDX: 0000000100001a31 RSI: 0000000000000003 RDI: 0000000000000003 RBP: ffff888308871108 R08: 0000000000000003 R09: 0000000000000001 R10: 3961373532383838 R11: 6666666620736177 R12: ffff888308871000 R13: ffff8881038fd088 R14: ffff8881038fdc78 R15: dead000000000100 FS: 00007f353c9b1540(0000) GS:ffff888627d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f353cc2c710 CR3: 000000018e13c000 CR4: 00000000000006a0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_free_block_groups+0xc9/0x310 [btrfs] close_ctree+0x2ee/0x31a [btrfs] ? call_rcu+0x8f/0x270 ? mutex_lock+0x1c/0x40 generic_shutdown_super+0x67/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x90 cleanup_mnt+0x13e/0x1b0 task_work_run+0x63/0xb0 exit_to_user_mode_loop+0xd9/0xe0 exit_to_user_mode_prepare+0x3e/0x60 syscall_exit_to_user_mode+0x1d/0x50 entry_SYSCALL_64_after_hwframe+0x44/0xae As dm-error has no support for zones, btrfs will run its zone emulation mode on this device. The zone emulation mode emulates conventional zones, so bail out if the zone bitmap that gets populated on mount sees the zone as sequential while we're thinking it's a conventional zone when creating a block group. Note: this scenario is unlikely in a real world application and can only happen by this (ab)use of device-mapper targets.
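A minimal sketch of the shape of such a sanity check, reduced to a generic expected-vs-reported comparison (simplified types, not the btrfs zone code):

#include <stdio.h>

enum zone_type { ZONE_CONVENTIONAL, ZONE_SEQ_WRITE_REQUIRED };

/* If the device reports a zone type that contradicts what the block
 * group setup assumed, the zone information cannot be trusted and the
 * load must fail with -EIO instead of corrupting allocator state. */
static int check_zone(enum zone_type expected, enum zone_type reported)
{
	if (reported != expected)
		return -5;	/* -EIO */
	return 0;
}

int main(void)
{
	printf("%d\n", check_zone(ZONE_CONVENTIONAL, ZONE_SEQ_WRITE_REQUIRED));
	return 0;
}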
CC: stable@vger.kernel.org # 5.12+ Signed-off-by: Naohiro Aota Signed-off-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 70b23a0d03b1..304ce64c70a4 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1126,6 +1126,11 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) goto out; } + if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) { + ret = -EIO; + goto out; + } + switch (zone.cond) { case BLK_ZONE_COND_OFFLINE: case BLK_ZONE_COND_READONLY: -- cgit From 959086ecd677745f24526426c33a559fc3d2d2c1 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 14 Apr 2021 18:51:58 +0300 Subject: drm/i915: Pass intel_framebuffer instad of drm_framebuffer to intel_fill_fb_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make one step to pass intel_framebuffer to all intel_fb functions. Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210414155208.3161335-2-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 2 +- drivers/gpu/drm/i915/display/intel_fb.c | 63 ++++++++++++++-------------- drivers/gpu/drm/i915/display/intel_fb.h | 2 +- 3 files changed, 33 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index e246e5cf7586..78745fa77c07 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11253,7 +11253,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, fb->obj[i] = &obj->base; } - ret = intel_fill_fb_info(dev_priv, fb); + ret = intel_fill_fb_info(dev_priv, intel_fb); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index c8aaca3e79e9..21271a6976f1 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -163,17 +163,17 @@ void intel_fb_plane_get_subsampling(int *hsub, int *vsub, *vsub = 32; } -static void intel_fb_plane_dims(int *w, int *h, struct drm_framebuffer *fb, int color_plane) +static void intel_fb_plane_dims(const struct intel_framebuffer *fb, int color_plane, int *w, int *h) { - int main_plane = is_ccs_plane(fb, color_plane) ? - skl_ccs_to_main_plane(fb, color_plane) : 0; + int main_plane = is_ccs_plane(&fb->base, color_plane) ? 
+ skl_ccs_to_main_plane(&fb->base, color_plane) : 0; int main_hsub, main_vsub; int hsub, vsub; - intel_fb_plane_get_subsampling(&main_hsub, &main_vsub, fb, main_plane); - intel_fb_plane_get_subsampling(&hsub, &vsub, fb, color_plane); - *w = fb->width / main_hsub / hsub; - *h = fb->height / main_vsub / vsub; + intel_fb_plane_get_subsampling(&main_hsub, &main_vsub, &fb->base, main_plane); + intel_fb_plane_get_subsampling(&hsub, &vsub, &fb->base, color_plane); + *w = fb->base.width / main_hsub / hsub; + *h = fb->base.height / main_vsub / vsub; } static u32 intel_adjust_tile_offset(int *x, int *y, @@ -735,19 +735,18 @@ static void intel_fb_view_init(struct intel_fb_view *view, enum i915_ggtt_view_t view->gtt.type = view_type; } -int intel_fill_fb_info(struct drm_i915_private *i915, struct drm_framebuffer *fb) +int intel_fill_fb_info(struct drm_i915_private *i915, struct intel_framebuffer *fb) { - struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); - struct drm_i915_gem_object *obj = intel_fb_obj(fb); + struct drm_i915_gem_object *obj = intel_fb_obj(&fb->base); u32 gtt_offset_rotated = 0; u32 gtt_offset_remapped = 0; unsigned int max_size = 0; - int i, num_planes = fb->format->num_planes; + int i, num_planes = fb->base.format->num_planes; unsigned int tile_size = intel_tile_size(i915); - intel_fb_view_init(&intel_fb->normal_view, I915_GGTT_VIEW_NORMAL); - intel_fb_view_init(&intel_fb->rotated_view, I915_GGTT_VIEW_ROTATED); - intel_fb_view_init(&intel_fb->remapped_view, I915_GGTT_VIEW_REMAPPED); + intel_fb_view_init(&fb->normal_view, I915_GGTT_VIEW_NORMAL); + intel_fb_view_init(&fb->rotated_view, I915_GGTT_VIEW_ROTATED); + intel_fb_view_init(&fb->remapped_view, I915_GGTT_VIEW_REMAPPED); for (i = 0; i < num_planes; i++) { struct fb_plane_view_dims view_dims; @@ -762,45 +761,45 @@ int intel_fill_fb_info(struct drm_i915_private *i915, struct drm_framebuffer *fb * is consumed by the driver and not passed to DE. Skip the * arithmetic related to alignment and offset calculation. */ - if (is_gen12_ccs_cc_plane(fb, i)) { - if (IS_ALIGNED(fb->offsets[i], PAGE_SIZE)) + if (is_gen12_ccs_cc_plane(&fb->base, i)) { + if (IS_ALIGNED(fb->base.offsets[i], PAGE_SIZE)) continue; else return -EINVAL; } - cpp = fb->format->cpp[i]; - intel_fb_plane_dims(&width, &height, fb, i); + cpp = fb->base.format->cpp[i]; + intel_fb_plane_dims(fb, i, &width, &height); - ret = convert_plane_offset_to_xy(intel_fb, i, width, &x, &y); + ret = convert_plane_offset_to_xy(fb, i, width, &x, &y); if (ret) return ret; - init_plane_view_dims(intel_fb, i, width, height, &view_dims); + init_plane_view_dims(fb, i, width, height, &view_dims); /* * First pixel of the framebuffer from * the start of the normal gtt mapping. 
*/ - intel_fb->normal_view.color_plane[i].x = x; - intel_fb->normal_view.color_plane[i].y = y; - intel_fb->normal_view.color_plane[i].stride = intel_fb->base.pitches[i]; + fb->normal_view.color_plane[i].x = x; + fb->normal_view.color_plane[i].y = y; + fb->normal_view.color_plane[i].stride = fb->base.pitches[i]; - offset = calc_plane_aligned_offset(intel_fb, i, &x, &y); + offset = calc_plane_aligned_offset(fb, i, &x, &y); /* Y or Yf modifiers required for 90/270 rotation */ - if (fb->modifier == I915_FORMAT_MOD_Y_TILED || - fb->modifier == I915_FORMAT_MOD_Yf_TILED) - gtt_offset_rotated += calc_plane_remap_info(intel_fb, i, &view_dims, + if (fb->base.modifier == I915_FORMAT_MOD_Y_TILED || + fb->base.modifier == I915_FORMAT_MOD_Yf_TILED) + gtt_offset_rotated += calc_plane_remap_info(fb, i, &view_dims, offset, gtt_offset_rotated, x, y, - &intel_fb->rotated_view); + &fb->rotated_view); - if (intel_fb_needs_pot_stride_remap(intel_fb)) - gtt_offset_remapped += calc_plane_remap_info(intel_fb, i, &view_dims, + if (intel_fb_needs_pot_stride_remap(fb)) + gtt_offset_remapped += calc_plane_remap_info(fb, i, &view_dims, offset, gtt_offset_remapped, x, y, - &intel_fb->remapped_view); + &fb->remapped_view); - size = calc_plane_normal_size(intel_fb, i, &view_dims, x, y); + size = calc_plane_normal_size(fb, i, &view_dims, x, y); /* how many tiles in total needed in the bo */ max_size = max(max_size, offset + size); } diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h index 13244ec1ad21..7cec77bb5046 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fb.h @@ -45,7 +45,7 @@ u32 intel_plane_compute_aligned_offset(int *x, int *y, const struct intel_plane_state *state, int color_plane); -int intel_fill_fb_info(struct drm_i915_private *i915, struct drm_framebuffer *fb); +int intel_fill_fb_info(struct drm_i915_private *i915, struct intel_framebuffer *fb); void intel_fb_fill_view(const struct intel_framebuffer *fb, unsigned int rotation, struct intel_fb_view *view); int intel_plane_compute_gtt(struct intel_plane_state *plane_state); -- cgit From 77364faf21b4105ee5adbb4844fdfb461334d249 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 30 Apr 2021 11:06:55 -0700 Subject: btrfs: initialize return variable in cleanup_free_space_cache_v1 Static analysis reports this problem free-space-cache.c:3965:2: warning: Undefined or garbage value returned return ret; ^~~~~~~~~~ ret is set in the node handling loop. Treat doing nothing as a success and initialize ret to 0, although it's unlikely the loop would be skipped. We always have block groups, but as it could lead to transaction abort in the caller it's better to be safe. 
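A minimal sketch of the bug class being fixed here (an illustrative stand-in for the block-group loop, not btrfs code):

#include <stdio.h>

static int process_items(const int *items, int n)
{
	int ret = 0;	/* the fix: "nothing to process" counts as success */

	for (int i = 0; i < n; i++) {
		ret = items[i] < 0 ? -22 : 0;	/* stand-in for per-node work */
		if (ret)
			break;
	}

	return ret;	/* well defined even when the loop never ran */
}

int main(void)
{
	printf("%d\n", process_items(NULL, 0));	/* 0, not stack garbage */
	return 0;
}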
CC: stable@vger.kernel.org # 5.12+ Signed-off-by: Tom Rix Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index e54466fc101f..4806295116d8 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -3949,7 +3949,7 @@ static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info, { struct btrfs_block_group *block_group; struct rb_node *node; - int ret; + int ret = 0; btrfs_info(fs_info, "cleaning free space cache v1"); -- cgit From 0a269a008f837e76ce285679ab3005059fadc2a6 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 14 Apr 2021 14:35:40 +0200 Subject: x86/kvm: Fix pr_info() for async PF setup/teardown 'pr_fmt' already has 'kvm-guest: ' so 'KVM' prefix is redundant. "Unregister pv shared memory" is very ambiguous, it's hard to say which particular PV feature it relates to. Signed-off-by: Vitaly Kuznetsov Message-Id: <20210414123544.1060604-2-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d307c22e5c18..dc440bb69222 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -345,7 +345,7 @@ static void kvm_guest_cpu_init(void) wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); __this_cpu_write(apf_reason.enabled, 1); - pr_info("KVM setup async PF for cpu %d\n", smp_processor_id()); + pr_info("setup async PF for cpu %d\n", smp_processor_id()); } if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) { @@ -371,7 +371,7 @@ static void kvm_pv_disable_apf(void) wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); __this_cpu_write(apf_reason.enabled, 0); - pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id()); + pr_info("disable async PF for cpu %d\n", smp_processor_id()); } static void kvm_pv_guest_cpu_reboot(void *unused) -- cgit From 703cd9ae0d674a7a1d5d2713e0ddbad87dc0e7a9 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Sat, 1 May 2021 03:28:52 +0300 Subject: drm/i915/tgl+: Add the missing MC CCS/XYUV8888 format support Make sure that the XYUV8888 format is handled correctly when it's used with a MC_CCS modifier framebuffer. Besides this format not working, the driver will also return an incorrect error value when trying to use it, indicating that the second color plane in the framebuffer is set unexpectedly. 
Signed-off-by: Imre Deak Reviewed-by: Juha-Pekka Heikkila Link: https://patchwork.freedesktop.org/patch/msgid/20210501002853.4132009-1-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 78745fa77c07..cc673bb63941 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1262,6 +1262,9 @@ static const struct drm_format_info gen12_ccs_formats[] = { { .format = DRM_FORMAT_VYUY, .num_planes = 2, .char_per_block = { 2, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 }, .hsub = 2, .vsub = 1, .is_yuv = true }, + { .format = DRM_FORMAT_XYUV8888, .num_planes = 2, + .char_per_block = { 4, 1 }, .block_w = { 1, 2 }, .block_h = { 1, 1 }, + .hsub = 1, .vsub = 1, .is_yuv = true }, { .format = DRM_FORMAT_NV12, .num_planes = 4, .char_per_block = { 1, 2, 1, 1 }, .block_w = { 1, 1, 4, 4 }, .block_h = { 1, 1, 1, 1 }, .hsub = 2, .vsub = 2, .is_yuv = true }, -- cgit From 7785ae0b51a0441ad79fb331472f2d4b7159ab71 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 30 Apr 2021 17:39:44 +0300 Subject: drm/i915: Don't include intel_de.h from intel_display_types.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hoist the intel_de.h include from intel_display_types.h one level up. I need this in order to untangle the include order so that I can add tracepoints into intel_de.h. This little cocci script did most of the work for me: @find@ @@ ( intel_de_read(...) | intel_de_read_fw(...) | intel_de_write(...) | intel_de_write_fw(...) ) @has_include@ @@ ( #include "intel_de.h" | #include "display/intel_de.h" ) @depends on find && !has_include@ @@ + #include "intel_de.h" #include "intel_display_types.h" @depends on find && !has_include@ @@ + #include "display/intel_de.h" #include "display/intel_display_types.h" Cc: Cooper Chiou Reviewed-by: Anshuman Gupta Reviewed-by: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210430143945.6776-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/g4x_dp.c | 1 + drivers/gpu/drm/i915/display/g4x_hdmi.c | 1 + drivers/gpu/drm/i915/display/i9xx_plane.c | 1 + drivers/gpu/drm/i915/display/icl_dsi.c | 1 + drivers/gpu/drm/i915/display/intel_audio.c | 1 + drivers/gpu/drm/i915/display/intel_cdclk.c | 1 + drivers/gpu/drm/i915/display/intel_color.c | 1 + drivers/gpu/drm/i915/display/intel_combo_phy.c | 1 + drivers/gpu/drm/i915/display/intel_crt.c | 1 + drivers/gpu/drm/i915/display/intel_cursor.c | 1 + drivers/gpu/drm/i915/display/intel_ddi.c | 1 + drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c | 1 + drivers/gpu/drm/i915/display/intel_display.c | 1 + drivers/gpu/drm/i915/display/intel_display_debugfs.c | 1 + drivers/gpu/drm/i915/display/intel_display_power.c | 1 + drivers/gpu/drm/i915/display/intel_display_types.h | 1 - drivers/gpu/drm/i915/display/intel_dp.c | 1 + drivers/gpu/drm/i915/display/intel_dp_hdcp.c | 1 + drivers/gpu/drm/i915/display/intel_dp_mst.c | 1 + drivers/gpu/drm/i915/display/intel_dpio_phy.c | 1 + drivers/gpu/drm/i915/display/intel_dpll.c | 1 + drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 1 + drivers/gpu/drm/i915/display/intel_dsb.c | 1 + drivers/gpu/drm/i915/display/intel_dvo.c | 1 + drivers/gpu/drm/i915/display/intel_fbc.c | 1 + drivers/gpu/drm/i915/display/intel_fdi.c | 1 + drivers/gpu/drm/i915/display/intel_fifo_underrun.c | 1 + 
drivers/gpu/drm/i915/display/intel_gmbus.c | 1 + drivers/gpu/drm/i915/display/intel_hdcp.c | 1 + drivers/gpu/drm/i915/display/intel_hdmi.c | 1 + drivers/gpu/drm/i915/display/intel_lspcon.c | 1 + drivers/gpu/drm/i915/display/intel_lvds.c | 1 + drivers/gpu/drm/i915/display/intel_overlay.c | 1 + drivers/gpu/drm/i915/display/intel_panel.c | 1 + drivers/gpu/drm/i915/display/intel_pipe_crc.c | 1 + drivers/gpu/drm/i915/display/intel_pps.c | 1 + drivers/gpu/drm/i915/display/intel_psr.c | 1 + drivers/gpu/drm/i915/display/intel_sdvo.c | 1 + drivers/gpu/drm/i915/display/intel_sprite.c | 1 + drivers/gpu/drm/i915/display/intel_tv.c | 1 + drivers/gpu/drm/i915/display/intel_vdsc.c | 1 + drivers/gpu/drm/i915/display/intel_vrr.c | 1 + drivers/gpu/drm/i915/display/skl_scaler.c | 1 + drivers/gpu/drm/i915/display/skl_universal_plane.c | 1 + drivers/gpu/drm/i915/display/vlv_dsi.c | 1 + drivers/gpu/drm/i915/display/vlv_dsi_pll.c | 1 + drivers/gpu/drm/i915/i915_irq.c | 1 + drivers/gpu/drm/i915/intel_pm.c | 1 + 48 files changed, 47 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c index dfe3cf328d13..de0f358184aa 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.c +++ b/drivers/gpu/drm/i915/display/g4x_dp.c @@ -8,6 +8,7 @@ #include "g4x_dp.h" #include "intel_audio.h" #include "intel_connector.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dp.h" #include "intel_dp_link_training.h" diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c index 78f93506ffaf..be352e9f0afc 100644 --- a/drivers/gpu/drm/i915/display/g4x_hdmi.c +++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c @@ -8,6 +8,7 @@ #include "g4x_hdmi.h" #include "intel_audio.h" #include "intel_connector.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dpio_phy.h" #include "intel_fifo_underrun.h" diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.c b/drivers/gpu/drm/i915/display/i9xx_plane.c index 393ef09ba0ac..9643c45a2209 100644 --- a/drivers/gpu/drm/i915/display/i9xx_plane.c +++ b/drivers/gpu/drm/i915/display/i9xx_plane.c @@ -10,6 +10,7 @@ #include "intel_atomic.h" #include "intel_atomic_plane.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_fb.h" #include "intel_sprite.h" diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 781630a40f06..ce544e20f35c 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -33,6 +33,7 @@ #include "intel_connector.h" #include "intel_crtc.h" #include "intel_ddi.h" +#include "intel_de.h" #include "intel_dsi.h" #include "intel_panel.h" #include "intel_vdsc.h" diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 75871ee544a7..5f4f316b3ab5 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -31,6 +31,7 @@ #include "intel_atomic.h" #include "intel_audio.h" #include "intel_cdclk.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_lpe_audio.h" diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 1f0bd23bb883..f95896f7c113 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -26,6 +26,7 @@ #include "intel_atomic.h" #include "intel_bw.h" #include "intel_cdclk.h" +#include "intel_de.h" #include "intel_display_types.h" #include 
"intel_sideband.h" diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 5fae69879adf..dab892d2251b 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -23,6 +23,7 @@ */ #include "intel_color.h" +#include "intel_de.h" #include "intel_display_types.h" #define CTM_COEFF_SIGN (1ULL << 63) diff --git a/drivers/gpu/drm/i915/display/intel_combo_phy.c b/drivers/gpu/drm/i915/display/intel_combo_phy.c index 5df57d16a401..487c54cd5982 100644 --- a/drivers/gpu/drm/i915/display/intel_combo_phy.c +++ b/drivers/gpu/drm/i915/display/intel_combo_phy.c @@ -4,6 +4,7 @@ */ #include "intel_combo_phy.h" +#include "intel_de.h" #include "intel_display_types.h" #define for_each_combo_phy(__dev_priv, __phy) \ diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 1aac8bead4eb..648f1c0d3d39 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -38,6 +38,7 @@ #include "intel_crt.h" #include "intel_crtc.h" #include "intel_ddi.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_fdi.h" #include "intel_fifo_underrun.h" diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index 2345f2efd60b..2ab389b38694 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -13,6 +13,7 @@ #include "intel_atomic.h" #include "intel_atomic_plane.h" #include "intel_cursor.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_display.h" #include "intel_fb.h" diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 93d94d50b63d..0b382e40d594 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -34,6 +34,7 @@ #include "intel_crtc.h" #include "intel_ddi.h" #include "intel_ddi_buf_trans.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dp.h" #include "intel_dp_link_training.h" diff --git a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c index 58d6417b8f3e..7bcdd5c12028 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c +++ b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c @@ -6,6 +6,7 @@ #include "i915_drv.h" #include "intel_ddi.h" #include "intel_ddi_buf_trans.h" +#include "intel_de.h" #include "intel_display_types.h" /* HDMI/DVI modes ignore everything but the last 2 items. 
So we share diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index cc673bb63941..72c548d7e00d 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -78,6 +78,7 @@ #include "intel_color.h" #include "intel_crtc.h" #include "intel_csr.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dp_link_training.h" #include "intel_fbc.h" diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 183c414d554a..d55ae83d037a 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -10,6 +10,7 @@ #include "intel_csr.h" #include "intel_display_debugfs.h" #include "intel_display_power.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dp.h" #include "intel_fbc.h" diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index d48dd15a4f6e..653744d831af 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -11,6 +11,7 @@ #include "intel_combo_phy.h" #include "intel_csr.h" #include "intel_display_power.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dpio_phy.h" #include "intel_hotplug.h" diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index e2e707c4dff5..6d8cdaa36748 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -45,7 +45,6 @@ #include #include "i915_drv.h" -#include "intel_de.h" struct drm_printer; struct __intel_global_objs_state; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index dfa7da928ae5..cbbba8e33b24 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -46,6 +46,7 @@ #include "intel_audio.h" #include "intel_connector.h" #include "intel_ddi.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dp.h" #include "intel_dp_aux.h" diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c index d7c3a74b81a3..d697d169e8c1 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -12,6 +12,7 @@ #include #include "intel_ddi.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dp.h" #include "intel_dp_hdcp.h" diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index a30ca4380a06..5f770fd53f1d 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -34,6 +34,7 @@ #include "intel_connector.h" #include "intel_crtc.h" #include "intel_ddi.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dp.h" #include "intel_dp_hdcp.h" diff --git a/drivers/gpu/drm/i915/display/intel_dpio_phy.c b/drivers/gpu/drm/i915/display/intel_dpio_phy.c index 514c4a7adffc..48507ed79950 100644 --- a/drivers/gpu/drm/i915/display/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.c @@ -23,6 +23,7 @@ #include "display/intel_dp.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dpio_phy.h" #include "intel_sideband.h" diff --git 
a/drivers/gpu/drm/i915/display/intel_dpll.c b/drivers/gpu/drm/i915/display/intel_dpll.c index 9114953f57f1..89635da9f6f6 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll.c +++ b/drivers/gpu/drm/i915/display/intel_dpll.c @@ -4,6 +4,7 @@ */ #include #include "intel_crtc.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_display.h" #include "intel_dpll.h" diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index da2ff0b3ceac..18bfe8d09277 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -21,6 +21,7 @@ * DEALINGS IN THE SOFTWARE. */ +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dpio_phy.h" #include "intel_dpll.h" diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 857126822a88..62a8a69f9f5d 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -5,6 +5,7 @@ */ #include "i915_drv.h" +#include "intel_de.h" #include "intel_display_types.h" #define DSB_BUF_SIZE (2 * PAGE_SIZE) diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c index 090cd76266c6..77419f8c05e9 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo.c +++ b/drivers/gpu/drm/i915/display/intel_dvo.c @@ -33,6 +33,7 @@ #include "i915_drv.h" #include "intel_connector.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dvo.h" #include "intel_dvo_dev.h" diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 91dad8004c34..b2f3ac846f5b 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -43,6 +43,7 @@ #include "i915_drv.h" #include "i915_trace.h" #include "i915_vgpu.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c b/drivers/gpu/drm/i915/display/intel_fdi.c index d719cd9c5b73..cef1061fd6cb 100644 --- a/drivers/gpu/drm/i915/display/intel_fdi.c +++ b/drivers/gpu/drm/i915/display/intel_fdi.c @@ -5,6 +5,7 @@ #include "intel_atomic.h" #include "intel_ddi.h" #include "intel_ddi_buf_trans.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_fdi.h" diff --git a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c index 0fce9fd6e0a9..3315aa1d4d5a 100644 --- a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c @@ -27,6 +27,7 @@ #include "i915_drv.h" #include "i915_trace.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_fbc.h" #include "intel_fifo_underrun.h" diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index 17ab3cb81e02..fcf47f98ea36 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -34,6 +34,7 @@ #include #include "i915_drv.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_gmbus.h" diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index d254fe67ab7f..ebc2e32aec0b 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -18,6 +18,7 @@ #include "i915_drv.h" #include "i915_reg.h" #include "intel_display_power.h" +#include 
"intel_de.h" #include "intel_display_types.h" #include "intel_hdcp.h" #include "intel_sideband.h" diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 47a8f0a1c5e2..27bfa0bba77e 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -43,6 +43,7 @@ #include "intel_atomic.h" #include "intel_connector.h" #include "intel_ddi.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dp.h" #include "intel_gmbus.h" diff --git a/drivers/gpu/drm/i915/display/intel_lspcon.c b/drivers/gpu/drm/i915/display/intel_lspcon.c index e4ff533e3a69..328d31263357 100644 --- a/drivers/gpu/drm/i915/display/intel_lspcon.c +++ b/drivers/gpu/drm/i915/display/intel_lspcon.c @@ -27,6 +27,7 @@ #include #include +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dp.h" #include "intel_lspcon.h" diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index dd12d15f47c7..7f40e9f60bc2 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -41,6 +41,7 @@ #include "i915_drv.h" #include "intel_atomic.h" #include "intel_connector.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_gmbus.h" #include "intel_lvds.h" diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index fffbde4256db..83c25d84c391 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -34,6 +34,7 @@ #include "i915_drv.h" #include "i915_reg.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_frontbuffer.h" #include "intel_overlay.h" diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index a20761079ae0..7d7a60b4d2de 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -35,6 +35,7 @@ #include #include "intel_connector.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dp_aux_backlight.h" #include "intel_dsi_dcs_backlight.h" diff --git a/drivers/gpu/drm/i915/display/intel_pipe_crc.c b/drivers/gpu/drm/i915/display/intel_pipe_crc.c index acc64b87d29f..8ac263f471be 100644 --- a/drivers/gpu/drm/i915/display/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/display/intel_pipe_crc.c @@ -30,6 +30,7 @@ #include #include "intel_atomic.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_pipe_crc.h" diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index 0fd28902d779..a36ec4a818ff 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -5,6 +5,7 @@ #include "g4x_dp.h" #include "i915_drv.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dp.h" #include "intel_dpll.h" diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index c61e1d774667..e3c30dcadcd1 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -27,6 +27,7 @@ #include "i915_drv.h" #include "intel_atomic.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dp_aux.h" #include "intel_hdmi.h" diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index f6eb95c717d2..e4f91d7a5c60 100644 --- 
a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -39,6 +39,7 @@ #include "intel_atomic.h" #include "intel_connector.h" #include "intel_crtc.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_fifo_underrun.h" #include "intel_gmbus.h" diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index acbf4e63b245..4ae9a7455b23 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -43,6 +43,7 @@ #include "i915_trace.h" #include "i915_vgpu.h" #include "intel_atomic_plane.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_frontbuffer.h" #include "intel_sprite.h" diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index 2c5c77693474..ce73ebdfc669 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -36,6 +36,7 @@ #include "i915_drv.h" #include "intel_connector.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_hotplug.h" #include "intel_tv.h" diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index 3a21c65ffa85..42e3f9f7a37e 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -7,6 +7,7 @@ */ #include "i915_drv.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dsi.h" #include "intel_vdsc.h" diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index a9c2b2fd9252..046210ae1de0 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -5,6 +5,7 @@ */ #include "i915_drv.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_vrr.h" diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c index 17a98cb627df..394b7bbf48d8 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.c +++ b/drivers/gpu/drm/i915/display/skl_scaler.c @@ -2,6 +2,7 @@ /* * Copyright © 2020 Intel Corporation */ +#include "intel_de.h" #include "intel_display_types.h" #include "skl_scaler.h" #include "skl_universal_plane.h" diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 553bc937ad90..0d34a5ad4e2b 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -10,6 +10,7 @@ #include "i915_drv.h" #include "intel_atomic_plane.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_fb.h" #include "intel_pm.h" diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index ac0553d492aa..084c9c43b2ed 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -34,6 +34,7 @@ #include "intel_atomic.h" #include "intel_connector.h" #include "intel_crtc.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dsi.h" #include "intel_fifo_underrun.h" diff --git a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c index 4070b00c3690..90185b219447 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c @@ -28,6 +28,7 @@ #include #include "i915_drv.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dsi.h" #include 
"intel_sideband.h" diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 798ecc718e3f..f6967a93ec7a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -35,6 +35,7 @@ #include #include +#include "display/intel_de.h" #include "display/intel_display_types.h" #include "display/intel_fifo_underrun.h" #include "display/intel_hotplug.h" diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index cd584474d1e8..06d5b7cc8b62 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -35,6 +35,7 @@ #include "display/intel_atomic.h" #include "display/intel_atomic_plane.h" #include "display/intel_bw.h" +#include "display/intel_de.h" #include "display/intel_display_types.h" #include "display/intel_fbc.h" #include "display/intel_sprite.h" -- cgit From fcf83a21141eea91d877c1afb6e15ad9d40ae125 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 30 Apr 2021 17:39:45 +0300 Subject: drm/i915: Include intel_de_{read,write}_fw() in i915_reg_rw traces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We lost the i915_reg_rw tracepoint for a lot of display registers when we switched from the heavyweight normal register accessors to the lightweight _fw() variants. See eg. commit dd584fc0711a ("drm/i915: Use I915_READ_FW for plane updates"). Put the tracepoints back so that the register traces might actually be useful. Hopefully these should be close to free when the tracepoint is not enabled and thus not slow down our vblank critical sections significantly. v2: Copy paste the same-cacheline-hang warning from intel_uncore.h (Anshuman) Cc: Cooper Chiou Reviewed-by: Anshuman Gupta Reviewed-by: Jani Nikula Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210430143945.6776-2-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_de.h | 41 ++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_de.h b/drivers/gpu/drm/i915/display/intel_de.h index 00da10bf35f5..9d8c177aa228 100644 --- a/drivers/gpu/drm/i915/display/intel_de.h +++ b/drivers/gpu/drm/i915/display/intel_de.h @@ -8,6 +8,7 @@ #include "i915_drv.h" #include "i915_reg.h" +#include "i915_trace.h" #include "intel_uncore.h" static inline u32 @@ -22,26 +23,12 @@ intel_de_posting_read(struct drm_i915_private *i915, i915_reg_t reg) intel_uncore_posting_read(&i915->uncore, reg); } -/* Note: read the warnings for intel_uncore_*_fw() functions! */ -static inline u32 -intel_de_read_fw(struct drm_i915_private *i915, i915_reg_t reg) -{ - return intel_uncore_read_fw(&i915->uncore, reg); -} - static inline void intel_de_write(struct drm_i915_private *i915, i915_reg_t reg, u32 val) { intel_uncore_write(&i915->uncore, reg, val); } -/* Note: read the warnings for intel_uncore_*_fw() functions! */ -static inline void -intel_de_write_fw(struct drm_i915_private *i915, i915_reg_t reg, u32 val) -{ - intel_uncore_write_fw(&i915->uncore, reg, val); -} - static inline void intel_de_rmw(struct drm_i915_private *i915, i915_reg_t reg, u32 clear, u32 set) { @@ -69,4 +56,30 @@ intel_de_wait_for_clear(struct drm_i915_private *i915, i915_reg_t reg, return intel_de_wait_for_register(i915, reg, mask, 0, timeout); } +/* + * Unlocked mmio-accessors, think carefully before using these. + * + * Certain architectures will die if the same cacheline is concurrently accessed + * by different clients (e.g. on Ivybridge). 
Access to registers should + * therefore generally be serialised, by either the dev_priv->uncore.lock or + * a more localised lock guarding all access to that bank of registers. + */ +static inline u32 +intel_de_read_fw(struct drm_i915_private *i915, i915_reg_t reg) +{ + u32 val; + + val = intel_uncore_read_fw(&i915->uncore, reg); + trace_i915_reg_rw(false, reg, val, sizeof(val), true); + + return val; +} + +static inline void +intel_de_write_fw(struct drm_i915_private *i915, i915_reg_t reg, u32 val) +{ + trace_i915_reg_rw(true, reg, val, sizeof(val), true); + intel_uncore_write_fw(&i915->uncore, reg, val); +} + #endif /* __INTEL_DE_H__ */ -- cgit From b41df85a9834932370806df189d5997a4891fd88 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 30 Apr 2021 18:34:40 +0300 Subject: drm/i915: Extract some helpers to compute cdclk register values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract a few of the switch statements into helper functions to reduce the pollution in the cdclk programming functions. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210430153444.29270-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_cdclk.c | 186 ++++++++++++++--------------- 1 file changed, 88 insertions(+), 98 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index f95896f7c113..f089fd8ea066 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -724,6 +724,23 @@ static void bdw_get_cdclk(struct drm_i915_private *dev_priv, bdw_calc_voltage_level(cdclk_config->cdclk); } +static u32 bdw_cdclk_freq_sel(int cdclk) +{ + switch (cdclk) { + default: + MISSING_CASE(cdclk); + fallthrough; + case 337500: + return LCPLL_CLK_FREQ_337_5_BDW; + case 450000: + return LCPLL_CLK_FREQ_450; + case 540000: + return LCPLL_CLK_FREQ_54O_BDW; + case 675000: + return LCPLL_CLK_FREQ_675_BDW; + } +} + static void bdw_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_config *cdclk_config, enum pipe pipe) @@ -763,25 +780,7 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, val = intel_de_read(dev_priv, LCPLL_CTL); val &= ~LCPLL_CLK_FREQ_MASK; - - switch (cdclk) { - default: - MISSING_CASE(cdclk); - fallthrough; - case 337500: - val |= LCPLL_CLK_FREQ_337_5_BDW; - break; - case 450000: - val |= LCPLL_CLK_FREQ_450; - break; - case 540000: - val |= LCPLL_CLK_FREQ_54O_BDW; - break; - case 675000: - val |= LCPLL_CLK_FREQ_675_BDW; - break; - } - + val |= bdw_cdclk_freq_sel(cdclk); intel_de_write(dev_priv, LCPLL_CTL, val); val = intel_de_read(dev_priv, LCPLL_CTL); @@ -955,10 +954,8 @@ static void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, intel_update_max_cdclk(dev_priv); } -static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) +static u32 skl_dpll0_link_rate(struct drm_i915_private *dev_priv, int vco) { - u32 val; - drm_WARN_ON(&dev_priv->drm, vco != 8100000 && vco != 8640000); /* @@ -970,17 +967,22 @@ static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) * rate later on, with the constraint of choosing a frequency that * works with vco. 
*/ + if (vco == 8640000) + return DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, SKL_DPLL0); + else + return DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, SKL_DPLL0); +} + +static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) +{ + u32 val; + val = intel_de_read(dev_priv, DPLL_CTRL1); val &= ~(DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | DPLL_CTRL1_SSC(SKL_DPLL0) | DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); val |= DPLL_CTRL1_OVERRIDE(SKL_DPLL0); - if (vco == 8640000) - val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, - SKL_DPLL0); - else - val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, - SKL_DPLL0); + val |= skl_dpll0_link_rate(dev_priv, vco); intel_de_write(dev_priv, DPLL_CTRL1, val); intel_de_posting_read(dev_priv, DPLL_CTRL1); @@ -1007,6 +1009,29 @@ static void skl_dpll0_disable(struct drm_i915_private *dev_priv) dev_priv->cdclk.hw.vco = 0; } +static u32 skl_cdclk_freq_sel(struct drm_i915_private *dev_priv, + int cdclk, int vco) +{ + switch (cdclk) { + default: + drm_WARN_ON(&dev_priv->drm, + cdclk != dev_priv->cdclk.hw.bypass); + drm_WARN_ON(&dev_priv->drm, vco != 0); + fallthrough; + case 308571: + case 337500: + return CDCLK_FREQ_337_308; + case 450000: + case 432000: + return CDCLK_FREQ_450_432; + case 540000: + return CDCLK_FREQ_540; + case 617143: + case 675000: + return CDCLK_FREQ_675_617; + } +} + static void skl_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_config *cdclk_config, enum pipe pipe) @@ -1037,29 +1062,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, return; } - /* Choose frequency for this cdclk */ - switch (cdclk) { - default: - drm_WARN_ON(&dev_priv->drm, - cdclk != dev_priv->cdclk.hw.bypass); - drm_WARN_ON(&dev_priv->drm, vco != 0); - fallthrough; - case 308571: - case 337500: - freq_select = CDCLK_FREQ_337_308; - break; - case 450000: - case 432000: - freq_select = CDCLK_FREQ_450_432; - break; - case 540000: - freq_select = CDCLK_FREQ_540; - break; - case 617143: - case 675000: - freq_select = CDCLK_FREQ_675_617; - break; - } + freq_select = skl_cdclk_freq_sel(dev_priv, cdclk, vco); if (dev_priv->cdclk.hw.vco != 0 && dev_priv->cdclk.hw.vco != vco) @@ -1550,13 +1553,40 @@ static u32 bxt_cdclk_cd2x_pipe(struct drm_i915_private *dev_priv, enum pipe pipe } } +static u32 bxt_cdclk_cd2x_div_sel(struct drm_i915_private *dev_priv, + int cdclk, int vco) +{ + /* cdclk = vco / 2 / div{1,1.5,2,4} */ + switch (DIV_ROUND_CLOSEST(vco, cdclk)) { + default: + drm_WARN_ON(&dev_priv->drm, + cdclk != dev_priv->cdclk.hw.bypass); + drm_WARN_ON(&dev_priv->drm, vco != 0); + fallthrough; + case 2: + return BXT_CDCLK_CD2X_DIV_SEL_1; + case 3: + drm_WARN(&dev_priv->drm, + DISPLAY_VER(dev_priv) >= 10, + "Unsupported divider\n"); + return BXT_CDCLK_CD2X_DIV_SEL_1_5; + case 4: + return BXT_CDCLK_CD2X_DIV_SEL_2; + case 8: + drm_WARN(&dev_priv->drm, + DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv), + "Unsupported divider\n"); + return BXT_CDCLK_CD2X_DIV_SEL_4; + } +} + static void bxt_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_config *cdclk_config, enum pipe pipe) { int cdclk = cdclk_config->cdclk; int vco = cdclk_config->vco; - u32 val, divider; + u32 val; int ret; /* Inform power controller of upcoming frequency change. 
*/ @@ -1581,33 +1611,6 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, return; } - /* cdclk = vco / 2 / div{1,1.5,2,4} */ - switch (DIV_ROUND_CLOSEST(vco, cdclk)) { - default: - drm_WARN_ON(&dev_priv->drm, - cdclk != dev_priv->cdclk.hw.bypass); - drm_WARN_ON(&dev_priv->drm, vco != 0); - fallthrough; - case 2: - divider = BXT_CDCLK_CD2X_DIV_SEL_1; - break; - case 3: - drm_WARN(&dev_priv->drm, - DISPLAY_VER(dev_priv) >= 10, - "Unsupported divider\n"); - divider = BXT_CDCLK_CD2X_DIV_SEL_1_5; - break; - case 4: - divider = BXT_CDCLK_CD2X_DIV_SEL_2; - break; - case 8: - drm_WARN(&dev_priv->drm, - DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv), - "Unsupported divider\n"); - divider = BXT_CDCLK_CD2X_DIV_SEL_4; - break; - } - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) { if (dev_priv->cdclk.hw.vco != 0 && dev_priv->cdclk.hw.vco != vco) @@ -1625,8 +1628,9 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, bxt_de_pll_enable(dev_priv, vco); } - val = divider | skl_cdclk_decimal(cdclk) | - bxt_cdclk_cd2x_pipe(dev_priv, pipe); + val = bxt_cdclk_cd2x_div_sel(dev_priv, cdclk, vco) | + bxt_cdclk_cd2x_pipe(dev_priv, pipe) | + skl_cdclk_decimal(cdclk); /* * Disable SSA Precharge when CD clock frequency < 500 MHz, @@ -1712,23 +1716,9 @@ static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) expected = skl_cdclk_decimal(cdclk); /* Figure out what CD2X divider we should be using for this cdclk */ - switch (DIV_ROUND_CLOSEST(dev_priv->cdclk.hw.vco, - dev_priv->cdclk.hw.cdclk)) { - case 2: - expected |= BXT_CDCLK_CD2X_DIV_SEL_1; - break; - case 3: - expected |= BXT_CDCLK_CD2X_DIV_SEL_1_5; - break; - case 4: - expected |= BXT_CDCLK_CD2X_DIV_SEL_2; - break; - case 8: - expected |= BXT_CDCLK_CD2X_DIV_SEL_4; - break; - default: - goto sanitize; - } + expected |= bxt_cdclk_cd2x_div_sel(dev_priv, + dev_priv->cdclk.hw.cdclk, + dev_priv->cdclk.hw.vco); /* * Disable SSA Precharge when CD clock frequency < 500 MHz, -- cgit From fb12fbb18f38b1350f04ef0b25684624fc8cca1c Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 30 Apr 2021 18:34:41 +0300 Subject: drm/i915: Use intel_de_rmw() in bdw cdclk programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the hand rolled rmw sequences with intel_de_rmw(). Jani pointed out that intel_de_rmw() skips the write if the value does not change. That should be totally fine here, but let's at least acknowledge the change in behaviour in case I'm somehow wrong... 
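To make the acknowledged behaviour concrete, here is a rough sketch of the read-modify-write semantics being relied on — illustrative only, not the actual intel_de_rmw()/intel_uncore_rmw() implementation:

/* sketch: read, apply the clear/set masks, and elide the write when
 * the value is unchanged — which is why a no-op rmw skips the mmio write */
static inline u32 rmw_sketch(struct drm_i915_private *i915,
                             i915_reg_t reg, u32 clear, u32 set)
{
        u32 old, val;

        old = intel_de_read(i915, reg);
        val = (old & ~clear) | set;
        if (val != old)
                intel_de_write(i915, reg, val);

        return old;
}

With those semantics, each hand rolled read/modify/write triple above collapses into a single intel_de_rmw(dev_priv, LCPLL_CTL, clear_bits, set_bits) call.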
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210430153444.29270-3-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_cdclk.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index f089fd8ea066..024620520746 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -746,7 +746,6 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, enum pipe pipe) { int cdclk = cdclk_config->cdclk; - u32 val; int ret; if (drm_WARN(&dev_priv->drm, @@ -766,9 +765,8 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, return; } - val = intel_de_read(dev_priv, LCPLL_CTL); - val |= LCPLL_CD_SOURCE_FCLK; - intel_de_write(dev_priv, LCPLL_CTL, val); + intel_de_rmw(dev_priv, LCPLL_CTL, + 0, LCPLL_CD_SOURCE_FCLK); /* * According to the spec, it should be enough to poll for this 1 us. @@ -778,14 +776,11 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, LCPLL_CD_SOURCE_FCLK_DONE, 100)) drm_err(&dev_priv->drm, "Switching to FCLK failed\n"); - val = intel_de_read(dev_priv, LCPLL_CTL); - val &= ~LCPLL_CLK_FREQ_MASK; - val |= bdw_cdclk_freq_sel(cdclk); - intel_de_write(dev_priv, LCPLL_CTL, val); + intel_de_rmw(dev_priv, LCPLL_CTL, + LCPLL_CLK_FREQ_MASK, bdw_cdclk_freq_sel(cdclk)); - val = intel_de_read(dev_priv, LCPLL_CTL); - val &= ~LCPLL_CD_SOURCE_FCLK; - intel_de_write(dev_priv, LCPLL_CTL, val); + intel_de_rmw(dev_priv, LCPLL_CTL, + LCPLL_CD_SOURCE_FCLK, 0); if (wait_for_us((intel_de_read(dev_priv, LCPLL_CTL) & LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) -- cgit From 3b71a9b6cffd0aaa0fe2fb0e38ba068cab319c77 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 30 Apr 2021 18:34:42 +0300 Subject: drm/i915: Use intel_de_rmw() in skl cdclk programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the hand rolled rmw sequences with intel_de_rmw(). 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210430153444.29270-4-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_cdclk.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 024620520746..45c4070910dc 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -970,20 +970,16 @@ static u32 skl_dpll0_link_rate(struct drm_i915_private *dev_priv, int vco) static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) { - u32 val; - - val = intel_de_read(dev_priv, DPLL_CTRL1); - - val &= ~(DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | DPLL_CTRL1_SSC(SKL_DPLL0) | - DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); - val |= DPLL_CTRL1_OVERRIDE(SKL_DPLL0); - val |= skl_dpll0_link_rate(dev_priv, vco); - - intel_de_write(dev_priv, DPLL_CTRL1, val); + intel_de_rmw(dev_priv, DPLL_CTRL1, + DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | + DPLL_CTRL1_SSC(SKL_DPLL0) | + DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0), + DPLL_CTRL1_OVERRIDE(SKL_DPLL0) | + skl_dpll0_link_rate(dev_priv, vco)); intel_de_posting_read(dev_priv, DPLL_CTRL1); - intel_de_write(dev_priv, LCPLL1_CTL, - intel_de_read(dev_priv, LCPLL1_CTL) | LCPLL_PLL_ENABLE); + intel_de_rmw(dev_priv, LCPLL1_CTL, + 0, LCPLL_PLL_ENABLE); if (intel_de_wait_for_set(dev_priv, LCPLL1_CTL, LCPLL_PLL_LOCK, 5)) drm_err(&dev_priv->drm, "DPLL0 not locked\n"); @@ -996,8 +992,9 @@ static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) static void skl_dpll0_disable(struct drm_i915_private *dev_priv) { - intel_de_write(dev_priv, LCPLL1_CTL, - intel_de_read(dev_priv, LCPLL1_CTL) & ~LCPLL_PLL_ENABLE); + intel_de_rmw(dev_priv, LCPLL1_CTL, + LCPLL_PLL_ENABLE, 0); + if (intel_de_wait_for_clear(dev_priv, LCPLL1_CTL, LCPLL_PLL_LOCK, 1)) drm_err(&dev_priv->drm, "Couldn't disable DPLL0\n"); -- cgit From 468f9e8d048294f2a0a5e3b898e37ad636688295 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 30 Apr 2021 18:34:43 +0300 Subject: drm/i915: Use intel_de_rmw() in bxt/glk/cnl+ cdclk programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the hand rolled rmw sequences with intel_de_rmw(). 
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210430153444.29270-5-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_cdclk.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 45c4070910dc..b7d4aa2d7297 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1473,12 +1473,9 @@ static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) { int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk.hw.ref); - u32 val; - val = intel_de_read(dev_priv, BXT_DE_PLL_CTL); - val &= ~BXT_DE_PLL_RATIO_MASK; - val |= BXT_DE_PLL_RATIO(ratio); - intel_de_write(dev_priv, BXT_DE_PLL_CTL, val); + intel_de_rmw(dev_priv, BXT_DE_PLL_CTL, + BXT_DE_PLL_RATIO_MASK, BXT_DE_PLL_RATIO(ratio)); intel_de_write(dev_priv, BXT_DE_PLL_ENABLE, BXT_DE_PLL_PLL_ENABLE); @@ -1492,11 +1489,8 @@ static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) static void cnl_cdclk_pll_disable(struct drm_i915_private *dev_priv) { - u32 val; - - val = intel_de_read(dev_priv, BXT_DE_PLL_ENABLE); - val &= ~BXT_DE_PLL_PLL_ENABLE; - intel_de_write(dev_priv, BXT_DE_PLL_ENABLE, val); + intel_de_rmw(dev_priv, BXT_DE_PLL_ENABLE, + BXT_DE_PLL_PLL_ENABLE, 0); /* Timeout 200us */ if (wait_for((intel_de_read(dev_priv, BXT_DE_PLL_ENABLE) & BXT_DE_PLL_LOCK) == 0, 1)) -- cgit From 7973cff7be1db3433451258e02dfdaf22ddf6b73 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 30 Apr 2021 18:34:44 +0300 Subject: drm/i915: Use intel_de_wait_for_*() in cnl+ cdclk programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the hand rolled PLL lock bit waits with intel_de_wait_for_*(). 
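The transformation is mechanical; a before/after sketch, where SOME_PLL_REG and SOME_LOCK_BIT are placeholders rather than real i915 register definitions:

        /* before: open-coded poll, timeout in ms */
        if (wait_for((intel_de_read(dev_priv, SOME_PLL_REG) & SOME_LOCK_BIT) == 0, 1))
                drm_err(&dev_priv->drm, "timeout waiting for PLL unlock\n");

        /* after: the helper hides the read-and-mask polling loop */
        if (intel_de_wait_for_clear(dev_priv, SOME_PLL_REG, SOME_LOCK_BIT, 1))
                drm_err(&dev_priv->drm, "timeout waiting for PLL unlock\n");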
Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210430153444.29270-6-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_cdclk.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index b7d4aa2d7297..25ef077dc389 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1493,9 +1493,8 @@ static void cnl_cdclk_pll_disable(struct drm_i915_private *dev_priv) BXT_DE_PLL_PLL_ENABLE, 0); /* Timeout 200us */ - if (wait_for((intel_de_read(dev_priv, BXT_DE_PLL_ENABLE) & BXT_DE_PLL_LOCK) == 0, 1)) - drm_err(&dev_priv->drm, - "timeout waiting for CDCLK PLL unlock\n"); + if (intel_de_wait_for_clear(dev_priv, BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 1)) + drm_err(&dev_priv->drm, "timeout waiting for CDCLK PLL unlock\n"); dev_priv->cdclk.hw.vco = 0; } @@ -1512,9 +1511,8 @@ static void cnl_cdclk_pll_enable(struct drm_i915_private *dev_priv, int vco) intel_de_write(dev_priv, BXT_DE_PLL_ENABLE, val); /* Timeout 200us */ - if (wait_for((intel_de_read(dev_priv, BXT_DE_PLL_ENABLE) & BXT_DE_PLL_LOCK) != 0, 1)) - drm_err(&dev_priv->drm, - "timeout waiting for CDCLK PLL lock\n"); + if (intel_de_wait_for_set(dev_priv, BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 1)) + drm_err(&dev_priv->drm, "timeout waiting for CDCLK PLL lock\n"); dev_priv->cdclk.hw.vco = vco; } -- cgit From 9a5b5e1b981309179e030aca0c535a4f037dfa95 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Wed, 5 May 2021 14:38:01 -0700 Subject: drm/i915/display: Disable PSR2 sel fetch in TGL pre-production MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The implementations of two workarounds are missing, causing failures in CI with pre-production HW. Cc: Gwan-gyeong Mun Signed-off-by: José Roberto de Souza Reviewed-by: Gwan-gyeong Mun Link: https://patchwork.freedesktop.org/patch/msgid/20210505213801.80772-1-jose.souza@intel.com --- drivers/gpu/drm/i915/display/intel_psr.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index e3c30dcadcd1..406ba9a712a8 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -720,6 +720,13 @@ static bool intel_psr2_sel_fetch_config_valid(struct intel_dp *intel_dp, } } + /* Wa_14010254185 Wa_14010103792 */ + if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B1)) { + drm_dbg_kms(&dev_priv->drm, + "PSR2 sel fetch not enabled, missing the implementation of WAs\n"); + return false; + } + return crtc_state->enable_psr2_sel_fetch = true; } -- cgit From d9aa6571b28ba0022de1e48801ff03a1854c7ef2 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Wed, 21 Apr 2021 16:37:35 -0700 Subject: drm/msm/dp: check sink_count before update is_connected status Link status is different from display connected status in the case of something like an Apple dongle where the type-c plug can be connected, and therefore the link is connected, but no sink is connected until an HDMI cable is plugged into the dongle. The sink_count in the dongle's DPCD will increase to 1 once an HDMI cable is plugged into the dongle, so that the display connected status becomes true. This check also applies at pm_resume.
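For background, the sink count referred to here lives in the DPCD SINK_COUNT register (address 0x200). A hedged sketch of reading it with the DRM DP helpers — error handling trimmed, and 'aux' standing in for whatever struct drm_dp_aux the driver owns:

        u8 raw;
        int sink_count = 0;

        /* a branch device (dongle) reports 0 sinks until e.g. an HDMI
         * cable is attached downstream */
        if (drm_dp_dpcd_readb(aux, DP_SINK_COUNT, &raw) == 1)
                sink_count = DP_GET_SINK_COUNT(raw);

Only when sink_count is non-zero, in addition to the link being up, should is_connected be reported as true.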
Changes in v4: -- none Fixes: 94e58e2d06e3 ("drm/msm/dp: reset dp controller only at boot up and pm_resume") Reported-by: Stephen Boyd Reviewed-by: Stephen Boyd Tested-by: Stephen Boyd Signed-off-by: Kuogee Hsieh Fixes: 8ede2ecc3e5e ("drm/msm/dp: Add DP compliance tests on Snapdragon Chipsets") Link: https://lore.kernel.org/r/1619048258-8717-2-git-send-email-khsieh@codeaurora.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_display.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 5a39da6e1eaf..0ba71c7a8dd4 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -586,10 +586,8 @@ static int dp_connect_pending_timeout(struct dp_display_private *dp, u32 data) mutex_lock(&dp->event_mutex); state = dp->hpd_state; - if (state == ST_CONNECT_PENDING) { - dp_display_enable(dp, 0); + if (state == ST_CONNECT_PENDING) dp->hpd_state = ST_CONNECTED; - } mutex_unlock(&dp->event_mutex); @@ -669,10 +667,8 @@ static int dp_disconnect_pending_timeout(struct dp_display_private *dp, u32 data mutex_lock(&dp->event_mutex); state = dp->hpd_state; - if (state == ST_DISCONNECT_PENDING) { - dp_display_disable(dp, 0); + if (state == ST_DISCONNECT_PENDING) dp->hpd_state = ST_DISCONNECTED; - } mutex_unlock(&dp->event_mutex); @@ -1272,7 +1268,12 @@ static int dp_pm_resume(struct device *dev) status = dp_catalog_link_is_connected(dp->catalog); - if (status) + /* + * can not declared display is connected unless + * HDMI cable is plugged in and sink_count of + * dongle become 1 + */ + if (status && dp->link->sink_count) dp->dp_display.is_connected = true; else dp->dp_display.is_connected = false; -- cgit From f2f46b878777e0d3f885c7ddad48f477b4dea247 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Wed, 21 Apr 2021 16:37:36 -0700 Subject: drm/msm/dp: initialize audio_comp when audio starts Initialize audio_comp when audio starts and wait for audio_comp at dp_display_disable(). This will take care of both dongle unplugged and display off (suspend) cases. Changes in v2: -- add dp_display_signal_audio_start() Changes in v3: -- restore dp_display_handle_plugged_change() at dp_hpd_unplug_handle(). 
Changes in v4: -- none Signed-off-by: Kuogee Hsieh Reviewed-by: Stephen Boyd Tested-by: Stephen Boyd Fixes: c703d5789590 ("drm/msm/dp: trigger unplug event in msm_dp_display_disable") Link: https://lore.kernel.org/r/1619048258-8717-3-git-send-email-khsieh@codeaurora.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_audio.c | 1 + drivers/gpu/drm/msm/dp/dp_display.c | 11 +++++++++-- drivers/gpu/drm/msm/dp/dp_display.h | 1 + 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_audio.c b/drivers/gpu/drm/msm/dp/dp_audio.c index 82a8673ab8da..d7e4a39a904e 100644 --- a/drivers/gpu/drm/msm/dp/dp_audio.c +++ b/drivers/gpu/drm/msm/dp/dp_audio.c @@ -527,6 +527,7 @@ int dp_audio_hw_params(struct device *dev, dp_audio_setup_acr(audio); dp_audio_safe_to_exit_level(audio); dp_audio_enable(audio, true); + dp_display_signal_audio_start(dp_display); dp_display->audio_enabled = true; end: diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 0ba71c7a8dd4..1784e119269b 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -178,6 +178,15 @@ static int dp_del_event(struct dp_display_private *dp_priv, u32 event) return 0; } +void dp_display_signal_audio_start(struct msm_dp *dp_display) +{ + struct dp_display_private *dp; + + dp = container_of(dp_display, struct dp_display_private, dp_display); + + reinit_completion(&dp->audio_comp); +} + void dp_display_signal_audio_complete(struct msm_dp *dp_display) { struct dp_display_private *dp; @@ -649,7 +658,6 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) dp_add_event(dp, EV_DISCONNECT_PENDING_TIMEOUT, 0, DP_TIMEOUT_5_SECOND); /* signal the disconnect event early to ensure proper teardown */ - reinit_completion(&dp->audio_comp); dp_display_handle_plugged_change(g_dp_display, false); dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_PLUG_INT_MASK | @@ -894,7 +902,6 @@ static int dp_display_disable(struct dp_display_private *dp, u32 data) /* wait only if audio was enabled */ if (dp_display->audio_enabled) { /* signal the disconnect event */ - reinit_completion(&dp->audio_comp); dp_display_handle_plugged_change(dp_display, false); if (!wait_for_completion_timeout(&dp->audio_comp, HZ * 5)) diff --git a/drivers/gpu/drm/msm/dp/dp_display.h b/drivers/gpu/drm/msm/dp/dp_display.h index 6092ba1ed85e..5173c89eedf7 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.h +++ b/drivers/gpu/drm/msm/dp/dp_display.h @@ -34,6 +34,7 @@ int dp_display_get_modes(struct msm_dp *dp_display, int dp_display_request_irq(struct msm_dp *dp_display); bool dp_display_check_video_test(struct msm_dp *dp_display); int dp_display_get_test_bpp(struct msm_dp *dp_display); +void dp_display_signal_audio_start(struct msm_dp *dp_display); void dp_display_signal_audio_complete(struct msm_dp *dp_display); #endif /* _DP_DISPLAY_H_ */ -- cgit From e7c46e43bdd28f16ace4415a0d63c7740671438a Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 5 May 2021 22:11:40 +0300 Subject: drm/i915: Nuke display error state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I doubt anyone has used the display error state since CS flips went the way of the dodo. Just nuke it. It might be semi interesting to have something like this for FIFO underruns and the like, but as it stands this wouldn't provide a sufficient amount of information. So would need an extensive rewrite anyway. 
The lockless power well handling is also racy, so this could just be contributing noise to test results if we end up accessing something with the relevant power well already disabled. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210505191140.14215-1-ville.syrjala@linux.intel.com Acked-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 204 --------------------------- drivers/gpu/drm/i915/display/intel_display.h | 6 - drivers/gpu/drm/i915/i915_gpu_error.c | 6 - drivers/gpu/drm/i915/i915_gpu_error.h | 2 - 4 files changed, 218 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 72c548d7e00d..91e5ae157953 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -12978,207 +12978,3 @@ void intel_display_driver_unregister(struct drm_i915_private *i915) acpi_video_unregister(); intel_opregion_unregister(i915); } - -#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) - -struct intel_display_error_state { - - u32 power_well_driver; - - struct intel_cursor_error_state { - u32 control; - u32 position; - u32 base; - u32 size; - } cursor[I915_MAX_PIPES]; - - struct intel_pipe_error_state { - bool power_domain_on; - u32 source; - u32 stat; - } pipe[I915_MAX_PIPES]; - - struct intel_plane_error_state { - u32 control; - u32 stride; - u32 size; - u32 pos; - u32 addr; - u32 surface; - u32 tile_offset; - } plane[I915_MAX_PIPES]; - - struct intel_transcoder_error_state { - bool available; - bool power_domain_on; - enum transcoder cpu_transcoder; - - u32 conf; - - u32 htotal; - u32 hblank; - u32 hsync; - u32 vtotal; - u32 vblank; - u32 vsync; - } transcoder[5]; -}; - -struct intel_display_error_state * -intel_display_capture_error_state(struct drm_i915_private *dev_priv) -{ - struct intel_display_error_state *error; - int transcoders[] = { - TRANSCODER_A, - TRANSCODER_B, - TRANSCODER_C, - TRANSCODER_D, - TRANSCODER_EDP, - }; - int i; - - BUILD_BUG_ON(ARRAY_SIZE(transcoders) != ARRAY_SIZE(error->transcoder)); - - if (!HAS_DISPLAY(dev_priv)) - return NULL; - - error = kzalloc(sizeof(*error), GFP_ATOMIC); - if (error == NULL) - return NULL; - - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - error->power_well_driver = intel_de_read(dev_priv, - HSW_PWR_WELL_CTL2); - - for_each_pipe(dev_priv, i) { - error->pipe[i].power_domain_on = - __intel_display_power_is_enabled(dev_priv, - POWER_DOMAIN_PIPE(i)); - if (!error->pipe[i].power_domain_on) - continue; - - error->cursor[i].control = intel_de_read(dev_priv, CURCNTR(i)); - error->cursor[i].position = intel_de_read(dev_priv, CURPOS(i)); - error->cursor[i].base = intel_de_read(dev_priv, CURBASE(i)); - - error->plane[i].control = intel_de_read(dev_priv, DSPCNTR(i)); - error->plane[i].stride = intel_de_read(dev_priv, DSPSTRIDE(i)); - if (DISPLAY_VER(dev_priv) <= 3) { - error->plane[i].size = intel_de_read(dev_priv, - DSPSIZE(i)); - error->plane[i].pos = intel_de_read(dev_priv, - DSPPOS(i)); - } - if (DISPLAY_VER(dev_priv) <= 7 && !IS_HASWELL(dev_priv)) - error->plane[i].addr = intel_de_read(dev_priv, - DSPADDR(i)); - if (DISPLAY_VER(dev_priv) >= 4) { - error->plane[i].surface = intel_de_read(dev_priv, - DSPSURF(i)); - error->plane[i].tile_offset = intel_de_read(dev_priv, - DSPTILEOFF(i)); - } - - error->pipe[i].source = intel_de_read(dev_priv, PIPESRC(i)); - - if (HAS_GMCH(dev_priv)) - error->pipe[i].stat = intel_de_read(dev_priv, - PIPESTAT(i)); - } - - for (i = 0; i < 
ARRAY_SIZE(error->transcoder); i++) { - enum transcoder cpu_transcoder = transcoders[i]; - - if (!HAS_TRANSCODER(dev_priv, cpu_transcoder)) - continue; - - error->transcoder[i].available = true; - error->transcoder[i].power_domain_on = - __intel_display_power_is_enabled(dev_priv, - POWER_DOMAIN_TRANSCODER(cpu_transcoder)); - if (!error->transcoder[i].power_domain_on) - continue; - - error->transcoder[i].cpu_transcoder = cpu_transcoder; - - error->transcoder[i].conf = intel_de_read(dev_priv, - PIPECONF(cpu_transcoder)); - error->transcoder[i].htotal = intel_de_read(dev_priv, - HTOTAL(cpu_transcoder)); - error->transcoder[i].hblank = intel_de_read(dev_priv, - HBLANK(cpu_transcoder)); - error->transcoder[i].hsync = intel_de_read(dev_priv, - HSYNC(cpu_transcoder)); - error->transcoder[i].vtotal = intel_de_read(dev_priv, - VTOTAL(cpu_transcoder)); - error->transcoder[i].vblank = intel_de_read(dev_priv, - VBLANK(cpu_transcoder)); - error->transcoder[i].vsync = intel_de_read(dev_priv, - VSYNC(cpu_transcoder)); - } - - return error; -} - -#define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__) - -void -intel_display_print_error_state(struct drm_i915_error_state_buf *m, - struct intel_display_error_state *error) -{ - struct drm_i915_private *dev_priv = m->i915; - int i; - - if (!error) - return; - - err_printf(m, "Num Pipes: %d\n", INTEL_NUM_PIPES(dev_priv)); - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - err_printf(m, "PWR_WELL_CTL2: %08x\n", - error->power_well_driver); - for_each_pipe(dev_priv, i) { - err_printf(m, "Pipe [%d]:\n", i); - err_printf(m, " Power: %s\n", - onoff(error->pipe[i].power_domain_on)); - err_printf(m, " SRC: %08x\n", error->pipe[i].source); - err_printf(m, " STAT: %08x\n", error->pipe[i].stat); - - err_printf(m, "Plane [%d]:\n", i); - err_printf(m, " CNTR: %08x\n", error->plane[i].control); - err_printf(m, " STRIDE: %08x\n", error->plane[i].stride); - if (DISPLAY_VER(dev_priv) <= 3) { - err_printf(m, " SIZE: %08x\n", error->plane[i].size); - err_printf(m, " POS: %08x\n", error->plane[i].pos); - } - if (DISPLAY_VER(dev_priv) <= 7 && !IS_HASWELL(dev_priv)) - err_printf(m, " ADDR: %08x\n", error->plane[i].addr); - if (DISPLAY_VER(dev_priv) >= 4) { - err_printf(m, " SURF: %08x\n", error->plane[i].surface); - err_printf(m, " TILEOFF: %08x\n", error->plane[i].tile_offset); - } - - err_printf(m, "Cursor [%d]:\n", i); - err_printf(m, " CNTR: %08x\n", error->cursor[i].control); - err_printf(m, " POS: %08x\n", error->cursor[i].position); - err_printf(m, " BASE: %08x\n", error->cursor[i].base); - } - - for (i = 0; i < ARRAY_SIZE(error->transcoder); i++) { - if (!error->transcoder[i].available) - continue; - - err_printf(m, "CPU transcoder: %s\n", - transcoder_name(error->transcoder[i].cpu_transcoder)); - err_printf(m, " Power: %s\n", - onoff(error->transcoder[i].power_domain_on)); - err_printf(m, " CONF: %08x\n", error->transcoder[i].conf); - err_printf(m, " HTOTAL: %08x\n", error->transcoder[i].htotal); - err_printf(m, " HBLANK: %08x\n", error->transcoder[i].hblank); - err_printf(m, " HSYNC: %08x\n", error->transcoder[i].hsync); - err_printf(m, " VTOTAL: %08x\n", error->transcoder[i].vtotal); - err_printf(m, " VBLANK: %08x\n", error->transcoder[i].vblank); - err_printf(m, " VSYNC: %08x\n", error->transcoder[i].vsync); - } -} - -#endif diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index fc0df4c63e8d..3e11cf3dfa65 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ 
b/drivers/gpu/drm/i915/display/intel_display.h @@ -37,7 +37,6 @@ struct drm_encoder; struct drm_file; struct drm_format_info; struct drm_framebuffer; -struct drm_i915_error_state_buf; struct drm_i915_gem_object; struct drm_i915_private; struct drm_mode_fb_cmd2; @@ -611,11 +610,6 @@ void ilk_pfit_disable(const struct intel_crtc_state *old_crtc_state); int bdw_get_pipemisc_bpp(struct intel_crtc *crtc); unsigned int intel_plane_fence_y_offset(const struct intel_plane_state *plane_state); -struct intel_display_error_state * -intel_display_capture_error_state(struct drm_i915_private *dev_priv); -void intel_display_print_error_state(struct drm_i915_error_state_buf *e, - struct intel_display_error_state *error); - bool intel_format_info_is_yuv_semiplanar(const struct drm_format_info *info, u64 modifier); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index bb181fe5d47e..99ca242ec13b 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -36,7 +36,6 @@ #include -#include "display/intel_atomic.h" #include "display/intel_csr.h" #include "display/intel_overlay.h" @@ -808,9 +807,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, if (error->overlay) intel_overlay_print_error_state(m, error->overlay); - if (error->display) - intel_display_print_error_state(m, error->display); - err_print_capabilities(m, error); err_print_params(m, &error->params); } @@ -974,7 +970,6 @@ void __i915_gpu_coredump_free(struct kref *error_ref) } kfree(error->overlay); - kfree(error->display); cleanup_params(error); @@ -1826,7 +1821,6 @@ i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask) } error->overlay = intel_overlay_capture_error_state(i915); - error->display = intel_display_capture_error_state(i915); return error; } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 16bc42de4b84..eb435f9e0220 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -29,7 +29,6 @@ struct drm_i915_private; struct i915_vma_compress; struct intel_engine_capture_vma; struct intel_overlay_error_state; -struct intel_display_error_state; struct i915_vma_coredump { struct i915_vma_coredump *next; @@ -182,7 +181,6 @@ struct i915_gpu_coredump { struct i915_params params; struct intel_overlay_error_state *overlay; - struct intel_display_error_state *display; struct scatterlist *sgl, *fit; }; -- cgit From 7c653e15e2ba4c476ea9aa1f5ddf14ca3ccacc17 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 6 May 2021 10:38:36 +0300 Subject: drm/i915: Reorder skl+ scaler vs. plane updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When scanning out NV12, if we at any time have the plane enabled while the scaler is disabled, we get a pretty catastrophic underrun. Let's reorder the operations so that we try to avoid that happening even if our vblank evade fails and the scaler enable/disable and the plane enable/disable get latched during two different frames. This takes care of the most common cases. I suppose there is still at least a theoretical possibility of hitting this if one plane takes the scaler away from another plane before the second plane had a chance to set up another scaler for its use. But that is starting to get a bit complicated, especially since the plane commit order already has to be carefully sequenced to avoid any dbuf overlaps. So plugging this 100% may prove somewhat hard...
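As a condensed sketch of the new per-CRTC commit order (simplified pseudo-C: the commit_pipe_* and skl_* helper names match the ones this patch touches, while the wrapper itself is illustrative):

static void example_update_crtc(struct intel_atomic_state *state,
				struct intel_crtc *crtc)
{
	struct intel_crtc_state *new_crtc_state =
		intel_atomic_get_new_crtc_state(state, crtc);

	intel_pipe_update_start(new_crtc_state);	/* vblank evasion */
	commit_pipe_pre_planes(state, crtc);		/* scaler detach no longer done here */
	skl_update_planes_on_crtc(state, crtc);		/* scaler programmed before PLANE_SURF arms */
	commit_pipe_post_planes(state, crtc);		/* skl_detach_scalers() only after the planes */
	intel_pipe_update_end(new_crtc_state);
}

With this ordering a plane never spends a frame enabled while its scaler is already detached, no matter which frame each register update latches in.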
Cc: Cooper Chiou Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210506073836.14848-1-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- drivers/gpu/drm/i915/display/intel_display.c | 30 ++++++++++++++++------ drivers/gpu/drm/i915/display/skl_universal_plane.c | 11 +++++--- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 91e5ae157953..42f63914fa0c 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -9697,8 +9697,6 @@ static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state, /* on skylake this is done by detaching scalers */ if (DISPLAY_VER(dev_priv) >= 9) { - skl_detach_scalers(new_crtc_state); - if (new_crtc_state->pch_pfit.enabled) skl_pfit_enable(new_crtc_state); } else if (HAS_PCH_SPLIT(dev_priv)) { @@ -9724,8 +9722,8 @@ static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state, icl_set_pipe_chicken(crtc); } -static void commit_pipe_config(struct intel_atomic_state *state, - struct intel_crtc *crtc) +static void commit_pipe_pre_planes(struct intel_atomic_state *state, + struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); const struct intel_crtc_state *old_crtc_state = @@ -9743,9 +9741,6 @@ static void commit_pipe_config(struct intel_atomic_state *state, new_crtc_state->update_pipe) intel_color_commit(new_crtc_state); - if (DISPLAY_VER(dev_priv) >= 9) - skl_detach_scalers(new_crtc_state); - if (DISPLAY_VER(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) bdw_set_pipemisc(new_crtc_state); @@ -9759,6 +9754,23 @@ static void commit_pipe_config(struct intel_atomic_state *state, dev_priv->display.atomic_update_watermarks(state, crtc); } +static void commit_pipe_post_planes(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + const struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + + /* + * Disable the scaler(s) after the plane(s) so that we don't + * get a catastrophic underrun even if the two operations + * end up happening in two different frames. 
+ */ + if (DISPLAY_VER(dev_priv) >= 9 && + !intel_crtc_needs_modeset(new_crtc_state)) + skl_detach_scalers(new_crtc_state); +} + static void intel_enable_crtc(struct intel_atomic_state *state, struct intel_crtc *crtc) { @@ -9810,13 +9822,15 @@ static void intel_update_crtc(struct intel_atomic_state *state, /* Perform vblank evasion around commit operation */ intel_pipe_update_start(new_crtc_state); - commit_pipe_config(state, crtc); + commit_pipe_pre_planes(state, crtc); if (DISPLAY_VER(dev_priv) >= 9) skl_update_planes_on_crtc(state, crtc); else i9xx_update_planes_on_crtc(state, crtc); + commit_pipe_post_planes(state, crtc); + intel_pipe_update_end(new_crtc_state); /* diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 0d34a5ad4e2b..6ad85d7cb219 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -1032,6 +1032,14 @@ skl_program_plane(struct intel_plane *plane, if (!drm_atomic_crtc_needs_modeset(&crtc_state->uapi)) intel_psr2_program_plane_sel_fetch(plane, crtc_state, plane_state, color_plane); + /* + * Enable the scaler before the plane so that we don't + * get a catastrophic underrun even if the two operations + * end up happening in two different frames. + */ + if (plane_state->scaler_id >= 0) + skl_program_plane_scaler(plane, crtc_state, plane_state); + /* * The control register self-arms if the plane was previously * disabled. Try to make the plane enable atomic by writing @@ -1041,9 +1049,6 @@ skl_program_plane(struct intel_plane *plane, intel_de_write_fw(dev_priv, PLANE_SURF(pipe, plane_id), intel_plane_ggtt_offset(plane_state) + surf_addr); - if (plane_state->scaler_id >= 0) - skl_program_plane_scaler(plane, crtc_state, plane_state); - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -- cgit From 8398024b6e888518dff688e627328a9db5b48f98 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 6 May 2021 19:19:21 +0300 Subject: drm/i915/xelpd: add XE_LPD display characteristics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's start preparing for upcoming platforms that will use an XE_LPD design. 
v2: - Use the now-preferred "XE_LPD" term to refer to this design - Utilize DISPLAY_VER() rather than a feature flag - Drop unused mbus_size field (Lucas) v3: - Adjust for dbuf.{size,slice_mask} (Ville) Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza (v2) Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20210506161930.309688-2-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_display_power.h | 2 ++ drivers/gpu/drm/i915/i915_pci.c | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h index f3ca5d5c9778..acf47252d9e7 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -380,6 +380,8 @@ intel_display_power_put_all_in_set(struct drm_i915_private *i915, enum dbuf_slice { DBUF_S1, DBUF_S2, + DBUF_S3, + DBUF_S4, I915_MAX_DBUF_SLICES }; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 7786217638ed..1417e26bb1b6 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -939,6 +939,16 @@ static const struct intel_device_info adl_s_info = { .dma_mask_size = 46, }; +#define XE_LPD_FEATURES \ + .display.ver = 13, \ + .display.has_psr_hw_tracking = 0, \ + .abox_mask = GENMASK(1, 0), \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), \ + .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ + BIT(TRANSCODER_C) | BIT(TRANSCODER_D), \ + .dbuf.size = 4096, \ + .dbuf.slice_mask = BIT(DBUF_S1) | BIT(DBUF_S2) | BIT(DBUF_S3) | BIT(DBUF_S4) + #undef GEN #undef PLATFORM -- cgit From 760759f2cf711be86f360e6a7cdd01a9d4252b8e Mon Sep 17 00:00:00 2001 From: Clinton Taylor Date: Thu, 6 May 2021 19:19:22 +0300 Subject: drm/i915/adl_p: Add PCI Devices IDs Add 18 known PCI device IDs Bspec: 55376 Cc: Caz Yokoyama Cc: Matt Atwood Cc: Matt Roper Signed-off-by: Clinton Taylor Signed-off-by: Matt Roper Reviewed-by: Anusha Srivatsa Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20210506161930.309688-3-imre.deak@intel.com --- include/drm/i915_pciids.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 3be25768321d..eee18fa53b54 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -647,4 +647,25 @@ INTEL_VGA_DEVICE(0x4692, info), \ INTEL_VGA_DEVICE(0x4693, info) +/* ADL-P */ +#define INTEL_ADLP_IDS(info) \ + INTEL_VGA_DEVICE(0x46A0, info), \ + INTEL_VGA_DEVICE(0x46A1, info), \ + INTEL_VGA_DEVICE(0x46A2, info), \ + INTEL_VGA_DEVICE(0x46A3, info), \ + INTEL_VGA_DEVICE(0x46A6, info), \ + INTEL_VGA_DEVICE(0x46A8, info), \ + INTEL_VGA_DEVICE(0x46AA, info), \ + INTEL_VGA_DEVICE(0x462A, info), \ + INTEL_VGA_DEVICE(0x4626, info), \ + INTEL_VGA_DEVICE(0x4628, info), \ + INTEL_VGA_DEVICE(0x46B0, info), \ + INTEL_VGA_DEVICE(0x46B1, info), \ + INTEL_VGA_DEVICE(0x46B2, info), \ + INTEL_VGA_DEVICE(0x46B3, info), \ + INTEL_VGA_DEVICE(0x46C0, info), \ + INTEL_VGA_DEVICE(0x46C1, info), \ + INTEL_VGA_DEVICE(0x46C2, info), \ + INTEL_VGA_DEVICE(0x46C3, info) + #endif /* _I915_PCIIDS_H */ -- cgit From bdd27cad22379a665fe246a0f9b37acb502df0fe Mon Sep 17 00:00:00 2001 From: Clinton Taylor Date: Thu, 6 May 2021 19:19:23 +0300 Subject: drm/i915/adl_p: ADL_P device info enabling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add ADL-P to the device_info 
table and support MACROS. Bspec: 49185, 55372, 55373 Cc: Matt Atwood Cc: Matt Roper Signed-off-by: Clinton Taylor Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20210506161930.309688-4-imre.deak@intel.com --- arch/x86/kernel/early-quirks.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_pci.c | 12 ++++++++++++ drivers/gpu/drm/i915/intel_device_info.c | 1 + drivers/gpu/drm/i915/intel_device_info.h | 1 + 5 files changed, 16 insertions(+) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 6edd1e2ee8af..b553ffe9b985 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -552,6 +552,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_TGL_12_IDS(&gen11_early_ops), INTEL_RKL_IDS(&gen11_early_ops), INTEL_ADLS_IDS(&gen11_early_ops), + INTEL_ADLP_IDS(&gen11_early_ops), }; struct resource intel_graphics_stolen_res __ro_after_init = DEFINE_RES_MEM(0, 0); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e20294e9227a..e5513e19beb5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1392,6 +1392,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_ROCKETLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_ROCKETLAKE) #define IS_DG1(dev_priv) IS_PLATFORM(dev_priv, INTEL_DG1) #define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, INTEL_ALDERLAKE_S) +#define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, INTEL_ALDERLAKE_P) #define IS_HSW_EARLY_SDV(dev_priv) (IS_HASWELL(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0xFF00) == 0x0C00) #define IS_BDW_ULT(dev_priv) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 1417e26bb1b6..1fd1b482ec9f 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -949,6 +949,17 @@ static const struct intel_device_info adl_s_info = { .dbuf.size = 4096, \ .dbuf.slice_mask = BIT(DBUF_S1) | BIT(DBUF_S2) | BIT(DBUF_S3) | BIT(DBUF_S4) +static const struct intel_device_info adl_p_info = { + GEN12_FEATURES, + XE_LPD_FEATURES, + PLATFORM(INTEL_ALDERLAKE_P), + .require_force_probe = 1, + .platform_engine_mask = + BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), + .ppgtt_size = 48, + .dma_mask_size = 39, +}; + #undef GEN #undef PLATFORM @@ -1026,6 +1037,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_TGL_12_IDS(&tgl_info), INTEL_RKL_IDS(&rkl_info), INTEL_ADLS_IDS(&adl_s_info), + INTEL_ADLP_IDS(&adl_p_info), {0, 0, 0} }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 6a351a709417..3b975ce1ff59 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -67,6 +67,7 @@ static const char * const platform_names[] = { PLATFORM_NAME(ROCKETLAKE), PLATFORM_NAME(DG1), PLATFORM_NAME(ALDERLAKE_S), + PLATFORM_NAME(ALDERLAKE_P), }; #undef PLATFORM_NAME diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 6aefe4fde197..e98b36959736 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -87,6 +87,7 @@ enum intel_platform { INTEL_ROCKETLAKE, INTEL_DG1, INTEL_ALDERLAKE_S, + INTEL_ALDERLAKE_P, INTEL_MAX_PLATFORMS }; -- cgit From 33e7a975103cebb20e7dee743adc8f9335958139 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: 
Thu, 6 May 2021 19:19:24 +0300 Subject: drm/i915/xelpd: First stab at DPT support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for DPT (display page table). DPT is a slightly peculiar two-level page table scheme used for tiled scanout buffers (linear uses direct ggtt mapping still). The plane surface address will point at a page in the DPT which holds the PTEs for 512 actual pages. Thus we require 1/512 of the ggtt address space compared to a direct ggtt mapping. We create a new DPT address space for each framebuffer and track two vmas (one for the DPT, another for the ggtt). TODO: - Is the i915_address_space approach sane? - Maybe don't map the whole DPT to write the PTEs? - Deal with remapping/rotation? Need to create a separate DPT for each remapped/rotated plane I guess. Or else we'd need to make the per-fb DPT large enough to support potentially several remapped/rotated vmas. How large should that be? Signed-off-by: Ville Syrjälä Signed-off-by: Bommu Krishnaiah Cc: Wilson Chris P Cc: Tang CQ Cc: Auld Matthew Reviewed-by: Uma Shankar Reviewed-by: Wilson Chris P Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20210506161930.309688-5-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_atomic_plane.c | 7 +- drivers/gpu/drm/i915/display/intel_display.c | 351 ++++++++++++++++++++- drivers/gpu/drm/i915/display/intel_display.h | 1 + drivers/gpu/drm/i915/display/intel_display_types.h | 15 +- drivers/gpu/drm/i915/display/intel_fbc.c | 6 +- drivers/gpu/drm/i915/display/skl_universal_plane.c | 19 +- drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 7 + drivers/gpu/drm/i915/gt/intel_ggtt.c | 7 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 5 + 9 files changed, 392 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index 7bfb26ca0bd0..36f52a1d7552 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -102,7 +102,8 @@ intel_plane_duplicate_state(struct drm_plane *plane) __drm_atomic_helper_plane_duplicate_state(plane, &intel_state->uapi); - intel_state->vma = NULL; + intel_state->ggtt_vma = NULL; + intel_state->dpt_vma = NULL; intel_state->flags = 0; /* add reference to fb */ @@ -125,7 +126,9 @@ intel_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state) { struct intel_plane_state *plane_state = to_intel_plane_state(state); - drm_WARN_ON(plane->dev, plane_state->vma); + + drm_WARN_ON(plane->dev, plane_state->ggtt_vma); + drm_WARN_ON(plane->dev, plane_state->dpt_vma); __drm_atomic_helper_plane_destroy_state(&plane_state->uapi); if (plane_state->hw.fb) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 42f63914fa0c..c978d5d41472 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -63,9 +63,11 @@ #include "display/intel_vdsc.h" #include "display/intel_vrr.h" +#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_object.h" #include "gt/intel_rps.h" +#include "gt/gen8_ppgtt.h" #include "g4x_dp.h" #include "g4x_hdmi.h" @@ -123,6 +125,176 @@ static void ilk_pfit_enable(const struct intel_crtc_state *crtc_state); static void intel_modeset_setup_hw_state(struct drm_device *dev, struct drm_modeset_acquire_ctx *ctx); +struct i915_dpt { + struct i915_address_space vm; + + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + void
__iomem *iomem; +}; + +#define i915_is_dpt(vm) ((vm)->is_dpt) + +static inline struct i915_dpt * +i915_vm_to_dpt(struct i915_address_space *vm) +{ + BUILD_BUG_ON(offsetof(struct i915_dpt, vm)); + GEM_BUG_ON(!i915_is_dpt(vm)); + return container_of(vm, struct i915_dpt, vm); +} + +#define dpt_total_entries(dpt) ((dpt)->vm.total >> PAGE_SHIFT) + +static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) +{ + writeq(pte, addr); +} + +static void dpt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 flags) +{ + struct i915_dpt *dpt = i915_vm_to_dpt(vm); + gen8_pte_t __iomem *base = dpt->iomem; + + gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE, + vm->pte_encode(addr, level, flags)); +} + +static void dpt_insert_entries(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level level, + u32 flags) +{ + struct i915_dpt *dpt = i915_vm_to_dpt(vm); + gen8_pte_t __iomem *base = dpt->iomem; + const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags); + struct sgt_iter sgt_iter; + dma_addr_t addr; + int i; + + /* + * Note that we ignore PTE_READ_ONLY here. The caller must be careful + * not to allow the user to override access to a read only page. + */ + + i = vma->node.start / I915_GTT_PAGE_SIZE; + for_each_sgt_daddr(addr, sgt_iter, vma->pages) + gen8_set_pte(&base[i++], pte_encode | addr); +} + +static void dpt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ +} + +static void dpt_bind_vma(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + struct drm_i915_gem_object *obj = vma->obj; + u32 pte_flags; + + /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ + pte_flags = 0; + if (vma->vm->has_read_only && i915_gem_object_is_readonly(obj)) + pte_flags |= PTE_READ_ONLY; + if (i915_gem_object_is_lmem(obj)) + pte_flags |= PTE_LM; + + vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + + /* + * Without aliasing PPGTT there's no difference between + * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally + * upgrade to both bound if we bind either to avoid double-binding. 
+ */ + atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags); +} + +static void dpt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma) +{ + vm->clear_range(vm, vma->node.start, vma->size); +} + +static void dpt_cleanup(struct i915_address_space *vm) +{ + struct i915_dpt *dpt = i915_vm_to_dpt(vm); + + i915_gem_object_put(dpt->obj); +} + +static struct i915_address_space * +intel_dpt_create(struct drm_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->dev); + size_t size = DIV_ROUND_UP_ULL(obj->size, 512); + struct drm_i915_gem_object *dpt_obj; + struct i915_address_space *vm; + struct i915_dpt *dpt; + int ret; + + size = round_up(size, 4096); + + if (HAS_LMEM(i915)) + dpt_obj = i915_gem_object_create_lmem(i915, size, 0); + else + dpt_obj = i915_gem_object_create_stolen(i915, size); + if (IS_ERR(dpt_obj)) + return ERR_CAST(dpt_obj); + + ret = i915_gem_object_set_cache_level(dpt_obj, I915_CACHE_NONE); + if (ret) { + i915_gem_object_put(dpt_obj); + return ERR_PTR(ret); + } + + dpt = kzalloc(sizeof(*dpt), GFP_KERNEL); + if (!dpt) { + i915_gem_object_put(dpt_obj); + return ERR_PTR(-ENOMEM); + } + + vm = &dpt->vm; + + vm->gt = &i915->gt; + vm->i915 = i915; + vm->dma = i915->drm.dev; + vm->total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; + vm->is_dpt = true; + + i915_address_space_init(vm, VM_CLASS_DPT); + + vm->insert_page = dpt_insert_page; + vm->clear_range = dpt_clear_range; + vm->insert_entries = dpt_insert_entries; + vm->cleanup = dpt_cleanup; + + vm->vma_ops.bind_vma = dpt_bind_vma; + vm->vma_ops.unbind_vma = dpt_unbind_vma; + vm->vma_ops.set_pages = ggtt_set_pages; + vm->vma_ops.clear_pages = clear_pages; + + vm->pte_encode = gen8_ggtt_pte_encode; + + dpt->obj = dpt_obj; + + return &dpt->vm; +} + +static void intel_dpt_destroy(struct i915_address_space *vm) +{ + struct i915_dpt *dpt = i915_vm_to_dpt(vm); + + i915_vm_close(&dpt->vm); +} + /* returns HPLL frequency in kHz */ int vlv_get_hpll_vco(struct drm_i915_private *dev_priv) { @@ -973,6 +1145,9 @@ unsigned int intel_surf_alignment(const struct drm_framebuffer *fb, { struct drm_i915_private *dev_priv = to_i915(fb->dev); + if (intel_fb_uses_dpt(fb)) + return 512 * 4096; + /* AUX_DIST needs only 4K alignment */ if (is_ccs_plane(fb, color_plane)) return 4096; @@ -1026,6 +1201,62 @@ static bool intel_plane_uses_fence(const struct intel_plane_state *plane_state) plane_state->view.gtt.type == I915_GGTT_VIEW_NORMAL); } +static struct i915_vma * +intel_pin_fb_obj_dpt(struct drm_framebuffer *fb, + const struct i915_ggtt_view *view, + bool uses_fence, + unsigned long *out_flags, + struct i915_address_space *vm) +{ + struct drm_device *dev = fb->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_gem_object *obj = intel_fb_obj(fb); + struct i915_vma *vma; + u32 alignment; + int ret; + + if (WARN_ON(!i915_gem_object_is_framebuffer(obj))) + return ERR_PTR(-EINVAL); + + alignment = 4096 * 512; + + atomic_inc(&dev_priv->gpu_error.pending_fb_pin); + + ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); + if (ret) { + vma = ERR_PTR(ret); + goto err; + } + + vma = i915_vma_instance(obj, vm, view); + if (IS_ERR(vma)) + goto err; + + if (i915_vma_misplaced(vma, 0, alignment, 0)) { + ret = i915_vma_unbind(vma); + if (ret) { + vma = ERR_PTR(ret); + goto err; + } + } + + ret = i915_vma_pin(vma, 0, alignment, PIN_GLOBAL); + if (ret) { + vma = ERR_PTR(ret); + goto err; + } + + vma->display_alignment = max_t(u64, vma->display_alignment, alignment); + + 
i915_gem_object_flush_if_display(obj); + + i915_vma_get(vma); +err: + atomic_dec(&dev_priv->gpu_error.pending_fb_pin); + + return vma; +} + struct i915_vma * intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, bool phys_cursor, @@ -1629,6 +1860,49 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc, intel_wait_for_vblank(dev_priv, crtc->pipe); } +static struct i915_vma *intel_dpt_pin(struct i915_address_space *vm) +{ + struct drm_i915_private *i915 = vm->i915; + struct i915_dpt *dpt = i915_vm_to_dpt(vm); + intel_wakeref_t wakeref; + struct i915_vma *vma; + void __iomem *iomem; + + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + atomic_inc(&i915->gpu_error.pending_fb_pin); + + vma = i915_gem_object_ggtt_pin(dpt->obj, NULL, 0, 4096, + HAS_LMEM(i915) ? 0 : PIN_MAPPABLE); + if (IS_ERR(vma)) + goto err; + + iomem = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(iomem)) { + vma = iomem; + goto err; + } + + dpt->vma = vma; + dpt->iomem = iomem; + + i915_vma_get(vma); + +err: + atomic_dec(&i915->gpu_error.pending_fb_pin); + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + + return vma; +} + +static void intel_dpt_unpin(struct i915_address_space *vm) +{ + struct i915_dpt *dpt = i915_vm_to_dpt(vm); + + i915_vma_unpin_iomap(dpt->vma); + i915_vma_put(dpt->vma); +} + static void intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, struct intel_initial_plane_config *plane_config) @@ -1674,12 +1948,12 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, continue; state = to_intel_plane_state(c->primary->state); - if (!state->vma) + if (!state->ggtt_vma) continue; if (intel_plane_ggtt_offset(state) == plane_config->base) { fb = state->hw.fb; - vma = state->vma; + vma = state->ggtt_vma; goto valid_fb; } } @@ -1706,7 +1980,7 @@ valid_fb: &intel_state->view); __i915_vma_pin(vma); - intel_state->vma = i915_vma_get(vma); + intel_state->ggtt_vma = i915_vma_get(vma); if (intel_plane_uses_fence(intel_state) && i915_vma_pin_fence(vma) == 0) if (vma->fence) intel_state->flags |= PLANE_HAS_FENCE; @@ -10564,25 +10838,60 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state) plane->id == PLANE_CURSOR && INTEL_INFO(dev_priv)->display.cursor_needs_physical; - vma = intel_pin_and_fence_fb_obj(fb, phys_cursor, - &plane_state->view.gtt, - intel_plane_uses_fence(plane_state), - &plane_state->flags); - if (IS_ERR(vma)) - return PTR_ERR(vma); + if (!intel_fb_uses_dpt(fb)) { + vma = intel_pin_and_fence_fb_obj(fb, phys_cursor, + &plane_state->view.gtt, + intel_plane_uses_fence(plane_state), + &plane_state->flags); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + plane_state->ggtt_vma = vma; + } else { + struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); - plane_state->vma = vma; + vma = intel_dpt_pin(intel_fb->dpt_vm); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + plane_state->ggtt_vma = vma; + + vma = intel_pin_fb_obj_dpt(fb, &plane_state->view.gtt, false, + &plane_state->flags, intel_fb->dpt_vm); + if (IS_ERR(vma)) { + intel_dpt_unpin(intel_fb->dpt_vm); + plane_state->ggtt_vma = NULL; + return PTR_ERR(vma); + } + + plane_state->dpt_vma = vma; + + WARN_ON(plane_state->ggtt_vma == plane_state->dpt_vma); + } return 0; } void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) { + struct drm_framebuffer *fb = old_plane_state->hw.fb; struct i915_vma *vma; - vma = fetch_and_zero(&old_plane_state->vma); - if (vma) - intel_unpin_fb_vma(vma, old_plane_state->flags); + if (!intel_fb_uses_dpt(fb)) { + vma = 
fetch_and_zero(&old_plane_state->ggtt_vma); + if (vma) + intel_unpin_fb_vma(vma, old_plane_state->flags); + } else { + struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); + + vma = fetch_and_zero(&old_plane_state->dpt_vma); + if (vma) + intel_unpin_fb_vma(vma, old_plane_state->flags); + + vma = fetch_and_zero(&old_plane_state->ggtt_vma); + if (vma) + intel_dpt_unpin(intel_fb->dpt_vm); + } } /** @@ -11105,6 +11414,10 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); drm_framebuffer_cleanup(fb); + + if (intel_fb_uses_dpt(fb)) + intel_dpt_destroy(intel_fb->dpt_vm); + intel_frontbuffer_put(intel_fb->frontbuffer); kfree(intel_fb); @@ -11275,6 +11588,18 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (ret) goto err; + if (intel_fb_uses_dpt(fb)) { + struct i915_address_space *vm; + + vm = intel_dpt_create(&obj->base); + if (IS_ERR(vm)) { + ret = PTR_ERR(vm); + goto err; + } + + intel_fb->dpt_vm = vm; + } + ret = drm_framebuffer_init(&dev_priv->drm, fb, &intel_fb_funcs); if (ret) { drm_err(&dev_priv->drm, "framebuffer init failed %d\n", ret); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 3e11cf3dfa65..e7764e746c6a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -43,6 +43,7 @@ struct drm_mode_fb_cmd2; struct drm_modeset_acquire_ctx; struct drm_plane; struct drm_plane_state; +struct i915_address_space; struct i915_ggtt_view; struct intel_atomic_state; struct intel_crtc; diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 6d8cdaa36748..7fe96777dc67 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -128,6 +128,8 @@ struct intel_framebuffer { struct intel_fb_view normal_view; struct intel_fb_view rotated_view; struct intel_fb_view remapped_view; + + struct i915_address_space *dpt_vm; }; struct intel_fbdev { @@ -610,7 +612,8 @@ struct intel_plane_state { enum drm_scaling_filter scaling_filter; } hw; - struct i915_vma *vma; + struct i915_vma *ggtt_vma; + struct i915_vma *dpt_vma; unsigned long flags; #define PLANE_HAS_FENCE BIT(0) @@ -1972,9 +1975,15 @@ intel_wait_for_vblank_if_active(struct drm_i915_private *dev_priv, enum pipe pip intel_wait_for_vblank(dev_priv, pipe); } -static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *state) +static inline bool intel_fb_uses_dpt(const struct drm_framebuffer *fb) +{ + return fb && DISPLAY_VER(to_i915(fb->dev)) >= 13 && + fb->modifier != DRM_FORMAT_MOD_LINEAR; +} + +static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *plane_state) { - return i915_ggtt_offset(state->vma); + return i915_ggtt_offset(plane_state->ggtt_vma); } static inline struct intel_frontbuffer * diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index b2f3ac846f5b..1847a161cb37 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -737,11 +737,11 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, cache->fence_y_offset = intel_plane_fence_y_offset(plane_state); drm_WARN_ON(&dev_priv->drm, plane_state->flags & PLANE_HAS_FENCE && - !plane_state->vma->fence); + !plane_state->ggtt_vma->fence); if (plane_state->flags & PLANE_HAS_FENCE && - 
plane_state->vma->fence) - cache->fence_id = plane_state->vma->fence->id; + plane_state->ggtt_vma->fence) + cache->fence_id = plane_state->ggtt_vma->fence->id; else cache->fence_id = -1; diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 6ad85d7cb219..e8e61237e9a5 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -934,6 +934,21 @@ static u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, return plane_color_ctl; } +static u32 skl_surf_address(const struct intel_plane_state *plane_state, + int color_plane) +{ + const struct drm_framebuffer *fb = plane_state->hw.fb; + u32 offset = plane_state->view.color_plane[color_plane].offset; + + if (intel_fb_uses_dpt(fb)) { + WARN_ON(offset & 0x1fffff); + return offset >> 9; + } else { + WARN_ON(offset & 0xfff); + return offset; + } +} + static void skl_program_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, @@ -944,7 +959,7 @@ skl_program_plane(struct intel_plane *plane, enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - u32 surf_addr = plane_state->view.color_plane[color_plane].offset; + u32 surf_addr = skl_surf_address(plane_state, color_plane); u32 stride = skl_plane_stride(plane_state, color_plane); const struct drm_framebuffer *fb = plane_state->hw.fb; int aux_plane = skl_main_to_aux_plane(fb, color_plane); @@ -983,7 +998,7 @@ skl_program_plane(struct intel_plane *plane, } if (aux_plane) { - aux_dist = plane_state->view.color_plane[aux_plane].offset - surf_addr; + aux_dist = skl_surf_address(plane_state, aux_plane) - surf_addr; if (DISPLAY_VER(dev_priv) < 12) aux_dist |= skl_plane_stride(plane_state, aux_plane); diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h index 76a08b9c1f5c..b9028c2ad3c7 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h @@ -6,8 +6,15 @@ #ifndef __GEN8_PPGTT_H__ #define __GEN8_PPGTT_H__ +#include + +struct i915_address_space; struct intel_gt; +enum i915_cache_level; struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt); +u64 gen8_ggtt_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags); #endif diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 670c1271e7d5..38742bf33fa3 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -18,6 +18,7 @@ #include "i915_vgpu.h" #include "intel_gtt.h" +#include "gen8_ppgtt.h" static int i915_get_ggtt_vma_pages(struct i915_vma *vma); @@ -187,9 +188,9 @@ static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt) intel_gtt_chipset_flush(); } -static u64 gen8_ggtt_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) +u64 gen8_ggtt_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) { gen8_pte_t pte = addr | _PAGE_PRESENT; diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index e67e34e17913..79f565aeb8c0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -245,6 +245,7 @@ struct i915_address_space { struct dma_resv resv; /* reservation lock for all pd objects, and buffer pool */ #define VM_CLASS_GGTT 0 #define VM_CLASS_PPGTT 1 +#define VM_CLASS_DPT 2 struct drm_i915_gem_object *scratch[4]; /** @@ -255,6 +256,9 @@ struct 
i915_address_space { /* Global GTT */ bool is_ggtt:1; + /* Display page table */ + bool is_dpt:1; + /* Some systems support read-only mappings for GGTT and/or PPGTT */ bool has_read_only:1; @@ -351,6 +355,7 @@ struct i915_ppgtt { }; #define i915_is_ggtt(vm) ((vm)->is_ggtt) +#define i915_is_dpt(vm) ((vm)->is_dpt) int __must_check i915_vm_lock_objects(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww); -- cgit From ad2f92c9f0e4c66e28ac5f3bc9df3bf77f871c51 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 6 May 2021 19:19:25 +0300 Subject: drm/i915/xelpd: Fallback to plane stride limitations when using DPT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GTT remapping allows us to have planes with strides larger than HW supports, but DPT + GTT remapping is still not properly handled, so we fall back to plane HW limitations for now. This patch can be dropped when DPT + GTT remapping is correctly handled, but until then we need this limitation for all display13 platforms to avoid pipe faults. Cc: Ville Syrjälä Cc: Clint Taylor Cc: Matt Roper Suggested-by: Ville Syrjälä Signed-off-by: José Roberto de Souza Reviewed-by: Matt Roper Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20210506161930.309688-6-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 15 +++++++-------- drivers/gpu/drm/i915/display/intel_display_types.h | 8 ++++++-- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index c978d5d41472..53dbf3d0f3b2 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1607,14 +1607,13 @@ u32 intel_fb_max_stride(struct drm_i915_private *dev_priv, * * The new CCS hash mode makes remapping impossible */ - if (!is_ccs_modifier(modifier)) { - if (DISPLAY_VER(dev_priv) >= 7) - return 256*1024; - else if (DISPLAY_VER(dev_priv) >= 4) - return 128*1024; - } - - return intel_plane_fb_max_stride(dev_priv, pixel_format, modifier); + if (DISPLAY_VER(dev_priv) < 4 || is_ccs_modifier(modifier) || + intel_modifier_uses_dpt(dev_priv, modifier)) + return intel_plane_fb_max_stride(dev_priv, pixel_format, modifier); + else if (DISPLAY_VER(dev_priv) >= 7) + return 256 * 1024; + else + return 128 * 1024; } static u32 diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 7fe96777dc67..5f4c3e5beb2f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1975,10 +1975,14 @@ intel_wait_for_vblank_if_active(struct drm_i915_private *dev_priv, enum pipe pip intel_wait_for_vblank(dev_priv, pipe); } +static inline bool intel_modifier_uses_dpt(struct drm_i915_private *i915, u64 modifier) +{ + return DISPLAY_VER(i915) >= 13 && modifier != DRM_FORMAT_MOD_LINEAR; +} + static inline bool intel_fb_uses_dpt(const struct drm_framebuffer *fb) { - return fb && DISPLAY_VER(to_i915(fb->dev)) >= 13 && - fb->modifier != DRM_FORMAT_MOD_LINEAR; + return fb && intel_modifier_uses_dpt(to_i915(fb->dev), fb->modifier); } static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *plane_state) -- cgit From e7367af1ee3df07820de61e9b9896c4979f4132c Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkilä Date: Thu, 6 May 2021 19:19:26 +0300 Subject: drm/i915/xelpd: Support 128k plane stride MIME-Version: 1.0 Content-Type: text/plain;
charset=UTF-8 Content-Transfer-Encoding: 8bit XE_LPD supports plane strides up to 128KB. Cc: Vandita Kulkarni Signed-off-by: Juha-Pekka Heikkilä Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20210506161930.309688-7-imre.deak@intel.com --- drivers/gpu/drm/i915/display/skl_universal_plane.c | 46 +++++++++++++++++----- drivers/gpu/drm/i915/i915_reg.h | 2 + 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index e8e61237e9a5..1261ed9d277f 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -482,17 +482,35 @@ skl_plane_max_stride(struct intel_plane *plane, u32 pixel_format, u64 modifier, unsigned int rotation) { + struct drm_i915_private *i915 = to_i915(plane->base.dev); const struct drm_format_info *info = drm_format_info(pixel_format); int cpp = info->cpp[0]; + int max_horizontal_pixels = 8192; + int max_stride_bytes; + + if (DISPLAY_VER(i915) >= 13) { + /* + * The stride in bytes must not exceed of the size + * of 128K bytes. For pixel formats of 64bpp will allow + * for a 16K pixel surface. + */ + max_stride_bytes = 131072; + if (cpp == 8) + max_horizontal_pixels = 16384; + else + max_horizontal_pixels = 65536; + } else { + /* + * "The stride in bytes must not exceed the + * of the size of 8K pixels and 32K bytes." + */ + max_stride_bytes = 32768; + } - /* - * "The stride in bytes must not exceed the - * of the size of 8K pixels and 32K bytes." - */ if (drm_rotation_90_or_270(rotation)) - return min(8192, 32768 / cpp); + return min(max_horizontal_pixels, max_stride_bytes / cpp); else - return min(8192 * cpp, 32768); + return min(max_horizontal_pixels * cpp, max_stride_bytes); } @@ -1457,7 +1475,10 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) } } - drm_WARN_ON(&dev_priv->drm, x > 8191 || y > 8191); + if (DISPLAY_VER(dev_priv) >= 13) + drm_WARN_ON(&dev_priv->drm, x > 65535 || y > 65535); + else + drm_WARN_ON(&dev_priv->drm, x > 8191 || y > 8191); plane_state->view.color_plane[0].offset = offset; plane_state->view.color_plane[0].x = x; @@ -1531,7 +1552,10 @@ static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state) } } - drm_WARN_ON(&i915->drm, x > 8191 || y > 8191); + if (DISPLAY_VER(i915) >= 13) + drm_WARN_ON(&i915->drm, x > 65535 || y > 65535); + else + drm_WARN_ON(&i915->drm, x > 8191 || y > 8191); plane_state->view.color_plane[uv_plane].offset = offset; plane_state->view.color_plane[uv_plane].x = x; @@ -2244,7 +2268,11 @@ skl_get_initial_plane_config(struct intel_crtc *crtc, val = intel_de_read(dev_priv, PLANE_STRIDE(pipe, plane_id)); stride_mult = skl_plane_stride_mult(fb, 0, DRM_MODE_ROTATE_0); - fb->pitches[0] = (val & 0x3ff) * stride_mult; + + if (DISPLAY_VER(dev_priv) >= 13) + fb->pitches[0] = (val & PLANE_STRIDE_MASK_XELPD) * stride_mult; + else + fb->pitches[0] = (val & PLANE_STRIDE_MASK) * stride_mult; aligned_height = intel_fb_align_height(fb, 0, fb->height); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index a47399781773..db94616b1972 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7206,6 +7206,8 @@ enum { _PIPE(pipe, _PLANE_STRIDE_3_A, _PLANE_STRIDE_3_B) #define PLANE_STRIDE(pipe, plane) \ _MMIO_PLANE(plane, _PLANE_STRIDE_1(pipe), _PLANE_STRIDE_2(pipe)) +#define PLANE_STRIDE_MASK 
REG_GENMASK(10, 0) +#define PLANE_STRIDE_MASK_XELPD REG_GENMASK(11, 0) #define _PLANE_POS_1_B 0x7118c #define _PLANE_POS_2_B 0x7128c -- cgit From 2ac5620fbe31d4a2364b82390deaf59892a0d4f4 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Thu, 6 May 2021 19:19:27 +0300 Subject: drm/i915/adl_p: Add stride restriction when using DPT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Alderlake-P have a new stride restriction when using DPT and it is used by non linear framebuffers. Stride needs to be a power of two to take full DPT rows, but stride is a parameter set by userspace. What we could do is use a fake stride when doing DPT allocation so HW requirements are met and userspace don't need to be changed to met this power of two restrictions but this change will take a while to be implemented so for now adding this restriction in driver to reject atomic commits that would cause visual corruptions. BSpec: 53393 Acked-by: Matt Roper Cc: Matt Roper Cc: Ville Syrjälä Cc: Stanislav Lisovskiy Signed-off-by: José Roberto de Souza Signed-off-by: Imre Deak Reviewed-by: Clint Taylor Link: https://patchwork.freedesktop.org/patch/msgid/20210506161930.309688-8-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 53dbf3d0f3b2..bd6655045118 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11580,6 +11580,15 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, } } + if (IS_ALDERLAKE_P(dev_priv) && + mode_cmd->modifier[i] != DRM_FORMAT_MOD_LINEAR && + !is_power_of_2(mode_cmd->pitches[i])) { + drm_dbg_kms(&dev_priv->drm, + "plane %d pitch (%d) must be power of two for tiled buffers\n", + i, mode_cmd->pitches[i]); + goto err; + } + fb->obj[i] = &obj->base; } -- cgit From d3b4aa43eea3aebea51bfb02ca5ce93df0abc9c4 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 6 May 2021 19:19:28 +0300 Subject: drm/i915/adl_p: Disable support for 90/270 FB rotation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The latest specification removed the support for 90/270 FB rotation on ADL_P, even though legacy Y-tiled surfaces are supported. Align the code accordingly. Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210506161930.309688-9-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_display_types.h | 6 ++++-- drivers/gpu/drm/i915/display/intel_fb.c | 24 +++++++++++++++++----- drivers/gpu/drm/i915/display/intel_fb.h | 2 ++ drivers/gpu/drm/i915/display/skl_universal_plane.c | 3 +-- 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 5f4c3e5beb2f..9c0adfc60c6f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -126,8 +126,10 @@ struct intel_framebuffer { /* Params to remap the FB pages and program the plane registers in each view. 
*/ struct intel_fb_view normal_view; - struct intel_fb_view rotated_view; - struct intel_fb_view remapped_view; + union { + struct intel_fb_view rotated_view; + struct intel_fb_view remapped_view; + }; struct i915_address_space *dpt_vm; }; diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 21271a6976f1..927440ed14f4 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -735,6 +735,15 @@ static void intel_fb_view_init(struct intel_fb_view *view, enum i915_ggtt_view_t view->gtt.type = view_type; } +bool intel_fb_supports_90_270_rotation(const struct intel_framebuffer *fb) +{ + if (DISPLAY_VER(to_i915(fb->base.dev)) >= 13) + return false; + + return fb->base.modifier == I915_FORMAT_MOD_Y_TILED || + fb->base.modifier == I915_FORMAT_MOD_Yf_TILED; +} + int intel_fill_fb_info(struct drm_i915_private *i915, struct intel_framebuffer *fb) { struct drm_i915_gem_object *obj = intel_fb_obj(&fb->base); @@ -745,8 +754,15 @@ int intel_fill_fb_info(struct drm_i915_private *i915, struct intel_framebuffer * unsigned int tile_size = intel_tile_size(i915); intel_fb_view_init(&fb->normal_view, I915_GGTT_VIEW_NORMAL); - intel_fb_view_init(&fb->rotated_view, I915_GGTT_VIEW_ROTATED); - intel_fb_view_init(&fb->remapped_view, I915_GGTT_VIEW_REMAPPED); + + drm_WARN_ON(&i915->drm, + intel_fb_supports_90_270_rotation(fb) && + intel_fb_needs_pot_stride_remap(fb)); + + if (intel_fb_supports_90_270_rotation(fb)) + intel_fb_view_init(&fb->rotated_view, I915_GGTT_VIEW_ROTATED); + if (intel_fb_needs_pot_stride_remap(fb)) + intel_fb_view_init(&fb->remapped_view, I915_GGTT_VIEW_REMAPPED); for (i = 0; i < num_planes; i++) { struct fb_plane_view_dims view_dims; @@ -787,9 +803,7 @@ int intel_fill_fb_info(struct drm_i915_private *i915, struct intel_framebuffer * offset = calc_plane_aligned_offset(fb, i, &x, &y); - /* Y or Yf modifiers required for 90/270 rotation */ - if (fb->base.modifier == I915_FORMAT_MOD_Y_TILED || - fb->base.modifier == I915_FORMAT_MOD_Yf_TILED) + if (intel_fb_supports_90_270_rotation(fb)) gtt_offset_rotated += calc_plane_remap_info(fb, i, &view_dims, offset, gtt_offset_rotated, x, y, &fb->rotated_view); diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h index 7cec77bb5046..d77d9f914cf4 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fb.h @@ -45,6 +45,8 @@ u32 intel_plane_compute_aligned_offset(int *x, int *y, const struct intel_plane_state *state, int color_plane); +bool intel_fb_supports_90_270_rotation(const struct intel_framebuffer *fb); + int intel_fill_fb_info(struct drm_i915_private *i915, struct intel_framebuffer *fb); void intel_fb_fill_view(const struct intel_framebuffer *fb, unsigned int rotation, struct intel_fb_view *view); diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 1261ed9d277f..8588b70a8241 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -1166,8 +1166,7 @@ static int skl_plane_check_fb(const struct intel_crtc_state *crtc_state, } if (drm_rotation_90_or_270(rotation)) { - if (fb->modifier != I915_FORMAT_MOD_Y_TILED && - fb->modifier != I915_FORMAT_MOD_Yf_TILED) { + if (!intel_fb_supports_90_270_rotation(to_intel_framebuffer(fb))) { drm_dbg_kms(&dev_priv->drm, "Y/Yf tiling required for 90/270!\n"); return -EINVAL; -- cgit From 
f6b044e4d9fb3b2b00681c96a3fc438e9c281ef2 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 6 May 2021 19:19:29 +0300 Subject: drm/i915/adl_p: Require a minimum of 8 tiles stride for DPT FBs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The specification only requires DPT FB strides to be POT aligned, but there also seems to be a minimum stride requirement of 8 tiles. Scanning out FBs with < 8 stride tiles will result in pipe faults (even though the stride is POT aligned). Signed-off-by: Imre Deak Acked-by: Ville Syrjälä Reviewed-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20210506161930.309688-10-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_fb.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 927440ed14f4..29c558fbb397 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -601,7 +601,11 @@ plane_view_dst_stride_tiles(const struct intel_framebuffer *fb, int color_plane, unsigned int pitch_tiles) { if (intel_fb_needs_pot_stride_remap(fb)) - return roundup_pow_of_two(pitch_tiles); + /* + * ADL_P, the only platform needing a POT stride has a minimum + * of 8 stride tiles. + */ + return roundup_pow_of_two(max(pitch_tiles, 8u)); else return pitch_tiles; } -- cgit From 8a4053738c0584dd1bee45d1878c71265617cb17 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 6 May 2021 19:19:30 +0300 Subject: drm/i915/adl_p: Enable remapping to pad DPT FB strides to POT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable padding of DPT FB strides to POT, using the FB remapping logic. Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210506161930.309688-11-imre.deak@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 16 ++++++++++++---- drivers/gpu/drm/i915/display/intel_fb.c | 7 +++++-- drivers/gpu/drm/i915/display/intel_fb.h | 1 + 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index bd6655045118..a45916791e81 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -231,16 +231,22 @@ static void dpt_cleanup(struct i915_address_space *vm) } static struct i915_address_space * -intel_dpt_create(struct drm_gem_object *obj) +intel_dpt_create(struct intel_framebuffer *fb) { + struct drm_gem_object *obj = &intel_fb_obj(&fb->base)->base; struct drm_i915_private *i915 = to_i915(obj->dev); - size_t size = DIV_ROUND_UP_ULL(obj->size, 512); struct drm_i915_gem_object *dpt_obj; struct i915_address_space *vm; struct i915_dpt *dpt; + size_t size; int ret; - size = round_up(size, 4096); + if (intel_fb_needs_pot_stride_remap(fb)) + size = intel_remapped_info_size(&fb->remapped_view.gtt.remapped); + else + size = DIV_ROUND_UP_ULL(obj->size, I915_GTT_PAGE_SIZE); + + size = round_up(size * sizeof(gen8_pte_t), I915_GTT_PAGE_SIZE); if (HAS_LMEM(i915)) dpt_obj = i915_gem_object_create_lmem(i915, size, 0); @@ -11580,8 +11586,10 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, } } + /* TODO: Add POT stride remapping support for CCS formats as well.
*/ if (IS_ALDERLAKE_P(dev_priv) && mode_cmd->modifier[i] != DRM_FORMAT_MOD_LINEAR && + !intel_fb_needs_pot_stride_remap(intel_fb) && !is_power_of_2(mode_cmd->pitches[i])) { drm_dbg_kms(&dev_priv->drm, "plane %d pitch (%d) must be power of two for tiled buffers\n", @@ -11599,7 +11607,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (intel_fb_uses_dpt(fb)) { struct i915_address_space *vm; - vm = intel_dpt_create(&obj->base); + vm = intel_dpt_create(intel_fb); if (IS_ERR(vm)) { ret = PTR_ERR(vm); goto err; } diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 29c558fbb397..a005c68889e7 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -478,9 +478,12 @@ static bool intel_plane_can_remap(const struct intel_plane_state *plane_state) return true; } -static bool intel_fb_needs_pot_stride_remap(const struct intel_framebuffer *fb) +bool intel_fb_needs_pot_stride_remap(const struct intel_framebuffer *fb) { - return false; + struct drm_i915_private *i915 = to_i915(fb->base.dev); + + return IS_ALDERLAKE_P(i915) && fb->base.modifier != DRM_FORMAT_MOD_LINEAR && + !is_ccs_modifier(fb->base.modifier); } static int intel_fb_pitch(const struct intel_framebuffer *fb, int color_plane, unsigned int rotation) diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h index d77d9f914cf4..739d1b91754b 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fb.h @@ -45,6 +45,7 @@ u32 intel_plane_compute_aligned_offset(int *x, int *y, const struct intel_plane_state *state, int color_plane); +bool intel_fb_needs_pot_stride_remap(const struct intel_framebuffer *fb); bool intel_fb_supports_90_270_rotation(const struct intel_framebuffer *fb); int intel_fill_fb_info(struct drm_i915_private *i915, struct intel_framebuffer *fb); -- cgit From 8b79feffeca28c5459458fe78676b081e87c93a4 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 14 Apr 2021 14:35:41 +0200 Subject: x86/kvm: Teardown PV features on boot CPU as well Various PV features (Async PF, PV EOI, steal time) work through memory shared with the hypervisor, and when we restore from hibernation we must properly tear down all these features to make sure the hypervisor doesn't write to stale locations after we jump to the previously hibernated kernel (which can try to place anything there). For secondary CPUs the job is already done by kvm_cpu_down_prepare(); register syscore ops to do the same for the boot CPU.
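For reference, a minimal sketch of the syscore pattern this relies on (the example_* names are illustrative; only the syscore API itself is the real kernel interface):

#include <linux/syscore_ops.h>

/*
 * Syscore ops run on the boot CPU with interrupts disabled, late in
 * suspend/hibernate and early in resume, which makes them a natural
 * place to tear down and re-create guest/host shared state.
 */
static int example_pv_suspend(void)
{
	/* disable PV MSRs, unregister shared pages, etc. */
	return 0;
}

static void example_pv_resume(void)
{
	/* re-register the shared state for the boot CPU */
}

static struct syscore_ops example_pv_syscore_ops = {
	.suspend = example_pv_suspend,
	.resume = example_pv_resume,
};

/* at init time: register_syscore_ops(&example_pv_syscore_ops); */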
Signed-off-by: Vitaly Kuznetsov Message-Id: <20210414123544.1060604-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 56 ++++++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index dc440bb69222..9d5f96321c7f 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -451,6 +452,25 @@ static void __init sev_map_percpu_data(void) } } +static void kvm_guest_cpu_offline(void) +{ + kvm_disable_steal_time(); + if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) + wrmsrl(MSR_KVM_PV_EOI_EN, 0); + kvm_pv_disable_apf(); + apf_task_wake_all(); +} + +static int kvm_cpu_online(unsigned int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + kvm_guest_cpu_init(); + local_irq_restore(flags); + return 0; +} + #ifdef CONFIG_SMP static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask); @@ -635,32 +655,34 @@ static void __init kvm_smp_prepare_boot_cpu(void) kvm_spinlock_init(); } -static void kvm_guest_cpu_offline(void) +static int kvm_cpu_down_prepare(unsigned int cpu) { - kvm_disable_steal_time(); - if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) - wrmsrl(MSR_KVM_PV_EOI_EN, 0); - kvm_pv_disable_apf(); - apf_task_wake_all(); -} + unsigned long flags; -static int kvm_cpu_online(unsigned int cpu) -{ - local_irq_disable(); - kvm_guest_cpu_init(); - local_irq_enable(); + local_irq_save(flags); + kvm_guest_cpu_offline(); + local_irq_restore(flags); return 0; } -static int kvm_cpu_down_prepare(unsigned int cpu) +#endif + +static int kvm_suspend(void) { - local_irq_disable(); kvm_guest_cpu_offline(); - local_irq_enable(); + return 0; } -#endif +static void kvm_resume(void) +{ + kvm_cpu_online(raw_smp_processor_id()); +} + +static struct syscore_ops kvm_syscore_ops = { + .suspend = kvm_suspend, + .resume = kvm_resume, +}; static void __init kvm_guest_init(void) { @@ -704,6 +726,8 @@ static void __init kvm_guest_init(void) kvm_guest_cpu_init(); #endif + register_syscore_ops(&kvm_syscore_ops); + /* * Hard lockup detection is enabled by default. Disable it, as guests * can get false positives too easily, for example if the host is -- cgit From c02027b5742b5aa804ef08a4a9db433295533046 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 14 Apr 2021 14:35:42 +0200 Subject: x86/kvm: Disable kvmclock on all CPUs on shutdown Currently, we disable kvmclock from the machine_shutdown() hook, and this only happens for the boot CPU. We need to disable it for all CPUs to guard against memory corruption, e.g. on restore from hibernation. Note, writing '0' to the kvmclock MSR doesn't clear the memory location; it just prevents the hypervisor from updating the location. So for the short while after the write, while the CPU is still alive, the clock remains usable and correct, and we don't need to switch to some other clocksource.
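To make the "writing '0' doesn't clear the location" point concrete: per the KVM paravirtual MSR documentation, the value written to the kvmclock MSR is the guest physical address of the pvclock page with bit 0 serving as the enable flag. A hedged sketch, not from the patch:

#include <stdbool.h>
#include <stdint.h>

/*
 * Sketch: composing the kvmclock MSR value. Writing 0 clears the
 * enable bit (and the address), so the host stops updating the
 * pvclock page; the page contents are left intact, which is why the
 * clock stays usable for the brief remainder of the CPU's life.
 */
static inline uint64_t kvmclock_msr_value(uint64_t pvclock_gpa, bool enable)
{
        return pvclock_gpa | (enable ? 1 : 0);
}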
Signed-off-by: Vitaly Kuznetsov Message-Id: <20210414123544.1060604-4-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_para.h | 4 ++-- arch/x86/kernel/kvm.c | 1 + arch/x86/kernel/kvmclock.c | 5 +---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 338119852512..9c56e0defd45 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -7,8 +7,6 @@ #include #include -extern void kvmclock_init(void); - #ifdef CONFIG_KVM_GUEST bool kvm_check_and_clear_guest_paused(void); #else @@ -86,6 +84,8 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, } #ifdef CONFIG_KVM_GUEST +void kvmclock_init(void); +void kvmclock_disable(void); bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); unsigned int kvm_arch_para_hints(void); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 9d5f96321c7f..25dd126a3325 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -459,6 +459,7 @@ static void kvm_guest_cpu_offline(void) wrmsrl(MSR_KVM_PV_EOI_EN, 0); kvm_pv_disable_apf(); apf_task_wake_all(); + kvmclock_disable(); } static int kvm_cpu_online(unsigned int cpu) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index d37ed4e1d033..081686df6cd8 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -220,11 +220,9 @@ static void kvm_crash_shutdown(struct pt_regs *regs) } #endif -static void kvm_shutdown(void) +void kvmclock_disable(void) { native_write_msr(msr_kvm_system_time, 0, 0); - kvm_disable_steal_time(); - native_machine_shutdown(); } static void __init kvmclock_init_mem(void) @@ -351,7 +349,6 @@ void __init kvmclock_init(void) #endif x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; - machine_ops.shutdown = kvm_shutdown; #ifdef CONFIG_KEXEC_CORE machine_ops.crash_shutdown = kvm_crash_shutdown; #endif -- cgit From 3d6b84132d2a57b5a74100f6923a8feb679ac2ce Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 14 Apr 2021 14:35:43 +0200 Subject: x86/kvm: Disable all PV features on crash The crash shutdown handler only disables kvmclock and steal time; other PV features remain active, so we risk corrupting memory or getting side effects in the kdump kernel. Move the crash handler to kvm.c and unify it with the CPU offline path.
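For orientation, a condensed paraphrase of the unified helper this patch introduces in arch/x86/kernel/kvm.c (a sketch, not a literal quote of the diff below):

/*
 * Paraphrased sketch of the unified teardown: one helper, with a flag
 * separating a clean CPU offline (async-PF waiters must be woken)
 * from a crash/shutdown path (nothing left to wake).
 */
static void kvm_guest_cpu_offline_sketch(bool shutdown)
{
        kvm_disable_steal_time();               /* MSR_KVM_STEAL_TIME = 0 */
        if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
                wrmsrl(MSR_KVM_PV_EOI_EN, 0);   /* PV EOI off */
        kvm_pv_disable_apf();                   /* async PF off */
        if (!shutdown)
                apf_task_wake_all();            /* skipped when crashing */
        kvmclock_disable();                     /* stop host pvclock writes */
}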
Signed-off-by: Vitaly Kuznetsov Message-Id: <20210414123544.1060604-5-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_para.h | 6 ------ arch/x86/kernel/kvm.c | 44 ++++++++++++++++++++++++++++++----------- arch/x86/kernel/kvmclock.c | 21 -------------------- 3 files changed, 32 insertions(+), 39 deletions(-) diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 9c56e0defd45..69299878b200 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -92,7 +92,6 @@ unsigned int kvm_arch_para_hints(void); void kvm_async_pf_task_wait_schedule(u32 token); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_apf_flags(void); -void kvm_disable_steal_time(void); bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token); DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled); @@ -137,11 +136,6 @@ static inline u32 kvm_read_and_reset_apf_flags(void) return 0; } -static inline void kvm_disable_steal_time(void) -{ - return; -} - static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token) { return false; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 25dd126a3325..8eb91dc0f5a8 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -38,6 +38,7 @@ #include #include #include +#include #include DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled); @@ -375,6 +376,14 @@ static void kvm_pv_disable_apf(void) pr_info("disable async PF for cpu %d\n", smp_processor_id()); } +static void kvm_disable_steal_time(void) +{ + if (!has_steal_clock) + return; + + wrmsr(MSR_KVM_STEAL_TIME, 0, 0); +} + static void kvm_pv_guest_cpu_reboot(void *unused) { /* @@ -417,14 +426,6 @@ static u64 kvm_steal_clock(int cpu) return steal; } -void kvm_disable_steal_time(void) -{ - if (!has_steal_clock) - return; - - wrmsr(MSR_KVM_STEAL_TIME, 0, 0); -} - static inline void __set_percpu_decrypted(void *ptr, unsigned long size) { early_set_memory_decrypted((unsigned long) ptr, size); @@ -452,13 +453,14 @@ static void __init sev_map_percpu_data(void) } } -static void kvm_guest_cpu_offline(void) +static void kvm_guest_cpu_offline(bool shutdown) { kvm_disable_steal_time(); if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) wrmsrl(MSR_KVM_PV_EOI_EN, 0); kvm_pv_disable_apf(); - apf_task_wake_all(); + if (!shutdown) + apf_task_wake_all(); kvmclock_disable(); } @@ -661,7 +663,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu) unsigned long flags; local_irq_save(flags); - kvm_guest_cpu_offline(); + kvm_guest_cpu_offline(false); local_irq_restore(flags); return 0; } @@ -670,7 +672,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu) static int kvm_suspend(void) { - kvm_guest_cpu_offline(); + kvm_guest_cpu_offline(false); return 0; } @@ -685,6 +687,20 @@ static struct syscore_ops kvm_syscore_ops = { .resume = kvm_resume, }; +/* + * After a PV feature is registered, the host will keep writing to the + * registered memory location. If the guest happens to shutdown, this memory + * won't be valid. In cases like kexec, in which you install a new kernel, this + * means a random memory location will be kept being written. 
+ */ +#ifdef CONFIG_KEXEC_CORE +static void kvm_crash_shutdown(struct pt_regs *regs) +{ + kvm_guest_cpu_offline(true); + native_machine_crash_shutdown(regs); +} +#endif + static void __init kvm_guest_init(void) { int i; @@ -727,6 +743,10 @@ static void __init kvm_guest_init(void) kvm_guest_cpu_init(); #endif +#ifdef CONFIG_KEXEC_CORE + machine_ops.crash_shutdown = kvm_crash_shutdown; +#endif + register_syscore_ops(&kvm_syscore_ops); /* diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 081686df6cd8..ad273e5861c1 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -20,7 +20,6 @@ #include #include #include -#include #include static int kvmclock __initdata = 1; @@ -203,23 +202,6 @@ static void kvm_setup_secondary_clock(void) } #endif -/* - * After the clock is registered, the host will keep writing to the - * registered memory location. If the guest happens to shutdown, this memory - * won't be valid. In cases like kexec, in which you install a new kernel, this - * means a random memory location will be kept being written. So before any - * kind of shutdown from our side, we unregister the clock by writing anything - * that does not have the 'enable' bit set in the msr - */ -#ifdef CONFIG_KEXEC_CORE -static void kvm_crash_shutdown(struct pt_regs *regs) -{ - native_write_msr(msr_kvm_system_time, 0, 0); - kvm_disable_steal_time(); - native_machine_crash_shutdown(regs); -} -#endif - void kvmclock_disable(void) { native_write_msr(msr_kvm_system_time, 0, 0); @@ -349,9 +331,6 @@ void __init kvmclock_init(void) #endif x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; -#ifdef CONFIG_KEXEC_CORE - machine_ops.crash_shutdown = kvm_crash_shutdown; -#endif kvm_get_preset_lpj(); /* -- cgit From 384fc672f528d3b84eacd9a86ecf35df3363b8ba Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 14 Apr 2021 14:35:44 +0200 Subject: x86/kvm: Unify kvm_pv_guest_cpu_reboot() with kvm_guest_cpu_offline() Simplify the code by making PV features shutdown happen in one place. Signed-off-by: Vitaly Kuznetsov Message-Id: <20210414123544.1060604-6-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8eb91dc0f5a8..a26643dc6bd6 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -384,31 +384,6 @@ static void kvm_disable_steal_time(void) wrmsr(MSR_KVM_STEAL_TIME, 0, 0); } -static void kvm_pv_guest_cpu_reboot(void *unused) -{ - /* - * We disable PV EOI before we load a new kernel by kexec, - * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory. - * New kernel can re-enable when it boots. 
- */ - if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) - wrmsrl(MSR_KVM_PV_EOI_EN, 0); - kvm_pv_disable_apf(); - kvm_disable_steal_time(); -} - -static int kvm_pv_reboot_notify(struct notifier_block *nb, - unsigned long code, void *unused) -{ - if (code == SYS_RESTART) - on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1); - return NOTIFY_DONE; -} - -static struct notifier_block kvm_pv_reboot_nb = { - .notifier_call = kvm_pv_reboot_notify, -}; - static u64 kvm_steal_clock(int cpu) { u64 steal; @@ -687,6 +662,23 @@ static struct syscore_ops kvm_syscore_ops = { .resume = kvm_resume, }; +static void kvm_pv_guest_cpu_reboot(void *unused) +{ + kvm_guest_cpu_offline(true); +} + +static int kvm_pv_reboot_notify(struct notifier_block *nb, + unsigned long code, void *unused) +{ + if (code == SYS_RESTART) + on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1); + return NOTIFY_DONE; +} + +static struct notifier_block kvm_pv_reboot_nb = { + .notifier_call = kvm_pv_reboot_notify, +}; + /* * After a PV feature is registered, the host will keep writing to the * registered memory location. If the guest happens to shutdown, this memory -- cgit From 46a63924b05f335b0765ad13dae4d2d7569f25c9 Mon Sep 17 00:00:00 2001 From: Siddharth Chandrasekaran Date: Mon, 3 May 2021 14:00:58 +0200 Subject: doc/kvm: Fix wrong entry for KVM_CAP_X86_MSR_FILTER The capability that exposes the new ioctl KVM_X86_SET_MSR_FILTER to userspace is specified incorrectly as the ioctl itself (instead of KVM_CAP_X86_MSR_FILTER). This patch fixes it. Fixes: 1a155254ff93 ("KVM: x86: Introduce MSR filtering") Reviewed-by: Alexander Graf Signed-off-by: Siddharth Chandrasekaran Message-Id: <20210503120059.9283-1-sidcha@amazon.de> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 22d077562149..7fcb2fd38f42 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4803,7 +4803,7 @@ KVM_PV_VM_VERIFY 4.126 KVM_X86_SET_MSR_FILTER ---------------------------- -:Capability: KVM_X86_SET_MSR_FILTER +:Capability: KVM_CAP_X86_MSR_FILTER :Architectures: x86 :Type: vm ioctl :Parameters: struct kvm_msr_filter @@ -6715,7 +6715,7 @@ accesses that would usually trigger a #GP by KVM into the guest will instead get bounced to user space through the KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications. -8.27 KVM_X86_SET_MSR_FILTER +8.27 KVM_CAP_X86_MSR_FILTER --------------------------- :Architectures: x86 -- cgit From f5c7e8425f18fdb9bdb7d13340651d7876890329 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 3 May 2021 17:08:51 +0200 Subject: KVM: nVMX: Always make an attempt to map eVMCS after migration When enlightened VMCS is in use and nested state is migrated with vmx_get_nested_state()/vmx_set_nested_state(), KVM can't map the eVMCS page right away: the eVMCS GPA is not part of 'struct kvm_vmx_nested_state_hdr', and we can't read it from the VP assist page because userspace may decide to restore HV_X64_MSR_VP_ASSIST_PAGE after restoring nested state (and QEMU, for example, does exactly that). To make sure the eVMCS is mapped, vmx_set_nested_state() raises the KVM_REQ_GET_NESTED_STATE_PAGES request. Commit f2c7ef3ba955 ("KVM: nSVM: cancel KVM_REQ_GET_NESTED_STATE_PAGES on nested vmexit") added KVM_REQ_GET_NESTED_STATE_PAGES clearing to nested_vmx_vmexit() to make sure the MSR permission bitmap is not switched when an immediate exit from L2 to L1 happens right after migration (caused by a pending event, for example).
Unfortunately, in the exact same situation we still need to have eVMCS mapped so nested_sync_vmcs12_to_shadow() reflects changes in VMCS12 to eVMCS. As a band-aid, restore nested_get_evmcs_page() when clearing KVM_REQ_GET_NESTED_STATE_PAGES in nested_vmx_vmexit(). The 'fix' is far from being ideal as we can't easily propagate possible failures and even if we could, this is most likely already too late to do so. The whole 'KVM_REQ_GET_NESTED_STATE_PAGES' idea for mapping eVMCS after migration seems to be fragile as we diverge too much from the 'native' path when vmptr loading happens on vmx_set_nested_state(). Fixes: f2c7ef3ba955 ("KVM: nSVM: cancel KVM_REQ_GET_NESTED_STATE_PAGES on nested vmexit") Signed-off-by: Vitaly Kuznetsov Message-Id: <20210503150854.1144255-2-vkuznets@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index bced76637823..6058a65a6ede 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3098,15 +3098,8 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu) nested_vmx_handle_enlightened_vmptrld(vcpu, false); if (evmptrld_status == EVMPTRLD_VMFAIL || - evmptrld_status == EVMPTRLD_ERROR) { - pr_debug_ratelimited("%s: enlightened vmptrld failed\n", - __func__); - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = - KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; + evmptrld_status == EVMPTRLD_ERROR) return false; - } } return true; @@ -3194,8 +3187,16 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu) { - if (!nested_get_evmcs_page(vcpu)) + if (!nested_get_evmcs_page(vcpu)) { + pr_debug_ratelimited("%s: enlightened vmptrld failed\n", + __func__); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = + KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return false; + } if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu)) return false; @@ -4435,7 +4436,15 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, /* Similarly, triple faults in L2 should never escape. */ WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)); - kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); + if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { + /* + * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map + * Enlightened VMCS after migration and we still need to + * do that when something is forcing L2->L1 exit prior to + * the first L2 run. + */ + (void)nested_get_evmcs_page(vcpu); + } /* Service the TLB flush request for L2 before switching to L1. */ if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) -- cgit From 32d1b3ab588c1231dbfa9eb08819c50529ce77d7 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 5 May 2021 17:18:21 +0200 Subject: KVM: selftests: evmcs_test: Check that VMLAUNCH with bogus EVMPTR is causing #UD 'run->exit_reason == KVM_EXIT_SHUTDOWN' check is not ideal as we may be getting some unexpected exception. Directly check for #UD instead. 
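The guest-side trick used in the diff below deserves a note: the #UD handler counts the fault and advances RIP past the faulting instruction, and the 3-byte advance is safe because VMLAUNCH has a fixed encoding. A hedged sketch of just that detail (self-contained, illustrative names):

#include <stdint.h>

/* Sketch: the handler's "regs->rip += 3" hard-codes VMLAUNCH's fixed
 * 3-byte encoding, 0f 01 c2. */
static const uint8_t vmlaunch_bytes[] = { 0x0f, 0x01, 0xc2 };

static inline uint64_t skip_vmlaunch(uint64_t rip)
{
        return rip + sizeof(vmlaunch_bytes);
}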
Signed-off-by: Vitaly Kuznetsov Message-Id: <20210505151823.1341678-2-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/evmcs_test.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index ca22ee6d19cb..b01f64ac6ce3 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -19,6 +19,14 @@ #define VCPU_ID 5 +static int ud_count; + +static void guest_ud_handler(struct ex_regs *regs) +{ + ud_count++; + regs->rip += 3; /* VMLAUNCH */ +} + void l2_guest_code(void) { GUEST_SYNC(7); @@ -71,11 +79,11 @@ void guest_code(struct vmx_pages *vmx_pages) if (vmx_pages) l1_guest_code(vmx_pages); - GUEST_DONE(); - /* Try enlightened vmptrld with an incorrect GPA */ evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs); GUEST_ASSERT(vmlaunch()); + GUEST_ASSERT(ud_count == 1); + GUEST_DONE(); } int main(int argc, char *argv[]) @@ -109,6 +117,10 @@ int main(int argc, char *argv[]) vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_handle_exception(vm, UD_VECTOR, guest_ud_handler); + for (stage = 1;; stage++) { _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, @@ -124,7 +136,7 @@ int main(int argc, char *argv[]) case UCALL_SYNC: break; case UCALL_DONE: - goto part1_done; + goto done; default: TEST_FAIL("Unknown ucall %lu", uc.cmd); } @@ -156,10 +168,6 @@ int main(int argc, char *argv[]) (ulong) regs2.rdi, (ulong) regs2.rsi); } -part1_done: - _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, - "Unexpected successful VMEnter with invalid eVMCS pointer!"); - +done: kvm_vm_free(vm); } -- cgit From c9ecafaf0113a305f5085ceb9c7a4b64ca70eae9 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 5 May 2021 17:18:22 +0200 Subject: KVM: selftests: evmcs_test: Check that VMCS12 is always properly synced to eVMCS after restore Add a test for the regression introduced by commit f2c7ef3ba955 ("KVM: nSVM: cancel KVM_REQ_GET_NESTED_STATE_PAGES on nested vmexit"). When an L2->L1 exit is forced immediately after restoring nested state, the KVM_REQ_GET_NESTED_STATE_PAGES request is cleared and VMCS12 changes (e.g. a fresh RIP) are not reflected to the eVMCS. The subsequent nested vCPU run is then broken. Utilize NMI injection to do the job.
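The NMI injection mentioned above is done from the host side through the vCPU events API. A hedged sketch using the raw KVM ioctls (the selftest below uses its vcpu_events_get()/vcpu_events_set() wrappers instead):

#include <linux/kvm.h>
#include <sys/ioctl.h>

/*
 * Sketch: queue an NMI on a vCPU via KVM_SET_VCPU_EVENTS. With
 * PIN_BASED_NMI_EXITING set in the nested controls, the pending NMI
 * forces an immediate L2->L1 exit on the next vCPU run.
 */
static int inject_nmi_raw(int vcpu_fd)
{
        struct kvm_vcpu_events events;

        if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0)
                return -1;

        events.nmi.pending = 1;
        events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;

        return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
}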
Signed-off-by: Vitaly Kuznetsov Message-Id: <20210505151823.1341678-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/evmcs_test.c | 66 ++++++++++++++++++++----- 1 file changed, 55 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index b01f64ac6ce3..63096cea26c6 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -18,30 +18,52 @@ #include "vmx.h" #define VCPU_ID 5 +#define NMI_VECTOR 2 static int ud_count; +void enable_x2apic(void) +{ + uint32_t spiv_reg = APIC_BASE_MSR + (APIC_SPIV >> 4); + + wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) | + MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD); + wrmsr(spiv_reg, rdmsr(spiv_reg) | APIC_SPIV_APIC_ENABLED); +} + static void guest_ud_handler(struct ex_regs *regs) { ud_count++; regs->rip += 3; /* VMLAUNCH */ } +static void guest_nmi_handler(struct ex_regs *regs) +{ +} + void l2_guest_code(void) { GUEST_SYNC(7); GUEST_SYNC(8); + /* Forced exit to L1 upon restore */ + GUEST_SYNC(9); + /* Done, exit to L1 and never come back. */ vmcall(); } -void l1_guest_code(struct vmx_pages *vmx_pages) +void guest_code(struct vmx_pages *vmx_pages) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + enable_x2apic(); + + GUEST_SYNC(1); + GUEST_SYNC(2); + enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist); GUEST_ASSERT(vmx_pages->vmcs_gpa); @@ -63,21 +85,22 @@ void l1_guest_code(struct vmx_pages *vmx_pages) current_evmcs->revision_id = EVMCS_VERSION; GUEST_SYNC(6); + current_evmcs->pin_based_vm_exec_control |= + PIN_BASED_NMI_EXITING; GUEST_ASSERT(!vmlaunch()); GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); - GUEST_SYNC(9); + + /* + * An NMI forces an L2->L1 exit; we then resume L2, hoping that the + * eVMCS is up-to-date (RIP points where it should and not at the + * beginning of l2_guest_code()). GUEST_SYNC(9) checks that.
+ */ GUEST_ASSERT(!vmresume()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - GUEST_SYNC(10); -} -void guest_code(struct vmx_pages *vmx_pages) -{ - GUEST_SYNC(1); - GUEST_SYNC(2); + GUEST_SYNC(10); - if (vmx_pages) - l1_guest_code(vmx_pages); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_SYNC(11); /* Try enlightened vmptrld with an incorrect GPA */ evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs); @@ -86,6 +109,18 @@ void guest_code(struct vmx_pages *vmx_pages) GUEST_DONE(); } +void inject_nmi(struct kvm_vm *vm) +{ + struct kvm_vcpu_events events; + + vcpu_events_get(vm, VCPU_ID, &events); + + events.nmi.pending = 1; + events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING; + + vcpu_events_set(vm, VCPU_ID, &events); +} + int main(int argc, char *argv[]) { vm_vaddr_t vmx_pages_gva = 0; @@ -120,6 +155,9 @@ int main(int argc, char *argv[]) vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); vm_handle_exception(vm, UD_VECTOR, guest_ud_handler); + vm_handle_exception(vm, NMI_VECTOR, guest_nmi_handler); + + pr_info("Running L1 which uses EVMCS to run L2\n"); for (stage = 1;; stage++) { _vcpu_run(vm, VCPU_ID); @@ -166,6 +204,12 @@ int main(int argc, char *argv[]) TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", (ulong) regs2.rdi, (ulong) regs2.rsi); + + /* Force immediate L2->L1 exit before resuming */ + if (stage == 8) { + pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n"); + inject_nmi(vm); + } } done: -- cgit From 70f094f4f01dc4d6f78ac6407f85627293a6553c Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 3 May 2021 17:08:52 +0200 Subject: KVM: nVMX: Properly pad 'struct kvm_vmx_nested_state_hdr' Eliminate the probably unwanted hole in 'struct kvm_vmx_nested_state_hdr': Pre-patch: struct kvm_vmx_nested_state_hdr { __u64 vmxon_pa; /* 0 8 */ __u64 vmcs12_pa; /* 8 8 */ struct { __u16 flags; /* 16 2 */ } smm; /* 16 2 */ /* XXX 2 bytes hole, try to pack */ __u32 flags; /* 20 4 */ __u64 preemption_timer_deadline; /* 24 8 */ }; Post-patch: struct kvm_vmx_nested_state_hdr { __u64 vmxon_pa; /* 0 8 */ __u64 vmcs12_pa; /* 8 8 */ struct { __u16 flags; /* 16 2 */ } smm; /* 16 2 */ __u16 pad; /* 18 2 */ __u32 flags; /* 20 4 */ __u64 preemption_timer_deadline; /* 24 8 */ }; Signed-off-by: Vitaly Kuznetsov Message-Id: <20210503150854.1144255-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/kvm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 5a3022c8af82..0662f644aad9 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr { __u16 flags; } smm; + __u16 pad; + __u32 flags; __u64 preemption_timer_deadline; }; -- cgit From 5f443e424efab56baa8021da04878f88eb0815d4 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 10 Dec 2020 17:23:17 -0800 Subject: selftests: kvm: remove reassignment of non-absolute variables Clang's integrated assembler does not allow symbols with non-absolute values to be reassigned. Modify the interrupt entry loop macro to be compatible with IAS by using a label and an offset. 
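The same numeric-label idiom is usable from GNU C inline assembly; a hedged x86-64 sketch (illustrative, not from the patch): "NNN:" defines a local label that may be redefined any number of times, and "NNNb" refers to the nearest preceding definition, which Clang's integrated assembler accepts, unlike the reassignment of a non-absolute symbol that the patch removes.

#include <stdio.h>

/*
 * Sketch: take the address of a numeric local label. "666:" can be
 * redefined freely; "666b" binds to the most recent definition
 * looking backward, so this assembles under both GAS and Clang IAS.
 */
static void *current_address(void)
{
        void *addr;

        asm ("666: leaq 666b(%%rip), %0" : "=r" (addr));
        return addr;
}

int main(void)
{
        printf("%p\n", current_address());
        return 0;
}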
Cc: Jian Cai Signed-off-by: Bill Wendling References: https://lore.kernel.org/lkml/20200714233024.1789985-1-caij2003@gmail.com/ Message-Id: <20201211012317.3722214-1-morbo@google.com> Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/x86_64/handlers.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S index aaf7bc7d2ce1..7629819734af 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/handlers.S +++ b/tools/testing/selftests/kvm/lib/x86_64/handlers.S @@ -54,9 +54,9 @@ idt_handlers: .align 8 /* Fetch current address and append it to idt_handlers. */ - current_handler = . +666 : .pushsection .rodata -.quad current_handler + .quad 666b .popsection .if ! \has_error -- cgit From aca352886ebdd675b5131ed4c83bf5477eee5d72 Mon Sep 17 00:00:00 2001 From: Siddharth Chandrasekaran Date: Mon, 3 May 2021 14:21:11 +0200 Subject: KVM: x86: Hoist input checks in kvm_add_msr_filter() In the KVM_X86_SET_MSR_FILTER ioctl, input from user space is validated after a memdup_user(). For invalid inputs we'd memdup and then call kfree unnecessarily. Hoist input validation to avoid the kfree altogether. Signed-off-by: Siddharth Chandrasekaran Message-Id: <20210503122111.13775-1-sidcha@amazon.de> Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cebdaa1e3cf5..102f116d9bb4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5468,14 +5468,18 @@ static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter) static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter, struct kvm_msr_filter_range *user_range) { - struct msr_bitmap_range range; unsigned long *bitmap = NULL; size_t bitmap_size; - int r; if (!user_range->nmsrs) return 0; + if (user_range->flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE)) + return -EINVAL; + + if (!user_range->flags) + return -EINVAL; + bitmap_size = BITS_TO_LONGS(user_range->nmsrs) * sizeof(long); if (!bitmap_size || bitmap_size > KVM_MSR_FILTER_MAX_BITMAP_SIZE) return -EINVAL; @@ -5484,31 +5488,15 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter, if (IS_ERR(bitmap)) return PTR_ERR(bitmap); - range = (struct msr_bitmap_range) { + msr_filter->ranges[msr_filter->count] = (struct msr_bitmap_range) { .flags = user_range->flags, .base = user_range->base, .nmsrs = user_range->nmsrs, .bitmap = bitmap, }; - if (range.flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE)) { - r = -EINVAL; - goto err; - } - - if (!range.flags) { - r = -EINVAL; - goto err; - } - - /* Everything ok, add this range identifier. */ - msr_filter->ranges[msr_filter->count] = range; msr_filter->count++; - return 0; -err: - kfree(bitmap); - return r; } static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) -- cgit From 063ab16c14db5a2ef52d54d0475b7fed19c982d7 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 4 May 2021 17:39:35 +0300 Subject: KVM: nSVM: always restore the L1's GIF on migration While L1's GIF is usually set while L2 runs, and migrated nested state is usually loaded after a vCPU reset (which also sets L1's GIF to true), neither is guaranteed.
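From userspace's point of view, the GIF value in question is just a flag bit in the nested-state blob exchanged via KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE. A hedged sketch of checking it (illustrative helper, not from the patch):

#include <stdbool.h>
#include <linux/kvm.h>

/*
 * Sketch: L1's GIF travels as KVM_STATE_NESTED_GIF_SET in the flags
 * word of struct kvm_nested_state; the fix makes the restore side
 * honor this bit instead of assuming GIF is set.
 */
static bool nested_state_gif_set(const struct kvm_nested_state *state)
{
        return state->flags & KVM_STATE_NESTED_GIF_SET;
}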
Signed-off-by: Maxim Levitsky Message-Id: <20210504143936.1644378-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 32400cba608d..b331446f67f3 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1314,6 +1314,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, else svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save; + svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); + svm->nested.nested_run_pending = !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); -- cgit From 809c79137a192d7e881a517f803ebbf96305f066 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 4 May 2021 17:39:36 +0300 Subject: KVM: nSVM: remove a warning about vmcb01 VM exit reason While in most cases, when returning to use the VMCB01, the exit reason stored in it will be SVM_EXIT_VMRUN, on first VM exit after a nested migration this field can contain anything since the VM entry did happen before the migration. Remove this warning to avoid the false positive. Signed-off-by: Maxim Levitsky Message-Id: <20210504143936.1644378-3-mlevitsk@redhat.com> Fixes: 9a7de6ecc3ed ("KVM: nSVM: If VMRUN is single-stepped, queue the #DB intercept in nested_svm_vmexit()") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index b331446f67f3..5e8d8443154e 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -764,7 +764,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm) nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr); svm_switch_vmcb(svm, &svm->vmcb01); - WARN_ON_ONCE(svm->vmcb->control.exit_code != SVM_EXIT_VMRUN); /* * On vmexit the GIF is set to false and -- cgit From 8aec21c04caa2000f91cf8822ae0811e4b0c3971 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:20 -0700 Subject: KVM: VMX: Do not advertise RDPID if ENABLE_RDTSCP control is unsupported Clear KVM's RDPID capability if the ENABLE_RDTSCP secondary exec control is unsupported. Despite being enumerated in a separate CPUID flag, RDPID is bundled under the same VMCS control as RDTSCP and will #UD in VMX non-root if ENABLE_RDTSCP is not enabled. 
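For reference, RDPID itself is a one-instruction read of IA32_TSC_AUX; a hedged userspace sketch (x86-64, assumes the CPU and, inside a guest, the hypervisor expose RDPID):

#include <stdint.h>

/*
 * Sketch: RDPID (f3 0f c7 /7) reads IA32_TSC_AUX without RDTSCP's
 * accompanying TSC read. In VMX non-root operation it #UDs unless
 * the ENABLE_RDTSCP secondary execution control is set, which is the
 * bundling this patch accounts for when clearing both capability bits.
 */
static inline uint64_t rdpid(void)
{
        uint64_t pid;

        asm volatile ("rdpid %0" : "=r" (pid));
        return pid;
}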
Fixes: 41cd02c6f7f6 ("kvm: x86: Expose RDPID in KVM_GET_SUPPORTED_CPUID") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-2-seanjc@google.com> Reviewed-by: Jim Mattson Reviewed-by: Reiji Watanabe Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index cbe0cdade38a..46573b862638 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7377,9 +7377,11 @@ static __init void vmx_set_cpu_caps(void) if (!cpu_has_vmx_xsaves()) kvm_cpu_cap_clear(X86_FEATURE_XSAVES); - /* CPUID 0x80000001 */ - if (!cpu_has_vmx_rdtscp()) + /* CPUID 0x80000001 and 0x7 (RDPID) */ + if (!cpu_has_vmx_rdtscp()) { kvm_cpu_cap_clear(X86_FEATURE_RDTSCP); + kvm_cpu_cap_clear(X86_FEATURE_RDPID); + } if (cpu_has_vmx_waitpkg()) kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG); -- cgit From 85d0011264da24be08ae907d7f29983a597ca9b1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:21 -0700 Subject: KVM: x86: Emulate RDPID only if RDTSCP is supported Do not advertise emulation support for RDPID if RDTSCP is unsupported. RDPID emulation subtly relies on MSR_TSC_AUX to exist in hardware, as both vmx_get_msr() and svm_get_msr() will return an error if the MSR is unsupported, i.e. ctxt->ops->get_msr() will fail and the emulator will inject a #UD. Note, RDPID emulation also relies on RDTSCP being enabled in the guest, but this is a KVM bug and will eventually be fixed. Fixes: fb6d4d340e05 ("KVM: x86: emulate RDPID") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-3-seanjc@google.com> Reviewed-by: Jim Mattson Reviewed-by: Reiji Watanabe Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 19606a341888..c0e8c5e92189 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -637,7 +637,8 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func) case 7: entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; entry->eax = 0; - entry->ecx = F(RDPID); + if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) + entry->ecx = F(RDPID); ++array->nent; default: break; -- cgit From 3b195ac9260235624b1c18f7bdaef184479c1d41 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:22 -0700 Subject: KVM: SVM: Inject #UD on RDTSCP when it should be disabled in the guest Intercept RDTSCP to inject #UD if RDTSCP is disabled in the guest. Note, SVM does not support intercepting RDPID. Unlike VMX's ENABLE_RDTSCP control, RDTSCP interception does not apply to RDPID. This is a benign virtualization hole as the host kernel (incorrectly) sets MSR_TSC_AUX if RDTSCP is supported, and KVM loads the guest's MSR_TSC_AUX into hardware if RDTSCP is supported in the host, i.e. KVM will not leak the host's MSR_TSC_AUX to the guest. But, when the kernel bug is fixed, KVM will start leaking the host's MSR_TSC_AUX if RDPID is supported in hardware, but RDTSCP isn't available for whatever reason. This leak will be remedied in a future commit.
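The MSR_TSC_AUX dependency is easiest to see from RDTSCP's output registers; a hedged x86-64 sketch:

#include <stdint.h>

/*
 * Sketch: RDTSCP returns the TSC in EDX:EAX and IA32_TSC_AUX in ECX,
 * so RDTSCP and RDPID ultimately read the same MSR, which is why
 * RDPID emulation piggybacks on the MSR_TSC_AUX plumbing.
 */
static inline uint64_t rdtscp(uint32_t *aux)
{
        uint32_t lo, hi;

        asm volatile ("rdtscp" : "=a" (lo), "=d" (hi), "=c" (*aux));
        return ((uint64_t)hi << 32) | lo;
}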
Fixes: 46896c73c1a4 ("KVM: svm: add support for RDTSCP") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-4-seanjc@google.com> Reviewed-by: Jim Mattson Reviewed-by: Reiji Watanabe Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index be5cf612ab1f..ebcb5849d69b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1100,7 +1100,9 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) return svm->vmcb->control.tsc_offset; } -static void svm_check_invpcid(struct vcpu_svm *svm) +/* Evaluate instruction intercepts that depend on guest CPUID features. */ +static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu, + struct vcpu_svm *svm) { /* * Intercept INVPCID if shadow paging is enabled to sync/free shadow @@ -1113,6 +1115,13 @@ static void svm_check_invpcid(struct vcpu_svm *svm) else svm_clr_intercept(svm, INTERCEPT_INVPCID); } + + if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) { + if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) + svm_clr_intercept(svm, INTERCEPT_RDTSCP); + else + svm_set_intercept(svm, INTERCEPT_RDTSCP); + } } static void init_vmcb(struct kvm_vcpu *vcpu) @@ -1248,7 +1257,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm_clr_intercept(svm, INTERCEPT_PAUSE); } - svm_check_invpcid(svm); + svm_recalc_instruction_intercepts(vcpu, svm); /* * If the host supports V_SPEC_CTRL then disable the interception @@ -3084,6 +3093,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = { [SVM_EXIT_STGI] = stgi_interception, [SVM_EXIT_CLGI] = clgi_interception, [SVM_EXIT_SKINIT] = skinit_interception, + [SVM_EXIT_RDTSCP] = kvm_handle_invalid_op, [SVM_EXIT_WBINVD] = kvm_emulate_wbinvd, [SVM_EXIT_MONITOR] = kvm_emulate_monitor, [SVM_EXIT_MWAIT] = kvm_emulate_mwait, @@ -4007,8 +4017,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) && guest_cpuid_has(vcpu, X86_FEATURE_NRIPS); - /* Check again if INVPCID interception if required */ - svm_check_invpcid(svm); + svm_recalc_instruction_intercepts(vcpu, svm); /* For sev guests, the memory encryption bit is not reserved in CR3. */ if (sev_guest(vcpu->kvm)) { -- cgit From 2183de4161b90bd3851ccd3910c87b2c9adfc6ed Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:23 -0700 Subject: KVM: x86: Move RDPID emulation intercept to its own enum Add a dedicated intercept enum for RDPID instead of piggybacking RDTSCP. Unlike VMX's ENABLE_RDTSCP, RDPID is not bound to SVM's RDTSCP intercept. Fixes: fb6d4d340e05 ("KVM: x86: emulate RDPID") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-5-seanjc@google.com> Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 2 +- arch/x86/kvm/kvm_emulate.h | 1 + arch/x86/kvm/vmx/vmx.c | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 77e1c89a95a7..8a0ccdb56076 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4502,7 +4502,7 @@ static const struct opcode group8[] = { * from the register case of group9. 
*/ static const struct gprefix pfx_0f_c7_7 = { - N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp), + N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid), }; diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index 0d359115429a..f016838faedd 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -468,6 +468,7 @@ enum x86_intercept { x86_intercept_clgi, x86_intercept_skinit, x86_intercept_rdtscp, + x86_intercept_rdpid, x86_intercept_icebp, x86_intercept_wbinvd, x86_intercept_monitor, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 46573b862638..4a625c748275 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7437,8 +7437,9 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, /* * RDPID causes #UD if disabled through secondary execution controls. * Because it is marked as EmulateOnUD, we need to intercept it here. + * Note, RDPID is hidden behind ENABLE_RDTSCP. */ - case x86_intercept_rdtscp: + case x86_intercept_rdpid: if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_RDTSCP)) { exception->vector = UD_VECTOR; exception->error_code_valid = false; -- cgit From 5104d7ffcf24749939bea7fdb5378d186473f890 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:24 -0700 Subject: KVM: VMX: Disable preemption when probing user return MSRs Disable preemption when probing a user return MSR via RDMSR/WRMSR. If the MSR holds a different value per logical CPU, the WRMSR could corrupt the host's value if KVM is preempted between the RDMSR and WRMSR, and then rescheduled on a different CPU. Opportunistically land the helper in common x86; SVM will use the helper in a future commit. Fixes: 4be534102624 ("KVM: VMX: Initialize vmx->guest_msrs[] right after allocation") Cc: stable@vger.kernel.org Cc: Xiaoyao Li Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-6-seanjc@google.com> Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx/vmx.c | 5 +---- arch/x86/kvm/x86.c | 16 ++++++++++++++++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 848956bb3cf1..e8dcbc632cf8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1777,6 +1777,7 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, unsigned long icr, int op_64_bit); void kvm_define_user_return_msr(unsigned index, u32 msr); +int kvm_probe_user_return_msr(u32 msr); int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4a625c748275..11ff9c3d95d5 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6914,12 +6914,9 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) { u32 index = vmx_uret_msrs_list[i]; - u32 data_low, data_high; int j = vmx->nr_uret_msrs; - if (rdmsr_safe(index, &data_low, &data_high) < 0) - continue; - if (wrmsr_safe(index, data_low, data_high) < 0) + if (kvm_probe_user_return_msr(index)) continue; vmx->guest_uret_msrs[j].slot = i; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 102f116d9bb4..bd90c73c37b4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -339,6 +339,22 @@ static void kvm_on_user_return(struct user_return_notifier *urn) } } +int kvm_probe_user_return_msr(u32 msr) +{ +
u64 val; + int ret; + + preempt_disable(); + ret = rdmsrl_safe(msr, &val); + if (ret) + goto out; + ret = wrmsrl_safe(msr, val); +out: + preempt_enable(); + return ret; +} +EXPORT_SYMBOL_GPL(kvm_probe_user_return_msr); + void kvm_define_user_return_msr(unsigned slot, u32 msr) { BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS); -- cgit From 0caa0a77c2f6fcd0830cdcd018db1af98fe35e28 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:25 -0700 Subject: KVM: SVM: Probe and load MSR_TSC_AUX regardless of RDTSCP support in host Probe MSR_TSC_AUX whether or not RDTSCP is supported in the host, and if probing succeeds, load the guest's MSR_TSC_AUX into hardware prior to VMRUN. Because SVM doesn't support interception of RDPID, RDPID cannot be disallowed in the guest (without resorting to binary translation). Leaving the host's MSR_TSC_AUX in hardware would leak the host's value to the guest if RDTSCP is not supported. Note, there is also a kernel bug that prevents leaking the host's value. The host kernel initializes MSR_TSC_AUX if and only if RDTSCP is supported, even though the vDSO usage consumes MSR_TSC_AUX via RDPID. I.e. if RDTSCP is not supported, there is no host value to leak. But, if/when the host kernel bug is fixed, KVM would start leaking MSR_TSC_AUX in the case where hardware supports RDPID but RDTSCP is unavailable for whatever reason. Probing MSR_TSC_AUX will also allow consolidating the probe and define logic in common x86, and will make it simpler to condition the existence of MSR_TSC_AUX (from the guest's perspective) on RDTSCP *or* RDPID. Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-7-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index ebcb5849d69b..13e4dd128177 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -212,7 +212,7 @@ DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); * RDTSCP and RDPID are not used in the kernel, specifically to allow KVM to * defer the restoration of TSC_AUX until the CPU returns to userspace.
*/ -#define TSC_AUX_URET_SLOT 0 +static int tsc_aux_uret_slot __read_mostly = -1; static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; @@ -959,8 +959,10 @@ static __init int svm_hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 32; } - if (boot_cpu_has(X86_FEATURE_RDTSCP)) - kvm_define_user_return_msr(TSC_AUX_URET_SLOT, MSR_TSC_AUX); + if (!kvm_probe_user_return_msr(MSR_TSC_AUX)) { + tsc_aux_uret_slot = 0; + kvm_define_user_return_msr(tsc_aux_uret_slot, MSR_TSC_AUX); + } /* Check for pause filtering support */ if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { @@ -1454,8 +1456,8 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) } } - if (static_cpu_has(X86_FEATURE_RDTSCP)) - kvm_set_user_return_msr(TSC_AUX_URET_SLOT, svm->tsc_aux, -1ull); + if (likely(tsc_aux_uret_slot >= 0)) + kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull); svm->guest_state_loaded = true; } @@ -2664,7 +2666,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data |= (u64)svm->sysenter_esp_hi << 32; break; case MSR_TSC_AUX: - if (!boot_cpu_has(X86_FEATURE_RDTSCP)) + if (tsc_aux_uret_slot < 0) return 1; if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) @@ -2885,7 +2887,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0; break; case MSR_TSC_AUX: - if (!boot_cpu_has(X86_FEATURE_RDTSCP)) + if (tsc_aux_uret_slot < 0) return 1; if (!msr->host_initiated && @@ -2908,7 +2910,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) * guest via direct_access_msrs, and switch it via user return. */ preempt_disable(); - r = kvm_set_user_return_msr(TSC_AUX_URET_SLOT, data, -1ull); + r = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull); preempt_enable(); if (r) return 1; -- cgit From 36fa06f9ff39f23e03cd8206dc6bbb7711c23be6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:26 -0700 Subject: KVM: x86: Add support for RDPID without RDTSCP Allow userspace to enable RDPID for a guest without also enabling RDTSCP. Aside from checking for RDPID support in the obvious flows, VMX also needs to set ENABLE_RDTSCP=1 when RDPID is exposed. For the record, there is no known scenario where enabling RDPID without RDTSCP is desirable. But, both AMD and Intel architectures allow for the condition, i.e. this is purely to make KVM more architecturally accurate. 
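The "RDPID without RDTSCP" combination is legal because the two features are enumerated in different CPUID leaves; a hedged userspace sketch of the checks:

#include <stdbool.h>
#include <cpuid.h>

/*
 * Sketch: RDPID is CPUID.(EAX=7,ECX=0):ECX[22]; RDTSCP is
 * CPUID.0x80000001:EDX[27]. Since the bits are independent, a VMM
 * can legitimately expose one without the other.
 */
static bool cpu_has_rdpid(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
                return false;
        return ecx & (1u << 22);
}

static bool cpu_has_rdtscp(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx))
                return false;
        return edx & (1u << 27);
}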
Fixes: 41cd02c6f7f6 ("kvm: x86: Expose RDPID in KVM_GET_SUPPORTED_CPUID") Cc: stable@vger.kernel.org Reported-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-8-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 6 ++++-- arch/x86/kvm/vmx/vmx.c | 27 +++++++++++++++++++++++---- arch/x86/kvm/x86.c | 3 ++- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 13e4dd128177..0ba0a00f8dc6 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2669,7 +2669,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (tsc_aux_uret_slot < 0) return 1; if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && + !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) return 1; msr_info->data = svm->tsc_aux; break; @@ -2891,7 +2892,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) return 1; if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && + !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) return 1; /* diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 11ff9c3d95d5..b304e372aab3 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1788,7 +1788,8 @@ static void setup_msrs(struct vcpu_vmx *vmx) if (update_transition_efer(vmx)) vmx_setup_uret_msr(vmx, MSR_EFER); - if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) + if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) || + guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID)) vmx_setup_uret_msr(vmx, MSR_TSC_AUX); vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL); @@ -1994,7 +1995,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_TSC_AUX: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && + !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) return 1; goto find_uret_msr; case MSR_IA32_DEBUGCTLMSR: @@ -2314,7 +2316,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_TSC_AUX: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && + !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) return 1; /* Check reserved bit, higher 32 bits should be zero */ if ((data >> 32) != 0) @@ -4368,7 +4371,23 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) xsaves_enabled, false); } - vmx_adjust_sec_exec_feature(vmx, &exec_control, rdtscp, RDTSCP); + /* + * RDPID is also gated by ENABLE_RDTSCP, turn on the control if either + * feature is exposed to the guest. This creates a virtualization hole + * if both are supported in hardware but only one is exposed to the + * guest, but letting the guest execute RDTSCP or RDPID when either one + * is advertised is preferable to emulating the advertised instruction + * in KVM on #UD, and obviously better than incorrectly injecting #UD. 
+ */ + if (cpu_has_vmx_rdtscp()) { + bool rdpid_or_rdtscp_enabled = + guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) || + guest_cpuid_has(vcpu, X86_FEATURE_RDPID); + + vmx_adjust_secondary_exec_control(vmx, &exec_control, + SECONDARY_EXEC_ENABLE_RDTSCP, + rdpid_or_rdtscp_enabled, false); + } vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID); vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bd90c73c37b4..0856636efc44 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5941,7 +5941,8 @@ static void kvm_init_msr_list(void) continue; break; case MSR_TSC_AUX: - if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) + if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP) && + !kvm_cpu_cap_has(X86_FEATURE_RDPID)) continue; break; case MSR_IA32_UMWAIT_CONTROL: -- cgit From b6194b94a2ca4affce5aab1bbf773a977ad73671 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:27 -0700 Subject: KVM: VMX: Configure list of user return MSRs at module init Configure the list of user return MSRs that are actually supported at module init instead of reprobing the list of possible MSRs every time a vCPU is created. Curating the list on a per-vCPU basis is pointless; KVM is completely hosed if the set of supported MSRs changes after module init, or if the set of MSRs differs per physical CPU. The per-vCPU lists also increase complexity (see __vmx_find_uret_msr()) and create corner cases that _should_ be impossible, but theoretically exist in KVM, e.g. advertising RDTSCP to userspace without actually being able to virtualize RDTSCP if probing MSR_TSC_AUX fails. Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-9-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 61 +++++++++++++++++++++++++++++++++----------------- arch/x86/kvm/vmx/vmx.h | 10 ++++++++- 2 files changed, 50 insertions(+), 21 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b304e372aab3..887db1af1312 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -461,7 +461,7 @@ static unsigned long host_idt_base; * support this emulation, IA32_STAR must always be included in * vmx_uret_msrs_list[], even in i386 builds. */ -static const u32 vmx_uret_msrs_list[] = { +static u32 vmx_uret_msrs_list[] = { #ifdef CONFIG_X86_64 MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, #endif @@ -469,6 +469,12 @@ static const u32 vmx_uret_msrs_list[] = { MSR_IA32_TSX_CTRL, }; +/* + * Number of user return MSRs that are actually supported in hardware. + * vmx_uret_msrs_list is modified when KVM is loaded to drop unsupported MSRs. + */ +static int vmx_nr_uret_msrs; + #if IS_ENABLED(CONFIG_HYPERV) static bool __read_mostly enlightened_vmcs = true; module_param(enlightened_vmcs, bool, 0444); @@ -700,9 +706,16 @@ static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr) { int i; - for (i = 0; i < vmx->nr_uret_msrs; ++i) + /* + * Note, vmx->guest_uret_msrs is the same size as vmx_uret_msrs_list, + * but is ordered differently. The MSR is matched against the list of + * supported uret MSRs using "slot", but the index that is returned is + * the index into guest_uret_msrs.
+ */ + for (i = 0; i < vmx_nr_uret_msrs; ++i) { if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr) return i; + } return -1; } @@ -6929,18 +6942,10 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) goto free_vpid; } - BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS); - - for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) { - u32 index = vmx_uret_msrs_list[i]; - int j = vmx->nr_uret_msrs; + for (i = 0; i < vmx_nr_uret_msrs; ++i) { + vmx->guest_uret_msrs[i].data = 0; - if (kvm_probe_user_return_msr(index)) - continue; - - vmx->guest_uret_msrs[j].slot = i; - vmx->guest_uret_msrs[j].data = 0; - switch (index) { + switch (vmx_uret_msrs_list[i]) { case MSR_IA32_TSX_CTRL: /* * TSX_CTRL_CPUID_CLEAR is handled in the CPUID @@ -6954,15 +6959,14 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) * host so that TSX remains always disabled. */ if (boot_cpu_has(X86_FEATURE_RTM)) - vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; + vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; else - vmx->guest_uret_msrs[j].mask = 0; + vmx->guest_uret_msrs[i].mask = 0; break; default: - vmx->guest_uret_msrs[j].mask = -1ull; + vmx->guest_uret_msrs[i].mask = -1ull; break; } - ++vmx->nr_uret_msrs; } err = alloc_loaded_vmcs(&vmx->vmcs01); @@ -7821,17 +7825,34 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector, }; +static __init void vmx_setup_user_return_msrs(void) +{ + u32 msr; + int i; + + BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS); + + for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) { + msr = vmx_uret_msrs_list[i]; + + if (kvm_probe_user_return_msr(msr)) + continue; + + kvm_define_user_return_msr(vmx_nr_uret_msrs, msr); + vmx_uret_msrs_list[vmx_nr_uret_msrs++] = msr; + } +} + static __init int hardware_setup(void) { unsigned long host_bndcfgs; struct desc_ptr dt; - int r, i, ept_lpage_level; + int r, ept_lpage_level; store_idt(&dt); host_idt_base = dt.address; - for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) - kvm_define_user_return_msr(i, vmx_uret_msrs_list[i]); + vmx_setup_user_return_msrs(); if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0) return -EIO; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 008cb87ff088..d71ed8b425c5 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -245,8 +245,16 @@ struct vcpu_vmx { u32 idt_vectoring_info; ulong rflags; + /* + * User return MSRs are always emulated when enabled in the guest, but + * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside + * of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to + * be loaded into hardware if those conditions aren't met. + * nr_active_uret_msrs tracks the number of MSRs that need to be loaded + * into hardware when running the guest. guest_uret_msrs[] is resorted + * whenever the number of "active" uret MSRs is modified. + */ struct vmx_uret_msr guest_uret_msrs[MAX_NR_USER_RETURN_MSRS]; - int nr_uret_msrs; int nr_active_uret_msrs; bool guest_uret_msrs_loaded; #ifdef CONFIG_X86_64 -- cgit From ee9d22e08d1341692a43926e5e1d84c90a5dac1d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:28 -0700 Subject: KVM: VMX: Use flag to indicate "active" uret MSRs instead of sorting list Explicitly flag a uret MSR as needing to be loaded into hardware instead of resorting the list of "active" MSRs and tracking how many MSRs in total need to be loaded. 
The only benefit to sorting the list is that the loop to load MSRs during vmx_prepare_switch_to_guest() doesn't need to iterate over all supported uret MSRs, only those that are active. But that is a pointless optimization, as the most common case, running a 64-bit guest, will load the vast majority of MSRs. Not to mention that a single WRMSR is far more expensive than iterating over the list. Providing a stable list order obviates the need to track a given MSR's "slot" in the per-CPU list of user return MSRs; all lists simply use the same ordering. Future patches will take advantage of the stable order to further simplify the related code. Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-10-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 80 ++++++++++++++++++++++++++------------------------ arch/x86/kvm/vmx/vmx.h | 2 +- 2 files changed, 42 insertions(+), 40 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 887db1af1312..adefedac0e3b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -458,8 +458,9 @@ static unsigned long host_idt_base; * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm * will emulate SYSCALL in legacy mode if the vendor string in guest * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To - * support this emulation, IA32_STAR must always be included in - * vmx_uret_msrs_list[], even in i386 builds. + * support this emulation, MSR_STAR is included in the list for i386, + * but is never loaded into hardware. MSR_CSTAR is also never loaded + * into hardware and is here purely for emulation purposes. */ static u32 vmx_uret_msrs_list[] = { #ifdef CONFIG_X86_64 @@ -702,18 +703,12 @@ static bool is_valid_passthrough_msr(u32 msr) return r; } -static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr) +static inline int __vmx_find_uret_msr(u32 msr) { int i; - /* - * Note, vmx->guest_uret_msrs is the same size as vmx_uret_msrs_list, - * but is ordered differently. The MSR is matched against the list of - * supported uret MSRs using "slot", but the index that is returned is - * the index into guest_uret_msrs.
- */ for (i = 0; i < vmx_nr_uret_msrs; ++i) { - if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr) + if (vmx_uret_msrs_list[i] == msr) return i; } return -1; @@ -723,7 +718,7 @@ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr) { int i; - i = __vmx_find_uret_msr(vmx, msr); + i = __vmx_find_uret_msr(msr); if (i >= 0) return &vmx->guest_uret_msrs[i]; return NULL; @@ -732,13 +727,14 @@ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr) static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx, struct vmx_uret_msr *msr, u64 data) { + unsigned int slot = msr - vmx->guest_uret_msrs; int ret = 0; u64 old_msr_data = msr->data; msr->data = data; - if (msr - vmx->guest_uret_msrs < vmx->nr_active_uret_msrs) { + if (msr->load_into_hardware) { preempt_disable(); - ret = kvm_set_user_return_msr(msr->slot, msr->data, msr->mask); + ret = kvm_set_user_return_msr(slot, msr->data, msr->mask); preempt_enable(); if (ret) msr->data = old_msr_data; @@ -1090,7 +1086,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx) return false; } - i = __vmx_find_uret_msr(vmx, MSR_EFER); + i = __vmx_find_uret_msr(MSR_EFER); if (i < 0) return false; @@ -1252,11 +1248,14 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) */ if (!vmx->guest_uret_msrs_loaded) { vmx->guest_uret_msrs_loaded = true; - for (i = 0; i < vmx->nr_active_uret_msrs; ++i) - kvm_set_user_return_msr(vmx->guest_uret_msrs[i].slot, + for (i = 0; i < vmx_nr_uret_msrs; ++i) { + if (!vmx->guest_uret_msrs[i].load_into_hardware) + continue; + + kvm_set_user_return_msr(i, vmx->guest_uret_msrs[i].data, vmx->guest_uret_msrs[i].mask); - + } } if (vmx->nested.need_vmcs12_to_shadow_sync) @@ -1763,19 +1762,16 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) vmx_clear_hlt(vcpu); } -static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr) +static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr, + bool load_into_hardware) { - struct vmx_uret_msr tmp; - int from, to; + struct vmx_uret_msr *uret_msr; - from = __vmx_find_uret_msr(vmx, msr); - if (from < 0) + uret_msr = vmx_find_uret_msr(vmx, msr); + if (!uret_msr) return; - to = vmx->nr_active_uret_msrs++; - tmp = vmx->guest_uret_msrs[to]; - vmx->guest_uret_msrs[to] = vmx->guest_uret_msrs[from]; - vmx->guest_uret_msrs[from] = tmp; + uret_msr->load_into_hardware = load_into_hardware; } /* @@ -1785,30 +1781,36 @@ static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr) */ static void setup_msrs(struct vcpu_vmx *vmx) { - vmx->guest_uret_msrs_loaded = false; - vmx->nr_active_uret_msrs = 0; #ifdef CONFIG_X86_64 + bool load_syscall_msrs; + /* * The SYSCALL MSRs are only needed on long mode guests, and only * when EFER.SCE is set. 
*/ - if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) { - vmx_setup_uret_msr(vmx, MSR_STAR); - vmx_setup_uret_msr(vmx, MSR_LSTAR); - vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK); - } + load_syscall_msrs = is_long_mode(&vmx->vcpu) && + (vmx->vcpu.arch.efer & EFER_SCE); + + vmx_setup_uret_msr(vmx, MSR_STAR, load_syscall_msrs); + vmx_setup_uret_msr(vmx, MSR_LSTAR, load_syscall_msrs); + vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK, load_syscall_msrs); #endif - if (update_transition_efer(vmx)) - vmx_setup_uret_msr(vmx, MSR_EFER); + vmx_setup_uret_msr(vmx, MSR_EFER, update_transition_efer(vmx)); - if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) || - guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID)) - vmx_setup_uret_msr(vmx, MSR_TSC_AUX); + vmx_setup_uret_msr(vmx, MSR_TSC_AUX, + guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) || + guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID)); - vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL); + vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, true); if (cpu_has_vmx_msr_bitmap()) vmx_update_msr_bitmap(&vmx->vcpu); + + /* + * The set of MSRs to load may have changed, reload MSRs before the + * next VM-Enter. + */ + vmx->guest_uret_msrs_loaded = false; } static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index d71ed8b425c5..16e4e457ba23 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -36,7 +36,7 @@ struct vmx_msrs { }; struct vmx_uret_msr { - unsigned int slot; /* The MSR's slot in kvm_user_return_msrs. */ + bool load_into_hardware; u64 data; u64 mask; }; -- cgit From 8ea8b8d6f869425e21f34e60bdbe7e47e6c9d6b9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:29 -0700 Subject: KVM: VMX: Use common x86's uret MSR list as the one true list Drop VMX's global list of user return MSRs now that VMX doesn't resort said list to isolate "active" MSRs, i.e. now that VMX's list and x86's list have the same MSRs in the same order. In addition to eliminating the redundant list, this will also allow moving more of the list management into common x86. Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-11-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx/vmx.c | 97 +++++++++++++++++------------------------ arch/x86/kvm/x86.c | 12 +++++ 3 files changed, 53 insertions(+), 57 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e8dcbc632cf8..6b15f27f49d0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1777,6 +1777,7 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, unsigned long icr, int op_64_bit); void kvm_define_user_return_msr(unsigned index, u32 msr); +int kvm_find_user_return_msr(u32 msr); int kvm_probe_user_return_msr(u32 msr); int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index adefedac0e3b..39506704be96 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -454,26 +454,7 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) static unsigned long host_idt_base; -/* - * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm - * will emulate SYSCALL in legacy mode if the vendor string in guest - * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" 
To - * support this emulation, MSR_STAR is included in the list for i386, - * but is never loaded into hardware. MSR_CSTAR is also never loaded - * into hardware and is here purely for emulation purposes. - */ -static u32 vmx_uret_msrs_list[] = { -#ifdef CONFIG_X86_64 - MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, -#endif - MSR_EFER, MSR_TSC_AUX, MSR_STAR, - MSR_IA32_TSX_CTRL, -}; - -/* - * Number of user return MSRs that are actually supported in hardware. - * vmx_uret_msrs_list is modified when KVM is loaded to drop unsupported MSRs. - */ +/* Number of user return MSRs that are actually supported in hardware. */ static int vmx_nr_uret_msrs; #if IS_ENABLED(CONFIG_HYPERV) @@ -703,22 +684,11 @@ static bool is_valid_passthrough_msr(u32 msr) return r; } -static inline int __vmx_find_uret_msr(u32 msr) -{ - int i; - - for (i = 0; i < vmx_nr_uret_msrs; ++i) { - if (vmx_uret_msrs_list[i] == msr) - return i; - } - return -1; -} - struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr) { int i; - i = __vmx_find_uret_msr(msr); + i = kvm_find_user_return_msr(msr); if (i >= 0) return &vmx->guest_uret_msrs[i]; return NULL; @@ -1086,7 +1056,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx) return false; } - i = __vmx_find_uret_msr(MSR_EFER); + i = kvm_find_user_return_msr(MSR_EFER); if (i < 0) return false; @@ -6922,6 +6892,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) static int vmx_create_vcpu(struct kvm_vcpu *vcpu) { + struct vmx_uret_msr *tsx_ctrl; struct vcpu_vmx *vmx; int i, cpu, err; @@ -6946,29 +6917,25 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) for (i = 0; i < vmx_nr_uret_msrs; ++i) { vmx->guest_uret_msrs[i].data = 0; - - switch (vmx_uret_msrs_list[i]) { - case MSR_IA32_TSX_CTRL: - /* - * TSX_CTRL_CPUID_CLEAR is handled in the CPUID - * interception. Keep the host value unchanged to avoid - * changing CPUID bits under the host kernel's feet. - * - * hle=0, rtm=0, tsx_ctrl=1 can be found with some - * combinations of new kernel and old userspace. If - * those guests run on a tsx=off host, do allow guests - * to use TSX_CTRL, but do not change the value on the - * host so that TSX remains always disabled. - */ - if (boot_cpu_has(X86_FEATURE_RTM)) - vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; - else - vmx->guest_uret_msrs[i].mask = 0; - break; - default: - vmx->guest_uret_msrs[i].mask = -1ull; - break; - } + vmx->guest_uret_msrs[i].mask = -1ull; + } + tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL); + if (tsx_ctrl) { + /* + * TSX_CTRL_CPUID_CLEAR is handled in the CPUID interception. + * Keep the host value unchanged to avoid changing CPUID bits + * under the host kernel's feet. + * + * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations + * of new kernel and old userspace. If those guests run on a + * tsx=off host, do allow guests to use TSX_CTRL, but do not + * change the value on the host so that TSX remains always + * disabled. + */ + if (boot_cpu_has(X86_FEATURE_RTM)) + vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; + else + vmx->guest_uret_msrs[i].mask = 0; } err = alloc_loaded_vmcs(&vmx->vmcs01); @@ -7829,6 +7796,22 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { static __init void vmx_setup_user_return_msrs(void) { + + /* + * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm + * will emulate SYSCALL in legacy mode if the vendor string in guest + * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" 
To + * support this emulation, MSR_STAR is included in the list for i386, + * but is never loaded into hardware. MSR_CSTAR is also never loaded + * into hardware and is here purely for emulation purposes. + */ + const u32 vmx_uret_msrs_list[] = { + #ifdef CONFIG_X86_64 + MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, + #endif + MSR_EFER, MSR_TSC_AUX, MSR_STAR, + MSR_IA32_TSX_CTRL, + }; u32 msr; int i; @@ -7841,7 +7824,7 @@ static __init void vmx_setup_user_return_msrs(void) continue; kvm_define_user_return_msr(vmx_nr_uret_msrs, msr); - vmx_uret_msrs_list[vmx_nr_uret_msrs++] = msr; + vmx_nr_uret_msrs++; } } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0856636efc44..d514031ed25f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -364,6 +364,18 @@ void kvm_define_user_return_msr(unsigned slot, u32 msr) } EXPORT_SYMBOL_GPL(kvm_define_user_return_msr); +int kvm_find_user_return_msr(u32 msr) +{ + int i; + + for (i = 0; i < user_return_msrs_global.nr; ++i) { + if (user_return_msrs_global.msrs[i] == msr) + return i; + } + return -1; +} +EXPORT_SYMBOL_GPL(kvm_find_user_return_msr); + static void kvm_user_return_msr_cpu_online(void) { unsigned int cpu = smp_processor_id(); -- cgit From 5e17c624010a82bbcca9b955155781927eb6532a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:30 -0700 Subject: KVM: VMX: Disable loading of TSX_CTRL MSR the more conventional way Tag TSX_CTRL as not needing to be loaded when RTM isn't supported in the host. Crushing the write mask to '0' has the same effect, but requires more mental gymnastics to understand. Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-12-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 39506704be96..bd2187b4cc53 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1771,7 +1771,13 @@ static void setup_msrs(struct vcpu_vmx *vmx) guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) || guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID)); - vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, true); + /* + * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations of new + * kernel and old userspace. If those guests run on a tsx=off host, do + * allow guests to use TSX_CTRL, but don't change the value in hardware + * so that TSX remains always disabled. + */ + vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, boot_cpu_has(X86_FEATURE_RTM)); if (cpu_has_vmx_msr_bitmap()) vmx_update_msr_bitmap(&vmx->vcpu); @@ -6919,23 +6925,15 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) vmx->guest_uret_msrs[i].data = 0; vmx->guest_uret_msrs[i].mask = -1ull; } - tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL); - if (tsx_ctrl) { + if (boot_cpu_has(X86_FEATURE_RTM)) { /* * TSX_CTRL_CPUID_CLEAR is handled in the CPUID interception. * Keep the host value unchanged to avoid changing CPUID bits * under the host kernel's feet. - * - * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations - * of new kernel and old userspace. If those guests run on a - * tsx=off host, do allow guests to use TSX_CTRL, but do not - * change the value on the host so that TSX remains always - * disabled. 
*/ - if (boot_cpu_has(X86_FEATURE_RTM)) + tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL); + if (tsx_ctrl) vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; - else - vmx->guest_uret_msrs[i].mask = 0; } err = alloc_loaded_vmcs(&vmx->vmcs01); -- cgit From 9cc39a5a43c05f8eda206bf9e144119820ecf5c8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:31 -0700 Subject: KVM: x86: Export the number of uret MSRs to vendor modules Split out and export the number of configured user return MSRs so that VMX can iterate over the set of MSRs without having to do its own tracking. Keep the list itself internal to x86 so that vendor code still has to go through the "official" APIs to add/modify entries. Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-13-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 29 +++++++++++++---------------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6b15f27f49d0..22505e74c3da 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1418,6 +1418,7 @@ struct kvm_arch_async_pf { bool direct_map; }; +extern u32 __read_mostly kvm_nr_uret_msrs; extern u64 __read_mostly host_efer; extern bool __read_mostly allow_smaller_maxphyaddr; extern struct kvm_x86_ops kvm_x86_ops; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d514031ed25f..5e1deed8ea5d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -184,11 +184,6 @@ module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR); */ #define KVM_MAX_NR_USER_RETURN_MSRS 16 -struct kvm_user_return_msrs_global { - int nr; - u32 msrs[KVM_MAX_NR_USER_RETURN_MSRS]; -}; - struct kvm_user_return_msrs { struct user_return_notifier urn; bool registered; @@ -198,7 +193,9 @@ struct kvm_user_return_msrs { } values[KVM_MAX_NR_USER_RETURN_MSRS]; }; -static struct kvm_user_return_msrs_global __read_mostly user_return_msrs_global; +u32 __read_mostly kvm_nr_uret_msrs; +EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs); +static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS]; static struct kvm_user_return_msrs __percpu *user_return_msrs; #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ @@ -330,10 +327,10 @@ static void kvm_on_user_return(struct user_return_notifier *urn) user_return_notifier_unregister(urn); } local_irq_restore(flags); - for (slot = 0; slot < user_return_msrs_global.nr; ++slot) { + for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) { values = &msrs->values[slot]; if (values->host != values->curr) { - wrmsrl(user_return_msrs_global.msrs[slot], values->host); + wrmsrl(kvm_uret_msrs_list[slot], values->host); values->curr = values->host; } } @@ -358,9 +355,9 @@ EXPORT_SYMBOL_GPL(kvm_probe_user_return_msr); void kvm_define_user_return_msr(unsigned slot, u32 msr) { BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS); - user_return_msrs_global.msrs[slot] = msr; - if (slot >= user_return_msrs_global.nr) - user_return_msrs_global.nr = slot + 1; + kvm_uret_msrs_list[slot] = msr; + if (slot >= kvm_nr_uret_msrs) + kvm_nr_uret_msrs = slot + 1; } EXPORT_SYMBOL_GPL(kvm_define_user_return_msr); @@ -368,8 +365,8 @@ int kvm_find_user_return_msr(u32 msr) { int i; - for (i = 0; i < user_return_msrs_global.nr; ++i) { - if (user_return_msrs_global.msrs[i] == msr) + for (i = 0; i < kvm_nr_uret_msrs; ++i) { + if (kvm_uret_msrs_list[i] == msr) return i; } return -1; @@ -383,8 +380,8 @@ static void 
kvm_user_return_msr_cpu_online(void) u64 value; int i; - for (i = 0; i < user_return_msrs_global.nr; ++i) { - rdmsrl_safe(user_return_msrs_global.msrs[i], &value); + for (i = 0; i < kvm_nr_uret_msrs; ++i) { + rdmsrl_safe(kvm_uret_msrs_list[i], &value); msrs->values[i].host = value; msrs->values[i].curr = value; } @@ -399,7 +396,7 @@ int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask) value = (value & mask) | (msrs->values[slot].host & ~mask); if (value == msrs->values[slot].curr) return 0; - err = wrmsrl_safe(user_return_msrs_global.msrs[slot], value); + err = wrmsrl_safe(kvm_uret_msrs_list[slot], value); if (err) return 1; -- cgit From e5fda4bbadb053e3b5164476146cf43092785c0b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:32 -0700 Subject: KVM: x86: Move uret MSR slot management to common x86 Now that SVM and VMX both probe MSRs before "defining" user return slots for them, consolidate the code for probe+define into common x86 and eliminate the odd behavior of having the vendor code define the slot for a given MSR. Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-14-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +-- arch/x86/kvm/svm/svm.c | 5 +---- arch/x86/kvm/vmx/vmx.c | 19 ++++--------------- arch/x86/kvm/x86.c | 19 +++++++++++-------- 4 files changed, 17 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 22505e74c3da..8155eaac22a7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1777,9 +1777,8 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, unsigned long ipi_bitmap_high, u32 min, unsigned long icr, int op_64_bit); -void kvm_define_user_return_msr(unsigned index, u32 msr); +int kvm_add_user_return_msr(u32 msr); int kvm_find_user_return_msr(u32 msr); -int kvm_probe_user_return_msr(u32 msr); int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 0ba0a00f8dc6..d13b72bfb736 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -959,10 +959,7 @@ static __init int svm_hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 32; } - if (!kvm_probe_user_return_msr(MSR_TSC_AUX)) { - tsc_aux_uret_slot = 0; - kvm_define_user_return_msr(tsc_aux_uret_slot, MSR_TSC_AUX); - } + tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX); /* Check for pause filtering support */ if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index bd2187b4cc53..042823b49273 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -454,9 +454,6 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) static unsigned long host_idt_base; -/* Number of user return MSRs that are actually supported in hardware. 
*/ -static int vmx_nr_uret_msrs; - #if IS_ENABLED(CONFIG_HYPERV) static bool __read_mostly enlightened_vmcs = true; module_param(enlightened_vmcs, bool, 0444); @@ -1218,7 +1215,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) */ if (!vmx->guest_uret_msrs_loaded) { vmx->guest_uret_msrs_loaded = true; - for (i = 0; i < vmx_nr_uret_msrs; ++i) { + for (i = 0; i < kvm_nr_uret_msrs; ++i) { if (!vmx->guest_uret_msrs[i].load_into_hardware) continue; @@ -6921,7 +6918,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) goto free_vpid; } - for (i = 0; i < vmx_nr_uret_msrs; ++i) { + for (i = 0; i < kvm_nr_uret_msrs; ++i) { vmx->guest_uret_msrs[i].data = 0; vmx->guest_uret_msrs[i].mask = -1ull; } @@ -7810,20 +7807,12 @@ static __init void vmx_setup_user_return_msrs(void) MSR_EFER, MSR_TSC_AUX, MSR_STAR, MSR_IA32_TSX_CTRL, }; - u32 msr; int i; BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS); - for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) { - msr = vmx_uret_msrs_list[i]; - - if (kvm_probe_user_return_msr(msr)) - continue; - - kvm_define_user_return_msr(vmx_nr_uret_msrs, msr); - vmx_nr_uret_msrs++; - } + for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) + kvm_add_user_return_msr(vmx_uret_msrs_list[i]); } static __init int hardware_setup(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5e1deed8ea5d..0a0eebf35dd5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -336,7 +336,7 @@ static void kvm_on_user_return(struct user_return_notifier *urn) } } -int kvm_probe_user_return_msr(u32 msr) +static int kvm_probe_user_return_msr(u32 msr) { u64 val; int ret; @@ -350,16 +350,18 @@ out: preempt_enable(); return ret; } -EXPORT_SYMBOL_GPL(kvm_probe_user_return_msr); -void kvm_define_user_return_msr(unsigned slot, u32 msr) +int kvm_add_user_return_msr(u32 msr) { - BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS); - kvm_uret_msrs_list[slot] = msr; - if (slot >= kvm_nr_uret_msrs) - kvm_nr_uret_msrs = slot + 1; + BUG_ON(kvm_nr_uret_msrs >= KVM_MAX_NR_USER_RETURN_MSRS); + + if (kvm_probe_user_return_msr(msr)) + return -1; + + kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr; + return kvm_nr_uret_msrs++; } -EXPORT_SYMBOL_GPL(kvm_define_user_return_msr); +EXPORT_SYMBOL_GPL(kvm_add_user_return_msr); int kvm_find_user_return_msr(u32 msr) { @@ -8132,6 +8134,7 @@ int kvm_arch_init(void *opaque) printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n"); goto out_free_x86_emulator_cache; } + kvm_nr_uret_msrs = 0; r = kvm_mmu_module_init(); if (r) -- cgit From 61a05d444d2ca8d40add453a5f7058fbb1b57eca Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:33 -0700 Subject: KVM: x86: Tie Intel and AMD behavior for MSR_TSC_AUX to guest CPU model Squish the Intel and AMD emulation of MSR_TSC_AUX together and tie it to the guest CPU model instead of the host CPU behavior. While not strictly necessary to avoid guest breakage, emulating cross-vendor "architecture" will provide consistent behavior for the guest, e.g. WRMSR fault behavior won't change if the vCPU is migrated to a host with divergent behavior. Note, the "new" kvm_is_supported_user_return_msr() checks do not add new functionality on either SVM or VMX. On SVM, the equivalent was "tsc_aux_uret_slot < 0", and on VMX the check was buried in the vmx_find_uret_msr() call at the find_uret_msr label. 
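The resulting common behaviour can be sketched as a standalone model (the function name is hypothetical, not the kernel code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Model of the cross-vendor MSR_TSC_AUX write check: returns true on #GP. */
static bool tsc_aux_write_faults(bool guest_is_intel, uint64_t *data)
{
	/* Intel guest CPU model: bits 63:32 are reserved and must be zero. */
	if (guest_is_intel && (*data >> 32) != 0)
		return true;

	/* AMD guest CPU model: bits 63:32 are ignored and read as zero. */
	*data = (uint32_t)*data;
	return false;
}

int main(void)
{
	uint64_t v = 0x100000001ull;

	printf("intel: %s\n", tsc_aux_write_faults(true, &v) ? "#GP" : "ok");
	v = 0x100000001ull;
	if (!tsc_aux_write_faults(false, &v))
		printf("amd: truncated to %#llx\n", (unsigned long long)v);
	return 0;
}

A write of 0x100000001 thus faults for an Intel guest CPU model but is silently truncated to 0x1 for an AMD one, regardless of the host's vendor.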
Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-15-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 5 +++++ arch/x86/kvm/svm/svm.c | 24 ------------------------ arch/x86/kvm/vmx/vmx.c | 15 --------------- arch/x86/kvm/x86.c | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 39 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8155eaac22a7..f85480b1d215 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1781,6 +1781,11 @@ int kvm_add_user_return_msr(u32 msr); int kvm_find_user_return_msr(u32 msr); int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); +static inline bool kvm_is_supported_user_return_msr(u32 msr) +{ + return kvm_find_user_return_msr(msr) >= 0; +} + u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc); u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d13b72bfb736..0922d8e173e6 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2663,12 +2663,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data |= (u64)svm->sysenter_esp_hi << 32; break; case MSR_TSC_AUX: - if (tsc_aux_uret_slot < 0) - return 1; - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && - !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) - return 1; msr_info->data = svm->tsc_aux; break; /* @@ -2885,24 +2879,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0; break; case MSR_TSC_AUX: - if (tsc_aux_uret_slot < 0) - return 1; - - if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && - !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) - return 1; - - /* - * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has - * incomplete and conflicting architectural behavior. Current - * AMD CPUs completely ignore bits 63:32, i.e. they aren't - * reserved and always read as zeros. Emulate AMD CPU behavior - * to avoid explosions if the vCPU is migrated from an AMD host - * to an Intel host. - */ - data = (u32)data; - /* * TSC_AUX is usually changed only during boot and never read * directly. 
Intercept TSC_AUX instead of exposing it to the diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 042823b49273..61f7e5221bf3 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1981,12 +1981,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) else msr_info->data = vmx->pt_desc.guest.addr_a[index / 2]; break; - case MSR_TSC_AUX: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && - !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) - return 1; - goto find_uret_msr; case MSR_IA32_DEBUGCTLMSR: msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL); break; @@ -2302,15 +2296,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) else vmx->pt_desc.guest.addr_a[index / 2] = data; break; - case MSR_TSC_AUX: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && - !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) - return 1; - /* Check reserved bit, higher 32 bits should be zero */ - if ((data >> 32) != 0) - return 1; - goto find_uret_msr; case MSR_IA32_PERF_CAPABILITIES: if (data && !vcpu_to_pmu(vcpu)->version) return 1; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0a0eebf35dd5..4efd2978ec08 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1642,6 +1642,30 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, * invokes 64-bit SYSENTER. */ data = get_canonical(data, vcpu_virt_addr_bits(vcpu)); + break; + case MSR_TSC_AUX: + if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX)) + return 1; + + if (!host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && + !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) + return 1; + + /* + * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has + * incomplete and conflicting architectural behavior. Current + * AMD CPUs completely ignore bits 63:32, i.e. they aren't + * reserved and always read as zeros. Enforce Intel's reserved + * bits check if and only if the guest CPU is Intel, and clear + * the bits in all other cases. This ensures cross-vendor + * migration will provide consistent behavior for the guest. + */ + if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0) + return 1; + + data = (u32)data; + break; } msr.data = data; @@ -1678,6 +1702,18 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) return KVM_MSR_RET_FILTERED; + switch (index) { + case MSR_TSC_AUX: + if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX)) + return 1; + + if (!host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && + !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) + return 1; + break; + } + msr.index = index; msr.host_initiated = host_initiated; -- cgit From 78bba966ee3cdbbfc585d8e39237378fba50a142 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:34 -0700 Subject: KVM: x86: Hide RDTSCP and RDPID if MSR_TSC_AUX probing failed If probing MSR_TSC_AUX failed, hide RDTSCP and RDPID, and WARN if either feature was reported as supported. In theory, such a scenario should never happen as both Intel and AMD state that MSR_TSC_AUX is available if RDTSCP or RDPID is supported. But, KVM injects #GP on MSR_TSC_AUX accesses if probing failed, faults on WRMSR(MSR_TSC_AUX) may be fatal to the guest (because they happen during early CPU bringup), and KVM itself has effectively misreported RDPID support in the past. 
Note, this also has the happy side effect of omitting MSR_TSC_AUX from the list of MSRs that are exposed to userspace if probing the MSR fails. Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-16-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c0e8c5e92189..f42e9491a6c8 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -567,6 +567,21 @@ void kvm_set_cpu_caps(void) F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | F(PMM) | F(PMM_EN) ); + + /* + * Hide RDTSCP and RDPID if either feature is reported as supported but + * probing MSR_TSC_AUX failed. This is purely a sanity check and + * should never happen, but the guest will likely crash if RDTSCP or + * RDPID is misreported, and KVM has botched MSR_TSC_AUX emulation in + * the past. For example, the sanity check may fire if this instance of + * KVM is running as L1 on top of an older, broken KVM. + */ + if (WARN_ON((kvm_cpu_cap_has(X86_FEATURE_RDTSCP) || + kvm_cpu_cap_has(X86_FEATURE_RDPID)) && + !kvm_is_supported_user_return_msr(MSR_TSC_AUX))) { + kvm_cpu_cap_clear(X86_FEATURE_RDTSCP); + kvm_cpu_cap_clear(X86_FEATURE_RDPID); + } } EXPORT_SYMBOL_GPL(kvm_set_cpu_caps); -- cgit From 34114136f725cbd0c83e7b5a0c8a977976cd82f7 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 5 May 2021 22:15:09 +1000 Subject: KVM: PPC: Book3S HV: Fix conversion to gfn-based MMU notifier callbacks Commit b1c5356e873c ("KVM: PPC: Convert to the gfn-based MMU notifier callbacks") causes unmap_gfn_range and age_gfn callbacks to only work on the first gfn in the range. It also makes the aging callbacks call into both radix and hash aging functions for radix guests. Fix this. Add warnings for the single-gfn calls that have been converted to range callbacks, in case they ever receive ranges greater than 1.
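The shape of the fix, as a self-contained sketch (the types and the per-gfn helper are stand-ins for the kernel's):

#include <stdbool.h>
#include <stdio.h>

typedef unsigned long gfn_t;

struct gfn_range {
	gfn_t start;
	gfn_t end;	/* exclusive */
};

/* Stand-in for a per-gfn helper such as kvm_age_rmapp()/kvm_age_radix(). */
static bool age_one_gfn(gfn_t gfn)
{
	printf("aging gfn %#lx\n", gfn);
	return gfn % 2;	/* pretend odd gfns were referenced */
}

static bool age_gfn_range(const struct gfn_range *range)
{
	bool young = false;
	gfn_t gfn;

	/* Visit every gfn in the range, not just range->start. */
	for (gfn = range->start; gfn < range->end; gfn++)
		young |= age_one_gfn(gfn);
	return young;
}

int main(void)
{
	struct gfn_range r = { 0x100, 0x104 };

	printf("young: %d\n", age_gfn_range(&r));
	return 0;
}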
Fixes: b1c5356e873c ("KVM: PPC: Convert to the gfn-based MMU notifier callbacks") Reported-by: Bharata B Rao Tested-by: Bharata B Rao Signed-off-by: Nicholas Piggin Message-Id: <20210505121509.1470207-1-npiggin@gmail.com> Signed-off-by: Paolo Bonzini --- arch/powerpc/include/asm/kvm_book3s.h | 2 +- arch/powerpc/kvm/book3s_64_mmu_hv.c | 46 ++++++++++++++++++++++++---------- arch/powerpc/kvm/book3s_64_mmu_radix.c | 5 ++-- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index a6e9a5585e61..e6b53c6e21e3 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -210,7 +210,7 @@ extern void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, unsigned int lpid); extern int kvmppc_radix_init(void); extern void kvmppc_radix_exit(void); -extern bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, +extern void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn); extern bool kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index b7bd9ca040b8..2d9193cd73be 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -795,7 +795,7 @@ static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i, } } -static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, +static void kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn) { unsigned long i; @@ -829,15 +829,21 @@ static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, unlock_rmap(rmapp); __unlock_hpte(hptep, be64_to_cpu(hptep[0])); } - return false; } bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range) { - if (kvm_is_radix(kvm)) - return kvm_unmap_radix(kvm, range->slot, range->start); + gfn_t gfn; + + if (kvm_is_radix(kvm)) { + for (gfn = range->start; gfn < range->end; gfn++) + kvm_unmap_radix(kvm, range->slot, gfn); + } else { + for (gfn = range->start; gfn < range->end; gfn++) + kvm_unmap_rmapp(kvm, range->slot, gfn); } - return kvm_unmap_rmapp(kvm, range->slot, range->start); + return false; } void kvmppc_core_flush_memslot_hv(struct kvm *kvm, @@ -924,10 +930,18 @@ static bool kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range) { - if (kvm_is_radix(kvm)) - kvm_age_radix(kvm, range->slot, range->start); + gfn_t gfn; + bool ret = false; - return kvm_age_rmapp(kvm, range->slot, range->start); + if (kvm_is_radix(kvm)) { + for (gfn = range->start; gfn < range->end; gfn++) + ret |= kvm_age_radix(kvm, range->slot, gfn); + } else { + for (gfn = range->start; gfn < range->end; gfn++) + ret |= kvm_age_rmapp(kvm, range->slot, gfn); + } + + return ret; } static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, @@ -965,18 +979,24 @@ static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range) { - if (kvm_is_radix(kvm)) - kvm_test_age_radix(kvm, range->slot, range->start); + WARN_ON(range->start + 1 != range->end); - return kvm_test_age_rmapp(kvm, range->slot, range->start); + if (kvm_is_radix(kvm)) + return kvm_test_age_radix(kvm, range->slot, range->start); + else + return kvm_test_age_rmapp(kvm, range->slot, range->start); }
bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range) { + WARN_ON(range->start + 1 != range->end); + if (kvm_is_radix(kvm)) - return kvm_unmap_radix(kvm, range->slot, range->start); + kvm_unmap_radix(kvm, range->slot, range->start); + else + kvm_unmap_rmapp(kvm, range->slot, range->start); - return kvm_unmap_rmapp(kvm, range->slot, range->start); + return false; } static int vcpus_running(struct kvm *kvm) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index ec4f58fa9f5a..d909c069363e 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -993,7 +993,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu, } /* Called with kvm->mmu_lock held */ -bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, +void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn) { pte_t *ptep; @@ -1002,14 +1002,13 @@ bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) { uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT); - return false; + return; } ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep)) kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, kvm->arch.lpid); - return false; } /* Called with kvm->mmu_lock held */ -- cgit From e8ea85fb280ec55674bca88ea7cd85f60d19567f Mon Sep 17 00:00:00 2001 From: Chenyi Qiang Date: Tue, 2 Feb 2021 17:04:32 +0800 Subject: KVM: X86: Add support for the emulation of DR6_BUS_LOCK bit Bus lock debug exception introduces a new bit DR6_BUS_LOCK (bit 11 of DR6) to indicate that bus lock #DB exception is generated. The set/clear of DR6_BUS_LOCK is similar to the DR6_RTM. The processor clears DR6_BUS_LOCK when the exception is generated. For all other #DB, the processor sets this bit to 1. Software #DB handler should set this bit before returning to the interrupted task. In VMM, to avoid breaking the CPUs without bus lock #DB exception support, activate the DR6_BUS_LOCK conditionally in DR6_FIXED_1 bits. When intercepting the #DB exception caused by bus locks, bit 11 of the exit qualification is set to identify it. The VMM should emulate the exception by clearing the bit 11 of the guest DR6. Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Signed-off-by: Chenyi Qiang Message-Id: <20210202090433.13441-3-chenyi.qiang@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/x86.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f85480b1d215..8836b3096217 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -200,6 +200,7 @@ enum x86_intercept_stage; #define KVM_NR_DB_REGS 4 +#define DR6_BUS_LOCK (1 << 11) #define DR6_BD (1 << 13) #define DR6_BS (1 << 14) #define DR6_BT (1 << 15) @@ -213,7 +214,7 @@ enum x86_intercept_stage; * DR6_ACTIVE_LOW is also used as the init/reset value for DR6. 
*/ #define DR6_ACTIVE_LOW 0xffff0ff0 -#define DR6_VOLATILE 0x0001e00f +#define DR6_VOLATILE 0x0001e80f #define DR6_FIXED_1 (DR6_ACTIVE_LOW & ~DR6_VOLATILE) #define DR7_BP_EN_MASK 0x000000ff diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4efd2978ec08..9b1a7040eae3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1176,6 +1176,9 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM)) fixed |= DR6_RTM; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)) + fixed |= DR6_BUS_LOCK; return fixed; } -- cgit From 76ea438b4afcd9ee8da3387e9af4625eaccff58f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 6 May 2021 06:30:04 -0400 Subject: KVM: X86: Expose bus lock debug exception to guest Bus lock debug exception is an ability to notify the kernel by an #DB trap after the instruction acquires a bus lock and is executed when CPL>0. This allows the kernel to enforce user application throttling or mitigations. Existence of bus lock debug exception is enumerated via CPUID.(EAX=7,ECX=0).ECX[24]. Software can enable these exceptions by setting bit 2 of the MSR_IA32_DEBUGCTL. Expose the CPUID to guest and emulate the MSR handling when guest enables it. Support for this feature was originally developed by Xiaoyao Li and Chenyi Qiang, but code has since changed enough that this patch has nothing in common with theirs, except for this commit message. Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Signed-off-by: Chenyi Qiang Message-Id: <20210202090433.13441-4-chenyi.qiang@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/vmx/capabilities.h | 3 +++ arch/x86/kvm/vmx/vmx.c | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index f42e9491a6c8..9a48f138832d 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -458,7 +458,7 @@ void kvm_set_cpu_caps(void) F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ | - F(SGX_LC) + F(SGX_LC) | F(BUS_LOCK_DETECT) ); /* Set LA57 based on hardware capability. */ if (cpuid_ecx(7) & F(LA57)) diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index d1d77985e889..8dee8a5fbc17 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -398,6 +398,9 @@ static inline u64 vmx_supported_debugctl(void) { u64 debugctl = 0; + if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) + debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT; + if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT) debugctl |= DEBUGCTLMSR_LBR_MASK; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 61f7e5221bf3..f2fd447eed45 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2014,6 +2014,9 @@ static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu) if (!intel_pmu_lbr_is_enabled(vcpu)) debugctl &= ~DEBUGCTLMSR_LBR_MASK; + if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)) + debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT; + return debugctl; } -- cgit From 03ca4589fabcc66b27e4cb8f8e95d64cf43badd0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 5 May 2021 13:42:21 -0700 Subject: KVM: x86: Prevent KVM SVM from loading on kernels with 5-level paging Disallow loading KVM SVM if 5-level paging is supported. 
In theory, NPT for L1 should simply work, but there are unknowns with respect to how the guest's MAXPHYADDR will be handled by hardware. Nested NPT is more problematic, as running an L1 VMM that is using 2-level page tables requires stacking single-entry PDP and PML4 tables in KVM's NPT for L2, as there are no equivalent entries in L1's NPT to shadow. Barring hardware magic, for 5-level paging, KVM would need to stack another layer to handle PML5. Opportunistically rename the lm_root pointer, which is used for the aforementioned stacking when shadowing 2-level L1 NPT, to pml4_root to call out that it's specifically for PML4. Suggested-by: Paolo Bonzini Signed-off-by: Sean Christopherson Message-Id: <20210505204221.1934471-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu/mmu.c | 20 ++++++++++---------- arch/x86/kvm/svm/svm.c | 5 +++++ 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8836b3096217..55efbacfc244 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -409,7 +409,7 @@ struct kvm_mmu { u32 pkru_mask; u64 *pae_root; - u64 *lm_root; + u64 *pml4_root; /* * check zero bits on shadow page table entries, these diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 4b3ee244ebe0..0144c40d09c7 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3310,12 +3310,12 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) { pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; - if (WARN_ON_ONCE(!mmu->lm_root)) { + if (WARN_ON_ONCE(!mmu->pml4_root)) { r = -EIO; goto out_unlock; } - mmu->lm_root[0] = __pa(mmu->pae_root) | pm_mask; + mmu->pml4_root[0] = __pa(mmu->pae_root) | pm_mask; } for (i = 0; i < 4; ++i) { @@ -3335,7 +3335,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) } if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) - mmu->root_hpa = __pa(mmu->lm_root); + mmu->root_hpa = __pa(mmu->pml4_root); else mmu->root_hpa = __pa(mmu->pae_root); @@ -3350,7 +3350,7 @@ out_unlock: static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; - u64 *lm_root, *pae_root; + u64 *pml4_root, *pae_root; /* * When shadowing 32-bit or PAE NPT with 64-bit NPT, the PML4 and PDP @@ -3369,14 +3369,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) if (WARN_ON_ONCE(mmu->shadow_root_level != PT64_ROOT_4LEVEL)) return -EIO; - if (mmu->pae_root && mmu->lm_root) + if (mmu->pae_root && mmu->pml4_root) return 0; /* * The special roots should always be allocated in concert. Yell and * bail if KVM ends up in a state where only one of the roots is valid.
*/ - if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->lm_root)) + if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root)) return -EIO; /* @@ -3387,14 +3387,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) if (!pae_root) return -ENOMEM; - lm_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); - if (!lm_root) { + pml4_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); + if (!pml4_root) { free_page((unsigned long)pae_root); return -ENOMEM; } mmu->pae_root = pae_root; - mmu->lm_root = lm_root; + mmu->pml4_root = pml4_root; return 0; } @@ -5261,7 +5261,7 @@ static void free_mmu_pages(struct kvm_mmu *mmu) if (!tdp_enabled && mmu->pae_root) set_memory_encrypted((unsigned long)mmu->pae_root, 1); free_page((unsigned long)mmu->pae_root); - free_page((unsigned long)mmu->lm_root); + free_page((unsigned long)mmu->pml4_root); } static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 0922d8e173e6..8124f51e9488 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -447,6 +447,11 @@ static int has_svm(void) return 0; } + if (pgtable_l5_enabled()) { + pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n"); + return 0; + } + return 1; } -- cgit From 594b27e677b35f9734b1969d175ebc6146741109 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 5 May 2021 23:48:17 +0200 Subject: KVM: x86: Cancel pvclock_gtod_work on module removal Nothing prevents the following: pvclock_gtod_notify() queue_work(system_long_wq, &pvclock_gtod_work); ... remove_module(kvm); ... work_queue_run() pvclock_gtod_work() <- UAF Ditto for any other operation on that workqueue list head which touches pvclock_gtod_work after module removal. Cancel the work in kvm_arch_exit() to prevent that. Fixes: 16e8d74d2da9 ("KVM: x86: notifier for clocksource changes") Signed-off-by: Thomas Gleixner Message-Id: <87czu4onry.ffs@nanos.tec.linutronix.de> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9b1a7040eae3..259139a145cb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8224,6 +8224,7 @@ void kvm_arch_exit(void) cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE); #ifdef CONFIG_X86_64 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier); + cancel_work_sync(&pvclock_gtod_work); #endif kvm_x86_ops.hardware_enable = NULL; kvm_mmu_module_exit(); -- cgit From 3f804f6d201ca93adf4c3df04d1bfd152c1129d6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 6 May 2021 15:21:37 +0200 Subject: KVM: x86: Prevent deadlock against tk_core.seq syzbot reported a possible deadlock in pvclock_gtod_notify(): CPU 0 CPU 1 write_seqcount_begin(&tk_core.seq); pvclock_gtod_notify() spin_lock(&pool->lock); queue_work(..., &pvclock_gtod_work) ktime_get() spin_lock(&pool->lock); do { seq = read_seqcount_begin(tk_core.seq) ... } while (read_seqcount_retry(&tk_core.seq, seq); While this is unlikely to happen, it's possible. Delegate queue_work() to irq_work() which postpones it until the tk_core.seq write held region is left and interrupts are reenabled. 
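The indirection can be modelled in userspace as follows (illustrative names; the kernel uses irq_work_queue() and a workqueue):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool update_pending;

/* Runs inside the seqcount write-held region: must not take pool->lock. */
static void gtod_notify(void)
{
	/* Only flag the work; irq_work_queue() is likewise lock-free here. */
	atomic_store(&update_pending, true);
}

/* Runs after the write-held region is left, with interrupts re-enabled. */
static void irq_work_like_callback(void)
{
	if (atomic_exchange(&update_pending, false))
		printf("queue_work(system_long_wq, &pvclock_gtod_work)\n");
}

int main(void)
{
	gtod_notify();			/* deadlock-free: no locks taken */
	irq_work_like_callback();	/* the safe context does the locked work */
	return 0;
}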
Fixes: 16e8d74d2da9 ("KVM: x86: notifier for clocksource changes") Reported-by: syzbot+6beae4000559d41d80f8@syzkaller.appspotmail.com Signed-off-by: Thomas Gleixner Message-Id: <87h7jgm1zy.ffs@nanos.tec.linutronix.de> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 259139a145cb..5bd550eaf683 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8094,6 +8094,18 @@ static void pvclock_gtod_update_fn(struct work_struct *work) static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); +/* + * Indirection to move queue_work() out of the tk_core.seq write held + * region to prevent possible deadlocks against time accessors which + * are invoked with work related locks held. + */ +static void pvclock_irq_work_fn(struct irq_work *w) +{ + queue_work(system_long_wq, &pvclock_gtod_work); +} + +static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn); + /* * Notification about pvclock gtod data update. */ @@ -8105,13 +8117,14 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused, update_pvclock_gtod(tk); - /* disable master clock if host does not trust, or does not - * use, TSC based clocksource. + /* + * Disable master clock if host does not trust, or does not use, + * TSC based clocksource. Delegate queue_work() to irq_work as + * this is invoked with tk_core.seq write held. */ if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) && atomic_read(&kvm_guest_has_master_clock) != 0) - queue_work(system_long_wq, &pvclock_gtod_work); - + irq_work_queue(&pvclock_irq_work); return 0; } @@ -8224,6 +8237,7 @@ void kvm_arch_exit(void) cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE); #ifdef CONFIG_X86_64 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier); + irq_work_sync(&pvclock_irq_work); cancel_work_sync(&pvclock_gtod_work); #endif kvm_x86_ops.hardware_enable = NULL; -- cgit From b26990987ffce0525abbd84b36595869cfdbbfe6 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Thu, 6 May 2021 16:03:52 +0200 Subject: tools/kvm_stat: Fix documentation typo Makes the dash in front of option '-z' disappear in the generated man-page. Signed-off-by: Stefan Raspl Message-Id: <20210506140352.4178789-1-raspl@linux.ibm.com> Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt index feaf46451e83..3a9f2037bd23 100644 --- a/tools/kvm/kvm_stat/kvm_stat.txt +++ b/tools/kvm/kvm_stat/kvm_stat.txt @@ -111,7 +111,7 @@ OPTIONS --tracepoints:: retrieve statistics from tracepoints -*z*:: +-z:: --skip-zero-records:: omit records with all zeros in logging mode -- cgit From 258785ef08b323bddd844b4926a32c2b2045a1b0 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Thu, 6 May 2021 15:24:43 +0000 Subject: kvm: Cap halt polling at kvm->max_halt_poll_ns When growing halt-polling, there is no check that the poll time exceeds the per-VM limit. It's possible for vcpu->halt_poll_ns to grow past kvm->max_halt_poll_ns and stay there until a halt which takes longer than kvm->halt_poll_ns. 
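A minimal standalone sketch of the corrected growth logic (the parameter names mirror the kernel's fields, but this is not the kernel code):

#include <stdio.h>

static unsigned int grow_poll_ns(unsigned int val, unsigned int grow,
				 unsigned int grow_start,
				 unsigned int vm_max_halt_poll_ns)
{
	val *= grow;			/* multiplicative growth */
	if (val < grow_start)
		val = grow_start;
	if (val > vm_max_halt_poll_ns)	/* clamp to the per-VM cap */
		val = vm_max_halt_poll_ns;
	return val;
}

int main(void)
{
	unsigned int val = 0;

	/* With grow=2, grow_start=10000 and a per-VM cap of 200000: */
	for (int i = 0; i < 8; i++) {
		val = grow_poll_ns(val, 2, 10000, 200000);
		printf("%u\n", val);	/* 10000, 20000, ..., 160000, 200000, 200000 */
	}
	return 0;
}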
Signed-off-by: David Matlack Signed-off-by: Venkatesh Srinivas Message-Id: <20210506152442.4010298-1-venkateshs@chromium.org> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b9f12da6af0e..6b4feb92dc79 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2893,8 +2893,8 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) if (val < grow_start) val = grow_start; - if (val > halt_poll_ns) - val = halt_poll_ns; + if (val > vcpu->kvm->max_halt_poll_ns) + val = vcpu->kvm->max_halt_poll_ns; vcpu->halt_poll_ns = val; out: -- cgit From 368340a3c7d9a207bfe544721d464b7109be8eae Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 May 2021 16:15:42 -0700 Subject: KVM: SVM: Invert user pointer casting in SEV {en,de}crypt helpers Invert the user pointer params for SEV's helpers for encrypting and decrypting guest memory so that they take a pointer and cast to an unsigned long as necessary, as opposed to doing the opposite. Tagging a non-pointer as __user is confusing and weird since a cast of some form needs to occur to actually access the user data. This also fixes Sparse warnings triggered by directly consuming the unsigned longs, which are "noderef" due to the __user tag. Cc: Brijesh Singh Cc: Tom Lendacky Cc: Ashish Kalra Signed-off-by: Sean Christopherson Message-Id: <20210506231542.2331138-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 8b11c711a0e4..eb241c1a4add 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -763,7 +763,7 @@ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr, } static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr, - unsigned long __user dst_uaddr, + void __user *dst_uaddr, unsigned long dst_paddr, int size, int *err) { @@ -787,8 +787,7 @@ static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr, if (tpage) { offset = paddr & 15; - if (copy_to_user((void __user *)(uintptr_t)dst_uaddr, - page_address(tpage) + offset, size)) + if (copy_to_user(dst_uaddr, page_address(tpage) + offset, size)) ret = -EFAULT; } @@ -800,9 +799,9 @@ e_free: } static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, - unsigned long __user vaddr, + void __user *vaddr, unsigned long dst_paddr, - unsigned long __user dst_vaddr, + void __user *dst_vaddr, int size, int *error) { struct page *src_tpage = NULL; @@ -810,13 +809,12 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, int ret, len = size; /* If source buffer is not aligned then use an intermediate buffer */ - if (!IS_ALIGNED(vaddr, 16)) { + if (!IS_ALIGNED((unsigned long)vaddr, 16)) { src_tpage = alloc_page(GFP_KERNEL); if (!src_tpage) return -ENOMEM; - if (copy_from_user(page_address(src_tpage), - (void __user *)(uintptr_t)vaddr, size)) { + if (copy_from_user(page_address(src_tpage), vaddr, size)) { __free_page(src_tpage); return -EFAULT; } @@ -830,7 +828,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, * - copy the source buffer in an intermediate buffer * - use the intermediate buffer as source buffer */ - if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) { + if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) { int dst_offset; dst_tpage = 
alloc_page(GFP_KERNEL); @@ -855,7 +853,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, page_address(src_tpage), size); else { if (copy_from_user(page_address(dst_tpage) + dst_offset, - (void __user *)(uintptr_t)vaddr, size)) { + vaddr, size)) { ret = -EFAULT; goto e_free; } @@ -935,15 +933,15 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) if (dec) ret = __sev_dbg_decrypt_user(kvm, __sme_page_pa(src_p[0]) + s_off, - dst_vaddr, + (void __user *)dst_vaddr, __sme_page_pa(dst_p[0]) + d_off, len, &argp->error); else ret = __sev_dbg_encrypt_user(kvm, __sme_page_pa(src_p[0]) + s_off, - vaddr, + (void __user *)vaddr, __sme_page_pa(dst_p[0]) + d_off, - dst_vaddr, + (void __user *)dst_vaddr, len, &argp->error); sev_unpin_memory(kvm, src_p, n); -- cgit From ce7ea0cfdc2e9ff31d12da31c3226deddb9644f5 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 6 May 2021 15:14:41 -0500 Subject: KVM: SVM: Move GHCB unmapping to fix RCU warning When an SEV-ES guest is running, the GHCB is unmapped as part of the vCPU run support. However, kvm_vcpu_unmap() triggers an RCU dereference warning with CONFIG_PROVE_LOCKING=y because the SRCU lock is released before invoking the vCPU run support. Move the GHCB unmapping into the prepare_guest_switch callback, which is invoked while still holding the SRCU lock, eliminating the RCU dereference warning. Fixes: 291bd20d5d88 ("KVM: SVM: Add initial support for a VMGEXIT VMEXIT") Reported-by: Borislav Petkov Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 5 +---- arch/x86/kvm/svm/svm.c | 3 +++ arch/x86/kvm/svm/svm.h | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index eb241c1a4add..5bc887e9a986 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2197,7 +2197,7 @@ vmgexit_err: return -EINVAL; } -static void pre_sev_es_run(struct vcpu_svm *svm) +void sev_es_unmap_ghcb(struct vcpu_svm *svm) { if (!svm->ghcb) return; @@ -2233,9 +2233,6 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) struct svm_cpu_data *sd = per_cpu(svm_data, cpu); int asid = sev_get_asid(svm->vcpu.kvm); - /* Perform any SEV-ES pre-run actions */ - pre_sev_es_run(svm); - /* Assign the asid allocated with this SEV guest */ svm->asid = asid; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8124f51e9488..4dd9b7856e5b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1437,6 +1437,9 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); + if (sev_es_guest(vcpu->kvm)) + sev_es_unmap_ghcb(svm); + if (svm->guest_state_loaded) return; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 84b3133c2251..e44567ceb865 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -581,6 +581,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm); void sev_es_create_vcpu(struct vcpu_svm *svm); void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu); +void sev_es_unmap_ghcb(struct vcpu_svm *svm); /* vmenter.S */ -- cgit From 698ab77aebffe08b312fbcdddeb0e8bd08b78717 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 28 Apr 2021 15:03:12 -0400 Subject: dax: Add an enum for specifying dax wakup mode Dan mentioned that he is not very fond of passing around a boolean true/false to specify if only 
next waiter should be woken up or all waiters should be woken up. He instead prefers that we introduce an enum and make it very explicit at the callsite itself. This makes the code easier to read. This patch should not introduce any change of behavior. Reviewed-by: Greg Kurz Reviewed-by: Jan Kara Suggested-by: Dan Williams Signed-off-by: Vivek Goyal Link: https://lore.kernel.org/r/20210428190314.1865312-2-vgoyal@redhat.com Signed-off-by: Dan Williams --- fs/dax.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index b3d27fdc6775..5ecee51c44ee 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -144,6 +144,16 @@ struct wait_exceptional_entry_queue { struct exceptional_entry_key key; }; +/** + * enum dax_wake_mode: waitqueue wakeup behaviour + * @WAKE_ALL: wake all waiters in the waitqueue + * @WAKE_NEXT: wake only the first waiter in the waitqueue + */ +enum dax_wake_mode { + WAKE_ALL, + WAKE_NEXT, +}; + static wait_queue_head_t *dax_entry_waitqueue(struct xa_state *xas, void *entry, struct exceptional_entry_key *key) { @@ -182,7 +192,8 @@ static int wake_exceptional_entry_func(wait_queue_entry_t *wait, * The important information it's conveying is whether the entry at * this index used to be a PMD entry. */ -static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all) +static void dax_wake_entry(struct xa_state *xas, void *entry, + enum dax_wake_mode mode) { struct exceptional_entry_key key; wait_queue_head_t *wq; @@ -196,7 +207,7 @@ static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all) * must be in the waitqueue and the following check will see them. */ if (waitqueue_active(wq)) - __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key); + __wake_up(wq, TASK_NORMAL, mode == WAKE_ALL ? 0 : 1, &key); } /* @@ -268,7 +279,7 @@ static void put_unlocked_entry(struct xa_state *xas, void *entry) { /* If we were the only waiter woken, wake the next one */ if (entry && !dax_is_conflict(entry)) - dax_wake_entry(xas, entry, false); + dax_wake_entry(xas, entry, WAKE_NEXT); } /* @@ -286,7 +297,7 @@ static void dax_unlock_entry(struct xa_state *xas, void *entry) old = xas_store(xas, entry); xas_unlock_irq(xas); BUG_ON(!dax_is_locked(old)); - dax_wake_entry(xas, entry, false); + dax_wake_entry(xas, entry, WAKE_NEXT); } /* @@ -524,7 +535,7 @@ retry: dax_disassociate_entry(entry, mapping, false); xas_store(xas, NULL); /* undo the PMD join */ - dax_wake_entry(xas, entry, true); + dax_wake_entry(xas, entry, WAKE_ALL); mapping->nrexceptional--; entry = NULL; xas_set(xas, index); @@ -937,7 +948,7 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev, xas_lock_irq(xas); xas_store(xas, entry); xas_clear_mark(xas, PAGECACHE_TAG_DIRTY); - dax_wake_entry(xas, entry, false); + dax_wake_entry(xas, entry, WAKE_NEXT); trace_dax_writeback_one(mapping->host, index, count); return ret; -- cgit From 4c3d043d271d4d629aa2328796cdfc96b37d3b3c Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 28 Apr 2021 15:03:13 -0400 Subject: dax: Add a wakeup mode parameter to put_unlocked_entry() As of now put_unlocked_entry() always wakes up the next waiter. In subsequent patches we want to wake up all waiters at one callsite. Hence, add a parameter to the function. This patch does not introduce any change of behavior.
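(To make the new enum's intent concrete, here is a minimal userspace analogue of the wake-mode pattern, assuming pthreads in place of kernel waitqueues; all names are hypothetical, and this deliberately omits the kernel detail that a WAKE_NEXT wakeup is passed along by each woken waiter via put_unlocked_entry().)

/* wake_mode_demo.c - build with: cc wake_mode_demo.c -lpthread */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

enum wake_mode { WAKE_ALL, WAKE_NEXT };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int entry_locked = 1;

/* Analogue of dax_wake_entry(): the mode picks how many waiters wake,
 * like the nr_exclusive argument of __wake_up() (0 = all, 1 = one). */
static void wake_entry(enum wake_mode mode)
{
	pthread_mutex_lock(&lock);
	entry_locked = 0;
	if (mode == WAKE_ALL)
		pthread_cond_broadcast(&cond);
	else
		pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
}

static void *waiter(void *arg)
{
	pthread_mutex_lock(&lock);
	while (entry_locked)
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
	printf("waiter %ld woke up\n", (long)arg);
	return NULL;
}

int main(void)
{
	pthread_t t[3];
	long i;

	for (i = 0; i < 3; i++)
		pthread_create(&t[i], NULL, waiter, (void *)i);
	sleep(1);		/* let all three waiters queue up */
	wake_entry(WAKE_ALL);	/* WAKE_NEXT here would strand two waiters */
	for (i = 0; i < 3; i++)
		pthread_join(t[i], NULL);
	return 0;
}

The WAKE_ALL/WAKE_NEXT distinction is exactly what the final patch in this series relies on to fix the missed-wakeup deadlock described below.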
Reviewed-by: Greg Kurz Reviewed-by: Jan Kara Suggested-by: Dan Williams Signed-off-by: Vivek Goyal Link: https://lore.kernel.org/r/20210428190314.1865312-3-vgoyal@redhat.com Signed-off-by: Dan Williams --- fs/dax.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 5ecee51c44ee..56eb1c759ca5 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -275,11 +275,11 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry) finish_wait(wq, &ewait.wait); } -static void put_unlocked_entry(struct xa_state *xas, void *entry) +static void put_unlocked_entry(struct xa_state *xas, void *entry, + enum dax_wake_mode mode) { - /* If we were the only waiter woken, wake the next one */ if (entry && !dax_is_conflict(entry)) - dax_wake_entry(xas, entry, WAKE_NEXT); + dax_wake_entry(xas, entry, mode); } /* @@ -633,7 +633,7 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping, entry = get_unlocked_entry(&xas, 0); if (entry) page = dax_busy_page(entry); - put_unlocked_entry(&xas, entry); + put_unlocked_entry(&xas, entry, WAKE_NEXT); if (page) break; if (++scanned % XA_CHECK_SCHED) @@ -675,7 +675,7 @@ static int __dax_invalidate_entry(struct address_space *mapping, mapping->nrexceptional--; ret = 1; out: - put_unlocked_entry(&xas, entry); + put_unlocked_entry(&xas, entry, WAKE_NEXT); xas_unlock_irq(&xas); return ret; } @@ -954,7 +954,7 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev, return ret; put_unlocked: - put_unlocked_entry(xas, entry); + put_unlocked_entry(xas, entry, WAKE_NEXT); return ret; } @@ -1695,7 +1695,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order) /* Did we race with someone splitting entry or so? */ if (!entry || dax_is_conflict(entry) || (order == 0 && !dax_is_pte_entry(entry))) { - put_unlocked_entry(&xas, entry); + put_unlocked_entry(&xas, entry, WAKE_NEXT); xas_unlock_irq(&xas); trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf, VM_FAULT_NOPAGE); -- cgit From 237388320deffde7c2d65ed8fc9eef670dc979b3 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 28 Apr 2021 15:03:14 -0400 Subject: dax: Wake up all waiters after invalidating dax entry I am seeing missed wakeups which ultimately lead to a deadlock when I am using virtiofs with DAX enabled and running "make -j". I had to mount virtiofs as rootfs and also reduce the dax window size to 256M to reproduce the problem consistently. So here is the problem. put_unlocked_entry() wakes up waiters only if entry is not null and !dax_is_conflict(entry). But if I call multiple instances of invalidate_inode_pages2() in parallel, then I can run into a situation where there are waiters on this index but nobody will wake these waiters. invalidate_inode_pages2() invalidate_inode_pages2_range() invalidate_exceptional_entry2() dax_invalidate_mapping_entry_sync() __dax_invalidate_entry() { xas_lock_irq(&xas); entry = get_unlocked_entry(&xas, 0); ... ... dax_disassociate_entry(entry, mapping, trunc); xas_store(&xas, NULL); ... ... put_unlocked_entry(&xas, entry); xas_unlock_irq(&xas); } Say a fault is in progress and it has locked the entry at offset, say, "0x1c". Now say three instances of invalidate_inode_pages2() are in progress (A, B, C) and they all try to invalidate the entry at offset "0x1c". Given the dax entry is locked, all three instances A, B, C will wait in the wait queue. When the dax fault finishes, say A is woken up. It will store a NULL entry at index "0x1c" and wake up B.
When B comes along it will find "entry=0" at page offset 0x1c and it will call put_unlocked_entry(&xas, 0). This means put_unlocked_entry() will not wake up the next waiter, given the current code, so C continues to wait and is never woken up. This patch fixes the issue by waking up all waiters when a dax entry has been invalidated. This seems to fix the deadlock I am facing and I can make forward progress. Reported-by: Sergio Lopez Fixes: ac401cc78242 ("dax: New fault locking") Reviewed-by: Jan Kara Suggested-by: Dan Williams Signed-off-by: Vivek Goyal Link: https://lore.kernel.org/r/20210428190314.1865312-4-vgoyal@redhat.com Signed-off-by: Dan Williams --- fs/dax.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dax.c b/fs/dax.c index 56eb1c759ca5..df5485b4bddf 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -675,7 +675,7 @@ static int __dax_invalidate_entry(struct address_space *mapping, mapping->nrexceptional--; ret = 1; out: - put_unlocked_entry(&xas, entry, WAKE_NEXT); + put_unlocked_entry(&xas, entry, WAKE_ALL); xas_unlock_irq(&xas); return ret; } -- cgit From a298232ee6b9a1d5d732aa497ff8be0d45b5bd82 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 7 May 2021 21:06:38 +0100 Subject: io_uring: fix link timeout refs WARNING: CPU: 0 PID: 10242 at lib/refcount.c:28 refcount_warn_saturate+0x15b/0x1a0 lib/refcount.c:28 RIP: 0010:refcount_warn_saturate+0x15b/0x1a0 lib/refcount.c:28 Call Trace: __refcount_sub_and_test include/linux/refcount.h:283 [inline] __refcount_dec_and_test include/linux/refcount.h:315 [inline] refcount_dec_and_test include/linux/refcount.h:333 [inline] io_put_req fs/io_uring.c:2140 [inline] io_queue_linked_timeout fs/io_uring.c:6300 [inline] __io_queue_sqe+0xbef/0xec0 fs/io_uring.c:6354 io_submit_sqe fs/io_uring.c:6534 [inline] io_submit_sqes+0x2bbd/0x7c50 fs/io_uring.c:6660 __do_sys_io_uring_enter fs/io_uring.c:9240 [inline] __se_sys_io_uring_enter+0x256/0x1d60 fs/io_uring.c:9182 io_link_timeout_fn() should put only one reference of the linked timeout request; however, when racing with the master request's completion, io_req_complete() puts one first and then io_put_req_deferred() is called as well. Cc: stable@vger.kernel.org # 5.12+ Fixes: 9ae1f8dd372e0 ("io_uring: fix inconsistent lock state") Reported-by: syzbot+a2910119328ce8e7996f@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/ff51018ff29de5ffa76f09273ef48cb24c720368.1620417627.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f46acbbeed57..9ac5e278a91e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6363,10 +6363,10 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) if (prev) { io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME); io_put_req_deferred(prev, 1); + io_put_req_deferred(req, 1); } else { io_req_complete_post(req, -ETIME, 0); } - io_put_req_deferred(req, 1); return HRTIMER_NORESTART; } -- cgit From e759959fe3b8313c81d6200be44cb8a644d845ea Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Tue, 27 Apr 2021 06:16:34 -0500 Subject: x86/sev-es: Rename sev-es.{ch} to sev.{ch} SEV-SNP builds upon the SEV-ES functionality while adding new hardware protection. Version 2 of the GHCB specification adds new NAE events that are SEV-SNP specific. Rename sev-es.{ch} to sev.{ch} so that all SEV* functionality can be consolidated in one place.
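(A side note on mechanics before the diff: as the hunks below show, the pre-decompression boot stub does not link the shared SEV code; it pulls sev-shared.c in with a literal #include after redefining parts of its environment (__init, __pa, error(), ...). A minimal sketch of that textual-inclusion pattern, with hypothetical file names:)

/* shared.c - hypothetical shared source; never compiled stand-alone,
 * mirroring how sev-shared.c is consumed by two code bases. */
#ifndef error
#define error(msg) pr_err(msg)	/* kernel-side default, overridden below */
#endif

static int shared_double(int x)
{
	return 2 * x;		/* stands in for the real shared helpers */
}

/* consumer.c - the "boot stub" side: set up the environment first,
 * then make shared.c part of this translation unit. */
#include <stdio.h>
#define error(msg) fprintf(stderr, "%s", msg)
#include "shared.c"

int main(void)
{
	return shared_double(21) == 42 ? 0 : 1;
}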
Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Acked-by: Joerg Roedel Link: https://lkml.kernel.org/r/20210427111636.1207-2-brijesh.singh@amd.com --- arch/x86/boot/compressed/Makefile | 6 +- arch/x86/boot/compressed/sev-es.c | 206 ------ arch/x86/boot/compressed/sev.c | 206 ++++++ arch/x86/include/asm/sev-es.h | 114 --- arch/x86/include/asm/sev.h | 114 +++ arch/x86/kernel/Makefile | 6 +- arch/x86/kernel/head64.c | 2 +- arch/x86/kernel/nmi.c | 2 +- arch/x86/kernel/sev-es-shared.c | 525 ------------- arch/x86/kernel/sev-es.c | 1461 ------------------------------------- arch/x86/kernel/sev-shared.c | 525 +++++++++++++ arch/x86/kernel/sev.c | 1461 +++++++++++++++++++++++++++++++++++++ arch/x86/mm/extable.c | 2 +- arch/x86/platform/efi/efi_64.c | 2 +- arch/x86/realmode/init.c | 2 +- 15 files changed, 2317 insertions(+), 2317 deletions(-) delete mode 100644 arch/x86/boot/compressed/sev-es.c create mode 100644 arch/x86/boot/compressed/sev.c delete mode 100644 arch/x86/include/asm/sev-es.h create mode 100644 arch/x86/include/asm/sev.h delete mode 100644 arch/x86/kernel/sev-es-shared.c delete mode 100644 arch/x86/kernel/sev-es.c create mode 100644 arch/x86/kernel/sev-shared.c create mode 100644 arch/x86/kernel/sev.c diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 6e5522aebbbd..2a2975236c9e 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -48,10 +48,10 @@ KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no) KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h KBUILD_CFLAGS += $(CLANG_FLAGS) -# sev-es.c indirectly inludes inat-table.h which is generated during +# sev.c indirectly inludes inat-table.h which is generated during # compilation and stored in $(objtree). Add the directory to the includes so # that the compiler finds it even with out-of-tree builds (make O=/some/path). -CFLAGS_sev-es.o += -I$(objtree)/arch/x86/lib/ +CFLAGS_sev.o += -I$(objtree)/arch/x86/lib/ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n @@ -93,7 +93,7 @@ ifdef CONFIG_X86_64 vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o vmlinux-objs-y += $(obj)/mem_encrypt.o vmlinux-objs-y += $(obj)/pgtable_64.o - vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev-es.o + vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o endif vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c deleted file mode 100644 index 82041bd380e5..000000000000 --- a/arch/x86/boot/compressed/sev-es.c +++ /dev/null @@ -1,206 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * AMD Encrypted Register State Support - * - * Author: Joerg Roedel - */ - -/* - * misc.h needs to be first because it knows how to include the other kernel - * headers in the pre-decompression code in a way that does not break - * compilation. - */ -#include "misc.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "error.h" - -struct ghcb boot_ghcb_page __aligned(PAGE_SIZE); -struct ghcb *boot_ghcb; - -/* - * Copy a version of this function here - insn-eval.c can't be used in - * pre-decompression code. 
- */ -static bool insn_has_rep_prefix(struct insn *insn) -{ - insn_byte_t p; - int i; - - insn_get_prefixes(insn); - - for_each_insn_prefix(insn, i, p) { - if (p == 0xf2 || p == 0xf3) - return true; - } - - return false; -} - -/* - * Only a dummy for insn_get_seg_base() - Early boot-code is 64bit only and - * doesn't use segments. - */ -static unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx) -{ - return 0UL; -} - -static inline u64 sev_es_rd_ghcb_msr(void) -{ - unsigned long low, high; - - asm volatile("rdmsr" : "=a" (low), "=d" (high) : - "c" (MSR_AMD64_SEV_ES_GHCB)); - - return ((high << 32) | low); -} - -static inline void sev_es_wr_ghcb_msr(u64 val) -{ - u32 low, high; - - low = val & 0xffffffffUL; - high = val >> 32; - - asm volatile("wrmsr" : : "c" (MSR_AMD64_SEV_ES_GHCB), - "a"(low), "d" (high) : "memory"); -} - -static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt) -{ - char buffer[MAX_INSN_SIZE]; - int ret; - - memcpy(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE); - - ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64); - if (ret < 0) - return ES_DECODE_FAILED; - - return ES_OK; -} - -static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, - void *dst, char *buf, size_t size) -{ - memcpy(dst, buf, size); - - return ES_OK; -} - -static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, - void *src, char *buf, size_t size) -{ - memcpy(buf, src, size); - - return ES_OK; -} - -#undef __init -#undef __pa -#define __init -#define __pa(x) ((unsigned long)(x)) - -#define __BOOT_COMPRESSED - -/* Basic instruction decoding support needed */ -#include "../../lib/inat.c" -#include "../../lib/insn.c" - -/* Include code for early handlers */ -#include "../../kernel/sev-es-shared.c" - -static bool early_setup_sev_es(void) -{ - if (!sev_es_negotiate_protocol()) - sev_es_terminate(GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED); - - if (set_page_decrypted((unsigned long)&boot_ghcb_page)) - return false; - - /* Page is now mapped decrypted, clear it */ - memset(&boot_ghcb_page, 0, sizeof(boot_ghcb_page)); - - boot_ghcb = &boot_ghcb_page; - - /* Initialize lookup tables for the instruction decoder */ - inat_init_tables(); - - return true; -} - -void sev_es_shutdown_ghcb(void) -{ - if (!boot_ghcb) - return; - - if (!sev_es_check_cpu_features()) - error("SEV-ES CPU Features missing."); - - /* - * GHCB Page must be flushed from the cache and mapped encrypted again. - * Otherwise the running kernel will see strange cache effects when - * trying to use that page. - */ - if (set_page_encrypted((unsigned long)&boot_ghcb_page)) - error("Can't map GHCB page encrypted"); - - /* - * GHCB page is mapped encrypted again and flushed from the cache. - * Mark it non-present now to catch bugs when #VC exceptions trigger - * after this point. 
- */ - if (set_page_non_present((unsigned long)&boot_ghcb_page)) - error("Can't unmap GHCB page"); -} - -bool sev_es_check_ghcb_fault(unsigned long address) -{ - /* Check whether the fault was on the GHCB page */ - return ((address & PAGE_MASK) == (unsigned long)&boot_ghcb_page); -} - -void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) -{ - struct es_em_ctxt ctxt; - enum es_result result; - - if (!boot_ghcb && !early_setup_sev_es()) - sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST); - - vc_ghcb_invalidate(boot_ghcb); - result = vc_init_em_ctxt(&ctxt, regs, exit_code); - if (result != ES_OK) - goto finish; - - switch (exit_code) { - case SVM_EXIT_RDTSC: - case SVM_EXIT_RDTSCP: - result = vc_handle_rdtsc(boot_ghcb, &ctxt, exit_code); - break; - case SVM_EXIT_IOIO: - result = vc_handle_ioio(boot_ghcb, &ctxt); - break; - case SVM_EXIT_CPUID: - result = vc_handle_cpuid(boot_ghcb, &ctxt); - break; - default: - result = ES_UNSUPPORTED; - break; - } - -finish: - if (result == ES_OK) - vc_finish_insn(&ctxt); - else if (result != ES_RETRY) - sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST); -} diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c new file mode 100644 index 000000000000..670e998fe930 --- /dev/null +++ b/arch/x86/boot/compressed/sev.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * AMD Encrypted Register State Support + * + * Author: Joerg Roedel + */ + +/* + * misc.h needs to be first because it knows how to include the other kernel + * headers in the pre-decompression code in a way that does not break + * compilation. + */ +#include "misc.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "error.h" + +struct ghcb boot_ghcb_page __aligned(PAGE_SIZE); +struct ghcb *boot_ghcb; + +/* + * Copy a version of this function here - insn-eval.c can't be used in + * pre-decompression code. + */ +static bool insn_has_rep_prefix(struct insn *insn) +{ + insn_byte_t p; + int i; + + insn_get_prefixes(insn); + + for_each_insn_prefix(insn, i, p) { + if (p == 0xf2 || p == 0xf3) + return true; + } + + return false; +} + +/* + * Only a dummy for insn_get_seg_base() - Early boot-code is 64bit only and + * doesn't use segments. 
+ */ +static unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx) +{ + return 0UL; +} + +static inline u64 sev_es_rd_ghcb_msr(void) +{ + unsigned long low, high; + + asm volatile("rdmsr" : "=a" (low), "=d" (high) : + "c" (MSR_AMD64_SEV_ES_GHCB)); + + return ((high << 32) | low); +} + +static inline void sev_es_wr_ghcb_msr(u64 val) +{ + u32 low, high; + + low = val & 0xffffffffUL; + high = val >> 32; + + asm volatile("wrmsr" : : "c" (MSR_AMD64_SEV_ES_GHCB), + "a"(low), "d" (high) : "memory"); +} + +static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt) +{ + char buffer[MAX_INSN_SIZE]; + int ret; + + memcpy(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE); + + ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64); + if (ret < 0) + return ES_DECODE_FAILED; + + return ES_OK; +} + +static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, + void *dst, char *buf, size_t size) +{ + memcpy(dst, buf, size); + + return ES_OK; +} + +static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, + void *src, char *buf, size_t size) +{ + memcpy(buf, src, size); + + return ES_OK; +} + +#undef __init +#undef __pa +#define __init +#define __pa(x) ((unsigned long)(x)) + +#define __BOOT_COMPRESSED + +/* Basic instruction decoding support needed */ +#include "../../lib/inat.c" +#include "../../lib/insn.c" + +/* Include code for early handlers */ +#include "../../kernel/sev-shared.c" + +static bool early_setup_sev_es(void) +{ + if (!sev_es_negotiate_protocol()) + sev_es_terminate(GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED); + + if (set_page_decrypted((unsigned long)&boot_ghcb_page)) + return false; + + /* Page is now mapped decrypted, clear it */ + memset(&boot_ghcb_page, 0, sizeof(boot_ghcb_page)); + + boot_ghcb = &boot_ghcb_page; + + /* Initialize lookup tables for the instruction decoder */ + inat_init_tables(); + + return true; +} + +void sev_es_shutdown_ghcb(void) +{ + if (!boot_ghcb) + return; + + if (!sev_es_check_cpu_features()) + error("SEV-ES CPU Features missing."); + + /* + * GHCB Page must be flushed from the cache and mapped encrypted again. + * Otherwise the running kernel will see strange cache effects when + * trying to use that page. + */ + if (set_page_encrypted((unsigned long)&boot_ghcb_page)) + error("Can't map GHCB page encrypted"); + + /* + * GHCB page is mapped encrypted again and flushed from the cache. + * Mark it non-present now to catch bugs when #VC exceptions trigger + * after this point. 
+ */ + if (set_page_non_present((unsigned long)&boot_ghcb_page)) + error("Can't unmap GHCB page"); +} + +bool sev_es_check_ghcb_fault(unsigned long address) +{ + /* Check whether the fault was on the GHCB page */ + return ((address & PAGE_MASK) == (unsigned long)&boot_ghcb_page); +} + +void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) +{ + struct es_em_ctxt ctxt; + enum es_result result; + + if (!boot_ghcb && !early_setup_sev_es()) + sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST); + + vc_ghcb_invalidate(boot_ghcb); + result = vc_init_em_ctxt(&ctxt, regs, exit_code); + if (result != ES_OK) + goto finish; + + switch (exit_code) { + case SVM_EXIT_RDTSC: + case SVM_EXIT_RDTSCP: + result = vc_handle_rdtsc(boot_ghcb, &ctxt, exit_code); + break; + case SVM_EXIT_IOIO: + result = vc_handle_ioio(boot_ghcb, &ctxt); + break; + case SVM_EXIT_CPUID: + result = vc_handle_cpuid(boot_ghcb, &ctxt); + break; + default: + result = ES_UNSUPPORTED; + break; + } + +finish: + if (result == ES_OK) + vc_finish_insn(&ctxt); + else if (result != ES_RETRY) + sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST); +} diff --git a/arch/x86/include/asm/sev-es.h b/arch/x86/include/asm/sev-es.h deleted file mode 100644 index cf1d957c7091..000000000000 --- a/arch/x86/include/asm/sev-es.h +++ /dev/null @@ -1,114 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * AMD Encrypted Register State Support - * - * Author: Joerg Roedel - */ - -#ifndef __ASM_ENCRYPTED_STATE_H -#define __ASM_ENCRYPTED_STATE_H - -#include -#include - -#define GHCB_SEV_INFO 0x001UL -#define GHCB_SEV_INFO_REQ 0x002UL -#define GHCB_INFO(v) ((v) & 0xfffUL) -#define GHCB_PROTO_MAX(v) (((v) >> 48) & 0xffffUL) -#define GHCB_PROTO_MIN(v) (((v) >> 32) & 0xffffUL) -#define GHCB_PROTO_OUR 0x0001UL -#define GHCB_SEV_CPUID_REQ 0x004UL -#define GHCB_CPUID_REQ_EAX 0 -#define GHCB_CPUID_REQ_EBX 1 -#define GHCB_CPUID_REQ_ECX 2 -#define GHCB_CPUID_REQ_EDX 3 -#define GHCB_CPUID_REQ(fn, reg) (GHCB_SEV_CPUID_REQ | \ - (((unsigned long)reg & 3) << 30) | \ - (((unsigned long)fn) << 32)) - -#define GHCB_PROTOCOL_MAX 0x0001UL -#define GHCB_DEFAULT_USAGE 0x0000UL - -#define GHCB_SEV_CPUID_RESP 0x005UL -#define GHCB_SEV_TERMINATE 0x100UL -#define GHCB_SEV_TERMINATE_REASON(reason_set, reason_val) \ - (((((u64)reason_set) & 0x7) << 12) | \ - ((((u64)reason_val) & 0xff) << 16)) -#define GHCB_SEV_ES_REASON_GENERAL_REQUEST 0 -#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED 1 - -#define GHCB_SEV_GHCB_RESP_CODE(v) ((v) & 0xfff) -#define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); } - -enum es_result { - ES_OK, /* All good */ - ES_UNSUPPORTED, /* Requested operation not supported */ - ES_VMM_ERROR, /* Unexpected state from the VMM */ - ES_DECODE_FAILED, /* Instruction decoding failed */ - ES_EXCEPTION, /* Instruction caused exception */ - ES_RETRY, /* Retry instruction emulation */ -}; - -struct es_fault_info { - unsigned long vector; - unsigned long error_code; - unsigned long cr2; -}; - -struct pt_regs; - -/* ES instruction emulation context */ -struct es_em_ctxt { - struct pt_regs *regs; - struct insn insn; - struct es_fault_info fi; -}; - -void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code); - -static inline u64 lower_bits(u64 val, unsigned int bits) -{ - u64 mask = (1ULL << bits) - 1; - - return (val & mask); -} - -struct real_mode_header; -enum stack_type; - -/* Early IDT entry points for #VC handler */ -extern void vc_no_ghcb(void); -extern void vc_boot_ghcb(void); -extern bool handle_vc_boot_ghcb(struct pt_regs *regs); - -#ifdef 
CONFIG_AMD_MEM_ENCRYPT -extern struct static_key_false sev_es_enable_key; -extern void __sev_es_ist_enter(struct pt_regs *regs); -extern void __sev_es_ist_exit(void); -static __always_inline void sev_es_ist_enter(struct pt_regs *regs) -{ - if (static_branch_unlikely(&sev_es_enable_key)) - __sev_es_ist_enter(regs); -} -static __always_inline void sev_es_ist_exit(void) -{ - if (static_branch_unlikely(&sev_es_enable_key)) - __sev_es_ist_exit(); -} -extern int sev_es_setup_ap_jump_table(struct real_mode_header *rmh); -extern void __sev_es_nmi_complete(void); -static __always_inline void sev_es_nmi_complete(void) -{ - if (static_branch_unlikely(&sev_es_enable_key)) - __sev_es_nmi_complete(); -} -extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd); -#else -static inline void sev_es_ist_enter(struct pt_regs *regs) { } -static inline void sev_es_ist_exit(void) { } -static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; } -static inline void sev_es_nmi_complete(void) { } -static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; } -#endif - -#endif diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h new file mode 100644 index 000000000000..cf1d957c7091 --- /dev/null +++ b/arch/x86/include/asm/sev.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD Encrypted Register State Support + * + * Author: Joerg Roedel + */ + +#ifndef __ASM_ENCRYPTED_STATE_H +#define __ASM_ENCRYPTED_STATE_H + +#include +#include + +#define GHCB_SEV_INFO 0x001UL +#define GHCB_SEV_INFO_REQ 0x002UL +#define GHCB_INFO(v) ((v) & 0xfffUL) +#define GHCB_PROTO_MAX(v) (((v) >> 48) & 0xffffUL) +#define GHCB_PROTO_MIN(v) (((v) >> 32) & 0xffffUL) +#define GHCB_PROTO_OUR 0x0001UL +#define GHCB_SEV_CPUID_REQ 0x004UL +#define GHCB_CPUID_REQ_EAX 0 +#define GHCB_CPUID_REQ_EBX 1 +#define GHCB_CPUID_REQ_ECX 2 +#define GHCB_CPUID_REQ_EDX 3 +#define GHCB_CPUID_REQ(fn, reg) (GHCB_SEV_CPUID_REQ | \ + (((unsigned long)reg & 3) << 30) | \ + (((unsigned long)fn) << 32)) + +#define GHCB_PROTOCOL_MAX 0x0001UL +#define GHCB_DEFAULT_USAGE 0x0000UL + +#define GHCB_SEV_CPUID_RESP 0x005UL +#define GHCB_SEV_TERMINATE 0x100UL +#define GHCB_SEV_TERMINATE_REASON(reason_set, reason_val) \ + (((((u64)reason_set) & 0x7) << 12) | \ + ((((u64)reason_val) & 0xff) << 16)) +#define GHCB_SEV_ES_REASON_GENERAL_REQUEST 0 +#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED 1 + +#define GHCB_SEV_GHCB_RESP_CODE(v) ((v) & 0xfff) +#define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); } + +enum es_result { + ES_OK, /* All good */ + ES_UNSUPPORTED, /* Requested operation not supported */ + ES_VMM_ERROR, /* Unexpected state from the VMM */ + ES_DECODE_FAILED, /* Instruction decoding failed */ + ES_EXCEPTION, /* Instruction caused exception */ + ES_RETRY, /* Retry instruction emulation */ +}; + +struct es_fault_info { + unsigned long vector; + unsigned long error_code; + unsigned long cr2; +}; + +struct pt_regs; + +/* ES instruction emulation context */ +struct es_em_ctxt { + struct pt_regs *regs; + struct insn insn; + struct es_fault_info fi; +}; + +void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code); + +static inline u64 lower_bits(u64 val, unsigned int bits) +{ + u64 mask = (1ULL << bits) - 1; + + return (val & mask); +} + +struct real_mode_header; +enum stack_type; + +/* Early IDT entry points for #VC handler */ +extern void vc_no_ghcb(void); +extern void vc_boot_ghcb(void); +extern bool handle_vc_boot_ghcb(struct pt_regs *regs); + +#ifdef CONFIG_AMD_MEM_ENCRYPT +extern struct static_key_false 
sev_es_enable_key; +extern void __sev_es_ist_enter(struct pt_regs *regs); +extern void __sev_es_ist_exit(void); +static __always_inline void sev_es_ist_enter(struct pt_regs *regs) +{ + if (static_branch_unlikely(&sev_es_enable_key)) + __sev_es_ist_enter(regs); +} +static __always_inline void sev_es_ist_exit(void) +{ + if (static_branch_unlikely(&sev_es_enable_key)) + __sev_es_ist_exit(); +} +extern int sev_es_setup_ap_jump_table(struct real_mode_header *rmh); +extern void __sev_es_nmi_complete(void); +static __always_inline void sev_es_nmi_complete(void) +{ + if (static_branch_unlikely(&sev_es_enable_key)) + __sev_es_nmi_complete(); +} +extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd); +#else +static inline void sev_es_ist_enter(struct pt_regs *regs) { } +static inline void sev_es_ist_exit(void) { } +static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; } +static inline void sev_es_nmi_complete(void) { } +static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; } +#endif + +#endif diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0704c2a94272..0f66682ac02a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -20,7 +20,7 @@ CFLAGS_REMOVE_kvmclock.o = -pg CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg CFLAGS_REMOVE_head64.o = -pg -CFLAGS_REMOVE_sev-es.o = -pg +CFLAGS_REMOVE_sev.o = -pg endif KASAN_SANITIZE_head$(BITS).o := n @@ -28,7 +28,7 @@ KASAN_SANITIZE_dumpstack.o := n KASAN_SANITIZE_dumpstack_$(BITS).o := n KASAN_SANITIZE_stacktrace.o := n KASAN_SANITIZE_paravirt.o := n -KASAN_SANITIZE_sev-es.o := n +KASAN_SANITIZE_sev.o := n # With some compiler versions the generated code results in boot hangs, caused # by several compilation units. To be safe, disable all instrumentation. @@ -148,7 +148,7 @@ obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o -obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev-es.o +obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 18be44163a50..de01903c3735 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include /* * Manage page tables very early on. diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 2ef961cf4cfc..4bce802d25fb 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #define CREATE_TRACE_POINTS #include diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c deleted file mode 100644 index 0aa9f13efd57..000000000000 --- a/arch/x86/kernel/sev-es-shared.c +++ /dev/null @@ -1,525 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * AMD Encrypted Register State Support - * - * Author: Joerg Roedel - * - * This file is not compiled stand-alone. It contains code shared - * between the pre-decompression boot code and the running Linux kernel - * and is included directly into both code-bases. 
- */ - -#ifndef __BOOT_COMPRESSED -#define error(v) pr_err(v) -#define has_cpuflag(f) boot_cpu_has(f) -#endif - -static bool __init sev_es_check_cpu_features(void) -{ - if (!has_cpuflag(X86_FEATURE_RDRAND)) { - error("RDRAND instruction not supported - no trusted source of randomness available\n"); - return false; - } - - return true; -} - -static void __noreturn sev_es_terminate(unsigned int reason) -{ - u64 val = GHCB_SEV_TERMINATE; - - /* - * Tell the hypervisor what went wrong - only reason-set 0 is - * currently supported. - */ - val |= GHCB_SEV_TERMINATE_REASON(0, reason); - - /* Request Guest Termination from Hypvervisor */ - sev_es_wr_ghcb_msr(val); - VMGEXIT(); - - while (true) - asm volatile("hlt\n" : : : "memory"); -} - -static bool sev_es_negotiate_protocol(void) -{ - u64 val; - - /* Do the GHCB protocol version negotiation */ - sev_es_wr_ghcb_msr(GHCB_SEV_INFO_REQ); - VMGEXIT(); - val = sev_es_rd_ghcb_msr(); - - if (GHCB_INFO(val) != GHCB_SEV_INFO) - return false; - - if (GHCB_PROTO_MAX(val) < GHCB_PROTO_OUR || - GHCB_PROTO_MIN(val) > GHCB_PROTO_OUR) - return false; - - return true; -} - -static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb) -{ - memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); -} - -static bool vc_decoding_needed(unsigned long exit_code) -{ - /* Exceptions don't require to decode the instruction */ - return !(exit_code >= SVM_EXIT_EXCP_BASE && - exit_code <= SVM_EXIT_LAST_EXCP); -} - -static enum es_result vc_init_em_ctxt(struct es_em_ctxt *ctxt, - struct pt_regs *regs, - unsigned long exit_code) -{ - enum es_result ret = ES_OK; - - memset(ctxt, 0, sizeof(*ctxt)); - ctxt->regs = regs; - - if (vc_decoding_needed(exit_code)) - ret = vc_decode_insn(ctxt); - - return ret; -} - -static void vc_finish_insn(struct es_em_ctxt *ctxt) -{ - ctxt->regs->ip += ctxt->insn.length; -} - -static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, - struct es_em_ctxt *ctxt, - u64 exit_code, u64 exit_info_1, - u64 exit_info_2) -{ - enum es_result ret; - - /* Fill in protocol and format specifiers */ - ghcb->protocol_version = GHCB_PROTOCOL_MAX; - ghcb->ghcb_usage = GHCB_DEFAULT_USAGE; - - ghcb_set_sw_exit_code(ghcb, exit_code); - ghcb_set_sw_exit_info_1(ghcb, exit_info_1); - ghcb_set_sw_exit_info_2(ghcb, exit_info_2); - - sev_es_wr_ghcb_msr(__pa(ghcb)); - VMGEXIT(); - - if ((ghcb->save.sw_exit_info_1 & 0xffffffff) == 1) { - u64 info = ghcb->save.sw_exit_info_2; - unsigned long v; - - info = ghcb->save.sw_exit_info_2; - v = info & SVM_EVTINJ_VEC_MASK; - - /* Check if exception information from hypervisor is sane. */ - if ((info & SVM_EVTINJ_VALID) && - ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) && - ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) { - ctxt->fi.vector = v; - if (info & SVM_EVTINJ_VALID_ERR) - ctxt->fi.error_code = info >> 32; - ret = ES_EXCEPTION; - } else { - ret = ES_VMM_ERROR; - } - } else { - ret = ES_OK; - } - - return ret; -} - -/* - * Boot VC Handler - This is the first VC handler during boot, there is no GHCB - * page yet, so it only supports the MSR based communication with the - * hypervisor and only the CPUID exit-code. 
- */ -void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) -{ - unsigned int fn = lower_bits(regs->ax, 32); - unsigned long val; - - /* Only CPUID is supported via MSR protocol */ - if (exit_code != SVM_EXIT_CPUID) - goto fail; - - sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX)); - VMGEXIT(); - val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) - goto fail; - regs->ax = val >> 32; - - sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX)); - VMGEXIT(); - val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) - goto fail; - regs->bx = val >> 32; - - sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX)); - VMGEXIT(); - val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) - goto fail; - regs->cx = val >> 32; - - sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX)); - VMGEXIT(); - val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) - goto fail; - regs->dx = val >> 32; - - /* - * This is a VC handler and the #VC is only raised when SEV-ES is - * active, which means SEV must be active too. Do sanity checks on the - * CPUID results to make sure the hypervisor does not trick the kernel - * into the no-sev path. This could map sensitive data unencrypted and - * make it accessible to the hypervisor. - * - * In particular, check for: - * - Availability of CPUID leaf 0x8000001f - * - SEV CPUID bit. - * - * The hypervisor might still report the wrong C-bit position, but this - * can't be checked here. - */ - - if (fn == 0x80000000 && (regs->ax < 0x8000001f)) - /* SEV leaf check */ - goto fail; - else if ((fn == 0x8000001f && !(regs->ax & BIT(1)))) - /* SEV bit */ - goto fail; - - /* Skip over the CPUID two-byte opcode */ - regs->ip += 2; - - return; - -fail: - /* Terminate the guest */ - sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST); -} - -static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt, - void *src, char *buf, - unsigned int data_size, - unsigned int count, - bool backwards) -{ - int i, b = backwards ? -1 : 1; - enum es_result ret = ES_OK; - - for (i = 0; i < count; i++) { - void *s = src + (i * data_size * b); - char *d = buf + (i * data_size); - - ret = vc_read_mem(ctxt, s, d, data_size); - if (ret != ES_OK) - break; - } - - return ret; -} - -static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt, - void *dst, char *buf, - unsigned int data_size, - unsigned int count, - bool backwards) -{ - int i, s = backwards ? 
-1 : 1; - enum es_result ret = ES_OK; - - for (i = 0; i < count; i++) { - void *d = dst + (i * data_size * s); - char *b = buf + (i * data_size); - - ret = vc_write_mem(ctxt, d, b, data_size); - if (ret != ES_OK) - break; - } - - return ret; -} - -#define IOIO_TYPE_STR BIT(2) -#define IOIO_TYPE_IN 1 -#define IOIO_TYPE_INS (IOIO_TYPE_IN | IOIO_TYPE_STR) -#define IOIO_TYPE_OUT 0 -#define IOIO_TYPE_OUTS (IOIO_TYPE_OUT | IOIO_TYPE_STR) - -#define IOIO_REP BIT(3) - -#define IOIO_ADDR_64 BIT(9) -#define IOIO_ADDR_32 BIT(8) -#define IOIO_ADDR_16 BIT(7) - -#define IOIO_DATA_32 BIT(6) -#define IOIO_DATA_16 BIT(5) -#define IOIO_DATA_8 BIT(4) - -#define IOIO_SEG_ES (0 << 10) -#define IOIO_SEG_DS (3 << 10) - -static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) -{ - struct insn *insn = &ctxt->insn; - *exitinfo = 0; - - switch (insn->opcode.bytes[0]) { - /* INS opcodes */ - case 0x6c: - case 0x6d: - *exitinfo |= IOIO_TYPE_INS; - *exitinfo |= IOIO_SEG_ES; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; - break; - - /* OUTS opcodes */ - case 0x6e: - case 0x6f: - *exitinfo |= IOIO_TYPE_OUTS; - *exitinfo |= IOIO_SEG_DS; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; - break; - - /* IN immediate opcodes */ - case 0xe4: - case 0xe5: - *exitinfo |= IOIO_TYPE_IN; - *exitinfo |= (u8)insn->immediate.value << 16; - break; - - /* OUT immediate opcodes */ - case 0xe6: - case 0xe7: - *exitinfo |= IOIO_TYPE_OUT; - *exitinfo |= (u8)insn->immediate.value << 16; - break; - - /* IN register opcodes */ - case 0xec: - case 0xed: - *exitinfo |= IOIO_TYPE_IN; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; - break; - - /* OUT register opcodes */ - case 0xee: - case 0xef: - *exitinfo |= IOIO_TYPE_OUT; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; - break; - - default: - return ES_DECODE_FAILED; - } - - switch (insn->opcode.bytes[0]) { - case 0x6c: - case 0x6e: - case 0xe4: - case 0xe6: - case 0xec: - case 0xee: - /* Single byte opcodes */ - *exitinfo |= IOIO_DATA_8; - break; - default: - /* Length determined by instruction parsing */ - *exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16 - : IOIO_DATA_32; - } - switch (insn->addr_bytes) { - case 2: - *exitinfo |= IOIO_ADDR_16; - break; - case 4: - *exitinfo |= IOIO_ADDR_32; - break; - case 8: - *exitinfo |= IOIO_ADDR_64; - break; - } - - if (insn_has_rep_prefix(insn)) - *exitinfo |= IOIO_REP; - - return ES_OK; -} - -static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) -{ - struct pt_regs *regs = ctxt->regs; - u64 exit_info_1, exit_info_2; - enum es_result ret; - - ret = vc_ioio_exitinfo(ctxt, &exit_info_1); - if (ret != ES_OK) - return ret; - - if (exit_info_1 & IOIO_TYPE_STR) { - - /* (REP) INS/OUTS */ - - bool df = ((regs->flags & X86_EFLAGS_DF) == X86_EFLAGS_DF); - unsigned int io_bytes, exit_bytes; - unsigned int ghcb_count, op_count; - unsigned long es_base; - u64 sw_scratch; - - /* - * For the string variants with rep prefix the amount of in/out - * operations per #VC exception is limited so that the kernel - * has a chance to take interrupts and re-schedule while the - * instruction is emulated. - */ - io_bytes = (exit_info_1 >> 4) & 0x7; - ghcb_count = sizeof(ghcb->shared_buffer) / io_bytes; - - op_count = (exit_info_1 & IOIO_REP) ? 
regs->cx : 1; - exit_info_2 = min(op_count, ghcb_count); - exit_bytes = exit_info_2 * io_bytes; - - es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES); - - /* Read bytes of OUTS into the shared buffer */ - if (!(exit_info_1 & IOIO_TYPE_IN)) { - ret = vc_insn_string_read(ctxt, - (void *)(es_base + regs->si), - ghcb->shared_buffer, io_bytes, - exit_info_2, df); - if (ret) - return ret; - } - - /* - * Issue an VMGEXIT to the HV to consume the bytes from the - * shared buffer or to have it write them into the shared buffer - * depending on the instruction: OUTS or INS. - */ - sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer); - ghcb_set_sw_scratch(ghcb, sw_scratch); - ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, - exit_info_1, exit_info_2); - if (ret != ES_OK) - return ret; - - /* Read bytes from shared buffer into the guest's destination. */ - if (exit_info_1 & IOIO_TYPE_IN) { - ret = vc_insn_string_write(ctxt, - (void *)(es_base + regs->di), - ghcb->shared_buffer, io_bytes, - exit_info_2, df); - if (ret) - return ret; - - if (df) - regs->di -= exit_bytes; - else - regs->di += exit_bytes; - } else { - if (df) - regs->si -= exit_bytes; - else - regs->si += exit_bytes; - } - - if (exit_info_1 & IOIO_REP) - regs->cx -= exit_info_2; - - ret = regs->cx ? ES_RETRY : ES_OK; - - } else { - - /* IN/OUT into/from rAX */ - - int bits = (exit_info_1 & 0x70) >> 1; - u64 rax = 0; - - if (!(exit_info_1 & IOIO_TYPE_IN)) - rax = lower_bits(regs->ax, bits); - - ghcb_set_rax(ghcb, rax); - - ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0); - if (ret != ES_OK) - return ret; - - if (exit_info_1 & IOIO_TYPE_IN) { - if (!ghcb_rax_is_valid(ghcb)) - return ES_VMM_ERROR; - regs->ax = lower_bits(ghcb->save.rax, bits); - } - } - - return ret; -} - -static enum es_result vc_handle_cpuid(struct ghcb *ghcb, - struct es_em_ctxt *ctxt) -{ - struct pt_regs *regs = ctxt->regs; - u32 cr4 = native_read_cr4(); - enum es_result ret; - - ghcb_set_rax(ghcb, regs->ax); - ghcb_set_rcx(ghcb, regs->cx); - - if (cr4 & X86_CR4_OSXSAVE) - /* Safe to read xcr0 */ - ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK)); - else - /* xgetbv will cause #GP - use reset value for xcr0 */ - ghcb_set_xcr0(ghcb, 1); - - ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0); - if (ret != ES_OK) - return ret; - - if (!(ghcb_rax_is_valid(ghcb) && - ghcb_rbx_is_valid(ghcb) && - ghcb_rcx_is_valid(ghcb) && - ghcb_rdx_is_valid(ghcb))) - return ES_VMM_ERROR; - - regs->ax = ghcb->save.rax; - regs->bx = ghcb->save.rbx; - regs->cx = ghcb->save.rcx; - regs->dx = ghcb->save.rdx; - - return ES_OK; -} - -static enum es_result vc_handle_rdtsc(struct ghcb *ghcb, - struct es_em_ctxt *ctxt, - unsigned long exit_code) -{ - bool rdtscp = (exit_code == SVM_EXIT_RDTSCP); - enum es_result ret; - - ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0); - if (ret != ES_OK) - return ret; - - if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb) && - (!rdtscp || ghcb_rcx_is_valid(ghcb)))) - return ES_VMM_ERROR; - - ctxt->regs->ax = ghcb->save.rax; - ctxt->regs->dx = ghcb->save.rdx; - if (rdtscp) - ctxt->regs->cx = ghcb->save.rcx; - - return ES_OK; -} diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c deleted file mode 100644 index 73873b007838..000000000000 --- a/arch/x86/kernel/sev-es.c +++ /dev/null @@ -1,1461 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * AMD Memory Encryption Support - * - * Copyright (C) 2019 SUSE - * - * Author: Joerg Roedel - */ - -#define pr_fmt(fmt) "SEV-ES: " fmt - 
-#include /* For show_regs() */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DR7_RESET_VALUE 0x400 - -/* For early boot hypervisor communication in SEV-ES enabled guests */ -static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE); - -/* - * Needs to be in the .data section because we need it NULL before bss is - * cleared - */ -static struct ghcb __initdata *boot_ghcb; - -/* #VC handler runtime per-CPU data */ -struct sev_es_runtime_data { - struct ghcb ghcb_page; - - /* Physical storage for the per-CPU IST stack of the #VC handler */ - char ist_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE); - - /* - * Physical storage for the per-CPU fall-back stack of the #VC handler. - * The fall-back stack is used when it is not safe to switch back to the - * interrupted stack in the #VC entry code. - */ - char fallback_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE); - - /* - * Reserve one page per CPU as backup storage for the unencrypted GHCB. - * It is needed when an NMI happens while the #VC handler uses the real - * GHCB, and the NMI handler itself is causing another #VC exception. In - * that case the GHCB content of the first handler needs to be backed up - * and restored. - */ - struct ghcb backup_ghcb; - - /* - * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions. - * There is no need for it to be atomic, because nothing is written to - * the GHCB between the read and the write of ghcb_active. So it is safe - * to use it when a nested #VC exception happens before the write. - * - * This is necessary for example in the #VC->NMI->#VC case when the NMI - * happens while the first #VC handler uses the GHCB. When the NMI code - * raises a second #VC handler it might overwrite the contents of the - * GHCB written by the first handler. To avoid this the content of the - * GHCB is saved and restored when the GHCB is detected to be in use - * already. - */ - bool ghcb_active; - bool backup_ghcb_active; - - /* - * Cached DR7 value - write it on DR7 writes and return it on reads. - * That value will never make it to the real hardware DR7 as debugging - * is currently unsupported in SEV-ES guests. 
- */ - unsigned long dr7; -}; - -struct ghcb_state { - struct ghcb *ghcb; -}; - -static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); -DEFINE_STATIC_KEY_FALSE(sev_es_enable_key); - -/* Needed in vc_early_forward_exception */ -void do_early_exception(struct pt_regs *regs, int trapnr); - -static void __init setup_vc_stacks(int cpu) -{ - struct sev_es_runtime_data *data; - struct cpu_entry_area *cea; - unsigned long vaddr; - phys_addr_t pa; - - data = per_cpu(runtime_data, cpu); - cea = get_cpu_entry_area(cpu); - - /* Map #VC IST stack */ - vaddr = CEA_ESTACK_BOT(&cea->estacks, VC); - pa = __pa(data->ist_stack); - cea_set_pte((void *)vaddr, pa, PAGE_KERNEL); - - /* Map VC fall-back stack */ - vaddr = CEA_ESTACK_BOT(&cea->estacks, VC2); - pa = __pa(data->fallback_stack); - cea_set_pte((void *)vaddr, pa, PAGE_KERNEL); -} - -static __always_inline bool on_vc_stack(struct pt_regs *regs) -{ - unsigned long sp = regs->sp; - - /* User-mode RSP is not trusted */ - if (user_mode(regs)) - return false; - - /* SYSCALL gap still has user-mode RSP */ - if (ip_within_syscall_gap(regs)) - return false; - - return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC))); -} - -/* - * This function handles the case when an NMI is raised in the #VC - * exception handler entry code, before the #VC handler has switched off - * its IST stack. In this case, the IST entry for #VC must be adjusted, - * so that any nested #VC exception will not overwrite the stack - * contents of the interrupted #VC handler. - * - * The IST entry is adjusted unconditionally so that it can be also be - * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a - * nested sev_es_ist_exit() call may adjust back the IST entry too - * early. - * - * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run - * on the NMI IST stack, as they are only called from NMI handling code - * right now. - */ -void noinstr __sev_es_ist_enter(struct pt_regs *regs) -{ - unsigned long old_ist, new_ist; - - /* Read old IST entry */ - new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]); - - /* - * If NMI happened while on the #VC IST stack, set the new IST - * value below regs->sp, so that the interrupted stack frame is - * not overwritten by subsequent #VC exceptions. - */ - if (on_vc_stack(regs)) - new_ist = regs->sp; - - /* - * Reserve additional 8 bytes and store old IST value so this - * adjustment can be unrolled in __sev_es_ist_exit(). 
- */ - new_ist -= sizeof(old_ist); - *(unsigned long *)new_ist = old_ist; - - /* Set new IST entry */ - this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist); -} - -void noinstr __sev_es_ist_exit(void) -{ - unsigned long ist; - - /* Read IST entry */ - ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]); - - if (WARN_ON(ist == __this_cpu_ist_top_va(VC))) - return; - - /* Read back old IST entry and write it to the TSS */ - this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist); -} - -static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state) -{ - struct sev_es_runtime_data *data; - struct ghcb *ghcb; - - data = this_cpu_read(runtime_data); - ghcb = &data->ghcb_page; - - if (unlikely(data->ghcb_active)) { - /* GHCB is already in use - save its contents */ - - if (unlikely(data->backup_ghcb_active)) - return NULL; - - /* Mark backup_ghcb active before writing to it */ - data->backup_ghcb_active = true; - - state->ghcb = &data->backup_ghcb; - - /* Backup GHCB content */ - *state->ghcb = *ghcb; - } else { - state->ghcb = NULL; - data->ghcb_active = true; - } - - return ghcb; -} - -static __always_inline void sev_es_put_ghcb(struct ghcb_state *state) -{ - struct sev_es_runtime_data *data; - struct ghcb *ghcb; - - data = this_cpu_read(runtime_data); - ghcb = &data->ghcb_page; - - if (state->ghcb) { - /* Restore GHCB from Backup */ - *ghcb = *state->ghcb; - data->backup_ghcb_active = false; - state->ghcb = NULL; - } else { - data->ghcb_active = false; - } -} - -/* Needed in vc_early_forward_exception */ -void do_early_exception(struct pt_regs *regs, int trapnr); - -static inline u64 sev_es_rd_ghcb_msr(void) -{ - return __rdmsr(MSR_AMD64_SEV_ES_GHCB); -} - -static __always_inline void sev_es_wr_ghcb_msr(u64 val) -{ - u32 low, high; - - low = (u32)(val); - high = (u32)(val >> 32); - - native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high); -} - -static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt, - unsigned char *buffer) -{ - return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE); -} - -static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt) -{ - char buffer[MAX_INSN_SIZE]; - int res; - - res = insn_fetch_from_user_inatomic(ctxt->regs, buffer); - if (!res) { - ctxt->fi.vector = X86_TRAP_PF; - ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER; - ctxt->fi.cr2 = ctxt->regs->ip; - return ES_EXCEPTION; - } - - if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res)) - return ES_DECODE_FAILED; - - if (ctxt->insn.immediate.got) - return ES_OK; - else - return ES_DECODE_FAILED; -} - -static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt) -{ - char buffer[MAX_INSN_SIZE]; - int res, ret; - - res = vc_fetch_insn_kernel(ctxt, buffer); - if (res) { - ctxt->fi.vector = X86_TRAP_PF; - ctxt->fi.error_code = X86_PF_INSTR; - ctxt->fi.cr2 = ctxt->regs->ip; - return ES_EXCEPTION; - } - - ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64); - if (ret < 0) - return ES_DECODE_FAILED; - else - return ES_OK; -} - -static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt) -{ - if (user_mode(ctxt->regs)) - return __vc_decode_user_insn(ctxt); - else - return __vc_decode_kern_insn(ctxt); -} - -static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, - char *dst, char *buf, size_t size) -{ - unsigned long error_code = X86_PF_PROT | X86_PF_WRITE; - char __user *target = (char __user *)dst; - u64 d8; - u32 d4; - u16 d2; - u8 d1; - - /* If instruction ran in kernel mode and the I/O buffer 
is in kernel space */ - if (!user_mode(ctxt->regs) && !access_ok(target, size)) { - memcpy(dst, buf, size); - return ES_OK; - } - - switch (size) { - case 1: - memcpy(&d1, buf, 1); - if (put_user(d1, target)) - goto fault; - break; - case 2: - memcpy(&d2, buf, 2); - if (put_user(d2, target)) - goto fault; - break; - case 4: - memcpy(&d4, buf, 4); - if (put_user(d4, target)) - goto fault; - break; - case 8: - memcpy(&d8, buf, 8); - if (put_user(d8, target)) - goto fault; - break; - default: - WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); - return ES_UNSUPPORTED; - } - - return ES_OK; - -fault: - if (user_mode(ctxt->regs)) - error_code |= X86_PF_USER; - - ctxt->fi.vector = X86_TRAP_PF; - ctxt->fi.error_code = error_code; - ctxt->fi.cr2 = (unsigned long)dst; - - return ES_EXCEPTION; -} - -static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, - char *src, char *buf, size_t size) -{ - unsigned long error_code = X86_PF_PROT; - char __user *s = (char __user *)src; - u64 d8; - u32 d4; - u16 d2; - u8 d1; - - /* If instruction ran in kernel mode and the I/O buffer is in kernel space */ - if (!user_mode(ctxt->regs) && !access_ok(s, size)) { - memcpy(buf, src, size); - return ES_OK; - } - - switch (size) { - case 1: - if (get_user(d1, s)) - goto fault; - memcpy(buf, &d1, 1); - break; - case 2: - if (get_user(d2, s)) - goto fault; - memcpy(buf, &d2, 2); - break; - case 4: - if (get_user(d4, s)) - goto fault; - memcpy(buf, &d4, 4); - break; - case 8: - if (get_user(d8, s)) - goto fault; - memcpy(buf, &d8, 8); - break; - default: - WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); - return ES_UNSUPPORTED; - } - - return ES_OK; - -fault: - if (user_mode(ctxt->regs)) - error_code |= X86_PF_USER; - - ctxt->fi.vector = X86_TRAP_PF; - ctxt->fi.error_code = error_code; - ctxt->fi.cr2 = (unsigned long)src; - - return ES_EXCEPTION; -} - -static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, - unsigned long vaddr, phys_addr_t *paddr) -{ - unsigned long va = (unsigned long)vaddr; - unsigned int level; - phys_addr_t pa; - pgd_t *pgd; - pte_t *pte; - - pgd = __va(read_cr3_pa()); - pgd = &pgd[pgd_index(va)]; - pte = lookup_address_in_pgd(pgd, va, &level); - if (!pte) { - ctxt->fi.vector = X86_TRAP_PF; - ctxt->fi.cr2 = vaddr; - ctxt->fi.error_code = 0; - - if (user_mode(ctxt->regs)) - ctxt->fi.error_code |= X86_PF_USER; - - return ES_EXCEPTION; - } - - if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC)) - /* Emulated MMIO to/from encrypted memory not supported */ - return ES_UNSUPPORTED; - - pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; - pa |= va & ~page_level_mask(level); - - *paddr = pa; - - return ES_OK; -} - -/* Include code shared with pre-decompression boot stage */ -#include "sev-es-shared.c" - -void noinstr __sev_es_nmi_complete(void) -{ - struct ghcb_state state; - struct ghcb *ghcb; - - ghcb = sev_es_get_ghcb(&state); - - vc_ghcb_invalidate(ghcb); - ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE); - ghcb_set_sw_exit_info_1(ghcb, 0); - ghcb_set_sw_exit_info_2(ghcb, 0); - - sev_es_wr_ghcb_msr(__pa_nodebug(ghcb)); - VMGEXIT(); - - sev_es_put_ghcb(&state); -} - -static u64 get_jump_table_addr(void) -{ - struct ghcb_state state; - unsigned long flags; - struct ghcb *ghcb; - u64 ret = 0; - - local_irq_save(flags); - - ghcb = sev_es_get_ghcb(&state); - - vc_ghcb_invalidate(ghcb); - ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE); - ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE); - ghcb_set_sw_exit_info_2(ghcb, 0); - - 
sev_es_wr_ghcb_msr(__pa(ghcb)); - VMGEXIT(); - - if (ghcb_sw_exit_info_1_is_valid(ghcb) && - ghcb_sw_exit_info_2_is_valid(ghcb)) - ret = ghcb->save.sw_exit_info_2; - - sev_es_put_ghcb(&state); - - local_irq_restore(flags); - - return ret; -} - -int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) -{ - u16 startup_cs, startup_ip; - phys_addr_t jump_table_pa; - u64 jump_table_addr; - u16 __iomem *jump_table; - - jump_table_addr = get_jump_table_addr(); - - /* On UP guests there is no jump table so this is not a failure */ - if (!jump_table_addr) - return 0; - - /* Check if AP Jump Table is page-aligned */ - if (jump_table_addr & ~PAGE_MASK) - return -EINVAL; - - jump_table_pa = jump_table_addr & PAGE_MASK; - - startup_cs = (u16)(rmh->trampoline_start >> 4); - startup_ip = (u16)(rmh->sev_es_trampoline_start - - rmh->trampoline_start); - - jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE); - if (!jump_table) - return -EIO; - - writew(startup_ip, &jump_table[0]); - writew(startup_cs, &jump_table[1]); - - iounmap(jump_table); - - return 0; -} - -/* - * This is needed by the OVMF UEFI firmware which will use whatever it finds in - * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu - * runtime GHCBs used by the kernel are also mapped in the EFI page-table. - */ -int __init sev_es_efi_map_ghcbs(pgd_t *pgd) -{ - struct sev_es_runtime_data *data; - unsigned long address, pflags; - int cpu; - u64 pfn; - - if (!sev_es_active()) - return 0; - - pflags = _PAGE_NX | _PAGE_RW; - - for_each_possible_cpu(cpu) { - data = per_cpu(runtime_data, cpu); - - address = __pa(&data->ghcb_page); - pfn = address >> PAGE_SHIFT; - - if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags)) - return 1; - } - - return 0; -} - -static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) -{ - struct pt_regs *regs = ctxt->regs; - enum es_result ret; - u64 exit_info_1; - - /* Is it a WRMSR? */ - exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0; - - ghcb_set_rcx(ghcb, regs->cx); - if (exit_info_1) { - ghcb_set_rax(ghcb, regs->ax); - ghcb_set_rdx(ghcb, regs->dx); - } - - ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0); - - if ((ret == ES_OK) && (!exit_info_1)) { - regs->ax = ghcb->save.rax; - regs->dx = ghcb->save.rdx; - } - - return ret; -} - -/* - * This function runs on the first #VC exception after the kernel - * switched to virtual addresses. - */ -static bool __init sev_es_setup_ghcb(void) -{ - /* First make sure the hypervisor talks a supported protocol. */ - if (!sev_es_negotiate_protocol()) - return false; - - /* - * Clear the boot_ghcb. The first exception comes in before the bss - * section is cleared. - */ - memset(&boot_ghcb_page, 0, PAGE_SIZE); - - /* Alright - Make the boot-ghcb public */ - boot_ghcb = &boot_ghcb_page; - - return true; -} - -#ifdef CONFIG_HOTPLUG_CPU -static void sev_es_ap_hlt_loop(void) -{ - struct ghcb_state state; - struct ghcb *ghcb; - - ghcb = sev_es_get_ghcb(&state); - - while (true) { - vc_ghcb_invalidate(ghcb); - ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP); - ghcb_set_sw_exit_info_1(ghcb, 0); - ghcb_set_sw_exit_info_2(ghcb, 0); - - sev_es_wr_ghcb_msr(__pa(ghcb)); - VMGEXIT(); - - /* Wakeup signal? */ - if (ghcb_sw_exit_info_2_is_valid(ghcb) && - ghcb->save.sw_exit_info_2) - break; - } - - sev_es_put_ghcb(&state); -} - -/* - * Play_dead handler when running under SEV-ES. This is needed because - * the hypervisor can't deliver an SIPI request to restart the AP. 
- * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the - * hypervisor wakes it up again. - */ -static void sev_es_play_dead(void) -{ - play_dead_common(); - - /* IRQs now disabled */ - - sev_es_ap_hlt_loop(); - - /* - * If we get here, the VCPU was woken up again. Jump to CPU - * startup code to get it back online. - */ - start_cpu0(); -} -#else /* CONFIG_HOTPLUG_CPU */ -#define sev_es_play_dead native_play_dead -#endif /* CONFIG_HOTPLUG_CPU */ - -#ifdef CONFIG_SMP -static void __init sev_es_setup_play_dead(void) -{ - smp_ops.play_dead = sev_es_play_dead; -} -#else -static inline void sev_es_setup_play_dead(void) { } -#endif - -static void __init alloc_runtime_data(int cpu) -{ - struct sev_es_runtime_data *data; - - data = memblock_alloc(sizeof(*data), PAGE_SIZE); - if (!data) - panic("Can't allocate SEV-ES runtime data"); - - per_cpu(runtime_data, cpu) = data; -} - -static void __init init_ghcb(int cpu) -{ - struct sev_es_runtime_data *data; - int err; - - data = per_cpu(runtime_data, cpu); - - err = early_set_memory_decrypted((unsigned long)&data->ghcb_page, - sizeof(data->ghcb_page)); - if (err) - panic("Can't map GHCBs unencrypted"); - - memset(&data->ghcb_page, 0, sizeof(data->ghcb_page)); - - data->ghcb_active = false; - data->backup_ghcb_active = false; -} - -void __init sev_es_init_vc_handling(void) -{ - int cpu; - - BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE); - - if (!sev_es_active()) - return; - - if (!sev_es_check_cpu_features()) - panic("SEV-ES CPU Features missing"); - - /* Enable SEV-ES special handling */ - static_branch_enable(&sev_es_enable_key); - - /* Initialize per-cpu GHCB pages */ - for_each_possible_cpu(cpu) { - alloc_runtime_data(cpu); - init_ghcb(cpu); - setup_vc_stacks(cpu); - } - - sev_es_setup_play_dead(); - - /* Secondary CPUs use the runtime #VC handler */ - initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication; -} - -static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt) -{ - int trapnr = ctxt->fi.vector; - - if (trapnr == X86_TRAP_PF) - native_write_cr2(ctxt->fi.cr2); - - ctxt->regs->orig_ax = ctxt->fi.error_code; - do_early_exception(ctxt->regs, trapnr); -} - -static long *vc_insn_get_reg(struct es_em_ctxt *ctxt) -{ - long *reg_array; - int offset; - - reg_array = (long *)ctxt->regs; - offset = insn_get_modrm_reg_off(&ctxt->insn, ctxt->regs); - - if (offset < 0) - return NULL; - - offset /= sizeof(long); - - return reg_array + offset; -} - -static long *vc_insn_get_rm(struct es_em_ctxt *ctxt) -{ - long *reg_array; - int offset; - - reg_array = (long *)ctxt->regs; - offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs); - - if (offset < 0) - return NULL; - - offset /= sizeof(long); - - return reg_array + offset; -} -static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt, - unsigned int bytes, bool read) -{ - u64 exit_code, exit_info_1, exit_info_2; - unsigned long ghcb_pa = __pa(ghcb); - enum es_result res; - phys_addr_t paddr; - void __user *ref; - - ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs); - if (ref == (void __user *)-1L) - return ES_UNSUPPORTED; - - exit_code = read ? 
SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE; - - res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr); - if (res != ES_OK) { - if (res == ES_EXCEPTION && !read) - ctxt->fi.error_code |= X86_PF_WRITE; - - return res; - } - - exit_info_1 = paddr; - /* Can never be greater than 8 */ - exit_info_2 = bytes; - - ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer)); - - return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2); -} - -static enum es_result vc_handle_mmio_twobyte_ops(struct ghcb *ghcb, - struct es_em_ctxt *ctxt) -{ - struct insn *insn = &ctxt->insn; - unsigned int bytes = 0; - enum es_result ret; - int sign_byte; - long *reg_data; - - switch (insn->opcode.bytes[1]) { - /* MMIO Read w/ zero-extension */ - case 0xb6: - bytes = 1; - fallthrough; - case 0xb7: - if (!bytes) - bytes = 2; - - ret = vc_do_mmio(ghcb, ctxt, bytes, true); - if (ret) - break; - - /* Zero extend based on operand size */ - reg_data = vc_insn_get_reg(ctxt); - if (!reg_data) - return ES_DECODE_FAILED; - - memset(reg_data, 0, insn->opnd_bytes); - - memcpy(reg_data, ghcb->shared_buffer, bytes); - break; - - /* MMIO Read w/ sign-extension */ - case 0xbe: - bytes = 1; - fallthrough; - case 0xbf: - if (!bytes) - bytes = 2; - - ret = vc_do_mmio(ghcb, ctxt, bytes, true); - if (ret) - break; - - /* Sign extend based on operand size */ - reg_data = vc_insn_get_reg(ctxt); - if (!reg_data) - return ES_DECODE_FAILED; - - if (bytes == 1) { - u8 *val = (u8 *)ghcb->shared_buffer; - - sign_byte = (*val & 0x80) ? 0xff : 0x00; - } else { - u16 *val = (u16 *)ghcb->shared_buffer; - - sign_byte = (*val & 0x8000) ? 0xff : 0x00; - } - memset(reg_data, sign_byte, insn->opnd_bytes); - - memcpy(reg_data, ghcb->shared_buffer, bytes); - break; - - default: - ret = ES_UNSUPPORTED; - } - - return ret; -} - -/* - * The MOVS instruction has two memory operands, which raises the - * problem that it is not known whether the access to the source or the - * destination caused the #VC exception (and hence whether an MMIO read - * or write operation needs to be emulated). - * - * Instead of playing games with walking page-tables and trying to guess - * whether the source or destination is an MMIO range, split the move - * into two operations, a read and a write with only one memory operand. - * This will cause a nested #VC exception on the MMIO address which can - * then be handled. - * - * This implementation has the benefit that it also supports MOVS where - * source _and_ destination are MMIO regions. - * - * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a - * rare operation. If it turns out to be a performance problem the split - * operations can be moved to memcpy_fromio() and memcpy_toio(). 
- */ -static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt, - unsigned int bytes) -{ - unsigned long ds_base, es_base; - unsigned char *src, *dst; - unsigned char buffer[8]; - enum es_result ret; - bool rep; - int off; - - ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS); - es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES); - - if (ds_base == -1L || es_base == -1L) { - ctxt->fi.vector = X86_TRAP_GP; - ctxt->fi.error_code = 0; - return ES_EXCEPTION; - } - - src = ds_base + (unsigned char *)ctxt->regs->si; - dst = es_base + (unsigned char *)ctxt->regs->di; - - ret = vc_read_mem(ctxt, src, buffer, bytes); - if (ret != ES_OK) - return ret; - - ret = vc_write_mem(ctxt, dst, buffer, bytes); - if (ret != ES_OK) - return ret; - - if (ctxt->regs->flags & X86_EFLAGS_DF) - off = -bytes; - else - off = bytes; - - ctxt->regs->si += off; - ctxt->regs->di += off; - - rep = insn_has_rep_prefix(&ctxt->insn); - if (rep) - ctxt->regs->cx -= 1; - - if (!rep || ctxt->regs->cx == 0) - return ES_OK; - else - return ES_RETRY; -} - -static enum es_result vc_handle_mmio(struct ghcb *ghcb, - struct es_em_ctxt *ctxt) -{ - struct insn *insn = &ctxt->insn; - unsigned int bytes = 0; - enum es_result ret; - long *reg_data; - - switch (insn->opcode.bytes[0]) { - /* MMIO Write */ - case 0x88: - bytes = 1; - fallthrough; - case 0x89: - if (!bytes) - bytes = insn->opnd_bytes; - - reg_data = vc_insn_get_reg(ctxt); - if (!reg_data) - return ES_DECODE_FAILED; - - memcpy(ghcb->shared_buffer, reg_data, bytes); - - ret = vc_do_mmio(ghcb, ctxt, bytes, false); - break; - - case 0xc6: - bytes = 1; - fallthrough; - case 0xc7: - if (!bytes) - bytes = insn->opnd_bytes; - - memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes); - - ret = vc_do_mmio(ghcb, ctxt, bytes, false); - break; - - /* MMIO Read */ - case 0x8a: - bytes = 1; - fallthrough; - case 0x8b: - if (!bytes) - bytes = insn->opnd_bytes; - - ret = vc_do_mmio(ghcb, ctxt, bytes, true); - if (ret) - break; - - reg_data = vc_insn_get_reg(ctxt); - if (!reg_data) - return ES_DECODE_FAILED; - - /* Zero-extend for 32-bit operation */ - if (bytes == 4) - *reg_data = 0; - - memcpy(reg_data, ghcb->shared_buffer, bytes); - break; - - /* MOVS instruction */ - case 0xa4: - bytes = 1; - fallthrough; - case 0xa5: - if (!bytes) - bytes = insn->opnd_bytes; - - ret = vc_handle_mmio_movs(ctxt, bytes); - break; - /* Two-Byte Opcodes */ - case 0x0f: - ret = vc_handle_mmio_twobyte_ops(ghcb, ctxt); - break; - default: - ret = ES_UNSUPPORTED; - } - - return ret; -} - -static enum es_result vc_handle_dr7_write(struct ghcb *ghcb, - struct es_em_ctxt *ctxt) -{ - struct sev_es_runtime_data *data = this_cpu_read(runtime_data); - long val, *reg = vc_insn_get_rm(ctxt); - enum es_result ret; - - if (!reg) - return ES_DECODE_FAILED; - - val = *reg; - - /* Upper 32 bits must be written as zeroes */ - if (val >> 32) { - ctxt->fi.vector = X86_TRAP_GP; - ctxt->fi.error_code = 0; - return ES_EXCEPTION; - } - - /* Clear out other reserved bits and set bit 10 */ - val = (val & 0xffff23ffL) | BIT(10); - - /* Early non-zero writes to DR7 are not supported */ - if (!data && (val & ~DR7_RESET_VALUE)) - return ES_UNSUPPORTED; - - /* Using a value of 0 for ExitInfo1 means RAX holds the value */ - ghcb_set_rax(ghcb, val); - ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0); - if (ret != ES_OK) - return ret; - - if (data) - data->dr7 = val; - - return ES_OK; -} - -static enum es_result vc_handle_dr7_read(struct ghcb *ghcb, - struct es_em_ctxt *ctxt) -{ - struct 
sev_es_runtime_data *data = this_cpu_read(runtime_data);
-	long *reg = vc_insn_get_rm(ctxt);
-
-	if (!reg)
-		return ES_DECODE_FAILED;
-
-	if (data)
-		*reg = data->dr7;
-	else
-		*reg = DR7_RESET_VALUE;
-
-	return ES_OK;
-}
-
-static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
-				       struct es_em_ctxt *ctxt)
-{
-	return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
-}
-
-static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
-{
-	enum es_result ret;
-
-	ghcb_set_rcx(ghcb, ctxt->regs->cx);
-
-	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
-	if (ret != ES_OK)
-		return ret;
-
-	if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
-		return ES_VMM_ERROR;
-
-	ctxt->regs->ax = ghcb->save.rax;
-	ctxt->regs->dx = ghcb->save.rdx;
-
-	return ES_OK;
-}
-
-static enum es_result vc_handle_monitor(struct ghcb *ghcb,
-					struct es_em_ctxt *ctxt)
-{
-	/*
-	 * Treat it as a NOP and do not leak a physical address to the
-	 * hypervisor.
-	 */
-	return ES_OK;
-}
-
-static enum es_result vc_handle_mwait(struct ghcb *ghcb,
-				      struct es_em_ctxt *ctxt)
-{
-	/* Treat the same as MONITOR/MONITORX */
-	return ES_OK;
-}
-
-static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
-					struct es_em_ctxt *ctxt)
-{
-	enum es_result ret;
-
-	ghcb_set_rax(ghcb, ctxt->regs->ax);
-	ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);
-
-	if (x86_platform.hyper.sev_es_hcall_prepare)
-		x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);
-
-	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
-	if (ret != ES_OK)
-		return ret;
-
-	if (!ghcb_rax_is_valid(ghcb))
-		return ES_VMM_ERROR;
-
-	ctxt->regs->ax = ghcb->save.rax;
-
-	/*
-	 * Call sev_es_hcall_finish() after regs->ax is already set.
-	 * This allows the hypervisor handler to overwrite it again if
-	 * necessary.
-	 */
-	if (x86_platform.hyper.sev_es_hcall_finish &&
-	    !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
-		return ES_VMM_ERROR;
-
-	return ES_OK;
-}
-
-static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
-					struct es_em_ctxt *ctxt)
-{
-	/*
-	 * Calling exc_alignment_check() directly does not work, because it
-	 * enables IRQs and the GHCB is active. Forward the exception and call
-	 * it later from vc_forward_exception().
-	 */
-	ctxt->fi.vector = X86_TRAP_AC;
-	ctxt->fi.error_code = 0;
-	return ES_EXCEPTION;
-}
-
-static __always_inline void vc_handle_trap_db(struct pt_regs *regs)
-{
-	if (user_mode(regs))
-		noist_exc_debug(regs);
-	else
-		exc_debug(regs);
-}
-
-static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
-					 struct ghcb *ghcb,
-					 unsigned long exit_code)
-{
-	enum es_result result;
-
-	switch (exit_code) {
-	case SVM_EXIT_READ_DR7:
-		result = vc_handle_dr7_read(ghcb, ctxt);
-		break;
-	case SVM_EXIT_WRITE_DR7:
-		result = vc_handle_dr7_write(ghcb, ctxt);
-		break;
-	case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
-		result = vc_handle_trap_ac(ghcb, ctxt);
-		break;
-	case SVM_EXIT_RDTSC:
-	case SVM_EXIT_RDTSCP:
-		result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
-		break;
-	case SVM_EXIT_RDPMC:
-		result = vc_handle_rdpmc(ghcb, ctxt);
-		break;
-	case SVM_EXIT_INVD:
-		pr_err_ratelimited("#VC exception for INVD???
Seriously???\n"); - result = ES_UNSUPPORTED; - break; - case SVM_EXIT_CPUID: - result = vc_handle_cpuid(ghcb, ctxt); - break; - case SVM_EXIT_IOIO: - result = vc_handle_ioio(ghcb, ctxt); - break; - case SVM_EXIT_MSR: - result = vc_handle_msr(ghcb, ctxt); - break; - case SVM_EXIT_VMMCALL: - result = vc_handle_vmmcall(ghcb, ctxt); - break; - case SVM_EXIT_WBINVD: - result = vc_handle_wbinvd(ghcb, ctxt); - break; - case SVM_EXIT_MONITOR: - result = vc_handle_monitor(ghcb, ctxt); - break; - case SVM_EXIT_MWAIT: - result = vc_handle_mwait(ghcb, ctxt); - break; - case SVM_EXIT_NPF: - result = vc_handle_mmio(ghcb, ctxt); - break; - default: - /* - * Unexpected #VC exception - */ - result = ES_UNSUPPORTED; - } - - return result; -} - -static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt) -{ - long error_code = ctxt->fi.error_code; - int trapnr = ctxt->fi.vector; - - ctxt->regs->orig_ax = ctxt->fi.error_code; - - switch (trapnr) { - case X86_TRAP_GP: - exc_general_protection(ctxt->regs, error_code); - break; - case X86_TRAP_UD: - exc_invalid_op(ctxt->regs); - break; - case X86_TRAP_AC: - exc_alignment_check(ctxt->regs, error_code); - break; - default: - pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n"); - BUG(); - } -} - -static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs) -{ - unsigned long sp = (unsigned long)regs; - - return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2)); -} - -/* - * Main #VC exception handler. It is called when the entry code was able to - * switch off the IST to a safe kernel stack. - * - * With the current implementation it is always possible to switch to a safe - * stack because #VC exceptions only happen at known places, like intercepted - * instructions or accesses to MMIO areas/IO ports. They can also happen with - * code instrumentation when the hypervisor intercepts #DB, but the critical - * paths are forbidden to be instrumented, so #DB exceptions currently also - * only happen in safe places. - */ -DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) -{ - struct sev_es_runtime_data *data = this_cpu_read(runtime_data); - irqentry_state_t irq_state; - struct ghcb_state state; - struct es_em_ctxt ctxt; - enum es_result result; - struct ghcb *ghcb; - - /* - * Handle #DB before calling into !noinstr code to avoid recursive #DB. - */ - if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) { - vc_handle_trap_db(regs); - return; - } - - irq_state = irqentry_nmi_enter(regs); - lockdep_assert_irqs_disabled(); - instrumentation_begin(); - - /* - * This is invoked through an interrupt gate, so IRQs are disabled. The - * code below might walk page-tables for user or kernel addresses, so - * keep the IRQs disabled to protect us against concurrent TLB flushes. - */ - - ghcb = sev_es_get_ghcb(&state); - if (!ghcb) { - /* - * Mark GHCBs inactive so that panic() is able to print the - * message. - */ - data->ghcb_active = false; - data->backup_ghcb_active = false; - - panic("Unable to handle #VC exception! 
GHCB and Backup GHCB are already in use"); - } - - vc_ghcb_invalidate(ghcb); - result = vc_init_em_ctxt(&ctxt, regs, error_code); - - if (result == ES_OK) - result = vc_handle_exitcode(&ctxt, ghcb, error_code); - - sev_es_put_ghcb(&state); - - /* Done - now check the result */ - switch (result) { - case ES_OK: - vc_finish_insn(&ctxt); - break; - case ES_UNSUPPORTED: - pr_err_ratelimited("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n", - error_code, regs->ip); - goto fail; - case ES_VMM_ERROR: - pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", - error_code, regs->ip); - goto fail; - case ES_DECODE_FAILED: - pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", - error_code, regs->ip); - goto fail; - case ES_EXCEPTION: - vc_forward_exception(&ctxt); - break; - case ES_RETRY: - /* Nothing to do */ - break; - default: - pr_emerg("Unknown result in %s():%d\n", __func__, result); - /* - * Emulating the instruction which caused the #VC exception - * failed - can't continue so print debug information - */ - BUG(); - } - -out: - instrumentation_end(); - irqentry_nmi_exit(regs, irq_state); - - return; - -fail: - if (user_mode(regs)) { - /* - * Do not kill the machine if user-space triggered the - * exception. Send SIGBUS instead and let user-space deal with - * it. - */ - force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0); - } else { - pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n", - result); - - /* Show some debug info */ - show_regs(regs); - - /* Ask hypervisor to sev_es_terminate */ - sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST); - - /* If that fails and we get here - just panic */ - panic("Returned from Terminate-Request to Hypervisor\n"); - } - - goto out; -} - -/* This handler runs on the #VC fall-back stack. 
It can cause further #VC exceptions */ -DEFINE_IDTENTRY_VC_IST(exc_vmm_communication) -{ - instrumentation_begin(); - panic("Can't handle #VC exception from unsupported context\n"); - instrumentation_end(); -} - -DEFINE_IDTENTRY_VC(exc_vmm_communication) -{ - if (likely(!on_vc_fallback_stack(regs))) - safe_stack_exc_vmm_communication(regs, error_code); - else - ist_exc_vmm_communication(regs, error_code); -} - -bool __init handle_vc_boot_ghcb(struct pt_regs *regs) -{ - unsigned long exit_code = regs->orig_ax; - struct es_em_ctxt ctxt; - enum es_result result; - - /* Do initial setup or terminate the guest */ - if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb())) - sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST); - - vc_ghcb_invalidate(boot_ghcb); - - result = vc_init_em_ctxt(&ctxt, regs, exit_code); - if (result == ES_OK) - result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code); - - /* Done - now check the result */ - switch (result) { - case ES_OK: - vc_finish_insn(&ctxt); - break; - case ES_UNSUPPORTED: - early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n", - exit_code, regs->ip); - goto fail; - case ES_VMM_ERROR: - early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", - exit_code, regs->ip); - goto fail; - case ES_DECODE_FAILED: - early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", - exit_code, regs->ip); - goto fail; - case ES_EXCEPTION: - vc_early_forward_exception(&ctxt); - break; - case ES_RETRY: - /* Nothing to do */ - break; - default: - BUG(); - } - - return true; - -fail: - show_regs(regs); - - while (true) - halt(); -} diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c new file mode 100644 index 000000000000..0aa9f13efd57 --- /dev/null +++ b/arch/x86/kernel/sev-shared.c @@ -0,0 +1,525 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * AMD Encrypted Register State Support + * + * Author: Joerg Roedel + * + * This file is not compiled stand-alone. It contains code shared + * between the pre-decompression boot code and the running Linux kernel + * and is included directly into both code-bases. + */ + +#ifndef __BOOT_COMPRESSED +#define error(v) pr_err(v) +#define has_cpuflag(f) boot_cpu_has(f) +#endif + +static bool __init sev_es_check_cpu_features(void) +{ + if (!has_cpuflag(X86_FEATURE_RDRAND)) { + error("RDRAND instruction not supported - no trusted source of randomness available\n"); + return false; + } + + return true; +} + +static void __noreturn sev_es_terminate(unsigned int reason) +{ + u64 val = GHCB_SEV_TERMINATE; + + /* + * Tell the hypervisor what went wrong - only reason-set 0 is + * currently supported. 
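+ *
+ * As a rough sketch of the encoding (per the GHCB MSR protocol; the
+ * exact masks live in the GHCB_SEV_TERMINATE_REASON() definition):
+ * bits [11:0] carry the terminate request code, bits [15:12] the
+ * reason set and bits [23:16] the reason code, so the OR below merges
+ * the reason into the request value.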
+ */
+	val |= GHCB_SEV_TERMINATE_REASON(0, reason);
+
+	/* Request Guest Termination from Hypervisor */
+	sev_es_wr_ghcb_msr(val);
+	VMGEXIT();
+
+	while (true)
+		asm volatile("hlt\n" : : : "memory");
+}
+
+static bool sev_es_negotiate_protocol(void)
+{
+	u64 val;
+
+	/* Do the GHCB protocol version negotiation */
+	sev_es_wr_ghcb_msr(GHCB_SEV_INFO_REQ);
+	VMGEXIT();
+	val = sev_es_rd_ghcb_msr();
+
+	if (GHCB_INFO(val) != GHCB_SEV_INFO)
+		return false;
+
+	if (GHCB_PROTO_MAX(val) < GHCB_PROTO_OUR ||
+	    GHCB_PROTO_MIN(val) > GHCB_PROTO_OUR)
+		return false;
+
+	return true;
+}
+
+static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
+{
+	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
+}
+
+static bool vc_decoding_needed(unsigned long exit_code)
+{
+	/* Exceptions don't require decoding of the instruction */
+	return !(exit_code >= SVM_EXIT_EXCP_BASE &&
+		 exit_code <= SVM_EXIT_LAST_EXCP);
+}
+
+static enum es_result vc_init_em_ctxt(struct es_em_ctxt *ctxt,
+				      struct pt_regs *regs,
+				      unsigned long exit_code)
+{
+	enum es_result ret = ES_OK;
+
+	memset(ctxt, 0, sizeof(*ctxt));
+	ctxt->regs = regs;
+
+	if (vc_decoding_needed(exit_code))
+		ret = vc_decode_insn(ctxt);
+
+	return ret;
+}
+
+static void vc_finish_insn(struct es_em_ctxt *ctxt)
+{
+	ctxt->regs->ip += ctxt->insn.length;
+}
+
+static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
+					  struct es_em_ctxt *ctxt,
+					  u64 exit_code, u64 exit_info_1,
+					  u64 exit_info_2)
+{
+	enum es_result ret;
+
+	/* Fill in protocol and format specifiers */
+	ghcb->protocol_version = GHCB_PROTOCOL_MAX;
+	ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
+
+	ghcb_set_sw_exit_code(ghcb, exit_code);
+	ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
+	ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
+
+	sev_es_wr_ghcb_msr(__pa(ghcb));
+	VMGEXIT();
+
+	if ((ghcb->save.sw_exit_info_1 & 0xffffffff) == 1) {
+		u64 info = ghcb->save.sw_exit_info_2;
+		unsigned long v = info & SVM_EVTINJ_VEC_MASK;
+
+		/* Check if exception information from hypervisor is sane. */
+		if ((info & SVM_EVTINJ_VALID) &&
+		    ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
+		    ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
+			ctxt->fi.vector = v;
+			if (info & SVM_EVTINJ_VALID_ERR)
+				ctxt->fi.error_code = info >> 32;
+			ret = ES_EXCEPTION;
+		} else {
+			ret = ES_VMM_ERROR;
+		}
+	} else {
+		ret = ES_OK;
+	}
+
+	return ret;
+}
+
+/*
+ * Boot VC Handler - This is the first VC handler during boot, there is no GHCB
+ * page yet, so it only supports the MSR based communication with the
+ * hypervisor and only the CPUID exit-code.
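+ *
+ * Each of EAX/EBX/ECX/EDX takes its own round trip: write a
+ * GHCB_CPUID_REQ() request to the GHCB MSR, issue VMGEXIT, then read
+ * the 32-bit result from the upper half of the response value - so a
+ * single emulated CPUID below costs four VMGEXITs.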
+ */ +void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) +{ + unsigned int fn = lower_bits(regs->ax, 32); + unsigned long val; + + /* Only CPUID is supported via MSR protocol */ + if (exit_code != SVM_EXIT_CPUID) + goto fail; + + sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX)); + VMGEXIT(); + val = sev_es_rd_ghcb_msr(); + if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + goto fail; + regs->ax = val >> 32; + + sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX)); + VMGEXIT(); + val = sev_es_rd_ghcb_msr(); + if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + goto fail; + regs->bx = val >> 32; + + sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX)); + VMGEXIT(); + val = sev_es_rd_ghcb_msr(); + if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + goto fail; + regs->cx = val >> 32; + + sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX)); + VMGEXIT(); + val = sev_es_rd_ghcb_msr(); + if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + goto fail; + regs->dx = val >> 32; + + /* + * This is a VC handler and the #VC is only raised when SEV-ES is + * active, which means SEV must be active too. Do sanity checks on the + * CPUID results to make sure the hypervisor does not trick the kernel + * into the no-sev path. This could map sensitive data unencrypted and + * make it accessible to the hypervisor. + * + * In particular, check for: + * - Availability of CPUID leaf 0x8000001f + * - SEV CPUID bit. + * + * The hypervisor might still report the wrong C-bit position, but this + * can't be checked here. + */ + + if (fn == 0x80000000 && (regs->ax < 0x8000001f)) + /* SEV leaf check */ + goto fail; + else if ((fn == 0x8000001f && !(regs->ax & BIT(1)))) + /* SEV bit */ + goto fail; + + /* Skip over the CPUID two-byte opcode */ + regs->ip += 2; + + return; + +fail: + /* Terminate the guest */ + sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST); +} + +static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt, + void *src, char *buf, + unsigned int data_size, + unsigned int count, + bool backwards) +{ + int i, b = backwards ? -1 : 1; + enum es_result ret = ES_OK; + + for (i = 0; i < count; i++) { + void *s = src + (i * data_size * b); + char *d = buf + (i * data_size); + + ret = vc_read_mem(ctxt, s, d, data_size); + if (ret != ES_OK) + break; + } + + return ret; +} + +static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt, + void *dst, char *buf, + unsigned int data_size, + unsigned int count, + bool backwards) +{ + int i, s = backwards ? 
-1 : 1; + enum es_result ret = ES_OK; + + for (i = 0; i < count; i++) { + void *d = dst + (i * data_size * s); + char *b = buf + (i * data_size); + + ret = vc_write_mem(ctxt, d, b, data_size); + if (ret != ES_OK) + break; + } + + return ret; +} + +#define IOIO_TYPE_STR BIT(2) +#define IOIO_TYPE_IN 1 +#define IOIO_TYPE_INS (IOIO_TYPE_IN | IOIO_TYPE_STR) +#define IOIO_TYPE_OUT 0 +#define IOIO_TYPE_OUTS (IOIO_TYPE_OUT | IOIO_TYPE_STR) + +#define IOIO_REP BIT(3) + +#define IOIO_ADDR_64 BIT(9) +#define IOIO_ADDR_32 BIT(8) +#define IOIO_ADDR_16 BIT(7) + +#define IOIO_DATA_32 BIT(6) +#define IOIO_DATA_16 BIT(5) +#define IOIO_DATA_8 BIT(4) + +#define IOIO_SEG_ES (0 << 10) +#define IOIO_SEG_DS (3 << 10) + +static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) +{ + struct insn *insn = &ctxt->insn; + *exitinfo = 0; + + switch (insn->opcode.bytes[0]) { + /* INS opcodes */ + case 0x6c: + case 0x6d: + *exitinfo |= IOIO_TYPE_INS; + *exitinfo |= IOIO_SEG_ES; + *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + break; + + /* OUTS opcodes */ + case 0x6e: + case 0x6f: + *exitinfo |= IOIO_TYPE_OUTS; + *exitinfo |= IOIO_SEG_DS; + *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + break; + + /* IN immediate opcodes */ + case 0xe4: + case 0xe5: + *exitinfo |= IOIO_TYPE_IN; + *exitinfo |= (u8)insn->immediate.value << 16; + break; + + /* OUT immediate opcodes */ + case 0xe6: + case 0xe7: + *exitinfo |= IOIO_TYPE_OUT; + *exitinfo |= (u8)insn->immediate.value << 16; + break; + + /* IN register opcodes */ + case 0xec: + case 0xed: + *exitinfo |= IOIO_TYPE_IN; + *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + break; + + /* OUT register opcodes */ + case 0xee: + case 0xef: + *exitinfo |= IOIO_TYPE_OUT; + *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + break; + + default: + return ES_DECODE_FAILED; + } + + switch (insn->opcode.bytes[0]) { + case 0x6c: + case 0x6e: + case 0xe4: + case 0xe6: + case 0xec: + case 0xee: + /* Single byte opcodes */ + *exitinfo |= IOIO_DATA_8; + break; + default: + /* Length determined by instruction parsing */ + *exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16 + : IOIO_DATA_32; + } + switch (insn->addr_bytes) { + case 2: + *exitinfo |= IOIO_ADDR_16; + break; + case 4: + *exitinfo |= IOIO_ADDR_32; + break; + case 8: + *exitinfo |= IOIO_ADDR_64; + break; + } + + if (insn_has_rep_prefix(insn)) + *exitinfo |= IOIO_REP; + + return ES_OK; +} + +static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) +{ + struct pt_regs *regs = ctxt->regs; + u64 exit_info_1, exit_info_2; + enum es_result ret; + + ret = vc_ioio_exitinfo(ctxt, &exit_info_1); + if (ret != ES_OK) + return ret; + + if (exit_info_1 & IOIO_TYPE_STR) { + + /* (REP) INS/OUTS */ + + bool df = ((regs->flags & X86_EFLAGS_DF) == X86_EFLAGS_DF); + unsigned int io_bytes, exit_bytes; + unsigned int ghcb_count, op_count; + unsigned long es_base; + u64 sw_scratch; + + /* + * For the string variants with rep prefix the amount of in/out + * operations per #VC exception is limited so that the kernel + * has a chance to take interrupts and re-schedule while the + * instruction is emulated. + */ + io_bytes = (exit_info_1 >> 4) & 0x7; + ghcb_count = sizeof(ghcb->shared_buffer) / io_bytes; + + op_count = (exit_info_1 & IOIO_REP) ? 
regs->cx : 1; + exit_info_2 = min(op_count, ghcb_count); + exit_bytes = exit_info_2 * io_bytes; + + es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES); + + /* Read bytes of OUTS into the shared buffer */ + if (!(exit_info_1 & IOIO_TYPE_IN)) { + ret = vc_insn_string_read(ctxt, + (void *)(es_base + regs->si), + ghcb->shared_buffer, io_bytes, + exit_info_2, df); + if (ret) + return ret; + } + + /* + * Issue an VMGEXIT to the HV to consume the bytes from the + * shared buffer or to have it write them into the shared buffer + * depending on the instruction: OUTS or INS. + */ + sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer); + ghcb_set_sw_scratch(ghcb, sw_scratch); + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, + exit_info_1, exit_info_2); + if (ret != ES_OK) + return ret; + + /* Read bytes from shared buffer into the guest's destination. */ + if (exit_info_1 & IOIO_TYPE_IN) { + ret = vc_insn_string_write(ctxt, + (void *)(es_base + regs->di), + ghcb->shared_buffer, io_bytes, + exit_info_2, df); + if (ret) + return ret; + + if (df) + regs->di -= exit_bytes; + else + regs->di += exit_bytes; + } else { + if (df) + regs->si -= exit_bytes; + else + regs->si += exit_bytes; + } + + if (exit_info_1 & IOIO_REP) + regs->cx -= exit_info_2; + + ret = regs->cx ? ES_RETRY : ES_OK; + + } else { + + /* IN/OUT into/from rAX */ + + int bits = (exit_info_1 & 0x70) >> 1; + u64 rax = 0; + + if (!(exit_info_1 & IOIO_TYPE_IN)) + rax = lower_bits(regs->ax, bits); + + ghcb_set_rax(ghcb, rax); + + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0); + if (ret != ES_OK) + return ret; + + if (exit_info_1 & IOIO_TYPE_IN) { + if (!ghcb_rax_is_valid(ghcb)) + return ES_VMM_ERROR; + regs->ax = lower_bits(ghcb->save.rax, bits); + } + } + + return ret; +} + +static enum es_result vc_handle_cpuid(struct ghcb *ghcb, + struct es_em_ctxt *ctxt) +{ + struct pt_regs *regs = ctxt->regs; + u32 cr4 = native_read_cr4(); + enum es_result ret; + + ghcb_set_rax(ghcb, regs->ax); + ghcb_set_rcx(ghcb, regs->cx); + + if (cr4 & X86_CR4_OSXSAVE) + /* Safe to read xcr0 */ + ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK)); + else + /* xgetbv will cause #GP - use reset value for xcr0 */ + ghcb_set_xcr0(ghcb, 1); + + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0); + if (ret != ES_OK) + return ret; + + if (!(ghcb_rax_is_valid(ghcb) && + ghcb_rbx_is_valid(ghcb) && + ghcb_rcx_is_valid(ghcb) && + ghcb_rdx_is_valid(ghcb))) + return ES_VMM_ERROR; + + regs->ax = ghcb->save.rax; + regs->bx = ghcb->save.rbx; + regs->cx = ghcb->save.rcx; + regs->dx = ghcb->save.rdx; + + return ES_OK; +} + +static enum es_result vc_handle_rdtsc(struct ghcb *ghcb, + struct es_em_ctxt *ctxt, + unsigned long exit_code) +{ + bool rdtscp = (exit_code == SVM_EXIT_RDTSCP); + enum es_result ret; + + ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0); + if (ret != ES_OK) + return ret; + + if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb) && + (!rdtscp || ghcb_rcx_is_valid(ghcb)))) + return ES_VMM_ERROR; + + ctxt->regs->ax = ghcb->save.rax; + ctxt->regs->dx = ghcb->save.rdx; + if (rdtscp) + ctxt->regs->cx = ghcb->save.rcx; + + return ES_OK; +} diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c new file mode 100644 index 000000000000..9578c82832aa --- /dev/null +++ b/arch/x86/kernel/sev.c @@ -0,0 +1,1461 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Memory Encryption Support + * + * Copyright (C) 2019 SUSE + * + * Author: Joerg Roedel + */ + +#define pr_fmt(fmt) "SEV-ES: " fmt + +#include /* For 
show_regs() */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DR7_RESET_VALUE 0x400 + +/* For early boot hypervisor communication in SEV-ES enabled guests */ +static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE); + +/* + * Needs to be in the .data section because we need it NULL before bss is + * cleared + */ +static struct ghcb __initdata *boot_ghcb; + +/* #VC handler runtime per-CPU data */ +struct sev_es_runtime_data { + struct ghcb ghcb_page; + + /* Physical storage for the per-CPU IST stack of the #VC handler */ + char ist_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE); + + /* + * Physical storage for the per-CPU fall-back stack of the #VC handler. + * The fall-back stack is used when it is not safe to switch back to the + * interrupted stack in the #VC entry code. + */ + char fallback_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE); + + /* + * Reserve one page per CPU as backup storage for the unencrypted GHCB. + * It is needed when an NMI happens while the #VC handler uses the real + * GHCB, and the NMI handler itself is causing another #VC exception. In + * that case the GHCB content of the first handler needs to be backed up + * and restored. + */ + struct ghcb backup_ghcb; + + /* + * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions. + * There is no need for it to be atomic, because nothing is written to + * the GHCB between the read and the write of ghcb_active. So it is safe + * to use it when a nested #VC exception happens before the write. + * + * This is necessary for example in the #VC->NMI->#VC case when the NMI + * happens while the first #VC handler uses the GHCB. When the NMI code + * raises a second #VC handler it might overwrite the contents of the + * GHCB written by the first handler. To avoid this the content of the + * GHCB is saved and restored when the GHCB is detected to be in use + * already. + */ + bool ghcb_active; + bool backup_ghcb_active; + + /* + * Cached DR7 value - write it on DR7 writes and return it on reads. + * That value will never make it to the real hardware DR7 as debugging + * is currently unsupported in SEV-ES guests. 
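+ *
+ * In other words, both MOV-to-DR7 and MOV-from-DR7 are intercepted
+ * and raise #VC, and vc_handle_dr7_write()/vc_handle_dr7_read()
+ * below emulate them against this cached value instead of the real
+ * register.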
+ */ + unsigned long dr7; +}; + +struct ghcb_state { + struct ghcb *ghcb; +}; + +static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); +DEFINE_STATIC_KEY_FALSE(sev_es_enable_key); + +/* Needed in vc_early_forward_exception */ +void do_early_exception(struct pt_regs *regs, int trapnr); + +static void __init setup_vc_stacks(int cpu) +{ + struct sev_es_runtime_data *data; + struct cpu_entry_area *cea; + unsigned long vaddr; + phys_addr_t pa; + + data = per_cpu(runtime_data, cpu); + cea = get_cpu_entry_area(cpu); + + /* Map #VC IST stack */ + vaddr = CEA_ESTACK_BOT(&cea->estacks, VC); + pa = __pa(data->ist_stack); + cea_set_pte((void *)vaddr, pa, PAGE_KERNEL); + + /* Map VC fall-back stack */ + vaddr = CEA_ESTACK_BOT(&cea->estacks, VC2); + pa = __pa(data->fallback_stack); + cea_set_pte((void *)vaddr, pa, PAGE_KERNEL); +} + +static __always_inline bool on_vc_stack(struct pt_regs *regs) +{ + unsigned long sp = regs->sp; + + /* User-mode RSP is not trusted */ + if (user_mode(regs)) + return false; + + /* SYSCALL gap still has user-mode RSP */ + if (ip_within_syscall_gap(regs)) + return false; + + return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC))); +} + +/* + * This function handles the case when an NMI is raised in the #VC + * exception handler entry code, before the #VC handler has switched off + * its IST stack. In this case, the IST entry for #VC must be adjusted, + * so that any nested #VC exception will not overwrite the stack + * contents of the interrupted #VC handler. + * + * The IST entry is adjusted unconditionally so that it can be also be + * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a + * nested sev_es_ist_exit() call may adjust back the IST entry too + * early. + * + * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run + * on the NMI IST stack, as they are only called from NMI handling code + * right now. + */ +void noinstr __sev_es_ist_enter(struct pt_regs *regs) +{ + unsigned long old_ist, new_ist; + + /* Read old IST entry */ + new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]); + + /* + * If NMI happened while on the #VC IST stack, set the new IST + * value below regs->sp, so that the interrupted stack frame is + * not overwritten by subsequent #VC exceptions. + */ + if (on_vc_stack(regs)) + new_ist = regs->sp; + + /* + * Reserve additional 8 bytes and store old IST value so this + * adjustment can be unrolled in __sev_es_ist_exit(). 
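+ *
+ * The resulting layout on the #VC IST stack is roughly:
+ *
+ *                         +--------------------------+ (higher addresses)
+ *                         |  interrupted #VC frame   |
+ *   regs->sp (old IST) -> +--------------------------+
+ *                         |  saved old IST value     |
+ *   new IST entry      -> +--------------------------+
+ *                         |  room for a nested #VC   |
+ *                         +--------------------------+ (lower addresses)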
+ */ + new_ist -= sizeof(old_ist); + *(unsigned long *)new_ist = old_ist; + + /* Set new IST entry */ + this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist); +} + +void noinstr __sev_es_ist_exit(void) +{ + unsigned long ist; + + /* Read IST entry */ + ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]); + + if (WARN_ON(ist == __this_cpu_ist_top_va(VC))) + return; + + /* Read back old IST entry and write it to the TSS */ + this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist); +} + +static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state) +{ + struct sev_es_runtime_data *data; + struct ghcb *ghcb; + + data = this_cpu_read(runtime_data); + ghcb = &data->ghcb_page; + + if (unlikely(data->ghcb_active)) { + /* GHCB is already in use - save its contents */ + + if (unlikely(data->backup_ghcb_active)) + return NULL; + + /* Mark backup_ghcb active before writing to it */ + data->backup_ghcb_active = true; + + state->ghcb = &data->backup_ghcb; + + /* Backup GHCB content */ + *state->ghcb = *ghcb; + } else { + state->ghcb = NULL; + data->ghcb_active = true; + } + + return ghcb; +} + +static __always_inline void sev_es_put_ghcb(struct ghcb_state *state) +{ + struct sev_es_runtime_data *data; + struct ghcb *ghcb; + + data = this_cpu_read(runtime_data); + ghcb = &data->ghcb_page; + + if (state->ghcb) { + /* Restore GHCB from Backup */ + *ghcb = *state->ghcb; + data->backup_ghcb_active = false; + state->ghcb = NULL; + } else { + data->ghcb_active = false; + } +} + +/* Needed in vc_early_forward_exception */ +void do_early_exception(struct pt_regs *regs, int trapnr); + +static inline u64 sev_es_rd_ghcb_msr(void) +{ + return __rdmsr(MSR_AMD64_SEV_ES_GHCB); +} + +static __always_inline void sev_es_wr_ghcb_msr(u64 val) +{ + u32 low, high; + + low = (u32)(val); + high = (u32)(val >> 32); + + native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high); +} + +static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt, + unsigned char *buffer) +{ + return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE); +} + +static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt) +{ + char buffer[MAX_INSN_SIZE]; + int res; + + res = insn_fetch_from_user_inatomic(ctxt->regs, buffer); + if (!res) { + ctxt->fi.vector = X86_TRAP_PF; + ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER; + ctxt->fi.cr2 = ctxt->regs->ip; + return ES_EXCEPTION; + } + + if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res)) + return ES_DECODE_FAILED; + + if (ctxt->insn.immediate.got) + return ES_OK; + else + return ES_DECODE_FAILED; +} + +static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt) +{ + char buffer[MAX_INSN_SIZE]; + int res, ret; + + res = vc_fetch_insn_kernel(ctxt, buffer); + if (res) { + ctxt->fi.vector = X86_TRAP_PF; + ctxt->fi.error_code = X86_PF_INSTR; + ctxt->fi.cr2 = ctxt->regs->ip; + return ES_EXCEPTION; + } + + ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64); + if (ret < 0) + return ES_DECODE_FAILED; + else + return ES_OK; +} + +static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt) +{ + if (user_mode(ctxt->regs)) + return __vc_decode_user_insn(ctxt); + else + return __vc_decode_kern_insn(ctxt); +} + +static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, + char *dst, char *buf, size_t size) +{ + unsigned long error_code = X86_PF_PROT | X86_PF_WRITE; + char __user *target = (char __user *)dst; + u64 d8; + u32 d4; + u16 d2; + u8 d1; + + /* If instruction ran in kernel mode and the I/O buffer 
is in kernel space */ + if (!user_mode(ctxt->regs) && !access_ok(target, size)) { + memcpy(dst, buf, size); + return ES_OK; + } + + switch (size) { + case 1: + memcpy(&d1, buf, 1); + if (put_user(d1, target)) + goto fault; + break; + case 2: + memcpy(&d2, buf, 2); + if (put_user(d2, target)) + goto fault; + break; + case 4: + memcpy(&d4, buf, 4); + if (put_user(d4, target)) + goto fault; + break; + case 8: + memcpy(&d8, buf, 8); + if (put_user(d8, target)) + goto fault; + break; + default: + WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); + return ES_UNSUPPORTED; + } + + return ES_OK; + +fault: + if (user_mode(ctxt->regs)) + error_code |= X86_PF_USER; + + ctxt->fi.vector = X86_TRAP_PF; + ctxt->fi.error_code = error_code; + ctxt->fi.cr2 = (unsigned long)dst; + + return ES_EXCEPTION; +} + +static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, + char *src, char *buf, size_t size) +{ + unsigned long error_code = X86_PF_PROT; + char __user *s = (char __user *)src; + u64 d8; + u32 d4; + u16 d2; + u8 d1; + + /* If instruction ran in kernel mode and the I/O buffer is in kernel space */ + if (!user_mode(ctxt->regs) && !access_ok(s, size)) { + memcpy(buf, src, size); + return ES_OK; + } + + switch (size) { + case 1: + if (get_user(d1, s)) + goto fault; + memcpy(buf, &d1, 1); + break; + case 2: + if (get_user(d2, s)) + goto fault; + memcpy(buf, &d2, 2); + break; + case 4: + if (get_user(d4, s)) + goto fault; + memcpy(buf, &d4, 4); + break; + case 8: + if (get_user(d8, s)) + goto fault; + memcpy(buf, &d8, 8); + break; + default: + WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); + return ES_UNSUPPORTED; + } + + return ES_OK; + +fault: + if (user_mode(ctxt->regs)) + error_code |= X86_PF_USER; + + ctxt->fi.vector = X86_TRAP_PF; + ctxt->fi.error_code = error_code; + ctxt->fi.cr2 = (unsigned long)src; + + return ES_EXCEPTION; +} + +static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, + unsigned long vaddr, phys_addr_t *paddr) +{ + unsigned long va = (unsigned long)vaddr; + unsigned int level; + phys_addr_t pa; + pgd_t *pgd; + pte_t *pte; + + pgd = __va(read_cr3_pa()); + pgd = &pgd[pgd_index(va)]; + pte = lookup_address_in_pgd(pgd, va, &level); + if (!pte) { + ctxt->fi.vector = X86_TRAP_PF; + ctxt->fi.cr2 = vaddr; + ctxt->fi.error_code = 0; + + if (user_mode(ctxt->regs)) + ctxt->fi.error_code |= X86_PF_USER; + + return ES_EXCEPTION; + } + + if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC)) + /* Emulated MMIO to/from encrypted memory not supported */ + return ES_UNSUPPORTED; + + pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; + pa |= va & ~page_level_mask(level); + + *paddr = pa; + + return ES_OK; +} + +/* Include code shared with pre-decompression boot stage */ +#include "sev-shared.c" + +void noinstr __sev_es_nmi_complete(void) +{ + struct ghcb_state state; + struct ghcb *ghcb; + + ghcb = sev_es_get_ghcb(&state); + + vc_ghcb_invalidate(ghcb); + ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE); + ghcb_set_sw_exit_info_1(ghcb, 0); + ghcb_set_sw_exit_info_2(ghcb, 0); + + sev_es_wr_ghcb_msr(__pa_nodebug(ghcb)); + VMGEXIT(); + + sev_es_put_ghcb(&state); +} + +static u64 get_jump_table_addr(void) +{ + struct ghcb_state state; + unsigned long flags; + struct ghcb *ghcb; + u64 ret = 0; + + local_irq_save(flags); + + ghcb = sev_es_get_ghcb(&state); + + vc_ghcb_invalidate(ghcb); + ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE); + ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE); + ghcb_set_sw_exit_info_2(ghcb, 0); + + 
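+	/*
+	 * Point the GHCB MSR at this vCPU's GHCB and exit to the
+	 * hypervisor; on success the jump table's physical address
+	 * comes back in sw_exit_info_2.
+	 */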
sev_es_wr_ghcb_msr(__pa(ghcb)); + VMGEXIT(); + + if (ghcb_sw_exit_info_1_is_valid(ghcb) && + ghcb_sw_exit_info_2_is_valid(ghcb)) + ret = ghcb->save.sw_exit_info_2; + + sev_es_put_ghcb(&state); + + local_irq_restore(flags); + + return ret; +} + +int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) +{ + u16 startup_cs, startup_ip; + phys_addr_t jump_table_pa; + u64 jump_table_addr; + u16 __iomem *jump_table; + + jump_table_addr = get_jump_table_addr(); + + /* On UP guests there is no jump table so this is not a failure */ + if (!jump_table_addr) + return 0; + + /* Check if AP Jump Table is page-aligned */ + if (jump_table_addr & ~PAGE_MASK) + return -EINVAL; + + jump_table_pa = jump_table_addr & PAGE_MASK; + + startup_cs = (u16)(rmh->trampoline_start >> 4); + startup_ip = (u16)(rmh->sev_es_trampoline_start - + rmh->trampoline_start); + + jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE); + if (!jump_table) + return -EIO; + + writew(startup_ip, &jump_table[0]); + writew(startup_cs, &jump_table[1]); + + iounmap(jump_table); + + return 0; +} + +/* + * This is needed by the OVMF UEFI firmware which will use whatever it finds in + * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu + * runtime GHCBs used by the kernel are also mapped in the EFI page-table. + */ +int __init sev_es_efi_map_ghcbs(pgd_t *pgd) +{ + struct sev_es_runtime_data *data; + unsigned long address, pflags; + int cpu; + u64 pfn; + + if (!sev_es_active()) + return 0; + + pflags = _PAGE_NX | _PAGE_RW; + + for_each_possible_cpu(cpu) { + data = per_cpu(runtime_data, cpu); + + address = __pa(&data->ghcb_page); + pfn = address >> PAGE_SHIFT; + + if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags)) + return 1; + } + + return 0; +} + +static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) +{ + struct pt_regs *regs = ctxt->regs; + enum es_result ret; + u64 exit_info_1; + + /* Is it a WRMSR? */ + exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0; + + ghcb_set_rcx(ghcb, regs->cx); + if (exit_info_1) { + ghcb_set_rax(ghcb, regs->ax); + ghcb_set_rdx(ghcb, regs->dx); + } + + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0); + + if ((ret == ES_OK) && (!exit_info_1)) { + regs->ax = ghcb->save.rax; + regs->dx = ghcb->save.rdx; + } + + return ret; +} + +/* + * This function runs on the first #VC exception after the kernel + * switched to virtual addresses. + */ +static bool __init sev_es_setup_ghcb(void) +{ + /* First make sure the hypervisor talks a supported protocol. */ + if (!sev_es_negotiate_protocol()) + return false; + + /* + * Clear the boot_ghcb. The first exception comes in before the bss + * section is cleared. + */ + memset(&boot_ghcb_page, 0, PAGE_SIZE); + + /* Alright - Make the boot-ghcb public */ + boot_ghcb = &boot_ghcb_page; + + return true; +} + +#ifdef CONFIG_HOTPLUG_CPU +static void sev_es_ap_hlt_loop(void) +{ + struct ghcb_state state; + struct ghcb *ghcb; + + ghcb = sev_es_get_ghcb(&state); + + while (true) { + vc_ghcb_invalidate(ghcb); + ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP); + ghcb_set_sw_exit_info_1(ghcb, 0); + ghcb_set_sw_exit_info_2(ghcb, 0); + + sev_es_wr_ghcb_msr(__pa(ghcb)); + VMGEXIT(); + + /* Wakeup signal? */ + if (ghcb_sw_exit_info_2_is_valid(ghcb) && + ghcb->save.sw_exit_info_2) + break; + } + + sev_es_put_ghcb(&state); +} + +/* + * Play_dead handler when running under SEV-ES. This is needed because + * the hypervisor can't deliver an SIPI request to restart the AP. 
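+ * (An INIT-SIPI-SIPI sequence would require the hypervisor to set the
+ * vCPU's CS:IP, but under SEV-ES the register state is encrypted and
+ * not writable by the hypervisor.)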
+ * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the + * hypervisor wakes it up again. + */ +static void sev_es_play_dead(void) +{ + play_dead_common(); + + /* IRQs now disabled */ + + sev_es_ap_hlt_loop(); + + /* + * If we get here, the VCPU was woken up again. Jump to CPU + * startup code to get it back online. + */ + start_cpu0(); +} +#else /* CONFIG_HOTPLUG_CPU */ +#define sev_es_play_dead native_play_dead +#endif /* CONFIG_HOTPLUG_CPU */ + +#ifdef CONFIG_SMP +static void __init sev_es_setup_play_dead(void) +{ + smp_ops.play_dead = sev_es_play_dead; +} +#else +static inline void sev_es_setup_play_dead(void) { } +#endif + +static void __init alloc_runtime_data(int cpu) +{ + struct sev_es_runtime_data *data; + + data = memblock_alloc(sizeof(*data), PAGE_SIZE); + if (!data) + panic("Can't allocate SEV-ES runtime data"); + + per_cpu(runtime_data, cpu) = data; +} + +static void __init init_ghcb(int cpu) +{ + struct sev_es_runtime_data *data; + int err; + + data = per_cpu(runtime_data, cpu); + + err = early_set_memory_decrypted((unsigned long)&data->ghcb_page, + sizeof(data->ghcb_page)); + if (err) + panic("Can't map GHCBs unencrypted"); + + memset(&data->ghcb_page, 0, sizeof(data->ghcb_page)); + + data->ghcb_active = false; + data->backup_ghcb_active = false; +} + +void __init sev_es_init_vc_handling(void) +{ + int cpu; + + BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE); + + if (!sev_es_active()) + return; + + if (!sev_es_check_cpu_features()) + panic("SEV-ES CPU Features missing"); + + /* Enable SEV-ES special handling */ + static_branch_enable(&sev_es_enable_key); + + /* Initialize per-cpu GHCB pages */ + for_each_possible_cpu(cpu) { + alloc_runtime_data(cpu); + init_ghcb(cpu); + setup_vc_stacks(cpu); + } + + sev_es_setup_play_dead(); + + /* Secondary CPUs use the runtime #VC handler */ + initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication; +} + +static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt) +{ + int trapnr = ctxt->fi.vector; + + if (trapnr == X86_TRAP_PF) + native_write_cr2(ctxt->fi.cr2); + + ctxt->regs->orig_ax = ctxt->fi.error_code; + do_early_exception(ctxt->regs, trapnr); +} + +static long *vc_insn_get_reg(struct es_em_ctxt *ctxt) +{ + long *reg_array; + int offset; + + reg_array = (long *)ctxt->regs; + offset = insn_get_modrm_reg_off(&ctxt->insn, ctxt->regs); + + if (offset < 0) + return NULL; + + offset /= sizeof(long); + + return reg_array + offset; +} + +static long *vc_insn_get_rm(struct es_em_ctxt *ctxt) +{ + long *reg_array; + int offset; + + reg_array = (long *)ctxt->regs; + offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs); + + if (offset < 0) + return NULL; + + offset /= sizeof(long); + + return reg_array + offset; +} +static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt, + unsigned int bytes, bool read) +{ + u64 exit_code, exit_info_1, exit_info_2; + unsigned long ghcb_pa = __pa(ghcb); + enum es_result res; + phys_addr_t paddr; + void __user *ref; + + ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs); + if (ref == (void __user *)-1L) + return ES_UNSUPPORTED; + + exit_code = read ? 
SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE; + + res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr); + if (res != ES_OK) { + if (res == ES_EXCEPTION && !read) + ctxt->fi.error_code |= X86_PF_WRITE; + + return res; + } + + exit_info_1 = paddr; + /* Can never be greater than 8 */ + exit_info_2 = bytes; + + ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer)); + + return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2); +} + +static enum es_result vc_handle_mmio_twobyte_ops(struct ghcb *ghcb, + struct es_em_ctxt *ctxt) +{ + struct insn *insn = &ctxt->insn; + unsigned int bytes = 0; + enum es_result ret; + int sign_byte; + long *reg_data; + + switch (insn->opcode.bytes[1]) { + /* MMIO Read w/ zero-extension */ + case 0xb6: + bytes = 1; + fallthrough; + case 0xb7: + if (!bytes) + bytes = 2; + + ret = vc_do_mmio(ghcb, ctxt, bytes, true); + if (ret) + break; + + /* Zero extend based on operand size */ + reg_data = vc_insn_get_reg(ctxt); + if (!reg_data) + return ES_DECODE_FAILED; + + memset(reg_data, 0, insn->opnd_bytes); + + memcpy(reg_data, ghcb->shared_buffer, bytes); + break; + + /* MMIO Read w/ sign-extension */ + case 0xbe: + bytes = 1; + fallthrough; + case 0xbf: + if (!bytes) + bytes = 2; + + ret = vc_do_mmio(ghcb, ctxt, bytes, true); + if (ret) + break; + + /* Sign extend based on operand size */ + reg_data = vc_insn_get_reg(ctxt); + if (!reg_data) + return ES_DECODE_FAILED; + + if (bytes == 1) { + u8 *val = (u8 *)ghcb->shared_buffer; + + sign_byte = (*val & 0x80) ? 0xff : 0x00; + } else { + u16 *val = (u16 *)ghcb->shared_buffer; + + sign_byte = (*val & 0x8000) ? 0xff : 0x00; + } + memset(reg_data, sign_byte, insn->opnd_bytes); + + memcpy(reg_data, ghcb->shared_buffer, bytes); + break; + + default: + ret = ES_UNSUPPORTED; + } + + return ret; +} + +/* + * The MOVS instruction has two memory operands, which raises the + * problem that it is not known whether the access to the source or the + * destination caused the #VC exception (and hence whether an MMIO read + * or write operation needs to be emulated). + * + * Instead of playing games with walking page-tables and trying to guess + * whether the source or destination is an MMIO range, split the move + * into two operations, a read and a write with only one memory operand. + * This will cause a nested #VC exception on the MMIO address which can + * then be handled. + * + * This implementation has the benefit that it also supports MOVS where + * source _and_ destination are MMIO regions. + * + * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a + * rare operation. If it turns out to be a performance problem the split + * operations can be moved to memcpy_fromio() and memcpy_toio(). 
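+ *
+ * Concretely, vc_read_mem() and vc_write_mem() below touch the source
+ * and destination with ordinary loads and stores; whichever side is
+ * MMIO faults again, and that nested #VC is then emulated as a plain
+ * single-operand MOV by vc_handle_mmio().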
+ */ +static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt, + unsigned int bytes) +{ + unsigned long ds_base, es_base; + unsigned char *src, *dst; + unsigned char buffer[8]; + enum es_result ret; + bool rep; + int off; + + ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS); + es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES); + + if (ds_base == -1L || es_base == -1L) { + ctxt->fi.vector = X86_TRAP_GP; + ctxt->fi.error_code = 0; + return ES_EXCEPTION; + } + + src = ds_base + (unsigned char *)ctxt->regs->si; + dst = es_base + (unsigned char *)ctxt->regs->di; + + ret = vc_read_mem(ctxt, src, buffer, bytes); + if (ret != ES_OK) + return ret; + + ret = vc_write_mem(ctxt, dst, buffer, bytes); + if (ret != ES_OK) + return ret; + + if (ctxt->regs->flags & X86_EFLAGS_DF) + off = -bytes; + else + off = bytes; + + ctxt->regs->si += off; + ctxt->regs->di += off; + + rep = insn_has_rep_prefix(&ctxt->insn); + if (rep) + ctxt->regs->cx -= 1; + + if (!rep || ctxt->regs->cx == 0) + return ES_OK; + else + return ES_RETRY; +} + +static enum es_result vc_handle_mmio(struct ghcb *ghcb, + struct es_em_ctxt *ctxt) +{ + struct insn *insn = &ctxt->insn; + unsigned int bytes = 0; + enum es_result ret; + long *reg_data; + + switch (insn->opcode.bytes[0]) { + /* MMIO Write */ + case 0x88: + bytes = 1; + fallthrough; + case 0x89: + if (!bytes) + bytes = insn->opnd_bytes; + + reg_data = vc_insn_get_reg(ctxt); + if (!reg_data) + return ES_DECODE_FAILED; + + memcpy(ghcb->shared_buffer, reg_data, bytes); + + ret = vc_do_mmio(ghcb, ctxt, bytes, false); + break; + + case 0xc6: + bytes = 1; + fallthrough; + case 0xc7: + if (!bytes) + bytes = insn->opnd_bytes; + + memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes); + + ret = vc_do_mmio(ghcb, ctxt, bytes, false); + break; + + /* MMIO Read */ + case 0x8a: + bytes = 1; + fallthrough; + case 0x8b: + if (!bytes) + bytes = insn->opnd_bytes; + + ret = vc_do_mmio(ghcb, ctxt, bytes, true); + if (ret) + break; + + reg_data = vc_insn_get_reg(ctxt); + if (!reg_data) + return ES_DECODE_FAILED; + + /* Zero-extend for 32-bit operation */ + if (bytes == 4) + *reg_data = 0; + + memcpy(reg_data, ghcb->shared_buffer, bytes); + break; + + /* MOVS instruction */ + case 0xa4: + bytes = 1; + fallthrough; + case 0xa5: + if (!bytes) + bytes = insn->opnd_bytes; + + ret = vc_handle_mmio_movs(ctxt, bytes); + break; + /* Two-Byte Opcodes */ + case 0x0f: + ret = vc_handle_mmio_twobyte_ops(ghcb, ctxt); + break; + default: + ret = ES_UNSUPPORTED; + } + + return ret; +} + +static enum es_result vc_handle_dr7_write(struct ghcb *ghcb, + struct es_em_ctxt *ctxt) +{ + struct sev_es_runtime_data *data = this_cpu_read(runtime_data); + long val, *reg = vc_insn_get_rm(ctxt); + enum es_result ret; + + if (!reg) + return ES_DECODE_FAILED; + + val = *reg; + + /* Upper 32 bits must be written as zeroes */ + if (val >> 32) { + ctxt->fi.vector = X86_TRAP_GP; + ctxt->fi.error_code = 0; + return ES_EXCEPTION; + } + + /* Clear out other reserved bits and set bit 10 */ + val = (val & 0xffff23ffL) | BIT(10); + + /* Early non-zero writes to DR7 are not supported */ + if (!data && (val & ~DR7_RESET_VALUE)) + return ES_UNSUPPORTED; + + /* Using a value of 0 for ExitInfo1 means RAX holds the value */ + ghcb_set_rax(ghcb, val); + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0); + if (ret != ES_OK) + return ret; + + if (data) + data->dr7 = val; + + return ES_OK; +} + +static enum es_result vc_handle_dr7_read(struct ghcb *ghcb, + struct es_em_ctxt *ctxt) +{ + struct 
sev_es_runtime_data *data = this_cpu_read(runtime_data); + long *reg = vc_insn_get_rm(ctxt); + + if (!reg) + return ES_DECODE_FAILED; + + if (data) + *reg = data->dr7; + else + *reg = DR7_RESET_VALUE; + + return ES_OK; +} + +static enum es_result vc_handle_wbinvd(struct ghcb *ghcb, + struct es_em_ctxt *ctxt) +{ + return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0); +} + +static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt) +{ + enum es_result ret; + + ghcb_set_rcx(ghcb, ctxt->regs->cx); + + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0); + if (ret != ES_OK) + return ret; + + if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb))) + return ES_VMM_ERROR; + + ctxt->regs->ax = ghcb->save.rax; + ctxt->regs->dx = ghcb->save.rdx; + + return ES_OK; +} + +static enum es_result vc_handle_monitor(struct ghcb *ghcb, + struct es_em_ctxt *ctxt) +{ + /* + * Treat it as a NOP and do not leak a physical address to the + * hypervisor. + */ + return ES_OK; +} + +static enum es_result vc_handle_mwait(struct ghcb *ghcb, + struct es_em_ctxt *ctxt) +{ + /* Treat the same as MONITOR/MONITORX */ + return ES_OK; +} + +static enum es_result vc_handle_vmmcall(struct ghcb *ghcb, + struct es_em_ctxt *ctxt) +{ + enum es_result ret; + + ghcb_set_rax(ghcb, ctxt->regs->ax); + ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0); + + if (x86_platform.hyper.sev_es_hcall_prepare) + x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs); + + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0); + if (ret != ES_OK) + return ret; + + if (!ghcb_rax_is_valid(ghcb)) + return ES_VMM_ERROR; + + ctxt->regs->ax = ghcb->save.rax; + + /* + * Call sev_es_hcall_finish() after regs->ax is already set. + * This allows the hypervisor handler to overwrite it again if + * necessary. + */ + if (x86_platform.hyper.sev_es_hcall_finish && + !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs)) + return ES_VMM_ERROR; + + return ES_OK; +} + +static enum es_result vc_handle_trap_ac(struct ghcb *ghcb, + struct es_em_ctxt *ctxt) +{ + /* + * Calling ecx_alignment_check() directly does not work, because it + * enables IRQs and the GHCB is active. Forward the exception and call + * it later from vc_forward_exception(). + */ + ctxt->fi.vector = X86_TRAP_AC; + ctxt->fi.error_code = 0; + return ES_EXCEPTION; +} + +static __always_inline void vc_handle_trap_db(struct pt_regs *regs) +{ + if (user_mode(regs)) + noist_exc_debug(regs); + else + exc_debug(regs); +} + +static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt, + struct ghcb *ghcb, + unsigned long exit_code) +{ + enum es_result result; + + switch (exit_code) { + case SVM_EXIT_READ_DR7: + result = vc_handle_dr7_read(ghcb, ctxt); + break; + case SVM_EXIT_WRITE_DR7: + result = vc_handle_dr7_write(ghcb, ctxt); + break; + case SVM_EXIT_EXCP_BASE + X86_TRAP_AC: + result = vc_handle_trap_ac(ghcb, ctxt); + break; + case SVM_EXIT_RDTSC: + case SVM_EXIT_RDTSCP: + result = vc_handle_rdtsc(ghcb, ctxt, exit_code); + break; + case SVM_EXIT_RDPMC: + result = vc_handle_rdpmc(ghcb, ctxt); + break; + case SVM_EXIT_INVD: + pr_err_ratelimited("#VC exception for INVD??? 
Seriously???\n"); + result = ES_UNSUPPORTED; + break; + case SVM_EXIT_CPUID: + result = vc_handle_cpuid(ghcb, ctxt); + break; + case SVM_EXIT_IOIO: + result = vc_handle_ioio(ghcb, ctxt); + break; + case SVM_EXIT_MSR: + result = vc_handle_msr(ghcb, ctxt); + break; + case SVM_EXIT_VMMCALL: + result = vc_handle_vmmcall(ghcb, ctxt); + break; + case SVM_EXIT_WBINVD: + result = vc_handle_wbinvd(ghcb, ctxt); + break; + case SVM_EXIT_MONITOR: + result = vc_handle_monitor(ghcb, ctxt); + break; + case SVM_EXIT_MWAIT: + result = vc_handle_mwait(ghcb, ctxt); + break; + case SVM_EXIT_NPF: + result = vc_handle_mmio(ghcb, ctxt); + break; + default: + /* + * Unexpected #VC exception + */ + result = ES_UNSUPPORTED; + } + + return result; +} + +static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt) +{ + long error_code = ctxt->fi.error_code; + int trapnr = ctxt->fi.vector; + + ctxt->regs->orig_ax = ctxt->fi.error_code; + + switch (trapnr) { + case X86_TRAP_GP: + exc_general_protection(ctxt->regs, error_code); + break; + case X86_TRAP_UD: + exc_invalid_op(ctxt->regs); + break; + case X86_TRAP_AC: + exc_alignment_check(ctxt->regs, error_code); + break; + default: + pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n"); + BUG(); + } +} + +static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs) +{ + unsigned long sp = (unsigned long)regs; + + return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2)); +} + +/* + * Main #VC exception handler. It is called when the entry code was able to + * switch off the IST to a safe kernel stack. + * + * With the current implementation it is always possible to switch to a safe + * stack because #VC exceptions only happen at known places, like intercepted + * instructions or accesses to MMIO areas/IO ports. They can also happen with + * code instrumentation when the hypervisor intercepts #DB, but the critical + * paths are forbidden to be instrumented, so #DB exceptions currently also + * only happen in safe places. + */ +DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) +{ + struct sev_es_runtime_data *data = this_cpu_read(runtime_data); + irqentry_state_t irq_state; + struct ghcb_state state; + struct es_em_ctxt ctxt; + enum es_result result; + struct ghcb *ghcb; + + /* + * Handle #DB before calling into !noinstr code to avoid recursive #DB. + */ + if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) { + vc_handle_trap_db(regs); + return; + } + + irq_state = irqentry_nmi_enter(regs); + lockdep_assert_irqs_disabled(); + instrumentation_begin(); + + /* + * This is invoked through an interrupt gate, so IRQs are disabled. The + * code below might walk page-tables for user or kernel addresses, so + * keep the IRQs disabled to protect us against concurrent TLB flushes. + */ + + ghcb = sev_es_get_ghcb(&state); + if (!ghcb) { + /* + * Mark GHCBs inactive so that panic() is able to print the + * message. + */ + data->ghcb_active = false; + data->backup_ghcb_active = false; + + panic("Unable to handle #VC exception! 
GHCB and Backup GHCB are already in use"); + } + + vc_ghcb_invalidate(ghcb); + result = vc_init_em_ctxt(&ctxt, regs, error_code); + + if (result == ES_OK) + result = vc_handle_exitcode(&ctxt, ghcb, error_code); + + sev_es_put_ghcb(&state); + + /* Done - now check the result */ + switch (result) { + case ES_OK: + vc_finish_insn(&ctxt); + break; + case ES_UNSUPPORTED: + pr_err_ratelimited("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n", + error_code, regs->ip); + goto fail; + case ES_VMM_ERROR: + pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", + error_code, regs->ip); + goto fail; + case ES_DECODE_FAILED: + pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", + error_code, regs->ip); + goto fail; + case ES_EXCEPTION: + vc_forward_exception(&ctxt); + break; + case ES_RETRY: + /* Nothing to do */ + break; + default: + pr_emerg("Unknown result in %s():%d\n", __func__, result); + /* + * Emulating the instruction which caused the #VC exception + * failed - can't continue so print debug information + */ + BUG(); + } + +out: + instrumentation_end(); + irqentry_nmi_exit(regs, irq_state); + + return; + +fail: + if (user_mode(regs)) { + /* + * Do not kill the machine if user-space triggered the + * exception. Send SIGBUS instead and let user-space deal with + * it. + */ + force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0); + } else { + pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n", + result); + + /* Show some debug info */ + show_regs(regs); + + /* Ask hypervisor to sev_es_terminate */ + sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST); + + /* If that fails and we get here - just panic */ + panic("Returned from Terminate-Request to Hypervisor\n"); + } + + goto out; +} + +/* This handler runs on the #VC fall-back stack. 
It can cause further #VC exceptions */ +DEFINE_IDTENTRY_VC_IST(exc_vmm_communication) +{ + instrumentation_begin(); + panic("Can't handle #VC exception from unsupported context\n"); + instrumentation_end(); +} + +DEFINE_IDTENTRY_VC(exc_vmm_communication) +{ + if (likely(!on_vc_fallback_stack(regs))) + safe_stack_exc_vmm_communication(regs, error_code); + else + ist_exc_vmm_communication(regs, error_code); +} + +bool __init handle_vc_boot_ghcb(struct pt_regs *regs) +{ + unsigned long exit_code = regs->orig_ax; + struct es_em_ctxt ctxt; + enum es_result result; + + /* Do initial setup or terminate the guest */ + if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb())) + sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST); + + vc_ghcb_invalidate(boot_ghcb); + + result = vc_init_em_ctxt(&ctxt, regs, exit_code); + if (result == ES_OK) + result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code); + + /* Done - now check the result */ + switch (result) { + case ES_OK: + vc_finish_insn(&ctxt); + break; + case ES_UNSUPPORTED: + early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n", + exit_code, regs->ip); + goto fail; + case ES_VMM_ERROR: + early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", + exit_code, regs->ip); + goto fail; + case ES_DECODE_FAILED: + early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", + exit_code, regs->ip); + goto fail; + case ES_EXCEPTION: + vc_early_forward_exception(&ctxt); + break; + case ES_RETRY: + /* Nothing to do */ + break; + default: + BUG(); + } + + return true; + +fail: + show_regs(regs); + + while (true) + halt(); +} diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index b93d6cd08a7f..121921b2927c 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index df7b5477fc4f..7515e78ef898 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -47,7 +47,7 @@ #include #include #include -#include +#include /* * We allocate runtime services regions top-down, starting from -4G, i.e. diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 1be71ef5e4c4..2e1c1bec0f9e 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include struct real_mode_header *real_mode_header; u32 *trampoline_cr4_features; -- cgit From b81fc74d53d1248de6db3136dd6b29e5d5528021 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Tue, 27 Apr 2021 06:16:35 -0500 Subject: x86/sev: Move GHCB MSR protocol and NAE definitions in a common header The guest and the hypervisor contain separate macros to get and set the GHCB MSR protocol and NAE event fields. Consolidate the GHCB protocol definitions and helper macros in one place. Leave the supported protocol version define in separate files to keep the guest and hypervisor flexibility to support different GHCB version in the same release. There is no functional change intended. 
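As a rough sketch of how the consolidated macros compose (illustrative only, not part of the patch): the entire MSR protocol exchange fits in one 64-bit value, with the low 12 bits carrying the request/response code. The snippet below mirrors the definitions from the new sev-common.h, with uint64_t casts added so it builds as a standalone userspace program; the version numbers and C-bit position are made-up inputs.

#include <stdint.h>
#include <stdio.h>

/* Mirrored from the new asm/sev-common.h; casts to uint64_t added so
 * the sketch compiles as plain userspace C. */
#define GHCB_MSR_INFO_MASK	((1ULL << 12) - 1)
#define GHCB_MSR_SEV_INFO_RESP	0x001
#define GHCB_MSR_VER_MAX_POS	48
#define GHCB_MSR_VER_MAX_MASK	0xffff
#define GHCB_MSR_VER_MIN_POS	32
#define GHCB_MSR_VER_MIN_MASK	0xffff
#define GHCB_MSR_CBIT_POS	24
#define GHCB_MSR_CBIT_MASK	0xff
#define GHCB_MSR_SEV_INFO(_max, _min, _cbit)				\
	((((uint64_t)(_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \
	 (((uint64_t)(_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \
	 (((uint64_t)(_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \
	 GHCB_MSR_SEV_INFO_RESP)
#define GHCB_MSR_INFO(v)	((v) & GHCB_MSR_INFO_MASK)
#define GHCB_MSR_PROTO_MAX(v)	(((v) >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK)
#define GHCB_MSR_PROTO_MIN(v)	(((v) >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK)

int main(void)
{
	/* Hypervisor side: advertise protocol versions 1..2, C-bit 51. */
	uint64_t val = GHCB_MSR_SEV_INFO(2, 1, 51);

	/* Guest side: check the response code, then the version range,
	 * in the same order sev_es_negotiate_protocol() does below. */
	if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
		return 1;

	printf("proto min=%llu max=%llu\n",
	       (unsigned long long)GHCB_MSR_PROTO_MIN(val),
	       (unsigned long long)GHCB_MSR_PROTO_MAX(val));
	return 0;
}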
Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Acked-by: Joerg Roedel Link: https://lkml.kernel.org/r/20210427111636.1207-3-brijesh.singh@amd.com --- arch/x86/include/asm/sev-common.h | 62 +++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/sev.h | 30 +++---------------- arch/x86/kernel/sev-shared.c | 20 ++++++------- arch/x86/kvm/svm/svm.h | 38 +++--------------------- 4 files changed, 80 insertions(+), 70 deletions(-) create mode 100644 arch/x86/include/asm/sev-common.h diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h new file mode 100644 index 000000000000..629c3df243f0 --- /dev/null +++ b/arch/x86/include/asm/sev-common.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD SEV header common between the guest and the hypervisor. + * + * Author: Brijesh Singh + */ + +#ifndef __ASM_X86_SEV_COMMON_H +#define __ASM_X86_SEV_COMMON_H + +#define GHCB_MSR_INFO_POS 0 +#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1) + +#define GHCB_MSR_SEV_INFO_RESP 0x001 +#define GHCB_MSR_SEV_INFO_REQ 0x002 +#define GHCB_MSR_VER_MAX_POS 48 +#define GHCB_MSR_VER_MAX_MASK 0xffff +#define GHCB_MSR_VER_MIN_POS 32 +#define GHCB_MSR_VER_MIN_MASK 0xffff +#define GHCB_MSR_CBIT_POS 24 +#define GHCB_MSR_CBIT_MASK 0xff +#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \ + ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \ + (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \ + (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \ + GHCB_MSR_SEV_INFO_RESP) +#define GHCB_MSR_INFO(v) ((v) & 0xfffUL) +#define GHCB_MSR_PROTO_MAX(v) (((v) >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK) +#define GHCB_MSR_PROTO_MIN(v) (((v) >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK) + +#define GHCB_MSR_CPUID_REQ 0x004 +#define GHCB_MSR_CPUID_RESP 0x005 +#define GHCB_MSR_CPUID_FUNC_POS 32 +#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff +#define GHCB_MSR_CPUID_VALUE_POS 32 +#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff +#define GHCB_MSR_CPUID_REG_POS 30 +#define GHCB_MSR_CPUID_REG_MASK 0x3 +#define GHCB_CPUID_REQ_EAX 0 +#define GHCB_CPUID_REQ_EBX 1 +#define GHCB_CPUID_REQ_ECX 2 +#define GHCB_CPUID_REQ_EDX 3 +#define GHCB_CPUID_REQ(fn, reg) \ + (GHCB_MSR_CPUID_REQ | \ + (((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \ + (((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS)) + +#define GHCB_MSR_TERM_REQ 0x100 +#define GHCB_MSR_TERM_REASON_SET_POS 12 +#define GHCB_MSR_TERM_REASON_SET_MASK 0xf +#define GHCB_MSR_TERM_REASON_POS 16 +#define GHCB_MSR_TERM_REASON_MASK 0xff +#define GHCB_SEV_TERM_REASON(reason_set, reason_val) \ + (((((u64)reason_set) & GHCB_MSR_TERM_REASON_SET_MASK) << GHCB_MSR_TERM_REASON_SET_POS) | \ + ((((u64)reason_val) & GHCB_MSR_TERM_REASON_MASK) << GHCB_MSR_TERM_REASON_POS)) + +#define GHCB_SEV_ES_REASON_GENERAL_REQUEST 0 +#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED 1 + +#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) + +#endif diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index cf1d957c7091..fa5cd05d3b5b 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -10,34 +10,12 @@ #include #include +#include -#define GHCB_SEV_INFO 0x001UL -#define GHCB_SEV_INFO_REQ 0x002UL -#define GHCB_INFO(v) ((v) & 0xfffUL) -#define GHCB_PROTO_MAX(v) (((v) >> 48) & 0xffffUL) -#define GHCB_PROTO_MIN(v) (((v) >> 32) & 0xffffUL) -#define GHCB_PROTO_OUR 0x0001UL -#define GHCB_SEV_CPUID_REQ 0x004UL -#define GHCB_CPUID_REQ_EAX 0 -#define GHCB_CPUID_REQ_EBX 1 -#define 
GHCB_CPUID_REQ_ECX 2 -#define GHCB_CPUID_REQ_EDX 3 -#define GHCB_CPUID_REQ(fn, reg) (GHCB_SEV_CPUID_REQ | \ - (((unsigned long)reg & 3) << 30) | \ - (((unsigned long)fn) << 32)) +#define GHCB_PROTO_OUR 0x0001UL +#define GHCB_PROTOCOL_MAX 1ULL +#define GHCB_DEFAULT_USAGE 0ULL -#define GHCB_PROTOCOL_MAX 0x0001UL -#define GHCB_DEFAULT_USAGE 0x0000UL - -#define GHCB_SEV_CPUID_RESP 0x005UL -#define GHCB_SEV_TERMINATE 0x100UL -#define GHCB_SEV_TERMINATE_REASON(reason_set, reason_val) \ - (((((u64)reason_set) & 0x7) << 12) | \ - ((((u64)reason_val) & 0xff) << 16)) -#define GHCB_SEV_ES_REASON_GENERAL_REQUEST 0 -#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED 1 - -#define GHCB_SEV_GHCB_RESP_CODE(v) ((v) & 0xfff) #define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); } enum es_result { diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 0aa9f13efd57..6ec8b3bfd76e 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -26,13 +26,13 @@ static bool __init sev_es_check_cpu_features(void) static void __noreturn sev_es_terminate(unsigned int reason) { - u64 val = GHCB_SEV_TERMINATE; + u64 val = GHCB_MSR_TERM_REQ; /* * Tell the hypervisor what went wrong - only reason-set 0 is * currently supported. */ - val |= GHCB_SEV_TERMINATE_REASON(0, reason); + val |= GHCB_SEV_TERM_REASON(0, reason); /* Request Guest Termination from Hypvervisor */ sev_es_wr_ghcb_msr(val); @@ -47,15 +47,15 @@ static bool sev_es_negotiate_protocol(void) u64 val; /* Do the GHCB protocol version negotiation */ - sev_es_wr_ghcb_msr(GHCB_SEV_INFO_REQ); + sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_INFO(val) != GHCB_SEV_INFO) + if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP) return false; - if (GHCB_PROTO_MAX(val) < GHCB_PROTO_OUR || - GHCB_PROTO_MIN(val) > GHCB_PROTO_OUR) + if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTO_OUR || + GHCB_MSR_PROTO_MIN(val) > GHCB_PROTO_OUR) return false; return true; @@ -153,28 +153,28 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->ax = val >> 32; sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->bx = val >> 32; sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->cx = val >> 32; sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->dx = val >> 32; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 84b3133c2251..42f8a7b9048f 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -20,6 +20,7 @@ #include #include +#include #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) @@ -525,40 +526,9 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu); /* sev.c */ -#define GHCB_VERSION_MAX 1ULL -#define GHCB_VERSION_MIN 1ULL - -#define GHCB_MSR_INFO_POS 0 -#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1) - -#define 
GHCB_MSR_SEV_INFO_RESP 0x001 -#define GHCB_MSR_SEV_INFO_REQ 0x002 -#define GHCB_MSR_VER_MAX_POS 48 -#define GHCB_MSR_VER_MAX_MASK 0xffff -#define GHCB_MSR_VER_MIN_POS 32 -#define GHCB_MSR_VER_MIN_MASK 0xffff -#define GHCB_MSR_CBIT_POS 24 -#define GHCB_MSR_CBIT_MASK 0xff -#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \ - ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \ - (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \ - (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \ - GHCB_MSR_SEV_INFO_RESP) - -#define GHCB_MSR_CPUID_REQ 0x004 -#define GHCB_MSR_CPUID_RESP 0x005 -#define GHCB_MSR_CPUID_FUNC_POS 32 -#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff -#define GHCB_MSR_CPUID_VALUE_POS 32 -#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff -#define GHCB_MSR_CPUID_REG_POS 30 -#define GHCB_MSR_CPUID_REG_MASK 0x3 - -#define GHCB_MSR_TERM_REQ 0x100 -#define GHCB_MSR_TERM_REASON_SET_POS 12 -#define GHCB_MSR_TERM_REASON_SET_MASK 0xf -#define GHCB_MSR_TERM_REASON_POS 16 -#define GHCB_MSR_TERM_REASON_MASK 0xff +#define GHCB_VERSION_MAX 1ULL +#define GHCB_VERSION_MIN 1ULL + extern unsigned int max_sev_asid; -- cgit From 059e5c321a65657877924256ea8ad9c0df257b45 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Tue, 27 Apr 2021 06:16:36 -0500 Subject: x86/msr: Rename MSR_K8_SYSCFG to MSR_AMD64_SYSCFG The SYSCFG MSR continued being updated beyond the K8 family; drop the K8 name from it. Suggested-by: Borislav Petkov Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Acked-by: Joerg Roedel Link: https://lkml.kernel.org/r/20210427111636.1207-4-brijesh.singh@amd.com --- Documentation/virt/kvm/amd-memory-encryption.rst | 2 +- Documentation/x86/amd-memory-encryption.rst | 6 +++--- arch/x86/include/asm/msr-index.h | 6 +++--- arch/x86/kernel/cpu/amd.c | 4 ++-- arch/x86/kernel/cpu/mtrr/cleanup.c | 2 +- arch/x86/kernel/cpu/mtrr/generic.c | 4 ++-- arch/x86/kernel/mmconf-fam10h_64.c | 2 +- arch/x86/kvm/svm/svm.c | 4 ++-- arch/x86/kvm/x86.c | 2 +- arch/x86/mm/mem_encrypt_identity.c | 6 +++--- arch/x86/pci/amd_bus.c | 2 +- arch/x86/realmode/rm/trampoline_64.S | 4 ++-- drivers/edac/amd64_edac.c | 2 +- tools/arch/x86/include/asm/msr-index.h | 6 +++--- 14 files changed, 26 insertions(+), 26 deletions(-) diff --git a/Documentation/virt/kvm/amd-memory-encryption.rst b/Documentation/virt/kvm/amd-memory-encryption.rst index 5ec8a1902e15..5c081c8c7164 100644 --- a/Documentation/virt/kvm/amd-memory-encryption.rst +++ b/Documentation/virt/kvm/amd-memory-encryption.rst @@ -22,7 +22,7 @@ to SEV:: [ecx]: Bits[31:0] Number of encrypted guests supported simultaneously -If support for SEV is present, MSR 0xc001_0010 (MSR_K8_SYSCFG) and MSR 0xc001_0015 +If support for SEV is present, MSR 0xc001_0010 (MSR_AMD64_SYSCFG) and MSR 0xc001_0015 (MSR_K7_HWCR) can be used to determine if it can be enabled:: 0xc001_0010: diff --git a/Documentation/x86/amd-memory-encryption.rst b/Documentation/x86/amd-memory-encryption.rst index c48d452d0718..a1940ebe7be5 100644 --- a/Documentation/x86/amd-memory-encryption.rst +++ b/Documentation/x86/amd-memory-encryption.rst @@ -53,7 +53,7 @@ CPUID function 0x8000001f reports information related to SME:: system physical addresses, not guest physical addresses) -If support for SME is present, MSR 0xc00100010 (MSR_K8_SYSCFG) can be used to +If support for SME is present, MSR 0xc00100010 (MSR_AMD64_SYSCFG) can be used to determine if SME is enabled and/or to enable memory encryption:: 0xc0010010: @@ -79,7 +79,7 @@ The state of SME in the Linux kernel can be documented as 
follows: The CPU supports SME (determined through CPUID instruction). - Enabled: - Supported and bit 23 of MSR_K8_SYSCFG is set. + Supported and bit 23 of MSR_AMD64_SYSCFG is set. - Active: Supported, Enabled and the Linux kernel is actively applying @@ -89,7 +89,7 @@ The state of SME in the Linux kernel can be documented as follows: SME can also be enabled and activated in the BIOS. If SME is enabled and activated in the BIOS, then all memory accesses will be encrypted and it will not be necessary to activate the Linux memory encryption support. If the BIOS -merely enables SME (sets bit 23 of the MSR_K8_SYSCFG), then Linux can activate +merely enables SME (sets bit 23 of the MSR_AMD64_SYSCFG), then Linux can activate memory encryption by default (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y) or by supplying mem_encrypt=on on the kernel command line. However, if BIOS does not enable SME, then Linux will not be able to activate memory encryption, even diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 742d89a00721..211ba3375ee9 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -537,9 +537,9 @@ /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a #define MSR_K8_TOP_MEM2 0xc001001d -#define MSR_K8_SYSCFG 0xc0010010 -#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23 -#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_AMD64_SYSCFG 0xc0010010 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) #define MSR_K8_INT_PENDING_MSG 0xc0010055 /* C1E active bits in int pending message */ #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2d11384dc9ab..0adb0341cd7c 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -593,8 +593,8 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) */ if (cpu_has(c, X86_FEATURE_SME) || cpu_has(c, X86_FEATURE_SEV)) { /* Check if memory encryption is enabled */ - rdmsrl(MSR_K8_SYSCFG, msr); - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + rdmsrl(MSR_AMD64_SYSCFG, msr); + if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) goto clear_all; /* diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 0c3b372318b7..b5f43049fa5f 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -836,7 +836,7 @@ int __init amd_special_default_mtrr(void) if (boot_cpu_data.x86 < 0xf) return 0; /* In case some hypervisor doesn't pass SYSCFG through: */ - if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) + if (rdmsr_safe(MSR_AMD64_SYSCFG, &l, &h) < 0) return 0; /* * Memory between 4GB and top of mem is forced WB by this magic bit. 
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index b90f3f437765..558108296f3c 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -53,13 +53,13 @@ static inline void k8_check_syscfg_dram_mod_en(void) (boot_cpu_data.x86 >= 0x0f))) return; - rdmsr(MSR_K8_SYSCFG, lo, hi); + rdmsr(MSR_AMD64_SYSCFG, lo, hi); if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) { pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]" " not cleared by BIOS, clearing this bit\n", smp_processor_id()); lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY; - mtrr_wrmsr(MSR_K8_SYSCFG, lo, hi); + mtrr_wrmsr(MSR_AMD64_SYSCFG, lo, hi); } } diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index b5cb49e57df8..c94dec6a1834 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -95,7 +95,7 @@ static void get_fam10h_pci_mmconf_base(void) return; /* SYS_CFG */ - address = MSR_K8_SYSCFG; + address = MSR_AMD64_SYSCFG; rdmsrl(address, val); /* TOP_MEM2 is not enabled? */ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index b649f92287a2..433e8e4fb3a6 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -858,8 +858,8 @@ static __init void svm_adjust_mmio_mask(void) return; /* If memory encryption is not enabled, use existing mask */ - rdmsrl(MSR_K8_SYSCFG, msr); - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + rdmsrl(MSR_AMD64_SYSCFG, msr); + if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) return; enc_bit = cpuid_ebx(0x8000001f) & 0x3f; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6eda2834fc05..853c40e89335 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3402,7 +3402,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_LASTBRANCHTOIP: case MSR_IA32_LASTINTFROMIP: case MSR_IA32_LASTINTTOIP: - case MSR_K8_SYSCFG: + case MSR_AMD64_SYSCFG: case MSR_K8_TSEG_ADDR: case MSR_K8_TSEG_MASK: case MSR_VM_HSAVE_PA: diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 04aba7e80a36..a9639f663d25 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -529,7 +529,7 @@ void __init sme_enable(struct boot_params *bp) /* * No SME if Hypervisor bit is set. This check is here to * prevent a guest from trying to enable SME. For running as a - * KVM guest the MSR_K8_SYSCFG will be sufficient, but there + * KVM guest the MSR_AMD64_SYSCFG will be sufficient, but there * might be other hypervisors which emulate that MSR as non-zero * or even pass it through to the guest. * A malicious hypervisor can still trick a guest into this @@ -542,8 +542,8 @@ void __init sme_enable(struct boot_params *bp) return; /* For SME, check the SYSCFG MSR */ - msr = __rdmsr(MSR_K8_SYSCFG); - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + msr = __rdmsr(MSR_AMD64_SYSCFG); + if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) return; } else { /* SEV state cannot be controlled by a command line option */ diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index ae744b6a0785..dd40d3fea74e 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -284,7 +284,7 @@ static int __init early_root_info_init(void) /* need to take out [4G, TOM2) for RAM*/ /* SYS_CFG */ - address = MSR_K8_SYSCFG; + address = MSR_AMD64_SYSCFG; rdmsrl(address, val); /* TOP_MEM2 is enabled? 
*/ if (val & (1<<21)) { diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index 84c5d1b33d10..cc8391f86cdb 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -123,9 +123,9 @@ SYM_CODE_START(startup_32) */ btl $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags jnc .Ldone - movl $MSR_K8_SYSCFG, %ecx + movl $MSR_AMD64_SYSCFG, %ecx rdmsr - bts $MSR_K8_SYSCFG_MEM_ENCRYPT_BIT, %eax + bts $MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT, %eax jc .Ldone /* diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 9fa4dfc6ebee..f0d8f60acee1 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3083,7 +3083,7 @@ static void read_mc_regs(struct amd64_pvt *pvt) edac_dbg(0, " TOP_MEM: 0x%016llx\n", pvt->top_mem); /* Check first whether TOP_MEM2 is enabled: */ - rdmsrl(MSR_K8_SYSCFG, msr_val); + rdmsrl(MSR_AMD64_SYSCFG, msr_val); if (msr_val & BIT(21)) { rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2); edac_dbg(0, " TOP_MEM2: 0x%016llx\n", pvt->top_mem2); diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 45029354e0a8..c60b09e7602f 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -533,9 +533,9 @@ /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a #define MSR_K8_TOP_MEM2 0xc001001d -#define MSR_K8_SYSCFG 0xc0010010 -#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23 -#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_AMD64_SYSCFG 0xc0010010 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) #define MSR_K8_INT_PENDING_MSG 0xc0010055 /* C1E active bits in int pending message */ #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 -- cgit From 970655aa9b42461f8394e4457307005bdeee14d9 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 23 Apr 2021 07:40:38 +0200 Subject: xen/gntdev: fix gntdev_mmap() error exit path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit d3eeb1d77c5d0af ("xen/gntdev: use mmu_interval_notifier_insert") introduced an error in gntdev_mmap(): in case the call of mmu_interval_notifier_insert_locked() fails the exit path should not call mmu_interval_notifier_remove(), as this might result in NULL dereferences. One reason for failure is e.g. a signal pending for the running process. 
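The shape of the fix generalizes beyond gntdev: a shared unwind path may only tear down state that was actually set up, so the error leg disarms the teardown before jumping to it. A standalone analogue of the pattern, with hypothetical names and a simulated insert failure:

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for the gntdev mapping; vma is non-NULL only once the
 * notifier insert has succeeded. */
struct mapping {
	void *vma;
	void *notifier;
};

static int notifier_insert(struct mapping *m) { return -1; /* simulate failure */ }
static void notifier_remove(struct mapping *m) { printf("remove notifier\n"); }

static int do_mmap(struct mapping *m, void *vma)
{
	int err;

	m->vma = vma;
	err = notifier_insert(m);
	if (err) {
		/* Insert failed: clear vma so the common unwind path
		 * below skips the remove, which would otherwise act on
		 * a notifier that was never inserted. */
		m->vma = NULL;
		goto out_put;
	}
	return 0;

out_put:
	/* Shared teardown: only undo what was actually done. */
	if (m->vma)
		notifier_remove(m);
	return err;
}

int main(void)
{
	struct mapping m = { 0 };

	return do_mmap(&m, (void *)1) ? EXIT_FAILURE : EXIT_SUCCESS;
}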
Fixes: d3eeb1d77c5d0af ("xen/gntdev: use mmu_interval_notifier_insert") Cc: stable@vger.kernel.org Reported-by: Marek Marczykowski-Górecki Tested-by: Marek Marczykowski-Górecki Signed-off-by: Juergen Gross Reviewed-by: Luca Fancellu Link: https://lore.kernel.org/r/20210423054038.26696-1-jgross@suse.com Signed-off-by: Juergen Gross --- drivers/xen/gntdev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index f01d58c7a042..a3e7be96527d 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -1017,8 +1017,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) err = mmu_interval_notifier_insert_locked( &map->notifier, vma->vm_mm, vma->vm_start, vma->vm_end - vma->vm_start, &gntdev_mmu_ops); - if (err) + if (err) { + map->vma = NULL; goto out_unlock_put; + } } mutex_unlock(&priv->lock); -- cgit From dbc03e81586fc33e4945263fd6e09e22eb4b980f Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 8 May 2021 10:19:13 +0800 Subject: xen/unpopulated-alloc: fix error return code in fill_list() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: a4574f63edc6 ("mm/memremap_pages: convert to 'struct range'") Reported-by: Hulk Robot Signed-off-by: Zhen Lei Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20210508021913.1727-1-thunder.leizhen@huawei.com Signed-off-by: Juergen Gross --- drivers/xen/unpopulated-alloc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c index e64e6befc63b..87e6b7db892f 100644 --- a/drivers/xen/unpopulated-alloc.c +++ b/drivers/xen/unpopulated-alloc.c @@ -39,8 +39,10 @@ static int fill_list(unsigned int nr_pages) } pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL); - if (!pgmap) + if (!pgmap) { + ret = -ENOMEM; goto err_pgmap; + } pgmap->type = MEMORY_DEVICE_GENERIC; pgmap->range = (struct range) { -- cgit From 06841148c570832d4d247b0f6befc1922a84120b Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 19 Jan 2021 14:51:08 +0100 Subject: drm/imx: ipuv3-plane: do not advertise YUV formats on planes without CSC Only planes that are displayed via the Display Processor (DP) path support color space conversion. Limit formats on planes that are shown via the direct Display Controller (DC) path to RGB. 
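The patch follows the usual DRM approach of advertising a per-plane format list selected by hardware path: only planes routed through the DP, which has the CSC stage, get the YUV-capable table. A condensed standalone sketch of that selection follows; the enum values and format tables are placeholders, not the real IPU definitions.

#include <stdint.h>
#include <stdio.h>

/* Placeholder tables; the real ones list DRM fourcc codes. */
static const uint32_t rgb_formats[] = { 1, 2, 3 };
static const uint32_t all_formats[] = { 1, 2, 3, 4, 5 };	/* RGB + YUV */

enum dp_flow { DP_FLOW_NONE, DP_FLOW_SYNC_BG, DP_FLOW_SYNC_FG };

static void pick_formats(enum dp_flow dp, const uint32_t **fmts, unsigned int *n)
{
	/* Only the DP path can do color space conversion, so only it
	 * may advertise YUV formats. */
	if (dp == DP_FLOW_SYNC_BG || dp == DP_FLOW_SYNC_FG) {
		*fmts = all_formats;
		*n = sizeof(all_formats) / sizeof(all_formats[0]);
	} else {
		*fmts = rgb_formats;
		*n = sizeof(rgb_formats) / sizeof(rgb_formats[0]);
	}
}

int main(void)
{
	const uint32_t *fmts;
	unsigned int n;

	pick_formats(DP_FLOW_NONE, &fmts, &n);
	printf("DC-only plane advertises %u formats, first=%u\n", n, fmts[0]);
	return 0;
}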
Reported-by: Fabio Estevam Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-plane.c | 41 +++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index fa5009705365..fc8f4834ed7b 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -35,7 +35,7 @@ static inline struct ipu_plane *to_ipu_plane(struct drm_plane *p) return container_of(p, struct ipu_plane, base); } -static const uint32_t ipu_plane_formats[] = { +static const uint32_t ipu_plane_all_formats[] = { DRM_FORMAT_ARGB1555, DRM_FORMAT_XRGB1555, DRM_FORMAT_ABGR1555, @@ -72,6 +72,31 @@ static const uint32_t ipu_plane_formats[] = { DRM_FORMAT_BGRX8888_A8, }; +static const uint32_t ipu_plane_rgb_formats[] = { + DRM_FORMAT_ARGB1555, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_ABGR1555, + DRM_FORMAT_XBGR1555, + DRM_FORMAT_RGBA5551, + DRM_FORMAT_BGRA5551, + DRM_FORMAT_ARGB4444, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_RGBX8888, + DRM_FORMAT_BGRA8888, + DRM_FORMAT_BGRX8888, + DRM_FORMAT_RGB565, + DRM_FORMAT_RGB565_A8, + DRM_FORMAT_BGR565_A8, + DRM_FORMAT_RGB888_A8, + DRM_FORMAT_BGR888_A8, + DRM_FORMAT_RGBX8888_A8, + DRM_FORMAT_BGRX8888_A8, +}; + static const uint64_t ipu_format_modifiers[] = { DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID @@ -830,16 +855,24 @@ struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu, struct ipu_plane *ipu_plane; const uint64_t *modifiers = ipu_format_modifiers; unsigned int zpos = (type == DRM_PLANE_TYPE_PRIMARY) ? 0 : 1; + unsigned int format_count; + const uint32_t *formats; int ret; DRM_DEBUG_KMS("channel %d, dp flow %d, possible_crtcs=0x%x\n", dma, dp, possible_crtcs); + if (dp == IPU_DP_FLOW_SYNC_BG || dp == IPU_DP_FLOW_SYNC_FG) { + formats = ipu_plane_all_formats; + format_count = ARRAY_SIZE(ipu_plane_all_formats); + } else { + formats = ipu_plane_rgb_formats; + format_count = ARRAY_SIZE(ipu_plane_rgb_formats); + } ipu_plane = drmm_universal_plane_alloc(dev, struct ipu_plane, base, possible_crtcs, &ipu_plane_funcs, - ipu_plane_formats, - ARRAY_SIZE(ipu_plane_formats), - modifiers, type, NULL); + formats, format_count, modifiers, + type, NULL); if (IS_ERR(ipu_plane)) { DRM_ERROR("failed to allocate and initialize %s plane\n", zpos ? "overlay" : "primary"); -- cgit From f4b34faa08428d813fc3629f882c503487f94a12 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 21 Jan 2021 16:29:55 +0100 Subject: drm/imx: Annotate dma-fence critical section in commit path drm_atomic_helper_commit_hw_done() is the last thing (no plane cleanup apparrently), so it's the entire function. And a nice comment explaining why the wait_for_flip_done is ahead, unlike the usual sequence. Aside, I think since the atomic helpers do track plane disabling now separately this might no longer be a real problem since: commit 21a01abbe32a3cbeb903378a24e504bfd9fe0648 Author: Maarten Lankhorst Date: Mon Sep 4 12:48:37 2017 +0200 drm/atomic: Fix freeing connector/plane state too early by tracking commits, v3. Plus the subsequent bugfixes of course, this was tricky to get right. 
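The annotation itself is small and mechanical: bracket every step that must make forward progress before fences can signal. A minimal sketch of the pattern (kernel context assumed, so this is not a standalone program, and the helper sequence is generic rather than the exact imx one):

static void example_commit_tail(struct drm_atomic_state *state)
{
	struct drm_device *dev = state->dev;
	/* From here on, blocking on an unsignalled fence could deadlock. */
	bool fence_cookie = dma_fence_begin_signalling();

	drm_atomic_helper_commit_modeset_disables(dev, state);
	drm_atomic_helper_commit_planes(dev, state, 0);
	drm_atomic_helper_commit_modeset_enables(dev, state);

	/* hw_done() is what releases the fences, so the critical
	 * section ends immediately after it. */
	drm_atomic_helper_commit_hw_done(state);
	dma_fence_end_signalling(fence_cookie);
}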
Signed-off-by: Daniel Vetter Cc: Philipp Zabel Cc: Shawn Guo Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: Fabio Estevam Cc: NXP Linux Team Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/imx-drm-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c index e6a88c8cbd69..be6001d8c984 100644 --- a/drivers/gpu/drm/imx/imx-drm-core.c +++ b/drivers/gpu/drm/imx/imx-drm-core.c @@ -81,6 +81,7 @@ static void imx_drm_atomic_commit_tail(struct drm_atomic_state *state) struct drm_plane_state *old_plane_state, *new_plane_state; bool plane_disabling = false; int i; + bool fence_cookie = dma_fence_begin_signalling(); drm_atomic_helper_commit_modeset_disables(dev, state); @@ -111,6 +112,7 @@ static void imx_drm_atomic_commit_tail(struct drm_atomic_state *state) } drm_atomic_helper_commit_hw_done(state); + dma_fence_end_signalling(fence_cookie); } static const struct drm_mode_config_helper_funcs imx_drm_mode_config_helpers = { -- cgit From 0c6c2d3615efb7c292573f2e6c886929a2b2da6c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 28 Apr 2021 13:12:31 +0100 Subject: arm64: Generate cpucaps.h The arm64 code allocates an internal constant to every CPU feature it can detect, distinct from the public hwcap numbers we use to expose some features to userspace. Currently this is maintained manually which is an irritating source of conflicts when working on new features, to avoid this replace the header with a simple text file listing the names we've assigned and sort it to minimise conflicts. As part of doing this we also do the Kbuild hookup required to hook up an arch tools directory and to generate header files in there. This will result in a renumbering and reordering of the existing constants, since they are all internal only the values should not be important. The reordering will impact the order in which some steps in enumeration handle features but the algorithm is not intended to depend on this and I haven't seen any issues when testing. Due to the UAO cpucap having been removed in the past we end up with ARM64_NCAPS being 1 smaller than it was before. 
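A quick way to sanity-check the generator is to run the gen-cpucaps.awk script added below by hand on a toy capability list; the output looks approximately like this (column padding abbreviated, shown here only as an illustration):

$ printf 'HAS_PAN\nHAS_EPAN\n' > /tmp/caps
$ awk -f arch/arm64/tools/gen-cpucaps.awk /tmp/caps
#ifndef __ASM_CPUCAPS_H
#define __ASM_CPUCAPS_H

/* Generated file - do not edit */

#define ARM64_HAS_PAN	0
#define ARM64_HAS_EPAN	1
#define ARM64_NCAPS	2

#endif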
Signed-off-by: Mark Brown Reviewed-by: Mark Rutland Tested-by: Mark Rutland Link: https://lore.kernel.org/r/20210428121231.11219-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 3 ++ arch/arm64/include/asm/Kbuild | 2 ++ arch/arm64/include/asm/cpucaps.h | 74 ---------------------------------------- arch/arm64/tools/Makefile | 22 ++++++++++++ arch/arm64/tools/cpucaps | 65 +++++++++++++++++++++++++++++++++++ arch/arm64/tools/gen-cpucaps.awk | 40 ++++++++++++++++++++++ 6 files changed, 132 insertions(+), 74 deletions(-) delete mode 100644 arch/arm64/include/asm/cpucaps.h create mode 100644 arch/arm64/tools/Makefile create mode 100644 arch/arm64/tools/cpucaps create mode 100755 arch/arm64/tools/gen-cpucaps.awk diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7ef44478560d..b52481f0605d 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -175,6 +175,9 @@ vdso_install: $(if $(CONFIG_COMPAT_VDSO), \ $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 $@) +archprepare: + $(Q)$(MAKE) $(build)=arch/arm64/tools kapi + # We use MRPROPER_FILES and CLEAN_FILES now archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 07ac208edc89..26889dbfe904 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -5,3 +5,5 @@ generic-y += qrwlock.h generic-y += qspinlock.h generic-y += set_memory.h generic-y += user.h + +generated-y += cpucaps.h diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h deleted file mode 100644 index b0c5eda0498f..000000000000 --- a/arch/arm64/include/asm/cpucaps.h +++ /dev/null @@ -1,74 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm64/include/asm/cpucaps.h - * - * Copyright (C) 2016 ARM Ltd. 
- */ -#ifndef __ASM_CPUCAPS_H -#define __ASM_CPUCAPS_H - -#define ARM64_WORKAROUND_CLEAN_CACHE 0 -#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1 -#define ARM64_WORKAROUND_845719 2 -#define ARM64_HAS_SYSREG_GIC_CPUIF 3 -#define ARM64_HAS_PAN 4 -#define ARM64_HAS_LSE_ATOMICS 5 -#define ARM64_WORKAROUND_CAVIUM_23154 6 -#define ARM64_WORKAROUND_834220 7 -#define ARM64_HAS_NO_HW_PREFETCH 8 -#define ARM64_HAS_VIRT_HOST_EXTN 11 -#define ARM64_WORKAROUND_CAVIUM_27456 12 -#define ARM64_HAS_32BIT_EL0 13 -#define ARM64_SPECTRE_V3A 14 -#define ARM64_HAS_CNP 15 -#define ARM64_HAS_NO_FPSIMD 16 -#define ARM64_WORKAROUND_REPEAT_TLBI 17 -#define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18 -#define ARM64_WORKAROUND_858921 19 -#define ARM64_WORKAROUND_CAVIUM_30115 20 -#define ARM64_HAS_DCPOP 21 -#define ARM64_SVE 22 -#define ARM64_UNMAP_KERNEL_AT_EL0 23 -#define ARM64_SPECTRE_V2 24 -#define ARM64_HAS_RAS_EXTN 25 -#define ARM64_WORKAROUND_843419 26 -#define ARM64_HAS_CACHE_IDC 27 -#define ARM64_HAS_CACHE_DIC 28 -#define ARM64_HW_DBM 29 -#define ARM64_SPECTRE_V4 30 -#define ARM64_MISMATCHED_CACHE_TYPE 31 -#define ARM64_HAS_STAGE2_FWB 32 -#define ARM64_HAS_CRC32 33 -#define ARM64_SSBS 34 -#define ARM64_WORKAROUND_1418040 35 -#define ARM64_HAS_SB 36 -#define ARM64_WORKAROUND_SPECULATIVE_AT 37 -#define ARM64_HAS_ADDRESS_AUTH_ARCH 38 -#define ARM64_HAS_ADDRESS_AUTH_IMP_DEF 39 -#define ARM64_HAS_GENERIC_AUTH_ARCH 40 -#define ARM64_HAS_GENERIC_AUTH_IMP_DEF 41 -#define ARM64_HAS_IRQ_PRIO_MASKING 42 -#define ARM64_HAS_DCPODP 43 -#define ARM64_WORKAROUND_1463225 44 -#define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45 -#define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46 -#define ARM64_WORKAROUND_1542419 47 -#define ARM64_HAS_E0PD 48 -#define ARM64_HAS_RNG 49 -#define ARM64_HAS_AMU_EXTN 50 -#define ARM64_HAS_ADDRESS_AUTH 51 -#define ARM64_HAS_GENERIC_AUTH 52 -#define ARM64_HAS_32BIT_EL1 53 -#define ARM64_BTI 54 -#define ARM64_HAS_ARMv8_4_TTL 55 -#define ARM64_HAS_TLB_RANGE 56 -#define ARM64_MTE 57 -#define ARM64_WORKAROUND_1508412 58 -#define ARM64_HAS_LDAPR 59 -#define ARM64_KVM_PROTECTED_MODE 60 -#define ARM64_WORKAROUND_NVIDIA_CARMEL_CNP 61 -#define ARM64_HAS_EPAN 62 - -#define ARM64_NCAPS 63 - -#endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/tools/Makefile b/arch/arm64/tools/Makefile new file mode 100644 index 000000000000..932b4fe5c768 --- /dev/null +++ b/arch/arm64/tools/Makefile @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 + +gen := arch/$(ARCH)/include/generated +kapi := $(gen)/asm + +kapi-hdrs-y := $(kapi)/cpucaps.h + +targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y)) + +PHONY += kapi + +kapi: $(kapi-hdrs-y) $(gen-y) + +# Create output directory if not already present +_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') + +quiet_cmd_gen_cpucaps = GEN $@ + cmd_gen_cpucaps = mkdir -p $(dir $@) && \ + $(AWK) -f $(filter-out $(PHONY),$^) > $@ + +$(kapi)/cpucaps.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE + $(call if_changed,gen_cpucaps) diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps new file mode 100644 index 000000000000..21fbdda7086e --- /dev/null +++ b/arch/arm64/tools/cpucaps @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Internal CPU capabilities constants, keep this list sorted + +BTI +HAS_32BIT_EL0 +HAS_32BIT_EL1 +HAS_ADDRESS_AUTH +HAS_ADDRESS_AUTH_ARCH +HAS_ADDRESS_AUTH_IMP_DEF +HAS_AMU_EXTN +HAS_ARMv8_4_TTL +HAS_CACHE_DIC +HAS_CACHE_IDC +HAS_CNP +HAS_CRC32 +HAS_DCPODP +HAS_DCPOP +HAS_E0PD +HAS_EPAN +HAS_GENERIC_AUTH +HAS_GENERIC_AUTH_ARCH 
+HAS_GENERIC_AUTH_IMP_DEF +HAS_IRQ_PRIO_MASKING +HAS_LDAPR +HAS_LSE_ATOMICS +HAS_NO_FPSIMD +HAS_NO_HW_PREFETCH +HAS_PAN +HAS_RAS_EXTN +HAS_RNG +HAS_SB +HAS_STAGE2_FWB +HAS_SYSREG_GIC_CPUIF +HAS_TLB_RANGE +HAS_VIRT_HOST_EXTN +HW_DBM +KVM_PROTECTED_MODE +MISMATCHED_CACHE_TYPE +MTE +SPECTRE_V2 +SPECTRE_V3A +SPECTRE_V4 +SSBS +SVE +UNMAP_KERNEL_AT_EL0 +WORKAROUND_834220 +WORKAROUND_843419 +WORKAROUND_845719 +WORKAROUND_858921 +WORKAROUND_1418040 +WORKAROUND_1463225 +WORKAROUND_1508412 +WORKAROUND_1542419 +WORKAROUND_CAVIUM_23154 +WORKAROUND_CAVIUM_27456 +WORKAROUND_CAVIUM_30115 +WORKAROUND_CAVIUM_TX2_219_PRFM +WORKAROUND_CAVIUM_TX2_219_TVM +WORKAROUND_CLEAN_CACHE +WORKAROUND_DEVICE_LOAD_ACQUIRE +WORKAROUND_NVIDIA_CARMEL_CNP +WORKAROUND_QCOM_FALKOR_E1003 +WORKAROUND_REPEAT_TLBI +WORKAROUND_SPECULATIVE_AT diff --git a/arch/arm64/tools/gen-cpucaps.awk b/arch/arm64/tools/gen-cpucaps.awk new file mode 100755 index 000000000000..18737a1ce044 --- /dev/null +++ b/arch/arm64/tools/gen-cpucaps.awk @@ -0,0 +1,40 @@ +#!/bin/awk -f +# SPDX-License-Identifier: GPL-2.0 +# gen-cpucaps.awk: arm64 cpucaps header generator +# +# Usage: awk -f gen-cpucaps.awk cpucaps.txt + +# Log an error and terminate +function fatal(msg) { + print "Error at line " NR ": " msg > "/dev/stderr" + exit 1 +} + +# skip blank lines and comment lines +/^$/ { next } +/^#/ { next } + +BEGIN { + print "#ifndef __ASM_CPUCAPS_H" + print "#define __ASM_CPUCAPS_H" + print "" + print "/* Generated file - do not edit */" + cap_num = 0 + print "" +} + +/^[vA-Z0-9_]+$/ { + printf("#define ARM64_%-30s\t%d\n", $0, cap_num++) + next +} + +END { + printf("#define ARM64_NCAPS\t\t\t\t%d\n", cap_num) + print "" + print "#endif" +} + +# Any lines not handled by previous rules are unexpected +{ + fatal("unhandled statement") +} -- cgit From a1bed090fc56e6e24517d96bc076595544fb5317 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 7 May 2021 17:25:42 +0100 Subject: kselftest/arm64: Add missing stddef.h include to BTI tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Explicitly include stddef.h when building the BTI tests so that we have a definition of NULL, with at least some toolchains this is not done implicitly by anything else: test.c: In function ‘start’: test.c:214:25: error: ‘NULL’ undeclared (first use in this function) 214 | sigaction(SIGILL, &sa, NULL); | ^~~~ test.c:20:1: note: ‘NULL’ is defined in header ‘’; did you forget to ‘#include ’? Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210507162542.23149-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/bti/test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c index 656b04976ccc..67b77ab83c20 100644 --- a/tools/testing/selftests/arm64/bti/test.c +++ b/tools/testing/selftests/arm64/bti/test.c @@ -6,6 +6,7 @@ #include "system.h" +#include #include #include #include -- cgit From e5af36b2adb858e982d78d41d7363d05d951a19a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 21 Apr 2021 19:40:56 +0200 Subject: cpufreq: intel_pstate: Use HWP if enabled by platform firmware It turns out that there are systems where HWP is enabled during initialization by the platform firmware (BIOS), but HWP EPP support is not advertised. 
After commit 7aa1031223bc ("cpufreq: intel_pstate: Avoid enabling HWP if EPP is not supported") intel_pstate refuses to use HWP on those systems, but the fallback PERF_CTL interface does not work on them either because of enabled HWP, and once enabled, HWP cannot be disabled. Consequently, the users of those systems cannot control CPU performance scaling. Address this issue by making intel_pstate use HWP unconditionally if it is enabled already when the driver starts. Fixes: 7aa1031223bc ("cpufreq: intel_pstate: Avoid enabling HWP if EPP is not supported") Reported-by: Srinivas Pandruvada Tested-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki Cc: 5.9+ # 5.9+ --- drivers/cpufreq/intel_pstate.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f0401064d7aa..0e69dffd5a76 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3033,6 +3033,14 @@ static const struct x86_cpu_id hwp_support_ids[] __initconst = { {} }; +static bool intel_pstate_hwp_is_enabled(void) +{ + u64 value; + + rdmsrl(MSR_PM_ENABLE, value); + return !!(value & 0x1); +} + static int __init intel_pstate_init(void) { const struct x86_cpu_id *id; @@ -3051,8 +3059,12 @@ static int __init intel_pstate_init(void) * Avoid enabling HWP for processors without EPP support, * because that means incomplete HWP implementation which is a * corner case and supporting it is generally problematic. + * + * If HWP is enabled already, though, there is no choice but to + * deal with it. */ - if (!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) { + if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) || + intel_pstate_hwp_is_enabled()) { hwp_active++; hwp_mode_bdw = id->driver_data; intel_pstate.attr = hwp_cpufreq_attrs; -- cgit From a3bc4ffeedf4693262fe7c6d133dcfcacd3d18c2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 May 2021 11:48:26 -0300 Subject: tools headers UAPI: Update tools's copy of drm.h headers Picking the changes from: b603e810f740e76b ("drm/uapi: document kernel capabilities") Doesn't result in any tooling changes: $ tools/perf/trace/beauty/drm_ioctl.sh > before $ cp include/uapi/drm/drm.h tools/include/uapi/drm/drm.h $ tools/perf/trace/beauty/drm_ioctl.sh > after $ diff -u before after Silencing these perf build warnings: Warning: Kernel ABI header at 'tools/include/uapi/drm/drm.h' differs from latest version at 'include/uapi/drm/drm.h' diff -u tools/include/uapi/drm/drm.h include/uapi/drm/drm.h Cc: Simon Ser Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/drm.h | 125 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 121 insertions(+), 4 deletions(-) diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 0827037c5484..67b94bc3c885 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -625,30 +625,147 @@ struct drm_gem_open { __u64 size; }; +/** + * DRM_CAP_DUMB_BUFFER + * + * If set to 1, the driver supports creating dumb buffers via the + * &DRM_IOCTL_MODE_CREATE_DUMB ioctl. + */ #define DRM_CAP_DUMB_BUFFER 0x1 +/** + * DRM_CAP_VBLANK_HIGH_CRTC + * + * If set to 1, the kernel supports specifying a CRTC index in the high bits of + * &drm_wait_vblank_request.type. + * + * Starting kernel version 2.6.39, this capability is always set to 1. + */ #define DRM_CAP_VBLANK_HIGH_CRTC 0x2 +/** + * DRM_CAP_DUMB_PREFERRED_DEPTH + * + * The preferred bit depth for dumb buffers. 
+ * + * The bit depth is the number of bits used to indicate the color of a single + * pixel excluding any padding. This is different from the number of bits per + * pixel. For instance, XRGB8888 has a bit depth of 24 but has 32 bits per + * pixel. + * + * Note that this preference only applies to dumb buffers, it's irrelevant for + * other types of buffers. + */ #define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3 +/** + * DRM_CAP_DUMB_PREFER_SHADOW + * + * If set to 1, the driver prefers userspace to render to a shadow buffer + * instead of directly rendering to a dumb buffer. For best speed, userspace + * should do streaming ordered memory copies into the dumb buffer and never + * read from it. + * + * Note that this preference only applies to dumb buffers, it's irrelevant for + * other types of buffers. + */ #define DRM_CAP_DUMB_PREFER_SHADOW 0x4 +/** + * DRM_CAP_PRIME + * + * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT + * and &DRM_PRIME_CAP_EXPORT. + * + * PRIME buffers are exposed as dma-buf file descriptors. See + * Documentation/gpu/drm-mm.rst, section "PRIME Buffer Sharing". + */ #define DRM_CAP_PRIME 0x5 +/** + * DRM_PRIME_CAP_IMPORT + * + * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME + * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl. + */ #define DRM_PRIME_CAP_IMPORT 0x1 +/** + * DRM_PRIME_CAP_EXPORT + * + * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME + * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl. + */ #define DRM_PRIME_CAP_EXPORT 0x2 +/** + * DRM_CAP_TIMESTAMP_MONOTONIC + * + * If set to 0, the kernel will report timestamps with ``CLOCK_REALTIME`` in + * struct drm_event_vblank. If set to 1, the kernel will report timestamps with + * ``CLOCK_MONOTONIC``. See ``clock_gettime(2)`` for the definition of these + * clocks. + * + * Starting from kernel version 2.6.39, the default value for this capability + * is 1. Starting kernel version 4.15, this capability is always set to 1. + */ #define DRM_CAP_TIMESTAMP_MONOTONIC 0x6 +/** + * DRM_CAP_ASYNC_PAGE_FLIP + * + * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC. + */ #define DRM_CAP_ASYNC_PAGE_FLIP 0x7 -/* - * The CURSOR_WIDTH and CURSOR_HEIGHT capabilities return a valid widthxheight - * combination for the hardware cursor. The intention is that a hardware - * agnostic userspace can query a cursor plane size to use. +/** + * DRM_CAP_CURSOR_WIDTH + * + * The ``CURSOR_WIDTH`` and ``CURSOR_HEIGHT`` capabilities return a valid + * width x height combination for the hardware cursor. The intention is that a + * hardware agnostic userspace can query a cursor plane size to use. * * Note that the cross-driver contract is to merely return a valid size; * drivers are free to attach another meaning on top, eg. i915 returns the * maximum plane size. */ #define DRM_CAP_CURSOR_WIDTH 0x8 +/** + * DRM_CAP_CURSOR_HEIGHT + * + * See &DRM_CAP_CURSOR_WIDTH. + */ #define DRM_CAP_CURSOR_HEIGHT 0x9 +/** + * DRM_CAP_ADDFB2_MODIFIERS + * + * If set to 1, the driver supports supplying modifiers in the + * &DRM_IOCTL_MODE_ADDFB2 ioctl. + */ #define DRM_CAP_ADDFB2_MODIFIERS 0x10 +/** + * DRM_CAP_PAGE_FLIP_TARGET + * + * If set to 1, the driver supports the &DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE and + * &DRM_MODE_PAGE_FLIP_TARGET_RELATIVE flags in + * &drm_mode_crtc_page_flip_target.flags for the &DRM_IOCTL_MODE_PAGE_FLIP + * ioctl. 
+ */ #define DRM_CAP_PAGE_FLIP_TARGET 0x11 +/** + * DRM_CAP_CRTC_IN_VBLANK_EVENT + * + * If set to 1, the kernel supports reporting the CRTC ID in + * &drm_event_vblank.crtc_id for the &DRM_EVENT_VBLANK and + * &DRM_EVENT_FLIP_COMPLETE events. + * + * Starting kernel version 4.12, this capability is always set to 1. + */ #define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12 +/** + * DRM_CAP_SYNCOBJ + * + * If set to 1, the driver supports sync objects. See + * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects". + */ #define DRM_CAP_SYNCOBJ 0x13 +/** + * DRM_CAP_SYNCOBJ_TIMELINE + * + * If set to 1, the driver supports timeline operations on sync objects. See + * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects". + */ #define DRM_CAP_SYNCOBJ_TIMELINE 0x14 /* DRM_IOCTL_GET_CAP ioctl argument type */ -- cgit From 0fdee797d60d71e5a6fd59aa573d84a858e715dd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 May 2021 11:51:17 -0300 Subject: tools headers UAPI: Sync drm/i915_drm.h with the kernel sources To pick the changes in: b5b6f6a610127b17 ("drm/i915/gem: Drop legacy execbuffer support (v2)") That don't result in any change in tooling as this is just adding a comment. Only silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h' diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h Cc: Daniel Vetter Cc: Jason Ekstrand Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/i915_drm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 1987e2ea79a3..ddc47bbf48b6 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -943,6 +943,7 @@ struct drm_i915_gem_exec_object { __u64 offset; }; +/* DRM_IOCTL_I915_GEM_EXECBUFFER was removed in Linux 5.13 */ struct drm_i915_gem_execbuffer { /** * List of buffers to be validated with their relocations to be -- cgit From b3172585b13d7171c32cfabdf938eca7fdfe9b31 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 May 2021 11:53:37 -0300 Subject: tools arch x86: Sync the msr-index.h copy with the kernel sources To pick up the changes from these csets: d0946a882e622022 ("perf/x86/intel: Hybrid PMU support for perf capabilities") That cause no changes to tooling as it isn't adding any new MSR, just some capabilities for a pre-existing one: $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after $ diff -u before after $ Just silences this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h' diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Cc: Kan Liang Cc: Peter Zijlstra Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/msr-index.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 45029354e0a8..742d89a00721 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -185,6 +185,9 @@ #define MSR_PEBS_DATA_CFG 0x000003f2 #define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_PERF_CAPABILITIES 0x00000345 +#define PERF_CAP_METRICS_IDX 15 +#define PERF_CAP_PT_IDX 16 + #define 
MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 #define MSR_IA32_RTIT_CTL 0x00000570 @@ -265,6 +268,7 @@ #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ #define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ +#define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) #define DEBUGCTLMSR_TR (1UL << 6) #define DEBUGCTLMSR_BTS (1UL << 7) #define DEBUGCTLMSR_BTINT (1UL << 8) -- cgit From e8c1167606c63fd8f9934d0b6ce80281463a4945 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 2 Apr 2021 18:40:20 +0900 Subject: perf record: Disallow -c and -F option at the same time It's confusing which one is effective when both options are given. The current code happens to use -c in this case but users might not be aware of it. We can change it to complain about that instead of relying on the implicit priority. Before: $ perf record -c 111111 -F 99 true [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.031 MB perf.data (8 samples) ] $ perf evlist -F cycles: sample_period=111111 $ After: $ perf record -c 111111 -F 99 true cannot set frequency and period at the same time $ So this change can break existing usages, but I think it's rare to have both options and it'd be better to change them. Suggested-by: Alexey Alexandrov Signed-off-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210402094020.28164-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/record.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index f99852d54b14..43e5b563dee8 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -157,9 +157,15 @@ static int get_max_rate(unsigned int *rate) static int record_opts__config_freq(struct record_opts *opts) { bool user_freq = opts->user_freq != UINT_MAX; + bool user_interval = opts->user_interval != ULLONG_MAX; unsigned int max_rate; - if (opts->user_interval != ULLONG_MAX) + if (user_interval && user_freq) { + pr_err("cannot set frequency and period at the same time\n"); + return -1; + } + + if (user_interval) opts->default_interval = opts->user_interval; if (user_freq) opts->freq = opts->user_freq; -- cgit From 7aa3c9eabdf76017679e975e2ffd50cde3c010b8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 6 May 2021 15:56:40 -0700 Subject: perf jevents: Silence warning for ArchStd files JSON files in the level 1 directory are used for ArchStd events (see preprocess_arch_std_files), so they shouldn't be warned about.
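For illustration only, not part of the patch: the suffix test this relies on can be as small as the sketch below; the real is_json_file() in jevents.c may differ in detail.

#include <string.h>

/* Sketch: classify a directory entry by its ".json" suffix. */
static int is_json_file(const char *name)
{
        const char *dot = strrchr(name, '.');

        return dot && strcmp(dot, ".json") == 0;
}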
Signed-off-by: Ian Rogers Reviewed-by: John Garry Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kajol Jain Cc: Kim Phillips Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210506225640.1461000-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index ed4f0bd72e5a..7422b0ea8790 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -1123,8 +1123,10 @@ static int process_one_file(const char *fpath, const struct stat *sb, mapfile = strdup(fpath); return 0; } - - pr_info("%s: Ignoring file %s\n", prog, fpath); + if (is_json_file(bname)) + pr_debug("%s: ArchStd json is preprocessed %s\n", prog, fpath); + else + pr_info("%s: Ignoring file %s\n", prog, fpath); return 0; } -- cgit From a11c9a6e472457cf9eeafb585fc5c912f51d1b23 Mon Sep 17 00:00:00 2001 From: Dmitry Koshelev Date: Thu, 6 May 2021 13:11:49 +0000 Subject: perf session: Fix swapping of cpu_map and stat_config records The 'type' field in the perf_record_cpu_map_data struct is 16 bits wide and so should be swapped using bswap_16(). The 'nr' field in the perf_record_stat_config struct should be swapped before being used for the size calculation. Signed-off-by: Dmitry Koshelev Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210506131244.13328-1-karaghiozis@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index a12cf4f0e97a..106b3d60881a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -904,7 +904,7 @@ static void perf_event__cpu_map_swap(union perf_event *event, struct perf_record_record_cpu_map *mask; unsigned i; - data->type = bswap_64(data->type); + data->type = bswap_16(data->type); switch (data->type) { case PERF_CPU_MAP__CPUS: @@ -937,7 +937,7 @@ static void perf_event__stat_config_swap(union perf_event *event, { u64 size; - size = event->stat_config.nr * sizeof(event->stat_config.data[0]); + size = bswap_64(event->stat_config.nr) * sizeof(event->stat_config.data[0]); size += 1; /* nr item itself */ mem_bswap_64(&event->stat_config.nr, size); } -- cgit From ad1237c30d975535a669746496cbed136aa5a045 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 8 May 2021 22:50:20 +0200 Subject: perf tools: Fix dynamic libbpf link Justin reported a broken build with LIBBPF_DYNAMIC=1. When linking libbpf dynamically we need to use perf's hashmap object, because it's not exported in libbpf.so (only in libbpf.a). The following build is now passing: $ make LIBBPF_DYNAMIC=1 BUILD: Doing 'make -j8' parallel build ... $ ldd perf | grep libbpf libbpf.so.0 => /lib64/libbpf.so.0 (0x00007fa7630db000) Fixes: eee19501926d ("perf tools: Grab a copy of libbpf's hashmap") Reported-by: Justin M.
Forbes Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210508205020.617984-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 1 + tools/perf/util/Build | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 0d6619064a83..406a9519145e 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -540,6 +540,7 @@ ifndef NO_LIBELF ifdef LIBBPF_DYNAMIC ifeq ($(feature-libbpf), 1) EXTLIBS += -lbpf + $(call detected,CONFIG_LIBBPF_DYNAMIC) else dummy := $(error Error: No libbpf devel library found, please install libbpf-devel); endif diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 8c0d9f368ebc..b64bdc1a7026 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -145,7 +145,14 @@ perf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += probe-file.o perf-$(CONFIG_LIBELF) += probe-event.o +ifdef CONFIG_LIBBPF_DYNAMIC + hashmap := 1 +endif ifndef CONFIG_LIBBPF + hashmap := 1 +endif + +ifdef hashmap perf-y += hashmap.o endif -- cgit From 0d943d5fde6070c2661a99618ea95b99655589ad Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 09:39:02 -0300 Subject: tools headers UAPI: Sync linux/kvm.h with the kernel sources To pick the changes in: 15fb7de1a7f5af0d ("KVM: SVM: Add KVM_SEV_RECEIVE_UPDATE_DATA command") 3bf725699bf62494 ("KVM: arm64: Add support for the KVM PTP service") 4cfdd47d6d95aca4 ("KVM: SVM: Add KVM_SEV SEND_START command") 54526d1fd59338fd ("KVM: x86: Support KVM VMs sharing SEV context") 5569e2e7a650dfff ("KVM: SVM: Add support for KVM_SEV_SEND_CANCEL command") 8b13c36493d8cb56 ("KVM: introduce KVM_CAP_SET_GUEST_DEBUG2") af43cbbf954b50ca ("KVM: SVM: Add support for KVM_SEV_RECEIVE_START command") d3d1af85e2c75bb5 ("KVM: SVM: Add KVM_SEND_UPDATE_DATA command") fe7e948837f312d8 ("KVM: x86: Add capability to grant VM access to privileged SGX attribute") That don't cause any change in tooling as it doesn't introduce any new ioctl. 
$ grep kvm tools/perf/trace/beauty/*.sh tools/perf/trace/beauty/kvm_ioctl.sh:printf "static const char *kvm_ioctl_cmds[] = {\n" tools/perf/trace/beauty/kvm_ioctl.sh:egrep $regex ${header_dir}/kvm.h | \ $ $ tools/perf/trace/beauty/kvm_ioctl.sh > before $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h $ tools/perf/trace/beauty/kvm_ioctl.sh > after $ diff -u before after $ This silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Cc: Brijesh Singh Cc: Jianyong Wu Cc: Marc Zyngier Cc: Nathan Tempelman Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Steve Rutherford Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 45 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index f6afee209620..3fd9a7e9d90c 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1078,6 +1078,10 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_DIRTY_LOG_RING 192 #define KVM_CAP_X86_BUS_LOCK_EXIT 193 #define KVM_CAP_PPC_DAWR1 194 +#define KVM_CAP_SET_GUEST_DEBUG2 195 +#define KVM_CAP_SGX_ATTRIBUTE 196 +#define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 +#define KVM_CAP_PTP_KVM 198 #ifdef KVM_CAP_IRQ_ROUTING @@ -1671,6 +1675,8 @@ enum sev_cmd_id { KVM_SEV_CERT_EXPORT, /* Attestation report */ KVM_SEV_GET_ATTESTATION_REPORT, + /* Guest Migration Extension */ + KVM_SEV_SEND_CANCEL, KVM_SEV_NR_MAX, }; @@ -1729,6 +1735,45 @@ struct kvm_sev_attestation_report { __u32 len; }; +struct kvm_sev_send_start { + __u32 policy; + __u64 pdh_cert_uaddr; + __u32 pdh_cert_len; + __u64 plat_certs_uaddr; + __u32 plat_certs_len; + __u64 amd_certs_uaddr; + __u32 amd_certs_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_send_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + +struct kvm_sev_receive_start { + __u32 handle; + __u32 policy; + __u64 pdh_uaddr; + __u32 pdh_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_receive_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) -- cgit From b35629bc2fd59691504debda99c320cf966c8e3a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 09:45:25 -0300 Subject: tools headers kvm: Sync kvm headers with the kernel sources To pick the changes in: 3c0c2ad1ae75963c ("KVM: VMX: Add basic handling of VM-Exit from SGX enclave") None of them trigger any changes in tooling, this time this is just to silence these perf build warnings: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/vmx.h' differs from latest version at 'arch/x86/include/uapi/asm/vmx.h' diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h Cc: Paolo Bonzini Cc: Sean Christopherson Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/vmx.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index b8e650a985e3..946d761adbd3 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -27,6 +27,7 @@ 
#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 +#define VMX_EXIT_REASONS_SGX_ENCLAVE_MODE 0x08000000 #define EXIT_REASON_EXCEPTION_NMI 0 #define EXIT_REASON_EXTERNAL_INTERRUPT 1 -- cgit From a00b7e39d6b56e6f49cdd51a9ebf92627a19d877 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 7 May 2021 17:54:35 +0900 Subject: perf tools: Fix a build error on arm64 with clang Since clang's -Wmissing-field-initializers warns if a data structure is initialized with a single NULL as below, ---- tools/perf $ make CC=clang LLVM=1 ... arch/arm64/util/kvm-stat.c:74:9: error: missing field 'ops' initializer [-Werror,-Wmissing-field-initializers] { NULL }, ^ 1 error generated. ---- add another field initializer explicitly, the same as the other arches' kvm-stat.c code. Signed-off-by: Masami Hiramatsu Cc: Anders Roxell Cc: Leo Yan Cc: Sergey Senozhatsky Link: http://lore.kernel.org/lkml/162037767540.94840.15758657049033010518.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/kvm-stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/arm64/util/kvm-stat.c b/tools/perf/arch/arm64/util/kvm-stat.c index 2303256b7d05..73d18e0ed6f6 100644 --- a/tools/perf/arch/arm64/util/kvm-stat.c +++ b/tools/perf/arch/arm64/util/kvm-stat.c @@ -71,7 +71,7 @@ struct kvm_reg_events_ops kvm_reg_events_ops[] = { .name = "vmexit", .ops = &exit_events, }, - { NULL }, + { NULL, NULL }, }; const char * const kvm_skip_events[] = { -- cgit From f8bcb061ea013a9b39a071b9dd9f6ea0aa2caf72 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 09:55:30 -0300 Subject: tools headers UAPI: Sync files changed by landlock, quotactl_path and mount_settattr new syscalls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To pick the changes in these csets: a49f4f81cb48925e ("arch: Wire up Landlock syscalls") 2a1867219c7b27f9 ("fs: add mount_setattr()") fa8b90070a80bb1a ("quota: wire up quotactl_path") That silences these perf build warnings and adds support for those new syscalls in tools such as 'perf trace'. For instance, this is now possible: # ~acme/bin/perf trace -v -e landlock* event qualifier tracepoint filter: (common_pid != 129365 && common_pid != 3502) && (id == 444 || id == 445 || id == 446) ^C# That is the filter expression attached to the raw_syscalls:sys_{enter,exit} tracepoints.
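For anyone wanting to generate such an event by hand, a minimal caller, not part of the patch, assuming a kernel with Landlock wired up and the uapi header installed (there was no glibc wrapper at the time, hence the raw syscall(2); the returned ruleset fd is deliberately leaked since this is only a probe):

#include <linux/landlock.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        /* Smallest possible ruleset, just enough to exercise syscall 444. */
        struct landlock_ruleset_attr attr = {
                .handled_access_fs = LANDLOCK_ACCESS_FS_WRITE_FILE,
        };

        return syscall(__NR_landlock_create_ruleset, &attr, sizeof(attr), 0) < 0;
}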
$ grep landlock tools/perf/arch/x86/entry/syscalls/syscall_64.tbl 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self $ This addresses these perf build warnings: Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h' diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl' diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl' diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl Warning: Kernel ABI header at 'tools/perf/arch/s390/entry/syscalls/syscall.tbl' differs from latest version at 'arch/s390/kernel/syscalls/syscall.tbl' diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl Warning: Kernel ABI header at 'tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl' differs from latest version at 'arch/mips/kernel/syscalls/syscall_n64.tbl' diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl Cc: Christian Brauner Cc: James Morris Cc: Jan Kara Cc: Mickaël Salaün Cc: Sascha Hauer Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/unistd.h | 11 ++++++++++- tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl | 5 +++++ tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 4 ++++ tools/perf/arch/s390/entry/syscalls/syscall.tbl | 4 ++++ tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 4 ++++ 5 files changed, 27 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index ce58cff99b66..6de5a7fc066b 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -863,9 +863,18 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise) __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) #define __NR_mount_setattr 442 __SYSCALL(__NR_mount_setattr, sys_mount_setattr) +#define __NR_quotactl_path 443 +__SYSCALL(__NR_quotactl_path, sys_quotactl_path) + +#define __NR_landlock_create_ruleset 444 +__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) +#define __NR_landlock_add_rule 445 +__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) +#define __NR_landlock_restrict_self 446 +__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) #undef __NR_syscalls -#define __NR_syscalls 443 +#define __NR_syscalls 447 /* * 32 bit systems traditionally used different diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 91649690b52f..9974f5f8e49b 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -356,3 +356,8 @@ 439 n64 faccessat2 sys_faccessat2 440 n64 process_madvise sys_process_madvise 441 n64 epoll_pwait2 sys_epoll_pwait2 +442 n64 mount_setattr sys_mount_setattr +443 n64 quotactl_path sys_quotactl_path +444 n64 landlock_create_ruleset sys_landlock_create_ruleset +445 n64 landlock_add_rule 
sys_landlock_add_rule +446 n64 landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 0b2480cf3e47..2e68fbb57cc6 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -522,3 +522,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr +443 common quotactl_path sys_quotactl_path +444 common landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 3abef2144dac..7e4a2aba366d 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -445,3 +445,7 @@ 440 common process_madvise sys_process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr sys_mount_setattr +443 common quotactl_path sys_quotactl_path sys_quotactl_path +444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 7bf01cbe582f..ecd551b08d05 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -364,6 +364,10 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr +443 common quotactl_path sys_quotactl_path +444 common landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self # # Due to a historical design error, certain syscalls are numbered differently -- cgit From 5a80ee4219a52194f0e815bbceec40eb32c523ec Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 10:06:40 -0300 Subject: tools headers UAPI: Sync linux/prctl.h with the kernel sources To pick a new prctl introduced in: 201698626fbca1cf ("arm64: Introduce prctl(PR_PAC_{SET,GET}_ENABLED_KEYS)") That results in $ grep prctl tools/perf/trace/beauty/*.sh tools/perf/trace/beauty/prctl_option.sh:printf "static const char *prctl_options[] = {\n" tools/perf/trace/beauty/prctl_option.sh:egrep $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \ tools/perf/trace/beauty/prctl_option.sh:printf "static const char *prctl_set_mm_options[] = {\n" tools/perf/trace/beauty/prctl_option.sh:egrep $regex ${header_dir}/prctl.h | \ tools/perf/trace/beauty/x86_arch_prctl.sh:prctl_arch_header=${x86_header_dir}/prctl.h tools/perf/trace/beauty/x86_arch_prctl.sh: printf "#define x86_arch_prctl_codes_%d_offset %s\n" $idx $first_entry tools/perf/trace/beauty/x86_arch_prctl.sh: printf "static const char *x86_arch_prctl_codes_%d[] = {\n" $idx tools/perf/trace/beauty/x86_arch_prctl.sh: egrep -q $regex ${prctl_arch_header} && \ tools/perf/trace/beauty/x86_arch_prctl.sh: (egrep $regex ${prctl_arch_header} | \ $ tools/perf/trace/beauty/prctl_option.sh > before $ cp 
include/uapi/linux/prctl.h tools/include/uapi/linux/prctl.h $ tools/perf/trace/beauty/prctl_option.sh > after $ diff -u before after --- before 2021-05-09 10:06:10.064559675 -0300 +++ after 2021-05-09 10:06:21.319791396 -0300 @@ -54,6 +54,8 @@ [57] = "SET_IO_FLUSHER", [58] = "GET_IO_FLUSHER", [59] = "SET_SYSCALL_USER_DISPATCH", + [60] = "PAC_SET_ENABLED_KEYS", + [61] = "PAC_GET_ENABLED_KEYS", }; static const char *prctl_set_mm_options[] = { [1] = "START_CODE", $ Now users can do: # perf trace -e syscalls:sys_enter_prctl --filter "option==PAC_GET_ENABLED_KEYS" ^C# # trace -v -e syscalls:sys_enter_prctl --filter "option==PAC_GET_ENABLED_KEYS" New filter for syscalls:sys_enter_prctl: (option==0x3d) && (common_pid != 5519 && common_pid != 3404) ^C# And also when prctl appears in a session, its options will be translated to the string. Cc: Catalin Marinas Cc: Peter Collingbourne Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/prctl.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 667f1aed091c..18a9f59dc067 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -255,4 +255,8 @@ struct prctl_mm_map { # define SYSCALL_DISPATCH_FILTER_ALLOW 0 # define SYSCALL_DISPATCH_FILTER_BLOCK 1 +/* Set/get enabled arm64 pointer authentication keys */ +#define PR_PAC_SET_ENABLED_KEYS 60 +#define PR_PAC_GET_ENABLED_KEYS 61 + #endif /* _LINUX_PRCTL_H */ -- cgit From fb24e308b6310541e70d11a3f19dc40742974b95 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 10:19:37 -0300 Subject: tools arch: Update arch/x86/lib/mem{cpy,set}_64.S copies used in 'perf bench mem memcpy' To bring in the change made in this cset: 5e21a3ecad1500e3 ("x86/alternative: Merge include files") This just silences these perf tools build warnings, no change in the tools: Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S' diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs from latest version at 'arch/x86/lib/memset_64.S' diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S Cc: Borislav Petkov Cc: Juergen Gross Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/lib/memcpy_64.S | 2 +- tools/arch/x86/lib/memset_64.S | 2 +- tools/include/asm/alternative-asm.h | 10 ---------- tools/include/asm/alternative.h | 10 ++++++++++ 4 files changed, 12 insertions(+), 12 deletions(-) delete mode 100644 tools/include/asm/alternative-asm.h create mode 100644 tools/include/asm/alternative.h diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 1e299ac73c86..1cc9da6e29c7 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include .pushsection .noinstr.text, "ax" diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index 0bfd26e4ca9e..9827ae267f96 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -3,7 +3,7 @@ #include #include -#include +#include #include /* diff --git a/tools/include/asm/alternative-asm.h b/tools/include/asm/alternative-asm.h deleted file mode 100644 index b54bd860dff6..000000000000 --- a/tools/include/asm/alternative-asm.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _TOOLS_ASM_ALTERNATIVE_ASM_H -#define 
_TOOLS_ASM_ALTERNATIVE_ASM_H - -/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */ - -#define altinstruction_entry # -#define ALTERNATIVE_2 # - -#endif diff --git a/tools/include/asm/alternative.h b/tools/include/asm/alternative.h new file mode 100644 index 000000000000..b54bd860dff6 --- /dev/null +++ b/tools/include/asm/alternative.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_ASM_ALTERNATIVE_ASM_H +#define _TOOLS_ASM_ALTERNATIVE_ASM_H + +/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */ + +#define altinstruction_entry # +#define ALTERNATIVE_2 # + +#endif -- cgit From 3916329309eace19e8c32bc821064a119474c309 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 10:21:33 -0300 Subject: tools include UAPI powerpc: Sync errno.h with the kernel headers To pick the change in: 7de21e679e6a789f ("powerpc: fix EDEADLOCK redefinition error in uapi/asm/errno.h") That will make the errno number -> string tables to pick this change on powerpc. Silencing this perf build warning: Warning: Kernel ABI header at 'tools/arch/powerpc/include/uapi/asm/errno.h' differs from latest version at 'arch/powerpc/include/uapi/asm/errno.h' diff -u tools/arch/powerpc/include/uapi/asm/errno.h arch/powerpc/include/uapi/asm/errno.h Cc: Michael Ellerman Cc: Tony Ambardar Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/powerpc/include/uapi/asm/errno.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/arch/powerpc/include/uapi/asm/errno.h b/tools/arch/powerpc/include/uapi/asm/errno.h index cc79856896a1..4ba87de32be0 100644 --- a/tools/arch/powerpc/include/uapi/asm/errno.h +++ b/tools/arch/powerpc/include/uapi/asm/errno.h @@ -2,6 +2,7 @@ #ifndef _ASM_POWERPC_ERRNO_H #define _ASM_POWERPC_ERRNO_H +#undef EDEADLOCK #include #undef EDEADLOCK -- cgit From 6faf64f5248166ecaf50107e883c383e0b66bb70 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 10:24:29 -0300 Subject: tools headers cpufeatures: Sync with the kernel sources To pick the changes from: 4e6292114c741221 ("x86/paravirt: Add new features for paravirt patching") a161545ab53b174c ("x86/cpufeatures: Enumerate Intel Hybrid Technology feature bit") a89dfde3dc3c2dbf ("x86: Remove dynamic NOP selection") b8921dccf3b25798 ("x86/cpufeatures: Add SGX1 and SGX2 sub-features") f21d4d3b97a86035 ("x86/cpufeatures: Enumerate #DB for bus lock detection") f333374e108e7e4c ("x86/cpufeatures: Add the Virtual SPEC_CTRL feature") This only causes these perf files to be rebuilt: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Cc: Babu Moger Cc: Borislav Petkov Cc: Fenghua Yu Cc: Juergen Gross Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ricardo Neri Cc: Sean Christopherson Cc: Thomas Gleixner Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index cc96e26d69f7..ac37830ae941 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -84,7 +84,7 @@ /* CPU types for specific tunings: */ #define X86_FEATURE_K8 
( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ +/* FREE, was #define X86_FEATURE_K7 ( 3*32+ 5) "" Athlon */ #define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ #define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ @@ -236,6 +236,8 @@ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */ +#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -290,6 +292,8 @@ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ #define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ +#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ +#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ @@ -336,6 +340,7 @@ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ #define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ +#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */ #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ @@ -354,6 +359,7 @@ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ +#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */ #define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ #define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ #define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ @@ -374,6 +380,7 @@ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ +#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ -- cgit From 71d7924b3e8acaca6a3b0fc3261170031ada3b70 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 10:29:10 -0300 Subject: tools headers UAPI: Sync perf_event.h with the kernel sources To pick up the changes in: 2b26f0aa004995f4 ("perf: Support only inheriting events if cloned with CLONE_THREAD") 2e498d0a74e5b88a ("perf: Add support for event removal on exec") 547b60988e631f74 ("perf: aux: Add flags for the buffer format") 55bcf6ef314ae8ba ("perf: Extend PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE") 7dde51767ca5339e ("perf: aux: Add CoreSight PMU buffer 
formats") 97ba62b278674293 ("perf: Add support for SIGTRAP on perf events") d0d1dd628527c77d ("perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event") Also change the expected sizeof(struct perf_event_attr) from 120 to 128 due to fields being added for the SIGTRAP changes. Addressing this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h Cc: Kan Liang Cc: Marco Elver Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki K Poulose Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 26 +++++++++++++++++++++----- tools/perf/tests/attr/base-record | 2 +- tools/perf/tests/attr/base-stat | 2 +- tools/perf/tests/attr/system-wide-dummy | 2 +- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 14332f4cf816..bf8143505c49 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -127,6 +127,7 @@ enum perf_sw_ids { PERF_COUNT_SW_EMULATION_FAULTS = 8, PERF_COUNT_SW_DUMMY = 9, PERF_COUNT_SW_BPF_OUTPUT = 10, + PERF_COUNT_SW_CGROUP_SWITCHES = 11, PERF_COUNT_SW_MAX, /* non-ABI */ }; @@ -326,6 +327,7 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ #define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ #define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ +#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -404,7 +406,10 @@ struct perf_event_attr { cgroup : 1, /* include cgroup events */ text_poke : 1, /* include text poke events */ build_id : 1, /* use build id in mmap2 events */ - __reserved_1 : 29; + inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */ + remove_on_exec : 1, /* event is removed from task on exec */ + sigtrap : 1, /* send synchronous SIGTRAP on event */ + __reserved_1 : 26; union { __u32 wakeup_events; /* wakeup every n events */ @@ -456,6 +461,12 @@ struct perf_event_attr { __u16 __reserved_2; __u32 aux_sample_size; __u32 __reserved_3; + + /* + * User provided data if sigtrap=1, passed back to user via + * siginfo_t::si_perf, e.g. to permit user to identify the event. 
+ */ + __u64 sig_data; }; /* @@ -1171,10 +1182,15 @@ enum perf_callchain_context { /** * PERF_RECORD_AUX::flags bits */ -#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ -#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ -#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ -#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ +#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */ + +/* CoreSight PMU AUX buffer formats */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ #define PERF_FLAG_FD_NO_GROUP (1UL << 0) #define PERF_FLAG_FD_OUTPUT (1UL << 1) diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index 645009c08b3c..4a7b8deef3fd 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0|1 -size=120 +size=128 config=0 sample_period=* sample_type=263 diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat index b0f42c34882e..408164456530 100644 --- a/tools/perf/tests/attr/base-stat +++ b/tools/perf/tests/attr/base-stat @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0 -size=120 +size=128 config=0 sample_period=0 sample_type=65536 diff --git a/tools/perf/tests/attr/system-wide-dummy b/tools/perf/tests/attr/system-wide-dummy index eba723cc0d38..86a15dd359d9 100644 --- a/tools/perf/tests/attr/system-wide-dummy +++ b/tools/perf/tests/attr/system-wide-dummy @@ -7,7 +7,7 @@ cpu=* pid=-1 flags=8 type=1 -size=120 +size=128 config=9 sample_period=4000 sample_type=455 -- cgit From 29038ae2ae566d9441e81cda3539db17c20bf06a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 10 May 2021 14:02:17 +0200 Subject: Revert "Revert "ACPI: scan: Turn off unused power resources during initialization"" Revert commit 5db91e9cb5b3 ("Revert "ACPI: scan: Turn off unused power resources during initialization") which was not necessary. Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/internal.h | 1 + drivers/acpi/power.c | 2 +- drivers/acpi/scan.c | 2 ++ drivers/acpi/sleep.h | 1 - 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index e6a5d997241c..9fcefcdc1dbe 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -139,6 +139,7 @@ int acpi_device_sleep_wake(struct acpi_device *dev, int acpi_power_get_inferred_state(struct acpi_device *device, int *state); int acpi_power_on_resources(struct acpi_device *device, int state); int acpi_power_transition(struct acpi_device *device, int state); +void acpi_turn_off_unused_power_resources(void); /* -------------------------------------------------------------------------- Device Power Management diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 7e69931be828..bacae6d178ff 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -996,6 +996,7 @@ void acpi_resume_power_resources(void) mutex_unlock(&power_resource_list_lock); } +#endif void acpi_turn_off_unused_power_resources(void) { @@ -1016,4 +1017,3 @@ void acpi_turn_off_unused_power_resources(void) mutex_unlock(&power_resource_list_lock); } -#endif diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index a184529d8fa4..1584c9e463bd 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2360,6 +2360,8 @@ int __init acpi_scan_init(void) } } + acpi_turn_off_unused_power_resources(); + acpi_scan_initialized = true; out: diff --git a/drivers/acpi/sleep.h b/drivers/acpi/sleep.h index 1856f76ac83f..7fe41ee489d6 100644 --- a/drivers/acpi/sleep.h +++ b/drivers/acpi/sleep.h @@ -8,7 +8,6 @@ extern struct list_head acpi_wakeup_device_list; extern struct mutex acpi_device_lock; extern void acpi_resume_power_resources(void); -extern void acpi_turn_off_unused_power_resources(void); static inline acpi_status acpi_set_waking_vector(u32 wakeup_address) { -- cgit From 14b6cff54edaca5740068e9ed070152727ed7718 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 22 Apr 2021 17:26:19 +0200 Subject: staging: rtl8723bs: avoid bogus gcc warning gcc gets confused by some of the type casts and produces an apparently senseless warning about an out-of-bound memcpy to an unrelated array in the same structure: drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c: In function 'rtw_cfg80211_ap_set_encryption': cc1: error: writing 8 bytes into a region of size 0 [-Werror=stringop-overflow=] In file included from drivers/staging/rtl8723bs/include/drv_types.h:32, from drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c:10: drivers/staging/rtl8723bs/include/rtw_security.h:98:15: note: at offset [184, 4264] into destination object 'dot11AuthAlgrthm' of size 4 98 | u32 dot11AuthAlgrthm; /* 802.11 auth, could be open, shared, 8021x and authswitch */ | ^~~~~~~~~~~~~~~~ cc1: error: writing 8 bytes into a region of size 0 [-Werror=stringop-overflow=] drivers/staging/rtl8723bs/include/rtw_security.h:98:15: note: at offset [264, 4344] into destination object 'dot11AuthAlgrthm' of size 4 This is a known gcc bug, and the patch here is only a workaround, but the approach of using a temporary variable to hold a pointer to the key also improves readability in addition to avoiding the warning, so overall this should still help. 
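Reduced to a sketch with made-up types (the driver's real structures are larger and the copies more numerous), the workaround pattern is simply:

#include <string.h>

struct sec_keys {                       /* hypothetical, for illustration */
        unsigned char grpkey[4][16];
};

static void store_group_key(struct sec_keys *sec, int idx,
                            const unsigned char *key, unsigned int len)
{
        unsigned char *grpkey = sec->grpkey[idx];       /* computed once */

        /* gcc now sees a plain pointer, and the line stays short. */
        memcpy(grpkey, key, len > 16 ? 16 : len);
}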
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99673 Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210422152648.2891996-1-arnd@kernel.org Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 23 +++++++++++++---------- drivers/staging/rtl8723bs/os_dep/ioctl_linux.c | 21 ++++++++++++--------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c index c1dac6eec59f..a6d731e959a2 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c @@ -527,6 +527,9 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct security_priv *psecuritypriv = &(padapter->securitypriv); struct sta_priv *pstapriv = &padapter->stapriv; + char *grpkey = padapter->securitypriv.dot118021XGrpKey[param->u.crypt.idx].skey; + char *txkey = padapter->securitypriv.dot118021XGrptxmickey[param->u.crypt.idx].skey; + char *rxkey = padapter->securitypriv.dot118021XGrprxmickey[param->u.crypt.idx].skey; param->u.crypt.err = 0; param->u.crypt.alg[IEEE_CRYPT_ALG_NAME_LEN - 1] = '\0'; @@ -609,7 +612,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa { if (strcmp(param->u.crypt.alg, "WEP") == 0) { - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); psecuritypriv->dot118021XGrpPrivacy = _WEP40_; if (param->u.crypt.key_len == 13) @@ -622,12 +625,12 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa { psecuritypriv->dot118021XGrpPrivacy = _TKIP_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); /* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */ /* set mic key */ - memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8); - memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8); + memcpy(txkey, &(param->u.crypt.key[16]), 8); + memcpy(rxkey, &(param->u.crypt.key[24]), 8); psecuritypriv->busetkipkey = true; @@ -636,7 +639,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa { psecuritypriv->dot118021XGrpPrivacy = _AES_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); } else { @@ -713,7 +716,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa { if (strcmp(param->u.crypt.alg, "WEP") == 0) { - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 
16 : param->u.crypt.key_len)); psecuritypriv->dot118021XGrpPrivacy = _WEP40_; if (param->u.crypt.key_len == 13) @@ -725,12 +728,12 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa { psecuritypriv->dot118021XGrpPrivacy = _TKIP_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); /* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */ /* set mic key */ - memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8); - memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8); + memcpy(txkey, &(param->u.crypt.key[16]), 8); + memcpy(rxkey, &(param->u.crypt.key[24]), 8); psecuritypriv->busetkipkey = true; @@ -739,7 +742,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa { psecuritypriv->dot118021XGrpPrivacy = _AES_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); } else { diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c index e98e5388d5c7..5088c3731b6d 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c @@ -2963,6 +2963,9 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct security_priv *psecuritypriv = &(padapter->securitypriv); struct sta_priv *pstapriv = &padapter->stapriv; + char *txkey = padapter->securitypriv.dot118021XGrptxmickey[param->u.crypt.idx].skey; + char *rxkey = padapter->securitypriv.dot118021XGrprxmickey[param->u.crypt.idx].skey; + char *grpkey = psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey; param->u.crypt.err = 0; param->u.crypt.alg[IEEE_CRYPT_ALG_NAME_LEN - 1] = '\0'; @@ -3064,7 +3067,7 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, if (!psta && check_fwstate(pmlmepriv, WIFI_AP_STATE)) { /* group key */ if (param->u.crypt.set_tx == 1) { if (strcmp(param->u.crypt.alg, "WEP") == 0) { - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); psecuritypriv->dot118021XGrpPrivacy = _WEP40_; if (param->u.crypt.key_len == 13) @@ -3073,11 +3076,11 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, } else if (strcmp(param->u.crypt.alg, "TKIP") == 0) { psecuritypriv->dot118021XGrpPrivacy = _TKIP_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 
16 : param->u.crypt.key_len)); /* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */ /* set mic key */ - memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8); + memcpy(txkey, &(param->u.crypt.key[16]), 8); memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8); psecuritypriv->busetkipkey = true; @@ -3086,7 +3089,7 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, else if (strcmp(param->u.crypt.alg, "CCMP") == 0) { psecuritypriv->dot118021XGrpPrivacy = _AES_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); } else { psecuritypriv->dot118021XGrpPrivacy = _NO_PRIVACY_; } @@ -3142,7 +3145,7 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, } else { /* group key??? */ if (strcmp(param->u.crypt.alg, "WEP") == 0) { - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); psecuritypriv->dot118021XGrpPrivacy = _WEP40_; if (param->u.crypt.key_len == 13) @@ -3150,19 +3153,19 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, } else if (strcmp(param->u.crypt.alg, "TKIP") == 0) { psecuritypriv->dot118021XGrpPrivacy = _TKIP_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); /* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */ /* set mic key */ - memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8); - memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8); + memcpy(txkey, &(param->u.crypt.key[16]), 8); + memcpy(rxkey, &(param->u.crypt.key[24]), 8); psecuritypriv->busetkipkey = true; } else if (strcmp(param->u.crypt.alg, "CCMP") == 0) { psecuritypriv->dot118021XGrpPrivacy = _AES_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); } else { psecuritypriv->dot118021XGrpPrivacy = _NO_PRIVACY_; } -- cgit From 18abf874367456540846319574864e6ff32752e2 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 26 Apr 2021 11:26:22 +0200 Subject: cdc-wdm: untangle a circular dependency between callback and softint We have a cycle of callbacks scheduling works which submit URBs with those callbacks. This needs to be blocked, stopped and unblocked to untangle the circle. 
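In outline, the ordering this establishes looks like the following sketch (hypothetical device struct, none of the driver's real locking): usb_poison_urb() waits for a running URB to complete and makes further submissions fail, so the work items can be cancelled without re-arming the cycle, and usb_unpoison_urb() later re-enables I/O.

static void quiesce(struct my_dev *d)
{
        usb_poison_urb(d->urb);         /* in-flight URB finishes, resubmits now fail */
        cancel_work_sync(&d->work);     /* a last run may still happen; its submit fails */
}

static void resume_io(struct my_dev *d)
{
        usb_unpoison_urb(d->urb);       /* allow submissions again */
}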
Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20210426092622.20433-1-oneukum@suse.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 508b1c3f8b73..d1e4a7379beb 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -321,12 +321,23 @@ exit: } -static void kill_urbs(struct wdm_device *desc) +static void poison_urbs(struct wdm_device *desc) { /* the order here is essential */ - usb_kill_urb(desc->command); - usb_kill_urb(desc->validity); - usb_kill_urb(desc->response); + usb_poison_urb(desc->command); + usb_poison_urb(desc->validity); + usb_poison_urb(desc->response); +} + +static void unpoison_urbs(struct wdm_device *desc) +{ + /* + * the order here is not essential + * it is symmetrical just to be nice + */ + usb_unpoison_urb(desc->response); + usb_unpoison_urb(desc->validity); + usb_unpoison_urb(desc->command); } static void free_urbs(struct wdm_device *desc) @@ -741,11 +752,12 @@ static int wdm_release(struct inode *inode, struct file *file) if (!desc->count) { if (!test_bit(WDM_DISCONNECTING, &desc->flags)) { dev_dbg(&desc->intf->dev, "wdm_release: cleanup\n"); - kill_urbs(desc); + poison_urbs(desc); spin_lock_irq(&desc->iuspin); desc->resp_count = 0; spin_unlock_irq(&desc->iuspin); desc->manage_power(desc->intf, 0); + unpoison_urbs(desc); } else { /* must avoid dev_printk here as desc->intf is invalid */ pr_debug(KBUILD_MODNAME " %s: device gone - cleaning up\n", __func__); @@ -1037,9 +1049,9 @@ static void wdm_disconnect(struct usb_interface *intf) wake_up_all(&desc->wait); mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); + poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); - kill_urbs(desc); mutex_unlock(&desc->wlock); mutex_unlock(&desc->rlock); @@ -1080,9 +1092,10 @@ static int wdm_suspend(struct usb_interface *intf, pm_message_t message) set_bit(WDM_SUSPENDING, &desc->flags); spin_unlock_irq(&desc->iuspin); /* callback submits work - order is essential */ - kill_urbs(desc); + poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); + unpoison_urbs(desc); } if (!PMSG_IS_AUTO(message)) { mutex_unlock(&desc->wlock); @@ -1140,7 +1153,7 @@ static int wdm_pre_reset(struct usb_interface *intf) wake_up_all(&desc->wait); mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); - kill_urbs(desc); + poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); return 0; @@ -1151,6 +1164,7 @@ static int wdm_post_reset(struct usb_interface *intf) struct wdm_device *desc = wdm_find_device(intf); int rv; + unpoison_urbs(desc); clear_bit(WDM_OVERFLOW, &desc->flags); clear_bit(WDM_RESETTING, &desc->flags); rv = recover_from_urb_loss(desc); -- cgit From 04357fafea9c7ed34525eb9680c760245c3bb958 Mon Sep 17 00:00:00 2001 From: Ferry Toth Date: Sun, 25 Apr 2021 17:09:47 +0200 Subject: usb: dwc3: pci: Enable usb2-gadget-lpm-disable for Intel Merrifield On Intel Merrifield, LPM causes the host to reset the port after a timeout. Disabling LPM entirely prevents this.
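For context, an assumption about the consuming side, simplified: the dwc3 core is expected to read this entry back as a boolean device property during probe, roughly along these lines (the dwc3-side field name is assumed here, not taken from the patch):

static void mrfld_read_lpm_quirk(struct device *dev, struct dwc3 *dwc)
{
        /* Sketch only; the PCI glue's property_entry surfaces here. */
        dwc->usb2_gadget_lpm_disable =
                device_property_read_bool(dev, "snps,usb2-gadget-lpm-disable");
}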
Fixes: 066c09593454 ("usb: dwc3: pci: Enable extcon driver for Intel Merrifield") Reviewed-by: Andy Shevchenko Signed-off-by: Ferry Toth Cc: stable Link: https://lore.kernel.org/r/20210425150947.5862-1-ftoth@exalondelft.nl Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index e7b932dcbf82..1e51460938b8 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -123,6 +123,7 @@ static const struct property_entry dwc3_pci_mrfld_properties[] = { PROPERTY_ENTRY_STRING("linux,extcon-name", "mrfld_bcove_pwrsrc"), PROPERTY_ENTRY_BOOL("snps,dis_u3_susphy_quirk"), PROPERTY_ENTRY_BOOL("snps,dis_u2_susphy_quirk"), + PROPERTY_ENTRY_BOOL("snps,usb2-gadget-lpm-disable"), PROPERTY_ENTRY_BOOL("linux,sysdev_is_parent"), {} }; -- cgit From 9cbc7eb17cdf6d1adaa2aebfe0079077d31d39a9 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 26 Apr 2021 14:08:40 -0700 Subject: usb: dwc3: core: Add missing GHWPARAMS9 doc Add missing documentation for struct dwc3_hwparams' new field hwparams9 to avoid a kernel-doc build warning. Fixes: 16710380d3aa ("usb: dwc3: Capture new capability register GHWPARAMS9") Reported-by: Stephen Rothwell Acked-by: Felipe Balbi Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/f4c491f7614e623755fafe640b7e690e7c5634e2.1619471127.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index b1e875c58f20..3859d8cad3cb 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -850,6 +850,7 @@ struct dwc3_trb { * @hwparams6: GHWPARAMS6 * @hwparams7: GHWPARAMS7 * @hwparams8: GHWPARAMS8 + * @hwparams9: GHWPARAMS9 */ struct dwc3_hwparams { u32 hwparams0; -- cgit From 6c05cdbb9ef1de0264cac9135f6e90dad1e8763f Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sun, 25 Apr 2021 12:32:53 -0300 Subject: usb: Restore the reference to ch9.h Keep the textual reference to ch9.h as it was prior to commit caa93d9bd2d7 ("usb: Fix up movement of USB core kerneldoc location"). As linux/usb/ch9.h does not contain comments anymore, explain that drivers/usb/common/common.c includes that header and provides declarations of a few utility routines for manipulating the data types from ch9.h. Also mention that drivers/usb/common/debug.c contains some functions for creating debug output. Fixes: caa93d9bd2d7 ("usb: Fix up movement of USB core kerneldoc location") Reported-by: Alan Stern Suggested-by: Alan Stern Acked-by: Alan Stern Signed-off-by: Fabio Estevam Link: https://lore.kernel.org/r/20210425153253.2542816-1-festevam@gmail.com Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/usb/usb.rst | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Documentation/driver-api/usb/usb.rst b/Documentation/driver-api/usb/usb.rst index 543e70434da2..820e867af45a 100644 --- a/Documentation/driver-api/usb/usb.rst +++ b/Documentation/driver-api/usb/usb.rst @@ -109,16 +109,19 @@ well as to make sure they aren't relying on some HCD-specific behavior. USB-Standard Types ================== -In ``drivers/usb/common/common.c`` and ``drivers/usb/common/debug.c`` you -will find the USB data types defined in chapter 9 of the USB specification. -These data types are used throughout USB, and in APIs including this host -side API, gadget APIs, usb character devices and debugfs interfaces.
+In ``include/uapi/linux/usb/ch9.h`` you will find the USB data types defined +in chapter 9 of the USB specification. These data types are used throughout +USB, and in APIs including this host side API, gadget APIs, usb character +devices and debugfs interfaces. That file is itself included by +``include/linux/usb/ch9.h``, which also contains declarations of a few +utility routines for manipulating these data types; the implementations +are in ``drivers/usb/common/common.c``. .. kernel-doc:: drivers/usb/common/common.c :export: -.. kernel-doc:: drivers/usb/common/debug.c - :export: +In addition, some functions useful for creating debugging output are +defined in ``drivers/usb/common/debug.c``. Host-Side Data Types and Macros =============================== -- cgit From d1d90dd27254c44d087ad3f8b5b3e4fff0571f45 Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Wed, 28 Apr 2021 02:01:10 -0700 Subject: usb: dwc3: gadget: Enable suspend events commit 72704f876f50 ("dwc3: gadget: Implement the suspend entry event handler") introduced (nearly 5 years ago!) an interrupt handler for U3/L1-L2 suspend events. The problem is that these events aren't currently enabled in the DEVTEN register so the handler is never even invoked. Fix this simply by enabling the corresponding bit in dwc3_gadget_enable_irq() using the same revision check as found in the handler. Fixes: 72704f876f50 ("dwc3: gadget: Implement the suspend entry event handler") Acked-by: Felipe Balbi Signed-off-by: Jack Pham Cc: stable Link: https://lore.kernel.org/r/20210428090111.3370-1-jackp@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index dd80e5ca8c78..cab3a9184068 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2323,6 +2323,10 @@ static void dwc3_gadget_enable_irq(struct dwc3 *dwc) if (DWC3_VER_IS_PRIOR(DWC3, 250A)) reg |= DWC3_DEVTEN_ULSTCNGEN; + /* On 2.30a and above this bit enables U3/L2-L1 Suspend Events */ + if (!DWC3_VER_IS_PRIOR(DWC3, 230A)) + reg |= DWC3_DEVTEN_EOPFEN; + dwc3_writel(dwc->regs, DWC3_DEVTEN, reg); } -- cgit From 6f26ebb79a84bcad211cb2d8a2ef74dfc427322d Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Wed, 28 Apr 2021 02:01:11 -0700 Subject: usb: dwc3: gadget: Rename EOPF event macros to Suspend The device event corresponding to End of Periodic Frame is only found on older IP revisions (2.10a and prior, according to a cursory SNPS databook search). On revisions 2.30a and newer, including DWC3.1, the same event value and corresponding DEVTEN bit were repurposed to indicate that the link has gone into suspend state (U3 or L2/L1). EOPF events had never been enabled before in this driver, and going forward we expect current and future DWC3-based devices aren't likely to be using such old DWC3 IP revisions either. Hence rather than keeping the deprecated EOPF macro names let's rename them to indicate their usage for suspend events.
Acked-by: Felipe Balbi Signed-off-by: Jack Pham Link: https://lore.kernel.org/r/20210428090111.3370-2-jackp@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 6 +++--- drivers/usb/dwc3/debug.h | 8 ++++---- drivers/usb/dwc3/gadget.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 3859d8cad3cb..c5d5760cdf53 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -57,7 +57,7 @@ #define DWC3_DEVICE_EVENT_LINK_STATUS_CHANGE 3 #define DWC3_DEVICE_EVENT_WAKEUP 4 #define DWC3_DEVICE_EVENT_HIBER_REQ 5 -#define DWC3_DEVICE_EVENT_EOPF 6 +#define DWC3_DEVICE_EVENT_SUSPEND 6 #define DWC3_DEVICE_EVENT_SOF 7 #define DWC3_DEVICE_EVENT_ERRATIC_ERROR 9 #define DWC3_DEVICE_EVENT_CMD_CMPL 10 @@ -460,7 +460,7 @@ #define DWC3_DEVTEN_CMDCMPLTEN BIT(10) #define DWC3_DEVTEN_ERRTICERREN BIT(9) #define DWC3_DEVTEN_SOFEN BIT(7) -#define DWC3_DEVTEN_EOPFEN BIT(6) +#define DWC3_DEVTEN_U3L2L1SUSPEN BIT(6) #define DWC3_DEVTEN_HIBERNATIONREQEVTEN BIT(5) #define DWC3_DEVTEN_WKUPEVTEN BIT(4) #define DWC3_DEVTEN_ULSTCNGEN BIT(3) @@ -1375,7 +1375,7 @@ struct dwc3_event_depevt { * 3 - ULStChng * 4 - WkUpEvt * 5 - Reserved - * 6 - EOPF + * 6 - Suspend (EOPF on revisions 2.10a and prior) * 7 - SOF * 8 - Reserved * 9 - ErrticErr diff --git a/drivers/usb/dwc3/debug.h b/drivers/usb/dwc3/debug.h index db231de46bb3..d0ac89c5b317 100644 --- a/drivers/usb/dwc3/debug.h +++ b/drivers/usb/dwc3/debug.h @@ -221,8 +221,8 @@ static inline const char *dwc3_gadget_event_string(char *str, size_t size, snprintf(str, size, "WakeUp [%s]", dwc3_gadget_link_string(state)); break; - case DWC3_DEVICE_EVENT_EOPF: - snprintf(str, size, "End-Of-Frame [%s]", + case DWC3_DEVICE_EVENT_SUSPEND: + snprintf(str, size, "Suspend [%s]", dwc3_gadget_link_string(state)); break; case DWC3_DEVICE_EVENT_SOF: @@ -353,8 +353,8 @@ static inline const char *dwc3_gadget_event_type_string(u8 event) return "Wake-Up"; case DWC3_DEVICE_EVENT_HIBER_REQ: return "Hibernation"; - case DWC3_DEVICE_EVENT_EOPF: - return "End of Periodic Frame"; + case DWC3_DEVICE_EVENT_SUSPEND: + return "Suspend"; case DWC3_DEVICE_EVENT_SOF: return "Start of Frame"; case DWC3_DEVICE_EVENT_ERRATIC_ERROR: diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index cab3a9184068..6eab78f8a1a7 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2325,7 +2325,7 @@ static void dwc3_gadget_enable_irq(struct dwc3 *dwc) /* On 2.30a and above this bit enables U3/L2-L1 Suspend Events */ if (!DWC3_VER_IS_PRIOR(DWC3, 230A)) - reg |= DWC3_DEVTEN_EOPFEN; + reg |= DWC3_DEVTEN_U3L2L1SUSPEN; dwc3_writel(dwc->regs, DWC3_DEVTEN, reg); } @@ -3744,7 +3744,7 @@ static void dwc3_gadget_interrupt(struct dwc3 *dwc, case DWC3_DEVICE_EVENT_LINK_STATUS_CHANGE: dwc3_gadget_linksts_change_interrupt(dwc, event->event_info); break; - case DWC3_DEVICE_EVENT_EOPF: + case DWC3_DEVICE_EVENT_SUSPEND: /* It changed to be suspend event for version 2.30a and above */ if (!DWC3_VER_IS_PRIOR(DWC3, 230A)) { /* -- cgit From 75a41ce46bae6cbe7d3bb2584eb844291d642874 Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Thu, 6 May 2021 12:22:00 +0100 Subject: usb: dwc2: Fix gadget DMA unmap direction The dwc2 gadget support maps and unmaps DMA buffers as necessary. 
When mapping and unmapping it uses the direction of the endpoint to select the direction of the DMA transfer, but this fails for Control OUT transfers because the unmap occurs after the endpoint direction has been reversed for the status phase. A possible solution would be to unmap the buffer before the direction is changed, but a safer, less invasive fix is to remember the buffer direction independently of the endpoint direction. Fixes: fe0b94abcdf6 ("usb: dwc2: gadget: manage ep0 state in software") Acked-by: Minas Harutyunyan Cc: stable Signed-off-by: Phil Elwell Link: https://lore.kernel.org/r/20210506112200.2893922-1-phil@raspberrypi.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/core.h | 2 ++ drivers/usb/dwc2/gadget.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index da5ac4a4595b..ab6b815e0089 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -113,6 +113,7 @@ struct dwc2_hsotg_req; * @debugfs: File entry for debugfs file for this endpoint. * @dir_in: Set to true if this endpoint is of the IN direction, which * means that it is sending data to the Host. + * @map_dir: Set to the value of dir_in when the DMA buffer is mapped. * @index: The index for the endpoint registers. * @mc: Multi Count - number of transactions per microframe * @interval: Interval for periodic endpoints, in frames or microframes. @@ -162,6 +163,7 @@ struct dwc2_hsotg_ep { unsigned short fifo_index; unsigned char dir_in; + unsigned char map_dir; unsigned char index; unsigned char mc; u16 interval; diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index e6bb1bdb2760..184964174dc0 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -422,7 +422,7 @@ static void dwc2_hsotg_unmap_dma(struct dwc2_hsotg *hsotg, { struct usb_request *req = &hs_req->req; - usb_gadget_unmap_request(&hsotg->gadget, req, hs_ep->dir_in); + usb_gadget_unmap_request(&hsotg->gadget, req, hs_ep->map_dir); } /* @@ -1242,6 +1242,7 @@ static int dwc2_hsotg_map_dma(struct dwc2_hsotg *hsotg, { int ret; + hs_ep->map_dir = hs_ep->dir_in; ret = usb_gadget_map_request(&hsotg->gadget, req, hs_ep->dir_in); if (ret) goto dma_error; -- cgit From bb9c74a5bd1462499fe5ccb1e3c5ac40dcfa9139 Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Sat, 1 May 2021 02:35:58 -0700 Subject: usb: dwc3: gadget: Free gadget structure only after freeing endpoints As part of commit e81a7018d93a ("usb: dwc3: allocate gadget structure dynamically"), dwc3_gadget_release() was added, which frees the dwc->gadget structure upon the device's removal when usb_del_gadget_udc() is called in dwc3_gadget_exit(). However, simply freeing the gadget results in a dangling-pointer situation: the endpoints created in dwc3_gadget_init_endpoints() have their dep->endpoint.ep_list members chained off the list_head anchored at dwc->gadget->ep_list. Thus when dwc->gadget is freed, the first dwc3_ep in the list now has a dangling prev pointer, and likewise for the next pointer of the dwc3_ep at the tail of the list. The dwc3_gadget_free_endpoints() that follows will result in a use-after-free when it calls list_del(). This was caught by enabling KASAN and performing a driver unbind. The recent commit 568262bf5492 ("usb: dwc3: core: Add shutdown callback for dwc3") also exposes this as a panic during shutdown. There are a few possibilities to fix this.
One could be to perform a list_del() of the gadget->ep_list itself, which removes it from the rest of the dwc3_ep chain. Another approach is what this patch does, by splitting up the usb_del_gadget_udc() call into its separate "del" and "put" components. This allows dwc3_gadget_free_endpoints() to be called before the gadget is finally freed with usb_put_gadget(). Fixes: e81a7018d93a ("usb: dwc3: allocate gadget structure dynamically") Reviewed-by: Peter Chen Signed-off-by: Jack Pham Link: https://lore.kernel.org/r/20210501093558.7375-1-jackp@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 6eab78f8a1a7..dd1342403bb2 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4062,8 +4062,9 @@ err0: void dwc3_gadget_exit(struct dwc3 *dwc) { - usb_del_gadget_udc(dwc->gadget); + usb_del_gadget(dwc->gadget); dwc3_gadget_free_endpoints(dwc); + usb_put_gadget(dwc->gadget); dma_free_coherent(dwc->sysdev, DWC3_BOUNCE_SIZE, dwc->bounce, dwc->bounce_addr); kfree(dwc->setup_buf); -- cgit From 18ffa988dbae69cc6e9949cddd9606f6fe533894 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Fri, 7 May 2021 10:55:19 -0700 Subject: usb: dwc3: gadget: Return success always for kick transfer in ep queue If an error is received when issuing a start or update transfer command, the error handler will stop all active requests (including the current USB request), and call dwc3_gadget_giveback() to notify function drivers of the requests which have been stopped. Avoid returning an error for kick transfer during EP queue, to remove duplicate cleanup operations on the request being queued. Fixes: 8d99087c2db8 ("usb: dwc3: gadget: Properly handle failed kick_transfer") cc: stable@vger.kernel.org Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/1620410119-24971-1-git-send-email-wcheng@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index dd1342403bb2..49ca5da5e279 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1684,7 +1684,9 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req) } } - return __dwc3_gadget_kick_transfer(dep); + __dwc3_gadget_kick_transfer(dep); + + return 0; } static int dwc3_gadget_ep_queue(struct usb_ep *ep, struct usb_request *request, -- cgit From b96992081fde19806b5beb5b25f9327820ead77b Mon Sep 17 00:00:00 2001 From: Li Jun Date: Fri, 30 Apr 2021 14:57:16 +0800 Subject: usb: dwc3: imx8mp: detect dwc3 core node via compatible string The new DT schema requires USB controller nodes to be named with the prefix "^usb(@.*)?". The device tree changed the node name accordingly but missed the counterpart change in the driver; fix it by switching to a compatible-string lookup, as the dwc3 core compatible string remains "snps,dwc3" in all device trees.
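For illustration, the lookup style the fix switches to, as a hedged kernel-side sketch (a bare fragment, not the full glue driver):

    #include <linux/of.h>
    #include <linux/platform_device.h>

    /*
     * Sketch: find the dwc3 core child by its compatible string rather than
     * by node name, so renaming the child node in the DT (e.g. "dwc3" ->
     * "usb@...") keeps working. The returned node has its refcount raised;
     * the caller must drop it with of_node_put() when done.
     */
    static struct device_node *find_dwc3_core(struct platform_device *pdev)
    {
            return of_get_compatible_child(pdev->dev.of_node, "snps,dwc3");
    }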
Fixes: d1689cd3c0f4 ("arm64: dts: imx8mp: Use the correct name for child node "snps, dwc3"") Acked-by: Felipe Balbi Signed-off-by: Li Jun Link: https://lore.kernel.org/r/1619765836-20387-1-git-send-email-jun.li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-imx8mp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-imx8mp.c b/drivers/usb/dwc3/dwc3-imx8mp.c index b13cfab89d53..e9fced6f7a7c 100644 --- a/drivers/usb/dwc3/dwc3-imx8mp.c +++ b/drivers/usb/dwc3/dwc3-imx8mp.c @@ -165,7 +165,7 @@ static int dwc3_imx8mp_probe(struct platform_device *pdev) if (err < 0) goto disable_rpm; - dwc3_np = of_get_child_by_name(node, "dwc3"); + dwc3_np = of_get_compatible_child(node, "snps,dwc3"); if (!dwc3_np) { dev_err(dev, "failed to find dwc3 core child\n"); goto disable_rpm; -- cgit From 0b2b149e918f6dddb4ea53615551bf7bc131f875 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 8 May 2021 09:53:10 +0800 Subject: usb: dwc3: imx8mp: fix error return code in dwc3_imx8mp_probe() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 6dd2565989b4 ("usb: dwc3: add imx8mp dwc3 glue layer driver") Reported-by: Hulk Robot Acked-by: Felipe Balbi Signed-off-by: Zhen Lei Cc: stable Link: https://lore.kernel.org/r/20210508015310.1627-1-thunder.leizhen@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-imx8mp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/dwc3-imx8mp.c b/drivers/usb/dwc3/dwc3-imx8mp.c index e9fced6f7a7c..756faa46d33a 100644 --- a/drivers/usb/dwc3/dwc3-imx8mp.c +++ b/drivers/usb/dwc3/dwc3-imx8mp.c @@ -167,6 +167,7 @@ static int dwc3_imx8mp_probe(struct platform_device *pdev) dwc3_np = of_get_compatible_child(node, "snps,dwc3"); if (!dwc3_np) { + err = -ENODEV; dev_err(dev, "failed to find dwc3 core child\n"); goto disable_rpm; } -- cgit From e89baeba4f64bab679618b3330cdcda5929fb8d5 Mon Sep 17 00:00:00 2001 From: Matthijs Kooijman Date: Mon, 3 May 2021 20:05:38 +0200 Subject: usb: dwc2: Remove obsolete MODULE_ constants from platform.c Originally, the core and platform drivers were separate modules, so each had its own module info. Since commit 2d1165a4b95e (usb: dwc2: remove dwc2_platform.ko) platform.c is included in the core module, which now contains duplicate module info (from core.c and platform.c). Due to the linking order and modinfo implementation, running `modinfo` on the resulting dwc2.ko shows just the info from platform.c, rather than that from core.c, suggesting that I am the author of the entire dwc2 module. Since platform.c is just a minor part of the entire module, this removes its module info in favor of the info from core.c. 
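The rule being applied here, shown as a hedged two-file sketch (file and symbol names are made up): when several objects link into one .ko, the MODULE_* metadata should live in exactly one of them.

    /* core.c - the one translation unit carrying the module metadata */
    #include <linux/module.h>

    static int __init demo_init(void)
    {
            return 0;
    }
    module_init(demo_init);

    static void __exit demo_exit(void)
    {
    }
    module_exit(demo_exit);

    MODULE_DESCRIPTION("Demo module built from core.o and platform.o");
    MODULE_LICENSE("GPL");

    /*
     * platform.c - contributes code only. No MODULE_* macros here: duplicates
     * would make `modinfo demo.ko` report whichever copy modinfo encounters
     * first in the linked object, exactly the confusion described above.
     */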
Acked-by: Minas Harutyunyan Signed-off-by: Matthijs Kooijman Link: https://lore.kernel.org/r/20210503180538.64423-1-matthijs@stdin.nl Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/platform.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index 3024785d84cb..520a0beef77c 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -776,7 +776,3 @@ static struct platform_driver dwc2_platform_driver = { }; module_platform_driver(dwc2_platform_driver); - -MODULE_DESCRIPTION("DESIGNWARE HS OTG Platform Glue"); -MODULE_AUTHOR("Matthijs Kooijman "); -MODULE_LICENSE("Dual BSD/GPL"); -- cgit From 2e2b8d15adc2f6ab2d4aa0550e241b9742a436a0 Mon Sep 17 00:00:00 2001 From: Kyle Tso Date: Tue, 4 May 2021 01:18:49 +0800 Subject: usb: typec: tcpm: Fix wrong handling in GET_SINK_CAP After receiving a Sink Capabilities Message in the GET_SINK_CAP AMS, it is incorrect to call tcpm_pd_handle_state because the Message is expected and the current state is not one of the Ready states. This incorrect operation ends in a Soft Reset, which is definitely wrong. Simply forwarding to the Ready states is enough to finish the AMS. Fixes: 8dea75e11380 ("usb: typec: tcpm: Protocol Error handling") Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Kyle Tso Cc: stable Link: https://lore.kernel.org/r/20210503171849.2605302-1-kyletso@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index c4fdc00a3bc8..68e04e397e92 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -2390,7 +2390,7 @@ static void tcpm_pd_data_request(struct tcpm_port *port, port->nr_sink_caps = cnt; port->sink_cap_done = true; if (port->ams == GET_SINK_CAPABILITIES) - tcpm_pd_handle_state(port, ready_state(port), NONE_AMS, 0); + tcpm_set_state(port, ready_state(port), 0); /* Unexpected Sink Capabilities */ else tcpm_pd_handle_msg(port, -- cgit From 8edb79af88efc6e49e735f9baf61d9f0748b881f Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Wed, 7 Apr 2021 11:49:27 +0800 Subject: iio: light: gp2ap002: Fix runtime PM imbalance on error When devm_request_threaded_irq() fails, we should decrease the runtime PM counter to keep the counter balanced. But when iio_device_register() fails, we need not decrease it, because we have already decreased it before.
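The balanced pattern the fix restores, as a generic hedged sketch; demo_request_irq() and demo_register() are hypothetical steps, not the driver's functions:

    #include <linux/device.h>
    #include <linux/pm_runtime.h>

    static int demo_request_irq(struct device *dev); /* hypothetical */
    static int demo_register(struct device *dev);    /* hypothetical */

    static int demo_probe(struct device *dev)
    {
            int ret;

            pm_runtime_get_noresume(dev); /* usage counter++ */
            pm_runtime_enable(dev);

            ret = demo_request_irq(dev);
            if (ret)
                    goto out_put_pm; /* counter still held: drop it once */

            pm_runtime_put(dev); /* counter--; device may now suspend */

            ret = demo_register(dev);
            if (ret)
                    goto out_disable_pm; /* counter already dropped: don't drop twice */

            return 0;

    out_put_pm:
            pm_runtime_put_noidle(dev);
    out_disable_pm:
            pm_runtime_disable(dev);
            return ret;
    }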
Signed-off-by: Dinghao Liu Reviewed-by: Linus Walleij Fixes: 97d642e23037 ("iio: light: Add a driver for Sharp GP2AP002x00F") Link: https://lore.kernel.org/r/20210407034927.16882-1-dinghao.liu@zju.edu.cn Signed-off-by: Jonathan Cameron --- drivers/iio/light/gp2ap002.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iio/light/gp2ap002.c b/drivers/iio/light/gp2ap002.c index d048ae257c51..f960be7d4001 100644 --- a/drivers/iio/light/gp2ap002.c +++ b/drivers/iio/light/gp2ap002.c @@ -582,7 +582,7 @@ static int gp2ap002_probe(struct i2c_client *client, "gp2ap002", indio_dev); if (ret) { dev_err(dev, "unable to request IRQ\n"); - goto out_disable_vio; + goto out_put_pm; } gp2ap002->irq = client->irq; @@ -612,8 +612,9 @@ static int gp2ap002_probe(struct i2c_client *client, return 0; -out_disable_pm: +out_put_pm: pm_runtime_put_noidle(dev); +out_disable_pm: pm_runtime_disable(dev); out_disable_vio: regulator_disable(gp2ap002->vio); -- cgit From a2fa9242e89f27696515699fe0f0296bf1ac1815 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 12 Apr 2021 13:32:02 +0800 Subject: iio: proximity: pulsedlight: Fix runtime PM imbalance on error When lidar_write_control() fails, a pairing PM usage counter decrement is needed to keep the counter balanced. Fixes: 4ac4e086fd8c5 ("iio: pulsedlight-lidar-lite: add runtime PM") Signed-off-by: Dinghao Liu Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210412053204.4889-1-dinghao.liu@zju.edu.cn Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/pulsedlight-lidar-lite-v2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c index c685f10b5ae4..cc206bfa09c7 100644 --- a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c +++ b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c @@ -160,6 +160,7 @@ static int lidar_get_measurement(struct lidar_data *data, u16 *reg) ret = lidar_write_control(data, LIDAR_REG_CONTROL_ACQUIRE); if (ret < 0) { dev_err(&client->dev, "cannot send start measurement command"); + pm_runtime_put_noidle(&client->dev); return ret; } -- cgit From 7061803522ee7876df1ca18cdd1e1551f761352d Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Wed, 14 Apr 2021 11:49:55 +0300 Subject: iio: hid-sensors: select IIO_TRIGGERED_BUFFER under HID_SENSOR_IIO_TRIGGER In commit 067fda1c065ff ("iio: hid-sensors: move triggered buffer setup into hid_sensor_setup_trigger"), the iio_triggered_buffer_{setup,cleanup}() functions were moved under the hid-sensor-trigger module. The above change works fine if any of the sensor drivers are built. However, when only the common hid-sensor-trigger module gets built (and none of the drivers), then the IIO_TRIGGERED_BUFFER symbol isn't selected/enforced. Previously, each driver would enforce/select the IIO_TRIGGERED_BUFFER symbol. With this change, HID_SENSOR_IIO_TRIGGER (for the hid-sensor-trigger module) enforces that IIO_TRIGGERED_BUFFER gets selected. All HID sensor drivers select the HID_SENSOR_IIO_TRIGGER symbol. So, this change removes the IIO_TRIGGERED_BUFFER enforcement from each driver.
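Why the common trigger module must now own the dependency, sketched below: it is the code that actually calls into the triggered-buffer core, roughly as follows (a simplified shape of such a helper, not the real function body):

    #include <linux/iio/iio.h>
    #include <linux/iio/trigger_consumer.h>
    #include <linux/iio/triggered_buffer.h>

    /*
     * Since this call lives in the shared trigger module, that module's
     * Kconfig entry is the one that must select IIO_TRIGGERED_BUFFER;
     * the individual sensor drivers no longer reference the symbol.
     */
    static int demo_setup_trigger(struct iio_dev *indio_dev)
    {
            return iio_triggered_buffer_setup(indio_dev,
                                              iio_pollfunc_store_time, /* top half */
                                              NULL,  /* no threaded bottom half */
                                              NULL); /* default buffer setup ops */
    }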
Fixes: 067fda1c065ff ("iio: hid-sensors: move triggered buffer setup into hid_sensor_setup_trigger") Reported-by: Thomas Deutschmann Cc: Srinivas Pandruvada Signed-off-by: Alexandru Ardelean Acked-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20210414084955.260117-1-aardelean@deviqon.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/Kconfig | 1 - drivers/iio/common/hid-sensors/Kconfig | 1 + drivers/iio/gyro/Kconfig | 1 - drivers/iio/humidity/Kconfig | 1 - drivers/iio/light/Kconfig | 2 -- drivers/iio/magnetometer/Kconfig | 1 - drivers/iio/orientation/Kconfig | 2 -- drivers/iio/pressure/Kconfig | 1 - drivers/iio/temperature/Kconfig | 1 - 9 files changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig index cceda3cecbcf..8b1723635cce 100644 --- a/drivers/iio/accel/Kconfig +++ b/drivers/iio/accel/Kconfig @@ -229,7 +229,6 @@ config DMARD10 config HID_SENSOR_ACCEL_3D depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID Accelerometers 3D" diff --git a/drivers/iio/common/hid-sensors/Kconfig b/drivers/iio/common/hid-sensors/Kconfig index 24d492567336..2a3dd3b907be 100644 --- a/drivers/iio/common/hid-sensors/Kconfig +++ b/drivers/iio/common/hid-sensors/Kconfig @@ -19,6 +19,7 @@ config HID_SENSOR_IIO_TRIGGER tristate "Common module (trigger) for all HID Sensor IIO drivers" depends on HID_SENSOR_HUB && HID_SENSOR_IIO_COMMON && IIO_BUFFER select IIO_TRIGGER + select IIO_TRIGGERED_BUFFER help Say yes here to build trigger support for HID sensors. Triggers will be send if all requested attributes were read. diff --git a/drivers/iio/gyro/Kconfig b/drivers/iio/gyro/Kconfig index 5824f2edf975..20b5ac7ab66a 100644 --- a/drivers/iio/gyro/Kconfig +++ b/drivers/iio/gyro/Kconfig @@ -111,7 +111,6 @@ config FXAS21002C_SPI config HID_SENSOR_GYRO_3D depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID Gyroscope 3D" diff --git a/drivers/iio/humidity/Kconfig b/drivers/iio/humidity/Kconfig index 6549fcf6db69..2de5494e7c22 100644 --- a/drivers/iio/humidity/Kconfig +++ b/drivers/iio/humidity/Kconfig @@ -52,7 +52,6 @@ config HID_SENSOR_HUMIDITY tristate "HID Environmental humidity sensor" depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER help diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig index 33ad4dd0b5c7..917f9becf9c7 100644 --- a/drivers/iio/light/Kconfig +++ b/drivers/iio/light/Kconfig @@ -256,7 +256,6 @@ config ISL29125 config HID_SENSOR_ALS depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID ALS" @@ -270,7 +269,6 @@ config HID_SENSOR_ALS config HID_SENSOR_PROX depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID PROX" diff --git a/drivers/iio/magnetometer/Kconfig b/drivers/iio/magnetometer/Kconfig index 5d4ffd66032e..74ad5701c6c2 100644 --- a/drivers/iio/magnetometer/Kconfig +++ b/drivers/iio/magnetometer/Kconfig @@ -95,7 +95,6 @@ config MAG3110 config HID_SENSOR_MAGNETOMETER_3D depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID Magenetometer 3D" diff --git 
a/drivers/iio/orientation/Kconfig b/drivers/iio/orientation/Kconfig index a505583cc2fd..396cbbb867f4 100644 --- a/drivers/iio/orientation/Kconfig +++ b/drivers/iio/orientation/Kconfig @@ -9,7 +9,6 @@ menu "Inclinometer sensors" config HID_SENSOR_INCLINOMETER_3D depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID Inclinometer 3D" @@ -20,7 +19,6 @@ config HID_SENSOR_INCLINOMETER_3D config HID_SENSOR_DEVICE_ROTATION depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID Device Rotation" diff --git a/drivers/iio/pressure/Kconfig b/drivers/iio/pressure/Kconfig index 689b978db4f9..fc0d3cfca418 100644 --- a/drivers/iio/pressure/Kconfig +++ b/drivers/iio/pressure/Kconfig @@ -79,7 +79,6 @@ config DPS310 config HID_SENSOR_PRESS depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID PRESS" diff --git a/drivers/iio/temperature/Kconfig b/drivers/iio/temperature/Kconfig index f1f2a1499c9e..4df60082c1fa 100644 --- a/drivers/iio/temperature/Kconfig +++ b/drivers/iio/temperature/Kconfig @@ -45,7 +45,6 @@ config HID_SENSOR_TEMP tristate "HID Environmental temperature sensor" depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER help -- cgit From f73c730774d88a14d7b60feee6d0e13570f99499 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 23 Apr 2021 05:09:59 +0300 Subject: iio: gyro: mpu3050: Fix reported temperature value The raw temperature value is a 16-bit signed integer. The sign casting is missing in the code, which results in a wrong temperature reported by userspace tools, fix it. Cc: stable@vger.kernel.org Fixes: 3904b28efb2c ("iio: gyro: Add driver for the MPU-3050 gyroscope") Datasheet: https://www.cdiweb.com/datasheets/invensense/mpu-3000a.pdf Tested-by: Maxim Schwalm # Asus TF700T Tested-by: Svyatoslav Ryhel # Asus TF201 Reported-by: Svyatoslav Ryhel Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Signed-off-by: Dmitry Osipenko Acked-by: Jean-Baptiste Maneyrol Link: https://lore.kernel.org/r/20210423020959.5023-1-digetx@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/mpu3050-core.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c index ac90be03332a..f17a93519535 100644 --- a/drivers/iio/gyro/mpu3050-core.c +++ b/drivers/iio/gyro/mpu3050-core.c @@ -272,7 +272,16 @@ static int mpu3050_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_OFFSET: switch (chan->type) { case IIO_TEMP: - /* The temperature scaling is (x+23000)/280 Celsius */ + /* + * The temperature scaling is (x+23000)/280 Celsius + * for the "best fit straight line" temperature range + * of -30C..85C. The 23000 includes room temperature + * offset of +35C, 280 is the precision scale and x is + * the 16-bit signed integer reported by hardware. + * + * Temperature value itself represents temperature of + * the sensor die. 
+ */ *val = 23000; return IIO_VAL_INT; default: @@ -329,7 +338,7 @@ static int mpu3050_read_raw(struct iio_dev *indio_dev, goto out_read_raw_unlock; } - *val = be16_to_cpu(raw_val); + *val = (s16)be16_to_cpu(raw_val); ret = IIO_VAL_INT; goto out_read_raw_unlock; -- cgit From 901f84de0e16bde10a72d7eb2f2eb73fcde8fa1a Mon Sep 17 00:00:00 2001 From: Tomasz Duszynski Date: Fri, 23 Apr 2021 10:02:44 +0200 Subject: iio: core: fix ioctl handlers removal Currently ioctl handlers are removed twice: the first time during iio_device_unregister(), then later on inside iio_device_unregister_eventset() and iio_buffers_free_sysfs_and_mask(). The double free leads to a kernel panic. Fix this by not touching the ioctl handlers list directly, but rather letting the code responsible for registration call the matching cleanup routine itself. Fixes: 8dedcc3eee3ac ("iio: core: centralize ioctl() calls to the main chardev") Signed-off-by: Tomasz Duszynski Acked-by: Alexandru Ardelean Cc: Link: https://lore.kernel.org/r/20210423080244.2790-1-tomasz.duszynski@octakon.com Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index d92c58a94fe4..9e59f5da3d28 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -1926,9 +1926,6 @@ EXPORT_SYMBOL(__iio_device_register); **/ void iio_device_unregister(struct iio_dev *indio_dev) { - struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); - struct iio_ioctl_handler *h, *t; - cdev_device_del(&indio_dev->chrdev, &indio_dev->dev); mutex_lock(&indio_dev->info_exist_lock); @@ -1939,9 +1936,6 @@ void iio_device_unregister(struct iio_dev *indio_dev) indio_dev->info = NULL; - list_for_each_entry_safe(h, t, &iio_dev_opaque->ioctl_handlers, entry) - list_del(&h->entry); - iio_device_wakeup_eventset(indio_dev); iio_buffer_wakeup_poll(indio_dev); -- cgit From af0670b0bf1b116fd729b1b1011cf814bc34e12e Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 3 May 2021 17:43:50 +0300 Subject: iio: core: return ENODEV if ioctl is unknown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the ioctl() mechanism was introduced in the IIO core to centralize the registration of all ioctls in one place via commit 8dedcc3eee3ac ("iio: core: centralize ioctl() calls to the main chardev"), the return code for an unknown ioctl code was changed from ENODEV to EINVAL. This was done by accident. This change reverts to the old behavior, where if the ioctl() code isn't known, ENODEV is returned (vs EINVAL).
This was brought into perspective by this patch: https://lore.kernel.org/linux-iio/20210428150815.136150-1-paul@crapouillou.net/ Fixes: 8dedcc3eee3ac ("iio: core: centralize ioctl() calls to the main chardev") Signed-off-by: Alexandru Ardelean Reviewed-by: Nuno Sá Tested-by: Paul Cercueil Reviewed-by: Linus Walleij Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 9e59f5da3d28..59efb36db2c7 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -1778,7 +1778,6 @@ static long iio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (!indio_dev->info) goto out_unlock; - ret = -EINVAL; list_for_each_entry(h, &iio_dev_opaque->ioctl_handlers, entry) { ret = h->ioctl(indio_dev, filp, cmd, arg); if (ret != IIO_IOCTL_UNHANDLED) @@ -1786,7 +1785,7 @@ static long iio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } if (ret == IIO_IOCTL_UNHANDLED) - ret = -EINVAL; + ret = -ENODEV; out_unlock: mutex_unlock(&indio_dev->info_exist_lock); -- cgit From af0e1871d79cfbb91f732d2c6fa7558e45c31038 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 7 May 2021 19:30:41 +0100 Subject: iio: tsl2583: Fix division by a zero lux_val The lux_val returned from tsl2583_get_lux can potentially be zero, so check for this to avoid a division by zero and an overflowed gain_trim_val. Fixes clang scan-build warning: drivers/iio/light/tsl2583.c:345:40: warning: Either the condition 'lux_val<0' is redundant or there is division by zero at line 345. [zerodivcond] Fixes: ac4f6eee8fe8 ("staging: iio: TAOS tsl258x: Device driver") Signed-off-by: Colin Ian King Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/tsl2583.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iio/light/tsl2583.c b/drivers/iio/light/tsl2583.c index 0f787bfc88fc..c9d8f07a6fcd 100644 --- a/drivers/iio/light/tsl2583.c +++ b/drivers/iio/light/tsl2583.c @@ -341,6 +341,14 @@ static int tsl2583_als_calibrate(struct iio_dev *indio_dev) return lux_val; } + /* Avoid division by zero of lux_value later on */ + if (lux_val == 0) { + dev_err(&chip->client->dev, + "%s: lux_val of 0 will produce out of range trim_value\n", + __func__); + return -ENODATA; + } + gain_trim_val = (unsigned int)(((chip->als_settings.als_cal_target) * chip->als_settings.als_gain_trim) / lux_val); if ((gain_trim_val < 250) || (gain_trim_val > 4000)) { -- cgit From b9a0866a5bdf6a4643a52872ada6be6184c6f4f2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 5 May 2021 01:23:37 +0300 Subject: usb: typec: ucsi: Put fwnode in any case during ->probe() device_for_each_child_node() bumps the reference count of each returned node. We have to balance it whenever we return to the caller.
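The general rule behind the fix, as a hedged sketch (demo_matches() is a hypothetical predicate): every child yielded by the iterator carries a reference, so a child kept past the loop is the caller's to put.

    #include <linux/device.h>
    #include <linux/property.h>

    static bool demo_matches(struct fwnode_handle *fwnode); /* hypothetical */

    /*
     * Returns the first matching child with its reference held; the caller
     * must call fwnode_handle_put() on it after use. Children the loop
     * passes over are put by the iterator itself on the next iteration.
     */
    static struct fwnode_handle *demo_find_child(struct device *dev)
    {
            struct fwnode_handle *child;

            device_for_each_child_node(dev, child) {
                    if (demo_matches(child))
                            return child;
            }
            return NULL;
    }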
Fixes: c1b0bc2dabfa ("usb: typec: Add support for UCSI interface") Cc: Heikki Krogerus Reviewed-by: Heikki Krogerus Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210504222337.3151726-1-andy.shevchenko@gmail.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 282c3c825c13..0e1cec346e0f 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -999,6 +999,7 @@ static const struct typec_operations ucsi_ops = { .pr_set = ucsi_pr_swap }; +/* Caller must call fwnode_handle_put() after use */ static struct fwnode_handle *ucsi_find_fwnode(struct ucsi_connector *con) { struct fwnode_handle *fwnode; @@ -1033,7 +1034,7 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) command |= UCSI_CONNECTOR_NUMBER(con->num); ret = ucsi_send_command(ucsi, command, &con->cap, sizeof(con->cap)); if (ret < 0) - goto out; + goto out_unlock; if (con->cap.op_mode & UCSI_CONCAP_OPMODE_DRP) cap->data = TYPEC_PORT_DRD; @@ -1151,6 +1152,8 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) trace_ucsi_register_port(con->num, &con->status); out: + fwnode_handle_put(cap->fwnode); +out_unlock: mutex_unlock(&con->lock); return ret; } -- cgit From e17b02d4970913233d543c79c9c66e72cac05bdd Mon Sep 17 00:00:00 2001 From: Marcel Hamer Date: Tue, 27 Apr 2021 14:21:18 +0200 Subject: usb: dwc3: omap: improve extcon initialization When extcon is used in combination with dwc3, it is assumed that the dwc3 registers are untouched and as such are only configured if VBUS is valid or ID is tied to ground. In case VBUS is not valid or ID is floating, the registers are not configured as such during driver initialization, causing a wrong default state during boot. If the registers are not in a default state, for instance because they were touched by a boot loader, this can cause a kernel error.
Signed-off-by: Marcel Hamer Link: https://lore.kernel.org/r/20210427122118.1948340-1-marcel@solidxs.se Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-omap.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index 3db17806e92e..e196673f5c64 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -437,8 +437,13 @@ static int dwc3_omap_extcon_register(struct dwc3_omap *omap) if (extcon_get_state(edev, EXTCON_USB) == true) dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_VALID); + else + dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_OFF); + if (extcon_get_state(edev, EXTCON_USB_HOST) == true) dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_GROUND); + else + dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_FLOAT); omap->edev = edev; } -- cgit From f75297853470627c4ee4e2b80eed40af7441c96b Mon Sep 17 00:00:00 2001 From: Wei Ming Chen Date: Thu, 6 May 2021 20:20:20 +0800 Subject: docs: usb: function: Modify path name The original path does not exist, so change it to "Documentation/ABI/testing/configfs-usb-gadget" Signed-off-by: Wei Ming Chen Link: https://lore.kernel.org/r/20210506122020.7117-1-jj251510319013@gmail.com Signed-off-by: Greg Kroah-Hartman --- Documentation/usb/gadget_configfs.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/usb/gadget_configfs.rst b/Documentation/usb/gadget_configfs.rst index 158e48dab586..e4566ffb223f 100644 --- a/Documentation/usb/gadget_configfs.rst +++ b/Documentation/usb/gadget_configfs.rst @@ -140,7 +140,7 @@ is an arbitrary string allowed in a filesystem, e.g.:: Each function provides its specific set of attributes, with either read-only or read-write access. Where applicable they need to be written to as appropriate. -Please refer to Documentation/ABI/*/configfs-usb-gadget* for more information. +Please refer to Documentation/ABI/testing/configfs-usb-gadget for more information. 4. Associating the functions with their configurations ------------------------------------------------------ -- cgit From a60a34366e0d09ca002c966dd7c43a68c28b1f82 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 6 May 2021 22:39:10 +0200 Subject: usb: fotg210-hcd: Fix an error message 'retval' is known to be -ENODEV here. This is a hard-coded default error code which is not useful in the error message. Moreover, another error message is printed at the end of the error handling path. The corresponding error code (-ENOMEM) is more informative. So simplify the first error message. While at it, also remove the useless initialization of 'retval'.
Fixes: 7d50195f6c50 ("usb: host: Faraday fotg210-hcd driver") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/94531bcff98e46d4f9c20183a90b7f47f699126c.1620333419.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/fotg210-hcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/fotg210-hcd.c b/drivers/usb/host/fotg210-hcd.c index 6cac642520fc..9c2eda0918e1 100644 --- a/drivers/usb/host/fotg210-hcd.c +++ b/drivers/usb/host/fotg210-hcd.c @@ -5568,7 +5568,7 @@ static int fotg210_hcd_probe(struct platform_device *pdev) struct usb_hcd *hcd; struct resource *res; int irq; - int retval = -ENODEV; + int retval; struct fotg210_hcd *fotg210; if (usb_disabled()) @@ -5588,7 +5588,7 @@ static int fotg210_hcd_probe(struct platform_device *pdev) hcd = usb_create_hcd(&fotg210_fotg210_hc_driver, dev, dev_name(dev)); if (!hcd) { - dev_err(dev, "failed to create hcd with err %d\n", retval); + dev_err(dev, "failed to create hcd\n"); retval = -ENOMEM; goto fail_create_hcd; } -- cgit From 726c945ab2ebd104631b6105ab455a5bc604a3f1 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Tue, 27 Apr 2021 12:42:19 +0800 Subject: hwmon: (corsair-psu) Remove unneeded semicolons Fix the following coccicheck warning: ./drivers/hwmon/corsair-psu.c:379:2-3: Unneeded semicolon Remove unneeded semicolons. Signed-off-by: Wan Jiabing Link: https://lore.kernel.org/r/20210427044219.7799-1-wanjiabing@vivo.com Signed-off-by: Guenter Roeck --- drivers/hwmon/corsair-psu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/corsair-psu.c b/drivers/hwmon/corsair-psu.c index 3a5807e4a2ef..02298b86b57b 100644 --- a/drivers/hwmon/corsair-psu.c +++ b/drivers/hwmon/corsair-psu.c @@ -355,7 +355,7 @@ static umode_t corsairpsu_hwmon_power_is_visible(const struct corsairpsu_data *p return 0444; default: return 0; - }; + } } static umode_t corsairpsu_hwmon_in_is_visible(const struct corsairpsu_data *priv, u32 attr, @@ -376,7 +376,7 @@ static umode_t corsairpsu_hwmon_in_is_visible(const struct corsairpsu_data *priv break; default: break; - }; + } return res; } -- cgit From 5216dff22dc2bbbbe6f00335f9fd2879670e753b Mon Sep 17 00:00:00 2001 From: Eddie James Date: Thu, 29 Apr 2021 10:13:36 -0500 Subject: hwmon: (occ) Fix poll rate limiting The poll rate limiter time was initialized at zero. This breaks the comparison in time_after if jiffies is large. Switch to storing the next update time rather than the previous time, and initialize the time when the device is probed. 
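To see why a zero-initialized timestamp stalls, here is a runnable user-space model of the wrap-safe comparison (time_after() reduced to its signed-subtraction core; the constants are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    /* Same trick as the kernel's time_after(a, b): true iff a is after b,
     * even across a 32-bit wrap, because the difference is viewed signed. */
    static int time_after32(uint32_t a, uint32_t b)
    {
            return (int32_t)(b - a) < 0;
    }

    int main(void)
    {
            uint32_t freq = 1000;          /* stand-in for OCC_UPDATE_FREQUENCY */
            uint32_t jiffies = 0xfffb6c20; /* the kernel starts jiffies near wrap */

            /* Old scheme: last_update = 0, so "jiffies after 0 + freq" is false
             * for a long stretch after boot and polling stalls. Prints 0. */
            printf("old: poll? %d\n", time_after32(jiffies, 0 + freq));

            /* New scheme: next_update seeded at probe time works immediately. */
            uint32_t next_update = jiffies + freq;
            printf("new: poll right away? %d\n", time_after32(jiffies, next_update));
            printf("new: poll once due? %d\n",
                   time_after32(jiffies + 2 * freq, next_update));
            return 0;
    }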
Fixes: c10e753d43eb ("hwmon (occ): Add sensor types and versions") Signed-off-by: Eddie James Link: https://lore.kernel.org/r/20210429151336.18980-1-eajames@linux.ibm.com Signed-off-by: Guenter Roeck --- drivers/hwmon/occ/common.c | 5 +++-- drivers/hwmon/occ/common.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c index f1ac153d0b56..967532afb1c0 100644 --- a/drivers/hwmon/occ/common.c +++ b/drivers/hwmon/occ/common.c @@ -217,9 +217,9 @@ int occ_update_response(struct occ *occ) return rc; /* limit the maximum rate of polling the OCC */ - if (time_after(jiffies, occ->last_update + OCC_UPDATE_FREQUENCY)) { + if (time_after(jiffies, occ->next_update)) { rc = occ_poll(occ); - occ->last_update = jiffies; + occ->next_update = jiffies + OCC_UPDATE_FREQUENCY; } else { rc = occ->last_error; } @@ -1165,6 +1165,7 @@ int occ_setup(struct occ *occ, const char *name) return rc; } + occ->next_update = jiffies + OCC_UPDATE_FREQUENCY; occ_parse_poll_response(occ); rc = occ_setup_sensor_attrs(occ); diff --git a/drivers/hwmon/occ/common.h b/drivers/hwmon/occ/common.h index 67e6968b8978..e6df719770e8 100644 --- a/drivers/hwmon/occ/common.h +++ b/drivers/hwmon/occ/common.h @@ -99,7 +99,7 @@ struct occ { u8 poll_cmd_data; /* to perform OCC poll command */ int (*send_cmd)(struct occ *occ, u8 *cmd); - unsigned long last_update; + unsigned long next_update; struct mutex lock; /* lock OCC access */ struct device *hwmon; -- cgit From 2d101db3e5be3bbee6001d4227705cec70ecb82e Mon Sep 17 00:00:00 2001 From: Václav Kubernát Date: Thu, 29 Apr 2021 09:53:38 +0200 Subject: hwmon: (pmbus/fsp-3y) Fix FSP-3Y YH-5151E non-compliant vout encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I didn't properly test the driver for YH-5151E, so it was completely broken. Firstly, the log/real mapping was incorrect in one case. Secondly, PMBus specifies that output voltages should be in the linear16 encoding. However, the YH-5151E is non-compliant and uses linear11. YM-2151E isn't affected by this. Fix this by converting the values inside the read functions. linear16 gets the exponent from the VOUT_MODE command. The device doesn't support it, so I have to manually supply the value for it. Both supported devices have now been tested to report correct vout values. Fixes: 1734b4135a62 ("hwmon: Add driver for fsp-3y PSUs and PDUs") Signed-off-by: Václav Kubernát Link: https://lore.kernel.org/r/20210429075337.110502-1-kubernat@cesnet.cz Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/fsp-3y.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/pmbus/fsp-3y.c b/drivers/hwmon/pmbus/fsp-3y.c index b177987286ae..e24842475254 100644 --- a/drivers/hwmon/pmbus/fsp-3y.c +++ b/drivers/hwmon/pmbus/fsp-3y.c @@ -57,7 +57,7 @@ static int page_log_to_page_real(int page_log, enum chips chip) case YH5151E_PAGE_12V_LOG: return YH5151E_PAGE_12V_REAL; case YH5151E_PAGE_5V_LOG: - return YH5151E_PAGE_5V_LOG; + return YH5151E_PAGE_5V_REAL; case YH5151E_PAGE_3V3_LOG: return YH5151E_PAGE_3V3_REAL; } @@ -103,8 +103,18 @@ static int set_page(struct i2c_client *client, int page_log) static int fsp3y_read_byte_data(struct i2c_client *client, int page, int reg) { + const struct pmbus_driver_info *info = pmbus_get_driver_info(client); + struct fsp3y_data *data = to_fsp3y_data(info); int rv; + /* + * YH5151-E outputs vout in linear11. The conversion is done when + * reading. 
Here, we have to inject pmbus_core with the correct + * exponent (it is -6). + */ + if (data->chip == yh5151e && reg == PMBUS_VOUT_MODE) + return 0x1A; + rv = set_page(client, page); if (rv < 0) return rv; @@ -114,6 +124,8 @@ static int fsp3y_read_byte_data(struct i2c_client *client, int page, int reg) static int fsp3y_read_word_data(struct i2c_client *client, int page, int phase, int reg) { + const struct pmbus_driver_info *info = pmbus_get_driver_info(client); + struct fsp3y_data *data = to_fsp3y_data(info); int rv; /* @@ -144,7 +156,18 @@ static int fsp3y_read_word_data(struct i2c_client *client, int page, int phase, if (rv < 0) return rv; - return i2c_smbus_read_word_data(client, reg); + rv = i2c_smbus_read_word_data(client, reg); + if (rv < 0) + return rv; + + /* + * YH-5151E is non-compliant and outputs output voltages in linear11 + * instead of linear16. + */ + if (data->chip == yh5151e && reg == PMBUS_READ_VOUT) + rv = sign_extend32(rv, 10) & 0xffff; + + return rv; } static struct pmbus_driver_info fsp3y_info[] = { -- cgit From 1f4642b72be79757f050924a9b9673b6a02034bc Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Mon, 3 May 2021 00:46:11 -0700 Subject: usb: typec: ucsi: Retrieve all the PDOs instead of just the first 4 commit 4dbc6a4ef06d ("usb: typec: ucsi: save power data objects in PD mode") introduced retrieval of the PDOs when connected to a PD-capable source. But only the first 4 PDOs are received since that is the maximum number that can be fetched at a time given the MESSAGE_IN length limitation (16 bytes). However, as per the PD spec a connected source may advertise up to a maximum of 7 PDOs. If such a source is connected it's possible the PPM could have negotiated a power contract with one of the PDOs at an index greater than 4, which would be reflected in the request data object's (RDO) object position field. This would result in an out-of-bounds access when rdo_index() is used to index into the src_pdos array in ucsi_psy_get_voltage_now(). With the help of the UBSAN -fsanitize=array-bounds checker enabled this exact issue is revealed when connecting to a PD source adapter that advertises 5 PDOs and the PPM enters a contract having selected the 5th one. [ 151.545106][ T70] Unexpected kernel BRK exception at EL1 [ 151.545112][ T70] Internal error: BRK handler: f2005512 [#1] PREEMPT SMP ... [ 151.545499][ T70] pc : ucsi_psy_get_prop+0x208/0x20c [ 151.545507][ T70] lr : power_supply_show_property+0xc0/0x328 ... [ 151.545542][ T70] Call trace: [ 151.545544][ T70] ucsi_psy_get_prop+0x208/0x20c [ 151.545546][ T70] power_supply_uevent+0x1a4/0x2f0 [ 151.545550][ T70] dev_uevent+0x200/0x384 [ 151.545555][ T70] kobject_uevent_env+0x1d4/0x7e8 [ 151.545557][ T70] power_supply_changed_work+0x174/0x31c [ 151.545562][ T70] process_one_work+0x244/0x6f0 [ 151.545564][ T70] worker_thread+0x3e0/0xa64 We can resolve this by instead retrieving and storing up to the maximum of 7 PDOs in the con->src_pdos array. This would involve two calls to the GET_PDOS command.
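The shape of the two-call retrieval, as a hedged, runnable stand-alone model (demo_get_pdos() is a hypothetical transport stub, here simulating a source that advertises 5 dummy PDOs; it is not the UCSI code itself):

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_PDOS_PER_CALL 4 /* MESSAGE_IN is 16 bytes -> four 32-bit PDOs */
    #define PDO_MAX 7           /* a PD source may advertise up to 7 PDOs */

    /* Hypothetical transport stub: copies up to num PDOs starting at offset,
     * returns how many were actually available. Values are dummies. */
    static int demo_get_pdos(uint32_t *buf, int offset, int num)
    {
            int i, n = 0;

            for (i = 0; i < num && offset + i < 5; i++, n++)
                    buf[offset + i] = 0x1000 + offset + i;
            return n;
    }

    static int demo_get_all_pdos(uint32_t pdos[PDO_MAX])
    {
            int total, got;

            total = demo_get_pdos(pdos, 0, MAX_PDOS_PER_CALL);
            if (total < MAX_PDOS_PER_CALL) /* short read: fewer than 4 PDOs */
                    return total;

            /* Fetch the remainder (at most 3 more) with a second call. */
            got = demo_get_pdos(pdos, MAX_PDOS_PER_CALL,
                                PDO_MAX - MAX_PDOS_PER_CALL);
            return total + got;
    }

    int main(void)
    {
            uint32_t pdos[PDO_MAX];

            printf("PDOs retrieved: %d\n", demo_get_all_pdos(pdos)); /* 5 */
            return 0;
    }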
Fixes: 992a60ed0d5e ("usb: typec: ucsi: register with power_supply class") Fixes: 4dbc6a4ef06d ("usb: typec: ucsi: save power data objects in PD mode") Cc: stable@vger.kernel.org Reported-and-tested-by: Subbaraman Narayanamurthy Reviewed-by: Heikki Krogerus Signed-off-by: Jack Pham Link: https://lore.kernel.org/r/20210503074611.30973-1-jackp@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 41 ++++++++++++++++++++++++++++++++--------- drivers/usb/typec/ucsi/ucsi.h | 6 ++++-- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 0e1cec346e0f..1d8b7df59ff4 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -495,7 +495,8 @@ static void ucsi_unregister_altmodes(struct ucsi_connector *con, u8 recipient) } } -static void ucsi_get_pdos(struct ucsi_connector *con, int is_partner) +static int ucsi_get_pdos(struct ucsi_connector *con, int is_partner, + u32 *pdos, int offset, int num_pdos) { struct ucsi *ucsi = con->ucsi; u64 command; @@ -503,17 +504,39 @@ static void ucsi_get_pdos(struct ucsi_connector *con, int is_partner) command = UCSI_COMMAND(UCSI_GET_PDOS) | UCSI_CONNECTOR_NUMBER(con->num); command |= UCSI_GET_PDOS_PARTNER_PDO(is_partner); - command |= UCSI_GET_PDOS_NUM_PDOS(UCSI_MAX_PDOS - 1); + command |= UCSI_GET_PDOS_PDO_OFFSET(offset); + command |= UCSI_GET_PDOS_NUM_PDOS(num_pdos - 1); command |= UCSI_GET_PDOS_SRC_PDOS; - ret = ucsi_send_command(ucsi, command, con->src_pdos, - sizeof(con->src_pdos)); - if (ret < 0) { + ret = ucsi_send_command(ucsi, command, pdos + offset, + num_pdos * sizeof(u32)); + if (ret < 0) dev_err(ucsi->dev, "UCSI_GET_PDOS failed (%d)\n", ret); + if (ret == 0 && offset == 0) + dev_warn(ucsi->dev, "UCSI_GET_PDOS returned 0 bytes\n"); + + return ret; +} + +static void ucsi_get_src_pdos(struct ucsi_connector *con, int is_partner) +{ + int ret; + + /* UCSI max payload means only getting at most 4 PDOs at a time */ + ret = ucsi_get_pdos(con, 1, con->src_pdos, 0, UCSI_MAX_PDOS); + if (ret < 0) return; - } + con->num_pdos = ret / sizeof(u32); /* number of bytes to 32-bit PDOs */ - if (ret == 0) - dev_warn(ucsi->dev, "UCSI_GET_PDOS returned 0 bytes\n"); + if (con->num_pdos < UCSI_MAX_PDOS) + return; + + /* get the remaining PDOs, if any */ + ret = ucsi_get_pdos(con, 1, con->src_pdos, UCSI_MAX_PDOS, + PDO_MAX_OBJECTS - UCSI_MAX_PDOS); + if (ret < 0) + return; + + con->num_pdos += ret / sizeof(u32); } static void ucsi_pwr_opmode_change(struct ucsi_connector *con) @@ -522,7 +545,7 @@ static void ucsi_pwr_opmode_change(struct ucsi_connector *con) case UCSI_CONSTAT_PWR_OPMODE_PD: con->rdo = con->status.request_data_obj; typec_set_pwr_opmode(con->port, TYPEC_PWR_MODE_PD); - ucsi_get_pdos(con, 1); + ucsi_get_src_pdos(con, 1); break; case UCSI_CONSTAT_PWR_OPMODE_TYPEC1_5: con->rdo = 0; diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 3920e20a9e9e..cee666790907 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -8,6 +8,7 @@ #include #include #include +#include #include /* -------------------------------------------------------------------------- */ @@ -134,7 +135,9 @@ void ucsi_connector_change(struct ucsi *ucsi, u8 num); /* GET_PDOS command bits */ #define UCSI_GET_PDOS_PARTNER_PDO(_r_) ((u64)(_r_) << 23) +#define UCSI_GET_PDOS_PDO_OFFSET(_r_) ((u64)(_r_) << 24) #define UCSI_GET_PDOS_NUM_PDOS(_r_) ((u64)(_r_) << 32) +#define UCSI_MAX_PDOS (4) #define UCSI_GET_PDOS_SRC_PDOS 
((u64)1 << 34) /* -------------------------------------------------------------------------- */ @@ -302,7 +305,6 @@ struct ucsi { #define UCSI_MAX_SVID 5 #define UCSI_MAX_ALTMODES (UCSI_MAX_SVID * 6) -#define UCSI_MAX_PDOS (4) #define UCSI_TYPEC_VSAFE5V 5000 #define UCSI_TYPEC_1_5_CURRENT 1500 @@ -330,7 +332,7 @@ struct ucsi_connector { struct power_supply *psy; struct power_supply_desc psy_desc; u32 rdo; - u32 src_pdos[UCSI_MAX_PDOS]; + u32 src_pdos[PDO_MAX_OBJECTS]; int num_pdos; struct usb_role_switch *usb_role_sw; -- cgit From c34e85fa69b9f4568f19da3af06c3870dd8fcc50 Mon Sep 17 00:00:00 2001 From: Kyle Tso Date: Fri, 7 May 2021 14:22:59 +0800 Subject: usb: typec: tcpm: Send DISCOVER_IDENTITY from dedicated work In current design, DISCOVER_IDENTITY is queued to VDM state machine immediately in Ready states and never retries if it fails in the AMS. Move the process to a delayed work so that when it fails for some reasons (e.g. Sink Tx No Go), it can be retried by queueing the work again. Also fix a problem that the vdm_state is not set to a proper state if it is blocked by Collision Avoidance mechanism. Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Kyle Tso Link: https://lore.kernel.org/r/20210507062300.1945009-2-kyletso@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 85 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 75 insertions(+), 10 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 68e04e397e92..ae1e84252d38 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -259,6 +259,7 @@ enum frs_typec_current { #define ALTMODE_DISCOVERY_MAX (SVID_DISCOVERY_MAX * MODE_DISCOVERY_MAX) #define GET_SINK_CAP_RETRY_MS 100 +#define SEND_DISCOVER_RETRY_MS 100 struct pd_mode_data { int svid_index; /* current SVID index */ @@ -366,6 +367,8 @@ struct tcpm_port { struct kthread_work vdm_state_machine; struct hrtimer enable_frs_timer; struct kthread_work enable_frs; + struct hrtimer send_discover_timer; + struct kthread_work send_discover_work; bool state_machine_running; bool vdm_sm_running; @@ -1178,6 +1181,16 @@ static void mod_enable_frs_delayed_work(struct tcpm_port *port, unsigned int del } } +static void mod_send_discover_delayed_work(struct tcpm_port *port, unsigned int delay_ms) +{ + if (delay_ms) { + hrtimer_start(&port->send_discover_timer, ms_to_ktime(delay_ms), HRTIMER_MODE_REL); + } else { + hrtimer_cancel(&port->send_discover_timer); + kthread_queue_work(port->wq, &port->send_discover_work); + } +} + static void tcpm_set_state(struct tcpm_port *port, enum tcpm_state state, unsigned int delay_ms) { @@ -1855,6 +1868,9 @@ static void vdm_run_state_machine(struct tcpm_port *port) res = tcpm_ams_start(port, DISCOVER_IDENTITY); if (res == 0) port->send_discover = false; + else if (res == -EAGAIN) + mod_send_discover_delayed_work(port, + SEND_DISCOVER_RETRY_MS); break; case CMD_DISCOVER_SVID: res = tcpm_ams_start(port, DISCOVER_SVIDS); @@ -1880,6 +1896,7 @@ static void vdm_run_state_machine(struct tcpm_port *port) } if (res < 0) { + port->vdm_state = VDM_STATE_ERR_BUSY; port->vdm_sm_running = false; return; } @@ -3682,14 +3699,6 @@ static inline enum tcpm_state unattached_state(struct tcpm_port *port) return SNK_UNATTACHED; } -static void tcpm_check_send_discover(struct tcpm_port *port) -{ - if ((port->data_role == TYPEC_HOST || port->negotiated_rev > PD_REV20) && - port->send_discover && port->pd_capable) - tcpm_send_vdm(port, USB_SID_PD, CMD_DISCOVER_IDENT, 
NULL, 0); - port->send_discover = false; -} - static void tcpm_swap_complete(struct tcpm_port *port, int result) { if (port->swap_pending) { @@ -3926,7 +3935,18 @@ static void run_state_machine(struct tcpm_port *port) break; } - tcpm_check_send_discover(port); + /* + * 6.4.4.3.1 Discover Identity + * "The Discover Identity Command Shall only be sent to SOP when there is an + * Explicit Contract." + * For now, this driver only supports SOP for DISCOVER_IDENTITY, thus using + * port->explicit_contract to decide whether to send the command. + */ + if (port->explicit_contract) + mod_send_discover_delayed_work(port, 0); + else + port->send_discover = false; + /* * 6.3.5 * Sending ping messages is not necessary if @@ -4194,7 +4214,18 @@ static void run_state_machine(struct tcpm_port *port) break; } - tcpm_check_send_discover(port); + /* + * 6.4.4.3.1 Discover Identity + * "The Discover Identity Command Shall only be sent to SOP when there is an + * Explicit Contract." + * For now, this driver only supports SOP for DISCOVER_IDENTITY, thus using + * port->explicit_contract. + */ + if (port->explicit_contract) + mod_send_discover_delayed_work(port, 0); + else + port->send_discover = false; + power_supply_changed(port->psy); break; @@ -5288,6 +5319,29 @@ unlock: mutex_unlock(&port->lock); } +static void tcpm_send_discover_work(struct kthread_work *work) +{ + struct tcpm_port *port = container_of(work, struct tcpm_port, send_discover_work); + + mutex_lock(&port->lock); + /* No need to send DISCOVER_IDENTITY anymore */ + if (!port->send_discover) + goto unlock; + + /* Retry if the port is not idle */ + if ((port->state != SRC_READY && port->state != SNK_READY) || port->vdm_sm_running) { + mod_send_discover_delayed_work(port, SEND_DISCOVER_RETRY_MS); + goto unlock; + } + + /* Only send the Message if the port is host for PD rev2.0 */ + if (port->data_role == TYPEC_HOST || port->negotiated_rev > PD_REV20) + tcpm_send_vdm(port, USB_SID_PD, CMD_DISCOVER_IDENT, NULL, 0); + +unlock: + mutex_unlock(&port->lock); +} + static int tcpm_dr_set(struct typec_port *p, enum typec_data_role data) { struct tcpm_port *port = typec_get_drvdata(p); @@ -6093,6 +6147,14 @@ static enum hrtimer_restart enable_frs_timer_handler(struct hrtimer *timer) return HRTIMER_NORESTART; } +static enum hrtimer_restart send_discover_timer_handler(struct hrtimer *timer) +{ + struct tcpm_port *port = container_of(timer, struct tcpm_port, send_discover_timer); + + kthread_queue_work(port->wq, &port->send_discover_work); + return HRTIMER_NORESTART; +} + struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) { struct tcpm_port *port; @@ -6123,12 +6185,15 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) kthread_init_work(&port->vdm_state_machine, vdm_state_machine_work); kthread_init_work(&port->event_work, tcpm_pd_event_handler); kthread_init_work(&port->enable_frs, tcpm_enable_frs_work); + kthread_init_work(&port->send_discover_work, tcpm_send_discover_work); hrtimer_init(&port->state_machine_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); port->state_machine_timer.function = state_machine_timer_handler; hrtimer_init(&port->vdm_state_machine_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); port->vdm_state_machine_timer.function = vdm_state_machine_timer_handler; hrtimer_init(&port->enable_frs_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); port->enable_frs_timer.function = enable_frs_timer_handler; + hrtimer_init(&port->send_discover_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + 
port->send_discover_timer.function = send_discover_timer_handler; spin_lock_init(&port->pd_event_lock); -- cgit From f1fbd950b59b67bc5c202216c8e1c6ca8c99a3b4 Mon Sep 17 00:00:00 2001 From: Kyle Tso Date: Fri, 7 May 2021 14:23:00 +0800 Subject: usb: typec: tcpm: Fix wrong handling for Not_Supported in VDM AMS Not_Supported Message is acceptable in VDM AMS. Redirect the VDM state machine to VDM_STATE_DONE when receiving Not_Supported and finish the VDM AMS. Also, after the loop in vdm_state_machine_work, add more conditions of VDM states to clear the vdm_sm_running flag because those are all stopping states when leaving the loop. In addition, finish the VDM AMS if the port partner responds BUSY. Fixes: 8dea75e11380 ("usb: typec: tcpm: Protocol Error handling") Fixes: 8d3a0578ad1a ("usb: typec: tcpm: Respond Wait if VDM state machine is running") Reviewed-by: Guenter Roeck Signed-off-by: Kyle Tso Link: https://lore.kernel.org/r/20210507062300.1945009-3-kyletso@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index ae1e84252d38..db567e6fde92 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -1897,7 +1897,6 @@ static void vdm_run_state_machine(struct tcpm_port *port) if (res < 0) { port->vdm_state = VDM_STATE_ERR_BUSY; - port->vdm_sm_running = false; return; } } @@ -1913,6 +1912,7 @@ static void vdm_run_state_machine(struct tcpm_port *port) port->vdo_data[0] = port->vdo_retry; port->vdo_count = 1; port->vdm_state = VDM_STATE_READY; + tcpm_ams_finish(port); break; case VDM_STATE_BUSY: port->vdm_state = VDM_STATE_ERR_TMOUT; @@ -1978,7 +1978,7 @@ static void vdm_state_machine_work(struct kthread_work *work) port->vdm_state != VDM_STATE_BUSY && port->vdm_state != VDM_STATE_SEND_MESSAGE); - if (port->vdm_state == VDM_STATE_ERR_TMOUT) + if (port->vdm_state < VDM_STATE_READY) port->vdm_sm_running = false; mutex_unlock(&port->lock); @@ -2569,6 +2569,16 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port, port->sink_cap_done = true; tcpm_set_state(port, ready_state(port), 0); break; + case SRC_READY: + case SNK_READY: + if (port->vdm_state > VDM_STATE_READY) { + port->vdm_state = VDM_STATE_DONE; + if (tcpm_vdm_ams(port)) + tcpm_ams_finish(port); + mod_vdm_delayed_work(port, 0); + break; + } + fallthrough; default: tcpm_pd_handle_state(port, port->pwr_role == TYPEC_SOURCE ? -- cgit From a5be0c978cc498adb6886a9374644c393b6005a3 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 30 Apr 2021 15:38:05 -0700 Subject: drm/i915/display: move vbt check to intel_ddi_init() Since commit 45c0673aac97 ("drm/i915/bios: start using the intel_bios_encoder_data directly") we lookup the devdata for each port in intel_ddi_init() and just return if the port is not present in VBT (or if we didn't create a fake devdata for it if VBT is not available). So in intel_display.c we don't have to check intel_bios_is_port_present(), just rely on the check in intel_ddi_init(). v2: Rebase on commit 45c0673aac97 ("drm/i915/bios: start using the intel_bios_encoder_data directly") re-using that check in intel_ddi_init() instead of adding a new one. 
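The shape of that refactor, as a hedged sketch; all names are stand-ins for the i915 internals, not the driver's actual types:

    struct demo_i915;    /* stand-in for drm_i915_private */
    struct demo_devdata; /* stand-in for intel_bios_encoder_data */
    enum demo_port { DEMO_PORT_A, DEMO_PORT_E, DEMO_PORT_F };

    static const struct demo_devdata *
    demo_vbt_lookup(struct demo_i915 *i915, enum demo_port port); /* hypothetical */

    /*
     * Before: every call site guarded the init with its own VBT presence
     * check. After: callers call unconditionally, and the init function
     * owns the check, so "port not in VBT" is handled in exactly one place.
     */
    static void demo_ddi_init(struct demo_i915 *i915, enum demo_port port)
    {
            const struct demo_devdata *devdata = demo_vbt_lookup(i915, port);

            if (!devdata)
                    return; /* port absent: nothing to initialize */

            /* ...encoder setup would follow here... */
    }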
Signed-off-by: Lucas De Marchi Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210430223808.1078010-2-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index a45916791e81..5066d8587bf6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11199,13 +11199,13 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_ddi_init(dev_priv, PORT_C); intel_ddi_init(dev_priv, PORT_D); intel_ddi_init(dev_priv, PORT_E); + /* - * On some ICL SKUs port F is not present. No strap bits for - * this, so rely on VBT. - * Work around broken VBTs on SKUs known to have no port F. + * On some ICL SKUs port F is not present, but broken VBTs mark + * the port as present. Only try to initialize port F for the + * SKUs that may actually have it. */ - if (IS_ICL_WITH_PORT_F(dev_priv) && - intel_bios_is_port_present(dev_priv, PORT_F)) + if (IS_ICL_WITH_PORT_F(dev_priv)) intel_ddi_init(dev_priv, PORT_F); icl_dsi_init(dev_priv); @@ -11259,10 +11259,8 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) /* * On SKL we don't have a way to detect DDI-E so we rely on VBT. */ - if (DISPLAY_VER(dev_priv) == 9 && - intel_bios_is_port_present(dev_priv, PORT_E)) + if (DISPLAY_VER(dev_priv) == 9) intel_ddi_init(dev_priv, PORT_E); - } else if (HAS_PCH_SPLIT(dev_priv)) { int found; -- cgit From ba9c5bf76734e70648bf61d81326b9c44abba643 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 30 Apr 2021 15:38:06 -0700 Subject: drm/i915/display: remove FIXME comment for intended feature Direction on gen >= 9 was to stop using straps and rely on VBT indicating if the port is present or not. Remove FIXME comment since this will never be "fixed". Signed-off-by: Lucas De Marchi Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210430223808.1078010-3-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 5066d8587bf6..42ec103cffe1 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11210,15 +11210,9 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) icl_dsi_init(dev_priv); } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { - /* - * FIXME: Broxton doesn't support port detection via the - * DDI_BUF_CTL_A or SFUSE_STRAP registers, find another way to - * detect the ports. - */ intel_ddi_init(dev_priv, PORT_A); intel_ddi_init(dev_priv, PORT_B); intel_ddi_init(dev_priv, PORT_C); - vlv_dsi_init(dev_priv); } else if (HAS_DDI(dev_priv)) { int found; -- cgit From 097d9e902068426e335ba7c0115f2c8ae7f9d08e Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 30 Apr 2021 15:38:07 -0700 Subject: drm/i915/display: remove strap checks from gen 9 Direction on gen9+ was to stop reading the straps and only rely on the VBT for marking the port presence. This happened while dealing with WaIgnoreDDIAStrap and instead of using it as a WA, it should now be the normal flow. See commit 885d3e5b6f08 ("drm/i915/display: fix comment on skl straps"). 
For gen 10 it's hard to say if this will work or not since I can't test it, so leave it with the same behavior as before. For PCH_TGP we should still rely on the VBT to make ports E and F not available. v2 (Ville): - use display ver >= 9 to make it consistent with the rest of the driver instead of checking for == 9 - also handle CNL and only initialize port F if it is IS_CNL_WITH_PORT_F. Eventually CNL may be removed, but while it isn't let's keep it consistent everywhere. Signed-off-by: Lucas De Marchi Reviewed-by: Anusha Srivatsa Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210430223808.1078010-4-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 38 ++++++++++------------------ 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 42ec103cffe1..d63775a8a8c6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11214,34 +11214,27 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_ddi_init(dev_priv, PORT_B); intel_ddi_init(dev_priv, PORT_C); vlv_dsi_init(dev_priv); + } else if (DISPLAY_VER(dev_priv) >= 9) { + intel_ddi_init(dev_priv, PORT_A); + intel_ddi_init(dev_priv, PORT_B); + intel_ddi_init(dev_priv, PORT_C); + intel_ddi_init(dev_priv, PORT_D); + intel_ddi_init(dev_priv, PORT_E); + + if (IS_CNL_WITH_PORT_F(dev_priv)) + intel_ddi_init(dev_priv, PORT_F); } else if (HAS_DDI(dev_priv)) { - int found; + u32 found; if (intel_ddi_crt_present(dev_priv)) intel_crt_init(dev_priv); - /* - * Haswell uses DDI functions to detect digital outputs. - * On SKL pre-D0 the strap isn't connected. Later SKUs may or - * may not have it - it was supposed to be fixed by the same - * time we stopped using straps. Assume it's there. - */ + /* Haswell uses DDI functions to detect digital outputs. */ found = intel_de_read(dev_priv, DDI_BUF_CTL(PORT_A)) & DDI_INIT_DISPLAY_DETECTED; - /* WaIgnoreDDIAStrap: skl */ - if (found || DISPLAY_VER(dev_priv) == 9) + if (found) intel_ddi_init(dev_priv, PORT_A); - /* DDI B, C, D, and F detection is indicated by the SFUSE_STRAP - * register */ - if (HAS_PCH_TGP(dev_priv)) { - /* W/A due to lack of STRAP config on TGP PCH*/ - found = (SFUSE_STRAP_DDIB_DETECTED | - SFUSE_STRAP_DDIC_DETECTED | - SFUSE_STRAP_DDID_DETECTED); - } else { - found = intel_de_read(dev_priv, SFUSE_STRAP); - } - + found = intel_de_read(dev_priv, SFUSE_STRAP); if (found & SFUSE_STRAP_DDIB_DETECTED) intel_ddi_init(dev_priv, PORT_B); if (found & SFUSE_STRAP_DDIC_DETECTED) @@ -11250,11 +11243,6 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_ddi_init(dev_priv, PORT_D); if (found & SFUSE_STRAP_DDIF_DETECTED) intel_ddi_init(dev_priv, PORT_F); - /* - * On SKL we don't have a way to detect DDI-E so we rely on VBT. - */ - if (DISPLAY_VER(dev_priv) == 9) - intel_ddi_init(dev_priv, PORT_E); } else if (HAS_PCH_SPLIT(dev_priv)) { int found; -- cgit From 5a9d38b20a5a7b3ce335d059963a8c3e92b1ba3c Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 30 Apr 2021 15:38:08 -0700 Subject: drm/i915/display: hide workaround for broken vbt in intel_bios.c Instead of polluting the normal code path in intel_display.c, make intel_bios.c handle the brokenness of the VBT.
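(The shape of the fix, previewed as a sketch; the concrete helper, is_port_valid(), is added in the diff that follows. The idea is to validate VBT child entries once, at parse time, so that intel_setup_outputs() can call intel_ddi_init() for every port without re-checking the VBT:)

	/* In parse_ddi_port(): drop known-bogus VBT entries early. */
	if (!is_port_valid(i915, port)) {
		drm_dbg_kms(&i915->drm,
			    "VBT reports port %c as supported, but that can't be true: skipping\n",
			    port_name(port));
		return;
	}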
Signed-off-by: Lucas De Marchi Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210430223808.1078010-5-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/display/intel_bios.c | 20 ++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_display.c | 14 ++------------ 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index befab891a6b9..a783c5a40934 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1852,6 +1852,19 @@ intel_bios_encoder_supports_edp(const struct intel_bios_encoder_data *devdata) devdata->child.device_type & DEVICE_TYPE_INTERNAL_CONNECTOR; } +static bool is_port_valid(struct drm_i915_private *i915, enum port port) +{ + /* + * On some ICL/CNL SKUs port F is not present, but broken VBTs mark + * the port as present. Only try to initialize port F for the + * SKUs that may actually have it. + */ + if (port == PORT_F && (IS_ICELAKE(i915) || IS_CANNONLAKE(i915))) + return IS_ICL_WITH_PORT_F(i915) || IS_CNL_WITH_PORT_F(i915); + + return true; +} + static void parse_ddi_port(struct drm_i915_private *i915, struct intel_bios_encoder_data *devdata) { @@ -1865,6 +1878,13 @@ static void parse_ddi_port(struct drm_i915_private *i915, if (port == PORT_NONE) return; + if (!is_port_valid(i915, port)) { + drm_dbg_kms(&i915->drm, + "VBT reports port %c as supported, but that can't be true: skipping\n", + port_name(port)); + return; + } + info = &i915->vbt.ddi_port_info[port]; if (info->devdata) { diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index d63775a8a8c6..fc7e18ff680a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11199,15 +11199,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_ddi_init(dev_priv, PORT_C); intel_ddi_init(dev_priv, PORT_D); intel_ddi_init(dev_priv, PORT_E); - - /* - * On some ICL SKUs port F is not present, but broken VBTs mark - * the port as present. Only try to initialize port F for the - * SKUs that may actually have it. - */ - if (IS_ICL_WITH_PORT_F(dev_priv)) - intel_ddi_init(dev_priv, PORT_F); - + intel_ddi_init(dev_priv, PORT_F); icl_dsi_init(dev_priv); } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { intel_ddi_init(dev_priv, PORT_A); @@ -11220,9 +11212,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_ddi_init(dev_priv, PORT_C); intel_ddi_init(dev_priv, PORT_D); intel_ddi_init(dev_priv, PORT_E); - - if (IS_CNL_WITH_PORT_F(dev_priv)) - intel_ddi_init(dev_priv, PORT_F); + intel_ddi_init(dev_priv, PORT_F); } else if (HAS_DDI(dev_priv)) { u32 found; -- cgit From d9ff1096a840dddea3d5cfa2149ff7da9f499fb2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 4 May 2021 22:26:29 +0200 Subject: usb: musb: Fix an error message 'ret' is known to be 0 here. Initialize 'ret' with the expected error code before using it. 
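(A self-contained userspace sketch of the bug class being fixed here: the error code is logged before it is assigned, so the message always prints the stale value. The names below are illustrative, not the driver's:)

	#include <stdio.h>

	#define MY_ENODEV 19

	/* Stand-in for devm_usb_get_phy() returning an error code. */
	static long get_phy(void)
	{
		return -MY_ENODEV;
	}

	int main(void)
	{
		long phy = get_phy();
		long ret = 0;

		if (phy < 0) {
			/* Buggy order: 'ret' is still 0 here, so the log lies. */
			printf("fail to get usb-phy %ld\n", ret);

			/* Fixed order: assign the error code first, then log it. */
			ret = phy;
			printf("fail to get usb-phy %ld\n", ret);
		}
		return ret ? 1 : 0;
	}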
Fixes: 0990366bab3c ("usb: musb: Add support for MediaTek musb controller") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/69f514dc7134e3c917cad208e73cc650cb9e2bd6.1620159879.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/mediatek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/mediatek.c b/drivers/usb/musb/mediatek.c index eebeadd26946..6b92d037d8fc 100644 --- a/drivers/usb/musb/mediatek.c +++ b/drivers/usb/musb/mediatek.c @@ -518,8 +518,8 @@ static int mtk_musb_probe(struct platform_device *pdev) glue->xceiv = devm_usb_get_phy(dev, USB_PHY_TYPE_USB2); if (IS_ERR(glue->xceiv)) { - dev_err(dev, "fail to getting usb-phy %d\n", ret); ret = PTR_ERR(glue->xceiv); + dev_err(dev, "fail to getting usb-phy %d\n", ret); goto err_unregister_usb_phy; } -- cgit From 28ec344bb8911bb0d4910456b22ba0dd4f662521 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 5 May 2021 17:44:22 -0700 Subject: usb: typec: tcpm: Don't block probing of consumers of "connector" nodes fw_devlink expects DT device nodes with a "compatible" property to have struct devices created for them. Since the connector node might not be populated as a device, mark it as such so that fw_devlink knows not to wait on this fwnode being populated as a struct device. Without this patch, USB functionality can be broken on some boards. Fixes: f7514a663016 ("of: property: fw_devlink: Add support for remote-endpoint") Reported-by: John Stultz Tested-by: John Stultz Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20210506004423.345199-1-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 3 ++- drivers/usb/typec/tcpm/tcpm.c | 9 +++++++++ include/linux/fwnode.h | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 4a8bf8cda52b..628e33939aca 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -150,7 +150,7 @@ void fwnode_links_purge(struct fwnode_handle *fwnode) fwnode_links_purge_consumers(fwnode); } -static void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode) +void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode) { struct fwnode_handle *child; @@ -164,6 +164,7 @@ static void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode) fwnode_for_each_available_child_node(fwnode, child) fw_devlink_purge_absent_suppliers(child); } +EXPORT_SYMBOL_GPL(fw_devlink_purge_absent_suppliers); #ifdef CONFIG_SRCU static DEFINE_MUTEX(device_links_lock); diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index c4fdc00a3bc8..bffa342d4e38 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5754,6 +5754,15 @@ static int tcpm_fw_get_caps(struct tcpm_port *port, if (!fwnode) return -EINVAL; + /* + * This fwnode has a "compatible" property, but is never populated as a + * struct device. Instead we simply parse it to read the properties. + * This breaks fw_devlink=on. To maintain backward compatibility + * with existing DT files, we work around this by deleting any + * fwnode_links to/from this fwnode.
+ */ + fw_devlink_purge_absent_suppliers(fwnode); + /* USB data support is optional */ ret = fwnode_property_read_string(fwnode, "data-role", &cap_str); if (ret == 0) { diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index ed4e67a7ff1c..59828516ebaf 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -187,5 +187,6 @@ extern u32 fw_devlink_get_flags(void); extern bool fw_devlink_is_strict(void); int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup); void fwnode_links_purge(struct fwnode_handle *fwnode); +void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode); #endif -- cgit From 94dfec48fca756cef90263a03e81f24dae24a5c6 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 29 Apr 2021 00:29:50 +0200 Subject: drm/imx: Add 8 pixel alignment fix Some standard resolutions like 1366x768 do not work properly with i.MX6 SoCs, since the horizontal resolution needs to be aligned to 8 pixels (so 1360x768 or 1368x768 would work). This patch allocates framebuffers aligned to 8 pixels. The extra time required to send the extra pixels is removed from the blank time. In order to expose the correct display size to userspace, the stride is increased without increasing the width. Without this patch, systems with this display resolution hang indefinitely during boot up. Suggested-by: Boris Brezillon Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20210428222953.235280-3-sebastian.reichel@collabora.com Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/imx-drm-core.c | 19 ++++++++++++++++++- drivers/gpu/drm/imx/imx-ldb.c | 5 +++++ drivers/gpu/drm/imx/ipuv3-crtc.c | 11 ++++++++++- drivers/gpu/drm/imx/ipuv3-plane.c | 19 +++++++++++++++---- drivers/gpu/ipu-v3/ipu-dc.c | 5 +++++ drivers/gpu/ipu-v3/ipu-di.c | 7 +++++++ 6 files changed, 60 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c index be6001d8c984..3709d343784a 100644 --- a/drivers/gpu/drm/imx/imx-drm-core.c +++ b/drivers/gpu/drm/imx/imx-drm-core.c @@ -147,9 +147,26 @@ static const struct drm_ioctl_desc imx_drm_ioctls[] = { /* none so far */ }; +static int imx_drm_dumb_create(struct drm_file *file_priv, + struct drm_device *drm, + struct drm_mode_create_dumb *args) +{ + u32 width = args->width; + int ret; + + args->width = ALIGN(width, 8); + + ret = drm_gem_cma_dumb_create(file_priv, drm, args); + if (ret) + return ret; + + args->width = width; + return ret; +} + static const struct drm_driver imx_drm_driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, - DRM_GEM_CMA_DRIVER_OPS, + DRM_GEM_CMA_DRIVER_OPS_WITH_DUMB_CREATE(imx_drm_dumb_create), .ioctls = imx_drm_ioctls, .num_ioctls = ARRAY_SIZE(imx_drm_ioctls), .fops = &imx_drm_driver_fops, diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index ffdc492c5bc5..53132ddf9587 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -274,6 +274,11 @@ imx_ldb_encoder_atomic_mode_set(struct drm_encoder *encoder, "%s: mode exceeds 85 MHz pixel clock\n", __func__); } + if (!IS_ALIGNED(mode->hdisplay, 8)) { + dev_warn(ldb->dev, + "%s: hdisplay does not align to 8 byte\n", __func__); + } + if (dual) { serial_clk = 3500UL * mode->clock; imx_ldb_set_clock(ldb, mux, 0, serial_clk, di_clk); diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index e6431a227feb..9c8829f945b2 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -305,10
+305,19 @@ static void ipu_crtc_mode_set_nofb(struct drm_crtc *crtc) sig_cfg.vsync_pin = imx_crtc_state->di_vsync_pin; drm_display_mode_to_videomode(mode, &sig_cfg.mode); + if (!IS_ALIGNED(sig_cfg.mode.hactive, 8)) { + unsigned int new_hactive = ALIGN(sig_cfg.mode.hactive, 8); + + dev_warn(ipu_crtc->dev, "8-pixel align hactive %d -> %d\n", + sig_cfg.mode.hactive, new_hactive); + + sig_cfg.mode.hfront_porch = new_hactive - sig_cfg.mode.hactive; + sig_cfg.mode.hactive = new_hactive; + } ipu_dc_init_sync(ipu_crtc->dc, ipu_crtc->di, mode->flags & DRM_MODE_FLAG_INTERLACE, - imx_crtc_state->bus_format, mode->hdisplay); + imx_crtc_state->bus_format, sig_cfg.mode.hactive); ipu_di_init_sync_panel(ipu_crtc->di, &sig_cfg); } diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index fc8f4834ed7b..9a7150ac34a6 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -30,6 +30,11 @@ to_ipu_plane_state(struct drm_plane_state *p) return container_of(p, struct ipu_plane_state, base); } +static unsigned int ipu_src_rect_width(const struct drm_plane_state *state) +{ + return ALIGN(drm_rect_width(&state->src) >> 16, 8); +} + static inline struct ipu_plane *to_ipu_plane(struct drm_plane *p) { return container_of(p, struct ipu_plane, base); @@ -440,6 +445,12 @@ static int ipu_plane_atomic_check(struct drm_plane *plane, if (old_fb && fb->pitches[0] != old_fb->pitches[0]) crtc_state->mode_changed = true; + if (ALIGN(fb->width, 8) * fb->format->cpp[0] > + fb->pitches[0] + fb->offsets[0]) { + dev_warn(dev, "pitch is not big enough for 8 pixels alignment"); + return -EINVAL; + } + switch (fb->format->format) { case DRM_FORMAT_YUV420: case DRM_FORMAT_YVU420: @@ -615,7 +626,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, if (ipu_state->use_pre) { axi_id = ipu_chan_assign_axi_id(ipu_plane->dma); ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id, - drm_rect_width(&new_state->src) >> 16, + ipu_src_rect_width(new_state), drm_rect_height(&new_state->src) >> 16, fb->pitches[0], fb->format->format, fb->modifier, &eba); @@ -648,9 +659,9 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, break; } - ipu_dmfc_config_wait4eot(ipu_plane->dmfc, drm_rect_width(dst)); + ipu_dmfc_config_wait4eot(ipu_plane->dmfc, ALIGN(drm_rect_width(dst), 8)); - width = drm_rect_width(&new_state->src) >> 16; + width = ipu_src_rect_width(new_state); height = drm_rect_height(&new_state->src) >> 16; info = drm_format_info(fb->format->format); ipu_calculate_bursts(width, info->cpp[0], fb->pitches[0], @@ -715,7 +726,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, ipu_cpmem_zero(ipu_plane->alpha_ch); ipu_cpmem_set_resolution(ipu_plane->alpha_ch, - drm_rect_width(&new_state->src) >> 16, + ipu_src_rect_width(new_state), drm_rect_height(&new_state->src) >> 16); ipu_cpmem_set_format_passthrough(ipu_plane->alpha_ch, 8); ipu_cpmem_set_high_priority(ipu_plane->alpha_ch); diff --git a/drivers/gpu/ipu-v3/ipu-dc.c b/drivers/gpu/ipu-v3/ipu-dc.c index 34b4075a6a8e..ca96b235491a 100644 --- a/drivers/gpu/ipu-v3/ipu-dc.c +++ b/drivers/gpu/ipu-v3/ipu-dc.c @@ -167,6 +167,11 @@ int ipu_dc_init_sync(struct ipu_dc *dc, struct ipu_di *di, bool interlaced, dc->di = ipu_di_get_num(di); + if (!IS_ALIGNED(width, 8)) { + dev_warn(priv->dev, + "%s: hactive does not align to 8 byte\n", __func__); + } + map = ipu_bus_format_to_map(bus_format); /* diff --git a/drivers/gpu/ipu-v3/ipu-di.c b/drivers/gpu/ipu-v3/ipu-di.c index e617f60afeea..666223c6bec4 100644 --- 
a/drivers/gpu/ipu-v3/ipu-di.c +++ b/drivers/gpu/ipu-v3/ipu-di.c @@ -506,6 +506,13 @@ int ipu_di_adjust_videomode(struct ipu_di *di, struct videomode *mode) { u32 diff; + if (!IS_ALIGNED(mode->hactive, 8) && + mode->hfront_porch < ALIGN(mode->hactive, 8) - mode->hactive) { + dev_err(di->ipu->dev, "hactive %d is not aligned to 8 and front porch is too small to compensate\n", + mode->hactive); + return -EINVAL; + } + if (mode->vfront_porch >= 2) return 0; -- cgit From 11e3c676683c340966b9467da7d09080e522e181 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Fri, 26 Mar 2021 11:15:09 +0800 Subject: drm/imx: ipuv3-plane: Remove two unnecessary export symbols The ipu_plane_disable_deferred() and ipu_planes_assign_pre() functions have not been used by any other module, only internally by imxdrm itself, since imxdrm and imx-ipuv3-crtc were merged into one module. So, this patch removes the export symbols for the two functions. Fixes: 3d1df96ad468 ("drm/imx: merge imx-drm-core and ipuv3-crtc in one module") Signed-off-by: Liu Ying Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-plane.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index 9a7150ac34a6..9c81cf708edc 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -294,7 +294,6 @@ void ipu_plane_disable_deferred(struct drm_plane *plane) ipu_plane_disable(ipu_plane, false); } } -EXPORT_SYMBOL_GPL(ipu_plane_disable_deferred); static void ipu_plane_state_reset(struct drm_plane *plane) { @@ -857,7 +856,6 @@ int ipu_planes_assign_pre(struct drm_device *dev, return 0; } -EXPORT_SYMBOL_GPL(ipu_planes_assign_pre); struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu, int dma, int dp, unsigned int possible_crtcs, -- cgit From 8370e5b093080c03cf89f7ebf0bef6984545429e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 13:01:36 +0300 Subject: hwmon: (ltc2992) Put fwnode in error case during ->probe() In each iteration, fwnode_for_each_available_child_node() bumps the reference count of the loop variable and drops it on the next iteration. Since the loop is broken out of in the error cases, we have to drop the reference count ourselves. Do it for the child node in the error cases during ->probe().
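(A sketch of the rule the fix applies: any early exit from a fwnode_for_each_available_child_node() loop must drop the child reference the iterator took. Shown below is an equivalent shape that keeps the put in one place via an error label; this is an illustrative alternative, not what the patch does. Kernel context: needs <linux/property.h> and <linux/errno.h>.)

	static int parse_children(struct fwnode_handle *fwnode)
	{
		struct fwnode_handle *child;
		u32 addr;
		int ret;

		fwnode_for_each_available_child_node(fwnode, child) {
			ret = fwnode_property_read_u32(child, "reg", &addr);
			if (ret < 0)
				goto err_put_child;

			if (addr > 1) {
				ret = -EINVAL;
				goto err_put_child;
			}
		}

		return 0;

	err_put_child:
		/* Balance the reference the iterator took for this child. */
		fwnode_handle_put(child);
		return ret;
	}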
Fixes: b0bd407e94b0 ("hwmon: (ltc2992) Add support") Cc: Alexandru Tachici Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510100136.3303142-1-andy.shevchenko@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/ltc2992.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c index 4382105bf142..2a4bed0ab226 100644 --- a/drivers/hwmon/ltc2992.c +++ b/drivers/hwmon/ltc2992.c @@ -900,11 +900,15 @@ static int ltc2992_parse_dt(struct ltc2992_state *st) fwnode_for_each_available_child_node(fwnode, child) { ret = fwnode_property_read_u32(child, "reg", &addr); - if (ret < 0) + if (ret < 0) { + fwnode_handle_put(child); return ret; + } - if (addr > 1) + if (addr > 1) { + fwnode_handle_put(child); return -EINVAL; + } ret = fwnode_property_read_u32(child, "shunt-resistor-micro-ohms", &val); if (!ret) -- cgit From 17b9a94656fe19aef3647c4f93d93be51697ceb1 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 10 May 2021 16:59:27 +0200 Subject: drm/imx: ipuv3-plane: fix PRG modifiers after drm managed resource conversion The conversion to drm managed resources introduced two bugs: the plane is now always initialized with the linear-only list, while the list with the Vivante GPU modifiers should have been used when the PRG/PRE engines are present. This masked another issue, as ipu_plane_format_mod_supported() is now called before the private plane data is set up, so if a non-linear modifier is supplied in the plane modifier list, we run into a NULL pointer dereference checking for the PRG presence. To fix this, just remove the check from this function, as we know that it will only be called with a non-linear modifier if the plane init code has already determined that the PRG/PRE is present. Fixes: 699e7e543f1a ("drm/imx: ipuv3-plane: use drm managed resources") Signed-off-by: Lucas Stach Link: https://lore.kernel.org/r/20210510145927.988661-1-l.stach@pengutronix.de Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-plane.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index 9c81cf708edc..544d8eaf7c36 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -349,10 +349,11 @@ static bool ipu_plane_format_mod_supported(struct drm_plane *plane, if (modifier == DRM_FORMAT_MOD_LINEAR) return true; - /* without a PRG there are no supported modifiers */ - if (!ipu_prg_present(ipu)) - return false; - + /* + * Without a PRG the possible modifiers list only includes the linear + * modifier, so we always take the early return from this function and + * only end up here if the PRG is present.
+ */ return ipu_prg_format_supported(ipu, format, modifier); } @@ -878,6 +879,10 @@ struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu, formats = ipu_plane_rgb_formats; format_count = ARRAY_SIZE(ipu_plane_rgb_formats); } + + if (ipu_prg_present(ipu)) + modifiers = pre_format_modifiers; + ipu_plane = drmm_universal_plane_alloc(dev, struct ipu_plane, base, possible_crtcs, &ipu_plane_funcs, formats, format_count, modifiers, @@ -892,9 +897,6 @@ struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu, ipu_plane->dma = dma; ipu_plane->dp_flow = dp; - if (ipu_prg_present(ipu)) - modifiers = pre_format_modifiers; - drm_plane_helper_add(&ipu_plane->base, &ipu_plane_helper_funcs); if (dp == IPU_DP_FLOW_SYNC_BG || dp == IPU_DP_FLOW_SYNC_FG) -- cgit From 4618cb7903d6e37690fff85a47126a4917e06495 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 21 Jul 2020 11:52:18 +0200 Subject: gpu: ipu-v3: Add Rec.709 limited range support to DP Add YCbCr encoding and quantization range parameters to ipu_dp_setup_channel() and configure the CSC DP matrix accordingly. Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-plane.c | 9 ++++++--- drivers/gpu/ipu-v3/ipu-dp.c | 25 ++++++++++++++++++++++--- include/video/imx-ipu-v3.h | 2 ++ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index 544d8eaf7c36..9d8f1b65ac0d 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -651,11 +651,14 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, ics = ipu_drm_fourcc_to_colorspace(fb->format->format); switch (ipu_plane->dp_flow) { case IPU_DP_FLOW_SYNC_BG: - ipu_dp_setup_channel(ipu_plane->dp, ics, IPUV3_COLORSPACE_RGB); + ipu_dp_setup_channel(ipu_plane->dp, DRM_COLOR_YCBCR_BT601, + DRM_COLOR_YCBCR_LIMITED_RANGE, ics, + IPUV3_COLORSPACE_RGB); break; case IPU_DP_FLOW_SYNC_FG: - ipu_dp_setup_channel(ipu_plane->dp, ics, - IPUV3_COLORSPACE_UNKNOWN); + ipu_dp_setup_channel(ipu_plane->dp, DRM_COLOR_YCBCR_BT601, + DRM_COLOR_YCBCR_LIMITED_RANGE, ics, + IPUV3_COLORSPACE_UNKNOWN); break; } diff --git a/drivers/gpu/ipu-v3/ipu-dp.c b/drivers/gpu/ipu-v3/ipu-dp.c index 8f67e985f26a..6a558205db96 100644 --- a/drivers/gpu/ipu-v3/ipu-dp.c +++ b/drivers/gpu/ipu-v3/ipu-dp.c @@ -10,6 +10,7 @@ #include #include +#include #include