Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Cross-merge networking fixes after downstream PR (net-6.13-rc2). No conflicts or adjacent changes. Signed-off-by: Jakub Kicinski <kuba@kernel.org>
author: Jakub Kicinski <kuba@kernel.org> 2024-12-05 11:48:58 -0800
committer: Jakub Kicinski <kuba@kernel.org> 2024-12-05 11:50:14 -0800
commit: 302cc446cbd92aadff72a647cebc13d5634f8342 (patch)
tree: 759fe019d2e1c82ca8590f34c7ba46ab9ddada6e /drivers/gpu/drm/amd/display
parent: da4fa00abe5674d3d165cfd8032c740e8aab4d3b (diff)
parent: 896d8946da97332d4dc80fa1937d8dd6b1c35ad4 (diff)
19 files changed, 185 insertions, 70 deletions
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index df17e79c45c7..11e3f2f3b174 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -7,20 +7,21 @@ menu "Display Engine Configuration"
 config DRM_AMD_DC
 	bool "AMD DC - Enable new display engine"
 	default y
-	depends on BROKEN || !CC_IS_CLANG || ARM64 || RISCV || SPARC64 || X86_64
+	depends on BROKEN || !CC_IS_CLANG || ARM64 || LOONGARCH || RISCV || SPARC64 || X86_64
 	select SND_HDA_COMPONENT if SND_HDA_CORE
 	# !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752
-	select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && !(CC_IS_CLANG && (ARM64 || RISCV))
+	select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && !(CC_IS_CLANG && (ARM64 || LOONGARCH || RISCV))
 	help
 	  Choose this option if you want to use the new display engine
 	  support for AMDGPU. This adds required support for Vega and
 	  Raven ASICs.
 
-	  calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64)
-	  architectures built with Clang (all released versions), whereby the stack
-	  frame gets blown up to well over 5k.  This would cause an immediate kernel
-	  panic on most architectures.  We'll revert this when the following bug report
-	  has been resolved: https://github.com/llvm/llvm-project/issues/41896.
+	  calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 ||
+	  ARM64 || LOONGARCH || RISCV) architectures built with Clang (all released
+	  versions), whereby the stack frame gets blown up to well over 5k.  This
+	  would cause an immediate kernel panic on most architectures.  We'll revert
+	  this when the following bug report has been resolved:
+	  https://github.com/llvm/llvm-project/issues/41896.
 
 config DRM_AMD_DC_FP
 	def_bool n
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 7872c6cabb14..1dd26d5df6b9 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -3141,7 +3141,10 @@ static void restore_planes_and_stream_state(
 		return;
 
 	for (i = 0; i < status->plane_count; i++) {
+		/* refcount will always be valid, restore everything else */
+		struct kref refcount = status->plane_states[i]->refcount;
 		*status->plane_states[i] = scratch->plane_states[i];
+		status->plane_states[i]->refcount = refcount;
 	}
 	*stream = scratch->stream_state;
 }
@@ -6097,11 +6100,11 @@ struct dc_power_profile dc_get_power_profile_for_dc_state(const struct dc_state
 {
 	struct dc_power_profile profile = { 0 };
 
-	if (!context || !context->clk_mgr || !context->clk_mgr->ctx || !context->clk_mgr->ctx->dc)
+	profile.power_level = !context->bw_ctx.bw.dcn.clk.p_state_change_support;
+	if (!context->clk_mgr || !context->clk_mgr->ctx || !context->clk_mgr->ctx->dc)
 		return profile;
 	struct dc *dc = context->clk_mgr->ctx->dc;
 
-
 	if (dc->res_pool->funcs->get_power_profile)
 		profile.power_level = dc->res_pool->funcs->get_power_profile(context);
 	return profile;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index 0419ee7f22a5..252af83e34a5 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -898,6 +898,9 @@ void hwss_setup_dpp(union block_sequence_params *params)
 	struct dpp *dpp = pipe_ctx->plane_res.dpp;
 	struct dc_plane_state *plane_state = pipe_ctx->plane_state;
 
+	if (!plane_state)
+		return;
+
 	if (dpp && dpp->funcs->dpp_setup) {
 		// program the input csc
 		dpp->funcs->dpp_setup(dpp,
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 33125b95c3a1..619fad17de55 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -1501,6 +1501,10 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
 		res = spl_calculate_scaler_params(spl_in, spl_out);
 		// Convert respective out params from SPL to scaler data
 		translate_SPL_out_params_to_pipe_ctx(pipe_ctx, spl_out);
+
+		/* Ignore scaler failure if pipe context plane is phantom plane */
+		if (!res && plane_state->is_phantom)
+			res = true;
 	} else {
 #endif
 	/* depends on h_active */
@@ -1571,6 +1575,10 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
 					&plane_state->scaling_quality);
 	}
 
+	/* Ignore scaler failure if pipe context plane is phantom plane */
+	if (!res && plane_state->is_phantom)
+		res = true;
+
 	if (res && (pipe_ctx->plane_res.scl_data.taps.v_taps != temp.v_taps ||
 		pipe_ctx->plane_res.scl_data.taps.h_taps != temp.h_taps ||
 		pipe_ctx->plane_res.scl_data.taps.v_taps_c != temp.v_taps_c ||
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index e143fab00a86..104051935884 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -55,7 +55,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.309"
+#define DC_VER "3.2.310"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
index 838d72eaa87f..b363f5360818 100644
--- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
@@ -1392,10 +1392,10 @@ static void dccg35_set_dtbclk_dto(
 
 		/* The recommended programming sequence to enable DTBCLK DTO to generate
 		 * valid pixel HPO DPSTREAM ENCODER, specifies that DTO source select should
-		 * be set only after DTO is enabled
+		 * be set only after DTO is enabled.
+		 * PIPEx_DTO_SRC_SEL should not be programmed during DTBCLK update since OTG may still be on, and the
+		 * programming is handled in program_pix_clk() regardless, so it can be removed from here.
 		 */
-		REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
-				PIPE_DTO_SRC_SEL[params->otg_inst], 2);
 	} else {
 		switch (params->otg_inst) {
 		case 0:
@@ -1412,9 +1412,12 @@ static void dccg35_set_dtbclk_dto(
 			break;
 		}
 
-		REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst],
-				DTBCLK_DTO_ENABLE[params->otg_inst], 0,
-				PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1);
+		/**
+		 * PIPEx_DTO_SRC_SEL should not be programmed during DTBCLK update since OTG may still be on, and the
+		 * programming is handled in program_pix_clk() regardless, so it can be removed from here.
+		 */
+		REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+				DTBCLK_DTO_ENABLE[params->otg_inst], 0);
 
 		REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0);
 		REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0);
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
index 92e43a1e4dd4..601320b1be81 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
@@ -11,6 +11,7 @@
 
 #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096
 #define DML_MAX_NUM_OF_SLICES_PER_DSC 4
+#define ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE
 
 const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type)
 {
@@ -3886,6 +3887,10 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch
 #endif
 
 	*p->hw_debug5 = false;
+#ifdef ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE
+	if (p->NumberOfActiveSurfaces > 1)
+		*p->hw_debug5 = true;
+#else
 	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
 		if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1)
 			&& p->display_cfg->plane_descriptors[k].surface.dcc.enable
@@ -3901,6 +3906,7 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch
 		dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5);
 #endif
 	}
+#endif
 }
 
 static enum dml2_odm_mode DecideODMMode(unsigned int HActive,
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c
index 6eccf0241d85..1ed21c1b86a5 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c
@@ -258,12 +258,25 @@ static unsigned int find_preferred_pipe_candidates(const struct dc_state *existi
 	 * However this condition comes with a caveat. We need to ignore pipes that will
 	 * require a change in OPP but still have the same stream id. For example during
 	 * an MPC to ODM transiton.
+	 *
+	 * Adding check to avoid pipe select on the head pipe by utilizing dc resource
+	 * helper function resource_get_primary_dpp_pipe and comparing the pipe index.
 	 */
 	if (existing_state) {
 		for (i = 0; i < pipe_count; i++) {
 			if (existing_state->res_ctx.pipe_ctx[i].stream && existing_state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id) {
+				struct pipe_ctx *head_pipe =
+					resource_is_pipe_type(&existing_state->res_ctx.pipe_ctx[i], DPP_PIPE) ?
+						resource_get_primary_dpp_pipe(&existing_state->res_ctx.pipe_ctx[i]) :
+							NULL;
+
+				// we should always respect the head pipe from selection
+				if (head_pipe && head_pipe->pipe_idx == i)
+					continue;
 				if (existing_state->res_ctx.pipe_ctx[i].plane_res.hubp &&
-					existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i)
+					existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i &&
+						(existing_state->res_ctx.pipe_ctx[i].prev_odm_pipe ||
+						existing_state->res_ctx.pipe_ctx[i].next_odm_pipe))
 					continue;
 
 				preferred_pipe_candidates[num_preferred_candidates++] = i;
@@ -292,6 +305,14 @@ static unsigned int find_last_resort_pipe_candidates(const struct dc_state *exis
 	 */
 	if (existing_state) {
 		for (i  = 0; i < pipe_count; i++) {
+			struct pipe_ctx *head_pipe =
+				resource_is_pipe_type(&existing_state->res_ctx.pipe_ctx[i], DPP_PIPE) ?
+					resource_get_primary_dpp_pipe(&existing_state->res_ctx.pipe_ctx[i]) :
+						NULL;
+
+			// we should always respect the head pipe from selection
+			if (head_pipe && head_pipe->pipe_idx == i)
+				continue;
 			if ((existing_state->res_ctx.pipe_ctx[i].plane_res.hubp &&
 				existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i) ||
 				existing_state->res_ctx.pipe_ctx[i].stream_res.tg)
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
index ebd5df1a36e8..d9aaebfa3a0a 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
@@ -1093,14 +1093,11 @@ static bool setup_dsc_config(
 	if (!is_dsc_possible)
 		goto done;
 
-	// Final decission: can we do DSC or not?
-	if (is_dsc_possible) {
-		// Fill out the rest of DSC settings
-		dsc_cfg->block_pred_enable = dsc_common_caps.is_block_pred_supported;
-		dsc_cfg->linebuf_depth = dsc_common_caps.lb_bit_depth;
-		dsc_cfg->version_minor = (dsc_common_caps.dsc_version & 0xf0) >> 4;
-		dsc_cfg->is_dp = dsc_sink_caps->is_dp;
-	}
+	/* Fill out the rest of DSC settings */
+	dsc_cfg->block_pred_enable = dsc_common_caps.is_block_pred_supported;
+	dsc_cfg->linebuf_depth = dsc_common_caps.lb_bit_depth;
+	dsc_cfg->version_minor = (dsc_common_caps.dsc_version & 0xf0) >> 4;
+	dsc_cfg->is_dp = dsc_sink_caps->is_dp;
 
 done:
 	if (!is_dsc_possible)
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h
index 4bd1dda07719..9fbd45c7dfef 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h
@@ -200,6 +200,7 @@ struct dcn_hubbub_registers {
 	uint32_t DCHUBBUB_ARB_FRAC_URG_BW_MALL_B;
 	uint32_t DCHUBBUB_TIMEOUT_DETECTION_CTRL1;
 	uint32_t DCHUBBUB_TIMEOUT_DETECTION_CTRL2;
+	uint32_t DCHUBBUB_CTRL_STATUS;
 };
 
 #define HUBBUB_REG_FIELD_LIST_DCN32(type) \
@@ -320,7 +321,12 @@ struct dcn_hubbub_registers {
 		type DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD;\
 		type DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD;\
 		type DCHUBBUB_TIMEOUT_DETECTION_EN;\
-		type DCHUBBUB_TIMEOUT_TIMER_RESET
+		type DCHUBBUB_TIMEOUT_TIMER_RESET;\
+		type ROB_UNDERFLOW_STATUS;\
+		type ROB_OVERFLOW_STATUS;\
+		type ROB_OVERFLOW_CLEAR;\
+		type DCHUBBUB_HW_DEBUG;\
+		type CSTATE_SWATH_CHK_GOOD_MODE
 
 #define HUBBUB_STUTTER_REG_FIELD_LIST(type) \
 		type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A;\
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h
index 036bb3e6c957..46d8f5c70750 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h
@@ -96,6 +96,7 @@ struct dcn20_hubbub {
 	unsigned int det1_size;
 	unsigned int det2_size;
 	unsigned int det3_size;
+	bool allow_sdpif_rate_limit_when_cstate_req;
 };
 
 void hubbub2_construct(struct dcn20_hubbub *hubbub,
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c
index 5d658e9bef64..92fab471b183 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c
@@ -1192,15 +1192,35 @@ static void dcn401_wait_for_det_update(struct hubbub *hubbub, int hubp_inst)
 	}
 }
 
-static void dcn401_program_timeout_thresholds(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs)
+static bool dcn401_program_arbiter(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs, bool safe_to_lower)
 {
 	struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
 
+	bool wm_pending = false;
+	uint32_t temp;
+
 	/* request backpressure and outstanding return threshold (unused)*/
 	//REG_UPDATE(DCHUBBUB_TIMEOUT_DETECTION_CTRL1, DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD, arb_regs->req_stall_threshold);
 
 	/* P-State stall threshold */
 	REG_UPDATE(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD, arb_regs->pstate_stall_threshold);
+
+	if (safe_to_lower || arb_regs->allow_sdpif_rate_limit_when_cstate_req > hubbub2->allow_sdpif_rate_limit_when_cstate_req) {
+		hubbub2->allow_sdpif_rate_limit_when_cstate_req = arb_regs->allow_sdpif_rate_limit_when_cstate_req;
+
+		/* only update the required bits */
+		REG_GET(DCHUBBUB_CTRL_STATUS, DCHUBBUB_HW_DEBUG, &temp);
+		if (hubbub2->allow_sdpif_rate_limit_when_cstate_req) {
+			temp |= (1 << 5);
+		} else {
+			temp &= ~(1 << 5);
+		}
+		REG_UPDATE(DCHUBBUB_CTRL_STATUS, DCHUBBUB_HW_DEBUG, temp);
+	} else {
+		wm_pending = true;
+	}
+
+	return wm_pending;
 }
 
 static const struct hubbub_funcs hubbub4_01_funcs = {
@@ -1226,7 +1246,7 @@ static const struct hubbub_funcs hubbub4_01_funcs = {
 	.program_det_segments = dcn401_program_det_segments,
 	.program_compbuf_segments = dcn401_program_compbuf_segments,
 	.wait_for_det_update = dcn401_wait_for_det_update,
-	.program_timeout_thresholds = dcn401_program_timeout_thresholds,
+	.program_arbiter = dcn401_program_arbiter,
 };
 
 void hubbub401_construct(struct dcn20_hubbub *hubbub2,
diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h
index 5f1960722ebd..b1d9ea9d1c3d 100644
--- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h
@@ -128,7 +128,12 @@
 	HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL1, DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD, mask_sh),\
 	HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD, mask_sh),\
 	HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_DETECTION_EN, mask_sh),\
-	HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_TIMER_RESET, mask_sh)
+	HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_TIMER_RESET, mask_sh),\
+	HUBBUB_SF(DCHUBBUB_CTRL_STATUS, ROB_UNDERFLOW_STATUS, mask_sh),\
+	HUBBUB_SF(DCHUBBUB_CTRL_STATUS, ROB_OVERFLOW_STATUS, mask_sh),\
+	HUBBUB_SF(DCHUBBUB_CTRL_STATUS, ROB_OVERFLOW_CLEAR, mask_sh),\
+	HUBBUB_SF(DCHUBBUB_CTRL_STATUS, DCHUBBUB_HW_DEBUG, mask_sh),\
+	HUBBUB_SF(DCHUBBUB_CTRL_STATUS, CSTATE_SWATH_CHK_GOOD_MODE, mask_sh)
 
 bool hubbub401_program_urgent_watermarks(
 		struct hubbub *hubbub,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index 05424a9af58b..b029ec1b26d3 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -1925,9 +1925,9 @@ static void dcn20_program_pipe(
 				dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size);
 	}
 
-	if (pipe_ctx->update_flags.raw ||
-	    (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.raw) ||
-	    pipe_ctx->stream->update_flags.raw)
+	if (pipe_ctx->plane_state && (pipe_ctx->update_flags.raw ||
+	    pipe_ctx->plane_state->update_flags.raw ||
+	    pipe_ctx->stream->update_flags.raw))
 		dcn20_update_dchubp_dpp(dc, pipe_ctx, context);
 
 	if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable ||
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
index e8cc1bfa73f3..5de11e2837c0 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
@@ -1488,6 +1488,10 @@ void dcn401_prepare_bandwidth(struct dc *dc,
 					&context->bw_ctx.bw.dcn.watermarks,
 					dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
 					false);
+	/* update timeout thresholds */
+	if (hubbub->funcs->program_arbiter) {
+		dc->wm_optimized_required |= hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, false);
+	}
 
 	/* decrease compbuf size */
 	if (hubbub->funcs->program_compbuf_segments) {
@@ -1529,6 +1533,10 @@ void dcn401_optimize_bandwidth(
 					&context->bw_ctx.bw.dcn.watermarks,
 					dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
 					true);
+	/* update timeout thresholds */
+	if (hubbub->funcs->program_arbiter) {
+		hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, true);
+	}
 
 	if (dc->clk_mgr->dc_mode_softmax_enabled)
 		if (dc->clk_mgr->clks.dramclk_khz > dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000 &&
@@ -1554,11 +1562,6 @@ void dcn401_optimize_bandwidth(
 						pipe_ctx->dlg_regs.min_dst_y_next_start);
 		}
 	}
-
-	/* update timeout thresholds */
-	if (hubbub->funcs->program_timeout_thresholds) {
-		hubbub->funcs->program_timeout_thresholds(hubbub, &context->bw_ctx.bw.dcn.arb_regs);
-	}
 }
 
 void dcn401_fams2_global_control_lock(struct dc *dc,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
index 6c1d41c0f099..52b745667ef7 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
@@ -228,7 +228,7 @@ struct hubbub_funcs {
 	void (*program_det_segments)(struct hubbub *hubbub, int hubp_inst, unsigned det_buffer_size_seg);
 	void (*program_compbuf_segments)(struct hubbub *hubbub, unsigned compbuf_size_seg, bool safe_to_increase);
 	void (*wait_for_det_update)(struct hubbub *hubbub, int hubp_inst);
-	void (*program_timeout_thresholds)(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs);
+	bool (*program_arbiter)(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs, bool safe_to_lower);
 };
 
 struct hubbub {
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h
index 7c8d61db153d..19568c359669 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h
@@ -612,7 +612,8 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context);
 	SR(DCHUBBUB_SDPIF_CFG1),                                                 \
 	SR(DCHUBBUB_MEM_PWR_MODE_CTRL),                                          \
 	SR(DCHUBBUB_TIMEOUT_DETECTION_CTRL1),                                    \
-	SR(DCHUBBUB_TIMEOUT_DETECTION_CTRL2)
+	SR(DCHUBBUB_TIMEOUT_DETECTION_CTRL2),									 \
+	SR(DCHUBBUB_CTRL_STATUS)
 
 /* DCCG */
 
diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c
index 614276200aa0..73a65913cb12 100644
--- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c
+++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c
@@ -99,7 +99,7 @@ static struct spl_rect calculate_plane_rec_in_timing_active(
 	 *
 	 * recout_x = 128 + round(plane_x * 2304 / 1920)
 	 * recout_w = 128 + round((plane_x + plane_w) * 2304 / 1920) - recout_x
-	 * recout_y = 0 + round(plane_y * 1440 / 1280)
+	 * recout_y = 0 + round(plane_y * 1440 / 1200)
 	 * recout_h = 0 + round((plane_y + plane_h) * 1440 / 1200) - recout_y
 	 *
 	 * NOTE: fixed point division is not error free. To reduce errors
@@ -739,14 +739,13 @@ static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in,
 			return SCL_MODE_SCALING_444_RGB_ENABLE;
 	}
 
-	/* Bypass YUV if at 1:1 with no ISHARP or if doing 2:1 YUV
-	 *  downscale without EASF
+	/*
+	 * Bypass YUV if Y is 1:1 with no ISHARP
+	 * Do not bypass UV at 1:1 for cositing to be applied
 	 */
-	if ((!enable_isharp) && (!enable_easf)) {
+	if (!enable_isharp) {
 		if (data->ratios.horz.value == one && data->ratios.vert.value == one)
 			return SCL_MODE_SCALING_420_LUMA_BYPASS;
-		if (data->ratios.horz_c.value == one && data->ratios.vert_c.value == one)
-			return SCL_MODE_SCALING_420_CHROMA_BYPASS;
 	}
 
 	return SCL_MODE_SCALING_420_YCBCR_ENABLE;
@@ -933,6 +932,7 @@ static bool spl_get_optimal_number_of_taps(
 	int min_taps_y, min_taps_c;
 	enum lb_memory_config lb_config;
 	bool skip_easf = false;
+	bool is_ycbcr = spl_dscl_is_video_format(spl_in->basic_in.format);
 
 	if (spl_scratch->scl_data.viewport.width > spl_scratch->scl_data.h_active &&
 		max_downscale_src_width != 0 &&
@@ -1074,10 +1074,9 @@ static bool spl_get_optimal_number_of_taps(
 
 	/* Sharpener requires scaler to be enabled, including for 1:1
 	 * Check if ISHARP can be enabled
-	 * If ISHARP is not enabled, for 1:1, set taps to 1 and disable
-	 *  EASF
-	 * For case of 2:1 YUV where chroma is 1:1, set taps to 1 if
-	 *  EASF is not enabled
+	 * If ISHARP is not enabled, set taps to 1 if ratio is 1:1
+	 *  except for chroma taps.  Keep previous taps so it can
+	 *  handle cositing
 	 */
 
 	*enable_isharp = spl_get_isharp_en(spl_in, spl_scratch);
@@ -1087,20 +1086,28 @@ static bool spl_get_optimal_number_of_taps(
 			spl_scratch->scl_data.taps.h_taps = 1;
 			spl_scratch->scl_data.taps.v_taps = 1;
 
-			if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c))
+			if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c) && !is_ycbcr)
 				spl_scratch->scl_data.taps.h_taps_c = 1;
 
-			if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c))
+			if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c) && !is_ycbcr)
 				spl_scratch->scl_data.taps.v_taps_c = 1;
 
 			*enable_easf_v = false;
 			*enable_easf_h = false;
 		} else {
 			if ((!*enable_easf_h) &&
+				(IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz)))
+				spl_scratch->scl_data.taps.h_taps = 1;
+
+			if ((!*enable_easf_v) &&
+				(IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert)))
+				spl_scratch->scl_data.taps.v_taps = 1;
+
+			if ((!*enable_easf_h) && !is_ycbcr &&
 				(IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c)))
 				spl_scratch->scl_data.taps.h_taps_c = 1;
 
-			if ((!*enable_easf_v) &&
+			if ((!*enable_easf_v) && !is_ycbcr &&
 				(IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c)))
 				spl_scratch->scl_data.taps.v_taps_c = 1;
 		}
@@ -1111,8 +1118,7 @@ static bool spl_get_optimal_number_of_taps(
 static void spl_set_black_color_data(enum spl_pixel_format format,
 			struct scl_black_color *scl_black_color)
 {
-	bool ycbcr = format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN
-					&& format <= SPL_PIXEL_FORMAT_VIDEO_END;
+	bool ycbcr = spl_dscl_is_video_format(format);
 	if (ycbcr)	{
 		scl_black_color->offset_rgb_y = BLACK_OFFSET_RGB_Y;
 		scl_black_color->offset_rgb_cbcr = BLACK_OFFSET_CBCR;
@@ -1746,6 +1752,32 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data,
 	spl_set_blur_scale_data(dscl_prog_data, data);
 }
 
+/* Calculate recout, scaling ratio, and viewport, then get optimal number of taps */
+static bool spl_calculate_number_of_taps(struct spl_in *spl_in, struct spl_scratch *spl_scratch, struct spl_out *spl_out,
+	bool *enable_easf_v, bool *enable_easf_h, bool *enable_isharp)
+{
+	bool res = false;
+
+	memset(spl_scratch, 0, sizeof(struct spl_scratch));
+	spl_scratch->scl_data.h_active = spl_in->h_active;
+	spl_scratch->scl_data.v_active = spl_in->v_active;
+
+	// All SPL calls
+	/* recout calculation */
+	/* depends on h_active */
+	spl_calculate_recout(spl_in, spl_scratch, spl_out);
+	/* depends on pixel format */
+	spl_calculate_scaling_ratios(spl_in, spl_scratch, spl_out);
+	/* depends on scaling ratios and recout, does not calculate offset yet */
+	spl_calculate_viewport_size(spl_in, spl_scratch);
+
+	res = spl_get_optimal_number_of_taps(
+			  spl_in->basic_out.max_downscale_src_width, spl_in,
+			  spl_scratch, &spl_in->scaling_quality, enable_easf_v,
+			  enable_easf_h, enable_isharp);
+	return res;
+}
+
 /* Calculate scaler parameters */
 bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out)
 {
@@ -1760,23 +1792,9 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out)
 	bool enable_isharp = false;
 	const struct spl_scaler_data *data = &spl_scratch.scl_data;
 
-	memset(&spl_scratch, 0, sizeof(struct spl_scratch));
-	spl_scratch.scl_data.h_active = spl_in->h_active;
-	spl_scratch.scl_data.v_active = spl_in->v_active;
+	res = spl_calculate_number_of_taps(spl_in, &spl_scratch, spl_out,
+		&enable_easf_v, &enable_easf_h, &enable_isharp);
 
-	// All SPL calls
-	/* recout calculation */
-	/* depends on h_active */
-	spl_calculate_recout(spl_in, &spl_scratch, spl_out);
-	/* depends on pixel format */
-	spl_calculate_scaling_ratios(spl_in, &spl_scratch, spl_out);
-	/* depends on scaling ratios and recout, does not calculate offset yet */
-	spl_calculate_viewport_size(spl_in, &spl_scratch);
-
-	res = spl_get_optimal_number_of_taps(
-			  spl_in->basic_out.max_downscale_src_width, spl_in,
-			  &spl_scratch, &spl_in->scaling_quality, &enable_easf_v,
-			  &enable_easf_h, &enable_isharp);
 	/*
 	 * Depends on recout, scaling ratios, h_active and taps
 	 * May need to re-check lb size after this in some obscure scenario
@@ -1824,3 +1842,20 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out)
 
 	return res;
 }
+
+/* External interface to get number of taps only */
+bool spl_get_number_of_taps(struct spl_in *spl_in, struct spl_out *spl_out)
+{
+	bool res = false;
+	bool enable_easf_v = false;
+	bool enable_easf_h = false;
+	bool enable_isharp = false;
+	struct spl_scratch spl_scratch;
+	struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data;
+	const struct spl_scaler_data *data = &spl_scratch.scl_data;
+
+	res = spl_calculate_number_of_taps(spl_in, &spl_scratch, spl_out,
+		&enable_easf_v, &enable_easf_h, &enable_isharp);
+	spl_set_taps_data(dscl_prog_data, data);
+	return res;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h
index 205e59a2a8ee..02a2d6725ed5 100644
--- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h
+++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h
@@ -13,4 +13,6 @@
 
 bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out);
 
+bool spl_get_number_of_taps(struct spl_in *spl_in, struct spl_out *spl_out);
+
 #endif /* __DC_SPL_H__ */
author	Jakub Kicinski <kuba@kernel.org>	2024-12-05 11:48:58 -0800
committer	Jakub Kicinski <kuba@kernel.org>	2024-12-05 11:50:14 -0800
commit	302cc446cbd92aadff72a647cebc13d5634f8342 (patch)
tree	759fe019d2e1c82ca8590f34c7ba46ab9ddada6e /drivers/gpu/drm/amd/display
parent	da4fa00abe5674d3d165cfd8032c740e8aab4d3b (diff)
parent	896d8946da97332d4dc80fa1937d8dd6b1c35ad4 (diff)