From 7506203089dceb1d9e1f35d37ad2e46d44798a6d Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 27 Jul 2023 14:45:59 -0400 Subject: intel_idle: Use __update_spec_ctrl() in intel_idle_ibrs() When intel_idle_ibrs() is called, it modifies the SPEC_CTRL MSR to 0 in order disable IBRS. However, the new MSR value isn't reflected in x86_spec_ctrl_current which is at odd with the other code that keep track of its state in that percpu variable. Use the new __update_spec_ctrl() to have the x86_spec_ctrl_current percpu value properly updated. Since spec-ctrl.h includes both msr.h and nospec-branch.h, we can remove those from the include file list. Signed-off-by: Waiman Long Signed-off-by: Ingo Molnar Acked-by: Rafael J. Wysocki Cc: Linus Torvalds Link: https://lore.kernel.org/r/20230727184600.26768-4-longman@redhat.com --- drivers/idle/intel_idle.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index ea5a6a14c553..86ac9a441f85 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -53,9 +53,8 @@ #include #include #include -#include #include -#include +#include #include #define INTEL_IDLE_VERSION "0.5.1" @@ -182,12 +181,12 @@ static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev, int ret; if (smt_active) - native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); + __update_spec_ctrl(0); ret = __intel_idle(dev, drv, index); if (smt_active) - native_wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); + __update_spec_ctrl(spec_ctrl); return ret; } -- cgit From aa1567a7e6440b8c3af4b0d8a8219d8fc5028c5f Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 27 Jul 2023 14:46:00 -0400 Subject: intel_idle: Add ibrs_off module parameter to force-disable IBRS Commit bf5835bcdb96 ("intel_idle: Disable IBRS during long idle") disables IBRS when the cstate is 6 or lower. However, there are some use cases where a customer may want to use max_cstate=1 to lower latency. Such use cases will suffer from the performance degradation caused by the enabling of IBRS in the sibling idle thread. Add a "ibrs_off" module parameter to force disable IBRS and the CPUIDLE_FLAG_IRQ_ENABLE flag if set. In the case of a Skylake server with max_cstate=1, this new ibrs_off option will likely increase the IRQ response latency as IRQ will now be disabled. When running SPECjbb2015 with cstates set to C1 on a Skylake system. First test when the kernel is booted with: "intel_idle.ibrs_off": max-jOPS = 117828, critical-jOPS = 66047 Then retest when the kernel is booted without the "intel_idle.ibrs_off" added: max-jOPS = 116408, critical-jOPS = 58958 That means booting with "intel_idle.ibrs_off" improves performance by: max-jOPS: +1.2%, which could be considered noise range. critical-jOPS: +12%, which is definitely a solid improvement. The admin-guide/pm/intel_idle.rst file is updated to add a description about the new "ibrs_off" module parameter. Signed-off-by: Waiman Long Signed-off-by: Ingo Molnar Acked-by: Rafael J. Wysocki Cc: Linus Torvalds Link: https://lore.kernel.org/r/20230727184600.26768-5-longman@redhat.com --- drivers/idle/intel_idle.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 86ac9a441f85..dcda0afecfc5 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -68,6 +68,7 @@ static int max_cstate = CPUIDLE_STATE_MAX - 1; static unsigned int disabled_states_mask __read_mostly; static unsigned int preferred_states_mask __read_mostly; static bool force_irq_on __read_mostly; +static bool ibrs_off __read_mostly; static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; @@ -1852,11 +1853,13 @@ static void state_update_enter_method(struct cpuidle_state *state, int cstate) } if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && - state->flags & CPUIDLE_FLAG_IBRS) { + ((state->flags & CPUIDLE_FLAG_IBRS) || ibrs_off)) { /* * IBRS mitigation requires that C-states are entered * with interrupts disabled. */ + if (ibrs_off && (state->flags & CPUIDLE_FLAG_IRQ_ENABLE)) + state->flags &= ~CPUIDLE_FLAG_IRQ_ENABLE; WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE); state->enter = intel_idle_ibrs; return; @@ -2175,3 +2178,9 @@ MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states"); * 'CPUIDLE_FLAG_INIT_XSTATE' and 'CPUIDLE_FLAG_IBRS' flags. */ module_param(force_irq_on, bool, 0444); +/* + * Force the disabling of IBRS when X86_FEATURE_KERNEL_IBRS is on and + * CPUIDLE_FLAG_IRQ_ENABLE isn't set. + */ +module_param(ibrs_off, bool, 0444); +MODULE_PARM_DESC(ibrs_off, "Disable IBRS when idle"); -- cgit