summaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/apic/apic.c4
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c6
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c9
-rw-r--r--arch/x86/kernel/cpu/amd.c4
-rw-r--r--arch/x86/kernel/cpu/intel.c1
-rw-r--r--arch/x86/kernel/cpu/mce/core.c7
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c18
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c6
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c117
-rw-r--r--arch/x86/kernel/cpu/topology.c2
-rw-r--r--arch/x86/kernel/fpu/core.c9
-rw-r--r--arch/x86/kernel/hw_breakpoint.c61
-rw-r--r--arch/x86/kernel/sev-es-shared.c4
-rw-r--r--arch/x86/kernel/sev-es.c14
-rw-r--r--arch/x86/kernel/smpboot.c20
-rw-r--r--arch/x86/kernel/step.c10
16 files changed, 180 insertions, 112 deletions
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 6bd20c0de8bc..7f4c081f59f0 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -41,6 +41,7 @@
#include <asm/perf_event.h>
#include <asm/x86_init.h>
#include <linux/atomic.h>
+#include <asm/barrier.h>
#include <asm/mpspec.h>
#include <asm/i8259.h>
#include <asm/proto.h>
@@ -477,6 +478,9 @@ static int lapic_next_deadline(unsigned long delta,
{
u64 tsc;
+ /* This MSR is special and need a special fence: */
+ weak_wrmsr_fence();
+
tsc = rdtsc();
wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
return 0;
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index df6adc5674c9..f4da9bb69a88 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -29,7 +29,8 @@ static void x2apic_send_IPI(int cpu, int vector)
{
u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu);
- x2apic_wrmsr_fence();
+ /* x2apic MSRs are special and need a special fence: */
+ weak_wrmsr_fence();
__x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL);
}
@@ -41,7 +42,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
unsigned long flags;
u32 dest;
- x2apic_wrmsr_fence();
+ /* x2apic MSRs are special and need a special fence: */
+ weak_wrmsr_fence();
local_irq_save(flags);
tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask);
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 0e4e81971567..6bde05a86b4e 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -43,7 +43,8 @@ static void x2apic_send_IPI(int cpu, int vector)
{
u32 dest = per_cpu(x86_cpu_to_apicid, cpu);
- x2apic_wrmsr_fence();
+ /* x2apic MSRs are special and need a special fence: */
+ weak_wrmsr_fence();
__x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL);
}
@@ -54,7 +55,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
unsigned long this_cpu;
unsigned long flags;
- x2apic_wrmsr_fence();
+ /* x2apic MSRs are special and need a special fence: */
+ weak_wrmsr_fence();
local_irq_save(flags);
@@ -125,7 +127,8 @@ void __x2apic_send_IPI_shorthand(int vector, u32 which)
{
unsigned long cfg = __prepare_ICR(which, vector, 0);
- x2apic_wrmsr_fence();
+ /* x2apic MSRs are special and need a special fence: */
+ weak_wrmsr_fence();
native_x2apic_icr_write(cfg, 0);
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f8ca66f3d861..347a956f71ca 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -542,12 +542,12 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
u32 ecx;
ecx = cpuid_ecx(0x8000001e);
- nodes_per_socket = ((ecx >> 8) & 7) + 1;
+ __max_die_per_package = nodes_per_socket = ((ecx >> 8) & 7) + 1;
} else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
u64 value;
rdmsrl(MSR_FAM10H_NODE_ID, value);
- nodes_per_socket = ((value >> 3) & 7) + 1;
+ __max_die_per_package = nodes_per_socket = ((value >> 3) & 7) + 1;
}
if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) &&
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 59a1e3ce3f14..816fdbec795a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -1159,6 +1159,7 @@ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 1),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 1),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 1),
{}
};
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 13d3f1cbda17..e133ce1e562b 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1992,10 +1992,9 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
* that out because it's an indirect call. Annotate it.
*/
instrumentation_begin();
- trace_hardirqs_off_finish();
+
machine_check_vector(regs);
- if (regs->flags & X86_EFLAGS_IF)
- trace_hardirqs_on_prepare();
+
instrumentation_end();
irqentry_nmi_exit(regs, irq_state);
}
@@ -2004,7 +2003,9 @@ static __always_inline void exc_machine_check_user(struct pt_regs *regs)
{
irqentry_enter_from_user_mode(regs);
instrumentation_begin();
+
machine_check_vector(regs);
+
instrumentation_end();
irqentry_exit_to_user_mode(regs);
}
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index f628e3dc150f..43b54bef5448 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -135,14 +135,32 @@ static void hv_machine_shutdown(void)
{
if (kexec_in_progress && hv_kexec_handler)
hv_kexec_handler();
+
+ /*
+ * Call hv_cpu_die() on all the CPUs, otherwise later the hypervisor
+ * corrupts the old VP Assist Pages and can crash the kexec kernel.
+ */
+ if (kexec_in_progress && hyperv_init_cpuhp > 0)
+ cpuhp_remove_state(hyperv_init_cpuhp);
+
+ /* The function calls stop_other_cpus(). */
native_machine_shutdown();
+
+ /* Disable the hypercall page when there is only 1 active CPU. */
+ if (kexec_in_progress)
+ hyperv_cleanup();
}
static void hv_machine_crash_shutdown(struct pt_regs *regs)
{
if (hv_crash_handler)
hv_crash_handler(regs);
+
+ /* The function calls crash_smp_send_stop(). */
native_machine_crash_shutdown(regs);
+
+ /* Disable the hypercall page when there is only 1 active CPU. */
+ hyperv_cleanup();
}
#endif /* CONFIG_KEXEC_CORE */
#endif /* CONFIG_HYPERV */
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 23ad8e953dfb..a29997e6cf9e 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -167,9 +167,6 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end,
*repeat = 0;
*uniform = 1;
- /* Make end inclusive instead of exclusive */
- end--;
-
prev_match = MTRR_TYPE_INVALID;
for (i = 0; i < num_var_ranges; ++i) {
unsigned short start_state, end_state, inclusive;
@@ -261,6 +258,9 @@ u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform)
int repeat;
u64 partial_end;
+ /* Make end inclusive instead of exclusive */
+ end--;
+
if (!mtrr_state_set)
return MTRR_TYPE_INVALID;
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 29ffb95b25ff..460f3e0df106 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -525,89 +525,70 @@ static void rdtgroup_remove(struct rdtgroup *rdtgrp)
kfree(rdtgrp);
}
-struct task_move_callback {
- struct callback_head work;
- struct rdtgroup *rdtgrp;
-};
-
-static void move_myself(struct callback_head *head)
+static void _update_task_closid_rmid(void *task)
{
- struct task_move_callback *callback;
- struct rdtgroup *rdtgrp;
-
- callback = container_of(head, struct task_move_callback, work);
- rdtgrp = callback->rdtgrp;
-
/*
- * If resource group was deleted before this task work callback
- * was invoked, then assign the task to root group and free the
- * resource group.
+ * If the task is still current on this CPU, update PQR_ASSOC MSR.
+ * Otherwise, the MSR is updated when the task is scheduled in.
*/
- if (atomic_dec_and_test(&rdtgrp->waitcount) &&
- (rdtgrp->flags & RDT_DELETED)) {
- current->closid = 0;
- current->rmid = 0;
- rdtgroup_remove(rdtgrp);
- }
-
- if (unlikely(current->flags & PF_EXITING))
- goto out;
-
- preempt_disable();
- /* update PQR_ASSOC MSR to make resource group go into effect */
- resctrl_sched_in();
- preempt_enable();
+ if (task == current)
+ resctrl_sched_in();
+}
-out:
- kfree(callback);
+static void update_task_closid_rmid(struct task_struct *t)
+{
+ if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
+ smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
+ else
+ _update_task_closid_rmid(t);
}
static int __rdtgroup_move_task(struct task_struct *tsk,
struct rdtgroup *rdtgrp)
{
- struct task_move_callback *callback;
- int ret;
-
- callback = kzalloc(sizeof(*callback), GFP_KERNEL);
- if (!callback)
- return -ENOMEM;
- callback->work.func = move_myself;
- callback->rdtgrp = rdtgrp;
+ /* If the task is already in rdtgrp, no need to move the task. */
+ if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid &&
+ tsk->rmid == rdtgrp->mon.rmid) ||
+ (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid &&
+ tsk->closid == rdtgrp->mon.parent->closid))
+ return 0;
/*
- * Take a refcount, so rdtgrp cannot be freed before the
- * callback has been invoked.
+ * Set the task's closid/rmid before the PQR_ASSOC MSR can be
+ * updated by them.
+ *
+ * For ctrl_mon groups, move both closid and rmid.
+ * For monitor groups, can move the tasks only from
+ * their parent CTRL group.
*/
- atomic_inc(&rdtgrp->waitcount);
- ret = task_work_add(tsk, &callback->work, TWA_RESUME);
- if (ret) {
- /*
- * Task is exiting. Drop the refcount and free the callback.
- * No need to check the refcount as the group cannot be
- * deleted before the write function unlocks rdtgroup_mutex.
- */
- atomic_dec(&rdtgrp->waitcount);
- kfree(callback);
- rdt_last_cmd_puts("Task exited\n");
- } else {
- /*
- * For ctrl_mon groups move both closid and rmid.
- * For monitor groups, can move the tasks only from
- * their parent CTRL group.
- */
- if (rdtgrp->type == RDTCTRL_GROUP) {
- tsk->closid = rdtgrp->closid;
+
+ if (rdtgrp->type == RDTCTRL_GROUP) {
+ tsk->closid = rdtgrp->closid;
+ tsk->rmid = rdtgrp->mon.rmid;
+ } else if (rdtgrp->type == RDTMON_GROUP) {
+ if (rdtgrp->mon.parent->closid == tsk->closid) {
tsk->rmid = rdtgrp->mon.rmid;
- } else if (rdtgrp->type == RDTMON_GROUP) {
- if (rdtgrp->mon.parent->closid == tsk->closid) {
- tsk->rmid = rdtgrp->mon.rmid;
- } else {
- rdt_last_cmd_puts("Can't move task to different control group\n");
- ret = -EINVAL;
- }
+ } else {
+ rdt_last_cmd_puts("Can't move task to different control group\n");
+ return -EINVAL;
}
}
- return ret;
+
+ /*
+ * Ensure the task's closid and rmid are written before determining if
+ * the task is current that will decide if it will be interrupted.
+ */
+ barrier();
+
+ /*
+ * By now, the task's closid and rmid are set. If the task is current
+ * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
+ * group go into effect. If the task is not current, the MSR will be
+ * updated when the task is scheduled in.
+ */
+ update_task_closid_rmid(tsk);
+
+ return 0;
}
static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 1068002c8532..8678864ce712 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -25,10 +25,10 @@
#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f)
#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff)
-#ifdef CONFIG_SMP
unsigned int __max_die_per_package __read_mostly = 1;
EXPORT_SYMBOL(__max_die_per_package);
+#ifdef CONFIG_SMP
/*
* Check if given CPUID extended toplogy "leaf" is implemented
*/
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index eb86a2b831b1..571220ac8bea 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -121,7 +121,7 @@ int copy_fpregs_to_fpstate(struct fpu *fpu)
}
EXPORT_SYMBOL(copy_fpregs_to_fpstate);
-void kernel_fpu_begin(void)
+void kernel_fpu_begin_mask(unsigned int kfpu_mask)
{
preempt_disable();
@@ -141,13 +141,14 @@ void kernel_fpu_begin(void)
}
__cpu_invalidate_fpregs_state();
- if (boot_cpu_has(X86_FEATURE_XMM))
+ /* Put sane initial values into the control registers. */
+ if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM))
ldmxcsr(MXCSR_DEFAULT);
- if (boot_cpu_has(X86_FEATURE_FPU))
+ if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU))
asm volatile ("fninit");
}
-EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask);
void kernel_fpu_end(void)
{
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 03aa33b58165..668a4a6533d9 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -269,6 +269,20 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
CPU_ENTRY_AREA_TOTAL_SIZE))
return true;
+ /*
+ * When FSGSBASE is enabled, paranoid_entry() fetches the per-CPU
+ * GSBASE value via __per_cpu_offset or pcpu_unit_offsets.
+ */
+#ifdef CONFIG_SMP
+ if (within_area(addr, end, (unsigned long)__per_cpu_offset,
+ sizeof(unsigned long) * nr_cpu_ids))
+ return true;
+#else
+ if (within_area(addr, end, (unsigned long)&pcpu_unit_offsets,
+ sizeof(pcpu_unit_offsets)))
+ return true;
+#endif
+
for_each_possible_cpu(cpu) {
/* The original rw GDT is being used after load_direct_gdt() */
if (within_area(addr, end, (unsigned long)get_cpu_gdt_rw(cpu),
@@ -293,6 +307,14 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
(unsigned long)&per_cpu(cpu_tlbstate, cpu),
sizeof(struct tlb_state)))
return true;
+
+ /*
+ * When in guest (X86_FEATURE_HYPERVISOR), local_db_save()
+ * will read per-cpu cpu_dr7 before clear dr7 register.
+ */
+ if (within_area(addr, end, (unsigned long)&per_cpu(cpu_dr7, cpu),
+ sizeof(cpu_dr7)))
+ return true;
}
return false;
@@ -491,15 +513,12 @@ static int hw_breakpoint_handler(struct die_args *args)
struct perf_event *bp;
unsigned long *dr6_p;
unsigned long dr6;
+ bool bpx;
/* The DR6 value is pointed by args->err */
dr6_p = (unsigned long *)ERR_PTR(args->err);
dr6 = *dr6_p;
- /* If it's a single step, TRAP bits are random */
- if (dr6 & DR_STEP)
- return NOTIFY_DONE;
-
/* Do an early return if no trap bits are set in DR6 */
if ((dr6 & DR_TRAP_BITS) == 0)
return NOTIFY_DONE;
@@ -509,28 +528,29 @@ static int hw_breakpoint_handler(struct die_args *args)
if (likely(!(dr6 & (DR_TRAP0 << i))))
continue;
+ bp = this_cpu_read(bp_per_reg[i]);
+ if (!bp)
+ continue;
+
+ bpx = bp->hw.info.type == X86_BREAKPOINT_EXECUTE;
+
/*
- * The counter may be concurrently released but that can only
- * occur from a call_rcu() path. We can then safely fetch
- * the breakpoint, use its callback, touch its counter
- * while we are in an rcu_read_lock() path.
+ * TF and data breakpoints are traps and can be merged, however
+ * instruction breakpoints are faults and will be raised
+ * separately.
+ *
+ * However DR6 can indicate both TF and instruction
+ * breakpoints. In that case take TF as that has precedence and
+ * delay the instruction breakpoint for the next exception.
*/
- rcu_read_lock();
+ if (bpx && (dr6 & DR_STEP))
+ continue;
- bp = this_cpu_read(bp_per_reg[i]);
/*
* Reset the 'i'th TRAP bit in dr6 to denote completion of
* exception handling
*/
(*dr6_p) &= ~(DR_TRAP0 << i);
- /*
- * bp can be NULL due to lazy debug register switching
- * or due to concurrent perf counter removing.
- */
- if (!bp) {
- rcu_read_unlock();
- break;
- }
perf_bp_event(bp, args->regs);
@@ -538,11 +558,10 @@ static int hw_breakpoint_handler(struct die_args *args)
* Set up resume flag to avoid breakpoint recursion when
* returning back to origin.
*/
- if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
+ if (bpx)
args->regs->flags |= X86_EFLAGS_RF;
-
- rcu_read_unlock();
}
+
/*
* Further processing in do_debug() is needed for a) user-space
* breakpoints (to generate signals) and b) when the system has
diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c
index 7d04b356d44d..cdc04d091242 100644
--- a/arch/x86/kernel/sev-es-shared.c
+++ b/arch/x86/kernel/sev-es-shared.c
@@ -305,14 +305,14 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
case 0xe4:
case 0xe5:
*exitinfo |= IOIO_TYPE_IN;
- *exitinfo |= (u64)insn->immediate.value << 16;
+ *exitinfo |= (u8)insn->immediate.value << 16;
break;
/* OUT immediate opcodes */
case 0xe6:
case 0xe7:
*exitinfo |= IOIO_TYPE_OUT;
- *exitinfo |= (u64)insn->immediate.value << 16;
+ *exitinfo |= (u8)insn->immediate.value << 16;
break;
/* IN register opcodes */
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index 0bd1a0fc587e..84c1821819af 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -225,7 +225,7 @@ static inline u64 sev_es_rd_ghcb_msr(void)
return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
}
-static inline void sev_es_wr_ghcb_msr(u64 val)
+static __always_inline void sev_es_wr_ghcb_msr(u64 val)
{
u32 low, high;
@@ -286,6 +286,12 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
u16 d2;
u8 d1;
+ /* If instruction ran in kernel mode and the I/O buffer is in kernel space */
+ if (!user_mode(ctxt->regs) && !access_ok(target, size)) {
+ memcpy(dst, buf, size);
+ return ES_OK;
+ }
+
switch (size) {
case 1:
memcpy(&d1, buf, 1);
@@ -335,6 +341,12 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
u16 d2;
u8 d1;
+ /* If instruction ran in kernel mode and the I/O buffer is in kernel space */
+ if (!user_mode(ctxt->regs) && !access_ok(s, size)) {
+ memcpy(buf, src, size);
+ return ES_OK;
+ }
+
switch (size) {
case 1:
if (get_user(d1, s))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8ca66af96a54..02813a7f3a7c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -56,6 +56,7 @@
#include <linux/numa.h>
#include <linux/pgtable.h>
#include <linux/overflow.h>
+#include <linux/syscore_ops.h>
#include <asm/acpi.h>
#include <asm/desc.h>
@@ -1832,6 +1833,7 @@ void arch_set_max_freq_ratio(bool turbo_disabled)
arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
arch_turbo_freq_ratio;
}
+EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
static bool turbo_disabled(void)
{
@@ -2083,6 +2085,23 @@ static void init_counter_refs(void)
this_cpu_write(arch_prev_mperf, mperf);
}
+#ifdef CONFIG_PM_SLEEP
+static struct syscore_ops freq_invariance_syscore_ops = {
+ .resume = init_counter_refs,
+};
+
+static void register_freq_invariance_syscore_ops(void)
+{
+ /* Bail out if registered already. */
+ if (freq_invariance_syscore_ops.node.prev)
+ return;
+
+ register_syscore_ops(&freq_invariance_syscore_ops);
+}
+#else
+static inline void register_freq_invariance_syscore_ops(void) {}
+#endif
+
static void init_freq_invariance(bool secondary, bool cppc_ready)
{
bool ret = false;
@@ -2109,6 +2128,7 @@ static void init_freq_invariance(bool secondary, bool cppc_ready)
if (ret) {
init_counter_refs();
static_branch_enable(&arch_scale_freq_key);
+ register_freq_invariance_syscore_ops();
pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
} else {
pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 60d2c3798ba2..0f3c307b37b3 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -127,12 +127,17 @@ static int enable_single_step(struct task_struct *child)
regs->flags |= X86_EFLAGS_TF;
/*
- * Always set TIF_SINGLESTEP - this guarantees that
- * we single-step system calls etc.. This will also
+ * Always set TIF_SINGLESTEP. This will also
* cause us to set TF when returning to user mode.
*/
set_tsk_thread_flag(child, TIF_SINGLESTEP);
+ /*
+ * Ensure that a trap is triggered once stepping out of a system
+ * call prior to executing any user instruction.
+ */
+ set_task_syscall_work(child, SYSCALL_EXIT_TRAP);
+
oflags = regs->flags;
/* Set TF on the kernel stack.. */
@@ -230,6 +235,7 @@ void user_disable_single_step(struct task_struct *child)
/* Always clear TIF_SINGLESTEP... */
clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+ clear_task_syscall_work(child, SYSCALL_EXIT_TRAP);
/* But touch TF only if it was set by us.. */
if (test_and_clear_tsk_thread_flag(child, TIF_FORCED_TF))